freedreno/ir3: xfb fix for duplicate outputs
We can't rely on regid to be unique: shaders can have multiple varyings with the same output value. Normally shader linking deduplicates these, but we still need to handle this case for xfb (transform feedback). So use the varying slot, rather than regid, as the unique identifier.

Fixes KHR-GLES31.core.gpu_shader5.fma_precision_*

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13605>
This commit is contained in:
parent
f6f760a98d
commit
7e998783db
|
@ -14,12 +14,6 @@ KHR-GL33.transform_feedback.query_vertex_separate_test,Fail
|
|||
# "*** Color comparison failed"
|
||||
KHR-GLES3.packed_depth_stencil.verify_read_pixels.depth24_stencil8,Fail
|
||||
|
||||
# "The values of resultStd[i] & 0xFFFFFFFE and resultFma[i] & 0xFFFFFFFE and resultCPU[i] & 0xFFFFFFFE are not bitwise equal for i = 0..99 "
|
||||
KHR-GLES31.core.gpu_shader5.fma_precision_float,Fail
|
||||
KHR-GLES31.core.gpu_shader5.fma_precision_vec2,Fail
|
||||
KHR-GLES31.core.gpu_shader5.fma_precision_vec3,Fail
|
||||
KHR-GLES31.core.gpu_shader5.fma_precision_vec4,Fail
|
||||
|
||||
# Lots of errors like "[279] Check failed. Received: [3,0,0,2] instead of: [5,0,0,2]"
|
||||
KHR-GLES31.core.geometry_shader.layered_framebuffer.depth_support,Fail
|
||||
|
||||
|
|
|
@ -823,14 +823,16 @@ ir3_link_stream_out(struct ir3_shader_linkage *l,
|
|||
continue;
|
||||
|
||||
for (idx = 0; idx < l->cnt; idx++) {
|
||||
if (l->var[idx].regid == v->outputs[k].regid)
|
||||
if (l->var[idx].slot == v->outputs[k].slot)
|
||||
break;
|
||||
nextloc = MAX2(nextloc, l->var[idx].loc + 4);
|
||||
}
|
||||
|
||||
/* add if not already in linkage map: */
|
||||
if (idx == l->cnt)
|
||||
ir3_link_add(l, v->outputs[k].regid, compmask, nextloc);
|
||||
if (idx == l->cnt) {
|
||||
ir3_link_add(l, v->outputs[k].slot, v->outputs[k].regid,
|
||||
compmask, nextloc);
|
||||
}
|
||||
|
||||
/* expand component-mask if needed, ie streaming out all components
|
||||
* but frag shader doesn't consume all components:
|
||||
|
|
|
@ -891,6 +891,7 @@ struct ir3_shader_linkage {
|
|||
|
||||
/* Map from VS output to location. */
|
||||
struct {
|
||||
uint8_t slot;
|
||||
uint8_t regid;
|
||||
uint8_t compmask;
|
||||
uint8_t loc;
|
||||
|
@ -907,8 +908,8 @@ struct ir3_shader_linkage {
|
|||
};
|
||||
|
||||
static inline void
|
||||
ir3_link_add(struct ir3_shader_linkage *l, uint8_t regid_, uint8_t compmask,
|
||||
uint8_t loc)
|
||||
ir3_link_add(struct ir3_shader_linkage *l, uint8_t slot, uint8_t regid_,
|
||||
uint8_t compmask, uint8_t loc)
|
||||
{
|
||||
for (int j = 0; j < util_last_bit(compmask); j++) {
|
||||
uint8_t comploc = loc + j;
|
||||
|
@ -921,6 +922,7 @@ ir3_link_add(struct ir3_shader_linkage *l, uint8_t regid_, uint8_t compmask,
|
|||
int i = l->cnt++;
|
||||
debug_assert(i < ARRAY_SIZE(l->var));
|
||||
|
||||
l->var[i].slot = slot;
|
||||
l->var[i].regid = regid_;
|
||||
l->var[i].compmask = compmask;
|
||||
l->var[i].loc = loc;
|
||||
|
@ -974,7 +976,8 @@ ir3_link_shaders(struct ir3_shader_linkage *l,
|
|||
if (fs->inputs[j].slot == VARYING_SLOT_CLIP_DIST1)
|
||||
l->clip1_loc = fs->inputs[j].inloc;
|
||||
|
||||
ir3_link_add(l, k >= 0 ? vs->outputs[k].regid : default_regid,
|
||||
ir3_link_add(l, fs->inputs[j].slot,
|
||||
k >= 0 ? vs->outputs[k].regid : default_regid,
|
||||
fs->inputs[j].compmask, fs->inputs[j].inloc);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -774,7 +774,7 @@ tu6_setup_streamout(struct tu_cs *cs,
|
|||
* a bit less ideal here..
|
||||
*/
|
||||
for (idx = 0; idx < l->cnt; idx++)
|
||||
if (l->var[idx].regid == v->outputs[k].regid)
|
||||
if (l->var[idx].slot == v->outputs[k].slot)
|
||||
break;
|
||||
|
||||
debug_assert(idx < l->cnt);
|
||||
|
@ -1006,12 +1006,12 @@ tu6_emit_vpc(struct tu_cs *cs,
|
|||
|
||||
if (layer_regid != regid(63, 0)) {
|
||||
layer_loc = linkage.max_loc;
|
||||
ir3_link_add(&linkage, layer_regid, 0x1, linkage.max_loc);
|
||||
ir3_link_add(&linkage, VARYING_SLOT_LAYER, layer_regid, 0x1, linkage.max_loc);
|
||||
}
|
||||
|
||||
if (view_regid != regid(63, 0)) {
|
||||
view_loc = linkage.max_loc;
|
||||
ir3_link_add(&linkage, view_regid, 0x1, linkage.max_loc);
|
||||
ir3_link_add(&linkage, VARYING_SLOT_VIEWPORT, view_regid, 0x1, linkage.max_loc);
|
||||
}
|
||||
|
||||
unsigned extra_pos = 0;
|
||||
|
@ -1023,14 +1023,15 @@ tu6_emit_vpc(struct tu_cs *cs,
|
|||
if (position_loc == 0xff)
|
||||
position_loc = linkage.max_loc;
|
||||
|
||||
ir3_link_add(&linkage, last_shader->outputs[i].regid,
|
||||
ir3_link_add(&linkage, last_shader->outputs[i].slot,
|
||||
last_shader->outputs[i].regid,
|
||||
0xf, position_loc + 4 * last_shader->outputs[i].view);
|
||||
extra_pos = MAX2(extra_pos, last_shader->outputs[i].view);
|
||||
}
|
||||
|
||||
if (pointsize_regid != regid(63, 0)) {
|
||||
pointsize_loc = linkage.max_loc;
|
||||
ir3_link_add(&linkage, pointsize_regid, 0x1, linkage.max_loc);
|
||||
ir3_link_add(&linkage, VARYING_SLOT_PSIZ, pointsize_regid, 0x1, linkage.max_loc);
|
||||
}
|
||||
|
||||
uint8_t clip_cull_mask = last_shader->clip_mask | last_shader->cull_mask;
|
||||
|
@ -1039,11 +1040,13 @@ tu6_emit_vpc(struct tu_cs *cs,
|
|||
uint32_t clip0_loc = linkage.clip0_loc, clip1_loc = linkage.clip1_loc;
|
||||
if (clip0_loc == 0xff && clip0_regid != regid(63, 0)) {
|
||||
clip0_loc = linkage.max_loc;
|
||||
ir3_link_add(&linkage, clip0_regid, clip_cull_mask & 0xf, linkage.max_loc);
|
||||
ir3_link_add(&linkage, VARYING_SLOT_CLIP_DIST0, clip0_regid,
|
||||
clip_cull_mask & 0xf, linkage.max_loc);
|
||||
}
|
||||
if (clip1_loc == 0xff && clip1_regid != regid(63, 0)) {
|
||||
clip1_loc = linkage.max_loc;
|
||||
ir3_link_add(&linkage, clip1_regid, clip_cull_mask >> 4, linkage.max_loc);
|
||||
ir3_link_add(&linkage, VARYING_SLOT_CLIP_DIST1, clip1_regid,
|
||||
clip_cull_mask >> 4, linkage.max_loc);
|
||||
}
|
||||
|
||||
tu6_setup_streamout(cs, last_shader, &linkage);
|
||||
|
@ -1054,7 +1057,7 @@ tu6_emit_vpc(struct tu_cs *cs,
|
|||
* any unused code and make sure that optimizations don't remove it.
|
||||
*/
|
||||
if (linkage.cnt == 0)
|
||||
ir3_link_add(&linkage, 0, 0x1, linkage.max_loc);
|
||||
ir3_link_add(&linkage, 0, 0, 0x1, linkage.max_loc);
|
||||
|
||||
/* map outputs of the last shader to VPC */
|
||||
assert(linkage.cnt <= 32);
|
||||
|
|
|
@ -105,7 +105,7 @@ emit_stream_out(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
|
|||
* a bit less ideal here..
|
||||
*/
|
||||
for (idx = 0; idx < l->cnt; idx++)
|
||||
if (l->var[idx].regid == v->outputs[k].regid)
|
||||
if (l->var[idx].slot == v->outputs[k].slot)
|
||||
break;
|
||||
|
||||
debug_assert(idx < l->cnt);
|
||||
|
@ -408,11 +408,11 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
|||
|
||||
/* a5xx appends pos/psize to end of the linkage map: */
|
||||
if (VALIDREG(pos_regid))
|
||||
ir3_link_add(&l, pos_regid, 0xf, l.max_loc);
|
||||
ir3_link_add(&l, VARYING_SLOT_POS, pos_regid, 0xf, l.max_loc);
|
||||
|
||||
if (VALIDREG(psize_regid)) {
|
||||
psize_loc = l.max_loc;
|
||||
ir3_link_add(&l, psize_regid, 0x1, l.max_loc);
|
||||
ir3_link_add(&l, VARYING_SLOT_PSIZ, psize_regid, 0x1, l.max_loc);
|
||||
}
|
||||
|
||||
/* Handle the case where clip/cull distances aren't read by the FS. Make
|
||||
|
@ -422,13 +422,15 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
|||
if (clip0_loc == 0xff && VALIDREG(clip0_regid) &&
|
||||
(clip_cull_mask & 0xf) != 0) {
|
||||
clip0_loc = l.max_loc;
|
||||
ir3_link_add(&l, clip0_regid, clip_cull_mask & 0xf, l.max_loc);
|
||||
ir3_link_add(&l, VARYING_SLOT_CLIP_DIST0, clip0_regid,
|
||||
clip_cull_mask & 0xf, l.max_loc);
|
||||
}
|
||||
|
||||
if (clip1_loc == 0xff && VALIDREG(clip1_regid) &&
|
||||
(clip_cull_mask >> 4) != 0) {
|
||||
clip1_loc = l.max_loc;
|
||||
ir3_link_add(&l, clip1_regid, clip_cull_mask >> 4, l.max_loc);
|
||||
ir3_link_add(&l, VARYING_SLOT_CLIP_DIST1, clip1_regid,
|
||||
clip_cull_mask >> 4, l.max_loc);
|
||||
}
|
||||
|
||||
/* If we have stream-out, we use the full shader for binning
|
||||
|
|
|
@ -210,7 +210,7 @@ setup_stream_out(struct fd_context *ctx, struct fd6_program_state *state,
|
|||
* a bit less ideal here..
|
||||
*/
|
||||
for (idx = 0; idx < l->cnt; idx++)
|
||||
if (l->var[idx].regid == v->outputs[k].regid)
|
||||
if (l->var[idx].slot == v->outputs[k].slot)
|
||||
break;
|
||||
|
||||
debug_assert(idx < l->cnt);
|
||||
|
@ -560,17 +560,17 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
|
|||
|
||||
if (VALIDREG(layer_regid)) {
|
||||
layer_loc = l.max_loc;
|
||||
ir3_link_add(&l, layer_regid, 0x1, l.max_loc);
|
||||
ir3_link_add(&l, VARYING_SLOT_LAYER, layer_regid, 0x1, l.max_loc);
|
||||
}
|
||||
|
||||
if (VALIDREG(pos_regid)) {
|
||||
pos_loc = l.max_loc;
|
||||
ir3_link_add(&l, pos_regid, 0xf, l.max_loc);
|
||||
ir3_link_add(&l, VARYING_SLOT_POS, pos_regid, 0xf, l.max_loc);
|
||||
}
|
||||
|
||||
if (VALIDREG(psize_regid)) {
|
||||
psize_loc = l.max_loc;
|
||||
ir3_link_add(&l, psize_regid, 0x1, l.max_loc);
|
||||
ir3_link_add(&l, VARYING_SLOT_PSIZ, psize_regid, 0x1, l.max_loc);
|
||||
}
|
||||
|
||||
/* Handle the case where clip/cull distances aren't read by the FS. Make
|
||||
|
@ -580,13 +580,15 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
|
|||
if (clip0_loc == 0xff && VALIDREG(clip0_regid) &&
|
||||
(clip_cull_mask & 0xf) != 0) {
|
||||
clip0_loc = l.max_loc;
|
||||
ir3_link_add(&l, clip0_regid, clip_cull_mask & 0xf, l.max_loc);
|
||||
ir3_link_add(&l, VARYING_SLOT_CLIP_DIST0, clip0_regid,
|
||||
clip_cull_mask & 0xf, l.max_loc);
|
||||
}
|
||||
|
||||
if (clip1_loc == 0xff && VALIDREG(clip1_regid) &&
|
||||
(clip_cull_mask >> 4) != 0) {
|
||||
clip1_loc = l.max_loc;
|
||||
ir3_link_add(&l, clip1_regid, clip_cull_mask >> 4, l.max_loc);
|
||||
ir3_link_add(&l, VARYING_SLOT_CLIP_DIST1, clip1_regid,
|
||||
clip_cull_mask >> 4, l.max_loc);
|
||||
}
|
||||
|
||||
/* If we have stream-out, we use the full shader for binning
|
||||
|
|
Loading…
Reference in New Issue