freedreno/ir3: xfb fix for duplicate outputs

We can't rely on regid to be unique: a shader can have multiple varyings
with the same output value, and therefore the same regid.  Normally shader
linking deduplicates these, but we still need to handle the case for xfb
(transform feedback).  So use slot instead as the unique identifier.

Fixes KHR-GLES31.core.gpu_shader5.fma_precision_*

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13605>
This commit is contained in:
Rob Clark 2021-10-30 10:51:59 -07:00 committed by Marge Bot
parent f6f760a98d
commit 7e998783db
6 changed files with 37 additions and 31 deletions

View File

@ -14,12 +14,6 @@ KHR-GL33.transform_feedback.query_vertex_separate_test,Fail
# "*** Color comparison failed"
KHR-GLES3.packed_depth_stencil.verify_read_pixels.depth24_stencil8,Fail
# "The values of resultStd[i] & 0xFFFFFFFE and resultFma[i] & 0xFFFFFFFE and resultCPU[i] & 0xFFFFFFFE are not bitwise equal for i = 0..99 "
KHR-GLES31.core.gpu_shader5.fma_precision_float,Fail
KHR-GLES31.core.gpu_shader5.fma_precision_vec2,Fail
KHR-GLES31.core.gpu_shader5.fma_precision_vec3,Fail
KHR-GLES31.core.gpu_shader5.fma_precision_vec4,Fail
# Lots of errors like "[279] Check failed. Received: [3,0,0,2] instead of: [5,0,0,2]"
KHR-GLES31.core.geometry_shader.layered_framebuffer.depth_support,Fail

View File

@ -823,14 +823,16 @@ ir3_link_stream_out(struct ir3_shader_linkage *l,
continue;
for (idx = 0; idx < l->cnt; idx++) {
if (l->var[idx].regid == v->outputs[k].regid)
if (l->var[idx].slot == v->outputs[k].slot)
break;
nextloc = MAX2(nextloc, l->var[idx].loc + 4);
}
/* add if not already in linkage map: */
if (idx == l->cnt)
ir3_link_add(l, v->outputs[k].regid, compmask, nextloc);
if (idx == l->cnt) {
ir3_link_add(l, v->outputs[k].slot, v->outputs[k].regid,
compmask, nextloc);
}
/* expand component-mask if needed, ie streaming out all components
* but frag shader doesn't consume all components:

View File

@ -891,6 +891,7 @@ struct ir3_shader_linkage {
/* Map from VS output to location. */
struct {
uint8_t slot;
uint8_t regid;
uint8_t compmask;
uint8_t loc;
@ -907,8 +908,8 @@ struct ir3_shader_linkage {
};
static inline void
ir3_link_add(struct ir3_shader_linkage *l, uint8_t regid_, uint8_t compmask,
uint8_t loc)
ir3_link_add(struct ir3_shader_linkage *l, uint8_t slot, uint8_t regid_,
uint8_t compmask, uint8_t loc)
{
for (int j = 0; j < util_last_bit(compmask); j++) {
uint8_t comploc = loc + j;
@ -921,6 +922,7 @@ ir3_link_add(struct ir3_shader_linkage *l, uint8_t regid_, uint8_t compmask,
int i = l->cnt++;
debug_assert(i < ARRAY_SIZE(l->var));
l->var[i].slot = slot;
l->var[i].regid = regid_;
l->var[i].compmask = compmask;
l->var[i].loc = loc;
@ -974,7 +976,8 @@ ir3_link_shaders(struct ir3_shader_linkage *l,
if (fs->inputs[j].slot == VARYING_SLOT_CLIP_DIST1)
l->clip1_loc = fs->inputs[j].inloc;
ir3_link_add(l, k >= 0 ? vs->outputs[k].regid : default_regid,
ir3_link_add(l, fs->inputs[j].slot,
k >= 0 ? vs->outputs[k].regid : default_regid,
fs->inputs[j].compmask, fs->inputs[j].inloc);
}
}

View File

@ -774,7 +774,7 @@ tu6_setup_streamout(struct tu_cs *cs,
* a bit less ideal here..
*/
for (idx = 0; idx < l->cnt; idx++)
if (l->var[idx].regid == v->outputs[k].regid)
if (l->var[idx].slot == v->outputs[k].slot)
break;
debug_assert(idx < l->cnt);
@ -1006,12 +1006,12 @@ tu6_emit_vpc(struct tu_cs *cs,
if (layer_regid != regid(63, 0)) {
layer_loc = linkage.max_loc;
ir3_link_add(&linkage, layer_regid, 0x1, linkage.max_loc);
ir3_link_add(&linkage, VARYING_SLOT_LAYER, layer_regid, 0x1, linkage.max_loc);
}
if (view_regid != regid(63, 0)) {
view_loc = linkage.max_loc;
ir3_link_add(&linkage, view_regid, 0x1, linkage.max_loc);
ir3_link_add(&linkage, VARYING_SLOT_VIEWPORT, view_regid, 0x1, linkage.max_loc);
}
unsigned extra_pos = 0;
@ -1023,14 +1023,15 @@ tu6_emit_vpc(struct tu_cs *cs,
if (position_loc == 0xff)
position_loc = linkage.max_loc;
ir3_link_add(&linkage, last_shader->outputs[i].regid,
ir3_link_add(&linkage, last_shader->outputs[i].slot,
last_shader->outputs[i].regid,
0xf, position_loc + 4 * last_shader->outputs[i].view);
extra_pos = MAX2(extra_pos, last_shader->outputs[i].view);
}
if (pointsize_regid != regid(63, 0)) {
pointsize_loc = linkage.max_loc;
ir3_link_add(&linkage, pointsize_regid, 0x1, linkage.max_loc);
ir3_link_add(&linkage, VARYING_SLOT_PSIZ, pointsize_regid, 0x1, linkage.max_loc);
}
uint8_t clip_cull_mask = last_shader->clip_mask | last_shader->cull_mask;
@ -1039,11 +1040,13 @@ tu6_emit_vpc(struct tu_cs *cs,
uint32_t clip0_loc = linkage.clip0_loc, clip1_loc = linkage.clip1_loc;
if (clip0_loc == 0xff && clip0_regid != regid(63, 0)) {
clip0_loc = linkage.max_loc;
ir3_link_add(&linkage, clip0_regid, clip_cull_mask & 0xf, linkage.max_loc);
ir3_link_add(&linkage, VARYING_SLOT_CLIP_DIST0, clip0_regid,
clip_cull_mask & 0xf, linkage.max_loc);
}
if (clip1_loc == 0xff && clip1_regid != regid(63, 0)) {
clip1_loc = linkage.max_loc;
ir3_link_add(&linkage, clip1_regid, clip_cull_mask >> 4, linkage.max_loc);
ir3_link_add(&linkage, VARYING_SLOT_CLIP_DIST1, clip1_regid,
clip_cull_mask >> 4, linkage.max_loc);
}
tu6_setup_streamout(cs, last_shader, &linkage);
@ -1054,7 +1057,7 @@ tu6_emit_vpc(struct tu_cs *cs,
* any unused code and make sure that optimizations don't remove it.
*/
if (linkage.cnt == 0)
ir3_link_add(&linkage, 0, 0x1, linkage.max_loc);
ir3_link_add(&linkage, 0, 0, 0x1, linkage.max_loc);
/* map outputs of the last shader to VPC */
assert(linkage.cnt <= 32);

View File

@ -105,7 +105,7 @@ emit_stream_out(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
* a bit less ideal here..
*/
for (idx = 0; idx < l->cnt; idx++)
if (l->var[idx].regid == v->outputs[k].regid)
if (l->var[idx].slot == v->outputs[k].slot)
break;
debug_assert(idx < l->cnt);
@ -408,11 +408,11 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
/* a5xx appends pos/psize to end of the linkage map: */
if (VALIDREG(pos_regid))
ir3_link_add(&l, pos_regid, 0xf, l.max_loc);
ir3_link_add(&l, VARYING_SLOT_POS, pos_regid, 0xf, l.max_loc);
if (VALIDREG(psize_regid)) {
psize_loc = l.max_loc;
ir3_link_add(&l, psize_regid, 0x1, l.max_loc);
ir3_link_add(&l, VARYING_SLOT_PSIZ, psize_regid, 0x1, l.max_loc);
}
/* Handle the case where clip/cull distances aren't read by the FS. Make
@ -422,13 +422,15 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
if (clip0_loc == 0xff && VALIDREG(clip0_regid) &&
(clip_cull_mask & 0xf) != 0) {
clip0_loc = l.max_loc;
ir3_link_add(&l, clip0_regid, clip_cull_mask & 0xf, l.max_loc);
ir3_link_add(&l, VARYING_SLOT_CLIP_DIST0, clip0_regid,
clip_cull_mask & 0xf, l.max_loc);
}
if (clip1_loc == 0xff && VALIDREG(clip1_regid) &&
(clip_cull_mask >> 4) != 0) {
clip1_loc = l.max_loc;
ir3_link_add(&l, clip1_regid, clip_cull_mask >> 4, l.max_loc);
ir3_link_add(&l, VARYING_SLOT_CLIP_DIST1, clip1_regid,
clip_cull_mask >> 4, l.max_loc);
}
/* If we have stream-out, we use the full shader for binning

View File

@ -210,7 +210,7 @@ setup_stream_out(struct fd_context *ctx, struct fd6_program_state *state,
* a bit less ideal here..
*/
for (idx = 0; idx < l->cnt; idx++)
if (l->var[idx].regid == v->outputs[k].regid)
if (l->var[idx].slot == v->outputs[k].slot)
break;
debug_assert(idx < l->cnt);
@ -560,17 +560,17 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
if (VALIDREG(layer_regid)) {
layer_loc = l.max_loc;
ir3_link_add(&l, layer_regid, 0x1, l.max_loc);
ir3_link_add(&l, VARYING_SLOT_LAYER, layer_regid, 0x1, l.max_loc);
}
if (VALIDREG(pos_regid)) {
pos_loc = l.max_loc;
ir3_link_add(&l, pos_regid, 0xf, l.max_loc);
ir3_link_add(&l, VARYING_SLOT_POS, pos_regid, 0xf, l.max_loc);
}
if (VALIDREG(psize_regid)) {
psize_loc = l.max_loc;
ir3_link_add(&l, psize_regid, 0x1, l.max_loc);
ir3_link_add(&l, VARYING_SLOT_PSIZ, psize_regid, 0x1, l.max_loc);
}
/* Handle the case where clip/cull distances aren't read by the FS. Make
@ -580,13 +580,15 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
if (clip0_loc == 0xff && VALIDREG(clip0_regid) &&
(clip_cull_mask & 0xf) != 0) {
clip0_loc = l.max_loc;
ir3_link_add(&l, clip0_regid, clip_cull_mask & 0xf, l.max_loc);
ir3_link_add(&l, VARYING_SLOT_CLIP_DIST0, clip0_regid,
clip_cull_mask & 0xf, l.max_loc);
}
if (clip1_loc == 0xff && VALIDREG(clip1_regid) &&
(clip_cull_mask >> 4) != 0) {
clip1_loc = l.max_loc;
ir3_link_add(&l, clip1_regid, clip_cull_mask >> 4, l.max_loc);
ir3_link_add(&l, VARYING_SLOT_CLIP_DIST1, clip1_regid,
clip_cull_mask >> 4, l.max_loc);
}
/* If we have stream-out, we use the full shader for binning