freedreno/ir3: xfb fix for duplicate outputs

We can't rely on regid to be unique: shaders can have multiple varyings
with the same output value.  Normally shader linking deduplicates these,
but we still need to handle the case for transform feedback (xfb).  So
use the slot instead as the unique identifier.

Fixes KHR-GLES31.core.gpu_shader5.fma_precision_*

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13605>
This commit is contained in:
Rob Clark 2021-10-30 10:51:59 -07:00 committed by Marge Bot
parent f6f760a98d
commit 7e998783db
6 changed files with 37 additions and 31 deletions

View File

@ -14,12 +14,6 @@ KHR-GL33.transform_feedback.query_vertex_separate_test,Fail
# "*** Color comparison failed" # "*** Color comparison failed"
KHR-GLES3.packed_depth_stencil.verify_read_pixels.depth24_stencil8,Fail KHR-GLES3.packed_depth_stencil.verify_read_pixels.depth24_stencil8,Fail
# "The values of resultStd[i] & 0xFFFFFFFE and resultFma[i] & 0xFFFFFFFE and resultCPU[i] & 0xFFFFFFFE are not bitwise equal for i = 0..99 "
KHR-GLES31.core.gpu_shader5.fma_precision_float,Fail
KHR-GLES31.core.gpu_shader5.fma_precision_vec2,Fail
KHR-GLES31.core.gpu_shader5.fma_precision_vec3,Fail
KHR-GLES31.core.gpu_shader5.fma_precision_vec4,Fail
# Lots of errors like "[279] Check failed. Received: [3,0,0,2] instead of: [5,0,0,2]" # Lots of errors like "[279] Check failed. Received: [3,0,0,2] instead of: [5,0,0,2]"
KHR-GLES31.core.geometry_shader.layered_framebuffer.depth_support,Fail KHR-GLES31.core.geometry_shader.layered_framebuffer.depth_support,Fail

View File

@ -823,14 +823,16 @@ ir3_link_stream_out(struct ir3_shader_linkage *l,
continue; continue;
for (idx = 0; idx < l->cnt; idx++) { for (idx = 0; idx < l->cnt; idx++) {
if (l->var[idx].regid == v->outputs[k].regid) if (l->var[idx].slot == v->outputs[k].slot)
break; break;
nextloc = MAX2(nextloc, l->var[idx].loc + 4); nextloc = MAX2(nextloc, l->var[idx].loc + 4);
} }
/* add if not already in linkage map: */ /* add if not already in linkage map: */
if (idx == l->cnt) if (idx == l->cnt) {
ir3_link_add(l, v->outputs[k].regid, compmask, nextloc); ir3_link_add(l, v->outputs[k].slot, v->outputs[k].regid,
compmask, nextloc);
}
/* expand component-mask if needed, ie streaming out all components /* expand component-mask if needed, ie streaming out all components
* but frag shader doesn't consume all components: * but frag shader doesn't consume all components:

View File

@ -891,6 +891,7 @@ struct ir3_shader_linkage {
/* Map from VS output to location. */ /* Map from VS output to location. */
struct { struct {
uint8_t slot;
uint8_t regid; uint8_t regid;
uint8_t compmask; uint8_t compmask;
uint8_t loc; uint8_t loc;
@ -907,8 +908,8 @@ struct ir3_shader_linkage {
}; };
static inline void static inline void
ir3_link_add(struct ir3_shader_linkage *l, uint8_t regid_, uint8_t compmask, ir3_link_add(struct ir3_shader_linkage *l, uint8_t slot, uint8_t regid_,
uint8_t loc) uint8_t compmask, uint8_t loc)
{ {
for (int j = 0; j < util_last_bit(compmask); j++) { for (int j = 0; j < util_last_bit(compmask); j++) {
uint8_t comploc = loc + j; uint8_t comploc = loc + j;
@ -921,6 +922,7 @@ ir3_link_add(struct ir3_shader_linkage *l, uint8_t regid_, uint8_t compmask,
int i = l->cnt++; int i = l->cnt++;
debug_assert(i < ARRAY_SIZE(l->var)); debug_assert(i < ARRAY_SIZE(l->var));
l->var[i].slot = slot;
l->var[i].regid = regid_; l->var[i].regid = regid_;
l->var[i].compmask = compmask; l->var[i].compmask = compmask;
l->var[i].loc = loc; l->var[i].loc = loc;
@ -974,7 +976,8 @@ ir3_link_shaders(struct ir3_shader_linkage *l,
if (fs->inputs[j].slot == VARYING_SLOT_CLIP_DIST1) if (fs->inputs[j].slot == VARYING_SLOT_CLIP_DIST1)
l->clip1_loc = fs->inputs[j].inloc; l->clip1_loc = fs->inputs[j].inloc;
ir3_link_add(l, k >= 0 ? vs->outputs[k].regid : default_regid, ir3_link_add(l, fs->inputs[j].slot,
k >= 0 ? vs->outputs[k].regid : default_regid,
fs->inputs[j].compmask, fs->inputs[j].inloc); fs->inputs[j].compmask, fs->inputs[j].inloc);
} }
} }

View File

@ -774,7 +774,7 @@ tu6_setup_streamout(struct tu_cs *cs,
* a bit less ideal here.. * a bit less ideal here..
*/ */
for (idx = 0; idx < l->cnt; idx++) for (idx = 0; idx < l->cnt; idx++)
if (l->var[idx].regid == v->outputs[k].regid) if (l->var[idx].slot == v->outputs[k].slot)
break; break;
debug_assert(idx < l->cnt); debug_assert(idx < l->cnt);
@ -1006,12 +1006,12 @@ tu6_emit_vpc(struct tu_cs *cs,
if (layer_regid != regid(63, 0)) { if (layer_regid != regid(63, 0)) {
layer_loc = linkage.max_loc; layer_loc = linkage.max_loc;
ir3_link_add(&linkage, layer_regid, 0x1, linkage.max_loc); ir3_link_add(&linkage, VARYING_SLOT_LAYER, layer_regid, 0x1, linkage.max_loc);
} }
if (view_regid != regid(63, 0)) { if (view_regid != regid(63, 0)) {
view_loc = linkage.max_loc; view_loc = linkage.max_loc;
ir3_link_add(&linkage, view_regid, 0x1, linkage.max_loc); ir3_link_add(&linkage, VARYING_SLOT_VIEWPORT, view_regid, 0x1, linkage.max_loc);
} }
unsigned extra_pos = 0; unsigned extra_pos = 0;
@ -1023,14 +1023,15 @@ tu6_emit_vpc(struct tu_cs *cs,
if (position_loc == 0xff) if (position_loc == 0xff)
position_loc = linkage.max_loc; position_loc = linkage.max_loc;
ir3_link_add(&linkage, last_shader->outputs[i].regid, ir3_link_add(&linkage, last_shader->outputs[i].slot,
last_shader->outputs[i].regid,
0xf, position_loc + 4 * last_shader->outputs[i].view); 0xf, position_loc + 4 * last_shader->outputs[i].view);
extra_pos = MAX2(extra_pos, last_shader->outputs[i].view); extra_pos = MAX2(extra_pos, last_shader->outputs[i].view);
} }
if (pointsize_regid != regid(63, 0)) { if (pointsize_regid != regid(63, 0)) {
pointsize_loc = linkage.max_loc; pointsize_loc = linkage.max_loc;
ir3_link_add(&linkage, pointsize_regid, 0x1, linkage.max_loc); ir3_link_add(&linkage, VARYING_SLOT_PSIZ, pointsize_regid, 0x1, linkage.max_loc);
} }
uint8_t clip_cull_mask = last_shader->clip_mask | last_shader->cull_mask; uint8_t clip_cull_mask = last_shader->clip_mask | last_shader->cull_mask;
@ -1039,11 +1040,13 @@ tu6_emit_vpc(struct tu_cs *cs,
uint32_t clip0_loc = linkage.clip0_loc, clip1_loc = linkage.clip1_loc; uint32_t clip0_loc = linkage.clip0_loc, clip1_loc = linkage.clip1_loc;
if (clip0_loc == 0xff && clip0_regid != regid(63, 0)) { if (clip0_loc == 0xff && clip0_regid != regid(63, 0)) {
clip0_loc = linkage.max_loc; clip0_loc = linkage.max_loc;
ir3_link_add(&linkage, clip0_regid, clip_cull_mask & 0xf, linkage.max_loc); ir3_link_add(&linkage, VARYING_SLOT_CLIP_DIST0, clip0_regid,
clip_cull_mask & 0xf, linkage.max_loc);
} }
if (clip1_loc == 0xff && clip1_regid != regid(63, 0)) { if (clip1_loc == 0xff && clip1_regid != regid(63, 0)) {
clip1_loc = linkage.max_loc; clip1_loc = linkage.max_loc;
ir3_link_add(&linkage, clip1_regid, clip_cull_mask >> 4, linkage.max_loc); ir3_link_add(&linkage, VARYING_SLOT_CLIP_DIST1, clip1_regid,
clip_cull_mask >> 4, linkage.max_loc);
} }
tu6_setup_streamout(cs, last_shader, &linkage); tu6_setup_streamout(cs, last_shader, &linkage);
@ -1054,7 +1057,7 @@ tu6_emit_vpc(struct tu_cs *cs,
* any unused code and make sure that optimizations don't remove it. * any unused code and make sure that optimizations don't remove it.
*/ */
if (linkage.cnt == 0) if (linkage.cnt == 0)
ir3_link_add(&linkage, 0, 0x1, linkage.max_loc); ir3_link_add(&linkage, 0, 0, 0x1, linkage.max_loc);
/* map outputs of the last shader to VPC */ /* map outputs of the last shader to VPC */
assert(linkage.cnt <= 32); assert(linkage.cnt <= 32);

View File

@ -105,7 +105,7 @@ emit_stream_out(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
* a bit less ideal here.. * a bit less ideal here..
*/ */
for (idx = 0; idx < l->cnt; idx++) for (idx = 0; idx < l->cnt; idx++)
if (l->var[idx].regid == v->outputs[k].regid) if (l->var[idx].slot == v->outputs[k].slot)
break; break;
debug_assert(idx < l->cnt); debug_assert(idx < l->cnt);
@ -408,11 +408,11 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
/* a5xx appends pos/psize to end of the linkage map: */ /* a5xx appends pos/psize to end of the linkage map: */
if (VALIDREG(pos_regid)) if (VALIDREG(pos_regid))
ir3_link_add(&l, pos_regid, 0xf, l.max_loc); ir3_link_add(&l, VARYING_SLOT_POS, pos_regid, 0xf, l.max_loc);
if (VALIDREG(psize_regid)) { if (VALIDREG(psize_regid)) {
psize_loc = l.max_loc; psize_loc = l.max_loc;
ir3_link_add(&l, psize_regid, 0x1, l.max_loc); ir3_link_add(&l, VARYING_SLOT_PSIZ, psize_regid, 0x1, l.max_loc);
} }
/* Handle the case where clip/cull distances aren't read by the FS. Make /* Handle the case where clip/cull distances aren't read by the FS. Make
@ -422,13 +422,15 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
if (clip0_loc == 0xff && VALIDREG(clip0_regid) && if (clip0_loc == 0xff && VALIDREG(clip0_regid) &&
(clip_cull_mask & 0xf) != 0) { (clip_cull_mask & 0xf) != 0) {
clip0_loc = l.max_loc; clip0_loc = l.max_loc;
ir3_link_add(&l, clip0_regid, clip_cull_mask & 0xf, l.max_loc); ir3_link_add(&l, VARYING_SLOT_CLIP_DIST0, clip0_regid,
clip_cull_mask & 0xf, l.max_loc);
} }
if (clip1_loc == 0xff && VALIDREG(clip1_regid) && if (clip1_loc == 0xff && VALIDREG(clip1_regid) &&
(clip_cull_mask >> 4) != 0) { (clip_cull_mask >> 4) != 0) {
clip1_loc = l.max_loc; clip1_loc = l.max_loc;
ir3_link_add(&l, clip1_regid, clip_cull_mask >> 4, l.max_loc); ir3_link_add(&l, VARYING_SLOT_CLIP_DIST1, clip1_regid,
clip_cull_mask >> 4, l.max_loc);
} }
/* If we have stream-out, we use the full shader for binning /* If we have stream-out, we use the full shader for binning

View File

@ -210,7 +210,7 @@ setup_stream_out(struct fd_context *ctx, struct fd6_program_state *state,
* a bit less ideal here.. * a bit less ideal here..
*/ */
for (idx = 0; idx < l->cnt; idx++) for (idx = 0; idx < l->cnt; idx++)
if (l->var[idx].regid == v->outputs[k].regid) if (l->var[idx].slot == v->outputs[k].slot)
break; break;
debug_assert(idx < l->cnt); debug_assert(idx < l->cnt);
@ -560,17 +560,17 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
if (VALIDREG(layer_regid)) { if (VALIDREG(layer_regid)) {
layer_loc = l.max_loc; layer_loc = l.max_loc;
ir3_link_add(&l, layer_regid, 0x1, l.max_loc); ir3_link_add(&l, VARYING_SLOT_LAYER, layer_regid, 0x1, l.max_loc);
} }
if (VALIDREG(pos_regid)) { if (VALIDREG(pos_regid)) {
pos_loc = l.max_loc; pos_loc = l.max_loc;
ir3_link_add(&l, pos_regid, 0xf, l.max_loc); ir3_link_add(&l, VARYING_SLOT_POS, pos_regid, 0xf, l.max_loc);
} }
if (VALIDREG(psize_regid)) { if (VALIDREG(psize_regid)) {
psize_loc = l.max_loc; psize_loc = l.max_loc;
ir3_link_add(&l, psize_regid, 0x1, l.max_loc); ir3_link_add(&l, VARYING_SLOT_PSIZ, psize_regid, 0x1, l.max_loc);
} }
/* Handle the case where clip/cull distances aren't read by the FS. Make /* Handle the case where clip/cull distances aren't read by the FS. Make
@ -580,13 +580,15 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
if (clip0_loc == 0xff && VALIDREG(clip0_regid) && if (clip0_loc == 0xff && VALIDREG(clip0_regid) &&
(clip_cull_mask & 0xf) != 0) { (clip_cull_mask & 0xf) != 0) {
clip0_loc = l.max_loc; clip0_loc = l.max_loc;
ir3_link_add(&l, clip0_regid, clip_cull_mask & 0xf, l.max_loc); ir3_link_add(&l, VARYING_SLOT_CLIP_DIST0, clip0_regid,
clip_cull_mask & 0xf, l.max_loc);
} }
if (clip1_loc == 0xff && VALIDREG(clip1_regid) && if (clip1_loc == 0xff && VALIDREG(clip1_regid) &&
(clip_cull_mask >> 4) != 0) { (clip_cull_mask >> 4) != 0) {
clip1_loc = l.max_loc; clip1_loc = l.max_loc;
ir3_link_add(&l, clip1_regid, clip_cull_mask >> 4, l.max_loc); ir3_link_add(&l, VARYING_SLOT_CLIP_DIST1, clip1_regid,
clip_cull_mask >> 4, l.max_loc);
} }
/* If we have stream-out, we use the full shader for binning /* If we have stream-out, we use the full shader for binning