freedreno/ir3: limit pre-fetched tex dest

Teach RA to set up additional interference to prevent textures fetched
before the FS starts from ending up in a register that is too high to
encode.

Fixes mis-rendering in multiple playcanv.as webgl apps.

Note that the regression was not actually 733bee57eb8's fault, but
that was the commit that exposed the problem.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/3108
Fixes: 733bee57eb ("glsl: lower samplers with highp coordinates correctly")
Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5431>
This commit is contained in:
Rob Clark 2020-06-11 12:03:03 -07:00 committed by Marge Bot
parent f80092dad2
commit ee29c682fe
3 changed files with 60 additions and 4 deletions

View File

@ -563,6 +563,9 @@ ra_init(struct ir3_ra_ctx *ctx)
ctx->hr0_xyz_nodes = ctx->alloc_count;
ctx->alloc_count += 3;
/* Add vreg name for prefetch-exclusion range: */
ctx->prefetch_exclude_node = ctx->alloc_count++;
ctx->g = ra_alloc_interference_graph(ctx->set->regs, ctx->alloc_count);
ralloc_steal(ctx->g, ctx->instrd);
ctx->def = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);
@ -711,11 +714,20 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
*/
if (is_tex_or_prefetch(instr)) {
int writemask_skipped_regs = ffs(instr->regs[0]->wrmask) - 1;
int r0_xyz = (instr->regs[0]->flags & IR3_REG_HALF) ?
int r0_xyz = is_half(instr) ?
ctx->hr0_xyz_nodes : ctx->r0_xyz_nodes;
for (int i = 0; i < writemask_skipped_regs; i++)
ra_add_node_interference(ctx->g, name, r0_xyz + i);
}
/* Pre-fetched textures have fewer bits available to encode the dst
* register, so add additional interference with the registers that
* are above the encodable limit.
*/
if (instr->opc == OPC_META_TEX_PREFETCH) {
ra_add_node_interference(ctx->g, name,
ctx->prefetch_exclude_node);
}
}
foreach_use (name, ctx, instr) {
@ -1011,7 +1023,6 @@ ra_add_interference(struct ir3_ra_ctx *ctx)
arr->end_ip = 0;
}
/* set up the r0.xyz precolor regs. */
for (int i = 0; i < 3; i++) {
ra_set_node_reg(ctx->g, ctx->r0_xyz_nodes + i, i);
@ -1019,6 +1030,12 @@ ra_add_interference(struct ir3_ra_ctx *ctx)
ctx->set->first_half_reg + i);
}
/* pre-color the node that conflicts with half/full regs higher than
* what can be encoded for tex-prefetch:
*/
ra_set_node_reg(ctx->g, ctx->prefetch_exclude_node,
ctx->set->prefetch_exclude_reg);
/* compute live ranges (use/def) on a block level, also updating
* block's def/use bitmasks (used below to calculate per-block
* livein/liveout):

View File

@ -89,6 +89,14 @@ struct ir3_ra_reg_set {
unsigned int half_classes[half_class_count];
unsigned int high_classes[high_class_count];
/* pre-fetched tex dst is limited, on current gens, to regs
* 0x3f and below. An additional register class, with one
* vreg, is set up to conflict with any regs above that
* limit.
*/
unsigned prefetch_exclude_class;
unsigned prefetch_exclude_reg;
/* The virtual register space flattens out all the classes,
* starting with full, followed by half and then high, ie:
*
@ -145,7 +153,8 @@ struct ir3_ra_ctx {
unsigned alloc_count;
unsigned r0_xyz_nodes; /* ra node numbers for r0.[xyz] precolors */
unsigned hr0_xyz_nodes; /* ra node numbers for hr0.[xyz] precolors pre-a6xx */
unsigned hr0_xyz_nodes; /* ra node numbers for hr0.[xyz] precolors */
unsigned prefetch_exclude_node;
/* one per class, plus one slot for arrays: */
unsigned class_alloc_count[total_class_count + 1];
unsigned class_base[total_class_count + 1];

View File

@ -70,6 +70,21 @@ setup_conflicts(struct ir3_ra_reg_set *set)
reg++;
}
}
/*
* Set up conflicts with registers over 0x3f for the special vreg
* that exists to be used as interference for tex-prefetch:
*/
for (unsigned i = 0x40; i < CLASS_REGS(0); i++) {
ra_add_transitive_reg_conflict(set->regs, i,
set->prefetch_exclude_reg);
}
for (unsigned i = 0x40; i < HALF_CLASS_REGS(0); i++) {
ra_add_transitive_reg_conflict(set->regs, i + set->first_half_reg,
set->prefetch_exclude_reg);
}
}
/* One-time setup of RA register-set, which describes all the possible
@ -104,6 +119,8 @@ ir3_ra_alloc_reg_set(struct ir3_compiler *compiler)
for (unsigned i = 0; i < high_class_count; i++)
ra_reg_count += HIGH_CLASS_REGS(i);
ra_reg_count += 1; /* for tex-prefetch excludes */
/* allocate the reg-set.. */
set->regs = ra_alloc_reg_set(set, ra_reg_count, true);
set->ra_reg_to_gpr = ralloc_array(set, uint16_t, ra_reg_count);
@ -164,7 +181,20 @@ ir3_ra_alloc_reg_set(struct ir3_compiler *compiler)
}
}
/* starting a6xx, half precision regs conflict w/ full precision regs: */
/*
* Set up an additional class, with one vreg, to simply conflict
* with registers that are too high to encode tex-prefetch. This
* vreg is only used to set up additional conflicts so that RA
* knows to allocate prefetch dst regs below the limit:
*/
set->prefetch_exclude_class = ra_alloc_reg_class(set->regs);
ra_class_add_reg(set->regs, set->prefetch_exclude_class, reg);
set->prefetch_exclude_reg = reg++;
/*
* And finally set up conflicts. Starting with a6xx, half precision regs
* conflict w/ full precision regs (when using MERGEDREGS):
*/
if (compiler->gpu_id >= 600) {
for (unsigned i = 0; i < CLASS_REGS(0) / 2; i++) {
unsigned freg = set->gpr_to_ra_reg[0][i];