freedreno/ir3: limit pre-fetched tex dest
Teach RA to set up additional interference to prevent textures fetched
before the FS starts from ending up in a register that is too high to
encode.
Fixes mis-rendering in multiple playcanv.as webgl apps.
Note that the regression was not actually 733bee57eb8's fault, but
that was the commit that exposed the problem.
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/3108
Fixes: 733bee57eb
("glsl: lower samplers with highp coordinates correctly")
Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5431>
This commit is contained in:
parent
f80092dad2
commit
ee29c682fe
|
@ -563,6 +563,9 @@ ra_init(struct ir3_ra_ctx *ctx)
|
|||
ctx->hr0_xyz_nodes = ctx->alloc_count;
|
||||
ctx->alloc_count += 3;
|
||||
|
||||
/* Add vreg name for prefetch-exclusion range: */
|
||||
ctx->prefetch_exclude_node = ctx->alloc_count++;
|
||||
|
||||
ctx->g = ra_alloc_interference_graph(ctx->set->regs, ctx->alloc_count);
|
||||
ralloc_steal(ctx->g, ctx->instrd);
|
||||
ctx->def = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);
|
||||
|
@ -711,11 +714,20 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
|
|||
*/
|
||||
if (is_tex_or_prefetch(instr)) {
|
||||
int writemask_skipped_regs = ffs(instr->regs[0]->wrmask) - 1;
|
||||
int r0_xyz = (instr->regs[0]->flags & IR3_REG_HALF) ?
|
||||
int r0_xyz = is_half(instr) ?
|
||||
ctx->hr0_xyz_nodes : ctx->r0_xyz_nodes;
|
||||
for (int i = 0; i < writemask_skipped_regs; i++)
|
||||
ra_add_node_interference(ctx->g, name, r0_xyz + i);
|
||||
}
|
||||
|
||||
/* Pre-fetched textures have a lower limit for bits to encode dst
|
||||
* register, so add additional interference with registers above
|
||||
* that limit.
|
||||
*/
|
||||
if (instr->opc == OPC_META_TEX_PREFETCH) {
|
||||
ra_add_node_interference(ctx->g, name,
|
||||
ctx->prefetch_exclude_node);
|
||||
}
|
||||
}
|
||||
|
||||
foreach_use (name, ctx, instr) {
|
||||
|
@ -1011,7 +1023,6 @@ ra_add_interference(struct ir3_ra_ctx *ctx)
|
|||
arr->end_ip = 0;
|
||||
}
|
||||
|
||||
|
||||
/* set up the r0.xyz precolor regs. */
|
||||
for (int i = 0; i < 3; i++) {
|
||||
ra_set_node_reg(ctx->g, ctx->r0_xyz_nodes + i, i);
|
||||
|
@ -1019,6 +1030,12 @@ ra_add_interference(struct ir3_ra_ctx *ctx)
|
|||
ctx->set->first_half_reg + i);
|
||||
}
|
||||
|
||||
/* pre-color the node that conflicts with half/full regs higher than what
|
||||
* can be encoded for tex-prefetch:
|
||||
*/
|
||||
ra_set_node_reg(ctx->g, ctx->prefetch_exclude_node,
|
||||
ctx->set->prefetch_exclude_reg);
|
||||
|
||||
/* compute live ranges (use/def) on a block level, also updating
|
||||
* block's def/use bitmasks (used below to calculate per-block
|
||||
* livein/liveout):
|
||||
|
|
|
@ -89,6 +89,14 @@ struct ir3_ra_reg_set {
|
|||
unsigned int half_classes[half_class_count];
|
||||
unsigned int high_classes[high_class_count];
|
||||
|
||||
/* pre-fetched tex dst is limited, on current gens, to regs
|
||||
* 0x3f and below. An additional register class, with one
|
||||
* vreg, is set up to conflict with any regs above that
|
||||
* limit.
|
||||
*/
|
||||
unsigned prefetch_exclude_class;
|
||||
unsigned prefetch_exclude_reg;
|
||||
|
||||
/* The virtual register space flattens out all the classes,
|
||||
* starting with full, followed by half and then high, ie:
|
||||
*
|
||||
|
@ -145,7 +153,8 @@ struct ir3_ra_ctx {
|
|||
|
||||
unsigned alloc_count;
|
||||
unsigned r0_xyz_nodes; /* ra node numbers for r0.[xyz] precolors */
|
||||
unsigned hr0_xyz_nodes; /* ra node numbers for hr0.[xyz] precolors pre-a6xx */
|
||||
unsigned hr0_xyz_nodes; /* ra node numbers for hr0.[xyz] precolors */
|
||||
unsigned prefetch_exclude_node;
|
||||
/* one per class, plus one slot for arrays: */
|
||||
unsigned class_alloc_count[total_class_count + 1];
|
||||
unsigned class_base[total_class_count + 1];
|
||||
|
|
|
@ -70,6 +70,21 @@ setup_conflicts(struct ir3_ra_reg_set *set)
|
|||
reg++;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Set up conflicts with registers over 0x3f for the special vreg
|
||||
* that exists to use as interference for tex-prefetch:
|
||||
*/
|
||||
|
||||
for (unsigned i = 0x40; i < CLASS_REGS(0); i++) {
|
||||
ra_add_transitive_reg_conflict(set->regs, i,
|
||||
set->prefetch_exclude_reg);
|
||||
}
|
||||
|
||||
for (unsigned i = 0x40; i < HALF_CLASS_REGS(0); i++) {
|
||||
ra_add_transitive_reg_conflict(set->regs, i + set->first_half_reg,
|
||||
set->prefetch_exclude_reg);
|
||||
}
|
||||
}
|
||||
|
||||
/* One-time setup of RA register-set, which describes all the possible
|
||||
|
@ -104,6 +119,8 @@ ir3_ra_alloc_reg_set(struct ir3_compiler *compiler)
|
|||
for (unsigned i = 0; i < high_class_count; i++)
|
||||
ra_reg_count += HIGH_CLASS_REGS(i);
|
||||
|
||||
ra_reg_count += 1; /* for tex-prefetch excludes */
|
||||
|
||||
/* allocate the reg-set.. */
|
||||
set->regs = ra_alloc_reg_set(set, ra_reg_count, true);
|
||||
set->ra_reg_to_gpr = ralloc_array(set, uint16_t, ra_reg_count);
|
||||
|
@ -164,7 +181,20 @@ ir3_ra_alloc_reg_set(struct ir3_compiler *compiler)
|
|||
}
|
||||
}
|
||||
|
||||
/* starting a6xx, half precision regs conflict w/ full precision regs: */
|
||||
/*
|
||||
* Set up an additional class, with one vreg, to simply conflict
|
||||
* with registers that are too high to encode tex-prefetch. This
|
||||
* vreg is only used to set up additional conflicts so that RA
|
||||
* knows to allocate prefetch dst regs below the limit:
|
||||
*/
|
||||
set->prefetch_exclude_class = ra_alloc_reg_class(set->regs);
|
||||
ra_class_add_reg(set->regs, set->prefetch_exclude_class, reg);
|
||||
set->prefetch_exclude_reg = reg++;
|
||||
|
||||
/*
|
||||
* And finally set up conflicts. Starting a6xx, half precision regs
|
||||
* conflict w/ full precision regs (when using MERGEDREGS):
|
||||
*/
|
||||
if (compiler->gpu_id >= 600) {
|
||||
for (unsigned i = 0; i < CLASS_REGS(0) / 2; i++) {
|
||||
unsigned freg = set->gpr_to_ra_reg[0][i];
|
||||
|
|
Loading…
Reference in New Issue