ir3: set UL flag before ir3_lower_subgroups

ir3_legalize_relative, extracted from ir3_legalize, assumes that in any
block containing a user of a0, a0 is loaded before its first user.
ir3_lower_subgroups breaks that assumption, so ir3_legalize_relative
must run before it.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6902
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17720>
This commit is contained in:
Chia-I Wu 2022-07-22 10:33:20 -07:00 committed by Marge Bot
parent 7023cab093
commit 8001c78d49
5 changed files with 73 additions and 17 deletions

View File

@ -1865,6 +1865,7 @@ bool ir3_lower_subgroups(struct ir3 *ir);
/* legalize: */
bool ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary);
bool ir3_legalize_relative(struct ir3 *ir);
static inline bool
ir3_has_latency_to_hide(struct ir3 *ir)

View File

@ -4892,6 +4892,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
IR3_PASS(ir, ir3_postsched, so);
IR3_PASS(ir, ir3_legalize_relative);
IR3_PASS(ir, ir3_lower_subgroups);
if (so->type == MESA_SHADER_FRAGMENT)

View File

@ -39,7 +39,7 @@
* 1) Iteratively determine where sync ((sy)/(ss)) flags are needed,
* based on state flowing out of predecessor blocks until there is
* no further change. In some cases this requires inserting nops.
* 2) Mark (ei) on last varying input, and (ul) on last use of a0.x
* 2) Mark (ei) on last varying input
* 3) Final nop scheduling for instruction latency
* 4) Resolve jumps and schedule blocks, marking potential convergence
* points with (jp)
@ -88,7 +88,6 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
if (bd->valid)
return false;
struct ir3_instruction *last_rel = NULL;
struct ir3_instruction *last_n = NULL;
struct list_head instr_list;
struct ir3_legalize_state prev_state = bd->state;
@ -207,13 +206,6 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
regmask_init(&state->needs_sy, mergedregs);
}
}
/* TODO: is it valid to have address reg loaded from a
* relative src (ie. mova a0, c<a0.x+4>)? If so, the
* last_rel check below should be moved ahead of this:
*/
if (reg->flags & IR3_REG_RELATIV)
last_rel = n;
}
foreach_dst (reg, n) {
@ -223,11 +215,6 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
regmask_init(&state->needs_ss_war, mergedregs);
regmask_init(&state->needs_ss, mergedregs);
}
if (last_rel && (reg->num == regid(REG_A0, 0))) {
last_rel->flags |= IR3_INSTR_UL;
last_rel = NULL;
}
}
/* cat5+ does not have an (ss) bit, if needed we need to
@ -367,9 +354,6 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
list_add(&baryf->node, &block->instr_list);
}
if (last_rel)
last_rel->flags |= IR3_INSTR_UL;
bd->valid = true;
if (memcmp(&prev_state, state, sizeof(*state))) {

View File

@ -0,0 +1,69 @@
/*
* Copyright 2022 Google LLC
* SPDX-License-Identifier: MIT
*/
#include "ir3.h"
/*
* Mark (ul) on the last user of a0 before a0 is loaded again. emit_block
* makes sure a0 is loaded first if there is any user in the block. This
* allows us to process each block independently.
*
* Note that this must be called before passes that break the assumption, such
* as ir3_lower_subgroups.
*/
static bool
is_reg_relative(const struct ir3_instruction *instr)
{
foreach_dst (reg, instr) {
if (reg->flags & IR3_REG_RELATIV)
return true;
}
foreach_src (reg, instr) {
if (reg->flags & IR3_REG_RELATIV)
return true;
}
return false;
}
static bool
is_dst_a0(const struct ir3_instruction *instr)
{
foreach_dst (reg, instr) {
if (reg->num == regid(REG_A0, 0))
return true;
}
return false;
}
bool
ir3_legalize_relative(struct ir3 *ir)
{
foreach_block (block, &ir->block_list) {
struct ir3_instruction *last_user = NULL;
foreach_instr (instr, &block->instr_list) {
if (is_reg_relative(instr))
last_user = instr;
/* Is it valid to have address reg loaded from a relative src (ie.
* mova a0, c<a0.x+4>)? This marks the load (ul), which may or may
* not be valid.
*/
if (last_user && is_dst_a0(instr)) {
last_user->flags |= IR3_INSTR_UL;
last_user = NULL;
}
}
if (last_user)
last_user->flags |= IR3_INSTR_UL;
}
return true;
}

View File

@ -85,6 +85,7 @@ libfreedreno_ir3_files = files(
'ir3_image.h',
'ir3.h',
'ir3_legalize.c',
'ir3_legalize_relative.c',
'ir3_liveness.c',
'ir3_lower_parallelcopy.c',
'ir3_lower_spill.c',