ir3: set UL flag before ir3_lower_subgroups
ir3_legalize_relative, which was extracted from ir3_legalize, assumes that a0 is loaded first in each block that contains any user of a0. ir3_lower_subgroups breaks that assumption, so ir3_legalize_relative must run before ir3_lower_subgroups. Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6902 Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17720>
This commit is contained in:
parent
7023cab093
commit
8001c78d49
|
@ -1865,6 +1865,7 @@ bool ir3_lower_subgroups(struct ir3 *ir);
|
||||||
|
|
||||||
/* legalize: */
|
/* legalize: */
|
||||||
bool ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary);
|
bool ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary);
|
||||||
|
bool ir3_legalize_relative(struct ir3 *ir);
|
||||||
|
|
||||||
static inline bool
|
static inline bool
|
||||||
ir3_has_latency_to_hide(struct ir3 *ir)
|
ir3_has_latency_to_hide(struct ir3 *ir)
|
||||||
|
|
|
@ -4892,6 +4892,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
|
||||||
|
|
||||||
IR3_PASS(ir, ir3_postsched, so);
|
IR3_PASS(ir, ir3_postsched, so);
|
||||||
|
|
||||||
|
IR3_PASS(ir, ir3_legalize_relative);
|
||||||
IR3_PASS(ir, ir3_lower_subgroups);
|
IR3_PASS(ir, ir3_lower_subgroups);
|
||||||
|
|
||||||
if (so->type == MESA_SHADER_FRAGMENT)
|
if (so->type == MESA_SHADER_FRAGMENT)
|
||||||
|
|
|
@ -39,7 +39,7 @@
|
||||||
* 1) Iteratively determine where sync ((sy)/(ss)) flags are needed,
|
* 1) Iteratively determine where sync ((sy)/(ss)) flags are needed,
|
||||||
* based on state flowing out of predecessor blocks until there is
|
* based on state flowing out of predecessor blocks until there is
|
||||||
* no further change. In some cases this requires inserting nops.
|
* no further change. In some cases this requires inserting nops.
|
||||||
* 2) Mark (ei) on last varying input, and (ul) on last use of a0.x
|
* 2) Mark (ei) on last varying input
|
||||||
* 3) Final nop scheduling for instruction latency
|
* 3) Final nop scheduling for instruction latency
|
||||||
* 4) Resolve jumps and schedule blocks, marking potential convergence
|
* 4) Resolve jumps and schedule blocks, marking potential convergence
|
||||||
* points with (jp)
|
* points with (jp)
|
||||||
|
@ -88,7 +88,6 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
|
||||||
if (bd->valid)
|
if (bd->valid)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
struct ir3_instruction *last_rel = NULL;
|
|
||||||
struct ir3_instruction *last_n = NULL;
|
struct ir3_instruction *last_n = NULL;
|
||||||
struct list_head instr_list;
|
struct list_head instr_list;
|
||||||
struct ir3_legalize_state prev_state = bd->state;
|
struct ir3_legalize_state prev_state = bd->state;
|
||||||
|
@ -207,13 +206,6 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
|
||||||
regmask_init(&state->needs_sy, mergedregs);
|
regmask_init(&state->needs_sy, mergedregs);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* TODO: is it valid to have address reg loaded from a
|
|
||||||
* relative src (ie. mova a0, c<a0.x+4>)? If so, the
|
|
||||||
* last_rel check below should be moved ahead of this:
|
|
||||||
*/
|
|
||||||
if (reg->flags & IR3_REG_RELATIV)
|
|
||||||
last_rel = n;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
foreach_dst (reg, n) {
|
foreach_dst (reg, n) {
|
||||||
|
@ -223,11 +215,6 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
|
||||||
regmask_init(&state->needs_ss_war, mergedregs);
|
regmask_init(&state->needs_ss_war, mergedregs);
|
||||||
regmask_init(&state->needs_ss, mergedregs);
|
regmask_init(&state->needs_ss, mergedregs);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (last_rel && (reg->num == regid(REG_A0, 0))) {
|
|
||||||
last_rel->flags |= IR3_INSTR_UL;
|
|
||||||
last_rel = NULL;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* cat5+ does not have an (ss) bit, if needed we need to
|
/* cat5+ does not have an (ss) bit, if needed we need to
|
||||||
|
@ -367,9 +354,6 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
|
||||||
list_add(&baryf->node, &block->instr_list);
|
list_add(&baryf->node, &block->instr_list);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (last_rel)
|
|
||||||
last_rel->flags |= IR3_INSTR_UL;
|
|
||||||
|
|
||||||
bd->valid = true;
|
bd->valid = true;
|
||||||
|
|
||||||
if (memcmp(&prev_state, state, sizeof(*state))) {
|
if (memcmp(&prev_state, state, sizeof(*state))) {
|
||||||
|
|
|
@ -0,0 +1,69 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2022 Google LLC
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "ir3.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Mark (ul) on the last user of a0 before a0 is loaded again. emit_block
|
||||||
|
* makes sure a0 is loaded first if there is any user in the block. This
|
||||||
|
* allows us to process each block independently.
|
||||||
|
*
|
||||||
|
* Note that this must be called before passes that break the assumption, such
|
||||||
|
* as ir3_lower_subgroups.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static bool
|
||||||
|
is_reg_relative(const struct ir3_instruction *instr)
|
||||||
|
{
|
||||||
|
foreach_dst (reg, instr) {
|
||||||
|
if (reg->flags & IR3_REG_RELATIV)
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
foreach_src (reg, instr) {
|
||||||
|
if (reg->flags & IR3_REG_RELATIV)
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
is_dst_a0(const struct ir3_instruction *instr)
|
||||||
|
{
|
||||||
|
foreach_dst (reg, instr) {
|
||||||
|
if (reg->num == regid(REG_A0, 0))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
ir3_legalize_relative(struct ir3 *ir)
|
||||||
|
{
|
||||||
|
foreach_block (block, &ir->block_list) {
|
||||||
|
struct ir3_instruction *last_user = NULL;
|
||||||
|
|
||||||
|
foreach_instr (instr, &block->instr_list) {
|
||||||
|
if (is_reg_relative(instr))
|
||||||
|
last_user = instr;
|
||||||
|
|
||||||
|
/* Is it valid to have address reg loaded from a relative src (ie.
|
||||||
|
* mova a0, c<a0.x+4>)? This marks the load (ul), which may or may
|
||||||
|
* not be valid.
|
||||||
|
*/
|
||||||
|
if (last_user && is_dst_a0(instr)) {
|
||||||
|
last_user->flags |= IR3_INSTR_UL;
|
||||||
|
last_user = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (last_user)
|
||||||
|
last_user->flags |= IR3_INSTR_UL;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
|
@ -85,6 +85,7 @@ libfreedreno_ir3_files = files(
|
||||||
'ir3_image.h',
|
'ir3_image.h',
|
||||||
'ir3.h',
|
'ir3.h',
|
||||||
'ir3_legalize.c',
|
'ir3_legalize.c',
|
||||||
|
'ir3_legalize_relative.c',
|
||||||
'ir3_liveness.c',
|
'ir3_liveness.c',
|
||||||
'ir3_lower_parallelcopy.c',
|
'ir3_lower_parallelcopy.c',
|
||||||
'ir3_lower_spill.c',
|
'ir3_lower_spill.c',
|
||||||
|
|
Loading…
Reference in New Issue