nir/opt_offsets: Use nir_ssa_scalar to chase offset additions.

For nir_to_tgsi, I want to be able to fold a component of a vector
load_const into the base, which the old ad-hoc instruction chasing
couldn't handle.
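
As an illustration (not part of the patch itself): nir_ssa_scalar carries
a (def, component) pair, so the chase can look through mov/vecN copies and
test a single component of a vector load_const for constness. A minimal
sketch of that kind of chase using the existing helpers; the wrapper name
is made up:

   #include "nir.h"

   /* Hypothetical helper, sketching the nir_ssa_scalar-based chase:
    * follow one component of `def` through movs/vecs and report whether
    * it reaches a per-component constant no larger than `max`. */
   static bool
   offset_component_fits(nir_ssa_def *def, unsigned comp, uint32_t max)
   {
      nir_ssa_scalar s = { .def = def, .comp = comp };
      s = nir_ssa_scalar_chase_movs(s);   /* looks through mov/vecN */
      return nir_ssa_scalar_is_const(s) &&
             nir_ssa_scalar_as_uint(s) <= max;
   }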

r300:
total instructions in shared programs: 1278731 -> 1256502 (-1.74%)
instructions in affected programs: 457909 -> 435680 (-4.85%)
total flowcontrol in shared programs: 8316 -> 8313 (-0.04%)
flowcontrol in affected programs: 5 -> 2 (-60.00%)
total temps in shared programs: 213687 -> 213774 (0.04%)
temps in affected programs: 13140 -> 13227 (0.66%)
total consts in shared programs: 952850 -> 949929 (-0.31%)
consts in affected programs: 386352 -> 383431 (-0.76%)

Fixes: #5781
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Acked-by: Matt Turner <mattst88@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14309>
Emma Anholt, 2021-12-23 17:28:40 -08:00, committed by Marge Bot
parent 1048e6113e
commit cac6f633b2
1 changed file with 42 additions and 27 deletions


@@ -34,17 +34,26 @@ typedef struct
    const nir_opt_offsets_options *options;
 } opt_offsets_state;
 
-static nir_ssa_def *
-try_extract_const_addition(nir_builder *b, nir_instr *instr, opt_offsets_state *state, unsigned *out_const, uint32_t max)
+static nir_ssa_scalar
+try_extract_const_addition(nir_builder *b, nir_ssa_scalar val, opt_offsets_state *state, unsigned *out_const, uint32_t max)
 {
-   if (instr->type != nir_instr_type_alu)
-      return NULL;
+   val = nir_ssa_scalar_chase_movs(val);
+
+   if (!nir_ssa_scalar_is_alu(val))
+      return val;
 
-   nir_alu_instr *alu = nir_instr_as_alu(instr);
+   nir_alu_instr *alu = nir_instr_as_alu(val.def->parent_instr);
    if (alu->op != nir_op_iadd ||
-       !nir_alu_src_is_trivial_ssa(alu, 0) ||
-       !nir_alu_src_is_trivial_ssa(alu, 1))
-      return NULL;
+       !alu->src[0].src.is_ssa ||
+       !alu->src[1].src.is_ssa ||
+       alu->src[0].negate || alu->src[0].abs ||
+       alu->src[1].negate || alu->src[1].abs)
+      return val;
+
+   nir_ssa_scalar src[2] = {
+      {alu->src[0].src.ssa, alu->src[0].swizzle[val.comp]},
+      {alu->src[1].src.ssa, alu->src[1].swizzle[val.comp]},
+   };
 
    /* Make sure that we aren't taking out an addition that could trigger
     * unsigned wrapping in a way that would change the semantics of the load.
@@ -58,39 +67,38 @@ try_extract_const_addition(nir_builder *b, nir_instr *instr, opt_offsets_state *
       }
 
       /* Check if there can really be an unsigned wrap. */
-      nir_ssa_scalar src0 = {alu->src[0].src.ssa, 0};
-      nir_ssa_scalar src1 = {alu->src[1].src.ssa, 0};
-      uint32_t ub0 = nir_unsigned_upper_bound(b->shader, state->range_ht, src0, NULL);
-      uint32_t ub1 = nir_unsigned_upper_bound(b->shader, state->range_ht, src1, NULL);
+      uint32_t ub0 = nir_unsigned_upper_bound(b->shader, state->range_ht, src[0], NULL);
+      uint32_t ub1 = nir_unsigned_upper_bound(b->shader, state->range_ht, src[1], NULL);
 
       if ((UINT32_MAX - ub0) < ub1)
-         return NULL;
+         return val;
 
      /* We proved that unsigned wrap won't be possible, so we can set the flag too. */
      alu->no_unsigned_wrap = true;
    }
 
    for (unsigned i = 0; i < 2; ++i) {
-      if (nir_src_is_const(alu->src[i].src)) {
-         uint32_t offset = nir_src_as_uint(alu->src[i].src);
+      src[i] = nir_ssa_scalar_chase_movs(src[i]);
+      if (nir_ssa_scalar_is_const(src[i])) {
+         uint32_t offset = nir_ssa_scalar_as_uint(src[i]);
          if (offset + *out_const <= max) {
             *out_const += offset;
-            nir_ssa_def *replace_src =
-               try_extract_const_addition(b, alu->src[1 - i].src.ssa->parent_instr, state, out_const, max);
-            return replace_src ? replace_src : alu->src[1 - i].src.ssa;
+            return try_extract_const_addition(b, src[1 - i], state, out_const, max);
          }
       }
    }
 
-   nir_ssa_def *replace_src0 = try_extract_const_addition(b, alu->src[0].src.ssa->parent_instr, state, out_const, max);
-   nir_ssa_def *replace_src1 = try_extract_const_addition(b, alu->src[1].src.ssa->parent_instr, state, out_const, max);
-   if (!replace_src0 && !replace_src1)
-      return NULL;
+   uint32_t orig_offset = *out_const;
+   src[0] = try_extract_const_addition(b, src[0], state, out_const, max);
+   src[1] = try_extract_const_addition(b, src[1], state, out_const, max);
+   if (*out_const == orig_offset)
+      return val;
 
    b->cursor = nir_before_instr(&alu->instr);
-   replace_src0 = replace_src0 ? replace_src0 : nir_ssa_for_alu_src(b, alu, 0);
-   replace_src1 = replace_src1 ? replace_src1 : nir_ssa_for_alu_src(b, alu, 1);
-   return nir_iadd(b, replace_src0, replace_src1);
+   nir_ssa_def *r =
+      nir_iadd(b, nir_channel(b, src[0].def, src[0].comp),
+                  nir_channel(b, src[1].def, src[1].comp));
+   return (nir_ssa_scalar){r, 0};
 }
 
 static bool
@@ -113,8 +121,15 @@ try_fold_load_store(nir_builder *b,
       return false;
 
    if (!nir_src_is_const(*off_src)) {
-      replace_src = try_extract_const_addition(b, off_src->ssa->parent_instr, state, &off_const, max);
-   } else if (nir_src_as_uint(*off_src) && nir_src_as_uint(*off_src) < max) {
+      uint32_t add_offset = 0;
+      nir_ssa_scalar val = {.def = off_src->ssa, .comp = 0};
+      val = try_extract_const_addition(b, val, state, &add_offset, max);
+      if (add_offset == 0)
+         return false;
+      off_const += add_offset;
+      b->cursor = nir_before_instr(&intrin->instr);
+      replace_src = nir_channel(b, val.def, val.comp);
+   } else if (nir_src_as_uint(*off_src) && off_const + nir_src_as_uint(*off_src) <= max) {
       off_const += nir_src_as_uint(*off_src);
       b->cursor = nir_before_instr(&intrin->instr);
       replace_src = nir_imm_zero(b, off_src->ssa->num_components, off_src->ssa->bit_size);
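
A note on the unsigned-wrap check retained above (an illustrative sketch,
not from this patch): folding the constant out of iadd(a, c) into the
instruction's base is only safe when a + c provably cannot wrap around
2^32, because after the fold the wrap no longer happens inside the offset
source. A standalone demonstration of the hazard:

   #include <inttypes.h>
   #include <stdint.h>
   #include <stdio.h>

   int main(void)
   {
      uint32_t base = 0, a = 0xffffffffu, c = 1;

      /* What the shader computes before folding: the iadd wraps in
       * 32 bits, so the load addresses base + 0. */
      uint32_t unfolded = base + (a + c);

      /* After folding c into base, a backend that widens the final
       * base+offset addition never sees the wrap and addresses
       * 0x100000000 instead. */
      uint64_t folded = (uint64_t)(base + c) + a;

      printf("unfolded: 0x%" PRIx32 "\n", unfolded);   /* 0x0 */
      printf("folded:   0x%" PRIx64 "\n", folded);     /* 0x100000000 */
      return 0;
   }

This is why try_extract_const_addition() returns val unchanged unless
nir_unsigned_upper_bound() proves the two source bounds cannot sum past
UINT32_MAX.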