r300: compact scalar uniforms into empty slots

We are not doing this on R5xx unless we have more than 200 constants,
because emitting constants one by one will add extra overhead at emit
time which we want to avoid if possible.

RV410:
total instructions in shared programs: 98778 -> 98703 (-0.08%)
instructions in affected programs: 7106 -> 7031 (-1.06%)
helped: 80
HURT: 25
total presub in shared programs: 2266 -> 2227 (-1.72%)
presub in affected programs: 134 -> 95 (-29.10%)
helped: 22
HURT: 10
total temps in shared programs: 15662 -> 15660 (-0.01%)
temps in affected programs: 330 -> 328 (-0.61%)
helped: 16
HURT: 13
total consts in shared programs: 85632 -> 84400 (-1.44%)
consts in affected programs: 6646 -> 5414 (-18.54%)
helped: 617
HURT: 0
total cycles in shared programs: 156305 -> 156234 (-0.05%)
cycles in affected programs: 14167 -> 14096 (-0.50%)
helped: 79
HURT: 28
LOST:   shaders/ck2/160.shader_test FS
GAINED: shaders/ck2/157.shader_test FS
GAINED: shaders/tropics/249.shader_test FS
GAINED: shaders/tropics/252.shader_test FS

RV530:
total consts in shared programs: 93209 -> 93198 (-0.01%)
consts in affected programs: 72 -> 61 (-15.28%)
helped: 6
HURT: 0

Signed-off-by: Pavel Ondračka <pavel.ondracka@gmail.com>
Reviewed-by: Filip Gawin <filip.gawin@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28630>
This commit is contained in:
Pavel Ondračka 2024-03-28 22:49:45 +01:00
parent 5d3483bfe4
commit 11ad056ee9
3 changed files with 50 additions and 8 deletions

View File

@ -91,12 +91,10 @@ dEQP-GLES2.functional.uniform_api.random.81,Fail
dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.array_in_struct.mat4_mat2_both,Fail
dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.array_in_struct.mat4_mat2_fragment,Fail
dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.multiple_nested_structs_arrays.both,Fail
dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.multiple_nested_structs_arrays.fragment,Fail
dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.nested_structs_arrays.mat4_mat2_fragment,Fail
dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.struct_in_array.mat4_mat2_both,Fail
dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.struct_in_array.mat4_mat2_fragment,Fail
dEQP-GLES2.functional.uniform_api.value.assigned.by_value.render.multiple_nested_structs_arrays.both,Fail
dEQP-GLES2.functional.uniform_api.value.assigned.by_value.render.multiple_nested_structs_arrays.fragment,Fail
# depth texture is sampling as white instead of red.
KHR-GLES2.core.internalformat.texture2d.depth_component_unsigned_int_depth_component16,Fail

View File

@ -98,12 +98,10 @@ dEQP-GLES2.functional.uniform_api.random.81,Fail
dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.array_in_struct.mat4_mat2_both,Fail
dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.array_in_struct.mat4_mat2_fragment,Fail
dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.multiple_nested_structs_arrays.both,Fail
dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.multiple_nested_structs_arrays.fragment,Fail
dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.nested_structs_arrays.mat4_mat2_fragment,Fail
dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.struct_in_array.mat4_mat2_both,Fail
dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.struct_in_array.mat4_mat2_fragment,Fail
dEQP-GLES2.functional.uniform_api.value.assigned.by_value.render.multiple_nested_structs_arrays.both,Fail
dEQP-GLES2.functional.uniform_api.value.assigned.by_value.render.multiple_nested_structs_arrays.fragment,Fail
# This bunch is not reproducible outside of CI
KHR-GLES2.shaders.aggressive_optimizations.sin_vec3_frag,Fail

View File

@ -82,6 +82,31 @@ static void place_constant_in_free_slot(struct const_remap_state *s, unsigned i)
s->new_constants.Count++;
}
/**
 * Try to pack a single-channel external constant into a spare channel of a
 * slot that has already been placed.
 *
 * The already-placed slots (0 .. new_constants.Count - 1) are scanned in
 * order, channel by channel, and the first channel whose remap swizzle is
 * RC_SWIZZLE_UNUSED is claimed. Both the forward and the inverse remap tables
 * are updated and the state is flagged as a non-identity remapping with
 * remapped externals. When no spare channel exists the constant is handed to
 * place_constant_in_free_slot() instead.
 *
 * \param s  Remapping state being built.
 * \param i  Index of the constant in s->constants; its UseMask must have
 *           exactly one bit set.
 */
static void try_merge_constants_external(struct const_remap_state *s, unsigned i)
{
   assert(util_bitcount(s->constants[i].UseMask) == 1);

   /* Translate the single-bit use mask into the index of the channel read. */
   unsigned src_chan = 0;
   while (src_chan < 4 && s->constants[i].UseMask >> src_chan != 1)
      src_chan++;

   for (unsigned slot = 0; slot < s->new_constants.Count; slot++) {
      for (unsigned dst_chan = 0; dst_chan < 4; dst_chan++) {
         if (s->remap_table[slot].swizzle[dst_chan] != RC_SWIZZLE_UNUSED)
            continue;

         /* Found a hole: record the mapping in both directions. */
         s->remap_table[slot].index[dst_chan] = i;
         s->remap_table[slot].swizzle[dst_chan] = src_chan;
         s->inv_remap_table[i].index[src_chan] = slot;
         s->inv_remap_table[i].swizzle[src_chan] = dst_chan;

         s->is_identity = false;
         s->are_externals_remapped = true;
         return;
      }
   }

   /* No spare channel in any placed slot; fall back to the regular placement. */
   place_constant_in_free_slot(s, i);
}
static void init_constant_remap_state(struct radeon_compiler *c, struct const_remap_state *s)
{
s->is_identity = true;
@ -90,7 +115,6 @@ static void init_constant_remap_state(struct radeon_compiler *c, struct const_re
s->new_constants._Reserved = c->Program.Constants.Count;
s->constants = c->Program.Constants.Constants;
/* Initialize the remap tables. */
s->remap_table = malloc(c->Program.Constants.Count * sizeof(struct const_remap));
s->inv_remap_table =
malloc(c->Program.Constants.Count * sizeof(struct const_remap));
@ -134,16 +158,38 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
/* Pass 3: Make the remapping table and remap constants.
* This pass removes unused constants simply by overwriting them by other constants. */
* First iterate over used vec2, vec3 and vec4 externals and place them in free
* slots. While we could in theory merge two vec2 together, it's not worth it
* as a) we would have to check that the swizzle is valid, and b) transforming
* xy to zw would mean we need both an rgb and an alpha source slot, which could
* potentially hurt us during pair scheduling. */
for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
if (s->constants[i].UseMask) {
if (constants[i].Type != RC_CONSTANT_EXTERNAL)
continue;
if (util_bitcount(s->constants[i].UseMask) > 1) {
place_constant_in_free_slot(s, i);
}
}
/* Now iterate over scalar externals and put them into empty slots. */
for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
if (constants[i].Type != RC_CONSTANT_EXTERNAL)
continue;
if (util_bitcount(s->constants[i].UseMask) == 1)
try_merge_constants_external(s, i);
}
/* Now put the immediates and state constants. */
for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
if (constants[i].Type == RC_CONSTANT_EXTERNAL)
continue;
if (util_bitcount(s->constants[i].UseMask) > 0) {
place_constant_in_free_slot(s, i);
}
}
/* is_identity ==> new_count == old_count
* !is_identity ==> new_count < old_count */
assert(s->is_identity || s->new_constants.Count < c->Program.Constants.Count);
assert(!((s->has_rel_addr || !c->remove_unused_constants) && s->are_externals_remapped));
/* Pass 4: Redirect reads of all constants to their new locations. */