mirror of https://gitlab.freedesktop.org/mesa/mesa
r300: compact scalar uniforms into empty slots
We are not doing this on R5xx unless we have more than 200 constants, because emitting constants one by one will add extra overhead at emit time which we want to avoid if possible. RV410: total instructions in shared programs: 98778 -> 98703 (-0.08%) instructions in affected programs: 7106 -> 7031 (-1.06%) helped: 80 HURT: 25 total presub in shared programs: 2266 -> 2227 (-1.72%) presub in affected programs: 134 -> 95 (-29.10%) helped: 22 HURT: 10 total temps in shared programs: 15662 -> 15660 (-0.01%) temps in affected programs: 330 -> 328 (-0.61%) helped: 16 HURT: 13 total consts in shared programs: 85632 -> 84400 (-1.44%) consts in affected programs: 6646 -> 5414 (-18.54%) helped: 617 HURT: 0 total cycles in shared programs: 156305 -> 156234 (-0.05%) cycles in affected programs: 14167 -> 14096 (-0.50%) helped: 79 HURT: 28 LOST: shaders/ck2/160.shader_test FS GAINED: shaders/ck2/157.shader_test FS GAINED: shaders/tropics/249.shader_test FS GAINED: shaders/tropics/252.shader_test FS RV530: total consts in shared programs: 93209 -> 93198 (-0.01%) consts in affected programs: 72 -> 61 (-15.28%) helped: 6 HURT: 0 Signed-off-by: Pavel Ondračka <pavel.ondracka@gmail.comm> Reviewed-by: Filip Gawin <filip.gawin@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28630>
This commit is contained in:
parent
5d3483bfe4
commit
11ad056ee9
|
@ -91,12 +91,10 @@ dEQP-GLES2.functional.uniform_api.random.81,Fail
|
|||
dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.array_in_struct.mat4_mat2_both,Fail
|
||||
dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.array_in_struct.mat4_mat2_fragment,Fail
|
||||
dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.multiple_nested_structs_arrays.both,Fail
|
||||
dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.multiple_nested_structs_arrays.fragment,Fail
|
||||
dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.nested_structs_arrays.mat4_mat2_fragment,Fail
|
||||
dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.struct_in_array.mat4_mat2_both,Fail
|
||||
dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.struct_in_array.mat4_mat2_fragment,Fail
|
||||
dEQP-GLES2.functional.uniform_api.value.assigned.by_value.render.multiple_nested_structs_arrays.both,Fail
|
||||
dEQP-GLES2.functional.uniform_api.value.assigned.by_value.render.multiple_nested_structs_arrays.fragment,Fail
|
||||
|
||||
# depth texture is sampling as white instead of red.
|
||||
KHR-GLES2.core.internalformat.texture2d.depth_component_unsigned_int_depth_component16,Fail
|
||||
|
|
|
@ -98,12 +98,10 @@ dEQP-GLES2.functional.uniform_api.random.81,Fail
|
|||
dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.array_in_struct.mat4_mat2_both,Fail
|
||||
dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.array_in_struct.mat4_mat2_fragment,Fail
|
||||
dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.multiple_nested_structs_arrays.both,Fail
|
||||
dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.multiple_nested_structs_arrays.fragment,Fail
|
||||
dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.nested_structs_arrays.mat4_mat2_fragment,Fail
|
||||
dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.struct_in_array.mat4_mat2_both,Fail
|
||||
dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.struct_in_array.mat4_mat2_fragment,Fail
|
||||
dEQP-GLES2.functional.uniform_api.value.assigned.by_value.render.multiple_nested_structs_arrays.both,Fail
|
||||
dEQP-GLES2.functional.uniform_api.value.assigned.by_value.render.multiple_nested_structs_arrays.fragment,Fail
|
||||
|
||||
# This bunch is not reproducible outside of CI
|
||||
KHR-GLES2.shaders.aggressive_optimizations.sin_vec3_frag,Fail
|
||||
|
|
|
@ -82,6 +82,31 @@ static void place_constant_in_free_slot(struct const_remap_state *s, unsigned i)
|
|||
s->new_constants.Count++;
|
||||
}
|
||||
|
||||
static void try_merge_constants_external(struct const_remap_state *s, unsigned i)
|
||||
{
|
||||
assert(util_bitcount(s->constants[i].UseMask) == 1);
|
||||
for (unsigned j = 0; j < s->new_constants.Count; j++) {
|
||||
for (unsigned chan = 0; chan < 4; chan++) {
|
||||
if (s->remap_table[j].swizzle[chan] == RC_SWIZZLE_UNUSED) {
|
||||
/* Writemask to swizzle */
|
||||
unsigned swizzle = 0;
|
||||
for (; swizzle < 4; swizzle++)
|
||||
if (s->constants[i].UseMask >> swizzle == 1)
|
||||
break;
|
||||
/* Update the remap tables. */
|
||||
s->remap_table[j].index[chan] = i;
|
||||
s->remap_table[j].swizzle[chan] = swizzle;
|
||||
s->inv_remap_table[i].index[swizzle] = j;
|
||||
s->inv_remap_table[i].swizzle[swizzle] = chan;
|
||||
s->are_externals_remapped = true;
|
||||
s->is_identity = false;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
place_constant_in_free_slot(s, i);
|
||||
}
|
||||
|
||||
static void init_constant_remap_state(struct radeon_compiler *c, struct const_remap_state *s)
|
||||
{
|
||||
s->is_identity = true;
|
||||
|
@ -90,7 +115,6 @@ static void init_constant_remap_state(struct radeon_compiler *c, struct const_re
|
|||
s->new_constants._Reserved = c->Program.Constants.Count;
|
||||
s->constants = c->Program.Constants.Constants;
|
||||
|
||||
/* Initialize the remap tables. */
|
||||
s->remap_table = malloc(c->Program.Constants.Count * sizeof(struct const_remap));
|
||||
s->inv_remap_table =
|
||||
malloc(c->Program.Constants.Count * sizeof(struct const_remap));
|
||||
|
@ -134,16 +158,38 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
|
|||
|
||||
|
||||
/* Pass 3: Make the remapping table and remap constants.
|
||||
* This pass removes unused constants simply by overwriting them by other constants. */
|
||||
* First iterate over used vec2, vec3 and vec4 externals and place them in free
|
||||
* slots. While we could in theory merge 2 vec2 together, it's not worth it
|
||||
* as we would have to a) check that the swizzle is valid, b) transforming
|
||||
* xy to zw would mean we need rgb and alpha source slot, thus it would hurt
|
||||
* us potentially during pair scheduling. */
|
||||
for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
|
||||
if (s->constants[i].UseMask) {
|
||||
if (constants[i].Type != RC_CONSTANT_EXTERNAL)
|
||||
continue;
|
||||
if (util_bitcount(s->constants[i].UseMask) > 1) {
|
||||
place_constant_in_free_slot(s, i);
|
||||
}
|
||||
}
|
||||
|
||||
/* Now iterate over scalar externals and put them into empty slots. */
|
||||
for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
|
||||
if (constants[i].Type != RC_CONSTANT_EXTERNAL)
|
||||
continue;
|
||||
if (util_bitcount(s->constants[i].UseMask) == 1)
|
||||
try_merge_constants_external(s, i);
|
||||
}
|
||||
|
||||
/* Now put the immediates and state constants. */
|
||||
for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
|
||||
if (constants[i].Type == RC_CONSTANT_EXTERNAL)
|
||||
continue;
|
||||
if (util_bitcount(s->constants[i].UseMask) > 0) {
|
||||
place_constant_in_free_slot(s, i);
|
||||
}
|
||||
}
|
||||
|
||||
/* is_identity ==> new_count == old_count
|
||||
* !is_identity ==> new_count < old_count */
|
||||
assert(s->is_identity || s->new_constants.Count < c->Program.Constants.Count);
|
||||
assert(!((s->has_rel_addr || !c->remove_unused_constants) && s->are_externals_remapped));
|
||||
|
||||
/* Pass 4: Redirect reads of all constants to their new locations. */
|
||||
|
|
Loading…
Reference in New Issue