radeonsi: unroll loops of up to 128 iterations
It's not exactly 128 because longer loop bodies scale the number down.

This improves perf for VP13/Creo and Piano. Most other tests either didn't
show any difference or are CPU-bound.

v2:
- The lowering passes had to be moved to the optimization loop because
  unrolling creates lowerable variables.
- Piano has some pattern that looks like corruption and the pattern changed
  with loop unrolling. The pattern is present on other drivers as well.

v3:
- I removed the Piano test from CI traces because the image is random.
  The output was wrong even before this MR, and now it's randomly wrong.

| PERCENTAGE DELTAS      | Shaders  | SGPRs    | VGPRs    |SpillSGPR |SpillVGPR | PrivVGPR | Scratch  | CodeSize | MaxWaves |
|------------------------|----------|----------|----------|----------|----------|----------|----------|----------|----------|
| alien_isolation        |     2936 |        . |   0.02 % |        . |        . |        . |        . |   0.83 % |        . |
| deadcore               |       76 |  18.47 % |        . |        . |        . |        . |        . | 167.69 % |        . |
| deus_ex_mankind_div..  |     1410 |   0.10 % |   0.15 % |        . |        . |        . |        . |   1.70 % |        . |
| f1-2015                |      775 |   0.37 % |   0.16 % |        . |        . |        . |        . |   3.25 % |  -0.07 % |
| hitman                 |     1413 |   0.10 % |  -0.03 % |   6.45 % |        . |        . |        . |   0.61 % |   0.03 % |
| metro_2033_redux       |     2670 |        . |        . |        . |        . |        . |        . |   0.13 % |   0.01 % |
| pixmark-piano-0.7.0    |        2 |        . |  14.29 % |-100.00 % |        . |        . |        . |  78.07 % |  -4.76 % |
| reflections_subway     |       98 |  -0.53 % |        . |        . |        . |        . |        . |   7.64 % |        . |
| thea                   |      172 |   0.12 % |  -0.81 % |        . |        . |        . |        . |   0.65 % |   0.15 % |
| ubershaders            |       54 |        . |        . |        . |        . |        . |        . |  61.13 % |        . |
| ue4_effects_cave       |      290 |   0.05 % |        . |        . |        . |        . |        . |   2.62 % |        . |
| vp13-creo              |       26 |  -3.38 % |  -4.20 % |        . |        . |        . |        . |  88.56 % |   2.62 % |
| vp13-sw                |      100 |  -0.36 % |  -9.14 % |        . |-100.00 % |        . |-100.00 % | -17.97 % |   0.39 % |
| vp20-creo              |       22 |  -0.82 % |  -3.33 % |        . |        . |        . |        . |  81.59 % |   1.51 % |
| vp20-sw                |      296 |  -4.51 % |  -0.63 % |        . |        . |        . |        . |  58.93 % |   0.20 % |
|------------------------|----------|----------|----------|----------|----------|----------|----------|----------|----------|
| All affected           |      189 |   3.05 % |  -2.87 % | 500.00 % |-100.00 % |        . |-100.00 % | 135.61 % |   1.32 % |
|------------------------|----------|----------|----------|----------|----------|----------|----------|----------|----------|
| Total                  |    57794 |   0.01 % |  -0.02 % |   0.27 % |  -3.13 % |        . |  -2.89 % |   1.73 % |        . |

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> (v1)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13966>
This commit is contained in:
parent
af9ec3c45d
commit
9ff086052a
|
@@ -34,10 +34,6 @@ traces:
|
|||
expectations:
|
||||
- device: gl-radeonsi-stoney
|
||||
checksum: 84c499203944cdc59e70450c324bb8df
|
||||
-- path: gputest/pixmark-piano.trace
|
||||
-expectations:
|
||||
-- device: gl-radeonsi-stoney
|
||||
-checksum: a7317d54d452d19ce630c7f554f2279b
|
||||
- path: gputest/triangle.trace
|
||||
expectations:
|
||||
- device: gl-radeonsi-stoney
|
||||
|
|
|
@@ -1054,7 +1054,7 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
|
|||
.has_dot_4x8 = sscreen->info.has_accelerated_dot_product,
|
||||
.has_dot_2x16 = sscreen->info.has_accelerated_dot_product,
|
||||
.optimize_sample_mask_in = true,
|
||||
-.max_unroll_iterations = 32,
|
||||
+.max_unroll_iterations = 128,
|
||||
.max_unroll_iterations_aggressive = 128,
|
||||
.use_interpolated_input_intrinsics = true,
|
||||
.lower_uniforms_to_ubo = true,
|
||||
|
|
|
@@ -597,15 +597,15 @@ void si_nir_opts(struct si_screen *sscreen, struct nir_shader *nir, bool first)
|
|||
{
|
||||
bool progress;
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_vars_to_ssa);
|
||||
NIR_PASS_V(nir, nir_lower_alu_to_scalar, si_alu_to_scalar_filter, sscreen);
|
||||
NIR_PASS_V(nir, nir_lower_phis_to_scalar, false);
|
||||
|
||||
do {
|
||||
progress = false;
|
||||
bool lower_alu_to_scalar = false;
|
||||
bool lower_phis_to_scalar = false;
|
||||
|
||||
NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
|
||||
NIR_PASS(progress, nir, nir_lower_alu_to_scalar, si_alu_to_scalar_filter, sscreen);
|
||||
NIR_PASS(progress, nir, nir_lower_phis_to_scalar, false);
|
||||
|
||||
if (first) {
|
||||
NIR_PASS(progress, nir, nir_split_array_vars, nir_var_function_temp);
|
||||
NIR_PASS(lower_alu_to_scalar, nir, nir_shrink_vec_array_vars, nir_var_function_temp);
|
||||
|
|
Loading…
Reference in New Issue