freedreno/ir3: Pass 16-bit sampler coordinates when possible.

shader-db highlights from Rob's android shaders:

total instructions in shared programs: 769641 -> 767536 (-0.27%)
instructions in affected programs: 151139 -> 149034 (-1.39%)
total last-baryf in shared programs: 55908 -> 55607 (-0.54%)
last-baryf in affected programs: 35219 -> 34918 (-0.85%)
total sstall in shared programs: 67074 -> 65767 (-1.95%)
total full in shared programs: 36115 -> 36080 (-0.10%)
full in affected programs: 203 -> 168 (-17.24%)
sstall in affected programs: 9510 -> 8203 (-13.74%)
total (ss) in shared programs: 14380 -> 14239 (-0.98%)
(ss) in affected programs: 2965 -> 2824 (-4.76%)
total systall in shared programs: 92425 -> 91522 (-0.98%)
systall in affected programs: 13146 -> 12243 (-6.87%)
total (sy) in shared programs: 4330 -> 4314 (-0.37%)
(sy) in affected programs: 167 -> 151 (-9.58%)
total waves in shared programs: 71580 -> 71584 (<.01%)
waves in affected programs: 12 -> 16 (33.33%)

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16616>
This commit is contained in:
Matt Turner 2021-02-22 20:11:14 +00:00 committed by Marge Bot
parent ac24c49c37
commit 003327dd95
1 changed file with 35 additions and 2 deletions

View File

@@ -738,8 +738,41 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
bool more_late_algebraic = true;
while (more_late_algebraic) {
more_late_algebraic = OPT(s, nir_opt_algebraic_late);
if (!more_late_algebraic)
OPT(s, nir_fold_16bit_sampler_conversions, 0, ~0);
if (!more_late_algebraic) {
/* Lowers texture operations that have only f2f16 or u2u16 called on
* them to have a 16-bit destination. Also, lower 16-bit texture
* coordinates that had been upconverted to 32-bits just for the
* sampler to just be 16-bit texture sources.
*/
OPT(s, nir_fold_16bit_sampler_conversions,
(1 << nir_tex_src_coord) |
(1 << nir_tex_src_lod) |
(1 << nir_tex_src_bias) |
(1 << nir_tex_src_comparator) |
(1 << nir_tex_src_min_lod) |
(1 << nir_tex_src_ms_index) |
(1 << nir_tex_src_ddx) |
(1 << nir_tex_src_ddy),
~0);
/* Now that we stripped off the 16-bit conversions, legalize so that we
* don't have a mix of 16- and 32-bit args that will need to be
* collected together in the coordinate vector.
*/
nir_tex_src_type_constraints tex_constraints = {
[nir_tex_src_lod] = {true, 0, nir_tex_src_coord},
[nir_tex_src_bias] = {true, 0, nir_tex_src_coord},
[nir_tex_src_offset] = {true, 0, nir_tex_src_coord},
[nir_tex_src_comparator] = {true, 0, nir_tex_src_coord},
[nir_tex_src_min_lod] = {true, 0, nir_tex_src_coord},
[nir_tex_src_ms_index] = {true, 0, nir_tex_src_coord},
[nir_tex_src_ddx] = {true, 0, nir_tex_src_coord},
[nir_tex_src_ddy] = {true, 0, nir_tex_src_coord},
};
NIR_PASS_V(s, nir_legalize_16bit_sampler_srcs, tex_constraints);
}
OPT_V(s, nir_opt_constant_folding);
OPT_V(s, nir_copy_prop);
OPT_V(s, nir_opt_dce);