freedreno/ir3: Pass 16-bit sampler coordinates when possible.

shader-db highlights from Rob's Android shaders:

total instructions in shared programs: 769641 -> 767536 (-0.27%)
instructions in affected programs: 151139 -> 149034 (-1.39%)
total last-baryf in shared programs: 55908 -> 55607 (-0.54%)
last-baryf in affected programs: 35219 -> 34918 (-0.85%)
total sstall in shared programs: 67074 -> 65767 (-1.95%)
total full in shared programs: 36115 -> 36080 (-0.10%)
full in affected programs: 203 -> 168 (-17.24%)
sstall in affected programs: 9510 -> 8203 (-13.74%)
total (ss) in shared programs: 14380 -> 14239 (-0.98%)
(ss) in affected programs: 2965 -> 2824 (-4.76%)
total systall in shared programs: 92425 -> 91522 (-0.98%)
systall in affected programs: 13146 -> 12243 (-6.87%)
total (sy) in shared programs: 4330 -> 4314 (-0.37%)
(sy) in affected programs: 167 -> 151 (-9.58%)
total waves in shared programs: 71580 -> 71584 (<.01%)
waves in affected programs: 12 -> 16 (33.33%)

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16616>
2021-02-22 20:11:14 +00:00 · 2021-02-22 20:11:14 +00:00 · 003327dd95
parent ac24c49c37
commit 003327dd95
1 changed file with 35 additions and 2 deletions
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -738,8 +738,41 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
   bool more_late_algebraic = true;
   while (more_late_algebraic) {
      more_late_algebraic = OPT(s, nir_opt_algebraic_late);
-      if (!more_late_algebraic)
-         OPT(s, nir_fold_16bit_sampler_conversions, 0, ~0);
+      if (!more_late_algebraic) {
+         /* Lowers texture operations that have only f2f16 or u2u16 called on
+          * them to have a 16-bit destination.  Also, lower 16-bit texture
+          * coordinates that had been upconverted to 32-bits just for the
+          * sampler to just be 16-bit texture sources.
+          */
+         OPT(s, nir_fold_16bit_sampler_conversions,
+            (1 << nir_tex_src_coord) |
+            (1 << nir_tex_src_lod) |
+            (1 << nir_tex_src_bias) |
+            (1 << nir_tex_src_comparator) |
+            (1 << nir_tex_src_min_lod) |
+            (1 << nir_tex_src_ms_index) |
+            (1 << nir_tex_src_ddx) |
+            (1 << nir_tex_src_ddy),
+            ~0);
+
+         /* Now that we stripped off the 16-bit conversions, legalize so that we
+          * don't have a mix of 16- and 32-bit args that will need to be
+          * collected together in the coordinate vector.
+          */
+         nir_tex_src_type_constraints tex_constraints = {
+            [nir_tex_src_lod] = {true, 0, nir_tex_src_coord},
+            [nir_tex_src_bias] = {true, 0, nir_tex_src_coord},
+            [nir_tex_src_offset] = {true, 0, nir_tex_src_coord},
+            [nir_tex_src_comparator] = {true, 0, nir_tex_src_coord},
+
+            [nir_tex_src_min_lod] = {true, 0, nir_tex_src_coord},
+            [nir_tex_src_ms_index] = {true, 0, nir_tex_src_coord},
+            [nir_tex_src_ddx] = {true, 0, nir_tex_src_coord},
+            [nir_tex_src_ddy] = {true, 0, nir_tex_src_coord},
+
+         };
+         NIR_PASS_V(s, nir_legalize_16bit_sampler_srcs, tex_constraints);
+      }
      OPT_V(s, nir_opt_constant_folding);
      OPT_V(s, nir_copy_prop);
      OPT_V(s, nir_opt_dce);