From d52d500f17a0fa7d96e63fd6e558583f137a5997 Mon Sep 17 00:00:00 2001
From: Emma Anholt <emma@anholt.net>
Date: Fri, 4 Feb 2022 14:27:54 -0800
Subject: [PATCH] r300: Request that nir-to-tgsi avoid generating
 TGSI_OPCODE_CMP.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Given that our fcsels are on float-bools, we can emit the LRP directly and
save the backend having to emit a SLT to turn the CMP src[0] into a bool.

This required passing a codegen flags struct to nir-to-tgsi.  I think this
is a good way forward for it, as the alternative has mostly been adding
flags to nir_shader_compiler_options (since adding PIPE_SHADER_CAPs is an
unreasonable amount of pain).

r300 shader-db:
total instructions in shared programs: 1484320 -> 1472463 (-0.80%)
instructions in affected programs: 243588 -> 231731 (-4.87%)
total temps in shared programs: 212485 -> 212143 (-0.16%)
temps in affected programs: 3845 -> 3503 (-8.89%)

Acked-by: Pavel Ondračka <pavel.ondracka@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14886>
---
 src/gallium/auxiliary/nir/nir_to_tgsi.c       | 38 +++++++++++++++----
 src/gallium/auxiliary/nir/nir_to_tgsi.h       | 10 +++++
 .../drivers/r300/ci/r300-rv515-fails.txt      |  6 ---
 src/gallium/drivers/r300/r300_state.c         |  8 +++-
 4 files changed, 47 insertions(+), 15 deletions(-)

diff --git a/src/gallium/auxiliary/nir/nir_to_tgsi.c b/src/gallium/auxiliary/nir/nir_to_tgsi.c
index b2527a3e70b..216ec912ff7 100644
--- a/src/gallium/auxiliary/nir/nir_to_tgsi.c
+++ b/src/gallium/auxiliary/nir/nir_to_tgsi.c
@@ -37,6 +37,7 @@
 struct ntt_compile {
    nir_shader *s;
    nir_function_impl *impl;
+   const struct nir_to_tgsi_options *options;
    struct pipe_screen *screen;
    struct ureg_program *ureg;
 
@@ -1156,15 +1157,26 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
          break;
 
       case nir_op_fcsel:
-         /* NIR is src0 != 0 ? src1 : src2.
-          * TGSI is src0 < 0 ? src1 : src2.
+         /* NIR fcsel is src0 != 0 ? src1 : src2.
+          * TGSI CMP is src0 < 0 ? src1 : src2.
           *
           * However, fcsel so far as I can find only appears on bools-as-floats
           * (1.0 or 0.0), so we can just negate it for the TGSI op.  It's
           * important to not have an abs here, as i915g has to make extra
           * instructions to do the abs.
           */
-         ureg_CMP(c->ureg, dst, ureg_negate(src[0]), src[1], src[2]);
+         if (c->options->lower_cmp) {
+            /* If the HW doesn't support TGSI CMP (r300 VS), then lower it to a
+             * LRP on the boolean 1.0/0.0 value, instead of requiring the
+             * backend to turn the src0 into 1.0/0.0 first.
+             *
+             * We don't use this in general because on some hardware (i915 FS)
+             * the LRP gets expanded to MUL/MAD.
+             */
+            ureg_LRP(c->ureg, dst, src[0], src[1], src[2]);
+         } else {
+            ureg_CMP(c->ureg, dst, ureg_negate(src[0]), src[1], src[2]);
+         }
          break;
 
          /* It would be nice if we could get this left as scalar in NIR, since
@@ -2906,7 +2918,8 @@ nir_to_tgsi_lower_tex(nir_shader *s)
 }
 
 static void
-ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s)
+ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s,
+                    const struct nir_to_tgsi_options *ntt_options)
 {
    const struct nir_shader_compiler_options *options = s->options;
    bool lower_fsqrt =
@@ -3051,6 +3064,14 @@ nir_lower_primid_sysval_to_input(nir_shader *s)
                                         nir_lower_primid_sysval_to_input_lower, &input);
 }
 
+const void *
+nir_to_tgsi(struct nir_shader *s,
+            struct pipe_screen *screen)
+{
+   static const struct nir_to_tgsi_options default_ntt_options = {0};
+   return nir_to_tgsi_options(s, screen, &default_ntt_options);
+}
+
 /**
  * Translates the NIR shader to TGSI.
  *
@@ -3058,9 +3079,9 @@ nir_lower_primid_sysval_to_input(nir_shader *s)
  * We take ownership of the NIR shader passed, returning a reference to the new
  * TGSI tokens instead.  If you need to keep the NIR, then pass us a clone.
  */
-const void *
-nir_to_tgsi(struct nir_shader *s,
-            struct pipe_screen *screen)
+const void *nir_to_tgsi_options(struct nir_shader *s,
+                                struct pipe_screen *screen,
+                                const struct nir_to_tgsi_options *options)
 {
    struct ntt_compile *c;
    const void *tgsi_tokens;
@@ -3070,7 +3091,7 @@ nir_to_tgsi(struct nir_shader *s,
                                                    PIPE_SHADER_CAP_INTEGERS);
    const struct nir_shader_compiler_options *original_options = s->options;
 
-   ntt_fix_nir_options(screen, s);
+   ntt_fix_nir_options(screen, s, options);
 
    NIR_PASS_V(s, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
               type_size, (nir_lower_io_options)0);
@@ -3157,6 +3178,7 @@ nir_to_tgsi(struct nir_shader *s,
 
    c = rzalloc(NULL, struct ntt_compile);
    c->screen = screen;
+   c->options = options;
 
    c->needs_texcoord_semantic =
       screen->get_param(screen, PIPE_CAP_TGSI_TEXCOORD);
diff --git a/src/gallium/auxiliary/nir/nir_to_tgsi.h b/src/gallium/auxiliary/nir/nir_to_tgsi.h
index 73bb0ab896d..3b38b5d0918 100644
--- a/src/gallium/auxiliary/nir/nir_to_tgsi.h
+++ b/src/gallium/auxiliary/nir/nir_to_tgsi.h
@@ -24,12 +24,22 @@
 #ifndef NIR_TO_TGSI_H
 #define NIR_TO_TGSI_H
 
+#include <stdbool.h>
+
 struct nir_shader;
 struct pipe_screen;
 
+struct nir_to_tgsi_options {
+   bool lower_cmp;
+};
+
 const void *nir_to_tgsi(struct nir_shader *s,
                         struct pipe_screen *screen);
 
+const void *nir_to_tgsi_options(struct nir_shader *s,
+                                struct pipe_screen *screen,
+                                const struct nir_to_tgsi_options *ntt_options);
+
 const void *
 nir_to_tgsi_get_compiler_options(struct pipe_screen *pscreen,
                                  enum pipe_shader_ir ir,
diff --git a/src/gallium/drivers/r300/ci/r300-rv515-fails.txt b/src/gallium/drivers/r300/ci/r300-rv515-fails.txt
index 6d129ee2797..278279835bf 100644
--- a/src/gallium/drivers/r300/ci/r300-rv515-fails.txt
+++ b/src/gallium/drivers/r300/ci/r300-rv515-fails.txt
@@ -48,12 +48,6 @@ dEQP-GLES2.functional.rasterization.primitives.line_loop_wide,Fail
 dEQP-GLES2.functional.shaders.functions.control_flow.mixed_return_break_continue_vertex,Fail
 dEQP-GLES2.functional.shaders.functions.control_flow.mixed_return_break_continue_fragment,Fail
 
-# "No free temporary to use for predicate stack counter."
-dEQP-GLES2.functional.shaders.indexing.tmp_array.float_const_write_dynamic_loop_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec2_const_write_dynamic_loop_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec3_const_write_dynamic_loop_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec4_const_write_dynamic_loop_read_vertex,Fail
-
 # "Rewrite of inst 0 failed Can't allocate source for Inst 4 src_type=1 new_index=1 new_mask=1"
 dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec2_dynamic_subscript_write_component_read_fragment,Fail
 dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec2_dynamic_subscript_write_direct_read_fragment,Fail
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index e3f482b6ca8..80acf0d489b 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -1943,7 +1943,13 @@ static void* r300_create_vs_state(struct pipe_context* pipe,
     vs->state = *shader;
 
     if (vs->state.type == PIPE_SHADER_IR_NIR) {
-       vs->state.tokens = nir_to_tgsi(shader->ir.nir, pipe->screen);
+       static const struct nir_to_tgsi_options swtcl_options = {0};
+       static const struct nir_to_tgsi_options hwtcl_options = {
+           .lower_cmp = true,
+       };
+       vs->state.tokens = nir_to_tgsi_options(shader->ir.nir, pipe->screen,
+                                              r300->screen->caps.has_tcl ?
+                                              &hwtcl_options : &swtcl_options);
     } else {
        assert(vs->state.type == PIPE_SHADER_IR_TGSI);
        /* we need to keep a local copy of the tokens */