From d52d500f17a0fa7d96e63fd6e558583f137a5997 Mon Sep 17 00:00:00 2001 From: Emma Anholt Date: Fri, 4 Feb 2022 14:27:54 -0800 Subject: [PATCH] r300: Request that nir-to-tgsi avoid generating TGSI_OPCODE_CMP. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Given that our fcsels are on float-bools, we can emit the LRP directly and save the backend having to emit a SLT to turn the CMP src[0] into a bool. This required passing a codegen flags struct for nir-to-tgsi. I think this is a good way forward for it, as the alternative I think has mostly been adding flags to nir_shader_compiler_options (since adding PIPE_SHADER_CAPs is an unreasonable amount of pain). r300 shader-db: total instructions in shared programs: 1484320 -> 1472463 (-0.80%) instructions in affected programs: 243588 -> 231731 (-4.87%) total temps in shared programs: 212485 -> 212143 (-0.16%) temps in affected programs: 3845 -> 3503 (-8.89%) Acked-by: Pavel Ondračka Part-of: --- src/gallium/auxiliary/nir/nir_to_tgsi.c | 38 +++++++++++++++---- src/gallium/auxiliary/nir/nir_to_tgsi.h | 10 +++++ .../drivers/r300/ci/r300-rv515-fails.txt | 6 --- src/gallium/drivers/r300/r300_state.c | 8 +++- 4 files changed, 47 insertions(+), 15 deletions(-) diff --git a/src/gallium/auxiliary/nir/nir_to_tgsi.c b/src/gallium/auxiliary/nir/nir_to_tgsi.c index b2527a3e70b..216ec912ff7 100644 --- a/src/gallium/auxiliary/nir/nir_to_tgsi.c +++ b/src/gallium/auxiliary/nir/nir_to_tgsi.c @@ -37,6 +37,7 @@ struct ntt_compile { nir_shader *s; nir_function_impl *impl; + const struct nir_to_tgsi_options *options; struct pipe_screen *screen; struct ureg_program *ureg; @@ -1156,15 +1157,26 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr) break; case nir_op_fcsel: - /* NIR is src0 != 0 ? src1 : src2. - * TGSI is src0 < 0 ? src1 : src2. + /* NIR fcsel is src0 != 0 ? src1 : src2. + * TGSI CMP is src0 < 0 ? src1 : src2. 
* * However, fcsel so far as I can find only appears on bools-as-floats * (1.0 or 0.0), so we can just negate it for the TGSI op. It's * important to not have an abs here, as i915g has to make extra * instructions to do the abs. */ - ureg_CMP(c->ureg, dst, ureg_negate(src[0]), src[1], src[2]); + if (c->options->lower_cmp) { + /* If the HW doesn't support TGSI CMP (r300 VS), then lower it to a + * LRP on the boolean 1.0/0.0 value, instead of requiring the + * backend to turn the src0 into 1.0/0.0 first. + * + * We don't use this in general because some hardware (i915 FS) the + * LRP gets expanded to MUL/MAD. + */ + ureg_LRP(c->ureg, dst, src[0], src[1], src[2]); + } else { + ureg_CMP(c->ureg, dst, ureg_negate(src[0]), src[1], src[2]); + } break; /* It would be nice if we could get this left as scalar in NIR, since @@ -2906,7 +2918,8 @@ nir_to_tgsi_lower_tex(nir_shader *s) } static void -ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s) +ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s, + const struct nir_to_tgsi_options *ntt_options) { const struct nir_shader_compiler_options *options = s->options; bool lower_fsqrt = @@ -3051,6 +3064,14 @@ nir_lower_primid_sysval_to_input(nir_shader *s) nir_lower_primid_sysval_to_input_lower, &input); } +const void * +nir_to_tgsi(struct nir_shader *s, + struct pipe_screen *screen) +{ + static const struct nir_to_tgsi_options default_ntt_options = {0}; + return nir_to_tgsi_options(s, screen, &default_ntt_options); +} + /** * Translates the NIR shader to TGSI. * @@ -3058,9 +3079,9 @@ nir_lower_primid_sysval_to_input(nir_shader *s) * We take ownership of the NIR shader passed, returning a reference to the new * TGSI tokens instead. If you need to keep the NIR, then pass us a clone. 
*/ -const void * -nir_to_tgsi(struct nir_shader *s, - struct pipe_screen *screen) +const void *nir_to_tgsi_options(struct nir_shader *s, + struct pipe_screen *screen, + const struct nir_to_tgsi_options *options) { struct ntt_compile *c; const void *tgsi_tokens; @@ -3070,7 +3091,7 @@ nir_to_tgsi(struct nir_shader *s, PIPE_SHADER_CAP_INTEGERS); const struct nir_shader_compiler_options *original_options = s->options; - ntt_fix_nir_options(screen, s); + ntt_fix_nir_options(screen, s, options); NIR_PASS_V(s, nir_lower_io, nir_var_shader_in | nir_var_shader_out, type_size, (nir_lower_io_options)0); @@ -3157,6 +3178,7 @@ nir_to_tgsi(struct nir_shader *s, c = rzalloc(NULL, struct ntt_compile); c->screen = screen; + c->options = options; c->needs_texcoord_semantic = screen->get_param(screen, PIPE_CAP_TGSI_TEXCOORD); diff --git a/src/gallium/auxiliary/nir/nir_to_tgsi.h b/src/gallium/auxiliary/nir/nir_to_tgsi.h index 73bb0ab896d..3b38b5d0918 100644 --- a/src/gallium/auxiliary/nir/nir_to_tgsi.h +++ b/src/gallium/auxiliary/nir/nir_to_tgsi.h @@ -24,12 +24,22 @@ #ifndef NIR_TO_TGSI_H #define NIR_TO_TGSI_H +#include <stdbool.h> + struct nir_shader; struct pipe_screen; +struct nir_to_tgsi_options { + bool lower_cmp; +}; + const void *nir_to_tgsi(struct nir_shader *s, struct pipe_screen *screen); +const void *nir_to_tgsi_options(struct nir_shader *s, + struct pipe_screen *screen, + const struct nir_to_tgsi_options *ntt_options); + const void * nir_to_tgsi_get_compiler_options(struct pipe_screen *pscreen, enum pipe_shader_ir ir, diff --git a/src/gallium/drivers/r300/ci/r300-rv515-fails.txt b/src/gallium/drivers/r300/ci/r300-rv515-fails.txt index 6d129ee2797..278279835bf 100644 --- a/src/gallium/drivers/r300/ci/r300-rv515-fails.txt +++ b/src/gallium/drivers/r300/ci/r300-rv515-fails.txt @@ -48,12 +48,6 @@ dEQP-GLES2.functional.rasterization.primitives.line_loop_wide,Fail dEQP-GLES2.functional.shaders.functions.control_flow.mixed_return_break_continue_vertex,Fail
dEQP-GLES2.functional.shaders.functions.control_flow.mixed_return_break_continue_fragment,Fail -# "No free temporary to use for predicate stack counter." -dEQP-GLES2.functional.shaders.indexing.tmp_array.float_const_write_dynamic_loop_read_vertex,Fail -dEQP-GLES2.functional.shaders.indexing.tmp_array.vec2_const_write_dynamic_loop_read_vertex,Fail -dEQP-GLES2.functional.shaders.indexing.tmp_array.vec3_const_write_dynamic_loop_read_vertex,Fail -dEQP-GLES2.functional.shaders.indexing.tmp_array.vec4_const_write_dynamic_loop_read_vertex,Fail - # "Rewrite of inst 0 failed Can't allocate source for Inst 4 src_type=1 new_index=1 new_mask=1" dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec2_dynamic_subscript_write_component_read_fragment,Fail dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec2_dynamic_subscript_write_direct_read_fragment,Fail diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index e3f482b6ca8..80acf0d489b 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -1943,7 +1943,13 @@ static void* r300_create_vs_state(struct pipe_context* pipe, vs->state = *shader; if (vs->state.type == PIPE_SHADER_IR_NIR) { - vs->state.tokens = nir_to_tgsi(shader->ir.nir, pipe->screen); + static const struct nir_to_tgsi_options swtcl_options = {0}; + static const struct nir_to_tgsi_options hwtcl_options = { + .lower_cmp = true, + }; + vs->state.tokens = nir_to_tgsi_options(shader->ir.nir, pipe->screen, + r300->screen->caps.has_tcl ? + &hwtcl_options : &swtcl_options); } else { assert(vs->state.type == PIPE_SHADER_IR_TGSI); /* we need to keep a local copy of the tokens */