From 2883e8f33db809b2f34f4e9b973ab81ec9b7eac2 Mon Sep 17 00:00:00 2001 From: Emma Anholt Date: Tue, 8 Feb 2022 10:13:59 -0800 Subject: [PATCH] nir_to_tgsi: Add a flag for lowering fabs, and use it in r300/i915. Saves instructions if the same fabs value is used multiple times. i915g: total instructions in shared programs: 397005 -> 396525 (-0.12%) instructions in affected programs: 11061 -> 10581 (-4.34%) LOST: 0 GAINED: 22 r300 (not r500): total instructions in shared programs: 180286 -> 179767 (-0.29%) instructions in affected programs: 27102 -> 26583 (-1.91%) total temps in shared programs: 29692 -> 29638 (-0.18%) temps in affected programs: 356 -> 302 (-15.17%) Reviewed-by: Matt Turner Part-of: --- src/gallium/auxiliary/nir/nir_to_tgsi.c | 14 ++++++++++++-- src/gallium/auxiliary/nir/nir_to_tgsi.h | 2 ++ src/gallium/drivers/i915/ci/i915-g33-fails.txt | 12 +----------- src/gallium/drivers/i915/i915_state.c | 5 ++++- src/gallium/drivers/r300/r300_state.c | 18 +++++++++++++++--- 5 files changed, 34 insertions(+), 17 deletions(-) diff --git a/src/gallium/auxiliary/nir/nir_to_tgsi.c b/src/gallium/auxiliary/nir/nir_to_tgsi.c index 216ec912ff7..17776d08786 100644 --- a/src/gallium/auxiliary/nir/nir_to_tgsi.c +++ b/src/gallium/auxiliary/nir/nir_to_tgsi.c @@ -1009,7 +1009,10 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr) break; case nir_op_fabs: - ureg_MOV(c->ureg, dst, ureg_abs(src[0])); + if (c->options->lower_fabs) + ureg_MAX(c->ureg, dst, src[0], ureg_negate(src[0])); + else + ureg_MOV(c->ureg, dst, ureg_abs(src[0])); break; case nir_op_fsat: @@ -3162,8 +3165,15 @@ const void *nir_to_tgsi_options(struct nir_shader *s, /* Only lower 32-bit floats. The only other modifier type officially * supported by TGSI is 32-bit integer negates, but even those are broken on * virglrenderer, so skip lowering all integer and f64 float mods. 
+ * + * The options->lower_fabs requests that we not have native source modifiers + * for fabs, and instead emit MAX(a,-a) for nir_op_fabs. */ - NIR_PASS_V(s, nir_lower_to_source_mods, nir_lower_float_source_mods); + nir_lower_to_source_mods_flags source_mods = nir_lower_fneg_source_mods; + if (!options->lower_fabs) + source_mods |= nir_lower_fabs_source_mods; + NIR_PASS_V(s, nir_lower_to_source_mods, source_mods); + NIR_PASS_V(s, nir_convert_from_ssa, true); NIR_PASS_V(s, nir_lower_vec_to_movs, NULL, NULL); diff --git a/src/gallium/auxiliary/nir/nir_to_tgsi.h b/src/gallium/auxiliary/nir/nir_to_tgsi.h index 3b38b5d0918..3d1d5166991 100644 --- a/src/gallium/auxiliary/nir/nir_to_tgsi.h +++ b/src/gallium/auxiliary/nir/nir_to_tgsi.h @@ -31,6 +31,8 @@ struct pipe_screen; struct nir_to_tgsi_options { bool lower_cmp; + /* Emit MAX(a,-a) instead of the abs src modifier */ + bool lower_fabs; }; const void *nir_to_tgsi(struct nir_shader *s, diff --git a/src/gallium/drivers/i915/ci/i915-g33-fails.txt b/src/gallium/drivers/i915/ci/i915-g33-fails.txt index a6c62d470df..337e3556aaf 100644 --- a/src/gallium/drivers/i915/ci/i915-g33-fails.txt +++ b/src/gallium/drivers/i915/ci/i915-g33-fails.txt @@ -47,19 +47,14 @@ dEQP-GLES2.functional.shaders.loops.for_constant_iterations.double_continue_frag dEQP-GLES2.functional.shaders.loops.for_constant_iterations.mixed_break_continue_fragment,Fail dEQP-GLES2.functional.shaders.random.all_features.fragment.22,Fail -dEQP-GLES2.functional.shaders.random.all_features.fragment.32,Fail -dEQP-GLES2.functional.shaders.random.all_features.fragment.34,Fail dEQP-GLES2.functional.shaders.random.all_features.fragment.38,Fail dEQP-GLES2.functional.shaders.random.all_features.fragment.5,Fail dEQP-GLES2.functional.shaders.random.all_features.fragment.79,Fail -dEQP-GLES2.functional.shaders.random.all_features.fragment.80,Fail dEQP-GLES2.functional.shaders.random.all_features.fragment.86,Fail -dEQP-GLES2.functional.shaders.random.all_features.fragment.89,Fail 
dEQP-GLES2.functional.shaders.random.all_features.fragment.93,Fail dEQP-GLES2.functional.shaders.random.all_features.fragment.97,Fail dEQP-GLES2.functional.shaders.random.trigonometric.fragment.15,Fail dEQP-GLES2.functional.shaders.random.trigonometric.fragment.26,Fail -dEQP-GLES2.functional.shaders.random.trigonometric.fragment.4,Fail dEQP-GLES2.functional.shaders.random.trigonometric.fragment.42,Fail dEQP-GLES2.functional.shaders.random.trigonometric.fragment.45,Fail dEQP-GLES2.functional.shaders.random.trigonometric.fragment.52,Fail @@ -109,11 +104,9 @@ dEQP-GLES2.functional.uniform_api.random.20,Fail dEQP-GLES2.functional.uniform_api.random.54,Fail dEQP-GLES2.functional.uniform_api.random.71,Fail dEQP-GLES2.functional.uniform_api.random.74,Fail -dEQP-GLES2.functional.uniform_api.random.80,Fail dEQP-GLES2.functional.uniform_api.random.81,Fail dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.array_in_struct.mat4_mat2_both,Fail dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.array_in_struct.mat4_mat2_fragment,Fail -dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.array_in_struct.sampler2D_samplerCube_fragment,Fail dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.basic_array.mat4_both,Fail dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.basic_array.mat4_fragment,Fail dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.multiple_nested_structs_arrays.both,Fail @@ -121,13 +114,10 @@ dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.multiple_nest dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.nested_structs_arrays.mat4_mat2_fragment,Fail dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.struct_in_array.mat4_mat2_both,Fail dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.struct_in_array.mat4_mat2_fragment,Fail 
-dEQP-GLES2.functional.uniform_api.value.assigned.by_value.render.array_in_struct.sampler2D_samplerCube_fragment,Fail dEQP-GLES2.functional.uniform_api.value.assigned.by_value.render.multiple_nested_structs_arrays.both,Fail dEQP-GLES2.functional.uniform_api.value.assigned.by_value.render.multiple_nested_structs_arrays.fragment,Fail dEQP-GLES2.functional.uniform_api.value.initial.render.array_in_struct.mat4_mat2_both,Fail dEQP-GLES2.functional.uniform_api.value.initial.render.array_in_struct.mat4_mat2_fragment,Fail -dEQP-GLES2.functional.uniform_api.value.initial.render.basic_array.mat4_both,Fail -dEQP-GLES2.functional.uniform_api.value.initial.render.basic_array.mat4_fragment,Fail dEQP-GLES2.functional.uniform_api.value.initial.render.multiple_nested_structs_arrays.both,Fail dEQP-GLES2.functional.uniform_api.value.initial.render.multiple_nested_structs_arrays.fragment,Fail dEQP-GLES2.functional.uniform_api.value.initial.render.nested_structs_arrays.mat4_mat2_fragment,Fail @@ -900,7 +890,7 @@ spec@glsl-1.10@execution@samplers@glsl-fs-shadow2d-bias,Fail spec@glsl-1.10@execution@samplers@glsl-fs-shadow2d-clamp-z,Fail spec@glsl-1.10@execution@samplers@glsl-fs-shadow2dproj,Fail spec@glsl-1.10@execution@samplers@glsl-fs-shadow2dproj-bias,Fail -spec@glsl-1.10@execution@samplers@glsl-fs-texture2d-dependent-5,Fail + spec@glsl-1.10@execution@temp-array-indexing@glsl-fs-vec4-indexing-temp-dst-in-loop,Fail spec@glsl-1.10@execution@temp-array-indexing@glsl-fs-vec4-indexing-temp-dst-in-nested-loop-combined,Fail spec@glsl-1.10@execution@temp-array-indexing@glsl-fs-vec4-indexing-temp-src-in-loop,Fail diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c index c0c5ce5e648..0546db5b1fe 100644 --- a/src/gallium/drivers/i915/i915_state.c +++ b/src/gallium/drivers/i915/i915_state.c @@ -550,7 +550,10 @@ i915_create_fs_state(struct pipe_context *pipe, NIR_PASS_V(s, i915_nir_lower_sincos); - ifs->state.tokens = nir_to_tgsi(s, pipe->screen); + static 
const struct nir_to_tgsi_options ntt_options = { + .lower_fabs = true, + }; + ifs->state.tokens = nir_to_tgsi_options(s, pipe->screen, &ntt_options); } else { assert(templ->type == PIPE_SHADER_IR_TGSI); /* we need to keep a local copy of the tokens */ diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 80acf0d489b..d1c3cdd83ea 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -1944,12 +1944,24 @@ static void* r300_create_vs_state(struct pipe_context* pipe, if (vs->state.type == PIPE_SHADER_IR_NIR) { static const struct nir_to_tgsi_options swtcl_options = {0}; - static const struct nir_to_tgsi_options hwtcl_options = { + static const struct nir_to_tgsi_options hwtcl_r300_options = { + .lower_cmp = true, + .lower_fabs = true, + }; + static const struct nir_to_tgsi_options hwtcl_r500_options = { .lower_cmp = true, }; + const struct nir_to_tgsi_options *ntt_options; + if (r300->screen->caps.has_tcl) { + if (r300->screen->caps.is_r500) + ntt_options = &hwtcl_r500_options; + else + ntt_options = &hwtcl_r300_options; + } else { + ntt_options = &swtcl_options; + } vs->state.tokens = nir_to_tgsi_options(shader->ir.nir, pipe->screen, - r300->screen->caps.has_tcl ? - &hwtcl_options : &swtcl_options); + ntt_options); } else { assert(vs->state.type == PIPE_SHADER_IR_TGSI); /* we need to keep a local copy of the tokens */