From f6c5b1d6c6b6a2b0e743377c3bdd9f2b2071b21f Mon Sep 17 00:00:00 2001 From: Emma Anholt Date: Tue, 19 Jul 2022 12:34:03 -0700 Subject: [PATCH] nir: Split usub_sat lowering flag from uadd_sat. Intel vec4 would like to do uadd_sat, but use lowering for usub_sat. Reviewed-by: Ian Romanick Part-of: --- src/broadcom/vulkan/v3dv_pipeline.c | 1 + src/compiler/nir/nir.h | 11 +++++++++-- src/compiler/nir/nir_opt_algebraic.py | 2 +- src/gallium/auxiliary/nir/nir_to_tgsi.c | 3 +++ src/gallium/drivers/i915/i915_screen.c | 1 + src/gallium/drivers/llvmpipe/lp_screen.c | 1 + src/gallium/drivers/r600/r600_pipe_common.c | 1 + src/gallium/drivers/v3d/v3d_screen.c | 1 + src/gallium/drivers/zink/zink_compiler.c | 1 + src/microsoft/compiler/nir_to_dxil.c | 1 + src/nouveau/codegen/nv50_ir_from_nir.cpp | 1 + 11 files changed, 21 insertions(+), 3 deletions(-) diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c index 0021bac3448..c5c84e15512 100644 --- a/src/broadcom/vulkan/v3dv_pipeline.c +++ b/src/broadcom/vulkan/v3dv_pipeline.c @@ -189,6 +189,7 @@ static const struct spirv_to_nir_options default_spirv_options = { const nir_shader_compiler_options v3dv_nir_options = { .lower_uadd_sat = true, + .lower_usub_sat = true, .lower_iadd_sat = true, .lower_all_io_to_temps = true, .lower_extract_byte = true, diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 82a2177bc39..6ebad48346c 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -3459,14 +3459,21 @@ typedef struct nir_shader_compiler_options { bool lower_hadd64; /** - * Set if nir_op_uadd_sat and nir_op_usub_sat should be lowered to simple - * arithmetic. + * Set if nir_op_uadd_sat should be lowered to simple arithmetic. * * If this flag is set, the lowering will be applied to all bit-sizes of * these instructions. */ bool lower_uadd_sat; + /** + * Set if nir_op_usub_sat should be lowered to simple arithmetic. + * + * If this flag is set, the lowering will be applied to all bit-sizes of + * these instructions. + */ + bool lower_usub_sat; + /** * Set if nir_op_iadd_sat and nir_op_isub_sat should be lowered to simple * arithmetic. diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index f7f1da5aac5..ade29e3ed63 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -1694,7 +1694,7 @@ optimizations.extend([ (('uadd_sat@64', a, b), ('bcsel', ('ult', ('iadd', a, b), a), -1, ('iadd', a, b)), 'options->lower_uadd_sat || (options->lower_int64_options & nir_lower_iadd64) != 0'), (('uadd_sat', a, b), ('bcsel', ('ult', ('iadd', a, b), a), -1, ('iadd', a, b)), 'options->lower_uadd_sat'), - (('usub_sat', a, b), ('bcsel', ('ult', a, b), 0, ('isub', a, b)), 'options->lower_uadd_sat'), + (('usub_sat', a, b), ('bcsel', ('ult', a, b), 0, ('isub', a, b)), 'options->lower_usub_sat'), (('usub_sat@64', a, b), ('bcsel', ('ult', a, b), 0, ('isub', a, b)), '(options->lower_int64_options & nir_lower_usub_sat64) != 0'), # int64_t sum = a + b; diff --git a/src/gallium/auxiliary/nir/nir_to_tgsi.c b/src/gallium/auxiliary/nir/nir_to_tgsi.c index 9532b81e197..2cfc4ebf035 100644 --- a/src/gallium/auxiliary/nir/nir_to_tgsi.c +++ b/src/gallium/auxiliary/nir/nir_to_tgsi.c @@ -3541,6 +3541,7 @@ ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s, !options->lower_fmod || !options->lower_rotate || !options->lower_uadd_sat || + !options->lower_usub_sat || !options->lower_uniforms_to_ubo || !options->lower_vector_cmp || options->lower_fsqrt != lower_fsqrt || @@ -3558,6 +3559,7 @@ ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s, new_options->lower_fmod = true; new_options->lower_rotate = true; new_options->lower_uadd_sat = true; + new_options->lower_usub_sat = true; new_options->lower_uniforms_to_ubo = true; new_options->lower_vector_cmp = true; new_options->lower_fsqrt = lower_fsqrt; @@ -3910,6 +3912,7 @@ static const nir_shader_compiler_options nir_to_tgsi_compiler_options = { .lower_rotate = true, .lower_uniforms_to_ubo = true, .lower_uadd_sat = true, + .lower_usub_sat = true, .lower_vector_cmp = true, .lower_int64_options = nir_lower_imul_2x32_64, .use_interpolated_input_intrinsics = true, diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index 4ad008d5aec..06a48259122 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -142,6 +142,7 @@ static const struct nir_shader_compiler_options gallivm_nir_options = { .lower_fmod = true, .lower_hadd = true, .lower_uadd_sat = true, + .lower_usub_sat = true, .lower_iadd_sat = true, .lower_ldexp = true, .lower_pack_snorm_2x16 = true, diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 57f4927538c..51468be09cd 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -597,6 +597,7 @@ static const struct nir_shader_compiler_options gallivm_nir_options = { .lower_fmod = true, .lower_hadd = true, .lower_uadd_sat = true, + .lower_usub_sat = true, .lower_iadd_sat = true, .lower_ldexp = true, .lower_pack_snorm_2x16 = true, diff --git a/src/gallium/drivers/r600/r600_pipe_common.c b/src/gallium/drivers/r600/r600_pipe_common.c index 7d9231da38f..e3326c9d0fb 100644 --- a/src/gallium/drivers/r600/r600_pipe_common.c +++ b/src/gallium/drivers/r600/r600_pipe_common.c @@ -1355,6 +1355,7 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen, .has_isub = true, .lower_iabs = true, .lower_uadd_sat = true, + .lower_usub_sat = true, .lower_bitfield_extract = true, .lower_bitfield_insert_to_bitfield_select = true, .has_fused_comp_and_csel = true, diff --git a/src/gallium/drivers/v3d/v3d_screen.c b/src/gallium/drivers/v3d/v3d_screen.c index 0a42300b0cc..6c2b2bf6f84 100644 --- a/src/gallium/drivers/v3d/v3d_screen.c +++ b/src/gallium/drivers/v3d/v3d_screen.c @@ -688,6 +688,7 @@ v3d_screen_is_format_supported(struct pipe_screen *pscreen, static const nir_shader_compiler_options v3d_nir_options = { .lower_uadd_sat = true, + .lower_usub_sat = true, .lower_iadd_sat = true, .lower_all_io_to_temps = true, .lower_extract_byte = true, diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c index 3920389f370..dddc82307bc 100644 --- a/src/gallium/drivers/zink/zink_compiler.c +++ b/src/gallium/drivers/zink/zink_compiler.c @@ -349,6 +349,7 @@ zink_screen_init_compiler(struct zink_screen *screen) .lower_rotate = true, .lower_uadd_carry = true, .lower_uadd_sat = true, + .lower_usub_sat = true, .lower_vector_cmp = true, .lower_int64_options = 0, .lower_doubles_options = 0, diff --git a/src/microsoft/compiler/nir_to_dxil.c b/src/microsoft/compiler/nir_to_dxil.c index 578103d3888..bf4d0ddbf2b 100644 --- a/src/microsoft/compiler/nir_to_dxil.c +++ b/src/microsoft/compiler/nir_to_dxil.c @@ -97,6 +97,7 @@ nir_options = { .lower_all_io_to_temps = true, .lower_hadd = true, .lower_uadd_sat = true, + .lower_usub_sat = true, .lower_iadd_sat = true, .lower_uadd_carry = true, .lower_mul_high = true, diff --git a/src/nouveau/codegen/nv50_ir_from_nir.cpp b/src/nouveau/codegen/nv50_ir_from_nir.cpp index bc4ffeb7ddc..9546ef86819 100644 --- a/src/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/nouveau/codegen/nv50_ir_from_nir.cpp @@ -3370,6 +3370,7 @@ nvir_nir_shader_compiler_options(int chipset, uint8_t shader_type) op.lower_wpos_pntc = false; // TODO op.lower_hadd = true; // TODO op.lower_uadd_sat = true; // TODO + op.lower_usub_sat = true; // TODO op.lower_iadd_sat = true; // TODO op.vectorize_io = false; op.lower_to_scalar = false;