nir: Split usub_sat lowering flag from uadd_sat.

Intel vec4 would like to do uadd_sat, but use lowering for usub_sat.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17637>
2022-07-19 12:34:03 -07:00 · 2022-07-19 12:34:03 -07:00 · f6c5b1d6c6
parent aed433833f
commit f6c5b1d6c6
11 changed files with 21 additions and 3 deletions
--- a/src/broadcom/vulkan/v3dv_pipeline.c
+++ b/src/broadcom/vulkan/v3dv_pipeline.c
@ -189,6 +189,7 @@ static const struct spirv_to_nir_options default_spirv_options =  {

 const nir_shader_compiler_options v3dv_nir_options = {
   .lower_uadd_sat = true,
+   .lower_usub_sat = true,
   .lower_iadd_sat = true,
   .lower_all_io_to_temps = true,
   .lower_extract_byte = true,
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@ -3459,14 +3459,21 @@ typedef struct nir_shader_compiler_options {
   bool lower_hadd64;

   /**
-    * Set if nir_op_uadd_sat and nir_op_usub_sat should be lowered to simple
-    * arithmetic.
+    * Set if nir_op_uadd_sat should be lowered to simple arithmetic.
    *
    * If this flag is set, the lowering will be applied to all bit-sizes of
    * these instructions.
    */
   bool lower_uadd_sat;

+   /**
+    * Set if nir_op_usub_sat should be lowered to simple arithmetic.
+    *
+    * If this flag is set, the lowering will be applied to all bit-sizes of
+    * this instruction.
+    */
+   bool lower_usub_sat;
+
   /**
    * Set if nir_op_iadd_sat and nir_op_isub_sat should be lowered to simple
    * arithmetic.
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@ -1694,7 +1694,7 @@ optimizations.extend([

   (('uadd_sat@64', a, b), ('bcsel', ('ult', ('iadd', a, b), a), -1, ('iadd', a, b)), 'options->lower_uadd_sat || (options->lower_int64_options & nir_lower_iadd64) != 0'),
   (('uadd_sat', a, b), ('bcsel', ('ult', ('iadd', a, b), a), -1, ('iadd', a, b)), 'options->lower_uadd_sat'),
-   (('usub_sat', a, b), ('bcsel', ('ult', a, b), 0, ('isub', a, b)), 'options->lower_uadd_sat'),
+   (('usub_sat', a, b), ('bcsel', ('ult', a, b), 0, ('isub', a, b)), 'options->lower_usub_sat'),
   (('usub_sat@64', a, b), ('bcsel', ('ult', a, b), 0, ('isub', a, b)), '(options->lower_int64_options & nir_lower_usub_sat64) != 0'),

   # int64_t sum = a + b;
--- a/src/gallium/auxiliary/nir/nir_to_tgsi.c
+++ b/src/gallium/auxiliary/nir/nir_to_tgsi.c
@ -3541,6 +3541,7 @@ ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s,
       !options->lower_fmod ||
       !options->lower_rotate ||
       !options->lower_uadd_sat ||
+       !options->lower_usub_sat ||
       !options->lower_uniforms_to_ubo ||
       !options->lower_vector_cmp ||
       options->lower_fsqrt != lower_fsqrt ||
@ -3558,6 +3559,7 @@ ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s,
      new_options->lower_fmod = true;
      new_options->lower_rotate = true;
      new_options->lower_uadd_sat = true;
+      new_options->lower_usub_sat = true;
      new_options->lower_uniforms_to_ubo = true;
      new_options->lower_vector_cmp = true;
      new_options->lower_fsqrt = lower_fsqrt;
@ -3910,6 +3912,7 @@ static const nir_shader_compiler_options nir_to_tgsi_compiler_options = {
   .lower_rotate = true,
   .lower_uniforms_to_ubo = true,
   .lower_uadd_sat = true,
+   .lower_usub_sat = true,
   .lower_vector_cmp = true,
   .lower_int64_options = nir_lower_imul_2x32_64,
   .use_interpolated_input_intrinsics = true,
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@ -142,6 +142,7 @@ static const struct nir_shader_compiler_options gallivm_nir_options = {
   .lower_fmod = true,
   .lower_hadd = true,
   .lower_uadd_sat = true,
+   .lower_usub_sat = true,
   .lower_iadd_sat = true,
   .lower_ldexp = true,
   .lower_pack_snorm_2x16 = true,
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@ -597,6 +597,7 @@ static const struct nir_shader_compiler_options gallivm_nir_options = {
   .lower_fmod = true,
   .lower_hadd = true,
   .lower_uadd_sat = true,
+   .lower_usub_sat = true,
   .lower_iadd_sat = true,
   .lower_ldexp = true,
   .lower_pack_snorm_2x16 = true,
--- a/src/gallium/drivers/r600/r600_pipe_common.c
+++ b/src/gallium/drivers/r600/r600_pipe_common.c
@ -1355,6 +1355,7 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
 		.has_isub = true,
 		.lower_iabs = true,
 		.lower_uadd_sat = true,
+		.lower_usub_sat = true,
 		.lower_bitfield_extract = true,
 		.lower_bitfield_insert_to_bitfield_select = true,
 		.has_fused_comp_and_csel = true,
--- a/src/gallium/drivers/v3d/v3d_screen.c
+++ b/src/gallium/drivers/v3d/v3d_screen.c
@ -688,6 +688,7 @@ v3d_screen_is_format_supported(struct pipe_screen *pscreen,

 static const nir_shader_compiler_options v3d_nir_options = {
        .lower_uadd_sat = true,
+        .lower_usub_sat = true,
        .lower_iadd_sat = true,
        .lower_all_io_to_temps = true,
        .lower_extract_byte = true,
--- a/src/gallium/drivers/zink/zink_compiler.c
+++ b/src/gallium/drivers/zink/zink_compiler.c
@ -349,6 +349,7 @@ zink_screen_init_compiler(struct zink_screen *screen)
      .lower_rotate = true,
      .lower_uadd_carry = true,
      .lower_uadd_sat = true,
+      .lower_usub_sat = true,
      .lower_vector_cmp = true,
      .lower_int64_options = 0,
      .lower_doubles_options = 0,
--- a/src/microsoft/compiler/nir_to_dxil.c
+++ b/src/microsoft/compiler/nir_to_dxil.c
@ -97,6 +97,7 @@ nir_options = {
   .lower_all_io_to_temps = true,
   .lower_hadd = true,
   .lower_uadd_sat = true,
+   .lower_usub_sat = true,
   .lower_iadd_sat = true,
   .lower_uadd_carry = true,
   .lower_mul_high = true,
--- a/src/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/nouveau/codegen/nv50_ir_from_nir.cpp
@ -3370,6 +3370,7 @@ nvir_nir_shader_compiler_options(int chipset, uint8_t shader_type)
   op.lower_wpos_pntc = false; // TODO
   op.lower_hadd = true; // TODO
   op.lower_uadd_sat = true; // TODO
+   op.lower_usub_sat = true; // TODO
   op.lower_iadd_sat = true; // TODO
   op.vectorize_io = false;
   op.lower_to_scalar = false;