From f6c5b1d6c6b6a2b0e743377c3bdd9f2b2071b21f Mon Sep 17 00:00:00 2001
From: Emma Anholt <emma@anholt.net>
Date: Tue, 19 Jul 2022 12:34:03 -0700
Subject: [PATCH] nir: Split usub_sat lowering flag from uadd_sat.

Intel vec4 would like to do uadd_sat, but use lowering for usub_sat.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17637>
---
 src/broadcom/vulkan/v3dv_pipeline.c         |  1 +
 src/compiler/nir/nir.h                      | 11 +++++++++--
 src/compiler/nir/nir_opt_algebraic.py       |  2 +-
 src/gallium/auxiliary/nir/nir_to_tgsi.c     |  3 +++
 src/gallium/drivers/i915/i915_screen.c      |  1 +
 src/gallium/drivers/llvmpipe/lp_screen.c    |  1 +
 src/gallium/drivers/r600/r600_pipe_common.c |  1 +
 src/gallium/drivers/v3d/v3d_screen.c        |  1 +
 src/gallium/drivers/zink/zink_compiler.c    |  1 +
 src/microsoft/compiler/nir_to_dxil.c        |  1 +
 src/nouveau/codegen/nv50_ir_from_nir.cpp    |  1 +
 11 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c
index 0021bac3448..c5c84e15512 100644
--- a/src/broadcom/vulkan/v3dv_pipeline.c
+++ b/src/broadcom/vulkan/v3dv_pipeline.c
@@ -189,6 +189,7 @@ static const struct spirv_to_nir_options default_spirv_options =  {
 
 const nir_shader_compiler_options v3dv_nir_options = {
    .lower_uadd_sat = true,
+   .lower_usub_sat = true,
    .lower_iadd_sat = true,
    .lower_all_io_to_temps = true,
    .lower_extract_byte = true,
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 82a2177bc39..6ebad48346c 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3459,14 +3459,21 @@ typedef struct nir_shader_compiler_options {
    bool lower_hadd64;
 
    /**
-    * Set if nir_op_uadd_sat and nir_op_usub_sat should be lowered to simple
-    * arithmetic.
+    * Set if nir_op_uadd_sat should be lowered to simple arithmetic.
     *
     * If this flag is set, the lowering will be applied to all bit-sizes of
     * these instructions.
     */
    bool lower_uadd_sat;
 
+   /**
+    * Set if nir_op_usub_sat should be lowered to simple arithmetic.
+    *
+    * If this flag is set, the lowering will be applied to all bit-sizes of
+    * this instruction.
+    */
+   bool lower_usub_sat;
+
    /**
     * Set if nir_op_iadd_sat and nir_op_isub_sat should be lowered to simple
     * arithmetic.
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index f7f1da5aac5..ade29e3ed63 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -1694,7 +1694,7 @@ optimizations.extend([
 
    (('uadd_sat@64', a, b), ('bcsel', ('ult', ('iadd', a, b), a), -1, ('iadd', a, b)), 'options->lower_uadd_sat || (options->lower_int64_options & nir_lower_iadd64) != 0'),
    (('uadd_sat', a, b), ('bcsel', ('ult', ('iadd', a, b), a), -1, ('iadd', a, b)), 'options->lower_uadd_sat'),
-   (('usub_sat', a, b), ('bcsel', ('ult', a, b), 0, ('isub', a, b)), 'options->lower_uadd_sat'),
+   (('usub_sat', a, b), ('bcsel', ('ult', a, b), 0, ('isub', a, b)), 'options->lower_usub_sat'),
    (('usub_sat@64', a, b), ('bcsel', ('ult', a, b), 0, ('isub', a, b)), '(options->lower_int64_options & nir_lower_usub_sat64) != 0'),
 
    # int64_t sum = a + b;
diff --git a/src/gallium/auxiliary/nir/nir_to_tgsi.c b/src/gallium/auxiliary/nir/nir_to_tgsi.c
index 9532b81e197..2cfc4ebf035 100644
--- a/src/gallium/auxiliary/nir/nir_to_tgsi.c
+++ b/src/gallium/auxiliary/nir/nir_to_tgsi.c
@@ -3541,6 +3541,7 @@ ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s,
        !options->lower_fmod ||
        !options->lower_rotate ||
        !options->lower_uadd_sat ||
+       !options->lower_usub_sat ||
        !options->lower_uniforms_to_ubo ||
        !options->lower_vector_cmp ||
        options->lower_fsqrt != lower_fsqrt ||
@@ -3558,6 +3559,7 @@ ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s,
       new_options->lower_fmod = true;
       new_options->lower_rotate = true;
       new_options->lower_uadd_sat = true;
+      new_options->lower_usub_sat = true;
       new_options->lower_uniforms_to_ubo = true;
       new_options->lower_vector_cmp = true;
       new_options->lower_fsqrt = lower_fsqrt;
@@ -3910,6 +3912,7 @@ static const nir_shader_compiler_options nir_to_tgsi_compiler_options = {
    .lower_rotate = true,
    .lower_uniforms_to_ubo = true,
    .lower_uadd_sat = true,
+   .lower_usub_sat = true,
    .lower_vector_cmp = true,
    .lower_int64_options = nir_lower_imul_2x32_64,
    .use_interpolated_input_intrinsics = true,
diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c
index 4ad008d5aec..06a48259122 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -142,6 +142,7 @@ static const struct nir_shader_compiler_options gallivm_nir_options = {
    .lower_fmod = true,
    .lower_hadd = true,
    .lower_uadd_sat = true,
+   .lower_usub_sat = true,
    .lower_iadd_sat = true,
    .lower_ldexp = true,
    .lower_pack_snorm_2x16 = true,
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 57f4927538c..51468be09cd 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -597,6 +597,7 @@ static const struct nir_shader_compiler_options gallivm_nir_options = {
    .lower_fmod = true,
    .lower_hadd = true,
    .lower_uadd_sat = true,
+   .lower_usub_sat = true,
    .lower_iadd_sat = true,
    .lower_ldexp = true,
    .lower_pack_snorm_2x16 = true,
diff --git a/src/gallium/drivers/r600/r600_pipe_common.c b/src/gallium/drivers/r600/r600_pipe_common.c
index 7d9231da38f..e3326c9d0fb 100644
--- a/src/gallium/drivers/r600/r600_pipe_common.c
+++ b/src/gallium/drivers/r600/r600_pipe_common.c
@@ -1355,6 +1355,7 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
 		.has_isub = true,
 		.lower_iabs = true,
 		.lower_uadd_sat = true,
+		.lower_usub_sat = true,
 		.lower_bitfield_extract = true,
 		.lower_bitfield_insert_to_bitfield_select = true,
 		.has_fused_comp_and_csel = true,
diff --git a/src/gallium/drivers/v3d/v3d_screen.c b/src/gallium/drivers/v3d/v3d_screen.c
index 0a42300b0cc..6c2b2bf6f84 100644
--- a/src/gallium/drivers/v3d/v3d_screen.c
+++ b/src/gallium/drivers/v3d/v3d_screen.c
@@ -688,6 +688,7 @@ v3d_screen_is_format_supported(struct pipe_screen *pscreen,
 
 static const nir_shader_compiler_options v3d_nir_options = {
         .lower_uadd_sat = true,
+        .lower_usub_sat = true,
         .lower_iadd_sat = true,
         .lower_all_io_to_temps = true,
         .lower_extract_byte = true,
diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c
index 3920389f370..dddc82307bc 100644
--- a/src/gallium/drivers/zink/zink_compiler.c
+++ b/src/gallium/drivers/zink/zink_compiler.c
@@ -349,6 +349,7 @@ zink_screen_init_compiler(struct zink_screen *screen)
       .lower_rotate = true,
       .lower_uadd_carry = true,
       .lower_uadd_sat = true,
+      .lower_usub_sat = true,
       .lower_vector_cmp = true,
       .lower_int64_options = 0,
       .lower_doubles_options = 0,
diff --git a/src/microsoft/compiler/nir_to_dxil.c b/src/microsoft/compiler/nir_to_dxil.c
index 578103d3888..bf4d0ddbf2b 100644
--- a/src/microsoft/compiler/nir_to_dxil.c
+++ b/src/microsoft/compiler/nir_to_dxil.c
@@ -97,6 +97,7 @@ nir_options = {
    .lower_all_io_to_temps = true,
    .lower_hadd = true,
    .lower_uadd_sat = true,
+   .lower_usub_sat = true,
    .lower_iadd_sat = true,
    .lower_uadd_carry = true,
    .lower_mul_high = true,
diff --git a/src/nouveau/codegen/nv50_ir_from_nir.cpp b/src/nouveau/codegen/nv50_ir_from_nir.cpp
index bc4ffeb7ddc..9546ef86819 100644
--- a/src/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -3370,6 +3370,7 @@ nvir_nir_shader_compiler_options(int chipset, uint8_t shader_type)
    op.lower_wpos_pntc = false; // TODO
    op.lower_hadd = true; // TODO
    op.lower_uadd_sat = true; // TODO
+   op.lower_usub_sat = true; // TODO
    op.lower_iadd_sat = true; // TODO
    op.vectorize_io = false;
    op.lower_to_scalar = false;