nir: Add opcodes for fused comp + csel and optimizations

Some backends, like r600 support a fused version of int and float compare against zero and and csel. Adding these opcodes here makes it possible to optimize this in nir. v2: Add rules for float compare + csel Signed-off-by: Gert Wollny <gert.wollny@collabora.com> Reviewed-by: Kristian H. Kristensen <hoegsberg@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9452>
2021-03-10 09:42:22 +01:00 · 2021-03-10 09:42:22 +01:00 · 0f5b3c37c5
parent a5747f8ab3
commit 0f5b3c37c5
3 changed files with 25 additions and 0 deletions
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@ -3392,6 +3392,9 @@ typedef struct nir_shader_compiler_options {
    * to imul with masked inputs and iadd */
   bool has_umad24;
   /* Backend supports fused comapre against zero and csel */
   bool has_fused_comp_and_csel;
   /** Backend supports fsub, if not set fsub will automatically be lowered to
    * fadd(x, fneg(y)). If true, driver should call nir_opt_algebraic_late(). */
   bool has_fsub;
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@ -1015,6 +1015,12 @@ opcode("b16csel", 0, tuint, [0, 0, 0],
 opcode("b32csel", 0, tuint, [0, 0, 0],
       [tbool32, tuint, tuint], False, "", "src0 ? src1 : src2")
 triop("i32csel_gt", tint32, "", "(src0 > 0.0f) ? src1 : src2")
 triop("i32csel_ge", tint32, "", "(src0 >= 0.0f) ? src1 : src2")
 triop("fcsel_gt", tfloat32, "", "(src0 > 0.0f) ? src1 : src2")
 triop("fcsel_ge", tfloat32, "", "(src0 >= 0.0f) ? src1 : src2")
 # SM5 bfi assembly
 triop("bfi", tuint32, "", """
 unsigned mask = src0, insert = src1, base = src2;
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@ -1662,6 +1662,22 @@ optimizations.extend([
   (('imul24', a, '#b@32(is_pos_power_of_two)'), ('ishl', a, ('find_lsb', b)), '!options->lower_bitops'),
   (('imul24', a, '#b@32(is_neg_power_of_two)'), ('ineg', ('ishl', a, ('find_lsb', ('iabs', b)))), '!options->lower_bitops'),
   (('imul24', a, 0), (0)),
   (('fcsel', ('slt', 0, a), b, c), ('fcsel_gt', a, b, c), "options->has_fused_comp_and_csel"),
   (('fcsel', ('slt', a, 0), b, c), ('fcsel_ge', a, c, b), "options->has_fused_comp_and_csel"),
   (('fcsel', ('sge', a, 0), b, c), ('fcsel_ge', a, b, c), "options->has_fused_comp_and_csel"),
   (('fcsel', ('sge', 0, a), b, c), ('fcsel_gt', a, c, b), "options->has_fused_comp_and_csel"),
   (('bcsel', ('ilt', 0, 'a@32'), 'b@32', 'c@32'), ('i32csel_gt', a, b, c), "options->has_fused_comp_and_csel"),
   (('bcsel', ('ilt', 'a@32', 0), 'b@32', 'c@32'), ('i32csel_ge', a, c, b), "options->has_fused_comp_and_csel"),
   (('bcsel', ('ige', 'a@32', 0), 'b@32', 'c@32'), ('i32csel_ge', a, b, c), "options->has_fused_comp_and_csel"),
   (('bcsel', ('ige', 0, 'a@32'), 'b@32', 'c@32'), ('i32csel_gt', a, c, b), "options->has_fused_comp_and_csel"),
   (('bcsel', ('flt', 0, 'a@32'), 'b@32', 'c@32'), ('fcsel_gt', a, b, c), "options->has_fused_comp_and_csel"),
   (('bcsel', ('flt', 'a@32', 0), 'b@32', 'c@32'), ('fcsel_ge', a, c, b), "options->has_fused_comp_and_csel"),
   (('bcsel', ('fge', 'a@32', 0), 'b@32', 'c@32'), ('fcsel_ge', a, b, c), "options->has_fused_comp_and_csel"),
   (('bcsel', ('fge', 0, 'a@32'), 'b@32', 'c@32'), ('fcsel_gt', a, c, b), "options->has_fused_comp_and_csel"),
 ])
 # bit_size dependent lowerings