nir: Add opcodes for fused comp + csel and optimizations

Some backends, like r600, support a fused version of int and float compare
against zero and csel. Adding these opcodes here makes it possible to
optimize this in nir.

v2: Add rules for float compare + csel

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Reviewed-by: Kristian H. Kristensen <hoegsberg@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9452>
This commit is contained in:
Gert Wollny 2021-03-10 09:42:22 +01:00
parent a5747f8ab3
commit 0f5b3c37c5
3 changed files with 25 additions and 0 deletions

View File

@ -3392,6 +3392,9 @@ typedef struct nir_shader_compiler_options {
* to imul with masked inputs and iadd */
bool has_umad24;
/* Backend supports fused compare against zero and csel */
bool has_fused_comp_and_csel;
/** Backend supports fsub, if not set fsub will automatically be lowered to
* fadd(x, fneg(y)). If true, driver should call nir_opt_algebraic_late(). */
bool has_fsub;

View File

@ -1015,6 +1015,12 @@ opcode("b16csel", 0, tuint, [0, 0, 0],
opcode("b32csel", 0, tuint, [0, 0, 0],
[tbool32, tuint, tuint], False, "", "src0 ? src1 : src2")
# Fused "compare against zero + select" opcodes (32-bit only).
# Each one tests src0 against zero and yields src1 when the test holds,
# src2 otherwise.  The integer variants compare against an integer zero so
# that no int -> float conversion is involved in the comparison.
triop("i32csel_gt", tint32, "", "(src0 > 0) ? src1 : src2")
triop("i32csel_ge", tint32, "", "(src0 >= 0) ? src1 : src2")

# Float variants: an ordered comparison against 0.0f, so a NaN src0 fails
# the test and selects src2.
triop("fcsel_gt", tfloat32, "", "(src0 > 0.0f) ? src1 : src2")
triop("fcsel_ge", tfloat32, "", "(src0 >= 0.0f) ? src1 : src2")
# SM5 bfi assembly
triop("bfi", tuint32, "", """
unsigned mask = src0, insert = src1, base = src2;

View File

@ -1662,6 +1662,22 @@ optimizations.extend([
(('imul24', a, '#b@32(is_pos_power_of_two)'), ('ishl', a, ('find_lsb', b)), '!options->lower_bitops'),
(('imul24', a, '#b@32(is_neg_power_of_two)'), ('ineg', ('ishl', a, ('find_lsb', ('iabs', b)))), '!options->lower_bitops'),
(('imul24', a, 0), (0)),
# Fuse a compare-against-zero with the select that consumes it, for
# backends that advertise has_fused_comp_and_csel.
#
# Float forms: fcsel_gt/fcsel_ge use ordered comparisons, so a NaN operand
# always selects the "else" source.  The mirrored comparisons (a < 0 and
# 0 >= a) therefore must NOT be expressed by swapping the selected
# operands of the opposite test -- that would pick the wrong source for
# NaN.  Negating the compared value keeps the NaN behaviour identical:
#   a < 0   <=>  -a > 0
#   0 >= a  <=>  -a >= 0
(('fcsel', ('slt', 0, a), b, c), ('fcsel_gt', a, b, c), "options->has_fused_comp_and_csel"),
(('fcsel', ('slt', a, 0), b, c), ('fcsel_gt', ('fneg', a), b, c), "options->has_fused_comp_and_csel"),
(('fcsel', ('sge', a, 0), b, c), ('fcsel_ge', a, b, c), "options->has_fused_comp_and_csel"),
(('fcsel', ('sge', 0, a), b, c), ('fcsel_ge', ('fneg', a), b, c), "options->has_fused_comp_and_csel"),
# Integer forms: there is no NaN, so "a < 0" is exactly "!(a >= 0)" and
# "0 >= a" is exactly "!(a > 0)"; swapping the selected operands is safe.
(('bcsel', ('ilt', 0, 'a@32'), 'b@32', 'c@32'), ('i32csel_gt', a, b, c), "options->has_fused_comp_and_csel"),
(('bcsel', ('ilt', 'a@32', 0), 'b@32', 'c@32'), ('i32csel_ge', a, c, b), "options->has_fused_comp_and_csel"),
(('bcsel', ('ige', 'a@32', 0), 'b@32', 'c@32'), ('i32csel_ge', a, b, c), "options->has_fused_comp_and_csel"),
(('bcsel', ('ige', 0, 'a@32'), 'b@32', 'c@32'), ('i32csel_gt', a, c, b), "options->has_fused_comp_and_csel"),
# Float compares feeding a boolean select: same NaN-preserving treatment
# as the slt/sge forms above.
(('bcsel', ('flt', 0, 'a@32'), 'b@32', 'c@32'), ('fcsel_gt', a, b, c), "options->has_fused_comp_and_csel"),
(('bcsel', ('flt', 'a@32', 0), 'b@32', 'c@32'), ('fcsel_gt', ('fneg', a), b, c), "options->has_fused_comp_and_csel"),
(('bcsel', ('fge', 'a@32', 0), 'b@32', 'c@32'), ('fcsel_ge', a, b, c), "options->has_fused_comp_and_csel"),
(('bcsel', ('fge', 0, 'a@32'), 'b@32', 'c@32'), ('fcsel_ge', ('fneg', a), b, c), "options->has_fused_comp_and_csel"),
])
# bit_size dependent lowerings