nir: Add opcodes for fused comp + csel and optimizations
Some backends, like r600, support a fused version of int and float compare against zero and csel. Adding these opcodes here makes it possible to optimize this in nir. v2: Add rules for float compare + csel Signed-off-by: Gert Wollny <gert.wollny@collabora.com> Reviewed-by: Kristian H. Kristensen <hoegsberg@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9452>
This commit is contained in:
parent
a5747f8ab3
commit
0f5b3c37c5
|
@ -3392,6 +3392,9 @@ typedef struct nir_shader_compiler_options {
|
||||||
* to imul with masked inputs and iadd */
|
* to imul with masked inputs and iadd */
|
||||||
bool has_umad24;
|
bool has_umad24;
|
||||||
|
|
||||||
|
/* Backend supports fused compare against zero and csel */
|
||||||
|
bool has_fused_comp_and_csel;
|
||||||
|
|
||||||
/** Backend supports fsub, if not set fsub will automatically be lowered to
|
/** Backend supports fsub, if not set fsub will automatically be lowered to
|
||||||
* fadd(x, fneg(y)). If true, driver should call nir_opt_algebraic_late(). */
|
* fadd(x, fneg(y)). If true, driver should call nir_opt_algebraic_late(). */
|
||||||
bool has_fsub;
|
bool has_fsub;
|
||||||
|
|
|
@ -1015,6 +1015,12 @@ opcode("b16csel", 0, tuint, [0, 0, 0],
|
||||||
opcode("b32csel", 0, tuint, [0, 0, 0],
|
opcode("b32csel", 0, tuint, [0, 0, 0],
|
||||||
[tbool32, tuint, tuint], False, "", "src0 ? src1 : src2")
|
[tbool32, tuint, tuint], False, "", "src0 ? src1 : src2")
|
||||||
|
|
||||||
|
triop("i32csel_gt", tint32, "", "(src0 > 0.0f) ? src1 : src2")
|
||||||
|
triop("i32csel_ge", tint32, "", "(src0 >= 0.0f) ? src1 : src2")
|
||||||
|
|
||||||
|
triop("fcsel_gt", tfloat32, "", "(src0 > 0.0f) ? src1 : src2")
|
||||||
|
triop("fcsel_ge", tfloat32, "", "(src0 >= 0.0f) ? src1 : src2")
|
||||||
|
|
||||||
# SM5 bfi assembly
|
# SM5 bfi assembly
|
||||||
triop("bfi", tuint32, "", """
|
triop("bfi", tuint32, "", """
|
||||||
unsigned mask = src0, insert = src1, base = src2;
|
unsigned mask = src0, insert = src1, base = src2;
|
||||||
|
|
|
@ -1662,6 +1662,22 @@ optimizations.extend([
|
||||||
(('imul24', a, '#b@32(is_pos_power_of_two)'), ('ishl', a, ('find_lsb', b)), '!options->lower_bitops'),
|
(('imul24', a, '#b@32(is_pos_power_of_two)'), ('ishl', a, ('find_lsb', b)), '!options->lower_bitops'),
|
||||||
(('imul24', a, '#b@32(is_neg_power_of_two)'), ('ineg', ('ishl', a, ('find_lsb', ('iabs', b)))), '!options->lower_bitops'),
|
(('imul24', a, '#b@32(is_neg_power_of_two)'), ('ineg', ('ishl', a, ('find_lsb', ('iabs', b)))), '!options->lower_bitops'),
|
||||||
(('imul24', a, 0), (0)),
|
(('imul24', a, 0), (0)),
|
||||||
|
|
||||||
|
(('fcsel', ('slt', 0, a), b, c), ('fcsel_gt', a, b, c), "options->has_fused_comp_and_csel"),
|
||||||
|
(('fcsel', ('slt', a, 0), b, c), ('fcsel_ge', a, c, b), "options->has_fused_comp_and_csel"),
|
||||||
|
(('fcsel', ('sge', a, 0), b, c), ('fcsel_ge', a, b, c), "options->has_fused_comp_and_csel"),
|
||||||
|
(('fcsel', ('sge', 0, a), b, c), ('fcsel_gt', a, c, b), "options->has_fused_comp_and_csel"),
|
||||||
|
|
||||||
|
(('bcsel', ('ilt', 0, 'a@32'), 'b@32', 'c@32'), ('i32csel_gt', a, b, c), "options->has_fused_comp_and_csel"),
|
||||||
|
(('bcsel', ('ilt', 'a@32', 0), 'b@32', 'c@32'), ('i32csel_ge', a, c, b), "options->has_fused_comp_and_csel"),
|
||||||
|
(('bcsel', ('ige', 'a@32', 0), 'b@32', 'c@32'), ('i32csel_ge', a, b, c), "options->has_fused_comp_and_csel"),
|
||||||
|
(('bcsel', ('ige', 0, 'a@32'), 'b@32', 'c@32'), ('i32csel_gt', a, c, b), "options->has_fused_comp_and_csel"),
|
||||||
|
|
||||||
|
(('bcsel', ('flt', 0, 'a@32'), 'b@32', 'c@32'), ('fcsel_gt', a, b, c), "options->has_fused_comp_and_csel"),
|
||||||
|
(('bcsel', ('flt', 'a@32', 0), 'b@32', 'c@32'), ('fcsel_ge', a, c, b), "options->has_fused_comp_and_csel"),
|
||||||
|
(('bcsel', ('fge', 'a@32', 0), 'b@32', 'c@32'), ('fcsel_ge', a, b, c), "options->has_fused_comp_and_csel"),
|
||||||
|
(('bcsel', ('fge', 0, 'a@32'), 'b@32', 'c@32'), ('fcsel_gt', a, c, b), "options->has_fused_comp_and_csel"),
|
||||||
|
|
||||||
])
|
])
|
||||||
|
|
||||||
# bit_size dependent lowerings
|
# bit_size dependent lowerings
|
||||||
|
|
Loading…
Reference in New Issue