nir/algebraic: optimize more 64-bit imul with constant source

Two 64-bit shifts and an addition are usually faster than the several
multiplications nir_lower_int64 creates.

No fossil-db changes.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14227>
This commit is contained in:
Rhys Perry 2021-03-09 16:51:25 +00:00 committed by Marge Bot
parent c56cf157c5
commit 403ae3b48e
2 changed files with 21 additions and 0 deletions

View File

@ -100,6 +100,9 @@ optimizations = [
(('imul', a, '#b(is_neg_power_of_two)'), ('ineg', ('ishl', a, ('find_lsb', ('iabs', b)))), '!options->lower_bitops'),
(('ishl', a, '#b'), ('imul', a, ('ishl', 1, b)), 'options->lower_bitops'),
(('imul@64', a, '#b(is_bitcount2)'), ('iadd', ('ishl', a, ('ufind_msb', b)), ('ishl', a, ('find_lsb', b))),
'!options->lower_bitops && (options->lower_int64_options & (nir_lower_imul64 | nir_lower_shift64)) == nir_lower_imul64'),
(('unpack_64_2x32_split_x', ('imul_2x32_64(is_used_once)', a, b)), ('imul', a, b)),
(('unpack_64_2x32_split_x', ('umul_2x32_64(is_used_once)', a, b)), ('imul', a, b)),
(('imul_2x32_64', a, b), ('pack_64_2x32_split', ('imul', a, b), ('imul_high', a, b)), 'options->lower_mul_2x32_64'),

View File

@ -93,6 +93,24 @@ is_neg_power_of_two(UNUSED struct hash_table *ht, const nir_alu_instr *instr,
return true;
}
/**
 * Search-helper predicate: true when source \p src of \p instr is a constant
 * whose selected components each have exactly two bits set.
 *
 * \param ht              unused.
 * \param instr           ALU instruction whose source is inspected.
 * \param src             index into instr->src of the source to test.
 * \param num_components  number of swizzle entries to check.
 * \param swizzle         component selectors; swizzle[i] picks the constant
 *                        component read for the i-th check.
 *
 * \return false if the source is not constant or if any checked component
 *         has a population count other than 2; true otherwise (including
 *         when num_components is 0 and the source is constant).
 */
static inline bool
is_bitcount2(UNUSED struct hash_table *ht, const nir_alu_instr *instr,
             unsigned src, unsigned num_components,
             const uint8_t *swizzle)
{
   if (nir_src_is_const(instr->src[src].src)) {
      unsigned comp = 0;

      /* Bail out on the first component that is not a two-bit value. */
      while (comp < num_components) {
         const uint64_t bits =
            nir_src_comp_as_uint(instr->src[src].src, swizzle[comp]);

         if (util_bitcount64(bits) != 2)
            return false;

         comp++;
      }

      return true;
   }

   /* Non-constant sources never match. */
   return false;
}
#define MULTIPLE(test) \
static inline bool \
is_unsigned_multiple_of_ ## test(UNUSED struct hash_table *ht, \