nir/algebraic: add flrp patterns for 16 and 64 bits
Reviewed-by: Rob Clark <robdclark@chromium.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6283>
This commit is contained in:
parent
3064feb235
commit
3c8934a644
|
@ -154,35 +154,45 @@ optimizations = [
|
||||||
|
|
||||||
# flrp(a, a + b, c) => a + flrp(0, b, c) => a + (b * c)
|
# flrp(a, a + b, c) => a + flrp(0, b, c) => a + (b * c)
|
||||||
(('~flrp', a, ('fadd(is_used_once)', a, b), c), ('fadd', ('fmul', b, c), a)),
|
(('~flrp', a, ('fadd(is_used_once)', a, b), c), ('fadd', ('fmul', b, c), a)),
|
||||||
(('~flrp@32', a, ('fadd', a, b), c), ('fadd', ('fmul', b, c), a), 'options->lower_flrp32'),
|
]
|
||||||
(('~flrp@64', a, ('fadd', a, b), c), ('fadd', ('fmul', b, c), a), 'options->lower_flrp64'),
|
|
||||||
|
|
||||||
(('~flrp@32', ('fadd', a, b), ('fadd', a, c), d), ('fadd', ('flrp', b, c, d), a), 'options->lower_flrp32'),
|
# Float sizes
|
||||||
(('~flrp@64', ('fadd', a, b), ('fadd', a, c), d), ('fadd', ('flrp', b, c, d), a), 'options->lower_flrp64'),
|
for s in [16, 32, 64]:
|
||||||
|
optimizations.extend([
|
||||||
|
(('~flrp@{}'.format(s), a, ('fadd', a, b), c), ('fadd', ('fmul', b, c), a), 'options->lower_flrp{}'.format(s)),
|
||||||
|
(('~flrp@{}'.format(s), ('fadd', a, b), ('fadd', a, c), d), ('fadd', ('flrp', b, c, d), a), 'options->lower_flrp{}'.format(s)),
|
||||||
|
(('~flrp@{}'.format(s), a, ('fmul(is_used_once)', a, b), c), ('fmul', ('flrp', 1.0, b, c), a), 'options->lower_flrp{}'.format(s)),
|
||||||
|
|
||||||
(('~flrp@32', a, ('fmul(is_used_once)', a, b), c), ('fmul', ('flrp', 1.0, b, c), a), 'options->lower_flrp32'),
|
(('~flrp@{}'.format(s), a, b, ('b2f', 'c@1')), ('bcsel', c, b, a), 'options->lower_flrp{}'.format(s)),
|
||||||
(('~flrp@64', a, ('fmul(is_used_once)', a, b), c), ('fmul', ('flrp', 1.0, b, c), a), 'options->lower_flrp64'),
|
|
||||||
|
|
||||||
|
(('~fadd@{}'.format(s), ('fmul', a, ('fadd', 1.0, ('fneg', c))), ('fmul', b, c)), ('flrp', a, b, c), '!options->lower_flrp{}'.format(s)),
|
||||||
|
# These are the same as the previous three rules, but it depends on
|
||||||
|
# 1-fsat(x) <=> fsat(1-x). See below.
|
||||||
|
(('~fadd@{}'.format(s), ('fmul', a, ('fsat', ('fadd', 1.0, ('fneg', c)))), ('fmul', b, ('fsat', c))), ('flrp', a, b, ('fsat', c)), '!options->lower_flrp{}'.format(s)),
|
||||||
|
(('~fadd@{}'.format(s), a, ('fmul', c, ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp{}'.format(s)),
|
||||||
|
|
||||||
|
(('~fadd@{}'.format(s), ('fmul', a, ('fadd', 1.0, ('fneg', ('b2f', 'c@1')))), ('fmul', b, ('b2f', c))), ('bcsel', c, b, a), 'options->lower_flrp{}'.format(s)),
|
||||||
|
(('~fadd@{}'.format(s), a, ('fmul', ('b2f', 'c@1'), ('fadd', b, ('fneg', a)))), ('bcsel', c, b, a), 'options->lower_flrp{}'.format(s)),
|
||||||
|
|
||||||
|
# 1 - ((1 - a) * (1 - b))
|
||||||
|
# 1 - (1 - a - b + a*b)
|
||||||
|
# 1 - 1 + a + b - a*b
|
||||||
|
# a + b - a*b
|
||||||
|
# a + b*(1 - a)
|
||||||
|
# b*(1 - a) + 1*a
|
||||||
|
# flrp(b, 1, a)
|
||||||
|
(('~fadd@{}'.format(s), 1.0, ('fneg', ('fmul', ('fadd', 1.0, ('fneg', a)), ('fadd', 1.0, ('fneg', b))))), ('flrp', b, 1.0, a), '!options->lower_flrp{}'.format(s)),
|
||||||
|
])
|
||||||
|
|
||||||
|
optimizations.extend([
|
||||||
(('~flrp', ('fmul(is_used_once)', a, b), ('fmul(is_used_once)', a, c), d), ('fmul', ('flrp', b, c, d), a)),
|
(('~flrp', ('fmul(is_used_once)', a, b), ('fmul(is_used_once)', a, c), d), ('fmul', ('flrp', b, c, d), a)),
|
||||||
|
|
||||||
(('~flrp', a, b, ('b2f', 'c@1')), ('bcsel', c, b, a), 'options->lower_flrp32'),
|
|
||||||
(('~flrp', a, 0.0, c), ('fadd', ('fmul', ('fneg', a), c), a)),
|
(('~flrp', a, 0.0, c), ('fadd', ('fmul', ('fneg', a), c), a)),
|
||||||
(('ftrunc', a), ('bcsel', ('flt', a, 0.0), ('fneg', ('ffloor', ('fabs', a))), ('ffloor', ('fabs', a))), 'options->lower_ftrunc'),
|
(('ftrunc', a), ('bcsel', ('flt', a, 0.0), ('fneg', ('ffloor', ('fabs', a))), ('ffloor', ('fabs', a))), 'options->lower_ftrunc'),
|
||||||
(('ffloor', a), ('fsub', a, ('ffract', a)), 'options->lower_ffloor'),
|
(('ffloor', a), ('fsub', a, ('ffract', a)), 'options->lower_ffloor'),
|
||||||
(('fadd', a, ('fneg', ('ffract', a))), ('ffloor', a), '!options->lower_ffloor'),
|
(('fadd', a, ('fneg', ('ffract', a))), ('ffloor', a), '!options->lower_ffloor'),
|
||||||
(('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'),
|
(('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'),
|
||||||
(('fceil', a), ('fneg', ('ffloor', ('fneg', a))), 'options->lower_fceil'),
|
(('fceil', a), ('fneg', ('ffloor', ('fneg', a))), 'options->lower_fceil'),
|
||||||
(('~fadd', ('fmul', a, ('fadd', 1.0, ('fneg', ('b2f', 'c@1')))), ('fmul', b, ('b2f', c))), ('bcsel', c, b, a), 'options->lower_flrp32'),
|
|
||||||
(('~fadd@32', ('fmul', a, ('fadd', 1.0, ('fneg', c ) )), ('fmul', b, c )), ('flrp', a, b, c), '!options->lower_flrp32'),
|
|
||||||
(('~fadd@64', ('fmul', a, ('fadd', 1.0, ('fneg', c ) )), ('fmul', b, c )), ('flrp', a, b, c), '!options->lower_flrp64'),
|
|
||||||
# These are the same as the previous three rules, but it depends on
|
|
||||||
# 1-fsat(x) <=> fsat(1-x). See below.
|
|
||||||
(('~fadd@32', ('fmul', a, ('fsat', ('fadd', 1.0, ('fneg', c )))), ('fmul', b, ('fsat', c))), ('flrp', a, b, ('fsat', c)), '!options->lower_flrp32'),
|
|
||||||
(('~fadd@64', ('fmul', a, ('fsat', ('fadd', 1.0, ('fneg', c )))), ('fmul', b, ('fsat', c))), ('flrp', a, b, ('fsat', c)), '!options->lower_flrp64'),
|
|
||||||
|
|
||||||
(('~fadd', a, ('fmul', ('b2f', 'c@1'), ('fadd', b, ('fneg', a)))), ('bcsel', c, b, a), 'options->lower_flrp32'),
|
|
||||||
(('~fadd@32', a, ('fmul', c , ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp32'),
|
|
||||||
(('~fadd@64', a, ('fmul', c , ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp64'),
|
|
||||||
(('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
|
(('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
|
||||||
(('~fadd', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma'),
|
(('~fadd', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma'),
|
||||||
|
|
||||||
|
@ -213,16 +223,6 @@ optimizations = [
|
||||||
# If x > 1: 1 - fsat(x) => 1 - 1 => 0 and fsat(1 - x) => fsat(< 0) => 0
|
# If x > 1: 1 - fsat(x) => 1 - 1 => 0 and fsat(1 - x) => fsat(< 0) => 0
|
||||||
(('~fadd', ('fneg(is_used_once)', ('fsat(is_used_once)', 'a(is_not_fmul)')), 1.0), ('fsat', ('fadd', 1.0, ('fneg', a)))),
|
(('~fadd', ('fneg(is_used_once)', ('fsat(is_used_once)', 'a(is_not_fmul)')), 1.0), ('fsat', ('fadd', 1.0, ('fneg', a)))),
|
||||||
|
|
||||||
# 1 - ((1 - a) * (1 - b))
|
|
||||||
# 1 - (1 - a - b + a*b)
|
|
||||||
# 1 - 1 + a + b - a*b
|
|
||||||
# a + b - a*b
|
|
||||||
# a + b*(1 - a)
|
|
||||||
# b*(1 - a) + 1*a
|
|
||||||
# flrp(b, 1, a)
|
|
||||||
(('~fadd@32', 1.0, ('fneg', ('fmul', ('fadd', 1.0, ('fneg', a)), ('fadd', 1.0, ('fneg', b))))),
|
|
||||||
('flrp', b, 1.0, a), '!options->lower_flrp32'),
|
|
||||||
|
|
||||||
# (a * #b + #c) << #d
|
# (a * #b + #c) << #d
|
||||||
# ((a * #b) << #d) + (#c << #d)
|
# ((a * #b) << #d) + (#c << #d)
|
||||||
# (a * (#b << #d)) + (#c << #d)
|
# (a * (#b << #d)) + (#c << #d)
|
||||||
|
@ -232,7 +232,7 @@ optimizations = [
|
||||||
# (a * #b) << #c
|
# (a * #b) << #c
|
||||||
# a * (#b << #c)
|
# a * (#b << #c)
|
||||||
(('ishl', ('imul', a, '#b'), '#c'), ('imul', a, ('ishl', b, c))),
|
(('ishl', ('imul', a, '#b'), '#c'), ('imul', a, ('ishl', b, c))),
|
||||||
]
|
])
|
||||||
|
|
||||||
# Care must be taken here. Shifts in NIR uses only the lower log2(bitsize)
|
# Care must be taken here. Shifts in NIR uses only the lower log2(bitsize)
|
||||||
# bits of the second source. These replacements must correctly handle the
|
# bits of the second source. These replacements must correctly handle the
|
||||||
|
@ -1969,6 +1969,7 @@ late_optimizations = [
|
||||||
|
|
||||||
(('~flrp', ('fadd(is_used_once)', a, b), ('fadd(is_used_once)', a, c), d), ('fadd', ('flrp', b, c, d), a)),
|
(('~flrp', ('fadd(is_used_once)', a, b), ('fadd(is_used_once)', a, c), d), ('fadd', ('flrp', b, c, d), a)),
|
||||||
|
|
||||||
|
(('~fadd@16', 1.0, ('fmul(is_used_once)', c , ('fadd', b, -1.0 ))), ('fadd', ('fadd', 1.0, ('fneg', c)), ('fmul', b, c)), 'options->lower_flrp16'),
|
||||||
(('~fadd@32', 1.0, ('fmul(is_used_once)', c , ('fadd', b, -1.0 ))), ('fadd', ('fadd', 1.0, ('fneg', c)), ('fmul', b, c)), 'options->lower_flrp32'),
|
(('~fadd@32', 1.0, ('fmul(is_used_once)', c , ('fadd', b, -1.0 ))), ('fadd', ('fadd', 1.0, ('fneg', c)), ('fmul', b, c)), 'options->lower_flrp32'),
|
||||||
(('~fadd@64', 1.0, ('fmul(is_used_once)', c , ('fadd', b, -1.0 ))), ('fadd', ('fadd', 1.0, ('fneg', c)), ('fmul', b, c)), 'options->lower_flrp64'),
|
(('~fadd@64', 1.0, ('fmul(is_used_once)', c , ('fadd', b, -1.0 ))), ('fadd', ('fadd', 1.0, ('fneg', c)), ('fmul', b, c)), 'options->lower_flrp64'),
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue