mesa/src/compiler/nir/nir_opt_algebraic.py

704 lines
34 KiB
Python
Raw Normal View History

#
# Copyright (C) 2014 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
#
# Authors:
# Jason Ekstrand (jason@jlekstrand.net)
import nir_algebraic
# Convenience variables
a = 'a'
b = 'b'
c = 'c'
d = 'd'
# Written in the form (<search>, <replace>) where <search> is an expression
# and <replace> is either an expression or a value. An expression is
# defined as a tuple of the form ([~]<op>, <src0>, <src1>, <src2>, <src3>)
# where each source is either an expression or a value. A value can be
# either a numeric constant or a string representing a variable name.
#
# If the opcode in a search expression is prefixed by a '~' character, this
# indicates that the operation is inexact. Such operations will only get
# applied to SSA values that do not have the exact bit set. This should be
# used by by any optimizations that are not bit-for-bit exact. It should not,
# however, be used for backend-requested lowering operations as those need to
# happen regardless of precision.
#
# Variable names are specified as "[#]name[@type][(cond)]" where "#" inicates
# that the given variable will only match constants and the type indicates that
# the given variable will only match values from ALU instructions with the
# given output type, and (cond) specifies an additional condition function
# (see nir_search_helpers.h).
#
# For constants, you have to be careful to make sure that it is the right
# type because python is unaware of the source and destination types of the
# opcodes.
#
# All expression types can have a bit-size specified. For opcodes, this
# looks like "op@32", for variables it is "a@32" or "a@uint32" to specify a
# type and size, and for literals, you can write "2.0@32". In the search half
# of the expression this indicates that it should only match that particular
# bit-size. In the replace half of the expression this indicates that the
# constructed value should have that bit-size.
optimizations = [
(('imul', a, '#b@32(is_pos_power_of_two)'), ('ishl', a, ('find_lsb', b))),
(('imul', a, '#b@32(is_neg_power_of_two)'), ('ineg', ('ishl', a, ('find_lsb', ('iabs', b))))),
(('udiv', a, 1), a),
(('idiv', a, 1), a),
(('umod', a, 1), 0),
(('imod', a, 1), 0),
(('udiv', a, '#b@32(is_pos_power_of_two)'), ('ushr', a, ('find_lsb', b))),
(('idiv', a, '#b@32(is_pos_power_of_two)'), ('imul', ('isign', a), ('ushr', ('iabs', a), ('find_lsb', b))), 'options->lower_idiv'),
(('idiv', a, '#b@32(is_neg_power_of_two)'), ('ineg', ('imul', ('isign', a), ('ushr', ('iabs', a), ('find_lsb', ('iabs', b))))), 'options->lower_idiv'),
(('umod', a, '#b(is_pos_power_of_two)'), ('iand', a, ('isub', b, 1))),
(('fneg', ('fneg', a)), a),
(('ineg', ('ineg', a)), a),
(('fabs', ('fabs', a)), ('fabs', a)),
(('fabs', ('fneg', a)), ('fabs', a)),
(('fabs', ('u2f32', a)), ('u2f32', a)),
(('iabs', ('iabs', a)), ('iabs', a)),
(('iabs', ('ineg', a)), ('iabs', a)),
(('~fadd', a, 0.0), a),
(('iadd', a, 0), a),
(('usadd_4x8', a, 0), a),
(('usadd_4x8', a, ~0), ~0),
(('~fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))),
(('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))),
(('~fadd', ('fneg', a), a), 0.0),
(('iadd', ('ineg', a), a), 0),
(('iadd', ('ineg', a), ('iadd', a, b)), b),
(('iadd', a, ('iadd', ('ineg', a), b)), b),
(('~fadd', ('fneg', a), ('fadd', a, b)), b),
(('~fadd', a, ('fadd', ('fneg', a), b)), b),
(('~fmul', a, 0.0), 0.0),
(('imul', a, 0), 0),
(('umul_unorm_4x8', a, 0), 0),
(('umul_unorm_4x8', a, ~0), a),
(('fmul', a, 1.0), a),
(('imul', a, 1), a),
(('fmul', a, -1.0), ('fneg', a)),
(('imul', a, -1), ('ineg', a)),
(('~ffma', 0.0, a, b), b),
(('~ffma', a, 0.0, b), b),
(('~ffma', a, b, 0.0), ('fmul', a, b)),
(('ffma', a, 1.0, b), ('fadd', a, b)),
(('ffma', 1.0, a, b), ('fadd', a, b)),
(('~flrp', a, b, 0.0), a),
(('~flrp', a, b, 1.0), b),
(('~flrp', a, a, b), a),
(('~flrp', 0.0, a, b), ('fmul', a, b)),
(('~flrp', a, b, ('b2f', c)), ('bcsel', c, b, a), 'options->lower_flrp32'),
(('~flrp', a, 0.0, c), ('fadd', ('fmul', ('fneg', a), c), a)),
(('flrp@32', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp32'),
(('flrp@64', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp64'),
(('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'),
(('~fadd', ('fmul', a, ('fadd', 1.0, ('fneg', ('b2f', c)))), ('fmul', b, ('b2f', c))), ('bcsel', c, b, a), 'options->lower_flrp32'),
(('~fadd@32', ('fmul', a, ('fadd', 1.0, ('fneg', c ))), ('fmul', b, c )), ('flrp', a, b, c), '!options->lower_flrp32'),
(('~fadd@64', ('fmul', a, ('fadd', 1.0, ('fneg', c ))), ('fmul', b, c )), ('flrp', a, b, c), '!options->lower_flrp64'),
(('~fadd', a, ('fmul', ('b2f', c), ('fadd', b, ('fneg', a)))), ('bcsel', c, b, a), 'options->lower_flrp32'),
(('~fadd@32', a, ('fmul', c , ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp32'),
(('~fadd@64', a, ('fmul', c , ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp64'),
(('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
(('~fadd', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma'),
nir/algebraic: Optimize common array indexing sequence Some shaders include code that looks like: uniform int i; uniform vec4 bones[...]; foo(bones[i * 3], bones[i * 3 + 1], bones[i * 3 + 2]); CSE would do some work on this: x = i * 3 foo(bones[x], bones[x + 1], bones[x + 2]); The compiler may then add '<< 4 + base' to the index calculations. This results in expressions like x = i * 3 foo(bones[x << 4], bones[(x + 1) << 4], bones[(x + 2) << 4]); Just rearranging the math to produce (i * 48) + 16 saves an instruction, and it allows CSE to do more work. x = i * 48; foo(bones[x], bones[x + 16], bones[x + 32]); So, ~6 instructions becomes ~3. Some individual shader-db results look pretty bad. However, I have a really, really hard time believing the change in estimated cycles in, for example, 3dmmes-taiji/51.shader_test after looking that change in the generated code. G45 total instructions in shared programs: 4020840 -> 4010070 (-0.27%) instructions in affected programs: 177460 -> 166690 (-6.07%) helped: 894 HURT: 0 total cycles in shared programs: 98829000 -> 98784990 (-0.04%) cycles in affected programs: 3936648 -> 3892638 (-1.12%) helped: 894 HURT: 0 Ironlake total instructions in shared programs: 6418887 -> 6408117 (-0.17%) instructions in affected programs: 177460 -> 166690 (-6.07%) helped: 894 HURT: 0 total cycles in shared programs: 143504542 -> 143460532 (-0.03%) cycles in affected programs: 3936648 -> 3892638 (-1.12%) helped: 894 HURT: 0 Sandy Bridge total instructions in shared programs: 8357887 -> 8339251 (-0.22%) instructions in affected programs: 432715 -> 414079 (-4.31%) helped: 2795 HURT: 0 total cycles in shared programs: 118284184 -> 118207412 (-0.06%) cycles in affected programs: 6114626 -> 6037854 (-1.26%) helped: 2478 HURT: 317 Ivy Bridge total instructions in shared programs: 7669390 -> 7653822 (-0.20%) instructions in affected programs: 388234 -> 372666 (-4.01%) helped: 2795 HURT: 0 total cycles in shared programs: 68381982 -> 68263684 (-0.17%) cycles in affected programs: 1972658 -> 1854360 (-6.00%) helped: 2458 HURT: 307 Haswell total instructions in shared programs: 7082636 -> 7067068 (-0.22%) instructions in affected programs: 388234 -> 372666 (-4.01%) helped: 2795 HURT: 0 total cycles in shared programs: 68282020 -> 68164158 (-0.17%) cycles in affected programs: 1891820 -> 1773958 (-6.23%) helped: 2459 HURT: 261 Broadwell total instructions in shared programs: 9002466 -> 8985875 (-0.18%) instructions in affected programs: 658784 -> 642193 (-2.52%) helped: 2795 HURT: 5 total cycles in shared programs: 78503092 -> 78450404 (-0.07%) cycles in affected programs: 2873304 -> 2820616 (-1.83%) helped: 2275 HURT: 415 Skylake total instructions in shared programs: 9156978 -> 9140387 (-0.18%) instructions in affected programs: 682625 -> 666034 (-2.43%) helped: 2795 HURT: 5 total cycles in shared programs: 75591392 -> 75550574 (-0.05%) cycles in affected programs: 3192120 -> 3151302 (-1.28%) helped: 2271 HURT: 425 Signed-off-by: Ian Romanick <ian.d.romanick@intel.com> Reviewed-by: Thomas Helland <thomashelland90@gmail.com> Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
2016-07-20 01:47:38 +01:00
# (a * #b + #c) << #d
# ((a * #b) << #d) + (#c << #d)
# (a * (#b << #d)) + (#c << #d)
(('ishl', ('iadd', ('imul', a, '#b'), '#c'), '#d'),
('iadd', ('imul', a, ('ishl', b, d)), ('ishl', c, d))),
# (a * #b) << #c
# a * (#b << #c)
(('ishl', ('imul', a, '#b'), '#c'), ('imul', a, ('ishl', b, c))),
# Comparison simplifications
(('~inot', ('flt', a, b)), ('fge', a, b)),
(('~inot', ('fge', a, b)), ('flt', a, b)),
(('~inot', ('feq', a, b)), ('fne', a, b)),
(('~inot', ('fne', a, b)), ('feq', a, b)),
(('inot', ('ilt', a, b)), ('ige', a, b)),
(('inot', ('ige', a, b)), ('ilt', a, b)),
(('inot', ('ieq', a, b)), ('ine', a, b)),
(('inot', ('ine', a, b)), ('ieq', a, b)),
# 0.0 >= b2f(a)
# b2f(a) <= 0.0
# b2f(a) == 0.0 because b2f(a) can only be 0 or 1
# inot(a)
(('fge', 0.0, ('b2f', a)), ('inot', a)),
(('fge', ('fneg', ('b2f', a)), 0.0), ('inot', a)),
nir: Replace an odd comparison involving fmin of -b2f I noticed the fge version while looking at a shader for an unrelated reason. The feq version prevents a regression in a later change that performs strength reduction of some compares. Broadwell and Skylake had similar results. (Skylake shown) total instructions in shared programs: 14514808 -> 14514796 (<.01%) instructions in affected programs: 750 -> 738 (-1.60%) helped: 4 HURT: 0 helped stats (abs) min: 1 max: 5 x̄: 3.00 x̃: 3 helped stats (rel) min: 0.83% max: 1.96% x̄: 1.40% x̃: 1.40% 95% mean confidence interval for instructions value: -6.67 0.67 95% mean confidence interval for instructions %-change: -2.43% -0.36% Inconclusive result (value mean confidence interval includes 0). total cycles in shared programs: 533144939 -> 533144853 (<.01%) cycles in affected programs: 8911 -> 8825 (-0.97%) helped: 4 HURT: 0 helped stats (abs) min: 16 max: 32 x̄: 21.50 x̃: 19 helped stats (rel) min: 0.60% max: 1.89% x̄: 1.28% x̃: 1.31% 95% mean confidence interval for cycles value: -32.94 -10.06 95% mean confidence interval for cycles %-change: -2.30% -0.26% Cycles are helped. Haswell total instructions in shared programs: 13093785 -> 13093775 (<.01%) instructions in affected programs: 924 -> 914 (-1.08%) helped: 4 HURT: 2 helped stats (abs) min: 1 max: 5 x̄: 3.00 x̃: 3 helped stats (rel) min: 0.82% max: 1.95% x̄: 1.39% x̃: 1.39% HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 HURT stats (rel) min: 1.19% max: 1.19% x̄: 1.19% x̃: 1.19% 95% mean confidence interval for instructions value: -4.53 1.20 95% mean confidence interval for instructions %-change: -2.02% 0.97% Inconclusive result (value mean confidence interval includes 0). total cycles in shared programs: 409580553 -> 409580118 (<.01%) cycles in affected programs: 10909 -> 10474 (-3.99%) helped: 5 HURT: 1 helped stats (abs) min: 6 max: 222 x̄: 89.60 x̃: 18 helped stats (rel) min: 0.16% max: 24.72% x̄: 9.54% x̃: 1.78% HURT stats (abs) min: 13 max: 13 x̄: 13.00 x̃: 13 HURT stats (rel) min: 0.39% max: 0.39% x̄: 0.39% x̃: 0.39% 95% mean confidence interval for cycles value: -180.68 35.68 95% mean confidence interval for cycles %-change: -19.55% 3.79% Inconclusive result (value mean confidence interval includes 0). Ivy Bridge total instructions in shared programs: 11811851 -> 11811840 (<.01%) instructions in affected programs: 1032 -> 1021 (-1.07%) helped: 5 HURT: 1 helped stats (abs) min: 1 max: 5 x̄: 2.40 x̃: 1 helped stats (rel) min: 0.63% max: 1.95% x̄: 1.13% x̃: 0.97% HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 HURT stats (rel) min: 1.19% max: 1.19% x̄: 1.19% x̃: 1.19% 95% mean confidence interval for instructions value: -4.17 0.51 95% mean confidence interval for instructions %-change: -1.86% 0.36% Inconclusive result (value mean confidence interval includes 0). total cycles in shared programs: 257618403 -> 257618168 (<.01%) cycles in affected programs: 10784 -> 10549 (-2.18%) helped: 4 HURT: 2 helped stats (abs) min: 4 max: 220 x̄: 64.50 x̃: 17 helped stats (rel) min: 0.50% max: 24.34% x̄: 7.07% x̃: 1.72% HURT stats (abs) min: 9 max: 14 x̄: 11.50 x̃: 11 HURT stats (rel) min: 0.24% max: 0.42% x̄: 0.33% x̃: 0.33% 95% mean confidence interval for cycles value: -133.11 54.78 95% mean confidence interval for cycles %-change: -14.79% 5.59% Inconclusive result (value mean confidence interval includes 0). GM45, Iron Lake, and Sandy Bridge had similar results. (Sandy Bridge shown) total instructions in shared programs: 10533871 -> 10533859 (<.01%) instructions in affected programs: 865 -> 853 (-1.39%) helped: 4 HURT: 0 helped stats (abs) min: 1 max: 5 x̄: 3.00 x̃: 3 helped stats (rel) min: 0.63% max: 1.83% x̄: 1.22% x̃: 1.21% 95% mean confidence interval for instructions value: -6.67 0.67 95% mean confidence interval for instructions %-change: -2.16% -0.29% Inconclusive result (value mean confidence interval includes 0). total cycles in shared programs: 146139904 -> 146139852 (<.01%) cycles in affected programs: 15213 -> 15161 (-0.34%) helped: 4 HURT: 0 helped stats (abs) min: 3 max: 18 x̄: 13.00 x̃: 15 helped stats (rel) min: 0.15% max: 0.84% x̄: 0.39% x̃: 0.29% 95% mean confidence interval for cycles value: -23.79 -2.21 95% mean confidence interval for cycles %-change: -0.88% 0.09% Inconclusive result (%-change mean confidence interval includes 0). Signed-off-by: Ian Romanick <ian.d.romanick@intel.com> Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
2018-01-31 19:11:02 +00:00
# fmin(-b2f(a), b) >= 0.0
# -b2f(a) >= 0.0 && b >= 0.0
# -b2f(a) == 0.0 && b >= 0.0 -b2f can only be 0 or -1, never >0
# b2f(a) == 0.0 && b >= 0.0
# a == False && b >= 0.0
# !a && b >= 0.0
#
# The fge in the second replacement is not a typo. I leave the proof that
# "fmin(-b2f(a), b) >= 0 <=> fmin(-b2f(a), b) == 0" as an exercise for the
# reader.
(('fge', ('fmin', ('fneg', ('b2f', a)), b), 0.0), ('iand', ('inot', a), ('fge', b, 0.0))),
(('feq', ('fmin', ('fneg', ('b2f', a)), b), 0.0), ('iand', ('inot', a), ('fge', b, 0.0))),
# 0.0 < fabs(a)
# fabs(a) > 0.0
# fabs(a) != 0.0 because fabs(a) must be >= 0
# a != 0.0
(('flt', 0.0, ('fabs', a)), ('fne', a, 0.0)),
(('fmax', ('b2f(is_used_once)', a), ('b2f', b)), ('b2f', ('ior', a, b))),
(('fmax', ('fneg(is_used_once)', ('b2f(is_used_once)', a)), ('fneg', ('b2f', b))), ('fneg', ('b2f', ('ior', a, b)))),
(('fmin', ('b2f(is_used_once)', a), ('b2f', b)), ('b2f', ('iand', a, b))),
(('fmin', ('fneg(is_used_once)', ('b2f(is_used_once)', a)), ('fneg', ('b2f', b))), ('fneg', ('b2f', ('iand', a, b)))),
nir: Replace fmin(b2f(a), b) with a bcsel All of the affected shaders are HDR mappers from Serious Sam 3. All Gen7+ platforms had similar results. (Skylake shown) total instructions in shared programs: 14516285 -> 14516273 (<.01%) instructions in affected programs: 348 -> 336 (-3.45%) helped: 12 HURT: 0 helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 helped stats (rel) min: 2.08% max: 6.67% x̄: 4.31% x̃: 4.17% 95% mean confidence interval for instructions value: -1.00 -1.00 95% mean confidence interval for instructions %-change: -5.55% -3.06% Instructions are helped. total cycles in shared programs: 533163876 -> 533163808 (<.01%) cycles in affected programs: 1144 -> 1076 (-5.94%) helped: 4 HURT: 0 helped stats (abs) min: 16 max: 18 x̄: 17.00 x̃: 17 helped stats (rel) min: 5.80% max: 6.08% x̄: 5.94% x̃: 5.94% 95% mean confidence interval for cycles value: -18.84 -15.16 95% mean confidence interval for cycles %-change: -6.20% -5.68% Cycles are helped. Sandy Bridge total instructions in shared programs: 10533321 -> 10533309 (<.01%) instructions in affected programs: 372 -> 360 (-3.23%) helped: 12 HURT: 0 helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 helped stats (rel) min: 2.00% max: 5.88% x̄: 3.91% x̃: 3.85% 95% mean confidence interval for instructions value: -1.00 -1.00 95% mean confidence interval for instructions %-change: -4.96% -2.86% Instructions are helped. total cycles in shared programs: 146136632 -> 146136428 (<.01%) cycles in affected programs: 11668 -> 11464 (-1.75%) helped: 12 HURT: 0 helped stats (abs) min: 16 max: 18 x̄: 17.00 x̃: 17 helped stats (rel) min: 0.99% max: 3.44% x̄: 2.20% x̃: 2.29% 95% mean confidence interval for cycles value: -17.66 -16.34 95% mean confidence interval for cycles %-change: -2.82% -1.58% Cycles are helped. Iron Lake total instructions in shared programs: 7886301 -> 7886277 (<.01%) instructions in affected programs: 576 -> 552 (-4.17%) helped: 12 HURT: 0 helped stats (abs) min: 2 max: 2 x̄: 2.00 x̃: 2 helped stats (rel) min: 2.94% max: 6.06% x̄: 4.51% x̃: 4.65% 95% mean confidence interval for instructions value: -2.00 -2.00 95% mean confidence interval for instructions %-change: -5.30% -3.72% Instructions are helped. total cycles in shared programs: 178113176 -> 178113176 (0.00%) cycles in affected programs: 2116 -> 2116 (0.00%) helped: 2 HURT: 4 helped stats (abs) min: 4 max: 4 x̄: 4.00 x̃: 4 helped stats (rel) min: 1.14% max: 1.14% x̄: 1.14% x̃: 1.14% HURT stats (abs) min: 2 max: 2 x̄: 2.00 x̃: 2 HURT stats (rel) min: 0.50% max: 0.65% x̄: 0.58% x̃: 0.58% 95% mean confidence interval for cycles value: -3.25 3.25 95% mean confidence interval for cycles %-change: -0.93% 0.94% Inconclusive result (value mean confidence interval includes 0). GM45 total instructions in shared programs: 4857756 -> 4857744 (<.01%) instructions in affected programs: 294 -> 282 (-4.08%) helped: 6 HURT: 0 helped stats (abs) min: 2 max: 2 x̄: 2.00 x̃: 2 helped stats (rel) min: 2.94% max: 5.71% x̄: 4.40% x̃: 4.55% 95% mean confidence interval for instructions value: -2.00 -2.00 95% mean confidence interval for instructions %-change: -5.71% -3.09% Instructions are helped. total cycles in shared programs: 122178730 -> 122178722 (<.01%) cycles in affected programs: 700 -> 692 (-1.14%) helped: 2 HURT: 0 Signed-off-by: Ian Romanick <ian.d.romanick@intel.com> Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
2018-02-03 01:39:54 +00:00
# fmin(b2f(a), b)
# bcsel(a, fmin(b2f(a), b), fmin(b2f(a), b))
# bcsel(a, fmin(b2f(True), b), fmin(b2f(False), b))
# bcsel(a, fmin(1.0, b), fmin(0.0, b))
#
# Since b is a constant, constant folding will eliminate the fmin and the
# fmax. If b is > 1.0, the bcsel will be replaced with a b2f.
(('fmin', ('b2f', a), '#b'), ('bcsel', a, ('fmin', b, 1.0), ('fmin', b, 0.0))),
# ignore this opt when the result is used by a bcsel or if so we can make
# use of conditional modifiers on supported hardware.
(('flt(is_not_used_by_conditional)', ('fadd(is_used_once)', a, ('fneg', b)), 0.0), ('flt', a, b)),
(('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)),
(('~bcsel', ('flt', b, a), b, a), ('fmin', a, b)),
(('~bcsel', ('flt', a, b), b, a), ('fmax', a, b)),
(('~bcsel', ('fge', a, b), b, a), ('fmin', a, b)),
(('~bcsel', ('fge', b, a), b, a), ('fmax', a, b)),
(('bcsel', ('inot', a), b, c), ('bcsel', a, c, b)),
(('bcsel', a, ('bcsel', a, b, c), d), ('bcsel', a, b, d)),
(('bcsel', a, True, 'b@bool'), ('ior', a, b)),
(('fmin', a, a), a),
(('fmax', a, a), a),
(('imin', a, a), a),
(('imax', a, a), a),
(('umin', a, a), a),
(('umax', a, a), a),
(('fmin', a, ('fneg', a)), ('fneg', ('fabs', a))),
(('imin', a, ('ineg', a)), ('ineg', ('iabs', a))),
(('fmin', a, ('fneg', ('fabs', a))), ('fneg', ('fabs', a))),
(('imin', a, ('ineg', ('iabs', a))), ('ineg', ('iabs', a))),
(('fmin', a, ('fabs', a)), a),
(('imin', a, ('iabs', a)), a),
(('fmax', a, ('fneg', ('fabs', a))), a),
(('imax', a, ('ineg', ('iabs', a))), a),
(('fmax', a, ('fabs', a)), ('fabs', a)),
(('imax', a, ('iabs', a)), ('iabs', a)),
(('fmax', a, ('fneg', a)), ('fabs', a)),
(('imax', a, ('ineg', a)), ('iabs', a)),
(('~fmin', ('fmax', a, 0.0), 1.0), ('fsat', a), '!options->lower_fsat'),
(('~fmax', ('fmin', a, 1.0), 0.0), ('fsat', a), '!options->lower_fsat'),
(('fsat', a), ('fmin', ('fmax', a, 0.0), 1.0), 'options->lower_fsat'),
(('fsat', ('fsat', a)), ('fsat', a)),
(('fmin', ('fmax', ('fmin', ('fmax', a, b), c), b), c), ('fmin', ('fmax', a, b), c)),
(('imin', ('imax', ('imin', ('imax', a, b), c), b), c), ('imin', ('imax', a, b), c)),
(('umin', ('umax', ('umin', ('umax', a, b), c), b), c), ('umin', ('umax', a, b), c)),
(('fmax', ('fsat', a), '#b@32(is_zero_to_one)'), ('fsat', ('fmax', a, b))),
(('fmin', ('fsat', a), '#b@32(is_zero_to_one)'), ('fsat', ('fmin', a, b))),
(('extract_u8', ('imin', ('imax', a, 0), 0xff), 0), ('imin', ('imax', a, 0), 0xff)),
nir: Be more conservative about rearranging or-compounded compares If both comparisons are used as sources for instructions other than the ior, this transformation is detrimental. If the non-identical value in both compares is constant, the fmin or fmax will be constant-folded away, so the transformation is always a win. shader-db results: Skylake total instructions in shared programs: 14526147 -> 14525898 (<.01%) instructions in affected programs: 70239 -> 69990 (-0.35%) helped: 102 HURT: 0 helped stats (abs) min: 1 max: 8 x̄: 2.44 x̃: 1 helped stats (rel) min: 0.07% max: 2.30% x̄: 0.38% x̃: 0.20% 95% mean confidence interval for instructions value: -2.86 -2.02 95% mean confidence interval for instructions %-change: -0.46% -0.31% Instructions are helped. total cycles in shared programs: 533120531 -> 533119892 (<.01%) cycles in affected programs: 994875 -> 994236 (-0.06%) helped: 76 HURT: 26 helped stats (abs) min: 1 max: 324 x̄: 27.09 x̃: 13 helped stats (rel) min: <.01% max: 4.21% x̄: 0.45% x̃: 0.18% HURT stats (abs) min: 1 max: 167 x̄: 54.62 x̃: 26 HURT stats (rel) min: <.01% max: 4.36% x̄: 1.01% x̃: 0.39% 95% mean confidence interval for cycles value: -19.44 6.91 95% mean confidence interval for cycles %-change: -0.30% 0.15% Inconclusive result (value mean confidence interval includes 0). Broadwell total instructions in shared programs: 14816005 -> 14815787 (<.01%) instructions in affected programs: 64658 -> 64440 (-0.34%) helped: 97 HURT: 0 helped stats (abs) min: 1 max: 8 x̄: 2.25 x̃: 1 helped stats (rel) min: 0.07% max: 2.30% x̄: 0.38% x̃: 0.20% 95% mean confidence interval for instructions value: -2.62 -1.87 95% mean confidence interval for instructions %-change: -0.45% -0.30% Instructions are helped. total cycles in shared programs: 559340386 -> 559339907 (<.01%) cycles in affected programs: 1090491 -> 1090012 (-0.04%) helped: 66 HURT: 28 helped stats (abs) min: 2 max: 198 x̄: 23.83 x̃: 16 helped stats (rel) min: 0.01% max: 4.21% x̄: 0.47% x̃: 0.27% HURT stats (abs) min: 2 max: 226 x̄: 39.07 x̃: 11 HURT stats (rel) min: <.01% max: 4.61% x̄: 0.64% x̃: 0.20% 95% mean confidence interval for cycles value: -15.94 5.75 95% mean confidence interval for cycles %-change: -0.35% 0.07% Inconclusive result (value mean confidence interval includes 0). LOST: 0 GAINED: 1 Haswell total instructions in shared programs: 9034106 -> 9033948 (<.01%) instructions in affected programs: 24096 -> 23938 (-0.66%) helped: 38 HURT: 0 helped stats (abs) min: 1 max: 8 x̄: 4.16 x̃: 4 helped stats (rel) min: 0.42% max: 2.29% x̄: 0.71% x̃: 0.64% 95% mean confidence interval for instructions value: -4.71 -3.60 95% mean confidence interval for instructions %-change: -0.84% -0.58% Instructions are helped. total cycles in shared programs: 84631628 -> 84631402 (<.01%) cycles in affected programs: 148674 -> 148448 (-0.15%) helped: 14 HURT: 14 helped stats (abs) min: 1 max: 114 x̄: 22.14 x̃: 12 helped stats (rel) min: 0.02% max: 2.98% x̄: 0.66% x̃: 0.21% HURT stats (abs) min: 1 max: 10 x̄: 6.00 x̃: 5 HURT stats (rel) min: 0.01% max: 0.20% x̄: 0.12% x̃: 0.11% 95% mean confidence interval for cycles value: -19.42 3.28 95% mean confidence interval for cycles %-change: -0.59% 0.05% Inconclusive result (value mean confidence interval includes 0). Ivy Bridge total instructions in shared programs: 10015456 -> 10015293 (<.01%) instructions in affected programs: 27701 -> 27538 (-0.59%) helped: 38 HURT: 0 helped stats (abs) min: 1 max: 9 x̄: 4.29 x̃: 4 helped stats (rel) min: 0.33% max: 2.79% x̄: 0.66% x̃: 0.52% 95% mean confidence interval for instructions value: -4.87 -3.71 95% mean confidence interval for instructions %-change: -0.82% -0.51% Instructions are helped. total cycles in shared programs: 87524771 -> 87524569 (<.01%) cycles in affected programs: 112324 -> 112122 (-0.18%) helped: 6 HURT: 12 helped stats (abs) min: 2 max: 111 x̄: 44.67 x̃: 20 helped stats (rel) min: 0.02% max: 2.94% x̄: 1.45% x̃: 1.26% HURT stats (abs) min: 1 max: 16 x̄: 5.50 x̃: 5 HURT stats (rel) min: <.01% max: 0.16% x̄: 0.08% x̃: 0.08% 95% mean confidence interval for cycles value: -29.14 6.69 95% mean confidence interval for cycles %-change: -0.93% 0.08% Inconclusive result (value mean confidence interval includes 0). LOST: 0 GAINED: 2 Sandy Bridge total instructions in shared programs: 10545655 -> 10545465 (<.01%) instructions in affected programs: 37198 -> 37008 (-0.51%) helped: 42 HURT: 0 helped stats (abs) min: 1 max: 8 x̄: 4.52 x̃: 4 helped stats (rel) min: 0.31% max: 2.15% x̄: 0.58% x̃: 0.49% 95% mean confidence interval for instructions value: -5.14 -3.91 95% mean confidence interval for instructions %-change: -0.68% -0.47% Instructions are helped. total cycles in shared programs: 146113059 -> 146112427 (<.01%) cycles in affected programs: 423514 -> 422882 (-0.15%) helped: 32 HURT: 10 helped stats (abs) min: 4 max: 162 x̄: 24.34 x̃: 12 helped stats (rel) min: 0.06% max: 2.74% x̄: 0.37% x̃: 0.11% HURT stats (abs) min: 12 max: 19 x̄: 14.70 x̃: 14 HURT stats (rel) min: 0.10% max: 0.18% x̄: 0.16% x̃: 0.14% 95% mean confidence interval for cycles value: -26.03 -4.07 95% mean confidence interval for cycles %-change: -0.43% -0.05% Cycles are helped. Iron Lake total instructions in shared programs: 7886959 -> 7886925 (<.01%) instructions in affected programs: 1340 -> 1306 (-2.54%) helped: 4 HURT: 0 helped stats (abs) min: 2 max: 15 x̄: 8.50 x̃: 8 helped stats (rel) min: 0.63% max: 4.30% x̄: 2.45% x̃: 2.43% 95% mean confidence interval for instructions value: -20.44 3.44 95% mean confidence interval for instructions %-change: -5.78% 0.89% Inconclusive result (value mean confidence interval includes 0). total cycles in shared programs: 178116996 -> 178116888 (<.01%) cycles in affected programs: 6262 -> 6154 (-1.72%) helped: 2 HURT: 2 helped stats (abs) min: 44 max: 78 x̄: 61.00 x̃: 61 helped stats (rel) min: 3.31% max: 3.94% x̄: 3.62% x̃: 3.62% HURT stats (abs) min: 6 max: 8 x̄: 7.00 x̃: 7 HURT stats (rel) min: 0.34% max: 0.68% x̄: 0.51% x̃: 0.51% 95% mean confidence interval for cycles value: -93.27 39.27 95% mean confidence interval for cycles %-change: -5.38% 2.27% Inconclusive result (value mean confidence interval includes 0). GM45 total instructions in shared programs: 4857887 -> 4857870 (<.01%) instructions in affected programs: 674 -> 657 (-2.52%) helped: 2 HURT: 0 total cycles in shared programs: 122180816 -> 122180744 (<.01%) cycles in affected programs: 3764 -> 3692 (-1.91%) helped: 1 HURT: 1 helped stats (abs) min: 78 max: 78 x̄: 78.00 x̃: 78 helped stats (rel) min: 3.94% max: 3.94% x̄: 3.94% x̃: 3.94% HURT stats (abs) min: 6 max: 6 x̄: 6.00 x̃: 6 HURT stats (rel) min: 0.34% max: 0.34% x̄: 0.34% x̃: 0.34% Signed-off-by: Ian Romanick <ian.d.romanick@intel.com> Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com> Reviewed-by: Elie Tournier <elie.tournier@collabora.com>
2018-01-04 21:30:49 +00:00
(('~ior', ('flt(is_used_once)', a, b), ('flt', a, c)), ('flt', a, ('fmax', b, c))),
(('~ior', ('flt(is_used_once)', a, c), ('flt', b, c)), ('flt', ('fmin', a, b), c)),
(('~ior', ('fge(is_used_once)', a, b), ('fge', a, c)), ('fge', a, ('fmin', b, c))),
(('~ior', ('fge(is_used_once)', a, c), ('fge', b, c)), ('fge', ('fmax', a, b), c)),
(('~ior', ('flt', a, '#b'), ('flt', a, '#c')), ('flt', a, ('fmax', b, c))),
(('~ior', ('flt', '#a', c), ('flt', '#b', c)), ('flt', ('fmin', a, b), c)),
(('~ior', ('fge', a, '#b'), ('fge', a, '#c')), ('fge', a, ('fmin', b, c))),
(('~ior', ('fge', '#a', c), ('fge', '#b', c)), ('fge', ('fmax', a, b), c)),
nir: Rearrange and-compounded float compares If both comparisons are used as sources for instructions other than the iand, this transformation is detrimental. If the non-identical value in both compares is constant, the fmin or fmax will be constant-folded away, so the transformation is always a win. It is interesting to me that on Iron Lake only 81 shaders have instruction counts changed, but 726 shaders have cycle counts changed. shader-db results: Skylake total instructions in shared programs: 14525728 -> 14521017 (-0.03%) instructions in affected programs: 1164726 -> 1160015 (-0.40%) helped: 1692 HURT: 5 helped stats (abs) min: 1 max: 637 x̄: 2.79 x̃: 2 helped stats (rel) min: 0.07% max: 16.36% x̄: 0.81% x̃: 0.33% HURT stats (abs) min: 1 max: 12 x̄: 3.20 x̃: 1 HURT stats (rel) min: 0.38% max: 2.86% x̄: 2.36% x̃: 2.86% 95% mean confidence interval for instructions value: -3.52 -2.03 95% mean confidence interval for instructions %-change: -0.86% -0.74% Instructions are helped. total cycles in shared programs: 533115449 -> 532991404 (-0.02%) cycles in affected programs: 119401803 -> 119277758 (-0.10%) helped: 1145 HURT: 467 helped stats (abs) min: 1 max: 34644 x̄: 145.92 x̃: 18 helped stats (rel) min: <.01% max: 45.33% x̄: 1.58% x̃: 0.42% HURT stats (abs) min: 1 max: 1590 x̄: 92.15 x̃: 15 HURT stats (rel) min: <.01% max: 13.48% x̄: 1.26% x̃: 0.39% 95% mean confidence interval for cycles value: -122.16 -31.74 95% mean confidence interval for cycles %-change: -0.94% -0.57% Cycles are helped. total spills in shared programs: 9597 -> 9534 (-0.66%) spills in affected programs: 403 -> 340 (-15.63%) helped: 1 HURT: 1 total fills in shared programs: 13904 -> 13790 (-0.82%) fills in affected programs: 1627 -> 1513 (-7.01%) helped: 2 HURT: 1 LOST: 0 GAINED: 2 Broadwell total instructions in shared programs: 14816966 -> 14812590 (-0.03%) instructions in affected programs: 1499885 -> 1495509 (-0.29%) helped: 1672 HURT: 15 helped stats (abs) min: 1 max: 455 x̄: 2.70 x̃: 2 helped stats (rel) min: 0.05% max: 16.36% x̄: 0.81% x̃: 0.33% HURT stats (abs) min: 1 max: 21 x̄: 9.20 x̃: 8 HURT stats (rel) min: 0.08% max: 2.86% x̄: 1.06% x̃: 0.53% 95% mean confidence interval for instructions value: -3.14 -2.05 95% mean confidence interval for instructions %-change: -0.85% -0.73% Instructions are helped. total cycles in shared programs: 559353622 -> 559345595 (<.01%) cycles in affected programs: 139893703 -> 139885676 (<.01%) helped: 921 HURT: 697 helped stats (abs) min: 1 max: 42424 x̄: 143.45 x̃: 18 helped stats (rel) min: <.01% max: 36.23% x̄: 2.02% x̃: 0.87% HURT stats (abs) min: 1 max: 2370 x̄: 178.03 x̃: 38 HURT stats (rel) min: <.01% max: 17.35% x̄: 0.71% x̃: 0.14% 95% mean confidence interval for cycles value: -59.64 49.72 95% mean confidence interval for cycles %-change: -1.02% -0.66% Inconclusive result (value mean confidence interval includes 0). total spills in shared programs: 78902 -> 78861 (-0.05%) spills in affected programs: 2418 -> 2377 (-1.70%) helped: 1 HURT: 11 total fills in shared programs: 83782 -> 83678 (-0.12%) fills in affected programs: 3515 -> 3411 (-2.96%) helped: 2 HURT: 11 LOST: 0 GAINED: 5 Haswell and Ivy Bridge had similar results. Haswell shown. total instructions in shared programs: 9033898 -> 9032010 (-0.02%) instructions in affected programs: 308064 -> 306176 (-0.61%) helped: 921 HURT: 4 helped stats (abs) min: 1 max: 20 x̄: 2.05 x̃: 1 helped stats (rel) min: 0.17% max: 17.54% x̄: 0.80% x̃: 0.35% HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 HURT stats (rel) min: 3.23% max: 3.23% x̄: 3.23% x̃: 3.23% 95% mean confidence interval for instructions value: -2.21 -1.87 95% mean confidence interval for instructions %-change: -0.88% -0.68% Instructions are helped. total cycles in shared programs: 84628949 -> 84620520 (<.01%) cycles in affected programs: 2164913 -> 2156484 (-0.39%) helped: 518 HURT: 359 helped stats (abs) min: 1 max: 440 x̄: 41.52 x̃: 20 helped stats (rel) min: <.01% max: 17.17% x̄: 1.95% x̃: 1.01% HURT stats (abs) min: 1 max: 586 x̄: 36.43 x̃: 8 HURT stats (rel) min: 0.04% max: 18.65% x̄: 1.47% x̃: 0.40% 95% mean confidence interval for cycles value: -15.17 -4.05 95% mean confidence interval for cycles %-change: -0.77% -0.32% Cycles are helped. LOST: 0 GAINED: 4 Sandy Bridge total instructions in shared programs: 10544860 -> 10542933 (-0.02%) instructions in affected programs: 360019 -> 358092 (-0.54%) helped: 931 HURT: 4 helped stats (abs) min: 1 max: 20 x̄: 2.07 x̃: 1 helped stats (rel) min: 0.11% max: 15.52% x̄: 0.68% x̃: 0.30% HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 HURT stats (rel) min: 3.33% max: 3.33% x̄: 3.33% x̃: 3.33% 95% mean confidence interval for instructions value: -2.23 -1.89 95% mean confidence interval for instructions %-change: -0.76% -0.58% Instructions are helped. total cycles in shared programs: 146106820 -> 146098397 (<.01%) cycles in affected programs: 3435047 -> 3426624 (-0.25%) helped: 572 HURT: 329 helped stats (abs) min: 1 max: 1289 x̄: 32.52 x̃: 15 helped stats (rel) min: <.01% max: 26.29% x̄: 0.97% x̃: 0.33% HURT stats (abs) min: 1 max: 1714 x̄: 30.93 x̃: 6 HURT stats (rel) min: 0.02% max: 41.31% x̄: 1.13% x̃: 0.19% 95% mean confidence interval for cycles value: -16.85 -1.85 95% mean confidence interval for cycles %-change: -0.39% -0.01% Cycles are helped. LOST: 1 GAINED: 0 Iron Lake total instructions in shared programs: 7886925 -> 7886711 (<.01%) instructions in affected programs: 25763 -> 25549 (-0.83%) helped: 75 HURT: 6 helped stats (abs) min: 1 max: 13 x̄: 3.33 x̃: 1 helped stats (rel) min: 0.35% max: 17.57% x̄: 1.96% x̃: 0.53% HURT stats (abs) min: 1 max: 16 x̄: 6.00 x̃: 1 HURT stats (rel) min: 2.86% max: 4.79% x̄: 3.49% x̃: 2.86% 95% mean confidence interval for instructions value: -3.69 -1.60 95% mean confidence interval for instructions %-change: -2.54% -0.57% Instructions are helped. total cycles in shared programs: 178116888 -> 178115324 (<.01%) cycles in affected programs: 5858790 -> 5857226 (-0.03%) helped: 484 HURT: 242 helped stats (abs) min: 2 max: 76 x̄: 5.27 x̃: 6 helped stats (rel) min: 0.01% max: 10.70% x̄: 0.18% x̃: 0.06% HURT stats (abs) min: 2 max: 76 x̄: 4.07 x̃: 2 HURT stats (rel) min: 0.01% max: 3.99% x̄: 0.19% x̃: 0.03% 95% mean confidence interval for cycles value: -2.76 -1.55 95% mean confidence interval for cycles %-change: -0.12% 0.01% Inconclusive result (%-change mean confidence interval includes 0). GM45 total instructions in shared programs: 4857870 -> 4857762 (<.01%) instructions in affected programs: 13994 -> 13886 (-0.77%) helped: 39 HURT: 5 helped stats (abs) min: 1 max: 13 x̄: 3.28 x̃: 2 helped stats (rel) min: 0.33% max: 17.11% x̄: 1.86% x̃: 0.48% HURT stats (abs) min: 1 max: 16 x̄: 4.00 x̃: 1 HURT stats (rel) min: 2.86% max: 4.71% x̄: 3.23% x̃: 2.86% 95% mean confidence interval for instructions value: -3.86 -1.05 95% mean confidence interval for instructions %-change: -2.61% 0.04% Inconclusive result (%-change mean confidence interval includes 0). total cycles in shared programs: 122180744 -> 122179674 (<.01%) cycles in affected programs: 3686646 -> 3685576 (-0.03%) helped: 273 HURT: 141 helped stats (abs) min: 2 max: 76 x̄: 5.81 x̃: 6 helped stats (rel) min: 0.01% max: 10.70% x̄: 0.18% x̃: 0.06% HURT stats (abs) min: 2 max: 76 x̄: 3.66 x̃: 2 HURT stats (rel) min: 0.01% max: 3.99% x̄: 0.16% x̃: 0.02% 95% mean confidence interval for cycles value: -3.42 -1.75 95% mean confidence interval for cycles %-change: -0.15% 0.03% Inconclusive result (%-change mean confidence interval includes 0). Signed-off-by: Ian Romanick <ian.d.romanick@intel.com> Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com> Reviewed-by: Elie Tournier <elie.tournier@collabora.com>
2018-01-04 23:21:30 +00:00
(('~iand', ('flt(is_used_once)', a, b), ('flt', a, c)), ('flt', a, ('fmin', b, c))),
(('~iand', ('flt(is_used_once)', a, c), ('flt', b, c)), ('flt', ('fmax', a, b), c)),
(('~iand', ('fge(is_used_once)', a, b), ('fge', a, c)), ('fge', a, ('fmax', b, c))),
(('~iand', ('fge(is_used_once)', a, c), ('fge', b, c)), ('fge', ('fmin', a, b), c)),
(('~iand', ('flt', a, '#b'), ('flt', a, '#c')), ('flt', a, ('fmin', b, c))),
(('~iand', ('flt', '#a', c), ('flt', '#b', c)), ('flt', ('fmax', a, b), c)),
(('~iand', ('fge', a, '#b'), ('fge', a, '#c')), ('fge', a, ('fmax', b, c))),
(('~iand', ('fge', '#a', c), ('fge', '#b', c)), ('fge', ('fmin', a, b), c)),
nir: Be more conservative about rearranging or-compounded compares If both comparisons are used as sources for instructions other than the ior, this transformation is detrimental. If the non-identical value in both compares is constant, the fmin or fmax will be constant-folded away, so the transformation is always a win. shader-db results: Skylake total instructions in shared programs: 14526147 -> 14525898 (<.01%) instructions in affected programs: 70239 -> 69990 (-0.35%) helped: 102 HURT: 0 helped stats (abs) min: 1 max: 8 x̄: 2.44 x̃: 1 helped stats (rel) min: 0.07% max: 2.30% x̄: 0.38% x̃: 0.20% 95% mean confidence interval for instructions value: -2.86 -2.02 95% mean confidence interval for instructions %-change: -0.46% -0.31% Instructions are helped. total cycles in shared programs: 533120531 -> 533119892 (<.01%) cycles in affected programs: 994875 -> 994236 (-0.06%) helped: 76 HURT: 26 helped stats (abs) min: 1 max: 324 x̄: 27.09 x̃: 13 helped stats (rel) min: <.01% max: 4.21% x̄: 0.45% x̃: 0.18% HURT stats (abs) min: 1 max: 167 x̄: 54.62 x̃: 26 HURT stats (rel) min: <.01% max: 4.36% x̄: 1.01% x̃: 0.39% 95% mean confidence interval for cycles value: -19.44 6.91 95% mean confidence interval for cycles %-change: -0.30% 0.15% Inconclusive result (value mean confidence interval includes 0). Broadwell total instructions in shared programs: 14816005 -> 14815787 (<.01%) instructions in affected programs: 64658 -> 64440 (-0.34%) helped: 97 HURT: 0 helped stats (abs) min: 1 max: 8 x̄: 2.25 x̃: 1 helped stats (rel) min: 0.07% max: 2.30% x̄: 0.38% x̃: 0.20% 95% mean confidence interval for instructions value: -2.62 -1.87 95% mean confidence interval for instructions %-change: -0.45% -0.30% Instructions are helped. total cycles in shared programs: 559340386 -> 559339907 (<.01%) cycles in affected programs: 1090491 -> 1090012 (-0.04%) helped: 66 HURT: 28 helped stats (abs) min: 2 max: 198 x̄: 23.83 x̃: 16 helped stats (rel) min: 0.01% max: 4.21% x̄: 0.47% x̃: 0.27% HURT stats (abs) min: 2 max: 226 x̄: 39.07 x̃: 11 HURT stats (rel) min: <.01% max: 4.61% x̄: 0.64% x̃: 0.20% 95% mean confidence interval for cycles value: -15.94 5.75 95% mean confidence interval for cycles %-change: -0.35% 0.07% Inconclusive result (value mean confidence interval includes 0). LOST: 0 GAINED: 1 Haswell total instructions in shared programs: 9034106 -> 9033948 (<.01%) instructions in affected programs: 24096 -> 23938 (-0.66%) helped: 38 HURT: 0 helped stats (abs) min: 1 max: 8 x̄: 4.16 x̃: 4 helped stats (rel) min: 0.42% max: 2.29% x̄: 0.71% x̃: 0.64% 95% mean confidence interval for instructions value: -4.71 -3.60 95% mean confidence interval for instructions %-change: -0.84% -0.58% Instructions are helped. total cycles in shared programs: 84631628 -> 84631402 (<.01%) cycles in affected programs: 148674 -> 148448 (-0.15%) helped: 14 HURT: 14 helped stats (abs) min: 1 max: 114 x̄: 22.14 x̃: 12 helped stats (rel) min: 0.02% max: 2.98% x̄: 0.66% x̃: 0.21% HURT stats (abs) min: 1 max: 10 x̄: 6.00 x̃: 5 HURT stats (rel) min: 0.01% max: 0.20% x̄: 0.12% x̃: 0.11% 95% mean confidence interval for cycles value: -19.42 3.28 95% mean confidence interval for cycles %-change: -0.59% 0.05% Inconclusive result (value mean confidence interval includes 0). Ivy Bridge total instructions in shared programs: 10015456 -> 10015293 (<.01%) instructions in affected programs: 27701 -> 27538 (-0.59%) helped: 38 HURT: 0 helped stats (abs) min: 1 max: 9 x̄: 4.29 x̃: 4 helped stats (rel) min: 0.33% max: 2.79% x̄: 0.66% x̃: 0.52% 95% mean confidence interval for instructions value: -4.87 -3.71 95% mean confidence interval for instructions %-change: -0.82% -0.51% Instructions are helped. total cycles in shared programs: 87524771 -> 87524569 (<.01%) cycles in affected programs: 112324 -> 112122 (-0.18%) helped: 6 HURT: 12 helped stats (abs) min: 2 max: 111 x̄: 44.67 x̃: 20 helped stats (rel) min: 0.02% max: 2.94% x̄: 1.45% x̃: 1.26% HURT stats (abs) min: 1 max: 16 x̄: 5.50 x̃: 5 HURT stats (rel) min: <.01% max: 0.16% x̄: 0.08% x̃: 0.08% 95% mean confidence interval for cycles value: -29.14 6.69 95% mean confidence interval for cycles %-change: -0.93% 0.08% Inconclusive result (value mean confidence interval includes 0). LOST: 0 GAINED: 2 Sandy Bridge total instructions in shared programs: 10545655 -> 10545465 (<.01%) instructions in affected programs: 37198 -> 37008 (-0.51%) helped: 42 HURT: 0 helped stats (abs) min: 1 max: 8 x̄: 4.52 x̃: 4 helped stats (rel) min: 0.31% max: 2.15% x̄: 0.58% x̃: 0.49% 95% mean confidence interval for instructions value: -5.14 -3.91 95% mean confidence interval for instructions %-change: -0.68% -0.47% Instructions are helped. total cycles in shared programs: 146113059 -> 146112427 (<.01%) cycles in affected programs: 423514 -> 422882 (-0.15%) helped: 32 HURT: 10 helped stats (abs) min: 4 max: 162 x̄: 24.34 x̃: 12 helped stats (rel) min: 0.06% max: 2.74% x̄: 0.37% x̃: 0.11% HURT stats (abs) min: 12 max: 19 x̄: 14.70 x̃: 14 HURT stats (rel) min: 0.10% max: 0.18% x̄: 0.16% x̃: 0.14% 95% mean confidence interval for cycles value: -26.03 -4.07 95% mean confidence interval for cycles %-change: -0.43% -0.05% Cycles are helped. Iron Lake total instructions in shared programs: 7886959 -> 7886925 (<.01%) instructions in affected programs: 1340 -> 1306 (-2.54%) helped: 4 HURT: 0 helped stats (abs) min: 2 max: 15 x̄: 8.50 x̃: 8 helped stats (rel) min: 0.63% max: 4.30% x̄: 2.45% x̃: 2.43% 95% mean confidence interval for instructions value: -20.44 3.44 95% mean confidence interval for instructions %-change: -5.78% 0.89% Inconclusive result (value mean confidence interval includes 0). total cycles in shared programs: 178116996 -> 178116888 (<.01%) cycles in affected programs: 6262 -> 6154 (-1.72%) helped: 2 HURT: 2 helped stats (abs) min: 44 max: 78 x̄: 61.00 x̃: 61 helped stats (rel) min: 3.31% max: 3.94% x̄: 3.62% x̃: 3.62% HURT stats (abs) min: 6 max: 8 x̄: 7.00 x̃: 7 HURT stats (rel) min: 0.34% max: 0.68% x̄: 0.51% x̃: 0.51% 95% mean confidence interval for cycles value: -93.27 39.27 95% mean confidence interval for cycles %-change: -5.38% 2.27% Inconclusive result (value mean confidence interval includes 0). GM45 total instructions in shared programs: 4857887 -> 4857870 (<.01%) instructions in affected programs: 674 -> 657 (-2.52%) helped: 2 HURT: 0 total cycles in shared programs: 122180816 -> 122180744 (<.01%) cycles in affected programs: 3764 -> 3692 (-1.91%) helped: 1 HURT: 1 helped stats (abs) min: 78 max: 78 x̄: 78.00 x̃: 78 helped stats (rel) min: 3.94% max: 3.94% x̄: 3.94% x̃: 3.94% HURT stats (abs) min: 6 max: 6 x̄: 6.00 x̃: 6 HURT stats (rel) min: 0.34% max: 0.34% x̄: 0.34% x̃: 0.34% Signed-off-by: Ian Romanick <ian.d.romanick@intel.com> Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com> Reviewed-by: Elie Tournier <elie.tournier@collabora.com>
2018-01-04 21:30:49 +00:00
(('ior', ('ilt(is_used_once)', a, b), ('ilt', a, c)), ('ilt', a, ('imax', b, c))),
(('ior', ('ilt(is_used_once)', a, c), ('ilt', b, c)), ('ilt', ('imin', a, b), c)),
(('ior', ('ige(is_used_once)', a, b), ('ige', a, c)), ('ige', a, ('imin', b, c))),
(('ior', ('ige(is_used_once)', a, c), ('ige', b, c)), ('ige', ('imax', a, b), c)),
(('ior', ('ult(is_used_once)', a, b), ('ult', a, c)), ('ult', a, ('umax', b, c))),
(('ior', ('ult(is_used_once)', a, c), ('ult', b, c)), ('ult', ('umin', a, b), c)),
(('ior', ('uge(is_used_once)', a, b), ('uge', a, c)), ('uge', a, ('umin', b, c))),
(('ior', ('uge(is_used_once)', a, c), ('uge', b, c)), ('uge', ('umax', a, b), c)),
(('iand', ('ilt(is_used_once)', a, b), ('ilt', a, c)), ('ilt', a, ('imin', b, c))),
(('iand', ('ilt(is_used_once)', a, c), ('ilt', b, c)), ('ilt', ('imax', a, b), c)),
(('iand', ('ige(is_used_once)', a, b), ('ige', a, c)), ('ige', a, ('imax', b, c))),
(('iand', ('ige(is_used_once)', a, c), ('ige', b, c)), ('ige', ('imin', a, b), c)),
(('iand', ('ult(is_used_once)', a, b), ('ult', a, c)), ('ult', a, ('umin', b, c))),
(('iand', ('ult(is_used_once)', a, c), ('ult', b, c)), ('ult', ('umax', a, b), c)),
(('iand', ('uge(is_used_once)', a, b), ('uge', a, c)), ('uge', a, ('umax', b, c))),
(('iand', ('uge(is_used_once)', a, c), ('uge', b, c)), ('uge', ('umin', a, b), c)),
nir: Undo possible damage caused by rearranging or-compounded float compares shader-db results: Skylake and Broadwell had similar results (Skylake shown) total instructions in shared programs: 14525898 -> 14525836 (<.01%) instructions in affected programs: 1964 -> 1902 (-3.16%) helped: 14 HURT: 0 helped stats (abs) min: 1 max: 25 x̄: 4.43 x̃: 1 helped stats (rel) min: 0.68% max: 9.77% x̄: 2.10% x̃: 0.86% 95% mean confidence interval for instructions value: -9.46 0.60 95% mean confidence interval for instructions %-change: -3.97% -0.24% Inconclusive result (value mean confidence interval includes 0). total cycles in shared programs: 533119892 -> 533115756 (<.01%) cycles in affected programs: 96061 -> 91925 (-4.31%) helped: 13 HURT: 1 helped stats (abs) min: 60 max: 596 x̄: 318.77 x̃: 300 helped stats (rel) min: 1.15% max: 5.49% x̄: 4.27% x̃: 4.42% HURT stats (abs) min: 8 max: 8 x̄: 8.00 x̃: 8 HURT stats (rel) min: 0.46% max: 0.46% x̄: 0.46% x̃: 0.46% 95% mean confidence interval for cycles value: -379.43 -211.43 95% mean confidence interval for cycles %-change: -4.84% -3.01% Cycles are helped. Haswell, Ivy Bridge and Sandy Bridge had similar results (Haswell shown). total instructions in shared programs: 9033948 -> 9033898 (<.01%) instructions in affected programs: 535 -> 485 (-9.35%) helped: 2 HURT: 0 total cycles in shared programs: 84631402 -> 84628949 (<.01%) cycles in affected programs: 63197 -> 60744 (-3.88%) helped: 13 HURT: 2 helped stats (abs) min: 1 max: 594 x̄: 189.62 x̃: 140 helped stats (rel) min: 0.07% max: 5.04% x̄: 3.79% x̃: 4.01% HURT stats (abs) min: 4 max: 8 x̄: 6.00 x̃: 6 HURT stats (rel) min: 0.17% max: 0.45% x̄: 0.31% x̃: 0.31% 95% mean confidence interval for cycles value: -253.40 -73.67 95% mean confidence interval for cycles %-change: -4.24% -2.25% Cycles are helped. No changes on GM45 or Iron Lake. v2: Add a couple more tautological compares. Suggested by Elie. Signed-off-by: Ian Romanick <ian.d.romanick@intel.com> Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com> Reviewed-by: Elie Tournier <elie.tournier@collabora.com>
2018-01-05 21:29:26 +00:00
# These patterns can result when (a < b || a < c) => (a < min(b, c))
# transformations occur before constant propagation and loop-unrolling.
(('~flt', a, ('fmax', b, a)), ('flt', a, b)),
(('~flt', ('fmin', a, b), a), ('flt', b, a)),
(('~fge', a, ('fmin', b, a)), True),
(('~fge', ('fmax', a, b), a), True),
(('~flt', a, ('fmin', b, a)), False),
(('~flt', ('fmax', a, b), a), False),
(('ilt', a, ('imax', b, a)), ('ilt', a, b)),
(('ilt', ('imin', a, b), a), ('ilt', b, a)),
(('ige', a, ('imin', b, a)), True),
(('ige', ('imax', a, b), a), True),
(('ult', a, ('umax', b, a)), ('ult', a, b)),
(('ult', ('umin', a, b), a), ('ult', b, a)),
(('uge', a, ('umin', b, a)), True),
(('uge', ('umax', a, b), a), True),
(('ilt', '#a', ('imin', '#b', c)), ('iand', ('ilt', a, b), ('ilt', a, c))),
(('ilt', ('imax', '#a', b), '#c'), ('iand', ('ilt', a, c), ('ilt', b, c))),
(('ige', '#a', ('imax', '#b', c)), ('iand', ('ige', a, b), ('ige', a, c))),
(('ige', ('imin', '#a', b), '#c'), ('iand', ('ige', a, c), ('ige', b, c))),
(('ult', '#a', ('umin', '#b', c)), ('iand', ('ult', a, b), ('ult', a, c))),
(('ult', ('umax', '#a', b), '#c'), ('iand', ('ult', a, c), ('ult', b, c))),
(('uge', '#a', ('umax', '#b', c)), ('iand', ('uge', a, b), ('uge', a, c))),
(('uge', ('umin', '#a', b), '#c'), ('iand', ('uge', a, c), ('uge', b, c))),
(('fabs', ('slt', a, b)), ('slt', a, b)),
(('fabs', ('sge', a, b)), ('sge', a, b)),
(('fabs', ('seq', a, b)), ('seq', a, b)),
(('fabs', ('sne', a, b)), ('sne', a, b)),
(('slt', a, b), ('b2f', ('flt', a, b)), 'options->lower_scmp'),
(('sge', a, b), ('b2f', ('fge', a, b)), 'options->lower_scmp'),
(('seq', a, b), ('b2f', ('feq', a, b)), 'options->lower_scmp'),
(('sne', a, b), ('b2f', ('fne', a, b)), 'options->lower_scmp'),
(('fne', ('fneg', a), a), ('fne', a, 0.0)),
(('feq', ('fneg', a), a), ('feq', a, 0.0)),
# Emulating booleans
(('imul', ('b2i', a), ('b2i', b)), ('b2i', ('iand', a, b))),
(('fmul', ('b2f', a), ('b2f', b)), ('b2f', ('iand', a, b))),
(('fsat', ('fadd', ('b2f', a), ('b2f', b))), ('b2f', ('ior', a, b))),
(('iand', 'a@bool', 1.0), ('b2f', a)),
# True/False are ~0 and 0 in NIR. b2i of True is 1, and -1 is ~0 (True).
(('ineg', ('b2i@32', a)), a),
(('flt', ('fneg', ('b2f', a)), 0), a), # Generated by TGSI KILL_IF.
(('flt', ('fsub', 0.0, ('b2f', a)), 0), a), # Generated by TGSI KILL_IF.
# Comparison with the same args. Note that these are not done for
# the float versions because NaN always returns false on float
# inequalities.
(('ilt', a, a), False),
(('ige', a, a), True),
(('ieq', a, a), True),
(('ine', a, a), False),
(('ult', a, a), False),
(('uge', a, a), True),
# Logical and bit operations
(('fand', a, 0.0), 0.0),
(('iand', a, a), a),
(('iand', a, ~0), a),
(('iand', a, 0), 0),
(('ior', a, a), a),
(('ior', a, 0), a),
(('ior', a, True), True),
(('fxor', a, a), 0.0),
(('ixor', a, a), 0),
(('ixor', a, 0), a),
(('inot', ('inot', a)), a),
# DeMorgan's Laws
(('iand', ('inot', a), ('inot', b)), ('inot', ('ior', a, b))),
(('ior', ('inot', a), ('inot', b)), ('inot', ('iand', a, b))),
# Shift optimizations
(('ishl', 0, a), 0),
(('ishl', a, 0), a),
(('ishr', 0, a), 0),
(('ishr', a, 0), a),
(('ushr', 0, a), 0),
(('ushr', a, 0), a),
(('iand', 0xff, ('ushr@32', a, 24)), ('ushr', a, 24)),
(('iand', 0xffff, ('ushr@32', a, 16)), ('ushr', a, 16)),
# Exponential/logarithmic identities
(('~fexp2', ('flog2', a)), a), # 2^lg2(a) = a
(('~flog2', ('fexp2', a)), a), # lg2(2^a) = a
(('fpow', a, b), ('fexp2', ('fmul', ('flog2', a), b)), 'options->lower_fpow'), # a^b = 2^(lg2(a)*b)
(('~fexp2', ('fmul', ('flog2', a), b)), ('fpow', a, b), '!options->lower_fpow'), # 2^(lg2(a)*b) = a^b
(('~fexp2', ('fadd', ('fmul', ('flog2', a), b), ('fmul', ('flog2', c), d))),
('~fmul', ('fpow', a, b), ('fpow', c, d)), '!options->lower_fpow'), # 2^(lg2(a) * b + lg2(c) + d) = a^b * c^d
(('~fexp2', ('fmul', ('flog2', a), 2.0)), ('fmul', a, a)),
(('~fexp2', ('fmul', ('flog2', a), 4.0)), ('fmul', ('fmul', a, a), ('fmul', a, a))),
(('~fpow', a, 1.0), a),
(('~fpow', a, 2.0), ('fmul', a, a)),
(('~fpow', a, 4.0), ('fmul', ('fmul', a, a), ('fmul', a, a))),
(('~fpow', 2.0, a), ('fexp2', a)),
(('~fpow', ('fpow', a, 2.2), 0.454545), a),
(('~fpow', ('fabs', ('fpow', a, 2.2)), 0.454545), ('fabs', a)),
(('~fsqrt', ('fexp2', a)), ('fexp2', ('fmul', 0.5, a))),
(('~frcp', ('fexp2', a)), ('fexp2', ('fneg', a))),
(('~frsq', ('fexp2', a)), ('fexp2', ('fmul', -0.5, a))),
(('~flog2', ('fsqrt', a)), ('fmul', 0.5, ('flog2', a))),
(('~flog2', ('frcp', a)), ('fneg', ('flog2', a))),
(('~flog2', ('frsq', a)), ('fmul', -0.5, ('flog2', a))),
(('~flog2', ('fpow', a, b)), ('fmul', b, ('flog2', a))),
(('~fmul', ('fexp2(is_used_once)', a), ('fexp2(is_used_once)', b)), ('fexp2', ('fadd', a, b))),
# Division and reciprocal
(('~fdiv', 1.0, a), ('frcp', a)),
(('fdiv', a, b), ('fmul', a, ('frcp', b)), 'options->lower_fdiv'),
(('~frcp', ('frcp', a)), a),
(('~frcp', ('fsqrt', a)), ('frsq', a)),
(('fsqrt', a), ('frcp', ('frsq', a)), 'options->lower_fsqrt'),
(('~frcp', ('frsq', a)), ('fsqrt', a), '!options->lower_fsqrt'),
# Boolean simplifications
(('ieq', 'a@bool', True), a),
(('ine(is_not_used_by_if)', 'a@bool', True), ('inot', a)),
(('ine', 'a@bool', False), a),
(('ieq(is_not_used_by_if)', 'a@bool', False), ('inot', 'a')),
(('bcsel', a, True, False), a),
(('bcsel', a, False, True), ('inot', a)),
(('bcsel@32', a, 1.0, 0.0), ('b2f', a)),
(('bcsel@32', a, 0.0, 1.0), ('b2f', ('inot', a))),
(('bcsel@32', a, -1.0, -0.0), ('fneg', ('b2f', a))),
(('bcsel@32', a, -0.0, -1.0), ('fneg', ('b2f', ('inot', a)))),
(('bcsel', True, b, c), b),
(('bcsel', False, b, c), c),
(('bcsel', a, ('b2f(is_used_once)', b), ('b2f', c)), ('b2f', ('bcsel', a, b, c))),
# The result of this should be hit by constant propagation and, in the
# next round of opt_algebraic, get picked up by one of the above two.
(('bcsel', '#a', b, c), ('bcsel', ('ine', 'a', 0), b, c)),
(('bcsel', a, b, b), b),
(('fcsel', a, b, b), b),
# Conversions
(('i2b', ('b2i', a)), a),
(('f2i32', ('ftrunc', a)), ('f2i32', a)),
(('f2u32', ('ftrunc', a)), ('f2u32', a)),
nir: Simplify i2b with negated or abs operand This enables removing ssa_201 and ssa_202 in sequences like: vec1 ssa_200 = flt ssa_199, ssa_194 vec1 ssa_201 = b2i ssa_200 vec1 ssa_202 = i2b -ssa_201 shader-db results: Sandy Bridge total instructions in shared programs: 8462257 -> 8462180 (-0.00%) instructions in affected programs: 3846 -> 3769 (-2.00%) helped: 35 HURT: 0 total cycles in shared programs: 117542934 -> 117542462 (-0.00%) cycles in affected programs: 20072 -> 19600 (-2.35%) helped: 20 HURT: 1 Ivy Bridge total instructions in shared programs: 7775252 -> 7775137 (-0.00%) instructions in affected programs: 3645 -> 3530 (-3.16%) helped: 35 HURT: 0 total cycles in shared programs: 65760522 -> 65760068 (-0.00%) cycles in affected programs: 21082 -> 20628 (-2.15%) helped: 25 HURT: 2 Haswell total instructions in shared programs: 7108666 -> 7108589 (-0.00%) instructions in affected programs: 3253 -> 3176 (-2.37%) helped: 35 HURT: 0 total cycles in shared programs: 64675726 -> 64675272 (-0.00%) cycles in affected programs: 21034 -> 20580 (-2.16%) helped: 26 HURT: 1 Broadwell / Skylake total instructions in shared programs: 8980912 -> 8980835 (-0.00%) instructions in affected programs: 3223 -> 3146 (-2.39%) helped: 35 HURT: 0 total cycles in shared programs: 70077926 -> 70077904 (-0.00%) cycles in affected programs: 21886 -> 21864 (-0.10%) helped: 21 HURT: 6 G45 and Ironlake showed no change. Signed-off-by: Ian Romanick <ian.d.romanick@intel.com> Suggested-by: Jason Ekstrand <jason.ekstrand@intel.com> Reviewed-by: Matt Turner <mattst88@gmail.com>
2016-03-02 02:59:57 +00:00
(('i2b', ('ineg', a)), ('i2b', a)),
(('i2b', ('iabs', a)), ('i2b', a)),
nir: Don't abs the result of b2f or b2i In the results below, 2 SIMD16 shaders in Trine are lost. G4X total instructions in shared programs: 4012279 -> 4011108 (-0.03%) instructions in affected programs: 116776 -> 115605 (-1.00%) helped: 339 HURT: 0 total cycles in shared programs: 84315862 -> 84313584 (-0.00%) cycles in affected programs: 1767232 -> 1764954 (-0.13%) helped: 274 HURT: 81 Ironlake total instructions in shared programs: 6399073 -> 6396998 (-0.03%) instructions in affected programs: 218050 -> 215975 (-0.95%) helped: 600 HURT: 0 total cycles in shared programs: 128892088 -> 128888810 (-0.00%) cycles in affected programs: 2867452 -> 2864174 (-0.11%) helped: 422 HURT: 137 Sandy Bridge total instructions in shared programs: 8462174 -> 8460759 (-0.02%) instructions in affected programs: 178529 -> 177114 (-0.79%) helped: 596 HURT: 0 total cycles in shared programs: 117542276 -> 117534098 (-0.01%) cycles in affected programs: 1239166 -> 1230988 (-0.66%) helped: 369 HURT: 150 Ivy Bridge total instructions in shared programs: 7775131 -> 7773410 (-0.02%) instructions in affected programs: 162903 -> 161182 (-1.06%) helped: 590 HURT: 0 total cycles in shared programs: 65759882 -> 65747268 (-0.02%) cycles in affected programs: 1004354 -> 991740 (-1.26%) helped: 467 HURT: 141 Haswell total instructions in shared programs: 7107786 -> 7106327 (-0.02%) instructions in affected programs: 140954 -> 139495 (-1.04%) helped: 590 HURT: 0 total cycles in shared programs: 64668028 -> 64655322 (-0.02%) cycles in affected programs: 967080 -> 954374 (-1.31%) helped: 452 HURT: 149 LOST: 2 GAINED: 0 Broadwell total instructions in shared programs: 8980029 -> 8978287 (-0.02%) instructions in affected programs: 197232 -> 195490 (-0.88%) helped: 715 HURT: 0 total cycles in shared programs: 70070448 -> 70055970 (-0.02%) cycles in affected programs: 975724 -> 961246 (-1.48%) helped: 471 HURT: 111 LOST: 2 GAINED: 0 Skylake total instructions in shared programs: 9115178 -> 9113436 (-0.02%) instructions in affected programs: 203012 -> 201270 (-0.86%) helped: 715 HURT: 0 total cycles in shared programs: 68848660 -> 68834004 (-0.02%) cycles in affected programs: 993888 -> 979232 (-1.47%) helped: 473 HURT: 116 LOST: 2 GAINED: 0 Signed-off-by: Ian Romanick <ian.d.romanick@intel.com> Reviewed-by: Matt Turner <mattst88@gmail.com>
2016-03-02 21:46:50 +00:00
(('fabs', ('b2f', a)), ('b2f', a)),
(('iabs', ('b2i', a)), ('b2i', a)),
# Packing and then unpacking does nothing
(('unpack_64_2x32_split_x', ('pack_64_2x32_split', a, b)), a),
(('unpack_64_2x32_split_y', ('pack_64_2x32_split', a, b)), b),
(('pack_64_2x32_split', ('unpack_64_2x32_split_x', a),
('unpack_64_2x32_split_y', a)), a),
# Byte extraction
(('ushr', a, 24), ('extract_u8', a, 3), '!options->lower_extract_byte'),
(('iand', 0xff, ('ushr', a, 16)), ('extract_u8', a, 2), '!options->lower_extract_byte'),
(('iand', 0xff, ('ushr', a, 8)), ('extract_u8', a, 1), '!options->lower_extract_byte'),
(('iand', 0xff, a), ('extract_u8', a, 0), '!options->lower_extract_byte'),
# Word extraction
(('ushr', a, 16), ('extract_u16', a, 1), '!options->lower_extract_word'),
(('iand', 0xffff, a), ('extract_u16', a, 0), '!options->lower_extract_word'),
# Subtracts
(('~fsub', a, ('fsub', 0.0, b)), ('fadd', a, b)),
(('isub', a, ('isub', 0, b)), ('iadd', a, b)),
(('ussub_4x8', a, 0), a),
(('ussub_4x8', a, ~0), 0),
(('fsub', a, b), ('fadd', a, ('fneg', b)), 'options->lower_sub'),
(('isub', a, b), ('iadd', a, ('ineg', b)), 'options->lower_sub'),
(('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'),
(('ineg', a), ('isub', 0, a), 'options->lower_negate'),
(('~fadd', a, ('fsub', 0.0, b)), ('fsub', a, b)),
(('iadd', a, ('isub', 0, b)), ('isub', a, b)),
(('fabs', ('fsub', 0.0, a)), ('fabs', a)),
(('iabs', ('isub', 0, a)), ('iabs', a)),
# Propagate negation up multiplication chains
(('fmul', ('fneg', a), b), ('fneg', ('fmul', a, b))),
2016-03-31 03:18:16 +01:00
(('imul', ('ineg', a), b), ('ineg', ('imul', a, b))),
nir: shuffle constants to the top V2: mark float opts as inexact If one of the inputs to an mul/add is the result of another mul/add there is a chance that we can reuse the result of that mul/add in other calls if we do the multiplication in the right order. Also by attempting to move all constants to the top we increase the chance of constant folding. For example it is a fairly common pattern for shaders to do something similar to this: const float a = 0.5; in vec4 b; in float c; ... b.x = b.x * c; b.y = b.y * c; ... b.x = b.x * a + a; b.y = b.y * a + a; So by simply detecting that constant a is part of the multiplication in ffma and switching it with previous fmul that updates b we end up with: ... c = a * c; ... b.x = b.x * c + a; b.y = b.y * c + a; Shader-db results BDW: total instructions in shared programs: 13011050 -> 12967888 (-0.33%) instructions in affected programs: 4118366 -> 4075204 (-1.05%) helped: 17739 HURT: 1343 total cycles in shared programs: 246717952 -> 246410716 (-0.12%) cycles in affected programs: 166870802 -> 166563566 (-0.18%) helped: 18493 HURT: 7965 total spills in shared programs: 14937 -> 14560 (-2.52%) spills in affected programs: 9331 -> 8954 (-4.04%) helped: 284 HURT: 33 total fills in shared programs: 20211 -> 19671 (-2.67%) fills in affected programs: 12586 -> 12046 (-4.29%) helped: 286 HURT: 33 LOST: 39 GAINED: 33 Some of the hurt will go away when we shuffle things back down to the bottom in the following patch. It's also noteworthy that almost all of the spill changes are in Deus Ex both hurt and helped. Reviewed-by: Elie Tournier <elie.tournier@collabora.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2017-01-12 02:10:55 +00:00
# Propagate constants up multiplication chains
(('~fmul(is_used_once)', ('fmul(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('fmul', ('fmul', a, c), b)),
(('imul(is_used_once)', ('imul(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('imul', ('imul', a, c), b)),
(('~fadd(is_used_once)', ('fadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('fadd', ('fadd', a, c), b)),
(('iadd(is_used_once)', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('iadd', ('iadd', a, c), b)),
# Reassociate constants in add/mul chains so they can be folded together.
nir: shuffle constants to the top V2: mark float opts as inexact If one of the inputs to an mul/add is the result of another mul/add there is a chance that we can reuse the result of that mul/add in other calls if we do the multiplication in the right order. Also by attempting to move all constants to the top we increase the chance of constant folding. For example it is a fairly common pattern for shaders to do something similar to this: const float a = 0.5; in vec4 b; in float c; ... b.x = b.x * c; b.y = b.y * c; ... b.x = b.x * a + a; b.y = b.y * a + a; So by simply detecting that constant a is part of the multiplication in ffma and switching it with previous fmul that updates b we end up with: ... c = a * c; ... b.x = b.x * c + a; b.y = b.y * c + a; Shader-db results BDW: total instructions in shared programs: 13011050 -> 12967888 (-0.33%) instructions in affected programs: 4118366 -> 4075204 (-1.05%) helped: 17739 HURT: 1343 total cycles in shared programs: 246717952 -> 246410716 (-0.12%) cycles in affected programs: 166870802 -> 166563566 (-0.18%) helped: 18493 HURT: 7965 total spills in shared programs: 14937 -> 14560 (-2.52%) spills in affected programs: 9331 -> 8954 (-4.04%) helped: 284 HURT: 33 total fills in shared programs: 20211 -> 19671 (-2.67%) fills in affected programs: 12586 -> 12046 (-4.29%) helped: 286 HURT: 33 LOST: 39 GAINED: 33 Some of the hurt will go away when we shuffle things back down to the bottom in the following patch. It's also noteworthy that almost all of the spill changes are in Deus Ex both hurt and helped. Reviewed-by: Elie Tournier <elie.tournier@collabora.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2017-01-12 02:10:55 +00:00
# For now, we mostly only handle cases where the constants are separated by
# a single non-constant. We could do better eventually.
(('~fmul', '#a', ('fmul', b, '#c')), ('fmul', ('fmul', a, c), b)),
(('imul', '#a', ('imul', b, '#c')), ('imul', ('imul', a, c), b)),
nir: See through an fneg to apply existing optimizations Doing the same for the existing feq and fne transformations didn't help anything in shader-db. shader-db results: Broadwell and Skylake (Skylake shown) total instructions in shared programs: 14529463 -> 14526147 (-0.02%) instructions in affected programs: 402420 -> 399104 (-0.82%) helped: 2136 HURT: 131 helped stats (abs) min: 1 max: 10 x̄: 1.61 x̃: 1 helped stats (rel) min: 0.03% max: 16.22% x̄: 3.14% x̃: 1.12% HURT stats (abs) min: 1 max: 2 x̄: 1.01 x̃: 1 HURT stats (rel) min: 0.13% max: 7.69% x̄: 0.75% x̃: 0.57% 95% mean confidence interval for instructions value: -1.51 -1.41 95% mean confidence interval for instructions %-change: -3.06% -2.78% Instructions are helped. total cycles in shared programs: 533146915 -> 533120531 (<.01%) cycles in affected programs: 10356261 -> 10329877 (-0.25%) helped: 1933 HURT: 844 helped stats (abs) min: 1 max: 490 x̄: 29.44 x̃: 16 helped stats (rel) min: <.01% max: 28.57% x̄: 3.43% x̃: 1.88% HURT stats (abs) min: 1 max: 423 x̄: 36.17 x̃: 12 HURT stats (rel) min: <.01% max: 23.75% x̄: 1.90% x̃: 0.59% 95% mean confidence interval for cycles value: -11.78 -7.22 95% mean confidence interval for cycles %-change: -1.98% -1.65% Cycles are helped. Haswell total instructions in shared programs: 9037416 -> 9034106 (-0.04%) instructions in affected programs: 389831 -> 386521 (-0.85%) helped: 2184 HURT: 120 helped stats (abs) min: 1 max: 11 x̄: 1.57 x̃: 1 helped stats (rel) min: 0.03% max: 25.00% x̄: 2.73% x̃: 1.02% HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 HURT stats (rel) min: 0.19% max: 7.69% x̄: 0.81% x̃: 0.57% 95% mean confidence interval for instructions value: -1.49 -1.39 95% mean confidence interval for instructions %-change: -2.68% -2.41% Instructions are helped. total cycles in shared programs: 84636243 -> 84631628 (<.01%) cycles in affected programs: 4745058 -> 4740443 (-0.10%) helped: 1904 HURT: 960 helped stats (abs) min: 1 max: 466 x̄: 30.21 x̃: 18 helped stats (rel) min: 0.02% max: 36.36% x̄: 3.57% x̃: 2.38% HURT stats (abs) min: 1 max: 1080 x̄: 55.11 x̃: 14 HURT stats (rel) min: 0.02% max: 51.33% x̄: 2.77% x̃: 0.81% 95% mean confidence interval for cycles value: -4.51 1.29 95% mean confidence interval for cycles %-change: -1.64% -1.25% Inconclusive result (value mean confidence interval includes 0). LOST: 1 GAINED: 0 Sandy Bridge and Ivy Bridge (Ivy Bridge shown) total instructions in shared programs: 10018873 -> 10015456 (-0.03%) instructions in affected programs: 512820 -> 509403 (-0.67%) helped: 2268 HURT: 162 helped stats (abs) min: 1 max: 11 x̄: 1.62 x̃: 1 helped stats (rel) min: 0.03% max: 25.00% x̄: 2.47% x̃: 0.88% HURT stats (abs) min: 1 max: 4 x̄: 1.59 x̃: 1 HURT stats (rel) min: 0.09% max: 7.69% x̄: 0.86% x̃: 0.50% 95% mean confidence interval for instructions value: -1.46 -1.35 95% mean confidence interval for instructions %-change: -2.38% -2.12% Instructions are helped. total cycles in shared programs: 87538223 -> 87524771 (-0.02%) cycles in affected programs: 5435520 -> 5422068 (-0.25%) helped: 1916 HURT: 946 helped stats (abs) min: 1 max: 1392 x̄: 29.44 x̃: 18 helped stats (rel) min: <.01% max: 34.51% x̄: 3.34% x̃: 1.97% HURT stats (abs) min: 1 max: 633 x̄: 45.41 x̃: 11 HURT stats (rel) min: 0.02% max: 25.95% x̄: 2.41% x̃: 0.62% 95% mean confidence interval for cycles value: -7.34 -2.06 95% mean confidence interval for cycles %-change: -1.62% -1.26% Cycles are helped. LOST: 1 GAINED: 0 Iron Lake total instructions in shared programs: 7888446 -> 7886959 (-0.02%) instructions in affected programs: 331581 -> 330094 (-0.45%) helped: 1160 HURT: 97 helped stats (abs) min: 1 max: 10 x̄: 1.37 x̃: 1 helped stats (rel) min: 0.02% max: 9.68% x̄: 0.93% x̃: 0.43% HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 HURT stats (rel) min: 0.17% max: 4.17% x̄: 0.37% x̃: 0.25% 95% mean confidence interval for instructions value: -1.25 -1.12 95% mean confidence interval for instructions %-change: -0.91% -0.75% Instructions are helped. total cycles in shared programs: 178130766 -> 178116996 (<.01%) cycles in affected programs: 12534564 -> 12520794 (-0.11%) helped: 1856 HURT: 187 helped stats (abs) min: 2 max: 202 x̄: 7.78 x̃: 4 helped stats (rel) min: <.01% max: 6.47% x̄: 0.28% x̃: 0.11% HURT stats (abs) min: 2 max: 26 x̄: 3.55 x̃: 2 HURT stats (rel) min: 0.01% max: 2.14% x̄: 0.08% x̃: 0.02% 95% mean confidence interval for cycles value: -7.41 -6.07 95% mean confidence interval for cycles %-change: -0.28% -0.22% Cycles are helped. GM45 total instructions in shared programs: 4858912 -> 4857887 (-0.02%) instructions in affected programs: 237565 -> 236540 (-0.43%) helped: 867 HURT: 57 helped stats (abs) min: 1 max: 10 x̄: 1.25 x̃: 1 helped stats (rel) min: 0.02% max: 9.38% x̄: 0.87% x̃: 0.43% HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 HURT stats (rel) min: 0.16% max: 3.85% x̄: 0.34% x̃: 0.22% 95% mean confidence interval for instructions value: -1.18 -1.04 95% mean confidence interval for instructions %-change: -0.88% -0.71% Instructions are helped. total cycles in shared programs: 122189118 -> 122180816 (<.01%) cycles in affected programs: 8776418 -> 8768116 (-0.09%) helped: 1213 HURT: 166 helped stats (abs) min: 2 max: 202 x̄: 7.30 x̃: 4 helped stats (rel) min: <.01% max: 6.43% x̄: 0.25% x̃: 0.11% HURT stats (abs) min: 2 max: 26 x̄: 3.35 x̃: 2 HURT stats (rel) min: 0.01% max: 2.14% x̄: 0.06% x̃: 0.02% 95% mean confidence interval for cycles value: -6.78 -5.26 95% mean confidence interval for cycles %-change: -0.24% -0.18% Cycles are helped. Signed-off-by: Ian Romanick <ian.d.romanick@intel.com> Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com> Reviewed-by: Elie Tournier <elie.tournier@collabora.com>
2018-01-09 23:32:47 +00:00
(('~fadd', '#a', ('fadd', b, '#c')), ('fadd', ('fadd', a, c), b)),
(('~fadd', '#a', ('fneg', ('fadd', b, '#c'))), ('fadd', ('fadd', a, ('fneg', c)), ('fneg', b))),
(('iadd', '#a', ('iadd', b, '#c')), ('iadd', ('iadd', a, c), b)),
# By definition...
(('bcsel', ('ige', ('find_lsb', a), 0), ('find_lsb', a), -1), ('find_lsb', a)),
(('bcsel', ('ige', ('ifind_msb', a), 0), ('ifind_msb', a), -1), ('ifind_msb', a)),
(('bcsel', ('ige', ('ufind_msb', a), 0), ('ufind_msb', a), -1), ('ufind_msb', a)),
(('bcsel', ('ine', a, 0), ('find_lsb', a), -1), ('find_lsb', a)),
(('bcsel', ('ine', a, 0), ('ifind_msb', a), -1), ('ifind_msb', a)),
(('bcsel', ('ine', a, 0), ('ufind_msb', a), -1), ('ufind_msb', a)),
(('bcsel', ('ine', a, -1), ('ifind_msb', a), -1), ('ifind_msb', a)),
# Misc. lowering
(('fmod@32', a, b), ('fsub', a, ('fmul', b, ('ffloor', ('fdiv', a, b)))), 'options->lower_fmod32'),
(('fmod@64', a, b), ('fsub', a, ('fmul', b, ('ffloor', ('fdiv', a, b)))), 'options->lower_fmod64'),
(('frem', a, b), ('fsub', a, ('fmul', b, ('ftrunc', ('fdiv', a, b)))), 'options->lower_fmod32'),
(('uadd_carry@32', a, b), ('b2i', ('ult', ('iadd', a, b), a)), 'options->lower_uadd_carry'),
(('usub_borrow@32', a, b), ('b2i', ('ult', a, b)), 'options->lower_usub_borrow'),
(('bitfield_insert', 'base', 'insert', 'offset', 'bits'),
('bcsel', ('ilt', 31, 'bits'), 'insert',
('bfi', ('bfm', 'bits', 'offset'), 'insert', 'base')),
'options->lower_bitfield_insert'),
(('ibitfield_extract', 'value', 'offset', 'bits'),
('bcsel', ('ilt', 31, 'bits'), 'value',
('ibfe', 'value', 'offset', 'bits')),
'options->lower_bitfield_extract'),
(('ubitfield_extract', 'value', 'offset', 'bits'),
('bcsel', ('ult', 31, 'bits'), 'value',
('ubfe', 'value', 'offset', 'bits')),
'options->lower_bitfield_extract'),
nir: Shift count for shift opcodes is always 32-bits Previously both sources were unsized. This caused problems when the thing being shifted was 64-bit but the shift count was 32-bit. The expectation in NIR is that all unsized sources (and destination) will ultimately have the same size. The changes in nir_opt_algebraic.py are to prevent errors like: Failed to parse transformation: 03:12:25 (('extract_i8', 'a', 'b'), ('ishr', ('ishl', 'a', ('imul', ('isub', 3, 'b'), 8)), 24), 'options->lower_extract_byte') 03:12:25 Traceback (most recent call last): 03:12:25 File "/home/jenkins/workspace/Leeroy_2/repos/mesa/src/compiler/nir/nir_algebraic.py", line 610, in __init__ 03:12:25 xform = SearchAndReplace(xform) 03:12:25 File "/home/jenkins/workspace/Leeroy_2/repos/mesa/src/compiler/nir/nir_algebraic.py", line 495, in __init__ 03:12:25 BitSizeValidator(varset).validate(self.search, self.replace) 03:12:25 File "/home/jenkins/workspace/Leeroy_2/repos/mesa/src/compiler/nir/nir_algebraic.py", line 311, in validate 03:12:25 validate_dst_class = self._validate_bit_class_up(replace) 03:12:25 File "/home/jenkins/workspace/Leeroy_2/repos/mesa/src/compiler/nir/nir_algebraic.py", line 414, in _validate_bit_class_up 03:12:25 src_class = self._validate_bit_class_up(val.sources[i]) 03:12:25 File "/home/jenkins/workspace/Leeroy_2/repos/mesa/src/compiler/nir/nir_algebraic.py", line 420, in _validate_bit_class_up 03:12:25 assert src_class == src_type_bits 03:12:25 AssertionError Signed-off-by: Ian Romanick <ian.d.romanick@intel.com> Suggested-by: Connor Abbott <cwabbott0@gmail.com> Reviewed-by: Connor Abbott <cwabbott0@gmail.com> Cc: Jason Ekstrand <jason@jlekstrand.net>
2016-10-27 11:02:58 +01:00
(('extract_i8', a, 'b@32'),
('ishr', ('ishl', a, ('imul', ('isub', 3, b), 8)), 24),
'options->lower_extract_byte'),
nir: Shift count for shift opcodes is always 32-bits Previously both sources were unsized. This caused problems when the thing being shifted was 64-bit but the shift count was 32-bit. The expectation in NIR is that all unsized sources (and destination) will ultimately have the same size. The changes in nir_opt_algebraic.py are to prevent errors like: Failed to parse transformation: 03:12:25 (('extract_i8', 'a', 'b'), ('ishr', ('ishl', 'a', ('imul', ('isub', 3, 'b'), 8)), 24), 'options->lower_extract_byte') 03:12:25 Traceback (most recent call last): 03:12:25 File "/home/jenkins/workspace/Leeroy_2/repos/mesa/src/compiler/nir/nir_algebraic.py", line 610, in __init__ 03:12:25 xform = SearchAndReplace(xform) 03:12:25 File "/home/jenkins/workspace/Leeroy_2/repos/mesa/src/compiler/nir/nir_algebraic.py", line 495, in __init__ 03:12:25 BitSizeValidator(varset).validate(self.search, self.replace) 03:12:25 File "/home/jenkins/workspace/Leeroy_2/repos/mesa/src/compiler/nir/nir_algebraic.py", line 311, in validate 03:12:25 validate_dst_class = self._validate_bit_class_up(replace) 03:12:25 File "/home/jenkins/workspace/Leeroy_2/repos/mesa/src/compiler/nir/nir_algebraic.py", line 414, in _validate_bit_class_up 03:12:25 src_class = self._validate_bit_class_up(val.sources[i]) 03:12:25 File "/home/jenkins/workspace/Leeroy_2/repos/mesa/src/compiler/nir/nir_algebraic.py", line 420, in _validate_bit_class_up 03:12:25 assert src_class == src_type_bits 03:12:25 AssertionError Signed-off-by: Ian Romanick <ian.d.romanick@intel.com> Suggested-by: Connor Abbott <cwabbott0@gmail.com> Reviewed-by: Connor Abbott <cwabbott0@gmail.com> Cc: Jason Ekstrand <jason@jlekstrand.net>
2016-10-27 11:02:58 +01:00
(('extract_u8', a, 'b@32'),
('iand', ('ushr', a, ('imul', b, 8)), 0xff),
'options->lower_extract_byte'),
nir: Shift count for shift opcodes is always 32-bits Previously both sources were unsized. This caused problems when the thing being shifted was 64-bit but the shift count was 32-bit. The expectation in NIR is that all unsized sources (and destination) will ultimately have the same size. The changes in nir_opt_algebraic.py are to prevent errors like: Failed to parse transformation: 03:12:25 (('extract_i8', 'a', 'b'), ('ishr', ('ishl', 'a', ('imul', ('isub', 3, 'b'), 8)), 24), 'options->lower_extract_byte') 03:12:25 Traceback (most recent call last): 03:12:25 File "/home/jenkins/workspace/Leeroy_2/repos/mesa/src/compiler/nir/nir_algebraic.py", line 610, in __init__ 03:12:25 xform = SearchAndReplace(xform) 03:12:25 File "/home/jenkins/workspace/Leeroy_2/repos/mesa/src/compiler/nir/nir_algebraic.py", line 495, in __init__ 03:12:25 BitSizeValidator(varset).validate(self.search, self.replace) 03:12:25 File "/home/jenkins/workspace/Leeroy_2/repos/mesa/src/compiler/nir/nir_algebraic.py", line 311, in validate 03:12:25 validate_dst_class = self._validate_bit_class_up(replace) 03:12:25 File "/home/jenkins/workspace/Leeroy_2/repos/mesa/src/compiler/nir/nir_algebraic.py", line 414, in _validate_bit_class_up 03:12:25 src_class = self._validate_bit_class_up(val.sources[i]) 03:12:25 File "/home/jenkins/workspace/Leeroy_2/repos/mesa/src/compiler/nir/nir_algebraic.py", line 420, in _validate_bit_class_up 03:12:25 assert src_class == src_type_bits 03:12:25 AssertionError Signed-off-by: Ian Romanick <ian.d.romanick@intel.com> Suggested-by: Connor Abbott <cwabbott0@gmail.com> Reviewed-by: Connor Abbott <cwabbott0@gmail.com> Cc: Jason Ekstrand <jason@jlekstrand.net>
2016-10-27 11:02:58 +01:00
(('extract_i16', a, 'b@32'),
('ishr', ('ishl', a, ('imul', ('isub', 1, b), 16)), 16),
'options->lower_extract_word'),
nir: Shift count for shift opcodes is always 32-bits Previously both sources were unsized. This caused problems when the thing being shifted was 64-bit but the shift count was 32-bit. The expectation in NIR is that all unsized sources (and destination) will ultimately have the same size. The changes in nir_opt_algebraic.py are to prevent errors like: Failed to parse transformation: 03:12:25 (('extract_i8', 'a', 'b'), ('ishr', ('ishl', 'a', ('imul', ('isub', 3, 'b'), 8)), 24), 'options->lower_extract_byte') 03:12:25 Traceback (most recent call last): 03:12:25 File "/home/jenkins/workspace/Leeroy_2/repos/mesa/src/compiler/nir/nir_algebraic.py", line 610, in __init__ 03:12:25 xform = SearchAndReplace(xform) 03:12:25 File "/home/jenkins/workspace/Leeroy_2/repos/mesa/src/compiler/nir/nir_algebraic.py", line 495, in __init__ 03:12:25 BitSizeValidator(varset).validate(self.search, self.replace) 03:12:25 File "/home/jenkins/workspace/Leeroy_2/repos/mesa/src/compiler/nir/nir_algebraic.py", line 311, in validate 03:12:25 validate_dst_class = self._validate_bit_class_up(replace) 03:12:25 File "/home/jenkins/workspace/Leeroy_2/repos/mesa/src/compiler/nir/nir_algebraic.py", line 414, in _validate_bit_class_up 03:12:25 src_class = self._validate_bit_class_up(val.sources[i]) 03:12:25 File "/home/jenkins/workspace/Leeroy_2/repos/mesa/src/compiler/nir/nir_algebraic.py", line 420, in _validate_bit_class_up 03:12:25 assert src_class == src_type_bits 03:12:25 AssertionError Signed-off-by: Ian Romanick <ian.d.romanick@intel.com> Suggested-by: Connor Abbott <cwabbott0@gmail.com> Reviewed-by: Connor Abbott <cwabbott0@gmail.com> Cc: Jason Ekstrand <jason@jlekstrand.net>
2016-10-27 11:02:58 +01:00
(('extract_u16', a, 'b@32'),
('iand', ('ushr', a, ('imul', b, 16)), 0xffff),
'options->lower_extract_word'),
(('pack_unorm_2x16', 'v'),
('pack_uvec2_to_uint',
('f2u32', ('fround_even', ('fmul', ('fsat', 'v'), 65535.0)))),
'options->lower_pack_unorm_2x16'),
(('pack_unorm_4x8', 'v'),
('pack_uvec4_to_uint',
('f2u32', ('fround_even', ('fmul', ('fsat', 'v'), 255.0)))),
'options->lower_pack_unorm_4x8'),
(('pack_snorm_2x16', 'v'),
('pack_uvec2_to_uint',
('f2i32', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 32767.0)))),
'options->lower_pack_snorm_2x16'),
(('pack_snorm_4x8', 'v'),
('pack_uvec4_to_uint',
('f2i32', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 127.0)))),
'options->lower_pack_snorm_4x8'),
(('unpack_unorm_2x16', 'v'),
('fdiv', ('u2f32', ('vec2', ('extract_u16', 'v', 0),
('extract_u16', 'v', 1))),
65535.0),
'options->lower_unpack_unorm_2x16'),
(('unpack_unorm_4x8', 'v'),
('fdiv', ('u2f32', ('vec4', ('extract_u8', 'v', 0),
('extract_u8', 'v', 1),
('extract_u8', 'v', 2),
('extract_u8', 'v', 3))),
255.0),
'options->lower_unpack_unorm_4x8'),
(('unpack_snorm_2x16', 'v'),
('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f32', ('vec2', ('extract_i16', 'v', 0),
('extract_i16', 'v', 1))),
32767.0))),
'options->lower_unpack_snorm_2x16'),
(('unpack_snorm_4x8', 'v'),
('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f32', ('vec4', ('extract_i8', 'v', 0),
('extract_i8', 'v', 1),
('extract_i8', 'v', 2),
('extract_i8', 'v', 3))),
127.0))),
'options->lower_unpack_snorm_4x8'),
]
def fexp2i(exp, bits):
# We assume that exp is already in the right range.
if bits == 32:
return ('ishl', ('iadd', exp, 127), 23)
elif bits == 64:
return ('pack_64_2x32_split', 0, ('ishl', ('iadd', exp, 1023), 20))
else:
assert False
def ldexp(f, exp, bits):
# First, we clamp exp to a reasonable range. The maximum possible range
# for a normal exponent is [-126, 127] and, throwing in denormals, you get
# a maximum range of [-149, 127]. This means that we can potentially have
# a swing of +-276. If you start with FLT_MAX, you actually have to do
# ldexp(FLT_MAX, -278) to get it to flush all the way to zero. The GLSL
# spec, on the other hand, only requires that we handle an exponent value
# in the range [-126, 128]. This implementation is *mostly* correct; it
# handles a range on exp of [-252, 254] which allows you to create any
# value (including denorms if the hardware supports it) and to adjust the
# exponent of any normal value to anything you want.
if bits == 32:
exp = ('imin', ('imax', exp, -252), 254)
elif bits == 64:
exp = ('imin', ('imax', exp, -2044), 2046)
else:
assert False
# Now we compute two powers of 2, one for exp/2 and one for exp-exp/2.
# (We use ishr which isn't the same for -1, but the -1 case still works
# since we use exp-exp/2 as the second exponent.) While the spec
# technically defines ldexp as f * 2.0^exp, simply multiplying once doesn't
# work with denormals and doesn't allow for the full swing in exponents
# that you can get with normalized values. Instead, we create two powers
# of two and multiply by them each in turn. That way the effective range
# of our exponent is doubled.
pow2_1 = fexp2i(('ishr', exp, 1), bits)
pow2_2 = fexp2i(('isub', exp, ('ishr', exp, 1)), bits)
return ('fmul', ('fmul', f, pow2_1), pow2_2)
optimizations += [
(('ldexp@32', 'x', 'exp'), ldexp('x', 'exp', 32), 'options->lower_ldexp'),
(('ldexp@64', 'x', 'exp'), ldexp('x', 'exp', 64), 'options->lower_ldexp'),
]
# Unreal Engine 4 demo applications open-codes bitfieldReverse()
def bitfield_reverse(u):
step1 = ('ior', ('ishl', u, 16), ('ushr', u, 16))
step2 = ('ior', ('ishl', ('iand', step1, 0x00ff00ff), 8), ('ushr', ('iand', step1, 0xff00ff00), 8))
step3 = ('ior', ('ishl', ('iand', step2, 0x0f0f0f0f), 4), ('ushr', ('iand', step2, 0xf0f0f0f0), 4))
step4 = ('ior', ('ishl', ('iand', step3, 0x33333333), 2), ('ushr', ('iand', step3, 0xcccccccc), 2))
step5 = ('ior', ('ishl', ('iand', step4, 0x55555555), 1), ('ushr', ('iand', step4, 0xaaaaaaaa), 1))
return step5
optimizations += [(bitfield_reverse('x@32'), ('bitfield_reverse', 'x'))]
# For any float comparison operation, "cmp", if you have "a == a && a cmp b"
# then the "a == a" is redundant because it's equivalent to "a is not NaN"
# and, if a is a NaN then the second comparison will fail anyway.
for op in ['flt', 'fge', 'feq']:
optimizations += [
(('iand', ('feq', a, a), (op, a, b)), (op, a, b)),
(('iand', ('feq', a, a), (op, b, a)), (op, b, a)),
]
# Add optimizations to handle the case where the result of a ternary is
# compared to a constant. This way we can take things like
#
# (a ? 0 : 1) > 0
#
# and turn it into
#
# a ? (0 > 0) : (1 > 0)
#
# which constant folding will eat for lunch. The resulting ternary will
# further get cleaned up by the boolean reductions above and we will be
# left with just the original variable "a".
for op in ['flt', 'fge', 'feq', 'fne',
'ilt', 'ige', 'ieq', 'ine', 'ult', 'uge']:
optimizations += [
((op, ('bcsel', 'a', '#b', '#c'), '#d'),
('bcsel', 'a', (op, 'b', 'd'), (op, 'c', 'd'))),
((op, '#d', ('bcsel', a, '#b', '#c')),
('bcsel', 'a', (op, 'd', 'b'), (op, 'd', 'c'))),
]
nir: Distribute binary operations with constants into bcsel This was specifically designed to simplify 1+mix(0, a-1, condition) to mix(1, a, condition) by pushing the 1+ inside. Skylake, Broadwell, and Haswell had similar results. Skylake shown. total instructions in shared programs: 14521753 -> 14521716 (<.01%) instructions in affected programs: 10619 -> 10582 (-0.35%) helped: 51 HURT: 14 helped stats (abs) min: 1 max: 12 x̄: 1.43 x̃: 1 helped stats (rel) min: 0.20% max: 3.58% x̄: 1.01% x̃: 0.95% HURT stats (abs) min: 1 max: 11 x̄: 2.57 x̃: 1 HURT stats (rel) min: 0.22% max: 1.75% x̄: 1.20% x̃: 1.32% 95% mean confidence interval for instructions value: -1.31 0.17 95% mean confidence interval for instructions %-change: -0.80% -0.27% Inconclusive result (value mean confidence interval includes 0). total cycles in shared programs: 533000205 -> 533003533 (<.01%) cycles in affected programs: 110610 -> 113938 (3.01%) helped: 43 HURT: 28 helped stats (abs) min: 6 max: 440 x̄: 27.12 x̃: 16 helped stats (rel) min: 0.39% max: 4.84% x̄: 1.60% x̃: 1.67% HURT stats (abs) min: 2 max: 3066 x̄: 160.50 x̃: 14 HURT stats (rel) min: 0.08% max: 77.78% x̄: 5.16% x̃: 0.62% 95% mean confidence interval for cycles value: -43.81 137.56 95% mean confidence interval for cycles %-change: -1.47% 3.60% Inconclusive result (value mean confidence interval includes 0). Ivy Bridge total instructions in shared programs: 10018840 -> 10018713 (<.01%) instructions in affected programs: 9431 -> 9304 (-1.35%) helped: 51 HURT: 3 helped stats (abs) min: 1 max: 80 x̄: 2.76 x̃: 1 helped stats (rel) min: 0.20% max: 16.43% x̄: 1.16% x̃: 0.81% HURT stats (abs) min: 1 max: 12 x̄: 4.67 x̃: 1 HURT stats (rel) min: 0.22% max: 1.33% x̄: 0.59% x̃: 0.22% 95% mean confidence interval for instructions value: -5.36 0.66 95% mean confidence interval for instructions %-change: -1.66% -0.46% Inconclusive result (value mean confidence interval includes 0). total cycles in shared programs: 87571944 -> 87572785 (<.01%) cycles in affected programs: 117234 -> 118075 (0.72%) helped: 42 HURT: 23 helped stats (abs) min: 2 max: 114 x̄: 51.90 x̃: 30 helped stats (rel) min: 0.11% max: 11.01% x̄: 4.45% x̃: 2.74% HURT stats (abs) min: 1 max: 2341 x̄: 131.35 x̃: 10 HURT stats (rel) min: 0.06% max: 37.11% x̄: 2.75% x̃: 0.61% 95% mean confidence interval for cycles value: -61.05 86.93 95% mean confidence interval for cycles %-change: -3.47% -0.33% Inconclusive result (value mean confidence interval includes 0). Sandy Bridge total instructions in shared programs: 10542933 -> 10542844 (<.01%) instructions in affected programs: 11487 -> 11398 (-0.77%) helped: 52 HURT: 3 helped stats (abs) min: 1 max: 40 x̄: 1.96 x̃: 1 helped stats (rel) min: 0.08% max: 8.16% x̄: 0.90% x̃: 0.72% HURT stats (abs) min: 1 max: 11 x̄: 4.33 x̃: 1 HURT stats (rel) min: 0.22% max: 1.22% x̄: 0.55% x̃: 0.22% 95% mean confidence interval for instructions value: -3.17 -0.07 95% mean confidence interval for instructions %-change: -1.13% -0.52% Instructions are helped. total cycles in shared programs: 146098397 -> 146097094 (<.01%) cycles in affected programs: 128140 -> 126837 (-1.02%) helped: 47 HURT: 8 helped stats (abs) min: 2 max: 333 x̄: 29.21 x̃: 18 helped stats (rel) min: 0.13% max: 5.04% x̄: 1.18% x̃: 0.95% HURT stats (abs) min: 1 max: 16 x̄: 8.75 x̃: 9 HURT stats (rel) min: 0.08% max: 0.43% x̄: 0.30% x̃: 0.34% 95% mean confidence interval for cycles value: -37.49 -9.90 95% mean confidence interval for cycles %-change: -1.22% -0.71% Cycles are helped. Iron Lake total instructions in shared programs: 7886711 -> 7886509 (<.01%) instructions in affected programs: 10425 -> 10223 (-1.94%) helped: 50 HURT: 2 helped stats (abs) min: 1 max: 78 x̄: 4.08 x̃: 1 helped stats (rel) min: 0.34% max: 15.38% x̄: 1.12% x̃: 0.54% HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 HURT stats (rel) min: 0.86% max: 0.91% x̄: 0.89% x̃: 0.89% 95% mean confidence interval for instructions value: -8.05 0.28 95% mean confidence interval for instructions %-change: -1.83% -0.26% Inconclusive result (value mean confidence interval includes 0). total cycles in shared programs: 178115324 -> 178114612 (<.01%) cycles in affected programs: 765726 -> 765014 (-0.09%) helped: 39 HURT: 1 helped stats (abs) min: 2 max: 276 x̄: 18.31 x̃: 8 helped stats (rel) min: <.01% max: 8.47% x̄: 0.39% x̃: 0.04% HURT stats (abs) min: 2 max: 2 x̄: 2.00 x̃: 2 HURT stats (rel) min: 0.03% max: 0.03% x̄: 0.03% x̃: 0.03% 95% mean confidence interval for cycles value: -32.07 -3.53 95% mean confidence interval for cycles %-change: -0.86% 0.10% Inconclusive result (%-change mean confidence interval includes 0). GM45 total instructions in shared programs: 4857762 -> 4857661 (<.01%) instructions in affected programs: 5523 -> 5422 (-1.83%) helped: 25 HURT: 1 helped stats (abs) min: 1 max: 78 x̄: 4.08 x̃: 1 helped stats (rel) min: 0.34% max: 13.61% x̄: 1.04% x̃: 0.52% HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 HURT stats (rel) min: 0.86% max: 0.86% x̄: 0.86% x̃: 0.86% 95% mean confidence interval for instructions value: -9.99 2.22 95% mean confidence interval for instructions %-change: -2.01% 0.08% Inconclusive result (value mean confidence interval includes 0). total cycles in shared programs: 122179674 -> 122179194 (<.01%) cycles in affected programs: 530162 -> 529682 (-0.09%) helped: 22 HURT: 1 helped stats (abs) min: 2 max: 292 x̄: 21.91 x̃: 7 helped stats (rel) min: <.01% max: 8.65% x̄: 0.44% x̃: 0.04% HURT stats (abs) min: 2 max: 2 x̄: 2.00 x̃: 2 HURT stats (rel) min: 0.03% max: 0.03% x̄: 0.03% x̃: 0.03% 95% mean confidence interval for cycles value: -46.56 4.82 95% mean confidence interval for cycles %-change: -1.20% 0.36% Inconclusive result (value mean confidence interval includes 0). Signed-off-by: Ian Romanick <ian.d.romanick@intel.com> Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com> Reviewed-by: Elie Tournier <elie.tournier@collabora.com>
2016-03-02 23:39:09 +00:00
# For example, this converts things like
#
# 1 + mix(0, a - 1, condition)
#
# into
#
# mix(1, (a-1)+1, condition)
#
# Other optimizations will rearrange the constants.
for op in ['fadd', 'fmul', 'iadd', 'imul']:
optimizations += [
((op, ('bcsel(is_used_once)', a, '#b', c), '#d'), ('bcsel', a, (op, b, d), (op, c, d)))
]
# This section contains "late" optimizations that should be run before
# creating ffmas and calling regular optimizations for the final time.
# Optimizations should go here if they help code generation and conflict
# with the regular optimizations.
before_ffma_optimizations = [
# Propagate constants down multiplication chains
(('~fmul(is_used_once)', ('fmul(is_used_once)', 'a(is_not_const)', '#b'), 'c(is_not_const)'), ('fmul', ('fmul', a, c), b)),
(('imul(is_used_once)', ('imul(is_used_once)', 'a(is_not_const)', '#b'), 'c(is_not_const)'), ('imul', ('imul', a, c), b)),
(('~fadd(is_used_once)', ('fadd(is_used_once)', 'a(is_not_const)', '#b'), 'c(is_not_const)'), ('fadd', ('fadd', a, c), b)),
(('iadd(is_used_once)', ('iadd(is_used_once)', 'a(is_not_const)', '#b'), 'c(is_not_const)'), ('iadd', ('iadd', a, c), b)),
(('~fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))),
(('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))),
(('~fadd', ('fneg', a), a), 0.0),
(('iadd', ('ineg', a), a), 0),
(('iadd', ('ineg', a), ('iadd', a, b)), b),
(('iadd', a, ('iadd', ('ineg', a), b)), b),
(('~fadd', ('fneg', a), ('fadd', a, b)), b),
(('~fadd', a, ('fadd', ('fneg', a), b)), b),
]
# This section contains "late" optimizations that should be run after the
# regular optimizations have finished. Optimizations should go here if
# they help code generation but do not necessarily produce code that is
# more easily optimizable.
late_optimizations = [
# Most of these optimizations aren't quite safe when you get infinity or
# Nan involved but the first one should be fine.
nir: See through an fneg to apply existing optimizations Doing the same for the existing feq and fne transformations didn't help anything in shader-db. shader-db results: Broadwell and Skylake (Skylake shown) total instructions in shared programs: 14529463 -> 14526147 (-0.02%) instructions in affected programs: 402420 -> 399104 (-0.82%) helped: 2136 HURT: 131 helped stats (abs) min: 1 max: 10 x̄: 1.61 x̃: 1 helped stats (rel) min: 0.03% max: 16.22% x̄: 3.14% x̃: 1.12% HURT stats (abs) min: 1 max: 2 x̄: 1.01 x̃: 1 HURT stats (rel) min: 0.13% max: 7.69% x̄: 0.75% x̃: 0.57% 95% mean confidence interval for instructions value: -1.51 -1.41 95% mean confidence interval for instructions %-change: -3.06% -2.78% Instructions are helped. total cycles in shared programs: 533146915 -> 533120531 (<.01%) cycles in affected programs: 10356261 -> 10329877 (-0.25%) helped: 1933 HURT: 844 helped stats (abs) min: 1 max: 490 x̄: 29.44 x̃: 16 helped stats (rel) min: <.01% max: 28.57% x̄: 3.43% x̃: 1.88% HURT stats (abs) min: 1 max: 423 x̄: 36.17 x̃: 12 HURT stats (rel) min: <.01% max: 23.75% x̄: 1.90% x̃: 0.59% 95% mean confidence interval for cycles value: -11.78 -7.22 95% mean confidence interval for cycles %-change: -1.98% -1.65% Cycles are helped. Haswell total instructions in shared programs: 9037416 -> 9034106 (-0.04%) instructions in affected programs: 389831 -> 386521 (-0.85%) helped: 2184 HURT: 120 helped stats (abs) min: 1 max: 11 x̄: 1.57 x̃: 1 helped stats (rel) min: 0.03% max: 25.00% x̄: 2.73% x̃: 1.02% HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 HURT stats (rel) min: 0.19% max: 7.69% x̄: 0.81% x̃: 0.57% 95% mean confidence interval for instructions value: -1.49 -1.39 95% mean confidence interval for instructions %-change: -2.68% -2.41% Instructions are helped. total cycles in shared programs: 84636243 -> 84631628 (<.01%) cycles in affected programs: 4745058 -> 4740443 (-0.10%) helped: 1904 HURT: 960 helped stats (abs) min: 1 max: 466 x̄: 30.21 x̃: 18 helped stats (rel) min: 0.02% max: 36.36% x̄: 3.57% x̃: 2.38% HURT stats (abs) min: 1 max: 1080 x̄: 55.11 x̃: 14 HURT stats (rel) min: 0.02% max: 51.33% x̄: 2.77% x̃: 0.81% 95% mean confidence interval for cycles value: -4.51 1.29 95% mean confidence interval for cycles %-change: -1.64% -1.25% Inconclusive result (value mean confidence interval includes 0). LOST: 1 GAINED: 0 Sandy Bridge and Ivy Bridge (Ivy Bridge shown) total instructions in shared programs: 10018873 -> 10015456 (-0.03%) instructions in affected programs: 512820 -> 509403 (-0.67%) helped: 2268 HURT: 162 helped stats (abs) min: 1 max: 11 x̄: 1.62 x̃: 1 helped stats (rel) min: 0.03% max: 25.00% x̄: 2.47% x̃: 0.88% HURT stats (abs) min: 1 max: 4 x̄: 1.59 x̃: 1 HURT stats (rel) min: 0.09% max: 7.69% x̄: 0.86% x̃: 0.50% 95% mean confidence interval for instructions value: -1.46 -1.35 95% mean confidence interval for instructions %-change: -2.38% -2.12% Instructions are helped. total cycles in shared programs: 87538223 -> 87524771 (-0.02%) cycles in affected programs: 5435520 -> 5422068 (-0.25%) helped: 1916 HURT: 946 helped stats (abs) min: 1 max: 1392 x̄: 29.44 x̃: 18 helped stats (rel) min: <.01% max: 34.51% x̄: 3.34% x̃: 1.97% HURT stats (abs) min: 1 max: 633 x̄: 45.41 x̃: 11 HURT stats (rel) min: 0.02% max: 25.95% x̄: 2.41% x̃: 0.62% 95% mean confidence interval for cycles value: -7.34 -2.06 95% mean confidence interval for cycles %-change: -1.62% -1.26% Cycles are helped. LOST: 1 GAINED: 0 Iron Lake total instructions in shared programs: 7888446 -> 7886959 (-0.02%) instructions in affected programs: 331581 -> 330094 (-0.45%) helped: 1160 HURT: 97 helped stats (abs) min: 1 max: 10 x̄: 1.37 x̃: 1 helped stats (rel) min: 0.02% max: 9.68% x̄: 0.93% x̃: 0.43% HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 HURT stats (rel) min: 0.17% max: 4.17% x̄: 0.37% x̃: 0.25% 95% mean confidence interval for instructions value: -1.25 -1.12 95% mean confidence interval for instructions %-change: -0.91% -0.75% Instructions are helped. total cycles in shared programs: 178130766 -> 178116996 (<.01%) cycles in affected programs: 12534564 -> 12520794 (-0.11%) helped: 1856 HURT: 187 helped stats (abs) min: 2 max: 202 x̄: 7.78 x̃: 4 helped stats (rel) min: <.01% max: 6.47% x̄: 0.28% x̃: 0.11% HURT stats (abs) min: 2 max: 26 x̄: 3.55 x̃: 2 HURT stats (rel) min: 0.01% max: 2.14% x̄: 0.08% x̃: 0.02% 95% mean confidence interval for cycles value: -7.41 -6.07 95% mean confidence interval for cycles %-change: -0.28% -0.22% Cycles are helped. GM45 total instructions in shared programs: 4858912 -> 4857887 (-0.02%) instructions in affected programs: 237565 -> 236540 (-0.43%) helped: 867 HURT: 57 helped stats (abs) min: 1 max: 10 x̄: 1.25 x̃: 1 helped stats (rel) min: 0.02% max: 9.38% x̄: 0.87% x̃: 0.43% HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 HURT stats (rel) min: 0.16% max: 3.85% x̄: 0.34% x̃: 0.22% 95% mean confidence interval for instructions value: -1.18 -1.04 95% mean confidence interval for instructions %-change: -0.88% -0.71% Instructions are helped. total cycles in shared programs: 122189118 -> 122180816 (<.01%) cycles in affected programs: 8776418 -> 8768116 (-0.09%) helped: 1213 HURT: 166 helped stats (abs) min: 2 max: 202 x̄: 7.30 x̃: 4 helped stats (rel) min: <.01% max: 6.43% x̄: 0.25% x̃: 0.11% HURT stats (abs) min: 2 max: 26 x̄: 3.35 x̃: 2 HURT stats (rel) min: 0.01% max: 2.14% x̄: 0.06% x̃: 0.02% 95% mean confidence interval for cycles value: -6.78 -5.26 95% mean confidence interval for cycles %-change: -0.24% -0.18% Cycles are helped. Signed-off-by: Ian Romanick <ian.d.romanick@intel.com> Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com> Reviewed-by: Elie Tournier <elie.tournier@collabora.com>
2018-01-09 23:32:47 +00:00
(('flt', ('fadd', a, b), 0.0), ('flt', a, ('fneg', b))),
(('flt', ('fneg', ('fadd', a, b)), 0.0), ('flt', ('fneg', a), b)),
(('~fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
(('~fge', ('fneg', ('fadd', a, b)), 0.0), ('fge', ('fneg', a), b)),
(('~feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
(('~fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
(('~fge', ('fmin(is_used_once)', ('fadd(is_used_once)', a, b), ('fadd', c, d)), 0.0), ('iand', ('fge', a, ('fneg', b)), ('fge', c, ('fneg', d)))),
(('fdot2', a, b), ('fdot_replicated2', a, b), 'options->fdot_replicates'),
(('fdot3', a, b), ('fdot_replicated3', a, b), 'options->fdot_replicates'),
(('fdot4', a, b), ('fdot_replicated4', a, b), 'options->fdot_replicates'),
(('fdph', a, b), ('fdph_replicated', a, b), 'options->fdot_replicates'),
(('b2f(is_used_more_than_once)', ('inot', a)), ('bcsel', a, 0.0, 1.0)),
(('fneg(is_used_more_than_once)', ('b2f', ('inot', a))), ('bcsel', a, -0.0, -1.0)),
# we do these late so that we don't get in the way of creating ffmas
(('fmin', ('fadd(is_used_once)', '#c', a), ('fadd(is_used_once)', '#c', b)), ('fadd', c, ('fmin', a, b))),
(('fmax', ('fadd(is_used_once)', '#c', a), ('fadd(is_used_once)', '#c', b)), ('fadd', c, ('fmax', a, b))),
]
print nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render()
print nir_algebraic.AlgebraicPass("nir_opt_algebraic_before_ffma",
before_ffma_optimizations).render()
print nir_algebraic.AlgebraicPass("nir_opt_algebraic_late",
late_optimizations).render()