From 2c4fd24c016a405f37e2a01d87c4b4801f54cb79 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Tue, 2 Feb 2021 10:18:42 -0800
Subject: [PATCH] nir/algebraic: Apply addition property of equality to the
 other ordering too
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Inequality comparison operations are not commutative, so `foo < bar` and
`bar < foo` both have to be explicitly listed.

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>

All Intel GPUs had similar results. (Ice Lake shown)
total instructions in shared programs: 20027051 -> 20026899 (<.01%)
instructions in affected programs: 37181 -> 37029 (-0.41%)
helped: 85
HURT: 0
helped stats (abs) min: 1 max: 20 x̄: 1.79 x̃: 1
helped stats (rel) min: 0.05% max: 6.78% x̄: 0.92% x̃: 0.68%
95% mean confidence interval for instructions value: -2.42 -1.15
95% mean confidence interval for instructions %-change: -1.23% -0.61%
Instructions are helped.

total cycles in shared programs: 979762793 -> 979753527 (<.01%)
cycles in affected programs: 2653905 -> 2644639 (-0.35%)
helped: 104
HURT: 50
helped stats (abs) min: 1 max: 1048 x̄: 119.99 x̃: 11
helped stats (rel) min: <.01% max: 9.88% x̄: 0.77% x̃: 0.20%
HURT stats (abs)   min: 1 max: 734 x̄: 64.26 x̃: 8
HURT stats (rel)   min: <.01% max: 3.06% x̄: 0.36% x̃: 0.10%
95% mean confidence interval for cycles value: -98.65 -21.68
95% mean confidence interval for cycles %-change: -0.66% -0.15%
Cycles are helped.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9374>
---
 src/compiler/nir/nir_opt_algebraic.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 2185c3101ea..1444be1a2dd 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -2163,8 +2163,12 @@ late_optimizations = [
    #      a != -b      false            false       true     true
    (('flt',                        ('fadd(is_used_once)', a, b),  0.0), ('flt',          a, ('fneg', b))),
    (('flt', ('fneg(is_used_once)', ('fadd(is_used_once)', a, b)), 0.0), ('flt', ('fneg', a),         b)),
+   (('flt', 0.0,                        ('fadd(is_used_once)', a, b) ), ('flt', ('fneg', a),         b)),
+   (('flt', 0.0, ('fneg(is_used_once)', ('fadd(is_used_once)', a, b))), ('flt',          a, ('fneg', b))),
    (('~fge',                        ('fadd(is_used_once)', a, b),  0.0), ('fge',          a, ('fneg', b))),
    (('~fge', ('fneg(is_used_once)', ('fadd(is_used_once)', a, b)), 0.0), ('fge', ('fneg', a),         b)),
+   (('~fge', 0.0,                        ('fadd(is_used_once)', a, b) ), ('fge', ('fneg', a),         b)),
+   (('~fge', 0.0, ('fneg(is_used_once)', ('fadd(is_used_once)', a, b))), ('fge',          a, ('fneg', b))),
    (('~feq', ('fadd(is_used_once)', a, b), 0.0), ('feq', a, ('fneg', b))),
    (('~fneu', ('fadd(is_used_once)', a, b), 0.0), ('fneu', a, ('fneg', b))),
 
@@ -2173,6 +2177,8 @@ late_optimizations = [
    # result if b is NaN. Therefore, the replacements are exact.
    (('fge',                        ('fadd(is_used_once)', 'a(is_finite)', b),  0.0), ('fge',          a, ('fneg', b))),
    (('fge', ('fneg(is_used_once)', ('fadd(is_used_once)', 'a(is_finite)', b)), 0.0), ('fge', ('fneg', a),         b)),
+   (('fge', 0.0,                        ('fadd(is_used_once)', 'a(is_finite)', b) ), ('fge', ('fneg', a),         b)),
+   (('fge', 0.0, ('fneg(is_used_once)', ('fadd(is_used_once)', 'a(is_finite)', b))), ('fge',          a, ('fneg', b))),
    (('feq',  ('fadd(is_used_once)', 'a(is_finite)', b), 0.0), ('feq',  a, ('fneg', b))),
    (('fneu', ('fadd(is_used_once)', 'a(is_finite)', b), 0.0), ('fneu', a, ('fneg', b))),