From f4a7dbc58f884ea73c3f8fb0e64ed9c32ee9c07d Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 27 Jan 2021 19:42:44 -0800 Subject: [PATCH] nir/range_analysis: Fix analysis of fmin, fmax, or fsat with NaN source MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recall that when either value is NaN, fmax will pick the other value. This means the result range of the fmax will either be the "ideal" result range (calculated above) or the range of the non-NaN value. Previously, something like fmax({gt_zero}, {lt_zero, is_a_number}) would return a range of gt_zero. However, if the "gt_zero" parameter is NaN, the actual result will be the "lt_zero" parameter. This analysis depends on the is_a_number analysis also added in this MR. Assuming this doesn't cause any unforeseen problems, I believe we should wait a bit, then nominate a subset of the series for the stable branches. This fixes the piglit tests tests/spec/glsl-1.30/execution/range_analysis_fmax_of_nan.shader_test tests/spec/glsl-1.30/execution/range_analysis_fmin_of_nan.shader_test from https://gitlab.freedesktop.org/mesa/piglit/-/merge_requests/463. Even with the added fsat fixes, range_analysis_fsat_of_nan.shader_test still fails. There are some other issues there that will be addressed in later commits (in another MR). v2: Add fsat fixes. Suggested by Rhys. Fixes: 405de7ccb6c ("nir/range-analysis: Rudimentary value range analysis pass") Reviewed-by: Rhys Perry Shader-db results: All Intel platforms had similar results. (Tiger Lake shown) total instructions in shared programs: 21049290 -> 21049314 (<.01%) instructions in affected programs: 3175 -> 3199 (0.76%) helped: 0 HURT: 17 HURT stats (abs) min: 1 max: 3 x̄: 1.41 x̃: 1 HURT stats (rel) min: 0.20% max: 1.89% x̄: 0.97% x̃: 0.92% 95% mean confidence interval for instructions value: 1.09 1.73 95% mean confidence interval for instructions %-change: 0.75% 1.19% Instructions are HURT. total cycles in shared programs: 855136176 -> 855136406 (<.01%) cycles in affected programs: 37579 -> 37809 (0.61%) helped: 0 HURT: 17 HURT stats (abs) min: 12 max: 20 x̄: 13.53 x̃: 14 HURT stats (rel) min: 0.17% max: 1.13% x̄: 0.79% x̃: 0.91% 95% mean confidence interval for cycles value: 12.53 14.53 95% mean confidence interval for cycles %-change: 0.63% 0.94% Cycles are HURT. Fossil-db results: Tiger Lake Instructions in all programs: 160901033 -> 160902591 (+0.0%) SENDs in all programs: 6812270 -> 6812270 (+0.0%) Loops in all programs: 38225 -> 38225 (+0.0%) Cycles in all programs: 7430016795 -> 7429003266 (-0.0%) Spills in all programs: 192582 -> 192582 (+0.0%) Fills in all programs: 304539 -> 304539 (+0.0%) Ice Lake Instructions in all programs: 145299102 -> 145301634 (+0.0%) SENDs in all programs: 6863890 -> 6863890 (+0.0%) Loops in all programs: 38219 -> 38219 (+0.0%) Cycles in all programs: 8798390846 -> 8798589772 (+0.0%) Spills in all programs: 216880 -> 216880 (+0.0%) Fills in all programs: 334250 -> 334250 (+0.0%) Skylake Instructions in all programs: 135889478 -> 135892010 (+0.0%) SENDs in all programs: 6802916 -> 6802916 (+0.0%) Loops in all programs: 38216 -> 38216 (+0.0%) Cycles in all programs: 8442624166 -> 8442597324 (-0.0%) Spills in all programs: 194839 -> 194839 (+0.0%) Fills in all programs: 301116 -> 301116 (+0.0%) Part-of: --- src/compiler/nir/nir_range_analysis.c | 38 +++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/src/compiler/nir/nir_range_analysis.c b/src/compiler/nir/nir_range_analysis.c index 26011f73696..9bb5c50fd6a 100644 --- a/src/compiler/nir/nir_range_analysis.c +++ b/src/compiler/nir/nir_range_analysis.c @@ -289,6 +289,11 @@ analyze_constant(const struct nir_alu_instr *instr, unsigned src, } \ } while (false) +#else +#define ASSERT_TABLE_IS_COMMUTATIVE(t) +#define ASSERT_TABLE_IS_DIAGONAL(t) +#endif /* !defined(NDEBUG) */ + static enum ssa_ranges union_ranges(enum ssa_ranges a, enum ssa_ranges b) { @@ -309,6 +314,7 @@ union_ranges(enum ssa_ranges a, enum ssa_ranges b) return union_table[a][b]; } +#ifndef NDEBUG /* Verify that the 'unknown' entry in each row (or column) of the table is the * union of all the other values in the row (or column). */ @@ -406,14 +412,12 @@ union_ranges(enum ssa_ranges a, enum ssa_ranges b) } while (false) #else -#define ASSERT_TABLE_IS_COMMUTATIVE(t) -#define ASSERT_TABLE_IS_DIAGONAL(t) #define ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(t) #define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(t) #define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(t) #define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(t) #define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(t) -#endif +#endif /* !defined(NDEBUG) */ /** * Analyze an expression to determine the range of its result @@ -809,6 +813,17 @@ analyze_expression(const nir_alu_instr *instr, unsigned src, ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(table); r.range = table[left.range][right.range]; + + /* Recall that when either value is NaN, fmax will pick the other value. + * This means the result range of the fmax will either be the "ideal" + * result range (calculated above) or the range of the non-NaN value. + */ + if (!left.is_a_number) + r.range = union_ranges(r.range, right.range); + + if (!right.is_a_number) + r.range = union_ranges(r.range, left.range); + break; } @@ -884,6 +899,17 @@ analyze_expression(const nir_alu_instr *instr, unsigned src, ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(table); r.range = table[left.range][right.range]; + + /* Recall that when either value is NaN, fmin will pick the other value. + * This means the result range of the fmin will either be the "ideal" + * result range (calculated above) or the range of the non-NaN value. + */ + if (!left.is_a_number) + r.range = union_ranges(r.range, right.range); + + if (!right.is_a_number) + r.range = union_ranges(r.range, left.range); + break; } @@ -955,8 +981,10 @@ analyze_expression(const nir_alu_instr *instr, unsigned src, break; case gt_zero: - /* The fsat doesn't add any information in this case. */ - r.range = left.range; + /* fsat is equivalent to fmin(fmax(X, 0.0), 1.0), so if X is not a + * number, the result will be 0. + */ + r.range = left.is_a_number ? gt_zero : ge_zero; r.is_integral = left.is_integral; break;