From 9cfafbb09beb605585672d5b6007a80dfc9d06ce Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Thu, 2 Jun 2022 19:07:49 -0400 Subject: [PATCH] pan/va: Try widening small constants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Many small integers are available as small constants, but the table of small constants is tightly packed. Zero and sign extensions are usually required to access small integers. When packing constants, try zero/sign extension for unsigned/signed integer instructions respectively. total instructions in shared programs: 2716912 -> 2707795 (-0.34%) instructions in affected programs: 1045609 -> 1036492 (-0.87%) helped: 4460 HURT: 125 helped stats (abs) min: 1.0 max: 58.0 x̄: 2.14 x̃: 1 helped stats (rel) min: 0.14% max: 23.85% x̄: 1.35% x̃: 0.88% HURT stats (abs) min: 1.0 max: 68.0 x̄: 3.41 x̃: 1 HURT stats (rel) min: 0.34% max: 3.88% x̄: 0.93% x̃: 0.70% 95% mean confidence interval for instructions value: -2.09 -1.89 95% mean confidence interval for instructions %-change: -1.33% -1.25% Instructions are helped. total cycles in shared programs: 141984.06 -> 141932.42 (-0.04%) cycles in affected programs: 552.08 -> 500.44 (-9.35%) helped: 18 HURT: 0 helped stats (abs) min: 0.015625 max: 11.0 x̄: 2.87 x̃: 0 helped stats (rel) min: 0.50% max: 19.64% x̄: 5.36% x̃: 1.53% 95% mean confidence interval for cycles value: -5.17 -0.56 95% mean confidence interval for cycles %-change: -9.28% -1.44% Cycles are helped. 
total cvt in shared programs: 13805.05 -> 13663.39 (-1.03%) cvt in affected programs: 6127.45 -> 5985.80 (-2.31%) helped: 4460 HURT: 125 helped stats (abs) min: 0.015625 max: 0.90625 x̄: 0.03 x̃: 0 helped stats (rel) min: 0.35% max: 50.00% x̄: 5.19% x̃: 4.00% HURT stats (abs) min: 0.015625 max: 1.0625 x̄: 0.05 x̃: 0 HURT stats (rel) min: 0.77% max: 9.30% x̄: 3.40% x̃: 2.78% 95% mean confidence interval for cvt value: -0.03 -0.03 95% mean confidence interval for cvt %-change: -5.10% -4.81% Cvt are helped. total ls in shared programs: 129545 -> 129494 (-0.04%) ls in affected programs: 495 -> 444 (-10.30%) helped: 6 HURT: 0 helped stats (abs) min: 2.0 max: 11.0 x̄: 8.50 x̃: 11 helped stats (rel) min: 1.49% max: 19.64% x̄: 13.95% x̃: 19.64% 95% mean confidence interval for ls value: -12.68 -4.32 95% mean confidence interval for ls %-change: -23.23% -4.67% Ls are helped. total quadwords in shared programs: 1476416 -> 1469824 (-0.45%) quadwords in affected programs: 121208 -> 114616 (-5.44%) helped: 820 HURT: 16 helped stats (abs) min: 8.0 max: 32.0 x̄: 8.28 x̃: 8 helped stats (rel) min: 1.39% max: 50.00% x̄: 11.00% x̃: 10.00% HURT stats (abs) min: 8.0 max: 32.0 x̄: 12.50 x̃: 8 HURT stats (rel) min: 1.38% max: 10.00% x̄: 6.19% x̃: 7.14% 95% mean confidence interval for quadwords value: -8.14 -7.63 95% mean confidence interval for quadwords %-change: -11.20% -10.15% Quadwords are helped. total threads in shared programs: 53633 -> 53663 (0.06%) threads in affected programs: 39 -> 69 (76.92%) helped: 33 HURT: 3 helped stats (abs) min: 1.0 max: 1.0 x̄: 1.00 x̃: 1 helped stats (rel) min: 100.00% max: 100.00% x̄: 100.00% x̃: 100.00% HURT stats (abs) min: 1.0 max: 1.0 x̄: 1.00 x̃: 1 HURT stats (rel) min: 50.00% max: 50.00% x̄: 50.00% x̃: 50.00% 95% mean confidence interval for threads value: 0.64 1.02 95% mean confidence interval for threads %-change: 73.27% 101.73% Threads are helped. 
total spills in shared programs: 154 -> 103 (-33.12%) spills in affected programs: 75 -> 24 (-68.00%) helped: 6 HURT: 0 total fills in shared programs: 656 -> 656 (0.00%) fills in affected programs: 148 -> 148 (0.00%) helped: 2 HURT: 4 Signed-off-by: Alyssa Rosenzweig Part-of: --- .../bifrost/valhall/va_lower_constants.c | 46 ++++++++++++++----- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/src/panfrost/bifrost/valhall/va_lower_constants.c b/src/panfrost/bifrost/valhall/va_lower_constants.c index e05735f2043..f8dc9d37a8f 100644 --- a/src/panfrost/bifrost/valhall/va_lower_constants.c +++ b/src/panfrost/bifrost/valhall/va_lower_constants.c @@ -86,8 +86,30 @@ va_demote_constant_fp16(uint32_t value) return bi_null(); } +/* + * Test if a 32-bit word arises as a sign or zero extension of some 8/16-bit + * value. + */ +static bool +is_extension_of_8(uint32_t x, bool is_signed) +{ + if (is_signed) + return (x <= INT8_MAX) || ((x >> 7) == BITFIELD_MASK(24 + 1)); + else + return (x <= UINT8_MAX); +} + +static bool +is_extension_of_16(uint32_t x, bool is_signed) +{ + if (is_signed) + return (x <= INT16_MAX) || ((x >> 15) == BITFIELD_MASK(16 + 1)); + else + return (x <= UINT16_MAX); +} + static bi_index -va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info, bool staging) +va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info, bool is_signed, bool staging) { /* Try the constant as-is */ if (!staging) { @@ -120,20 +142,21 @@ va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info, bool } } - /* TODO: Distinguish sign extend from zero extend */ -#if 0 - /* Try zero-extending a single byte */ - if (!staging && info.widen && value <= UINT8_MAX) { - bi_index lut = va_lut_index_8(value); + /* Try extending a byte */ + if (!staging && (info.widen || info.lanes) && + is_extension_of_8(value, is_signed)) { + + bi_index lut = va_lut_index_8(value & 0xFF); if (!bi_is_null(lut)) return lut; } - /* Try 
zero-extending a single halfword */ - if (!staging && info.widen && value <= UINT16_MAX) { - bi_index lut = va_lut_index_16(value); + /* Try extending a halfword */ + if (!staging && info.widen && + is_extension_of_16(value, is_signed)) { + + bi_index lut = va_lut_index_16(value & 0xFFFF); if (!bi_is_null(lut)) return lut; } -#endif /* Try demoting the constant to FP16 */ if (!staging && info.swizzle && info.size == VA_SIZE_32) { @@ -160,6 +183,7 @@ va_lower_constants(bi_context *ctx, bi_instr *I) /* abs(#c) is pointless, but -#c occurs in transcendental sequences */ assert(!I->src[s].abs && "redundant .abs modifier"); + bool is_signed = valhall_opcodes[I->op].is_signed; bool staging = (s < valhall_opcodes[I->op].nr_staging_srcs); struct va_src_info info = va_src_info(I->op, s); uint32_t value = I->src[s].value; @@ -194,7 +218,7 @@ va_lower_constants(bi_context *ctx, bi_instr *I) value = bi_apply_swizzle(value, swz); } - bi_index cons = va_resolve_constant(&b, value, info, staging); + bi_index cons = va_resolve_constant(&b, value, info, is_signed, staging); cons.neg ^= I->src[s].neg; I->src[s] = cons; }