pan/va: Try widening small constants
Many small integers are available as small constants, but the table of small constants is tightly packed. Zero and sign extensions are usually required to access small integers. When packing constants, try zero/sign extension for unsigned/signed integer instructions respectively. total instructions in shared programs: 2716912 -> 2707795 (-0.34%) instructions in affected programs: 1045609 -> 1036492 (-0.87%) helped: 4460 HURT: 125 helped stats (abs) min: 1.0 max: 58.0 x̄: 2.14 x̃: 1 helped stats (rel) min: 0.14% max: 23.85% x̄: 1.35% x̃: 0.88% HURT stats (abs) min: 1.0 max: 68.0 x̄: 3.41 x̃: 1 HURT stats (rel) min: 0.34% max: 3.88% x̄: 0.93% x̃: 0.70% 95% mean confidence interval for instructions value: -2.09 -1.89 95% mean confidence interval for instructions %-change: -1.33% -1.25% Instructions are helped. total cycles in shared programs: 141984.06 -> 141932.42 (-0.04%) cycles in affected programs: 552.08 -> 500.44 (-9.35%) helped: 18 HURT: 0 helped stats (abs) min: 0.015625 max: 11.0 x̄: 2.87 x̃: 0 helped stats (rel) min: 0.50% max: 19.64% x̄: 5.36% x̃: 1.53% 95% mean confidence interval for cycles value: -5.17 -0.56 95% mean confidence interval for cycles %-change: -9.28% -1.44% Cycles are helped. total cvt in shared programs: 13805.05 -> 13663.39 (-1.03%) cvt in affected programs: 6127.45 -> 5985.80 (-2.31%) helped: 4460 HURT: 125 helped stats (abs) min: 0.015625 max: 0.90625 x̄: 0.03 x̃: 0 helped stats (rel) min: 0.35% max: 50.00% x̄: 5.19% x̃: 4.00% HURT stats (abs) min: 0.015625 max: 1.0625 x̄: 0.05 x̃: 0 HURT stats (rel) min: 0.77% max: 9.30% x̄: 3.40% x̃: 2.78% 95% mean confidence interval for cvt value: -0.03 -0.03 95% mean confidence interval for cvt %-change: -5.10% -4.81% Cvt are helped. 
total ls in shared programs: 129545 -> 129494 (-0.04%) ls in affected programs: 495 -> 444 (-10.30%) helped: 6 HURT: 0 helped stats (abs) min: 2.0 max: 11.0 x̄: 8.50 x̃: 11 helped stats (rel) min: 1.49% max: 19.64% x̄: 13.95% x̃: 19.64% 95% mean confidence interval for ls value: -12.68 -4.32 95% mean confidence interval for ls %-change: -23.23% -4.67% Ls are helped. total quadwords in shared programs: 1476416 -> 1469824 (-0.45%) quadwords in affected programs: 121208 -> 114616 (-5.44%) helped: 820 HURT: 16 helped stats (abs) min: 8.0 max: 32.0 x̄: 8.28 x̃: 8 helped stats (rel) min: 1.39% max: 50.00% x̄: 11.00% x̃: 10.00% HURT stats (abs) min: 8.0 max: 32.0 x̄: 12.50 x̃: 8 HURT stats (rel) min: 1.38% max: 10.00% x̄: 6.19% x̃: 7.14% 95% mean confidence interval for quadwords value: -8.14 -7.63 95% mean confidence interval for quadwords %-change: -11.20% -10.15% Quadwords are helped. total threads in shared programs: 53633 -> 53663 (0.06%) threads in affected programs: 39 -> 69 (76.92%) helped: 33 HURT: 3 helped stats (abs) min: 1.0 max: 1.0 x̄: 1.00 x̃: 1 helped stats (rel) min: 100.00% max: 100.00% x̄: 100.00% x̃: 100.00% HURT stats (abs) min: 1.0 max: 1.0 x̄: 1.00 x̃: 1 HURT stats (rel) min: 50.00% max: 50.00% x̄: 50.00% x̃: 50.00% 95% mean confidence interval for threads value: 0.64 1.02 95% mean confidence interval for threads %-change: 73.27% 101.73% Threads are helped. total spills in shared programs: 154 -> 103 (-33.12%) spills in affected programs: 75 -> 24 (-68.00%) helped: 6 HURT: 0 total fills in shared programs: 656 -> 656 (0.00%) fills in affected programs: 148 -> 148 (0.00%) helped: 2 HURT: 4 Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16862>
This commit is contained in:
parent
72146051d5
commit
9cfafbb09b
|
@ -86,8 +86,30 @@ va_demote_constant_fp16(uint32_t value)
|
|||
return bi_null();
|
||||
}
|
||||
|
||||
/*
|
||||
* Test if a 32-bit word arises as a sign or zero extension of some 8/16-bit
|
||||
* value.
|
||||
*/
|
||||
/*
 * Return true when the 32-bit word x can be rebuilt by extending its low
 * byte: sign extension when is_signed is set, zero extension otherwise.
 */
static bool
is_extension_of_8(uint32_t x, bool is_signed)
{
   if (!is_signed) {
      /* Zero extension: nothing may be set above bit 7 */
      return (x >> 8) == 0;
   }

   /*
    * Sign extension: bits 31..7 (the byte's sign bit and everything above
    * it) must be either all clear or all set.
    */
   const uint32_t upper = x >> 7;

   return (upper == 0) || (upper == (UINT32_MAX >> 7));
}
|
||||
|
||||
/*
 * Return true when the 32-bit word x can be rebuilt by extending its low
 * halfword: sign extension when is_signed is set, zero extension otherwise.
 */
static bool
is_extension_of_16(uint32_t x, bool is_signed)
{
   if (!is_signed) {
      /* Zero extension: nothing may be set above bit 15 */
      return (x >> 16) == 0;
   }

   /*
    * Sign extension: bits 31..15 (the halfword's sign bit and everything
    * above it) must be either all clear or all set.
    */
   const uint32_t upper = x >> 15;

   return (upper == 0) || (upper == (UINT32_MAX >> 15));
}
|
||||
|
||||
static bi_index
|
||||
va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info, bool staging)
|
||||
va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info, bool is_signed, bool staging)
|
||||
{
|
||||
/* Try the constant as-is */
|
||||
if (!staging) {
|
||||
|
@ -120,20 +142,21 @@ va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info, bool
|
|||
}
|
||||
}
|
||||
|
||||
/* TODO: Distinguish sign extend from zero extend */
|
||||
#if 0
|
||||
/* Try zero-extending a single byte */
|
||||
if (!staging && info.widen && value <= UINT8_MAX) {
|
||||
bi_index lut = va_lut_index_8(value);
|
||||
/* Try extending a byte */
|
||||
if (!staging && (info.widen || info.lanes) &&
|
||||
is_extension_of_8(value, is_signed)) {
|
||||
|
||||
bi_index lut = va_lut_index_8(value & 0xFF);
|
||||
if (!bi_is_null(lut)) return lut;
|
||||
}
|
||||
|
||||
/* Try zero-extending a single halfword */
|
||||
if (!staging && info.widen && value <= UINT16_MAX) {
|
||||
bi_index lut = va_lut_index_16(value);
|
||||
/* Try extending a halfword */
|
||||
if (!staging && info.widen &&
|
||||
is_extension_of_16(value, is_signed)) {
|
||||
|
||||
bi_index lut = va_lut_index_16(value & 0xFFFF);
|
||||
if (!bi_is_null(lut)) return lut;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Try demoting the constant to FP16 */
|
||||
if (!staging && info.swizzle && info.size == VA_SIZE_32) {
|
||||
|
@ -160,6 +183,7 @@ va_lower_constants(bi_context *ctx, bi_instr *I)
|
|||
/* abs(#c) is pointless, but -#c occurs in transcendental sequences */
|
||||
assert(!I->src[s].abs && "redundant .abs modifier");
|
||||
|
||||
bool is_signed = valhall_opcodes[I->op].is_signed;
|
||||
bool staging = (s < valhall_opcodes[I->op].nr_staging_srcs);
|
||||
struct va_src_info info = va_src_info(I->op, s);
|
||||
uint32_t value = I->src[s].value;
|
||||
|
@ -194,7 +218,7 @@ va_lower_constants(bi_context *ctx, bi_instr *I)
|
|||
value = bi_apply_swizzle(value, swz);
|
||||
}
|
||||
|
||||
bi_index cons = va_resolve_constant(&b, value, info, staging);
|
||||
bi_index cons = va_resolve_constant(&b, value, info, is_signed, staging);
|
||||
cons.neg ^= I->src[s].neg;
|
||||
I->src[s] = cons;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue