r300: better packing for immediates

How does this work? First we check which immediates are used as vectors,
i.e., have any reads that use 2 or more channels. Such immediates
will be placed in free slots (but only the specific channels that are
used in the vector). This way we don't have to worry about swizzling
restrictions. The remaining scalar immediates will be checked for
duplicates and placed in free slots, including any empty slots in
previously placed vector immediates (any swizzle is valid for scalars).

RV410:
total instructions in shared programs: 98883 -> 98905 (0.02%)
instructions in affected programs: 15414 -> 15436 (0.14%)
helped: 100
HURT: 102
total presub in shared programs: 2235 -> 2235 (0.00%)
presub in affected programs: 608 -> 608 (0.00%)
helped: 51
HURT: 72
total omod in shared programs: 419 -> 418 (-0.24%)
omod in affected programs: 15 -> 14 (-6.67%)
helped: 3
HURT: 3
total temps in shared programs: 15698 -> 15692 (-0.04%)
temps in affected programs: 952 -> 946 (-0.63%)
helped: 46
HURT: 37
total consts in shared programs: 84458 -> 83856 (-0.71%)
consts in affected programs: 14648 -> 14046 (-4.11%)
helped: 499
HURT: 0
total cycles in shared programs: 156476 -> 156493 (0.01%)
cycles in affected programs: 22532 -> 22549 (0.08%)
helped: 100
HURT: 102
LOST:   shaders/ck2/157.shader_test FS
GAINED: shaders/ck2/160.shader_test FS
GAINED: shaders/tesseract/395.shader_test FS

RV530:
total instructions in shared programs: 119543 -> 119612 (0.06%)
instructions in affected programs: 27435 -> 27504 (0.25%)
helped: 118
HURT: 183
total presub in shared programs: 7257 -> 7111 (-2.01%)
presub in affected programs: 1856 -> 1710 (-7.87%)
helped: 121
HURT: 48
total omod in shared programs: 426 -> 427 (0.23%)
omod in affected programs: 5 -> 6 (20.00%)
helped: 1
HURT: 2
total temps in shared programs: 16784 -> 16779 (-0.03%)
temps in affected programs: 392 -> 387 (-1.28%)
helped: 29
HURT: 17
total consts in shared programs: 93198 -> 92667 (-0.57%)
consts in affected programs: 14577 -> 14046 (-3.64%)
helped: 451
HURT: 0
total cycles in shared programs: 186649 -> 186590 (-0.03%)
cycles in affected programs: 26306 -> 26247 (-0.22%)
helped: 125
HURT: 111

Signed-off-by: Pavel Ondračka <pavel.ondracka@gmail.com>
Reviewed-by: Filip Gawin <filip.gawin@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28630>
This commit is contained in:
Pavel Ondračka 2024-04-02 10:25:29 +02:00
parent 11ad056ee9
commit 0c96b03fcf
1 changed files with 63 additions and 3 deletions

View File

@ -18,6 +18,8 @@ struct const_remap_state {
struct rc_constant *constants;
/* New constant layout. */
struct rc_constant_list new_constants;
/* Marks immediates that are used as a vector. Those will be just copied. */
uint8_t *is_used_as_vector;
bool has_rel_addr;
bool are_externals_remapped;
bool is_identity;
@ -48,6 +50,7 @@ static void mark_used(void * userdata, struct rc_instruction * inst,
struct const_remap_state* d = userdata;
if (src->File == RC_FILE_CONSTANT) {
uint8_t mask = 0;
if (src->RelAddr) {
d->has_rel_addr = true;
} else {
@ -55,9 +58,14 @@ static void mark_used(void * userdata, struct rc_instruction * inst,
char swz = GET_SWZ(src->Swizzle, chan);
if (swz > RC_SWIZZLE_W)
continue;
d->constants[src->Index].UseMask |= 1 << swz;
mask |= 1 << swz;
}
}
d->constants[src->Index].UseMask |= mask;
if (d->constants[src->Index].Type == RC_CONSTANT_IMMEDIATE &&
util_bitcount(mask) > 1) {
d->is_used_as_vector[src->Index] |= mask;
}
}
}
@ -82,6 +90,26 @@ static void place_constant_in_free_slot(struct const_remap_state *s, unsigned i)
s->new_constants.Count++;
}
static void place_immediate_in_free_slot(struct const_remap_state *s, unsigned i)
{
assert(util_bitcount(s->is_used_as_vector[i]) > 1);
unsigned count = s->new_constants.Count;
s->new_constants.Constants[count] = s->constants[i];
s->new_constants.Constants[count].UseMask = s->is_used_as_vector[i];
for (unsigned chan = 0; chan < 4; chan++) {
if (s->constants[i].UseMask & 1 << chan & s->is_used_as_vector[i]) {
s->inv_remap_table[i].index[chan] = count;
s->inv_remap_table[i].swizzle[chan] = chan;
}
}
if (count != i) {
s->is_identity = false;
}
s->new_constants.Count++;
}
static void try_merge_constants_external(struct const_remap_state *s, unsigned i)
{
assert(util_bitcount(s->constants[i].UseMask) == 1);
@ -110,10 +138,12 @@ static void try_merge_constants_external(struct const_remap_state *s, unsigned i
static void init_constant_remap_state(struct radeon_compiler *c, struct const_remap_state *s)
{
s->is_identity = true;
s->is_used_as_vector = malloc(c->Program.Constants.Count);
s->new_constants.Constants =
malloc(sizeof(struct rc_constant) * c->Program.Constants.Count);
s->new_constants._Reserved = c->Program.Constants.Count;
s->constants = c->Program.Constants.Constants;
memset(s->is_used_as_vector, 0, c->Program.Constants.Count);
s->remap_table = malloc(c->Program.Constants.Count * sizeof(struct const_remap));
s->inv_remap_table =
@ -179,9 +209,39 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
try_merge_constants_external(s, i);
}
/* Now put the immediates and state constants. */
/* Now put immediates which are used as vectors. */
for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
if (constants[i].Type == RC_CONSTANT_EXTERNAL)
if (constants[i].Type == RC_CONSTANT_IMMEDIATE &&
util_bitcount(s->constants[i].UseMask) > 0 &&
util_bitcount(s->is_used_as_vector[i]) > 0) {
place_immediate_in_free_slot(s, i);
}
}
/* Now walk over scalar immediates and try to:
* a) check for duplicates,
* b) find free slot.
* All of this is already done by rc_constants_add_immediate_scalar,
* so just use it.
*/
for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
if (constants[i].Type != RC_CONSTANT_IMMEDIATE)
continue;
for (unsigned chan = 0; chan < 4; chan++) {
if ((s->constants[i].UseMask) & (1 << chan) &&
(~(s->is_used_as_vector[i]) & (1 << chan))) {
unsigned swz;
s->inv_remap_table[i].index[chan] =
rc_constants_add_immediate_scalar(&s->new_constants, constants[i].u.Immediate[chan], &swz);
s->inv_remap_table[i].swizzle[chan] = GET_SWZ(swz, 0);
s->is_identity = false;
}
}
}
/* Finally place state constants. */
for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
if (constants[i].Type != RC_CONSTANT_STATE)
continue;
if (util_bitcount(s->constants[i].UseMask) > 0) {
place_constant_in_free_slot(s, i);