mirror of https://gitlab.freedesktop.org/mesa/mesa
r300: better packing for immediates
How does this work? First we check which immediates are used as vectors, i.e., have any reads that use 2 or more channels. Such immediates will be placed in free slots (but only the specific channels that are used in the vector). This way we don't have to worry about swizzling restrictions. The remaining scalar immediates will be checked for duplicates and placed in free slots, including any empty slots in previously placed vector immediates (any swizzle is valid for scalars). RV410: total instructions in shared programs: 98883 -> 98905 (0.02%) instructions in affected programs: 15414 -> 15436 (0.14%) helped: 100 HURT: 102 total presub in shared programs: 2235 -> 2235 (0.00%) presub in affected programs: 608 -> 608 (0.00%) helped: 51 HURT: 72 total omod in shared programs: 419 -> 418 (-0.24%) omod in affected programs: 15 -> 14 (-6.67%) helped: 3 HURT: 3 total temps in shared programs: 15698 -> 15692 (-0.04%) temps in affected programs: 952 -> 946 (-0.63%) helped: 46 HURT: 37 total consts in shared programs: 84458 -> 83856 (-0.71%) consts in affected programs: 14648 -> 14046 (-4.11%) helped: 499 HURT: 0 total cycles in shared programs: 156476 -> 156493 (0.01%) cycles in affected programs: 22532 -> 22549 (0.08%) helped: 100 HURT: 102 LOST: shaders/ck2/157.shader_test FS GAINED: shaders/ck2/160.shader_test FS GAINED: shaders/tesseract/395.shader_test FS RV530: total instructions in shared programs: 119543 -> 119612 (0.06%) instructions in affected programs: 27435 -> 27504 (0.25%) helped: 118 HURT: 183 total presub in shared programs: 7257 -> 7111 (-2.01%) presub in affected programs: 1856 -> 1710 (-7.87%) helped: 121 HURT: 48 total omod in shared programs: 426 -> 427 (0.23%) omod in affected programs: 5 -> 6 (20.00%) helped: 1 HURT: 2 total temps in shared programs: 16784 -> 16779 (-0.03%) temps in affected programs: 392 -> 387 (-1.28%) helped: 29 HURT: 17 total consts in shared programs: 93198 -> 92667 (-0.57%) consts in affected programs: 14577 -> 14046 
(-3.64%) helped: 451 HURT: 0 total cycles in shared programs: 186649 -> 186590 (-0.03%) cycles in affected programs: 26306 -> 26247 (-0.22%) helped: 125 HURT: 111 Signed-off-by: Pavel Ondračka <pavel.ondracka@gmail.com> Reviewed-by: Filip Gawin <filip.gawin@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28630>
This commit is contained in:
parent
11ad056ee9
commit
0c96b03fcf
|
@ -18,6 +18,8 @@ struct const_remap_state {
|
|||
struct rc_constant *constants;
|
||||
/* New constant layout. */
|
||||
struct rc_constant_list new_constants;
|
||||
/* Marks immediates that are used as a vector. Those will be just copied. */
|
||||
uint8_t *is_used_as_vector;
|
||||
bool has_rel_addr;
|
||||
bool are_externals_remapped;
|
||||
bool is_identity;
|
||||
|
@ -48,6 +50,7 @@ static void mark_used(void * userdata, struct rc_instruction * inst,
|
|||
struct const_remap_state* d = userdata;
|
||||
|
||||
if (src->File == RC_FILE_CONSTANT) {
|
||||
uint8_t mask = 0;
|
||||
if (src->RelAddr) {
|
||||
d->has_rel_addr = true;
|
||||
} else {
|
||||
|
@ -55,9 +58,14 @@ static void mark_used(void * userdata, struct rc_instruction * inst,
|
|||
char swz = GET_SWZ(src->Swizzle, chan);
|
||||
if (swz > RC_SWIZZLE_W)
|
||||
continue;
|
||||
d->constants[src->Index].UseMask |= 1 << swz;
|
||||
mask |= 1 << swz;
|
||||
}
|
||||
}
|
||||
d->constants[src->Index].UseMask |= mask;
|
||||
if (d->constants[src->Index].Type == RC_CONSTANT_IMMEDIATE &&
|
||||
util_bitcount(mask) > 1) {
|
||||
d->is_used_as_vector[src->Index] |= mask;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -82,6 +90,26 @@ static void place_constant_in_free_slot(struct const_remap_state *s, unsigned i)
|
|||
s->new_constants.Count++;
|
||||
}
|
||||
|
||||
/*
 * Copy constant `i` into the next slot of the new constant list and record,
 * for each channel that is read as part of a vector, that it now lives in
 * that slot at the same channel position.
 *
 * s: remap state; reads s->constants and s->is_used_as_vector, writes
 *    s->new_constants, s->inv_remap_table and s->is_identity.
 * i: index of the source constant in s->constants.
 *
 * Requires at least two bits set in s->is_used_as_vector[i] (asserted).
 * NOTE(review): no capacity check is done here before writing to
 * new_constants.Constants[count]; presumably the caller guarantees the
 * array is large enough -- confirm against the allocation site.
 */
static void place_immediate_in_free_slot(struct const_remap_state *s, unsigned i)
|
||||
{
|
||||
assert(util_bitcount(s->is_used_as_vector[i]) > 1);
|
||||
|
||||
/* The new constant is appended at the current end of the new list. */
unsigned count = s->new_constants.Count;
|
||||
|
||||
s->new_constants.Constants[count] = s->constants[i];
|
||||
/* The copy's UseMask is narrowed to the vector-used channels only. */
s->new_constants.Constants[count].UseMask = s->is_used_as_vector[i];
|
||||
for (unsigned chan = 0; chan < 4; chan++) {
|
||||
/* Parses as UseMask & (1 << chan) & is_used_as_vector[i]: the channel
 * must be set in both masks. */
if (s->constants[i].UseMask & 1 << chan & s->is_used_as_vector[i]) {
|
||||
s->inv_remap_table[i].index[chan] = count;
|
||||
/* Channel position is kept unchanged (swizzle == chan). */
s->inv_remap_table[i].swizzle[chan] = chan;
|
||||
}
|
||||
}
|
||||
/* Landing at a different index means the remap is no longer the identity. */
if (count != i) {
|
||||
s->is_identity = false;
|
||||
}
|
||||
s->new_constants.Count++;
|
||||
}
|
||||
|
||||
static void try_merge_constants_external(struct const_remap_state *s, unsigned i)
|
||||
{
|
||||
assert(util_bitcount(s->constants[i].UseMask) == 1);
|
||||
|
@ -110,10 +138,12 @@ static void try_merge_constants_external(struct const_remap_state *s, unsigned i
|
|||
static void init_constant_remap_state(struct radeon_compiler *c, struct const_remap_state *s)
|
||||
{
|
||||
s->is_identity = true;
|
||||
s->is_used_as_vector = malloc(c->Program.Constants.Count);
|
||||
s->new_constants.Constants =
|
||||
malloc(sizeof(struct rc_constant) * c->Program.Constants.Count);
|
||||
s->new_constants._Reserved = c->Program.Constants.Count;
|
||||
s->constants = c->Program.Constants.Constants;
|
||||
memset(s->is_used_as_vector, 0, c->Program.Constants.Count);
|
||||
|
||||
s->remap_table = malloc(c->Program.Constants.Count * sizeof(struct const_remap));
|
||||
s->inv_remap_table =
|
||||
|
@ -179,9 +209,39 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
|
|||
try_merge_constants_external(s, i);
|
||||
}
|
||||
|
||||
/* Now put the immediates and state constants. */
|
||||
/* Now put immediates which are used as vectors. */
|
||||
for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
|
||||
if (constants[i].Type == RC_CONSTANT_EXTERNAL)
|
||||
if (constants[i].Type == RC_CONSTANT_IMMEDIATE &&
|
||||
util_bitcount(s->constants[i].UseMask) > 0 &&
|
||||
util_bitcount(s->is_used_as_vector[i]) > 0) {
|
||||
place_immediate_in_free_slot(s, i);
|
||||
}
|
||||
}
|
||||
|
||||
/* Now walk over scalar immediates and try to:
|
||||
* a) check for duplicates,
|
||||
* b) find free slot.
|
||||
* All of this is already done by rc_constants_add_immediate_scalar,
|
||||
* so just use it.
|
||||
*/
|
||||
for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
|
||||
if (constants[i].Type != RC_CONSTANT_IMMEDIATE)
|
||||
continue;
|
||||
for (unsigned chan = 0; chan < 4; chan++) {
|
||||
if ((s->constants[i].UseMask) & (1 << chan) &&
|
||||
(~(s->is_used_as_vector[i]) & (1 << chan))) {
|
||||
unsigned swz;
|
||||
s->inv_remap_table[i].index[chan] =
|
||||
rc_constants_add_immediate_scalar(&s->new_constants, constants[i].u.Immediate[chan], &swz);
|
||||
s->inv_remap_table[i].swizzle[chan] = GET_SWZ(swz, 0);
|
||||
s->is_identity = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Finally place state constants. */
|
||||
for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
|
||||
if (constants[i].Type != RC_CONSTANT_STATE)
|
||||
continue;
|
||||
if (util_bitcount(s->constants[i].UseMask) > 0) {
|
||||
place_constant_in_free_slot(s, i);
|
||||
|
|
Loading…
Reference in New Issue