pan/bi: Align staging registers on Valhall

This handles the following from the Valhall specification (that I wrote):

    If multiple subsequent staging registers are accessed, the base must
    be aligned to 2. However, even if 4 registers are accessed, it is
    not necessary to align to 4, only to 2. This restriction allows the
    hardware to use a 64-bit data path without handling unaligned
    access, which is more efficient. This restriction does not apply if
    only a single register is accessed.

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12026>
This commit is contained in:
Alyssa Rosenzweig 2021-07-26 20:35:51 -04:00 committed by Marge Bot
parent a6ccbf48e9
commit 5649f24d16
1 changed file with 21 additions and 2 deletions

View File

@ -178,8 +178,13 @@ lcra_count_constraints(struct lcra_state *l, unsigned i)
* that union is the desired clobber set. That may be written equivalently as
* the union over i < n of (B - i), where subtraction is defined elementwise
* and corresponds to a shift of the entire bitset.
*
* EVEN_BITS_MASK is an affinity mask for aligned register pairs. Interpreted
* as a bit set, it is { x : 0 <= x < 64 and x is even }
*/
/* Bits 0, 2, 4, ..., 62 set: selects even-numbered registers in a 64-bit mask */
#define EVEN_BITS_MASK (0x5555555555555555ull)
static uint64_t
bi_make_affinity(uint64_t clobber, unsigned count, bool split_file)
{
@ -207,7 +212,7 @@ bi_make_affinity(uint64_t clobber, unsigned count, bool split_file)
}
static void
bi_mark_interference(bi_block *block, struct lcra_state *l, uint8_t *live, uint64_t preload_live, unsigned node_count, bool is_blend, bool split_file)
bi_mark_interference(bi_block *block, struct lcra_state *l, uint8_t *live, uint64_t preload_live, unsigned node_count, bool is_blend, bool split_file, bool aligned_sr)
{
bi_foreach_instr_in_block_rev(block, ins) {
/* Mark all registers live after the instruction as
@ -228,6 +233,11 @@ bi_mark_interference(bi_block *block, struct lcra_state *l, uint8_t *live, uint6
unsigned count = bi_count_write_registers(ins, d);
unsigned offset = ins->dest[d].offset;
uint64_t affinity = bi_make_affinity(preload_live, count, split_file);
/* Valhall needs >= 64-bit staging writes to be pair-aligned */
if (aligned_sr && count >= 2)
affinity &= EVEN_BITS_MASK;
l->affinity[node] &= (affinity >> offset);
for (unsigned i = 0; i < node_count; ++i) {
@ -238,6 +248,14 @@ bi_mark_interference(bi_block *block, struct lcra_state *l, uint8_t *live, uint6
}
}
/* Valhall needs >= 64-bit staging reads to be pair-aligned */
if (aligned_sr && bi_count_read_registers(ins, 0) >= 2) {
unsigned node = bi_get_node(ins->src[0]);
if (node < node_count)
l->affinity[node] &= EVEN_BITS_MASK;
}
if (!is_blend && ins->op == BI_OPCODE_BLEND) {
/* Blend shaders might clobber r0-r15, r48. */
uint64_t clobber = BITFIELD64_MASK(16) | BITFIELD64_BIT(48);
@ -268,7 +286,8 @@ bi_compute_interference(bi_context *ctx, struct lcra_state *l, bool full_regs)
uint8_t *live = mem_dup(blk->live_out, node_count);
bi_mark_interference(blk, l, live, blk->reg_live_out,
node_count, ctx->inputs->is_blend, !full_regs);
node_count, ctx->inputs->is_blend, !full_regs,
ctx->arch >= 9);
free(live);
}