pan/bi: Rewrite aligned vectors as well
This still isn't optimal, but it handles another common case, where we have a vector "prefix" and can rewrite the parent instruction directly. The last remaining case is one where writemasks and the like would start coming into play; for the moment, it is not worth the increase in complexity.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4288>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4288>
This commit is contained in:
parent
5a3493c536
commit
6a4fadce12
|
@ -108,8 +108,12 @@ bi_rewrite_uses(bi_context *ctx,
|
||||||
* returns false if not (nondestructive in this case). */
|
* returns false if not (nondestructive in this case). */
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
bi_shift_mask_scalar(bi_instruction *ins, signed shift)
|
bi_shift_mask(bi_instruction *ins, unsigned shift)
|
||||||
{
|
{
|
||||||
|
/* No op and handles the funny cases */
|
||||||
|
if (!shift)
|
||||||
|
return true;
|
||||||
|
|
||||||
unsigned sz = nir_alu_type_get_type_size(ins->dest_type);
|
unsigned sz = nir_alu_type_get_type_size(ins->dest_type);
|
||||||
unsigned bytes = sz / 8;
|
unsigned bytes = sz / 8;
|
||||||
|
|
||||||
|
@ -124,33 +128,68 @@ bi_shift_mask_scalar(bi_instruction *ins, signed shift)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Shift swizzle so old i'th component is accessed by new (i + j)'th
|
||||||
|
* component where j is component shift */
|
||||||
|
unsigned component_shift = shift / bytes;
|
||||||
|
|
||||||
|
/* Sanity check to avoid memory corruption */
|
||||||
|
if (component_shift >= sizeof(ins->swizzle[0]))
|
||||||
|
return false;
|
||||||
|
|
||||||
/* Otherwise, shift is divisible by bytes, and all relevant src types
|
/* Otherwise, shift is divisible by bytes, and all relevant src types
|
||||||
* are the same size as the dest type. */
|
* are the same size as the dest type. */
|
||||||
ins->writemask <<= shift;
|
ins->writemask <<= shift;
|
||||||
|
|
||||||
/* Shift swizzle so old i'th component is accessed by new (i + j)'th
|
|
||||||
* component where j is component shift */
|
|
||||||
signed component_shift = shift / bytes;
|
|
||||||
|
|
||||||
bi_foreach_src(ins, s) {
|
bi_foreach_src(ins, s) {
|
||||||
if (!ins->src[s]) continue;
|
if (!ins->src[s]) continue;
|
||||||
|
|
||||||
size_t overlap = sizeof(ins->swizzle[s]) - abs(component_shift);
|
size_t overlap = sizeof(ins->swizzle[s]) - component_shift;
|
||||||
|
memmove(ins->swizzle[s] + component_shift, ins->swizzle[s], overlap);
|
||||||
if (component_shift > 0)
|
|
||||||
memmove(ins->swizzle[s] + component_shift, ins->swizzle[s], overlap);
|
|
||||||
else
|
|
||||||
memmove(ins->swizzle[s], ins->swizzle[s] - component_shift, overlap);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Checks if we have a nicely aligned vector prefix */
|
||||||
|
|
||||||
|
static bool
|
||||||
|
bi_is_aligned_vec(bi_instruction *combine, unsigned s, bi_instruction *parent,
|
||||||
|
unsigned *count)
|
||||||
|
{
|
||||||
|
/* We only support prefixes */
|
||||||
|
if (s != 0)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* Is it a contiguous write? */
|
||||||
|
unsigned writes = util_bitcount(parent->writemask);
|
||||||
|
if (parent->writemask != ((1 << writes) - 1))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* Okay - how many components? */
|
||||||
|
unsigned bytes = nir_alu_type_get_type_size(parent->dest_type) / 8;
|
||||||
|
unsigned components = writes / bytes;
|
||||||
|
|
||||||
|
/* Are we contiguous like that? */
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < components; ++i) {
|
||||||
|
if (combine->src[i] != parent->dest)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (combine->swizzle[i][0] != i)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* We're good to go */
|
||||||
|
*count = components;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/* Tries to lower a given source of a combine to an appropriate rewrite,
|
/* Tries to lower a given source of a combine to an appropriate rewrite,
|
||||||
* returning true if successful, and false with no changes otherwise. */
|
* returning true if successful, and false with no changes otherwise. */
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
bi_lower_combine_src(bi_context *ctx, bi_instruction *ins, unsigned s, unsigned R)
|
bi_lower_combine_src(bi_context *ctx, bi_instruction *ins, unsigned s, unsigned R,
|
||||||
|
unsigned *vec_count)
|
||||||
{
|
{
|
||||||
unsigned src = ins->src[s];
|
unsigned src = ins->src[s];
|
||||||
|
|
||||||
|
@ -171,9 +210,10 @@ bi_lower_combine_src(bi_context *ctx, bi_instruction *ins, unsigned s, unsigned
|
||||||
unsigned pbytes = nir_alu_type_get_type_size(parent->dest_type) / 8;
|
unsigned pbytes = nir_alu_type_get_type_size(parent->dest_type) / 8;
|
||||||
if (pbytes != bytes) return false;
|
if (pbytes != bytes) return false;
|
||||||
|
|
||||||
/* Scalar? */
|
bool scalar = (parent->writemask == ((1 << bytes) - 1));
|
||||||
if (parent->writemask != ((1 << bytes) - 1)) return false;
|
if (!(scalar || bi_is_aligned_vec(ins, s, parent, vec_count))) return false;
|
||||||
if (!bi_shift_mask_scalar(parent, bytes * s)) return false;
|
|
||||||
|
if (!bi_shift_mask(parent, bytes * s)) return false;
|
||||||
bi_rewrite_uses(ctx, parent->dest, 0, R, s);
|
bi_rewrite_uses(ctx, parent->dest, 0, R, s);
|
||||||
parent->dest = R;
|
parent->dest = R;
|
||||||
return true;
|
return true;
|
||||||
|
@ -191,8 +231,15 @@ bi_lower_combine(bi_context *ctx, bi_block *block)
|
||||||
unsigned R = bi_make_temp_reg(ctx);
|
unsigned R = bi_make_temp_reg(ctx);
|
||||||
|
|
||||||
bi_foreach_src(ins, s) {
|
bi_foreach_src(ins, s) {
|
||||||
if (!bi_lower_combine_src(ctx, ins, s, R))
|
unsigned vec_count = 0;
|
||||||
|
|
||||||
|
if (bi_lower_combine_src(ctx, ins, s, R, &vec_count)) {
|
||||||
|
/* Skip vectored sources */
|
||||||
|
if (vec_count)
|
||||||
|
s += (vec_count - 1);
|
||||||
|
} else {
|
||||||
bi_insert_combine_mov(ctx, ins, s, R);
|
bi_insert_combine_mov(ctx, ins, s, R);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue