583 lines
21 KiB
C
583 lines
21 KiB
C
/*
|
|
* Copyright (C) 2021 Valve Corporation
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
* SOFTWARE.
|
|
*/
|
|
|
|
#include "ir3_ra.h"
|
|
#include "ir3_shader.h"
|
|
|
|
/* Source of a single parallel-copy entry. "flags" discriminates the union:
 * IR3_REG_IMMED selects "imm", IR3_REG_CONST selects "const_num", and zero
 * means a physical register in "reg".
 */
struct copy_src {
   unsigned flags;
   union {
      uint32_t imm;       /* immediate value (flags & IR3_REG_IMMED) */
      physreg_t reg;      /* source physreg (flags == 0) */
      unsigned const_num; /* const-file register number (flags & IR3_REG_CONST) */
   };
};
|
|
|
|
/* One pending copy in a parallel-copy set: move "src" into physreg "dst".
 * "flags" carries IR3_REG_HALF/IR3_REG_SHARED describing the destination;
 * "done" marks entries already resolved by the sequentialization loop.
 */
struct copy_entry {
   physreg_t dst;
   unsigned flags;
   bool done;

   struct copy_src src;
};
|
|
|
|
static unsigned
|
|
copy_entry_size(const struct copy_entry *entry)
|
|
{
|
|
return (entry->flags & IR3_REG_HALF) ? 1 : 2;
|
|
}
|
|
|
|
static struct copy_src
|
|
get_copy_src(const struct ir3_register *reg, unsigned offset)
|
|
{
|
|
if (reg->flags & IR3_REG_IMMED) {
|
|
return (struct copy_src){
|
|
.flags = IR3_REG_IMMED,
|
|
.imm = reg->uim_val,
|
|
};
|
|
} else if (reg->flags & IR3_REG_CONST) {
|
|
return (struct copy_src){
|
|
.flags = IR3_REG_CONST,
|
|
.const_num = reg->num,
|
|
};
|
|
} else {
|
|
return (struct copy_src){
|
|
.flags = 0,
|
|
.reg = ra_reg_get_physreg(reg) + offset,
|
|
};
|
|
}
|
|
}
|
|
|
|
static void
|
|
do_xor(struct ir3_instruction *instr, unsigned dst_num, unsigned src1_num,
|
|
unsigned src2_num, unsigned flags)
|
|
{
|
|
struct ir3_instruction * xor
|
|
= ir3_instr_create(instr->block, OPC_XOR_B, 1, 2);
|
|
ir3_dst_create(xor, dst_num, flags);
|
|
ir3_src_create(xor, src1_num, flags);
|
|
ir3_src_create(xor, src2_num, flags);
|
|
|
|
ir3_instr_move_before(xor, instr);
|
|
}
|
|
|
|
/* Swap the contents of entry->src.reg and entry->dst, emitting the needed
 * instructions immediately before "instr". The source must be a register
 * (not an immediate or const — asserted below). Half-reg operands that lie
 * above the range addressable by half-reg instructions are handled by
 * recursively bouncing through a low full-reg temporary.
 */
static void
do_swap(struct ir3_compiler *compiler, struct ir3_instruction *instr,
        const struct copy_entry *entry)
{
   assert(!entry->src.flags);

   if (entry->flags & IR3_REG_HALF) {
      /* We currently make sure to never emit parallel copies where the
       * source/destination is a half-reg above the range accessable to half
       * registers. However, when a full-reg source overlaps a half-reg
       * destination or vice versa, it can be very, very complicated to come
       * up with a series of "legal" swaps and copies to resolve the
       * parallel copy. So here we provide a fallback to implement the
       * "illegal" swap instead. This may also be useful for implementing
       * "spilling" half-regs to the inaccessable space.
       */
      if (entry->src.reg >= RA_HALF_SIZE) {
         /* Choose a temporary that doesn't overlap src or dst */
         physreg_t tmp = entry->dst < 2 ? 2 : 0;

         /* Swap src and the temporary */
         do_swap(compiler, instr,
                 &(struct copy_entry){
                    .src = {.reg = entry->src.reg & ~1u},
                    .dst = tmp,
                    .flags = entry->flags & ~IR3_REG_HALF,
                 });

         /* If src and dst are within the same full register, then swapping src
          * with tmp above will also move dst to tmp. Account for that here.
          */
         unsigned dst =
            (entry->src.reg & ~1u) == (entry->dst & ~1u) ?
            tmp + (entry->dst & 1u) : entry->dst;

         /* Do the original swap with src replaced with tmp */
         do_swap(compiler, instr,
                 &(struct copy_entry){
                    .src = {.reg = tmp + (entry->src.reg & 1)},
                    .dst = dst,
                    .flags = entry->flags,
                 });

         /* Swap src and the temporary back */
         do_swap(compiler, instr,
                 &(struct copy_entry){
                    .src = {.reg = entry->src.reg & ~1u},
                    .dst = tmp,
                    .flags = entry->flags & ~IR3_REG_HALF,
                 });
         return;
      }

      /* If dst is not addressable, we only need to swap the arguments and
       * let the case above handle it.
       */
      if (entry->dst >= RA_HALF_SIZE) {
         do_swap(compiler, instr,
                 &(struct copy_entry){
                    .src = {.reg = entry->dst},
                    .dst = entry->src.reg,
                    .flags = entry->flags,
                 });
         return;
      }
   }

   unsigned src_num = ra_physreg_to_num(entry->src.reg, entry->flags);
   unsigned dst_num = ra_physreg_to_num(entry->dst, entry->flags);

   /* a5xx+ is known to support swz, which enables us to swap two registers
    * in-place. If unsupported we emulate it using the xor trick.
    */
   if (compiler->gen < 5) {
      /* Shared regs only exist since a5xx, so we don't have to provide a
       * fallback path for them.
       */
      assert(!(entry->flags & IR3_REG_SHARED));
      /* Classic triple-xor swap: dst ^= src; src ^= dst; dst ^= src. */
      do_xor(instr, dst_num, dst_num, src_num, entry->flags);
      do_xor(instr, src_num, src_num, dst_num, entry->flags);
      do_xor(instr, dst_num, dst_num, src_num, entry->flags);
   } else {
      /* Use a macro for shared regs because any shared reg writes need to
       * be wrapped in a getone block to work correctly. Writing shared regs
       * with multiple threads active does not work, even if they all return
       * the same value.
       */
      unsigned opc =
         (entry->flags & IR3_REG_SHARED) ? OPC_SWZ_SHARED_MACRO : OPC_SWZ;
      /* swz has two dsts and two srcs; with (rpt1) it exchanges the pair. */
      struct ir3_instruction *swz = ir3_instr_create(instr->block, opc, 2, 2);
      ir3_dst_create(swz, dst_num, entry->flags);
      ir3_dst_create(swz, src_num, entry->flags);
      ir3_src_create(swz, src_num, entry->flags);
      ir3_src_create(swz, dst_num, entry->flags);
      swz->cat1.dst_type = (entry->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
      swz->cat1.src_type = (entry->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
      swz->repeat = 1;
      ir3_instr_move_before(swz, instr);
   }
}
|
|
|
|
/* Emit a copy of entry->src into entry->dst immediately before "instr".
 * Handles half-reg sources/destinations that lie above the half-reg
 * addressable range (see do_swap() for background), falling back to
 * swaps through a temporary or to cov/shr tricks as needed.
 */
static void
do_copy(struct ir3_compiler *compiler, struct ir3_instruction *instr,
        const struct copy_entry *entry)
{
   if (entry->flags & IR3_REG_HALF) {
      /* See do_swap() for why this is here. */
      if (entry->dst >= RA_HALF_SIZE) {
         /* TODO: is there a hw instruction we can use for this case? */
         /* Pick a full-reg temporary that overlaps neither src nor dst. */
         physreg_t tmp = !entry->src.flags && entry->src.reg < 2 ? 2 : 0;

         /* Bring the full reg containing dst down into tmp. */
         do_swap(compiler, instr,
                 &(struct copy_entry){
                    .src = {.reg = entry->dst & ~1u},
                    .dst = tmp,
                    .flags = entry->flags & ~IR3_REG_HALF,
                 });

         /* Similar to in do_swap(), account for src being swapped with tmp if
          * src and dst are in the same register.
          */
         struct copy_src src = entry->src;
         if (!src.flags && (src.reg & ~1u) == (entry->dst & ~1u))
            src.reg = tmp + (src.reg & 1u);

         /* Do the copy into the appropriate half of tmp instead. */
         do_copy(compiler, instr,
                 &(struct copy_entry){
                    .src = src,
                    .dst = tmp + (entry->dst & 1),
                    .flags = entry->flags,
                 });

         /* Swap the updated full reg back into place above RA_HALF_SIZE. */
         do_swap(compiler, instr,
                 &(struct copy_entry){
                    .src = {.reg = entry->dst & ~1u},
                    .dst = tmp,
                    .flags = entry->flags & ~IR3_REG_HALF,
                 });
         return;
      }

      /* Half-reg source above the addressable range: read it through the
       * containing full register instead.
       */
      if (!entry->src.flags && entry->src.reg >= RA_HALF_SIZE) {
         unsigned src_num = ra_physreg_to_num(entry->src.reg & ~1u,
                                              entry->flags & ~IR3_REG_HALF);
         unsigned dst_num = ra_physreg_to_num(entry->dst, entry->flags);

         if (entry->src.reg % 2 == 0) {
            /* cov.u32u16 dst, src — low half: truncating convert. */
            struct ir3_instruction *cov =
               ir3_instr_create(instr->block, OPC_MOV, 1, 1);
            ir3_dst_create(cov, dst_num, entry->flags);
            ir3_src_create(cov, src_num, entry->flags & ~IR3_REG_HALF);
            cov->cat1.dst_type = TYPE_U16;
            cov->cat1.src_type = TYPE_U32;
            ir3_instr_move_before(cov, instr);
         } else {
            /* shr.b dst, src, (16) — high half: shift it down. */
            struct ir3_instruction *shr =
               ir3_instr_create(instr->block, OPC_SHR_B, 1, 2);
            ir3_dst_create(shr, dst_num, entry->flags);
            ir3_src_create(shr, src_num, entry->flags & ~IR3_REG_HALF);
            ir3_src_create(shr, 0, IR3_REG_IMMED)->uim_val = 16;
            ir3_instr_move_before(shr, instr);
         }
         return;
      }
   }

   unsigned src_num = ra_physreg_to_num(entry->src.reg, entry->flags);
   unsigned dst_num = ra_physreg_to_num(entry->dst, entry->flags);

   /* Similar to the swap case, we have to use a macro for shared regs. */
   unsigned opc =
      (entry->flags & IR3_REG_SHARED) ? OPC_READ_FIRST_MACRO : OPC_MOV;
   struct ir3_instruction *mov = ir3_instr_create(instr->block, opc, 1, 1);
   ir3_dst_create(mov, dst_num, entry->flags);
   ir3_src_create(mov, src_num, entry->flags | entry->src.flags);
   mov->cat1.dst_type = (entry->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
   mov->cat1.src_type = (entry->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
   /* For immed/const sources, src_num above was garbage; overwrite the
    * source operand with the real immediate value or const register num.
    */
   if (entry->src.flags & IR3_REG_IMMED)
      mov->srcs[0]->uim_val = entry->src.imm;
   else if (entry->src.flags & IR3_REG_CONST)
      mov->srcs[0]->num = entry->src.const_num;
   ir3_instr_move_before(mov, instr);
}
|
|
|
|
/* Bookkeeping state for sequentializing one independent parallel-copy set
 * (one register file's worth of entries). Indexed by physreg in half-reg
 * units.
 */
struct copy_ctx {
   /* For each physreg, the number of pending copy entries that use it as a
    * source. Once this drops to zero, then the physreg is unblocked and can
    * be moved to.
    */
   unsigned physreg_use_count[RA_MAX_FILE_SIZE];

   /* For each physreg, the pending copy_entry that uses it as a dest. */
   struct copy_entry *physreg_dst[RA_MAX_FILE_SIZE];

   /* The copies still to be resolved; may grow when 32-bit copies are
    * split into two 16-bit halves (see split_32bit_copy()).
    */
   struct copy_entry entries[RA_MAX_FILE_SIZE];
   unsigned entry_count;
};
|
|
|
|
static bool
|
|
entry_blocked(struct copy_entry *entry, struct copy_ctx *ctx)
|
|
{
|
|
for (unsigned i = 0; i < copy_entry_size(entry); i++) {
|
|
if (ctx->physreg_use_count[entry->dst + i] != 0)
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
static void
|
|
split_32bit_copy(struct copy_ctx *ctx, struct copy_entry *entry)
|
|
{
|
|
assert(!entry->done);
|
|
assert(!(entry->src.flags & (IR3_REG_IMMED | IR3_REG_CONST)));
|
|
assert(copy_entry_size(entry) == 2);
|
|
struct copy_entry *new_entry = &ctx->entries[ctx->entry_count++];
|
|
|
|
new_entry->dst = entry->dst + 1;
|
|
new_entry->src.flags = entry->src.flags;
|
|
new_entry->src.reg = entry->src.reg + 1;
|
|
new_entry->done = false;
|
|
entry->flags |= IR3_REG_HALF;
|
|
new_entry->flags = entry->flags;
|
|
ctx->physreg_dst[entry->dst + 1] = new_entry;
|
|
}
|
|
|
|
/* Sequentialize one independent parallel-copy set (ctx->entries), emitting
 * moves/swaps before "instr". Classic parallel-copy resolution: emit
 * unblocked copies first, split partially-blocked 32-bit copies, then
 * break the remaining cycles with swaps.
 */
static void
_handle_copies(struct ir3_compiler *compiler, struct ir3_instruction *instr,
               struct copy_ctx *ctx)
{
   /* Set up the bookkeeping */
   memset(ctx->physreg_dst, 0, sizeof(ctx->physreg_dst));
   memset(ctx->physreg_use_count, 0, sizeof(ctx->physreg_use_count));

   for (unsigned i = 0; i < ctx->entry_count; i++) {
      struct copy_entry *entry = &ctx->entries[i];
      for (unsigned j = 0; j < copy_entry_size(entry); j++) {
         /* Only register sources block a destination; immed/const don't. */
         if (!entry->src.flags)
            ctx->physreg_use_count[entry->src.reg + j]++;

         /* Copies should not have overlapping destinations. */
         assert(!ctx->physreg_dst[entry->dst + j]);
         ctx->physreg_dst[entry->dst + j] = entry;
      }
   }

   bool progress = true;
   while (progress) {
      progress = false;

      /* Step 1: resolve paths in the transfer graph. This means finding
       * copies whose destination aren't blocked by something else and then
       * emitting them, continuing this process until every copy is blocked
       * and there are only cycles left.
       *
       * TODO: We should note that src is also available in dst to unblock
       * cycles that src is involved in.
       */

      for (unsigned i = 0; i < ctx->entry_count; i++) {
         struct copy_entry *entry = &ctx->entries[i];
         if (!entry->done && !entry_blocked(entry, ctx)) {
            entry->done = true;
            progress = true;
            do_copy(compiler, instr, entry);
            /* The source slots are no longer needed and the dest is no
             * longer pending — release the bookkeeping.
             */
            for (unsigned j = 0; j < copy_entry_size(entry); j++) {
               if (!entry->src.flags)
                  ctx->physreg_use_count[entry->src.reg + j]--;
               ctx->physreg_dst[entry->dst + j] = NULL;
            }
         }
      }

      if (progress)
         continue;

      /* Step 2: Find partially blocked copies and split them. In the
       * mergedregs case, we can 32-bit copies which are only blocked on one
       * 16-bit half, and splitting them helps get things moving.
       *
       * We can skip splitting copies if the source isn't a register,
       * however, because it does not unblock anything and therefore doesn't
       * contribute to making forward progress with step 1. These copies
       * should still be resolved eventually in step 1 because they can't be
       * part of a cycle.
       */
      for (unsigned i = 0; i < ctx->entry_count; i++) {
         struct copy_entry *entry = &ctx->entries[i];
         if (entry->done || entry->flags & IR3_REG_HALF)
            continue;

         if (((ctx->physreg_use_count[entry->dst] == 0 ||
               ctx->physreg_use_count[entry->dst + 1] == 0)) &&
             !(entry->src.flags & (IR3_REG_IMMED | IR3_REG_CONST))) {
            split_32bit_copy(ctx, entry);
            progress = true;
         }
      }
   }

   /* Step 3: resolve cycles through swapping.
    *
    * At this point, the transfer graph should consist of only cycles.
    * The reason is that, given any physreg n_1 that's the source of a
    * remaining entry, it has a destination n_2, which (because every
    * copy is blocked) is the source of some other copy whose destination
    * is n_3, and so we can follow the chain until we get a cycle. If we
    * reached some other node than n_1:
    *
    * n_1 -> n_2 -> ... -> n_i
    *        ^             |
    *        |-------------|
    *
    * then n_2 would be the destination of 2 copies, which is illegal
    * (checked above in an assert). So n_1 must be part of a cycle:
    *
    * n_1 -> n_2 -> ... -> n_i
    * ^                    |
    * |---------------------|
    *
    * and this must be only cycle n_1 is involved in, because any other
    * path starting from n_1 would also have to end in n_1, resulting in
    * a node somewhere along the way being the destination of 2 copies
    * when the 2 paths merge.
    *
    * The way we resolve the cycle is through picking a copy (n_1, n_2)
    * and swapping n_1 and n_2. This moves n_1 to n_2, so n_2 is taken
    * out of the cycle:
    *
    * n_1 -> ... -> n_i
    * ^             |
    * |--------------|
    *
    * and we can keep repeating this until the cycle is empty.
    */

   for (unsigned i = 0; i < ctx->entry_count; i++) {
      struct copy_entry *entry = &ctx->entries[i];
      if (entry->done)
         continue;

      /* Entries in a cycle always have register sources (immed/const
       * sources can't be blocked, so they were resolved in step 1).
       */
      assert(!entry->src.flags);

      /* catch trivial copies */
      if (entry->dst == entry->src.reg) {
         entry->done = true;
         continue;
      }

      do_swap(compiler, instr, entry);

      /* Split any blocking copies whose sources are only partially
       * contained within our destination.
       */
      if (entry->flags & IR3_REG_HALF) {
         for (unsigned j = 0; j < ctx->entry_count; j++) {
            struct copy_entry *blocking = &ctx->entries[j];

            if (blocking->done)
               continue;

            if (blocking->src.reg <= entry->dst &&
                blocking->src.reg + 1 >= entry->dst &&
                !(blocking->flags & IR3_REG_HALF)) {
               split_32bit_copy(ctx, blocking);
            }
         }
      }

      /* Update sources of blocking copies.
       *
       * Note: at this point, every blocking copy's source should be
       * contained within our destination.
       */
      for (unsigned j = 0; j < ctx->entry_count; j++) {
         struct copy_entry *blocking = &ctx->entries[j];
         if (blocking->src.reg >= entry->dst &&
             blocking->src.reg < entry->dst + copy_entry_size(entry)) {
            blocking->src.reg =
               entry->src.reg + (blocking->src.reg - entry->dst);
         }
      }

      entry->done = true;
   }
}
|
|
|
|
static void
|
|
handle_copies(struct ir3_shader_variant *v, struct ir3_instruction *instr,
|
|
struct copy_entry *entries, unsigned entry_count)
|
|
{
|
|
struct copy_ctx ctx;
|
|
|
|
/* handle shared copies first */
|
|
ctx.entry_count = 0;
|
|
for (unsigned i = 0; i < entry_count; i++) {
|
|
if (entries[i].flags & IR3_REG_SHARED)
|
|
ctx.entries[ctx.entry_count++] = entries[i];
|
|
}
|
|
_handle_copies(v->compiler, instr, &ctx);
|
|
|
|
if (v->mergedregs) {
|
|
/* Half regs and full regs are in the same file, so handle everything
|
|
* at once.
|
|
*/
|
|
ctx.entry_count = 0;
|
|
for (unsigned i = 0; i < entry_count; i++) {
|
|
if (!(entries[i].flags & IR3_REG_SHARED))
|
|
ctx.entries[ctx.entry_count++] = entries[i];
|
|
}
|
|
_handle_copies(v->compiler, instr, &ctx);
|
|
} else {
|
|
/* There may be both half copies and full copies, so we have to split
|
|
* them up since they don't interfere.
|
|
*/
|
|
ctx.entry_count = 0;
|
|
for (unsigned i = 0; i < entry_count; i++) {
|
|
if (entries[i].flags & IR3_REG_HALF)
|
|
ctx.entries[ctx.entry_count++] = entries[i];
|
|
}
|
|
_handle_copies(v->compiler, instr, &ctx);
|
|
|
|
ctx.entry_count = 0;
|
|
for (unsigned i = 0; i < entry_count; i++) {
|
|
if (!(entries[i].flags & (IR3_REG_HALF | IR3_REG_SHARED)))
|
|
ctx.entries[ctx.entry_count++] = entries[i];
|
|
}
|
|
_handle_copies(v->compiler, instr, &ctx);
|
|
}
|
|
}
|
|
|
|
/* Lower all remaining pseudo-copy meta instructions (parallel copies,
 * collects, splits, and now-redundant phis) into real mov/swz/xor
 * instructions, then delete the meta instructions. Runs after register
 * allocation, when every register has a fixed physreg assignment.
 */
void
ir3_lower_copies(struct ir3_shader_variant *v)
{
   /* Scratch array of copy entries, reused across instructions. */
   DECLARE_ARRAY(struct copy_entry, copies);
   copies_count = copies_sz = 0;
   copies = NULL;

   foreach_block (block, &v->ir->block_list) {
      foreach_instr_safe (instr, &block->instr_list) {
         if (instr->opc == OPC_META_PARALLEL_COPY) {
            copies_count = 0;
            /* One entry per scalar element of each dst/src pair. */
            for (unsigned i = 0; i < instr->dsts_count; i++) {
               struct ir3_register *dst = instr->dsts[i];
               struct ir3_register *src = instr->srcs[i];
               unsigned flags = src->flags & (IR3_REG_HALF | IR3_REG_SHARED);
               unsigned dst_physreg = ra_reg_get_physreg(dst);
               for (unsigned j = 0; j < reg_elems(dst); j++) {
                  array_insert(
                     NULL, copies,
                     (struct copy_entry){
                        .dst = dst_physreg + j * reg_elem_size(dst),
                        .src = get_copy_src(src, j * reg_elem_size(dst)),
                        .flags = flags,
                     });
               }
            }
            handle_copies(v, instr, copies, copies_count);
            list_del(&instr->node);
         } else if (instr->opc == OPC_META_COLLECT) {
            copies_count = 0;
            /* Collect: gather scattered sources into consecutive regs
             * starting at dst — one entry per source.
             */
            struct ir3_register *dst = instr->dsts[0];
            unsigned flags = dst->flags & (IR3_REG_HALF | IR3_REG_SHARED);
            for (unsigned i = 0; i < instr->srcs_count; i++) {
               struct ir3_register *src = instr->srcs[i];
               array_insert(NULL, copies,
                            (struct copy_entry){
                               .dst = ra_num_to_physreg(dst->num + i, flags),
                               .src = get_copy_src(src, 0),
                               .flags = flags,
                            });
            }
            handle_copies(v, instr, copies, copies_count);
            list_del(&instr->node);
         } else if (instr->opc == OPC_META_SPLIT) {
            copies_count = 0;
            /* Split: copy one component out of a vector source. */
            struct ir3_register *dst = instr->dsts[0];
            struct ir3_register *src = instr->srcs[0];
            unsigned flags = src->flags & (IR3_REG_HALF | IR3_REG_SHARED);
            array_insert(NULL, copies,
                         (struct copy_entry){
                            .dst = ra_reg_get_physreg(dst),
                            .src = get_copy_src(
                               src, instr->split.off * reg_elem_size(dst)),
                            .flags = flags,
                         });
            handle_copies(v, instr, copies, copies_count);
            list_del(&instr->node);
         } else if (instr->opc == OPC_META_PHI) {
            /* Phis were already resolved by the parallel copies inserted
             * at the end of predecessor blocks; just delete them.
             * NOTE(review): grounded only in the deletion here — the
             * insertion happens elsewhere in the pass pipeline.
             */
            list_del(&instr->node);
         }
      }
   }

   if (copies)
      ralloc_free(copies);
}
|