ir3: Add support for subgroup arithmetic
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14107>
This commit is contained in:
parent
a433db60c1
commit
1a78604d20
|
@ -192,6 +192,7 @@ static const struct opc_info {
|
|||
OPC(1, OPC_READ_COND_MACRO, read_cond.macro),
|
||||
OPC(1, OPC_READ_FIRST_MACRO, read_first.macro),
|
||||
OPC(1, OPC_SWZ_SHARED_MACRO, swz_shared.macro),
|
||||
OPC(1, OPC_SCAN_MACRO, scan.macro),
|
||||
|
||||
/* category 2: */
|
||||
OPC(2, OPC_ADD_F, add.f),
|
||||
|
|
|
@ -127,6 +127,9 @@ typedef enum {
|
|||
OPC_READ_FIRST_MACRO = _OPC(1, 55),
|
||||
OPC_SWZ_SHARED_MACRO = _OPC(1, 56),
|
||||
|
||||
/* Macros that expand to a loop */
|
||||
OPC_SCAN_MACRO = _OPC(1, 57),
|
||||
|
||||
/* category 2: */
|
||||
OPC_ADD_F = _OPC(2, 0),
|
||||
OPC_MIN_F = _OPC(2, 1),
|
||||
|
|
|
@ -239,6 +239,22 @@ struct ir3_register {
|
|||
arr[arr##_count++] = __VA_ARGS__; \
|
||||
} while (0)
|
||||
|
||||
/* Operation applied by a subgroup reduction / scan (stored in
 * ir3_instruction::cat1.reduce_op for OPC_SCAN_MACRO).
 *
 * The suffix names the operand kind: _U unsigned integer, _S signed integer,
 * _F floating point, _B bitwise.
 */
typedef enum {
   /* addition */
   REDUCE_OP_ADD_U = 0,
   REDUCE_OP_ADD_F,

   /* multiplication */
   REDUCE_OP_MUL_U,
   REDUCE_OP_MUL_F,

   /* minimum */
   REDUCE_OP_MIN_U,
   REDUCE_OP_MIN_S,
   REDUCE_OP_MIN_F,

   /* maximum */
   REDUCE_OP_MAX_U,
   REDUCE_OP_MAX_S,
   REDUCE_OP_MAX_F,

   /* bitwise logic */
   REDUCE_OP_AND_B,
   REDUCE_OP_OR_B,
   REDUCE_OP_XOR_B,
} reduce_op_t;
|
||||
|
||||
struct ir3_instruction {
|
||||
struct ir3_block *block;
|
||||
opc_t opc;
|
||||
|
@ -324,6 +340,7 @@ struct ir3_instruction {
|
|||
struct {
|
||||
type_t src_type, dst_type;
|
||||
round_t round;
|
||||
reduce_op_t reduce_op;
|
||||
} cat1;
|
||||
struct {
|
||||
enum {
|
||||
|
@ -896,6 +913,7 @@ is_subgroup_cond_mov_macro(struct ir3_instruction *instr)
|
|||
case OPC_READ_COND_MACRO:
|
||||
case OPC_READ_FIRST_MACRO:
|
||||
case OPC_SWZ_SHARED_MACRO:
|
||||
case OPC_SCAN_MACRO:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
|
|
|
@ -1823,6 +1823,148 @@ get_frag_coord(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||
return ctx->frag_coord;
|
||||
}
|
||||
|
||||
/* This is a bit of a hack until ir3_context is converted to store SSA values
|
||||
* as ir3_register's instead of ir3_instruction's. Pick out a given destination
|
||||
* of an instruction with multiple destinations using a mov that will get folded
|
||||
* away by ir3_cp.
|
||||
*/
|
||||
static struct ir3_instruction *
|
||||
create_multidst_mov(struct ir3_block *block, struct ir3_register *dst)
|
||||
{
|
||||
struct ir3_instruction *mov = ir3_instr_create(block, OPC_MOV, 1, 1);
|
||||
unsigned dst_flags = dst->flags & IR3_REG_HALF;
|
||||
unsigned src_flags = dst->flags & (IR3_REG_HALF | IR3_REG_SHARED);
|
||||
|
||||
__ssa_dst(mov)->flags |= dst_flags;
|
||||
struct ir3_register *src =
|
||||
ir3_src_create(mov, INVALID_REG, IR3_REG_SSA | src_flags);
|
||||
src->wrmask = dst->wrmask;
|
||||
src->def = dst;
|
||||
debug_assert(!(dst->flags & IR3_REG_RELATIV));
|
||||
mov->cat1.src_type = mov->cat1.dst_type =
|
||||
(dst->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
|
||||
return mov;
|
||||
}
|
||||
|
||||
static reduce_op_t
|
||||
get_reduce_op(nir_op opc)
|
||||
{
|
||||
switch (opc) {
|
||||
case nir_op_iadd: return REDUCE_OP_ADD_U;
|
||||
case nir_op_fadd: return REDUCE_OP_ADD_F;
|
||||
case nir_op_imul: return REDUCE_OP_MUL_U;
|
||||
case nir_op_fmul: return REDUCE_OP_MUL_F;
|
||||
case nir_op_umin: return REDUCE_OP_MIN_U;
|
||||
case nir_op_imin: return REDUCE_OP_MIN_S;
|
||||
case nir_op_fmin: return REDUCE_OP_MIN_F;
|
||||
case nir_op_umax: return REDUCE_OP_MAX_U;
|
||||
case nir_op_imax: return REDUCE_OP_MAX_S;
|
||||
case nir_op_fmax: return REDUCE_OP_MAX_F;
|
||||
case nir_op_iand: return REDUCE_OP_AND_B;
|
||||
case nir_op_ior: return REDUCE_OP_OR_B;
|
||||
case nir_op_ixor: return REDUCE_OP_XOR_B;
|
||||
default:
|
||||
unreachable("unknown NIR reduce op");
|
||||
}
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
get_reduce_identity(nir_op opc, unsigned size)
|
||||
{
|
||||
switch (opc) {
|
||||
case nir_op_iadd:
|
||||
return 0;
|
||||
case nir_op_fadd:
|
||||
return size == 32 ? fui(0.0f) : _mesa_float_to_half(0.0f);
|
||||
case nir_op_imul:
|
||||
return 1;
|
||||
case nir_op_fmul:
|
||||
return size == 32 ? fui(1.0f) : _mesa_float_to_half(1.0f);
|
||||
case nir_op_umax:
|
||||
return 0;
|
||||
case nir_op_imax:
|
||||
return size == 32 ? INT32_MIN : (uint32_t)INT16_MIN;
|
||||
case nir_op_fmax:
|
||||
return size == 32 ? fui(-INFINITY) : _mesa_float_to_half(-INFINITY);
|
||||
case nir_op_umin:
|
||||
return size == 32 ? UINT32_MAX : UINT16_MAX;
|
||||
case nir_op_imin:
|
||||
return size == 32 ? INT32_MAX : (uint32_t)INT16_MAX;
|
||||
case nir_op_fmin:
|
||||
return size == 32 ? fui(INFINITY) : _mesa_float_to_half(INFINITY);
|
||||
case nir_op_iand:
|
||||
return size == 32 ? ~0 : (size == 16 ? (uint32_t)(uint16_t)~0 : 1);
|
||||
case nir_op_ior:
|
||||
return 0;
|
||||
case nir_op_ixor:
|
||||
return 0;
|
||||
default:
|
||||
unreachable("unknown NIR reduce op");
|
||||
}
|
||||
}
|
||||
|
||||
static struct ir3_instruction *
|
||||
emit_intrinsic_reduce(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
||||
{
|
||||
struct ir3_instruction *src = ir3_get_src(ctx, &intr->src[0])[0];
|
||||
nir_op nir_reduce_op = (nir_op) nir_intrinsic_reduction_op(intr);
|
||||
reduce_op_t reduce_op = get_reduce_op(nir_reduce_op);
|
||||
unsigned dst_size = nir_dest_bit_size(intr->dest);
|
||||
unsigned flags = (ir3_bitsize(ctx, dst_size) == 16) ? IR3_REG_HALF : 0;
|
||||
|
||||
/* Note: the shared reg is initialized to the identity, so we need it to
|
||||
* always be 32-bit even when the source isn't because half shared regs are
|
||||
* not supported.
|
||||
*/
|
||||
struct ir3_instruction *identity =
|
||||
create_immed(ctx->block, get_reduce_identity(nir_reduce_op, dst_size));
|
||||
identity = ir3_READ_FIRST_MACRO(ctx->block, identity, 0);
|
||||
identity->dsts[0]->flags |= IR3_REG_SHARED;
|
||||
|
||||
/* OPC_SCAN_MACRO has the following destinations:
|
||||
* - Exclusive scan result (interferes with source)
|
||||
* - Inclusive scan result
|
||||
* - Shared reg reduction result, must be initialized to the identity
|
||||
*
|
||||
* The loop computes all three results at the same time, we just have to
|
||||
* choose which destination to return.
|
||||
*/
|
||||
struct ir3_instruction *scan =
|
||||
ir3_instr_create(ctx->block, OPC_SCAN_MACRO, 3, 2);
|
||||
scan->cat1.reduce_op = reduce_op;
|
||||
|
||||
struct ir3_register *exclusive = __ssa_dst(scan);
|
||||
exclusive->flags |= flags | IR3_REG_EARLY_CLOBBER;
|
||||
struct ir3_register *inclusive = __ssa_dst(scan);
|
||||
inclusive->flags |= flags;
|
||||
struct ir3_register *reduce = __ssa_dst(scan);
|
||||
reduce->flags |= IR3_REG_SHARED;
|
||||
|
||||
/* The 32-bit multiply macro reads its sources after writing a partial result
|
||||
* to the destination, therefore inclusive also interferes with the source.
|
||||
*/
|
||||
if (reduce_op == REDUCE_OP_MUL_U && dst_size == 32)
|
||||
inclusive->flags |= IR3_REG_EARLY_CLOBBER;
|
||||
|
||||
/* Normal source */
|
||||
__ssa_src(scan, src, 0);
|
||||
|
||||
/* shared reg tied source */
|
||||
struct ir3_register *reduce_init = __ssa_src(scan, identity, IR3_REG_SHARED);
|
||||
ir3_reg_tie(reduce, reduce_init);
|
||||
|
||||
struct ir3_register *dst;
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_reduce: dst = reduce; break;
|
||||
case nir_intrinsic_inclusive_scan: dst = inclusive; break;
|
||||
case nir_intrinsic_exclusive_scan: dst = exclusive; break;
|
||||
default:
|
||||
unreachable("unknown reduce intrinsic");
|
||||
}
|
||||
|
||||
return create_multidst_mov(ctx->block, dst);
|
||||
}
|
||||
|
||||
static void setup_input(struct ir3_context *ctx, nir_intrinsic_instr *intr);
|
||||
static void setup_output(struct ir3_context *ctx, nir_intrinsic_instr *intr);
|
||||
|
||||
|
@ -2425,6 +2567,12 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_reduce:
|
||||
case nir_intrinsic_inclusive_scan:
|
||||
case nir_intrinsic_exclusive_scan:
|
||||
dst[0] = emit_intrinsic_reduce(ctx, intr);
|
||||
break;
|
||||
|
||||
default:
|
||||
ir3_context_error(ctx, "Unhandled intrinsic type: %s\n",
|
||||
nir_intrinsic_infos[intr->intrinsic].name);
|
||||
|
|
|
@ -71,14 +71,106 @@ mov_immed(struct ir3_register *dst, struct ir3_block *block, unsigned immed)
|
|||
mov->repeat = util_last_bit(mov_dst->wrmask) - 1;
|
||||
}
|
||||
|
||||
static void
|
||||
mov_reg(struct ir3_block *block, struct ir3_register *dst,
|
||||
struct ir3_register *src)
|
||||
{
|
||||
struct ir3_instruction *mov = ir3_instr_create(block, OPC_MOV, 1, 1);
|
||||
|
||||
struct ir3_register *mov_dst =
|
||||
ir3_dst_create(mov, dst->num, dst->flags & (IR3_REG_HALF | IR3_REG_SHARED));
|
||||
struct ir3_register *mov_src =
|
||||
ir3_src_create(mov, src->num, src->flags & (IR3_REG_HALF | IR3_REG_SHARED));
|
||||
mov_dst->wrmask = dst->wrmask;
|
||||
mov_src->wrmask = src->wrmask;
|
||||
mov->repeat = util_last_bit(mov_dst->wrmask) - 1;
|
||||
|
||||
mov->cat1.dst_type = (dst->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
|
||||
mov->cat1.src_type = (src->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
|
||||
}
|
||||
|
||||
static void
|
||||
binop(struct ir3_block *block, opc_t opc, struct ir3_register *dst,
|
||||
struct ir3_register *src0, struct ir3_register *src1)
|
||||
{
|
||||
struct ir3_instruction *instr = ir3_instr_create(block, opc, 1, 2);
|
||||
|
||||
unsigned flags = dst->flags & IR3_REG_HALF;
|
||||
struct ir3_register *instr_dst = ir3_dst_create(instr, dst->num, flags);
|
||||
struct ir3_register *instr_src0 = ir3_src_create(instr, src0->num, flags);
|
||||
struct ir3_register *instr_src1 = ir3_src_create(instr, src1->num, flags);
|
||||
|
||||
instr_dst->wrmask = dst->wrmask;
|
||||
instr_src0->wrmask = src0->wrmask;
|
||||
instr_src1->wrmask = src1->wrmask;
|
||||
instr->repeat = util_last_bit(instr_dst->wrmask) - 1;
|
||||
}
|
||||
|
||||
static void
|
||||
triop(struct ir3_block *block, opc_t opc, struct ir3_register *dst,
|
||||
struct ir3_register *src0, struct ir3_register *src1,
|
||||
struct ir3_register *src2)
|
||||
{
|
||||
struct ir3_instruction *instr = ir3_instr_create(block, opc, 1, 3);
|
||||
|
||||
unsigned flags = dst->flags & IR3_REG_HALF;
|
||||
struct ir3_register *instr_dst = ir3_dst_create(instr, dst->num, flags);
|
||||
struct ir3_register *instr_src0 = ir3_src_create(instr, src0->num, flags);
|
||||
struct ir3_register *instr_src1 = ir3_src_create(instr, src1->num, flags);
|
||||
struct ir3_register *instr_src2 = ir3_src_create(instr, src2->num, flags);
|
||||
|
||||
instr_dst->wrmask = dst->wrmask;
|
||||
instr_src0->wrmask = src0->wrmask;
|
||||
instr_src1->wrmask = src1->wrmask;
|
||||
instr_src2->wrmask = src2->wrmask;
|
||||
instr->repeat = util_last_bit(instr_dst->wrmask) - 1;
|
||||
}
|
||||
|
||||
static void
|
||||
do_reduce(struct ir3_block *block, reduce_op_t opc,
|
||||
struct ir3_register *dst, struct ir3_register *src0,
|
||||
struct ir3_register *src1)
|
||||
{
|
||||
switch (opc) {
|
||||
#define CASE(name) \
|
||||
case REDUCE_OP_##name: \
|
||||
binop(block, OPC_##name, dst, src0, src1); \
|
||||
break;
|
||||
|
||||
CASE(ADD_U)
|
||||
CASE(ADD_F)
|
||||
CASE(MUL_F)
|
||||
CASE(MIN_U)
|
||||
CASE(MIN_S)
|
||||
CASE(MIN_F)
|
||||
CASE(MAX_U)
|
||||
CASE(MAX_S)
|
||||
CASE(MAX_F)
|
||||
CASE(AND_B)
|
||||
CASE(OR_B)
|
||||
CASE(XOR_B)
|
||||
|
||||
#undef CASE
|
||||
|
||||
case REDUCE_OP_MUL_U:
|
||||
if (dst->flags & IR3_REG_HALF) {
|
||||
binop(block, OPC_MUL_S24, dst, src0, src1);
|
||||
} else {
|
||||
/* 32-bit multiplication macro - see ir3_nir_imul */
|
||||
binop(block, OPC_MULL_U, dst, src0, src1);
|
||||
triop(block, OPC_MADSH_M16, dst, src0, src1, dst);
|
||||
triop(block, OPC_MADSH_M16, dst, src1, src0, dst);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static struct ir3_block *
|
||||
split_block(struct ir3 *ir, struct ir3_block *before_block,
|
||||
struct ir3_instruction *instr, struct ir3_block **then)
|
||||
struct ir3_instruction *instr)
|
||||
{
|
||||
struct ir3_block *then_block = ir3_block_create(ir);
|
||||
struct ir3_block *after_block = ir3_block_create(ir);
|
||||
list_add(&then_block->node, &before_block->node);
|
||||
list_add(&after_block->node, &then_block->node);
|
||||
list_add(&after_block->node, &before_block->node);
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(before_block->successors); i++) {
|
||||
after_block->successors[i] = before_block->successors[i];
|
||||
|
@ -96,19 +188,8 @@ split_block(struct ir3 *ir, struct ir3_block *before_block,
|
|||
}
|
||||
}
|
||||
|
||||
before_block->successors[0] = then_block;
|
||||
before_block->successors[1] = after_block;
|
||||
before_block->physical_successors[0] = then_block;
|
||||
before_block->physical_successors[1] = after_block;
|
||||
ir3_block_add_predecessor(then_block, before_block);
|
||||
ir3_block_add_predecessor(after_block, before_block);
|
||||
ir3_block_add_physical_predecessor(then_block, before_block);
|
||||
ir3_block_add_physical_predecessor(after_block, before_block);
|
||||
|
||||
then_block->successors[0] = after_block;
|
||||
then_block->physical_successors[0] = after_block;
|
||||
ir3_block_add_predecessor(after_block, then_block);
|
||||
ir3_block_add_physical_predecessor(after_block, then_block);
|
||||
before_block->successors[0] = before_block->successors[1] = NULL;
|
||||
before_block->physical_successors[0] = before_block->physical_successors[1] = NULL;
|
||||
|
||||
foreach_instr_from_safe (rem_instr, &instr->node,
|
||||
&before_block->instr_list) {
|
||||
|
@ -120,10 +201,39 @@ split_block(struct ir3 *ir, struct ir3_block *before_block,
|
|||
after_block->brtype = before_block->brtype;
|
||||
after_block->condition = before_block->condition;
|
||||
|
||||
*then = then_block;
|
||||
return after_block;
|
||||
}
|
||||
|
||||
static void
|
||||
link_blocks_physical(struct ir3_block *pred, struct ir3_block *succ,
|
||||
unsigned index)
|
||||
{
|
||||
pred->physical_successors[index] = succ;
|
||||
ir3_block_add_physical_predecessor(succ, pred);
|
||||
}
|
||||
|
||||
static void
|
||||
link_blocks(struct ir3_block *pred, struct ir3_block *succ, unsigned index)
|
||||
{
|
||||
pred->successors[index] = succ;
|
||||
ir3_block_add_predecessor(succ, pred);
|
||||
link_blocks_physical(pred, succ, index);
|
||||
}
|
||||
|
||||
static struct ir3_block *
|
||||
create_if(struct ir3 *ir, struct ir3_block *before_block,
|
||||
struct ir3_block *after_block)
|
||||
{
|
||||
struct ir3_block *then_block = ir3_block_create(ir);
|
||||
list_add(&then_block->node, &before_block->node);
|
||||
|
||||
link_blocks(before_block, then_block, 0);
|
||||
link_blocks(before_block, after_block, 1);
|
||||
link_blocks(then_block, after_block, 0);
|
||||
|
||||
return then_block;
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_instr(struct ir3 *ir, struct ir3_block **block, struct ir3_instruction *instr)
|
||||
{
|
||||
|
@ -135,106 +245,156 @@ lower_instr(struct ir3 *ir, struct ir3_block **block, struct ir3_instruction *in
|
|||
case OPC_READ_COND_MACRO:
|
||||
case OPC_READ_FIRST_MACRO:
|
||||
case OPC_SWZ_SHARED_MACRO:
|
||||
case OPC_SCAN_MACRO:
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
struct ir3_block *before_block = *block;
|
||||
struct ir3_block *then_block;
|
||||
struct ir3_block *after_block =
|
||||
split_block(ir, before_block, instr, &then_block);
|
||||
struct ir3_block *after_block = split_block(ir, before_block, instr);
|
||||
|
||||
/* For ballot, the destination must be initialized to 0 before we do
|
||||
* the movmsk because the condition may be 0 and then the movmsk will
|
||||
* be skipped. Because it's a shared register we have to wrap the
|
||||
* initialization in a getone block.
|
||||
*/
|
||||
if (instr->opc == OPC_BALLOT_MACRO) {
|
||||
before_block->brtype = IR3_BRANCH_GETONE;
|
||||
before_block->condition = NULL;
|
||||
mov_immed(instr->dsts[0], then_block, 0);
|
||||
before_block = after_block;
|
||||
after_block = split_block(ir, before_block, instr, &then_block);
|
||||
}
|
||||
if (instr->opc == OPC_SCAN_MACRO) {
|
||||
/* The pseudo-code for the scan macro is:
|
||||
*
|
||||
* while (true) {
|
||||
* header:
|
||||
* if (elect()) {
|
||||
* exit:
|
||||
* exclusive = reduce;
|
||||
* inclusive = src OP exclusive;
|
||||
* reduce = inclusive;
|
||||
* }
|
||||
* footer:
|
||||
* }
|
||||
*
|
||||
* This is based on the blob's sequence, and carefully crafted to avoid
|
||||
* using the shared register "reduce" except in move instructions, since
|
||||
* using it in the actual OP isn't possible for half-registers.
|
||||
*/
|
||||
struct ir3_block *header = ir3_block_create(ir);
|
||||
list_add(&header->node, &before_block->node);
|
||||
|
||||
switch (instr->opc) {
|
||||
case OPC_BALLOT_MACRO:
|
||||
case OPC_READ_COND_MACRO:
|
||||
case OPC_ANY_MACRO:
|
||||
case OPC_ALL_MACRO:
|
||||
before_block->condition = instr->srcs[0]->def->instr;
|
||||
break;
|
||||
default:
|
||||
before_block->condition = NULL;
|
||||
break;
|
||||
}
|
||||
struct ir3_block *exit = ir3_block_create(ir);
|
||||
list_add(&exit->node, &header->node);
|
||||
|
||||
switch (instr->opc) {
|
||||
case OPC_BALLOT_MACRO:
|
||||
case OPC_READ_COND_MACRO:
|
||||
before_block->brtype = IR3_BRANCH_COND;
|
||||
break;
|
||||
case OPC_ANY_MACRO:
|
||||
before_block->brtype = IR3_BRANCH_ANY;
|
||||
break;
|
||||
case OPC_ALL_MACRO:
|
||||
before_block->brtype = IR3_BRANCH_ALL;
|
||||
break;
|
||||
case OPC_ELECT_MACRO:
|
||||
case OPC_READ_FIRST_MACRO:
|
||||
case OPC_SWZ_SHARED_MACRO:
|
||||
before_block->brtype = IR3_BRANCH_GETONE;
|
||||
break;
|
||||
default:
|
||||
unreachable("bad opcode");
|
||||
}
|
||||
struct ir3_block *footer = ir3_block_create(ir);
|
||||
list_add(&footer->node, &exit->node);
|
||||
|
||||
switch (instr->opc) {
|
||||
case OPC_ALL_MACRO:
|
||||
case OPC_ANY_MACRO:
|
||||
case OPC_ELECT_MACRO:
|
||||
mov_immed(instr->dsts[0], then_block, 1);
|
||||
mov_immed(instr->dsts[0], before_block, 0);
|
||||
break;
|
||||
link_blocks(before_block, header, 0);
|
||||
|
||||
case OPC_BALLOT_MACRO: {
|
||||
unsigned comp_count = util_last_bit(instr->dsts[0]->wrmask);
|
||||
struct ir3_instruction *movmsk =
|
||||
ir3_instr_create(then_block, OPC_MOVMSK, 1, 0);
|
||||
ir3_dst_create(movmsk, instr->dsts[0]->num, instr->dsts[0]->flags);
|
||||
movmsk->repeat = comp_count - 1;
|
||||
break;
|
||||
}
|
||||
link_blocks(header, exit, 0);
|
||||
link_blocks(header, footer, 1);
|
||||
header->brtype = IR3_BRANCH_GETONE;
|
||||
|
||||
case OPC_READ_COND_MACRO:
|
||||
case OPC_READ_FIRST_MACRO: {
|
||||
struct ir3_instruction *mov =
|
||||
ir3_instr_create(then_block, OPC_MOV, 1, 1);
|
||||
unsigned src = instr->opc == OPC_READ_COND_MACRO ? 1 : 0;
|
||||
ir3_dst_create(mov, instr->dsts[0]->num, instr->dsts[0]->flags);
|
||||
struct ir3_register *new_src = ir3_src_create(mov, 0, 0);
|
||||
*new_src = *instr->srcs[src];
|
||||
mov->cat1.dst_type = TYPE_U32;
|
||||
mov->cat1.src_type =
|
||||
(new_src->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
|
||||
break;
|
||||
}
|
||||
link_blocks(exit, after_block, 0);
|
||||
link_blocks_physical(exit, footer, 1);
|
||||
|
||||
case OPC_SWZ_SHARED_MACRO: {
|
||||
struct ir3_instruction *swz =
|
||||
ir3_instr_create(then_block, OPC_SWZ, 2, 2);
|
||||
ir3_dst_create(swz, instr->dsts[0]->num, instr->dsts[0]->flags);
|
||||
ir3_dst_create(swz, instr->dsts[1]->num, instr->dsts[1]->flags);
|
||||
ir3_src_create(swz, instr->srcs[0]->num, instr->srcs[0]->flags);
|
||||
ir3_src_create(swz, instr->srcs[1]->num, instr->srcs[1]->flags);
|
||||
swz->cat1.dst_type = swz->cat1.src_type = TYPE_U32;
|
||||
swz->repeat = 1;
|
||||
break;
|
||||
}
|
||||
link_blocks(footer, header, 0);
|
||||
|
||||
default:
|
||||
unreachable("bad opcode");
|
||||
struct ir3_register *exclusive = instr->dsts[0];
|
||||
struct ir3_register *inclusive = instr->dsts[1];
|
||||
struct ir3_register *reduce = instr->dsts[2];
|
||||
struct ir3_register *src = instr->srcs[0];
|
||||
|
||||
mov_reg(exit, exclusive, reduce);
|
||||
do_reduce(exit, instr->cat1.reduce_op, inclusive, src, exclusive);
|
||||
mov_reg(exit, reduce, inclusive);
|
||||
} else {
|
||||
struct ir3_block *then_block = create_if(ir, before_block, after_block);
|
||||
|
||||
/* For ballot, the destination must be initialized to 0 before we do
|
||||
* the movmsk because the condition may be 0 and then the movmsk will
|
||||
* be skipped. Because it's a shared register we have to wrap the
|
||||
* initialization in a getone block.
|
||||
*/
|
||||
if (instr->opc == OPC_BALLOT_MACRO) {
|
||||
before_block->brtype = IR3_BRANCH_GETONE;
|
||||
before_block->condition = NULL;
|
||||
mov_immed(instr->dsts[0], then_block, 0);
|
||||
before_block = after_block;
|
||||
after_block = split_block(ir, before_block, instr);
|
||||
then_block = create_if(ir, before_block, after_block);
|
||||
}
|
||||
|
||||
switch (instr->opc) {
|
||||
case OPC_BALLOT_MACRO:
|
||||
case OPC_READ_COND_MACRO:
|
||||
case OPC_ANY_MACRO:
|
||||
case OPC_ALL_MACRO:
|
||||
before_block->condition = instr->srcs[0]->def->instr;
|
||||
break;
|
||||
default:
|
||||
before_block->condition = NULL;
|
||||
break;
|
||||
}
|
||||
|
||||
switch (instr->opc) {
|
||||
case OPC_BALLOT_MACRO:
|
||||
case OPC_READ_COND_MACRO:
|
||||
before_block->brtype = IR3_BRANCH_COND;
|
||||
break;
|
||||
case OPC_ANY_MACRO:
|
||||
before_block->brtype = IR3_BRANCH_ANY;
|
||||
break;
|
||||
case OPC_ALL_MACRO:
|
||||
before_block->brtype = IR3_BRANCH_ALL;
|
||||
break;
|
||||
case OPC_ELECT_MACRO:
|
||||
case OPC_READ_FIRST_MACRO:
|
||||
case OPC_SWZ_SHARED_MACRO:
|
||||
before_block->brtype = IR3_BRANCH_GETONE;
|
||||
break;
|
||||
default:
|
||||
unreachable("bad opcode");
|
||||
}
|
||||
|
||||
switch (instr->opc) {
|
||||
case OPC_ALL_MACRO:
|
||||
case OPC_ANY_MACRO:
|
||||
case OPC_ELECT_MACRO:
|
||||
mov_immed(instr->dsts[0], then_block, 1);
|
||||
mov_immed(instr->dsts[0], before_block, 0);
|
||||
break;
|
||||
|
||||
case OPC_BALLOT_MACRO: {
|
||||
unsigned comp_count = util_last_bit(instr->dsts[0]->wrmask);
|
||||
struct ir3_instruction *movmsk =
|
||||
ir3_instr_create(then_block, OPC_MOVMSK, 1, 0);
|
||||
ir3_dst_create(movmsk, instr->dsts[0]->num, instr->dsts[0]->flags);
|
||||
movmsk->repeat = comp_count - 1;
|
||||
break;
|
||||
}
|
||||
|
||||
case OPC_READ_COND_MACRO:
|
||||
case OPC_READ_FIRST_MACRO: {
|
||||
struct ir3_instruction *mov =
|
||||
ir3_instr_create(then_block, OPC_MOV, 1, 1);
|
||||
unsigned src = instr->opc == OPC_READ_COND_MACRO ? 1 : 0;
|
||||
ir3_dst_create(mov, instr->dsts[0]->num, instr->dsts[0]->flags);
|
||||
struct ir3_register *new_src = ir3_src_create(mov, 0, 0);
|
||||
*new_src = *instr->srcs[src];
|
||||
mov->cat1.dst_type = TYPE_U32;
|
||||
mov->cat1.src_type =
|
||||
(new_src->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
|
||||
break;
|
||||
}
|
||||
|
||||
case OPC_SWZ_SHARED_MACRO: {
|
||||
struct ir3_instruction *swz =
|
||||
ir3_instr_create(then_block, OPC_SWZ, 2, 2);
|
||||
ir3_dst_create(swz, instr->dsts[0]->num, instr->dsts[0]->flags);
|
||||
ir3_dst_create(swz, instr->dsts[1]->num, instr->dsts[1]->flags);
|
||||
ir3_src_create(swz, instr->srcs[0]->num, instr->srcs[0]->flags);
|
||||
ir3_src_create(swz, instr->srcs[1]->num, instr->srcs[1]->flags);
|
||||
swz->cat1.dst_type = swz->cat1.src_type = TYPE_U32;
|
||||
swz->repeat = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
unreachable("bad opcode");
|
||||
}
|
||||
}
|
||||
|
||||
*block = after_block;
|
||||
|
|
|
@ -137,7 +137,51 @@ print_instr_name(struct log_stream *stream, struct ir3_instruction *instr,
|
|||
disasm_a3xx_instr_name(instr->opc));
|
||||
}
|
||||
|
||||
if (instr->opc != OPC_MOVMSK) {
|
||||
if (instr->opc == OPC_SCAN_MACRO) {
|
||||
switch (instr->cat1.reduce_op) {
|
||||
case REDUCE_OP_ADD_U:
|
||||
mesa_log_stream_printf(stream, ".add.u");
|
||||
break;
|
||||
case REDUCE_OP_ADD_F:
|
||||
mesa_log_stream_printf(stream, ".add.f");
|
||||
break;
|
||||
case REDUCE_OP_MUL_U:
|
||||
mesa_log_stream_printf(stream, ".mul.u");
|
||||
break;
|
||||
case REDUCE_OP_MUL_F:
|
||||
mesa_log_stream_printf(stream, ".mul.f");
|
||||
break;
|
||||
case REDUCE_OP_MIN_U:
|
||||
mesa_log_stream_printf(stream, ".min.u");
|
||||
break;
|
||||
case REDUCE_OP_MIN_S:
|
||||
mesa_log_stream_printf(stream, ".min.s");
|
||||
break;
|
||||
case REDUCE_OP_MIN_F:
|
||||
mesa_log_stream_printf(stream, ".min.f");
|
||||
break;
|
||||
case REDUCE_OP_MAX_U:
|
||||
mesa_log_stream_printf(stream, ".max.u");
|
||||
break;
|
||||
case REDUCE_OP_MAX_S:
|
||||
mesa_log_stream_printf(stream, ".max.s");
|
||||
break;
|
||||
case REDUCE_OP_MAX_F:
|
||||
mesa_log_stream_printf(stream, ".max.f");
|
||||
break;
|
||||
case REDUCE_OP_AND_B:
|
||||
mesa_log_stream_printf(stream, ".and.b");
|
||||
break;
|
||||
case REDUCE_OP_OR_B:
|
||||
mesa_log_stream_printf(stream, ".or.b");
|
||||
break;
|
||||
case REDUCE_OP_XOR_B:
|
||||
mesa_log_stream_printf(stream, ".xor.b");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (instr->opc != OPC_MOVMSK && instr->opc != OPC_SCAN_MACRO) {
|
||||
mesa_log_stream_printf(stream, ".%s%s",
|
||||
type_name(instr->cat1.src_type),
|
||||
type_name(instr->cat1.dst_type));
|
||||
|
|
|
@ -238,6 +238,14 @@ validate_instr(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr)
|
|||
} else if (instr->opc == OPC_ELECT_MACRO) {
|
||||
validate_assert(ctx, instr->dsts_count == 1);
|
||||
validate_assert(ctx, !(instr->dsts[0]->flags & IR3_REG_SHARED));
|
||||
} else if (instr->opc == OPC_SCAN_MACRO) {
|
||||
validate_assert(ctx, instr->dsts_count == 3);
|
||||
validate_assert(ctx, instr->srcs_count == 2);
|
||||
validate_assert(ctx, reg_class_flags(instr->dsts[0]) ==
|
||||
reg_class_flags(instr->srcs[0]));
|
||||
validate_assert(ctx, reg_class_flags(instr->dsts[1]) ==
|
||||
reg_class_flags(instr->srcs[0]));
|
||||
validate_assert(ctx, reg_class_flags(instr->dsts[2]) == IR3_REG_SHARED);
|
||||
} else {
|
||||
foreach_dst (dst, instr)
|
||||
validate_reg_size(ctx, dst, instr->cat1.dst_type);
|
||||
|
|
Loading…
Reference in New Issue