ir3: Add support for subgroup arithmetic

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14107>
This commit is contained in:
Connor Abbott 2021-12-07 12:11:05 +01:00 committed by Marge Bot
parent a433db60c1
commit 1a78604d20
7 changed files with 487 additions and 105 deletions

View File

@@ -192,6 +192,7 @@ static const struct opc_info {
OPC(1, OPC_READ_COND_MACRO, read_cond.macro),
OPC(1, OPC_READ_FIRST_MACRO, read_first.macro),
OPC(1, OPC_SWZ_SHARED_MACRO, swz_shared.macro),
OPC(1, OPC_SCAN_MACRO, scan.macro),
/* category 2: */
OPC(2, OPC_ADD_F, add.f),

View File

@@ -127,6 +127,9 @@ typedef enum {
OPC_READ_FIRST_MACRO = _OPC(1, 55),
OPC_SWZ_SHARED_MACRO = _OPC(1, 56),
/* Macros that expand to a loop */
OPC_SCAN_MACRO = _OPC(1, 57),
/* category 2: */
OPC_ADD_F = _OPC(2, 0),
OPC_MIN_F = _OPC(2, 1),

View File

@@ -239,6 +239,22 @@ struct ir3_register {
arr[arr##_count++] = __VA_ARGS__; \
} while (0)
typedef enum {
REDUCE_OP_ADD_U,
REDUCE_OP_ADD_F,
REDUCE_OP_MUL_U,
REDUCE_OP_MUL_F,
REDUCE_OP_MIN_U,
REDUCE_OP_MIN_S,
REDUCE_OP_MIN_F,
REDUCE_OP_MAX_U,
REDUCE_OP_MAX_S,
REDUCE_OP_MAX_F,
REDUCE_OP_AND_B,
REDUCE_OP_OR_B,
REDUCE_OP_XOR_B,
} reduce_op_t;
struct ir3_instruction {
struct ir3_block *block;
opc_t opc;
@ -324,6 +340,7 @@ struct ir3_instruction {
struct {
type_t src_type, dst_type;
round_t round;
reduce_op_t reduce_op;
} cat1;
struct {
enum {
@ -896,6 +913,7 @@ is_subgroup_cond_mov_macro(struct ir3_instruction *instr)
case OPC_READ_COND_MACRO:
case OPC_READ_FIRST_MACRO:
case OPC_SWZ_SHARED_MACRO:
case OPC_SCAN_MACRO:
return true;
default:
return false;

View File

@@ -1823,6 +1823,148 @@ get_frag_coord(struct ir3_context *ctx, nir_intrinsic_instr *intr)
return ctx->frag_coord;
}
/* This is a bit of a hack until ir3_context is converted to store SSA values
* as ir3_register's instead of ir3_instruction's. Pick out a given destination
* of an instruction with multiple destinations using a mov that will get folded
* away by ir3_cp.
*/
static struct ir3_instruction *
create_multidst_mov(struct ir3_block *block, struct ir3_register *dst)
{
struct ir3_instruction *mov = ir3_instr_create(block, OPC_MOV, 1, 1);
unsigned dst_flags = dst->flags & IR3_REG_HALF;
unsigned src_flags = dst->flags & (IR3_REG_HALF | IR3_REG_SHARED);
__ssa_dst(mov)->flags |= dst_flags;
struct ir3_register *src =
ir3_src_create(mov, INVALID_REG, IR3_REG_SSA | src_flags);
src->wrmask = dst->wrmask;
src->def = dst;
debug_assert(!(dst->flags & IR3_REG_RELATIV));
mov->cat1.src_type = mov->cat1.dst_type =
(dst->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
return mov;
}
static reduce_op_t
get_reduce_op(nir_op opc)
{
switch (opc) {
case nir_op_iadd: return REDUCE_OP_ADD_U;
case nir_op_fadd: return REDUCE_OP_ADD_F;
case nir_op_imul: return REDUCE_OP_MUL_U;
case nir_op_fmul: return REDUCE_OP_MUL_F;
case nir_op_umin: return REDUCE_OP_MIN_U;
case nir_op_imin: return REDUCE_OP_MIN_S;
case nir_op_fmin: return REDUCE_OP_MIN_F;
case nir_op_umax: return REDUCE_OP_MAX_U;
case nir_op_imax: return REDUCE_OP_MAX_S;
case nir_op_fmax: return REDUCE_OP_MAX_F;
case nir_op_iand: return REDUCE_OP_AND_B;
case nir_op_ior: return REDUCE_OP_OR_B;
case nir_op_ixor: return REDUCE_OP_XOR_B;
default:
unreachable("unknown NIR reduce op");
}
}
static uint32_t
get_reduce_identity(nir_op opc, unsigned size)
{
switch (opc) {
case nir_op_iadd:
return 0;
case nir_op_fadd:
return size == 32 ? fui(0.0f) : _mesa_float_to_half(0.0f);
case nir_op_imul:
return 1;
case nir_op_fmul:
return size == 32 ? fui(1.0f) : _mesa_float_to_half(1.0f);
case nir_op_umax:
return 0;
case nir_op_imax:
return size == 32 ? INT32_MIN : (uint32_t)INT16_MIN;
case nir_op_fmax:
return size == 32 ? fui(-INFINITY) : _mesa_float_to_half(-INFINITY);
case nir_op_umin:
return size == 32 ? UINT32_MAX : UINT16_MAX;
case nir_op_imin:
return size == 32 ? INT32_MAX : (uint32_t)INT16_MAX;
case nir_op_fmin:
return size == 32 ? fui(INFINITY) : _mesa_float_to_half(INFINITY);
case nir_op_iand:
return size == 32 ? ~0 : (size == 16 ? (uint32_t)(uint16_t)~0 : 1);
case nir_op_ior:
return 0;
case nir_op_ixor:
return 0;
default:
unreachable("unknown NIR reduce op");
}
}
static struct ir3_instruction *
emit_intrinsic_reduce(struct ir3_context *ctx, nir_intrinsic_instr *intr)
{
struct ir3_instruction *src = ir3_get_src(ctx, &intr->src[0])[0];
nir_op nir_reduce_op = (nir_op) nir_intrinsic_reduction_op(intr);
reduce_op_t reduce_op = get_reduce_op(nir_reduce_op);
unsigned dst_size = nir_dest_bit_size(intr->dest);
unsigned flags = (ir3_bitsize(ctx, dst_size) == 16) ? IR3_REG_HALF : 0;
/* Note: the shared reg is initialized to the identity, so we need it to
* always be 32-bit even when the source isn't because half shared regs are
* not supported.
*/
struct ir3_instruction *identity =
create_immed(ctx->block, get_reduce_identity(nir_reduce_op, dst_size));
identity = ir3_READ_FIRST_MACRO(ctx->block, identity, 0);
identity->dsts[0]->flags |= IR3_REG_SHARED;
/* OPC_SCAN_MACRO has the following destinations:
* - Exclusive scan result (interferes with source)
* - Inclusive scan result
* - Shared reg reduction result, must be initialized to the identity
*
* The loop computes all three results at the same time, we just have to
* choose which destination to return.
*/
struct ir3_instruction *scan =
ir3_instr_create(ctx->block, OPC_SCAN_MACRO, 3, 2);
scan->cat1.reduce_op = reduce_op;
struct ir3_register *exclusive = __ssa_dst(scan);
exclusive->flags |= flags | IR3_REG_EARLY_CLOBBER;
struct ir3_register *inclusive = __ssa_dst(scan);
inclusive->flags |= flags;
struct ir3_register *reduce = __ssa_dst(scan);
reduce->flags |= IR3_REG_SHARED;
/* The 32-bit multiply macro reads its sources after writing a partial result
* to the destination, therefore inclusive also interferes with the source.
*/
if (reduce_op == REDUCE_OP_MUL_U && dst_size == 32)
inclusive->flags |= IR3_REG_EARLY_CLOBBER;
/* Normal source */
__ssa_src(scan, src, 0);
/* shared reg tied source */
struct ir3_register *reduce_init = __ssa_src(scan, identity, IR3_REG_SHARED);
ir3_reg_tie(reduce, reduce_init);
struct ir3_register *dst;
switch (intr->intrinsic) {
case nir_intrinsic_reduce: dst = reduce; break;
case nir_intrinsic_inclusive_scan: dst = inclusive; break;
case nir_intrinsic_exclusive_scan: dst = exclusive; break;
default:
unreachable("unknown reduce intrinsic");
}
return create_multidst_mov(ctx->block, dst);
}
static void setup_input(struct ir3_context *ctx, nir_intrinsic_instr *intr);
static void setup_output(struct ir3_context *ctx, nir_intrinsic_instr *intr);
@@ -2425,6 +2567,12 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
break;
}
case nir_intrinsic_reduce:
case nir_intrinsic_inclusive_scan:
case nir_intrinsic_exclusive_scan:
dst[0] = emit_intrinsic_reduce(ctx, intr);
break;
default:
ir3_context_error(ctx, "Unhandled intrinsic type: %s\n",
nir_intrinsic_infos[intr->intrinsic].name);

View File

@@ -71,14 +71,106 @@ mov_immed(struct ir3_register *dst, struct ir3_block *block, unsigned immed)
mov->repeat = util_last_bit(mov_dst->wrmask) - 1;
}
static void
mov_reg(struct ir3_block *block, struct ir3_register *dst,
struct ir3_register *src)
{
struct ir3_instruction *mov = ir3_instr_create(block, OPC_MOV, 1, 1);
struct ir3_register *mov_dst =
ir3_dst_create(mov, dst->num, dst->flags & (IR3_REG_HALF | IR3_REG_SHARED));
struct ir3_register *mov_src =
ir3_src_create(mov, src->num, src->flags & (IR3_REG_HALF | IR3_REG_SHARED));
mov_dst->wrmask = dst->wrmask;
mov_src->wrmask = src->wrmask;
mov->repeat = util_last_bit(mov_dst->wrmask) - 1;
mov->cat1.dst_type = (dst->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
mov->cat1.src_type = (src->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
}
static void
binop(struct ir3_block *block, opc_t opc, struct ir3_register *dst,
struct ir3_register *src0, struct ir3_register *src1)
{
struct ir3_instruction *instr = ir3_instr_create(block, opc, 1, 2);
unsigned flags = dst->flags & IR3_REG_HALF;
struct ir3_register *instr_dst = ir3_dst_create(instr, dst->num, flags);
struct ir3_register *instr_src0 = ir3_src_create(instr, src0->num, flags);
struct ir3_register *instr_src1 = ir3_src_create(instr, src1->num, flags);
instr_dst->wrmask = dst->wrmask;
instr_src0->wrmask = src0->wrmask;
instr_src1->wrmask = src1->wrmask;
instr->repeat = util_last_bit(instr_dst->wrmask) - 1;
}
static void
triop(struct ir3_block *block, opc_t opc, struct ir3_register *dst,
struct ir3_register *src0, struct ir3_register *src1,
struct ir3_register *src2)
{
struct ir3_instruction *instr = ir3_instr_create(block, opc, 1, 3);
unsigned flags = dst->flags & IR3_REG_HALF;
struct ir3_register *instr_dst = ir3_dst_create(instr, dst->num, flags);
struct ir3_register *instr_src0 = ir3_src_create(instr, src0->num, flags);
struct ir3_register *instr_src1 = ir3_src_create(instr, src1->num, flags);
struct ir3_register *instr_src2 = ir3_src_create(instr, src2->num, flags);
instr_dst->wrmask = dst->wrmask;
instr_src0->wrmask = src0->wrmask;
instr_src1->wrmask = src1->wrmask;
instr_src2->wrmask = src2->wrmask;
instr->repeat = util_last_bit(instr_dst->wrmask) - 1;
}
static void
do_reduce(struct ir3_block *block, reduce_op_t opc,
struct ir3_register *dst, struct ir3_register *src0,
struct ir3_register *src1)
{
switch (opc) {
#define CASE(name) \
case REDUCE_OP_##name: \
binop(block, OPC_##name, dst, src0, src1); \
break;
CASE(ADD_U)
CASE(ADD_F)
CASE(MUL_F)
CASE(MIN_U)
CASE(MIN_S)
CASE(MIN_F)
CASE(MAX_U)
CASE(MAX_S)
CASE(MAX_F)
CASE(AND_B)
CASE(OR_B)
CASE(XOR_B)
#undef CASE
case REDUCE_OP_MUL_U:
if (dst->flags & IR3_REG_HALF) {
binop(block, OPC_MUL_S24, dst, src0, src1);
} else {
/* 32-bit multiplication macro - see ir3_nir_imul */
binop(block, OPC_MULL_U, dst, src0, src1);
triop(block, OPC_MADSH_M16, dst, src0, src1, dst);
triop(block, OPC_MADSH_M16, dst, src1, src0, dst);
}
break;
}
}
static struct ir3_block *
split_block(struct ir3 *ir, struct ir3_block *before_block,
struct ir3_instruction *instr, struct ir3_block **then)
struct ir3_instruction *instr)
{
struct ir3_block *then_block = ir3_block_create(ir);
struct ir3_block *after_block = ir3_block_create(ir);
list_add(&then_block->node, &before_block->node);
list_add(&after_block->node, &then_block->node);
list_add(&after_block->node, &before_block->node);
for (unsigned i = 0; i < ARRAY_SIZE(before_block->successors); i++) {
after_block->successors[i] = before_block->successors[i];
@ -96,19 +188,8 @@ split_block(struct ir3 *ir, struct ir3_block *before_block,
}
}
before_block->successors[0] = then_block;
before_block->successors[1] = after_block;
before_block->physical_successors[0] = then_block;
before_block->physical_successors[1] = after_block;
ir3_block_add_predecessor(then_block, before_block);
ir3_block_add_predecessor(after_block, before_block);
ir3_block_add_physical_predecessor(then_block, before_block);
ir3_block_add_physical_predecessor(after_block, before_block);
then_block->successors[0] = after_block;
then_block->physical_successors[0] = after_block;
ir3_block_add_predecessor(after_block, then_block);
ir3_block_add_physical_predecessor(after_block, then_block);
before_block->successors[0] = before_block->successors[1] = NULL;
before_block->physical_successors[0] = before_block->physical_successors[1] = NULL;
foreach_instr_from_safe (rem_instr, &instr->node,
&before_block->instr_list) {
@ -120,10 +201,39 @@ split_block(struct ir3 *ir, struct ir3_block *before_block,
after_block->brtype = before_block->brtype;
after_block->condition = before_block->condition;
*then = then_block;
return after_block;
}
static void
link_blocks_physical(struct ir3_block *pred, struct ir3_block *succ,
unsigned index)
{
pred->physical_successors[index] = succ;
ir3_block_add_physical_predecessor(succ, pred);
}
static void
link_blocks(struct ir3_block *pred, struct ir3_block *succ, unsigned index)
{
pred->successors[index] = succ;
ir3_block_add_predecessor(succ, pred);
link_blocks_physical(pred, succ, index);
}
static struct ir3_block *
create_if(struct ir3 *ir, struct ir3_block *before_block,
struct ir3_block *after_block)
{
struct ir3_block *then_block = ir3_block_create(ir);
list_add(&then_block->node, &before_block->node);
link_blocks(before_block, then_block, 0);
link_blocks(before_block, after_block, 1);
link_blocks(then_block, after_block, 0);
return then_block;
}
static bool
lower_instr(struct ir3 *ir, struct ir3_block **block, struct ir3_instruction *instr)
{
@ -135,106 +245,156 @@ lower_instr(struct ir3 *ir, struct ir3_block **block, struct ir3_instruction *in
case OPC_READ_COND_MACRO:
case OPC_READ_FIRST_MACRO:
case OPC_SWZ_SHARED_MACRO:
case OPC_SCAN_MACRO:
break;
default:
return false;
}
struct ir3_block *before_block = *block;
struct ir3_block *then_block;
struct ir3_block *after_block =
split_block(ir, before_block, instr, &then_block);
struct ir3_block *after_block = split_block(ir, before_block, instr);
/* For ballot, the destination must be initialized to 0 before we do
* the movmsk because the condition may be 0 and then the movmsk will
* be skipped. Because it's a shared register we have to wrap the
* initialization in a getone block.
*/
if (instr->opc == OPC_BALLOT_MACRO) {
before_block->brtype = IR3_BRANCH_GETONE;
before_block->condition = NULL;
mov_immed(instr->dsts[0], then_block, 0);
before_block = after_block;
after_block = split_block(ir, before_block, instr, &then_block);
}
if (instr->opc == OPC_SCAN_MACRO) {
/* The pseudo-code for the scan macro is:
*
* while (true) {
* header:
* if (elect()) {
* exit:
* exclusive = reduce;
* inclusive = src OP exclusive;
* reduce = inclusive;
* }
* footer:
* }
*
* This is based on the blob's sequence, and carefully crafted to avoid
* using the shared register "reduce" except in move instructions, since
* using it in the actual OP isn't possible for half-registers.
*/
struct ir3_block *header = ir3_block_create(ir);
list_add(&header->node, &before_block->node);
switch (instr->opc) {
case OPC_BALLOT_MACRO:
case OPC_READ_COND_MACRO:
case OPC_ANY_MACRO:
case OPC_ALL_MACRO:
before_block->condition = instr->srcs[0]->def->instr;
break;
default:
before_block->condition = NULL;
break;
}
struct ir3_block *exit = ir3_block_create(ir);
list_add(&exit->node, &header->node);
switch (instr->opc) {
case OPC_BALLOT_MACRO:
case OPC_READ_COND_MACRO:
before_block->brtype = IR3_BRANCH_COND;
break;
case OPC_ANY_MACRO:
before_block->brtype = IR3_BRANCH_ANY;
break;
case OPC_ALL_MACRO:
before_block->brtype = IR3_BRANCH_ALL;
break;
case OPC_ELECT_MACRO:
case OPC_READ_FIRST_MACRO:
case OPC_SWZ_SHARED_MACRO:
before_block->brtype = IR3_BRANCH_GETONE;
break;
default:
unreachable("bad opcode");
}
struct ir3_block *footer = ir3_block_create(ir);
list_add(&footer->node, &exit->node);
switch (instr->opc) {
case OPC_ALL_MACRO:
case OPC_ANY_MACRO:
case OPC_ELECT_MACRO:
mov_immed(instr->dsts[0], then_block, 1);
mov_immed(instr->dsts[0], before_block, 0);
break;
link_blocks(before_block, header, 0);
case OPC_BALLOT_MACRO: {
unsigned comp_count = util_last_bit(instr->dsts[0]->wrmask);
struct ir3_instruction *movmsk =
ir3_instr_create(then_block, OPC_MOVMSK, 1, 0);
ir3_dst_create(movmsk, instr->dsts[0]->num, instr->dsts[0]->flags);
movmsk->repeat = comp_count - 1;
break;
}
link_blocks(header, exit, 0);
link_blocks(header, footer, 1);
header->brtype = IR3_BRANCH_GETONE;
case OPC_READ_COND_MACRO:
case OPC_READ_FIRST_MACRO: {
struct ir3_instruction *mov =
ir3_instr_create(then_block, OPC_MOV, 1, 1);
unsigned src = instr->opc == OPC_READ_COND_MACRO ? 1 : 0;
ir3_dst_create(mov, instr->dsts[0]->num, instr->dsts[0]->flags);
struct ir3_register *new_src = ir3_src_create(mov, 0, 0);
*new_src = *instr->srcs[src];
mov->cat1.dst_type = TYPE_U32;
mov->cat1.src_type =
(new_src->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
break;
}
link_blocks(exit, after_block, 0);
link_blocks_physical(exit, footer, 1);
case OPC_SWZ_SHARED_MACRO: {
struct ir3_instruction *swz =
ir3_instr_create(then_block, OPC_SWZ, 2, 2);
ir3_dst_create(swz, instr->dsts[0]->num, instr->dsts[0]->flags);
ir3_dst_create(swz, instr->dsts[1]->num, instr->dsts[1]->flags);
ir3_src_create(swz, instr->srcs[0]->num, instr->srcs[0]->flags);
ir3_src_create(swz, instr->srcs[1]->num, instr->srcs[1]->flags);
swz->cat1.dst_type = swz->cat1.src_type = TYPE_U32;
swz->repeat = 1;
break;
}
link_blocks(footer, header, 0);
default:
unreachable("bad opcode");
struct ir3_register *exclusive = instr->dsts[0];
struct ir3_register *inclusive = instr->dsts[1];
struct ir3_register *reduce = instr->dsts[2];
struct ir3_register *src = instr->srcs[0];
mov_reg(exit, exclusive, reduce);
do_reduce(exit, instr->cat1.reduce_op, inclusive, src, exclusive);
mov_reg(exit, reduce, inclusive);
} else {
struct ir3_block *then_block = create_if(ir, before_block, after_block);
/* For ballot, the destination must be initialized to 0 before we do
* the movmsk because the condition may be 0 and then the movmsk will
* be skipped. Because it's a shared register we have to wrap the
* initialization in a getone block.
*/
if (instr->opc == OPC_BALLOT_MACRO) {
before_block->brtype = IR3_BRANCH_GETONE;
before_block->condition = NULL;
mov_immed(instr->dsts[0], then_block, 0);
before_block = after_block;
after_block = split_block(ir, before_block, instr);
then_block = create_if(ir, before_block, after_block);
}
switch (instr->opc) {
case OPC_BALLOT_MACRO:
case OPC_READ_COND_MACRO:
case OPC_ANY_MACRO:
case OPC_ALL_MACRO:
before_block->condition = instr->srcs[0]->def->instr;
break;
default:
before_block->condition = NULL;
break;
}
switch (instr->opc) {
case OPC_BALLOT_MACRO:
case OPC_READ_COND_MACRO:
before_block->brtype = IR3_BRANCH_COND;
break;
case OPC_ANY_MACRO:
before_block->brtype = IR3_BRANCH_ANY;
break;
case OPC_ALL_MACRO:
before_block->brtype = IR3_BRANCH_ALL;
break;
case OPC_ELECT_MACRO:
case OPC_READ_FIRST_MACRO:
case OPC_SWZ_SHARED_MACRO:
before_block->brtype = IR3_BRANCH_GETONE;
break;
default:
unreachable("bad opcode");
}
switch (instr->opc) {
case OPC_ALL_MACRO:
case OPC_ANY_MACRO:
case OPC_ELECT_MACRO:
mov_immed(instr->dsts[0], then_block, 1);
mov_immed(instr->dsts[0], before_block, 0);
break;
case OPC_BALLOT_MACRO: {
unsigned comp_count = util_last_bit(instr->dsts[0]->wrmask);
struct ir3_instruction *movmsk =
ir3_instr_create(then_block, OPC_MOVMSK, 1, 0);
ir3_dst_create(movmsk, instr->dsts[0]->num, instr->dsts[0]->flags);
movmsk->repeat = comp_count - 1;
break;
}
case OPC_READ_COND_MACRO:
case OPC_READ_FIRST_MACRO: {
struct ir3_instruction *mov =
ir3_instr_create(then_block, OPC_MOV, 1, 1);
unsigned src = instr->opc == OPC_READ_COND_MACRO ? 1 : 0;
ir3_dst_create(mov, instr->dsts[0]->num, instr->dsts[0]->flags);
struct ir3_register *new_src = ir3_src_create(mov, 0, 0);
*new_src = *instr->srcs[src];
mov->cat1.dst_type = TYPE_U32;
mov->cat1.src_type =
(new_src->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
break;
}
case OPC_SWZ_SHARED_MACRO: {
struct ir3_instruction *swz =
ir3_instr_create(then_block, OPC_SWZ, 2, 2);
ir3_dst_create(swz, instr->dsts[0]->num, instr->dsts[0]->flags);
ir3_dst_create(swz, instr->dsts[1]->num, instr->dsts[1]->flags);
ir3_src_create(swz, instr->srcs[0]->num, instr->srcs[0]->flags);
ir3_src_create(swz, instr->srcs[1]->num, instr->srcs[1]->flags);
swz->cat1.dst_type = swz->cat1.src_type = TYPE_U32;
swz->repeat = 1;
break;
}
default:
unreachable("bad opcode");
}
}
*block = after_block;

View File

@@ -137,7 +137,51 @@ print_instr_name(struct log_stream *stream, struct ir3_instruction *instr,
disasm_a3xx_instr_name(instr->opc));
}
if (instr->opc != OPC_MOVMSK) {
if (instr->opc == OPC_SCAN_MACRO) {
switch (instr->cat1.reduce_op) {
case REDUCE_OP_ADD_U:
mesa_log_stream_printf(stream, ".add.u");
break;
case REDUCE_OP_ADD_F:
mesa_log_stream_printf(stream, ".add.f");
break;
case REDUCE_OP_MUL_U:
mesa_log_stream_printf(stream, ".mul.u");
break;
case REDUCE_OP_MUL_F:
mesa_log_stream_printf(stream, ".mul.f");
break;
case REDUCE_OP_MIN_U:
mesa_log_stream_printf(stream, ".min.u");
break;
case REDUCE_OP_MIN_S:
mesa_log_stream_printf(stream, ".min.s");
break;
case REDUCE_OP_MIN_F:
mesa_log_stream_printf(stream, ".min.f");
break;
case REDUCE_OP_MAX_U:
mesa_log_stream_printf(stream, ".max.u");
break;
case REDUCE_OP_MAX_S:
mesa_log_stream_printf(stream, ".max.s");
break;
case REDUCE_OP_MAX_F:
mesa_log_stream_printf(stream, ".max.f");
break;
case REDUCE_OP_AND_B:
mesa_log_stream_printf(stream, ".and.b");
break;
case REDUCE_OP_OR_B:
mesa_log_stream_printf(stream, ".or.b");
break;
case REDUCE_OP_XOR_B:
mesa_log_stream_printf(stream, ".xor.b");
break;
}
}
if (instr->opc != OPC_MOVMSK && instr->opc != OPC_SCAN_MACRO) {
mesa_log_stream_printf(stream, ".%s%s",
type_name(instr->cat1.src_type),
type_name(instr->cat1.dst_type));

View File

@@ -238,6 +238,14 @@ validate_instr(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr)
} else if (instr->opc == OPC_ELECT_MACRO) {
validate_assert(ctx, instr->dsts_count == 1);
validate_assert(ctx, !(instr->dsts[0]->flags & IR3_REG_SHARED));
} else if (instr->opc == OPC_SCAN_MACRO) {
validate_assert(ctx, instr->dsts_count == 3);
validate_assert(ctx, instr->srcs_count == 2);
validate_assert(ctx, reg_class_flags(instr->dsts[0]) ==
reg_class_flags(instr->srcs[0]));
validate_assert(ctx, reg_class_flags(instr->dsts[1]) ==
reg_class_flags(instr->srcs[0]));
validate_assert(ctx, reg_class_flags(instr->dsts[2]) == IR3_REG_SHARED);
} else {
foreach_dst (dst, instr)
validate_reg_size(ctx, dst, instr->cat1.dst_type);