nvc0: replace branching with predicated insns where feasible
This commit is contained in:
parent
0f776fea43
commit
0691530b7f
|
@ -55,15 +55,11 @@ nvc0_insn_can_load(struct nv_instruction *nvi, int s,
|
|||
boolean
|
||||
nvc0_insn_is_predicateable(struct nv_instruction *nvi)
|
||||
{
|
||||
int s;
|
||||
|
||||
if (!nv_op_predicateable(nvi->opcode))
|
||||
if (nvi->predicate >= 0) /* already predicated */
|
||||
return FALSE;
|
||||
if (nvi->predicate >= 0)
|
||||
if (!nvc0_op_info_table[nvi->opcode].predicate &&
|
||||
!nvc0_op_info_table[nvi->opcode].pseudo)
|
||||
return FALSE;
|
||||
for (s = 0; s < 4 && nvi->src[s]; ++s)
|
||||
if (nvi->src[s]->value->reg.file == NV_FILE_IMM)
|
||||
return FALSE;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
@ -505,6 +501,9 @@ nvc0_insn_append(struct nv_basic_block *b, struct nv_instruction *i)
|
|||
|
||||
i->bb = b;
|
||||
b->num_instructions++;
|
||||
|
||||
if (i->prev && i->prev->terminator)
|
||||
nvc0_insns_permute(i->prev, i);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
@ -205,6 +205,10 @@
|
|||
#define NV_CC_C 0x11
|
||||
#define NV_CC_A 0x12
|
||||
#define NV_CC_S 0x13
|
||||
#define NV_CC_INVERSE(cc) ((cc) ^ 0x7)
|
||||
/* for 1 bit predicates: */
|
||||
#define NV_CC_P 0
|
||||
#define NV_CC_NOT_P 1
|
||||
|
||||
#define NV_PC_MAX_INSTRUCTIONS 2048
|
||||
#define NV_PC_MAX_VALUES (NV_PC_MAX_INSTRUCTIONS * 4)
|
||||
|
@ -260,12 +264,6 @@ nv_op_supported_src_mods(uint opcode)
|
|||
return nvc0_op_info_table[opcode].mods;
|
||||
}
|
||||
|
||||
static INLINE boolean
|
||||
nv_op_predicateable(uint opcode)
|
||||
{
|
||||
return nvc0_op_info_table[opcode].predicate ? TRUE : FALSE;
|
||||
}
|
||||
|
||||
static INLINE uint
|
||||
nv_type_order(ubyte type)
|
||||
{
|
||||
|
@ -488,7 +486,7 @@ nv_alloc_instruction(struct nv_pc *pc, uint opcode)
|
|||
assert(pc->num_instructions < NV_PC_MAX_INSTRUCTIONS);
|
||||
|
||||
insn->opcode = opcode;
|
||||
insn->cc = 0;
|
||||
insn->cc = NV_CC_P;
|
||||
insn->indirect = -1;
|
||||
insn->predicate = -1;
|
||||
|
||||
|
|
|
@ -99,6 +99,7 @@ inst_removable(struct nv_instruction *nvi)
|
|||
nvc0_insn_refcount(nvi)));
|
||||
}
|
||||
|
||||
/* Check if we do not actually have to emit this instruction. */
|
||||
static INLINE boolean
|
||||
inst_is_noop(struct nv_instruction *nvi)
|
||||
{
|
||||
|
@ -1043,7 +1044,6 @@ nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b)
|
|||
return 0;
|
||||
}
|
||||
|
||||
#if 0
|
||||
/* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE.
|
||||
* Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with
|
||||
* BREAK and dummy ELSE block.
|
||||
|
@ -1064,24 +1064,92 @@ bb_is_if_else_endif(struct nv_basic_block *bb)
|
|||
}
|
||||
}
|
||||
|
||||
/* predicate instructions and remove branch at the end */
|
||||
/* Predicate instructions and delete any branch at the end if it is
|
||||
* not a break from a loop.
|
||||
*/
|
||||
static void
|
||||
predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b,
|
||||
struct nv_value *p, ubyte cc)
|
||||
struct nv_value *pred, uint8_t cc)
|
||||
{
|
||||
struct nv_instruction *nvi, *prev;
|
||||
int s;
|
||||
|
||||
if (!b->entry)
|
||||
return;
|
||||
for (nvi = b->entry; nvi; nvi = nvi->next) {
|
||||
prev = nvi;
|
||||
if (inst_is_noop(nvi))
|
||||
continue;
|
||||
for (s = 0; nvi->src[s]; ++s);
|
||||
assert(s < 6);
|
||||
nvi->predicate = s;
|
||||
nvi->cc = cc;
|
||||
nv_reference(pc, nvi, nvi->predicate, pred);
|
||||
}
|
||||
if (prev->opcode == NV_OP_BRA &&
|
||||
b->out_kind[0] != CFG_EDGE_LOOP_LEAVE &&
|
||||
b->out_kind[1] != CFG_EDGE_LOOP_LEAVE)
|
||||
nvc0_insn_delete(prev);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* NOTE: Run this after register allocation, we can just cut out the cflow
|
||||
* instructions and hook the predicates to the conditional OPs if they are
|
||||
* not using immediates; better than inserting SELECT to join definitions.
|
||||
*
|
||||
* NOTE: Should adapt prior optimization to make this possible more often.
|
||||
static INLINE boolean
|
||||
may_predicate_insn(struct nv_instruction *nvi, struct nv_value *pred)
|
||||
{
|
||||
if (nvi->def[0] && values_equal(nvi->def[0], pred))
|
||||
return FALSE;
|
||||
return nvc0_insn_is_predicateable(nvi);
|
||||
}
|
||||
|
||||
/* Transform IF/ELSE/ENDIF constructs into predicated instructions
|
||||
* where feasible.
|
||||
*/
|
||||
static int
|
||||
nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
|
||||
{
|
||||
struct nv_instruction *nvi;
|
||||
struct nv_value *pred;
|
||||
int k;
|
||||
int n0, n1; /* instruction counts of outgoing blocks */
|
||||
|
||||
if (bb_is_if_else_endif(b)) {
|
||||
assert(b->exit && b->exit->opcode == NV_OP_BRA);
|
||||
|
||||
assert(b->exit->predicate >= 0);
|
||||
pred = b->exit->src[b->exit->predicate]->value;
|
||||
|
||||
n1 = n0 = 0;
|
||||
for (nvi = b->out[0]->entry; nvi; nvi = nvi->next, ++n0)
|
||||
if (!may_predicate_insn(nvi, pred))
|
||||
break;
|
||||
if (!nvi) {
|
||||
/* we're after register allocation, so there always is an ELSE block */
|
||||
for (nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1)
|
||||
if (!may_predicate_insn(nvi, pred))
|
||||
break;
|
||||
}
|
||||
|
||||
/* 12 is an arbitrary limit */
|
||||
if (!nvi && n0 < 12 && n1 < 12) {
|
||||
predicate_instructions(ctx->pc, b->out[0], pred, !b->exit->cc);
|
||||
predicate_instructions(ctx->pc, b->out[1], pred, b->exit->cc);
|
||||
|
||||
nvc0_insn_delete(b->exit); /* delete the branch */
|
||||
|
||||
/* and a potential joinat before it */
|
||||
if (b->exit && b->exit->opcode == NV_OP_JOINAT)
|
||||
nvc0_insn_delete(b->exit);
|
||||
|
||||
/* remove join operations at the end of the conditional */
|
||||
k = (b->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) ? 1 : 0;
|
||||
if ((nvi = b->out[0]->out[k]->entry)) {
|
||||
nvi->join = 0;
|
||||
if (nvi->opcode == NV_OP_JOIN)
|
||||
nvc0_insn_delete(nvi);
|
||||
}
|
||||
}
|
||||
}
|
||||
DESCEND_ARBITRARY(k, nv_pass_flatten);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -225,7 +225,7 @@ nvc0_print_instruction(struct nv_instruction *i)
|
|||
|
||||
PRINT("%s", gree);
|
||||
if (NV_BASEOP(i->opcode) == NV_OP_SET)
|
||||
PRINT("set %s", nv_cond_name(i->set_cond));
|
||||
PRINT("%s %s", nvc0_opcode_name(i->opcode), nv_cond_name(i->set_cond));
|
||||
else
|
||||
if (i->saturate)
|
||||
PRINT("sat %s", nvc0_opcode_name(i->opcode));
|
||||
|
@ -278,7 +278,7 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] =
|
|||
{ NV_OP_MERGE, "merge", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 1, 0, 1, 0, 0 },
|
||||
{ NV_OP_PHI, "phi", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 },
|
||||
{ NV_OP_SELECT, "select", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 },
|
||||
{ NV_OP_NOP, "nop", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 0, 0, 0 },
|
||||
{ NV_OP_NOP, "nop", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 1, 0, 0, 0 },
|
||||
|
||||
{ NV_OP_LD, "ld", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 },
|
||||
{ NV_OP_ST, "st", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 },
|
||||
|
@ -343,18 +343,18 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] =
|
|||
{ NV_OP_MIN, "max", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 },
|
||||
{ NV_OP_MAX, "min", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 },
|
||||
{ NV_OP_MIN, "min", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 },
|
||||
{ NV_OP_SET, "set", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 },
|
||||
{ NV_OP_SET, "set", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 },
|
||||
{ NV_OP_SET, "set", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 },
|
||||
{ NV_OP_SET, "set", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 2, 2 },
|
||||
{ NV_OP_SET, "set", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 2, 2 },
|
||||
{ NV_OP_SET, "set", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 2, 2 },
|
||||
{ NV_OP_SHR, "sar", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 1, 0 },
|
||||
{ NV_OP_RCP, "rcp", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
|
||||
{ NV_OP_RSQ, "rsqrt", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
|
||||
{ NV_OP_LG2, "lg2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
|
||||
{ NV_OP_SIN, "sin", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 },
|
||||
{ NV_OP_COS, "cos", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 },
|
||||
{ NV_OP_EX2, "ex2", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 },
|
||||
{ NV_OP_PRESIN, "presin", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
|
||||
{ NV_OP_PREEX2, "preex2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
|
||||
{ NV_OP_RCP, "rcp", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
|
||||
{ NV_OP_RSQ, "rsqrt", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
|
||||
{ NV_OP_LG2, "lg2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
|
||||
{ NV_OP_SIN, "sin", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
|
||||
{ NV_OP_COS, "cos", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
|
||||
{ NV_OP_EX2, "ex2", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
|
||||
{ NV_OP_PRESIN, "presin", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
|
||||
{ NV_OP_PREEX2, "preex2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
|
||||
{ NV_OP_SAT, "sat", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
|
||||
|
||||
{ NV_OP_SET_F32_AND, "and set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
|
||||
|
@ -369,7 +369,7 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] =
|
|||
|
||||
{ NV_OP_ADD, "sub", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 1, 0 },
|
||||
|
||||
{ NV_OP_SET, "fset", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 },
|
||||
{ NV_OP_SET, "fset", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 2, 2 },
|
||||
|
||||
{ NV_OP_TXG, "texgrad", NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 },
|
||||
|
||||
|
|
|
@ -659,7 +659,7 @@ bld_kil(struct bld_context *bld, struct nv_value *src)
|
|||
|
||||
static void
|
||||
bld_flow(struct bld_context *bld, uint opcode,
|
||||
struct nv_value *src, struct nv_basic_block *target,
|
||||
struct nv_value *pred, uint8_t cc, struct nv_basic_block *target,
|
||||
boolean reconverge)
|
||||
{
|
||||
struct nv_instruction *nvi;
|
||||
|
@ -670,8 +670,10 @@ bld_flow(struct bld_context *bld, uint opcode,
|
|||
nvi = new_instruction(bld->pc, opcode);
|
||||
nvi->target = target;
|
||||
nvi->terminator = 1;
|
||||
if (src)
|
||||
bld_src_predicate(bld, nvi, 0, src);
|
||||
if (pred) {
|
||||
nvi->cc = cc;
|
||||
bld_src_predicate(bld, nvi, 0, pred);
|
||||
}
|
||||
}
|
||||
|
||||
static ubyte
|
||||
|
@ -1584,6 +1586,7 @@ bld_instruction(struct bld_context *bld,
|
|||
case TGSI_OPCODE_IF:
|
||||
{
|
||||
struct nv_basic_block *b = new_basic_block(bld->pc);
|
||||
struct nv_value *pred = emit_fetch(bld, insn, 0, 0);
|
||||
|
||||
assert(bld->cond_lvl < BLD_MAX_COND_NESTING);
|
||||
|
||||
|
@ -1592,10 +1595,19 @@ bld_instruction(struct bld_context *bld,
|
|||
bld->join_bb[bld->cond_lvl] = bld->pc->current_block;
|
||||
bld->cond_bb[bld->cond_lvl] = bld->pc->current_block;
|
||||
|
||||
src1 = bld_setp(bld, NV_OP_SET_U32, NV_CC_EQ,
|
||||
emit_fetch(bld, insn, 0, 0), bld->zero);
|
||||
if (pred->insn && NV_BASEOP(pred->insn->opcode) == NV_OP_SET) {
|
||||
pred = bld_clone(bld, pred->insn);
|
||||
pred->reg.size = 1;
|
||||
pred->reg.file = NV_FILE_PRED;
|
||||
if (pred->insn->opcode == NV_OP_FSET_F32)
|
||||
pred->insn->opcode = NV_OP_SET_F32;
|
||||
} else {
|
||||
pred = bld_setp(bld, NV_OP_SET_U32, NV_CC_NE | NV_CC_U,
|
||||
pred, bld->zero);
|
||||
}
|
||||
assert(!mask);
|
||||
|
||||
bld_flow(bld, NV_OP_BRA, src1, NULL, (bld->cond_lvl == 0));
|
||||
bld_flow(bld, NV_OP_BRA, pred, NV_CC_NOT_P, NULL, (bld->cond_lvl == 0));
|
||||
|
||||
++bld->cond_lvl;
|
||||
bld_new_block(bld, b);
|
||||
|
@ -1661,7 +1673,7 @@ bld_instruction(struct bld_context *bld,
|
|||
{
|
||||
struct nv_basic_block *bb = bld->brkt_bb[bld->loop_lvl - 1];
|
||||
|
||||
bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE);
|
||||
bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE);
|
||||
|
||||
if (bld->out_kind == CFG_EDGE_FORWARD) /* else we already had BRK/CONT */
|
||||
nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_LOOP_LEAVE);
|
||||
|
@ -1673,7 +1685,7 @@ bld_instruction(struct bld_context *bld,
|
|||
{
|
||||
struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1];
|
||||
|
||||
bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE);
|
||||
bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE);
|
||||
|
||||
nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK);
|
||||
|
||||
|
@ -1689,7 +1701,7 @@ bld_instruction(struct bld_context *bld,
|
|||
struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1];
|
||||
|
||||
if (bld->out_kind != CFG_EDGE_FAKE) { /* else we already had BRK/CONT */
|
||||
bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE);
|
||||
bld_flow(bld, NV_OP_BRA, NULL, NV_CC_P, bb, FALSE);
|
||||
|
||||
nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue