v3d: Kill off vir_PF(), which is hard to use right.

You were allowed to pass in any old temp so that you could hopefully fold
the PF up into the def of the temp.  If we couldn't find one, it
implicitly generated a MOV(nop, reg).  However, that PF could have
different behavior depending on whether the def being folded into was a
float or int opcode, which the caller doesn't necessarily control.
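
For illustration, a sketch using helpers that appear in this diff: these
two calls look equivalent from the caller's side, but the PF folds into
whichever instruction defined the temp, so the flag semantics follow that
opcode rather than the caller's intent:

    vir_PF(c, vir_FMOV(c, src), V3D_QPU_PF_PUSHZ); /* PF folds into a float FMOV */
    vir_PF(c, vir_MOV(c, src), V3D_QPU_PF_PUSHZ);  /* PF folds into an integer MOV */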

Due to the fragility of the function, just switch all callers over to
vir_set_pf().  This also encourages the callers to use a _dest call for
the inst they're putting the PF on, eliminating a bunch of temps in the
pre-optimization VIR.
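
The replacement pattern, as seen throughout the diff below: the caller
emits the flag-setting instruction explicitly (writing the nop register
when the result isn't otherwise needed) and attaches the PF to that exact
instruction:

    vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), c->execute),
               V3D_QPU_PF_PUSHZ);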

shader-db says the change is in the noise:

total instructions in shared programs: 6226247 -> 6227184 (0.02%)
instructions in affected programs: 851068 -> 852005 (0.11%)
Eric Anholt 2019-02-13 12:09:02 -08:00
parent 6186a8d44e
commit 3022b4bd82
3 changed files with 36 additions and 70 deletions

src/broadcom/compiler/nir_to_vir.c

@@ -283,8 +283,10 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
                                       instr->num_components - 2);
         }
 
-        if (c->execute.file != QFILE_NULL)
-                vir_PF(c, c->execute, V3D_QPU_PF_PUSHZ);
+        if (c->execute.file != QFILE_NULL) {
+                vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), c->execute),
+                           V3D_QPU_PF_PUSHZ);
+        }
 
         struct qreg dest;
         if (config == ~0)
@@ -398,7 +400,8 @@ ntq_store_dest(struct v3d_compile *c, nir_dest *dest, int chan,
                         /* Set the flags to the current exec mask.
                          */
                         c->cursor = vir_before_inst(last_inst);
-                        vir_PF(c, c->execute, V3D_QPU_PF_PUSHZ);
+                        vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), c->execute),
+                                   V3D_QPU_PF_PUSHZ);
                         c->cursor = vir_after_inst(last_inst);
 
                         vir_set_cond(last_inst, V3D_QPU_COND_IFA);
@@ -540,9 +543,9 @@ ntq_fsign(struct v3d_compile *c, struct qreg src)
         struct qreg t = vir_get_temp(c);
 
         vir_MOV_dest(c, t, vir_uniform_f(c, 0.0));
-        vir_PF(c, vir_FMOV(c, src), V3D_QPU_PF_PUSHZ);
+        vir_set_pf(vir_FMOV_dest(c, vir_nop_reg(), src), V3D_QPU_PF_PUSHZ);
         vir_MOV_cond(c, V3D_QPU_COND_IFNA, t, vir_uniform_f(c, 1.0));
-        vir_PF(c, vir_FMOV(c, src), V3D_QPU_PF_PUSHN);
+        vir_set_pf(vir_FMOV_dest(c, vir_nop_reg(), src), V3D_QPU_PF_PUSHN);
         vir_MOV_cond(c, V3D_QPU_COND_IFA, t, vir_uniform_f(c, -1.0));
         return vir_MOV(c, t);
 }
@@ -789,7 +792,8 @@ ntq_emit_bool_to_cond(struct v3d_compile *c, nir_src src)
         return cond;
 
 out:
-        vir_PF(c, ntq_get_src(c, src, 0), V3D_QPU_PF_PUSHZ);
+        vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), ntq_get_src(c, src, 0)),
+                   V3D_QPU_PF_PUSHZ);
         return V3D_QPU_COND_IFNA;
 }
 
@@ -873,7 +877,8 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
                 break;
 
         case nir_op_i2b32:
         case nir_op_f2b32:
-                vir_PF(c, src[0], V3D_QPU_PF_PUSHZ);
+                vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), src[0]),
+                           V3D_QPU_PF_PUSHZ);
                 result = vir_MOV(c, vir_SEL(c, V3D_QPU_COND_IFNA,
                                             vir_uniform_ui(c, ~0),
                                             vir_uniform_ui(c, 0)));
@@ -967,7 +972,8 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
                 break;
 
         case nir_op_fcsel:
-                vir_PF(c, src[0], V3D_QPU_PF_PUSHZ);
+                vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), src[0]),
+                           V3D_QPU_PF_PUSHZ);
                 result = vir_MOV(c, vir_SEL(c, V3D_QPU_COND_IFNA,
                                             src[1], src[2]));
                 break;
@@ -1033,7 +1039,8 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
                 break;
 
         case nir_op_uadd_carry:
-                vir_PF(c, vir_ADD(c, src[0], src[1]), V3D_QPU_PF_PUSHC);
+                vir_set_pf(vir_ADD_dest(c, vir_nop_reg(), src[0], src[1]),
+                           V3D_QPU_PF_PUSHC);
                 result = vir_MOV(c, vir_SEL(c, V3D_QPU_COND_IFA,
                                             vir_uniform_ui(c, ~0),
                                             vir_uniform_ui(c, 0)));
@@ -1832,7 +1839,7 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
                 break;
 
         case nir_intrinsic_load_helper_invocation:
-                vir_PF(c, vir_MSF(c), V3D_QPU_PF_PUSHZ);
+                vir_set_pf(vir_MSF_dest(c, vir_nop_reg()), V3D_QPU_PF_PUSHZ);
                 ntq_store_dest(c, &instr->dest, 0,
                                vir_MOV(c, vir_SEL(c, V3D_QPU_COND_IFA,
                                                   vir_uniform_ui(c, ~0),
@@ -1886,7 +1893,8 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
 
         case nir_intrinsic_discard:
                 if (c->execute.file != QFILE_NULL) {
-                        vir_PF(c, c->execute, V3D_QPU_PF_PUSHZ);
+                        vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), c->execute),
+                                   V3D_QPU_PF_PUSHZ);
                         vir_set_cond(vir_SETMSF_dest(c, vir_nop_reg(),
                                                      vir_uniform_ui(c, 0)),
                                      V3D_QPU_COND_IFA);
@@ -2103,7 +2111,7 @@ ntq_emit_nonuniform_if(struct v3d_compile *c, nir_if *if_stmt)
         /* Jump to ELSE if nothing is active for THEN, otherwise fall
          * through.
          */
-        vir_PF(c, c->execute, V3D_QPU_PF_PUSHZ);
+        vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), c->execute), V3D_QPU_PF_PUSHZ);
         vir_BRANCH(c, V3D_QPU_BRANCH_COND_ALLNA);
         vir_link_blocks(c->cur_block, else_block);
         vir_link_blocks(c->cur_block, then_block);
@@ -2117,14 +2125,16 @@ ntq_emit_nonuniform_if(struct v3d_compile *c, nir_if *if_stmt)
                  * active channels update their execute flags to point to
                  * ENDIF
                  */
-                vir_PF(c, c->execute, V3D_QPU_PF_PUSHZ);
+                vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), c->execute),
+                           V3D_QPU_PF_PUSHZ);
                 vir_MOV_cond(c, V3D_QPU_COND_IFA, c->execute,
                              vir_uniform_ui(c, after_block->index));
 
                 /* If everything points at ENDIF, then jump there immediately. */
-                vir_PF(c, vir_XOR(c, c->execute,
-                                  vir_uniform_ui(c, after_block->index)),
-                       V3D_QPU_PF_PUSHZ);
+                vir_set_pf(vir_XOR_dest(c, vir_nop_reg(),
+                                        c->execute,
+                                        vir_uniform_ui(c, after_block->index)),
+                           V3D_QPU_PF_PUSHZ);
                 vir_BRANCH(c, V3D_QPU_BRANCH_COND_ALLA);
                 vir_link_blocks(c->cur_block, after_block);
                 vir_link_blocks(c->cur_block, else_block);
@@ -2159,13 +2169,15 @@ ntq_emit_jump(struct v3d_compile *c, nir_jump_instr *jump)
 {
         switch (jump->type) {
         case nir_jump_break:
-                vir_PF(c, c->execute, V3D_QPU_PF_PUSHZ);
+                vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), c->execute),
+                           V3D_QPU_PF_PUSHZ);
                 vir_MOV_cond(c, V3D_QPU_COND_IFA, c->execute,
                              vir_uniform_ui(c, c->loop_break_block->index));
                 break;
 
         case nir_jump_continue:
-                vir_PF(c, c->execute, V3D_QPU_PF_PUSHZ);
+                vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), c->execute),
+                           V3D_QPU_PF_PUSHZ);
                 vir_MOV_cond(c, V3D_QPU_COND_IFA, c->execute,
                              vir_uniform_ui(c, c->loop_cont_block->index));
                 break;
@@ -2251,13 +2263,14 @@ ntq_emit_loop(struct v3d_compile *c, nir_loop *loop)
          *
          * XXX: Use the .ORZ flags update, instead.
          */
-        vir_PF(c, vir_XOR(c,
-                          c->execute,
-                          vir_uniform_ui(c, c->loop_cont_block->index)),
-               V3D_QPU_PF_PUSHZ);
+        vir_set_pf(vir_XOR_dest(c,
+                                vir_nop_reg(),
+                                c->execute,
+                                vir_uniform_ui(c, c->loop_cont_block->index)),
+                   V3D_QPU_PF_PUSHZ);
         vir_MOV_cond(c, V3D_QPU_COND_IFA, c->execute, vir_uniform_ui(c, 0));
 
-        vir_PF(c, c->execute, V3D_QPU_PF_PUSHZ);
+        vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), c->execute), V3D_QPU_PF_PUSHZ);
         struct qinst *branch = vir_BRANCH(c, V3D_QPU_BRANCH_COND_ANYA);
 
         /* Pixels that were not dispatched or have been discarded should not

src/broadcom/compiler/v3d_compiler.h

@@ -837,8 +837,6 @@ bool vir_init_reg_sets(struct v3d_compiler *compiler);
 
 bool v3d_gl_format_is_return_32(GLenum format);
 
-void vir_PF(struct v3d_compile *c, struct qreg src, enum v3d_qpu_pf pf);
-
 static inline bool
 quniform_contents_is_texture_p0(enum quniform_contents contents)
 {

src/broadcom/compiler/vir.c

@@ -1059,51 +1059,6 @@ vir_uniform(struct v3d_compile *c,
         return vir_reg(QFILE_UNIF, uniform);
 }
 
-static bool
-vir_can_set_flags(struct v3d_compile *c, struct qinst *inst)
-{
-        if (c->devinfo->ver >= 40 && (v3d_qpu_reads_vpm(&inst->qpu) ||
-                                      v3d_qpu_uses_sfu(&inst->qpu))) {
-                return false;
-        }
-
-        if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
-            (inst->qpu.alu.add.op == V3D_QPU_A_NOP &&
-             inst->qpu.alu.mul.op == V3D_QPU_M_NOP)) {
-                return false;
-        }
-
-        return true;
-}
-
-void
-vir_PF(struct v3d_compile *c, struct qreg src, enum v3d_qpu_pf pf)
-{
-        struct qinst *last_inst = NULL;
-
-        if (!list_empty(&c->cur_block->instructions)) {
-                last_inst = (struct qinst *)c->cur_block->instructions.prev;
-
-                /* Can't stuff the PF into the last last inst if our cursor
-                 * isn't pointing after it.
-                 */
-                struct vir_cursor after_inst = vir_after_inst(last_inst);
-                if (c->cursor.mode != after_inst.mode ||
-                    c->cursor.link != after_inst.link)
-                        last_inst = NULL;
-        }
-
-        if (src.file != QFILE_TEMP ||
-            !c->defs[src.index] ||
-            last_inst != c->defs[src.index] ||
-            !vir_can_set_flags(c, last_inst)) {
-                /* XXX: Make the MOV be the appropriate type */
-                last_inst = vir_MOV_dest(c, vir_nop_reg(), src);
-        }
-
-        vir_set_pf(last_inst, pf);
-}
-
 #define OPTPASS(func)                                                   \
         do {                                                            \
                 bool stage_progress = func(c);                          \