vc4: Make the pack-to-unorm instructions be non-SSA.
This helps ensure that the register allocator doesn't force the later pack operations to insert extra MOVs.

total instructions in shared programs: 98170 -> 98159 (-0.01%)
instructions in affected programs: 2134 -> 2123 (-0.52%)
This commit is contained in:
parent
0bba4fa070
commit
69ef08d303
|
@ -839,14 +839,13 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
|
|||
}
|
||||
|
||||
if (instr->op == nir_op_pack_unorm_4x8) {
|
||||
struct qreg result;
|
||||
struct qreg result = qir_get_temp(c);
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
struct qreg src = ntq_get_src(c, instr->src[0].src,
|
||||
instr->src[0].swizzle[i]);
|
||||
if (i == 0)
|
||||
result = qir_PACK_8888_F(c, src);
|
||||
else
|
||||
result = qir_PACK_8_F(c, result, src, i);
|
||||
qir_PACK_8_F(c, result,
|
||||
ntq_get_src(c, instr->src[0].src,
|
||||
instr->src[0].swizzle[i]),
|
||||
i);
|
||||
}
|
||||
struct qreg *dest = ntq_get_dest(c, &instr->dest.dest);
|
||||
*dest = result;
|
||||
|
|
|
@ -71,11 +71,11 @@ static const struct qir_op_info qir_op_info[] = {
|
|||
[QOP_RSQ] = { "rsq", 1, 1, false, true },
|
||||
[QOP_EXP2] = { "exp2", 1, 2, false, true },
|
||||
[QOP_LOG2] = { "log2", 1, 2, false, true },
|
||||
[QOP_PACK_8888_F] = { "pack_8888_f", 1, 1, false, true },
|
||||
[QOP_PACK_8A_F] = { "pack_8a_f", 1, 2, false, true },
|
||||
[QOP_PACK_8B_F] = { "pack_8b_f", 1, 2, false, true },
|
||||
[QOP_PACK_8C_F] = { "pack_8c_f", 1, 2, false, true },
|
||||
[QOP_PACK_8D_F] = { "pack_8d_f", 1, 2, false, true },
|
||||
[QOP_PACK_8888_F] = { "pack_8888_f", 1, 1 },
|
||||
[QOP_PACK_8A_F] = { "pack_8a_f", 1, 1 },
|
||||
[QOP_PACK_8B_F] = { "pack_8b_f", 1, 1 },
|
||||
[QOP_PACK_8C_F] = { "pack_8c_f", 1, 1 },
|
||||
[QOP_PACK_8D_F] = { "pack_8d_f", 1, 1 },
|
||||
[QOP_PACK_SCALED] = { "pack_scaled", 1, 2, false, true },
|
||||
[QOP_TLB_DISCARD_SETUP] = { "discard", 0, 1, true },
|
||||
[QOP_TLB_STENCIL_SETUP] = { "tlb_stencil_setup", 0, 1, true },
|
||||
|
|
|
@ -534,6 +534,16 @@ qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b) \
|
|||
qir_emit(c, qir_inst(QOP_##name, c->undef, a, b)); \
|
||||
}
|
||||
|
||||
#define QIR_PACK(name) \
|
||||
static inline struct qreg \
|
||||
qir_##name(struct vc4_compile *c, struct qreg dest, struct qreg a) \
|
||||
{ \
|
||||
qir_emit(c, qir_inst(QOP_##name, dest, a, c->undef)); \
|
||||
if (dest.file == QFILE_TEMP) \
|
||||
c->defs[dest.index] = NULL; \
|
||||
return dest; \
|
||||
}
|
||||
|
||||
QIR_ALU1(MOV)
|
||||
QIR_ALU2(FADD)
|
||||
QIR_ALU2(FSUB)
|
||||
|
@ -572,10 +582,10 @@ QIR_ALU1(EXP2)
|
|||
QIR_ALU1(LOG2)
|
||||
QIR_ALU2(PACK_SCALED)
|
||||
QIR_ALU1(PACK_8888_F)
|
||||
QIR_ALU2(PACK_8A_F)
|
||||
QIR_ALU2(PACK_8B_F)
|
||||
QIR_ALU2(PACK_8C_F)
|
||||
QIR_ALU2(PACK_8D_F)
|
||||
QIR_PACK(PACK_8A_F)
|
||||
QIR_PACK(PACK_8B_F)
|
||||
QIR_PACK(PACK_8C_F)
|
||||
QIR_PACK(PACK_8D_F)
|
||||
QIR_ALU1(VARY_ADD_C)
|
||||
QIR_NODST_2(TEX_S)
|
||||
QIR_NODST_2(TEX_T)
|
||||
|
@ -627,11 +637,12 @@ qir_UNPACK_16_I(struct vc4_compile *c, struct qreg src, int i)
|
|||
}
|
||||
|
||||
static inline struct qreg
|
||||
qir_PACK_8_F(struct vc4_compile *c, struct qreg rest, struct qreg val, int chan)
|
||||
qir_PACK_8_F(struct vc4_compile *c, struct qreg dest, struct qreg val, int chan)
|
||||
{
|
||||
struct qreg t = qir_get_temp(c);
|
||||
qir_emit(c, qir_inst(QOP_PACK_8A_F + chan, t, rest, val));
|
||||
return t;
|
||||
qir_emit(c, qir_inst(QOP_PACK_8A_F + chan, dest, val, c->undef));
|
||||
if (dest.file == QFILE_TEMP)
|
||||
c->defs[dest.index] = NULL;
|
||||
return dest;
|
||||
}
|
||||
|
||||
static inline struct qreg
|
||||
|
|
|
@ -336,28 +336,12 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
|
|||
case QOP_PACK_8B_F:
|
||||
case QOP_PACK_8C_F:
|
||||
case QOP_PACK_8D_F:
|
||||
/* If dst doesn't happen to already contain src[0],
|
||||
* then we have to move it in.
|
||||
*/
|
||||
if (qinst->src[0].file != QFILE_NULL &&
|
||||
(src[0].mux != dst.mux || src[0].addr != dst.addr)) {
|
||||
/* Don't overwrite src1 while setting up
|
||||
* the dst!
|
||||
*/
|
||||
if (dst.mux == src[1].mux &&
|
||||
dst.addr == src[1].addr) {
|
||||
queue(c, qpu_m_MOV(qpu_rb(31), src[1]));
|
||||
src[1] = qpu_rb(31);
|
||||
}
|
||||
|
||||
queue(c, qpu_m_MOV(dst, src[0]));
|
||||
}
|
||||
|
||||
queue(c, qpu_m_MOV(dst, src[1]));
|
||||
*last_inst(c) |= QPU_PM;
|
||||
*last_inst(c) |= QPU_SET_FIELD(QPU_PACK_MUL_8A +
|
||||
qinst->op - QOP_PACK_8A_F,
|
||||
QPU_PACK);
|
||||
queue(c,
|
||||
qpu_m_MOV(dst, src[0]) |
|
||||
QPU_PM |
|
||||
QPU_SET_FIELD(QPU_PACK_MUL_8A +
|
||||
qinst->op - QOP_PACK_8A_F,
|
||||
QPU_PACK));
|
||||
break;
|
||||
|
||||
case QOP_FRAG_X:
|
||||
|
|
Loading…
Reference in New Issue