vc4: Add support for MUL output rotation.
Extracted from a patch by jonasarrow on GitHub.
This commit is contained in:
parent
074f1f3c0c
commit
8ce6526178
|
@ -90,6 +90,14 @@ try_copy_prop(struct vc4_compile *c, struct qinst *inst, struct qinst **movs)
|
|||
continue;
|
||||
}
|
||||
|
||||
/* Mul rotation's source needs to be in an r0-r3 accumulator,
|
||||
* so no uniforms or regfile-a/r4 unpacking allowed.
|
||||
*/
|
||||
if (inst->op == QOP_ROT_MUL &&
|
||||
(mov->src[0].file != QFILE_TEMP ||
|
||||
mov->src[0].pack))
|
||||
continue;
|
||||
|
||||
uint8_t unpack;
|
||||
if (mov->src[0].pack) {
|
||||
/* Make sure that the meaning of the unpack
|
||||
|
|
|
@ -86,6 +86,8 @@ static const struct qir_op_info qir_op_info[] = {
|
|||
[QOP_LOAD_IMM_U2] = { "load_imm_u2", 0, 1 },
|
||||
[QOP_LOAD_IMM_I2] = { "load_imm_i2", 0, 1 },
|
||||
|
||||
[QOP_ROT_MUL] = { "rot_mul", 0, 2 },
|
||||
|
||||
[QOP_BRANCH] = { "branch", 0, 0, true },
|
||||
[QOP_UNIFORMS_RESET] = { "uniforms_reset", 0, 2, true },
|
||||
};
|
||||
|
@ -164,6 +166,7 @@ qir_is_mul(struct qinst *inst)
|
|||
case QOP_V8MAX:
|
||||
case QOP_V8ADDS:
|
||||
case QOP_V8SUBS:
|
||||
case QOP_ROT_MUL:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
|
|
|
@ -168,6 +168,8 @@ enum qop {
|
|||
*/
|
||||
QOP_LOAD_IMM_I2,
|
||||
|
||||
QOP_ROT_MUL,
|
||||
|
||||
/* Jumps to block->successor[0] if the qinst->cond (as a
|
||||
* QPU_COND_BRANCH_*) passes, or block->successor[1] if not. Note
|
||||
* that block->successor[1] may be unset if the condition is ALWAYS.
|
||||
|
@ -822,6 +824,16 @@ qir_LOAD_IMM_I2(struct vc4_compile *c, uint32_t val)
|
|||
c->undef));
|
||||
}
|
||||
|
||||
/** Shifts the multiply output to the right by rot channels */
|
||||
static inline struct qreg
|
||||
qir_ROT_MUL(struct vc4_compile *c, struct qreg val, uint32_t rot)
|
||||
{
|
||||
return qir_emit_def(c, qir_inst(QOP_ROT_MUL, c->undef,
|
||||
val,
|
||||
qir_reg(QFILE_LOAD_IMM,
|
||||
QPU_SMALL_IMM_MUL_ROT + rot)));
|
||||
}
|
||||
|
||||
static inline void
|
||||
qir_MOV_cond(struct vc4_compile *c, uint8_t cond,
|
||||
struct qreg dest, struct qreg src)
|
||||
|
|
|
@ -234,6 +234,19 @@ qpu_m_alu2(enum qpu_op_mul op,
|
|||
return inst;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
qpu_m_rot(struct qpu_reg dst, struct qpu_reg src0, int rot)
|
||||
{
|
||||
uint64_t inst = 0;
|
||||
inst = qpu_m_alu2(QPU_M_V8MIN, dst, src0, src0);
|
||||
|
||||
inst = QPU_UPDATE_FIELD(inst, QPU_SIG_SMALL_IMM, QPU_SIG);
|
||||
inst = QPU_UPDATE_FIELD(inst, QPU_SMALL_IMM_MUL_ROT + rot,
|
||||
QPU_SMALL_IMM);
|
||||
|
||||
return inst;
|
||||
}
|
||||
|
||||
static bool
|
||||
merge_fields(uint64_t *merge,
|
||||
uint64_t a, uint64_t b,
|
||||
|
|
|
@ -150,6 +150,7 @@ uint64_t qpu_set_sig(uint64_t inst, uint32_t sig) ATTRIBUTE_CONST;
|
|||
uint64_t qpu_set_cond_add(uint64_t inst, uint32_t cond) ATTRIBUTE_CONST;
|
||||
uint64_t qpu_set_cond_mul(uint64_t inst, uint32_t cond) ATTRIBUTE_CONST;
|
||||
uint32_t qpu_encode_small_immediate(uint32_t i) ATTRIBUTE_CONST;
|
||||
uint64_t qpu_m_rot(struct qpu_reg dst, struct qpu_reg src, int rot) ATTRIBUTE_CONST;
|
||||
|
||||
bool qpu_waddr_is_tlb(uint32_t waddr) ATTRIBUTE_CONST;
|
||||
bool qpu_inst_is_tlb(uint64_t inst) ATTRIBUTE_CONST;
|
||||
|
|
|
@ -434,6 +434,20 @@ vc4_generate_code_block(struct vc4_compile *c,
|
|||
|
||||
case QOP_LOAD_IMM_I2:
|
||||
queue(block, qpu_load_imm_i2(dst, qinst->src[0].index));
|
||||
|
||||
case QOP_ROT_MUL:
|
||||
/* Rotation at the hardware level occurs on the inputs
|
||||
* to the MUL unit, and they must be accumulators in
|
||||
* order to have the time necessary to move things.
|
||||
*/
|
||||
assert(src[0].mux <= QPU_MUX_R3);
|
||||
|
||||
queue(block,
|
||||
qpu_m_rot(dst, src[0], qinst->src[1].index -
|
||||
QPU_SMALL_IMM_MUL_ROT) | unpack);
|
||||
set_last_cond_mul(block, qinst->cond);
|
||||
handled_qinst_cond = true;
|
||||
set_last_dst_pack(block, qinst);
|
||||
break;
|
||||
|
||||
case QOP_MS_MASK:
|
||||
|
|
Loading…
Reference in New Issue