From 69ef08d303cdf153fe2432a7e40faccae5d62aab Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Wed, 5 Aug 2015 20:31:21 -0700
Subject: [PATCH] vc4: Make the pack-to-unorm instructions be non-SSA.

This helps ensure that the register allocator doesn't force the later pack
operations to insert extra MOVs.

total instructions in shared programs: 98170 -> 98159 (-0.01%)
instructions in affected programs: 2134 -> 2123 (-0.52%)
---
 src/gallium/drivers/vc4/vc4_program.c | 13 ++++++------
 src/gallium/drivers/vc4/vc4_qir.c | 10 ++++-----
 src/gallium/drivers/vc4/vc4_qir.h | 27 +++++++++++++++++--------
 src/gallium/drivers/vc4/vc4_qpu_emit.c | 28 ++++++--------------------
 4 files changed, 36 insertions(+), 42 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 13c472152d8..303132f3a3b 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -839,14 +839,13 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
         }
 
         if (instr->op == nir_op_pack_unorm_4x8) {
-                struct qreg result;
+                struct qreg result = qir_get_temp(c);
+
                 for (int i = 0; i < 4; i++) {
-                        struct qreg src = ntq_get_src(c, instr->src[0].src,
-                                                      instr->src[0].swizzle[i]);
-                        if (i == 0)
-                                result = qir_PACK_8888_F(c, src);
-                        else
-                                result = qir_PACK_8_F(c, result, src, i);
+                        qir_PACK_8_F(c, result,
+                                     ntq_get_src(c, instr->src[0].src,
+                                                 instr->src[0].swizzle[i]),
+                                     i);
                 }
                 struct qreg *dest = ntq_get_dest(c, &instr->dest.dest);
                 *dest = result;
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index 90d1c1ff69b..3a37451a3ca 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -71,11 +71,11 @@ static const struct qir_op_info qir_op_info[] = {
         [QOP_RSQ] = { "rsq", 1, 1, false, true },
         [QOP_EXP2] = { "exp2", 1, 2, false, true },
         [QOP_LOG2] = { "log2", 1, 2, false, true },
-        [QOP_PACK_8888_F] = { "pack_8888_f", 1, 1, false, true },
-        [QOP_PACK_8A_F] = { "pack_8a_f", 1, 2, false, true },
-        [QOP_PACK_8B_F] = { "pack_8b_f", 1, 2, false, true },
-        [QOP_PACK_8C_F] = { "pack_8c_f", 1, 2, false, true },
-        [QOP_PACK_8D_F] = { "pack_8d_f", 1, 2, false, true },
+        [QOP_PACK_8888_F] = { "pack_8888_f", 1, 1 },
+        [QOP_PACK_8A_F] = { "pack_8a_f", 1, 1 },
+        [QOP_PACK_8B_F] = { "pack_8b_f", 1, 1 },
+        [QOP_PACK_8C_F] = { "pack_8c_f", 1, 1 },
+        [QOP_PACK_8D_F] = { "pack_8d_f", 1, 1 },
         [QOP_PACK_SCALED] = { "pack_scaled", 1, 2, false, true },
         [QOP_TLB_DISCARD_SETUP] = { "discard", 0, 1, true },
         [QOP_TLB_STENCIL_SETUP] = { "tlb_stencil_setup", 0, 1, true },
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index cade795c12a..ca93ab8641f 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -534,6 +534,16 @@ qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b) \
         qir_emit(c, qir_inst(QOP_##name, c->undef, a, b));              \
 }
 
+#define QIR_PACK(name)                                                  \
+static inline struct qreg                                               \
+qir_##name(struct vc4_compile *c, struct qreg dest, struct qreg a)      \
+{                                                                       \
+        qir_emit(c, qir_inst(QOP_##name, dest, a, c->undef));           \
+        if (dest.file == QFILE_TEMP)                                    \
+                c->defs[dest.index] = NULL;                             \
+        return dest;                                                    \
+}
+
 QIR_ALU1(MOV)
 QIR_ALU2(FADD)
 QIR_ALU2(FSUB)
@@ -572,10 +582,10 @@ QIR_ALU1(EXP2)
 QIR_ALU1(LOG2)
 QIR_ALU2(PACK_SCALED)
 QIR_ALU1(PACK_8888_F)
-QIR_ALU2(PACK_8A_F)
-QIR_ALU2(PACK_8B_F)
-QIR_ALU2(PACK_8C_F)
-QIR_ALU2(PACK_8D_F)
+QIR_PACK(PACK_8A_F)
+QIR_PACK(PACK_8B_F)
+QIR_PACK(PACK_8C_F)
+QIR_PACK(PACK_8D_F)
 QIR_ALU1(VARY_ADD_C)
 QIR_NODST_2(TEX_S)
 QIR_NODST_2(TEX_T)
@@ -627,11 +637,12 @@ qir_UNPACK_16_I(struct vc4_compile *c, struct qreg src, int i)
 }
 
 static inline struct qreg
-qir_PACK_8_F(struct vc4_compile *c, struct qreg rest, struct qreg val, int chan)
+qir_PACK_8_F(struct vc4_compile *c, struct qreg dest, struct qreg val, int chan)
 {
-        struct qreg t = qir_get_temp(c);
-        qir_emit(c, qir_inst(QOP_PACK_8A_F + chan, t, rest, val));
-        return t;
+        qir_emit(c, qir_inst(QOP_PACK_8A_F + chan, dest, val, c->undef));
+        if (dest.file == QFILE_TEMP)
+                c->defs[dest.index] = NULL;
+        return dest;
 }
 
 static inline struct qreg
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index f324056258c..e89db3e4f05 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -336,28 +336,12 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
                 case QOP_PACK_8B_F:
                 case QOP_PACK_8C_F:
                 case QOP_PACK_8D_F:
-                        /* If dst doesn't happen to already contain src[0],
-                         * then we have to move it in.
-                         */
-                        if (qinst->src[0].file != QFILE_NULL &&
-                            (src[0].mux != dst.mux || src[0].addr != dst.addr)) {
-                                /* Don't overwrite src1 while setting up
-                                 * the dst!
-                                 */
-                                if (dst.mux == src[1].mux &&
-                                    dst.addr == src[1].addr) {
-                                        queue(c, qpu_m_MOV(qpu_rb(31), src[1]));
-                                        src[1] = qpu_rb(31);
-                                }
-
-                                queue(c, qpu_m_MOV(dst, src[0]));
-                        }
-
-                        queue(c, qpu_m_MOV(dst, src[1]));
-                        *last_inst(c) |= QPU_PM;
-                        *last_inst(c) |= QPU_SET_FIELD(QPU_PACK_MUL_8A +
-                                                       qinst->op - QOP_PACK_8A_F,
-                                                       QPU_PACK);
+                        queue(c,
+                              qpu_m_MOV(dst, src[0]) |
+                              QPU_PM |
+                              QPU_SET_FIELD(QPU_PACK_MUL_8A +
+                                            qinst->op - QOP_PACK_8A_F,
+                                            QPU_PACK));
                         break;
 
                 case QOP_FRAG_X: