166 lines
5.9 KiB
C
166 lines
5.9 KiB
C
/*
|
|
* Copyright © 2021 Raspberry Pi Ltd
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
* IN THE SOFTWARE.
|
|
*/
|
|
|
|
/**
|
|
* @file v3d_opt_constant_alu.c
|
|
*
|
|
* Identifies sequences of ALU instructions that operate on constant operands
|
|
* and reduces them to a uniform load.
|
|
*
|
|
* This is useful, for example, to optimize the result of removing leading
|
|
* ldunifa instructions in the DCE pass, which can leave a series of constant
|
|
* additions that increment the unifa address by 4 for each leading ldunifa
|
|
* removed. It helps turn this:
|
|
*
|
|
* nop t1; ldunif (0x00000004 / 0.000000)
|
|
* nop t2; ldunif (0x00000004 / 0.000000)
|
|
* add t3, t1, t2
|
|
*
|
|
* into:
|
|
*
|
|
* nop t1; ldunif (0x00000004 / 0.000000)
|
|
* nop t2; ldunif (0x00000004 / 0.000000)
|
|
* nop t4; ldunif (0x00000008 / 0.000000)
|
|
* mov t3, t4
|
|
*
|
|
* For best results we want to run copy propagation in between this and
|
|
* the combine constants pass: every time we manage to convert an alu to
|
|
* a uniform load, we move the uniform to the original alu destination. By
|
|
* running copy propagation immediately after we can reuse the uniform as
|
|
* source in more follow-up alu instructions, making them constant and allowing
|
|
* this pass to continue making progress. However, if we run the small
|
|
* immediates optimization before that, that pass can convert some of the movs
|
|
* to use small immediates instead of the uniforms and prevent us from making
|
|
* the best of this pass, as small immediates don't get copy propagated.
|
|
*/
|
|
|
|
#include "v3d_compiler.h"
|
|
|
|
#include "util/half_float.h"
|
|
#include "util/u_math.h"
|
|
|
|
static bool
|
|
opt_constant_add(struct v3d_compile *c, struct qinst *inst, union fi *values)
|
|
{
|
|
/* FIXME: handle more add operations */
|
|
struct qreg unif = { };
|
|
switch (inst->qpu.alu.add.op) {
|
|
case V3D_QPU_A_ADD:
|
|
c->cursor = vir_after_inst(inst);
|
|
unif = vir_uniform_ui(c, values[0].ui + values[1].ui);
|
|
break;
|
|
|
|
case V3D_QPU_A_VFPACK: {
|
|
assert(inst->qpu.alu.add.output_pack == V3D_QPU_PACK_NONE);
|
|
|
|
const uint32_t packed =
|
|
(((uint32_t)_mesa_float_to_half(values[1].f)) << 16) |
|
|
_mesa_float_to_half(values[0].f);
|
|
|
|
c->cursor = vir_after_inst(inst);
|
|
unif = vir_uniform_ui(c, packed);
|
|
break;
|
|
}
|
|
|
|
default:
|
|
return false;
|
|
}
|
|
|
|
/* Remove the original ALU instruction and replace it with a uniform
|
|
* load. If the original instruction loaded an implicit uniform we
|
|
* need to replicate that in the new instruction.
|
|
*/
|
|
struct qreg dst = inst->dst;
|
|
struct qinst *mov = vir_MOV_dest(c, dst, unif);
|
|
mov->uniform = inst->uniform;
|
|
vir_remove_instruction(c, inst);
|
|
if (dst.file == QFILE_TEMP)
|
|
c->defs[dst.index] = mov;
|
|
return true;
|
|
}
|
|
|
|
static bool
|
|
try_opt_constant_alu(struct v3d_compile *c, struct qinst *inst)
|
|
{
|
|
if(inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU)
|
|
return false;
|
|
|
|
/* If the instruction does anything other than writing the result
|
|
* directly to the destination, skip.
|
|
*/
|
|
if (inst->qpu.alu.add.output_pack != V3D_QPU_PACK_NONE ||
|
|
inst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE) {
|
|
return false;
|
|
}
|
|
|
|
if (inst->qpu.flags.ac != V3D_QPU_COND_NONE ||
|
|
inst->qpu.flags.mc != V3D_QPU_COND_NONE) {
|
|
return false;
|
|
}
|
|
|
|
assert(vir_get_nsrc(inst) <= 2);
|
|
union fi values[2];
|
|
for (int i = 0; i < vir_get_nsrc(inst); i++) {
|
|
if (inst->src[i].file == QFILE_SMALL_IMM &&
|
|
v3d_qpu_small_imm_unpack(c->devinfo,
|
|
inst->qpu.raddr_b,
|
|
&values[i].ui)) {
|
|
continue;
|
|
}
|
|
|
|
if (inst->src[i].file == QFILE_TEMP) {
|
|
struct qinst *def = c->defs[inst->src[i].index];
|
|
if (!def)
|
|
return false;
|
|
|
|
if ((def->qpu.sig.ldunif || def->qpu.sig.ldunifrf) &&
|
|
c->uniform_contents[def->uniform] == QUNIFORM_CONSTANT) {
|
|
values[i].ui = c->uniform_data[def->uniform];
|
|
continue;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/* FIXME: handle mul operations */
|
|
if (vir_is_add(inst))
|
|
return opt_constant_add(c, inst, values);
|
|
|
|
return false;
|
|
}
|
|
|
|
bool
|
|
vir_opt_constant_alu(struct v3d_compile *c)
|
|
{
|
|
bool progress = false;
|
|
vir_for_each_block(block, c) {
|
|
c->cur_block = block;
|
|
vir_for_each_inst_safe(inst, block) {
|
|
progress = try_opt_constant_alu(c, inst) || progress;
|
|
}
|
|
}
|
|
|
|
return progress;
|
|
}
|