aco/nops: add State
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12720>
This commit is contained in:
parent
bdf7eed045
commit
a8cc911aaf
|
@ -33,6 +33,12 @@
|
|||
namespace aco {
|
||||
namespace {
|
||||
|
||||
struct State {
|
||||
Program* program;
|
||||
Block* block;
|
||||
std::vector<aco_ptr<Instruction>> old_instructions;
|
||||
};
|
||||
|
||||
struct NOP_ctx_gfx6 {
|
||||
void join(const NOP_ctx_gfx6& other)
|
||||
{
|
||||
|
@ -228,8 +234,7 @@ handle_raw_hazard_instr(aco_ptr<Instruction>& pred, PhysReg reg, int* nops_neede
|
|||
|
||||
template <bool Valu, bool Vintrp, bool Salu>
|
||||
int
|
||||
handle_raw_hazard_internal(Program* program, Block* block, int nops_needed, PhysReg reg,
|
||||
uint32_t mask)
|
||||
handle_raw_hazard_internal(State& state, Block* block, int nops_needed, PhysReg reg, uint32_t mask)
|
||||
{
|
||||
for (int pred_idx = block->instructions.size() - 1; pred_idx >= 0; pred_idx--) {
|
||||
if (handle_raw_hazard_instr<Valu, Vintrp, Salu>(block->instructions[pred_idx], reg,
|
||||
|
@ -244,19 +249,19 @@ handle_raw_hazard_internal(Program* program, Block* block, int nops_needed, Phys
|
|||
* huge value. */
|
||||
for (unsigned lin_pred : block->linear_preds) {
|
||||
res = std::max(res, handle_raw_hazard_internal<Valu, Vintrp, Salu>(
|
||||
program, &program->blocks[lin_pred], nops_needed, reg, mask));
|
||||
state, &state.program->blocks[lin_pred], nops_needed, reg, mask));
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
template <bool Valu, bool Vintrp, bool Salu>
|
||||
void
|
||||
handle_raw_hazard(Program* program, Block* cur_block, int* NOPs, int min_states, Operand op)
|
||||
handle_raw_hazard(State& state, int* NOPs, int min_states, Operand op)
|
||||
{
|
||||
if (*NOPs >= min_states)
|
||||
return;
|
||||
int res = handle_raw_hazard_internal<Valu, Vintrp, Salu>(
|
||||
program, cur_block, min_states, op.physReg(), u_bit_consecutive(0, op.size()));
|
||||
state, state.block, min_states, op.physReg(), u_bit_consecutive(0, op.size()));
|
||||
*NOPs = MAX2(*NOPs, res);
|
||||
}
|
||||
|
||||
|
@ -332,15 +337,14 @@ handle_smem_clause_hazards(Program* program, NOP_ctx_gfx6& ctx, aco_ptr<Instruct
|
|||
|
||||
/* TODO: we don't handle accessing VCC using the actual SGPR instead of using the alias */
|
||||
void
|
||||
handle_instruction_gfx6(Program* program, Block* cur_block, NOP_ctx_gfx6& ctx,
|
||||
aco_ptr<Instruction>& instr,
|
||||
handle_instruction_gfx6(State& state, NOP_ctx_gfx6& ctx, aco_ptr<Instruction>& instr,
|
||||
std::vector<aco_ptr<Instruction>>& new_instructions)
|
||||
{
|
||||
/* check hazards */
|
||||
int NOPs = 0;
|
||||
|
||||
if (instr->isSMEM()) {
|
||||
if (program->chip_class == GFX6) {
|
||||
if (state.program->chip_class == GFX6) {
|
||||
/* A read of an SGPR by SMRD instruction requires 4 wait states
|
||||
* when the SGPR was written by a VALU instruction. According to LLVM,
|
||||
* there is also an undocumented hardware behavior when the buffer
|
||||
|
@ -352,13 +356,13 @@ handle_instruction_gfx6(Program* program, Block* cur_block, NOP_ctx_gfx6& ctx,
|
|||
|
||||
bool is_buffer_desc = i == 0 && op.size() > 2;
|
||||
if (is_buffer_desc)
|
||||
handle_valu_salu_then_read_hazard(program, cur_block, &NOPs, 4, op);
|
||||
handle_valu_salu_then_read_hazard(state, &NOPs, 4, op);
|
||||
else
|
||||
handle_valu_then_read_hazard(program, cur_block, &NOPs, 4, op);
|
||||
handle_valu_then_read_hazard(state, &NOPs, 4, op);
|
||||
}
|
||||
}
|
||||
|
||||
handle_smem_clause_hazards(program, ctx, instr, &NOPs);
|
||||
handle_smem_clause_hazards(state.program, ctx, instr, &NOPs);
|
||||
} else if (instr->isSALU()) {
|
||||
if (instr->opcode == aco_opcode::s_setreg_b32 ||
|
||||
instr->opcode == aco_opcode::s_setreg_imm32_b32 ||
|
||||
|
@ -366,7 +370,7 @@ handle_instruction_gfx6(Program* program, Block* cur_block, NOP_ctx_gfx6& ctx,
|
|||
NOPs = MAX2(NOPs, ctx.setreg_then_getsetreg);
|
||||
}
|
||||
|
||||
if (program->chip_class == GFX9) {
|
||||
if (state.program->chip_class == GFX9) {
|
||||
if (instr->opcode == aco_opcode::s_movrels_b32 ||
|
||||
instr->opcode == aco_opcode::s_movrels_b64 ||
|
||||
instr->opcode == aco_opcode::s_movreld_b32 ||
|
||||
|
@ -389,7 +393,7 @@ handle_instruction_gfx6(Program* program, Block* cur_block, NOP_ctx_gfx6& ctx,
|
|||
|
||||
if (instr->isDPP()) {
|
||||
NOPs = MAX2(NOPs, ctx.valu_wr_exec_then_dpp);
|
||||
handle_valu_then_read_hazard(program, cur_block, &NOPs, 2, instr->operands[0]);
|
||||
handle_valu_then_read_hazard(state, &NOPs, 2, instr->operands[0]);
|
||||
}
|
||||
|
||||
for (Definition def : instr->definitions) {
|
||||
|
@ -404,7 +408,7 @@ handle_instruction_gfx6(Program* program, Block* cur_block, NOP_ctx_gfx6& ctx,
|
|||
instr->opcode == aco_opcode::v_writelane_b32 ||
|
||||
instr->opcode == aco_opcode::v_writelane_b32_e64) &&
|
||||
!instr->operands[1].isConstant()) {
|
||||
handle_valu_then_read_hazard(program, cur_block, &NOPs, 4, instr->operands[1]);
|
||||
handle_valu_then_read_hazard(state, &NOPs, 4, instr->operands[1]);
|
||||
}
|
||||
|
||||
/* It's required to insert 1 wait state if the dst VGPR of any v_interp_*
|
||||
|
@ -412,10 +416,10 @@ handle_instruction_gfx6(Program* program, Block* cur_block, NOP_ctx_gfx6& ctx,
|
|||
* hangs on GFX6. Note that v_writelane_* is apparently not affected.
|
||||
* This hazard isn't documented anywhere but AMD confirmed that hazard.
|
||||
*/
|
||||
if (program->chip_class == GFX6 &&
|
||||
if (state.program->chip_class == GFX6 &&
|
||||
(instr->opcode == aco_opcode::v_readlane_b32 || /* GFX6 doesn't have v_readlane_b32_e64 */
|
||||
instr->opcode == aco_opcode::v_readfirstlane_b32)) {
|
||||
handle_vintrp_then_read_hazard(program, cur_block, &NOPs, 1, instr->operands[0]);
|
||||
handle_vintrp_then_read_hazard(state, &NOPs, 1, instr->operands[0]);
|
||||
}
|
||||
|
||||
if (instr->opcode == aco_opcode::v_div_fmas_f32 ||
|
||||
|
@ -425,14 +429,14 @@ handle_instruction_gfx6(Program* program, Block* cur_block, NOP_ctx_gfx6& ctx,
|
|||
/* If the VALU writes the SGPR that is used by a VMEM, the user must add five wait states. */
|
||||
for (Operand op : instr->operands) {
|
||||
if (!op.isConstant() && !op.isUndefined() && op.regClass().type() == RegType::sgpr)
|
||||
handle_valu_then_read_hazard(program, cur_block, &NOPs, 5, op);
|
||||
handle_valu_then_read_hazard(state, &NOPs, 5, op);
|
||||
}
|
||||
}
|
||||
|
||||
if (!instr->isSALU() && instr->format != Format::SMEM)
|
||||
NOPs = MAX2(NOPs, ctx.set_vskip_mode_then_vector);
|
||||
|
||||
if (program->chip_class == GFX9) {
|
||||
if (state.program->chip_class == GFX9) {
|
||||
bool lds_scratch_global = (instr->isScratch() || instr->isGlobal()) && instr->flatlike().lds;
|
||||
if (instr->isVINTRP() || lds_scratch_global ||
|
||||
instr->opcode == aco_opcode::ds_read_addtid_b32 ||
|
||||
|
@ -459,7 +463,7 @@ handle_instruction_gfx6(Program* program, Block* cur_block, NOP_ctx_gfx6& ctx,
|
|||
ctx.smem_clause = false;
|
||||
ctx.smem_write = false;
|
||||
|
||||
if (program->dev.xnack_enabled) {
|
||||
if (state.program->dev.xnack_enabled) {
|
||||
BITSET_ZERO(ctx.smem_clause_read_write);
|
||||
BITSET_ZERO(ctx.smem_clause_write);
|
||||
}
|
||||
|
@ -471,7 +475,7 @@ handle_instruction_gfx6(Program* program, Block* cur_block, NOP_ctx_gfx6& ctx,
|
|||
} else {
|
||||
ctx.smem_clause = true;
|
||||
|
||||
if (program->dev.xnack_enabled) {
|
||||
if (state.program->dev.xnack_enabled) {
|
||||
for (Operand op : instr->operands) {
|
||||
if (!op.isConstant()) {
|
||||
set_bitset_range(ctx.smem_clause_read_write, op.physReg(), op.size());
|
||||
|
@ -616,8 +620,7 @@ instr_is_branch(const aco_ptr<Instruction>& instr)
|
|||
}
|
||||
|
||||
void
|
||||
handle_instruction_gfx10(Program* program, Block* cur_block, NOP_ctx_gfx10& ctx,
|
||||
aco_ptr<Instruction>& instr,
|
||||
handle_instruction_gfx10(State& state, NOP_ctx_gfx10& ctx, aco_ptr<Instruction>& instr,
|
||||
std::vector<aco_ptr<Instruction>>& new_instructions)
|
||||
{
|
||||
// TODO: s_dcache_inv needs to be in its own group on GFX10
|
||||
|
@ -630,7 +633,7 @@ handle_instruction_gfx10(Program* program, Block* cur_block, NOP_ctx_gfx10& ctx,
|
|||
/* Remember all SGPRs that are read by the VMEM instruction */
|
||||
mark_read_regs(instr, ctx.sgprs_read_by_VMEM);
|
||||
ctx.sgprs_read_by_VMEM.set(exec);
|
||||
if (program->wave_size == 64)
|
||||
if (state.program->wave_size == 64)
|
||||
ctx.sgprs_read_by_VMEM.set(exec_hi);
|
||||
} else if (instr->isSALU() || instr->isSMEM()) {
|
||||
if (instr->opcode == aco_opcode::s_waitcnt) {
|
||||
|
@ -786,7 +789,7 @@ handle_instruction_gfx10(Program* program, Block* cur_block, NOP_ctx_gfx10& ctx,
|
|||
if (instr->isMUBUF() || instr->isMTBUF()) {
|
||||
uint32_t offset = instr->isMUBUF() ? instr->mubuf().offset : instr->mtbuf().offset;
|
||||
if (offset & 6)
|
||||
Builder(program, &new_instructions).sopp(aco_opcode::s_nop, -1, 0);
|
||||
Builder(state.program, &new_instructions).sopp(aco_opcode::s_nop, -1, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -798,12 +801,12 @@ handle_instruction_gfx10(Program* program, Block* cur_block, NOP_ctx_gfx10& ctx,
|
|||
} else if (ctx.has_writelane) {
|
||||
ctx.has_writelane = false;
|
||||
if (instr->isMIMG() && get_mimg_nsa_dwords(instr.get()) > 0)
|
||||
Builder(program, &new_instructions).sopp(aco_opcode::s_nop, -1, 0);
|
||||
Builder(state.program, &new_instructions).sopp(aco_opcode::s_nop, -1, 0);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Ctx>
|
||||
using HandleInstr = void (*)(Program*, Block* block, Ctx&, aco_ptr<Instruction>&,
|
||||
using HandleInstr = void (*)(State& state, Ctx&, aco_ptr<Instruction>&,
|
||||
std::vector<aco_ptr<Instruction>>&);
|
||||
|
||||
template <typename Ctx, HandleInstr<Ctx> Handle>
|
||||
|
@ -813,13 +816,16 @@ handle_block(Program* program, Ctx& ctx, Block& block)
|
|||
if (block.instructions.empty())
|
||||
return;
|
||||
|
||||
std::vector<aco_ptr<Instruction>> old_instructions = std::move(block.instructions);
|
||||
State state;
|
||||
state.program = program;
|
||||
state.block = &block;
|
||||
state.old_instructions = std::move(block.instructions);
|
||||
|
||||
block.instructions.clear(); // Silence clang-analyzer-cplusplus.Move warning
|
||||
block.instructions.reserve(old_instructions.size());
|
||||
block.instructions.reserve(state.old_instructions.size());
|
||||
|
||||
for (aco_ptr<Instruction>& instr : old_instructions) {
|
||||
Handle(program, &block, ctx, instr, block.instructions);
|
||||
for (aco_ptr<Instruction>& instr : state.old_instructions) {
|
||||
Handle(state, ctx, instr, block.instructions);
|
||||
block.instructions.emplace_back(std::move(instr));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue