From 9a6b11a7330b08f57876bd8b16c3b360e4818e86 Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Sat, 26 Sep 2020 19:25:41 +0200 Subject: [PATCH] r600/sfn: Fix indirect const buffer access Signed-off-by: Gert Wollny Part-of: --- src/gallium/drivers/r600/r600_asm.c | 4 +- .../r600/sfn/sfn_emitaluinstruction.cpp | 3 +- .../drivers/r600/sfn/sfn_emitinstruction.cpp | 6 +- .../drivers/r600/sfn/sfn_emitinstruction.h | 2 +- .../drivers/r600/sfn/sfn_instruction_base.cpp | 6 + .../r600/sfn/sfn_instruction_block.cpp | 5 + .../drivers/r600/sfn/sfn_instruction_block.h | 2 + .../drivers/r600/sfn/sfn_ir_to_assembly.cpp | 159 +++++++----------- .../drivers/r600/sfn/sfn_liverange.cpp | 8 + .../drivers/r600/sfn/sfn_shader_base.cpp | 49 +++++- .../drivers/r600/sfn/sfn_shader_base.h | 7 + src/gallium/drivers/r600/sfn/sfn_value.cpp | 4 +- src/gallium/drivers/r600/sfn/sfn_value.h | 2 + 13 files changed, 151 insertions(+), 106 deletions(-) diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 0ede1c4d30d..fbb173b0680 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -362,7 +362,7 @@ static int assign_alu_units(struct r600_bytecode *bc, struct r600_bytecode_alu * } assignment[4] = alu; } else { - if (assignment[chan]) { + if (assignment[chan]) { assert(0); /* ALU.chan has already been allocated. 
*/ return -1; } @@ -1232,7 +1232,7 @@ int r600_bytecode_add_alu_type(struct r600_bytecode *bc, /* Load index register if required */ if (bc->chip_class >= EVERGREEN) { for (i = 0; i < 3; i++) - if (nalu->src[i].kc_bank && nalu->src[i].kc_rel) + if (nalu->src[i].kc_bank && nalu->src[i].kc_rel) egcm_load_index_reg(bc, 0, true); } diff --git a/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.cpp b/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.cpp index 12045c2a724..4641c177acd 100644 --- a/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.cpp @@ -196,6 +196,7 @@ void EmitAluInstruction::preload_src(const nir_alu_instr& instr) for (unsigned c = 0; c < nsrc_comp; ++c) { m_src[i][c] = from_nir(instr.src[i], c); sfn_log << SfnLog::reg << " " << *m_src[i][c]; + } sfn_log << SfnLog::reg << "\n"; } @@ -262,7 +263,7 @@ void EmitAluInstruction::split_constants(const nir_alu_instr& instr, unsigned ns if (src->type() == Value::kconst) { c[nconst] = static_cast(src.get()); idx[nconst++] = i; - sfn_log << SfnLog::reg << "is constant " << i; + sfn_log << SfnLog::reg << " is constant " << i; } sfn_log << SfnLog::reg << "\n"; } diff --git a/src/gallium/drivers/r600/sfn/sfn_emitinstruction.cpp b/src/gallium/drivers/r600/sfn/sfn_emitinstruction.cpp index 20e573e1f9c..f1ddd7aa2e9 100644 --- a/src/gallium/drivers/r600/sfn/sfn_emitinstruction.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_emitinstruction.cpp @@ -80,6 +80,11 @@ void EmitInstruction::emit_instruction(Instruction *ir) return m_proc.emit_instruction(ir); } +void EmitInstruction::emit_instruction(AluInstruction *ir) +{ + return m_proc.emit_instruction(ir); +} + bool EmitInstruction::emit_instruction(EAluOp opcode, PValue dest, std::vector src0, const std::set& m_flags) @@ -179,7 +184,6 @@ int EmitInstruction::remap_atomic_base(int base) return m_proc.remap_atomic_base(base); } - const std::set EmitInstruction::empty = {}; const std::set EmitInstruction::write 
= {alu_write}; const std::set EmitInstruction::last_write = {alu_write, alu_last_instr}; diff --git a/src/gallium/drivers/r600/sfn/sfn_emitinstruction.h b/src/gallium/drivers/r600/sfn/sfn_emitinstruction.h index a905c3c88e7..9c7614f87e8 100644 --- a/src/gallium/drivers/r600/sfn/sfn_emitinstruction.h +++ b/src/gallium/drivers/r600/sfn/sfn_emitinstruction.h @@ -72,6 +72,7 @@ protected: // forwards from ShaderFromNirProcessor void emit_instruction(Instruction *ir); + void emit_instruction(AluInstruction *ir); bool emit_instruction(EAluOp opcode, PValue dest, std::vector src0, const std::set& m_flags); @@ -94,7 +95,6 @@ protected: const PValue& reg, bool map); int remap_atomic_base(int base); - private: ShaderFromNirProcessor& m_proc; diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_base.cpp b/src/gallium/drivers/r600/sfn/sfn_instruction_base.cpp index acbc5650b1f..335c6d11b05 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instruction_base.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_instruction_base.cpp @@ -60,7 +60,13 @@ void ValueRemapper::remap(PValue& v) size_t range_end = range_start + val.array_size(); while (range_start < range_end) m_map[range_start++].used = true; + } else if (v->type() == Value::kconst) { + auto& val = static_cast(*v); + auto addr = val.addr(); + if (addr && addr->type() == Value::gpr) + val.reset_addr(remap_one_registers(addr)); } + } void ValueRemapper::remap(GPRVector& v) diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_block.cpp b/src/gallium/drivers/r600/sfn/sfn_instruction_block.cpp index df01fbb3e8f..212499faf8f 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instruction_block.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_instruction_block.cpp @@ -42,6 +42,11 @@ bool InstructionBlock::is_equal_to(const Instruction& lhs) const [](PInstruction ri, PInstruction li) {return *ri == *li;}); } +PInstruction InstructionBlock::last_instruction() +{ + return m_block.size() ? 
*m_block.rbegin() : nullptr; +} + void InstructionBlock::do_print(std::ostream& os) const { std::string space(" ", 2 * m_nesting_depth); diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_block.h b/src/gallium/drivers/r600/sfn/sfn_instruction_block.h index f47a9e2ca92..f90579cfe7b 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instruction_block.h +++ b/src/gallium/drivers/r600/sfn/sfn_instruction_block.h @@ -61,6 +61,8 @@ public: return m_block_number; } + PInstruction last_instruction(); + private: void do_evalue_liveness(LiverangeEvaluator& eval) const override; bool is_equal_to(const Instruction& lhs) const override; diff --git a/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.cpp b/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.cpp index c8c9abaef55..5fb3c8c447b 100644 --- a/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.cpp @@ -77,7 +77,7 @@ private: bool copy_dst(r600_bytecode_alu_dst& dst, const Value& src); bool copy_src(r600_bytecode_alu_src& src, const Value& s); - + EBufferIndexMode emit_index_reg(const Value& reg, unsigned idx); ConditionalJumpTracker m_jump_tracker; CallStack m_callstack; @@ -510,7 +510,7 @@ bool AssemblyFromShaderLegacyImpl::emit_if_start(const IfInstruction & if_instr) if (needs_workaround) { r600_bytecode_add_cfinst(m_bc, CF_OP_PUSH); - m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2; + m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2; op = cf_alu; } emit_alu(pred, op); @@ -726,37 +726,7 @@ bool AssemblyFromShaderLegacyImpl::emit_vtx(const FetchInstruction& fetch_instr) const auto& boffs = static_cast(*addr); buffer_offset = boffs.value(); } else { - index_mode = bim_zero; - if ((!m_bc->index_loaded[0] || m_loop_nesting || - m_bc->index_reg[0] != addr->sel() || - m_bc->index_reg_chan[0] != addr->chan())) { - struct r600_bytecode_alu alu; - memset(&alu, 0, sizeof(alu)); - alu.op = opcode_map.at(op1_mova_int); - alu.dst.chan = 0; - alu.src[0].sel = addr->sel(); - 
alu.src[0].chan = addr->chan(); - alu.last = 1; - int r = r600_bytecode_add_alu(m_bc, &alu); - if (r) - return false; - - m_bc->ar_loaded = 0; - - alu.op = opcode_map.at(op1_set_cf_idx0); - alu.dst.chan = 0; - alu.src[0].sel = 0; - alu.src[0].chan = 0; - alu.last = 1; - - r = r600_bytecode_add_alu(m_bc, &alu); - if (r) - return false; - - m_bc->index_reg[0] = addr->sel(); - m_bc->index_reg_chan[0] = addr->chan(); - m_bc->index_loaded[0] = true; - } + index_mode = emit_index_reg(*addr, 0); } } @@ -887,37 +857,7 @@ bool AssemblyFromShaderLegacyImpl::emit_gds(const GDSInstr& instr) int uav_idx = -1; auto addr = instr.uav_id(); if (addr->type() != Value::literal) { - if (!m_bc->index_loaded[1] || m_loop_nesting || - m_bc->index_reg[1] != addr->sel() - || m_bc->index_reg_chan[1] != addr->chan()) { - struct r600_bytecode_alu alu; - - memset(&alu, 0, sizeof(alu)); - alu.op = opcode_map.at(op1_mova_int); - alu.dst.chan = 0; - alu.src[0].sel = addr->sel(); - alu.src[0].chan = addr->chan(); - alu.last = 1; - int r = r600_bytecode_add_alu(m_bc, &alu); - if (r) - return false; - - m_bc->ar_loaded = 0; - - alu.op = opcode_map.at(op1_set_cf_idx1); - alu.dst.chan = 0; - alu.src[0].sel = 0; - alu.src[0].chan = 0; - alu.last = 1; - - r = r600_bytecode_add_alu(m_bc, &alu); - if (r) - return false; - - m_bc->index_reg[1] = addr->sel(); - m_bc->index_reg_chan[1] = addr->chan(); - m_bc->index_loaded[1] = true; - } + emit_index_reg(*addr, 1); } else { const LiteralValue& addr_reg = static_cast(*addr); uav_idx = addr_reg.value(); @@ -1102,39 +1042,7 @@ bool AssemblyFromShaderLegacyImpl::emit_rat(const RatInstruction& instr) if (addr) { if (addr->type() != Value::literal) { - rat_index_mode = bim_one; - if (!m_bc->index_loaded[1] || m_loop_nesting || - m_bc->index_reg[1] != addr->sel() - || m_bc->index_reg_chan[1] != addr->chan()) { - struct r600_bytecode_alu alu; - - memset(&alu, 0, sizeof(alu)); - alu.op = opcode_map.at(op1_mova_int); - alu.dst.chan = 0; - alu.src[0].sel = addr->sel(); 
- alu.src[0].chan = addr->chan(); - alu.last = 1; - int r = r600_bytecode_add_alu(m_bc, &alu); - if (r) - return false; - - m_bc->ar_loaded = 0; - - alu.op = opcode_map.at(op1_set_cf_idx1); - alu.dst.chan = 0; - alu.src[0].sel = 0; - alu.src[0].chan = 0; - alu.last = 1; - - r = r600_bytecode_add_alu(m_bc, &alu); - if (r) - return false; - - m_bc->index_reg[1] = addr->sel(); - m_bc->index_reg_chan[1] = addr->chan(); - m_bc->index_loaded[1] = true; - - } + rat_index_mode = emit_index_reg(*addr, 1); } else { const LiteralValue& addr_reg = static_cast(*addr); rat_idx += addr_reg.value(); @@ -1167,6 +1075,53 @@ bool AssemblyFromShaderLegacyImpl::emit_rat(const RatInstruction& instr) return true; } +EBufferIndexMode +AssemblyFromShaderLegacyImpl::emit_index_reg(const Value& addr, unsigned idx) +{ + assert(idx < 2); + + EAluOp idxop = idx ? op1_set_cf_idx1 : op1_set_cf_idx0; + + if (!m_bc->index_loaded[idx] || m_loop_nesting || + m_bc->index_reg[idx] != addr.sel() + || m_bc->index_reg_chan[idx] != addr.chan()) { + struct r600_bytecode_alu alu; + + // Make sure MOVA is not last instr in clause + if ((m_bc->cf_last->ndw>>1) >= 110) + m_bc->force_add_cf = 1; + + memset(&alu, 0, sizeof(alu)); + alu.op = opcode_map.at(op1_mova_int); + alu.dst.chan = 0; + alu.src[0].sel = addr.sel(); + alu.src[0].chan = addr.chan(); + alu.last = 1; + sfn_log << SfnLog::assembly << " mova_int, "; + int r = r600_bytecode_add_alu(m_bc, &alu); + if (r) + return bim_invalid; + + m_bc->ar_loaded = 0; + + alu.op = opcode_map.at(idxop); + alu.dst.chan = 0; + alu.src[0].sel = 0; + alu.src[0].chan = 0; + alu.last = 1; + sfn_log << SfnLog::assembly << "op1_set_cf_idx" << idx; + r = r600_bytecode_add_alu(m_bc, &alu); + if (r) + return bim_invalid; + + m_bc->index_reg[idx] = addr.sel(); + m_bc->index_reg_chan[idx] = addr.chan(); + m_bc->index_loaded[idx] = true; + sfn_log << SfnLog::assembly << "\n"; + } + /* Index register 0 maps to bim_zero and index register 1 to bim_one, matching the inline code this helper replaces in emit_vtx (bim_zero) and emit_rat (bim_one). */ return idx == 0 ? 
bim_zero : bim_one; +} + bool AssemblyFromShaderLegacyImpl::copy_dst(r600_bytecode_alu_dst& dst, const Value& d) { @@ -1249,6 +1204,16 @@ bool AssemblyFromShaderLegacyImpl::copy_src(r600_bytecode_alu_src& src, const Va if (s.type() == Value::kconst) { const UniformValue& cv = static_cast(s); src.kc_bank = cv.kcache_bank(); + auto addr = cv.addr(); + if (addr) { + src.kc_rel = 1; + emit_index_reg(*addr, 0); + auto type = m_bc->cf_last->op; + if (r600_bytecode_add_cf(m_bc)) { + return false; + } + m_bc->cf_last->op = type; + } } return true; diff --git a/src/gallium/drivers/r600/sfn/sfn_liverange.cpp b/src/gallium/drivers/r600/sfn/sfn_liverange.cpp index 55159233a14..2a4d0d2a971 100644 --- a/src/gallium/drivers/r600/sfn/sfn_liverange.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_liverange.cpp @@ -812,6 +812,10 @@ void LiverangeEvaluator::record_read(const Value& src, bool is_array_elm) } else if (src.type() == Value::gpr_array_value) { const GPRArrayValue& v = static_cast(src); v.record_read(*this); + } else if (src.type() == Value::kconst) { + const UniformValue& v = static_cast(src); + if (v.addr()) + record_read(*v.addr(),is_array_elm); } } @@ -829,6 +833,10 @@ void LiverangeEvaluator::record_write(const Value& src, bool is_array_elm) } else if (src.type() == Value::gpr_array_value) { const GPRArrayValue& v = static_cast(src); v.record_write(*this); + } else if (src.type() == Value::kconst) { + const UniformValue& v = static_cast(src); + if (v.addr()) + record_write(*v.addr(),is_array_elm); } } diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp index 84c26ce6a54..d6ade30d4de 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp @@ -75,8 +75,8 @@ ShaderFromNirProcessor::ShaderFromNirProcessor(pipe_shader_type ptype, m_next_hwatomic_loc(0), m_sel(sel), m_atomic_base(atomic_base), - m_image_count(0) - + m_image_count(0), + 
last_emitted_alu(nullptr) { m_sh_info.processor_type = ptype; @@ -363,7 +363,33 @@ bool ShaderFromNirProcessor::emit_tex_instruction(nir_instr* instr) return m_tex_instr.emit(instr); } +void ShaderFromNirProcessor::emit_instruction(AluInstruction *ir) +{ + if (last_emitted_alu && !last_emitted_alu->flag(alu_last_instr)) { + for (unsigned i = 0; i < ir->n_sources(); ++i) { + auto& s = ir->src(i); + if (s.type() == Value::kconst) { + auto& c = static_cast(s); + if (c.addr()) { + last_emitted_alu->set_flag(alu_last_instr); + break; + } + } + } + } + last_emitted_alu = ir; + emit_instruction_internal(ir); +} + + void ShaderFromNirProcessor::emit_instruction(Instruction *ir) +{ + + emit_instruction_internal(ir); + last_emitted_alu = nullptr; +} + +void ShaderFromNirProcessor::emit_instruction_internal(Instruction *ir) { if (m_pending_else) { append_block(-1); @@ -858,6 +884,24 @@ bool ShaderFromNirProcessor::emit_load_ubo_vec4(nir_intrinsic_instr* instr) return load_uniform_indirect(instr, from_nir(instr->src[1], 0, 0), 0, bufid->u32); } } else { + if (buf_offset) { + int buf_cmp = nir_intrinsic_component(instr); + AluInstruction *ir = nullptr; + auto kc_id = from_nir(instr->src[0], 0); + for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) { + int cmp = buf_cmp + i; + auto u = PValue(new UniformValue(512 + buf_offset->u32, cmp, kc_id)); + if (instr->dest.is_ssa) + load_preloaded_value(instr->dest, i, u); + else { + ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write}); + emit_instruction(ir); + } + } + if (ir) + ir->set_flag(alu_last_instr); + return true; + } /* TODO: if buf_offset is constant then this can also be solved by using the CF indes * on the ALU block, and this would probably make sense when there are more then one * loads with the same buffer ID. 
*/ @@ -884,7 +928,6 @@ bool ShaderFromNirProcessor::emit_load_ubo_vec4(nir_intrinsic_instr* instr) } - bool ShaderFromNirProcessor::emit_discard_if(nir_intrinsic_instr* instr) { r600::sfn_log << SfnLog::instr << "emit '" diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_base.h b/src/gallium/drivers/r600/sfn/sfn_shader_base.h index 2bf094aa5b3..309493f48e6 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader_base.h +++ b/src/gallium/drivers/r600/sfn/sfn_shader_base.h @@ -71,6 +71,7 @@ public: std::vector src0, const std::set& m_flags); void emit_export_instruction(WriteoutInstruction *ir); + void emit_instruction(AluInstruction *ir); void split_constants(nir_alu_instr* instr); void load_uniform(const nir_alu_src& src); @@ -141,9 +142,13 @@ protected: bool allocate_reserved_registers(); + private: virtual bool do_allocate_reserved_registers() = 0; + + void emit_instruction_internal(Instruction *ir); + bool emit_alu_instruction(nir_instr *instr); bool emit_deref_instruction(nir_deref_instr* instr); bool emit_intrinsic_instruction(nir_intrinsic_instr* instr); @@ -176,6 +181,7 @@ private: virtual bool do_emit_load_deref(const nir_variable *in_var, nir_intrinsic_instr* instr) = 0; virtual bool do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) = 0; + bool emit_store_scratch(nir_intrinsic_instr* instr); bool emit_load_scratch(nir_intrinsic_instr* instr); virtual void do_finalize() = 0; @@ -217,6 +223,7 @@ private: int m_image_count; std::unordered_map m_atomic_base_map; + AluInstruction *last_emitted_alu; }; } diff --git a/src/gallium/drivers/r600/sfn/sfn_value.cpp b/src/gallium/drivers/r600/sfn/sfn_value.cpp index 370b7adeae8..3a5a3ce7129 100644 --- a/src/gallium/drivers/r600/sfn/sfn_value.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_value.cpp @@ -238,8 +238,10 @@ void UniformValue::do_print(std::ostream& os) const { if (m_index < 512) os << "KC" << m_kcache_bank << "[" << m_index; + else if (m_addr) + os << "KC[" << *m_addr << "][" << m_index; 
else - os << "KCX[" << m_index; + os << "KCx[" << m_index; os << "]." << component_names[chan()]; } diff --git a/src/gallium/drivers/r600/sfn/sfn_value.h b/src/gallium/drivers/r600/sfn/sfn_value.h index 222b0462b9a..33d3d745b37 100644 --- a/src/gallium/drivers/r600/sfn/sfn_value.h +++ b/src/gallium/drivers/r600/sfn/sfn_value.h @@ -186,6 +186,8 @@ public: UniformValue(uint32_t sel, uint32_t chan, PValue addr); uint32_t sel() const override; uint32_t kcache_bank() const; + PValue addr() const {return m_addr;} + void reset_addr(PValue v) {m_addr = v;} private: void do_print(std::ostream& os) const override; bool is_equal_to(const Value& other) const override;