r600/sfn: Fix indirect const buffer access
Signed-off-by: Gert Wollny <gert.wollny@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6879>
This commit is contained in:
parent
73c5f45191
commit
9a6b11a733
|
@ -1232,7 +1232,7 @@ int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
|
|||
/* Load index register if required */
|
||||
if (bc->chip_class >= EVERGREEN) {
|
||||
for (i = 0; i < 3; i++)
|
||||
if (nalu->src[i].kc_bank && nalu->src[i].kc_rel)
|
||||
if (nalu->src[i].kc_bank && nalu->src[i].kc_rel)
|
||||
egcm_load_index_reg(bc, 0, true);
|
||||
}
|
||||
|
||||
|
|
|
@ -196,6 +196,7 @@ void EmitAluInstruction::preload_src(const nir_alu_instr& instr)
|
|||
for (unsigned c = 0; c < nsrc_comp; ++c) {
|
||||
m_src[i][c] = from_nir(instr.src[i], c);
|
||||
sfn_log << SfnLog::reg << " " << *m_src[i][c];
|
||||
|
||||
}
|
||||
sfn_log << SfnLog::reg << "\n";
|
||||
}
|
||||
|
@ -262,7 +263,7 @@ void EmitAluInstruction::split_constants(const nir_alu_instr& instr, unsigned ns
|
|||
if (src->type() == Value::kconst) {
|
||||
c[nconst] = static_cast<const UniformValue *>(src.get());
|
||||
idx[nconst++] = i;
|
||||
sfn_log << SfnLog::reg << "is constant " << i;
|
||||
sfn_log << SfnLog::reg << " is constant " << i;
|
||||
}
|
||||
sfn_log << SfnLog::reg << "\n";
|
||||
}
|
||||
|
|
|
@ -80,6 +80,11 @@ void EmitInstruction::emit_instruction(Instruction *ir)
|
|||
return m_proc.emit_instruction(ir);
|
||||
}
|
||||
|
||||
/* Forward an ALU instruction to the owning ShaderFromNirProcessor.
 * This overload takes the AluInstruction pointer type and passes it on
 * unchanged to m_proc. */
void EmitInstruction::emit_instruction(AluInstruction *ir)
{
   m_proc.emit_instruction(ir);
}
|
||||
|
||||
bool EmitInstruction::emit_instruction(EAluOp opcode, PValue dest,
|
||||
std::vector<PValue> src0,
|
||||
const std::set<AluModifiers>& m_flags)
|
||||
|
@ -179,7 +184,6 @@ int EmitInstruction::remap_atomic_base(int base)
|
|||
return m_proc.remap_atomic_base(base);
|
||||
}
|
||||
|
||||
|
||||
const std::set<AluModifiers> EmitInstruction::empty = {};
|
||||
const std::set<AluModifiers> EmitInstruction::write = {alu_write};
|
||||
const std::set<AluModifiers> EmitInstruction::last_write = {alu_write, alu_last_instr};
|
||||
|
|
|
@ -72,6 +72,7 @@ protected:
|
|||
|
||||
// forwards from ShaderFromNirProcessor
|
||||
void emit_instruction(Instruction *ir);
|
||||
void emit_instruction(AluInstruction *ir);
|
||||
bool emit_instruction(EAluOp opcode, PValue dest,
|
||||
std::vector<PValue> src0,
|
||||
const std::set<AluModifiers>& m_flags);
|
||||
|
@ -94,7 +95,6 @@ protected:
|
|||
const PValue& reg, bool map);
|
||||
|
||||
int remap_atomic_base(int base);
|
||||
|
||||
private:
|
||||
|
||||
ShaderFromNirProcessor& m_proc;
|
||||
|
|
|
@ -60,7 +60,13 @@ void ValueRemapper::remap(PValue& v)
|
|||
size_t range_end = range_start + val.array_size();
|
||||
while (range_start < range_end)
|
||||
m_map[range_start++].used = true;
|
||||
} else if (v->type() == Value::kconst) {
|
||||
auto& val = static_cast<UniformValue&>(*v);
|
||||
auto addr = val.addr();
|
||||
if (addr && addr->type() == Value::gpr)
|
||||
val.reset_addr(remap_one_registers(addr));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void ValueRemapper::remap(GPRVector& v)
|
||||
|
|
|
@ -42,6 +42,11 @@ bool InstructionBlock::is_equal_to(const Instruction& lhs) const
|
|||
[](PInstruction ri, PInstruction li) {return *ri == *li;});
|
||||
}
|
||||
|
||||
/* Return the instruction that was added last to this block, or a null
 * PInstruction when the block holds no instructions. */
PInstruction InstructionBlock::last_instruction()
{
   if (m_block.size() == 0)
      return nullptr;

   return *m_block.rbegin();
}
|
||||
|
||||
void InstructionBlock::do_print(std::ostream& os) const
|
||||
{
|
||||
std::string space(" ", 2 * m_nesting_depth);
|
||||
|
|
|
@ -61,6 +61,8 @@ public:
|
|||
return m_block_number;
|
||||
}
|
||||
|
||||
PInstruction last_instruction();
|
||||
|
||||
private:
|
||||
void do_evalue_liveness(LiverangeEvaluator& eval) const override;
|
||||
bool is_equal_to(const Instruction& lhs) const override;
|
||||
|
|
|
@ -77,7 +77,7 @@ private:
|
|||
bool copy_dst(r600_bytecode_alu_dst& dst, const Value& src);
|
||||
bool copy_src(r600_bytecode_alu_src& src, const Value& s);
|
||||
|
||||
|
||||
EBufferIndexMode emit_index_reg(const Value& reg, unsigned idx);
|
||||
|
||||
ConditionalJumpTracker m_jump_tracker;
|
||||
CallStack m_callstack;
|
||||
|
@ -510,7 +510,7 @@ bool AssemblyFromShaderLegacyImpl::emit_if_start(const IfInstruction & if_instr)
|
|||
|
||||
if (needs_workaround) {
|
||||
r600_bytecode_add_cfinst(m_bc, CF_OP_PUSH);
|
||||
m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2;
|
||||
m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2;
|
||||
op = cf_alu;
|
||||
}
|
||||
emit_alu(pred, op);
|
||||
|
@ -726,37 +726,7 @@ bool AssemblyFromShaderLegacyImpl::emit_vtx(const FetchInstruction& fetch_instr)
|
|||
const auto& boffs = static_cast<const LiteralValue&>(*addr);
|
||||
buffer_offset = boffs.value();
|
||||
} else {
|
||||
index_mode = bim_zero;
|
||||
if ((!m_bc->index_loaded[0] || m_loop_nesting ||
|
||||
m_bc->index_reg[0] != addr->sel() ||
|
||||
m_bc->index_reg_chan[0] != addr->chan())) {
|
||||
struct r600_bytecode_alu alu;
|
||||
memset(&alu, 0, sizeof(alu));
|
||||
alu.op = opcode_map.at(op1_mova_int);
|
||||
alu.dst.chan = 0;
|
||||
alu.src[0].sel = addr->sel();
|
||||
alu.src[0].chan = addr->chan();
|
||||
alu.last = 1;
|
||||
int r = r600_bytecode_add_alu(m_bc, &alu);
|
||||
if (r)
|
||||
return false;
|
||||
|
||||
m_bc->ar_loaded = 0;
|
||||
|
||||
alu.op = opcode_map.at(op1_set_cf_idx0);
|
||||
alu.dst.chan = 0;
|
||||
alu.src[0].sel = 0;
|
||||
alu.src[0].chan = 0;
|
||||
alu.last = 1;
|
||||
|
||||
r = r600_bytecode_add_alu(m_bc, &alu);
|
||||
if (r)
|
||||
return false;
|
||||
|
||||
m_bc->index_reg[0] = addr->sel();
|
||||
m_bc->index_reg_chan[0] = addr->chan();
|
||||
m_bc->index_loaded[0] = true;
|
||||
}
|
||||
index_mode = emit_index_reg(*addr, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -887,37 +857,7 @@ bool AssemblyFromShaderLegacyImpl::emit_gds(const GDSInstr& instr)
|
|||
int uav_idx = -1;
|
||||
auto addr = instr.uav_id();
|
||||
if (addr->type() != Value::literal) {
|
||||
if (!m_bc->index_loaded[1] || m_loop_nesting ||
|
||||
m_bc->index_reg[1] != addr->sel()
|
||||
|| m_bc->index_reg_chan[1] != addr->chan()) {
|
||||
struct r600_bytecode_alu alu;
|
||||
|
||||
memset(&alu, 0, sizeof(alu));
|
||||
alu.op = opcode_map.at(op1_mova_int);
|
||||
alu.dst.chan = 0;
|
||||
alu.src[0].sel = addr->sel();
|
||||
alu.src[0].chan = addr->chan();
|
||||
alu.last = 1;
|
||||
int r = r600_bytecode_add_alu(m_bc, &alu);
|
||||
if (r)
|
||||
return false;
|
||||
|
||||
m_bc->ar_loaded = 0;
|
||||
|
||||
alu.op = opcode_map.at(op1_set_cf_idx1);
|
||||
alu.dst.chan = 0;
|
||||
alu.src[0].sel = 0;
|
||||
alu.src[0].chan = 0;
|
||||
alu.last = 1;
|
||||
|
||||
r = r600_bytecode_add_alu(m_bc, &alu);
|
||||
if (r)
|
||||
return false;
|
||||
|
||||
m_bc->index_reg[1] = addr->sel();
|
||||
m_bc->index_reg_chan[1] = addr->chan();
|
||||
m_bc->index_loaded[1] = true;
|
||||
}
|
||||
emit_index_reg(*addr, 1);
|
||||
} else {
|
||||
const LiteralValue& addr_reg = static_cast<const LiteralValue&>(*addr);
|
||||
uav_idx = addr_reg.value();
|
||||
|
@ -1102,39 +1042,7 @@ bool AssemblyFromShaderLegacyImpl::emit_rat(const RatInstruction& instr)
|
|||
|
||||
if (addr) {
|
||||
if (addr->type() != Value::literal) {
|
||||
rat_index_mode = bim_one;
|
||||
if (!m_bc->index_loaded[1] || m_loop_nesting ||
|
||||
m_bc->index_reg[1] != addr->sel()
|
||||
|| m_bc->index_reg_chan[1] != addr->chan()) {
|
||||
struct r600_bytecode_alu alu;
|
||||
|
||||
memset(&alu, 0, sizeof(alu));
|
||||
alu.op = opcode_map.at(op1_mova_int);
|
||||
alu.dst.chan = 0;
|
||||
alu.src[0].sel = addr->sel();
|
||||
alu.src[0].chan = addr->chan();
|
||||
alu.last = 1;
|
||||
int r = r600_bytecode_add_alu(m_bc, &alu);
|
||||
if (r)
|
||||
return false;
|
||||
|
||||
m_bc->ar_loaded = 0;
|
||||
|
||||
alu.op = opcode_map.at(op1_set_cf_idx1);
|
||||
alu.dst.chan = 0;
|
||||
alu.src[0].sel = 0;
|
||||
alu.src[0].chan = 0;
|
||||
alu.last = 1;
|
||||
|
||||
r = r600_bytecode_add_alu(m_bc, &alu);
|
||||
if (r)
|
||||
return false;
|
||||
|
||||
m_bc->index_reg[1] = addr->sel();
|
||||
m_bc->index_reg_chan[1] = addr->chan();
|
||||
m_bc->index_loaded[1] = true;
|
||||
|
||||
}
|
||||
rat_index_mode = emit_index_reg(*addr, 1);
|
||||
} else {
|
||||
const LiteralValue& addr_reg = static_cast<const LiteralValue&>(*addr);
|
||||
rat_idx += addr_reg.value();
|
||||
|
@ -1167,6 +1075,53 @@ bool AssemblyFromShaderLegacyImpl::emit_rat(const RatInstruction& instr)
|
|||
return true;
|
||||
}
|
||||
|
||||
/* Load CF index register `idx` (0 or 1) from the register given by `addr`
 * and return the buffer index mode that selects this index register.
 *
 * The load (MOVA_INT followed by SET_CF_IDX0 / SET_CF_IDX1) is only emitted
 * when the index register is not already known to hold the same
 * register/channel, or when code is emitted inside a loop (m_loop_nesting).
 *
 * Returns bim_zero for idx 0 and bim_one for idx 1, or bim_invalid when
 * adding one of the ALU instructions to the byte code fails.
 */
EBufferIndexMode
AssemblyFromShaderLegacyImpl::emit_index_reg(const Value& addr, unsigned idx)
{
   assert(idx < 2);

   EAluOp idxop = idx ? op1_set_cf_idx1 : op1_set_cf_idx0;

   if (!m_bc->index_loaded[idx] || m_loop_nesting ||
       m_bc->index_reg[idx] != addr.sel()
       || m_bc->index_reg_chan[idx] != addr.chan()) {
      struct r600_bytecode_alu alu;

      // Make sure MOVA is not last instr in clause
      if ((m_bc->cf_last->ndw>>1) >= 110)
         m_bc->force_add_cf = 1;

      /* First step: move the address value into AR */
      memset(&alu, 0, sizeof(alu));
      alu.op = opcode_map.at(op1_mova_int);
      alu.dst.chan = 0;
      alu.src[0].sel = addr.sel();
      alu.src[0].chan = addr.chan();
      alu.last = 1;
      sfn_log << SfnLog::assembly << " mova_int, ";
      int r = r600_bytecode_add_alu(m_bc, &alu);
      if (r)
         return bim_invalid;

      /* The MOVA above replaced whatever was tracked as loaded in AR */
      m_bc->ar_loaded = 0;

      /* Second step: set the requested CF index register */
      alu.op = opcode_map.at(idxop);
      alu.dst.chan = 0;
      alu.src[0].sel = 0;
      alu.src[0].chan = 0;
      alu.last = 1;
      sfn_log << SfnLog::assembly << "op1_set_cf_idx" << idx;
      r = r600_bytecode_add_alu(m_bc, &alu);
      if (r)
         return bim_invalid;

      /* Remember what was loaded so that an identical request can be skipped */
      m_bc->index_reg[idx] = addr.sel();
      m_bc->index_reg_chan[idx] = addr.chan();
      m_bc->index_loaded[idx] = true;
      sfn_log << SfnLog::assembly << "\n";
   }

   /* Index register 0 corresponds to bim_zero and index register 1 to
    * bim_one; callers assign the result directly as their index mode. */
   return idx == 0 ? bim_zero : bim_one;
}
|
||||
|
||||
bool AssemblyFromShaderLegacyImpl::copy_dst(r600_bytecode_alu_dst& dst,
|
||||
const Value& d)
|
||||
{
|
||||
|
@ -1249,6 +1204,16 @@ bool AssemblyFromShaderLegacyImpl::copy_src(r600_bytecode_alu_src& src, const Va
|
|||
if (s.type() == Value::kconst) {
|
||||
const UniformValue& cv = static_cast<const UniformValue&>(s);
|
||||
src.kc_bank = cv.kcache_bank();
|
||||
auto addr = cv.addr();
|
||||
if (addr) {
|
||||
src.kc_rel = 1;
|
||||
emit_index_reg(*addr, 0);
|
||||
auto type = m_bc->cf_last->op;
|
||||
if (r600_bytecode_add_cf(m_bc)) {
|
||||
return false;
|
||||
}
|
||||
m_bc->cf_last->op = type;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
|
@ -812,6 +812,10 @@ void LiverangeEvaluator::record_read(const Value& src, bool is_array_elm)
|
|||
} else if (src.type() == Value::gpr_array_value) {
|
||||
const GPRArrayValue& v = static_cast<const GPRArrayValue&>(src);
|
||||
v.record_read(*this);
|
||||
} else if (src.type() == Value::kconst) {
|
||||
const UniformValue& v = static_cast<const UniformValue&>(src);
|
||||
if (v.addr())
|
||||
record_read(*v.addr(),is_array_elm);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -829,6 +833,10 @@ void LiverangeEvaluator::record_write(const Value& src, bool is_array_elm)
|
|||
} else if (src.type() == Value::gpr_array_value) {
|
||||
const GPRArrayValue& v = static_cast<const GPRArrayValue&>(src);
|
||||
v.record_write(*this);
|
||||
} else if (src.type() == Value::kconst) {
|
||||
const UniformValue& v = static_cast<const UniformValue&>(src);
|
||||
if (v.addr())
|
||||
record_write(*v.addr(),is_array_elm);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -75,8 +75,8 @@ ShaderFromNirProcessor::ShaderFromNirProcessor(pipe_shader_type ptype,
|
|||
m_next_hwatomic_loc(0),
|
||||
m_sel(sel),
|
||||
m_atomic_base(atomic_base),
|
||||
m_image_count(0)
|
||||
|
||||
m_image_count(0),
|
||||
last_emitted_alu(nullptr)
|
||||
{
|
||||
m_sh_info.processor_type = ptype;
|
||||
|
||||
|
@ -363,7 +363,33 @@ bool ShaderFromNirProcessor::emit_tex_instruction(nir_instr* instr)
|
|||
return m_tex_instr.emit(instr);
|
||||
}
|
||||
|
||||
/* Emit an ALU instruction and remember it as the most recently emitted one.
 *
 * If the previously emitted ALU instruction does not yet carry
 * alu_last_instr and the new instruction reads a kconst source that has an
 * address value attached, the previous instruction gets alu_last_instr set
 * before the new one is emitted.
 * NOTE(review): presumably this closes the running instruction group so the
 * index register load can be placed in front of the new instruction - confirm.
 */
void ShaderFromNirProcessor::emit_instruction(AluInstruction *ir)
{
   const bool previous_is_open =
         last_emitted_alu && !last_emitted_alu->flag(alu_last_instr);

   if (previous_is_open) {
      for (unsigned k = 0; k < ir->n_sources(); ++k) {
         auto& operand = ir->src(k);
         if (operand.type() != Value::kconst)
            continue;

         auto& uniform = static_cast<UniformValue&>(operand);
         if (!uniform.addr())
            continue;

         /* One indirectly addressed constant is enough */
         last_emitted_alu->set_flag(alu_last_instr);
         break;
      }
   }

   last_emitted_alu = ir;
   emit_instruction_internal(ir);
}
|
||||
|
||||
|
||||
/* Emit an instruction through emit_instruction_internal and clear
 * last_emitted_alu afterwards, so that the AluInstruction overload finds no
 * previous ALU instruction to flag on its next call. */
void ShaderFromNirProcessor::emit_instruction(Instruction *ir)
|
||||
{
|
||||
|
||||
emit_instruction_internal(ir);
|
||||
last_emitted_alu = nullptr;
|
||||
}
|
||||
|
||||
void ShaderFromNirProcessor::emit_instruction_internal(Instruction *ir)
|
||||
{
|
||||
if (m_pending_else) {
|
||||
append_block(-1);
|
||||
|
@ -858,6 +884,24 @@ bool ShaderFromNirProcessor::emit_load_ubo_vec4(nir_intrinsic_instr* instr)
|
|||
return load_uniform_indirect(instr, from_nir(instr->src[1], 0, 0), 0, bufid->u32);
|
||||
}
|
||||
} else {
|
||||
if (buf_offset) {
|
||||
int buf_cmp = nir_intrinsic_component(instr);
|
||||
AluInstruction *ir = nullptr;
|
||||
auto kc_id = from_nir(instr->src[0], 0);
|
||||
for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
|
||||
int cmp = buf_cmp + i;
|
||||
auto u = PValue(new UniformValue(512 + buf_offset->u32, cmp, kc_id));
|
||||
if (instr->dest.is_ssa)
|
||||
load_preloaded_value(instr->dest, i, u);
|
||||
else {
|
||||
ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write});
|
||||
emit_instruction(ir);
|
||||
}
|
||||
}
|
||||
if (ir)
|
||||
ir->set_flag(alu_last_instr);
|
||||
return true;
|
||||
}
|
||||
/* TODO: if buf_offset is constant then this can also be solved by using the CF index
|
||||
* on the ALU block, and this would probably make sense when there are more than one
|
||||
* loads with the same buffer ID. */
|
||||
|
@ -884,7 +928,6 @@ bool ShaderFromNirProcessor::emit_load_ubo_vec4(nir_intrinsic_instr* instr)
|
|||
|
||||
}
|
||||
|
||||
|
||||
bool ShaderFromNirProcessor::emit_discard_if(nir_intrinsic_instr* instr)
|
||||
{
|
||||
r600::sfn_log << SfnLog::instr << "emit '"
|
||||
|
|
|
@ -71,6 +71,7 @@ public:
|
|||
std::vector<PValue> src0,
|
||||
const std::set<AluModifiers>& m_flags);
|
||||
void emit_export_instruction(WriteoutInstruction *ir);
|
||||
void emit_instruction(AluInstruction *ir);
|
||||
|
||||
void split_constants(nir_alu_instr* instr);
|
||||
void load_uniform(const nir_alu_src& src);
|
||||
|
@ -141,9 +142,13 @@ protected:
|
|||
|
||||
bool allocate_reserved_registers();
|
||||
|
||||
|
||||
private:
|
||||
virtual bool do_allocate_reserved_registers() = 0;
|
||||
|
||||
|
||||
void emit_instruction_internal(Instruction *ir);
|
||||
|
||||
bool emit_alu_instruction(nir_instr *instr);
|
||||
bool emit_deref_instruction(nir_deref_instr* instr);
|
||||
bool emit_intrinsic_instruction(nir_intrinsic_instr* instr);
|
||||
|
@ -176,6 +181,7 @@ private:
|
|||
virtual bool do_emit_load_deref(const nir_variable *in_var, nir_intrinsic_instr* instr) = 0;
|
||||
virtual bool do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) = 0;
|
||||
|
||||
|
||||
bool emit_store_scratch(nir_intrinsic_instr* instr);
|
||||
bool emit_load_scratch(nir_intrinsic_instr* instr);
|
||||
virtual void do_finalize() = 0;
|
||||
|
@ -217,6 +223,7 @@ private:
|
|||
int m_image_count;
|
||||
|
||||
std::unordered_map<int, int> m_atomic_base_map;
|
||||
AluInstruction *last_emitted_alu;
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -238,8 +238,10 @@ void UniformValue::do_print(std::ostream& os) const
|
|||
{
|
||||
if (m_index < 512)
|
||||
os << "KC" << m_kcache_bank << "[" << m_index;
|
||||
else if (m_addr)
|
||||
os << "KC[" << *m_addr << "][" << m_index;
|
||||
else
|
||||
os << "KCX[" << m_index;
|
||||
os << "KCx[" << m_index;
|
||||
os << "]." << component_names[chan()];
|
||||
}
|
||||
|
||||
|
|
|
@ -186,6 +186,8 @@ public:
|
|||
UniformValue(uint32_t sel, uint32_t chan, PValue addr);
|
||||
uint32_t sel() const override;
|
||||
uint32_t kcache_bank() const;
|
||||
PValue addr() const {return m_addr;}
|
||||
void reset_addr(PValue v) {m_addr = v;}
|
||||
private:
|
||||
void do_print(std::ostream& os) const override;
|
||||
bool is_equal_to(const Value& other) const override;
|
||||
|
|
Loading…
Reference in New Issue