r600/sfn: Fix indirect const buffer access

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6879>
This commit is contained in:
Gert Wollny 2020-09-26 19:25:41 +02:00 committed by Marge Bot
parent 73c5f45191
commit 9a6b11a733
13 changed files with 151 additions and 106 deletions

View File

@ -362,7 +362,7 @@ static int assign_alu_units(struct r600_bytecode *bc, struct r600_bytecode_alu *
}
assignment[4] = alu;
} else {
if (assignment[chan]) {
if (assignment[chan]) {
assert(0); /* ALU.chan has already been allocated. */
return -1;
}
@ -1232,7 +1232,7 @@ int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
/* Load index register if required */
if (bc->chip_class >= EVERGREEN) {
for (i = 0; i < 3; i++)
if (nalu->src[i].kc_bank && nalu->src[i].kc_rel)
if (nalu->src[i].kc_bank && nalu->src[i].kc_rel)
egcm_load_index_reg(bc, 0, true);
}

View File

@ -196,6 +196,7 @@ void EmitAluInstruction::preload_src(const nir_alu_instr& instr)
for (unsigned c = 0; c < nsrc_comp; ++c) {
m_src[i][c] = from_nir(instr.src[i], c);
sfn_log << SfnLog::reg << " " << *m_src[i][c];
}
sfn_log << SfnLog::reg << "\n";
}
@ -262,7 +263,7 @@ void EmitAluInstruction::split_constants(const nir_alu_instr& instr, unsigned ns
if (src->type() == Value::kconst) {
c[nconst] = static_cast<const UniformValue *>(src.get());
idx[nconst++] = i;
sfn_log << SfnLog::reg << "is constant " << i;
sfn_log << SfnLog::reg << " is constant " << i;
}
sfn_log << SfnLog::reg << "\n";
}

View File

@ -80,6 +80,11 @@ void EmitInstruction::emit_instruction(Instruction *ir)
return m_proc.emit_instruction(ir);
}
/* Forward an ALU instruction to the owning shader processor. */
void EmitInstruction::emit_instruction(AluInstruction *ir)
{
   m_proc.emit_instruction(ir);
}
bool EmitInstruction::emit_instruction(EAluOp opcode, PValue dest,
std::vector<PValue> src0,
const std::set<AluModifiers>& m_flags)
@ -179,7 +184,6 @@ int EmitInstruction::remap_atomic_base(int base)
return m_proc.remap_atomic_base(base);
}
const std::set<AluModifiers> EmitInstruction::empty = {};
const std::set<AluModifiers> EmitInstruction::write = {alu_write};
const std::set<AluModifiers> EmitInstruction::last_write = {alu_write, alu_last_instr};

View File

@ -72,6 +72,7 @@ protected:
// forwards from ShaderFromNirProcessor
void emit_instruction(Instruction *ir);
void emit_instruction(AluInstruction *ir);
bool emit_instruction(EAluOp opcode, PValue dest,
std::vector<PValue> src0,
const std::set<AluModifiers>& m_flags);
@ -94,7 +95,6 @@ protected:
const PValue& reg, bool map);
int remap_atomic_base(int base);
private:
ShaderFromNirProcessor& m_proc;

View File

@ -60,7 +60,13 @@ void ValueRemapper::remap(PValue& v)
size_t range_end = range_start + val.array_size();
while (range_start < range_end)
m_map[range_start++].used = true;
} else if (v->type() == Value::kconst) {
auto& val = static_cast<UniformValue&>(*v);
auto addr = val.addr();
if (addr && addr->type() == Value::gpr)
val.reset_addr(remap_one_registers(addr));
}
}
void ValueRemapper::remap(GPRVector& v)

View File

@ -42,6 +42,11 @@ bool InstructionBlock::is_equal_to(const Instruction& lhs) const
[](PInstruction ri, PInstruction li) {return *ri == *li;});
}
/* Return the instruction stored last in this block, or a null pointer
 * when the block holds no instructions. */
PInstruction InstructionBlock::last_instruction()
{
   if (m_block.empty())
      return nullptr;

   return *m_block.rbegin();
}
void InstructionBlock::do_print(std::ostream& os) const
{
std::string space(" ", 2 * m_nesting_depth);

View File

@ -61,6 +61,8 @@ public:
return m_block_number;
}
PInstruction last_instruction();
private:
void do_evalue_liveness(LiverangeEvaluator& eval) const override;
bool is_equal_to(const Instruction& lhs) const override;

View File

@ -77,7 +77,7 @@ private:
bool copy_dst(r600_bytecode_alu_dst& dst, const Value& src);
bool copy_src(r600_bytecode_alu_src& src, const Value& s);
EBufferIndexMode emit_index_reg(const Value& reg, unsigned idx);
ConditionalJumpTracker m_jump_tracker;
CallStack m_callstack;
@ -510,7 +510,7 @@ bool AssemblyFromShaderLegacyImpl::emit_if_start(const IfInstruction & if_instr)
if (needs_workaround) {
r600_bytecode_add_cfinst(m_bc, CF_OP_PUSH);
m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2;
m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2;
op = cf_alu;
}
emit_alu(pred, op);
@ -726,37 +726,7 @@ bool AssemblyFromShaderLegacyImpl::emit_vtx(const FetchInstruction& fetch_instr)
const auto& boffs = static_cast<const LiteralValue&>(*addr);
buffer_offset = boffs.value();
} else {
index_mode = bim_zero;
if ((!m_bc->index_loaded[0] || m_loop_nesting ||
m_bc->index_reg[0] != addr->sel() ||
m_bc->index_reg_chan[0] != addr->chan())) {
struct r600_bytecode_alu alu;
memset(&alu, 0, sizeof(alu));
alu.op = opcode_map.at(op1_mova_int);
alu.dst.chan = 0;
alu.src[0].sel = addr->sel();
alu.src[0].chan = addr->chan();
alu.last = 1;
int r = r600_bytecode_add_alu(m_bc, &alu);
if (r)
return false;
m_bc->ar_loaded = 0;
alu.op = opcode_map.at(op1_set_cf_idx0);
alu.dst.chan = 0;
alu.src[0].sel = 0;
alu.src[0].chan = 0;
alu.last = 1;
r = r600_bytecode_add_alu(m_bc, &alu);
if (r)
return false;
m_bc->index_reg[0] = addr->sel();
m_bc->index_reg_chan[0] = addr->chan();
m_bc->index_loaded[0] = true;
}
index_mode = emit_index_reg(*addr, 0);
}
}
@ -887,37 +857,7 @@ bool AssemblyFromShaderLegacyImpl::emit_gds(const GDSInstr& instr)
int uav_idx = -1;
auto addr = instr.uav_id();
if (addr->type() != Value::literal) {
if (!m_bc->index_loaded[1] || m_loop_nesting ||
m_bc->index_reg[1] != addr->sel()
|| m_bc->index_reg_chan[1] != addr->chan()) {
struct r600_bytecode_alu alu;
memset(&alu, 0, sizeof(alu));
alu.op = opcode_map.at(op1_mova_int);
alu.dst.chan = 0;
alu.src[0].sel = addr->sel();
alu.src[0].chan = addr->chan();
alu.last = 1;
int r = r600_bytecode_add_alu(m_bc, &alu);
if (r)
return false;
m_bc->ar_loaded = 0;
alu.op = opcode_map.at(op1_set_cf_idx1);
alu.dst.chan = 0;
alu.src[0].sel = 0;
alu.src[0].chan = 0;
alu.last = 1;
r = r600_bytecode_add_alu(m_bc, &alu);
if (r)
return false;
m_bc->index_reg[1] = addr->sel();
m_bc->index_reg_chan[1] = addr->chan();
m_bc->index_loaded[1] = true;
}
emit_index_reg(*addr, 1);
} else {
const LiteralValue& addr_reg = static_cast<const LiteralValue&>(*addr);
uav_idx = addr_reg.value();
@ -1102,39 +1042,7 @@ bool AssemblyFromShaderLegacyImpl::emit_rat(const RatInstruction& instr)
if (addr) {
if (addr->type() != Value::literal) {
rat_index_mode = bim_one;
if (!m_bc->index_loaded[1] || m_loop_nesting ||
m_bc->index_reg[1] != addr->sel()
|| m_bc->index_reg_chan[1] != addr->chan()) {
struct r600_bytecode_alu alu;
memset(&alu, 0, sizeof(alu));
alu.op = opcode_map.at(op1_mova_int);
alu.dst.chan = 0;
alu.src[0].sel = addr->sel();
alu.src[0].chan = addr->chan();
alu.last = 1;
int r = r600_bytecode_add_alu(m_bc, &alu);
if (r)
return false;
m_bc->ar_loaded = 0;
alu.op = opcode_map.at(op1_set_cf_idx1);
alu.dst.chan = 0;
alu.src[0].sel = 0;
alu.src[0].chan = 0;
alu.last = 1;
r = r600_bytecode_add_alu(m_bc, &alu);
if (r)
return false;
m_bc->index_reg[1] = addr->sel();
m_bc->index_reg_chan[1] = addr->chan();
m_bc->index_loaded[1] = true;
}
rat_index_mode = emit_index_reg(*addr, 1);
} else {
const LiteralValue& addr_reg = static_cast<const LiteralValue&>(*addr);
rat_idx += addr_reg.value();
@ -1167,6 +1075,53 @@ bool AssemblyFromShaderLegacyImpl::emit_rat(const RatInstruction& instr)
return true;
}
/* Load CF index register `idx` (0 or 1) from the register channel described
 * by `addr`, unless the state cached in m_bc says it already holds that value.
 *
 * The load is emitted as two ALU instructions: MOVA_INT reading
 * addr.sel()/addr.chan(), followed by SET_CF_IDX0 or SET_CF_IDX1. The cached
 * state (index_loaded, index_reg, index_reg_chan) is ignored whenever
 * m_loop_nesting is non-zero, so the register is always reloaded in that case.
 *
 * Returns the buffer index mode that selects the requested index register:
 * bim_zero for idx 0 and bim_one for idx 1. Returns bim_invalid if adding
 * either ALU instruction to the bytecode fails.
 */
EBufferIndexMode
AssemblyFromShaderLegacyImpl::emit_index_reg(const Value& addr, unsigned idx)
{
   assert(idx < 2);

   EAluOp idxop = idx ? op1_set_cf_idx1 : op1_set_cf_idx0;

   if (!m_bc->index_loaded[idx] || m_loop_nesting ||
       m_bc->index_reg[idx] != addr.sel()
       || m_bc->index_reg_chan[idx] != addr.chan()) {
      struct r600_bytecode_alu alu;

      // Make sure MOVA is not last instr in clause
      if ((m_bc->cf_last->ndw>>1) >= 110)
         m_bc->force_add_cf = 1;

      memset(&alu, 0, sizeof(alu));
      alu.op = opcode_map.at(op1_mova_int);
      alu.dst.chan = 0;
      alu.src[0].sel = addr.sel();
      alu.src[0].chan = addr.chan();
      alu.last = 1;
      sfn_log << SfnLog::assembly << " mova_int, ";
      int r = r600_bytecode_add_alu(m_bc, &alu);
      if (r)
         return bim_invalid;

      // The MOVA above was emitted behind the assembler's back, so the
      // cached "AR loaded" state must not be trusted any more.
      m_bc->ar_loaded = 0;

      // Reuse the same ALU record for the SET_CF_IDXn instruction.
      alu.op = opcode_map.at(idxop);
      alu.dst.chan = 0;
      alu.src[0].sel = 0;
      alu.src[0].chan = 0;
      alu.last = 1;
      sfn_log << SfnLog::assembly << "op1_set_cf_idx" << idx;
      r = r600_bytecode_add_alu(m_bc, &alu);
      if (r)
         return bim_invalid;

      // Remember what is loaded so an identical request can be skipped.
      m_bc->index_reg[idx] = addr.sel();
      m_bc->index_reg_chan[idx] = addr.chan();
      m_bc->index_loaded[idx] = true;
      sfn_log << SfnLog::assembly << "\n";
   }

   // Index register 0 is selected with bim_zero, index register 1 with bim_one.
   return idx == 0 ? bim_zero : bim_one;
}
bool AssemblyFromShaderLegacyImpl::copy_dst(r600_bytecode_alu_dst& dst,
const Value& d)
{
@ -1249,6 +1204,16 @@ bool AssemblyFromShaderLegacyImpl::copy_src(r600_bytecode_alu_src& src, const Va
if (s.type() == Value::kconst) {
const UniformValue& cv = static_cast<const UniformValue&>(s);
src.kc_bank = cv.kcache_bank();
auto addr = cv.addr();
if (addr) {
src.kc_rel = 1;
emit_index_reg(*addr, 0);
auto type = m_bc->cf_last->op;
if (r600_bytecode_add_cf(m_bc)) {
return false;
}
m_bc->cf_last->op = type;
}
}
return true;

View File

@ -812,6 +812,10 @@ void LiverangeEvaluator::record_read(const Value& src, bool is_array_elm)
} else if (src.type() == Value::gpr_array_value) {
const GPRArrayValue& v = static_cast<const GPRArrayValue&>(src);
v.record_read(*this);
} else if (src.type() == Value::kconst) {
const UniformValue& v = static_cast<const UniformValue&>(src);
if (v.addr())
record_read(*v.addr(),is_array_elm);
}
}
@ -829,6 +833,10 @@ void LiverangeEvaluator::record_write(const Value& src, bool is_array_elm)
} else if (src.type() == Value::gpr_array_value) {
const GPRArrayValue& v = static_cast<const GPRArrayValue&>(src);
v.record_write(*this);
} else if (src.type() == Value::kconst) {
const UniformValue& v = static_cast<const UniformValue&>(src);
if (v.addr())
record_write(*v.addr(),is_array_elm);
}
}

View File

@ -75,8 +75,8 @@ ShaderFromNirProcessor::ShaderFromNirProcessor(pipe_shader_type ptype,
m_next_hwatomic_loc(0),
m_sel(sel),
m_atomic_base(atomic_base),
m_image_count(0)
m_image_count(0),
last_emitted_alu(nullptr)
{
m_sh_info.processor_type = ptype;
@ -363,7 +363,33 @@ bool ShaderFromNirProcessor::emit_tex_instruction(nir_instr* instr)
return m_tex_instr.emit(instr);
}
/* Emit an ALU instruction.
 *
 * If the previously emitted ALU instruction is still tracked and does not
 * carry alu_last_instr, and the new instruction reads a kconst source that
 * has an address value attached, the previous instruction gets the
 * alu_last_instr flag before the new one is emitted.
 */
void ShaderFromNirProcessor::emit_instruction(AluInstruction *ir)
{
   const bool prev_is_open = last_emitted_alu &&
                             !last_emitted_alu->flag(alu_last_instr);

   if (prev_is_open) {
      for (unsigned k = 0; k < ir->n_sources(); ++k) {
         auto& operand = ir->src(k);
         if (operand.type() != Value::kconst)
            continue;

         auto& uniform = static_cast<UniformValue&>(operand);
         if (uniform.addr()) {
            last_emitted_alu->set_flag(alu_last_instr);
            break;
         }
      }
   }

   last_emitted_alu = ir;
   emit_instruction_internal(ir);
}
/* Emit an instruction through emit_instruction_internal() and afterwards
 * clear last_emitted_alu.
 * NOTE(review): presumably the reset ensures that only an ALU instruction
 * emitted directly before another ALU instruction is tracked - confirm. */
void ShaderFromNirProcessor::emit_instruction(Instruction *ir)
{
emit_instruction_internal(ir);
last_emitted_alu = nullptr;
}
void ShaderFromNirProcessor::emit_instruction_internal(Instruction *ir)
{
if (m_pending_else) {
append_block(-1);
@ -858,6 +884,24 @@ bool ShaderFromNirProcessor::emit_load_ubo_vec4(nir_intrinsic_instr* instr)
return load_uniform_indirect(instr, from_nir(instr->src[1], 0, 0), 0, bufid->u32);
}
} else {
if (buf_offset) {
int buf_cmp = nir_intrinsic_component(instr);
AluInstruction *ir = nullptr;
auto kc_id = from_nir(instr->src[0], 0);
for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
int cmp = buf_cmp + i;
auto u = PValue(new UniformValue(512 + buf_offset->u32, cmp, kc_id));
if (instr->dest.is_ssa)
load_preloaded_value(instr->dest, i, u);
else {
ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write});
emit_instruction(ir);
}
}
if (ir)
ir->set_flag(alu_last_instr);
return true;
}
/* TODO: if buf_offset is constant then this can also be solved by using the CF index
* on the ALU block, and this would probably make sense when there are more than one
* loads with the same buffer ID. */
@ -884,7 +928,6 @@ bool ShaderFromNirProcessor::emit_load_ubo_vec4(nir_intrinsic_instr* instr)
}
bool ShaderFromNirProcessor::emit_discard_if(nir_intrinsic_instr* instr)
{
r600::sfn_log << SfnLog::instr << "emit '"

View File

@ -71,6 +71,7 @@ public:
std::vector<PValue> src0,
const std::set<AluModifiers>& m_flags);
void emit_export_instruction(WriteoutInstruction *ir);
void emit_instruction(AluInstruction *ir);
void split_constants(nir_alu_instr* instr);
void load_uniform(const nir_alu_src& src);
@ -141,9 +142,13 @@ protected:
bool allocate_reserved_registers();
private:
virtual bool do_allocate_reserved_registers() = 0;
void emit_instruction_internal(Instruction *ir);
bool emit_alu_instruction(nir_instr *instr);
bool emit_deref_instruction(nir_deref_instr* instr);
bool emit_intrinsic_instruction(nir_intrinsic_instr* instr);
@ -176,6 +181,7 @@ private:
virtual bool do_emit_load_deref(const nir_variable *in_var, nir_intrinsic_instr* instr) = 0;
virtual bool do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) = 0;
bool emit_store_scratch(nir_intrinsic_instr* instr);
bool emit_load_scratch(nir_intrinsic_instr* instr);
virtual void do_finalize() = 0;
@ -217,6 +223,7 @@ private:
int m_image_count;
std::unordered_map<int, int> m_atomic_base_map;
AluInstruction *last_emitted_alu;
};
}

View File

@ -238,8 +238,10 @@ void UniformValue::do_print(std::ostream& os) const
{
if (m_index < 512)
os << "KC" << m_kcache_bank << "[" << m_index;
else if (m_addr)
os << "KC[" << *m_addr << "][" << m_index;
else
os << "KCX[" << m_index;
os << "KCx[" << m_index;
os << "]." << component_names[chan()];
}

View File

@ -186,6 +186,8 @@ public:
UniformValue(uint32_t sel, uint32_t chan, PValue addr);
uint32_t sel() const override;
uint32_t kcache_bank() const;
/* Returns the address value stored in m_addr; callers test it for null before use. */
PValue addr() const {return m_addr;}
/* Replaces the address value stored in m_addr with v. */
void reset_addr(PValue v) {m_addr = v;}
private:
void do_print(std::ostream& os) const override;
bool is_equal_to(const Value& other) const override;