r600/sfn: Fix the kcache failure handling
Instead of starting a new block when the kcache handling failed, try to continue scheduling instructions until kcache allocation fails for all ready instructions. With that we avoid a CF split within an LDS fetch/read group. Signed-off-by: Gert Wollny <gert.wollny@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17678>
This commit is contained in:
parent
8db31e0fe6
commit
8a7d34e3bd
|
@ -302,17 +302,43 @@ void Block::push_back(PInst instr)
|
|||
|
||||
bool Block::try_reserve_kcache(const AluGroup& group)
|
||||
{
|
||||
auto kcache = m_kcache;
|
||||
|
||||
auto kcache_constants = group.get_kconsts();
|
||||
for (auto& kc : kcache_constants) {
|
||||
auto u = kc->as_uniform();
|
||||
assert(u);
|
||||
if (!try_reserve_kcache(*u))
|
||||
if (!try_reserve_kcache(*u, kcache)) {
|
||||
m_kcache_alloc_failed = true;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
m_kcache = kcache;
|
||||
m_kcache_alloc_failed = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Block::try_reserve_kcache(const UniformValue& u)
|
||||
bool Block::try_reserve_kcache(const AluInstr& instr)
|
||||
{
|
||||
auto kcache = m_kcache;
|
||||
|
||||
for (auto& src : instr.sources()) {
|
||||
auto u = src->as_uniform();
|
||||
if (u) {
|
||||
if (!try_reserve_kcache(*u, kcache)) {
|
||||
m_kcache_alloc_failed = true;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
m_kcache = kcache;
|
||||
m_kcache_alloc_failed = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Block::try_reserve_kcache(const UniformValue& u,
|
||||
std::array<KCacheLine, 4>& kcache) const
|
||||
{
|
||||
const int kcache_banks = 4; // TODO: handle pre-evergreen
|
||||
|
||||
|
@ -323,49 +349,50 @@ bool Block::try_reserve_kcache(const UniformValue& u)
|
|||
bool found = false;
|
||||
|
||||
for (int i = 0; i < kcache_banks && !found; ++i) {
|
||||
if (m_kcache[i].mode) {
|
||||
if (m_kcache[i].bank < bank)
|
||||
if (kcache[i].mode) {
|
||||
if (kcache[i].bank < bank)
|
||||
continue;
|
||||
|
||||
if ((m_kcache[i].bank == bank &&
|
||||
m_kcache[i].addr > line + 1) ||
|
||||
m_kcache[i].bank > bank) {
|
||||
if (m_kcache[kcache_banks - 1].mode)
|
||||
if ((kcache[i].bank == bank &&
|
||||
kcache[i].addr > line + 1) ||
|
||||
kcache[i].bank > bank) {
|
||||
if (kcache[kcache_banks - 1].mode)
|
||||
return false;
|
||||
|
||||
memmove(&m_kcache[i+1],&m_kcache[i], (kcache_banks-i-1)*sizeof(KCacheLine));
|
||||
m_kcache[i].mode = KCacheLine::lock_1;
|
||||
m_kcache[i].bank = bank;
|
||||
m_kcache[i].addr = line;
|
||||
memmove(&kcache[i+1],&kcache[i], (kcache_banks-i-1)*sizeof(KCacheLine));
|
||||
kcache[i].mode = KCacheLine::lock_1;
|
||||
kcache[i].bank = bank;
|
||||
kcache[i].addr = line;
|
||||
return true;
|
||||
}
|
||||
|
||||
int d = line - m_kcache[i].addr;
|
||||
int d = line - kcache[i].addr;
|
||||
|
||||
if (d == -1) {
|
||||
m_kcache[i].addr--;
|
||||
if (m_kcache[i].mode == KCacheLine::lock_2) {
|
||||
kcache[i].addr--;
|
||||
if (kcache[i].mode == KCacheLine::lock_2) {
|
||||
/* we are prepending the line to the current set,
|
||||
* discarding the existing second line,
|
||||
* so we'll have to insert line+2 after it */
|
||||
* discarding the existing second line,
|
||||
* so we'll have to insert line+2 after it */
|
||||
line += 2;
|
||||
continue;
|
||||
} else if (m_kcache[i].mode == KCacheLine::lock_1) {
|
||||
m_kcache[i].mode = KCacheLine::lock_2;
|
||||
} else if (kcache[i].mode == KCacheLine::lock_1) {
|
||||
kcache[i].mode = KCacheLine::lock_2;
|
||||
return true;
|
||||
} else {
|
||||
/* V_SQ_CF_KCACHE_LOCK_LOOP_INDEX is not supported */
|
||||
return false;
|
||||
}
|
||||
} else if (d == 1) {
|
||||
m_kcache[i].mode = KCacheLine::lock_2;
|
||||
kcache[i].mode = KCacheLine::lock_2;
|
||||
return true;
|
||||
} else if (d == 0)
|
||||
} else if (d == 0) {
|
||||
return true;
|
||||
}
|
||||
} else { /* free kcache set - use it */
|
||||
m_kcache[i].mode = KCacheLine::lock_1;
|
||||
m_kcache[i].bank = bank;
|
||||
m_kcache[i].addr = line;
|
||||
kcache[i].mode = KCacheLine::lock_1;
|
||||
kcache[i].bank = bank;
|
||||
kcache[i].addr = line;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -196,7 +196,8 @@ public:
|
|||
void set_type(Type t);
|
||||
uint32_t remaining_slots() const { return m_remaining_slots;}
|
||||
|
||||
bool try_reserve_kcache(const AluGroup& group);
|
||||
bool try_reserve_kcache(const AluGroup& instr);
|
||||
bool try_reserve_kcache(const AluInstr& group);
|
||||
|
||||
auto last_lds_instr() {return m_last_lds_instr;}
|
||||
void set_last_lds_instr(Instr *instr) {m_last_lds_instr = instr;}
|
||||
|
@ -207,8 +208,11 @@ public:
|
|||
|
||||
size_t size() const { return m_instructions.size();}
|
||||
|
||||
bool kcache_reservation_failed() const { return m_kcache_alloc_failed;}
|
||||
|
||||
private:
|
||||
bool try_reserve_kcache(const UniformValue& u);
|
||||
bool try_reserve_kcache(const UniformValue& u,
|
||||
std::array<KCacheLine, 4>& kcache) const;
|
||||
|
||||
bool do_ready() const override {return true;};
|
||||
void do_print(std::ostream& os) const override;
|
||||
|
@ -221,11 +225,13 @@ private:
|
|||
uint32_t m_remaining_slots{0xffff};
|
||||
|
||||
std::array<KCacheLine, 4> m_kcache;
|
||||
bool m_kcache_alloc_failed{false};
|
||||
|
||||
Instr *m_last_lds_instr{nullptr};
|
||||
|
||||
int m_lds_group_requirement{0};
|
||||
AluInstr *m_lds_group_start{nullptr};
|
||||
|
||||
};
|
||||
|
||||
class InstrWithVectorResult : public Instr {
|
||||
|
|
|
@ -489,83 +489,84 @@ bool BlockSheduler::schedule_alu(Shader::ShaderBlocks& out_blocks)
|
|||
bool has_lds_ready = !alu_vec_ready.empty() &&
|
||||
(*alu_vec_ready.begin())->has_lds_access();
|
||||
|
||||
/* If we have ready ALU instructions we have to start a new ALU block */
|
||||
if (has_alu_ready || !alu_groups_ready.empty()) {
|
||||
if (m_current_block->type() != Block::alu) {
|
||||
start_new_block(out_blocks, Block::alu);
|
||||
m_alu_groups_schduled = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Schedule groups first. unless we have a pending LDS instuction
|
||||
* We don't want the LDS instructions to be too far apart because the
|
||||
* fetch + read from queue has to be in the same ALU CF block */
|
||||
if (!alu_groups_ready.empty() && !has_lds_ready) {
|
||||
group = *alu_groups_ready.begin();
|
||||
alu_groups_ready.erase(alu_groups_ready.begin());
|
||||
sfn_log << SfnLog::schedule << "Schedule ALU group\n";
|
||||
success = true;
|
||||
} else {
|
||||
if (has_alu_ready) {
|
||||
group = new AluGroup();
|
||||
sfn_log << SfnLog::schedule << "START new ALU group\n";
|
||||
}
|
||||
}
|
||||
|
||||
if (group) {
|
||||
int free_slots = group->free_slots();
|
||||
|
||||
if (free_slots && has_alu_ready) {
|
||||
if (!alu_vec_ready.empty())
|
||||
success |= schedule_alu_to_group_vec(group);
|
||||
|
||||
/* Apparently one can't schedule a t-slot if there is already
|
||||
* and LDS instruction scheduled.
|
||||
* TODO: check whether this is only relevant for actual LDS instructions
|
||||
* or also for instructions that read from the LDS return value queue */
|
||||
|
||||
if (free_slots & 0x10 && !has_lds_ready) {
|
||||
sfn_log << SfnLog::schedule << "Try schedule TRANS channel\n";
|
||||
if (!alu_trans_ready.empty())
|
||||
success |= schedule_alu_to_group_trans(group, alu_trans_ready);
|
||||
if (!alu_vec_ready.empty())
|
||||
success |= schedule_alu_to_group_trans(group, alu_vec_ready);
|
||||
}
|
||||
}
|
||||
|
||||
sfn_log << SfnLog::schedule << "Finalize ALU group\n";
|
||||
group->set_scheduled();
|
||||
group->fix_last_flag();
|
||||
group->set_nesting_depth(m_current_block->nesting_depth());
|
||||
|
||||
|
||||
if (m_current_block->type() != Block::alu) {
|
||||
start_new_block(out_blocks, Block::alu);
|
||||
m_alu_groups_schduled = 0;
|
||||
}
|
||||
|
||||
/* Pessimistic hack: If we have started an LDS group,
|
||||
* make sure 8 instructions groups still fit into the CF
|
||||
* TODO: take care of Address slot emission
|
||||
* TODO: maybe do this CF split only in the assembler
|
||||
*/
|
||||
/*if (group->slots() > m_current_block->remaining_slots() ||
|
||||
(group->has_lds_group_start() &&
|
||||
m_current_block->remaining_slots() < 7 * 8)) {
|
||||
//assert(!m_current_block->lds_group_active());
|
||||
start_new_block(out_blocks, Block::alu);
|
||||
}*/
|
||||
|
||||
if (!m_current_block->try_reserve_kcache(*group)) {
|
||||
assert(!m_current_block->lds_group_active());
|
||||
start_new_block(out_blocks, Block::alu);
|
||||
m_current_block->set_instr_flag(Instr::force_cf);
|
||||
}
|
||||
|
||||
assert(m_current_block->try_reserve_kcache(*group));
|
||||
|
||||
if (group->has_lds_group_start())
|
||||
m_current_block->lds_group_start(*group->begin());
|
||||
|
||||
m_current_block->push_back(group);
|
||||
if (group->has_lds_group_end())
|
||||
m_current_block->lds_group_end();
|
||||
if (!m_current_block->try_reserve_kcache(*group))
|
||||
unreachable("Scheduling a group in a new block should always succeed");
|
||||
alu_groups_ready.erase(alu_groups_ready.begin());
|
||||
sfn_log << SfnLog::schedule << "Schedule ALU group\n";
|
||||
success = true;
|
||||
} else if (has_alu_ready) {
|
||||
group = new AluGroup();
|
||||
sfn_log << SfnLog::schedule << "START new ALU group\n";
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (success)
|
||||
++m_alu_groups_schduled;
|
||||
assert(group);
|
||||
|
||||
int free_slots = group->free_slots();
|
||||
|
||||
while (free_slots && has_alu_ready) {
|
||||
if (!alu_vec_ready.empty())
|
||||
success |= schedule_alu_to_group_vec(group);
|
||||
|
||||
/* Apparently one can't schedule a t-slot if there is already
|
||||
* and LDS instruction scheduled.
|
||||
* TODO: check whether this is only relevant for actual LDS instructions
|
||||
* or also for instructions that read from the LDS return value queue */
|
||||
|
||||
if (free_slots & 0x10 && !has_lds_ready) {
|
||||
sfn_log << SfnLog::schedule << "Try schedule TRANS channel\n";
|
||||
if (!alu_trans_ready.empty())
|
||||
success |= schedule_alu_to_group_trans(group, alu_trans_ready);
|
||||
if (!alu_vec_ready.empty())
|
||||
success |= schedule_alu_to_group_trans(group, alu_vec_ready);
|
||||
}
|
||||
|
||||
if (success) {
|
||||
++m_alu_groups_schduled;
|
||||
break;
|
||||
} else if (m_current_block->kcache_reservation_failed()) {
|
||||
// LDS read groups should not lead to impossible
|
||||
// kcache constellations
|
||||
assert(!m_current_block->lds_group_active());
|
||||
|
||||
// kcache reservation failed, so we have to start a new CF
|
||||
start_new_block(out_blocks, Block::alu);
|
||||
m_current_block->set_instr_flag(Instr::force_cf);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
sfn_log << SfnLog::schedule << "Finalize ALU group\n";
|
||||
group->set_scheduled();
|
||||
group->fix_last_flag();
|
||||
group->set_nesting_depth(m_current_block->nesting_depth());
|
||||
m_current_block->push_back(group);
|
||||
|
||||
if (group->has_lds_group_start())
|
||||
m_current_block->lds_group_start(*group->begin());
|
||||
|
||||
if (group->has_lds_group_end())
|
||||
m_current_block->lds_group_end();
|
||||
|
||||
return success;
|
||||
}
|
||||
|
@ -652,6 +653,13 @@ bool BlockSheduler::schedule_alu_to_group_vec(AluGroup *group)
|
|||
auto e = alu_vec_ready.end();
|
||||
while (i != e) {
|
||||
sfn_log << SfnLog::schedule << "Try schedule to vec " << **i;
|
||||
|
||||
if (!m_current_block->try_reserve_kcache(**i)) {
|
||||
sfn_log << SfnLog::schedule << " failed (kcache)\n";
|
||||
++i;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (group->add_vec_instructions(*i)) {
|
||||
auto old_i = i;
|
||||
++i;
|
||||
|
@ -679,6 +687,12 @@ bool BlockSheduler::schedule_alu_to_group_trans(AluGroup *group, std::list<AluIn
|
|||
auto e = readylist.end();
|
||||
while (i != e) {
|
||||
sfn_log << SfnLog::schedule << "Try schedule to trans " << **i;
|
||||
if (!m_current_block->try_reserve_kcache(**i)) {
|
||||
sfn_log << SfnLog::schedule << " failed (kcache)\n";
|
||||
++i;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (group->add_trans_instructions(*i)) {
|
||||
auto old_i = i;
|
||||
++i;
|
||||
|
|
Loading…
Reference in New Issue