r600/sfn: Fix the kcache failure handling

Instead of starting a new block when the kcache handling fails,
try to continue scheduling instructions until kcache allocation
fails for all ready instructions.
With that we avoid a CF split within an LDS fetch/read group.

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17678>
Authored by Gert Wollny on 2022-07-21 17:52:48 +02:00; committed by Marge Bot
parent 8db31e0fe6
commit 8a7d34e3bd
3 changed files with 138 additions and 91 deletions
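
In short, the commit turns a kcache reservation failure from a hard "split the CF now" event into a soft per-instruction rejection. A minimal sketch of the new policy follows; Instr, Block, and schedule_ready are invented stand-ins for illustration, not the actual r600/sfn classes:

// Illustrative sketch only, not the real Mesa code.
#include <vector>

struct Instr {
   bool fits_kcache; // in the real code this depends on bank/line limits
};

struct Block {
   std::vector<Instr*> instrs;

   bool try_reserve_kcache(const Instr& i) { return i.fits_kcache; }
};

// Old policy: the first failed reservation forced a new CF block, which
// could split an LDS fetch/read pair. New policy: skip the offender and
// keep scheduling; the caller only opens a new block once *every* ready
// instruction has failed to reserve its kcache lines.
bool schedule_ready(Block& blk, std::vector<Instr*>& ready)
{
   bool scheduled_any = false;
   for (auto it = ready.begin(); it != ready.end();) {
      if (blk.try_reserve_kcache(**it)) {
         blk.instrs.push_back(*it);
         it = ready.erase(it);
         scheduled_any = true;
      } else {
         ++it; // constants don't fit: retry in a later CF block
      }
   }
   return scheduled_any; // false tells the caller to start a new block
}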


@@ -302,17 +302,43 @@ void Block::push_back(PInst instr)
bool Block::try_reserve_kcache(const AluGroup& group)
{
auto kcache = m_kcache;
auto kcache_constants = group.get_kconsts();
for (auto& kc : kcache_constants) {
auto u = kc->as_uniform();
assert(u);
if (!try_reserve_kcache(*u))
if (!try_reserve_kcache(*u, kcache)) {
m_kcache_alloc_failed = true;
return false;
}
}
m_kcache = kcache;
m_kcache_alloc_failed = false;
return true;
}
bool Block::try_reserve_kcache(const UniformValue& u)
bool Block::try_reserve_kcache(const AluInstr& instr)
{
auto kcache = m_kcache;
for (auto& src : instr.sources()) {
auto u = src->as_uniform();
if (u) {
if (!try_reserve_kcache(*u, kcache)) {
m_kcache_alloc_failed = true;
return false;
}
}
}
m_kcache = kcache;
m_kcache_alloc_failed = false;
return true;
}
bool Block::try_reserve_kcache(const UniformValue& u,
std::array<KCacheLine, 4>& kcache) const
{
const int kcache_banks = 4; // TODO: handle pre-evergreen
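
Note the shape of the rework above: both public overloads now run the reservation against a scratch copy of m_kcache and commit it only when every uniform fits, so a failed attempt leaves the block's committed reservations untouched and merely raises m_kcache_alloc_failed. A self-contained rendering of that copy-then-commit pattern (simplified Line type and a toy placement rule, not the real KCacheLine logic):

#include <array>
#include <utility>
#include <vector>

// Simplified stand-in for KCacheLine (the real one also has lock modes).
struct Line {
   int mode = 0; // 0 = free
   int bank = 0;
   int addr = 0;
};

struct Block {
   std::array<Line, 4> m_kcache; // committed reservations
   bool m_kcache_alloc_failed = false;

   // Toy placement rule: reuse an exact match or grab a free set.
   bool reserve_one(int bank, int addr, std::array<Line, 4>& kcache) const
   {
      for (auto& l : kcache) {
         if (l.mode && l.bank == bank && l.addr == addr)
            return true; // line already locked
         if (!l.mode) {
            l = {1, bank, addr}; // free set: take it
            return true;
         }
      }
      return false; // all four sets busy with other lines
   }

   bool try_reserve_kcache(const std::vector<std::pair<int, int>>& uniforms)
   {
      auto kcache = m_kcache; // mutate a scratch copy only
      for (auto [bank, addr] : uniforms) {
         if (!reserve_one(bank, addr, kcache)) {
            m_kcache_alloc_failed = true; // committed state untouched
            return false;
         }
      }
      m_kcache = kcache; // commit atomically on full success
      m_kcache_alloc_failed = false;
      return true;
   }
};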
@@ -323,49 +349,50 @@ bool Block::try_reserve_kcache(const UniformValue& u)
bool found = false;
for (int i = 0; i < kcache_banks && !found; ++i) {
if (m_kcache[i].mode) {
if (m_kcache[i].bank < bank)
if (kcache[i].mode) {
if (kcache[i].bank < bank)
continue;
if ((m_kcache[i].bank == bank &&
m_kcache[i].addr > line + 1) ||
m_kcache[i].bank > bank) {
if (m_kcache[kcache_banks - 1].mode)
if ((kcache[i].bank == bank &&
kcache[i].addr > line + 1) ||
kcache[i].bank > bank) {
if (kcache[kcache_banks - 1].mode)
return false;
memmove(&m_kcache[i+1],&m_kcache[i], (kcache_banks-i-1)*sizeof(KCacheLine));
m_kcache[i].mode = KCacheLine::lock_1;
m_kcache[i].bank = bank;
m_kcache[i].addr = line;
memmove(&kcache[i+1],&kcache[i], (kcache_banks-i-1)*sizeof(KCacheLine));
kcache[i].mode = KCacheLine::lock_1;
kcache[i].bank = bank;
kcache[i].addr = line;
return true;
}
int d = line - m_kcache[i].addr;
int d = line - kcache[i].addr;
if (d == -1) {
m_kcache[i].addr--;
if (m_kcache[i].mode == KCacheLine::lock_2) {
kcache[i].addr--;
if (kcache[i].mode == KCacheLine::lock_2) {
/* we are prepending the line to the current set,
* discarding the existing second line,
* so we'll have to insert line+2 after it */
* discarding the existing second line,
* so we'll have to insert line+2 after it */
line += 2;
continue;
} else if (m_kcache[i].mode == KCacheLine::lock_1) {
m_kcache[i].mode = KCacheLine::lock_2;
} else if (kcache[i].mode == KCacheLine::lock_1) {
kcache[i].mode = KCacheLine::lock_2;
return true;
} else {
/* V_SQ_CF_KCACHE_LOCK_LOOP_INDEX is not supported */
return false;
}
} else if (d == 1) {
m_kcache[i].mode = KCacheLine::lock_2;
kcache[i].mode = KCacheLine::lock_2;
return true;
} else if (d == 0)
} else if (d == 0) {
return true;
}
} else { /* free kcache set - use it */
m_kcache[i].mode = KCacheLine::lock_1;
m_kcache[i].bank = bank;
m_kcache[i].addr = line;
kcache[i].mode = KCacheLine::lock_1;
kcache[i].bank = bank;
kcache[i].addr = line;
return true;
}
}
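
The d = line - kcache[i].addr arithmetic above implements line merging: each kcache set can lock one 16-constant line (lock_1) or two adjacent lines (lock_2). A simplified model of the adjacency cases, ignoring bank handling and the sorted insertion (an assumption-laden sketch, not the full rule set):

// Simplified: one set, no banks, no sorted insertion, no loop-index locks.
enum class Mode { free_set, lock_1, lock_2 };

struct Set {
   Mode mode = Mode::free_set;
   int addr = 0; // first locked line; lock_2 also covers addr + 1
};

bool try_widen(Set& s, int line)
{
   if (s.mode == Mode::free_set) {
      s = {Mode::lock_1, line}; // empty set: lock the single line
      return true;
   }
   int d = line - s.addr;
   if (d == 0)
      return true; // already covered by the existing lock
   if (d == 1) {
      s.mode = Mode::lock_2; // widen (or keep) a lock over addr..addr+1
      return true;
   }
   if (d == -1 && s.mode == Mode::lock_1) {
      // Prepend: the pair now covers line and line + 1. (Prepending to a
      // lock_2 is trickier: the real code above evicts the old second
      // line and re-reserves it as line + 2.)
      s.addr--;
      s.mode = Mode::lock_2;
      return true;
   }
   return false; // not adjacent: the caller must try another set
}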


@@ -196,7 +196,8 @@ public:
void set_type(Type t);
uint32_t remaining_slots() const { return m_remaining_slots;}
bool try_reserve_kcache(const AluGroup& group);
bool try_reserve_kcache(const AluGroup& instr);
bool try_reserve_kcache(const AluInstr& group);
auto last_lds_instr() {return m_last_lds_instr;}
void set_last_lds_instr(Instr *instr) {m_last_lds_instr = instr;}
@@ -207,8 +208,11 @@ public:
size_t size() const { return m_instructions.size();}
bool kcache_reservation_failed() const { return m_kcache_alloc_failed;}
private:
bool try_reserve_kcache(const UniformValue& u);
bool try_reserve_kcache(const UniformValue& u,
std::array<KCacheLine, 4>& kcache) const;
bool do_ready() const override {return true;};
void do_print(std::ostream& os) const override;
@@ -221,11 +225,13 @@ private:
uint32_t m_remaining_slots{0xffff};
std::array<KCacheLine, 4> m_kcache;
bool m_kcache_alloc_failed{false};
Instr *m_last_lds_instr{nullptr};
int m_lds_group_requirement{0};
AluInstr *m_lds_group_start{nullptr};
};
class InstrWithVectorResult : public Instr {


@@ -489,83 +489,84 @@ bool BlockSheduler::schedule_alu(Shader::ShaderBlocks& out_blocks)
bool has_lds_ready = !alu_vec_ready.empty() &&
(*alu_vec_ready.begin())->has_lds_access();
/* If we have ready ALU instructions we have to start a new ALU block */
if (has_alu_ready || !alu_groups_ready.empty()) {
if (m_current_block->type() != Block::alu) {
start_new_block(out_blocks, Block::alu);
m_alu_groups_schduled = 0;
}
}
/* Schedule groups first. unless we have a pending LDS instuction
* We don't want the LDS instructions to be too far apart because the
* fetch + read from queue has to be in the same ALU CF block */
if (!alu_groups_ready.empty() && !has_lds_ready) {
group = *alu_groups_ready.begin();
alu_groups_ready.erase(alu_groups_ready.begin());
sfn_log << SfnLog::schedule << "Schedule ALU group\n";
success = true;
} else {
if (has_alu_ready) {
group = new AluGroup();
sfn_log << SfnLog::schedule << "START new ALU group\n";
}
}
if (group) {
int free_slots = group->free_slots();
if (free_slots && has_alu_ready) {
if (!alu_vec_ready.empty())
success |= schedule_alu_to_group_vec(group);
/* Apparently one can't schedule a t-slot if there is already
* and LDS instruction scheduled.
* TODO: check whether this is only relevant for actual LDS instructions
* or also for instructions that read from the LDS return value queue */
if (free_slots & 0x10 && !has_lds_ready) {
sfn_log << SfnLog::schedule << "Try schedule TRANS channel\n";
if (!alu_trans_ready.empty())
success |= schedule_alu_to_group_trans(group, alu_trans_ready);
if (!alu_vec_ready.empty())
success |= schedule_alu_to_group_trans(group, alu_vec_ready);
}
}
sfn_log << SfnLog::schedule << "Finalize ALU group\n";
group->set_scheduled();
group->fix_last_flag();
group->set_nesting_depth(m_current_block->nesting_depth());
if (m_current_block->type() != Block::alu) {
start_new_block(out_blocks, Block::alu);
m_alu_groups_schduled = 0;
}
/* Pessimistic hack: If we have started an LDS group,
* make sure 8 instructions groups still fit into the CF
* TODO: take care of Address slot emission
* TODO: maybe do this CF split only in the assembler
*/
/*if (group->slots() > m_current_block->remaining_slots() ||
(group->has_lds_group_start() &&
m_current_block->remaining_slots() < 7 * 8)) {
//assert(!m_current_block->lds_group_active());
start_new_block(out_blocks, Block::alu);
}*/
if (!m_current_block->try_reserve_kcache(*group)) {
assert(!m_current_block->lds_group_active());
start_new_block(out_blocks, Block::alu);
m_current_block->set_instr_flag(Instr::force_cf);
}
assert(m_current_block->try_reserve_kcache(*group));
if (group->has_lds_group_start())
m_current_block->lds_group_start(*group->begin());
m_current_block->push_back(group);
if (group->has_lds_group_end())
m_current_block->lds_group_end();
if (!m_current_block->try_reserve_kcache(*group))
unreachable("Scheduling a group in a new block should always succeed");
alu_groups_ready.erase(alu_groups_ready.begin());
sfn_log << SfnLog::schedule << "Schedule ALU group\n";
success = true;
} else if (has_alu_ready) {
group = new AluGroup();
sfn_log << SfnLog::schedule << "START new ALU group\n";
} else {
return false;
}
if (success)
++m_alu_groups_schduled;
assert(group);
int free_slots = group->free_slots();
while (free_slots && has_alu_ready) {
if (!alu_vec_ready.empty())
success |= schedule_alu_to_group_vec(group);
/* Apparently one can't schedule a t-slot if there is already
* and LDS instruction scheduled.
* TODO: check whether this is only relevant for actual LDS instructions
* or also for instructions that read from the LDS return value queue */
if (free_slots & 0x10 && !has_lds_ready) {
sfn_log << SfnLog::schedule << "Try schedule TRANS channel\n";
if (!alu_trans_ready.empty())
success |= schedule_alu_to_group_trans(group, alu_trans_ready);
if (!alu_vec_ready.empty())
success |= schedule_alu_to_group_trans(group, alu_vec_ready);
}
if (success) {
++m_alu_groups_schduled;
break;
} else if (m_current_block->kcache_reservation_failed()) {
// LDS read groups should not lead to impossible
// kcache constellations
assert(!m_current_block->lds_group_active());
// kcache reservation failed, so we have to start a new CF
start_new_block(out_blocks, Block::alu);
m_current_block->set_instr_flag(Instr::force_cf);
} else {
return false;
}
}
sfn_log << SfnLog::schedule << "Finalize ALU group\n";
group->set_scheduled();
group->fix_last_flag();
group->set_nesting_depth(m_current_block->nesting_depth());
m_current_block->push_back(group);
if (group->has_lds_group_start())
m_current_block->lds_group_start(*group->begin());
if (group->has_lds_group_end())
m_current_block->lds_group_end();
return success;
}
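
The rewritten tail of schedule_alu above boils down to: fill the group from the ready lists; on success count it; if nothing was placed purely because kcache reservation failed, force a CF split and retry against the fresh (empty) kcache; otherwise report that nothing is schedulable. A condensed, hedged rendering of that retry loop, where the callables stand in for the real block and scheduler methods:

#include <functional>

// Condensed control flow of the new scheduling loop; not the literal code.
bool schedule_with_kcache_retry(const std::function<bool()>& fill_group,
                                const std::function<bool()>& kcache_failed,
                                const std::function<void()>& start_new_cf_block)
{
   while (true) {
      if (fill_group())
         return true; // something was scheduled: finalize this group

      if (!kcache_failed())
         return false; // not a kcache problem: truly nothing is ready

      // Every candidate was rejected only by kcache pressure. A fresh CF
      // block starts with all kcache sets free, so the retry can make
      // progress, and it must not tear an open LDS group apart (hence
      // the assert in the real code).
      start_new_cf_block();
   }
}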
@@ -652,6 +653,13 @@ bool BlockSheduler::schedule_alu_to_group_vec(AluGroup *group)
auto e = alu_vec_ready.end();
while (i != e) {
sfn_log << SfnLog::schedule << "Try schedule to vec " << **i;
if (!m_current_block->try_reserve_kcache(**i)) {
sfn_log << SfnLog::schedule << " failed (kcache)\n";
++i;
continue;
}
if (group->add_vec_instructions(*i)) {
auto old_i = i;
++i;
@@ -679,6 +687,12 @@ bool BlockSheduler::schedule_alu_to_group_trans(AluGroup *group, std::list<AluIn
auto e = readylist.end();
while (i != e) {
sfn_log << SfnLog::schedule << "Try schedule to trans " << **i;
if (!m_current_block->try_reserve_kcache(**i)) {
sfn_log << SfnLog::schedule << " failed (kcache)\n";
++i;
continue;
}
if (group->add_trans_instructions(*i)) {
auto old_i = i;
++i;
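
Both ready-list walks (schedule_alu_to_group_vec and schedule_alu_to_group_trans) now gate each candidate on a per-instruction kcache check, skipping it instead of giving up on the whole group. A generic sketch of that skip-and-continue iteration; the template parameters and signatures are assumptions for illustration, not the real ones:

#include <list>

struct AluInstr { /* opaque in this sketch */ };

// Walk the ready list; instructions whose constants cannot be locked in
// the current block's kcache stay on the list for a later CF block.
template <typename TryKcache, typename TryAdd>
bool schedule_from(std::list<AluInstr*>& ready, TryKcache try_kcache, TryAdd try_add)
{
   bool success = false;
   for (auto i = ready.begin(); i != ready.end();) {
      if (!try_kcache(**i)) {
         ++i; // kcache full for this one: leave it on the ready list
         continue;
      }
      if (try_add(*i)) {
         i = ready.erase(i); // placed into the group
         success = true;
      } else {
         ++i; // no free slot of the right type in this group
      }
   }
   return success;
}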