intel/fs: Implement Wa_22012725308 for cross-pipe accumulator data hazard.

The hardware fails to provide the expected data coherency guarantees
for accumulator registers when accessed from multiple FPU pipelines.
Fix this by tracking implicit accumulator accesses just like we do for
regular GRF registers, but instead of adding synchronization
annotations for any dependency we only do it for dependencies with a
pipeline mismatch, since the hardware should be able to guarantee
proper synchronization for matching pipelines.

Note that this workaround handles RaW and WaW dependencies in addition
to the WaR dependencies described in the hardware bug report even
though cross-pipeline RaW accumulator dependencies should be extremely
rare, since chances are the hardware will also hang if we ever hit
such a condition.  This only affects XeHP+, since all FPU instructions
are executed as a single in-order pipeline on earlier Xe platforms.

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11433>
This commit is contained in:
Francisco Jerez 2021-05-25 15:02:53 -07:00 committed by Marge Bot
parent 385da1fe36
commit d46bb14d14
1 changed files with 50 additions and 1 deletions

View File

@ -209,6 +209,21 @@ namespace {
}
};
/**
* Return true if the specified ordered address is trivially satisfied for
* all pipelines except potentially for the specified pipeline \p p.
*/
bool
is_single_pipe(const ordered_address &jp, tgl_pipe p)
{
for (unsigned q = 0; q < IDX(TGL_PIPE_ALL); q++) {
if ((p == TGL_PIPE_NONE || IDX(p) != q) && jp.jp[q] > INT_MIN)
return false;
}
return true;
}
/**
* Return the number of instructions in the program.
*/
@ -962,6 +977,9 @@ namespace {
sb.set(byte_offset(inst->src[i], REG_SIZE * j), rd_dep);
}
if (inst->reads_accumulator_implicitly())
sb.set(brw_acc_reg(8), dependency(TGL_REGDIST_SRC, jp, exec_all));
if (is_send(inst) && inst->base_mrf != -1) {
const dependency rd_dep = dependency(TGL_SBID_SRC, ip, exec_all);
@ -976,6 +994,9 @@ namespace {
dependency(TGL_REGDIST_DST, jp, exec_all) :
dependency();
if (inst->writes_accumulator_implicitly(devinfo))
sb.set(brw_acc_reg(8), wr_dep);
if (is_valid(wr_dep) && inst->dst.file != BAD_FILE &&
!inst->dst.is_null()) {
for (unsigned j = 0; j < regs_written(inst); j++)
@ -1059,6 +1080,7 @@ namespace {
gather_inst_dependencies(const fs_visitor *shader,
const ordered_address *jps)
{
const struct intel_device_info *devinfo = shader->devinfo;
equivalence_relation eq(num_instructions(shader));
scoreboard *sbs = propagate_block_scoreboards(shader, jps, eq);
const unsigned *ids = eq.flatten();
@ -1067,6 +1089,7 @@ namespace {
foreach_block_and_inst(block, fs_inst, inst, shader->cfg) {
const bool exec_all = inst->force_writemask_all;
const tgl_pipe p = inferred_exec_pipe(devinfo, inst);
scoreboard &sb = sbs[block->num];
for (unsigned i = 0; i < inst->sources; i++) {
@ -1075,6 +1098,18 @@ namespace {
sb.get(byte_offset(inst->src[i], REG_SIZE * j))));
}
if (inst->reads_accumulator_implicitly()) {
/* Wa_22012725308:
*
* "When the accumulator registers are used as source and/or
* destination, hardware does not ensure prevention of write
* after read hazard across execution pipes."
*/
const dependency dep = sb.get(brw_acc_reg(8));
if (dep.ordered && !is_single_pipe(dep.jp, p))
add_dependency(ids, deps[ip], dep);
}
if (is_send(inst) && inst->base_mrf != -1) {
for (unsigned j = 0; j < inst->mlen; j++)
add_dependency(ids, deps[ip], dependency_for_read(
@ -1086,13 +1121,27 @@ namespace {
dependency(TGL_SBID_SET, ip, exec_all));
if (!inst->no_dd_check) {
if (inst->dst.file != BAD_FILE && !inst->dst.is_null()) {
if (inst->dst.file != BAD_FILE && !inst->dst.is_null() &&
!inst->dst.is_accumulator()) {
for (unsigned j = 0; j < regs_written(inst); j++) {
add_dependency(ids, deps[ip], dependency_for_write(inst,
sb.get(byte_offset(inst->dst, REG_SIZE * j))));
}
}
if (inst->writes_accumulator_implicitly(devinfo) ||
inst->dst.is_accumulator()) {
/* Wa_22012725308:
*
* "When the accumulator registers are used as source and/or
* destination, hardware does not ensure prevention of write
* after read hazard across execution pipes."
*/
const dependency dep = sb.get(brw_acc_reg(8));
if (dep.ordered && !is_single_pipe(dep.jp, p))
add_dependency(ids, deps[ip], dep);
}
if (is_send(inst) && inst->base_mrf != -1) {
for (unsigned j = 0; j < inst->implied_mrf_writes(); j++)
add_dependency(ids, deps[ip], dependency_for_write(inst,