intel/fs: Represent SWSB in-order dependency addresses as vectors.
This extends the current ordered_address instruction counter to a vector with one component per asynchronous ALU pipeline, allowing us to track the last instruction that accessed a register separately for each ALU pipeline of the XeHP EU, making it straightforward to infer the right cross-pipeline synchronization annotations. Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> v2: Make unit tests happy (with ubsan as run by GitLab automation). Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10000>
This commit is contained in:
parent
78b643fb7f
commit
3f063334fc
|
@ -130,13 +130,21 @@ namespace {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Number of in-order hardware instructions contained in this IR
|
* Index of the \p p pipeline counter in the ordered_address vector defined
|
||||||
* instruction. This determines the increment applied to the RegDist
|
* below.
|
||||||
* counter calculated for any ordered dependency that crosses this
|
*/
|
||||||
|
#define IDX(p) (p >= TGL_PIPE_FLOAT ? unsigned(p - TGL_PIPE_FLOAT) : \
|
||||||
|
(abort(), ~0u))
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Number of in-order hardware instructions for pipeline index \p contained
|
||||||
|
* in this IR instruction. This determines the increment applied to the
|
||||||
|
* RegDist counter calculated for any ordered dependency that crosses this
|
||||||
* instruction.
|
* instruction.
|
||||||
*/
|
*/
|
||||||
unsigned
|
unsigned
|
||||||
ordered_unit(const fs_inst *inst)
|
ordered_unit(const struct gen_device_info *devinfo, const fs_inst *inst,
|
||||||
|
unsigned p)
|
||||||
{
|
{
|
||||||
switch (inst->opcode) {
|
switch (inst->opcode) {
|
||||||
case BRW_OPCODE_SYNC:
|
case BRW_OPCODE_SYNC:
|
||||||
|
@ -156,7 +164,11 @@ namespace {
|
||||||
* (again) don't use virtual instructions if you want optimal
|
* (again) don't use virtual instructions if you want optimal
|
||||||
* scheduling.
|
* scheduling.
|
||||||
*/
|
*/
|
||||||
return is_unordered(inst) ? 0 : 1;
|
if (!is_unordered(inst) && (p == IDX(inferred_exec_pipe(devinfo, inst)) ||
|
||||||
|
p == IDX(TGL_PIPE_ALL)))
|
||||||
|
return 1;
|
||||||
|
else
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -164,8 +176,36 @@ namespace {
|
||||||
* Type for an instruction counter that increments for in-order
|
* Type for an instruction counter that increments for in-order
|
||||||
* instructions only, arbitrarily denoted 'jp' throughout this lowering
|
* instructions only, arbitrarily denoted 'jp' throughout this lowering
|
||||||
* pass in order to distinguish it from the regular instruction counter.
|
* pass in order to distinguish it from the regular instruction counter.
|
||||||
|
* This is represented as a vector with an independent counter for each
|
||||||
|
* asynchronous ALU pipeline in the EU.
|
||||||
*/
|
*/
|
||||||
typedef int ordered_address;
|
struct ordered_address {
|
||||||
|
/**
|
||||||
|
* Construct the ordered address of a dependency known to execute on a
|
||||||
|
* single specified pipeline \p p (unless TGL_PIPE_NONE or TGL_PIPE_ALL
|
||||||
|
* is provided), in which case the vector counter will be initialized
|
||||||
|
* with all components equal to INT_MIN (always satisfied) except for
|
||||||
|
* component IDX(p).
|
||||||
|
*/
|
||||||
|
ordered_address(tgl_pipe p = TGL_PIPE_NONE, int jp0 = INT_MIN) {
|
||||||
|
for (unsigned q = 0; q < IDX(TGL_PIPE_ALL); q++)
|
||||||
|
jp[q] = (p == TGL_PIPE_NONE || (IDX(p) != q && p != TGL_PIPE_ALL) ?
|
||||||
|
INT_MIN : jp0);
|
||||||
|
}
|
||||||
|
|
||||||
|
int jp[IDX(TGL_PIPE_ALL)];
|
||||||
|
|
||||||
|
friend bool
|
||||||
|
operator==(const ordered_address &jp0, const ordered_address &jp1)
|
||||||
|
{
|
||||||
|
for (unsigned p = 0; p < IDX(TGL_PIPE_ALL); p++) {
|
||||||
|
if (jp0.jp[p] != jp1.jp[p])
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return the number of instructions in the program.
|
* Return the number of instructions in the program.
|
||||||
|
@ -184,12 +224,13 @@ namespace {
|
||||||
ordered_inst_addresses(const fs_visitor *shader)
|
ordered_inst_addresses(const fs_visitor *shader)
|
||||||
{
|
{
|
||||||
ordered_address *jps = new ordered_address[num_instructions(shader)];
|
ordered_address *jps = new ordered_address[num_instructions(shader)];
|
||||||
ordered_address jp = 0;
|
ordered_address jp(TGL_PIPE_ALL, 0);
|
||||||
unsigned ip = 0;
|
unsigned ip = 0;
|
||||||
|
|
||||||
foreach_block_and_inst(block, fs_inst, inst, shader->cfg) {
|
foreach_block_and_inst(block, fs_inst, inst, shader->cfg) {
|
||||||
jps[ip] = jp;
|
jps[ip] = jp;
|
||||||
jp += ordered_unit(inst);
|
for (unsigned p = 0; p < IDX(TGL_PIPE_ALL); p++)
|
||||||
|
jp.jp[p] += ordered_unit(shader->devinfo, inst, p);
|
||||||
ip++;
|
ip++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -347,7 +388,7 @@ namespace {
|
||||||
/**
|
/**
|
||||||
* No dependency information.
|
* No dependency information.
|
||||||
*/
|
*/
|
||||||
dependency() : ordered(TGL_REGDIST_NULL), jp(INT_MIN),
|
dependency() : ordered(TGL_REGDIST_NULL), jp(),
|
||||||
unordered(TGL_SBID_NULL), id(0),
|
unordered(TGL_SBID_NULL), id(0),
|
||||||
exec_all(false) {}
|
exec_all(false) {}
|
||||||
|
|
||||||
|
@ -355,7 +396,8 @@ namespace {
|
||||||
* Construct a dependency on the in-order instruction with the provided
|
* Construct a dependency on the in-order instruction with the provided
|
||||||
* ordered_address instruction counter.
|
* ordered_address instruction counter.
|
||||||
*/
|
*/
|
||||||
dependency(tgl_regdist_mode mode, ordered_address jp, bool exec_all) :
|
dependency(tgl_regdist_mode mode, const ordered_address &jp,
|
||||||
|
bool exec_all) :
|
||||||
ordered(mode), jp(jp), unordered(TGL_SBID_NULL), id(0),
|
ordered(mode), jp(jp), unordered(TGL_SBID_NULL), id(0),
|
||||||
exec_all(exec_all) {}
|
exec_all(exec_all) {}
|
||||||
|
|
||||||
|
@ -364,7 +406,7 @@ namespace {
|
||||||
* specified synchronization token.
|
* specified synchronization token.
|
||||||
*/
|
*/
|
||||||
dependency(tgl_sbid_mode mode, unsigned id, bool exec_all) :
|
dependency(tgl_sbid_mode mode, unsigned id, bool exec_all) :
|
||||||
ordered(TGL_REGDIST_NULL), jp(INT_MIN), unordered(mode), id(id),
|
ordered(TGL_REGDIST_NULL), jp(), unordered(mode), id(id),
|
||||||
exec_all(exec_all) {}
|
exec_all(exec_all) {}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -432,7 +474,8 @@ namespace {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
const dependency dependency::done = dependency(TGL_REGDIST_SRC, INT_MIN, false);
|
const dependency dependency::done =
|
||||||
|
dependency(TGL_REGDIST_SRC, ordered_address(), false);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return whether \p dep contains any dependency information.
|
* Return whether \p dep contains any dependency information.
|
||||||
|
@ -458,7 +501,8 @@ namespace {
|
||||||
|
|
||||||
if (dep0.ordered || dep1.ordered) {
|
if (dep0.ordered || dep1.ordered) {
|
||||||
dep.ordered = dep0.ordered | dep1.ordered;
|
dep.ordered = dep0.ordered | dep1.ordered;
|
||||||
dep.jp = MAX2(dep0.jp, dep1.jp);
|
for (unsigned p = 0; p < IDX(TGL_PIPE_ALL); p++)
|
||||||
|
dep.jp.jp[p] = MAX2(dep0.jp.jp[p], dep1.jp.jp[p]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dep0.unordered || dep1.unordered) {
|
if (dep0.unordered || dep1.unordered) {
|
||||||
|
@ -492,10 +536,14 @@ namespace {
|
||||||
* the end of the origin block.
|
* the end of the origin block.
|
||||||
*/
|
*/
|
||||||
dependency
|
dependency
|
||||||
transport(dependency dep, int delta)
|
transport(dependency dep, int delta[IDX(TGL_PIPE_ALL)])
|
||||||
{
|
{
|
||||||
if (dep.ordered && dep.jp > INT_MIN)
|
if (dep.ordered) {
|
||||||
dep.jp += delta;
|
for (unsigned p = 0; p < IDX(TGL_PIPE_ALL); p++) {
|
||||||
|
if (dep.jp.jp[p] > INT_MIN)
|
||||||
|
dep.jp.jp[p] += delta[p];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return dep;
|
return dep;
|
||||||
}
|
}
|
||||||
|
@ -599,7 +647,7 @@ namespace {
|
||||||
* object. \sa transport().
|
* object. \sa transport().
|
||||||
*/
|
*/
|
||||||
friend scoreboard
|
friend scoreboard
|
||||||
transport(const scoreboard &sb0, int delta)
|
transport(const scoreboard &sb0, int delta[IDX(TGL_PIPE_ALL)])
|
||||||
{
|
{
|
||||||
scoreboard sb;
|
scoreboard sb;
|
||||||
|
|
||||||
|
@ -736,7 +784,9 @@ namespace {
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (dep.ordered && deps[i].ordered) {
|
if (dep.ordered && deps[i].ordered) {
|
||||||
deps[i].jp = MAX2(deps[i].jp, dep.jp);
|
for (unsigned p = 0; p < IDX(TGL_PIPE_ALL); p++)
|
||||||
|
deps[i].jp.jp[p] = MAX2(deps[i].jp.jp[p], dep.jp.jp[p]);
|
||||||
|
|
||||||
deps[i].ordered |= dep.ordered;
|
deps[i].ordered |= dep.ordered;
|
||||||
deps[i].exec_all |= dep.exec_all;
|
deps[i].exec_all |= dep.exec_all;
|
||||||
dep.ordered = TGL_REGDIST_NULL;
|
dep.ordered = TGL_REGDIST_NULL;
|
||||||
|
@ -770,11 +820,13 @@ namespace {
|
||||||
|
|
||||||
for (unsigned i = 0; i < deps.size(); i++) {
|
for (unsigned i = 0; i < deps.size(); i++) {
|
||||||
if (deps[i].ordered && exec_all >= deps[i].exec_all) {
|
if (deps[i].ordered && exec_all >= deps[i].exec_all) {
|
||||||
const unsigned dist = jp - deps[i].jp;
|
for (unsigned q = 0; q < IDX(TGL_PIPE_ALL); q++) {
|
||||||
const unsigned max_dist = 10;
|
const unsigned dist = jp.jp[q] - int64_t(deps[i].jp.jp[q]);
|
||||||
assert(jp > deps[i].jp);
|
const unsigned max_dist = (q == IDX(TGL_PIPE_LONG) ? 14 : 10);
|
||||||
if (dist <= max_dist)
|
assert(jp.jp[q] > deps[i].jp.jp[q]);
|
||||||
min_dist = MIN3(min_dist, dist, 7);
|
if (dist <= max_dist)
|
||||||
|
min_dist = MIN3(min_dist, dist, 7);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -861,6 +913,10 @@ namespace {
|
||||||
const fs_inst *inst, unsigned ip, scoreboard &sb)
|
const fs_inst *inst, unsigned ip, scoreboard &sb)
|
||||||
{
|
{
|
||||||
const bool exec_all = inst->force_writemask_all;
|
const bool exec_all = inst->force_writemask_all;
|
||||||
|
const struct gen_device_info *devinfo = shader->devinfo;
|
||||||
|
const tgl_pipe p = inferred_exec_pipe(devinfo, inst);
|
||||||
|
const ordered_address jp = p ? ordered_address(p, jps[ip].jp[IDX(p)]) :
|
||||||
|
ordered_address();
|
||||||
|
|
||||||
/* Track any source registers that may be fetched asynchronously by this
|
/* Track any source registers that may be fetched asynchronously by this
|
||||||
* instruction, otherwise clear the dependency in order to avoid
|
* instruction, otherwise clear the dependency in order to avoid
|
||||||
|
@ -870,7 +926,8 @@ namespace {
|
||||||
const dependency rd_dep =
|
const dependency rd_dep =
|
||||||
(inst->is_payload(i) ||
|
(inst->is_payload(i) ||
|
||||||
inst->is_math()) ? dependency(TGL_SBID_SRC, ip, exec_all) :
|
inst->is_math()) ? dependency(TGL_SBID_SRC, ip, exec_all) :
|
||||||
ordered_unit(inst) ? dependency(TGL_REGDIST_SRC, jps[ip], exec_all) :
|
ordered_unit(devinfo, inst, IDX(TGL_PIPE_ALL)) ?
|
||||||
|
dependency(TGL_REGDIST_SRC, jp, exec_all) :
|
||||||
dependency::done;
|
dependency::done;
|
||||||
|
|
||||||
for (unsigned j = 0; j < regs_read(inst, i); j++)
|
for (unsigned j = 0; j < regs_read(inst, i); j++)
|
||||||
|
@ -887,7 +944,8 @@ namespace {
|
||||||
/* Track any destination registers of this instruction. */
|
/* Track any destination registers of this instruction. */
|
||||||
const dependency wr_dep =
|
const dependency wr_dep =
|
||||||
is_unordered(inst) ? dependency(TGL_SBID_DST, ip, exec_all) :
|
is_unordered(inst) ? dependency(TGL_SBID_DST, ip, exec_all) :
|
||||||
ordered_unit(inst) ? dependency(TGL_REGDIST_DST, jps[ip], exec_all) :
|
ordered_unit(devinfo, inst, IDX(TGL_PIPE_ALL)) ?
|
||||||
|
dependency(TGL_REGDIST_DST, jp, exec_all) :
|
||||||
dependency();
|
dependency();
|
||||||
|
|
||||||
if (is_valid(wr_dep) && inst->dst.file != BAD_FILE &&
|
if (is_valid(wr_dep) && inst->dst.file != BAD_FILE &&
|
||||||
|
@ -942,9 +1000,13 @@ namespace {
|
||||||
foreach_list_typed(bblock_link, child_link, link,
|
foreach_list_typed(bblock_link, child_link, link,
|
||||||
&block->children) {
|
&block->children) {
|
||||||
scoreboard &in_sb = in_sbs[child_link->block->num];
|
scoreboard &in_sb = in_sbs[child_link->block->num];
|
||||||
const int delta =
|
int delta[IDX(TGL_PIPE_ALL)];
|
||||||
jps[child_link->block->start_ip] - jps[block->end_ip]
|
|
||||||
- ordered_unit(static_cast<const fs_inst *>(block->end()));
|
for (unsigned p = 0; p < IDX(TGL_PIPE_ALL); p++)
|
||||||
|
delta[p] = jps[child_link->block->start_ip].jp[p]
|
||||||
|
- jps[block->end_ip].jp[p]
|
||||||
|
- ordered_unit(shader->devinfo,
|
||||||
|
static_cast<const fs_inst *>(block->end()), p);
|
||||||
|
|
||||||
in_sb = merge(eq, in_sb, transport(sb, delta));
|
in_sb = merge(eq, in_sb, transport(sb, delta));
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue