freedreno/ir3: rename fanin/fanout to collect/split
If I'm going to refactor a bit to use these meta instructions to also handle input/output, then I might as well clean up the names first. Nouveau also uses collect/split as the names of these meta instructions, and I like those names better.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
This commit is contained in:
parent
4af86bd0b9
commit
611258d578
|
@ -214,13 +214,16 @@ typedef enum {
|
||||||
/* meta instructions (category -1): */
|
/* meta instructions (category -1): */
|
||||||
/* placeholder instr to mark shader inputs: */
|
/* placeholder instr to mark shader inputs: */
|
||||||
OPC_META_INPUT = _OPC(-1, 0),
|
OPC_META_INPUT = _OPC(-1, 0),
|
||||||
/* The "fan-in" and "fan-out" instructions are used for keeping
|
/* The "collect" and "split" instructions are used for keeping
|
||||||
* track of instructions that write to multiple dst registers
|
* track of instructions that write to multiple dst registers
|
||||||
* (fan-out) like texture sample instructions, or read multiple
|
* (split) like texture sample instructions, or read multiple
|
||||||
* consecutive scalar registers (fan-in) (bary.f, texture samp)
|
* consecutive scalar registers (collect) (bary.f, texture samp)
|
||||||
|
*
|
||||||
|
* A "split" extracts a scalar component from a vecN, and a
|
||||||
|
* "collect" gathers multiple scalar components into a vecN
|
||||||
*/
|
*/
|
||||||
OPC_META_FO = _OPC(-1, 2),
|
OPC_META_SPLIT = _OPC(-1, 2),
|
||||||
OPC_META_FI = _OPC(-1, 3),
|
OPC_META_COLLECT = _OPC(-1, 3),
|
||||||
|
|
||||||
/* placeholder for texture fetches that run before FS invocation
|
/* placeholder for texture fetches that run before FS invocation
|
||||||
* starts:
|
* starts:
|
||||||
|
|
|
@ -267,7 +267,7 @@ struct ir3_instruction {
|
||||||
*/
|
*/
|
||||||
struct {
|
struct {
|
||||||
int off; /* component/offset */
|
int off; /* component/offset */
|
||||||
} fo;
|
} split;
|
||||||
struct {
|
struct {
|
||||||
unsigned samp, tex;
|
unsigned samp, tex;
|
||||||
unsigned input_offset;
|
unsigned input_offset;
|
||||||
|
@ -313,7 +313,7 @@ struct ir3_instruction {
|
||||||
int sun; /* Sethi–Ullman number, used by sched */
|
int sun; /* Sethi–Ullman number, used by sched */
|
||||||
int use_count; /* currently just updated/used by cp */
|
int use_count; /* currently just updated/used by cp */
|
||||||
|
|
||||||
/* Used during CP and RA stages. For fanin and shader inputs/
|
/* Used during CP and RA stages. For collect and shader inputs/
|
||||||
* outputs where we need a sequence of consecutive registers,
|
* outputs where we need a sequence of consecutive registers,
|
||||||
* keep track of each src instructions left (ie 'n-1') and right
|
* keep track of each src instructions left (ie 'n-1') and right
|
||||||
* (ie 'n+1') neighbor. The front-end must insert enough mov's
|
* (ie 'n+1') neighbor. The front-end must insert enough mov's
|
||||||
|
@ -333,7 +333,7 @@ struct ir3_instruction {
|
||||||
* it should be overkill.. the problem is if, potentially after
|
* it should be overkill.. the problem is if, potentially after
|
||||||
* already eliminating some mov's, if you have a single mov that
|
* already eliminating some mov's, if you have a single mov that
|
||||||
* needs to be grouped with its neighbors in two different
|
* needs to be grouped with its neighbors in two different
|
||||||
* places (ex. shader output and a fanin).
|
* places (ex. shader output and a collect).
|
||||||
*/
|
*/
|
||||||
struct {
|
struct {
|
||||||
struct ir3_instruction *left, *right;
|
struct ir3_instruction *left, *right;
|
||||||
|
|
|
@ -346,7 +346,7 @@ get_atomic_dest_mov(struct ir3_instruction *atomic)
|
||||||
|
|
||||||
/* extract back out the 'dummy' which serves as stand-in for dest: */
|
/* extract back out the 'dummy' which serves as stand-in for dest: */
|
||||||
struct ir3_instruction *src = ssa(atomic->regs[3]);
|
struct ir3_instruction *src = ssa(atomic->regs[3]);
|
||||||
debug_assert(src->opc == OPC_META_FI);
|
debug_assert(src->opc == OPC_META_COLLECT);
|
||||||
struct ir3_instruction *dummy = ssa(src->regs[1]);
|
struct ir3_instruction *dummy = ssa(src->regs[1]);
|
||||||
|
|
||||||
struct ir3_instruction *mov = ir3_MOV(atomic->block, dummy, TYPE_U32);
|
struct ir3_instruction *mov = ir3_MOV(atomic->block, dummy, TYPE_U32);
|
||||||
|
|
|
@ -209,7 +209,7 @@ ir3_put_dst(struct ir3_context *ctx, nir_dest *dst)
|
||||||
for (unsigned i = 0; i < ctx->last_dst_n; i++) {
|
for (unsigned i = 0; i < ctx->last_dst_n; i++) {
|
||||||
struct ir3_instruction *dst = ctx->last_dst[i];
|
struct ir3_instruction *dst = ctx->last_dst[i];
|
||||||
dst->regs[0]->flags |= IR3_REG_HALF;
|
dst->regs[0]->flags |= IR3_REG_HALF;
|
||||||
if (ctx->last_dst[i]->opc == OPC_META_FO)
|
if (ctx->last_dst[i]->opc == OPC_META_SPLIT)
|
||||||
dst->regs[1]->instr->regs[0]->flags |= IR3_REG_HALF;
|
dst->regs[1]->instr->regs[0]->flags |= IR3_REG_HALF;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -257,7 +257,7 @@ ir3_create_collect(struct ir3_context *ctx, struct ir3_instruction *const *arr,
|
||||||
|
|
||||||
unsigned flags = dest_flags(arr[0]);
|
unsigned flags = dest_flags(arr[0]);
|
||||||
|
|
||||||
collect = ir3_instr_create2(block, OPC_META_FI, 1 + arrsz);
|
collect = ir3_instr_create2(block, OPC_META_COLLECT, 1 + arrsz);
|
||||||
__ssa_dst(collect)->flags |= flags;
|
__ssa_dst(collect)->flags |= flags;
|
||||||
for (unsigned i = 0; i < arrsz; i++) {
|
for (unsigned i = 0; i < arrsz; i++) {
|
||||||
struct ir3_instruction *elem = arr[i];
|
struct ir3_instruction *elem = arr[i];
|
||||||
|
@ -301,7 +301,7 @@ ir3_create_collect(struct ir3_context *ctx, struct ir3_instruction *const *arr,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* helper for instructions that produce multiple consecutive scalar
|
/* helper for instructions that produce multiple consecutive scalar
|
||||||
* outputs which need to have a split/fanout meta instruction inserted
|
* outputs which need to have a split meta instruction inserted
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
ir3_split_dest(struct ir3_block *block, struct ir3_instruction **dst,
|
ir3_split_dest(struct ir3_block *block, struct ir3_instruction **dst,
|
||||||
|
@ -317,10 +317,11 @@ ir3_split_dest(struct ir3_block *block, struct ir3_instruction **dst,
|
||||||
unsigned flags = dest_flags(src);
|
unsigned flags = dest_flags(src);
|
||||||
|
|
||||||
for (int i = 0, j = 0; i < n; i++) {
|
for (int i = 0, j = 0; i < n; i++) {
|
||||||
struct ir3_instruction *split = ir3_instr_create(block, OPC_META_FO);
|
struct ir3_instruction *split =
|
||||||
|
ir3_instr_create(block, OPC_META_SPLIT);
|
||||||
__ssa_dst(split)->flags |= flags;
|
__ssa_dst(split)->flags |= flags;
|
||||||
__ssa_src(split, src, flags);
|
__ssa_src(split, src, flags);
|
||||||
split->fo.off = i + base;
|
split->split.off = i + base;
|
||||||
|
|
||||||
if (prev) {
|
if (prev) {
|
||||||
split->cp.left = prev;
|
split->cp.left = prev;
|
||||||
|
|
|
@ -85,10 +85,10 @@ static bool is_eligible_mov(struct ir3_instruction *instr,
|
||||||
* We could possibly do a bit better, and copy-propagation if
|
* We could possibly do a bit better, and copy-propagation if
|
||||||
* we can CP all components that are being fanned out.
|
* we can CP all components that are being fanned out.
|
||||||
*/
|
*/
|
||||||
if (src_instr->opc == OPC_META_FO) {
|
if (src_instr->opc == OPC_META_SPLIT) {
|
||||||
if (!dst_instr)
|
if (!dst_instr)
|
||||||
return false;
|
return false;
|
||||||
if (dst_instr->opc == OPC_META_FI)
|
if (dst_instr->opc == OPC_META_COLLECT)
|
||||||
return false;
|
return false;
|
||||||
if (dst_instr->cp.left || dst_instr->cp.right)
|
if (dst_instr->cp.left || dst_instr->cp.right)
|
||||||
return false;
|
return false;
|
||||||
|
@ -706,12 +706,12 @@ instr_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr)
|
||||||
*/
|
*/
|
||||||
if (is_tex(instr) && (instr->flags & IR3_INSTR_S2EN) &&
|
if (is_tex(instr) && (instr->flags & IR3_INSTR_S2EN) &&
|
||||||
!(ir3_shader_debug & IR3_DBG_FORCES2EN)) {
|
!(ir3_shader_debug & IR3_DBG_FORCES2EN)) {
|
||||||
/* The first src will be a fan-in (collect), if both of its
|
/* The first src will be a collect, if both of its
|
||||||
* two sources are mov from imm, then we can
|
* two sources are mov from imm, then we can
|
||||||
*/
|
*/
|
||||||
struct ir3_instruction *samp_tex = ssa(instr->regs[1]);
|
struct ir3_instruction *samp_tex = ssa(instr->regs[1]);
|
||||||
|
|
||||||
debug_assert(samp_tex->opc == OPC_META_FI);
|
debug_assert(samp_tex->opc == OPC_META_COLLECT);
|
||||||
|
|
||||||
struct ir3_instruction *samp = ssa(samp_tex->regs[1]);
|
struct ir3_instruction *samp = ssa(samp_tex->regs[1]);
|
||||||
struct ir3_instruction *tex = ssa(samp_tex->regs[2]);
|
struct ir3_instruction *tex = ssa(samp_tex->regs[2]);
|
||||||
|
|
|
@ -175,7 +175,7 @@ remove_unused_by_block(struct ir3_block *block)
|
||||||
if (instr->opc == OPC_END || instr->opc == OPC_CHSH || instr->opc == OPC_CHMASK)
|
if (instr->opc == OPC_END || instr->opc == OPC_CHSH || instr->opc == OPC_CHMASK)
|
||||||
continue;
|
continue;
|
||||||
if (instr->flags & IR3_INSTR_UNUSED) {
|
if (instr->flags & IR3_INSTR_UNUSED) {
|
||||||
if (instr->opc == OPC_META_FO) {
|
if (instr->opc == OPC_META_SPLIT) {
|
||||||
struct ir3_instruction *src = ssa(instr->regs[1]);
|
struct ir3_instruction *src = ssa(instr->regs[1]);
|
||||||
/* leave inputs alone.. we can't optimize out components of
|
/* leave inputs alone.. we can't optimize out components of
|
||||||
* an input, since the hw is still going to be writing all
|
* an input, since the hw is still going to be writing all
|
||||||
|
@ -184,7 +184,7 @@ remove_unused_by_block(struct ir3_block *block)
|
||||||
*/
|
*/
|
||||||
if ((src->opc != OPC_META_INPUT) &&
|
if ((src->opc != OPC_META_INPUT) &&
|
||||||
(src->regs[0]->wrmask > 1)) {
|
(src->regs[0]->wrmask > 1)) {
|
||||||
src->regs[0]->wrmask &= ~(1 << instr->fo.off);
|
src->regs[0]->wrmask &= ~(1 << instr->split.off);
|
||||||
|
|
||||||
/* prune no-longer needed right-neighbors. We could
|
/* prune no-longer needed right-neighbors. We could
|
||||||
* probably do the same for left-neighbors (ie. tex
|
* probably do the same for left-neighbors (ie. tex
|
||||||
|
|
|
@ -185,7 +185,7 @@ instr_find_neighbors(struct ir3_instruction *instr)
|
||||||
if (ir3_instr_check_mark(instr))
|
if (ir3_instr_check_mark(instr))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (instr->opc == OPC_META_FI)
|
if (instr->opc == OPC_META_COLLECT)
|
||||||
group_n(&instr_ops, instr, instr->regs_count - 1);
|
group_n(&instr_ops, instr, instr->regs_count - 1);
|
||||||
|
|
||||||
foreach_ssa_src(src, instr)
|
foreach_ssa_src(src, instr)
|
||||||
|
|
|
@ -95,8 +95,8 @@ static void print_instr_name(struct ir3_instruction *instr, bool flags)
|
||||||
if (is_meta(instr)) {
|
if (is_meta(instr)) {
|
||||||
switch (instr->opc) {
|
switch (instr->opc) {
|
||||||
case OPC_META_INPUT: printf("_meta:in"); break;
|
case OPC_META_INPUT: printf("_meta:in"); break;
|
||||||
case OPC_META_FO: printf("_meta:fo"); break;
|
case OPC_META_SPLIT: printf("_meta:split"); break;
|
||||||
case OPC_META_FI: printf("_meta:fi"); break;
|
case OPC_META_COLLECT: printf("_meta:collect"); break;
|
||||||
case OPC_META_TEX_PREFETCH: printf("_meta:tex_prefetch"); break;
|
case OPC_META_TEX_PREFETCH: printf("_meta:tex_prefetch"); break;
|
||||||
|
|
||||||
/* shouldn't hit here.. just for debugging: */
|
/* shouldn't hit here.. just for debugging: */
|
||||||
|
@ -237,8 +237,8 @@ print_instr(struct ir3_instruction *instr, int lvl)
|
||||||
printf("]");
|
printf("]");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (instr->opc == OPC_META_FO) {
|
if (instr->opc == OPC_META_SPLIT) {
|
||||||
printf(", off=%d", instr->fo.off);
|
printf(", off=%d", instr->split.off);
|
||||||
} else if (instr->opc == OPC_META_TEX_PREFETCH) {
|
} else if (instr->opc == OPC_META_TEX_PREFETCH) {
|
||||||
printf(", tex=%d, samp=%d, input_offset=%d", instr->prefetch.tex,
|
printf(", tex=%d, samp=%d, input_offset=%d", instr->prefetch.tex,
|
||||||
instr->prefetch.samp, instr->prefetch.input_offset);
|
instr->prefetch.samp, instr->prefetch.input_offset);
|
||||||
|
|
|
@ -421,10 +421,10 @@ get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr,
|
||||||
return id->defn;
|
return id->defn;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (instr->opc == OPC_META_FI) {
|
if (instr->opc == OPC_META_COLLECT) {
|
||||||
/* What about the case where collect is subset of array, we
|
/* What about the case where collect is subset of array, we
|
||||||
* need to find the distance between where actual array starts
|
* need to find the distance between where actual array starts
|
||||||
* and fanin.. that probably doesn't happen currently.
|
* and collect.. that probably doesn't happen currently.
|
||||||
*/
|
*/
|
||||||
struct ir3_register *src;
|
struct ir3_register *src;
|
||||||
int dsz, doff;
|
int dsz, doff;
|
||||||
|
@ -454,7 +454,7 @@ get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr,
|
||||||
|
|
||||||
/* by definition, the entire sequence forms one linked list
|
/* by definition, the entire sequence forms one linked list
|
||||||
* of single scalar register nodes (even if some of them may
|
* of single scalar register nodes (even if some of them may
|
||||||
* be fanouts from a texture sample (for example) instr. We
|
* be splits from a texture sample (for example) instr. We
|
||||||
* just need to walk the list finding the first element of
|
* just need to walk the list finding the first element of
|
||||||
* the group defined (lowest ip)
|
* the group defined (lowest ip)
|
||||||
*/
|
*/
|
||||||
|
@ -480,7 +480,7 @@ get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr,
|
||||||
} else {
|
} else {
|
||||||
/* second case is looking directly at the instruction which
|
/* second case is looking directly at the instruction which
|
||||||
* produces multiple values (eg, texture sample), rather
|
* produces multiple values (eg, texture sample), rather
|
||||||
* than the fanout nodes that point back to that instruction.
|
* than the split nodes that point back to that instruction.
|
||||||
* This isn't quite right, because it may be part of a larger
|
* This isn't quite right, because it may be part of a larger
|
||||||
* group, such as:
|
* group, such as:
|
||||||
*
|
*
|
||||||
|
@ -500,7 +500,7 @@ get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr,
|
||||||
d = instr;
|
d = instr;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (d->opc == OPC_META_FO) {
|
if (d->opc == OPC_META_SPLIT) {
|
||||||
struct ir3_instruction *dd;
|
struct ir3_instruction *dd;
|
||||||
int dsz, doff;
|
int dsz, doff;
|
||||||
|
|
||||||
|
@ -511,13 +511,13 @@ get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr,
|
||||||
|
|
||||||
*sz = MAX2(*sz, dsz);
|
*sz = MAX2(*sz, dsz);
|
||||||
|
|
||||||
if (instr->opc == OPC_META_FO)
|
if (instr->opc == OPC_META_SPLIT)
|
||||||
*off = MAX2(*off, instr->fo.off);
|
*off = MAX2(*off, instr->split.off);
|
||||||
|
|
||||||
d = dd;
|
d = dd;
|
||||||
}
|
}
|
||||||
|
|
||||||
debug_assert(d->opc != OPC_META_FO);
|
debug_assert(d->opc != OPC_META_SPLIT);
|
||||||
|
|
||||||
id->defn = d;
|
id->defn = d;
|
||||||
id->sz = *sz;
|
id->sz = *sz;
|
||||||
|
@ -707,16 +707,16 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
|
||||||
|
|
||||||
/* There are a couple special cases to deal with here:
|
/* There are a couple special cases to deal with here:
|
||||||
*
|
*
|
||||||
* fanout: used to split values from a higher class to a lower
|
* split: used to split values from a higher class to a lower
|
||||||
* class, for example split the results of a texture fetch
|
* class, for example split the results of a texture fetch
|
||||||
* into individual scalar values; We skip over these from
|
* into individual scalar values; We skip over these from
|
||||||
* a 'def' perspective, and for a 'use' we walk the chain
|
* a 'def' perspective, and for a 'use' we walk the chain
|
||||||
* up to the defining instruction.
|
* up to the defining instruction.
|
||||||
*
|
*
|
||||||
* fanin: used to collect values from lower class and assemble
|
* collect: used to collect values from lower class and assemble
|
||||||
* them together into a higher class, for example arguments
|
* them together into a higher class, for example arguments
|
||||||
* to texture sample instructions; We consider these to be
|
* to texture sample instructions; We consider these to be
|
||||||
* defined at the earliest fanin source.
|
* defined at the earliest collect source.
|
||||||
*
|
*
|
||||||
* Most of this is handled in the get_definer() helper.
|
* Most of this is handled in the get_definer() helper.
|
||||||
*
|
*
|
||||||
|
|
|
@ -79,7 +79,7 @@ unuse_each_src(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
|
||||||
continue;
|
continue;
|
||||||
if (instr->block != src->block)
|
if (instr->block != src->block)
|
||||||
continue;
|
continue;
|
||||||
if ((src->opc == OPC_META_FI) || (src->opc == OPC_META_FO)) {
|
if ((src->opc == OPC_META_COLLECT) || (src->opc == OPC_META_SPLIT)) {
|
||||||
unuse_each_src(ctx, src);
|
unuse_each_src(ctx, src);
|
||||||
} else {
|
} else {
|
||||||
debug_assert(src->use_count > 0);
|
debug_assert(src->use_count > 0);
|
||||||
|
@ -133,7 +133,7 @@ use_each_src(struct ir3_instruction *instr)
|
||||||
static void
|
static void
|
||||||
use_instr(struct ir3_instruction *instr)
|
use_instr(struct ir3_instruction *instr)
|
||||||
{
|
{
|
||||||
if ((instr->opc == OPC_META_FI) || (instr->opc == OPC_META_FO)) {
|
if ((instr->opc == OPC_META_COLLECT) || (instr->opc == OPC_META_SPLIT)) {
|
||||||
use_each_src(instr);
|
use_each_src(instr);
|
||||||
} else {
|
} else {
|
||||||
instr->use_count++;
|
instr->use_count++;
|
||||||
|
@ -143,7 +143,7 @@ use_instr(struct ir3_instruction *instr)
|
||||||
static void
|
static void
|
||||||
update_live_values(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
|
update_live_values(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
|
||||||
{
|
{
|
||||||
if ((instr->opc == OPC_META_FI) || (instr->opc == OPC_META_FO))
|
if ((instr->opc == OPC_META_COLLECT) || (instr->opc == OPC_META_SPLIT))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
ctx->live_values += dest_regs(instr);
|
ctx->live_values += dest_regs(instr);
|
||||||
|
@ -161,7 +161,7 @@ update_use_count(struct ir3 *ir)
|
||||||
|
|
||||||
list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
|
list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
|
||||||
list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
|
list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
|
||||||
if ((instr->opc == OPC_META_FI) || (instr->opc == OPC_META_FO))
|
if ((instr->opc == OPC_META_COLLECT) || (instr->opc == OPC_META_SPLIT))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
use_each_src(instr);
|
use_each_src(instr);
|
||||||
|
@ -542,15 +542,15 @@ live_effect(struct ir3_instruction *instr)
|
||||||
if (instr->block != src->block)
|
if (instr->block != src->block)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
/* for fanout/split, just pass things along to the real src: */
|
/* for split, just pass things along to the real src: */
|
||||||
if (src->opc == OPC_META_FO)
|
if (src->opc == OPC_META_SPLIT)
|
||||||
src = ssa(src->regs[1]);
|
src = ssa(src->regs[1]);
|
||||||
|
|
||||||
/* for fanin/collect, if this is the last use of *each* src,
|
/* for collect, if this is the last use of *each* src,
|
||||||
* then it will decrease the live values, since RA treats
|
* then it will decrease the live values, since RA treats
|
||||||
* them as a whole:
|
* them as a whole:
|
||||||
*/
|
*/
|
||||||
if (src->opc == OPC_META_FI) {
|
if (src->opc == OPC_META_COLLECT) {
|
||||||
struct ir3_instruction *src2;
|
struct ir3_instruction *src2;
|
||||||
bool last_use = true;
|
bool last_use = true;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue