/* mesa/src/freedreno/ir3/ir3_ra.h */

/*
 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */
#ifndef IR3_RA_H_
#define IR3_RA_H_

#include "util/bitset.h"

static const unsigned class_sizes[] = {
   1, 2, 3, 4,
   4 + 4,   /* txd + 1d/2d */
   4 + 6,   /* txd + 3d */
};
#define class_count ARRAY_SIZE(class_sizes)

static const unsigned half_class_sizes[] = {
   1, 2, 3, 4,
};
#define half_class_count ARRAY_SIZE(half_class_sizes)

/* These seem to be used only for compute shaders?  vec1 and vec3
 * appear to be sufficient (for now?):
 */
static const unsigned high_class_sizes[] = {
   1, 3,
};
#define high_class_count ARRAY_SIZE(high_class_sizes)
#define total_class_count (class_count + half_class_count + high_class_count)
/* Below a0.x are normal regs. RA doesn't need to assign a0.x/p0.x. */
#define NUM_REGS (4 * 48) /* r0 to r47 */
#define NUM_HIGH_REGS (4 * 8) /* r48 to r55 */
#define FIRST_HIGH_REG (4 * 48)

/* Number of virtual regs in a given class: */
static inline unsigned CLASS_REGS(unsigned i)
{
   assert(i < class_count);
   return (NUM_REGS - (class_sizes[i] - 1));
}

static inline unsigned HALF_CLASS_REGS(unsigned i)
{
   assert(i < half_class_count);
   return (NUM_REGS - (half_class_sizes[i] - 1));
}

static inline unsigned HIGH_CLASS_REGS(unsigned i)
{
   assert(i < high_class_count);
   return (NUM_HIGH_REGS - (high_class_sizes[i] - 1));
}
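
/* The subtraction above reflects that a vecN value occupies N
 * consecutive scalar slots, so the last valid base position is N - 1
 * slots before the end of the file.  For example, a full vec4
 * (class_sizes[3] == 4) has CLASS_REGS(3) == 192 - 3 == 189 possible
 * base gprs.
 */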
#define HALF_OFFSET (class_count)
#define HIGH_OFFSET (class_count + half_class_count)
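
/* A value's flat class index is its size index plus the offset of its
 * register file, e.g. a half vec2 is cls HALF_OFFSET + 1 == 7, and a
 * high vec3 is cls HIGH_OFFSET + 1 == 11.
 */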

/* register-set, created one time, used for all shaders: */
struct ir3_ra_reg_set {
   struct ra_regs *regs;
   unsigned int classes[class_count];
   unsigned int half_classes[half_class_count];
   unsigned int high_classes[high_class_count];

   /* The pre-fetched tex dst is limited, on current gens, to regs
    * 0x3f and below.  An additional register class, with a single
    * vreg, is set up to conflict with any regs above that limit.
    */
   unsigned prefetch_exclude_class;
   unsigned prefetch_exclude_reg;

   /* The virtual register space flattens out all the classes,
    * starting with full, followed by half and then high, ie:
    *
    *    scalar full  (starting at zero)
    *    vec2 full
    *    vec3 full
    *    ...
    *    vecN full
    *    scalar half  (starting at first_half_reg)
    *    vec2 half
    *    ...
    *    vecN half
    *    scalar high  (starting at first_high_reg)
    *    ...
    *    vecN high
    */
   unsigned first_half_reg, first_high_reg;

   /* maps flat virtual register space to base gpr: */
   uint16_t *ra_reg_to_gpr;
   /* maps cls,gpr to flat virtual register space: */
   uint16_t **gpr_to_ra_reg;
};
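
/* A sketch of the intended round-trip through the two tables above,
 * assuming they are inverse mappings (these are struct members, not a
 * public API):
 *
 *    unsigned name = set->gpr_to_ra_reg[cls][gpr];
 *    assert(set->ra_reg_to_gpr[name] == gpr);
 */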

/* additional block-data (per-block) */
struct ir3_ra_block_data {
   BITSET_WORD *def;       /* variables defined before used in block */
   BITSET_WORD *use;       /* variables used before defined in block */
   BITSET_WORD *livein;    /* which defs reach entry point of block */
   BITSET_WORD *liveout;   /* which defs reach exit point of block */
};
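
/* These bitsets feed the usual backwards liveness dataflow.  A sketch
 * of the textbook fixed-point step (the actual solver lives outside
 * this header):
 *
 *    liveout[b] = union over successors s of livein[s]
 *    livein[b]  = use[b] | (liveout[b] & ~def[b])
 *
 * iterated until no livein/liveout bitset changes.
 */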

/* additional instruction-data (per-instruction) */
struct ir3_ra_instr_data {
   /* cached instruction 'definer' info: */
   struct ir3_instruction *defn;
   int off, sz, cls;
};

/* register-assign context, per-shader */
struct ir3_ra_ctx {
   struct ir3_shader_variant *v;
   struct ir3 *ir;

   struct ir3_ra_reg_set *set;
   struct ra_graph *g;

   /* Are we in the scalar assignment pass?  In this pass, all
    * larger-than-vec1 values have already been assigned and
    * pre-colored, so we only consider scalar values.
    */
   bool scalar_pass;

   unsigned alloc_count;
   unsigned r0_xyz_nodes;    /* ra node numbers for r0.[xyz] precolors */
   unsigned hr0_xyz_nodes;   /* ra node numbers for hr0.[xyz] precolors */
   unsigned prefetch_exclude_node;
   /* one per class, plus one slot for arrays: */
   unsigned class_alloc_count[total_class_count + 1];
   unsigned class_base[total_class_count + 1];
   unsigned instr_cnt;
   unsigned *def, *use;      /* def/use table */
   struct ir3_ra_instr_data *instrd;

   /* Mapping vreg name back to instruction, used by the select_reg
    * callback:
    */
   struct hash_table *name_to_instr;

   /* Tracking for the select_reg callback */
   unsigned start_search_reg;
   unsigned max_target;

   /* Temporary buffer for def/use iterators.
    *
    * The worst case should probably be an array w/ relative access (ie.
    * all elements are def'd or use'd), and that can't be larger than
    * the number of registers.
    *
    * NOTE we could declare this on the stack if needed, but I don't
    * think there is a need for nested iterators.
    */
   unsigned namebuf[NUM_REGS];
   unsigned namecnt, nameidx;
};

static inline int
ra_name(struct ir3_ra_ctx *ctx, struct ir3_ra_instr_data *id)
{
   unsigned name;
   debug_assert(id->cls >= 0);
   debug_assert(id->cls < total_class_count);  /* we shouldn't get arrays here.. */
   name = ctx->class_base[id->cls] + id->defn->name;
   debug_assert(name < ctx->alloc_count);
   return name;
}

/* Get the scalar name of the n'th component of an instruction dst: */
static inline int
scalar_name(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr, unsigned n)
{
   if (ctx->scalar_pass) {
      if (instr->opc == OPC_META_SPLIT) {
         debug_assert(n == 0);   /* split results in a scalar */

         struct ir3_instruction *src = instr->regs[1]->instr;
         return scalar_name(ctx, src, instr->split.off);
      } else if (instr->opc == OPC_META_COLLECT) {
         /* regs[0] is the dst, so a collect has regs_count - 1 srcs: */
         debug_assert(n < (instr->regs_count - 1));

         struct ir3_instruction *src = instr->regs[n + 1]->instr;
         return scalar_name(ctx, src, 0);
      }
   } else {
      debug_assert(n == 0);
   }

   return ra_name(ctx, &ctx->instrd[instr->ip]) + n;
}
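
/* Hypothetical usage: in the scalar pass, if `collect` is an
 * OPC_META_COLLECT gathering a vec4, then
 *
 *    unsigned name = scalar_name(ctx, collect, 2);
 *
 * resolves through the collect's third source to that source's own
 * scalar name (`collect` is not a name from this header).
 */
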
#define NO_NAME ~0

/*
 * Iterators over the vreg names of an instruction's defs and uses:
 */
static inline unsigned
__ra_name_cnt(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr)
{
   if (!instr)
      return 0;

   /* Filter special cases, ie. writes to a0.x or p0.x, or non-ssa: */
   if (!writes_gpr(instr) || (instr->regs[0]->flags & IR3_REG_ARRAY))
      return 0;

   /* In the scalar pass, we aren't considering virtual register
    * classes, ie. if an instruction writes a vec2, then it defines
    * two different scalar register names:
    */
   if (ctx->scalar_pass)
      return dest_regs(instr);

   return 1;
}

#define foreach_name_n(__name, __n, __ctx, __instr) \
   for (unsigned __cnt = __ra_name_cnt(__ctx, __instr), __n = 0, __name; \
        (__n < __cnt) && ({__name = scalar_name(__ctx, __instr, __n); 1;}); __n++)

#define foreach_name(__name, __ctx, __instr) \
   foreach_name_n(__name, __n, __ctx, __instr)
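
/* Hypothetical usage, assuming `instr` writes a vec2 in the scalar
 * pass, so the body runs once per scalar component name
 * (ra_set_node_class() is from util's register allocator):
 *
 *    foreach_name (name, ctx, instr) {
 *       ra_set_node_class(ctx->g, name, ctx->set->classes[0]);
 *    }
 */
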
static inline unsigned
__ra_itr_pop(struct ir3_ra_ctx *ctx)
{
   if (ctx->nameidx < ctx->namecnt)
      return ctx->namebuf[ctx->nameidx++];
   return NO_NAME;
}

static inline void
__ra_itr_push(struct ir3_ra_ctx *ctx, unsigned name)
{
   assert(ctx->namecnt < ARRAY_SIZE(ctx->namebuf));
   ctx->namebuf[ctx->namecnt++] = name;
}

static inline unsigned
__ra_init_def_itr(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr)
{
   /* nested use is not supported: */
   assert(ctx->namecnt == ctx->nameidx);

   ctx->namecnt = ctx->nameidx = 0;

   if (!writes_gpr(instr))
      return NO_NAME;

   struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
   struct ir3_register *dst = instr->regs[0];

   if (dst->flags & IR3_REG_ARRAY) {
      struct ir3_array *arr = ir3_lookup_array(ctx->ir, dst->array.id);

      /* An indirect write is treated like a write to all array
       * elements, since we don't know which one is actually
       * written:
       */
      if (dst->flags & IR3_REG_RELATIV) {
         for (unsigned i = 0; i < arr->length; i++) {
            __ra_itr_push(ctx, arr->base + i);
         }
      } else {
         __ra_itr_push(ctx, arr->base + dst->array.offset);
         debug_assert(dst->array.offset < arr->length);
      }
   } else if (id->defn == instr) {
      foreach_name_n (name, i, ctx, instr) {
         /* tex instructions actually have a wrmask, and
          * don't touch masked out components.  We can't do
          * anything useful about that in the first pass,
          * but in the scalar pass we can realize these
          * registers are available:
          */
         if (ctx->scalar_pass && is_tex_or_prefetch(instr) &&
               !(instr->regs[0]->wrmask & (1 << i)))
            continue;
         __ra_itr_push(ctx, name);
      }
   }

   return __ra_itr_pop(ctx);
}

static inline unsigned
__ra_init_use_itr(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr)
{
   /* nested use is not supported: */
   assert(ctx->namecnt == ctx->nameidx);

   ctx->namecnt = ctx->nameidx = 0;

   foreach_src (reg, instr) {
      if (reg->flags & IR3_REG_ARRAY) {
         struct ir3_array *arr =
            ir3_lookup_array(ctx->ir, reg->array.id);

         /* An indirect read is treated like a read from all array
          * elements, since we don't know which one is actually
          * read:
          */
         if (reg->flags & IR3_REG_RELATIV) {
            for (unsigned i = 0; i < arr->length; i++) {
               __ra_itr_push(ctx, arr->base + i);
            }
         } else {
            __ra_itr_push(ctx, arr->base + reg->array.offset);
            debug_assert(reg->array.offset < arr->length);
         }
      } else {
         foreach_name_n (name, i, ctx, reg->instr) {
            /* split takes a src w/ wrmask potentially greater
             * than 0x1, but it really only cares about a single
             * component.  This shows up in splits coming out of
             * a tex instruction w/ wrmask=.z, for example.
             */
            if (ctx->scalar_pass && (instr->opc == OPC_META_SPLIT) &&
                  (i != instr->split.off))
               continue;
            __ra_itr_push(ctx, name);
         }
      }
   }

   return __ra_itr_pop(ctx);
}

#define foreach_def(__name, __ctx, __instr) \
   for (unsigned __name = __ra_init_def_itr(__ctx, __instr); \
        __name != NO_NAME; __name = __ra_itr_pop(__ctx))

#define foreach_use(__name, __ctx, __instr) \
   for (unsigned __name = __ra_init_use_itr(__ctx, __instr); \
        __name != NO_NAME; __name = __ra_itr_pop(__ctx))
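
/* A sketch of how these iterators are typically used to build the
 * per-block def/use sets from ir3_ra_block_data (assumed usage; the
 * real pass lives outside this header):
 *
 *    foreach_instr (instr, &block->instr_list) {
 *       foreach_use (name, ctx, instr) {
 *          if (!BITSET_TEST(bd->def, name))
 *             BITSET_SET(bd->use, name);
 *       }
 *       foreach_def (name, ctx, instr)
 *          BITSET_SET(bd->def, name);
 *    }
 */
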
int ra_size_to_class(unsigned sz, bool half, bool high);
int ra_class_to_size(unsigned class, bool *half, bool *high);
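
/* Expected round-trip, assuming the two invert each other as the names
 * suggest (both are defined outside this header):
 *
 *    bool half, high;
 *    int cls = ra_size_to_class(2, true, false);  // half vec2
 *    assert(ra_class_to_size(cls, &half, &high) == 2);
 *    assert(half && !high);
 */
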
#endif /* IR3_RA_H_ */