From 64cdc1311b9385eb3611061dcbfe5ad8a183a896 Mon Sep 17 00:00:00 2001
From: Christian Gmeiner
Date: Tue, 30 Jun 2020 11:37:54 +0200
Subject: [PATCH] etnaviv: move ra into own file

Signed-off-by: Christian Gmeiner
Acked-by: Jonathan Marek
Part-of:
---
 src/gallium/drivers/etnaviv/Makefile.sources  |   1 +
 .../drivers/etnaviv/etnaviv_compiler_nir.c    | 308 +-----------------
 .../drivers/etnaviv/etnaviv_compiler_nir.h    |  91 +++++-
 .../drivers/etnaviv/etnaviv_compiler_nir_ra.c | 248 ++++++++++++++
 src/gallium/drivers/etnaviv/meson.build       |   1 +
 5 files changed, 342 insertions(+), 307 deletions(-)
 create mode 100644 src/gallium/drivers/etnaviv/etnaviv_compiler_nir_ra.c

diff --git a/src/gallium/drivers/etnaviv/Makefile.sources b/src/gallium/drivers/etnaviv/Makefile.sources
index 5835dea69bb..f765088117d 100644
--- a/src/gallium/drivers/etnaviv/Makefile.sources
+++ b/src/gallium/drivers/etnaviv/Makefile.sources
@@ -20,6 +20,7 @@ C_SOURCES := \
 	etnaviv_compiler_nir.c \
 	etnaviv_compiler_nir_emit.c \
 	etnaviv_compiler_nir_liveness.c \
+	etnaviv_compiler_nir_ra.c \
 	etnaviv_compiler_tgsi.c \
 	etnaviv_context.c \
 	etnaviv_context.h \
diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c
index eafb82680f8..63b5152a4a6 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c
@@ -277,77 +277,6 @@ const_src(struct etna_compile *c, nir_const_value *value, unsigned num_component
    return SRC_CONST(i - 1, swiz);
 }
 
-/* Swizzles and write masks can be used to layer virtual non-interfering
- * registers on top of the real VEC4 registers. For example, the virtual
- * VEC3_XYZ register and the virtual SCALAR_W register that use the same
- * physical VEC4 base register do not interfere.
- */
-enum reg_class {
-   REG_CLASS_VIRT_SCALAR,
-   REG_CLASS_VIRT_VEC2,
-   REG_CLASS_VIRT_VEC3,
-   REG_CLASS_VEC4,
-   /* special vec2 class for fast transcendentals, limited to XY or ZW */
-   REG_CLASS_VIRT_VEC2T,
-   /* special classes for LOAD - contiguous components */
-   REG_CLASS_VIRT_VEC2C,
-   REG_CLASS_VIRT_VEC3C,
-   NUM_REG_CLASSES,
-};
-
-enum reg_type {
-   REG_TYPE_VEC4,
-   REG_TYPE_VIRT_VEC3_XYZ,
-   REG_TYPE_VIRT_VEC3_XYW,
-   REG_TYPE_VIRT_VEC3_XZW,
-   REG_TYPE_VIRT_VEC3_YZW,
-   REG_TYPE_VIRT_VEC2_XY,
-   REG_TYPE_VIRT_VEC2_XZ,
-   REG_TYPE_VIRT_VEC2_XW,
-   REG_TYPE_VIRT_VEC2_YZ,
-   REG_TYPE_VIRT_VEC2_YW,
-   REG_TYPE_VIRT_VEC2_ZW,
-   REG_TYPE_VIRT_SCALAR_X,
-   REG_TYPE_VIRT_SCALAR_Y,
-   REG_TYPE_VIRT_SCALAR_Z,
-   REG_TYPE_VIRT_SCALAR_W,
-   REG_TYPE_VIRT_VEC2T_XY,
-   REG_TYPE_VIRT_VEC2T_ZW,
-   REG_TYPE_VIRT_VEC2C_XY,
-   REG_TYPE_VIRT_VEC2C_YZ,
-   REG_TYPE_VIRT_VEC2C_ZW,
-   REG_TYPE_VIRT_VEC3C_XYZ,
-   REG_TYPE_VIRT_VEC3C_YZW,
-   NUM_REG_TYPES,
-};
-
-/* writemask when used as dest */
-static const uint8_t
-reg_writemask[NUM_REG_TYPES] = {
-   [REG_TYPE_VEC4] = 0xf,
-   [REG_TYPE_VIRT_SCALAR_X] = 0x1,
-   [REG_TYPE_VIRT_SCALAR_Y] = 0x2,
-   [REG_TYPE_VIRT_VEC2_XY] = 0x3,
-   [REG_TYPE_VIRT_VEC2T_XY] = 0x3,
-   [REG_TYPE_VIRT_VEC2C_XY] = 0x3,
-   [REG_TYPE_VIRT_SCALAR_Z] = 0x4,
-   [REG_TYPE_VIRT_VEC2_XZ] = 0x5,
-   [REG_TYPE_VIRT_VEC2_YZ] = 0x6,
-   [REG_TYPE_VIRT_VEC2C_YZ] = 0x6,
-   [REG_TYPE_VIRT_VEC3_XYZ] = 0x7,
-   [REG_TYPE_VIRT_VEC3C_XYZ] = 0x7,
-   [REG_TYPE_VIRT_SCALAR_W] = 0x8,
-   [REG_TYPE_VIRT_VEC2_XW] = 0x9,
-   [REG_TYPE_VIRT_VEC2_YW] = 0xa,
-   [REG_TYPE_VIRT_VEC3_XYW] = 0xb,
-   [REG_TYPE_VIRT_VEC2_ZW] = 0xc,
-   [REG_TYPE_VIRT_VEC2T_ZW] = 0xc,
-   [REG_TYPE_VIRT_VEC2C_ZW] = 0xc,
-   [REG_TYPE_VIRT_VEC3_XZW] = 0xd,
-   [REG_TYPE_VIRT_VEC3_YZW] = 0xe,
-   [REG_TYPE_VIRT_VEC3C_YZW] = 0xe,
-};
-
 /* how to swizzle when used as a src */
 static const uint8_t
 reg_swiz[NUM_REG_TYPES] = {
@@ -402,62 +331,6 @@ reg_dst_swiz[NUM_REG_TYPES] = {
    [REG_TYPE_VIRT_VEC3C_YZW] = SWIZZLE(X, X, Y, Z),
 };
 
-static inline int reg_get_type(int virt_reg)
-{
-   return virt_reg % NUM_REG_TYPES;
-}
-
-static inline int reg_get_base(struct etna_compile *c, int virt_reg)
-{
-   /* offset by 1 to avoid reserved position register */
-   if (c->nir->info.stage == MESA_SHADER_FRAGMENT)
-      return (virt_reg / NUM_REG_TYPES + 1) % ETNA_MAX_TEMPS;
-   return virt_reg / NUM_REG_TYPES;
-}
-
-/* use "r63.z" for depth reg, it will wrap around to r0.z by reg_get_base
- * (fs registers are offset by 1 to avoid reserving r0)
- */
-#define REG_FRAG_DEPTH ((ETNA_MAX_TEMPS - 1) * NUM_REG_TYPES + REG_TYPE_VIRT_SCALAR_Z)
-
-static inline int reg_get_class(int virt_reg)
-{
-   switch (reg_get_type(virt_reg)) {
-   case REG_TYPE_VEC4:
-      return REG_CLASS_VEC4;
-   case REG_TYPE_VIRT_VEC3_XYZ:
-   case REG_TYPE_VIRT_VEC3_XYW:
-   case REG_TYPE_VIRT_VEC3_XZW:
-   case REG_TYPE_VIRT_VEC3_YZW:
-      return REG_CLASS_VIRT_VEC3;
-   case REG_TYPE_VIRT_VEC2_XY:
-   case REG_TYPE_VIRT_VEC2_XZ:
-   case REG_TYPE_VIRT_VEC2_XW:
-   case REG_TYPE_VIRT_VEC2_YZ:
-   case REG_TYPE_VIRT_VEC2_YW:
-   case REG_TYPE_VIRT_VEC2_ZW:
-      return REG_CLASS_VIRT_VEC2;
-   case REG_TYPE_VIRT_SCALAR_X:
-   case REG_TYPE_VIRT_SCALAR_Y:
-   case REG_TYPE_VIRT_SCALAR_Z:
-   case REG_TYPE_VIRT_SCALAR_W:
-      return REG_CLASS_VIRT_SCALAR;
-   case REG_TYPE_VIRT_VEC2T_XY:
-   case REG_TYPE_VIRT_VEC2T_ZW:
-      return REG_CLASS_VIRT_VEC2T;
-   case REG_TYPE_VIRT_VEC2C_XY:
-   case REG_TYPE_VIRT_VEC2C_YZ:
-   case REG_TYPE_VIRT_VEC2C_ZW:
-      return REG_CLASS_VIRT_VEC2C;
-   case REG_TYPE_VIRT_VEC3C_XYZ:
-   case REG_TYPE_VIRT_VEC3C_YZW:
-      return REG_CLASS_VIRT_VEC3C;
-   }
-
-   assert(false);
-   return 0;
-}
-
 /* nir_src to allocated register */
 static hw_src
 ra_src(struct etna_compile *c, nir_src *src)
@@ -571,183 +444,6 @@ ra_dest(struct etna_compile *c, nir_dest *dest, unsigned *p_swiz)
    };
 }
 
-/* precomputed by register_allocate */
-static unsigned int *q_values[] = {
-   (unsigned int[]) {1, 2, 3, 4, 2, 2, 3, },
-   (unsigned int[]) {3, 5, 6, 6, 5, 5, 6, },
-   (unsigned int[]) {3, 4, 4, 4, 4, 4, 4, },
-   (unsigned int[]) {1, 1, 1, 1, 1, 1, 1, },
-   (unsigned int[]) {1, 2, 2, 2, 1, 2, 2, },
-   (unsigned int[]) {2, 3, 3, 3, 2, 3, 3, },
-   (unsigned int[]) {2, 2, 2, 2, 2, 2, 2, },
-};
-
-static void
-ra_assign(struct etna_compile *c, nir_shader *shader)
-{
-   struct ra_regs *regs = ra_alloc_reg_set(NULL, ETNA_MAX_TEMPS *
-                  NUM_REG_TYPES, false);
-
-   /* classes always be created from index 0, so equal to the class enum
-    * which represents a register with (c+1) components
-    */
-   for (int c = 0; c < NUM_REG_CLASSES; c++)
-      ra_alloc_reg_class(regs);
-   /* add each register of each class */
-   for (int r = 0; r < NUM_REG_TYPES * ETNA_MAX_TEMPS; r++)
-      ra_class_add_reg(regs, reg_get_class(r), r);
-   /* set conflicts */
-   for (int r = 0; r < ETNA_MAX_TEMPS; r++) {
-      for (int i = 0; i < NUM_REG_TYPES; i++) {
-         for (int j = 0; j < i; j++) {
-            if (reg_writemask[i] & reg_writemask[j]) {
-               ra_add_reg_conflict(regs, NUM_REG_TYPES * r + i,
-                                         NUM_REG_TYPES * r + j);
-            }
-         }
-      }
-   }
-   ra_set_finalize(regs, q_values);
-
-   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
-
-   /* liveness and interference */
-
-   nir_index_blocks(impl);
-   nir_index_ssa_defs(impl);
-   nir_foreach_block(block, impl) {
-      nir_foreach_instr(instr, block)
-         instr->pass_flags = 0;
-   }
-
-   /* this gives an approximation/upper limit on how many nodes are needed
-    * (some ssa values do not represent an allocated register)
-    */
-   unsigned max_nodes = impl->ssa_alloc + impl->reg_alloc;
-   unsigned *live_map = ralloc_array(NULL, unsigned, max_nodes);
-   memset(live_map, 0xff, sizeof(unsigned) * max_nodes);
-   struct live_def *defs = rzalloc_array(NULL, struct live_def, max_nodes);
-
-   unsigned num_nodes = etna_live_defs(impl, defs, live_map);
-   struct ra_graph *g = ra_alloc_interference_graph(regs, num_nodes);
-
-   /* set classes from num_components */
-   for (unsigned i = 0; i < num_nodes; i++) {
-      nir_instr *instr = defs[i].instr;
-      nir_dest *dest = defs[i].dest;
-      unsigned comp = nir_dest_num_components(*dest) - 1;
-
-      if (instr->type == nir_instr_type_alu &&
-          c->specs->has_new_transcendentals) {
-         switch (nir_instr_as_alu(instr)->op) {
-         case nir_op_fdiv:
-         case nir_op_flog2:
-         case nir_op_fsin:
-         case nir_op_fcos:
-            assert(dest->is_ssa);
-            comp = REG_CLASS_VIRT_VEC2T;
-         default:
-            break;
-         }
-      }
-
-      if (instr->type == nir_instr_type_intrinsic) {
-         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
-         /* can't have dst swizzle or sparse writemask on UBO loads */
-         if (intr->intrinsic == nir_intrinsic_load_ubo) {
-            assert(dest == &intr->dest);
-            if (dest->ssa.num_components == 2)
-               comp = REG_CLASS_VIRT_VEC2C;
-            if (dest->ssa.num_components == 3)
-               comp = REG_CLASS_VIRT_VEC3C;
-         }
-      }
-
-      ra_set_node_class(g, i, comp);
-   }
-
-   nir_foreach_block(block, impl) {
-      nir_foreach_instr(instr, block) {
-         if (instr->type != nir_instr_type_intrinsic)
-            continue;
-
-         nir_dest *dest = dest_for_instr(instr);
-         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
-         unsigned reg;
-
-         switch (intr->intrinsic) {
-         case nir_intrinsic_store_deref: {
-            /* don't want outputs to be swizzled
-             * TODO: better would be to set the type to X/XY/XYZ/XYZW
-             * TODO: what if fragcoord.z is read after writing fragdepth?
-             */
-            nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
-            unsigned index = live_map[src_index(impl, &intr->src[1])];
-
-            if (shader->info.stage == MESA_SHADER_FRAGMENT &&
-                deref->var->data.location == FRAG_RESULT_DEPTH) {
-               ra_set_node_reg(g, index, REG_FRAG_DEPTH);
-            } else {
-               ra_set_node_class(g, index, REG_CLASS_VEC4);
-            }
-         } continue;
-         case nir_intrinsic_load_input:
-            reg = nir_intrinsic_base(intr) * NUM_REG_TYPES + (unsigned[]) {
-               REG_TYPE_VIRT_SCALAR_X,
-               REG_TYPE_VIRT_VEC2_XY,
-               REG_TYPE_VIRT_VEC3_XYZ,
-               REG_TYPE_VEC4,
-            }[nir_dest_num_components(*dest) - 1];
-            break;
-         case nir_intrinsic_load_instance_id:
-            reg = c->variant->infile.num_reg * NUM_REG_TYPES + REG_TYPE_VIRT_SCALAR_Y;
-            break;
-         default:
-            continue;
-         }
-
-         ra_set_node_reg(g, live_map[dest_index(impl, dest)], reg);
-      }
-   }
-
-   /* add interference for intersecting live ranges */
-   for (unsigned i = 0; i < num_nodes; i++) {
-      assert(defs[i].live_start < defs[i].live_end);
-      for (unsigned j = 0; j < i; j++) {
-         if (defs[i].live_start >= defs[j].live_end || defs[j].live_start >= defs[i].live_end)
-            continue;
-         ra_add_node_interference(g, i, j);
-      }
-   }
-
-   ralloc_free(defs);
-
-   /* Allocate registers */
-   ASSERTED bool ok = ra_allocate(g);
-   assert(ok);
-
-   c->g = g;
-   c->regs = regs;
-   c->live_map = live_map;
-   c->num_nodes = num_nodes;
-}
-
-static unsigned
-ra_finish(struct etna_compile *c)
-{
-   /* TODO: better way to get number of registers used? */
-   unsigned j = 0;
-   for (unsigned i = 0; i < c->num_nodes; i++) {
-      j = MAX2(j, reg_get_base(c, ra_get_node_reg(c->g, i)) + 1);
-   }
-
-   ralloc_free(c->g);
-   ralloc_free(c->regs);
-   ralloc_free(c->live_map);
-
-   return j;
-}
-
 static void
 emit_alu(struct etna_compile *c, nir_alu_instr * alu)
 {
@@ -1265,11 +961,11 @@ emit_shader(struct etna_compile *c, unsigned *num_temps, unsigned *num_consts)
    nir_convert_from_ssa(shader, true);
    nir_opt_dce(shader);
 
-   ra_assign(c, shader);
+   etna_ra_assign(c, shader);
 
    emit_cf_list(c, &nir_shader_get_entrypoint(shader)->body);
 
-   *num_temps = ra_finish(c);
+   *num_temps = etna_ra_finish(c);
    *num_consts = c->const_count;
    return true;
 }
diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.h b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.h
index 8ad67cb6a7e..4808767b640 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.h
+++ b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.h
@@ -30,7 +30,6 @@
 #include "compiler/nir/nir.h"
 #include "etnaviv_asm.h"
 #include "etnaviv_compiler.h"
-#include "util/register_allocate.h"
 
 struct etna_compile {
    nir_shader *nir;
@@ -235,6 +234,96 @@ struct live_def {
 unsigned
 etna_live_defs(nir_function_impl *impl, struct live_def *defs, unsigned *live_map);
 
+/* Swizzles and write masks can be used to layer virtual non-interfering
+ * registers on top of the real VEC4 registers. For example, the virtual
+ * VEC3_XYZ register and the virtual SCALAR_W register that use the same
+ * physical VEC4 base register do not interfere.
+ */
+enum reg_class {
+   REG_CLASS_VIRT_SCALAR,
+   REG_CLASS_VIRT_VEC2,
+   REG_CLASS_VIRT_VEC3,
+   REG_CLASS_VEC4,
+   /* special vec2 class for fast transcendentals, limited to XY or ZW */
+   REG_CLASS_VIRT_VEC2T,
+   /* special classes for LOAD - contiguous components */
+   REG_CLASS_VIRT_VEC2C,
+   REG_CLASS_VIRT_VEC3C,
+   NUM_REG_CLASSES,
+};
+
+enum reg_type {
+   REG_TYPE_VEC4,
+   REG_TYPE_VIRT_VEC3_XYZ,
+   REG_TYPE_VIRT_VEC3_XYW,
+   REG_TYPE_VIRT_VEC3_XZW,
+   REG_TYPE_VIRT_VEC3_YZW,
+   REG_TYPE_VIRT_VEC2_XY,
+   REG_TYPE_VIRT_VEC2_XZ,
+   REG_TYPE_VIRT_VEC2_XW,
+   REG_TYPE_VIRT_VEC2_YZ,
+   REG_TYPE_VIRT_VEC2_YW,
+   REG_TYPE_VIRT_VEC2_ZW,
+   REG_TYPE_VIRT_SCALAR_X,
+   REG_TYPE_VIRT_SCALAR_Y,
+   REG_TYPE_VIRT_SCALAR_Z,
+   REG_TYPE_VIRT_SCALAR_W,
+   REG_TYPE_VIRT_VEC2T_XY,
+   REG_TYPE_VIRT_VEC2T_ZW,
+   REG_TYPE_VIRT_VEC2C_XY,
+   REG_TYPE_VIRT_VEC2C_YZ,
+   REG_TYPE_VIRT_VEC2C_ZW,
+   REG_TYPE_VIRT_VEC3C_XYZ,
+   REG_TYPE_VIRT_VEC3C_YZW,
+   NUM_REG_TYPES,
+};
+
+/* writemask when used as dest */
+static const uint8_t
+reg_writemask[NUM_REG_TYPES] = {
+   [REG_TYPE_VEC4] = 0xf,
+   [REG_TYPE_VIRT_SCALAR_X] = 0x1,
+   [REG_TYPE_VIRT_SCALAR_Y] = 0x2,
+   [REG_TYPE_VIRT_VEC2_XY] = 0x3,
+   [REG_TYPE_VIRT_VEC2T_XY] = 0x3,
+   [REG_TYPE_VIRT_VEC2C_XY] = 0x3,
+   [REG_TYPE_VIRT_SCALAR_Z] = 0x4,
+   [REG_TYPE_VIRT_VEC2_XZ] = 0x5,
+   [REG_TYPE_VIRT_VEC2_YZ] = 0x6,
+   [REG_TYPE_VIRT_VEC2C_YZ] = 0x6,
+   [REG_TYPE_VIRT_VEC3_XYZ] = 0x7,
+   [REG_TYPE_VIRT_VEC3C_XYZ] = 0x7,
+   [REG_TYPE_VIRT_SCALAR_W] = 0x8,
+   [REG_TYPE_VIRT_VEC2_XW] = 0x9,
+   [REG_TYPE_VIRT_VEC2_YW] = 0xa,
+   [REG_TYPE_VIRT_VEC3_XYW] = 0xb,
+   [REG_TYPE_VIRT_VEC2_ZW] = 0xc,
+   [REG_TYPE_VIRT_VEC2T_ZW] = 0xc,
+   [REG_TYPE_VIRT_VEC2C_ZW] = 0xc,
+   [REG_TYPE_VIRT_VEC3_XZW] = 0xd,
+   [REG_TYPE_VIRT_VEC3_YZW] = 0xe,
+   [REG_TYPE_VIRT_VEC3C_YZW] = 0xe,
+};
+
+static inline int reg_get_type(int virt_reg)
+{
+   return virt_reg % NUM_REG_TYPES;
+}
+
+static inline int reg_get_base(struct etna_compile *c, int virt_reg)
+{
+   /* offset by 1 to avoid reserved position register */
+   if (c->nir->info.stage == MESA_SHADER_FRAGMENT)
+      return (virt_reg / NUM_REG_TYPES + 1) % ETNA_MAX_TEMPS;
+   return virt_reg / NUM_REG_TYPES;
+}
+
+void
+etna_ra_assign(struct etna_compile *c, nir_shader *shader);
+
+unsigned
+etna_ra_finish(struct etna_compile *c);
+
 static inline void
 emit_inst(struct etna_compile *c, struct etna_inst *inst)
 {
diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_ra.c b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_ra.c
new file mode 100644
index 00000000000..b322f035279
--- /dev/null
+++ b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_ra.c
@@ -0,0 +1,248 @@
+/*
+ * Copyright (c) 2019 Zodiac Inflight Innovations
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jonathan Marek
+ */
+
+#include "etnaviv_compiler_nir.h"
+#include "util/register_allocate.h"
+
+/* use "r63.z" for depth reg, it will wrap around to r0.z by reg_get_base
+ * (fs registers are offset by 1 to avoid reserving r0)
+ */
+#define REG_FRAG_DEPTH ((ETNA_MAX_TEMPS - 1) * NUM_REG_TYPES + REG_TYPE_VIRT_SCALAR_Z)
+
+/* precomputed by register_allocate */
+static unsigned int *q_values[] = {
+   (unsigned int[]) {1, 2, 3, 4, 2, 2, 3, },
+   (unsigned int[]) {3, 5, 6, 6, 5, 5, 6, },
+   (unsigned int[]) {3, 4, 4, 4, 4, 4, 4, },
+   (unsigned int[]) {1, 1, 1, 1, 1, 1, 1, },
+   (unsigned int[]) {1, 2, 2, 2, 1, 2, 2, },
+   (unsigned int[]) {2, 3, 3, 3, 2, 3, 3, },
+   (unsigned int[]) {2, 2, 2, 2, 2, 2, 2, },
+};
+
+static inline int reg_get_class(int virt_reg)
+{
+   switch (reg_get_type(virt_reg)) {
+   case REG_TYPE_VEC4:
+      return REG_CLASS_VEC4;
+   case REG_TYPE_VIRT_VEC3_XYZ:
+   case REG_TYPE_VIRT_VEC3_XYW:
+   case REG_TYPE_VIRT_VEC3_XZW:
+   case REG_TYPE_VIRT_VEC3_YZW:
+      return REG_CLASS_VIRT_VEC3;
+   case REG_TYPE_VIRT_VEC2_XY:
+   case REG_TYPE_VIRT_VEC2_XZ:
+   case REG_TYPE_VIRT_VEC2_XW:
+   case REG_TYPE_VIRT_VEC2_YZ:
+   case REG_TYPE_VIRT_VEC2_YW:
+   case REG_TYPE_VIRT_VEC2_ZW:
+      return REG_CLASS_VIRT_VEC2;
+   case REG_TYPE_VIRT_SCALAR_X:
+   case REG_TYPE_VIRT_SCALAR_Y:
+   case REG_TYPE_VIRT_SCALAR_Z:
+   case REG_TYPE_VIRT_SCALAR_W:
+      return REG_CLASS_VIRT_SCALAR;
+   case REG_TYPE_VIRT_VEC2T_XY:
+   case REG_TYPE_VIRT_VEC2T_ZW:
+      return REG_CLASS_VIRT_VEC2T;
+   case REG_TYPE_VIRT_VEC2C_XY:
+   case REG_TYPE_VIRT_VEC2C_YZ:
+   case REG_TYPE_VIRT_VEC2C_ZW:
+      return REG_CLASS_VIRT_VEC2C;
+   case REG_TYPE_VIRT_VEC3C_XYZ:
+   case REG_TYPE_VIRT_VEC3C_YZW:
+      return REG_CLASS_VIRT_VEC3C;
+   }
+
+   assert(false);
+   return 0;
+}
+
+void
+etna_ra_assign(struct etna_compile *c, nir_shader *shader)
+{
+   struct ra_regs *regs = ra_alloc_reg_set(NULL, ETNA_MAX_TEMPS *
+                  NUM_REG_TYPES, false);
+
+   /* classes always be created from index 0, so equal to the class enum
+    * which represents a register with (c+1) components
+    */
+   for (int c = 0; c < NUM_REG_CLASSES; c++)
+      ra_alloc_reg_class(regs);
+   /* add each register of each class */
+   for (int r = 0; r < NUM_REG_TYPES * ETNA_MAX_TEMPS; r++)
+      ra_class_add_reg(regs, reg_get_class(r), r);
+   /* set conflicts */
+   for (int r = 0; r < ETNA_MAX_TEMPS; r++) {
+      for (int i = 0; i < NUM_REG_TYPES; i++) {
+         for (int j = 0; j < i; j++) {
+            if (reg_writemask[i] & reg_writemask[j]) {
+               ra_add_reg_conflict(regs, NUM_REG_TYPES * r + i,
+                                         NUM_REG_TYPES * r + j);
+            }
+         }
+      }
+   }
+   ra_set_finalize(regs, q_values);
+
+   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+
+   /* liveness and interference */
+
+   nir_index_blocks(impl);
+   nir_index_ssa_defs(impl);
+   nir_foreach_block(block, impl) {
+      nir_foreach_instr(instr, block)
+         instr->pass_flags = 0;
+   }
+
+   /* this gives an approximation/upper limit on how many nodes are needed
+    * (some ssa values do not represent an allocated register)
+    */
+   unsigned max_nodes = impl->ssa_alloc + impl->reg_alloc;
+   unsigned *live_map = ralloc_array(NULL, unsigned, max_nodes);
+   memset(live_map, 0xff, sizeof(unsigned) * max_nodes);
+   struct live_def *defs = rzalloc_array(NULL, struct live_def, max_nodes);
+
+   unsigned num_nodes = etna_live_defs(impl, defs, live_map);
+   struct ra_graph *g = ra_alloc_interference_graph(regs, num_nodes);
+
+   /* set classes from num_components */
+   for (unsigned i = 0; i < num_nodes; i++) {
+      nir_instr *instr = defs[i].instr;
+      nir_dest *dest = defs[i].dest;
+      unsigned comp = nir_dest_num_components(*dest) - 1;
+
+      if (instr->type == nir_instr_type_alu &&
+          c->specs->has_new_transcendentals) {
+         switch (nir_instr_as_alu(instr)->op) {
+         case nir_op_fdiv:
+         case nir_op_flog2:
+         case nir_op_fsin:
+         case nir_op_fcos:
+            assert(dest->is_ssa);
+            comp = REG_CLASS_VIRT_VEC2T;
+         default:
+            break;
+         }
+      }
+
+      if (instr->type == nir_instr_type_intrinsic) {
+         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+         /* can't have dst swizzle or sparse writemask on UBO loads */
+         if (intr->intrinsic == nir_intrinsic_load_ubo) {
+            assert(dest == &intr->dest);
+            if (dest->ssa.num_components == 2)
+               comp = REG_CLASS_VIRT_VEC2C;
+            if (dest->ssa.num_components == 3)
+               comp = REG_CLASS_VIRT_VEC3C;
+         }
+      }
+
+      ra_set_node_class(g, i, comp);
+   }
+
+   nir_foreach_block(block, impl) {
+      nir_foreach_instr(instr, block) {
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;
+
+         nir_dest *dest = dest_for_instr(instr);
+         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+         unsigned reg;
+
+         switch (intr->intrinsic) {
+         case nir_intrinsic_store_deref: {
+            /* don't want outputs to be swizzled
+             * TODO: better would be to set the type to X/XY/XYZ/XYZW
+             * TODO: what if fragcoord.z is read after writing fragdepth?
+             */
+            nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
+            unsigned index = live_map[src_index(impl, &intr->src[1])];
+
+            if (shader->info.stage == MESA_SHADER_FRAGMENT &&
+                deref->var->data.location == FRAG_RESULT_DEPTH) {
+               ra_set_node_reg(g, index, REG_FRAG_DEPTH);
+            } else {
+               ra_set_node_class(g, index, REG_CLASS_VEC4);
+            }
+         } continue;
+         case nir_intrinsic_load_input:
+            reg = nir_intrinsic_base(intr) * NUM_REG_TYPES + (unsigned[]) {
+               REG_TYPE_VIRT_SCALAR_X,
+               REG_TYPE_VIRT_VEC2_XY,
+               REG_TYPE_VIRT_VEC3_XYZ,
+               REG_TYPE_VEC4,
+            }[nir_dest_num_components(*dest) - 1];
+            break;
+         case nir_intrinsic_load_instance_id:
+            reg = c->variant->infile.num_reg * NUM_REG_TYPES + REG_TYPE_VIRT_SCALAR_Y;
+            break;
+         default:
+            continue;
+         }
+
+         ra_set_node_reg(g, live_map[dest_index(impl, dest)], reg);
+      }
+   }
+
+   /* add interference for intersecting live ranges */
+   for (unsigned i = 0; i < num_nodes; i++) {
+      assert(defs[i].live_start < defs[i].live_end);
+      for (unsigned j = 0; j < i; j++) {
+         if (defs[i].live_start >= defs[j].live_end || defs[j].live_start >= defs[i].live_end)
+            continue;
+         ra_add_node_interference(g, i, j);
+      }
+   }
+
+   ralloc_free(defs);
+
+   /* Allocate registers */
+   ASSERTED bool ok = ra_allocate(g);
+   assert(ok);
+
+   c->g = g;
+   c->regs = regs;
+   c->live_map = live_map;
+   c->num_nodes = num_nodes;
+}
+
+unsigned
+etna_ra_finish(struct etna_compile *c)
+{
+   /* TODO: better way to get number of registers used? */
+   unsigned j = 0;
+   for (unsigned i = 0; i < c->num_nodes; i++) {
+      j = MAX2(j, reg_get_base(c, ra_get_node_reg(c->g, i)) + 1);
+   }
+
+   ralloc_free(c->g);
+   ralloc_free(c->regs);
+   ralloc_free(c->live_map);
+
+   return j;
+}
diff --git a/src/gallium/drivers/etnaviv/meson.build b/src/gallium/drivers/etnaviv/meson.build
index 93199abe05a..7eaa4cca92c 100644
--- a/src/gallium/drivers/etnaviv/meson.build
+++ b/src/gallium/drivers/etnaviv/meson.build
@@ -39,6 +39,7 @@ files_etnaviv = files(
   'etnaviv_compiler_nir.c',
   'etnaviv_compiler_nir_emit.c',
   'etnaviv_compiler_nir_liveness.c',
+  'etnaviv_compiler_nir_ra.c',
   'etnaviv_compiler_tgsi.c',
   'etnaviv_context.c',
   'etnaviv_context.h',