From fa17295ebdd42ab9f6a76ce44f303a99efcf3707 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Thu, 4 Mar 2021 12:18:44 +0100 Subject: [PATCH] ir3: Add simple CSE pass RA currently can't handle a live value that's part of a vector and introduces extra copies. This was especially a problem for bary.f, where the bary coords were being split and repeatedly re-collected. But this could be a problem in other situations as well. Part-of: --- src/freedreno/ir3/ir3.h | 3 + src/freedreno/ir3/ir3_compiler_nir.c | 1 + src/freedreno/ir3/ir3_cse.c | 145 +++++++++++++++++++++++++++ src/freedreno/ir3/meson.build | 1 + 4 files changed, 150 insertions(+) create mode 100644 src/freedreno/ir3/ir3_cse.c diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 5239ce97744..8e78f9beced 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -1436,6 +1436,9 @@ bool ir3_cf(struct ir3 *ir); bool ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so); bool ir3_cp_postsched(struct ir3 *ir); +/* common subexpression elimination: */ +bool ir3_cse(struct ir3 *ir); + /* Make arrays SSA */ bool ir3_array_to_ssa(struct ir3 *ir); diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index d95fe12e008..6921d9a95c1 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -3973,6 +3973,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, progress |= IR3_PASS(ir, ir3_cf); progress |= IR3_PASS(ir, ir3_cp, so); + progress |= IR3_PASS(ir, ir3_cse); progress |= IR3_PASS(ir, ir3_dce, so); } while (progress); diff --git a/src/freedreno/ir3/ir3_cse.c b/src/freedreno/ir3/ir3_cse.c new file mode 100644 index 00000000000..93a2d3444c6 --- /dev/null +++ b/src/freedreno/ir3/ir3_cse.c @@ -0,0 +1,145 @@ +/* + * Copyright (C) 2014 Valve Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to 
deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ir3.h" + +#define XXH_INLINE_ALL +#include "xxhash.h" + +/* This pass handles CSE'ing repeated expressions created in the process of + * translating from NIR. Currently this is just collect's. Also, currently + * this is intra-block only, to make it work over multiple blocks we'd need to + * bring forward dominance calculation. 
+ */ + +#define HASH(hash, data) XXH32(&(data), sizeof(data), hash) + +static uint32_t +hash_instr(const void *data) +{ + const struct ir3_instruction *instr = data; + uint32_t hash = 0; + + hash = HASH(hash, instr->opc); + hash = HASH(hash, instr->regs[0]->flags); + foreach_src (src, (struct ir3_instruction *) instr) { /* fold in the same per-source field instrs_equal() compares */ + if (src->flags & IR3_REG_CONST) + hash = HASH(hash, src->num); + else if (src->flags & IR3_REG_IMMED) + hash = HASH(hash, src->uim_val); + else + hash = HASH(hash, src->def); + } + + return hash; +} + +static bool +instrs_equal(const struct ir3_instruction *i1, const struct ir3_instruction *i2) +{ + if (i1->opc != i2->opc) + return false; + + if (i1->regs_count != i2->regs_count) + return false; + + if (i1->regs[0]->flags != i2->regs[0]->flags) + return false; + + for (unsigned i = 1; i < i1->regs_count; i++) { /* regs[0] was handled above; compare the remaining regs pairwise */ + const struct ir3_register *i1_reg = i1->regs[i], *i2_reg = i2->regs[i]; + + if (i1_reg->flags != i2_reg->flags) + return false; + + if (i1_reg->flags & IR3_REG_CONST) { + if (i1_reg->num != i2_reg->num) + return false; + } else if (i1_reg->flags & IR3_REG_IMMED) { + if (i1_reg->uim_val != i2_reg->uim_val) + return false; + } else { + if (i1_reg->def != i2_reg->def) + return false; + } + } + + return true; +} + +static bool +instr_can_cse(const struct ir3_instruction *instr) +{ + if (instr->opc != OPC_META_COLLECT) /* only collect instructions are candidates */ + return false; + + return true; +} + +static bool +cmp_func(const void *data1, const void *data2) +{ + return instrs_equal(data1, data2); /* untyped adapter handed to _mesa_set_create() */ +} + +bool +ir3_cse(struct ir3 *ir) +{ + struct set *instr_set = _mesa_set_create(NULL, hash_instr, cmp_func); + foreach_block (block, &ir->block_list) { + _mesa_set_clear(instr_set, NULL); /* candidates are only matched within one block */ + + foreach_instr (instr, &block->instr_list) { + instr->data = NULL; /* stays NULL unless a duplicate is recorded below */ + + if (!instr_can_cse(instr)) + continue; + + bool found; + struct set_entry *entry = + _mesa_set_search_or_add(instr_set, instr, &found); + if (found) /* entry->key is the equal instruction already in the set (per util/set.h; confirm) */ + instr->data = (void *) entry->key; + } + } + + /* Redirect each SSA source whose defining instruction was marked above to regs[0] of the instruction recorded in its data. */ + bool progress = false; + 
foreach_block (block, &ir->block_list) { + foreach_instr (instr, &block->instr_list) { + foreach_src(src, instr) { + if ((src->flags & IR3_REG_SSA) && + src->def && + src->def->instr->data) { + progress = true; + struct ir3_instruction *canon = src->def->instr->data; /* distinct name: must not shadow the foreach_instr iterator */ + src->def = canon->regs[0]; + } + } + } + } + + _mesa_set_destroy(instr_set, NULL); + return progress; +} + diff --git a/src/freedreno/ir3/meson.build b/src/freedreno/ir3/meson.build index 28c8e88c75c..ea98381e9a1 100644 --- a/src/freedreno/ir3/meson.build +++ b/src/freedreno/ir3/meson.build @@ -77,6 +77,7 @@ libfreedreno_ir3_files = files( 'ir3_cf.c', 'ir3_cp.c', 'ir3_cp_postsched.c', + 'ir3_cse.c', 'ir3_dce.c', 'ir3_delay.c', 'ir3_dominance.c',