From 13d7ca13001064dc7114e6cc26ba2c3e7ff64e47 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 21 Jul 2021 20:29:54 -0400 Subject: [PATCH] pan/va: Optimize add with imm to ADD_IMM Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/bifrost_compile.c | 4 + src/panfrost/bifrost/compiler.h | 1 + src/panfrost/bifrost/meson.build | 1 + src/panfrost/bifrost/valhall/va_compiler.h | 1 + src/panfrost/bifrost/valhall/va_optimize.c | 113 +++++++++++++++++++++ 5 files changed, 120 insertions(+) create mode 100644 src/panfrost/bifrost/valhall/va_optimize.c diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index 663a655fae6..88e7f1bb129 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -4123,6 +4123,10 @@ bi_compile_variant_nir(nir_shader *nir, bi_lower_opt_instruction(I); } + if (ctx->arch >= 9) { + va_optimize(ctx); + } + bi_foreach_block(ctx, block) { bi_lower_branch(ctx, block); } diff --git a/src/panfrost/bifrost/compiler.h b/src/panfrost/bifrost/compiler.h index e074421c3db..920e18900f0 100644 --- a/src/panfrost/bifrost/compiler.h +++ b/src/panfrost/bifrost/compiler.h @@ -1038,6 +1038,7 @@ void bi_lower_swizzle(bi_context *ctx); void bi_lower_fau(bi_context *ctx); void bi_assign_scoreboard(bi_context *ctx); void bi_register_allocate(bi_context *ctx); +void va_optimize(bi_context *ctx); void bi_lower_opt_instruction(bi_instr *I); diff --git a/src/panfrost/bifrost/meson.build b/src/panfrost/bifrost/meson.build index ea9e7c751ea..3191d24ec65 100644 --- a/src/panfrost/bifrost/meson.build +++ b/src/panfrost/bifrost/meson.build @@ -45,6 +45,7 @@ libpanfrost_bifrost_files = files( 'bi_validate.c', 'bir.c', 'bifrost_compile.c', + 'valhall/va_optimize.c', 'valhall/va_pack.c', ) diff --git a/src/panfrost/bifrost/valhall/va_compiler.h b/src/panfrost/bifrost/valhall/va_compiler.h index a6409ae6fee..d7207cd77b4 100644 --- a/src/panfrost/bifrost/valhall/va_compiler.h 
+++ b/src/panfrost/bifrost/valhall/va_compiler.h @@ -34,6 +34,7 @@ extern "C" { #endif +void va_fuse_add_imm(bi_instr *I); uint64_t va_pack_instr(const bi_instr *I, unsigned flow); static inline unsigned diff --git a/src/panfrost/bifrost/valhall/va_optimize.c b/src/panfrost/bifrost/valhall/va_optimize.c new file mode 100644 index 00000000000..118321c9a68 --- /dev/null +++ b/src/panfrost/bifrost/valhall/va_optimize.c @@ -0,0 +1,113 @@ +/* + * Copyright (C) 2021 Collabora Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+#include "va_compiler.h"
+
+/* Valhall specific instruction selection optimizations */
+
+/*
+ * Map an add opcode to the corresponding ADD_IMM opcode. Each signed/unsigned
+ * integer pair maps to a single immediate opcode. Returns 0 for any opcode
+ * not listed here; va_fuse_add_imm() treats 0 as "nothing to fuse".
+ */
+static enum bi_opcode
+va_op_add_imm(enum bi_opcode op)
+{
+   switch (op) {
+   case BI_OPCODE_FADD_F32: return BI_OPCODE_FADD_IMM_F32;
+   case BI_OPCODE_FADD_V2F16: return BI_OPCODE_FADD_IMM_V2F16;
+   case BI_OPCODE_IADD_S32:
+   case BI_OPCODE_IADD_U32: return BI_OPCODE_IADD_IMM_I32;
+   case BI_OPCODE_IADD_V2S16:
+   case BI_OPCODE_IADD_V2U16: return BI_OPCODE_IADD_IMM_V2I16;
+   case BI_OPCODE_IADD_V4S8:
+   case BI_OPCODE_IADD_V4U8: return BI_OPCODE_IADD_IMM_V4I8;
+   default: return 0;
+   }
+}
+
+/*
+ * Return true if source `s` of `I` carries no modifiers (swizzle is
+ * BI_SWIZZLE_H01, neither .abs nor .neg set) and the instruction itself has
+ * neither clamp nor round set. va_fuse_add_imm() requires this of the
+ * non-constant source before rewriting the instruction.
+ */
+static bool
+va_is_add_imm(bi_instr *I, unsigned s)
+{
+   return I->src[s].swizzle == BI_SWIZZLE_H01 &&
+          !I->src[s].abs && !I->src[s].neg && !I->clamp && !I->round;
+}
+
+/*
+ * Return the index (0 or 1) of the first source of `I` that is a constant,
+ * or ~0 if neither of the first two sources is a constant.
+ */
+static unsigned
+va_choose_imm(bi_instr *I)
+{
+   for (unsigned i = 0; i < 2; ++i) {
+      if (I->src[i].type == BI_INDEX_CONSTANT)
+         return i;
+   }
+
+   return ~0;
+}
+
+/*
+ * Lower MOV.i32 #constant --> IADD_IMM.i32 0x0, #constant
+ *
+ * The constant value is moved into I->index and source 0 is replaced with
+ * bi_zero(). Moves whose source is not a constant are left unchanged.
+ */
+static void
+va_lower_mov_imm(bi_instr *I)
+{
+   if (I->src[0].type == BI_INDEX_CONSTANT) {
+      I->op = BI_OPCODE_IADD_IMM_I32;
+      I->index = I->src[0].value;
+      I->src[0] = bi_zero();
+   }
+}
+
+/*
+ * Rewrite `I` in place to use an immediate form where possible:
+ *
+ *  - MOV_I32 is handed to va_lower_mov_imm().
+ *  - An add opcode known to va_op_add_imm() with a constant in source 0 or 1
+ *    becomes the matching ADD_IMM opcode, provided the other source passes
+ *    va_is_add_imm(). The constant (after applying its swizzle) is stored in
+ *    I->index, the other source moves to src[0] and src[1] becomes bi_null().
+ *
+ * Instructions that do not match either case are left untouched.
+ */
+void
+va_fuse_add_imm(bi_instr *I)
+{
+   if (I->op == BI_OPCODE_MOV_I32) {
+      va_lower_mov_imm(I);
+      return;
+   }
+
+   enum bi_opcode op = va_op_add_imm(I->op);
+   if (!op) return;
+
+   /* va_choose_imm() returns ~0 when there is no constant source */
+   unsigned s = va_choose_imm(I);
+   if (s > 1) return;
+   if (!va_is_add_imm(I, 1 - s)) return;
+
+   I->op = op;
+   I->index = bi_apply_swizzle(I->src[s].value, I->src[s].swizzle);
+
+   assert(!I->src[s].abs && "redundant .abs set");
+
+   /* If the constant is negated, flip the sign bit: bit 31 for F32, bits 31
+    * and 15 (the top bit of each 16-bit half) for V2F16. The literals are
+    * unsigned because shifting a signed 1 into bit 31 is undefined behaviour
+    * in C.
+    */
+   if (I->src[s].neg) {
+      if (I->op == BI_OPCODE_FADD_IMM_F32)
+         I->index ^= (1u << 31);
+      else if (I->op == BI_OPCODE_FADD_IMM_V2F16)
+         I->index ^= (1u << 31) | (1u << 15);
+      else
+         unreachable("unexpected .neg");
+   }
+
+   I->src[0] = I->src[1 - s];
+   I->src[1] = bi_null();
+}
+
+/*
+ * Apply va_fuse_add_imm() to every instruction visited by
+ * bi_foreach_instr_global() on `ctx`.
+ */
+void
+va_optimize(bi_context *ctx)
+{
+   bi_foreach_instr_global(ctx, I) {
+      va_fuse_add_imm(I);
+   }
+}