diff --git a/src/panfrost/bifrost/bi_lower_swizzle.c b/src/panfrost/bifrost/bi_lower_swizzle.c
index 32517ad93de..2d79fcf83ca 100644
--- a/src/panfrost/bifrost/bi_lower_swizzle.c
+++ b/src/panfrost/bifrost/bi_lower_swizzle.c
@@ -137,6 +137,117 @@ bi_lower_swizzle_16(bi_context *ctx, bi_instr *ins, unsigned src)
         ins->src[src].swizzle = BI_SWIZZLE_H01;
 }
 
+/* Returns true if swz is one of B0000, B1111, B2222 or B3333, the swizzles
+ * this pass treats as replicating at 8-bit granularity.
+ */
+static bool
+bi_swizzle_replicates_8(enum bi_swizzle swz)
+{
+        switch (swz) {
+        case BI_SWIZZLE_B0000:
+        case BI_SWIZZLE_B1111:
+        case BI_SWIZZLE_B2222:
+        case BI_SWIZZLE_B3333:
+                return true;
+        default:
+                return false;
+        }
+}
+
+/* Returns true if swz is H00 or H11, or if it is accepted by
+ * bi_swizzle_replicates_8().
+ */
+static bool
+bi_swizzle_replicates_16(enum bi_swizzle swz)
+{
+        switch (swz) {
+        case BI_SWIZZLE_H00:
+        case BI_SWIZZLE_H11:
+                return true;
+        default:
+                /* If a swizzle replicates every 8-bits, it also replicates
+                 * every 16-bits, so allow 8-bit replicating swizzles.
+                 */
+                return bi_swizzle_replicates_8(swz);
+        }
+}
+
+/* Conservatively decides whether the value written by I has the same
+ * contents in both 16-bit halves. replicates_16 is a bitset, indexed by
+ * bi_word_node(), of SSA words already known to be replicated. Returns
+ * false whenever replication cannot be shown.
+ */
+static bool
+bi_instr_replicates(bi_instr *I, BITSET_WORD *replicates_16)
+{
+        switch (I->op) {
+
+        /* Instructions that construct vectors have replicated output if their
+         * sources are identical. Check this case first.
+         */
+        case BI_OPCODE_MKVEC_V2I16:
+        case BI_OPCODE_V2F16_TO_V2S16:
+        case BI_OPCODE_V2F16_TO_V2U16:
+        case BI_OPCODE_V2F32_TO_V2F16:
+        case BI_OPCODE_V2S16_TO_V2F16:
+        case BI_OPCODE_V2S8_TO_V2F16:
+        case BI_OPCODE_V2S8_TO_V2S16:
+        case BI_OPCODE_V2U16_TO_V2F16:
+        case BI_OPCODE_V2U8_TO_V2F16:
+        case BI_OPCODE_V2U8_TO_V2U16:
+                return bi_is_value_equiv(I->src[0], I->src[1]);
+
+        /* 16-bit transcendentals are defined to output zero in their
+         * upper half, so they do not replicate
+         */
+        case BI_OPCODE_FRCP_F16:
+        case BI_OPCODE_FRSQ_F16:
+                return false;
+
+        /* Not sure, be conservative, we don't use these.. */
+        case BI_OPCODE_VN_ASST1_F16:
+        case BI_OPCODE_FPCLASS_F16:
+        case BI_OPCODE_FPOW_SC_DET_F16:
+                return false;
+
+        default:
+                break;
+        }
+
+        /* Replication analysis only makes sense for ALU instructions */
+        if (bi_opcode_props[I->op].message != BIFROST_MESSAGE_NONE)
+                return false;
+
+        /* We only analyze 16-bit instructions for 16-bit replication. We could
+         * maybe do better.
+         */
+        if (bi_opcode_props[I->op].size != BI_SIZE_16)
+                return false;
+
+        bi_foreach_src(I, s) {
+                if (bi_is_null(I->src[s]))
+                        continue;
+
+                /* Replicated swizzles */
+                if (bi_swizzle_replicates_16(I->src[s].swizzle))
+                        continue;
+
+                /* Replicated values */
+                if (bi_is_ssa(I->src[s]) &&
+                    BITSET_TEST(replicates_16, bi_word_node(I->src[s])))
+                        continue;
+
+                /* Replicated constants */
+                if (I->src[s].type == BI_INDEX_CONSTANT &&
+                    (I->src[s].value & 0xFFFF) == (I->src[s].value >> 16))
+                        continue;
+
+                return false;
+        }
+
+        return true;
+}
+
 void
 bi_lower_swizzle(bi_context *ctx)
 {
@@ -146,4 +257,36 @@ bi_lower_swizzle(bi_context *ctx)
                         bi_lower_swizzle_16(ctx, ins, s);
                 }
         }
+
+        /* Now that we've lowered swizzles, clean up the mess. replicates_16
+         * has one bit per bi_word_node() index and records which SSA words
+         * are known to hold a replicated 16-bit value. Size it in
+         * BITSET_WORDs: BITSET_SET/BITSET_TEST only need one bit per node,
+         * not one bi_index per node. This assumes every bi_word_node() index
+         * is below (ssa_alloc + 1) << 2, the bound the allocation was
+         * already written against.
+         */
+        BITSET_WORD *replicates_16 =
+                calloc(BITSET_WORDS((ctx->ssa_alloc + 1) << 2),
+                       sizeof(BITSET_WORD));
+
+        /* The cleanup is only an optimization, so skip it if out of memory */
+        if (!replicates_16)
+                return;
+
+        bi_foreach_instr_global(ctx, ins) {
+                if (bi_is_ssa(ins->dest[0]) && bi_instr_replicates(ins, replicates_16))
+                        BITSET_SET(replicates_16, bi_word_node(ins->dest[0]));
+
+                /* A 16-bit swizzle of a value whose halves are equal yields
+                 * that same value, so turn the swizzle into a plain move.
+                 */
+                if (ins->op == BI_OPCODE_SWZ_V2I16 && bi_is_ssa(ins->src[0]) &&
+                    BITSET_TEST(replicates_16, bi_word_node(ins->src[0]))) {
+                        ins->op = BI_OPCODE_MOV_I32;
+                        ins->src[0].swizzle = BI_SWIZZLE_H01;
+                }
+        }
+
+        free(replicates_16);
 }
diff --git a/src/panfrost/bifrost/compiler.h b/src/panfrost/bifrost/compiler.h
index 5e767f3d2a7..72d21da8ee4 100644
--- a/src/panfrost/bifrost/compiler.h
+++ b/src/panfrost/bifrost/compiler.h
@@ -355,6 +355,30 @@ bi_is_word_equiv(bi_index left, bi_index right)
         return bi_is_equiv(left, right) && left.offset == right.offset;
 }
 
+/* An even stronger equivalence that checks if indices correspond to the
+ * same value when evaluated. Two constants are compared after applying
+ * their swizzles; any other pair must match field for field. The abs and
+ * neg modifiers must agree in both cases.
+ */
+static inline bool
+bi_is_value_equiv(bi_index left, bi_index right)
+{
+        if (left.type == BI_INDEX_CONSTANT && right.type == BI_INDEX_CONSTANT) {
+                return (bi_apply_swizzle(left.value, left.swizzle) ==
+                        bi_apply_swizzle(right.value, right.swizzle)) &&
+                       (left.abs == right.abs) &&
+                       (left.neg == right.neg);
+        } else {
+                return (left.value == right.value) &&
+                       (left.abs == right.abs) &&
+                       (left.neg == right.neg) &&
+                       (left.swizzle == right.swizzle) &&
+                       (left.offset == right.offset) &&
+                       (left.reg == right.reg) &&
+                       (left.type == right.type);
+        }
+}
+
 #define BI_MAX_DESTS 2
 #define BI_MAX_SRCS 5
 