bifrost: Add support for nir_op_ishl

Bifrost's bitwise ops include a shift capability. Previously we hardcoded the shift to zero in all cases. There is room in the future to emit slightly better code by folding a shift and a bitwise operation together, but that is not attempted for now. This change also removes the separate BI_SHIFT instruction class, as BI_BITWISE can cover both cases. Signed-off-by: Chris Forbes <chrisforbes@google.com> Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6091>
2020-07-26 12:18:54 -07:00 · 2020-07-26 12:18:54 -07:00 · 946ff9b439
parent 539ea08736
commit 946ff9b439
5 changed files with 20 additions and 10 deletions
--- a/src/panfrost/bifrost/bi_pack.c
+++ b/src/panfrost/bifrost/bi_pack.c
@ -1168,8 +1168,6 @@ bi_pack_fma(bi_clause *clause, bi_bundle bundle, bi_registers *regs)
                return bi_pack_fma_addmin(bundle.fma, regs);
        case BI_MOV:
                return bi_pack_fma_1src(bundle.fma, regs, BIFROST_FMA_OP_MOV);
-        case BI_SHIFT:
-                unreachable("Packing todo");
        case BI_SELECT:
                return bi_pack_fma_select(bundle.fma, regs);
        case BI_ROUND:
@ -1733,7 +1731,6 @@ bi_pack_add(bi_clause *clause, bi_bundle bundle, bi_registers *regs, gl_shader_s
        case BI_MINMAX:
                return bi_pack_add_addmin(bundle.add, regs);
        case BI_MOV:
-        case BI_SHIFT:
        case BI_STORE:
                unreachable("Packing todo");
        case BI_STORE_VAR:
--- a/src/panfrost/bifrost/bi_print.c
+++ b/src/panfrost/bifrost/bi_print.c
@ -149,7 +149,6 @@ bi_class_name(enum bi_class cl)
        case BI_MINMAX: return "minmax";
        case BI_MOV: return "mov";
        case BI_SELECT: return "select";
-        case BI_SHIFT: return "shift";
        case BI_STORE: return "store";
        case BI_STORE_VAR: return "store_var";
        case BI_SPECIAL: return "special";
--- a/src/panfrost/bifrost/bi_tables.c
+++ b/src/panfrost/bifrost/bi_tables.c
@ -49,7 +49,6 @@ unsigned bi_class_props[BI_NUM_CLASSES] = {
        [BI_MOV] 		= BI_SCHED_ALL,
        [BI_FMOV]               = BI_MODS | BI_SCHED_ALL,
        [BI_REDUCE_FMA]         = BI_SCHED_FMA,
-        [BI_SHIFT] 		= BI_SCHED_ALL,
        [BI_STORE] 		= BI_SCHED_HI_LATENCY | BI_SCHED_ADD | BI_VECTOR | BI_DATA_REG_SRC,
        [BI_STORE_VAR] 		= BI_SCHED_HI_LATENCY | BI_SCHED_ADD | BI_VECTOR | BI_DATA_REG_SRC,
        [BI_SPECIAL] 		= BI_SCHED_ADD | BI_SCHED_SLOW,
--- a/src/panfrost/bifrost/bifrost_compile.c
+++ b/src/panfrost/bifrost/bifrost_compile.c
@ -516,6 +516,7 @@ bi_class_for_nir_alu(nir_op op)
        case nir_op_ior:
        case nir_op_ixor:
        case nir_op_inot:
+        case nir_op_ishl:
                return BI_BITWISE;

        BI_CASE_CMP(nir_op_flt)
@ -807,6 +808,16 @@ emit_alu(bi_context *ctx, nir_alu_instr *instr)
                alu.op.bitwise = BI_BITWISE_OR;
                alu.bitwise.src_invert[0] = true;
                alu.src[1] = BIR_INDEX_ZERO;
+                /* zero shift */
+                alu.src[2] = BIR_INDEX_ZERO;
+                alu.src_types[2] = alu.src_types[1];
+                break;
+        case nir_op_ishl:
+                alu.op.bitwise = BI_BITWISE_OR;
+                /* Move src1 (the shift amount) to src2 and replace src1 with zero; the underlying op is (src0 << src2) | src1 */
+                alu.src[2] = alu.src[1];
+                alu.src_types[2] = alu.src_types[1];
+                alu.src[1] = BIR_INDEX_ZERO;
                break;
        case nir_op_fmax:
        case nir_op_imax:
@ -843,12 +854,21 @@ emit_alu(bi_context *ctx, nir_alu_instr *instr)
                break;
        case nir_op_iand:
                alu.op.bitwise = BI_BITWISE_AND;
+                /* zero shift */
+                alu.src[2] = BIR_INDEX_ZERO;
+                alu.src_types[2] = alu.src_types[1];
                break;
        case nir_op_ior:
                alu.op.bitwise = BI_BITWISE_OR;
+                /* zero shift */
+                alu.src[2] = BIR_INDEX_ZERO;
+                alu.src_types[2] = alu.src_types[1];
                break;
        case nir_op_ixor:
                alu.op.bitwise = BI_BITWISE_XOR;
+                /* zero shift */
+                alu.src[2] = BIR_INDEX_ZERO;
+                alu.src_types[2] = alu.src_types[1];
                break;
        case nir_op_f2i32:
                alu.roundmode = BIFROST_RTZ;
@ -889,10 +909,6 @@ emit_alu(bi_context *ctx, nir_alu_instr *instr)
                bi_fuse_cond(&alu, instr->src[0],
                                &constants_left, &constant_shift, comps, false);
 #endif
-        } else if (alu.type == BI_BITWISE) {
-                /* Implicit shift argument... at some point we should fold */
-                alu.src[2] = BIR_INDEX_ZERO;
-                alu.src_types[2] = alu.src_types[1];
        }

        bi_emit(ctx, alu);
--- a/src/panfrost/bifrost/compiler.h
+++ b/src/panfrost/bifrost/compiler.h
@ -70,7 +70,6 @@ enum bi_class {
        BI_MOV,
        BI_REDUCE_FMA,
        BI_SELECT,
-        BI_SHIFT,
        BI_STORE,
        BI_STORE_VAR,
        BI_SPECIAL, /* _FAST on supported GPUs */