From 513d02cfeb9cd42d8ed41a6999bcab08bb5e239d Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Thu, 1 Aug 2019 14:28:34 -0700 Subject: [PATCH] pan/midgard: Remove "r27-only" register class MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As far as I know, there's no such thing as a load/store op that only takes its argument in r27. We just need to set the appropriate arg_1 field in the RA to specify other registers if we want them. To facilitate this, various RA-related changes are needed across the compiler ; this should also fix indirect offsets which were implicitly interpreted as "r27-only" despite not even passing through RA yet. One ripple effect change is switching the move insertion point and adjusting the liveness analysis accordingly, so while this was intended as a purely functional change, there are some shader-db changes: total instructions in shared programs: 3511 -> 3498 (-0.37%) instructions in affected programs: 563 -> 550 (-2.31%) helped: 12 HURT: 0 helped stats (abs) min: 1 max: 2 x̄: 1.08 x̃: 1 helped stats (rel) min: 0.93% max: 5.00% x̄: 2.58% x̃: 2.33% 95% mean confidence interval for instructions value: -1.27 -0.90 95% mean confidence interval for instructions %-change: -3.23% -1.93% Instructions are helped. total bundles in shared programs: 2067 -> 2067 (0.00%) bundles in affected programs: 398 -> 398 (0.00%) helped: 7 HURT: 4 helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 helped stats (rel) min: 1.54% max: 10.00% x̄: 5.04% x̃: 5.56% HURT stats (abs) min: 1 max: 2 x̄: 1.75 x̃: 2 HURT stats (rel) min: 2.13% max: 4.26% x̄: 3.72% x̃: 4.26% 95% mean confidence interval for bundles value: -0.95 0.95 95% mean confidence interval for bundles %-change: -5.21% 1.50% Inconclusive result (value mean confidence interval includes 0). total quadwords in shared programs: 3464 -> 3454 (-0.29%) quadwords in affected programs: 1199 -> 1189 (-0.83%) helped: 18 HURT: 4 helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 helped stats (rel) min: 1.03% max: 5.26% x̄: 2.44% x̃: 1.79% HURT stats (abs) min: 2 max: 2 x̄: 2.00 x̃: 2 HURT stats (rel) min: 2.56% max: 2.82% x̄: 2.63% x̃: 2.56% 95% mean confidence interval for quadwords value: -0.98 0.07 Inconclusive result (value mean confidence interval includes 0). total registers in shared programs: 383 -> 373 (-2.61%) registers in affected programs: 56 -> 46 (-17.86%) helped: 12 HURT: 2 helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 helped stats (rel) min: 9.09% max: 33.33% x̄: 29.58% x̃: 33.33% HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 HURT stats (rel) min: 20.00% max: 50.00% x̄: 35.00% x̃: 35.00% 95% mean confidence interval for registers value: -1.13 -0.29 95% mean confidence interval for registers %-change: -35.07% -5.63% Registers are helped. Signed-off-by: Alyssa Rosenzweig --- src/panfrost/midgard/helpers.h | 6 +- src/panfrost/midgard/midgard_compile.c | 48 ++------- src/panfrost/midgard/midgard_liveness.c | 6 +- .../midgard/midgard_opt_perspective.c | 2 +- src/panfrost/midgard/midgard_ra.c | 101 ++++++++++-------- 5 files changed, 66 insertions(+), 97 deletions(-) diff --git a/src/panfrost/midgard/helpers.h b/src/panfrost/midgard/helpers.h index f1f502372d8..d7df9ad01a8 100644 --- a/src/panfrost/midgard/helpers.h +++ b/src/panfrost/midgard/helpers.h @@ -47,7 +47,7 @@ ) #define OP_IS_STORE(op) (\ - OP_IS_STORE_VARY(op) || \ + OP_IS_STORE_R26(op) || \ op == midgard_op_st_cubemap_coords \ ) @@ -56,7 +56,7 @@ op == midgard_op_ldst_perspective_division_w \ ) -#define OP_IS_R27_ONLY(op) ( \ +#define OP_IS_VEC4_ONLY(op) ( \ OP_IS_PROJECTION(op) || \ op == midgard_op_st_cubemap_coords \ ) @@ -345,7 +345,7 @@ mir_is_simple_swizzle(unsigned swizzle, unsigned mask) static inline uint8_t midgard_ldst_reg(unsigned reg, unsigned component) { - assert(reg == REGISTER_LDST_BASE || (reg == REGISTER_LDST_BASE + 1)); + assert((reg == REGISTER_LDST_BASE) || (reg == REGISTER_LDST_BASE + 1)); midgard_ldst_register_select sel = { .component = component, diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c index 4758677aa76..5fdf435893d 100644 --- a/src/panfrost/midgard/midgard_compile.c +++ b/src/panfrost/midgard/midgard_compile.c @@ -655,37 +655,6 @@ emit_condition_mixed(compiler_context *ctx, nir_alu_src *src, unsigned nr_comp) emit_mir_instruction(ctx, ins); } - - -/* Likewise, indirect offsets are put in r27.w. TODO: Allow componentwise - * pinning to eliminate this move in all known cases */ - -static void -emit_indirect_offset(compiler_context *ctx, nir_src *src) -{ - int offset = nir_src_index(ctx, src); - - midgard_instruction ins = { - .type = TAG_ALU_4, - .mask = 1 << COMPONENT_W, - .ssa_args = { - .src0 = SSA_UNUSED_1, - .src1 = offset, - .dest = SSA_FIXED_REGISTER(REGISTER_LDST_BASE + 1), - }, - .alu = { - .op = midgard_alu_op_imov, - .outmod = midgard_outmod_int_wrap, - .reg_mode = midgard_reg_mode_32, - .dest_override = midgard_dest_override_none, - .src1 = vector_alu_srco_unsigned(zero_alu_src), - .src2 = vector_alu_srco_unsigned(blank_alu_src_xxxx) - }, - }; - - emit_mir_instruction(ctx, ins); -} - #define ALU_CASE(nir, _op) \ case nir_op_##nir: \ op = midgard_alu_op_##_op; \ @@ -1172,8 +1141,8 @@ emit_ubo_read( ins.load_store.address = offset >> 3; if (indirect_offset) { - emit_indirect_offset(ctx, indirect_offset); - ins.load_store.arg_2 = 0x87; + ins.ssa_args.src1 = nir_src_index(ctx, indirect_offset); + ins.load_store.arg_2 = 0x80; } else { ins.load_store.arg_2 = 0x1E; } @@ -1207,14 +1176,10 @@ emit_varying_read( memcpy(&u, &p, sizeof(p)); ins.load_store.varying_parameters = u; - if (indirect_offset) { - /* We need to add in the dynamic index, moved to r27.w */ - emit_indirect_offset(ctx, indirect_offset); - ins.load_store.arg_2 = 0x07; - } else { - /* Just a direct load */ + if (indirect_offset) + ins.ssa_args.src1 = nir_src_index(ctx, indirect_offset); + else ins.load_store.arg_2 = 0x1E; - } ins.load_store.arg_1 = 0x9E; @@ -1616,11 +1581,10 @@ emit_texop_native(compiler_context *ctx, nir_tex_instr *instr, * texture register */ unsigned temp = make_compiler_temp(ctx); - midgard_instruction st = m_st_cubemap_coords(temp, 0); st.ssa_args.src0 = index; - st.load_store.arg_1 = 0x24; st.mask = 0x3; /* xy */ + st.load_store.arg_1 = 0x20; st.load_store.swizzle = alu_src.swizzle; emit_mir_instruction(ctx, st); diff --git a/src/panfrost/midgard/midgard_liveness.c b/src/panfrost/midgard/midgard_liveness.c index 39f00c09651..7da62cf1c28 100644 --- a/src/panfrost/midgard/midgard_liveness.c +++ b/src/panfrost/midgard/midgard_liveness.c @@ -66,11 +66,7 @@ is_live_after_successors(compiler_context *ctx, midgard_block *bl, int src) /* If written-before-use, we're gone */ - if (ins->ssa_args.dest == src && - ins->type == TAG_LOAD_STORE_4 && - ins->load_store.op == midgard_op_ld_int4 && - ins->load_store.arg_1 == 0xEA && - ins->load_store.arg_2 == 0x1E) { + if (ins->ssa_args.dest == src && ins->mask == 0xF) { block_done = true; break; } diff --git a/src/panfrost/midgard/midgard_opt_perspective.c b/src/panfrost/midgard/midgard_opt_perspective.c index fe816481fef..d648cc7dd2c 100644 --- a/src/panfrost/midgard/midgard_opt_perspective.c +++ b/src/panfrost/midgard/midgard_opt_perspective.c @@ -124,7 +124,7 @@ midgard_opt_combine_projection(compiler_context *ctx, midgard_block *block) midgard_op_ldst_perspective_division_w : midgard_op_ldst_perspective_division_z, .swizzle = SWIZZLE_XYZW, - .arg_1 = 0x24, + .arg_1 = 0x20 } }; diff --git a/src/panfrost/midgard/midgard_ra.c b/src/panfrost/midgard/midgard_ra.c index 4f7844bbcba..67e8340bbfd 100644 --- a/src/panfrost/midgard/midgard_ra.c +++ b/src/panfrost/midgard/midgard_ra.c @@ -48,7 +48,6 @@ /* We have overlapping register classes for special registers, handled via * shadows */ -#define SHADOW_R27 17 #define SHADOW_R28 18 #define SHADOW_R29 19 @@ -155,8 +154,8 @@ index_to_reg(compiler_context *ctx, struct ra_graph *g, int reg) /* Apply shadow registers */ - if (phys >= SHADOW_R27 && phys <= SHADOW_R29) - phys += 27 - SHADOW_R27; + if (phys >= SHADOW_R28 && phys <= SHADOW_R29) + phys += 28 - SHADOW_R28; struct phys_reg r = { .reg = phys, @@ -213,13 +212,10 @@ create_register_set(unsigned work_count, unsigned *classes) /* Special register classes have other register counts */ unsigned count = - (c == REG_CLASS_WORK) ? work_count : - (c == REG_CLASS_LDST27) ? 1 : 2; + (c == REG_CLASS_WORK) ? work_count : 2; - /* We arbitraily pick r17 (RA unused) as the shadow for r27 */ unsigned first_reg = (c == REG_CLASS_LDST) ? 26 : - (c == REG_CLASS_LDST27) ? SHADOW_R27 : (c == REG_CLASS_TEXR) ? 28 : (c == REG_CLASS_TEXW) ? SHADOW_R28 : 0; @@ -256,7 +252,6 @@ create_register_set(unsigned work_count, unsigned *classes) /* We have duplicate classes */ - add_shadow_conflicts(regs, 27, SHADOW_R27); add_shadow_conflicts(regs, 28, SHADOW_R28); add_shadow_conflicts(regs, 29, SHADOW_R29); @@ -315,17 +310,8 @@ set_class(unsigned *classes, unsigned node, unsigned class) if (class == current_class) return; - - if ((current_class == REG_CLASS_LDST27) && (class == REG_CLASS_LDST)) - return; - - /* If we're changing, we must not have already assigned a special class - */ - - bool compat = current_class == REG_CLASS_WORK; - compat |= (current_class == REG_CLASS_LDST) && (class == REG_CLASS_LDST27); - - assert(compat); + /* If we're changing, we haven't assigned a special class */ + assert(current_class == REG_CLASS_WORK); classes[node] &= 0x3; classes[node] |= (class << 2); @@ -355,7 +341,6 @@ check_read_class(unsigned *classes, unsigned tag, unsigned node) switch (current_class) { case REG_CLASS_LDST: - case REG_CLASS_LDST27: return (tag == TAG_LOAD_STORE_4); case REG_CLASS_TEXR: return (tag == TAG_TEXTURE_4); @@ -383,7 +368,6 @@ check_write_class(unsigned *classes, unsigned tag, unsigned node) case REG_CLASS_TEXW: return (tag == TAG_TEXTURE_4); case REG_CLASS_LDST: - case REG_CLASS_LDST27: case REG_CLASS_WORK: return (tag == TAG_ALU_4) || (tag == TAG_LOAD_STORE_4); default: @@ -491,22 +475,28 @@ mir_lower_special_reads(compiler_context *ctx) v_mov(idx, blank_alu_src, i) : v_mov(i, blank_alu_src, idx); - /* Insert move after each write */ + /* Insert move before each read/write, depending on the + * hazard we're trying to account for */ + mir_foreach_instr_global_safe(ctx, pre_use) { - if (pre_use->ssa_args.dest != i) + if (pre_use->type != classes[j]) continue; - /* If the hazard is writing, we need to - * specific insert moves for the contentious - * class. If the hazard is reading, we insert - * moves whenever it is written */ + if (hazard_write) { + if (pre_use->ssa_args.dest != i) + continue; + } else { + if (!mir_has_arg(pre_use, i)) + continue; + } - if (hazard_write && pre_use->type != classes[j]) - continue; - - midgard_instruction *use = mir_next_op(pre_use); - assert(use); - mir_insert_instruction_before(use, m); + if (hazard_write) { + midgard_instruction *use = mir_next_op(pre_use); + assert(use); + mir_insert_instruction_before(use, m); + } else { + mir_insert_instruction_before(pre_use, m); + } } /* Rewrite to use */ @@ -580,13 +570,12 @@ allocate_registers(compiler_context *ctx, bool *spilled) /* Check if this operation imposes any classes */ if (ins->type == TAG_LOAD_STORE_4) { - bool force_r27 = OP_IS_R27_ONLY(ins->load_store.op); - unsigned class = force_r27 ? REG_CLASS_LDST27 : REG_CLASS_LDST; + bool force_vec4_only = OP_IS_VEC4_ONLY(ins->load_store.op); - set_class(found_class, ins->ssa_args.src0, class); - set_class(found_class, ins->ssa_args.src1, class); + set_class(found_class, ins->ssa_args.src0, REG_CLASS_LDST); + set_class(found_class, ins->ssa_args.src1, REG_CLASS_LDST); - if (force_r27) { + if (force_vec4_only) { force_vec4(found_class, ins->ssa_args.dest); force_vec4(found_class, ins->ssa_args.src0); force_vec4(found_class, ins->ssa_args.src1); @@ -759,6 +748,14 @@ install_registers_instr( case TAG_LOAD_STORE_4: { bool fixed = args.src0 >= SSA_FIXED_MINIMUM; + /* Which physical register we read off depends on + * whether we are loading or storing -- think about the + * logical dataflow */ + + bool encodes_src = + OP_IS_STORE(ins->load_store.op) && + ins->load_store.op != midgard_op_st_cubemap_coords; + if (OP_IS_STORE_R26(ins->load_store.op) && fixed) { ins->load_store.reg = SSA_REG_FROM_FIXED(args.src0); } else if (OP_IS_STORE_VARY(ins->load_store.op)) { @@ -769,14 +766,6 @@ install_registers_instr( /* TODO: swizzle/mask */ } else { - /* Which physical register we read off depends on - * whether we are loading or storing -- think about the - * logical dataflow */ - - bool encodes_src = - OP_IS_STORE(ins->load_store.op) && - ins->load_store.op != midgard_op_st_cubemap_coords; - unsigned r = encodes_src ? args.src0 : args.dest; @@ -792,6 +781,26 @@ install_registers_instr( ins->mask, src); } + /* We also follow up by actual arguments */ + + int src2 = + encodes_src ? args.src1 : args.src0; + + int src3 = + encodes_src ? -1 : args.src1; + + if (src2 >= 0) { + struct phys_reg src = index_to_reg(ctx, g, src2); + unsigned component = __builtin_ctz(src.mask); + ins->load_store.arg_1 |= midgard_ldst_reg(src.reg, component); + } + + if (src3 >= 0) { + struct phys_reg src = index_to_reg(ctx, g, src3); + unsigned component = __builtin_ctz(src.mask); + ins->load_store.arg_2 |= midgard_ldst_reg(src.reg, component); + } + break; }