pan/midgard: Remove "r27-only" register class

As far as I know, there's no such thing as a load/store op that only
takes its argument in r27. We just need to set the appropriate arg_1
field in the RA to specify other registers if we want them.
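
For context, a minimal sketch of the packing idea (hypothetical helper
name and illustrative bit positions, not the exact hardware encoding):
the arg_1/arg_2 bytes name one of the two load/store registers
(r26/r27) plus a starting component, which is all RA needs to emit to
route an allocated register into the op. The real encoding lives in
midgard_ldst_reg() in helpers.h, shown in the diff below:

    #include <assert.h>
    #include <stdint.h>

    #define REGISTER_LDST_BASE 26

    /* Pack a register/component pair into a load/store arg byte.
     * Field positions here are made up for illustration. */
    static uint8_t
    pack_ldst_arg(unsigned reg, unsigned component)
    {
            /* Only r26/r27 are addressable as load/store arguments */
            assert(reg == REGISTER_LDST_BASE ||
                   reg == REGISTER_LDST_BASE + 1);

            unsigned select = reg - REGISTER_LDST_BASE; /* r26=0, r27=1 */
            return (component << 2) | (select << 4);
    }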

To facilitate this, various RA-related changes are needed across the
compiler; this should also fix indirect offsets, which were implicitly
interpreted as "r27-only" despite not even passing through RA yet. One
ripple-effect change is switching the move insertion point and adjusting
the liveness analysis accordingly, so while this was intended to be
functionally neutral, there are some shader-db changes:

total instructions in shared programs: 3511 -> 3498 (-0.37%)
instructions in affected programs: 563 -> 550 (-2.31%)
helped: 12
HURT: 0
helped stats (abs) min: 1 max: 2 x̄: 1.08 x̃: 1
helped stats (rel) min: 0.93% max: 5.00% x̄: 2.58% x̃: 2.33%
95% mean confidence interval for instructions value: -1.27 -0.90
95% mean confidence interval for instructions %-change: -3.23% -1.93%
Instructions are helped.

total bundles in shared programs: 2067 -> 2067 (0.00%)
bundles in affected programs: 398 -> 398 (0.00%)
helped: 7
HURT: 4
helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1
helped stats (rel) min: 1.54% max: 10.00% x̄: 5.04% x̃: 5.56%
HURT stats (abs)   min: 1 max: 2 x̄: 1.75 x̃: 2
HURT stats (rel)   min: 2.13% max: 4.26% x̄: 3.72% x̃: 4.26%
95% mean confidence interval for bundles value: -0.95 0.95
95% mean confidence interval for bundles %-change: -5.21% 1.50%
Inconclusive result (value mean confidence interval includes 0).

total quadwords in shared programs: 3464 -> 3454 (-0.29%)
quadwords in affected programs: 1199 -> 1189 (-0.83%)
helped: 18
HURT: 4
helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1
helped stats (rel) min: 1.03% max: 5.26% x̄: 2.44% x̃: 1.79%
HURT stats (abs)   min: 2 max: 2 x̄: 2.00 x̃: 2
HURT stats (rel)   min: 2.56% max: 2.82% x̄: 2.63% x̃: 2.56%
95% mean confidence interval for quadwords value: -0.98 0.07
Inconclusive result (value mean confidence interval includes 0).

total registers in shared programs: 383 -> 373 (-2.61%)
registers in affected programs: 56 -> 46 (-17.86%)
helped: 12
HURT: 2
helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1
helped stats (rel) min: 9.09% max: 33.33% x̄: 29.58% x̃: 33.33%
HURT stats (abs)   min: 1 max: 1 x̄: 1.00 x̃: 1
HURT stats (rel)   min: 20.00% max: 50.00% x̄: 35.00% x̃: 35.00%
95% mean confidence interval for registers value: -1.13 -0.29
95% mean confidence interval for registers %-change: -35.07% -5.63%
Registers are helped.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
commit 513d02cfeb (parent 5d9b7a8ddb)
Author: Alyssa Rosenzweig
Date:   2019-08-01 14:28:34 -07:00

5 changed files with 66 additions and 97 deletions

diff --git a/src/panfrost/midgard/helpers.h b/src/panfrost/midgard/helpers.h

@@ -47,7 +47,7 @@
 )
 
 #define OP_IS_STORE(op) (\
-        OP_IS_STORE_VARY(op) || \
+        OP_IS_STORE_R26(op) || \
         op == midgard_op_st_cubemap_coords \
 )
@@ -56,7 +56,7 @@
         op == midgard_op_ldst_perspective_division_w \
 )
 
-#define OP_IS_R27_ONLY(op) ( \
+#define OP_IS_VEC4_ONLY(op) ( \
         OP_IS_PROJECTION(op) || \
         op == midgard_op_st_cubemap_coords \
 )
@@ -345,7 +345,7 @@ mir_is_simple_swizzle(unsigned swizzle, unsigned mask)
 static inline uint8_t
 midgard_ldst_reg(unsigned reg, unsigned component)
 {
-        assert(reg == REGISTER_LDST_BASE || (reg == REGISTER_LDST_BASE + 1));
+        assert((reg == REGISTER_LDST_BASE) || (reg == REGISTER_LDST_BASE + 1));
 
         midgard_ldst_register_select sel = {
                 .component = component,

diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c

@@ -655,37 +655,6 @@ emit_condition_mixed(compiler_context *ctx, nir_alu_src *src, unsigned nr_comp)
         emit_mir_instruction(ctx, ins);
 }
 
-/* Likewise, indirect offsets are put in r27.w. TODO: Allow componentwise
- * pinning to eliminate this move in all known cases */
-
-static void
-emit_indirect_offset(compiler_context *ctx, nir_src *src)
-{
-        int offset = nir_src_index(ctx, src);
-
-        midgard_instruction ins = {
-                .type = TAG_ALU_4,
-                .mask = 1 << COMPONENT_W,
-                .ssa_args = {
-                        .src0 = SSA_UNUSED_1,
-                        .src1 = offset,
-                        .dest = SSA_FIXED_REGISTER(REGISTER_LDST_BASE + 1),
-                },
-                .alu = {
-                        .op = midgard_alu_op_imov,
-                        .outmod = midgard_outmod_int_wrap,
-                        .reg_mode = midgard_reg_mode_32,
-                        .dest_override = midgard_dest_override_none,
-                        .src1 = vector_alu_srco_unsigned(zero_alu_src),
-                        .src2 = vector_alu_srco_unsigned(blank_alu_src_xxxx)
-                },
-        };
-
-        emit_mir_instruction(ctx, ins);
-}
-
 #define ALU_CASE(nir, _op) \
         case nir_op_##nir: \
                 op = midgard_alu_op_##_op; \
@@ -1172,8 +1141,8 @@ emit_ubo_read(
         ins.load_store.address = offset >> 3;
 
         if (indirect_offset) {
-                emit_indirect_offset(ctx, indirect_offset);
-                ins.load_store.arg_2 = 0x87;
+                ins.ssa_args.src1 = nir_src_index(ctx, indirect_offset);
+                ins.load_store.arg_2 = 0x80;
         } else {
                 ins.load_store.arg_2 = 0x1E;
         }
@@ -1207,14 +1176,10 @@ emit_varying_read(
         memcpy(&u, &p, sizeof(p));
         ins.load_store.varying_parameters = u;
 
-        if (indirect_offset) {
-                /* We need to add in the dynamic index, moved to r27.w */
-                emit_indirect_offset(ctx, indirect_offset);
-                ins.load_store.arg_2 = 0x07;
-        } else {
-                /* Just a direct load */
+        if (indirect_offset)
+                ins.ssa_args.src1 = nir_src_index(ctx, indirect_offset);
+        else
                 ins.load_store.arg_2 = 0x1E;
-        }
 
         ins.load_store.arg_1 = 0x9E;
@@ -1616,11 +1581,10 @@ emit_texop_native(compiler_context *ctx, nir_tex_instr *instr,
          * texture register */
         unsigned temp = make_compiler_temp(ctx);
         midgard_instruction st = m_st_cubemap_coords(temp, 0);
         st.ssa_args.src0 = index;
-        st.load_store.arg_1 = 0x24;
-
         st.mask = 0x3; /* xy */
+        st.load_store.arg_1 = 0x20;
         st.load_store.swizzle = alu_src.swizzle;
         emit_mir_instruction(ctx, st);

diff --git a/src/panfrost/midgard/midgard_liveness.c b/src/panfrost/midgard/midgard_liveness.c

@@ -66,11 +66,7 @@ is_live_after_successors(compiler_context *ctx, midgard_block *bl, int src)
                 /* If written-before-use, we're gone */
 
-                if (ins->ssa_args.dest == src &&
-                    ins->type == TAG_LOAD_STORE_4 &&
-                    ins->load_store.op == midgard_op_ld_int4 &&
-                    ins->load_store.arg_1 == 0xEA &&
-                    ins->load_store.arg_2 == 0x1E) {
+                if (ins->ssa_args.dest == src && ins->mask == 0xF) {
                         block_done = true;
                         break;
                 }

diff --git a/src/panfrost/midgard/midgard_opt_perspective.c b/src/panfrost/midgard/midgard_opt_perspective.c

@@ -124,7 +124,7 @@ midgard_opt_combine_projection(compiler_context *ctx, midgard_block *block)
                                 midgard_op_ldst_perspective_division_w :
                                 midgard_op_ldst_perspective_division_z,
                         .swizzle = SWIZZLE_XYZW,
-                        .arg_1 = 0x24,
+                        .arg_1 = 0x20
                 }
         };

diff --git a/src/panfrost/midgard/midgard_ra.c b/src/panfrost/midgard/midgard_ra.c

@@ -48,7 +48,6 @@
 /* We have overlapping register classes for special registers, handled via
  * shadows */
 
-#define SHADOW_R27 17
 #define SHADOW_R28 18
 #define SHADOW_R29 19
@@ -155,8 +154,8 @@ index_to_reg(compiler_context *ctx, struct ra_graph *g, int reg)
         /* Apply shadow registers */
 
-        if (phys >= SHADOW_R27 && phys <= SHADOW_R29)
-                phys += 27 - SHADOW_R27;
+        if (phys >= SHADOW_R28 && phys <= SHADOW_R29)
+                phys += 28 - SHADOW_R28;
 
         struct phys_reg r = {
                 .reg = phys,
@@ -213,13 +212,10 @@ create_register_set(unsigned work_count, unsigned *classes)
                 /* Special register classes have other register counts */
                 unsigned count =
-                        (c == REG_CLASS_WORK) ? work_count :
-                        (c == REG_CLASS_LDST27) ? 1 : 2;
+                        (c == REG_CLASS_WORK) ? work_count : 2;
 
-                /* We arbitraily pick r17 (RA unused) as the shadow for r27 */
                 unsigned first_reg =
                         (c == REG_CLASS_LDST) ? 26 :
-                        (c == REG_CLASS_LDST27) ? SHADOW_R27 :
                         (c == REG_CLASS_TEXR) ? 28 :
                         (c == REG_CLASS_TEXW) ? SHADOW_R28 :
                         0;
@@ -256,7 +252,6 @@
         /* We have duplicate classes */
-        add_shadow_conflicts(regs, 27, SHADOW_R27);
         add_shadow_conflicts(regs, 28, SHADOW_R28);
         add_shadow_conflicts(regs, 29, SHADOW_R29);
@@ -315,17 +310,8 @@ set_class(unsigned *classes, unsigned node, unsigned class)
         if (class == current_class)
                 return;
 
-        if ((current_class == REG_CLASS_LDST27) && (class == REG_CLASS_LDST))
-                return;
-
-        /* If we're changing, we must not have already assigned a special class
-         */
-        bool compat = current_class == REG_CLASS_WORK;
-        compat |= (current_class == REG_CLASS_LDST) && (class == REG_CLASS_LDST27);
-
-        assert(compat);
+        /* If we're changing, we haven't assigned a special class */
+        assert(current_class == REG_CLASS_WORK);
 
         classes[node] &= 0x3;
         classes[node] |= (class << 2);
@@ -355,7 +341,6 @@ check_read_class(unsigned *classes, unsigned tag, unsigned node)
         switch (current_class) {
         case REG_CLASS_LDST:
-        case REG_CLASS_LDST27:
                 return (tag == TAG_LOAD_STORE_4);
         case REG_CLASS_TEXR:
                 return (tag == TAG_TEXTURE_4);
@@ -383,7 +368,6 @@ check_write_class(unsigned *classes, unsigned tag, unsigned node)
         case REG_CLASS_TEXW:
                 return (tag == TAG_TEXTURE_4);
         case REG_CLASS_LDST:
-        case REG_CLASS_LDST27:
         case REG_CLASS_WORK:
                 return (tag == TAG_ALU_4) || (tag == TAG_LOAD_STORE_4);
         default:
@@ -491,22 +475,28 @@ mir_lower_special_reads(compiler_context *ctx)
                                 v_mov(idx, blank_alu_src, i) :
                                 v_mov(i, blank_alu_src, idx);
 
-                        /* Insert move after each write */
+                        /* Insert move before each read/write, depending on the
+                         * hazard we're trying to account for */
 
                         mir_foreach_instr_global_safe(ctx, pre_use) {
-                                if (pre_use->ssa_args.dest != i)
+                                if (pre_use->type != classes[j])
                                         continue;
 
+                                /* If the hazard is writing, we need to
+                                 * specific insert moves for the contentious
+                                 * class. If the hazard is reading, we insert
+                                 * moves whenever it is written */
+
+                                if (hazard_write) {
+                                        if (pre_use->ssa_args.dest != i)
+                                                continue;
+                                } else {
+                                        if (!mir_has_arg(pre_use, i))
+                                                continue;
+                                }
+
-                                if (hazard_write && pre_use->type != classes[j])
-                                        continue;
-
-                                midgard_instruction *use = mir_next_op(pre_use);
-                                assert(use);
-                                mir_insert_instruction_before(use, m);
+                                if (hazard_write) {
+                                        midgard_instruction *use = mir_next_op(pre_use);
+                                        assert(use);
+                                        mir_insert_instruction_before(use, m);
+                                } else {
+                                        mir_insert_instruction_before(pre_use, m);
+                                }
                         }
 
                         /* Rewrite to use */
@@ -580,13 +570,12 @@ allocate_registers(compiler_context *ctx, bool *spilled)
                 /* Check if this operation imposes any classes */
 
                 if (ins->type == TAG_LOAD_STORE_4) {
-                        bool force_r27 = OP_IS_R27_ONLY(ins->load_store.op);
-                        unsigned class = force_r27 ? REG_CLASS_LDST27 : REG_CLASS_LDST;
+                        bool force_vec4_only = OP_IS_VEC4_ONLY(ins->load_store.op);
 
-                        set_class(found_class, ins->ssa_args.src0, class);
-                        set_class(found_class, ins->ssa_args.src1, class);
+                        set_class(found_class, ins->ssa_args.src0, REG_CLASS_LDST);
+                        set_class(found_class, ins->ssa_args.src1, REG_CLASS_LDST);
 
-                        if (force_r27) {
+                        if (force_vec4_only) {
                                 force_vec4(found_class, ins->ssa_args.dest);
                                 force_vec4(found_class, ins->ssa_args.src0);
                                 force_vec4(found_class, ins->ssa_args.src1);
@@ -759,6 +748,14 @@ install_registers_instr(
         case TAG_LOAD_STORE_4: {
                 bool fixed = args.src0 >= SSA_FIXED_MINIMUM;
 
+                /* Which physical register we read off depends on
+                 * whether we are loading or storing -- think about the
+                 * logical dataflow */
+
+                bool encodes_src =
+                        OP_IS_STORE(ins->load_store.op) &&
+                        ins->load_store.op != midgard_op_st_cubemap_coords;
+
                 if (OP_IS_STORE_R26(ins->load_store.op) && fixed) {
                         ins->load_store.reg = SSA_REG_FROM_FIXED(args.src0);
 
 } else if (OP_IS_STORE_VARY(ins->load_store.op)) {
@@ -769,14 +766,6 @@ install_registers_instr(
                         /* TODO: swizzle/mask */
                 } else {
-                        /* Which physical register we read off depends on
-                         * whether we are loading or storing -- think about the
-                         * logical dataflow */
-
-                        bool encodes_src =
-                                OP_IS_STORE(ins->load_store.op) &&
-                                ins->load_store.op != midgard_op_st_cubemap_coords;
-
                         unsigned r = encodes_src ?
                                 args.src0 : args.dest;
@@ -792,6 +781,26 @@ install_registers_instr(
                                 ins->mask, src);
                 }
 
+                /* We also follow up by actual arguments */
+
+                int src2 =
+                        encodes_src ? args.src1 : args.src0;
+
+                int src3 =
+                        encodes_src ? -1 : args.src1;
+
+                if (src2 >= 0) {
+                        struct phys_reg src = index_to_reg(ctx, g, src2);
+                        unsigned component = __builtin_ctz(src.mask);
+                        ins->load_store.arg_1 |= midgard_ldst_reg(src.reg, component);
+                }
+
+                if (src3 >= 0) {
+                        struct phys_reg src = index_to_reg(ctx, g, src3);
+                        unsigned component = __builtin_ctz(src.mask);
+                        ins->load_store.arg_2 |= midgard_ldst_reg(src.reg, component);
+                }
+
                 break;
         }
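
To see how the follow-up argument routing above behaves, here is a
standalone trace (made-up SSA node indices, not compiler code): for a
store, src0 already encodes the value being stored, so the remaining
arguments shift into arg_1/arg_2; for a load, src0 is itself the first
extra argument:

    #include <stdbool.h>
    #include <stdio.h>

    struct ssa_args { int src0, src1, dest; };

    int main(void)
    {
            /* Made-up node indices standing in for RA results */
            struct ssa_args args = { .src0 = 4, .src1 = 7, .dest = 9 };

            for (int is_store = 0; is_store <= 1; ++is_store) {
                    /* Stores (except st_cubemap_coords) read their data
                     * from src0, mirroring encodes_src above */
                    bool encodes_src = is_store;

                    int src2 = encodes_src ? args.src1 : args.src0;
                    int src3 = encodes_src ? -1 : args.src1;

                    printf("%s: arg_1 <- %d, arg_2 <- %d (-1 = unused)\n",
                           is_store ? "store" : "load", src2, src3);
            }

            return 0;
    }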