pan/mdg: Set lower_uniforms_to_ubo

Removes our custom load_uniform implementation and unifies the command
stream side with Bifrost, preparing for additional optimizations.
shader-db results are a wash. It's worth noting some of the increase in
bundles is due to peephole select which is notoriously awkward for
shader-db stats.

total instructions in shared programs: 96611 -> 95613 (-1.03%)
instructions in affected programs: 17562 -> 16564 (-5.68%)
helped: 137
HURT: 13
helped stats (abs) min: 2 max: 27 x̄: 7.83 x̃: 7
helped stats (rel) min: 0.61% max: 20.00% x̄: 7.19% x̃: 5.75%
HURT stats (abs)   min: 1 max: 33 x̄: 5.77 x̃: 2
HURT stats (rel)   min: 0.47% max: 15.64% x̄: 3.56% x̃: 1.72%
95% mean confidence interval for instructions value: -7.78 -5.53
95% mean confidence interval for instructions %-change: -7.13% -5.38%
Instructions are helped.

total bundles in shared programs: 44886 -> 45230 (0.77%)
bundles in affected programs: 6649 -> 6993 (5.17%)
helped: 54
HURT: 68
helped stats (abs) min: 1 max: 6 x̄: 2.35 x̃: 2
helped stats (rel) min: 1.04% max: 6.82% x̄: 4.37% x̃: 4.80%
HURT stats (abs)   min: 1 max: 16 x̄: 6.93 x̃: 6
HURT stats (rel)   min: 2.22% max: 37.50% x̄: 14.03% x̃: 10.00%
95% mean confidence interval for bundles value: 1.78 3.85
95% mean confidence interval for bundles %-change: 3.73% 8.04%
Bundles are HURT.

total quadwords in shared programs: 76320 -> 75533 (-1.03%)
quadwords in affected programs: 12404 -> 11617 (-6.34%)
helped: 133
HURT: 3
helped stats (abs) min: 1 max: 18 x̄: 6.18 x̃: 6
helped stats (rel) min: 0.36% max: 18.18% x̄: 7.34% x̃: 7.45%
HURT stats (abs)   min: 1 max: 19 x̄: 11.67 x̃: 15
HURT stats (rel)   min: 0.55% max: 15.79% x̄: 9.94% x̃: 13.48%
95% mean confidence interval for quadwords value: -6.41 -5.16
95% mean confidence interval for quadwords %-change: -7.58% -6.34%
Quadwords are helped.

total registers in shared programs: 6958 -> 6928 (-0.43%)
registers in affected programs: 524 -> 494 (-5.73%)
helped: 42
HURT: 15
helped stats (abs) min: 1 max: 2 x̄: 1.10 x̃: 1
helped stats (rel) min: 6.25% max: 25.00% x̄: 12.71% x̃: 12.50%
HURT stats (abs)   min: 1 max: 2 x̄: 1.07 x̃: 1
HURT stats (rel)   min: 9.09% max: 20.00% x̄: 11.44% x̃: 11.11%
95% mean confidence interval for registers value: -0.79 -0.26
95% mean confidence interval for registers %-change: -9.35% -3.36%
Registers are helped.

total threads in shared programs: 5109 -> 5107 (-0.04%)
threads in affected programs: 16 -> 14 (-12.50%)
helped: 2
HURT: 6
helped stats (abs) min: 2 max: 2 x̄: 2.00 x̃: 2
helped stats (rel) min: 100.00% max: 100.00% x̄: 100.00% x̃: 100.00%
HURT stats (abs)   min: 1 max: 1 x̄: 1.00 x̃: 1
HURT stats (rel)   min: 50.00% max: 50.00% x̄: 50.00% x̃: 50.00%
95% mean confidence interval for threads value: -1.41 0.91
95% mean confidence interval for threads %-change: -70.55% 45.55%
Inconclusive result (value mean confidence interval includes 0).

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>

WIP - do peephole ourselves

Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8973>
This commit is contained in:
Alyssa Rosenzweig 2021-02-07 16:15:07 -05:00 committed by Marge Bot
parent 2a82e58360
commit a0b9420915
2 changed files with 21 additions and 16 deletions

View File

@ -399,16 +399,12 @@ panfrost_shader_compile(struct panfrost_context *ctx,
state->attribute_count = attribute_count;
state->varying_count = varying_count;
/* off-by-one for uniforms. Not needed on Bifrost since uniforms
* have been lowered to UBOs using nir_lower_uniforms_to_ubo() which
* already increments s->info.num_ubos. We do have to account for the
* "no uniform, no UBO" case though, otherwise sysval passed through
* uniforms won't work correctly.
/* Uniforms have been lowered to UBOs using nir_lower_uniforms_to_ubo()
* which already increments s->info.num_ubos. We do have to account for
* the "no uniform, no UBO" case though, otherwise sysval passed
* through uniforms won't work correctly.
*/
if (pan_is_bifrost(dev))
state->ubo_count = MAX2(s->info.num_ubos, 1);
else
state->ubo_count = s->info.num_ubos + 1;
state->ubo_count = MAX2(s->info.num_ubos, 1);
/* Prepare the descriptors at compile-time */
state->shader.shader = shader;

View File

@ -320,6 +320,15 @@ optimise_nir(nir_shader *nir, unsigned quirks, bool is_blend)
NIR_PASS(progress, nir, midgard_nir_lower_algebraic_early);
/* Peephole select is more effective before lowering uniforms to UBO,
* so do a round of that, and then call lower_uniforms_to_ubo
* explicitly (instead of relying on the state tracker to do it). Note
* the state tracker does run peephole_select before lowering uniforms
* to UBO ordinarily, but it isn't as aggressive as we need. */
NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true);
NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, 16);
do {
progress = false;
@ -1653,7 +1662,6 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
break;
}
case nir_intrinsic_load_uniform:
case nir_intrinsic_load_ubo:
case nir_intrinsic_load_global:
case nir_intrinsic_load_global_constant:
@ -1662,7 +1670,6 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
case nir_intrinsic_load_input:
case nir_intrinsic_load_kernel_input:
case nir_intrinsic_load_interpolated_input: {
bool is_uniform = instr->intrinsic == nir_intrinsic_load_uniform;
bool is_ubo = instr->intrinsic == nir_intrinsic_load_ubo;
bool is_global = instr->intrinsic == nir_intrinsic_load_global ||
instr->intrinsic == nir_intrinsic_load_global_constant;
@ -1676,7 +1683,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
/* TODO: Infer type? Does it matter? */
nir_alu_type t =
(is_interp) ? nir_type_float :
(is_uniform || is_flat) ? nir_intrinsic_dest_type(instr) :
(is_flat) ? nir_intrinsic_dest_type(instr) :
nir_type_uint;
t = nir_alu_type_get_base_type(t);
@ -1700,9 +1707,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
nir_intrinsic_component(instr) : 0;
reg = nir_dest_index(&instr->dest);
if (is_uniform && !ctx->is_blend) {
emit_ubo_read(ctx, &instr->instr, reg, (ctx->sysvals.sysval_count + offset) * 16, indirect_offset, 4, 0);
} else if (is_kernel) {
if (is_kernel) {
emit_ubo_read(ctx, &instr->instr, reg, (ctx->sysvals.sysval_count * 16) + offset, indirect_offset, 0, 0);
} else if (is_ubo) {
nir_src index = instr->src[0];
@ -1710,7 +1715,11 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
/* TODO: Is indirect block number possible? */
assert(nir_src_is_const(index));
uint32_t uindex = nir_src_as_uint(index) + 1;
uint32_t uindex = nir_src_as_uint(index);
if (uindex == 0)
offset += ctx->sysvals.sysval_count * 16;
emit_ubo_read(ctx, &instr->instr, reg, offset, indirect_offset, 0, uindex);
} else if (is_global || is_shared || is_scratch) {
unsigned seg = is_global ? LDST_GLOBAL : (is_shared ? LDST_SHARED : LDST_SCRATCH);