panfrost: Fix uniform_count on Midgard

The compiler ABI specifies push uniforms at a 4-byte granularity (like
Bifrost), but Midgard requires a 16-byte granularity. As such, if the
number of pushed words is not a multiple of 4, there is a buffer overrun
at shader load time. Ordinarily the overrun lands in mapped memory and
the extra words are never used, so the garbage is ignored.
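
For illustration, here is a minimal standalone sketch of the arithmetic
(ALIGN_POT mirrors Mesa's macro of the same name; the word counts are
invented):

    #include <assert.h>
    #include <stdio.h>

    /* Round x up to the next multiple of pot (a power of two),
     * mirroring Mesa's ALIGN_POT macro. */
    #define ALIGN_POT(x, pot) (((x) + (pot) - 1) & ~((pot) - 1))

    int main(void)
    {
            /* 7 pushed 32-bit words = 28 bytes: not a multiple of 16 */
            unsigned push_count = 7;

            /* Midgard's uniform_count is in vec4 (16-byte) units, so
             * the hardware reads whole vec4s: 2 vec4s = 8 words, one
             * word past the end of a 7-word buffer. */
            unsigned vec4s = (push_count + 3) / 4;
            printf("hardware reads %u words, buffer holds %u\n",
                   vec4s * 4, push_count);

            /* The fix below rounds the word count up front, so the
             * buffer is sized to what the hardware will actually read. */
            push_count = ALIGN_POT(push_count, 4);
            assert((push_count & 3) == 0);
            printf("aligned buffer holds %u words\n", push_count);
            return 0;
    }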

However, there was a great deal of confusion around the `uniform_cutoff`
variable. In some cases (such as a full glmark2 run on a 64-bit
processor), the push uniforms would be at the end of a BO and the
cause a page fault.

Remove `uniform_cutoff` entirely and work with `push.count` directly to
avoid faulting, and round the count up to be defensive.
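
The register-allocation hunks below turn on one identity: Midgard has
16 vec4 work registers, and register-mapped uniforms past the first 8
registers eat into that budget. A toy sketch of the trade-off, with the
formula lifted from the diff and the helper name purely illustrative:

    #include <stdio.h>

    #define MAX2(a, b) ((a) > (b) ? (a) : (b))

    /* Work registers left after register-mapping push_words words of
     * uniforms, per the allocate_registers() hunk below. */
    static int work_count(unsigned push_words)
    {
            int rmu = push_words / 4; /* register-mapped uniform vec4s */
            return 16 - MAX2(rmu - 8, 0);
    }

    int main(void)
    {
            printf("%d\n", work_count(32)); /*  8 RMUs -> 16 work regs */
            printf("%d\n", work_count(48)); /* 12 RMUs -> 12 work regs */
            printf("%d\n", work_count(64)); /* 16 RMUs ->  8 work regs */
            return 0;
    }

This is also why mir_ra() prefers demoting uniforms, which frees work
registers, over spilling to TLS.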

Closes: #4289
Fixes: ed810eb0a0 ("panfrost: Don't truncate uniform_count")
Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Tested-by: Robin Murphy <robin.murphy@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9109>

@@ -47,8 +47,10 @@ static inline void
 pan_shader_prepare_midgard_rsd(const struct pan_shader_info *info,
                                struct MALI_RENDERER_STATE *rsd)
 {
+        assert((info->push.count & 3) == 0);
+
         rsd->properties.uniform_buffer_count = info->ubo_count;
-        rsd->properties.midgard.uniform_count = info->midgard.uniform_cutoff;
+        rsd->properties.midgard.uniform_count = info->push.count / 4;
         rsd->properties.midgard.shader_has_side_effects = info->writes_global;
         rsd->properties.midgard.fp_mode = MALI_FP_MODE_GL_INF_NAN_ALLOWED;

@@ -289,9 +289,6 @@ typedef struct compiler_context {
         /* Set of NIR indices that were already emitted as outmods */
         BITSET_WORD *already_emitted;
 
-        /* The number of uniforms allowable for the fast path */
-        int uniform_cutoff;
-
         /* Count of instructions emitted from NIR overall, across all blocks */
         int instruction_count;

@@ -3008,11 +3008,6 @@ midgard_compile_shader_nir(nir_shader *nir,
         ctx->blend_src1 = ~0;
         ctx->quirks = midgard_get_quirks(inputs->gpu_id);
 
-        /* Start off with a safe cutoff, allowing usage of all 16 work
-         * registers. Later, we'll promote uniform reads to uniform registers
-         * if we determine it is beneficial to do so */
-        info->midgard.uniform_cutoff = 8;
-
         /* Initialize at a global (not block) level hash tables */
         ctx->ssa_constants = _mesa_hash_table_u64_create(NULL);

@@ -391,11 +391,11 @@ mir_is_64(midgard_instruction *ins)
 static struct lcra_state *
 allocate_registers(compiler_context *ctx, bool *spilled)
 {
-        /* The number of vec4 work registers available depends on when the
-         * uniforms start and the shader stage. By ABI we limit blend shaders
-         * to 8 registers, should be lower XXX */
-        int work_count = ctx->inputs->is_blend ? 8 :
-                16 - MAX2((ctx->info->midgard.uniform_cutoff - 8), 0);
+        /* The number of vec4 work registers available depends on the number of
+         * register-mapped uniforms and the shader stage. By ABI we limit blend
+         * shaders to 8 registers, should be lower XXX */
+        unsigned rmu = ctx->info->push.count / 4;
+        int work_count = ctx->inputs->is_blend ? 8 : 16 - MAX2(rmu - 8, 0);
 
         /* No register allocation to do with no SSA */
@@ -959,15 +959,13 @@ mir_spill_register(
 static void
 mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff)
 {
-        unsigned old_work_count =
-                16 - MAX2((ctx->info->midgard.uniform_cutoff - 8), 0);
+        unsigned uniforms = ctx->info->push.count / 4;
+        unsigned old_work_count = 16 - MAX2(uniforms - 8, 0);
         unsigned work_count = 16 - MAX2((new_cutoff - 8), 0);
 
         unsigned min_demote = SSA_FIXED_REGISTER(old_work_count);
         unsigned max_demote = SSA_FIXED_REGISTER(work_count);
 
-        ctx->info->midgard.uniform_cutoff = new_cutoff;
-
         mir_foreach_block(ctx, _block) {
                 midgard_block *block = (midgard_block *) _block;
                 mir_foreach_instr_in_block(block, ins) {
@@ -1002,6 +1000,8 @@ mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff)
                         }
                 }
         }
+
+        ctx->info->push.count = MIN2(ctx->info->push.count, new_cutoff * 4);
 }
 
 /* Run register allocation in a loop, spilling until we succeed */
@@ -1022,13 +1022,12 @@ mir_ra(compiler_context *ctx)
         do {
                 if (spilled) {
                         signed spill_node = mir_choose_spill_node(ctx, l);
+                        unsigned uniforms = ctx->info->push.count / 4;
 
                         /* It's a lot cheaper to demote uniforms to get more
                          * work registers than to spill to TLS. */
-                        if (l->spill_class == REG_CLASS_WORK &&
-                            ctx->info->midgard.uniform_cutoff > 8) {
-                                mir_demote_uniforms(ctx, MAX2(ctx->info->midgard.uniform_cutoff - 4, 8));
+                        if (l->spill_class == REG_CLASS_WORK && uniforms > 8) {
+                                mir_demote_uniforms(ctx, MAX2(uniforms - 4, 8));
                         } else if (spill_node == -1) {
                                 fprintf(stderr, "ERROR: Failed to choose spill node\n");
                                 lcra_free(l);

@@ -263,7 +263,9 @@ midgard_promote_uniforms(compiler_context *ctx)
         unsigned work_count = mir_work_heuristic(ctx, &analysis);
         unsigned promoted_count = 24 - work_count;
 
+        /* Ensure we are 16 byte aligned to avoid underallocations */
         mir_pick_ubo(&ctx->info->push, &analysis, promoted_count);
+        ctx->info->push.count = ALIGN_POT(ctx->info->push.count, 4);
 
         /* First, figure out special indices a priori so we don't recompute a lot */
         BITSET_WORD *special = mir_special_indices(ctx);
@@ -287,9 +289,6 @@ midgard_promote_uniforms(compiler_context *ctx)
                 /* Should've taken into account when pushing */
                 assert(address < promoted_count);
 
-                ctx->info->midgard.uniform_cutoff =
-                        MAX2(ctx->info->midgard.uniform_cutoff, address + 1);
-
                 unsigned promoted = SSA_FIXED_REGISTER(uniform_reg);
 
                 /* We do need the move for safety for a non-SSA dest, or if

@@ -145,7 +145,6 @@ struct bifrost_shader_info {
 };
 
 struct midgard_shader_info {
-        unsigned uniform_cutoff;
         unsigned first_tag;
 };