From a5005c349d0304b81f0aa8fbea35162d3630adec Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 8 Oct 2020 10:58:53 +0200 Subject: [PATCH] panfrost: Get rid of the constant patching done on blend shader binaries When constants are used in the blend equation we simply recompile the shader. Signed-off-by: Boris Brezillon Reviewed-by: Alyssa Rosenzweig Part-of: --- src/gallium/drivers/panfrost/pan_blend.h | 26 ++++++--- src/gallium/drivers/panfrost/pan_blend_cso.c | 39 ++++++------- .../drivers/panfrost/pan_blend_shaders.c | 55 +++++++++++++------ .../drivers/panfrost/pan_blend_shaders.h | 12 ++-- src/gallium/drivers/panfrost/pan_job.c | 3 +- src/panfrost/midgard/compiler.h | 6 +- src/panfrost/midgard/midgard_compile.c | 10 +--- src/panfrost/midgard/midgard_schedule.c | 27 --------- src/panfrost/util/pan_ir.h | 6 +- 9 files changed, 86 insertions(+), 98 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_blend.h b/src/gallium/drivers/panfrost/pan_blend.h index 300bb8a819a..8e30d677d05 100644 --- a/src/gallium/drivers/panfrost/pan_blend.h +++ b/src/gallium/drivers/panfrost/pan_blend.h @@ -29,6 +29,7 @@ #define __PAN_BLEND_H #include "util/hash_table.h" +#include "nir.h" struct panfrost_bo; @@ -37,6 +38,17 @@ struct panfrost_bo; struct panfrost_blend_shader { struct panfrost_context *ctx; + nir_shader *nir; + + /* Render target */ + unsigned rt; + + /* RT format */ + enum pipe_format format; + + /* Blend constants */ + float constants[4]; + /* The compiled shader */ void *buffer; @@ -46,10 +58,6 @@ struct panfrost_blend_shader { /* Number of 128-bit work registers required by the shader */ unsigned work_count; - /* Offset into the shader to patch constants. Zero to disable patching - * (it is illogical to have constants at offset 0). */ - unsigned patch_index; - /* First instruction tag (for tagging the pointer) */ unsigned first_tag; }; @@ -126,10 +134,10 @@ struct panfrost_blend_final panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rt, struct panfrost_bo **bo, unsigned *shader_offset); struct panfrost_blend_shader * -panfrost_get_blend_shader( - struct panfrost_context *ctx, - struct panfrost_blend_state *blend, - enum pipe_format fmt, - unsigned rt); +panfrost_get_blend_shader(struct panfrost_context *ctx, + struct panfrost_blend_state *blend, + enum pipe_format fmt, + unsigned rt, + const float *constants); #endif diff --git a/src/gallium/drivers/panfrost/pan_blend_cso.c b/src/gallium/drivers/panfrost/pan_blend_cso.c index 4f5a835a1f4..e1f279d2dfc 100644 --- a/src/gallium/drivers/panfrost/pan_blend_cso.c +++ b/src/gallium/drivers/panfrost/pan_blend_cso.c @@ -67,30 +67,30 @@ * tracking paths. If the cache hits, boom, done. */ struct panfrost_blend_shader * -panfrost_get_blend_shader( - struct panfrost_context *ctx, - struct panfrost_blend_state *blend, - enum pipe_format fmt, - unsigned rt) +panfrost_get_blend_shader(struct panfrost_context *ctx, + struct panfrost_blend_state *blend, + enum pipe_format fmt, + unsigned rt, + const float *constants) { /* Prevent NULL collision issues.. */ assert(fmt != 0); /* Check the cache. Key by the RT and format */ struct hash_table_u64 *shaders = blend->rt[rt].shaders; - unsigned key = (fmt << 3) | rt; + unsigned key = (fmt << 4) | ((constants != NULL) << 3) | rt; struct panfrost_blend_shader *shader = _mesa_hash_table_u64_search(shaders, key); - if (shader) - return shader; + if (!shader) { + /* Cache miss. Build one instead, cache it, and go */ + shader = panfrost_create_blend_shader(ctx, blend, fmt, rt); + _mesa_hash_table_u64_insert(shaders, key, shader); + } - /* Cache miss. Build one instead, cache it, and go */ - - shader = panfrost_compile_blend_shader(ctx, blend, fmt, rt); - _mesa_hash_table_u64_insert(shaders, key, shader); - return shader; + panfrost_compile_blend_shader(shader, constants); + return shader; } /* Create a blend CSO. Essentially, try to compile a fixed-function @@ -242,7 +242,10 @@ panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rti, struc } /* Otherwise, we need to grab a shader */ - struct panfrost_blend_shader *shader = panfrost_get_blend_shader(ctx, blend, fmt, rti); + struct panfrost_blend_shader *shader = + panfrost_get_blend_shader(ctx, blend, fmt, rti, + rt->constant_mask ? + ctx->blend_color.color : NULL); /* Upload the shader, sharing a BO */ if (!(*bo)) { @@ -258,14 +261,6 @@ panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rti, struc memcpy((*bo)->cpu + *shader_offset, shader->buffer, shader->size); - if (shader->patch_index) { - /* We have to specialize the blend shader to use constants, so - * patch in the current constants */ - - float *patch = (float *) ((*bo)->cpu + *shader_offset + shader->patch_index); - memcpy(patch, ctx->blend_color.color, sizeof(float) * 4); - } - struct panfrost_blend_final final = { .is_shader = true, .shader = { diff --git a/src/gallium/drivers/panfrost/pan_blend_shaders.c b/src/gallium/drivers/panfrost/pan_blend_shaders.c index d8b5c2a69b6..b0c6d2738e2 100644 --- a/src/gallium/drivers/panfrost/pan_blend_shaders.c +++ b/src/gallium/drivers/panfrost/pan_blend_shaders.c @@ -130,15 +130,16 @@ nir_iclamp(nir_builder *b, nir_ssa_def *v, int32_t lo, int32_t hi) } struct panfrost_blend_shader * -panfrost_compile_blend_shader(struct panfrost_context *ctx, - struct panfrost_blend_state *state, - enum pipe_format format, - unsigned rt) +panfrost_create_blend_shader(struct panfrost_context *ctx, + struct panfrost_blend_state *state, + enum pipe_format format, + unsigned rt) { - struct panfrost_device *dev = pan_device(ctx->base.screen); - struct panfrost_blend_shader *res = ralloc(state, struct panfrost_blend_shader); + struct panfrost_blend_shader *res = rzalloc(state, struct panfrost_blend_shader); res->ctx = ctx; + res->rt = rt; + res->format = format; /* Build the shader */ @@ -209,27 +210,45 @@ panfrost_compile_blend_shader(struct panfrost_context *ctx, NIR_PASS_V(shader, nir_lower_blend, options); - /* Compile the built shader */ + res->nir = shader; + return res; +} +void +panfrost_compile_blend_shader(struct panfrost_blend_shader *shader, + const float *constants) +{ + struct panfrost_device *dev = pan_device(shader->ctx->base.screen); + + /* If the shader has already been compiled and the constants match + * or the shader doesn't use the blend constants, we can keep the + * compiled version. + */ + if (shader->buffer && + (!constants || + !memcmp(shader->constants, constants, sizeof(shader->constants)))) + return; + + /* Compile or recompile the NIR shader */ panfrost_program program; struct panfrost_compile_inputs inputs = { .gpu_id = dev->gpu_id, .is_blend = true, - .blend.rt = rt, - .rt_formats = {format}, + .blend.rt = shader->rt, + .rt_formats = {shader->format}, }; - midgard_compile_shader_nir(shader, &program, &inputs); + if (constants) + memcpy(inputs.blend.constants, constants, sizeof(inputs.blend.constants)); + + midgard_compile_shader_nir(shader->nir, &program, &inputs); /* Allow us to patch later */ - res->patch_index = program.blend_patch_offset; - res->first_tag = program.first_tag; - res->size = program.compiled.size; - res->buffer = ralloc_size(res, res->size); - memcpy(res->buffer, program.compiled.data, res->size); + shader->first_tag = program.first_tag; + shader->size = program.compiled.size; + shader->buffer = reralloc_size(shader, shader->buffer, shader->size); + memcpy(shader->buffer, program.compiled.data, shader->size); util_dynarray_fini(&program.compiled); - res->work_count = program.work_register_count; - - return res; + shader->work_count = program.work_register_count; } diff --git a/src/gallium/drivers/panfrost/pan_blend_shaders.h b/src/gallium/drivers/panfrost/pan_blend_shaders.h index 1fcae664fdd..92004896898 100644 --- a/src/gallium/drivers/panfrost/pan_blend_shaders.h +++ b/src/gallium/drivers/panfrost/pan_blend_shaders.h @@ -32,9 +32,13 @@ #include "pan_blend.h" struct panfrost_blend_shader * -panfrost_compile_blend_shader(struct panfrost_context *ctx, - struct panfrost_blend_state *state, - enum pipe_format format, - unsigned rt); +panfrost_create_blend_shader(struct panfrost_context *ctx, + struct panfrost_blend_state *state, + enum pipe_format format, + unsigned rt); + +void +panfrost_compile_blend_shader(struct panfrost_blend_shader *shader, + const float *constants); #endif diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c index d07b358f2eb..d91f31a9855 100644 --- a/src/gallium/drivers/panfrost/pan_job.c +++ b/src/gallium/drivers/panfrost/pan_job.c @@ -855,7 +855,8 @@ panfrost_load_surface(struct panfrost_batch *batch, struct pipe_surface *surf, u if (loc >= FRAG_RESULT_DATA0 && !panfrost_can_fixed_blend(rsrc->base.format)) { struct panfrost_blend_shader *b = panfrost_get_blend_shader(batch->ctx, batch->ctx->blit_blend, - rsrc->base.format, loc - FRAG_RESULT_DATA0); + rsrc->base.format, loc - FRAG_RESULT_DATA0, + NULL); struct panfrost_bo *bo = panfrost_batch_create_bo(batch, b->size, PAN_BO_EXECUTE, diff --git a/src/panfrost/midgard/compiler.h b/src/panfrost/midgard/compiler.h index b5a37037130..5a93eb325bd 100644 --- a/src/panfrost/midgard/compiler.h +++ b/src/panfrost/midgard/compiler.h @@ -140,7 +140,6 @@ typedef struct midgard_instruction { bool has_constants; midgard_constants constants; uint16_t inline_constant; - bool has_blend_constant; bool has_inline_constant; bool compact_branch; @@ -221,7 +220,6 @@ typedef struct midgard_bundle { int control; bool has_embedded_constants; midgard_constants constants; - bool has_blend_constant; bool last_writeout; } midgard_bundle; @@ -254,8 +252,8 @@ typedef struct compiler_context { /* Index to precolour to r2 for a dual-source blend colour */ unsigned blend_src1; - /* Tracking for blend constant patching */ - int blend_constant_offset; + /* Blend constants */ + float blend_constants[4]; /* Number of bytes used for Thread Local Storage */ unsigned tls_size; diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c index d2c08471bc4..7f67fb35537 100644 --- a/src/panfrost/midgard/midgard_compile.c +++ b/src/panfrost/midgard/midgard_compile.c @@ -1864,12 +1864,9 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) assert(ctx->is_blend); reg = nir_dest_index(&instr->dest); - /* Blend constants are embedded directly in the shader and - * patched in, so we use some magic routing */ - midgard_instruction ins = v_mov(SSA_FIXED_REGISTER(REGISTER_CONSTANT), reg); ins.has_constants = true; - ins.has_blend_constant = true; + memcpy(ins.constants.f32, ctx->blend_constants, sizeof(ctx->blend_constants)); emit_mir_instruction(ctx, ins); break; } @@ -2543,9 +2540,6 @@ embedded_to_inline_constant(compiler_context *ctx, midgard_block *block) if (!ins->has_constants) continue; if (ins->has_inline_constant) continue; - /* Blend constants must not be inlined by definition */ - if (ins->has_blend_constant) continue; - unsigned max_bitsize = max_bitsize_for_alu(ins); /* We can inline 32-bit (sometimes) or 16-bit (usually) */ @@ -2963,6 +2957,7 @@ midgard_compile_shader_nir(nir_shader *nir, panfrost_program *program, ctx->stage = nir->info.stage; ctx->is_blend = inputs->is_blend; ctx->blend_rt = MIDGARD_COLOR_RT0 + inputs->blend.rt; + memcpy(ctx->blend_constants, inputs->blend.constants, sizeof(ctx->blend_constants)); ctx->blend_input = ~0; ctx->blend_src1 = ~0; ctx->quirks = midgard_get_quirks(inputs->gpu_id); @@ -3141,7 +3136,6 @@ midgard_compile_shader_nir(nir_shader *nir, panfrost_program *program, program->work_register_count = ctx->work_registers + 1; program->uniform_cutoff = ctx->uniform_cutoff; - program->blend_patch_offset = ctx->blend_constant_offset; program->tls_size = ctx->tls_size; if ((midgard_debug & MIDGARD_DBG_SHADERS) && !nir->info.internal) { diff --git a/src/panfrost/midgard/midgard_schedule.c b/src/panfrost/midgard/midgard_schedule.c index 63b1d781298..a5b57a3ab72 100644 --- a/src/panfrost/midgard/midgard_schedule.c +++ b/src/panfrost/midgard/midgard_schedule.c @@ -328,7 +328,6 @@ struct midgard_predicate { midgard_constants *constants; unsigned constant_mask; - bool blend_constant; /* Exclude this destination (if not ~0) */ unsigned exclude; @@ -437,17 +436,6 @@ mir_adjust_constants(midgard_instruction *ins, struct midgard_predicate *pred, bool destructive) { - /* Blend constants dominate */ - if (ins->has_blend_constant) { - if (pred->constant_mask) - return false; - else if (destructive) { - pred->blend_constant = true; - pred->constant_mask = 0xffff; - return true; - } - } - /* No constant, nothing to adjust */ if (!ins->has_constants) return true; @@ -1296,7 +1284,6 @@ mir_schedule_alu( mir_update_worklist(worklist, len, instructions, vmul); mir_update_worklist(worklist, len, instructions, sadd); - bundle.has_blend_constant = predicate.blend_constant; bundle.has_embedded_constants = predicate.constant_mask != 0; unsigned padding = 0; @@ -1375,7 +1362,6 @@ schedule_block(compiler_context *ctx, midgard_block *block) util_dynarray_init(&bundles, NULL); block->quadword_count = 0; - unsigned blend_offset = 0; for (;;) { unsigned tag = mir_choose_bundle(instructions, liveness, worklist, len); @@ -1391,10 +1377,6 @@ schedule_block(compiler_context *ctx, midgard_block *block) break; util_dynarray_append(&bundles, midgard_bundle, bundle); - - if (bundle.has_blend_constant) - blend_offset = block->quadword_count; - block->quadword_count += midgard_tag_props[bundle.tag].size; } @@ -1406,15 +1388,6 @@ schedule_block(compiler_context *ctx, midgard_block *block) } util_dynarray_fini(&bundles); - /* Blend constant was backwards as well. blend_offset if set is - * strictly positive, as an offset of zero would imply constants before - * any instructions which is invalid in Midgard. TODO: blend constants - * are broken if you spill since then quadword_count becomes invalid - * XXX */ - - if (blend_offset) - ctx->blend_constant_offset = ((ctx->quadword_count + block->quadword_count) - blend_offset - 1) * 0x10; - block->scheduled = true; ctx->quadword_count += block->quadword_count; diff --git a/src/panfrost/util/pan_ir.h b/src/panfrost/util/pan_ir.h index 955e94f255c..f916d40e800 100644 --- a/src/panfrost/util/pan_ir.h +++ b/src/panfrost/util/pan_ir.h @@ -98,11 +98,6 @@ typedef struct { struct util_dynarray compiled; - /* For a blend shader using a constant color -- patch point. If - * negative, there's no constant. */ - - int blend_patch_offset; - /* The number of bytes to allocate per-thread for Thread Local Storage * (register spilling), or zero if no spilling is used */ unsigned tls_size; @@ -114,6 +109,7 @@ struct panfrost_compile_inputs { bool is_blend; struct { unsigned rt; + float constants[4]; } blend; bool shaderdb;