panfrost: Get rid of the constant patching done on blend shader binaries
When constants are used in the blend equation we simply recompile the shader. Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com> Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7066>
This commit is contained in:
parent
c9739941ba
commit
a5005c349d
|
@ -29,6 +29,7 @@
|
|||
#define __PAN_BLEND_H
|
||||
|
||||
#include "util/hash_table.h"
|
||||
#include "nir.h"
|
||||
|
||||
struct panfrost_bo;
|
||||
|
||||
|
@ -37,6 +38,17 @@ struct panfrost_bo;
|
|||
struct panfrost_blend_shader {
|
||||
struct panfrost_context *ctx;
|
||||
|
||||
nir_shader *nir;
|
||||
|
||||
/* Render target */
|
||||
unsigned rt;
|
||||
|
||||
/* RT format */
|
||||
enum pipe_format format;
|
||||
|
||||
/* Blend constants */
|
||||
float constants[4];
|
||||
|
||||
/* The compiled shader */
|
||||
void *buffer;
|
||||
|
||||
|
@ -46,10 +58,6 @@ struct panfrost_blend_shader {
|
|||
/* Number of 128-bit work registers required by the shader */
|
||||
unsigned work_count;
|
||||
|
||||
/* Offset into the shader to patch constants. Zero to disable patching
|
||||
* (it is illogical to have constants at offset 0). */
|
||||
unsigned patch_index;
|
||||
|
||||
/* First instruction tag (for tagging the pointer) */
|
||||
unsigned first_tag;
|
||||
};
|
||||
|
@ -126,10 +134,10 @@ struct panfrost_blend_final
|
|||
panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rt, struct panfrost_bo **bo, unsigned *shader_offset);
|
||||
|
||||
struct panfrost_blend_shader *
|
||||
panfrost_get_blend_shader(
|
||||
struct panfrost_context *ctx,
|
||||
struct panfrost_blend_state *blend,
|
||||
enum pipe_format fmt,
|
||||
unsigned rt);
|
||||
panfrost_get_blend_shader(struct panfrost_context *ctx,
|
||||
struct panfrost_blend_state *blend,
|
||||
enum pipe_format fmt,
|
||||
unsigned rt,
|
||||
const float *constants);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -67,30 +67,30 @@
|
|||
* tracking paths. If the cache hits, boom, done. */
|
||||
|
||||
struct panfrost_blend_shader *
|
||||
panfrost_get_blend_shader(
|
||||
struct panfrost_context *ctx,
|
||||
struct panfrost_blend_state *blend,
|
||||
enum pipe_format fmt,
|
||||
unsigned rt)
|
||||
panfrost_get_blend_shader(struct panfrost_context *ctx,
|
||||
struct panfrost_blend_state *blend,
|
||||
enum pipe_format fmt,
|
||||
unsigned rt,
|
||||
const float *constants)
|
||||
{
|
||||
/* Prevent NULL collision issues.. */
|
||||
assert(fmt != 0);
|
||||
|
||||
/* Check the cache. Key by the RT and format */
|
||||
struct hash_table_u64 *shaders = blend->rt[rt].shaders;
|
||||
unsigned key = (fmt << 3) | rt;
|
||||
unsigned key = (fmt << 4) | ((constants != NULL) << 3) | rt;
|
||||
|
||||
struct panfrost_blend_shader *shader =
|
||||
_mesa_hash_table_u64_search(shaders, key);
|
||||
|
||||
if (shader)
|
||||
return shader;
|
||||
if (!shader) {
|
||||
/* Cache miss. Build one instead, cache it, and go */
|
||||
shader = panfrost_create_blend_shader(ctx, blend, fmt, rt);
|
||||
_mesa_hash_table_u64_insert(shaders, key, shader);
|
||||
}
|
||||
|
||||
/* Cache miss. Build one instead, cache it, and go */
|
||||
|
||||
shader = panfrost_compile_blend_shader(ctx, blend, fmt, rt);
|
||||
_mesa_hash_table_u64_insert(shaders, key, shader);
|
||||
return shader;
|
||||
panfrost_compile_blend_shader(shader, constants);
|
||||
return shader;
|
||||
}
|
||||
|
||||
/* Create a blend CSO. Essentially, try to compile a fixed-function
|
||||
|
@ -242,7 +242,10 @@ panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rti, struc
|
|||
}
|
||||
|
||||
/* Otherwise, we need to grab a shader */
|
||||
struct panfrost_blend_shader *shader = panfrost_get_blend_shader(ctx, blend, fmt, rti);
|
||||
struct panfrost_blend_shader *shader =
|
||||
panfrost_get_blend_shader(ctx, blend, fmt, rti,
|
||||
rt->constant_mask ?
|
||||
ctx->blend_color.color : NULL);
|
||||
|
||||
/* Upload the shader, sharing a BO */
|
||||
if (!(*bo)) {
|
||||
|
@ -258,14 +261,6 @@ panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rti, struc
|
|||
|
||||
memcpy((*bo)->cpu + *shader_offset, shader->buffer, shader->size);
|
||||
|
||||
if (shader->patch_index) {
|
||||
/* We have to specialize the blend shader to use constants, so
|
||||
* patch in the current constants */
|
||||
|
||||
float *patch = (float *) ((*bo)->cpu + *shader_offset + shader->patch_index);
|
||||
memcpy(patch, ctx->blend_color.color, sizeof(float) * 4);
|
||||
}
|
||||
|
||||
struct panfrost_blend_final final = {
|
||||
.is_shader = true,
|
||||
.shader = {
|
||||
|
|
|
@ -130,15 +130,16 @@ nir_iclamp(nir_builder *b, nir_ssa_def *v, int32_t lo, int32_t hi)
|
|||
}
|
||||
|
||||
struct panfrost_blend_shader *
|
||||
panfrost_compile_blend_shader(struct panfrost_context *ctx,
|
||||
struct panfrost_blend_state *state,
|
||||
enum pipe_format format,
|
||||
unsigned rt)
|
||||
panfrost_create_blend_shader(struct panfrost_context *ctx,
|
||||
struct panfrost_blend_state *state,
|
||||
enum pipe_format format,
|
||||
unsigned rt)
|
||||
{
|
||||
struct panfrost_device *dev = pan_device(ctx->base.screen);
|
||||
struct panfrost_blend_shader *res = ralloc(state, struct panfrost_blend_shader);
|
||||
struct panfrost_blend_shader *res = rzalloc(state, struct panfrost_blend_shader);
|
||||
|
||||
res->ctx = ctx;
|
||||
res->rt = rt;
|
||||
res->format = format;
|
||||
|
||||
/* Build the shader */
|
||||
|
||||
|
@ -209,27 +210,45 @@ panfrost_compile_blend_shader(struct panfrost_context *ctx,
|
|||
|
||||
NIR_PASS_V(shader, nir_lower_blend, options);
|
||||
|
||||
/* Compile the built shader */
|
||||
res->nir = shader;
|
||||
return res;
|
||||
}
|
||||
|
||||
void
|
||||
panfrost_compile_blend_shader(struct panfrost_blend_shader *shader,
|
||||
const float *constants)
|
||||
{
|
||||
struct panfrost_device *dev = pan_device(shader->ctx->base.screen);
|
||||
|
||||
/* If the shader has already been compiled and the constants match
|
||||
* or the shader doesn't use the blend constants, we can keep the
|
||||
* compiled version.
|
||||
*/
|
||||
if (shader->buffer &&
|
||||
(!constants ||
|
||||
!memcmp(shader->constants, constants, sizeof(shader->constants))))
|
||||
return;
|
||||
|
||||
/* Compile or recompile the NIR shader */
|
||||
panfrost_program program;
|
||||
|
||||
struct panfrost_compile_inputs inputs = {
|
||||
.gpu_id = dev->gpu_id,
|
||||
.is_blend = true,
|
||||
.blend.rt = rt,
|
||||
.rt_formats = {format},
|
||||
.blend.rt = shader->rt,
|
||||
.rt_formats = {shader->format},
|
||||
};
|
||||
|
||||
midgard_compile_shader_nir(shader, &program, &inputs);
|
||||
if (constants)
|
||||
memcpy(inputs.blend.constants, constants, sizeof(inputs.blend.constants));
|
||||
|
||||
midgard_compile_shader_nir(shader->nir, &program, &inputs);
|
||||
|
||||
/* Allow us to patch later */
|
||||
res->patch_index = program.blend_patch_offset;
|
||||
res->first_tag = program.first_tag;
|
||||
res->size = program.compiled.size;
|
||||
res->buffer = ralloc_size(res, res->size);
|
||||
memcpy(res->buffer, program.compiled.data, res->size);
|
||||
shader->first_tag = program.first_tag;
|
||||
shader->size = program.compiled.size;
|
||||
shader->buffer = reralloc_size(shader, shader->buffer, shader->size);
|
||||
memcpy(shader->buffer, program.compiled.data, shader->size);
|
||||
util_dynarray_fini(&program.compiled);
|
||||
res->work_count = program.work_register_count;
|
||||
|
||||
return res;
|
||||
shader->work_count = program.work_register_count;
|
||||
}
|
||||
|
|
|
@ -32,9 +32,13 @@
|
|||
#include "pan_blend.h"
|
||||
|
||||
struct panfrost_blend_shader *
|
||||
panfrost_compile_blend_shader(struct panfrost_context *ctx,
|
||||
struct panfrost_blend_state *state,
|
||||
enum pipe_format format,
|
||||
unsigned rt);
|
||||
panfrost_create_blend_shader(struct panfrost_context *ctx,
|
||||
struct panfrost_blend_state *state,
|
||||
enum pipe_format format,
|
||||
unsigned rt);
|
||||
|
||||
void
|
||||
panfrost_compile_blend_shader(struct panfrost_blend_shader *shader,
|
||||
const float *constants);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -855,7 +855,8 @@ panfrost_load_surface(struct panfrost_batch *batch, struct pipe_surface *surf, u
|
|||
if (loc >= FRAG_RESULT_DATA0 && !panfrost_can_fixed_blend(rsrc->base.format)) {
|
||||
struct panfrost_blend_shader *b =
|
||||
panfrost_get_blend_shader(batch->ctx, batch->ctx->blit_blend,
|
||||
rsrc->base.format, loc - FRAG_RESULT_DATA0);
|
||||
rsrc->base.format, loc - FRAG_RESULT_DATA0,
|
||||
NULL);
|
||||
|
||||
struct panfrost_bo *bo = panfrost_batch_create_bo(batch, b->size,
|
||||
PAN_BO_EXECUTE,
|
||||
|
|
|
@ -140,7 +140,6 @@ typedef struct midgard_instruction {
|
|||
bool has_constants;
|
||||
midgard_constants constants;
|
||||
uint16_t inline_constant;
|
||||
bool has_blend_constant;
|
||||
bool has_inline_constant;
|
||||
|
||||
bool compact_branch;
|
||||
|
@ -221,7 +220,6 @@ typedef struct midgard_bundle {
|
|||
int control;
|
||||
bool has_embedded_constants;
|
||||
midgard_constants constants;
|
||||
bool has_blend_constant;
|
||||
bool last_writeout;
|
||||
} midgard_bundle;
|
||||
|
||||
|
@ -254,8 +252,8 @@ typedef struct compiler_context {
|
|||
/* Index to precolour to r2 for a dual-source blend colour */
|
||||
unsigned blend_src1;
|
||||
|
||||
/* Tracking for blend constant patching */
|
||||
int blend_constant_offset;
|
||||
/* Blend constants */
|
||||
float blend_constants[4];
|
||||
|
||||
/* Number of bytes used for Thread Local Storage */
|
||||
unsigned tls_size;
|
||||
|
|
|
@ -1864,12 +1864,9 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
|
|||
assert(ctx->is_blend);
|
||||
reg = nir_dest_index(&instr->dest);
|
||||
|
||||
/* Blend constants are embedded directly in the shader and
|
||||
* patched in, so we use some magic routing */
|
||||
|
||||
midgard_instruction ins = v_mov(SSA_FIXED_REGISTER(REGISTER_CONSTANT), reg);
|
||||
ins.has_constants = true;
|
||||
ins.has_blend_constant = true;
|
||||
memcpy(ins.constants.f32, ctx->blend_constants, sizeof(ctx->blend_constants));
|
||||
emit_mir_instruction(ctx, ins);
|
||||
break;
|
||||
}
|
||||
|
@ -2543,9 +2540,6 @@ embedded_to_inline_constant(compiler_context *ctx, midgard_block *block)
|
|||
if (!ins->has_constants) continue;
|
||||
if (ins->has_inline_constant) continue;
|
||||
|
||||
/* Blend constants must not be inlined by definition */
|
||||
if (ins->has_blend_constant) continue;
|
||||
|
||||
unsigned max_bitsize = max_bitsize_for_alu(ins);
|
||||
|
||||
/* We can inline 32-bit (sometimes) or 16-bit (usually) */
|
||||
|
@ -2963,6 +2957,7 @@ midgard_compile_shader_nir(nir_shader *nir, panfrost_program *program,
|
|||
ctx->stage = nir->info.stage;
|
||||
ctx->is_blend = inputs->is_blend;
|
||||
ctx->blend_rt = MIDGARD_COLOR_RT0 + inputs->blend.rt;
|
||||
memcpy(ctx->blend_constants, inputs->blend.constants, sizeof(ctx->blend_constants));
|
||||
ctx->blend_input = ~0;
|
||||
ctx->blend_src1 = ~0;
|
||||
ctx->quirks = midgard_get_quirks(inputs->gpu_id);
|
||||
|
@ -3141,7 +3136,6 @@ midgard_compile_shader_nir(nir_shader *nir, panfrost_program *program,
|
|||
program->work_register_count = ctx->work_registers + 1;
|
||||
program->uniform_cutoff = ctx->uniform_cutoff;
|
||||
|
||||
program->blend_patch_offset = ctx->blend_constant_offset;
|
||||
program->tls_size = ctx->tls_size;
|
||||
|
||||
if ((midgard_debug & MIDGARD_DBG_SHADERS) && !nir->info.internal) {
|
||||
|
|
|
@ -328,7 +328,6 @@ struct midgard_predicate {
|
|||
|
||||
midgard_constants *constants;
|
||||
unsigned constant_mask;
|
||||
bool blend_constant;
|
||||
|
||||
/* Exclude this destination (if not ~0) */
|
||||
unsigned exclude;
|
||||
|
@ -437,17 +436,6 @@ mir_adjust_constants(midgard_instruction *ins,
|
|||
struct midgard_predicate *pred,
|
||||
bool destructive)
|
||||
{
|
||||
/* Blend constants dominate */
|
||||
if (ins->has_blend_constant) {
|
||||
if (pred->constant_mask)
|
||||
return false;
|
||||
else if (destructive) {
|
||||
pred->blend_constant = true;
|
||||
pred->constant_mask = 0xffff;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/* No constant, nothing to adjust */
|
||||
if (!ins->has_constants)
|
||||
return true;
|
||||
|
@ -1296,7 +1284,6 @@ mir_schedule_alu(
|
|||
mir_update_worklist(worklist, len, instructions, vmul);
|
||||
mir_update_worklist(worklist, len, instructions, sadd);
|
||||
|
||||
bundle.has_blend_constant = predicate.blend_constant;
|
||||
bundle.has_embedded_constants = predicate.constant_mask != 0;
|
||||
|
||||
unsigned padding = 0;
|
||||
|
@ -1375,7 +1362,6 @@ schedule_block(compiler_context *ctx, midgard_block *block)
|
|||
util_dynarray_init(&bundles, NULL);
|
||||
|
||||
block->quadword_count = 0;
|
||||
unsigned blend_offset = 0;
|
||||
|
||||
for (;;) {
|
||||
unsigned tag = mir_choose_bundle(instructions, liveness, worklist, len);
|
||||
|
@ -1391,10 +1377,6 @@ schedule_block(compiler_context *ctx, midgard_block *block)
|
|||
break;
|
||||
|
||||
util_dynarray_append(&bundles, midgard_bundle, bundle);
|
||||
|
||||
if (bundle.has_blend_constant)
|
||||
blend_offset = block->quadword_count;
|
||||
|
||||
block->quadword_count += midgard_tag_props[bundle.tag].size;
|
||||
}
|
||||
|
||||
|
@ -1406,15 +1388,6 @@ schedule_block(compiler_context *ctx, midgard_block *block)
|
|||
}
|
||||
util_dynarray_fini(&bundles);
|
||||
|
||||
/* Blend constant was backwards as well. blend_offset if set is
|
||||
* strictly positive, as an offset of zero would imply constants before
|
||||
* any instructions which is invalid in Midgard. TODO: blend constants
|
||||
* are broken if you spill since then quadword_count becomes invalid
|
||||
* XXX */
|
||||
|
||||
if (blend_offset)
|
||||
ctx->blend_constant_offset = ((ctx->quadword_count + block->quadword_count) - blend_offset - 1) * 0x10;
|
||||
|
||||
block->scheduled = true;
|
||||
ctx->quadword_count += block->quadword_count;
|
||||
|
||||
|
|
|
@ -98,11 +98,6 @@ typedef struct {
|
|||
|
||||
struct util_dynarray compiled;
|
||||
|
||||
/* For a blend shader using a constant color -- patch point. If
|
||||
* negative, there's no constant. */
|
||||
|
||||
int blend_patch_offset;
|
||||
|
||||
/* The number of bytes to allocate per-thread for Thread Local Storage
|
||||
* (register spilling), or zero if no spilling is used */
|
||||
unsigned tls_size;
|
||||
|
@ -114,6 +109,7 @@ struct panfrost_compile_inputs {
|
|||
bool is_blend;
|
||||
struct {
|
||||
unsigned rt;
|
||||
float constants[4];
|
||||
} blend;
|
||||
bool shaderdb;
|
||||
|
||||
|
|
Loading…
Reference in New Issue