panfrost: Pack MRT blend shaders into a single BO

Blend shader size and location in memory are considerably constrained,
probably to facilitate optimizations (my guess is that blend shaders are
run strictly out of i-cache). We need to pack the blend shaders for each
RT of a single framebuffer together. The easiest way to do this is at
draw time, which is not terribly efficient but will hold us over for now.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
This commit is contained in:
Alyssa Rosenzweig 2019-12-31 21:37:30 -05:00
parent 1b86e0927d
commit d58600c0e0
3 changed files with 38 additions and 22 deletions

View File

@ -55,8 +55,8 @@ struct panfrost_blend_shader {
/* A blend shader descriptor ready for actual use */
struct panfrost_blend_shader_final {
/* The compiled shader in GPU memory, possibly patched */
struct panfrost_bo *bo;
/* GPU address where we're compiled to */
uint64_t gpu;
/* First instruction tag (for tagging the pointer) */
unsigned first_tag;
@ -113,6 +113,6 @@ void
panfrost_blend_context_init(struct pipe_context *pipe);
struct panfrost_blend_final
panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rt);
panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rt, struct panfrost_bo **bo, unsigned *shader_offset);
#endif

View File

@ -227,7 +227,7 @@ panfrost_blend_constant(float *out, float *in, unsigned mask)
/* Create a final blend given the context */
struct panfrost_blend_final
panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rti)
panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rti, struct panfrost_bo **bo, unsigned *shader_offset)
{
struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
@ -273,23 +273,32 @@ panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rti)
final.shader.work_count = shader->work_count;
final.shader.first_tag = shader->first_tag;
/* Upload the shader */
final.shader.bo = panfrost_batch_create_bo(batch, shader->size,
PAN_BO_EXECUTE,
PAN_BO_ACCESS_PRIVATE |
PAN_BO_ACCESS_READ |
PAN_BO_ACCESS_VERTEX_TILER |
PAN_BO_ACCESS_FRAGMENT);
memcpy(final.shader.bo->cpu, shader->buffer, shader->size);
/* Upload the shader, sharing a BO */
if (!(*bo)) {
*bo = panfrost_batch_create_bo(batch, 4096,
PAN_BO_EXECUTE,
PAN_BO_ACCESS_PRIVATE |
PAN_BO_ACCESS_READ |
PAN_BO_ACCESS_VERTEX_TILER |
PAN_BO_ACCESS_FRAGMENT);
}
/* Size check */
assert((*shader_offset + shader->size) < 4096);
memcpy((*bo)->cpu + *shader_offset, shader->buffer, shader->size);
final.shader.gpu = (*bo)->gpu + *shader_offset;
if (shader->patch_index) {
/* We have to specialize the blend shader to use constants, so
* patch in the current constants */
float *patch = (float *) (final.shader.bo->cpu + shader->patch_index);
float *patch = (float *) ((*bo)->cpu + *shader_offset + shader->patch_index);
memcpy(patch, ctx->blend_color.color, sizeof(float) * 4);
}
*shader_offset += shader->size;
return final;
}

View File

@ -936,9 +936,12 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
struct panfrost_blend_final blend[PIPE_MAX_COLOR_BUFS];
unsigned shader_offset = 0;
struct panfrost_bo *shader_bo = NULL;
for (unsigned c = 0; c < rt_count; ++c)
blend[c] = panfrost_get_blend_for_context(ctx, c);
for (unsigned c = 0; c < rt_count; ++c) {
blend[c] = panfrost_get_blend_for_context(ctx, c, &shader_bo, &shader_offset);
}
/* If there is a blend shader, work registers are shared. XXX: opt */
@ -979,13 +982,17 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
/* Even on MFBD, the shader descriptor gets blend shaders. It's
* *also* copied to the blend_meta appended (by convention),
* but this is the field actually read by the hardware. (Or
* maybe both are read...?) */
* maybe both are read...?). Specify the last RTi with a blend
* shader. */
if (blend[0].is_shader) {
ctx->fragment_shader_core.blend.shader =
blend[0].shader.bo->gpu | blend[0].shader.first_tag;
} else {
ctx->fragment_shader_core.blend.shader = 0;
ctx->fragment_shader_core.blend.shader = 0;
for (signed rt = (rt_count - 1); rt >= 0; --rt) {
if (blend[rt].is_shader) {
ctx->fragment_shader_core.blend.shader =
blend[rt].shader.gpu | blend[rt].shader.first_tag;
break;
}
}
if (screen->quirks & MIDGARD_SFBD) {
@ -1039,7 +1046,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
assert(!(is_srgb && blend[i].is_shader));
if (blend[i].is_shader) {
rts[i].blend.shader = blend[i].shader.bo->gpu | blend[i].shader.first_tag;
rts[i].blend.shader = blend[i].shader.gpu | blend[i].shader.first_tag;
} else {
rts[i].blend.equation = *blend[i].equation.equation;
rts[i].blend.constant = blend[i].equation.constant;