freedreno/a6xx: KHR_blend_equation_advanced support
Wire up support to sample from the fb (and force GMEM rendering when we have fb reads). The existing GLSL IR lowering for blend_equation_advanced does the rest. Signed-off-by: Rob Clark <robdclark@chromium.org> Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
This commit is contained in:
parent
650246523b
commit
6fa8a6d60f
|
@ -340,11 +340,49 @@ emit_border_color(struct fd_context *ctx, struct fd_ringbuffer *ring)
|
|||
u_upload_unmap(fd6_ctx->border_color_uploader);
|
||||
}
|
||||
|
||||
static void
|
||||
fd6_emit_fb_tex(struct fd_ringbuffer *state, struct fd_context *ctx)
|
||||
{
|
||||
struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
|
||||
struct pipe_surface *psurf = pfb->cbufs[0];
|
||||
struct fd_resource *rsc = fd_resource(psurf->texture);
|
||||
|
||||
uint32_t texconst0 = fd6_tex_const_0(psurf->texture, psurf->u.tex.level,
|
||||
psurf->format, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
|
||||
PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W);
|
||||
|
||||
/* always TILE6_2 mode in GMEM.. which also means no swap: */
|
||||
texconst0 &= ~(A6XX_TEX_CONST_0_SWAP__MASK | A6XX_TEX_CONST_0_TILE_MODE__MASK);
|
||||
texconst0 |= A6XX_TEX_CONST_0_TILE_MODE(TILE6_2);
|
||||
|
||||
OUT_RING(state, texconst0);
|
||||
OUT_RING(state, A6XX_TEX_CONST_1_WIDTH(pfb->width) |
|
||||
A6XX_TEX_CONST_1_HEIGHT(pfb->height));
|
||||
OUT_RINGP(state, A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D) |
|
||||
A6XX_TEX_CONST_2_FETCHSIZE(TFETCH6_2_BYTE),
|
||||
&ctx->batch->fb_read_patches);
|
||||
OUT_RING(state, A6XX_TEX_CONST_3_ARRAY_PITCH(rsc->layer_size));
|
||||
|
||||
OUT_RING(state, A6XX_TEX_CONST_4_BASE_LO(ctx->screen->gmem_base));
|
||||
OUT_RING(state, A6XX_TEX_CONST_5_BASE_HI(ctx->screen->gmem_base >> 32) |
|
||||
A6XX_TEX_CONST_5_DEPTH(1));
|
||||
OUT_RING(state, 0); /* texconst6 */
|
||||
OUT_RING(state, 0); /* texconst7 */
|
||||
OUT_RING(state, 0); /* texconst8 */
|
||||
OUT_RING(state, 0); /* texconst9 */
|
||||
OUT_RING(state, 0); /* texconst10 */
|
||||
OUT_RING(state, 0); /* texconst11 */
|
||||
OUT_RING(state, 0);
|
||||
OUT_RING(state, 0);
|
||||
OUT_RING(state, 0);
|
||||
OUT_RING(state, 0);
|
||||
}
|
||||
|
||||
bool
|
||||
fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring,
|
||||
enum pipe_shader_type type, struct fd_texture_stateobj *tex,
|
||||
unsigned bcolor_offset,
|
||||
/* can be NULL if no image/SSBO state to merge in: */
|
||||
/* can be NULL if no image/SSBO/fb state to merge in: */
|
||||
const struct ir3_shader_variant *v, struct fd_context *ctx)
|
||||
{
|
||||
bool needs_border = false;
|
||||
|
@ -412,6 +450,9 @@ fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring,
|
|||
if (v) {
|
||||
num_merged_textures += v->image_mapping.num_tex;
|
||||
|
||||
if (v->fb_read)
|
||||
num_merged_textures++;
|
||||
|
||||
/* There could be more bound textures than what the shader uses.
|
||||
* Which isn't known at shader compile time. So in the case we
|
||||
* are merging tex state, only emit the textures that the shader
|
||||
|
@ -479,6 +520,10 @@ fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring,
|
|||
fd6_emit_image_tex(state, &img->si[idx]);
|
||||
}
|
||||
}
|
||||
|
||||
if (v->fb_read) {
|
||||
fd6_emit_fb_tex(state, ctx);
|
||||
}
|
||||
}
|
||||
|
||||
/* emit texture state: */
|
||||
|
@ -528,10 +573,20 @@ fd6_emit_combined_textures(struct fd_ringbuffer *ring, struct fd6_emit *emit,
|
|||
|
||||
debug_assert(state_id[type]);
|
||||
|
||||
if (!v->image_mapping.num_tex) {
|
||||
if (!v->image_mapping.num_tex && !v->fb_read) {
|
||||
/* in the fast-path, when we don't have to mix in any image/SSBO
|
||||
* related texture state, we can just lookup the stateobj and
|
||||
* re-emit that:
|
||||
*
|
||||
* Also, framebuffer-read is a slow-path because an extra
|
||||
* texture needs to be inserted.
|
||||
*
|
||||
* TODO we can probably simplify things if we also treated
|
||||
* border_color as a slow-path.. this way the tex state key
|
||||
* wouldn't depend on bcolor_offset.. but fb_read might rather
|
||||
* be *somehow* a fast-path if we eventually used it for PLS.
|
||||
* I suppose there would be no harm in just *always* inserting
|
||||
* an fb_read texture?
|
||||
*/
|
||||
if ((ctx->dirty_shader[type] & FD_DIRTY_SHADER_TEX) &&
|
||||
ctx->tex[type].num_textures > 0) {
|
||||
|
@ -546,9 +601,10 @@ fd6_emit_combined_textures(struct fd_ringbuffer *ring, struct fd6_emit *emit,
|
|||
/* In the slow-path, create a one-shot texture state object
|
||||
* if either TEX|PROG|SSBO|IMAGE state is dirty:
|
||||
*/
|
||||
if (ctx->dirty_shader[type] &
|
||||
if ((ctx->dirty_shader[type] &
|
||||
(FD_DIRTY_SHADER_TEX | FD_DIRTY_SHADER_PROG |
|
||||
FD_DIRTY_SHADER_IMAGE | FD_DIRTY_SHADER_SSBO)) {
|
||||
FD_DIRTY_SHADER_IMAGE | FD_DIRTY_SHADER_SSBO)) ||
|
||||
v->fb_read) {
|
||||
struct fd_texture_stateobj *tex = &ctx->tex[type];
|
||||
struct fd_ringbuffer *stateobj =
|
||||
fd_submit_new_ringbuffer(ctx->batch->submit,
|
||||
|
@ -738,6 +794,13 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
|
|||
|
||||
emit_marker6(ring, 5);
|
||||
|
||||
/* NOTE: we track fb_read differently than _BLEND_ENABLED since
|
||||
* we might at some point decide to do sysmem in some cases when
|
||||
* blend is enabled:
|
||||
*/
|
||||
if (fp->fb_read)
|
||||
ctx->batch->gmem_reason |= FD_GMEM_FB_READ;
|
||||
|
||||
if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE)) {
|
||||
struct fd_ringbuffer *state;
|
||||
|
||||
|
|
|
@ -264,6 +264,18 @@ use_hw_binning(struct fd_batch *batch)
|
|||
(batch->num_draws > 0);
|
||||
}
|
||||
|
||||
static void
|
||||
patch_fb_read(struct fd_batch *batch)
|
||||
{
|
||||
struct fd_gmem_stateobj *gmem = &batch->ctx->gmem;
|
||||
|
||||
for (unsigned i = 0; i < fd_patch_num_elements(&batch->fb_read_patches); i++) {
|
||||
struct fd_cs_patch *patch = fd_patch_element(&batch->fb_read_patches, i);
|
||||
*patch->cs = patch->val | A6XX_TEX_CONST_2_PITCH(gmem->bin_w * gmem->cbuf_cpp[0]);
|
||||
}
|
||||
util_dynarray_resize(&batch->fb_read_patches, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)
|
||||
{
|
||||
|
@ -518,6 +530,7 @@ fd6_emit_tile_init(struct fd_batch *batch)
|
|||
emit_zs(ring, pfb->zsbuf, &ctx->gmem);
|
||||
emit_mrt(ring, pfb, &ctx->gmem);
|
||||
emit_msaa(ring, pfb->samples);
|
||||
patch_fb_read(batch);
|
||||
|
||||
if (use_hw_binning(batch)) {
|
||||
set_bin_size(ring, gmem->bin_w, gmem->bin_h,
|
||||
|
|
|
@ -596,7 +596,6 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd6_program_state *state,
|
|||
|
||||
OUT_RING(ring,
|
||||
CONDREG(smask_in_regid, A6XX_RB_RENDER_CONTROL1_SAMPLEMASK) |
|
||||
COND(sample_shading, A6XX_RB_RENDER_CONTROL1_UNK4 | A6XX_RB_RENDER_CONTROL1_UNK5) |
|
||||
CONDREG(samp_id_regid, A6XX_RB_RENDER_CONTROL1_SAMPLEID) |
|
||||
CONDREG(ij_size_regid, A6XX_RB_RENDER_CONTROL1_SIZE) |
|
||||
COND(s[FS].v->frag_face, A6XX_RB_RENDER_CONTROL1_FACENESS));
|
||||
|
|
|
@ -89,6 +89,7 @@ batch_init(struct fd_batch *batch)
|
|||
fd_reset_wfi(batch);
|
||||
|
||||
util_dynarray_init(&batch->draw_patches, NULL);
|
||||
util_dynarray_init(&batch->fb_read_patches, NULL);
|
||||
|
||||
if (is_a2xx(ctx->screen)) {
|
||||
util_dynarray_init(&batch->shader_patches, NULL);
|
||||
|
@ -168,6 +169,7 @@ batch_fini(struct fd_batch *batch)
|
|||
fd_submit_del(batch->submit);
|
||||
|
||||
util_dynarray_fini(&batch->draw_patches);
|
||||
util_dynarray_fini(&batch->fb_read_patches);
|
||||
|
||||
if (is_a2xx(batch->ctx->screen)) {
|
||||
util_dynarray_fini(&batch->shader_patches);
|
||||
|
|
|
@ -122,6 +122,7 @@ struct fd_batch {
|
|||
|
||||
FD_GMEM_BLEND_ENABLED = 0x10,
|
||||
FD_GMEM_LOGICOP_ENABLED = 0x20,
|
||||
FD_GMEM_FB_READ = 0x40,
|
||||
} gmem_reason;
|
||||
unsigned num_draws; /* number of draws in current batch */
|
||||
unsigned num_vertices; /* number of vertices in current batch */
|
||||
|
@ -137,6 +138,9 @@ struct fd_batch {
|
|||
*/
|
||||
struct util_dynarray draw_patches;
|
||||
|
||||
/* texture state that needs patching for fb_read: */
|
||||
struct util_dynarray fb_read_patches;
|
||||
|
||||
/* Keep track of writes to RB_RENDER_CONTROL which need to be patched
|
||||
* once we know whether or not to use GMEM, and GMEM tile pitch.
|
||||
*
|
||||
|
|
|
@ -307,6 +307,11 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_TGSI_FS_FBFETCH:
|
||||
if (fd_device_version(screen->dev) >= FD_VERSION_GMEM_BASE &&
|
||||
is_a6xx(screen))
|
||||
return 1;
|
||||
return 0;
|
||||
case PIPE_CAP_SAMPLE_SHADING:
|
||||
if (is_a6xx(screen)) return 1;
|
||||
return 0;
|
||||
|
@ -784,6 +789,10 @@ fd_screen_create(struct fd_device *dev, struct renderonly *ro)
|
|||
}
|
||||
screen->gmemsize_bytes = val;
|
||||
|
||||
if (fd_device_version(dev) >= FD_VERSION_GMEM_BASE) {
|
||||
fd_pipe_get_param(screen->pipe, FD_GMEM_BASE, &screen->gmem_base);
|
||||
}
|
||||
|
||||
if (fd_pipe_get_param(screen->pipe, FD_DEVICE_ID, &val)) {
|
||||
DBG("could not get device-id");
|
||||
goto fail;
|
||||
|
|
|
@ -59,6 +59,7 @@ struct fd_screen {
|
|||
|
||||
struct slab_parent_pool transfer_pool;
|
||||
|
||||
uint64_t gmem_base;
|
||||
uint32_t gmemsize_bytes;
|
||||
uint32_t device_id;
|
||||
uint32_t gpu_id; /* 220, 305, etc */
|
||||
|
|
Loading…
Reference in New Issue