From ae78489d3eb2d3916555e593e387103c06cecce6 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 11 Sep 2018 15:59:22 -0400 Subject: [PATCH] freedreno/a6xx: hwbinning Signed-off-by: Rob Clark --- .../drivers/freedreno/a6xx/fd6_context.c | 10 +- .../drivers/freedreno/a6xx/fd6_context.h | 16 +- src/gallium/drivers/freedreno/a6xx/fd6_draw.c | 3 +- src/gallium/drivers/freedreno/a6xx/fd6_gmem.c | 211 ++++++++++-------- .../drivers/freedreno/freedreno_context.h | 2 +- .../drivers/freedreno/freedreno_gmem.c | 15 +- .../drivers/freedreno/freedreno_gmem.h | 1 + .../drivers/freedreno/freedreno_screen.c | 6 +- 8 files changed, 159 insertions(+), 105 deletions(-) diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_context.c b/src/gallium/drivers/freedreno/a6xx/fd6_context.c index 695a4c49bad..02b7f1874fc 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_context.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_context.c @@ -49,7 +49,8 @@ fd6_context_destroy(struct pipe_context *pctx) fd_bo_del(fd6_ctx->vs_pvt_mem); fd_bo_del(fd6_ctx->fs_pvt_mem); - fd_bo_del(fd6_ctx->vsc_size_mem); + fd_bo_del(fd6_ctx->vsc_data); + fd_bo_del(fd6_ctx->vsc_data2); fd_bo_del(fd6_ctx->blit_mem); fd_context_cleanup_common_vbos(&fd6_ctx->base); @@ -104,7 +105,12 @@ fd6_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) fd6_ctx->fs_pvt_mem = fd_bo_new(screen->dev, 0x2000, DRM_FREEDRENO_GEM_TYPE_KMEM); - fd6_ctx->vsc_size_mem = fd_bo_new(screen->dev, 0x1000, + fd6_ctx->vsc_data = fd_bo_new(screen->dev, + (A6XX_VSC_DATA_PITCH * 32) + 0x100, + DRM_FREEDRENO_GEM_TYPE_KMEM); + + fd6_ctx->vsc_data2 = fd_bo_new(screen->dev, + A6XX_VSC_DATA2_PITCH * 32, DRM_FREEDRENO_GEM_TYPE_KMEM); fd6_ctx->blit_mem = fd_bo_new(screen->dev, 0x1000, diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_context.h b/src/gallium/drivers/freedreno/a6xx/fd6_context.h index 26861809355..9676a2ed955 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_context.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_context.h @@ -50,10 +50,20 @@ struct fd6_context { struct fd_bo *vs_pvt_mem, *fs_pvt_mem; - /* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes). We - * could combine it with another allocation. + /* Two buffers related to hw binning / visibility stream (VSC). + * Compared to previous generations + * (1) we cannot specify individual buffers per VSC, instead + * just a pitch and base address + * (2) there is a second smaller buffer, for something.. we + * also stash VSC_BIN_SIZE at end of 2nd buffer. */ - struct fd_bo *vsc_size_mem; + struct fd_bo *vsc_data, *vsc_data2; + +// TODO annoyingly large sizes to prevent hangs with larger amounts +// of geometry, like aquarium with max # of fish. Need to figure +// out how to calculate the required size. +#define A6XX_VSC_DATA_PITCH 0x4400 +#define A6XX_VSC_DATA2_PITCH 0x10400 /* TODO not sure what this is for.. 
probably similar to * CACHE_FLUSH_TS on kernel side, where value gets written diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c index ba8b52810d5..69b686aa6b0 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c @@ -248,7 +248,8 @@ fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info, /* figure out whether we need to disable LRZ write for binning * pass using draw pass's fp: */ - emit.no_lrz_write = fp->writes_pos || fp->has_kill; + // TODO disable until lrz is wired up: + emit.no_lrz_write = true; // fp->writes_pos || fp->has_kill; emit.key.binning_pass = false; emit.dirty = dirty; diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c index 4c40d374d02..1045605f1ac 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c @@ -214,7 +214,12 @@ emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf, static bool use_hw_binning(struct fd_batch *batch) { - return false; + struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; + + // TODO figure out hw limits for binning + + return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2) && + (batch->num_draws > 0); } static void @@ -244,28 +249,44 @@ patch_gmem_bases(struct fd_batch *batch) util_dynarray_resize(&batch->gmem_patches, 0); } +static void +update_render_cntl(struct fd_batch *batch, bool binning) +{ + struct fd_ringbuffer *ring = batch->gmem; + uint32_t cntl = 0; + + cntl |= A6XX_RB_RENDER_CNTL_UNK4; + if (binning) + cntl |= A6XX_RB_RENDER_CNTL_BINNING; + + OUT_PKT7(ring, CP_REG_WRITE, 3); + OUT_RING(ring, 0x2); + OUT_RING(ring, REG_A6XX_RB_RENDER_CNTL); + OUT_RING(ring, cntl); +} + static void update_vsc_pipe(struct fd_batch *batch) { struct fd_context *ctx = batch->ctx; struct fd6_context *fd6_ctx = fd6_context(ctx); - struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; + struct fd_gmem_stateobj *gmem = &ctx->gmem; struct fd_ringbuffer *ring = batch->gmem; + unsigned n = gmem->nbins_x * gmem->nbins_y; int i; OUT_PKT4(ring, REG_A6XX_VSC_BIN_SIZE, 3); OUT_RING(ring, A6XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) | A6XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h)); - OUT_RELOCW(ring, fd6_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS_LO/HI */ + OUT_RELOCW(ring, fd6_ctx->vsc_data, + n * A6XX_VSC_DATA_PITCH, 0, 0); /* VSC_SIZE_ADDRESS_LO/HI */ -#if 0 - OUT_PKT4(ring, REG_A6XX_UNKNOWN_0BC5, 2); - OUT_RING(ring, 0x00000000); /* UNKNOWN_0BC5 */ - OUT_RING(ring, 0x00000000); /* UNKNOWN_0BC6 */ -#endif + OUT_PKT4(ring, REG_A6XX_VSC_BIN_COUNT, 1); + OUT_RING(ring, A6XX_VSC_BIN_COUNT_NX(gmem->nbins_x) | + A6XX_VSC_BIN_COUNT_NY(gmem->nbins_y)); - OUT_PKT4(ring, REG_A6XX_VSC_PIPE_CONFIG_REG(0), 16); - for (i = 0; i < 16; i++) { + OUT_PKT4(ring, REG_A6XX_VSC_PIPE_CONFIG_REG(0), 32); + for (i = 0; i < 32; i++) { struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i]; OUT_RING(ring, A6XX_VSC_PIPE_CONFIG_REG_X(pipe->x) | A6XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) | @@ -273,25 +294,15 @@ update_vsc_pipe(struct fd_batch *batch) A6XX_VSC_PIPE_CONFIG_REG_H(pipe->h)); } -#if 0 - OUT_PKT4(ring, REG_A6XX_VSC_PIPE_DATA_ADDRESS_LO(0), 32); - for (i = 0; i < 16; i++) { - struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i]; - if (!pipe->bo) { - pipe->bo = fd_bo_new(ctx->dev, 0x20000, - DRM_FREEDRENO_GEM_TYPE_KMEM); - } - OUT_RELOCW(ring, pipe->bo, 0, 0, 0); /* VSC_PIPE_DATA_ADDRESS[i].LO/HI */ - } -#endif + OUT_PKT4(ring, REG_A6XX_VSC_PIPE_DATA2_ADDRESS_LO, 
4); + OUT_RELOCW(ring, fd6_ctx->vsc_data2, 0, 0, 0); + OUT_RING(ring, A6XX_VSC_DATA2_PITCH); + OUT_RING(ring, fd_bo_size(fd6_ctx->vsc_data2)); -#if 0 - OUT_PKT4(ring, REG_A6XX_VSC_PIPE_DATA_LENGTH_REG(0), 16); - for (i = 0; i < 16; i++) { - struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i]; - OUT_RING(ring, fd_bo_size(pipe->bo) - 32); /* VSC_PIPE_DATA_LENGTH[i] */ - } -#endif + OUT_PKT4(ring, REG_A6XX_VSC_PIPE_DATA_ADDRESS_LO, 4); + OUT_RELOCW(ring, fd6_ctx->vsc_data, 0, 0, 0); + OUT_RING(ring, A6XX_VSC_DATA_PITCH); + OUT_RING(ring, fd_bo_size(fd6_ctx->vsc_data)); } static void @@ -310,6 +321,23 @@ set_scissor(struct fd_ringbuffer *ring, uint32_t x1, uint32_t y1, uint32_t x2, u A6XX_GRAS_RESOLVE_CNTL_2_Y(y2)); } +static void +set_bin_size(struct fd_ringbuffer *ring, uint32_t w, uint32_t h, uint32_t flag) +{ + OUT_PKT4(ring, REG_A6XX_GRAS_BIN_CONTROL, 1); + OUT_RING(ring, A6XX_GRAS_BIN_CONTROL_BINW(w) | + A6XX_GRAS_BIN_CONTROL_BINH(h) | flag); + + OUT_PKT4(ring, REG_A6XX_RB_BIN_CONTROL, 1); + OUT_RING(ring, A6XX_RB_BIN_CONTROL_BINW(w) | + A6XX_RB_BIN_CONTROL_BINH(h) | flag); + + /* no flag for RB_BIN_CONTROL2... */ + OUT_PKT4(ring, REG_A6XX_RB_BIN_CONTROL2, 1); + OUT_RING(ring, A6XX_RB_BIN_CONTROL2_BINW(w) | + A6XX_RB_BIN_CONTROL2_BINH(h)); +} + static void emit_binning_pass(struct fd_batch *batch) { @@ -322,25 +350,31 @@ emit_binning_pass(struct fd_batch *batch) uint32_t x2 = gmem->minx + gmem->width - 1; uint32_t y2 = gmem->miny + gmem->height - 1; + set_scissor(ring, x1, y1, x2, y2); + emit_marker6(ring, 7); OUT_PKT7(ring, CP_SET_MARKER, 1); - OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_BINNING) | 0x10); /* | 0x10 ? */ + OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_BINNING)); emit_marker6(ring, 7); -#if 0 - OUT_PKT4(ring, REG_A6XX_RB_CNTL, 1); - OUT_RING(ring, A6XX_RB_CNTL_WIDTH(gmem->bin_w) | - A6XX_RB_CNTL_HEIGHT(gmem->bin_h)); -#endif + OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1); + OUT_RING(ring, 0x1); - set_scissor(ring, x1, y1, x2, y2); + OUT_PKT7(ring, CP_SET_MODE, 1); + OUT_RING(ring, 0x1); + + OUT_WFI5(ring); + + OUT_PKT4(ring, REG_A6XX_VFD_MODE_CNTL, 1); + OUT_RING(ring, A6XX_VFD_MODE_CNTL_BINNING_PASS); update_vsc_pipe(batch); -#if 0 - OUT_PKT4(ring, REG_A6XX_VPC_MODE_CNTL, 1); - OUT_RING(ring, A6XX_VPC_MODE_CNTL_BINNING_PASS); -#endif + OUT_PKT4(ring, REG_A6XX_PC_UNKNOWN_9805, 1); + OUT_RING(ring, 0x1); + + OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A0F8, 1); + OUT_RING(ring, 0x1); OUT_PKT7(ring, CP_EVENT_WRITE, 1); OUT_RING(ring, UNK_2C); @@ -349,11 +383,22 @@ emit_binning_pass(struct fd_batch *batch) OUT_RING(ring, A6XX_RB_WINDOW_OFFSET_X(0) | A6XX_RB_WINDOW_OFFSET_Y(0)); + OUT_PKT4(ring, REG_A6XX_SP_TP_WINDOW_OFFSET, 1); + OUT_RING(ring, A6XX_SP_TP_WINDOW_OFFSET_X(0) | + A6XX_SP_TP_WINDOW_OFFSET_Y(0)); + /* emit IB to binning drawcmds: */ ctx->emit_ib(ring, batch->binning); fd_reset_wfi(batch); + OUT_PKT7(ring, CP_SET_DRAW_STATE, 3); + OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) | + CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS | + CP_SET_DRAW_STATE__0_GROUP_ID(0)); + OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0)); + OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0)); + OUT_PKT7(ring, CP_EVENT_WRITE, 1); OUT_RING(ring, UNK_2D); @@ -362,14 +407,7 @@ emit_binning_pass(struct fd_batch *batch) OUT_RELOCW(ring, fd6_context(ctx)->blit_mem, 0, 0, 0); /* ADDR_LO/HI */ OUT_RING(ring, 0x00000000); - // TODO CP_COND_WRITE's for all the vsc buffers (check for overflow??) 
- fd_wfi(batch, ring); - -#if 0 - OUT_PKT4(ring, REG_A6XX_VPC_MODE_CNTL, 1); - OUT_RING(ring, 0x0); -#endif } static void @@ -392,23 +430,6 @@ disable_msaa(struct fd_ringbuffer *ring) A6XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE); } -static void -set_bin_size(struct fd_ringbuffer *ring, uint32_t w, uint32_t h, uint32_t flag) -{ - OUT_PKT4(ring, REG_A6XX_GRAS_BIN_CONTROL, 1); - OUT_RING(ring, A6XX_GRAS_BIN_CONTROL_BINW(w) | - A6XX_GRAS_BIN_CONTROL_BINH(h) | flag); - - OUT_PKT4(ring, REG_A6XX_RB_BIN_CONTROL, 1); - OUT_RING(ring, A6XX_RB_BIN_CONTROL_BINW(w) | - A6XX_RB_BIN_CONTROL_BINH(h) | flag); - - /* no flag for X3_BIN_SIZE... */ - OUT_PKT4(ring, REG_A6XX_RB_BIN_CONTROL2, 1); - OUT_RING(ring, A6XX_RB_BIN_CONTROL2_BINW(w) | - A6XX_RB_BIN_CONTROL2_BINH(h)); -} - /* before first tile */ static void fd6_emit_tile_init(struct fd_batch *batch) @@ -428,46 +449,40 @@ fd6_emit_tile_init(struct fd_batch *batch) OUT_PKT7(ring, CP_EVENT_WRITE, 1); OUT_RING(ring, 0x31); /* vertex cache invalidate? */ -#if 0 - OUT_PKT4(ring, REG_A6XX_GRAS_CL_CNTL, 1); - OUT_RING(ring, 0x00000080); /* GRAS_CL_CNTL */ -#endif - OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1); OUT_RING(ring, 0x0); -#if 0 - OUT_PKT4(ring, REG_A6XX_PC_POWER_CNTL, 1); - OUT_RING(ring, 0x00000003); /* PC_POWER_CNTL */ -#endif - -#if 0 - OUT_PKT4(ring, REG_A6XX_VFD_POWER_CNTL, 1); - OUT_RING(ring, 0x00000003); /* VFD_POWER_CNTL */ -#endif - /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */ fd_wfi(batch, ring); OUT_PKT4(ring, REG_A6XX_RB_CCU_CNTL, 1); OUT_RING(ring, 0x7c400004); /* RB_CCU_CNTL */ - DBG("emit_mrt"); emit_zs(ring, pfb->zsbuf, &ctx->gmem); emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, &ctx->gmem); patch_gmem_bases(batch); - set_bin_size(ring, gmem->bin_w, gmem->bin_h, 0x6000000); - disable_msaa(ring); if (use_hw_binning(batch)) { + set_bin_size(ring, gmem->bin_w, gmem->bin_h, + A6XX_RB_BIN_CONTROL_BINNING_PASS | 0x6000000); + update_render_cntl(batch, true); emit_binning_pass(batch); fd6_emit_lrz_flush(ring); patch_draws(batch, USE_VISIBILITY); + + set_bin_size(ring, gmem->bin_w, gmem->bin_h, + A6XX_RB_BIN_CONTROL_USE_VIZ | 0x6000000); + + OUT_PKT4(ring, REG_A6XX_VFD_MODE_CNTL, 1); + OUT_RING(ring, 0x0); } else { + set_bin_size(ring, gmem->bin_w, gmem->bin_h, 0x6000000); patch_draws(batch, IGNORE_VISIBILITY); } + + update_render_cntl(batch, false); } static void @@ -498,9 +513,12 @@ fd6_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile) struct fd6_context *fd6_ctx = fd6_context(ctx); struct fd_ringbuffer *ring = batch->gmem; + OUT_PKT7(ring, CP_SET_MARKER, 1); + OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(0x7)); + emit_marker6(ring, 7); OUT_PKT7(ring, CP_SET_MARKER, 1); - OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_GMEM) | 0x10); /* | 0x10 ? 
*/ + OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_GMEM) | 0x10); emit_marker6(ring, 7); uint32_t x1 = tile->xoff; @@ -516,26 +534,34 @@ fd6_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile) OUT_RING(ring, A6XX_VPC_SO_OVERRIDE_SO_DISABLE); if (use_hw_binning(batch)) { + struct fd_gmem_stateobj *gmem = &ctx->gmem; struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[tile->p]; + unsigned n = gmem->nbins_x * gmem->nbins_y; OUT_PKT7(ring, CP_WAIT_FOR_ME, 0); OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1); OUT_RING(ring, 0x0); - OUT_PKT7(ring, CP_SET_BIN_DATA5, 5); + OUT_PKT7(ring, CP_SET_MODE, 1); + OUT_RING(ring, 0x0); + + OUT_PKT7(ring, CP_SET_BIN_DATA5, 7); OUT_RING(ring, CP_SET_BIN_DATA5_0_VSC_SIZE(pipe->w * pipe->h) | CP_SET_BIN_DATA5_0_VSC_N(tile->n)); - OUT_RELOC(ring, pipe->bo, 0, 0, 0); /* VSC_PIPE[p].DATA_ADDRESS */ - OUT_RELOC(ring, fd6_ctx->vsc_size_mem, /* VSC_SIZE_ADDRESS + (p * 4) */ - (tile->p * 4), 0, 0); + OUT_RELOC(ring, fd6_ctx->vsc_data, /* VSC_PIPE[p].DATA_ADDRESS */ + (tile->p * A6XX_VSC_DATA_PITCH), 0, 0); + OUT_RELOC(ring, fd6_ctx->vsc_data, /* VSC_SIZE_ADDRESS + (p * 4) */ + (tile->p * 4) + (n * A6XX_VSC_DATA_PITCH), 0, 0); + OUT_RELOC(ring, fd6_ctx->vsc_data2, + (tile->p * A6XX_VSC_DATA2_PITCH), 0, 0); } else { OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1); OUT_RING(ring, 0x1); - } - OUT_PKT7(ring, CP_SET_MODE, 1); - OUT_RING(ring, 0x0); + OUT_PKT7(ring, CP_SET_MODE, 1); + OUT_RING(ring, 0x0); + } } static void @@ -719,12 +745,17 @@ fd6_emit_tile_gmem2mem(struct fd_batch *batch, struct fd_tile *tile) struct pipe_framebuffer_state *pfb = &batch->framebuffer; struct fd_ringbuffer *ring = batch->gmem; + if (use_hw_binning(batch)) { + OUT_PKT7(ring, CP_SET_MARKER, 1); + OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(0x5) | 0x10); + } + OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1); OUT_RING(ring, 0x0); emit_marker6(ring, 7); OUT_PKT7(ring, CP_SET_MARKER, 1); - OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE)); /* | 0x10 ? 
*/ + OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE) | 0x10); emit_marker6(ring, 7); set_blit_scissor(batch); diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index 58fba99874a..dc8779b661b 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -258,7 +258,7 @@ struct fd_context { * means we'd always have to recalc tiles ever batch) */ struct fd_gmem_stateobj gmem; - struct fd_vsc_pipe vsc_pipe[16]; + struct fd_vsc_pipe vsc_pipe[32]; struct fd_tile tile[512]; /* which state objects need to be re-emit'd: */ diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c index 92c719392c8..668730390cc 100644 --- a/src/gallium/drivers/freedreno/freedreno_gmem.c +++ b/src/gallium/drivers/freedreno/freedreno_gmem.c @@ -107,17 +107,18 @@ static void calculate_tiles(struct fd_batch *batch) { struct fd_context *ctx = batch->ctx; + struct fd_screen *screen = ctx->screen; struct fd_gmem_stateobj *gmem = &ctx->gmem; struct pipe_scissor_state *scissor = &batch->max_scissor; struct pipe_framebuffer_state *pfb = &batch->framebuffer; - const uint32_t gmem_alignw = ctx->screen->gmem_alignw; - const uint32_t gmem_alignh = ctx->screen->gmem_alignh; - const unsigned npipes = ctx->screen->num_vsc_pipes; - const uint32_t gmem_size = ctx->screen->gmemsize_bytes; + const uint32_t gmem_alignw = screen->gmem_alignw; + const uint32_t gmem_alignh = screen->gmem_alignh; + const unsigned npipes = screen->num_vsc_pipes; + const uint32_t gmem_size = screen->gmemsize_bytes; uint32_t minx, miny, width, height; uint32_t nbins_x = 1, nbins_y = 1; uint32_t bin_w, bin_h; - uint32_t max_width = bin_width(ctx->screen); + uint32_t max_width = bin_width(screen); uint8_t cbuf_cpp[MAX_RENDER_TARGETS] = {0}, zsbuf_cpp[2] = {0}; uint32_t i, j, t, xoff, yoff; uint32_t tpp_x, tpp_y; @@ -216,10 +217,10 @@ calculate_tiles(struct fd_batch *batch) #define div_round_up(v, a) (((v) + (a) - 1) / (a)) /* figure out number of tiles per pipe: */ tpp_x = tpp_y = 1; - while (div_round_up(nbins_y, tpp_y) > 8) + while (div_round_up(nbins_y, tpp_y) > screen->num_vsc_pipes) tpp_y += 2; while ((div_round_up(nbins_y, tpp_y) * - div_round_up(nbins_x, tpp_x)) > 8) + div_round_up(nbins_x, tpp_x)) > screen->num_vsc_pipes) tpp_x += 1; gmem->maxpw = tpp_x; diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.h b/src/gallium/drivers/freedreno/freedreno_gmem.h index 144e9506cfb..47f52307b60 100644 --- a/src/gallium/drivers/freedreno/freedreno_gmem.h +++ b/src/gallium/drivers/freedreno/freedreno_gmem.h @@ -35,6 +35,7 @@ /* per-pipe configuration for hw binning: */ struct fd_vsc_pipe { + // TODO a3xx/a4xx/a5xx could probably move to single bo for vsc stream, like a6xx does struct fd_bo *bo; uint8_t x, y, w, h; /* VSC_PIPE[p].CONFIG */ }; diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 33f14b8f248..231e0d4c817 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -822,7 +822,11 @@ fd_screen_create(struct fd_device *dev) goto fail; } - if (screen->gpu_id >= 500) { + if (screen->gpu_id >= 600) { + screen->gmem_alignw = 32; + screen->gmem_alignh = 32; + screen->num_vsc_pipes = 32; + } else if (screen->gpu_id >= 500) { screen->gmem_alignw = 64; screen->gmem_alignh = 32; screen->num_vsc_pipes = 16;
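
As a rough illustration of how this patch carves up the single pair of VSC buffers, the address arithmetic programmed by update_vsc_pipe() and consumed by CP_SET_BIN_DATA5 in fd6_emit_tile_prep() boils down to the sketch below. This is not driver code: the struct and helper name are hypothetical, invented for this note, and the offsets simply restate what the diff above writes (one A6XX_VSC_DATA_PITCH / A6XX_VSC_DATA2_PITCH slot per pipe, with the per-pipe stream sizes stored after the nbins_x * nbins_y data slots in vsc_data).

#include <stdint.h>

#define A6XX_VSC_DATA_PITCH   0x4400   /* per-pipe stride in vsc_data */
#define A6XX_VSC_DATA2_PITCH  0x10400  /* per-pipe stride in vsc_data2 */

struct vsc_offsets {
	uint32_t data;    /* pipe's visibility stream in vsc_data */
	uint32_t size;    /* dword holding the stream size for this pipe */
	uint32_t data2;   /* pipe's slot in the second, larger buffer */
};

/* p is the pipe index (tile->p), nbins is nbins_x * nbins_y, matching the
 * 'n' used in update_vsc_pipe() / fd6_emit_tile_prep() above.
 */
static struct vsc_offsets
vsc_offsets_for_pipe(unsigned p, unsigned nbins)
{
	return (struct vsc_offsets) {
		.data  = p * A6XX_VSC_DATA_PITCH,
		.size  = (nbins * A6XX_VSC_DATA_PITCH) + (p * 4),
		.data2 = p * A6XX_VSC_DATA2_PITCH,
	};
}

The .data and .data2 offsets are what CP_SET_BIN_DATA5 receives as relocations for each tile, while .size points at the stream-size dword that the binning pass produced for that pipe.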
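
The calculate_tiles() change above sizes tiles-per-pipe against the per-generation pipe count (now 32 on a6xx) instead of the old hard-coded 8. Pulled out as a standalone sketch, with hypothetical parameter names and the same div_round_up() macro as the driver:

#include <stdint.h>

#define div_round_up(v, a) (((v) + (a) - 1) / (a))

/* Grow tiles-per-pipe until the nbins_x * nbins_y grid fits into
 * num_vsc_pipes pipes; mirrors the loop in calculate_tiles().
 */
static void
tiles_per_pipe(uint32_t nbins_x, uint32_t nbins_y, unsigned num_vsc_pipes,
		uint32_t *tpp_x, uint32_t *tpp_y)
{
	*tpp_x = *tpp_y = 1;
	while (div_round_up(nbins_y, *tpp_y) > num_vsc_pipes)
		*tpp_y += 2;
	while ((div_round_up(nbins_y, *tpp_y) *
			div_round_up(nbins_x, *tpp_x)) > num_vsc_pipes)
		*tpp_x += 1;
}

With num_vsc_pipes = 32, larger bin grids can keep one tile per pipe where the old 8-pipe limit would already have forced multiple tiles per pipe.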