turnip: make tiling config part of framebuffer state
Compute the tiling config at framebuffer creation time. A framebuffer will be re-used multiple times, so this will avoid having to re-calculate the tiling config every time a command buffer is recorded. The tiling config already couldn't use the render area's x1/y1 because of hw binning; this move makes it so the render area isn't used at all for the tiling config. Signed-off-by: Jonathan Marek <jonathan@marek.ca> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5570>
This commit is contained in:
parent
31392f8371
commit
8898ebce1a
|
@ -2321,10 +2321,10 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
|
|||
uint32_t a,
|
||||
uint32_t gmem_a)
|
||||
{
|
||||
const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
|
||||
const VkRect2D *render_area = &tiling->render_area;
|
||||
const struct tu_framebuffer *fb = cmd->state.framebuffer;
|
||||
const VkRect2D *render_area = &cmd->state.render_area;
|
||||
struct tu_render_pass_attachment *dst = &cmd->state.pass->attachments[a];
|
||||
struct tu_image_view *iview = cmd->state.framebuffer->attachments[a].attachment;
|
||||
struct tu_image_view *iview = fb->attachments[a].attachment;
|
||||
struct tu_render_pass_attachment *src = &cmd->state.pass->attachments[gmem_a];
|
||||
|
||||
if (!dst->store)
|
||||
|
@ -2377,7 +2377,7 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
|
|||
A6XX_SP_PS_2D_SRC_SIZE( .width = 0x3fff, .height = 0x3fff),
|
||||
A6XX_SP_PS_2D_SRC_LO(cmd->device->physical_device->gmem_base + src->gmem_offset),
|
||||
A6XX_SP_PS_2D_SRC_HI(),
|
||||
A6XX_SP_PS_2D_SRC_PITCH(.pitch = tiling->tile0.extent.width * src->cpp));
|
||||
A6XX_SP_PS_2D_SRC_PITCH(.pitch = fb->tile0.width * src->cpp));
|
||||
|
||||
/* sync GMEM writes with CACHE. */
|
||||
tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);
|
||||
|
|
|
@ -109,177 +109,29 @@ tu_bo_list_merge(struct tu_bo_list *list, const struct tu_bo_list *other)
|
|||
}
|
||||
|
||||
static void
|
||||
tu_tiling_config_update_tile_layout(struct tu_tiling_config *tiling,
|
||||
const struct tu_device *dev,
|
||||
const struct tu_render_pass *pass)
|
||||
{
|
||||
const uint32_t tile_align_w = pass->tile_align_w;
|
||||
const uint32_t max_tile_width = 1024;
|
||||
|
||||
/* note: don't offset the tiling config by render_area.offset,
|
||||
* because binning pass can't deal with it
|
||||
* this means we might end up with more tiles than necessary,
|
||||
* but load/store/etc are still scissored to the render_area
|
||||
*/
|
||||
tiling->tile0.offset = (VkOffset2D) {};
|
||||
|
||||
const uint32_t ra_width =
|
||||
tiling->render_area.extent.width +
|
||||
(tiling->render_area.offset.x - tiling->tile0.offset.x);
|
||||
const uint32_t ra_height =
|
||||
tiling->render_area.extent.height +
|
||||
(tiling->render_area.offset.y - tiling->tile0.offset.y);
|
||||
|
||||
/* start from 1 tile */
|
||||
tiling->tile_count = (VkExtent2D) {
|
||||
.width = 1,
|
||||
.height = 1,
|
||||
};
|
||||
tiling->tile0.extent = (VkExtent2D) {
|
||||
.width = util_align_npot(ra_width, tile_align_w),
|
||||
.height = align(ra_height, TILE_ALIGN_H),
|
||||
};
|
||||
|
||||
if (unlikely(dev->physical_device->instance->debug_flags & TU_DEBUG_FORCEBIN)) {
|
||||
/* start with 2x2 tiles */
|
||||
tiling->tile_count.width = 2;
|
||||
tiling->tile_count.height = 2;
|
||||
tiling->tile0.extent.width = util_align_npot(DIV_ROUND_UP(ra_width, 2), tile_align_w);
|
||||
tiling->tile0.extent.height = align(DIV_ROUND_UP(ra_height, 2), TILE_ALIGN_H);
|
||||
}
|
||||
|
||||
/* do not exceed max tile width */
|
||||
while (tiling->tile0.extent.width > max_tile_width) {
|
||||
tiling->tile_count.width++;
|
||||
tiling->tile0.extent.width =
|
||||
util_align_npot(DIV_ROUND_UP(ra_width, tiling->tile_count.width), tile_align_w);
|
||||
}
|
||||
|
||||
/* will force to sysmem, don't bother trying to have a valid tile config
|
||||
* TODO: just skip all GMEM stuff when sysmem is forced?
|
||||
*/
|
||||
if (!pass->gmem_pixels)
|
||||
return;
|
||||
|
||||
/* do not exceed gmem size */
|
||||
while (tiling->tile0.extent.width * tiling->tile0.extent.height > pass->gmem_pixels) {
|
||||
if (tiling->tile0.extent.width > MAX2(tile_align_w, tiling->tile0.extent.height)) {
|
||||
tiling->tile_count.width++;
|
||||
tiling->tile0.extent.width =
|
||||
util_align_npot(DIV_ROUND_UP(ra_width, tiling->tile_count.width), tile_align_w);
|
||||
} else {
|
||||
/* if this assert fails then layout is impossible.. */
|
||||
assert(tiling->tile0.extent.height > TILE_ALIGN_H);
|
||||
tiling->tile_count.height++;
|
||||
tiling->tile0.extent.height =
|
||||
align(DIV_ROUND_UP(ra_height, tiling->tile_count.height), TILE_ALIGN_H);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
tu_tiling_config_update_pipe_layout(struct tu_tiling_config *tiling,
|
||||
const struct tu_device *dev)
|
||||
{
|
||||
const uint32_t max_pipe_count = 32; /* A6xx */
|
||||
|
||||
/* start from 1 tile per pipe */
|
||||
tiling->pipe0 = (VkExtent2D) {
|
||||
.width = 1,
|
||||
.height = 1,
|
||||
};
|
||||
tiling->pipe_count = tiling->tile_count;
|
||||
|
||||
while (tiling->pipe_count.width * tiling->pipe_count.height > max_pipe_count) {
|
||||
if (tiling->pipe0.width < tiling->pipe0.height) {
|
||||
tiling->pipe0.width += 1;
|
||||
tiling->pipe_count.width =
|
||||
DIV_ROUND_UP(tiling->tile_count.width, tiling->pipe0.width);
|
||||
} else {
|
||||
tiling->pipe0.height += 1;
|
||||
tiling->pipe_count.height =
|
||||
DIV_ROUND_UP(tiling->tile_count.height, tiling->pipe0.height);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
tu_tiling_config_update_pipes(struct tu_tiling_config *tiling,
|
||||
const struct tu_device *dev)
|
||||
{
|
||||
const uint32_t max_pipe_count = 32; /* A6xx */
|
||||
const uint32_t used_pipe_count =
|
||||
tiling->pipe_count.width * tiling->pipe_count.height;
|
||||
const VkExtent2D last_pipe = {
|
||||
.width = (tiling->tile_count.width - 1) % tiling->pipe0.width + 1,
|
||||
.height = (tiling->tile_count.height - 1) % tiling->pipe0.height + 1,
|
||||
};
|
||||
|
||||
assert(used_pipe_count <= max_pipe_count);
|
||||
assert(max_pipe_count <= ARRAY_SIZE(tiling->pipe_config));
|
||||
|
||||
for (uint32_t y = 0; y < tiling->pipe_count.height; y++) {
|
||||
for (uint32_t x = 0; x < tiling->pipe_count.width; x++) {
|
||||
const uint32_t pipe_x = tiling->pipe0.width * x;
|
||||
const uint32_t pipe_y = tiling->pipe0.height * y;
|
||||
const uint32_t pipe_w = (x == tiling->pipe_count.width - 1)
|
||||
? last_pipe.width
|
||||
: tiling->pipe0.width;
|
||||
const uint32_t pipe_h = (y == tiling->pipe_count.height - 1)
|
||||
? last_pipe.height
|
||||
: tiling->pipe0.height;
|
||||
const uint32_t n = tiling->pipe_count.width * y + x;
|
||||
|
||||
tiling->pipe_config[n] = A6XX_VSC_PIPE_CONFIG_REG_X(pipe_x) |
|
||||
A6XX_VSC_PIPE_CONFIG_REG_Y(pipe_y) |
|
||||
A6XX_VSC_PIPE_CONFIG_REG_W(pipe_w) |
|
||||
A6XX_VSC_PIPE_CONFIG_REG_H(pipe_h);
|
||||
tiling->pipe_sizes[n] = CP_SET_BIN_DATA5_0_VSC_SIZE(pipe_w * pipe_h);
|
||||
}
|
||||
}
|
||||
|
||||
memset(tiling->pipe_config + used_pipe_count, 0,
|
||||
sizeof(uint32_t) * (max_pipe_count - used_pipe_count));
|
||||
}
|
||||
|
||||
static void
|
||||
tu_tiling_config_get_tile(const struct tu_tiling_config *tiling,
|
||||
const struct tu_device *dev,
|
||||
tu_tiling_config_get_tile(const struct tu_framebuffer *fb,
|
||||
uint32_t tx,
|
||||
uint32_t ty,
|
||||
struct tu_tile *tile)
|
||||
uint32_t *pipe,
|
||||
uint32_t *slot)
|
||||
{
|
||||
/* find the pipe and the slot for tile (tx, ty) */
|
||||
const uint32_t px = tx / tiling->pipe0.width;
|
||||
const uint32_t py = ty / tiling->pipe0.height;
|
||||
const uint32_t sx = tx - tiling->pipe0.width * px;
|
||||
const uint32_t sy = ty - tiling->pipe0.height * py;
|
||||
const uint32_t px = tx / fb->pipe0.width;
|
||||
const uint32_t py = ty / fb->pipe0.height;
|
||||
const uint32_t sx = tx - fb->pipe0.width * px;
|
||||
const uint32_t sy = ty - fb->pipe0.height * py;
|
||||
/* last pipe has different width */
|
||||
const uint32_t pipe_width =
|
||||
MIN2(tiling->pipe0.width,
|
||||
tiling->tile_count.width - px * tiling->pipe0.width);
|
||||
MIN2(fb->pipe0.width,
|
||||
fb->tile_count.width - px * fb->pipe0.width);
|
||||
|
||||
assert(tx < tiling->tile_count.width && ty < tiling->tile_count.height);
|
||||
assert(px < tiling->pipe_count.width && py < tiling->pipe_count.height);
|
||||
assert(sx < tiling->pipe0.width && sy < tiling->pipe0.height);
|
||||
assert(tx < fb->tile_count.width && ty < fb->tile_count.height);
|
||||
assert(px < fb->pipe_count.width && py < fb->pipe_count.height);
|
||||
assert(sx < fb->pipe0.width && sy < fb->pipe0.height);
|
||||
|
||||
/* convert to 1D indices */
|
||||
tile->pipe = tiling->pipe_count.width * py + px;
|
||||
tile->slot = pipe_width * sy + sx;
|
||||
|
||||
/* get the blit area for the tile */
|
||||
tile->begin = (VkOffset2D) {
|
||||
.x = tiling->tile0.offset.x + tiling->tile0.extent.width * tx,
|
||||
.y = tiling->tile0.offset.y + tiling->tile0.extent.height * ty,
|
||||
};
|
||||
tile->end.x =
|
||||
(tx == tiling->tile_count.width - 1)
|
||||
? tiling->render_area.offset.x + tiling->render_area.extent.width
|
||||
: tile->begin.x + tiling->tile0.extent.width;
|
||||
tile->end.y =
|
||||
(ty == tiling->tile_count.height - 1)
|
||||
? tiling->render_area.offset.y + tiling->render_area.extent.height
|
||||
: tile->begin.y + tiling->tile0.extent.height;
|
||||
*pipe = fb->pipe_count.width * py + px;
|
||||
*slot = pipe_width * sy + sx;
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -602,7 +454,7 @@ tu6_emit_render_cntl(struct tu_cmd_buffer *cmd,
|
|||
static void
|
||||
tu6_emit_blit_scissor(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool align)
|
||||
{
|
||||
const VkRect2D *render_area = &cmd->state.tiling_config.render_area;
|
||||
const VkRect2D *render_area = &cmd->state.render_area;
|
||||
uint32_t x1 = render_area->offset.x;
|
||||
uint32_t y1 = render_area->offset.y;
|
||||
uint32_t x2 = x1 + render_area->extent.width - 1;
|
||||
|
@ -706,7 +558,7 @@ tu_cs_emit_sds_ib(struct tu_cs *cs, uint32_t id, struct tu_cs_entry entry)
|
|||
static bool
|
||||
use_hw_binning(struct tu_cmd_buffer *cmd)
|
||||
{
|
||||
const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
|
||||
const struct tu_framebuffer *fb = cmd->state.framebuffer;
|
||||
|
||||
/* XFB commands are emitted for BINNING || SYSMEM, which makes it incompatible
|
||||
* with non-hw binning GMEM rendering. this is required because some of the
|
||||
|
@ -721,7 +573,7 @@ use_hw_binning(struct tu_cmd_buffer *cmd)
|
|||
if (unlikely(cmd->device->physical_device->instance->debug_flags & TU_DEBUG_FORCEBIN))
|
||||
return true;
|
||||
|
||||
return (tiling->tile_count.width * tiling->tile_count.height) > 2;
|
||||
return (fb->tile_count.width * fb->tile_count.height) > 2;
|
||||
}
|
||||
|
||||
static bool
|
||||
|
@ -740,24 +592,29 @@ use_sysmem_rendering(struct tu_cmd_buffer *cmd)
|
|||
if (cmd->has_tess)
|
||||
return true;
|
||||
|
||||
return cmd->state.tiling_config.force_sysmem;
|
||||
return false;
|
||||
}
|
||||
|
||||
static void
|
||||
tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
const struct tu_tile *tile)
|
||||
uint32_t tx, uint32_t ty)
|
||||
{
|
||||
const struct tu_framebuffer *fb = cmd->state.framebuffer;
|
||||
uint32_t pipe, slot;
|
||||
|
||||
tu_tiling_config_get_tile(fb, tx, ty, &pipe, &slot);
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
|
||||
tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_YIELD));
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
|
||||
tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_GMEM));
|
||||
|
||||
const uint32_t x1 = tile->begin.x;
|
||||
const uint32_t y1 = tile->begin.y;
|
||||
const uint32_t x2 = tile->end.x - 1;
|
||||
const uint32_t y2 = tile->end.y - 1;
|
||||
const uint32_t x1 = fb->tile0.width * tx;
|
||||
const uint32_t y1 = fb->tile0.height * ty;
|
||||
const uint32_t x2 = x1 + fb->tile0.width - 1;
|
||||
const uint32_t y2 = y1 + fb->tile0.height - 1;
|
||||
tu6_emit_window_scissor(cs, x1, y1, x2, y2);
|
||||
tu6_emit_window_offset(cs, x1, y1);
|
||||
|
||||
|
@ -771,11 +628,11 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
|
|||
tu_cs_emit(cs, 0x0);
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_SET_BIN_DATA5, 7);
|
||||
tu_cs_emit(cs, cmd->state.tiling_config.pipe_sizes[tile->pipe] |
|
||||
CP_SET_BIN_DATA5_0_VSC_N(tile->slot));
|
||||
tu_cs_emit_qw(cs, cmd->vsc_draw_strm.iova + tile->pipe * cmd->vsc_draw_strm_pitch);
|
||||
tu_cs_emit_qw(cs, cmd->vsc_draw_strm.iova + (tile->pipe * 4) + (32 * cmd->vsc_draw_strm_pitch));
|
||||
tu_cs_emit_qw(cs, cmd->vsc_prim_strm.iova + (tile->pipe * cmd->vsc_prim_strm_pitch));
|
||||
tu_cs_emit(cs, fb->pipe_sizes[pipe] |
|
||||
CP_SET_BIN_DATA5_0_VSC_N(slot));
|
||||
tu_cs_emit_qw(cs, cmd->vsc_draw_strm.iova + pipe * cmd->vsc_draw_strm_pitch);
|
||||
tu_cs_emit_qw(cs, cmd->vsc_draw_strm.iova + pipe * 4 + 32 * cmd->vsc_draw_strm_pitch);
|
||||
tu_cs_emit_qw(cs, cmd->vsc_prim_strm.iova + pipe * cmd->vsc_prim_strm_pitch);
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_SET_VISIBILITY_OVERRIDE, 1);
|
||||
tu_cs_emit(cs, 0x0);
|
||||
|
@ -801,7 +658,7 @@ tu6_emit_sysmem_resolve(struct tu_cmd_buffer *cmd,
|
|||
struct tu_image_view *dst = fb->attachments[a].attachment;
|
||||
struct tu_image_view *src = fb->attachments[gmem_a].attachment;
|
||||
|
||||
tu_resolve_sysmem(cmd, cs, src, dst, fb->layers, &cmd->state.tiling_config.render_area);
|
||||
tu_resolve_sysmem(cmd, cs, src, dst, fb->layers, &cmd->state.render_area);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -1009,21 +866,20 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
|||
static void
|
||||
update_vsc_pipe(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||
{
|
||||
const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
|
||||
const struct tu_framebuffer *fb = cmd->state.framebuffer;
|
||||
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_VSC_BIN_SIZE(.width = tiling->tile0.extent.width,
|
||||
.height = tiling->tile0.extent.height),
|
||||
A6XX_VSC_BIN_SIZE(.width = fb->tile0.width,
|
||||
.height = fb->tile0.height),
|
||||
A6XX_VSC_DRAW_STRM_SIZE_ADDRESS(.bo = &cmd->vsc_draw_strm,
|
||||
.bo_offset = 32 * cmd->vsc_draw_strm_pitch));
|
||||
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_VSC_BIN_COUNT(.nx = tiling->tile_count.width,
|
||||
.ny = tiling->tile_count.height));
|
||||
A6XX_VSC_BIN_COUNT(.nx = fb->tile_count.width,
|
||||
.ny = fb->tile_count.height));
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_VSC_PIPE_CONFIG_REG(0), 32);
|
||||
for (unsigned i = 0; i < 32; i++)
|
||||
tu_cs_emit(cs, tiling->pipe_config[i]);
|
||||
tu_cs_emit_array(cs, fb->pipe_config, 32);
|
||||
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_VSC_PRIM_STRM_ADDRESS(.bo = &cmd->vsc_prim_strm),
|
||||
|
@ -1039,9 +895,9 @@ update_vsc_pipe(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
|||
static void
|
||||
emit_vsc_overflow_test(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||
{
|
||||
const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
|
||||
const struct tu_framebuffer *fb = cmd->state.framebuffer;
|
||||
const uint32_t used_pipe_count =
|
||||
tiling->pipe_count.width * tiling->pipe_count.height;
|
||||
fb->pipe_count.width * fb->pipe_count.height;
|
||||
|
||||
/* Clear vsc_scratch: */
|
||||
tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 3);
|
||||
|
@ -1078,14 +934,9 @@ static void
|
|||
tu6_emit_binning_pass(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||
{
|
||||
struct tu_physical_device *phys_dev = cmd->device->physical_device;
|
||||
const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
|
||||
const struct tu_framebuffer *fb = cmd->state.framebuffer;
|
||||
|
||||
uint32_t x1 = tiling->tile0.offset.x;
|
||||
uint32_t y1 = tiling->tile0.offset.y;
|
||||
uint32_t x2 = tiling->render_area.offset.x + tiling->render_area.extent.width - 1;
|
||||
uint32_t y2 = tiling->render_area.offset.y + tiling->render_area.extent.height - 1;
|
||||
|
||||
tu6_emit_window_scissor(cs, x1, y1, x2, y2);
|
||||
tu6_emit_window_scissor(cs, 0, 0, fb->width - 1, fb->height - 1);
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
|
||||
tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BINNING));
|
||||
|
@ -1213,7 +1064,7 @@ tu_emit_input_attachments(struct tu_cmd_buffer *cmd,
|
|||
dst[0] |= A6XX_TEX_CONST_0_TILE_MODE(TILE6_2);
|
||||
dst[2] =
|
||||
A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D) |
|
||||
A6XX_TEX_CONST_2_PITCH(cmd->state.tiling_config.tile0.extent.width * att->cpp);
|
||||
A6XX_TEX_CONST_2_PITCH(cmd->state.framebuffer->tile0.width * att->cpp);
|
||||
dst[3] = 0;
|
||||
dst[4] = cmd->device->physical_device->gmem_base + att->gmem_offset;
|
||||
dst[5] = A6XX_TEX_CONST_5_DEPTH(1);
|
||||
|
@ -1282,8 +1133,7 @@ tu_emit_renderpass_begin(struct tu_cmd_buffer *cmd,
|
|||
}
|
||||
|
||||
static void
|
||||
tu6_sysmem_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
||||
const struct VkRect2D *renderArea)
|
||||
tu6_sysmem_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||
{
|
||||
const struct tu_framebuffer *fb = cmd->state.framebuffer;
|
||||
|
||||
|
@ -1348,14 +1198,12 @@ tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
|||
|
||||
tu_emit_cache_flush_ccu(cmd, cs, TU_CMD_CCU_GMEM);
|
||||
|
||||
const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
|
||||
const struct tu_framebuffer *fb = cmd->state.framebuffer;
|
||||
if (use_hw_binning(cmd)) {
|
||||
/* enable stream-out during binning pass: */
|
||||
tu_cs_emit_regs(cs, A6XX_VPC_SO_OVERRIDE(.so_disable=false));
|
||||
|
||||
tu6_emit_bin_size(cs,
|
||||
tiling->tile0.extent.width,
|
||||
tiling->tile0.extent.height,
|
||||
tu6_emit_bin_size(cs, fb->tile0.width, fb->tile0.height,
|
||||
A6XX_RB_BIN_CONTROL_BINNING_PASS | 0x6000000);
|
||||
|
||||
tu6_emit_render_cntl(cmd, cmd->state.subpass, cs, true);
|
||||
|
@ -1365,9 +1213,7 @@ tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
|||
/* and disable stream-out for draw pass: */
|
||||
tu_cs_emit_regs(cs, A6XX_VPC_SO_OVERRIDE(.so_disable=true));
|
||||
|
||||
tu6_emit_bin_size(cs,
|
||||
tiling->tile0.extent.width,
|
||||
tiling->tile0.extent.height,
|
||||
tu6_emit_bin_size(cs, fb->tile0.width, fb->tile0.height,
|
||||
A6XX_RB_BIN_CONTROL_USE_VIZ | 0x6000000);
|
||||
|
||||
tu_cs_emit_regs(cs,
|
||||
|
@ -1383,10 +1229,7 @@ tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
|||
/* no binning pass, so enable stream-out for draw pass:: */
|
||||
tu_cs_emit_regs(cs, A6XX_VPC_SO_OVERRIDE(.so_disable=false));
|
||||
|
||||
tu6_emit_bin_size(cs,
|
||||
tiling->tile0.extent.width,
|
||||
tiling->tile0.extent.height,
|
||||
0x6000000);
|
||||
tu6_emit_bin_size(cs, fb->tile0.width, fb->tile0.height, 0x6000000);
|
||||
}
|
||||
|
||||
tu_cs_sanity_check(cs);
|
||||
|
@ -1395,9 +1238,9 @@ tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
|||
static void
|
||||
tu6_render_tile(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
const struct tu_tile *tile)
|
||||
uint32_t tx, uint32_t ty)
|
||||
{
|
||||
tu6_emit_tile_select(cmd, cs, tile);
|
||||
tu6_emit_tile_select(cmd, cs, tx, ty);
|
||||
|
||||
tu_cs_emit_call(cs, &cmd->draw_cs);
|
||||
|
||||
|
@ -1429,19 +1272,16 @@ tu6_tile_render_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
|||
static void
|
||||
tu_cmd_render_tiles(struct tu_cmd_buffer *cmd)
|
||||
{
|
||||
const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
|
||||
const struct tu_framebuffer *fb = cmd->state.framebuffer;
|
||||
|
||||
if (use_hw_binning(cmd))
|
||||
cmd->use_vsc_data = true;
|
||||
|
||||
tu6_tile_render_begin(cmd, &cmd->cs);
|
||||
|
||||
for (uint32_t y = 0; y < tiling->tile_count.height; y++) {
|
||||
for (uint32_t x = 0; x < tiling->tile_count.width; x++) {
|
||||
struct tu_tile tile;
|
||||
tu_tiling_config_get_tile(tiling, cmd->device, x, y, &tile);
|
||||
tu6_render_tile(cmd, &cmd->cs, &tile);
|
||||
}
|
||||
for (uint32_t y = 0; y < fb->tile_count.height; y++) {
|
||||
for (uint32_t x = 0; x < fb->tile_count.width; x++)
|
||||
tu6_render_tile(cmd, &cmd->cs, x, y);
|
||||
}
|
||||
|
||||
tu6_tile_render_end(cmd, &cmd->cs);
|
||||
|
@ -1450,9 +1290,7 @@ tu_cmd_render_tiles(struct tu_cmd_buffer *cmd)
|
|||
static void
|
||||
tu_cmd_render_sysmem(struct tu_cmd_buffer *cmd)
|
||||
{
|
||||
const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
|
||||
|
||||
tu6_sysmem_render_begin(cmd, &cmd->cs, &tiling->render_area);
|
||||
tu6_sysmem_render_begin(cmd, &cmd->cs);
|
||||
|
||||
tu_cs_emit_call(&cmd->cs, &cmd->draw_cs);
|
||||
|
||||
|
@ -1478,21 +1316,6 @@ tu_cmd_prepare_tile_store_ib(struct tu_cmd_buffer *cmd)
|
|||
cmd->state.tile_store_ib = tu_cs_end_sub_stream(&cmd->sub_cs, &sub_cs);
|
||||
}
|
||||
|
||||
static void
|
||||
tu_cmd_update_tiling_config(struct tu_cmd_buffer *cmd,
|
||||
const VkRect2D *render_area)
|
||||
{
|
||||
const struct tu_device *dev = cmd->device;
|
||||
struct tu_tiling_config *tiling = &cmd->state.tiling_config;
|
||||
|
||||
tiling->render_area = *render_area;
|
||||
tiling->force_sysmem = false;
|
||||
|
||||
tu_tiling_config_update_tile_layout(tiling, dev, cmd->state.pass);
|
||||
tu_tiling_config_update_pipe_layout(tiling, dev);
|
||||
tu_tiling_config_update_pipes(tiling, dev);
|
||||
}
|
||||
|
||||
static VkResult
|
||||
tu_create_cmd_buffer(struct tu_device *device,
|
||||
struct tu_cmd_pool *pool,
|
||||
|
@ -2791,8 +2614,8 @@ tu_CmdBeginRenderPass(VkCommandBuffer commandBuffer,
|
|||
cmd->state.pass = pass;
|
||||
cmd->state.subpass = pass->subpasses;
|
||||
cmd->state.framebuffer = fb;
|
||||
cmd->state.render_area = pRenderPassBegin->renderArea;
|
||||
|
||||
tu_cmd_update_tiling_config(cmd, &pRenderPassBegin->renderArea);
|
||||
tu_cmd_prepare_tile_store_ib(cmd);
|
||||
|
||||
/* Note: because this is external, any flushes will happen before draw_cs
|
||||
|
|
|
@ -2271,6 +2271,7 @@ tu_CreateFramebuffer(VkDevice _device,
|
|||
VkFramebuffer *pFramebuffer)
|
||||
{
|
||||
TU_FROM_HANDLE(tu_device, device, _device);
|
||||
TU_FROM_HANDLE(tu_render_pass, pass, pCreateInfo->renderPass);
|
||||
struct tu_framebuffer *framebuffer;
|
||||
|
||||
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
|
||||
|
@ -2292,6 +2293,8 @@ tu_CreateFramebuffer(VkDevice _device,
|
|||
framebuffer->attachments[i].attachment = iview;
|
||||
}
|
||||
|
||||
tu_framebuffer_tiling_config(framebuffer, device, pass);
|
||||
|
||||
*pFramebuffer = tu_framebuffer_to_handle(framebuffer);
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
|
|
@ -654,36 +654,6 @@ struct tu_descriptor_state
|
|||
uint32_t dynamic_descriptors[MAX_DYNAMIC_BUFFERS * A6XX_TEX_CONST_DWORDS];
|
||||
};
|
||||
|
||||
struct tu_tile
|
||||
{
|
||||
uint8_t pipe;
|
||||
uint8_t slot;
|
||||
VkOffset2D begin;
|
||||
VkOffset2D end;
|
||||
};
|
||||
|
||||
struct tu_tiling_config
|
||||
{
|
||||
VkRect2D render_area;
|
||||
|
||||
/* position and size of the first tile */
|
||||
VkRect2D tile0;
|
||||
/* number of tiles */
|
||||
VkExtent2D tile_count;
|
||||
|
||||
/* size of the first VSC pipe */
|
||||
VkExtent2D pipe0;
|
||||
/* number of VSC pipes */
|
||||
VkExtent2D pipe_count;
|
||||
|
||||
/* pipe register values */
|
||||
uint32_t pipe_config[MAX_VSC_PIPES];
|
||||
uint32_t pipe_sizes[MAX_VSC_PIPES];
|
||||
|
||||
/* Whether sysmem rendering must be used */
|
||||
bool force_sysmem;
|
||||
};
|
||||
|
||||
enum tu_cmd_dirty_bits
|
||||
{
|
||||
TU_CMD_DIRTY_COMPUTE_PIPELINE = 1 << 1,
|
||||
|
@ -859,8 +829,7 @@ struct tu_cmd_state
|
|||
const struct tu_render_pass *pass;
|
||||
const struct tu_subpass *subpass;
|
||||
const struct tu_framebuffer *framebuffer;
|
||||
|
||||
struct tu_tiling_config tiling_config;
|
||||
VkRect2D render_area;
|
||||
|
||||
struct tu_cs_entry tile_store_ib;
|
||||
|
||||
|
@ -1389,10 +1358,29 @@ struct tu_framebuffer
|
|||
uint32_t height;
|
||||
uint32_t layers;
|
||||
|
||||
/* size of the first tile */
|
||||
VkExtent2D tile0;
|
||||
/* number of tiles */
|
||||
VkExtent2D tile_count;
|
||||
|
||||
/* size of the first VSC pipe */
|
||||
VkExtent2D pipe0;
|
||||
/* number of VSC pipes */
|
||||
VkExtent2D pipe_count;
|
||||
|
||||
/* pipe register values */
|
||||
uint32_t pipe_config[MAX_VSC_PIPES];
|
||||
uint32_t pipe_sizes[MAX_VSC_PIPES];
|
||||
|
||||
uint32_t attachment_count;
|
||||
struct tu_attachment_info attachments[0];
|
||||
};
|
||||
|
||||
void
|
||||
tu_framebuffer_tiling_config(struct tu_framebuffer *fb,
|
||||
const struct tu_device *device,
|
||||
const struct tu_render_pass *pass);
|
||||
|
||||
struct tu_subpass_barrier {
|
||||
VkPipelineStageFlags src_stage_mask;
|
||||
VkAccessFlags src_access_mask;
|
||||
|
|
|
@ -116,3 +116,133 @@ __vk_errorf(struct tu_instance *instance,
|
|||
|
||||
return error;
|
||||
}
|
||||
|
||||
static void
|
||||
tu_tiling_config_update_tile_layout(struct tu_framebuffer *fb,
|
||||
const struct tu_device *dev,
|
||||
const struct tu_render_pass *pass)
|
||||
{
|
||||
const uint32_t tile_align_w = pass->tile_align_w;
|
||||
const uint32_t max_tile_width = 1024;
|
||||
|
||||
/* start from 1 tile */
|
||||
fb->tile_count = (VkExtent2D) {
|
||||
.width = 1,
|
||||
.height = 1,
|
||||
};
|
||||
fb->tile0 = (VkExtent2D) {
|
||||
.width = util_align_npot(fb->width, tile_align_w),
|
||||
.height = align(fb->height, TILE_ALIGN_H),
|
||||
};
|
||||
|
||||
if (unlikely(dev->physical_device->instance->debug_flags & TU_DEBUG_FORCEBIN)) {
|
||||
/* start with 2x2 tiles */
|
||||
fb->tile_count.width = 2;
|
||||
fb->tile_count.height = 2;
|
||||
fb->tile0.width = util_align_npot(DIV_ROUND_UP(fb->width, 2), tile_align_w);
|
||||
fb->tile0.height = align(DIV_ROUND_UP(fb->height, 2), TILE_ALIGN_H);
|
||||
}
|
||||
|
||||
/* do not exceed max tile width */
|
||||
while (fb->tile0.width > max_tile_width) {
|
||||
fb->tile_count.width++;
|
||||
fb->tile0.width =
|
||||
util_align_npot(DIV_ROUND_UP(fb->width, fb->tile_count.width), tile_align_w);
|
||||
}
|
||||
|
||||
/* will force to sysmem, don't bother trying to have a valid tile config
|
||||
* TODO: just skip all GMEM stuff when sysmem is forced?
|
||||
*/
|
||||
if (!pass->gmem_pixels)
|
||||
return;
|
||||
|
||||
/* do not exceed gmem size */
|
||||
while (fb->tile0.width * fb->tile0.height > pass->gmem_pixels) {
|
||||
if (fb->tile0.width > MAX2(tile_align_w, fb->tile0.height)) {
|
||||
fb->tile_count.width++;
|
||||
fb->tile0.width =
|
||||
util_align_npot(DIV_ROUND_UP(fb->width, fb->tile_count.width), tile_align_w);
|
||||
} else {
|
||||
/* if this assert fails then layout is impossible.. */
|
||||
assert(fb->tile0.height > TILE_ALIGN_H);
|
||||
fb->tile_count.height++;
|
||||
fb->tile0.height =
|
||||
align(DIV_ROUND_UP(fb->height, fb->tile_count.height), TILE_ALIGN_H);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
tu_tiling_config_update_pipe_layout(struct tu_framebuffer *fb,
|
||||
const struct tu_device *dev)
|
||||
{
|
||||
const uint32_t max_pipe_count = 32; /* A6xx */
|
||||
|
||||
/* start from 1 tile per pipe */
|
||||
fb->pipe0 = (VkExtent2D) {
|
||||
.width = 1,
|
||||
.height = 1,
|
||||
};
|
||||
fb->pipe_count = fb->tile_count;
|
||||
|
||||
while (fb->pipe_count.width * fb->pipe_count.height > max_pipe_count) {
|
||||
if (fb->pipe0.width < fb->pipe0.height) {
|
||||
fb->pipe0.width += 1;
|
||||
fb->pipe_count.width =
|
||||
DIV_ROUND_UP(fb->tile_count.width, fb->pipe0.width);
|
||||
} else {
|
||||
fb->pipe0.height += 1;
|
||||
fb->pipe_count.height =
|
||||
DIV_ROUND_UP(fb->tile_count.height, fb->pipe0.height);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
tu_tiling_config_update_pipes(struct tu_framebuffer *fb,
|
||||
const struct tu_device *dev)
|
||||
{
|
||||
const uint32_t max_pipe_count = 32; /* A6xx */
|
||||
const uint32_t used_pipe_count =
|
||||
fb->pipe_count.width * fb->pipe_count.height;
|
||||
const VkExtent2D last_pipe = {
|
||||
.width = (fb->tile_count.width - 1) % fb->pipe0.width + 1,
|
||||
.height = (fb->tile_count.height - 1) % fb->pipe0.height + 1,
|
||||
};
|
||||
|
||||
assert(used_pipe_count <= max_pipe_count);
|
||||
assert(max_pipe_count <= ARRAY_SIZE(fb->pipe_config));
|
||||
|
||||
for (uint32_t y = 0; y < fb->pipe_count.height; y++) {
|
||||
for (uint32_t x = 0; x < fb->pipe_count.width; x++) {
|
||||
const uint32_t pipe_x = fb->pipe0.width * x;
|
||||
const uint32_t pipe_y = fb->pipe0.height * y;
|
||||
const uint32_t pipe_w = (x == fb->pipe_count.width - 1)
|
||||
? last_pipe.width
|
||||
: fb->pipe0.width;
|
||||
const uint32_t pipe_h = (y == fb->pipe_count.height - 1)
|
||||
? last_pipe.height
|
||||
: fb->pipe0.height;
|
||||
const uint32_t n = fb->pipe_count.width * y + x;
|
||||
|
||||
fb->pipe_config[n] = A6XX_VSC_PIPE_CONFIG_REG_X(pipe_x) |
|
||||
A6XX_VSC_PIPE_CONFIG_REG_Y(pipe_y) |
|
||||
A6XX_VSC_PIPE_CONFIG_REG_W(pipe_w) |
|
||||
A6XX_VSC_PIPE_CONFIG_REG_H(pipe_h);
|
||||
fb->pipe_sizes[n] = CP_SET_BIN_DATA5_0_VSC_SIZE(pipe_w * pipe_h);
|
||||
}
|
||||
}
|
||||
|
||||
memset(fb->pipe_config + used_pipe_count, 0,
|
||||
sizeof(uint32_t) * (max_pipe_count - used_pipe_count));
|
||||
}
|
||||
|
||||
/* Compute the complete tiling configuration stored in a framebuffer.
 *
 * The steps must run in this order: the pipe layout is derived from the tile
 * grid, and the pipe register values are derived from the pipe layout.
 */
void
tu_framebuffer_tiling_config(struct tu_framebuffer *fb,
                             const struct tu_device *device,
                             const struct tu_render_pass *pass)
{
   /* 1. tile grid: fb->tile0, fb->tile_count */
   tu_tiling_config_update_tile_layout(fb, device, pass);

   /* 2. grouping of tiles into pipes: fb->pipe0, fb->pipe_count */
   tu_tiling_config_update_pipe_layout(fb, device);

   /* 3. register values: fb->pipe_config, fb->pipe_sizes */
   tu_tiling_config_update_pipes(fb, device);
}
|
||||
|
|
Loading…
Reference in New Issue