turnip: make tiling config part of framebuffer state

Compute the tiling config at framebuffer creation time. A framebuffer will
be re-used multiple times, so this will avoid having to re-calculate the
tiling config every time a command buffer is recorded.

The tiling config already couldn't use the render area's x1/y1 because of
hw binning, this move makes it so the render area isn't used at all for the
tiling config.

Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5570>
This commit is contained in:
Jonathan Marek 2020-06-18 20:39:39 -04:00 committed by Marge Bot
parent 31392f8371
commit 8898ebce1a
5 changed files with 215 additions and 271 deletions

View File

@ -2321,10 +2321,10 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
uint32_t a,
uint32_t gmem_a)
{
const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
const VkRect2D *render_area = &tiling->render_area;
const struct tu_framebuffer *fb = cmd->state.framebuffer;
const VkRect2D *render_area = &cmd->state.render_area;
struct tu_render_pass_attachment *dst = &cmd->state.pass->attachments[a];
struct tu_image_view *iview = cmd->state.framebuffer->attachments[a].attachment;
struct tu_image_view *iview = fb->attachments[a].attachment;
struct tu_render_pass_attachment *src = &cmd->state.pass->attachments[gmem_a];
if (!dst->store)
@ -2377,7 +2377,7 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
A6XX_SP_PS_2D_SRC_SIZE( .width = 0x3fff, .height = 0x3fff),
A6XX_SP_PS_2D_SRC_LO(cmd->device->physical_device->gmem_base + src->gmem_offset),
A6XX_SP_PS_2D_SRC_HI(),
A6XX_SP_PS_2D_SRC_PITCH(.pitch = tiling->tile0.extent.width * src->cpp));
A6XX_SP_PS_2D_SRC_PITCH(.pitch = fb->tile0.width * src->cpp));
/* sync GMEM writes with CACHE. */
tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);

View File

@ -109,177 +109,29 @@ tu_bo_list_merge(struct tu_bo_list *list, const struct tu_bo_list *other)
}
/* Choose the tile grid for the render area stored in `tiling`.
 *
 * Writes tiling->tile0 (offset and extent of the first tile) and
 * tiling->tile_count. The search starts from a single tile (or 2x2 when
 * TU_DEBUG_FORCEBIN is set), adds tile columns until the tile width is at
 * most max_tile_width, and then adds columns or rows until one tile's
 * width * height fits in pass->gmem_pixels.
 */
static void
tu_tiling_config_update_tile_layout(struct tu_tiling_config *tiling,
const struct tu_device *dev,
const struct tu_render_pass *pass)
{
const uint32_t tile_align_w = pass->tile_align_w;
const uint32_t max_tile_width = 1024;
/* note: don't offset the tiling config by render_area.offset,
 * because binning pass can't deal with it
 * this means we might end up with more tiles than necessary,
 * but load/store/etc are still scissored to the render_area
 */
tiling->tile0.offset = (VkOffset2D) {};
/* extent to cover, measured from tile0.offset to the far edge of the
 * render area
 */
const uint32_t ra_width =
tiling->render_area.extent.width +
(tiling->render_area.offset.x - tiling->tile0.offset.x);
const uint32_t ra_height =
tiling->render_area.extent.height +
(tiling->render_area.offset.y - tiling->tile0.offset.y);
/* start from 1 tile */
tiling->tile_count = (VkExtent2D) {
.width = 1,
.height = 1,
};
tiling->tile0.extent = (VkExtent2D) {
.width = util_align_npot(ra_width, tile_align_w),
.height = align(ra_height, TILE_ALIGN_H),
};
if (unlikely(dev->physical_device->instance->debug_flags & TU_DEBUG_FORCEBIN)) {
/* start with 2x2 tiles */
tiling->tile_count.width = 2;
tiling->tile_count.height = 2;
tiling->tile0.extent.width = util_align_npot(DIV_ROUND_UP(ra_width, 2), tile_align_w);
tiling->tile0.extent.height = align(DIV_ROUND_UP(ra_height, 2), TILE_ALIGN_H);
}
/* do not exceed max tile width */
while (tiling->tile0.extent.width > max_tile_width) {
tiling->tile_count.width++;
tiling->tile0.extent.width =
util_align_npot(DIV_ROUND_UP(ra_width, tiling->tile_count.width), tile_align_w);
}
/* will force to sysmem, don't bother trying to have a valid tile config
 * TODO: just skip all GMEM stuff when sysmem is forced?
 */
if (!pass->gmem_pixels)
return;
/* do not exceed gmem size */
while (tiling->tile0.extent.width * tiling->tile0.extent.height > pass->gmem_pixels) {
/* split along the width while the tile is wider than both the width
 * alignment and its own height; otherwise split along the height
 */
if (tiling->tile0.extent.width > MAX2(tile_align_w, tiling->tile0.extent.height)) {
tiling->tile_count.width++;
tiling->tile0.extent.width =
util_align_npot(DIV_ROUND_UP(ra_width, tiling->tile_count.width), tile_align_w);
} else {
/* if this assert fails then layout is impossible.. */
assert(tiling->tile0.extent.height > TILE_ALIGN_H);
tiling->tile_count.height++;
tiling->tile0.extent.height =
align(DIV_ROUND_UP(ra_height, tiling->tile_count.height), TILE_ALIGN_H);
}
}
}
/* Group the tiles of `tiling` into VSC pipes.
 *
 * Writes tiling->pipe0 (tiles per pipe in each direction) and
 * tiling->pipe_count (pipes in each direction). Starts with one tile per
 * pipe and grows pipe0 one step at a time until the total pipe count is at
 * most max_pipe_count. `dev` is not read by this function.
 */
static void
tu_tiling_config_update_pipe_layout(struct tu_tiling_config *tiling,
const struct tu_device *dev)
{
const uint32_t max_pipe_count = 32; /* A6xx */
/* start from 1 tile per pipe */
tiling->pipe0 = (VkExtent2D) {
.width = 1,
.height = 1,
};
tiling->pipe_count = tiling->tile_count;
while (tiling->pipe_count.width * tiling->pipe_count.height > max_pipe_count) {
/* grow the smaller pipe dimension; height grows first when they are equal */
if (tiling->pipe0.width < tiling->pipe0.height) {
tiling->pipe0.width += 1;
tiling->pipe_count.width =
DIV_ROUND_UP(tiling->tile_count.width, tiling->pipe0.width);
} else {
tiling->pipe0.height += 1;
tiling->pipe_count.height =
DIV_ROUND_UP(tiling->tile_count.height, tiling->pipe0.height);
}
}
}
/* Fill tiling->pipe_config[] and tiling->pipe_sizes[] from the pipe layout.
 *
 * Pipes are numbered row by row (n = pipe_count.width * y + x). Each used
 * pipe gets its position and size, both in tiles, packed into pipe_config[n],
 * and its tile count (w * h) packed into pipe_sizes[n]. The pipe_config
 * entries after the used ones, up to max_pipe_count, are zeroed; pipe_sizes
 * entries after the used ones are not written. `dev` is not read here.
 */
static void
tu_tiling_config_update_pipes(struct tu_tiling_config *tiling,
const struct tu_device *dev)
{
const uint32_t max_pipe_count = 32; /* A6xx */
const uint32_t used_pipe_count =
tiling->pipe_count.width * tiling->pipe_count.height;
/* size of the pipes in the last column / last row, which is smaller than
 * pipe0 when tile_count is not a multiple of pipe0
 */
const VkExtent2D last_pipe = {
.width = (tiling->tile_count.width - 1) % tiling->pipe0.width + 1,
.height = (tiling->tile_count.height - 1) % tiling->pipe0.height + 1,
};
assert(used_pipe_count <= max_pipe_count);
assert(max_pipe_count <= ARRAY_SIZE(tiling->pipe_config));
for (uint32_t y = 0; y < tiling->pipe_count.height; y++) {
for (uint32_t x = 0; x < tiling->pipe_count.width; x++) {
/* pipe origin, in tiles */
const uint32_t pipe_x = tiling->pipe0.width * x;
const uint32_t pipe_y = tiling->pipe0.height * y;
/* pipe size, in tiles */
const uint32_t pipe_w = (x == tiling->pipe_count.width - 1)
? last_pipe.width
: tiling->pipe0.width;
const uint32_t pipe_h = (y == tiling->pipe_count.height - 1)
? last_pipe.height
: tiling->pipe0.height;
const uint32_t n = tiling->pipe_count.width * y + x;
tiling->pipe_config[n] = A6XX_VSC_PIPE_CONFIG_REG_X(pipe_x) |
A6XX_VSC_PIPE_CONFIG_REG_Y(pipe_y) |
A6XX_VSC_PIPE_CONFIG_REG_W(pipe_w) |
A6XX_VSC_PIPE_CONFIG_REG_H(pipe_h);
tiling->pipe_sizes[n] = CP_SET_BIN_DATA5_0_VSC_SIZE(pipe_w * pipe_h);
}
}
/* clear the config of the pipes that are not used */
memset(tiling->pipe_config + used_pipe_count, 0,
sizeof(uint32_t) * (max_pipe_count - used_pipe_count));
}
static void
tu_tiling_config_get_tile(const struct tu_tiling_config *tiling,
const struct tu_device *dev,
tu_tiling_config_get_tile(const struct tu_framebuffer *fb,
uint32_t tx,
uint32_t ty,
struct tu_tile *tile)
uint32_t *pipe,
uint32_t *slot)
{
/* find the pipe and the slot for tile (tx, ty) */
const uint32_t px = tx / tiling->pipe0.width;
const uint32_t py = ty / tiling->pipe0.height;
const uint32_t sx = tx - tiling->pipe0.width * px;
const uint32_t sy = ty - tiling->pipe0.height * py;
const uint32_t px = tx / fb->pipe0.width;
const uint32_t py = ty / fb->pipe0.height;
const uint32_t sx = tx - fb->pipe0.width * px;
const uint32_t sy = ty - fb->pipe0.height * py;
/* last pipe has different width */
const uint32_t pipe_width =
MIN2(tiling->pipe0.width,
tiling->tile_count.width - px * tiling->pipe0.width);
MIN2(fb->pipe0.width,
fb->tile_count.width - px * fb->pipe0.width);
assert(tx < tiling->tile_count.width && ty < tiling->tile_count.height);
assert(px < tiling->pipe_count.width && py < tiling->pipe_count.height);
assert(sx < tiling->pipe0.width && sy < tiling->pipe0.height);
assert(tx < fb->tile_count.width && ty < fb->tile_count.height);
assert(px < fb->pipe_count.width && py < fb->pipe_count.height);
assert(sx < fb->pipe0.width && sy < fb->pipe0.height);
/* convert to 1D indices */
tile->pipe = tiling->pipe_count.width * py + px;
tile->slot = pipe_width * sy + sx;
/* get the blit area for the tile */
tile->begin = (VkOffset2D) {
.x = tiling->tile0.offset.x + tiling->tile0.extent.width * tx,
.y = tiling->tile0.offset.y + tiling->tile0.extent.height * ty,
};
tile->end.x =
(tx == tiling->tile_count.width - 1)
? tiling->render_area.offset.x + tiling->render_area.extent.width
: tile->begin.x + tiling->tile0.extent.width;
tile->end.y =
(ty == tiling->tile_count.height - 1)
? tiling->render_area.offset.y + tiling->render_area.extent.height
: tile->begin.y + tiling->tile0.extent.height;
*pipe = fb->pipe_count.width * py + px;
*slot = pipe_width * sy + sx;
}
void
@ -602,7 +454,7 @@ tu6_emit_render_cntl(struct tu_cmd_buffer *cmd,
static void
tu6_emit_blit_scissor(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool align)
{
const VkRect2D *render_area = &cmd->state.tiling_config.render_area;
const VkRect2D *render_area = &cmd->state.render_area;
uint32_t x1 = render_area->offset.x;
uint32_t y1 = render_area->offset.y;
uint32_t x2 = x1 + render_area->extent.width - 1;
@ -706,7 +558,7 @@ tu_cs_emit_sds_ib(struct tu_cs *cs, uint32_t id, struct tu_cs_entry entry)
static bool
use_hw_binning(struct tu_cmd_buffer *cmd)
{
const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
const struct tu_framebuffer *fb = cmd->state.framebuffer;
/* XFB commands are emitted for BINNING || SYSMEM, which makes it incompatible
* with non-hw binning GMEM rendering. this is required because some of the
@ -721,7 +573,7 @@ use_hw_binning(struct tu_cmd_buffer *cmd)
if (unlikely(cmd->device->physical_device->instance->debug_flags & TU_DEBUG_FORCEBIN))
return true;
return (tiling->tile_count.width * tiling->tile_count.height) > 2;
return (fb->tile_count.width * fb->tile_count.height) > 2;
}
static bool
@ -740,24 +592,29 @@ use_sysmem_rendering(struct tu_cmd_buffer *cmd)
if (cmd->has_tess)
return true;
return cmd->state.tiling_config.force_sysmem;
return false;
}
static void
tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
const struct tu_tile *tile)
uint32_t tx, uint32_t ty)
{
const struct tu_framebuffer *fb = cmd->state.framebuffer;
uint32_t pipe, slot;
tu_tiling_config_get_tile(fb, tx, ty, &pipe, &slot);
tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_YIELD));
tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_GMEM));
const uint32_t x1 = tile->begin.x;
const uint32_t y1 = tile->begin.y;
const uint32_t x2 = tile->end.x - 1;
const uint32_t y2 = tile->end.y - 1;
const uint32_t x1 = fb->tile0.width * tx;
const uint32_t y1 = fb->tile0.height * ty;
const uint32_t x2 = x1 + fb->tile0.width - 1;
const uint32_t y2 = y1 + fb->tile0.height - 1;
tu6_emit_window_scissor(cs, x1, y1, x2, y2);
tu6_emit_window_offset(cs, x1, y1);
@ -771,11 +628,11 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
tu_cs_emit(cs, 0x0);
tu_cs_emit_pkt7(cs, CP_SET_BIN_DATA5, 7);
tu_cs_emit(cs, cmd->state.tiling_config.pipe_sizes[tile->pipe] |
CP_SET_BIN_DATA5_0_VSC_N(tile->slot));
tu_cs_emit_qw(cs, cmd->vsc_draw_strm.iova + tile->pipe * cmd->vsc_draw_strm_pitch);
tu_cs_emit_qw(cs, cmd->vsc_draw_strm.iova + (tile->pipe * 4) + (32 * cmd->vsc_draw_strm_pitch));
tu_cs_emit_qw(cs, cmd->vsc_prim_strm.iova + (tile->pipe * cmd->vsc_prim_strm_pitch));
tu_cs_emit(cs, fb->pipe_sizes[pipe] |
CP_SET_BIN_DATA5_0_VSC_N(slot));
tu_cs_emit_qw(cs, cmd->vsc_draw_strm.iova + pipe * cmd->vsc_draw_strm_pitch);
tu_cs_emit_qw(cs, cmd->vsc_draw_strm.iova + pipe * 4 + 32 * cmd->vsc_draw_strm_pitch);
tu_cs_emit_qw(cs, cmd->vsc_prim_strm.iova + pipe * cmd->vsc_prim_strm_pitch);
tu_cs_emit_pkt7(cs, CP_SET_VISIBILITY_OVERRIDE, 1);
tu_cs_emit(cs, 0x0);
@ -801,7 +658,7 @@ tu6_emit_sysmem_resolve(struct tu_cmd_buffer *cmd,
struct tu_image_view *dst = fb->attachments[a].attachment;
struct tu_image_view *src = fb->attachments[gmem_a].attachment;
tu_resolve_sysmem(cmd, cs, src, dst, fb->layers, &cmd->state.tiling_config.render_area);
tu_resolve_sysmem(cmd, cs, src, dst, fb->layers, &cmd->state.render_area);
}
static void
@ -1009,21 +866,20 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
static void
update_vsc_pipe(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
const struct tu_framebuffer *fb = cmd->state.framebuffer;
tu_cs_emit_regs(cs,
A6XX_VSC_BIN_SIZE(.width = tiling->tile0.extent.width,
.height = tiling->tile0.extent.height),
A6XX_VSC_BIN_SIZE(.width = fb->tile0.width,
.height = fb->tile0.height),
A6XX_VSC_DRAW_STRM_SIZE_ADDRESS(.bo = &cmd->vsc_draw_strm,
.bo_offset = 32 * cmd->vsc_draw_strm_pitch));
tu_cs_emit_regs(cs,
A6XX_VSC_BIN_COUNT(.nx = tiling->tile_count.width,
.ny = tiling->tile_count.height));
A6XX_VSC_BIN_COUNT(.nx = fb->tile_count.width,
.ny = fb->tile_count.height));
tu_cs_emit_pkt4(cs, REG_A6XX_VSC_PIPE_CONFIG_REG(0), 32);
for (unsigned i = 0; i < 32; i++)
tu_cs_emit(cs, tiling->pipe_config[i]);
tu_cs_emit_array(cs, fb->pipe_config, 32);
tu_cs_emit_regs(cs,
A6XX_VSC_PRIM_STRM_ADDRESS(.bo = &cmd->vsc_prim_strm),
@ -1039,9 +895,9 @@ update_vsc_pipe(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
static void
emit_vsc_overflow_test(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
const struct tu_framebuffer *fb = cmd->state.framebuffer;
const uint32_t used_pipe_count =
tiling->pipe_count.width * tiling->pipe_count.height;
fb->pipe_count.width * fb->pipe_count.height;
/* Clear vsc_scratch: */
tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 3);
@ -1078,14 +934,9 @@ static void
tu6_emit_binning_pass(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
struct tu_physical_device *phys_dev = cmd->device->physical_device;
const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
const struct tu_framebuffer *fb = cmd->state.framebuffer;
uint32_t x1 = tiling->tile0.offset.x;
uint32_t y1 = tiling->tile0.offset.y;
uint32_t x2 = tiling->render_area.offset.x + tiling->render_area.extent.width - 1;
uint32_t y2 = tiling->render_area.offset.y + tiling->render_area.extent.height - 1;
tu6_emit_window_scissor(cs, x1, y1, x2, y2);
tu6_emit_window_scissor(cs, 0, 0, fb->width - 1, fb->height - 1);
tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BINNING));
@ -1213,7 +1064,7 @@ tu_emit_input_attachments(struct tu_cmd_buffer *cmd,
dst[0] |= A6XX_TEX_CONST_0_TILE_MODE(TILE6_2);
dst[2] =
A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D) |
A6XX_TEX_CONST_2_PITCH(cmd->state.tiling_config.tile0.extent.width * att->cpp);
A6XX_TEX_CONST_2_PITCH(cmd->state.framebuffer->tile0.width * att->cpp);
dst[3] = 0;
dst[4] = cmd->device->physical_device->gmem_base + att->gmem_offset;
dst[5] = A6XX_TEX_CONST_5_DEPTH(1);
@ -1282,8 +1133,7 @@ tu_emit_renderpass_begin(struct tu_cmd_buffer *cmd,
}
static void
tu6_sysmem_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
const struct VkRect2D *renderArea)
tu6_sysmem_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
const struct tu_framebuffer *fb = cmd->state.framebuffer;
@ -1348,14 +1198,12 @@ tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
tu_emit_cache_flush_ccu(cmd, cs, TU_CMD_CCU_GMEM);
const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
const struct tu_framebuffer *fb = cmd->state.framebuffer;
if (use_hw_binning(cmd)) {
/* enable stream-out during binning pass: */
tu_cs_emit_regs(cs, A6XX_VPC_SO_OVERRIDE(.so_disable=false));
tu6_emit_bin_size(cs,
tiling->tile0.extent.width,
tiling->tile0.extent.height,
tu6_emit_bin_size(cs, fb->tile0.width, fb->tile0.height,
A6XX_RB_BIN_CONTROL_BINNING_PASS | 0x6000000);
tu6_emit_render_cntl(cmd, cmd->state.subpass, cs, true);
@ -1365,9 +1213,7 @@ tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
/* and disable stream-out for draw pass: */
tu_cs_emit_regs(cs, A6XX_VPC_SO_OVERRIDE(.so_disable=true));
tu6_emit_bin_size(cs,
tiling->tile0.extent.width,
tiling->tile0.extent.height,
tu6_emit_bin_size(cs, fb->tile0.width, fb->tile0.height,
A6XX_RB_BIN_CONTROL_USE_VIZ | 0x6000000);
tu_cs_emit_regs(cs,
@ -1383,10 +1229,7 @@ tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
/* no binning pass, so enable stream-out for draw pass:: */
tu_cs_emit_regs(cs, A6XX_VPC_SO_OVERRIDE(.so_disable=false));
tu6_emit_bin_size(cs,
tiling->tile0.extent.width,
tiling->tile0.extent.height,
0x6000000);
tu6_emit_bin_size(cs, fb->tile0.width, fb->tile0.height, 0x6000000);
}
tu_cs_sanity_check(cs);
@ -1395,9 +1238,9 @@ tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
static void
tu6_render_tile(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
const struct tu_tile *tile)
uint32_t tx, uint32_t ty)
{
tu6_emit_tile_select(cmd, cs, tile);
tu6_emit_tile_select(cmd, cs, tx, ty);
tu_cs_emit_call(cs, &cmd->draw_cs);
@ -1429,19 +1272,16 @@ tu6_tile_render_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
static void
tu_cmd_render_tiles(struct tu_cmd_buffer *cmd)
{
const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
const struct tu_framebuffer *fb = cmd->state.framebuffer;
if (use_hw_binning(cmd))
cmd->use_vsc_data = true;
tu6_tile_render_begin(cmd, &cmd->cs);
for (uint32_t y = 0; y < tiling->tile_count.height; y++) {
for (uint32_t x = 0; x < tiling->tile_count.width; x++) {
struct tu_tile tile;
tu_tiling_config_get_tile(tiling, cmd->device, x, y, &tile);
tu6_render_tile(cmd, &cmd->cs, &tile);
}
for (uint32_t y = 0; y < fb->tile_count.height; y++) {
for (uint32_t x = 0; x < fb->tile_count.width; x++)
tu6_render_tile(cmd, &cmd->cs, x, y);
}
tu6_tile_render_end(cmd, &cmd->cs);
@ -1450,9 +1290,7 @@ tu_cmd_render_tiles(struct tu_cmd_buffer *cmd)
static void
tu_cmd_render_sysmem(struct tu_cmd_buffer *cmd)
{
const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
tu6_sysmem_render_begin(cmd, &cmd->cs, &tiling->render_area);
tu6_sysmem_render_begin(cmd, &cmd->cs);
tu_cs_emit_call(&cmd->cs, &cmd->draw_cs);
@ -1478,21 +1316,6 @@ tu_cmd_prepare_tile_store_ib(struct tu_cmd_buffer *cmd)
cmd->state.tile_store_ib = tu_cs_end_sub_stream(&cmd->sub_cs, &sub_cs);
}
static void
tu_cmd_update_tiling_config(struct tu_cmd_buffer *cmd,
const VkRect2D *render_area)
{
const struct tu_device *dev = cmd->device;
struct tu_tiling_config *tiling = &cmd->state.tiling_config;
tiling->render_area = *render_area;
tiling->force_sysmem = false;
tu_tiling_config_update_tile_layout(tiling, dev, cmd->state.pass);
tu_tiling_config_update_pipe_layout(tiling, dev);
tu_tiling_config_update_pipes(tiling, dev);
}
static VkResult
tu_create_cmd_buffer(struct tu_device *device,
struct tu_cmd_pool *pool,
@ -2791,8 +2614,8 @@ tu_CmdBeginRenderPass(VkCommandBuffer commandBuffer,
cmd->state.pass = pass;
cmd->state.subpass = pass->subpasses;
cmd->state.framebuffer = fb;
cmd->state.render_area = pRenderPassBegin->renderArea;
tu_cmd_update_tiling_config(cmd, &pRenderPassBegin->renderArea);
tu_cmd_prepare_tile_store_ib(cmd);
/* Note: because this is external, any flushes will happen before draw_cs

View File

@ -2271,6 +2271,7 @@ tu_CreateFramebuffer(VkDevice _device,
VkFramebuffer *pFramebuffer)
{
TU_FROM_HANDLE(tu_device, device, _device);
TU_FROM_HANDLE(tu_render_pass, pass, pCreateInfo->renderPass);
struct tu_framebuffer *framebuffer;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
@ -2292,6 +2293,8 @@ tu_CreateFramebuffer(VkDevice _device,
framebuffer->attachments[i].attachment = iview;
}
tu_framebuffer_tiling_config(framebuffer, device, pass);
*pFramebuffer = tu_framebuffer_to_handle(framebuffer);
return VK_SUCCESS;
}

View File

@ -654,36 +654,6 @@ struct tu_descriptor_state
uint32_t dynamic_descriptors[MAX_DYNAMIC_BUFFERS * A6XX_TEX_CONST_DWORDS];
};
/* Per-tile data filled in by tu_tiling_config_get_tile(). */
struct tu_tile
{
/* 1D index of the VSC pipe that contains the tile */
uint8_t pipe;
/* 1D index of the tile within that pipe */
uint8_t slot;
/* blit area of the tile; `end` is exclusive (users subtract 1 to get the
 * last covered pixel)
 */
VkOffset2D begin;
VkOffset2D end;
};
/* Tile and VSC pipe layout computed for one render area. */
struct tu_tiling_config
{
/* render area the layout was computed for */
VkRect2D render_area;
/* position and size of the first tile */
VkRect2D tile0;
/* number of tiles */
VkExtent2D tile_count;
/* size of the first VSC pipe */
VkExtent2D pipe0;
/* number of VSC pipes */
VkExtent2D pipe_count;
/* pipe register values */
uint32_t pipe_config[MAX_VSC_PIPES];
uint32_t pipe_sizes[MAX_VSC_PIPES];
/* Whether sysmem rendering must be used */
bool force_sysmem;
};
enum tu_cmd_dirty_bits
{
TU_CMD_DIRTY_COMPUTE_PIPELINE = 1 << 1,
@ -859,8 +829,7 @@ struct tu_cmd_state
const struct tu_render_pass *pass;
const struct tu_subpass *subpass;
const struct tu_framebuffer *framebuffer;
struct tu_tiling_config tiling_config;
VkRect2D render_area;
struct tu_cs_entry tile_store_ib;
@ -1389,10 +1358,29 @@ struct tu_framebuffer
uint32_t height;
uint32_t layers;
/* size of the first tile */
VkExtent2D tile0;
/* number of tiles */
VkExtent2D tile_count;
/* size of the first VSC pipe */
VkExtent2D pipe0;
/* number of VSC pipes */
VkExtent2D pipe_count;
/* pipe register values */
uint32_t pipe_config[MAX_VSC_PIPES];
uint32_t pipe_sizes[MAX_VSC_PIPES];
uint32_t attachment_count;
struct tu_attachment_info attachments[0];
};
/* Compute the tile and VSC pipe layout of `fb` and store it in the
 * framebuffer itself (tile0, tile_count, pipe0, pipe_count, pipe_config,
 * pipe_sizes). The layout is derived from the framebuffer's width/height and
 * from `pass`.
 */
void
tu_framebuffer_tiling_config(struct tu_framebuffer *fb,
const struct tu_device *device,
const struct tu_render_pass *pass);
struct tu_subpass_barrier {
VkPipelineStageFlags src_stage_mask;
VkAccessFlags src_access_mask;

View File

@ -116,3 +116,133 @@ __vk_errorf(struct tu_instance *instance,
return error;
}
/* Pick the tile grid for a framebuffer.
 *
 * Results are stored in fb->tile_count (tiles per direction) and fb->tile0
 * (size of one tile). The grid always covers the whole fb->width x
 * fb->height area starting at the origin.
 */
static void
tu_tiling_config_update_tile_layout(struct tu_framebuffer *fb,
                                    const struct tu_device *dev,
                                    const struct tu_render_pass *pass)
{
   const uint32_t align_w = pass->tile_align_w;
   const uint32_t max_tile_width = 1024;

   uint32_t cols, rows;
   uint32_t tile_w, tile_h;

   if (unlikely(dev->physical_device->instance->debug_flags & TU_DEBUG_FORCEBIN)) {
      /* forced binning: begin with a 2x2 grid */
      cols = 2;
      rows = 2;
      tile_w = util_align_npot(DIV_ROUND_UP(fb->width, 2), align_w);
      tile_h = align(DIV_ROUND_UP(fb->height, 2), TILE_ALIGN_H);
   } else {
      /* default: begin with one tile spanning the framebuffer */
      cols = 1;
      rows = 1;
      tile_w = util_align_npot(fb->width, align_w);
      tile_h = align(fb->height, TILE_ALIGN_H);
   }

   /* add columns until a tile is no wider than the maximum */
   while (tile_w > max_tile_width) {
      cols++;
      tile_w = util_align_npot(DIV_ROUND_UP(fb->width, cols), align_w);
   }

   /* gmem_pixels == 0 means the pass will be forced to sysmem, so there is
    * no point in fitting the tile into GMEM.
    * TODO: just skip all GMEM stuff when sysmem is forced?
    */
   if (pass->gmem_pixels) {
      /* shrink the tile until its pixel count fits into GMEM */
      while (tile_w * tile_h > pass->gmem_pixels) {
         if (tile_w > MAX2(align_w, tile_h)) {
            cols++;
            tile_w = util_align_npot(DIV_ROUND_UP(fb->width, cols), align_w);
         } else {
            /* if this assert fails then layout is impossible..
             * NOTE(review): this assert is the only visible check that the
             * height can still shrink -- confirm what happens in builds with
             * asserts disabled.
             */
            assert(tile_h > TILE_ALIGN_H);
            rows++;
            tile_h = align(DIV_ROUND_UP(fb->height, rows), TILE_ALIGN_H);
         }
      }
   }

   fb->tile_count = (VkExtent2D) {
      .width = cols,
      .height = rows,
   };
   fb->tile0 = (VkExtent2D) {
      .width = tile_w,
      .height = tile_h,
   };
}
/* Distribute the framebuffer's tiles over VSC pipes.
 *
 * Stores the number of tiles per pipe in fb->pipe0 and the number of pipes
 * per direction in fb->pipe_count. `dev` is not read.
 */
static void
tu_tiling_config_update_pipe_layout(struct tu_framebuffer *fb,
                                    const struct tu_device *dev)
{
   const uint32_t max_pipe_count = 32; /* A6xx */
   const uint32_t tiles_x = fb->tile_count.width;
   const uint32_t tiles_y = fb->tile_count.height;

   /* begin with one tile per pipe, i.e. as many pipes as tiles */
   uint32_t per_pipe_w = 1;
   uint32_t per_pipe_h = 1;
   uint32_t pipes_x = tiles_x;
   uint32_t pipes_y = tiles_y;

   /* enlarge the pipes, smaller dimension first (height on a tie), until
    * the number of pipes is within the limit
    */
   while (pipes_x * pipes_y > max_pipe_count) {
      if (per_pipe_w < per_pipe_h) {
         per_pipe_w += 1;
         pipes_x = DIV_ROUND_UP(tiles_x, per_pipe_w);
      } else {
         per_pipe_h += 1;
         pipes_y = DIV_ROUND_UP(tiles_y, per_pipe_h);
      }
   }

   fb->pipe0 = (VkExtent2D) {
      .width = per_pipe_w,
      .height = per_pipe_h,
   };
   fb->pipe_count = (VkExtent2D) {
      .width = pipes_x,
      .height = pipes_y,
   };
}
/* Build the per-pipe register values for a framebuffer.
 *
 * For every used pipe, visited row by row, packs its origin and size (both
 * in tiles) into fb->pipe_config[] and its tile count into fb->pipe_sizes[].
 * The remaining pipe_config entries up to max_pipe_count are zeroed; the
 * remaining pipe_sizes entries are left as they were. `dev` is not read.
 */
static void
tu_tiling_config_update_pipes(struct tu_framebuffer *fb,
                              const struct tu_device *dev)
{
   const uint32_t max_pipe_count = 32; /* A6xx */
   const uint32_t cols = fb->pipe_count.width;
   const uint32_t rows = fb->pipe_count.height;
   const uint32_t used_pipe_count = cols * rows;

   /* pipes in the last column / row only hold the leftover tiles */
   const uint32_t last_w = (fb->tile_count.width - 1) % fb->pipe0.width + 1;
   const uint32_t last_h = (fb->tile_count.height - 1) % fb->pipe0.height + 1;

   assert(used_pipe_count <= max_pipe_count);
   assert(max_pipe_count <= ARRAY_SIZE(fb->pipe_config));

   /* running 1D pipe index; equals cols * row + col in this traversal */
   uint32_t n = 0;
   for (uint32_t row = 0; row < rows; row++) {
      const uint32_t origin_y = fb->pipe0.height * row;
      const uint32_t size_h = (row + 1 == rows) ? last_h : fb->pipe0.height;

      for (uint32_t col = 0; col < cols; col++) {
         const uint32_t origin_x = fb->pipe0.width * col;
         const uint32_t size_w = (col + 1 == cols) ? last_w : fb->pipe0.width;

         fb->pipe_config[n] = A6XX_VSC_PIPE_CONFIG_REG_X(origin_x) |
                              A6XX_VSC_PIPE_CONFIG_REG_Y(origin_y) |
                              A6XX_VSC_PIPE_CONFIG_REG_W(size_w) |
                              A6XX_VSC_PIPE_CONFIG_REG_H(size_h);
         fb->pipe_sizes[n] = CP_SET_BIN_DATA5_0_VSC_SIZE(size_w * size_h);
         n++;
      }
   }

   /* zero the config words of the pipes that are not in use */
   memset(fb->pipe_config + used_pipe_count, 0,
          sizeof(uint32_t) * (max_pipe_count - used_pipe_count));
}
/* Compute and store the complete tiling configuration of `fb`.
 *
 * The three steps depend on each other and must run in this order: the pipe
 * layout is derived from fb->tile_count, and the pipe register values are
 * derived from fb->pipe0 / fb->pipe_count.
 */
void
tu_framebuffer_tiling_config(struct tu_framebuffer *fb,
const struct tu_device *device,
const struct tu_render_pass *pass)
{
/* tile grid: fb->tile0, fb->tile_count */
tu_tiling_config_update_tile_layout(fb, device, pass);
/* grouping of tiles into pipes: fb->pipe0, fb->pipe_count */
tu_tiling_config_update_pipe_layout(fb, device);
/* register values: fb->pipe_config[], fb->pipe_sizes[] */
tu_tiling_config_update_pipes(fb, device);
}