turnip: add adreno 650
Tile alignment is 96, with gmem alignment of 0x6000.

Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4608>
This commit is contained in:
parent
aa3624b8ab
commit
c3ef0275c4
|
@ -113,10 +113,9 @@ tu_bo_list_merge(struct tu_bo_list *list, const struct tu_bo_list *other)
|
|||
static void
|
||||
tu_tiling_config_update_tile_layout(struct tu_tiling_config *tiling,
|
||||
const struct tu_device *dev,
|
||||
uint32_t pixels)
|
||||
const struct tu_render_pass *pass)
|
||||
{
|
||||
const uint32_t tile_align_w = 64; /* note: 32 when no input attachments */
|
||||
const uint32_t tile_align_h = 16;
|
||||
const uint32_t tile_align_w = pass->tile_align_w;
|
||||
const uint32_t max_tile_width = 1024;
|
||||
|
||||
/* note: don't offset the tiling config by render_area.offset,
|
||||
|
@ -139,43 +138,43 @@ tu_tiling_config_update_tile_layout(struct tu_tiling_config *tiling,
|
|||
.height = 1,
|
||||
};
|
||||
tiling->tile0.extent = (VkExtent2D) {
|
||||
.width = align(ra_width, tile_align_w),
|
||||
.height = align(ra_height, tile_align_h),
|
||||
.width = util_align_npot(ra_width, tile_align_w),
|
||||
.height = align(ra_height, TILE_ALIGN_H),
|
||||
};
|
||||
|
||||
if (unlikely(dev->physical_device->instance->debug_flags & TU_DEBUG_FORCEBIN)) {
|
||||
/* start with 2x2 tiles */
|
||||
tiling->tile_count.width = 2;
|
||||
tiling->tile_count.height = 2;
|
||||
tiling->tile0.extent.width = align(DIV_ROUND_UP(ra_width, 2), tile_align_w);
|
||||
tiling->tile0.extent.height = align(DIV_ROUND_UP(ra_height, 2), tile_align_h);
|
||||
tiling->tile0.extent.width = util_align_npot(DIV_ROUND_UP(ra_width, 2), tile_align_w);
|
||||
tiling->tile0.extent.height = align(DIV_ROUND_UP(ra_height, 2), TILE_ALIGN_H);
|
||||
}
|
||||
|
||||
/* do not exceed max tile width */
|
||||
while (tiling->tile0.extent.width > max_tile_width) {
|
||||
tiling->tile_count.width++;
|
||||
tiling->tile0.extent.width =
|
||||
align(DIV_ROUND_UP(ra_width, tiling->tile_count.width), tile_align_w);
|
||||
util_align_npot(DIV_ROUND_UP(ra_width, tiling->tile_count.width), tile_align_w);
|
||||
}
|
||||
|
||||
/* will force to sysmem, don't bother trying to have a valid tile config
|
||||
* TODO: just skip all GMEM stuff when sysmem is forced?
|
||||
*/
|
||||
if (!pixels)
|
||||
if (!pass->gmem_pixels)
|
||||
return;
|
||||
|
||||
/* do not exceed gmem size */
|
||||
while (tiling->tile0.extent.width * tiling->tile0.extent.height > pixels) {
|
||||
while (tiling->tile0.extent.width * tiling->tile0.extent.height > pass->gmem_pixels) {
|
||||
if (tiling->tile0.extent.width > MAX2(tile_align_w, tiling->tile0.extent.height)) {
|
||||
tiling->tile_count.width++;
|
||||
tiling->tile0.extent.width =
|
||||
align(DIV_ROUND_UP(ra_width, tiling->tile_count.width), tile_align_w);
|
||||
util_align_npot(DIV_ROUND_UP(ra_width, tiling->tile_count.width), tile_align_w);
|
||||
} else {
|
||||
/* if this assert fails then layout is impossible.. */
|
||||
assert(tiling->tile0.extent.height > tile_align_h);
|
||||
assert(tiling->tile0.extent.height > TILE_ALIGN_H);
|
||||
tiling->tile_count.height++;
|
||||
tiling->tile0.extent.height =
|
||||
align(DIV_ROUND_UP(ra_height, tiling->tile_count.height), tile_align_h);
|
||||
align(DIV_ROUND_UP(ra_height, tiling->tile_count.height), TILE_ALIGN_H);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1378,7 +1377,7 @@ tu_cmd_update_tiling_config(struct tu_cmd_buffer *cmd,
|
|||
tiling->render_area = *render_area;
|
||||
tiling->force_sysmem = false;
|
||||
|
||||
tu_tiling_config_update_tile_layout(tiling, dev, cmd->state.pass->gmem_pixels);
|
||||
tu_tiling_config_update_tile_layout(tiling, dev, cmd->state.pass);
|
||||
tu_tiling_config_update_pipe_layout(tiling, dev);
|
||||
tu_tiling_config_update_pipes(tiling, dev);
|
||||
}
|
||||
|
|
|
@ -267,6 +267,7 @@ tu_physical_device_init(struct tu_physical_device *device,
|
|||
case 618:
|
||||
device->ccu_offset_gmem = 0x7c000; /* 0x7e000 in some cases? */
|
||||
device->ccu_offset_bypass = 0x10000;
|
||||
device->tile_align_w = 64;
|
||||
device->magic.PC_UNKNOWN_9805 = 0x0;
|
||||
device->magic.SP_UNKNOWN_A0F8 = 0x0;
|
||||
break;
|
||||
|
@ -274,9 +275,17 @@ tu_physical_device_init(struct tu_physical_device *device,
|
|||
case 640:
|
||||
device->ccu_offset_gmem = 0xf8000;
|
||||
device->ccu_offset_bypass = 0x20000;
|
||||
device->tile_align_w = 64;
|
||||
device->magic.PC_UNKNOWN_9805 = 0x1;
|
||||
device->magic.SP_UNKNOWN_A0F8 = 0x1;
|
||||
break;
|
||||
case 650:
|
||||
device->ccu_offset_gmem = 0x114000;
|
||||
device->ccu_offset_bypass = 0x30000;
|
||||
device->tile_align_w = 96;
|
||||
device->magic.PC_UNKNOWN_9805 = 0x2;
|
||||
device->magic.SP_UNKNOWN_A0F8 = 0x2;
|
||||
break;
|
||||
default:
|
||||
result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
|
||||
"device %s is unsupported", device->name);
|
||||
|
|
|
@ -36,20 +36,32 @@ static void update_samples(struct tu_subpass *subpass,
|
|||
subpass->samples = samples;
|
||||
}
|
||||
|
||||
#define GMEM_ALIGN 0x4000
|
||||
|
||||
static void
|
||||
create_render_pass_common(struct tu_render_pass *pass,
|
||||
const struct tu_physical_device *phys_dev)
|
||||
{
|
||||
uint32_t block_align_shift = 4; /* log2(gmem_align/(tile_align_w*tile_align_h)) */
|
||||
uint32_t tile_align_w = phys_dev->tile_align_w;
|
||||
uint32_t gmem_align = (1 << block_align_shift) * tile_align_w * TILE_ALIGN_H;
|
||||
|
||||
/* calculate total bytes per pixel */
|
||||
uint32_t cpp_total = 0;
|
||||
for (uint32_t i = 0; i < pass->attachment_count; i++) {
|
||||
struct tu_render_pass_attachment *att = &pass->attachments[i];
|
||||
if (att->gmem_offset >= 0)
|
||||
if (att->gmem_offset >= 0) {
|
||||
cpp_total += att->cpp;
|
||||
/* texture pitch must be aligned to 64, use a tile_align_w that is
|
||||
* a multiple of 64 for cpp==1 attachment to work as input attachment
|
||||
*/
|
||||
if (att->cpp == 1 && tile_align_w % 64 != 0) {
|
||||
tile_align_w *= 2;
|
||||
block_align_shift -= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pass->tile_align_w = tile_align_w;
|
||||
|
||||
/* no gmem attachments */
|
||||
if (cpp_total == 0) {
|
||||
/* any non-zero value so tiling config works with no attachments */
|
||||
|
@ -64,7 +76,7 @@ create_render_pass_common(struct tu_render_pass *pass,
|
|||
* result: nblocks = {12, 52}, pixels = 196608
|
||||
* optimal: nblocks = {13, 51}, pixels = 208896
|
||||
*/
|
||||
uint32_t gmem_blocks = phys_dev->ccu_offset_gmem / GMEM_ALIGN;
|
||||
uint32_t gmem_blocks = phys_dev->ccu_offset_gmem / gmem_align;
|
||||
uint32_t offset = 0, pixels = ~0u;
|
||||
for (uint32_t i = 0; i < pass->attachment_count; i++) {
|
||||
struct tu_render_pass_attachment *att = &pass->attachments[i];
|
||||
|
@ -73,14 +85,13 @@ create_render_pass_common(struct tu_render_pass *pass,
|
|||
|
||||
att->gmem_offset = offset;
|
||||
|
||||
/* Note: divide by 16 is for GMEM_ALIGN=16k, tile align w=64/h=16 */
|
||||
uint32_t align = MAX2(1, att->cpp / 16);
|
||||
uint32_t align = MAX2(1, att->cpp >> block_align_shift);
|
||||
uint32_t nblocks = MAX2((gmem_blocks * att->cpp / cpp_total) & ~(align - 1), align);
|
||||
|
||||
gmem_blocks -= nblocks;
|
||||
cpp_total -= att->cpp;
|
||||
offset += nblocks * GMEM_ALIGN;
|
||||
pixels = MIN2(pixels, nblocks * GMEM_ALIGN / att->cpp);
|
||||
offset += nblocks * gmem_align;
|
||||
pixels = MIN2(pixels, nblocks * gmem_align / att->cpp);
|
||||
}
|
||||
|
||||
pass->gmem_pixels = pixels;
|
||||
|
|
|
@ -324,6 +324,10 @@ struct tu_physical_device
|
|||
uint64_t gmem_base;
|
||||
uint32_t ccu_offset_gmem;
|
||||
uint32_t ccu_offset_bypass;
|
||||
/* alignment for size of tiles */
|
||||
uint32_t tile_align_w;
|
||||
#define TILE_ALIGN_H 16
|
||||
/* gmem store/load granularity */
|
||||
#define GMEM_ALIGN_W 16
|
||||
#define GMEM_ALIGN_H 4
|
||||
|
||||
|
@ -1607,6 +1611,7 @@ struct tu_render_pass
|
|||
uint32_t attachment_count;
|
||||
uint32_t subpass_count;
|
||||
uint32_t gmem_pixels;
|
||||
uint32_t tile_align_w;
|
||||
struct tu_subpass_attachment *subpass_attachments;
|
||||
struct tu_render_pass_attachment *attachments;
|
||||
struct tu_subpass subpasses[0];
|
||||
|
|
Loading…
Reference in New Issue