freedreno/fdl6: rework layout code a bit (reduce linear align to 64 bytes)
Reduce linear alignment, and rework the layout code a bit. This rework has a side effect of also increasing the alignment on linear levels of tiled (non-ubwc) cpp=1 and cpp=2 layouts. Since we should be UBWC for those cases anyway, its not a big loss. Signed-off-by: Jonathan Marek <jonathan@marek.ca> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5013>
This commit is contained in:
parent
3a9ab3b6e9
commit
d53dc6c376
|
@ -29,62 +29,70 @@
|
||||||
|
|
||||||
#include "freedreno_layout.h"
|
#include "freedreno_layout.h"
|
||||||
|
|
||||||
/* indexed by cpp, including msaa 2x and 4x:
|
|
||||||
* TODO:
|
|
||||||
* cpp=1 UBWC needs testing at larger texture sizes
|
|
||||||
* missing UBWC blockwidth/blockheight for npot+64 cpp
|
|
||||||
* missing 96/128 CPP for 8x MSAA with 32_32_32/32_32_32_32
|
|
||||||
*/
|
|
||||||
static const struct tile_alignment {
|
|
||||||
unsigned basealign;
|
|
||||||
unsigned pitchalign;
|
|
||||||
unsigned heightalign;
|
|
||||||
/* UBWC block width/height. Used in size alignment, and calculating a
|
|
||||||
* descriptor's FLAG_BUFFER_LOG2W/H for mipmapping.
|
|
||||||
*/
|
|
||||||
uint8_t ubwc_blockwidth;
|
|
||||||
uint8_t ubwc_blockheight;
|
|
||||||
} tile_alignment[] = {
|
|
||||||
[1] = { 64, 128, 32, 16, 4 },
|
|
||||||
[2] = { 128, 128, 16, 16, 4 },
|
|
||||||
[3] = { 256, 64, 32 },
|
|
||||||
[4] = { 256, 64, 16, 16, 4 },
|
|
||||||
[6] = { 256, 64, 16 },
|
|
||||||
[8] = { 256, 64, 16, 8, 4, },
|
|
||||||
[12] = { 256, 64, 16 },
|
|
||||||
[16] = { 256, 64, 16, 4, 4, },
|
|
||||||
[24] = { 256, 64, 16 },
|
|
||||||
[32] = { 256, 64, 16, 4, 2 },
|
|
||||||
[48] = { 256, 64, 16 },
|
|
||||||
[64] = { 256, 64, 16 },
|
|
||||||
|
|
||||||
/* special cases for r8g8: */
|
|
||||||
[0] = { 256, 64, 32, 16, 8 },
|
|
||||||
};
|
|
||||||
|
|
||||||
#define RGB_TILE_WIDTH_ALIGNMENT 64
|
#define RGB_TILE_WIDTH_ALIGNMENT 64
|
||||||
#define RGB_TILE_HEIGHT_ALIGNMENT 16
|
#define RGB_TILE_HEIGHT_ALIGNMENT 16
|
||||||
#define UBWC_PLANE_SIZE_ALIGNMENT 4096
|
#define UBWC_PLANE_SIZE_ALIGNMENT 4096
|
||||||
|
|
||||||
static const struct tile_alignment *
|
static bool
|
||||||
fdl6_tile_alignment(struct fdl_layout *layout)
|
is_r8g8(struct fdl_layout *layout)
|
||||||
{
|
{
|
||||||
debug_assert(layout->cpp < ARRAY_SIZE(tile_alignment));
|
return layout->cpp == 2 &&
|
||||||
|
util_format_get_nr_components(layout->format) == 2;
|
||||||
if ((layout->cpp == 2) && (util_format_get_nr_components(layout->format) == 2))
|
|
||||||
return &tile_alignment[0];
|
|
||||||
else
|
|
||||||
return &tile_alignment[layout->cpp];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
void
|
||||||
fdl6_pitchalign(struct fdl_layout *layout, int level)
|
fdl6_get_ubwc_blockwidth(struct fdl_layout *layout,
|
||||||
|
uint32_t *blockwidth, uint32_t *blockheight)
|
||||||
{
|
{
|
||||||
uint32_t pitchalign = 64;
|
static const struct {
|
||||||
if (fdl_tile_mode(layout, level))
|
uint8_t width;
|
||||||
pitchalign = fdl6_tile_alignment(layout)->pitchalign;
|
uint8_t height;
|
||||||
|
} blocksize[] = {
|
||||||
|
{ 16, 4 }, /* cpp = 1 */
|
||||||
|
{ 16, 4 }, /* cpp = 2 */
|
||||||
|
{ 16, 4 }, /* cpp = 4 */
|
||||||
|
{ 8, 4, }, /* cpp = 8 */
|
||||||
|
{ 4, 4, }, /* cpp = 16 */
|
||||||
|
{ 4, 2 }, /* cpp = 32 */
|
||||||
|
{ 0, 0 }, /* cpp = 64 (TODO) */
|
||||||
|
};
|
||||||
|
|
||||||
return pitchalign;
|
/* special case for r8g8: */
|
||||||
|
if (is_r8g8(layout)) {
|
||||||
|
*blockwidth = 16;
|
||||||
|
*blockheight = 8;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t cpp = fdl_cpp_shift(layout);
|
||||||
|
assert(cpp < ARRAY_SIZE(blocksize));
|
||||||
|
*blockwidth = blocksize[cpp].width;
|
||||||
|
*blockheight = blocksize[cpp].height;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
fdl6_tile_alignment(struct fdl_layout *layout, uint32_t *heightalign)
|
||||||
|
{
|
||||||
|
layout->pitchalign = fdl_cpp_shift(layout);
|
||||||
|
*heightalign = 16;
|
||||||
|
|
||||||
|
if (is_r8g8(layout) || layout->cpp == 1) {
|
||||||
|
layout->pitchalign = 1;
|
||||||
|
*heightalign = 32;
|
||||||
|
} else if (layout->cpp == 2) {
|
||||||
|
layout->pitchalign = 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* note: this base_align is *probably* not always right,
|
||||||
|
* it doesn't really get tested. for example with UBWC we might
|
||||||
|
* want 4k alignment, since we align UBWC levels to 4k
|
||||||
|
*/
|
||||||
|
if (layout->cpp == 1)
|
||||||
|
layout->base_align = 64;
|
||||||
|
else if (layout->cpp == 2)
|
||||||
|
layout->base_align = 128;
|
||||||
|
else
|
||||||
|
layout->base_align = 256;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* NOTE: good way to test this is: (for example)
|
/* NOTE: good way to test this is: (for example)
|
||||||
|
@ -97,8 +105,9 @@ fdl6_layout(struct fdl_layout *layout,
|
||||||
uint32_t mip_levels, uint32_t array_size, bool is_3d,
|
uint32_t mip_levels, uint32_t array_size, bool is_3d,
|
||||||
struct fdl_slice *plane_layout)
|
struct fdl_slice *plane_layout)
|
||||||
{
|
{
|
||||||
uint32_t offset;
|
uint32_t offset, pitch0;
|
||||||
uint32_t pitch0;
|
uint32_t pitchalign, heightalign;
|
||||||
|
uint32_t ubwc_blockwidth, ubwc_blockheight;
|
||||||
|
|
||||||
assert(nr_samples > 0);
|
assert(nr_samples > 0);
|
||||||
layout->width0 = width0;
|
layout->width0 = width0;
|
||||||
|
@ -113,37 +122,54 @@ fdl6_layout(struct fdl_layout *layout,
|
||||||
layout->nr_samples = nr_samples;
|
layout->nr_samples = nr_samples;
|
||||||
layout->layer_first = !is_3d;
|
layout->layer_first = !is_3d;
|
||||||
|
|
||||||
if (depth0 > 1)
|
fdl6_get_ubwc_blockwidth(layout, &ubwc_blockwidth, &ubwc_blockheight);
|
||||||
layout->ubwc = false;
|
|
||||||
if (tile_alignment[layout->cpp].ubwc_blockwidth == 0)
|
|
||||||
layout->ubwc = false;
|
|
||||||
|
|
||||||
const struct tile_alignment *ta = fdl6_tile_alignment(layout);
|
if (depth0 > 1 || ubwc_blockwidth == 0)
|
||||||
|
layout->ubwc = false;
|
||||||
|
|
||||||
/* in layer_first layout, the level (slice) contains just one
|
/* in layer_first layout, the level (slice) contains just one
|
||||||
* layer (since in fact the layer contains the slices)
|
* layer (since in fact the layer contains the slices)
|
||||||
*/
|
*/
|
||||||
uint32_t layers_in_level = layout->layer_first ? 1 : array_size;
|
uint32_t layers_in_level = layout->layer_first ? 1 : array_size;
|
||||||
|
|
||||||
debug_assert(ta->pitchalign);
|
/* note: for tiled+noubwc layouts, we can use a lower pitchalign
|
||||||
|
* which will affect the linear levels only, (the hardware will still
|
||||||
|
* expect the tiled alignment on the tiled levels)
|
||||||
|
*/
|
||||||
if (layout->tile_mode) {
|
if (layout->tile_mode) {
|
||||||
layout->base_align = ta->basealign;
|
fdl6_tile_alignment(layout, &heightalign);
|
||||||
} else {
|
} else {
|
||||||
layout->base_align = 64;
|
layout->base_align = 64;
|
||||||
|
layout->pitchalign = 0;
|
||||||
|
/* align pitch to at least 16 pixels:
|
||||||
|
* both turnip and galium assume there is enough alignment for 16x4
|
||||||
|
* aligned gmem store. turnip can use CP_BLIT to work without this
|
||||||
|
* extra alignment, but gallium driver doesn't implement it yet
|
||||||
|
*/
|
||||||
|
if (layout->cpp > 4)
|
||||||
|
layout->pitchalign = fdl_cpp_shift(layout) - 2;
|
||||||
|
|
||||||
|
/* when possible, use a bit more alignment than necessary
|
||||||
|
* presumably this is better for performance?
|
||||||
|
*/
|
||||||
|
if (!plane_layout)
|
||||||
|
layout->pitchalign = fdl_cpp_shift(layout);
|
||||||
|
|
||||||
|
/* not used, avoid "may be used uninitialized" warning */
|
||||||
|
heightalign = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pitchalign = 64 << layout->pitchalign;
|
||||||
|
|
||||||
if (plane_layout) {
|
if (plane_layout) {
|
||||||
offset = plane_layout->offset;
|
offset = plane_layout->offset;
|
||||||
pitch0 = plane_layout->pitch;
|
pitch0 = plane_layout->pitch;
|
||||||
if (align(pitch0, fdl6_pitchalign(layout, 0) * layout->cpp) != pitch0)
|
if (align(pitch0, pitchalign) != pitch0)
|
||||||
return false;
|
|
||||||
pitch0 /= layout->cpp; /* explicit pitch is in bytes */
|
|
||||||
if (pitch0 < width0 && height0 > 1)
|
|
||||||
return false;
|
return false;
|
||||||
} else {
|
} else {
|
||||||
|
uint32_t nblocksx = util_format_get_nblocksx(format, width0);
|
||||||
offset = 0;
|
offset = 0;
|
||||||
pitch0 = util_align_npot(width0, fdl6_pitchalign(layout, 0));
|
pitch0 = util_align_npot(nblocksx * layout->cpp, pitchalign);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t ubwc_width0 = width0;
|
uint32_t ubwc_width0 = width0;
|
||||||
|
@ -159,15 +185,11 @@ fdl6_layout(struct fdl_layout *layout,
|
||||||
ubwc_height0 = util_next_power_of_two(height0);
|
ubwc_height0 = util_next_power_of_two(height0);
|
||||||
ubwc_tile_height_alignment = 64;
|
ubwc_tile_height_alignment = 64;
|
||||||
}
|
}
|
||||||
ubwc_width0 = align(DIV_ROUND_UP(ubwc_width0, ta->ubwc_blockwidth),
|
ubwc_width0 = align(DIV_ROUND_UP(ubwc_width0, ubwc_blockwidth),
|
||||||
RGB_TILE_WIDTH_ALIGNMENT);
|
RGB_TILE_WIDTH_ALIGNMENT);
|
||||||
ubwc_height0 = align(DIV_ROUND_UP(ubwc_height0,
|
ubwc_height0 = align(DIV_ROUND_UP(ubwc_height0, ubwc_blockheight),
|
||||||
ta->ubwc_blockheight),
|
|
||||||
ubwc_tile_height_alignment);
|
ubwc_tile_height_alignment);
|
||||||
|
|
||||||
layout->pitchalign =
|
|
||||||
util_logbase2_ceil(fdl6_pitchalign(layout, mip_levels - 1) * layout->cpp >> 6);
|
|
||||||
|
|
||||||
for (uint32_t level = 0; level < mip_levels; level++) {
|
for (uint32_t level = 0; level < mip_levels; level++) {
|
||||||
uint32_t depth = u_minify(depth0, level);
|
uint32_t depth = u_minify(depth0, level);
|
||||||
struct fdl_slice *slice = &layout->slices[level];
|
struct fdl_slice *slice = &layout->slices[level];
|
||||||
|
@ -184,7 +206,7 @@ fdl6_layout(struct fdl_layout *layout,
|
||||||
|
|
||||||
uint32_t nblocksy = util_format_get_nblocksy(format, height);
|
uint32_t nblocksy = util_format_get_nblocksy(format, height);
|
||||||
if (tile_mode)
|
if (tile_mode)
|
||||||
nblocksy = align(nblocksy, ta->heightalign);
|
nblocksy = align(nblocksy, heightalign);
|
||||||
|
|
||||||
/* The blits used for mem<->gmem work at a granularity of
|
/* The blits used for mem<->gmem work at a granularity of
|
||||||
* 16x4, which can cause faults due to over-fetch on the
|
* 16x4, which can cause faults due to over-fetch on the
|
||||||
|
@ -196,14 +218,8 @@ fdl6_layout(struct fdl_layout *layout,
|
||||||
if (level == mip_levels - 1)
|
if (level == mip_levels - 1)
|
||||||
height = align(nblocksy, 4);
|
height = align(nblocksy, 4);
|
||||||
|
|
||||||
uint32_t nblocksx =
|
slice->offset = layout->size;
|
||||||
util_align_npot(util_format_get_nblocksx(format, u_minify(pitch0, level)),
|
slice->pitch = align(u_minify(pitch0, level), pitchalign);
|
||||||
fdl6_pitchalign(layout, level));
|
|
||||||
|
|
||||||
slice->offset = offset + layout->size;
|
|
||||||
uint32_t blocks = nblocksx * nblocksy;
|
|
||||||
|
|
||||||
slice->pitch = nblocksx * layout->cpp;
|
|
||||||
|
|
||||||
/* 1d array and 2d array textures must all have the same layer size
|
/* 1d array and 2d array textures must all have the same layer size
|
||||||
* for each miplevel on a6xx. 3d textures can have different layer
|
* for each miplevel on a6xx. 3d textures can have different layer
|
||||||
|
@ -213,12 +229,12 @@ fdl6_layout(struct fdl_layout *layout,
|
||||||
*/
|
*/
|
||||||
if (is_3d) {
|
if (is_3d) {
|
||||||
if (level < 1 || layout->slices[level - 1].size0 > 0xf000) {
|
if (level < 1 || layout->slices[level - 1].size0 > 0xf000) {
|
||||||
slice->size0 = align(blocks * layout->cpp, 4096);
|
slice->size0 = align(nblocksy * slice->pitch, 4096);
|
||||||
} else {
|
} else {
|
||||||
slice->size0 = layout->slices[level - 1].size0;
|
slice->size0 = layout->slices[level - 1].size0;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
slice->size0 = blocks * layout->cpp;
|
slice->size0 = nblocksy * slice->pitch;
|
||||||
}
|
}
|
||||||
|
|
||||||
layout->size += slice->size0 * depth * layers_in_level;
|
layout->size += slice->size0 * depth * layers_in_level;
|
||||||
|
@ -260,12 +276,3 @@ fdl6_layout(struct fdl_layout *layout,
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
|
||||||
fdl6_get_ubwc_blockwidth(struct fdl_layout *layout,
|
|
||||||
uint32_t *blockwidth, uint32_t *blockheight)
|
|
||||||
{
|
|
||||||
const struct tile_alignment *ta = fdl6_tile_alignment(layout);
|
|
||||||
*blockwidth = ta->ubwc_blockwidth;
|
|
||||||
*blockheight = ta->ubwc_blockheight;
|
|
||||||
}
|
|
||||||
|
|
|
@ -356,10 +356,10 @@ static const struct testcase testcases[] = {
|
||||||
{ .offset = 8192, .pitch = 128 },
|
{ .offset = 8192, .pitch = 128 },
|
||||||
{ .offset = 12288, .pitch = 128 },
|
{ .offset = 12288, .pitch = 128 },
|
||||||
{ .offset = 16384, .pitch = 128 },
|
{ .offset = 16384, .pitch = 128 },
|
||||||
{ .offset = 20480, .pitch = 64 },
|
{ .offset = 20480, .pitch = 128 },
|
||||||
{ .offset = 20544, .pitch = 64 },
|
{ .offset = 20608, .pitch = 128 },
|
||||||
{ .offset = 20608, .pitch = 64 },
|
{ .offset = 20736, .pitch = 128 },
|
||||||
{ .offset = 20672, .pitch = 64 },
|
{ .offset = 20864, .pitch = 128 },
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|
|
@ -1129,10 +1129,6 @@ tu_copy_buffer_to_image(struct tu_cmd_buffer *cmd,
|
||||||
uint32_t pitch = src_width * vk_format_get_blocksize(src_format);
|
uint32_t pitch = src_width * vk_format_get_blocksize(src_format);
|
||||||
uint32_t layer_size = src_height * pitch;
|
uint32_t layer_size = src_height * pitch;
|
||||||
|
|
||||||
/* note: the src_va/pitch alignment of 64 is for 2D engine,
|
|
||||||
* it is also valid for 1cpp format with shader path (stencil aspect path)
|
|
||||||
*/
|
|
||||||
|
|
||||||
ops->setup(cmd, cs, dst_format, ROTATE_0, false, mask);
|
ops->setup(cmd, cs, dst_format, ROTATE_0, false, mask);
|
||||||
|
|
||||||
struct tu_image_view dst;
|
struct tu_image_view dst;
|
||||||
|
@ -1212,10 +1208,6 @@ tu_copy_image_to_buffer(struct tu_cmd_buffer *cmd,
|
||||||
uint32_t pitch = dst_width * vk_format_get_blocksize(dst_format);
|
uint32_t pitch = dst_width * vk_format_get_blocksize(dst_format);
|
||||||
uint32_t layer_size = pitch * dst_height;
|
uint32_t layer_size = pitch * dst_height;
|
||||||
|
|
||||||
/* note: the dst_va/pitch alignment of 64 is for 2D engine,
|
|
||||||
* it is also valid for 1cpp format with shader path (stencil aspect)
|
|
||||||
*/
|
|
||||||
|
|
||||||
ops->setup(cmd, cs, dst_format, ROTATE_0, false, 0xf);
|
ops->setup(cmd, cs, dst_format, ROTATE_0, false, 0xf);
|
||||||
|
|
||||||
struct tu_image_view src;
|
struct tu_image_view src;
|
||||||
|
|
|
@ -1032,9 +1032,11 @@ fd_resource_from_handle(struct pipe_screen *pscreen,
|
||||||
|
|
||||||
uint32_t pitchalign = fd_screen(pscreen)->gmem_alignw * rsc->layout.cpp;
|
uint32_t pitchalign = fd_screen(pscreen)->gmem_alignw * rsc->layout.cpp;
|
||||||
|
|
||||||
/* use 64 pitchalign on a6xx where gmem_alignw is not right */
|
/* pitchalign is 64-bytes for linear formats on a6xx
|
||||||
|
* layout_resource_for_modifier will validate tiled pitch
|
||||||
|
*/
|
||||||
if (is_a6xx(screen))
|
if (is_a6xx(screen))
|
||||||
pitchalign = 64 * rsc->layout.cpp;
|
pitchalign = 64;
|
||||||
|
|
||||||
if ((slice->pitch < align(prsc->width0 * rsc->layout.cpp, pitchalign)) ||
|
if ((slice->pitch < align(prsc->width0 * rsc->layout.cpp, pitchalign)) ||
|
||||||
(slice->pitch & (pitchalign - 1)))
|
(slice->pitch & (pitchalign - 1)))
|
||||||
|
|
Loading…
Reference in New Issue