diff --git a/src/panfrost/shared/meson.build b/src/panfrost/shared/meson.build
index cfe5bfedd82..ce197494774 100644
--- a/src/panfrost/shared/meson.build
+++ b/src/panfrost/shared/meson.build
@@ -36,3 +36,22 @@ libpanfrost_shared = static_library(
   gnu_symbol_visibility : 'hidden',
   build_by_default : false,
 )
+
+if with_tests
+  test(
+    'panfrost_tiling',
+    executable(
+      'panfrost_tiling',
+      files(
+        'test/test-tiling.cpp',
+      ),
+      c_args : [c_msvc_compat_args, no_override_init_args],
+      gnu_symbol_visibility : 'hidden',
+      include_directories : [inc_include, inc_src, inc_mesa, inc_panfrost, inc_gallium],
+      dependencies: [idep_gtest],
+      link_with : [libpanfrost_shared],
+    ),
+    suite : ['panfrost'],
+    protocol : gtest_test_protocol,
+  )
+endif
diff --git a/src/panfrost/shared/pan_tiling.h b/src/panfrost/shared/pan_tiling.h
index 5e35fe8dfc1..009ba280434 100644
--- a/src/panfrost/shared/pan_tiling.h
+++ b/src/panfrost/shared/pan_tiling.h
@@ -30,6 +30,10 @@
 #include <stdint.h>
 #include <util/format/u_format.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * Load a rectangular region from a tiled image to a linear staging image.
  *
@@ -70,4 +74,9 @@ void panfrost_store_tiled_image(void *dst, const void *src,
                                 uint32_t src_stride,
                                 enum pipe_format format);
 
+
+#ifdef __cplusplus
+} /* extern C */
+#endif
+
 #endif
diff --git a/src/panfrost/shared/test/test-tiling.cpp b/src/panfrost/shared/test/test-tiling.cpp
new file mode 100644
index 00000000000..b06954485d0
--- /dev/null
+++ b/src/panfrost/shared/test/test-tiling.cpp
@@ -0,0 +1,305 @@
+/*
+ * Copyright (C) 2022 Collabora, Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pan_tiling.h"
+
+#include <gtest/gtest.h>
+
+/*
+ * Reference tiling algorithm, written for clarity rather than performance. See
+ * docs/drivers/panfrost.rst for details on the format.
+ */
+
+/*
+ * Index of a block within its tile: bit i of (x ^ y) lands in bit 2i of the
+ * result and bit i of y lands in bit 2i + 1. Both coordinates must be below 16.
+ */
+static unsigned
+u_order(unsigned x, unsigned y)
+{
+   assert(x < 16 && y < 16);
+
+   unsigned xy0 = ((x ^ y) & 1) ? 1 : 0;
+   unsigned xy1 = ((x ^ y) & 2) ? 1 : 0;
+   unsigned xy2 = ((x ^ y) & 4) ? 1 : 0;
+   unsigned xy3 = ((x ^ y) & 8) ? 1 : 0;
+
+   unsigned y0 = (y & 1) ? 1 : 0;
+   unsigned y1 = (y & 2) ? 1 : 0;
+   unsigned y2 = (y & 4) ? 1 : 0;
+   unsigned y3 = (y & 8) ? 1 : 0;
+
+   return (xy0 << 0) | (y0 << 1) | (xy1 << 2) | (y1 << 3) |
+          (xy2 << 4) | (y2 << 5) | (xy3 << 6) | (y3 << 7);
+}
+
+/*
+ * Byte offset of block (x, y) in a tiled image. x/y are in blocks, tilesize is
+ * the tile edge length in blocks and blocksize is the block size in bytes.
+ */
+static unsigned
+tiled_offset(unsigned x, unsigned y, unsigned stride, unsigned tilesize, unsigned blocksize)
+{
+   unsigned tile_x = x / tilesize;
+   unsigned tile_y = y / tilesize;
+
+   unsigned x_in_tile = x % tilesize;
+   unsigned y_in_tile = y % tilesize;
+
+   unsigned index_in_tile = u_order(x_in_tile, y_in_tile);
+
+   unsigned row_offset = tile_y * (stride * tilesize);
+   unsigned col_offset = (tile_x * tilesize * tilesize) * blocksize;
+   unsigned block_offset = index_in_tile * blocksize;
+
+   return row_offset + col_offset + block_offset;
+}
+
+/* Byte offset of block (x, y) in a linear image. x/y are in blocks */
+static unsigned
+linear_offset(unsigned x, unsigned y, unsigned stride, unsigned blocksize)
+{
+   return (stride * y) + (x * blocksize);
+}
+
+/*
+ * Reference copy of a w x h region between a linear staging image and a tiled
+ * image, one block at a time. The region sits at (region_x, region_y) in the
+ * tiled image and at the origin of the linear image. dst_is_tiled selects the
+ * direction: true copies linear -> tiled, false copies tiled -> linear.
+ */
+static void
+ref_access_tiled(void *dst, const void *src,
+                 unsigned region_x, unsigned region_y,
+                 unsigned w, unsigned h,
+                 uint32_t dst_stride,
+                 uint32_t src_stride,
+                 enum pipe_format format,
+                 bool dst_is_tiled)
+{
+   const struct util_format_description *desc = util_format_description(format);
+
+   unsigned tilesize = (desc->block.width > 1) ? 4 : 16;
+   unsigned blocksize = (desc->block.bits / 8);
+
+   unsigned w_block = w / desc->block.width;
+   unsigned h_block = h / desc->block.height;
+
+   unsigned region_x_block = region_x / desc->block.width;
+   unsigned region_y_block = region_y / desc->block.height;
+
+   for (unsigned linear_y_block = 0; linear_y_block < h_block; ++linear_y_block) {
+      for (unsigned linear_x_block = 0; linear_x_block < w_block; ++linear_x_block) {
+
+         unsigned tiled_x_block = region_x_block + linear_x_block;
+         unsigned tiled_y_block = region_y_block + linear_y_block;
+
+         unsigned dst_offset, src_offset;
+
+         if (dst_is_tiled) {
+            dst_offset = tiled_offset(tiled_x_block, tiled_y_block, dst_stride, tilesize, blocksize);
+            src_offset = linear_offset(linear_x_block, linear_y_block, src_stride, blocksize);
+         } else {
+            dst_offset = linear_offset(linear_x_block, linear_y_block, dst_stride, blocksize);
+            src_offset = tiled_offset(tiled_x_block, tiled_y_block, src_stride, tilesize, blocksize);
+         }
+
+         memcpy((uint8_t *) dst + dst_offset,
+                (const uint8_t *) src + src_offset,
+                desc->block.bits / 8);
+      }
+   }
+}
+
+/*
+ * Helper to build test cases for tiled texture access. This test suite compares
+ * the above reference tiling algorithm to the optimized algorithm used in
+ * production.
+ *
+ * width/height size the tiled image, (rx, ry) and rw x rh give the accessed
+ * region, and linear_stride is the byte stride of the linear staging image.
+ * store selects panfrost_store_tiled_image (true) or
+ * panfrost_load_tiled_image (false).
+ */
+static void
+test(unsigned width, unsigned height, unsigned rx, unsigned ry,
+     unsigned rw, unsigned rh, unsigned linear_stride,
+     enum pipe_format format, bool store)
+{
+   unsigned bpp = util_format_get_blocksize(format);
+
+   unsigned tiled_width = ALIGN_POT(width, 16);
+   unsigned tiled_height = ALIGN_POT(height, 16);
+   unsigned tiled_stride = tiled_width * bpp;
+
+   unsigned dst_stride = store ? tiled_stride : linear_stride;
+   unsigned src_stride = store ? linear_stride : tiled_stride;
+
+   /* Sizes in bytes. The linear image needs at most one stride per row of the
+    * region, so it is sized by the region height rh rather than its width. */
+   size_t tiled_size = (size_t) bpp * tiled_width * tiled_height;
+   size_t linear_size = (size_t) rh * linear_stride;
+
+   void *tiled = calloc(1, tiled_size);
+   void *linear = calloc(1, linear_size);
+   void *ref = calloc(1, store ? tiled_size : linear_size);
+
+   if (store) {
+      for (size_t i = 0; i < linear_size; ++i) {
+         ((uint8_t *) linear)[i] = (i & 0xFF);
+      }
+
+      panfrost_store_tiled_image(tiled, linear, rx, ry, rw, rh,
+                                 dst_stride, src_stride, format);
+   } else {
+      for (size_t i = 0; i < tiled_size; ++i) {
+         ((uint8_t *) tiled)[i] = (i & 0xFF);
+      }
+
+      panfrost_load_tiled_image(linear, tiled, rx, ry, rw, rh,
+                                dst_stride, src_stride, format);
+   }
+
+   ref_access_tiled(ref, store ? linear : tiled, rx, ry, rw, rh,
+                    dst_stride, src_stride, format, store);
+
+   /* EXPECT rather than ASSERT: a fatal failure would return from this helper
+    * before the buffers below are freed. */
+   if (store)
+      EXPECT_EQ(memcmp(ref, tiled, tiled_size), 0);
+   else
+      EXPECT_EQ(memcmp(ref, linear, linear_size), 0);
+
+   free(ref);
+   free(tiled);
+   free(linear);
+}
+
+/* Run the same test case as both a store and a load */
+static void
+test_ldst(unsigned width, unsigned height, unsigned rx, unsigned ry,
+          unsigned rw, unsigned rh, unsigned linear_stride,
+          enum pipe_format format)
+{
+   test(width, height, rx, ry, rw, rh, linear_stride, format, true);
+   test(width, height, rx, ry, rw, rh, linear_stride, format, false);
+}
+
+TEST(UInterleavedTiling, RegularFormats)
+{
+   /* 8-bit */
+   test_ldst(23, 17, 0, 0, 23, 17, 23, PIPE_FORMAT_R8_UINT);
+
+   /* 16-bit */
+   test_ldst(23, 17, 0, 0, 23, 17, 23 * 2, PIPE_FORMAT_R8G8_UINT);
+
+   /* 24-bit */
+   test_ldst(23, 17, 0, 0, 23, 17, 23 * 3, PIPE_FORMAT_R8G8B8_UINT);
+
+   /* 32-bit */
+   test_ldst(23, 17, 0, 0, 23, 17, 23 * 4, PIPE_FORMAT_R32_UINT);
+
+   /* 48-bit */
+   test_ldst(23, 17, 0, 0, 23, 17, 23 * 6, PIPE_FORMAT_R16G16B16_UINT);
+
+   /* 64-bit */
+   test_ldst(23, 17, 0, 0, 23, 17, 23 * 8, PIPE_FORMAT_R32G32_UINT);
+
+   /* 96-bit */
+   test_ldst(23, 17, 0, 0, 23, 17, 23 * 12, PIPE_FORMAT_R32G32B32_UINT);
+
+   /* 128-bit */
+   test_ldst(23, 17, 0, 0, 23, 17, 23 * 16, PIPE_FORMAT_R32G32B32A32_UINT);
+}
+
+TEST(UInterleavedTiling, UnpackedStrides)
+{
+   test_ldst(23, 17, 0, 0, 23, 17, 369 * 1, PIPE_FORMAT_R8_SINT);
+   test_ldst(23, 17, 0, 0, 23, 17, 369 * 2, PIPE_FORMAT_R8G8_SINT);
+   test_ldst(23, 17, 0, 0, 23, 17, 369 * 3, PIPE_FORMAT_R8G8B8_SINT);
+   test_ldst(23, 17, 0, 0, 23, 17, 369 * 4, PIPE_FORMAT_R32_SINT);
+   test_ldst(23, 17, 0, 0, 23, 17, 369 * 6, PIPE_FORMAT_R16G16B16_SINT);
+   test_ldst(23, 17, 0, 0, 23, 17, 369 * 8, PIPE_FORMAT_R32G32_SINT);
+   test_ldst(23, 17, 0, 0, 23, 17, 369 * 12, PIPE_FORMAT_R32G32B32_SINT);
+   test_ldst(23, 17, 0, 0, 23, 17, 369 * 16, PIPE_FORMAT_R32G32B32A32_SINT);
+}
+
+TEST(UInterleavedTiling, PartialAccess)
+{
+   test_ldst(23, 17, 3, 1, 13, 7, 369 * 1, PIPE_FORMAT_R8_UNORM);
+   test_ldst(23, 17, 3, 1, 13, 7, 369 * 2, PIPE_FORMAT_R8G8_UNORM);
+   test_ldst(23, 17, 3, 1, 13, 7, 369 * 3, PIPE_FORMAT_R8G8B8_UNORM);
+   test_ldst(23, 17, 3, 1, 13, 7, 369 * 4, PIPE_FORMAT_R32_UNORM);
+   test_ldst(23, 17, 3, 1, 13, 7, 369 * 6, PIPE_FORMAT_R16G16B16_UNORM);
+   test_ldst(23, 17, 3, 1, 13, 7, 369 * 8, PIPE_FORMAT_R32G32_UNORM);
+   test_ldst(23, 17, 3, 1, 13, 7, 369 * 12, PIPE_FORMAT_R32G32B32_UNORM);
+   test_ldst(23, 17, 3, 1, 13, 7, 369 * 16, PIPE_FORMAT_R32G32B32A32_UNORM);
+}
+
+TEST(UInterleavedTiling, ETC)
+{
+   /* Block alignment assumed */
+   test_ldst(32, 32, 0, 0, 32, 32, 512, PIPE_FORMAT_ETC1_RGB8);
+   test_ldst(32, 32, 0, 0, 32, 32, 512, PIPE_FORMAT_ETC2_RGB8A1);
+   test_ldst(32, 32, 0, 0, 32, 32, 512, PIPE_FORMAT_ETC2_RG11_SNORM);
+}
+
+TEST(UInterleavedTiling, PartialETC)
+{
+   /* Block alignment assumed */
+   test_ldst(32, 32, 4, 8, 16, 12, 512, PIPE_FORMAT_ETC1_RGB8);
+   test_ldst(32, 32, 4, 8, 16, 12, 512, PIPE_FORMAT_ETC2_RGB8A1);
+   test_ldst(32, 32, 4, 8, 16, 12, 512, PIPE_FORMAT_ETC2_RG11_SNORM);
+}
+
+TEST(UInterleavedTiling, DXT)
+{
+   /* Block alignment assumed */
+   test_ldst(32, 32, 0, 0, 32, 32, 512, PIPE_FORMAT_DXT1_RGB);
+   test_ldst(32, 32, 0, 0, 32, 32, 512, PIPE_FORMAT_DXT3_RGBA);
+   test_ldst(32, 32, 0, 0, 32, 32, 512, PIPE_FORMAT_DXT5_RGBA);
+}
+
+TEST(UInterleavedTiling, PartialDXT)
+{
+   /* Block alignment assumed */
+   test_ldst(32, 32, 4, 8, 16, 12, 512, PIPE_FORMAT_DXT1_RGB);
+   test_ldst(32, 32, 4, 8, 16, 12, 512, PIPE_FORMAT_DXT3_RGBA);
+   test_ldst(32, 32, 4, 8, 16, 12, 512, PIPE_FORMAT_DXT5_RGBA);
+}
+
+TEST(UInterleavedTiling, ASTC)
+{
+   /* Block alignment assumed */
+   test_ldst(40, 40, 0, 0, 40, 40, 512, PIPE_FORMAT_ASTC_4x4);
+   test_ldst(50, 40, 0, 0, 50, 40, 512, PIPE_FORMAT_ASTC_5x4);
+   test_ldst(50, 50, 0, 0, 50, 50, 512, PIPE_FORMAT_ASTC_5x5);
+}
+
+TEST(UInterleavedTiling, PartialASTC)
+{
+   /* Block alignment assumed */
+   test_ldst(40, 40, 4, 4, 16, 8, 512, PIPE_FORMAT_ASTC_4x4);
+   test_ldst(50, 40, 5, 4, 10, 8, 512, PIPE_FORMAT_ASTC_5x4);
+   test_ldst(50, 50, 5, 5, 10, 10, 512, PIPE_FORMAT_ASTC_5x5);
+}