panvk: Implement vkCmdCopyBuffer()

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Tomeu Vizoso <tomeu.vizoso@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12095>
This commit is contained in:
Boris Brezillon 2021-09-08 10:17:03 +02:00
parent f73ae1a6b5
commit b37eb5854c
2 changed files with 157 additions and 1 deletions

View File

@ -133,6 +133,7 @@ panvk_logi_v(const char *format, va_list va);
#define PANVK_META_COPY_IMG2BUF_NUM_FORMATS 12
#define PANVK_META_COPY_IMG2IMG_NUM_FORMATS 14
#define PANVK_META_COPY_NUM_TEX_TYPES 5
/* Number of buffer-to-buffer copy shader variants. The variant table is
 * indexed by log2(block size in bytes), so 5 entries cover block sizes of
 * 1, 2, 4, 8 and 16 bytes.
 */
#define PANVK_META_COPY_BUF2BUF_NUM_BLKSIZES 5
static inline unsigned
panvk_meta_copy_tex_type(unsigned dim, bool isarray)
@ -172,6 +173,10 @@ struct panvk_meta {
struct {
mali_ptr rsd;
} img2img[PANVK_META_COPY_NUM_TEX_TYPES][PANVK_META_COPY_IMG2IMG_NUM_FORMATS];
struct {
mali_ptr rsd;
struct panfrost_ubo_push pushmap;
} buf2buf[PANVK_META_COPY_BUF2BUF_NUM_BLKSIZES];
} copy;
};

View File

@ -1751,6 +1751,150 @@ panvk_per_arch(CmdCopyImageToBuffer)(VkCommandBuffer commandBuffer,
}
}
/* Parameters of a single buffer-to-buffer copy, as consumed by the copy
 * shader: `src` is the GPU address data is read from, `dst` the GPU address
 * it is written to. The shader fetches members by offsetof()/sizeof(), so the
 * member layout is part of the CPU/shader contract.
 */
struct panvk_meta_copy_buf2buf_info {
mali_ptr src;
mali_ptr dst;
};
/* Emit a single-component NIR UBO load fetching one member of
 * struct panvk_meta_copy_buf2buf_info. The load uses an immediate 0 as its
 * first source and the member's offsetof() as its second; the bit size is
 * derived from the member's sizeof().
 *
 * b:     pointer to the nir_builder the load is emitted with
 * field: name of the struct member to load
 */
#define panvk_meta_copy_buf2buf_get_info_field(b, field) \
   nir_load_ubo((b), 1, \
                8 * sizeof(((struct panvk_meta_copy_buf2buf_info *)0)->field), \
                nir_imm_int((b), 0), \
                nir_imm_int((b), offsetof(struct panvk_meta_copy_buf2buf_info, field)), \
                .range_base = 0, \
                .range = ~0, \
                .align_mul = 4, \
                .align_offset = 0)
/* Build, compile and upload the compute shader that copies one blksz-byte
 * block per invocation.
 *
 * Each invocation takes the X component of its global invocation ID,
 * multiplies it by blksz to get a byte offset, adds that offset to the source
 * and destination addresses read from struct panvk_meta_copy_buf2buf_info,
 * and moves one block with a global load followed by a global store.
 *
 * pdev:        device the shader is compiled for (provides gpu_id)
 * bin_pool:    pool the compiled binary is uploaded to
 * blksz:       number of bytes copied per invocation
 * shader_info: filled by the compiler with the shader metadata
 *
 * Returns the value returned by pan_pool_upload_aligned() for the binary.
 */
static mali_ptr
panvk_meta_copy_buf2buf_shader(struct panfrost_device *pdev,
                               struct pan_pool *bin_pool,
                               unsigned blksz,
                               struct pan_shader_info *shader_info)
{
   /* FIXME: Won't work on compute queues, but we can't do that with
    * a compute shader if the destination is an AFBC surface.
    *
    * NOTE(review): this shader is built as MESA_SHADER_COMPUTE and works on
    * raw addresses, so the FIXME above looks like it was carried over from
    * the image copy paths -- confirm whether it still applies here.
    */
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
                                     GENX(pan_shader_get_compiler_options)(),
                                     "panvk_meta_copy_buf2buf(blksz=%d)",
                                     blksz);

   b.shader->info.internal = true;
   b.shader->info.num_ubos = 1;

   /* Byte offset of the block handled by this invocation, widened to 64 bits
    * so it can be added to the 64-bit addresses.
    */
   nir_ssa_def *global_id = nir_load_global_invocation_id(&b, 32);
   nir_ssa_def *offset32 =
      nir_imul(&b, nir_channel(&b, global_id, 0), nir_imm_int(&b, blksz));
   nir_ssa_def *byte_offset = nir_u2u64(&b, offset32);

   nir_ssa_def *src_base = panvk_meta_copy_buf2buf_get_info_field(&b, src);
   nir_ssa_def *src_addr = nir_iadd(&b, src_base, byte_offset);
   nir_ssa_def *dst_base = panvk_meta_copy_buf2buf_get_info_field(&b, dst);
   nir_ssa_def *dst_addr = nir_iadd(&b, dst_base, byte_offset);

   /* The block is moved as a vector of components of at most 4 bytes each. */
   unsigned comp_bytes = 4;
   if (blksz < 4)
      comp_bytes = blksz;

   unsigned num_comps = blksz / comp_bytes;
   unsigned write_mask = (1 << num_comps) - 1;

   nir_ssa_def *block =
      nir_load_global(&b, src_addr, blksz, num_comps, comp_bytes * 8);
   nir_store_global(&b, dst_addr, blksz, block, write_mask);

   struct panfrost_compile_inputs inputs = {
      .is_blit = true,
      .gpu_id = pdev->gpu_id,
   };

   struct util_dynarray binary;

   util_dynarray_init(&binary, NULL);
   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);

   /* Make sure UBO words have been upgraded to push constants and everything
    * is at the right place.
    */
   assert(shader_info->ubo_count == 1);
   assert(shader_info->push.count == (sizeof(struct panvk_meta_copy_buf2buf_info) / 4));

   const unsigned shader_align = PAN_ARCH >= 6 ? 128 : 64;
   mali_ptr shader =
      pan_pool_upload_aligned(bin_pool, binary.data, binary.size,
                              shader_align);

   /* The uploaded copy is all we need: drop the CPU-side binary and the NIR
    * shader.
    */
   util_dynarray_fini(&binary);
   ralloc_free(b.shader);

   return shader;
}
static void
panvk_meta_copy_buf2buf_init(struct panvk_physical_device *dev)
{
for (unsigned i = 0; i < ARRAY_SIZE(dev->meta.copy.buf2buf); i++) {
struct pan_shader_info shader_info;
mali_ptr shader =
panvk_meta_copy_buf2buf_shader(&dev->pdev, &dev->meta.bin_pool.base,
1 << i, &shader_info);
dev->meta.copy.buf2buf[i].pushmap = shader_info.push;
dev->meta.copy.buf2buf[i].rsd =
panvk_meta_copy_to_buf_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base,
shader, &shader_info, false);
}
}
static void
panvk_meta_copy_buf2buf(struct panvk_cmd_buffer *cmdbuf,
const struct panvk_buffer *src,
const struct panvk_buffer *dst,
const VkBufferCopy *region)
{
struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev;
struct panvk_meta_copy_buf2buf_info info = {
.src = src->bo->ptr.gpu + src->bo_offset + region->srcOffset,
.dst = dst->bo->ptr.gpu + dst->bo_offset + region->dstOffset,
};
unsigned alignment = ffs((info.src | info.dst | region->size) & 15);
unsigned log2blksz = alignment ? alignment - 1 : 4;
assert(log2blksz < ARRAY_SIZE(cmdbuf->device->physical_device->meta.copy.buf2buf));
mali_ptr rsd =
cmdbuf->device->physical_device->meta.copy.buf2buf[log2blksz].rsd;
const struct panfrost_ubo_push *pushmap =
&cmdbuf->device->physical_device->meta.copy.buf2buf[log2blksz].pushmap;
mali_ptr pushconsts =
panvk_meta_copy_emit_push_constants(pdev, pushmap, &cmdbuf->desc_pool.base,
&info, sizeof(info));
mali_ptr ubo =
panvk_meta_copy_emit_ubo(pdev, &cmdbuf->desc_pool.base, &info, sizeof(info));
if (cmdbuf->state.batch)
panvk_per_arch(cmd_close_batch)(cmdbuf);
panvk_cmd_open_batch(cmdbuf);
struct panvk_batch *batch = cmdbuf->state.batch;
panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);
mali_ptr tsd = batch->tls.gpu;
unsigned nblocks = region->size >> log2blksz;
struct pan_compute_dim num_wg = { nblocks, 1, 1 };
struct pan_compute_dim wg_sz = { 1, 1, 1};
struct panfrost_ptr job =
panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base,
&batch->scoreboard,
&num_wg, &wg_sz,
0, 0, ubo, pushconsts, rsd, tsd);
util_dynarray_append(&batch->jobs, void *, job.cpu);
batch->blit.src = src->bo;
batch->blit.dst = dst->bo;
panvk_per_arch(cmd_close_batch)(cmdbuf);
}
void
panvk_per_arch(CmdCopyBuffer)(VkCommandBuffer commandBuffer,
VkBuffer srcBuffer,
@ -1758,7 +1902,13 @@ panvk_per_arch(CmdCopyBuffer)(VkCommandBuffer commandBuffer,
uint32_t regionCount,
const VkBufferCopy *pRegions)
{
panvk_stub();
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
VK_FROM_HANDLE(panvk_buffer, src, srcBuffer);
VK_FROM_HANDLE(panvk_buffer, dst, destBuffer);
for (unsigned i = 0; i < regionCount; i++) {
panvk_meta_copy_buf2buf(cmdbuf, src, dst, &pRegions[i]);
}
}
void
@ -1787,4 +1937,5 @@ panvk_per_arch(meta_copy_init)(struct panvk_physical_device *dev)
panvk_meta_copy_img2img_init(dev);
panvk_meta_copy_buf2img_init(dev);
panvk_meta_copy_img2buf_init(dev);
panvk_meta_copy_buf2buf_init(dev);
}