turnip: improve CmdCopyImage and implement CmdBlitImage

Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
This commit is contained in:
Jonathan Marek 2019-10-05 12:38:40 -04:00
parent 571b2611b3
commit d3c9914152
8 changed files with 526 additions and 590 deletions

View File

@ -49,8 +49,11 @@ tu_format_table_c = custom_target(
)
libtu_files = files(
'tu_blit.c',
'tu_blit.h',
'tu_cmd_buffer.c',
'tu_cs.c',
'tu_cs.h',
'tu_device.c',
'tu_descriptor_set.c',
'tu_descriptor_set.h',

View File

@ -0,0 +1,298 @@
/*
* Copyright © 2019 Valve Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Jonathan Marek <jonathan@marek.ca>
*
*/
#include "tu_blit.h"
#include "a6xx.xml.h"
#include "adreno_common.xml.h"
#include "adreno_pm4.xml.h"
#include "vk_format.h"
#include "tu_cs.h"
/* TODO:
* - Avoid disabling tiling for swapped formats
* (image_to_image copy doesn't deal with it)
* - Fix d24_unorm_s8_uint support & aspects
* - UBWC
*/
/*
 * Picks the format used for copy-mode blits: a UINT format whose block size
 * in bits equals that of the given format, so texel data is moved without
 * any numeric conversion.
 */
static VkFormat
blit_copy_format(VkFormat format)
{
   const unsigned block_bits = vk_format_get_blocksizebits(format);

   switch (block_bits) {
   case 8:
      return VK_FORMAT_R8_UINT;
   case 16:
      return VK_FORMAT_R16_UINT;
   case 32:
      return VK_FORMAT_R8G8B8A8_UINT;
   case 64:
      return VK_FORMAT_R32G32_UINT;
   case 96:
      return VK_FORMAT_R32G32B32_UINT;
   case 128:
      return VK_FORMAT_R32G32B32A32_UINT;
   default:
      unreachable("unhandled format size");
   }
}
static uint32_t
blit_image_info(const struct tu_blit_surf *img, bool src, bool stencil_read)
{
const struct tu_native_format *fmt = tu6_get_native_format(img->fmt);
enum a6xx_color_fmt rb = fmt->rb;
enum a3xx_color_swap swap = img->tiled ? WZYX : fmt->swap;
if (rb == RB6_R10G10B10A2_UNORM && src)
rb = RB6_R10G10B10A2_FLOAT16;
if (rb == RB6_X8Z24_UNORM)
rb = RB6_Z24_UNORM_S8_UINT;
if (stencil_read)
swap = XYZW;
return A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(rb) |
A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(img->tile_mode) |
A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(swap) |
COND(vk_format_is_srgb(img->fmt), A6XX_SP_PS_2D_SRC_INFO_SRGB);
}
static void
emit_blit_step(struct tu_cmd_buffer *cmdbuf, const struct tu_blit *blt)
{
struct tu_cs *cs = &cmdbuf->cs;
tu_cs_reserve_space(cmdbuf->device, cs, 52);
enum a6xx_color_fmt fmt = tu6_get_native_format(blt->dst.fmt)->rb;
if (fmt == RB6_X8Z24_UNORM)
fmt = RB6_Z24_UNORM_S8_UINT;
enum a6xx_2d_ifmt ifmt = tu6_rb_fmt_to_ifmt(fmt);
if (vk_format_is_srgb(blt->dst.fmt)) {
assert(ifmt == R2D_UNORM8);
ifmt = R2D_UNORM8_SRGB;
}
uint32_t blit_cntl = A6XX_RB_2D_BLIT_CNTL_ROTATE(blt->rotation) |
A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(fmt) | /* not required? */
COND(fmt == RB6_Z24_UNORM_S8_UINT, A6XX_RB_2D_BLIT_CNTL_D24S8) |
A6XX_RB_2D_BLIT_CNTL_MASK(0xf) |
A6XX_RB_2D_BLIT_CNTL_IFMT(ifmt);
tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1);
tu_cs_emit(&cmdbuf->cs, blit_cntl);
tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
tu_cs_emit(&cmdbuf->cs, blit_cntl);
/*
* Emit source:
*/
tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_INFO, 10);
tu_cs_emit(cs, blit_image_info(&blt->src, true, blt->stencil_read) |
A6XX_SP_PS_2D_SRC_INFO_SAMPLES(tu_msaa_samples(blt->src.samples)) |
/* TODO: should disable this bit for integer formats ? */
COND(blt->src.samples > 1, A6XX_SP_PS_2D_SRC_INFO_SAMPLES_AVERAGE) |
COND(blt->filter, A6XX_SP_PS_2D_SRC_INFO_FILTER) |
0x500000);
tu_cs_emit(cs, A6XX_SP_PS_2D_SRC_SIZE_WIDTH(blt->src.x + blt->src.width) |
A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(blt->src.y + blt->src.height));
tu_cs_emit_qw(cs, blt->src.va);
tu_cs_emit(cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(blt->src.pitch));
tu_cs_emit(cs, 0x00000000);
tu_cs_emit(cs, 0x00000000);
tu_cs_emit(cs, 0x00000000);
tu_cs_emit(cs, 0x00000000);
tu_cs_emit(cs, 0x00000000);
/*
* Emit destination:
*/
tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_INFO, 9);
tu_cs_emit(cs, blit_image_info(&blt->dst, false, false));
tu_cs_emit_qw(cs, blt->dst.va);
tu_cs_emit(cs, A6XX_RB_2D_DST_SIZE_PITCH(blt->dst.pitch));
tu_cs_emit(cs, 0x00000000);
tu_cs_emit(cs, 0x00000000);
tu_cs_emit(cs, 0x00000000);
tu_cs_emit(cs, 0x00000000);
tu_cs_emit(cs, 0x00000000);
tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
tu_cs_emit(cs, A6XX_GRAS_2D_SRC_TL_X_X(blt->src.x));
tu_cs_emit(cs, A6XX_GRAS_2D_SRC_BR_X_X(blt->src.x + blt->src.width - 1));
tu_cs_emit(cs, A6XX_GRAS_2D_SRC_TL_Y_Y(blt->src.y));
tu_cs_emit(cs, A6XX_GRAS_2D_SRC_BR_Y_Y(blt->src.y + blt->src.height - 1));
tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_2D_DST_TL, 2);
tu_cs_emit(cs, A6XX_GRAS_2D_DST_TL_X(blt->dst.x) |
A6XX_GRAS_2D_DST_TL_Y(blt->dst.y));
tu_cs_emit(cs, A6XX_GRAS_2D_DST_BR_X(blt->dst.x + blt->dst.width - 1) |
A6XX_GRAS_2D_DST_BR_Y(blt->dst.y + blt->dst.height - 1));
tu_cs_emit_pkt7(cs, CP_EVENT_WRITE, 1);
tu_cs_emit(cs, 0x3f);
tu_cs_emit_wfi(cs);
tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_8C01, 1);
tu_cs_emit(cs, 0);
if (fmt == RB6_R10G10B10A2_UNORM)
fmt = RB6_R16G16B16A16_FLOAT;
tu_cs_emit_pkt4(cs, REG_A6XX_SP_2D_SRC_FORMAT, 1);
tu_cs_emit(cs, COND(vk_format_is_sint(blt->src.fmt), A6XX_SP_2D_SRC_FORMAT_SINT) |
COND(vk_format_is_uint(blt->src.fmt), A6XX_SP_2D_SRC_FORMAT_UINT) |
A6XX_SP_2D_SRC_FORMAT_COLOR_FORMAT(fmt) |
COND(ifmt == R2D_UNORM8_SRGB, A6XX_SP_2D_SRC_FORMAT_SRGB) |
A6XX_SP_2D_SRC_FORMAT_MASK(0xf));
tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
tu_cs_emit(cs, 0x01000000);
tu_cs_emit_pkt7(cs, CP_BLIT, 1);
tu_cs_emit(cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
tu_cs_emit_wfi(cs);
tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
tu_cs_emit(cs, 0);
}
void tu_blit(struct tu_cmd_buffer *cmdbuf, struct tu_blit *blt, bool copy)
{
if (copy) {
blt->stencil_read =
blt->dst.fmt == VK_FORMAT_R8_UINT &&
blt->src.fmt == VK_FORMAT_D24_UNORM_S8_UINT;
assert(vk_format_get_blocksize(blt->dst.fmt) ==
vk_format_get_blocksize(blt->src.fmt) || blt->stencil_read);
assert(blt->src.samples == blt->dst.samples);
if (vk_format_is_compressed(blt->src.fmt)) {
unsigned block_width = vk_format_get_blockwidth(blt->src.fmt);
unsigned block_height = vk_format_get_blockheight(blt->src.fmt);
blt->src.pitch /= block_width;
blt->src.x /= block_width;
blt->src.y /= block_height;
/* for image_to_image copy, width/height is on the src format */
blt->dst.width = blt->src.width = DIV_ROUND_UP(blt->src.width, block_width);
blt->dst.height = blt->src.height = DIV_ROUND_UP(blt->src.height, block_height);
}
if (vk_format_is_compressed(blt->dst.fmt)) {
unsigned block_width = vk_format_get_blockwidth(blt->dst.fmt);
unsigned block_height = vk_format_get_blockheight(blt->dst.fmt);
blt->dst.pitch /= block_width;
blt->dst.x /= block_width;
blt->dst.y /= block_height;
}
blt->src.fmt = blit_copy_format(blt->src.fmt);
blt->dst.fmt = blit_copy_format(blt->dst.fmt);
/* TODO: does this work correctly with tiling/etc ? */
blt->src.x *= blt->src.samples;
blt->dst.x *= blt->dst.samples;
blt->src.width *= blt->src.samples;
blt->dst.width *= blt->dst.samples;
blt->src.samples = 1;
blt->dst.samples = 1;
} else {
assert(blt->dst.samples == 1);
}
tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 18);
tu6_emit_event_write(cmdbuf, &cmdbuf->cs, LRZ_FLUSH, false);
tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true);
tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true);
tu6_emit_event_write(cmdbuf, &cmdbuf->cs, PC_CCU_INVALIDATE_COLOR, false);
tu6_emit_event_write(cmdbuf, &cmdbuf->cs, PC_CCU_INVALIDATE_DEPTH, false);
/* buffer copy setup */
tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1);
tu_cs_emit(&cmdbuf->cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
for (unsigned layer = 0; layer < blt->layers; layer++) {
if ((blt->src.va & 63) || (blt->src.pitch & 63)) {
/* per line copy path (buffer_to_image) */
assert(copy && !blt->src.tiled);
struct tu_blit line_blt = *blt;
uint64_t src_va = line_blt.src.va + blt->src.pitch * blt->src.y;
line_blt.src.y = 0;
line_blt.src.pitch = 0;
line_blt.src.height = 1;
line_blt.dst.height = 1;
for (unsigned y = 0; y < blt->src.height; y++) {
line_blt.src.x = blt->src.x + (src_va & 63) / vk_format_get_blocksize(blt->src.fmt);
line_blt.src.va = src_va & ~63;
emit_blit_step(cmdbuf, &line_blt);
line_blt.dst.y++;
src_va += blt->src.pitch;
}
} else if ((blt->dst.va & 63) || (blt->dst.pitch & 63)) {
/* per line copy path (image_to_buffer) */
assert(copy && !blt->dst.tiled);
struct tu_blit line_blt = *blt;
uint64_t dst_va = line_blt.dst.va + blt->dst.pitch * blt->dst.y;
line_blt.dst.y = 0;
line_blt.dst.pitch = 0;
line_blt.src.height = 1;
line_blt.dst.height = 1;
for (unsigned y = 0; y < blt->src.height; y++) {
line_blt.dst.x = blt->dst.x + (dst_va & 63) / vk_format_get_blocksize(blt->dst.fmt);
line_blt.dst.va = dst_va & ~63;
emit_blit_step(cmdbuf, &line_blt);
line_blt.src.y++;
dst_va += blt->dst.pitch;
}
} else {
emit_blit_step(cmdbuf, blt);
}
blt->dst.va += blt->dst.layer_size;
blt->src.va += blt->src.layer_size;
}
tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 17);
tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true);
tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true);
tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true);
tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_INVALIDATE, false);
}

View File

@ -0,0 +1,100 @@
/*
* Copyright © 2019 Valve Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Jonathan Marek <jonathan@marek.ca>
*
*/
#ifndef TU_BLIT_H
#define TU_BLIT_H
#include "tu_private.h"
#include "vk_format.h"
/* Description of one side (source or destination) of a blit. */
struct tu_blit_surf {
   /* format of the surface; tu_blit() replaces it in copy mode */
   VkFormat fmt;
   /* tile mode programmed into the surface info register */
   enum a6xx_tile_mode tile_mode;
   /* when set, the component swap is WZYX instead of the format's swap */
   bool tiled;
   /* GPU address of the first layer to process */
   uint64_t va;
   /* value programmed into the surface pitch register */
   uint32_t pitch;
   /* added to va after each processed layer */
   uint32_t layer_size;
   /* top-left corner of the rectangle */
   uint32_t x;
   uint32_t y;
   /* extent of the rectangle */
   uint32_t width;
   uint32_t height;
   /* sample count of the surface */
   unsigned samples;
};
/*
 * Describes a region of an image as a blit surface.
 *
 * img:     image to address
 * subres:  selects the mip level and the base array layer
 * offsets: two opposite corners of the region, in either order; the
 *          rectangle starts at the per-axis minimum and its size is the
 *          absolute difference, and the smaller z selects the first slice
 */
static inline struct tu_blit_surf
tu_blit_surf(struct tu_image *img,
             VkImageSubresourceLayers subres,
             const VkOffset3D *offsets)
{
   const uint32_t level = subres.mipLevel;

   /* start of the mip level, then skip to the base layer and first slice */
   const uint64_t va =
      img->bo->iova + img->bo_offset + img->levels[level].offset +
      subres.baseArrayLayer * img->layer_size +
      MIN2(offsets[0].z, offsets[1].z) * img->levels[level].size;

   const uint32_t pitch = img->levels[level].pitch *
                          vk_format_get_blocksize(img->vk_format) *
                          img->samples;

   /* 3D images step one slice of the level at a time, others one layer */
   uint32_t layer_size;
   if (img->type == VK_IMAGE_TYPE_3D)
      layer_size = img->levels[level].size;
   else
      layer_size = img->layer_size;

   return (struct tu_blit_surf) {
      .fmt = img->vk_format,
      .tile_mode = tu6_get_image_tile_mode(img, level),
      .tiled = img->tile_mode != TILE6_LINEAR,
      .va = va,
      .pitch = pitch,
      .layer_size = layer_size,
      .x = MIN2(offsets[0].x, offsets[1].x),
      .y = MIN2(offsets[0].y, offsets[1].y),
      .width = abs(offsets[1].x - offsets[0].x),
      .height = abs(offsets[1].y - offsets[0].y),
      .samples = img->samples,
   };
}
/*
 * Same as tu_blit_surf(), but with the region given as an offset plus an
 * extent. The second corner is offset + extent in x and y; z is left at
 * offset.z, so extent.depth is not used here.
 */
static inline struct tu_blit_surf
tu_blit_surf_ext(struct tu_image *image,
                 VkImageSubresourceLayers subres,
                 VkOffset3D offset,
                 VkExtent3D extent)
{
   const VkOffset3D corners[2] = {
      offset,
      {
         .x = offset.x + extent.width,
         .y = offset.y + extent.height,
         .z = offset.z,
      },
   };

   return tu_blit_surf(image, subres, corners);
}
static inline struct tu_blit_surf
tu_blit_surf_whole(struct tu_image *image)
{
return tu_blit_surf(image, (VkImageSubresourceLayers){}, (VkOffset3D[]) {
{}, {image->extent.width, image->extent.height}
});
}
/* Parameters of one blit or copy operation, consumed by tu_blit(). */
struct tu_blit {
/* destination surface */
struct tu_blit_surf dst;
/* source surface */
struct tu_blit_surf src;
/* number of layers processed; both surface addresses advance by their
 * layer_size after each one */
uint32_t layers;
/* sets the FILTER bit in the source info register when true */
bool filter;
/* computed by tu_blit() in copy mode (D24_UNORM_S8_UINT source with an
 * R8_UINT destination); callers do not need to set it */
bool stencil_read;
/* value programmed into the ROTATE field of the blit control register */
enum a6xx_rotation rotation;
};
/*
 * Records the operation described by blt into cmdbuf.
 *
 * copy: when true the data is moved using same-size UINT formats (and the
 *       two surfaces must have equal sample counts); when false the
 *       destination must be single-sampled.
 *
 * blt is modified by the call (formats/offsets in copy mode, and the
 * surface addresses as layers are processed), so it must not be reused
 * afterwards without being re-initialized.
 */
void tu_blit(struct tu_cmd_buffer *cmdbuf, struct tu_blit *blt, bool copy);
#endif /* TU_BLIT_H */

View File

@ -314,8 +314,8 @@ tu_tiling_config_get_tile(const struct tu_tiling_config *tiling,
: tile->begin.y + tiling->tile0.extent.height;
}
static enum a3xx_msaa_samples
tu6_msaa_samples(uint32_t samples)
enum a3xx_msaa_samples
tu_msaa_samples(uint32_t samples)
{
switch (samples) {
case 1:

View File

@ -645,8 +645,8 @@ tu_physical_device_get_format_properties(
}
if (native_fmt->rb >= 0) {
linear |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT;
tiled |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT;
linear |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
tiled |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
}
end:

View File

@ -23,7 +23,49 @@
#include "tu_private.h"
#include "nir/nir_builder.h"
#include "tu_blit.h"
/*
 * Records one VkImageBlit region from src_image to dst_image.
 *
 * Mirroring in x/y is expressed through the blit rotation: an axis is
 * mirrored when exactly one of the source/destination ranges on that axis
 * is given in decreasing order. Mirroring in z and blits whose source and
 * destination depth differ are not implemented and are skipped with a
 * tu_finishme() message.
 *
 * cmdbuf:    command buffer being recorded
 * src_image: image read from
 * dst_image: image written to
 * info:      region to blit
 * filter:    VK_FILTER_LINEAR enables filtering, anything else disables it
 */
static void
tu_blit_image(struct tu_cmd_buffer *cmdbuf,
              struct tu_image *src_image,
              struct tu_image *dst_image,
              const VkImageBlit *info,
              VkFilter filter)
{
   /* indexed as rotate[mirror_y][mirror_x] */
   static const enum a6xx_rotation rotate[2][2] = {
      {ROTATE_0, ROTATE_HFLIP},
      {ROTATE_VFLIP, ROTATE_180},
   };

   bool mirror_x = (info->srcOffsets[1].x < info->srcOffsets[0].x) !=
                   (info->dstOffsets[1].x < info->dstOffsets[0].x);
   bool mirror_y = (info->srcOffsets[1].y < info->srcOffsets[0].y) !=
                   (info->dstOffsets[1].y < info->dstOffsets[0].y);
   bool mirror_z = (info->srcOffsets[1].z < info->srcOffsets[0].z) !=
                   (info->dstOffsets[1].z < info->dstOffsets[0].z);

   if (mirror_z) {
      tu_finishme("blit z mirror\n");
      return;
   }

   const int32_t src_depth = info->srcOffsets[1].z - info->srcOffsets[0].z;
   const int32_t dst_depth = info->dstOffsets[1].z - info->dstOffsets[0].z;

   if (src_depth != dst_depth) {
      tu_finishme("blit z filter\n");
      return;
   }

   assert(info->dstSubresource.layerCount == info->srcSubresource.layerCount);

   /*
    * Both z ranges may be given in decreasing order (that is not a mirror,
    * since source and destination are flipped together). The signed depth
    * is then negative, and using it directly as a layer count would wrap to
    * a huge unsigned value. tu_blit_surf() already starts at the smaller z,
    * so the number of slices is the absolute depth.
    */
   const uint32_t slices = (uint32_t) abs(src_depth);

   struct tu_blit blt = {
      .dst = tu_blit_surf(dst_image, info->dstSubresource, info->dstOffsets),
      .src = tu_blit_surf(src_image, info->srcSubresource, info->srcOffsets),
      .layers = MAX2(slices, info->dstSubresource.layerCount),
      .filter = filter == VK_FILTER_LINEAR,
      .rotation = rotate[mirror_y][mirror_x],
   };

   tu_blit(cmdbuf, &blt, false);
}
void
tu_CmdBlitImage(VkCommandBuffer commandBuffer,
@ -36,4 +78,14 @@ tu_CmdBlitImage(VkCommandBuffer commandBuffer,
VkFilter filter)
{
TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
TU_FROM_HANDLE(tu_image, src_image, srcImage);
TU_FROM_HANDLE(tu_image, dst_image, destImage);
tu_bo_list_add(&cmdbuf->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
tu_bo_list_add(&cmdbuf->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);
for (uint32_t i = 0; i < regionCount; ++i) {
tu_blit_image(cmdbuf, src_image, dst_image, pRegions + i, filter);
}
}

View File

@ -30,13 +30,7 @@
#include "vk_format.h"
#include "tu_cs.h"
/*
* TODO:
* - 3D textures
* - compressed image formats (need to divide offset/extent)
* - Fix d24_unorm_s8_uint support & aspects
*/
#include "tu_blit.h"
static uint32_t
blit_control(enum a6xx_color_fmt fmt)
@ -47,29 +41,6 @@ blit_control(enum a6xx_color_fmt fmt)
return blit_cntl;
}
/*
 * Computes the value written to REG_A6XX_SP_2D_SRC_FORMAT for a format:
 * the native color format plus the SINT/UINT/NORM flags derived from the
 * first non-void channel of the format description.
 */
static uint32_t tu6_sp_2d_src_format(VkFormat format)
{
   const struct vk_format_description *desc = vk_format_description(format);

   /* 0xf000 is always set; presumably the component mask field - TODO confirm */
   uint32_t reg = A6XX_SP_2D_SRC_FORMAT_COLOR_FORMAT(tu6_get_native_format(format)->rb) |
                  0xf000;

   const int first = vk_format_get_first_non_void_channel(format);
   if (first < 0) {
      /* TODO special format. */
      return reg;
   }

   const bool is_signed = desc->channel[first].type == VK_FORMAT_TYPE_SIGNED;

   if (desc->channel[first].normalized) {
      /* normalized: NORM always, SINT additionally for signed channels */
      reg |= A6XX_SP_2D_SRC_FORMAT_NORM;
      if (is_signed)
         reg |= A6XX_SP_2D_SRC_FORMAT_SINT;
   } else if (desc->channel[first].pure_integer) {
      reg |= is_signed ? A6XX_SP_2D_SRC_FORMAT_SINT
                       : A6XX_SP_2D_SRC_FORMAT_UINT;
   }

   return reg;
}
static void
tu_dma_prepare(struct tu_cmd_buffer *cmdbuf)
{
@ -90,32 +61,6 @@ tu_dma_prepare(struct tu_cmd_buffer *cmdbuf)
tu_cs_emit(&cmdbuf->cs, 0x10000000);
}
/* Copies always go through UINT formats so that no precision is lost to
 * format conversion.
 *
 * Example failure it avoids:
 * - dEQP-VK.api.copy_and_blit.core.image_to_image.all_formats.color.r16_unorm.r16_unorm.general_general
 *
 * Returns the UINT format whose block size in bits matches that of the
 * given format.
 */
static VkFormat
tu_canonical_copy_format(VkFormat format)
{
   const unsigned block_bits = vk_format_get_blocksizebits(format);

   switch (block_bits) {
   case 8: return VK_FORMAT_R8_UINT;
   case 16: return VK_FORMAT_R16_UINT;
   case 32: return VK_FORMAT_R32_UINT;
   case 64: return VK_FORMAT_R32G32_UINT;
   case 96: return VK_FORMAT_R32G32B32_UINT;
   case 128: return VK_FORMAT_R32G32B32A32_UINT;
   default:
      unreachable("unhandled format size");
   }
}
static void
tu_copy_buffer(struct tu_cmd_buffer *cmdbuf,
struct tu_bo *src_bo,
@ -249,548 +194,80 @@ tu_copy_buffer(struct tu_cmd_buffer *cmdbuf,
tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true);
}
static void
tu_copy_buffer_to_image_step(struct tu_cmd_buffer *cmdbuf,
struct tu_buffer *src_buffer,
struct tu_image *dst_image,
const VkBufferImageCopy *copy_info,
VkFormat format,
uint32_t layer,
uint64_t src_va)
static struct tu_blit_surf
tu_blit_buffer(struct tu_buffer *buffer,
VkFormat format,
const VkBufferImageCopy *info)
{
const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb;
if (info->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT)
format = VK_FORMAT_R8_UINT;
uint64_t dst_va = dst_image->bo->iova + dst_image->bo_offset + dst_image->layer_size * layer + dst_image->levels[copy_info->imageSubresource.mipLevel].offset;
unsigned dst_pitch = dst_image->levels[copy_info->imageSubresource.mipLevel].pitch *
unsigned pitch = (info->bufferRowLength ?: info->imageExtent.width) *
vk_format_get_blocksize(format);
unsigned src_pitch;
unsigned src_offset = 0;
if (copy_info->imageExtent.height == 1) {
/* Can't find this in the spec, but not having it is sort of insane? */
assert(src_va % vk_format_get_blocksize(format) == 0);
src_offset = (src_va & 63) / vk_format_get_blocksize(format);
src_va &= ~63;
src_pitch = align((src_offset + copy_info->imageExtent.width) * vk_format_get_blocksize(format), 64);
} else {
unsigned src_pixel_stride = copy_info->bufferRowLength
? copy_info->bufferRowLength
: copy_info->imageExtent.width;
src_pitch = src_pixel_stride * vk_format_get_blocksize(format);
assert(!(src_pitch & 63));
assert(!(src_va & 63));
}
tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 48);
/*
* Emit source:
*/
tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_PS_2D_SRC_INFO, 13);
tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(rb_fmt) |
A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(TILE6_LINEAR) |
A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) |
0x500000);
tu_cs_emit(&cmdbuf->cs,
A6XX_SP_PS_2D_SRC_SIZE_WIDTH(src_offset + copy_info->imageExtent.width) |
A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(
copy_info->imageExtent.height)); /* SP_PS_2D_SRC_SIZE */
tu_cs_emit_qw(&cmdbuf->cs, src_va);
tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(src_pitch));
tu_cs_emit(&cmdbuf->cs, 0x00000000);
tu_cs_emit(&cmdbuf->cs, 0x00000000);
tu_cs_emit(&cmdbuf->cs, 0x00000000);
tu_cs_emit(&cmdbuf->cs, 0x00000000);
tu_cs_emit(&cmdbuf->cs, 0x00000000);
tu_cs_emit(&cmdbuf->cs, 0x00000000);
tu_cs_emit(&cmdbuf->cs, 0x00000000);
tu_cs_emit(&cmdbuf->cs, 0x00000000);
/*
* Emit destination:
*/
tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_DST_INFO, 9);
tu_cs_emit(&cmdbuf->cs,
A6XX_RB_2D_DST_INFO_COLOR_FORMAT(rb_fmt) |
A6XX_RB_2D_DST_INFO_TILE_MODE(dst_image->tile_mode) |
A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
tu_cs_emit_qw(&cmdbuf->cs, dst_va);
tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_SIZE_PITCH(dst_pitch));
tu_cs_emit(&cmdbuf->cs, 0x00000000);
tu_cs_emit(&cmdbuf->cs, 0x00000000);
tu_cs_emit(&cmdbuf->cs, 0x00000000);
tu_cs_emit(&cmdbuf->cs, 0x00000000);
tu_cs_emit(&cmdbuf->cs, 0x00000000);
tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_X_X(src_offset));
tu_cs_emit(&cmdbuf->cs,
A6XX_GRAS_2D_SRC_BR_X_X(src_offset + copy_info->imageExtent.width - 1));
tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_Y_Y(0));
tu_cs_emit(&cmdbuf->cs,
A6XX_GRAS_2D_SRC_BR_Y_Y(copy_info->imageExtent.height - 1));
tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_DST_TL, 2);
tu_cs_emit(&cmdbuf->cs,
A6XX_GRAS_2D_DST_TL_X(copy_info->imageOffset.x) |
A6XX_GRAS_2D_DST_TL_Y(copy_info->imageOffset.y));
tu_cs_emit(&cmdbuf->cs,
A6XX_GRAS_2D_DST_BR_X(copy_info->imageOffset.x +
copy_info->imageExtent.width - 1) |
A6XX_GRAS_2D_DST_BR_Y(copy_info->imageOffset.y +
copy_info->imageExtent.height - 1));
tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1);
tu_cs_emit(&cmdbuf->cs, 0x3f);
tu_cs_emit_wfi(&cmdbuf->cs);
tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8C01, 1);
tu_cs_emit(&cmdbuf->cs, 0);
tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_2D_SRC_FORMAT, 1);
tu_cs_emit(&cmdbuf->cs, tu6_sp_2d_src_format(format));
tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
tu_cs_emit(&cmdbuf->cs, 0x01000000);
tu_cs_emit_pkt7(&cmdbuf->cs, CP_BLIT, 1);
tu_cs_emit(&cmdbuf->cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
tu_cs_emit_wfi(&cmdbuf->cs);
tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
tu_cs_emit(&cmdbuf->cs, 0);
return (struct tu_blit_surf) {
.fmt = format,
.tile_mode = TILE6_LINEAR,
.va = buffer->bo->iova + buffer->bo_offset + info->bufferOffset,
.pitch = pitch,
.layer_size = (info->bufferImageHeight ?: info->imageExtent.height) * pitch / vk_format_get_blockwidth(format) / vk_format_get_blockheight(format),
.width = info->imageExtent.width,
.height = info->imageExtent.height,
.samples = 1,
};
}
static void
tu_copy_buffer_to_image(struct tu_cmd_buffer *cmdbuf,
struct tu_buffer *src_buffer,
struct tu_image *dst_image,
const VkBufferImageCopy *copy_info)
const VkBufferImageCopy *info)
{
tu_bo_list_add(&cmdbuf->bo_list, src_buffer->bo, MSM_SUBMIT_BO_READ);
tu_bo_list_add(&cmdbuf->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);
/* general setup */
tu_dma_prepare(cmdbuf);
tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 6);
/* buffer copy setup */
tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1);
tu_cs_emit(&cmdbuf->cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
VkFormat format = tu_canonical_copy_format(dst_image->vk_format);
const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb;
const uint32_t blit_cntl = blit_control(rb_fmt) | 0x20000000;
tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1);
tu_cs_emit(&cmdbuf->cs, blit_cntl);
tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
tu_cs_emit(&cmdbuf->cs, blit_cntl);
unsigned src_pixel_stride = copy_info->bufferRowLength
? copy_info->bufferRowLength
: copy_info->imageExtent.width;
unsigned cpp = vk_format_get_blocksize(format);
unsigned src_pitch = src_pixel_stride * cpp;
for (unsigned layer_offset = 0; layer_offset < copy_info->imageSubresource.layerCount; ++layer_offset) {
unsigned layer = copy_info->imageSubresource.baseArrayLayer + layer_offset;
uint64_t src_va = src_buffer->bo->iova + src_buffer->bo_offset + copy_info->bufferOffset + layer_offset * copy_info->bufferImageHeight * src_pitch;
if ((src_pitch & 63) || (src_va & 63)) {
/* Do a per line copy */
VkBufferImageCopy line_copy_info = *copy_info;
line_copy_info.imageExtent.height = 1;
for (unsigned r = 0; r < copy_info->imageExtent.height; ++r) {
/*
* if src_va is not aligned the line copy will need to adjust. Give it
* room to do so.
*/
unsigned max_width = 16384 - (src_va & 0x3f) ? 64 : 0;
line_copy_info.imageOffset.x = copy_info->imageOffset.x;
line_copy_info.imageExtent.width = copy_info->imageExtent.width;
for (unsigned c = 0; c < copy_info->imageExtent.width; c += max_width) {
tu_copy_buffer_to_image_step(cmdbuf, src_buffer, dst_image, &line_copy_info, format, layer, src_va + c * cpp);
line_copy_info.imageOffset.x += max_width;
line_copy_info.imageExtent.width -= max_width;
}
line_copy_info.imageOffset.y++;
src_va += src_pitch;
}
} else {
tu_copy_buffer_to_image_step(cmdbuf, src_buffer, dst_image, copy_info, format, layer, src_va);
}
if (info->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT &&
vk_format_get_blocksize(dst_image->vk_format) == 4) {
tu_finishme("aspect mask\n");
return;
}
tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 15);
tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true);
tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true);
tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true);
}
static void
tu_copy_image_to_buffer_step(struct tu_cmd_buffer *cmdbuf,
struct tu_image *src_image,
struct tu_buffer *dst_buffer,
const VkBufferImageCopy *copy_info,
VkFormat format,
uint32_t layer,
uint64_t dst_va)
{
const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb;
uint64_t src_va = src_image->bo->iova + src_image->bo_offset + src_image->layer_size * layer + src_image->levels[copy_info->imageSubresource.mipLevel].offset;
unsigned src_pitch = src_image->levels[copy_info->imageSubresource.mipLevel].pitch *
vk_format_get_blocksize(format);
unsigned dst_pitch;
unsigned dst_offset = 0;
if (copy_info->imageExtent.height == 1) {
/* Can't find this in the spec, but not having it is sort of insane? */
assert(dst_va % vk_format_get_blocksize(format) == 0);
dst_offset = (dst_va & 63) / vk_format_get_blocksize(format);
dst_va &= ~63;
dst_pitch = align((dst_offset + copy_info->imageExtent.width) * vk_format_get_blocksize(format), 64);
} else {
unsigned dst_pixel_stride = copy_info->bufferRowLength
? copy_info->bufferRowLength
: copy_info->imageExtent.width;
dst_pitch = dst_pixel_stride * vk_format_get_blocksize(format);
assert(!(dst_pitch & 63));
assert(!(dst_va & 63));
}
tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 48);
/*
* Emit source:
*/
tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_PS_2D_SRC_INFO, 13);
tu_cs_emit(&cmdbuf->cs,
A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(rb_fmt) |
A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(src_image->tile_mode) |
A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) | 0x500000);
tu_cs_emit(&cmdbuf->cs,
A6XX_SP_PS_2D_SRC_SIZE_WIDTH(src_image->extent.width) |
A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(
src_image->extent.height)); /* SP_PS_2D_SRC_SIZE */
tu_cs_emit_qw(&cmdbuf->cs, src_va);
tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(src_pitch));
tu_cs_emit(&cmdbuf->cs, 0x00000000);
tu_cs_emit(&cmdbuf->cs, 0x00000000);
tu_cs_emit(&cmdbuf->cs, 0x00000000);
tu_cs_emit(&cmdbuf->cs, 0x00000000);
tu_cs_emit(&cmdbuf->cs, 0x00000000);
tu_cs_emit(&cmdbuf->cs, 0x00000000);
tu_cs_emit(&cmdbuf->cs, 0x00000000);
tu_cs_emit(&cmdbuf->cs, 0x00000000);
/*
* Emit destination:
*/
tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_DST_INFO, 9);
tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(rb_fmt) |
A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) |
A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
tu_cs_emit_qw(&cmdbuf->cs, dst_va);
tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_SIZE_PITCH(dst_pitch));
tu_cs_emit(&cmdbuf->cs, 0x00000000);
tu_cs_emit(&cmdbuf->cs, 0x00000000);
tu_cs_emit(&cmdbuf->cs, 0x00000000);
tu_cs_emit(&cmdbuf->cs, 0x00000000);
tu_cs_emit(&cmdbuf->cs, 0x00000000);
tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_X_X(copy_info->imageOffset.x));
tu_cs_emit(&cmdbuf->cs,
A6XX_GRAS_2D_SRC_BR_X_X(copy_info->imageOffset.x +
copy_info->imageExtent.width - 1));
tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_Y_Y(copy_info->imageOffset.y));
tu_cs_emit(&cmdbuf->cs,
A6XX_GRAS_2D_SRC_BR_Y_Y(copy_info->imageOffset.y +
copy_info->imageExtent.height - 1));
tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_DST_TL, 2);
tu_cs_emit(&cmdbuf->cs,
A6XX_GRAS_2D_DST_TL_X(dst_offset) | A6XX_GRAS_2D_DST_TL_Y(0));
tu_cs_emit(&cmdbuf->cs,
A6XX_GRAS_2D_DST_BR_X(dst_offset + copy_info->imageExtent.width - 1) |
A6XX_GRAS_2D_DST_BR_Y(copy_info->imageExtent.height - 1));
tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1);
tu_cs_emit(&cmdbuf->cs, 0x3f);
tu_cs_emit_wfi(&cmdbuf->cs);
tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8C01, 1);
tu_cs_emit(&cmdbuf->cs, 0);
tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_2D_SRC_FORMAT, 1);
tu_cs_emit(&cmdbuf->cs, tu6_sp_2d_src_format(format));
tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
tu_cs_emit(&cmdbuf->cs, 0x01000000);
tu_cs_emit_pkt7(&cmdbuf->cs, CP_BLIT, 1);
tu_cs_emit(&cmdbuf->cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
tu_cs_emit_wfi(&cmdbuf->cs);
tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
tu_cs_emit(&cmdbuf->cs, 0);
tu_blit(cmdbuf, &(struct tu_blit) {
.dst = tu_blit_surf_ext(dst_image, info->imageSubresource, info->imageOffset, info->imageExtent),
.src = tu_blit_buffer(src_buffer, dst_image->vk_format, info),
.layers = MAX2(info->imageExtent.depth, info->imageSubresource.layerCount),
}, true);
}
static void
tu_copy_image_to_buffer(struct tu_cmd_buffer *cmdbuf,
struct tu_image *src_image,
struct tu_buffer *dst_buffer,
const VkBufferImageCopy *copy_info)
const VkBufferImageCopy *info)
{
tu_bo_list_add(&cmdbuf->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
tu_bo_list_add(&cmdbuf->bo_list, dst_buffer->bo, MSM_SUBMIT_BO_WRITE);
/* general setup */
tu_dma_prepare(cmdbuf);
tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 6);
/* buffer copy setup */
tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1);
tu_cs_emit(&cmdbuf->cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
VkFormat format = tu_canonical_copy_format(src_image->vk_format);
const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb;
unsigned dst_pixel_stride = copy_info->bufferRowLength
? copy_info->bufferRowLength
: copy_info->imageExtent.width;
unsigned cpp = vk_format_get_blocksize(format);
unsigned dst_pitch = dst_pixel_stride * cpp;
const uint32_t blit_cntl = blit_control(rb_fmt) | 0x20000000;
tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1);
tu_cs_emit(&cmdbuf->cs, blit_cntl);
tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
tu_cs_emit(&cmdbuf->cs, blit_cntl);
for (unsigned layer_offset = 0; layer_offset < copy_info->imageSubresource.layerCount; ++layer_offset) {
unsigned layer = copy_info->imageSubresource.baseArrayLayer + layer_offset;
uint64_t dst_va = dst_buffer->bo->iova + dst_buffer->bo_offset + copy_info->bufferOffset + layer_offset * copy_info->bufferImageHeight * dst_pitch;
if ((dst_pitch & 63) || (dst_va & 63)) {
/* Do a per line copy */
VkBufferImageCopy line_copy_info = *copy_info;
line_copy_info.imageExtent.height = 1;
for (unsigned r = 0; r < copy_info->imageExtent.height; ++r) {
/*
* if dst_va is not aligned the line copy will need to adjust. Give it
* room to do so.
*/
unsigned max_width = 16384 - (dst_va & 0x3f) ? 64 : 0;
line_copy_info.imageOffset.x = copy_info->imageOffset.x;
line_copy_info.imageExtent.width = copy_info->imageExtent.width;
for (unsigned c = 0; c < copy_info->imageExtent.width; c += max_width) {
tu_copy_image_to_buffer_step(cmdbuf, src_image, dst_buffer, &line_copy_info, format, layer, dst_va + c * cpp);
line_copy_info.imageOffset.x += max_width;
line_copy_info.imageExtent.width -= max_width;
}
line_copy_info.imageOffset.y++;
dst_va += dst_pitch;
}
} else {
tu_copy_image_to_buffer_step(cmdbuf, src_image, dst_buffer, copy_info, format, layer, dst_va);
}
}
tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 15);
tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true);
tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true);
tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true);
}
/*
 * Emit the 2D-engine commands that copy a single array layer of one
 * VkImageCopy region from src_image to dst_image.
 *
 * cmdbuf:       command buffer; all packets are written to cmdbuf->cs
 * src_image:    image that is read
 * dst_image:    image that is written
 * copy_info:    region description; the mip levels, base array layers,
 *               offsets and extent are taken from it
 * format:       format used to program both the source and the destination
 *               surface
 * layer_offset: layer index relative to the base array layer of the source
 *               and destination subresources
 */
static void
tu_copy_image_to_image_step(struct tu_cmd_buffer *cmdbuf,
                            struct tu_image *src_image,
                            struct tu_image *dst_image,
                            const VkImageCopy *copy_info,
                            VkFormat format,
                            uint32_t layer_offset)
{
   const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb;

   /* Source surface: address of the selected layer/level and its pitch
    * (level pitch scaled by the block size of format).
    */
   unsigned src_layer =
      copy_info->srcSubresource.baseArrayLayer + layer_offset;
   uint64_t src_va =
      src_image->bo->iova + src_image->bo_offset +
      src_image->layer_size * src_layer +
      src_image->levels[copy_info->srcSubresource.mipLevel].offset;
   unsigned src_pitch =
      src_image->levels[copy_info->srcSubresource.mipLevel].pitch *
      vk_format_get_blocksize(format);

   /* Destination surface: same computation, but every term has to come
    * from dst_image.  The pitch used to be read from src_image's level
    * table, which is wrong whenever the two images differ in layout.
    */
   unsigned dst_layer =
      copy_info->dstSubresource.baseArrayLayer + layer_offset;
   uint64_t dst_va =
      dst_image->bo->iova + dst_image->bo_offset +
      dst_image->layer_size * dst_layer +
      dst_image->levels[copy_info->dstSubresource.mipLevel].offset;
   unsigned dst_pitch =
      dst_image->levels[copy_info->dstSubresource.mipLevel].pitch *
      vk_format_get_blocksize(format);

   tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 48);

   /*
    * Emit source:
    */
   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_PS_2D_SRC_INFO, 13);
   tu_cs_emit(&cmdbuf->cs,
              A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(rb_fmt) |
                 A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(src_image->tile_mode) |
                 A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) | 0x500000);
   tu_cs_emit(&cmdbuf->cs,
              A6XX_SP_PS_2D_SRC_SIZE_WIDTH(src_image->extent.width) |
                 A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(
                    src_image->extent.height)); /* SP_PS_2D_SRC_SIZE */
   tu_cs_emit_qw(&cmdbuf->cs, src_va);
   tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(src_pitch));

   /* Remaining eight registers of the 13-register write are zeroed. */
   tu_cs_emit(&cmdbuf->cs, 0x00000000);
   tu_cs_emit(&cmdbuf->cs, 0x00000000);
   tu_cs_emit(&cmdbuf->cs, 0x00000000);
   tu_cs_emit(&cmdbuf->cs, 0x00000000);

   tu_cs_emit(&cmdbuf->cs, 0x00000000);
   tu_cs_emit(&cmdbuf->cs, 0x00000000);
   tu_cs_emit(&cmdbuf->cs, 0x00000000);
   tu_cs_emit(&cmdbuf->cs, 0x00000000);

   /*
    * Emit destination:
    */
   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_DST_INFO, 9);
   tu_cs_emit(&cmdbuf->cs,
              A6XX_RB_2D_DST_INFO_COLOR_FORMAT(rb_fmt) |
                 A6XX_RB_2D_DST_INFO_TILE_MODE(dst_image->tile_mode) |
                 A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
   tu_cs_emit_qw(&cmdbuf->cs, dst_va);
   tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_SIZE_PITCH(dst_pitch));

   /* Remaining five registers of the 9-register write are zeroed. */
   tu_cs_emit(&cmdbuf->cs, 0x00000000);
   tu_cs_emit(&cmdbuf->cs, 0x00000000);
   tu_cs_emit(&cmdbuf->cs, 0x00000000);
   tu_cs_emit(&cmdbuf->cs, 0x00000000);
   tu_cs_emit(&cmdbuf->cs, 0x00000000);

   /* Source rectangle: inclusive top-left / bottom-right corners. */
   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
   tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_X_X(copy_info->srcOffset.x));
   tu_cs_emit(&cmdbuf->cs,
              A6XX_GRAS_2D_SRC_BR_X_X(copy_info->srcOffset.x +
                                      copy_info->extent.width - 1));
   tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_Y_Y(copy_info->srcOffset.y));
   tu_cs_emit(&cmdbuf->cs,
              A6XX_GRAS_2D_SRC_BR_Y_Y(copy_info->srcOffset.y +
                                      copy_info->extent.height - 1));

   /* Destination rectangle: same extent, placed at dstOffset. */
   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_DST_TL, 2);
   tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_DST_TL_X(copy_info->dstOffset.x) |
                              A6XX_GRAS_2D_DST_TL_Y(copy_info->dstOffset.y));
   tu_cs_emit(&cmdbuf->cs,
              A6XX_GRAS_2D_DST_BR_X(copy_info->dstOffset.x +
                                    copy_info->extent.width - 1) |
                 A6XX_GRAS_2D_DST_BR_Y(copy_info->dstOffset.y +
                                       copy_info->extent.height - 1));

   /* NOTE(review): event 0x3f has no symbolic name here - confirm meaning. */
   tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1);
   tu_cs_emit(&cmdbuf->cs, 0x3f);
   tu_cs_emit_wfi(&cmdbuf->cs);

   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8C01, 1);
   tu_cs_emit(&cmdbuf->cs, 0);

   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_2D_SRC_FORMAT, 1);
   tu_cs_emit(&cmdbuf->cs, tu6_sp_2d_src_format(format));

   /* RB_UNKNOWN_8E04 is set before the blit and cleared again after it. */
   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
   tu_cs_emit(&cmdbuf->cs, 0x01000000);

   tu_cs_emit_pkt7(&cmdbuf->cs, CP_BLIT, 1);
   tu_cs_emit(&cmdbuf->cs, CP_BLIT_0_OP(BLIT_OP_SCALE));

   tu_cs_emit_wfi(&cmdbuf->cs);

   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
   tu_cs_emit(&cmdbuf->cs, 0);

   /* NOTE(review): dst_buffer and info are not declared in this function;
    * the call below looks like it belongs to an image-to-buffer copy and
    * may be residue of a patch whose old and new lines were merged -
    * confirm before relying on it.
    */
   tu_blit(cmdbuf, &(struct tu_blit) {
      .dst = tu_blit_buffer(dst_buffer, src_image->vk_format, info),
      .src = tu_blit_surf_ext(src_image, info->imageSubresource, info->imageOffset, info->imageExtent),
      .layers = MAX2(info->imageExtent.depth, info->imageSubresource.layerCount),
   }, true);
}
/*
 * Record the commands that copy one VkImageCopy region from src_image to
 * dst_image on cmdbuf.
 *
 * NOTE(review): the last parameter appears twice (copy_info / info) and the
 * body contains both a per-layer loop calling tu_copy_image_to_image_step
 * and a single tu_blit call.  This looks like the removed and the added
 * lines of a patch shown together - confirm which version is intended.
 */
static void
tu_copy_image_to_image(struct tu_cmd_buffer *cmdbuf,
struct tu_image *src_image,
struct tu_image *dst_image,
const VkImageCopy *copy_info)
const VkImageCopy *info)
{
/* TODO:
* - Handle 3D images.
* - In some cases where src and dst format are different this may
* have tiling implications. Not sure if things happen correctly
* in that case.
*/
/* Register both images' BOs with the command buffer: source for reading,
 * destination for writing.
 */
tu_bo_list_add(&cmdbuf->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
tu_bo_list_add(&cmdbuf->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);
/* general setup */
tu_dma_prepare(cmdbuf);
tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 6);
/* buffer copy setup */
tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1);
tu_cs_emit(&cmdbuf->cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
/* The copy format is derived from the source image's format only. */
VkFormat format = tu_canonical_copy_format(src_image->vk_format);
const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb;
/* The same control word is written to both the RB and the GRAS register.
 * NOTE(review): the meaning of the extra 0x20000000 bit is not visible
 * here - confirm.
 */
const uint32_t blit_cntl = blit_control(rb_fmt) | 0x20000000;
tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1);
tu_cs_emit(&cmdbuf->cs, blit_cntl);
tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
tu_cs_emit(&cmdbuf->cs, blit_cntl);
/* One step per array layer; the count is taken from the source
 * subresource.
 */
for (unsigned layer_offset = 0;
layer_offset < copy_info->srcSubresource.layerCount; ++layer_offset) {
tu_copy_image_to_image_step(cmdbuf, src_image, dst_image, copy_info,
format, layer_offset);
/* Not handled yet: a stencil-only aspect on an image whose format has a
 * 4-byte block size, on either side of the copy.  Report it through
 * tu_finishme and return early.
 */
if ((info->dstSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT &&
vk_format_get_blocksize(dst_image->vk_format) == 4) ||
(info->srcSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT &&
vk_format_get_blocksize(src_image->vk_format) == 4)) {
tu_finishme("aspect mask\n");
return;
}
/* Three event writes follow the copy; the first (0x1d) has no symbolic
 * name here.
 */
tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 15);
tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true);
tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true);
tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true);
/* Single blit for the whole region: both surfaces are built from the
 * region's subresource, offset and extent, and the layer count is the
 * region's depth.
 */
tu_blit(cmdbuf, &(struct tu_blit) {
.dst = tu_blit_surf_ext(dst_image, info->dstSubresource, info->dstOffset, info->extent),
.src = tu_blit_surf_ext(src_image, info->srcSubresource, info->srcOffset, info->extent),
.layers = info->extent.depth,
}, true);
}
/*
 * Copy regionCount regions from a buffer into an image by calling
 * tu_copy_buffer_to_image once per entry of pRegions.
 *
 * NOTE(review): a patch hunk header is embedded in the signature line and
 * the handle unwrapping / region loop appear twice with different local
 * names (cmd_buffer/dest_image vs. cmdbuf/dst_image).  This looks like old
 * and new lines of a patch shown together - confirm which is intended.
 */
void
@ -821,14 +298,15 @@ tu_CmdCopyBufferToImage(VkCommandBuffer commandBuffer,
uint32_t regionCount,
const VkBufferImageCopy *pRegions)
{
TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
TU_FROM_HANDLE(tu_image, dest_image, destImage);
TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
TU_FROM_HANDLE(tu_image, dst_image, destImage);
TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer);
for (unsigned i = 0; i < regionCount; ++i) {
tu_copy_buffer_to_image(cmd_buffer, src_buffer, dest_image,
pRegions + i);
}
/* The BOs are added to the command buffer's list once, before the loop:
 * the buffer for reading and the image for writing.
 */
tu_bo_list_add(&cmdbuf->bo_list, src_buffer->bo, MSM_SUBMIT_BO_READ);
tu_bo_list_add(&cmdbuf->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);
for (unsigned i = 0; i < regionCount; ++i)
tu_copy_buffer_to_image(cmdbuf, src_buffer, dst_image, pRegions + i);
}
/*
 * Copy regionCount regions from an image into a buffer by calling
 * tu_copy_image_to_buffer once per entry of pRegions.
 *
 * NOTE(review): a patch hunk header is embedded in the signature line and
 * the command-buffer handle and region loop appear twice (cmd_buffer vs.
 * cmdbuf).  This looks like old and new lines of a patch shown together -
 * confirm which is intended.
 */
void
@ -839,14 +317,15 @@ tu_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer,
uint32_t regionCount,
const VkBufferImageCopy *pRegions)
{
TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
TU_FROM_HANDLE(tu_image, src_image, srcImage);
TU_FROM_HANDLE(tu_buffer, dst_buffer, destBuffer);
for (unsigned i = 0; i < regionCount; ++i) {
tu_copy_image_to_buffer(cmd_buffer, src_image, dst_buffer,
pRegions + i);
}
/* The BOs are added to the command buffer's list once, before the loop:
 * the image for reading and the buffer for writing.
 */
tu_bo_list_add(&cmdbuf->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
tu_bo_list_add(&cmdbuf->bo_list, dst_buffer->bo, MSM_SUBMIT_BO_WRITE);
for (unsigned i = 0; i < regionCount; ++i)
tu_copy_image_to_buffer(cmdbuf, src_image, dst_buffer, pRegions + i);
}
/*
 * Copy regionCount regions between two images by calling
 * tu_copy_image_to_image once per entry of pRegions.
 *
 * NOTE(review): a patch hunk header is embedded in the signature line and
 * the handle unwrapping / region loop appear twice with different local
 * names (cmd_buffer/dest_image vs. cmdbuf/dst_image).  This looks like old
 * and new lines of a patch shown together - confirm which is intended.
 */
void
@ -858,11 +337,13 @@ tu_CmdCopyImage(VkCommandBuffer commandBuffer,
uint32_t regionCount,
const VkImageCopy *pRegions)
{
TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
TU_FROM_HANDLE(tu_image, src_image, srcImage);
TU_FROM_HANDLE(tu_image, dest_image, destImage);
TU_FROM_HANDLE(tu_image, dst_image, destImage);
for (uint32_t i = 0; i < regionCount; ++i) {
tu_copy_image_to_image(cmd_buffer, src_image, dest_image, pRegions + i);
}
/* The BOs are added to the command buffer's list once, before the loop:
 * the source image for reading and the destination image for writing.
 */
tu_bo_list_add(&cmdbuf->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
tu_bo_list_add(&cmdbuf->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);
for (uint32_t i = 0; i < regionCount; ++i)
tu_copy_image_to_image(cmdbuf, src_image, dst_image, pRegions + i);
}

View File

@ -1299,6 +1299,8 @@ tu_get_levelCount(const struct tu_image *image,
enum a6xx_tile_mode
tu6_get_image_tile_mode(struct tu_image *image, int level);
enum a3xx_msaa_samples
tu_msaa_samples(uint32_t samples);
struct tu_image_view
{