turnip: implement VK_KHR_sampler_ycbcr_conversion

Most of the changes are based on radv, with some simplification: we don't
need to sample multiple planes, because the 422_UNORM/420_UNORM formats will
be supported directly using the corresponding hardware formats.

Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4590>
This commit is contained in:
Jonathan Marek 2020-04-10 09:19:36 -04:00 committed by Marge Bot
parent 70502f071c
commit d070a7ba0c
8 changed files with 230 additions and 31 deletions

View File

@ -119,18 +119,32 @@ tu_CreateDescriptorSetLayout(
uint32_t max_binding = 0;
uint32_t immutable_sampler_count = 0;
uint32_t ycbcr_sampler_count = 0;
for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
max_binding = MAX2(max_binding, pCreateInfo->pBindings[j].binding);
if ((pCreateInfo->pBindings[j].descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ||
pCreateInfo->pBindings[j].descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) &&
pCreateInfo->pBindings[j].pImmutableSamplers) {
immutable_sampler_count += pCreateInfo->pBindings[j].descriptorCount;
bool has_ycbcr_sampler = false;
for (unsigned i = 0; i < pCreateInfo->pBindings[j].descriptorCount; ++i) {
if (tu_sampler_from_handle(pCreateInfo->pBindings[j].pImmutableSamplers[i])->ycbcr_sampler)
has_ycbcr_sampler = true;
}
if (has_ycbcr_sampler)
ycbcr_sampler_count += pCreateInfo->pBindings[j].descriptorCount;
}
}
uint32_t samplers_offset = sizeof(struct tu_descriptor_set_layout) +
(max_binding + 1) * sizeof(set_layout->binding[0]);
uint32_t size = samplers_offset + immutable_sampler_count * A6XX_TEX_SAMP_DWORDS * 4;
/* note: only need to store TEX_SAMP_DWORDS for immutable samplers,
* but using struct tu_sampler makes things simpler */
uint32_t size = samplers_offset +
immutable_sampler_count * sizeof(struct tu_sampler) +
ycbcr_sampler_count * sizeof(struct tu_sampler_ycbcr_conversion);
set_layout = vk_alloc2(&device->alloc, pAllocator, size, 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
@ -141,6 +155,8 @@ tu_CreateDescriptorSetLayout(
/* We just allocate all the immutable samplers at the end of the struct */
struct tu_sampler *samplers = (void*) &set_layout->binding[max_binding + 1];
struct tu_sampler_ycbcr_conversion *ycbcr_samplers =
(void*) &samplers[immutable_sampler_count];
VkDescriptorSetLayoutBinding *bindings = create_sorted_bindings(
pCreateInfo->pBindings, pCreateInfo->bindingCount);
@ -196,6 +212,27 @@ tu_CreateDescriptorSetLayout(
samplers += binding->descriptorCount;
samplers_offset += sizeof(struct tu_sampler) * binding->descriptorCount;
bool has_ycbcr_sampler = false;
for (unsigned i = 0; i < pCreateInfo->pBindings[j].descriptorCount; ++i) {
if (tu_sampler_from_handle(binding->pImmutableSamplers[i])->ycbcr_sampler)
has_ycbcr_sampler = true;
}
if (has_ycbcr_sampler) {
set_layout->binding[b].ycbcr_samplers_offset =
(const char*)ycbcr_samplers - (const char*)set_layout;
for (uint32_t i = 0; i < binding->descriptorCount; i++) {
struct tu_sampler *sampler = tu_sampler_from_handle(binding->pImmutableSamplers[i]);
if (sampler->ycbcr_sampler)
ycbcr_samplers[i] = *sampler->ycbcr_sampler;
else
ycbcr_samplers[i].ycbcr_model = VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY;
}
ycbcr_samplers += binding->descriptorCount;
} else {
set_layout->binding[b].ycbcr_samplers_offset = 0;
}
}
set_layout->size +=
@ -502,8 +539,8 @@ tu_descriptor_set_create(struct tu_device *device,
(const struct tu_sampler *)((const char *)layout +
layout->binding[i].immutable_samplers_offset);
for (unsigned j = 0; j < layout->binding[i].array_size; ++j) {
memcpy(set->mapped_ptr + offset, samplers + j,
sizeof(struct tu_sampler));
memcpy(set->mapped_ptr + offset, samplers[j].descriptor,
sizeof(samplers[j].descriptor));
offset += layout->binding[i].size / 4;
}
}
@ -1210,19 +1247,39 @@ tu_UpdateDescriptorSetWithTemplate(
/* Create a sampler Y'CbCr conversion object.
 *
 * Allocates a tu_sampler_ycbcr_conversion with vk_alloc2() and fills it with
 * the format, model, range, component mapping, chroma offsets and chroma
 * filter taken from pCreateInfo, then stores its handle in *pYcbcrConversion.
 *
 * Returns VK_SUCCESS, or VK_ERROR_OUT_OF_HOST_MEMORY if the allocation fails.
 *
 * NOTE(review): this listing appears to interleave removed and added diff
 * lines (the `VkDevice device,` parameter and the VK_NULL_HANDLE store look
 * like the pre-change stub) -- confirm against the applied tree.
 */
VkResult
tu_CreateSamplerYcbcrConversion(
VkDevice device,
VkDevice _device,
const VkSamplerYcbcrConversionCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
VkSamplerYcbcrConversion *pYcbcrConversion)
{
*pYcbcrConversion = VK_NULL_HANDLE;
TU_FROM_HANDLE(tu_device, device, _device);
struct tu_sampler_ycbcr_conversion *conversion;
conversion = vk_alloc2(&device->alloc, pAllocator, sizeof(*conversion), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!conversion)
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
/* Keep a private copy of every create-info field. */
conversion->format = pCreateInfo->format;
conversion->ycbcr_model = pCreateInfo->ycbcrModel;
conversion->ycbcr_range = pCreateInfo->ycbcrRange;
conversion->components = pCreateInfo->components;
/* chroma_offsets[0] is the x offset, chroma_offsets[1] the y offset. */
conversion->chroma_offsets[0] = pCreateInfo->xChromaOffset;
conversion->chroma_offsets[1] = pCreateInfo->yChromaOffset;
conversion->chroma_filter = pCreateInfo->chromaFilter;
*pYcbcrConversion = tu_sampler_ycbcr_conversion_to_handle(conversion);
return VK_SUCCESS;
}
/* Destroy a sampler Y'CbCr conversion object.
 *
 * Releases the conversion with vk_free2(); a null handle is ignored.
 *
 * NOTE(review): this listing appears to interleave removed and added diff
 * lines (the first signature line and the "Do nothing." comment look like
 * the pre-change stub) -- confirm against the applied tree.
 */
void
tu_DestroySamplerYcbcrConversion(VkDevice device,
tu_DestroySamplerYcbcrConversion(VkDevice _device,
VkSamplerYcbcrConversion ycbcrConversion,
const VkAllocationCallbacks *pAllocator)
{
/* Do nothing. */
TU_FROM_HANDLE(tu_device, device, _device);
TU_FROM_HANDLE(tu_sampler_ycbcr_conversion, ycbcr_conversion, ycbcrConversion);
/* Only free when the handle resolved to an actual object. */
if (ycbcr_conversion)
vk_free2(&device->alloc, pAllocator, ycbcr_conversion);
}

View File

@ -61,6 +61,10 @@ struct tu_descriptor_set_binding_layout
* if there are no immutable samplers. */
uint32_t immutable_samplers_offset;
/* Offset in the tu_descriptor_set_layout of the ycbcr samplers, or 0
* if the binding has no ycbcr samplers. */
uint32_t ycbcr_samplers_offset;
/* Shader stages that use this binding */
uint32_t shader_stages;
};
@ -123,4 +127,15 @@ tu_immutable_samplers(const struct tu_descriptor_set_layout *set,
{
return (void *) ((const char *) set + binding->immutable_samplers_offset);
}
/* Return the ycbcr conversion array stored in the set layout for the given
 * binding, or NULL when the binding's ycbcr_samplers_offset is 0.
 */
static inline const struct tu_sampler_ycbcr_conversion *
tu_immutable_ycbcr_samplers(const struct tu_descriptor_set_layout *set,
                            const struct tu_descriptor_set_binding_layout *binding)
{
   const uint32_t offset = binding->ycbcr_samplers_offset;

   /* The offset is relative to the start of the layout; 0 means "none". */
   return offset ? (void *) ((const char *) set + offset) : NULL;
}
#endif /* TU_DESCRIPTOR_SET_H */

View File

@ -674,7 +674,7 @@ tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: {
VkPhysicalDeviceSamplerYcbcrConversionFeatures *features =
(VkPhysicalDeviceSamplerYcbcrConversionFeatures *) ext;
features->samplerYcbcrConversion = false;
features->samplerYcbcrConversion = true;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT: {
@ -2172,6 +2172,8 @@ tu_init_sampler(struct tu_device *device,
{
const struct VkSamplerReductionModeCreateInfo *reduction =
vk_find_struct_const(pCreateInfo->pNext, SAMPLER_REDUCTION_MODE_CREATE_INFO);
const struct VkSamplerYcbcrConversionInfo *ycbcr_conversion =
vk_find_struct_const(pCreateInfo->pNext, SAMPLER_YCBCR_CONVERSION_INFO);
unsigned aniso = pCreateInfo->anisotropyEnable ?
util_last_bit(MIN2((uint32_t)pCreateInfo->maxAnisotropy >> 1, 8)) : 0;
@ -2207,6 +2209,14 @@ tu_init_sampler(struct tu_device *device,
sampler->descriptor[2] |= A6XX_TEX_SAMP_2_REDUCTION_MODE(reduction->reductionMode);
}
sampler->ycbcr_sampler = ycbcr_conversion ?
tu_sampler_ycbcr_conversion_from_handle(ycbcr_conversion->conversion) : NULL;
if (sampler->ycbcr_sampler &&
sampler->ycbcr_sampler->chroma_filter == VK_FILTER_LINEAR) {
sampler->descriptor[2] |= A6XX_TEX_SAMP_2_CHROMA_LINEAR;
}
/* TODO:
* A6XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR disables mipmapping, but vk has no NONE mipfilter?
*/

View File

@ -54,6 +54,7 @@ EXTENSIONS = [
Extension('VK_KHR_maintenance2', 1, True),
Extension('VK_KHR_maintenance3', 1, True),
Extension('VK_KHR_sampler_mirror_clamp_to_edge', 1, True),
Extension('VK_KHR_sampler_ycbcr_conversion', 1, True),
Extension('VK_KHR_surface', 25, 'TU_HAS_SURFACE'),
Extension('VK_KHR_swapchain', 68, 'TU_HAS_SURFACE'),
Extension('VK_KHR_wayland_surface', 6, 'VK_USE_PLATFORM_WAYLAND_KHR'),

View File

@ -384,9 +384,15 @@ tu_physical_device_get_format_properties(
VK_FORMAT_FEATURE_TRANSFER_DST_BIT |
VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT |
VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT |
VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_MINMAX_BIT;
VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_MINMAX_BIT |
VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT |
VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT;
buffer |= VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT;
if (desc->layout != UTIL_FORMAT_LAYOUT_SUBSAMPLED)
optimal |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_YCBCR_CONVERSION_LINEAR_FILTER_BIT;
if (physical_device->supported_extensions.EXT_filter_cubic)
optimal |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_CUBIC_BIT_EXT;
}
@ -701,6 +707,7 @@ tu_GetPhysicalDeviceImageFormatProperties2(
VkExternalImageFormatProperties *external_props = NULL;
VkFilterCubicImageViewImageFormatPropertiesEXT *cubic_props = NULL;
VkFormatFeatureFlags format_feature_flags;
VkSamplerYcbcrConversionImageFormatProperties *ycbcr_props = NULL;
VkResult result;
result = tu_get_image_format_properties(physical_device,
@ -733,6 +740,9 @@ tu_GetPhysicalDeviceImageFormatProperties2(
case VK_STRUCTURE_TYPE_FILTER_CUBIC_IMAGE_VIEW_IMAGE_FORMAT_PROPERTIES_EXT:
cubic_props = (void *) s;
break;
case VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_IMAGE_FORMAT_PROPERTIES:
ycbcr_props = (void *) s;
break;
default:
break;
}
@ -767,6 +777,9 @@ tu_GetPhysicalDeviceImageFormatProperties2(
}
}
if (ycbcr_props)
ycbcr_props->combinedImageSamplerDescriptorCount = 1;
return VK_SUCCESS;
fail:

View File

@ -166,36 +166,63 @@ tu6_fetchsize(VkFormat format)
}
}
/* Apply a Vulkan component mapping on top of an existing swizzle, in place.
 *
 * swiz holds four entries on entry and receives the composed result.
 * IDENTITY keeps the entry for the same channel, R/G/B/A pick the incoming
 * entry of that channel, and ZERO/ONE select the A6XX_TEX_ZERO/A6XX_TEX_ONE
 * constants.
 */
static void
compose_swizzle(unsigned char *swiz, const VkComponentMapping *mapping)
{
   /* Snapshot the incoming swizzle: swiz is overwritten while it is read. */
   const unsigned char base[4] = { swiz[0], swiz[1], swiz[2], swiz[3] };
   const VkComponentSwizzle requested[4] = {
      mapping->r, mapping->g, mapping->b, mapping->a,
   };

   for (int chan = 0; chan < 4; chan++) {
      const VkComponentSwizzle sel = requested[chan];

      if (sel == VK_COMPONENT_SWIZZLE_IDENTITY) {
         swiz[chan] = base[chan];
      } else if (sel == VK_COMPONENT_SWIZZLE_ZERO) {
         swiz[chan] = A6XX_TEX_ZERO;
      } else if (sel == VK_COMPONENT_SWIZZLE_ONE) {
         swiz[chan] = A6XX_TEX_ONE;
      } else if (sel >= VK_COMPONENT_SWIZZLE_R &&
                 sel <= VK_COMPONENT_SWIZZLE_A) {
         swiz[chan] = base[sel - VK_COMPONENT_SWIZZLE_R];
      } else {
         unreachable("unexpected swizzle");
      }
   }
}
static uint32_t
tu6_texswiz(const VkComponentMapping *comps,
const struct tu_sampler_ycbcr_conversion *conversion,
VkFormat format,
VkImageAspectFlagBits aspect_mask)
{
unsigned char swiz[4] = {comps->r, comps->g, comps->b, comps->a};
unsigned char vk_swizzle[] = {
[VK_COMPONENT_SWIZZLE_ZERO] = A6XX_TEX_ZERO,
[VK_COMPONENT_SWIZZLE_ONE] = A6XX_TEX_ONE,
[VK_COMPONENT_SWIZZLE_R] = A6XX_TEX_X,
[VK_COMPONENT_SWIZZLE_G] = A6XX_TEX_Y,
[VK_COMPONENT_SWIZZLE_B] = A6XX_TEX_Z,
[VK_COMPONENT_SWIZZLE_A] = A6XX_TEX_W,
unsigned char swiz[4] = {
A6XX_TEX_X, A6XX_TEX_Y, A6XX_TEX_Z, A6XX_TEX_W,
};
const unsigned char *fmt_swiz = vk_format_description(format)->swizzle;
for (unsigned i = 0; i < 4; i++) {
swiz[i] = (swiz[i] == VK_COMPONENT_SWIZZLE_IDENTITY) ? i : vk_swizzle[swiz[i]];
/* if format has 0/1 in channel, use that (needed for bc1_rgb) */
if (swiz[i] < 4) {
if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT &&
format == VK_FORMAT_D24_UNORM_S8_UINT)
swiz[i] = A6XX_TEX_Y;
switch (fmt_swiz[swiz[i]]) {
case PIPE_SWIZZLE_0: swiz[i] = A6XX_TEX_ZERO; break;
case PIPE_SWIZZLE_1: swiz[i] = A6XX_TEX_ONE; break;
}
switch (format) {
case VK_FORMAT_BC1_RGB_UNORM_BLOCK:
case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
/* same hardware format is used for BC1_RGB / BC1_RGBA */
swiz[3] = A6XX_TEX_ONE;
break;
case VK_FORMAT_D24_UNORM_S8_UINT:
/* for D24S8, stencil is in the 2nd channel of the hardware format */
if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
swiz[0] = A6XX_TEX_Y;
swiz[1] = A6XX_TEX_ZERO;
}
default:
break;
}
compose_swizzle(swiz, comps);
if (conversion)
compose_swizzle(swiz, &conversion->components);
return A6XX_TEX_CONST_0_SWIZ_X(swiz[0]) |
A6XX_TEX_CONST_0_SWIZ_Y(swiz[1]) |
A6XX_TEX_CONST_0_SWIZ_Z(swiz[2]) |
@ -253,6 +280,11 @@ tu_image_view_init(struct tu_image_view *iview,
VkFormat format = pCreateInfo->format;
VkImageAspectFlagBits aspect_mask = pCreateInfo->subresourceRange.aspectMask;
const struct VkSamplerYcbcrConversionInfo *ycbcr_conversion =
vk_find_struct_const(pCreateInfo->pNext, SAMPLER_YCBCR_CONVERSION_INFO);
const struct tu_sampler_ycbcr_conversion *conversion = ycbcr_conversion ?
tu_sampler_ycbcr_conversion_from_handle(ycbcr_conversion->conversion) : NULL;
switch (image->type) {
case VK_IMAGE_TYPE_1D:
case VK_IMAGE_TYPE_2D:
@ -320,7 +352,7 @@ tu_image_view_init(struct tu_image_view *iview,
A6XX_TEX_CONST_0_FMT(fmt_tex) |
A6XX_TEX_CONST_0_SAMPLES(tu_msaa_samples(image->samples)) |
A6XX_TEX_CONST_0_SWAP(fmt.swap) |
tu6_texswiz(&pCreateInfo->components, format, aspect_mask) |
tu6_texswiz(&pCreateInfo->components, conversion, format, aspect_mask) |
A6XX_TEX_CONST_0_MIPLVLS(tu_get_levelCount(image, range) - 1);
iview->descriptor[1] = A6XX_TEX_CONST_1_WIDTH(width) | A6XX_TEX_CONST_1_HEIGHT(height);
iview->descriptor[2] =
@ -630,7 +662,7 @@ tu_buffer_view_init(struct tu_buffer_view *view,
A6XX_TEX_CONST_0_SWAP(fmt.swap) |
A6XX_TEX_CONST_0_FMT(fmt.fmt) |
A6XX_TEX_CONST_0_MIPLVLS(0) |
tu6_texswiz(&components, vfmt, VK_IMAGE_ASPECT_COLOR_BIT);
tu6_texswiz(&components, NULL, vfmt, VK_IMAGE_ASPECT_COLOR_BIT);
COND(vk_format_is_srgb(vfmt), A6XX_TEX_CONST_0_SRGB);
view->descriptor[1] =
A6XX_TEX_CONST_1_WIDTH(elements & MASK(15)) |

View File

@ -1507,8 +1507,18 @@ struct tu_image_view
uint32_t RB_BLIT_DST_INFO;
};
/* Driver-side state of a VkSamplerYcbcrConversion.
 * Each field is a copy of the matching VkSamplerYcbcrConversionCreateInfo
 * member, filled in by tu_CreateSamplerYcbcrConversion().
 */
struct tu_sampler_ycbcr_conversion {
VkFormat format; /* pCreateInfo->format */
VkSamplerYcbcrModelConversion ycbcr_model; /* pCreateInfo->ycbcrModel */
VkSamplerYcbcrRange ycbcr_range; /* pCreateInfo->ycbcrRange */
VkComponentMapping components; /* pCreateInfo->components */
VkChromaLocation chroma_offsets[2]; /* [0] = xChromaOffset, [1] = yChromaOffset */
VkFilter chroma_filter; /* pCreateInfo->chromaFilter */
};
/* Driver-side state of a VkSampler. */
struct tu_sampler {
/* Sampler state dwords; this array is what gets copied into descriptor
 * set memory for immutable samplers. */
uint32_t descriptor[A6XX_TEX_SAMP_DWORDS];
/* Conversion taken from a VkSamplerYcbcrConversionInfo in the create
 * info's pNext chain, or NULL when none was chained. */
struct tu_sampler_ycbcr_conversion *ycbcr_sampler;
};
void
@ -1774,6 +1784,7 @@ TU_DEFINE_NONDISP_HANDLE_CASTS(tu_pipeline_layout, VkPipelineLayout)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_query_pool, VkQueryPool)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_render_pass, VkRenderPass)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_sampler, VkSampler)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_sampler_ycbcr_conversion, VkSamplerYcbcrConversion)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_shader_module, VkShaderModule)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_semaphore, VkSemaphore)

View File

@ -26,6 +26,7 @@
#include "spirv/nir_spirv.h"
#include "util/mesa-sha1.h"
#include "nir/nir_xfb_info.h"
#include "nir/nir_vulkan.h"
#include "vk_util.h"
#include "ir3/ir3_nir.h"
@ -272,10 +273,69 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
}
}
/* Insert a YCbCr -> RGB conversion after a texture instruction when the
 * texture it samples belongs to a binding with immutable ycbcr samplers.
 *
 * The binding is looked up through the texture deref's variable
 * (descriptor set + binding). The instruction is left untouched when the
 * binding has no ycbcr samplers, when the op is a size/levels/lod query,
 * when the array index is not constant, or when the selected conversion
 * uses the RGB_IDENTITY model.
 */
static void
lower_tex_ycbcr(const struct tu_pipeline_layout *layout,
nir_builder *builder,
nir_tex_instr *tex)
{
int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
assert(deref_src_idx >= 0);
nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
nir_variable *var = nir_deref_instr_get_variable(deref);
const struct tu_descriptor_set_layout *set_layout =
layout->set[var->data.descriptor_set].layout;
const struct tu_descriptor_set_binding_layout *binding =
&set_layout->binding[var->data.binding];
/* NULL when the binding's ycbcr_samplers_offset is 0. */
const struct tu_sampler_ycbcr_conversion *ycbcr_samplers =
tu_immutable_ycbcr_samplers(set_layout, binding);
if (!ycbcr_samplers)
return;
/* For the following instructions, we don't apply any change */
if (tex->op == nir_texop_txs ||
tex->op == nir_texop_query_levels ||
tex->op == nir_texop_lod)
return;
assert(tex->texture_index == 0);
unsigned array_index = 0;
if (deref->deref_type != nir_deref_type_var) {
assert(deref->deref_type == nir_deref_type_array);
/* NOTE(review): a non-constant array index returns here, so such
 * accesses get no conversion -- confirm this is intended. */
if (!nir_src_is_const(deref->arr.index))
return;
array_index = nir_src_as_uint(deref->arr.index);
/* Clamp so an out-of-range constant index stays inside the array. */
array_index = MIN2(array_index, binding->array_size - 1);
}
const struct tu_sampler_ycbcr_conversion *ycbcr_sampler = ycbcr_samplers + array_index;
/* RGB_IDENTITY is also what descriptor set layout creation stores for
 * array entries whose sampler has no conversion attached. */
if (ycbcr_sampler->ycbcr_model == VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY)
return;
/* Emit the conversion right after the texture instruction. */
builder->cursor = nir_after_instr(&tex->instr);
/* The bit count of the X channel is used for all three components. */
uint8_t bits = vk_format_get_component_bits(ycbcr_sampler->format,
UTIL_FORMAT_COLORSPACE_RGB,
PIPE_SWIZZLE_X);
uint32_t bpcs[3] = {bits, bits, bits}; /* TODO: use right bpc for each channel ? */
nir_ssa_def *result = nir_convert_ycbcr_to_rgb(builder,
ycbcr_sampler->ycbcr_model,
ycbcr_sampler->ycbcr_range,
&tex->dest.ssa,
bpcs);
/* Redirect uses of the texture result to the converted value, starting
 * after the instruction that produces it -- presumably so the conversion
 * itself keeps reading the raw texel. */
nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(result),
result->parent_instr);
/* Move the cursor back in front of the texture instruction; presumably
 * the caller's remaining lowering expects to insert there -- confirm. */
builder->cursor = nir_before_instr(&tex->instr);
}
static bool
lower_tex(nir_builder *b, nir_tex_instr *tex,
struct tu_shader *shader, const struct tu_pipeline_layout *layout)
{
lower_tex_ycbcr(layout, b, tex);
int sampler_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
if (sampler_src_idx >= 0) {
nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_src_idx].src);