v3dv/descriptor_set: added support for samplers

This includes SAMPLER, COMBINED_IMAGE_SAMPLER and SAMPLED_IMAGE
descriptors.
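
For context, this is the kind of application-side usage these descriptor
types enable. A minimal sketch using only core Vulkan structures; the
device, set, view and sampler handles are assumed to exist already:

    /* A set layout with one combined image/sampler binding visible to the
     * fragment stage.
     */
    VkDescriptorSetLayoutBinding binding = {
       .binding = 0,
       .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
       .descriptorCount = 1,
       .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
       .pImmutableSamplers = NULL, /* immutable samplers come in a later patch */
    };
    VkDescriptorSetLayoutCreateInfo layout_info = {
       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
       .bindingCount = 1,
       .pBindings = &binding,
    };
    VkDescriptorSetLayout set_layout;
    vkCreateDescriptorSetLayout(device, &layout_info, NULL, &set_layout);

    /* Writing the descriptor ends up in v3dv_UpdateDescriptorSets below. */
    VkDescriptorImageInfo image_info = {
       .sampler = sampler,
       .imageView = view,
       .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
    };
    VkWriteDescriptorSet write = {
       .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
       .dstSet = set,
       .dstBinding = 0,
       .dstArrayElement = 0,
       .descriptorCount = 1,
       .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
       .pImageInfo = &image_info,
    };
    vkUpdateDescriptorSets(device, 1, &write, 0, NULL);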

In order to support them we pre-pack the TEXTURE_SHADER_STATE and
SAMPLER_STATE packets when image views and samplers (respectively) are
created. Those packets don't need to be tweaked later, so we upload
them to a bo right away.
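
Condensed from the patch below (error handling dropped), the
creation-time flow for a sampler looks roughly like this; the image
view path does the same for TEXTURE_SHADER_STATE in
pack_texture_shader_state():

    /* v3dv_CreateSampler(): allocate a bo sized for exactly one
     * SAMPLER_STATE packet, map it and pack it once; it is never
     * rewritten afterwards.
     */
    sampler->state = v3dv_bo_alloc(device, cl_packet_length(SAMPLER_STATE),
                                   "sampler_state");
    v3dv_bo_map(device, sampler->state, cl_packet_length(SAMPLER_STATE));
    pack_sampler_state(sampler, pCreateInfo);

    /* vkUpdateDescriptorSets() then only stores the object pointers; no
     * packet needs to be re-packed at descriptor-write time.
     */
    descriptor->image_view = iview;
    descriptor->sampler = sampler;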

A possible improvement would be for the descriptor pool to manage a bo
for all the descriptors and suballocate from it for each descriptor
allocated. This is what other drivers do (and, as far as I understand,
one of the reasons for having a descriptor pool in the first place).
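
A minimal sketch of what that could look like; the extra pool fields
and the helper below are hypothetical and not part of this patch (the
same idea is noted in the FIXME comments added to v3dv_private.h):

    /* Hypothetical: the pool allocates one bo up front and hands out
     * offsets into it as descriptors are allocated, instead of each
     * sampler / image view owning a tiny bo of its own.
     */
    struct v3dv_descriptor_pool {
       struct v3dv_bo *bo;       /* single bo backing the whole pool */
       uint32_t current_offset;  /* bump pointer for suballocation */
       uint32_t bo_size;
    };

    static int32_t
    pool_suballocate(struct v3dv_descriptor_pool *pool, uint32_t bytes)
    {
       /* 32-byte alignment is a guess; the real requirement would come
        * from the SAMPLER_STATE/TEXTURE_SHADER_STATE packet specs.
        */
       uint32_t offset = (pool->current_offset + 31) & ~31u;
       if (offset + bytes > pool->bo_size)
          return -1; /* caller would return VK_ERROR_OUT_OF_POOL_MEMORY */
       pool->current_offset = offset + bytes;
       return (int32_t)offset;
    }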

Immutable samplers are not supported yet; they will be handled in a
follow-up patch.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6766>
Alejandro Piñeiro 2020-03-29 16:29:55 +02:00 committed by Marge Bot
parent 34910532fd
commit 272f0d9cfb
5 changed files with 401 additions and 7 deletions

src/broadcom/vulkan/v3dv_descriptor_set.c

@@ -108,12 +108,15 @@ v3dv_CreateDescriptorPool(VkDevice _device,
if (pCreateInfo->pPoolSizes[i].type != VK_DESCRIPTOR_TYPE_SAMPLER)
descriptor_count += pCreateInfo->pPoolSizes[i].descriptorCount;
/* Verify supported descriptor type */
switch(pCreateInfo->pPoolSizes[i].type) {
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
case VK_DESCRIPTOR_TYPE_SAMPLER:
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
break;
default:
unreachable("Unimplemented descriptor type");
@@ -255,10 +258,19 @@ v3dv_CreateDescriptorSetLayout(VkDevice _device,
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO);
uint32_t max_binding = 0;
uint32_t immutable_sampler_count = 0;
for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
max_binding = MAX2(max_binding, pCreateInfo->pBindings[j].binding);
if ((pCreateInfo->pBindings[j].descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ||
pCreateInfo->pBindings[j].descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) &&
pCreateInfo->pBindings[j].pImmutableSamplers) {
immutable_sampler_count += pCreateInfo->pBindings[j].descriptorCount;
}
}
/* FIXME: immutable samplers not supported yet */
assert(immutable_sampler_count == 0);
uint32_t size = sizeof(struct v3dv_descriptor_set_layout) +
(max_binding + 1) * sizeof(set_layout->binding[0]);
@@ -283,6 +295,7 @@ v3dv_CreateDescriptorSetLayout(VkDevice _device,
set_layout->binding_count = max_binding + 1;
set_layout->flags = pCreateInfo->flags;
set_layout->shader_stages = 0;
set_layout->has_immutable_samplers = false;
uint32_t descriptor_count = 0;
uint32_t dynamic_offset_count = 0;
@@ -299,6 +312,11 @@ v3dv_CreateDescriptorSetLayout(VkDevice _device,
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
set_layout->binding[binding_number].dynamic_offset_count = 1;
break;
case VK_DESCRIPTOR_TYPE_SAMPLER:
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
/* Nothing here, just to keep the descriptor type filtering below */
break;
default:
unreachable("Unknown descriptor type\n");
break;
@@ -375,6 +393,9 @@ descriptor_set_create(struct v3dv_device *device,
set->layout = layout;
/* FIXME: if we have immutable samplers those are tightly included here */
assert(layout->has_immutable_samplers == false);
if (!pool->host_memory_base && pool->entry_count == pool->max_entry_count) {
vk_free2(&device->alloc, NULL, set);
return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY);
@@ -461,7 +482,9 @@ v3dv_UpdateDescriptorSets(VkDevice _device,
descriptor += binding_layout->descriptor_index;
descriptor += writeset->dstArrayElement;
for (uint32_t j = 0; j < writeset->descriptorCount; ++j) {
descriptor->type = writeset->descriptorType;
switch(writeset->descriptorType) {
@@ -472,10 +495,34 @@ v3dv_UpdateDescriptorSets(VkDevice _device,
const VkDescriptorBufferInfo *buffer_info = writeset->pBufferInfo + j;
V3DV_FROM_HANDLE(v3dv_buffer, buffer, buffer_info->buffer);
descriptor->bo = buffer->mem->bo;
descriptor->buffer = buffer;
descriptor->offset = buffer_info->offset;
break;
}
case VK_DESCRIPTOR_TYPE_SAMPLER: {
const VkDescriptorImageInfo *image_info = writeset->pImageInfo + j;
V3DV_FROM_HANDLE(v3dv_sampler, sampler, image_info->sampler);
descriptor->sampler = sampler;
break;
}
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: {
const VkDescriptorImageInfo *image_info = writeset->pImageInfo + j;
V3DV_FROM_HANDLE(v3dv_image_view, iview, image_info->imageView);
descriptor->image_view = iview;
break;
}
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: {
const VkDescriptorImageInfo *image_info = writeset->pImageInfo + j;
V3DV_FROM_HANDLE(v3dv_image_view, iview, image_info->imageView);
V3DV_FROM_HANDLE(v3dv_sampler, sampler, image_info->sampler);
descriptor->image_view = iview;
descriptor->sampler = sampler;
break;
}
default:
unreachable("unimplemented descriptor type");
break;

src/broadcom/vulkan/v3dv_device.c

@@ -555,7 +555,7 @@ v3dv_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice,
.largePoints = false,
.alphaToOne = false,
.multiViewport = false,
.samplerAnisotropy = false,
.samplerAnisotropy = true,
.textureCompressionETC2 = true,
.textureCompressionASTC_LDR = false,
.textureCompressionBC = false,
@@ -1877,3 +1877,138 @@ v3dv_ResetEvent(VkDevice _device, VkEvent _event)
*((uint32_t *) event->bo->map) = 0;
return VK_SUCCESS;
}
static const enum V3DX(Wrap_Mode) vk_to_v3d_wrap_mode[] = {
[VK_SAMPLER_ADDRESS_MODE_REPEAT] = V3D_WRAP_MODE_REPEAT,
[VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT] = V3D_WRAP_MODE_MIRROR,
[VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE] = V3D_WRAP_MODE_CLAMP,
[VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE] = V3D_WRAP_MODE_MIRROR_ONCE,
[VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER] = V3D_WRAP_MODE_BORDER,
};
static const enum V3DX(Compare_Function)
vk_to_v3d_compare_func[] = {
[VK_COMPARE_OP_NEVER] = V3D_COMPARE_FUNC_NEVER,
[VK_COMPARE_OP_LESS] = V3D_COMPARE_FUNC_LESS,
[VK_COMPARE_OP_EQUAL] = V3D_COMPARE_FUNC_EQUAL,
[VK_COMPARE_OP_LESS_OR_EQUAL] = V3D_COMPARE_FUNC_LEQUAL,
[VK_COMPARE_OP_GREATER] = V3D_COMPARE_FUNC_GREATER,
[VK_COMPARE_OP_NOT_EQUAL] = V3D_COMPARE_FUNC_NOTEQUAL,
[VK_COMPARE_OP_GREATER_OR_EQUAL] = V3D_COMPARE_FUNC_GEQUAL,
[VK_COMPARE_OP_ALWAYS] = V3D_COMPARE_FUNC_ALWAYS,
};
static void
pack_sampler_state(struct v3dv_sampler *sampler,
const VkSamplerCreateInfo *pCreateInfo)
{
enum V3DX(Border_Color_Mode) border_color_mode;
/* FIXME: a direct border_color_mode mapping works for some specific
 * formats, but for others we would need to use
 * V3D_BORDER_COLOR_FOLLOWS and fill in
 * SAMPLER_STATE.border_color_word_[0/1/2/3]
 */
switch (pCreateInfo->borderColor) {
case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
border_color_mode = V3D_BORDER_COLOR_0000;
break;
case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
border_color_mode = V3D_BORDER_COLOR_0001;
break;
case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
border_color_mode = V3D_BORDER_COLOR_1111;
break;
default:
unreachable("Unknown border color");
break;
}
v3dv_pack(sampler->state->map, SAMPLER_STATE, s) {
if (pCreateInfo->anisotropyEnable) {
s.anisotropy_enable = true;
if (pCreateInfo->maxAnisotropy > 8)
s.maximum_anisotropy = 3;
else if (pCreateInfo->maxAnisotropy > 4)
s.maximum_anisotropy = 2;
else if (pCreateInfo->maxAnisotropy > 2)
s.maximum_anisotropy = 1;
}
s.border_color_mode = border_color_mode;
s.wrap_i_border = false; /* Also hardcoded on v3d */
s.wrap_s = vk_to_v3d_wrap_mode[pCreateInfo->addressModeU];
s.wrap_t = vk_to_v3d_wrap_mode[pCreateInfo->addressModeV];
s.wrap_r = vk_to_v3d_wrap_mode[pCreateInfo->addressModeW];
s.fixed_bias = pCreateInfo->mipLodBias;
s.max_level_of_detail = MIN2(MAX2(0, pCreateInfo->maxLod), 15);
s.min_level_of_detail = MIN2(MAX2(0, pCreateInfo->minLod), 15);
s.srgb_disable = 0; /* Not even set by v3d */
s.depth_compare_function =
vk_to_v3d_compare_func[pCreateInfo->compareEnable ?
pCreateInfo->compareOp : VK_COMPARE_OP_NEVER];
s.mip_filter_nearest = pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_NEAREST;
s.min_filter_nearest = pCreateInfo->minFilter == VK_FILTER_NEAREST;
s.mag_filter_nearest = pCreateInfo->magFilter == VK_FILTER_NEAREST;
}
}
VkResult
v3dv_CreateSampler(VkDevice _device,
const VkSamplerCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
VkSampler *pSampler)
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
struct v3dv_sampler *sampler;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
sampler = vk_zalloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!sampler)
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
if (sampler->state == NULL) {
sampler->state = v3dv_bo_alloc(device, cl_packet_length(SAMPLER_STATE),
"sampler_state");
if (!sampler->state) {
fprintf(stderr, "Failed to allocate memory for sampler state\n");
abort();
}
bool ok = v3dv_bo_map(device, sampler->state,
cl_packet_length(SAMPLER_STATE));
if (!ok) {
fprintf(stderr, "failed to map sampler state buffer\n");
abort();
}
}
pack_sampler_state(sampler, pCreateInfo);
*pSampler = v3dv_sampler_to_handle(sampler);
return VK_SUCCESS;
}
void
v3dv_DestroySampler(VkDevice _device,
VkSampler _sampler,
const VkAllocationCallbacks *pAllocator)
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
V3DV_FROM_HANDLE(v3dv_sampler, sampler, _sampler);
if (!sampler)
return;
vk_free2(&device->alloc, pAllocator, sampler);
}

src/broadcom/vulkan/v3dv_image.c

@@ -395,6 +395,144 @@ v3dv_DestroyImage(VkDevice _device,
vk_free2(&device->alloc, pAllocator, image);
}
/*
* This method translates pipe_swizzle to the swizzle values used by the
* TEXTURE_SHADER_STATE packet
*
* FIXME: C&P from v3d, common place?
*/
static uint32_t
translate_swizzle(unsigned char pipe_swizzle)
{
switch (pipe_swizzle) {
case PIPE_SWIZZLE_0:
return 0;
case PIPE_SWIZZLE_1:
return 1;
case PIPE_SWIZZLE_X:
case PIPE_SWIZZLE_Y:
case PIPE_SWIZZLE_Z:
case PIPE_SWIZZLE_W:
return 2 + pipe_swizzle;
default:
unreachable("unknown swizzle");
}
}
static void
pack_texture_shader_state(struct v3dv_device *device,
struct v3dv_image_view *image_view)
{
assert(image_view->image);
const struct v3dv_image *image = image_view->image;
if (image_view->texture_shader_state == NULL) {
image_view->texture_shader_state =
v3dv_bo_alloc(device, cl_packet_length(TEXTURE_SHADER_STATE),
"texture_shader_state");
if (!image_view->texture_shader_state) {
fprintf(stderr, "Failed to allocate memory for texture shader state\n");
abort();
}
bool ok = v3dv_bo_map(device, image_view->texture_shader_state,
cl_packet_length(TEXTURE_SHADER_STATE));
if (!ok) {
fprintf(stderr, "failed to map texture shader state\n");
abort();
}
}
int msaa_scale = 1; /* FIXME: hardcoded. Revisit when msaa get supported */
v3dv_pack(image_view->texture_shader_state->map, TEXTURE_SHADER_STATE, tex) {
tex.level_0_is_strictly_uif =
(image->slices[0].tiling == VC5_TILING_UIF_XOR ||
image->slices[0].tiling == VC5_TILING_UIF_NO_XOR);
tex.level_0_xor_enable = (image->slices[0].tiling == VC5_TILING_UIF_XOR);
if (tex.level_0_is_strictly_uif)
tex.level_0_ub_pad = image->slices[0].ub_pad;
/* FIXME: v3d never sets uif_xor_disable, but uses it on the following
* check so let's set the default value
*/
tex.uif_xor_disable = false;
if (tex.uif_xor_disable ||
tex.level_0_is_strictly_uif) {
tex.extended = true;
}
tex.base_level = image_view->base_level;
tex.max_level = image_view->max_level;
tex.swizzle_r = translate_swizzle(image_view->swizzle[0]);
tex.swizzle_g = translate_swizzle(image_view->swizzle[1]);
tex.swizzle_b = translate_swizzle(image_view->swizzle[2]);
tex.swizzle_a = translate_swizzle(image_view->swizzle[3]);
tex.texture_type = image_view->format->tex_type;
if (image->type == VK_IMAGE_TYPE_3D) {
tex.image_depth = image->extent.depth;
} else {
tex.image_depth = (image_view->last_layer - image_view->first_layer) + 1;
}
tex.image_height = image->extent.height * msaa_scale;
tex.image_width = image->extent.width * msaa_scale;
/* On 4.x, the height of a 1D texture is redefined to be the
* upper 14 bits of the width (which is only usable with txf).
*/
if (image->type == VK_IMAGE_TYPE_1D) {
tex.image_height = tex.image_width >> 14;
}
tex.image_width &= (1 << 14) - 1;
tex.image_height &= (1 << 14) - 1;
tex.array_stride_64_byte_aligned = image->cube_map_stride / 64;
tex.srgb = vk_format_is_srgb(image_view->vk_format);
/* At this point we don't have the job. That's the reason the first
 * parameter is NULL: it avoids a crash when cl_pack_emit_reloc tries to
 * add the bo to the job. This also means that we need to manually add
 * the image bo to the job that uses this texture.
 */
const uint32_t base_offset =
image->mem->bo->offset +
v3dv_layer_offset(image, 0, image_view->first_layer);
tex.texture_base_pointer = v3dv_cl_address(NULL, base_offset);
}
}
static enum pipe_swizzle
vk_component_mapping_to_pipe_swizzle(VkComponentSwizzle comp,
VkComponentSwizzle swz)
{
if (swz == VK_COMPONENT_SWIZZLE_IDENTITY)
swz = comp;
switch (swz) {
case VK_COMPONENT_SWIZZLE_ZERO:
return PIPE_SWIZZLE_0;
case VK_COMPONENT_SWIZZLE_ONE:
return PIPE_SWIZZLE_1;
case VK_COMPONENT_SWIZZLE_R:
return PIPE_SWIZZLE_X;
case VK_COMPONENT_SWIZZLE_G:
return PIPE_SWIZZLE_Y;
case VK_COMPONENT_SWIZZLE_B:
return PIPE_SWIZZLE_Z;
case VK_COMPONENT_SWIZZLE_A:
return PIPE_SWIZZLE_W;
default:
unreachable("Unknown VkComponentSwizzle");
};
}
VkResult
v3dv_CreateImageView(VkDevice _device,
const VkImageViewCreateInfo *pCreateInfo,
@@ -435,6 +573,7 @@ v3dv_CreateImageView(VkDevice _device,
iview->aspects = range->aspectMask;
iview->base_level = range->baseMipLevel;
iview->max_level = iview->base_level + v3dv_level_count(image, range) - 1;
iview->extent = (VkExtent3D) {
.width = u_minify(image->extent.width , iview->base_level),
.height = u_minify(image->extent.height, iview->base_level),
@@ -476,6 +615,28 @@ v3dv_CreateImageView(VkDevice _device,
&iview->internal_type,
&iview->internal_bpp);
}
/* FIXME: we are doing this vk to pipe swizzle mapping just to call
* util_format_compose_swizzles. Would be good to check if it would be
* better to reimplement the latter using vk component
*/
uint8_t image_view_swizzle[4] = {
vk_component_mapping_to_pipe_swizzle(VK_COMPONENT_SWIZZLE_R,
pCreateInfo->components.r),
vk_component_mapping_to_pipe_swizzle(VK_COMPONENT_SWIZZLE_G,
pCreateInfo->components.g),
vk_component_mapping_to_pipe_swizzle(VK_COMPONENT_SWIZZLE_B,
pCreateInfo->components.b),
vk_component_mapping_to_pipe_swizzle(VK_COMPONENT_SWIZZLE_A,
pCreateInfo->components.a),
};
const uint8_t *format_swizzle =
v3dv_get_format_swizzle(iview->vk_format);
util_format_compose_swizzles(format_swizzle, image_view_swizzle, iview->swizzle);
pack_texture_shader_state(device, iview);
*pView = v3dv_image_view_to_handle(iview);
return VK_SUCCESS;

src/broadcom/vulkan/v3dv_private.h

@@ -349,9 +349,27 @@ struct v3dv_image_view {
uint32_t internal_type;
uint32_t base_level;
uint32_t max_level;
uint32_t first_layer;
uint32_t last_layer;
uint32_t offset;
/* Precomputed swizzles (composed from createinfo->components and the format
 * swizzle) to pass to the shader key.
 *
 * FIXME: this is also a candidate to be included in the descriptor info.
 */
uint8_t swizzle[4];
/* FIXME: here we store the packet TEXTURE_SHADER_STATE, that is referenced
* as part of the tmu configuration, and the content is set per sampler. A
* possible perf improvement, to avoid bo fragmentation, would be to save
* the state as static, have the bo as part of the descriptor (booked from
* the descriptor pools), and then copy this content to the descriptor bo
* on UpdateDescriptor. This also makes sense because not all the images
* are used as textures.
*/
struct v3dv_bo *texture_shader_state;
};
uint32_t v3dv_layer_offset(const struct v3dv_image *image, uint32_t level, uint32_t layer);
@@ -668,8 +686,19 @@ struct v3dv_cmd_buffer_state {
};
struct v3dv_descriptor {
struct v3dv_bo *bo;
uint32_t offset;
VkDescriptorType type;
union {
struct {
struct v3dv_image_view *image_view;
struct v3dv_sampler *sampler;
};
struct {
struct v3dv_buffer *buffer;
uint32_t offset;
};
};
};
/* Aux struct as it is really common to have a pair bo/address. Called
@@ -858,6 +887,9 @@ struct v3dv_descriptor_set_layout {
/* Number of bindings in this descriptor set */
uint32_t binding_count;
/* Total size of the descriptor set with room for all array entries */
uint32_t size;
/* Shader stages affected by this descriptor set */
uint16_t shader_stages;
@@ -867,6 +899,8 @@ struct v3dv_descriptor_set_layout {
/* Number of dynamic offsets used by this descriptor set */
uint16_t dynamic_offset_count;
bool has_immutable_samplers;
/* Bindings in this descriptor set */
struct v3dv_descriptor_set_binding_layout binding[0];
};
@@ -892,6 +926,17 @@ struct v3dv_descriptor_map {
int array_size[64];
};
struct v3dv_sampler {
/* FIXME: here we store the packet SAMPLER_STATE, that is referenced as part
* of the tmu configuration, and the content is set per sampler. A possible
* perf improvement, to avoid bo fragmentation, would be to save the state
* as static, have the bo as part of the descriptor (booked from the
* descriptor pools), and then copy this content to the descriptor bo on
* UpdateDescriptor
*/
struct v3dv_bo *state;
};
struct v3dv_pipeline {
struct v3dv_device *device;
@@ -1137,6 +1182,7 @@ V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image_view, VkImageView)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline, VkPipeline)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_layout, VkPipelineLayout)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_render_pass, VkRenderPass)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_sampler, VkSampler)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_semaphore, VkSemaphore)
V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_shader_module, VkShaderModule)
@@ -1147,6 +1193,10 @@ V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_shader_module, VkShaderModule)
((_range)->layerCount == VK_REMAINING_ARRAY_LAYERS ? \
(_image)->array_size - (_range)->baseArrayLayer : (_range)->layerCount)
#define v3dv_level_count(_image, _range) \
((_range)->levelCount == VK_REMAINING_MIP_LEVELS ? \
(_image)->levels - (_range)->baseMipLevel : (_range)->levelCount)
static inline int
v3dv_ioctl(int fd, unsigned long request, void *arg)
{

src/broadcom/vulkan/v3dv_uniforms.c

@@ -186,10 +186,11 @@ write_ubo_ssbo_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
pipeline->layout,
index, &dynamic_offset);
assert(descriptor);
assert(descriptor->bo);
assert(descriptor->buffer);
cl_aligned_reloc(&job->indirect, uniforms,
descriptor->bo,
descriptor->buffer->mem->bo,
descriptor->buffer->mem_offset +
descriptor->offset + offset + dynamic_offset);
}
}