diff --git a/src/broadcom/vulkan/v3dv_cl.h b/src/broadcom/vulkan/v3dv_cl.h index ae88b305cd8..a6a38b4aa26 100644 --- a/src/broadcom/vulkan/v3dv_cl.h +++ b/src/broadcom/vulkan/v3dv_cl.h @@ -133,8 +133,15 @@ cl_aligned_reloc(struct v3dv_cl *cl, uint32_t v3dv_cl_ensure_space(struct v3dv_cl *cl, uint32_t space, uint32_t alignment); void v3dv_cl_ensure_space_with_branch(struct v3dv_cl *cl, uint32_t space); +/* We redefine ALIGN as a macro as we want to use cl_aligned_packet_length for + * struct fields + */ +#define ALIGN(value, alignment) \ + (((value) + (alignment) - 1) & ~((alignment) - 1)) + #define cl_packet_header(packet) V3DX(packet ## _header) #define cl_packet_length(packet) V3DX(packet ## _length) +#define cl_aligned_packet_length(packet, alignment) ALIGN(cl_packet_length(packet), alignment) #define cl_packet_pack(packet) V3DX(packet ## _pack) #define cl_packet_struct(packet) V3DX(packet) diff --git a/src/broadcom/vulkan/v3dv_descriptor_set.c b/src/broadcom/vulkan/v3dv_descriptor_set.c index e6187870aba..5550536643c 100644 --- a/src/broadcom/vulkan/v3dv_descriptor_set.c +++ b/src/broadcom/vulkan/v3dv_descriptor_set.c @@ -33,6 +33,12 @@ static uint32_t descriptor_bo_size(VkDescriptorType type) { switch(type) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + return sizeof(struct v3dv_sampler_descriptor); + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + return sizeof(struct v3dv_combined_image_sampler_descriptor); + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + return sizeof(struct v3dv_sampled_image_descriptor); default: return 0; } @@ -107,6 +113,49 @@ v3dv_descriptor_map_get_descriptor(struct v3dv_descriptor_state *descriptor_stat return &set->descriptors[binding_layout->descriptor_index + array_index]; } +/* Equivalent to map_get_descriptor but it returns a reloc with the bo + * associated with that descriptor (suballocation of the descriptor pool bo) + * + * It also returns the descriptor type, so the caller could do extra + * validation or adding extra offsets if the bo contains more that one field. + */ +static struct v3dv_cl_reloc +v3dv_descriptor_map_get_descriptor_bo(struct v3dv_descriptor_state *descriptor_state, + struct v3dv_descriptor_map *map, + struct v3dv_pipeline_layout *pipeline_layout, + uint32_t index, + VkDescriptorType *out_type) +{ + assert(index >= 0 && index < map->num_desc); + + uint32_t set_number = map->set[index]; + assert(descriptor_state->valid & 1 << set_number); + + struct v3dv_descriptor_set *set = + descriptor_state->descriptor_sets[set_number]; + assert(set); + + uint32_t binding_number = map->binding[index]; + assert(binding_number < set->layout->binding_count); + + const struct v3dv_descriptor_set_binding_layout *binding_layout = + &set->layout->binding[binding_number]; + + assert(descriptor_bo_size(binding_layout->type) > 0); + *out_type = binding_layout->type; + + uint32_t array_index = map->array_index[index]; + assert(array_index < binding_layout->array_size); + + struct v3dv_cl_reloc reloc = { + .bo = set->pool->bo, + .offset = set->base_offset + binding_layout->descriptor_offset + + array_index * descriptor_bo_size(binding_layout->type), + }; + + return reloc; +} + /* * The difference between this method and v3dv_descriptor_map_get_descriptor, * is that if the sampler are added as immutable when creating the set layout, @@ -162,6 +211,53 @@ v3dv_descriptor_map_get_sampler(struct v3dv_descriptor_state *descriptor_state, return descriptor->sampler; } + +struct v3dv_cl_reloc +v3dv_descriptor_map_get_sampler_state(struct v3dv_descriptor_state *descriptor_state, + struct v3dv_descriptor_map *map, + struct v3dv_pipeline_layout *pipeline_layout, + uint32_t index) +{ + VkDescriptorType type; + struct v3dv_cl_reloc reloc = + v3dv_descriptor_map_get_descriptor_bo(descriptor_state, map, + pipeline_layout, + index, &type); + + assert(type == VK_DESCRIPTOR_TYPE_SAMPLER || + type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER); + + if (type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { + reloc.offset += offsetof(struct v3dv_combined_image_sampler_descriptor, + sampler_state); + } + + return reloc; +} + +struct v3dv_cl_reloc +v3dv_descriptor_map_get_texture_shader_state(struct v3dv_descriptor_state *descriptor_state, + struct v3dv_descriptor_map *map, + struct v3dv_pipeline_layout *pipeline_layout, + uint32_t index) +{ + VkDescriptorType type; + struct v3dv_cl_reloc reloc = + v3dv_descriptor_map_get_descriptor_bo(descriptor_state, map, + pipeline_layout, + index, &type); + + assert(type == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE || + type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER); + + if (type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { + reloc.offset += offsetof(struct v3dv_combined_image_sampler_descriptor, + texture_state); + } + + return reloc; +} + struct v3dv_image_view * v3dv_descriptor_map_get_image_view(struct v3dv_descriptor_state *descriptor_state, struct v3dv_descriptor_map *map, @@ -674,6 +770,30 @@ descriptor_set_create(struct v3dv_device *device, pool->entry_count++; } + /* Go through and fill out immutable samplers if we have any */ + for (uint32_t b = 0; b < layout->binding_count; b++) { + if (layout->binding[b].immutable_samplers_offset == 0) + continue; + + const struct v3dv_sampler *samplers = + (const struct v3dv_sampler *)((const char *)layout + + layout->binding[b].immutable_samplers_offset); + + for (uint32_t i = 0; i < layout->binding[b].array_size; i++) { + uint32_t combined_offset = + layout->binding[b].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ? + offsetof(struct v3dv_combined_image_sampler_descriptor, sampler_state) : + 0; + + void *desc_map = descriptor_bo_map(set, &layout->binding[b], i); + desc_map += combined_offset; + + memcpy(desc_map, + samplers[i].sampler_state, + cl_packet_length(SAMPLER_STATE)); + } + } + *out_set = set; return VK_SUCCESS; @@ -747,6 +867,33 @@ descriptor_bo_copy(struct v3dv_descriptor_set *dst_set, memcpy(dst_map, src_map, descriptor_bo_size(src_binding_layout->type)); } +static void +write_image_descriptor(struct v3dv_descriptor_set *set, + const struct v3dv_descriptor_set_binding_layout *binding_layout, + struct v3dv_image_view *iview, + struct v3dv_sampler *sampler, + uint32_t array_index) +{ + void *desc_map = descriptor_bo_map(set, binding_layout, array_index); + + if (iview) { + memcpy(desc_map, + iview->texture_shader_state, + sizeof(iview->texture_shader_state)); + desc_map += offsetof(struct v3dv_combined_image_sampler_descriptor, + sampler_state); + } + + if (sampler && !binding_layout->immutable_samplers_offset) { + /* For immutable samplers this was already done as part of the + * descriptor set create, as that info can't change later + */ + memcpy(desc_map, + sampler->sampler_state, + sizeof(sampler->sampler_state)); + } +} + void v3dv_UpdateDescriptorSets(VkDevice _device, uint32_t descriptorWriteCount, @@ -783,10 +930,18 @@ v3dv_UpdateDescriptorSets(VkDevice _device, break; } case VK_DESCRIPTOR_TYPE_SAMPLER: { + /* If we are here we shouldn't be modifying a immutable sampler, + * so we don't ensure that would work or not crash. But let the + * validation layers check that + */ const VkDescriptorImageInfo *image_info = writeset->pImageInfo + j; V3DV_FROM_HANDLE(v3dv_sampler, sampler, image_info->sampler); descriptor->sampler = sampler; + + write_image_descriptor(set, binding_layout, NULL, sampler, + writeset->dstArrayElement + j); + break; } case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: { @@ -794,6 +949,10 @@ v3dv_UpdateDescriptorSets(VkDevice _device, V3DV_FROM_HANDLE(v3dv_image_view, iview, image_info->imageView); descriptor->image_view = iview; + + write_image_descriptor(set, binding_layout, iview, NULL, + writeset->dstArrayElement + j); + break; } case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: { @@ -804,6 +963,9 @@ v3dv_UpdateDescriptorSets(VkDevice _device, descriptor->image_view = iview; descriptor->sampler = sampler; + write_image_descriptor(set, binding_layout, iview, sampler, + writeset->dstArrayElement + j); + break; } default: diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c index 4e17f37beb9..3f8c6da842c 100644 --- a/src/broadcom/vulkan/v3dv_device.c +++ b/src/broadcom/vulkan/v3dv_device.c @@ -2026,7 +2026,7 @@ pack_sampler_state(struct v3dv_sampler *sampler, break; } - v3dv_pack(sampler->state->map, SAMPLER_STATE, s) { + v3dv_pack(sampler->sampler_state, SAMPLER_STATE, s) { if (pCreateInfo->anisotropyEnable) { s.anisotropy_enable = true; if (pCreateInfo->maxAnisotropy > 8) @@ -2072,31 +2072,12 @@ v3dv_CreateSampler(VkDevice _device, if (!sampler) return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - if (sampler->state == NULL) { - sampler->state = v3dv_bo_alloc(device, cl_packet_length(SAMPLER_STATE), - "sampler_state"); - - if (!sampler->state) - goto fail_alloc; - - bool ok = v3dv_bo_map(device, sampler->state, - cl_packet_length(SAMPLER_STATE)); - if (!ok) - goto fail_map; - } - sampler->compare_enable = pCreateInfo->compareEnable; pack_sampler_state(sampler, pCreateInfo); *pSampler = v3dv_sampler_to_handle(sampler); return VK_SUCCESS; - -fail_map: - v3dv_bo_free(device, sampler->state); -fail_alloc: - vk_free2(&device->alloc, pAllocator, sampler); - return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY); } void @@ -2110,9 +2091,6 @@ v3dv_DestroySampler(VkDevice _device, if (!sampler) return; - if (sampler->state) - v3dv_bo_free(device, sampler->state); - vk_free2(&device->alloc, pAllocator, sampler); } diff --git a/src/broadcom/vulkan/v3dv_image.c b/src/broadcom/vulkan/v3dv_image.c index 6830289d89e..2e1dff30973 100644 --- a/src/broadcom/vulkan/v3dv_image.c +++ b/src/broadcom/vulkan/v3dv_image.c @@ -433,32 +433,16 @@ translate_swizzle(unsigned char pipe_swizzle) /* * Packs and ensure bo for the shader state (the latter can be temporal). - * - * Return false if it was not able to allocate the bo. */ -static bool +static void pack_texture_shader_state(struct v3dv_device *device, struct v3dv_image_view *image_view) { assert(image_view->image); const struct v3dv_image *image = image_view->image; - if (image_view->texture_shader_state == NULL) { - image_view->texture_shader_state = - v3dv_bo_alloc(device, cl_packet_length(TEXTURE_SHADER_STATE), - "texture_shader_state"); - - if (!image_view->texture_shader_state) - return false; - - bool ok = v3dv_bo_map(device, image_view->texture_shader_state, - cl_packet_length(TEXTURE_SHADER_STATE)); - if (!ok) - return false; - } - int msaa_scale = 1; /* FIXME: hardcoded. Revisit when msaa get supported */ - v3dv_pack(image_view->texture_shader_state->map, TEXTURE_SHADER_STATE, tex) { + v3dv_pack(image_view->texture_shader_state, TEXTURE_SHADER_STATE, tex) { tex.level_0_is_strictly_uif = (image->slices[0].tiling == VC5_TILING_UIF_XOR || @@ -519,8 +503,6 @@ pack_texture_shader_state(struct v3dv_device *device, v3dv_layer_offset(image, 0, image_view->first_layer); tex.texture_base_pointer = v3dv_cl_address(NULL, base_offset); } - - return true; } static enum pipe_swizzle @@ -650,18 +632,11 @@ v3dv_CreateImageView(VkDevice _device, util_format_compose_swizzles(format_swizzle, image_view_swizzle, iview->swizzle); - if (!pack_texture_shader_state(device, iview)) - goto fail_texture_shader_state_alloc; + pack_texture_shader_state(device, iview); *pView = v3dv_image_view_to_handle(iview); return VK_SUCCESS; - - fail_texture_shader_state_alloc: - if (iview->texture_shader_state) - v3dv_bo_free(device, iview->texture_shader_state); - vk_free2(&device->alloc, pAllocator, iview); - return VK_ERROR_OUT_OF_DEVICE_MEMORY; } void @@ -672,9 +647,6 @@ v3dv_DestroyImageView(VkDevice _device, V3DV_FROM_HANDLE(v3dv_device, device, _device); V3DV_FROM_HANDLE(v3dv_image_view, image_view, imageView); - if (image_view->texture_shader_state) - v3dv_bo_free(device, image_view->texture_shader_state); - vk_free2(&device->alloc, pAllocator, image_view); } diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h index 783ba8bebbf..bd06058b7ef 100644 --- a/src/broadcom/vulkan/v3dv_private.h +++ b/src/broadcom/vulkan/v3dv_private.h @@ -416,19 +416,15 @@ struct v3dv_image_view { /* Precomputed (composed from createinfo->components and formar swizzle) * swizzles to pass in to the shader key. * - * FIXME: this is also a candidate to be included on the descriptor info. + * This could be also included on the descriptor bo, but the shader state + * packet doesn't need it on a bo, so we can just avoid a memory copy */ uint8_t swizzle[4]; - /* FIXME: here we store the packet TEXTURE_SHADER_STATE, that is referenced - * as part of the tmu configuration, and the content is set per sampler. A - * possible perf improvement, to avoid bo fragmentation, would be to save - * the state as static, have the bo as part of the descriptor (booked from - * the descriptor pools), and then copy this content to the descriptor bo - * on UpdateDescriptor. This also makes sense because not all the images - * are used as textures. + /* Prepacked TEXTURE_SHADER_STATE. It will be copied to the descriptor info + * during UpdateDescriptorSets */ - struct v3dv_bo *texture_shader_state; + uint8_t texture_shader_state[cl_packet_length(TEXTURE_SHADER_STATE)]; }; uint32_t v3dv_layer_offset(const struct v3dv_image *image, uint32_t level, uint32_t layer); @@ -909,6 +905,33 @@ struct v3dv_descriptor { }; }; +/* The following v3dv_xxx_descriptor structs represent descriptor info that we + * upload to a bo, specifically a subregion of the descriptor pool bo. + * + * The general rule that we apply right now to decide which info goes to such + * bo is that we upload those that are referenced by an address when emitting + * a packet, so needed to be uploaded to an bo in any case. + * + * Note that these structs are mostly helpers that improve the semantics when + * doing all that, but we could do as other mesa vulkan drivers and just + * upload the info we know it is expected based on the context. + * + * Also note that the sizes are aligned, as there is an alignment requirement + * for addresses. + */ +struct v3dv_sampled_image_descriptor { + uint8_t texture_state[cl_aligned_packet_length(TEXTURE_SHADER_STATE, 32)]; +}; + +struct v3dv_sampler_descriptor { + uint8_t sampler_state[cl_aligned_packet_length(SAMPLER_STATE, 32)]; +}; + +struct v3dv_combined_image_sampler_descriptor { + uint8_t texture_state[cl_aligned_packet_length(TEXTURE_SHADER_STATE, 32)]; + uint8_t sampler_state[cl_aligned_packet_length(SAMPLER_STATE, 32)]; +}; + /* Aux struct as it is really common to have a pair bo/address. Called * resource because it is really likely that we would need something like that * if we work on reuse the same bo at different points (like the shader @@ -1289,14 +1312,11 @@ struct v3dv_descriptor_map { struct v3dv_sampler { bool compare_enable; - /* FIXME: here we store the packet SAMPLER_STATE, that is referenced as part - * of the tmu configuration, and the content is set per sampler. A possible - * perf improvement, to avoid bo fragmentation, would be to save the state - * as static, have the bo as part of the descriptor (booked from the - * descriptor pools), and then copy this content to the descriptor bo on - * UpdateDescriptor + /* Prepacked SAMPLER_STATE, that is referenced as part of the tmu + * configuration. If needed it will be copied to the descriptor info during + * UpdateDescriptorSets */ - struct v3dv_bo *state; + uint8_t sampler_state[cl_packet_length(SAMPLER_STATE)]; }; #define V3DV_NO_SAMPLER_IDX 666 @@ -1566,12 +1586,24 @@ v3dv_descriptor_map_get_sampler(struct v3dv_descriptor_state *descriptor_state, struct v3dv_pipeline_layout *pipeline_layout, uint32_t index); +struct v3dv_cl_reloc +v3dv_descriptor_map_get_sampler_state(struct v3dv_descriptor_state *descriptor_state, + struct v3dv_descriptor_map *map, + struct v3dv_pipeline_layout *pipeline_layout, + uint32_t index); + struct v3dv_image_view * v3dv_descriptor_map_get_image_view(struct v3dv_descriptor_state *descriptor_state, struct v3dv_descriptor_map *map, struct v3dv_pipeline_layout *pipeline_layout, uint32_t index); +struct v3dv_cl_reloc +v3dv_descriptor_map_get_texture_shader_state(struct v3dv_descriptor_state *descriptor_state, + struct v3dv_descriptor_map *map, + struct v3dv_pipeline_layout *pipeline_layout, + uint32_t index); + static inline const struct v3dv_sampler * v3dv_immutable_samplers(const struct v3dv_descriptor_set_layout *set, const struct v3dv_descriptor_set_binding_layout *binding) diff --git a/src/broadcom/vulkan/v3dv_uniforms.c b/src/broadcom/vulkan/v3dv_uniforms.c index e5ca9cc69d9..3cbbb4df483 100644 --- a/src/broadcom/vulkan/v3dv_uniforms.c +++ b/src/broadcom/vulkan/v3dv_uniforms.c @@ -98,18 +98,24 @@ write_tmu_p0(struct v3dv_cmd_buffer *cmd_buffer, &texture_idx, NULL); + /* We need to ensure that the texture bo is added to the job */ struct v3dv_image_view *image_view = v3dv_descriptor_map_get_image_view(descriptor_state, &pipeline->texture_map, pipeline->layout, texture_idx); assert(image_view); + v3dv_job_add_bo(job, image_view->image->mem->bo); + + struct v3dv_cl_reloc state_reloc = + v3dv_descriptor_map_get_texture_shader_state(descriptor_state, + &pipeline->texture_map, + pipeline->layout, + texture_idx); cl_aligned_reloc(&job->indirect, uniforms, - image_view->texture_shader_state, + state_reloc.bo, + state_reloc.offset + v3d_unit_data_get_offset(data)); - - /* We need to ensure that the texture bo is added to the job */ - v3dv_job_add_bo(job, image_view->image->mem->bo); } /** V3D 4.x TMU configuration parameter 1 (sampler) */ @@ -129,14 +135,13 @@ write_tmu_p1(struct v3dv_cmd_buffer *cmd_buffer, NULL, &sampler_idx); assert(sampler_idx != V3DV_NO_SAMPLER_IDX); - const struct v3dv_sampler *sampler = - v3dv_descriptor_map_get_sampler(descriptor_state, &pipeline->sampler_map, - pipeline->layout, sampler_idx); - - assert(sampler); + struct v3dv_cl_reloc sampler_state_reloc = + v3dv_descriptor_map_get_sampler_state(descriptor_state, &pipeline->sampler_map, + pipeline->layout, sampler_idx); cl_aligned_reloc(&job->indirect, uniforms, - sampler->state, + sampler_state_reloc.bo, + sampler_state_reloc.offset + v3d_unit_data_get_offset(data)); }