v3dv/descriptor: use descriptor pool bo for image/samplers

This allows us to remove some individual bos for the image and sampler, used to store the SAMPLER_STATE and TEXTURE_SHADER_STATE. Now they are prepacked on static memory as part of the vulkan object struct. This commit introduces small descriptor structs, used to define what the bo subregion would contain. It is used mostly to compute offsets to that specific data, and define the size needed. Having said so, it would be possible to replace them with some kind of flag (like anv) or just compute the offset based on the context. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6766>
2020-06-03 01:22:48 +02:00 · 2020-06-03 01:22:48 +02:00 · 9a894849d5
parent 18553b50f9
commit 9a894849d5
6 changed files with 236 additions and 80 deletions
--- a/src/broadcom/vulkan/v3dv_cl.h
+++ b/src/broadcom/vulkan/v3dv_cl.h
@ -133,8 +133,15 @@ cl_aligned_reloc(struct v3dv_cl *cl,
 uint32_t v3dv_cl_ensure_space(struct v3dv_cl *cl, uint32_t space, uint32_t alignment);
 void v3dv_cl_ensure_space_with_branch(struct v3dv_cl *cl, uint32_t space);

+/* We redefine ALIGN as a macro as we want to use cl_aligned_packet_length for
+ * struct fields
+ */
+#define ALIGN(value, alignment)                           \
+        (((value) + (alignment) - 1) & ~((alignment) - 1))
+
 #define cl_packet_header(packet) V3DX(packet ## _header)
 #define cl_packet_length(packet) V3DX(packet ## _length)
+#define cl_aligned_packet_length(packet, alignment) ALIGN(cl_packet_length(packet), alignment)
 #define cl_packet_pack(packet)   V3DX(packet ## _pack)
 #define cl_packet_struct(packet) V3DX(packet)

--- a/src/broadcom/vulkan/v3dv_descriptor_set.c
+++ b/src/broadcom/vulkan/v3dv_descriptor_set.c
@ -33,6 +33,12 @@ static uint32_t
 descriptor_bo_size(VkDescriptorType type)
 {
   switch(type) {
+   case VK_DESCRIPTOR_TYPE_SAMPLER:
+      return sizeof(struct v3dv_sampler_descriptor);
+   case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+      return sizeof(struct v3dv_combined_image_sampler_descriptor);
+   case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+      return sizeof(struct v3dv_sampled_image_descriptor);
   default:
      return 0;
   }
@ -107,6 +113,49 @@ v3dv_descriptor_map_get_descriptor(struct v3dv_descriptor_state *descriptor_stat
   return &set->descriptors[binding_layout->descriptor_index + array_index];
 }

+/* Equivalent to map_get_descriptor but it returns a reloc with the bo
+ * associated with that descriptor (suballocation of the descriptor pool bo)
+ *
+ * It also returns the descriptor type, so the caller could do extra
+ * validation or adding extra offsets if the bo contains more that one field.
+ */
+static struct v3dv_cl_reloc
+v3dv_descriptor_map_get_descriptor_bo(struct v3dv_descriptor_state *descriptor_state,
+                                      struct v3dv_descriptor_map *map,
+                                      struct v3dv_pipeline_layout *pipeline_layout,
+                                      uint32_t index,
+                                      VkDescriptorType *out_type)
+{
+   assert(index >= 0 && index < map->num_desc);
+
+   uint32_t set_number = map->set[index];
+   assert(descriptor_state->valid & 1 << set_number);
+
+   struct v3dv_descriptor_set *set =
+      descriptor_state->descriptor_sets[set_number];
+   assert(set);
+
+   uint32_t binding_number = map->binding[index];
+   assert(binding_number < set->layout->binding_count);
+
+   const struct v3dv_descriptor_set_binding_layout *binding_layout =
+      &set->layout->binding[binding_number];
+
+   assert(descriptor_bo_size(binding_layout->type) > 0);
+   *out_type = binding_layout->type;
+
+   uint32_t array_index = map->array_index[index];
+   assert(array_index < binding_layout->array_size);
+
+   struct v3dv_cl_reloc reloc = {
+      .bo = set->pool->bo,
+      .offset = set->base_offset + binding_layout->descriptor_offset +
+      array_index * descriptor_bo_size(binding_layout->type),
+   };
+
+   return reloc;
+}
+
 /*
 * The difference between this method and v3dv_descriptor_map_get_descriptor,
 * is that if the sampler are added as immutable when creating the set layout,
@ -162,6 +211,53 @@ v3dv_descriptor_map_get_sampler(struct v3dv_descriptor_state *descriptor_state,
   return descriptor->sampler;
 }

+
+struct v3dv_cl_reloc
+v3dv_descriptor_map_get_sampler_state(struct v3dv_descriptor_state *descriptor_state,
+                                      struct v3dv_descriptor_map *map,
+                                      struct v3dv_pipeline_layout *pipeline_layout,
+                                      uint32_t index)
+{
+   VkDescriptorType type;
+   struct v3dv_cl_reloc reloc =
+      v3dv_descriptor_map_get_descriptor_bo(descriptor_state, map,
+                                            pipeline_layout,
+                                            index, &type);
+
+   assert(type == VK_DESCRIPTOR_TYPE_SAMPLER ||
+          type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
+
+   if (type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
+      reloc.offset += offsetof(struct v3dv_combined_image_sampler_descriptor,
+                               sampler_state);
+   }
+
+   return reloc;
+}
+
+struct v3dv_cl_reloc
+v3dv_descriptor_map_get_texture_shader_state(struct v3dv_descriptor_state *descriptor_state,
+                                             struct v3dv_descriptor_map *map,
+                                             struct v3dv_pipeline_layout *pipeline_layout,
+                                             uint32_t index)
+{
+   VkDescriptorType type;
+   struct v3dv_cl_reloc reloc =
+      v3dv_descriptor_map_get_descriptor_bo(descriptor_state, map,
+                                            pipeline_layout,
+                                            index, &type);
+
+   assert(type == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE ||
+          type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
+
+   if (type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
+      reloc.offset += offsetof(struct v3dv_combined_image_sampler_descriptor,
+                               texture_state);
+   }
+
+   return reloc;
+}
+
 struct v3dv_image_view *
 v3dv_descriptor_map_get_image_view(struct v3dv_descriptor_state *descriptor_state,
                                   struct v3dv_descriptor_map *map,
@ -674,6 +770,30 @@ descriptor_set_create(struct v3dv_device *device,
      pool->entry_count++;
   }

+   /* Go through and fill out immutable samplers if we have any */
+   for (uint32_t b = 0; b < layout->binding_count; b++) {
+      if (layout->binding[b].immutable_samplers_offset == 0)
+         continue;
+
+      const struct v3dv_sampler *samplers =
+         (const struct v3dv_sampler *)((const char *)layout +
+                                       layout->binding[b].immutable_samplers_offset);
+
+      for (uint32_t i = 0; i < layout->binding[b].array_size; i++) {
+         uint32_t combined_offset =
+            layout->binding[b].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ?
+            offsetof(struct v3dv_combined_image_sampler_descriptor, sampler_state) :
+            0;
+
+         void *desc_map = descriptor_bo_map(set, &layout->binding[b], i);
+         desc_map += combined_offset;
+
+         memcpy(desc_map,
+                samplers[i].sampler_state,
+                cl_packet_length(SAMPLER_STATE));
+      }
+   }
+
   *out_set = set;

   return VK_SUCCESS;
@ -747,6 +867,33 @@ descriptor_bo_copy(struct v3dv_descriptor_set *dst_set,
   memcpy(dst_map, src_map, descriptor_bo_size(src_binding_layout->type));
 }

+static void
+write_image_descriptor(struct v3dv_descriptor_set *set,
+                       const struct v3dv_descriptor_set_binding_layout *binding_layout,
+                       struct v3dv_image_view *iview,
+                       struct v3dv_sampler *sampler,
+                       uint32_t array_index)
+{
+   void *desc_map = descriptor_bo_map(set, binding_layout, array_index);
+
+   if (iview) {
+      memcpy(desc_map,
+             iview->texture_shader_state,
+             sizeof(iview->texture_shader_state));
+      desc_map += offsetof(struct v3dv_combined_image_sampler_descriptor,
+                           sampler_state);
+   }
+
+   if (sampler && !binding_layout->immutable_samplers_offset) {
+      /* For immutable samplers this was already done as part of the
+       * descriptor set create, as that info can't change later
+       */
+      memcpy(desc_map,
+             sampler->sampler_state,
+             sizeof(sampler->sampler_state));
+   }
+}
+
 void
 v3dv_UpdateDescriptorSets(VkDevice  _device,
                          uint32_t descriptorWriteCount,
@ -783,10 +930,18 @@ v3dv_UpdateDescriptorSets(VkDevice  _device,
            break;
         }
         case VK_DESCRIPTOR_TYPE_SAMPLER: {
+            /* If we are here we shouldn't be modifying a immutable sampler,
+             * so we don't ensure that would work or not crash. But let the
+             * validation layers check that
+             */
            const VkDescriptorImageInfo *image_info = writeset->pImageInfo + j;
            V3DV_FROM_HANDLE(v3dv_sampler, sampler, image_info->sampler);

            descriptor->sampler = sampler;
+
+            write_image_descriptor(set, binding_layout, NULL, sampler,
+                                   writeset->dstArrayElement + j);
+
            break;
         }
         case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: {
@ -794,6 +949,10 @@ v3dv_UpdateDescriptorSets(VkDevice  _device,
            V3DV_FROM_HANDLE(v3dv_image_view, iview, image_info->imageView);

            descriptor->image_view = iview;
+
+            write_image_descriptor(set, binding_layout, iview, NULL,
+                                   writeset->dstArrayElement + j);
+
            break;
         }
         case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: {
@ -804,6 +963,9 @@ v3dv_UpdateDescriptorSets(VkDevice  _device,
            descriptor->image_view = iview;
            descriptor->sampler = sampler;

+            write_image_descriptor(set, binding_layout, iview, sampler,
+                                   writeset->dstArrayElement + j);
+
            break;
         }
         default:
--- a/src/broadcom/vulkan/v3dv_device.c
+++ b/src/broadcom/vulkan/v3dv_device.c
@ -2026,7 +2026,7 @@ pack_sampler_state(struct v3dv_sampler *sampler,
      break;
   }

-   v3dv_pack(sampler->state->map, SAMPLER_STATE, s) {
+   v3dv_pack(sampler->sampler_state, SAMPLER_STATE, s) {
      if (pCreateInfo->anisotropyEnable) {
         s.anisotropy_enable = true;
         if (pCreateInfo->maxAnisotropy > 8)
@ -2072,31 +2072,12 @@ v3dv_CreateSampler(VkDevice _device,
   if (!sampler)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

-   if (sampler->state == NULL) {
-      sampler->state = v3dv_bo_alloc(device, cl_packet_length(SAMPLER_STATE),
-                                     "sampler_state");
-
-      if (!sampler->state)
-         goto fail_alloc;
-
-      bool ok = v3dv_bo_map(device, sampler->state,
-                            cl_packet_length(SAMPLER_STATE));
-      if (!ok)
-         goto fail_map;
-   }
-
   sampler->compare_enable = pCreateInfo->compareEnable;
   pack_sampler_state(sampler, pCreateInfo);

   *pSampler = v3dv_sampler_to_handle(sampler);

   return VK_SUCCESS;
-
-fail_map:
-   v3dv_bo_free(device, sampler->state);
-fail_alloc:
-   vk_free2(&device->alloc, pAllocator, sampler);
-   return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
 }

 void
@ -2110,9 +2091,6 @@ v3dv_DestroySampler(VkDevice _device,
   if (!sampler)
      return;

-   if (sampler->state)
-      v3dv_bo_free(device, sampler->state);
-
   vk_free2(&device->alloc, pAllocator, sampler);
 }

--- a/src/broadcom/vulkan/v3dv_image.c
+++ b/src/broadcom/vulkan/v3dv_image.c
@ -433,32 +433,16 @@ translate_swizzle(unsigned char pipe_swizzle)

 /*
 * Packs and ensure bo for the shader state (the latter can be temporal).
- *
- * Return false if it was not able to allocate the bo.
 */
-static bool
+static void
 pack_texture_shader_state(struct v3dv_device *device,
                          struct v3dv_image_view *image_view)
 {
   assert(image_view->image);
   const struct v3dv_image *image = image_view->image;

-   if (image_view->texture_shader_state == NULL) {
-      image_view->texture_shader_state =
-         v3dv_bo_alloc(device, cl_packet_length(TEXTURE_SHADER_STATE),
-                       "texture_shader_state");
-
-      if (!image_view->texture_shader_state)
-         return false;
-
-      bool ok = v3dv_bo_map(device, image_view->texture_shader_state,
-                            cl_packet_length(TEXTURE_SHADER_STATE));
-      if (!ok)
-         return false;
-   }
-
   int msaa_scale = 1; /* FIXME: hardcoded. Revisit when msaa get supported */
-   v3dv_pack(image_view->texture_shader_state->map, TEXTURE_SHADER_STATE, tex) {
+   v3dv_pack(image_view->texture_shader_state, TEXTURE_SHADER_STATE, tex) {

      tex.level_0_is_strictly_uif =
         (image->slices[0].tiling == VC5_TILING_UIF_XOR ||
@ -519,8 +503,6 @@ pack_texture_shader_state(struct v3dv_device *device,
         v3dv_layer_offset(image, 0, image_view->first_layer);
      tex.texture_base_pointer = v3dv_cl_address(NULL, base_offset);
   }
-
-   return true;
 }

 static enum pipe_swizzle
@ -650,18 +632,11 @@ v3dv_CreateImageView(VkDevice _device,

   util_format_compose_swizzles(format_swizzle, image_view_swizzle, iview->swizzle);

-   if (!pack_texture_shader_state(device, iview))
-      goto fail_texture_shader_state_alloc;
+   pack_texture_shader_state(device, iview);

   *pView = v3dv_image_view_to_handle(iview);

   return VK_SUCCESS;
-
- fail_texture_shader_state_alloc:
-   if (iview->texture_shader_state)
-      v3dv_bo_free(device, iview->texture_shader_state);
-   vk_free2(&device->alloc, pAllocator, iview);
-   return VK_ERROR_OUT_OF_DEVICE_MEMORY;
 }

 void
@ -672,9 +647,6 @@ v3dv_DestroyImageView(VkDevice _device,
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_image_view, image_view, imageView);

-   if (image_view->texture_shader_state)
-      v3dv_bo_free(device, image_view->texture_shader_state);
-
   vk_free2(&device->alloc, pAllocator, image_view);
 }

--- a/src/broadcom/vulkan/v3dv_private.h
+++ b/src/broadcom/vulkan/v3dv_private.h
@ -416,19 +416,15 @@ struct v3dv_image_view {
   /* Precomputed (composed from createinfo->components and formar swizzle)
    * swizzles to pass in to the shader key.
    *
-    * FIXME: this is also a candidate to be included on the descriptor info.
+    * This could be also included on the descriptor bo, but the shader state
+    * packet doesn't need it on a bo, so we can just avoid a memory copy
    */
   uint8_t swizzle[4];

-   /* FIXME: here we store the packet TEXTURE_SHADER_STATE, that is referenced
-    * as part of the tmu configuration, and the content is set per sampler. A
-    * possible perf improvement, to avoid bo fragmentation, would be to save
-    * the state as static, have the bo as part of the descriptor (booked from
-    * the descriptor pools), and then copy this content to the descriptor bo
-    * on UpdateDescriptor. This also makes sense because not all the images
-    * are used as textures.
+   /* Prepacked TEXTURE_SHADER_STATE. It will be copied to the descriptor info
+    * during UpdateDescriptorSets
    */
-   struct v3dv_bo *texture_shader_state;
+   uint8_t texture_shader_state[cl_packet_length(TEXTURE_SHADER_STATE)];
 };

 uint32_t v3dv_layer_offset(const struct v3dv_image *image, uint32_t level, uint32_t layer);
@ -909,6 +905,33 @@ struct v3dv_descriptor {
   };
 };

+/* The following v3dv_xxx_descriptor structs represent descriptor info that we
+ * upload to a bo, specifically a subregion of the descriptor pool bo.
+ *
+ * The general rule that we apply right now to decide which info goes to such
+ * bo is that we upload those that are referenced by an address when emitting
+ * a packet, so needed to be uploaded to an bo in any case.
+ *
+ * Note that these structs are mostly helpers that improve the semantics when
+ * doing all that, but we could do as other mesa vulkan drivers and just
+ * upload the info we know it is expected based on the context.
+ *
+ * Also note that the sizes are aligned, as there is an alignment requirement
+ * for addresses.
+ */
+struct v3dv_sampled_image_descriptor {
+   uint8_t texture_state[cl_aligned_packet_length(TEXTURE_SHADER_STATE, 32)];
+};
+
+struct v3dv_sampler_descriptor {
+   uint8_t sampler_state[cl_aligned_packet_length(SAMPLER_STATE, 32)];
+};
+
+struct v3dv_combined_image_sampler_descriptor {
+   uint8_t texture_state[cl_aligned_packet_length(TEXTURE_SHADER_STATE, 32)];
+   uint8_t sampler_state[cl_aligned_packet_length(SAMPLER_STATE, 32)];
+};
+
 /* Aux struct as it is really common to have a pair bo/address. Called
 * resource because it is really likely that we would need something like that
 * if we work on reuse the same bo at different points (like the shader
@ -1289,14 +1312,11 @@ struct v3dv_descriptor_map {
 struct v3dv_sampler {
   bool compare_enable;

-   /* FIXME: here we store the packet SAMPLER_STATE, that is referenced as part
-    * of the tmu configuration, and the content is set per sampler. A possible
-    * perf improvement, to avoid bo fragmentation, would be to save the state
-    * as static, have the bo as part of the descriptor (booked from the
-    * descriptor pools), and then copy this content to the descriptor bo on
-    * UpdateDescriptor
+   /* Prepacked SAMPLER_STATE, that is referenced as part of the tmu
+    * configuration. If needed it will be copied to the descriptor info during
+    * UpdateDescriptorSets
    */
-   struct v3dv_bo *state;
+   uint8_t sampler_state[cl_packet_length(SAMPLER_STATE)];
 };

 #define V3DV_NO_SAMPLER_IDX 666
@ -1566,12 +1586,24 @@ v3dv_descriptor_map_get_sampler(struct v3dv_descriptor_state *descriptor_state,
                                struct v3dv_pipeline_layout *pipeline_layout,
                                uint32_t index);

+struct v3dv_cl_reloc
+v3dv_descriptor_map_get_sampler_state(struct v3dv_descriptor_state *descriptor_state,
+                                      struct v3dv_descriptor_map *map,
+                                      struct v3dv_pipeline_layout *pipeline_layout,
+                                      uint32_t index);
+
 struct v3dv_image_view *
 v3dv_descriptor_map_get_image_view(struct v3dv_descriptor_state *descriptor_state,
                                   struct v3dv_descriptor_map *map,
                                   struct v3dv_pipeline_layout *pipeline_layout,
                                   uint32_t index);

+struct v3dv_cl_reloc
+v3dv_descriptor_map_get_texture_shader_state(struct v3dv_descriptor_state *descriptor_state,
+                                             struct v3dv_descriptor_map *map,
+                                             struct v3dv_pipeline_layout *pipeline_layout,
+                                             uint32_t index);
+
 static inline const struct v3dv_sampler *
 v3dv_immutable_samplers(const struct v3dv_descriptor_set_layout *set,
                        const struct v3dv_descriptor_set_binding_layout *binding)
--- a/src/broadcom/vulkan/v3dv_uniforms.c
+++ b/src/broadcom/vulkan/v3dv_uniforms.c
@ -98,18 +98,24 @@ write_tmu_p0(struct v3dv_cmd_buffer *cmd_buffer,
                                           &texture_idx,
                                           NULL);

+   /* We need to ensure that the texture bo is added to the job */
   struct v3dv_image_view *image_view =
      v3dv_descriptor_map_get_image_view(descriptor_state, &pipeline->texture_map,
                                         pipeline->layout, texture_idx);

   assert(image_view);
+   v3dv_job_add_bo(job, image_view->image->mem->bo);
+
+   struct v3dv_cl_reloc state_reloc =
+      v3dv_descriptor_map_get_texture_shader_state(descriptor_state,
+                                                   &pipeline->texture_map,
+                                                   pipeline->layout,
+                                                   texture_idx);

   cl_aligned_reloc(&job->indirect, uniforms,
-                    image_view->texture_shader_state,
+                    state_reloc.bo,
+                    state_reloc.offset +
                    v3d_unit_data_get_offset(data));
-
-   /* We need to ensure that the texture bo is added to the job */
-   v3dv_job_add_bo(job, image_view->image->mem->bo);
 }

 /** V3D 4.x TMU configuration parameter 1 (sampler) */
@ -129,14 +135,13 @@ write_tmu_p1(struct v3dv_cmd_buffer *cmd_buffer,
                                           NULL, &sampler_idx);
   assert(sampler_idx != V3DV_NO_SAMPLER_IDX);

-   const struct v3dv_sampler *sampler =
-      v3dv_descriptor_map_get_sampler(descriptor_state, &pipeline->sampler_map,
-                                      pipeline->layout, sampler_idx);
-
-   assert(sampler);
+   struct v3dv_cl_reloc sampler_state_reloc =
+      v3dv_descriptor_map_get_sampler_state(descriptor_state, &pipeline->sampler_map,
+                                            pipeline->layout, sampler_idx);

   cl_aligned_reloc(&job->indirect, uniforms,
-                    sampler->state,
+                    sampler_state_reloc.bo,
+                    sampler_state_reloc.offset +
                    v3d_unit_data_get_offset(data));
 }