tu: Implement pipeline caching with shared Vulkan cache

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16147>
Author: Connor Abbott, 2022-02-17 20:48:36 +01:00 (committed by Marge Bot)
parent 43981f0f58
commit 05329d7f9a
6 changed files with 527 additions and 596 deletions
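
The change moves turnip from its own VkPipelineCache implementation to the shared vk_pipeline_cache runtime: the shader stages and keys are hashed into a SHA-1, that key is looked up in the cache, and compilation only happens on a miss, after which the result is inserted so later pipelines (and the disk cache) can reuse it. A minimal sketch of that flow, assuming the tu_pipeline_cache_lookup()/tu_pipeline_cache_insert() helpers added in tu_pipeline.c below; compile_all_stages() is a hypothetical stand-in for the real compile path, not a function in this diff:

static VkResult
lookup_or_compile(struct vk_pipeline_cache *cache,
                  const unsigned char sha1[20],
                  struct tu_compiled_shaders **out)
{
   /* Lookup returns a new reference on a hit. */
   struct tu_compiled_shaders *shaders =
      tu_pipeline_cache_lookup(cache, sha1, 20);

   if (!shaders) {
      shaders = compile_all_stages(sha1);   /* hypothetical compile path */
      if (!shaders)
         return VK_ERROR_OUT_OF_HOST_MEMORY;
      /* Insert may hand back an object another thread already cached under
       * the same key; our reference is consumed either way. */
      shaders = tu_pipeline_cache_insert(cache, shaders);
   }

   *out = shaders;   /* caller releases with vk_pipeline_cache_object_unref() */
   return VK_SUCCESS;
}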

src/freedreno/vulkan/meson.build

@ -44,7 +44,6 @@ libtu_files = files(
'tu_nir_lower_multiview.c',
'tu_pass.c',
'tu_pipeline.c',
'tu_pipeline_cache.c',
'tu_private.h',
'tu_query.c',
'tu_shader.c',

src/freedreno/vulkan/tu_device.c

@ -230,6 +230,11 @@ get_device_extensions(const struct tu_physical_device *device,
};
}
static const struct vk_pipeline_cache_object_ops *const cache_import_ops[] = {
&tu_shaders_ops,
NULL,
};
VkResult
tu_physical_device_init(struct tu_physical_device *device,
struct tu_instance *instance)
@ -275,13 +280,6 @@ tu_physical_device_init(struct tu_physical_device *device,
goto fail_free_name;
}
/* The gpu id is already embedded in the uuid so we just pass "tu"
* when creating the cache.
*/
char buf[VK_UUID_SIZE * 2 + 1];
disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
device->disk_cache = disk_cache_create(device->name, buf, 0);
fd_get_driver_uuid(device->driver_uuid);
fd_get_device_uuid(device->device_uuid, &device->dev_id);
@ -298,21 +296,28 @@ tu_physical_device_init(struct tu_physical_device *device,
&supported_extensions,
&dispatch_table);
if (result != VK_SUCCESS)
goto fail_free_cache;
goto fail_free_name;
#if TU_HAS_SURFACE
result = tu_wsi_init(device);
if (result != VK_SUCCESS) {
vk_startup_errorf(instance, result, "WSI init failure");
vk_physical_device_finish(&device->vk);
goto fail_free_cache;
goto fail_free_name;
}
#endif
/* The gpu id is already embedded in the uuid so we just pass "tu"
* when creating the cache.
*/
char buf[VK_UUID_SIZE * 2 + 1];
disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
device->vk.disk_cache = disk_cache_create(device->name, buf, 0);
device->vk.pipeline_cache_import_ops = cache_import_ops;
return VK_SUCCESS;
fail_free_cache:
disk_cache_destroy(device->disk_cache);
fail_free_name:
vk_free(&instance->vk.alloc, (void *)device->name);
return result;
@ -325,7 +330,6 @@ tu_physical_device_finish(struct tu_physical_device *device)
tu_wsi_finish(device);
#endif
disk_cache_destroy(device->disk_cache);
close(device->local_fd);
if (device->master_fd != -1)
close(device->master_fd);
@ -1790,6 +1794,7 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
&(struct ir3_compiler_options) {
.robust_ubo_access = robust_buffer_access2,
.push_ubo_with_preamble = true,
.disable_cache = true,
});
if (!device->compiler) {
result = vk_startup_errorf(physical_device->instance,
@ -1851,16 +1856,11 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
/* initialize to ones so ffs can be used to find unused slots */
BITSET_ONES(device->custom_border_color);
VkPipelineCacheCreateInfo ci;
ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
ci.pNext = NULL;
ci.flags = 0;
ci.pInitialData = NULL;
ci.initialDataSize = 0;
VkPipelineCache pc;
result =
tu_CreatePipelineCache(tu_device_to_handle(device), &ci, NULL, &pc);
if (result != VK_SUCCESS) {
struct vk_pipeline_cache_create_info pcc_info = { };
device->mem_cache = vk_pipeline_cache_create(&device->vk, &pcc_info,
false);
if (!device->mem_cache) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
vk_startup_errorf(device->instance, result, "create pipeline cache failed");
goto fail_pipeline_cache;
}
@ -1929,8 +1929,6 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
}
pthread_condattr_destroy(&condattr);
device->mem_cache = tu_pipeline_cache_from_handle(pc);
result = tu_autotune_init(&device->autotune, device);
if (result != VK_SUCCESS) {
goto fail_timeline_cond;
@ -1959,7 +1957,7 @@ fail_prepare_perfcntrs_pass_cs:
fail_perfcntrs_pass_entries_alloc:
free(device->perfcntrs_pass_cs);
fail_perfcntrs_pass_alloc:
tu_DestroyPipelineCache(tu_device_to_handle(device), pc, NULL);
vk_pipeline_cache_destroy(device->mem_cache, &device->vk.alloc);
fail_pipeline_cache:
tu_destroy_clear_blit_shaders(device);
fail_global_bo_map:
@ -2009,8 +2007,7 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
ir3_compiler_destroy(device->compiler);
VkPipelineCache pc = tu_pipeline_cache_to_handle(device->mem_cache);
tu_DestroyPipelineCache(tu_device_to_handle(device), pc, NULL);
vk_pipeline_cache_destroy(device->mem_cache, &device->vk.alloc);
if (device->perfcntrs_pass_cs) {
free(device->perfcntrs_pass_cs_entries);

src/freedreno/vulkan/tu_pipeline.c

@ -250,13 +250,12 @@ struct tu_pipeline_builder
{
struct tu_device *device;
void *mem_ctx;
struct tu_pipeline_cache *cache;
struct vk_pipeline_cache *cache;
struct tu_pipeline_layout *layout;
const VkAllocationCallbacks *alloc;
const VkGraphicsPipelineCreateInfo *create_info;
struct tu_shader *shaders[MESA_SHADER_FRAGMENT + 1];
struct ir3_shader_variant *variants[MESA_SHADER_FRAGMENT + 1];
struct tu_compiled_shaders *shaders;
struct ir3_shader_variant *binning_variant;
uint64_t shader_iova[MESA_SHADER_FRAGMENT + 1];
uint64_t binning_vs_iova;
@ -660,7 +659,7 @@ tu6_emit_xs(struct tu_cs *cs,
}
static void
tu6_emit_cs_config(struct tu_cs *cs, const struct tu_shader *shader,
tu6_emit_cs_config(struct tu_cs *cs,
const struct ir3_shader_variant *v,
const struct tu_pvtmem_config *pvtmem,
uint64_t binary_iova)
@ -1686,8 +1685,8 @@ tu6_emit_program_config(struct tu_cs *cs,
.gs_state = true,
.fs_state = true,
.gfx_ibo = true));
for (; stage < ARRAY_SIZE(builder->shaders); stage++) {
tu6_emit_xs_config(cs, stage, builder->variants[stage]);
for (; stage < ARRAY_SIZE(builder->shader_iova); stage++) {
tu6_emit_xs_config(cs, stage, builder->shaders->variants[stage]);
}
}
@ -1697,16 +1696,16 @@ tu6_emit_program(struct tu_cs *cs,
bool binning_pass,
struct tu_pipeline *pipeline)
{
const struct ir3_shader_variant *vs = builder->variants[MESA_SHADER_VERTEX];
const struct ir3_shader_variant *vs = builder->shaders->variants[MESA_SHADER_VERTEX];
const struct ir3_shader_variant *bs = builder->binning_variant;
const struct ir3_shader_variant *hs = builder->variants[MESA_SHADER_TESS_CTRL];
const struct ir3_shader_variant *ds = builder->variants[MESA_SHADER_TESS_EVAL];
const struct ir3_shader_variant *gs = builder->variants[MESA_SHADER_GEOMETRY];
const struct ir3_shader_variant *fs = builder->variants[MESA_SHADER_FRAGMENT];
const struct ir3_shader_variant *hs = builder->shaders->variants[MESA_SHADER_TESS_CTRL];
const struct ir3_shader_variant *ds = builder->shaders->variants[MESA_SHADER_TESS_EVAL];
const struct ir3_shader_variant *gs = builder->shaders->variants[MESA_SHADER_GEOMETRY];
const struct ir3_shader_variant *fs = builder->shaders->variants[MESA_SHADER_FRAGMENT];
gl_shader_stage stage = MESA_SHADER_VERTEX;
uint32_t cps_per_patch = builder->create_info->pTessellationState ?
builder->create_info->pTessellationState->patchControlPoints : 0;
bool multi_pos_output = builder->shaders[MESA_SHADER_VERTEX]->multi_pos_output;
bool multi_pos_output = builder->shaders->multi_pos_output;
/* Don't use the binning pass variant when GS is present because we don't
* support compiling correct binning pass variants with GS.
@ -1717,8 +1716,8 @@ tu6_emit_program(struct tu_cs *cs,
stage++;
}
for (; stage < ARRAY_SIZE(builder->shaders); stage++) {
const struct ir3_shader_variant *xs = builder->variants[stage];
for (; stage < ARRAY_SIZE(builder->shaders->variants); stage++) {
const struct ir3_shader_variant *xs = builder->shaders->variants[stage];
if (stage == MESA_SHADER_FRAGMENT && binning_pass)
fs = xs = NULL;
@ -2255,24 +2254,23 @@ tu_pipeline_allocate_cs(struct tu_device *dev,
struct tu_pipeline *pipeline,
struct tu_pipeline_layout *layout,
struct tu_pipeline_builder *builder,
struct tu_pipeline_cache *cache,
struct ir3_shader_variant *compute)
{
uint32_t size = 1024 + tu6_load_state_size(pipeline, layout, compute);
/* graphics case: */
if (builder) {
for (uint32_t i = 0; i < ARRAY_SIZE(builder->variants); i++) {
if (builder->variants[i]) {
size += builder->variants[i]->info.size / 4;
for (uint32_t i = 0; i < ARRAY_SIZE(builder->shaders->variants); i++) {
if (builder->shaders->variants[i]) {
size += builder->shaders->variants[i]->info.size / 4;
}
}
size += builder->binning_variant->info.size / 4;
builder->additional_cs_reserve_size = 0;
for (unsigned i = 0; i < ARRAY_SIZE(builder->variants); i++) {
struct ir3_shader_variant *variant = builder->variants[i];
for (unsigned i = 0; i < ARRAY_SIZE(builder->shaders->variants); i++) {
struct ir3_shader_variant *variant = builder->shaders->variants[i];
if (variant) {
builder->additional_cs_reserve_size +=
tu_xs_get_additional_cs_size_dwords(variant);
@ -2445,10 +2443,248 @@ tu_link_shaders(struct tu_pipeline_builder *builder,
}
}
static void
tu_shader_key_init(struct tu_shader_key *key,
const VkPipelineShaderStageCreateInfo *stage_info,
struct tu_device *dev)
{
enum ir3_wavesize_option api_wavesize, real_wavesize;
if (stage_info) {
if (stage_info->flags &
VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) {
api_wavesize = real_wavesize = IR3_SINGLE_OR_DOUBLE;
} else {
const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *size_info =
vk_find_struct_const(stage_info->pNext,
PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT);
if (size_info) {
if (size_info->requiredSubgroupSize == dev->compiler->threadsize_base) {
api_wavesize = IR3_SINGLE_ONLY;
} else {
assert(size_info->requiredSubgroupSize == dev->compiler->threadsize_base * 2);
api_wavesize = IR3_DOUBLE_ONLY;
}
} else {
/* Match the exposed subgroupSize. */
api_wavesize = IR3_DOUBLE_ONLY;
}
if (stage_info->flags &
VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT)
real_wavesize = api_wavesize;
else if (api_wavesize == IR3_SINGLE_ONLY)
real_wavesize = IR3_SINGLE_ONLY;
else
real_wavesize = IR3_SINGLE_OR_DOUBLE;
}
} else {
api_wavesize = real_wavesize = IR3_SINGLE_OR_DOUBLE;
}
key->api_wavesize = api_wavesize;
key->real_wavesize = real_wavesize;
}
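
A worked example of the mapping above, illustrative only and assuming an a6xx-style compiler where threadsize_base is 64 (so the two wave sizes are 64 and 128): a stage that requires a full 128-wide subgroup ends up with both the API-visible and the real wave size pinned to double.

/* Hypothetical create-info fragment, not part of the diff. */
const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT size_info = {
   .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
   .requiredSubgroupSize = 128,   /* == threadsize_base * 2 */
};
const VkPipelineShaderStageCreateInfo stage_info = {
   .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
   .pNext = &size_info,
   .flags = VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT,
   /* ... stage, module, pName ... */
};

struct tu_shader_key key = { };
tu_shader_key_init(&key, &stage_info, dev);
/* key.api_wavesize == IR3_DOUBLE_ONLY, and because REQUIRE_FULL_SUBGROUPS is
 * set, key.real_wavesize == IR3_DOUBLE_ONLY as well. */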
static void
tu_hash_stage(struct mesa_sha1 *ctx,
const VkPipelineShaderStageCreateInfo *stage,
const struct tu_shader_key *key)
{
VK_FROM_HANDLE(vk_shader_module, module, stage->module);
const VkSpecializationInfo *spec_info = stage->pSpecializationInfo;
_mesa_sha1_update(ctx, module->sha1, sizeof(module->sha1));
_mesa_sha1_update(ctx, stage->pName, strlen(stage->pName));
if (spec_info && spec_info->mapEntryCount) {
_mesa_sha1_update(ctx, spec_info->pMapEntries,
spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
_mesa_sha1_update(ctx, spec_info->pData, spec_info->dataSize);
}
_mesa_sha1_update(ctx, key, sizeof(*key));
}
/* Hash flags which can affect ir3 shader compilation which aren't known until
* logical device creation.
*/
static void
tu_hash_compiler(struct mesa_sha1 *ctx, const struct ir3_compiler *compiler)
{
_mesa_sha1_update(ctx, &compiler->robust_ubo_access,
sizeof(compiler->robust_ubo_access));
_mesa_sha1_update(ctx, &ir3_shader_debug, sizeof(ir3_shader_debug));
}
static void
tu_hash_shaders(unsigned char *hash,
const VkPipelineShaderStageCreateInfo **stages,
const struct tu_pipeline_layout *layout,
const struct tu_shader_key *keys,
const struct ir3_shader_key *ir3_key,
const struct ir3_compiler *compiler)
{
struct mesa_sha1 ctx;
_mesa_sha1_init(&ctx);
if (layout)
_mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
_mesa_sha1_update(&ctx, ir3_key, sizeof(ir3_key));
for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
if (stages[i]) {
tu_hash_stage(&ctx, stages[i], &keys[i]);
}
}
tu_hash_compiler(&ctx, compiler);
_mesa_sha1_final(&ctx, hash);
}
static void
tu_hash_compute(unsigned char *hash,
const VkPipelineShaderStageCreateInfo *stage,
const struct tu_pipeline_layout *layout,
const struct tu_shader_key *key,
const struct ir3_compiler *compiler)
{
struct mesa_sha1 ctx;
_mesa_sha1_init(&ctx);
if (layout)
_mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
tu_hash_stage(&ctx, stage, key);
tu_hash_compiler(&ctx, compiler);
_mesa_sha1_final(&ctx, hash);
}
static bool
tu_shaders_serialize(struct vk_pipeline_cache_object *object,
struct blob *blob);
static struct vk_pipeline_cache_object *
tu_shaders_deserialize(struct vk_device *device,
const void *key_data, size_t key_size,
struct blob_reader *blob);
static void
tu_shaders_destroy(struct vk_pipeline_cache_object *object)
{
struct tu_compiled_shaders *shaders =
container_of(object, struct tu_compiled_shaders, base);
for (unsigned i = 0; i < ARRAY_SIZE(shaders->variants); i++)
ralloc_free(shaders->variants[i]);
vk_pipeline_cache_object_finish(&shaders->base);
vk_free(&object->device->alloc, shaders);
}
const struct vk_pipeline_cache_object_ops tu_shaders_ops = {
.serialize = tu_shaders_serialize,
.deserialize = tu_shaders_deserialize,
.destroy = tu_shaders_destroy,
};
static struct tu_compiled_shaders *
tu_shaders_init(struct tu_device *dev, const void *key_data, size_t key_size)
{
VK_MULTIALLOC(ma);
VK_MULTIALLOC_DECL(&ma, struct tu_compiled_shaders, shaders, 1);
VK_MULTIALLOC_DECL_SIZE(&ma, void, obj_key_data, key_size);
if (!vk_multialloc_zalloc(&ma, &dev->vk.alloc,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
return NULL;
memcpy(obj_key_data, key_data, key_size);
vk_pipeline_cache_object_init(&dev->vk, &shaders->base,
&tu_shaders_ops, obj_key_data, key_size);
return shaders;
}
static bool
tu_shaders_serialize(struct vk_pipeline_cache_object *object,
struct blob *blob)
{
struct tu_compiled_shaders *shaders =
container_of(object, struct tu_compiled_shaders, base);
blob_write_bytes(blob, shaders->push_consts, sizeof(shaders->push_consts));
blob_write_uint8(blob, shaders->active_desc_sets);
blob_write_uint8(blob, shaders->multi_pos_output);
for (unsigned i = 0; i < ARRAY_SIZE(shaders->variants); i++) {
if (shaders->variants[i]) {
blob_write_uint8(blob, 1);
ir3_store_variant(blob, shaders->variants[i]);
} else {
blob_write_uint8(blob, 0);
}
}
return true;
}
static struct vk_pipeline_cache_object *
tu_shaders_deserialize(struct vk_device *_device,
const void *key_data, size_t key_size,
struct blob_reader *blob)
{
struct tu_device *dev = container_of(_device, struct tu_device, vk);
struct tu_compiled_shaders *shaders =
tu_shaders_init(dev, key_data, key_size);
if (!shaders)
return NULL;
blob_copy_bytes(blob, shaders->push_consts, sizeof(shaders->push_consts));
shaders->active_desc_sets = blob_read_uint8(blob);
shaders->multi_pos_output = blob_read_uint8(blob);
for (unsigned i = 0; i < ARRAY_SIZE(shaders->variants); i++) {
bool has_shader = blob_read_uint8(blob);
if (has_shader) {
shaders->variants[i] = ir3_retrieve_variant(blob, dev->compiler, NULL);
}
}
return &shaders->base;
}
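
The two callbacks above are expected to round-trip through mesa's util/blob API; the common cache code drives them both for VkPipelineCache data and for disk-cache entries. A minimal sketch of that invariant, illustrative only, assuming an already-populated tu_compiled_shaders object `shaders` and a struct tu_device `dev`:

struct blob blob;
blob_init(&blob);
tu_shaders_serialize(&shaders->base, &blob);

struct blob_reader reader;
blob_reader_init(&reader, blob.data, blob.size);
struct vk_pipeline_cache_object *copy =
   tu_shaders_deserialize(&dev->vk, shaders->base.key_data,
                          shaders->base.key_size, &reader);
assert(copy && !reader.overrun);

vk_pipeline_cache_object_unref(copy);   /* frees the deserialized copy */
blob_finish(&blob);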
static struct tu_compiled_shaders *
tu_pipeline_cache_lookup(struct vk_pipeline_cache *cache,
const void *key_data, size_t key_size)
{
struct vk_pipeline_cache_object *object =
vk_pipeline_cache_lookup_object(cache, key_data, key_size,
&tu_shaders_ops, NULL);
if (object)
return container_of(object, struct tu_compiled_shaders, base);
else
return NULL;
}
static struct tu_compiled_shaders *
tu_pipeline_cache_insert(struct vk_pipeline_cache *cache,
struct tu_compiled_shaders *shaders)
{
struct vk_pipeline_cache_object *object =
vk_pipeline_cache_add_object(cache, &shaders->base);
return container_of(object, struct tu_compiled_shaders, base);
}
static VkResult
tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
struct tu_pipeline *pipeline)
{
VkResult result = VK_SUCCESS;
const struct ir3_compiler *compiler = builder->device->compiler;
const VkPipelineShaderStageCreateInfo *stage_infos[MESA_SHADER_STAGES] = {
NULL
@ -2459,10 +2695,40 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
stage_infos[stage] = &builder->create_info->pStages[i];
}
struct ir3_shader_key key = {};
tu_pipeline_shader_key_init(&key, pipeline, builder->create_info);
struct tu_shader_key keys[ARRAY_SIZE(stage_infos)] = { };
for (gl_shader_stage stage = MESA_SHADER_VERTEX;
stage < ARRAY_SIZE(keys); stage++) {
tu_shader_key_init(&keys[stage], stage_infos[stage], builder->device);
}
nir_shader *nir[ARRAY_SIZE(builder->shaders)] = { NULL };
struct ir3_shader_key ir3_key = {};
tu_pipeline_shader_key_init(&ir3_key, pipeline, builder->create_info);
keys[MESA_SHADER_VERTEX].multiview_mask = builder->multiview_mask;
keys[MESA_SHADER_FRAGMENT].multiview_mask = builder->multiview_mask;
unsigned char pipeline_sha1[20];
tu_hash_shaders(pipeline_sha1, stage_infos, builder->layout, keys, &ir3_key, compiler);
const bool executable_info = builder->create_info->flags &
VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
char *nir_initial_disasm[ARRAY_SIZE(stage_infos)] = { NULL };
struct tu_compiled_shaders *compiled_shaders;
if (!executable_info) {
compiled_shaders =
tu_pipeline_cache_lookup(builder->cache, &pipeline_sha1,
sizeof(pipeline_sha1));
if (compiled_shaders)
goto done;
}
nir_shader *nir[ARRAY_SIZE(stage_infos)] = { NULL };
struct tu_shader *shaders[ARRAY_SIZE(nir)] = { NULL };
for (gl_shader_stage stage = MESA_SHADER_VERTEX;
stage < ARRAY_SIZE(nir); stage++) {
@ -2471,8 +2737,10 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
continue;
nir[stage] = tu_spirv_to_nir(builder->device, builder->mem_ctx, stage_info, stage);
if (!nir[stage])
return VK_ERROR_OUT_OF_HOST_MEMORY;
if (!nir[stage]) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail;
}
}
if (!nir[MESA_SHADER_FRAGMENT]) {
@ -2484,11 +2752,6 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
nir[MESA_SHADER_FRAGMENT] = fs_b.shader;
}
const bool executable_info = builder->create_info->flags &
VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
char *nir_initial_disasm[ARRAY_SIZE(builder->shaders)] = { NULL };
if (executable_info) {
for (gl_shader_stage stage = MESA_SHADER_VERTEX;
stage < ARRAY_SIZE(nir); stage++) {
@ -2509,26 +2772,27 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
continue;
struct tu_shader *shader =
tu_shader_create(builder->device, nir[stage], stage_infos[stage],
builder->multiview_mask, builder->layout,
builder->alloc);
if (!shader)
return VK_ERROR_OUT_OF_HOST_MEMORY;
tu_shader_create(builder->device, nir[stage], &keys[stage],
builder->layout, builder->alloc);
if (!shader) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail;
}
/* In SPIR-V generated from GLSL, the primitive mode is specified in the
* tessellation evaluation shader, but in SPIR-V generated from HLSL,
* the mode is specified in the tessellation control shader. */
if ((stage == MESA_SHADER_TESS_EVAL || stage == MESA_SHADER_TESS_CTRL) &&
key.tessellation == IR3_TESS_NONE) {
key.tessellation = tu6_get_tessmode(shader);
ir3_key.tessellation == IR3_TESS_NONE) {
ir3_key.tessellation = tu6_get_tessmode(shader);
}
if (stage > MESA_SHADER_TESS_CTRL) {
if (stage == MESA_SHADER_FRAGMENT) {
key.tcs_store_primid = key.tcs_store_primid ||
ir3_key.tcs_store_primid = ir3_key.tcs_store_primid ||
(nir[stage]->info.inputs_read & (1ull << VARYING_SLOT_PRIMITIVE_ID));
} else {
key.tcs_store_primid = key.tcs_store_primid ||
ir3_key.tcs_store_primid = ir3_key.tcs_store_primid ||
BITSET_TEST(nir[stage]->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID);
}
}
@ -2537,85 +2801,121 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
* which is set in tu_lower_io. */
desc_sets |= shader->active_desc_sets;
builder->shaders[stage] = shader;
shaders[stage] = shader;
}
pipeline->active_desc_sets = desc_sets;
struct tu_shader *last_shader = builder->shaders[MESA_SHADER_GEOMETRY];
struct tu_shader *last_shader = shaders[MESA_SHADER_GEOMETRY];
if (!last_shader)
last_shader = builder->shaders[MESA_SHADER_TESS_EVAL];
last_shader = shaders[MESA_SHADER_TESS_EVAL];
if (!last_shader)
last_shader = builder->shaders[MESA_SHADER_VERTEX];
last_shader = shaders[MESA_SHADER_VERTEX];
uint64_t outputs_written = last_shader->ir3_shader->nir->info.outputs_written;
key.layer_zero = !(outputs_written & VARYING_BIT_LAYER);
key.view_zero = !(outputs_written & VARYING_BIT_VIEWPORT);
ir3_key.layer_zero = !(outputs_written & VARYING_BIT_LAYER);
ir3_key.view_zero = !(outputs_written & VARYING_BIT_VIEWPORT);
pipeline->tess.patch_type = key.tessellation;
compiled_shaders =
tu_shaders_init(builder->device, &pipeline_sha1, sizeof(pipeline_sha1));
for (gl_shader_stage stage = MESA_SHADER_VERTEX;
stage < ARRAY_SIZE(builder->shaders); stage++) {
if (!builder->shaders[stage])
continue;
bool created;
builder->variants[stage] =
ir3_shader_get_variant(builder->shaders[stage]->ir3_shader,
&key, false, executable_info, &created);
if (!builder->variants[stage])
return VK_ERROR_OUT_OF_HOST_MEMORY;
if (!compiled_shaders) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail;
}
uint32_t safe_constlens = ir3_trim_constlen(builder->variants, compiler);
key.safe_constlen = true;
compiled_shaders->active_desc_sets = desc_sets;
compiled_shaders->multi_pos_output =
shaders[MESA_SHADER_VERTEX]->multi_pos_output;
for (gl_shader_stage stage = MESA_SHADER_VERTEX;
stage < ARRAY_SIZE(builder->shaders); stage++) {
if (!builder->shaders[stage])
stage < ARRAY_SIZE(shaders); stage++) {
if (!shaders[stage])
continue;
compiled_shaders->variants[stage] =
ir3_shader_create_variant(shaders[stage]->ir3_shader, &ir3_key,
executable_info);
if (!compiled_shaders->variants[stage])
return VK_ERROR_OUT_OF_HOST_MEMORY;
compiled_shaders->push_consts[stage] = shaders[stage]->push_consts;
}
uint32_t safe_constlens = ir3_trim_constlen(compiled_shaders->variants, compiler);
ir3_key.safe_constlen = true;
for (gl_shader_stage stage = MESA_SHADER_VERTEX;
stage < ARRAY_SIZE(shaders); stage++) {
if (!shaders[stage])
continue;
if (safe_constlens & (1 << stage)) {
bool created;
builder->variants[stage] =
ir3_shader_get_variant(builder->shaders[stage]->ir3_shader,
&key, false, executable_info, &created);
if (!builder->variants[stage])
return VK_ERROR_OUT_OF_HOST_MEMORY;
ralloc_free(compiled_shaders->variants[stage]);
compiled_shaders->variants[stage] =
ir3_shader_create_variant(shaders[stage]->ir3_shader, &ir3_key,
executable_info);
if (!compiled_shaders->variants[stage]) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail;
}
}
}
const struct tu_shader *vs = builder->shaders[MESA_SHADER_VERTEX];
struct ir3_shader_variant *variant;
if (vs->ir3_shader->stream_output.num_outputs ||
!ir3_has_binning_vs(&key)) {
variant = builder->variants[MESA_SHADER_VERTEX];
} else {
bool created;
key.safe_constlen = !!(safe_constlens & (1 << MESA_SHADER_VERTEX));
variant = ir3_shader_get_variant(vs->ir3_shader, &key,
true, executable_info, &created);
if (!variant)
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
builder->binning_variant = variant;
for (gl_shader_stage stage = MESA_SHADER_VERTEX;
stage < ARRAY_SIZE(nir); stage++) {
if (builder->variants[stage]) {
tu_append_executable(pipeline, builder->variants[stage],
if (shaders[stage]) {
tu_shader_destroy(builder->device, shaders[stage], builder->alloc);
}
}
compiled_shaders =
tu_pipeline_cache_insert(builder->cache, compiled_shaders);
done:
for (gl_shader_stage stage = MESA_SHADER_VERTEX;
stage < ARRAY_SIZE(nir); stage++) {
if (compiled_shaders->variants[stage]) {
tu_append_executable(pipeline, compiled_shaders->variants[stage],
nir_initial_disasm[stage]);
}
}
if (builder->binning_variant != builder->variants[MESA_SHADER_VERTEX]) {
tu_append_executable(pipeline, builder->binning_variant, NULL);
struct ir3_shader_variant *vs =
compiled_shaders->variants[MESA_SHADER_VERTEX];
struct ir3_shader_variant *variant;
if (!vs->stream_output.num_outputs && ir3_has_binning_vs(&vs->key)) {
tu_append_executable(pipeline, vs->binning, NULL);
variant = vs->binning;
} else {
variant = vs;
}
builder->binning_variant = variant;
builder->shaders = compiled_shaders;
pipeline->active_desc_sets = compiled_shaders->active_desc_sets;
if (compiled_shaders->variants[MESA_SHADER_TESS_CTRL]) {
pipeline->tess.patch_type =
compiled_shaders->variants[MESA_SHADER_TESS_CTRL]->key.tessellation;
}
return VK_SUCCESS;
fail:
for (gl_shader_stage stage = MESA_SHADER_VERTEX;
stage < ARRAY_SIZE(nir); stage++) {
if (shaders[stage]) {
tu_shader_destroy(builder->device, shaders[stage], builder->alloc);
}
}
if (compiled_shaders)
vk_pipeline_cache_object_unref(&compiled_shaders->base);
return result;
}
static void
@ -2722,12 +3022,12 @@ tu_pipeline_builder_parse_dynamic(struct tu_pipeline_builder *builder,
static void
tu_pipeline_set_linkage(struct tu_program_descriptor_linkage *link,
struct tu_shader *shader,
struct tu_push_constant_range *push_consts,
struct ir3_shader_variant *v)
{
link->const_state = *ir3_const_state(v);
link->constlen = v->constlen;
link->push_consts = shader->push_consts;
link->push_consts = *push_consts;
}
static void
@ -2765,13 +3065,13 @@ tu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder,
}
pipeline->active_stages = stages;
for (unsigned i = 0; i < ARRAY_SIZE(builder->shaders); i++) {
if (!builder->shaders[i])
for (unsigned i = 0; i < ARRAY_SIZE(builder->shaders->variants); i++) {
if (!builder->shaders->variants[i])
continue;
tu_pipeline_set_linkage(&pipeline->program.link[i],
builder->shaders[i],
builder->variants[i]);
&builder->shaders->push_consts[i],
builder->shaders->variants[i]);
}
}
@ -2781,7 +3081,7 @@ tu_pipeline_builder_parse_vertex_input(struct tu_pipeline_builder *builder,
{
const VkPipelineVertexInputStateCreateInfo *vi_info =
builder->create_info->pVertexInputState;
const struct ir3_shader_variant *vs = builder->variants[MESA_SHADER_VERTEX];
const struct ir3_shader_variant *vs = builder->shaders->variants[MESA_SHADER_VERTEX];
const struct ir3_shader_variant *bs = builder->binning_variant;
/* Bindings may contain holes */
@ -2847,7 +3147,7 @@ tu_pipeline_builder_parse_tessellation(struct tu_pipeline_builder *builder,
vk_find_struct_const(tess_info->pNext, PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO);
pipeline->tess.upper_left_domain_origin = !domain_info ||
domain_info->domainOrigin == VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT;
const struct ir3_shader_variant *hs = builder->variants[MESA_SHADER_TESS_CTRL];
const struct ir3_shader_variant *hs = builder->shaders->variants[MESA_SHADER_TESS_CTRL];
pipeline->tess.param_stride = hs->output_size * 4;
}
@ -3092,8 +3392,8 @@ tu_pipeline_builder_parse_depth_stencil(struct tu_pipeline_builder *builder,
.bfref = ds_info->back.reference & 0xff));
}
if (builder->shaders[MESA_SHADER_FRAGMENT]) {
const struct ir3_shader_variant *fs = &builder->shaders[MESA_SHADER_FRAGMENT]->ir3_shader->variants[0];
if (builder->shaders->variants[MESA_SHADER_FRAGMENT]) {
const struct ir3_shader_variant *fs = builder->shaders->variants[MESA_SHADER_FRAGMENT];
if (fs->has_kill || fs->no_earlyz || fs->writes_pos) {
pipeline->lrz.force_disable_mask |= TU_LRZ_FORCE_DISABLE_WRITE;
}
@ -3300,18 +3600,19 @@ tu_pipeline_builder_build(struct tu_pipeline_builder *builder,
result = tu_pipeline_builder_compile_shaders(builder, *pipeline);
if (result != VK_SUCCESS) {
vk_object_free(&builder->device->vk, builder->alloc, *pipeline);
return result;
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
result = tu_pipeline_allocate_cs(builder->device, *pipeline,
builder->layout, builder, builder->cache, NULL);
builder->layout, builder, NULL);
if (result != VK_SUCCESS) {
vk_object_free(&builder->device->vk, builder->alloc, *pipeline);
return result;
}
for (uint32_t i = 0; i < ARRAY_SIZE(builder->variants); i++)
builder->shader_iova[i] = tu_upload_variant(*pipeline, builder->variants[i]);
for (uint32_t i = 0; i < ARRAY_SIZE(builder->shader_iova); i++)
builder->shader_iova[i] =
tu_upload_variant(*pipeline, builder->shaders->variants[i]);
builder->binning_vs_iova =
tu_upload_variant(*pipeline, builder->binning_variant);
@ -3323,10 +3624,10 @@ tu_pipeline_builder_build(struct tu_pipeline_builder *builder,
uint32_t pvtmem_size = 0;
bool per_wave = true;
for (uint32_t i = 0; i < ARRAY_SIZE(builder->variants); i++) {
if (builder->variants[i]) {
pvtmem_size = MAX2(pvtmem_size, builder->variants[i]->pvtmem_size);
if (!builder->variants[i]->pvtmem_per_wave)
for (uint32_t i = 0; i < ARRAY_SIZE(builder->shaders->variants); i++) {
if (builder->shaders->variants[i]) {
pvtmem_size = MAX2(pvtmem_size, builder->shaders->variants[i]->pvtmem_size);
if (!builder->shaders->variants[i]->pvtmem_per_wave)
per_wave = false;
}
}
@ -3362,11 +3663,8 @@ tu_pipeline_builder_build(struct tu_pipeline_builder *builder,
static void
tu_pipeline_builder_finish(struct tu_pipeline_builder *builder)
{
for (uint32_t i = 0; i < ARRAY_SIZE(builder->shaders); i++) {
if (!builder->shaders[i])
continue;
tu_shader_destroy(builder->device, builder->shaders[i], builder->alloc);
}
if (builder->shaders)
vk_pipeline_cache_object_unref(&builder->shaders->base);
ralloc_free(builder->mem_ctx);
}
@ -3374,7 +3672,7 @@ static void
tu_pipeline_builder_init_graphics(
struct tu_pipeline_builder *builder,
struct tu_device *dev,
struct tu_pipeline_cache *cache,
struct vk_pipeline_cache *cache,
const VkGraphicsPipelineCreateInfo *create_info,
const VkAllocationCallbacks *alloc)
{
@ -3461,7 +3759,9 @@ tu_graphics_pipeline_create(VkDevice device,
VkPipeline *pPipeline)
{
TU_FROM_HANDLE(tu_device, dev, device);
TU_FROM_HANDLE(tu_pipeline_cache, cache, pipelineCache);
TU_FROM_HANDLE(vk_pipeline_cache, cache, pipelineCache);
cache = cache ? cache : dev->mem_cache;
struct tu_pipeline_builder builder;
tu_pipeline_builder_init_graphics(&builder, dev, cache,
@ -3509,11 +3809,13 @@ tu_compute_pipeline_create(VkDevice device,
VkPipeline *pPipeline)
{
TU_FROM_HANDLE(tu_device, dev, device);
TU_FROM_HANDLE(tu_pipeline_cache, cache, pipelineCache);
TU_FROM_HANDLE(vk_pipeline_cache, cache, pipelineCache);
TU_FROM_HANDLE(tu_pipeline_layout, layout, pCreateInfo->layout);
const VkPipelineShaderStageCreateInfo *stage_info = &pCreateInfo->stage;
VkResult result;
cache = cache ? cache : dev->mem_cache;
struct tu_pipeline *pipeline;
*pPipeline = VK_NULL_HANDLE;
@ -3526,38 +3828,73 @@ tu_compute_pipeline_create(VkDevice device,
pipeline->executables_mem_ctx = ralloc_context(NULL);
util_dynarray_init(&pipeline->executables, pipeline->executables_mem_ctx);
struct ir3_shader_key key = {};
struct tu_shader_key key;
tu_shader_key_init(&key, stage_info, dev);
void *pipeline_mem_ctx = ralloc_context(NULL);
nir_shader *nir = tu_spirv_to_nir(dev, pipeline_mem_ctx, stage_info, MESA_SHADER_COMPUTE);
unsigned char pipeline_sha1[20];
tu_hash_compute(pipeline_sha1, stage_info, layout, &key, dev->compiler);
struct tu_compiled_shaders *compiled = NULL;
const bool executable_info = pCreateInfo->flags &
VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
char *nir_initial_disasm = executable_info ?
nir_shader_as_str(nir, pipeline->executables_mem_ctx) : NULL;
if (!executable_info)
compiled = tu_pipeline_cache_lookup(cache, pipeline_sha1, sizeof(pipeline_sha1));
struct tu_shader *shader =
tu_shader_create(dev, nir, stage_info, 0, layout, pAllocator);
if (!shader) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail;
char *nir_initial_disasm = NULL;
if (!compiled) {
struct ir3_shader_key ir3_key = {};
nir_shader *nir = tu_spirv_to_nir(dev, pipeline_mem_ctx, stage_info,
MESA_SHADER_COMPUTE);
nir_initial_disasm = executable_info ?
nir_shader_as_str(nir, pipeline->executables_mem_ctx) : NULL;
struct tu_shader *shader =
tu_shader_create(dev, nir, &key, layout, pAllocator);
if (!shader) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail;
}
compiled = tu_shaders_init(dev, &pipeline_sha1, sizeof(pipeline_sha1));
if (!compiled) {
tu_shader_destroy(dev, shader, pAllocator);
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail;
}
compiled->active_desc_sets = shader->active_desc_sets;
compiled->push_consts[MESA_SHADER_COMPUTE] = shader->push_consts;
struct ir3_shader_variant *v =
ir3_shader_create_variant(shader->ir3_shader, &ir3_key, executable_info);
tu_shader_destroy(dev, shader, pAllocator);
if (!v) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail;
}
compiled->variants[MESA_SHADER_COMPUTE] = v;
compiled = tu_pipeline_cache_insert(cache, compiled);
}
pipeline->active_desc_sets = shader->active_desc_sets;
pipeline->active_desc_sets = compiled->active_desc_sets;
bool created;
struct ir3_shader_variant *v =
ir3_shader_get_variant(shader->ir3_shader, &key, false, executable_info, &created);
if (!v) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail;
}
struct ir3_shader_variant *v = compiled->variants[MESA_SHADER_COMPUTE];
tu_pipeline_set_linkage(&pipeline->program.link[MESA_SHADER_COMPUTE],
shader, v);
&compiled->push_consts[MESA_SHADER_COMPUTE], v);
result = tu_pipeline_allocate_cs(dev, pipeline, layout, NULL, cache, v);
result = tu_pipeline_allocate_cs(dev, pipeline, layout, NULL, v);
if (result != VK_SUCCESS)
goto fail;
@ -3574,14 +3911,14 @@ tu_compute_pipeline_create(VkDevice device,
struct tu_cs prog_cs;
uint32_t additional_reserve_size = tu_xs_get_additional_cs_size_dwords(v);
tu_cs_begin_sub_stream(&pipeline->cs, 64 + additional_reserve_size, &prog_cs);
tu6_emit_cs_config(&prog_cs, shader, v, &pvtmem, shader_iova);
tu6_emit_cs_config(&prog_cs, v, &pvtmem, shader_iova);
pipeline->program.state = tu_cs_end_draw_state(&pipeline->cs, &prog_cs);
tu6_emit_load_state(pipeline, layout, true);
tu_append_executable(pipeline, v, nir_initial_disasm);
tu_shader_destroy(dev, shader, pAllocator);
vk_pipeline_cache_object_unref(&compiled->base);
ralloc_free(pipeline_mem_ctx);
*pPipeline = tu_pipeline_to_handle(pipeline);
@ -3589,8 +3926,8 @@ tu_compute_pipeline_create(VkDevice device,
return VK_SUCCESS;
fail:
if (shader)
tu_shader_destroy(dev, shader, pAllocator);
if (compiled)
vk_pipeline_cache_object_unref(&compiled->base);
ralloc_free(pipeline_mem_ctx);

src/freedreno/vulkan/tu_pipeline_cache.c (deleted)

@ -1,379 +0,0 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "tu_private.h"
#include "util/debug.h"
#include "util/disk_cache.h"
#include "util/mesa-sha1.h"
#include "util/u_atomic.h"
#include "vulkan/util/vk_util.h"
struct cache_entry_variant_info
{
};
struct cache_entry
{
union {
unsigned char sha1[20];
uint32_t sha1_dw[5];
};
uint32_t code_sizes[MESA_SHADER_STAGES];
struct tu_shader_variant *variants[MESA_SHADER_STAGES];
char code[0];
};
static void
tu_pipeline_cache_init(struct tu_pipeline_cache *cache,
struct tu_device *device)
{
cache->device = device;
pthread_mutex_init(&cache->mutex, NULL);
cache->modified = false;
cache->kernel_count = 0;
cache->total_size = 0;
cache->table_size = 1024;
const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
cache->hash_table = malloc(byte_size);
/* We don't consider allocation failure fatal, we just start with a 0-sized
* cache. Disable caching when we want to keep shader debug info, since
* we don't get the debug info on cached shaders. */
if (cache->hash_table == NULL)
cache->table_size = 0;
else
memset(cache->hash_table, 0, byte_size);
}
static void
tu_pipeline_cache_finish(struct tu_pipeline_cache *cache)
{
for (unsigned i = 0; i < cache->table_size; ++i)
if (cache->hash_table[i]) {
vk_free(&cache->alloc, cache->hash_table[i]);
}
pthread_mutex_destroy(&cache->mutex);
free(cache->hash_table);
}
static uint32_t
entry_size(struct cache_entry *entry)
{
size_t ret = sizeof(*entry);
for (int i = 0; i < MESA_SHADER_STAGES; ++i)
if (entry->code_sizes[i])
ret +=
sizeof(struct cache_entry_variant_info) + entry->code_sizes[i];
return ret;
}
static struct cache_entry *
tu_pipeline_cache_search_unlocked(struct tu_pipeline_cache *cache,
const unsigned char *sha1)
{
const uint32_t mask = cache->table_size - 1;
const uint32_t start = (*(uint32_t *) sha1);
if (cache->table_size == 0)
return NULL;
for (uint32_t i = 0; i < cache->table_size; i++) {
const uint32_t index = (start + i) & mask;
struct cache_entry *entry = cache->hash_table[index];
if (!entry)
return NULL;
if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
return entry;
}
}
unreachable("hash table should never be full");
}
static struct cache_entry *
tu_pipeline_cache_search(struct tu_pipeline_cache *cache,
const unsigned char *sha1)
{
struct cache_entry *entry;
pthread_mutex_lock(&cache->mutex);
entry = tu_pipeline_cache_search_unlocked(cache, sha1);
pthread_mutex_unlock(&cache->mutex);
return entry;
}
static void
tu_pipeline_cache_set_entry(struct tu_pipeline_cache *cache,
struct cache_entry *entry)
{
const uint32_t mask = cache->table_size - 1;
const uint32_t start = entry->sha1_dw[0];
/* We'll always be able to insert when we get here. */
assert(cache->kernel_count < cache->table_size / 2);
for (uint32_t i = 0; i < cache->table_size; i++) {
const uint32_t index = (start + i) & mask;
if (!cache->hash_table[index]) {
cache->hash_table[index] = entry;
break;
}
}
cache->total_size += entry_size(entry);
cache->kernel_count++;
}
static VkResult
tu_pipeline_cache_grow(struct tu_pipeline_cache *cache)
{
const uint32_t table_size = cache->table_size * 2;
const uint32_t old_table_size = cache->table_size;
const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
struct cache_entry **table;
struct cache_entry **old_table = cache->hash_table;
table = malloc(byte_size);
if (table == NULL)
return vk_error(cache, VK_ERROR_OUT_OF_HOST_MEMORY);
cache->hash_table = table;
cache->table_size = table_size;
cache->kernel_count = 0;
cache->total_size = 0;
memset(cache->hash_table, 0, byte_size);
for (uint32_t i = 0; i < old_table_size; i++) {
struct cache_entry *entry = old_table[i];
if (!entry)
continue;
tu_pipeline_cache_set_entry(cache, entry);
}
free(old_table);
return VK_SUCCESS;
}
static void
tu_pipeline_cache_add_entry(struct tu_pipeline_cache *cache,
struct cache_entry *entry)
{
if (cache->kernel_count == cache->table_size / 2)
tu_pipeline_cache_grow(cache);
/* Failing to grow that hash table isn't fatal, but may mean we don't
* have enough space to add this new kernel. Only add it if there's room.
*/
if (cache->kernel_count < cache->table_size / 2)
tu_pipeline_cache_set_entry(cache, entry);
}
static void
tu_pipeline_cache_load(struct tu_pipeline_cache *cache,
const void *data,
size_t size)
{
struct tu_device *device = cache->device;
struct vk_pipeline_cache_header header;
if (size < sizeof(header))
return;
memcpy(&header, data, sizeof(header));
if (header.header_size < sizeof(header))
return;
if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
return;
if (header.vendor_id != 0x5143)
return;
if (header.device_id != device->physical_device->dev_id.chip_id)
return;
if (memcmp(header.uuid, device->physical_device->cache_uuid,
VK_UUID_SIZE) != 0)
return;
char *end = (void *) data + size;
char *p = (void *) data + header.header_size;
while (end - p >= sizeof(struct cache_entry)) {
struct cache_entry *entry = (struct cache_entry *) p;
struct cache_entry *dest_entry;
size_t size = entry_size(entry);
if (end - p < size)
break;
dest_entry =
vk_alloc(&cache->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
if (dest_entry) {
memcpy(dest_entry, entry, size);
for (int i = 0; i < MESA_SHADER_STAGES; ++i)
dest_entry->variants[i] = NULL;
tu_pipeline_cache_add_entry(cache, dest_entry);
}
p += size;
}
}
VKAPI_ATTR VkResult VKAPI_CALL
tu_CreatePipelineCache(VkDevice _device,
const VkPipelineCacheCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
VkPipelineCache *pPipelineCache)
{
TU_FROM_HANDLE(tu_device, device, _device);
struct tu_pipeline_cache *cache;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
assert(pCreateInfo->flags == 0);
cache = vk_object_alloc(&device->vk, pAllocator, sizeof(*cache),
VK_OBJECT_TYPE_PIPELINE_CACHE);
if (cache == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
if (pAllocator)
cache->alloc = *pAllocator;
else
cache->alloc = device->vk.alloc;
tu_pipeline_cache_init(cache, device);
if (pCreateInfo->initialDataSize > 0) {
tu_pipeline_cache_load(cache, pCreateInfo->pInitialData,
pCreateInfo->initialDataSize);
}
*pPipelineCache = tu_pipeline_cache_to_handle(cache);
return VK_SUCCESS;
}
VKAPI_ATTR void VKAPI_CALL
tu_DestroyPipelineCache(VkDevice _device,
VkPipelineCache _cache,
const VkAllocationCallbacks *pAllocator)
{
TU_FROM_HANDLE(tu_device, device, _device);
TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache);
if (!cache)
return;
tu_pipeline_cache_finish(cache);
vk_object_free(&device->vk, pAllocator, cache);
}
VKAPI_ATTR VkResult VKAPI_CALL
tu_GetPipelineCacheData(VkDevice _device,
VkPipelineCache _cache,
size_t *pDataSize,
void *pData)
{
TU_FROM_HANDLE(tu_device, device, _device);
TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache);
struct vk_pipeline_cache_header *header;
VkResult result = VK_SUCCESS;
pthread_mutex_lock(&cache->mutex);
const size_t size = sizeof(*header) + cache->total_size;
if (pData == NULL) {
pthread_mutex_unlock(&cache->mutex);
*pDataSize = size;
return VK_SUCCESS;
}
if (*pDataSize < sizeof(*header)) {
pthread_mutex_unlock(&cache->mutex);
*pDataSize = 0;
return VK_INCOMPLETE;
}
void *p = pData, *end = pData + *pDataSize;
header = p;
header->header_size = sizeof(*header);
header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
header->vendor_id = 0x5143;
header->device_id = device->physical_device->dev_id.chip_id;
memcpy(header->uuid, device->physical_device->cache_uuid, VK_UUID_SIZE);
p += header->header_size;
struct cache_entry *entry;
for (uint32_t i = 0; i < cache->table_size; i++) {
if (!cache->hash_table[i])
continue;
entry = cache->hash_table[i];
const uint32_t size = entry_size(entry);
if (end < p + size) {
result = VK_INCOMPLETE;
break;
}
memcpy(p, entry, size);
for (int j = 0; j < MESA_SHADER_STAGES; ++j)
((struct cache_entry *) p)->variants[j] = NULL;
p += size;
}
*pDataSize = p - pData;
pthread_mutex_unlock(&cache->mutex);
return result;
}
static void
tu_pipeline_cache_merge(struct tu_pipeline_cache *dst,
struct tu_pipeline_cache *src)
{
for (uint32_t i = 0; i < src->table_size; i++) {
struct cache_entry *entry = src->hash_table[i];
if (!entry || tu_pipeline_cache_search(dst, entry->sha1))
continue;
tu_pipeline_cache_add_entry(dst, entry);
src->hash_table[i] = NULL;
}
}
VKAPI_ATTR VkResult VKAPI_CALL
tu_MergePipelineCaches(VkDevice _device,
VkPipelineCache destCache,
uint32_t srcCacheCount,
const VkPipelineCache *pSrcCaches)
{
TU_FROM_HANDLE(tu_pipeline_cache, dst, destCache);
for (uint32_t i = 0; i < srcCacheCount; i++) {
TU_FROM_HANDLE(tu_pipeline_cache, src, pSrcCaches[i]);
tu_pipeline_cache_merge(dst, src);
}
return VK_SUCCESS;
}

src/freedreno/vulkan/tu_private.h

@ -65,6 +65,7 @@
#include "vk_log.h"
#include "vk_physical_device.h"
#include "vk_shader_module.h"
#include "vk_pipeline_cache.h"
#include "wsi_common.h"
#include "ir3/ir3_compiler.h"
@ -239,11 +240,6 @@ struct tu_physical_device
/* Address space and global fault count for this local_fd with DRM backend */
uint64_t fault_count;
/* This is the drivers on-disk cache used as a fallback as opposed to
* the pipeline cache defined by apps.
*/
struct disk_cache *disk_cache;
struct tu_memory_heap heap;
struct vk_sync_type syncobj_type;
@ -521,7 +517,7 @@ struct tu_device
struct ir3_compiler *compiler;
/* Backup in-memory cache to be used if the app doesn't provide one */
struct tu_pipeline_cache *mem_cache;
struct vk_pipeline_cache *mem_cache;
#define MIN_SCRATCH_BO_SIZE_LOG2 12 /* A page */
@ -1367,6 +1363,24 @@ struct tu_shader
bool multi_pos_output;
};
struct tu_shader_key {
unsigned multiview_mask;
enum ir3_wavesize_option api_wavesize, real_wavesize;
};
struct tu_compiled_shaders
{
struct vk_pipeline_cache_object base;
struct tu_push_constant_range push_consts[MESA_SHADER_STAGES];
uint8_t active_desc_sets;
bool multi_pos_output;
struct ir3_shader_variant *variants[MESA_SHADER_STAGES];
};
extern const struct vk_pipeline_cache_object_ops tu_shaders_ops;
bool
tu_nir_lower_multiview(nir_shader *nir, uint32_t mask, bool *multi_pos_output,
struct tu_device *dev);
@ -1380,8 +1394,7 @@ tu_spirv_to_nir(struct tu_device *dev,
struct tu_shader *
tu_shader_create(struct tu_device *dev,
nir_shader *nir,
const VkPipelineShaderStageCreateInfo *stage_info,
unsigned multiview_mask,
const struct tu_shader_key *key,
struct tu_pipeline_layout *layout,
const VkAllocationCallbacks *alloc);

src/freedreno/vulkan/tu_shader.c

@ -700,8 +700,7 @@ tu_gather_xfb_info(nir_shader *nir, struct ir3_stream_output_info *info)
struct tu_shader *
tu_shader_create(struct tu_device *dev,
nir_shader *nir,
const VkPipelineShaderStageCreateInfo *stage_info,
unsigned multiview_mask,
const struct tu_shader_key *key,
struct tu_pipeline_layout *layout,
const VkAllocationCallbacks *alloc)
{
@ -729,7 +728,7 @@ tu_shader_create(struct tu_device *dev,
* sampling function. gl_Layer doesn't work when
* multiview is enabled.
*/
.use_view_id_for_layer = multiview_mask != 0,
.use_view_id_for_layer = key->multiview_mask != 0,
});
}
@ -740,8 +739,8 @@ tu_shader_create(struct tu_device *dev,
*/
ir3_nir_lower_io_to_temporaries(nir);
if (nir->info.stage == MESA_SHADER_VERTEX && multiview_mask) {
tu_nir_lower_multiview(nir, multiview_mask,
if (nir->info.stage == MESA_SHADER_VERTEX && key->multiview_mask) {
tu_nir_lower_multiview(nir, key->multiview_mask,
&shader->multi_pos_output, dev);
}
@ -801,46 +800,11 @@ tu_shader_create(struct tu_device *dev,
ir3_finalize_nir(dev->compiler, nir);
enum ir3_wavesize_option api_wavesize, real_wavesize;
if (stage_info) {
if (stage_info->flags &
VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) {
api_wavesize = real_wavesize = IR3_SINGLE_OR_DOUBLE;
} else {
const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *size_info =
vk_find_struct_const(stage_info->pNext,
PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT);
if (size_info) {
if (size_info->requiredSubgroupSize == dev->compiler->threadsize_base) {
api_wavesize = IR3_SINGLE_ONLY;
} else {
assert(size_info->requiredSubgroupSize == dev->compiler->threadsize_base * 2);
api_wavesize = IR3_DOUBLE_ONLY;
}
} else {
/* Match the exposed subgroupSize. */
api_wavesize = IR3_DOUBLE_ONLY;
}
if (stage_info->flags &
VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT)
real_wavesize = api_wavesize;
else if (api_wavesize == IR3_SINGLE_ONLY)
real_wavesize = IR3_SINGLE_ONLY;
else
real_wavesize = IR3_SINGLE_OR_DOUBLE;
}
} else {
api_wavesize = real_wavesize = IR3_SINGLE_OR_DOUBLE;
}
shader->ir3_shader =
ir3_shader_from_nir(dev->compiler, nir, &(struct ir3_shader_options) {
.reserved_user_consts = align(shader->push_consts.count, 4),
.api_wavesize = api_wavesize,
.real_wavesize = real_wavesize,
.api_wavesize = key->api_wavesize,
.real_wavesize = key->real_wavesize,
}, &so_info);
return shader;