radv: Add caching for RT pipelines.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12592>
Commit authored by Bas Nieuwenhuizen on 2021-08-27 04:20:59 +02:00, committed by Marge Bot.
parent a22a4162d9
commit ca2d96db51
5 changed files with 139 additions and 49 deletions

View File

@ -212,8 +212,8 @@ radv_DestroyPipeline(VkDevice _device, VkPipeline _pipeline,
radv_pipeline_destroy(device, pipeline, pAllocator);
}
static uint32_t
get_hash_flags(const struct radv_device *device, bool stats)
uint32_t
radv_get_hash_flags(const struct radv_device *device, bool stats)
{
uint32_t hash_flags = 0;
@ -3348,7 +3348,7 @@ VkResult
radv_create_shaders(struct radv_pipeline *pipeline, struct radv_device *device,
struct radv_pipeline_cache *cache, const struct radv_pipeline_key *pipeline_key,
const VkPipelineShaderStageCreateInfo **pStages,
const VkPipelineCreateFlags flags,
const VkPipelineCreateFlags flags, const uint8_t *custom_hash,
VkPipelineCreationFeedbackEXT *pipeline_feedback,
VkPipelineCreationFeedbackEXT **stage_feedbacks)
{
@ -3368,6 +3368,9 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_device *device,
(device->instance->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) ||
device->keep_shader_info;
bool disable_optimizations = flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT;
struct radv_pipeline_shader_stack_size **stack_sizes =
pipeline->type == RADV_PIPELINE_COMPUTE ? &pipeline->compute.rt_stack_sizes : NULL;
uint32_t *num_stack_sizes = stack_sizes ? &pipeline->compute.group_count : NULL;
radv_start_feedback(pipeline_feedback);
@ -3384,8 +3387,12 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_device *device,
}
}
radv_hash_shaders(hash, pStages, pipeline->layout, pipeline_key,
get_hash_flags(device, keep_statistic_info));
if (custom_hash)
memcpy(hash, custom_hash, 20);
else {
radv_hash_shaders(hash, pStages, pipeline->layout, pipeline_key,
radv_get_hash_flags(device, keep_statistic_info));
}
memcpy(gs_copy_hash, hash, 20);
gs_copy_hash[0] ^= 1;
@ -3394,13 +3401,14 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_device *device,
bool found_in_application_cache = true;
if (modules[MESA_SHADER_GEOMETRY] && !keep_executable_info) {
struct radv_shader_variant *variants[MESA_SHADER_STAGES] = {0};
radv_create_shader_variants_from_pipeline_cache(device, cache, gs_copy_hash, variants,
&found_in_application_cache);
radv_create_shader_variants_from_pipeline_cache(device, cache, gs_copy_hash, variants, NULL,
NULL, &found_in_application_cache);
pipeline->gs_copy_shader = variants[MESA_SHADER_GEOMETRY];
}
if (!keep_executable_info &&
radv_create_shader_variants_from_pipeline_cache(device, cache, hash, pipeline->shaders,
stack_sizes, num_stack_sizes,
&found_in_application_cache) &&
(!modules[MESA_SHADER_GEOMETRY] || pipeline->gs_copy_shader)) {
radv_stop_feedback(pipeline_feedback, found_in_application_cache);
@ -3629,7 +3637,8 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_device *device,
gs_binaries[MESA_SHADER_GEOMETRY] = gs_copy_binary;
gs_variants[MESA_SHADER_GEOMETRY] = pipeline->gs_copy_shader;
radv_pipeline_cache_insert_shaders(device, cache, gs_copy_hash, gs_variants, gs_binaries);
radv_pipeline_cache_insert_shaders(device, cache, gs_copy_hash, gs_variants, gs_binaries,
NULL, 0);
pipeline->gs_copy_shader = gs_variants[MESA_SHADER_GEOMETRY];
}
@ -3698,7 +3707,9 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_device *device,
}
if (!keep_executable_info) {
radv_pipeline_cache_insert_shaders(device, cache, hash, pipeline->shaders, binaries);
radv_pipeline_cache_insert_shaders(device, cache, hash, pipeline->shaders, binaries,
stack_sizes ? *stack_sizes : NULL,
num_stack_sizes ? *num_stack_sizes : 0);
}
for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
@ -5519,7 +5530,7 @@ radv_pipeline_init(struct radv_pipeline *pipeline, struct radv_device *device,
struct radv_pipeline_key key =
radv_generate_graphics_pipeline_key(pipeline, pCreateInfo, &blend);
result = radv_create_shaders(pipeline, device, cache, &key, pStages, pCreateInfo->flags,
result = radv_create_shaders(pipeline, device, cache, &key, pStages, pCreateInfo->flags, NULL,
pipeline_feedback, stage_feedbacks);
if (result != VK_SUCCESS)
return result;
@ -5746,7 +5757,9 @@ radv_generate_compute_pipeline_key(struct radv_pipeline *pipeline,
VkResult
radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache,
const VkComputePipelineCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline)
const VkAllocationCallbacks *pAllocator, const uint8_t *custom_hash,
struct radv_pipeline_shader_stack_size *rt_stack_sizes,
uint32_t rt_group_count, VkPipeline *pPipeline)
{
RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
@ -5759,8 +5772,10 @@ radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache,
pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (pipeline == NULL)
if (pipeline == NULL) {
free(rt_stack_sizes);
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
}
vk_object_base_init(&device->vk, &pipeline->base, VK_OBJECT_TYPE_PIPELINE);
pipeline->type = RADV_PIPELINE_COMPUTE;
@ -5768,6 +5783,8 @@ radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache,
pipeline->device = device;
pipeline->graphics.last_vgt_api_stage = MESA_SHADER_NONE;
pipeline->layout = radv_pipeline_layout_from_handle(pCreateInfo->layout);
pipeline->compute.rt_stack_sizes = rt_stack_sizes;
pipeline->compute.group_count = rt_group_count;
assert(pipeline->layout);
const VkPipelineCreationFeedbackCreateInfoEXT *creation_feedback =
@ -5784,7 +5801,7 @@ radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache,
struct radv_pipeline_key key = radv_generate_compute_pipeline_key(pipeline, pCreateInfo);
result = radv_create_shaders(pipeline, device, cache, &key, pStages, pCreateInfo->flags,
pipeline_feedback, stage_feedbacks);
custom_hash, pipeline_feedback, stage_feedbacks);
if (result != VK_SUCCESS) {
radv_pipeline_destroy(device, pipeline, pAllocator);
return result;
@ -5813,8 +5830,8 @@ radv_CreateComputePipelines(VkDevice _device, VkPipelineCache pipelineCache, uin
unsigned i = 0;
for (; i < count; i++) {
VkResult r;
r = radv_compute_pipeline_create(_device, pipelineCache, &pCreateInfos[i], pAllocator,
&pPipelines[i]);
r = radv_compute_pipeline_create(_device, pipelineCache, &pCreateInfos[i], pAllocator, NULL,
NULL, 0, &pPipelines[i]);
if (r != VK_SUCCESS) {
result = r;
pPipelines[i] = VK_NULL_HANDLE;

View File

@ -37,6 +37,7 @@ struct cache_entry {
uint32_t sha1_dw[5];
};
uint32_t binary_sizes[MESA_SHADER_STAGES];
uint32_t num_stack_sizes;
struct radv_shader_variant *variants[MESA_SHADER_STAGES];
char code[0];
};
@ -139,6 +140,39 @@ radv_hash_shaders(unsigned char *hash, const VkPipelineShaderStageCreateInfo **s
_mesa_sha1_final(&ctx, hash);
}
/* Compute a SHA-1 cache key identifying a ray-tracing pipeline.
 *
 * The hash folds in everything that affects RT shader compilation: the
 * pipeline layout, every stage's module hash / entry point / specialization
 * constants, the shader-group table, the max recursion depth (only when the
 * stack size is not dynamic), and the device-level hash flags. The resulting
 * key is passed as `custom_hash` into radv_create_shaders() so the compiled
 * RT compute shader (and its per-group stack sizes) can be looked up in the
 * pipeline cache without first generating the NIR.
 *
 * hash:        out — receives the 20-byte SHA-1 digest.
 * pCreateInfo: the application's RT pipeline create info being hashed.
 * flags:       radv_get_hash_flags() result; mixed in so cache entries built
 *              under different debug/statistics settings do not collide.
 */
void
radv_hash_rt_shaders(unsigned char *hash, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
uint32_t flags)
{
RADV_FROM_HANDLE(radv_pipeline_layout, layout, pCreateInfo->layout);
struct mesa_sha1 ctx;
_mesa_sha1_init(&ctx);
/* The layout contributes its own precomputed SHA-1 (descriptor/push-constant
 * interface affects codegen). */
if (layout)
_mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
/* Per stage: module content hash, entry-point name, and (if present) the
 * specialization map entries plus the raw specialization data. */
for (uint32_t i = 0; i < pCreateInfo->stageCount; ++i) {
RADV_FROM_HANDLE(vk_shader_module, module, pCreateInfo->pStages[i].module);
const VkSpecializationInfo *spec_info = pCreateInfo->pStages[i].pSpecializationInfo;
_mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
_mesa_sha1_update(&ctx, pCreateInfo->pStages[i].pName, strlen(pCreateInfo->pStages[i].pName));
if (spec_info && spec_info->mapEntryCount) {
_mesa_sha1_update(&ctx, spec_info->pMapEntries,
spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
_mesa_sha1_update(&ctx, spec_info->pData, spec_info->dataSize);
}
}
/* The group table maps groups to stages, so it is part of the key. */
_mesa_sha1_update(&ctx, pCreateInfo->pGroups,
pCreateInfo->groupCount * sizeof(*pCreateInfo->pGroups));
/* Recursion depth only influences the compiled stack layout when the stack
 * size is baked in; with a dynamic stack size it must NOT perturb the key. */
if (!radv_rt_pipeline_has_dynamic_stack_size(pCreateInfo))
_mesa_sha1_update(&ctx, &pCreateInfo->maxPipelineRayRecursionDepth, 4);
_mesa_sha1_update(&ctx, &flags, 4);
_mesa_sha1_final(&ctx, hash);
}
static struct cache_entry *
radv_pipeline_cache_search_unlocked(struct radv_pipeline_cache *cache, const unsigned char *sha1)
{
@ -253,11 +287,10 @@ radv_is_cache_disabled(struct radv_device *device)
}
bool
radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
struct radv_pipeline_cache *cache,
const unsigned char *sha1,
struct radv_shader_variant **variants,
bool *found_in_application_cache)
radv_create_shader_variants_from_pipeline_cache(
struct radv_device *device, struct radv_pipeline_cache *cache, const unsigned char *sha1,
struct radv_shader_variant **variants, struct radv_pipeline_shader_stack_size **stack_sizes,
uint32_t *num_stack_sizes, bool *found_in_application_cache)
{
struct cache_entry *entry;
@ -325,6 +358,14 @@ radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
memcpy(variants, entry->variants, sizeof(entry->variants));
if (num_stack_sizes) {
*num_stack_sizes = entry->num_stack_sizes;
if (entry->num_stack_sizes) {
*stack_sizes = malloc(entry->num_stack_sizes * sizeof(**stack_sizes));
memcpy(*stack_sizes, p, entry->num_stack_sizes * sizeof(**stack_sizes));
}
}
if (device->instance->debug_flags & RADV_DEBUG_NO_MEMORY_CACHE && cache == device->mem_cache)
vk_free(&cache->alloc, entry);
else {
@ -340,7 +381,9 @@ radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
void
radv_pipeline_cache_insert_shaders(struct radv_device *device, struct radv_pipeline_cache *cache,
const unsigned char *sha1, struct radv_shader_variant **variants,
struct radv_shader_binary *const *binaries)
struct radv_shader_binary *const *binaries,
const struct radv_pipeline_shader_stack_size *stack_sizes,
uint32_t num_stack_sizes)
{
if (!cache)
cache = device->mem_cache;
@ -370,7 +413,7 @@ radv_pipeline_cache_insert_shaders(struct radv_device *device, struct radv_pipel
return;
}
size_t size = sizeof(*entry);
size_t size = sizeof(*entry) + sizeof(*stack_sizes) * num_stack_sizes;
for (int i = 0; i < MESA_SHADER_STAGES; ++i)
if (variants[i])
size += binaries[i]->total_size;
@ -398,6 +441,12 @@ radv_pipeline_cache_insert_shaders(struct radv_device *device, struct radv_pipel
p += binaries[i]->total_size;
}
if (num_stack_sizes) {
memcpy(p, stack_sizes, sizeof(*stack_sizes) * num_stack_sizes);
p += sizeof(*stack_sizes) * num_stack_sizes;
}
entry->num_stack_sizes = num_stack_sizes;
// Make valgrind happy by filling the alignment hole at the end.
assert(p == (char *)entry + size_without_align);
assert(sizeof(*entry) + (p - entry->code) == size_without_align);

View File

@ -22,6 +22,7 @@
*/
#include "radv_acceleration_structure.h"
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"
@ -1899,6 +1900,11 @@ radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache,
VkResult result;
struct radv_pipeline *pipeline = NULL;
struct radv_pipeline_shader_stack_size *stack_sizes = NULL;
uint8_t hash[20];
nir_shader *shader = NULL;
bool keep_statistic_info =
(pCreateInfo->flags & VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR) ||
(device->instance->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) || device->keep_shader_info;
if (pCreateInfo->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR)
return radv_rt_pipeline_library_create(_device, _cache, pCreateInfo, pAllocator, pPipeline);
@ -1910,30 +1916,44 @@ radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache,
goto fail;
}
stack_sizes = calloc(sizeof(*stack_sizes), local_create_info.groupCount);
if (!stack_sizes) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail;
}
radv_hash_rt_shaders(hash, &local_create_info, radv_get_hash_flags(device, keep_statistic_info));
struct vk_shader_module module = {.base.type = VK_OBJECT_TYPE_SHADER_MODULE};
nir_shader *shader = create_rt_shader(device, &local_create_info, stack_sizes);
VkComputePipelineCreateInfo compute_info = {
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.pNext = NULL,
.flags = pCreateInfo->flags,
.flags = pCreateInfo->flags | VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT,
.stage =
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.module = vk_shader_module_handle_from_nir(shader),
.module = vk_shader_module_to_handle(&module),
.pName = "main",
},
.layout = pCreateInfo->layout,
};
result = radv_compute_pipeline_create(_device, _cache, &compute_info, pAllocator, pPipeline);
if (result != VK_SUCCESS)
goto shader_fail;
/* First check if we can get things from the cache before we take the expensive step of
* generating the nir. */
result = radv_compute_pipeline_create(_device, _cache, &compute_info, pAllocator, hash,
stack_sizes, local_create_info.groupCount, pPipeline);
if (result == VK_PIPELINE_COMPILE_REQUIRED_EXT) {
stack_sizes = calloc(sizeof(*stack_sizes), local_create_info.groupCount);
if (!stack_sizes) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail;
}
shader = create_rt_shader(device, &local_create_info, stack_sizes);
module.nir = shader;
compute_info.flags = pCreateInfo->flags;
result = radv_compute_pipeline_create(_device, _cache, &compute_info, pAllocator, hash,
stack_sizes, local_create_info.groupCount, pPipeline);
stack_sizes = NULL;
if (result != VK_SUCCESS)
goto shader_fail;
}
pipeline = radv_pipeline_from_handle(*pPipeline);
pipeline->compute.rt_group_handles =
@ -1943,10 +1963,7 @@ radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache,
goto shader_fail;
}
pipeline->compute.rt_stack_sizes = stack_sizes;
stack_sizes = NULL;
pipeline->compute.dynamic_stack_size = has_dynamic_stack_size(pCreateInfo);
pipeline->compute.dynamic_stack_size = radv_rt_pipeline_has_dynamic_stack_size(pCreateInfo);
for (unsigned i = 0; i < local_create_info.groupCount; ++i) {
const VkRayTracingShaderGroupCreateInfoKHR *group_info = &local_create_info.pGroups[i];

View File

@ -375,22 +375,21 @@ struct radv_pipeline_key {
struct radv_shader_binary;
struct radv_shader_variant;
struct radv_pipeline_shader_stack_size;
void radv_pipeline_cache_init(struct radv_pipeline_cache *cache, struct radv_device *device);
void radv_pipeline_cache_finish(struct radv_pipeline_cache *cache);
bool radv_pipeline_cache_load(struct radv_pipeline_cache *cache, const void *data, size_t size);
bool radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
struct radv_pipeline_cache *cache,
const unsigned char *sha1,
struct radv_shader_variant **variants,
bool *found_in_application_cache);
bool radv_create_shader_variants_from_pipeline_cache(
struct radv_device *device, struct radv_pipeline_cache *cache, const unsigned char *sha1,
struct radv_shader_variant **variants, struct radv_pipeline_shader_stack_size **stack_sizes,
uint32_t *num_stack_sizes, bool *found_in_application_cache);
void radv_pipeline_cache_insert_shaders(struct radv_device *device,
struct radv_pipeline_cache *cache,
const unsigned char *sha1,
struct radv_shader_variant **variants,
struct radv_shader_binary *const *binaries);
void radv_pipeline_cache_insert_shaders(
struct radv_device *device, struct radv_pipeline_cache *cache, const unsigned char *sha1,
struct radv_shader_variant **variants, struct radv_shader_binary *const *binaries,
const struct radv_pipeline_shader_stack_size *stack_sizes, uint32_t num_stack_sizes);
enum radv_blit_ds_layout {
RADV_BLIT_DS_LAYOUT_TILE_ENABLE,
@ -1690,6 +1689,11 @@ void radv_hash_shaders(unsigned char *hash, const VkPipelineShaderStageCreateInf
const struct radv_pipeline_layout *layout,
const struct radv_pipeline_key *key, uint32_t flags);
void radv_hash_rt_shaders(unsigned char *hash, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
uint32_t flags);
uint32_t radv_get_hash_flags(const struct radv_device *device, bool stats);
bool radv_rt_pipeline_has_dynamic_stack_size(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo);
#define RADV_STAGE_MASK ((1 << MESA_SHADER_STAGES) - 1)
@ -1819,6 +1823,7 @@ struct radv_pipeline {
struct radv_pipeline_group_handle *rt_group_handles;
struct radv_pipeline_shader_stack_size *rt_stack_sizes;
bool dynamic_stack_size;
uint32_t group_count;
} compute;
struct {
unsigned stage_count;
@ -1878,7 +1883,9 @@ VkResult radv_graphics_pipeline_create(VkDevice device, VkPipelineCache cache,
VkResult radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache,
const VkComputePipelineCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
VkPipeline *pPipeline);
const uint8_t *custom_hash,
struct radv_pipeline_shader_stack_size *rt_stack_sizes,
uint32_t rt_group_count, VkPipeline *pPipeline);
void radv_pipeline_destroy(struct radv_device *device, struct radv_pipeline *pipeline,
const VkAllocationCallbacks *allocator);

View File

@ -449,7 +449,7 @@ void radv_destroy_shader_slabs(struct radv_device *device);
VkResult radv_create_shaders(struct radv_pipeline *pipeline, struct radv_device *device,
struct radv_pipeline_cache *cache, const struct radv_pipeline_key *key,
const VkPipelineShaderStageCreateInfo **pStages,
const VkPipelineCreateFlags flags,
const VkPipelineCreateFlags flags, const uint8_t *custom_hash,
VkPipelineCreationFeedbackEXT *pipeline_feedback,
VkPipelineCreationFeedbackEXT **stage_feedbacks);