mesa/src/amd/vulkan/radv_pipeline_rt.c

470 lines
19 KiB
C

/*
* Copyright © 2021 Google
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "nir/nir.h"
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"
static VkRayTracingPipelineCreateInfoKHR
radv_create_merged_rt_create_info(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo)
{
VkRayTracingPipelineCreateInfoKHR local_create_info = *pCreateInfo;
uint32_t total_stages = pCreateInfo->stageCount;
uint32_t total_groups = pCreateInfo->groupCount;
if (pCreateInfo->pLibraryInfo) {
for (unsigned i = 0; i < pCreateInfo->pLibraryInfo->libraryCount; ++i) {
RADV_FROM_HANDLE(radv_pipeline, pipeline, pCreateInfo->pLibraryInfo->pLibraries[i]);
struct radv_library_pipeline *library_pipeline = radv_pipeline_to_library(pipeline);
total_stages += library_pipeline->stage_count;
total_groups += library_pipeline->group_count;
}
}
VkPipelineShaderStageCreateInfo *stages = NULL;
VkRayTracingShaderGroupCreateInfoKHR *groups = NULL;
local_create_info.stageCount = total_stages;
local_create_info.groupCount = total_groups;
local_create_info.pStages = stages =
malloc(sizeof(VkPipelineShaderStageCreateInfo) * total_stages);
local_create_info.pGroups = groups =
malloc(sizeof(VkRayTracingShaderGroupCreateInfoKHR) * total_groups);
if (!local_create_info.pStages || !local_create_info.pGroups)
return local_create_info;
total_stages = pCreateInfo->stageCount;
total_groups = pCreateInfo->groupCount;
for (unsigned j = 0; j < pCreateInfo->stageCount; ++j)
stages[j] = pCreateInfo->pStages[j];
for (unsigned j = 0; j < pCreateInfo->groupCount; ++j)
groups[j] = pCreateInfo->pGroups[j];
if (pCreateInfo->pLibraryInfo) {
for (unsigned i = 0; i < pCreateInfo->pLibraryInfo->libraryCount; ++i) {
RADV_FROM_HANDLE(radv_pipeline, pipeline, pCreateInfo->pLibraryInfo->pLibraries[i]);
struct radv_library_pipeline *library_pipeline = radv_pipeline_to_library(pipeline);
for (unsigned j = 0; j < library_pipeline->stage_count; ++j)
stages[total_stages + j] = library_pipeline->stages[j];
for (unsigned j = 0; j < library_pipeline->group_count; ++j) {
VkRayTracingShaderGroupCreateInfoKHR *dst = &groups[total_groups + j];
*dst = library_pipeline->groups[j];
if (dst->generalShader != VK_SHADER_UNUSED_KHR)
dst->generalShader += total_stages;
if (dst->closestHitShader != VK_SHADER_UNUSED_KHR)
dst->closestHitShader += total_stages;
if (dst->anyHitShader != VK_SHADER_UNUSED_KHR)
dst->anyHitShader += total_stages;
if (dst->intersectionShader != VK_SHADER_UNUSED_KHR)
dst->intersectionShader += total_stages;
}
total_stages += library_pipeline->stage_count;
total_groups += library_pipeline->group_count;
}
}
return local_create_info;
}
/* ralloc destructor callback: tears down the vk_object_base embedded in a
 * vk_shader_module. Takes void * because that is what it is registered with
 * via ralloc_set_destructor().
 */
static void
vk_shader_module_finish(void *_module)
{
   vk_object_base_finish(&((struct vk_shader_module *)_module)->base);
}
static VkResult
radv_rt_pipeline_library_create(VkDevice _device, VkPipelineCache _cache,
const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline)
{
RADV_FROM_HANDLE(radv_device, device, _device);
struct radv_library_pipeline *pipeline;
pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (pipeline == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
radv_pipeline_init(device, &pipeline->base, RADV_PIPELINE_LIBRARY);
pipeline->ctx = ralloc_context(NULL);
VkRayTracingPipelineCreateInfoKHR local_create_info =
radv_create_merged_rt_create_info(pCreateInfo);
if (!local_create_info.pStages || !local_create_info.pGroups)
goto fail;
if (local_create_info.stageCount) {
pipeline->stage_count = local_create_info.stageCount;
size_t size = sizeof(VkPipelineShaderStageCreateInfo) * local_create_info.stageCount;
pipeline->stages = ralloc_size(pipeline->ctx, size);
if (!pipeline->stages)
goto fail;
memcpy(pipeline->stages, local_create_info.pStages, size);
pipeline->hashes =
ralloc_size(pipeline->ctx, sizeof(*pipeline->hashes) * local_create_info.stageCount);
if (!pipeline->hashes)
goto fail;
pipeline->identifiers =
ralloc_size(pipeline->ctx, sizeof(*pipeline->identifiers) * local_create_info.stageCount);
if (!pipeline->identifiers)
goto fail;
for (uint32_t i = 0; i < local_create_info.stageCount; i++) {
RADV_FROM_HANDLE(vk_shader_module, module, pipeline->stages[i].module);
const VkPipelineShaderStageModuleIdentifierCreateInfoEXT *iinfo =
vk_find_struct_const(local_create_info.pStages[i].pNext,
PIPELINE_SHADER_STAGE_MODULE_IDENTIFIER_CREATE_INFO_EXT);
if (module) {
struct vk_shader_module *new_module =
ralloc_size(pipeline->ctx, sizeof(struct vk_shader_module) + module->size);
if (!new_module)
goto fail;
ralloc_set_destructor(new_module, vk_shader_module_finish);
vk_object_base_init(&device->vk, &new_module->base, VK_OBJECT_TYPE_SHADER_MODULE);
new_module->nir = NULL;
memcpy(new_module->sha1, module->sha1, sizeof(module->sha1));
new_module->size = module->size;
memcpy(new_module->data, module->data, module->size);
const VkSpecializationInfo *spec = pipeline->stages[i].pSpecializationInfo;
if (spec) {
VkSpecializationInfo *new_spec = ralloc(pipeline->ctx, VkSpecializationInfo);
if (!new_spec)
goto fail;
new_spec->mapEntryCount = spec->mapEntryCount;
uint32_t map_entries_size = sizeof(VkSpecializationMapEntry) * spec->mapEntryCount;
new_spec->pMapEntries = ralloc_size(pipeline->ctx, map_entries_size);
if (!new_spec->pMapEntries)
goto fail;
memcpy((void *)new_spec->pMapEntries, spec->pMapEntries, map_entries_size);
new_spec->dataSize = spec->dataSize;
new_spec->pData = ralloc_size(pipeline->ctx, spec->dataSize);
if (!new_spec->pData)
goto fail;
memcpy((void *)new_spec->pData, spec->pData, spec->dataSize);
pipeline->stages[i].pSpecializationInfo = new_spec;
}
pipeline->stages[i].module = vk_shader_module_to_handle(new_module);
pipeline->stages[i].pName = ralloc_strdup(pipeline->ctx, pipeline->stages[i].pName);
if (!pipeline->stages[i].pName)
goto fail;
pipeline->stages[i].pNext = NULL;
} else {
assert(iinfo);
pipeline->identifiers[i].identifierSize =
MIN2(iinfo->identifierSize, sizeof(pipeline->hashes[i].sha1));
memcpy(pipeline->hashes[i].sha1, iinfo->pIdentifier,
pipeline->identifiers[i].identifierSize);
pipeline->stages[i].module = VK_NULL_HANDLE;
pipeline->stages[i].pNext = &pipeline->identifiers[i];
pipeline->identifiers[i].sType =
VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_MODULE_IDENTIFIER_CREATE_INFO_EXT;
pipeline->identifiers[i].pNext = NULL;
pipeline->identifiers[i].pIdentifier = pipeline->hashes[i].sha1;
}
}
}
if (local_create_info.groupCount) {
size_t size = sizeof(VkRayTracingShaderGroupCreateInfoKHR) * local_create_info.groupCount;
pipeline->group_count = local_create_info.groupCount;
pipeline->groups = ralloc_size(pipeline->ctx, size);
if (!pipeline->groups)
goto fail;
memcpy(pipeline->groups, local_create_info.pGroups, size);
}
*pPipeline = radv_pipeline_to_handle(&pipeline->base);
free((void *)local_create_info.pGroups);
free((void *)local_create_info.pStages);
return VK_SUCCESS;
fail:
ralloc_free(pipeline->ctx);
free((void *)local_create_info.pGroups);
free((void *)local_create_info.pStages);
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
bool
radv_rt_pipeline_has_dynamic_stack_size(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo)
{
if (!pCreateInfo->pDynamicState)
return false;
for (unsigned i = 0; i < pCreateInfo->pDynamicState->dynamicStateCount; ++i) {
if (pCreateInfo->pDynamicState->pDynamicStates[i] ==
VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR)
return true;
}
return false;
}
/* Produce the pipeline key for a ray tracing pipeline: the generic key from
 * radv_generate_pipeline_key() with the compute subgroup size overridden by
 * the physical device's rt_wave_size.
 */
static struct radv_pipeline_key
radv_generate_rt_pipeline_key(const struct radv_ray_tracing_pipeline *pipeline,
                              VkPipelineCreateFlags flags)
{
   struct radv_pipeline_key rt_key = radv_generate_pipeline_key(&pipeline->base.base, flags);

   rt_key.cs.compute_subgroup_size = pipeline->base.base.device->physical_device->rt_wave_size;

   return rt_key;
}
/**
 * Create a single ray tracing pipeline from pCreateInfo.
 *
 * When VK_PIPELINE_CREATE_LIBRARY_BIT_KHR is set the work is delegated to
 * radv_rt_pipeline_library_create(). Otherwise the stages/groups of the create
 * info and of its libraries are merged, a hash is computed over them, and the
 * pipeline is built as a single compute-stage pipeline: first by trying
 * radv_create_shaders() with "fail on compile required" forced on, and, if that
 * reports VK_PIPELINE_COMPILE_REQUIRED, by generating the shader with
 * create_rt_shader() and calling radv_create_shaders() again.
 *
 * On success *pPipeline receives the new handle and VK_SUCCESS is returned.
 * On failure the partially built pipeline is destroyed, *pPipeline is not
 * written by this function, and the error (or VK_PIPELINE_COMPILE_REQUIRED)
 * is returned.
 */
static VkResult
radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache,
const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline)
{
RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
RADV_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, pCreateInfo->layout);
VkResult result;
struct radv_ray_tracing_pipeline *rt_pipeline = NULL;
uint8_t hash[20];
nir_shader *shader = NULL;
bool keep_statistic_info =
(pCreateInfo->flags & VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR) ||
(device->instance->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) || device->keep_shader_info;
/* Libraries take a separate path; nothing has been allocated yet, so a plain return is fine. */
if (pCreateInfo->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR)
return radv_rt_pipeline_library_create(_device, _cache, pCreateInfo, pAllocator, pPipeline);
/* From here on local_create_info owns two malloc'ed arrays that are freed at the "fail" label. */
VkRayTracingPipelineCreateInfoKHR local_create_info =
radv_create_merged_rt_create_info(pCreateInfo);
if (!local_create_info.pStages || !local_create_info.pGroups) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail;
}
radv_hash_rt_shaders(hash, &local_create_info, radv_get_hash_flags(device, keep_statistic_info));
/* Stack-allocated stand-in module for the single compute stage; its nir pointer is only
 * filled in later, if the shader actually has to be generated. */
struct vk_shader_module module = {.base.type = VK_OBJECT_TYPE_SHADER_MODULE};
VkPipelineShaderStageCreateInfo stage = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = NULL,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.module = vk_shader_module_to_handle(&module),
.pName = "main",
};
/* Flags for the first attempt only: the application's flags plus "fail on compile required". */
VkPipelineCreateFlags flags =
pCreateInfo->flags | VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT;
rt_pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*rt_pipeline), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (rt_pipeline == NULL) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail;
}
radv_pipeline_init(device, &rt_pipeline->base.base, RADV_PIPELINE_RAY_TRACING);
rt_pipeline->group_count = local_create_info.groupCount;
const VkPipelineCreationFeedbackCreateInfo *creation_feedback =
vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
struct radv_pipeline_key key = radv_generate_rt_pipeline_key(rt_pipeline, pCreateInfo->flags);
UNUSED gl_shader_stage last_vgt_api_stage = MESA_SHADER_NONE;
/* First check if we can get things from the cache before we take the expensive step of
* generating the nir. */
result = radv_create_shaders(
&rt_pipeline->base.base, pipeline_layout, device, cache, &key, &stage, 1, flags, hash,
creation_feedback, &rt_pipeline->stack_sizes, &rt_pipeline->group_count, &last_vgt_api_stage);
if (result != VK_SUCCESS && result != VK_PIPELINE_COMPILE_REQUIRED)
goto pipeline_fail;
if (result == VK_PIPELINE_COMPILE_REQUIRED) {
/* Test the application's own flags here: the local "flags" always has this bit set. */
if (pCreateInfo->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
goto pipeline_fail;
rt_pipeline->stack_sizes =
calloc(sizeof(*rt_pipeline->stack_sizes), local_create_info.groupCount);
if (!rt_pipeline->stack_sizes) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto pipeline_fail;
}
/* Generate the shader, hand it to the stand-in module and retry with the original flags. */
shader = create_rt_shader(device, &local_create_info, rt_pipeline->stack_sizes);
module.nir = shader;
result = radv_create_shaders(&rt_pipeline->base.base, pipeline_layout, device, cache, &key,
&stage, 1, pCreateInfo->flags, hash, creation_feedback,
&rt_pipeline->stack_sizes, &rt_pipeline->group_count,
&last_vgt_api_stage);
if (result != VK_SUCCESS)
goto shader_fail;
}
radv_compute_pipeline_init(&rt_pipeline->base, pipeline_layout);
rt_pipeline->group_handles =
calloc(sizeof(*rt_pipeline->group_handles), local_create_info.groupCount);
if (!rt_pipeline->group_handles) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto shader_fail;
}
rt_pipeline->dynamic_stack_size = radv_rt_pipeline_has_dynamic_stack_size(pCreateInfo);
/* For General and ClosestHit shaders, we can use the shader ID directly as handle.
* As (potentially different) AnyHit shaders are inlined, for Intersection shaders
* we use the Group ID.
*/
/* Every stored index is the shader or group index plus 2; handle fields for unused shaders
 * keep the zero value they got from calloc.
 * NOTE(review): the reason for the +2 offset is not visible in this function - confirm. */
for (unsigned i = 0; i < local_create_info.groupCount; ++i) {
const VkRayTracingShaderGroupCreateInfoKHR *group_info = &local_create_info.pGroups[i];
switch (group_info->type) {
case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR:
if (group_info->generalShader != VK_SHADER_UNUSED_KHR)
rt_pipeline->group_handles[i].general_index = group_info->generalShader + 2;
break;
case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR:
if (group_info->closestHitShader != VK_SHADER_UNUSED_KHR)
rt_pipeline->group_handles[i].closest_hit_index = group_info->closestHitShader + 2;
if (group_info->intersectionShader != VK_SHADER_UNUSED_KHR)
rt_pipeline->group_handles[i].intersection_index = i + 2;
break;
case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
if (group_info->closestHitShader != VK_SHADER_UNUSED_KHR)
rt_pipeline->group_handles[i].closest_hit_index = group_info->closestHitShader + 2;
if (group_info->anyHitShader != VK_SHADER_UNUSED_KHR)
rt_pipeline->group_handles[i].any_hit_index = i + 2;
break;
case VK_SHADER_GROUP_SHADER_MAX_ENUM_KHR:
unreachable("VK_SHADER_GROUP_SHADER_MAX_ENUM_KHR");
}
/* With capture/replay, a handle supplied by the application must match byte-for-byte the
 * handle that was just computed for this group. */
if (pCreateInfo->flags &
VK_PIPELINE_CREATE_RAY_TRACING_SHADER_GROUP_HANDLE_CAPTURE_REPLAY_BIT_KHR) {
if (group_info->pShaderGroupCaptureReplayHandle &&
memcmp(group_info->pShaderGroupCaptureReplayHandle, &rt_pipeline->group_handles[i],
sizeof(rt_pipeline->group_handles[i])) != 0) {
result = VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS;
goto shader_fail;
}
}
}
*pPipeline = radv_pipeline_to_handle(&rt_pipeline->base.base);
/* The labels below are also reached by falling through on success: the generated shader
 * (NULL if none was generated) and the merged arrays are always released, while the pipeline
 * is destroyed only when result is not VK_SUCCESS. */
shader_fail:
ralloc_free(shader);
pipeline_fail:
if (result != VK_SUCCESS)
radv_pipeline_destroy(device, &rt_pipeline->base.base, pAllocator);
fail:
free((void *)local_create_info.pGroups);
free((void *)local_create_info.pStages);
return result;
}
/**
 * vkCreateRayTracingPipelinesKHR entry point.
 *
 * Creates the pipelines one after another. A pipeline that fails gets
 * VK_NULL_HANDLE in its output slot and its result becomes the return value
 * (the most recent failure wins). If the failing create info has
 * VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT set, processing stops and all
 * remaining output slots are set to VK_NULL_HANDLE as well.
 *
 * Deferred operations are not used: when everything succeeded and a deferred
 * operation handle was passed, VK_OPERATION_NOT_DEFERRED_KHR is returned.
 */
VKAPI_ATTR VkResult VKAPI_CALL
radv_CreateRayTracingPipelinesKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation,
                                  VkPipelineCache pipelineCache, uint32_t count,
                                  const VkRayTracingPipelineCreateInfoKHR *pCreateInfos,
                                  const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)
{
   VkResult overall = VK_SUCCESS;
   unsigned idx;

   for (idx = 0; idx < count; idx++) {
      const VkResult status = radv_rt_pipeline_create(_device, pipelineCache, &pCreateInfos[idx],
                                                      pAllocator, &pPipelines[idx]);
      if (status == VK_SUCCESS)
         continue;

      overall = status;
      pPipelines[idx] = VK_NULL_HANDLE;

      if (pCreateInfos[idx].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
         break;
   }

   /* Only does work after an early break: clear the slots that were never attempted. */
   while (idx < count)
      pPipelines[idx++] = VK_NULL_HANDLE;

   if (overall != VK_SUCCESS || deferredOperation == VK_NULL_HANDLE)
      return overall;

   return VK_OPERATION_NOT_DEFERRED_KHR;
}
/**
 * vkGetRayTracingShaderGroupHandlesKHR entry point.
 *
 * Writes groupCount handles, starting with group firstGroup, to pData. Each
 * handle occupies RADV_RT_HANDLE_SIZE bytes in the output; the stored handle
 * is copied to the start of its slot and the remainder of the slot is zero.
 * dataSize is not consulted. Always returns VK_SUCCESS.
 */
VKAPI_ATTR VkResult VKAPI_CALL
radv_GetRayTracingShaderGroupHandlesKHR(VkDevice device, VkPipeline _pipeline, uint32_t firstGroup,
                                        uint32_t groupCount, size_t dataSize, void *pData)
{
   RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
   struct radv_ray_tracing_pipeline *rt_pipeline = radv_pipeline_to_ray_tracing(pipeline);
   char *out = pData;
   uint32_t slot = 0;

   /* The stored handle must fit into one output slot. */
   STATIC_ASSERT(sizeof(*rt_pipeline->group_handles) <= RADV_RT_HANDLE_SIZE);

   /* Zero the whole range up front so the padding of every slot is defined. */
   memset(out, 0, groupCount * RADV_RT_HANDLE_SIZE);

   while (slot < groupCount) {
      char *dst = out + slot * RADV_RT_HANDLE_SIZE;

      memcpy(dst, &rt_pipeline->group_handles[firstGroup + slot],
             sizeof(*rt_pipeline->group_handles));
      ++slot;
   }

   return VK_SUCCESS;
}
/**
 * vkGetRayTracingShaderGroupStackSizeKHR entry point.
 *
 * Looks up the stack size entry of the given group and returns its
 * non_recursive_size for any-hit and intersection shaders, and its
 * recursive_size for every other shader kind.
 */
VKAPI_ATTR VkDeviceSize VKAPI_CALL
radv_GetRayTracingShaderGroupStackSizeKHR(VkDevice device, VkPipeline _pipeline, uint32_t group,
                                          VkShaderGroupShaderKHR groupShader)
{
   RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
   struct radv_ray_tracing_pipeline *rt_pipeline = radv_pipeline_to_ray_tracing(pipeline);
   const struct radv_pipeline_shader_stack_size *sizes = &rt_pipeline->stack_sizes[group];

   switch (groupShader) {
   case VK_SHADER_GROUP_SHADER_ANY_HIT_KHR:
   case VK_SHADER_GROUP_SHADER_INTERSECTION_KHR:
      return sizes->non_recursive_size;
   default:
      return sizes->recursive_size;
   }
}
/**
 * vkGetRayTracingCaptureReplayShaderGroupHandlesKHR entry point.
 *
 * Forwards all arguments unchanged to radv_GetRayTracingShaderGroupHandlesKHR()
 * and returns its result, so capture/replay handles are the regular handles.
 */
VKAPI_ATTR VkResult VKAPI_CALL
radv_GetRayTracingCaptureReplayShaderGroupHandlesKHR(VkDevice device, VkPipeline pipeline,
                                                     uint32_t firstGroup, uint32_t groupCount,
                                                     size_t dataSize, void *pData)
{
   const VkResult handles_result = radv_GetRayTracingShaderGroupHandlesKHR(
      device, pipeline, firstGroup, groupCount, dataSize, pData);

   return handles_result;
}