radv: Implement DGC generated command layout structure.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17269>
This commit is contained in:
Bas Nieuwenhuizen 2022-06-27 23:21:08 +02:00 committed by Marge Bot
parent 0c7bb92a78
commit 37a619f517
2 changed files with 226 additions and 0 deletions

View File

@ -26,6 +26,88 @@
#include "nir_builder.h"
/**
 * Compute how much storage a single generated sequence needs.
 *
 * \param layout       Indirect command layout that says which tokens are bound.
 * \param pipeline     Graphics pipeline the sequences are generated for.
 * \param cmd_size     Out: bytes of command stream emitted per sequence.
 * \param upload_size  Out: bytes of upload space needed per sequence (sized
 *                     from the pipeline's vertex buffer usage mask and its
 *                     push constant / dynamic offset data).
 */
static void
radv_get_sequence_size(const struct radv_indirect_command_layout *layout,
                       const struct radv_graphics_pipeline *pipeline, uint32_t *cmd_size,
                       uint32_t *upload_size)
{
   /* Packets are tallied in dwords and converted to bytes once, on exit. */
   uint32_t cmd_dwords = 0;
   uint32_t upload_bytes = 0;

   if (layout->bind_vbo_mask) {
      /* One PKT3_SET_SH_REG for emitting VBO pointer (32-bit) */
      cmd_dwords += 3;

      /* 16 bytes for every bit set in the pipeline's vb_desc_usage_mask. */
      upload_bytes += 16 * util_bitcount(pipeline->vb_desc_usage_mask);
   }

   if (layout->push_constant_mask) {
      bool uses_push_constant_ptr = false;

      for (unsigned idx = 0; idx < ARRAY_SIZE(pipeline->base.shaders); ++idx) {
         if (pipeline->base.shaders[idx]) {
            struct radv_userdata_locations *locs =
               &pipeline->base.shaders[idx]->info.user_sgprs_locs;

            if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0) {
               /* One PKT3_SET_SH_REG for emitting push constants pointer (32-bit) */
               cmd_dwords += 3;
               uses_push_constant_ptr = true;
            }

            if (locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx >= 0) {
               /* One PKT3_SET_SH_REG writing all inline push constants:
                * 2 dwords of overhead plus one dword per SGPR.
                */
               cmd_dwords += 2 + locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].num_sgprs;
            }
         }
      }

      /* Upload space is only added when at least one shader reads push
       * constants through a pointer.
       */
      if (uses_push_constant_ptr) {
         upload_bytes += align(
            pipeline->base.push_constant_size + 16 * pipeline->base.dynamic_offset_count, 16);
      }
   }

   if (layout->binds_index_buffer) {
      /* Index type write (normal reg write, 3 dwords) + index buffer base write
       * (64-bit, special packet with 1 dword of overhead, 3 dwords) + index
       * buffer size (special packet with 1 dword of overhead, 2 dwords).
       */
      cmd_dwords += 3 + 3 + 2;
   }

   /* userdata writes + instance count + the draw packet itself, which takes
    * 5 dwords when indexed and 3 dwords otherwise.
    */
   cmd_dwords += 5 + 2 + (layout->indexed ? 5 : 3);

   if (layout->binds_state) {
      /* One PKT3_SET_CONTEXT_REG (PA_SU_SC_MODE_CNTL) */
      cmd_dwords += 3;

      if (pipeline->base.device->physical_device->rad_info.has_gfx9_scissor_bug) {
         /* 1 reg write of 4 regs + 1 reg write of 2 regs per scissor */
         cmd_dwords += 8 + 2 * MAX_SCISSORS;
      }
   }

   *cmd_size = cmd_dwords * 4;
   *upload_size = upload_bytes;
}
/**
 * Pad a command buffer size to the 256-byte granularity used for generated
 * command buffers.
 *
 * A size of zero is bumped to one before aligning.
 */
static uint32_t
radv_align_cmdbuf_size(uint32_t size)
{
   const uint32_t nonzero_size = size ? size : 1;

   return align(nonzero_size, 256);
}
/**
 * Return the aligned size in bytes of the command stream generated for
 * cmd_info->sequencesCount sequences with the layout and pipeline named in
 * \p cmd_info.
 */
uint32_t
radv_get_indirect_cmdbuf_size(const VkGeneratedCommandsInfoNV *cmd_info)
{
   VK_FROM_HANDLE(radv_pipeline, pipeline, cmd_info->pipeline);
   VK_FROM_HANDLE(radv_indirect_command_layout, layout, cmd_info->indirectCommandsLayout);

   /* Only the command stride is needed here; the upload stride is computed
    * by the helper but not used.
    */
   uint32_t cmd_stride = 0;
   uint32_t upload_stride = 0;
   radv_get_sequence_size(layout, radv_pipeline_to_graphics(pipeline), &cmd_stride,
                          &upload_stride);

   return radv_align_cmdbuf_size(cmd_stride * cmd_info->sequencesCount);
}
enum radv_dgc_token_type {
RADV_DGC_INDEX_BUFFER,
RADV_DGC_DRAW,
@ -920,3 +1002,117 @@ fail:
ralloc_free(cs);
return result;
}
/**
 * Entry point for vkCreateIndirectCommandsLayoutNV.
 *
 * Allocates a radv_indirect_command_layout with room for a copy of the
 * application's tokens, then walks the tokens once to record which state each
 * one binds and the token's offset.
 *
 * \param _device                  Device that owns the layout.
 * \param pCreateInfo              Layout description; only pStreamStrides[0]
 *                                 is recorded as the input stride.
 * \param pAllocator               Optional host allocation callbacks.
 * \param pIndirectCommandsLayout  Out: handle of the new layout.
 * \return VK_SUCCESS, or the result of vk_error() for
 *         VK_ERROR_OUT_OF_HOST_MEMORY when the allocation fails.
 */
VkResult
radv_CreateIndirectCommandsLayoutNV(VkDevice _device,
                                    const VkIndirectCommandsLayoutCreateInfoNV *pCreateInfo,
                                    const VkAllocationCallbacks *pAllocator,
                                    VkIndirectCommandsLayoutNV *pIndirectCommandsLayout)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   const uint32_t token_count = pCreateInfo->tokenCount;

   /* The token copy lives in the trailing array of the same allocation. */
   const size_t alloc_size = sizeof(struct radv_indirect_command_layout) +
                             token_count * sizeof(VkIndirectCommandsLayoutTokenNV);

   struct radv_indirect_command_layout *layout =
      vk_zalloc2(&device->vk.alloc, pAllocator, alloc_size,
                 alignof(struct radv_indirect_command_layout), VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!layout)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &layout->base, VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NV);

   layout->input_stride = pCreateInfo->pStreamStrides[0];
   layout->token_count = token_count;
   typed_memcpy(layout->tokens, pCreateInfo->pTokens, token_count);

   /* Defaults used when the application supplies no custom index type value. */
   layout->ibo_type_32 = VK_INDEX_TYPE_UINT32;
   layout->ibo_type_8 = VK_INDEX_TYPE_UINT8_EXT;

   for (unsigned i = 0; i < token_count; ++i) {
      const VkIndirectCommandsLayoutTokenNV *token = &pCreateInfo->pTokens[i];

      switch (token->tokenType) {
      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_NV:
         layout->draw_params_offset = token->offset;
         break;

      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV:
         layout->indexed = true;
         layout->draw_params_offset = token->offset;
         break;

      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_NV:
         layout->binds_index_buffer = true;
         layout->index_buffer_offset = token->offset;

         /* Only the 32-bit and 8-bit values are recorded; 16-bit is implied if
          * we find no match.
          */
         for (unsigned t = 0; t < token->indexTypeCount; t++) {
            switch (token->pIndexTypes[t]) {
            case VK_INDEX_TYPE_UINT32:
               layout->ibo_type_32 = token->pIndexTypeValues[t];
               break;
            case VK_INDEX_TYPE_UINT8_EXT:
               layout->ibo_type_8 = token->pIndexTypeValues[t];
               break;
            default:
               break;
            }
         }
         break;

      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV: {
         const uint32_t binding = token->vertexBindingUnit;
         uint32_t vbo_offset = token->offset;

         /* Bit 15 of the stored offset flags a dynamic stride. */
         if (token->vertexDynamicStride)
            vbo_offset |= 1u << 15;

         layout->bind_vbo_mask |= 1u << binding;
         layout->vbo_offsets[binding] = vbo_offset;
         break;
      }

      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV: {
         /* Push constants are tracked per dword. */
         const unsigned first_dword = token->pushconstantOffset / 4;
         const unsigned dword_count = token->pushconstantSize / 4;

         for (unsigned k = 0; k < dword_count; ++k) {
            const unsigned dword = first_dword + k;

            layout->push_constant_mask |= 1ull << dword;
            layout->push_constant_offsets[dword] = token->offset + k * 4;
         }
         break;
      }

      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_STATE_FLAGS_NV:
         layout->binds_state = true;
         layout->state_offset = token->offset;
         break;

      default:
         unreachable("Unhandled token type");
      }
   }

   /* An index buffer token only counts when the layout draws indexed. */
   layout->binds_index_buffer = layout->binds_index_buffer && layout->indexed;

   *pIndirectCommandsLayout = radv_indirect_command_layout_to_handle(layout);
   return VK_SUCCESS;
}
/**
 * Entry point for vkDestroyIndirectCommandsLayoutNV.
 *
 * Finishes the base object and frees the layout allocation. A null handle is
 * accepted and ignored.
 */
void
radv_DestroyIndirectCommandsLayoutNV(VkDevice _device,
                                     VkIndirectCommandsLayoutNV indirectCommandsLayout,
                                     const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   VK_FROM_HANDLE(radv_indirect_command_layout, layout, indirectCommandsLayout);

   if (layout) {
      vk_object_base_finish(&layout->base);
      vk_free2(&device->vk.alloc, pAllocator, layout);
   }
}
/**
 * Entry point for vkGetGeneratedCommandsMemoryRequirementsNV.
 *
 * Reports the memory needed for pInfo->maxSequencesCount sequences: the
 * aligned command stream plus the per-sequence upload space, rounded up to the
 * reported alignment of 256 bytes.
 *
 * \param _device              Device the query is made on.
 * \param pInfo                Layout, pipeline and maximum sequence count.
 * \param pMemoryRequirements  Out: memoryTypeBits, alignment and size are
 *                             written; nothing else in the structure is
 *                             touched.
 */
void
radv_GetGeneratedCommandsMemoryRequirementsNV(
   VkDevice _device, const VkGeneratedCommandsMemoryRequirementsInfoNV *pInfo,
   VkMemoryRequirements2 *pMemoryRequirements)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   VK_FROM_HANDLE(radv_indirect_command_layout, layout, pInfo->indirectCommandsLayout);
   VK_FROM_HANDLE(radv_pipeline, pipeline, pInfo->pipeline);
   struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);

   uint32_t cmd_stride, upload_stride;
   radv_get_sequence_size(layout, graphics_pipeline, &cmd_stride, &upload_stride);

   /* Deliberately the same expression as radv_get_indirect_cmdbuf_size() so
    * both functions agree on the size of the command region.
    */
   const VkDeviceSize cmd_buf_size =
      radv_align_cmdbuf_size(cmd_stride * pInfo->maxSequencesCount);

   /* Widen before multiplying: both operands are 32-bit, so the product would
    * otherwise wrap before being stored in the 64-bit VkDeviceSize.
    */
   const VkDeviceSize upload_buf_size = (VkDeviceSize)upload_stride * pInfo->maxSequencesCount;

   const VkDeviceSize alignment = 256;
   const VkDeviceSize total_size = cmd_buf_size + upload_buf_size;

   pMemoryRequirements->memoryRequirements.memoryTypeBits =
      device->physical_device->memory_types_32bit;
   pMemoryRequirements->memoryRequirements.alignment = alignment;

   /* Rounded up (power-of-two alignment) in VkDeviceSize arithmetic so the
    * total is never narrowed to a smaller integer type on the way.
    */
   pMemoryRequirements->memoryRequirements.size = (total_size + alignment - 1) & ~(alignment - 1);
}

View File

@ -2991,6 +2991,34 @@ void radv_describe_barrier_end_delayed(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer,
const struct radv_barrier_data *barrier);
/**
 * Driver-side object behind a VkIndirectCommandsLayoutNV handle.
 *
 * Filled in by radv_CreateIndirectCommandsLayoutNV() from the application's
 * token list. The object is allocated zeroed, so every field not set by a
 * token stays 0/false.
 */
struct radv_indirect_command_layout {
/* Base object, initialised as VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NV. */
struct vk_object_base base;
/* Copy of pStreamStrides[0] from the create info. */
uint32_t input_stride;
/* Number of entries in tokens[]. */
uint32_t token_count;
/* True when a DRAW_INDEXED token is present. */
bool indexed;
/* True when an INDEX_BUFFER token is present and the layout is indexed. */
bool binds_index_buffer;
/* True when a STATE_FLAGS token is present. */
bool binds_state;
/* `offset` of the DRAW or DRAW_INDEXED token. */
uint16_t draw_params_offset;
/* `offset` of the INDEX_BUFFER token. */
uint16_t index_buffer_offset;
/* `offset` of the STATE_FLAGS token. */
uint16_t state_offset;
/* One bit per vertexBindingUnit that has a VERTEX_BUFFER token. */
uint32_t bind_vbo_mask;
/* Per binding: the VERTEX_BUFFER token's `offset`, with bit 15 set when the
 * token has vertexDynamicStride.
 */
uint32_t vbo_offsets[MAX_VBS];
/* One bit per push constant dword covered by a PUSH_CONSTANT token. */
uint64_t push_constant_mask;
/* Per push constant dword: the covering token's `offset` plus 4 bytes for each
 * dword since the start of that token's range.
 */
uint32_t push_constant_offsets[MAX_PUSH_CONSTANTS_SIZE / 4];
/* Value paired with VK_INDEX_TYPE_UINT32 in the INDEX_BUFFER token's
 * pIndexTypes/pIndexTypeValues; defaults to VK_INDEX_TYPE_UINT32.
 */
uint32_t ibo_type_32;
/* Same for VK_INDEX_TYPE_UINT8_EXT; defaults to VK_INDEX_TYPE_UINT8_EXT. */
uint32_t ibo_type_8;
/* Copy of the create info's pTokens, stored in the same allocation right
 * after the fixed fields.
 */
VkIndirectCommandsLayoutTokenNV tokens[0];
};
/* Returns the aligned size in bytes of the command stream generated for
 * cmd_info->sequencesCount sequences with cmd_info's layout and pipeline.
 */
uint32_t radv_get_indirect_cmdbuf_size(const VkGeneratedCommandsInfoNV *cmd_info);
uint64_t radv_get_current_time(void);
static inline uint32_t
@ -3256,6 +3284,8 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(radv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_image_view, vk.base, VkImageView,
VK_OBJECT_TYPE_IMAGE_VIEW);
/* Handle casts between VkIndirectCommandsLayoutNV and
 * struct radv_indirect_command_layout (base object member: `base`).
 */
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_indirect_command_layout, base, VkIndirectCommandsLayoutNV,
VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NV)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline_cache, base, VkPipelineCache,
VK_OBJECT_TYPE_PIPELINE_CACHE)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline, base, VkPipeline,