turnip: implement VK_KHR_shader_draw_parameters
Note: going by the blob, VFD_INDEX_OFFSET/FD_INSTANCE_START_OFFSET seem completely unused by indirect draws, so this changes them to only be set for non-indirect draws (and moves them to the vs_params draw state). Passes dEQP-VK.draw.shader_draw_parameters.* Signed-off-by: Jonathan Marek <jonathan@marek.ca> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5635>
This commit is contained in:
parent
16a9e233da
commit
62de79ac44
|
@ -919,6 +919,7 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
|||
tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AB00, 0x5);
|
||||
tu_cs_emit_write_reg(cs, REG_A6XX_VPC_GS_SIV_CNTL, 0x0000ffff);
|
||||
|
||||
/* TODO: set A6XX_VFD_ADD_OFFSET_INSTANCE and fix ir3 to avoid adding base instance */
|
||||
tu_cs_emit_write_reg(cs, REG_A6XX_VFD_ADD_OFFSET, A6XX_VFD_ADD_OFFSET_VERTEX);
|
||||
tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8811, 0x00000010);
|
||||
tu_cs_emit_write_reg(cs, REG_A6XX_PC_MODE_CNTL, 0x1f);
|
||||
|
@ -2973,46 +2974,6 @@ tu6_emit_consts(struct tu_cmd_buffer *cmd,
|
|||
return tu_cs_end_sub_stream(&cmd->sub_cs, &cs);
|
||||
}
|
||||
|
||||
static VkResult
|
||||
tu6_emit_vs_params(struct tu_cmd_buffer *cmd,
|
||||
uint32_t first_instance,
|
||||
struct tu_cs_entry *entry)
|
||||
{
|
||||
/* TODO: fill out more than just base instance */
|
||||
const struct tu_program_descriptor_linkage *link =
|
||||
&cmd->state.pipeline->program.link[MESA_SHADER_VERTEX];
|
||||
const struct ir3_const_state *const_state = &link->const_state;
|
||||
struct tu_cs cs;
|
||||
|
||||
if (const_state->offsets.driver_param >= link->constlen) {
|
||||
*entry = (struct tu_cs_entry) {};
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VkResult result = tu_cs_begin_sub_stream(&cmd->sub_cs, 8, &cs);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
tu_cs_emit_pkt7(&cs, CP_LOAD_STATE6_GEOM, 3 + 4);
|
||||
tu_cs_emit(&cs, CP_LOAD_STATE6_0_DST_OFF(const_state->offsets.driver_param) |
|
||||
CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
|
||||
CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
|
||||
CP_LOAD_STATE6_0_STATE_BLOCK(SB6_VS_SHADER) |
|
||||
CP_LOAD_STATE6_0_NUM_UNIT(1));
|
||||
tu_cs_emit(&cs, 0);
|
||||
tu_cs_emit(&cs, 0);
|
||||
|
||||
STATIC_ASSERT(IR3_DP_INSTID_BASE == 2);
|
||||
|
||||
tu_cs_emit(&cs, 0);
|
||||
tu_cs_emit(&cs, 0);
|
||||
tu_cs_emit(&cs, first_instance);
|
||||
tu_cs_emit(&cs, 0);
|
||||
|
||||
*entry = tu_cs_end_sub_stream(&cmd->sub_cs, &cs);
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static struct tu_cs_entry
|
||||
tu6_emit_vertex_buffers(struct tu_cmd_buffer *cmd,
|
||||
const struct tu_pipeline *pipeline)
|
||||
|
@ -3156,9 +3117,7 @@ static VkResult
|
|||
tu6_draw_common(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
bool indexed,
|
||||
uint32_t vertex_offset,
|
||||
uint32_t first_instance,
|
||||
/* note: draw_count count is 0 for indirect */
|
||||
/* note: draw_count is 0 for indirect */
|
||||
uint32_t draw_count)
|
||||
{
|
||||
const struct tu_pipeline *pipeline = cmd->state.pipeline;
|
||||
|
@ -3171,10 +3130,6 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
|
|||
|
||||
/* TODO lrz */
|
||||
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_VFD_INDEX_OFFSET(vertex_offset),
|
||||
A6XX_VFD_INSTANCE_START_OFFSET(first_instance));
|
||||
|
||||
tu_cs_emit_regs(cs, A6XX_PC_PRIMITIVE_CNTL_0(
|
||||
.primitive_restart =
|
||||
pipeline->ia.primitive_restart && indexed,
|
||||
|
@ -3225,11 +3180,6 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
|
|||
if (cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS)
|
||||
cmd->state.vertex_buffers_ib = tu6_emit_vertex_buffers(cmd, pipeline);
|
||||
|
||||
struct tu_cs_entry vs_params;
|
||||
result = tu6_emit_vs_params(cmd, first_instance, &vs_params);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
bool has_tess =
|
||||
pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
|
||||
struct tu_cs_entry tess_consts = {};
|
||||
|
@ -3269,7 +3219,7 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
|
|||
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS, cmd->state.desc_sets_ib);
|
||||
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS_LOAD, cmd->state.desc_sets_load_ib);
|
||||
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VB, cmd->state.vertex_buffers_ib);
|
||||
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VS_PARAMS, vs_params);
|
||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VS_PARAMS, cmd->state.vs_params);
|
||||
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(cmd->state.dynamic_state); i++) {
|
||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DYNAMIC + i,
|
||||
|
@ -3306,7 +3256,7 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
|
|||
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS_LOAD, cmd->state.desc_sets_load_ib);
|
||||
if (cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS)
|
||||
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VB, cmd->state.vertex_buffers_ib);
|
||||
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VS_PARAMS, vs_params);
|
||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VS_PARAMS, cmd->state.vs_params);
|
||||
}
|
||||
|
||||
tu_cs_sanity_check(cs);
|
||||
|
@ -3352,6 +3302,68 @@ tu_draw_initiator(struct tu_cmd_buffer *cmd, enum pc_di_src_sel src_sel)
|
|||
return initiator;
|
||||
}
|
||||
|
||||
|
||||
static uint32_t
|
||||
vs_params_offset(struct tu_cmd_buffer *cmd)
|
||||
{
|
||||
const struct tu_program_descriptor_linkage *link =
|
||||
&cmd->state.pipeline->program.link[MESA_SHADER_VERTEX];
|
||||
const struct ir3_const_state *const_state = &link->const_state;
|
||||
|
||||
if (const_state->offsets.driver_param >= link->constlen)
|
||||
return 0;
|
||||
|
||||
/* this layout is required by CP_DRAW_INDIRECT_MULTI */
|
||||
STATIC_ASSERT(IR3_DP_DRAWID == 0);
|
||||
STATIC_ASSERT(IR3_DP_VTXID_BASE == 1);
|
||||
STATIC_ASSERT(IR3_DP_INSTID_BASE == 2);
|
||||
|
||||
/* 0 means disabled for CP_DRAW_INDIRECT_MULTI */
|
||||
assert(const_state->offsets.driver_param != 0);
|
||||
|
||||
return const_state->offsets.driver_param;
|
||||
}
|
||||
|
||||
static struct tu_draw_state
|
||||
tu6_emit_vs_params(struct tu_cmd_buffer *cmd,
|
||||
uint32_t vertex_offset,
|
||||
uint32_t first_instance)
|
||||
{
|
||||
uint32_t offset = vs_params_offset(cmd);
|
||||
|
||||
struct tu_cs cs;
|
||||
VkResult result = tu_cs_begin_sub_stream(&cmd->sub_cs, 3 + (offset ? 8 : 0), &cs);
|
||||
if (result != VK_SUCCESS) {
|
||||
cmd->record_result = result;
|
||||
return (struct tu_draw_state) {};
|
||||
}
|
||||
|
||||
/* TODO: don't make a new draw state when it doesn't change */
|
||||
|
||||
tu_cs_emit_regs(&cs,
|
||||
A6XX_VFD_INDEX_OFFSET(vertex_offset),
|
||||
A6XX_VFD_INSTANCE_START_OFFSET(first_instance));
|
||||
|
||||
if (offset) {
|
||||
tu_cs_emit_pkt7(&cs, CP_LOAD_STATE6_GEOM, 3 + 4);
|
||||
tu_cs_emit(&cs, CP_LOAD_STATE6_0_DST_OFF(offset) |
|
||||
CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
|
||||
CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
|
||||
CP_LOAD_STATE6_0_STATE_BLOCK(SB6_VS_SHADER) |
|
||||
CP_LOAD_STATE6_0_NUM_UNIT(1));
|
||||
tu_cs_emit(&cs, 0);
|
||||
tu_cs_emit(&cs, 0);
|
||||
|
||||
tu_cs_emit(&cs, 0);
|
||||
tu_cs_emit(&cs, vertex_offset);
|
||||
tu_cs_emit(&cs, first_instance);
|
||||
tu_cs_emit(&cs, 0);
|
||||
}
|
||||
|
||||
struct tu_cs_entry entry = tu_cs_end_sub_stream(&cmd->sub_cs, &cs);
|
||||
return (struct tu_draw_state) {entry.bo->iova + entry.offset, entry.size / 4};
|
||||
}
|
||||
|
||||
void
|
||||
tu_CmdDraw(VkCommandBuffer commandBuffer,
|
||||
uint32_t vertexCount,
|
||||
|
@ -3362,7 +3374,9 @@ tu_CmdDraw(VkCommandBuffer commandBuffer,
|
|||
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
|
||||
struct tu_cs *cs = &cmd->draw_cs;
|
||||
|
||||
tu6_draw_common(cmd, cs, false, firstVertex, firstInstance, vertexCount);
|
||||
cmd->state.vs_params = tu6_emit_vs_params(cmd, firstVertex, firstInstance);
|
||||
|
||||
tu6_draw_common(cmd, cs, false, vertexCount);
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 3);
|
||||
tu_cs_emit(cs, tu_draw_initiator(cmd, DI_SRC_SEL_AUTO_INDEX));
|
||||
|
@ -3381,7 +3395,9 @@ tu_CmdDrawIndexed(VkCommandBuffer commandBuffer,
|
|||
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
|
||||
struct tu_cs *cs = &cmd->draw_cs;
|
||||
|
||||
tu6_draw_common(cmd, cs, true, vertexOffset, firstInstance, indexCount);
|
||||
cmd->state.vs_params = tu6_emit_vs_params(cmd, vertexOffset, firstInstance);
|
||||
|
||||
tu6_draw_common(cmd, cs, true, indexCount);
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 7);
|
||||
tu_cs_emit(cs, tu_draw_initiator(cmd, DI_SRC_SEL_DMA));
|
||||
|
@ -3403,13 +3419,25 @@ tu_CmdDrawIndirect(VkCommandBuffer commandBuffer,
|
|||
TU_FROM_HANDLE(tu_buffer, buf, _buffer);
|
||||
struct tu_cs *cs = &cmd->draw_cs;
|
||||
|
||||
tu6_draw_common(cmd, cs, false, 0, 0, 0);
|
||||
cmd->state.vs_params = (struct tu_draw_state) {};
|
||||
|
||||
for (uint32_t i = 0; i < drawCount; i++) {
|
||||
tu_cs_emit_pkt7(cs, CP_DRAW_INDIRECT, 3);
|
||||
tu_cs_emit(cs, tu_draw_initiator(cmd, DI_SRC_SEL_AUTO_INDEX));
|
||||
tu_cs_emit_qw(cs, buf->bo->iova + buf->bo_offset + offset + stride * i);
|
||||
}
|
||||
tu6_draw_common(cmd, cs, false, 0);
|
||||
|
||||
/* workaround for a firmware bug with CP_DRAW_INDIRECT_MULTI, where it
|
||||
* doesn't wait for WFIs to be completed and leads to GPU fault/hang
|
||||
* TODO: this could be worked around in a more performant way,
|
||||
* or there may exist newer firmware that has been fixed
|
||||
*/
|
||||
if (cmd->device->physical_device->gpu_id != 650)
|
||||
tu_cs_emit_pkt7(cs, CP_WAIT_FOR_ME, 0);
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_DRAW_INDIRECT_MULTI, 6);
|
||||
tu_cs_emit(cs, tu_draw_initiator(cmd, DI_SRC_SEL_AUTO_INDEX));
|
||||
tu_cs_emit(cs, A6XX_CP_DRAW_INDIRECT_MULTI_1_OPCODE(INDIRECT_OP_NORMAL) |
|
||||
A6XX_CP_DRAW_INDIRECT_MULTI_1_DST_OFF(vs_params_offset(cmd)));
|
||||
tu_cs_emit(cs, drawCount);
|
||||
tu_cs_emit_qw(cs, buf->bo->iova + buf->bo_offset + offset);
|
||||
tu_cs_emit(cs, stride);
|
||||
|
||||
tu_bo_list_add(&cmd->bo_list, buf->bo, MSM_SUBMIT_BO_READ);
|
||||
}
|
||||
|
@ -3425,15 +3453,27 @@ tu_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,
|
|||
TU_FROM_HANDLE(tu_buffer, buf, _buffer);
|
||||
struct tu_cs *cs = &cmd->draw_cs;
|
||||
|
||||
tu6_draw_common(cmd, cs, true, 0, 0, 0);
|
||||
cmd->state.vs_params = (struct tu_draw_state) {};
|
||||
|
||||
for (uint32_t i = 0; i < drawCount; i++) {
|
||||
tu_cs_emit_pkt7(cs, CP_DRAW_INDX_INDIRECT, 6);
|
||||
tu_cs_emit(cs, tu_draw_initiator(cmd, DI_SRC_SEL_DMA));
|
||||
tu_cs_emit_qw(cs, cmd->state.index_va);
|
||||
tu_cs_emit(cs, A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES(cmd->state.max_index_count));
|
||||
tu_cs_emit_qw(cs, buf->bo->iova + buf->bo_offset + offset + stride * i);
|
||||
}
|
||||
tu6_draw_common(cmd, cs, true, 0);
|
||||
|
||||
/* workaround for a firmware bug with CP_DRAW_INDIRECT_MULTI, where it
|
||||
* doesn't wait for WFIs to be completed and leads to GPU fault/hang
|
||||
* TODO: this could be worked around in a more performant way,
|
||||
* or there may exist newer firmware that has been fixed
|
||||
*/
|
||||
if (cmd->device->physical_device->gpu_id != 650)
|
||||
tu_cs_emit_pkt7(cs, CP_WAIT_FOR_ME, 0);
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_DRAW_INDIRECT_MULTI, 9);
|
||||
tu_cs_emit(cs, tu_draw_initiator(cmd, DI_SRC_SEL_DMA));
|
||||
tu_cs_emit(cs, A6XX_CP_DRAW_INDIRECT_MULTI_1_OPCODE(INDIRECT_OP_INDEXED) |
|
||||
A6XX_CP_DRAW_INDIRECT_MULTI_1_DST_OFF(vs_params_offset(cmd)));
|
||||
tu_cs_emit(cs, drawCount);
|
||||
tu_cs_emit_qw(cs, cmd->state.index_va);
|
||||
tu_cs_emit(cs, cmd->state.max_index_count);
|
||||
tu_cs_emit_qw(cs, buf->bo->iova + buf->bo_offset + offset);
|
||||
tu_cs_emit(cs, stride);
|
||||
|
||||
tu_bo_list_add(&cmd->bo_list, buf->bo, MSM_SUBMIT_BO_READ);
|
||||
}
|
||||
|
@ -3450,7 +3490,9 @@ void tu_CmdDrawIndirectByteCountEXT(VkCommandBuffer commandBuffer,
|
|||
TU_FROM_HANDLE(tu_buffer, buf, _counterBuffer);
|
||||
struct tu_cs *cs = &cmd->draw_cs;
|
||||
|
||||
tu6_draw_common(cmd, cs, false, 0, firstInstance, 0);
|
||||
cmd->state.vs_params = tu6_emit_vs_params(cmd, 0, firstInstance);
|
||||
|
||||
tu6_draw_common(cmd, cs, false, 0);
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_DRAW_AUTO, 6);
|
||||
tu_cs_emit(cs, tu_draw_initiator(cmd, DI_SRC_SEL_AUTO_XFB));
|
||||
|
|
|
@ -590,8 +590,8 @@ tu_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice,
|
|||
.sampleRateShading = true,
|
||||
.dualSrcBlend = true,
|
||||
.logicOp = true,
|
||||
.multiDrawIndirect = false,
|
||||
.drawIndirectFirstInstance = false,
|
||||
.multiDrawIndirect = true,
|
||||
.drawIndirectFirstInstance = true,
|
||||
.depthClamp = true,
|
||||
.depthBiasClamp = false,
|
||||
.fillModeNonSolid = false,
|
||||
|
@ -636,6 +636,22 @@ tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
|
|||
vk_foreach_struct(ext, pFeatures->pNext)
|
||||
{
|
||||
switch (ext->sType) {
|
||||
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES:
|
||||
*((VkPhysicalDeviceVulkan11Features*) ext) = (VkPhysicalDeviceVulkan11Features) {
|
||||
.storageBuffer16BitAccess = false,
|
||||
.uniformAndStorageBuffer16BitAccess = false,
|
||||
.storagePushConstant16 = false,
|
||||
.storageInputOutput16 = false,
|
||||
.multiview = false,
|
||||
.multiviewGeometryShader = false,
|
||||
.multiviewTessellationShader = false,
|
||||
.variablePointersStorageBuffer = false,
|
||||
.variablePointers = false,
|
||||
.protectedMemory = false,
|
||||
.samplerYcbcrConversion = true,
|
||||
.shaderDrawParameters = true,
|
||||
};
|
||||
break;
|
||||
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES: {
|
||||
VkPhysicalDeviceVariablePointersFeatures *features = (void *) ext;
|
||||
features->variablePointersStorageBuffer = false;
|
||||
|
@ -653,7 +669,7 @@ tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
|
|||
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES: {
|
||||
VkPhysicalDeviceShaderDrawParametersFeatures *features =
|
||||
(VkPhysicalDeviceShaderDrawParametersFeatures *) ext;
|
||||
features->shaderDrawParameters = false;
|
||||
features->shaderDrawParameters = true;
|
||||
break;
|
||||
}
|
||||
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: {
|
||||
|
|
|
@ -81,6 +81,7 @@ EXTENSIONS = [
|
|||
Extension('VK_EXT_filter_cubic', 1, 'device->gpu_id == 650'),
|
||||
Extension('VK_EXT_index_type_uint8', 1, True),
|
||||
Extension('VK_EXT_vertex_attribute_divisor', 1, True),
|
||||
Extension('VK_KHR_shader_draw_parameters', 1, True),
|
||||
]
|
||||
|
||||
MAX_API_VERSION = VkVersion(MAX_API_VERSION)
|
||||
|
|
|
@ -815,6 +815,8 @@ struct tu_cmd_state
|
|||
struct tu_cs_entry desc_sets_ib, desc_sets_load_ib;
|
||||
struct tu_cs_entry ia_gmem_ib, ia_sysmem_ib;
|
||||
|
||||
struct tu_draw_state vs_params;
|
||||
|
||||
/* Index buffer */
|
||||
uint64_t index_va;
|
||||
uint32_t max_index_count;
|
||||
|
|
|
@ -48,6 +48,7 @@ tu_spirv_to_nir(struct ir3_compiler *compiler,
|
|||
.caps = {
|
||||
.transform_feedback = true,
|
||||
.tessellation = true,
|
||||
.draw_parameters = true,
|
||||
},
|
||||
};
|
||||
const nir_shader_compiler_options *nir_options =
|
||||
|
|
Loading…
Reference in New Issue