turnip: implement VK_EXT_extended_dynamic_state

Passes dEQP-VK.pipeline.extended_dynamic_state.*

Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5641>
Jonathan Marek 2020-09-17 10:16:42 -04:00 committed by Marge Bot
parent b2fa2d99ae
commit dcba32bac0
5 changed files with 452 additions and 109 deletions
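As context for readers of this page (not part of the commit itself), a minimal sketch of how an application would exercise the entry points implemented below, assuming the extension was enabled at device creation; the vkGetDeviceProcAddr plumbing normally needed for EXT entry points is elided, and cmd/pipeline/buf/offset/stride are placeholder handles:

#include <vulkan/vulkan.h>

static void
record_draw_with_dynamic_state(VkCommandBuffer cmd, VkPipeline pipeline,
                               VkBuffer buf, VkDeviceSize offset,
                               VkDeviceSize stride)
{
   vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
   /* rasterizer state that previously had to be baked into the pipeline */
   vkCmdSetCullModeEXT(cmd, VK_CULL_MODE_BACK_BIT);
   vkCmdSetFrontFaceEXT(cmd, VK_FRONT_FACE_COUNTER_CLOCKWISE);
   vkCmdSetPrimitiveTopologyEXT(cmd, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST);
   /* depth/stencil state */
   vkCmdSetDepthTestEnableEXT(cmd, VK_TRUE);
   vkCmdSetDepthWriteEnableEXT(cmd, VK_TRUE);
   vkCmdSetDepthCompareOpEXT(cmd, VK_COMPARE_OP_LESS_OR_EQUAL);
   vkCmdSetStencilTestEnableEXT(cmd, VK_FALSE);
   /* vertex buffer binding with a per-draw stride */
   vkCmdBindVertexBuffers2EXT(cmd, 0, 1, &buf, &offset, NULL, &stride);
   vkCmdDraw(cmd, 3, 1, 0, 0);
}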

src/freedreno/vulkan/tu_cmd_buffer.c

@@ -464,6 +464,8 @@ tu_cs_emit_draw_state(struct tu_cs *cs, uint32_t id, struct tu_draw_state state)
break;
}
STATIC_ASSERT(TU_DRAW_STATE_COUNT <= 32);
/* We need to reload the descriptors every time the descriptor sets
* change. However, the commands we send only depend on the pipeline
* because the whole point is to cache descriptors which are used by the
@@ -1567,6 +1569,19 @@ tu_CmdBindVertexBuffers(VkCommandBuffer commandBuffer,
uint32_t bindingCount,
const VkBuffer *pBuffers,
const VkDeviceSize *pOffsets)
{
tu_CmdBindVertexBuffers2EXT(commandBuffer, firstBinding, bindingCount,
pBuffers, pOffsets, NULL, NULL);
}
void
tu_CmdBindVertexBuffers2EXT(VkCommandBuffer commandBuffer,
uint32_t firstBinding,
uint32_t bindingCount,
const VkBuffer* pBuffers,
const VkDeviceSize* pOffsets,
const VkDeviceSize* pSizes,
const VkDeviceSize* pStrides)
{
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
struct tu_cs cs;
@@ -1577,7 +1592,9 @@ tu_CmdBindVertexBuffers(VkCommandBuffer commandBuffer,
struct tu_buffer *buf = tu_buffer_from_handle(pBuffers[i]);
cmd->state.vb[firstBinding + i].base = tu_buffer_iova(buf) + pOffsets[i];
cmd->state.vb[firstBinding + i].size = buf->size - pOffsets[i];
cmd->state.vb[firstBinding + i].size = pSizes ? pSizes[i] : (buf->size - pOffsets[i]);
if (pStrides)
cmd->state.vb[firstBinding + i].stride = pStrides[i];
}
for (uint32_t i = 0; i < MAX_VBS; i++) {
@@ -1588,6 +1605,16 @@ tu_CmdBindVertexBuffers(VkCommandBuffer commandBuffer,
}
cmd->state.dirty |= TU_CMD_DIRTY_VERTEX_BUFFERS;
if (pStrides) {
cmd->state.dynamic_state[TU_DYNAMIC_STATE_VB_STRIDE].iova =
tu_cs_draw_state(&cmd->sub_cs, &cs, 2 * MAX_VBS).iova;
for (uint32_t i = 0; i < MAX_VBS; i++)
tu_cs_emit_regs(&cs, A6XX_VFD_FETCH_STRIDE(i, cmd->state.vb[i].stride));
cmd->state.dirty |= TU_CMD_DIRTY_VB_STRIDE;
}
}
void
@@ -1985,29 +2012,17 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
uint32_t mask = ~pipeline->dynamic_state_mask & BITFIELD_MASK(TU_DYNAMIC_STATE_COUNT);
uint32_t i;
tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * (7 + util_bitcount(mask)));
tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * (6 + util_bitcount(mask)));
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_PROGRAM, pipeline->program.state);
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_PROGRAM_BINNING, pipeline->program.binning_state);
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VI, pipeline->vi.state);
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VI_BINNING, pipeline->vi.binning_state);
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_RAST, pipeline->rast_state);
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DS, pipeline->ds_state);
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_BLEND, pipeline->blend_state);
for_each_bit(i, mask)
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DYNAMIC + i, pipeline->dynamic_state[i]);
/* dynamic linewidth state depends pipeline state's gras_su_cntl
* so the dynamic state ib must be updated when pipeline changes
*/
if (pipeline->dynamic_state_mask & BIT(VK_DYNAMIC_STATE_LINE_WIDTH)) {
struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_LINE_WIDTH, 2);
cmd->state.dynamic_gras_su_cntl &= A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK;
cmd->state.dynamic_gras_su_cntl |= pipeline->gras_su_cntl;
tu_cs_emit_regs(&cs, A6XX_GRAS_SU_CNTL(.dword = cmd->state.dynamic_gras_su_cntl));
}
/* the vertex_buffers draw state always contains all the currently
* bound vertex buffers. update its size to only emit the vbs which
* are actually used by the pipeline
@@ -2018,6 +2033,38 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
cmd->state.vertex_buffers.size = pipeline->num_vbs * 4;
cmd->state.dirty |= TU_CMD_DIRTY_VERTEX_BUFFERS;
}
if ((pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_VB_STRIDE)) &&
cmd->state.dynamic_state[TU_DYNAMIC_STATE_VB_STRIDE].size != pipeline->num_vbs * 2) {
cmd->state.dynamic_state[TU_DYNAMIC_STATE_VB_STRIDE].size = pipeline->num_vbs * 2;
cmd->state.dirty |= TU_CMD_DIRTY_VB_STRIDE;
}
#define UPDATE_REG(X, Y) { \
/* note: would be better to have pipeline bits already masked */ \
uint32_t pipeline_bits = pipeline->X & pipeline->X##_mask; \
if ((cmd->state.X & pipeline->X##_mask) != pipeline_bits) { \
cmd->state.X &= ~pipeline->X##_mask; \
cmd->state.X |= pipeline_bits; \
cmd->state.dirty |= TU_CMD_DIRTY_##Y; \
} \
if (!(pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_##Y))) \
cmd->state.dirty &= ~TU_CMD_DIRTY_##Y; \
}
/* these registers can have bits set from both pipeline and dynamic state;
* this updates the bits set by the pipeline. if the pipeline doesn't use a
* dynamic state for the register, then the relevant dirty bit is cleared to
* avoid overriding the non-dynamic state with a dynamic state on the next
* draw.
*/
UPDATE_REG(gras_su_cntl, GRAS_SU_CNTL);
UPDATE_REG(rb_depth_cntl, RB_DEPTH_CNTL);
UPDATE_REG(rb_stencil_cntl, RB_STENCIL_CNTL);
#undef UPDATE_REG
if (pipeline->rb_depth_cntl_disable)
cmd->state.dirty |= TU_CMD_DIRTY_RB_DEPTH_CNTL;
}
void
@@ -2056,12 +2103,11 @@ void
tu_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth)
{
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_LINE_WIDTH, 2);
cmd->state.dynamic_gras_su_cntl &= ~A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK;
cmd->state.dynamic_gras_su_cntl |= A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(lineWidth / 2.0f);
cmd->state.gras_su_cntl &= ~A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK;
cmd->state.gras_su_cntl |= A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(lineWidth / 2.0f);
tu_cs_emit_regs(&cs, A6XX_GRAS_SU_CNTL(.dword = cmd->state.dynamic_gras_su_cntl));
cmd->state.dirty |= TU_CMD_DIRTY_GRAS_SU_CNTL;
}
void
@@ -2160,6 +2206,178 @@ tu_CmdSetSampleLocationsEXT(VkCommandBuffer commandBuffer,
tu6_emit_sample_locations(&cs, pSampleLocationsInfo);
}
void
tu_CmdSetCullModeEXT(VkCommandBuffer commandBuffer, VkCullModeFlags cullMode)
{
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
cmd->state.gras_su_cntl &=
~(A6XX_GRAS_SU_CNTL_CULL_FRONT | A6XX_GRAS_SU_CNTL_CULL_BACK);
if (cullMode & VK_CULL_MODE_FRONT_BIT)
cmd->state.gras_su_cntl |= A6XX_GRAS_SU_CNTL_CULL_FRONT;
if (cullMode & VK_CULL_MODE_BACK_BIT)
cmd->state.gras_su_cntl |= A6XX_GRAS_SU_CNTL_CULL_BACK;
cmd->state.dirty |= TU_CMD_DIRTY_GRAS_SU_CNTL;
}
void
tu_CmdSetFrontFaceEXT(VkCommandBuffer commandBuffer, VkFrontFace frontFace)
{
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
cmd->state.gras_su_cntl &= ~A6XX_GRAS_SU_CNTL_FRONT_CW;
if (frontFace == VK_FRONT_FACE_CLOCKWISE)
cmd->state.gras_su_cntl |= A6XX_GRAS_SU_CNTL_FRONT_CW;
cmd->state.dirty |= TU_CMD_DIRTY_GRAS_SU_CNTL;
}
void
tu_CmdSetPrimitiveTopologyEXT(VkCommandBuffer commandBuffer,
VkPrimitiveTopology primitiveTopology)
{
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
cmd->state.primtype = tu6_primtype(primitiveTopology);
}
void
tu_CmdSetViewportWithCountEXT(VkCommandBuffer commandBuffer,
uint32_t viewportCount,
const VkViewport* pViewports)
{
tu_CmdSetViewport(commandBuffer, 0, viewportCount, pViewports);
}
void
tu_CmdSetScissorWithCountEXT(VkCommandBuffer commandBuffer,
uint32_t scissorCount,
const VkRect2D* pScissors)
{
tu_CmdSetScissor(commandBuffer, 0, scissorCount, pScissors);
}
void
tu_CmdSetDepthTestEnableEXT(VkCommandBuffer commandBuffer,
VkBool32 depthTestEnable)
{
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
cmd->state.rb_depth_cntl &= ~A6XX_RB_DEPTH_CNTL_Z_ENABLE;
if (depthTestEnable)
cmd->state.rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_ENABLE;
cmd->state.dirty |= TU_CMD_DIRTY_RB_DEPTH_CNTL;
}
void
tu_CmdSetDepthWriteEnableEXT(VkCommandBuffer commandBuffer,
VkBool32 depthWriteEnable)
{
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
cmd->state.rb_depth_cntl &= ~A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE;
if (depthWriteEnable)
cmd->state.rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE;
cmd->state.dirty |= TU_CMD_DIRTY_RB_DEPTH_CNTL;
}
void
tu_CmdSetDepthCompareOpEXT(VkCommandBuffer commandBuffer,
VkCompareOp depthCompareOp)
{
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
cmd->state.rb_depth_cntl &= ~A6XX_RB_DEPTH_CNTL_ZFUNC__MASK;
cmd->state.rb_depth_cntl |=
A6XX_RB_DEPTH_CNTL_ZFUNC(tu6_compare_func(depthCompareOp));
cmd->state.dirty |= TU_CMD_DIRTY_RB_DEPTH_CNTL;
}
void
tu_CmdSetDepthBoundsTestEnableEXT(VkCommandBuffer commandBuffer,
VkBool32 depthBoundsTestEnable)
{
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
cmd->state.rb_depth_cntl &= ~A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE;
if (depthBoundsTestEnable)
cmd->state.rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE;
cmd->state.dirty |= TU_CMD_DIRTY_RB_DEPTH_CNTL;
}
void
tu_CmdSetStencilTestEnableEXT(VkCommandBuffer commandBuffer,
VkBool32 stencilTestEnable)
{
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
cmd->state.rb_stencil_cntl &= ~(
A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
A6XX_RB_STENCIL_CONTROL_STENCIL_READ);
if (stencilTestEnable) {
cmd->state.rb_stencil_cntl |=
A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
A6XX_RB_STENCIL_CONTROL_STENCIL_READ;
}
cmd->state.dirty |= TU_CMD_DIRTY_RB_STENCIL_CNTL;
}
void
tu_CmdSetStencilOpEXT(VkCommandBuffer commandBuffer,
VkStencilFaceFlags faceMask,
VkStencilOp failOp,
VkStencilOp passOp,
VkStencilOp depthFailOp,
VkCompareOp compareOp)
{
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
if (faceMask & VK_STENCIL_FACE_FRONT_BIT) {
cmd->state.rb_stencil_cntl &= ~(
A6XX_RB_STENCIL_CONTROL_FUNC__MASK |
A6XX_RB_STENCIL_CONTROL_FAIL__MASK |
A6XX_RB_STENCIL_CONTROL_ZPASS__MASK |
A6XX_RB_STENCIL_CONTROL_ZFAIL__MASK);
cmd->state.rb_stencil_cntl |=
A6XX_RB_STENCIL_CONTROL_FUNC(tu6_compare_func(compareOp)) |
A6XX_RB_STENCIL_CONTROL_FAIL(tu6_stencil_op(failOp)) |
A6XX_RB_STENCIL_CONTROL_ZPASS(tu6_stencil_op(passOp)) |
A6XX_RB_STENCIL_CONTROL_ZFAIL(tu6_stencil_op(depthFailOp));
}
if (faceMask & VK_STENCIL_FACE_BACK_BIT) {
cmd->state.rb_stencil_cntl &= ~(
A6XX_RB_STENCIL_CONTROL_FUNC_BF__MASK |
A6XX_RB_STENCIL_CONTROL_FAIL_BF__MASK |
A6XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK |
A6XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK);
cmd->state.rb_stencil_cntl |=
A6XX_RB_STENCIL_CONTROL_FUNC_BF(tu6_compare_func(compareOp)) |
A6XX_RB_STENCIL_CONTROL_FAIL_BF(tu6_stencil_op(failOp)) |
A6XX_RB_STENCIL_CONTROL_ZPASS_BF(tu6_stencil_op(passOp)) |
A6XX_RB_STENCIL_CONTROL_ZFAIL_BF(tu6_stencil_op(depthFailOp));
}
cmd->state.dirty |= TU_CMD_DIRTY_RB_STENCIL_CNTL;
}
static void
tu_flush_for_access(struct tu_cache_state *cache,
enum tu_cmd_access_mask src_mask,
@@ -2902,6 +3120,30 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
.tess_upper_left_domain_origin =
pipeline->tess.upper_left_domain_origin));
if (cmd->state.dirty & TU_CMD_DIRTY_GRAS_SU_CNTL) {
struct tu_cs cs = tu_cmd_dynamic_state(cmd, TU_DYNAMIC_STATE_GRAS_SU_CNTL, 2);
tu_cs_emit_regs(&cs, A6XX_GRAS_SU_CNTL(.dword = cmd->state.gras_su_cntl));
}
if (cmd->state.dirty & TU_CMD_DIRTY_RB_DEPTH_CNTL) {
struct tu_cs cs = tu_cmd_dynamic_state(cmd, TU_DYNAMIC_STATE_RB_DEPTH_CNTL, 2);
uint32_t rb_depth_cntl = cmd->state.rb_depth_cntl;
if ((rb_depth_cntl & A6XX_RB_DEPTH_CNTL_Z_ENABLE) ||
(rb_depth_cntl & A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE))
rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE;
if (pipeline->rb_depth_cntl_disable)
rb_depth_cntl = 0;
tu_cs_emit_regs(&cs, A6XX_RB_DEPTH_CNTL(.dword = rb_depth_cntl));
}
if (cmd->state.dirty & TU_CMD_DIRTY_RB_STENCIL_CNTL) {
struct tu_cs cs = tu_cmd_dynamic_state(cmd, TU_DYNAMIC_STATE_RB_STENCIL_CNTL, 2);
tu_cs_emit_regs(&cs, A6XX_RB_STENCIL_CONTROL(.dword = cmd->state.rb_stencil_cntl));
}
if (cmd->state.dirty & TU_CMD_DIRTY_SHADER_CONSTS) {
cmd->state.shader_const[MESA_SHADER_VERTEX] =
tu6_emit_consts(cmd, pipeline, descriptors_state, MESA_SHADER_VERTEX);
@@ -2958,7 +3200,6 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VI, pipeline->vi.state);
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VI_BINNING, pipeline->vi.binning_state);
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_RAST, pipeline->rast_state);
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DS, pipeline->ds_state);
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_BLEND, pipeline->blend_state);
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VS_CONST, cmd->state.shader_const[MESA_SHADER_VERTEX]);
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_HS_CONST, cmd->state.shader_const[MESA_SHADER_TESS_CTRL]);
@@ -2980,6 +3221,7 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
/* emit draw states that were just updated
* note we eventually don't want to have to emit anything here
*/
bool emit_binding_stride = false;
uint32_t draw_state_count =
has_tess +
((cmd->state.dirty & TU_CMD_DIRTY_SHADER_CONSTS) ? 5 : 0) +
@@ -2987,6 +3229,12 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
((cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS) ? 1 : 0) +
1; /* vs_params */
if ((cmd->state.dirty & TU_CMD_DIRTY_VB_STRIDE) &&
!(pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_VB_STRIDE))) {
emit_binding_stride = true;
draw_state_count += 1;
}
tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * draw_state_count);
/* We may need to re-emit tess consts if the current draw call is
@@ -3004,6 +3252,10 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DESC_SETS_LOAD, pipeline->load_state);
if (cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS)
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VB, cmd->state.vertex_buffers);
if (emit_binding_stride) {
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DYNAMIC + TU_DYNAMIC_STATE_VB_STRIDE,
cmd->state.dynamic_state[TU_DYNAMIC_STATE_VB_STRIDE]);
}
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VS_PARAMS, cmd->state.vs_params);
}
@@ -3021,8 +3273,13 @@ static uint32_t
tu_draw_initiator(struct tu_cmd_buffer *cmd, enum pc_di_src_sel src_sel)
{
const struct tu_pipeline *pipeline = cmd->state.pipeline;
enum pc_di_primtype primtype = pipeline->ia.primtype;
if (pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY))
primtype = cmd->state.primtype;
uint32_t initiator =
CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(pipeline->ia.primtype) |
CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(primtype) |
CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(src_sel) |
CP_DRAW_INDX_OFFSET_0_INDEX_SIZE(cmd->state.index_size) |
CP_DRAW_INDX_OFFSET_0_VIS_CULL(USE_VISIBILITY);
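To summarize the register-sharing scheme the changes above introduce (GRAS_SU_CNTL, RB_DEPTH_CNTL and RB_STENCIL_CONTROL can each receive bits from both the bound pipeline and vkCmdSet* calls), here is a distilled, standalone sketch of the merge that the UPDATE_REG() macro performs; the function and parameter names are illustrative only, not the driver's:

/* Combine pipeline-owned register bits with dynamically-set bits.
 * pipeline_mask holds the bits the pipeline controls statically, i.e. the
 * bits that were NOT declared dynamic at pipeline creation. */
static uint32_t
merge_shared_reg(uint32_t dynamic_bits, uint32_t pipeline_bits,
                 uint32_t pipeline_mask)
{
   /* keep what the command buffer set dynamically ... */
   uint32_t merged = dynamic_bits & ~pipeline_mask;
   /* ... and overwrite the rest with the pipeline's static configuration */
   merged |= pipeline_bits & pipeline_mask;
   return merged;
}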

src/freedreno/vulkan/tu_device.c

@@ -619,6 +619,11 @@ tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
features->hostQueryReset = true;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT: {
VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *features = (void *)ext;
features->extendedDynamicState = true;
break;
}
default:
break;
}
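For completeness (application side, not part of this commit), the feature bit reported above is queried with vkGetPhysicalDeviceFeatures2 and then enabled by chaining the same structure into VkDeviceCreateInfo::pNext alongside VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME; a small sketch:

#include <stdbool.h>
#include <vulkan/vulkan.h>

static bool
has_extended_dynamic_state(VkPhysicalDevice pdev)
{
   VkPhysicalDeviceExtendedDynamicStateFeaturesEXT ext_features = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT,
   };
   VkPhysicalDeviceFeatures2 features2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
      .pNext = &ext_features,
   };
   vkGetPhysicalDeviceFeatures2(pdev, &features2);
   return ext_features.extendedDynamicState == VK_TRUE;
}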

src/freedreno/vulkan/tu_extensions.py

@@ -95,6 +95,7 @@ EXTENSIONS = [
Extension('VK_KHR_multiview', 1, True),
Extension('VK_EXT_host_query_reset', 1, True),
Extension('VK_EXT_shader_viewport_index_layer', 1, True),
Extension('VK_EXT_extended_dynamic_state', 1, True),
]
MAX_API_VERSION = VkVersion(MAX_API_VERSION)

src/freedreno/vulkan/tu_pipeline.c

@@ -1482,7 +1482,8 @@ tu6_emit_program(struct tu_cs *cs,
}
static void
tu6_emit_vertex_input(struct tu_cs *cs,
tu6_emit_vertex_input(struct tu_pipeline *pipeline,
struct tu_cs *cs,
const struct ir3_shader_variant *vs,
const VkPipelineVertexInputStateCreateInfo *info)
{
@@ -1494,8 +1495,10 @@ tu6_emit_vertex_input(struct tu_cs *cs,
const VkVertexInputBindingDescription *binding =
&info->pVertexBindingDescriptions[i];
tu_cs_emit_regs(cs,
A6XX_VFD_FETCH_STRIDE(binding->binding, binding->stride));
if (!(pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_VB_STRIDE))) {
tu_cs_emit_regs(cs,
A6XX_VFD_FETCH_STRIDE(binding->binding, binding->stride));
}
if (binding->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE)
binding_instanced |= 1 << binding->binding;
@@ -1717,7 +1720,8 @@ tu6_gras_su_cntl(const VkPipelineRasterizationStateCreateInfo *rast_info,
if (rast_info->frontFace == VK_FRONT_FACE_CLOCKWISE)
gras_su_cntl |= A6XX_GRAS_SU_CNTL_FRONT_CW;
/* don't set A6XX_GRAS_SU_CNTL_LINEHALFWIDTH */
gras_su_cntl |=
A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(rast_info->lineWidth / 2.0f);
if (rast_info->depthBiasEnable)
gras_su_cntl |= A6XX_GRAS_SU_CNTL_POLY_OFFSET;
@@ -1746,58 +1750,6 @@ tu6_emit_depth_bias(struct tu_cs *cs,
tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(clamp).value);
}
static void
tu6_emit_depth_control(struct tu_cs *cs,
const VkPipelineDepthStencilStateCreateInfo *ds_info,
const VkPipelineRasterizationStateCreateInfo *rast_info)
{
uint32_t rb_depth_cntl = 0;
if (ds_info->depthTestEnable) {
rb_depth_cntl |=
A6XX_RB_DEPTH_CNTL_Z_ENABLE |
A6XX_RB_DEPTH_CNTL_ZFUNC(tu6_compare_func(ds_info->depthCompareOp)) |
A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE; /* TODO: don't set for ALWAYS/NEVER */
if (rast_info->depthClampEnable)
rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_CLAMP_ENABLE;
if (ds_info->depthWriteEnable)
rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE;
}
if (ds_info->depthBoundsTestEnable)
rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE | A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE;
tu_cs_emit_pkt4(cs, REG_A6XX_RB_DEPTH_CNTL, 1);
tu_cs_emit(cs, rb_depth_cntl);
}
static void
tu6_emit_stencil_control(struct tu_cs *cs,
const VkPipelineDepthStencilStateCreateInfo *ds_info)
{
uint32_t rb_stencil_control = 0;
if (ds_info->stencilTestEnable) {
const VkStencilOpState *front = &ds_info->front;
const VkStencilOpState *back = &ds_info->back;
rb_stencil_control |=
A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
A6XX_RB_STENCIL_CONTROL_STENCIL_READ |
A6XX_RB_STENCIL_CONTROL_FUNC(tu6_compare_func(front->compareOp)) |
A6XX_RB_STENCIL_CONTROL_FAIL(tu6_stencil_op(front->failOp)) |
A6XX_RB_STENCIL_CONTROL_ZPASS(tu6_stencil_op(front->passOp)) |
A6XX_RB_STENCIL_CONTROL_ZFAIL(tu6_stencil_op(front->depthFailOp)) |
A6XX_RB_STENCIL_CONTROL_FUNC_BF(tu6_compare_func(back->compareOp)) |
A6XX_RB_STENCIL_CONTROL_FAIL_BF(tu6_stencil_op(back->failOp)) |
A6XX_RB_STENCIL_CONTROL_ZPASS_BF(tu6_stencil_op(back->passOp)) |
A6XX_RB_STENCIL_CONTROL_ZFAIL_BF(tu6_stencil_op(back->depthFailOp));
}
tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCIL_CONTROL, 1);
tu_cs_emit(cs, rb_stencil_control);
}
static uint32_t
tu6_rb_mrt_blend_control(const VkPipelineColorBlendAttachmentState *att,
bool has_alpha)
@@ -2131,15 +2083,72 @@ tu_pipeline_builder_parse_dynamic(struct tu_pipeline_builder *builder,
if (!dynamic_info)
return;
pipeline->gras_su_cntl_mask = ~0u;
pipeline->rb_depth_cntl_mask = ~0u;
pipeline->rb_stencil_cntl_mask = ~0u;
for (uint32_t i = 0; i < dynamic_info->dynamicStateCount; i++) {
VkDynamicState state = dynamic_info->pDynamicStates[i];
switch (state) {
case VK_DYNAMIC_STATE_VIEWPORT ... VK_DYNAMIC_STATE_STENCIL_REFERENCE:
if (state == VK_DYNAMIC_STATE_LINE_WIDTH)
pipeline->gras_su_cntl_mask &= ~A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK;
pipeline->dynamic_state_mask |= BIT(state);
break;
case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT:
pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_SAMPLE_LOCATIONS);
break;
case VK_DYNAMIC_STATE_CULL_MODE_EXT:
pipeline->gras_su_cntl_mask &=
~(A6XX_GRAS_SU_CNTL_CULL_BACK | A6XX_GRAS_SU_CNTL_CULL_FRONT);
pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_GRAS_SU_CNTL);
break;
case VK_DYNAMIC_STATE_FRONT_FACE_EXT:
pipeline->gras_su_cntl_mask &= ~A6XX_GRAS_SU_CNTL_FRONT_CW;
pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_GRAS_SU_CNTL);
break;
case VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT:
pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY);
break;
case VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT:
pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_VB_STRIDE);
break;
case VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT_EXT:
pipeline->dynamic_state_mask |= BIT(VK_DYNAMIC_STATE_VIEWPORT);
break;
case VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT_EXT:
pipeline->dynamic_state_mask |= BIT(VK_DYNAMIC_STATE_SCISSOR);
break;
case VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT:
pipeline->rb_depth_cntl_mask &=
~(A6XX_RB_DEPTH_CNTL_Z_ENABLE | A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE);
pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RB_DEPTH_CNTL);
break;
case VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT:
pipeline->rb_depth_cntl_mask &= ~A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE;
pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RB_DEPTH_CNTL);
break;
case VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT:
pipeline->rb_depth_cntl_mask &= ~A6XX_RB_DEPTH_CNTL_ZFUNC__MASK;
pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RB_DEPTH_CNTL);
break;
case VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT:
pipeline->rb_depth_cntl_mask &=
~(A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE | A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE);
pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RB_DEPTH_CNTL);
break;
case VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT:
pipeline->rb_stencil_cntl_mask &= ~(A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
A6XX_RB_STENCIL_CONTROL_STENCIL_READ);
pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RB_STENCIL_CNTL);
break;
case VK_DYNAMIC_STATE_STENCIL_OP_EXT:
pipeline->rb_stencil_cntl_mask &= A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
A6XX_RB_STENCIL_CONTROL_STENCIL_READ;
pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RB_STENCIL_CNTL);
break;
default:
assert(!"unsupported dynamic state");
break;
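For reference (application side, illustrative rather than taken from the diff), the kind of pDynamicStates list that drives the new switch cases above looks like this:

static const VkDynamicState dynamic_states[] = {
   VK_DYNAMIC_STATE_CULL_MODE_EXT,
   VK_DYNAMIC_STATE_FRONT_FACE_EXT,
   VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT,
   VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT,
   VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT,
   VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT,
   VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT,
   VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT,
   VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT,
   VK_DYNAMIC_STATE_STENCIL_OP_EXT,
};

static const VkPipelineDynamicStateCreateInfo dynamic_info = {
   .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
   .dynamicStateCount = sizeof(dynamic_states) / sizeof(dynamic_states[0]),
   .pDynamicStates = dynamic_states,
};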
@@ -2203,13 +2212,13 @@ tu_pipeline_builder_parse_vertex_input(struct tu_pipeline_builder *builder,
struct tu_cs vi_cs;
tu_cs_begin_sub_stream(&pipeline->cs,
MAX_VERTEX_ATTRIBS * 7 + 2, &vi_cs);
tu6_emit_vertex_input(&vi_cs, vs, vi_info);
tu6_emit_vertex_input(pipeline, &vi_cs, vs, vi_info);
pipeline->vi.state = tu_cs_end_draw_state(&pipeline->cs, &vi_cs);
if (bs) {
tu_cs_begin_sub_stream(&pipeline->cs,
MAX_VERTEX_ATTRIBS * 7 + 2, &vi_cs);
tu6_emit_vertex_input(&vi_cs, bs, vi_info);
tu6_emit_vertex_input(pipeline, &vi_cs, bs, vi_info);
pipeline->vi.binning_state =
tu_cs_end_draw_state(&pipeline->cs, &vi_cs);
}
@@ -2249,6 +2258,8 @@ tu_pipeline_builder_parse_tessellation(struct tu_pipeline_builder *builder,
if (!tess_info)
return;
assert(!(pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY)));
assert(pipeline->ia.primtype == DI_PT_PATCHES0);
assert(tess_info->patchControlPoints <= 32);
pipeline->ia.primtype += tess_info->patchControlPoints;
@@ -2332,11 +2343,8 @@ tu_pipeline_builder_parse_rasterization(struct tu_pipeline_builder *builder,
pipeline->gras_su_cntl =
tu6_gras_su_cntl(rast_info, builder->samples, builder->multiview_mask != 0);
if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_LINE_WIDTH, 2)) {
pipeline->gras_su_cntl |=
A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(rast_info->lineWidth / 2.0f);
if (tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_GRAS_SU_CNTL, 2))
tu_cs_emit_regs(&cs, A6XX_GRAS_SU_CNTL(.dword = pipeline->gras_su_cntl));
}
if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_DEPTH_BIAS, 4)) {
tu6_emit_depth_bias(&cs, rast_info->depthBiasConstantFactor,
@@ -2357,26 +2365,79 @@ tu_pipeline_builder_parse_depth_stencil(struct tu_pipeline_builder *builder,
* the pipeline has rasterization disabled or if the subpass of the
* render pass the pipeline is created against does not use a
* depth/stencil attachment.
*
* Disable both depth and stencil tests if there is no ds attachment, and
* disable the depth test if the ds attachment is S8_UINT, since S8_UINT
* defines only the separate stencil attachment.
*/
static const VkPipelineDepthStencilStateCreateInfo dummy_ds_info;
const VkPipelineDepthStencilStateCreateInfo *ds_info =
builder->depth_attachment_format != VK_FORMAT_UNDEFINED
? builder->create_info->pDepthStencilState
: &dummy_ds_info;
const VkPipelineDepthStencilStateCreateInfo *ds_info_depth =
builder->depth_attachment_format != VK_FORMAT_S8_UINT
? ds_info : &dummy_ds_info;
builder->create_info->pDepthStencilState;
const VkPipelineRasterizationStateCreateInfo *rast_info =
builder->create_info->pRasterizationState;
uint32_t rb_depth_cntl = 0, rb_stencil_cntl = 0;
struct tu_cs cs;
pipeline->ds_state = tu_cs_draw_state(&pipeline->cs, &cs, 4);
tu6_emit_depth_control(&cs, ds_info_depth,
builder->create_info->pRasterizationState);
tu6_emit_stencil_control(&cs, ds_info);
if (builder->depth_attachment_format != VK_FORMAT_UNDEFINED &&
builder->depth_attachment_format != VK_FORMAT_S8_UINT) {
if (ds_info->depthTestEnable) {
rb_depth_cntl |=
A6XX_RB_DEPTH_CNTL_Z_ENABLE |
A6XX_RB_DEPTH_CNTL_ZFUNC(tu6_compare_func(ds_info->depthCompareOp)) |
A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE; /* TODO: don't set for ALWAYS/NEVER */
if (rast_info->depthClampEnable)
rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_CLAMP_ENABLE;
if (ds_info->depthWriteEnable)
rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE;
}
if (ds_info->depthBoundsTestEnable)
rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE | A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE;
} else {
/* if RB_DEPTH_CNTL is set dynamically, we need to make sure it is set
* to 0 when this pipeline is used, as enabling depth test when there
* is no depth attachment is a problem (at least for the S8_UINT case)
*/
if (pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_RB_DEPTH_CNTL))
pipeline->rb_depth_cntl_disable = true;
}
if (builder->depth_attachment_format != VK_FORMAT_UNDEFINED) {
const VkStencilOpState *front = &ds_info->front;
const VkStencilOpState *back = &ds_info->back;
rb_stencil_cntl |=
A6XX_RB_STENCIL_CONTROL_FUNC(tu6_compare_func(front->compareOp)) |
A6XX_RB_STENCIL_CONTROL_FAIL(tu6_stencil_op(front->failOp)) |
A6XX_RB_STENCIL_CONTROL_ZPASS(tu6_stencil_op(front->passOp)) |
A6XX_RB_STENCIL_CONTROL_ZFAIL(tu6_stencil_op(front->depthFailOp)) |
A6XX_RB_STENCIL_CONTROL_FUNC_BF(tu6_compare_func(back->compareOp)) |
A6XX_RB_STENCIL_CONTROL_FAIL_BF(tu6_stencil_op(back->failOp)) |
A6XX_RB_STENCIL_CONTROL_ZPASS_BF(tu6_stencil_op(back->passOp)) |
A6XX_RB_STENCIL_CONTROL_ZFAIL_BF(tu6_stencil_op(back->depthFailOp));
if (ds_info->stencilTestEnable) {
rb_stencil_cntl |=
A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
A6XX_RB_STENCIL_CONTROL_STENCIL_READ;
}
}
if (tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_RB_DEPTH_CNTL, 2)) {
tu_cs_emit_pkt4(&cs, REG_A6XX_RB_DEPTH_CNTL, 1);
tu_cs_emit(&cs, rb_depth_cntl);
} else {
pipeline->rb_depth_cntl = rb_depth_cntl;
}
if (tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_RB_STENCIL_CNTL, 2)) {
tu_cs_emit_pkt4(&cs, REG_A6XX_RB_STENCIL_CONTROL, 1);
tu_cs_emit(&cs, rb_stencil_cntl);
} else {
pipeline->rb_stencil_cntl = rb_stencil_cntl;
}
/* the remaining draw states aren't used if there is no d/s, leave them empty */
if (builder->depth_attachment_format == VK_FORMAT_UNDEFINED)
return;
if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_DEPTH_BOUNDS, 3)) {
tu_cs_emit_regs(&cs,

src/freedreno/vulkan/tu_private.h

@@ -460,7 +460,14 @@ enum tu_dynamic_state
{
/* re-use VK_DYNAMIC_STATE_ enums for non-extended dynamic states */
TU_DYNAMIC_STATE_SAMPLE_LOCATIONS = VK_DYNAMIC_STATE_STENCIL_REFERENCE + 1,
TU_DYNAMIC_STATE_RB_DEPTH_CNTL,
TU_DYNAMIC_STATE_RB_STENCIL_CNTL,
TU_DYNAMIC_STATE_VB_STRIDE,
TU_DYNAMIC_STATE_COUNT,
/* no associated draw state: */
TU_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY = TU_DYNAMIC_STATE_COUNT,
/* re-use the line width enum as it uses GRAS_SU_CNTL: */
TU_DYNAMIC_STATE_GRAS_SU_CNTL = VK_DYNAMIC_STATE_LINE_WIDTH,
};
enum tu_draw_state_group_id
@@ -472,7 +479,6 @@ enum tu_draw_state_group_id
TU_DRAW_STATE_VI,
TU_DRAW_STATE_VI_BINNING,
TU_DRAW_STATE_RAST,
TU_DRAW_STATE_DS,
TU_DRAW_STATE_BLEND,
TU_DRAW_STATE_VS_CONST,
TU_DRAW_STATE_HS_CONST,
@@ -681,12 +687,18 @@ struct tu_descriptor_state
enum tu_cmd_dirty_bits
{
TU_CMD_DIRTY_VERTEX_BUFFERS = 1 << 2,
TU_CMD_DIRTY_DESC_SETS_LOAD = 1 << 3,
TU_CMD_DIRTY_COMPUTE_DESC_SETS_LOAD = 1 << 4,
TU_CMD_DIRTY_SHADER_CONSTS = 1 << 5,
TU_CMD_DIRTY_VERTEX_BUFFERS = BIT(0),
TU_CMD_DIRTY_VB_STRIDE = BIT(1),
TU_CMD_DIRTY_GRAS_SU_CNTL = BIT(2),
TU_CMD_DIRTY_RB_DEPTH_CNTL = BIT(3),
TU_CMD_DIRTY_RB_STENCIL_CNTL = BIT(4),
TU_CMD_DIRTY_DESC_SETS_LOAD = BIT(5),
TU_CMD_DIRTY_COMPUTE_DESC_SETS_LOAD = BIT(6),
TU_CMD_DIRTY_SHADER_CONSTS = BIT(7),
/* all draw states were disabled and need to be re-enabled: */
TU_CMD_DIRTY_DRAW_STATE = 1 << 7,
TU_CMD_DIRTY_DRAW_STATE = BIT(8)
};
/* There are only three cache domains we have to care about: the CCU, or
@@ -852,6 +864,7 @@ struct tu_cmd_state
struct {
uint64_t base;
uint32_t size;
uint32_t stride;
} vb[MAX_VBS];
VkViewport viewport[MAX_VIEWPORTS];
VkRect2D scissor[MAX_SCISSORS];
@@ -861,7 +874,9 @@ struct tu_cmd_state
uint32_t dynamic_stencil_mask;
uint32_t dynamic_stencil_wrmask;
uint32_t dynamic_stencil_ref;
uint32_t dynamic_gras_su_cntl;
uint32_t gras_su_cntl, rb_depth_cntl, rb_stencil_cntl;
enum pc_di_primtype primtype;
/* saved states to re-emit in TU_CMD_DIRTY_DRAW_STATE case */
struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT];
@@ -1062,11 +1077,15 @@ struct tu_pipeline
uint32_t dynamic_state_mask;
struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT];
/* gras_su_cntl without line width, used for dynamic line width state */
uint32_t gras_su_cntl;
/* for dynamic states which use the same register: */
uint32_t gras_su_cntl, gras_su_cntl_mask;
uint32_t rb_depth_cntl, rb_depth_cntl_mask;
uint32_t rb_stencil_cntl, rb_stencil_cntl_mask;
bool rb_depth_cntl_disable;
/* draw states for the pipeline */
struct tu_draw_state load_state, rast_state, ds_state, blend_state;
struct tu_draw_state load_state, rast_state, blend_state;
/* for vertex buffers state */
uint32_t num_vbs;