turnip: refactor draw states and dynamic states

This reworks dynamic states to be emitted as draw states, and reworks how draw states themselves are emitted.

This moves towards doing as little as possible in tu6_bind_draw_states.

Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5446>
Jonathan Marek 2020-06-14 10:52:37 -04:00 committed by Marge Bot
parent 62a4db4c0f
commit 233610f8cf
4 changed files with 384 additions and 503 deletions
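
As background for the refactor, here is a minimal standalone sketch (plain C) of the bookkeeping this commit introduces: every draw state group is tracked as a packed {iova, size} pair, and (re-)emitting a group is a single three-dword CP_SET_DRAW_STATE entry carrying the group id, an enable mask and the address, with size == 0 meaning "disable this group". The struct layout matches the diff below; emit_draw_state_entry() and its bit positions are only illustrative assumptions, not the exact a6xx packet encoding (the driver uses the generated CP_SET_DRAW_STATE__* macros and tu_cs_emit helpers).

#include <stdint.h>
#include <stdio.h>

/* Packed per-group draw state, as added to tu_cmd_state/tu_pipeline:
 * a 48-bit GPU address and a 16-bit size in dwords. */
struct tu_draw_state {
   uint64_t iova : 48;
   uint32_t size : 16;
};

/* Hypothetical bit positions, for illustration only. */
enum {
   ENABLE_BINNING = 1u << 20,
   ENABLE_GMEM    = 1u << 21,
   ENABLE_SYSMEM  = 1u << 22,
   DISABLE        = 1u << 17,
};

static void
emit_draw_state_entry(uint32_t *dwords, uint32_t group_id,
                      uint32_t enable_mask, struct tu_draw_state state)
{
   /* size == 0 turns into a DISABLE of the group, matching
    * COND(!state.size, CP_SET_DRAW_STATE__0_DISABLE) in the diff */
   dwords[0] = state.size | enable_mask | (group_id << 24) |
               (state.size ? 0 : DISABLE);
   dwords[1] = (uint32_t) state.iova;          /* ADDR_LO */
   dwords[2] = (uint32_t) (state.iova >> 32);  /* ADDR_HI */
}

int main(void)
{
   /* e.g. a viewport dynamic state: 18 dwords at some GPU address */
   struct tu_draw_state viewport = { .iova = 0x100000ull, .size = 18 };
   uint32_t entry[3];
   emit_draw_state_entry(entry, /*group_id=*/10,
                         ENABLE_BINNING | ENABLE_GMEM | ENABLE_SYSMEM, viewport);
   printf("%08x %08x %08x\n", entry[0], entry[1], entry[2]);
   return 0;
}

With state tracked this way, tu6_bind_draw_states() only needs to re-emit the groups whose backing state actually changed (or everything after TU_CMD_DIRTY_DRAW_STATE), instead of rebuilding a draw_state_groups[] array on every draw.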

@ -2081,6 +2081,17 @@ tu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd,
/* This clear path behaves like a draw, needs the same flush as tu_draw */
tu_emit_cache_flush_renderpass(cmd, cs);
/* disable all draw states so they don't interfere
* TODO: use and re-use draw states for this path
*/
tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3);
tu_cs_emit(cs, CP_SET_DRAW_STATE__0_COUNT(0) |
CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
CP_SET_DRAW_STATE__0_GROUP_ID(0));
tu_cs_emit(cs, CP_SET_DRAW_STATE__1_ADDR_LO(0));
tu_cs_emit(cs, CP_SET_DRAW_STATE__2_ADDR_HI(0));
cmd->state.dirty |= TU_CMD_DIRTY_DRAW_STATE;
tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_CNTL0, 2);
tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(0xfc) |
A6XX_SP_FS_OUTPUT_CNTL0_SAMPMASK_REGID(0xfc) |
@ -2167,13 +2178,6 @@ tu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd,
}
}
}
cmd->state.dirty |= TU_CMD_DIRTY_PIPELINE |
TU_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |
TU_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK |
TU_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE |
TU_CMD_DIRTY_DYNAMIC_VIEWPORT |
TU_CMD_DIRTY_DYNAMIC_SCISSOR;
}
/**

@ -686,6 +686,58 @@ tu6_emit_window_offset(struct tu_cs *cs, uint32_t x1, uint32_t y1)
A6XX_SP_TP_WINDOW_OFFSET(.x = x1, .y = y1));
}
static void
tu_cs_emit_draw_state(struct tu_cs *cs, uint32_t id, struct tu_draw_state state)
{
uint32_t enable_mask;
switch (id) {
case TU_DRAW_STATE_PROGRAM:
case TU_DRAW_STATE_VI:
case TU_DRAW_STATE_FS_CONST:
/* The blob seems to not enable this (DESC_SETS_LOAD) for binning, even
* when resources would actually be used in the binning shader.
* Presumably the overhead of prefetching the resources isn't
* worth it.
*/
case TU_DRAW_STATE_DESC_SETS_LOAD:
enable_mask = CP_SET_DRAW_STATE__0_GMEM |
CP_SET_DRAW_STATE__0_SYSMEM;
break;
case TU_DRAW_STATE_PROGRAM_BINNING:
case TU_DRAW_STATE_VI_BINNING:
enable_mask = CP_SET_DRAW_STATE__0_BINNING;
break;
case TU_DRAW_STATE_DESC_SETS_GMEM:
enable_mask = CP_SET_DRAW_STATE__0_GMEM;
break;
case TU_DRAW_STATE_DESC_SETS_SYSMEM:
enable_mask = CP_SET_DRAW_STATE__0_BINNING |
CP_SET_DRAW_STATE__0_SYSMEM;
break;
default:
enable_mask = CP_SET_DRAW_STATE__0_GMEM |
CP_SET_DRAW_STATE__0_SYSMEM |
CP_SET_DRAW_STATE__0_BINNING;
break;
}
tu_cs_emit(cs, CP_SET_DRAW_STATE__0_COUNT(state.size) |
enable_mask |
CP_SET_DRAW_STATE__0_GROUP_ID(id) |
COND(!state.size, CP_SET_DRAW_STATE__0_DISABLE));
tu_cs_emit_qw(cs, state.iova);
}
/* note: get rid of this eventually */
static void
tu_cs_emit_sds_ib(struct tu_cs *cs, uint32_t id, struct tu_cs_entry entry)
{
tu_cs_emit_draw_state(cs, id, (struct tu_draw_state) {
.iova = entry.size ? entry.bo->iova + entry.offset : 0,
.size = entry.size / 4,
});
}
static bool
use_hw_binning(struct tu_cmd_buffer *cmd)
{
@ -1987,6 +2039,28 @@ tu_EndCommandBuffer(VkCommandBuffer commandBuffer)
return cmd_buffer->record_result;
}
static struct tu_cs
tu_cmd_dynamic_state(struct tu_cmd_buffer *cmd, uint32_t id, uint32_t size)
{
struct ts_cs_memory memory;
struct tu_cs cs;
/* TODO: share this logic with tu_pipeline_static_state */
tu_cs_alloc(&cmd->sub_cs, size, 1, &memory);
tu_cs_init_external(&cs, memory.map, memory.map + size);
tu_cs_begin(&cs);
tu_cs_reserve_space(&cs, size);
assert(id < ARRAY_SIZE(cmd->state.dynamic_state));
cmd->state.dynamic_state[id].iova = memory.iova;
cmd->state.dynamic_state[id].size = size;
tu_cs_emit_pkt7(&cmd->draw_cs, CP_SET_DRAW_STATE, 3);
tu_cs_emit_draw_state(&cmd->draw_cs, TU_DRAW_STATE_DYNAMIC + id, cmd->state.dynamic_state[id]);
return cs;
}
void
tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
VkPipelineBindPoint pipelineBindPoint,
@ -2011,7 +2085,23 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
assert(pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS);
cmd->state.pipeline = pipeline;
cmd->state.dirty |= TU_CMD_DIRTY_PIPELINE | TU_CMD_DIRTY_SHADER_CONSTS;
cmd->state.dirty |= TU_CMD_DIRTY_SHADER_CONSTS;
struct tu_cs *cs = &cmd->draw_cs;
uint32_t mask = ~pipeline->dynamic_state_mask & BITFIELD_MASK(TU_DYNAMIC_STATE_COUNT);
uint32_t i;
tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * (7 + util_bitcount(mask)));
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_PROGRAM, pipeline->program.state_ib);
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_PROGRAM_BINNING, pipeline->program.binning_state_ib);
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VI, pipeline->vi.state_ib);
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VI_BINNING, pipeline->vi.binning_state_ib);
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_RAST, pipeline->rast.state_ib);
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DS, pipeline->ds.state_ib);
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_BLEND, pipeline->blend.state_ib);
for_each_bit(i, mask)
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DYNAMIC + i, pipeline->dynamic_state[i]);
/* If the new pipeline requires more VBs than we had previously set up, we
* need to re-emit them in SDS. If it requires the same set or fewer, we
@ -2023,6 +2113,18 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
/* If the pipeline needs a dynamic descriptor, re-emit descriptor sets */
if (pipeline->layout->dynamic_offset_count + pipeline->layout->input_attachment_count)
cmd->state.dirty |= TU_CMD_DIRTY_DESCRIPTOR_SETS;
/* dynamic line width state depends on the pipeline's gras_su_cntl,
* so the dynamic state ib must be updated when the pipeline changes
*/
if (pipeline->dynamic_state_mask & BIT(VK_DYNAMIC_STATE_LINE_WIDTH)) {
struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_LINE_WIDTH, 2);
cmd->state.dynamic_gras_su_cntl &= A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK;
cmd->state.dynamic_gras_su_cntl |= pipeline->gras_su_cntl;
tu_cs_emit_regs(&cs, A6XX_GRAS_SU_CNTL(.dword = cmd->state.dynamic_gras_su_cntl));
}
}
void
@ -2032,10 +2134,11 @@ tu_CmdSetViewport(VkCommandBuffer commandBuffer,
const VkViewport *pViewports)
{
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_VIEWPORT, 18);
assert(firstViewport == 0 && viewportCount == 1);
cmd->state.dynamic.viewport.viewports[0] = pViewports[0];
cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_VIEWPORT;
tu6_emit_viewport(&cs, pViewports);
}
void
@ -2045,21 +2148,23 @@ tu_CmdSetScissor(VkCommandBuffer commandBuffer,
const VkRect2D *pScissors)
{
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_SCISSOR, 3);
assert(firstScissor == 0 && scissorCount == 1);
cmd->state.dynamic.scissor.scissors[0] = pScissors[0];
cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_SCISSOR;
tu6_emit_scissor(&cs, pScissors);
}
void
tu_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth)
{
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_LINE_WIDTH, 2);
cmd->state.dynamic.line_width = lineWidth;
cmd->state.dynamic_gras_su_cntl &= ~A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK;
cmd->state.dynamic_gras_su_cntl |= A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(lineWidth / 2.0f);
/* line width depends on VkPipelineRasterizationStateCreateInfo */
cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_LINE_WIDTH;
tu_cs_emit_regs(&cs, A6XX_GRAS_SU_CNTL(.dword = cmd->state.dynamic_gras_su_cntl));
}
void
@ -2069,12 +2174,9 @@ tu_CmdSetDepthBias(VkCommandBuffer commandBuffer,
float depthBiasSlopeFactor)
{
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
struct tu_cs *draw_cs = &cmd->draw_cs;
struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_DEPTH_BIAS, 4);
tu6_emit_depth_bias(draw_cs, depthBiasConstantFactor, depthBiasClamp,
depthBiasSlopeFactor);
tu_cs_sanity_check(draw_cs);
tu6_emit_depth_bias(&cs, depthBiasConstantFactor, depthBiasClamp, depthBiasSlopeFactor);
}
void
@ -2082,11 +2184,10 @@ tu_CmdSetBlendConstants(VkCommandBuffer commandBuffer,
const float blendConstants[4])
{
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
struct tu_cs *draw_cs = &cmd->draw_cs;
struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_BLEND_CONSTANTS, 5);
tu6_emit_blend_constants(draw_cs, blendConstants);
tu_cs_sanity_check(draw_cs);
tu_cs_emit_pkt4(&cs, REG_A6XX_RB_BLEND_RED_F32, 4);
tu_cs_emit_array(&cs, (const uint32_t *) blendConstants, 4);
}
void
@ -2096,20 +2197,26 @@ tu_CmdSetDepthBounds(VkCommandBuffer commandBuffer,
{
}
static void
update_stencil_mask(uint32_t *value, VkStencilFaceFlags face, uint32_t mask)
{
if (face & VK_STENCIL_FACE_FRONT_BIT)
*value |= A6XX_RB_STENCILMASK_MASK(mask);
if (face & VK_STENCIL_FACE_BACK_BIT)
*value |= A6XX_RB_STENCILMASK_BFMASK(mask);
}
void
tu_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer,
VkStencilFaceFlags faceMask,
uint32_t compareMask)
{
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, 2);
if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
cmd->state.dynamic.stencil_compare_mask.front = compareMask;
if (faceMask & VK_STENCIL_FACE_BACK_BIT)
cmd->state.dynamic.stencil_compare_mask.back = compareMask;
update_stencil_mask(&cmd->state.dynamic_stencil_mask, faceMask, compareMask);
/* the front/back compare masks must be updated together */
cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK;
tu_cs_emit_regs(&cs, A6XX_RB_STENCILMASK(.dword = cmd->state.dynamic_stencil_mask));
}
void
@ -2118,14 +2225,11 @@ tu_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer,
uint32_t writeMask)
{
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, 2);
if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
cmd->state.dynamic.stencil_write_mask.front = writeMask;
if (faceMask & VK_STENCIL_FACE_BACK_BIT)
cmd->state.dynamic.stencil_write_mask.back = writeMask;
update_stencil_mask(&cmd->state.dynamic_stencil_wrmask, faceMask, writeMask);
/* the front/back write masks must be updated together */
cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK;
tu_cs_emit_regs(&cs, A6XX_RB_STENCILWRMASK(.dword = cmd->state.dynamic_stencil_wrmask));
}
void
@ -2134,14 +2238,11 @@ tu_CmdSetStencilReference(VkCommandBuffer commandBuffer,
uint32_t reference)
{
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_STENCIL_REFERENCE, 2);
if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
cmd->state.dynamic.stencil_reference.front = reference;
if (faceMask & VK_STENCIL_FACE_BACK_BIT)
cmd->state.dynamic.stencil_reference.back = reference;
update_stencil_mask(&cmd->state.dynamic_stencil_ref, faceMask, reference);
/* the front/back references must be updated together */
cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE;
tu_cs_emit_regs(&cs, A6XX_RB_STENCILREF(.dword = cmd->state.dynamic_stencil_ref));
}
void
@ -2149,8 +2250,11 @@ tu_CmdSetSampleLocationsEXT(VkCommandBuffer commandBuffer,
const VkSampleLocationsInfoEXT* pSampleLocationsInfo)
{
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
struct tu_cs cs = tu_cmd_dynamic_state(cmd, TU_DYNAMIC_STATE_SAMPLE_LOCATIONS, 9);
tu6_emit_sample_locations(&cmd->draw_cs, pSampleLocationsInfo);
assert(pSampleLocationsInfo);
tu6_emit_sample_locations(&cs, pSampleLocationsInfo);
}
static void
@ -2578,6 +2682,8 @@ tu_CmdBeginRenderPass(VkCommandBuffer commandBuffer,
tu_bo_list_add(&cmd->bo_list, iview->image->bo,
MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE);
}
cmd->state.dirty |= TU_CMD_DIRTY_DRAW_STATE;
}
void
@ -2701,39 +2807,6 @@ struct tu_draw_info
uint64_t streamout_buffer_offset;
};
#define ENABLE_ALL (CP_SET_DRAW_STATE__0_BINNING | CP_SET_DRAW_STATE__0_GMEM | CP_SET_DRAW_STATE__0_SYSMEM)
#define ENABLE_DRAW (CP_SET_DRAW_STATE__0_GMEM | CP_SET_DRAW_STATE__0_SYSMEM)
#define ENABLE_NON_GMEM (CP_SET_DRAW_STATE__0_BINNING | CP_SET_DRAW_STATE__0_SYSMEM)
enum tu_draw_state_group_id
{
TU_DRAW_STATE_PROGRAM,
TU_DRAW_STATE_PROGRAM_BINNING,
TU_DRAW_STATE_VB,
TU_DRAW_STATE_VI,
TU_DRAW_STATE_VI_BINNING,
TU_DRAW_STATE_VP,
TU_DRAW_STATE_RAST,
TU_DRAW_STATE_DS,
TU_DRAW_STATE_BLEND,
TU_DRAW_STATE_VS_CONST,
TU_DRAW_STATE_GS_CONST,
TU_DRAW_STATE_FS_CONST,
TU_DRAW_STATE_DESC_SETS,
TU_DRAW_STATE_DESC_SETS_GMEM,
TU_DRAW_STATE_DESC_SETS_LOAD,
TU_DRAW_STATE_VS_PARAMS,
TU_DRAW_STATE_COUNT,
};
struct tu_draw_state_group
{
enum tu_draw_state_group_id id;
uint32_t enable_mask;
struct tu_cs_entry ib;
};
static void
tu6_emit_user_consts(struct tu_cs *cs, const struct tu_pipeline *pipeline,
struct tu_descriptor_state *descriptors_state,
@ -3088,9 +3161,6 @@ tu6_bind_draw_states(struct tu_cmd_buffer *cmd,
const struct tu_draw_info *draw)
{
const struct tu_pipeline *pipeline = cmd->state.pipeline;
const struct tu_dynamic_state *dynamic = &cmd->state.dynamic;
struct tu_draw_state_group draw_state_groups[TU_DRAW_STATE_COUNT];
uint32_t draw_state_group_count = 0;
VkResult result;
struct tu_descriptor_state *descriptors_state =
@ -3102,120 +3172,13 @@ tu6_bind_draw_states(struct tu_cmd_buffer *cmd,
A6XX_PC_PRIMITIVE_CNTL_0(.primitive_restart =
pipeline->ia.primitive_restart && draw->indexed));
if (cmd->state.dirty &
(TU_CMD_DIRTY_PIPELINE | TU_CMD_DIRTY_DYNAMIC_LINE_WIDTH) &&
(pipeline->dynamic_state.mask & TU_DYNAMIC_LINE_WIDTH)) {
tu6_emit_gras_su_cntl(cs, pipeline->rast.gras_su_cntl,
dynamic->line_width);
}
if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK) &&
(pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_COMPARE_MASK)) {
tu6_emit_stencil_compare_mask(cs, dynamic->stencil_compare_mask.front,
dynamic->stencil_compare_mask.back);
}
if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK) &&
(pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_WRITE_MASK)) {
tu6_emit_stencil_write_mask(cs, dynamic->stencil_write_mask.front,
dynamic->stencil_write_mask.back);
}
if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE) &&
(pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_REFERENCE)) {
tu6_emit_stencil_reference(cs, dynamic->stencil_reference.front,
dynamic->stencil_reference.back);
}
if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_VIEWPORT) &&
(pipeline->dynamic_state.mask & TU_DYNAMIC_VIEWPORT)) {
tu6_emit_viewport(cs, &cmd->state.dynamic.viewport.viewports[0]);
}
if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_SCISSOR) &&
(pipeline->dynamic_state.mask & TU_DYNAMIC_SCISSOR)) {
tu6_emit_scissor(cs, &cmd->state.dynamic.scissor.scissors[0]);
}
if (cmd->state.dirty & TU_CMD_DIRTY_PIPELINE) {
draw_state_groups[draw_state_group_count++] =
(struct tu_draw_state_group) {
.id = TU_DRAW_STATE_PROGRAM,
.enable_mask = ENABLE_DRAW,
.ib = pipeline->program.state_ib,
};
draw_state_groups[draw_state_group_count++] =
(struct tu_draw_state_group) {
.id = TU_DRAW_STATE_PROGRAM_BINNING,
.enable_mask = CP_SET_DRAW_STATE__0_BINNING,
.ib = pipeline->program.binning_state_ib,
};
draw_state_groups[draw_state_group_count++] =
(struct tu_draw_state_group) {
.id = TU_DRAW_STATE_VI,
.enable_mask = ENABLE_DRAW,
.ib = pipeline->vi.state_ib,
};
draw_state_groups[draw_state_group_count++] =
(struct tu_draw_state_group) {
.id = TU_DRAW_STATE_VI_BINNING,
.enable_mask = CP_SET_DRAW_STATE__0_BINNING,
.ib = pipeline->vi.binning_state_ib,
};
draw_state_groups[draw_state_group_count++] =
(struct tu_draw_state_group) {
.id = TU_DRAW_STATE_VP,
.enable_mask = ENABLE_ALL,
.ib = pipeline->vp.state_ib,
};
draw_state_groups[draw_state_group_count++] =
(struct tu_draw_state_group) {
.id = TU_DRAW_STATE_RAST,
.enable_mask = ENABLE_ALL,
.ib = pipeline->rast.state_ib,
};
draw_state_groups[draw_state_group_count++] =
(struct tu_draw_state_group) {
.id = TU_DRAW_STATE_DS,
.enable_mask = ENABLE_ALL,
.ib = pipeline->ds.state_ib,
};
draw_state_groups[draw_state_group_count++] =
(struct tu_draw_state_group) {
.id = TU_DRAW_STATE_BLEND,
.enable_mask = ENABLE_ALL,
.ib = pipeline->blend.state_ib,
};
}
if (cmd->state.dirty & TU_CMD_DIRTY_SHADER_CONSTS) {
draw_state_groups[draw_state_group_count++] =
(struct tu_draw_state_group) {
.id = TU_DRAW_STATE_VS_CONST,
.enable_mask = ENABLE_ALL,
.ib = tu6_emit_consts(cmd, pipeline, descriptors_state, MESA_SHADER_VERTEX)
};
draw_state_groups[draw_state_group_count++] =
(struct tu_draw_state_group) {
.id = TU_DRAW_STATE_GS_CONST,
.enable_mask = ENABLE_ALL,
.ib = tu6_emit_consts(cmd, pipeline, descriptors_state, MESA_SHADER_GEOMETRY)
};
draw_state_groups[draw_state_group_count++] =
(struct tu_draw_state_group) {
.id = TU_DRAW_STATE_FS_CONST,
.enable_mask = ENABLE_DRAW,
.ib = tu6_emit_consts(cmd, pipeline, descriptors_state, MESA_SHADER_FRAGMENT)
};
}
if (cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS) {
draw_state_groups[draw_state_group_count++] =
(struct tu_draw_state_group) {
.id = TU_DRAW_STATE_VB,
.enable_mask = ENABLE_ALL,
.ib = tu6_emit_vertex_buffers(cmd, pipeline)
};
cmd->state.shader_const_ib[MESA_SHADER_VERTEX] =
tu6_emit_consts(cmd, pipeline, descriptors_state, MESA_SHADER_VERTEX);
cmd->state.shader_const_ib[MESA_SHADER_GEOMETRY] =
tu6_emit_consts(cmd, pipeline, descriptors_state, MESA_SHADER_GEOMETRY);
cmd->state.shader_const_ib[MESA_SHADER_FRAGMENT] =
tu6_emit_consts(cmd, pipeline, descriptors_state, MESA_SHADER_FRAGMENT);
}
if (cmd->state.dirty & TU_CMD_DIRTY_STREAMOUT_BUFFERS)
@ -3234,35 +3197,26 @@ tu6_bind_draw_states(struct tu_cmd_buffer *cmd,
* could also only re-emit dynamic state.
*/
if (cmd->state.dirty & TU_CMD_DIRTY_DESCRIPTOR_SETS) {
struct tu_cs_entry desc_sets, desc_sets_gmem;
bool need_gmem_desc_set = pipeline->layout->input_attachment_count > 0;
result = tu6_emit_descriptor_sets(cmd, pipeline,
VK_PIPELINE_BIND_POINT_GRAPHICS,
&desc_sets, false);
&cmd->state.desc_sets_ib, false);
if (result != VK_SUCCESS)
return result;
draw_state_groups[draw_state_group_count++] =
(struct tu_draw_state_group) {
.id = TU_DRAW_STATE_DESC_SETS,
.enable_mask = need_gmem_desc_set ? ENABLE_NON_GMEM : ENABLE_ALL,
.ib = desc_sets,
};
if (need_gmem_desc_set) {
cmd->state.desc_sets_sysmem_ib = cmd->state.desc_sets_ib;
cmd->state.desc_sets_ib.size = 0;
result = tu6_emit_descriptor_sets(cmd, pipeline,
VK_PIPELINE_BIND_POINT_GRAPHICS,
&desc_sets_gmem, true);
&cmd->state.desc_sets_gmem_ib, true);
if (result != VK_SUCCESS)
return result;
draw_state_groups[draw_state_group_count++] =
(struct tu_draw_state_group) {
.id = TU_DRAW_STATE_DESC_SETS_GMEM,
.enable_mask = CP_SET_DRAW_STATE__0_GMEM,
.ib = desc_sets_gmem,
};
} else {
cmd->state.desc_sets_gmem_ib.size = 0;
cmd->state.desc_sets_sysmem_ib.size = 0;
}
/* We need to reload the descriptors every time the descriptor sets
@ -3286,52 +3240,79 @@ tu6_bind_draw_states(struct tu_cmd_buffer *cmd,
tu_cs_emit_array(&load_cs,
(uint32_t *)((char *)load_entry->bo->map + load_entry->offset),
load_entry->size / 4);
struct tu_cs_entry load_copy = tu_cs_end_sub_stream(&cmd->sub_cs, &load_cs);
draw_state_groups[draw_state_group_count++] =
(struct tu_draw_state_group) {
.id = TU_DRAW_STATE_DESC_SETS_LOAD,
/* The blob seems to not enable this for binning, even when
* resources would actually be used in the binning shader.
* Presumably the overhead of prefetching the resources isn't
* worth it.
*/
.enable_mask = ENABLE_DRAW,
.ib = load_copy,
};
cmd->state.desc_sets_load_ib = tu_cs_end_sub_stream(&cmd->sub_cs, &load_cs);
} else {
cmd->state.desc_sets_load_ib.size = 0;
}
}
if (cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS)
cmd->state.vertex_buffers_ib = tu6_emit_vertex_buffers(cmd, pipeline);
struct tu_cs_entry vs_params;
result = tu6_emit_vs_params(cmd, draw, &vs_params);
if (result != VK_SUCCESS)
return result;
draw_state_groups[draw_state_group_count++] =
(struct tu_draw_state_group) {
.id = TU_DRAW_STATE_VS_PARAMS,
.enable_mask = ENABLE_ALL,
.ib = vs_params,
};
/* for the first draw in a renderpass, re-emit all the draw states
*
* and if a draw-state disabling path (CmdClearAttachments 3D fallback) was
* used, then draw states must be re-emitted. note however this only happens
* in the sysmem path, so this could be skipped for the gmem path (TODO)
*/
if (cmd->state.dirty & TU_CMD_DIRTY_DRAW_STATE) {
tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * TU_DRAW_STATE_COUNT);
tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * draw_state_group_count);
for (uint32_t i = 0; i < draw_state_group_count; i++) {
const struct tu_draw_state_group *group = &draw_state_groups[i];
debug_assert((group->enable_mask & ~ENABLE_ALL) == 0);
uint32_t cp_set_draw_state =
CP_SET_DRAW_STATE__0_COUNT(group->ib.size / 4) |
group->enable_mask |
CP_SET_DRAW_STATE__0_GROUP_ID(group->id);
uint64_t iova;
if (group->ib.size) {
iova = group->ib.bo->iova + group->ib.offset;
} else {
cp_set_draw_state |= CP_SET_DRAW_STATE__0_DISABLE;
iova = 0;
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_PROGRAM, pipeline->program.state_ib);
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_PROGRAM_BINNING, pipeline->program.binning_state_ib);
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VI, pipeline->vi.state_ib);
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VI_BINNING, pipeline->vi.binning_state_ib);
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_RAST, pipeline->rast.state_ib);
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DS, pipeline->ds.state_ib);
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_BLEND, pipeline->blend.state_ib);
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VS_CONST, cmd->state.shader_const_ib[MESA_SHADER_VERTEX]);
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_GS_CONST, cmd->state.shader_const_ib[MESA_SHADER_GEOMETRY]);
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_FS_CONST, cmd->state.shader_const_ib[MESA_SHADER_FRAGMENT]);
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS, cmd->state.desc_sets_ib);
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS_GMEM, cmd->state.desc_sets_gmem_ib);
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS_SYSMEM, cmd->state.desc_sets_sysmem_ib);
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS_LOAD, cmd->state.desc_sets_load_ib);
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VB, cmd->state.vertex_buffers_ib);
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VS_PARAMS, vs_params);
for (uint32_t i = 0; i < ARRAY_SIZE(cmd->state.dynamic_state); i++) {
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DYNAMIC + i,
((pipeline->dynamic_state_mask & BIT(i)) ?
cmd->state.dynamic_state[i] :
pipeline->dynamic_state[i]));
}
} else {
tu_cs_emit(cs, cp_set_draw_state);
tu_cs_emit_qw(cs, iova);
/* emit draw states that were just updated
* note we eventually don't want to have to emit anything here
*/
uint32_t draw_state_count =
((cmd->state.dirty & TU_CMD_DIRTY_SHADER_CONSTS) ? 3 : 0) +
((cmd->state.dirty & TU_CMD_DIRTY_DESCRIPTOR_SETS) ? 4 : 0) +
((cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS) ? 1 : 0) +
1; /* vs_params */
tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * draw_state_count);
if (cmd->state.dirty & TU_CMD_DIRTY_SHADER_CONSTS) {
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VS_CONST, cmd->state.shader_const_ib[MESA_SHADER_VERTEX]);
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_GS_CONST, cmd->state.shader_const_ib[MESA_SHADER_GEOMETRY]);
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_FS_CONST, cmd->state.shader_const_ib[MESA_SHADER_FRAGMENT]);
}
if (cmd->state.dirty & TU_CMD_DIRTY_DESCRIPTOR_SETS) {
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS, cmd->state.desc_sets_ib);
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS_GMEM, cmd->state.desc_sets_gmem_ib);
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS_SYSMEM, cmd->state.desc_sets_sysmem_ib);
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS_LOAD, cmd->state.desc_sets_load_ib);
}
if (cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS)
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VB, cmd->state.vertex_buffers_ib);
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VS_PARAMS, vs_params);
}
tu_cs_sanity_check(cs);

@ -281,36 +281,6 @@ struct tu_pipeline_builder
uint32_t render_components;
};
static enum tu_dynamic_state_bits
tu_dynamic_state_bit(VkDynamicState state)
{
switch (state) {
case VK_DYNAMIC_STATE_VIEWPORT:
return TU_DYNAMIC_VIEWPORT;
case VK_DYNAMIC_STATE_SCISSOR:
return TU_DYNAMIC_SCISSOR;
case VK_DYNAMIC_STATE_LINE_WIDTH:
return TU_DYNAMIC_LINE_WIDTH;
case VK_DYNAMIC_STATE_DEPTH_BIAS:
return TU_DYNAMIC_DEPTH_BIAS;
case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
return TU_DYNAMIC_BLEND_CONSTANTS;
case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
return TU_DYNAMIC_DEPTH_BOUNDS;
case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
return TU_DYNAMIC_STENCIL_COMPARE_MASK;
case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
return TU_DYNAMIC_STENCIL_WRITE_MASK;
case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
return TU_DYNAMIC_STENCIL_REFERENCE;
case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT:
return TU_DYNAMIC_SAMPLE_LOCATIONS;
default:
unreachable("invalid dynamic state");
return 0;
}
}
static bool
tu_logic_op_reads_dst(VkLogicOp op)
{
@ -1645,22 +1615,6 @@ tu6_emit_sample_locations(struct tu_cs *cs, const VkSampleLocationsInfoEXT *samp
tu_cs_emit(cs, sample_locations);
}
static void
tu6_emit_gras_unknowns(struct tu_cs *cs)
{
tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_UNKNOWN_8001, 1);
tu_cs_emit(cs, 0x0);
}
static void
tu6_emit_point_size(struct tu_cs *cs)
{
tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_POINT_MINMAX, 2);
tu_cs_emit(cs, A6XX_GRAS_SU_POINT_MINMAX_MIN(1.0f / 16.0f) |
A6XX_GRAS_SU_POINT_MINMAX_MAX(4092.0f));
tu_cs_emit(cs, A6XX_GRAS_SU_POINT_SIZE(1.0f).value);
}
static uint32_t
tu6_gras_su_cntl(const VkPipelineRasterizationStateCreateInfo *rast_info,
VkSampleCountFlagBits samples)
@ -1686,18 +1640,6 @@ tu6_gras_su_cntl(const VkPipelineRasterizationStateCreateInfo *rast_info,
return gras_su_cntl;
}
void
tu6_emit_gras_su_cntl(struct tu_cs *cs,
uint32_t gras_su_cntl,
float line_width)
{
assert((gras_su_cntl & A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK) == 0);
gras_su_cntl |= A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(line_width / 2.0f);
tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_CNTL, 1);
tu_cs_emit(cs, gras_su_cntl);
}
void
tu6_emit_depth_bias(struct tu_cs *cs,
float constant_factor,
@ -1710,13 +1652,6 @@ tu6_emit_depth_bias(struct tu_cs *cs,
tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(clamp).value);
}
static void
tu6_emit_alpha_control_disable(struct tu_cs *cs)
{
tu_cs_emit_pkt4(cs, REG_A6XX_RB_ALPHA_CONTROL, 1);
tu_cs_emit(cs, 0);
}
static void
tu6_emit_depth_control(struct tu_cs *cs,
const VkPipelineDepthStencilStateCreateInfo *ds_info,
@ -1768,30 +1703,6 @@ tu6_emit_stencil_control(struct tu_cs *cs,
tu_cs_emit(cs, rb_stencil_control);
}
void
tu6_emit_stencil_compare_mask(struct tu_cs *cs, uint32_t front, uint32_t back)
{
tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCILMASK, 1);
tu_cs_emit(
cs, A6XX_RB_STENCILMASK_MASK(front) | A6XX_RB_STENCILMASK_BFMASK(back));
}
void
tu6_emit_stencil_write_mask(struct tu_cs *cs, uint32_t front, uint32_t back)
{
tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCILWRMASK, 1);
tu_cs_emit(cs, A6XX_RB_STENCILWRMASK_WRMASK(front) |
A6XX_RB_STENCILWRMASK_BFWRMASK(back));
}
void
tu6_emit_stencil_reference(struct tu_cs *cs, uint32_t front, uint32_t back)
{
tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCILREF, 1);
tu_cs_emit(cs,
A6XX_RB_STENCILREF_REF(front) | A6XX_RB_STENCILREF_BFREF(back));
}
static uint32_t
tu6_rb_mrt_blend_control(const VkPipelineColorBlendAttachmentState *att,
bool has_alpha)
@ -1912,13 +1823,6 @@ tu6_emit_blend_control(struct tu_cs *cs,
.alpha_to_one = msaa_info->alphaToOneEnable));
}
void
tu6_emit_blend_constants(struct tu_cs *cs, const float constants[4])
{
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLEND_RED_F32, 4);
tu_cs_emit_array(cs, (const uint32_t *) constants, 4);
}
static VkResult
tu_pipeline_create(struct tu_device *dev,
struct tu_pipeline_layout *layout,
@ -2095,8 +1999,18 @@ tu_pipeline_builder_parse_dynamic(struct tu_pipeline_builder *builder,
return;
for (uint32_t i = 0; i < dynamic_info->dynamicStateCount; i++) {
pipeline->dynamic_state.mask |=
tu_dynamic_state_bit(dynamic_info->pDynamicStates[i]);
VkDynamicState state = dynamic_info->pDynamicStates[i];
switch (state) {
case VK_DYNAMIC_STATE_VIEWPORT ... VK_DYNAMIC_STATE_STENCIL_REFERENCE:
pipeline->dynamic_state_mask |= BIT(state);
break;
case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT:
pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_SAMPLE_LOCATIONS);
break;
default:
assert(!"unsupported dynamic state");
break;
}
}
}
@ -2186,6 +2100,27 @@ tu_pipeline_builder_parse_input_assembly(struct tu_pipeline_builder *builder,
pipeline->ia.primitive_restart = ia_info->primitiveRestartEnable;
}
static bool
tu_pipeline_static_state(struct tu_pipeline *pipeline, struct tu_cs *cs,
uint32_t id, uint32_t size)
{
struct ts_cs_memory memory;
if (pipeline->dynamic_state_mask & BIT(id))
return false;
/* TODO: share this logic with tu_cmd_dynamic_state */
tu_cs_alloc(&pipeline->cs, size, 1, &memory);
tu_cs_init_external(cs, memory.map, memory.map + size);
tu_cs_begin(cs);
tu_cs_reserve_space(cs, size);
assert(id < ARRAY_SIZE(pipeline->dynamic_state));
pipeline->dynamic_state[id].iova = memory.iova;
pipeline->dynamic_state[id].size = size;
return true;
}
static void
tu_pipeline_builder_parse_viewport(struct tu_pipeline_builder *builder,
struct tu_pipeline *pipeline)
@ -2204,20 +2139,13 @@ tu_pipeline_builder_parse_viewport(struct tu_pipeline_builder *builder,
const VkPipelineViewportStateCreateInfo *vp_info =
builder->create_info->pViewportState;
struct tu_cs vp_cs;
tu_cs_begin_sub_stream(&pipeline->cs, 21, &vp_cs);
struct tu_cs cs;
if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_VIEWPORT)) {
assert(vp_info->viewportCount == 1);
tu6_emit_viewport(&vp_cs, vp_info->pViewports);
}
if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_VIEWPORT, 18))
tu6_emit_viewport(&cs, vp_info->pViewports);
if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_SCISSOR)) {
assert(vp_info->scissorCount == 1);
tu6_emit_scissor(&vp_cs, vp_info->pScissors);
}
pipeline->vp.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &vp_cs);
if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_SCISSOR, 3))
tu6_emit_scissor(&cs, vp_info->pScissors);
}
static void
@ -2229,11 +2157,10 @@ tu_pipeline_builder_parse_rasterization(struct tu_pipeline_builder *builder,
assert(rast_info->polygonMode == VK_POLYGON_MODE_FILL);
struct tu_cs rast_cs;
tu_cs_begin_sub_stream(&pipeline->cs, 20, &rast_cs);
struct tu_cs cs;
tu_cs_begin_sub_stream(&pipeline->cs, 7, &cs);
tu_cs_emit_regs(&rast_cs,
tu_cs_emit_regs(&cs,
A6XX_GRAS_CL_CNTL(
.znear_clip_disable = rast_info->depthClampEnable,
.zfar_clip_disable = rast_info->depthClampEnable,
@ -2241,24 +2168,28 @@ tu_pipeline_builder_parse_rasterization(struct tu_pipeline_builder *builder,
.zero_gb_scale_z = 1,
.vp_clip_code_ignore = 1));
/* move to hw ctx init? */
tu6_emit_gras_unknowns(&rast_cs);
tu6_emit_point_size(&rast_cs);
tu_cs_emit_regs(&cs, A6XX_GRAS_UNKNOWN_8001());
tu_cs_emit_regs(&cs,
A6XX_GRAS_SU_POINT_MINMAX(.min = 1.0f / 16.0f, .max = 4092.0f),
A6XX_GRAS_SU_POINT_SIZE(1.0f));
const uint32_t gras_su_cntl =
pipeline->rast.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &cs);
pipeline->gras_su_cntl =
tu6_gras_su_cntl(rast_info, builder->samples);
if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_LINE_WIDTH))
tu6_emit_gras_su_cntl(&rast_cs, gras_su_cntl, rast_info->lineWidth);
if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_LINE_WIDTH, 2)) {
pipeline->gras_su_cntl |=
A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(rast_info->lineWidth / 2.0f);
tu_cs_emit_regs(&cs, A6XX_GRAS_SU_CNTL(.dword = pipeline->gras_su_cntl));
}
if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_DEPTH_BIAS)) {
tu6_emit_depth_bias(&rast_cs, rast_info->depthBiasConstantFactor,
if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_DEPTH_BIAS, 4)) {
tu6_emit_depth_bias(&cs, rast_info->depthBiasConstantFactor,
rast_info->depthBiasClamp,
rast_info->depthBiasSlopeFactor);
}
pipeline->rast.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &rast_cs);
pipeline->rast.gras_su_cntl = gras_su_cntl;
}
static void
@ -2286,30 +2217,31 @@ tu_pipeline_builder_parse_depth_stencil(struct tu_pipeline_builder *builder,
builder->depth_attachment_format != VK_FORMAT_S8_UINT
? ds_info : &dummy_ds_info;
struct tu_cs ds_cs;
tu_cs_begin_sub_stream(&pipeline->cs, 12, &ds_cs);
struct tu_cs cs;
tu_cs_begin_sub_stream(&pipeline->cs, 6, &cs);
/* move to hw ctx init? */
tu6_emit_alpha_control_disable(&ds_cs);
tu6_emit_depth_control(&ds_cs, ds_info_depth,
tu_cs_emit_regs(&cs, A6XX_RB_ALPHA_CONTROL());
tu6_emit_depth_control(&cs, ds_info_depth,
builder->create_info->pRasterizationState);
tu6_emit_stencil_control(&ds_cs, ds_info);
tu6_emit_stencil_control(&cs, ds_info);
if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_COMPARE_MASK)) {
tu6_emit_stencil_compare_mask(&ds_cs, ds_info->front.compareMask,
ds_info->back.compareMask);
}
if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_WRITE_MASK)) {
tu6_emit_stencil_write_mask(&ds_cs, ds_info->front.writeMask,
ds_info->back.writeMask);
}
if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_REFERENCE)) {
tu6_emit_stencil_reference(&ds_cs, ds_info->front.reference,
ds_info->back.reference);
pipeline->ds.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &cs);
if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, 2)) {
tu_cs_emit_regs(&cs, A6XX_RB_STENCILMASK(.mask = ds_info->front.compareMask & 0xff,
.bfmask = ds_info->back.compareMask & 0xff));
}
pipeline->ds.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &ds_cs);
if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, 2)) {
tu_cs_emit_regs(&cs, A6XX_RB_STENCILWRMASK(.wrmask = ds_info->front.writeMask & 0xff,
.bfwrmask = ds_info->back.writeMask & 0xff));
}
if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_STENCIL_REFERENCE, 2)) {
tu_cs_emit_regs(&cs, A6XX_RB_STENCILREF(.ref = ds_info->front.reference & 0xff,
.bfref = ds_info->back.reference & 0xff));
}
}
static void
@ -2342,32 +2274,35 @@ tu_pipeline_builder_parse_multisample_and_color_blend(
builder->use_color_attachments ? builder->create_info->pColorBlendState
: &dummy_blend_info;
struct tu_cs blend_cs;
tu_cs_begin_sub_stream(&pipeline->cs, MAX_RTS * 3 + 18, &blend_cs);
struct tu_cs cs;
tu_cs_begin_sub_stream(&pipeline->cs, MAX_RTS * 3 + 4, &cs);
uint32_t blend_enable_mask;
tu6_emit_rb_mrt_controls(&blend_cs, blend_info,
tu6_emit_rb_mrt_controls(&cs, blend_info,
builder->color_attachment_formats,
&blend_enable_mask);
if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_BLEND_CONSTANTS))
tu6_emit_blend_constants(&blend_cs, blend_info->blendConstants);
if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_SAMPLE_LOCATIONS)) {
const struct VkPipelineSampleLocationsStateCreateInfoEXT *sample_locations =
vk_find_struct_const(msaa_info->pNext, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT);
const VkSampleLocationsInfoEXT *samp_loc = NULL;
if (sample_locations && sample_locations->sampleLocationsEnable)
samp_loc = &sample_locations->sampleLocationsInfo;
tu6_emit_sample_locations(&blend_cs, samp_loc);
}
tu6_emit_blend_control(&blend_cs, blend_enable_mask,
tu6_emit_blend_control(&cs, blend_enable_mask,
builder->use_dual_src_blend, msaa_info);
pipeline->blend.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &blend_cs);
pipeline->blend.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &cs);
if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_BLEND_CONSTANTS, 5)) {
tu_cs_emit_pkt4(&cs, REG_A6XX_RB_BLEND_RED_F32, 4);
tu_cs_emit_array(&cs, (const uint32_t *) blend_info->blendConstants, 4);
}
const struct VkPipelineSampleLocationsStateCreateInfoEXT *sample_locations =
vk_find_struct_const(msaa_info->pNext, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT);
const VkSampleLocationsInfoEXT *samp_loc = NULL;
if (sample_locations && sample_locations->sampleLocationsEnable)
samp_loc = &sample_locations->sampleLocationsInfo;
if (tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_SAMPLE_LOCATIONS,
samp_loc ? 9 : 6)) {
tu6_emit_sample_locations(&cs, samp_loc);
}
}
static void

@ -129,6 +129,7 @@ tu_minify(uint32_t n, uint32_t levels)
})
#define COND(bool, val) ((bool) ? (val) : 0)
#define BIT(bit) (1u << (bit))
/* Whenever we generate an error, pass it through this function. Useful for
* debugging, where we can break on it. Only call at error site, not when
@ -409,6 +410,42 @@ struct ts_cs_memory {
uint64_t iova;
};
struct tu_draw_state {
uint64_t iova : 48;
uint32_t size : 16;
};
enum tu_dynamic_state
{
/* re-use VK_DYNAMIC_STATE_ enums for non-extended dynamic states */
TU_DYNAMIC_STATE_SAMPLE_LOCATIONS = VK_DYNAMIC_STATE_STENCIL_REFERENCE + 1,
TU_DYNAMIC_STATE_COUNT,
};
enum tu_draw_state_group_id
{
TU_DRAW_STATE_PROGRAM,
TU_DRAW_STATE_PROGRAM_BINNING,
TU_DRAW_STATE_VB,
TU_DRAW_STATE_VI,
TU_DRAW_STATE_VI_BINNING,
TU_DRAW_STATE_RAST,
TU_DRAW_STATE_DS,
TU_DRAW_STATE_BLEND,
TU_DRAW_STATE_VS_CONST,
TU_DRAW_STATE_GS_CONST,
TU_DRAW_STATE_FS_CONST,
TU_DRAW_STATE_DESC_SETS,
TU_DRAW_STATE_DESC_SETS_GMEM,
TU_DRAW_STATE_DESC_SETS_SYSMEM,
TU_DRAW_STATE_DESC_SETS_LOAD,
TU_DRAW_STATE_VS_PARAMS,
/* dynamic state related draw states */
TU_DRAW_STATE_DYNAMIC,
TU_DRAW_STATE_COUNT = TU_DRAW_STATE_DYNAMIC + TU_DYNAMIC_STATE_COUNT,
};
enum tu_cs_mode
{
@ -578,73 +615,12 @@ tu_buffer_iova(struct tu_buffer *buffer)
return buffer->bo->iova + buffer->bo_offset;
}
enum tu_dynamic_state_bits
{
TU_DYNAMIC_VIEWPORT = 1 << 0,
TU_DYNAMIC_SCISSOR = 1 << 1,
TU_DYNAMIC_LINE_WIDTH = 1 << 2,
TU_DYNAMIC_DEPTH_BIAS = 1 << 3,
TU_DYNAMIC_BLEND_CONSTANTS = 1 << 4,
TU_DYNAMIC_DEPTH_BOUNDS = 1 << 5,
TU_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 6,
TU_DYNAMIC_STENCIL_WRITE_MASK = 1 << 7,
TU_DYNAMIC_STENCIL_REFERENCE = 1 << 8,
TU_DYNAMIC_DISCARD_RECTANGLE = 1 << 9,
TU_DYNAMIC_SAMPLE_LOCATIONS = 1 << 10,
TU_DYNAMIC_ALL = (1 << 11) - 1,
};
struct tu_vertex_binding
{
struct tu_buffer *buffer;
VkDeviceSize offset;
};
struct tu_viewport_state
{
uint32_t count;
VkViewport viewports[MAX_VIEWPORTS];
};
struct tu_scissor_state
{
uint32_t count;
VkRect2D scissors[MAX_SCISSORS];
};
struct tu_dynamic_state
{
/**
* Bitmask of (1 << VK_DYNAMIC_STATE_*).
* Defines the set of saved dynamic state.
*/
uint32_t mask;
struct tu_viewport_state viewport;
struct tu_scissor_state scissor;
float line_width;
struct
{
uint32_t front;
uint32_t back;
} stencil_compare_mask;
struct
{
uint32_t front;
uint32_t back;
} stencil_write_mask;
struct
{
uint32_t front;
uint32_t back;
} stencil_reference;
};
const char *
tu_get_debug_option_name(int id);
@ -693,21 +669,14 @@ struct tu_tiling_config
enum tu_cmd_dirty_bits
{
TU_CMD_DIRTY_PIPELINE = 1 << 0,
TU_CMD_DIRTY_COMPUTE_PIPELINE = 1 << 1,
TU_CMD_DIRTY_VERTEX_BUFFERS = 1 << 2,
TU_CMD_DIRTY_DESCRIPTOR_SETS = 1 << 3,
TU_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS = 1 << 4,
TU_CMD_DIRTY_SHADER_CONSTS = 1 << 5,
TU_CMD_DIRTY_STREAMOUT_BUFFERS = 1 << 6,
TU_CMD_DIRTY_DYNAMIC_LINE_WIDTH = 1 << 16,
TU_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 17,
TU_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1 << 18,
TU_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1 << 19,
TU_CMD_DIRTY_DYNAMIC_VIEWPORT = 1 << 20,
TU_CMD_DIRTY_DYNAMIC_SCISSOR = 1 << 21,
/* all draw states were disabled and need to be re-enabled: */
TU_CMD_DIRTY_DRAW_STATE = 1 << 7,
};
struct tu_streamout_state {
@ -842,7 +811,17 @@ struct tu_cmd_state
VkDeviceSize offsets[MAX_VBS];
} vb;
struct tu_dynamic_state dynamic;
/* for dynamic states that can't be emitted directly */
uint32_t dynamic_stencil_mask;
uint32_t dynamic_stencil_wrmask;
uint32_t dynamic_stencil_ref;
uint32_t dynamic_gras_su_cntl;
/* saved states to re-emit in TU_CMD_DIRTY_DRAW_STATE case */
struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT];
struct tu_cs_entry vertex_buffers_ib;
struct tu_cs_entry shader_const_ib[MESA_SHADER_STAGES];
struct tu_cs_entry desc_sets_ib, desc_sets_gmem_ib, desc_sets_sysmem_ib, desc_sets_load_ib;
/* Stream output buffers */
struct
@ -1106,8 +1085,6 @@ struct tu_pipeline
{
struct tu_cs cs;
struct tu_dynamic_state dynamic_state;
struct tu_pipeline_layout *layout;
bool need_indirect_descriptor_sets;
@ -1116,6 +1093,15 @@ struct tu_pipeline
struct tu_streamout_state streamout;
/* mask of enabled dynamic states
* if BIT(i) is set, pipeline->dynamic_state[i] is *NOT* used
*/
uint32_t dynamic_state_mask;
struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT];
/* gras_su_cntl without line width, used for dynamic line width state */
uint32_t gras_su_cntl;
struct
{
struct tu_bo binary_bo;
@ -1147,12 +1133,6 @@ struct tu_pipeline
struct
{
struct tu_cs_entry state_ib;
} vp;
struct
{
uint32_t gras_su_cntl;
struct tu_cs_entry state_ib;
} rast;
struct
@ -1180,31 +1160,12 @@ tu6_emit_scissor(struct tu_cs *cs, const VkRect2D *scissor);
void
tu6_emit_sample_locations(struct tu_cs *cs, const VkSampleLocationsInfoEXT *samp_loc);
void
tu6_emit_gras_su_cntl(struct tu_cs *cs,
uint32_t gras_su_cntl,
float line_width);
void
tu6_emit_depth_bias(struct tu_cs *cs,
float constant_factor,
float clamp,
float slope_factor);
void
tu6_emit_stencil_compare_mask(struct tu_cs *cs,
uint32_t front,
uint32_t back);
void
tu6_emit_stencil_write_mask(struct tu_cs *cs, uint32_t front, uint32_t back);
void
tu6_emit_stencil_reference(struct tu_cs *cs, uint32_t front, uint32_t back);
void
tu6_emit_blend_constants(struct tu_cs *cs, const float constants[4]);
void tu6_emit_msaa(struct tu_cs *cs, VkSampleCountFlagBits samples);
void tu6_emit_window_scissor(struct tu_cs *cs, uint32_t x1, uint32_t y1, uint32_t x2, uint32_t y2);