turnip: rework vertex buffers draw state handling

This exploits a HW optimization for when only the size of a draw state is
changed, to make things simpler and more optimal (assuming a well-behaved
user which doesn't unnecessarily call CmdBindVertexBuffers many times).

Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6665>
This commit is contained in:
Jonathan Marek 2020-09-09 09:26:59 -04:00 committed by Marge Bot
parent 69e65b9cf0
commit 52becd39a5
3 changed files with 35 additions and 66 deletions

View File

@ -1638,10 +1638,6 @@ tu_BeginCommandBuffer(VkCommandBuffer commandBuffer,
return VK_SUCCESS;
}
/* Sets vertex buffers to HW binding points. We emit VBs in SDS (so that bin
* rendering can skip over unused state), so we need to collect all the
* bindings together into a single state emit at draw time.
*/
void
tu_CmdBindVertexBuffers(VkCommandBuffer commandBuffer,
uint32_t firstBinding,
@ -1650,18 +1646,25 @@ tu_CmdBindVertexBuffers(VkCommandBuffer commandBuffer,
const VkDeviceSize *pOffsets)
{
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
assert(firstBinding + bindingCount <= MAX_VBS);
struct tu_cs cs;
/* TODO: track a "max_vb" value for the cmdbuf to save a bit of memory */
cmd->state.vertex_buffers.iova = tu_cs_draw_state(&cmd->sub_cs, &cs, 4 * MAX_VBS).iova;
for (uint32_t i = 0; i < bindingCount; i++) {
struct tu_buffer *buf = tu_buffer_from_handle(pBuffers[i]);
cmd->state.vb.buffers[firstBinding + i] = buf;
cmd->state.vb.offsets[firstBinding + i] = pOffsets[i];
cmd->state.vb[firstBinding + i].base = tu_buffer_iova(buf) + pOffsets[i];
cmd->state.vb[firstBinding + i].size = buf->size - pOffsets[i];
tu_bo_list_add(&cmd->bo_list, buf->bo, MSM_SUBMIT_BO_READ);
}
for (uint32_t i = 0; i < MAX_VBS; i++) {
tu_cs_emit_regs(&cs,
A6XX_VFD_FETCH_BASE_LO(i, cmd->state.vb[i].base),
A6XX_VFD_FETCH_BASE_HI(i, cmd->state.vb[i].base >> 32),
A6XX_VFD_FETCH_SIZE(i, cmd->state.vb[i].size));
}
cmd->state.dirty |= TU_CMD_DIRTY_VERTEX_BUFFERS;
}
@ -2114,13 +2117,6 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
for_each_bit(i, mask)
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DYNAMIC + i, pipeline->dynamic_state[i]);
/* If the new pipeline requires more VBs than we had previously set up, we
* need to re-emit them in SDS. If it requires the same set or fewer, we
* can just re-use the old SDS.
*/
if (pipeline->vi.bindings_used & ~cmd->vertex_bindings_set)
cmd->state.dirty |= TU_CMD_DIRTY_VERTEX_BUFFERS;
/* dynamic linewidth state depends on the pipeline state's gras_su_cntl
* so the dynamic state ib must be updated when pipeline changes
*/
@ -2132,6 +2128,17 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
tu_cs_emit_regs(&cs, A6XX_GRAS_SU_CNTL(.dword = cmd->state.dynamic_gras_su_cntl));
}
/* the vertex_buffers draw state always contains all the currently
* bound vertex buffers. update its size to only emit the vbs which
* are actually used by the pipeline
* note there is a HW optimization which makes it so the draw state
* is not re-executed completely when only the size changes
*/
if (cmd->state.vertex_buffers.size != pipeline->num_vbs * 4) {
cmd->state.vertex_buffers.size = pipeline->num_vbs * 4;
cmd->state.dirty |= TU_CMD_DIRTY_VERTEX_BUFFERS;
}
}
void
@ -2905,30 +2912,6 @@ tu6_emit_consts(struct tu_cmd_buffer *cmd,
return tu_cs_end_draw_state(&cmd->sub_cs, &cs);
}
static struct tu_draw_state
tu6_emit_vertex_buffers(struct tu_cmd_buffer *cmd,
const struct tu_pipeline *pipeline)
{
struct tu_cs cs;
tu_cs_begin_sub_stream(&cmd->sub_cs, 4 * MAX_VBS, &cs);
int binding;
for_each_bit(binding, pipeline->vi.bindings_used) {
const struct tu_buffer *buf = cmd->state.vb.buffers[binding];
const VkDeviceSize offset = buf->bo_offset +
cmd->state.vb.offsets[binding];
tu_cs_emit_regs(&cs,
A6XX_VFD_FETCH_BASE(binding, .bo = buf->bo, .bo_offset = offset),
A6XX_VFD_FETCH_SIZE(binding, buf->size - offset));
}
cmd->vertex_bindings_set = pipeline->vi.bindings_used;
return tu_cs_end_draw_state(&cmd->sub_cs, &cs);
}
static uint64_t
get_tess_param_bo_size(const struct tu_pipeline *pipeline,
uint32_t draw_count)
@ -3068,9 +3051,6 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
tu6_emit_consts(cmd, pipeline, descriptors_state, MESA_SHADER_FRAGMENT);
}
if (cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS)
cmd->state.vertex_buffers = tu6_emit_vertex_buffers(cmd, pipeline);
bool has_tess =
pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
struct tu_draw_state tess_consts = {};

View File

@ -1475,8 +1475,7 @@ tu6_emit_program(struct tu_cs *cs,
static void
tu6_emit_vertex_input(struct tu_cs *cs,
const struct ir3_shader_variant *vs,
const VkPipelineVertexInputStateCreateInfo *info,
uint32_t *bindings_used)
const VkPipelineVertexInputStateCreateInfo *info)
{
uint32_t vfd_decode_idx = 0;
uint32_t binding_instanced = 0; /* bitmask of instanced bindings */
@ -1492,7 +1491,6 @@ tu6_emit_vertex_input(struct tu_cs *cs,
if (binding->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE)
binding_instanced |= 1 << binding->binding;
*bindings_used |= 1 << binding->binding;
step_rate[binding->binding] = 1;
}
@ -1513,8 +1511,6 @@ tu6_emit_vertex_input(struct tu_cs *cs,
&info->pVertexAttributeDescriptions[i];
uint32_t input_idx;
assert(*bindings_used & BIT(attr->binding));
for (input_idx = 0; input_idx < vs->inputs_count; input_idx++) {
if ((vs->inputs[input_idx].slot - VERT_ATTRIB_GENERIC0) == attr->location)
break;
@ -2173,18 +2169,18 @@ tu_pipeline_builder_parse_vertex_input(struct tu_pipeline_builder *builder,
const struct ir3_shader_variant *vs = builder->variants[MESA_SHADER_VERTEX];
const struct ir3_shader_variant *bs = builder->binning_variant;
pipeline->num_vbs = vi_info->vertexBindingDescriptionCount;
struct tu_cs vi_cs;
tu_cs_begin_sub_stream(&pipeline->cs,
MAX_VERTEX_ATTRIBS * 7 + 2, &vi_cs);
tu6_emit_vertex_input(&vi_cs, vs, vi_info,
&pipeline->vi.bindings_used);
tu6_emit_vertex_input(&vi_cs, vs, vi_info);
pipeline->vi.state = tu_cs_end_draw_state(&pipeline->cs, &vi_cs);
if (bs) {
tu_cs_begin_sub_stream(&pipeline->cs,
MAX_VERTEX_ATTRIBS * 7 + 2, &vi_cs);
tu6_emit_vertex_input(
&vi_cs, bs, vi_info, &pipeline->vi.bindings_used);
tu6_emit_vertex_input(&vi_cs, bs, vi_info);
pipeline->vi.binning_state =
tu_cs_end_draw_state(&pipeline->cs, &vi_cs);
}

View File

@ -676,12 +676,6 @@ tu_buffer_iova(struct tu_buffer *buffer)
return buffer->bo->iova + buffer->bo_offset;
}
struct tu_vertex_binding
{
struct tu_buffer *buffer;
VkDeviceSize offset;
};
const char *
tu_get_debug_option_name(int id);
@ -861,11 +855,10 @@ struct tu_cmd_state
struct tu_pipeline *compute_pipeline;
/* Vertex buffers */
struct
{
struct tu_buffer *buffers[MAX_VBS];
VkDeviceSize offsets[MAX_VBS];
} vb;
struct {
uint64_t base;
uint32_t size;
} vb[MAX_VBS];
/* for dynamic states that can't be emitted directly */
uint32_t dynamic_stencil_mask;
@ -983,8 +976,6 @@ struct tu_cmd_buffer
enum tu_cmd_buffer_status status;
struct tu_cmd_state state;
struct tu_vertex_binding vertex_bindings[MAX_VBS];
uint32_t vertex_bindings_set;
uint32_t queue_family_index;
uint32_t push_constants[MAX_PUSH_CONSTANTS_SIZE / 4];
@ -1115,6 +1106,9 @@ struct tu_pipeline
/* draw states for the pipeline */
struct tu_draw_state load_state, rast_state, ds_state, blend_state;
/* for vertex buffers state */
uint32_t num_vbs;
struct
{
struct tu_draw_state state;
@ -1127,7 +1121,6 @@ struct tu_pipeline
{
struct tu_draw_state state;
struct tu_draw_state binning_state;
uint32_t bindings_used;
} vi;
struct