radv: don't update misaligned_mask in CmdBindVertexBuffers2

CmdBindVertexBuffers2 can't update the mask in its per-binding loop because
it doesn't easily know which attributes use a binding.

We could do it in a separate loop, but there's no point, especially since
zink does CmdSetVertexInputEXT() after CmdBindVertexBuffers2().

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-By: Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
Fixes: c335a4d70e ("radv: dynamically calculate misaligned_mask for dynamic vertex input")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17521>
This commit is contained in:
Rhys Perry 2022-07-13 16:43:44 +01:00 committed by Marge Bot
parent 1d019d2ab7
commit 0fb3aa005f
3 changed files with 51 additions and 40 deletions

View File

@ -2875,8 +2875,24 @@ lookup_vs_prolog(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *vs_shad
uint32_t instance_rate_inputs = state->instance_rate_inputs & attribute_mask;
uint32_t zero_divisors = state->zero_divisors & attribute_mask;
*nontrivial_divisors = state->nontrivial_divisors & attribute_mask;
enum amd_gfx_level chip = device->physical_device->rad_info.gfx_level;
const uint32_t misaligned_mask = chip == GFX6 || chip >= GFX10 ? cmd_buffer->state.vbo_misaligned_mask : 0;
uint32_t misaligned_mask = cmd_buffer->state.vbo_misaligned_mask;
if (cmd_buffer->state.vbo_misaligned_mask_invalid) {
assert(device->physical_device->rad_info.gfx_level == GFX6 ||
device->physical_device->rad_info.gfx_level >= GFX10);
u_foreach_bit (index, cmd_buffer->state.vbo_misaligned_mask_invalid & attribute_mask) {
uint8_t binding = state->bindings[index];
if (!(cmd_buffer->state.vbo_bound_mask & BITFIELD_BIT(binding)))
continue;
uint8_t req = state->format_align_req_minus_1[index];
struct radv_vertex_binding *vb = &cmd_buffer->vertex_bindings[binding];
VkDeviceSize offset = vb->offset + state->offsets[index];
if ((offset & req) || (vb->stride & req))
misaligned_mask |= BITFIELD_BIT(index);
}
cmd_buffer->state.vbo_misaligned_mask = misaligned_mask;
cmd_buffer->state.vbo_misaligned_mask_invalid &= ~attribute_mask;
}
/* try to use a pre-compiled prolog first */
struct radv_shader_part *prolog = NULL;
@ -4849,50 +4865,45 @@ radv_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer, uint32_t firstBinding,
* stride from the pipeline. */
assert(firstBinding + bindingCount <= MAX_VBS);
cmd_buffer->state.vbo_misaligned_mask = state->misaligned_mask;
enum amd_gfx_level chip = cmd_buffer->device->physical_device->rad_info.gfx_level;
if (firstBinding + bindingCount > cmd_buffer->used_vertex_bindings)
cmd_buffer->used_vertex_bindings = firstBinding + bindingCount;
uint32_t misaligned_mask_invalid = 0;
for (uint32_t i = 0; i < bindingCount; i++) {
RADV_FROM_HANDLE(radv_buffer, buffer, pBuffers[i]);
uint32_t idx = firstBinding + i;
VkDeviceSize size = pSizes ? pSizes[i] : 0;
VkDeviceSize stride = pStrides ? pStrides[i] : 0;
/* if pStrides=NULL, it shouldn't overwrite the strides specified by CmdSetVertexInputEXT */
VkDeviceSize stride = pStrides ? pStrides[i] : vb[idx].stride;
if (!!cmd_buffer->vertex_binding_buffers[idx] != !!buffer ||
(buffer && ((vb[idx].offset & 0x3) != (pOffsets[i] & 0x3) ||
(vb[idx].stride & 0x3) != (stride & 0x3)))) {
misaligned_mask_invalid |= state->bindings_match_attrib ? BITFIELD_BIT(idx) : 0xffffffff;
}
cmd_buffer->vertex_binding_buffers[idx] = buffer;
vb[idx].offset = pOffsets[i];
vb[idx].size = size;
vb[idx].stride = stride;
if (chip == GFX6 || chip >= GFX10) {
const uint32_t bit = 1u << idx;
if (!buffer) {
cmd_buffer->state.vbo_misaligned_mask &= ~bit;
cmd_buffer->state.vbo_bound_mask &= ~bit;
} else {
cmd_buffer->state.vbo_bound_mask |= bit;
if (pStrides && vb[idx].stride != stride) {
if (stride & state->format_align_req_minus_1[idx])
cmd_buffer->state.vbo_misaligned_mask |= bit;
else
cmd_buffer->state.vbo_misaligned_mask &= ~bit;
}
if (state->possibly_misaligned_mask & bit &&
(vb[idx].offset + state->offsets[idx]) & state->format_align_req_minus_1[idx])
cmd_buffer->state.vbo_misaligned_mask |= bit;
}
}
/* if pStrides=NULL, it shouldn't overwrite the strides specified by CmdSetVertexInputEXT */
if (pStrides)
vb[idx].stride = stride;
uint32_t bit = BITFIELD_BIT(idx);
if (buffer) {
radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->vertex_binding_buffers[idx]->bo);
cmd_buffer->state.vbo_bound_mask |= bit;
} else {
cmd_buffer->state.vbo_bound_mask &= ~bit;
}
}
if ((chip == GFX6 || chip >= GFX10) && misaligned_mask_invalid) {
cmd_buffer->state.vbo_misaligned_mask_invalid = misaligned_mask_invalid;
cmd_buffer->state.vbo_misaligned_mask &= ~misaligned_mask_invalid;
}
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_VERTEX_BUFFER |
RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT;
}
@ -5797,8 +5808,10 @@ radv_CmdSetVertexInputEXT(VkCommandBuffer commandBuffer, uint32_t vertexBindingD
bindings[pVertexBindingDescriptions[i].binding] = &pVertexBindingDescriptions[i];
cmd_buffer->state.vbo_misaligned_mask = 0;
cmd_buffer->state.vbo_misaligned_mask_invalid = 0;
memset(state, 0, sizeof(*state));
state->bindings_match_attrib = true;
enum amd_gfx_level chip = cmd_buffer->device->physical_device->rad_info.gfx_level;
for (unsigned i = 0; i < vertexAttributeDescriptionCount; i++) {
@ -5808,6 +5821,8 @@ radv_CmdSetVertexInputEXT(VkCommandBuffer commandBuffer, uint32_t vertexBindingD
state->attribute_mask |= 1u << loc;
state->bindings[loc] = attrib->binding;
if (attrib->binding != loc)
state->bindings_match_attrib = false;
if (binding->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE) {
state->instance_rate_inputs |= 1u << loc;
state->divisors[loc] = binding->divisor;
@ -5858,18 +5873,13 @@ radv_CmdSetVertexInputEXT(VkCommandBuffer commandBuffer, uint32_t vertexBindingD
if (found->post_shuffle)
state->post_shuffle |= 1u << loc;
if (chip == GFX6 || chip >= GFX10) {
const struct radv_vertex_binding *vb = cmd_buffer->vertex_bindings;
unsigned bit = 1u << loc;
if ((chip == GFX6 || chip >= GFX10) &&
cmd_buffer->state.vbo_bound_mask & BITFIELD_BIT(attrib->binding)) {
if (binding->stride & found->fmt_align_req_minus_1) {
state->misaligned_mask |= bit;
if (cmd_buffer->state.vbo_bound_mask & BITFIELD_BIT(attrib->binding))
cmd_buffer->state.vbo_misaligned_mask |= bit;
} else {
state->possibly_misaligned_mask |= bit;
if (cmd_buffer->state.vbo_bound_mask & BITFIELD_BIT(attrib->binding) &&
((vb[attrib->binding].offset + state->offsets[loc]) & found->fmt_align_req_minus_1))
cmd_buffer->state.vbo_misaligned_mask |= bit;
cmd_buffer->state.vbo_misaligned_mask |= BITFIELD_BIT(loc);
} else if ((cmd_buffer->vertex_bindings[attrib->binding].offset + state->offsets[loc]) &
found->fmt_align_req_minus_1) {
cmd_buffer->state.vbo_misaligned_mask |= BITFIELD_BIT(loc);
}
}
}

View File

@ -1539,6 +1539,7 @@ struct radv_cmd_state {
uint32_t *emitted_vs_prolog_key;
uint32_t emitted_vs_prolog_key_hash;
uint32_t vbo_misaligned_mask;
uint32_t vbo_misaligned_mask_invalid;
uint32_t vbo_bound_mask;
/* Whether the cmdbuffer owns the current render pass rather than the app. */

View File

@ -380,8 +380,6 @@ struct radv_shader_info {
struct radv_vs_input_state {
uint32_t attribute_mask;
uint32_t misaligned_mask;
uint32_t possibly_misaligned_mask;
uint32_t instance_rate_inputs;
uint32_t nontrivial_divisors;
@ -399,6 +397,8 @@ struct radv_vs_input_state {
uint8_t formats[MAX_VERTEX_ATTRIBS];
uint8_t format_align_req_minus_1[MAX_VERTEX_ATTRIBS];
uint8_t format_sizes[MAX_VERTEX_ATTRIBS];
bool bindings_match_attrib;
};
struct radv_vs_prolog_key {