panvk: Implement indexed rendering

Since we can do 8-bit index buffers, also advertise VK_EXT_index_type_uint8.

Reviewed-by: Jason Ekstrand <jason.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15295>
This commit is contained in:
Boris Brezillon 2022-03-10 17:27:02 -06:00 committed by Jason Ekstrand
parent a08b695386
commit 58587c32cb
7 changed files with 199 additions and 60 deletions

View File

@ -10,6 +10,10 @@ include = [
"dEQP-VK.glsl.builtin.function.integer.usubborrow.*",
"dEQP-VK.glsl.builtin.precision.frexp.*",
"dEQP-VK.glsl.builtin.precision.ldexp.*",
"dEQP-VK.glsl.conversions.*",
"dEQP-VK.glsl.derivate.*.constant.*",
"dEQP-VK.glsl.derivate.*.linear.*",
"dEQP-VK.glsl.derivate.*.uniform_*",
"dEQP-VK.image.load_store.with_format.*",
"dEQP-VK.pipeline.sampler.view_type.*.format.r*.address_modes.all_mode_clamp_to_border*",
"dEQP-VK.ssbo.layout.single_basic_type.*",

View File

@ -63,7 +63,27 @@ panvk_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,
VkDeviceSize offset,
VkIndexType indexType)
{
/* Record the index buffer binding in the command buffer state. Nothing is
 * emitted here; the binding is read back by the indexed draw path.
 */
panvk_stub();
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
VK_FROM_HANDLE(panvk_buffer, buf, buffer);
cmdbuf->state.ib.buffer = buf;
cmdbuf->state.ib.offset = offset;
/* index_size is stored in bits (8/16/32), with 0 meaning "no index type".
 * The indexed draw path divides it by 8 to get a byte stride.
 */
switch (indexType) {
case VK_INDEX_TYPE_UINT16:
cmdbuf->state.ib.index_size = 16;
break;
case VK_INDEX_TYPE_UINT32:
cmdbuf->state.ib.index_size = 32;
break;
case VK_INDEX_TYPE_NONE_KHR:
cmdbuf->state.ib.index_size = 0;
break;
case VK_INDEX_TYPE_UINT8_EXT:
cmdbuf->state.ib.index_size = 8;
break;
default:
unreachable("Invalid index type\n");
}
}
void
@ -530,17 +550,6 @@ panvk_cmd_open_batch(struct panvk_cmd_buffer *cmdbuf)
return cmdbuf->state.batch;
}
/* Placeholder for the indexed draw entry point: the body only calls
 * panvk_stub(), so none of the draw parameters are consumed.
 */
void
panvk_CmdDrawIndexed(VkCommandBuffer commandBuffer,
uint32_t indexCount,
uint32_t instanceCount,
uint32_t firstIndex,
int32_t vertexOffset,
uint32_t firstInstance)
{
panvk_stub();
}
void
panvk_CmdDrawIndirect(VkCommandBuffer commandBuffer,
VkBuffer _buffer,

View File

@ -151,6 +151,7 @@ panvk_get_device_extensions(const struct panvk_physical_device *device,
.KHR_swapchain = true,
#endif
.EXT_custom_border_color = true,
.EXT_index_type_uint8 = true,
};
}

View File

@ -497,7 +497,8 @@ enum panvk_dynamic_state_bits {
PANVK_DYNAMIC_STENCIL_REFERENCE = 1 << 8,
PANVK_DYNAMIC_DISCARD_RECTANGLE = 1 << 9,
PANVK_DYNAMIC_SSBO = 1 << 10,
PANVK_DYNAMIC_ALL = (1 << 11) - 1,
PANVK_DYNAMIC_VERTEX_INSTANCE_OFFSETS = 1 << 11,
PANVK_DYNAMIC_ALL = (1 << 12) - 1,
};
struct panvk_descriptor_state {
@ -523,8 +524,10 @@ struct panvk_descriptor_state {
struct panvk_draw_info {
unsigned first_index;
unsigned index_count;
unsigned index_size;
unsigned first_vertex;
unsigned vertex_count;
unsigned vertex_range;
unsigned padded_vertex_count;
unsigned first_instance;
unsigned instance_count;
@ -542,6 +545,7 @@ struct panvk_draw_info {
mali_ptr samplers;
mali_ptr ubos;
mali_ptr position;
mali_ptr indices;
union {
mali_ptr psiz;
float line_width;
@ -626,10 +630,8 @@ struct panvk_cmd_state {
struct {
struct panvk_buffer *buffer;
uint64_t offset;
uint32_t type;
uint32_t max_index_count;
uint8_t index_size;
uint64_t index_va;
uint32_t first_vertex, base_vertex, base_instance;
} ib;
struct {

View File

@ -339,8 +339,9 @@ panvk_cmd_upload_sysval(struct panvk_cmd_buffer *cmdbuf,
panvk_sysval_upload_viewport_offset(&cmdbuf->state.viewport, data);
break;
case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS:
/* TODO: support base_{vertex,instance} */
data->u32[0] = data->u32[1] = data->u32[2] = 0;
data->u32[0] = cmdbuf->state.ib.first_vertex;
data->u32[1] = cmdbuf->state.ib.base_vertex;
data->u32[2] = cmdbuf->state.ib.base_instance;
break;
case PAN_SYSVAL_BLEND_CONSTANTS:
memcpy(data->f32, cmdbuf->state.blend.constants, sizeof(data->f32));
@ -639,7 +640,7 @@ panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf,
struct panvk_varyings_info *varyings = &cmdbuf->state.varyings;
panvk_varyings_alloc(varyings, &cmdbuf->varying_pool.base,
draw->vertex_count);
draw->padded_vertex_count * draw->instance_count);
unsigned buf_count = panvk_varyings_buf_count(varyings);
struct panfrost_ptr bufs =
@ -756,7 +757,7 @@ panvk_draw_prepare_vs_attribs(struct panvk_cmd_buffer *cmdbuf,
unsigned num_imgs =
pipeline->img_access_mask & BITFIELD_BIT(MESA_SHADER_VERTEX) ?
pipeline->layout->num_imgs : 0;
unsigned attrib_count = pipeline->attribs.buf_count + num_imgs;
unsigned attrib_count = pipeline->attribs.attrib_count + num_imgs;
if (desc_state->vs_attribs || !attrib_count)
return;
@ -768,7 +769,7 @@ panvk_draw_prepare_vs_attribs(struct panvk_cmd_buffer *cmdbuf,
return;
}
unsigned attrib_buf_count = attrib_count * 2;
unsigned attrib_buf_count = pipeline->attribs.buf_count * 2;
struct panfrost_ptr bufs =
pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base,
attrib_buf_count + (PAN_ARCH >= 6 ? 1 : 0),
@ -880,18 +881,10 @@ panvk_draw_prepare_tiler_job(struct panvk_cmd_buffer *cmdbuf,
panvk_per_arch(emit_tiler_job)(pipeline, draw, ptr.cpu);
}
void
panvk_per_arch(CmdDraw)(VkCommandBuffer commandBuffer,
uint32_t vertexCount,
uint32_t instanceCount,
uint32_t firstVertex,
uint32_t firstInstance)
static void
panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf,
struct panvk_draw_info *draw)
{
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
if (instanceCount == 0 || vertexCount == 0)
return;
struct panvk_batch *batch = cmdbuf->state.batch;
struct panvk_cmd_bind_point_state *bind_point_state =
panvk_cmd_get_bind_point_state(cmdbuf, GRAPHICS);
@ -911,6 +904,17 @@ panvk_per_arch(CmdDraw)(VkCommandBuffer commandBuffer,
panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);
/* Non-indexed draws (index_size == 0) carry no vertex offset, so their
 * base vertex is 0.
 */
unsigned base_vertex = draw->index_size ? draw->vertex_offset : 0;
/* Refresh the cached vertex/instance offsets and flag the matching sysval
 * dirty only when one of the three values actually changes. Each cached
 * field must be compared against its own source value: base_instance (not
 * base_vertex) is the one that tracks draw->first_instance.
 */
if (cmdbuf->state.ib.first_vertex != draw->offset_start ||
    cmdbuf->state.ib.base_vertex != base_vertex ||
    cmdbuf->state.ib.base_instance != draw->first_instance) {
   cmdbuf->state.ib.base_vertex = base_vertex;
   cmdbuf->state.ib.base_instance = draw->first_instance;
   cmdbuf->state.ib.first_vertex = draw->offset_start;
   cmdbuf->state.dirty |= PANVK_DYNAMIC_VERTEX_INSTANCE_OFFSETS;
}
panvk_cmd_prepare_ubos(cmdbuf, bind_point_state);
panvk_cmd_prepare_textures(cmdbuf, bind_point_state);
panvk_cmd_prepare_samplers(cmdbuf, bind_point_state);
@ -919,48 +923,150 @@ panvk_per_arch(CmdDraw)(VkCommandBuffer commandBuffer,
struct panvk_descriptor_state *desc_state =
panvk_cmd_get_desc_state(cmdbuf, GRAPHICS);
struct panvk_draw_info draw = {
.first_vertex = firstVertex,
.vertex_count = vertexCount,
.first_instance = firstInstance,
.instance_count = instanceCount,
.padded_vertex_count = panfrost_padded_vertex_count(vertexCount),
.offset_start = firstVertex,
.tls = batch->tls.gpu,
.fb = batch->fb.desc.gpu,
.ubos = desc_state->ubos,
.textures = desc_state->textures,
.samplers = desc_state->samplers,
};
draw->tls = batch->tls.gpu;
draw->fb = batch->fb.desc.gpu;
draw->ubos = desc_state->ubos;
draw->textures = desc_state->textures;
draw->samplers = desc_state->samplers;
STATIC_ASSERT(sizeof(draw.invocation) >= sizeof(struct mali_invocation_packed));
panfrost_pack_work_groups_compute((struct mali_invocation_packed *)&draw.invocation,
1, vertexCount, instanceCount, 1, 1, 1, true, false);
panvk_draw_prepare_fs_rsd(cmdbuf, &draw);
panvk_draw_prepare_varyings(cmdbuf, &draw);
panvk_draw_prepare_attributes(cmdbuf, &draw);
panvk_draw_prepare_viewport(cmdbuf, &draw);
panvk_draw_prepare_tiler_context(cmdbuf, &draw);
panvk_draw_prepare_vertex_job(cmdbuf, &draw);
panvk_draw_prepare_tiler_job(cmdbuf, &draw);
STATIC_ASSERT(sizeof(draw->invocation) >= sizeof(struct mali_invocation_packed));
panfrost_pack_work_groups_compute((struct mali_invocation_packed *)&draw->invocation,
1, draw->vertex_range, draw->instance_count,
1, 1, 1, true, false);
panvk_draw_prepare_fs_rsd(cmdbuf, draw);
panvk_draw_prepare_varyings(cmdbuf, draw);
panvk_draw_prepare_attributes(cmdbuf, draw);
panvk_draw_prepare_viewport(cmdbuf, draw);
panvk_draw_prepare_tiler_context(cmdbuf, draw);
panvk_draw_prepare_vertex_job(cmdbuf, draw);
panvk_draw_prepare_tiler_job(cmdbuf, draw);
batch->tlsinfo.tls.size = MAX2(pipeline->tls_size, batch->tlsinfo.tls.size);
assert(!pipeline->wls_size);
unsigned vjob_id =
panfrost_add_job(&cmdbuf->desc_pool.base, &batch->scoreboard,
MALI_JOB_TYPE_VERTEX, false, false, 0, 0,
&draw.jobs.vertex, false);
&draw->jobs.vertex, false);
if (pipeline->fs.required) {
panfrost_add_job(&cmdbuf->desc_pool.base, &batch->scoreboard,
MALI_JOB_TYPE_TILER, false, false, vjob_id, 0,
&draw.jobs.tiler, false);
&draw->jobs.tiler, false);
}
/* Clear the dirty flags all at once */
desc_state->dirty = cmdbuf->state.dirty = 0;
}
/* Non-indexed draw entry point: builds a panvk_draw_info describing the
 * draw and hands it to panvk_cmd_draw(). Empty draws are dropped.
 */
void
panvk_per_arch(CmdDraw)(VkCommandBuffer commandBuffer,
                        uint32_t vertexCount,
                        uint32_t instanceCount,
                        uint32_t firstVertex,
                        uint32_t firstInstance)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   /* Nothing to render. */
   if (vertexCount == 0 || instanceCount == 0)
      return;

   /* The vertex count is only padded for instanced draws. */
   unsigned padded_count = vertexCount;
   if (instanceCount > 1)
      padded_count = panfrost_padded_vertex_count(vertexCount);

   struct panvk_draw_info draw = {
      .first_vertex = firstVertex,
      .offset_start = firstVertex,
      .vertex_count = vertexCount,
      .vertex_range = vertexCount,
      .padded_vertex_count = padded_count,
      .first_instance = firstInstance,
      .instance_count = instanceCount,
   };

   panvk_cmd_draw(cmdbuf, &draw);
}
/* Walk the currently bound index buffer on the CPU and report the smallest
 * and largest index found in the range [start, start + count).
 *
 * cmdbuf: command buffer whose state.ib binding (buffer, offset, index_size)
 *         is scanned
 * start:  first index to inspect, in index units
 * count:  number of indices to inspect
 * min:    receives the smallest index (the index type's max value if count
 *         is 0)
 * max:    receives the largest index (0 if count is 0)
 *
 * A warning is printed to stderr on every call, since reading index data
 * from the CPU is not valid in Vulkan.
 */
static void
panvk_index_minmax_search(struct panvk_cmd_buffer *cmdbuf,
                          uint32_t start, uint32_t count,
                          uint32_t *min, uint32_t *max)
{
   /* Validate the binding before it gets dereferenced below, otherwise the
    * asserts can never fire on a missing buffer/BO/CPU mapping.
    */
   assert(cmdbuf->state.ib.buffer);
   assert(cmdbuf->state.ib.buffer->bo);
   assert(cmdbuf->state.ib.buffer->bo->ptr.cpu);

   void *ptr = cmdbuf->state.ib.buffer->bo->ptr.cpu +
               cmdbuf->state.ib.buffer->bo_offset +
               cmdbuf->state.ib.offset;

   fprintf(stderr, "WARNING: Crawling index buffers from the CPU isn't valid in Vulkan\n");

   *max = 0;

   /* TODO: Use panfrost_minmax_cache */
   /* TODO: Read full cacheline of data to mitigate the uncached
    * mapping slowness.
    */
   /* NOTE(review): every value is treated as a regular index here; confirm
    * whether primitive-restart indices need to be skipped.
    */
   switch (cmdbuf->state.ib.index_size) {
#define MINMAX_SEARCH_CASE(sz) \
   case sz: { \
      uint ## sz ## _t *indices = ptr; \
      *min = UINT ## sz ## _MAX; \
      for (uint32_t i = 0; i < count; i++) { \
         *min = MIN2(indices[i + start], *min); \
         *max = MAX2(indices[i + start], *max); \
      } \
      break; \
   }
   MINMAX_SEARCH_CASE(32)
   MINMAX_SEARCH_CASE(16)
   MINMAX_SEARCH_CASE(8)
#undef MINMAX_SEARCH_CASE
   default:
      unreachable("Invalid index size");
   }
}
/* Indexed draw entry point. Scans the bound index buffer for the min/max
 * index referenced by this draw, derives the vertex range from it, then
 * fills a panvk_draw_info and hands it to panvk_cmd_draw(). Empty draws are
 * dropped before the index buffer is touched.
 */
void
panvk_per_arch(CmdDrawIndexed)(VkCommandBuffer commandBuffer,
                               uint32_t indexCount,
                               uint32_t instanceCount,
                               uint32_t firstIndex,
                               int32_t vertexOffset,
                               uint32_t firstInstance)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   uint32_t lowest_index, highest_index;

   /* Nothing to render. */
   if (indexCount == 0 || instanceCount == 0)
      return;

   panvk_index_minmax_search(cmdbuf, firstIndex, indexCount,
                             &lowest_index, &highest_index);

   /* Number of distinct vertex slots spanned by the indices. */
   unsigned vertex_range = highest_index - lowest_index + 1;

   /* The vertex count is only padded for instanced draws. */
   unsigned padded_count = vertex_range;
   if (instanceCount > 1)
      padded_count = panfrost_padded_vertex_count(vertex_range);

   /* index_size is expressed in bits; convert to a byte stride. */
   const unsigned index_bytes = cmdbuf->state.ib.index_size / 8;

   struct panvk_draw_info draw = {
      .index_size = cmdbuf->state.ib.index_size,
      .first_index = firstIndex,
      .index_count = indexCount,
      .indices = cmdbuf->state.ib.buffer->bo->ptr.gpu +
                 cmdbuf->state.ib.buffer->bo_offset +
                 cmdbuf->state.ib.offset +
                 (firstIndex * index_bytes),
      .vertex_offset = vertexOffset,
      .offset_start = lowest_index + vertexOffset,
      .vertex_range = vertex_range,
      .vertex_count = indexCount + abs(vertexOffset),
      .padded_vertex_count = padded_count,
      .first_instance = firstInstance,
      .instance_count = instanceCount,
   };

   panvk_cmd_draw(cmdbuf, &draw);
}
VkResult
panvk_per_arch(EndCommandBuffer)(VkCommandBuffer commandBuffer)
{

View File

@ -350,7 +350,7 @@ panvk_emit_attrib(const struct panvk_device *dev,
pan_pack(attrib, ATTRIBUTE, cfg) {
cfg.buffer_index = buf_idx * 2;
cfg.offset = attribs->attrib[idx].offset +
(bufs[cfg.buffer_index].address & 63);
(bufs[buf_idx].address & 63);
if (buf_info->per_instance)
cfg.offset += draw->first_instance * buf_info->stride;
@ -513,8 +513,22 @@ panvk_emit_tiler_primitive(const struct panvk_pipeline *pipeline,
if (pipeline->ia.primitive_restart)
cfg.primitive_restart = MALI_PRIMITIVE_RESTART_IMPLICIT;
cfg.job_task_split = 6;
/* TODO: indexed draws */
cfg.index_count = draw->vertex_count;
if (draw->index_size) {
cfg.index_count = draw->index_count;
cfg.indices = draw->indices;
cfg.base_vertex_offset = draw->vertex_offset - draw->offset_start;
switch (draw->index_size) {
case 32: cfg.index_type = MALI_INDEX_TYPE_UINT32; break;
case 16: cfg.index_type = MALI_INDEX_TYPE_UINT16; break;
case 8: cfg.index_type = MALI_INDEX_TYPE_UINT8; break;
default: unreachable("Invalid index size");
}
} else {
cfg.index_count = draw->vertex_count;
cfg.index_type = MALI_INDEX_TYPE_NONE;
}
}
}

View File

@ -244,6 +244,9 @@ panvk_pipeline_builder_alloc_static_state_bo(struct panvk_pipeline_builder *buil
case PAN_SYSVAL_SSBO:
pipeline->sysvals[i].dirty_mask |= PANVK_DYNAMIC_SSBO;
break;
case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS:
pipeline->sysvals[i].dirty_mask |= PANVK_DYNAMIC_VERTEX_INSTANCE_OFFSETS;
break;
default:
break;
}