diff --git a/src/gallium/frontends/lavapipe/lvp_cmd_buffer.c b/src/gallium/frontends/lavapipe/lvp_cmd_buffer.c
index 8c7ffa18f2f..b024cd73c01 100644
--- a/src/gallium/frontends/lavapipe/lvp_cmd_buffer.c
+++ b/src/gallium/frontends/lavapipe/lvp_cmd_buffer.c
@@ -1642,3 +1642,142 @@ void lvp_CmdPushDescriptorSetWithTemplateKHR(
    }
    cmd_buf_queue(cmd_buffer, cmd);
 }
+
+/* Queue a vkCmdBindTransformFeedbackBuffersEXT call.  The buffer, offset and
+ * size arrays are copied into storage allocated right behind the entry.
+ */
+void lvp_CmdBindTransformFeedbackBuffersEXT(
+   VkCommandBuffer commandBuffer,
+   uint32_t firstBinding,
+   uint32_t bindingCount,
+   const VkBuffer* pBuffers,
+   const VkDeviceSize* pOffsets,
+   const VkDeviceSize* pSizes)
+{
+   LVP_FROM_HANDLE(lvp_cmd_buffer, cmd_buffer, commandBuffer);
+   struct lvp_cmd_buffer_entry *cmd;
+   uint32_t cmd_size = 0;
+
+   cmd_size += bindingCount * (sizeof(struct lvp_buffer *) + sizeof(VkDeviceSize) * 2);
+
+   cmd = cmd_buf_entry_alloc_size(cmd_buffer, cmd_size, LVP_CMD_BIND_TRANSFORM_FEEDBACK_BUFFERS);
+   if (!cmd)
+      return;
+
+   cmd->u.bind_transform_feedback_buffers.first_binding = firstBinding;
+   cmd->u.bind_transform_feedback_buffers.binding_count = bindingCount;
+   cmd->u.bind_transform_feedback_buffers.buffers = (struct lvp_buffer **)(cmd + 1);
+   cmd->u.bind_transform_feedback_buffers.offsets = (VkDeviceSize *)(cmd->u.bind_transform_feedback_buffers.buffers + bindingCount);
+   cmd->u.bind_transform_feedback_buffers.sizes = (VkDeviceSize *)(cmd->u.bind_transform_feedback_buffers.offsets + bindingCount);
+
+   for (unsigned i = 0; i < bindingCount; i++) {
+      cmd->u.bind_transform_feedback_buffers.buffers[i] = lvp_buffer_from_handle(pBuffers[i]);
+      cmd->u.bind_transform_feedback_buffers.offsets[i] = pOffsets[i];
+      /* pSizes is optional; a NULL array means VK_WHOLE_SIZE for every binding. */
+      cmd->u.bind_transform_feedback_buffers.sizes[i] = pSizes ? pSizes[i] : VK_WHOLE_SIZE;
+   }
+   cmd_buf_queue(cmd_buffer, cmd);
+}
+
+/* Queue a vkCmdBeginTransformFeedbackEXT call.  pCounterBuffers and
+ * pCounterBufferOffsets are both optional: a missing buffer is recorded as
+ * NULL and a missing offset as 0.
+ */
+void lvp_CmdBeginTransformFeedbackEXT(
+   VkCommandBuffer commandBuffer,
+   uint32_t firstCounterBuffer,
+   uint32_t counterBufferCount,
+   const VkBuffer* pCounterBuffers,
+   const VkDeviceSize* pCounterBufferOffsets)
+{
+   LVP_FROM_HANDLE(lvp_cmd_buffer, cmd_buffer, commandBuffer);
+   struct lvp_cmd_buffer_entry *cmd;
+   uint32_t cmd_size = 0;
+
+   cmd_size += counterBufferCount * (sizeof(struct lvp_buffer *) + sizeof(VkDeviceSize));
+
+   cmd = cmd_buf_entry_alloc_size(cmd_buffer, cmd_size, LVP_CMD_BEGIN_TRANSFORM_FEEDBACK);
+   if (!cmd)
+      return;
+
+   cmd->u.begin_transform_feedback.first_counter_buffer = firstCounterBuffer;
+   cmd->u.begin_transform_feedback.counter_buffer_count = counterBufferCount;
+   cmd->u.begin_transform_feedback.counter_buffers = (struct lvp_buffer **)(cmd + 1);
+   cmd->u.begin_transform_feedback.counter_buffer_offsets = (VkDeviceSize *)(cmd->u.begin_transform_feedback.counter_buffers + counterBufferCount);
+
+   for (unsigned i = 0; i < counterBufferCount; i++) {
+      cmd->u.begin_transform_feedback.counter_buffers[i] =
+         pCounterBuffers ? lvp_buffer_from_handle(pCounterBuffers[i]) : NULL;
+      if (pCounterBufferOffsets)
+         cmd->u.begin_transform_feedback.counter_buffer_offsets[i] = pCounterBufferOffsets[i];
+      else
+         cmd->u.begin_transform_feedback.counter_buffer_offsets[i] = 0;
+   }
+   cmd_buf_queue(cmd_buffer, cmd);
+}
+
+/* Queue a vkCmdEndTransformFeedbackEXT call.  pCounterBuffers and
+ * pCounterBufferOffsets are both optional: a missing buffer is recorded as
+ * NULL and a missing offset as 0.
+ */
+void lvp_CmdEndTransformFeedbackEXT(
+   VkCommandBuffer commandBuffer,
+   uint32_t firstCounterBuffer,
+   uint32_t counterBufferCount,
+   const VkBuffer* pCounterBuffers,
+   const VkDeviceSize* pCounterBufferOffsets)
+{
+   LVP_FROM_HANDLE(lvp_cmd_buffer, cmd_buffer, commandBuffer);
+   struct lvp_cmd_buffer_entry *cmd;
+   uint32_t cmd_size = 0;
+
+   cmd_size += counterBufferCount * (sizeof(struct lvp_buffer *) + sizeof(VkDeviceSize));
+
+   cmd = cmd_buf_entry_alloc_size(cmd_buffer, cmd_size, LVP_CMD_END_TRANSFORM_FEEDBACK);
+   if (!cmd)
+      return;
+
+   cmd->u.end_transform_feedback.first_counter_buffer = firstCounterBuffer;
+   cmd->u.end_transform_feedback.counter_buffer_count = counterBufferCount;
+   cmd->u.end_transform_feedback.counter_buffers = (struct lvp_buffer **)(cmd + 1);
+   cmd->u.end_transform_feedback.counter_buffer_offsets = (VkDeviceSize *)(cmd->u.end_transform_feedback.counter_buffers + counterBufferCount);
+
+   for (unsigned i = 0; i < counterBufferCount; i++) {
+      cmd->u.end_transform_feedback.counter_buffers[i] =
+         pCounterBuffers ? lvp_buffer_from_handle(pCounterBuffers[i]) : NULL;
+      if (pCounterBufferOffsets)
+         cmd->u.end_transform_feedback.counter_buffer_offsets[i] = pCounterBufferOffsets[i];
+      else
+         cmd->u.end_transform_feedback.counter_buffer_offsets[i] = 0;
+   }
+   cmd_buf_queue(cmd_buffer, cmd);
+}
+
+/* Queue a vkCmdDrawIndirectByteCountEXT call; the vertex count is computed
+ * from the counter buffer when the command is executed.
+ */
+void lvp_CmdDrawIndirectByteCountEXT(
+   VkCommandBuffer commandBuffer,
+   uint32_t instanceCount,
+   uint32_t firstInstance,
+   VkBuffer counterBuffer,
+   VkDeviceSize counterBufferOffset,
+   uint32_t counterOffset,
+   uint32_t vertexStride)
+{
+   LVP_FROM_HANDLE(lvp_cmd_buffer, cmd_buffer, commandBuffer);
+   struct lvp_cmd_buffer_entry *cmd;
+
+   cmd = cmd_buf_entry_alloc(cmd_buffer, LVP_CMD_DRAW_INDIRECT_BYTE_COUNT);
+   if (!cmd)
+      return;
+
+   cmd->u.draw_indirect_byte_count.instance_count = instanceCount;
+   cmd->u.draw_indirect_byte_count.first_instance = firstInstance;
+   cmd->u.draw_indirect_byte_count.counter_buffer = lvp_buffer_from_handle(counterBuffer);
+   cmd->u.draw_indirect_byte_count.counter_buffer_offset = counterBufferOffset;
+   cmd->u.draw_indirect_byte_count.counter_offset = counterOffset;
+   cmd->u.draw_indirect_byte_count.vertex_stride = vertexStride;
+
+   cmd_buf_queue(cmd_buffer, cmd);
+}
diff --git a/src/gallium/frontends/lavapipe/lvp_device.c b/src/gallium/frontends/lavapipe/lvp_device.c
index 063e63e13c2..93426cea1f6 100644
--- a/src/gallium/frontends/lavapipe/lvp_device.c
+++ b/src/gallium/frontends/lavapipe/lvp_device.c
@@ -392,7 +392,14 @@ void lvp_GetPhysicalDeviceFeatures2(
          features->indexTypeUint8 = true;
          break;
       }
+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: {
+         VkPhysicalDeviceTransformFeedbackFeaturesEXT *features =
+            (VkPhysicalDeviceTransformFeedbackFeaturesEXT*)ext;
+         features->transformFeedback = true;
+         features->geometryStreams = true;
+         break;
+      }
       default:
          break;
       }
@@ -606,6 +613,21 @@ void lvp_GetPhysicalDeviceProperties2(
          props->maxVertexAttribDivisor = 1;
          break;
       }
+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
+         VkPhysicalDeviceTransformFeedbackPropertiesEXT *properties =
+            (VkPhysicalDeviceTransformFeedbackPropertiesEXT*)ext;
+         properties->maxTransformFeedbackStreams = pdevice->pscreen->get_param(pdevice->pscreen, PIPE_CAP_MAX_VERTEX_STREAMS);
+         properties->maxTransformFeedbackBuffers = pdevice->pscreen->get_param(pdevice->pscreen, PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS);
+         properties->maxTransformFeedbackBufferSize = UINT32_MAX;
+         properties->maxTransformFeedbackStreamDataSize = 512;
+         properties->maxTransformFeedbackBufferDataSize = 512;
+         properties->maxTransformFeedbackBufferDataStride = 512;
+         properties->transformFeedbackQueries = true;
+         properties->transformFeedbackStreamsLinesTriangles = false;
+         properties->transformFeedbackRasterizationStreamSelect = false;
+         properties->transformFeedbackDraw = true;
+         break;
+      }
       default:
          break;
       }
diff --git a/src/gallium/frontends/lavapipe/lvp_execute.c b/src/gallium/frontends/lavapipe/lvp_execute.c
index 6f1c4ef5270..f03eed98144 100644
--- a/src/gallium/frontends/lavapipe/lvp_execute.c
+++ b/src/gallium/frontends/lavapipe/lvp_execute.c
@@ -125,6 +125,10 @@ struct rendering_state {
    const struct lvp_attachment_state *attachments;
    VkImageAspectFlags *pending_clear_aspects;
    int num_pending_aspects;
+
+   uint32_t num_so_targets;
+   struct pipe_stream_output_target *so_targets[PIPE_MAX_SO_BUFFERS];
+   uint32_t so_offsets[PIPE_MAX_SO_BUFFERS];
 };
 
 static void emit_compute_state(struct rendering_state *state)
@@ -2422,6 +2426,114 @@ static void handle_push_descriptor_set(struct lvp_cmd_buffer_entry *cmd,
    }
 }
+
+/* Create stream-output targets for the bindings named by the command,
+ * destroying any target that already occupied a slot.  A size of
+ * VK_WHOLE_SIZE selects everything from the offset to the end of the buffer.
+ */
+static void handle_bind_transform_feedback_buffers(struct lvp_cmd_buffer_entry *cmd,
+                                                   struct rendering_state *state)
+{
+   struct lvp_cmd_bind_transform_feedback_buffers *btfb = &cmd->u.bind_transform_feedback_buffers;
+
+   for (unsigned i = 0; i < btfb->binding_count; i++) {
+      unsigned idx = i + btfb->first_binding;
+      VkDeviceSize size = btfb->sizes[i];
+
+      if (size == VK_WHOLE_SIZE)
+         size = btfb->buffers[i]->size - btfb->offsets[i];
+
+      if (state->so_targets[idx])
+         state->pctx->stream_output_target_destroy(state->pctx, state->so_targets[idx]);
+
+      state->so_targets[idx] = state->pctx->create_stream_output_target(state->pctx,
+                                                                         btfb->buffers[i]->bo,
+                                                                         btfb->offsets[i],
+                                                                         size);
+   }
+   state->num_so_targets = btfb->first_binding + btfb->binding_count;
+}
+
+/* Start capturing: load the resume offset of each binding from its counter
+ * buffer (0 where none was supplied) and activate the bound targets.
+ */
+static void handle_begin_transform_feedback(struct lvp_cmd_buffer_entry *cmd,
+                                            struct rendering_state *state)
+{
+   struct lvp_cmd_begin_transform_feedback *btf = &cmd->u.begin_transform_feedback;
+   uint32_t offsets[PIPE_MAX_SO_BUFFERS];
+
+   memset(offsets, 0, sizeof(offsets));
+
+   for (unsigned i = 0; i < btf->counter_buffer_count; i++) {
+      unsigned idx = btf->first_counter_buffer + i;
+
+      /* Counter buffers are optional; without one the binding starts at 0. */
+      if (!btf->counter_buffers[i])
+         continue;
+
+      pipe_buffer_read(state->pctx,
+                       btf->counter_buffers[i]->bo,
+                       btf->counter_buffer_offsets[i],
+                       4,
+                       &offsets[idx]);
+   }
+   state->pctx->set_stream_output_targets(state->pctx, state->num_so_targets,
+                                          state->so_targets, offsets);
+}
+
+/* Stop capturing: store the final write offset of each binding into its
+ * counter buffer, when one was supplied, then unbind all targets.
+ */
+static void handle_end_transform_feedback(struct lvp_cmd_buffer_entry *cmd,
+                                          struct rendering_state *state)
+{
+   struct lvp_cmd_end_transform_feedback *etf = &cmd->u.end_transform_feedback;
+
+   for (unsigned i = 0; i < etf->counter_buffer_count; i++) {
+      unsigned idx = etf->first_counter_buffer + i;
+      uint32_t offset;
+
+      if (!etf->counter_buffers[i] || !state->so_targets[idx])
+         continue;
+
+      offset = state->pctx->stream_output_target_offset(state->so_targets[idx]);
+      pipe_buffer_write(state->pctx,
+                        etf->counter_buffers[i]->bo,
+                        etf->counter_buffer_offsets[i],
+                        4,
+                        &offset);
+   }
+   state->pctx->set_stream_output_targets(state->pctx, 0, NULL, NULL);
+}
+
+/* vkCmdDrawIndirectByteCountEXT: the vertex count is derived from the byte
+ * count stored in the counter buffer:
+ *    (byte count - counter_offset) / vertex_stride
+ */
+static void handle_draw_indirect_byte_count(struct lvp_cmd_buffer_entry *cmd,
+                                            struct rendering_state *state)
+{
+   struct lvp_cmd_draw_indirect_byte_count *dibc = &cmd->u.draw_indirect_byte_count;
+
+   pipe_buffer_read(state->pctx,
+                    dibc->counter_buffer->bo,
+                    dibc->counter_buffer->offset + dibc->counter_buffer_offset,
+                    4, &state->draw.count);
+
+   /* Clamp rather than wrap if the counter is smaller than counter_offset. */
+   if (state->draw.count > dibc->counter_offset)
+      state->draw.count -= dibc->counter_offset;
+   else
+      state->draw.count = 0;
+
+   state->info.start_instance = dibc->first_instance;
+   state->info.instance_count = dibc->instance_count;
+   state->info.index_size = 0;
+
+   state->draw.count /= dibc->vertex_stride;
+   state->pctx->draw_vbo(state->pctx, &state->info, &state->indirect_info, &state->draw, 1);
+}
+
 static void lvp_execute_cmd_buffer(struct lvp_cmd_buffer *cmd_buffer,
                                    struct rendering_state *state)
 {
@@ -2576,6 +2688,19 @@ static void lvp_execute_cmd_buffer(struct lvp_cmd_buffer *cmd_buffer,
       case LVP_CMD_PUSH_DESCRIPTOR_SET:
          handle_push_descriptor_set(cmd, state);
          break;
+      case LVP_CMD_BIND_TRANSFORM_FEEDBACK_BUFFERS:
+         handle_bind_transform_feedback_buffers(cmd, state);
+         break;
+      case LVP_CMD_BEGIN_TRANSFORM_FEEDBACK:
+         handle_begin_transform_feedback(cmd, state);
+         break;
+      case LVP_CMD_END_TRANSFORM_FEEDBACK:
+         handle_end_transform_feedback(cmd, state);
+         break;
+      case LVP_CMD_DRAW_INDIRECT_BYTE_COUNT:
+         emit_state(state);
+         handle_draw_indirect_byte_count(cmd, state);
+         break;
       }
    }
 }
diff --git a/src/gallium/frontends/lavapipe/lvp_extensions.py b/src/gallium/frontends/lavapipe/lvp_extensions.py
index c06e04064aa..2189fcd5453 100644
--- a/src/gallium/frontends/lavapipe/lvp_extensions.py
+++ b/src/gallium/frontends/lavapipe/lvp_extensions.py
@@ -139,7 +139,7 @@ EXTENSIONS = [
     Extension('VK_EXT_shader_stencil_export', 1, True),
     Extension('VK_EXT_shader_subgroup_ballot', 1, False),
     Extension('VK_EXT_shader_subgroup_vote', 1, False),
-    Extension('VK_EXT_transform_feedback', 1, False),
+    Extension('VK_EXT_transform_feedback', 1, True),
     Extension('VK_EXT_vertex_attribute_divisor', 3, True),
     Extension('VK_EXT_ycbcr_image_arrays', 1, False),
     Extension('VK_GOOGLE_decorate_string', 1, True),
diff --git a/src/gallium/frontends/lavapipe/lvp_pipeline.c b/src/gallium/frontends/lavapipe/lvp_pipeline.c
index 194c9ef9cf3..b83e0c655b4 100644
--- a/src/gallium/frontends/lavapipe/lvp_pipeline.c
+++ b/src/gallium/frontends/lavapipe/lvp_pipeline.c
@@ -29,6 +29,7 @@
 #include "lvp_lower_vulkan_resource.h"
 #include "pipe/p_state.h"
 #include "pipe/p_context.h"
+#include "nir/nir_xfb_info.h"
 
 #define SPIR_V_MAGIC_NUMBER 0x07230203
 
@@ -505,6 +506,8 @@ lvp_shader_compile_to_ir(struct lvp_pipeline *pipeline,
          .variable_pointers = true,
          .stencil_export = true,
          .post_depth_coverage = true,
+         .transform_feedback = true,
+         .geometry_streams = true,
       },
       .ubo_addr_format = nir_address_format_32bit_index_offset,
       .ssbo_addr_format = nir_address_format_32bit_index_offset,
@@ -699,6 +702,44 @@ lvp_pipeline_compile(struct lvp_pipeline *pipeline,
    } else {
       struct pipe_shader_state shstate = {};
       fill_shader_prog(&shstate, stage, pipeline);
+
+      /* Vertex-pipeline stages may capture outputs; describe them to gallium. */
+      nir_xfb_info *xfb_info = NULL;
+      if (stage == MESA_SHADER_VERTEX ||
+          stage == MESA_SHADER_GEOMETRY ||
+          stage == MESA_SHADER_TESS_EVAL) {
+         xfb_info = nir_gather_xfb_info(pipeline->pipeline_nir[stage], NULL);
+         if (xfb_info) {
+            unsigned num_outputs = 0;
+            uint8_t output_mapping[VARYING_SLOT_TESS_MAX];
+            memset(output_mapping, 0, sizeof(output_mapping));
+
+            /* Map each written varying slot to its index among the written outputs. */
+            for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
+               if (pipeline->pipeline_nir[stage]->info.outputs_written & BITFIELD64_BIT(attr))
+                  output_mapping[attr] = num_outputs++;
+            }
+
+            shstate.stream_output.num_outputs = xfb_info->output_count;
+            for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
+               if (xfb_info->buffers_written & (1 << i)) {
+                  shstate.stream_output.stride[i] = xfb_info->buffers[i].stride / 4;
+               }
+            }
+            for (unsigned i = 0; i < xfb_info->output_count; i++) {
+               shstate.stream_output.output[i].output_buffer = xfb_info->outputs[i].buffer;
+               shstate.stream_output.output[i].dst_offset = xfb_info->outputs[i].offset / 4;
+               shstate.stream_output.output[i].register_index = output_mapping[xfb_info->outputs[i].location];
+               shstate.stream_output.output[i].num_components = util_bitcount(xfb_info->outputs[i].component_mask);
+               shstate.stream_output.output[i].start_component = ffs(xfb_info->outputs[i].component_mask) - 1;
+               shstate.stream_output.output[i].stream = xfb_info->buffer_to_stream[xfb_info->outputs[i].buffer];
+            }
+
+            /* Gathered with a NULL ralloc context, so it is ours to free. */
+            ralloc_free(xfb_info);
+         }
+      }
+
       switch (stage) {
       case MESA_SHADER_FRAGMENT:
          pipeline->shader_cso[PIPE_SHADER_FRAGMENT] = device->queue.ctx->create_fs_state(device->queue.ctx, &shstate);
diff --git a/src/gallium/frontends/lavapipe/lvp_private.h b/src/gallium/frontends/lavapipe/lvp_private.h
index 85c10406071..6860373bc5f 100644
--- a/src/gallium/frontends/lavapipe/lvp_private.h
+++ b/src/gallium/frontends/lavapipe/lvp_private.h
@@ -660,6 +660,10 @@ enum lvp_cmds {
    LVP_CMD_DRAW_INDIRECT_COUNT,
    LVP_CMD_DRAW_INDEXED_INDIRECT_COUNT,
    LVP_CMD_PUSH_DESCRIPTOR_SET,
+   LVP_CMD_BIND_TRANSFORM_FEEDBACK_BUFFERS,
+   LVP_CMD_BEGIN_TRANSFORM_FEEDBACK,
+   LVP_CMD_END_TRANSFORM_FEEDBACK,
+   LVP_CMD_DRAW_INDIRECT_BYTE_COUNT,
 };
 
 struct lvp_cmd_bind_pipeline {
@@ -949,6 +953,41 @@ struct lvp_cmd_push_descriptor_set {
    union lvp_descriptor_info *infos;
 };
 
+/* LVP_CMD_BIND_TRANSFORM_FEEDBACK_BUFFERS: each array holds binding_count entries. */
+struct lvp_cmd_bind_transform_feedback_buffers {
+   uint32_t first_binding;
+   uint32_t binding_count;
+   struct lvp_buffer **buffers;
+   VkDeviceSize *offsets;
+   VkDeviceSize *sizes;
+};
+
+/* LVP_CMD_BEGIN_TRANSFORM_FEEDBACK: each array holds counter_buffer_count entries. */
+struct lvp_cmd_begin_transform_feedback {
+   uint32_t first_counter_buffer;
+   uint32_t counter_buffer_count;
+   struct lvp_buffer **counter_buffers;
+   VkDeviceSize *counter_buffer_offsets;
+};
+
+/* LVP_CMD_END_TRANSFORM_FEEDBACK: each array holds counter_buffer_count entries. */
+struct lvp_cmd_end_transform_feedback {
+   uint32_t first_counter_buffer;
+   uint32_t counter_buffer_count;
+   struct lvp_buffer **counter_buffers;
+   VkDeviceSize *counter_buffer_offsets;
+};
+
+/* LVP_CMD_DRAW_INDIRECT_BYTE_COUNT: arguments of vkCmdDrawIndirectByteCountEXT. */
+struct lvp_cmd_draw_indirect_byte_count {
+   uint32_t instance_count;
+   uint32_t first_instance;
+   struct lvp_buffer *counter_buffer;
+   VkDeviceSize counter_buffer_offset;
+   uint32_t counter_offset;
+   uint32_t vertex_stride;
+};
+
 struct lvp_cmd_buffer_entry {
    struct list_head cmd_link;
    uint32_t cmd_type;
@@ -991,6 +1030,10 @@ struct lvp_cmd_buffer_entry {
       struct lvp_cmd_execute_commands execute_commands;
       struct lvp_cmd_draw_indirect_count draw_indirect_count;
       struct lvp_cmd_push_descriptor_set push_descriptor_set;
+      struct lvp_cmd_bind_transform_feedback_buffers bind_transform_feedback_buffers;
+      struct lvp_cmd_begin_transform_feedback begin_transform_feedback;
+      struct lvp_cmd_end_transform_feedback end_transform_feedback;
+      struct lvp_cmd_draw_indirect_byte_count draw_indirect_byte_count;
    } u;
 };
 
diff --git a/src/gallium/frontends/lavapipe/lvp_query.c b/src/gallium/frontends/lavapipe/lvp_query.c
index f807234ffb2..2d1856e8bc6 100644
--- a/src/gallium/frontends/lavapipe/lvp_query.c
+++ b/src/gallium/frontends/lavapipe/lvp_query.c
@@ -40,6 +40,9 @@ VkResult lvp_CreateQueryPool(
    case VK_QUERY_TYPE_TIMESTAMP:
       pipeq = PIPE_QUERY_TIMESTAMP;
       break;
+   case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
+      pipeq = PIPE_QUERY_SO_STATISTICS;
+      break;
    case VK_QUERY_TYPE_PIPELINE_STATISTICS:
       pipeq = PIPE_QUERY_PIPELINE_STATISTICS;
       break;
@@ -126,6 +129,12 @@ VkResult lvp_GetQueryPoolResults(
                   *(uint64_t *)dptr = pstats[i];
                   dptr += 8;
                }
+            } else if (pool->type == VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT) {
+               /* Two values per query: primitives written, then primitives needed. */
+               *(uint64_t *)dptr = result.so_statistics.num_primitives_written;
+               dptr += 8;
+               *(uint64_t *)dptr = result.so_statistics.primitives_storage_needed;
+               dptr += 8;
             } else {
                *(uint64_t *)dptr = result.u64;
                dptr += 8;
@@ -147,6 +156,18 @@ VkResult lvp_GetQueryPoolResults(
                   *(uint32_t *)dptr = pstats[i];
                   dptr += 4;
                }
+            } else if (pool->type == VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT) {
+               /* Same two values as the 64-bit path, each saturated to UINT32_MAX. */
+               if (result.so_statistics.num_primitives_written > UINT32_MAX)
+                  *(uint32_t *)dptr = UINT32_MAX;
+               else
+                  *(uint32_t *)dptr = (uint32_t)result.so_statistics.num_primitives_written;
+               dptr += 4;
+               if (result.so_statistics.primitives_storage_needed > UINT32_MAX)
+                  *(uint32_t *)dptr = UINT32_MAX;
+               else
+                  *(uint32_t *)dptr = (uint32_t)result.so_statistics.primitives_storage_needed;
+               dptr += 4;
             } else {
                if (result.u64 > UINT32_MAX)
                   *(uint32_t *)dptr = UINT32_MAX;