radv: Create internal cmdbuf when a graphics pipeline needs compute.

This is mainly going to be used by task shaders, because
the HW implementation mismatches the API:

- In the API, task shaders are considered graphics shaders which
  are part of a graphics pipeline and the draws are submitted to
  a graphics queue.
- The HW requires the driver to dispatch task shaders on
  an async compute queue.

When a pipeline is bound that has a task shader, create a
driver-internal ACE (async compute engine) cmdbuf which
we are going to submit to an ACE queue.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16531>
This commit is contained in:
Timur Kristóf 2022-01-22 10:10:41 +01:00 committed by Marge Bot
parent 44a71594c8
commit ef07c3272a
2 changed files with 49 additions and 0 deletions

View File

@ -421,6 +421,8 @@ radv_destroy_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
if (cmd_buffer->cs)
cmd_buffer->device->ws->cs_destroy(cmd_buffer->cs);
if (cmd_buffer->ace_internal.cs)
cmd_buffer->device->ws->cs_destroy(cmd_buffer->ace_internal.cs);
for (unsigned i = 0; i < MAX_BIND_POINTS; i++) {
struct radv_descriptor_set_header *set = &cmd_buffer->descriptors[i].push_set.set;
@ -490,6 +492,8 @@ radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
vk_command_buffer_reset(&cmd_buffer->vk);
cmd_buffer->device->ws->cs_reset(cmd_buffer->cs);
if (cmd_buffer->ace_internal.cs)
cmd_buffer->device->ws->cs_reset(cmd_buffer->ace_internal.cs);
list_for_each_entry_safe(struct radv_cmd_buffer_upload, up, &cmd_buffer->upload.list, list)
{
@ -686,6 +690,30 @@ radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer)
radeon_emit(cs, AC_ENCODE_TRACE_POINT(cmd_buffer->state.trace_id));
}
/**
 * Create the driver-internal compute (ACE) command stream for a command buffer.
 *
 * Must only be called while the command buffer has no internal compute
 * command stream yet. The caller is responsible for storing the returned
 * pointer in cmd_buffer->ace_internal.cs.
 *
 * \param cmd_buffer  Command buffer that needs an internal compute stream.
 * \return The new command stream, or NULL on failure; in the failure case
 *         cmd_buffer->record_result is set to VK_ERROR_OUT_OF_HOST_MEMORY.
 */
static struct radeon_cmdbuf *
radv_ace_internal_create(struct radv_cmd_buffer *cmd_buffer)
{
   assert(!cmd_buffer->ace_internal.cs);

   struct radv_device *dev = cmd_buffer->device;
   struct radeon_cmdbuf *cs = dev->ws->cs_create(dev->ws, AMD_IP_COMPUTE);

   if (cs)
      return cs;

   /* Creation failed: remember the error so it is visible on the command buffer. */
   cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
   return NULL;
}
/**
 * Finalize the driver-internal compute (ACE) command stream.
 *
 * Must only be called when the command buffer actually has an internal
 * compute command stream.
 *
 * \param cmd_buffer  Command buffer whose internal compute stream is finalized.
 * \return The result reported by the winsys cs_finalize callback.
 */
static VkResult
radv_ace_internal_finalize(struct radv_cmd_buffer *cmd_buffer)
{
   assert(cmd_buffer->ace_internal.cs);

   struct radeon_cmdbuf *cs = cmd_buffer->ace_internal.cs;

   return cmd_buffer->device->ws->cs_finalize(cs);
}
static void
radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer, enum radv_cmd_flush_bits flags)
{
@ -5247,6 +5275,13 @@ radv_EndCommandBuffer(VkCommandBuffer commandBuffer)
if (cmd_buffer->gds_needed)
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH;
/* Finalize the internal compute command stream, if it exists. */
if (cmd_buffer->ace_internal.cs) {
VkResult result = radv_ace_internal_finalize(cmd_buffer);
if (result != VK_SUCCESS)
return vk_error(cmd_buffer, result);
}
si_emit_cache_flush(cmd_buffer);
}
@ -5402,6 +5437,12 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeline
pipeline->shaders[MESA_SHADER_MESH]->info.ms.needs_ms_scratch_ring;
if (radv_pipeline_has_stage(graphics_pipeline, MESA_SHADER_TASK)) {
if (!cmd_buffer->ace_internal.cs) {
cmd_buffer->ace_internal.cs = radv_ace_internal_create(cmd_buffer);
if (!cmd_buffer->ace_internal.cs)
return;
}
cmd_buffer->task_rings_needed = true;
}
break;

View File

@ -1638,6 +1638,14 @@ struct radv_cmd_buffer {
uint64_t mec_inv_pred_va; /* For inverted predication when using MEC. */
bool mec_inv_pred_emitted; /* To ensure we don't have to repeat inverting the VA. */
/* State for the driver-internal ACE (async compute engine) command buffer. */
struct {
/**
* Internal command stream that is used when some graphics work
* also requires a submission to the compute queue.
*
* Created on demand by radv_CmdBindPipeline() when a graphics pipeline
* with a task shader is bound and no stream exists yet. The reset,
* end-of-recording and destroy paths check this pointer and skip the
* internal stream when it is not set.
*/
struct radeon_cmdbuf *cs;
} ace_internal;
/**
* Whether a query pool has been reset and we have to flush caches.
*/