radv: add support for exporting pipelines with RGP

This is still experimental and is only enabled when the
RADV_THREAD_TRACE_PIPELINE environment variable is set, to avoid
breaking existing support.
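
For example, running an application with both RADV_THREAD_TRACE=100 and
RADV_THREAD_TRACE_PIPELINE=1 set should capture frame 100 with the new
pipeline/code-object records included (assuming RADV_THREAD_TRACE selects
the capture frame as in the existing SQTT support; the new variable only
needs to be present, any value works).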

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9130>

@@ -22,6 +22,7 @@
*/
#include "radv_private.h"
#include "radv_shader.h"
#include "ac_rgp.h"
#include "ac_sqtt.h"
@@ -723,6 +724,12 @@ void sqtt_CmdCopyQueryPoolResults(
   radv_Cmd##cmd_name(__VA_ARGS__); \
   radv_write_end_general_api_marker(cmd_buffer, ApiCmd##cmd_name);

static bool
radv_sqtt_dump_pipeline(void)
{
   /* Only the variable's presence matters; any value enables dumping. */
   return getenv("RADV_THREAD_TRACE_PIPELINE");
}

void sqtt_CmdBindPipeline(
   VkCommandBuffer                             commandBuffer,
   VkPipelineBindPoint                         pipelineBindPoint,
@@ -957,4 +964,339 @@ VkResult sqtt_DebugMarkerSetObjectTagEXT(
   return VK_SUCCESS;
}

/* Pipelines */
static enum rgp_hardware_stages
radv_mesa_to_rgp_shader_stage(struct radv_pipeline *pipeline,
                              gl_shader_stage stage)
{
   struct radv_shader_variant *shader = pipeline->shaders[stage];

   switch (stage) {
   case MESA_SHADER_VERTEX:
      if (shader->info.vs.as_ls)
         return RGP_HW_STAGE_LS;
      else if (shader->info.vs.as_es)
         return RGP_HW_STAGE_ES;
      else if (shader->info.is_ngg)
         return RGP_HW_STAGE_GS;
      else
         return RGP_HW_STAGE_VS;
   case MESA_SHADER_TESS_CTRL:
      return RGP_HW_STAGE_HS;
   case MESA_SHADER_TESS_EVAL:
      if (shader->info.tes.as_es)
         return RGP_HW_STAGE_ES;
      else if (shader->info.is_ngg)
         return RGP_HW_STAGE_GS;
      else
         return RGP_HW_STAGE_VS;
   case MESA_SHADER_GEOMETRY:
      return RGP_HW_STAGE_GS;
   case MESA_SHADER_FRAGMENT:
      return RGP_HW_STAGE_PS;
   case MESA_SHADER_COMPUTE:
      return RGP_HW_STAGE_CS;
   default:
      unreachable("invalid mesa shader stage");
   }
}
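
The mapping reflects how the hardware actually schedules API stages: on
GFX9+ the vertex stage can run merged into the hardware HS stage (as_ls)
or the hardware GS stage (as_es), tessellation evaluation behaves like a
second vertex stage, and with NGG the final pre-rasterization stage runs
on the hardware GS stage, which is why is_ngg maps to RGP_HW_STAGE_GS.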

static VkResult
radv_add_pso_correlation(struct radv_device *device,
                         struct radv_pipeline *pipeline)
{
   struct ac_thread_trace_data *thread_trace_data = &device->thread_trace;
   struct rgp_pso_correlation *pso_correlation = &thread_trace_data->rgp_pso_correlation;
   struct rgp_pso_correlation_record *record;

   record = malloc(sizeof(struct rgp_pso_correlation_record));
   if (!record)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   record->api_pso_hash = 0;
   record->pipeline_hash[0] = pipeline->pipeline_hash;
   record->pipeline_hash[1] = pipeline->pipeline_hash;
   memset(record->api_level_obj_name, 0, sizeof(record->api_level_obj_name));

   simple_mtx_lock(&pso_correlation->lock);
   list_addtail(&record->list, &pso_correlation->record);
   pso_correlation->record_count++;
   simple_mtx_unlock(&pso_correlation->lock);

   return VK_SUCCESS;
}
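
For reference, this is roughly the record layout implied by the stores
above; it is a sketch, the authoritative definition lives in ac_rgp.h,
and the name-buffer size is an assumption:

   struct rgp_pso_correlation_record {
      uint64_t api_pso_hash;        /* 0: no API-level PSO hash is tracked yet */
      uint64_t pipeline_hash[2];    /* internal pipeline hash, duplicated */
      char api_level_obj_name[64];  /* zeroed: no debug name attached; size assumed */
      struct list_head list;        /* linkage into rgp_pso_correlation.record */
   };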

static VkResult
radv_add_code_object_loader_event(struct radv_device *device,
                                  struct radv_pipeline *pipeline)
{
   struct ac_thread_trace_data *thread_trace_data = &device->thread_trace;
   struct rgp_loader_events *loader_events = &thread_trace_data->rgp_loader_events;
   struct rgp_loader_events_record *record;
   uint64_t base_va = ~0;

   record = malloc(sizeof(struct rgp_loader_events_record));
   if (!record)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   /* Find the lowest shader BO VA. */
   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      struct radv_shader_variant *shader = pipeline->shaders[i];
      uint64_t va;

      if (!shader)
         continue;

      va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
      base_va = MIN2(base_va, va);
   }

   record->loader_event_type = RGP_LOAD_TO_GPU_MEMORY;
   record->reserved = 0;
   record->base_address = base_va & 0xffffffffffff;
   record->code_object_hash[0] = pipeline->pipeline_hash;
   record->code_object_hash[1] = pipeline->pipeline_hash;
   record->time_stamp = os_time_get_nano();

   simple_mtx_lock(&loader_events->lock);
   list_addtail(&record->list, &loader_events->record);
   loader_events->record_count++;
   simple_mtx_unlock(&loader_events->lock);

   return VK_SUCCESS;
}
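
The base address is masked to its low 48 bits because GPU virtual
addresses on these parts fit in 48 bits, and the CPU timestamp from
os_time_get_nano() lets the tool order load events against the trace.
The record layout inferred from the stores (again a sketch, with
ac_rgp.h authoritative and the field types assumed):

   struct rgp_loader_events_record {
      uint32_t loader_event_type;   /* RGP_LOAD_TO_GPU_MEMORY */
      uint32_t reserved;
      uint64_t base_address;        /* low 48 bits of the lowest shader VA */
      uint64_t code_object_hash[2]; /* matches the pipeline hash above */
      uint64_t time_stamp;          /* CPU time in nanoseconds */
      struct list_head list;        /* linkage into rgp_loader_events.record */
   };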

static VkResult
radv_add_code_object(struct radv_device *device,
                     struct radv_pipeline *pipeline)
{
   struct ac_thread_trace_data *thread_trace_data = &device->thread_trace;
   struct rgp_code_object *code_object = &thread_trace_data->rgp_code_object;
   struct rgp_code_object_record *record;

   record = malloc(sizeof(struct rgp_code_object_record));
   if (!record)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   record->shader_stages_mask = 0;
   record->num_shaders_combined = 0;
   record->pipeline_hash[0] = pipeline->pipeline_hash;
   record->pipeline_hash[1] = pipeline->pipeline_hash;

   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      struct radv_shader_variant *shader = pipeline->shaders[i];
      uint8_t *code;
      uint64_t va;

      if (!shader)
         continue;

      code = malloc(shader->code_size);
      if (!code) {
         /* Free the code copies already made for earlier stages. */
         uint32_t mask = record->shader_stages_mask;
         while (mask)
            free(record->shader_data[u_bit_scan(&mask)].code);
         free(record);
         return VK_ERROR_OUT_OF_HOST_MEMORY;
      }
      memcpy(code, shader->code_ptr, shader->code_size);

      va = radv_buffer_get_va(shader->bo) + shader->bo_offset;

      record->shader_data[i].hash[0] = (uint64_t)(uintptr_t)shader;
      record->shader_data[i].hash[1] = (uint64_t)(uintptr_t)shader >> 32;
      record->shader_data[i].code_size = shader->code_size;
      record->shader_data[i].code = code;
      record->shader_data[i].vgpr_count = shader->config.num_vgprs;
      record->shader_data[i].sgpr_count = shader->config.num_sgprs;
      record->shader_data[i].base_address = va & 0xffffffffffff;
      record->shader_data[i].elf_symbol_offset = 0;
      record->shader_data[i].hw_stage = radv_mesa_to_rgp_shader_stage(pipeline, i);
      record->shader_data[i].is_combined = false;

      record->shader_stages_mask |= (1 << i);
      record->num_shaders_combined++;
   }

   simple_mtx_lock(&code_object->lock);
   list_addtail(&record->list, &code_object->record);
   code_object->record_count++;
   simple_mtx_unlock(&code_object->lock);

   return VK_SUCCESS;
}
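
Note that the per-shader hash is derived from the shader variant's CPU
pointer rather than from the code contents, so it is only unique within
the lifetime of the process; that is sufficient for correlating shaders
inside a single capture.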

static VkResult
radv_register_pipeline(struct radv_device *device,
                       struct radv_pipeline *pipeline)
{
   VkResult result;

   result = radv_add_pso_correlation(device, pipeline);
   if (result != VK_SUCCESS)
      return result;

   result = radv_add_code_object_loader_event(device, pipeline);
   if (result != VK_SUCCESS)
      return result;

   result = radv_add_code_object(device, pipeline);
   if (result != VK_SUCCESS)
      return result;

   return VK_SUCCESS;
}

static void
radv_unregister_pipeline(struct radv_device *device,
                         struct radv_pipeline *pipeline)
{
   struct ac_thread_trace_data *thread_trace_data = &device->thread_trace;
   struct rgp_pso_correlation *pso_correlation = &thread_trace_data->rgp_pso_correlation;
   struct rgp_loader_events *loader_events = &thread_trace_data->rgp_loader_events;
   struct rgp_code_object *code_object = &thread_trace_data->rgp_code_object;

   /* Destroy the PSO correlation record. */
   simple_mtx_lock(&pso_correlation->lock);
   list_for_each_entry_safe(struct rgp_pso_correlation_record, record,
                            &pso_correlation->record, list) {
      if (record->pipeline_hash[0] == pipeline->pipeline_hash) {
         pso_correlation->record_count--;
         list_del(&record->list);
         free(record);
         break;
      }
   }
   simple_mtx_unlock(&pso_correlation->lock);

   /* Destroy the code object loader record. */
   simple_mtx_lock(&loader_events->lock);
   list_for_each_entry_safe(struct rgp_loader_events_record, record,
                            &loader_events->record, list) {
      if (record->code_object_hash[0] == pipeline->pipeline_hash) {
         loader_events->record_count--;
         list_del(&record->list);
         free(record);
         break;
      }
   }
   simple_mtx_unlock(&loader_events->lock);

   /* Destroy the code object record. */
   simple_mtx_lock(&code_object->lock);
   list_for_each_entry_safe(struct rgp_code_object_record, record,
                            &code_object->record, list) {
      if (record->pipeline_hash[0] == pipeline->pipeline_hash) {
         uint32_t mask = record->shader_stages_mask;

         /* Free the disassembly. */
         while (mask) {
            int i = u_bit_scan(&mask);
            free(record->shader_data[i].code);
         }

         code_object->record_count--;
         list_del(&record->list);
         free(record);
         break;
      }
   }
   simple_mtx_unlock(&code_object->lock);
}
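
The _safe iterator variants are required because the matching node is
unlinked and freed while walking the list, and each loop can stop at the
first hit since a pipeline registers exactly one record per list.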

VkResult sqtt_CreateGraphicsPipelines(
   VkDevice                                    _device,
   VkPipelineCache                             pipelineCache,
   uint32_t                                    count,
   const VkGraphicsPipelineCreateInfo*         pCreateInfos,
   const VkAllocationCallbacks*                pAllocator,
   VkPipeline*                                 pPipelines)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   VkResult result;

   result = radv_CreateGraphicsPipelines(_device, pipelineCache, count,
                                         pCreateInfos, pAllocator,
                                         pPipelines);
   if (result != VK_SUCCESS)
      return result;

   if (radv_sqtt_dump_pipeline()) {
      for (unsigned i = 0; i < count; i++) {
         RADV_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]);

         if (!pipeline)
            continue;

         result = radv_register_pipeline(device, pipeline);
         if (result != VK_SUCCESS)
            goto fail;
      }
   }

   return VK_SUCCESS;

fail:
   for (unsigned i = 0; i < count; i++) {
      sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator);
      pPipelines[i] = VK_NULL_HANDLE;
   }
   return result;
}

VkResult sqtt_CreateComputePipelines(
   VkDevice                                    _device,
   VkPipelineCache                             pipelineCache,
   uint32_t                                    count,
   const VkComputePipelineCreateInfo*          pCreateInfos,
   const VkAllocationCallbacks*                pAllocator,
   VkPipeline*                                 pPipelines)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   VkResult result;

   result = radv_CreateComputePipelines(_device, pipelineCache, count,
                                        pCreateInfos, pAllocator,
                                        pPipelines);
   if (result != VK_SUCCESS)
      return result;

   if (radv_sqtt_dump_pipeline()) {
      for (unsigned i = 0; i < count; i++) {
         RADV_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]);

         if (!pipeline)
            continue;

         result = radv_register_pipeline(device, pipeline);
         if (result != VK_SUCCESS)
            goto fail;
      }
   }

   return VK_SUCCESS;

fail:
   for (unsigned i = 0; i < count; i++) {
      sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator);
      pPipelines[i] = VK_NULL_HANDLE;
   }
   return result;
}

void sqtt_DestroyPipeline(
   VkDevice                                    _device,
   VkPipeline                                  _pipeline,
   const VkAllocationCallbacks*                pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);

   if (!_pipeline)
      return;

   if (radv_sqtt_dump_pipeline())
      radv_unregister_pipeline(device, pipeline);

   radv_DestroyPipeline(_device, _pipeline, pAllocator);
}

#undef API_MARKER
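
Together these entrypoints form the pipeline part of the SQTT
interception layer: each wrapper forwards to the real radv_* entrypoint
and, when RADV_THREAD_TRACE_PIPELINE is set, registers or unregisters
the RGP records so a capture can correlate work on the GPU with the
shader code that produced it.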


@@ -432,6 +432,8 @@ radv_thread_trace_init_bo(struct radv_device *device)
bool
radv_thread_trace_init(struct radv_device *device)
{
   struct ac_thread_trace_data *thread_trace_data = &device->thread_trace;

   /* Default buffer size set to 1MB per SE. */
   device->thread_trace.buffer_size =
      radv_get_int_debug_option("RADV_THREAD_TRACE_BUFFER_SIZE", 1024 * 1024);
@@ -444,12 +446,22 @@ radv_thread_trace_init(struct radv_device *device)
   if (!radv_thread_trace_init_bo(device))
      return false;

   list_inithead(&thread_trace_data->rgp_pso_correlation.record);
   simple_mtx_init(&thread_trace_data->rgp_pso_correlation.lock, mtx_plain);

   list_inithead(&thread_trace_data->rgp_loader_events.record);
   simple_mtx_init(&thread_trace_data->rgp_loader_events.lock, mtx_plain);

   list_inithead(&thread_trace_data->rgp_code_object.record);
   simple_mtx_init(&thread_trace_data->rgp_code_object.lock, mtx_plain);

   return true;
}
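
Initializing the record lists and their locks at device creation means
pipeline registration can safely run from any thread for the device's
whole lifetime.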

void
radv_thread_trace_finish(struct radv_device *device)
{
   struct ac_thread_trace_data *thread_trace_data = &device->thread_trace;
   struct radeon_winsys *ws = device->ws;

   if (unlikely(device->thread_trace.bo))
@@ -461,6 +473,15 @@ radv_thread_trace_finish(struct radv_device *device)
      if (device->thread_trace.stop_cs[i])
         ws->cs_destroy(device->thread_trace.stop_cs[i]);
   }

   assert(thread_trace_data->rgp_pso_correlation.record_count == 0);
   simple_mtx_destroy(&thread_trace_data->rgp_pso_correlation.lock);

   assert(thread_trace_data->rgp_loader_events.record_count == 0);
   simple_mtx_destroy(&thread_trace_data->rgp_loader_events.lock);

   assert(thread_trace_data->rgp_code_object.record_count == 0);
   simple_mtx_destroy(&thread_trace_data->rgp_code_object.lock);
}
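
The asserts verify that every registered pipeline was unregistered again
before the device is destroyed; a non-zero count at this point would
indicate leaked pipeline records.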
static bool