turnip: implement VK_KHR_pipeline_executable_properties

Loosely based on the ANV implementation.

For an executable's internal representations we output:
- Initial NIR after spirv_to_nir
- Final optimized NIR
- IR3 disassembly

Note that vkGetPipelineExecutablePropertiesKHR is required to
return executable properties even if the pipeline was not created with
the CAPTURE_STATISTICS or CAPTURE_INTERNAL_REPRESENTATIONS bits set.
So the executables array is unconditionally populated; however, the
NIR and IR3 disassemblies are filled only when
CAPTURE_INTERNAL_REPRESENTATIONS is set.

Passes dEQP-VK.pipeline.executable_properties.*
Works with RenderDoc.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8877>
This commit is contained in:
Danylo Piliaiev 2021-02-04 15:12:35 +02:00 committed by Marge Bot
parent 2bff8fd53b
commit 56909868cd
8 changed files with 363 additions and 11 deletions

View File

@ -489,7 +489,7 @@ Khronos extensions that are not part of any Vulkan version:
VK_KHR_get_surface_capabilities2 DONE (anv, lvp, radv, tu)
VK_KHR_incremental_present DONE (anv, lvp, radv, tu)
VK_KHR_performance_query DONE (anv/gen8+, tu)
VK_KHR_pipeline_executable_properties DONE (anv, radv)
VK_KHR_pipeline_executable_properties DONE (anv, radv, tu)
VK_KHR_push_descriptor DONE (anv, lvp, radv, tu)
VK_EXT_sample_locations DONE (anv, radv, tu/a650)
VK_KHR_shader_clock DONE (anv, radv)

View File

@ -227,7 +227,7 @@ assemble_variant(struct ir3_shader_variant *v)
v->bin = ir3_shader_assemble(v);
bool dbg_enabled = shader_debug_enabled(v->shader->type);
if (dbg_enabled || ir3_shader_override_path) {
if (dbg_enabled || ir3_shader_override_path || v->disasm_info.write_disasm) {
unsigned char sha1[21];
char sha1buf[41];
@ -237,6 +237,24 @@ assemble_variant(struct ir3_shader_variant *v)
bool shader_overridden =
ir3_shader_override_path && try_override_shader_variant(v, sha1buf);
if (v->disasm_info.write_disasm) {
char *stream_data = NULL;
size_t stream_size = 0;
FILE *stream = open_memstream(&stream_data, &stream_size);
fprintf(stream, "Native code%s for unnamed %s shader %s with sha1 %s:\n",
shader_overridden ? " (overridden)" : "",
ir3_shader_stage(v), v->shader->nir->info.name, sha1buf);
ir3_shader_disasm(v, v->bin, stream);
fclose(stream);
v->disasm_info.disasm = ralloc_size(v->shader, stream_size + 1);
memcpy(v->disasm_info.disasm, stream_data, stream_size);
v->disasm_info.disasm[stream_size] = 0;
free(stream_data);
}
if (dbg_enabled || shader_overridden) {
fprintf(stdout, "Native code%s for unnamed %s shader %s with sha1 %s:\n",
shader_overridden ? " (overridden)" : "",
@ -315,17 +333,21 @@ needs_binning_variant(struct ir3_shader_variant *v)
}
static struct ir3_shader_variant *
create_variant(struct ir3_shader *shader, const struct ir3_shader_key *key)
create_variant(struct ir3_shader *shader, const struct ir3_shader_key *key,
bool write_disasm)
{
struct ir3_shader_variant *v = alloc_variant(shader, key, NULL);
if (!v)
goto fail;
v->disasm_info.write_disasm = write_disasm;
if (needs_binning_variant(v)) {
v->binning = alloc_variant(shader, key, v);
if (!v->binning)
goto fail;
v->binning->disasm_info.write_disasm = write_disasm;
}
if (ir3_disk_cache_retrieve(shader->compiler, v))
@ -339,6 +361,10 @@ create_variant(struct ir3_shader *shader, const struct ir3_shader_key *key)
nir_print_shader(shader->nir, stdout);
}
if (v->disasm_info.write_disasm) {
v->disasm_info.nir = nir_shader_as_str(shader->nir, shader);
}
shader->nir_finalized = true;
}
@ -371,14 +397,14 @@ shader_variant(struct ir3_shader *shader, const struct ir3_shader_key *key)
struct ir3_shader_variant *
ir3_shader_get_variant(struct ir3_shader *shader, const struct ir3_shader_key *key,
bool binning_pass, bool *created)
bool binning_pass, bool write_disasm, bool *created)
{
mtx_lock(&shader->variants_lock);
struct ir3_shader_variant *v = shader_variant(shader, key);
if (!v) {
/* compile new variant if it doesn't exist already: */
v = create_variant(shader, key);
v = create_variant(shader, key, write_disasm);
if (v) {
v->next = shader->variants;
shader->variants = v;

View File

@ -442,6 +442,12 @@ struct ir3_ibo_mapping {
uint8_t tex_base; /* the number of real textures, ie. image/ssbo start here */
};
/* Optional textual dumps attached to a shader variant. */
struct ir3_disasm_info {
/* Request flag copied from the caller when the variant is created; the
 * two strings below are only produced when it is set.
 */
bool write_disasm;
/* Text produced by nir_shader_as_str() for the shader's NIR; stays NULL
 * when no dump was captured.
 */
char *nir;
/* NUL-terminated native-code disassembly text (header line followed by
 * the ir3_shader_disasm() output); stays NULL when no dump was captured.
 */
char *disasm;
};
/* Represents half register in regid */
#define HALF_REG_ID 0x100
@ -663,6 +669,8 @@ struct ir3_shader_variant {
uint16_t local_size[3];
bool local_size_variable;
struct ir3_disasm_info disasm_info;
};
static inline const char *
@ -762,7 +770,7 @@ ir3_max_const(const struct ir3_shader_variant *v)
void * ir3_shader_assemble(struct ir3_shader_variant *v);
struct ir3_shader_variant * ir3_shader_get_variant(struct ir3_shader *shader,
const struct ir3_shader_key *key, bool binning_pass, bool *created);
const struct ir3_shader_key *key, bool binning_pass, bool keep_ir, bool *created);
struct ir3_shader * ir3_shader_from_nir(struct ir3_compiler *compiler, nir_shader *nir,
unsigned reserved_user_consts, struct ir3_stream_output_info *stream_output);
uint32_t ir3_trim_constlen(struct ir3_shader_variant **variants,

View File

@ -595,6 +595,13 @@ tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
feature->performanceCounterMultipleQueryPools = false;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR: {
VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *features =
(VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *)ext;
features->pipelineExecutableInfo = true;
break;
}
default:
break;
}

View File

@ -109,6 +109,7 @@ EXTENSIONS = [
Extension('VK_KHR_descriptor_update_template', 1, True),
Extension('VK_KHR_storage_buffer_storage_class', 1, True),
Extension('VK_KHR_external_fence_capabilities', 1, True),
Extension('VK_KHR_pipeline_executable_properties', 1, True),
]
MAX_API_VERSION = VkVersion(MAX_API_VERSION)

View File

@ -2130,6 +2130,25 @@ tu_upload_variant(struct tu_pipeline *pipeline,
return memory.iova;
}
static void
tu_append_executable(struct tu_pipeline *pipeline, struct ir3_shader_variant *variant,
char *nir_from_spirv)
{
ralloc_steal(pipeline->executables_mem_ctx, variant->disasm_info.nir);
ralloc_steal(pipeline->executables_mem_ctx, variant->disasm_info.disasm);
struct tu_pipeline_executable exe = {
.stage = variant->shader->type,
.nir_from_spirv = nir_from_spirv,
.nir_final = variant->disasm_info.nir,
.disasm = variant->disasm_info.disasm,
.stats = variant->info,
.is_binning = variant->binning_pass,
};
util_dynarray_append(&pipeline->executables, struct tu_pipeline_executable, exe);
}
static VkResult
tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
struct tu_pipeline *pipeline)
@ -2169,6 +2188,22 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
nir[MESA_SHADER_FRAGMENT] = fs_b.shader;
}
const bool executable_info = builder->create_info->flags &
VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
char *nir_initial_disasm[ARRAY_SIZE(builder->shaders)] = { NULL };
if (executable_info) {
for (gl_shader_stage stage = MESA_SHADER_VERTEX;
stage < ARRAY_SIZE(nir); stage++) {
if (!nir[stage])
continue;
nir_initial_disasm[stage] =
nir_shader_as_str(nir[stage], pipeline->executables_mem_ctx);
}
}
/* TODO do intra-stage linking here */
uint32_t desc_sets = 0;
@ -2222,7 +2257,7 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
bool created;
builder->variants[stage] =
ir3_shader_get_variant(builder->shaders[stage]->ir3_shader,
&key, false, &created);
&key, false, executable_info, &created);
if (!builder->variants[stage])
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
@ -2240,7 +2275,7 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
bool created;
builder->variants[stage] =
ir3_shader_get_variant(builder->shaders[stage]->ir3_shader,
&key, false, &created);
&key, false, executable_info, &created);
if (!builder->variants[stage])
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
@ -2256,13 +2291,25 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
bool created;
key.safe_constlen = !!(safe_constlens & (1 << MESA_SHADER_VERTEX));
variant = ir3_shader_get_variant(vs->ir3_shader, &key,
true, &created);
true, executable_info, &created);
if (!variant)
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
builder->binning_variant = variant;
for (gl_shader_stage stage = MESA_SHADER_VERTEX;
stage < ARRAY_SIZE(nir); stage++) {
if (builder->variants[stage]) {
tu_append_executable(pipeline, builder->variants[stage],
nir_initial_disasm[stage]);
}
}
if (builder->binning_variant != builder->variants[MESA_SHADER_VERTEX]) {
tu_append_executable(pipeline, builder->binning_variant, NULL);
}
return VK_SUCCESS;
}
@ -2800,6 +2847,8 @@ tu_pipeline_finish(struct tu_pipeline *pipeline,
const VkAllocationCallbacks *alloc)
{
tu_cs_finish(&pipeline->cs);
ralloc_free(pipeline->executables_mem_ctx);
}
static VkResult
@ -2814,6 +2863,8 @@ tu_pipeline_builder_build(struct tu_pipeline_builder *builder,
return VK_ERROR_OUT_OF_HOST_MEMORY;
(*pipeline)->layout = builder->layout;
(*pipeline)->executables_mem_ctx = ralloc_context(NULL);
util_dynarray_init(&(*pipeline)->executables, (*pipeline)->executables_mem_ctx);
/* compile and upload shaders */
result = tu_pipeline_builder_compile_shaders(builder, *pipeline);
@ -3020,10 +3071,19 @@ tu_compute_pipeline_create(VkDevice device,
pipeline->layout = layout;
pipeline->executables_mem_ctx = ralloc_context(NULL);
util_dynarray_init(&pipeline->executables, pipeline->executables_mem_ctx);
struct ir3_shader_key key = {};
nir_shader *nir = tu_spirv_to_nir(dev, stage_info, MESA_SHADER_COMPUTE);
const bool executable_info = pCreateInfo->flags &
VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
char *nir_initial_disasm = executable_info ?
nir_shader_as_str(nir, pipeline->executables_mem_ctx) : NULL;
struct tu_shader *shader =
tu_shader_create(dev, nir, 0, layout, pAllocator);
if (!shader) {
@ -3035,7 +3095,7 @@ tu_compute_pipeline_create(VkDevice device,
bool created;
struct ir3_shader_variant *v =
ir3_shader_get_variant(shader->ir3_shader, &key, false, &created);
ir3_shader_get_variant(shader->ir3_shader, &key, false, executable_info, &created);
if (!v) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail;
@ -3063,9 +3123,12 @@ tu_compute_pipeline_create(VkDevice device,
tu6_emit_load_state(pipeline, true);
tu_append_executable(pipeline, v, nir_initial_disasm);
tu_shader_destroy(dev, shader, pAllocator);
*pPipeline = tu_pipeline_to_handle(pipeline);
return VK_SUCCESS;
fail:
@ -3112,3 +3175,234 @@ tu_DestroyPipeline(VkDevice _device,
tu_pipeline_finish(pipeline, dev, pAllocator);
vk_object_free(&dev->vk, pAllocator, pipeline);
}
/* Zero-fill the char array `field`, then printf-format the remaining
 * arguments into it.
 *
 * `field` must be a real array (not a pointer) because its capacity is
 * taken with sizeof. The assert fires if the formatted text is empty or
 * would not fit together with its terminating NUL.
 */
#define WRITE_STR(field, ...) ({                                   \
   const size_t _cap = sizeof(field);                              \
   memset(field, 0, _cap);                                         \
   UNUSED int _len = snprintf(field, _cap, __VA_ARGS__);           \
   assert(_len > 0 && (size_t)_len < _cap);                        \
})
/* Return the index-th entry of pipeline->executables.
 *
 * The index is only validated by an assert, so callers must pass a value
 * below the number of recorded executables.
 */
static const struct tu_pipeline_executable *
tu_pipeline_get_executable(struct tu_pipeline *pipeline, uint32_t index)
{
   struct util_dynarray *list = &pipeline->executables;

   assert(index < util_dynarray_num_elements(list, struct tu_pipeline_executable));

   return util_dynarray_element(list, struct tu_pipeline_executable, index);
}
/* vkGetPipelineExecutablePropertiesKHR entry point.
 *
 * Emits one VkPipelineExecutablePropertiesKHR per entry in
 * pipeline->executables through the usual count/array out-parameter pair.
 * The name is the stage abbreviation (or "Binning VS" for a binning
 * executable), the description is the full stage name, and subgroupSize
 * is the compiler's threadsize_base, doubled when the executable's stats
 * report double_threadsize.
 */
VkResult
tu_GetPipelineExecutablePropertiesKHR(
      VkDevice _device,
      const VkPipelineInfoKHR* pPipelineInfo,
      uint32_t* pExecutableCount,
      VkPipelineExecutablePropertiesKHR* pProperties)
{
   TU_FROM_HANDLE(tu_device, dev, _device);
   TU_FROM_HANDLE(tu_pipeline, pipeline, pPipelineInfo->pipeline);
   VK_OUTARRAY_MAKE(out, pProperties, pExecutableCount);

   util_dynarray_foreach (&pipeline->executables, struct tu_pipeline_executable, exe) {
      vk_outarray_append(&out, props) {
         const gl_shader_stage stage = exe->stage;
         const int threadsize_scale = exe->stats.double_threadsize ? 2 : 1;

         props->stages = mesa_to_vk_shader_stage(stage);

         if (exe->is_binning) {
            WRITE_STR(props->name, "Binning VS");
         } else {
            WRITE_STR(props->name, "%s", _mesa_shader_stage_to_abbrev(stage));
         }

         WRITE_STR(props->description, "%s", _mesa_shader_stage_to_string(stage));

         props->subgroupSize = dev->compiler->threadsize_base * threadsize_scale;
      }
   }

   return vk_outarray_status(&out);
}
/* vkGetPipelineExecutableStatisticsKHR entry point.
 *
 * Reports the statistics stored for the executable selected by
 * pExecutableInfo->executableIndex. Every statistic is emitted as a
 * UINT64 value: first a fixed list of named counters, then one
 * "cat%d instructions" entry per element of stats.instrs_per_cat.
 */
VkResult
tu_GetPipelineExecutableStatisticsKHR(
      VkDevice _device,
      const VkPipelineExecutableInfoKHR* pExecutableInfo,
      uint32_t* pStatisticCount,
      VkPipelineExecutableStatisticKHR* pStatistics)
{
   TU_FROM_HANDLE(tu_pipeline, pipeline, pExecutableInfo->pipeline);
   VK_OUTARRAY_MAKE(out, pStatistics, pStatisticCount);

   const struct tu_pipeline_executable *exe =
      tu_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);

   /* Fixed counters, listed in the order they are reported. */
   const struct {
      const char *name;
      const char *description;
      uint64_t value;
   } counters[] = {
      { "Max Waves Per Core",
        "Maximum number of simultaneous waves per core.",
        exe->stats.max_waves },
      { "Instruction Count",
        "Total number of IR3 instructions in the final generated "
        "shader executable.",
        exe->stats.instrs_count },
      { "NOPs Count",
        "Number of NOP instructions in the final generated "
        "shader executable.",
        exe->stats.nops_count },
      { "MOV Count",
        "Number of MOV instructions in the final generated "
        "shader executable.",
        exe->stats.mov_count },
      { "COV Count",
        "Number of COV instructions in the final generated "
        "shader executable.",
        exe->stats.cov_count },
      /* max_reg / max_half_reg hold the highest index used, hence the +1. */
      { "Registers used",
        "Number of registers used in the final generated "
        "shader executable.",
        exe->stats.max_reg + 1 },
      { "Half-registers used",
        "Number of half-registers used in the final generated "
        "shader executable.",
        exe->stats.max_half_reg + 1 },
      { "Instructions with SS sync bit",
        "SS bit is set for instructions which depend on a result "
        "of \"long\" instructions to prevent RAW hazard.",
        exe->stats.ss },
      { "Instructions with SY sync bit",
        "SY bit is set for instructions which depend on a result "
        "of loads from global memory to prevent RAW hazard.",
        exe->stats.sy },
      { "Estimated cycles stalled on SS",
        "A better metric to estimate the impact of SS syncs.",
        exe->stats.sstall },
   };

   for (unsigned c = 0; c < ARRAY_SIZE(counters); c++) {
      vk_outarray_append(&out, stat) {
         WRITE_STR(stat->name, "%s", counters[c].name);
         WRITE_STR(stat->description, "%s", counters[c].description);
         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
         stat->value.u64 = counters[c].value;
      }
   }

   /* Per-category instruction counts. */
   for (int cat = 0; cat < ARRAY_SIZE(exe->stats.instrs_per_cat); cat++) {
      vk_outarray_append(&out, stat) {
         WRITE_STR(stat->name, "cat%d instructions", cat);
         WRITE_STR(stat->description,
                   "Number of cat%d instructions.", cat);
         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
         stat->value.u64 = exe->stats.instrs_per_cat[cat];
      }
   }

   return vk_outarray_status(&out);
}
/* Copy a NUL-terminated text representation into an internal
 * representation struct, following the Vulkan size-query convention.
 *
 * - ir->pData == NULL: only report the required size (including the
 *   terminating NUL) in ir->dataSize and return true.
 * - Buffer large enough: copy the text, set ir->dataSize to the number of
 *   bytes used and return true.
 * - Buffer too small: copy as much as fits, leave ir->dataSize untouched
 *   and return false so the caller can report VK_INCOMPLETE.
 *
 * ir->isText is always set to VK_TRUE. `data` must not be NULL.
 */
static bool
write_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir,
              const char *data)
{
   ir->isText = VK_TRUE;

   size_t data_len = strlen(data) + 1;

   if (ir->pData == NULL) {
      ir->dataSize = data_len;
      return true;
   }

   strncpy(ir->pData, data, ir->dataSize);

   if (ir->dataSize < data_len) {
      /* strncpy() does not terminate on truncation, but the spec requires
       * the last byte written for a text representation to be a NUL
       * whenever dataSize is non-zero.
       */
      if (ir->dataSize > 0)
         ((char *)ir->pData)[ir->dataSize - 1] = '\0';
      return false;
   }

   ir->dataSize = data_len;
   return true;
}
/* vkGetPipelineExecutableInternalRepresentationsKHR entry point.
 *
 * For the executable selected by pExecutableInfo->executableIndex, reports
 * each text dump that is present (non-NULL), in this order: the NIR
 * obtained from SPIR-V, the final NIR, and the IR3 disassembly.
 *
 * Returns VK_INCOMPLETE if write_ir_text() reported failure for any dump;
 * otherwise returns the out-array status.
 */
VkResult
tu_GetPipelineExecutableInternalRepresentationsKHR(
    VkDevice _device,
    const VkPipelineExecutableInfoKHR* pExecutableInfo,
    uint32_t* pInternalRepresentationCount,
    VkPipelineExecutableInternalRepresentationKHR* pInternalRepresentations)
{
   TU_FROM_HANDLE(tu_pipeline, pipeline, pExecutableInfo->pipeline);
   VK_OUTARRAY_MAKE(out, pInternalRepresentations, pInternalRepresentationCount);

   const struct tu_pipeline_executable *exe =
      tu_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);

   /* Candidate dumps in reporting order; entries without text are skipped. */
   const struct {
      const char *name;
      const char *description;
      const char *text;
   } dumps[] = {
      { "NIR from SPIRV",
        "Initial NIR before any optimizations",
        exe->nir_from_spirv },
      { "Final NIR",
        "Final NIR before going into the back-end compiler",
        exe->nir_final },
      { "IR3 Assembly",
        "Final IR3 assembly for the generated shader binary",
        exe->disasm },
   };

   bool incomplete_text = false;

   for (unsigned d = 0; d < ARRAY_SIZE(dumps); d++) {
      if (dumps[d].text == NULL)
         continue;

      vk_outarray_append(&out, ir) {
         WRITE_STR(ir->name, "%s", dumps[d].name);
         WRITE_STR(ir->description, "%s", dumps[d].description);

         if (!write_ir_text(ir, dumps[d].text))
            incomplete_text = true;
      }
   }

   if (incomplete_text)
      return VK_INCOMPLETE;

   return vk_outarray_status(&out);
}

View File

@ -52,6 +52,7 @@
#include "util/log.h"
#include "util/macros.h"
#include "util/u_atomic.h"
#include "util/u_dynarray.h"
#include "vk_alloc.h"
#include "vk_debug_report.h"
#include "vk_device.h"
@ -1064,6 +1065,17 @@ struct tu_program_descriptor_linkage
struct tu_push_constant_range push_consts;
};
/* One entry of tu_pipeline::executables: a snapshot of a compiled shader
 * variant as reported through VK_KHR_pipeline_executable_properties.
 */
struct tu_pipeline_executable {
/* Shader stage of the variant this entry was created from. */
gl_shader_stage stage;
/* Copy of the variant's ir3_info, used as the source of statistics. */
struct ir3_info stats;
/* Copied from the variant's binning_pass flag. */
bool is_binning;
/* Text dumps; each may be NULL when it was not captured. */
/* NIR as printed right after SPIR-V translation. */
char *nir_from_spirv;
/* NIR text taken from the variant's disasm_info. */
char *nir_final;
/* Native-code disassembly text taken from the variant's disasm_info. */
char *disasm;
};
struct tu_pipeline
{
struct vk_object_base base;
@ -1130,6 +1142,10 @@ struct tu_pipeline
} compute;
struct tu_lrz_pipeline lrz;
void *executables_mem_ctx;
/* tu_pipeline_executable */
struct util_dynarray executables;
};
void

View File

@ -140,7 +140,7 @@ ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key,
*/
ir3_key_clear_unused(&key, shader);
v = ir3_shader_get_variant(shader, &key, binning_pass, &created);
v = ir3_shader_get_variant(shader, &key, binning_pass, false, &created);
if (created) {
if (shader->initial_variants_done) {