mirror of https://gitlab.freedesktop.org/mesa/mesa
anv: add debug shader printf support
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25814>
This commit is contained in:
parent
9a36278475
commit
64010716c8
|
@ -3821,10 +3821,16 @@ VkResult anv_CreateDevice(
|
|||
device->physical->instance->fp64_workaround_enabled)
|
||||
anv_load_fp64_shader(device);
|
||||
|
||||
if (INTEL_DEBUG(DEBUG_SHADER_PRINT)) {
|
||||
result = anv_device_print_init(device);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail_internal_cache;
|
||||
}
|
||||
|
||||
result = anv_device_init_rt_shaders(device);
|
||||
if (result != VK_SUCCESS) {
|
||||
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
goto fail_internal_cache;
|
||||
goto fail_print;
|
||||
}
|
||||
|
||||
#if DETECT_OS_ANDROID
|
||||
|
@ -3917,6 +3923,9 @@ VkResult anv_CreateDevice(
|
|||
vk_common_DestroyCommandPool(anv_device_to_handle(device),
|
||||
device->companion_rcs_cmd_pool, NULL);
|
||||
}
|
||||
fail_print:
|
||||
if (INTEL_DEBUG(DEBUG_SHADER_PRINT))
|
||||
anv_device_print_fini(device);
|
||||
fail_internal_cache:
|
||||
vk_pipeline_cache_destroy(device->internal_cache, NULL);
|
||||
fail_default_pipeline_cache:
|
||||
|
@ -4037,6 +4046,9 @@ void anv_DestroyDevice(
|
|||
|
||||
anv_device_finish_internal_kernels(device);
|
||||
|
||||
if (INTEL_DEBUG(DEBUG_SHADER_PRINT))
|
||||
anv_device_print_fini(device);
|
||||
|
||||
vk_pipeline_cache_destroy(device->internal_cache, NULL);
|
||||
vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL);
|
||||
|
||||
|
|
|
@ -200,6 +200,46 @@ anv_shader_bin_rewrite_embedded_samplers(struct anv_device *device,
|
|||
reloc_values, rv_count);
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
brw_stage_prog_data_printf_num_args(const struct brw_stage_prog_data *prog_data)
|
||||
{
|
||||
uint32_t count = 0;
|
||||
for (unsigned i = 0; i < prog_data->printf_info_count; i++)
|
||||
count += prog_data->printf_info[i].num_args;
|
||||
return count;
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
brw_stage_prog_data_printf_string_size(const struct brw_stage_prog_data *prog_data)
|
||||
{
|
||||
uint32_t size = 0;
|
||||
for (unsigned i = 0; i < prog_data->printf_info_count; i++)
|
||||
size += prog_data->printf_info[i].string_size;
|
||||
return size;
|
||||
}
|
||||
|
||||
static void
|
||||
copy_uprintf(u_printf_info *out_infos,
|
||||
unsigned *out_arg_sizes,
|
||||
char *out_strings,
|
||||
const struct brw_stage_prog_data *prog_data)
|
||||
{
|
||||
for (unsigned i = 0; i < prog_data->printf_info_count; i++) {
|
||||
out_infos[i] = prog_data->printf_info[i];
|
||||
out_infos[i].arg_sizes = out_arg_sizes;
|
||||
memcpy(out_infos[i].arg_sizes,
|
||||
prog_data->printf_info[i].arg_sizes,
|
||||
sizeof(out_infos[i].arg_sizes[0]) * prog_data->printf_info[i].num_args);
|
||||
out_infos[i].strings = out_strings;
|
||||
memcpy(out_infos[i].strings,
|
||||
prog_data->printf_info[i].strings,
|
||||
prog_data->printf_info[i].string_size);
|
||||
|
||||
out_arg_sizes += prog_data->printf_info[i].num_args;
|
||||
out_strings += prog_data->printf_info[i].string_size;
|
||||
}
|
||||
}
|
||||
|
||||
static struct anv_shader_bin *
|
||||
anv_shader_bin_create(struct anv_device *device,
|
||||
gl_shader_stage stage,
|
||||
|
@ -237,6 +277,15 @@ anv_shader_bin_create(struct anv_device *device,
|
|||
bind_map->kernel_arg_count);
|
||||
VK_MULTIALLOC_DECL(&ma, struct anv_embedded_sampler *, embedded_samplers,
|
||||
bind_map->embedded_sampler_count);
|
||||
VK_MULTIALLOC_DECL(&ma, u_printf_info, printf_infos,
|
||||
INTEL_DEBUG(DEBUG_SHADER_PRINT) ?
|
||||
prog_data_in->printf_info_count : 0);
|
||||
VK_MULTIALLOC_DECL(&ma, unsigned, arg_sizes,
|
||||
INTEL_DEBUG(DEBUG_SHADER_PRINT) ?
|
||||
brw_stage_prog_data_printf_num_args(prog_data_in) : 0);
|
||||
VK_MULTIALLOC_DECL(&ma, char, strings,
|
||||
INTEL_DEBUG(DEBUG_SHADER_PRINT) ?
|
||||
brw_stage_prog_data_printf_string_size(prog_data_in) : 0);
|
||||
|
||||
if (!vk_multialloc_alloc(&ma, &device->vk.alloc,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
|
||||
|
@ -268,7 +317,7 @@ anv_shader_bin_create(struct anv_device *device,
|
|||
prog_data_in->const_data_offset;
|
||||
|
||||
int rv_count = 0;
|
||||
struct brw_shader_reloc_value reloc_values[7];
|
||||
struct brw_shader_reloc_value reloc_values[9];
|
||||
assert((device->physical->va.descriptor_buffer_pool.addr & 0xffffffff) == 0);
|
||||
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
|
||||
.id = BRW_SHADER_RELOC_DESCRIPTORS_BUFFER_ADDR_HIGH,
|
||||
|
@ -314,6 +363,39 @@ anv_shader_bin_create(struct anv_device *device,
|
|||
};
|
||||
}
|
||||
|
||||
if (INTEL_DEBUG(DEBUG_SHADER_PRINT) && prog_data_in->printf_info_count > 0) {
|
||||
assert(device->printf.bo != NULL);
|
||||
|
||||
copy_uprintf(printf_infos, arg_sizes, strings, prog_data_in);
|
||||
|
||||
simple_mtx_lock(&device->printf.mutex);
|
||||
|
||||
uint32_t base_printf_idx =
|
||||
util_dynarray_num_elements(&device->printf.prints, u_printf_info*);
|
||||
for (uint32_t i = 0; i < prog_data_in->printf_info_count; i++) {
|
||||
util_dynarray_append(&device->printf.prints, u_printf_info *,
|
||||
&printf_infos[i]);
|
||||
}
|
||||
|
||||
simple_mtx_unlock(&device->printf.mutex);
|
||||
|
||||
/* u_printf expects the string IDs to start at 1. */
|
||||
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
|
||||
.id = BRW_SHADER_RELOC_PRINTF_BASE_IDENTIFIER,
|
||||
.value = base_printf_idx,
|
||||
};
|
||||
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
|
||||
.id = BRW_SHADER_RELOC_PRINTF_BUFFER_ADDR_LOW,
|
||||
.value = device->printf.bo->offset & 0xffffffff,
|
||||
};
|
||||
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
|
||||
.id = BRW_SHADER_RELOC_PRINTF_BUFFER_ADDR_HIGH,
|
||||
.value = device->printf.bo->offset >> 32,
|
||||
};
|
||||
} else if (prog_data_in->printf_info_count > 0) {
|
||||
unreachable("shader with printf intrinsics requires INTEL_DEBUG=shader-print");
|
||||
}
|
||||
|
||||
brw_write_shader_relocs(&device->physical->compiler->isa,
|
||||
shader->kernel.map, prog_data_in,
|
||||
reloc_values, rv_count);
|
||||
|
@ -327,6 +409,7 @@ anv_shader_bin_create(struct anv_device *device,
|
|||
memset(prog_data_param, 0,
|
||||
prog_data->nr_params * sizeof(*prog_data_param));
|
||||
prog_data->param = prog_data_param;
|
||||
prog_data->printf_info = printf_infos;
|
||||
shader->prog_data = prog_data;
|
||||
shader->prog_data_size = prog_data_size;
|
||||
|
||||
|
@ -392,6 +475,8 @@ anv_shader_bin_serialize(struct vk_pipeline_cache_object *object,
|
|||
blob_write_bytes(blob, shader->prog_data->relocs,
|
||||
shader->prog_data->num_relocs *
|
||||
sizeof(shader->prog_data->relocs[0]));
|
||||
nir_serialize_printf_info(blob, shader->prog_data->printf_info,
|
||||
shader->prog_data->printf_info_count);
|
||||
|
||||
blob_write_uint32(blob, shader->num_stats);
|
||||
blob_write_bytes(blob, shader->stats,
|
||||
|
@ -469,6 +554,11 @@ anv_shader_bin_deserialize(struct vk_pipeline_cache *cache,
|
|||
blob_read_bytes(blob, prog_data.base.num_relocs *
|
||||
sizeof(prog_data.base.relocs[0]));
|
||||
|
||||
void *mem_ctx = ralloc_context(NULL);
|
||||
prog_data.base.printf_info =
|
||||
nir_deserialize_printf_info(mem_ctx, blob,
|
||||
&prog_data.base.printf_info_count);
|
||||
|
||||
uint32_t num_stats = blob_read_uint32(blob);
|
||||
const struct brw_compile_stats *stats =
|
||||
blob_read_bytes(blob, num_stats * sizeof(stats[0]));
|
||||
|
@ -511,8 +601,10 @@ anv_shader_bin_deserialize(struct vk_pipeline_cache *cache,
|
|||
sizeof(*bind_map.kernel_args));
|
||||
blob_copy_bytes(blob, bind_map.push_ranges, sizeof(bind_map.push_ranges));
|
||||
|
||||
if (blob->overrun)
|
||||
if (blob->overrun) {
|
||||
ralloc_free(mem_ctx);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct anv_shader_bin *shader =
|
||||
anv_shader_bin_create(device, stage,
|
||||
|
@ -522,6 +614,9 @@ anv_shader_bin_deserialize(struct vk_pipeline_cache *cache,
|
|||
stats, num_stats, xfb_info, &bind_map,
|
||||
&push_desc_info,
|
||||
dynamic_push_values);
|
||||
|
||||
ralloc_free(mem_ctx);
|
||||
|
||||
if (shader == NULL)
|
||||
return NULL;
|
||||
|
||||
|
|
|
@ -2034,6 +2034,21 @@ struct anv_device {
|
|||
simple_mtx_t mutex;
|
||||
struct hash_table *map;
|
||||
} embedded_samplers;
|
||||
|
||||
struct {
|
||||
/**
|
||||
* Mutex for the printfs array
|
||||
*/
|
||||
simple_mtx_t mutex;
|
||||
/**
|
||||
* Buffer in which the shader printfs are stored
|
||||
*/
|
||||
struct anv_bo *bo;
|
||||
/**
|
||||
* Array of pointers to u_printf_info
|
||||
*/
|
||||
struct util_dynarray prints;
|
||||
} printf;
|
||||
};
|
||||
|
||||
static inline uint32_t
|
||||
|
@ -2154,6 +2169,10 @@ anv_device_lookup_bo(struct anv_device *device, uint32_t gem_handle)
|
|||
VkResult anv_device_wait(struct anv_device *device, struct anv_bo *bo,
|
||||
int64_t timeout);
|
||||
|
||||
VkResult anv_device_print_init(struct anv_device *device);
|
||||
void anv_device_print_fini(struct anv_device *device);
|
||||
void anv_device_print_shader_prints(struct anv_device *device);
|
||||
|
||||
VkResult anv_queue_init(struct anv_device *device, struct anv_queue *queue,
|
||||
const VkDeviceQueueCreateInfo *pCreateInfo,
|
||||
uint32_t index_in_family);
|
||||
|
@ -2193,6 +2212,9 @@ anv_queue_post_submit(struct anv_queue *queue, VkResult submit_result)
|
|||
result = vk_queue_set_lost(&queue->vk, "sync wait failed");
|
||||
}
|
||||
|
||||
if (INTEL_DEBUG(DEBUG_SHADER_PRINT))
|
||||
anv_device_print_shader_prints(queue->device);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
@ -89,7 +89,10 @@ anv_queue_init(struct anv_device *device, struct anv_queue *queue,
|
|||
return result;
|
||||
}
|
||||
|
||||
if (INTEL_DEBUG(DEBUG_SYNC)) {
|
||||
/* Add a debug fence to wait on submissions if we're using the synchronized
|
||||
* submission feature or the shader-print feature.
|
||||
*/
|
||||
if (INTEL_DEBUG(DEBUG_SYNC | DEBUG_SHADER_PRINT)) {
|
||||
result = vk_sync_create(&device->vk,
|
||||
&device->physical->sync_syncobj_type,
|
||||
0, 0, &queue->sync);
|
||||
|
|
|
@ -154,3 +154,53 @@ anv_gfx_state_bit_to_str(enum anv_gfx_state_bits state)
|
|||
default: unreachable("invalid state");
|
||||
}
|
||||
}
|
||||
|
||||
VkResult
|
||||
anv_device_print_init(struct anv_device *device)
|
||||
{
|
||||
VkResult result =
|
||||
anv_device_alloc_bo(device, "printf",
|
||||
debug_get_num_option("ANV_PRINTF_BUFFER_SIZE", 1024 * 1024),
|
||||
ANV_BO_ALLOC_CAPTURE |
|
||||
ANV_BO_ALLOC_MAPPED |
|
||||
ANV_BO_ALLOC_HOST_COHERENT |
|
||||
ANV_BO_ALLOC_NO_LOCAL_MEM,
|
||||
0 /* explicit_address */,
|
||||
&device->printf.bo);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
util_dynarray_init(&device->printf.prints, ralloc_context(NULL));
|
||||
simple_mtx_init(&device->printf.mutex, mtx_plain);
|
||||
|
||||
*((uint32_t *)device->printf.bo->map) = 4;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
void
|
||||
anv_device_print_fini(struct anv_device *device)
|
||||
{
|
||||
anv_device_release_bo(device, device->printf.bo);
|
||||
util_dynarray_fini(&device->printf.prints);
|
||||
simple_mtx_destroy(&device->printf.mutex);
|
||||
}
|
||||
|
||||
void
|
||||
anv_device_print_shader_prints(struct anv_device *device)
|
||||
{
|
||||
simple_mtx_lock(&device->printf.mutex);
|
||||
|
||||
uint32_t *size = device->printf.bo->map;
|
||||
|
||||
u_printf_ptr(stdout,
|
||||
device->printf.bo->map + sizeof(uint32_t),
|
||||
*size - 4,
|
||||
util_dynarray_begin(&device->printf.prints),
|
||||
util_dynarray_num_elements(&device->printf.prints, u_printf_info*));
|
||||
|
||||
/* Reset */
|
||||
*size = 4;
|
||||
|
||||
simple_mtx_unlock(&device->printf.mutex);
|
||||
}
|
||||
|
|
|
@ -822,6 +822,13 @@ i915_queue_exec_locked(struct anv_queue *queue,
|
|||
if (result != VK_SUCCESS)
|
||||
goto error;
|
||||
|
||||
if (device->printf.bo) {
|
||||
result =
|
||||
anv_execbuf_add_bo(device, &execbuf, device->printf.bo, NULL, 0);
|
||||
if (result != VK_SUCCESS)
|
||||
goto error;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < wait_count; i++) {
|
||||
result = anv_execbuf_add_sync(device, &execbuf,
|
||||
waits[i].sync,
|
||||
|
|
Loading…
Reference in New Issue