radv: replace RADV_TRACE_FILE by RADV_DEBUG=hang

The trace file will be dumped as part of the hang report into
$HOME/radv_dumps_<pid>/trace.log if a GPU hang is detected.

The old and famous RADV_TRACE_FILE envvar is now deprecated: if it is
set, the driver prints a warning pointing to RADV_DEBUG=hang and aborts.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7233>
This commit is contained in:
Samuel Pitoiset 2020-10-19 18:37:26 +02:00 committed by Marge Bot
parent 9516f9369e
commit 33c9d4bf31
4 changed files with 24 additions and 18 deletions

View File

@ -554,6 +554,9 @@ RADV driver environment variables
``forcecompress``
Enables DCC,FMASK,CMASK,HTILE in situations where the driver supports it
but normally does not deem it beneficial.
``hang``
enable GPU hang detection and dump a report to $HOME/radv_dumps_<pid>
if a GPU hang is detected
``info``
show GPU-related information
``metashaders``
@ -624,8 +627,6 @@ RADV driver environment variables
``RADV_TEX_ANISO``
force anisotropy filter (up to 16)
``RADV_TRACE_FILE``
generate cmdbuffer tracefiles when a GPU hang is detected
``ACO_DEBUG``
a comma-separated list of named flags, which do various things:

View File

@ -83,19 +83,10 @@ radv_init_trace(struct radv_device *device)
}
static void
radv_dump_trace(struct radv_device *device, struct radeon_cmdbuf *cs)
radv_dump_trace(struct radv_device *device, struct radeon_cmdbuf *cs, FILE *f)
{
const char *filename = getenv("RADV_TRACE_FILE");
FILE *f = fopen(filename, "w");
if (!f) {
fprintf(stderr, "Failed to write trace dump to %s\n", filename);
return;
}
fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
device->ws->cs_dump(cs, f, (const int*)device->trace_id_ptr, 2);
fclose(f);
}
static void
@ -625,8 +616,6 @@ radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs)
fprintf(stderr, "radv: GPU hang detected...\n");
radv_dump_trace(queue->device, cs);
/* Create a directory into $HOME/radv_dumps_<pid> to save various
* debugging info about that GPU hang.
*/
@ -638,6 +627,14 @@ radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs)
abort();
}
/* Dump trace file. */
snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "trace.log");
f = fopen(dump_path, "w+");
if (f) {
radv_dump_trace(queue->device, cs, f);
fclose(f);
}
/* Dump pipeline state. */
snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "pipeline.log");
f = fopen(dump_path, "w+");

View File

@ -57,6 +57,7 @@ enum {
RADV_DEBUG_DISCARD_TO_DEMOTE = 1 << 26,
RADV_DEBUG_LLVM = 1 << 27,
RADV_DEBUG_FORCE_COMPRESS = 1 << 28,
RADV_DEBUG_HANG = 1 << 29,
};
enum {

View File

@ -530,6 +530,7 @@ static const struct debug_control radv_debug_options[] = {
{"discardtodemote", RADV_DEBUG_DISCARD_TO_DEMOTE},
{"llvm", RADV_DEBUG_LLVM},
{"forcecompress", RADV_DEBUG_FORCE_COMPRESS},
{"hang", RADV_DEBUG_HANG},
{NULL, 0}
};
@ -2794,19 +2795,25 @@ VkResult radv_CreateDevice(
device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
if (getenv("RADV_TRACE_FILE")) {
const char *filename = getenv("RADV_TRACE_FILE");
fprintf(stderr, "***********************************************************************************\n");
fprintf(stderr, "* WARNING: RADV_TRACE_FILE=<file> is deprecated and replaced by RADV_DEBUG=hang *\n");
fprintf(stderr, "***********************************************************************************\n");
abort();
}
if (device->instance->debug_flags & RADV_DEBUG_HANG) {
/* Enable GPU hangs detection and dump logs if a GPU hang is
* detected.
*/
keep_shader_info = true;
if (!radv_init_trace(device))
goto fail;
fprintf(stderr, "*****************************************************************************\n");
fprintf(stderr, "* WARNING: RADV_TRACE_FILE is costly and should only be used for debugging! *\n");
fprintf(stderr, "* WARNING: RADV_DEBUG=hang is costly and should only be used for debugging! *\n");
fprintf(stderr, "*****************************************************************************\n");
fprintf(stderr, "Trace file will be dumped to %s\n", filename);
/* Wait for idle after every draw/dispatch to identify the
* first bad call.
*/