radv: dump VA ranges history when a GPU hang is detected
This is enabled only with RADV_DEBUG=hang. This adds a small

Gitlab: https://gitlab.freedesktop.org/mesa/mesa/-/issues/3904
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7891>
This commit is contained in:
parent
15e7e6443c
commit
6ed4332591
|
@ -734,6 +734,14 @@ radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs)
|
|||
fclose(f);
|
||||
}
|
||||
|
||||
/* Dump BO log. */
|
||||
snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "bo_history.log");
|
||||
f = fopen(dump_path, "w+");
|
||||
if (f) {
|
||||
device->ws->dump_bo_log(device->ws, f);
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
/* Dump VM fault info. */
|
||||
if (vm_fault_occurred) {
|
||||
snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "vm_fault.log");
|
||||
|
|
|
@ -301,6 +301,8 @@ struct radeon_winsys {
|
|||
|
||||
void (*dump_bo_ranges)(struct radeon_winsys *ws, FILE *file);
|
||||
|
||||
void (*dump_bo_log)(struct radeon_winsys *ws, FILE *file);
|
||||
|
||||
int (*surface_init)(struct radeon_winsys *ws,
|
||||
const struct ac_surf_info *surf_info,
|
||||
struct radeon_surf *surf);
|
||||
|
|
|
@ -39,6 +39,7 @@
|
|||
#include "util/u_atomic.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_math.h"
|
||||
#include "util/os_time.h"
|
||||
|
||||
static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo);
|
||||
|
||||
|
@ -282,6 +283,39 @@ radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys_bo *_parent,
|
|||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
struct radv_amdgpu_winsys_bo_log {
|
||||
struct list_head list;
|
||||
uint64_t va;
|
||||
uint64_t size;
|
||||
uint64_t timestamp; /* CPU timestamp */
|
||||
uint8_t is_virtual : 1;
|
||||
uint8_t destroyed : 1;
|
||||
};
|
||||
|
||||
static void radv_amdgpu_log_bo(struct radv_amdgpu_winsys_bo *bo,
|
||||
bool destroyed)
|
||||
{
|
||||
struct radv_amdgpu_winsys *ws = bo->ws;
|
||||
struct radv_amdgpu_winsys_bo_log *bo_log = NULL;
|
||||
|
||||
if (!bo->ws->debug_log_bos)
|
||||
return;
|
||||
|
||||
bo_log = malloc(sizeof(*bo_log));
|
||||
if (!bo_log)
|
||||
return;
|
||||
|
||||
bo_log->va = bo->base.va;
|
||||
bo_log->size = bo->size;
|
||||
bo_log->timestamp = os_time_get_nano();
|
||||
bo_log->is_virtual = bo->is_virtual;
|
||||
bo_log->destroyed = destroyed;
|
||||
|
||||
u_rwlock_wrlock(&ws->log_bo_list_lock);
|
||||
list_addtail(&bo_log->list, &ws->log_bo_list);
|
||||
u_rwlock_wrunlock(&ws->log_bo_list_lock);
|
||||
}
|
||||
|
||||
static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo)
|
||||
{
|
||||
struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
|
||||
|
@ -289,6 +323,9 @@ static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo)
|
|||
|
||||
if (p_atomic_dec_return(&bo->ref_count))
|
||||
return;
|
||||
|
||||
radv_amdgpu_log_bo(bo, true);
|
||||
|
||||
if (bo->is_virtual) {
|
||||
for (uint32_t i = 0; i < bo->range_count; ++i) {
|
||||
radv_amdgpu_winsys_virtual_unmap(bo, bo->ranges + i);
|
||||
|
@ -391,6 +428,8 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
|
|||
bo->ranges[0].bo_offset = 0;
|
||||
|
||||
radv_amdgpu_winsys_virtual_map(bo, bo->ranges);
|
||||
radv_amdgpu_log_bo(bo, false);
|
||||
|
||||
return (struct radeon_winsys_bo *)bo;
|
||||
}
|
||||
|
||||
|
@ -485,6 +524,8 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
|
|||
align64(bo->size, ws->info.gart_page_size));
|
||||
|
||||
radv_amdgpu_add_buffer_to_global_list(bo);
|
||||
radv_amdgpu_log_bo(bo, false);
|
||||
|
||||
return (struct radeon_winsys_bo *)bo;
|
||||
error_va_map:
|
||||
amdgpu_bo_free(buf_handle);
|
||||
|
@ -592,6 +633,8 @@ radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws,
|
|||
align64(bo->size, ws->info.gart_page_size));
|
||||
|
||||
radv_amdgpu_add_buffer_to_global_list(bo);
|
||||
radv_amdgpu_log_bo(bo, false);
|
||||
|
||||
return (struct radeon_winsys_bo *)bo;
|
||||
|
||||
error_va_map:
|
||||
|
@ -672,6 +715,8 @@ radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws,
|
|||
align64(bo->size, ws->info.gart_page_size));
|
||||
|
||||
radv_amdgpu_add_buffer_to_global_list(bo);
|
||||
radv_amdgpu_log_bo(bo, false);
|
||||
|
||||
return (struct radeon_winsys_bo *)bo;
|
||||
error_va_map:
|
||||
amdgpu_va_range_free(va_handle);
|
||||
|
@ -862,6 +907,24 @@ static int radv_amdgpu_bo_va_compare(const void *a, const void *b)
|
|||
return bo_a->base.va < bo_b->base.va ? -1 : bo_a->base.va > bo_b->base.va ? 1 : 0;
|
||||
}
|
||||
|
||||
static void radv_amdgpu_dump_bo_log(struct radeon_winsys *_ws, FILE *file)
|
||||
{
|
||||
struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
|
||||
struct radv_amdgpu_winsys_bo_log *bo_log;
|
||||
|
||||
if (!ws->debug_log_bos)
|
||||
return;
|
||||
|
||||
u_rwlock_rdlock(&ws->log_bo_list_lock);
|
||||
LIST_FOR_EACH_ENTRY(bo_log, &ws->log_bo_list, list) {
|
||||
fprintf(file, "timestamp=%llu, VA=%.16llx-%.16llx, destroyed=%d, is_virtual=%d\n",
|
||||
(long long)bo_log->timestamp, (long long)bo_log->va,
|
||||
(long long)(bo_log->va + bo_log->size),
|
||||
bo_log->destroyed, bo_log->is_virtual);
|
||||
}
|
||||
u_rwlock_rdunlock(&ws->log_bo_list_lock);
|
||||
}
|
||||
|
||||
static void radv_amdgpu_dump_bo_ranges(struct radeon_winsys *_ws, FILE *file)
|
||||
{
|
||||
struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
|
||||
|
@ -905,4 +968,5 @@ void radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws)
|
|||
ws->base.buffer_virtual_bind = radv_amdgpu_winsys_bo_virtual_bind;
|
||||
ws->base.buffer_get_flags_from_fd = radv_amdgpu_bo_get_flags_from_fd;
|
||||
ws->base.dump_bo_ranges = radv_amdgpu_dump_bo_ranges;
|
||||
ws->base.dump_bo_log = radv_amdgpu_dump_bo_log;
|
||||
}
|
||||
|
|
|
@ -168,6 +168,7 @@ static void radv_amdgpu_winsys_destroy(struct radeon_winsys *rws)
|
|||
|
||||
pthread_mutex_destroy(&ws->syncobj_lock);
|
||||
u_rwlock_destroy(&ws->global_bo_list_lock);
|
||||
u_rwlock_destroy(&ws->log_bo_list_lock);
|
||||
ac_addrlib_destroy(ws->addrlib);
|
||||
amdgpu_device_deinitialize(ws->dev);
|
||||
FREE(rws);
|
||||
|
@ -195,6 +196,7 @@ radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags)
|
|||
goto winsys_fail;
|
||||
|
||||
ws->debug_all_bos = !!(debug_flags & RADV_DEBUG_ALL_BOS);
|
||||
ws->debug_log_bos = debug_flags & RADV_DEBUG_HANG;
|
||||
if (debug_flags & RADV_DEBUG_NO_IBS)
|
||||
ws->use_ib_bos = false;
|
||||
|
||||
|
@ -203,6 +205,8 @@ radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags)
|
|||
ws->use_llvm = debug_flags & RADV_DEBUG_LLVM;
|
||||
list_inithead(&ws->global_bo_list);
|
||||
u_rwlock_init(&ws->global_bo_list_lock);
|
||||
list_inithead(&ws->log_bo_list);
|
||||
u_rwlock_init(&ws->log_bo_list_lock);
|
||||
pthread_mutex_init(&ws->syncobj_lock, NULL);
|
||||
ws->base.query_info = radv_amdgpu_winsys_query_info;
|
||||
ws->base.query_value = radv_amdgpu_winsys_query_value;
|
||||
|
|
|
@ -44,6 +44,7 @@ struct radv_amdgpu_winsys {
|
|||
struct ac_addrlib *addrlib;
|
||||
|
||||
bool debug_all_bos;
|
||||
bool debug_log_bos;
|
||||
bool use_ib_bos;
|
||||
bool zero_all_vram_allocs;
|
||||
bool use_local_bos;
|
||||
|
@ -61,6 +62,10 @@ struct radv_amdgpu_winsys {
|
|||
pthread_mutex_t syncobj_lock;
|
||||
uint32_t *syncobj;
|
||||
uint32_t syncobj_count, syncobj_capacity;
|
||||
|
||||
/* BO log */
|
||||
struct u_rwlock log_bo_list_lock;
|
||||
struct list_head log_bo_list;
|
||||
};
|
||||
|
||||
static inline struct radv_amdgpu_winsys *
|
||||
|
|
Loading…
Reference in New Issue