turnip: Simple breadcrumbs implementation to debug hangs
A simple implementation of breadcrumb tracking of GPU progress, intended to be the last resort when debugging unrecoverable hangs. For best results use Vulkan traces to have a predictable place of hang. Requires compilation with TU_BREADCRUMBS_ENABLED=1. See tu_cs_breadcrumbs.c for details on how to use this feature. Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15452>
This commit is contained in:
parent
83d820973c
commit
a9ebf55d02
|
@ -34,6 +34,7 @@ libtu_files = files(
|
|||
'tu_autotune.c',
|
||||
'tu_clear_blit.c',
|
||||
'tu_cmd_buffer.c',
|
||||
'tu_cs_breadcrumbs.c',
|
||||
'tu_cs.c',
|
||||
'tu_cs.h',
|
||||
'tu_device.c',
|
||||
|
|
|
@ -29,6 +29,11 @@
|
|||
|
||||
#include "freedreno_pm4.h"
|
||||
|
||||
/* For breadcrumbs we may open a network socket based on the envvar,
|
||||
* it's not something that should be enabled by default.
|
||||
*/
|
||||
#define TU_BREADCRUMBS_ENABLED 0
|
||||
|
||||
void
|
||||
tu_cs_init(struct tu_cs *cs,
|
||||
struct tu_device *device,
|
||||
|
@ -153,6 +158,9 @@ tu_cs_sanity_check(const struct tu_cs *cs)
|
|||
assert(cs->reserved_end <= cs->end);
|
||||
}
|
||||
|
||||
void
|
||||
tu_cs_emit_sync_breadcrumb(struct tu_cs *cs, uint8_t opcode, uint16_t cnt);
|
||||
|
||||
/**
|
||||
* Emit a uint32_t value into a command stream, without boundary checking.
|
||||
*/
|
||||
|
@ -162,6 +170,12 @@ tu_cs_emit(struct tu_cs *cs, uint32_t value)
|
|||
assert(cs->cur < cs->reserved_end);
|
||||
*cs->cur = value;
|
||||
++cs->cur;
|
||||
|
||||
#if TU_BREADCRUMBS_ENABLED
|
||||
cs->breadcrumb_emit_after--;
|
||||
if (cs->breadcrumb_emit_after == 0)
|
||||
tu_cs_emit_sync_breadcrumb(cs, -1, 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -220,6 +234,10 @@ tu_cs_emit_pkt4(struct tu_cs *cs, uint16_t regindx, uint16_t cnt)
|
|||
/**
 * Emit a pkt7 header for \a opcode into \a cs.
 *
 * Room for cnt + 1 dwords is reserved before the header is written
 * (presumably the header plus \a cnt payload dwords that the caller emits
 * afterwards).
 *
 * When built with TU_BREADCRUMBS_ENABLED, tu_cs_emit_sync_breadcrumb() is
 * given a chance to insert a breadcrumb in front of the packet.
 */
static inline void
tu_cs_emit_pkt7(struct tu_cs *cs, uint8_t opcode, uint16_t cnt)
{
   const uint32_t total_dwords = (uint32_t) cnt + 1;

#if TU_BREADCRUMBS_ENABLED
   tu_cs_emit_sync_breadcrumb(cs, opcode, total_dwords);
#endif

   tu_cs_reserve(cs, total_dwords);
   tu_cs_emit(cs, pm4_pkt7_hdr(opcode, cnt));
}
|
||||
|
|
|
@ -0,0 +1,279 @@
|
|||
/*
|
||||
* Copyright © 2022 Igalia S.L.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <arpa/inet.h>
|
||||
#include <netinet/in.h>
|
||||
#include <sys/socket.h>
|
||||
|
||||
#include "tu_cs.h"
|
||||
|
||||
/* A simple implementation of breadcrumbs tracking of GPU progress
|
||||
* intended to be a last resort when debugging unrecoverable hangs.
|
||||
* For best results use Vulkan traces to have a predictable place of hang.
|
||||
*
|
||||
* For ordinary hangs as a more user-friendly solution use GFR
|
||||
* "Graphics Flight Recorder".
|
||||
*
|
||||
* This implementation aims to handle cases where we cannot do anything
|
||||
* after the hang, which is achieved by:
|
||||
* - On GPU after each breadcrumb we wait until CPU acks it and sends udp
|
||||
* packet to the remote host;
|
||||
* - At specified breadcrumb require explicit user input to continue
|
||||
* execution up to the next breadcrumb.
|
||||
*
|
||||
* In-driver breadcrumbs also allow more precise tracking since we could
|
||||
* target a single GPU packet.
|
||||
*
|
||||
*
|
||||
* Breadcrumbs settings:
|
||||
*
|
||||
* TU_BREADCRUMBS=$IP:$PORT,break=$BREAKPOINT:$BREAKPOINT_HITS
|
||||
* Where:
|
||||
* $BREAKPOINT - the breadcrumb from which we require explicit ack
|
||||
* $BREAKPOINT_HITS - how many times breakpoint should be reached for
|
||||
* break to occur. Necessary for a gmem mode and re-usable cmdbuffers
|
||||
* in both of which the same cmdstream could be executed several times.
|
||||
*
|
||||
*
|
||||
* A typical work flow would be:
|
||||
* - Start listening for breadcrumbs on remote host:
|
||||
* nc -lvup $PORT | stdbuf -o0 xxd -pc -c 4 | awk -Wposix '{printf("%u:%u\n", "0x" $0, a[$0]++)}'
|
||||
*
|
||||
* - Start capturing command stream:
|
||||
* sudo cat /sys/kernel/debug/dri/0/rd > ~/cmdstream.rd
|
||||
*
|
||||
* - On device replay the hanging trace with:
|
||||
* TU_BREADCRUMBS=$IP:$PORT,break=-1:0
|
||||
* ! Try to reproduce the hang in a sysmem mode because it would
|
||||
* require much less breadcrumb writes and syncs.
|
||||
*
|
||||
* - Increase hangcheck period:
|
||||
* echo -n 60000 > /sys/kernel/debug/dri/0/hangcheck_period_ms
|
||||
*
|
||||
* - After GPU hang note the last breadcrumb and relaunch trace with:
|
||||
* TU_BREADCRUMBS=$IP:$PORT,break=$LAST_BREADCRUMB:$HITS
|
||||
*
|
||||
* - After the breakpoint is reached each breadcrumb would require
|
||||
* explicit ack from the user. This way it's possible to find
|
||||
* the last packet which didn't hang.
|
||||
*
|
||||
* - Find the packet in the decoded cmdstream.
|
||||
*/
|
||||
|
||||
/* Per-device breadcrumbs state, created by tu_breadcrumbs_init() from the
 * TU_BREADCRUMBS option and released by tu_breadcrumbs_finish().
 */
struct breadcrumbs_context
{
   /* Destination of the UDP breadcrumb packets ($IP and $PORT). */
   char remote_host[64];
   int remote_port;

   /* $BREAKPOINT and $BREAKPOINT_HITS from the "break=" part of the option. */
   uint32_t breadcrumb_breakpoint;
   uint32_t breadcrumb_breakpoint_hits;

   /* Set by tu_breadcrumbs_finish() to make the worker thread leave its loop. */
   bool thread_stop;
   /* Worker thread running sync_gpu_with_cpu(). */
   pthread_t breadcrumbs_thread;

   /* Device whose global BO holds the GPU/CPU sync seqnos. */
   struct tu_device *device;

   /* Last breadcrumb number handed out, incremented atomically at emit time. */
   uint32_t breadcrumb_idx;
};
|
||||
|
||||
static void *
|
||||
sync_gpu_with_cpu(void *_job)
|
||||
{
|
||||
struct breadcrumbs_context *ctx = (struct breadcrumbs_context *) _job;
|
||||
struct tu6_global *global =
|
||||
(struct tu6_global *) ctx->device->global_bo->map;
|
||||
uint32_t last_breadcrumb = 0;
|
||||
uint32_t breakpoint_hits = 0;
|
||||
|
||||
int s = socket(AF_INET, SOCK_DGRAM, 0);
|
||||
|
||||
if (s < 0) {
|
||||
mesa_loge("TU_BREADCRUMBS: Error while creating socket");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct sockaddr_in to_addr;
|
||||
to_addr.sin_family = AF_INET;
|
||||
to_addr.sin_port = htons(ctx->remote_port);
|
||||
to_addr.sin_addr.s_addr = inet_addr(ctx->remote_host);
|
||||
|
||||
/* Run until we know that no more work would be submitted,
|
||||
* because each breadcrumb requires an ack from cpu side and without
|
||||
* the ack GPU would timeout.
|
||||
*/
|
||||
while (!ctx->thread_stop) {
|
||||
uint32_t current_breadcrumb = global->breadcrumb_gpu_sync_seqno;
|
||||
|
||||
if (current_breadcrumb != last_breadcrumb) {
|
||||
last_breadcrumb = current_breadcrumb;
|
||||
|
||||
uint32_t data = htonl(last_breadcrumb);
|
||||
if (sendto(s, &data, sizeof(data), 0, (struct sockaddr *) &to_addr,
|
||||
sizeof(to_addr)) < 0) {
|
||||
mesa_loge("TU_BREADCRUMBS: sendto failed");
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (last_breadcrumb >= ctx->breadcrumb_breakpoint &&
|
||||
breakpoint_hits >= ctx->breadcrumb_breakpoint_hits) {
|
||||
printf("GPU is on breadcrumb %d, continue?", last_breadcrumb);
|
||||
while (getchar() != 'y')
|
||||
;
|
||||
}
|
||||
|
||||
if (ctx->breadcrumb_breakpoint == last_breadcrumb)
|
||||
breakpoint_hits++;
|
||||
|
||||
/* ack that we received the value */
|
||||
global->breadcrumb_cpu_sync_seqno = last_breadcrumb;
|
||||
}
|
||||
}
|
||||
|
||||
fail:
|
||||
close(s);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Uninstrumented twin of tu_cs_emit_pkt7(): reserves cnt + 1 dwords and
 * writes the pkt7 header, but never asks for a breadcrumb itself. Used for
 * the packets that make up a breadcrumb.
 */
static inline void
emit_pkt7(struct tu_cs *cs, uint8_t opcode, uint16_t cnt)
{
   const uint32_t total_dwords = (uint32_t) cnt + 1;

   tu_cs_reserve(cs, total_dwords);
   tu_cs_emit(cs, pm4_pkt7_hdr(opcode, cnt));
}
|
||||
|
||||
void
|
||||
tu_breadcrumbs_init(struct tu_device *device)
|
||||
{
|
||||
const char *breadcrumbs_opt = NULL;
|
||||
#ifdef TU_BREADCRUMBS_ENABLED
|
||||
breadcrumbs_opt = os_get_option("TU_BREADCRUMBS");
|
||||
#endif
|
||||
|
||||
device->breadcrumbs_ctx = NULL;
|
||||
if (!breadcrumbs_opt) {
|
||||
return;
|
||||
}
|
||||
|
||||
struct breadcrumbs_context *ctx =
|
||||
malloc(sizeof(struct breadcrumbs_context));
|
||||
ctx->device = device;
|
||||
ctx->breadcrumb_idx = 0;
|
||||
ctx->thread_stop = false;
|
||||
|
||||
if (sscanf(breadcrumbs_opt, "%[^:]:%d,break=%u:%u", ctx->remote_host,
|
||||
&ctx->remote_port, &ctx->breadcrumb_breakpoint,
|
||||
&ctx->breadcrumb_breakpoint_hits) != 4) {
|
||||
free(ctx);
|
||||
mesa_loge("Wrong TU_BREADCRUMBS value");
|
||||
return;
|
||||
}
|
||||
|
||||
device->breadcrumbs_ctx = ctx;
|
||||
|
||||
struct tu6_global *global = device->global_bo->map;
|
||||
global->breadcrumb_cpu_sync_seqno = 0;
|
||||
global->breadcrumb_gpu_sync_seqno = 0;
|
||||
|
||||
pthread_create(&ctx->breadcrumbs_thread, NULL, sync_gpu_with_cpu, ctx);
|
||||
}
|
||||
|
||||
void
|
||||
tu_breadcrumbs_finish(struct tu_device *device)
|
||||
{
|
||||
struct breadcrumbs_context *ctx = device->breadcrumbs_ctx;
|
||||
if (!ctx || ctx->thread_stop)
|
||||
return;
|
||||
|
||||
ctx->thread_stop = true;
|
||||
pthread_join(ctx->breadcrumbs_thread, NULL);
|
||||
|
||||
free(ctx);
|
||||
}
|
||||
|
||||
void
|
||||
tu_cs_emit_sync_breadcrumb(struct tu_cs *cs, uint8_t opcode, uint16_t cnt)
|
||||
{
|
||||
/* TODO: we may run out of space if we add breadcrumbs
|
||||
* to non-growable CS.
|
||||
*/
|
||||
if (cs->mode != TU_CS_MODE_GROW)
|
||||
return;
|
||||
|
||||
struct tu_device *device = cs->device;
|
||||
struct breadcrumbs_context *ctx = device->breadcrumbs_ctx;
|
||||
if (!ctx || ctx->thread_stop)
|
||||
return;
|
||||
|
||||
bool before_packet = (cnt != 0);
|
||||
|
||||
if (before_packet) {
|
||||
switch (opcode) {
|
||||
case CP_EXEC_CS_INDIRECT:
|
||||
case CP_EXEC_CS:
|
||||
case CP_DRAW_INDX:
|
||||
case CP_DRAW_INDX_OFFSET:
|
||||
case CP_DRAW_INDIRECT:
|
||||
case CP_DRAW_INDX_INDIRECT:
|
||||
case CP_DRAW_INDIRECT_MULTI:
|
||||
case CP_DRAW_AUTO:
|
||||
case CP_BLIT:
|
||||
// case CP_SET_DRAW_STATE:
|
||||
// case CP_LOAD_STATE6_FRAG:
|
||||
// case CP_LOAD_STATE6_GEOM:
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
};
|
||||
} else {
|
||||
assert(cs->breadcrumb_emit_after == 0);
|
||||
}
|
||||
|
||||
uint32_t current_breadcrumb = p_atomic_inc_return(&ctx->breadcrumb_idx);
|
||||
|
||||
if (ctx->breadcrumb_breakpoint != -1 &&
|
||||
current_breadcrumb < ctx->breadcrumb_breakpoint)
|
||||
return;
|
||||
|
||||
emit_pkt7(cs, CP_WAIT_MEM_WRITES, 0);
|
||||
emit_pkt7(cs, CP_WAIT_FOR_IDLE, 0);
|
||||
emit_pkt7(cs, CP_WAIT_FOR_ME, 0);
|
||||
|
||||
emit_pkt7(cs, CP_MEM_WRITE, 3);
|
||||
tu_cs_emit_qw(
|
||||
cs, device->global_bo->iova + gb_offset(breadcrumb_gpu_sync_seqno));
|
||||
tu_cs_emit(cs, current_breadcrumb);
|
||||
|
||||
/* Wait until CPU acknowledges the value written by GPU */
|
||||
emit_pkt7(cs, CP_WAIT_REG_MEM, 6);
|
||||
tu_cs_emit(cs, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ) |
|
||||
CP_WAIT_REG_MEM_0_POLL_MEMORY);
|
||||
tu_cs_emit_qw(
|
||||
cs, device->global_bo->iova + gb_offset(breadcrumb_cpu_sync_seqno));
|
||||
tu_cs_emit(cs, CP_WAIT_REG_MEM_3_REF(current_breadcrumb));
|
||||
tu_cs_emit(cs, CP_WAIT_REG_MEM_4_MASK(~0));
|
||||
tu_cs_emit(cs, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(16));
|
||||
|
||||
if (before_packet)
|
||||
cs->breadcrumb_emit_after = cnt;
|
||||
}
|
|
@ -2037,6 +2037,8 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
|
|||
tu_trace_read_ts,
|
||||
tu_trace_delete_flush_data);
|
||||
|
||||
tu_breadcrumbs_init(device);
|
||||
|
||||
*pDevice = tu_device_to_handle(device);
|
||||
return VK_SUCCESS;
|
||||
|
||||
|
@ -2081,6 +2083,8 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
|
|||
if (!device)
|
||||
return;
|
||||
|
||||
tu_breadcrumbs_finish(device);
|
||||
|
||||
u_trace_context_fini(&device->trace_context);
|
||||
|
||||
for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
|
||||
|
@ -2414,7 +2418,7 @@ tu_InvalidateMappedMemoryRanges(VkDevice _device,
|
|||
}
|
||||
|
||||
static void
|
||||
tu_get_buffer_memory_requirements(uint64_t size,
|
||||
tu_get_buffer_memory_requirements(uint64_t size,
|
||||
VkMemoryRequirements2 *pMemoryRequirements)
|
||||
{
|
||||
pMemoryRequirements->memoryRequirements = (VkMemoryRequirements) {
|
||||
|
|
|
@ -147,6 +147,7 @@ typedef uint32_t xcb_window_t;
|
|||
*/
|
||||
|
||||
struct tu_instance;
|
||||
struct breadcrumbs_context;
|
||||
|
||||
VkResult
|
||||
__vk_startup_errorf(struct tu_instance *instance,
|
||||
|
@ -500,6 +501,13 @@ struct tu6_global
|
|||
volatile uint32_t dbg_gmem_total_stores;
|
||||
volatile uint32_t dbg_gmem_taken_stores;
|
||||
|
||||
/* Written from GPU */
|
||||
volatile uint32_t breadcrumb_gpu_sync_seqno;
|
||||
uint32_t _pad3;
|
||||
/* Written from CPU, acknowledges value written from GPU */
|
||||
volatile uint32_t breadcrumb_cpu_sync_seqno;
|
||||
uint32_t _pad4;
|
||||
|
||||
/* note: larger global bo will be used for customBorderColors */
|
||||
struct bcolor_entry bcolor_builtin[TU_BORDER_COLOR_BUILTIN], bcolor[];
|
||||
};
|
||||
|
@ -609,6 +617,8 @@ struct tu_device
|
|||
|
||||
struct tu_autotune autotune;
|
||||
|
||||
struct breadcrumbs_context *breadcrumbs_ctx;
|
||||
|
||||
#ifdef ANDROID
|
||||
const void *gralloc;
|
||||
enum {
|
||||
|
@ -813,6 +823,8 @@ struct tu_cs
|
|||
uint32_t cond_stack_depth;
|
||||
uint32_t cond_flags[TU_COND_EXEC_STACK_SIZE];
|
||||
uint32_t *cond_dwords[TU_COND_EXEC_STACK_SIZE];
|
||||
|
||||
uint32_t breadcrumb_emit_after;
|
||||
};
|
||||
|
||||
struct tu_device_memory
|
||||
|
@ -2311,6 +2323,12 @@ tu_u_trace_submission_data_finish(
|
|||
struct tu_device *device,
|
||||
struct tu_u_trace_submission_data *submission_data);
|
||||
|
||||
void
|
||||
tu_breadcrumbs_init(struct tu_device *device);
|
||||
|
||||
void
|
||||
tu_breadcrumbs_finish(struct tu_device *device);
|
||||
|
||||
#define TU_FROM_HANDLE(__tu_type, __name, __handle) \
|
||||
VK_FROM_HANDLE(__tu_type, __name, __handle)
|
||||
|
||||
|
|
Loading…
Reference in New Issue