From a9ebf55d02519091a76449c3b38680ce1d73908d Mon Sep 17 00:00:00 2001 From: Danylo Piliaiev Date: Fri, 18 Mar 2022 13:47:24 +0200 Subject: [PATCH] turnip: Simple breadcrumbs implementation to debug hangs A simple implementation of breadcrumbs tracking of GPU progress intended to be the last resort when debugging unrecoverable hangs. For best results use Vulkan traces to have a predictable place of hang. Requires compilation with TU_BREADCRUMBS_ENABLED=1. See tu_cs_breadcrumbs.c for details on how to use this feature. Signed-off-by: Danylo Piliaiev Part-of: --- src/freedreno/vulkan/meson.build | 1 + src/freedreno/vulkan/tu_cs.h | 18 ++ src/freedreno/vulkan/tu_cs_breadcrumbs.c | 279 +++++++++++++++++++++++ src/freedreno/vulkan/tu_device.c | 6 +- src/freedreno/vulkan/tu_private.h | 18 ++ 5 files changed, 321 insertions(+), 1 deletion(-) create mode 100644 src/freedreno/vulkan/tu_cs_breadcrumbs.c diff --git a/src/freedreno/vulkan/meson.build b/src/freedreno/vulkan/meson.build index a3c1ab53ce0..b799d232ce5 100644 --- a/src/freedreno/vulkan/meson.build +++ b/src/freedreno/vulkan/meson.build @@ -34,6 +34,7 @@ libtu_files = files( 'tu_autotune.c', 'tu_clear_blit.c', 'tu_cmd_buffer.c', + 'tu_cs_breadcrumbs.c', 'tu_cs.c', 'tu_cs.h', 'tu_device.c', diff --git a/src/freedreno/vulkan/tu_cs.h b/src/freedreno/vulkan/tu_cs.h index 892c37a3294..60ea5938776 100644 --- a/src/freedreno/vulkan/tu_cs.h +++ b/src/freedreno/vulkan/tu_cs.h @@ -29,6 +29,11 @@ #include "freedreno_pm4.h" +/* For breadcrumbs we may open a network socket based on the envvar, + * it's not something that should be enabled by default. + */ +#define TU_BREADCRUMBS_ENABLED 0 + void tu_cs_init(struct tu_cs *cs, struct tu_device *device, @@ -153,6 +158,9 @@ tu_cs_sanity_check(const struct tu_cs *cs) assert(cs->reserved_end <= cs->end); } +void +tu_cs_emit_sync_breadcrumb(struct tu_cs *cs, uint8_t opcode, uint16_t cnt); + /** * Emit a uint32_t value into a command stream, without boundary checking.
*/ @@ -162,6 +170,12 @@ tu_cs_emit(struct tu_cs *cs, uint32_t value) assert(cs->cur < cs->reserved_end); *cs->cur = value; ++cs->cur; + +#if TU_BREADCRUMBS_ENABLED + cs->breadcrumb_emit_after--; + if (cs->breadcrumb_emit_after == 0) + tu_cs_emit_sync_breadcrumb(cs, -1, 0); +#endif } /** @@ -220,6 +234,10 @@ tu_cs_emit_pkt4(struct tu_cs *cs, uint16_t regindx, uint16_t cnt) static inline void tu_cs_emit_pkt7(struct tu_cs *cs, uint8_t opcode, uint16_t cnt) { +#if TU_BREADCRUMBS_ENABLED + tu_cs_emit_sync_breadcrumb(cs, opcode, cnt + 1); +#endif + tu_cs_reserve(cs, cnt + 1); tu_cs_emit(cs, pm4_pkt7_hdr(opcode, cnt)); } diff --git a/src/freedreno/vulkan/tu_cs_breadcrumbs.c b/src/freedreno/vulkan/tu_cs_breadcrumbs.c new file mode 100644 index 00000000000..d5b8b4074a6 --- /dev/null +++ b/src/freedreno/vulkan/tu_cs_breadcrumbs.c @@ -0,0 +1,279 @@ +/* + * Copyright © 2022 Igalia S.L. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include <arpa/inet.h> +#include <netinet/in.h> +#include <pthread.h> + +#include "tu_cs.h" + +/* A simple implementation of breadcrumbs tracking of GPU progress + * intended to be a last resort when debugging unrecoverable hangs. + * For best results use Vulkan traces to have a predictable place of hang. + * + * For ordinary hangs as a more user-friendly solution use GFR + * "Graphics Flight Recorder". + * + * This implementation aims to handle cases where we cannot do anything + * after the hang, which is achieved by: + * - On GPU after each breadcrumb we wait until CPU acks it and sends a UDP + * packet to the remote host; + * - At specified breadcrumb require explicit user input to continue + * execution up to the next breadcrumb. + * + * In-driver breadcrumbs also allow more precise tracking since we could + * target a single GPU packet. + * + * + * Breadcrumbs settings: + * + * TU_BREADCRUMBS=$IP:$PORT,break=$BREAKPOINT:$BREAKPOINT_HITS + * Where: + * $BREAKPOINT - the breadcrumb from which we require explicit ack + * $BREAKPOINT_HITS - how many times breakpoint should be reached for + * break to occur. Necessary for a gmem mode and re-usable cmdbuffers + * in both of which the same cmdstream could be executed several times. + * + * + * A typical workflow would be: + * - Start listening for breadcrumbs on remote host: + * nc -lvup $PORT | stdbuf -o0 xxd -pc -c 4 | awk -Wposix '{printf("%u:%u\n", "0x" $0, a[$0]++)}' + * + * - Start capturing command stream: + * sudo cat /sys/kernel/debug/dri/0/rd > ~/cmdstream.rd + * + * - On device replay the hanging trace with: + * TU_BREADCRUMBS=$IP:$PORT,break=-1:0 + * ! Try to reproduce the hang in a sysmem mode because it would + * require far fewer breadcrumb writes and syncs.
+ *
+ * - Increase hangcheck period:
+ *    echo -n 60000 > /sys/kernel/debug/dri/0/hangcheck_period_ms
+ *
+ * - After GPU hang note the last breadcrumb and relaunch trace with:
+ *    TU_BREADCRUMBS=$IP:$PORT,break=$LAST_BREADCRUMB:$HITS
+ *
+ * - After the breakpoint is reached each breadcrumb would require
+ *   explicit ack from the user. This way it's possible to find
+ *   the last packet which didn't hang.
+ *
+ * - Find the packet in the decoded cmdstream.
+ */
+
+struct breadcrumbs_context
+{
+   char remote_host[64]; /* IPv4 string passed to inet_addr() */
+   int remote_port; /* UDP port, host byte order */
+   uint32_t breadcrumb_breakpoint; /* first breadcrumb needing a user ack */
+   uint32_t breadcrumb_breakpoint_hits; /* hits required before breaking */
+
+   bool thread_stop; /* NOTE(review): plain bool polled by another thread */
+   pthread_t breadcrumbs_thread;
+
+   struct tu_device *device;
+
+   uint32_t breadcrumb_idx; /* last breadcrumb number handed out */
+};
+
+/* Ack thread: reports each new GPU breadcrumb over UDP, then acks it. */
+static void *
+sync_gpu_with_cpu(void *_job)
+{
+   struct breadcrumbs_context *ctx = (struct breadcrumbs_context *) _job;
+   struct tu6_global *global =
+      (struct tu6_global *) ctx->device->global_bo->map;
+   uint32_t last_breadcrumb = 0;
+   uint32_t breakpoint_hits = 0;
+
+   int s = socket(AF_INET, SOCK_DGRAM, 0);
+   if (s < 0) {
+      mesa_loge("TU_BREADCRUMBS: Error while creating socket");
+      return NULL;
+   }
+
+   struct sockaddr_in to_addr = { 0 };
+   to_addr.sin_family = AF_INET;
+   to_addr.sin_port = htons(ctx->remote_port);
+   to_addr.sin_addr.s_addr = inet_addr(ctx->remote_host);
+
+   /* Run until we know that no more work would be submitted,
+    * because each breadcrumb requires an ack from cpu side and without
+    * the ack GPU would timeout.
+    */
+   while (!ctx->thread_stop) {
+      uint32_t current_breadcrumb = global->breadcrumb_gpu_sync_seqno;
+
+      if (current_breadcrumb != last_breadcrumb) {
+         last_breadcrumb = current_breadcrumb;
+
+         uint32_t data = htonl(last_breadcrumb);
+         if (sendto(s, &data, sizeof(data), 0, (struct sockaddr *) &to_addr,
+                    sizeof(to_addr)) < 0) {
+            mesa_loge("TU_BREADCRUMBS: sendto failed");
+            goto fail;
+         }
+
+         if (last_breadcrumb >= ctx->breadcrumb_breakpoint &&
+             breakpoint_hits >= ctx->breadcrumb_breakpoint_hits) {
+            printf("GPU is on breadcrumb %u, continue?", last_breadcrumb);
+            for (int c = getchar(); c != 'y' && c != EOF; c = getchar())
+               ; /* EOF: nobody can answer, so don't spin forever */
+         }
+
+         if (ctx->breadcrumb_breakpoint == last_breadcrumb)
+            breakpoint_hits++;
+
+         /* ack that we received the value */
+         global->breadcrumb_cpu_sync_seqno = last_breadcrumb;
+      }
+   }
+
+fail:
+   close(s);
+
+   return NULL;
+}
+
+/* Same as tu_cs_emit_pkt7 but without instrumentation */
+static inline void
+emit_pkt7(struct tu_cs *cs, uint8_t opcode, uint16_t cnt)
+{
+   tu_cs_reserve(cs, cnt + 1);
+   tu_cs_emit(cs, pm4_pkt7_hdr(opcode, cnt));
+}
+
+/* Parse TU_BREADCRUMBS and start the ack thread; no-op if unset. */
+void
+tu_breadcrumbs_init(struct tu_device *device)
+{
+   const char *breadcrumbs_opt = NULL;
+#if TU_BREADCRUMBS_ENABLED
+   breadcrumbs_opt = os_get_option("TU_BREADCRUMBS");
+#endif
+
+   device->breadcrumbs_ctx = NULL;
+   if (!breadcrumbs_opt)
+      return;
+
+   struct breadcrumbs_context *ctx = calloc(1, sizeof(*ctx)); /* zeroed */
+   if (!ctx)
+      return;
+   ctx->device = device;
+
+   if (sscanf(breadcrumbs_opt, "%63[^:]:%d,break=%u:%u", ctx->remote_host,
+              &ctx->remote_port, &ctx->breadcrumb_breakpoint,
+              &ctx->breadcrumb_breakpoint_hits) != 4) {
+      free(ctx);
+      mesa_loge("Wrong TU_BREADCRUMBS value");
+      return;
+   }
+
+   struct tu6_global *global = device->global_bo->map;
+   global->breadcrumb_cpu_sync_seqno = 0;
+   global->breadcrumb_gpu_sync_seqno = 0;
+
+   if (pthread_create(&ctx->breadcrumbs_thread, NULL, sync_gpu_with_cpu, ctx))
+      free(ctx); /* no ack thread: leave breadcrumbs disabled */
+   else
+      device->breadcrumbs_ctx = ctx;
+}
+
+void
+tu_breadcrumbs_finish(struct tu_device *device)
+{
+   struct breadcrumbs_context *ctx = device->breadcrumbs_ctx;
+   if (!ctx || ctx->thread_stop)
+      return;
+
+   ctx->thread_stop = true; /* ends the loop in sync_gpu_with_cpu() */
+   pthread_join(ctx->breadcrumbs_thread, NULL);
+   device->breadcrumbs_ctx = NULL; /* don't leave a dangling pointer */
+   free(ctx);
+}
+
+/* Emit a GPU breadcrumb write followed by a wait for the CPU ack. */
+void
+tu_cs_emit_sync_breadcrumb(struct tu_cs *cs, uint8_t opcode, uint16_t cnt)
+{
+   /* TODO: we may run out of space if we add breadcrumbs
+    * to non-growable CS.
+    */
+   if (cs->mode != TU_CS_MODE_GROW)
+      return;
+
+   struct tu_device *device = cs->device;
+   struct breadcrumbs_context *ctx = device->breadcrumbs_ctx;
+   if (!ctx || ctx->thread_stop)
+      return;
+
+   bool before_packet = (cnt != 0); /* cnt == 0: called after a packet */
+   if (before_packet) {
+      switch (opcode) {
+      case CP_EXEC_CS_INDIRECT:
+      case CP_EXEC_CS:
+      case CP_DRAW_INDX:
+      case CP_DRAW_INDX_OFFSET:
+      case CP_DRAW_INDIRECT:
+      case CP_DRAW_INDX_INDIRECT:
+      case CP_DRAW_INDIRECT_MULTI:
+      case CP_DRAW_AUTO:
+      case CP_BLIT:
+         // case CP_SET_DRAW_STATE:
+         // case CP_LOAD_STATE6_FRAG:
+         // case CP_LOAD_STATE6_GEOM:
+         break;
+      default:
+         return;
+      }
+   } else {
+      assert(cs->breadcrumb_emit_after == 0);
+   }
+
+   uint32_t current_breadcrumb = p_atomic_inc_return(&ctx->breadcrumb_idx);
+
+   if (ctx->breadcrumb_breakpoint != UINT32_MAX && /* i.e. not break=-1 */
+       current_breadcrumb < ctx->breadcrumb_breakpoint)
+      return;
+
+   emit_pkt7(cs, CP_WAIT_MEM_WRITES, 0);
+   emit_pkt7(cs, CP_WAIT_FOR_IDLE, 0);
+   emit_pkt7(cs, CP_WAIT_FOR_ME, 0);
+
+   emit_pkt7(cs, CP_MEM_WRITE, 3);
+   tu_cs_emit_qw(
+      cs, device->global_bo->iova + gb_offset(breadcrumb_gpu_sync_seqno));
+   tu_cs_emit(cs, current_breadcrumb);
+
+   /* Wait until CPU acknowledges the value written by GPU */
+   emit_pkt7(cs, CP_WAIT_REG_MEM, 6);
+   tu_cs_emit(cs, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ) |
+                     CP_WAIT_REG_MEM_0_POLL_MEMORY);
+   tu_cs_emit_qw(
+      cs, device->global_bo->iova + gb_offset(breadcrumb_cpu_sync_seqno));
+   tu_cs_emit(cs, CP_WAIT_REG_MEM_3_REF(current_breadcrumb));
+   tu_cs_emit(cs, CP_WAIT_REG_MEM_4_MASK(~0));
+   tu_cs_emit(cs, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(16));
+
+   if (before_packet)
+      cs->breadcrumb_emit_after = cnt; /* tu_cs_emit() counts this down */
+}
diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c index b484a482813..718bb176cec 100644 --- a/src/freedreno/vulkan/tu_device.c +++ b/src/freedreno/vulkan/tu_device.c @@ -2037,6 +2037,8 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice, tu_trace_read_ts, tu_trace_delete_flush_data); + tu_breadcrumbs_init(device); + *pDevice = tu_device_to_handle(device); return VK_SUCCESS; @@ -2081,6 +2083,8 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator) if (!device) return; + tu_breadcrumbs_finish(device); + u_trace_context_fini(&device->trace_context); for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) { @@ -2414,7 +2418,7 @@ tu_InvalidateMappedMemoryRanges(VkDevice _device, } static void -tu_get_buffer_memory_requirements(uint64_t size, +tu_get_buffer_memory_requirements(uint64_t size, VkMemoryRequirements2 *pMemoryRequirements) { pMemoryRequirements->memoryRequirements = (VkMemoryRequirements) { diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h index 3ae319b6ee1..05ba4b248c4 100644 --- a/src/freedreno/vulkan/tu_private.h +++ b/src/freedreno/vulkan/tu_private.h @@ -147,6 +147,7 @@ typedef uint32_t xcb_window_t; */ struct tu_instance; +struct breadcrumbs_context; VkResult __vk_startup_errorf(struct tu_instance *instance, @@ -500,6 +501,13 @@ struct tu6_global volatile uint32_t dbg_gmem_total_stores; volatile uint32_t dbg_gmem_taken_stores; + /* Written from GPU */ + volatile uint32_t breadcrumb_gpu_sync_seqno; + uint32_t _pad3; + /* Written from CPU, acknowledges value written from GPU */ + volatile uint32_t breadcrumb_cpu_sync_seqno; + uint32_t _pad4; + /* note: larger global bo will be used for customBorderColors */ struct bcolor_entry bcolor_builtin[TU_BORDER_COLOR_BUILTIN], bcolor[]; }; @@ -609,6 +617,8 @@ struct tu_device struct tu_autotune autotune;
+ struct breadcrumbs_context *breadcrumbs_ctx; + #ifdef ANDROID const void *gralloc; enum { @@ -813,6 +823,8 @@ struct tu_cs uint32_t cond_stack_depth; uint32_t cond_flags[TU_COND_EXEC_STACK_SIZE]; uint32_t *cond_dwords[TU_COND_EXEC_STACK_SIZE]; + + uint32_t breadcrumb_emit_after; }; struct tu_device_memory @@ -2311,6 +2323,12 @@ tu_u_trace_submission_data_finish( struct tu_device *device, struct tu_u_trace_submission_data *submission_data); +void +tu_breadcrumbs_init(struct tu_device *device); + +void +tu_breadcrumbs_finish(struct tu_device *device); + #define TU_FROM_HANDLE(__tu_type, __name, __handle) \ VK_FROM_HANDLE(__tu_type, __name, __handle)