ddebug: simplify watchdog loop and fix crash in the no-timeout case
The following race condition could occur in the no-timeout case:

    API thread          Gallium driver thread      Watchdog thread
    ----------          ---------------------      ---------------
    dd_before_draw
                        u_threaded_context draw
    dd_after_draw
      add to dctx->records
      signal watchdog
                                                   dump & destroy record
                        execute draw
                        dd_after_draw_async:
                          use-after-free!

Alternatively, the same scenario would assert in a debug build when destroying the record, because record->driver_finished has not signaled. Fix this — and simplify the logic at the same time — by:

- handing the record pointers off to the watchdog thread *before* each draw call, and
- waiting on the driver_finished fence in the watchdog thread.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
This commit is contained in:
parent
3627c9efff
commit
539fdc49f1
|
@ -596,7 +596,6 @@ dd_context_destroy(struct pipe_context *_pipe)
|
|||
cnd_destroy(&dctx->cond);
|
||||
|
||||
assert(list_empty(&dctx->records));
|
||||
assert(!dctx->record_pending);
|
||||
|
||||
if (pipe->set_log_context) {
|
||||
pipe->set_log_context(pipe, NULL);
|
||||
|
|
|
@ -988,10 +988,8 @@ dd_report_hang(struct dd_context *dctx)
|
|||
encountered_hang = true;
|
||||
}
|
||||
|
||||
if (num_later || dctx->record_pending) {
|
||||
fprintf(stderr, "... and %u%s additional draws.\n", num_later,
|
||||
dctx->record_pending ? "+1 (pending)" : "");
|
||||
}
|
||||
if (num_later)
|
||||
fprintf(stderr, "... and %u additional draws.\n", num_later);
|
||||
|
||||
fprintf(stderr, "\nDone.\n");
|
||||
dd_kill_process();
|
||||
|
@ -1008,9 +1006,6 @@ dd_thread_main(void *input)
|
|||
|
||||
for (;;) {
|
||||
struct list_head records;
|
||||
struct pipe_fence_handle *fence;
|
||||
struct pipe_fence_handle *fence2 = NULL;
|
||||
|
||||
list_replace(&dctx->records, &records);
|
||||
list_inithead(&dctx->records);
|
||||
dctx->num_records = 0;
|
||||
|
@ -1018,36 +1013,36 @@ dd_thread_main(void *input)
|
|||
if (dctx->api_stalled)
|
||||
cnd_signal(&dctx->cond);
|
||||
|
||||
if (!list_empty(&records)) {
|
||||
/* Wait for the youngest draw. This means hangs can take a bit longer
|
||||
* to detect, but it's more efficient this way. */
|
||||
struct dd_draw_record *youngest =
|
||||
LIST_ENTRY(struct dd_draw_record, records.prev, list);
|
||||
fence = youngest->bottom_of_pipe;
|
||||
} else if (dctx->record_pending) {
|
||||
/* Wait for pending fences, in case the driver ends up hanging internally. */
|
||||
fence = dctx->record_pending->prev_bottom_of_pipe;
|
||||
fence2 = dctx->record_pending->top_of_pipe;
|
||||
} else if (dctx->kill_thread) {
|
||||
break;
|
||||
} else {
|
||||
if (list_empty(&records)) {
|
||||
if (dctx->kill_thread)
|
||||
break;
|
||||
|
||||
cnd_wait(&dctx->cond, &dctx->mutex);
|
||||
continue;
|
||||
}
|
||||
|
||||
mtx_unlock(&dctx->mutex);
|
||||
|
||||
/* Fences can be NULL legitimately when timeout detection is disabled. */
|
||||
if ((fence &&
|
||||
!screen->fence_finish(screen, NULL, fence,
|
||||
(uint64_t)dscreen->timeout_ms * 1000*1000)) ||
|
||||
(fence2 &&
|
||||
!screen->fence_finish(screen, NULL, fence2,
|
||||
(uint64_t)dscreen->timeout_ms * 1000*1000))) {
|
||||
mtx_lock(&dctx->mutex);
|
||||
list_splice(&records, &dctx->records);
|
||||
dd_report_hang(dctx);
|
||||
/* we won't actually get here */
|
||||
mtx_unlock(&dctx->mutex);
|
||||
/* Wait for the youngest draw. This means hangs can take a bit longer
|
||||
* to detect, but it's more efficient this way. */
|
||||
struct dd_draw_record *youngest =
|
||||
list_last_entry(&records, struct dd_draw_record, list);
|
||||
|
||||
if (dscreen->timeout_ms > 0) {
|
||||
uint64_t abs_timeout = os_time_get_absolute_timeout(
|
||||
(uint64_t)dscreen->timeout_ms * 1000*1000);
|
||||
|
||||
if (!util_queue_fence_wait_timeout(&youngest->driver_finished, abs_timeout) ||
|
||||
!screen->fence_finish(screen, NULL, youngest->bottom_of_pipe,
|
||||
(uint64_t)dscreen->timeout_ms * 1000*1000)) {
|
||||
mtx_lock(&dctx->mutex);
|
||||
list_splice(&records, &dctx->records);
|
||||
dd_report_hang(dctx);
|
||||
/* we won't actually get here */
|
||||
mtx_unlock(&dctx->mutex);
|
||||
}
|
||||
} else {
|
||||
util_queue_fence_wait(&youngest->driver_finished);
|
||||
}
|
||||
|
||||
list_for_each_entry_safe(struct dd_draw_record, record, &records, list) {
|
||||
|
@ -1079,6 +1074,7 @@ dd_create_record(struct dd_context *dctx)
|
|||
record->bottom_of_pipe = NULL;
|
||||
record->log_page = NULL;
|
||||
util_queue_fence_init(&record->driver_finished);
|
||||
util_queue_fence_reset(&record->driver_finished);
|
||||
|
||||
dd_init_copy_of_draw_state(&record->draw_state);
|
||||
dd_copy_draw_state(&record->draw_state.base, &dctx->draw_state);
|
||||
|
@ -1115,13 +1111,23 @@ dd_before_draw(struct dd_context *dctx, struct dd_draw_record *record)
|
|||
pipe->flush(pipe, &record->top_of_pipe,
|
||||
PIPE_FLUSH_DEFERRED | PIPE_FLUSH_TOP_OF_PIPE);
|
||||
}
|
||||
|
||||
mtx_lock(&dctx->mutex);
|
||||
dctx->record_pending = record;
|
||||
if (list_empty(&dctx->records))
|
||||
cnd_signal(&dctx->cond);
|
||||
mtx_unlock(&dctx->mutex);
|
||||
}
|
||||
|
||||
mtx_lock(&dctx->mutex);
|
||||
if (unlikely(dctx->num_records > 10000)) {
|
||||
dctx->api_stalled = true;
|
||||
/* Since this is only a heuristic to prevent the API thread from getting
|
||||
* too far ahead, we don't need a loop here. */
|
||||
cnd_wait(&dctx->cond, &dctx->mutex);
|
||||
dctx->api_stalled = false;
|
||||
}
|
||||
|
||||
if (list_empty(&dctx->records))
|
||||
cnd_signal(&dctx->cond);
|
||||
|
||||
list_addtail(&record->list, &dctx->records);
|
||||
dctx->num_records++;
|
||||
mtx_unlock(&dctx->mutex);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -1134,8 +1140,7 @@ dd_after_draw_async(void *data)
|
|||
record->log_page = u_log_new_page(&dctx->log);
|
||||
record->time_after = os_time_get_nano();
|
||||
|
||||
if (!util_queue_fence_is_signalled(&record->driver_finished))
|
||||
util_queue_fence_signal(&record->driver_finished);
|
||||
util_queue_fence_signal(&record->driver_finished);
|
||||
|
||||
if (dscreen->dump_mode == DD_DUMP_APITRACE_CALL &&
|
||||
dscreen->apitrace_dump_call > dctx->draw_state.apitrace_call_number) {
|
||||
|
@ -1158,34 +1163,14 @@ dd_after_draw(struct dd_context *dctx, struct dd_draw_record *record)
|
|||
else
|
||||
flush_flags = PIPE_FLUSH_DEFERRED | PIPE_FLUSH_BOTTOM_OF_PIPE;
|
||||
pipe->flush(pipe, &record->bottom_of_pipe, flush_flags);
|
||||
|
||||
assert(record == dctx->record_pending);
|
||||
}
|
||||
|
||||
if (pipe->callback) {
|
||||
util_queue_fence_reset(&record->driver_finished);
|
||||
pipe->callback(pipe, dd_after_draw_async, record, true);
|
||||
} else {
|
||||
dd_after_draw_async(record);
|
||||
}
|
||||
|
||||
mtx_lock(&dctx->mutex);
|
||||
if (unlikely(dctx->num_records > 10000)) {
|
||||
dctx->api_stalled = true;
|
||||
/* Since this is only a heuristic to prevent the API thread from getting
|
||||
* too far ahead, we don't need a loop here. */
|
||||
cnd_wait(&dctx->cond, &dctx->mutex);
|
||||
dctx->api_stalled = false;
|
||||
}
|
||||
|
||||
if (list_empty(&dctx->records))
|
||||
cnd_signal(&dctx->cond);
|
||||
|
||||
list_addtail(&record->list, &dctx->records);
|
||||
dctx->record_pending = NULL;
|
||||
dctx->num_records++;
|
||||
mtx_unlock(&dctx->mutex);
|
||||
|
||||
++dctx->num_draw_calls;
|
||||
if (dscreen->skip_count && dctx->num_draw_calls % 10000 == 0)
|
||||
fprintf(stderr, "Gallium debugger reached %u draw calls.\n",
|
||||
|
|
|
@ -274,6 +274,7 @@ struct dd_draw_record {
|
|||
int64_t time_after;
|
||||
unsigned draw_call;
|
||||
|
||||
/* The fence pointers are guaranteed to be valid once driver_finished is signalled */
|
||||
struct pipe_fence_handle *prev_bottom_of_pipe;
|
||||
struct pipe_fence_handle *top_of_pipe;
|
||||
struct pipe_fence_handle *bottom_of_pipe;
|
||||
|
@ -297,24 +298,18 @@ struct dd_context
|
|||
|
||||
/* Pipelined hang detection.
|
||||
*
|
||||
* This is without unnecessary flushes and waits. There is a memory-based
|
||||
* fence that is incremented by clear_buffer every draw call. Driver fences
|
||||
* are not used.
|
||||
* Before each draw call, a new dd_draw_record is created that contains
|
||||
* a copy of all states. After each draw call, the driver's log is added
|
||||
* to this record. Additionally, deferred fences are associated to each
|
||||
* record both before and after the draw.
|
||||
*
|
||||
* After each draw call, a new dd_draw_record is created that contains
|
||||
* a copy of all states, the output of pipe_context::dump_debug_state,
|
||||
* and it has a fence number assigned. That's done without knowing whether
|
||||
* that draw call is problematic or not. The record is added into the list
|
||||
* of all records.
|
||||
*
|
||||
* An independent, separate thread loops over the list of records and checks
|
||||
* their fences. Records with signalled fences are freed. On fence timeout,
|
||||
* the thread dumps the records of in-flight draws.
|
||||
* The records are handed off to a separate thread which waits on the
|
||||
* records' fences. Records with signalled fences are freed. When a timeout
|
||||
* is detected, the thread dumps the records of in-flight draws.
|
||||
*/
|
||||
thrd_t thread;
|
||||
mtx_t mutex;
|
||||
cnd_t cond;
|
||||
struct dd_draw_record *record_pending; /* currently inside the driver */
|
||||
struct list_head records; /* oldest record first */
|
||||
unsigned num_records;
|
||||
bool kill_thread;
|
||||
|
|
Loading…
Reference in New Issue