zink: add env var to abort on device-lost if no reset callback is set

the alternative here is to just spin aimlessly until the process runs out of
memory (OOMs), which makes it hard to detect failures when running CTS caselists

a separate env var (ZINK_HANG_ABORT) is used so that it can be exported without
affecting ZINK_DEBUG

Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17525>
This commit is contained in:
Mike Blumenkrantz 2022-07-12 09:17:25 -04:00 committed by Marge Bot
parent fe5c7f1418
commit 2ea0d735d4
4 changed files with 18 additions and 0 deletions

View File

@ -332,6 +332,9 @@ post_submit(void *data, void *gdata, int thread_index)
if (bs->is_device_lost) {
if (bs->ctx->reset.reset)
bs->ctx->reset.reset(bs->ctx->reset.data, PIPE_GUILTY_CONTEXT_RESET);
else if (screen->abort_on_hang && !screen->robust_ctx_count)
/* if nothing can save us, abort */
abort();
screen->device_lost = true;
} else if (bs->ctx->batch_states_count > 5000) {
zink_screen_timeline_wait(screen, bs->fence.batch_id - 2500, PIPE_TIMEOUT_INFINITE);

View File

@ -217,11 +217,20 @@ zink_set_device_reset_callback(struct pipe_context *pctx,
const struct pipe_device_reset_callback *cb)
{
struct zink_context *ctx = zink_context(pctx);
bool had_reset = !!ctx->reset.reset;
if (cb)
ctx->reset = *cb;
else
memset(&ctx->reset, 0, sizeof(ctx->reset));
bool have_reset = !!ctx->reset.reset;
if (had_reset != have_reset) {
if (have_reset)
p_atomic_inc(&zink_screen(pctx->screen)->robust_ctx_count);
else
p_atomic_dec(&zink_screen(pctx->screen)->robust_ctx_count);
}
}
static void

View File

@ -2103,6 +2103,7 @@ zink_internal_create_screen(const struct pipe_screen_config *config)
return NULL;
screen->threaded = util_get_cpu_caps()->nr_cpus > 1 && debug_get_bool_option("GALLIUM_THREAD", util_get_cpu_caps()->nr_cpus > 1);
screen->abort_on_hang = debug_get_bool_option("ZINK_HANG_ABORT", false);
zink_debug = debug_get_option_zink_debug();
zink_descriptor_mode = debug_get_option_zink_descriptor_mode();

View File

@ -101,6 +101,7 @@ struct zink_screen {
bool threaded;
bool is_cpu;
bool abort_on_hang;
uint64_t curr_batch; //the current batch id
uint32_t last_finished;
VkSemaphore sem;
@ -110,6 +111,7 @@ struct zink_screen {
unsigned buffer_rebind_counter;
unsigned image_rebind_counter;
unsigned robust_ctx_count;
struct hash_table dts;
simple_mtx_t dt_lock;
@ -258,6 +260,9 @@ zink_screen_handle_vkresult(struct zink_screen *screen, VkResult ret)
case VK_ERROR_DEVICE_LOST:
screen->device_lost = true;
mesa_loge("zink: DEVICE LOST!\n");
/* if nothing can save us, abort */
if (screen->abort_on_hang && !screen->robust_ctx_count)
abort();
FALLTHROUGH;
default:
success = false;