# Review summary of this patch (commentary only; not part of any hunk).
#
# Adds a `bool prefer_async` parameter to threaded_context_flush(): the
# definition in u_threaded_context.c and the prototype in
# u_threaded_context.h are both updated.
#
# Behavior after the patch, when the token belongs to this threaded context
# (token->tc && token->tc == tc):
#   - if prefer_async is true, or the fence of the last batch slot
#     (tc->batch_slots[tc->last]) is not signalled, tc_batch_flush(tc) is
#     called;
#   - otherwise tc_sync(token->tc) is called, which was the only path before
#     the patch.
# The comment added by the patch gives the rationale: doing the flush in the
# driver thread when it is already running "should be better for cache
# locality".
#
# The one caller touched here, si_fence_finish() in radeonsi's si_fence.c,
# passes `timeout == 0` as prefer_async.
#
# NOTE(review): the hunks below have lost their original line breaks and
# indentation in this copy; context and '-' lines would have to be restored
# from the original commit before the patch can be applied.
diff --git a/src/gallium/auxiliary/util/u_threaded_context.c b/src/gallium/auxiliary/util/u_threaded_context.c index 84fbb224533..ffa824744e5 100644 --- a/src/gallium/auxiliary/util/u_threaded_context.c +++ b/src/gallium/auxiliary/util/u_threaded_context.c @@ -231,13 +231,23 @@ _tc_sync(struct threaded_context *tc, MAYBE_UNUSED const char *info, MAYBE_UNUSE */ void threaded_context_flush(struct pipe_context *_pipe, - struct tc_unflushed_batch_token *token) + struct tc_unflushed_batch_token *token, + bool prefer_async) { struct threaded_context *tc = threaded_context(_pipe); /* This is called from the state-tracker / application thread. */ - if (token->tc && token->tc == tc) - tc_sync(token->tc); + if (token->tc && token->tc == tc) { + struct tc_batch *last = &tc->batch_slots[tc->last]; + + /* Prefer to do the flush in the driver thread if it is already + * running. That should be better for cache locality. + */ + if (prefer_async || !util_queue_fence_is_signalled(&last->fence)) + tc_batch_flush(tc); + else + tc_sync(token->tc); + } } static void diff --git a/src/gallium/auxiliary/util/u_threaded_context.h b/src/gallium/auxiliary/util/u_threaded_context.h index 34089561f34..53c5a7e8c4c 100644 --- a/src/gallium/auxiliary/util/u_threaded_context.h +++ b/src/gallium/auxiliary/util/u_threaded_context.h @@ -381,7 +381,8 @@ threaded_context_create(struct pipe_context *pipe, void threaded_context_flush(struct pipe_context *_pipe, - struct tc_unflushed_batch_token *token); + struct tc_unflushed_batch_token *token, + bool prefer_async); static inline struct threaded_context * threaded_context(struct pipe_context *pipe) diff --git a/src/gallium/drivers/radeonsi/si_fence.c b/src/gallium/drivers/radeonsi/si_fence.c index 5163d652c83..9d6bcfe1027 100644 --- a/src/gallium/drivers/radeonsi/si_fence.c +++ b/src/gallium/drivers/radeonsi/si_fence.c @@ -203,7 +203,8 @@ static boolean si_fence_finish(struct pipe_screen *screen, * be in flight in the driver thread, so the fence * may 
not be ready yet when this call returns. */ - threaded_context_flush(ctx, rfence->tc_token); + threaded_context_flush(ctx, rfence->tc_token, + timeout == 0); } if (timeout == PIPE_TIMEOUT_INFINITE) {