From 2900f82e1919dcb70d29f34e5ed10a09f7356b3e Mon Sep 17 00:00:00 2001 From: Pierre-Eric Pelloux-Prayer Date: Mon, 12 Oct 2020 13:28:37 +0200 Subject: [PATCH] gallium/u_threaded: fix staging and non-staging conflicts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the following sequence: - transfer_map(buffer, DISCARD) // use staging upload - memcpy(...) - transfer_unmap - draw - transfer_map(buffer, UNSYNCHRONIZED) // non-staging upload - memcpy(...) - draw Currently the order of operations is: - map#1 - staging buffer - memcpy to staging buffer - map#2 - memcpy to buffer - staging buffer copy to real buffer - draw#1 - draw#2 When the 2nd map operation doesn't use UNSYNCHRONIZED, the tc_sync_msg() call will make sure that the bo is unused before mapping it. But, if it does use UNSYNCHRONIZED and the mapped intervals overlap, this commit clears the UNSYNCHRONIZED flag to make sure ordering is maintained. This will affect performance, but correct output is better than fast output. See https://gitlab.freedesktop.org/mesa/mesa/-/issues/3611. 
Reviewed-by: Marek Olšák Part-of: --- .../auxiliary/util/u_threaded_context.c | 61 ++++++++++++++++--- .../auxiliary/util/u_threaded_context.h | 8 +++ 2 files changed, 62 insertions(+), 7 deletions(-) diff --git a/src/gallium/auxiliary/util/u_threaded_context.c b/src/gallium/auxiliary/util/u_threaded_context.c index c6e462bcb00..0227be371ba 100644 --- a/src/gallium/auxiliary/util/u_threaded_context.c +++ b/src/gallium/auxiliary/util/u_threaded_context.c @@ -374,6 +374,8 @@ threaded_resource_init(struct pipe_resource *res) tres->base_valid_buffer_range = &tres->valid_buffer_range; tres->is_shared = false; tres->is_user_ptr = false; + tres->pending_staging_uploads = 0; + util_range_init(&tres->pending_staging_uploads_range); } void @@ -384,6 +386,7 @@ threaded_resource_deinit(struct pipe_resource *res) if (tres->latest != &tres->b) pipe_resource_reference(&tres->latest, NULL); util_range_destroy(&tres->valid_buffer_range); + util_range_destroy(&tres->pending_staging_uploads_range); } struct pipe_context * @@ -1618,15 +1621,33 @@ tc_transfer_map(struct pipe_context *_pipe, ttrans->b.stride = 0; ttrans->b.layer_stride = 0; *transfer = &ttrans->b; + + p_atomic_inc(&tres->pending_staging_uploads); + util_range_add(resource, &tres->pending_staging_uploads_range, + box->x, box->x + box->width); + return map + (box->x % tc->map_buffer_alignment); } + + if (usage & PIPE_MAP_UNSYNCHRONIZED && + p_atomic_read(&tres->pending_staging_uploads) && + util_ranges_intersect(&tres->pending_staging_uploads_range, box->x, box->x + box->width)) { + /* Write conflict detected between a staging transfer and the direct mapping we're + * going to do. Resolve the conflict by ignoring UNSYNCHRONIZED so the direct mapping + * will have to wait for the staging transfer completion. + * Note: The conflict detection is only based on the mapped range, not on the actual + * written range(s). 
+ */ + usage &= ~PIPE_MAP_UNSYNCHRONIZED & ~TC_TRANSFER_MAP_THREADED_UNSYNC; + } } /* Unsychronized buffer mappings don't have to synchronize the thread. */ - if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC)) + if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC)) { tc_sync_msg(tc, resource->target != PIPE_BUFFER ? " texture" : usage & PIPE_MAP_DISCARD_RANGE ? " discard_range" : - usage & PIPE_MAP_READ ? " read" : " ??"); + usage & PIPE_MAP_READ ? " read" : " staging conflict"); + } tc->bytes_mapped_estimate += box->width; @@ -1719,10 +1740,27 @@ tc_transfer_flush_region(struct pipe_context *_pipe, p->box = *rel_box; } +struct tc_transfer_unmap { + union { + struct pipe_transfer *transfer; + struct pipe_resource *resource; + }; + bool was_staging_transfer; +}; + static void tc_call_transfer_unmap(struct pipe_context *pipe, union tc_payload *payload) { - pipe->transfer_unmap(pipe, payload->transfer); + struct tc_transfer_unmap *p = (struct tc_transfer_unmap *) payload; + if (p->was_staging_transfer) { + struct threaded_resource *tres = threaded_resource(payload->resource); + /* Nothing to do except keeping track of staging uploads */ + assert(tres->pending_staging_uploads > 0); + p_atomic_dec(&tres->pending_staging_uploads); + tc_set_resource_reference(&p->resource, NULL); + return; + } + pipe->transfer_unmap(pipe, p->transfer); } static void @@ -1751,21 +1789,30 @@ tc_transfer_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer) return; } + bool was_staging_transfer = false; + if (tres->b.target == PIPE_BUFFER) { if (transfer->usage & PIPE_MAP_WRITE && !(transfer->usage & PIPE_MAP_FLUSH_EXPLICIT)) tc_buffer_do_flush_region(tc, ttrans, &transfer->box); - /* Staging transfers don't send the call to the driver. 
*/ if (ttrans->staging) { + was_staging_transfer = true; + pipe_resource_reference(&ttrans->staging, NULL); pipe_resource_reference(&ttrans->b.resource, NULL); slab_free(&tc->pool_transfers, ttrans); - return; } } - - tc_add_small_call(tc, TC_CALL_transfer_unmap)->transfer = transfer; + struct tc_transfer_unmap *p = tc_add_struct_typed_call(tc, TC_CALL_transfer_unmap, + tc_transfer_unmap); + if (was_staging_transfer) { + tc_set_resource_reference(&p->resource, &tres->b); + p->was_staging_transfer = true; + } else { + p->transfer = transfer; + p->was_staging_transfer = false; + } /* tc_transfer_map directly maps the buffers, but tc_transfer_unmap * defers the unmap operation to the batch execution. diff --git a/src/gallium/auxiliary/util/u_threaded_context.h b/src/gallium/auxiliary/util/u_threaded_context.h index 561e896e684..3adec8e61f7 100644 --- a/src/gallium/auxiliary/util/u_threaded_context.h +++ b/src/gallium/auxiliary/util/u_threaded_context.h @@ -282,6 +282,14 @@ struct threaded_resource { * are too large for the visible VRAM window. */ int max_forced_staging_uploads; + + /* If positive, then a staging transfer is in progress. + */ + int pending_staging_uploads; + /* If staging uploads are pending, this will hold the union of the mapped + * ranges. + */ + struct util_range pending_staging_uploads_range; }; struct threaded_transfer {