gallium/u_threaded: fix staging and non-staging conflicts
In the following sequence:

- transfer_map(buffer, DISCARD) // use staging upload
- memcpy(...)
- transfer_unmap
- draw
- transfer_map(buffer, UNSYNCHRONIZED) // non-staging upload
- memcpy(...)
- draw

Currently the order of operations is:

- map#1 -> staging buffer
- memcpy to staging buffer
- map#2
- memcpy to buffer
- staging buffer copy to real buffer
- draw#1
- draw#2

When the 2nd map operation doesn't use UNSYNCHRONIZED, the tc_sync_msg() call will make sure that the bo is unused before mapping it. But if it does use UNSYNCHRONIZED and the mapped intervals overlap, this commit clears the UNSYNCHRONIZED flag to make sure ordering is maintained. This will affect performance, but correct output is better than fast output.

See https://gitlab.freedesktop.org/mesa/mesa/-/issues/3611.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7098>
This commit is contained in:
parent
a5e0a2e101
commit
2900f82e19
|
@ -374,6 +374,8 @@ threaded_resource_init(struct pipe_resource *res)
|
|||
tres->base_valid_buffer_range = &tres->valid_buffer_range;
|
||||
tres->is_shared = false;
|
||||
tres->is_user_ptr = false;
|
||||
tres->pending_staging_uploads = 0;
|
||||
util_range_init(&tres->pending_staging_uploads_range);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -384,6 +386,7 @@ threaded_resource_deinit(struct pipe_resource *res)
|
|||
if (tres->latest != &tres->b)
|
||||
pipe_resource_reference(&tres->latest, NULL);
|
||||
util_range_destroy(&tres->valid_buffer_range);
|
||||
util_range_destroy(&tres->pending_staging_uploads_range);
|
||||
}
|
||||
|
||||
struct pipe_context *
|
||||
|
@ -1618,15 +1621,33 @@ tc_transfer_map(struct pipe_context *_pipe,
|
|||
ttrans->b.stride = 0;
|
||||
ttrans->b.layer_stride = 0;
|
||||
*transfer = &ttrans->b;
|
||||
|
||||
p_atomic_inc(&tres->pending_staging_uploads);
|
||||
util_range_add(resource, &tres->pending_staging_uploads_range,
|
||||
box->x, box->x + box->width);
|
||||
|
||||
return map + (box->x % tc->map_buffer_alignment);
|
||||
}
|
||||
|
||||
if (usage & PIPE_MAP_UNSYNCHRONIZED &&
|
||||
p_atomic_read(&tres->pending_staging_uploads) &&
|
||||
util_ranges_intersect(&tres->pending_staging_uploads_range, box->x, box->x + box->width)) {
|
||||
/* Write conflict detected between a staging transfer and the direct mapping we're
|
||||
* going to do. Resolve the conflict by ignoring UNSYNCHRONIZED so the direct mapping
|
||||
* will have to wait for the staging transfer completion.
|
||||
* Note: The conflict detection is only based on the mapped range, not on the actual
|
||||
* written range(s).
|
||||
*/
|
||||
usage &= ~PIPE_MAP_UNSYNCHRONIZED & ~TC_TRANSFER_MAP_THREADED_UNSYNC;
|
||||
}
|
||||
}
|
||||
|
||||
/* Unsychronized buffer mappings don't have to synchronize the thread. */
|
||||
if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC))
|
||||
if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC)) {
|
||||
tc_sync_msg(tc, resource->target != PIPE_BUFFER ? " texture" :
|
||||
usage & PIPE_MAP_DISCARD_RANGE ? " discard_range" :
|
||||
usage & PIPE_MAP_READ ? " read" : " ??");
|
||||
usage & PIPE_MAP_READ ? " read" : " staging conflict");
|
||||
}
|
||||
|
||||
tc->bytes_mapped_estimate += box->width;
|
||||
|
||||
|
@ -1719,10 +1740,27 @@ tc_transfer_flush_region(struct pipe_context *_pipe,
|
|||
p->box = *rel_box;
|
||||
}
|
||||
|
||||
struct tc_transfer_unmap {
|
||||
union {
|
||||
struct pipe_transfer *transfer;
|
||||
struct pipe_resource *resource;
|
||||
};
|
||||
bool was_staging_transfer;
|
||||
};
|
||||
|
||||
static void
|
||||
tc_call_transfer_unmap(struct pipe_context *pipe, union tc_payload *payload)
|
||||
{
|
||||
pipe->transfer_unmap(pipe, payload->transfer);
|
||||
struct tc_transfer_unmap *p = (struct tc_transfer_unmap *) payload;
|
||||
if (p->was_staging_transfer) {
|
||||
struct threaded_resource *tres = threaded_resource(payload->resource);
|
||||
/* Nothing to do except keeping track of staging uploads */
|
||||
assert(tres->pending_staging_uploads > 0);
|
||||
p_atomic_dec(&tres->pending_staging_uploads);
|
||||
tc_set_resource_reference(&p->resource, NULL);
|
||||
return;
|
||||
}
|
||||
pipe->transfer_unmap(pipe, p->transfer);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -1751,21 +1789,30 @@ tc_transfer_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer)
|
|||
return;
|
||||
}
|
||||
|
||||
bool was_staging_transfer = false;
|
||||
|
||||
if (tres->b.target == PIPE_BUFFER) {
|
||||
if (transfer->usage & PIPE_MAP_WRITE &&
|
||||
!(transfer->usage & PIPE_MAP_FLUSH_EXPLICIT))
|
||||
tc_buffer_do_flush_region(tc, ttrans, &transfer->box);
|
||||
|
||||
/* Staging transfers don't send the call to the driver. */
|
||||
if (ttrans->staging) {
|
||||
was_staging_transfer = true;
|
||||
|
||||
pipe_resource_reference(&ttrans->staging, NULL);
|
||||
pipe_resource_reference(&ttrans->b.resource, NULL);
|
||||
slab_free(&tc->pool_transfers, ttrans);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
tc_add_small_call(tc, TC_CALL_transfer_unmap)->transfer = transfer;
|
||||
struct tc_transfer_unmap *p = tc_add_struct_typed_call(tc, TC_CALL_transfer_unmap,
|
||||
tc_transfer_unmap);
|
||||
if (was_staging_transfer) {
|
||||
tc_set_resource_reference(&p->resource, &tres->b);
|
||||
p->was_staging_transfer = true;
|
||||
} else {
|
||||
p->transfer = transfer;
|
||||
p->was_staging_transfer = false;
|
||||
}
|
||||
|
||||
/* tc_transfer_map directly maps the buffers, but tc_transfer_unmap
|
||||
* defers the unmap operation to the batch execution.
|
||||
|
|
|
@ -282,6 +282,14 @@ struct threaded_resource {
|
|||
* are too large for the visible VRAM window.
|
||||
*/
|
||||
int max_forced_staging_uploads;
|
||||
|
||||
/* If positive, then a staging transfer is in progress.
|
||||
*/
|
||||
int pending_staging_uploads;
|
||||
/* If staging uploads are pending, this will hold the union of the mapped
|
||||
* ranges.
|
||||
*/
|
||||
struct util_range pending_staging_uploads_range;
|
||||
};
|
||||
|
||||
struct threaded_transfer {
|
||||
|
|
Loading…
Reference in New Issue