gallium/u_threaded: fix staging and non-staging conflicts

In the following sequence (a code sketch follows the list):
  - transfer_map(buffer, DISCARD) // use staging upload
  - memcpy(...)
  - transfer_unmap
  - draw
  - transfer_map(buffer, UNSYNCHRONIZED) // non-staging upload
  - memcpy(...)
  - draw
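
As a sketch, the sequence corresponds to Gallium calls along these lines
(the helper name, buffer and sizes are made up for illustration; only the
map flags matter):

  #include <string.h>
  #include "pipe/p_context.h"
  #include "pipe/p_state.h"
  #include "util/u_box.h"

  static void
  upload_twice(struct pipe_context *pipe, struct pipe_resource *buf,
               const char data[64])
  {
     struct pipe_transfer *t;
     struct pipe_box box;
     void *map;

     u_box_1d(0, 64, &box);

     /* map#1: DISCARD_RANGE lets u_threaded_context use a staging upload */
     map = pipe->transfer_map(pipe, buf, 0,
                              PIPE_MAP_WRITE | PIPE_MAP_DISCARD_RANGE,
                              &box, &t);
     memcpy(map, data, 64);
     pipe->transfer_unmap(pipe, t); /* queues the staging->buffer copy */
     /* draw */

     /* map#2: UNSYNCHRONIZED writes the real buffer directly, immediately */
     map = pipe->transfer_map(pipe, buf, 0,
                              PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED,
                              &box, &t);
     memcpy(map, data, 64);
     pipe->transfer_unmap(pipe, t);
     /* draw */
  }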

Currently the order of operations is:
  - map#1 - staging buffer
  - memcpy to staging buffer
  - map#2
  - memcpy to buffer
  - staging buffer copy to real buffer
  - draw#1
  - draw#2

Where the two mapped ranges overlap, the deferred staging copy overwrites
the data written directly at map#2, so draw#2 reads stale data.

When the second map operation doesn't use UNSYNCHRONIZED, the tc_sync_msg()
call makes sure that the BO is idle before it is mapped.

But if it does use UNSYNCHRONIZED and the mapped intervals overlap, this
commit clears the UNSYNCHRONIZED flag to make sure ordering is maintained.
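
A standalone model of that overlap check (a plain [start, end) pair stands
in for util_range here; the real code uses util_range_add() and
util_ranges_intersect() from util/u_range.h):

  #include <assert.h>
  #include <stdbool.h>

  struct range { unsigned start, end; };   /* empty when start >= end */

  static void range_add(struct range *r, unsigned start, unsigned end)
  {
     if (r->start >= r->end) {
        r->start = start;                  /* first range: take it as-is */
        r->end = end;
     } else {
        if (start < r->start) r->start = start;  /* grow to the union */
        if (end > r->end)     r->end = end;
     }
  }

  static bool ranges_intersect(const struct range *r, unsigned start, unsigned end)
  {
     return r->start < r->end && start < r->end && r->start < end;
  }

  int main(void)
  {
     struct range pending = { 1, 0 };      /* no staging upload yet */

     range_add(&pending, 0, 64);           /* map#1 stages bytes [0, 64) */
     assert(ranges_intersect(&pending, 32, 96));   /* map#2 overlaps: sync */
     assert(!ranges_intersect(&pending, 64, 128)); /* disjoint: stay unsync */
     return 0;
  }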

This will affect performance, but correct output is better than fast output.

See https://gitlab.freedesktop.org/mesa/mesa/-/issues/3611.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7098>
Author: Pierre-Eric Pelloux-Prayer
Date:   2020-10-12 13:28:37 +02:00
Commit: 2900f82e19 (parent: a5e0a2e101)
2 changed files with 62 additions and 7 deletions

src/gallium/auxiliary/util/u_threaded_context.c

@@ -374,6 +374,8 @@ threaded_resource_init(struct pipe_resource *res)
    tres->base_valid_buffer_range = &tres->valid_buffer_range;
    tres->is_shared = false;
    tres->is_user_ptr = false;
+   tres->pending_staging_uploads = 0;
+   util_range_init(&tres->pending_staging_uploads_range);
 }
 
 void
@@ -384,6 +386,7 @@ threaded_resource_deinit(struct pipe_resource *res)
    if (tres->latest != &tres->b)
       pipe_resource_reference(&tres->latest, NULL);
    util_range_destroy(&tres->valid_buffer_range);
+   util_range_destroy(&tres->pending_staging_uploads_range);
 }
 
 struct pipe_context *
@@ -1618,15 +1621,33 @@ tc_transfer_map(struct pipe_context *_pipe,
          ttrans->b.stride = 0;
          ttrans->b.layer_stride = 0;
          *transfer = &ttrans->b;
+
+         p_atomic_inc(&tres->pending_staging_uploads);
+         util_range_add(resource, &tres->pending_staging_uploads_range,
+                        box->x, box->x + box->width);
+
          return map + (box->x % tc->map_buffer_alignment);
       }
+
+      if (usage & PIPE_MAP_UNSYNCHRONIZED &&
+          p_atomic_read(&tres->pending_staging_uploads) &&
+          util_ranges_intersect(&tres->pending_staging_uploads_range, box->x, box->x + box->width)) {
+         /* Write conflict detected between a staging transfer and the
+          * direct mapping we're going to do. Resolve the conflict by
+          * ignoring UNSYNCHRONIZED so the direct mapping will have to
+          * wait for the staging transfer completion.
+          * Note: The conflict detection is only based on the mapped
+          * range, not on the actual written range(s).
+          */
+         usage &= ~PIPE_MAP_UNSYNCHRONIZED & ~TC_TRANSFER_MAP_THREADED_UNSYNC;
+      }
    }
 
    /* Unsychronized buffer mappings don't have to synchronize the thread. */
-   if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC))
+   if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC)) {
       tc_sync_msg(tc, resource->target != PIPE_BUFFER ? " texture" :
                       usage & PIPE_MAP_DISCARD_RANGE ? " discard_range" :
-                      usage & PIPE_MAP_READ ? " read" : " ??");
+                      usage & PIPE_MAP_READ ? " read" : " staging conflict");
+   }
 
    tc->bytes_mapped_estimate += box->width;
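
One subtlety in the new flag clearing: ~PIPE_MAP_UNSYNCHRONIZED &
~TC_TRANSFER_MAP_THREADED_UNSYNC evaluates to a single mask with both bits
cleared (De Morgan), so one &= strips both flags at once. A quick
self-contained check, with stand-in bit values rather than the real ones
from p_defines.h and u_threaded_context.h:

  #include <assert.h>

  #define PIPE_MAP_UNSYNCHRONIZED         (1u << 3)   /* stand-in value */
  #define TC_TRANSFER_MAP_THREADED_UNSYNC (1u << 16)  /* stand-in value */

  int main(void)
  {
     unsigned usage = PIPE_MAP_UNSYNCHRONIZED |
                      TC_TRANSFER_MAP_THREADED_UNSYNC | 0x1;

     usage &= ~PIPE_MAP_UNSYNCHRONIZED & ~TC_TRANSFER_MAP_THREADED_UNSYNC;

     /* same as: usage &= ~(PIPE_MAP_UNSYNCHRONIZED |
      *                     TC_TRANSFER_MAP_THREADED_UNSYNC); */
     assert(usage == 0x1);
     return 0;
  }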
@@ -1719,10 +1740,27 @@ tc_transfer_flush_region(struct pipe_context *_pipe,
       p->box = *rel_box;
 }
 
+struct tc_transfer_unmap {
+   union {
+      struct pipe_transfer *transfer;
+      struct pipe_resource *resource;
+   };
+   bool was_staging_transfer;
+};
+
 static void
 tc_call_transfer_unmap(struct pipe_context *pipe, union tc_payload *payload)
 {
-   pipe->transfer_unmap(pipe, payload->transfer);
+   struct tc_transfer_unmap *p = (struct tc_transfer_unmap *) payload;
+   if (p->was_staging_transfer) {
+      struct threaded_resource *tres = threaded_resource(p->resource);
+      /* Nothing to do except keeping track of staging uploads */
+      assert(tres->pending_staging_uploads > 0);
+      p_atomic_dec(&tres->pending_staging_uploads);
+      tc_set_resource_reference(&p->resource, NULL);
+      return;
+   }
+   pipe->transfer_unmap(pipe, p->transfer);
 }
 
 static void
@@ -1751,21 +1789,30 @@ tc_transfer_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer)
       return;
    }
 
+   bool was_staging_transfer = false;
+
    if (tres->b.target == PIPE_BUFFER) {
       if (transfer->usage & PIPE_MAP_WRITE &&
           !(transfer->usage & PIPE_MAP_FLUSH_EXPLICIT))
          tc_buffer_do_flush_region(tc, ttrans, &transfer->box);
 
-      /* Staging transfers don't send the call to the driver. */
       if (ttrans->staging) {
+         was_staging_transfer = true;
          pipe_resource_reference(&ttrans->staging, NULL);
          pipe_resource_reference(&ttrans->b.resource, NULL);
          slab_free(&tc->pool_transfers, ttrans);
-         return;
       }
    }
 
-   tc_add_small_call(tc, TC_CALL_transfer_unmap)->transfer = transfer;
+   struct tc_transfer_unmap *p = tc_add_struct_typed_call(tc, TC_CALL_transfer_unmap,
+                                                          tc_transfer_unmap);
+   if (was_staging_transfer) {
+      tc_set_resource_reference(&p->resource, &tres->b);
+      p->was_staging_transfer = true;
+   } else {
+      p->transfer = transfer;
+      p->was_staging_transfer = false;
+   }
 
    /* tc_transfer_map directly maps the buffers, but tc_transfer_unmap
     * defers the unmap operation to the batch execution.
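
The two hunks above turn the unmap payload into a small discriminated
union: transfer and resource share storage, and was_staging_transfer
selects the live member, so staging unmaps can reuse the same call slot
for pure bookkeeping without reaching the driver. A self-contained sketch
of the pattern (all names are local to this example):

  #include <assert.h>
  #include <stdbool.h>

  struct transfer { int dummy; };
  struct resource { int pending_staging_uploads; };

  struct unmap_payload {
     union {
        struct transfer *transfer;   /* live when !was_staging_transfer */
        struct resource *resource;   /* live when  was_staging_transfer */
     };
     bool was_staging_transfer;
  };

  static void execute_unmap(struct unmap_payload *p)
  {
     if (p->was_staging_transfer) {
        p->resource->pending_staging_uploads--;  /* bookkeeping only */
        return;
     }
     /* a real driver unmap would happen here, using p->transfer */
     (void)p->transfer;
  }

  int main(void)
  {
     struct resource res = { .pending_staging_uploads = 1 };
     struct unmap_payload p = { .resource = &res,
                                .was_staging_transfer = true };

     execute_unmap(&p);
     assert(res.pending_staging_uploads == 0);
     return 0;
  }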

src/gallium/auxiliary/util/u_threaded_context.h

@@ -282,6 +282,14 @@ struct threaded_resource {
    * are too large for the visible VRAM window.
    */
   int max_forced_staging_uploads;
+
+   /* If positive, then a staging transfer is in progress.
+    */
+   int pending_staging_uploads;
+   /* If staging uploads are pending, this will hold the union of the mapped
+    * ranges.
+    */
+   struct util_range pending_staging_uploads_range;
 };
 
 struct threaded_transfer {
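
The new counter is paired across threads: the application thread increments
it in tc_transfer_map when a staging upload starts, and the driver thread
decrements it when the queued unmap call executes, so a non-zero read means
a staging upload may still be in flight. A minimal C11-atomics analogue of
the p_atomic_inc/p_atomic_dec pairing (names are local to this sketch):

  #include <assert.h>
  #include <stdatomic.h>

  static atomic_int pending_staging_uploads;

  static void map_staging(void)           /* application thread, map#1 */
  {
     atomic_fetch_add(&pending_staging_uploads, 1);
  }

  static void execute_queued_unmap(void)  /* driver thread, later */
  {
     atomic_fetch_sub(&pending_staging_uploads, 1);
  }

  int main(void)
  {
     map_staging();
     /* an UNSYNCHRONIZED map must check this before trusting the flag */
     assert(atomic_load(&pending_staging_uploads) > 0);
     execute_queued_unmap();
     assert(atomic_load(&pending_staging_uploads) == 0);
     return 0;
  }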