winsys/amdgpu: use only one fence per BO

The fence that is added to the BO during flush is guaranteed to be
signaled after all the fences that were in the fences array of the BO
before the flush, because those fences are added as dependencies for the
submission (and all this happens atomically under the bo_fence_lock).

Therefore, keeping only the last fence around is sufficient.
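
In code terms (a condensed sketch of the amdgpu_cs_flush hunk below, not
verbatim): dependencies are collected from the old per-BO fences before any
bo->fence pointer is replaced, all inside one bo_fence_lock critical section.

   /* Condensed sketch; mirrors the flush hunk below. */
   pipe_mutex_lock(ws->bo_fence_lock);
   amdgpu_add_fence_dependencies(cs);    /* old bo->fence -> dependency */
   for (i = 0; i < num_buffers; i++)
      amdgpu_fence_reference(&cur->buffers[i].bo->fence, cur->fence);
   pipe_mutex_unlock(ws->bo_fence_lock); /* new fence is now the only fence */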

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Author: Nicolai Hähnle
Date:   2016-09-07 10:50:14 +02:00
Parent: 480ac143df
Commit: 11cbf4d7ae

3 files changed, 50 insertions(+), 62 deletions(-)

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c

@@ -44,7 +44,6 @@ static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
    struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
    struct amdgpu_winsys *ws = bo->ws;
    int64_t abs_timeout;
-   int i;
 
    if (timeout == 0) {
       if (p_atomic_read(&bo->num_active_ioctls))
@@ -75,49 +74,42 @@ static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
 
    if (timeout == 0) {
       pipe_mutex_lock(ws->bo_fence_lock);
-      for (i = 0; i < RING_LAST; i++)
-         if (bo->fence[i]) {
-            if (amdgpu_fence_wait(bo->fence[i], 0, false)) {
-               /* Release the idle fence to avoid checking it again later. */
-               amdgpu_fence_reference(&bo->fence[i], NULL);
-            } else {
-               pipe_mutex_unlock(ws->bo_fence_lock);
-               return false;
-            }
-         }
+      if (bo->fence) {
+         if (amdgpu_fence_wait(bo->fence, 0, false)) {
+            /* Release the idle fence to avoid checking it again later. */
+            amdgpu_fence_reference(&bo->fence, NULL);
+         } else {
+            pipe_mutex_unlock(ws->bo_fence_lock);
+            return false;
+         }
+      }
       pipe_mutex_unlock(ws->bo_fence_lock);
       return true;
 
    } else {
-      struct pipe_fence_handle *fence[RING_LAST] = {};
-      bool fence_idle[RING_LAST] = {};
+      struct pipe_fence_handle *fence = NULL;
+      bool fence_idle = false;
       bool buffer_idle = true;
 
-      /* Take references to all fences, so that we can wait for them
+      /* Take a reference to the fence, so that we can wait for it
        * without the lock. */
       pipe_mutex_lock(ws->bo_fence_lock);
-      for (i = 0; i < RING_LAST; i++)
-         amdgpu_fence_reference(&fence[i], bo->fence[i]);
+      amdgpu_fence_reference(&fence, bo->fence);
       pipe_mutex_unlock(ws->bo_fence_lock);
 
-      /* Now wait for the fences. */
-      for (i = 0; i < RING_LAST; i++) {
-         if (fence[i]) {
-            if (amdgpu_fence_wait(fence[i], abs_timeout, true))
-               fence_idle[i] = true;
-            else
-               buffer_idle = false;
-         }
+      /* Now wait for the fence. */
+      if (fence) {
+         if (amdgpu_fence_wait(fence, abs_timeout, true))
+            fence_idle = true;
+         else
+            buffer_idle = false;
       }
 
       /* Release idle fences to avoid checking them again later. */
       pipe_mutex_lock(ws->bo_fence_lock);
-      for (i = 0; i < RING_LAST; i++) {
-         if (fence[i] == bo->fence[i] && fence_idle[i])
-            amdgpu_fence_reference(&bo->fence[i], NULL);
-         amdgpu_fence_reference(&fence[i], NULL);
-      }
+      if (fence == bo->fence && fence_idle)
+         amdgpu_fence_reference(&bo->fence, NULL);
+      amdgpu_fence_reference(&fence, NULL);
       pipe_mutex_unlock(ws->bo_fence_lock);
 
       return buffer_idle;
@@ -133,7 +125,6 @@ static enum radeon_bo_domain amdgpu_bo_get_initial_domain(
 void amdgpu_bo_destroy(struct pb_buffer *_buf)
 {
    struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
-   int i;
 
    pipe_mutex_lock(bo->ws->global_bo_list_lock);
    LIST_DEL(&bo->global_list_item);
@@ -144,8 +135,7 @@ void amdgpu_bo_destroy(struct pb_buffer *_buf)
    amdgpu_va_range_free(bo->va_handle);
    amdgpu_bo_free(bo->bo);
 
-   for (i = 0; i < RING_LAST; i++)
-      amdgpu_fence_reference(&bo->fence[i], NULL);
+   amdgpu_fence_reference(&bo->fence, NULL);
 
    if (bo->initial_domain & RADEON_DOMAIN_VRAM)
       bo->ws->allocated_vram -= align64(bo->base.size, bo->ws->info.gart_page_size);
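
The timeout path above follows a pattern worth noting: pin the fence under the
lock, wait without the lock, then clear bo->fence only if it is still the
fence that was seen to signal. A condensed sketch with a hypothetical helper
name (bo_wait_sketch is not in the tree):

   static bool bo_wait_sketch(struct amdgpu_winsys *ws,
                              struct amdgpu_winsys_bo *bo, int64_t abs_timeout)
   {
      struct pipe_fence_handle *fence = NULL;
      bool idle;

      pipe_mutex_lock(ws->bo_fence_lock);
      amdgpu_fence_reference(&fence, bo->fence);   /* pin the current fence */
      pipe_mutex_unlock(ws->bo_fence_lock);

      idle = !fence || amdgpu_fence_wait(fence, abs_timeout, true);

      pipe_mutex_lock(ws->bo_fence_lock);
      /* A concurrent flush may have installed a newer fence meanwhile. */
      if (idle && fence && fence == bo->fence)
         amdgpu_fence_reference(&bo->fence, NULL);
      amdgpu_fence_reference(&fence, NULL);        /* drop our pin */
      pipe_mutex_unlock(ws->bo_fence_lock);

      return idle;
   }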

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h

@@ -62,8 +62,8 @@ struct amdgpu_winsys_bo {
     */
    volatile int is_shared; /* bool (int for atomicity) */
 
-   /* Fences for buffer synchronization. */
-   struct pipe_fence_handle *fence[RING_LAST];
+   /* Fence for buffer synchronization. */
+   struct pipe_fence_handle *fence;
 
    struct list_head global_list_item;
 };
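
amdgpu_fence_reference, used throughout this commit, is Gallium's usual
reference-counted assignment: retain the new fence, release the old one,
update the pointer. An illustration with hypothetical field and helper names
(the real helper is built on pipe_reference()):

   static void fence_reference_sketch(struct amdgpu_fence **dst,
                                      struct amdgpu_fence *src)
   {
      if (src)
         p_atomic_inc(&src->refcount);                 /* retain new */
      if (*dst && p_atomic_dec_zero(&(*dst)->refcount))
         fence_destroy_sketch(*dst);                   /* last reference gone */
      *dst = src;
   }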

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c

@@ -827,44 +827,42 @@ DEBUG_GET_ONCE_BOOL_OPTION(all_bos, "RADEON_ALL_BOS", false)
 static void amdgpu_add_fence_dependencies(struct amdgpu_cs *acs)
 {
    struct amdgpu_cs_context *cs = acs->csc;
-   int i, j;
+   int i;
 
    cs->request.number_of_dependencies = 0;
 
    for (i = 0; i < cs->num_buffers; i++) {
-      for (j = 0; j < RING_LAST; j++) {
-         struct amdgpu_cs_fence *dep;
-         unsigned idx;
+      struct amdgpu_cs_fence *dep;
+      unsigned idx;
 
-         struct amdgpu_fence *bo_fence = (void *)cs->buffers[i].bo->fence[j];
-         if (!bo_fence)
-            continue;
+      struct amdgpu_fence *bo_fence = (void *)cs->buffers[i].bo->fence;
+      if (!bo_fence)
+         continue;
 
-         if (bo_fence->ctx == acs->ctx &&
-             bo_fence->fence.ip_type == cs->request.ip_type &&
-             bo_fence->fence.ip_instance == cs->request.ip_instance &&
-             bo_fence->fence.ring == cs->request.ring)
-            continue;
+      if (bo_fence->ctx == acs->ctx &&
+          bo_fence->fence.ip_type == cs->request.ip_type &&
+          bo_fence->fence.ip_instance == cs->request.ip_instance &&
+          bo_fence->fence.ring == cs->request.ring)
+         continue;
 
-         if (amdgpu_fence_wait((void *)bo_fence, 0, false))
-            continue;
+      if (amdgpu_fence_wait((void *)bo_fence, 0, false))
+         continue;
 
-         if (bo_fence->submission_in_progress)
-            os_wait_until_zero(&bo_fence->submission_in_progress,
-                               PIPE_TIMEOUT_INFINITE);
+      if (bo_fence->submission_in_progress)
+         os_wait_until_zero(&bo_fence->submission_in_progress,
+                            PIPE_TIMEOUT_INFINITE);
 
-         idx = cs->request.number_of_dependencies++;
-         if (idx >= cs->max_dependencies) {
-            unsigned size;
+      idx = cs->request.number_of_dependencies++;
+      if (idx >= cs->max_dependencies) {
+         unsigned size;
 
-            cs->max_dependencies = idx + 8;
-            size = cs->max_dependencies * sizeof(struct amdgpu_cs_fence);
-            cs->request.dependencies = realloc(cs->request.dependencies, size);
-         }
+         cs->max_dependencies = idx + 8;
+         size = cs->max_dependencies * sizeof(struct amdgpu_cs_fence);
+         cs->request.dependencies = realloc(cs->request.dependencies, size);
+      }
 
-         dep = &cs->request.dependencies[idx];
-         memcpy(dep, &bo_fence->fence, sizeof(*dep));
-      }
+      dep = &cs->request.dependencies[idx];
+      memcpy(dep, &bo_fence->fence, sizeof(*dep));
    }
 }
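
The two "continue" cases above are the interesting part: a fence from the same
context and ring needs no explicit dependency because the kernel executes
submissions on one ring in order, and an already-signaled fence needs none
either. Factored out as a sketch (needs_dependency is a hypothetical helper):

   static bool needs_dependency(struct amdgpu_fence *f, struct amdgpu_cs *acs,
                                struct amdgpu_cs_context *cs)
   {
      if (f->ctx == acs->ctx &&
          f->fence.ip_type == cs->request.ip_type &&
          f->fence.ip_instance == cs->request.ip_instance &&
          f->fence.ring == cs->request.ring)
         return false;   /* same queue: ring order already serializes us */
      if (amdgpu_fence_wait((void *)f, 0, false))
         return false;   /* already signaled: nothing to wait for */
      return true;
   }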
@@ -1054,7 +1052,7 @@ static int amdgpu_cs_flush(struct radeon_winsys_cs *rcs,
       amdgpu_add_fence_dependencies(cs);
       for (i = 0; i < num_buffers; i++) {
          p_atomic_inc(&cur->buffers[i].bo->num_active_ioctls);
-         amdgpu_fence_reference(&cur->buffers[i].bo->fence[cs->ring_type],
+         amdgpu_fence_reference(&cur->buffers[i].bo->fence,
                                 cur->fence);
       }
       pipe_mutex_unlock(ws->bo_fence_lock);
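
Taken together, the three files give the per-BO fence a simple lifecycle,
summarized as a sketch:

   /* Lifecycle of the single per-BO fence after this change:
    *
    *   flush   (amdgpu_cs.c): old bo->fence becomes a dependency of the new
    *                          submission, then bo->fence = new fence
    *                          (both under ws->bo_fence_lock)
    *   wait    (amdgpu_bo.c): wait on bo->fence, clear it once it signals
    *   destroy (amdgpu_bo.c): amdgpu_fence_reference(&bo->fence, NULL)
    */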