winsys/amdgpu: remove amdgpu_winsys_bo::num_cs_references to remove atomics

This decreases the CPU time share of amdgpu_cs_add_buffer by 50%
on a Ryzen 3900X.

We don't need to call amdgpu_bo_is_referenced_by_any_cs
in amdgpu_bo_can_reclaim: the reclaim function is only called for buffers
that have 0 references, and any CS still referencing a buffer would also
still hold a reference to it.

The only downside is that amdgpu_bo_is_referenced_by_cs might be slower
in some very rare cases, since it now always has to look the buffer up in
the CS buffer list instead of taking the counter-based early-out (see the
sketch below). Overall, driver overhead is lower.
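
For reference, here is the old body of amdgpu_bo_is_referenced_by_cs that this
change drops (reproduced from the amdgpu_cs.h hunk below, with comments added
to spell out the two early-outs the counter provided):

static inline bool
amdgpu_bo_is_referenced_by_cs(struct amdgpu_cs *cs,
                              struct amdgpu_winsys_bo *bo)
{
   int num_refs = bo->num_cs_references;

   /* Early-out 1: referenced by every CS in the winsys, so necessarily by
    * this one.  Early-out 2: referenced by no CS at all, so the lookup can
    * be skipped.  Only the remaining cases needed the buffer-list lookup,
    * which is all the new version does. */
   return num_refs == bo->ws->num_cs ||
          (num_refs && amdgpu_lookup_buffer_any_type(cs->csc, bo) != -1);
}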

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Zoltán Böszörményi <zboszor@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8849>
Marek Olšák 2021-02-02 22:37:28 -05:00 committed by Marge Bot
parent 06b9dedfd9
commit ff311df6b5
4 changed files with 1 addition and 27 deletions

@@ -614,12 +614,6 @@ error_bo_alloc:
bool amdgpu_bo_can_reclaim(struct pb_buffer *_buf)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
-   if (amdgpu_bo_is_referenced_by_any_cs(bo)) {
-      return false;
-   }
   return amdgpu_bo_wait(_buf, 0, RADEON_USAGE_READWRITE);
}
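
The invariant behind this: a buffer only becomes a reclaim candidate after its
last reference has been dropped, and every CS holding the buffer in its buffer
list also holds such a reference (via amdgpu_winsys_bo_reference, see the
amdgpu_cs.c hunks below). A minimal sketch of that reasoning, with hypothetical
names rather than the real pb_cache code:

/* Illustrative only: 'cached_entry' and 'try_reclaim' are made-up names. */
struct cached_entry {
   struct pb_buffer *buf;   /* refcount already reached 0 when it was cached */
};

static bool try_reclaim(struct cached_entry *entry)
{
   /* A CS that still referenced this BO would still hold a reference to it,
    * so the BO could never have entered the cache.  That makes the old
    * amdgpu_bo_is_referenced_by_any_cs() check dead code here; only the
    * busy/fence check in amdgpu_bo_can_reclaim() is needed. */
   return amdgpu_bo_can_reclaim(entry->buf);
}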

@@ -93,9 +93,6 @@ struct amdgpu_winsys_bo {
   uint64_t va;
   simple_mtx_t lock;
-   /* how many command streams is this bo referenced in? */
-   int num_cs_references;
   /* how many command streams, which are being emitted in a separate
    * thread, is this bo referenced in? */
   volatile int num_active_ioctls;

@@ -489,7 +489,6 @@ amdgpu_do_add_real_buffer(struct amdgpu_cs_context *cs, struct amdgpu_winsys_bo
   memset(buffer, 0, sizeof(*buffer));
   amdgpu_winsys_bo_reference(&buffer->bo, bo);
-   p_atomic_inc(&bo->num_cs_references);
   cs->num_real_buffers++;

   return idx;
@@ -560,7 +559,6 @@ static int amdgpu_lookup_or_add_slab_buffer(struct radeon_cmdbuf *rcs,
   memset(buffer, 0, sizeof(*buffer));
   amdgpu_winsys_bo_reference(&buffer->bo, bo);
   buffer->u.slab.real_idx = real_idx;
-   p_atomic_inc(&bo->num_cs_references);
   cs->num_slab_buffers++;

   hash = bo->unique_id & (ARRAY_SIZE(cs->buffer_indices_hashlist)-1);
@@ -604,7 +602,6 @@ static int amdgpu_lookup_or_add_sparse_buffer(struct radeon_cmdbuf *rcs,
   memset(buffer, 0, sizeof(*buffer));
   amdgpu_winsys_bo_reference(&buffer->bo, bo);
-   p_atomic_inc(&bo->num_cs_references);
   cs->num_sparse_buffers++;

   hash = bo->unique_id & (ARRAY_SIZE(cs->buffer_indices_hashlist)-1);
@@ -914,15 +911,12 @@ static void amdgpu_cs_context_cleanup(struct amdgpu_cs_context *cs)
   unsigned i;

   for (i = 0; i < cs->num_real_buffers; i++) {
-      p_atomic_dec(&cs->real_buffers[i].bo->num_cs_references);
      amdgpu_winsys_bo_reference(&cs->real_buffers[i].bo, NULL);
   }
   for (i = 0; i < cs->num_slab_buffers; i++) {
-      p_atomic_dec(&cs->slab_buffers[i].bo->num_cs_references);
      amdgpu_winsys_bo_reference(&cs->slab_buffers[i].bo, NULL);
   }
   for (i = 0; i < cs->num_sparse_buffers; i++) {
-      p_atomic_dec(&cs->sparse_buffers[i].bo->num_cs_references);
      amdgpu_winsys_bo_reference(&cs->sparse_buffers[i].bo, NULL);
   }
   cleanup_fence_list(&cs->fence_dependencies);

@@ -225,9 +225,7 @@ static inline bool
amdgpu_bo_is_referenced_by_cs(struct amdgpu_cs *cs,
                              struct amdgpu_winsys_bo *bo)
{
-   int num_refs = bo->num_cs_references;
-   return num_refs == bo->ws->num_cs ||
-          (num_refs && amdgpu_lookup_buffer_any_type(cs->csc, bo) != -1);
+   return amdgpu_lookup_buffer_any_type(cs->csc, bo) != -1;
}
static inline bool
@@ -238,9 +236,6 @@ amdgpu_bo_is_referenced_by_cs_with_usage(struct amdgpu_cs *cs,
   int index;
   struct amdgpu_cs_buffer *buffer;
-   if (!bo->num_cs_references)
-      return false;
   index = amdgpu_lookup_buffer_any_type(cs->csc, bo);
   if (index == -1)
      return false;
@@ -252,12 +247,6 @@ amdgpu_bo_is_referenced_by_cs_with_usage(struct amdgpu_cs *cs,
   return (buffer->usage & usage) != 0;
}
-static inline bool
-amdgpu_bo_is_referenced_by_any_cs(struct amdgpu_winsys_bo *bo)
-{
-   return bo->num_cs_references != 0;
-}
bool amdgpu_fence_wait(struct pipe_fence_handle *fence, uint64_t timeout,
                       bool absolute);
void amdgpu_add_fences(struct amdgpu_winsys_bo *bo,
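
For context on the "very rare cases" mentioned in the commit message: with the
counter gone, every is-referenced query goes through the per-CS buffer lookup.
A rough sketch of what such a lookup does, assembled from the
buffer_indices_hashlist lines visible in the amdgpu_cs.c hunks above
(simplified; the real amdgpu_lookup_buffer_any_type differs in detail):

static int lookup_buffer_sketch(struct amdgpu_cs_context *cs,
                                struct amdgpu_winsys_bo *bo,
                                struct amdgpu_cs_buffer *buffers,
                                int num_buffers)
{
   /* Small direct-mapped hash keyed on the BO's unique id... */
   unsigned hash = bo->unique_id & (ARRAY_SIZE(cs->buffer_indices_hashlist) - 1);
   int i = cs->buffer_indices_hashlist[hash];

   if (i >= 0 && i < num_buffers && buffers[i].bo == bo)
      return i;                                   /* hash hit */

   /* ...with a linear scan as the fallback on a miss or collision. */
   for (i = 0; i < num_buffers; i++) {
      if (buffers[i].bo == bo) {
         cs->buffer_indices_hashlist[hash] = i;   /* cache for next time */
         return i;
      }
   }
   return -1;                                     /* not referenced by this CS */
}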