From 86eb52adad583182b5878b933d12db87535b84ed Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Fri, 27 Jan 2017 14:35:23 +0100 Subject: [PATCH] winsys/amdgpu: do not iterate twice when adding fence dependencies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The perf difference is very small, 3.25->2.84% in amdgpu_cs_flush() in the DXMD benchmark. Signed-off-by: Samuel Pitoiset Reviewed-by: Marek Olšák --- src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 63 ++++++++++++----------- 1 file changed, 32 insertions(+), 31 deletions(-) diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index 789a5dbafa3..2ce8a6e71cf 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -941,22 +941,6 @@ static void amdgpu_add_fence_dependency(struct amdgpu_cs *acs, bo->num_fences = new_num_fences; } -/* Since the kernel driver doesn't synchronize execution between different - * rings automatically, we have to add fence dependencies manually. - */ -static void amdgpu_add_fence_dependencies(struct amdgpu_cs *acs) -{ - struct amdgpu_cs_context *cs = acs->csc; - int i; - - cs->request.number_of_dependencies = 0; - - for (i = 0; i < cs->num_real_buffers; i++) - amdgpu_add_fence_dependency(acs, &cs->real_buffers[i]); - for (i = 0; i < cs->num_slab_buffers; i++) - amdgpu_add_fence_dependency(acs, &cs->slab_buffers[i]); -} - static void amdgpu_add_fence(struct amdgpu_winsys_bo *bo, struct pipe_fence_handle *fence) { @@ -984,6 +968,38 @@ static void amdgpu_add_fence(struct amdgpu_winsys_bo *bo, bo->num_fences++; } +/* Since the kernel driver doesn't synchronize execution between different + * rings automatically, we have to add fence dependencies manually. + */ +static void amdgpu_add_fence_dependencies(struct amdgpu_cs *acs) +{ + struct amdgpu_cs_context *cs = acs->csc; + unsigned num_buffers; + int i; + + cs->request.number_of_dependencies = 0; + + num_buffers = cs->num_real_buffers; + for (i = 0; i < num_buffers; i++) { + struct amdgpu_cs_buffer *buffer = &cs->real_buffers[i]; + struct amdgpu_winsys_bo *bo = buffer->bo; + + amdgpu_add_fence_dependency(acs, buffer); + p_atomic_inc(&bo->num_active_ioctls); + amdgpu_add_fence(bo, cs->fence); + } + + num_buffers = cs->num_slab_buffers; + for (i = 0; i < num_buffers; i++) { + struct amdgpu_cs_buffer *buffer = &cs->slab_buffers[i]; + struct amdgpu_winsys_bo *bo = buffer->bo; + + amdgpu_add_fence_dependency(acs, buffer); + p_atomic_inc(&bo->num_active_ioctls); + amdgpu_add_fence(bo, cs->fence); + } +} + void amdgpu_cs_submit_ib(void *job, int thread_index) { struct amdgpu_cs *acs = (struct amdgpu_cs*)job; @@ -1146,7 +1162,6 @@ static int amdgpu_cs_flush(struct radeon_winsys_cs *rcs, cs->main.base.current.cdw <= cs->main.base.current.max_dw && !debug_get_option_noop())) { struct amdgpu_cs_context *cur = cs->csc; - unsigned i, num_buffers; /* Set IB sizes. */ amdgpu_ib_finalize(&cs->main); @@ -1183,20 +1198,6 @@ static int amdgpu_cs_flush(struct radeon_winsys_cs *rcs, pipe_mutex_lock(ws->bo_fence_lock); amdgpu_add_fence_dependencies(cs); - num_buffers = cur->num_real_buffers; - for (i = 0; i < num_buffers; i++) { - struct amdgpu_winsys_bo *bo = cur->real_buffers[i].bo; - p_atomic_inc(&bo->num_active_ioctls); - amdgpu_add_fence(bo, cur->fence); - } - - num_buffers = cur->num_slab_buffers; - for (i = 0; i < num_buffers; i++) { - struct amdgpu_winsys_bo *bo = cur->slab_buffers[i].bo; - p_atomic_inc(&bo->num_active_ioctls); - amdgpu_add_fence(bo, cur->fence); - } - /* Swap command streams. "cst" is going to be submitted. */ cs->csc = cs->cst; cs->cst = cur;