radv/winsys: remove the max IBs per submit limit for the fallback path

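The chained submission is the fastest path and it should now
be used more often than before: it is selected whenever the command
buffers can be patched and batch chaining is enabled, instead of only
when cs_count exceeds AMDGPU_CS_MAX_IBS_PER_SUBMIT. The fallback path
in turn builds a single request containing all IBs instead of splitting
them into batches of at most AMDGPU_CS_MAX_IBS_PER_SUBMIT. This removes
some EOP events.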

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
This commit is contained in:
Samuel Pitoiset 2018-11-15 11:29:53 +01:00
parent 8ca8a6a7b1
commit 4d30f2c6f4
1 changed file with 61 additions and 54 deletions

View File

@ -865,66 +865,73 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
struct radv_amdgpu_fence *fence = (struct radv_amdgpu_fence *)_fence;
amdgpu_bo_list_handle bo_list;
struct amdgpu_cs_request request;
bool emit_signal_sem = sem_info->cs_emit_signal;
struct amdgpu_cs_request request = {};
struct amdgpu_cs_ib_info *ibs;
struct radv_amdgpu_cs *cs0;
unsigned number_of_ibs;
assert(cs_count);
cs0 = radv_amdgpu_cs(cs_array[0]);
for (unsigned i = 0; i < cs_count;) {
struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[i]);
struct amdgpu_cs_ib_info ibs[AMDGPU_CS_MAX_IBS_PER_SUBMIT];
struct radeon_cmdbuf *preamble_cs = i ? continue_preamble_cs : initial_preamble_cs;
unsigned cnt = MIN2(AMDGPU_CS_MAX_IBS_PER_SUBMIT - !!preamble_cs,
cs_count - i);
/* Compute the number of IBs for this submit. */
number_of_ibs = cs_count + !!initial_preamble_cs;
memset(&request, 0, sizeof(request));
r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL, 0,
preamble_cs, radv_bo_list, &bo_list);
if (r) {
fprintf(stderr, "amdgpu: buffer list creation failed "
"for the fallback submission (%d)\n", r);
return r;
}
request.ip_type = cs0->hw_ip;
request.ring = queue_idx;
request.resources = bo_list;
request.number_of_ibs = cnt + !!preamble_cs;
request.ibs = ibs;
request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, queue_idx);
if (preamble_cs) {
ibs[0] = radv_amdgpu_cs(preamble_cs)->ib;
}
for (unsigned j = 0; j < cnt; ++j) {
struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]);
ibs[j + !!preamble_cs] = cs->ib;
if (cs->is_chained) {
*cs->ib_size_ptr -= 4;
cs->is_chained = false;
}
}
sem_info->cs_emit_signal = (i == cs_count - cnt) ? emit_signal_sem : false;
r = radv_amdgpu_cs_submit(ctx, &request, sem_info);
if (r) {
if (r == -ENOMEM)
fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
else
fprintf(stderr, "amdgpu: The CS has been rejected, "
"see dmesg for more information.\n");
}
/* Create a buffer object list. */
r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[0], cs_count, NULL, 0,
initial_preamble_cs, radv_bo_list,
&bo_list);
if (r) {
fprintf(stderr, "amdgpu: buffer list creation failed "
"for the fallback submission (%d)\n", r);
return r;
}
ibs = malloc(number_of_ibs * sizeof(*ibs));
if (!ibs) {
if (bo_list)
amdgpu_bo_list_destroy(bo_list);
if (r)
return r;
i += cnt;
return -ENOMEM;
}
/* Configure the CS request. */
if (initial_preamble_cs)
ibs[0] = radv_amdgpu_cs(initial_preamble_cs)->ib;
for (unsigned i = 0; i < cs_count; i++) {
struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]);
ibs[i + !!initial_preamble_cs] = cs->ib;
if (cs->is_chained) {
*cs->ib_size_ptr -= 4;
cs->is_chained = false;
}
}
request.ip_type = cs0->hw_ip;
request.ring = queue_idx;
request.resources = bo_list;
request.number_of_ibs = number_of_ibs;
request.ibs = ibs;
request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, queue_idx);
/* Submit the CS. */
r = radv_amdgpu_cs_submit(ctx, &request, sem_info);
if (r) {
if (r == -ENOMEM)
fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
else
fprintf(stderr, "amdgpu: The CS has been rejected, "
"see dmesg for more information.\n");
}
if (bo_list)
amdgpu_bo_list_destroy(bo_list);
free(ibs);
if (r)
return r;
if (fence)
radv_amdgpu_request_to_fence(ctx, fence, &request);
@ -1131,7 +1138,7 @@ static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx,
if (!cs->ws->use_ib_bos) {
ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, sem_info, bo_list, cs_array,
cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
} else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && cs->ws->batchchain) {
} else if (can_patch && cs->ws->batchchain) {
ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, sem_info, bo_list, cs_array,
cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
} else {