radv: flush more stages when semaphores are waiting.

This still doesn't give us complete pWaitDstStageMask support,
but it should provide enough to be correct if not as efficient as
possible.

If we have wait semaphores we must flush between submits and
flush the shaders as well.

This fixes the remaining fails in:
dEQP-VK.synchronization.op.single_queue.semaphore.*ssbo*

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Signed-off-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
Dave Airlie 2017-05-03 07:13:21 +10:00
parent e0e01895b0
commit a524704025
2 changed files with 23 additions and 3 deletions

View File

@ -1046,6 +1046,22 @@ VkResult radv_CreateDevice(
break;
}
device->ws->cs_finalize(device->flush_cs[family]);
device->flush_shader_cs[family] = device->ws->cs_create(device->ws, family);
switch (family) {
case RADV_QUEUE_GENERAL:
case RADV_QUEUE_COMPUTE:
si_cs_emit_cache_flush(device->flush_shader_cs[family],
device->physical_device->rad_info.chip_class,
family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
family == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH) |
RADV_CMD_FLAG_INV_ICACHE |
RADV_CMD_FLAG_INV_SMEM_L1 |
RADV_CMD_FLAG_INV_VMEM_L1 |
RADV_CMD_FLAG_INV_GLOBAL_L2);
break;
}
device->ws->cs_finalize(device->flush_shader_cs[family]);
}
if (getenv("RADV_TRACE_FILE")) {
@ -1121,6 +1137,8 @@ void radv_DestroyDevice(
device->ws->cs_destroy(device->empty_cs[i]);
if (device->flush_cs[i])
device->ws->cs_destroy(device->flush_cs[i]);
if (device->flush_shader_cs[i])
device->ws->cs_destroy(device->flush_shader_cs[i]);
}
radv_device_finish_meta(device);
@ -1822,7 +1840,7 @@ VkResult radv_QueueSubmit(
for (uint32_t i = 0; i < submitCount; i++) {
struct radeon_winsys_cs **cs_array;
bool do_flush = !i;
bool do_flush = !i || pSubmits[i].pWaitDstStageMask;
bool can_patch = !do_flush;
uint32_t advance;
@ -1849,7 +1867,9 @@ VkResult radv_QueueSubmit(
(pSubmits[i].commandBufferCount + do_flush));
if(do_flush)
cs_array[0] = queue->device->flush_cs[queue->queue_family_index];
cs_array[0] = pSubmits[i].waitSemaphoreCount ?
queue->device->flush_shader_cs[queue->queue_family_index] :
queue->device->flush_cs[queue->queue_family_index];
for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,

View File

@ -495,7 +495,7 @@ struct radv_device {
int queue_count[RADV_MAX_QUEUE_FAMILIES];
struct radeon_winsys_cs *empty_cs[RADV_MAX_QUEUE_FAMILIES];
struct radeon_winsys_cs *flush_cs[RADV_MAX_QUEUE_FAMILIES];
struct radeon_winsys_cs *flush_shader_cs[RADV_MAX_QUEUE_FAMILIES];
uint64_t debug_flags;
bool llvm_supports_spill;