anv: implement VK_KHR_synchronization2

v2: Use u_foreach_bit64() (Samuel)

v3: Add missing handling of VkMemoryBarrier2KHR in pNext of
    VkSubpassDependency2KHR (Samuel)

v4: Remove unused ANV_PIPELINE_STAGE_PIPELINED_BITS (Ivan)

v5: Fix missing anv_measure_submit() (Jason)
    Constify anv_pipeline_stage_pipelined_bits (Jason)

v6: Split flushes & invalidation emissions on
    vkCmdSetEvent2KHR()/vkCmdWaitEvents2KHR() (Jason)

v7: Only apply flushes once on events (Jason)

v8: Drop split flushes for this patch

v9: Add comment about ignoring some fields of VkMemoryBarrier2 in
    VkSubpassDependency2KHR (Jason)
    Drop spurious PIPE_CONTROL change s/,/;/ (Jason)

v10: Fix build issue on Android (Lionel)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9045>
Author: Lionel Landwerlin, 2020-11-11 21:38:25 +02:00 (committed by Marge Bot)
Parent: dff9098059
Commit: b996fa8efa
8 changed files with 139 additions and 124 deletions
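
For reference, the application-facing shape of the extension: synchronization2
moves the stage masks into the barrier structures themselves and widens all
stage/access masks to 64 bits. A minimal sketch of the new barrier call
(illustrative only, not code from this commit; assumes a valid command buffer
and buffer handle):

#include <vulkan/vulkan.h>

/* Make a transfer write visible to fragment-shader reads. */
static void
barrier_copy_to_fragment_read(VkCommandBuffer cmd, VkBuffer buf)
{
   const VkBufferMemoryBarrier2KHR barrier = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2_KHR,
      .srcStageMask = VK_PIPELINE_STAGE_2_COPY_BIT_KHR,
      .srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR,
      .dstStageMask = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT_KHR,
      .dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT_KHR,
      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .buffer = buf,
      .offset = 0,
      .size = VK_WHOLE_SIZE,
   };
   const VkDependencyInfoKHR dep = {
      .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR,
      .bufferMemoryBarrierCount = 1,
      .pBufferMemoryBarriers = &barrier,
   };
   vkCmdPipelineBarrier2KHR(cmd, &dep);
}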

File: docs/relnotes/new_features.txt

@@ -15,3 +15,4 @@ VK_KHR_shader_float16_int8 on lavapipe
VK_KHR_shader_subgroup_extended_types on lavapipe
VK_KHR_spirv_1_4 on lavapipe
Experimental raytracing support on RADV
VK_KHR_synchronization2 on Intel

File: src/intel/vulkan/anv_android.c

@@ -34,6 +34,7 @@
#include <sync/sync.h>
#include "anv_private.h"
#include "vk_common_entrypoints.h"
#include "vk_util.h"
static int anv_hal_open(const struct hw_module_t* mod, const char* id, struct hw_device_t** dev);
@@ -875,7 +876,7 @@ anv_QueueSignalReleaseImageANDROID(
if (waitSemaphoreCount == 0)
goto done;
result = anv_QueueSubmit(queue, 1,
result = vk_common_QueueSubmit(queue, 1,
&(VkSubmitInfo) {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.waitSemaphoreCount = 1,

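The Android path above can no longer call anv_QueueSubmit() directly, since
anv now only implements the 2KHR entry point; vk_common_QueueSubmit() is the
shared runtime wrapper that lifts the legacy VkSubmitInfo into a
VkSubmitInfo2KHR before it reaches the driver. Roughly, each legacy
semaphore + stage-mask pair becomes an inline VkSemaphoreSubmitInfoKHR; a
sketch with a hypothetical helper name, not the runtime's actual code:

#include <vulkan/vulkan.h>

static VkSemaphoreSubmitInfoKHR
legacy_wait_to_sync2(VkSemaphore sem, VkPipelineStageFlags legacy_stages)
{
   return (VkSemaphoreSubmitInfoKHR) {
      .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR,
      .semaphore = sem,
      .value = 0, /* binary semaphore; timeline values are set per-info */
      .stageMask = (VkPipelineStageFlags2KHR)legacy_stages,
   };
}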
File: src/intel/vulkan/anv_device.c

@@ -230,6 +230,7 @@ get_device_extensions(const struct anv_physical_device *device,
.KHR_swapchain = true,
.KHR_swapchain_mutable_format = true,
#endif
.KHR_synchronization2 = true,
.KHR_timeline_semaphore = true,
.KHR_uniform_buffer_standard_layout = true,
.KHR_variable_pointers = true,
@@ -1689,6 +1690,13 @@ void anv_GetPhysicalDeviceFeatures2(
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES_KHR: {
VkPhysicalDeviceSynchronization2FeaturesKHR *features =
(VkPhysicalDeviceSynchronization2FeaturesKHR *)ext;
features->synchronization2 = true;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: {
VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *features =
(VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *)ext;

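A sketch of how an application would discover the feature bit filled in
above, using standard vkGetPhysicalDeviceFeatures2() chaining (the
surrounding handles are assumed, not part of this commit):

#include <stdbool.h>
#include <vulkan/vulkan.h>

static bool
has_synchronization2(VkPhysicalDevice pdev)
{
   VkPhysicalDeviceSynchronization2FeaturesKHR sync2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES_KHR,
   };
   VkPhysicalDeviceFeatures2 features2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
      .pNext = &sync2,
   };
   vkGetPhysicalDeviceFeatures2(pdev, &features2);
   /* To use the 2KHR entry points, chain the same struct (with the bit
    * set) into VkDeviceCreateInfo and enable VK_KHR_synchronization2. */
   return sync2.synchronization2 == VK_TRUE;
}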
File: src/intel/vulkan/anv_pass.c

@@ -31,22 +31,36 @@ anv_render_pass_add_subpass_dep(struct anv_device *device,
struct anv_render_pass *pass,
const VkSubpassDependency2KHR *dep)
{
/* From the Vulkan 1.2.195 spec:
*
* "If an instance of VkMemoryBarrier2 is included in the pNext chain,
* srcStageMask, dstStageMask, srcAccessMask, and dstAccessMask
* parameters are ignored. The synchronization and access scopes instead
* are defined by the parameters of VkMemoryBarrier2."
*/
const VkMemoryBarrier2KHR *barrier =
vk_find_struct_const(dep->pNext, MEMORY_BARRIER_2_KHR);
VkAccessFlags2KHR src_access_mask =
barrier ? barrier->srcAccessMask : dep->srcAccessMask;
VkAccessFlags2KHR dst_access_mask =
barrier ? barrier->dstAccessMask : dep->dstAccessMask;
if (dep->dstSubpass == VK_SUBPASS_EXTERNAL) {
pass->subpass_flushes[pass->subpass_count] |=
anv_pipe_invalidate_bits_for_access_flags(device, dep->dstAccessMask);
anv_pipe_invalidate_bits_for_access_flags(device, dst_access_mask);
} else {
assert(dep->dstSubpass < pass->subpass_count);
pass->subpass_flushes[dep->dstSubpass] |=
anv_pipe_invalidate_bits_for_access_flags(device, dep->dstAccessMask);
anv_pipe_invalidate_bits_for_access_flags(device, dst_access_mask);
}
if (dep->srcSubpass == VK_SUBPASS_EXTERNAL) {
pass->subpass_flushes[0] |=
anv_pipe_flush_bits_for_access_flags(device, dep->srcAccessMask);
anv_pipe_flush_bits_for_access_flags(device, src_access_mask);
} else {
assert(dep->srcSubpass < pass->subpass_count);
pass->subpass_flushes[dep->srcSubpass + 1] |=
anv_pipe_flush_bits_for_access_flags(device, dep->srcAccessMask);
anv_pipe_flush_bits_for_access_flags(device, src_access_mask);
}
}

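For illustration, the application-side construct the code above now handles:
a VkMemoryBarrier2KHR chained into a subpass dependency supersedes the
dependency's own masks (the values here are illustrative only):

#include <vulkan/vulkan.h>

static const VkMemoryBarrier2KHR barrier2 = {
   .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2_KHR,
   .srcStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT_KHR,
   .srcAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT_KHR,
   .dstStageMask = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT_KHR,
   .dstAccessMask = VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT_KHR,
};

static const VkSubpassDependency2KHR dep = {
   .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2_KHR,
   .pNext = &barrier2, /* the dependency's four mask fields are ignored */
   .srcSubpass = 0,
   .dstSubpass = 1,
};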
File: src/intel/vulkan/anv_private.h

@@ -66,6 +66,7 @@
#include "vk_alloc.h"
#include "vk_debug_report.h"
#include "vk_device.h"
#include "vk_enum_defines.h"
#include "vk_image.h"
#include "vk_instance.h"
#include "vk_physical_device.h"
@@ -2538,34 +2539,35 @@ enum anv_pipe_bits {
static inline enum anv_pipe_bits
anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
VkAccessFlags flags)
VkAccessFlags2KHR flags)
{
enum anv_pipe_bits pipe_bits = 0;
u_foreach_bit(b, flags) {
switch ((VkAccessFlagBits)(1 << b)) {
case VK_ACCESS_SHADER_WRITE_BIT:
u_foreach_bit64(b, flags) {
switch ((VkAccessFlags2KHR)(UINT64_C(1) << b)) {
case VK_ACCESS_2_SHADER_WRITE_BIT_KHR:
case VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT_KHR:
/* We're transitioning a buffer that was previously used as a write
* destination through the data port. To make its content available
* to future operations, flush the HDC pipeline.
*/
pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
break;
case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
case VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT_KHR:
/* We're transitioning a buffer that was previously used as render
* target. To make its content available to future operations, flush
* the render target cache.
*/
pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
break;
case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
case VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT_KHR:
/* We're transitioning a buffer that was previously used as depth
* buffer. To make its content available to future operations, flush
* the depth cache.
*/
pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
break;
case VK_ACCESS_TRANSFER_WRITE_BIT:
case VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR:
/* We're transitioning a buffer that was previously used as a
* transfer write destination. Generic write operations include color
* & depth operations as well as buffer operations like:
@@ -2582,13 +2584,13 @@ anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
break;
case VK_ACCESS_MEMORY_WRITE_BIT:
case VK_ACCESS_2_MEMORY_WRITE_BIT_KHR:
/* We're transitioning a buffer for generic write operations. Flush
* all the caches.
*/
pipe_bits |= ANV_PIPE_FLUSH_BITS;
break;
case VK_ACCESS_HOST_WRITE_BIT:
case VK_ACCESS_2_HOST_WRITE_BIT_KHR:
/* We're transitioning a buffer for access by the CPU. Invalidate
* all the caches. Since the data and tile caches don't support
* invalidation, we are forced to flush those as well.
@@ -2596,8 +2598,8 @@ anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
pipe_bits |= ANV_PIPE_FLUSH_BITS;
pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
break;
case VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
case VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
/* We're transitioning a buffer written either from the VS stage or
* from the command streamer (see CmdEndTransformFeedbackEXT); we just
* need to stall the CS.
@@ -2614,13 +2616,13 @@ anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
static inline enum anv_pipe_bits
anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
VkAccessFlags flags)
VkAccessFlags2KHR flags)
{
enum anv_pipe_bits pipe_bits = 0;
u_foreach_bit(b, flags) {
switch ((VkAccessFlagBits)(1 << b)) {
case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
u_foreach_bit64(b, flags) {
switch ((VkAccessFlags2KHR)(UINT64_C(1) << b)) {
case VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT_KHR:
/* Indirect draw commands take a buffer as input that we're going to
* read from the command streamer to load some of the HW registers
* (see genX_cmd_buffer.c:load_indirect_parameters). This requires a
@@ -2642,15 +2644,15 @@ anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
*/
pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
break;
case VK_ACCESS_INDEX_READ_BIT:
case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
case VK_ACCESS_2_INDEX_READ_BIT_KHR:
case VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT_KHR:
/* We're transitioning a buffer to be used as input for vkCmdDraw*
* commands, so we invalidate the VF cache to make sure there is no
* stale data when we start rendering.
*/
pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
break;
case VK_ACCESS_UNIFORM_READ_BIT:
case VK_ACCESS_2_UNIFORM_READ_BIT_KHR:
/* We're transitioning a buffer to be used as uniform data. Because
* uniforms are accessed through the data port & sampler, we need to
* invalidate the texture cache (sampler) & constant cache (data
@@ -2662,28 +2664,28 @@ anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
else
pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
break;
case VK_ACCESS_SHADER_READ_BIT:
case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
case VK_ACCESS_TRANSFER_READ_BIT:
case VK_ACCESS_2_SHADER_READ_BIT_KHR:
case VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT_KHR:
case VK_ACCESS_2_TRANSFER_READ_BIT_KHR:
/* Transitioning a buffer to be read through the sampler, so
* invalidate the texture cache; we don't want any stale data.
*/
pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
break;
case VK_ACCESS_MEMORY_READ_BIT:
case VK_ACCESS_2_MEMORY_READ_BIT_KHR:
/* Transitioning a buffer for generic read; invalidate all the
* caches.
*/
pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
break;
case VK_ACCESS_MEMORY_WRITE_BIT:
case VK_ACCESS_2_MEMORY_WRITE_BIT_KHR:
/* Generic write; make sure all previously written things land in
* memory.
*/
pipe_bits |= ANV_PIPE_FLUSH_BITS;
break;
case VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT:
case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT:
case VK_ACCESS_2_CONDITIONAL_RENDERING_READ_BIT_EXT:
case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT:
/* Transitioning a buffer for conditional rendering or transform
* feedback. We'll load the content of this buffer into HW registers
* using the command streamer, so we need to stall the command
@@ -2694,7 +2696,7 @@ anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
break;
case VK_ACCESS_HOST_READ_BIT:
case VK_ACCESS_2_HOST_READ_BIT_KHR:
/* We're transitioning a buffer that was written by CPU. Flush
* all the caches.
*/

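The move from u_foreach_bit() to u_foreach_bit64() matters because
VkAccessFlags2KHR is a 64-bit mask and some of the new access bits live
above bit 31 (VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT_KHR is 0x400000000ULL,
bit 34). A rough standalone equivalent of the iteration, shown only to make
the 64-bit requirement concrete (not Mesa's actual macro):

#include <stdint.h>

static void
walk_access_bits(uint64_t flags)
{
   while (flags) {
      const int b = __builtin_ctzll(flags);  /* index of lowest set bit */
      flags &= flags - 1;                    /* clear that bit */
      const uint64_t bit = UINT64_C(1) << b; /* 64-bit shift, not 1 << b */
      (void)bit; /* the functions above switch on this value */
   }
}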
File: src/intel/vulkan/anv_queue.c

@@ -1206,10 +1206,10 @@ anv_queue_submit_post_and_alloc_new(struct anv_queue *queue,
return VK_SUCCESS;
}
VkResult anv_QueueSubmit(
VkResult anv_QueueSubmit2KHR(
VkQueue _queue,
uint32_t submitCount,
const VkSubmitInfo* pSubmits,
const VkSubmitInfo2KHR* pSubmits,
VkFence _fence)
{
ANV_FROM_HANDLE(anv_queue, queue, _queue);
@@ -1242,23 +1242,14 @@ VkResult anv_QueueSubmit(
mem_signal_info && mem_signal_info->memory != VK_NULL_HANDLE ?
anv_device_memory_from_handle(mem_signal_info->memory)->bo : NULL;
const VkTimelineSemaphoreSubmitInfoKHR *timeline_info =
vk_find_struct_const(pSubmits[i].pNext,
TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR);
const VkPerformanceQuerySubmitInfoKHR *perf_info =
vk_find_struct_const(pSubmits[i].pNext,
PERFORMANCE_QUERY_SUBMIT_INFO_KHR);
const int perf_pass = perf_info ? perf_info->counterPassIndex : 0;
const uint64_t *wait_values =
timeline_info && timeline_info->waitSemaphoreValueCount ?
timeline_info->pWaitSemaphoreValues : NULL;
const uint64_t *signal_values =
timeline_info && timeline_info->signalSemaphoreValueCount ?
timeline_info->pSignalSemaphoreValues : NULL;
if (!anv_queue_submit_can_add_submit(submit,
pSubmits[i].waitSemaphoreCount,
pSubmits[i].signalSemaphoreCount,
pSubmits[i].waitSemaphoreInfoCount,
pSubmits[i].signalSemaphoreInfoCount,
perf_pass)) {
result = anv_queue_submit_post_and_alloc_new(queue, &submit);
if (result != VK_SUCCESS)
@@ -1266,19 +1257,19 @@ VkResult anv_QueueSubmit(
}
/* Wait semaphores */
for (uint32_t j = 0; j < pSubmits[i].waitSemaphoreCount; j++) {
for (uint32_t j = 0; j < pSubmits[i].waitSemaphoreInfoCount; j++) {
result = anv_queue_submit_add_in_semaphore(submit,
device,
pSubmits[i].pWaitSemaphores[j],
wait_values ? wait_values[j] : 0);
pSubmits[i].pWaitSemaphoreInfos[j].semaphore,
pSubmits[i].pWaitSemaphoreInfos[j].value);
if (result != VK_SUCCESS)
goto out;
}
/* Command buffers */
for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
for (uint32_t j = 0; j < pSubmits[i].commandBufferInfoCount; j++) {
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer,
pSubmits[i].pCommandBuffers[j]);
pSubmits[i].pCommandBufferInfos[j].commandBuffer);
assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
assert(!anv_batch_has_error(&cmd_buffer->batch));
anv_measure_submit(cmd_buffer);
@@ -1298,11 +1289,11 @@ VkResult anv_QueueSubmit(
}
/* Signal semaphores */
for (uint32_t j = 0; j < pSubmits[i].signalSemaphoreCount; j++) {
for (uint32_t j = 0; j < pSubmits[i].signalSemaphoreInfoCount; j++) {
result = anv_queue_submit_add_out_semaphore(submit,
device,
pSubmits[i].pSignalSemaphores[j],
signal_values ? signal_values[j] : 0);
pSubmits[i].pSignalSemaphoreInfos[j].semaphore,
pSubmits[i].pSignalSemaphoreInfos[j].value);
if (result != VK_SUCCESS)
goto out;
}
@@ -1350,7 +1341,7 @@ out:
* anv_device_set_lost() would have been called already by a callee of
* anv_queue_submit().
*/
result = anv_device_set_lost(device, "vkQueueSubmit() failed");
result = anv_device_set_lost(device, "vkQueueSubmit2KHR() failed");
}
return result;

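This is why the VkTimelineSemaphoreSubmitInfoKHR lookup disappears above:
with VkSubmitInfo2KHR, per-semaphore stage masks and timeline values are
inline rather than chained. A sketch of the corresponding application call
(handles assumed valid; .value is ignored for binary semaphores):

#include <vulkan/vulkan.h>

static VkResult
submit_one(VkQueue queue, VkCommandBuffer cmd,
           VkSemaphore wait_sem, VkSemaphore signal_sem, VkFence fence)
{
   const VkSemaphoreSubmitInfoKHR wait = {
      .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR,
      .semaphore = wait_sem,
      .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT_KHR,
   };
   const VkSemaphoreSubmitInfoKHR signal = {
      .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR,
      .semaphore = signal_sem,
      .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT_KHR,
   };
   const VkCommandBufferSubmitInfoKHR cmd_info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR,
      .commandBuffer = cmd,
   };
   const VkSubmitInfo2KHR submit = {
      .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR,
      .waitSemaphoreInfoCount = 1,
      .pWaitSemaphoreInfos = &wait,
      .commandBufferInfoCount = 1,
      .pCommandBufferInfos = &cmd_info,
      .signalSemaphoreInfoCount = 1,
      .pSignalSemaphoreInfos = &signal,
   };
   return vkQueueSubmit2KHR(queue, 1, &submit, fence);
}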
File: src/intel/vulkan/genX_cmd_buffer.c

@@ -2403,43 +2403,37 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer)
cmd_buffer->state.pending_pipe_bits = bits;
}
void genX(CmdPipelineBarrier)(
VkCommandBuffer commandBuffer,
VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags destStageMask,
VkBool32 byRegion,
uint32_t memoryBarrierCount,
const VkMemoryBarrier* pMemoryBarriers,
uint32_t bufferMemoryBarrierCount,
const VkBufferMemoryBarrier* pBufferMemoryBarriers,
uint32_t imageMemoryBarrierCount,
const VkImageMemoryBarrier* pImageMemoryBarriers)
static void
cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
const VkDependencyInfoKHR *dep_info,
const char *reason)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
/* XXX: Right now, we're really dumb and just flush whatever categories
* the app asks for. One of these days we may make this a bit better
* but right now that's all the hardware allows for in most areas.
*/
VkAccessFlags src_flags = 0;
VkAccessFlags dst_flags = 0;
VkAccessFlags2KHR src_flags = 0;
VkAccessFlags2KHR dst_flags = 0;
for (uint32_t i = 0; i < memoryBarrierCount; i++) {
src_flags |= pMemoryBarriers[i].srcAccessMask;
dst_flags |= pMemoryBarriers[i].dstAccessMask;
for (uint32_t i = 0; i < dep_info->memoryBarrierCount; i++) {
src_flags |= dep_info->pMemoryBarriers[i].srcAccessMask;
dst_flags |= dep_info->pMemoryBarriers[i].dstAccessMask;
}
for (uint32_t i = 0; i < bufferMemoryBarrierCount; i++) {
src_flags |= pBufferMemoryBarriers[i].srcAccessMask;
dst_flags |= pBufferMemoryBarriers[i].dstAccessMask;
for (uint32_t i = 0; i < dep_info->bufferMemoryBarrierCount; i++) {
src_flags |= dep_info->pBufferMemoryBarriers[i].srcAccessMask;
dst_flags |= dep_info->pBufferMemoryBarriers[i].dstAccessMask;
}
for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) {
src_flags |= pImageMemoryBarriers[i].srcAccessMask;
dst_flags |= pImageMemoryBarriers[i].dstAccessMask;
ANV_FROM_HANDLE(anv_image, image, pImageMemoryBarriers[i].image);
const VkImageSubresourceRange *range =
&pImageMemoryBarriers[i].subresourceRange;
for (uint32_t i = 0; i < dep_info->imageMemoryBarrierCount; i++) {
const VkImageMemoryBarrier2KHR *img_barrier =
&dep_info->pImageMemoryBarriers[i];
src_flags |= img_barrier->srcAccessMask;
dst_flags |= img_barrier->dstAccessMask;
ANV_FROM_HANDLE(anv_image, image, img_barrier->image);
const VkImageSubresourceRange *range = &img_barrier->subresourceRange;
uint32_t base_layer, layer_count;
if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
@@ -2455,8 +2449,8 @@ void genX(CmdPipelineBarrier)(
if (range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
transition_depth_buffer(cmd_buffer, image,
base_layer, layer_count,
pImageMemoryBarriers[i].oldLayout,
pImageMemoryBarriers[i].newLayout,
img_barrier->oldLayout,
img_barrier->newLayout,
false /* will_full_fast_clear */);
}
@@ -2464,8 +2458,8 @@ void genX(CmdPipelineBarrier)(
transition_stencil_buffer(cmd_buffer, image,
range->baseMipLevel, level_count,
base_layer, layer_count,
pImageMemoryBarriers[i].oldLayout,
pImageMemoryBarriers[i].newLayout,
img_barrier->oldLayout,
img_barrier->newLayout,
false /* will_full_fast_clear */);
}
@@ -2476,19 +2470,29 @@ void genX(CmdPipelineBarrier)(
transition_color_buffer(cmd_buffer, image, 1UL << aspect_bit,
range->baseMipLevel, level_count,
base_layer, layer_count,
pImageMemoryBarriers[i].oldLayout,
pImageMemoryBarriers[i].newLayout,
pImageMemoryBarriers[i].srcQueueFamilyIndex,
pImageMemoryBarriers[i].dstQueueFamilyIndex,
img_barrier->oldLayout,
img_barrier->newLayout,
img_barrier->srcQueueFamilyIndex,
img_barrier->dstQueueFamilyIndex,
false /* will_full_fast_clear */);
}
}
}
anv_add_pending_pipe_bits(cmd_buffer,
anv_pipe_flush_bits_for_access_flags(cmd_buffer->device, src_flags) |
anv_pipe_invalidate_bits_for_access_flags(cmd_buffer->device, dst_flags),
"pipe barrier");
enum anv_pipe_bits bits =
anv_pipe_flush_bits_for_access_flags(cmd_buffer->device, src_flags) |
anv_pipe_invalidate_bits_for_access_flags(cmd_buffer->device, dst_flags);
anv_add_pending_pipe_bits(cmd_buffer, bits, reason);
}
void genX(CmdPipelineBarrier2KHR)(
VkCommandBuffer commandBuffer,
const VkDependencyInfoKHR* pDependencyInfo)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
cmd_buffer_barrier(cmd_buffer, pDependencyInfo, "pipe barrier");
}
static void
@@ -6866,24 +6870,33 @@ void genX(CmdEndConditionalRenderingEXT)(
* by the command streamer for later execution.
*/
#define ANV_PIPELINE_STAGE_PIPELINED_BITS \
~(VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT | \
VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | \
VK_PIPELINE_STAGE_HOST_BIT | \
VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT)
~(VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR | \
VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT_KHR | \
VK_PIPELINE_STAGE_2_HOST_BIT_KHR | \
VK_PIPELINE_STAGE_2_CONDITIONAL_RENDERING_BIT_EXT)
void genX(CmdSetEvent)(
void genX(CmdSetEvent2KHR)(
VkCommandBuffer commandBuffer,
VkEvent _event,
VkPipelineStageFlags stageMask)
const VkDependencyInfoKHR* pDependencyInfo)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_event, event, _event);
VkPipelineStageFlags2KHR src_stages = 0;
for (uint32_t i = 0; i < pDependencyInfo->memoryBarrierCount; i++)
src_stages |= pDependencyInfo->pMemoryBarriers[i].srcStageMask;
for (uint32_t i = 0; i < pDependencyInfo->bufferMemoryBarrierCount; i++)
src_stages |= pDependencyInfo->pBufferMemoryBarriers[i].srcStageMask;
for (uint32_t i = 0; i < pDependencyInfo->imageMemoryBarrierCount; i++)
src_stages |= pDependencyInfo->pImageMemoryBarriers[i].srcStageMask;
cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_POST_SYNC_BIT;
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
if (stageMask & ANV_PIPELINE_STAGE_PIPELINED_BITS) {
if (src_stages & ANV_PIPELINE_STAGE_PIPELINED_BITS) {
pc.StallAtPixelScoreboard = true;
pc.CommandStreamerStallEnable = true;
}
@@ -6899,10 +6912,10 @@ void genX(CmdSetEvent)(
}
}
void genX(CmdResetEvent)(
void genX(CmdResetEvent2KHR)(
VkCommandBuffer commandBuffer,
VkEvent _event,
VkPipelineStageFlags stageMask)
VkPipelineStageFlags2KHR stageMask)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_event, event, _event);
@@ -6927,22 +6940,15 @@ void genX(CmdResetEvent)(
}
}
void genX(CmdWaitEvents)(
void genX(CmdWaitEvents2KHR)(
VkCommandBuffer commandBuffer,
uint32_t eventCount,
const VkEvent* pEvents,
VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags destStageMask,
uint32_t memoryBarrierCount,
const VkMemoryBarrier* pMemoryBarriers,
uint32_t bufferMemoryBarrierCount,
const VkBufferMemoryBarrier* pBufferMemoryBarriers,
uint32_t imageMemoryBarrierCount,
const VkImageMemoryBarrier* pImageMemoryBarriers)
const VkDependencyInfoKHR* pDependencyInfos)
{
#if GFX_VER >= 8
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
#if GFX_VER >= 8
for (uint32_t i = 0; i < eventCount; i++) {
ANV_FROM_HANDLE(anv_event, event, pEvents[i]);
@@ -6960,11 +6966,7 @@ void genX(CmdWaitEvents)(
anv_finishme("Implement events on gfx7");
#endif
genX(CmdPipelineBarrier)(commandBuffer, srcStageMask, destStageMask,
false, /* byRegion */
memoryBarrierCount, pMemoryBarriers,
bufferMemoryBarrierCount, pBufferMemoryBarriers,
imageMemoryBarrierCount, pImageMemoryBarriers);
cmd_buffer_barrier(cmd_buffer, pDependencyInfos, "wait event");
}
VkResult genX(CmdSetPerformanceOverrideINTEL)(

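The 2KHR event API implemented above takes the full dependency info at both
the set and the wait (the spec requires the two to match), which is why the
wait path can simply reuse cmd_buffer_barrier(). A usage sketch (handles
assumed valid):

#include <vulkan/vulkan.h>

static void
sync_through_event(VkCommandBuffer cmd, VkEvent event)
{
   const VkMemoryBarrier2KHR barrier = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2_KHR,
      .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR,
      .srcAccessMask = VK_ACCESS_2_SHADER_WRITE_BIT_KHR,
      .dstStageMask = VK_PIPELINE_STAGE_2_COPY_BIT_KHR,
      .dstAccessMask = VK_ACCESS_2_TRANSFER_READ_BIT_KHR,
   };
   const VkDependencyInfoKHR dep = {
      .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR,
      .memoryBarrierCount = 1,
      .pMemoryBarriers = &barrier,
   };
   vkCmdSetEvent2KHR(cmd, event, &dep);
   /* ... commands not covered by the dependency ... */
   vkCmdWaitEvents2KHR(cmd, 1, &event, &dep);
}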
File: src/intel/vulkan/genX_query.c

@@ -1226,9 +1226,9 @@ void genX(CmdEndQueryIndexedEXT)(
#define TIMESTAMP 0x2358
void genX(CmdWriteTimestamp)(
void genX(CmdWriteTimestamp2KHR)(
VkCommandBuffer commandBuffer,
VkPipelineStageFlagBits pipelineStage,
VkPipelineStageFlags2KHR stage,
VkQueryPool queryPool,
uint32_t query)
{
@@ -1241,13 +1241,10 @@ void genX(CmdWriteTimestamp)(
struct mi_builder b;
mi_builder_init(&b, &cmd_buffer->device->info, &cmd_buffer->batch);
switch (pipelineStage) {
case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT:
if (stage == VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR) {
mi_store(&b, mi_mem64(anv_address_add(query_addr, 8)),
mi_reg64(TIMESTAMP));
break;
default:
} else {
/* Everything else is bottom-of-pipe */
cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_POST_SYNC_BIT;
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
@@ -1260,7 +1257,6 @@ void genX(CmdWriteTimestamp)(
if (GFX_VER == 9 && cmd_buffer->device->info.gt == 4)
pc.CommandStreamerStallEnable = true;
}
break;
}
emit_query_pc_availability(cmd_buffer, query_addr, true);
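
The matching application call for the entry point above (cmd and query_pool
assumed valid): the stage argument is now a 64-bit VkPipelineStageFlags2KHR
value rather than a single VkPipelineStageFlagBits, and anything other than
top-of-pipe is treated as bottom-of-pipe by this implementation.

vkCmdWriteTimestamp2KHR(cmd, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT_KHR,
                        query_pool, 0 /* query index */);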