anv: Implement VK_EXT_conditional_rendering for gen 7.5+
Conditional rendering affects next functions: - vkCmdDraw, vkCmdDrawIndexed, vkCmdDrawIndirect, vkCmdDrawIndexedIndirect - vkCmdDrawIndirectCountKHR, vkCmdDrawIndexedIndirectCountKHR - vkCmdDispatch, vkCmdDispatchIndirect, vkCmdDispatchBase - vkCmdClearAttachments Value from conditional buffer is cached into designated register, MI_PREDICATE is emitted every time conditional rendering is enabled and command requires it. v2: by Jason Ekstrand - Use vk_find_struct_const instead of manually looping - Move draw count loading to prepare function - Zero the top 32-bits of MI_ALU_REG15 v3: Apply pipeline flush before accessing conditional buffer (The issue was found by Samuel Iglesias) v4: - Remove support of Haswell due to possible hardware bug - Made TMP_REG_PREDICATE and TMP_REG_DRAW_COUNT defines to define registers in one place. v5: thanks to Jason Ekstrand and Lionel Landwerlin - Workaround the fact that MI_PREDICATE_RESULT is not accessible on Haswell by manually calculating MI_PREDICATE_RESULT and re-emitting MI_PREDICATE when necessary. v6: suggested by Lionel Landwerlin - Instead of calculating the result of predicate once - re-emit MI_PREDICATE to make it easier to investigate error states. v7: suggested by Jason - Make anv_pipe_invalidate_bits_for_access_flag add CS_STALL if VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT is set. v8: suggested by Lionel - Precompute conditional predicate's result to support secondary command buffers. - Make prepare_for_draw_count_predicate more readable. Signed-off-by: Danylo Piliaiev <danylo.piliaiev@globallogic.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
This commit is contained in:
parent
ed6e2bf263
commit
1952fd8d2c
|
@ -1144,8 +1144,12 @@ void anv_CmdClearAttachments(
|
||||||
* trash our depth and stencil buffers.
|
* trash our depth and stencil buffers.
|
||||||
*/
|
*/
|
||||||
struct blorp_batch batch;
|
struct blorp_batch batch;
|
||||||
blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,
|
enum blorp_batch_flags flags = BLORP_BATCH_NO_EMIT_DEPTH_STENCIL;
|
||||||
BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);
|
if (cmd_buffer->state.conditional_render_enabled) {
|
||||||
|
anv_cmd_emit_conditional_render_predicate(cmd_buffer);
|
||||||
|
flags |= BLORP_BATCH_PREDICATE_ENABLE;
|
||||||
|
}
|
||||||
|
blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, flags);
|
||||||
|
|
||||||
for (uint32_t a = 0; a < attachmentCount; ++a) {
|
for (uint32_t a = 0; a < attachmentCount; ++a) {
|
||||||
if (pAttachments[a].aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
|
if (pAttachments[a].aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
|
||||||
|
|
|
@ -377,6 +377,14 @@ anv_cmd_buffer_mark_image_written(struct anv_cmd_buffer *cmd_buffer,
|
||||||
level, base_layer, layer_count);
|
level, base_layer, layer_count);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer)
|
||||||
|
{
|
||||||
|
anv_genX_call(&cmd_buffer->device->info,
|
||||||
|
cmd_emit_conditional_render_predicate,
|
||||||
|
cmd_buffer);
|
||||||
|
}
|
||||||
|
|
||||||
void anv_CmdBindPipeline(
|
void anv_CmdBindPipeline(
|
||||||
VkCommandBuffer commandBuffer,
|
VkCommandBuffer commandBuffer,
|
||||||
VkPipelineBindPoint pipelineBindPoint,
|
VkPipelineBindPoint pipelineBindPoint,
|
||||||
|
|
|
@ -916,6 +916,18 @@ void anv_GetPhysicalDeviceFeatures2(
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
|
||||||
|
VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =
|
||||||
|
(VkPhysicalDeviceConditionalRenderingFeaturesEXT*)ext;
|
||||||
|
ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
|
||||||
|
|
||||||
|
features->conditionalRendering = pdevice->info.gen >= 8 ||
|
||||||
|
pdevice->info.is_haswell;
|
||||||
|
features->inheritedConditionalRendering = pdevice->info.gen >= 8 ||
|
||||||
|
pdevice->info.is_haswell;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES: {
|
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES: {
|
||||||
VkPhysicalDeviceMultiviewFeatures *features =
|
VkPhysicalDeviceMultiviewFeatures *features =
|
||||||
(VkPhysicalDeviceMultiviewFeatures *)ext;
|
(VkPhysicalDeviceMultiviewFeatures *)ext;
|
||||||
|
|
|
@ -118,6 +118,7 @@ EXTENSIONS = [
|
||||||
Extension('VK_KHR_xlib_surface', 6, 'VK_USE_PLATFORM_XLIB_KHR'),
|
Extension('VK_KHR_xlib_surface', 6, 'VK_USE_PLATFORM_XLIB_KHR'),
|
||||||
Extension('VK_EXT_acquire_xlib_display', 1, 'VK_USE_PLATFORM_XLIB_XRANDR_EXT'),
|
Extension('VK_EXT_acquire_xlib_display', 1, 'VK_USE_PLATFORM_XLIB_XRANDR_EXT'),
|
||||||
Extension('VK_EXT_calibrated_timestamps', 1, True),
|
Extension('VK_EXT_calibrated_timestamps', 1, True),
|
||||||
|
Extension('VK_EXT_conditional_rendering', 1, 'device->info.gen >= 8 || device->info.is_haswell'),
|
||||||
Extension('VK_EXT_debug_report', 8, True),
|
Extension('VK_EXT_debug_report', 8, True),
|
||||||
Extension('VK_EXT_direct_mode_display', 1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
|
Extension('VK_EXT_direct_mode_display', 1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
|
||||||
Extension('VK_EXT_display_control', 1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
|
Extension('VK_EXT_display_control', 1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
|
||||||
|
|
|
@ -66,6 +66,8 @@ void genX(cmd_buffer_mark_image_written)(struct anv_cmd_buffer *cmd_buffer,
|
||||||
uint32_t base_layer,
|
uint32_t base_layer,
|
||||||
uint32_t layer_count);
|
uint32_t layer_count);
|
||||||
|
|
||||||
|
void genX(cmd_emit_conditional_render_predicate)(struct anv_cmd_buffer *cmd_buffer);
|
||||||
|
|
||||||
void
|
void
|
||||||
genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
|
genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
|
||||||
const struct gen_l3_config *l3_config,
|
const struct gen_l3_config *l3_config,
|
||||||
|
|
|
@ -183,6 +183,11 @@ struct gen_l3_config;
|
||||||
#define ANV_SVGS_VB_INDEX MAX_VBS
|
#define ANV_SVGS_VB_INDEX MAX_VBS
|
||||||
#define ANV_DRAWID_VB_INDEX (MAX_VBS + 1)
|
#define ANV_DRAWID_VB_INDEX (MAX_VBS + 1)
|
||||||
|
|
||||||
|
/* We reserve this MI ALU register for the purpose of handling predication.
|
||||||
|
* Other code which uses the MI ALU should leave it alone.
|
||||||
|
*/
|
||||||
|
#define ANV_PREDICATE_RESULT_REG MI_ALU_REG15
|
||||||
|
|
||||||
#define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
|
#define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
|
||||||
|
|
||||||
static inline uint32_t
|
static inline uint32_t
|
||||||
|
@ -1870,6 +1875,9 @@ anv_pipe_invalidate_bits_for_access_flags(VkAccessFlags flags)
|
||||||
case VK_ACCESS_MEMORY_WRITE_BIT:
|
case VK_ACCESS_MEMORY_WRITE_BIT:
|
||||||
pipe_bits |= ANV_PIPE_FLUSH_BITS;
|
pipe_bits |= ANV_PIPE_FLUSH_BITS;
|
||||||
break;
|
break;
|
||||||
|
case VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT:
|
||||||
|
pipe_bits |= ANV_PIPE_CS_STALL_BIT;
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
break; /* Nothing to do */
|
break; /* Nothing to do */
|
||||||
}
|
}
|
||||||
|
@ -2104,6 +2112,8 @@ struct anv_cmd_state {
|
||||||
*/
|
*/
|
||||||
bool hiz_enabled;
|
bool hiz_enabled;
|
||||||
|
|
||||||
|
bool conditional_render_enabled;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Array length is anv_cmd_state::pass::attachment_count. Array content is
|
* Array length is anv_cmd_state::pass::attachment_count. Array content is
|
||||||
* valid only when recording a render pass instance.
|
* valid only when recording a render pass instance.
|
||||||
|
@ -2261,6 +2271,8 @@ anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer,
|
||||||
|
|
||||||
void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer);
|
void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer);
|
||||||
|
|
||||||
|
void anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer);
|
||||||
|
|
||||||
enum anv_fence_type {
|
enum anv_fence_type {
|
||||||
ANV_FENCE_TYPE_NONE = 0,
|
ANV_FENCE_TYPE_NONE = 0,
|
||||||
ANV_FENCE_TYPE_BO,
|
ANV_FENCE_TYPE_BO,
|
||||||
|
|
|
@ -479,8 +479,9 @@ transition_depth_buffer(struct anv_cmd_buffer *cmd_buffer,
|
||||||
0, 0, 1, hiz_op);
|
0, 0, 1, hiz_op);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define MI_PREDICATE_SRC0 0x2400
|
#define MI_PREDICATE_SRC0 0x2400
|
||||||
#define MI_PREDICATE_SRC1 0x2408
|
#define MI_PREDICATE_SRC1 0x2408
|
||||||
|
#define MI_PREDICATE_RESULT 0x2418
|
||||||
|
|
||||||
static void
|
static void
|
||||||
set_image_compressed_bit(struct anv_cmd_buffer *cmd_buffer,
|
set_image_compressed_bit(struct anv_cmd_buffer *cmd_buffer,
|
||||||
|
@ -1411,6 +1412,19 @@ genX(BeginCommandBuffer)(
|
||||||
cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_RENDER_TARGETS;
|
cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_RENDER_TARGETS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if GEN_GEN >= 8 || GEN_IS_HASWELL
|
||||||
|
if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
|
||||||
|
const VkCommandBufferInheritanceConditionalRenderingInfoEXT *conditional_rendering_info =
|
||||||
|
vk_find_struct_const(pBeginInfo->pInheritanceInfo->pNext, COMMAND_BUFFER_INHERITANCE_CONDITIONAL_RENDERING_INFO_EXT);
|
||||||
|
|
||||||
|
/* If secondary buffer supports conditional rendering
|
||||||
|
* we should emit commands as if conditional rendering is enabled.
|
||||||
|
*/
|
||||||
|
cmd_buffer->state.conditional_render_enabled =
|
||||||
|
conditional_rendering_info && conditional_rendering_info->conditionalRenderingEnable;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1515,6 +1529,19 @@ genX(CmdExecuteCommands)(
|
||||||
assert(secondary->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
|
assert(secondary->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
|
||||||
assert(!anv_batch_has_error(&secondary->batch));
|
assert(!anv_batch_has_error(&secondary->batch));
|
||||||
|
|
||||||
|
#if GEN_GEN >= 8 || GEN_IS_HASWELL
|
||||||
|
if (secondary->state.conditional_render_enabled) {
|
||||||
|
if (!primary->state.conditional_render_enabled) {
|
||||||
|
/* Secondary buffer is constructed as if it will be executed
|
||||||
|
* with conditional rendering, we should satisfy this dependency
|
||||||
|
* regardless of conditional rendering being enabled in primary.
|
||||||
|
*/
|
||||||
|
emit_lri(&primary->batch, CS_GPR(ANV_PREDICATE_RESULT_REG), UINT32_MAX);
|
||||||
|
emit_lri(&primary->batch, CS_GPR(ANV_PREDICATE_RESULT_REG) + 4, UINT32_MAX);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
if (secondary->usage_flags &
|
if (secondary->usage_flags &
|
||||||
VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) {
|
VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) {
|
||||||
/* If we're continuing a render pass from the primary, we need to
|
/* If we're continuing a render pass from the primary, we need to
|
||||||
|
@ -2777,6 +2804,9 @@ void genX(CmdDraw)(
|
||||||
|
|
||||||
genX(cmd_buffer_flush_state)(cmd_buffer);
|
genX(cmd_buffer_flush_state)(cmd_buffer);
|
||||||
|
|
||||||
|
if (cmd_buffer->state.conditional_render_enabled)
|
||||||
|
genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
|
||||||
|
|
||||||
if (vs_prog_data->uses_firstvertex ||
|
if (vs_prog_data->uses_firstvertex ||
|
||||||
vs_prog_data->uses_baseinstance)
|
vs_prog_data->uses_baseinstance)
|
||||||
emit_base_vertex_instance(cmd_buffer, firstVertex, firstInstance);
|
emit_base_vertex_instance(cmd_buffer, firstVertex, firstInstance);
|
||||||
|
@ -2789,6 +2819,7 @@ void genX(CmdDraw)(
|
||||||
instanceCount *= anv_subpass_view_count(cmd_buffer->state.subpass);
|
instanceCount *= anv_subpass_view_count(cmd_buffer->state.subpass);
|
||||||
|
|
||||||
anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
|
anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
|
||||||
|
prim.PredicateEnable = cmd_buffer->state.conditional_render_enabled;
|
||||||
prim.VertexAccessType = SEQUENTIAL;
|
prim.VertexAccessType = SEQUENTIAL;
|
||||||
prim.PrimitiveTopologyType = pipeline->topology;
|
prim.PrimitiveTopologyType = pipeline->topology;
|
||||||
prim.VertexCountPerInstance = vertexCount;
|
prim.VertexCountPerInstance = vertexCount;
|
||||||
|
@ -2818,6 +2849,9 @@ void genX(CmdDrawIndexed)(
|
||||||
|
|
||||||
genX(cmd_buffer_flush_state)(cmd_buffer);
|
genX(cmd_buffer_flush_state)(cmd_buffer);
|
||||||
|
|
||||||
|
if (cmd_buffer->state.conditional_render_enabled)
|
||||||
|
genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
|
||||||
|
|
||||||
if (vs_prog_data->uses_firstvertex ||
|
if (vs_prog_data->uses_firstvertex ||
|
||||||
vs_prog_data->uses_baseinstance)
|
vs_prog_data->uses_baseinstance)
|
||||||
emit_base_vertex_instance(cmd_buffer, vertexOffset, firstInstance);
|
emit_base_vertex_instance(cmd_buffer, vertexOffset, firstInstance);
|
||||||
|
@ -2830,6 +2864,7 @@ void genX(CmdDrawIndexed)(
|
||||||
instanceCount *= anv_subpass_view_count(cmd_buffer->state.subpass);
|
instanceCount *= anv_subpass_view_count(cmd_buffer->state.subpass);
|
||||||
|
|
||||||
anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
|
anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
|
||||||
|
prim.PredicateEnable = cmd_buffer->state.conditional_render_enabled;
|
||||||
prim.VertexAccessType = RANDOM;
|
prim.VertexAccessType = RANDOM;
|
||||||
prim.PrimitiveTopologyType = pipeline->topology;
|
prim.PrimitiveTopologyType = pipeline->topology;
|
||||||
prim.VertexCountPerInstance = indexCount;
|
prim.VertexCountPerInstance = indexCount;
|
||||||
|
@ -2954,6 +2989,9 @@ void genX(CmdDrawIndirect)(
|
||||||
|
|
||||||
genX(cmd_buffer_flush_state)(cmd_buffer);
|
genX(cmd_buffer_flush_state)(cmd_buffer);
|
||||||
|
|
||||||
|
if (cmd_buffer->state.conditional_render_enabled)
|
||||||
|
genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
|
||||||
|
|
||||||
for (uint32_t i = 0; i < drawCount; i++) {
|
for (uint32_t i = 0; i < drawCount; i++) {
|
||||||
struct anv_address draw = anv_address_add(buffer->address, offset);
|
struct anv_address draw = anv_address_add(buffer->address, offset);
|
||||||
|
|
||||||
|
@ -2967,6 +3005,7 @@ void genX(CmdDrawIndirect)(
|
||||||
|
|
||||||
anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
|
anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
|
||||||
prim.IndirectParameterEnable = true;
|
prim.IndirectParameterEnable = true;
|
||||||
|
prim.PredicateEnable = cmd_buffer->state.conditional_render_enabled;
|
||||||
prim.VertexAccessType = SEQUENTIAL;
|
prim.VertexAccessType = SEQUENTIAL;
|
||||||
prim.PrimitiveTopologyType = pipeline->topology;
|
prim.PrimitiveTopologyType = pipeline->topology;
|
||||||
}
|
}
|
||||||
|
@ -2994,6 +3033,9 @@ void genX(CmdDrawIndexedIndirect)(
|
||||||
|
|
||||||
genX(cmd_buffer_flush_state)(cmd_buffer);
|
genX(cmd_buffer_flush_state)(cmd_buffer);
|
||||||
|
|
||||||
|
if (cmd_buffer->state.conditional_render_enabled)
|
||||||
|
genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
|
||||||
|
|
||||||
for (uint32_t i = 0; i < drawCount; i++) {
|
for (uint32_t i = 0; i < drawCount; i++) {
|
||||||
struct anv_address draw = anv_address_add(buffer->address, offset);
|
struct anv_address draw = anv_address_add(buffer->address, offset);
|
||||||
|
|
||||||
|
@ -3008,6 +3050,7 @@ void genX(CmdDrawIndexedIndirect)(
|
||||||
|
|
||||||
anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
|
anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
|
||||||
prim.IndirectParameterEnable = true;
|
prim.IndirectParameterEnable = true;
|
||||||
|
prim.PredicateEnable = cmd_buffer->state.conditional_render_enabled;
|
||||||
prim.VertexAccessType = RANDOM;
|
prim.VertexAccessType = RANDOM;
|
||||||
prim.PrimitiveTopologyType = pipeline->topology;
|
prim.PrimitiveTopologyType = pipeline->topology;
|
||||||
}
|
}
|
||||||
|
@ -3018,17 +3061,27 @@ void genX(CmdDrawIndexedIndirect)(
|
||||||
cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_WRITES;
|
cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_WRITES;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define TMP_DRAW_COUNT_REG MI_ALU_REG14
|
||||||
|
|
||||||
static void
|
static void
|
||||||
prepare_for_draw_count_predicate(struct anv_cmd_buffer *cmd_buffer,
|
prepare_for_draw_count_predicate(struct anv_cmd_buffer *cmd_buffer,
|
||||||
struct anv_address count_address)
|
struct anv_address count_address,
|
||||||
|
const bool conditional_render_enabled)
|
||||||
{
|
{
|
||||||
/* Upload the current draw count from the draw parameters buffer to
|
if (conditional_render_enabled) {
|
||||||
* MI_PREDICATE_SRC0.
|
#if GEN_GEN >= 8 || GEN_IS_HASWELL
|
||||||
*/
|
emit_lrm(&cmd_buffer->batch, CS_GPR(TMP_DRAW_COUNT_REG), count_address);
|
||||||
emit_lrm(&cmd_buffer->batch, MI_PREDICATE_SRC0, count_address);
|
emit_lri(&cmd_buffer->batch, CS_GPR(TMP_DRAW_COUNT_REG) + 4, 0);
|
||||||
emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4, 0);
|
#endif
|
||||||
|
} else {
|
||||||
|
/* Upload the current draw count from the draw parameters buffer to
|
||||||
|
* MI_PREDICATE_SRC0.
|
||||||
|
*/
|
||||||
|
emit_lrm(&cmd_buffer->batch, MI_PREDICATE_SRC0, count_address);
|
||||||
|
emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4, 0);
|
||||||
|
|
||||||
emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0);
|
emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
@ -3060,6 +3113,54 @@ emit_draw_count_predicate(struct anv_cmd_buffer *cmd_buffer,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if GEN_GEN >= 8 || GEN_IS_HASWELL
|
||||||
|
static void
|
||||||
|
emit_draw_count_predicate_with_conditional_render(
|
||||||
|
struct anv_cmd_buffer *cmd_buffer,
|
||||||
|
uint32_t draw_index)
|
||||||
|
{
|
||||||
|
const int draw_index_reg = MI_ALU_REG0;
|
||||||
|
const int tmp_result_reg = MI_ALU_REG1;
|
||||||
|
|
||||||
|
emit_lri(&cmd_buffer->batch, CS_GPR(draw_index_reg), draw_index);
|
||||||
|
emit_lri(&cmd_buffer->batch, CS_GPR(draw_index_reg) + 4, 0);
|
||||||
|
|
||||||
|
uint32_t *dw;
|
||||||
|
/* Compute (draw_index < draw_count).
|
||||||
|
* We do this by subtracting and storing the carry bit.
|
||||||
|
*/
|
||||||
|
dw = anv_batch_emitn(&cmd_buffer->batch, 9, GENX(MI_MATH));
|
||||||
|
dw[1] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCA, draw_index_reg);
|
||||||
|
dw[2] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCB, TMP_DRAW_COUNT_REG);
|
||||||
|
dw[3] = mi_alu(MI_ALU_SUB, 0, 0);
|
||||||
|
dw[4] = mi_alu(MI_ALU_STORE, tmp_result_reg, MI_ALU_CF);
|
||||||
|
/* & condition */
|
||||||
|
dw[5] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCA, tmp_result_reg);
|
||||||
|
dw[6] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCB, ANV_PREDICATE_RESULT_REG);
|
||||||
|
dw[7] = mi_alu(MI_ALU_AND, 0, 0);
|
||||||
|
dw[8] = mi_alu(MI_ALU_STORE, tmp_result_reg, MI_ALU_ACCU);
|
||||||
|
|
||||||
|
#if GEN_GEN >= 8
|
||||||
|
emit_lrr(&cmd_buffer->batch, MI_PREDICATE_RESULT, CS_GPR(tmp_result_reg));
|
||||||
|
#else
|
||||||
|
/* MI_PREDICATE_RESULT is not whitelisted in i915 command parser
|
||||||
|
* so we emit MI_PREDICATE to set it.
|
||||||
|
*/
|
||||||
|
|
||||||
|
emit_lrr(&cmd_buffer->batch, MI_PREDICATE_SRC0, CS_GPR(tmp_result_reg));
|
||||||
|
emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4, 0);
|
||||||
|
emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1, 0);
|
||||||
|
emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0);
|
||||||
|
|
||||||
|
anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
|
||||||
|
mip.LoadOperation = LOAD_LOADINV;
|
||||||
|
mip.CombineOperation = COMBINE_SET;
|
||||||
|
mip.CompareOperation = COMPARE_SRCS_EQUAL;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
void genX(CmdDrawIndirectCountKHR)(
|
void genX(CmdDrawIndirectCountKHR)(
|
||||||
VkCommandBuffer commandBuffer,
|
VkCommandBuffer commandBuffer,
|
||||||
VkBuffer _buffer,
|
VkBuffer _buffer,
|
||||||
|
@ -3084,12 +3185,21 @@ void genX(CmdDrawIndirectCountKHR)(
|
||||||
struct anv_address count_address =
|
struct anv_address count_address =
|
||||||
anv_address_add(count_buffer->address, countBufferOffset);
|
anv_address_add(count_buffer->address, countBufferOffset);
|
||||||
|
|
||||||
prepare_for_draw_count_predicate(cmd_buffer, count_address);
|
prepare_for_draw_count_predicate(cmd_buffer, count_address,
|
||||||
|
cmd_state->conditional_render_enabled);
|
||||||
|
|
||||||
for (uint32_t i = 0; i < maxDrawCount; i++) {
|
for (uint32_t i = 0; i < maxDrawCount; i++) {
|
||||||
struct anv_address draw = anv_address_add(buffer->address, offset);
|
struct anv_address draw = anv_address_add(buffer->address, offset);
|
||||||
|
|
||||||
|
#if GEN_GEN >= 8 || GEN_IS_HASWELL
|
||||||
|
if (cmd_state->conditional_render_enabled) {
|
||||||
|
emit_draw_count_predicate_with_conditional_render(cmd_buffer, i);
|
||||||
|
} else {
|
||||||
|
emit_draw_count_predicate(cmd_buffer, i);
|
||||||
|
}
|
||||||
|
#else
|
||||||
emit_draw_count_predicate(cmd_buffer, i);
|
emit_draw_count_predicate(cmd_buffer, i);
|
||||||
|
#endif
|
||||||
|
|
||||||
if (vs_prog_data->uses_firstvertex ||
|
if (vs_prog_data->uses_firstvertex ||
|
||||||
vs_prog_data->uses_baseinstance)
|
vs_prog_data->uses_baseinstance)
|
||||||
|
@ -3136,12 +3246,21 @@ void genX(CmdDrawIndexedIndirectCountKHR)(
|
||||||
struct anv_address count_address =
|
struct anv_address count_address =
|
||||||
anv_address_add(count_buffer->address, countBufferOffset);
|
anv_address_add(count_buffer->address, countBufferOffset);
|
||||||
|
|
||||||
prepare_for_draw_count_predicate(cmd_buffer, count_address);
|
prepare_for_draw_count_predicate(cmd_buffer, count_address,
|
||||||
|
cmd_state->conditional_render_enabled);
|
||||||
|
|
||||||
for (uint32_t i = 0; i < maxDrawCount; i++) {
|
for (uint32_t i = 0; i < maxDrawCount; i++) {
|
||||||
struct anv_address draw = anv_address_add(buffer->address, offset);
|
struct anv_address draw = anv_address_add(buffer->address, offset);
|
||||||
|
|
||||||
|
#if GEN_GEN >= 8 || GEN_IS_HASWELL
|
||||||
|
if (cmd_state->conditional_render_enabled) {
|
||||||
|
emit_draw_count_predicate_with_conditional_render(cmd_buffer, i);
|
||||||
|
} else {
|
||||||
|
emit_draw_count_predicate(cmd_buffer, i);
|
||||||
|
}
|
||||||
|
#else
|
||||||
emit_draw_count_predicate(cmd_buffer, i);
|
emit_draw_count_predicate(cmd_buffer, i);
|
||||||
|
#endif
|
||||||
|
|
||||||
/* TODO: We need to stomp base vertex to 0 somehow */
|
/* TODO: We need to stomp base vertex to 0 somehow */
|
||||||
if (vs_prog_data->uses_firstvertex ||
|
if (vs_prog_data->uses_firstvertex ||
|
||||||
|
@ -3369,7 +3488,11 @@ void genX(CmdDispatchBase)(
|
||||||
|
|
||||||
genX(cmd_buffer_flush_compute_state)(cmd_buffer);
|
genX(cmd_buffer_flush_compute_state)(cmd_buffer);
|
||||||
|
|
||||||
|
if (cmd_buffer->state.conditional_render_enabled)
|
||||||
|
genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
|
||||||
|
|
||||||
anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), ggw) {
|
anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), ggw) {
|
||||||
|
ggw.PredicateEnable = cmd_buffer->state.conditional_render_enabled;
|
||||||
ggw.SIMDSize = prog_data->simd_size / 16;
|
ggw.SIMDSize = prog_data->simd_size / 16;
|
||||||
ggw.ThreadDepthCounterMaximum = 0;
|
ggw.ThreadDepthCounterMaximum = 0;
|
||||||
ggw.ThreadHeightCounterMaximum = 0;
|
ggw.ThreadHeightCounterMaximum = 0;
|
||||||
|
@ -3463,11 +3586,28 @@ void genX(CmdDispatchIndirect)(
|
||||||
mip.CombineOperation = COMBINE_OR;
|
mip.CombineOperation = COMBINE_OR;
|
||||||
mip.CompareOperation = COMPARE_FALSE;
|
mip.CompareOperation = COMPARE_FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if GEN_IS_HASWELL
|
||||||
|
if (cmd_buffer->state.conditional_render_enabled) {
|
||||||
|
emit_lrr(batch, MI_PREDICATE_SRC0, CS_GPR(ANV_PREDICATE_RESULT_REG));
|
||||||
|
/* predicate &= !(conditional_rendering_predicate == 0); */
|
||||||
|
anv_batch_emit(batch, GENX(MI_PREDICATE), mip) {
|
||||||
|
mip.LoadOperation = LOAD_LOADINV;
|
||||||
|
mip.CombineOperation = COMBINE_AND;
|
||||||
|
mip.CompareOperation = COMPARE_SRCS_EQUAL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#else /* GEN_GEN > 7 */
|
||||||
|
if (cmd_buffer->state.conditional_render_enabled)
|
||||||
|
genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
anv_batch_emit(batch, GENX(GPGPU_WALKER), ggw) {
|
anv_batch_emit(batch, GENX(GPGPU_WALKER), ggw) {
|
||||||
ggw.IndirectParameterEnable = true;
|
ggw.IndirectParameterEnable = true;
|
||||||
ggw.PredicateEnable = GEN_GEN <= 7;
|
ggw.PredicateEnable = GEN_GEN <= 7 ||
|
||||||
|
cmd_buffer->state.conditional_render_enabled;
|
||||||
ggw.SIMDSize = prog_data->simd_size / 16;
|
ggw.SIMDSize = prog_data->simd_size / 16;
|
||||||
ggw.ThreadDepthCounterMaximum = 0;
|
ggw.ThreadDepthCounterMaximum = 0;
|
||||||
ggw.ThreadHeightCounterMaximum = 0;
|
ggw.ThreadHeightCounterMaximum = 0;
|
||||||
|
@ -4423,3 +4563,75 @@ void genX(CmdEndRenderPass2KHR)(
|
||||||
{
|
{
|
||||||
genX(CmdEndRenderPass)(commandBuffer);
|
genX(CmdEndRenderPass)(commandBuffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
genX(cmd_emit_conditional_render_predicate)(struct anv_cmd_buffer *cmd_buffer)
|
||||||
|
{
|
||||||
|
#if GEN_GEN >= 8 || GEN_IS_HASWELL
|
||||||
|
emit_lrr(&cmd_buffer->batch, MI_PREDICATE_SRC0, CS_GPR(ANV_PREDICATE_RESULT_REG));
|
||||||
|
emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4, 0);
|
||||||
|
emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1, 0);
|
||||||
|
emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0);
|
||||||
|
|
||||||
|
anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
|
||||||
|
mip.LoadOperation = LOAD_LOADINV;
|
||||||
|
mip.CombineOperation = COMBINE_SET;
|
||||||
|
mip.CompareOperation = COMPARE_SRCS_EQUAL;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
#if GEN_GEN >= 8 || GEN_IS_HASWELL
|
||||||
|
void genX(CmdBeginConditionalRenderingEXT)(
|
||||||
|
VkCommandBuffer commandBuffer,
|
||||||
|
const VkConditionalRenderingBeginInfoEXT* pConditionalRenderingBegin)
|
||||||
|
{
|
||||||
|
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
|
||||||
|
ANV_FROM_HANDLE(anv_buffer, buffer, pConditionalRenderingBegin->buffer);
|
||||||
|
struct anv_cmd_state *cmd_state = &cmd_buffer->state;
|
||||||
|
struct anv_address value_address =
|
||||||
|
anv_address_add(buffer->address, pConditionalRenderingBegin->offset);
|
||||||
|
|
||||||
|
const bool isInverted = pConditionalRenderingBegin->flags &
|
||||||
|
VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT;
|
||||||
|
|
||||||
|
cmd_state->conditional_render_enabled = true;
|
||||||
|
|
||||||
|
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
|
||||||
|
|
||||||
|
/* Section 19.4 of the Vulkan 1.1.85 spec says:
|
||||||
|
*
|
||||||
|
* If the value of the predicate in buffer memory changes
|
||||||
|
* while conditional rendering is active, the rendering commands
|
||||||
|
* may be discarded in an implementation-dependent way.
|
||||||
|
* Some implementations may latch the value of the predicate
|
||||||
|
* upon beginning conditional rendering while others
|
||||||
|
* may read it before every rendering command.
|
||||||
|
*
|
||||||
|
* So it's perfectly fine to read a value from the buffer once.
|
||||||
|
*/
|
||||||
|
emit_lrm(&cmd_buffer->batch, CS_GPR(MI_ALU_REG0), value_address);
|
||||||
|
/* Zero the top 32-bits of MI_PREDICATE_SRC0 */
|
||||||
|
emit_lri(&cmd_buffer->batch, CS_GPR(MI_ALU_REG0) + 4, 0);
|
||||||
|
|
||||||
|
/* Precompute predicate result, it is necessary to support secondary
|
||||||
|
* command buffers since it is unknown if conditional rendering is
|
||||||
|
* inverted when populating them.
|
||||||
|
*/
|
||||||
|
uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(MI_MATH));
|
||||||
|
dw[1] = mi_alu(MI_ALU_LOAD0, MI_ALU_SRCA, 0);
|
||||||
|
dw[2] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCB, MI_ALU_REG0);
|
||||||
|
dw[3] = mi_alu(MI_ALU_SUB, 0, 0);
|
||||||
|
dw[4] = mi_alu(isInverted ? MI_ALU_STOREINV : MI_ALU_STORE,
|
||||||
|
ANV_PREDICATE_RESULT_REG, MI_ALU_CF);
|
||||||
|
}
|
||||||
|
|
||||||
|
void genX(CmdEndConditionalRenderingEXT)(
|
||||||
|
VkCommandBuffer commandBuffer)
|
||||||
|
{
|
||||||
|
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
|
||||||
|
struct anv_cmd_state *cmd_state = &cmd_buffer->state;
|
||||||
|
|
||||||
|
cmd_state->conditional_render_enabled = false;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
Loading…
Reference in New Issue