anv: support rasterizer discard dynamic state

Implemented by emitting a 3DSTATE_STREAMOUT packet.

v2: logic fixes; properly merge and emit all contents (Lionel)

Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10366>
This commit is contained in:
Tapani Pälli 2021-05-03 11:09:27 +03:00 committed by Marge Bot
parent 284290a876
commit 4d531c67df
5 changed files with 164 additions and 80 deletions

View File

@ -101,6 +101,7 @@ const struct anv_dynamic_state default_dynamic_state = {
.dyn_vbo_stride = 0,
.dyn_vbo_size = 0,
.color_writes = 0xff,
.raster_discard = 0,
};
/**
@ -189,6 +190,8 @@ anv_dynamic_state_copy(struct anv_dynamic_state *dest,
ANV_CMP_COPY(dyn_vbo_stride, ANV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE);
ANV_CMP_COPY(dyn_vbo_size, ANV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE);
ANV_CMP_COPY(raster_discard, ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE);
if (copy_mask & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS) {
dest->sample_locations.samples = src->sample_locations.samples;
typed_memcpy(dest->sample_locations.locations,
@ -504,6 +507,17 @@ void anv_CmdBindPipeline(
}
}
/**
 * Entry point for vkCmdSetRasterizerDiscardEnableEXT.
 *
 * Records the requested rasterizer-discard value in the command buffer's
 * dynamic graphics state and raises
 * ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE in the graphics dirty mask.
 * This function only updates command-buffer state; it does not write any
 * packet itself.
 *
 * @param commandBuffer            Vulkan handle, resolved to an
 *                                 anv_cmd_buffer through ANV_FROM_HANDLE.
 * @param rasterizerDiscardEnable  Value stored into the dynamic
 *                                 raster_discard field.
 */
void anv_CmdSetRasterizerDiscardEnableEXT(
   VkCommandBuffer commandBuffer,
   VkBool32 rasterizerDiscardEnable)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   /* The dirty flag and the stored value live in separate fields, so the
    * order of these two stores does not matter.
    */
   cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE;
   cmd_buffer->state.gfx.dynamic.raster_discard = rasterizerDiscardEnable;
}
void anv_CmdSetViewport(
VkCommandBuffer commandBuffer,
uint32_t firstViewport,

View File

@ -1299,6 +1299,16 @@ anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline,
pipeline->active_stages = 0;
/* Information on which states are considered dynamic. */
const VkPipelineDynamicStateCreateInfo *dyn_info =
info->pDynamicState;
uint32_t dynamic_states = 0;
if (dyn_info) {
for (unsigned i = 0; i < dyn_info->dynamicStateCount; i++)
dynamic_states |=
anv_cmd_dirty_bit_for_vk_dynamic_state(dyn_info->pDynamicStates[i]);
}
VkResult result;
for (uint32_t i = 0; i < info->stageCount; i++) {
const VkPipelineShaderStageCreateInfo *sinfo = &info->pStages[i];
@ -1343,7 +1353,8 @@ anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline,
break;
case MESA_SHADER_FRAGMENT: {
const bool raster_enabled =
!info->pRasterizationState->rasterizerDiscardEnable;
!info->pRasterizationState->rasterizerDiscardEnable ||
dynamic_states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE;
populate_wm_prog_key(pipeline, sinfo->flags,
pipeline->base.device->robust_buffer_access,
pipeline->subpass,
@ -1907,12 +1918,16 @@ copy_non_dynamic_state(struct anv_graphics_pipeline *pipeline,
struct anv_dynamic_state *dynamic = &pipeline->dynamic_state;
bool raster_discard =
pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
!(pipeline->dynamic_states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE);
/* Section 9.2 of the Vulkan 1.0.15 spec says:
*
* pViewportState is [...] NULL if the pipeline
* has rasterization disabled.
*/
if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable) {
if (!raster_discard) {
assert(pCreateInfo->pViewportState);
dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
@ -1975,6 +1990,12 @@ copy_non_dynamic_state(struct anv_graphics_pipeline *pipeline,
}
}
if (states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE) {
assert(pCreateInfo->pRasterizationState);
dynamic->raster_discard =
pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
}
/* Section 9.2 of the Vulkan 1.0.15 spec says:
*
* pColorBlendState is [...] NULL if the pipeline has rasterization
@ -1989,8 +2010,7 @@ copy_non_dynamic_state(struct anv_graphics_pipeline *pipeline,
}
}
if (uses_color_att &&
!pCreateInfo->pRasterizationState->rasterizerDiscardEnable) {
if (uses_color_att && !raster_discard) {
assert(pCreateInfo->pColorBlendState);
if (states & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS)
@ -2010,8 +2030,7 @@ copy_non_dynamic_state(struct anv_graphics_pipeline *pipeline,
* disabled or if the subpass of the render pass the pipeline is created
* against does not use a depth/stencil attachment.
*/
if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
subpass->depth_stencil_attachment) {
if (!raster_discard && subpass->depth_stencil_attachment) {
assert(pCreateInfo->pDepthStencilState);
if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS) {
@ -2150,7 +2169,8 @@ copy_non_dynamic_state(struct anv_graphics_pipeline *pipeline,
pipeline->static_state_mask = states &
(ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS |
ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE |
ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE);
ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE |
ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE);
}
static void

View File

@ -2676,6 +2676,7 @@ struct anv_dynamic_state {
VkCompareOp depth_compare_op;
bool depth_bounds_test_enable;
bool stencil_test_enable;
bool raster_discard;
bool dyn_vbo_stride;
bool dyn_vbo_size;
@ -3426,6 +3427,7 @@ struct anv_graphics_pipeline {
uint32_t xfb_bo_pitch[4];
uint32_t wm[3];
uint32_t blend_state[MAX_RTS * 2];
uint32_t streamout_state[3];
} gfx7;
struct {
@ -3435,6 +3437,7 @@ struct anv_graphics_pipeline {
uint32_t wm[2];
uint32_t ps_blend[2];
uint32_t blend_state[1 + MAX_RTS * 2];
uint32_t streamout_state[5];
} gfx8;
struct {

View File

@ -35,6 +35,8 @@
#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
#include "nir/nir_xfb_info.h"
/* We reserve :
* - GPR 14 for secondary command buffer returns
* - GPR 15 for conditional rendering
@ -3438,6 +3440,28 @@ cmd_buffer_emit_clip(struct anv_cmd_buffer *cmd_buffer)
pipeline->gfx7.clip);
}
static void
cmd_buffer_emit_streamout(struct anv_cmd_buffer *cmd_buffer)
{
const struct anv_dynamic_state *d = &cmd_buffer->state.gfx.dynamic;
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
#if GFX_VER == 7
# define streamout_state_dw pipeline->gfx7.streamout_state
#else
# define streamout_state_dw pipeline->gfx8.streamout_state
#endif
uint32_t dwords[GENX(3DSTATE_STREAMOUT_length)];
struct GENX(3DSTATE_STREAMOUT) so = {
GENX(3DSTATE_STREAMOUT_header),
.RenderingDisable = d->raster_discard,
};
GENX(3DSTATE_STREAMOUT_pack)(NULL, dwords, &so);
anv_batch_emit_merge(&cmd_buffer->batch, dwords, streamout_state_dw);
}
void
genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
{
@ -3659,6 +3683,9 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
cmd_buffer_emit_clip(cmd_buffer);
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE)
cmd_buffer_emit_streamout(cmd_buffer);
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT)
gfx8_cmd_buffer_emit_viewport(cmd_buffer);

View File

@ -1483,7 +1483,8 @@ emit_3dstate_clip(struct anv_graphics_pipeline *pipeline,
static void
emit_3dstate_streamout(struct anv_graphics_pipeline *pipeline,
const VkPipelineRasterizationStateCreateInfo *rs_info)
const VkPipelineRasterizationStateCreateInfo *rs_info,
const uint32_t dynamic_states)
{
const struct brw_vue_prog_data *prog_data =
anv_pipeline_get_last_vue_prog_data(pipeline);
@ -1497,71 +1498,87 @@ emit_3dstate_streamout(struct anv_graphics_pipeline *pipeline,
else
xfb_info = pipeline->shaders[MESA_SHADER_VERTEX]->xfb_info;
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_STREAMOUT), so) {
so.RenderingDisable = rs_info->rasterizerDiscardEnable;
if (xfb_info) {
so.SOFunctionEnable = true;
so.SOStatisticsEnable = true;
switch (vk_provoking_vertex_mode(rs_info)) {
case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT:
so.ReorderMode = LEADING;
break;
case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT:
so.ReorderMode = TRAILING;
break;
default:
unreachable("Invalid provoking vertex mode");
}
const VkPipelineRasterizationStateStreamCreateInfoEXT *stream_info =
vk_find_struct_const(rs_info, PIPELINE_RASTERIZATION_STATE_STREAM_CREATE_INFO_EXT);
so.RenderStreamSelect = stream_info ?
stream_info->rasterizationStream : 0;
#if GFX_VER >= 8
so.Buffer0SurfacePitch = xfb_info->buffers[0].stride;
so.Buffer1SurfacePitch = xfb_info->buffers[1].stride;
so.Buffer2SurfacePitch = xfb_info->buffers[2].stride;
so.Buffer3SurfacePitch = xfb_info->buffers[3].stride;
#if GFX_VER == 7
# define streamout_state_dw pipeline->gfx7.streamout_state
#else
pipeline->gfx7.xfb_bo_pitch[0] = xfb_info->buffers[0].stride;
pipeline->gfx7.xfb_bo_pitch[1] = xfb_info->buffers[1].stride;
pipeline->gfx7.xfb_bo_pitch[2] = xfb_info->buffers[2].stride;
pipeline->gfx7.xfb_bo_pitch[3] = xfb_info->buffers[3].stride;
/* On Gfx7, the SO buffer enables live in 3DSTATE_STREAMOUT which
* is a bit inconvenient because we don't know what buffers will
* actually be enabled until draw time. We do our best here by
* setting them based on buffers_written and we disable them
* as-needed at draw time by setting EndAddress = BaseAddress.
*/
so.SOBufferEnable0 = xfb_info->buffers_written & (1 << 0);
so.SOBufferEnable1 = xfb_info->buffers_written & (1 << 1);
so.SOBufferEnable2 = xfb_info->buffers_written & (1 << 2);
so.SOBufferEnable3 = xfb_info->buffers_written & (1 << 3);
# define streamout_state_dw pipeline->gfx8.streamout_state
#endif
int urb_entry_read_offset = 0;
int urb_entry_read_length =
(prog_data->vue_map.num_slots + 1) / 2 - urb_entry_read_offset;
struct GENX(3DSTATE_STREAMOUT) so = {
GENX(3DSTATE_STREAMOUT_header),
.RenderingDisable =
(dynamic_states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE) ?
0 : rs_info->rasterizerDiscardEnable,
};
/* We always read the whole vertex. This could be reduced at some
* point by reading less and offsetting the register index in the
* SO_DECLs.
*/
so.Stream0VertexReadOffset = urb_entry_read_offset;
so.Stream0VertexReadLength = urb_entry_read_length - 1;
so.Stream1VertexReadOffset = urb_entry_read_offset;
so.Stream1VertexReadLength = urb_entry_read_length - 1;
so.Stream2VertexReadOffset = urb_entry_read_offset;
so.Stream2VertexReadLength = urb_entry_read_length - 1;
so.Stream3VertexReadOffset = urb_entry_read_offset;
so.Stream3VertexReadLength = urb_entry_read_length - 1;
if (xfb_info) {
so.SOFunctionEnable = true;
so.SOStatisticsEnable = true;
switch (vk_provoking_vertex_mode(rs_info)) {
case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT:
so.ReorderMode = LEADING;
break;
case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT:
so.ReorderMode = TRAILING;
break;
default:
unreachable("Invalid provoking vertex mode");
}
const VkPipelineRasterizationStateStreamCreateInfoEXT *stream_info =
vk_find_struct_const(rs_info, PIPELINE_RASTERIZATION_STATE_STREAM_CREATE_INFO_EXT);
so.RenderStreamSelect = stream_info ?
stream_info->rasterizationStream : 0;
#if GFX_VER >= 8
so.Buffer0SurfacePitch = xfb_info->buffers[0].stride;
so.Buffer1SurfacePitch = xfb_info->buffers[1].stride;
so.Buffer2SurfacePitch = xfb_info->buffers[2].stride;
so.Buffer3SurfacePitch = xfb_info->buffers[3].stride;
#else
pipeline->gfx7.xfb_bo_pitch[0] = xfb_info->buffers[0].stride;
pipeline->gfx7.xfb_bo_pitch[1] = xfb_info->buffers[1].stride;
pipeline->gfx7.xfb_bo_pitch[2] = xfb_info->buffers[2].stride;
pipeline->gfx7.xfb_bo_pitch[3] = xfb_info->buffers[3].stride;
/* On Gfx7, the SO buffer enables live in 3DSTATE_STREAMOUT which
* is a bit inconvenient because we don't know what buffers will
* actually be enabled until draw time. We do our best here by
* setting them based on buffers_written and we disable them
* as-needed at draw time by setting EndAddress = BaseAddress.
*/
so.SOBufferEnable0 = xfb_info->buffers_written & (1 << 0);
so.SOBufferEnable1 = xfb_info->buffers_written & (1 << 1);
so.SOBufferEnable2 = xfb_info->buffers_written & (1 << 2);
so.SOBufferEnable3 = xfb_info->buffers_written & (1 << 3);
#endif
int urb_entry_read_offset = 0;
int urb_entry_read_length =
(prog_data->vue_map.num_slots + 1) / 2 - urb_entry_read_offset;
/* We always read the whole vertex. This could be reduced at some
* point by reading less and offsetting the register index in the
* SO_DECLs.
*/
so.Stream0VertexReadOffset = urb_entry_read_offset;
so.Stream0VertexReadLength = urb_entry_read_length - 1;
so.Stream1VertexReadOffset = urb_entry_read_offset;
so.Stream1VertexReadLength = urb_entry_read_length - 1;
so.Stream2VertexReadOffset = urb_entry_read_offset;
so.Stream2VertexReadLength = urb_entry_read_length - 1;
so.Stream3VertexReadOffset = urb_entry_read_offset;
so.Stream3VertexReadLength = urb_entry_read_length - 1;
}
if (dynamic_states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE) {
GENX(3DSTATE_STREAMOUT_pack)(NULL, streamout_state_dw, &so);
} else {
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_STREAMOUT), _so)
_so = so;
}
if (xfb_info) {
@ -2404,11 +2421,23 @@ genX(graphics_pipeline_create)(
return result;
}
/* Information on which states are considered dynamic. */
const VkPipelineDynamicStateCreateInfo *dyn_info =
pCreateInfo->pDynamicState;
uint32_t dynamic_states = 0;
if (dyn_info) {
for (unsigned i = 0; i < dyn_info->dynamicStateCount; i++)
dynamic_states |=
anv_cmd_dirty_bit_for_vk_dynamic_state(dyn_info->pDynamicStates[i]);
}
/* If rasterization is not enabled, various CreateInfo structs must be
* ignored.
*/
const bool raster_enabled =
!pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
!pCreateInfo->pRasterizationState->rasterizerDiscardEnable ||
(dynamic_states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE);
const VkPipelineViewportStateCreateInfo *vp_info =
raster_enabled ? pCreateInfo->pViewportState : NULL;
@ -2426,16 +2455,6 @@ genX(graphics_pipeline_create)(
vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT);
/* Information on which states are considered dynamic. */
const VkPipelineDynamicStateCreateInfo *dyn_info =
pCreateInfo->pDynamicState;
uint32_t dynamic_states = 0;
if (dyn_info) {
for (unsigned i = 0; i < dyn_info->dynamicStateCount; i++)
dynamic_states |=
anv_cmd_dirty_bit_for_vk_dynamic_state(dyn_info->pDynamicStates[i]);
}
enum intel_urb_deref_block_size urb_deref_block_size;
emit_urb_setup(pipeline, &urb_deref_block_size);
@ -2456,7 +2475,8 @@ genX(graphics_pipeline_create)(
vp_info,
pCreateInfo->pRasterizationState,
dynamic_states);
emit_3dstate_streamout(pipeline, pCreateInfo->pRasterizationState);
emit_3dstate_streamout(pipeline, pCreateInfo->pRasterizationState,
dynamic_states);
#if GFX_VER == 12
emit_3dstate_primitive_replication(pipeline);