radv: fixup IA_MULTI_VGT_PARAM handling.
This ports the remaining workarounds from radeonsi for the non-TESS cases. It should provide equivalent workarounds for Hawaii and Bonaire. Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Signed-off-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
parent
a465eae38f
commit
3360dbe0c1
|
@ -1267,7 +1267,8 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer,
|
|||
}
|
||||
|
||||
static void
|
||||
radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer)
|
||||
radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer, bool instanced_or_indirect_draw,
|
||||
uint32_t draw_vertex_count)
|
||||
{
|
||||
struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
|
||||
struct radv_device *device = cmd_buffer->device;
|
||||
|
@ -1332,6 +1333,15 @@ radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer)
|
|||
if (cmd_buffer->state.dirty & (RADV_CMD_DIRTY_DYNAMIC_SCISSOR))
|
||||
radv_emit_scissor(cmd_buffer);
|
||||
|
||||
ia_multi_vgt_param = si_get_ia_multi_vgt_param(cmd_buffer, instanced_or_indirect_draw, draw_vertex_count);
|
||||
if (cmd_buffer->state.last_ia_multi_vgt_param != ia_multi_vgt_param) {
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK)
|
||||
radeon_set_context_reg_idx(cmd_buffer->cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param);
|
||||
else
|
||||
radeon_set_context_reg(cmd_buffer->cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param);
|
||||
cmd_buffer->state.last_ia_multi_vgt_param = ia_multi_vgt_param;
|
||||
}
|
||||
|
||||
if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) {
|
||||
uint32_t stages = 0;
|
||||
|
||||
|
@ -1341,15 +1351,12 @@ radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer)
|
|||
S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
|
||||
|
||||
radeon_set_context_reg(cmd_buffer->cs, R_028B54_VGT_SHADER_STAGES_EN, stages);
|
||||
ia_multi_vgt_param = si_get_ia_multi_vgt_param(cmd_buffer);
|
||||
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) {
|
||||
radeon_set_context_reg_idx(cmd_buffer->cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param);
|
||||
radeon_set_context_reg_idx(cmd_buffer->cs, R_028B58_VGT_LS_HS_CONFIG, 2, ls_hs_config);
|
||||
radeon_set_uconfig_reg_idx(cmd_buffer->cs, R_030908_VGT_PRIMITIVE_TYPE, 1, cmd_buffer->state.pipeline->graphics.prim);
|
||||
} else {
|
||||
radeon_set_config_reg(cmd_buffer->cs, R_008958_VGT_PRIMITIVE_TYPE, cmd_buffer->state.pipeline->graphics.prim);
|
||||
radeon_set_context_reg(cmd_buffer->cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param);
|
||||
radeon_set_context_reg(cmd_buffer->cs, R_028B58_VGT_LS_HS_CONFIG, ls_hs_config);
|
||||
}
|
||||
radeon_set_context_reg(cmd_buffer->cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, cmd_buffer->state.pipeline->graphics.gs_out);
|
||||
|
@ -2188,7 +2195,8 @@ void radv_CmdDraw(
|
|||
uint32_t firstInstance)
|
||||
{
|
||||
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
radv_cmd_buffer_flush_state(cmd_buffer);
|
||||
|
||||
radv_cmd_buffer_flush_state(cmd_buffer, (instanceCount > 1), vertexCount);
|
||||
|
||||
MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 10);
|
||||
|
||||
|
@ -2239,7 +2247,7 @@ void radv_CmdDrawIndexed(
|
|||
uint32_t index_max_size = (cmd_buffer->state.index_buffer->size - cmd_buffer->state.index_offset) / index_size;
|
||||
uint64_t index_va;
|
||||
|
||||
radv_cmd_buffer_flush_state(cmd_buffer);
|
||||
radv_cmd_buffer_flush_state(cmd_buffer, (instanceCount > 1), indexCount);
|
||||
radv_emit_primitive_reset_index(cmd_buffer);
|
||||
|
||||
MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 15);
|
||||
|
@ -2337,7 +2345,7 @@ radv_cmd_draw_indirect_count(VkCommandBuffer command
|
|||
uint32_t stride)
|
||||
{
|
||||
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
radv_cmd_buffer_flush_state(cmd_buffer);
|
||||
radv_cmd_buffer_flush_state(cmd_buffer, true, 0);
|
||||
|
||||
MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
|
||||
cmd_buffer->cs, 14);
|
||||
|
@ -2362,7 +2370,7 @@ radv_cmd_draw_indexed_indirect_count(
|
|||
int index_size = cmd_buffer->state.index_type ? 4 : 2;
|
||||
uint32_t index_max_size = (cmd_buffer->state.index_buffer->size - cmd_buffer->state.index_offset) / index_size;
|
||||
uint64_t index_va;
|
||||
radv_cmd_buffer_flush_state(cmd_buffer);
|
||||
radv_cmd_buffer_flush_state(cmd_buffer, true, 0);
|
||||
radv_emit_primitive_reset_index(cmd_buffer);
|
||||
|
||||
index_va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->state.index_buffer->bo);
|
||||
|
|
|
@ -1483,6 +1483,24 @@ calculate_gs_ring_sizes(struct radv_pipeline *pipeline)
|
|||
pipeline->graphics.gsvs_ring_size = MIN2(gsvs_ring_size, max_size);
|
||||
}
|
||||
|
||||
static const struct radv_prim_vertex_count prim_size_table[] = {
|
||||
[V_008958_DI_PT_NONE] = {0, 0},
|
||||
[V_008958_DI_PT_POINTLIST] = {1, 1},
|
||||
[V_008958_DI_PT_LINELIST] = {2, 2},
|
||||
[V_008958_DI_PT_LINESTRIP] = {2, 1},
|
||||
[V_008958_DI_PT_TRILIST] = {3, 3},
|
||||
[V_008958_DI_PT_TRIFAN] = {3, 1},
|
||||
[V_008958_DI_PT_TRISTRIP] = {3, 1},
|
||||
[V_008958_DI_PT_LINELIST_ADJ] = {4, 4},
|
||||
[V_008958_DI_PT_LINESTRIP_ADJ] = {4, 1},
|
||||
[V_008958_DI_PT_TRILIST_ADJ] = {6, 6},
|
||||
[V_008958_DI_PT_TRISTRIP_ADJ] = {6, 2},
|
||||
[V_008958_DI_PT_RECTLIST] = {3, 3},
|
||||
[V_008958_DI_PT_LINELOOP] = {2, 1},
|
||||
[V_008958_DI_PT_POLYGON] = {3, 1},
|
||||
[V_008958_DI_PT_2D_TRI_STRIP] = {0, 0},
|
||||
};
|
||||
|
||||
VkResult
|
||||
radv_pipeline_init(struct radv_pipeline *pipeline,
|
||||
struct radv_device *device,
|
||||
|
@ -1581,7 +1599,9 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
|
|||
pipeline->graphics.gs_out = V_028A6C_OUTPRIM_TYPE_TRISTRIP;
|
||||
}
|
||||
pipeline->graphics.prim_restart_enable = !!pCreateInfo->pInputAssemblyState->primitiveRestartEnable;
|
||||
|
||||
/* prim vertex count will need TESS changes */
|
||||
pipeline->graphics.prim_vertex_count = prim_size_table[pipeline->graphics.prim];
|
||||
|
||||
const VkPipelineVertexInputStateCreateInfo *vi_info =
|
||||
pCreateInfo->pVertexInputState;
|
||||
for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
|
||||
|
|
|
@ -686,8 +686,8 @@ struct radv_attachment_state {
|
|||
|
||||
struct radv_cmd_state {
|
||||
uint32_t vb_dirty;
|
||||
bool vertex_descriptors_dirty;
|
||||
radv_cmd_dirty_mask_t dirty;
|
||||
bool vertex_descriptors_dirty;
|
||||
|
||||
struct radv_pipeline * pipeline;
|
||||
struct radv_pipeline * emitted_pipeline;
|
||||
|
@ -710,6 +710,7 @@ struct radv_cmd_state {
|
|||
float offset_scale;
|
||||
uint32_t descriptors_dirty;
|
||||
uint32_t trace_id;
|
||||
uint32_t last_ia_multi_vgt_param;
|
||||
};
|
||||
|
||||
struct radv_cmd_pool {
|
||||
|
@ -771,7 +772,8 @@ void si_write_viewport(struct radeon_winsys_cs *cs, int first_vp,
|
|||
int count, const VkViewport *viewports);
|
||||
void si_write_scissors(struct radeon_winsys_cs *cs, int first,
|
||||
int count, const VkRect2D *scissors);
|
||||
uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer);
|
||||
uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
|
||||
bool instanced_or_indirect_draw, uint32_t draw_vertex_count);
|
||||
void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer);
|
||||
void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
|
||||
uint64_t src_va, uint64_t dest_va,
|
||||
|
@ -925,6 +927,11 @@ struct radv_multisample_state {
|
|||
unsigned num_samples;
|
||||
};
|
||||
|
||||
/*
 * Vertex-count rule for one primitive type, consumed by
 * radv_prims_for_vertices() to turn a vertex count into a primitive count.
 */
struct radv_prim_vertex_count {
	/* Vertices required before the first primitive exists. */
	uint8_t min;
	/* Extra vertices per additional primitive; 0 yields no primitives. */
	uint8_t incr;
};
|
||||
|
||||
struct radv_pipeline {
|
||||
struct radv_device * device;
|
||||
uint32_t dynamic_state_mask;
|
||||
|
@ -956,6 +963,7 @@ struct radv_pipeline {
|
|||
bool prim_restart_enable;
|
||||
unsigned esgs_ring_size;
|
||||
unsigned gsvs_ring_size;
|
||||
struct radv_prim_vertex_count prim_vertex_count;
|
||||
} graphics;
|
||||
};
|
||||
|
||||
|
|
|
@ -565,8 +565,25 @@ si_write_scissors(struct radeon_winsys_cs *cs, int first,
|
|||
}
|
||||
}
|
||||
|
||||
static inline unsigned
|
||||
radv_prims_for_vertices(struct radv_prim_vertex_count *info, unsigned num)
|
||||
{
|
||||
if (num == 0)
|
||||
return 0;
|
||||
|
||||
if (info->incr == 0)
|
||||
return 0;
|
||||
|
||||
if (num < info->min)
|
||||
return 0;
|
||||
|
||||
return 1 + ((num - info->min) / info->incr);
|
||||
}
|
||||
|
||||
uint32_t
|
||||
si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer)
|
||||
si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
|
||||
bool instanced_or_indirect_draw,
|
||||
uint32_t draw_vertex_count)
|
||||
{
|
||||
enum chip_class chip_class = cmd_buffer->device->physical_device->rad_info.chip_class;
|
||||
enum radeon_family family = cmd_buffer->device->physical_device->rad_info.family;
|
||||
|
@ -580,10 +597,14 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer)
|
|||
bool ia_switch_on_eoi = false;
|
||||
bool partial_vs_wave = false;
|
||||
bool partial_es_wave = false;
|
||||
uint32_t num_prims = radv_prims_for_vertices(&cmd_buffer->state.pipeline->graphics.prim_vertex_count, draw_vertex_count);
|
||||
bool multi_instances_smaller_than_primgroup;
|
||||
|
||||
if (radv_pipeline_has_gs(cmd_buffer->state.pipeline))
|
||||
primgroup_size = 64; /* recommended with a GS */
|
||||
|
||||
multi_instances_smaller_than_primgroup = (instanced_or_indirect_draw ||
|
||||
num_prims < primgroup_size);
|
||||
/* TODO TES */
|
||||
|
||||
/* TODO linestipple */
|
||||
|
@ -596,12 +617,30 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer)
|
|||
prim == V_008958_DI_PT_POLYGON ||
|
||||
prim == V_008958_DI_PT_LINELOOP ||
|
||||
prim == V_008958_DI_PT_TRIFAN ||
|
||||
prim == V_008958_DI_PT_TRISTRIP_ADJ)
|
||||
// info->primitive_restart ||
|
||||
// info->count_from_stream_output)
|
||||
prim == V_008958_DI_PT_TRISTRIP_ADJ ||
|
||||
(cmd_buffer->state.pipeline->graphics.prim_restart_enable &&
|
||||
(family < CHIP_POLARIS10 ||
|
||||
(prim != V_008958_DI_PT_POINTLIST &&
|
||||
prim != V_008958_DI_PT_LINESTRIP &&
|
||||
prim != V_008958_DI_PT_TRISTRIP))))
|
||||
wd_switch_on_eop = true;
|
||||
|
||||
/* TODO HAWAII */
|
||||
/* Hawaii hangs if instancing is enabled and WD_SWITCH_ON_EOP is 0.
|
||||
* We don't know that for indirect drawing, so treat it as
|
||||
* always problematic. */
|
||||
if (family == CHIP_HAWAII &&
|
||||
instanced_or_indirect_draw)
|
||||
wd_switch_on_eop = true;
|
||||
|
||||
/* Performance recommendation for 4 SE Gfx7-8 parts if
|
||||
* instances are smaller than a primgroup.
|
||||
* Assume indirect draws always use small instances.
|
||||
* This is needed for good VS wave utilization.
|
||||
*/
|
||||
if (chip_class <= VI &&
|
||||
info->max_se == 4 &&
|
||||
multi_instances_smaller_than_primgroup)
|
||||
wd_switch_on_eop = true;
|
||||
|
||||
/* Required on CIK and later. */
|
||||
if (info->max_se > 2 && !wd_switch_on_eop)
|
||||
|
@ -614,12 +653,11 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer)
|
|||
(radv_pipeline_has_gs(cmd_buffer->state.pipeline) || max_primgroup_in_wave != 2))))
|
||||
partial_vs_wave = true;
|
||||
|
||||
#if 0
|
||||
/* Instancing bug on Bonaire. */
|
||||
if (family == CHIP_BONAIRE && ia_switch_on_eoi &&
|
||||
(info->indirect || info->instance_count > 1))
|
||||
instanced_or_indirect_draw)
|
||||
partial_vs_wave = true;
|
||||
#endif
|
||||
|
||||
/* If the WD switch is false, the IA switch must be false too. */
|
||||
assert(wd_switch_on_eop || !ia_switch_on_eop);
|
||||
}
|
||||
|
@ -627,19 +665,19 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer)
|
|||
if (ia_switch_on_eoi)
|
||||
partial_es_wave = true;
|
||||
|
||||
/* GS requirement. */
|
||||
if (SI_GS_PER_ES / primgroup_size >= cmd_buffer->device->gs_table_depth - 3)
|
||||
partial_es_wave = true;
|
||||
if (radv_pipeline_has_gs(cmd_buffer->state.pipeline)) {
|
||||
/* GS requirement. */
|
||||
if (SI_GS_PER_ES / primgroup_size >= cmd_buffer->device->gs_table_depth - 3)
|
||||
partial_es_wave = true;
|
||||
|
||||
/* Hw bug with single-primitive instances and SWITCH_ON_EOI
|
||||
* on multi-SE chips. */
|
||||
if (info->max_se >= 2 && ia_switch_on_eoi &&
|
||||
(instanced_or_indirect_draw &&
|
||||
num_prims <= 1))
|
||||
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VGT_FLUSH;
|
||||
}
|
||||
|
||||
/* Hw bug with single-primitive instances and SWITCH_ON_EOI
|
||||
* on multi-SE chips. */
|
||||
#if 0
|
||||
if (info->max_se >= 2 && ia_switch_on_eoi &&
|
||||
(info->indirect ||
|
||||
(info->instance_count > 1 &&
|
||||
si_num_prims_for_vertices(info) <= 1)))
|
||||
sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
|
||||
#endif
|
||||
return S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) |
|
||||
S_028AA8_SWITCH_ON_EOI(ia_switch_on_eoi) |
|
||||
S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) |
|
||||
|
|
Loading…
Reference in New Issue