turnip: Split tu6_emit_xs()
Emit all the state layout config (such as push-const CONSTLEN) first, before emitting anything that depends on that state. This fixes an issue that was showing up when FLUT is enabled in ir3 (which results in a higher probability of not having any immediates lowered to push-consts). Signed-off-by: Rob Clark <robdclark@chromium.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8705>
This commit is contained in:
parent
71003e3c84
commit
4e802538e7
|
@ -469,12 +469,15 @@ r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, uint32_t num_
|
|||
.gfx_bindless = 0x1f,
|
||||
.cs_bindless = 0x1f));
|
||||
|
||||
tu6_emit_xs_config(cs, MESA_SHADER_VERTEX, &vs);
|
||||
tu6_emit_xs_config(cs, MESA_SHADER_TESS_CTRL, NULL);
|
||||
tu6_emit_xs_config(cs, MESA_SHADER_TESS_EVAL, NULL);
|
||||
tu6_emit_xs_config(cs, MESA_SHADER_GEOMETRY, NULL);
|
||||
tu6_emit_xs_config(cs, MESA_SHADER_FRAGMENT, &fs);
|
||||
|
||||
struct tu_pvtmem_config pvtmem = {};
|
||||
tu6_emit_xs_config(cs, MESA_SHADER_VERTEX, &vs, &pvtmem, global_iova(cmd, shaders[GLOBAL_SH_VS]));
|
||||
tu6_emit_xs_config(cs, MESA_SHADER_TESS_CTRL, NULL, &pvtmem, 0);
|
||||
tu6_emit_xs_config(cs, MESA_SHADER_TESS_EVAL, NULL, &pvtmem, 0);
|
||||
tu6_emit_xs_config(cs, MESA_SHADER_GEOMETRY, NULL, &pvtmem, 0);
|
||||
tu6_emit_xs_config(cs, MESA_SHADER_FRAGMENT, &fs, &pvtmem, global_iova(cmd, shaders[fs_id]));
|
||||
tu6_emit_xs(cs, MESA_SHADER_VERTEX, &vs, &pvtmem, global_iova(cmd, shaders[GLOBAL_SH_VS]));
|
||||
tu6_emit_xs(cs, MESA_SHADER_FRAGMENT, &fs, &pvtmem, global_iova(cmd, shaders[fs_id]));
|
||||
|
||||
tu_cs_emit_regs(cs, A6XX_PC_PRIMITIVE_CNTL_0());
|
||||
tu_cs_emit_regs(cs, A6XX_VFD_CONTROL_0());
|
||||
|
|
|
@ -2121,7 +2121,8 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
|
|||
struct tu_cs *cs = &cmd->draw_cs;
|
||||
uint32_t mask = ~pipeline->dynamic_state_mask & BITFIELD_MASK(TU_DYNAMIC_STATE_COUNT);
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * (6 + util_bitcount(mask)));
|
||||
tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * (7 + util_bitcount(mask)));
|
||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_PROGRAM_CONFIG, pipeline->program.config_state);
|
||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_PROGRAM, pipeline->program.state);
|
||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_PROGRAM_BINNING, pipeline->program.binning_state);
|
||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VI, pipeline->vi.state);
|
||||
|
@ -3719,6 +3720,7 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
|
|||
if (cmd->state.dirty & TU_CMD_DIRTY_DRAW_STATE) {
|
||||
tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * (TU_DRAW_STATE_COUNT - 2));
|
||||
|
||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_PROGRAM_CONFIG, pipeline->program.config_state);
|
||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_PROGRAM, pipeline->program.state);
|
||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_PROGRAM_BINNING, pipeline->program.binning_state);
|
||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_TESS, tess_consts);
|
||||
|
|
|
@ -330,63 +330,69 @@ tu_blend_state_is_dual_src(const VkPipelineColorBlendStateCreateInfo *info)
|
|||
return false;
|
||||
}
|
||||
|
||||
static const struct xs_config {
|
||||
uint16_t reg_sp_xs_ctrl;
|
||||
uint16_t reg_sp_xs_config;
|
||||
uint16_t reg_sp_xs_instrlen;
|
||||
uint16_t reg_hlsq_xs_ctrl;
|
||||
uint16_t reg_sp_xs_first_exec_offset;
|
||||
uint16_t reg_sp_xs_pvt_mem_hw_stack_offset;
|
||||
} xs_config[] = {
|
||||
[MESA_SHADER_VERTEX] = {
|
||||
REG_A6XX_SP_VS_CTRL_REG0,
|
||||
REG_A6XX_SP_VS_CONFIG,
|
||||
REG_A6XX_SP_VS_INSTRLEN,
|
||||
REG_A6XX_HLSQ_VS_CNTL,
|
||||
REG_A6XX_SP_VS_OBJ_FIRST_EXEC_OFFSET,
|
||||
REG_A6XX_SP_VS_PVT_MEM_HW_STACK_OFFSET,
|
||||
},
|
||||
[MESA_SHADER_TESS_CTRL] = {
|
||||
REG_A6XX_SP_HS_CTRL_REG0,
|
||||
REG_A6XX_SP_HS_CONFIG,
|
||||
REG_A6XX_SP_HS_INSTRLEN,
|
||||
REG_A6XX_HLSQ_HS_CNTL,
|
||||
REG_A6XX_SP_HS_OBJ_FIRST_EXEC_OFFSET,
|
||||
REG_A6XX_SP_HS_PVT_MEM_HW_STACK_OFFSET,
|
||||
},
|
||||
[MESA_SHADER_TESS_EVAL] = {
|
||||
REG_A6XX_SP_DS_CTRL_REG0,
|
||||
REG_A6XX_SP_DS_CONFIG,
|
||||
REG_A6XX_SP_DS_INSTRLEN,
|
||||
REG_A6XX_HLSQ_DS_CNTL,
|
||||
REG_A6XX_SP_DS_OBJ_FIRST_EXEC_OFFSET,
|
||||
REG_A6XX_SP_DS_PVT_MEM_HW_STACK_OFFSET,
|
||||
},
|
||||
[MESA_SHADER_GEOMETRY] = {
|
||||
REG_A6XX_SP_GS_CTRL_REG0,
|
||||
REG_A6XX_SP_GS_CONFIG,
|
||||
REG_A6XX_SP_GS_INSTRLEN,
|
||||
REG_A6XX_HLSQ_GS_CNTL,
|
||||
REG_A6XX_SP_GS_OBJ_FIRST_EXEC_OFFSET,
|
||||
REG_A6XX_SP_GS_PVT_MEM_HW_STACK_OFFSET,
|
||||
},
|
||||
[MESA_SHADER_FRAGMENT] = {
|
||||
REG_A6XX_SP_FS_CTRL_REG0,
|
||||
REG_A6XX_SP_FS_CONFIG,
|
||||
REG_A6XX_SP_FS_INSTRLEN,
|
||||
REG_A6XX_HLSQ_FS_CNTL,
|
||||
REG_A6XX_SP_FS_OBJ_FIRST_EXEC_OFFSET,
|
||||
REG_A6XX_SP_FS_PVT_MEM_HW_STACK_OFFSET,
|
||||
},
|
||||
[MESA_SHADER_COMPUTE] = {
|
||||
REG_A6XX_SP_CS_CTRL_REG0,
|
||||
REG_A6XX_SP_CS_CONFIG,
|
||||
REG_A6XX_SP_CS_INSTRLEN,
|
||||
REG_A6XX_HLSQ_CS_CNTL,
|
||||
REG_A6XX_SP_CS_OBJ_FIRST_EXEC_OFFSET,
|
||||
REG_A6XX_SP_CS_PVT_MEM_HW_STACK_OFFSET,
|
||||
},
|
||||
};
|
||||
|
||||
void
|
||||
tu6_emit_xs_config(struct tu_cs *cs,
|
||||
gl_shader_stage stage, /* xs->type, but xs may be NULL */
|
||||
const struct ir3_shader_variant *xs,
|
||||
const struct tu_pvtmem_config *pvtmem,
|
||||
uint64_t binary_iova)
|
||||
const struct ir3_shader_variant *xs)
|
||||
{
|
||||
static const struct xs_config {
|
||||
uint16_t reg_sp_xs_ctrl;
|
||||
uint16_t reg_sp_xs_config;
|
||||
uint16_t reg_hlsq_xs_ctrl;
|
||||
uint16_t reg_sp_xs_first_exec_offset;
|
||||
uint16_t reg_sp_xs_pvt_mem_hw_stack_offset;
|
||||
} xs_config[] = {
|
||||
[MESA_SHADER_VERTEX] = {
|
||||
REG_A6XX_SP_VS_CTRL_REG0,
|
||||
REG_A6XX_SP_VS_CONFIG,
|
||||
REG_A6XX_HLSQ_VS_CNTL,
|
||||
REG_A6XX_SP_VS_OBJ_FIRST_EXEC_OFFSET,
|
||||
REG_A6XX_SP_VS_PVT_MEM_HW_STACK_OFFSET,
|
||||
},
|
||||
[MESA_SHADER_TESS_CTRL] = {
|
||||
REG_A6XX_SP_HS_CTRL_REG0,
|
||||
REG_A6XX_SP_HS_CONFIG,
|
||||
REG_A6XX_HLSQ_HS_CNTL,
|
||||
REG_A6XX_SP_HS_OBJ_FIRST_EXEC_OFFSET,
|
||||
REG_A6XX_SP_HS_PVT_MEM_HW_STACK_OFFSET,
|
||||
},
|
||||
[MESA_SHADER_TESS_EVAL] = {
|
||||
REG_A6XX_SP_DS_CTRL_REG0,
|
||||
REG_A6XX_SP_DS_CONFIG,
|
||||
REG_A6XX_HLSQ_DS_CNTL,
|
||||
REG_A6XX_SP_DS_OBJ_FIRST_EXEC_OFFSET,
|
||||
REG_A6XX_SP_DS_PVT_MEM_HW_STACK_OFFSET,
|
||||
},
|
||||
[MESA_SHADER_GEOMETRY] = {
|
||||
REG_A6XX_SP_GS_CTRL_REG0,
|
||||
REG_A6XX_SP_GS_CONFIG,
|
||||
REG_A6XX_HLSQ_GS_CNTL,
|
||||
REG_A6XX_SP_GS_OBJ_FIRST_EXEC_OFFSET,
|
||||
REG_A6XX_SP_GS_PVT_MEM_HW_STACK_OFFSET,
|
||||
},
|
||||
[MESA_SHADER_FRAGMENT] = {
|
||||
REG_A6XX_SP_FS_CTRL_REG0,
|
||||
REG_A6XX_SP_FS_CONFIG,
|
||||
REG_A6XX_HLSQ_FS_CNTL,
|
||||
REG_A6XX_SP_FS_OBJ_FIRST_EXEC_OFFSET,
|
||||
REG_A6XX_SP_FS_PVT_MEM_HW_STACK_OFFSET,
|
||||
},
|
||||
[MESA_SHADER_COMPUTE] = {
|
||||
REG_A6XX_SP_CS_CTRL_REG0,
|
||||
REG_A6XX_SP_CS_CONFIG,
|
||||
REG_A6XX_HLSQ_CS_CNTL,
|
||||
REG_A6XX_SP_CS_OBJ_FIRST_EXEC_OFFSET,
|
||||
REG_A6XX_SP_CS_PVT_MEM_HW_STACK_OFFSET,
|
||||
},
|
||||
};
|
||||
const struct xs_config *cfg = &xs_config[stage];
|
||||
|
||||
if (!xs) {
|
||||
|
@ -399,6 +405,34 @@ tu6_emit_xs_config(struct tu_cs *cs,
|
|||
return;
|
||||
}
|
||||
|
||||
tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_config, 1);
|
||||
tu_cs_emit(cs, A6XX_SP_VS_CONFIG_ENABLED |
|
||||
COND(xs->bindless_tex, A6XX_SP_VS_CONFIG_BINDLESS_TEX) |
|
||||
COND(xs->bindless_samp, A6XX_SP_VS_CONFIG_BINDLESS_SAMP) |
|
||||
COND(xs->bindless_ibo, A6XX_SP_VS_CONFIG_BINDLESS_IBO) |
|
||||
COND(xs->bindless_ubo, A6XX_SP_VS_CONFIG_BINDLESS_UBO) |
|
||||
A6XX_SP_VS_CONFIG_NTEX(xs->num_samp) |
|
||||
A6XX_SP_VS_CONFIG_NSAMP(xs->num_samp));
|
||||
|
||||
tu_cs_emit_pkt4(cs, cfg->reg_hlsq_xs_ctrl, 1);
|
||||
tu_cs_emit(cs, A6XX_HLSQ_VS_CNTL_CONSTLEN(xs->constlen) |
|
||||
A6XX_HLSQ_VS_CNTL_ENABLED);
|
||||
}
|
||||
|
||||
void
|
||||
tu6_emit_xs(struct tu_cs *cs,
|
||||
gl_shader_stage stage, /* xs->type, but xs may be NULL */
|
||||
const struct ir3_shader_variant *xs,
|
||||
const struct tu_pvtmem_config *pvtmem,
|
||||
uint64_t binary_iova)
|
||||
{
|
||||
const struct xs_config *cfg = &xs_config[stage];
|
||||
|
||||
if (!xs) {
|
||||
/* shader stage disabled */
|
||||
return;
|
||||
}
|
||||
|
||||
enum a6xx_threadsize thrsz =
|
||||
xs->info.double_threadsize ? THREAD128 : THREAD64;
|
||||
switch (stage) {
|
||||
|
@ -459,20 +493,9 @@ tu6_emit_xs_config(struct tu_cs *cs,
|
|||
unreachable("bad shader stage");
|
||||
}
|
||||
|
||||
tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_config, 2);
|
||||
tu_cs_emit(cs, A6XX_SP_VS_CONFIG_ENABLED |
|
||||
COND(xs->bindless_tex, A6XX_SP_VS_CONFIG_BINDLESS_TEX) |
|
||||
COND(xs->bindless_samp, A6XX_SP_VS_CONFIG_BINDLESS_SAMP) |
|
||||
COND(xs->bindless_ibo, A6XX_SP_VS_CONFIG_BINDLESS_IBO) |
|
||||
COND(xs->bindless_ubo, A6XX_SP_VS_CONFIG_BINDLESS_UBO) |
|
||||
A6XX_SP_VS_CONFIG_NTEX(xs->num_samp) |
|
||||
A6XX_SP_VS_CONFIG_NSAMP(xs->num_samp));
|
||||
tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_instrlen, 1);
|
||||
tu_cs_emit(cs, xs->instrlen);
|
||||
|
||||
tu_cs_emit_pkt4(cs, cfg->reg_hlsq_xs_ctrl, 1);
|
||||
tu_cs_emit(cs, A6XX_HLSQ_VS_CNTL_CONSTLEN(xs->constlen) |
|
||||
A6XX_HLSQ_VS_CNTL_ENABLED);
|
||||
|
||||
/* emit program binary & private memory layout
|
||||
* binary_iova should be aligned to 1 instrlen unit (128 bytes)
|
||||
*/
|
||||
|
@ -578,7 +601,8 @@ tu6_emit_cs_config(struct tu_cs *cs, const struct tu_shader *shader,
|
|||
.cs_state = true,
|
||||
.cs_ibo = true));
|
||||
|
||||
tu6_emit_xs_config(cs, MESA_SHADER_COMPUTE, v, pvtmem, binary_iova);
|
||||
tu6_emit_xs_config(cs, MESA_SHADER_COMPUTE, v);
|
||||
tu6_emit_xs(cs, MESA_SHADER_COMPUTE, v, pvtmem, binary_iova);
|
||||
|
||||
uint32_t shared_size = MAX2(((int)v->shared_size - 1) / 1024, 1);
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1);
|
||||
|
@ -1523,6 +1547,26 @@ tu6_emit_geom_tess_consts(struct tu_cs *cs,
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
tu6_emit_program_config(struct tu_cs *cs,
|
||||
struct tu_pipeline_builder *builder)
|
||||
{
|
||||
gl_shader_stage stage = MESA_SHADER_VERTEX;
|
||||
|
||||
STATIC_ASSERT(MESA_SHADER_VERTEX == 0);
|
||||
|
||||
tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(
|
||||
.vs_state = true,
|
||||
.hs_state = true,
|
||||
.ds_state = true,
|
||||
.gs_state = true,
|
||||
.fs_state = true,
|
||||
.gfx_ibo = true));
|
||||
for (; stage < ARRAY_SIZE(builder->shaders); stage++) {
|
||||
tu6_emit_xs_config(cs, stage, builder->variants[stage]);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
tu6_emit_program(struct tu_cs *cs,
|
||||
struct tu_pipeline_builder *builder,
|
||||
|
@ -1540,22 +1584,12 @@ tu6_emit_program(struct tu_cs *cs,
|
|||
builder->create_info->pTessellationState->patchControlPoints : 0;
|
||||
bool multi_pos_output = builder->shaders[MESA_SHADER_VERTEX]->multi_pos_output;
|
||||
|
||||
STATIC_ASSERT(MESA_SHADER_VERTEX == 0);
|
||||
|
||||
tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(
|
||||
.vs_state = true,
|
||||
.hs_state = true,
|
||||
.ds_state = true,
|
||||
.gs_state = true,
|
||||
.fs_state = true,
|
||||
.gfx_ibo = true));
|
||||
|
||||
/* Don't use the binning pass variant when GS is present because we don't
|
||||
* support compiling correct binning pass variants with GS.
|
||||
*/
|
||||
if (binning_pass && !gs) {
|
||||
vs = bs;
|
||||
tu6_emit_xs_config(cs, stage, bs, &builder->pvtmem, builder->binning_vs_iova);
|
||||
tu6_emit_xs(cs, stage, bs, &builder->pvtmem, builder->binning_vs_iova);
|
||||
stage++;
|
||||
}
|
||||
|
||||
|
@ -1565,7 +1599,7 @@ tu6_emit_program(struct tu_cs *cs,
|
|||
if (stage == MESA_SHADER_FRAGMENT && binning_pass)
|
||||
fs = xs = NULL;
|
||||
|
||||
tu6_emit_xs_config(cs, stage, xs, &builder->pvtmem, builder->shader_iova[stage]);
|
||||
tu6_emit_xs(cs, stage, xs, &builder->pvtmem, builder->shader_iova[stage]);
|
||||
}
|
||||
|
||||
uint32_t multiview_views = util_logbase2(builder->multiview_mask) + 1;
|
||||
|
@ -2456,6 +2490,22 @@ tu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder,
|
|||
struct tu_pipeline *pipeline)
|
||||
{
|
||||
struct tu_cs prog_cs;
|
||||
|
||||
/* Emit HLSQ_xS_CNTL/HLSQ_SP_xS_CONFIG *first*, before emitting anything
|
||||
* else that could depend on that state (like push constants)
|
||||
*
|
||||
* Note also that this always uses the full VS even in binning pass. The
|
||||
* binning pass variant has the same const layout as the full VS, and
|
||||
* the constlen for the VS will be the same or greater than the constlen
|
||||
* for the binning pass variant. It is required that the constlen state
|
||||
* matches between binning and draw passes, as some parts of the push
|
||||
* consts are emitted in state groups that are shared between the binning
|
||||
* and draw passes.
|
||||
*/
|
||||
tu_cs_begin_sub_stream(&pipeline->cs, 512, &prog_cs);
|
||||
tu6_emit_program_config(&prog_cs, builder);
|
||||
pipeline->program.config_state = tu_cs_end_draw_state(&pipeline->cs, &prog_cs);
|
||||
|
||||
tu_cs_begin_sub_stream(&pipeline->cs, 512, &prog_cs);
|
||||
tu6_emit_program(&prog_cs, builder, false, pipeline);
|
||||
pipeline->program.state = tu_cs_end_draw_state(&pipeline->cs, &prog_cs);
|
||||
|
|
|
@ -498,6 +498,7 @@ enum tu_dynamic_state
|
|||
|
||||
enum tu_draw_state_group_id
|
||||
{
|
||||
TU_DRAW_STATE_PROGRAM_CONFIG,
|
||||
TU_DRAW_STATE_PROGRAM,
|
||||
TU_DRAW_STATE_PROGRAM_BINNING,
|
||||
TU_DRAW_STATE_TESS,
|
||||
|
@ -1159,6 +1160,7 @@ struct tu_pipeline
|
|||
|
||||
struct
|
||||
{
|
||||
struct tu_draw_state config_state;
|
||||
struct tu_draw_state state;
|
||||
struct tu_draw_state binning_state;
|
||||
|
||||
|
@ -1235,9 +1237,14 @@ struct tu_pvtmem_config {
|
|||
void
|
||||
tu6_emit_xs_config(struct tu_cs *cs,
|
||||
gl_shader_stage stage,
|
||||
const struct ir3_shader_variant *xs,
|
||||
const struct tu_pvtmem_config *pvtmem,
|
||||
uint64_t binary_iova);
|
||||
const struct ir3_shader_variant *xs);
|
||||
|
||||
void
|
||||
tu6_emit_xs(struct tu_cs *cs,
|
||||
gl_shader_stage stage,
|
||||
const struct ir3_shader_variant *xs,
|
||||
const struct tu_pvtmem_config *pvtmem,
|
||||
uint64_t binary_iova);
|
||||
|
||||
void
|
||||
tu6_emit_vpc(struct tu_cs *cs,
|
||||
|
|
Loading…
Reference in New Issue