turnip: Change to use shared consts for PushConstants
Follow the way the blob handles PushConstants, though this still supports only 128b, same as before. v1. Rename tu_push_constant_range.count to dwords to reduce confusion. (Danylo Piliaiev <dpiliaiev@igalia.com>) v2. Enable shared constants only if necessary. v3. Merge the two draw states TU_DRAW_STATE_SHADER_GEOM_CONST and TU_DRAW_STATE_FS_CONST, since shared constants are now used. Note that this leaves tu_push_constant_range in tu_shader so that it can be used again in the following patch. Signed-off-by: Hyunjun Ko <zzoon@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15503>
This commit is contained in:
parent
ce8e8051af
commit
e1f2cabc5e
|
@ -496,7 +496,6 @@ tu_cs_emit_draw_state(struct tu_cs *cs, uint32_t id, struct tu_draw_state state)
|
||||||
switch (id) {
|
switch (id) {
|
||||||
case TU_DRAW_STATE_PROGRAM:
|
case TU_DRAW_STATE_PROGRAM:
|
||||||
case TU_DRAW_STATE_VI:
|
case TU_DRAW_STATE_VI:
|
||||||
case TU_DRAW_STATE_FS_CONST:
|
|
||||||
/* The blob seems to not enable this (DESC_SETS_LOAD) for binning, even
|
/* The blob seems to not enable this (DESC_SETS_LOAD) for binning, even
|
||||||
* when resources would actually be used in the binning shader.
|
* when resources would actually be used in the binning shader.
|
||||||
* Presumably the overhead of prefetching the resources isn't
|
* Presumably the overhead of prefetching the resources isn't
|
||||||
|
@ -847,13 +846,14 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||||
tu_cs_emit_write_reg(cs, REG_A6XX_SP_CHICKEN_BITS, 0x00000410);
|
tu_cs_emit_write_reg(cs, REG_A6XX_SP_CHICKEN_BITS, 0x00000410);
|
||||||
tu_cs_emit_write_reg(cs, REG_A6XX_SP_IBO_COUNT, 0);
|
tu_cs_emit_write_reg(cs, REG_A6XX_SP_IBO_COUNT, 0);
|
||||||
tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B182, 0);
|
tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B182, 0);
|
||||||
tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_SHARED_CONSTS, 0);
|
tu_cs_emit_regs(cs, A6XX_HLSQ_SHARED_CONSTS(.enable = false));
|
||||||
tu_cs_emit_write_reg(cs, REG_A6XX_UCHE_UNKNOWN_0E12, 0x3200000);
|
tu_cs_emit_write_reg(cs, REG_A6XX_UCHE_UNKNOWN_0E12, 0x3200000);
|
||||||
tu_cs_emit_write_reg(cs, REG_A6XX_UCHE_CLIENT_PF, 4);
|
tu_cs_emit_write_reg(cs, REG_A6XX_UCHE_CLIENT_PF, 4);
|
||||||
tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8E01, 0x0);
|
tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8E01, 0x0);
|
||||||
tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_A9A8, 0);
|
tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_A9A8, 0);
|
||||||
tu_cs_emit_write_reg(cs, REG_A6XX_SP_MODE_CONTROL,
|
tu_cs_emit_regs(cs, A6XX_SP_MODE_CONTROL(.constant_demotion_enable = true,
|
||||||
A6XX_SP_MODE_CONTROL_CONSTANT_DEMOTION_ENABLE | 4);
|
.isammode = ISAMMODE_GL,
|
||||||
|
.shared_consts_enable = false));
|
||||||
|
|
||||||
/* TODO: set A6XX_VFD_ADD_OFFSET_INSTANCE and fix ir3 to avoid adding base instance */
|
/* TODO: set A6XX_VFD_ADD_OFFSET_INSTANCE and fix ir3 to avoid adding base instance */
|
||||||
tu_cs_emit_write_reg(cs, REG_A6XX_VFD_ADD_OFFSET, A6XX_VFD_ADD_OFFSET_VERTEX);
|
tu_cs_emit_write_reg(cs, REG_A6XX_VFD_ADD_OFFSET, A6XX_VFD_ADD_OFFSET_VERTEX);
|
||||||
|
@ -1060,7 +1060,7 @@ tu6_emit_binning_pass(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||||
tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3);
|
tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3);
|
||||||
tu_cs_emit(cs, CP_SET_DRAW_STATE__0_COUNT(0) |
|
tu_cs_emit(cs, CP_SET_DRAW_STATE__0_COUNT(0) |
|
||||||
CP_SET_DRAW_STATE__0_DISABLE |
|
CP_SET_DRAW_STATE__0_DISABLE |
|
||||||
CP_SET_DRAW_STATE__0_GROUP_ID(TU_DRAW_STATE_SHADER_GEOM_CONST));
|
CP_SET_DRAW_STATE__0_GROUP_ID(TU_DRAW_STATE_CONST));
|
||||||
tu_cs_emit(cs, CP_SET_DRAW_STATE__1_ADDR_LO(0));
|
tu_cs_emit(cs, CP_SET_DRAW_STATE__1_ADDR_LO(0));
|
||||||
tu_cs_emit(cs, CP_SET_DRAW_STATE__2_ADDR_HI(0));
|
tu_cs_emit(cs, CP_SET_DRAW_STATE__2_ADDR_HI(0));
|
||||||
|
|
||||||
|
@ -3676,9 +3676,9 @@ tu6_user_consts_size(const struct tu_pipeline *pipeline,
|
||||||
&pipeline->program.link[type];
|
&pipeline->program.link[type];
|
||||||
uint32_t dwords = 0;
|
uint32_t dwords = 0;
|
||||||
|
|
||||||
if (link->push_consts.count > 0) {
|
if (link->push_consts.dwords > 0) {
|
||||||
unsigned num_units = link->push_consts.count;
|
unsigned num_units = link->push_consts.dwords;
|
||||||
dwords += 4 + num_units * 4;
|
dwords += 4 + num_units;
|
||||||
}
|
}
|
||||||
|
|
||||||
return dwords;
|
return dwords;
|
||||||
|
@ -3693,47 +3693,81 @@ tu6_emit_user_consts(struct tu_cs *cs,
|
||||||
const struct tu_program_descriptor_linkage *link =
|
const struct tu_program_descriptor_linkage *link =
|
||||||
&pipeline->program.link[type];
|
&pipeline->program.link[type];
|
||||||
|
|
||||||
if (link->push_consts.count > 0) {
|
if (link->push_consts.dwords > 0) {
|
||||||
unsigned num_units = link->push_consts.count;
|
unsigned num_units = link->push_consts.dwords;
|
||||||
unsigned offset = link->push_consts.lo;
|
unsigned offset = link->push_consts.lo;
|
||||||
tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3 + num_units * 4);
|
|
||||||
tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(offset) |
|
/* DST_OFF and NUM_UNIT requires vec4 units */
|
||||||
|
tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3 + num_units);
|
||||||
|
tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(offset / 4) |
|
||||||
CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
|
CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
|
||||||
CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
|
CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
|
||||||
CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) |
|
CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) |
|
||||||
|
CP_LOAD_STATE6_0_NUM_UNIT(num_units / 4));
|
||||||
|
tu_cs_emit(cs, 0);
|
||||||
|
tu_cs_emit(cs, 0);
|
||||||
|
for (unsigned i = 0; i < num_units; i++)
|
||||||
|
tu_cs_emit(cs, push_constants[i + offset]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
tu6_emit_shared_consts(struct tu_cs *cs,
|
||||||
|
const struct tu_pipeline *pipeline,
|
||||||
|
uint32_t *push_constants,
|
||||||
|
bool compute)
|
||||||
|
{
|
||||||
|
if (pipeline->shared_consts.dwords > 0) {
|
||||||
|
/* Offset and num_units for shared consts are in units of dwords. */
|
||||||
|
unsigned num_units = pipeline->shared_consts.dwords;
|
||||||
|
unsigned offset = pipeline->shared_consts.lo;
|
||||||
|
|
||||||
|
enum a6xx_state_type st = compute ? ST6_UBO : ST6_CONSTANTS;
|
||||||
|
uint32_t cp_load_state = compute ? CP_LOAD_STATE6_FRAG : CP_LOAD_STATE6;
|
||||||
|
|
||||||
|
tu_cs_emit_pkt7(cs, cp_load_state, 3 + num_units);
|
||||||
|
tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(offset) |
|
||||||
|
CP_LOAD_STATE6_0_STATE_TYPE(st) |
|
||||||
|
CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
|
||||||
|
CP_LOAD_STATE6_0_STATE_BLOCK(SB6_IBO) |
|
||||||
CP_LOAD_STATE6_0_NUM_UNIT(num_units));
|
CP_LOAD_STATE6_0_NUM_UNIT(num_units));
|
||||||
tu_cs_emit(cs, 0);
|
tu_cs_emit(cs, 0);
|
||||||
tu_cs_emit(cs, 0);
|
tu_cs_emit(cs, 0);
|
||||||
for (unsigned i = 0; i < num_units * 4; i++)
|
|
||||||
tu_cs_emit(cs, push_constants[i + offset * 4]);
|
for (unsigned i = 0; i < num_units; i++)
|
||||||
|
tu_cs_emit(cs, push_constants[i + offset]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static uint32_t
|
||||||
|
tu6_const_size(struct tu_cmd_buffer *cmd,
|
||||||
|
const struct tu_pipeline *pipeline,
|
||||||
|
bool compute)
|
||||||
|
{
|
||||||
|
uint32_t dwords = 0;
|
||||||
|
|
||||||
|
if (pipeline->shared_consts.dwords > 0) {
|
||||||
|
dwords = pipeline->shared_consts.dwords + 4;
|
||||||
|
} else {
|
||||||
|
if (compute) {
|
||||||
|
dwords = tu6_user_consts_size(pipeline, MESA_SHADER_COMPUTE);
|
||||||
|
} else {
|
||||||
|
for (uint32_t type = MESA_SHADER_VERTEX; type <= MESA_SHADER_FRAGMENT; type++)
|
||||||
|
dwords += tu6_user_consts_size(pipeline, type);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return dwords;
|
||||||
|
}
|
||||||
|
|
||||||
static struct tu_draw_state
|
static struct tu_draw_state
|
||||||
tu6_emit_consts(struct tu_cmd_buffer *cmd,
|
tu6_emit_consts(struct tu_cmd_buffer *cmd,
|
||||||
const struct tu_pipeline *pipeline,
|
const struct tu_pipeline *pipeline,
|
||||||
gl_shader_stage type)
|
bool compute)
|
||||||
{
|
|
||||||
uint32_t dwords = tu6_user_consts_size(pipeline, type);
|
|
||||||
if (dwords == 0)
|
|
||||||
return (struct tu_draw_state) {};
|
|
||||||
|
|
||||||
struct tu_cs cs;
|
|
||||||
tu_cs_begin_sub_stream(&cmd->sub_cs, dwords, &cs);
|
|
||||||
|
|
||||||
tu6_emit_user_consts(&cs, pipeline, type, cmd->push_constants);
|
|
||||||
|
|
||||||
return tu_cs_end_draw_state(&cmd->sub_cs, &cs);
|
|
||||||
}
|
|
||||||
|
|
||||||
static struct tu_draw_state
|
|
||||||
tu6_emit_consts_geom(struct tu_cmd_buffer *cmd,
|
|
||||||
const struct tu_pipeline *pipeline)
|
|
||||||
{
|
{
|
||||||
uint32_t dwords = 0;
|
uint32_t dwords = 0;
|
||||||
|
|
||||||
for (uint32_t type = MESA_SHADER_VERTEX; type < MESA_SHADER_FRAGMENT; type++)
|
dwords = tu6_const_size(cmd, pipeline, compute);
|
||||||
dwords += tu6_user_consts_size(pipeline, type);
|
|
||||||
|
|
||||||
if (dwords == 0)
|
if (dwords == 0)
|
||||||
return (struct tu_draw_state) {};
|
return (struct tu_draw_state) {};
|
||||||
|
@ -3741,8 +3775,16 @@ tu6_emit_consts_geom(struct tu_cmd_buffer *cmd,
|
||||||
struct tu_cs cs;
|
struct tu_cs cs;
|
||||||
tu_cs_begin_sub_stream(&cmd->sub_cs, dwords, &cs);
|
tu_cs_begin_sub_stream(&cmd->sub_cs, dwords, &cs);
|
||||||
|
|
||||||
for (uint32_t type = MESA_SHADER_VERTEX; type < MESA_SHADER_FRAGMENT; type++)
|
if (pipeline->shared_consts.dwords > 0) {
|
||||||
tu6_emit_user_consts(&cs, pipeline, type, cmd->push_constants);
|
tu6_emit_shared_consts(&cs, pipeline, cmd->push_constants, compute);
|
||||||
|
} else {
|
||||||
|
if (compute) {
|
||||||
|
tu6_emit_user_consts(&cs, pipeline, MESA_SHADER_COMPUTE, cmd->push_constants);
|
||||||
|
} else {
|
||||||
|
for (uint32_t type = MESA_SHADER_VERTEX; type <= MESA_SHADER_FRAGMENT; type++)
|
||||||
|
tu6_emit_user_consts(&cs, pipeline, type, cmd->push_constants);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return tu_cs_end_draw_state(&cmd->sub_cs, &cs);
|
return tu_cs_end_draw_state(&cmd->sub_cs, &cs);
|
||||||
}
|
}
|
||||||
|
@ -3968,12 +4010,8 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
|
||||||
tu_cs_emit_regs(&cs, A6XX_RB_STENCIL_CONTROL(.dword = cmd->state.rb_stencil_cntl));
|
tu_cs_emit_regs(&cs, A6XX_RB_STENCIL_CONTROL(.dword = cmd->state.rb_stencil_cntl));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cmd->state.dirty & TU_CMD_DIRTY_SHADER_CONSTS) {
|
if (cmd->state.dirty & TU_CMD_DIRTY_SHADER_CONSTS)
|
||||||
cmd->state.shader_const[0] =
|
cmd->state.shader_const = tu6_emit_consts(cmd, pipeline, false);
|
||||||
tu6_emit_consts_geom(cmd, pipeline);
|
|
||||||
cmd->state.shader_const[1] =
|
|
||||||
tu6_emit_consts(cmd, pipeline, MESA_SHADER_FRAGMENT);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (cmd->state.dirty & TU_CMD_DIRTY_VIEWPORTS) {
|
if (cmd->state.dirty & TU_CMD_DIRTY_VIEWPORTS) {
|
||||||
struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_VIEWPORT, 8 + 10 * cmd->state.max_viewport);
|
struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_VIEWPORT, 8 + 10 * cmd->state.max_viewport);
|
||||||
|
@ -4008,8 +4046,7 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
|
||||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_RAST, pipeline->rast_state);
|
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_RAST, pipeline->rast_state);
|
||||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_PRIM_MODE_SYSMEM, pipeline->prim_order_state_sysmem);
|
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_PRIM_MODE_SYSMEM, pipeline->prim_order_state_sysmem);
|
||||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_PRIM_MODE_GMEM, pipeline->prim_order_state_gmem);
|
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_PRIM_MODE_GMEM, pipeline->prim_order_state_gmem);
|
||||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_SHADER_GEOM_CONST, cmd->state.shader_const[0]);
|
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_CONST, cmd->state.shader_const);
|
||||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_FS_CONST, cmd->state.shader_const[1]);
|
|
||||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DESC_SETS, cmd->state.desc_sets);
|
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DESC_SETS, cmd->state.desc_sets);
|
||||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DESC_SETS_LOAD, pipeline->load_state);
|
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DESC_SETS_LOAD, pipeline->load_state);
|
||||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VB, cmd->state.vertex_buffers);
|
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VB, cmd->state.vertex_buffers);
|
||||||
|
@ -4028,7 +4065,7 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
|
||||||
*/
|
*/
|
||||||
bool emit_binding_stride = false, emit_blend = false;
|
bool emit_binding_stride = false, emit_blend = false;
|
||||||
uint32_t draw_state_count =
|
uint32_t draw_state_count =
|
||||||
((cmd->state.dirty & TU_CMD_DIRTY_SHADER_CONSTS) ? 2 : 0) +
|
((cmd->state.dirty & TU_CMD_DIRTY_SHADER_CONSTS) ? 1 : 0) +
|
||||||
((cmd->state.dirty & TU_CMD_DIRTY_DESC_SETS_LOAD) ? 1 : 0) +
|
((cmd->state.dirty & TU_CMD_DIRTY_DESC_SETS_LOAD) ? 1 : 0) +
|
||||||
((cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS) ? 1 : 0) +
|
((cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS) ? 1 : 0) +
|
||||||
((cmd->state.dirty & TU_CMD_DIRTY_VS_PARAMS) ? 1 : 0) +
|
((cmd->state.dirty & TU_CMD_DIRTY_VS_PARAMS) ? 1 : 0) +
|
||||||
|
@ -4049,10 +4086,8 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
|
||||||
if (draw_state_count > 0)
|
if (draw_state_count > 0)
|
||||||
tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * draw_state_count);
|
tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * draw_state_count);
|
||||||
|
|
||||||
if (cmd->state.dirty & TU_CMD_DIRTY_SHADER_CONSTS) {
|
if (cmd->state.dirty & TU_CMD_DIRTY_SHADER_CONSTS)
|
||||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_SHADER_GEOM_CONST, cmd->state.shader_const[0]);
|
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_CONST, cmd->state.shader_const);
|
||||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_FS_CONST, cmd->state.shader_const[1]);
|
|
||||||
}
|
|
||||||
if (cmd->state.dirty & TU_CMD_DIRTY_DESC_SETS_LOAD)
|
if (cmd->state.dirty & TU_CMD_DIRTY_DESC_SETS_LOAD)
|
||||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DESC_SETS_LOAD, pipeline->load_state);
|
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DESC_SETS_LOAD, pipeline->load_state);
|
||||||
if (cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS)
|
if (cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS)
|
||||||
|
@ -4572,8 +4607,7 @@ tu_dispatch(struct tu_cmd_buffer *cmd,
|
||||||
tu_emit_cache_flush(cmd, cs);
|
tu_emit_cache_flush(cmd, cs);
|
||||||
|
|
||||||
/* note: no reason to have this in a separate IB */
|
/* note: no reason to have this in a separate IB */
|
||||||
tu_cs_emit_state_ib(cs,
|
tu_cs_emit_state_ib(cs, tu6_emit_consts(cmd, pipeline, true));
|
||||||
tu6_emit_consts(cmd, pipeline, MESA_SHADER_COMPUTE));
|
|
||||||
|
|
||||||
tu_emit_compute_driver_params(cmd, cs, pipeline, info);
|
tu_emit_compute_driver_params(cmd, cs, pipeline, info);
|
||||||
|
|
||||||
|
|
|
@ -662,15 +662,29 @@ tu6_emit_xs(struct tu_cs *cs,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
tu6_emit_shared_consts_enable(struct tu_cs *cs, bool enable)
|
||||||
|
{
|
||||||
|
/* Enable/disable shared constants */
|
||||||
|
tu_cs_emit_regs(cs, A6XX_HLSQ_SHARED_CONSTS(.enable = enable));
|
||||||
|
tu_cs_emit_regs(cs, A6XX_SP_MODE_CONTROL(.constant_demotion_enable = true,
|
||||||
|
.isammode = ISAMMODE_GL,
|
||||||
|
.shared_consts_enable = enable));
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
tu6_emit_cs_config(struct tu_cs *cs,
|
tu6_emit_cs_config(struct tu_cs *cs,
|
||||||
const struct ir3_shader_variant *v,
|
const struct ir3_shader_variant *v,
|
||||||
const struct tu_pvtmem_config *pvtmem,
|
const struct tu_pvtmem_config *pvtmem,
|
||||||
uint64_t binary_iova)
|
uint64_t binary_iova)
|
||||||
{
|
{
|
||||||
|
bool shared_consts_enable = ir3_const_state(v)->shared_consts_enable;
|
||||||
|
tu6_emit_shared_consts_enable(cs, shared_consts_enable);
|
||||||
|
|
||||||
tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(
|
tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(
|
||||||
.cs_state = true,
|
.cs_state = true,
|
||||||
.cs_ibo = true));
|
.cs_ibo = true,
|
||||||
|
.cs_shared_const = shared_consts_enable));
|
||||||
|
|
||||||
tu6_emit_xs_config(cs, MESA_SHADER_COMPUTE, v);
|
tu6_emit_xs_config(cs, MESA_SHADER_COMPUTE, v);
|
||||||
tu6_emit_xs(cs, MESA_SHADER_COMPUTE, v, pvtmem, binary_iova);
|
tu6_emit_xs(cs, MESA_SHADER_COMPUTE, v, pvtmem, binary_iova);
|
||||||
|
@ -1678,13 +1692,17 @@ tu6_emit_program_config(struct tu_cs *cs,
|
||||||
|
|
||||||
STATIC_ASSERT(MESA_SHADER_VERTEX == 0);
|
STATIC_ASSERT(MESA_SHADER_VERTEX == 0);
|
||||||
|
|
||||||
|
bool shared_consts_enable = builder->layout->push_constant_size > 0;
|
||||||
|
tu6_emit_shared_consts_enable(cs, shared_consts_enable);
|
||||||
|
|
||||||
tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(
|
tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(
|
||||||
.vs_state = true,
|
.vs_state = true,
|
||||||
.hs_state = true,
|
.hs_state = true,
|
||||||
.ds_state = true,
|
.ds_state = true,
|
||||||
.gs_state = true,
|
.gs_state = true,
|
||||||
.fs_state = true,
|
.fs_state = true,
|
||||||
.gfx_ibo = true));
|
.gfx_ibo = true,
|
||||||
|
.gfx_shared_const = shared_consts_enable));
|
||||||
for (; stage < ARRAY_SIZE(builder->shader_iova); stage++) {
|
for (; stage < ARRAY_SIZE(builder->shader_iova); stage++) {
|
||||||
tu6_emit_xs_config(cs, stage, builder->shaders->variants[stage]);
|
tu6_emit_xs_config(cs, stage, builder->shaders->variants[stage]);
|
||||||
}
|
}
|
||||||
|
@ -2793,6 +2811,13 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
|
||||||
stage_infos[stage] = &builder->create_info->pStages[i];
|
stage_infos[stage] = &builder->create_info->pStages[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (builder->layout->push_constant_size > 0) {
|
||||||
|
pipeline->shared_consts = (struct tu_push_constant_range) {
|
||||||
|
.lo = 0,
|
||||||
|
.dwords = builder->layout->push_constant_size / 4,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
struct tu_shader_key keys[ARRAY_SIZE(stage_infos)] = { };
|
struct tu_shader_key keys[ARRAY_SIZE(stage_infos)] = { };
|
||||||
for (gl_shader_stage stage = MESA_SHADER_VERTEX;
|
for (gl_shader_stage stage = MESA_SHADER_VERTEX;
|
||||||
stage < ARRAY_SIZE(keys); stage++) {
|
stage < ARRAY_SIZE(keys); stage++) {
|
||||||
|
@ -2952,7 +2977,7 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
|
||||||
stage < ARRAY_SIZE(shaders); stage++) {
|
stage < ARRAY_SIZE(shaders); stage++) {
|
||||||
if (!shaders[stage])
|
if (!shaders[stage])
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
int64_t stage_start = os_time_get_nano();
|
int64_t stage_start = os_time_get_nano();
|
||||||
|
|
||||||
compiled_shaders->variants[stage] =
|
compiled_shaders->variants[stage] =
|
||||||
|
@ -2965,6 +2990,7 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
|
||||||
|
|
||||||
stage_feedbacks[stage].duration += os_time_get_nano() - stage_start;
|
stage_feedbacks[stage].duration += os_time_get_nano() - stage_start;
|
||||||
}
|
}
|
||||||
|
compiled_shaders->shared_consts = pipeline->shared_consts;
|
||||||
|
|
||||||
uint32_t safe_constlens = ir3_trim_constlen(compiled_shaders->variants, compiler);
|
uint32_t safe_constlens = ir3_trim_constlen(compiled_shaders->variants, compiler);
|
||||||
|
|
||||||
|
@ -4038,6 +4064,13 @@ tu_compute_pipeline_create(VkDevice device,
|
||||||
VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
|
VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (layout->push_constant_size > 0) {
|
||||||
|
pipeline->shared_consts = (struct tu_push_constant_range) {
|
||||||
|
.lo = 0,
|
||||||
|
.dwords = layout->push_constant_size / 4,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
char *nir_initial_disasm = NULL;
|
char *nir_initial_disasm = NULL;
|
||||||
|
|
||||||
if (!compiled) {
|
if (!compiled) {
|
||||||
|
@ -4071,6 +4104,7 @@ tu_compute_pipeline_create(VkDevice device,
|
||||||
|
|
||||||
compiled->active_desc_sets = shader->active_desc_sets;
|
compiled->active_desc_sets = shader->active_desc_sets;
|
||||||
compiled->push_consts[MESA_SHADER_COMPUTE] = shader->push_consts;
|
compiled->push_consts[MESA_SHADER_COMPUTE] = shader->push_consts;
|
||||||
|
compiled->shared_consts = pipeline->shared_consts;
|
||||||
|
|
||||||
struct ir3_shader_variant *v =
|
struct ir3_shader_variant *v =
|
||||||
ir3_shader_create_variant(shader->ir3_shader, &ir3_key, executable_info);
|
ir3_shader_create_variant(shader->ir3_shader, &ir3_key, executable_info);
|
||||||
|
|
|
@ -720,8 +720,7 @@ enum tu_draw_state_group_id
|
||||||
TU_DRAW_STATE_VI,
|
TU_DRAW_STATE_VI,
|
||||||
TU_DRAW_STATE_VI_BINNING,
|
TU_DRAW_STATE_VI_BINNING,
|
||||||
TU_DRAW_STATE_RAST,
|
TU_DRAW_STATE_RAST,
|
||||||
TU_DRAW_STATE_SHADER_GEOM_CONST,
|
TU_DRAW_STATE_CONST,
|
||||||
TU_DRAW_STATE_FS_CONST,
|
|
||||||
TU_DRAW_STATE_DESC_SETS,
|
TU_DRAW_STATE_DESC_SETS,
|
||||||
TU_DRAW_STATE_DESC_SETS_LOAD,
|
TU_DRAW_STATE_DESC_SETS_LOAD,
|
||||||
TU_DRAW_STATE_VS_PARAMS,
|
TU_DRAW_STATE_VS_PARAMS,
|
||||||
|
@ -1189,7 +1188,7 @@ struct tu_cmd_state
|
||||||
/* saved states to re-emit in TU_CMD_DIRTY_DRAW_STATE case */
|
/* saved states to re-emit in TU_CMD_DIRTY_DRAW_STATE case */
|
||||||
struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT];
|
struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT];
|
||||||
struct tu_draw_state vertex_buffers;
|
struct tu_draw_state vertex_buffers;
|
||||||
struct tu_draw_state shader_const[2];
|
struct tu_draw_state shader_const;
|
||||||
struct tu_draw_state desc_sets;
|
struct tu_draw_state desc_sets;
|
||||||
|
|
||||||
struct tu_draw_state vs_params;
|
struct tu_draw_state vs_params;
|
||||||
|
@ -1377,7 +1376,7 @@ struct tu_event
|
||||||
struct tu_push_constant_range
|
struct tu_push_constant_range
|
||||||
{
|
{
|
||||||
uint32_t lo;
|
uint32_t lo;
|
||||||
uint32_t count;
|
uint32_t dwords;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct tu_shader
|
struct tu_shader
|
||||||
|
@ -1399,6 +1398,7 @@ struct tu_compiled_shaders
|
||||||
{
|
{
|
||||||
struct vk_pipeline_cache_object base;
|
struct vk_pipeline_cache_object base;
|
||||||
|
|
||||||
|
struct tu_push_constant_range shared_consts;
|
||||||
struct tu_push_constant_range push_consts[MESA_SHADER_STAGES];
|
struct tu_push_constant_range push_consts[MESA_SHADER_STAGES];
|
||||||
uint8_t active_desc_sets;
|
uint8_t active_desc_sets;
|
||||||
bool multi_pos_output;
|
bool multi_pos_output;
|
||||||
|
@ -1498,6 +1498,8 @@ struct tu_pipeline
|
||||||
/* for vertex buffers state */
|
/* for vertex buffers state */
|
||||||
uint32_t num_vbs;
|
uint32_t num_vbs;
|
||||||
|
|
||||||
|
struct tu_push_constant_range shared_consts;
|
||||||
|
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
struct tu_draw_state config_state;
|
struct tu_draw_state config_state;
|
||||||
|
|
|
@ -138,18 +138,21 @@ tu_spirv_to_nir(struct tu_device *dev,
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr,
|
lower_load_push_constant(struct tu_device *dev,
|
||||||
|
nir_builder *b,
|
||||||
|
nir_intrinsic_instr *instr,
|
||||||
struct tu_shader *shader)
|
struct tu_shader *shader)
|
||||||
{
|
{
|
||||||
uint32_t base = nir_intrinsic_base(instr);
|
uint32_t base = nir_intrinsic_base(instr);
|
||||||
assert(base % 4 == 0);
|
assert(base % 4 == 0);
|
||||||
assert(base >= shader->push_consts.lo * 16);
|
assert(base >= shader->push_consts.lo * 4);
|
||||||
base -= shader->push_consts.lo * 16;
|
base -= shader->push_consts.lo * 4;
|
||||||
|
|
||||||
nir_ssa_def *load =
|
nir_ssa_def *load =
|
||||||
nir_load_uniform(b, instr->num_components, instr->dest.ssa.bit_size,
|
nir_load_uniform(b, instr->num_components,
|
||||||
nir_ushr(b, instr->src[0].ssa, nir_imm_int(b, 2)),
|
instr->dest.ssa.bit_size,
|
||||||
.base = base / 4);
|
nir_ushr(b, instr->src[0].ssa, nir_imm_int(b, 2)),
|
||||||
|
.base = base + dev->compiler->shared_consts_base_offset * 4);
|
||||||
|
|
||||||
nir_ssa_def_rewrite_uses(&instr->dest.ssa, load);
|
nir_ssa_def_rewrite_uses(&instr->dest.ssa, load);
|
||||||
|
|
||||||
|
@ -398,7 +401,7 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
|
||||||
{
|
{
|
||||||
switch (instr->intrinsic) {
|
switch (instr->intrinsic) {
|
||||||
case nir_intrinsic_load_push_constant:
|
case nir_intrinsic_load_push_constant:
|
||||||
lower_load_push_constant(b, instr, shader);
|
lower_load_push_constant(dev, b, instr, shader);
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
case nir_intrinsic_load_vulkan_descriptor:
|
case nir_intrinsic_load_vulkan_descriptor:
|
||||||
|
@ -610,17 +613,21 @@ gather_push_constants(nir_shader *shader, struct tu_shader *tu_shader)
|
||||||
|
|
||||||
if (min >= max) {
|
if (min >= max) {
|
||||||
tu_shader->push_consts.lo = 0;
|
tu_shader->push_consts.lo = 0;
|
||||||
tu_shader->push_consts.count = 0;
|
tu_shader->push_consts.dwords = 0;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* CP_LOAD_STATE OFFSET and NUM_UNIT are in units of vec4 (4 dwords),
|
/* CP_LOAD_STATE OFFSET and NUM_UNIT for SHARED_CONSTS are in units of
|
||||||
* however there's an alignment requirement of 4 on OFFSET. Expand the
|
* dwords while loading regular consts is in units of vec4's.
|
||||||
* range and change units accordingly.
|
* So we unify the unit here as dwords for tu_push_constant_range, then
|
||||||
|
* we should consider correct unit when emitting.
|
||||||
|
*
|
||||||
|
* Note there's an alignment requirement of 16 dwords on OFFSET. Expand
|
||||||
|
* the range and change units accordingly.
|
||||||
*/
|
*/
|
||||||
tu_shader->push_consts.lo = (min / 16) / 4 * 4;
|
tu_shader->push_consts.lo = (min / 4) / 4 * 4;
|
||||||
tu_shader->push_consts.count =
|
tu_shader->push_consts.dwords =
|
||||||
align(max, 16) / 16 - tu_shader->push_consts.lo;
|
align(max, 16) / 4 - tu_shader->push_consts.lo;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
|
@ -822,7 +829,8 @@ tu_shader_create(struct tu_device *dev,
|
||||||
|
|
||||||
shader->ir3_shader =
|
shader->ir3_shader =
|
||||||
ir3_shader_from_nir(dev->compiler, nir, &(struct ir3_shader_options) {
|
ir3_shader_from_nir(dev->compiler, nir, &(struct ir3_shader_options) {
|
||||||
.reserved_user_consts = align(shader->push_consts.count, 4),
|
.reserved_user_consts = 0,
|
||||||
|
.shared_consts_enable = layout->push_constant_size > 0,
|
||||||
.api_wavesize = key->api_wavesize,
|
.api_wavesize = key->api_wavesize,
|
||||||
.real_wavesize = key->real_wavesize,
|
.real_wavesize = key->real_wavesize,
|
||||||
}, &so_info);
|
}, &so_info);
|
||||||
|
|
Loading…
Reference in New Issue