diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index d135dcc79b8..4a4fd663e35 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -748,6 +748,7 @@ struct ir3_shader { /* for compute shaders: */ struct { unsigned req_input_mem; /* in dwords */ + unsigned req_local_mem; /* not in dwords: stored in the same units as cso->req_local_mem (presumably bytes -- TODO confirm) */ } cs; }; diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c index 1802a589a0b..bd8e2500ef2 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c @@ -70,7 +70,7 @@ cs_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, COND(v->mergedregs, A6XX_SP_CS_CTRL_REG0_MERGEDREGS) | A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(v))); - uint32_t shared_size = MAX2(((int)v->shared_size - 1) / 1024, 1); + uint32_t shared_size = MAX2(((int)v->shader->cs.req_local_mem - 1) / 1024, 1); /* (int) cast makes a zero request yield 0 instead of wrapping as unsigned; MAX2 then keeps the field at 1 or more */ OUT_PKT4(ring, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1); OUT_RING(ring, A6XX_SP_CS_UNKNOWN_A9B1_SHARED_SIZE(shared_size) | A6XX_SP_CS_UNKNOWN_A9B1_UNK6); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c index 041ba15a487..64b95da5d0c 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c @@ -298,6 +298,7 @@ ir3_shader_compute_state_create(struct pipe_context *pctx, struct ir3_shader *shader = ir3_shader_from_nir(compiler, nir, 0, NULL); shader->cs.req_input_mem = align(cso->req_input_mem, 4) / 4; /* byte->dword */ + shader->cs.req_local_mem = cso->req_local_mem; /* copied as-is; unlike req_input_mem, no byte->dword conversion */ struct ir3_shader_state *hwcso = calloc(1, sizeof(*hwcso));