From e42a8a5b92a0abae2bc3f9457f4703f73d46f450 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sun, 14 Nov 2021 13:01:57 -0500 Subject: [PATCH] a4xx: add emission of compute state, and compute dispatch Signed-off-by: Ilia Mirkin Part-of: --- .../.gitlab-ci/reference/glxgears-a420.log | 4 +- src/freedreno/registers/adreno/a4xx.xml | 5 +- .../drivers/freedreno/a4xx/fd4_compute.c | 203 ++++++++++++++++++ .../drivers/freedreno/a4xx/fd4_compute.h | 34 +++ .../drivers/freedreno/a4xx/fd4_context.c | 2 + src/gallium/drivers/freedreno/a4xx/fd4_emit.c | 35 +++ src/gallium/drivers/freedreno/a4xx/fd4_emit.h | 6 + .../drivers/freedreno/a4xx/fd4_program.c | 8 +- .../drivers/freedreno/a4xx/fd4_program.h | 3 + src/gallium/drivers/freedreno/ir3/ir3_const.h | 4 + src/gallium/drivers/freedreno/meson.build | 2 + 11 files changed, 299 insertions(+), 7 deletions(-) create mode 100644 src/gallium/drivers/freedreno/a4xx/fd4_compute.c create mode 100644 src/gallium/drivers/freedreno/a4xx/fd4_compute.h diff --git a/src/freedreno/.gitlab-ci/reference/glxgears-a420.log b/src/freedreno/.gitlab-ci/reference/glxgears-a420.log index 10c32649b65..4dbad57add0 100644 --- a/src/freedreno/.gitlab-ci/reference/glxgears-a420.log +++ b/src/freedreno/.gitlab-ci/reference/glxgears-a420.log @@ -97,7 +97,7 @@ t0 write TPL1_TP_TEX_COUNT (2381) TPL1_TP_TEX_COUNT: { VS = 16 | HS = 0 | DS = 0 | GS = 0 } 108ce0f8: 0000: 00002381 00000010 t0 write TPL1_TP_FS_TEX_COUNT (23a0) - TPL1_TP_FS_TEX_COUNT: 0x10 + TPL1_TP_FS_TEX_COUNT: { FS = 16 | CS = 0 } 108ce100: 0000: 000023a0 00000010 t3 opcode: CP_SET_DRAW_STATE (43) (3 dwords) { COUNT = 0 | DISABLE_ALL_GROUPS | GROUP_ID = 0 } @@ -774,7 +774,7 @@ t3 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords) !+ 7e420000 SP_GS_OBJ_OFFSET_REG: { CONSTOBJECTOFFSET = 66 | SHADEROBJOFFSET = 63 } + 00000000 TPL1_TP_TEX_OFFSET: 0 !+ 00000010 TPL1_TP_TEX_COUNT: { VS = 16 | HS = 0 | DS = 0 | GS = 0 } -!+ 00000010 TPL1_TP_FS_TEX_COUNT: 0x10 +!+ 00000010 TPL1_TP_FS_TEX_COUNT: { FS = 16 | CS = 0 } !+ 28000250 HLSQ_CONTROL_0_REG: { FSTHREADSIZE = FOUR_QUADS | FSSUPERTHREADENABLE | SPSHADERRESTART | CONSTMODE = 1 | SPCONSTFULLUPDATE } !+ fcfc0100 HLSQ_CONTROL_1_REG: { VSTHREADSIZE = TWO_QUADS | VSSUPERTHREADENABLE | COORDREGID = r63.x | ZWCOORDREGID = r63.x } !+ fff3f3f0 HLSQ_CONTROL_2_REG: { PRIMALLOCTHRESHOLD = 63 | FACEREGID = r63.x | SAMPLEID_REGID = r63.x | SAMPLEMASK_REGID = r63.x } diff --git a/src/freedreno/registers/adreno/a4xx.xml b/src/freedreno/registers/adreno/a4xx.xml index 19e0b0c9480..7ce141f224b 100644 --- a/src/freedreno/registers/adreno/a4xx.xml +++ b/src/freedreno/registers/adreno/a4xx.xml @@ -1917,7 +1917,10 @@ perhaps they should be taken with a grain of salt - + + + + diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_compute.c b/src/gallium/drivers/freedreno/a4xx/fd4_compute.c new file mode 100644 index 00000000000..b9c52ad5e29 --- /dev/null +++ b/src/gallium/drivers/freedreno/a4xx/fd4_compute.c @@ -0,0 +1,203 @@ +/* + * Copyright (C) 2021 Ilia Mirkin + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Ilia Mirkin + */ + +#include "pipe/p_state.h" + +#include "freedreno_resource.h" + +#include "fd4_compute.h" +#include "fd4_context.h" +#include "fd4_emit.h" + +/* maybe move to fd4_program? */ +static void +cs_program_emit(struct fd_ringbuffer *ring, struct ir3_shader_variant *v) +{ + const struct ir3_info *i = &v->info; + enum a3xx_threadsize thrsz = i->double_threadsize ? FOUR_QUADS : TWO_QUADS; + unsigned instrlen = v->instrlen; + + /* XXX verify that this is the case on a4xx */ + /* if shader is more than 32*16 instructions, don't preload it. Similar + * to the combined restriction of 64*16 for VS+FS + */ + if (instrlen > 32) + instrlen = 0; + + OUT_PKT0(ring, REG_A4XX_SP_SP_CTRL_REG, 1); + OUT_RING(ring, 0x00860010); /* SP_SP_CTRL_REG */ + + OUT_PKT0(ring, REG_A4XX_HLSQ_CONTROL_0_REG, 1); + OUT_RING(ring, A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(TWO_QUADS) | + A4XX_HLSQ_CONTROL_0_REG_SINGLECONTEXT | + 0x000001a0 /* XXX */); + + OUT_PKT0(ring, REG_A4XX_SP_CS_CTRL_REG0, 1); + OUT_RING(ring, A4XX_SP_CS_CTRL_REG0_THREADSIZE(thrsz) | + A4XX_SP_CS_CTRL_REG0_SUPERTHREADMODE | + A4XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(i->max_half_reg + 1) | + A4XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1)); + + OUT_PKT0(ring, REG_A4XX_HLSQ_UPDATE_CONTROL, 1); + OUT_RING(ring, 0x00000038); /* HLSQ_UPDATE_CONTROL */ + + OUT_PKT0(ring, REG_A4XX_HLSQ_CS_CONTROL_REG, 1); + OUT_RING(ring, A4XX_HLSQ_CS_CONTROL_REG_CONSTOBJECTOFFSET(0) | + A4XX_HLSQ_CS_CONTROL_REG_SHADEROBJOFFSET(0) | + A4XX_HLSQ_CS_CONTROL_REG_ENABLED | + A4XX_HLSQ_CS_CONTROL_REG_INSTRLENGTH(1) | + COND(v->has_ssbo, A4XX_HLSQ_CS_CONTROL_REG_SSBO_ENABLE) | + A4XX_HLSQ_CS_CONTROL_REG_CONSTLENGTH(v->constlen / 4)); + + uint32_t driver_param_base = v->const_state->offsets.driver_param * 4; + uint32_t local_invocation_id, work_group_id, local_group_size_id, + num_wg_id, work_dim_id, unused_id; + local_invocation_id = + ir3_find_sysval_regid(v, SYSTEM_VALUE_LOCAL_INVOCATION_ID); + work_group_id = driver_param_base + IR3_DP_WORKGROUP_ID_X; + num_wg_id = driver_param_base + IR3_DP_NUM_WORK_GROUPS_X; + local_group_size_id = driver_param_base + IR3_DP_LOCAL_GROUP_SIZE_X; + work_dim_id = driver_param_base + IR3_DP_WORK_DIM; + /* NOTE: At some point we'll want to use this, it's probably WGOFFSETCONSTID */ + unused_id = driver_param_base + IR3_DP_BASE_GROUP_X; + + OUT_PKT0(ring, REG_A4XX_HLSQ_CL_CONTROL_0, 2); + OUT_RING(ring, A4XX_HLSQ_CL_CONTROL_0_WGIDCONSTID(work_group_id) | + A4XX_HLSQ_CL_CONTROL_0_KERNELDIMCONSTID(work_dim_id) | + A4XX_HLSQ_CL_CONTROL_0_LOCALIDREGID(local_invocation_id)); + OUT_RING(ring, A4XX_HLSQ_CL_CONTROL_1_UNK0CONSTID(unused_id) | + A4XX_HLSQ_CL_CONTROL_1_WORKGROUPSIZECONSTID(local_group_size_id)); + + OUT_PKT0(ring, REG_A4XX_HLSQ_CL_KERNEL_CONST, 1); + OUT_RING(ring, A4XX_HLSQ_CL_KERNEL_CONST_UNK0CONSTID(unused_id) | + A4XX_HLSQ_CL_KERNEL_CONST_NUMWGCONSTID(num_wg_id)); + + OUT_PKT0(ring, REG_A4XX_HLSQ_CL_WG_OFFSET, 1); + OUT_RING(ring, A4XX_HLSQ_CL_WG_OFFSET_UNK0CONSTID(unused_id)); + + OUT_PKT0(ring, REG_A4XX_HLSQ_MODE_CONTROL, 1); + OUT_RING(ring, 0x00000003); /* HLSQ_MODE_CONTROL */ + + OUT_PKT0(ring, REG_A4XX_HLSQ_UPDATE_CONTROL, 1); + OUT_RING(ring, 0x00000000); /* HLSQ_UPDATE_CONTROL */ + + OUT_PKT0(ring, REG_A4XX_SP_CS_OBJ_START, 1); + OUT_RELOC(ring, v->bo, 0, 0, 0); /* SP_CS_OBJ_START */ + + OUT_PKT0(ring, REG_A4XX_SP_CS_LENGTH_REG, 1); + OUT_RING(ring, v->instrlen); + + if (instrlen > 0) + fd4_emit_shader(ring, v); +} + +static void +fd4_launch_grid(struct fd_context *ctx, + const struct pipe_grid_info *info) assert_dt +{ + struct ir3_shader_key key = {}; + struct ir3_shader_variant *v; + struct fd_ringbuffer *ring = ctx->batch->draw; + unsigned nglobal = 0; + + v = + ir3_shader_variant(ir3_get_shader(ctx->compute), key, false, &ctx->debug); + if (!v) + return; + + if (ctx->dirty_shader[PIPE_SHADER_COMPUTE] & FD_DIRTY_SHADER_PROG) + cs_program_emit(ring, v); + + fd4_emit_cs_state(ctx, ring, v); + fd4_emit_cs_consts(v, ring, ctx, info); + + u_foreach_bit (i, ctx->global_bindings.enabled_mask) + nglobal++; + + if (nglobal > 0) { + /* global resources don't otherwise get an OUT_RELOC(), since + * the raw ptr address is emitted ir ir3_emit_cs_consts(). + * So to make the kernel aware that these buffers are referenced + * by the batch, emit dummy reloc's as part of a no-op packet + * payload: + */ + OUT_PKT3(ring, CP_NOP, 2 * nglobal); + u_foreach_bit (i, ctx->global_bindings.enabled_mask) { + struct pipe_resource *prsc = ctx->global_bindings.buf[i]; + OUT_RELOC(ring, fd_resource(prsc)->bo, 0, 0, 0); + } + } + + const unsigned *local_size = + info->block; // v->shader->nir->info->workgroup_size; + const unsigned *num_groups = info->grid; + /* for some reason, mesa/st doesn't set info->work_dim, so just assume 3: */ + const unsigned work_dim = info->work_dim ? info->work_dim : 3; + OUT_PKT0(ring, REG_A4XX_HLSQ_CL_NDRANGE_0, 7); + OUT_RING(ring, A4XX_HLSQ_CL_NDRANGE_0_KERNELDIM(work_dim) | + A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEX(local_size[0] - 1) | + A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEY(local_size[1] - 1) | + A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEZ(local_size[2] - 1)); + OUT_RING(ring, + A4XX_HLSQ_CL_NDRANGE_1_SIZE_X(local_size[0] * num_groups[0])); + OUT_RING(ring, 0); /* HLSQ_CL_NDRANGE_2_GLOBALOFF_X */ + OUT_RING(ring, + A4XX_HLSQ_CL_NDRANGE_3_SIZE_Y(local_size[1] * num_groups[1])); + OUT_RING(ring, 0); /* HLSQ_CL_NDRANGE_4_GLOBALOFF_Y */ + OUT_RING(ring, + A4XX_HLSQ_CL_NDRANGE_5_SIZE_Z(local_size[2] * num_groups[2])); + OUT_RING(ring, 0); /* HLSQ_CL_NDRANGE_6_GLOBALOFF_Z */ + + if (info->indirect) { + struct fd_resource *rsc = fd_resource(info->indirect); + + fd_event_write(ctx->batch, ring, CACHE_FLUSH); + fd_wfi(ctx->batch, ring); + + OUT_PKT3(ring, CP_EXEC_CS_INDIRECT, 3); + OUT_RING(ring, 0x00000000); + OUT_RELOC(ring, rsc->bo, info->indirect_offset, 0, 0); + OUT_RING(ring, + A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEX(local_size[0] - 1) | + A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEY(local_size[1] - 1) | + A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEZ(local_size[2] - 1)); + } else { + OUT_PKT3(ring, CP_EXEC_CS, 4); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, CP_EXEC_CS_1_NGROUPS_X(info->grid[0])); + OUT_RING(ring, CP_EXEC_CS_2_NGROUPS_Y(info->grid[1])); + OUT_RING(ring, CP_EXEC_CS_3_NGROUPS_Z(info->grid[2])); + } +} + +void +fd4_compute_init(struct pipe_context *pctx) disable_thread_safety_analysis +{ + struct fd_context *ctx = fd_context(pctx); + ctx->launch_grid = fd4_launch_grid; + pctx->create_compute_state = ir3_shader_compute_state_create; + pctx->delete_compute_state = ir3_shader_state_delete; +} diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_compute.h b/src/gallium/drivers/freedreno/a4xx/fd4_compute.h new file mode 100644 index 00000000000..f10fdeea7f7 --- /dev/null +++ b/src/gallium/drivers/freedreno/a4xx/fd4_compute.h @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2021 Ilia Mirkin + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Ilia Mirkin + */ + +#ifndef FD4_COMPUTE_H_ +#define FD4_COMPUTE_H_ + +#include "pipe/p_context.h" + +void fd4_compute_init(struct pipe_context *pctx); + +#endif /* FD4_COMPUTE_H_ */ diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_context.c b/src/gallium/drivers/freedreno/a4xx/fd4_context.c index 18663a9d854..0cfe5926863 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_context.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_context.c @@ -27,6 +27,7 @@ #include "freedreno_query_hw.h" #include "fd4_blend.h" +#include "fd4_compute.h" #include "fd4_context.h" #include "fd4_draw.h" #include "fd4_emit.h" @@ -83,6 +84,7 @@ fd4_context_create(struct pipe_screen *pscreen, void *priv, pctx->create_depth_stencil_alpha_state = fd4_zsa_state_create; fd4_draw_init(pctx); + fd4_compute_init(pctx); fd4_gmem_init(pctx); fd4_texture_init(pctx); fd4_prog_init(pctx); diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index 2697d4439cf..5b00245bea0 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -136,6 +136,14 @@ emit_const_ptrs(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v, fd4_emit_const_ptrs(ring, v->type, dst_offset, num, bos, offsets); } +void +fd4_emit_cs_consts(const struct ir3_shader_variant *v, + struct fd_ringbuffer *ring, struct fd_context *ctx, + const struct pipe_grid_info *info) +{ + ir3_emit_cs_consts(v, ring, ctx, info); +} + static void emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, enum a4xx_state_block sb, struct fd_texture_stateobj *tex, @@ -144,6 +152,7 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, static const uint32_t bcolor_reg[] = { [SB4_VS_TEX] = REG_A4XX_TPL1_TP_VS_BORDER_COLOR_BASE_ADDR, [SB4_FS_TEX] = REG_A4XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, + [SB4_CS_TEX] = REG_A4XX_TPL1_TP_CS_BORDER_COLOR_BASE_ADDR, }; struct fd4_context *fd4_ctx = fd4_context(ctx); bool needs_border = false; @@ -909,6 +918,32 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, } } +void +fd4_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring, + struct ir3_shader_variant *cp) +{ + enum fd_dirty_shader_state dirty = ctx->dirty_shader[PIPE_SHADER_COMPUTE]; + + if (dirty & FD_DIRTY_SHADER_TEX) { + emit_textures(ctx, ring, SB4_CS_TEX, &ctx->tex[PIPE_SHADER_COMPUTE], cp); + + OUT_PKT0(ring, REG_A4XX_TPL1_TP_TEX_COUNT, 1); + OUT_RING(ring, 0); + } + + OUT_PKT0(ring, REG_A4XX_TPL1_TP_FS_TEX_COUNT, 1); + OUT_RING(ring, A4XX_TPL1_TP_FS_TEX_COUNT_CS( + ctx->shaderimg[PIPE_SHADER_COMPUTE].enabled_mask + ? 0x80 + : ctx->tex[PIPE_SHADER_COMPUTE].num_textures)); + + if (dirty & FD_DIRTY_SHADER_SSBO) + emit_ssbos(ctx, ring, SB4_CS_SSBO, &ctx->shaderbuf[PIPE_SHADER_COMPUTE]); + + if (dirty & FD_DIRTY_SHADER_IMAGE) + fd4_emit_images(ctx, ring, PIPE_SHADER_COMPUTE, cp); +} + /* emit setup at begin of new cmdstream buffer (don't rely on previous * state, there could have been a context switch between ioctls): */ diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.h b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h index 2a012dab72a..d4299085e92 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.h +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h @@ -101,6 +101,12 @@ void fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, void fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, struct fd4_emit *emit) assert_dt; +void fd4_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring, + struct ir3_shader_variant *cp) assert_dt; +void fd4_emit_cs_consts(const struct ir3_shader_variant *v, + struct fd_ringbuffer *ring, struct fd_context *ctx, + const struct pipe_grid_info *info) assert_dt; + void fd4_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt; diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c index 6c0457158f8..212dbd49618 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c @@ -37,8 +37,8 @@ #include "fd4_program.h" #include "fd4_texture.h" -static void -emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so) +void +fd4_emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so) { const struct ir3_info *si = &so->info; enum a4xx_state_block sb = fd4_stage2shadersb(so->type); @@ -570,11 +570,11 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, int nr, } if (s[VS].instrlen) - emit_shader(ring, s[VS].v); + fd4_emit_shader(ring, s[VS].v); if (!emit->binning_pass) if (s[FS].instrlen) - emit_shader(ring, s[FS].v); + fd4_emit_shader(ring, s[FS].v); } static struct ir3_program_state * diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.h b/src/gallium/drivers/freedreno/a4xx/fd4_program.h index 7fcb0c72a2c..8de10963966 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_program.h +++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.h @@ -48,6 +48,9 @@ fd4_program_state(struct ir3_program_state *state) return (struct fd4_program_state *)state; } +void fd4_emit_shader(struct fd_ringbuffer *ring, + const struct ir3_shader_variant *so); + void fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, int nr, struct pipe_surface **bufs); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_const.h b/src/gallium/drivers/freedreno/ir3/ir3_const.h index c2c239e9700..716fc32a7a0 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_const.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_const.h @@ -570,6 +570,10 @@ ir3_emit_cs_consts(const struct ir3_shader_variant *v, emit_common_consts(v, ring, ctx, PIPE_SHADER_COMPUTE); emit_kernel_params(ctx, v, ring, info); + /* a3xx/a4xx can inject these directly */ + if (ctx->screen->gen <= 4) + return; + /* emit compute-shader driver-params: */ const struct ir3_const_state *const_state = ir3_const_state(v); uint32_t offset = const_state->offsets.driver_param; diff --git a/src/gallium/drivers/freedreno/meson.build b/src/gallium/drivers/freedreno/meson.build index 854cdf56f27..07ccd10825a 100644 --- a/src/gallium/drivers/freedreno/meson.build +++ b/src/gallium/drivers/freedreno/meson.build @@ -118,6 +118,8 @@ files_libfreedreno = files( 'a3xx/fd3_zsa.h', 'a4xx/fd4_blend.c', 'a4xx/fd4_blend.h', + 'a4xx/fd4_compute.c', + 'a4xx/fd4_compute.h', 'a4xx/fd4_context.c', 'a4xx/fd4_context.h', 'a4xx/fd4_draw.c',