freedreno/a5xx: SSBO support
To simplify things for now, since all the gfx shader stages share a single SSBO state block, only advertise SSBO support for the fragment shader (and compute, when we have that). We could possibly use a fixed partitioning of the SSBO index space to support SSBOs on other stages without having to resort to shader variants. Signed-off-by: Rob Clark <robdclark@gmail.com>
This commit is contained in:
parent
edde00f5f1
commit
39c5a46a7a
|
@ -345,6 +345,72 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
|||
return needs_border;
|
||||
}
|
||||
|
||||
static void
|
||||
emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
enum a4xx_state_block sb, struct fd_shaderbuf_stateobj *so)
|
||||
{
|
||||
unsigned count = util_last_bit(so->enabled_mask);
|
||||
|
||||
if (count == 0)
|
||||
return;
|
||||
|
||||
OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (4 * count));
|
||||
OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
|
||||
CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
|
||||
CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
|
||||
CP_LOAD_STATE4_0_NUM_UNIT(count));
|
||||
OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(0) |
|
||||
CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
|
||||
OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
|
||||
for (unsigned i = 0; i < count; i++) {
|
||||
struct pipe_shader_buffer *buf = &so->sb[i];
|
||||
if (buf->buffer) {
|
||||
struct fd_resource *rsc = fd_resource(buf->buffer);
|
||||
OUT_RELOCW(ring, rsc->bo, 0, 0, 0);
|
||||
} else {
|
||||
OUT_RING(ring, 0x00000000);
|
||||
OUT_RING(ring, 0x00000000);
|
||||
}
|
||||
OUT_RING(ring, 0x00000000);
|
||||
OUT_RING(ring, 0x00000000);
|
||||
}
|
||||
|
||||
OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (2 * count));
|
||||
OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
|
||||
CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
|
||||
CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
|
||||
CP_LOAD_STATE4_0_NUM_UNIT(count));
|
||||
OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(1) |
|
||||
CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
|
||||
OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
|
||||
for (unsigned i = 0; i < count; i++) {
|
||||
struct pipe_shader_buffer *buf = &so->sb[i];
|
||||
|
||||
// TODO maybe offset encoded somewhere here??
|
||||
OUT_RING(ring, (buf->buffer_size << 16));
|
||||
OUT_RING(ring, 0x00000000);
|
||||
}
|
||||
|
||||
OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (2 * count));
|
||||
OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
|
||||
CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
|
||||
CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
|
||||
CP_LOAD_STATE4_0_NUM_UNIT(count));
|
||||
OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(2) |
|
||||
CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
|
||||
OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
|
||||
for (unsigned i = 0; i < count; i++) {
|
||||
struct pipe_shader_buffer *buf = &so->sb[i];
|
||||
if (buf->buffer) {
|
||||
struct fd_resource *rsc = fd_resource(buf->buffer);
|
||||
OUT_RELOCW(ring, rsc->bo, 0, 0, 0);
|
||||
} else {
|
||||
OUT_RING(ring, 0x00000000);
|
||||
OUT_RING(ring, 0x00000000);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
fd5_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd5_emit *emit)
|
||||
{
|
||||
|
@ -663,6 +729,9 @@ fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
|||
|
||||
if (needs_border)
|
||||
emit_border_color(ctx, ring);
|
||||
|
||||
if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_SSBO)
|
||||
emit_ssbos(ctx, ring, SB4_SSBO, &ctx->shaderbuf[PIPE_SHADER_FRAGMENT]);
|
||||
}
|
||||
|
||||
/* emit setup at begin of new cmdstream buffer (don't rely on previous
|
||||
|
|
|
@ -389,11 +389,16 @@ fd5_program_emit(struct fd_ringbuffer *ring, struct fd5_emit *emit)
|
|||
OUT_RING(ring, 0x00000000);
|
||||
|
||||
OUT_PKT4(ring, REG_A5XX_HLSQ_VS_CNTL, 5);
|
||||
OUT_RING(ring, A5XX_HLSQ_VS_CNTL_INSTRLEN(s[VS].instrlen));
|
||||
OUT_RING(ring, A5XX_HLSQ_FS_CNTL_INSTRLEN(s[FS].instrlen));
|
||||
OUT_RING(ring, A5XX_HLSQ_HS_CNTL_INSTRLEN(s[HS].instrlen));
|
||||
OUT_RING(ring, A5XX_HLSQ_DS_CNTL_INSTRLEN(s[DS].instrlen));
|
||||
OUT_RING(ring, A5XX_HLSQ_GS_CNTL_INSTRLEN(s[GS].instrlen));
|
||||
OUT_RING(ring, A5XX_HLSQ_VS_CNTL_INSTRLEN(s[VS].instrlen) |
|
||||
COND(s[VS].v && s[VS].v->has_ssbo, A5XX_HLSQ_VS_CNTL_SSBO_ENABLE));
|
||||
OUT_RING(ring, A5XX_HLSQ_FS_CNTL_INSTRLEN(s[FS].instrlen) |
|
||||
COND(s[FS].v && s[FS].v->has_ssbo, A5XX_HLSQ_FS_CNTL_SSBO_ENABLE));
|
||||
OUT_RING(ring, A5XX_HLSQ_HS_CNTL_INSTRLEN(s[HS].instrlen) |
|
||||
COND(s[HS].v && s[HS].v->has_ssbo, A5XX_HLSQ_HS_CNTL_SSBO_ENABLE));
|
||||
OUT_RING(ring, A5XX_HLSQ_DS_CNTL_INSTRLEN(s[DS].instrlen) |
|
||||
COND(s[DS].v && s[DS].v->has_ssbo, A5XX_HLSQ_DS_CNTL_SSBO_ENABLE));
|
||||
OUT_RING(ring, A5XX_HLSQ_GS_CNTL_INSTRLEN(s[GS].instrlen) |
|
||||
COND(s[GS].v && s[GS].v->has_ssbo, A5XX_HLSQ_GS_CNTL_SSBO_ENABLE));
|
||||
|
||||
OUT_PKT4(ring, REG_A5XX_SP_VS_CONFIG, 5);
|
||||
OUT_RING(ring, A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET(s[VS].constoff) |
|
||||
|
|
|
@ -250,6 +250,11 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
return 120;
|
||||
return is_ir3(screen) ? 140 : 120;
|
||||
|
||||
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
|
||||
if (is_a5xx(screen))
|
||||
return 4;
|
||||
return 0;
|
||||
|
||||
/* Unsupported features. */
|
||||
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
|
||||
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
|
||||
|
@ -282,7 +287,6 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
|
||||
case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
|
||||
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
|
||||
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
|
||||
case PIPE_CAP_INVALIDATE_BUFFER:
|
||||
case PIPE_CAP_GENERATE_MIPMAP:
|
||||
case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
|
||||
|
@ -439,7 +443,7 @@ fd_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
|
|||
|
||||
static int
|
||||
fd_screen_get_shader_param(struct pipe_screen *pscreen,
|
||||
enum pipe_shader_type shader,
|
||||
enum pipe_shader_type shader,
|
||||
enum pipe_shader_cap param)
|
||||
{
|
||||
struct fd_screen *screen = fd_screen(pscreen);
|
||||
|
@ -518,6 +522,35 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen,
|
|||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
|
||||
if (is_a5xx(screen)) {
|
||||
/* a5xx (and a4xx for that matter) has one state-block
|
||||
* for compute-shader SSBO's and another that is shared
|
||||
* by VS/HS/DS/GS/FS.. so to simplify things for now
|
||||
* just advertise SSBOs for FS and CS. We could possibly
|
||||
* do what blob does, and partition the space for
|
||||
* VS/HS/DS/GS/FS. The blob advertises:
|
||||
*
|
||||
* GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS: 4
|
||||
* GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS: 4
|
||||
* GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS: 4
|
||||
* GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS: 4
|
||||
* GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS: 4
|
||||
* GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS: 24
|
||||
* GL_MAX_COMBINED_SHADER_STORAGE_BLOCKS: 24
|
||||
*
|
||||
* I think that way we could avoid having to patch shaders
|
||||
* for actual SSBO indexes by using a static partitioning.
|
||||
*/
|
||||
switch(shader)
|
||||
{
|
||||
case PIPE_SHADER_FRAGMENT:
|
||||
case PIPE_SHADER_COMPUTE:
|
||||
return 24;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
|
||||
case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
|
||||
case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
|
||||
|
|
Loading…
Reference in New Issue