freedreno/a5xx: SSBO support

To simplify things for now, since all the gfx shader stages share a
single SSBO state block, only advertise SSBO support for the fragment
shader (and compute, when we have that).  We could possibly use a fixed
partitioning of the SSBO index space to support SSBOs on the other stages
without having to resort to shader variants.

Signed-off-by: Rob Clark <robdclark@gmail.com>
commit 39c5a46a7a
parent edde00f5f1
Rob Clark <robdclark@gmail.com>  2017-04-17 11:25:29 -04:00

3 changed files with 114 additions and 7 deletions
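
For context, a rough usage sketch of what the newly advertised fragment-shader SSBO support looks like from the gallium side. This is not part of the commit; the set_shader_buffers interface shown is the one from this era, and pctx/res are placeholder variables:

	/* Hypothetical example, not part of this commit: bind one SSBO to the
	 * fragment stage.  On a5xx it lands in the shared gfx SSBO state block
	 * and is (re)emitted by emit_ssbos() when FD_DIRTY_SHADER_SSBO is set.
	 */
	struct pipe_shader_buffer sb = {
		.buffer        = res,   /* placeholder pipe_resource */
		.buffer_offset = 0,     /* 4-byte aligned, per the new a5xx cap */
		.buffer_size   = 1024,
	};
	pctx->set_shader_buffers(pctx, PIPE_SHADER_FRAGMENT, 0, 1, &sb);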

src/gallium/drivers/freedreno/a5xx/fd5_emit.c

@@ -345,6 +345,72 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
 	return needs_border;
 }
 
+static void
+emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring,
+		enum a4xx_state_block sb, struct fd_shaderbuf_stateobj *so)
+{
+	unsigned count = util_last_bit(so->enabled_mask);
+
+	if (count == 0)
+		return;
+
+	OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (4 * count));
+	OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
+			CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
+			CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
+			CP_LOAD_STATE4_0_NUM_UNIT(count));
+	OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(0) |
+			CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
+	OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
+	for (unsigned i = 0; i < count; i++) {
+		struct pipe_shader_buffer *buf = &so->sb[i];
+		if (buf->buffer) {
+			struct fd_resource *rsc = fd_resource(buf->buffer);
+			OUT_RELOCW(ring, rsc->bo, 0, 0, 0);
+		} else {
+			OUT_RING(ring, 0x00000000);
+			OUT_RING(ring, 0x00000000);
+		}
+		OUT_RING(ring, 0x00000000);
+		OUT_RING(ring, 0x00000000);
+	}
+
+	OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (2 * count));
+	OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
+			CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
+			CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
+			CP_LOAD_STATE4_0_NUM_UNIT(count));
+	OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(1) |
+			CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
+	OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
+	for (unsigned i = 0; i < count; i++) {
+		struct pipe_shader_buffer *buf = &so->sb[i];
+		// TODO maybe offset encoded somewhere here??
+		OUT_RING(ring, (buf->buffer_size << 16));
+		OUT_RING(ring, 0x00000000);
+	}
+
+	OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (2 * count));
+	OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
+			CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
+			CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
+			CP_LOAD_STATE4_0_NUM_UNIT(count));
+	OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(2) |
+			CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
+	OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
+	for (unsigned i = 0; i < count; i++) {
+		struct pipe_shader_buffer *buf = &so->sb[i];
+		if (buf->buffer) {
+			struct fd_resource *rsc = fd_resource(buf->buffer);
+			OUT_RELOCW(ring, rsc->bo, 0, 0, 0);
+		} else {
+			OUT_RING(ring, 0x00000000);
+			OUT_RING(ring, 0x00000000);
+		}
+	}
+}
+
 void
 fd5_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd5_emit *emit)
 {
@@ -663,6 +729,9 @@ fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 	if (needs_border)
 		emit_border_color(ctx, ring);
+
+	if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_SSBO)
+		emit_ssbos(ctx, ring, SB4_SSBO, &ctx->shaderbuf[PIPE_SHADER_FRAGMENT]);
 }
 
 /* emit setup at begin of new cmdstream buffer (don't rely on previous
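
As a reading aid (not part of the commit): the three CP_LOAD_STATE4 packets emitted by emit_ssbos() above differ only in their state type and per-SSBO payload size. A small sketch of the sizing, with the type 0/1/2 meanings inferred from the code:

	/* Sketch only: per-SSBO dword counts implied by emit_ssbos() above.
	 * Type 0 carries the 64-bit buffer address plus two (currently zero)
	 * dwords, type 1 carries the size (buffer_size << 16) plus one zero
	 * dword, and type 2 carries the 64-bit buffer address again.
	 */
	static unsigned
	ssbo_load_state_size(unsigned state_type, unsigned count)
	{
		unsigned dwords_per_ssbo = (state_type == 0) ? 4 : 2;
		/* 3 CP_LOAD_STATE4_{0,1,2} dwords + payload */
		return 3 + (dwords_per_ssbo * count);
	}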

src/gallium/drivers/freedreno/a5xx/fd5_program.c

@@ -389,11 +389,16 @@ fd5_program_emit(struct fd_ringbuffer *ring, struct fd5_emit *emit)
 	OUT_RING(ring, 0x00000000);
 
 	OUT_PKT4(ring, REG_A5XX_HLSQ_VS_CNTL, 5);
-	OUT_RING(ring, A5XX_HLSQ_VS_CNTL_INSTRLEN(s[VS].instrlen));
-	OUT_RING(ring, A5XX_HLSQ_FS_CNTL_INSTRLEN(s[FS].instrlen));
-	OUT_RING(ring, A5XX_HLSQ_HS_CNTL_INSTRLEN(s[HS].instrlen));
-	OUT_RING(ring, A5XX_HLSQ_DS_CNTL_INSTRLEN(s[DS].instrlen));
-	OUT_RING(ring, A5XX_HLSQ_GS_CNTL_INSTRLEN(s[GS].instrlen));
+	OUT_RING(ring, A5XX_HLSQ_VS_CNTL_INSTRLEN(s[VS].instrlen) |
+			COND(s[VS].v && s[VS].v->has_ssbo, A5XX_HLSQ_VS_CNTL_SSBO_ENABLE));
+	OUT_RING(ring, A5XX_HLSQ_FS_CNTL_INSTRLEN(s[FS].instrlen) |
+			COND(s[FS].v && s[FS].v->has_ssbo, A5XX_HLSQ_FS_CNTL_SSBO_ENABLE));
+	OUT_RING(ring, A5XX_HLSQ_HS_CNTL_INSTRLEN(s[HS].instrlen) |
+			COND(s[HS].v && s[HS].v->has_ssbo, A5XX_HLSQ_HS_CNTL_SSBO_ENABLE));
+	OUT_RING(ring, A5XX_HLSQ_DS_CNTL_INSTRLEN(s[DS].instrlen) |
+			COND(s[DS].v && s[DS].v->has_ssbo, A5XX_HLSQ_DS_CNTL_SSBO_ENABLE));
+	OUT_RING(ring, A5XX_HLSQ_GS_CNTL_INSTRLEN(s[GS].instrlen) |
+			COND(s[GS].v && s[GS].v->has_ssbo, A5XX_HLSQ_GS_CNTL_SSBO_ENABLE));
 
 	OUT_PKT4(ring, REG_A5XX_SP_VS_CONFIG, 5);
 	OUT_RING(ring, A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET(s[VS].constoff) |

src/gallium/drivers/freedreno/freedreno_screen.c

@@ -250,6 +250,11 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 			return 120;
 		return is_ir3(screen) ? 140 : 120;
 
+	case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
+		if (is_a5xx(screen))
+			return 4;
+		return 0;
+
 	/* Unsupported features. */
 	case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
 	case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
@@ -282,7 +287,6 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 	case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
 	case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
 	case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
-	case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
 	case PIPE_CAP_INVALIDATE_BUFFER:
 	case PIPE_CAP_GENERATE_MIPMAP:
 	case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
@@ -439,7 +443,7 @@ fd_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
 
 static int
 fd_screen_get_shader_param(struct pipe_screen *pscreen,
-		enum pipe_shader_type shader,
+		enum pipe_shader_type shader,
 		enum pipe_shader_cap param)
 {
 	struct fd_screen *screen = fd_screen(pscreen);
@@ -518,6 +522,35 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen,
 	case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
 		return 32;
 	case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+		if (is_a5xx(screen)) {
+			/* a5xx (and a4xx for that matter) has one state-block
+			 * for compute-shader SSBO's and another that is shared
+			 * by VS/HS/DS/GS/FS.. so to simplify things for now
+			 * just advertise SSBOs for FS and CS.  We could possibly
+			 * do what blob does, and partition the space for
+			 * VS/HS/DS/GS/FS.  The blob advertises:
+			 *
+			 *   GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS: 4
+			 *   GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS: 4
+			 *   GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS: 4
+			 *   GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS: 4
+			 *   GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS: 4
+			 *   GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS: 24
+			 *   GL_MAX_COMBINED_SHADER_STORAGE_BLOCKS: 24
+			 *
+			 * I think that way we could avoid having to patch shaders
+			 * for actual SSBO indexes by using a static partitioning.
+			 */
+			switch (shader) {
+			case PIPE_SHADER_FRAGMENT:
+			case PIPE_SHADER_COMPUTE:
+				return 24;
+			default:
+				return 0;
+			}
+		}
+		return 0;
 	case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
 	case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
 	case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
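
To make the static-partitioning idea from the comment above concrete, here is a hypothetical sketch. It is not part of this commit, gfx_ssbo_base() is an invented helper, and the slot bases are made up for illustration; an actual layout would have to match how the shader compiler rewrites SSBO indices:

	/* Hypothetical only: a fixed carve-up of the shared VS/HS/DS/GS/FS SSBO
	 * index space, 4 slots per stage along the lines of the blob's
	 * advertised limits.  The compiler would add the stage's base to each
	 * SSBO index instead of requiring shader variants; compute keeps its
	 * own state block and the full 24 slots.
	 */
	static unsigned
	gfx_ssbo_base(enum pipe_shader_type shader)
	{
		switch (shader) {
		case PIPE_SHADER_VERTEX:    return 0;   /* slots 0..3   */
		case PIPE_SHADER_TESS_CTRL: return 4;   /* slots 4..7   */
		case PIPE_SHADER_TESS_EVAL: return 8;   /* slots 8..11  */
		case PIPE_SHADER_GEOMETRY:  return 12;  /* slots 12..15 */
		case PIPE_SHADER_FRAGMENT:  return 16;  /* slots 16..19 */
		default:                    return 0;   /* compute: separate block */
		}
	}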