freedreno: core compute state support
Signed-off-by: Rob Clark <robdclark@gmail.com>
This commit is contained in:
parent
2ce449fa7d
commit
10c17f23b7
|
@ -255,6 +255,7 @@ struct fd_context {
|
|||
/* per shader-stage dirty status: */
|
||||
enum fd_dirty_shader_state dirty_shader[PIPE_SHADER_TYPES];
|
||||
|
||||
void *compute;
|
||||
struct pipe_blend_state *blend;
|
||||
struct pipe_rasterizer_state *rasterizer;
|
||||
struct pipe_depth_stencil_alpha_state *zsa;
|
||||
|
@ -299,6 +300,9 @@ struct fd_context {
|
|||
void (*clear)(struct fd_context *ctx, unsigned buffers,
|
||||
const union pipe_color_union *color, double depth, unsigned stencil);
|
||||
|
||||
/* compute: */
|
||||
void (*launch_grid)(struct fd_context *ctx, const struct pipe_grid_info *info);
|
||||
|
||||
/* constant emit: (note currently not used/needed for a2xx) */
|
||||
void (*emit_const)(struct fd_ringbuffer *ring, enum shader_t type,
|
||||
uint32_t regid, uint32_t offset, uint32_t sizedwords,
|
||||
|
@ -376,8 +380,16 @@ static inline void
|
|||
fd_context_all_clean(struct fd_context *ctx)
|
||||
{
|
||||
ctx->dirty = 0;
|
||||
for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++)
|
||||
for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++) {
|
||||
/* don't mark compute state as clean, since it is not emitted
|
||||
* during normal draw call. The places that call _all_dirty(),
|
||||
* it is safe to mark compute state dirty as well, but the
|
||||
* inverse is not true.
|
||||
*/
|
||||
if (i == PIPE_SHADER_COMPUTE)
|
||||
continue;
|
||||
ctx->dirty_shader[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static inline struct pipe_scissor_state *
|
||||
|
|
|
@ -408,6 +408,49 @@ fd_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps,
|
|||
buffers, depth, stencil, x, y, w, h);
|
||||
}
|
||||
|
||||
static void
|
||||
fd_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
|
||||
{
|
||||
struct fd_context *ctx = fd_context(pctx);
|
||||
struct fd_batch *batch, *save_batch = NULL;
|
||||
unsigned i;
|
||||
|
||||
/* TODO maybe we don't want to allocate and flush a batch each time?
|
||||
* We could use a special bogus (ie. won't match any fb state) key
|
||||
* in the batch-case for compute shaders, and rely on the rest of
|
||||
* the dependency tracking mechanism to tell us when the compute
|
||||
* batch needs to be flushed?
|
||||
*/
|
||||
batch = fd_bc_alloc_batch(&ctx->screen->batch_cache, ctx);
|
||||
fd_batch_reference(&save_batch, ctx->batch);
|
||||
fd_batch_reference(&ctx->batch, batch);
|
||||
|
||||
mtx_lock(&ctx->screen->lock);
|
||||
|
||||
/* Mark SSBOs as being written.. we don't actually know which ones are
|
||||
* read vs written, so just assume the worst
|
||||
*/
|
||||
foreach_bit(i, ctx->shaderbuf[PIPE_SHADER_COMPUTE].enabled_mask)
|
||||
resource_read(batch, ctx->shaderbuf[PIPE_SHADER_COMPUTE].sb[i].buffer);
|
||||
|
||||
/* UBO's are read */
|
||||
foreach_bit(i, ctx->constbuf[PIPE_SHADER_COMPUTE].enabled_mask)
|
||||
resource_read(batch, ctx->constbuf[PIPE_SHADER_COMPUTE].cb[i].buffer);
|
||||
|
||||
/* Mark textures as being read */
|
||||
foreach_bit(i, ctx->tex[PIPE_SHADER_COMPUTE].valid_textures)
|
||||
resource_read(batch, ctx->tex[PIPE_SHADER_COMPUTE].textures[i]->texture);
|
||||
|
||||
mtx_unlock(&ctx->screen->lock);
|
||||
|
||||
ctx->launch_grid(ctx, info);
|
||||
|
||||
fd_gmem_flush_compute(batch);
|
||||
|
||||
fd_batch_reference(&ctx->batch, save_batch);
|
||||
fd_batch_reference(&save_batch, NULL);
|
||||
}
|
||||
|
||||
void
|
||||
fd_draw_init(struct pipe_context *pctx)
|
||||
{
|
||||
|
@ -415,4 +458,8 @@ fd_draw_init(struct pipe_context *pctx)
|
|||
pctx->clear = fd_clear;
|
||||
pctx->clear_render_target = fd_clear_render_target;
|
||||
pctx->clear_depth_stencil = fd_clear_depth_stencil;
|
||||
|
||||
if (has_compute(fd_screen(pctx->screen))) {
|
||||
pctx->launch_grid = fd_launch_grid;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -439,6 +439,13 @@ fd_gmem_render_noop(struct fd_batch *batch)
|
|||
flush_ring(batch);
|
||||
}
|
||||
|
||||
/**
 * Flush a compute batch: hand it to render_sysmem() and then submit it
 * with flush_ring().
 *
 * @param batch  the compute batch to flush
 */
void
fd_gmem_flush_compute(struct fd_batch *batch)
{
	/* same two steps, in the same order, on the given batch: */
	render_sysmem(batch);
	flush_ring(batch);
}
|
||||
|
||||
/* tile needs restore if it isn't completely contained within the
|
||||
* cleared scissor:
|
||||
*/
|
||||
|
|
|
@ -63,6 +63,7 @@ struct fd_batch;
|
|||
|
||||
void fd_gmem_render_tiles(struct fd_batch *batch);
|
||||
void fd_gmem_render_noop(struct fd_batch *batch);
|
||||
void fd_gmem_flush_compute(struct fd_batch *batch);
|
||||
|
||||
bool fd_gmem_needs_restore(struct fd_batch *batch, struct fd_tile *tile,
|
||||
uint32_t buffers);
|
||||
|
|
|
@ -189,13 +189,15 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_USER_CONSTANT_BUFFERS:
|
||||
return is_a4xx(screen) ? 0 : 1;
|
||||
|
||||
case PIPE_CAP_COMPUTE:
|
||||
return has_compute(screen);
|
||||
|
||||
case PIPE_CAP_SHADER_STENCIL_EXPORT:
|
||||
case PIPE_CAP_TGSI_TEXCOORD:
|
||||
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
|
||||
case PIPE_CAP_TEXTURE_MULTISAMPLE:
|
||||
case PIPE_CAP_TEXTURE_BARRIER:
|
||||
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
|
||||
case PIPE_CAP_COMPUTE:
|
||||
case PIPE_CAP_QUERY_MEMORY_INFO:
|
||||
case PIPE_CAP_PCI_GROUP:
|
||||
case PIPE_CAP_PCI_BUS:
|
||||
|
@ -454,6 +456,9 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen,
|
|||
case PIPE_SHADER_VERTEX:
|
||||
break;
|
||||
case PIPE_SHADER_COMPUTE:
|
||||
if (has_compute(screen))
|
||||
break;
|
||||
return 0;
|
||||
case PIPE_SHADER_GEOMETRY:
|
||||
/* maybe we could emulate.. */
|
||||
return 0;
|
||||
|
@ -514,13 +519,30 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen,
|
|||
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
|
||||
return 16;
|
||||
case PIPE_SHADER_CAP_PREFERRED_IR:
|
||||
if ((fd_mesa_debug & FD_DBG_NIR) && is_ir3(screen))
|
||||
switch (shader) {
|
||||
case PIPE_SHADER_FRAGMENT:
|
||||
case PIPE_SHADER_VERTEX:
|
||||
if ((fd_mesa_debug & FD_DBG_NIR) && is_ir3(screen))
|
||||
return PIPE_SHADER_IR_NIR;
|
||||
return PIPE_SHADER_IR_TGSI;
|
||||
default:
|
||||
/* tgsi_to_nir doesn't really support much beyond FS/VS: */
|
||||
debug_assert(is_ir3(screen));
|
||||
return PIPE_SHADER_IR_NIR;
|
||||
return PIPE_SHADER_IR_TGSI;
|
||||
}
|
||||
break;
|
||||
case PIPE_SHADER_CAP_SUPPORTED_IRS:
|
||||
if (is_ir3(screen)) {
|
||||
return (1 << PIPE_SHADER_IR_NIR) | (1 << PIPE_SHADER_IR_TGSI);
|
||||
} else {
|
||||
return (1 << PIPE_SHADER_IR_TGSI);
|
||||
}
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
|
||||
case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
|
||||
if (is_a5xx(screen)) {
|
||||
/* a5xx (and a4xx for that matter) has one state-block
|
||||
|
@ -552,14 +574,96 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen,
|
|||
}
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
|
||||
case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
|
||||
case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
|
||||
/* probably should be same as MAX_SHADER_BUFFERS but not implemented yet */
|
||||
return 0;
|
||||
}
|
||||
debug_printf("unknown shader param %d\n", param);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* TODO depending on how much the limits differ for a3xx/a4xx, maybe move this
|
||||
* into per-generation backend?
|
||||
*/
|
||||
static int
|
||||
fd_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type,
|
||||
enum pipe_compute_cap param, void *ret)
|
||||
{
|
||||
struct fd_screen *screen = fd_screen(pscreen);
|
||||
const char *ir = "ir3";
|
||||
|
||||
if (!has_compute(screen))
|
||||
return 0;
|
||||
|
||||
switch (param) {
|
||||
case PIPE_COMPUTE_CAP_ADDRESS_BITS:
|
||||
if (ret) {
|
||||
uint32_t *address_bits = ret;
|
||||
address_bits[0] = 32;
|
||||
|
||||
if (is_a5xx(screen))
|
||||
address_bits[0] = 64;
|
||||
}
|
||||
return 1 * sizeof(uint32_t);
|
||||
|
||||
case PIPE_COMPUTE_CAP_IR_TARGET:
|
||||
if (ret)
|
||||
sprintf(ret, ir);
|
||||
return strlen(ir) * sizeof(char);
|
||||
|
||||
case PIPE_COMPUTE_CAP_GRID_DIMENSION:
|
||||
if (ret) {
|
||||
uint64_t *grid_dimension = ret;
|
||||
grid_dimension[0] = 3;
|
||||
}
|
||||
return 1 * sizeof(uint64_t);
|
||||
|
||||
case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
|
||||
if (ret) {
|
||||
uint64_t *grid_size = ret;
|
||||
grid_size[0] = 65535;
|
||||
grid_size[1] = 65535;
|
||||
grid_size[2] = 65535;
|
||||
}
|
||||
return 3 * sizeof(uint64_t) ;
|
||||
|
||||
case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
|
||||
if (ret) {
|
||||
uint64_t *grid_size = ret;
|
||||
grid_size[0] = 1024;
|
||||
grid_size[1] = 1024;
|
||||
grid_size[2] = 64;
|
||||
}
|
||||
return 3 * sizeof(uint64_t) ;
|
||||
|
||||
case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
|
||||
if (ret) {
|
||||
uint64_t *max_threads_per_block = ret;
|
||||
*max_threads_per_block = 1024;
|
||||
}
|
||||
return sizeof(uint64_t);
|
||||
|
||||
case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
|
||||
case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
|
||||
case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
|
||||
case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
|
||||
break;
|
||||
case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
|
||||
if (ret) {
|
||||
uint64_t *max = ret;
|
||||
*max = 32768;
|
||||
}
|
||||
return sizeof(uint64_t);
|
||||
case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
|
||||
case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
|
||||
case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
|
||||
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
|
||||
case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const void *
|
||||
fd_get_compiler_options(struct pipe_screen *pscreen,
|
||||
enum pipe_shader_ir ir, unsigned shader)
|
||||
|
@ -752,6 +856,7 @@ fd_screen_create(struct fd_device *dev)
|
|||
pscreen->get_param = fd_screen_get_param;
|
||||
pscreen->get_paramf = fd_screen_get_paramf;
|
||||
pscreen->get_shader_param = fd_screen_get_shader_param;
|
||||
pscreen->get_compute_param = fd_get_compute_param;
|
||||
pscreen->get_compiler_options = fd_get_compiler_options;
|
||||
|
||||
fd_resource_screen_init(pscreen);
|
||||
|
|
|
@ -128,4 +128,10 @@ is_ir3(struct fd_screen *screen)
|
|||
return is_a3xx(screen) || is_a4xx(screen) || is_a5xx(screen);
|
||||
}
|
||||
|
||||
/**
 * Whether the given screen supports compute.
 *
 * Currently always false, whatever screen is passed.
 */
static inline bool
has_compute(struct fd_screen *screen)
{
	(void)screen;	/* not consulted */

	return false;
}
|
||||
|
||||
#endif /* FREEDRENO_SCREEN_H_ */
|
||||
|
|
|
@ -451,6 +451,32 @@ fd_set_stream_output_targets(struct pipe_context *pctx,
|
|||
ctx->dirty |= FD_DIRTY_STREAMOUT;
|
||||
}
|
||||
|
||||
static void
|
||||
fd_bind_compute_state(struct pipe_context *pctx, void *state)
|
||||
{
|
||||
struct fd_context *ctx = fd_context(pctx);
|
||||
ctx->compute = state;
|
||||
ctx->dirty_shader[PIPE_SHADER_COMPUTE] |= FD_DIRTY_SHADER_PROG;
|
||||
}
|
||||
|
||||
/**
 * Placeholder for binding compute resources.
 *
 * Not implemented yet: the arguments are ignored and nothing is changed.
 */
static void
fd_set_compute_resources(struct pipe_context *pctx,
		unsigned start, unsigned count, struct pipe_surface **prscs)
{
	/* TODO */
}
|
||||
|
||||
/**
 * Placeholder for binding global buffers.
 *
 * Not implemented yet: the arguments are ignored and 'handles' is left
 * untouched.
 */
static void
fd_set_global_binding(struct pipe_context *pctx,
		unsigned first, unsigned count, struct pipe_resource **prscs,
		uint32_t **handles)
{
	/* TODO: only clover uses this, and it appears to want the actual
	 * gpuaddr of each buffer returned, which is not really exposed to
	 * mesa at the moment.  How is this used?
	 */
}
|
||||
|
||||
void
|
||||
fd_state_init(struct pipe_context *pctx)
|
||||
{
|
||||
|
@ -484,4 +510,10 @@ fd_state_init(struct pipe_context *pctx)
|
|||
pctx->create_stream_output_target = fd_create_stream_output_target;
|
||||
pctx->stream_output_target_destroy = fd_stream_output_target_destroy;
|
||||
pctx->set_stream_output_targets = fd_set_stream_output_targets;
|
||||
|
||||
if (has_compute(fd_screen(pctx->screen))) {
|
||||
pctx->bind_compute_state = fd_bind_compute_state;
|
||||
pctx->set_compute_resources = fd_set_compute_resources;
|
||||
pctx->set_global_binding = fd_set_global_binding;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue