diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index 2b196324754..a7b3adb6c63 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -1806,7 +1806,7 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) case nir_intrinsic_memory_barrier_atomic_counter: case nir_intrinsic_memory_barrier_buffer: case nir_intrinsic_memory_barrier_image: - case nir_intrinsic_memory_barrier_shared: + case nir_intrinsic_group_memory_barrier: /* We don't do any instruction scheduling of these NIR * instructions between each other, so we just need to make * sure that the TMU operations before the barrier are flushed @@ -1869,6 +1869,10 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) vir_uniform_ui(c, 0xffff))); break; + case nir_intrinsic_load_subgroup_id: + ntq_store_dest(c, &instr->dest, 0, vir_EIDX(c)); + break; + default: fprintf(stderr, "Unknown intrinsic: "); nir_print_instr(&instr->instr, stderr); @@ -2444,6 +2448,8 @@ v3d_nir_to_vir(struct v3d_compile *c) case MESA_SHADER_VERTEX: emit_vert_end(c); break; + case MESA_SHADER_COMPUTE: + break; default: unreachable("bad stage"); } diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index 94247860c68..b2bc40b10fe 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -691,6 +691,12 @@ struct v3d_fs_prog_data { bool uses_center_w; }; +struct v3d_compute_prog_data { + struct v3d_prog_data base; + /* Size in bytes of the workgroup's shared space. */ + uint32_t shared_size; +}; + static inline bool vir_has_uniform(struct qinst *inst) { diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index b785b53c62c..6655e5e73bc 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -562,6 +562,21 @@ v3d_lower_nir(struct v3d_compile *c) } } + /* CS textures may not have return_size reflecting the shadow state. */ + nir_foreach_variable(var, &c->s->uniforms) { + const struct glsl_type *type = glsl_without_array(var->type); + unsigned array_len = MAX2(glsl_get_length(var->type), 1); + + if (!glsl_type_is_sampler(type) || + !glsl_sampler_type_is_shadow(type)) + continue; + + for (int i = 0; i < array_len; i++) { + tex_options.lower_tex_packing[var->data.binding + i] = + nir_lower_tex_packing_16; + } + } + NIR_PASS_V(c->s, nir_lower_tex, &tex_options); NIR_PASS_V(c->s, nir_lower_system_values); } @@ -669,6 +684,13 @@ v3d_fs_set_prog_data(struct v3d_compile *c, prog_data->uses_center_w = c->uses_center_w; } +static void +v3d_cs_set_prog_data(struct v3d_compile *c, + struct v3d_compute_prog_data *prog_data) +{ + prog_data->shared_size = c->s->info.cs.shared_size; +} + static void v3d_set_prog_data(struct v3d_compile *c, struct v3d_prog_data *prog_data) @@ -679,7 +701,9 @@ v3d_set_prog_data(struct v3d_compile *c, v3d_set_prog_data_uniforms(c, prog_data); - if (c->s->info.stage == MESA_SHADER_VERTEX) { + if (c->s->info.stage == MESA_SHADER_COMPUTE) { + v3d_cs_set_prog_data(c, (struct v3d_compute_prog_data *)prog_data); + } else if (c->s->info.stage == MESA_SHADER_VERTEX) { v3d_vs_set_prog_data(c, (struct v3d_vs_prog_data *)prog_data); } else { assert(c->s->info.stage == MESA_SHADER_FRAGMENT); @@ -865,13 +889,17 @@ uint64_t *v3d_compile(const struct v3d_compiler *compiler, c->fs_key = (struct v3d_fs_key *)key; prog_data = rzalloc_size(NULL, sizeof(struct v3d_fs_prog_data)); break; + case MESA_SHADER_COMPUTE: + prog_data = rzalloc_size(NULL, + sizeof(struct v3d_compute_prog_data)); + break; default: unreachable("unsupported shader stage"); } if (c->s->info.stage == MESA_SHADER_VERTEX) { v3d_nir_lower_vs_early(c); - } else { + } else if (c->s->info.stage != MESA_SHADER_COMPUTE) { assert(c->s->info.stage == MESA_SHADER_FRAGMENT); v3d_nir_lower_fs_early(c); } @@ -880,7 +908,7 @@ uint64_t *v3d_compile(const struct v3d_compiler *compiler, if (c->s->info.stage == MESA_SHADER_VERTEX) { v3d_nir_lower_vs_late(c); - } else { + } else if (c->s->info.stage != MESA_SHADER_COMPUTE) { assert(c->s->info.stage == MESA_SHADER_FRAGMENT); v3d_nir_lower_fs_late(c); } diff --git a/src/gallium/drivers/v3d/v3d_context.h b/src/gallium/drivers/v3d/v3d_context.h index 3b39d18145d..225ebe2f5ab 100644 --- a/src/gallium/drivers/v3d/v3d_context.h +++ b/src/gallium/drivers/v3d/v3d_context.h @@ -186,6 +186,7 @@ struct v3d_compiled_shader { struct v3d_prog_data *base; struct v3d_vs_prog_data *vs; struct v3d_fs_prog_data *fs; + struct v3d_compute_prog_data *compute; } prog_data; /** @@ -197,8 +198,10 @@ struct v3d_compiled_shader { }; struct v3d_program_stateobj { - struct v3d_uncompiled_shader *bind_vs, *bind_fs; - struct v3d_compiled_shader *cs, *vs, *fs; + struct v3d_uncompiled_shader *bind_vs, *bind_fs, *bind_compute; + struct v3d_compiled_shader *cs, *vs, *fs, *compute; + + struct hash_table *cache[MESA_SHADER_STAGES]; struct v3d_bo *spill_bo; int spill_size_per_thread; @@ -414,7 +417,6 @@ struct v3d_context { struct primconvert_context *primconvert; - struct hash_table *fs_cache, *vs_cache; uint32_t next_uncompiled_program_id; uint64_t next_compiled_program_id; @@ -446,6 +448,8 @@ struct v3d_context { struct v3d_depth_stencil_alpha_state *zsa; struct v3d_program_stateobj prog; + uint32_t compute_num_workgroups[3]; + struct v3d_bo *compute_shared_memory; struct v3d_vertex_stateobj *vtx; @@ -584,6 +588,7 @@ void v3d_flush_jobs_writing_resource(struct v3d_context *v3d, void v3d_flush_jobs_reading_resource(struct v3d_context *v3d, struct pipe_resource *prsc); void v3d_update_compiled_shaders(struct v3d_context *v3d, uint8_t prim_mode); +void v3d_update_compiled_cs(struct v3d_context *v3d); bool v3d_rt_format_supported(const struct v3d_device_info *devinfo, enum pipe_format f); diff --git a/src/gallium/drivers/v3d/v3d_program.c b/src/gallium/drivers/v3d/v3d_program.c index e3e491e9fd7..7805b808a01 100644 --- a/src/gallium/drivers/v3d/v3d_program.c +++ b/src/gallium/drivers/v3d/v3d_program.c @@ -38,7 +38,8 @@ #include "broadcom/cle/v3d_packet_v33_pack.h" static struct v3d_compiled_shader * -v3d_get_compiled_shader(struct v3d_context *v3d, struct v3d_key *key); +v3d_get_compiled_shader(struct v3d_context *v3d, + struct v3d_key *key, size_t key_size); static void v3d_setup_shared_precompile_key(struct v3d_uncompiled_shader *uncompiled, struct v3d_key *key); @@ -200,7 +201,7 @@ v3d_shader_precompile(struct v3d_context *v3d, } v3d_setup_shared_precompile_key(so, &key.base); - v3d_get_compiled_shader(v3d, &key.base); + v3d_get_compiled_shader(v3d, &key.base, sizeof(key)); } else { struct v3d_vs_key key = { .base.shader_state = so, @@ -223,7 +224,7 @@ v3d_shader_precompile(struct v3d_context *v3d, } } - v3d_get_compiled_shader(v3d, &key.base); + v3d_get_compiled_shader(v3d, &key.base, sizeof(key)); /* Compile VS bin shader: only position (XXX: include TF) */ key.is_coord = true; @@ -233,13 +234,13 @@ v3d_shader_precompile(struct v3d_context *v3d, v3d_slot_from_slot_and_component(VARYING_SLOT_POS, i); } - v3d_get_compiled_shader(v3d, &key.base); + v3d_get_compiled_shader(v3d, &key.base, sizeof(key)); } } static void * -v3d_shader_state_create(struct pipe_context *pctx, - const struct pipe_shader_state *cso) +v3d_uncompiled_shader_create(struct pipe_context *pctx, + enum pipe_shader_ir type, void *ir) { struct v3d_context *v3d = v3d_context(pctx); struct v3d_uncompiled_shader *so = CALLOC_STRUCT(v3d_uncompiled_shader); @@ -250,21 +251,21 @@ v3d_shader_state_create(struct pipe_context *pctx, nir_shader *s; - if (cso->type == PIPE_SHADER_IR_NIR) { + if (type == PIPE_SHADER_IR_NIR) { /* The backend takes ownership of the NIR shader on state * creation. */ - s = cso->ir.nir; + s = ir; } else { - assert(cso->type == PIPE_SHADER_IR_TGSI); + assert(type == PIPE_SHADER_IR_TGSI); if (V3D_DEBUG & V3D_DEBUG_TGSI) { fprintf(stderr, "prog %d TGSI:\n", so->program_id); - tgsi_dump(cso->tokens, 0); + tgsi_dump(ir, 0); fprintf(stderr, "\n"); } - s = tgsi_to_nir(cso->tokens, pctx->screen); + s = tgsi_to_nir(ir, pctx->screen); } nir_variable_mode lower_mode = nir_var_all & ~nir_var_uniform; @@ -289,8 +290,6 @@ v3d_shader_state_create(struct pipe_context *pctx, so->base.type = PIPE_SHADER_IR_NIR; so->base.ir.nir = s; - v3d_set_transform_feedback_outputs(so, &cso->stream_output); - if (V3D_DEBUG & (V3D_DEBUG_NIR | v3d_debug_flag_for_shader_stage(s->info.stage))) { fprintf(stderr, "%s prog %d NIR:\n", @@ -314,22 +313,31 @@ v3d_shader_debug_output(const char *message, void *data) pipe_debug_message(&v3d->debug, SHADER_INFO, "%s", message); } -static struct v3d_compiled_shader * -v3d_get_compiled_shader(struct v3d_context *v3d, struct v3d_key *key) +static void * +v3d_shader_state_create(struct pipe_context *pctx, + const struct pipe_shader_state *cso) +{ + struct v3d_uncompiled_shader *so = + v3d_uncompiled_shader_create(pctx, + cso->type, + (cso->type == PIPE_SHADER_IR_TGSI ? + (void *)cso->tokens : + cso->ir.nir)); + + v3d_set_transform_feedback_outputs(so, &cso->stream_output); + + return so; +} + +struct v3d_compiled_shader * +v3d_get_compiled_shader(struct v3d_context *v3d, + struct v3d_key *key, + size_t key_size) { struct v3d_uncompiled_shader *shader_state = key->shader_state; nir_shader *s = shader_state->base.ir.nir; - struct hash_table *ht; - uint32_t key_size; - if (s->info.stage == MESA_SHADER_FRAGMENT) { - ht = v3d->fs_cache; - key_size = sizeof(struct v3d_fs_key); - } else { - ht = v3d->vs_cache; - key_size = sizeof(struct v3d_vs_key); - } - + struct hash_table *ht = v3d->prog.cache[s->info.stage]; struct hash_entry *entry = _mesa_hash_table_search(ht, key); if (entry) return entry->data; @@ -359,10 +367,12 @@ v3d_get_compiled_shader(struct v3d_context *v3d, struct v3d_key *key) free(qpu_insts); - struct v3d_key *dup_key; - dup_key = ralloc_size(shader, key_size); - memcpy(dup_key, key, key_size); - _mesa_hash_table_insert(ht, dup_key, shader); + if (ht) { + struct v3d_key *dup_key; + dup_key = ralloc_size(shader, key_size); + memcpy(dup_key, key, key_size); + _mesa_hash_table_insert(ht, dup_key, shader); + } if (shader->prog_data.base->spill_size > v3d->prog.spill_size_per_thread) { @@ -446,8 +456,6 @@ v3d_setup_shared_key(struct v3d_context *v3d, struct v3d_key *key, sampler_state->wrap_r == PIPE_TEX_WRAP_CLAMP; } } - - key->ucp_enables = v3d->rasterizer->base.clip_plane_enable; } static void @@ -489,6 +497,7 @@ v3d_update_compiled_fs(struct v3d_context *v3d, uint8_t prim_mode) memset(key, 0, sizeof(*key)); v3d_setup_shared_key(v3d, &key->base, &v3d->tex[PIPE_SHADER_FRAGMENT]); key->base.shader_state = v3d->prog.bind_fs; + key->base.ucp_enables = v3d->rasterizer->base.clip_plane_enable; key->is_points = (prim_mode == PIPE_PRIM_POINTS); key->is_lines = (prim_mode >= PIPE_PRIM_LINES && prim_mode <= PIPE_PRIM_LINE_STRIP); @@ -554,7 +563,7 @@ v3d_update_compiled_fs(struct v3d_context *v3d, uint8_t prim_mode) key->shade_model_flat = v3d->rasterizer->base.flatshade; struct v3d_compiled_shader *old_fs = v3d->prog.fs; - v3d->prog.fs = v3d_get_compiled_shader(v3d, &key->base); + v3d->prog.fs = v3d_get_compiled_shader(v3d, &key->base, sizeof(*key)); if (v3d->prog.fs == old_fs) return; @@ -602,6 +611,7 @@ v3d_update_compiled_vs(struct v3d_context *v3d, uint8_t prim_mode) memset(key, 0, sizeof(*key)); v3d_setup_shared_key(v3d, &key->base, &v3d->tex[PIPE_SHADER_VERTEX]); key->base.shader_state = v3d->prog.bind_vs; + key->base.ucp_enables = v3d->rasterizer->base.clip_plane_enable; key->num_fs_inputs = v3d->prog.fs->prog_data.fs->num_inputs; STATIC_ASSERT(sizeof(key->fs_inputs) == sizeof(v3d->prog.fs->prog_data.fs->input_slots)); @@ -614,7 +624,7 @@ v3d_update_compiled_vs(struct v3d_context *v3d, uint8_t prim_mode) v3d->rasterizer->base.point_size_per_vertex); struct v3d_compiled_shader *vs = - v3d_get_compiled_shader(v3d, &key->base); + v3d_get_compiled_shader(v3d, &key->base, sizeof(*key)); if (vs != v3d->prog.vs) { v3d->prog.vs = vs; v3d->dirty |= VC5_DIRTY_COMPILED_VS; @@ -634,7 +644,7 @@ v3d_update_compiled_vs(struct v3d_context *v3d, uint8_t prim_mode) key->num_fs_inputs = shader_state->num_tf_outputs; struct v3d_compiled_shader *cs = - v3d_get_compiled_shader(v3d, &key->base); + v3d_get_compiled_shader(v3d, &key->base, sizeof(*key)); if (cs != v3d->prog.cs) { v3d->prog.cs = cs; v3d->dirty |= VC5_DIRTY_COMPILED_CS; @@ -648,6 +658,30 @@ v3d_update_compiled_shaders(struct v3d_context *v3d, uint8_t prim_mode) v3d_update_compiled_vs(v3d, prim_mode); } +void +v3d_update_compiled_cs(struct v3d_context *v3d) +{ + struct v3d_key local_key; + struct v3d_key *key = &local_key; + + if (!(v3d->dirty & (~0 | /* XXX */ + VC5_DIRTY_VERTTEX | + VC5_DIRTY_UNCOMPILED_FS))) { + return; + } + + memset(key, 0, sizeof(*key)); + v3d_setup_shared_key(v3d, key, &v3d->tex[PIPE_SHADER_COMPUTE]); + key->shader_state = v3d->prog.bind_compute; + + struct v3d_compiled_shader *cs = + v3d_get_compiled_shader(v3d, key, sizeof(*key)); + if (cs != v3d->prog.compute) { + v3d->prog.compute = cs; + v3d->dirty |= VC5_DIRTY_COMPILED_CS; /* XXX */ + } +} + static uint32_t fs_cache_hash(const void *key) { @@ -660,6 +694,12 @@ vs_cache_hash(const void *key) return _mesa_hash_data(key, sizeof(struct v3d_vs_key)); } +static uint32_t +cs_cache_hash(const void *key) +{ + return _mesa_hash_data(key, sizeof(struct v3d_key)); +} + static bool fs_cache_compare(const void *key1, const void *key2) { @@ -672,23 +712,10 @@ vs_cache_compare(const void *key1, const void *key2) return memcmp(key1, key2, sizeof(struct v3d_vs_key)) == 0; } -static void -delete_from_cache_if_matches(struct hash_table *ht, - struct v3d_compiled_shader **last_compile, - struct hash_entry *entry, - struct v3d_uncompiled_shader *so) +static bool +cs_cache_compare(const void *key1, const void *key2) { - const struct v3d_key *key = entry->key; - - if (key->shader_state == so) { - struct v3d_compiled_shader *shader = entry->data; - _mesa_hash_table_remove(ht, entry); - - if (shader == *last_compile) - *last_compile = NULL; - - v3d_free_compiled_shader(shader); - } + return memcmp(key1, key2, sizeof(struct v3d_key)) == 0; } static void @@ -696,14 +723,26 @@ v3d_shader_state_delete(struct pipe_context *pctx, void *hwcso) { struct v3d_context *v3d = v3d_context(pctx); struct v3d_uncompiled_shader *so = hwcso; + nir_shader *s = so->base.ir.nir; - hash_table_foreach(v3d->fs_cache, entry) { - delete_from_cache_if_matches(v3d->fs_cache, &v3d->prog.fs, - entry, so); - } - hash_table_foreach(v3d->vs_cache, entry) { - delete_from_cache_if_matches(v3d->vs_cache, &v3d->prog.vs, - entry, so); + hash_table_foreach(v3d->prog.cache[s->info.stage], entry) { + const struct v3d_key *key = entry->key; + struct v3d_compiled_shader *shader = entry->data; + + if (key->shader_state != so) + continue; + + if (v3d->prog.fs == shader) + v3d->prog.fs = NULL; + if (v3d->prog.vs == shader) + v3d->prog.vs = NULL; + if (v3d->prog.cs == shader) + v3d->prog.cs = NULL; + if (v3d->prog.compute == shader) + v3d->prog.compute = NULL; + + _mesa_hash_table_remove(v3d->prog.cache[s->info.stage], entry); + v3d_free_compiled_shader(shader); } ralloc_free(so->base.ir.nir); @@ -726,6 +765,22 @@ v3d_vp_state_bind(struct pipe_context *pctx, void *hwcso) v3d->dirty |= VC5_DIRTY_UNCOMPILED_VS; } +static void +v3d_compute_state_bind(struct pipe_context *pctx, void *state) +{ + struct v3d_context *v3d = v3d_context(pctx); + + v3d->prog.bind_compute = state; +} + +static void * +v3d_create_compute_state(struct pipe_context *pctx, + const struct pipe_compute_state *cso) +{ + return v3d_uncompiled_shader_create(pctx, cso->ir_type, + (void *)cso->prog); +} + void v3d_program_init(struct pipe_context *pctx) { @@ -740,10 +795,18 @@ v3d_program_init(struct pipe_context *pctx) pctx->bind_fs_state = v3d_fp_state_bind; pctx->bind_vs_state = v3d_vp_state_bind; - v3d->fs_cache = _mesa_hash_table_create(pctx, fs_cache_hash, - fs_cache_compare); - v3d->vs_cache = _mesa_hash_table_create(pctx, vs_cache_hash, - vs_cache_compare); + if (v3d->screen->has_csd) { + pctx->create_compute_state = v3d_create_compute_state; + pctx->delete_compute_state = v3d_shader_state_delete; + pctx->bind_compute_state = v3d_compute_state_bind; + } + + v3d->prog.cache[MESA_SHADER_VERTEX] = + _mesa_hash_table_create(pctx, vs_cache_hash, vs_cache_compare); + v3d->prog.cache[MESA_SHADER_FRAGMENT] = + _mesa_hash_table_create(pctx, fs_cache_hash, fs_cache_compare); + v3d->prog.cache[MESA_SHADER_COMPUTE] = + _mesa_hash_table_create(pctx, cs_cache_hash, cs_cache_compare); } void @@ -751,16 +814,16 @@ v3d_program_fini(struct pipe_context *pctx) { struct v3d_context *v3d = v3d_context(pctx); - hash_table_foreach(v3d->fs_cache, entry) { - struct v3d_compiled_shader *shader = entry->data; - v3d_free_compiled_shader(shader); - _mesa_hash_table_remove(v3d->fs_cache, entry); - } + for (int i = 0; i < MESA_SHADER_STAGES; i++) { + struct hash_table *cache = v3d->prog.cache[i]; + if (!cache) + continue; - hash_table_foreach(v3d->vs_cache, entry) { - struct v3d_compiled_shader *shader = entry->data; - v3d_free_compiled_shader(shader); - _mesa_hash_table_remove(v3d->vs_cache, entry); + hash_table_foreach(cache, entry) { + struct v3d_compiled_shader *shader = entry->data; + v3d_free_compiled_shader(shader); + _mesa_hash_table_remove(cache, entry); + } } v3d_bo_unreference(&v3d->prog.spill_bo); diff --git a/src/gallium/drivers/v3d/v3d_screen.c b/src/gallium/drivers/v3d/v3d_screen.c index afac781725a..b77e3d9060e 100644 --- a/src/gallium/drivers/v3d/v3d_screen.c +++ b/src/gallium/drivers/v3d/v3d_screen.c @@ -22,6 +22,8 @@ * IN THE SOFTWARE. */ +#include + #include "util/os_misc.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" @@ -122,7 +124,6 @@ v3d_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_OCCLUSION_QUERY: case PIPE_CAP_POINT_SPRITE: case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: - case PIPE_CAP_COMPUTE: case PIPE_CAP_DRAW_INDIRECT: case PIPE_CAP_MULTI_DRAW_INDIRECT: case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: @@ -143,6 +144,9 @@ v3d_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: return 0; + case PIPE_CAP_COMPUTE: + return screen->has_csd && screen->devinfo.ver >= 41; + case PIPE_CAP_GENERATE_MIPMAP: return v3d_has_feature(screen, DRM_V3D_PARAM_SUPPORTS_TFU); @@ -260,8 +264,15 @@ v3d_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, { struct v3d_screen *screen = v3d_screen(pscreen); - if (shader != PIPE_SHADER_VERTEX && - shader != PIPE_SHADER_FRAGMENT) { + switch (shader) { + case PIPE_SHADER_VERTEX: + case PIPE_SHADER_FRAGMENT: + break; + case PIPE_SHADER_COMPUTE: + if (!screen->has_csd) + return 0; + break; + default: return 0; } @@ -335,7 +346,7 @@ v3d_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_NIR; case PIPE_SHADER_CAP_SUPPORTED_IRS: - return 0; + return 1 << PIPE_SHADER_IR_NIR; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD: @@ -348,6 +359,86 @@ v3d_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, return 0; } +static int +v3d_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type, + enum pipe_compute_cap param, void *ret) +{ + struct v3d_screen *screen = v3d_screen(pscreen); + + if (!screen->has_csd) + return 0; + +#define RET(x) do { \ + if (ret) \ + memcpy(ret, x, sizeof(x)); \ + return sizeof(x); \ + } while (0) + + switch (param) { + case PIPE_COMPUTE_CAP_ADDRESS_BITS: + RET((uint32_t []) { 32 }); + break; + + case PIPE_COMPUTE_CAP_IR_TARGET: + sprintf(ret, "v3d"); + return strlen(ret); + + case PIPE_COMPUTE_CAP_GRID_DIMENSION: + RET((uint64_t []) { 3 }); + + case PIPE_COMPUTE_CAP_MAX_GRID_SIZE: + /* GL_MAX_COMPUTE_SHADER_WORK_GROUP_COUNT: The CSD has a + * 16-bit field for the number of workgroups in each + * dimension. + */ + RET(((uint64_t []) { 65535, 65535, 65535 })); + + case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: + /* GL_MAX_COMPUTE_WORK_GROUP_SIZE */ + RET(((uint64_t []) { 256, 256, 256 })); + + case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: + case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK: + /* GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS: This is + * limited by WG_SIZE in the CSD. + */ + RET((uint64_t []) { 256 }); + + case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: + RET((uint64_t []) { 1024 * 1024 * 1024 }); + + case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: + /* GL_MAX_COMPUTE_SHARED_MEMORY_SIZE */ + RET((uint64_t []) { 32768 }); + + case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: + case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: + RET((uint64_t []) { 4096 }); + + case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: { + struct sysinfo si; + sysinfo(&si); + RET((uint64_t []) { si.totalram }); + } + + case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY: + /* OpenCL only */ + RET((uint32_t []) { 0 }); + + case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: + RET((uint32_t []) { 1 }); + + case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: + RET((uint32_t []) { 1 }); + + case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: + RET((uint32_t []) { 16 }); + + } + + return 0; +} + static boolean v3d_screen_is_format_supported(struct pipe_screen *pscreen, enum pipe_format format, @@ -565,6 +656,7 @@ v3d_screen_create(int fd, struct renderonly *ro) pscreen->get_param = v3d_screen_get_param; pscreen->get_paramf = v3d_screen_get_paramf; pscreen->get_shader_param = v3d_screen_get_shader_param; + pscreen->get_compute_param = v3d_get_compute_param; pscreen->context_create = v3d_context_create; pscreen->is_format_supported = v3d_screen_is_format_supported; @@ -590,6 +682,8 @@ v3d_screen_create(int fd, struct renderonly *ro) slab_create_parent(&screen->transfer_pool, sizeof(struct v3d_transfer), 16); + screen->has_csd = false; /* until the UABI is enabled. */ + v3d_fence_init(screen); v3d_process_debug_variable(); diff --git a/src/gallium/drivers/v3d/v3d_screen.h b/src/gallium/drivers/v3d/v3d_screen.h index 94ae8b30f3a..6e90755e77c 100644 --- a/src/gallium/drivers/v3d/v3d_screen.h +++ b/src/gallium/drivers/v3d/v3d_screen.h @@ -77,6 +77,8 @@ struct v3d_screen { uint32_t bo_size; uint32_t bo_count; + bool has_csd; + struct v3d_simulator_file *sim_file; }; diff --git a/src/gallium/drivers/v3d/v3d_uniforms.c b/src/gallium/drivers/v3d/v3d_uniforms.c index a5532bdf2b4..77101947e2b 100644 --- a/src/gallium/drivers/v3d/v3d_uniforms.c +++ b/src/gallium/drivers/v3d/v3d_uniforms.c @@ -358,6 +358,16 @@ v3d_write_uniforms(struct v3d_context *v3d, struct v3d_compiled_shader *shader, v3d->prog.spill_size_per_thread); break; + case QUNIFORM_NUM_WORK_GROUPS: + cl_aligned_u32(&uniforms, + v3d->compute_num_workgroups[data]); + break; + + case QUNIFORM_SHARED_OFFSET: + cl_aligned_reloc(&job->indirect, &uniforms, + v3d->compute_shared_memory, 0); + break; + default: assert(quniform_contents_is_texture_p0(uinfo->contents[i])); @@ -444,6 +454,11 @@ v3d_set_shader_uniform_dirty_flags(struct v3d_compiled_shader *shader) dirty |= VC5_DIRTY_ZSA; break; + case QUNIFORM_NUM_WORK_GROUPS: + case QUNIFORM_SHARED_OFFSET: + /* Compute always recalculates uniforms. */ + break; + default: assert(quniform_contents_is_texture_p0(shader->prog_data.base->uniforms.contents[i])); dirty |= VC5_DIRTY_FRAGTEX | VC5_DIRTY_VERTTEX; diff --git a/src/gallium/drivers/v3d/v3dx_draw.c b/src/gallium/drivers/v3d/v3dx_draw.c index 14e85e78485..14e95c71204 100644 --- a/src/gallium/drivers/v3d/v3dx_draw.c +++ b/src/gallium/drivers/v3d/v3dx_draw.c @@ -489,7 +489,7 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) /* Before setting up the draw, flush anything writing to the textures * that we read from. */ - for (int s = 0; s < PIPE_SHADER_TYPES; s++) + for (int s = 0; s < PIPE_SHADER_COMPUTE; s++) v3d_predraw_check_stage_inputs(pctx, s); if (info->indirect) @@ -514,7 +514,7 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) /* Mark SSBOs as being written. We don't actually know which ones are * read vs written, so just assume the worst */ - for (int s = 0; s < PIPE_SHADER_TYPES; s++) { + for (int s = 0; s < PIPE_SHADER_COMPUTE; s++) { foreach_bit(i, v3d->ssbo[s].enabled_mask) { v3d_job_add_write_resource(job, v3d->ssbo[s].sb[i].buffer);