ir3: Add wavesize control
This allows the wavesize to be controlled per-shader. This will be used by VK_EXT_subgroup_size_control, and freedreno will also need it if legacy ARB_shader_ballot is to be supported (since it forces a wavesize of 64 or less). Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13960>
This commit is contained in:
parent
30237b3d9c
commit
e6e34883a9
|
@ -115,6 +115,12 @@ ir3_should_double_threadsize(struct ir3_shader_variant *v, unsigned regs_count)
|
|||
{
|
||||
const struct ir3_compiler *compiler = v->shader->compiler;
|
||||
|
||||
/* If the user forced a particular wavesize respect that. */
|
||||
if (v->shader->real_wavesize == IR3_SINGLE_ONLY)
|
||||
return false;
|
||||
if (v->shader->real_wavesize == IR3_DOUBLE_ONLY)
|
||||
return true;
|
||||
|
||||
/* We can't support more than compiler->branchstack_size diverging threads
|
||||
* in a wave. Thus, doubling the threadsize is only possible if we don't
|
||||
* exceed the branchstack size limit.
|
||||
|
|
|
@ -90,6 +90,11 @@ ir3_disk_cache_init_shader_key(struct ir3_compiler *compiler,
|
|||
_mesa_sha1_update(&ctx, blob.data, blob.size);
|
||||
blob_finish(&blob);
|
||||
|
||||
_mesa_sha1_update(&ctx, &shader->api_wavesize,
|
||||
sizeof(shader->api_wavesize));
|
||||
_mesa_sha1_update(&ctx, &shader->real_wavesize,
|
||||
sizeof(shader->real_wavesize));
|
||||
|
||||
/* Note that on some gens stream-out is lowered in ir3 to stg. For later
|
||||
* gens we maybe don't need to include stream-out in the cache key.
|
||||
*/
|
||||
|
|
|
@ -533,11 +533,39 @@ ir3_nir_post_finalize(struct ir3_shader *shader)
|
|||
if ((s->info.stage == MESA_SHADER_COMPUTE) ||
|
||||
(s->info.stage == MESA_SHADER_KERNEL) ||
|
||||
compiler->has_getfiberid) {
|
||||
/* If the API-facing subgroup size is forced to a particular value, lower
|
||||
* it here. Beyond this point nir_intrinsic_load_subgroup_size will return
|
||||
* the "real" subgroup size.
|
||||
*/
|
||||
unsigned subgroup_size = 0, max_subgroup_size = 0;
|
||||
switch (shader->api_wavesize) {
|
||||
case IR3_SINGLE_ONLY:
|
||||
subgroup_size = max_subgroup_size = compiler->threadsize_base;
|
||||
break;
|
||||
case IR3_DOUBLE_ONLY:
|
||||
subgroup_size = max_subgroup_size = compiler->threadsize_base * 2;
|
||||
break;
|
||||
case IR3_SINGLE_OR_DOUBLE:
|
||||
/* For vertex stages, we know the wavesize will never be doubled.
|
||||
* Lower subgroup_size here, to avoid having to deal with it when
|
||||
* translating from NIR. Otherwise use the "real" wavesize obtained as
|
||||
* a driver param.
|
||||
*/
|
||||
if (s->info.stage != MESA_SHADER_COMPUTE &&
|
||||
s->info.stage != MESA_SHADER_FRAGMENT) {
|
||||
subgroup_size = max_subgroup_size = compiler->threadsize_base;
|
||||
} else {
|
||||
subgroup_size = 0;
|
||||
max_subgroup_size = compiler->threadsize_base * 2;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
OPT(s, nir_lower_subgroups,
|
||||
&(nir_lower_subgroups_options){
|
||||
.subgroup_size = 128,
|
||||
.subgroup_size = subgroup_size,
|
||||
.ballot_bit_size = 32,
|
||||
.ballot_components = 4,
|
||||
.ballot_components = max_subgroup_size / 32,
|
||||
.lower_to_scalar = true,
|
||||
.lower_vote_eq = true,
|
||||
.lower_subgroup_masks = true,
|
||||
|
|
|
@ -2295,6 +2295,15 @@ ir3_ra(struct ir3_shader_variant *v)
|
|||
calc_limit_pressure_for_cs_with_barrier(v, &limit_pressure);
|
||||
}
|
||||
|
||||
/* If the user forces a doubled threadsize, we may have to lower the limit
|
||||
* because on some gens the register file is not big enough to hold a
|
||||
* double-size wave with all 48 registers in use.
|
||||
*/
|
||||
if (v->shader->real_wavesize == IR3_DOUBLE_ONLY) {
|
||||
limit_pressure.full =
|
||||
MAX2(limit_pressure.full, ctx->compiler->reg_size_vec4 / 2 * 16);
|
||||
}
|
||||
|
||||
/* If requested, lower the limit so that spilling happens more often. */
|
||||
if (ir3_shader_debug & IR3_DBG_SPILLALL)
|
||||
calc_min_limit_pressure(v, live, &limit_pressure);
|
||||
|
|
|
@ -596,6 +596,8 @@ ir3_shader_from_nir(struct ir3_compiler *compiler, nir_shader *nir,
|
|||
memcpy(&shader->stream_output, stream_output,
|
||||
sizeof(shader->stream_output));
|
||||
shader->num_reserved_user_consts = options->reserved_user_consts;
|
||||
shader->api_wavesize = options->api_wavesize;
|
||||
shader->real_wavesize = options->real_wavesize;
|
||||
shader->nir = nir;
|
||||
|
||||
ir3_disk_cache_init_shader_key(compiler, shader);
|
||||
|
|
|
@ -92,6 +92,13 @@ enum ir3_bary {
|
|||
IJ_COUNT,
|
||||
};
|
||||
|
||||
/* Description of what wavesizes are allowed. A "single" wave is the compiler's base threadsize (compiler->threadsize_base) and a "double" wave is twice that. The same enum describes both the API-visible wavesize and the wavesize actually used by the hardware. */
|
||||
enum ir3_wavesize_option {
|
||||
IR3_SINGLE_ONLY, /* only the single (base) wavesize is allowed; the threadsize is never doubled */
|
||||
IR3_SINGLE_OR_DOUBLE, /* no size is forced; either the single or the double wavesize may be used */
|
||||
IR3_DOUBLE_ONLY, /* only the double wavesize is allowed; the threadsize is always doubled */
|
||||
};
|
||||
|
||||
/**
|
||||
* Description of a lowered UBO.
|
||||
*/
|
||||
|
@ -757,6 +764,17 @@ struct ir3_shader {
|
|||
|
||||
unsigned num_reserved_user_consts;
|
||||
|
||||
/* What API-visible wavesizes are allowed. Even if only double wavesize is
|
||||
* allowed, we may still use the smaller wavesize "under the hood" and the
|
||||
* application simply sees the upper half as always disabled.
|
||||
*/
|
||||
enum ir3_wavesize_option api_wavesize;
|
||||
|
||||
/* What wavesizes we're allowed to actually use. If the API wavesize is
|
||||
* single-only, then this must be single-only too.
|
||||
*/
|
||||
enum ir3_wavesize_option real_wavesize;
|
||||
|
||||
bool nir_finalized;
|
||||
struct nir_shader *nir;
|
||||
struct ir3_stream_output_info stream_output;
|
||||
|
@ -822,6 +840,7 @@ ir3_shader_get_variant(struct ir3_shader *shader,
|
|||
|
||||
/* Creation-time options handed to ir3_shader_from_nir(), which copies each field into the newly created ir3_shader. */
struct ir3_shader_options {
|
||||
unsigned reserved_user_consts; /* stored as ir3_shader::num_reserved_user_consts */
|
||||
enum ir3_wavesize_option api_wavesize, real_wavesize; /* api_wavesize: what wavesizes are visible to the API; real_wavesize: what wavesizes may actually be used. If api_wavesize is single-only, real_wavesize must be single-only too. */
|
||||
};
|
||||
|
||||
struct ir3_shader *
|
||||
|
|
|
@ -549,6 +549,8 @@ compile_shader(struct tu_device *dev, struct nir_shader *nir,
|
|||
|
||||
struct ir3_shader *sh =
|
||||
ir3_shader_from_nir(dev->compiler, nir, &(struct ir3_shader_options) {
|
||||
.api_wavesize = IR3_SINGLE_OR_DOUBLE,
|
||||
.real_wavesize = IR3_SINGLE_OR_DOUBLE,
|
||||
.reserved_user_consts = align(consts, 4),
|
||||
}, NULL);
|
||||
|
||||
|
|
|
@ -787,6 +787,8 @@ tu_shader_create(struct tu_device *dev,
|
|||
shader->ir3_shader =
|
||||
ir3_shader_from_nir(dev->compiler, nir, &(struct ir3_shader_options) {
|
||||
.reserved_user_consts = align(shader->push_consts.count, 4),
|
||||
.api_wavesize = IR3_DOUBLE_ONLY,
|
||||
.real_wavesize = IR3_SINGLE_OR_DOUBLE,
|
||||
}, &so_info);
|
||||
|
||||
return shader;
|
||||
|
|
|
@ -308,7 +308,13 @@ ir3_shader_compute_state_create(struct pipe_context *pctx,
|
|||
}
|
||||
|
||||
struct ir3_shader *shader =
|
||||
ir3_shader_from_nir(compiler, nir, &(struct ir3_shader_options){}, NULL);
|
||||
ir3_shader_from_nir(compiler, nir, &(struct ir3_shader_options){
|
||||
/* TODO: force to single on a6xx with legacy
|
||||
* ballot extension that uses 64-bit masks
|
||||
*/
|
||||
.api_wavesize = IR3_SINGLE_OR_DOUBLE,
|
||||
.real_wavesize = IR3_SINGLE_OR_DOUBLE,
|
||||
}, NULL);
|
||||
shader->cs.req_input_mem = align(cso->req_input_mem, 4) / 4; /* byte->dword */
|
||||
shader->cs.req_local_mem = cso->req_local_mem;
|
||||
|
||||
|
@ -369,7 +375,13 @@ ir3_shader_state_create(struct pipe_context *pctx,
|
|||
copy_stream_out(&stream_output, &cso->stream_output);
|
||||
|
||||
hwcso->shader =
|
||||
ir3_shader_from_nir(compiler, nir, &(struct ir3_shader_options){},
|
||||
ir3_shader_from_nir(compiler, nir, &(struct ir3_shader_options){
|
||||
/* TODO: force to single on a6xx with legacy
|
||||
* ballot extension that uses 64-bit masks
|
||||
*/
|
||||
.api_wavesize = IR3_SINGLE_OR_DOUBLE,
|
||||
.real_wavesize = IR3_SINGLE_OR_DOUBLE,
|
||||
},
|
||||
&stream_output);
|
||||
|
||||
/*
|
||||
|
|
Loading…
Reference in New Issue