ir3: Add wavesize control

This allows the wavesize to be controlled per-shader. This will be used
by VK_EXT_subgroup_size_control, and freedreno will also need it if
legacy ARB_shader_ballot is to be supported (since it forces a wavesize
of 64 or less).

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13960>
This commit is contained in:
Connor Abbott 2021-11-25 15:17:36 +01:00 committed by Marge Bot
parent 30237b3d9c
commit e6e34883a9
9 changed files with 89 additions and 4 deletions

View File

@ -115,6 +115,12 @@ ir3_should_double_threadsize(struct ir3_shader_variant *v, unsigned regs_count)
{
const struct ir3_compiler *compiler = v->shader->compiler;
/* If the user forced a particular wavesize respect that. */
if (v->shader->real_wavesize == IR3_SINGLE_ONLY)
return false;
if (v->shader->real_wavesize == IR3_DOUBLE_ONLY)
return true;
/* We can't support more than compiler->branchstack_size diverging threads
* in a wave. Thus, doubling the threadsize is only possible if we don't
* exceed the branchstack size limit.

View File

@ -90,6 +90,11 @@ ir3_disk_cache_init_shader_key(struct ir3_compiler *compiler,
_mesa_sha1_update(&ctx, blob.data, blob.size);
blob_finish(&blob);
_mesa_sha1_update(&ctx, &shader->api_wavesize,
sizeof(shader->api_wavesize));
_mesa_sha1_update(&ctx, &shader->real_wavesize,
sizeof(shader->real_wavesize));
/* Note that on some gens stream-out is lowered in ir3 to stg. For later
* gens we maybe don't need to include stream-out in the cache key.
*/

View File

@ -533,11 +533,39 @@ ir3_nir_post_finalize(struct ir3_shader *shader)
if ((s->info.stage == MESA_SHADER_COMPUTE) ||
(s->info.stage == MESA_SHADER_KERNEL) ||
compiler->has_getfiberid) {
/* If the API-facing subgroup size is forced to a particular value, lower
* it here. Beyond this point nir_intrinsic_load_subgroup_size will return
* the "real" subgroup size.
*/
unsigned subgroup_size = 0, max_subgroup_size = 0;
switch (shader->api_wavesize) {
case IR3_SINGLE_ONLY:
subgroup_size = max_subgroup_size = compiler->threadsize_base;
break;
case IR3_DOUBLE_ONLY:
subgroup_size = max_subgroup_size = compiler->threadsize_base * 2;
break;
case IR3_SINGLE_OR_DOUBLE:
/* For vertex stages, we know the wavesize will never be doubled.
* Lower subgroup_size here, to avoid having to deal with it when
* translating from NIR. Otherwise use the "real" wavesize obtained as
* a driver param.
*/
if (s->info.stage != MESA_SHADER_COMPUTE &&
s->info.stage != MESA_SHADER_FRAGMENT) {
subgroup_size = max_subgroup_size = compiler->threadsize_base;
} else {
subgroup_size = 0;
max_subgroup_size = compiler->threadsize_base * 2;
}
break;
}
OPT(s, nir_lower_subgroups,
&(nir_lower_subgroups_options){
.subgroup_size = 128,
.subgroup_size = subgroup_size,
.ballot_bit_size = 32,
.ballot_components = 4,
.ballot_components = max_subgroup_size / 32,
.lower_to_scalar = true,
.lower_vote_eq = true,
.lower_subgroup_masks = true,

View File

@ -2295,6 +2295,15 @@ ir3_ra(struct ir3_shader_variant *v)
calc_limit_pressure_for_cs_with_barrier(v, &limit_pressure);
}
/* If the user forces a doubled threadsize, we may have to lower the limit
* because on some gens the register file is not big enough to hold a
* double-size wave with all 48 registers in use.
*/
if (v->shader->real_wavesize == IR3_DOUBLE_ONLY) {
limit_pressure.full =
MAX2(limit_pressure.full, ctx->compiler->reg_size_vec4 / 2 * 16);
}
/* If requested, lower the limit so that spilling happens more often. */
if (ir3_shader_debug & IR3_DBG_SPILLALL)
calc_min_limit_pressure(v, live, &limit_pressure);

View File

@ -596,6 +596,8 @@ ir3_shader_from_nir(struct ir3_compiler *compiler, nir_shader *nir,
memcpy(&shader->stream_output, stream_output,
sizeof(shader->stream_output));
shader->num_reserved_user_consts = options->reserved_user_consts;
shader->api_wavesize = options->api_wavesize;
shader->real_wavesize = options->real_wavesize;
shader->nir = nir;
ir3_disk_cache_init_shader_key(compiler, shader);

View File

@ -92,6 +92,13 @@ enum ir3_bary {
IJ_COUNT,
};
/* Description of what wavesizes are allowed.
 *
 * "Single" refers to the compiler's base threadsize
 * (compiler->threadsize_base) and "double" to twice that value. The same
 * enum is used both for the API-visible constraint (api_wavesize) and for
 * the constraint on what is actually used (real_wavesize).
 *
 * Both of those fields are hashed into the shader disk cache key, so the
 * numeric values of these enumerators end up in the key.
 */
enum ir3_wavesize_option {
/* Only the base wavesize may be used; the threadsize is never doubled. */
IR3_SINGLE_ONLY,
/* Either wavesize may be used; no particular size is forced. */
IR3_SINGLE_OR_DOUBLE,
/* Only the doubled wavesize may be used; the threadsize is always doubled. */
IR3_DOUBLE_ONLY,
};
/**
* Description of a lowered UBO.
*/
@ -757,6 +764,17 @@ struct ir3_shader {
unsigned num_reserved_user_consts;
/* What API-visible wavesizes are allowed. Even if only double wavesize is
* allowed, we may still use the smaller wavesize "under the hood" and the
* application simply sees the upper half as always disabled.
*/
enum ir3_wavesize_option api_wavesize;
/* What wavesizes we're allowed to actually use. If the API wavesize is
* single-only, then this must be single-only too.
*/
enum ir3_wavesize_option real_wavesize;
bool nir_finalized;
struct nir_shader *nir;
struct ir3_stream_output_info stream_output;
@ -822,6 +840,7 @@ ir3_shader_get_variant(struct ir3_shader *shader,
/* Per-shader creation options passed to ir3_shader_from_nir(), which copies
 * each field into the newly created ir3_shader.
 */
struct ir3_shader_options {
/* Stored as ir3_shader::num_reserved_user_consts. */
unsigned reserved_user_consts;
/* api_wavesize: which wavesizes are allowed to be visible to the API.
 * real_wavesize: which wavesizes may actually be used. If api_wavesize is
 * IR3_SINGLE_ONLY then real_wavesize must be IR3_SINGLE_ONLY too.
 */
enum ir3_wavesize_option api_wavesize, real_wavesize;
};
struct ir3_shader *

View File

@ -549,6 +549,8 @@ compile_shader(struct tu_device *dev, struct nir_shader *nir,
struct ir3_shader *sh =
ir3_shader_from_nir(dev->compiler, nir, &(struct ir3_shader_options) {
.api_wavesize = IR3_SINGLE_OR_DOUBLE,
.real_wavesize = IR3_SINGLE_OR_DOUBLE,
.reserved_user_consts = align(consts, 4),
}, NULL);

View File

@ -787,6 +787,8 @@ tu_shader_create(struct tu_device *dev,
shader->ir3_shader =
ir3_shader_from_nir(dev->compiler, nir, &(struct ir3_shader_options) {
.reserved_user_consts = align(shader->push_consts.count, 4),
.api_wavesize = IR3_DOUBLE_ONLY,
.real_wavesize = IR3_SINGLE_OR_DOUBLE,
}, &so_info);
return shader;

View File

@ -308,7 +308,13 @@ ir3_shader_compute_state_create(struct pipe_context *pctx,
}
struct ir3_shader *shader =
ir3_shader_from_nir(compiler, nir, &(struct ir3_shader_options){}, NULL);
ir3_shader_from_nir(compiler, nir, &(struct ir3_shader_options){
/* TODO: force to single on a6xx with legacy
* ballot extension that uses 64-bit masks
*/
.api_wavesize = IR3_SINGLE_OR_DOUBLE,
.real_wavesize = IR3_SINGLE_OR_DOUBLE,
}, NULL);
shader->cs.req_input_mem = align(cso->req_input_mem, 4) / 4; /* byte->dword */
shader->cs.req_local_mem = cso->req_local_mem;
@ -369,7 +375,13 @@ ir3_shader_state_create(struct pipe_context *pctx,
copy_stream_out(&stream_output, &cso->stream_output);
hwcso->shader =
ir3_shader_from_nir(compiler, nir, &(struct ir3_shader_options){},
ir3_shader_from_nir(compiler, nir, &(struct ir3_shader_options){
/* TODO: force to single on a6xx with legacy
* ballot extension that uses 64-bit masks
*/
.api_wavesize = IR3_SINGLE_OR_DOUBLE,
.real_wavesize = IR3_SINGLE_OR_DOUBLE,
},
&stream_output);
/*