ir3: Add wavesize control
This allows the wavesize to be controlled per-shader. This will be used by VK_EXT_subgroup_size_control, and freedreno will also need it if legacy ARB_shader_ballot is to be supported (since it forces a wavesize of 64 or less).

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13960>
This commit is contained in:
parent
30237b3d9c
commit
e6e34883a9
|
@ -115,6 +115,12 @@ ir3_should_double_threadsize(struct ir3_shader_variant *v, unsigned regs_count)
|
||||||
{
|
{
|
||||||
const struct ir3_compiler *compiler = v->shader->compiler;
|
const struct ir3_compiler *compiler = v->shader->compiler;
|
||||||
|
|
||||||
|
/* If the user forced a particular wavesize respect that. */
|
||||||
|
if (v->shader->real_wavesize == IR3_SINGLE_ONLY)
|
||||||
|
return false;
|
||||||
|
if (v->shader->real_wavesize == IR3_DOUBLE_ONLY)
|
||||||
|
return true;
|
||||||
|
|
||||||
/* We can't support more than compiler->branchstack_size diverging threads
|
/* We can't support more than compiler->branchstack_size diverging threads
|
||||||
* in a wave. Thus, doubling the threadsize is only possible if we don't
|
* in a wave. Thus, doubling the threadsize is only possible if we don't
|
||||||
* exceed the branchstack size limit.
|
* exceed the branchstack size limit.
|
||||||
|
|
|
@ -90,6 +90,11 @@ ir3_disk_cache_init_shader_key(struct ir3_compiler *compiler,
|
||||||
_mesa_sha1_update(&ctx, blob.data, blob.size);
|
_mesa_sha1_update(&ctx, blob.data, blob.size);
|
||||||
blob_finish(&blob);
|
blob_finish(&blob);
|
||||||
|
|
||||||
|
_mesa_sha1_update(&ctx, &shader->api_wavesize,
|
||||||
|
sizeof(shader->api_wavesize));
|
||||||
|
_mesa_sha1_update(&ctx, &shader->real_wavesize,
|
||||||
|
sizeof(shader->real_wavesize));
|
||||||
|
|
||||||
/* Note that on some gens stream-out is lowered in ir3 to stg. For later
|
/* Note that on some gens stream-out is lowered in ir3 to stg. For later
|
||||||
* gens we maybe don't need to include stream-out in the cache key.
|
* gens we maybe don't need to include stream-out in the cache key.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -533,11 +533,39 @@ ir3_nir_post_finalize(struct ir3_shader *shader)
|
||||||
if ((s->info.stage == MESA_SHADER_COMPUTE) ||
|
if ((s->info.stage == MESA_SHADER_COMPUTE) ||
|
||||||
(s->info.stage == MESA_SHADER_KERNEL) ||
|
(s->info.stage == MESA_SHADER_KERNEL) ||
|
||||||
compiler->has_getfiberid) {
|
compiler->has_getfiberid) {
|
||||||
|
/* If the API-facing subgroup size is forced to a particular value, lower
|
||||||
|
* it here. Beyond this point nir_intrinsic_load_subgroup_size will return
|
||||||
|
* the "real" subgroup size.
|
||||||
|
*/
|
||||||
|
unsigned subgroup_size = 0, max_subgroup_size = 0;
|
||||||
|
switch (shader->api_wavesize) {
|
||||||
|
case IR3_SINGLE_ONLY:
|
||||||
|
subgroup_size = max_subgroup_size = compiler->threadsize_base;
|
||||||
|
break;
|
||||||
|
case IR3_DOUBLE_ONLY:
|
||||||
|
subgroup_size = max_subgroup_size = compiler->threadsize_base * 2;
|
||||||
|
break;
|
||||||
|
case IR3_SINGLE_OR_DOUBLE:
|
||||||
|
/* For vertex stages, we know the wavesize will never be doubled.
|
||||||
|
* Lower subgroup_size here, to avoid having to deal with it when
|
||||||
|
* translating from NIR. Otherwise use the "real" wavesize obtained as
|
||||||
|
* a driver param.
|
||||||
|
*/
|
||||||
|
if (s->info.stage != MESA_SHADER_COMPUTE &&
|
||||||
|
s->info.stage != MESA_SHADER_FRAGMENT) {
|
||||||
|
subgroup_size = max_subgroup_size = compiler->threadsize_base;
|
||||||
|
} else {
|
||||||
|
subgroup_size = 0;
|
||||||
|
max_subgroup_size = compiler->threadsize_base * 2;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
OPT(s, nir_lower_subgroups,
|
OPT(s, nir_lower_subgroups,
|
||||||
&(nir_lower_subgroups_options){
|
&(nir_lower_subgroups_options){
|
||||||
.subgroup_size = 128,
|
.subgroup_size = subgroup_size,
|
||||||
.ballot_bit_size = 32,
|
.ballot_bit_size = 32,
|
||||||
.ballot_components = 4,
|
.ballot_components = max_subgroup_size / 32,
|
||||||
.lower_to_scalar = true,
|
.lower_to_scalar = true,
|
||||||
.lower_vote_eq = true,
|
.lower_vote_eq = true,
|
||||||
.lower_subgroup_masks = true,
|
.lower_subgroup_masks = true,
|
||||||
|
|
|
@ -2295,6 +2295,15 @@ ir3_ra(struct ir3_shader_variant *v)
|
||||||
calc_limit_pressure_for_cs_with_barrier(v, &limit_pressure);
|
calc_limit_pressure_for_cs_with_barrier(v, &limit_pressure);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* If the user forces a doubled threadsize, we may have to lower the limit
|
||||||
|
* because on some gens the register file is not big enough to hold a
|
||||||
|
* double-size wave with all 48 registers in use.
|
||||||
|
*/
|
||||||
|
if (v->shader->real_wavesize == IR3_DOUBLE_ONLY) {
|
||||||
|
limit_pressure.full =
|
||||||
|
MAX2(limit_pressure.full, ctx->compiler->reg_size_vec4 / 2 * 16);
|
||||||
|
}
|
||||||
|
|
||||||
/* If requested, lower the limit so that spilling happens more often. */
|
/* If requested, lower the limit so that spilling happens more often. */
|
||||||
if (ir3_shader_debug & IR3_DBG_SPILLALL)
|
if (ir3_shader_debug & IR3_DBG_SPILLALL)
|
||||||
calc_min_limit_pressure(v, live, &limit_pressure);
|
calc_min_limit_pressure(v, live, &limit_pressure);
|
||||||
|
|
|
@ -596,6 +596,8 @@ ir3_shader_from_nir(struct ir3_compiler *compiler, nir_shader *nir,
|
||||||
memcpy(&shader->stream_output, stream_output,
|
memcpy(&shader->stream_output, stream_output,
|
||||||
sizeof(shader->stream_output));
|
sizeof(shader->stream_output));
|
||||||
shader->num_reserved_user_consts = options->reserved_user_consts;
|
shader->num_reserved_user_consts = options->reserved_user_consts;
|
||||||
|
shader->api_wavesize = options->api_wavesize;
|
||||||
|
shader->real_wavesize = options->real_wavesize;
|
||||||
shader->nir = nir;
|
shader->nir = nir;
|
||||||
|
|
||||||
ir3_disk_cache_init_shader_key(compiler, shader);
|
ir3_disk_cache_init_shader_key(compiler, shader);
|
||||||
|
|
|
@ -92,6 +92,13 @@ enum ir3_bary {
|
||||||
IJ_COUNT,
|
IJ_COUNT,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* Description of what wavesizes are allowed for a shader. "Single" is the compiler's base threadsize (threadsize_base); "double" is twice that. */
|
||||||
|
enum ir3_wavesize_option {
|
||||||
|
IR3_SINGLE_ONLY, /* only the single (non-doubled) wavesize is allowed */
|
||||||
|
IR3_SINGLE_OR_DOUBLE, /* either the single or the doubled wavesize is allowed */
|
||||||
|
IR3_DOUBLE_ONLY, /* only the doubled wavesize is allowed */
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Description of a lowered UBO.
|
* Description of a lowered UBO.
|
||||||
*/
|
*/
|
||||||
|
@ -757,6 +764,17 @@ struct ir3_shader {
|
||||||
|
|
||||||
unsigned num_reserved_user_consts;
|
unsigned num_reserved_user_consts;
|
||||||
|
|
||||||
|
/* What API-visible wavesizes are allowed. Even if only double wavesize is
|
||||||
|
* allowed, we may still use the smaller wavesize "under the hood" and the
|
||||||
|
* application simply sees the upper half as always disabled.
|
||||||
|
*/
|
||||||
|
enum ir3_wavesize_option api_wavesize;
|
||||||
|
|
||||||
|
/* What wavesizes we're allowed to actually use. If the API wavesize is
|
||||||
|
* single-only, then this must be single-only too.
|
||||||
|
*/
|
||||||
|
enum ir3_wavesize_option real_wavesize;
|
||||||
|
|
||||||
bool nir_finalized;
|
bool nir_finalized;
|
||||||
struct nir_shader *nir;
|
struct nir_shader *nir;
|
||||||
struct ir3_stream_output_info stream_output;
|
struct ir3_stream_output_info stream_output;
|
||||||
|
@ -822,6 +840,7 @@ ir3_shader_get_variant(struct ir3_shader *shader,
|
||||||
|
|
||||||
struct ir3_shader_options {
|
struct ir3_shader_options {
|
||||||
unsigned reserved_user_consts;
|
unsigned reserved_user_consts;
|
||||||
|
enum ir3_wavesize_option api_wavesize, real_wavesize;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ir3_shader *
|
struct ir3_shader *
|
||||||
|
|
|
@ -549,6 +549,8 @@ compile_shader(struct tu_device *dev, struct nir_shader *nir,
|
||||||
|
|
||||||
struct ir3_shader *sh =
|
struct ir3_shader *sh =
|
||||||
ir3_shader_from_nir(dev->compiler, nir, &(struct ir3_shader_options) {
|
ir3_shader_from_nir(dev->compiler, nir, &(struct ir3_shader_options) {
|
||||||
|
.api_wavesize = IR3_SINGLE_OR_DOUBLE,
|
||||||
|
.real_wavesize = IR3_SINGLE_OR_DOUBLE,
|
||||||
.reserved_user_consts = align(consts, 4),
|
.reserved_user_consts = align(consts, 4),
|
||||||
}, NULL);
|
}, NULL);
|
||||||
|
|
||||||
|
|
|
@ -787,6 +787,8 @@ tu_shader_create(struct tu_device *dev,
|
||||||
shader->ir3_shader =
|
shader->ir3_shader =
|
||||||
ir3_shader_from_nir(dev->compiler, nir, &(struct ir3_shader_options) {
|
ir3_shader_from_nir(dev->compiler, nir, &(struct ir3_shader_options) {
|
||||||
.reserved_user_consts = align(shader->push_consts.count, 4),
|
.reserved_user_consts = align(shader->push_consts.count, 4),
|
||||||
|
.api_wavesize = IR3_DOUBLE_ONLY,
|
||||||
|
.real_wavesize = IR3_SINGLE_OR_DOUBLE,
|
||||||
}, &so_info);
|
}, &so_info);
|
||||||
|
|
||||||
return shader;
|
return shader;
|
||||||
|
|
|
@ -308,7 +308,13 @@ ir3_shader_compute_state_create(struct pipe_context *pctx,
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ir3_shader *shader =
|
struct ir3_shader *shader =
|
||||||
ir3_shader_from_nir(compiler, nir, &(struct ir3_shader_options){}, NULL);
|
ir3_shader_from_nir(compiler, nir, &(struct ir3_shader_options){
|
||||||
|
/* TODO: force to single on a6xx with legacy
|
||||||
|
* ballot extension that uses 64-bit masks
|
||||||
|
*/
|
||||||
|
.api_wavesize = IR3_SINGLE_OR_DOUBLE,
|
||||||
|
.real_wavesize = IR3_SINGLE_OR_DOUBLE,
|
||||||
|
}, NULL);
|
||||||
shader->cs.req_input_mem = align(cso->req_input_mem, 4) / 4; /* byte->dword */
|
shader->cs.req_input_mem = align(cso->req_input_mem, 4) / 4; /* byte->dword */
|
||||||
shader->cs.req_local_mem = cso->req_local_mem;
|
shader->cs.req_local_mem = cso->req_local_mem;
|
||||||
|
|
||||||
|
@ -369,7 +375,13 @@ ir3_shader_state_create(struct pipe_context *pctx,
|
||||||
copy_stream_out(&stream_output, &cso->stream_output);
|
copy_stream_out(&stream_output, &cso->stream_output);
|
||||||
|
|
||||||
hwcso->shader =
|
hwcso->shader =
|
||||||
ir3_shader_from_nir(compiler, nir, &(struct ir3_shader_options){},
|
ir3_shader_from_nir(compiler, nir, &(struct ir3_shader_options){
|
||||||
|
/* TODO: force to single on a6xx with legacy
|
||||||
|
* ballot extension that uses 64-bit masks
|
||||||
|
*/
|
||||||
|
.api_wavesize = IR3_SINGLE_OR_DOUBLE,
|
||||||
|
.real_wavesize = IR3_SINGLE_OR_DOUBLE,
|
||||||
|
},
|
||||||
&stream_output);
|
&stream_output);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
Loading…
Reference in New Issue