intel/compiler: Be more conservative about subgroup sizes in GL
The rules for gl_SubgroupSize in Vulkan require that it be a constant that can be queried through the API. However, all GL requires is that it's a uniform. Instead of always claiming that the subgroup size in the shader is 32 in GL like we have to do for Vulkan, claim 8 for geometry stages, the maximum for fragment shaders, and the actual size for compute.

Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
This commit is contained in:
parent
1981460af2
commit
c84b8eeeac
|
@ -47,6 +47,7 @@
|
|||
#include "nir/tgsi_to_nir.h"
|
||||
|
||||
#define KEY_INIT_NO_ID(gen) \
|
||||
.base.subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM, \
|
||||
.base.tex.swizzles[0 ... MAX_SAMPLERS - 1] = 0x688, \
|
||||
.base.tex.compressed_multisample_layout_mask = ~0, \
|
||||
.base.tex.msaa_16 = (gen >= 9 ? ~0 : 0)
|
||||
|
|
|
@ -203,9 +203,18 @@ struct brw_sampler_prog_key_data {
|
|||
float scale_factors[32];
|
||||
};
|
||||
|
||||
/** An enum representing what kind of input gl_SubgroupSize is. */
enum PACKED brw_subgroup_size_type
{
   /* Vulkan requires gl_SubgroupSize to match a constant that is
    * queryable through the API, so the compiler must always use the
    * one global value (BRW_SUBGROUP_SIZE).
    */
   BRW_SUBGROUP_SIZE_API_CONSTANT, /**< Vulkan behavior */

   /* GL only requires gl_SubgroupSize to be uniform across invocations,
    * so it may vary per stage (e.g. per-dispatch-width for compute).
    */
   BRW_SUBGROUP_SIZE_UNIFORM, /**< OpenGL behavior */
};

/* Fields common to the program keys of every shader stage. */
struct brw_base_prog_key {
   unsigned program_string_id;

   /* How gl_SubgroupSize must behave for this API (see enum above). */
   enum brw_subgroup_size_type subgroup_size_type;

   struct brw_sampler_prog_key_data tex;
};
|
||||
|
||||
|
|
|
@ -7967,7 +7967,9 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
|
|||
{
|
||||
const struct gen_device_info *devinfo = compiler->devinfo;
|
||||
|
||||
brw_nir_apply_key(shader, compiler, &key->base, true);
|
||||
unsigned max_subgroup_size = unlikely(INTEL_DEBUG & DEBUG_DO32) ? 32 : 16;
|
||||
|
||||
brw_nir_apply_key(shader, compiler, &key->base, max_subgroup_size, true);
|
||||
brw_nir_lower_fs_inputs(shader, devinfo, key);
|
||||
brw_nir_lower_fs_outputs(shader);
|
||||
|
||||
|
@ -8228,7 +8230,7 @@ compile_cs_to_nir(const struct brw_compiler *compiler,
|
|||
unsigned dispatch_width)
|
||||
{
|
||||
nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
|
||||
brw_nir_apply_key(shader, compiler, &key->base, true);
|
||||
brw_nir_apply_key(shader, compiler, &key->base, dispatch_width, true);
|
||||
|
||||
NIR_PASS_V(shader, brw_nir_lower_cs_intrinsics, dispatch_width);
|
||||
|
||||
|
|
|
@ -809,13 +809,6 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler,
|
|||
|
||||
UNUSED bool progress; /* Written by OPT */
|
||||
|
||||
const nir_lower_subgroups_options subgroups_options = {
|
||||
.subgroup_size = BRW_SUBGROUP_SIZE,
|
||||
.ballot_bit_size = 32,
|
||||
.lower_subgroup_masks = true,
|
||||
};
|
||||
OPT(nir_lower_subgroups, &subgroups_options);
|
||||
|
||||
OPT(brw_nir_lower_mem_access_bit_sizes);
|
||||
|
||||
do {
|
||||
|
@ -973,16 +966,48 @@ brw_nir_apply_sampler_key(nir_shader *nir,
|
|||
return nir_lower_tex(nir, &tex_options);
|
||||
}
|
||||
|
||||
static unsigned
|
||||
get_subgroup_size(const struct brw_base_prog_key *key,
|
||||
unsigned max_subgroup_size)
|
||||
{
|
||||
switch (key->subgroup_size_type) {
|
||||
case BRW_SUBGROUP_SIZE_API_CONSTANT:
|
||||
/* We have to use the global constant size. */
|
||||
return BRW_SUBGROUP_SIZE;
|
||||
|
||||
case BRW_SUBGROUP_SIZE_UNIFORM:
|
||||
/* It has to be uniform across all invocations but can vary per stage
|
||||
* if we want. This gives us a bit more freedom.
|
||||
*
|
||||
* For compute, brw_nir_apply_key is called per-dispatch-width so this
|
||||
* is the actual subgroup size and not a maximum. However, we only
|
||||
* invoke one size of any given compute shader so it's still guaranteed
|
||||
* to be uniform across invocations.
|
||||
*/
|
||||
return max_subgroup_size;
|
||||
}
|
||||
|
||||
unreachable("Invalid subgroup size type");
|
||||
}
|
||||
|
||||
/* Apply the stage-independent parts of the program key to a NIR shader:
 * sampler workarounds and subgroup lowering.  max_subgroup_size is the
 * value gl_SubgroupSize lowers to when the key requests uniform (GL)
 * behavior; it is ignored for the Vulkan API-constant behavior.
 * Re-runs the optimization loop only if one of the passes made progress.
 */
void
brw_nir_apply_key(nir_shader *nir,
                  const struct brw_compiler *compiler,
                  const struct brw_base_prog_key *key,
                  unsigned max_subgroup_size,
                  bool is_scalar)
{
   bool progress = false; /* written by the OPT() macro below */

   OPT(brw_nir_apply_sampler_key, compiler, &key->tex);

   /* Lower subgroup intrinsics using the size chosen per the key's
    * subgroup_size_type (see get_subgroup_size).
    */
   const nir_lower_subgroups_options subgroups_options = {
      .subgroup_size = get_subgroup_size(key, max_subgroup_size),
      .ballot_bit_size = 32,
      .lower_subgroup_masks = true,
   };
   OPT(nir_lower_subgroups, &subgroups_options);

   if (progress)
      brw_nir_optimize(nir, compiler, is_scalar, false);
}
|
||||
|
|
|
@ -140,6 +140,7 @@ void brw_nir_apply_tcs_quads_workaround(nir_shader *nir);
|
|||
void brw_nir_apply_key(nir_shader *nir,
|
||||
const struct brw_compiler *compiler,
|
||||
const struct brw_base_prog_key *key,
|
||||
unsigned max_subgroup_size,
|
||||
bool is_scalar);
|
||||
|
||||
enum brw_reg_type brw_type_for_nir_type(const struct gen_device_info *devinfo,
|
||||
|
|
|
@ -1244,7 +1244,7 @@ brw_compile_tes(const struct brw_compiler *compiler,
|
|||
nir->info.inputs_read = key->inputs_read;
|
||||
nir->info.patch_inputs_read = key->patch_inputs_read;
|
||||
|
||||
brw_nir_apply_key(nir, compiler, &key->base, is_scalar);
|
||||
brw_nir_apply_key(nir, compiler, &key->base, 8, is_scalar);
|
||||
brw_nir_lower_tes_inputs(nir, input_vue_map);
|
||||
brw_nir_lower_vue_outputs(nir);
|
||||
brw_postprocess_nir(nir, compiler, is_scalar);
|
||||
|
|
|
@ -2845,7 +2845,7 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
|
|||
char **error_str)
|
||||
{
|
||||
const bool is_scalar = compiler->scalar_stage[MESA_SHADER_VERTEX];
|
||||
brw_nir_apply_key(shader, compiler, &key->base, is_scalar);
|
||||
brw_nir_apply_key(shader, compiler, &key->base, 8, is_scalar);
|
||||
|
||||
const unsigned *assembly = NULL;
|
||||
|
||||
|
|
|
@ -639,7 +639,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
|
|||
&c.input_vue_map, inputs_read,
|
||||
shader->info.separate_shader);
|
||||
|
||||
brw_nir_apply_key(shader, compiler, &key->base, is_scalar);
|
||||
brw_nir_apply_key(shader, compiler, &key->base, 8, is_scalar);
|
||||
brw_nir_lower_vue_inputs(shader, &c.input_vue_map);
|
||||
brw_nir_lower_vue_outputs(shader);
|
||||
brw_postprocess_nir(shader, compiler, is_scalar);
|
||||
|
|
|
@ -397,7 +397,7 @@ brw_compile_tcs(const struct brw_compiler *compiler,
|
|||
nir->info.outputs_written,
|
||||
nir->info.patch_outputs_written);
|
||||
|
||||
brw_nir_apply_key(nir, compiler, &key->base, is_scalar);
|
||||
brw_nir_apply_key(nir, compiler, &key->base, 8, is_scalar);
|
||||
brw_nir_lower_vue_inputs(nir, &input_vue_map);
|
||||
brw_nir_lower_tcs_outputs(nir, &vue_prog_data->vue_map,
|
||||
key->tes_primitive_mode);
|
||||
|
|
|
@ -355,6 +355,8 @@ static void
|
|||
populate_base_prog_key(const struct gen_device_info *devinfo,
|
||||
struct brw_base_prog_key *key)
|
||||
{
|
||||
key->subgroup_size_type = BRW_SUBGROUP_SIZE_API_CONSTANT;
|
||||
|
||||
populate_sampler_prog_key(devinfo, &key->tex);
|
||||
}
|
||||
|
||||
|
|
|
@ -335,6 +335,7 @@ brw_populate_base_prog_key(struct gl_context *ctx,
|
|||
struct brw_base_prog_key *key)
|
||||
{
|
||||
key->program_string_id = prog->id;
|
||||
key->subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM;
|
||||
brw_populate_sampler_prog_key_data(ctx, &prog->program, &key->tex);
|
||||
}
|
||||
|
||||
|
@ -344,6 +345,7 @@ brw_populate_default_base_prog_key(const struct gen_device_info *devinfo,
|
|||
struct brw_base_prog_key *key)
|
||||
{
|
||||
key->program_string_id = prog->id;
|
||||
key->subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM;
|
||||
brw_setup_tex_for_precompile(devinfo, &key->tex, &prog->program);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue