nir: Reduce destination size of ballot intrinsic when possible

Some hardware, like i965, doesn't support subgroup sizes greater than 32.
In that case, a ballot never needs more than 32 bits, so we can reduce
the destination size of the ballot intrinsic to 32 bits and zero-fill
the rest, which will simplify our code generation.

Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
Matt Turner 2017-06-30 15:07:10 -07:00
parent 51c1659af8
commit 1038d385a9
3 changed files with 21 additions and 0 deletions

View File

@ -1843,6 +1843,8 @@ typedef struct nir_shader_compiler_options {
*/
bool use_interpolated_input_intrinsics;
unsigned max_subgroup_size;
unsigned max_unroll_iterations;
} nir_shader_compiler_options;

View File

@ -62,6 +62,24 @@ opt_intrinsics_impl(nir_function_impl *impl)
replacement = nir_imm_int(&b, NIR_TRUE);
break;
}
/* Narrow a ballot whose destination is wider than 32 bits when the
 * shader's compiler options limit the subgroup size to 32 or fewer.
 */
case nir_intrinsic_ballot: {
/* The compiler options are expected to set a non-zero max_subgroup_size. */
assert(b.shader->options->max_subgroup_size != 0);
/* Nothing to do if the subgroup size may exceed 32, or if the
 * destination is already 32 bits or narrower: continue with the
 * enclosing loop without setting a replacement.
 */
if (b.shader->options->max_subgroup_size > 32 ||
intrin->dest.ssa.bit_size <= 32)
continue;
/* Create a new ballot intrinsic with a 32-bit destination
 * (NOTE(review): the "1, 32" arguments are presumably the component
 * count and bit size -- confirm against nir_ssa_dest_init) and give it
 * a copy of the original intrinsic's source.
 */
nir_intrinsic_instr *ballot =
nir_intrinsic_instr_create(b.shader, nir_intrinsic_ballot);
nir_ssa_dest_init(&ballot->instr, &ballot->dest, 1, 32, NULL);
nir_src_copy(&ballot->src[0], &intrin->src[0], ballot);
nir_builder_instr_insert(&b, &ballot->instr);
/* Pack the new ballot's result together with a constant 0 to form the
 * replacement value (presumably the ballot is the low half and the zero
 * the high half -- confirm operand order of nir_pack_64_2x32_split).
 */
replacement = nir_pack_64_2x32_split(&b,
&ballot->dest.ssa,
nir_imm_int(&b, 0));
break;
}
case nir_intrinsic_load_subgroup_eq_mask:
case nir_intrinsic_load_subgroup_ge_mask:
case nir_intrinsic_load_subgroup_gt_mask:

View File

@ -58,6 +58,7 @@ static const struct nir_shader_compiler_options scalar_nir_options = {
.lower_unpack_unorm_2x16 = true,
.lower_unpack_unorm_4x8 = true,
.lower_subgroup_masks = true,
.max_subgroup_size = 64, /* FIXME */
.max_unroll_iterations = 32,
};