From 4397eb91c1e0081be2b17edb13dd4b47b9e97b62 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 22 Feb 2019 15:28:24 -0600 Subject: [PATCH] intel/compiler: Allow for varying subgroup sizes Reviewed-by: Caio Marcelo de Oliveira Filho --- src/intel/compiler/brw_compiler.h | 1 + src/intel/compiler/brw_fs_nir.cpp | 8 ++++++++ src/intel/compiler/brw_nir.c | 19 +++++++++++++++++-- 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index ba95df5b7dc..8b62e67e2d7 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -208,6 +208,7 @@ enum PACKED brw_subgroup_size_type { BRW_SUBGROUP_SIZE_API_CONSTANT, /**< Vulkan behavior */ BRW_SUBGROUP_SIZE_UNIFORM, /**< OpenGL behavior */ + BRW_SUBGROUP_SIZE_VARYING, /**< VK_EXT_subgroup_size_control */ }; struct brw_base_prog_key { diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index e5a927431c4..6ea012c74b6 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -4916,6 +4916,14 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr break; } + case nir_intrinsic_load_subgroup_size: + /* This should only happen for fragment shaders because every other case + * is lowered in NIR so we can optimize on it. + */ + assert(stage == MESA_SHADER_FRAGMENT); + bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), brw_imm_d(dispatch_width)); + break; + case nir_intrinsic_load_subgroup_invocation: bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), nir_system_values[SYSTEM_VALUE_SUBGROUP_INVOCATION]); diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index 9e4b33c8b49..3f90ab5a2ba 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -967,7 +967,8 @@ brw_nir_apply_sampler_key(nir_shader *nir, } static unsigned -get_subgroup_size(const struct brw_base_prog_key *key, +get_subgroup_size(gl_shader_stage stage, + const struct brw_base_prog_key *key, unsigned max_subgroup_size) { switch (key->subgroup_size_type) { @@ -985,6 +986,19 @@ get_subgroup_size(const struct brw_base_prog_key *key, * to be uniform across invocations. */ return max_subgroup_size; + + case BRW_SUBGROUP_SIZE_VARYING: + /* The subgroup size is allowed to be fully varying. For geometry + * stages, we know it's always 8 which is max_subgroup_size so we can + * return that. For compute, brw_nir_apply_key is called once per + * dispatch-width so max_subgroup_size is the real subgroup size. + * + * For fragment, we return 0 and let it fall through to the back-end + * compiler. This means we can't optimize based on subgroup size but + * that's a risk the client took when it asked for a varying subgroup + * size. + */ + return stage == MESA_SHADER_FRAGMENT ? 0 : max_subgroup_size; } unreachable("Invalid subgroup size type"); @@ -1002,7 +1016,8 @@ brw_nir_apply_key(nir_shader *nir, OPT(brw_nir_apply_sampler_key, compiler, &key->tex); const nir_lower_subgroups_options subgroups_options = { - .subgroup_size = get_subgroup_size(key, max_subgroup_size), + .subgroup_size = get_subgroup_size(nir->info.stage, key, + max_subgroup_size), .ballot_bit_size = 32, .lower_subgroup_masks = true, };