intel,anv,iris,crocus: Drop subgroup size from the shader key
Use nir->info.subgroup_size instead.

Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17337>
This commit is contained in:
parent
e9b2862c1a
commit
530de844ef
|
@ -49,7 +49,6 @@
|
|||
#include "nir/tgsi_to_nir.h"
|
||||
|
||||
#define KEY_INIT_NO_ID() \
|
||||
.base.subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM, \
|
||||
.base.tex.swizzles[0 ... BRW_MAX_SAMPLERS - 1] = 0x688, \
|
||||
.base.tex.compressed_multisample_layout_mask = ~0
|
||||
#define KEY_INIT() \
|
||||
|
|
|
@ -54,7 +54,6 @@
|
|||
#define BRW_KEY_INIT(gen, prog_id, limit_trig_input) \
|
||||
.base.program_string_id = prog_id, \
|
||||
.base.limit_trig_input_range = limit_trig_input, \
|
||||
.base.subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM, \
|
||||
.base.tex.swizzles[0 ... BRW_MAX_SAMPLERS - 1] = 0x688, \
|
||||
.base.tex.compressed_multisample_layout_mask = ~0, \
|
||||
.base.tex.msaa_16 = (gen >= 9 ? ~0 : 0)
|
||||
|
|
|
@ -230,26 +230,9 @@ struct brw_sampler_prog_key_data {
|
|||
float scale_factors[BRW_MAX_SAMPLERS];
|
||||
};
|
||||
|
||||
/** An enum representing what kind of input gl_SubgroupSize is. */
|
||||
enum PACKED brw_subgroup_size_type
|
||||
{
|
||||
BRW_SUBGROUP_SIZE_API_CONSTANT, /**< Default Vulkan behavior */
|
||||
BRW_SUBGROUP_SIZE_UNIFORM, /**< OpenGL behavior */
|
||||
BRW_SUBGROUP_SIZE_VARYING, /**< VK_EXT_subgroup_size_control */
|
||||
|
||||
/* These enums are specifically chosen so that the value of the enum is
|
||||
* also the subgroup size. If any new values are added, they must respect
|
||||
* this invariant.
|
||||
*/
|
||||
BRW_SUBGROUP_SIZE_REQUIRE_8 = 8, /**< VK_EXT_subgroup_size_control */
|
||||
BRW_SUBGROUP_SIZE_REQUIRE_16 = 16, /**< VK_EXT_subgroup_size_control */
|
||||
BRW_SUBGROUP_SIZE_REQUIRE_32 = 32, /**< VK_EXT_subgroup_size_control */
|
||||
};
|
||||
|
||||
struct brw_base_prog_key {
|
||||
unsigned program_string_id;
|
||||
|
||||
enum brw_subgroup_size_type subgroup_size_type;
|
||||
bool robust_buffer_access;
|
||||
|
||||
/**
|
||||
|
|
|
@ -7759,7 +7759,7 @@ brw_compile_cs(const struct brw_compiler *compiler,
|
|||
}
|
||||
|
||||
const unsigned required_dispatch_width =
|
||||
brw_required_dispatch_width(&nir->info, key->base.subgroup_size_type);
|
||||
brw_required_dispatch_width(&nir->info);
|
||||
|
||||
fs_visitor *v[3] = {0};
|
||||
const char *error[3] = {0};
|
||||
|
|
|
@ -439,9 +439,7 @@ brw_kernel_from_spirv(struct brw_compiler *compiler,
|
|||
NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics);
|
||||
NIR_PASS_V(nir, lower_kernel_intrinsics);
|
||||
|
||||
struct brw_cs_prog_key key = {
|
||||
.base.subgroup_size_type = BRW_SUBGROUP_SIZE_VARYING,
|
||||
};
|
||||
struct brw_cs_prog_key key = { };
|
||||
|
||||
memset(&kernel->prog_data, 0, sizeof(kernel->prog_data));
|
||||
kernel->prog_data.base.nr_params = DIV_ROUND_UP(nir->num_uniforms, 4);
|
||||
|
|
|
@ -217,7 +217,7 @@ brw_compile_task(const struct brw_compiler *compiler,
|
|||
NIR_PASS_V(nir, brw_nir_lower_tue_outputs, &prog_data->map);
|
||||
|
||||
const unsigned required_dispatch_width =
|
||||
brw_required_dispatch_width(&nir->info, key->base.subgroup_size_type);
|
||||
brw_required_dispatch_width(&nir->info);
|
||||
|
||||
fs_visitor *v[3] = {0};
|
||||
const char *error[3] = {0};
|
||||
|
@ -715,7 +715,7 @@ brw_compile_mesh(const struct brw_compiler *compiler,
|
|||
NIR_PASS_V(nir, brw_nir_lower_mue_outputs, &prog_data->map);
|
||||
|
||||
const unsigned required_dispatch_width =
|
||||
brw_required_dispatch_width(&nir->info, key->base.subgroup_size_type);
|
||||
brw_required_dispatch_width(&nir->info);
|
||||
|
||||
fs_visitor *v[3] = {0};
|
||||
const char *error[3] = {0};
|
||||
|
|
|
@ -1353,16 +1353,14 @@ brw_nir_apply_sampler_key(nir_shader *nir,
|
|||
}
|
||||
|
||||
static unsigned
|
||||
get_subgroup_size(gl_shader_stage stage,
|
||||
const struct brw_base_prog_key *key,
|
||||
unsigned max_subgroup_size)
|
||||
get_subgroup_size(const struct shader_info *info, unsigned max_subgroup_size)
|
||||
{
|
||||
switch (key->subgroup_size_type) {
|
||||
case BRW_SUBGROUP_SIZE_API_CONSTANT:
|
||||
switch (info->subgroup_size) {
|
||||
case SUBGROUP_SIZE_API_CONSTANT:
|
||||
/* We have to use the global constant size. */
|
||||
return BRW_SUBGROUP_SIZE;
|
||||
|
||||
case BRW_SUBGROUP_SIZE_UNIFORM:
|
||||
case SUBGROUP_SIZE_UNIFORM:
|
||||
/* It has to be uniform across all invocations but can vary per stage
|
||||
* if we want. This gives us a bit more freedom.
|
||||
*
|
||||
|
@ -1373,7 +1371,7 @@ get_subgroup_size(gl_shader_stage stage,
|
|||
*/
|
||||
return max_subgroup_size;
|
||||
|
||||
case BRW_SUBGROUP_SIZE_VARYING:
|
||||
case SUBGROUP_SIZE_VARYING:
|
||||
/* The subgroup size is allowed to be fully varying. For geometry
|
||||
* stages, we know it's always 8 which is max_subgroup_size so we can
|
||||
* return that. For compute, brw_nir_apply_key is called once per
|
||||
|
@ -1384,16 +1382,21 @@ get_subgroup_size(gl_shader_stage stage,
|
|||
* that's a risk the client took when it asked for a varying subgroup
|
||||
* size.
|
||||
*/
|
||||
return stage == MESA_SHADER_FRAGMENT ? 0 : max_subgroup_size;
|
||||
return info->stage == MESA_SHADER_FRAGMENT ? 0 : max_subgroup_size;
|
||||
|
||||
case BRW_SUBGROUP_SIZE_REQUIRE_8:
|
||||
case BRW_SUBGROUP_SIZE_REQUIRE_16:
|
||||
case BRW_SUBGROUP_SIZE_REQUIRE_32:
|
||||
assert(gl_shader_stage_uses_workgroup(stage));
|
||||
case SUBGROUP_SIZE_REQUIRE_8:
|
||||
case SUBGROUP_SIZE_REQUIRE_16:
|
||||
case SUBGROUP_SIZE_REQUIRE_32:
|
||||
assert(gl_shader_stage_uses_workgroup(info->stage));
|
||||
/* These enum values are expressly chosen to be equal to the subgroup
|
||||
* size that they require.
|
||||
*/
|
||||
return key->subgroup_size_type;
|
||||
return info->subgroup_size;
|
||||
|
||||
case SUBGROUP_SIZE_FULL_SUBGROUPS:
|
||||
case SUBGROUP_SIZE_REQUIRE_64:
|
||||
case SUBGROUP_SIZE_REQUIRE_128:
|
||||
break;
|
||||
}
|
||||
|
||||
unreachable("Invalid subgroup size type");
|
||||
|
@ -1411,8 +1414,7 @@ brw_nir_apply_key(nir_shader *nir,
|
|||
OPT(brw_nir_apply_sampler_key, compiler, &key->tex);
|
||||
|
||||
const nir_lower_subgroups_options subgroups_options = {
|
||||
.subgroup_size = get_subgroup_size(nir->info.stage, key,
|
||||
max_subgroup_size),
|
||||
.subgroup_size = get_subgroup_size(&nir->info, max_subgroup_size),
|
||||
.ballot_bit_size = 32,
|
||||
.ballot_components = 1,
|
||||
.lower_subgroup_masks = true,
|
||||
|
|
|
@ -31,8 +31,7 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
unsigned brw_required_dispatch_width(const struct shader_info *info,
|
||||
enum brw_subgroup_size_type subgroup_size_type);
|
||||
unsigned brw_required_dispatch_width(const struct shader_info *info);
|
||||
|
||||
bool brw_simd_should_compile(void *mem_ctx,
|
||||
unsigned simd,
|
||||
|
|
|
@ -28,26 +28,17 @@
|
|||
#include "util/ralloc.h"
|
||||
|
||||
unsigned
|
||||
brw_required_dispatch_width(const struct shader_info *info,
|
||||
enum brw_subgroup_size_type subgroup_size_type)
|
||||
brw_required_dispatch_width(const struct shader_info *info)
|
||||
{
|
||||
unsigned required = 0;
|
||||
|
||||
if ((int)subgroup_size_type >= (int)BRW_SUBGROUP_SIZE_REQUIRE_8) {
|
||||
if ((int)info->subgroup_size >= (int)SUBGROUP_SIZE_REQUIRE_8) {
|
||||
assert(gl_shader_stage_uses_workgroup(info->stage));
|
||||
/* These enum values are expressly chosen to be equal to the subgroup
|
||||
* size that they require.
|
||||
*/
|
||||
required = (unsigned)subgroup_size_type;
|
||||
return (unsigned)info->subgroup_size;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (gl_shader_stage_is_compute(info->stage) &&
|
||||
info->subgroup_size >= SUBGROUP_SIZE_REQUIRE_8) {
|
||||
assert(required == 0 || required == info->subgroup_size);
|
||||
required = info->subgroup_size;
|
||||
}
|
||||
|
||||
return required;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
|
|
|
@ -315,11 +315,9 @@ populate_sampler_prog_key(const struct intel_device_info *devinfo,
|
|||
|
||||
static void
|
||||
populate_base_prog_key(const struct anv_device *device,
|
||||
enum brw_subgroup_size_type subgroup_size_type,
|
||||
bool robust_buffer_acccess,
|
||||
struct brw_base_prog_key *key)
|
||||
{
|
||||
key->subgroup_size_type = subgroup_size_type;
|
||||
key->robust_buffer_access = robust_buffer_acccess;
|
||||
key->limit_trig_input_range =
|
||||
device->physical->instance->limit_trig_input_range;
|
||||
|
@ -329,14 +327,12 @@ populate_base_prog_key(const struct anv_device *device,
|
|||
|
||||
static void
|
||||
populate_vs_prog_key(const struct anv_device *device,
|
||||
enum brw_subgroup_size_type subgroup_size_type,
|
||||
bool robust_buffer_acccess,
|
||||
struct brw_vs_prog_key *key)
|
||||
{
|
||||
memset(key, 0, sizeof(*key));
|
||||
|
||||
populate_base_prog_key(device, subgroup_size_type,
|
||||
robust_buffer_acccess, &key->base);
|
||||
populate_base_prog_key(device, robust_buffer_acccess, &key->base);
|
||||
|
||||
/* XXX: Handle vertex input work-arounds */
|
||||
|
||||
|
@ -345,41 +341,35 @@ populate_vs_prog_key(const struct anv_device *device,
|
|||
|
||||
static void
|
||||
populate_tcs_prog_key(const struct anv_device *device,
|
||||
enum brw_subgroup_size_type subgroup_size_type,
|
||||
bool robust_buffer_acccess,
|
||||
unsigned input_vertices,
|
||||
struct brw_tcs_prog_key *key)
|
||||
{
|
||||
memset(key, 0, sizeof(*key));
|
||||
|
||||
populate_base_prog_key(device, subgroup_size_type,
|
||||
robust_buffer_acccess, &key->base);
|
||||
populate_base_prog_key(device, robust_buffer_acccess, &key->base);
|
||||
|
||||
key->input_vertices = input_vertices;
|
||||
}
|
||||
|
||||
static void
|
||||
populate_tes_prog_key(const struct anv_device *device,
|
||||
enum brw_subgroup_size_type subgroup_size_type,
|
||||
bool robust_buffer_acccess,
|
||||
struct brw_tes_prog_key *key)
|
||||
{
|
||||
memset(key, 0, sizeof(*key));
|
||||
|
||||
populate_base_prog_key(device, subgroup_size_type,
|
||||
robust_buffer_acccess, &key->base);
|
||||
populate_base_prog_key(device, robust_buffer_acccess, &key->base);
|
||||
}
|
||||
|
||||
static void
|
||||
populate_gs_prog_key(const struct anv_device *device,
|
||||
enum brw_subgroup_size_type subgroup_size_type,
|
||||
bool robust_buffer_acccess,
|
||||
struct brw_gs_prog_key *key)
|
||||
{
|
||||
memset(key, 0, sizeof(*key));
|
||||
|
||||
populate_base_prog_key(device, subgroup_size_type,
|
||||
robust_buffer_acccess, &key->base);
|
||||
populate_base_prog_key(device, robust_buffer_acccess, &key->base);
|
||||
}
|
||||
|
||||
static bool
|
||||
|
@ -439,29 +429,26 @@ pipeline_has_coarse_pixel(const struct anv_graphics_pipeline *pipeline,
|
|||
|
||||
static void
|
||||
populate_task_prog_key(const struct anv_device *device,
|
||||
enum brw_subgroup_size_type subgroup_size_type,
|
||||
bool robust_buffer_access,
|
||||
struct brw_task_prog_key *key)
|
||||
{
|
||||
memset(key, 0, sizeof(*key));
|
||||
|
||||
populate_base_prog_key(device, subgroup_size_type, robust_buffer_access, &key->base);
|
||||
populate_base_prog_key(device, robust_buffer_access, &key->base);
|
||||
}
|
||||
|
||||
static void
|
||||
populate_mesh_prog_key(const struct anv_device *device,
|
||||
enum brw_subgroup_size_type subgroup_size_type,
|
||||
bool robust_buffer_access,
|
||||
struct brw_mesh_prog_key *key)
|
||||
{
|
||||
memset(key, 0, sizeof(*key));
|
||||
|
||||
populate_base_prog_key(device, subgroup_size_type, robust_buffer_access, &key->base);
|
||||
populate_base_prog_key(device, robust_buffer_access, &key->base);
|
||||
}
|
||||
|
||||
static void
|
||||
populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline,
|
||||
VkPipelineShaderStageCreateFlags flags,
|
||||
bool robust_buffer_acccess,
|
||||
const VkPipelineMultisampleStateCreateInfo *ms_info,
|
||||
const VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_info,
|
||||
|
@ -472,7 +459,7 @@ populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline,
|
|||
|
||||
memset(key, 0, sizeof(*key));
|
||||
|
||||
populate_base_prog_key(device, flags, robust_buffer_acccess, &key->base);
|
||||
populate_base_prog_key(device, robust_buffer_acccess, &key->base);
|
||||
|
||||
/* We set this to 0 here and set to the actual value before we call
|
||||
* brw_compile_fs.
|
||||
|
@ -520,25 +507,22 @@ populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline,
|
|||
|
||||
static void
|
||||
populate_cs_prog_key(const struct anv_device *device,
|
||||
enum brw_subgroup_size_type subgroup_size_type,
|
||||
bool robust_buffer_acccess,
|
||||
struct brw_cs_prog_key *key)
|
||||
{
|
||||
memset(key, 0, sizeof(*key));
|
||||
|
||||
populate_base_prog_key(device, subgroup_size_type,
|
||||
robust_buffer_acccess, &key->base);
|
||||
populate_base_prog_key(device, robust_buffer_acccess, &key->base);
|
||||
}
|
||||
|
||||
static void
|
||||
populate_bs_prog_key(const struct anv_device *device,
|
||||
VkPipelineShaderStageCreateFlags flags,
|
||||
bool robust_buffer_access,
|
||||
struct brw_bs_prog_key *key)
|
||||
{
|
||||
memset(key, 0, sizeof(*key));
|
||||
|
||||
populate_base_prog_key(device, flags, robust_buffer_access, &key->base);
|
||||
populate_base_prog_key(device, robust_buffer_access, &key->base);
|
||||
}
|
||||
|
||||
struct anv_pipeline_stage {
|
||||
|
@ -1323,45 +1307,6 @@ anv_pipeline_add_executables(struct anv_pipeline *pipeline,
|
|||
pipeline->ray_queries = MAX2(pipeline->ray_queries, bin->prog_data->ray_queries);
|
||||
}
|
||||
|
||||
static enum brw_subgroup_size_type
|
||||
anv_subgroup_size_type(gl_shader_stage stage,
|
||||
const struct vk_shader_module *module,
|
||||
VkPipelineShaderStageCreateFlags flags,
|
||||
const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rss_info)
|
||||
{
|
||||
enum brw_subgroup_size_type subgroup_size_type;
|
||||
|
||||
const bool allow_varying =
|
||||
flags & VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT ||
|
||||
vk_shader_module_spirv_version(module) >= 0x10600;
|
||||
|
||||
if (rss_info) {
|
||||
assert(gl_shader_stage_uses_workgroup(stage));
|
||||
/* These enum values are expressly chosen to be equal to the subgroup
|
||||
* size that they require.
|
||||
*/
|
||||
assert(rss_info->requiredSubgroupSize == 8 ||
|
||||
rss_info->requiredSubgroupSize == 16 ||
|
||||
rss_info->requiredSubgroupSize == 32);
|
||||
subgroup_size_type = rss_info->requiredSubgroupSize;
|
||||
} else if (allow_varying) {
|
||||
subgroup_size_type = BRW_SUBGROUP_SIZE_VARYING;
|
||||
} else if (flags & VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT) {
|
||||
assert(stage == MESA_SHADER_COMPUTE);
|
||||
/* If the client expressly requests full subgroups and they don't
|
||||
* specify a subgroup size nor allow varying subgroups, we need to
|
||||
* pick one. So we specify the API value of 32. Performance will
|
||||
* likely be terrible in this case but there's nothing we can do about
|
||||
* that. The client should have chosen a size.
|
||||
*/
|
||||
subgroup_size_type = BRW_SUBGROUP_SIZE_REQUIRE_32;
|
||||
} else {
|
||||
subgroup_size_type = BRW_SUBGROUP_SIZE_API_CONSTANT;
|
||||
}
|
||||
|
||||
return subgroup_size_type;
|
||||
}
|
||||
|
||||
static void
|
||||
anv_pipeline_init_from_cached_graphics(struct anv_graphics_pipeline *pipeline)
|
||||
{
|
||||
|
@ -1404,7 +1349,6 @@ anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline,
|
|||
VkResult result;
|
||||
for (uint32_t i = 0; i < info->stageCount; i++) {
|
||||
const VkPipelineShaderStageCreateInfo *sinfo = &info->pStages[i];
|
||||
VK_FROM_HANDLE(vk_shader_module, module, sinfo->module);
|
||||
gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
|
||||
|
||||
int64_t stage_start = os_time_get_nano();
|
||||
|
@ -1413,33 +1357,26 @@ anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline,
|
|||
stages[stage].info = sinfo;
|
||||
vk_pipeline_hash_shader_stage(&info->pStages[i], stages[stage].shader_sha1);
|
||||
|
||||
const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rss_info =
|
||||
vk_find_struct_const(sinfo->pNext,
|
||||
PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT);
|
||||
|
||||
enum brw_subgroup_size_type subgroup_size_type =
|
||||
anv_subgroup_size_type(stage, module, sinfo->flags, rss_info);
|
||||
|
||||
const struct anv_device *device = pipeline->base.device;
|
||||
switch (stage) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
populate_vs_prog_key(device, subgroup_size_type,
|
||||
populate_vs_prog_key(device,
|
||||
pipeline->base.device->robust_buffer_access,
|
||||
&stages[stage].key.vs);
|
||||
break;
|
||||
case MESA_SHADER_TESS_CTRL:
|
||||
populate_tcs_prog_key(device, subgroup_size_type,
|
||||
populate_tcs_prog_key(device,
|
||||
pipeline->base.device->robust_buffer_access,
|
||||
info->pTessellationState->patchControlPoints,
|
||||
&stages[stage].key.tcs);
|
||||
break;
|
||||
case MESA_SHADER_TESS_EVAL:
|
||||
populate_tes_prog_key(device, subgroup_size_type,
|
||||
populate_tes_prog_key(device,
|
||||
pipeline->base.device->robust_buffer_access,
|
||||
&stages[stage].key.tes);
|
||||
break;
|
||||
case MESA_SHADER_GEOMETRY:
|
||||
populate_gs_prog_key(device, subgroup_size_type,
|
||||
populate_gs_prog_key(device,
|
||||
pipeline->base.device->robust_buffer_access,
|
||||
&stages[stage].key.gs);
|
||||
break;
|
||||
|
@ -1447,7 +1384,7 @@ anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline,
|
|||
const bool raster_enabled =
|
||||
!info->pRasterizationState->rasterizerDiscardEnable ||
|
||||
dynamic_states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE;
|
||||
populate_wm_prog_key(pipeline, subgroup_size_type,
|
||||
populate_wm_prog_key(pipeline,
|
||||
pipeline->base.device->robust_buffer_access,
|
||||
raster_enabled ? info->pMultisampleState : NULL,
|
||||
vk_find_struct_const(info->pNext,
|
||||
|
@ -1457,12 +1394,12 @@ anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline,
|
|||
break;
|
||||
}
|
||||
case MESA_SHADER_TASK:
|
||||
populate_task_prog_key(device, subgroup_size_type,
|
||||
populate_task_prog_key(device,
|
||||
pipeline->base.device->robust_buffer_access,
|
||||
&stages[stage].key.task);
|
||||
break;
|
||||
case MESA_SHADER_MESH:
|
||||
populate_mesh_prog_key(device, subgroup_size_type,
|
||||
populate_mesh_prog_key(device,
|
||||
pipeline->base.device->robust_buffer_access,
|
||||
&stages[stage].key.mesh);
|
||||
break;
|
||||
|
@ -1849,7 +1786,6 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
|
|||
const VkComputePipelineCreateInfo *info)
|
||||
{
|
||||
const VkPipelineShaderStageCreateInfo *sinfo = &info->stage;
|
||||
VK_FROM_HANDLE(vk_shader_module, module, sinfo->module);
|
||||
assert(sinfo->stage == VK_SHADER_STAGE_COMPUTE_BIT);
|
||||
|
||||
VkPipelineCreationFeedbackEXT pipeline_feedback = {
|
||||
|
@ -1874,16 +1810,7 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
|
|||
|
||||
struct anv_shader_bin *bin = NULL;
|
||||
|
||||
const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo *rss_info =
|
||||
vk_find_struct_const(info->stage.pNext,
|
||||
PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO);
|
||||
|
||||
const enum brw_subgroup_size_type subgroup_size_type =
|
||||
anv_subgroup_size_type(MESA_SHADER_COMPUTE, module, info->stage.flags, rss_info);
|
||||
|
||||
populate_cs_prog_key(device, subgroup_size_type,
|
||||
device->robust_buffer_access,
|
||||
&stage.key.cs);
|
||||
populate_cs_prog_key(device, device->robust_buffer_access, &stage.key.cs);
|
||||
|
||||
ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
|
||||
|
||||
|
@ -1939,10 +1866,19 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
|
|||
*/
|
||||
if (device->physical->instance->assume_full_subgroups &&
|
||||
stage.nir->info.cs.uses_wide_subgroup_intrinsics &&
|
||||
subgroup_size_type == BRW_SUBGROUP_SIZE_API_CONSTANT &&
|
||||
stage.nir->info.subgroup_size == SUBGROUP_SIZE_API_CONSTANT &&
|
||||
local_size &&
|
||||
local_size % BRW_SUBGROUP_SIZE == 0)
|
||||
stage.key.base.subgroup_size_type = BRW_SUBGROUP_SIZE_REQUIRE_32;
|
||||
stage.nir->info.subgroup_size = SUBGROUP_SIZE_FULL_SUBGROUPS;
|
||||
|
||||
/* If the client requests that we dispatch full subgroups but doesn't
|
||||
* allow us to pick a subgroup size, we have to smash it to the API
|
||||
* value of 32. Performance will likely be terrible in this case but
|
||||
* there's nothing we can do about that. The client should have chosen
|
||||
* a size.
|
||||
*/
|
||||
if (stage.nir->info.subgroup_size == SUBGROUP_SIZE_FULL_SUBGROUPS)
|
||||
stage.nir->info.subgroup_size = BRW_SUBGROUP_SIZE;
|
||||
|
||||
stage.num_stats = 1;
|
||||
|
||||
|
@ -2693,7 +2629,7 @@ anv_pipeline_init_ray_tracing_stages(struct anv_ray_tracing_pipeline *pipeline,
|
|||
},
|
||||
};
|
||||
|
||||
populate_bs_prog_key(pipeline->base.device, sinfo->flags,
|
||||
populate_bs_prog_key(pipeline->base.device,
|
||||
pipeline->base.device->robust_buffer_access,
|
||||
&stages[i].key.bs);
|
||||
|
||||
|
@ -2997,10 +2933,6 @@ anv_device_init_rt_shaders(struct anv_device *device)
|
|||
struct brw_cs_prog_key key;
|
||||
} trampoline_key = {
|
||||
.name = "rt-trampoline",
|
||||
.key = {
|
||||
/* TODO: Other subgroup sizes? */
|
||||
.base.subgroup_size_type = BRW_SUBGROUP_SIZE_REQUIRE_8,
|
||||
},
|
||||
};
|
||||
device->rt_trampoline =
|
||||
anv_device_search_for_kernel(device, device->internal_cache,
|
||||
|
@ -3012,6 +2944,8 @@ anv_device_init_rt_shaders(struct anv_device *device)
|
|||
nir_shader *trampoline_nir =
|
||||
brw_nir_create_raygen_trampoline(device->physical->compiler, tmp_ctx);
|
||||
|
||||
trampoline_nir->info.subgroup_size = SUBGROUP_SIZE_REQUIRE_8;
|
||||
|
||||
struct anv_pipeline_bind_map bind_map = {
|
||||
.surface_count = 0,
|
||||
.sampler_count = 0,
|
||||
|
|
Loading…
Reference in New Issue