tu: Implement VK_EXT_subgroup_size_control

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13960>
This commit is contained in:
Connor Abbott 2021-11-25 17:02:42 +01:00 committed by Marge Bot
parent 1a1e25dcce
commit c45c6e36eb
4 changed files with 59 additions and 4 deletions

View File

@ -195,6 +195,7 @@ get_device_extensions(const struct tu_physical_device *device,
.EXT_vertex_attribute_divisor = true,
.EXT_provoking_vertex = true,
.EXT_line_rasterization = true,
.EXT_subgroup_size_control = true,
#ifdef ANDROID
.ANDROID_native_buffer = true,
#endif
@ -782,6 +783,13 @@ tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
features->stippledSmoothLines = false;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT: {
VkPhysicalDeviceSubgroupSizeControlFeaturesEXT *features =
(VkPhysicalDeviceSubgroupSizeControlFeaturesEXT *)ext;
features->subgroupSizeControl = true;
features->computeFullSubgroups = true;
break;
}
default:
break;
@ -1141,6 +1149,16 @@ tu_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
props->lineSubPixelPrecisionBits = 8;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT: {
VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *props =
(VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *)ext;
/* TODO move threadsize_base and max_waves to fd_dev_info and use them here */
props->minSubgroupSize = 64; /* threadsize_base */
props->maxSubgroupSize = 128; /* threadsize_base * 2 */
props->maxComputeWorkgroupSubgroups = 16; /* max_waves */
props->requiredSubgroupSizeStages = VK_SHADER_STAGE_ALL;
break;
}
default:
break;

View File

@ -2426,7 +2426,7 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
continue;
struct tu_shader *shader =
tu_shader_create(builder->device, nir[stage],
tu_shader_create(builder->device, nir[stage], stage_infos[stage],
builder->multiview_mask, builder->layout,
builder->alloc);
if (!shader)
@ -3367,7 +3367,7 @@ tu_compute_pipeline_create(VkDevice device,
nir_shader_as_str(nir, pipeline->executables_mem_ctx) : NULL;
struct tu_shader *shader =
tu_shader_create(dev, nir, 0, layout, pAllocator);
tu_shader_create(dev, nir, stage_info, 0, layout, pAllocator);
if (!shader) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail;

View File

@ -1167,6 +1167,7 @@ tu_spirv_to_nir(struct tu_device *dev,
/* Build a tu_shader from an already-translated NIR shader.
 *
 * stage_info is the Vulkan pipeline shader stage create info for this
 * shader. The definition checks it for NULL: when NULL, both the API and
 * the real wave size are left as IR3_SINGLE_OR_DOUBLE. When non-NULL, its
 * flags (ALLOW_VARYING_SUBGROUP_SIZE / REQUIRE_FULL_SUBGROUPS) and an
 * optional VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT in its
 * pNext chain select the ir3 wave-size options.
 *
 * multiview_mask is passed as 0 by the compute pipeline path and as the
 * builder's multiview mask by the graphics pipeline path.
 *
 * Callers check the result against NULL; the compute pipeline path reports
 * a NULL return as VK_ERROR_OUT_OF_HOST_MEMORY.
 */
struct tu_shader *
tu_shader_create(struct tu_device *dev,
nir_shader *nir,
const VkPipelineShaderStageCreateInfo *stage_info,
unsigned multiview_mask,
struct tu_pipeline_layout *layout,
const VkAllocationCallbacks *alloc);

View File

@ -705,6 +705,7 @@ tu_gather_xfb_info(nir_shader *nir, struct ir3_stream_output_info *info)
struct tu_shader *
tu_shader_create(struct tu_device *dev,
nir_shader *nir,
const VkPipelineShaderStageCreateInfo *stage_info,
unsigned multiview_mask,
struct tu_pipeline_layout *layout,
const VkAllocationCallbacks *alloc)
@ -784,11 +785,46 @@ tu_shader_create(struct tu_device *dev,
ir3_finalize_nir(dev->compiler, nir);
enum ir3_wavesize_option api_wavesize, real_wavesize;
if (stage_info) {
if (stage_info->flags &
VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) {
api_wavesize = real_wavesize = IR3_SINGLE_OR_DOUBLE;
} else {
const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *size_info =
vk_find_struct_const(stage_info->pNext,
PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT);
if (size_info) {
if (size_info->requiredSubgroupSize == dev->compiler->threadsize_base) {
api_wavesize = IR3_SINGLE_ONLY;
} else {
assert(size_info->requiredSubgroupSize == dev->compiler->threadsize_base * 2);
api_wavesize = IR3_DOUBLE_ONLY;
}
} else {
/* Match the exposed subgroupSize. */
api_wavesize = IR3_DOUBLE_ONLY;
}
if (stage_info->flags &
VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT)
real_wavesize = api_wavesize;
else if (api_wavesize == IR3_SINGLE_ONLY)
real_wavesize = IR3_SINGLE_ONLY;
else
real_wavesize = IR3_SINGLE_OR_DOUBLE;
}
} else {
api_wavesize = real_wavesize = IR3_SINGLE_OR_DOUBLE;
}
shader->ir3_shader =
ir3_shader_from_nir(dev->compiler, nir, &(struct ir3_shader_options) {
.reserved_user_consts = align(shader->push_consts.count, 4),
.api_wavesize = IR3_DOUBLE_ONLY,
.real_wavesize = IR3_SINGLE_OR_DOUBLE,
.api_wavesize = api_wavesize,
.real_wavesize = real_wavesize,
}, &so_info);
return shader;