turnip: Implement VK_KHR_zero_initialize_workgroup_memory
Moved nir_lower_compute_system_values so that it runs after nir_zero_initialize_shared_memory: the zero-initialization pass can emit load_local_invocation_index, which nir_lower_compute_system_values has to lower.

Relevant CTS tests: dEQP-VK.compute.zero_initialize_workgroup_memory.*

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14829>
This commit is contained in:
parent
c6d1cac6e5
commit
ff059605aa
|
@ -479,7 +479,7 @@ Vulkan 1.3 -- all DONE: anv, radv
|
||||||
VK_KHR_shader_non_semantic_info DONE (anv, radv)
|
VK_KHR_shader_non_semantic_info DONE (anv, radv)
|
||||||
VK_KHR_shader_terminate_invocation DONE (anv, radv, tu)
|
VK_KHR_shader_terminate_invocation DONE (anv, radv, tu)
|
||||||
VK_KHR_synchronization2 DONE (anv, radv)
|
VK_KHR_synchronization2 DONE (anv, radv)
|
||||||
VK_KHR_zero_initialize_workgroup_memory DONE (anv, radv)
|
VK_KHR_zero_initialize_workgroup_memory DONE (anv, radv, tu)
|
||||||
VK_EXT_4444_formats DONE (anv, lvp, radv, tu, v3dv)
|
VK_EXT_4444_formats DONE (anv, lvp, radv, tu, v3dv)
|
||||||
VK_EXT_extended_dynamic_state DONE (anv, lvp, radv, tu)
|
VK_EXT_extended_dynamic_state DONE (anv, lvp, radv, tu)
|
||||||
VK_EXT_extended_dynamic_state2 DONE (anv, lvp, radv, tu)
|
VK_EXT_extended_dynamic_state2 DONE (anv, lvp, radv, tu)
|
||||||
|
|
|
@ -159,6 +159,7 @@ get_device_extensions(const struct tu_physical_device *device,
|
||||||
.KHR_separate_depth_stencil_layouts = true,
|
.KHR_separate_depth_stencil_layouts = true,
|
||||||
.KHR_buffer_device_address = true,
|
.KHR_buffer_device_address = true,
|
||||||
.KHR_shader_integer_dot_product = true,
|
.KHR_shader_integer_dot_product = true,
|
||||||
|
.KHR_zero_initialize_workgroup_memory = true,
|
||||||
#ifndef TU_USE_KGSL
|
#ifndef TU_USE_KGSL
|
||||||
.KHR_timeline_semaphore = true,
|
.KHR_timeline_semaphore = true,
|
||||||
#endif
|
#endif
|
||||||
|
@ -593,7 +594,7 @@ tu_get_physical_device_features_1_3(struct tu_physical_device *pdevice,
|
||||||
features->computeFullSubgroups = true;
|
features->computeFullSubgroups = true;
|
||||||
features->synchronization2 = false;
|
features->synchronization2 = false;
|
||||||
features->textureCompressionASTC_HDR = false;
|
features->textureCompressionASTC_HDR = false;
|
||||||
features->shaderZeroInitializeWorkgroupMemory = false;
|
features->shaderZeroInitializeWorkgroupMemory = true;
|
||||||
features->dynamicRendering = false;
|
features->dynamicRendering = false;
|
||||||
features->shaderIntegerDotProduct = true;
|
features->shaderIntegerDotProduct = true;
|
||||||
features->maintenance4 = false;
|
features->maintenance4 = false;
|
||||||
|
|
|
@ -87,10 +87,6 @@ tu_spirv_to_nir(struct tu_device *dev,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
const struct nir_lower_compute_system_values_options compute_sysval_options = {
|
|
||||||
.has_base_workgroup_id = true,
|
|
||||||
};
|
|
||||||
|
|
||||||
const nir_shader_compiler_options *nir_options =
|
const nir_shader_compiler_options *nir_options =
|
||||||
ir3_get_compiler_options(dev->compiler);
|
ir3_get_compiler_options(dev->compiler);
|
||||||
|
|
||||||
|
@ -158,7 +154,6 @@ tu_spirv_to_nir(struct tu_device *dev,
|
||||||
NIR_PASS_V(nir, nir_lower_is_helper_invocation);
|
NIR_PASS_V(nir, nir_lower_is_helper_invocation);
|
||||||
|
|
||||||
NIR_PASS_V(nir, nir_lower_system_values);
|
NIR_PASS_V(nir, nir_lower_system_values);
|
||||||
NIR_PASS_V(nir, nir_lower_compute_system_values, &compute_sysval_options);
|
|
||||||
|
|
||||||
NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
|
NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
|
||||||
|
|
||||||
|
@ -763,6 +758,22 @@ tu_shader_create(struct tu_device *dev,
|
||||||
NIR_PASS_V(nir, nir_lower_explicit_io,
|
NIR_PASS_V(nir, nir_lower_explicit_io,
|
||||||
nir_var_mem_shared,
|
nir_var_mem_shared,
|
||||||
nir_address_format_32bit_offset);
|
nir_address_format_32bit_offset);
|
||||||
|
|
||||||
|
if (nir->info.zero_initialize_shared_memory && nir->info.shared_size > 0) {
|
||||||
|
const unsigned chunk_size = 16; /* max single store size */
|
||||||
|
/* Shared memory is allocated in 1024b chunks in HW, but the zero-init
|
||||||
|
* extension only requires us to initialize the memory that the shader
|
||||||
|
* is allocated at the API level, and it's up to the user to ensure
|
||||||
|
* that accesses are limited to those bounds.
|
||||||
|
*/
|
||||||
|
const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
|
||||||
|
NIR_PASS_V(nir, nir_zero_initialize_shared_memory, shared_size, chunk_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
const struct nir_lower_compute_system_values_options compute_sysval_options = {
|
||||||
|
.has_base_workgroup_id = true,
|
||||||
|
};
|
||||||
|
NIR_PASS_V(nir, nir_lower_compute_system_values, &compute_sysval_options);
|
||||||
}
|
}
|
||||||
|
|
||||||
nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, nir->info.stage);
|
nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, nir->info.stage);
|
||||||
|
|
Loading…
Reference in New Issue