From b704d03efd47678613248fce3d63954f1fae61f8 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Fri, 15 Jan 2021 16:44:44 -0600
Subject: [PATCH] anv: Do UBO loads with global addresses for bindless

This makes UBO loads in the variable pointers or bindless case work just
like SSBO loads in the sense that they use A64 messages and 64-bit
global addresses.  The primary difference is that we have an
optimization in anv_nir_lower_ubo_loads which uses a (possibly
predicated) block load message when the offset is constant so we get
roughly the same performance as we would from plumbing load_ubo all the
way to the back-end.

Reviewed-by: Kenneth Graunke
Reviewed-by: Caio Marcelo de Oliveira Filho
Part-of:
---
 src/intel/vulkan/anv_descriptor_set.c            |  4 +++-
 src/intel/vulkan/anv_nir.h                       | 14 ++++++++++++++
 src/intel/vulkan/anv_nir_apply_pipeline_layout.c |  8 +++++++-
 src/intel/vulkan/anv_pipeline.c                  | 13 +++++++++++--
 4 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c
index 3b5412ca01e..b92088c13b3 100644
--- a/src/intel/vulkan/anv_descriptor_set.c
+++ b/src/intel/vulkan/anv_descriptor_set.c
@@ -100,7 +100,9 @@ anv_descriptor_data_for_type(const struct anv_physical_device *device,
     */
    if (device->has_a64_buffer_access &&
        (type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ||
-        type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC))
+        type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC ||
+        type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
+        type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC))
       data |= ANV_DESCRIPTOR_ADDRESS_RANGE;
 
    /* On Ivy Bridge and Bay Trail, we need swizzles textures in the shader
diff --git a/src/intel/vulkan/anv_nir.h b/src/intel/vulkan/anv_nir.h
index 4829be98cc5..0ffed5dfc0f 100644
--- a/src/intel/vulkan/anv_nir.h
+++ b/src/intel/vulkan/anv_nir.h
@@ -54,6 +54,20 @@ anv_nir_ssbo_addr_format(const struct anv_physical_device *pdevice,
    }
 }
 
+static inline nir_address_format
+anv_nir_ubo_addr_format(const struct anv_physical_device *pdevice,
+                        bool robust_buffer_access)
+{
+   if (pdevice->has_a64_buffer_access) {
+      if (robust_buffer_access)
+         return nir_address_format_64bit_bounded_global;
+      else
+         return nir_address_format_64bit_global_32bit_offset;
+   } else {
+      return nir_address_format_32bit_index_offset;
+   }
+}
+
 bool anv_nir_lower_ubo_loads(nir_shader *shader);
 
 void anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
index a007100f845..857d2e08dec 100644
--- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
+++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
@@ -681,6 +681,12 @@ try_lower_direct_buffer_intrinsic(nir_builder *b,
 
       /* Rewrite to 32bit_index_offset whenever we can */
       addr_format = nir_address_format_32bit_index_offset;
+   } else {
+      assert(nir_deref_mode_is(deref, nir_var_mem_ubo));
+
+      /* Rewrite to 32bit_index_offset whenever we can */
+      if (descriptor_has_bti(desc, state))
+         addr_format = nir_address_format_32bit_index_offset;
    }
 
    nir_ssa_def *addr =
@@ -1294,7 +1300,7 @@ anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
       .layout = layout,
      .add_bounds_checks = robust_buffer_access,
      .ssbo_addr_format = anv_nir_ssbo_addr_format(pdevice, robust_buffer_access),
-      .ubo_addr_format = nir_address_format_32bit_index_offset,
+      .ubo_addr_format = anv_nir_ubo_addr_format(pdevice, robust_buffer_access),
       .lowered_instrs = _mesa_pointer_set_create(mem_ctx),
    };
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index f07b5e561c8..d565013a116 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -172,7 +172,8 @@ anv_shader_compile_to_nir(struct anv_device *device,
          .vk_memory_model_device_scope = true,
          .workgroup_memory_explicit_layout = true,
       },
-      .ubo_addr_format = nir_address_format_32bit_index_offset,
+      .ubo_addr_format =
+         anv_nir_ubo_addr_format(pdevice, device->robust_buffer_access),
       .ssbo_addr_format =
          anv_nir_ssbo_addr_format(pdevice, device->robust_buffer_access),
       .phys_ssbo_addr_format = nir_address_format_64bit_global,
@@ -735,13 +736,21 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
                  layout, nir, &stage->bind_map);
 
    NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
-              nir_address_format_32bit_index_offset);
+              anv_nir_ubo_addr_format(pdevice,
+                 pipeline->device->robust_buffer_access));
    NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
               anv_nir_ssbo_addr_format(pdevice,
                  pipeline->device->robust_buffer_access));
 
+   /* First run copy-prop to get rid of all of the vec() that address
+    * calculations often create and then constant-fold so that, when we
+    * get to anv_nir_lower_ubo_loads, we can detect constant offsets.
+    */
+   NIR_PASS_V(nir, nir_copy_prop);
    NIR_PASS_V(nir, nir_opt_constant_folding);
 
+   NIR_PASS_V(nir, anv_nir_lower_ubo_loads);
+
    /* We don't support non-uniform UBOs and non-uniform SSBO access is
    * handled naturally by falling back to A64 messages.
    */
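
A note for readers on the addressing mode this switches bindless UBOs to:
with robustBufferAccess enabled, anv_nir_ubo_addr_format picks
nir_address_format_64bit_bounded_global, where an address is a vec4 of
32-bit values carrying a 64-bit base address split in two, the size of the
bound range, and a byte offset, and an out-of-bounds load returns zero.
The sketch below is a minimal CPU-side C model of those semantics, assuming
that vec4 layout; struct bounded_addr and load_u32_bounded are names
invented for illustration, not driver code.  In the constant-offset case
the commit message's "(possibly predicated) block load" achieves the same
effect by predicating the message on the bounds comparison rather than
branching around the load.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical model of a bounded-global address: a 64-bit base address
 * split into two 32-bit halves, the size of the bound range, and the byte
 * offset of the access within that range.
 */
struct bounded_addr {
   uint32_t base_lo;
   uint32_t base_hi;
   uint32_t size;
   uint32_t offset;
};

static uint32_t
load_u32_bounded(struct bounded_addr a)
{
   /* Robust behavior: an access that would overrun the range loads zero.
    * This is the comparison a predicated block load folds into the
    * message predicate instead of a branch.
    */
   if (a.offset + sizeof(uint32_t) > a.size)
      return 0;

   uint64_t base = ((uint64_t)a.base_hi << 32) | a.base_lo;
   uint32_t v;
   memcpy(&v, (const void *)(uintptr_t)(base + a.offset), sizeof(v));
   return v;
}

int main(void)
{
   uint32_t buf[4] = { 11, 22, 33, 44 };
   uintptr_t p = (uintptr_t)buf;
   struct bounded_addr a = {
      .base_lo = (uint32_t)p,
      .base_hi = (uint32_t)((uint64_t)p >> 32),
      .size = sizeof(buf),
      .offset = 8,
   };
   /* In bounds: prints 33.  With .offset = 16 it would print 0. */
   printf("%u\n", load_u32_bounded(a));
   return 0;
}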