anv: Do UBO loads with global addresses for bindless
This makes UBO loads in the variable pointers or bindless case work just like SSBO loads, in the sense that they use A64 messages and 64-bit global addresses. The primary difference is that we have an optimization in anv_nir_lower_ubo_loads which uses a (possibly predicated) block load message when the offset is constant, so we get roughly the same performance as we would from plumbing load_ubo all the way to the back-end.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8635>
This commit is contained in:
parent
61749b5a15
commit
b704d03efd
|
@ -100,7 +100,9 @@ anv_descriptor_data_for_type(const struct anv_physical_device *device,
|
|||
*/
|
||||
if (device->has_a64_buffer_access &&
|
||||
(type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ||
|
||||
type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC))
|
||||
type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC ||
|
||||
type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
|
||||
type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC))
|
||||
data |= ANV_DESCRIPTOR_ADDRESS_RANGE;
|
||||
|
||||
/* On Ivy Bridge and Bay Trail, we need to swizzle textures in the shader
|
||||
|
|
|
@ -54,6 +54,20 @@ anv_nir_ssbo_addr_format(const struct anv_physical_device *pdevice,
|
|||
}
|
||||
}
|
||||
|
||||
static inline nir_address_format
|
||||
anv_nir_ubo_addr_format(const struct anv_physical_device *pdevice,
|
||||
bool robust_buffer_access)
|
||||
{
|
||||
if (pdevice->has_a64_buffer_access) {
|
||||
if (robust_buffer_access)
|
||||
return nir_address_format_64bit_bounded_global;
|
||||
else
|
||||
return nir_address_format_64bit_global_32bit_offset;
|
||||
} else {
|
||||
return nir_address_format_32bit_index_offset;
|
||||
}
|
||||
}
|
||||
|
||||
bool anv_nir_lower_ubo_loads(nir_shader *shader);
|
||||
|
||||
void anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
|
||||
|
|
|
@ -681,6 +681,12 @@ try_lower_direct_buffer_intrinsic(nir_builder *b,
|
|||
|
||||
/* Rewrite to 32bit_index_offset whenever we can */
|
||||
addr_format = nir_address_format_32bit_index_offset;
|
||||
} else {
|
||||
assert(nir_deref_mode_is(deref, nir_var_mem_ubo));
|
||||
|
||||
/* Rewrite to 32bit_index_offset whenever we can */
|
||||
if (descriptor_has_bti(desc, state))
|
||||
addr_format = nir_address_format_32bit_index_offset;
|
||||
}
|
||||
|
||||
nir_ssa_def *addr =
|
||||
|
@ -1294,7 +1300,7 @@ anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
|
|||
.layout = layout,
|
||||
.add_bounds_checks = robust_buffer_access,
|
||||
.ssbo_addr_format = anv_nir_ssbo_addr_format(pdevice, robust_buffer_access),
|
||||
.ubo_addr_format = nir_address_format_32bit_index_offset,
|
||||
.ubo_addr_format = anv_nir_ubo_addr_format(pdevice, robust_buffer_access),
|
||||
.lowered_instrs = _mesa_pointer_set_create(mem_ctx),
|
||||
};
|
||||
|
||||
|
|
|
@ -172,7 +172,8 @@ anv_shader_compile_to_nir(struct anv_device *device,
|
|||
.vk_memory_model_device_scope = true,
|
||||
.workgroup_memory_explicit_layout = true,
|
||||
},
|
||||
.ubo_addr_format = nir_address_format_32bit_index_offset,
|
||||
.ubo_addr_format =
|
||||
anv_nir_ubo_addr_format(pdevice, device->robust_buffer_access),
|
||||
.ssbo_addr_format =
|
||||
anv_nir_ssbo_addr_format(pdevice, device->robust_buffer_access),
|
||||
.phys_ssbo_addr_format = nir_address_format_64bit_global,
|
||||
|
@ -735,13 +736,21 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
|
|||
layout, nir, &stage->bind_map);
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
|
||||
nir_address_format_32bit_index_offset);
|
||||
anv_nir_ubo_addr_format(pdevice,
|
||||
pipeline->device->robust_buffer_access));
|
||||
NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
|
||||
anv_nir_ssbo_addr_format(pdevice,
|
||||
pipeline->device->robust_buffer_access));
|
||||
|
||||
/* First run copy-prop to get rid of all of the vec() that address
|
||||
* calculations often create and then constant-fold so that, when we
|
||||
* get to anv_nir_lower_ubo_loads, we can detect constant offsets.
|
||||
*/
|
||||
NIR_PASS_V(nir, nir_copy_prop);
|
||||
NIR_PASS_V(nir, nir_opt_constant_folding);
|
||||
|
||||
NIR_PASS_V(nir, anv_nir_lower_ubo_loads);
|
||||
|
||||
/* We don't support non-uniform UBOs and non-uniform SSBO access is
|
||||
* handled naturally by falling back to A64 messages.
|
||||
*/
|
||||
|
|
Loading…
Reference in New Issue