diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 3732cfb8d42..d4261628fd9 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -4609,17 +4609,11 @@ void visit_load_interpolated_input(isel_context *ctx, nir_intrinsic_instr *instr bool check_vertex_fetch_size(isel_context *ctx, const ac_data_format_info *vtx_info, unsigned offset, unsigned stride, unsigned channels) { + unsigned vertex_byte_size = vtx_info->chan_byte_size * channels; if (vtx_info->chan_byte_size != 4 && channels == 3) return false; - - /* Always split typed vertex buffer loads on GFX6 and GFX10+ to avoid any - * alignment issues that triggers memory violations and eventually a GPU - * hang. This can happen if the stride (static or dynamic) is unaligned and - * also if the VBO offset is aligned to a scalar (eg. stride is 8 and VBO - * offset is 2 for R16G16B16A16_SNORM). - */ return (ctx->options->chip_class >= GFX7 && ctx->options->chip_class <= GFX9) || - (channels == 1); + (offset % vertex_byte_size == 0 && stride % vertex_byte_size == 0); } uint8_t get_fetch_data_format(isel_context *ctx, const ac_data_format_info *vtx_info, diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index c443a329adf..1b982e8e6b3 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -1184,15 +1184,17 @@ handle_vs_input_decl(struct radv_shader_context *ctx, t_offset = LLVMConstInt(ctx->ac.i32, attrib_binding, false); t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset); - /* Always split typed vertex buffer loads on GFX6 and GFX10+ - * to avoid any alignment issues that triggers memory - * violations and eventually a GPU hang. This can happen if - * the stride (static or dynamic) is unaligned and also if the - * VBO offset is aligned to a scalar (eg. stride is 8 and VBO - * offset is 2 for R16G16B16A16_SNORM). 
+ /* Perform per-channel vertex fetch operations if unaligned + * accesses are detected. Only GFX6 and GFX10+ are affected. */ - if (ctx->ac.chip_class == GFX6 || - ctx->ac.chip_class >= GFX10) { + bool unaligned_vertex_fetches = false; + if ((ctx->ac.chip_class == GFX6 || ctx->ac.chip_class >= GFX10) && + vtx_info->chan_format != data_format && + ((attrib_offset % vtx_info->element_size) || + (attrib_stride % vtx_info->element_size))) + unaligned_vertex_fetches = true; + + if (unaligned_vertex_fetches) { unsigned chan_format = vtx_info->chan_format; LLVMValueRef values[4];