From ed351b9a71ec40de0dc010b79fb132293e511f94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 12 Feb 2021 03:54:19 -0500 Subject: [PATCH] ac/llvm: fix visit_load_ubo_buffer to use SMEM for 8 and 16 bits instead of VMEM This has 3 advantages: - The load is issued as SMEM rather than VMEM. - Multiple single-component loads are merged into 1 multi-dword load by LLVM. - The result is always packed, ready for use by packed instructions. Acked-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/amd/llvm/ac_nir_to_llvm.c | 38 +++++++++++++++++------------------ 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index 50542687966..40379622331 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -2142,32 +2142,30 @@ static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx, nir_intrin rsrc = ctx->abi->load_ubo(ctx->abi, binding.desc_set, binding.binding, binding.success, rsrc); } + /* Convert to a scalar 32-bit load. 
*/ if (instr->dest.ssa.bit_size == 64) num_components *= 2; + else if (instr->dest.ssa.bit_size == 16) + num_components = DIV_ROUND_UP(num_components, 2); + else if (instr->dest.ssa.bit_size == 8) + num_components = DIV_ROUND_UP(num_components, 4); - if (instr->dest.ssa.bit_size == 16 || instr->dest.ssa.bit_size == 8) { - unsigned load_bytes = instr->dest.ssa.bit_size / 8; - LLVMValueRef *const results = alloca(num_components * sizeof(LLVMValueRef)); - for (unsigned i = 0; i < num_components; ++i) { - LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, load_bytes * i, 0); + ret = + ac_build_buffer_load(&ctx->ac, rsrc, num_components, NULL, offset, NULL, 0, 0, true, true); - if (load_bytes == 1) { - results[i] = - ac_build_tbuffer_load_byte(&ctx->ac, rsrc, offset, ctx->ac.i32_0, immoffset, 0); - } else { - assert(load_bytes == 2); - results[i] = - ac_build_tbuffer_load_short(&ctx->ac, rsrc, offset, ctx->ac.i32_0, immoffset, 0); - } - } - ret = ac_build_gather_values(&ctx->ac, results, num_components); - } else { - ret = - ac_build_buffer_load(&ctx->ac, rsrc, num_components, NULL, offset, NULL, 0, 0, true, true); - - ret = ac_trim_vector(&ctx->ac, ret, num_components); + /* Convert to the original type. */ + if (instr->dest.ssa.bit_size == 64) { + ret = LLVMBuildBitCast(ctx->ac.builder, ret, + LLVMVectorType(ctx->ac.i64, num_components / 2), ""); + } else if (instr->dest.ssa.bit_size == 16) { + ret = LLVMBuildBitCast(ctx->ac.builder, ret, + LLVMVectorType(ctx->ac.i16, num_components * 2), ""); + } else if (instr->dest.ssa.bit_size == 8) { + ret = LLVMBuildBitCast(ctx->ac.builder, ret, + LLVMVectorType(ctx->ac.i8, num_components * 4), ""); } + ret = ac_trim_vector(&ctx->ac, ret, instr->num_components); ret = LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), ""); return exit_waterfall(ctx, &wctx, ret);