From 7a6dbe0c774a6034a0ce5e885036dede056fa796 Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Fri, 25 Feb 2022 18:52:27 +0100 Subject: [PATCH] aco: Implement image_load d16. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Georg Lehmann Reviewed-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- .../compiler/aco_instruction_selection.cpp | 33 ++++++++++++++----- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 96f6e32b258..e6063c9cfea 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -5977,13 +5977,17 @@ visit_image_load(isel_context* ctx, nir_intrinsic_instr* instr) } if (is_sparse) expand_mask |= 1 << result_size; - unsigned num_components = util_bitcount(dmask) + is_sparse; + + bool d16 = instr->dest.ssa.bit_size == 16; + assert(!d16 || !is_sparse); + + unsigned num_bytes = util_bitcount(dmask) * (d16 ? 
2 : 4) + is_sparse * 4; Temp tmp; - if (num_components == dst.size() && dst.type() == RegType::vgpr) + if (num_bytes == dst.bytes() && dst.type() == RegType::vgpr) tmp = dst; else - tmp = ctx->program->allocateTmp(RegClass(RegType::vgpr, num_components)); + tmp = bld.tmp(RegClass::get(RegType::vgpr, num_bytes)); Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa)); @@ -5991,12 +5995,22 @@ visit_image_load(isel_context* ctx, nir_intrinsic_instr* instr) Temp vindex = emit_extract_vector(ctx, get_ssa_temp(ctx, instr->src[1].ssa), 0, v1); aco_opcode opcode; - switch (util_bitcount(dmask)) { - case 1: opcode = aco_opcode::buffer_load_format_x; break; - case 2: opcode = aco_opcode::buffer_load_format_xy; break; - case 3: opcode = aco_opcode::buffer_load_format_xyz; break; - case 4: opcode = aco_opcode::buffer_load_format_xyzw; break; - default: unreachable(">4 channel buffer image load"); + if (!d16) { + switch (util_bitcount(dmask)) { + case 1: opcode = aco_opcode::buffer_load_format_x; break; + case 2: opcode = aco_opcode::buffer_load_format_xy; break; + case 3: opcode = aco_opcode::buffer_load_format_xyz; break; + case 4: opcode = aco_opcode::buffer_load_format_xyzw; break; + default: unreachable(">4 channel buffer image load"); + } + } else { + switch (util_bitcount(dmask)) { + case 1: opcode = aco_opcode::buffer_load_format_d16_x; break; + case 2: opcode = aco_opcode::buffer_load_format_d16_xy; break; + case 3: opcode = aco_opcode::buffer_load_format_d16_xyz; break; + case 4: opcode = aco_opcode::buffer_load_format_d16_xyzw; break; + default: unreachable(">4 channel buffer image load"); + } } aco_ptr<MUBUF_instruction> load{ create_instruction<MUBUF_instruction>(opcode, Format::MUBUF, 3 + is_sparse, 1)}; @@ -6024,6 +6038,7 @@ visit_image_load(isel_context* ctx, nir_intrinsic_instr* instr) load->glc = access & (ACCESS_VOLATILE | ACCESS_COHERENT) ? 
1 : 0; load->dlc = load->glc && ctx->options->chip_class >= GFX10; load->dim = ac_get_image_dim(ctx->options->chip_class, dim, is_array); + load->d16 = d16; load->dmask = dmask; load->unrm = true; load->da = should_declare_array(ctx, dim, is_array);