From 6eab06b866916d4fd52adf7b8bb6113948a3811a Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga
Date: Thu, 5 May 2016 12:55:44 +0200
Subject: [PATCH] i965/fs: Add do_untyped_vector_read helper

We are going to need the same logic for anything that reads doubles via
untyped messages (CS shared variables and SSBOs). Add a helper function
with that logic so that we can reuse it.

v2:
 - Make this a static function instead of a method of fs_visitor (Iago)
 - We only support types with a size of 4 or 8 (Curro)
 - Avoid retypes by using a separate vgrf for the packed result (Curro)
 - Put dst parameter before source parameters (Curro)

Reviewed-by: Kenneth Graunke
Reviewed-by: Francisco Jerez
---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 77 ++++++++++++++++++++++++
 1 file changed, 77 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 0ff197f3bde..32128bb1025 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -2131,6 +2131,83 @@ fs_visitor::get_indirect_offset(nir_intrinsic_instr *instr)
    return get_nir_src(*offset_src);
 }
 
+/**
+ * Read \p num_components components of dest's type with untyped read
+ * messages, using \p surf_index and \p offset_reg, and store them in the
+ * consecutive components of \p dest.
+ *
+ * Only destination types with a size of 4 or 8 bytes are supported. The
+ * untyped read message returns 32-bit components, so 64-bit data is read in
+ * chunks of up to two components (four 32-bit values) that are shuffled
+ * into 64-bit elements before being copied to \p dest. At most two chunks
+ * are read, so the 64-bit path handles up to four components.
+ */
+static void
+do_untyped_vector_read(const fs_builder &bld,
+                       const fs_reg dest,
+                       const fs_reg surf_index,
+                       const fs_reg offset_reg,
+                       unsigned num_components)
+{
+   if (type_sz(dest.type) == 4) {
+      fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg,
+                                             1 /* dims */,
+                                             num_components,
+                                             BRW_PREDICATE_NONE);
+      read_result.type = dest.type;
+      for (unsigned i = 0; i < num_components; i++)
+         bld.MOV(offset(dest, bld, i), offset(read_result, bld, i));
+   } else if (type_sz(dest.type) == 8) {
+      /* Reading a dvec, so we need to:
+       *
+       * 1. Multiply num_components by 2, to account for the fact that we
+       *    need to read 64-bit components.
+       * 2. Shuffle the result of the load to form valid 64-bit elements
+       * 3. Emit a second load (for components z/w) if needed.
+       */
+      /* Advance a private copy of the offset between loads so the caller's
+       * offset register is left untouched.
+       */
+      fs_reg read_offset = bld.vgrf(BRW_REGISTER_TYPE_UD);
+      bld.MOV(read_offset, offset_reg);
+
+      int iters = num_components <= 2 ? 1 : 2;
+
+      /* Load the dvec, the first iteration loads components x/y, the second
+       * iteration, if needed, loads components z/w
+       */
+      for (int it = 0; it < iters; it++) {
+         /* Compute number of components to read in this iteration */
+         int iter_components = MIN2(2, num_components);
+         num_components -= iter_components;
+
+         /* Read. Since this message reads 32-bit components, we need to
+          * read twice as many components.
+          */
+         fs_reg read_result = emit_untyped_read(bld, surf_index, read_offset,
+                                                1 /* dims */,
+                                                iter_components * 2,
+                                                BRW_PREDICATE_NONE);
+
+         /* Shuffle the 32-bit load result into valid 64-bit data */
+         const fs_reg packed_result = bld.vgrf(dest.type, iter_components);
+         shuffle_32bit_load_result_to_64bit_data(
+            bld, packed_result, read_result, iter_components);
+
+         /* Move each component to its destination */
+         for (int c = 0; c < iter_components; c++) {
+            bld.MOV(offset(dest, bld, it * 2 + c),
+                    offset(packed_result, bld, c));
+         }
+
+         /* Skip the pair of 64-bit components just read (2 * 8 bytes) */
+         bld.ADD(read_offset, read_offset, brw_imm_ud(16));
+      }
+   } else {
+      unreachable("Unsupported type");
+   }
+}
+
 void
 fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld,
                                   nir_intrinsic_instr *instr)