i965/fs: Add do_untyped_vector_read helper

We are going to need the same logic for anything that reads
doubles via untyped messages (CS shared variables and SSBOs). Add a
helper function with that logic so that we can reuse it.

v2:
- Make this a static function instead of a method of fs_visitor (Iago)
- We only support types with a size of 4 or 8 (Curro)
- Avoid retypes by using a separate vgrf for the packed result (Curro)
- Put dst parameter before source parameters (Curro)

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
This commit is contained in:
Iago Toral Quiroga 2016-05-05 12:55:44 +02:00 committed by Samuel Iglesias Gonsálvez
parent b86d4780ed
commit 6eab06b866
1 changed files with 63 additions and 0 deletions

View File

@ -2131,6 +2131,69 @@ fs_visitor::get_indirect_offset(nir_intrinsic_instr *instr)
return get_nir_src(*offset_src);
}
/**
 * Emit untyped surface reads that load \p num_components components from
 * \p surf_index at \p offset_reg into consecutive components of \p dest.
 *
 * Only destination types with a size of 4 or 8 are handled:
 *
 *  - Size 4: a single emit_untyped_read() is issued for all components and
 *    each one is copied to \p dest with a MOV.
 *
 *  - Size 8: the data is read in one or two iterations of at most two
 *    64-bit components each.  Every iteration requests twice as many 32-bit
 *    components from emit_untyped_read(), shuffles them into a separate
 *    vgrf of the destination type and copies that to \p dest.  The read
 *    offset is kept in a private copy of \p offset_reg and advanced by 16
 *    after each iteration (presumably a byte offset: two 64-bit components).
 *
 * Any other type size hits unreachable().
 */
static void
do_untyped_vector_read(const fs_builder &bld,
                       const fs_reg dest,
                       const fs_reg surf_index,
                       const fs_reg offset_reg,
                       unsigned num_components)
{
   if (type_sz(dest.type) == 4) {
      fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg,
                                             1 /* dims */,
                                             num_components,
                                             BRW_PREDICATE_NONE);
      /* Reinterpret the loaded data with the destination type before
       * copying it over.
       */
      read_result.type = dest.type;
      for (unsigned i = 0; i < num_components; i++)
         bld.MOV(offset(dest, bld, i), offset(read_result, bld, i));
   } else if (type_sz(dest.type) == 8) {
      /* Reading a dvec, so we need to:
       *
       * 1. Multiply num_components by 2, to account for the fact that we
       *    need to read 64-bit components.
       * 2. Shuffle the result of the load to form valid 64-bit elements
       * 3. Emit a second load (for components z/w) if needed.
       */

      /* Work on a copy of the offset so the caller's register is not
       * modified when we advance it between iterations.
       */
      fs_reg read_offset = bld.vgrf(BRW_REGISTER_TYPE_UD);
      bld.MOV(read_offset, offset_reg);

      int iters = num_components <= 2 ? 1 : 2;

      /* Load the dvec, the first iteration loads components x/y, the second
       * iteration, if needed, loads components z/w
       */
      for (int it = 0; it < iters; it++) {
         /* Compute number of components to read in this iteration */
         int iter_components = MIN2(2, num_components);
         num_components -= iter_components;

         /* Read. Since this message reads 32-bit components, we need to
          * read twice as many components.
          */
         fs_reg read_result = emit_untyped_read(bld, surf_index, read_offset,
                                                1 /* dims */,
                                                iter_components * 2,
                                                BRW_PREDICATE_NONE);

         /* Shuffle the 32-bit load result into valid 64-bit data.  The
          * shuffled data goes to a separate vgrf of the destination type, so
          * the raw load result never needs to be retyped.
          */
         const fs_reg packed_result = bld.vgrf(dest.type, iter_components);
         shuffle_32bit_load_result_to_64bit_data(
            bld, packed_result, read_result, iter_components);

         /* Move each component to its destination */
         for (int c = 0; c < iter_components; c++) {
            bld.MOV(offset(dest, bld, it * 2 + c),
                    offset(packed_result, bld, c));
         }

         /* Advance the read offset for the next iteration */
         bld.ADD(read_offset, read_offset, brw_imm_ud(16));
      }
   } else {
      unreachable("Unsupported type");
   }
}
void
fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld,
nir_intrinsic_instr *instr)