i965/fs: support doubles with shared variable stores
This is pretty much the same we do with SSBOs. v2: do not shuffle in-place, it is not safe since the original 64-bit data could be used after the write, instead use a temporary like we do for SSBO stores (Iago) Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
parent
943f9442bf
commit
8c6d147373
|
@ -3125,6 +3125,29 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld,
|
|||
/* Writemask */
|
||||
unsigned writemask = instr->const_index[1];
|
||||
|
||||
/* get_nir_src() retypes to integer. Be wary of 64-bit types though
|
||||
* since the untyped writes below operate in units of 32-bits, which
|
||||
* means that we need to write twice as many components each time.
|
||||
* Also, we have to suffle 64-bit data to be in the appropriate layout
|
||||
* expected by our 32-bit write messages.
|
||||
*/
|
||||
unsigned type_size = 4;
|
||||
unsigned bit_size = instr->src[0].is_ssa ?
|
||||
instr->src[0].ssa->bit_size : instr->src[0].reg.reg->bit_size;
|
||||
if (bit_size == 64) {
|
||||
type_size = 8;
|
||||
fs_reg tmp =
|
||||
fs_reg(VGRF, alloc.allocate(alloc.sizes[val_reg.nr]), val_reg.type);
|
||||
shuffle_64bit_data_for_32bit_write(
|
||||
bld,
|
||||
retype(tmp, BRW_REGISTER_TYPE_F),
|
||||
retype(val_reg, BRW_REGISTER_TYPE_DF),
|
||||
instr->num_components);
|
||||
val_reg = tmp;
|
||||
}
|
||||
|
||||
unsigned type_slots = type_size / 4;
|
||||
|
||||
/* Combine groups of consecutive enabled channels in one write
|
||||
* message. We use ffs to find the first enabled channel and then ffs on
|
||||
* the bit-inverse, down-shifted writemask to determine the length of
|
||||
|
@ -3133,22 +3156,29 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld,
|
|||
while (writemask) {
|
||||
unsigned first_component = ffs(writemask) - 1;
|
||||
unsigned length = ffs(~(writemask >> first_component)) - 1;
|
||||
fs_reg offset_reg;
|
||||
|
||||
/* We can't write more than 2 64-bit components at once. Limit the
|
||||
* length of the write to what we can do and let the next iteration
|
||||
* handle the rest
|
||||
*/
|
||||
if (type_size > 4)
|
||||
length = MIN2(2, length);
|
||||
|
||||
fs_reg offset_reg;
|
||||
nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
|
||||
if (const_offset) {
|
||||
offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u32[0] +
|
||||
4 * first_component);
|
||||
type_size * first_component);
|
||||
} else {
|
||||
offset_reg = vgrf(glsl_type::uint_type);
|
||||
bld.ADD(offset_reg,
|
||||
retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_UD),
|
||||
brw_imm_ud(instr->const_index[0] + 4 * first_component));
|
||||
brw_imm_ud(instr->const_index[0] + type_size * first_component));
|
||||
}
|
||||
|
||||
emit_untyped_write(bld, surf_index, offset_reg,
|
||||
offset(val_reg, bld, first_component),
|
||||
1 /* dims */, length,
|
||||
offset(val_reg, bld, first_component * type_slots),
|
||||
1 /* dims */, length * type_slots,
|
||||
BRW_PREDICATE_NONE);
|
||||
|
||||
/* Clear the bits in the writemask that we just wrote, then try
|
||||
|
|
Loading…
Reference in New Issue