nir: Add a new opcode for [un]packing doubles
HLSL doesn't support bitcasting a 64-bit integer to a double. DXIL doesn't have generic pack/unpack instructions, so we lower those to integer bitwise ops. As a result, NIR's generic double pack/unpack would require our backend to emit a bitcast to get a double, but we want to match HLSL semantics and emit MakeDouble/SplitDouble. Adding dedicated opcodes for double pack/unpack allows us to add a pass that emits them instead, which lets our backend emit the right instructions to pack and unpack doubles. Acked-by: Jason Ekstrand <jason@jlekstrand.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10063>
This commit is contained in:
parent
bd219321a5
commit
3c8bcdc863
|
@ -241,6 +241,7 @@ lower_alu_instr_scalar(nir_builder *b, nir_instr *instr, void *_data)
|
||||||
case nir_op_unpack_64_2x32:
|
case nir_op_unpack_64_2x32:
|
||||||
case nir_op_unpack_64_4x16:
|
case nir_op_unpack_64_4x16:
|
||||||
case nir_op_unpack_32_2x16:
|
case nir_op_unpack_32_2x16:
|
||||||
|
case nir_op_unpack_double_2x32_dxil:
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
LOWER_REDUCTION(nir_op_fdot, nir_op_fmul, nir_op_fadd);
|
LOWER_REDUCTION(nir_op_fdot, nir_op_fmul, nir_op_fadd);
|
||||||
|
|
|
@ -1253,3 +1253,15 @@ binop("umul24", tint32, _2src_commutative + associative,
|
||||||
|
|
||||||
unop_convert("fisnormal", tbool1, tfloat, "isnormal(src0)")
|
unop_convert("fisnormal", tbool1, tfloat, "isnormal(src0)")
|
||||||
unop_convert("fisfinite", tbool1, tfloat, "isfinite(src0)")
|
unop_convert("fisfinite", tbool1, tfloat, "isfinite(src0)")
|
||||||
|
|
||||||
|
# DXIL specific double [un]pack
|
||||||
|
# DXIL doesn't support generic [un]pack instructions, so we want those
|
||||||
|
# lowered to bit ops. HLSL doesn't support 64-bit bitcasts to/from
|
||||||
|
# double, only [un]pack. Technically DXIL does, but considering they
|
||||||
|
# can't be generated from HLSL, we want to match what would be coming from DXC.
|
||||||
|
# This is essentially just the standard [un]pack, except that it doesn't get
|
||||||
|
# lowered so we can handle it in the backend and turn it into MakeDouble/SplitDouble
|
||||||
|
unop_horiz("pack_double_2x32_dxil", 1, tuint64, 2, tuint32,
|
||||||
|
"dst.x = src0.x | ((uint64_t)src0.y << 32);")
|
||||||
|
unop_horiz("unpack_double_2x32_dxil", 2, tuint32, 1, tuint64,
|
||||||
|
"dst.x = src0.x; dst.y = src0.x >> 32;")
|
||||||
|
|
|
@ -1201,11 +1201,13 @@ optimizations.extend([
|
||||||
(('unpack_64_2x32_split_y', ('pack_64_2x32_split', a, b)), b),
|
(('unpack_64_2x32_split_y', ('pack_64_2x32_split', a, b)), b),
|
||||||
(('unpack_64_2x32', ('pack_64_2x32_split', a, b)), ('vec2', a, b)),
|
(('unpack_64_2x32', ('pack_64_2x32_split', a, b)), ('vec2', a, b)),
|
||||||
(('unpack_64_2x32', ('pack_64_2x32', a)), a),
|
(('unpack_64_2x32', ('pack_64_2x32', a)), a),
|
||||||
|
(('unpack_double_2x32_dxil', ('pack_double_2x32_dxil', a)), a),
|
||||||
(('pack_64_2x32_split', ('unpack_64_2x32_split_x', a),
|
(('pack_64_2x32_split', ('unpack_64_2x32_split_x', a),
|
||||||
('unpack_64_2x32_split_y', a)), a),
|
('unpack_64_2x32_split_y', a)), a),
|
||||||
(('pack_64_2x32', ('vec2', ('unpack_64_2x32_split_x', a),
|
(('pack_64_2x32', ('vec2', ('unpack_64_2x32_split_x', a),
|
||||||
('unpack_64_2x32_split_y', a))), a),
|
('unpack_64_2x32_split_y', a))), a),
|
||||||
(('pack_64_2x32', ('unpack_64_2x32', a)), a),
|
(('pack_64_2x32', ('unpack_64_2x32', a)), a),
|
||||||
|
(('pack_double_2x32_dxil', ('unpack_double_2x32_dxil', a)), a),
|
||||||
|
|
||||||
# Comparing two halves of an unpack separately. While this optimization
|
# Comparing two halves of an unpack separately. While this optimization
|
||||||
# should be correct for non-constant values, it's less obvious that it's
|
# should be correct for non-constant values, it's less obvious that it's
|
||||||
|
|
Loading…
Reference in New Issue