diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c b/src/compiler/nir/nir_lower_alu_to_scalar.c
index f5884cef702..e72f17f99c7 100644
--- a/src/compiler/nir/nir_lower_alu_to_scalar.c
+++ b/src/compiler/nir/nir_lower_alu_to_scalar.c
@@ -241,6 +241,7 @@ lower_alu_instr_scalar(nir_builder *b, nir_instr *instr, void *_data)
    case nir_op_unpack_64_2x32:
    case nir_op_unpack_64_4x16:
    case nir_op_unpack_32_2x16:
+   case nir_op_unpack_double_2x32_dxil:
       return NULL;
 
       LOWER_REDUCTION(nir_op_fdot, nir_op_fmul, nir_op_fadd);
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index 6966eeda4be..bf5b901b4e0 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -1253,3 +1253,15 @@ binop("umul24", tint32, _2src_commutative + associative,
 
 unop_convert("fisnormal", tbool1, tfloat, "isnormal(src0)")
 unop_convert("fisfinite", tbool1, tfloat, "isfinite(src0)")
+
+# DXIL specific double [un]pack
+# DXIL doesn't support generic [un]pack instructions, so we want those
+# lowered to bit ops. HLSL doesn't support 64bit bitcasts to/from
+# double, only [un]pack. Technically DXIL does, but considering they
+# can't be generated from HLSL, we want to match what would be coming from DXC.
+# This is essentially just the standard [un]pack, except that it doesn't get
+# lowered so we can handle it in the backend and turn it into MakeDouble/SplitDouble
+unop_horiz("pack_double_2x32_dxil", 1, tuint64, 2, tuint32,
+           "dst.x = src0.x | ((uint64_t)src0.y << 32);")
+unop_horiz("unpack_double_2x32_dxil", 2, tuint32, 1, tuint64,
+           "dst.x = src0.x; dst.y = src0.x >> 32;")
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 00fcbe81d71..e86aa4a65c1 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -1201,11 +1201,13 @@ optimizations.extend([
    (('unpack_64_2x32_split_y', ('pack_64_2x32_split', a, b)), b),
    (('unpack_64_2x32', ('pack_64_2x32_split', a, b)), ('vec2', a, b)),
    (('unpack_64_2x32', ('pack_64_2x32', a)), a),
+   (('unpack_double_2x32_dxil', ('pack_double_2x32_dxil', a)), a),
    (('pack_64_2x32_split', ('unpack_64_2x32_split_x', a),
      ('unpack_64_2x32_split_y', a)), a),
    (('pack_64_2x32', ('vec2', ('unpack_64_2x32_split_x', a),
                               ('unpack_64_2x32_split_y', a))), a),
    (('pack_64_2x32', ('unpack_64_2x32', a)), a),
+   (('pack_double_2x32_dxil', ('unpack_double_2x32_dxil', a)), a),
 
    # Comparing two halves of an unpack separately. While this optimization
    # should be correct for non-constant values, it's less obvious that it's
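
Note (illustration only, not part of the patch): the constant-folding strings given to unop_horiz above describe plain bit operations on a 64-bit value. A minimal standalone C sketch of those semantics and of the round-trip identities that the two new nir_opt_algebraic rules rely on follows; the function names here are made up for the example, since the real opcodes operate on NIR SSA values, not C scalars.

/* pack_double_2x32_dxil: dst.x = src0.x | ((uint64_t)src0.y << 32) */
#include <stdint.h>
#include <stdio.h>

static uint64_t pack_double_2x32(uint32_t lo, uint32_t hi)
{
   return (uint64_t)lo | ((uint64_t)hi << 32);
}

/* unpack_double_2x32_dxil: dst.x = src0.x; dst.y = src0.x >> 32
 * (dst components are 32-bit, so dst.x is the truncated low word) */
static void unpack_double_2x32(uint64_t v, uint32_t *lo, uint32_t *hi)
{
   *lo = (uint32_t)v;
   *hi = (uint32_t)(v >> 32);
}

int main(void)
{
   /* Round-trips: unpack(pack(a)) == a and pack(unpack(a)) == a,
    * which is exactly what the two added algebraic rules fold away. */
   uint32_t lo, hi;
   unpack_double_2x32(pack_double_2x32(0xdeadbeefu, 0x40091eb8u), &lo, &hi);
   printf("0x%08x 0x%08x\n", lo, hi); /* prints 0xdeadbeef 0x40091eb8 */
   return 0;
}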