nir: Add lowering support for packing opcodes.
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
This commit is contained in:
parent
1dc312e295
commit
9b8786eba9
|
@ -1469,6 +1469,10 @@ typedef struct nir_shader_compiler_options {
|
|||
bool lower_ffract;
|
||||
|
||||
bool lower_pack_half_2x16;
|
||||
bool lower_pack_unorm_2x16;
|
||||
bool lower_pack_snorm_2x16;
|
||||
bool lower_pack_unorm_4x8;
|
||||
bool lower_pack_snorm_4x8;
|
||||
bool lower_unpack_half_2x16;
|
||||
|
||||
bool lower_extract_byte;
|
||||
|
|
|
@ -134,6 +134,38 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
|
|||
return;
|
||||
}
|
||||
|
||||
case nir_op_pack_uvec2_to_uint: {
|
||||
assert(b->shader->options->lower_pack_snorm_2x16 ||
|
||||
b->shader->options->lower_pack_unorm_2x16);
|
||||
|
||||
nir_ssa_def *word =
|
||||
nir_extract_u16(b, instr->src[0].src.ssa, nir_imm_int(b, 0));
|
||||
nir_ssa_def *val =
|
||||
nir_ior(b, nir_ishl(b, nir_channel(b, word, 1), nir_imm_int(b, 16)),
|
||||
nir_channel(b, word, 0));
|
||||
|
||||
nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val));
|
||||
nir_instr_remove(&instr->instr);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_op_pack_uvec4_to_uint: {
|
||||
assert(b->shader->options->lower_pack_snorm_4x8 ||
|
||||
b->shader->options->lower_pack_unorm_4x8);
|
||||
|
||||
nir_ssa_def *byte =
|
||||
nir_extract_u8(b, instr->src[0].src.ssa, nir_imm_int(b, 0));
|
||||
nir_ssa_def *val =
|
||||
nir_ior(b, nir_ior(b, nir_ishl(b, nir_channel(b, byte, 3), nir_imm_int(b, 24)),
|
||||
nir_ishl(b, nir_channel(b, byte, 2), nir_imm_int(b, 16))),
|
||||
nir_ior(b, nir_ishl(b, nir_channel(b, byte, 1), nir_imm_int(b, 8)),
|
||||
nir_channel(b, byte, 0)));
|
||||
|
||||
nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val));
|
||||
nir_instr_remove(&instr->instr);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_op_fdph: {
|
||||
nir_ssa_def *sum[4];
|
||||
for (unsigned i = 0; i < 3; i++) {
|
||||
|
|
|
@ -237,6 +237,16 @@ unpack_2x16("unorm")
|
|||
unpack_4x8("unorm")
|
||||
unpack_2x16("half")
|
||||
|
||||
# Packs a uvec2 into one uint: the low 16 bits of src0.x land in bits 0..15
# and src0.y is placed in bits 16..31.  This has to agree with
# lower_alu_instr_scalar(), which builds the result as
# (word.y << 16) | word.x, so the second component is shifted LEFT by 16
# (a right shift would throw it away instead of packing it).
unop_horiz("pack_uvec2_to_uint", 1, tuint, 2, tuint, """
dst.x = (src0.x & 0xffff) | (src0.y << 16);
""")
|
||||
|
||||
# Packs a uvec4 into one uint, one byte per component: src0.x goes to the
# lowest byte and src0.w to the highest.  Every component is masked to its
# low 8 bits before being shifted, mirroring the extract_u8 that
# lower_alu_instr_scalar() applies to each channel.  Without the mask, a
# sign-extended input (e.g. the f2i result produced by the pack_snorm_4x8
# lowering for a negative value) would spill into the neighbouring bytes.
# NOTE(review): assumes tuint is evaluated as a 32-bit unsigned value here.
unop_horiz("pack_uvec4_to_uint", 1, tuint, 4, tuint, """
dst.x = ((src0.x & 0xff) <<  0) |
        ((src0.y & 0xff) <<  8) |
        ((src0.z & 0xff) << 16) |
        ((src0.w & 0xff) << 24);
""")
|
||||
|
||||
# Lowered floating point unpacking operations.
|
||||
|
||||
|
|
|
@ -258,6 +258,26 @@ optimizations = [
|
|||
(('extract_u16', a, b),
|
||||
('iand', ('ushr', a, ('imul', b, 16)), 0xffff),
|
||||
'options->lower_extract_word'),
|
||||
|
||||
(('pack_unorm_2x16', 'v'),
|
||||
('pack_uvec2_to_uint',
|
||||
('f2u', ('fround_even', ('fmul', ('fsat', 'v'), 65535.0)))),
|
||||
'options->lower_pack_unorm_2x16'),
|
||||
|
||||
(('pack_unorm_4x8', 'v'),
|
||||
('pack_uvec4_to_uint',
|
||||
('f2u', ('fround_even', ('fmul', ('fsat', 'v'), 255.0)))),
|
||||
'options->lower_pack_unorm_4x8'),
|
||||
|
||||
(('pack_snorm_2x16', 'v'),
|
||||
('pack_uvec2_to_uint',
|
||||
('f2i', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 32767.0)))),
|
||||
'options->lower_pack_snorm_2x16'),
|
||||
|
||||
(('pack_snorm_4x8', 'v'),
|
||||
('pack_uvec4_to_uint',
|
||||
('f2i', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 127.0)))),
|
||||
'options->lower_pack_snorm_4x8'),
|
||||
]
|
||||
|
||||
# Add optimizations to handle the case where the result of a ternary is
|
||||
|
|
Loading…
Reference in New Issue