From 15d08a06e28f0b7d674b28aee364f24481851604 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Wed, 7 Oct 2020 14:46:34 +0100 Subject: [PATCH] aco/tests: expand optimize.const_comparison_ordering tests Signed-off-by: Rhys Perry Reviewed-by: Samuel Pitoiset Part-of: --- src/amd/compiler/tests/test_optimizer.cpp | 81 +++++++++++++++++++---- 1 file changed, 69 insertions(+), 12 deletions(-) diff --git a/src/amd/compiler/tests/test_optimizer.cpp b/src/amd/compiler/tests/test_optimizer.cpp index 46b6fed36f3..a9e74544235 100644 --- a/src/amd/compiler/tests/test_optimizer.cpp +++ b/src/amd/compiler/tests/test_optimizer.cpp @@ -343,26 +343,83 @@ BEGIN_TEST(optimize.const_comparison_ordering) bld.vopc(aco_opcode::v_cmp_eq_f32, bld.def(bld.lm), inputs[0], inputs[0]), bld.vopc(aco_opcode::v_cmp_nge_f32, bld.def(bld.lm), bld.copy(bld.def(v1), Operand(0x40a00000u)), inputs[0]))); - /* NaN */ - uint16_t nan16 = 0x7e00; - uint32_t nan32 = 0x7fc00000; - - //! s2: %tmp6_0 = v_cmp_lt_f16 0x7e00, %a - //! s2: %tmp6_1 = v_cmp_neq_f16 %a, %a - //! s2: %res6, s1: %_:scc = s_or_b64 %tmp6_1, %tmp6_0 + /* similar but unoptimizable expressions */ + //! s2: %tmp6_0 = v_cmp_lt_f32 4.0, %a + //! s2: %tmp6_1 = v_cmp_neq_f32 %a, %a + //! s2: %res6, s1: %_:scc = s_and_b64 %tmp6_1, %tmp6_0 //! p_unit_test 6, %res6 - writeout(6, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc), - bld.vopc(aco_opcode::v_cmp_neq_f16, bld.def(bld.lm), inputs[0], inputs[0]), - bld.vopc(aco_opcode::v_cmp_lt_f16, bld.def(bld.lm), Operand(nan16), inputs[0]))); + writeout(6, bld.sop2(aco_opcode::s_and_b64, bld.def(bld.lm), bld.def(s1, scc), + bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), inputs[0], inputs[0]), + bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), Operand(0x40800000u), inputs[0]))); - //! s2: %tmp7_0 = v_cmp_lt_f32 0x7fc00000, %a - //! s2: %tmp7_1 = v_cmp_neq_f32 %a, %a + //! s2: %tmp7_0 = v_cmp_nge_f32 4.0, %a + //! s2: %tmp7_1 = v_cmp_eq_f32 %a, %a //! s2: %res7, s1: %_:scc = s_or_b64 %tmp7_1, %tmp7_0 //! p_unit_test 7, %res7 writeout(7, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc), + bld.vopc(aco_opcode::v_cmp_eq_f32, bld.def(bld.lm), inputs[0], inputs[0]), + bld.vopc(aco_opcode::v_cmp_nge_f32, bld.def(bld.lm), Operand(0x40800000u), inputs[0]))); + + //! s2: %tmp8_0 = v_cmp_lt_f32 4.0, %d + //! s2: %tmp8_1 = v_cmp_neq_f32 %a, %a + //! s2: %res8, s1: %_:scc = s_or_b64 %tmp8_1, %tmp8_0 + //! p_unit_test 8, %res8 + writeout(8, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc), + bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), inputs[0], inputs[0]), + bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), Operand(0x40800000u), inputs[3]))); + + //! s2: %tmp9_0 = v_cmp_lt_f32 4.0, %a + //! s2: %tmp9_1 = v_cmp_neq_f32 %a, %d + //! s2: %res9, s1: %_:scc = s_or_b64 %tmp9_1, %tmp9_0 + //! p_unit_test 9, %res9 + writeout(9, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc), + bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), inputs[0], inputs[3]), + bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), Operand(0x40800000u), inputs[0]))); + + /* bit sizes */ + //! v2b: %b16 = p_extract_vector %b, 0 + //! s2: %res10 = v_cmp_nge_f16 4.0, %b16 + //! p_unit_test 10, %res10 + Temp input1_16 = bld.pseudo(aco_opcode::p_extract_vector, bld.def(v2b), inputs[1], Operand(0u)); + writeout(10, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc), + bld.vopc(aco_opcode::v_cmp_neq_f16, bld.def(bld.lm), input1_16, input1_16), + bld.vopc(aco_opcode::v_cmp_lt_f16, bld.def(bld.lm), Operand((uint16_t)0x4400u), input1_16))); + + //! s2: %res11 = v_cmp_nge_f64 4.0, %c + //! p_unit_test 11, %res11 + writeout(11, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc), + bld.vopc(aco_opcode::v_cmp_neq_f64, bld.def(bld.lm), inputs[2], inputs[2]), + bld.vopc(aco_opcode::v_cmp_lt_f64, bld.def(bld.lm), Operand(0x4010000000000000u), inputs[2]))); + + /* NaN */ + uint16_t nan16 = 0x7e00; + uint32_t nan32 = 0x7fc00000; + uint64_t nan64 = 0xffffffffffffffffllu; + + //! s2: %tmp12_0 = v_cmp_lt_f16 0x7e00, %a + //! s2: %tmp12_1 = v_cmp_neq_f16 %a, %a + //! s2: %res12, s1: %_:scc = s_or_b64 %tmp12_1, %tmp12_0 + //! p_unit_test 12, %res12 + writeout(12, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc), + bld.vopc(aco_opcode::v_cmp_neq_f16, bld.def(bld.lm), inputs[0], inputs[0]), + bld.vopc(aco_opcode::v_cmp_lt_f16, bld.def(bld.lm), Operand(nan16), inputs[0]))); + + //! s2: %tmp13_0 = v_cmp_lt_f32 0x7fc00000, %a + //! s2: %tmp13_1 = v_cmp_neq_f32 %a, %a + //! s2: %res13, s1: %_:scc = s_or_b64 %tmp13_1, %tmp13_0 + //! p_unit_test 13, %res13 + writeout(13, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc), bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), inputs[0], inputs[0]), bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), Operand(nan32), inputs[0]))); + //! s2: %tmp14_0 = v_cmp_lt_f64 -1, %a + //! s2: %tmp14_1 = v_cmp_neq_f64 %a, %a + //! s2: %res14, s1: %_:scc = s_or_b64 %tmp14_1, %tmp14_0 + //! p_unit_test 14, %res14 + writeout(14, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc), + bld.vopc(aco_opcode::v_cmp_neq_f64, bld.def(bld.lm), inputs[0], inputs[0]), + bld.vopc(aco_opcode::v_cmp_lt_f64, bld.def(bld.lm), Operand(nan64), inputs[0]))); + finish_opt_test(); END_TEST