From 9845c1636ccab18d9294bb36bc56c17c6a02f706 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 9 Oct 2020 07:13:45 +1000 Subject: [PATCH] gallivm: add support for 8/16-bit mul_hi This 32x32 code only needs small tweaks for this case. Reviewed-by: Roland Scheidegger Part-of: --- .gitlab-ci/piglit/cl.txt | 69 +-------------------- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 11 ++-- 2 files changed, 9 insertions(+), 71 deletions(-) diff --git a/.gitlab-ci/piglit/cl.txt b/.gitlab-ci/piglit/cl.txt index d0073d85ffd..a71928b7d04 100644 --- a/.gitlab-ci/piglit/cl.txt +++ b/.gitlab-ci/piglit/cl.txt @@ -51,16 +51,6 @@ program/execute/atomic_int64_xchg-local: skip program/execute/atomic_int64_xor-global: skip program/execute/atomic_int64_xor-global-return: skip program/execute/atomic_int64_xor-local: skip -program/execute/builtin/builtin-char-mad_hi-1.0.generated/mad_hi char1: fail -program/execute/builtin/builtin-char-mad_hi-1.0.generated/mad_hi char16: fail -program/execute/builtin/builtin-char-mad_hi-1.0.generated/mad_hi char2: fail -program/execute/builtin/builtin-char-mad_hi-1.0.generated/mad_hi char4: fail -program/execute/builtin/builtin-char-mad_hi-1.0.generated/mad_hi char8: fail -program/execute/builtin/builtin-char-mul_hi-1.0.generated/mul_hi char1: fail -program/execute/builtin/builtin-char-mul_hi-1.0.generated/mul_hi char16: fail -program/execute/builtin/builtin-char-mul_hi-1.0.generated/mul_hi char2: fail -program/execute/builtin/builtin-char-mul_hi-1.0.generated/mul_hi char4: fail -program/execute/builtin/builtin-char-mul_hi-1.0.generated/mul_hi char8: fail program/execute/builtin/builtin-char-popcount-1.2.generated: skip program/execute/builtin/builtin-float-cos-1.0.generated: timeout program/execute/builtin/builtin-float-fmax-1.0.generated/fmax float1: fail @@ -99,68 +89,13 @@ program/execute/builtin/builtin-float-sin-1.0.generated: timeout program/execute/builtin/builtin-float-sincos-1.0.generated: timeout 
program/execute/builtin/builtin-float-tan-1.0.generated: timeout program/execute/builtin/builtin-int-popcount-1.2.generated: skip -program/execute/builtin/builtin-long-mad_hi-1.0.generated/mad_hi long1: fail -program/execute/builtin/builtin-long-mad_hi-1.0.generated/mad_hi long16: fail -program/execute/builtin/builtin-long-mad_hi-1.0.generated/mad_hi long2: fail -program/execute/builtin/builtin-long-mad_hi-1.0.generated/mad_hi long4: fail -program/execute/builtin/builtin-long-mad_hi-1.0.generated/mad_hi long8: fail -program/execute/builtin/builtin-long-mad_sat-1.0.generated/mad_sat long1: fail -program/execute/builtin/builtin-long-mad_sat-1.0.generated/mad_sat long16: fail -program/execute/builtin/builtin-long-mad_sat-1.0.generated/mad_sat long2: fail -program/execute/builtin/builtin-long-mad_sat-1.0.generated/mad_sat long4: fail -program/execute/builtin/builtin-long-mad_sat-1.0.generated/mad_sat long8: fail -program/execute/builtin/builtin-long-mul_hi-1.0.generated/mul_hi long1: fail -program/execute/builtin/builtin-long-mul_hi-1.0.generated/mul_hi long16: fail -program/execute/builtin/builtin-long-mul_hi-1.0.generated/mul_hi long2: fail -program/execute/builtin/builtin-long-mul_hi-1.0.generated/mul_hi long4: fail -program/execute/builtin/builtin-long-mul_hi-1.0.generated/mul_hi long8: fail program/execute/builtin/builtin-long-popcount-1.2.generated: skip -program/execute/builtin/builtin-short-mad_hi-1.0.generated/mad_hi short1: fail -program/execute/builtin/builtin-short-mad_hi-1.0.generated/mad_hi short16: fail -program/execute/builtin/builtin-short-mad_hi-1.0.generated/mad_hi short2: fail -program/execute/builtin/builtin-short-mad_hi-1.0.generated/mad_hi short4: fail -program/execute/builtin/builtin-short-mad_hi-1.0.generated/mad_hi short8: fail -program/execute/builtin/builtin-short-mul_hi-1.0.generated/mul_hi short1: fail -program/execute/builtin/builtin-short-mul_hi-1.0.generated/mul_hi short16: fail 
-program/execute/builtin/builtin-short-mul_hi-1.0.generated/mul_hi short2: fail -program/execute/builtin/builtin-short-mul_hi-1.0.generated/mul_hi short4: fail -program/execute/builtin/builtin-short-mul_hi-1.0.generated/mul_hi short8: fail program/execute/builtin/builtin-short-popcount-1.2.generated: skip program/execute/builtin/builtin-shuffle-half-ushort: skip program/execute/builtin/builtin-shuffle2-half-ushort: skip -program/execute/builtin/builtin-uchar-mad_hi-1.0.generated/mad_hi uchar1: fail -program/execute/builtin/builtin-uchar-mad_hi-1.0.generated/mad_hi uchar16: fail -program/execute/builtin/builtin-uchar-mad_hi-1.0.generated/mad_hi uchar2: fail -program/execute/builtin/builtin-uchar-mad_hi-1.0.generated/mad_hi uchar4: fail -program/execute/builtin/builtin-uchar-mad_hi-1.0.generated/mad_hi uchar8: fail -program/execute/builtin/builtin-uchar-mul_hi-1.0.generated/mul_hi uchar1: fail -program/execute/builtin/builtin-uchar-mul_hi-1.0.generated/mul_hi uchar16: fail -program/execute/builtin/builtin-uchar-mul_hi-1.0.generated/mul_hi uchar2: fail -program/execute/builtin/builtin-uchar-mul_hi-1.0.generated/mul_hi uchar4: fail -program/execute/builtin/builtin-uchar-mul_hi-1.0.generated/mul_hi uchar8: fail program/execute/builtin/builtin-uchar-popcount-1.2.generated: skip program/execute/builtin/builtin-uint-popcount-1.2.generated: skip -program/execute/builtin/builtin-ulong-mad_hi-1.0.generated/mad_hi ulong1: fail -program/execute/builtin/builtin-ulong-mad_hi-1.0.generated/mad_hi ulong16: fail -program/execute/builtin/builtin-ulong-mad_hi-1.0.generated/mad_hi ulong2: fail -program/execute/builtin/builtin-ulong-mad_hi-1.0.generated/mad_hi ulong4: fail -program/execute/builtin/builtin-ulong-mad_hi-1.0.generated/mad_hi ulong8: fail -program/execute/builtin/builtin-ulong-mul_hi-1.0.generated/mul_hi ulong1: fail -program/execute/builtin/builtin-ulong-mul_hi-1.0.generated/mul_hi ulong16: fail -program/execute/builtin/builtin-ulong-mul_hi-1.0.generated/mul_hi ulong2: 
fail -program/execute/builtin/builtin-ulong-mul_hi-1.0.generated/mul_hi ulong4: fail -program/execute/builtin/builtin-ulong-mul_hi-1.0.generated/mul_hi ulong8: fail program/execute/builtin/builtin-ulong-popcount-1.2.generated: skip -program/execute/builtin/builtin-ushort-mad_hi-1.0.generated/mad_hi ushort1: fail -program/execute/builtin/builtin-ushort-mad_hi-1.0.generated/mad_hi ushort16: fail -program/execute/builtin/builtin-ushort-mad_hi-1.0.generated/mad_hi ushort2: fail -program/execute/builtin/builtin-ushort-mad_hi-1.0.generated/mad_hi ushort4: fail -program/execute/builtin/builtin-ushort-mad_hi-1.0.generated/mad_hi ushort8: fail -program/execute/builtin/builtin-ushort-mul_hi-1.0.generated/mul_hi ushort1: fail -program/execute/builtin/builtin-ushort-mul_hi-1.0.generated/mul_hi ushort16: fail -program/execute/builtin/builtin-ushort-mul_hi-1.0.generated/mul_hi ushort2: fail -program/execute/builtin/builtin-ushort-mul_hi-1.0.generated/mul_hi ushort4: fail -program/execute/builtin/builtin-ushort-mul_hi-1.0.generated/mul_hi ushort8: fail program/execute/builtin/builtin-ushort-popcount-1.2.generated: skip program/execute/call-clobbers-amdgcn: skip program/execute/calls-large-struct: crash @@ -191,8 +126,8 @@ program/execute/vstore/vstore-half-private: skip summary: name: results ---- -------- - pass: 3672 - fail: 107 + pass: 3737 + fail: 42 crash: 6 skip: 73 timeout: 4 diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 717ab14c961..165d73d94fc 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -1178,8 +1178,8 @@ lp_build_mul_32_lohi_cpu(struct lp_build_context *bld, /* - * Widening mul, valid for 32x32 bit -> 64bit only. - * Result is low 32bits, high bits returned in res_hi. + * Widening mul, valid for 8-, 16- and 32-bit elements (8/16-bit operands widen to 32 bits, 32-bit to 64). + * Result is the low N bits; the high bits are returned in res_hi. * * Emits generic code.
*/ @@ -1197,9 +1197,12 @@ lp_build_mul_32_lohi(struct lp_build_context *bld, type_tmp = bld->type; narrow_type = lp_build_vec_type(gallivm, type_tmp); - type_tmp.width *= 2; + if (bld->type.width < 32) + type_tmp.width = 32; + else + type_tmp.width *= 2; wide_type = lp_build_vec_type(gallivm, type_tmp); - shift = lp_build_const_vec(gallivm, type_tmp, 32); + shift = lp_build_const_vec(gallivm, type_tmp, bld->type.width); if (bld->type.sign) { a = LLVMBuildSExt(builder, a, wide_type, "");