util: Use SSE intrinsics in _mesa_lroundeven{f,}.

gcc actually generates this for us now that we use -fno-math-errno
(which is weird, since lrintf()/lrint() don't set errno) but clang still
does not. Presumably helps MSVC as well.

Reduced .text size by 8.5k with gcc before -fno-math-errno.

   text     data      bss      dec      hex  filename
4935850   195136    26192  5157178   4eb13a  i965_dri.so before
4927225   195128    26192  5148545   4e8f81  i965_dri.so after

Reviewed-by: Roland Scheidegger <sroland@vmware.com>
This commit is contained in:
Matt Turner 2015-06-29 09:38:34 -07:00
parent 3c050222b0
commit 680de24545
1 changed files with 22 additions and 0 deletions

View File

@ -25,6 +25,12 @@
#define _ROUNDING_H
#include <math.h>
#include <limits.h>
#ifdef __x86_64__
#include <xmmintrin.h>
#include <emmintrin.h>
#endif
#ifdef __SSE4_1__
#include <smmintrin.h>
@ -87,7 +93,15 @@ _mesa_roundeven(double x)
static inline long
_mesa_lroundevenf(float x)
{
   /*
    * Convert x to a long, rounding according to the current floating-point
    * rounding mode (round-to-nearest-even unless the mode has been changed).
    *
    * On x86-64 the scalar SSE conversion instruction is used directly; other
    * targets go through lrintf().
    */
#ifdef __x86_64__
   /*
    * Pick the conversion that matches the width of long.  The test uses
    * LONG_MAX/INT_MAX, which ISO C guarantees in <limits.h>; LONG_BIT is an
    * XSI extension that is left undefined in strict ISO C builds, which
    * would make every branch vanish and leave the function without a return.
    */
#if LONG_MAX > INT_MAX
   /* long is wider than int (LP64): 64-bit conversion. */
   return _mm_cvtss_si64(_mm_load_ss(&x));
#else
   /* long is the same width as int (e.g. the x32 ABI): 32-bit conversion. */
   return _mm_cvtss_si32(_mm_load_ss(&x));
#endif
#else
   return lrintf(x);
#endif
}
/**
@ -97,7 +111,15 @@ _mesa_lroundevenf(float x)
static inline long
_mesa_lroundeven(double x)
{
   /*
    * Convert x to a long, rounding according to the current floating-point
    * rounding mode (round-to-nearest-even unless the mode has been changed).
    *
    * On x86-64 the scalar SSE2 conversion instruction is used directly; other
    * targets go through lrint().
    */
#ifdef __x86_64__
   /*
    * Pick the conversion that matches the width of long.  The test uses
    * LONG_MAX/INT_MAX, which ISO C guarantees in <limits.h>; LONG_BIT is an
    * XSI extension that is left undefined in strict ISO C builds, which
    * would make every branch vanish and leave the function without a return.
    */
#if LONG_MAX > INT_MAX
   /* long is wider than int (LP64): 64-bit conversion. */
   return _mm_cvtsd_si64(_mm_load_sd(&x));
#else
   /* long is the same width as int (e.g. the x32 ABI): 32-bit conversion. */
   return _mm_cvtsd_si32(_mm_load_sd(&x));
#endif
#else
   return lrint(x);
#endif
}
#endif