* faster, smaller, cleaner implementation than the bit hacks of fdlibm
* use arithmetic like y=(double)(x+0x1p52)-0x1p52, which is an integer neighbor of x in all rounding modes (0<=x<0x1p52), and only use bit hacks when that's faster and smaller (for float it usually is)
* the code assumes standard excess precision handling for casts
* long double code supports both ld80 and ld128
* nearbyint is not changed (it is a wrapper around rint)
19 lines
303 B
C
19 lines
303 B
C
#include "libm.h"
|
|
|
|
/*
 * Round x toward zero to an integral value by clearing the fraction
 * bits of its binary64 representation.
 *
 * Returns x unchanged when it has no fraction bits set (this includes
 * +-0, and every value whose exponent field puts it at or above 2^52,
 * which also covers the all-ones exponent field). Values with
 * magnitude below 1 collapse to a zero that keeps the sign of x.
 */
double trunc(double x)
{
	union {double value; uint64_t bits;} pun = {x};
	/*
	 * Unbiased exponent plus 12: the count of leading bits (1 sign bit,
	 * 11 exponent bits, then the integer part of the mantissa) that
	 * must be preserved.
	 */
	int keep = (int)((pun.bits >> 52) & 0x7ff) - 0x3ff + 12;

	/* All 64 bits are significant: nothing to clear. */
	if (keep >= 52 + 12) {
		return x;
	}
	/* Negative unbiased exponent: keep only the sign bit. */
	if (keep < 12) {
		keep = 1;
	}

	/* Mask selecting the bits that lie below the preserved prefix. */
	uint64_t frac = -1ULL >> keep;
	if ((pun.bits & frac) == 0) {
		return x;
	}

	/*
	 * NOTE(review): FORCE_EVAL comes from libm.h; presumably it forces
	 * the addition to be evaluated so the inexact flag is raised for a
	 * non-integral argument -- confirm against the macro definition.
	 */
	FORCE_EVAL(x + 0x1p120f);
	pun.bits &= ~frac;
	return pun.value;
}
|