When FLT_EVAL_METHOD!=0 (only i386 with x87 fp) the excess
precision of an expression must be removed in an assignment.
(gcc needs -fexcess-precision=standard or -std=c99 for this)
This is done by extra load/store instructions, which add code
bloat when a lot of temporaries are used and make the result
less precise in many cases.
Using double_t and float_t avoids these issues on i386 and
it makes no difference on other archs.
For now only a few functions are modified where the excess
precision is clearly beneficial (mostly polynomial evaluations
with temporaries).
object size differences on i386, gcc-4.8:
old new
__cosdf.o 123 95
__cos.o 199 169
__sindf.o 131 95
__sin.o 225 203
__tandf.o 207 151
__tan.o 605 499
erff.o 1470 1416
erf.o 1703 1649
j0f.o 1779 1745
j0.o 2308 2274
j1f.o 1602 1568
j1.o 2286 2252
tgamma.o 1431 1424
math/*.o 64164 63635
36 lines
1.1 KiB
C
36 lines
1.1 KiB
C
/* origin: FreeBSD /usr/src/lib/msun/src/k_sinf.c */
|
|
/*
|
|
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
|
|
* Optimized by Bruce D. Evans.
|
|
*/
|
|
/*
|
|
* ====================================================
|
|
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
*
|
|
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
* Permission to use, copy, modify, and distribute this
|
|
* software is freely granted, provided that this notice
|
|
* is preserved.
|
|
* ====================================================
|
|
*/
|
|
|
|
#include "libm.h"
|
|
|
|
/*
 * Coefficients of the odd polynomial evaluated by __sindf():
 *     x + S1*x^3 + S2*x^5 + S3*x^7 + S4*x^9
 * Error bound: |sin(x)/x - s(x)| < 2**-37.5 (~[-4.89e-12, 4.824e-12]).
 *
 * Each literal is an exact hexadecimal floating constant; the decimal
 * value beside it is informational only.
 */
static const double S1 = -0x1.5555554cbac77p-3;  /* -0.166666666416265235595 */
static const double S2 =  0x1.11110896efbb2p-7;  /*  0.0083333293858894631756 */
static const double S3 = -0x1.a00f9e2cae774p-13; /* -0.000198393348360966317347 */
static const double S4 =  0x1.6cd878c3b46a7p-19; /*  0.0000027183114939898219064 */
|
|
|
|
float __sindf(double x)
|
|
{
|
|
double_t r, s, w, z;
|
|
|
|
/* Try to optimize for parallel evaluation as in __tandf.c. */
|
|
z = x*x;
|
|
w = z*z;
|
|
r = S3 + z*S4;
|
|
s = z*x;
|
|
return (x + s*(S1 + z*S2)) + s*w*r;
|
|
}
|