-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy path0001-sin-cos-slow-paths-avoid-slow-paths-for-small-inputs.patch
196 lines (175 loc) · 5.41 KB
/
0001-sin-cos-slow-paths-avoid-slow-paths-for-small-inputs.patch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
From 3eaef825303efc4175751d3ca8cb50d873ddb081 Mon Sep 17 00:00:00 2001
From: Wilco Dijkstra <[email protected]>
Date: Wed, 21 Mar 2018 17:50:07 +0000
Subject: [PATCH 1/7] sin/cos slow paths: avoid slow paths for small inputs
This series of patches removes the slow patchs from sin, cos and sincos.
Besides greatly simplifying the implementation, the new version is also much
faster for inputs up to PI (41% faster) and for large inputs needing range
reduction (27% faster).
ULP is ~0.55 with no errors found after testing 1.6 billion inputs across most
of the range with mpsin and mpcos. The number of incorrectly rounded results
(ie. ULP >0.5) is at most ~2750 per million inputs between 0.125 and 0.5,
the average is ~850 per million between 0 and PI.
Tested on AArch64 and x86_64 with no regressions.
The first patch removes the slow paths for the cases where the input is small
and doesn't require range reduction. Update ULP tables for sin, cos and sincos
on AArch64 and x86_64.
ChangeLog:
2018-03-20 Wilco Dijkstra <[email protected]>
* sysdeps/aarch64/libm-test-ulps: Update ULP for sin, cos, sincos.
* sysdeps/ieee754/dbl-64/s_sin.c (__sin): Remove slow paths for small inputs.
(__cos): Likewise.
* sysdeps/x86_64/fpu/libm-test-ulps: Update ULP for sin, cos, sincos.
---
sysdeps/aarch64/libm-test-ulps | 6 ++++++
sysdeps/ieee754/dbl-64/s_sin.c | 40 ++++++++++++++-------------------------
sysdeps/x86_64/fpu/libm-test-ulps | 6 ++++++
3 files changed, 26 insertions(+), 26 deletions(-)
diff --git a/sysdeps/aarch64/libm-test-ulps b/sysdeps/aarch64/libm-test-ulps
index 1f469803be5..be06085154d 100644
--- a/sysdeps/aarch64/libm-test-ulps
+++ b/sysdeps/aarch64/libm-test-ulps
@@ -1012,7 +1012,9 @@ ildouble: 2
ldouble: 2
Function: "cos":
+double: 1
float: 1
+idouble: 1
ifloat: 1
ildouble: 1
ldouble: 1
@@ -1970,7 +1972,9 @@ ildouble: 2
ldouble: 2
Function: "sin":
+double: 1
float: 1
+idouble: 1
ifloat: 1
ildouble: 1
ldouble: 1
@@ -2000,7 +2004,9 @@ ildouble: 3
ldouble: 3
Function: "sincos":
+double: 1
float: 1
+idouble: 1
ifloat: 1
ildouble: 1
ldouble: 1
diff --git a/sysdeps/ieee754/dbl-64/s_sin.c b/sysdeps/ieee754/dbl-64/s_sin.c
index 8c589cbd4ab..0c16b728df1 100644
--- a/sysdeps/ieee754/dbl-64/s_sin.c
+++ b/sysdeps/ieee754/dbl-64/s_sin.c
@@ -448,7 +448,7 @@ SECTION
#endif
__sin (double x)
{
- double xx, res, t, cor;
+ double xx, t, cor;
mynumber u;
int4 k, m;
double retval = 0;
@@ -471,26 +471,22 @@ __sin (double x)
xx = x * x;
/* Taylor series. */
t = POLYNOMIAL (xx) * (xx * x);
- res = x + t;
- cor = (x - res) + t;
- retval = (res == res + 1.07 * cor) ? res : slow (x);
+ /* Max ULP of x + t is 0.535. */
+ retval = x + t;
} /* else if (k < 0x3fd00000) */
/*---------------------------- 0.25<|x|< 0.855469---------------------- */
else if (k < 0x3feb6000)
{
- res = do_sin (x, 0, &cor);
- retval = (res == res + 1.096 * cor) ? res : slow1 (x);
- retval = __copysign (retval, x);
+ /* Max ULP is 0.548. */
+ retval = __copysign (do_sin (x, 0, &cor), x);
} /* else if (k < 0x3feb6000) */
/*----------------------- 0.855469 <|x|<2.426265 ----------------------*/
else if (k < 0x400368fd)
{
-
t = hp0 - fabs (x);
- res = do_cos (t, hp1, &cor);
- retval = (res == res + 1.020 * cor) ? res : slow2 (x);
- retval = __copysign (retval, x);
+ /* Max ULP is 0.51. */
+ retval = __copysign (do_cos (t, hp1, &cor), x);
} /* else if (k < 0x400368fd) */
#ifndef IN_SINCOS
@@ -541,7 +537,7 @@ SECTION
#endif
__cos (double x)
{
- double y, xx, res, cor, a, da;
+ double y, xx, cor, a, da;
mynumber u;
int4 k, m;
@@ -561,8 +557,8 @@ __cos (double x)
else if (k < 0x3feb6000)
{ /* 2^-27 < |x| < 0.855469 */
- res = do_cos (x, 0, &cor);
- retval = (res == res + 1.020 * cor) ? res : cslow2 (x);
+ /* Max ULP is 0.51. */
+ retval = do_cos (x, 0, &cor);
} /* else if (k < 0x3feb6000) */
else if (k < 0x400368fd)
@@ -571,20 +567,12 @@ __cos (double x)
a = y + hp1;
da = (y - a) + hp1;
xx = a * a;
+ /* Max ULP is 0.501 if xx < 0.01588 or 0.518 otherwise.
+ Range reduction uses 106 bits here which is sufficient. */
if (xx < 0.01588)
- {
- res = TAYLOR_SIN (xx, a, da, cor);
- cor = 1.02 * cor + __copysign (1.0e-31, cor);
- retval = (res == res + cor) ? res : sloww (a, da, x, true);
- }
+ retval = TAYLOR_SIN (xx, a, da, cor);
else
- {
- res = do_sin (a, da, &cor);
- cor = 1.035 * cor + __copysign (1.0e-31, cor);
- retval = ((res == res + cor) ? __copysign (res, a)
- : sloww1 (a, da, x, true));
- }
-
+ retval = __copysign (do_sin (a, da, &cor), a);
} /* else if (k < 0x400368fd) */
diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps
index 48e53f7ef2c..bbb8a4d0754 100644
--- a/sysdeps/x86_64/fpu/libm-test-ulps
+++ b/sysdeps/x86_64/fpu/libm-test-ulps
@@ -1262,7 +1262,9 @@ ildouble: 1
ldouble: 1
Function: "cos":
+double: 1
float128: 1
+idouble: 1
ifloat128: 1
ildouble: 1
ldouble: 1
@@ -2528,7 +2530,9 @@ Function: "pow_vlen8_avx2":
float: 3
Function: "sin":
+double: 1
float128: 1
+idouble: 1
ifloat128: 1
ildouble: 1
ldouble: 1
@@ -2578,7 +2582,9 @@ Function: "sin_vlen8_avx2":
float: 1
Function: "sincos":
+double: 1
float128: 1
+idouble: 1
ifloat128: 1
ildouble: 1
ldouble: 1
--
2.16.2