/* Optimized sincosf function.
   Copyright (C) 2012-2017 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
18
19#include <sysdep.h>
20#include <errno.h>
21
/* Short algorithm description:
 *
 *  1) if |x|==0:    sin(x)=x,
 *                   cos(x)=1.
 *  2) if |x|<2^-27: sin(x)=x-x*DP_SMALL, raising underflow only when needed,
 *                   cos(x)=1-|x|.
 *  3) if |x|<2^-5 : sin(x)=x+x*x^2*DP_SIN2_0+x^5*DP_SIN2_1,
 *                   cos(x)=1+1*x^2*DP_COS2_0+x^4*DP_COS2_1.
 *  4) if |x|< Pi/4: sin(x)=x+x*x^2*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))),
 *                   cos(x)=1+1*x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))).
 *  5) if |x| < 9*Pi/4:
 *      5.1) Range reduction:
 *          k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+1, t=|x|-j*Pi/4.
 *      5.2) Reconstruction:
 *          sign_sin = sign(x) * (-1.0)^(( n   >>2)&1)
 *          sign_cos =           (-1.0)^(((n+2)>>2)&1)
 *          poly_sin = ((((S4*t^2 + S3)*t^2 + S2)*t^2 + S1)*t^2 + S0)*t^2*t+t
 *          poly_cos = ((((C4*t^2 + C3)*t^2 + C2)*t^2 + C1)*t^2 + C0)*t^2*1+1
 *          if ((n&2) != 0) {
 *              using cos(t) and sin(t) polynomials for |t|<Pi/4, results are
 *              cos(x) = poly_sin * sign_cos
 *              sin(x) = poly_cos * sign_sin
 *          } else {
 *              sin(x) = poly_sin * sign_sin
 *              cos(x) = poly_cos * sign_cos
 *          }
 *  6) if |x| < 2^23, large args:
 *      6.1) Range reduction:
 *          k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1, t=|x|-j*Pi/4,
 *          where Pi/4 is applied as a high part plus a low part
 *          (DP_PIO4HI, DP_PIO4LO).
 *      6.2) Reconstruction same as (5.2).
 *  7) if |x| >= 2^23, very large args:
 *      7.1) Range reduction:
 *          k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1, t=|x|-j*Pi/4,
 *          where |x|*(4/Pi) is accumulated from the pieces of 4/Pi stored
 *          in the _FPI table.
 *      7.2) Reconstruction same as (5.2).
 *  8) if x is Inf, return x-x, and set errno=EDOM.
 *  9) if x is NaN, return x-x.
 *
 * Special cases:
 *  sin/cos(+-0) = +-0/1 not raising inexact/underflow,
 *  sin/cos(subnormal) raises inexact/underflow,
 *  sin/cos(min_normalized) raises inexact/underflow,
 *  sin/cos(normalized) raises inexact,
 *  sin/cos(Inf) = NaN, raises invalid, sets errno to EDOM,
 *  sin/cos(NaN) = NaN.
 */
67
/* Registers carrying the two result pointers on entry.  */
# define ARG_SIN_PTR	%rdi
# define ARG_COS_PTR	%rsi

	.text
ENTRY(__sincosf)
	/*
	 * In:   %xmm0       = single precision argument x
	 *       ARG_SIN_PTR = where sin(x) is stored
	 *       ARG_COS_PTR = where cos(x) is stored
	 *
	 * Values set up here and consumed by the other code paths:
	 *       %xmm7 = untouched copy of x (SP)
	 *       %r8d  = raw bit pattern of x
	 *       %eax  = bit pattern of |x|
	 *       %xmm0 = x widened to DP (made absolute once |x|>=Pi/4)
	 */

	movd	%xmm0, %eax		/* raw bits of x */
	movaps	%xmm0, %xmm7		/* keep x itself */
	cvtss2sd %xmm0, %xmm0		/* widen x to DP */
	movss	L(SP_ABS_MASK)(%rip), %xmm3
	movl	%eax, %r8d		/* keep the raw bits */
	andl	$0x7fffffff, %eax	/* drop the sign: bits of |x| */

	cmpl	$0x3f490fdb, %eax	/* below Pi/4 ? */
	jb	L(arg_less_pio4)

	/* From here on |x|>=Pi/4: work with absolute values.  */
	andps	%xmm7, %xmm3		/* SP |x| */
	andpd	L(DP_ABS_MASK)(%rip),%xmm0 /* DP |x| */
	movss	L(SP_INVPIO4)(%rip), %xmm2 /* SP 4/Pi */

	cmpl	$0x40e231d6, %eax	/* at or above 9*Pi/4 ? */
	jae	L(large_args)

	/* Pi/4<=|x|<9*Pi/4: reduce with the PIO4J table.  */
	mulss	%xmm3, %xmm2		/* SP |x|*(4/Pi) */
	movl	%r8d, %ecx		/* raw bits of x */
	cvttss2si %xmm2, %eax		/* k = trunc(|x|/(Pi/4)) */
	lea	L(PIO4J)(%rip), %r9
	shrl	$29, %ecx		/* sign bit of x now in bit 2 */
	addl	$1, %eax		/* n = k+1 */
	movl	$0x0e, %edx
	andl	%eax, %edx		/* j = n&0x0e */
	subsd	(%r9,%rdx,8), %xmm0	/* t = |x| - PIO4J[j] */
	/* Falls through into L(reconstruction).  */
105
L(reconstruction):
	/*
	 * In:  %eax  = n
	 *      %xmm0 = t (low DP lane)
	 *      %ecx  = bits of x shifted so that the sign sits in bit 2
	 *
	 * Both polynomials are evaluated at once: the low DP lane carries
	 * the sin polynomial (coefficients S*, multiplier t), the high DP
	 * lane the cos polynomial (coefficients C*, multiplier 1).
	 * Lane pairs are written high|low in the comments below.
	 */

	movaps	%xmm0, %xmm4		/* low lane: t */
	movhpd	L(DP_ONES)(%rip), %xmm4 /* 1|t */
	mulsd	%xmm0, %xmm0		/* y = t^2 */
	movl	$2, %edx
	unpcklpd %xmm0, %xmm0		/* y|y */
	addl	%eax, %edx		/* n+2 */
	movaps	%xmm0, %xmm1		/* keep y|y */
	mulpd	%xmm0, %xmm0		/* z|z, z = y^2 = t^4 */

	movaps	L(DP_SC4)(%rip), %xmm2	/* C4|S4 */
	mulpd	%xmm0, %xmm2		/* z*S4 */
	movaps	L(DP_SC3)(%rip), %xmm3	/* C3|S3 */
	mulpd	%xmm0, %xmm3		/* z*S3 */
	xorl	%eax, %ecx		/* bit 2: sign(x) ^ bit 2 of n */
	addpd	L(DP_SC2)(%rip), %xmm2	/* S2+z*S4 */
	mulpd	%xmm0, %xmm2		/* z*(S2+z*S4) */
	shrl	$2, %edx		/* (n+2)>>2 */
	addpd	L(DP_SC1)(%rip), %xmm3	/* S1+z*S3 */
	mulpd	%xmm0, %xmm3		/* z*(S1+z*S3) */
	shrl	$2, %ecx		/* bring that bit down to bit 0 */
	addpd	L(DP_SC0)(%rip), %xmm2	/* S0+z*(S2+z*S4) */
	andl	$1, %edx		/* sign_cos index = ((n+2)>>2)&1 */
	mulpd	%xmm1, %xmm2		/* y*(S0+z*(S2+z*S4)) */
	andl	$1, %ecx		/* sign_sin index = sign(x)^((n>>2)&1) */
	addpd	%xmm2, %xmm3		/* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
	lea	L(DP_ONES)(%rip), %r9	/* {+1.0, -1.0} indexed by sign */
	mulpd	%xmm4, %xmm3		/* multiply by 1|t */
	testl	$2, %eax		/* (n&2) != 0 ? */
	addpd	%xmm4, %xmm3		/* poly_cos|poly_sin; flags untouched */
	jnz	L(sin_from_cos_poly)

/* L(sin_from_sin_poly): */
	/*
	 * (n&2) == 0:
	 * sin(x) = poly_sin * sign_sin
	 * cos(x) = poly_cos * sign_cos
	 */
	movsd	(%r9,%rcx,8), %xmm4	/* 0|sign_sin */
	movhpd	(%r9,%rdx,8), %xmm4	/* sign_cos|sign_sin */
	mulpd	%xmm4, %xmm3		/* cos(x)|sin(x) */
	cvtpd2ps %xmm3, %xmm0		/* narrow both lanes to SP */
	movss	%xmm0, (ARG_SIN_PTR)	/* lane 0 is sin(x) */
	shufps	$1, %xmm0, %xmm0	/* bring lane 1 down to lane 0 */
	movss	%xmm0, (ARG_COS_PTR)	/* it is cos(x) */
	ret

	.p2align 4
L(sin_from_cos_poly):
	/*
	 * (n&2) != 0, the polynomials swap roles:
	 * cos(x) = poly_sin * sign_cos
	 * sin(x) = poly_cos * sign_sin
	 */
	movsd	(%r9,%rdx,8), %xmm4	/* 0|sign_cos */
	movhpd	(%r9,%rcx,8), %xmm4	/* sign_sin|sign_cos */
	mulpd	%xmm4, %xmm3		/* sin(x)|cos(x) */
	cvtpd2ps %xmm3, %xmm0		/* narrow both lanes to SP */
	movss	%xmm0, (ARG_COS_PTR)	/* lane 0 is cos(x) */
	shufps	$1, %xmm0, %xmm0	/* bring lane 1 down to lane 0 */
	movss	%xmm0, (ARG_SIN_PTR)	/* it is sin(x) */
	ret
170
171 .p2align 4
172L(large_args):
173 /* Here if |x|>=9*Pi/4 */
174 cmpl $0x7f800000, %eax /* x is Inf or NaN ? */
175 jae L(arg_inf_or_nan)
176
177 /* Here if finite |x|>=9*Pi/4 */
178 cmpl $0x4b000000, %eax /* |x|<2^23 ? */
179 jae L(very_large_args)
180
181 /* Here if 9*Pi/4<=|x|<2^23 */
182 movsd L(DP_INVPIO4)(%rip), %xmm1 /* 1/(Pi/4) */
183 mulsd %xmm0, %xmm1 /* |x|/(Pi/4) */
184 cvttsd2si %xmm1, %eax /* k=trunc(|x|/(Pi/4)) */
185 addl $1, %eax /* k+1 */
186 movl %eax, %edx
187 andl $0xfffffffe, %edx /* j=(k+1)&0xfffffffe */
188 cvtsi2sdl %edx, %xmm4 /* DP j */
189 movl %r8d, %ecx /* Load x */
190 movsd L(DP_PIO4HI)(%rip), %xmm2 /* -PIO4HI = high part of -Pi/4 */
191 shrl $29, %ecx /* (sign of x) << 2 */
192 mulsd %xmm4, %xmm2 /* -j*PIO4HI */
193 movsd L(DP_PIO4LO)(%rip), %xmm3 /* -PIO4LO = low part of -Pi/4 */
194 addsd %xmm2, %xmm0 /* |x| - j*PIO4HI */
195 mulsd %xmm3, %xmm4 /* j*PIO4LO */
196 addsd %xmm4, %xmm0 /* t = |x| - j*PIO4HI - j*PIO4LO */
197 jmp L(reconstruction)
198
199 .p2align 4
200L(very_large_args):
201 /* Here if finite |x|>=2^23 */
202
203 /* bitpos = (ix>>23) - BIAS_32 + 59; */
204 shrl $23, %eax /* eb = biased exponent of x */
205 subl $68, %eax /* bitpos=eb-0x7f+59, where 0x7f */
206 /*is exponent bias */
207 movl $28, %ecx /* %cl=28 */
208 movl %eax, %edx /* bitpos copy */
209
210 /* j = bitpos/28; */
211 div %cl /* j in register %al=%ax/%cl */
212 movapd %xmm0, %xmm3 /* |x| */
213 andl $0xff, %eax /* clear unneeded remainder from %ah*/
214
215 imull $28, %eax, %ecx /* j*28 */
216 lea L(_FPI)(%rip), %r9
217 movsd L(DP_HI_MASK)(%rip), %xmm4 /* DP_HI_MASK */
218 movapd %xmm0, %xmm5 /* |x| */
219 mulsd -16(%r9,%rax,8), %xmm3 /* tmp3 = FPI[j-2]*|x| */
220 movapd %xmm0, %xmm1 /* |x| */
221 mulsd -8(%r9,%rax,8), %xmm5 /* tmp2 = FPI[j-1]*|x| */
222 mulsd (%r9,%rax,8), %xmm0 /* tmp0 = FPI[j]*|x| */
223 addl $19, %ecx /* j*28+19 */
224 mulsd 8(%r9,%rax,8), %xmm1 /* tmp1 = FPI[j+1]*|x| */
225 cmpl %ecx, %edx /* bitpos>=j*28+19 ? */
226 jl L(very_large_skip1)
227
228 /* Here if bitpos>=j*28+19 */
229 andpd %xmm3, %xmm4 /* HI(tmp3) */
230 subsd %xmm4, %xmm3 /* tmp3 = tmp3 - HI(tmp3) */
231L(very_large_skip1):
232
233 movsd L(DP_2POW52)(%rip), %xmm6
234 movapd %xmm5, %xmm2 /* tmp2 copy */
235 addsd %xmm3, %xmm5 /* tmp5 = tmp3 + tmp2 */
236 movl $1, %edx
237 addsd %xmm5, %xmm6 /* tmp6 = tmp5 + 2^52 */
238 movsd 8+L(DP_2POW52)(%rip), %xmm4
239 movd %xmm6, %eax /* k = I64_LO(tmp6); */
240 addsd %xmm6, %xmm4 /* tmp4 = tmp6 - 2^52 */
241 movl %r8d, %ecx /* Load x */
242 comisd %xmm5, %xmm4 /* tmp4 > tmp5 ? */
243 jbe L(very_large_skip2)
244
245 /* Here if tmp4 > tmp5 */
246 subl $1, %eax /* k-- */
247 addsd 8+L(DP_ONES)(%rip), %xmm4 /* tmp4 -= 1.0 */
248L(very_large_skip2):
249
250 andl %eax, %edx /* k&1 */
251 lea L(DP_ZERONE)(%rip), %r9
252 subsd %xmm4, %xmm3 /* tmp3 -= tmp4 */
253 addsd (%r9,%rdx,8), %xmm3 /* t = DP_ZERONE[k&1] + tmp3 */
254 addsd %xmm2, %xmm3 /* t += tmp2 */
255 shrl $29, %ecx /* (sign of x) << 2 */
256 addsd %xmm3, %xmm0 /* t += tmp0 */
257 addl $1, %eax /* n=k+1 */
258 addsd %xmm1, %xmm0 /* t += tmp1 */
259 mulsd L(DP_PIO4)(%rip), %xmm0 /* t *= PI04 */
260
261 jmp L(reconstruction) /* end of very_large_args peth */
262
263 .p2align 4
264L(arg_less_pio4):
265 /* Here if |x|<Pi/4 */
266 cmpl $0x3d000000, %eax /* |x|<2^-5 ? */
267 jl L(arg_less_2pn5)
268
269 /* Here if 2^-5<=|x|<Pi/4 */
270 movaps %xmm0, %xmm3 /* DP x */
271 movhpd L(DP_ONES)(%rip), %xmm3 /* DP 1|x */
272 mulsd %xmm0, %xmm0 /* DP y=x^2 */
273 unpcklpd %xmm0, %xmm0 /* DP y|y */
274 movaps %xmm0, %xmm1 /* y|y */
275 mulpd %xmm0, %xmm0 /* z=x^4|z=x^4 */
276
277 movapd L(DP_SC4)(%rip), %xmm4 /* S4 */
278 mulpd %xmm0, %xmm4 /* z*S4 */
279 movapd L(DP_SC3)(%rip), %xmm5 /* S3 */
280 mulpd %xmm0, %xmm5 /* z*S3 */
281 addpd L(DP_SC2)(%rip), %xmm4 /* S2+z*S4 */
282 mulpd %xmm0, %xmm4 /* z*(S2+z*S4) */
283 addpd L(DP_SC1)(%rip), %xmm5 /* S1+z*S3 */
284 mulpd %xmm0, %xmm5 /* z*(S1+z*S3) */
285 addpd L(DP_SC0)(%rip), %xmm4 /* S0+z*(S2+z*S4) */
286 mulpd %xmm1, %xmm4 /* y*(S0+z*(S2+z*S4)) */
287 mulpd %xmm3, %xmm5 /* x*z*(S1+z*S3) */
288 mulpd %xmm3, %xmm4 /* x*y*(S0+z*(S2+z*S4)) */
289 addpd %xmm5, %xmm4 /*x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))))*/
290 addpd %xmm4, %xmm3 /*x+x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))*/
291 cvtpd2ps %xmm3, %xmm0 /* SP results */
292 movss %xmm0, (ARG_SIN_PTR) /* store sin(x) from xmm0[0] */
293 shufps $1, %xmm0, %xmm0 /* move cos(x) to xmm0[0] */
294 movss %xmm0, (ARG_COS_PTR) /* store cos(x) */
295 ret
296
297 .p2align 4
298L(arg_less_2pn5):
299 /* Here if |x|<2^-5 */
300 cmpl $0x32000000, %eax /* |x|<2^-27 ? */
301 jl L(arg_less_2pn27)
302
303 /* Here if 2^-27<=|x|<2^-5 */
304 movaps %xmm0, %xmm1 /* DP x */
305 movhpd L(DP_ONES)(%rip), %xmm1 /* DP 1|x */
306 mulsd %xmm0, %xmm0 /* DP x^2 */
307 unpcklpd %xmm0, %xmm0 /* DP x^2|x^2 */
308
309 movaps L(DP_SINCOS2_1)(%rip), %xmm3 /* DP DP_SIN2_1 */
310 mulpd %xmm0, %xmm3 /* DP x^2*DP_SIN2_1 */
311 addpd L(DP_SINCOS2_0)(%rip), %xmm3 /* DP DP_SIN2_0+x^2*DP_SIN2_1 */
312 mulpd %xmm0, %xmm3 /* DP x^2*DP_SIN2_0+x^4*DP_SIN2_1 */
313 mulpd %xmm1, %xmm3 /* DP x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
314 addpd %xmm1, %xmm3 /* DP x+x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
315 cvtpd2ps %xmm3, %xmm0 /* SP results */
316 movss %xmm0, (ARG_SIN_PTR) /* store sin(x) from xmm0[0] */
317 shufps $1, %xmm0, %xmm0 /* move cos(x) to xmm0[0] */
318 movss %xmm0, (ARG_COS_PTR) /* store cos(x) */
319 ret
320
321 .p2align 4
322L(arg_less_2pn27):
323 cmpl $0, %eax /* x=0 ? */
324 je L(arg_zero) /* in case x=0 return sin(+-0)==+-0 */
325 /* Here if |x|<2^-27 */
326 /*
327 * Special cases here:
328 * sin(subnormal) raises inexact/underflow
329 * sin(min_normalized) raises inexact/underflow
330 * sin(normalized) raises inexact
331 * cos(here)=1-|x| (raising inexact)
332 */
333 movaps %xmm0, %xmm3 /* DP x */
334 mulsd L(DP_SMALL)(%rip), %xmm0/* DP x*DP_SMALL */
335 subsd %xmm0, %xmm3 /* DP sin result is x-x*DP_SMALL */
336 andps L(SP_ABS_MASK)(%rip), %xmm7/* SP |x| */
337 cvtsd2ss %xmm3, %xmm0 /* sin(x) */
338 movss L(SP_ONE)(%rip), %xmm1 /* SP 1.0 */
339 movss %xmm0, (ARG_SIN_PTR) /* sin(x) store */
340 subss %xmm7, %xmm1 /* cos(x) */
341 movss %xmm1, (ARG_COS_PTR) /* cos(x) store */
342 ret
343
344 .p2align 4
345L(arg_zero):
346 movss L(SP_ONE)(%rip), %xmm0 /* 1.0 */
347 movss %xmm7, (ARG_SIN_PTR) /* sin(+-0)==x */
348 movss %xmm0, (ARG_COS_PTR) /* cos(+-0)==1 */
349 ret
350
351 .p2align 4
352L(arg_inf_or_nan):
353 /* Here if |x| is Inf or NAN */
354 jne L(skip_errno_setting) /* in case of x is NaN */
355
356 /* Align stack to 16 bytes. */
357 subq $8, %rsp
358 cfi_adjust_cfa_offset (8)
359 /* Here if x is Inf. Set errno to EDOM. */
360 call JUMPTARGET(__errno_location)
361 addq $8, %rsp
362 cfi_adjust_cfa_offset (-8)
363
364 movl $EDOM, (%rax)
365
366 .p2align 4
367L(skip_errno_setting):
368 /* Here if |x| is Inf or NAN. Continued. */
369 subss %xmm7, %xmm7 /* x-x, result is NaN */
370 movss %xmm7, (ARG_SIN_PTR)
371 movss %xmm7, (ARG_COS_PTR)
372 ret
373END(__sincosf)
374
375 .section .rodata, "a"
376 .p2align 3
377L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
378 .long 0x00000000,0x00000000
379 .long 0x54442d18,0x3fe921fb
380 .long 0x54442d18,0x3ff921fb
381 .long 0x7f3321d2,0x4002d97c
382 .long 0x54442d18,0x400921fb
383 .long 0x2955385e,0x400f6a7a
384 .long 0x7f3321d2,0x4012d97c
385 .long 0xe9bba775,0x4015fdbb
386 .long 0x54442d18,0x401921fb
387 .long 0xbeccb2bb,0x401c463a
388 .long 0x2955385e,0x401f6a7a
389 .type L(PIO4J), @object
390 ASM_SIZE_DIRECTIVE(L(PIO4J))
391
392 .p2align 3
393L(_FPI): /* 4/Pi broken into sum of positive DP values */
394 .long 0x00000000,0x00000000
395 .long 0x6c000000,0x3ff45f30
396 .long 0x2a000000,0x3e3c9c88
397 .long 0xa8000000,0x3c54fe13
398 .long 0xd0000000,0x3aaf47d4
399 .long 0x6c000000,0x38fbb81b
400 .long 0xe0000000,0x3714acc9
401 .long 0x7c000000,0x3560e410
402 .long 0x56000000,0x33bca2c7
403 .long 0xac000000,0x31fbd778
404 .long 0xe0000000,0x300b7246
405 .long 0xe8000000,0x2e5d2126
406 .long 0x48000000,0x2c970032
407 .long 0xe8000000,0x2ad77504
408 .long 0xe0000000,0x290921cf
409 .long 0xb0000000,0x274deb1c
410 .long 0xe0000000,0x25829a73
411 .long 0xbe000000,0x23fd1046
412 .long 0x10000000,0x2224baed
413 .long 0x8e000000,0x20709d33
414 .long 0x80000000,0x1e535a2f
415 .long 0x64000000,0x1cef904e
416 .long 0x30000000,0x1b0d6398
417 .long 0x24000000,0x1964ce7d
418 .long 0x16000000,0x17b908bf
419 .type L(_FPI), @object
420 ASM_SIZE_DIRECTIVE(L(_FPI))
421
/* Polynomial coefficients used for |x|<2^-5:
     sin(x)~=x+x*x^2*(DP_SIN2_0+x^2*DP_SIN2_1)   (low DP half),
     cos(x)~=1+1*x^2*(DP_COS2_0+x^2*DP_COS2_1)   (high DP half).
   Each object is one 16-byte aligned pair: sin coefficient first, cos
   coefficient second.  */
	.p2align 4
L(DP_SINCOS2_0):
	.long	0x5543d49d,0xbfc55555	/* DP_SIN2_0 */
	.long	0xff5cc6fd,0xbfdfffff	/* DP_COS2_0 */
	.type L(DP_SINCOS2_0), @object
	ASM_SIZE_DIRECTIVE(L(DP_SINCOS2_0))

	.p2align 4
L(DP_SINCOS2_1):
	.long	0x75cec8c5,0x3f8110f4	/* DP_SIN2_1 */
	.long	0xb178dac5,0x3fa55514	/* DP_COS2_1 */
	.type L(DP_SINCOS2_1), @object
	ASM_SIZE_DIRECTIVE(L(DP_SINCOS2_1))
439
440
441 .p2align 3
442L(DP_ZERONE):
443 .long 0x00000000,0x00000000 /* 0.0 */
444 .long 0x00000000,0xbff00000 /* 1.0 */
445 .type L(DP_ZERONE), @object
446 ASM_SIZE_DIRECTIVE(L(DP_ZERONE))
447
448 .p2align 3
449L(DP_ONES):
450 .long 0x00000000,0x3ff00000 /* +1.0 */
451 .long 0x00000000,0xbff00000 /* -1.0 */
452 .type L(DP_ONES), @object
453 ASM_SIZE_DIRECTIVE(L(DP_ONES))
454
/* Polynomial coefficients used for |t|<Pi/4:
     sin(t)~=t+t*t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))   (low DP half),
     cos(t)~=1+1*t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))   (high DP half).
   Each DP_SCn is one 16-byte aligned pair: Sn first, Cn second.  */
	.p2align 4
L(DP_SC4):
	.long	0x1674b58a,0xbe5a947e	/* S4 */
	.long	0xdd8844d7,0xbe923c97	/* C4 */
	.type L(DP_SC4), @object
	ASM_SIZE_DIRECTIVE(L(DP_SC4))

	.p2align 4
L(DP_SC3):
	.long	0x64e6b5b4,0x3ec71d72	/* S3 */
	.long	0x9ac43cc0,0x3efa00eb	/* C3 */
	.type L(DP_SC3), @object
	ASM_SIZE_DIRECTIVE(L(DP_SC3))

	.p2align 4
L(DP_SC2):
	.long	0x8b4bd1f9,0xbf2a019f	/* S2 */
	.long	0x348b6874,0xbf56c16b	/* C2 */
	.type L(DP_SC2), @object
	ASM_SIZE_DIRECTIVE(L(DP_SC2))

	.p2align 4
L(DP_SC1):
	.long	0x10c2688b,0x3f811111	/* S1 */
	.long	0x545c50c7,0x3fa55555	/* C1 */
	.type L(DP_SC1), @object
	ASM_SIZE_DIRECTIVE(L(DP_SC1))

	.p2align 4
L(DP_SC0):
	.long	0x55551cd9,0xbfc55555	/* S0 */
	.long	0xfffe98ae,0xbfdfffff	/* C0 */
	.type L(DP_SC0), @object
	ASM_SIZE_DIRECTIVE(L(DP_SC0))
493
494 .p2align 3
495L(DP_SMALL):
496 .long 0x00000000,0x3cd00000 /* 2^(-50) */
497 .type L(DP_SMALL), @object
498 ASM_SIZE_DIRECTIVE(L(DP_SMALL))
499
500 .p2align 3
501L(DP_PIO4):
502 .long 0x54442d18,0x3fe921fb /* Pi/4 */
503 .type L(DP_PIO4), @object
504 ASM_SIZE_DIRECTIVE(L(DP_PIO4))
505
506 .p2align 3
507L(DP_2POW52):
508 .long 0x00000000,0x43300000 /* +2^52 */
509 .long 0x00000000,0xc3300000 /* -2^52 */
510 .type L(DP_2POW52), @object
511 ASM_SIZE_DIRECTIVE(L(DP_2POW52))
512
513 .p2align 3
514L(DP_INVPIO4):
515 .long 0x6dc9c883,0x3ff45f30 /* 4/Pi */
516 .type L(DP_INVPIO4), @object
517 ASM_SIZE_DIRECTIVE(L(DP_INVPIO4))
518
519 .p2align 3
520L(DP_PIO4HI):
521 .long 0x54000000,0xbfe921fb /* High part of Pi/4 */
522 .type L(DP_PIO4HI), @object
523 ASM_SIZE_DIRECTIVE(L(DP_PIO4HI))
524
525 .p2align 3
526L(DP_PIO4LO):
527 .long 0x11A62633,0xbe010b46 /* Low part of Pi/4 */
528 .type L(DP_PIO4LO), @object
529 ASM_SIZE_DIRECTIVE(L(DP_PIO4LO))
530
531 .p2align 2
532L(SP_INVPIO4):
533 .long 0x3fa2f983 /* 4/Pi */
534 .type L(SP_INVPIO4), @object
535 ASM_SIZE_DIRECTIVE(L(SP_INVPIO4))
536
537 .p2align 4
538L(DP_ABS_MASK): /* Mask for getting DP absolute value */
539 .long 0xffffffff,0x7fffffff
540 .long 0xffffffff,0x7fffffff
541 .type L(DP_ABS_MASK), @object
542 ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
543
544 .p2align 3
545L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
546 .long 0x00000000,0xffffffff
547 .type L(DP_HI_MASK), @object
548 ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
549
550 .p2align 4
551L(SP_ABS_MASK): /* Mask for getting SP absolute value */
552 .long 0x7fffffff,0x7fffffff
553 .long 0x7fffffff,0x7fffffff
554 .type L(SP_ABS_MASK), @object
555 ASM_SIZE_DIRECTIVE(L(SP_ABS_MASK))
556
557 .p2align 2
558L(SP_ONE):
559 .long 0x3f800000 /* 1.0 */
560 .type L(SP_ONE), @object
561 ASM_SIZE_DIRECTIVE(L(SP_ONE))
562
/* Make the implementation above also available as sincosf (weak).  */
weak_alias(__sincosf, sincosf)
564