s_sinf.S source code [glibc/sysdeps/x86_64/fpu/s_sinf.S]

/* Optimized sinf function.
   Copyright (C) 2012-2017 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
18
19	#include <sysdep.h>
20	#include <errno.h>
21
/* Short algorithm description:
 *
 *  1) if |x| == 0: return x.
 *  2) if |x| <  2^-27: return x-x*DP_SMALL, raise underflow only when needed.
 *  3) if |x| <  2^-5 : return x+x^3*DP_SIN2_0+x^5*DP_SIN2_1.
 *  4) if |x| <   Pi/4: return x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))).
 *  5) if |x| < 9*Pi/4:
 *      5.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+1,
 *           t=|x|-j*Pi/4.
 *      5.2) Reconstruction:
 *          s = sign(x) * (-1.0)^((n>>2)&1)
 *          if(n&2 != 0) {
 *              using cos(t) polynomial for |t|<Pi/4, result is
 *              s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))).
 *          } else {
 *              using sin(t) polynomial for |t|<Pi/4, result is
 *              s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))).
 *          }
 *  6) if |x| < 2^23, large args:
 *      6.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1,
 *           t=|x|-j*Pi/4.
 *      6.2) Reconstruction same as (5.2).
 *  7) if |x| >= 2^23, very large args:
 *      7.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1,
 *           t=|x|-j*Pi/4.
 *      7.2) Reconstruction same as (5.2).
 *  8) if x is Inf, return x-x, and set errno=EDOM.
 *  9) if x is NaN, return x-x.
 *
 * Special cases:
 *  sin(+-0) = +-0 not raising inexact/underflow,
 *  sin(subnormal) raises inexact/underflow,
 *  sin(min_normalized) raises inexact/underflow,
 *  sin(normalized) raises inexact,
 *  sin(Inf) = NaN, raises invalid, sets errno to EDOM,
 *  sin(NaN) = NaN.
 */
59
/* float __sinf (float x) -- entry point and first range split.
   Input: single precision x in %xmm0.
   Values set up here that the later paths read:
     %xmm7 = untouched SP copy of x
     %edi  = raw bits of x (sign still present)
     %eax  = bits of |x|
     %xmm0 = x converted to double precision.  */
	.text
ENTRY(__sinf)
	/* Input: single precision x in %xmm0 */

	movd	%xmm0, %eax		/* Bits of x */
	movaps	%xmm0, %xmm7		/* Copy of x */
	cvtss2sd %xmm0, %xmm0		/* DP x */
	movss	L(SP_ABS_MASK)(%rip), %xmm3
	movl	%eax, %edi		/* Copy of x bits */
	andl	$0x7fffffff, %eax	/* |x| */

	cmpl	$0x3f490fdb, %eax	/* |x|<Pi/4?  */
	jb	L(arg_less_pio4)

	/* Here if |x|>=Pi/4 */
	andps	%xmm7, %xmm3		/* SP |x| */
	andpd	L(DP_ABS_MASK)(%rip),%xmm0 /* DP |x| */
	movss	L(SP_INVPIO4)(%rip), %xmm2 /* SP 1/(Pi/4) */

	cmpl	$0x40e231d6, %eax	/* |x|<9*Pi/4?  */
	jae	L(large_args)

	/* Here if Pi/4<=|x|<9*Pi/4 */
	mulss	%xmm3, %xmm2		/* SP |x|/(Pi/4) */
	movl	%edi, %ecx		/* Load x */
	cvttss2si %xmm2, %eax		/* k, number of Pi/4 in x */
	lea	L(PIO4J)(%rip), %rsi
	shrl	$31, %ecx		/* sign of x */
	addl	$1, %eax		/* k+1 */
	movl	$0x0e, %edx
	andl	%eax, %edx		/* j = (k+1)&0x0e */
	/* %edx was written by a 32-bit move, so %rdx is a clean index.  */
	subsd	(%rsi,%rdx,8), %xmm0	/* t = |x| - j * Pi/4 */
	/* Execution continues into L(reconstruction) with
	   %eax=n, %xmm0=t, %ecx=sign(x).  */
92
L(reconstruction):
	/* Input: %eax=n, %xmm0=t, %ecx=sign(x) */
	/* Bit 1 of n selects the polynomial: set -> cos(t), clear -> sin(t).  */
	testl	$2, %eax		/* n&2 != 0?  */
	jz	L(sin_poly)

/*L(cos_poly):*/
	/* Here if sin(x) calculated using cos(t) polynomial for |t|<Pi/4:
	 * y = t*t; z = y*y;
	 * s = sign(x) * (-1.0)^((n>>2)&1)
	 * result = s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))))
	 *
	 * The polynomial is evaluated as two interleaved chains in z
	 * (%xmm4 holds the C0/C2/C4 terms, %xmm3 the C1/C3 terms) that are
	 * summed at the end.
	 */
	shrl	$2, %eax		/* n>>2 */
	mulsd	%xmm0, %xmm0		/* y=t^2 */
	andl	$1, %eax		/* (n>>2)&1 */
	movaps	%xmm0, %xmm1		/* y */
	mulsd	%xmm0, %xmm0		/* z=t^4 */

	movsd	L(DP_C4)(%rip), %xmm4	/* C4 */
	mulsd	%xmm0, %xmm4		/* z*C4 */
	xorl	%eax, %ecx		/* (-1.0)^((n>>2)&1) XOR sign(x) */
	movsd	L(DP_C3)(%rip), %xmm3	/* C3 */
	mulsd	%xmm0, %xmm3		/* z*C3 */
	lea	L(DP_ONES)(%rip), %rsi
	addsd	L(DP_C2)(%rip), %xmm4	/* C2+z*C4 */
	mulsd	%xmm0, %xmm4		/* z*(C2+z*C4) */
	addsd	L(DP_C1)(%rip), %xmm3	/* C1+z*C3 */
	mulsd	%xmm0, %xmm3		/* z*(C1+z*C3) */
	addsd	L(DP_C0)(%rip), %xmm4	/* C0+z*(C2+z*C4) */
	mulsd	%xmm1, %xmm4		/* y*(C0+z*(C2+z*C4)) */

	/* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
	addsd	%xmm4, %xmm3
	/* 1.0+y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
	addsd	L(DP_ONES)(%rip), %xmm3

	/* %ecx is 0 or 1 and indexes the {+1.0, -1.0} pair at DP_ONES.  */
	mulsd	(%rsi,%rcx,8), %xmm3	/* DP result */
	cvtsd2ss %xmm3, %xmm0		/* SP result */
	ret
131
/* sin(t) polynomial path of the reconstruction step.
   Input: %eax=n, %xmm0=t, %ecx=sign(x).  Returns the SP result in %xmm0.  */
	.p2align 4
L(sin_poly):
	/* Here if sin(x) calculated using sin(t) polynomial for |t|<Pi/4:
	 * y = t*t; z = y*y;
	 * s = sign(x) * (-1.0)^((n>>2)&1)
	 * result = s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))))
	 *
	 * %xmm2 accumulates the S0/S2/S4 terms and %xmm3 the S1/S3 terms;
	 * %xmm4 carries t and later t*s.
	 */

	movaps	%xmm0, %xmm4		/* t */
	shrl	$2, %eax		/* n>>2 */
	mulsd	%xmm0, %xmm0		/* y=t^2 */
	andl	$1, %eax		/* (n>>2)&1 */
	movaps	%xmm0, %xmm1		/* y */
	xorl	%eax, %ecx		/* (-1.0)^((n>>2)&1) XOR sign(x) */
	mulsd	%xmm0, %xmm0		/* z=t^4 */

	movsd	L(DP_S4)(%rip), %xmm2	/* S4 */
	mulsd	%xmm0, %xmm2		/* z*S4 */
	movsd	L(DP_S3)(%rip), %xmm3	/* S3 */
	mulsd	%xmm0, %xmm3		/* z*S3 */
	lea	L(DP_ONES)(%rip), %rsi
	addsd	L(DP_S2)(%rip), %xmm2	/* S2+z*S4 */
	mulsd	%xmm0, %xmm2		/* z*(S2+z*S4) */
	addsd	L(DP_S1)(%rip), %xmm3	/* S1+z*S3 */
	mulsd	%xmm0, %xmm3		/* z*(S1+z*S3) */
	addsd	L(DP_S0)(%rip), %xmm2	/* S0+z*(S2+z*S4) */
	mulsd	%xmm1, %xmm2		/* y*(S0+z*(S2+z*S4)) */
	/* t*s, where s = sign(x) * (-1.0)^((n>>2)&1) */
	mulsd	(%rsi,%rcx,8), %xmm4
	/* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
	addsd	%xmm2, %xmm3
	/* t*s*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
	mulsd	%xmm4, %xmm3
	/* t*s*(1.0+y*(S0+y*(S1+y*(S2+y*(S3+y*S4))))) */
	addsd	%xmm4, %xmm3
	cvtsd2ss %xmm3, %xmm0		/* SP result */
	ret
169
/* |x| >= 9*Pi/4.  On entry %eax = bits of |x|, %xmm0 = DP |x|,
   %edi = raw bits of x.  Inf/NaN and |x| >= 2^23 are dispatched away;
   the remaining range is reduced with a two-part (high/low) Pi/4.  */
	.p2align 4
L(large_args):
	/* Here if |x|>=9*Pi/4 */
	cmpl	$0x7f800000, %eax	/* x is Inf or NaN?  */
	/* The flags of this compare are consumed again at the branch
	   target (a jne there separates Inf from NaN).  */
	jae	L(arg_inf_or_nan)

	/* Here if finite |x|>=9*Pi/4 */
	cmpl	$0x4b000000, %eax	/* |x|<2^23?  */
	jae	L(very_large_args)

	/* Here if 9*Pi/4<=|x|<2^23 */
	movsd	L(DP_INVPIO4)(%rip), %xmm1 /* 1/(Pi/4) */
	mulsd	%xmm0, %xmm1		/* |x|/(Pi/4) */
	cvttsd2si %xmm1, %eax		/* k=trunc(|x|/(Pi/4)) */
	addl	$1, %eax		/* k+1 */
	movl	%eax, %edx
	andl	$0xfffffffe, %edx	/* j=(k+1)&0xfffffffe */
	cvtsi2sdl %edx, %xmm4		/* DP j */
	movl	%edi, %ecx		/* Load x */
	movsd	L(DP_PIO4HI)(%rip), %xmm2 /* -PIO4HI = high part of -Pi/4 */
	shrl	$31, %ecx		/* sign bit of x */
	mulsd	%xmm4, %xmm2		/* -j*PIO4HI */
	movsd	L(DP_PIO4LO)(%rip), %xmm3 /* -PIO4LO = low part of -Pi/4 */
	addsd	%xmm2, %xmm0		/* |x| - j*PIO4HI */
	mulsd	%xmm3, %xmm4		/* -j*PIO4LO */
	addsd	%xmm4, %xmm0		/* t = |x| - j*PIO4HI - j*PIO4LO */
	jmp	L(reconstruction)
197
/* Finite |x| >= 2^23.  On entry %eax = bits of |x|, %xmm0 = DP |x|,
   %edi = raw bits of x.  |x| is multiplied by four consecutive pieces
   of 4/Pi taken from the _FPI table; the integer part k and the
   fraction are then separated with the 2^52 add/subtract trick.
   Leaves %eax=n, %xmm0=t, %ecx=sign(x) for L(reconstruction).  */
	.p2align 4
L(very_large_args):
	/* Here if finite |x|>=2^23 */

	/* bitpos = (ix>>23) - BIAS_32 + 59; */
	shrl	$23, %eax		/* eb = biased exponent of x */
	/* bitpos = eb - 0x7f + 59, where 0x7f is exponent bias */
	subl	$68, %eax
	movl	$28, %ecx		/* %cl=28 */
	movl	%eax, %edx		/* bitpos copy */

	/* j = bitpos/28; */
	/* 8-bit divide: dividend is %ax, quotient lands in %al and the
	   remainder in %ah, so %edx is not involved.  */
	div	%cl			/* j in register %al=%ax/%cl */
	movapd	%xmm0, %xmm3		/* |x| */
	/* clear unneeded remainder from %ah */
	andl	$0xff, %eax

	imull	$28, %eax, %ecx		/* j*28 */
	lea	L(_FPI)(%rip), %rsi
	movsd	L(DP_HI_MASK)(%rip), %xmm4 /* DP_HI_MASK */
	movapd	%xmm0, %xmm5		/* |x| */
	mulsd	-16(%rsi,%rax,8), %xmm3	/* tmp3 = FPI[j-2]*|x| */
	movapd	%xmm0, %xmm1		/* |x| */
	mulsd	-8(%rsi,%rax,8), %xmm5	/* tmp2 = FPI[j-1]*|x| */
	mulsd	(%rsi,%rax,8), %xmm0	/* tmp0 = FPI[j]*|x| */
	addl	$19, %ecx		/* j*28+19 */
	mulsd	8(%rsi,%rax,8), %xmm1	/* tmp1 = FPI[j+1]*|x| */
	cmpl	%ecx, %edx		/* bitpos>=j*28+19?  */
	jl	L(very_large_skip1)

	/* Here if bitpos>=j*28+19 */
	andpd	%xmm3, %xmm4		/* HI(tmp3) */
	subsd	%xmm4, %xmm3		/* tmp3 = tmp3 - HI(tmp3) */
L(very_large_skip1):

	movsd	L(DP_2POW52)(%rip), %xmm6
	movapd	%xmm5, %xmm2		/* tmp2 copy */
	addsd	%xmm3, %xmm5		/* tmp5 = tmp3 + tmp2 */
	movl	$1, %edx
	addsd	%xmm5, %xmm6		/* tmp6 = tmp5 + 2^52 */
	movsd	8+L(DP_2POW52)(%rip), %xmm4 /* -2^52 */
	movd	%xmm6, %eax		/* k = I64_LO(tmp6); */
	addsd	%xmm6, %xmm4		/* tmp4 = tmp6 - 2^52 */
	movl	%edi, %ecx		/* Load x */
	comisd	%xmm5, %xmm4		/* tmp4 > tmp5?  */
	jbe	L(very_large_skip2)

	/* Here if tmp4 > tmp5 */
	subl	$1, %eax		/* k-- */
	addsd	8+L(DP_ONES)(%rip), %xmm4 /* tmp4 -= 1.0 */
L(very_large_skip2):

	andl	%eax, %edx		/* k&1 */
	lea	L(DP_ZERONE)(%rip), %rsi
	subsd	%xmm4, %xmm3		/* tmp3 -= tmp4 */
	addsd	(%rsi,%rdx,8), %xmm3	/* t  = DP_ZERONE[k&1] + tmp3 */
	addsd	%xmm2, %xmm3		/* t += tmp2 */
	shrl	$31, %ecx		/* sign of x */
	addsd	%xmm3, %xmm0		/* t += tmp0 */
	addl	$1, %eax		/* n=k+1 */
	addsd	%xmm1, %xmm0		/* t += tmp1 */
	mulsd	L(DP_PIO4)(%rip), %xmm0	/* t *= Pi/4 */

	jmp	L(reconstruction)	/* end of very_large_args path */
262
/* |x| < Pi/4: no range reduction needed.
   On entry %eax = bits of |x| and %xmm0 = DP x (sign included).  */
	.p2align 4
L(arg_less_pio4):
	/* Here if |x|<Pi/4 */
	cmpl	$0x3d000000, %eax	/* |x|<2^-5?  */
	/* %eax has its sign bit cleared, so this is a magnitude compare;
	   use the unsigned branch like the other range checks on |x|.  */
	jb	L(arg_less_2pn5)

	/* Here if 2^-5<=|x|<Pi/4 */
	movaps	%xmm0, %xmm3		/* x */
	mulsd	%xmm0, %xmm0		/* y=x^2 */
	movaps	%xmm0, %xmm1		/* y */
	mulsd	%xmm0, %xmm0		/* z=x^4 */
	movsd	L(DP_S4)(%rip), %xmm4	/* S4 */
	mulsd	%xmm0, %xmm4		/* z*S4 */
	movsd	L(DP_S3)(%rip), %xmm5	/* S3 */
	mulsd	%xmm0, %xmm5		/* z*S3 */
	addsd	L(DP_S2)(%rip), %xmm4	/* S2+z*S4 */
	mulsd	%xmm0, %xmm4		/* z*(S2+z*S4) */
	addsd	L(DP_S1)(%rip), %xmm5	/* S1+z*S3 */
	mulsd	%xmm0, %xmm5		/* z*(S1+z*S3) */
	addsd	L(DP_S0)(%rip), %xmm4	/* S0+z*(S2+z*S4) */
	mulsd	%xmm1, %xmm4		/* y*(S0+z*(S2+z*S4)) */
	mulsd	%xmm3, %xmm5		/* x*z*(S1+z*S3) */
	mulsd	%xmm3, %xmm4		/* x*y*(S0+z*(S2+z*S4)) */
	/* x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
	addsd	%xmm5, %xmm4
	/* x + x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
	addsd	%xmm4, %xmm3
	cvtsd2ss %xmm3, %xmm0		/* SP result */
	ret
292
/* |x| < 2^-5: short two-coefficient polynomial.
   On entry %eax = bits of |x| and %xmm0 = DP x (sign included).  */
	.p2align 4
L(arg_less_2pn5):
	/* Here if |x|<2^-5 */
	cmpl	$0x32000000, %eax	/* |x|<2^-27?  */
	/* Magnitude compare on a value with the sign bit cleared:
	   unsigned branch, consistent with the other range checks.  */
	jb	L(arg_less_2pn27)

	/* Here if 2^-27<=|x|<2^-5 */
	movaps	%xmm0, %xmm1		/* DP x */
	mulsd	%xmm0, %xmm0		/* DP x^2 */
	movsd	L(DP_SIN2_1)(%rip), %xmm3 /* DP DP_SIN2_1 */
	mulsd	%xmm0, %xmm3		/* DP x^2*DP_SIN2_1 */
	addsd	L(DP_SIN2_0)(%rip), %xmm3 /* DP DP_SIN2_0+x^2*DP_SIN2_1 */
	mulsd	%xmm0, %xmm3		/* DP x^2*DP_SIN2_0+x^4*DP_SIN2_1 */
	mulsd	%xmm1, %xmm3		/* DP x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
	addsd	%xmm1, %xmm3		/* DP x+x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
	cvtsd2ss %xmm3, %xmm0		/* SP result */
	ret
310
/* |x| < 2^-27, including +-0.
   On entry %eax = bits of |x|, %xmm0 = DP x, %xmm7 = original SP x.  */
	.p2align 4
L(arg_less_2pn27):
	testl	%eax, %eax		/* x=0?  */
	jz	L(arg_zero)		/* in case x=0 return sin(+-0)==+-0 */
	/* Here if |x|<2^-27 */
	/*
	 * Special cases here:
	 *  sin(subnormal) raises inexact/underflow
	 *  sin(min_normalized) raises inexact/underflow
	 *  sin(normalized) raises inexact
	 */
	movaps	%xmm0, %xmm3		/* Copy of DP x */
	mulsd	L(DP_SMALL)(%rip), %xmm0 /* x*DP_SMALL */
	subsd	%xmm0, %xmm3		/* Result is x-x*DP_SMALL */
	cvtsd2ss %xmm3, %xmm0		/* Result converted to SP */
	ret

	.p2align 4
L(arg_zero):
	/* Return the saved SP input unchanged, so the sign of zero is
	   kept and no arithmetic is performed on it.  */
	movaps	%xmm7, %xmm0		/* SP x */
	ret
332
/* Inf/NaN path.  Reached from L(large_args) with the flags of
   "cmpl $0x7f800000, %eax" still live: ZF=1 means |x| is Inf,
   ZF=0 means x is NaN.  %xmm7 holds the original SP x.  */
	.p2align 4
L(arg_inf_or_nan):
	/* Here if |x| is Inf or NAN */
	jne	L(skip_errno_setting)	/* in case of x is NaN */

	/* Here if x is Inf.  Set errno to EDOM.
	   Reserve 8 bytes: this aligns the stack to 16 bytes for the call
	   and provides a slot to keep x in.  All %xmm registers are
	   caller-saved in the SysV AMD64 ABI, so x must not be left only
	   in %xmm7 across the call.  */
	subq	$8, %rsp
	cfi_adjust_cfa_offset (8)
	movss	%xmm7, (%rsp)		/* spill x */
	call	JUMPTARGET(__errno_location)
	movss	(%rsp), %xmm7		/* reload x */
	addq	$8, %rsp
	cfi_adjust_cfa_offset (-8)

	movl	$EDOM, (%rax)

	.p2align 4
L(skip_errno_setting):
	/* Here if |x| is Inf or NAN. Continued.  */
	movaps	%xmm7, %xmm0		/* load x */
	subss	%xmm0, %xmm0		/* Result is NaN */
	ret
END(__sinf)
355
/* Read-only constants.  Each double is stored as two .long words,
   low word first.  */
	.section .rodata, "a"
	.p2align 3
L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
	.long	0x00000000,0x00000000
	.long	0x54442d18,0x3fe921fb
	.long	0x54442d18,0x3ff921fb
	.long	0x7f3321d2,0x4002d97c
	.long	0x54442d18,0x400921fb
	.long	0x2955385e,0x400f6a7a
	.long	0x7f3321d2,0x4012d97c
	.long	0xe9bba775,0x4015fdbb
	.long	0x54442d18,0x401921fb
	.long	0xbeccb2bb,0x401c463a
	.long	0x2955385e,0x401f6a7a
	.type L(PIO4J), @object
	ASM_SIZE_DIRECTIVE(L(PIO4J))
372
/* 4/Pi split into 25 double-precision pieces (first entry is 0.0);
   the very-large-argument path multiplies |x| by four neighbouring
   entries.  */
	.p2align 3
L(_FPI): /* 4/Pi broken into sum of positive DP values */
	.long	0x00000000,0x00000000
	.long	0x6c000000,0x3ff45f30
	.long	0x2a000000,0x3e3c9c88
	.long	0xa8000000,0x3c54fe13
	.long	0xd0000000,0x3aaf47d4
	.long	0x6c000000,0x38fbb81b
	.long	0xe0000000,0x3714acc9
	.long	0x7c000000,0x3560e410
	.long	0x56000000,0x33bca2c7
	.long	0xac000000,0x31fbd778
	.long	0xe0000000,0x300b7246
	.long	0xe8000000,0x2e5d2126
	.long	0x48000000,0x2c970032
	.long	0xe8000000,0x2ad77504
	.long	0xe0000000,0x290921cf
	.long	0xb0000000,0x274deb1c
	.long	0xe0000000,0x25829a73
	.long	0xbe000000,0x23fd1046
	.long	0x10000000,0x2224baed
	.long	0x8e000000,0x20709d33
	.long	0x80000000,0x1e535a2f
	.long	0x64000000,0x1cef904e
	.long	0x30000000,0x1b0d6398
	.long	0x24000000,0x1964ce7d
	.long	0x16000000,0x17b908bf
	.type L(_FPI), @object
	ASM_SIZE_DIRECTIVE(L(_FPI))
402
/* Coefficients of polynomial
   for sin(x)~=x+x^3*DP_SIN2_0+x^5*DP_SIN2_1, |x|<2^-5.  */
	.p2align 3
L(DP_SIN2_0):
	.long	0x5543d49d,0xbfc55555
	.type L(DP_SIN2_0), @object
	ASM_SIZE_DIRECTIVE(L(DP_SIN2_0))

	.p2align 3
L(DP_SIN2_1):
	.long	0x75cec8c5,0x3f8110f4
	.type L(DP_SIN2_1), @object
	ASM_SIZE_DIRECTIVE(L(DP_SIN2_1))
416
/* Two-entry lookup pairs, indexed by a 0/1 integer.
   DP_ZERONE[i] is added to the fraction in the very-large-argument
   path; DP_ONES[i] supplies the result sign.  */
	.p2align 3
L(DP_ZERONE):
	.long	0x00000000,0x00000000	/* 0.0 */
	.long	0x00000000,0xbff00000	/* -1.0 */
	.type L(DP_ZERONE), @object
	ASM_SIZE_DIRECTIVE(L(DP_ZERONE))

	.p2align 3
L(DP_ONES):
	.long	0x00000000,0x3ff00000	/* +1.0 */
	.long	0x00000000,0xbff00000	/* -1.0 */
	.type L(DP_ONES), @object
	ASM_SIZE_DIRECTIVE(L(DP_ONES))
430
/* Coefficients of polynomial
   for sin(t)~=t+t^3*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))), |t|<Pi/4.  */
	.p2align 3
L(DP_S3):
	.long	0x64e6b5b4,0x3ec71d72
	.type L(DP_S3), @object
	ASM_SIZE_DIRECTIVE(L(DP_S3))

	.p2align 3
L(DP_S1):
	.long	0x10c2688b,0x3f811111
	.type L(DP_S1), @object
	ASM_SIZE_DIRECTIVE(L(DP_S1))

	.p2align 3
L(DP_S4):
	.long	0x1674b58a,0xbe5a947e
	.type L(DP_S4), @object
	ASM_SIZE_DIRECTIVE(L(DP_S4))

	.p2align 3
L(DP_S2):
	.long	0x8b4bd1f9,0xbf2a019f
	.type L(DP_S2), @object
	ASM_SIZE_DIRECTIVE(L(DP_S2))

	.p2align 3
L(DP_S0):
	.long	0x55551cd9,0xbfc55555
	.type L(DP_S0), @object
	ASM_SIZE_DIRECTIVE(L(DP_S0))

	/* Scale factor used for |x|<2^-27, where the result is
	   x-x*DP_SMALL.  */
	.p2align 3
L(DP_SMALL):
	.long	0x00000000,0x3cd00000	/* 2^(-50) */
	.type L(DP_SMALL), @object
	ASM_SIZE_DIRECTIVE(L(DP_SMALL))
468
/* Coefficients of polynomial
   for cos(t)~=1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))), |t|<Pi/4.  */
	.p2align 3
L(DP_C3):
	.long	0x9ac43cc0,0x3efa00eb
	.type L(DP_C3), @object
	ASM_SIZE_DIRECTIVE(L(DP_C3))

	.p2align 3
L(DP_C1):
	.long	0x545c50c7,0x3fa55555
	.type L(DP_C1), @object
	ASM_SIZE_DIRECTIVE(L(DP_C1))

	.p2align 3
L(DP_C4):
	.long	0xdd8844d7,0xbe923c97
	.type L(DP_C4), @object
	ASM_SIZE_DIRECTIVE(L(DP_C4))

	.p2align 3
L(DP_C2):
	.long	0x348b6874,0xbf56c16b
	.type L(DP_C2), @object
	ASM_SIZE_DIRECTIVE(L(DP_C2))

	.p2align 3
L(DP_C0):
	.long	0xfffe98ae,0xbfdfffff
	.type L(DP_C0), @object
	ASM_SIZE_DIRECTIVE(L(DP_C0))
500
/* Double-precision constants used by the range-reduction paths.  */
	.p2align 3
L(DP_PIO4):
	.long	0x54442d18,0x3fe921fb	/* Pi/4 */
	.type L(DP_PIO4), @object
	ASM_SIZE_DIRECTIVE(L(DP_PIO4))

	.p2align 3
L(DP_2POW52):
	.long	0x00000000,0x43300000	/* +2^52 */
	.long	0x00000000,0xc3300000	/* -2^52 */
	.type L(DP_2POW52), @object
	ASM_SIZE_DIRECTIVE(L(DP_2POW52))

	.p2align 3
L(DP_INVPIO4):
	.long	0x6dc9c883,0x3ff45f30	/* 4/Pi */
	.type L(DP_INVPIO4), @object
	ASM_SIZE_DIRECTIVE(L(DP_INVPIO4))

	/* The two parts of Pi/4 below are stored negated (sign bit set),
	   so the reduction can add j*part instead of subtracting.  */
	.p2align 3
L(DP_PIO4HI):
	.long	0x54000000,0xbfe921fb	/* High part of -Pi/4 */
	.type L(DP_PIO4HI), @object
	ASM_SIZE_DIRECTIVE(L(DP_PIO4HI))

	.p2align 3
L(DP_PIO4LO):
	.long	0x11A62633,0xbe010b46	/* Low part of -Pi/4 */
	.type L(DP_PIO4LO), @object
	ASM_SIZE_DIRECTIVE(L(DP_PIO4LO))
531
/* Single-precision 4/Pi and the bit masks.  The two absolute-value
   masks are 16-byte aligned and 16 bytes long because they are used
   as memory operands of packed andps/andpd or loaded whole.  */
	.p2align 2
L(SP_INVPIO4):
	.long	0x3fa2f983		/* 4/Pi */
	.type L(SP_INVPIO4), @object
	ASM_SIZE_DIRECTIVE(L(SP_INVPIO4))

	.p2align 4
L(DP_ABS_MASK): /* Mask for getting DP absolute value */
	.long	0xffffffff,0x7fffffff
	.long	0xffffffff,0x7fffffff
	.type L(DP_ABS_MASK), @object
	ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))

	.p2align 3
	/* Keeps the upper 32 bits of the DP encoding (sign, exponent and
	   the top 20 fraction bits, i.e. 21 significand bits counting the
	   implicit leading one) and clears the low 32 bits.  */
L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
	.long	0x00000000,0xffffffff
	.type L(DP_HI_MASK),@object
	ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))

	.p2align 4
L(SP_ABS_MASK): /* Mask for getting SP absolute value */
	.long	0x7fffffff,0x7fffffff
	.long	0x7fffffff,0x7fffffff
	.type L(SP_ABS_MASK), @object
	ASM_SIZE_DIRECTIVE(L(SP_ABS_MASK))
557
/* Export the implementation under its public name as a weak alias.  */
weak_alias(__sinf, sinf)
559

Browse the source code of glibc/sysdeps/x86_64/fpu/s_sinf.S