e_expl.S source code [glibc/sysdeps/x86_64/fpu/e_expl.S]

1	/*
2	* Written by J.T. Conklin <jtc@netbsd.org>.
3	* Public domain.
4	*
5	* Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>.
6	*/
7
/*
 * The 8087 method for the exponential function is to calculate
 *   exp(x) = 2^(x log2(e))
 * after separating integer and fractional parts
 *   x log2(e) = i + f, |f| <= .5
 * 2^i is immediate but f needs to be precise for long double accuracy.
 * Suppress range reduction error in computing f by the following.
 * Separate x into integer and fractional parts
 *   x = xi + xf, |xf| <= .5
 * Separate log2(e) into the sum of an exact number c0 and small part c1.
 *   c0 + c1 = log2(e) to extra precision
 * Then
 *   f = (c0 xi - i) + c0 xf + c1 x
 * where c0 xi is exact and so also is (c0 xi - i).
 * -- moshier@na-net.ornl.gov
 */
24
25	#include <machine/asm.h>
26	#include <x86_64-math-asm.h>
27
/* Select which function this file is assembled into.  The same source
   is built three ways, chosen by the including wrapper:

     USE_AS_EXP10L  -> __ieee754_exp10l; FLDLOG pushes log2(10) (fldl2t).
     USE_AS_EXPM1L  -> __expm1l; FLDLOG pushes log2(e) (fldl2e).  This
                       variant has no "_finite" alias, so EXPL_FINITE is
                       explicitly left undefined.
     (neither)      -> __ieee754_expl; FLDLOG pushes log2(e) (fldl2e).

   IEEE754_EXPL is the entry-point name, EXPL_FINITE the name of the
   strong alias created at the end of the file.  */
#ifdef USE_AS_EXP10L
# define IEEE754_EXPL __ieee754_exp10l
# define EXPL_FINITE __exp10l_finite
# define FLDLOG fldl2t
#elif defined USE_AS_EXPM1L
# define IEEE754_EXPL __expm1l
# undef EXPL_FINITE
# define FLDLOG fldl2e
#else
# define IEEE754_EXPL __ieee754_expl
# define EXPL_FINITE __expl_finite
# define FLDLOG fldl2e
#endif
41
/* Read-only constants.  Each one is an 80-bit x87 extended-precision
   value written out byte by byte in little-endian order (8 significand
   bytes, then the 2-byte sign/exponent word) and padded with six zero
   bytes so that it fills one 16-byte entry of the mergeable
   .rodata.cst16 section.

     c0    Leading part of log2(base).  Only the top 16 bits of the
           significand are non-zero (the low six bytes are 0).
     c1    Small correction term; c0 + c1 = log2(base) to extra
           precision (see the algorithm description at the top of the
           file).
     csat  Significand 0x8000000000000000 with exponent word 0x400e,
           i.e. 2^15.  Loaded in place of arguments whose magnitude is
           too large, so that the main path produces a saturated
           overflow/underflow result.  Not used by the expm1l variant.  */
	.section .rodata.cst16,"aM",@progbits,16

	.p2align 4
#ifdef USE_AS_EXP10L
	/* base 10: c0 + c1 = log2(10).  */
	.type c0,@object
c0:	.byte 0, 0, 0, 0, 0, 0, 0x9a, 0xd4, 0x00, 0x40
	.byte 0, 0, 0, 0, 0, 0
	ASM_SIZE_DIRECTIVE(c0)
	.type c1,@object
c1:	.byte 0x58, 0x92, 0xfc, 0x15, 0x37, 0x9a, 0x97, 0xf0, 0xef, 0x3f
	.byte 0, 0, 0, 0, 0, 0
	ASM_SIZE_DIRECTIVE(c1)
#else
	/* base e: c0 + c1 = log2(e).  */
	.type c0,@object
c0:	.byte 0, 0, 0, 0, 0, 0, 0xaa, 0xb8, 0xff, 0x3f
	.byte 0, 0, 0, 0, 0, 0
	ASM_SIZE_DIRECTIVE(c0)
	.type c1,@object
c1:	.byte 0x20, 0xfa, 0xee, 0xc2, 0x5f, 0x70, 0xa5, 0xec, 0xed, 0x3f
	.byte 0, 0, 0, 0, 0, 0
	ASM_SIZE_DIRECTIVE(c1)
#endif
#ifndef USE_AS_EXPM1L
	.type csat,@object
csat:	.byte 0, 0, 0, 0, 0, 0, 0, 0x80, 0x0e, 0x40
	.byte 0, 0, 0, 0, 0, 0
	ASM_SIZE_DIRECTIVE(csat)
/* NOTE(review): DEFINE_LDBL_MIN comes from <x86_64-math-asm.h>;
   presumably it emits the LDBL_MIN constant consumed by
   LDBL_CHECK_FORCE_UFLOW_NONNEG in the function below -- confirm.  */
DEFINE_LDBL_MIN
#endif
71
/* MO(sym): memory operand for a constant defined in this file.  When
   building position-independent code the operand is RIP-relative
   (sym(%rip)); otherwise the bare symbol is used.  */
#ifdef PIC
# define MO(op) op##(%rip)
#else
# define MO(op) op
#endif
77
/*----------------------------------------------------------------------
 * long double IEEE754_EXPL (long double x)
 *
 * Depending on the build variant this is __ieee754_expl (e^x),
 * __ieee754_exp10l (10^x) or __expm1l (e^x - 1).
 *
 * In:    x is read from the stack at 8(%rsp); the word at 8+8(%rsp) is
 *        its sign/exponent field.
 * Out:   result left in %st(0), which is the only x87 register still
 *        occupied at `ret'.
 * Clobb: %eax, %edx, EFLAGS, x87 status word.  The x87 control word is
 *        changed temporarily and restored before returning.
 * Stack: %rsp is never adjusted; -4(%rsp) and -8(%rsp) are used as
 *        scratch for the control word, and no call is made while they
 *        are live.
 *
 * FXAM classification as used below: after `fstsw %ax', %ah holds
 * C0 = 0x01, C1 = 0x02 (sign), C2 = 0x04 and C3 = 0x40.  Masking with
 * 0x45 keeps C3/C2/C0, giving 0x01 for NaN, 0x05 for infinity and 0x40
 * for zero.
 *
 * Local labels:
 *   1  argument is infinite (for expm1l: large negative, maybe -Inf)
 *   2  common return
 *   3  main computation, x (possibly replaced by +-csat) in %st(0)
 *   4  expm1l only: argument not large and negative
 *   5  exp/exp10 only: large exponent, Inf or NaN
 *
 * The numbers at the start of the comments in the main computation
 * give the index of the deepest x87 register in use after the
 * instruction has executed.
 *--------------------------------------------------------------------*/
	.text
ENTRY(IEEE754_EXPL)
#ifdef USE_AS_EXPM1L
	/* Large positive arguments are handed to __expl with a tail
	   jump; no x87 state has been touched yet.  */
	movzwl	8+8(%rsp), %eax
	xorb	$0x80, %ah	// invert sign bit (now 1 is "positive")
	cmpl	$0xc006, %eax	// is num positive and exp >= 6 (number is >= 128.0)?
	jae	HIDDEN_JUMPTARGET (__expl) // (if num is denormal, it is at least >= 64.0)
#endif
	fldt	8(%rsp)
/* I added the following ugly construct because expl(+-Inf) resulted
   in NaN.  The ugliness results from the bright minds at Intel.
   For the i686 the code can be written better.
   -- drepper@cygnus.com.  */
	fxam			/* Is NaN or +-Inf?  */
#ifdef USE_AS_EXPM1L
	/* Undo the sign inversion done at entry, then test for "sign set
	   and biased exponent >= 0x4006".  The flags from this cmpl are
	   consumed by the jb below: fstsw and movb do not modify EFLAGS.  */
	xorb	$0x80, %ah
	cmpl	$0xc006, %eax
	fstsw	%ax		/* %ax = status word with the FXAM result.  */
	movb	$0x45, %dh	/* Mask selecting C3, C2 and C0.  */
	jb	4f

	/* Below -64.0 (may be -NaN or -Inf). */
	andb	%ah, %dh
	cmpb	$0x01, %dh
	je	2f		/* Is +-NaN, jump.    */
	jmp	1f		/* -large, possibly -Inf.  */

4:	/* In range -64.0 to 64.0 (may be +-0 but not NaN or +-Inf).  */
	/* Test for +-0 as argument.  */
	andb	%ah, %dh
	cmpb	$0x40, %dh
	je	2f		/* Return the zero argument unchanged.  */

	/* Test for arguments that are small but not subnormal.  */
	movzwl	8+8(%rsp), %eax
	andl	$0x7fff, %eax	/* Biased exponent without the sign.  */
	cmpl	$0x3fbf, %eax
	jge	3f
	/* Argument's exponent below -64; avoid spurious underflow if
	   normal.  */
	cmpl	$0x0001, %eax
	jge	2f		/* Return the argument itself.  */
	/* Force underflow and return the argument, to avoid wrong signs
	   of zero results from the code below in some rounding modes.  */
	fld	%st		/* Square a copy of x for its side effect ...  */
	fmul	%st
	fstp	%st		/* ... and discard it; x is in %st(0) again.  */
	jmp	2f
#else
	movzwl	8+8(%rsp), %eax
	andl	$0x7fff, %eax	/* Biased exponent without the sign.  */
	cmpl	$0x400d, %eax
	jg	5f		/* |x| >= 2^15, Inf or NaN.  */
	cmpl	$0x3fbc, %eax
	jge	3f
	/* Argument's exponent below -67, result rounds to 1.  */
	fld1
	faddp			/* Return 1 + x.  */
	jmp	2f
5:	/* Overflow, underflow or infinity or NaN as argument.  */
	fstsw	%ax		/* %ax = status word with the FXAM result.  */
	movb	$0x45, %dh	/* Mask selecting C3, C2 and C0.  */
	andb	%ah, %dh
	cmpb	$0x05, %dh
	je	1f		/* Is +-Inf, jump.    */
	cmpb	$0x01, %dh
	je	2f		/* Is +-NaN, jump.    */
	/* Overflow or underflow; saturate.  */
	fstp	%st		/* Drop x ...  */
	fldt	MO(csat)	/* ... and continue with +csat ...  */
	andb	$2, %ah		/* C1 of FXAM is the sign of x.  */
	jz	3f
	fchs			/* ... or -csat if x was negative.  */
#endif
3:	FLDLOG			/* 1  log2(base)      */
	fmul	%st(1), %st	/* 1  x log2(base)    */
	/* Set round-to-nearest temporarily.  The original control word
	   is kept at -4(%rsp); the copy at -8(%rsp) has the rounding
	   control field (bits 10-11) cleared.  */
	fstcw	-4(%rsp)
	movl	$0xf3ff, %edx
	andl	-4(%rsp), %edx
	movl	%edx, -8(%rsp)
	fldcw	-8(%rsp)
	frndint			/* 1  i               */
	fld	%st(1)		/* 2  x               */
	frndint			/* 2  xi              */
	fldcw	-4(%rsp)	/* Restore the caller's control word.  */
	fld	%st(1)		/* 3  i               */
	fldt	MO(c0)		/* 4  c0              */
	fld	%st(2)		/* 5  xi              */
	fmul	%st(1), %st	/* 5  c0 xi           */
	fsubp	%st, %st(2)	/* 4  f = c0 xi  - i  */
	fld	%st(4)		/* 5  x               */
	fsub	%st(3), %st	/* 5  xf = x - xi     */
	fmulp	%st, %st(1)	/* 4  c0 xf           */
	faddp	%st, %st(1)	/* 3  f = f + c0 xf   */
	fldt	MO(c1)		/* 4                  */
	fmul	%st(4), %st	/* 4  c1 * x          */
	faddp	%st, %st(1)	/* 3  f = f + c1 * x  */
	f2xm1			/* 3 2^(fract(x * log2(base))) - 1 */
#ifdef USE_AS_EXPM1L
	/* base^x - 1 is assembled as (2^f - 1) * 2^i + (2^i - 1).  */
	fstp	%st(1)		/* 2                  */
	fscale			/* 2 scale factor is st(1); base^x - 2^i */
	fxch			/* 2 i                */
	fld1			/* 3 1.0              */
	fscale			/* 3 2^i              */
	fld1			/* 4 1.0              */
	fsubrp	%st, %st(1)	/* 3 2^i - 1.0        */
	fstp	%st(1)		/* 2                  */
	faddp	%st, %st(1)	/* 1 base^x - 1.0     */
#else
	fld1			/* 4 1.0              */
	faddp			/* 3 2^(fract(x * log2(base))) */
	fstp	%st(1)		/* 2  */
	fscale			/* 2 scale factor is st(1); base^x */
	fstp	%st(1)		/* 1  */
	/* NOTE(review): macro from <x86_64-math-asm.h>; by its name it
	   forces an underflow exception for tiny non-negative results
	   -- confirm against the header.  */
	LDBL_CHECK_FORCE_UFLOW_NONNEG
#endif
	fstp	%st(1)		/* 0  */
	jmp	2f
1:
#ifdef USE_AS_EXPM1L
	/* For expm1l, only negative sign gets here.  */
	fstp	%st
	fld1
	fchs			/* Result is -1.  */
#else
	/* %ax still holds the status word stored at label 5; bit 9 is
	   C1, the sign reported by FXAM.  */
	testl	$0x200, %eax	/* Test sign.  */
	jz	2f		/* If positive, jump.  */
	fstp	%st
	fldz			/* Set result to 0.  */
#endif
2:	ret
END(IEEE754_EXPL)
/* Export the additional names for the function defined above.  The
   expm1l build publishes __expm1l as a hidden definition plus the weak
   public name expm1l; the exp/exp10 builds add the "_finite" name
   selected at the top of the file as a strong alias.
   NOTE(review): libm_hidden_def, weak_alias and strong_alias are glibc
   macros defined outside this file -- confirm their exact effect
   there.  */
#ifdef USE_AS_EXPM1L
libm_hidden_def (__expm1l)
weak_alias (__expm1l, expm1l)
#else
strong_alias (IEEE754_EXPL, EXPL_FINITE)
#endif
217

Browse the source code of glibc/sysdeps/x86_64/fpu/e_expl.S