1 | /* |
2 | * Written by J.T. Conklin <jtc@netbsd.org>. |
3 | * Public domain. |
4 | * |
5 | * Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>. |
6 | */ |
7 | |
8 | /* |
9 | * The 8087 method for the exponential function is to calculate |
10 | * exp(x) = 2^(x log2(e)) |
11 | * after separating integer and fractional parts |
12 | * x log2(e) = i + f, |f| <= .5 |
13 | * 2^i is immediate but f needs to be precise for long double accuracy. |
14 | * Suppress range reduction error in computing f by the following. |
15 | * Separate x into integer and fractional parts |
16 | * x = xi + xf, |xf| <= .5 |
17 | * Separate log2(e) into the sum of an exact number c0 and small part c1. |
18 | * c0 + c1 = log2(e) to extra precision |
19 | * Then |
20 | * f = (c0 xi - i) + c0 xf + c1 x |
21 | * where c0 xi is exact and so also is (c0 xi - i). |
22 | * -- moshier@na-net.ornl.gov |
23 | */ |
24 | |
25 | #include <machine/asm.h> |
26 | #include <x86_64-math-asm.h> |
27 | |
/* Names for the variant being built: IEEE754_EXPL is the entry point,
   EXPL_FINITE the additional strong alias (left undefined for expm1l).  */
#if defined USE_AS_EXP10L
# define IEEE754_EXPL __ieee754_exp10l
# define EXPL_FINITE __exp10l_finite
#elif defined USE_AS_EXPM1L
# define IEEE754_EXPL __expm1l
# undef EXPL_FINITE
#else
# define IEEE754_EXPL __ieee754_expl
# define EXPL_FINITE __expl_finite
#endif

/* FLDLOG pushes log2(base): log2(10) for exp10l, log2(e) otherwise.  */
#if defined USE_AS_EXP10L
# define FLDLOG fldl2t
#else
# define FLDLOG fldl2e
#endif
41 | |
42 | .section .rodata.cst16,"aM" ,@progbits,16 |
43 | |
44 | .p2align 4 |
45 | #ifdef USE_AS_EXP10L |
46 | .type c0,@object |
47 | c0: .byte 0, 0, 0, 0, 0, 0, 0x9a, 0xd4, 0x00, 0x40 |
48 | .byte 0, 0, 0, 0, 0, 0 |
49 | ASM_SIZE_DIRECTIVE(c0) |
50 | .type c1,@object |
51 | c1: .byte 0x58, 0x92, 0xfc, 0x15, 0x37, 0x9a, 0x97, 0xf0, 0xef, 0x3f |
52 | .byte 0, 0, 0, 0, 0, 0 |
53 | ASM_SIZE_DIRECTIVE(c1) |
54 | #else |
55 | .type c0,@object |
56 | c0: .byte 0, 0, 0, 0, 0, 0, 0xaa, 0xb8, 0xff, 0x3f |
57 | .byte 0, 0, 0, 0, 0, 0 |
58 | ASM_SIZE_DIRECTIVE(c0) |
59 | .type c1,@object |
60 | c1: .byte 0x20, 0xfa, 0xee, 0xc2, 0x5f, 0x70, 0xa5, 0xec, 0xed, 0x3f |
61 | .byte 0, 0, 0, 0, 0, 0 |
62 | ASM_SIZE_DIRECTIVE(c1) |
63 | #endif |
64 | #ifndef USE_AS_EXPM1L |
65 | .type csat,@object |
66 | csat: .byte 0, 0, 0, 0, 0, 0, 0, 0x80, 0x0e, 0x40 |
67 | .byte 0, 0, 0, 0, 0, 0 |
68 | ASM_SIZE_DIRECTIVE(csat) |
69 | DEFINE_LDBL_MIN |
70 | #endif |
71 | |
/* MO(sym): memory operand naming a data symbol; %rip-relative when
   building PIC, the bare symbol otherwise.  */
#ifndef PIC
# define MO(op) op
#else
# define MO(op) op##(%rip)
#endif
77 | |
78 | .text |
79 | ENTRY(IEEE754_EXPL) |
80 | #ifdef USE_AS_EXPM1L |
81 | movzwl 8+8(%rsp), %eax |
82 | xorb $0x80, %ah // invert sign bit (now 1 is "positive") |
83 | cmpl $0xc006, %eax // is num positive and exp >= 6 (number is >= 128.0)? |
84 | jae HIDDEN_JUMPTARGET (__expl) // (if num is denormal, it is at least >= 64.0) |
85 | #endif |
86 | fldt 8(%rsp) |
87 | /* I added the following ugly construct because expl(+-Inf) resulted |
88 | in NaN. The ugliness results from the bright minds at Intel. |
89 | For the i686 the code can be written better. |
90 | -- drepper@cygnus.com. */ |
91 | fxam /* Is NaN or +-Inf? */ |
92 | #ifdef USE_AS_EXPM1L |
93 | xorb $0x80, %ah |
94 | cmpl $0xc006, %eax |
95 | fstsw %ax |
96 | movb $0x45, %dh |
97 | jb 4f |
98 | |
99 | /* Below -64.0 (may be -NaN or -Inf). */ |
100 | andb %ah, %dh |
101 | cmpb $0x01, %dh |
102 | je 2f /* Is +-NaN, jump. */ |
103 | jmp 1f /* -large, possibly -Inf. */ |
104 | |
105 | 4: /* In range -64.0 to 64.0 (may be +-0 but not NaN or +-Inf). */ |
106 | /* Test for +-0 as argument. */ |
107 | andb %ah, %dh |
108 | cmpb $0x40, %dh |
109 | je 2f |
110 | |
111 | /* Test for arguments that are small but not subnormal. */ |
112 | movzwl 8+8(%rsp), %eax |
113 | andl $0x7fff, %eax |
114 | cmpl $0x3fbf, %eax |
115 | jge 3f |
116 | /* Argument's exponent below -64; avoid spurious underflow if |
117 | normal. */ |
118 | cmpl $0x0001, %eax |
119 | jge 2f |
120 | /* Force underflow and return the argument, to avoid wrong signs |
121 | of zero results from the code below in some rounding modes. */ |
122 | fld %st |
123 | fmul %st |
124 | fstp %st |
125 | jmp 2f |
126 | #else |
127 | movzwl 8+8(%rsp), %eax |
128 | andl $0x7fff, %eax |
129 | cmpl $0x400d, %eax |
130 | jg 5f |
131 | cmpl $0x3fbc, %eax |
132 | jge 3f |
133 | /* Argument's exponent below -67, result rounds to 1. */ |
134 | fld1 |
135 | faddp |
136 | jmp 2f |
137 | 5: /* Overflow, underflow or infinity or NaN as argument. */ |
138 | fstsw %ax |
139 | movb $0x45, %dh |
140 | andb %ah, %dh |
141 | cmpb $0x05, %dh |
142 | je 1f /* Is +-Inf, jump. */ |
143 | cmpb $0x01, %dh |
144 | je 2f /* Is +-NaN, jump. */ |
145 | /* Overflow or underflow; saturate. */ |
146 | fstp %st |
147 | fldt MO(csat) |
148 | andb $2, %ah |
149 | jz 3f |
150 | fchs |
151 | #endif |
152 | 3: FLDLOG /* 1 log2(base) */ |
153 | fmul %st(1), %st /* 1 x log2(base) */ |
154 | /* Set round-to-nearest temporarily. */ |
155 | fstcw -4(%rsp) |
156 | movl $0xf3ff, %edx |
157 | andl -4(%rsp), %edx |
158 | movl %edx, -8(%rsp) |
159 | fldcw -8(%rsp) |
160 | frndint /* 1 i */ |
161 | fld %st(1) /* 2 x */ |
162 | frndint /* 2 xi */ |
163 | fldcw -4(%rsp) |
164 | fld %st(1) /* 3 i */ |
165 | fldt MO(c0) /* 4 c0 */ |
166 | fld %st(2) /* 5 xi */ |
167 | fmul %st(1), %st /* 5 c0 xi */ |
168 | fsubp %st, %st(2) /* 4 f = c0 xi - i */ |
169 | fld %st(4) /* 5 x */ |
170 | fsub %st(3), %st /* 5 xf = x - xi */ |
171 | fmulp %st, %st(1) /* 4 c0 xf */ |
172 | faddp %st, %st(1) /* 3 f = f + c0 xf */ |
173 | fldt MO(c1) /* 4 */ |
174 | fmul %st(4), %st /* 4 c1 * x */ |
175 | faddp %st, %st(1) /* 3 f = f + c1 * x */ |
176 | f2xm1 /* 3 2^(fract(x * log2(base))) - 1 */ |
177 | #ifdef USE_AS_EXPM1L |
178 | fstp %st(1) /* 2 */ |
179 | fscale /* 2 scale factor is st(1); base^x - 2^i */ |
180 | fxch /* 2 i */ |
181 | fld1 /* 3 1.0 */ |
182 | fscale /* 3 2^i */ |
183 | fld1 /* 4 1.0 */ |
184 | fsubrp %st, %st(1) /* 3 2^i - 1.0 */ |
185 | fstp %st(1) /* 2 */ |
186 | faddp %st, %st(1) /* 1 base^x - 1.0 */ |
187 | #else |
188 | fld1 /* 4 1.0 */ |
189 | faddp /* 3 2^(fract(x * log2(base))) */ |
190 | fstp %st(1) /* 2 */ |
191 | fscale /* 2 scale factor is st(1); base^x */ |
192 | fstp %st(1) /* 1 */ |
193 | LDBL_CHECK_FORCE_UFLOW_NONNEG |
194 | #endif |
195 | fstp %st(1) /* 0 */ |
196 | jmp 2f |
197 | 1: |
198 | #ifdef USE_AS_EXPM1L |
199 | /* For expm1l, only negative sign gets here. */ |
200 | fstp %st |
201 | fld1 |
202 | fchs |
203 | #else |
204 | testl $0x200, %eax /* Test sign. */ |
205 | jz 2f /* If positive, jump. */ |
206 | fstp %st |
207 | fldz /* Set result to 0. */ |
208 | #endif |
209 | 2: ret |
210 | END(IEEE754_EXPL) |
211 | #ifdef USE_AS_EXPM1L |
212 | libm_hidden_def (__expm1l) |
213 | weak_alias (__expm1l, expm1l) |
214 | #else |
215 | strong_alias (IEEE754_EXPL, EXPL_FINITE) |
216 | #endif |
217 | |