1 | /* ix87 specific implementation of pow function. |
2 | Copyright (C) 1996-2021 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996. |
5 | |
6 | The GNU C Library is free software; you can redistribute it and/or |
7 | modify it under the terms of the GNU Lesser General Public |
8 | License as published by the Free Software Foundation; either |
9 | version 2.1 of the License, or (at your option) any later version. |
10 | |
11 | The GNU C Library is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | Lesser General Public License for more details. |
15 | |
16 | You should have received a copy of the GNU Lesser General Public |
17 | License along with the GNU C Library; if not, see |
18 | <https://www.gnu.org/licenses/>. */ |
19 | |
20 | #include <machine/asm.h> |
21 | #include <x86_64-math-asm.h> |
22 | #include <libm-alias-finite.h> |
23 | |
24 | .section .rodata.cst8,"aM" ,@progbits,8 |
25 | |
26 | .p2align 3 |
27 | .type one,@object |
28 | one: .double 1.0 |
29 | ASM_SIZE_DIRECTIVE(one) |
30 | .type p2,@object |
31 | p2: .byte 0, 0, 0, 0, 0, 0, 0x10, 0x40 |
32 | ASM_SIZE_DIRECTIVE(p2) |
33 | .type p63,@object |
34 | p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43 |
35 | ASM_SIZE_DIRECTIVE(p63) |
36 | .type p64,@object |
37 | p64: .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x43 |
38 | ASM_SIZE_DIRECTIVE(p64) |
39 | .type p78,@object |
40 | p78: .byte 0, 0, 0, 0, 0, 0, 0xd0, 0x44 |
41 | ASM_SIZE_DIRECTIVE(p78) |
42 | .type pm79,@object |
43 | pm79: .byte 0, 0, 0, 0, 0, 0, 0, 0x3b |
44 | ASM_SIZE_DIRECTIVE(pm79) |
45 | |
46 | .section .rodata.cst16,"aM" ,@progbits,16 |
47 | |
48 | .p2align 3 |
49 | .type infinity,@object |
50 | inf_zero: |
51 | infinity: |
52 | .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x7f |
53 | ASM_SIZE_DIRECTIVE(infinity) |
54 | .type zero,@object |
55 | zero: .double 0.0 |
56 | ASM_SIZE_DIRECTIVE(zero) |
57 | .type minf_mzero,@object |
58 | minf_mzero: |
59 | minfinity: |
60 | .byte 0, 0, 0, 0, 0, 0, 0xf0, 0xff |
61 | mzero: |
62 | .byte 0, 0, 0, 0, 0, 0, 0, 0x80 |
63 | ASM_SIZE_DIRECTIVE(minf_mzero) |
64 | DEFINE_LDBL_MIN |
65 | |
66 | #ifdef PIC |
67 | # define MO(op) op##(%rip) |
68 | #else |
69 | # define MO(op) op |
70 | #endif |
71 | |
72 | .text |
73 | ENTRY(__ieee754_powl) |
74 | fldt 24(%rsp) // y |
75 | fxam |
76 | |
77 | |
78 | fnstsw |
79 | movb %ah, %dl |
80 | andb $0x45, %ah |
81 | cmpb $0x40, %ah // is y == 0 ? |
82 | je 11f |
83 | |
84 | cmpb $0x05, %ah // is y == ±inf ? |
85 | je 12f |
86 | |
87 | cmpb $0x01, %ah // is y == NaN ? |
88 | je 30f |
89 | |
90 | fldt 8(%rsp) // x : y |
91 | |
92 | fxam |
93 | fnstsw |
94 | movb %ah, %dh |
95 | andb $0x45, %ah |
96 | cmpb $0x40, %ah |
97 | je 20f // x is ±0 |
98 | |
99 | cmpb $0x05, %ah |
100 | je 15f // x is ±inf |
101 | |
102 | cmpb $0x01, %ah |
103 | je 31f // x is NaN |
104 | |
105 | fxch // y : x |
106 | |
107 | /* fistpll raises invalid exception for |y| >= 1L<<63. */ |
108 | fldl MO(p63) // 1L<<63 : y : x |
109 | fld %st(1) // y : 1L<<63 : y : x |
110 | fabs // |y| : 1L<<63 : y : x |
111 | fcomip %st(1), %st // 1L<<63 : y : x |
112 | fstp %st(0) // y : x |
113 | jnc 2f |
114 | |
115 | /* First see whether `y' is a natural number. In this case we |
116 | can use a more precise algorithm. */ |
117 | fld %st // y : y : x |
118 | fistpll -8(%rsp) // y : x |
119 | fildll -8(%rsp) // int(y) : y : x |
120 | fucomip %st(1),%st // y : x |
121 | je 9f |
122 | |
123 | // If y has absolute value at most 0x1p-79, then any finite |
124 | // nonzero x will result in 1. Saturate y to those bounds to |
125 | // avoid underflow in the calculation of y*log2(x). |
126 | fldl MO(pm79) // 0x1p-79 : y : x |
127 | fld %st(1) // y : 0x1p-79 : y : x |
128 | fabs // |y| : 0x1p-79 : y : x |
129 | fcomip %st(1), %st // 0x1p-79 : y : x |
130 | fstp %st(0) // y : x |
131 | jnc 3f |
132 | fstp %st(0) // pop y |
133 | fldl MO(pm79) // 0x1p-79 : x |
134 | testb $2, %dl |
135 | jnz 3f // y > 0 |
136 | fchs // -0x1p-79 : x |
137 | jmp 3f |
138 | |
139 | 9: /* OK, we have an integer value for y. Unless very small |
140 | (we use < 4), use the algorithm for real exponent to avoid |
141 | accumulation of errors. */ |
142 | fldl MO(p2) // 4 : y : x |
143 | fld %st(1) // y : 4 : y : x |
144 | fabs // |y| : 4 : y : x |
145 | fcomip %st(1), %st // 4 : y : x |
146 | fstp %st(0) // y : x |
147 | jnc 3f |
148 | mov -8(%rsp),%eax |
149 | mov -4(%rsp),%edx |
150 | orl $0, %edx |
151 | fstp %st(0) // x |
152 | jns 4f // y >= 0, jump |
153 | fdivrl MO(one) // 1/x (now referred to as x) |
154 | negl %eax |
155 | adcl $0, %edx |
156 | negl %edx |
157 | 4: fldl MO(one) // 1 : x |
158 | fxch |
159 | |
160 | /* If y is even, take the absolute value of x. Otherwise, |
161 | ensure all intermediate values that might overflow have the |
162 | sign of x. */ |
163 | testb $1, %al |
164 | jnz 6f |
165 | fabs |
166 | |
167 | 6: shrdl $1, %edx, %eax |
168 | jnc 5f |
169 | fxch |
170 | fabs |
171 | fmul %st(1) // x : ST*x |
172 | fxch |
173 | 5: fld %st // x : x : ST*x |
174 | fabs // |x| : x : ST*x |
175 | fmulp // |x|*x : ST*x |
176 | shrl $1, %edx |
177 | movl %eax, %ecx |
178 | orl %edx, %ecx |
179 | jnz 6b |
180 | fstp %st(0) // ST*x |
181 | LDBL_CHECK_FORCE_UFLOW_NONNAN |
182 | ret |
183 | |
184 | /* y is ±NAN */ |
185 | 30: fldt 8(%rsp) // x : y |
186 | fldl MO(one) // 1.0 : x : y |
187 | fucomip %st(1),%st // x : y |
188 | je 32f |
189 | 31: /* At least one argument NaN, and result should be NaN. */ |
190 | faddp |
191 | ret |
192 | 32: jc 31b |
193 | /* pow (1, NaN); check if the NaN signaling. */ |
194 | testb $0x40, 31(%rsp) |
195 | jz 31b |
196 | fstp %st(1) |
197 | ret |
198 | |
199 | .align ALIGNARG(4) |
200 | 2: // y is a large integer (absolute value at least 1L<<63). |
201 | // If y has absolute value at least 1L<<78, then any finite |
202 | // nonzero x will result in 0 (underflow), 1 or infinity (overflow). |
203 | // Saturate y to those bounds to avoid overflow in the calculation |
204 | // of y*log2(x). |
205 | fldl MO(p78) // 1L<<78 : y : x |
206 | fld %st(1) // y : 1L<<78 : y : x |
207 | fabs // |y| : 1L<<78 : y : x |
208 | fcomip %st(1), %st // 1L<<78 : y : x |
209 | fstp %st(0) // y : x |
210 | jc 3f |
211 | fstp %st(0) // pop y |
212 | fldl MO(p78) // 1L<<78 : x |
213 | testb $2, %dl |
214 | jz 3f // y > 0 |
215 | fchs // -(1L<<78) : x |
216 | .align ALIGNARG(4) |
217 | 3: /* y is a real number. */ |
218 | subq $40, %rsp |
219 | cfi_adjust_cfa_offset (40) |
220 | fstpt 16(%rsp) // x |
221 | fstpt (%rsp) // <empty> |
222 | call HIDDEN_JUMPTARGET (__powl_helper) // <result> |
223 | addq $40, %rsp |
224 | cfi_adjust_cfa_offset (-40) |
225 | ret |
226 | |
227 | // pow(x,±0) = 1, unless x is sNaN |
228 | .align ALIGNARG(4) |
229 | 11: fstp %st(0) // pop y |
230 | fldt 8(%rsp) // x |
231 | fxam |
232 | fnstsw |
233 | andb $0x45, %ah |
234 | cmpb $0x01, %ah |
235 | je 112f // x is NaN |
236 | 111: fstp %st(0) |
237 | fldl MO(one) |
238 | ret |
239 | |
240 | 112: testb $0x40, 15(%rsp) |
241 | jnz 111b |
242 | fadd %st(0) |
243 | ret |
244 | |
245 | // y == ±inf |
246 | .align ALIGNARG(4) |
247 | 12: fstp %st(0) // pop y |
248 | fldl MO(one) // 1 |
249 | fldt 8(%rsp) // x : 1 |
250 | fabs // abs(x) : 1 |
251 | fucompp // < 1, == 1, or > 1 |
252 | fnstsw |
253 | andb $0x45, %ah |
254 | cmpb $0x45, %ah |
255 | je 13f // jump if x is NaN |
256 | |
257 | cmpb $0x40, %ah |
258 | je 14f // jump if |x| == 1 |
259 | |
260 | shlb $1, %ah |
261 | xorb %ah, %dl |
262 | andl $2, %edx |
263 | #ifdef PIC |
264 | lea inf_zero(%rip),%rcx |
265 | fldl (%rcx, %rdx, 4) |
266 | #else |
267 | fldl inf_zero(,%rdx, 4) |
268 | #endif |
269 | ret |
270 | |
271 | .align ALIGNARG(4) |
272 | 14: fldl MO(one) |
273 | ret |
274 | |
275 | .align ALIGNARG(4) |
276 | 13: fldt 8(%rsp) // load x == NaN |
277 | fadd %st(0) |
278 | ret |
279 | |
280 | .align ALIGNARG(4) |
281 | // x is ±inf |
282 | 15: fstp %st(0) // y |
283 | testb $2, %dh |
284 | jz 16f // jump if x == +inf |
285 | |
286 | // fistpll raises invalid exception for |y| >= 1L<<63, but y |
287 | // may be odd unless we know |y| >= 1L<<64. |
288 | fldl MO(p64) // 1L<<64 : y |
289 | fld %st(1) // y : 1L<<64 : y |
290 | fabs // |y| : 1L<<64 : y |
291 | fcomip %st(1), %st // 1L<<64 : y |
292 | fstp %st(0) // y |
293 | jnc 16f |
294 | fldl MO(p63) // p63 : y |
295 | fxch // y : p63 |
296 | fprem // y%p63 : p63 |
297 | fstp %st(1) // y%p63 |
298 | |
299 | // We must find out whether y is an odd integer. |
300 | fld %st // y : y |
301 | fistpll -8(%rsp) // y |
302 | fildll -8(%rsp) // int(y) : y |
303 | fucomip %st(1),%st |
304 | ffreep %st // <empty> |
305 | jne 17f |
306 | |
307 | // OK, the value is an integer, but is it odd? |
308 | mov -8(%rsp), %eax |
309 | mov -4(%rsp), %edx |
310 | andb $1, %al |
311 | jz 18f // jump if not odd |
312 | // It's an odd integer. |
313 | shrl $31, %edx |
314 | #ifdef PIC |
315 | lea minf_mzero(%rip),%rcx |
316 | fldl (%rcx, %rdx, 8) |
317 | #else |
318 | fldl minf_mzero(,%rdx, 8) |
319 | #endif |
320 | ret |
321 | |
322 | .align ALIGNARG(4) |
323 | 16: fcompl MO(zero) |
324 | fnstsw |
325 | shrl $5, %eax |
326 | andl $8, %eax |
327 | #ifdef PIC |
328 | lea inf_zero(%rip),%rcx |
329 | fldl (%rcx, %rax, 1) |
330 | #else |
331 | fldl inf_zero(,%rax, 1) |
332 | #endif |
333 | ret |
334 | |
335 | .align ALIGNARG(4) |
336 | 17: shll $30, %edx // sign bit for y in right position |
337 | 18: shrl $31, %edx |
338 | #ifdef PIC |
339 | lea inf_zero(%rip),%rcx |
340 | fldl (%rcx, %rdx, 8) |
341 | #else |
342 | fldl inf_zero(,%rdx, 8) |
343 | #endif |
344 | ret |
345 | |
346 | .align ALIGNARG(4) |
347 | // x is ±0 |
348 | 20: fstp %st(0) // y |
349 | testb $2, %dl |
350 | jz 21f // y > 0 |
351 | |
352 | // x is ±0 and y is < 0. We must find out whether y is an odd integer. |
353 | testb $2, %dh |
354 | jz 25f |
355 | |
356 | // fistpll raises invalid exception for |y| >= 1L<<63, but y |
357 | // may be odd unless we know |y| >= 1L<<64. |
358 | fldl MO(p64) // 1L<<64 : y |
359 | fld %st(1) // y : 1L<<64 : y |
360 | fabs // |y| : 1L<<64 : y |
361 | fcomip %st(1), %st // 1L<<64 : y |
362 | fstp %st(0) // y |
363 | jnc 25f |
364 | fldl MO(p63) // p63 : y |
365 | fxch // y : p63 |
366 | fprem // y%p63 : p63 |
367 | fstp %st(1) // y%p63 |
368 | |
369 | fld %st // y : y |
370 | fistpll -8(%rsp) // y |
371 | fildll -8(%rsp) // int(y) : y |
372 | fucomip %st(1),%st |
373 | ffreep %st // <empty> |
374 | jne 26f |
375 | |
376 | // OK, the value is an integer, but is it odd? |
377 | mov -8(%rsp),%eax |
378 | mov -4(%rsp),%edx |
379 | andb $1, %al |
380 | jz 27f // jump if not odd |
381 | // It's an odd integer. |
382 | // Raise divide-by-zero exception and get minus infinity value. |
383 | fldl MO(one) |
384 | fdivl MO(zero) |
385 | fchs |
386 | ret |
387 | |
388 | 25: fstp %st(0) |
389 | 26: |
390 | 27: // Raise divide-by-zero exception and get infinity value. |
391 | fldl MO(one) |
392 | fdivl MO(zero) |
393 | ret |
394 | |
395 | .align ALIGNARG(4) |
396 | // x is ±0 and y is > 0. We must find out whether y is an odd integer. |
397 | 21: testb $2, %dh |
398 | jz 22f |
399 | |
400 | // fistpll raises invalid exception for |y| >= 1L<<63, but y |
401 | // may be odd unless we know |y| >= 1L<<64. |
402 | fldl MO(p64) // 1L<<64 : y |
403 | fxch // y : 1L<<64 |
404 | fcomi %st(1), %st // y : 1L<<64 |
405 | fstp %st(1) // y |
406 | jnc 22f |
407 | fldl MO(p63) // p63 : y |
408 | fxch // y : p63 |
409 | fprem // y%p63 : p63 |
410 | fstp %st(1) // y%p63 |
411 | |
412 | fld %st // y : y |
413 | fistpll -8(%rsp) // y |
414 | fildll -8(%rsp) // int(y) : y |
415 | fucomip %st(1),%st |
416 | ffreep %st // <empty> |
417 | jne 23f |
418 | |
419 | // OK, the value is an integer, but is it odd? |
420 | mov -8(%rsp),%eax |
421 | mov -4(%rsp),%edx |
422 | andb $1, %al |
423 | jz 24f // jump if not odd |
424 | // It's an odd integer. |
425 | fldl MO(mzero) |
426 | ret |
427 | |
428 | 22: fstp %st(0) |
429 | 23: |
430 | 24: fldl MO(zero) |
431 | ret |
432 | |
433 | END(__ieee754_powl) |
434 | libm_alias_finite (__ieee754_powl, __powl) |
435 | |