1 | /* ix87 specific implementation of pow function. |
2 | Copyright (C) 1996-2023 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #include <machine/asm.h> |
20 | #include <x86_64-math-asm.h> |
21 | #include <libm-alias-finite.h> |
22 | |
23 | .section .rodata.cst8,"aM" ,@progbits,8 |
24 | |
25 | .p2align 3 |
26 | .type one,@object |
27 | one: .double 1.0 |
28 | ASM_SIZE_DIRECTIVE(one) |
29 | .type p2,@object |
30 | p2: .byte 0, 0, 0, 0, 0, 0, 0x10, 0x40 |
31 | ASM_SIZE_DIRECTIVE(p2) |
32 | .type p63,@object |
33 | p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43 |
34 | ASM_SIZE_DIRECTIVE(p63) |
35 | .type p64,@object |
36 | p64: .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x43 |
37 | ASM_SIZE_DIRECTIVE(p64) |
38 | .type p78,@object |
39 | p78: .byte 0, 0, 0, 0, 0, 0, 0xd0, 0x44 |
40 | ASM_SIZE_DIRECTIVE(p78) |
41 | .type pm79,@object |
42 | pm79: .byte 0, 0, 0, 0, 0, 0, 0, 0x3b |
43 | ASM_SIZE_DIRECTIVE(pm79) |
44 | |
45 | .section .rodata.cst16,"aM" ,@progbits,16 |
46 | |
47 | .p2align 3 |
48 | .type infinity,@object |
49 | inf_zero: |
50 | infinity: |
51 | .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x7f |
52 | ASM_SIZE_DIRECTIVE(infinity) |
53 | .type zero,@object |
54 | zero: .double 0.0 |
55 | ASM_SIZE_DIRECTIVE(zero) |
56 | .type minf_mzero,@object |
57 | minf_mzero: |
58 | minfinity: |
59 | .byte 0, 0, 0, 0, 0, 0, 0xf0, 0xff |
60 | mzero: |
61 | .byte 0, 0, 0, 0, 0, 0, 0, 0x80 |
62 | ASM_SIZE_DIRECTIVE(minf_mzero) |
63 | DEFINE_LDBL_MIN |
64 | |
65 | #ifdef PIC |
66 | # define MO(op) op##(%rip) |
67 | #else |
68 | # define MO(op) op |
69 | #endif |
70 | |
71 | .text |
72 | ENTRY(__ieee754_powl) |
73 | fldt 24(%rsp) // y |
74 | fxam |
75 | |
76 | |
77 | fnstsw |
78 | movb %ah, %dl |
79 | andb $0x45, %ah |
80 | cmpb $0x40, %ah // is y == 0 ? |
81 | je 11f |
82 | |
83 | cmpb $0x05, %ah // is y == ±inf ? |
84 | je 12f |
85 | |
86 | cmpb $0x01, %ah // is y == NaN ? |
87 | je 30f |
88 | |
89 | fldt 8(%rsp) // x : y |
90 | |
91 | fxam |
92 | fnstsw |
93 | movb %ah, %dh |
94 | andb $0x45, %ah |
95 | cmpb $0x40, %ah |
96 | je 20f // x is ±0 |
97 | |
98 | cmpb $0x05, %ah |
99 | je 15f // x is ±inf |
100 | |
101 | cmpb $0x01, %ah |
102 | je 31f // x is NaN |
103 | |
104 | fxch // y : x |
105 | |
106 | /* fistpll raises invalid exception for |y| >= 1L<<63. */ |
107 | fldl MO(p63) // 1L<<63 : y : x |
108 | fld %st(1) // y : 1L<<63 : y : x |
109 | fabs // |y| : 1L<<63 : y : x |
110 | fcomip %st(1), %st // 1L<<63 : y : x |
111 | fstp %st(0) // y : x |
112 | jnc 2f |
113 | |
114 | /* First see whether `y' is a natural number. In this case we |
115 | can use a more precise algorithm. */ |
116 | fld %st // y : y : x |
117 | fistpll -8(%rsp) // y : x |
118 | fildll -8(%rsp) // int(y) : y : x |
119 | fucomip %st(1),%st // y : x |
120 | je 9f |
121 | |
122 | // If y has absolute value at most 0x1p-79, then any finite |
123 | // nonzero x will result in 1. Saturate y to those bounds to |
124 | // avoid underflow in the calculation of y*log2(x). |
125 | fldl MO(pm79) // 0x1p-79 : y : x |
126 | fld %st(1) // y : 0x1p-79 : y : x |
127 | fabs // |y| : 0x1p-79 : y : x |
128 | fcomip %st(1), %st // 0x1p-79 : y : x |
129 | fstp %st(0) // y : x |
130 | jnc 3f |
131 | fstp %st(0) // pop y |
132 | fldl MO(pm79) // 0x1p-79 : x |
133 | testb $2, %dl |
134 | jnz 3f // y > 0 |
135 | fchs // -0x1p-79 : x |
136 | jmp 3f |
137 | |
138 | 9: /* OK, we have an integer value for y. Unless very small |
139 | (we use < 4), use the algorithm for real exponent to avoid |
140 | accumulation of errors. */ |
141 | fldl MO(p2) // 4 : y : x |
142 | fld %st(1) // y : 4 : y : x |
143 | fabs // |y| : 4 : y : x |
144 | fcomip %st(1), %st // 4 : y : x |
145 | fstp %st(0) // y : x |
146 | jnc 3f |
147 | mov -8(%rsp),%eax |
148 | mov -4(%rsp),%edx |
149 | orl $0, %edx |
150 | fstp %st(0) // x |
151 | jns 4f // y >= 0, jump |
152 | fdivrl MO(one) // 1/x (now referred to as x) |
153 | negl %eax |
154 | adcl $0, %edx |
155 | negl %edx |
156 | 4: fldl MO(one) // 1 : x |
157 | fxch |
158 | |
159 | /* If y is even, take the absolute value of x. Otherwise, |
160 | ensure all intermediate values that might overflow have the |
161 | sign of x. */ |
162 | testb $1, %al |
163 | jnz 6f |
164 | fabs |
165 | |
166 | 6: shrdl $1, %edx, %eax |
167 | jnc 5f |
168 | fxch |
169 | fabs |
170 | fmul %st(1) // x : ST*x |
171 | fxch |
172 | 5: fld %st // x : x : ST*x |
173 | fabs // |x| : x : ST*x |
174 | fmulp // |x|*x : ST*x |
175 | shrl $1, %edx |
176 | movl %eax, %ecx |
177 | orl %edx, %ecx |
178 | jnz 6b |
179 | fstp %st(0) // ST*x |
180 | LDBL_CHECK_FORCE_UFLOW_NONNAN |
181 | ret |
182 | |
183 | /* y is ±NAN */ |
184 | 30: fldt 8(%rsp) // x : y |
185 | fldl MO(one) // 1.0 : x : y |
186 | fucomip %st(1),%st // x : y |
187 | je 32f |
188 | 31: /* At least one argument NaN, and result should be NaN. */ |
189 | faddp |
190 | ret |
191 | 32: jc 31b |
192 | /* pow (1, NaN); check if the NaN signaling. */ |
193 | testb $0x40, 31(%rsp) |
194 | jz 31b |
195 | fstp %st(1) |
196 | ret |
197 | |
198 | .align ALIGNARG(4) |
199 | 2: // y is a large integer (absolute value at least 1L<<63). |
200 | // If y has absolute value at least 1L<<78, then any finite |
201 | // nonzero x will result in 0 (underflow), 1 or infinity (overflow). |
202 | // Saturate y to those bounds to avoid overflow in the calculation |
203 | // of y*log2(x). |
204 | fldl MO(p78) // 1L<<78 : y : x |
205 | fld %st(1) // y : 1L<<78 : y : x |
206 | fabs // |y| : 1L<<78 : y : x |
207 | fcomip %st(1), %st // 1L<<78 : y : x |
208 | fstp %st(0) // y : x |
209 | jc 3f |
210 | fstp %st(0) // pop y |
211 | fldl MO(p78) // 1L<<78 : x |
212 | testb $2, %dl |
213 | jz 3f // y > 0 |
214 | fchs // -(1L<<78) : x |
215 | .align ALIGNARG(4) |
216 | 3: /* y is a real number. */ |
217 | subq $40, %rsp |
218 | cfi_adjust_cfa_offset (40) |
219 | fstpt 16(%rsp) // x |
220 | fstpt (%rsp) // <empty> |
221 | call HIDDEN_JUMPTARGET (__powl_helper) // <result> |
222 | addq $40, %rsp |
223 | cfi_adjust_cfa_offset (-40) |
224 | ret |
225 | |
226 | // pow(x,±0) = 1, unless x is sNaN |
227 | .align ALIGNARG(4) |
228 | 11: fstp %st(0) // pop y |
229 | fldt 8(%rsp) // x |
230 | fxam |
231 | fnstsw |
232 | andb $0x45, %ah |
233 | cmpb $0x01, %ah |
234 | je 112f // x is NaN |
235 | 111: fstp %st(0) |
236 | fldl MO(one) |
237 | ret |
238 | |
239 | 112: testb $0x40, 15(%rsp) |
240 | jnz 111b |
241 | fadd %st(0) |
242 | ret |
243 | |
244 | // y == ±inf |
245 | .align ALIGNARG(4) |
246 | 12: fstp %st(0) // pop y |
247 | fldl MO(one) // 1 |
248 | fldt 8(%rsp) // x : 1 |
249 | fabs // abs(x) : 1 |
250 | fucompp // < 1, == 1, or > 1 |
251 | fnstsw |
252 | andb $0x45, %ah |
253 | cmpb $0x45, %ah |
254 | je 13f // jump if x is NaN |
255 | |
256 | cmpb $0x40, %ah |
257 | je 14f // jump if |x| == 1 |
258 | |
259 | shlb $1, %ah |
260 | xorb %ah, %dl |
261 | andl $2, %edx |
262 | #ifdef PIC |
263 | lea inf_zero(%rip),%rcx |
264 | fldl (%rcx, %rdx, 4) |
265 | #else |
266 | fldl inf_zero(,%rdx, 4) |
267 | #endif |
268 | ret |
269 | |
270 | .align ALIGNARG(4) |
271 | 14: fldl MO(one) |
272 | ret |
273 | |
274 | .align ALIGNARG(4) |
275 | 13: fldt 8(%rsp) // load x == NaN |
276 | fadd %st(0) |
277 | ret |
278 | |
279 | .align ALIGNARG(4) |
280 | // x is ±inf |
281 | 15: fstp %st(0) // y |
282 | testb $2, %dh |
283 | jz 16f // jump if x == +inf |
284 | |
285 | // fistpll raises invalid exception for |y| >= 1L<<63, but y |
286 | // may be odd unless we know |y| >= 1L<<64. |
287 | fldl MO(p64) // 1L<<64 : y |
288 | fld %st(1) // y : 1L<<64 : y |
289 | fabs // |y| : 1L<<64 : y |
290 | fcomip %st(1), %st // 1L<<64 : y |
291 | fstp %st(0) // y |
292 | jnc 16f |
293 | fldl MO(p63) // p63 : y |
294 | fxch // y : p63 |
295 | fprem // y%p63 : p63 |
296 | fstp %st(1) // y%p63 |
297 | |
298 | // We must find out whether y is an odd integer. |
299 | fld %st // y : y |
300 | fistpll -8(%rsp) // y |
301 | fildll -8(%rsp) // int(y) : y |
302 | fucomip %st(1),%st |
303 | ffreep %st // <empty> |
304 | jne 17f |
305 | |
306 | // OK, the value is an integer, but is it odd? |
307 | mov -8(%rsp), %eax |
308 | mov -4(%rsp), %edx |
309 | andb $1, %al |
310 | jz 18f // jump if not odd |
311 | // It's an odd integer. |
312 | shrl $31, %edx |
313 | #ifdef PIC |
314 | lea minf_mzero(%rip),%rcx |
315 | fldl (%rcx, %rdx, 8) |
316 | #else |
317 | fldl minf_mzero(,%rdx, 8) |
318 | #endif |
319 | ret |
320 | |
321 | .align ALIGNARG(4) |
322 | 16: fcompl MO(zero) |
323 | fnstsw |
324 | shrl $5, %eax |
325 | andl $8, %eax |
326 | #ifdef PIC |
327 | lea inf_zero(%rip),%rcx |
328 | fldl (%rcx, %rax, 1) |
329 | #else |
330 | fldl inf_zero(,%rax, 1) |
331 | #endif |
332 | ret |
333 | |
334 | .align ALIGNARG(4) |
335 | 17: shll $30, %edx // sign bit for y in right position |
336 | 18: shrl $31, %edx |
337 | #ifdef PIC |
338 | lea inf_zero(%rip),%rcx |
339 | fldl (%rcx, %rdx, 8) |
340 | #else |
341 | fldl inf_zero(,%rdx, 8) |
342 | #endif |
343 | ret |
344 | |
345 | .align ALIGNARG(4) |
346 | // x is ±0 |
347 | 20: fstp %st(0) // y |
348 | testb $2, %dl |
349 | jz 21f // y > 0 |
350 | |
351 | // x is ±0 and y is < 0. We must find out whether y is an odd integer. |
352 | testb $2, %dh |
353 | jz 25f |
354 | |
355 | // fistpll raises invalid exception for |y| >= 1L<<63, but y |
356 | // may be odd unless we know |y| >= 1L<<64. |
357 | fldl MO(p64) // 1L<<64 : y |
358 | fld %st(1) // y : 1L<<64 : y |
359 | fabs // |y| : 1L<<64 : y |
360 | fcomip %st(1), %st // 1L<<64 : y |
361 | fstp %st(0) // y |
362 | jnc 25f |
363 | fldl MO(p63) // p63 : y |
364 | fxch // y : p63 |
365 | fprem // y%p63 : p63 |
366 | fstp %st(1) // y%p63 |
367 | |
368 | fld %st // y : y |
369 | fistpll -8(%rsp) // y |
370 | fildll -8(%rsp) // int(y) : y |
371 | fucomip %st(1),%st |
372 | ffreep %st // <empty> |
373 | jne 26f |
374 | |
375 | // OK, the value is an integer, but is it odd? |
376 | mov -8(%rsp),%eax |
377 | mov -4(%rsp),%edx |
378 | andb $1, %al |
379 | jz 27f // jump if not odd |
380 | // It's an odd integer. |
381 | // Raise divide-by-zero exception and get minus infinity value. |
382 | fldl MO(one) |
383 | fdivl MO(zero) |
384 | fchs |
385 | ret |
386 | |
387 | 25: fstp %st(0) |
388 | 26: |
389 | 27: // Raise divide-by-zero exception and get infinity value. |
390 | fldl MO(one) |
391 | fdivl MO(zero) |
392 | ret |
393 | |
394 | .align ALIGNARG(4) |
395 | // x is ±0 and y is > 0. We must find out whether y is an odd integer. |
396 | 21: testb $2, %dh |
397 | jz 22f |
398 | |
399 | // fistpll raises invalid exception for |y| >= 1L<<63, but y |
400 | // may be odd unless we know |y| >= 1L<<64. |
401 | fldl MO(p64) // 1L<<64 : y |
402 | fxch // y : 1L<<64 |
403 | fcomi %st(1), %st // y : 1L<<64 |
404 | fstp %st(1) // y |
405 | jnc 22f |
406 | fldl MO(p63) // p63 : y |
407 | fxch // y : p63 |
408 | fprem // y%p63 : p63 |
409 | fstp %st(1) // y%p63 |
410 | |
411 | fld %st // y : y |
412 | fistpll -8(%rsp) // y |
413 | fildll -8(%rsp) // int(y) : y |
414 | fucomip %st(1),%st |
415 | ffreep %st // <empty> |
416 | jne 23f |
417 | |
418 | // OK, the value is an integer, but is it odd? |
419 | mov -8(%rsp),%eax |
420 | mov -4(%rsp),%edx |
421 | andb $1, %al |
422 | jz 24f // jump if not odd |
423 | // It's an odd integer. |
424 | fldl MO(mzero) |
425 | ret |
426 | |
427 | 22: fstp %st(0) |
428 | 23: |
429 | 24: fldl MO(zero) |
430 | ret |
431 | |
432 | END(__ieee754_powl) |
433 | libm_alias_finite (__ieee754_powl, __powl) |
434 | |