/* x86-64 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
   sum in a third limb vector.
   Copyright (C) 2006-2021 Free Software Foundation, Inc.
   This file is part of the GNU MP Library.

   The GNU MP Library is free software; you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as published by
   the Free Software Foundation; either version 2.1 of the License, or (at your
   option) any later version.

   The GNU MP Library is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
   License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with the GNU MP Library; see the file COPYING.LIB.  If not,
   see <https://www.gnu.org/licenses/>.  */
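
/* C-level view (a sketch of the interface, not the exact glibc
   declaration):

     mp_limb_t __mpn_add_n (mp_limb_t *rp, const mp_limb_t *up,
                            const mp_limb_t *vp, mp_size_t n);

   Adds the n-limb vectors {up,n} and {vp,n}, stores the low n limbs of
   the sum at rp, and returns the carry out (0 or 1).  Under the x86-64
   SysV ABI the arguments arrive in rdi, rsi, rdx and rcx, which is what
   the defines below name.  */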

#include "sysdep.h"
#include "asm-syntax.h"

#define rp	%rdi
#define up	%rsi
#define vp	%rdx
#define n	%rcx
#define cy	%r8

#ifndef func
# define func __mpn_add_n
# define ADCSBB adc
#endif
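
/* sub_n.S reuses this file: it defines func as __mpn_sub_n and ADCSBB
   as sbb before including it, turning every add-with-carry below into
   a subtract-with-borrow.  */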

	.text
ENTRY (func)
	xor	%r8, %r8	/* cy = 0: no carry-in */
	mov	(up), %r10	/* preload the first limb of each source */
	mov	(vp), %r11

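/* Bias each pointer toward its last limb and negate n, so the loop can
   index with a negative count that climbs toward zero and jrcxz can
   test for the end without disturbing the carry flag.  */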
	lea	-8(up,n,8), up
	lea	-8(vp,n,8), vp
	lea	-16(rp,n,8), rp
	mov	%ecx, %eax
	neg	n
	and	$3, %eax	/* eax = n mod 4 */
	je	L(b00)
	add	%rax, n		/* clear low rcx bits for jrcxz */
	cmp	$2, %eax
	jl	L(b01)
	je	L(b10)

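/* Entry stubs: n mod 4 picks where the 4-way unrolled loop is entered.
   Each stub loads CF from the zeroed cy register (shr shifts its low
   bit, 0, into CF) and jumps to the matching point in the loop.  */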
L(b11):	shr	%r8		/* set cy */
	jmp	L(e11)

L(b00):	shr	%r8		/* set cy */
	mov	%r10, %r8	/* L(e00) consumes the pair from r8/r9 */
	mov	%r11, %r9
	lea	4(n), n		/* step the index past the group entered mid-loop */
	jmp	L(e00)

L(b01):	shr	%r8		/* set cy */
	jmp	L(e01)

L(b10):	shr	%r8		/* set cy */
	mov	%r10, %r8	/* L(e10) consumes the pair from r8/r9 */
	mov	%r11, %r9
	jmp	L(e10)

L(end):	ADCSBB	%r11, %r10
	mov	%r10, 8(rp)	/* store the final limb */
	mov	%ecx, %eax	/* clear eax, ecx contains 0 */
	adc	%eax, %eax	/* return the carry out in eax */
	ret
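
/* Main loop, 4-way unrolled and software-pipelined by one limb pair:
   the loads for the next pair issue before the ADCSBB/store of the
   current one.  Only mov, lea, jrcxz and jmp sit between the ADCSBBs,
   none of which writes CF, so the carry chain is preserved across the
   whole vector.  */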
	.p2align 4
L(top):
	mov	-24(up,n,8), %r8
	mov	-24(vp,n,8), %r9
	ADCSBB	%r11, %r10
	mov	%r10, -24(rp,n,8)
L(e00):
	mov	-16(up,n,8), %r10
	mov	-16(vp,n,8), %r11
	ADCSBB	%r9, %r8
	mov	%r8, -16(rp,n,8)
L(e11):
	mov	-8(up,n,8), %r8
	mov	-8(vp,n,8), %r9
	ADCSBB	%r11, %r10
	mov	%r10, -8(rp,n,8)
L(e10):
	mov	(up,n,8), %r10
	mov	(vp,n,8), %r11
	ADCSBB	%r9, %r8
	mov	%r8, (rp,n,8)
L(e01):
	jrcxz	L(end)		/* n reached zero: finish the last limb */
	lea	4(n), n		/* advance the index; lea leaves CF intact */
	jmp	L(top)
END (func)