/* x86-64 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
   sum in a third limb vector.
   Copyright (C) 2006-2021 Free Software Foundation, Inc.
   This file is part of the GNU MP Library.

   The GNU MP Library is free software; you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as published by
   the Free Software Foundation; either version 2.1 of the License, or (at your
   option) any later version.

   The GNU MP Library is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
   License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with the GNU MP Library; see the file COPYING.LIB.  If not,
   see <https://www.gnu.org/licenses/>.  */
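
/* For reference, a C model of what this routine computes -- a sketch
   only, not part of the build; the names "limb_t" and "add_n" are
   illustrative, standing in for the 64-bit limb type and this entry:

     typedef unsigned long limb_t;

     limb_t
     add_n (limb_t *rp, const limb_t *up, const limb_t *vp, long n)
     {
       limb_t cy = 0;
       for (long i = 0; i < n; i++)
	 {
	   limb_t u = up[i];
	   limb_t s = u + vp[i] + cy;
	   cy = cy ? (s <= u) : (s < u);   -- carry out of the 64-bit add
	   rp[i] = s;
	 }
       return cy;   -- the value this file returns in %rax
     }
*/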

#include "sysdep.h"
#include "asm-syntax.h"

/* System V AMD64 argument registers.  */
#define rp	%rdi		/* result (sum) limb vector */
#define up	%rsi		/* first source limb vector */
#define vp	%rdx		/* second source limb vector */
#define n	%rcx		/* number of limbs, > 0 */
#define cy	%r8		/* carry-in (cleared at entry) */

#ifndef func
# define func __mpn_add_n
# define ADCSBB adc
#endif
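
/* When func and ADCSBB are predefined before this file is included
   (as sub_n.S does, defining func to __mpn_sub_n and ADCSBB to sbb),
   the same loop body computes the difference and returns the borrow
   instead of the carry.  */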

	.text
ENTRY (func)
	xor	%r8, %r8	/* carry-in is 0 */
	mov	(up), %r10	/* preload the first limb pair */
	mov	(vp), %r11

	lea	-8(up,n,8), up	/* point at the last limbs; the loop */
	lea	-8(vp,n,8), vp	/* indexes them with negative n */
	lea	-16(rp,n,8), rp	/* rp is biased one limb more: stores trail loads */
	mov	%ecx, %eax
	neg	n		/* use n as a negative index, counting up */
	and	$3, %eax	/* n mod 4 selects the loop entry point */
	je	L(b00)
	add	%rax, n		/* clear low rcx bits for jrcxz */
	cmp	$2, %eax
	jl	L(b01)
	je	L(b10)

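/* Four entry blocks, one per value of n mod 4.  Each shifts the
   carry-in (always 0 here) into CF; the blocks entering at L(e00)
   and L(e10) also move the preloaded pair into the registers those
   steps consume.  */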
L(b11):	shr	%r8		/* set cy */
	jmp	L(e11)

L(b00):	shr	%r8		/* set cy */
	mov	%r10, %r8	/* L(e00) expects the pair in %r8/%r9 */
	mov	%r11, %r9
	lea	4(n), n		/* account for the first block now; the
				   in-loop advance comes after jrcxz */
	jmp	L(e00)

L(b01):	shr	%r8		/* set cy */
	jmp	L(e01)

L(b10):	shr	%r8		/* set cy */
	mov	%r10, %r8	/* L(e10) expects the pair in %r8/%r9 */
	mov	%r11, %r9
	jmp	L(e10)

L(end):	ADCSBB	%r11, %r10	/* add the final limb pair */
	mov	%r10, 8(rp)
	mov	%ecx, %eax	/* clear eax, ecx contains 0 */
	adc	%eax, %eax	/* return the carry-out */
	ret

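/* The main loop is unrolled four ways and software-pipelined: each
   step adds the limb pair loaded by the previous step while loading
   the next pair, hiding load latency behind the carry-propagating
   adds.  */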
	.p2align 4
L(top):
	mov	-24(up,n,8), %r8	/* load the next pair ... */
	mov	-24(vp,n,8), %r9
	ADCSBB	%r11, %r10		/* ... while adding the previous one */
	mov	%r10, -24(rp,n,8)
L(e00):
	mov	-16(up,n,8), %r10
	mov	-16(vp,n,8), %r11
	ADCSBB	%r9, %r8
	mov	%r8, -16(rp,n,8)
L(e11):
	mov	-8(up,n,8), %r8
	mov	-8(vp,n,8), %r9
	ADCSBB	%r11, %r10
	mov	%r10, -8(rp,n,8)
L(e10):
	mov	(up,n,8), %r10
	mov	(vp,n,8), %r11
	ADCSBB	%r9, %r8
	mov	%r8, (rp,n,8)
L(e01):
	jrcxz	L(end)		/* n == 0: the last pair is added at L(end) */
	lea	4(n), n		/* jrcxz/lea/jmp leave CF intact for ADCSBB */
	jmp	L(top)
END (func)