/* x86-64 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
   the result to a second limb vector.
   Copyright (C) 2003-2021 Free Software Foundation, Inc.
   This file is part of the GNU MP Library.

   The GNU MP Library is free software; you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as published by
   the Free Software Foundation; either version 2.1 of the License, or (at your
   option) any later version.

   The GNU MP Library is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
   License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with the GNU MP Library; see the file COPYING.LIB.  If not,
   see <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "asm-syntax.h"

#define rp	%rdi
#define up	%rsi
#define n	%rdx
#define v0	%rcx

#ifndef func
# define func __mpn_addmul_1
# define ADDSUB add
#endif
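
/* Reference model (a hedged C sketch, not part of the build; it assumes
   64-bit limbs and GMP's mp_limb_t/mp_size_t types): this function
   computes {rp,n} += {up,n} * v0 and returns the carry-out limb, the low
   64 bits of each 128-bit column sum going back into rp[] and the high
   64 bits carrying into the next column.

	mp_limb_t
	__mpn_addmul_1 (mp_limb_t *rp, const mp_limb_t *up,
			mp_size_t n, mp_limb_t v0)
	{
	  mp_limb_t cy = 0;
	  for (mp_size_t i = 0; i < n; i++)
	    {
	      unsigned __int128 t = (unsigned __int128) up[i] * v0
				    + rp[i] + cy;
	      rp[i] = (mp_limb_t) t;
	      cy = (mp_limb_t) (t >> 64);
	    }
	  return cy;
	}

   When func and ADDSUB are pre-defined before this file is included
   (e.g. to __mpn_submul_1 and sub), the same body computes
   {rp,n} -= {up,n} * v0 and returns the borrow instead.  */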

	.text
ENTRY (func)
	push	%rbx
	push	%rbp
	lea	(n), %rbx
	neg	%rbx		/* rbx = -n, an index running up toward 0 */

	mov	(up), %rax	/* read first u limb early */
	mov	(rp), %r10	/* read first r limb early */

	lea	-16(rp,n,8), rp	/* rp now points at rp[n-2] */
	lea	(up,n,8), up	/* up now points just past up[n-1] */
	mul	v0		/* rdx:rax = up[0] * v0 */

	bt	$0, %ebx	/* n odd?  (bit 0 of -n equals bit 0 of n) */
	jc	L(odd)

	/* n even: compute the first two products, then either finish
	   (n == 2) or enter the pipelined loop at L(mid).  */
	lea	(%rax), %r11	/* low half of up[0] * v0 */
	mov	8(up,%rbx,8), %rax	/* load up[1] */
	lea	(%rdx), %rbp	/* high half of up[0] * v0 */
	mul	v0
	add	$2, %rbx
	jns	L(n2)

	lea	(%rax), %r8	/* low half of up[1] * v0 */
	mov	(up,%rbx,8), %rax	/* load up[2] */
	lea	(%rdx), %r9	/* high half of up[1] * v0 */
	jmp	L(mid)

L(odd):	add	$1, %rbx
	jns	L(n1)		/* n == 1 */

	/* n odd, n >= 3: compute the first two products, then enter
	   the pipelined loop at L(e).  */
	lea	(%rax), %r8	/* low half of up[0] * v0 */
	mov	(up,%rbx,8), %rax	/* load up[1] */
	lea	(%rdx), %r9	/* high half of up[0] * v0 */
	mul	v0
	lea	(%rax), %r11	/* low half of up[1] * v0 */
	mov	8(up,%rbx,8), %rax	/* load up[2] */
	lea	(%rdx), %rbp	/* high half of up[1] * v0 */
	jmp	L(e)

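	/* Main loop, two limbs per iteration; entered only for n >= 3.
	   Two products are kept in flight: r8 (resp. r11) holds the low
	   half of one product plus the high half of the previous one,
	   r9 (resp. rbp) holds the newest high half, and r10 is the rp
	   limb being updated.  rbx is the negated remaining count,
	   stepping by 2 toward zero.  The lea (%reg) forms are plain
	   register copies.  The adc $0 steps cannot wrap: the high half
	   of a 64x64->128 bit product is at most 2^64 - 2.  */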
	.p2align 4
L(top):	mul	v0
	ADDSUB	%r8, %r10	/* add pending column sum into r limb */
	lea	(%rax), %r8
	mov	(up,%rbx,8), %rax	/* load next u limb */
	adc	%r9, %r11	/* fold previous high half forward */
	mov	%r10, -8(rp,%rbx,8)	/* store updated r limb */
	mov	(rp,%rbx,8), %r10	/* load next r limb */
	lea	(%rdx), %r9
	adc	$0, %rbp
L(mid):	mul	v0
	ADDSUB	%r11, %r10
	lea	(%rax), %r11
	mov	8(up,%rbx,8), %rax
	adc	%rbp, %r8
	mov	%r10, (rp,%rbx,8)
	mov	8(rp,%rbx,8), %r10
	lea	(%rdx), %rbp
	adc	$0, %r9
L(e):	add	$2, %rbx
	js	L(top)

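	/* Loop exit (rbx == 0 on every path here): fold in the last
	   pending products.  The jns L(n2) / jns L(n1) shortcuts above
	   land here directly for n == 2 and n == 1 respectively.  */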
	mul	v0		/* rdx:rax = up[n-1] * v0 */
	ADDSUB	%r8, %r10
	adc	%r9, %r11
	mov	%r10, -8(rp)	/* rp[n-3] */
	adc	$0, %rbp
L(n2):	mov	(rp), %r10	/* rp[n-2] */
	ADDSUB	%r11, %r10
	adc	%rbp, %rax
	mov	%r10, (rp)
	adc	$0, %rdx
L(n1):	mov	8(rp), %r10	/* rp[n-1] */
	ADDSUB	%rax, %r10
	mov	%r10, 8(rp)
	mov	%ebx, %eax	/* zero rax, leaving CF intact (rbx == 0 here) */
	adc	%rdx, %rax	/* return the carry-out limb */
	pop	%rbp
	pop	%rbx
	ret
END (func)