/* AMD64 __mpn_mul_1 -- Multiply a limb vector with a limb and store
   the result in a second limb vector.
   Copyright (C) 2003-2023 Free Software Foundation, Inc.
   This file is part of the GNU MP Library.

   The GNU MP Library is free software; you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as published by
   the Free Software Foundation; either version 2.1 of the License, or (at your
   option) any later version.

   The GNU MP Library is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
   License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with the GNU MP Library; see the file COPYING.LIB.  If not,
   see <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "asm-syntax.h"

#define rp	%rdi
#define up	%rsi
#define n_param	%rdx
#define vl	%rcx

#define n	%r11

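/* mp_limb_t __mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)

   Multiply the n-limb vector at up by the single limb vl, store the n low
   result limbs at rp, and return the most significant (carry-out) limb.
   The register names defined above are the x86-64 SysV argument registers.

   A rough C sketch of the computation, assuming 64-bit limbs (reference
   only, not part of the build):

	mp_limb_t carry = 0;
	for (mp_size_t i = 0; i < n; i++)
	  {
	    unsigned __int128 p = (unsigned __int128) up[i] * vl + carry;
	    rp[i] = (mp_limb_t) p;
	    carry = (mp_limb_t) (p >> 64);
	  }
	return carry;
*/
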
	.text
ENTRY (__mpn_mul_1)
	push	%rbx
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (%rbx, 0)
	xor	%r10, %r10
	mov	(up), %rax		/* read first u limb early */
	mov	n_param, %rbx		/* move away n from rdx, mul uses it */
	mul	vl
	mov	%rbx, %r11

	add	%r10, %rax
	adc	$0, %rdx

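/* Dispatch on n mod 4 so the 4-way unrolled loop below is entered at the
   point matching the number of leftover limbs.  */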
	and	$3, %ebx
	jz	L(b0)
	cmp	$2, %ebx
	jz	L(b2)
	jg	L(b3)

L(b1):	dec	n
	jne	L(gt1)
	mov	%rax, (rp)
	jmp	L(ret)
L(gt1):	lea	8(up,n,8), up
	lea	-8(rp,n,8), rp
	neg	n
	xor	%r10, %r10
	xor	%ebx, %ebx
	mov	%rax, %r9
	mov	(up,n,8), %rax
	mov	%rdx, %r8
	jmp	L(L1)

L(b0):	lea	(up,n,8), up
	lea	-16(rp,n,8), rp
	neg	n
	xor	%r10, %r10
	mov	%rax, %r8
	mov	%rdx, %rbx
	jmp	L(L0)

L(b3):	lea	-8(up,n,8), up
	lea	-24(rp,n,8), rp
	neg	n
	mov	%rax, %rbx
	mov	%rdx, %r10
	jmp	L(L3)

L(b2):	lea	-16(up,n,8), up
	lea	-32(rp,n,8), rp
	neg	n
	xor	%r8, %r8
	xor	%ebx, %ebx
	mov	%rax, %r10
	mov	24(up,n,8), %rax
	mov	%rdx, %r9
	jmp	L(L2)

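/* Main loop, unrolled four ways.  n was negated above, so it counts up
   toward zero while (up,n,8) and (rp,n,8) walk the vectors.  The running
   partial products are carried between steps in %r9, %r8, %rbx and %r10.  */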
	.p2align 4
L(top):	mov	%r10, (rp,n,8)
	add	%rax, %r9
	mov	(up,n,8), %rax
	adc	%rdx, %r8
	mov	$0, %r10d
L(L1):	mul	vl
	mov	%r9, 8(rp,n,8)
	add	%rax, %r8
	adc	%rdx, %rbx
L(L0):	mov	8(up,n,8), %rax
	mul	vl
	mov	%r8, 16(rp,n,8)
	add	%rax, %rbx
	adc	%rdx, %r10
L(L3):	mov	16(up,n,8), %rax
	mul	vl
	mov	%rbx, 24(rp,n,8)
	mov	$0, %r8d	# zero
	mov	%r8, %rbx	# zero
	add	%rax, %r10
	mov	24(up,n,8), %rax
	mov	%r8, %r9	# zero
	adc	%rdx, %r9
L(L2):	mul	vl
	add	$4, n
	js	L(top)

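/* Wind-down: store the last two result limbs and return the high limb of
   the final product (plus carry) in %rax.  */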
	mov	%r10, (rp,n,8)
	add	%rax, %r9
	adc	%r8, %rdx
	mov	%r9, 8(rp,n,8)
	add	%r8, %rdx
L(ret):	mov	%rdx, %rax

	pop	%rbx
	cfi_adjust_cfa_offset (-8)
	cfi_restore (%rbx)
	ret
END (__mpn_mul_1)