/* AMD64 __mpn_mul_1 -- Multiply a limb vector with a limb and store
   the result in a second limb vector.
   Copyright (C) 2003-2023 Free Software Foundation, Inc.
   This file is part of the GNU MP Library.

   The GNU MP Library is free software; you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as published by
   the Free Software Foundation; either version 2.1 of the License, or (at your
   option) any later version.

   The GNU MP Library is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
   License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with the GNU MP Library; see the file COPYING.LIB.  If not,
   see <https://www.gnu.org/licenses/>.  */
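
/* Functionally, __mpn_mul_1 (rp, up, n, vl) multiplies the n-limb vector
   at up by the single limb vl, stores the n low product limbs at rp, and
   returns the most significant limb.  As a reading aid only, here is a
   minimal C sketch of those semantics, assuming 64-bit limbs; it is not
   the implementation below, and mul_1_ref is a name invented here:

	mp_limb_t
	mul_1_ref (mp_limb_t *rp, const mp_limb_t *up, mp_size_t n,
		   mp_limb_t vl)
	{
	  mp_limb_t carry = 0;
	  for (mp_size_t i = 0; i < n; i++)
	    {
	      unsigned __int128 p = (unsigned __int128) up[i] * vl + carry;
	      rp[i] = (mp_limb_t) p;		// low limb of the product
	      carry = (mp_limb_t) (p >> 64);	// high limb becomes the carry
	    }
	  return carry;				// carry out = return value
	}
*/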

#include <sysdep.h>
#include "asm-syntax.h"

/* Incoming arguments, per the System V AMD64 calling convention.  */
#define rp	%rdi	/* result limb vector */
#define up	%rsi	/* source limb vector */
#define n_param	%rdx	/* limb count */
#define vl	%rcx	/* multiplier limb */

#define n	%r11	/* working copy of the limb count */

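/* n cannot stay in n_param: mul writes the high half of every product
   to %rdx, so the count is copied to %r11 right after the first mul.  */
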
	.text
ENTRY (__mpn_mul_1)
	push	%rbx
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (%rbx, 0)
	xor	%r10, %r10
	mov	(up), %rax		/* read first u limb early */
	mov	n_param, %rbx		/* move away n from rdx, mul uses it */
	mul	vl			/* rdx:rax = u[0] * vl */
	mov	%rbx, %r11		/* n */

	add	%r10, %rax		/* %r10 is zero here */
	adc	$0, %rdx
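
/* The add/adc pair above folds in %r10, which was just zeroed, so it
   cannot change the product; it is presumably retained from the GMP
   version of this code, where the same body also serves a carry-in
   entry point.  */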

	and	$3, %ebx		/* n mod 4 */
	jz	L(b0)
	cmp	$2, %ebx
	jz	L(b2)
	jg	L(b3)
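
/* The main loop below is unrolled four ways.  Each of the entry points
   b0..b3 handles one residue of n mod 4: it biases up and rp so the
   indexing inside the loop comes out right, negates n so the loop can
   count up towards zero, seeds the accumulator registers, and jumps
   into the matching stage of the loop.  */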

L(b1):	dec	n
	jne	L(gt1)
	mov	%rax, (rp)		/* n == 1: store the single limb */
	jmp	L(ret)			/* and return the high limb */
L(gt1):	lea	8(up,n,8), up
	lea	-8(rp,n,8), rp
	neg	n
	xor	%r10, %r10
	xor	%ebx, %ebx
	mov	%rax, %r9
	mov	(up,n,8), %rax
	mov	%rdx, %r8
	jmp	L(L1)

L(b0):	lea	(up,n,8), up
	lea	-16(rp,n,8), rp
	neg	n
	xor	%r10, %r10
	mov	%rax, %r8
	mov	%rdx, %rbx
	jmp	L(L0)

L(b3):	lea	-8(up,n,8), up
	lea	-24(rp,n,8), rp
	neg	n
	mov	%rax, %rbx
	mov	%rdx, %r10
	jmp	L(L3)

L(b2):	lea	-16(up,n,8), up
	lea	-32(rp,n,8), rp
	neg	n
	xor	%r8, %r8
	xor	%ebx, %ebx
	mov	%rax, %r10
	mov	24(up,n,8), %rax
	mov	%rdx, %r9
	jmp	L(L2)
| 88 | |
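/* Main loop: four limbs per iteration, software-pipelined so that the
   limb for the next mul is loaded while the previous product is being
   accumulated.  Finished limbs rotate through %r10, %r9, %r8 and %rbx
   on their way to memory; each register is re-zeroed once its limb has
   been stored, so the adcs always add into a clean carry slot.  */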
	.p2align 4
L(top):	mov	%r10, (rp,n,8)
	add	%rax, %r9
	mov	(up,n,8), %rax
	adc	%rdx, %r8
	mov	$0, %r10d
L(L1):	mul	vl
	mov	%r9, 8(rp,n,8)
	add	%rax, %r8
	adc	%rdx, %rbx
L(L0):	mov	8(up,n,8), %rax
	mul	vl
	mov	%r8, 16(rp,n,8)
	add	%rax, %rbx
	adc	%rdx, %r10
L(L3):	mov	16(up,n,8), %rax
	mul	vl
	mov	%rbx, 24(rp,n,8)
	mov	$0, %r8d		/* zero */
	mov	%r8, %rbx		/* zero */
	add	%rax, %r10
	mov	24(up,n,8), %rax
	mov	%r8, %r9		/* zero */
	adc	%rdx, %r9
L(L2):	mul	vl
	add	$4, n
	js	L(top)
| 116 | |
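/* Wind-down: two result limbs and the final product are still in
   flight.  %r8 is zero at this point, so the adc below merely folds
   the carry flag into the high half of the last product.  */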
	mov	%r10, (rp,n,8)
	add	%rax, %r9
	adc	%r8, %rdx
	mov	%r9, 8(rp,n,8)
L(ret):	mov	%rdx, %rax		/* return the carry-out limb */

	pop	%rbx
	cfi_adjust_cfa_offset (-8)
	cfi_restore (%rbx)
	ret
END (__mpn_mul_1)