/* x86-64 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
   the result to a second limb vector.
   Copyright (C) 2003-2021 Free Software Foundation, Inc.
   This file is part of the GNU MP Library.

   The GNU MP Library is free software; you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as published by
   the Free Software Foundation; either version 2.1 of the License, or (at your
   option) any later version.

   The GNU MP Library is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
   License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with the GNU MP Library; see the file COPYING.LIB.  If not,
   see <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "asm-syntax.h"

#define rp	%rdi
#define up	%rsi
#define n	%rdx
#define v0	%rcx

#ifndef func
# define func __mpn_addmul_1
# define ADDSUB add
#endif
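
/* Reference model (a hedged C sketch, not part of the build; it assumes
   64-bit limbs and GMP's mp_limb_t/mp_size_t types): this function
   computes {rp,n} += {up,n} * v0 and returns the carry-out limb, the low
   64 bits of each 128-bit column sum going back into rp[] and the high
   64 bits carrying into the next column.

	mp_limb_t
	__mpn_addmul_1 (mp_limb_t *rp, const mp_limb_t *up,
			mp_size_t n, mp_limb_t v0)
	{
	  mp_limb_t cy = 0;
	  for (mp_size_t i = 0; i < n; i++)
	    {
	      unsigned __int128 t = (unsigned __int128) up[i] * v0
				    + rp[i] + cy;
	      rp[i] = (mp_limb_t) t;
	      cy = (mp_limb_t) (t >> 64);
	    }
	  return cy;
	}

   When func and ADDSUB are pre-defined before this file is included
   (e.g. to __mpn_submul_1 and sub), the same body computes
   {rp,n} -= {up,n} * v0 and returns the borrow instead.  */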

	.text
ENTRY (func)
	push	%rbx
	push	%rbp
	lea	(n), %rbx
	neg	%rbx		/* rbx = -n, an index running up toward 0 */

	mov	(up), %rax	/* read first u limb early */
	mov	(rp), %r10	/* read first r limb early */

	lea	-16(rp,n,8), rp	/* rp now points at rp[n-2] */
	lea	(up,n,8), up	/* up now points just past up[n-1] */
	mul	v0		/* rdx:rax = up[0] * v0 */

	bt	$0, %ebx	/* n odd?  (bit 0 of -n equals bit 0 of n) */
	jc	L(odd)

	/* n even: compute the first two products, then either finish
	   (n == 2) or enter the pipelined loop at L(mid).  */
	lea	(%rax), %r11	/* low half of up[0] * v0 */
	mov	8(up,%rbx,8), %rax	/* load up[1] */
	lea	(%rdx), %rbp	/* high half of up[0] * v0 */
	mul	v0
	add	$2, %rbx
	jns	L(n2)

	lea	(%rax), %r8	/* low half of up[1] * v0 */
	mov	(up,%rbx,8), %rax	/* load up[2] */
	lea	(%rdx), %r9	/* high half of up[1] * v0 */
	jmp	L(mid)

L(odd):	add	$1, %rbx
	jns	L(n1)		/* n == 1 */

	/* n odd, n >= 3: compute the first two products, then enter
	   the pipelined loop at L(e).  */
	lea	(%rax), %r8	/* low half of up[0] * v0 */
	mov	(up,%rbx,8), %rax	/* load up[1] */
	lea	(%rdx), %r9	/* high half of up[0] * v0 */
	mul	v0
	lea	(%rax), %r11	/* low half of up[1] * v0 */
	mov	8(up,%rbx,8), %rax	/* load up[2] */
	lea	(%rdx), %rbp	/* high half of up[1] * v0 */
	jmp	L(e)

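	/* Main loop, two limbs per iteration; entered only for n >= 3.
	   Two products are kept in flight: r8 (resp. r11) holds the low
	   half of one product plus the high half of the previous one,
	   r9 (resp. rbp) holds the newest high half, and r10 is the rp
	   limb being updated.  rbx is the negated remaining count,
	   stepping by 2 toward zero.  The lea (%reg) forms are plain
	   register copies.  The adc $0 steps cannot wrap: the high half
	   of a 64x64->128 bit product is at most 2^64 - 2.  */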
	.p2align 4
L(top):	mul	v0
	ADDSUB	%r8, %r10	/* add pending column sum into r limb */
	lea	(%rax), %r8
	mov	(up,%rbx,8), %rax	/* load next u limb */
	adc	%r9, %r11	/* fold previous high half forward */
	mov	%r10, -8(rp,%rbx,8)	/* store updated r limb */
	mov	(rp,%rbx,8), %r10	/* load next r limb */
	lea	(%rdx), %r9
	adc	$0, %rbp
L(mid):	mul	v0
	ADDSUB	%r11, %r10
	lea	(%rax), %r11
	mov	8(up,%rbx,8), %rax
	adc	%rbp, %r8
	mov	%r10, (rp,%rbx,8)
	mov	8(rp,%rbx,8), %r10
	lea	(%rdx), %rbp
	adc	$0, %r9
L(e):	add	$2, %rbx
	js	L(top)

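	/* Loop exit (rbx == 0 on every path here): fold in the last
	   pending products.  The jns L(n2) / jns L(n1) shortcuts above
	   land here directly for n == 2 and n == 1 respectively.  */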
	mul	v0		/* rdx:rax = up[n-1] * v0 */
	ADDSUB	%r8, %r10
	adc	%r9, %r11
	mov	%r10, -8(rp)	/* rp[n-3] */
	adc	$0, %rbp
L(n2):	mov	(rp), %r10	/* rp[n-2] */
	ADDSUB	%r11, %r10
	adc	%rbp, %rax
	mov	%r10, (rp)
	adc	$0, %rdx
L(n1):	mov	8(rp), %r10	/* rp[n-1] */
	ADDSUB	%rax, %r10
	mov	%r10, 8(rp)
	mov	%ebx, %eax	/* zero rax, leaving CF intact (rbx == 0 here) */
	adc	%rdx, %rax	/* return the carry-out limb */
	pop	%rbp
	pop	%rbx
	ret
END (func)