/* x86-64 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
   sum in a third limb vector.
   Copyright (C) 2006-2021 Free Software Foundation, Inc.
   This file is part of the GNU MP Library.

   The GNU MP Library is free software; you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as published by
   the Free Software Foundation; either version 2.1 of the License, or (at your
   option) any later version.

   The GNU MP Library is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
   License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with the GNU MP Library; see the file COPYING.LIB.  If not,
   see <https://www.gnu.org/licenses/>.  */
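
/* For reference, a C model of what this routine computes -- a sketch
   only, not part of the build; the names "limb_t" and "add_n" are
   illustrative, standing in for the 64-bit limb type and this entry:

     typedef unsigned long limb_t;

     limb_t
     add_n (limb_t *rp, const limb_t *up, const limb_t *vp, long n)
     {
       limb_t cy = 0;
       for (long i = 0; i < n; i++)
	 {
	   limb_t u = up[i];
	   limb_t s = u + vp[i] + cy;
	   cy = cy ? (s <= u) : (s < u);   -- carry out of the 64-bit add
	   rp[i] = s;
	 }
       return cy;   -- the value this file returns in %rax
     }
*/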

#include "sysdep.h"
#include "asm-syntax.h"

/* System V AMD64 argument registers.  */
#define rp	%rdi		/* result (sum) limb vector */
#define up	%rsi		/* first source limb vector */
#define vp	%rdx		/* second source limb vector */
#define n	%rcx		/* number of limbs, > 0 */
#define cy	%r8		/* carry-in (cleared at entry) */

#ifndef func
# define func __mpn_add_n
# define ADCSBB adc
#endif
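
/* When func and ADCSBB are predefined before this file is included
   (as sub_n.S does, defining func to __mpn_sub_n and ADCSBB to sbb),
   the same loop body computes the difference and returns the borrow
   instead of the carry.  */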

	.text
ENTRY (func)
	xor	%r8, %r8	/* carry-in is 0 */
	mov	(up), %r10	/* preload the first limb pair */
	mov	(vp), %r11

	lea	-8(up,n,8), up	/* point at the last limbs; the loop */
	lea	-8(vp,n,8), vp	/* indexes them with negative n */
	lea	-16(rp,n,8), rp	/* rp is biased one limb more: stores trail loads */
	mov	%ecx, %eax
	neg	n		/* use n as a negative index, counting up */
	and	$3, %eax	/* n mod 4 selects the loop entry point */
	je	L(b00)
	add	%rax, n		/* clear low rcx bits for jrcxz */
	cmp	$2, %eax
	jl	L(b01)
	je	L(b10)

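/* Four entry blocks, one per value of n mod 4.  Each shifts the
   carry-in (always 0 here) into CF; the blocks entering at L(e00)
   and L(e10) also move the preloaded pair into the registers those
   steps consume.  */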
L(b11):	shr	%r8		/* set cy */
	jmp	L(e11)

L(b00):	shr	%r8		/* set cy */
	mov	%r10, %r8	/* L(e00) expects the pair in %r8/%r9 */
	mov	%r11, %r9
	lea	4(n), n		/* account for the first block now; the
				   in-loop advance comes after jrcxz */
	jmp	L(e00)

L(b01):	shr	%r8		/* set cy */
	jmp	L(e01)

L(b10):	shr	%r8		/* set cy */
	mov	%r10, %r8	/* L(e10) expects the pair in %r8/%r9 */
	mov	%r11, %r9
	jmp	L(e10)

L(end):	ADCSBB	%r11, %r10	/* add the final limb pair */
	mov	%r10, 8(rp)
	mov	%ecx, %eax	/* clear eax, ecx contains 0 */
	adc	%eax, %eax	/* return the carry-out */
	ret

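/* The main loop is unrolled four ways and software-pipelined: each
   step adds the limb pair loaded by the previous step while loading
   the next pair, hiding load latency behind the carry-propagating
   adds.  */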
	.p2align 4
L(top):
	mov	-24(up,n,8), %r8	/* load the next pair ... */
	mov	-24(vp,n,8), %r9
	ADCSBB	%r11, %r10		/* ... while adding the previous one */
	mov	%r10, -24(rp,n,8)
L(e00):
	mov	-16(up,n,8), %r10
	mov	-16(vp,n,8), %r11
	ADCSBB	%r9, %r8
	mov	%r8, -16(rp,n,8)
L(e11):
	mov	-8(up,n,8), %r8
	mov	-8(vp,n,8), %r9
	ADCSBB	%r11, %r10
	mov	%r10, -8(rp,n,8)
L(e10):
	mov	(up,n,8), %r10
	mov	(vp,n,8), %r11
	ADCSBB	%r9, %r8
	mov	%r8, (rp,n,8)
L(e01):
	jrcxz	L(end)		/* n == 0: the last pair is added at L(end) */
	lea	4(n), n		/* jrcxz/lea/jmp leave CF intact for ADCSBB */
	jmp	L(top)
END (func)