svml_d_sincos4_core.S source code [glibc/sysdeps/x86_64/fpu/svml_d_sincos4_core.S]

1	/* Function sincos vectorized with AVX2, wrapper version.
2	Copyright (C) 2014-2021 Free Software Foundation, Inc.
3	This file is part of the GNU C Library.
4
5	The GNU C Library is free software; you can redistribute it and/or
6	modify it under the terms of the GNU Lesser General Public
7	License as published by the Free Software Foundation; either
8	version 2.1 of the License, or (at your option) any later version.
9
10	The GNU C Library is distributed in the hope that it will be useful,
11	but WITHOUT ANY WARRANTY; without even the implied warranty of
12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13	Lesser General Public License for more details.
14
15	You should have received a copy of the GNU Lesser General Public
16	License along with the GNU C Library; if not, see
17	<https://www.gnu.org/licenses/>.  */
18
19	#include <sysdep.h>
20	#include "svml_d_wrapper_impl.h"
21
/* Entry point _ZGVdN4vl8l8_sincos.  Its whole body is the expansion of
   WRAPPER_IMPL_AVX_fFF with _ZGVbN2vl8l8_sincos as the callee.  That
   macro is not defined in this file.
   NOTE(review): it presumably comes from svml_d_wrapper_impl.h and splits
   the 4-lane call into two 2-lane calls -- confirm against that header.
   The symbol is also given a libmvec hidden definition.  */
22	.text
23	ENTRY (_ZGVdN4vl8l8_sincos)
24	WRAPPER_IMPL_AVX_fFF _ZGVbN2vl8l8_sincos
25	END (_ZGVdN4vl8l8_sincos)
26	libmvec_hidden_def (_ZGVdN4vl8l8_sincos)
27
28	/* AVX2 ISA version as wrapper to SSE ISA version (for vector
29	function declared with #pragma omp declare simd notinbranch).  */
/* WRAPPER_IMPL_AVX2_fFF_vvv callee

   Function body for a 4-lane entry point whose input arrives in %ymm0
   and whose two remaining vector arguments (%ymm1/%ymm2, or %xmm1/%xmm2
   under __ILP32__) hold four destination addresses each.

   The macro spills the arguments to an aligned stack frame and calls the
   callee twice, once for each 128-bit half of the input.  Each call gets
   the addresses of two 16-byte scratch areas in the first two integer
   argument registers.  Afterwards every 8-byte value in the first
   scratch buffer is stored through the address in the matching lane of
   the second vector argument, and every value in the second scratch
   buffer through the matching lane of the third vector argument.

   The wrapper never writes the scratch buffers itself, so it relies on
   the callee to fill 16 bytes behind each pointer it is given.
   NOTE(review): judging by the symbol names, the first buffer presumably
   receives the sines and the second the cosines -- confirm against the
   callee.  */
30	.macro WRAPPER_IMPL_AVX2_fFF_vvv callee
31	#ifndef __ILP32__
/* Variant with 64-bit addresses.  Frame layout once the prologue is done
   (offsets from the 32-byte aligned %rsp):
       0 ..  31   first scratch result buffer  (passed in %rdi)
      32 ..  63   second scratch result buffer (passed in %rsi)
      64 ..  95   copy of %ymm1: four 8-byte destination addresses
      96 .. 127   copy of %ymm2: four 8-byte destination addresses
     128 .. 159   copy of %ymm0: the four input lanes  */
/* Prologue: keep the incoming %rsp in %rbp so the epilogue can undo the
   alignment, then align to 32 bytes and reserve the 160-byte frame.  */
32	pushq %rbp
33	cfi_adjust_cfa_offset (`8`)
34	cfi_rel_offset (%rbp, `0`)
35	movq %rsp, %rbp
36	cfi_def_cfa_register (%rbp)
37	andq $-`32`, %rsp
38	subq $`160`, %rsp
/* Spill the input and both address vectors.  %rdi equals %rsp here, so
   the two stores through %rdi land at frame offsets 64 and 96.  */
39	vmovupd %ymm0, `128`(%rsp)
40	lea (%rsp), %rdi
41	vmovdqu %ymm1, `64`(%rdi)
42	vmovdqu %ymm2, `96`(%rdi)
43	lea `32`(%rsp), %rsi
/* First call: %xmm0 still holds the low half of the input; result
   pointers are frame offsets 0 and 32.  */
44	vzeroupper
45	call HIDDEN_JUMPTARGET(\callee)
/* Second call: reload the high half of the saved input (offset 128 + 16)
   and advance both result pointers by 16 bytes.  */
46	vmovupd `144`(%rsp), %xmm0
47	lea `16`(%rsp), %rdi
48	lea `48`(%rsp), %rsi
49	call HIDDEN_JUMPTARGET(\callee)
/* Scatter lanes 0 and 1: load the destination addresses saved from
   %ymm1 (offsets 64, 72) and %ymm2 (offsets 96, 104), load the matching
   results from both scratch buffers, and store each result through its
   address.  */
50	movq `64`(%rsp), %rdx
51	movq `96`(%rsp), %rsi
52	movq `72`(%rsp), %r8
53	movq `104`(%rsp), %r10
54	movq (%rsp), %rax
55	movq `32`(%rsp), %rcx
56	movq `8`(%rsp), %rdi
57	movq `40`(%rsp), %r9
58	movq %rax, (%rdx)
59	movq %rcx, (%rsi)
/* The loads of the lane 2 and 3 addresses are interleaved with the
   remaining lane 1 stores; %rax and %rcx are reused as address
   registers once their lane 0 values have been stored.  */
60	movq `80`(%rsp), %rax
61	movq `112`(%rsp), %rcx
62	movq %rdi, (%r8)
63	movq %r9, (%r10)
64	movq `88`(%rsp), %rdi
65	movq `120`(%rsp), %r9
66	movq `16`(%rsp), %r11
67	movq `48`(%rsp), %rdx
68	movq `24`(%rsp), %rsi
69	movq `56`(%rsp), %r8
70	movq %r11, (%rax)
71	movq %rdx, (%rcx)
72	movq %rsi, (%rdi)
73	movq %r8, (%r9)
/* Epilogue: drop the aligned frame by restoring %rsp from %rbp.  */
74	movq %rbp, %rsp
75	cfi_def_cfa_register (%rsp)
76	popq %rbp
77	cfi_adjust_cfa_offset (-`8`)
78	cfi_restore (%rbp)
79	ret
80	#else
/* __ILP32__ variant: addresses are 32 bits wide, so each address vector
   fits in an %xmm register.  Frame layout (offsets from %ebp):
       -8          saved %r12
      -16          saved %r10 (incoming %esp + 8)
      -24          saved %rbx
      -80 ..  -49  second scratch result buffer (passed in %esi)
     -112 ..  -81  first scratch result buffer  (passed in %edi)
     -128 .. -113  copy of %xmm1: four 4-byte destination addresses
     -144 .. -129  copy of %xmm2: four 4-byte destination addresses
     -176 .. -145  copy of %ymm0: the four input lanes  */
/* Prologue: remember the incoming stack position in %r10, align the
   stack to 32 bytes, push a copy of the return address onto the aligned
   stack and set up %ebp.  %r12 and %rbx are saved and then used to keep
   the two scratch buffer addresses across the first call.  */
81	leal `8`(%rsp), %r10d
82	.cfi_def_cfa `10`, `0`
83	andl $-`32`, %esp
84	pushq -`8`(%r10d)
85	pushq %rbp
86	.cfi_escape `0x10`,`0x6`,`0x2`,`0x76`,`0`
87	movl %esp, %ebp
88	pushq %r12
89	leal -`80`(%rbp), %esi
90	pushq %r10
91	.cfi_escape `0xf`,`0x3`,`0x76`,`0x70`,`0x6`
92	.cfi_escape `0x10`,`0xc`,`0x2`,`0x76`,`0x78`
93	leal -`112`(%rbp), %edi
94	movq %rsi, %r12
95	pushq %rbx
96	.cfi_escape `0x10`,`0x3`,`0x2`,`0x76`,`0x68`
97	movq %rdi, %rbx
98	subl $`152`, %esp
/* Spill both address vectors and the input below the scratch buffers.  */
99	vmovaps %xmm1, -`128`(%ebp)
100	vmovaps %xmm2, -`144`(%ebp)
101	vmovapd %ymm0, -`176`(%ebp)
/* First call: low half of the input is still in %xmm0; %edi and %esi
   point at the start of the two scratch buffers.  */
102	vzeroupper
103	call HIDDEN_JUMPTARGET(\callee)
/* Second call: high half of the saved input (-176 + 16) and both scratch
   pointers advanced by 16 bytes from the copies kept in %rbx and %r12.  */
104	leal `16`(%r12), %esi
105	vmovapd -`160`(%ebp), %xmm0
106	leal `16`(%rbx), %edi
107	call HIDDEN_JUMPTARGET(\callee)
/* Scatter the first scratch buffer (-112, -104, -96, -88) through the
   four addresses saved from %xmm1.  Addresses 0 and 2 are taken from the
   low 32 bits of an 8-byte load; addresses 1 and 3 are extracted with
   vpextrd from the copy reloaded into %xmm5.  */
108	movq -`128`(%ebp), %rax
109	vmovsd -`112`(%ebp), %xmm0
110	vmovdqa -`128`(%ebp), %xmm5
111	vmovdqa -`144`(%ebp), %xmm1
112	vmovsd %xmm0, (%eax)
113	vmovsd -`104`(%ebp), %xmm0
114	vpextrd $`1`, %xmm5, %eax
115	vmovsd %xmm0, (%eax)
116	movq -`120`(%ebp), %rax
117	vmovsd -`96`(%ebp), %xmm0
118	vmovsd %xmm0, (%eax)
119	vmovsd -`88`(%ebp), %xmm0
120	vpextrd $`3`, %xmm5, %eax
121	vmovsd %xmm0, (%eax)
/* Same pattern for the second scratch buffer (-80, -72, -64, -56) and
   the four addresses saved from %xmm2, reloaded above into %xmm1.  */
122	movq -`144`(%ebp), %rax
123	vmovsd -`80`(%ebp), %xmm0
124	vmovsd %xmm0, (%eax)
125	vmovsd -`72`(%ebp), %xmm0
126	vpextrd $`1`, %xmm1, %eax
127	vmovsd %xmm0, (%eax)
128	movq -`136`(%ebp), %rax
129	vmovsd -`64`(%ebp), %xmm0
130	vmovsd %xmm0, (%eax)
131	vmovsd -`56`(%ebp), %xmm0
132	vpextrd $`3`, %xmm1, %eax
133	vmovsd %xmm0, (%eax)
/* Epilogue: release the frame, restore the saved registers in reverse
   push order, and rebuild the incoming %esp from the value kept in %r10
   (which was incoming %esp + 8).  */
134	addl $`152`, %esp
135	popq %rbx
136	popq %r10
137	.cfi_def_cfa `10`, `0`
138	popq %r12
139	popq %rbp
140	leal -`8`(%r10), %esp
141	.cfi_def_cfa `7`, `8`
142	ret
143	#endif
144	.endm
145
/* Entry point _ZGVdN4vvv_sincos.  Its body is the expansion of
   WRAPPER_IMPL_AVX2_fFF_vvv with _ZGVbN2vl8l8_sincos as the callee, so
   the callee is invoked twice and its results are stored through the
   addresses passed in the second and third vector arguments.  */
146	ENTRY (_ZGVdN4vvv_sincos)
147	WRAPPER_IMPL_AVX2_fFF_vvv _ZGVbN2vl8l8_sincos
148	END (_ZGVdN4vvv_sincos)
149
/* The hidden definition of _ZGVdN4vvv_sincos is emitted only when
   USE_MULTIARCH is not defined.
   NOTE(review): presumably the multiarch build provides this definition
   elsewhere -- confirm.  */
150	#ifndef USE_MULTIARCH
151	libmvec_hidden_def (_ZGVdN4vvv_sincos)
152	#endif
153

Browse the source code of glibc/sysdeps/x86_64/fpu/svml_d_sincos4_core.S