svml_s_wrapper_impl.h source code [glibc/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h]

/* Wrapper implementations of vector math functions.
   Copyright (C) 2014-2023 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
18
/* SSE2 ISA version as wrapper to scalar.  */
.macro WRAPPER_IMPL_SSE2 callee
	/* Run the scalar routine \callee once for each of the four 32-bit
	   lanes of %xmm0 and return the four results packed in %xmm0.

	   Frame layout after the prologue:
	     0(%rsp) .. 15(%rsp)  copy of the input vector; the slots of
				  lanes 0 and 1 are reused for their results
				  once the inputs have been consumed.
	   %ebx carries the lane-2 result across the final call, which is
	   why %rbx is saved and restored here.  The push plus the 16-byte
	   allocation keep %rsp 16-byte aligned for movaps and the calls,
	   assuming the usual entry state (%rsp % 16 == 8).  */
	pushq	%rbx
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (%rbx, 0)
	subq	$16, %rsp
	cfi_adjust_cfa_offset (16)
	movaps	%xmm0, (%rsp)		/* Spill all four inputs.  */

	/* Lane 0: the input is already the low element of %xmm0.  */
	call	JUMPTARGET(\callee)
	movss	%xmm0, (%rsp)		/* result[0] replaces input[0].  */

	/* Lane 1.  */
	movss	4(%rsp), %xmm0
	call	JUMPTARGET(\callee)
	movss	%xmm0, 4(%rsp)		/* result[1] replaces input[1].  */

	/* Lane 2: park the result in %ebx so it survives the next call.  */
	movss	8(%rsp), %xmm0
	call	JUMPTARGET(\callee)
	movd	%xmm0, %ebx

	/* Lane 3: the result stays in %xmm0.  */
	movss	12(%rsp), %xmm0
	call	JUMPTARGET(\callee)

	/* Reassemble the vector: %xmm1 = { r2, r3 }, then
	   %xmm0 = { r0, r1, r2, r3 }.  */
	movd	%ebx, %xmm1
	unpcklps %xmm0, %xmm1
	movsd	(%rsp), %xmm0
	unpcklpd %xmm1, %xmm0

	addq	$16, %rsp
	cfi_adjust_cfa_offset (-16)
	popq	%rbx
	cfi_adjust_cfa_offset (-8)
	cfi_restore (%rbx)
	ret
.endm
48
/* 2 argument SSE2 ISA version as wrapper to scalar.  */
.macro WRAPPER_IMPL_SSE2_ff callee
	/* Run the two-argument scalar routine \callee once for each pair of
	   32-bit lanes taken from %xmm0 (first operand) and %xmm1 (second
	   operand), and return the four results packed in %xmm0.

	   Frame layout after the prologue:
	      0(%rsp) .. 15(%rsp)  copy of %xmm0; the slots of lanes 0 and 1
				   are reused for their results once the
				   inputs have been consumed.
	     16(%rsp) .. 31(%rsp)  copy of %xmm1.
	   %ebx carries the lane-2 result across the final call, which is
	   why %rbx is saved and restored here.  The push plus the 32-byte
	   allocation keep %rsp 16-byte aligned for movaps and the calls,
	   assuming the usual entry state (%rsp % 16 == 8).  */
	push	%rbx
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (%rbx, 0)
	subq	$32, %rsp
	/* The CFA adjustment must equal the amount subtracted from %rsp
	   and mirror the -32 in the epilogue; otherwise the unwind
	   information is off by 8 for the whole body and at the ret.  */
	cfi_adjust_cfa_offset (32)
	movaps	%xmm0, (%rsp)
	movaps	%xmm1, 16(%rsp)

	/* Lane 0: both inputs are already the low elements of
	   %xmm0/%xmm1.  */
	call	JUMPTARGET(\callee)

	/* Lane 1: load the second operand first, then store result[0] and
	   fetch the first operand.  */
	movss	20(%rsp), %xmm1
	movss	%xmm0, 0(%rsp)
	movss	4(%rsp), %xmm0
	call	JUMPTARGET(\callee)

	/* Lane 2.  */
	movss	24(%rsp), %xmm1
	movss	%xmm0, 4(%rsp)
	movss	8(%rsp), %xmm0
	call	JUMPTARGET(\callee)

	/* Lane 3: park result[2] in %ebx so it survives the call.  */
	movss	28(%rsp), %xmm1
	movd	%xmm0, %ebx
	movss	12(%rsp), %xmm0
	call	JUMPTARGET(\callee)

	/* Merge 4x results into xmm0: %xmm1 = { r2, r3 }, then
	   %xmm0 = { r0, r1, r2, r3 }.  */
	movd	%ebx, %xmm1
	unpcklps %xmm0, %xmm1
	movsd	(%rsp), %xmm0
	unpcklpd %xmm1, %xmm0

	addq	$32, %rsp
	cfi_adjust_cfa_offset (-32)
	popq	%rbx
	cfi_adjust_cfa_offset (-8)
	cfi_restore (%rbx)
	ret
.endm
83
/* 3 argument SSE2 ISA version as wrapper to scalar.  */
.macro WRAPPER_IMPL_SSE2_fFF callee
	/* Run the scalar routine \callee once for each of the four 32-bit
	   lanes of %xmm0.  Call k (k = 0..3) receives lane k in %xmm0 and
	   the incoming %rdi and %rsi each advanced by 4*k bytes.
	   NOTE(review): %rdi/%rsi are presumably pointers to two
	   four-element output arrays that \callee stores through; the
	   wrapper itself never dereferences them -- confirm against callers.
	   The wrapper does not assemble a vector result in %xmm0.

	   %rbp and %rbx keep the two base pointers alive across the calls,
	   so both are saved and restored.  Of the 24 bytes allocated, the
	   low 16 hold the copy of the input vector; together with the two
	   pushes this keeps %rsp 16-byte aligned for movaps and the calls,
	   assuming the usual entry state (%rsp % 16 == 8).  */
	pushq	%rbp
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (%rbp, 0)
	pushq	%rbx
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (%rbx, 0)
	movq	%rsi, %rbx		/* %rbx = second pointer base.  */
	movq	%rdi, %rbp		/* %rbp = first pointer base.  */
	subq	$24, %rsp
	cfi_adjust_cfa_offset (24)
	movaps	%xmm0, (%rsp)		/* Spill all four inputs.  */

	/* Lane 0: %xmm0, %rdi and %rsi still hold the caller's values.  */
	call	JUMPTARGET(\callee)

	/* Lanes 1..3: reload the input element and step both pointers by
	   the byte offset of the lane.  Expands to three copies of the
	   same four-instruction sequence.  */
	.irp	lane_off, 4, 8, 12
	movss	\lane_off(%rsp), %xmm0
	leaq	\lane_off(%rbp), %rdi
	leaq	\lane_off(%rbx), %rsi
	call	JUMPTARGET(\callee)
	.endr

	addq	$24, %rsp
	cfi_adjust_cfa_offset (-24)
	popq	%rbx
	cfi_adjust_cfa_offset (-8)
	cfi_restore (%rbx)
	popq	%rbp
	cfi_adjust_cfa_offset (-8)
	cfi_restore (%rbp)
	ret
.endm
120
121	#include "svml_sd_wrapper_impl.h"
122

Browse the source code of glibc/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h