1 | /* Wrapper implementations of vector math functions. |
2 | Copyright (C) 2014-2023 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
/* SSE2 ISA version as wrapper to scalar.

   Expands to a routine body that feeds each of the four
   single-precision elements of %xmm0 to \callee, one call per element,
   and returns the four results packed in %xmm0 in the same order.

   Frame layout after the prologue:
     0(%rsp)..15(%rsp)	copy of the input vector; elements 0 and 1 are
			overwritten by their results as they arrive.
     16(%rsp)		saved %rbx.
   The result for element 2 is parked in %ebx across the last call, so
   the macro relies on \callee preserving %rbx.  With the usual x86-64
   entry alignment, the push plus the 16-byte area leave %rsp 16-byte
   aligned for the movaps store and for every call.  */
.macro WRAPPER_IMPL_SSE2 callee
	pushq	%rbx
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (%rbx, 0)
	subq	$16, %rsp
	cfi_adjust_cfa_offset (16)
	/* Spill the whole input vector so single elements can be reloaded.  */
	movaps	%xmm0, 0(%rsp)

	/* Element 0: the argument is already the low element of %xmm0.  */
	call	JUMPTARGET(\callee)
	movss	%xmm0, 0(%rsp)

	/* Element 1: its result replaces the input in the same slot.  */
	movss	4(%rsp), %xmm0
	call	JUMPTARGET(\callee)
	movss	%xmm0, 4(%rsp)

	/* Element 2: keep the result in %ebx over the next call.  */
	movss	8(%rsp), %xmm0
	call	JUMPTARGET(\callee)
	movd	%xmm0, %ebx

	/* Element 3: the result stays in %xmm0.  */
	movss	12(%rsp), %xmm0
	call	JUMPTARGET(\callee)

	/* Pack the results: %xmm1 low half = { r2, r3 }, %xmm0 low half
	   = { r0, r1 }, then join the two 64-bit halves.  */
	movd	%ebx, %xmm1
	unpcklps %xmm0, %xmm1
	movsd	0(%rsp), %xmm0
	unpcklpd %xmm1, %xmm0

	addq	$16, %rsp
	cfi_adjust_cfa_offset (-16)
	popq	%rbx
	cfi_adjust_cfa_offset (-8)
	cfi_restore (%rbx)
	ret
.endm
48 | |
/* 2 argument SSE2 ISA version as wrapper to scalar.

   Expands to a routine body that calls \callee once per element with
   the matching single-precision elements of %xmm0 (first argument)
   and %xmm1 (second argument), and returns the four results packed in
   %xmm0 in the same order.

   Frame layout after the prologue:
     0(%rsp)..15(%rsp)	copy of the first input vector; elements 0 and
			1 are overwritten by their results as they
			arrive.
     16(%rsp)..31(%rsp)	copy of the second input vector.
     32(%rsp)		saved %rbx.
   The result for element 2 is parked in %ebx across the last call, so
   the macro relies on \callee preserving %rbx.  With the usual x86-64
   entry alignment, the push plus the 32-byte area leave %rsp 16-byte
   aligned for the movaps stores and for every call.  */
.macro WRAPPER_IMPL_SSE2_ff callee
	push	%rbx
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (%rbx, 0)
	subq	$32, %rsp
	/* Must equal the subq amount above; the epilogue undoes exactly
	   32 + 8, so any other value leaves the CFA wrong for unwinding.  */
	cfi_adjust_cfa_offset (32)
	/* Spill both input vectors so single elements can be reloaded.  */
	movaps	%xmm0, (%rsp)
	movaps	%xmm1, 16(%rsp)

	/* Element 0: both arguments are already the low elements.  */
	call	JUMPTARGET(\callee)

	/* Element 1: store result 0, then load the next argument pair.  */
	movss	20(%rsp), %xmm1
	movss	%xmm0, 0(%rsp)
	movss	4(%rsp), %xmm0
	call	JUMPTARGET(\callee)

	/* Element 2: store result 1, then load the next argument pair.  */
	movss	24(%rsp), %xmm1
	movss	%xmm0, 4(%rsp)
	movss	8(%rsp), %xmm0
	call	JUMPTARGET(\callee)

	/* Element 3: keep result 2 in %ebx over the last call.  */
	movss	28(%rsp), %xmm1
	movd	%xmm0, %ebx
	movss	12(%rsp), %xmm0
	call	JUMPTARGET(\callee)

	/* Merge 4x results into xmm0: %xmm1 low half = { r2, r3 },
	   %xmm0 low half = { r0, r1 }, then join the 64-bit halves.  */
	movd	%ebx, %xmm1
	unpcklps %xmm0, %xmm1
	movsd	(%rsp), %xmm0
	unpcklpd %xmm1, %xmm0

	addq	$32, %rsp
	cfi_adjust_cfa_offset (-32)
	popq	%rbx
	cfi_adjust_cfa_offset (-8)
	cfi_restore (%rbx)
	ret
.endm
83 | |
/* 3 argument SSE2 ISA version as wrapper to scalar.

   Expands to a routine body that calls \callee once per element of
   the single-precision vector in %xmm0.  For element i, \callee gets
   that element in %xmm0 and the incoming %rdi and %rsi advanced by
   4 * i bytes.  The macro assembles no vector result itself;
   presumably \callee stores its outputs through the two pointers.

   The incoming pointers are kept in %rbp (from %rdi) and %rbx (from
   %rsi) across the calls, so the macro relies on \callee preserving
   both registers.

   Frame layout after the prologue:
     0(%rsp)..15(%rsp)	copy of the input vector.
     16(%rsp)..23(%rsp)	unused by this macro; with the usual x86-64
			entry alignment it keeps %rsp 16-byte aligned
			after the two pushes.
     24(%rsp)		saved %rbx.
     32(%rsp)		saved %rbp.  */
.macro WRAPPER_IMPL_SSE2_fFF callee
	pushq	%rbp
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (%rbp, 0)
	pushq	%rbx
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (%rbx, 0)
	/* Keep both base pointers where they survive the calls.  */
	movq	%rsi, %rbx
	movq	%rdi, %rbp
	subq	$24, %rsp
	cfi_adjust_cfa_offset (24)
	/* Spill the input vector so single elements can be reloaded.  */
	movaps	%xmm0, 0(%rsp)

	/* Element 0: %xmm0, %rdi and %rsi still hold the incoming values.  */
	call	JUMPTARGET(\callee)

	/* Element 1.  */
	leaq	4(%rbp), %rdi
	leaq	4(%rbx), %rsi
	movss	4(%rsp), %xmm0
	call	JUMPTARGET(\callee)

	/* Element 2.  */
	leaq	8(%rbp), %rdi
	leaq	8(%rbx), %rsi
	movss	8(%rsp), %xmm0
	call	JUMPTARGET(\callee)

	/* Element 3.  */
	leaq	12(%rbp), %rdi
	leaq	12(%rbx), %rsi
	movss	12(%rsp), %xmm0
	call	JUMPTARGET(\callee)

	addq	$24, %rsp
	cfi_adjust_cfa_offset (-24)
	popq	%rbx
	cfi_adjust_cfa_offset (-8)
	cfi_restore (%rbx)
	popq	%rbp
	cfi_adjust_cfa_offset (-8)
	cfi_restore (%rbp)
	ret
.endm
120 | |
121 | #include "svml_sd_wrapper_impl.h" |
122 | |