1 | /* Function sincos vectorized with SSE2. |
2 | Copyright (C) 2014-2021 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #include <sysdep.h> |
20 | #include "svml_d_wrapper_impl.h" |
21 | |
	.text
/* SSE2 variant of vector sincos taking the two result addresses as
   scalar pointer arguments (vl8l8 signature: one vector of doubles in,
   two linear result arrays).  Implemented entirely by the generic
   scalar-callback wrapper macro from svml_d_wrapper_impl.h.  */
ENTRY (_ZGVbN2vl8l8_sincos)
WRAPPER_IMPL_SSE2_fFF sincos
END (_ZGVbN2vl8l8_sincos)
libmvec_hidden_def (_ZGVbN2vl8l8_sincos)
27 | |
/* SSE2 ISA version as wrapper to scalar (for vector
   function declared with #pragma omp declare simd notinbranch).

   vvv signature: %xmm0 holds the two double inputs x0/x1, %xmm1 holds
   the two sine-result pointers, %xmm2 the two cosine-result pointers
   (64-bit pointers on LP64, 32-bit pointers packed in the low half on
   x32).  The wrapper calls the scalar \callee once per element into a
   stack scratch buffer, then scatters the results through the saved
   pointers.  Scalar sincos reads its argument from the low double of
   %xmm0 and takes (sin*, cos*) in the first two pointer registers.  */
.macro WRAPPER_IMPL_SSE2_fFF_vvv callee
#ifndef __ILP32__
	/* LP64 stack layout (relative to %rsp after the subq):
	     0/8   sin(x0), sin(x1)
	     16/24 cos(x0), cos(x1)
	     32/40 saved sin pointers (%xmm1)
	     48/56 saved cos pointers (%xmm2)
	     64/72 saved inputs x0, x1 (%xmm0)
	   88 + 8 (return address) keeps %rsp 16-byte aligned, as the
	   movaps/movdqa stores below require.  */
	subq	$88, %rsp
	cfi_adjust_cfa_offset(88)
	movaps	%xmm0, 64(%rsp)
	lea	(%rsp), %rdi
	movdqa	%xmm1, 32(%rdi)
	lea	16(%rsp), %rsi
	movdqa	%xmm2, 32(%rsi)
	call	JUMPTARGET(\callee)	/* \callee (x0, rsp+0, rsp+16).  */
	movsd	72(%rsp), %xmm0		/* Second input element x1.  */
	lea	8(%rsp), %rdi
	lea	24(%rsp), %rsi
	call	JUMPTARGET(\callee)	/* \callee (x1, rsp+8, rsp+24).  */
	movq	32(%rsp), %rdx		/* sin pointer for element 0.  */
	movq	48(%rsp), %rsi		/* cos pointer for element 0.  */
	movq	40(%rsp), %r8		/* sin pointer for element 1.  */
	movq	56(%rsp), %r10		/* cos pointer for element 1.  */
	movq	(%rsp), %rax		/* sin(x0).  */
	movq	16(%rsp), %rcx		/* cos(x0).  */
	movq	8(%rsp), %rdi		/* sin(x1).  */
	movq	24(%rsp), %r9		/* cos(x1).  */
	movq	%rax, (%rdx)
	movq	%rcx, (%rsi)
	movq	%rdi, (%r8)
	movq	%r9, (%r10)
	addq	$88, %rsp
	cfi_adjust_cfa_offset(-88)
	ret
#else
	/* x32: pointers are 32 bits wide, so each of %xmm1/%xmm2 packs two
	   32-bit addresses in its low 64 bits.  Stack layout (from %esp
	   after the subl):
	     0     saved inputs x0/x1 (%xmm0)
	     16    saved cos pointers (%xmm2)
	     32    saved sin pointers (%xmm1)
	     48/56 sin(x0), sin(x1)
	     64/72 cos(x0), cos(x1)
	   %rbx/%rbp (callee-saved) keep the result-buffer addresses live
	   across the two scalar calls.  */
	pushq	%rbp
	.cfi_def_cfa_offset 16
	.cfi_offset 6, -16
	pushq	%rbx
	.cfi_def_cfa_offset 24
	.cfi_offset 3, -24
	subl	$88, %esp
	.cfi_def_cfa_offset 112
	leal	64(%rsp), %esi		/* cos result slot for element 0.  */
	movaps	%xmm1, 32(%esp)
	leal	48(%rsp), %edi		/* sin result slot for element 0.  */
	movaps	%xmm2, 16(%esp)
	movq	%rsi, %rbp
	movq	%rdi, %rbx
	movaps	%xmm0, (%esp)
	call	JUMPTARGET(\callee)	/* \callee (x0, esp+48, esp+64).  */
	movupd	8(%esp), %xmm0		/* x1 into the low double.  */
	leal	8(%rbp), %esi		/* cos result slot for element 1.  */
	leal	8(%rbx), %edi		/* sin result slot for element 1.  */
	call	JUMPTARGET(\callee)	/* \callee (x1, esp+56, esp+72).  */
	movdqa	32(%esp), %xmm1		/* Reload packed sin pointers.  */
	movsd	48(%esp), %xmm0		/* sin(x0).  */
	movq	%xmm1, %rax		/* Low 32 bits = sin pointer 0.  */
	movdqa	16(%esp), %xmm2		/* Reload packed cos pointers.  */
	movsd	%xmm0, (%eax)
	movsd	56(%esp), %xmm0		/* sin(x1).  */
	pextrd	$1, %xmm1, %eax		/* sin pointer 1.  */
	movsd	%xmm0, (%eax)
	movsd	64(%esp), %xmm0		/* cos(x0).  */
	movq	%xmm2, %rax		/* Low 32 bits = cos pointer 0.  */
	movsd	%xmm0, (%eax)
	movsd	72(%esp), %xmm0		/* cos(x1).  */
	pextrd	$1, %xmm2, %eax		/* cos pointer 1.  */
	movsd	%xmm0, (%eax)
	addl	$88, %esp
	.cfi_def_cfa_offset 24
	popq	%rbx
	.cfi_def_cfa_offset 16
	popq	%rbp
	.cfi_def_cfa_offset 8
	ret
#endif
.endm
103 | |
/* vvv signature: inputs in %xmm0, result addresses passed as packed
   pointer vectors in %xmm1 (sin) and %xmm2 (cos) — the form generated
   for #pragma omp declare simd notinbranch.  */
ENTRY (_ZGVbN2vvv_sincos)
WRAPPER_IMPL_SSE2_fFF_vvv sincos
END (_ZGVbN2vvv_sincos)

#ifndef USE_MULTIARCH
libmvec_hidden_def (_ZGVbN2vvv_sincos)
#endif
111 | |