1 | /* Multiple versions of mempcpy |
2 | All versions must be listed in ifunc-impl-list.c. |
3 | Copyright (C) 2010-2016 Free Software Foundation, Inc. |
4 | Contributed by Intel Corporation. |
5 | This file is part of the GNU C Library. |
6 | |
7 | The GNU C Library is free software; you can redistribute it and/or |
8 | modify it under the terms of the GNU Lesser General Public |
9 | License as published by the Free Software Foundation; either |
10 | version 2.1 of the License, or (at your option) any later version. |
11 | |
12 | The GNU C Library is distributed in the hope that it will be useful, |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | Lesser General Public License for more details. |
16 | |
17 | You should have received a copy of the GNU Lesser General Public |
18 | License along with the GNU C Library; if not, see |
19 | <http://www.gnu.org/licenses/>. */ |
20 | |
21 | #include <sysdep.h> |
22 | #include <init-arch.h> |
23 | |
/* Define multiple versions only for the definition in libc and only
   when building the DSO.  In static binaries we need mempcpy before
   the initialization has happened.  */
27 | #if defined SHARED && IS_IN (libc) |
/* IFUNC resolver for __mempcpy.

   Returns in %rax the address of the implementation to use.  The
   candidate address is loaded into %rax *before* each feature test,
   so that a failed test can branch straight to the final `ret' with
   the previous (less demanding) candidate still in %rax.

   Selection, assuming each HAS_*_FEATURE test leaves ZF set when the
   feature is absent:

     AVX512F_Usable and Prefer_No_VZEROUPPER
       (only if HAVE_AVX512_ASM_SUPPORT)   -> __mempcpy_avx512_no_vzeroupper
     no SSSE3                              -> __mempcpy_sse2
     SSSE3, no Fast_Copy_Backward          -> __mempcpy_ssse3
     SSSE3 + Fast_Copy_Backward,
       no AVX_Fast_Unaligned_Load          -> __mempcpy_ssse3_back
     SSSE3 + Fast_Copy_Backward
       + AVX_Fast_Unaligned_Load           -> __mempcpy_avx_unaligned

   NOTE(review): the AVX variant is only reachable once the SSSE3 and
   Fast_Copy_Backward tests have both passed -- confirm that this
   chaining is intended.  */
ENTRY(__mempcpy)
	/* Re-type the symbol emitted by ENTRY as an indirect function.  */
	.type	__mempcpy, @gnu_indirect_function
	/* Presumably sets up %rdx for the HAS_*_FEATURE tests below.  */
	LOAD_RTLD_GLOBAL_RO_RDX
#ifdef HAVE_AVX512_ASM_SUPPORT
	/* Both conditions must hold to pick the AVX512 variant.  */
	HAS_ARCH_FEATURE (AVX512F_Usable)
	jz	.Lmempcpy_ifunc_no_avx512
	HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
	jz	.Lmempcpy_ifunc_no_avx512
	leaq	__mempcpy_avx512_no_vzeroupper(%rip), %rax
	ret
#endif
.Lmempcpy_ifunc_no_avx512:
	/* Baseline candidate.  */
	leaq	__mempcpy_sse2(%rip), %rax
	HAS_CPU_FEATURE (SSSE3)
	jz	.Lmempcpy_ifunc_done

	leaq	__mempcpy_ssse3(%rip), %rax
	HAS_ARCH_FEATURE (Fast_Copy_Backward)
	jz	.Lmempcpy_ifunc_done

	leaq	__mempcpy_ssse3_back(%rip), %rax
	HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
	jz	.Lmempcpy_ifunc_done

	leaq	__mempcpy_avx_unaligned(%rip), %rax
.Lmempcpy_ifunc_done:
	/* %rax holds the last candidate whose prerequisites all passed.  */
	ret
END(__mempcpy)
51 | |
/* Override ENTRY/END for the file included at the bottom.  The `name'
   argument is deliberately ignored: whatever symbol is passed in, the
   function is emitted as the hidden, 16-byte aligned global
   __mempcpy_sse2.  */
# undef ENTRY
# define ENTRY(name) \
	.globl __mempcpy_sse2; \
	.hidden __mempcpy_sse2; \
	.type __mempcpy_sse2, @function; \
	.p2align 4; \
	__mempcpy_sse2: \
	cfi_startproc; \
	CALL_MCOUNT

/* Matching terminator: close the CFI region and record the size of
   __mempcpy_sse2 (again regardless of `name').  */
# undef END
# define END(name) \
	cfi_endproc; \
	.size __mempcpy_sse2, .-__mempcpy_sse2
63 | |
/* Override ENTRY_CHK/END_CHK in the same way as ENTRY/END: the `name'
   argument is ignored and the function is always emitted as the
   16-byte aligned global __mempcpy_chk_sse2.  Unlike ENTRY, no .hidden
   directive is emitted for this symbol.  */
# undef ENTRY_CHK
# define ENTRY_CHK(name) \
	.globl __mempcpy_chk_sse2; \
	.type __mempcpy_chk_sse2, @function; \
	.p2align 4; \
	__mempcpy_chk_sse2: \
	cfi_startproc; \
	CALL_MCOUNT

/* Matching terminator: close the CFI region and record the size of
   __mempcpy_chk_sse2.  */
# undef END_CHK
# define END_CHK(name) \
	cfi_endproc; \
	.size __mempcpy_chk_sse2, .-__mempcpy_chk_sse2
74 | |
/* Calls to mempcpy made from inside libc are bound directly to the
   SSE2 implementation through the __GI_ aliases defined here, rather
   than going through a PLT entry.  Whatever an SSSE3 variant might
   gain would likely be lost again to the PLT's indirect call.

   As with the other overrides in this file, the `name' argument is
   ignored; each macro always defines the same fixed alias.  */
# undef libc_hidden_def
# define libc_hidden_def(name) \
	.globl __GI_mempcpy; \
	.set __GI_mempcpy, __mempcpy_sse2

# undef libc_hidden_builtin_def
# define libc_hidden_builtin_def(name) \
	.globl __GI___mempcpy; \
	.set __GI___mempcpy, __mempcpy_sse2
84 | #endif |
85 | |
86 | #include "../mempcpy.S" |
87 | |