1 | /* Multiple versions of memcpy |
2 | All versions must be listed in ifunc-impl-list.c. |
3 | Copyright (C) 2010-2016 Free Software Foundation, Inc. |
4 | Contributed by Intel Corporation. |
5 | This file is part of the GNU C Library. |
6 | |
7 | The GNU C Library is free software; you can redistribute it and/or |
8 | modify it under the terms of the GNU Lesser General Public |
9 | License as published by the Free Software Foundation; either |
10 | version 2.1 of the License, or (at your option) any later version. |
11 | |
12 | The GNU C Library is distributed in the hope that it will be useful, |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | Lesser General Public License for more details. |
16 | |
17 | You should have received a copy of the GNU Lesser General Public |
18 | License along with the GNU C Library; if not, see |
19 | <http://www.gnu.org/licenses/>. */ |
20 | |
21 | #include <sysdep.h> |
22 | #include <shlib-compat.h> |
23 | #include <init-arch.h> |
24 | |
/* Define multiple versions only for the definition in libc and for
   the DSO.  In static binaries we need memcpy before the initialization
   has happened.  */
28 | #if defined SHARED && IS_IN (libc) |
/* IFUNC resolver for memcpy (the symbol is typed @gnu_indirect_function).
   It returns in %rax the address of the implementation to use.  The
   first matching rule wins:

     1. AVX512F_Usable and Prefer_No_VZEROUPPER both set (only when
	built with HAVE_AVX512_ASM_SUPPORT)
					-> __memcpy_avx512_no_vzeroupper
     2. AVX_Fast_Unaligned_Load set	-> __memcpy_avx_unaligned
     3. Slow_BSF clear			-> __memcpy_sse2_unaligned
     4. Slow_BSF set and SSSE3 set	-> __memcpy_ssse3
     5. otherwise			-> __memcpy_sse2

   NOTE(review): the HAS_*_FEATURE macros are defined outside this file;
   they are assumed to test the feature bit via %rdx (prepared by
   LOAD_RTLD_GLOBAL_RO_RDX) and to leave ZF set when the bit is clear.  */
	.text
ENTRY(__new_memcpy)
	.type	__new_memcpy, @gnu_indirect_function
	LOAD_RTLD_GLOBAL_RO_RDX
#ifdef HAVE_AVX512_ASM_SUPPORT
	/* Rule 1: both conditions must hold, otherwise fall through.  */
	HAS_ARCH_FEATURE (AVX512F_Usable)
	jz	1f
	HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
	jz	1f
	leaq	__memcpy_avx512_no_vzeroupper(%rip), %rax
	ret
#endif
	/* Rule 2.  */
1:	HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
	jz	2f
	leaq	__memcpy_avx_unaligned(%rip), %rax
	ret
	/* Rule 3: the unaligned SSE2 variant is used unless Slow_BSF is set.  */
2:	HAS_ARCH_FEATURE (Slow_BSF)
	jnz	3f
	leaq	__memcpy_sse2_unaligned(%rip), %rax
	ret
	/* Rule 4.  */
3:	HAS_CPU_FEATURE (SSSE3)
	jz	4f
	leaq	__memcpy_ssse3(%rip), %rax
	ret
	/* Rule 5: fallback.  */
4:	leaq	__memcpy_sse2(%rip), %rax
	ret
END(__new_memcpy)
55 | |
/* Replace ENTRY/END so that whatever function is subsequently opened
   with ENTRY (NAME) ... END (NAME) is emitted under the fixed name
   __memcpy_sse2: a global, hidden, 16-byte aligned function symbol.
   The NAME argument is deliberately ignored.  Presumably this applies
   to the code pulled in by the #include of "../memcpy.S" at the end of
   this file.  */
# undef ENTRY
# define ENTRY(name) \
	.globl __memcpy_sse2; \
	.hidden __memcpy_sse2; \
	.type __memcpy_sse2, @function; \
	.p2align 4; \
	__memcpy_sse2: cfi_startproc; \
	CALL_MCOUNT
/* Matching terminator: close the CFI region and record the size.  */
# undef END
# define END(name) \
	cfi_endproc; \
	.size __memcpy_sse2, .-__memcpy_sse2
67 | |
/* Replace ENTRY_CHK/END_CHK so that the function opened with
   ENTRY_CHK (NAME) ... END_CHK (NAME) is emitted under the fixed name
   __memcpy_chk_sse2: a global, 16-byte aligned function symbol.  The
   NAME argument is ignored.  Unlike the ENTRY replacement, no .hidden
   directive is emitted for this symbol.  */
# undef ENTRY_CHK
# define ENTRY_CHK(name) \
	.globl __memcpy_chk_sse2; \
	.type __memcpy_chk_sse2, @function; \
	.p2align 4; \
	__memcpy_chk_sse2: cfi_startproc; \
	CALL_MCOUNT
/* Matching terminator: close the CFI region and record the size.  */
# undef END_CHK
# define END_CHK(name) \
	cfi_endproc; \
	.size __memcpy_chk_sse2, .-__memcpy_chk_sse2
78 | |
# undef libc_hidden_builtin_def
/* Routing libc-internal memcpy calls through a PLT makes no sense: the
   speedup gained from using SSSE3 instructions would likely be eaten
   away by the indirect call in the PLT.  So the internal name
   __GI_memcpy is made a global alias of __memcpy_sse2, whatever NAME
   is passed.  */
# define libc_hidden_builtin_def(name) \
	.globl __GI_memcpy; .set __GI_memcpy, __memcpy_sse2

/* Bind the resolver __new_memcpy to the name memcpy at symbol version
   GLIBC_2_14 of libc.  NOTE(review): versioned_symbol is defined
   outside this file, presumably in <shlib-compat.h>; confirm there.  */
versioned_symbol (libc, __new_memcpy, memcpy, GLIBC_2_14);
87 | #endif |
88 | |
89 | #include "../memcpy.S" |
90 | |