1 | /* Common definition for memcpy, mempcpy and memmove implementation. |
2 | All versions must be listed in ifunc-impl-list.c. |
3 | Copyright (C) 2017-2022 Free Software Foundation, Inc. |
4 | This file is part of the GNU C Library. |
5 | |
6 | The GNU C Library is free software; you can redistribute it and/or |
7 | modify it under the terms of the GNU Lesser General Public |
8 | License as published by the Free Software Foundation; either |
9 | version 2.1 of the License, or (at your option) any later version. |
10 | |
11 | The GNU C Library is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | Lesser General Public License for more details. |
15 | |
16 | You should have received a copy of the GNU Lesser General Public |
17 | License along with the GNU C Library; if not, see |
18 | <https://www.gnu.org/licenses/>. */ |
19 | |
20 | #include <init-arch.h> |
21 | |
/* Forward declarations of every candidate implementation the selector
   below can return.  REDIRECT_NAME and OPTIMIZE are macros supplied by
   the file that includes this header (one per function family:
   memcpy, mempcpy, memmove); each declared variant must also be listed
   in ifunc-impl-list.c.  */

/* Plain "rep movsb" variant, used when the CPU/tunables explicitly
   prefer ERMS/FSRM.  */
extern __typeof (REDIRECT_NAME) OPTIMIZE (erms) attribute_hidden;

/* AVX-512 (ZMM-width) variants; the no_vzeroupper form is the legacy
   path for AVX512F-only CPUs.  */
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned)
     attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned_erms)
     attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_no_vzeroupper)
     attribute_hidden;

/* EVEX-encoded (256-bit, AVX512VL) variants.  */
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned)
     attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned_erms)
     attribute_hidden;

/* AVX (VEX-encoded) variants; the _rtm forms are safe to use inside
   RTM transactions.  */
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned_erms)
     attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned_rtm)
     attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned_erms_rtm)
     attribute_hidden;

/* SSSE3 variant (runtime-selected fallback, see selector comment).  */
extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;

/* Baseline SSE2 variants.  */
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
     attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned_erms)
     attribute_hidden;
50 | |
51 | static inline void * |
52 | IFUNC_SELECTOR (void) |
53 | { |
54 | const struct cpu_features *cpu_features = __get_cpu_features (); |
55 | |
56 | if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_ERMS) |
57 | || CPU_FEATURES_ARCH_P (cpu_features, Prefer_FSRM)) |
58 | return OPTIMIZE (erms); |
59 | |
60 | if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512F) |
61 | && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)) |
62 | { |
63 | if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)) |
64 | { |
65 | if (CPU_FEATURE_USABLE_P (cpu_features, ERMS)) |
66 | return OPTIMIZE (avx512_unaligned_erms); |
67 | |
68 | return OPTIMIZE (avx512_unaligned); |
69 | } |
70 | |
71 | return OPTIMIZE (avx512_no_vzeroupper); |
72 | } |
73 | |
74 | if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, |
75 | AVX_Fast_Unaligned_Load, )) |
76 | { |
77 | if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)) |
78 | { |
79 | if (CPU_FEATURE_USABLE_P (cpu_features, ERMS)) |
80 | return OPTIMIZE (evex_unaligned_erms); |
81 | |
82 | return OPTIMIZE (evex_unaligned); |
83 | } |
84 | |
85 | if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) |
86 | { |
87 | if (CPU_FEATURE_USABLE_P (cpu_features, ERMS)) |
88 | return OPTIMIZE (avx_unaligned_erms_rtm); |
89 | |
90 | return OPTIMIZE (avx_unaligned_rtm); |
91 | } |
92 | |
93 | if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, |
94 | Prefer_No_VZEROUPPER, !)) |
95 | { |
96 | if (CPU_FEATURE_USABLE_P (cpu_features, ERMS)) |
97 | return OPTIMIZE (avx_unaligned_erms); |
98 | |
99 | return OPTIMIZE (avx_unaligned); |
100 | } |
101 | } |
102 | |
103 | if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, SSSE3) |
104 | /* Leave this as runtime check. The SSSE3 is optimized almost |
105 | exclusively for avoiding unaligned memory access during the |
106 | copy and by and large is not better than the sse2 |
107 | implementation as a general purpose memmove. */ |
108 | && !CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Copy)) |
109 | { |
110 | return OPTIMIZE (ssse3); |
111 | } |
112 | |
113 | if (CPU_FEATURE_USABLE_P (cpu_features, ERMS)) |
114 | return OPTIMIZE (sse2_unaligned_erms); |
115 | |
116 | return OPTIMIZE (sse2_unaligned); |
117 | } |
118 | |