1 | /* This file is part of the GNU C Library. |
2 | Copyright (C) 2008-2017 Free Software Foundation, Inc. |
3 | |
4 | The GNU C Library is free software; you can redistribute it and/or |
5 | modify it under the terms of the GNU Lesser General Public |
6 | License as published by the Free Software Foundation; either |
7 | version 2.1 of the License, or (at your option) any later version. |
8 | |
9 | The GNU C Library is distributed in the hope that it will be useful, |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | Lesser General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU Lesser General Public |
15 | License along with the GNU C Library; if not, see |
16 | <http://www.gnu.org/licenses/>. */ |
17 | |
18 | #ifndef cpu_features_h |
19 | #define cpu_features_h |
20 | |
/* Internal (glibc-defined) feature bits.  Each bit_arch_<name> is a
   single-bit mask within the word cpu_features.feature[index_arch_<name>];
   every index_arch_* in this header selects FEATURE_INDEX_1, so all of
   these masks share one 32-bit word and must stay distinct.

   The part after "bit_arch_" is token-pasted by HAS_ARCH_FEATURE /
   CPU_FEATURES_ARCH_P (C) and by HAS_FEATURE (assembler), so it has to be
   spelled exactly like the matching index_arch_<name>.

   Bit 3 is not assigned in this list.  */
#define bit_arch_Fast_Rep_String              (1 << 0)
#define bit_arch_Fast_Copy_Backward           (1 << 1)
#define bit_arch_Slow_BSF                     (1 << 2)
#define bit_arch_Fast_Unaligned_Load          (1 << 4)
#define bit_arch_Prefer_PMINUB_for_stringop   (1 << 5)
#define bit_arch_AVX_Usable                   (1 << 6)
#define bit_arch_FMA_Usable                   (1 << 7)
#define bit_arch_FMA4_Usable                  (1 << 8)
#define bit_arch_Slow_SSE4_2                  (1 << 9)
#define bit_arch_AVX2_Usable                  (1 << 10)
#define bit_arch_AVX_Fast_Unaligned_Load      (1 << 11)
#define bit_arch_AVX512F_Usable               (1 << 12)
#define bit_arch_AVX512DQ_Usable              (1 << 13)
#define bit_arch_I586                         (1 << 14)
#define bit_arch_I686                         (1 << 15)
#define bit_arch_Prefer_MAP_32BIT_EXEC        (1 << 16)
#define bit_arch_Prefer_No_VZEROUPPER         (1 << 17)
#define bit_arch_Fast_Unaligned_Copy          (1 << 18)
#define bit_arch_Prefer_ERMS                  (1 << 19)
#define bit_arch_Use_dl_runtime_resolve_opt   (1 << 20)
#define bit_arch_Use_dl_runtime_resolve_slow  (1 << 21)
#define bit_arch_Prefer_No_AVX512             (1 << 22)
43 | |
/* CPUID feature flags.  bit_cpu_<name> is the mask of the feature inside
   the register selected by reg_<name>, within the CPUID leaf selected by
   index_cpu_<name>.  The entries below are grouped by register and
   listed in ascending bit order.  */

/* Read from EDX (see reg_*).  */
#define bit_cpu_CX8       (1 << 8)
#define bit_cpu_CMOV      (1 << 15)
#define bit_cpu_SSE       (1 << 25)
#define bit_cpu_SSE2      (1 << 26)
#define bit_cpu_HTT       (1 << 28)

/* Read from ECX (see reg_*).  */
#define bit_cpu_SSSE3     (1 << 9)
#define bit_cpu_FMA       (1 << 12)
#define bit_cpu_SSE4_1    (1 << 19)
#define bit_cpu_SSE4_2    (1 << 20)
#define bit_cpu_MOVBE     (1 << 22)
#define bit_cpu_POPCNT    (1 << 23)
#define bit_cpu_POPCOUNT  (1 << 23)  /* Same bit as bit_cpu_POPCNT.  */
#define bit_cpu_OSXSAVE   (1 << 27)
#define bit_cpu_AVX       (1 << 28)

/* Read from ECX of the extended leaf (index_cpu_FMA4 is
   COMMON_CPUID_INDEX_80000001).
   NOTE(review): architecturally LZCNT is also an extended-leaf bit
   (CPUID.80000001H:ECX[5]); confirm that index_cpu_LZCNT agrees.  */
#define bit_cpu_LZCNT     (1 << 5)
#define bit_cpu_FMA4      (1 << 16)
63 | |
/* COMMON_CPUID_INDEX_7.  All of these are read from EBX (see reg_*);
   listed in ascending bit order.  */
#define bit_cpu_BMI1      (1 << 3)
#define bit_cpu_AVX2      (1 << 5)
#define bit_cpu_BMI2      (1 << 8)
#define bit_cpu_ERMS      (1 << 9)
#define bit_cpu_RTM       (1 << 11)
#define bit_cpu_AVX512F   (1 << 16)
#define bit_cpu_AVX512DQ  (1 << 17)
#define bit_cpu_AVX512PF  (1 << 26)
#define bit_cpu_AVX512ER  (1 << 27)
#define bit_cpu_AVX512CD  (1 << 28)
#define bit_cpu_AVX512BW  (1 << 30)
/* Unsigned on purpose: shifting a signed 1 into bit 31 would overflow
   int.  */
#define bit_cpu_AVX512VL  (1u << 31)
77 | |
/* XCR0 Feature flags.  Single-bit masks named after the register state
   each bit stands for (XMM, YMM, opmask, ZMM0-15, ZMM16-31).  Nothing
   else in this header refers to them.
   NOTE(review): presumably they are tested against the XCR0 value read
   with XGETBV before the *_Usable feature bits are set; confirm in the
   feature-initialisation code.  */
#define bit_XMM_state       (1 << 1)
#define bit_YMM_state       (1 << 2)
#define bit_Opmask_state    (1 << 5)
#define bit_ZMM0_15_state   (1 << 6)
#define bit_ZMM16_31_state  (1 << 7)
84 | |
/* The integer bit array index for the first set of internal feature bits.
   Every index_arch_* macro in this header resolves to this index.  */
#define FEATURE_INDEX_1 0

/* The current maximum size of the feature integer bit array, i.e. the
   number of elements of cpu_features.feature[].  */
#define FEATURE_INDEX_MAX 1
90 | |
91 | #ifdef __ASSEMBLER__ |
92 | |
93 | # include <cpu-features-offsets.h> |
94 | |
/* Assembler view of index_cpu_<name>: an offset expression of the form
   <leaf index> * CPUID_SIZE + <register offset>.  HAS_FEATURE adds it to
   the offset passed by HAS_CPU_FEATURE (CPUID_OFFSET) to address the
   32-bit word that is tested against bit_cpu_<name>.

   Only the features listed here can be used with HAS_CPU_FEATURE from
   assembly; the C branch of this header defines more of them.

   NOTE(review): CPUID_SIZE, CPUID_E?X_OFFSET and the assembler values of
   COMMON_CPUID_INDEX_* are not defined in this header; presumably they
   come from <cpu-features-offsets.h> -- confirm.  */
# define index_cpu_CX8     COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET
# define index_cpu_CMOV    COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET
# define index_cpu_SSE     COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET
# define index_cpu_SSE2    COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET
# define index_cpu_SSSE3   COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
# define index_cpu_SSE4_1  COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
# define index_cpu_SSE4_2  COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
# define index_cpu_AVX     COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
# define index_cpu_AVX2    COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET
# define index_cpu_ERMS    COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET
# define index_cpu_MOVBE   COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
106 | |
/* Assembler view of index_arch_<name>: the offset expression
   <feature word index> * FEATURE_SIZE.  HAS_FEATURE adds it to the offset
   passed by HAS_ARCH_FEATURE (FEATURE_OFFSET) to address the word that is
   tested against bit_arch_<name>.  All entries use FEATURE_INDEX_1, which
   is 0, so each expression currently evaluates to offset 0.

   NOTE(review): FEATURE_SIZE is not defined in this header; presumably
   it comes from <cpu-features-offsets.h> -- confirm.  */
# define index_arch_Fast_Rep_String              FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Fast_Copy_Backward           FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Slow_BSF                     FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Fast_Unaligned_Load          FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Prefer_PMINUB_for_stringop   FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_AVX_Usable                   FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_FMA_Usable                   FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_FMA4_Usable                  FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Slow_SSE4_2                  FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_AVX2_Usable                  FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_AVX_Fast_Unaligned_Load      FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_AVX512F_Usable               FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_AVX512DQ_Usable              FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_I586                         FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_I686                         FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Prefer_MAP_32BIT_EXEC        FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Prefer_No_VZEROUPPER         FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Fast_Unaligned_Copy          FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Prefer_ERMS                  FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Use_dl_runtime_resolve_opt   FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Use_dl_runtime_resolve_slow  FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Prefer_No_AVX512             FEATURE_INDEX_1*FEATURE_SIZE
129 | |
130 | |
/* Assembler implementation of the feature test.

   HAS_FEATURE (offset, field, name) expands to one testl instruction that
   tests the mask bit_<field>_<name> against the 32-bit word located at
   offset + index_<field>_<name> inside the CPU features data.  The
   outcome is reported in the flags set by testl.  How the data is
   addressed depends on the word size, on SHARED and on whether the code
   is built for rtld, hence one definition per configuration.  The
   companion LOAD_* macros set up whatever register the matching
   HAS_FEATURE addresses through; they are empty where none is needed.  */
# if defined (_LIBC) && !IS_IN (nonlib)
#  ifdef __x86_64__
#   ifdef SHARED
#    if IS_IN (rtld)
/* 64-bit, shared, inside rtld: _rtld_local_ro is addressed directly,
   RIP-relative, so no register has to be loaded.  */
#     define LOAD_RTLD_GLOBAL_RO_RDX
#     define HAS_FEATURE(offset, field, name) \
  testl $(bit_##field##_##name), \
        _rtld_local_ro+offset+(index_##field##_##name)(%rip)
#    else
/* 64-bit, shared, outside rtld: LOAD_RTLD_GLOBAL_RO_RDX fetches the
   address of _rtld_global_ro from the GOT into %RDX_LP.  HAS_FEATURE
   addresses through %rdx, so the load has to be expanded first and %rdx
   must still hold that address.  */
#     define LOAD_RTLD_GLOBAL_RO_RDX \
  mov _rtld_global_ro@GOTPCREL(%rip), %RDX_LP
#     define HAS_FEATURE(offset, field, name) \
  testl $(bit_##field##_##name), \
        RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+offset+(index_##field##_##name)(%rdx)
#    endif
#   else /* SHARED */
/* 64-bit, static: _dl_x86_cpu_features is addressed RIP-relative.  */
#    define LOAD_RTLD_GLOBAL_RO_RDX
#    define HAS_FEATURE(offset, field, name) \
  testl $(bit_##field##_##name), \
        _dl_x86_cpu_features+offset+(index_##field##_##name)(%rip)
#   endif /* !SHARED */
#  else /* __x86_64__ */
#   ifdef SHARED
/* 32-bit, shared: LOAD_FUNC_GOT_EAX computes the address of FUNC
   relative to %edx and leaves it in %eax.
   NOTE(review): this relies on %edx holding the GOT base, which
   LOAD_GOT_AND_RTLD_GLOBAL_RO appears to establish via LOAD_PIC_REG
   (defined elsewhere) -- confirm.  */
#    define LOAD_FUNC_GOT_EAX(func) \
  leal func@GOTOFF(%edx), %eax
#    if IS_IN (rtld)
/* Inside rtld: _rtld_local_ro is addressed GOT-relative through %edx.  */
#     define LOAD_GOT_AND_RTLD_GLOBAL_RO \
  LOAD_PIC_REG(dx)
#     define HAS_FEATURE(offset, field, name) \
  testl $(bit_##field##_##name), \
        offset+(index_##field##_##name)+_rtld_local_ro@GOTOFF(%edx)
#    else
/* Outside rtld: additionally load the address of _rtld_global_ro from
   the GOT into %ecx; HAS_FEATURE addresses through %ecx.  */
#     define LOAD_GOT_AND_RTLD_GLOBAL_RO \
  LOAD_PIC_REG(dx); \
  mov _rtld_global_ro@GOT(%edx), %ecx
#     define HAS_FEATURE(offset, field, name) \
  testl $(bit_##field##_##name), \
        RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+offset+(index_##field##_##name)(%ecx)
#    endif
#   else /* SHARED */
/* 32-bit, static: absolute addressing, nothing to load.  */
#    define LOAD_FUNC_GOT_EAX(func) \
  leal func, %eax
#    define LOAD_GOT_AND_RTLD_GLOBAL_RO
#    define HAS_FEATURE(offset, field, name) \
  testl $(bit_##field##_##name), \
        _dl_x86_cpu_features+offset+(index_##field##_##name)
#   endif /* !SHARED */
#  endif /* !__x86_64__ */
# else /* _LIBC && !nonlib */
/* Assembler use is only supported when building libc itself.  */
#  error "Sorry, <cpu-features.h> is unimplemented for assembler"
# endif /* !_LIBC || nonlib */
182 | |
/* HAS_* evaluates to true if we may use the feature at runtime.  In the
   assembler branch each of these expands to the testl instruction
   produced by HAS_FEATURE: HAS_CPU_FEATURE tests bit_cpu_<name> at
   CPUID_OFFSET + index_cpu_<name>, HAS_ARCH_FEATURE tests bit_arch_<name>
   at FEATURE_OFFSET + index_arch_<name>.
   NOTE(review): CPUID_OFFSET and FEATURE_OFFSET are not defined in this
   header; presumably they come from <cpu-features-offsets.h>.  */
# define HAS_CPU_FEATURE(name)   HAS_FEATURE (CPUID_OFFSET, cpu, name)
# define HAS_ARCH_FEATURE(name)  HAS_FEATURE (FEATURE_OFFSET, arch, name)
186 | |
187 | #else /* __ASSEMBLER__ */ |
188 | |
/* Indices into cpu_features.cpuid[], one per CPUID leaf stored there.
   The leaf slots are numbered explicitly; the final enumerator counts
   them and is used as the array size.  */
enum
  {
    COMMON_CPUID_INDEX_1 = 0,
    COMMON_CPUID_INDEX_7 = 1,
    COMMON_CPUID_INDEX_80000001 = 2,	/* for AMD */
    /* Keep the following line at the end.  */
    COMMON_CPUID_INDEX_MAX
  };
197 | |
198 | struct cpu_features |
199 | { |
200 | enum cpu_features_kind |
201 | { |
202 | arch_kind_unknown = 0, |
203 | arch_kind_intel, |
204 | arch_kind_amd, |
205 | arch_kind_other |
206 | } kind; |
207 | int max_cpuid; |
208 | struct cpuid_registers |
209 | { |
210 | unsigned int eax; |
211 | unsigned int ebx; |
212 | unsigned int ecx; |
213 | unsigned int edx; |
214 | } cpuid[COMMON_CPUID_INDEX_MAX]; |
215 | unsigned int family; |
216 | unsigned int model; |
217 | unsigned int feature[FEATURE_INDEX_MAX]; |
218 | /* Data cache size for use in memory and string routines, typically |
219 | L1 size. */ |
220 | unsigned long int data_cache_size; |
221 | /* Shared cache size for use in memory and string routines, typically |
222 | L2 or L3 size. */ |
223 | unsigned long int shared_cache_size; |
224 | /* Threshold to use non temporal store. */ |
225 | unsigned long int non_temporal_threshold; |
226 | }; |
227 | |
/* Used from outside of glibc to get access to the CPU features
   structure.  Returns a pointer to const, so callers get a read-only
   view; the function carries the `const' attribute.  */
extern const struct cpu_features *__get_cpu_features (void)
     __attribute__ ((const));

# if defined (_LIBC) && !IS_IN (nonlib)
/* Unused for x86.  */
#  define INIT_ARCH()
/* Inside libc the accessor is replaced by a function-like macro that
   takes the address of GLRO(dl_x86_cpu_features) directly (GLRO is
   defined elsewhere).  This definition has to stay below the prototype
   above: if it came first, the "(void)" of the prototype would be
   parsed as an invocation of this zero-parameter macro.  */
#  define __get_cpu_features()	(&GLRO(dl_x86_cpu_features))
# endif
238 | |
239 | |
/* Only used directly in cpu-features.c.

   CPU_FEATURES_CPU_P (ptr, name) is nonzero iff the CPUID feature NAME is
   set in *PTR: it tests bit_cpu_<name> in member reg_<name> of
   ptr->cpuid[index_cpu_<name>].

   CPU_FEATURES_ARCH_P (ptr, name) is nonzero iff the internal feature
   NAME is set in *PTR: it tests bit_arch_<name> in
   ptr->feature[index_arch_<name>].

   PTR may be any expression yielding a pointer to struct cpu_features.
   It is parenthesized in the expansion so that an argument such as
   "&features" binds as (&features)->cpuid and not as &(features->cpuid).
   NAME is token-pasted and must be a bare feature suffix.  */
# define CPU_FEATURES_CPU_P(ptr, name) \
  (((ptr)->cpuid[index_cpu_##name].reg_##name & (bit_cpu_##name)) != 0)
# define CPU_FEATURES_ARCH_P(ptr, name) \
  (((ptr)->feature[index_arch_##name] & (bit_arch_##name)) != 0)

/* HAS_* evaluates to true if we may use the feature at runtime.  Both
   query the structure returned by __get_cpu_features ().  */
# define HAS_CPU_FEATURE(name) \
  CPU_FEATURES_CPU_P (__get_cpu_features (), name)
# define HAS_ARCH_FEATURE(name) \
  CPU_FEATURES_ARCH_P (__get_cpu_features (), name)
251 | |
/* index_cpu_<name> selects the element of cpu_features.cpuid[] (one of
   the COMMON_CPUID_INDEX_* slots) that holds the CPUID leaf reporting
   feature NAME.  Together with reg_<name> and bit_cpu_<name> it
   identifies the exact bit tested by CPU_FEATURES_CPU_P.  */

/* Features reported by the leaf stored in COMMON_CPUID_INDEX_1.  */
# define index_cpu_CX8       COMMON_CPUID_INDEX_1
# define index_cpu_CMOV      COMMON_CPUID_INDEX_1
# define index_cpu_SSE       COMMON_CPUID_INDEX_1
# define index_cpu_SSE2      COMMON_CPUID_INDEX_1
# define index_cpu_SSSE3     COMMON_CPUID_INDEX_1
# define index_cpu_SSE4_1    COMMON_CPUID_INDEX_1
# define index_cpu_SSE4_2    COMMON_CPUID_INDEX_1
# define index_cpu_AVX       COMMON_CPUID_INDEX_1
# define index_cpu_FMA       COMMON_CPUID_INDEX_1
# define index_cpu_POPCOUNT  COMMON_CPUID_INDEX_1
# define index_cpu_OSXSAVE   COMMON_CPUID_INDEX_1
# define index_cpu_HTT       COMMON_CPUID_INDEX_1
# define index_cpu_MOVBE     COMMON_CPUID_INDEX_1
# define index_cpu_POPCNT    COMMON_CPUID_INDEX_1

/* Features reported by the leaf stored in COMMON_CPUID_INDEX_7.  */
# define index_cpu_AVX2      COMMON_CPUID_INDEX_7
# define index_cpu_AVX512F   COMMON_CPUID_INDEX_7
# define index_cpu_AVX512DQ  COMMON_CPUID_INDEX_7
# define index_cpu_AVX512PF  COMMON_CPUID_INDEX_7
# define index_cpu_AVX512ER  COMMON_CPUID_INDEX_7
# define index_cpu_AVX512CD  COMMON_CPUID_INDEX_7
# define index_cpu_AVX512BW  COMMON_CPUID_INDEX_7
# define index_cpu_AVX512VL  COMMON_CPUID_INDEX_7
# define index_cpu_ERMS      COMMON_CPUID_INDEX_7
# define index_cpu_RTM       COMMON_CPUID_INDEX_7
# define index_cpu_BMI1      COMMON_CPUID_INDEX_7
# define index_cpu_BMI2      COMMON_CPUID_INDEX_7

/* Features reported by the extended leaf stored in
   COMMON_CPUID_INDEX_80000001.  */
# define index_cpu_FMA4      COMMON_CPUID_INDEX_80000001
/* LZCNT is CPUID.80000001H:ECX[5].  Bit 5 of ECX in leaf 1 is VMX, so
   looking LZCNT up in COMMON_CPUID_INDEX_1 would test an unrelated
   feature.
   NOTE(review): confirm that the initialisation code fills the
   COMMON_CPUID_INDEX_80000001 slot on every vendor for which LZCNT is
   queried.  */
# define index_cpu_LZCNT     COMMON_CPUID_INDEX_80000001
280 | |
/* reg_<name> names the member of struct cpuid_registers that carries
   feature NAME; CPU_FEATURES_CPU_P pastes it after the "." of the
   selected cpuid[] element.  Grouped by register.  */

/* Features found in EDX.  */
# define reg_CX8       edx
# define reg_CMOV      edx
# define reg_SSE       edx
# define reg_SSE2      edx
# define reg_HTT       edx

/* Features found in ECX.  */
# define reg_SSSE3     ecx
# define reg_SSE4_1    ecx
# define reg_SSE4_2    ecx
# define reg_AVX       ecx
# define reg_FMA       ecx
# define reg_FMA4      ecx
# define reg_POPCOUNT  ecx
# define reg_OSXSAVE   ecx
# define reg_LZCNT     ecx
# define reg_MOVBE     ecx
# define reg_POPCNT    ecx

/* Features found in EBX.  */
# define reg_AVX2      ebx
# define reg_AVX512F   ebx
# define reg_AVX512DQ  ebx
# define reg_AVX512PF  ebx
# define reg_AVX512ER  ebx
# define reg_AVX512CD  ebx
# define reg_AVX512BW  ebx
# define reg_AVX512VL  ebx
# define reg_ERMS      ebx
# define reg_RTM       ebx
# define reg_BMI1      ebx
# define reg_BMI2      ebx
309 | |
/* index_arch_<name> selects the element of cpu_features.feature[] that
   holds the internal feature bit bit_arch_<name>; it is used by
   CPU_FEATURES_ARCH_P.  With FEATURE_INDEX_MAX equal to 1 there is a
   single word, so every entry is FEATURE_INDEX_1.  A new bit_arch_*
   needs a matching entry here and in the assembler list earlier in
   this header.  */
# define index_arch_Fast_Rep_String              FEATURE_INDEX_1
# define index_arch_Fast_Copy_Backward           FEATURE_INDEX_1
# define index_arch_Slow_BSF                     FEATURE_INDEX_1
# define index_arch_Fast_Unaligned_Load          FEATURE_INDEX_1
# define index_arch_Prefer_PMINUB_for_stringop   FEATURE_INDEX_1
# define index_arch_AVX_Usable                   FEATURE_INDEX_1
# define index_arch_FMA_Usable                   FEATURE_INDEX_1
# define index_arch_FMA4_Usable                  FEATURE_INDEX_1
# define index_arch_Slow_SSE4_2                  FEATURE_INDEX_1
# define index_arch_AVX2_Usable                  FEATURE_INDEX_1
# define index_arch_AVX_Fast_Unaligned_Load      FEATURE_INDEX_1
# define index_arch_AVX512F_Usable               FEATURE_INDEX_1
# define index_arch_AVX512DQ_Usable              FEATURE_INDEX_1
# define index_arch_I586                         FEATURE_INDEX_1
# define index_arch_I686                         FEATURE_INDEX_1
# define index_arch_Prefer_MAP_32BIT_EXEC        FEATURE_INDEX_1
# define index_arch_Prefer_No_VZEROUPPER         FEATURE_INDEX_1
# define index_arch_Fast_Unaligned_Copy          FEATURE_INDEX_1
# define index_arch_Prefer_ERMS                  FEATURE_INDEX_1
# define index_arch_Use_dl_runtime_resolve_opt   FEATURE_INDEX_1
# define index_arch_Use_dl_runtime_resolve_slow  FEATURE_INDEX_1
# define index_arch_Prefer_No_AVX512             FEATURE_INDEX_1
332 | |
333 | #endif /* !__ASSEMBLER__ */ |
334 | |
/* Compile-time knowledge about the minimum CPU, derived from the
   compiler's predefined target macros.  The branches are tried in order
   and the first one that matches wins.  HAS_I586 / HAS_I686 are either
   the constant 1 or a run-time test of the corresponding internal
   feature bit.  */
#ifdef __x86_64__
/* x86-64: HAS_CPUID is the constant 1.  HAS_I586 and HAS_I686 are left
   undefined in this branch.  */
# define HAS_CPUID 1
#elif defined __i586__ || defined __pentium__
/* Built for an i586-class target: i586 is known at compile time, i686
   still has to be checked at run time.  */
# define HAS_CPUID 1
# define HAS_I586 1
# define HAS_I686 HAS_ARCH_FEATURE (I686)
#elif (defined __i686__ || defined __pentiumpro__		\
       || defined __pentium4__ || defined __nocona__		\
       || defined __atom__ || defined __core2__			\
       || defined __corei7__ || defined __corei7_avx__		\
       || defined __core_avx2__ || defined __nehalem__		\
       || defined __sandybridge__ || defined __haswell__	\
       || defined __knl__ || defined __bonnell__		\
       || defined __silvermont__				\
       || defined __k6__ || defined __k8__			\
       || defined __athlon__ || defined __amdfam10__		\
       || defined __bdver1__ || defined __bdver2__		\
       || defined __bdver3__ || defined __bdver4__		\
       || defined __btver1__ || defined __btver2__)
/* Built for one of the listed i686-or-later targets: everything is
   known at compile time.  */
# define HAS_CPUID 1
# define HAS_I586 1
# define HAS_I686 1
#else
/* No minimum target is known: HAS_CPUID is 0 and both level checks are
   run-time tests of the internal feature bits.
   NOTE(review): HAS_CPUID 0 presumably means that the availability of
   the CPUID instruction cannot be assumed and must be probed; confirm
   against the users of HAS_CPUID.  */
# define HAS_CPUID 0
# define HAS_I586 HAS_ARCH_FEATURE (I586)
# define HAS_I686 HAS_ARCH_FEATURE (I686)
#endif
362 | |
363 | #endif /* cpu_features_h */ |
364 | |