/* Initialize CPU feature data.
   This file is part of the GNU C Library.
   Copyright (C) 2008-2020 Free Software Foundation, Inc.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <cpuid.h>
#include <cpu-features.h>
#include <dl-hwcap.h>
#include <libc-pointer-arith.h>

#if HAVE_TUNABLES
# define TUNABLE_NAMESPACE cpu
# include <unistd.h>  /* Get STDOUT_FILENO for _dl_printf.  */
# include <elf/dl-tunables.h>

extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *)
  attribute_hidden;

# if CET_ENABLED
extern void TUNABLE_CALLBACK (set_x86_ibt) (tunable_val_t *)
  attribute_hidden;
extern void TUNABLE_CALLBACK (set_x86_shstk) (tunable_val_t *)
  attribute_hidden;
# endif
#endif

#if CET_ENABLED
# include <dl-cet.h>
#endif

static void
update_usable (struct cpu_features *cpu_features)
{
  /* Before COMMON_CPUID_INDEX_80000001, copy the cpuid array elements to
     the usable array.  */
  unsigned int i;
  for (i = 0; i < COMMON_CPUID_INDEX_80000001; i++)
    cpu_features->features[i].usable = cpu_features->features[i].cpuid;

  /* Before COMMON_CPUID_INDEX_80000001, clear the unknown usable bits
     and the always zero bits.  */
  CPU_FEATURE_UNSET (cpu_features, INDEX_1_ECX_16);
  CPU_FEATURE_UNSET (cpu_features, INDEX_1_ECX_31);
  CPU_FEATURE_UNSET (cpu_features, INDEX_1_EDX_10);
  CPU_FEATURE_UNSET (cpu_features, INDEX_1_EDX_20);
  CPU_FEATURE_UNSET (cpu_features, INDEX_1_EDX_30);
  CPU_FEATURE_UNSET (cpu_features, INDEX_7_EBX_6);
  CPU_FEATURE_UNSET (cpu_features, INDEX_7_EBX_22);
  CPU_FEATURE_UNSET (cpu_features, INDEX_7_ECX_13);
  CPU_FEATURE_UNSET (cpu_features, INDEX_7_ECX_15);
  CPU_FEATURE_UNSET (cpu_features, INDEX_7_ECX_16);
  CPU_FEATURE_UNSET (cpu_features, INDEX_7_ECX_23);
  CPU_FEATURE_UNSET (cpu_features, INDEX_7_ECX_24);
  CPU_FEATURE_UNSET (cpu_features, INDEX_7_ECX_26);
  CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_0);
  CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_1);
  CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_5);
  CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_6);
  CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_7);
  CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_9);
  CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_11);
  CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_12);
  CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_13);
  CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_17);
  CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_19);
  CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_21);
  CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_23);

  /* EAX/EBX from COMMON_CPUID_INDEX_1 and EAX from COMMON_CPUID_INDEX_7
     aren't used for CPU feature detection.  */
  cpu_features->features[COMMON_CPUID_INDEX_1].usable.eax = 0;
  cpu_features->features[COMMON_CPUID_INDEX_1].usable.ebx = 0;
  cpu_features->features[COMMON_CPUID_INDEX_7].usable.eax = 0;

  /* Starting from COMMON_CPUID_INDEX_80000001, copy the cpuid bits to
     usable bits.  */
  CPU_FEATURE_SET_USABLE (cpu_features, LAHF64_SAHF64);
  CPU_FEATURE_SET_USABLE (cpu_features, SVM);
  CPU_FEATURE_SET_USABLE (cpu_features, LZCNT);
  CPU_FEATURE_SET_USABLE (cpu_features, SSE4A);
  CPU_FEATURE_SET_USABLE (cpu_features, PREFETCHW);
  CPU_FEATURE_SET_USABLE (cpu_features, XOP);
  CPU_FEATURE_SET_USABLE (cpu_features, LWP);
  CPU_FEATURE_SET_USABLE (cpu_features, FMA4);
  CPU_FEATURE_SET_USABLE (cpu_features, TBM);
  CPU_FEATURE_SET_USABLE (cpu_features, SYSCALL_SYSRET);
  CPU_FEATURE_SET_USABLE (cpu_features, NX);
  CPU_FEATURE_SET_USABLE (cpu_features, PAGE1GB);
  CPU_FEATURE_SET_USABLE (cpu_features, RDTSCP);
  CPU_FEATURE_SET_USABLE (cpu_features, LM);
  CPU_FEATURE_SET_USABLE (cpu_features, XSAVEOPT);
  CPU_FEATURE_SET_USABLE (cpu_features, XSAVEC);
  CPU_FEATURE_SET_USABLE (cpu_features, XGETBV_ECX_1);
  CPU_FEATURE_SET_USABLE (cpu_features, XSAVES);
  CPU_FEATURE_SET_USABLE (cpu_features, XFD);
  CPU_FEATURE_SET_USABLE (cpu_features, INVARIANT_TSC);
  CPU_FEATURE_SET_USABLE (cpu_features, WBNOINVD);
  CPU_FEATURE_SET_USABLE (cpu_features, AVX512_BF16);

  /* MPX has been deprecated.  */
  CPU_FEATURE_UNSET (cpu_features, MPX);

  /* Clear the usable bits which require OS support.  */
  CPU_FEATURE_UNSET (cpu_features, FMA);
  CPU_FEATURE_UNSET (cpu_features, AVX);
  CPU_FEATURE_UNSET (cpu_features, F16C);
  CPU_FEATURE_UNSET (cpu_features, AVX2);
  CPU_FEATURE_UNSET (cpu_features, AVX512F);
  CPU_FEATURE_UNSET (cpu_features, AVX512DQ);
  CPU_FEATURE_UNSET (cpu_features, AVX512_IFMA);
  CPU_FEATURE_UNSET (cpu_features, AVX512PF);
  CPU_FEATURE_UNSET (cpu_features, AVX512ER);
  CPU_FEATURE_UNSET (cpu_features, AVX512CD);
  CPU_FEATURE_UNSET (cpu_features, AVX512BW);
  CPU_FEATURE_UNSET (cpu_features, AVX512VL);
  CPU_FEATURE_UNSET (cpu_features, AVX512_VBMI);
  CPU_FEATURE_UNSET (cpu_features, PKU);
  CPU_FEATURE_UNSET (cpu_features, AVX512_VBMI2);
  CPU_FEATURE_UNSET (cpu_features, VAES);
  CPU_FEATURE_UNSET (cpu_features, VPCLMULQDQ);
  CPU_FEATURE_UNSET (cpu_features, AVX512_VNNI);
  CPU_FEATURE_UNSET (cpu_features, AVX512_BITALG);
  CPU_FEATURE_UNSET (cpu_features, AVX512_VPOPCNTDQ);
  CPU_FEATURE_UNSET (cpu_features, AVX512_4VNNIW);
  CPU_FEATURE_UNSET (cpu_features, AVX512_4FMAPS);
  CPU_FEATURE_UNSET (cpu_features, AVX512_VP2INTERSECT);
  CPU_FEATURE_UNSET (cpu_features, AMX_BF16);
  CPU_FEATURE_UNSET (cpu_features, AMX_TILE);
  CPU_FEATURE_UNSET (cpu_features, AMX_INT8);
  CPU_FEATURE_UNSET (cpu_features, XOP);
  CPU_FEATURE_UNSET (cpu_features, FMA4);
  CPU_FEATURE_UNSET (cpu_features, XSAVEC);
  CPU_FEATURE_UNSET (cpu_features, XFD);
  CPU_FEATURE_UNSET (cpu_features, AVX512_BF16);

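  /* OSXSAVE (CPUID.1:ECX.OSXSAVE) means the kernel has set CR4.OSXSAVE, so
     the XGETBV instruction is available.  XGETBV with ECX == 0 reads XCR0,
     whose bits report which register state components the kernel saves and
     restores on context switch; a vector extension is usable only when both
     its CPUID bit and the required XCR0 state bits are set.  */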
  /* Can we call xgetbv?  */
  if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE))
    {
      unsigned int xcrlow;
      unsigned int xcrhigh;
      asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
      /* Are the YMM and XMM states usable?  */
      if ((xcrlow & (bit_YMM_state | bit_XMM_state))
          == (bit_YMM_state | bit_XMM_state))
        {
          /* Determine if AVX is usable.  */
          if (CPU_FEATURES_CPU_P (cpu_features, AVX))
            {
              CPU_FEATURE_SET (cpu_features, AVX);
              /* The following features depend on AVX being usable.  */
              /* Determine if AVX2 is usable.  */
              if (CPU_FEATURES_CPU_P (cpu_features, AVX2))
                {
                  CPU_FEATURE_SET (cpu_features, AVX2);

                  /* Unaligned loads with 256-bit AVX registers are faster
                     on Intel/AMD processors with AVX2.  */
                  cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
                    |= bit_arch_AVX_Fast_Unaligned_Load;
                }
              /* Determine if FMA is usable.  */
              CPU_FEATURE_SET_USABLE (cpu_features, FMA);
              /* Determine if VAES is usable.  */
              CPU_FEATURE_SET_USABLE (cpu_features, VAES);
              /* Determine if VPCLMULQDQ is usable.  */
              CPU_FEATURE_SET_USABLE (cpu_features, VPCLMULQDQ);
              /* Determine if XOP is usable.  */
              CPU_FEATURE_SET_USABLE (cpu_features, XOP);
              /* Determine if F16C is usable.  */
              CPU_FEATURE_SET_USABLE (cpu_features, F16C);
            }

          /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and
             ZMM16-ZMM31 state are enabled.  */
          if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state
                         | bit_ZMM16_31_state))
              == (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state))
            {
              /* Determine if AVX512F is usable.  */
              if (CPU_FEATURES_CPU_P (cpu_features, AVX512F))
                {
                  CPU_FEATURE_SET (cpu_features, AVX512F);
                  /* Determine if AVX512CD is usable.  */
                  CPU_FEATURE_SET_USABLE (cpu_features, AVX512CD);
                  /* Determine if AVX512ER is usable.  */
                  CPU_FEATURE_SET_USABLE (cpu_features, AVX512ER);
                  /* Determine if AVX512PF is usable.  */
                  CPU_FEATURE_SET_USABLE (cpu_features, AVX512PF);
                  /* Determine if AVX512VL is usable.  */
                  CPU_FEATURE_SET_USABLE (cpu_features, AVX512VL);
                  /* Determine if AVX512DQ is usable.  */
                  CPU_FEATURE_SET_USABLE (cpu_features, AVX512DQ);
                  /* Determine if AVX512BW is usable.  */
                  CPU_FEATURE_SET_USABLE (cpu_features, AVX512BW);
                  /* Determine if AVX512_4FMAPS is usable.  */
                  CPU_FEATURE_SET_USABLE (cpu_features, AVX512_4FMAPS);
                  /* Determine if AVX512_4VNNIW is usable.  */
                  CPU_FEATURE_SET_USABLE (cpu_features, AVX512_4VNNIW);
                  /* Determine if AVX512_BITALG is usable.  */
                  CPU_FEATURE_SET_USABLE (cpu_features, AVX512_BITALG);
                  /* Determine if AVX512_IFMA is usable.  */
                  CPU_FEATURE_SET_USABLE (cpu_features, AVX512_IFMA);
                  /* Determine if AVX512_VBMI is usable.  */
                  CPU_FEATURE_SET_USABLE (cpu_features, AVX512_VBMI);
                  /* Determine if AVX512_VBMI2 is usable.  */
                  CPU_FEATURE_SET_USABLE (cpu_features, AVX512_VBMI2);
                  /* Determine if AVX512_VNNI is usable.  */
                  CPU_FEATURE_SET_USABLE (cpu_features, AVX512_VNNI);
                  /* Determine if AVX512_VPOPCNTDQ is usable.  */
                  CPU_FEATURE_SET_USABLE (cpu_features, AVX512_VPOPCNTDQ);
                  /* Determine if AVX512_VP2INTERSECT is usable.  */
                  CPU_FEATURE_SET_USABLE (cpu_features, AVX512_VP2INTERSECT);
                  /* Determine if AVX512_BF16 is usable.  */
                  CPU_FEATURE_SET_USABLE (cpu_features, AVX512_BF16);
                }
            }
        }

      /* Are XTILECFG and XTILEDATA states usable?  */
      if ((xcrlow & (bit_XTILECFG_state | bit_XTILEDATA_state))
          == (bit_XTILECFG_state | bit_XTILEDATA_state))
        {
          /* Determine if AMX_BF16 is usable.  */
          CPU_FEATURE_SET_USABLE (cpu_features, AMX_BF16);
          /* Determine if AMX_TILE is usable.  */
          CPU_FEATURE_SET_USABLE (cpu_features, AMX_TILE);
          /* Determine if AMX_INT8 is usable.  */
          CPU_FEATURE_SET_USABLE (cpu_features, AMX_INT8);
        }

      /* XFD is usable only when OSXSAVE is enabled.  */
      CPU_FEATURE_SET_USABLE (cpu_features, XFD);

      /* For _dl_runtime_resolve, set xsave_state_size to xsave area
         size + integer register save size and align it to 64 bytes.  */
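      /* CPUID leaf 0xD, sub-leaf 0: EBX reports the size in bytes of the
         XSAVE area needed for the state components currently enabled in
         XCR0, using the standard (non-compacted) format.  */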
      if (cpu_features->basic.max_cpuid >= 0xd)
        {
          unsigned int eax, ebx, ecx, edx;

          __cpuid_count (0xd, 0, eax, ebx, ecx, edx);
          if (ebx != 0)
            {
              unsigned int xsave_state_full_size
                = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);

              cpu_features->xsave_state_size
                = xsave_state_full_size;
              cpu_features->xsave_state_full_size
                = xsave_state_full_size;

              /* Check if XSAVEC is available.  */
              if (CPU_FEATURES_CPU_P (cpu_features, XSAVEC))
                {
                  unsigned int xstate_comp_offsets[32];
                  unsigned int xstate_comp_sizes[32];
                  unsigned int i;

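                  /* Compute the compacted (XSAVEC) layout.  The hard-coded
                     entries below bookkeep the legacy region (x87 state as
                     160 bytes at offset 0, SSE state as 256 bytes at offset
                     160) plus the 64-byte XSAVE header, so component 2
                     (AVX) starts at offset 576.  Components 2 and up are
                     then packed in increasing bit order of STATE_SAVE_MASK,
                     64-byte aligned when CPUID leaf 0xD reports the
                     alignment bit in ECX for that component.  */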
                  xstate_comp_offsets[0] = 0;
                  xstate_comp_offsets[1] = 160;
                  xstate_comp_offsets[2] = 576;
                  xstate_comp_sizes[0] = 160;
                  xstate_comp_sizes[1] = 256;

                  for (i = 2; i < 32; i++)
                    {
                      if ((STATE_SAVE_MASK & (1 << i)) != 0)
                        {
                          __cpuid_count (0xd, i, eax, ebx, ecx, edx);
                          xstate_comp_sizes[i] = eax;
                        }
                      else
                        {
                          ecx = 0;
                          xstate_comp_sizes[i] = 0;
                        }

                      if (i > 2)
                        {
                          xstate_comp_offsets[i]
                            = (xstate_comp_offsets[i - 1]
                               + xstate_comp_sizes[i - 1]);
                          if ((ecx & (1 << 1)) != 0)
                            xstate_comp_offsets[i]
                              = ALIGN_UP (xstate_comp_offsets[i], 64);
                        }
                    }

                  /* Use XSAVEC.  */
                  unsigned int size
                    = xstate_comp_offsets[31] + xstate_comp_sizes[31];
                  if (size)
                    {
                      cpu_features->xsave_state_size
                        = ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
                      CPU_FEATURE_SET (cpu_features, XSAVEC);
                    }
                }
            }
        }
    }

  /* Determine if PKU is usable.  */
  if (CPU_FEATURES_CPU_P (cpu_features, OSPKE))
    CPU_FEATURE_SET (cpu_features, PKU);
}

static void
get_extended_indices (struct cpu_features *cpu_features)
{
  unsigned int eax, ebx, ecx, edx;
  __cpuid (0x80000000, eax, ebx, ecx, edx);
  if (eax >= 0x80000001)
    __cpuid (0x80000001,
             cpu_features->features[COMMON_CPUID_INDEX_80000001].cpuid.eax,
             cpu_features->features[COMMON_CPUID_INDEX_80000001].cpuid.ebx,
             cpu_features->features[COMMON_CPUID_INDEX_80000001].cpuid.ecx,
             cpu_features->features[COMMON_CPUID_INDEX_80000001].cpuid.edx);
  if (eax >= 0x80000007)
    __cpuid (0x80000007,
             cpu_features->features[COMMON_CPUID_INDEX_80000007].cpuid.eax,
             cpu_features->features[COMMON_CPUID_INDEX_80000007].cpuid.ebx,
             cpu_features->features[COMMON_CPUID_INDEX_80000007].cpuid.ecx,
             cpu_features->features[COMMON_CPUID_INDEX_80000007].cpuid.edx);
  if (eax >= 0x80000008)
    __cpuid (0x80000008,
             cpu_features->features[COMMON_CPUID_INDEX_80000008].cpuid.eax,
             cpu_features->features[COMMON_CPUID_INDEX_80000008].cpuid.ebx,
             cpu_features->features[COMMON_CPUID_INDEX_80000008].cpuid.ecx,
             cpu_features->features[COMMON_CPUID_INDEX_80000008].cpuid.edx);
}

static void
get_common_indices (struct cpu_features *cpu_features,
                    unsigned int *family, unsigned int *model,
                    unsigned int *extended_model, unsigned int *stepping)
{
  if (family)
    {
      unsigned int eax;
      __cpuid (1, eax,
               cpu_features->features[COMMON_CPUID_INDEX_1].cpuid.ebx,
               cpu_features->features[COMMON_CPUID_INDEX_1].cpuid.ecx,
               cpu_features->features[COMMON_CPUID_INDEX_1].cpuid.edx);
      cpu_features->features[COMMON_CPUID_INDEX_1].cpuid.eax = eax;
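      /* CPUID leaf 1 EAX layout: stepping in bits 3:0, model in bits 7:4,
         family in bits 11:8, extended model in bits 19:16 and extended
         family in bits 27:20.  The extended model is extracted already
         shifted into the high nibble, so callers can simply add it to the
         base model.  */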
      *family = (eax >> 8) & 0x0f;
      *model = (eax >> 4) & 0x0f;
      *extended_model = (eax >> 12) & 0xf0;
      *stepping = eax & 0x0f;
      if (*family == 0x0f)
        {
          *family += (eax >> 20) & 0xff;
          *model += *extended_model;
        }
    }

  if (cpu_features->basic.max_cpuid >= 7)
    {
      __cpuid_count (7, 0,
                     cpu_features->features[COMMON_CPUID_INDEX_7].cpuid.eax,
                     cpu_features->features[COMMON_CPUID_INDEX_7].cpuid.ebx,
                     cpu_features->features[COMMON_CPUID_INDEX_7].cpuid.ecx,
                     cpu_features->features[COMMON_CPUID_INDEX_7].cpuid.edx);
      __cpuid_count (7, 1,
                     cpu_features->features[COMMON_CPUID_INDEX_7_ECX_1].cpuid.eax,
                     cpu_features->features[COMMON_CPUID_INDEX_7_ECX_1].cpuid.ebx,
                     cpu_features->features[COMMON_CPUID_INDEX_7_ECX_1].cpuid.ecx,
                     cpu_features->features[COMMON_CPUID_INDEX_7_ECX_1].cpuid.edx);
    }

  if (cpu_features->basic.max_cpuid >= 0xd)
    __cpuid_count (0xd, 1,
                   cpu_features->features[COMMON_CPUID_INDEX_D_ECX_1].cpuid.eax,
                   cpu_features->features[COMMON_CPUID_INDEX_D_ECX_1].cpuid.ebx,
                   cpu_features->features[COMMON_CPUID_INDEX_D_ECX_1].cpuid.ecx,
                   cpu_features->features[COMMON_CPUID_INDEX_D_ECX_1].cpuid.edx);
}

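/* The arch bits checked below are OR'ed together into a single element of
   the preferred[] array in init_cpu_features, so they must all share the
   same index.  */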
_Static_assert (((index_arch_Fast_Unaligned_Load
                  == index_arch_Fast_Unaligned_Copy)
                 && (index_arch_Fast_Unaligned_Load
                     == index_arch_Prefer_PMINUB_for_stringop)
                 && (index_arch_Fast_Unaligned_Load
                     == index_arch_Slow_SSE4_2)
                 && (index_arch_Fast_Unaligned_Load
                     == index_arch_Fast_Rep_String)
                 && (index_arch_Fast_Unaligned_Load
                     == index_arch_Fast_Copy_Backward)),
                "Incorrect index_arch_Fast_Unaligned_Load");

static inline void
init_cpu_features (struct cpu_features *cpu_features)
{
  unsigned int ebx, ecx, edx;
  unsigned int family = 0;
  unsigned int model = 0;
  unsigned int stepping = 0;
  enum cpu_features_kind kind;

#if !HAS_CPUID
  if (__get_cpuid_max (0, 0) == 0)
    {
      kind = arch_kind_other;
      goto no_cpuid;
    }
#endif

  __cpuid (0, cpu_features->basic.max_cpuid, ebx, ecx, edx);

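  /* CPUID leaf 0 returns the highest supported standard leaf in EAX and the
     12-byte vendor string in EBX, EDX, ECX (in that order); the constants
     below are those register values read as little-endian dwords.  */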
  /* This spells out "GenuineIntel".  */
  if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
    {
      unsigned int extended_model;

      kind = arch_kind_intel;

      get_common_indices (cpu_features, &family, &model, &extended_model,
                          &stepping);

      get_extended_indices (cpu_features);

      update_usable (cpu_features);

      if (family == 0x06)
        {
          model += extended_model;
          switch (model)
            {
            case 0x1c:
            case 0x26:
              /* BSF is slow on Atom.  */
              cpu_features->preferred[index_arch_Slow_BSF]
                |= bit_arch_Slow_BSF;
              break;

            case 0x57:
              /* Knights Landing.  Enable Silvermont optimizations.  */

            case 0x7a:
              /* Unaligned load versions are faster than SSSE3
                 on Goldmont Plus.  */

            case 0x5c:
            case 0x5f:
              /* Unaligned load versions are faster than SSSE3
                 on Goldmont.  */

            case 0x4c:
            case 0x5a:
            case 0x75:
              /* Airmont is a die shrink of Silvermont.  */

            case 0x37:
            case 0x4a:
            case 0x4d:
            case 0x5d:
              /* Unaligned load versions are faster than SSSE3
                 on Silvermont.  */
              cpu_features->preferred[index_arch_Fast_Unaligned_Load]
                |= (bit_arch_Fast_Unaligned_Load
                    | bit_arch_Fast_Unaligned_Copy
                    | bit_arch_Prefer_PMINUB_for_stringop
                    | bit_arch_Slow_SSE4_2);
              break;

            case 0x86:
            case 0x96:
            case 0x9c:
              /* Enable rep string instructions, unaligned load, unaligned
                 copy, pminub and avoid SSE 4.2 on Tremont.  */
              cpu_features->preferred[index_arch_Fast_Rep_String]
                |= (bit_arch_Fast_Rep_String
                    | bit_arch_Fast_Unaligned_Load
                    | bit_arch_Fast_Unaligned_Copy
                    | bit_arch_Prefer_PMINUB_for_stringop
                    | bit_arch_Slow_SSE4_2);
              break;

            default:
              /* Unknown family 0x06 processors.  Assume this is one of the
                 Core i3/i5/i7 processors if AVX is available.  */
              if (!CPU_FEATURES_CPU_P (cpu_features, AVX))
                break;
              /* Fall through.  */

            case 0x1a:
            case 0x1e:
            case 0x1f:
            case 0x25:
            case 0x2c:
            case 0x2e:
            case 0x2f:
              /* Rep string instructions, unaligned load, unaligned copy,
                 and pminub are fast on Intel Core i3, i5 and i7.  */
              cpu_features->preferred[index_arch_Fast_Rep_String]
                |= (bit_arch_Fast_Rep_String
                    | bit_arch_Fast_Unaligned_Load
                    | bit_arch_Fast_Unaligned_Copy
                    | bit_arch_Prefer_PMINUB_for_stringop);
              break;
            }

          /* Disable TSX on some Haswell processors to avoid TSX on kernels
             that weren't updated with the latest microcode package (which
             disables the broken feature by default).  */
          switch (model)
            {
            case 0x3f:
              /* Xeon E7 v3 with stepping >= 4 has working TSX.  */
              if (stepping >= 4)
                break;
              /* Fall through.  */
            case 0x3c:
            case 0x45:
            case 0x46:
              /* Disable Intel TSX on Haswell processors (except Xeon E7 v3
                 with stepping >= 4) to avoid TSX on kernels that weren't
                 updated with the latest microcode package (which disables
                 the broken feature by default).  */
              CPU_FEATURE_UNSET (cpu_features, RTM);
              break;
            }
        }

      /* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER
         if AVX512ER is available.  Don't use AVX512 to avoid lower CPU
         frequency if AVX512ER isn't available.  */
      if (CPU_FEATURES_CPU_P (cpu_features, AVX512ER))
        cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
          |= bit_arch_Prefer_No_VZEROUPPER;
      else
        cpu_features->preferred[index_arch_Prefer_No_AVX512]
          |= bit_arch_Prefer_No_AVX512;
    }
  /* This spells out "AuthenticAMD" or "HygonGenuine".  */
  else if ((ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
           || (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e))
    {
      unsigned int extended_model;

      kind = arch_kind_amd;

      get_common_indices (cpu_features, &family, &model, &extended_model,
                          &stepping);

      get_extended_indices (cpu_features);

      update_usable (cpu_features);

      ecx = cpu_features->features[COMMON_CPUID_INDEX_1].cpuid.ecx;

      if (CPU_FEATURE_USABLE_P (cpu_features, AVX))
        {
          /* Since the FMA4 bit is in COMMON_CPUID_INDEX_80000001 and
             FMA4 requires AVX, determine if FMA4 is usable here.  */
          CPU_FEATURE_SET_USABLE (cpu_features, FMA4);
        }

      if (family == 0x15)
        {
          /* "Excavator"  */
          if (model >= 0x60 && model <= 0x7f)
            {
              cpu_features->preferred[index_arch_Fast_Unaligned_Load]
                |= (bit_arch_Fast_Unaligned_Load
                    | bit_arch_Fast_Copy_Backward);

              /* Unaligned AVX loads are slower.  */
              cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
                &= ~bit_arch_AVX_Fast_Unaligned_Load;
            }
        }
    }
  /* This spells out "CentaurHauls" or "  Shanghai  ".  */
  else if ((ebx == 0x746e6543 && ecx == 0x736c7561 && edx == 0x48727561)
           || (ebx == 0x68532020 && ecx == 0x20206961 && edx == 0x68676e61))
    {
      unsigned int extended_model, stepping;

      kind = arch_kind_zhaoxin;

      get_common_indices (cpu_features, &family, &model, &extended_model,
                          &stepping);

      get_extended_indices (cpu_features);

      update_usable (cpu_features);

      model += extended_model;
      if (family == 0x6)
        {
          if (model == 0xf || model == 0x19)
            {
              CPU_FEATURE_UNSET (cpu_features, AVX);
              CPU_FEATURE_UNSET (cpu_features, AVX2);

              cpu_features->preferred[index_arch_Slow_SSE4_2]
                |= bit_arch_Slow_SSE4_2;

              cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
                &= ~bit_arch_AVX_Fast_Unaligned_Load;
            }
        }
      else if (family == 0x7)
        {
          if (model == 0x1b)
            {
              CPU_FEATURE_UNSET (cpu_features, AVX);
              CPU_FEATURE_UNSET (cpu_features, AVX2);

              cpu_features->preferred[index_arch_Slow_SSE4_2]
                |= bit_arch_Slow_SSE4_2;

              cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
                &= ~bit_arch_AVX_Fast_Unaligned_Load;
            }
          else if (model == 0x3b)
            {
              CPU_FEATURE_UNSET (cpu_features, AVX);
              CPU_FEATURE_UNSET (cpu_features, AVX2);

              cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
                &= ~bit_arch_AVX_Fast_Unaligned_Load;
            }
        }
    }
  else
    {
      kind = arch_kind_other;
      get_common_indices (cpu_features, NULL, NULL, NULL, NULL);
      update_usable (cpu_features);
    }

  /* Support i586 if CX8 is available.  */
  if (CPU_FEATURES_CPU_P (cpu_features, CX8))
    cpu_features->preferred[index_arch_I586] |= bit_arch_I586;

  /* Support i686 if CMOV is available.  */
  if (CPU_FEATURES_CPU_P (cpu_features, CMOV))
    cpu_features->preferred[index_arch_I686] |= bit_arch_I686;

#if !HAS_CPUID
no_cpuid:
#endif

  cpu_features->basic.kind = kind;
  cpu_features->basic.family = family;
  cpu_features->basic.model = model;
  cpu_features->basic.stepping = stepping;

#if HAVE_TUNABLES
  TUNABLE_GET (hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));
  cpu_features->non_temporal_threshold
    = TUNABLE_GET (x86_non_temporal_threshold, long int, NULL);
  cpu_features->rep_movsb_threshold
    = TUNABLE_GET (x86_rep_movsb_threshold, long int, NULL);
  cpu_features->rep_stosb_threshold
    = TUNABLE_GET (x86_rep_stosb_threshold, long int, NULL);
  cpu_features->data_cache_size
    = TUNABLE_GET (x86_data_cache_size, long int, NULL);
  cpu_features->shared_cache_size
    = TUNABLE_GET (x86_shared_cache_size, long int, NULL);
#endif

  /* Reuse dl_platform, dl_hwcap and dl_hwcap_mask for x86.  */
#if !HAVE_TUNABLES && defined SHARED
  /* When tunables are enabled, the glibc.cpu.hwcap_mask tunable has already
     initialized dl_hwcap_mask, so it only needs to be set here when they
     are not.  */
  GLRO(dl_hwcap_mask) = HWCAP_IMPORTANT;
#endif

#ifdef __x86_64__
  GLRO(dl_hwcap) = HWCAP_X86_64;
  if (cpu_features->basic.kind == arch_kind_intel)
    {
      const char *platform = NULL;

      if (CPU_FEATURE_USABLE_P (cpu_features, AVX512CD))
        {
          if (CPU_FEATURE_USABLE_P (cpu_features, AVX512ER))
            {
              if (CPU_FEATURE_USABLE_P (cpu_features, AVX512PF))
                platform = "xeon_phi";
            }
          else
            {
              if (CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
                  && CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
                  && CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
                GLRO(dl_hwcap) |= HWCAP_X86_AVX512_1;
            }
        }

      if (platform == NULL
          && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
          && CPU_FEATURE_USABLE_P (cpu_features, FMA)
          && CPU_FEATURE_USABLE_P (cpu_features, BMI1)
          && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
          && CPU_FEATURE_USABLE_P (cpu_features, LZCNT)
          && CPU_FEATURE_USABLE_P (cpu_features, MOVBE)
          && CPU_FEATURE_USABLE_P (cpu_features, POPCNT))
        platform = "haswell";

      if (platform != NULL)
        GLRO(dl_platform) = platform;
    }
#else
  GLRO(dl_hwcap) = 0;
  if (CPU_FEATURE_USABLE_P (cpu_features, SSE2))
    GLRO(dl_hwcap) |= HWCAP_X86_SSE2;

  if (CPU_FEATURES_ARCH_P (cpu_features, I686))
    GLRO(dl_platform) = "i686";
  else if (CPU_FEATURES_ARCH_P (cpu_features, I586))
    GLRO(dl_platform) = "i586";
#endif

#if CET_ENABLED
# if HAVE_TUNABLES
  TUNABLE_GET (x86_ibt, tunable_val_t *,
               TUNABLE_CALLBACK (set_x86_ibt));
  TUNABLE_GET (x86_shstk, tunable_val_t *,
               TUNABLE_CALLBACK (set_x86_shstk));
# endif

  /* Check CET status.  */
  unsigned int cet_status = get_cet_status ();
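  /* cet_status is a bitmask of GNU_PROPERTY_X86_FEATURE_1_{IBT,SHSTK}
     reporting which CET features the kernel has enabled for this
     process.  */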

  if (cet_status)
    {
      GL(dl_x86_feature_1) = cet_status;

# ifndef SHARED
      /* Check if IBT and SHSTK are enabled by kernel.  */
      if ((cet_status & GNU_PROPERTY_X86_FEATURE_1_IBT)
          || (cet_status & GNU_PROPERTY_X86_FEATURE_1_SHSTK))
        {
          /* Disable IBT and/or SHSTK if they are enabled by kernel, but
             disabled by environment variable:

             GLIBC_TUNABLES=glibc.cpu.hwcaps=-IBT,-SHSTK
           */
          unsigned int cet_feature = 0;
          if (!CPU_FEATURE_USABLE (IBT))
            cet_feature |= GNU_PROPERTY_X86_FEATURE_1_IBT;
          if (!CPU_FEATURE_USABLE (SHSTK))
            cet_feature |= GNU_PROPERTY_X86_FEATURE_1_SHSTK;

          if (cet_feature)
            {
              int res = dl_cet_disable_cet (cet_feature);

              /* Clear the disabled bits in dl_x86_feature_1.  */
              if (res == 0)
                GL(dl_x86_feature_1) &= ~cet_feature;
            }

          /* Lock CET if IBT or SHSTK is enabled in executable.  Don't
             lock CET if IBT or SHSTK is enabled permissively.  */
          if (GL(dl_x86_feature_control).ibt != cet_permissive
              && GL(dl_x86_feature_control).shstk != cet_permissive)
            dl_cet_lock_cet ();
        }
# endif
    }
#endif
}