/* Initialize CPU feature data.
   This file is part of the GNU C Library.
   Copyright (C) 2008-2021 Free Software Foundation, Inc.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <dl-hwcap.h>
#include <libc-pointer-arith.h>
#include <get-isa-level.h>
#include <cacheinfo.h>
#include <dl-cacheinfo.h>
#include <dl-minsigstacksize.h>

#if HAVE_TUNABLES
extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *)
  attribute_hidden;

# if CET_ENABLED
extern void TUNABLE_CALLBACK (set_x86_ibt) (tunable_val_t *)
  attribute_hidden;
extern void TUNABLE_CALLBACK (set_x86_shstk) (tunable_val_t *)
  attribute_hidden;
# endif
#endif

#if CET_ENABLED
# include <dl-cet.h>
#endif

static void
update_active (struct cpu_features *cpu_features)
{
  /* Copy the cpuid bits to active bits for CPU features whose usability
     in user space can be detected without additional OS support.  */
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE3);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PCLMULQDQ);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSSE3);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CMPXCHG16B);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4_1);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4_2);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MOVBE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, POPCNT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, AES);
  CPU_FEATURE_SET_ACTIVE (cpu_features, OSXSAVE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, TSC);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CX8);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CMOV);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLFSH);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MMX);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FXSR);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE2);
  CPU_FEATURE_SET_ACTIVE (cpu_features, HTT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, BMI1);
  CPU_FEATURE_SET_ACTIVE (cpu_features, HLE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, BMI2);
  CPU_FEATURE_SET_ACTIVE (cpu_features, ERMS);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDSEED);
  CPU_FEATURE_SET_ACTIVE (cpu_features, ADX);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLFLUSHOPT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLWB);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SHA);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PREFETCHWT1);
  CPU_FEATURE_SET_ACTIVE (cpu_features, OSPKE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, WAITPKG);
  CPU_FEATURE_SET_ACTIVE (cpu_features, GFNI);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDPID);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDRAND);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLDEMOTE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MOVDIRI);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MOVDIR64B);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FSRM);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RTM_ALWAYS_ABORT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SERIALIZE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, TSXLDTRK);
  CPU_FEATURE_SET_ACTIVE (cpu_features, LAHF64_SAHF64);
  CPU_FEATURE_SET_ACTIVE (cpu_features, LZCNT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4A);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PREFETCHW);
  CPU_FEATURE_SET_ACTIVE (cpu_features, TBM);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDTSCP);
  CPU_FEATURE_SET_ACTIVE (cpu_features, WBNOINVD);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FZLRM);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FSRS);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FSRCS);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PTWRITE);

  if (!CPU_FEATURES_CPU_P (cpu_features, RTM_ALWAYS_ABORT))
    CPU_FEATURE_SET_ACTIVE (cpu_features, RTM);

#if CET_ENABLED
  CPU_FEATURE_SET_ACTIVE (cpu_features, IBT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SHSTK);
#endif

  /* Can we call xgetbv?  */
  if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE))
    {
      unsigned int xcrlow;
      unsigned int xcrhigh;
      asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
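      /* XGETBV with ECX == 0 returns the XCR0 feature-enabled mask in
         EDX:EAX, i.e. which register states the OS saves and restores,
         and hence which of the features below are safe to use in user
         space.  */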
      /* Are YMM and XMM states usable?  */
      if ((xcrlow & (bit_YMM_state | bit_XMM_state))
          == (bit_YMM_state | bit_XMM_state))
        {
          /* Determine if AVX is usable.  */
          if (CPU_FEATURES_CPU_P (cpu_features, AVX))
            {
              CPU_FEATURE_SET (cpu_features, AVX);
              /* The following features depend on AVX being usable.  */
              /* Determine if AVX2 is usable.  */
              if (CPU_FEATURES_CPU_P (cpu_features, AVX2))
                {
                  CPU_FEATURE_SET (cpu_features, AVX2);

                  /* Unaligned loads with 256-bit AVX registers are faster
                     on Intel/AMD processors with AVX2.  */
                  cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
                    |= bit_arch_AVX_Fast_Unaligned_Load;
                }
              /* Determine if AVX-VNNI is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, AVX_VNNI);
              /* Determine if FMA is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, FMA);
              /* Determine if VAES is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, VAES);
              /* Determine if VPCLMULQDQ is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, VPCLMULQDQ);
              /* Determine if XOP is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, XOP);
              /* Determine if F16C is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, F16C);
            }

          /* Check if OPMASK state, the upper 256 bits of ZMM0-ZMM15 and
             the ZMM16-ZMM31 state are enabled.  */
          if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state
                         | bit_ZMM16_31_state))
              == (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state))
            {
              /* Determine if AVX512F is usable.  */
              if (CPU_FEATURES_CPU_P (cpu_features, AVX512F))
                {
                  CPU_FEATURE_SET (cpu_features, AVX512F);
                  /* Determine if AVX512CD is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512CD);
                  /* Determine if AVX512ER is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512ER);
                  /* Determine if AVX512PF is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512PF);
                  /* Determine if AVX512VL is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512VL);
                  /* Determine if AVX512DQ is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512DQ);
                  /* Determine if AVX512BW is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512BW);
                  /* Determine if AVX512_4FMAPS is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_4FMAPS);
                  /* Determine if AVX512_4VNNIW is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_4VNNIW);
                  /* Determine if AVX512_BITALG is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_BITALG);
                  /* Determine if AVX512_IFMA is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_IFMA);
                  /* Determine if AVX512_VBMI is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VBMI);
                  /* Determine if AVX512_VBMI2 is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VBMI2);
                  /* Determine if AVX512_VNNI is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VNNI);
                  /* Determine if AVX512_VPOPCNTDQ is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VPOPCNTDQ);
                  /* Determine if AVX512_VP2INTERSECT is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VP2INTERSECT);
                  /* Determine if AVX512_BF16 is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_BF16);
                  /* Determine if AVX512_FP16 is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_FP16);
                }
            }
        }

      /* Are XTILECFG and XTILEDATA states usable?  */
      if ((xcrlow & (bit_XTILECFG_state | bit_XTILEDATA_state))
          == (bit_XTILECFG_state | bit_XTILEDATA_state))
        {
          /* Determine if AMX_BF16 is usable.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_BF16);
          /* Determine if AMX_TILE is usable.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_TILE);
          /* Determine if AMX_INT8 is usable.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_INT8);
        }

      /* These features are usable only when OSXSAVE is enabled.  */
      CPU_FEATURE_SET (cpu_features, XSAVE);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XSAVEOPT);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XSAVEC);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XGETBV_ECX_1);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XFD);

      /* For _dl_runtime_resolve, set xsave_state_size to xsave area
         size + integer register save size and align it to 64 bytes.  */
      if (cpu_features->basic.max_cpuid >= 0xd)
        {
          unsigned int eax, ebx, ecx, edx;

          __cpuid_count (0xd, 0, eax, ebx, ecx, edx);
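          /* CPUID.(EAX=0xD,ECX=0):EBX is the size in bytes of the XSAVE
             area needed for the state components currently enabled in
             XCR0, using the standard (non-compacted) format.  */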
          if (ebx != 0)
            {
              unsigned int xsave_state_full_size
                = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);

              cpu_features->xsave_state_size
                = xsave_state_full_size;
              cpu_features->xsave_state_full_size
                = xsave_state_full_size;

              /* Check if XSAVEC is available.  */
              if (CPU_FEATURES_CPU_P (cpu_features, XSAVEC))
                {
                  unsigned int xstate_comp_offsets[32];
                  unsigned int xstate_comp_sizes[32];
                  unsigned int i;

                  xstate_comp_offsets[0] = 0;
                  xstate_comp_offsets[1] = 160;
                  xstate_comp_offsets[2] = 576;
                  xstate_comp_sizes[0] = 160;
                  xstate_comp_sizes[1] = 256;

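                  /* With XSAVEC the compacted format is used: state
                     components beyond the 512-byte legacy area and the
                     64-byte XSAVE header (i.e. starting at offset 576)
                     are laid out consecutively in component order, each
                     aligned to 64 bytes when CPUID.(EAX=0xD,ECX=i):ECX
                     bit 1 is set.  The loop below accumulates those
                     offsets.  */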
                  for (i = 2; i < 32; i++)
                    {
                      if ((STATE_SAVE_MASK & (1 << i)) != 0)
                        {
                          __cpuid_count (0xd, i, eax, ebx, ecx, edx);
                          xstate_comp_sizes[i] = eax;
                        }
                      else
                        {
                          ecx = 0;
                          xstate_comp_sizes[i] = 0;
                        }

                      if (i > 2)
                        {
                          xstate_comp_offsets[i]
                            = (xstate_comp_offsets[i - 1]
                               + xstate_comp_sizes[i - 1]);
                          if ((ecx & (1 << 1)) != 0)
                            xstate_comp_offsets[i]
                              = ALIGN_UP (xstate_comp_offsets[i], 64);
                        }
                    }

                  /* Use XSAVEC.  */
                  unsigned int size
                    = xstate_comp_offsets[31] + xstate_comp_sizes[31];
                  if (size)
                    {
                      cpu_features->xsave_state_size
                        = ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
                      CPU_FEATURE_SET (cpu_features, XSAVEC);
                    }
                }
            }
        }
    }

  /* Determine if PKU is usable.  */
  if (CPU_FEATURES_CPU_P (cpu_features, OSPKE))
    CPU_FEATURE_SET (cpu_features, PKU);

  /* Determine if Key Locker instructions are usable.  */
  if (CPU_FEATURES_CPU_P (cpu_features, AESKLE))
    {
      CPU_FEATURE_SET (cpu_features, AESKLE);
      CPU_FEATURE_SET_ACTIVE (cpu_features, KL);
      CPU_FEATURE_SET_ACTIVE (cpu_features, WIDE_KL);
    }

  cpu_features->isa_1 = get_isa_level (cpu_features);
}

static void
get_extended_indices (struct cpu_features *cpu_features)
{
  unsigned int eax, ebx, ecx, edx;
  __cpuid (0x80000000, eax, ebx, ecx, edx);
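  /* CPUID leaf 0x80000000 returns the highest supported extended leaf in
     EAX; each extended leaf below is read only if it is available.  */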
  if (eax >= 0x80000001)
    __cpuid (0x80000001,
             cpu_features->features[CPUID_INDEX_80000001].cpuid.eax,
             cpu_features->features[CPUID_INDEX_80000001].cpuid.ebx,
             cpu_features->features[CPUID_INDEX_80000001].cpuid.ecx,
             cpu_features->features[CPUID_INDEX_80000001].cpuid.edx);
  if (eax >= 0x80000007)
    __cpuid (0x80000007,
             cpu_features->features[CPUID_INDEX_80000007].cpuid.eax,
             cpu_features->features[CPUID_INDEX_80000007].cpuid.ebx,
             cpu_features->features[CPUID_INDEX_80000007].cpuid.ecx,
             cpu_features->features[CPUID_INDEX_80000007].cpuid.edx);
  if (eax >= 0x80000008)
    __cpuid (0x80000008,
             cpu_features->features[CPUID_INDEX_80000008].cpuid.eax,
             cpu_features->features[CPUID_INDEX_80000008].cpuid.ebx,
             cpu_features->features[CPUID_INDEX_80000008].cpuid.ecx,
             cpu_features->features[CPUID_INDEX_80000008].cpuid.edx);
}

static void
get_common_indices (struct cpu_features *cpu_features,
                    unsigned int *family, unsigned int *model,
                    unsigned int *extended_model, unsigned int *stepping)
{
  if (family)
    {
      unsigned int eax;
      __cpuid (1, eax,
               cpu_features->features[CPUID_INDEX_1].cpuid.ebx,
               cpu_features->features[CPUID_INDEX_1].cpuid.ecx,
               cpu_features->features[CPUID_INDEX_1].cpuid.edx);
      cpu_features->features[CPUID_INDEX_1].cpuid.eax = eax;
      *family = (eax >> 8) & 0x0f;
      *model = (eax >> 4) & 0x0f;
      *extended_model = (eax >> 12) & 0xf0;
      *stepping = eax & 0x0f;
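      /* For example, CPUID.(EAX=1):EAX == 0x000906ea decodes to family
         0x6, model 0xe, extended_model 0x90 and stepping 0xa; the
         extended model is folded into the model either here (for family
         0x0f) or by the vendor-specific code in init_cpu_features.  */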
      if (*family == 0x0f)
        {
          *family += (eax >> 20) & 0xff;
          *model += *extended_model;
        }
    }

  if (cpu_features->basic.max_cpuid >= 7)
    {
      __cpuid_count (7, 0,
                     cpu_features->features[CPUID_INDEX_7].cpuid.eax,
                     cpu_features->features[CPUID_INDEX_7].cpuid.ebx,
                     cpu_features->features[CPUID_INDEX_7].cpuid.ecx,
                     cpu_features->features[CPUID_INDEX_7].cpuid.edx);
      __cpuid_count (7, 1,
                     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.eax,
                     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.ebx,
                     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.ecx,
                     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.edx);
    }

  if (cpu_features->basic.max_cpuid >= 0xd)
    __cpuid_count (0xd, 1,
                   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.eax,
                   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.ebx,
                   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.ecx,
                   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.edx);

  if (cpu_features->basic.max_cpuid >= 0x14)
    __cpuid_count (0x14, 0,
                   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.eax,
                   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.ebx,
                   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.ecx,
                   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.edx);

  if (cpu_features->basic.max_cpuid >= 0x19)
    __cpuid_count (0x19, 0,
                   cpu_features->features[CPUID_INDEX_19].cpuid.eax,
                   cpu_features->features[CPUID_INDEX_19].cpuid.ebx,
                   cpu_features->features[CPUID_INDEX_19].cpuid.ecx,
                   cpu_features->features[CPUID_INDEX_19].cpuid.edx);

  dl_check_minsigstacksize (cpu_features);
}

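/* The arch feature bits below are ORed together into a single preferred[]
   element in init_cpu_features, so all of them must live at the same
   index; this assertion guards that assumption.  */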
_Static_assert (((index_arch_Fast_Unaligned_Load
                  == index_arch_Fast_Unaligned_Copy)
                 && (index_arch_Fast_Unaligned_Load
                     == index_arch_Prefer_PMINUB_for_stringop)
                 && (index_arch_Fast_Unaligned_Load
                     == index_arch_Slow_SSE4_2)
                 && (index_arch_Fast_Unaligned_Load
                     == index_arch_Fast_Rep_String)
                 && (index_arch_Fast_Unaligned_Load
                     == index_arch_Fast_Copy_Backward)),
                "Incorrect index_arch_Fast_Unaligned_Load");

static inline void
init_cpu_features (struct cpu_features *cpu_features)
{
  unsigned int ebx, ecx, edx;
  unsigned int family = 0;
  unsigned int model = 0;
  unsigned int stepping = 0;
  enum cpu_features_kind kind;

#if !HAS_CPUID
  if (__get_cpuid_max (0, 0) == 0)
    {
      kind = arch_kind_other;
      goto no_cpuid;
    }
#endif

  __cpuid (0, cpu_features->basic.max_cpuid, ebx, ecx, edx);

  /* This spells out "GenuineIntel".  */
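  /* The vendor string is returned in EBX, EDX, ECX, four ASCII bytes per
     register in little-endian order: "Genu" == 0x756e6547,
     "ineI" == 0x49656e69 and "ntel" == 0x6c65746e.  */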
  if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
    {
      unsigned int extended_model;

      kind = arch_kind_intel;

      get_common_indices (cpu_features, &family, &model, &extended_model,
                          &stepping);

      get_extended_indices (cpu_features);

      update_active (cpu_features);

      if (family == 0x06)
        {
          model += extended_model;
          switch (model)
            {
            case 0x1c:
            case 0x26:
              /* BSF is slow on Atom.  */
              cpu_features->preferred[index_arch_Slow_BSF]
                |= bit_arch_Slow_BSF;
              break;

            case 0x57:
              /* Knights Landing.  Enable Silvermont optimizations.  */

            case 0x7a:
              /* Unaligned load versions are faster than SSSE3
                 on Goldmont Plus.  */

            case 0x5c:
            case 0x5f:
              /* Unaligned load versions are faster than SSSE3
                 on Goldmont.  */

            case 0x4c:
            case 0x5a:
            case 0x75:
              /* Airmont is a die shrink of Silvermont.  */

            case 0x37:
            case 0x4a:
            case 0x4d:
            case 0x5d:
              /* Unaligned load versions are faster than SSSE3
                 on Silvermont.  */
              cpu_features->preferred[index_arch_Fast_Unaligned_Load]
                |= (bit_arch_Fast_Unaligned_Load
                    | bit_arch_Fast_Unaligned_Copy
                    | bit_arch_Prefer_PMINUB_for_stringop
                    | bit_arch_Slow_SSE4_2);
              break;

            case 0x86:
            case 0x96:
            case 0x9c:
              /* Enable rep string instructions, unaligned load, unaligned
                 copy, pminub and avoid SSE 4.2 on Tremont.  */
              cpu_features->preferred[index_arch_Fast_Rep_String]
                |= (bit_arch_Fast_Rep_String
                    | bit_arch_Fast_Unaligned_Load
                    | bit_arch_Fast_Unaligned_Copy
                    | bit_arch_Prefer_PMINUB_for_stringop
                    | bit_arch_Slow_SSE4_2);
              break;

            default:
              /* Unknown family 0x06 processors.  Assume this is one of
                 the Core i3/i5/i7 processors if AVX is available.  */
              if (!CPU_FEATURES_CPU_P (cpu_features, AVX))
                break;
              /* Fall through.  */

            case 0x1a:
            case 0x1e:
            case 0x1f:
            case 0x25:
            case 0x2c:
            case 0x2e:
            case 0x2f:
              /* Rep string instructions, unaligned load, unaligned copy,
                 and pminub are fast on Intel Core i3, i5 and i7.  */
              cpu_features->preferred[index_arch_Fast_Rep_String]
                |= (bit_arch_Fast_Rep_String
                    | bit_arch_Fast_Unaligned_Load
                    | bit_arch_Fast_Unaligned_Copy
                    | bit_arch_Prefer_PMINUB_for_stringop);
              break;
            }

          /* Disable TSX on some Haswell processors to avoid TSX on kernels
             that weren't updated with the latest microcode package (which
             disables the broken feature by default).  */
          switch (model)
            {
            case 0x3f:
              /* Xeon E7 v3 with stepping >= 4 has working TSX.  */
              if (stepping >= 4)
                break;
              /* Fall through.  */
            case 0x3c:
            case 0x45:
            case 0x46:
              /* Disable Intel TSX on Haswell processors (except Xeon E7 v3
                 with stepping >= 4) to avoid TSX on kernels that weren't
                 updated with the latest microcode package (which disables
                 the broken feature by default).  */
              CPU_FEATURE_UNSET (cpu_features, RTM);
              break;
            }
        }

      /* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER
         if AVX512ER is available.  Don't use AVX512 to avoid lower CPU
         frequency if AVX512ER isn't available.  */
      if (CPU_FEATURES_CPU_P (cpu_features, AVX512ER))
        cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
          |= bit_arch_Prefer_No_VZEROUPPER;
      else
        {
          cpu_features->preferred[index_arch_Prefer_No_AVX512]
            |= bit_arch_Prefer_No_AVX512;

          /* Avoid RTM abort triggered by VZEROUPPER inside a
             transactionally executing RTM region.  */
          if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
            cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
              |= bit_arch_Prefer_No_VZEROUPPER;

          /* To compare two 32-byte strings, 256-bit EVEX strcmp needs
             2 loads, 3 VPCMPs and 2 KORDs, while AVX2 strcmp needs only
             1 load, 2 VPCMPEQs, 1 VPMINU and 1 VPMOVMSKB, so AVX2 strcmp
             is faster than EVEX strcmp.  */
          if (CPU_FEATURE_USABLE_P (cpu_features, AVX2))
            cpu_features->preferred[index_arch_Prefer_AVX2_STRCMP]
              |= bit_arch_Prefer_AVX2_STRCMP;
        }

      /* Avoid short distance REP MOVSB on processors with FSRM.  */
      if (CPU_FEATURES_CPU_P (cpu_features, FSRM))
        cpu_features->preferred[index_arch_Avoid_Short_Distance_REP_MOVSB]
          |= bit_arch_Avoid_Short_Distance_REP_MOVSB;
    }
  /* This spells out "AuthenticAMD" or "HygonGenuine".  */
  else if ((ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
           || (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e))
    {
      unsigned int extended_model;

      kind = arch_kind_amd;

      get_common_indices (cpu_features, &family, &model, &extended_model,
                          &stepping);

      get_extended_indices (cpu_features);

      update_active (cpu_features);

      ecx = cpu_features->features[CPUID_INDEX_1].cpuid.ecx;

      if (CPU_FEATURE_USABLE_P (cpu_features, AVX))
        {
          /* Since the FMA4 bit is in CPUID_INDEX_80000001 and
             FMA4 requires AVX, determine if FMA4 is usable here.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, FMA4);
        }

      if (family == 0x15)
        {
          /* "Excavator" */
          if (model >= 0x60 && model <= 0x7f)
            {
              cpu_features->preferred[index_arch_Fast_Unaligned_Load]
                |= (bit_arch_Fast_Unaligned_Load
                    | bit_arch_Fast_Copy_Backward);

              /* Unaligned AVX loads are slower.  */
              cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
                &= ~bit_arch_AVX_Fast_Unaligned_Load;
            }
        }
    }
  /* This spells out "CentaurHauls" or " Shanghai ".  */
  else if ((ebx == 0x746e6543 && ecx == 0x736c7561 && edx == 0x48727561)
           || (ebx == 0x68532020 && ecx == 0x20206961 && edx == 0x68676e61))
    {
      unsigned int extended_model, stepping;

      kind = arch_kind_zhaoxin;

      get_common_indices (cpu_features, &family, &model, &extended_model,
                          &stepping);

      get_extended_indices (cpu_features);

      update_active (cpu_features);

      model += extended_model;
      if (family == 0x6)
        {
          if (model == 0xf || model == 0x19)
            {
              CPU_FEATURE_UNSET (cpu_features, AVX);
              CPU_FEATURE_UNSET (cpu_features, AVX2);

              cpu_features->preferred[index_arch_Slow_SSE4_2]
                |= bit_arch_Slow_SSE4_2;

              cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
                &= ~bit_arch_AVX_Fast_Unaligned_Load;
            }
        }
      else if (family == 0x7)
        {
          if (model == 0x1b)
            {
              CPU_FEATURE_UNSET (cpu_features, AVX);
              CPU_FEATURE_UNSET (cpu_features, AVX2);

              cpu_features->preferred[index_arch_Slow_SSE4_2]
                |= bit_arch_Slow_SSE4_2;

              cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
                &= ~bit_arch_AVX_Fast_Unaligned_Load;
            }
          else if (model == 0x3b)
            {
              CPU_FEATURE_UNSET (cpu_features, AVX);
              CPU_FEATURE_UNSET (cpu_features, AVX2);

              cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
                &= ~bit_arch_AVX_Fast_Unaligned_Load;
            }
        }
    }
  else
    {
      kind = arch_kind_other;
      get_common_indices (cpu_features, NULL, NULL, NULL, NULL);
      update_active (cpu_features);
    }

  /* Support i586 if CX8 is available.  */
  if (CPU_FEATURES_CPU_P (cpu_features, CX8))
    cpu_features->preferred[index_arch_I586] |= bit_arch_I586;

  /* Support i686 if CMOV is available.  */
  if (CPU_FEATURES_CPU_P (cpu_features, CMOV))
    cpu_features->preferred[index_arch_I686] |= bit_arch_I686;

#if !HAS_CPUID
no_cpuid:
#endif

  cpu_features->basic.kind = kind;
  cpu_features->basic.family = family;
  cpu_features->basic.model = model;
  cpu_features->basic.stepping = stepping;

  dl_init_cacheinfo (cpu_features);

#if HAVE_TUNABLES
  TUNABLE_GET (hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));

  bool disable_xsave_features = false;

  if (!CPU_FEATURE_USABLE_P (cpu_features, OSXSAVE))
    {
      /* These features are usable only if OSXSAVE is usable.  */
      CPU_FEATURE_UNSET (cpu_features, XSAVE);
      CPU_FEATURE_UNSET (cpu_features, XSAVEOPT);
      CPU_FEATURE_UNSET (cpu_features, XSAVEC);
      CPU_FEATURE_UNSET (cpu_features, XGETBV_ECX_1);
      CPU_FEATURE_UNSET (cpu_features, XFD);

      disable_xsave_features = true;
    }

  if (disable_xsave_features
      || (!CPU_FEATURE_USABLE_P (cpu_features, XSAVE)
          && !CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)))
    {
      /* Clear xsave_state_size if both XSAVE and XSAVEC aren't usable.  */
      cpu_features->xsave_state_size = 0;

      CPU_FEATURE_UNSET (cpu_features, AVX);
      CPU_FEATURE_UNSET (cpu_features, AVX2);
      CPU_FEATURE_UNSET (cpu_features, AVX_VNNI);
      CPU_FEATURE_UNSET (cpu_features, FMA);
      CPU_FEATURE_UNSET (cpu_features, VAES);
      CPU_FEATURE_UNSET (cpu_features, VPCLMULQDQ);
      CPU_FEATURE_UNSET (cpu_features, XOP);
      CPU_FEATURE_UNSET (cpu_features, F16C);
      CPU_FEATURE_UNSET (cpu_features, AVX512F);
      CPU_FEATURE_UNSET (cpu_features, AVX512CD);
      CPU_FEATURE_UNSET (cpu_features, AVX512ER);
      CPU_FEATURE_UNSET (cpu_features, AVX512PF);
      CPU_FEATURE_UNSET (cpu_features, AVX512VL);
      CPU_FEATURE_UNSET (cpu_features, AVX512DQ);
      CPU_FEATURE_UNSET (cpu_features, AVX512BW);
      CPU_FEATURE_UNSET (cpu_features, AVX512_4FMAPS);
      CPU_FEATURE_UNSET (cpu_features, AVX512_4VNNIW);
      CPU_FEATURE_UNSET (cpu_features, AVX512_BITALG);
      CPU_FEATURE_UNSET (cpu_features, AVX512_IFMA);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VBMI);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VBMI2);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VNNI);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VPOPCNTDQ);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VP2INTERSECT);
      CPU_FEATURE_UNSET (cpu_features, AVX512_BF16);
      CPU_FEATURE_UNSET (cpu_features, AVX512_FP16);
      CPU_FEATURE_UNSET (cpu_features, AMX_BF16);
      CPU_FEATURE_UNSET (cpu_features, AMX_TILE);
      CPU_FEATURE_UNSET (cpu_features, AMX_INT8);

      CPU_FEATURE_UNSET (cpu_features, FMA4);
    }

#elif defined SHARED
  /* Reuse dl_platform, dl_hwcap and dl_hwcap_mask for x86.  The
     glibc.cpu.hwcap_mask tunable is initialized already, so no
     need to do this.  */
  GLRO(dl_hwcap_mask) = HWCAP_IMPORTANT;
#endif

#ifdef __x86_64__
  GLRO(dl_hwcap) = HWCAP_X86_64;
  if (cpu_features->basic.kind == arch_kind_intel)
    {
      const char *platform = NULL;

      if (CPU_FEATURE_USABLE_P (cpu_features, AVX512CD))
        {
          if (CPU_FEATURE_USABLE_P (cpu_features, AVX512ER))
            {
              if (CPU_FEATURE_USABLE_P (cpu_features, AVX512PF))
                platform = "xeon_phi";
            }
          else
            {
              if (CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
                  && CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
                  && CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
                GLRO(dl_hwcap) |= HWCAP_X86_AVX512_1;
            }
        }

      if (platform == NULL
          && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
          && CPU_FEATURE_USABLE_P (cpu_features, FMA)
          && CPU_FEATURE_USABLE_P (cpu_features, BMI1)
          && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
          && CPU_FEATURE_USABLE_P (cpu_features, LZCNT)
          && CPU_FEATURE_USABLE_P (cpu_features, MOVBE)
          && CPU_FEATURE_USABLE_P (cpu_features, POPCNT))
        platform = "haswell";

      if (platform != NULL)
        GLRO(dl_platform) = platform;
    }
#else
  GLRO(dl_hwcap) = 0;
  if (CPU_FEATURE_USABLE_P (cpu_features, SSE2))
    GLRO(dl_hwcap) |= HWCAP_X86_SSE2;

  if (CPU_FEATURES_ARCH_P (cpu_features, I686))
    GLRO(dl_platform) = "i686";
  else if (CPU_FEATURES_ARCH_P (cpu_features, I586))
    GLRO(dl_platform) = "i586";
#endif

#if CET_ENABLED
# if HAVE_TUNABLES
  TUNABLE_GET (x86_ibt, tunable_val_t *,
               TUNABLE_CALLBACK (set_x86_ibt));
  TUNABLE_GET (x86_shstk, tunable_val_t *,
               TUNABLE_CALLBACK (set_x86_shstk));
# endif

  /* Check CET status.  */
  unsigned int cet_status = get_cet_status ();

  if ((cet_status & GNU_PROPERTY_X86_FEATURE_1_IBT) == 0)
    CPU_FEATURE_UNSET (cpu_features, IBT);
  if ((cet_status & GNU_PROPERTY_X86_FEATURE_1_SHSTK) == 0)
    CPU_FEATURE_UNSET (cpu_features, SHSTK);

  if (cet_status)
    {
      GL(dl_x86_feature_1) = cet_status;

# ifndef SHARED
      /* Check if IBT and SHSTK are enabled by the kernel.  */
      if ((cet_status & GNU_PROPERTY_X86_FEATURE_1_IBT)
          || (cet_status & GNU_PROPERTY_X86_FEATURE_1_SHSTK))
        {
          /* Disable IBT and/or SHSTK if they are enabled by the kernel
             but disabled by the environment variable:

             GLIBC_TUNABLES=glibc.cpu.hwcaps=-IBT,-SHSTK
           */
          unsigned int cet_feature = 0;
          if (!CPU_FEATURE_USABLE (IBT))
            cet_feature |= GNU_PROPERTY_X86_FEATURE_1_IBT;
          if (!CPU_FEATURE_USABLE (SHSTK))
            cet_feature |= GNU_PROPERTY_X86_FEATURE_1_SHSTK;

          if (cet_feature)
            {
              int res = dl_cet_disable_cet (cet_feature);

              /* Clear the disabled bits in dl_x86_feature_1.  */
              if (res == 0)
                GL(dl_x86_feature_1) &= ~cet_feature;
            }

          /* Lock CET if IBT or SHSTK is enabled in the executable.  Don't
             lock CET if IBT or SHSTK is enabled permissively.  */
          if (GL(dl_x86_feature_control).ibt != cet_permissive
              && GL(dl_x86_feature_control).shstk != cet_permissive)
            dl_cet_lock_cet ();
        }
# endif
    }
#endif

#ifndef SHARED
  /* NB: In libc.a, call init_cacheinfo.  */
  init_cacheinfo ();
#endif
}