/* Initialize CPU feature data.
   This file is part of the GNU C Library.
   Copyright (C) 2008-2023 Free Software Foundation, Inc.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <dl-hwcap.h>
#include <libc-pointer-arith.h>
#include <get-isa-level.h>
#include <cacheinfo.h>
#include <dl-cacheinfo.h>
#include <dl-minsigstacksize.h>

#if HAVE_TUNABLES
extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *)
  attribute_hidden;

# if CET_ENABLED
extern void TUNABLE_CALLBACK (set_x86_ibt) (tunable_val_t *)
  attribute_hidden;
extern void TUNABLE_CALLBACK (set_x86_shstk) (tunable_val_t *)
  attribute_hidden;
# endif
#endif

#if CET_ENABLED
# include <dl-cet.h>
#endif

static void
update_active (struct cpu_features *cpu_features)
{
  /* Copy the cpuid bits to active bits for CPU features whose usability
     in user space can be detected without additional OS support.  */
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE3);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PCLMULQDQ);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSSE3);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CMPXCHG16B);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4_1);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4_2);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MOVBE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, POPCNT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, AES);
  CPU_FEATURE_SET_ACTIVE (cpu_features, OSXSAVE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, TSC);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CX8);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CMOV);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLFSH);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MMX);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FXSR);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE2);
  CPU_FEATURE_SET_ACTIVE (cpu_features, HTT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, BMI1);
  CPU_FEATURE_SET_ACTIVE (cpu_features, HLE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, BMI2);
  CPU_FEATURE_SET_ACTIVE (cpu_features, ERMS);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDSEED);
  CPU_FEATURE_SET_ACTIVE (cpu_features, ADX);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLFLUSHOPT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLWB);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SHA);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PREFETCHWT1);
  CPU_FEATURE_SET_ACTIVE (cpu_features, OSPKE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, WAITPKG);
  CPU_FEATURE_SET_ACTIVE (cpu_features, GFNI);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDPID);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDRAND);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLDEMOTE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MOVDIRI);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MOVDIR64B);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FSRM);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RTM_ALWAYS_ABORT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SERIALIZE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, TSXLDTRK);
  CPU_FEATURE_SET_ACTIVE (cpu_features, LAHF64_SAHF64);
  CPU_FEATURE_SET_ACTIVE (cpu_features, LZCNT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4A);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PREFETCHW);
  CPU_FEATURE_SET_ACTIVE (cpu_features, TBM);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDTSCP);
  CPU_FEATURE_SET_ACTIVE (cpu_features, WBNOINVD);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FZLRM);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FSRS);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FSRCS);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PTWRITE);

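  /* When RTM_ALWAYS_ABORT is set, every RTM transaction aborts immediately,
     so only report RTM as active when that bit is clear.  */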
  if (!CPU_FEATURES_CPU_P (cpu_features, RTM_ALWAYS_ABORT))
    CPU_FEATURE_SET_ACTIVE (cpu_features, RTM);

#if CET_ENABLED
  CPU_FEATURE_SET_ACTIVE (cpu_features, IBT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SHSTK);
#endif

  /* Can we call xgetbv?  */
  if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE))
    {
      unsigned int xcrlow;
      unsigned int xcrhigh;
      asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
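      /* XGETBV with ECX == 0 reads XCR0, which reports which register
	 states the OS has enabled in XSAVE: bit 1 covers the XMM
	 registers, bit 2 the YMM upper halves, bits 5-7 the AVX-512
	 opmask and ZMM state and bits 17-18 the AMX tile configuration
	 and tile data.  */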
      /* Are the YMM and XMM states usable?  */
      if ((xcrlow & (bit_YMM_state | bit_XMM_state))
	  == (bit_YMM_state | bit_XMM_state))
	{
	  /* Determine if AVX is usable.  */
	  if (CPU_FEATURES_CPU_P (cpu_features, AVX))
	    {
	      CPU_FEATURE_SET (cpu_features, AVX);
	      /* The following features depend on AVX being usable.  */
	      /* Determine if AVX2 is usable.  */
	      if (CPU_FEATURES_CPU_P (cpu_features, AVX2))
		{
		  CPU_FEATURE_SET (cpu_features, AVX2);

		  /* Unaligned loads with 256-bit AVX registers are faster
		     on Intel/AMD processors with AVX2.  */
		  cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
		    |= bit_arch_AVX_Fast_Unaligned_Load;
		}
	      /* Determine if AVX-VNNI is usable.  */
	      CPU_FEATURE_SET_ACTIVE (cpu_features, AVX_VNNI);
	      /* Determine if FMA is usable.  */
	      CPU_FEATURE_SET_ACTIVE (cpu_features, FMA);
	      /* Determine if VAES is usable.  */
	      CPU_FEATURE_SET_ACTIVE (cpu_features, VAES);
	      /* Determine if VPCLMULQDQ is usable.  */
	      CPU_FEATURE_SET_ACTIVE (cpu_features, VPCLMULQDQ);
	      /* Determine if XOP is usable.  */
	      CPU_FEATURE_SET_ACTIVE (cpu_features, XOP);
	      /* Determine if F16C is usable.  */
	      CPU_FEATURE_SET_ACTIVE (cpu_features, F16C);
	    }

	  /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and
	     ZMM16-ZMM31 state are enabled.  */
	  if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state
			 | bit_ZMM16_31_state))
	      == (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state))
	    {
	      /* Determine if AVX512F is usable.  */
	      if (CPU_FEATURES_CPU_P (cpu_features, AVX512F))
		{
		  CPU_FEATURE_SET (cpu_features, AVX512F);
		  /* Determine if AVX512CD is usable.  */
		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512CD);
		  /* Determine if AVX512ER is usable.  */
		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512ER);
		  /* Determine if AVX512PF is usable.  */
		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512PF);
		  /* Determine if AVX512VL is usable.  */
		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512VL);
		  /* Determine if AVX512DQ is usable.  */
		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512DQ);
		  /* Determine if AVX512BW is usable.  */
		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512BW);
		  /* Determine if AVX512_4FMAPS is usable.  */
		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_4FMAPS);
		  /* Determine if AVX512_4VNNIW is usable.  */
		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_4VNNIW);
		  /* Determine if AVX512_BITALG is usable.  */
		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_BITALG);
		  /* Determine if AVX512_IFMA is usable.  */
		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_IFMA);
		  /* Determine if AVX512_VBMI is usable.  */
		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VBMI);
		  /* Determine if AVX512_VBMI2 is usable.  */
		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VBMI2);
		  /* Determine if AVX512_VNNI is usable.  */
		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VNNI);
		  /* Determine if AVX512_VPOPCNTDQ is usable.  */
		  CPU_FEATURE_SET_ACTIVE (cpu_features,
					  AVX512_VPOPCNTDQ);
		  /* Determine if AVX512_VP2INTERSECT is usable.  */
		  CPU_FEATURE_SET_ACTIVE (cpu_features,
					  AVX512_VP2INTERSECT);
		  /* Determine if AVX512_BF16 is usable.  */
		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_BF16);
		  /* Determine if AVX512_FP16 is usable.  */
		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_FP16);
		}
	    }
	}

      /* Are XTILECFG and XTILEDATA states usable?  */
      if ((xcrlow & (bit_XTILECFG_state | bit_XTILEDATA_state))
	  == (bit_XTILECFG_state | bit_XTILEDATA_state))
	{
	  /* Determine if AMX_BF16 is usable.  */
	  CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_BF16);
	  /* Determine if AMX_TILE is usable.  */
	  CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_TILE);
	  /* Determine if AMX_INT8 is usable.  */
	  CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_INT8);
	}

      /* These features are usable only when OSXSAVE is enabled.  */
      CPU_FEATURE_SET (cpu_features, XSAVE);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XSAVEOPT);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XSAVEC);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XGETBV_ECX_1);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XFD);

      /* For _dl_runtime_resolve, set xsave_state_size to xsave area
	 size + integer register save size and align it to 64 bytes.  */
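      /* STATE_SAVE_OFFSET accounts for the integer registers that
	 _dl_runtime_resolve saves next to the XSAVE area; the total is
	 rounded up with ALIGN_UP because XSAVE and XSAVEC require a
	 64-byte aligned buffer.  */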
      if (cpu_features->basic.max_cpuid >= 0xd)
	{
	  unsigned int eax, ebx, ecx, edx;

	  __cpuid_count (0xd, 0, eax, ebx, ecx, edx);
	  if (ebx != 0)
	    {
	      unsigned int xsave_state_full_size
		= ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);

	      cpu_features->xsave_state_size
		= xsave_state_full_size;
	      cpu_features->xsave_state_full_size
		= xsave_state_full_size;

	      /* Check if XSAVEC is available.  */
	      if (CPU_FEATURES_CPU_P (cpu_features, XSAVEC))
		{
		  unsigned int xstate_comp_offsets[32];
		  unsigned int xstate_comp_sizes[32];
		  unsigned int i;

		  xstate_comp_offsets[0] = 0;
		  xstate_comp_offsets[1] = 160;
		  xstate_comp_offsets[2] = 576;
		  xstate_comp_sizes[0] = 160;
		  xstate_comp_sizes[1] = 256;
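
		  /* Components 0 (x87) and 1 (SSE) live in the legacy
		     FXSAVE area, which together with the 64-byte XSAVE
		     header always occupies the first 576 bytes, so the
		     first extended component (AVX, component 2) starts
		     at offset 576.  */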

		  for (i = 2; i < 32; i++)
		    {
		      if ((STATE_SAVE_MASK & (1 << i)) != 0)
			{
			  __cpuid_count (0xd, i, eax, ebx, ecx, edx);
			  xstate_comp_sizes[i] = eax;
			}
		      else
			{
			  ecx = 0;
			  xstate_comp_sizes[i] = 0;
			}

		      if (i > 2)
			{
			  xstate_comp_offsets[i]
			    = (xstate_comp_offsets[i - 1]
			       + xstate_comp_sizes[i - 1]);
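			  /* ECX bit 1 of CPUID leaf 0xd, sub-leaf I, set
			     means this component must start on a 64-byte
			     boundary in the compacted format.  */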
			  if ((ecx & (1 << 1)) != 0)
			    xstate_comp_offsets[i]
			      = ALIGN_UP (xstate_comp_offsets[i], 64);
			}
		    }

		  /* Use XSAVEC.  */
		  unsigned int size
		    = xstate_comp_offsets[31] + xstate_comp_sizes[31];
		  if (size)
		    {
		      cpu_features->xsave_state_size
			= ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
		      CPU_FEATURE_SET (cpu_features, XSAVEC);
		    }
		}
	    }
	}
    }

  /* Determine if PKU is usable.  */
  if (CPU_FEATURES_CPU_P (cpu_features, OSPKE))
    CPU_FEATURE_SET (cpu_features, PKU);

  /* Determine if Key Locker instructions are usable.  */
  if (CPU_FEATURES_CPU_P (cpu_features, AESKLE))
    {
      CPU_FEATURE_SET (cpu_features, AESKLE);
      CPU_FEATURE_SET_ACTIVE (cpu_features, KL);
      CPU_FEATURE_SET_ACTIVE (cpu_features, WIDE_KL);
    }

  cpu_features->isa_1 = get_isa_level (cpu_features);
}

static void
get_extended_indices (struct cpu_features *cpu_features)
{
  unsigned int eax, ebx, ecx, edx;
  __cpuid (0x80000000, eax, ebx, ecx, edx);
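  /* EAX of leaf 0x80000000 reports the highest supported extended leaf.  */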
  if (eax >= 0x80000001)
    __cpuid (0x80000001,
	     cpu_features->features[CPUID_INDEX_80000001].cpuid.eax,
	     cpu_features->features[CPUID_INDEX_80000001].cpuid.ebx,
	     cpu_features->features[CPUID_INDEX_80000001].cpuid.ecx,
	     cpu_features->features[CPUID_INDEX_80000001].cpuid.edx);
  if (eax >= 0x80000007)
    __cpuid (0x80000007,
	     cpu_features->features[CPUID_INDEX_80000007].cpuid.eax,
	     cpu_features->features[CPUID_INDEX_80000007].cpuid.ebx,
	     cpu_features->features[CPUID_INDEX_80000007].cpuid.ecx,
	     cpu_features->features[CPUID_INDEX_80000007].cpuid.edx);
  if (eax >= 0x80000008)
    __cpuid (0x80000008,
	     cpu_features->features[CPUID_INDEX_80000008].cpuid.eax,
	     cpu_features->features[CPUID_INDEX_80000008].cpuid.ebx,
	     cpu_features->features[CPUID_INDEX_80000008].cpuid.ecx,
	     cpu_features->features[CPUID_INDEX_80000008].cpuid.edx);
}

static void
get_common_indices (struct cpu_features *cpu_features,
		    unsigned int *family, unsigned int *model,
		    unsigned int *extended_model, unsigned int *stepping)
{
  if (family)
    {
      unsigned int eax;
      __cpuid (1, eax,
	       cpu_features->features[CPUID_INDEX_1].cpuid.ebx,
	       cpu_features->features[CPUID_INDEX_1].cpuid.ecx,
	       cpu_features->features[CPUID_INDEX_1].cpuid.edx);
      cpu_features->features[CPUID_INDEX_1].cpuid.eax = eax;
      *family = (eax >> 8) & 0x0f;
      *model = (eax >> 4) & 0x0f;
      *extended_model = (eax >> 12) & 0xf0;
      *stepping = eax & 0x0f;
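      /* Leaf 1 EAX encodes the stepping in bits 0-3, the model in bits
	 4-7, the family in bits 8-11, the extended model in bits 16-19
	 and the extended family in bits 20-27.  The extended fields are
	 folded in below for family 0x0f; the Intel and Zhaoxin code
	 paths add the extended model for other families themselves.  */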
      if (*family == 0x0f)
	{
	  *family += (eax >> 20) & 0xff;
	  *model += *extended_model;
	}
    }

  if (cpu_features->basic.max_cpuid >= 7)
    {
      __cpuid_count (7, 0,
		     cpu_features->features[CPUID_INDEX_7].cpuid.eax,
		     cpu_features->features[CPUID_INDEX_7].cpuid.ebx,
		     cpu_features->features[CPUID_INDEX_7].cpuid.ecx,
		     cpu_features->features[CPUID_INDEX_7].cpuid.edx);
      __cpuid_count (7, 1,
		     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.eax,
		     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.ebx,
		     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.ecx,
		     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.edx);
    }

  if (cpu_features->basic.max_cpuid >= 0xd)
    __cpuid_count (0xd, 1,
		   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.eax,
		   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.ebx,
		   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.ecx,
		   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.edx);

  if (cpu_features->basic.max_cpuid >= 0x14)
    __cpuid_count (0x14, 0,
		   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.eax,
		   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.ebx,
		   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.ecx,
		   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.edx);

  if (cpu_features->basic.max_cpuid >= 0x19)
    __cpuid_count (0x19, 0,
		   cpu_features->features[CPUID_INDEX_19].cpuid.eax,
		   cpu_features->features[CPUID_INDEX_19].cpuid.ebx,
		   cpu_features->features[CPUID_INDEX_19].cpuid.ecx,
		   cpu_features->features[CPUID_INDEX_19].cpuid.edx);

  dl_check_minsigstacksize (cpu_features);
}

_Static_assert (((index_arch_Fast_Unaligned_Load
		  == index_arch_Fast_Unaligned_Copy)
		 && (index_arch_Fast_Unaligned_Load
		     == index_arch_Prefer_PMINUB_for_stringop)
		 && (index_arch_Fast_Unaligned_Load
		     == index_arch_Slow_SSE4_2)
		 && (index_arch_Fast_Unaligned_Load
		     == index_arch_Fast_Rep_String)
		 && (index_arch_Fast_Unaligned_Load
		     == index_arch_Fast_Copy_Backward)),
		"Incorrect index_arch_Fast_Unaligned_Load");
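
/* The Fast_Rep_String, Fast_Copy_Backward, Slow_SSE4_2, Fast_Unaligned_Load,
   Fast_Unaligned_Copy and Prefer_PMINUB_for_stringop bits share one element
   of the preferred[] array, which is why the model checks below can set
   several of them with a single |= into that element.  */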

static inline void
init_cpu_features (struct cpu_features *cpu_features)
{
  unsigned int ebx, ecx, edx;
  unsigned int family = 0;
  unsigned int model = 0;
  unsigned int stepping = 0;
  enum cpu_features_kind kind;

#if !HAS_CPUID
  if (__get_cpuid_max (0, 0) == 0)
    {
      kind = arch_kind_other;
      goto no_cpuid;
    }
#endif

  __cpuid (0, cpu_features->basic.max_cpuid, ebx, ecx, edx);

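  /* CPUID leaf 0 returns the 12-byte vendor string in EBX, EDX and ECX,
     four little-endian bytes per register.  */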
  /* This spells out "GenuineIntel".  */
  if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
    {
      unsigned int extended_model;

      kind = arch_kind_intel;

      get_common_indices (cpu_features, &family, &model, &extended_model,
			  &stepping);

      get_extended_indices (cpu_features);

      update_active (cpu_features);

      if (family == 0x06)
	{
	  model += extended_model;
	  switch (model)
	    {
	    case 0x1c:
	    case 0x26:
	      /* BSF is slow on Atom.  */
	      cpu_features->preferred[index_arch_Slow_BSF]
		|= bit_arch_Slow_BSF;
	      break;

	    case 0x57:
	      /* Knights Landing.  Enable Silvermont optimizations.  */

	    case 0x7a:
	      /* Unaligned load versions are faster than SSSE3
		 on Goldmont Plus.  */

	    case 0x5c:
	    case 0x5f:
	      /* Unaligned load versions are faster than SSSE3
		 on Goldmont.  */

	    case 0x4c:
	    case 0x5a:
	    case 0x75:
	      /* Airmont is a die shrink of Silvermont.  */

	    case 0x37:
	    case 0x4a:
	    case 0x4d:
	    case 0x5d:
	      /* Unaligned load versions are faster than SSSE3
		 on Silvermont.  */
	      cpu_features->preferred[index_arch_Fast_Unaligned_Load]
		|= (bit_arch_Fast_Unaligned_Load
		    | bit_arch_Fast_Unaligned_Copy
		    | bit_arch_Prefer_PMINUB_for_stringop
		    | bit_arch_Slow_SSE4_2);
	      break;

	    case 0x86:
	    case 0x96:
	    case 0x9c:
	      /* Enable rep string instructions, unaligned load, unaligned
		 copy, pminub and avoid SSE 4.2 on Tremont.  */
	      cpu_features->preferred[index_arch_Fast_Rep_String]
		|= (bit_arch_Fast_Rep_String
		    | bit_arch_Fast_Unaligned_Load
		    | bit_arch_Fast_Unaligned_Copy
		    | bit_arch_Prefer_PMINUB_for_stringop
		    | bit_arch_Slow_SSE4_2);
	      break;

	    default:
	      /* Unknown family 0x06 processors.  Assume this is one of the
		 Core i3/i5/i7 processors if AVX is available.  */
	      if (!CPU_FEATURES_CPU_P (cpu_features, AVX))
		break;
	      /* Fall through.  */

	    case 0x1a:
	    case 0x1e:
	    case 0x1f:
	    case 0x25:
	    case 0x2c:
	    case 0x2e:
	    case 0x2f:
	      /* Rep string instructions, unaligned load, unaligned copy,
		 and pminub are fast on Intel Core i3, i5 and i7.  */
	      cpu_features->preferred[index_arch_Fast_Rep_String]
		|= (bit_arch_Fast_Rep_String
		    | bit_arch_Fast_Unaligned_Load
		    | bit_arch_Fast_Unaligned_Copy
		    | bit_arch_Prefer_PMINUB_for_stringop);
	      break;
	    }

	  /* Disable TSX on some processors to avoid TSX on kernels that
	     weren't updated with the latest microcode package (which
	     disables the broken feature by default).  */
	  switch (model)
	    {
	    case 0x55:
	      if (stepping <= 5)
		goto disable_tsx;
	      break;
	    case 0x8e:
	      /* NB: Although the errata documents that for model == 0x8e,
		 only 0xb stepping or lower are impacted, the intention of
		 the errata was to disable TSX on all client processors on
		 all steppings.  Include 0xc stepping which is an Intel
		 Core i7-8665U, a client mobile processor.  */
	    case 0x9e:
	      if (stepping > 0xc)
		break;
	      /* Fall through.  */
	    case 0x4e:
	    case 0x5e:
	      {
		/* Disable Intel TSX and enable RTM_ALWAYS_ABORT for
		   processors listed in:

https://www.intel.com/content/www/us/en/support/articles/000059422/processors.html
		 */
disable_tsx:
		CPU_FEATURE_UNSET (cpu_features, HLE);
		CPU_FEATURE_UNSET (cpu_features, RTM);
		CPU_FEATURE_SET (cpu_features, RTM_ALWAYS_ABORT);
	      }
	      break;
	    case 0x3f:
	      /* Xeon E7 v3 with stepping >= 4 has working TSX.  */
	      if (stepping >= 4)
		break;
	      /* Fall through.  */
	    case 0x3c:
	    case 0x45:
	    case 0x46:
	      /* Disable Intel TSX on Haswell processors (except Xeon E7 v3
		 with stepping >= 4) to avoid TSX on kernels that weren't
		 updated with the latest microcode package (which disables
		 the broken feature by default).  */
	      CPU_FEATURE_UNSET (cpu_features, RTM);
	      break;
	    }
	}


      /* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER
	 if AVX512ER is available.  If AVX512ER isn't available, prefer
	 not to use AVX512 to avoid the lower CPU frequency that 512-bit
	 operations can trigger.  */
      if (CPU_FEATURES_CPU_P (cpu_features, AVX512ER))
	cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
	  |= bit_arch_Prefer_No_VZEROUPPER;
      else
	{
	  /* Processors with AVX512 and AVX-VNNI won't lower CPU frequency
	     when ZMM load and store instructions are used.  */
	  if (!CPU_FEATURES_CPU_P (cpu_features, AVX_VNNI))
	    cpu_features->preferred[index_arch_Prefer_No_AVX512]
	      |= bit_arch_Prefer_No_AVX512;

	  /* Avoid RTM abort triggered by VZEROUPPER inside a
	     transactionally executing RTM region.  */
	  if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
	    cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
	      |= bit_arch_Prefer_No_VZEROUPPER;
	}

      /* Avoid short distance REP MOVSB on processors with FSRM.  */
      if (CPU_FEATURES_CPU_P (cpu_features, FSRM))
	cpu_features->preferred[index_arch_Avoid_Short_Distance_REP_MOVSB]
	  |= bit_arch_Avoid_Short_Distance_REP_MOVSB;
    }
  /* This spells out "AuthenticAMD" or "HygonGenuine".  */
  else if ((ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
	   || (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e))
    {
      unsigned int extended_model;

      kind = arch_kind_amd;

      get_common_indices (cpu_features, &family, &model, &extended_model,
			  &stepping);

      get_extended_indices (cpu_features);

      update_active (cpu_features);

      ecx = cpu_features->features[CPUID_INDEX_1].cpuid.ecx;

      if (CPU_FEATURE_USABLE_P (cpu_features, AVX))
	{
	  /* Since the FMA4 bit is in CPUID_INDEX_80000001 and
	     FMA4 requires AVX, determine if FMA4 is usable here.  */
	  CPU_FEATURE_SET_ACTIVE (cpu_features, FMA4);
	}

      if (family == 0x15)
	{
	  /* "Excavator"  */
	  if (model >= 0x60 && model <= 0x7f)
	    {
	      cpu_features->preferred[index_arch_Fast_Unaligned_Load]
		|= (bit_arch_Fast_Unaligned_Load
		    | bit_arch_Fast_Copy_Backward);

	      /* Unaligned AVX loads are slower.  */
	      cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
		&= ~bit_arch_AVX_Fast_Unaligned_Load;
	    }
	}
    }
  /* This spells out "CentaurHauls" or " Shanghai ".  */
  else if ((ebx == 0x746e6543 && ecx == 0x736c7561 && edx == 0x48727561)
	   || (ebx == 0x68532020 && ecx == 0x20206961 && edx == 0x68676e61))
    {
      unsigned int extended_model, stepping;

      kind = arch_kind_zhaoxin;

      get_common_indices (cpu_features, &family, &model, &extended_model,
			  &stepping);

      get_extended_indices (cpu_features);

      update_active (cpu_features);

      model += extended_model;
      if (family == 0x6)
	{
	  if (model == 0xf || model == 0x19)
	    {
	      CPU_FEATURE_UNSET (cpu_features, AVX);
	      CPU_FEATURE_UNSET (cpu_features, AVX2);

	      cpu_features->preferred[index_arch_Slow_SSE4_2]
		|= bit_arch_Slow_SSE4_2;

	      cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
		&= ~bit_arch_AVX_Fast_Unaligned_Load;
	    }
	}
      else if (family == 0x7)
	{
	  if (model == 0x1b)
	    {
	      CPU_FEATURE_UNSET (cpu_features, AVX);
	      CPU_FEATURE_UNSET (cpu_features, AVX2);

	      cpu_features->preferred[index_arch_Slow_SSE4_2]
		|= bit_arch_Slow_SSE4_2;

	      cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
		&= ~bit_arch_AVX_Fast_Unaligned_Load;
	    }
	  else if (model == 0x3b)
	    {
	      CPU_FEATURE_UNSET (cpu_features, AVX);
	      CPU_FEATURE_UNSET (cpu_features, AVX2);

	      cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
		&= ~bit_arch_AVX_Fast_Unaligned_Load;
	    }
	}
    }
  else
    {
      kind = arch_kind_other;
      get_common_indices (cpu_features, NULL, NULL, NULL, NULL);
      update_active (cpu_features);
    }

  /* Support i586 if CX8 is available.  */
  if (CPU_FEATURES_CPU_P (cpu_features, CX8))
    cpu_features->preferred[index_arch_I586] |= bit_arch_I586;

  /* Support i686 if CMOV is available.  */
  if (CPU_FEATURES_CPU_P (cpu_features, CMOV))
    cpu_features->preferred[index_arch_I686] |= bit_arch_I686;

#if !HAS_CPUID
no_cpuid:
#endif

  cpu_features->basic.kind = kind;
  cpu_features->basic.family = family;
  cpu_features->basic.model = model;
  cpu_features->basic.stepping = stepping;

  dl_init_cacheinfo (cpu_features);

#if HAVE_TUNABLES
  TUNABLE_GET (hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));

  bool disable_xsave_features = false;

  if (!CPU_FEATURE_USABLE_P (cpu_features, OSXSAVE))
    {
      /* These features are usable only if OSXSAVE is usable.  */
      CPU_FEATURE_UNSET (cpu_features, XSAVE);
      CPU_FEATURE_UNSET (cpu_features, XSAVEOPT);
      CPU_FEATURE_UNSET (cpu_features, XSAVEC);
      CPU_FEATURE_UNSET (cpu_features, XGETBV_ECX_1);
      CPU_FEATURE_UNSET (cpu_features, XFD);

      disable_xsave_features = true;
    }

  if (disable_xsave_features
      || (!CPU_FEATURE_USABLE_P (cpu_features, XSAVE)
	  && !CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)))
    {
      /* Clear xsave_state_size if both XSAVE and XSAVEC aren't usable.  */
      cpu_features->xsave_state_size = 0;

      CPU_FEATURE_UNSET (cpu_features, AVX);
      CPU_FEATURE_UNSET (cpu_features, AVX2);
      CPU_FEATURE_UNSET (cpu_features, AVX_VNNI);
      CPU_FEATURE_UNSET (cpu_features, FMA);
      CPU_FEATURE_UNSET (cpu_features, VAES);
      CPU_FEATURE_UNSET (cpu_features, VPCLMULQDQ);
      CPU_FEATURE_UNSET (cpu_features, XOP);
      CPU_FEATURE_UNSET (cpu_features, F16C);
      CPU_FEATURE_UNSET (cpu_features, AVX512F);
      CPU_FEATURE_UNSET (cpu_features, AVX512CD);
      CPU_FEATURE_UNSET (cpu_features, AVX512ER);
      CPU_FEATURE_UNSET (cpu_features, AVX512PF);
      CPU_FEATURE_UNSET (cpu_features, AVX512VL);
      CPU_FEATURE_UNSET (cpu_features, AVX512DQ);
      CPU_FEATURE_UNSET (cpu_features, AVX512BW);
      CPU_FEATURE_UNSET (cpu_features, AVX512_4FMAPS);
      CPU_FEATURE_UNSET (cpu_features, AVX512_4VNNIW);
      CPU_FEATURE_UNSET (cpu_features, AVX512_BITALG);
      CPU_FEATURE_UNSET (cpu_features, AVX512_IFMA);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VBMI);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VBMI2);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VNNI);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VPOPCNTDQ);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VP2INTERSECT);
      CPU_FEATURE_UNSET (cpu_features, AVX512_BF16);
      CPU_FEATURE_UNSET (cpu_features, AVX512_FP16);
      CPU_FEATURE_UNSET (cpu_features, AMX_BF16);
      CPU_FEATURE_UNSET (cpu_features, AMX_TILE);
      CPU_FEATURE_UNSET (cpu_features, AMX_INT8);

      CPU_FEATURE_UNSET (cpu_features, FMA4);
    }

#elif defined SHARED
  /* Reuse dl_platform, dl_hwcap and dl_hwcap_mask for x86.  The
     glibc.cpu.hwcap_mask tunable is initialized already, so no
     need to do this.  */
  GLRO(dl_hwcap_mask) = HWCAP_IMPORTANT;
#endif

#ifdef __x86_64__
  GLRO(dl_hwcap) = HWCAP_X86_64;
  if (cpu_features->basic.kind == arch_kind_intel)
    {
      const char *platform = NULL;

      if (CPU_FEATURE_USABLE_P (cpu_features, AVX512CD))
	{
	  if (CPU_FEATURE_USABLE_P (cpu_features, AVX512ER))
	    {
	      if (CPU_FEATURE_USABLE_P (cpu_features, AVX512PF))
		platform = "xeon_phi";
	    }
	  else
	    {
	      if (CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
		  && CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
		  && CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
		GLRO(dl_hwcap) |= HWCAP_X86_AVX512_1;
	    }
	}

      if (platform == NULL
	  && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
	  && CPU_FEATURE_USABLE_P (cpu_features, FMA)
	  && CPU_FEATURE_USABLE_P (cpu_features, BMI1)
	  && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
	  && CPU_FEATURE_USABLE_P (cpu_features, LZCNT)
	  && CPU_FEATURE_USABLE_P (cpu_features, MOVBE)
	  && CPU_FEATURE_USABLE_P (cpu_features, POPCNT))
	platform = "haswell";

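      /* The platform string chosen here becomes GLRO(dl_platform), which
	 the dynamic loader exports and uses, for example for $PLATFORM
	 expansion.  */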
      if (platform != NULL)
	GLRO(dl_platform) = platform;
    }
#else
  GLRO(dl_hwcap) = 0;
  if (CPU_FEATURE_USABLE_P (cpu_features, SSE2))
    GLRO(dl_hwcap) |= HWCAP_X86_SSE2;

  if (CPU_FEATURES_ARCH_P (cpu_features, I686))
    GLRO(dl_platform) = "i686";
  else if (CPU_FEATURES_ARCH_P (cpu_features, I586))
    GLRO(dl_platform) = "i586";
#endif

#if CET_ENABLED
# if HAVE_TUNABLES
  TUNABLE_GET (x86_ibt, tunable_val_t *,
	       TUNABLE_CALLBACK (set_x86_ibt));
  TUNABLE_GET (x86_shstk, tunable_val_t *,
	       TUNABLE_CALLBACK (set_x86_shstk));
# endif

  /* Check CET status.  */
  unsigned int cet_status = get_cet_status ();
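  /* get_cet_status reports which of IBT and SHSTK the kernel has actually
     enabled for this process, as GNU_PROPERTY_X86_FEATURE_1_* bits.  */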

  if ((cet_status & GNU_PROPERTY_X86_FEATURE_1_IBT) == 0)
    CPU_FEATURE_UNSET (cpu_features, IBT)
  if ((cet_status & GNU_PROPERTY_X86_FEATURE_1_SHSTK) == 0)
    CPU_FEATURE_UNSET (cpu_features, SHSTK)

  if (cet_status)
    {
      GL(dl_x86_feature_1) = cet_status;

# ifndef SHARED
      /* Check if IBT and SHSTK are enabled by the kernel.  */
      if ((cet_status & GNU_PROPERTY_X86_FEATURE_1_IBT)
	  || (cet_status & GNU_PROPERTY_X86_FEATURE_1_SHSTK))
	{
	  /* Disable IBT and/or SHSTK if they are enabled by the kernel,
	     but disabled by the environment variable:

	     GLIBC_TUNABLES=glibc.cpu.hwcaps=-IBT,-SHSTK
	   */
	  unsigned int cet_feature = 0;
	  if (!CPU_FEATURE_USABLE (IBT))
	    cet_feature |= GNU_PROPERTY_X86_FEATURE_1_IBT;
	  if (!CPU_FEATURE_USABLE (SHSTK))
	    cet_feature |= GNU_PROPERTY_X86_FEATURE_1_SHSTK;

	  if (cet_feature)
	    {
	      int res = dl_cet_disable_cet (cet_feature);

	      /* Clear the disabled bits in dl_x86_feature_1.  */
	      if (res == 0)
		GL(dl_x86_feature_1) &= ~cet_feature;
	    }

	  /* Lock CET if IBT or SHSTK is enabled in the executable.  Don't
	     lock CET if IBT or SHSTK is enabled permissively.  */
	  if (GL(dl_x86_feature_control).ibt != cet_permissive
	      && GL(dl_x86_feature_control).shstk != cet_permissive)
	    dl_cet_lock_cet ();
	}
# endif
    }
#endif

#ifndef SHARED
  /* NB: In libc.a, call init_cacheinfo.  */
  init_cacheinfo ();
#endif
}
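
/* Elsewhere in glibc the active and preferred bits filled in above are what
   CPU_FEATURE_USABLE and CPU_FEATURES_ARCH_P test, for example when IFUNC
   resolvers select the memory and string function implementations.  */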