/* This file is part of the GNU C Library.
   Copyright (C) 2008-2017 Free Software Foundation, Inc.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

18#ifndef cpu_features_h
19#define cpu_features_h
20
/* Bit masks for the internal feature bits.  Each mask is tested against
   the feature word selected by the matching index_arch_* macro (see
   CPU_FEATURES_ARCH_P and HAS_ARCH_FEATURE).  Bit 3 is not assigned.
   The values are plain integer expressions because they are also used
   as immediates by the assembler variant of HAS_FEATURE.  */
#define bit_arch_Fast_Rep_String		(1 << 0)
#define bit_arch_Fast_Copy_Backward		(1 << 1)
#define bit_arch_Slow_BSF			(1 << 2)
#define bit_arch_Fast_Unaligned_Load		(1 << 4)
#define bit_arch_Prefer_PMINUB_for_stringop	(1 << 5)
#define bit_arch_AVX_Usable			(1 << 6)
#define bit_arch_FMA_Usable			(1 << 7)
#define bit_arch_FMA4_Usable			(1 << 8)
#define bit_arch_Slow_SSE4_2			(1 << 9)
#define bit_arch_AVX2_Usable			(1 << 10)
#define bit_arch_AVX_Fast_Unaligned_Load	(1 << 11)
#define bit_arch_AVX512F_Usable			(1 << 12)
#define bit_arch_AVX512DQ_Usable		(1 << 13)
#define bit_arch_I586				(1 << 14)
#define bit_arch_I686				(1 << 15)
#define bit_arch_Prefer_MAP_32BIT_EXEC		(1 << 16)
#define bit_arch_Prefer_No_VZEROUPPER		(1 << 17)
#define bit_arch_Fast_Unaligned_Copy		(1 << 18)
#define bit_arch_Prefer_ERMS			(1 << 19)
#define bit_arch_Use_dl_runtime_resolve_opt	(1 << 20)
#define bit_arch_Use_dl_runtime_resolve_slow	(1 << 21)
#define bit_arch_Prefer_No_AVX512		(1 << 22)

/* CPUID Feature flags.  Each bit_cpu_* mask is tested against the
   register named by reg_* inside the cpuid[] entry selected by
   index_cpu_* (see CPU_FEATURES_CPU_P).  */

/* COMMON_CPUID_INDEX_1 (CPUID leaf 1), EDX.  */
#define bit_cpu_CX8		(1 << 8)
#define bit_cpu_CMOV		(1 << 15)
#define bit_cpu_SSE		(1 << 25)
#define bit_cpu_SSE2		(1 << 26)
#define bit_cpu_HTT		(1 << 28)

/* COMMON_CPUID_INDEX_1 (CPUID leaf 1), ECX.  POPCOUNT and POPCNT are
   two names for the same bit.  */
#define bit_cpu_SSSE3		(1 << 9)
#define bit_cpu_FMA		(1 << 12)
#define bit_cpu_SSE4_1		(1 << 19)
#define bit_cpu_SSE4_2		(1 << 20)
#define bit_cpu_MOVBE		(1 << 22)
#define bit_cpu_POPCOUNT	(1 << 23)
#define bit_cpu_POPCNT		(1 << 23)
#define bit_cpu_OSXSAVE		(1 << 27)
#define bit_cpu_AVX		(1 << 28)

/* COMMON_CPUID_INDEX_80000001 (CPUID leaf 0x80000001), ECX.  */
#define bit_cpu_LZCNT		(1 << 5)
#define bit_cpu_FMA4		(1 << 16)

/* COMMON_CPUID_INDEX_7 (CPUID leaf 7), EBX, in ascending bit order.
   AVX512VL uses an unsigned constant because bit 31 does not fit in a
   signed int shift.  */
#define bit_cpu_BMI1		(1 << 3)
#define bit_cpu_AVX2		(1 << 5)
#define bit_cpu_BMI2		(1 << 8)
#define bit_cpu_ERMS		(1 << 9)
#define bit_cpu_RTM		(1 << 11)
#define bit_cpu_AVX512F		(1 << 16)
#define bit_cpu_AVX512DQ	(1 << 17)
#define bit_cpu_AVX512PF	(1 << 26)
#define bit_cpu_AVX512ER	(1 << 27)
#define bit_cpu_AVX512CD	(1 << 28)
#define bit_cpu_AVX512BW	(1 << 30)
#define bit_cpu_AVX512VL	(1u << 31)

/* XCR0 Feature flags.  */
#define bit_XMM_state		(1 << 1)
#define bit_YMM_state		(1 << 2)
#define bit_Opmask_state	(1 << 5)
#define bit_ZMM0_15_state	(1 << 6)
#define bit_ZMM16_31_state	(1 << 7)

/* The integer bit array index for the first set of internal feature bits.  */
#define FEATURE_INDEX_1 0

/* The current maximum size of the feature integer bit array.  */
#define FEATURE_INDEX_MAX 1

91#ifdef __ASSEMBLER__
92
93# include <cpu-features-offsets.h>
94
/* Assembler form of index_cpu_*: the offset of the CPUID register
   word holding each feature bit, i.e. (leaf index * CPUID_SIZE) plus
   the offset of the register within one leaf entry.  HAS_FEATURE adds
   this to the base offset it is given and wraps it in parentheses at
   the point of use.  */

/* COMMON_CPUID_INDEX_1, EDX.  */
# define index_cpu_CX8	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET
# define index_cpu_CMOV	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET
# define index_cpu_SSE	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET
# define index_cpu_SSE2	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET
/* COMMON_CPUID_INDEX_1, ECX.  */
# define index_cpu_SSSE3	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
# define index_cpu_SSE4_1	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
# define index_cpu_SSE4_2	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
# define index_cpu_AVX	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
# define index_cpu_MOVBE	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
/* COMMON_CPUID_INDEX_7, EBX.  */
# define index_cpu_AVX2	COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET
# define index_cpu_ERMS	COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET

/* Assembler form of index_arch_*: the offset of the internal feature
   word holding each bit_arch_* flag.  Every flag currently lives in
   the word selected by FEATURE_INDEX_1.  */
# define index_arch_Fast_Rep_String		FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Fast_Copy_Backward		FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Slow_BSF			FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Fast_Unaligned_Load		FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Prefer_PMINUB_for_stringop	FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_AVX_Usable			FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_FMA_Usable			FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_FMA4_Usable			FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Slow_SSE4_2			FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_AVX2_Usable			FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_AVX_Fast_Unaligned_Load	FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_AVX512F_Usable		FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_AVX512DQ_Usable		FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_I586			FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_I686			FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Prefer_MAP_32BIT_EXEC	FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Prefer_No_VZEROUPPER	FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Fast_Unaligned_Copy		FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Prefer_ERMS			FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Use_dl_runtime_resolve_opt	FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Use_dl_runtime_resolve_slow	FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Prefer_No_AVX512		FEATURE_INDEX_1*FEATURE_SIZE

131# if defined (_LIBC) && !IS_IN (nonlib)
132# ifdef __x86_64__
133# ifdef SHARED
134# if IS_IN (rtld)
135# define LOAD_RTLD_GLOBAL_RO_RDX
136# define HAS_FEATURE(offset, field, name) \
137 testl $(bit_##field##_##name), \
138 _rtld_local_ro+offset+(index_##field##_##name)(%rip)
139# else
140# define LOAD_RTLD_GLOBAL_RO_RDX \
141 mov _rtld_global_ro@GOTPCREL(%rip), %RDX_LP
142# define HAS_FEATURE(offset, field, name) \
143 testl $(bit_##field##_##name), \
144 RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+offset+(index_##field##_##name)(%rdx)
145# endif
146# else /* SHARED */
147# define LOAD_RTLD_GLOBAL_RO_RDX
148# define HAS_FEATURE(offset, field, name) \
149 testl $(bit_##field##_##name), \
150 _dl_x86_cpu_features+offset+(index_##field##_##name)(%rip)
151# endif /* !SHARED */
152# else /* __x86_64__ */
153# ifdef SHARED
154# define LOAD_FUNC_GOT_EAX(func) \
155 leal func@GOTOFF(%edx), %eax
156# if IS_IN (rtld)
157# define LOAD_GOT_AND_RTLD_GLOBAL_RO \
158 LOAD_PIC_REG(dx)
159# define HAS_FEATURE(offset, field, name) \
160 testl $(bit_##field##_##name), \
161 offset+(index_##field##_##name)+_rtld_local_ro@GOTOFF(%edx)
162# else
163# define LOAD_GOT_AND_RTLD_GLOBAL_RO \
164 LOAD_PIC_REG(dx); \
165 mov _rtld_global_ro@GOT(%edx), %ecx
166# define HAS_FEATURE(offset, field, name) \
167 testl $(bit_##field##_##name), \
168 RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+offset+(index_##field##_##name)(%ecx)
169# endif
170# else /* SHARED */
171# define LOAD_FUNC_GOT_EAX(func) \
172 leal func, %eax
173# define LOAD_GOT_AND_RTLD_GLOBAL_RO
174# define HAS_FEATURE(offset, field, name) \
175 testl $(bit_##field##_##name), \
176 _dl_x86_cpu_features+offset+(index_##field##_##name)
177# endif /* !SHARED */
178# endif /* !__x86_64__ */
179# else /* _LIBC && !nonlib */
180# error "Sorry, <cpu-features.h> is unimplemented for assembler"
181# endif /* !_LIBC || nonlib */
182
/* Assembler HAS_*: forward to HAS_FEATURE with the base offset of the
   CPUID data (CPUID_OFFSET) or of the internal feature words
   (FEATURE_OFFSET) and the matching bit_/index_ name prefix.  */
# define HAS_CPU_FEATURE(name)	HAS_FEATURE (CPUID_OFFSET, cpu, name)
# define HAS_ARCH_FEATURE(name)	HAS_FEATURE (FEATURE_OFFSET, arch, name)

187#else /* __ASSEMBLER__ */
188
/* Indices into the cpuid[] array of struct cpu_features, one per
   stored CPUID leaf.  */
enum
  {
    COMMON_CPUID_INDEX_1 = 0,
    COMMON_CPUID_INDEX_7,
    COMMON_CPUID_INDEX_80000001,	/* for AMD */
    /* Keep the following line at the end.  */
    COMMON_CPUID_INDEX_MAX
  };

198struct cpu_features
199{
200 enum cpu_features_kind
201 {
202 arch_kind_unknown = 0,
203 arch_kind_intel,
204 arch_kind_amd,
205 arch_kind_other
206 } kind;
207 int max_cpuid;
208 struct cpuid_registers
209 {
210 unsigned int eax;
211 unsigned int ebx;
212 unsigned int ecx;
213 unsigned int edx;
214 } cpuid[COMMON_CPUID_INDEX_MAX];
215 unsigned int family;
216 unsigned int model;
217 unsigned int feature[FEATURE_INDEX_MAX];
218 /* Data cache size for use in memory and string routines, typically
219 L1 size. */
220 unsigned long int data_cache_size;
221 /* Shared cache size for use in memory and string routines, typically
222 L2 or L3 size. */
223 unsigned long int shared_cache_size;
224 /* Threshold to use non temporal store. */
225 unsigned long int non_temporal_threshold;
226};
227
/* Used from outside of glibc to get access to the CPU features
   structure.  Declared const: the result does not depend on any
   argument or on mutable global state as far as callers are
   concerned.  */
extern const struct cpu_features *__get_cpu_features (void)
     __attribute__ ((const));

233# if defined (_LIBC) && !IS_IN (nonlib)
234/* Unused for x86. */
235# define INIT_ARCH()
236# define __get_cpu_features() (&GLRO(dl_x86_cpu_features))
237# endif
238
239
/* Only used directly in cpu-features.c.
   CPU_FEATURES_CPU_P (ptr, name) is nonzero when bit_cpu_<name> is set
   in register reg_<name> of ptr->cpuid[index_cpu_<name>].
   CPU_FEATURES_ARCH_P (ptr, name) is nonzero when bit_arch_<name> is
   set in ptr->feature[index_arch_<name>].
   PTR is parenthesized so that any pointer expression (for example
   `p + 1' or `&obj') can be passed, not just a primary expression.  */
# define CPU_FEATURES_CPU_P(ptr, name) \
  (((ptr)->cpuid[index_cpu_##name].reg_##name & (bit_cpu_##name)) != 0)
# define CPU_FEATURES_ARCH_P(ptr, name) \
  (((ptr)->feature[index_arch_##name] & (bit_arch_##name)) != 0)

/* HAS_* evaluates to true if we may use the feature at runtime.  */
# define HAS_CPU_FEATURE(name) \
  CPU_FEATURES_CPU_P (__get_cpu_features (), name)
# define HAS_ARCH_FEATURE(name) \
  CPU_FEATURES_ARCH_P (__get_cpu_features (), name)

/* Which cpuid[] entry (CPUID leaf) holds each feature bit.  */

/* CPUID leaf 1.  */
# define index_cpu_CX8		COMMON_CPUID_INDEX_1
# define index_cpu_CMOV		COMMON_CPUID_INDEX_1
# define index_cpu_SSE		COMMON_CPUID_INDEX_1
# define index_cpu_SSE2		COMMON_CPUID_INDEX_1
# define index_cpu_SSSE3	COMMON_CPUID_INDEX_1
# define index_cpu_SSE4_1	COMMON_CPUID_INDEX_1
# define index_cpu_SSE4_2	COMMON_CPUID_INDEX_1
# define index_cpu_AVX		COMMON_CPUID_INDEX_1
# define index_cpu_FMA		COMMON_CPUID_INDEX_1
# define index_cpu_POPCOUNT	COMMON_CPUID_INDEX_1
# define index_cpu_OSXSAVE	COMMON_CPUID_INDEX_1
# define index_cpu_HTT		COMMON_CPUID_INDEX_1
# define index_cpu_MOVBE	COMMON_CPUID_INDEX_1
# define index_cpu_POPCNT	COMMON_CPUID_INDEX_1

/* CPUID leaf 7.  */
# define index_cpu_AVX2		COMMON_CPUID_INDEX_7
# define index_cpu_AVX512F	COMMON_CPUID_INDEX_7
# define index_cpu_AVX512DQ	COMMON_CPUID_INDEX_7
# define index_cpu_AVX512PF	COMMON_CPUID_INDEX_7
# define index_cpu_AVX512ER	COMMON_CPUID_INDEX_7
# define index_cpu_AVX512CD	COMMON_CPUID_INDEX_7
# define index_cpu_AVX512BW	COMMON_CPUID_INDEX_7
# define index_cpu_AVX512VL	COMMON_CPUID_INDEX_7
# define index_cpu_ERMS		COMMON_CPUID_INDEX_7
# define index_cpu_RTM		COMMON_CPUID_INDEX_7
# define index_cpu_BMI1		COMMON_CPUID_INDEX_7
# define index_cpu_BMI2		COMMON_CPUID_INDEX_7

/* CPUID leaf 0x80000001.  LZCNT is reported in ECX bit 5 of this
   extended leaf; ECX bit 5 of leaf 1 is a different feature, so
   indexing leaf 1 here would test the wrong bit.
   NOTE(review): confirm that the initialization code fills
   cpuid[COMMON_CPUID_INDEX_80000001] for every vendor on which
   HAS_CPU_FEATURE (LZCNT) or HAS_CPU_FEATURE (FMA4) is consulted.  */
# define index_cpu_FMA4		COMMON_CPUID_INDEX_80000001
# define index_cpu_LZCNT	COMMON_CPUID_INDEX_80000001

/* Which member of struct cpuid_registers holds each feature bit;
   pasted after `.' by CPU_FEATURES_CPU_P.  */

/* EDX.  */
# define reg_CX8		edx
# define reg_CMOV		edx
# define reg_SSE		edx
# define reg_SSE2		edx
# define reg_HTT		edx

/* ECX.  */
# define reg_SSSE3		ecx
# define reg_SSE4_1		ecx
# define reg_SSE4_2		ecx
# define reg_AVX		ecx
# define reg_FMA		ecx
# define reg_FMA4		ecx
# define reg_POPCOUNT		ecx
# define reg_OSXSAVE		ecx
# define reg_LZCNT		ecx
# define reg_MOVBE		ecx
# define reg_POPCNT		ecx

/* EBX.  */
# define reg_AVX2		ebx
# define reg_AVX512F		ebx
# define reg_AVX512DQ		ebx
# define reg_AVX512PF		ebx
# define reg_AVX512ER		ebx
# define reg_AVX512CD		ebx
# define reg_AVX512BW		ebx
# define reg_AVX512VL		ebx
# define reg_ERMS		ebx
# define reg_RTM		ebx
# define reg_BMI1		ebx
# define reg_BMI2		ebx

/* Which element of cpu_features.feature[] holds each bit_arch_* flag.
   Every flag currently lives in the word selected by
   FEATURE_INDEX_1.  */
# define index_arch_Fast_Rep_String		FEATURE_INDEX_1
# define index_arch_Fast_Copy_Backward		FEATURE_INDEX_1
# define index_arch_Slow_BSF			FEATURE_INDEX_1
# define index_arch_Fast_Unaligned_Load		FEATURE_INDEX_1
# define index_arch_Prefer_PMINUB_for_stringop	FEATURE_INDEX_1
# define index_arch_AVX_Usable			FEATURE_INDEX_1
# define index_arch_FMA_Usable			FEATURE_INDEX_1
# define index_arch_FMA4_Usable			FEATURE_INDEX_1
# define index_arch_Slow_SSE4_2			FEATURE_INDEX_1
# define index_arch_AVX2_Usable			FEATURE_INDEX_1
# define index_arch_AVX_Fast_Unaligned_Load	FEATURE_INDEX_1
# define index_arch_AVX512F_Usable		FEATURE_INDEX_1
# define index_arch_AVX512DQ_Usable		FEATURE_INDEX_1
# define index_arch_I586			FEATURE_INDEX_1
# define index_arch_I686			FEATURE_INDEX_1
# define index_arch_Prefer_MAP_32BIT_EXEC	FEATURE_INDEX_1
# define index_arch_Prefer_No_VZEROUPPER	FEATURE_INDEX_1
# define index_arch_Fast_Unaligned_Copy		FEATURE_INDEX_1
# define index_arch_Prefer_ERMS			FEATURE_INDEX_1
# define index_arch_Use_dl_runtime_resolve_opt	FEATURE_INDEX_1
# define index_arch_Use_dl_runtime_resolve_slow	FEATURE_INDEX_1
# define index_arch_Prefer_No_AVX512		FEATURE_INDEX_1

333#endif /* !__ASSEMBLER__ */
334
/* Decide at compile time, from the target CPU macros predefined by the
   compiler, what is already known about CPUID and the i586/i686
   baseline.  Anything not known statically falls back to a runtime
   HAS_ARCH_FEATURE test.  HAS_I586 and HAS_I686 are not defined for
   x86-64.  */
#ifdef __x86_64__
# define HAS_CPUID 1
#elif defined __i586__ || defined __pentium__
/* i586 target: only the i686 level still needs a runtime check.  */
# define HAS_CPUID 1
# define HAS_I586 1
# define HAS_I686 HAS_ARCH_FEATURE (I686)
#elif (defined __i686__ || defined __pentiumpro__		\
       || defined __pentium4__ || defined __nocona__		\
       || defined __atom__ || defined __core2__			\
       || defined __corei7__ || defined __corei7_avx__		\
       || defined __core_avx2__ || defined __nehalem__		\
       || defined __sandybridge__ || defined __haswell__	\
       || defined __knl__ || defined __bonnell__		\
       || defined __silvermont__				\
       || defined __k6__ || defined __k8__			\
       || defined __athlon__ || defined __amdfam10__		\
       || defined __bdver1__ || defined __bdver2__		\
       || defined __bdver3__ || defined __bdver4__		\
       || defined __btver1__ || defined __btver2__)
/* One of the listed target CPUs: everything is known statically.  */
# define HAS_CPUID 1
# define HAS_I586 1
# define HAS_I686 1
#else
/* Unknown baseline: CPUID is not assumed; check the levels at runtime.  */
# define HAS_CPUID 0
# define HAS_I586 HAS_ARCH_FEATURE (I586)
# define HAS_I686 HAS_ARCH_FEATURE (I686)
#endif

363#endif /* cpu_features_h */
364