1/* This file is part of the GNU C Library.
2 Copyright (C) 2008-2017 Free Software Foundation, Inc.
3
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
8
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, see
16 <http://www.gnu.org/licenses/>. */
17
18#ifndef cpu_features_h
19#define cpu_features_h
20
/* Bit masks for glibc's internal "arch" feature word.  Each mask is
   paired with the matching index_arch_<name> macro and tested against
   that word by HAS_ARCH_FEATURE / CPU_FEATURES_ARCH_P.  The values are
   kept as plain shift expressions so that the assembler branch of this
   header can use them as immediates too.  Bit 3 is not assigned in
   this list.  */
#define bit_arch_Fast_Rep_String                (1 << 0)
#define bit_arch_Fast_Copy_Backward             (1 << 1)
#define bit_arch_Slow_BSF                       (1 << 2)
#define bit_arch_Fast_Unaligned_Load            (1 << 4)
#define bit_arch_Prefer_PMINUB_for_stringop     (1 << 5)
#define bit_arch_AVX_Usable                     (1 << 6)
#define bit_arch_FMA_Usable                     (1 << 7)
#define bit_arch_FMA4_Usable                    (1 << 8)
#define bit_arch_Slow_SSE4_2                    (1 << 9)
#define bit_arch_AVX2_Usable                    (1 << 10)
#define bit_arch_AVX_Fast_Unaligned_Load        (1 << 11)
#define bit_arch_AVX512F_Usable                 (1 << 12)
#define bit_arch_AVX512DQ_Usable                (1 << 13)
#define bit_arch_I586                           (1 << 14)
#define bit_arch_I686                           (1 << 15)
#define bit_arch_Prefer_MAP_32BIT_EXEC          (1 << 16)
#define bit_arch_Prefer_No_VZEROUPPER           (1 << 17)
#define bit_arch_Fast_Unaligned_Copy            (1 << 18)
#define bit_arch_Prefer_ERMS                    (1 << 19)
#define bit_arch_Use_dl_runtime_resolve_opt     (1 << 20)
#define bit_arch_Use_dl_runtime_resolve_slow    (1 << 21)
42
/* CPUID feature flags.  Each bit_cpu_<name> mask is tested against the
   register selected by the matching index_cpu_<name> / reg_<name>
   macros; the grouping below follows those macros as defined in the C
   branch of this header.  */

/* COMMON_CPUID_INDEX_1, register EDX.  */
#define bit_cpu_CX8             (1 << 8)
#define bit_cpu_CMOV            (1 << 15)
#define bit_cpu_SSE2            (1 << 26)
#define bit_cpu_HTT             (1 << 28)

/* COMMON_CPUID_INDEX_1, register ECX.  */
#define bit_cpu_SSSE3           (1 << 9)
#define bit_cpu_FMA             (1 << 12)
#define bit_cpu_SSE4_1          (1 << 19)
#define bit_cpu_SSE4_2          (1 << 20)
#define bit_cpu_POPCOUNT        (1 << 23)
#define bit_cpu_OSXSAVE         (1 << 27)
#define bit_cpu_AVX             (1 << 28)

/* COMMON_CPUID_INDEX_80000001, register ECX.  */
#define bit_cpu_FMA4            (1 << 16)

/* COMMON_CPUID_INDEX_7, register EBX.  */
#define bit_cpu_AVX2            (1 << 5)
#define bit_cpu_ERMS            (1 << 9)
#define bit_cpu_RTM             (1 << 11)
#define bit_cpu_AVX512F         (1 << 16)
#define bit_cpu_AVX512DQ        (1 << 17)

/* XCR0 Feature flags.  */
#define bit_XMM_state           (1 << 1)
#define bit_YMM_state           (1 << 2)
#define bit_Opmask_state        (1 << 5)
#define bit_ZMM0_15_state       (1 << 6)
#define bit_ZMM16_31_state      (1 << 7)
72
/* Position, within the feature integer array, of the first (and so far
   only) word of internal feature bits.  */
#define FEATURE_INDEX_1         0

/* Number of words currently in the feature integer array; raise this
   when another FEATURE_INDEX_<n> word is introduced.  */
#define FEATURE_INDEX_MAX       1
78
#ifdef __ASSEMBLER__

/* Assembler view of the feature data.  NOTE(review): CPUID_SIZE,
   CPUID_E?X_OFFSET, FEATURE_SIZE, CPUID_OFFSET, FEATURE_OFFSET and
   the COMMON_CPUID_INDEX_* values used below are not defined in this
   branch; presumably they come from <cpu-features-offsets.h> --
   confirm.  */
# include <cpu-features-offsets.h>

/* Offset of the register holding each CPUID feature bit: slot index
   times slot size plus the offset of the register inside the slot.
   The expressions are left unparenthesised; HAS_FEATURE wraps them in
   parentheses at the point of use.  Only the features listed here are
   available to assembler code.  */
# define index_cpu_CX8 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET
# define index_cpu_CMOV COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET
# define index_cpu_SSE2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET
# define index_cpu_SSSE3 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
# define index_cpu_SSE4_1 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
# define index_cpu_SSE4_2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
# define index_cpu_AVX COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
# define index_cpu_AVX2 COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET
# define index_cpu_ERMS COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET

/* Offset of the feature word holding each internal "arch" bit.  All
   of them currently live in word FEATURE_INDEX_1.  */
# define index_arch_Fast_Rep_String FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Fast_Copy_Backward FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Slow_BSF FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_AVX_Usable FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_FMA_Usable FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_FMA4_Usable FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Slow_SSE4_2 FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_AVX2_Usable FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_AVX_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_AVX512F_Usable FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_AVX512DQ_Usable FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_I586 FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_I686 FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Prefer_MAP_32BIT_EXEC FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Prefer_No_VZEROUPPER FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Fast_Unaligned_Copy FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Prefer_ERMS FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Use_dl_runtime_resolve_opt FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Use_dl_runtime_resolve_slow FEATURE_INDEX_1*FEATURE_SIZE


/* HAS_FEATURE (offset, field, name) expands to a single `testl' of
   bit_<field>_<name> against the word at OFFSET + index_<field>_<name>
   inside the CPU features data.  How that data is addressed depends on
   the target (x86-64 or not), on SHARED and on whether the code is
   built for rtld, hence one definition per configuration.  Where the
   operand is relative to a register, the matching LOAD_* macro is the
   one that loads that register; in the other configurations the
   LOAD_* macro is empty or only sets up the PIC register.  */
# if defined (_LIBC) && !IS_IN (nonlib)
#  ifdef __x86_64__
#   ifdef SHARED
#    if IS_IN (rtld)
/* Inside rtld: address _rtld_local_ro %rip-relatively; nothing to
   load.  */
#     define LOAD_RTLD_GLOBAL_RO_RDX
#     define HAS_FEATURE(offset, field, name) \
  testl $(bit_##field##_##name), \
  _rtld_local_ro+offset+(index_##field##_##name)(%rip)
#    else
/* Other shared objects: fetch the address of _rtld_global_ro from the
   GOT into %RDX_LP, then test relative to %rdx.  */
#     define LOAD_RTLD_GLOBAL_RO_RDX \
  mov _rtld_global_ro@GOTPCREL(%rip), %RDX_LP
#     define HAS_FEATURE(offset, field, name) \
  testl $(bit_##field##_##name), \
  RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+offset+(index_##field##_##name)(%rdx)
#    endif
#   else /* !SHARED */
/* Static build: address _dl_x86_cpu_features %rip-relatively; nothing
   to load.  */
#    define LOAD_RTLD_GLOBAL_RO_RDX
#    define HAS_FEATURE(offset, field, name) \
  testl $(bit_##field##_##name), \
  _dl_x86_cpu_features+offset+(index_##field##_##name)(%rip)
#   endif /* SHARED */
#  else /* !__x86_64__ */
#   ifdef SHARED
/* Put the address of FUNC into %eax, computed relative to the GOT
   base held in %edx.  */
#    define LOAD_FUNC_GOT_EAX(func) \
  leal func@GOTOFF(%edx), %eax
#    if IS_IN (rtld)
/* Inside rtld: only the PIC register (%edx) is set up; the feature
   word is addressed as a GOT-relative offset from it.  */
#     define LOAD_GOT_AND_RTLD_GLOBAL_RO \
  LOAD_PIC_REG(dx)
#     define HAS_FEATURE(offset, field, name) \
  testl $(bit_##field##_##name), \
  offset+(index_##field##_##name)+_rtld_local_ro@GOTOFF(%edx)
#    else
/* Other shared objects: set up %edx as PIC register, then load the
   address of _rtld_global_ro from the GOT into %ecx and test relative
   to %ecx.  */
#     define LOAD_GOT_AND_RTLD_GLOBAL_RO \
  LOAD_PIC_REG(dx); \
  mov _rtld_global_ro@GOT(%edx), %ecx
#     define HAS_FEATURE(offset, field, name) \
  testl $(bit_##field##_##name), \
  RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+offset+(index_##field##_##name)(%ecx)
#    endif
#   else /* !SHARED */
/* Static build: absolute addressing, so no GOT or base register is
   needed.  */
#    define LOAD_FUNC_GOT_EAX(func) \
  leal func, %eax
#    define LOAD_GOT_AND_RTLD_GLOBAL_RO
#    define HAS_FEATURE(offset, field, name) \
  testl $(bit_##field##_##name), \
  _dl_x86_cpu_features+offset+(index_##field##_##name)
#   endif /* SHARED */
#  endif /* __x86_64__ */
# else /* !_LIBC || nonlib */
#  error "Sorry, <cpu-features.h> is unimplemented for assembler"
# endif /* _LIBC && !nonlib */

/* HAS_* evaluates to true if we may use the feature at runtime.  In
   this branch each expands to the `testl' instruction built by
   HAS_FEATURE; presumably the caller branches on the resulting
   flags.  */
# define HAS_CPU_FEATURE(name) HAS_FEATURE (CPUID_OFFSET, cpu, name)
# define HAS_ARCH_FEATURE(name) HAS_FEATURE (FEATURE_OFFSET, arch, name)

#else /* !__ASSEMBLER__ */
173
/* Slot numbers for the cpuid[] array of struct cpu_features; the
   index_cpu_<name> macros map each feature onto one of these.  */
enum
{
  COMMON_CPUID_INDEX_1 = 0,
  COMMON_CPUID_INDEX_7,
  COMMON_CPUID_INDEX_80000001,  /* for AMD */
  /* Slot count, not a slot: add new entries above this line so that it
     stays last.  */
  COMMON_CPUID_INDEX_MAX
};
182
/* Processor identification data together with glibc's internal
   feature bits.  NOTE(review): the assembler branch of this header
   reaches into this data through offsets from
   <cpu-features-offsets.h>, so the member layout presumably has to
   stay in sync with that file -- confirm before reordering or
   resizing anything here.  */
struct cpu_features
{
  /* Vendor classification of the processor.  */
  enum cpu_features_kind
    {
      arch_kind_unknown = 0,
      arch_kind_intel,
      arch_kind_amd,
      arch_kind_other
    } kind;
  /* NOTE(review): presumably the highest CPUID leaf the processor
     reports -- confirm against the code that fills this in.  */
  int max_cpuid;
  /* One set of register values per COMMON_CPUID_INDEX_* slot.
     CPU_FEATURES_CPU_P selects the slot with index_cpu_<name>, the
     register with reg_<name> and the bit with bit_cpu_<name>.  */
  struct cpuid_registers
  {
    unsigned int eax;
    unsigned int ebx;
    unsigned int ecx;
    unsigned int edx;
  } cpuid[COMMON_CPUID_INDEX_MAX];
  /* NOTE(review): presumably the processor family and model numbers;
     nothing in this header reads them -- confirm.  */
  unsigned int family;
  unsigned int model;
  /* Internal feature words holding the bit_arch_* flags, indexed by
     index_arch_<name> (see CPU_FEATURES_ARCH_P).  */
  unsigned int feature[FEATURE_INDEX_MAX];
};
204
205/* Used from outside of glibc to get access to the CPU features
206 structure. */
207extern const struct cpu_features *__get_cpu_features (void)
208 __attribute__ ((const));
209
210# if defined (_LIBC) && !IS_IN (nonlib)
211/* Unused for x86. */
212# define INIT_ARCH()
213# define __get_cpu_features() (&GLRO(dl_x86_cpu_features))
214# endif
215
216
/* Only used directly in cpu-features.c.

   CPU_FEATURES_CPU_P (ptr, name) is nonzero when bit_cpu_<name> is set
   in register reg_<name> of slot index_cpu_<name> of PTR->cpuid.
   CPU_FEATURES_ARCH_P (ptr, name) is nonzero when bit_arch_<name> is
   set in word index_arch_<name> of PTR->feature.

   PTR is any expression yielding a pointer to a struct cpu_features;
   it is parenthesised in the expansion so that arguments such as `&cf'
   or `base + i' bind as a whole before `->' is applied.  NAME is the
   bare feature name and is token-pasted, so it must not be a macro
   that needs expanding.  */
# define CPU_FEATURES_CPU_P(ptr, name) \
  (((ptr)->cpuid[index_cpu_##name].reg_##name & (bit_cpu_##name)) != 0)
# define CPU_FEATURES_ARCH_P(ptr, name) \
  (((ptr)->feature[index_arch_##name] & (bit_arch_##name)) != 0)

/* HAS_* evaluates to true if we may use the feature at runtime.  Both
   query the structure returned by __get_cpu_features ().  */
# define HAS_CPU_FEATURE(name) \
  CPU_FEATURES_CPU_P (__get_cpu_features (), name)
# define HAS_ARCH_FEATURE(name) \
  CPU_FEATURES_ARCH_P (__get_cpu_features (), name)
228
/* C view of where each feature bit lives.  For a CPUID feature NAME,
   index_cpu_NAME selects the slot of the cpuid[] array and reg_NAME
   the register member inside that slot.  The two tables are listed
   together, grouped by slot and register.  */

/* Slot COMMON_CPUID_INDEX_1, register edx.  */
# define index_cpu_CX8          COMMON_CPUID_INDEX_1
# define reg_CX8                edx
# define index_cpu_CMOV         COMMON_CPUID_INDEX_1
# define reg_CMOV               edx
# define index_cpu_SSE2         COMMON_CPUID_INDEX_1
# define reg_SSE2               edx
# define index_cpu_HTT          COMMON_CPUID_INDEX_1
# define reg_HTT                edx

/* Slot COMMON_CPUID_INDEX_1, register ecx.  */
# define index_cpu_SSSE3        COMMON_CPUID_INDEX_1
# define reg_SSSE3              ecx
# define index_cpu_SSE4_1       COMMON_CPUID_INDEX_1
# define reg_SSE4_1             ecx
# define index_cpu_SSE4_2       COMMON_CPUID_INDEX_1
# define reg_SSE4_2             ecx
# define index_cpu_AVX          COMMON_CPUID_INDEX_1
# define reg_AVX                ecx
# define index_cpu_FMA          COMMON_CPUID_INDEX_1
# define reg_FMA                ecx
# define index_cpu_POPCOUNT     COMMON_CPUID_INDEX_1
# define reg_POPCOUNT           ecx
# define index_cpu_OSXSAVE      COMMON_CPUID_INDEX_1
# define reg_OSXSAVE            ecx

/* Slot COMMON_CPUID_INDEX_80000001, register ecx.  */
# define index_cpu_FMA4         COMMON_CPUID_INDEX_80000001
# define reg_FMA4               ecx

/* Slot COMMON_CPUID_INDEX_7, register ebx.  */
# define index_cpu_AVX2         COMMON_CPUID_INDEX_7
# define reg_AVX2               ebx
# define index_cpu_AVX512F      COMMON_CPUID_INDEX_7
# define reg_AVX512F            ebx
# define index_cpu_AVX512DQ     COMMON_CPUID_INDEX_7
# define reg_AVX512DQ           ebx
# define index_cpu_ERMS         COMMON_CPUID_INDEX_7
# define reg_ERMS               ebx
# define index_cpu_RTM          COMMON_CPUID_INDEX_7
# define reg_RTM                ebx

/* Word of the feature[] array holding each internal "arch" bit.  All
   of them currently share FEATURE_INDEX_1.  */
# define index_arch_Fast_Rep_String             FEATURE_INDEX_1
# define index_arch_Fast_Copy_Backward          FEATURE_INDEX_1
# define index_arch_Slow_BSF                    FEATURE_INDEX_1
# define index_arch_Fast_Unaligned_Load         FEATURE_INDEX_1
# define index_arch_Prefer_PMINUB_for_stringop  FEATURE_INDEX_1
# define index_arch_AVX_Usable                  FEATURE_INDEX_1
# define index_arch_FMA_Usable                  FEATURE_INDEX_1
# define index_arch_FMA4_Usable                 FEATURE_INDEX_1
# define index_arch_Slow_SSE4_2                 FEATURE_INDEX_1
# define index_arch_AVX2_Usable                 FEATURE_INDEX_1
# define index_arch_AVX_Fast_Unaligned_Load     FEATURE_INDEX_1
# define index_arch_AVX512F_Usable              FEATURE_INDEX_1
# define index_arch_AVX512DQ_Usable             FEATURE_INDEX_1
# define index_arch_I586                        FEATURE_INDEX_1
# define index_arch_I686                        FEATURE_INDEX_1
# define index_arch_Prefer_MAP_32BIT_EXEC       FEATURE_INDEX_1
# define index_arch_Prefer_No_VZEROUPPER        FEATURE_INDEX_1
# define index_arch_Fast_Unaligned_Copy         FEATURE_INDEX_1
# define index_arch_Prefer_ERMS                 FEATURE_INDEX_1
# define index_arch_Use_dl_runtime_resolve_opt  FEATURE_INDEX_1
# define index_arch_Use_dl_runtime_resolve_slow FEATURE_INDEX_1
286
287#endif /* !__ASSEMBLER__ */
288
/* Decide, from the compiler's target macros, how much is known at
   build time.  Where the target settles a question the macro is the
   constant 1; otherwise HAS_I586 / HAS_I686 fall back to testing the
   corresponding arch feature bit, and HAS_CPUID is 0 when no listed
   target macro is defined.  The x86-64 case defines HAS_CPUID only.  */
#if defined __x86_64__
# define HAS_CPUID      1
#elif defined __i586__ || defined __pentium__
# define HAS_CPUID      1
# define HAS_I586       1
# define HAS_I686       HAS_ARCH_FEATURE (I686)
#elif (defined __i686__ \
       || defined __pentiumpro__ \
       || defined __pentium4__ \
       || defined __nocona__ \
       || defined __atom__ \
       || defined __core2__ \
       || defined __corei7__ \
       || defined __corei7_avx__ \
       || defined __core_avx2__ \
       || defined __nehalem__ \
       || defined __sandybridge__ \
       || defined __haswell__ \
       || defined __knl__ \
       || defined __bonnell__ \
       || defined __silvermont__ \
       || defined __k6__ \
       || defined __k8__ \
       || defined __athlon__ \
       || defined __amdfam10__ \
       || defined __bdver1__ \
       || defined __bdver2__ \
       || defined __bdver3__ \
       || defined __bdver4__ \
       || defined __btver1__ \
       || defined __btver2__)
# define HAS_CPUID      1
# define HAS_I586       1
# define HAS_I686       1
#else
# define HAS_CPUID      0
# define HAS_I586       HAS_ARCH_FEATURE (I586)
# define HAS_I686       HAS_ARCH_FEATURE (I686)
#endif
316
317#endif /* cpu_features_h */
318