1/* This file is part of the GNU C Library.
2 Copyright (C) 2008-2016 Free Software Foundation, Inc.
3
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
8
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, see
16 <http://www.gnu.org/licenses/>. */
17
18#ifndef cpu_features_h
19#define cpu_features_h
20
/* Internal (derived) feature bits.  Each one is a mask inside the word
   cpu_features.feature[FEATURE_INDEX_1] and is queried through
   HAS_ARCH_FEATURE.  Bit 3 is not assigned in this list.  */
#define bit_Fast_Rep_String             (1 << 0)
#define bit_Fast_Copy_Backward          (1 << 1)
#define bit_Slow_BSF                    (1 << 2)
#define bit_Fast_Unaligned_Load         (1 << 4)
#define bit_Prefer_PMINUB_for_stringop  (1 << 5)
#define bit_AVX_Usable                  (1 << 6)
#define bit_FMA_Usable                  (1 << 7)
#define bit_FMA4_Usable                 (1 << 8)
#define bit_Slow_SSE4_2                 (1 << 9)
#define bit_AVX2_Usable                 (1 << 10)
#define bit_AVX_Fast_Unaligned_Load     (1 << 11)
#define bit_AVX512F_Usable              (1 << 12)
#define bit_AVX512DQ_Usable             (1 << 13)
#define bit_I586                        (1 << 14)
#define bit_I686                        (1 << 15)
#define bit_Prefer_MAP_32BIT_EXEC       (1 << 16)
#define bit_Prefer_No_VZEROUPPER        (1 << 17)
38
/* CPUID feature flags.  Every bit_* below is a mask within a single CPUID
   output register.  The groups follow the index_* (which saved CPUID slot)
   and reg_* (which register) definitions further down in this file, so
   that a mask is never read against the wrong leaf.  */

/* COMMON_CPUID_INDEX_1, register EDX.  */
#define bit_CX8         (1 << 8)
#define bit_CMOV        (1 << 15)
#define bit_SSE2        (1 << 26)

/* COMMON_CPUID_INDEX_1, register ECX.  */
#define bit_SSSE3       (1 << 9)
#define bit_FMA         (1 << 12)
#define bit_SSE4_1      (1 << 19)
#define bit_SSE4_2      (1 << 20)
#define bit_POPCOUNT    (1 << 23)
#define bit_OSXSAVE     (1 << 27)
#define bit_AVX         (1 << 28)

/* COMMON_CPUID_INDEX_7, register EBX.  */
#define bit_AVX2        (1 << 5)
#define bit_RTM         (1 << 11)
#define bit_AVX512F     (1 << 16)
#define bit_AVX512DQ    (1 << 17)

/* COMMON_CPUID_INDEX_80000001, register ECX.  FMA4 is not a bit of
   COMMON_CPUID_INDEX_1: index_FMA4 selects COMMON_CPUID_INDEX_80000001.  */
#define bit_FMA4        (1 << 16)
59
/* XCR0 feature flags.  Every mask is written as 1 << bit-position so the
   position of each state component can be read off directly; in
   particular bit_YMM_state is bit 2 (value 4).  */
#define bit_XMM_state           (1 << 1)
#define bit_YMM_state           (1 << 2)
#define bit_Opmask_state        (1 << 5)
#define bit_ZMM0_15_state       (1 << 6)
#define bit_ZMM16_31_state      (1 << 7)
66
/* Position, within the array of internal feature words, of the first
   (and currently only) word of internal feature bits.  */
#define FEATURE_INDEX_1         0

/* Number of words the array of internal feature bits currently holds.  */
#define FEATURE_INDEX_MAX       1
72
73#ifdef __ASSEMBLER__
74
75# include <ifunc-defines.h>
76# include <rtld-global-offsets.h>
77
/* Assembler view of the CPUID bits: each index_* is the byte offset,
   relative to the start of the saved CPUID registers, of the register
   that holds the bit (slot number * CPUID_SIZE + register offset).
   HAS_FEATURE wraps the expansion in parentheses itself.  */

/* COMMON_CPUID_INDEX_1, register EDX.  */
# define index_CX8      COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET
# define index_CMOV     COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET
# define index_SSE2     COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET

/* COMMON_CPUID_INDEX_1, register ECX.  */
# define index_SSSE3    COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
# define index_SSE4_1   COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
# define index_SSE4_2   COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
# define index_AVX      COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET

/* COMMON_CPUID_INDEX_7, register EBX.  */
# define index_AVX2     COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET
86
/* Assembler view of the internal feature bits: each index_* is the byte
   offset of the feature word holding the bit (word number * FEATURE_SIZE).
   Every bit currently lives in word FEATURE_INDEX_1.  */
# define index_Fast_Rep_String              FEATURE_INDEX_1*FEATURE_SIZE
# define index_Fast_Copy_Backward           FEATURE_INDEX_1*FEATURE_SIZE
# define index_Slow_BSF                     FEATURE_INDEX_1*FEATURE_SIZE
# define index_Fast_Unaligned_Load          FEATURE_INDEX_1*FEATURE_SIZE
# define index_Prefer_PMINUB_for_stringop   FEATURE_INDEX_1*FEATURE_SIZE
# define index_AVX_Usable                   FEATURE_INDEX_1*FEATURE_SIZE
# define index_FMA_Usable                   FEATURE_INDEX_1*FEATURE_SIZE
# define index_FMA4_Usable                  FEATURE_INDEX_1*FEATURE_SIZE
# define index_Slow_SSE4_2                  FEATURE_INDEX_1*FEATURE_SIZE
# define index_AVX2_Usable                  FEATURE_INDEX_1*FEATURE_SIZE
# define index_AVX_Fast_Unaligned_Load      FEATURE_INDEX_1*FEATURE_SIZE
# define index_AVX512F_Usable               FEATURE_INDEX_1*FEATURE_SIZE
# define index_AVX512DQ_Usable              FEATURE_INDEX_1*FEATURE_SIZE
# define index_I586                         FEATURE_INDEX_1*FEATURE_SIZE
# define index_I686                         FEATURE_INDEX_1*FEATURE_SIZE
# define index_Prefer_MAP_32BIT_EXEC        FEATURE_INDEX_1*FEATURE_SIZE
# define index_Prefer_No_VZEROUPPER         FEATURE_INDEX_1*FEATURE_SIZE
104
105
106# if defined (_LIBC) && !IS_IN (nonlib)
107# ifdef __x86_64__
108# ifdef SHARED
109# if IS_IN (rtld)
110# define LOAD_RTLD_GLOBAL_RO_RDX
111# define HAS_FEATURE(offset, name) \
112 testl $(bit_##name), _rtld_local_ro+offset+(index_##name)(%rip)
113# else
114# define LOAD_RTLD_GLOBAL_RO_RDX \
115 mov _rtld_global_ro@GOTPCREL(%rip), %RDX_LP
116# define HAS_FEATURE(offset, name) \
117 testl $(bit_##name), \
118 RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+offset+(index_##name)(%rdx)
119# endif
120# else /* SHARED */
121# define LOAD_RTLD_GLOBAL_RO_RDX
122# define HAS_FEATURE(offset, name) \
123 testl $(bit_##name), _dl_x86_cpu_features+offset+(index_##name)(%rip)
124# endif /* !SHARED */
125# else /* __x86_64__ */
126# ifdef SHARED
127# define LOAD_FUNC_GOT_EAX(func) \
128 leal func@GOTOFF(%edx), %eax
129# if IS_IN (rtld)
130# define LOAD_GOT_AND_RTLD_GLOBAL_RO \
131 LOAD_PIC_REG(dx)
132# define HAS_FEATURE(offset, name) \
133 testl $(bit_##name), offset+(index_##name)+_rtld_local_ro@GOTOFF(%edx)
134# else
135# define LOAD_GOT_AND_RTLD_GLOBAL_RO \
136 LOAD_PIC_REG(dx); \
137 mov _rtld_global_ro@GOT(%edx), %ecx
138# define HAS_FEATURE(offset, name) \
139 testl $(bit_##name), \
140 RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+offset+(index_##name)(%ecx)
141# endif
142# else /* SHARED */
143# define LOAD_FUNC_GOT_EAX(func) \
144 leal func, %eax
145# define LOAD_GOT_AND_RTLD_GLOBAL_RO
146# define HAS_FEATURE(offset, name) \
147 testl $(bit_##name), _dl_x86_cpu_features+offset+(index_##name)
148# endif /* !SHARED */
149# endif /* !__x86_64__ */
150# else /* _LIBC && !nonlib */
151# error "Sorry, <cpu-features.h> is unimplemented for assembler"
152# endif /* !_LIBC || nonlib */
153
/* HAS_CPU_FEATURE tests a bit in the saved CPUID registers and
   HAS_ARCH_FEATURE a bit in the internal feature words; the bit is set if
   we may use the feature at runtime.  Each is HAS_FEATURE applied with
   the matching base offset, so in assembler the outcome is left in the
   flags by the testl instruction rather than produced as a value.  */
# define HAS_CPU_FEATURE(name)  HAS_FEATURE (CPUID_OFFSET, name)
# define HAS_ARCH_FEATURE(name) HAS_FEATURE (FEATURE_OFFSET, name)
157
158#else /* __ASSEMBLER__ */
159
/* Slots of the cpu_features.cpuid[] array.  Each enumerator is named
   after the CPUID leaf (1, 7, 0x80000001) it stands for;
   COMMON_CPUID_INDEX_MAX is the number of slots.  */
enum
{
  COMMON_CPUID_INDEX_1 = 0,
  COMMON_CPUID_INDEX_7,
  COMMON_CPUID_INDEX_80000001,  /* for AMD */
  /* Add new slots above this line: it must stay last so that it keeps
     counting them.  */
  COMMON_CPUID_INDEX_MAX
};
168
169struct cpu_features
170{
171 enum cpu_features_kind
172 {
173 arch_kind_unknown = 0,
174 arch_kind_intel,
175 arch_kind_amd,
176 arch_kind_other
177 } kind;
178 int max_cpuid;
179 struct cpuid_registers
180 {
181 unsigned int eax;
182 unsigned int ebx;
183 unsigned int ecx;
184 unsigned int edx;
185 } cpuid[COMMON_CPUID_INDEX_MAX];
186 unsigned int family;
187 unsigned int model;
188 unsigned int feature[FEATURE_INDEX_MAX];
189};
190
191/* Used from outside of glibc to get access to the CPU features
192 structure. */
193extern const struct cpu_features *__get_cpu_features (void)
194 __attribute__ ((const));
195
196# if defined (_LIBC) && !IS_IN (nonlib)
197/* Unused for x86. */
198# define INIT_ARCH()
199# define __get_cpu_features() (&GLRO(dl_x86_cpu_features))
200# endif
201
202
/* Runtime feature tests; each evaluates to true if we may use the feature.
   HAS_CPU_FEATURE (X) checks mask bit_X in the saved CPUID register reg_X
   of slot index_X.  HAS_ARCH_FEATURE (X) checks mask bit_X in the internal
   feature word index_X.  Both read the structure that
   __get_cpu_features () points to and yield 1 or 0.  */
# define HAS_CPU_FEATURE(name) \
  ((__get_cpu_features ()->cpuid[index_##name].reg_##name \
    & (bit_##name)) != 0)
# define HAS_ARCH_FEATURE(name) \
  ((__get_cpu_features ()->feature[index_##name] \
    & (bit_##name)) != 0)
208
/* Slot of cpu_features.cpuid[] that holds each CPUID bit, grouped by
   slot.  Used by HAS_CPU_FEATURE together with reg_* and bit_*.  */

/* Bits found in COMMON_CPUID_INDEX_1.  */
# define index_CX8          COMMON_CPUID_INDEX_1
# define index_CMOV         COMMON_CPUID_INDEX_1
# define index_SSE2         COMMON_CPUID_INDEX_1
# define index_SSSE3        COMMON_CPUID_INDEX_1
# define index_FMA          COMMON_CPUID_INDEX_1
# define index_SSE4_1       COMMON_CPUID_INDEX_1
# define index_SSE4_2       COMMON_CPUID_INDEX_1
# define index_POPCOUNT     COMMON_CPUID_INDEX_1
# define index_OSXSAVE      COMMON_CPUID_INDEX_1
# define index_AVX          COMMON_CPUID_INDEX_1

/* Bits found in COMMON_CPUID_INDEX_7.  */
# define index_AVX2         COMMON_CPUID_INDEX_7
# define index_RTM          COMMON_CPUID_INDEX_7
# define index_AVX512F      COMMON_CPUID_INDEX_7
# define index_AVX512DQ     COMMON_CPUID_INDEX_7

/* Bits found in COMMON_CPUID_INDEX_80000001.  */
# define index_FMA4         COMMON_CPUID_INDEX_80000001
224
/* Member of struct cpuid_registers that holds each CPUID bit, grouped by
   register.  HAS_CPU_FEATURE pastes these in as the member name.  */

/* Bits reported in EDX.  */
# define reg_CX8        edx
# define reg_CMOV       edx
# define reg_SSE2       edx

/* Bits reported in ECX.  */
# define reg_SSSE3      ecx
# define reg_FMA        ecx
# define reg_FMA4       ecx
# define reg_SSE4_1     ecx
# define reg_SSE4_2     ecx
# define reg_POPCOUNT   ecx
# define reg_OSXSAVE    ecx
# define reg_AVX        ecx

/* Bits reported in EBX.  */
# define reg_AVX2       ebx
# define reg_RTM        ebx
# define reg_AVX512F    ebx
# define reg_AVX512DQ   ebx
240
/* Word of cpu_features.feature[] that holds each internal feature bit.
   Used by HAS_ARCH_FEATURE together with bit_*.  Every bit currently
   lives in word FEATURE_INDEX_1.  */
# define index_Fast_Rep_String              FEATURE_INDEX_1
# define index_Fast_Copy_Backward           FEATURE_INDEX_1
# define index_Slow_BSF                     FEATURE_INDEX_1
# define index_Fast_Unaligned_Load          FEATURE_INDEX_1
# define index_Prefer_PMINUB_for_stringop   FEATURE_INDEX_1
# define index_AVX_Usable                   FEATURE_INDEX_1
# define index_FMA_Usable                   FEATURE_INDEX_1
# define index_FMA4_Usable                  FEATURE_INDEX_1
# define index_Slow_SSE4_2                  FEATURE_INDEX_1
# define index_AVX2_Usable                  FEATURE_INDEX_1
# define index_AVX_Fast_Unaligned_Load      FEATURE_INDEX_1
# define index_AVX512F_Usable               FEATURE_INDEX_1
# define index_AVX512DQ_Usable              FEATURE_INDEX_1
# define index_I586                         FEATURE_INDEX_1
# define index_I686                         FEATURE_INDEX_1
# define index_Prefer_MAP_32BIT_EXEC        FEATURE_INDEX_1
# define index_Prefer_No_VZEROUPPER         FEATURE_INDEX_1
258
259#endif /* !__ASSEMBLER__ */
260
/* What the compilation target already guarantees, keyed off the
   architecture macros predefined by the compiler.  HAS_I586 and HAS_I686
   become the constant 1 when the target implies them and a runtime
   HAS_ARCH_FEATURE test otherwise.  On x86-64 only HAS_CPUID is defined.
   When none of the listed macros is present HAS_CPUID is 0.  */
#if defined (__x86_64__)
# define HAS_CPUID 1
#elif defined (__i586__) || defined (__pentium__)
# define HAS_CPUID 1
# define HAS_I586 1
# define HAS_I686 HAS_ARCH_FEATURE (I686)
#elif (defined (__i686__) \
       || defined (__pentiumpro__) \
       || defined (__pentium4__) \
       || defined (__nocona__) \
       || defined (__atom__) \
       || defined (__core2__) \
       || defined (__corei7__) \
       || defined (__corei7_avx__) \
       || defined (__core_avx2__) \
       || defined (__nehalem__) \
       || defined (__sandybridge__) \
       || defined (__haswell__) \
       || defined (__knl__) \
       || defined (__bonnell__) \
       || defined (__silvermont__) \
       || defined (__k6__) \
       || defined (__k8__) \
       || defined (__athlon__) \
       || defined (__amdfam10__) \
       || defined (__bdver1__) \
       || defined (__bdver2__) \
       || defined (__bdver3__) \
       || defined (__bdver4__) \
       || defined (__btver1__) \
       || defined (__btver2__))
# define HAS_CPUID 1
# define HAS_I586 1
# define HAS_I686 1
#else
# define HAS_CPUID 0
# define HAS_I586 HAS_ARCH_FEATURE (I586)
# define HAS_I686 HAS_ARCH_FEATURE (I686)
#endif
288
289#endif /* cpu_features_h */
290