1/* Initialize CPU feature data.
2 This file is part of the GNU C Library.
3 Copyright (C) 2008-2016 Free Software Foundation, Inc.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
18
19#include <cpuid.h>
20#include <cpu-features.h>
21
22static inline void
23get_common_indeces (struct cpu_features *cpu_features,
24 unsigned int *family, unsigned int *model,
25 unsigned int *extended_model)
26{
27 unsigned int eax;
28 __cpuid (1, eax, cpu_features->cpuid[COMMON_CPUID_INDEX_1].ebx,
29 cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx,
30 cpu_features->cpuid[COMMON_CPUID_INDEX_1].edx);
31 GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].eax = eax;
32 *family = (eax >> 8) & 0x0f;
33 *model = (eax >> 4) & 0x0f;
34 *extended_model = (eax >> 12) & 0xf0;
35 if (*family == 0x0f)
36 {
37 *family += (eax >> 20) & 0xff;
38 *model += *extended_model;
39 }
40}
41
42static inline void
43init_cpu_features (struct cpu_features *cpu_features)
44{
45 unsigned int ebx, ecx, edx;
46 unsigned int family = 0;
47 unsigned int model = 0;
48 enum cpu_features_kind kind;
49
50#if !HAS_CPUID
51 if (__get_cpuid_max (0, 0) == 0)
52 {
53 kind = arch_kind_other;
54 goto no_cpuid;
55 }
56#endif
57
58 __cpuid (0, cpu_features->max_cpuid, ebx, ecx, edx);
59
60 /* This spells out "GenuineIntel". */
61 if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
62 {
63 unsigned int extended_model;
64
65 kind = arch_kind_intel;
66
67 get_common_indeces (cpu_features, &family, &model, &extended_model);
68
69 if (family == 0x06)
70 {
71 ecx = cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx;
72 model += extended_model;
73 switch (model)
74 {
75 case 0x1c:
76 case 0x26:
77 /* BSF is slow on Atom. */
78 cpu_features->feature[index_Slow_BSF] |= bit_Slow_BSF;
79 break;
80
81 case 0x57:
82 /* Knights Landing. Enable Silvermont optimizations. */
83 cpu_features->feature[index_Prefer_No_VZEROUPPER]
84 |= bit_Prefer_No_VZEROUPPER;
85
86 case 0x37:
87 case 0x4a:
88 case 0x4d:
89 case 0x5a:
90 case 0x5d:
91 /* Unaligned load versions are faster than SSSE3
92 on Silvermont. */
93#if index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop
94# error index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop
95#endif
96#if index_Fast_Unaligned_Load != index_Slow_SSE4_2
97# error index_Fast_Unaligned_Load != index_Slow_SSE4_2
98#endif
99 cpu_features->feature[index_Fast_Unaligned_Load]
100 |= (bit_Fast_Unaligned_Load
101 | bit_Prefer_PMINUB_for_stringop
102 | bit_Slow_SSE4_2);
103 break;
104
105 default:
106 /* Unknown family 0x06 processors. Assuming this is one
107 of Core i3/i5/i7 processors if AVX is available. */
108 if ((ecx & bit_AVX) == 0)
109 break;
110
111 case 0x1a:
112 case 0x1e:
113 case 0x1f:
114 case 0x25:
115 case 0x2c:
116 case 0x2e:
117 case 0x2f:
118 /* Rep string instructions, copy backward, unaligned loads
119 and pminub are fast on Intel Core i3, i5 and i7. */
120#if index_Fast_Rep_String != index_Fast_Copy_Backward
121# error index_Fast_Rep_String != index_Fast_Copy_Backward
122#endif
123#if index_Fast_Rep_String != index_Fast_Unaligned_Load
124# error index_Fast_Rep_String != index_Fast_Unaligned_Load
125#endif
126#if index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop
127# error index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop
128#endif
129 cpu_features->feature[index_Fast_Rep_String]
130 |= (bit_Fast_Rep_String
131 | bit_Fast_Copy_Backward
132 | bit_Fast_Unaligned_Load
133 | bit_Prefer_PMINUB_for_stringop);
134 break;
135 }
136 }
137 }
138 /* This spells out "AuthenticAMD". */
139 else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
140 {
141 unsigned int extended_model;
142
143 kind = arch_kind_amd;
144
145 get_common_indeces (cpu_features, &family, &model, &extended_model);
146
147 ecx = cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx;
148
149 unsigned int eax;
150 __cpuid (0x80000000, eax, ebx, ecx, edx);
151 if (eax >= 0x80000001)
152 __cpuid (0x80000001,
153 cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].eax,
154 cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ebx,
155 cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ecx,
156 cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].edx);
157
158 if (family == 0x15)
159 {
160 /* "Excavator" */
161 if (model >= 0x60 && model <= 0x7f)
162 cpu_features->feature[index_Fast_Unaligned_Load]
163 |= bit_Fast_Unaligned_Load;
164 }
165 }
166 else
167 kind = arch_kind_other;
168
169 /* Support i586 if CX8 is available. */
170 if (HAS_CPU_FEATURE (CX8))
171 cpu_features->feature[index_I586] |= bit_I586;
172
173 /* Support i686 if CMOV is available. */
174 if (HAS_CPU_FEATURE (CMOV))
175 cpu_features->feature[index_I686] |= bit_I686;
176
177 if (cpu_features->max_cpuid >= 7)
178 __cpuid_count (7, 0,
179 cpu_features->cpuid[COMMON_CPUID_INDEX_7].eax,
180 cpu_features->cpuid[COMMON_CPUID_INDEX_7].ebx,
181 cpu_features->cpuid[COMMON_CPUID_INDEX_7].ecx,
182 cpu_features->cpuid[COMMON_CPUID_INDEX_7].edx);
183
184 /* Can we call xgetbv? */
185 if (HAS_CPU_FEATURE (OSXSAVE))
186 {
187 unsigned int xcrlow;
188 unsigned int xcrhigh;
189 asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
190 /* Is YMM and XMM state usable? */
191 if ((xcrlow & (bit_YMM_state | bit_XMM_state)) ==
192 (bit_YMM_state | bit_XMM_state))
193 {
194 /* Determine if AVX is usable. */
195 if (HAS_CPU_FEATURE (AVX))
196 cpu_features->feature[index_AVX_Usable] |= bit_AVX_Usable;
197#if index_AVX2_Usable != index_AVX_Fast_Unaligned_Load
198# error index_AVX2_Usable != index_AVX_Fast_Unaligned_Load
199#endif
200 /* Determine if AVX2 is usable. Unaligned load with 256-bit
201 AVX registers are faster on processors with AVX2. */
202 if (HAS_CPU_FEATURE (AVX2))
203 cpu_features->feature[index_AVX2_Usable]
204 |= bit_AVX2_Usable | bit_AVX_Fast_Unaligned_Load;
205 /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and
206 ZMM16-ZMM31 state are enabled. */
207 if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state
208 | bit_ZMM16_31_state)) ==
209 (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state))
210 {
211 /* Determine if AVX512F is usable. */
212 if (HAS_CPU_FEATURE (AVX512F))
213 {
214 cpu_features->feature[index_AVX512F_Usable]
215 |= bit_AVX512F_Usable;
216 /* Determine if AVX512DQ is usable. */
217 if (HAS_CPU_FEATURE (AVX512DQ))
218 cpu_features->feature[index_AVX512DQ_Usable]
219 |= bit_AVX512DQ_Usable;
220 }
221 }
222 /* Determine if FMA is usable. */
223 if (HAS_CPU_FEATURE (FMA))
224 cpu_features->feature[index_FMA_Usable] |= bit_FMA_Usable;
225 /* Determine if FMA4 is usable. */
226 if (HAS_CPU_FEATURE (FMA4))
227 cpu_features->feature[index_FMA4_Usable] |= bit_FMA4_Usable;
228 }
229 }
230
231#if !HAS_CPUID
232no_cpuid:
233#endif
234
235 cpu_features->family = family;
236 cpu_features->model = model;
237 cpu_features->kind = kind;
238}
239