| 1 | /* This file is part of the GNU C Library. |
| 2 | Copyright (C) 2008-2017 Free Software Foundation, Inc. |
| 3 | |
| 4 | The GNU C Library is free software; you can redistribute it and/or |
| 5 | modify it under the terms of the GNU Lesser General Public |
| 6 | License as published by the Free Software Foundation; either |
| 7 | version 2.1 of the License, or (at your option) any later version. |
| 8 | |
| 9 | The GNU C Library is distributed in the hope that it will be useful, |
| 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 12 | Lesser General Public License for more details. |
| 13 | |
| 14 | You should have received a copy of the GNU Lesser General Public |
| 15 | License along with the GNU C Library; if not, see |
| 16 | <http://www.gnu.org/licenses/>. */ |
| 17 | |
| 18 | #ifndef cpu_features_h |
| 19 | #define cpu_features_h |
| 20 | /* Masks for the glibc-internal "arch" flags; each is tested against the feature word selected by the matching index_arch_* macro. */ |
| 21 | #define bit_arch_Fast_Rep_String (1 << 0) |
| 22 | #define bit_arch_Fast_Copy_Backward (1 << 1) |
| 23 | #define bit_arch_Slow_BSF (1 << 2) /* The next mask is bit 4; bit 3 is not assigned in this list. */ |
| 24 | #define bit_arch_Fast_Unaligned_Load (1 << 4) |
| 25 | #define bit_arch_Prefer_PMINUB_for_stringop (1 << 5) |
| 26 | #define bit_arch_AVX_Usable (1 << 6) |
| 27 | #define bit_arch_FMA_Usable (1 << 7) |
| 28 | #define bit_arch_FMA4_Usable (1 << 8) |
| 29 | #define bit_arch_Slow_SSE4_2 (1 << 9) |
| 30 | #define bit_arch_AVX2_Usable (1 << 10) |
| 31 | #define bit_arch_AVX_Fast_Unaligned_Load (1 << 11) |
| 32 | #define bit_arch_AVX512F_Usable (1 << 12) |
| 33 | #define bit_arch_AVX512DQ_Usable (1 << 13) |
| 34 | #define bit_arch_I586 (1 << 14) |
| 35 | #define bit_arch_I686 (1 << 15) |
| 36 | #define bit_arch_Prefer_MAP_32BIT_EXEC (1 << 16) |
| 37 | #define bit_arch_Prefer_No_VZEROUPPER (1 << 17) |
| 38 | #define bit_arch_Fast_Unaligned_Copy (1 << 18) |
| 39 | #define bit_arch_Prefer_ERMS (1 << 19) |
| 40 | #define bit_arch_Use_dl_runtime_resolve_opt (1 << 20) |
| 41 | #define bit_arch_Use_dl_runtime_resolve_slow (1 << 21) |
| 42 | |
| 43 | /* CPUID Feature flags. */ |
| 44 | /* In C code each bit_cpu_NAME is applied to register reg_NAME of the cpuid[] entry selected by index_cpu_NAME. */ |
| 45 | /* COMMON_CPUID_INDEX_1. */ |
| 46 | #define bit_cpu_CX8 (1 << 8) |
| 47 | #define bit_cpu_CMOV (1 << 15) |
| 48 | #define bit_cpu_SSE2 (1 << 26) |
| 49 | #define bit_cpu_SSSE3 (1 << 9) |
| 50 | #define bit_cpu_SSE4_1 (1 << 19) |
| 51 | #define bit_cpu_SSE4_2 (1 << 20) |
| 52 | #define bit_cpu_OSXSAVE (1 << 27) |
| 53 | #define bit_cpu_AVX (1 << 28) /* In ecx (reg_AVX); HTT uses the same bit number but in edx. */ |
| 54 | #define bit_cpu_POPCOUNT (1 << 23) |
| 55 | #define bit_cpu_FMA (1 << 12) |
| 56 | #define bit_cpu_FMA4 (1 << 16) /* Listed under this heading, but index_cpu_FMA4 is COMMON_CPUID_INDEX_80000001 (reg_FMA4 is ecx). */ |
| 57 | #define bit_cpu_HTT (1 << 28) /* In edx (reg_HTT). */ |
| 58 | |
| 59 | /* COMMON_CPUID_INDEX_7. All five masks apply to ebx (see the reg_* macros). */ |
| 60 | #define bit_cpu_ERMS (1 << 9) |
| 61 | #define bit_cpu_RTM (1 << 11) |
| 62 | #define bit_cpu_AVX2 (1 << 5) |
| 63 | #define bit_cpu_AVX512F (1 << 16) |
| 64 | #define bit_cpu_AVX512DQ (1 << 17) |
| 65 | |
| 66 | /* XCR0 Feature flags. Nothing in this header uses these masks. NOTE(review): presumably compared against the XCR0 value in cpu-features.c -- confirm. */ |
| 67 | #define bit_XMM_state (1 << 1) |
| 68 | #define bit_YMM_state (1 << 2) |
| 69 | #define bit_Opmask_state (1 << 5) |
| 70 | #define bit_ZMM0_15_state (1 << 6) |
| 71 | #define bit_ZMM16_31_state (1 << 7) |
| 72 | |
| 73 | /* The integer bit array index for the first set of internal feature bits. */ |
| 74 | #define FEATURE_INDEX_1 0 /* Every index_arch_* macro in this header is built from this index. */ |
| 75 | |
| 76 | /* The current maximum size of the feature integer bit array. */ |
| 77 | #define FEATURE_INDEX_MAX 1 /* Dimension of the feature[] member of struct cpu_features. */ |
| 78 | |
| 79 | #ifdef __ASSEMBLER__ |
| 80 | |
| 81 | # include <cpu-features-offsets.h> |
| 82 | /* Assembler view: index_cpu_NAME is an offset, leaf index * CPUID_SIZE plus the offset of the register holding the bit. NOTE(review): COMMON_CPUID_INDEX_*, CPUID_SIZE and CPUID_*_OFFSET are not defined on this path of the header; presumably they come from <cpu-features-offsets.h> -- confirm. */ |
| 83 | # define index_cpu_CX8 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET |
| 84 | # define index_cpu_CMOV COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET |
| 85 | # define index_cpu_SSE2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET |
| 86 | # define index_cpu_SSSE3 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET |
| 87 | # define index_cpu_SSE4_1 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET |
| 88 | # define index_cpu_SSE4_2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET |
| 89 | # define index_cpu_AVX COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET |
| 90 | # define index_cpu_AVX2 COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET |
| 91 | # define index_cpu_ERMS COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET /* Only these nine CPUID features have an assembler index here; the C branch defines more. */ |
| 92 | /* Assembler view of the arch flags: every flag uses FEATURE_INDEX_1 scaled by FEATURE_SIZE. NOTE(review): FEATURE_SIZE is not defined in this header; presumably from <cpu-features-offsets.h> -- confirm. */ |
| 93 | # define index_arch_Fast_Rep_String FEATURE_INDEX_1*FEATURE_SIZE |
| 94 | # define index_arch_Fast_Copy_Backward FEATURE_INDEX_1*FEATURE_SIZE |
| 95 | # define index_arch_Slow_BSF FEATURE_INDEX_1*FEATURE_SIZE |
| 96 | # define index_arch_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE |
| 97 | # define index_arch_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE |
| 98 | # define index_arch_AVX_Usable FEATURE_INDEX_1*FEATURE_SIZE |
| 99 | # define index_arch_FMA_Usable FEATURE_INDEX_1*FEATURE_SIZE |
| 100 | # define index_arch_FMA4_Usable FEATURE_INDEX_1*FEATURE_SIZE |
| 101 | # define index_arch_Slow_SSE4_2 FEATURE_INDEX_1*FEATURE_SIZE |
| 102 | # define index_arch_AVX2_Usable FEATURE_INDEX_1*FEATURE_SIZE |
| 103 | # define index_arch_AVX_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE |
| 104 | # define index_arch_AVX512F_Usable FEATURE_INDEX_1*FEATURE_SIZE |
| 105 | # define index_arch_AVX512DQ_Usable FEATURE_INDEX_1*FEATURE_SIZE |
| 106 | # define index_arch_I586 FEATURE_INDEX_1*FEATURE_SIZE |
| 107 | # define index_arch_I686 FEATURE_INDEX_1*FEATURE_SIZE |
| 108 | # define index_arch_Prefer_MAP_32BIT_EXEC FEATURE_INDEX_1*FEATURE_SIZE |
| 109 | # define index_arch_Prefer_No_VZEROUPPER FEATURE_INDEX_1*FEATURE_SIZE |
| 110 | # define index_arch_Fast_Unaligned_Copy FEATURE_INDEX_1*FEATURE_SIZE |
| 111 | # define index_arch_Prefer_ERMS FEATURE_INDEX_1*FEATURE_SIZE |
| 112 | # define index_arch_Use_dl_runtime_resolve_opt FEATURE_INDEX_1*FEATURE_SIZE |
| 113 | # define index_arch_Use_dl_runtime_resolve_slow FEATURE_INDEX_1*FEATURE_SIZE |
| 114 | |
| 115 | /* HAS_FEATURE(offset, field, name) emits a testl of bit_FIELD_NAME against the word at offset + index_FIELD_NAME; how that word is addressed depends on the build flavour selected below. */ |
| 116 | # if defined (_LIBC) && !IS_IN (nonlib) |
| 117 | # ifdef __x86_64__ |
| 118 | # ifdef SHARED |
| 119 | # if IS_IN (rtld) /* _rtld_local_ro is addressed %rip-relative, so the load macro is empty. */ |
| 120 | # define LOAD_RTLD_GLOBAL_RO_RDX |
| 121 | # define HAS_FEATURE(offset, field, name) \ |
| 122 | testl $(bit_##field##_##name), \ |
| 123 | _rtld_local_ro+offset+(index_##field##_##name)(%rip) |
| 124 | # else /* Load the GOT entry of _rtld_global_ro into %RDX_LP; HAS_FEATURE then addresses through %rdx. */ |
| 125 | # define LOAD_RTLD_GLOBAL_RO_RDX \ |
| 126 | mov _rtld_global_ro@GOTPCREL(%rip), %RDX_LP |
| 127 | # define HAS_FEATURE(offset, field, name) \ |
| 128 | testl $(bit_##field##_##name), \ |
| 129 | RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+offset+(index_##field##_##name)(%rdx) |
| 130 | # endif |
| 131 | # else /* SHARED */ /* Non-SHARED build: _dl_x86_cpu_features is addressed %rip-relative; the load macro is empty. */ |
| 132 | # define LOAD_RTLD_GLOBAL_RO_RDX |
| 133 | # define HAS_FEATURE(offset, field, name) \ |
| 134 | testl $(bit_##field##_##name), \ |
| 135 | _dl_x86_cpu_features+offset+(index_##field##_##name)(%rip) |
| 136 | # endif /* !SHARED */ |
| 137 | # else /* __x86_64__ */ /* Not x86_64: the SHARED variants below address through %edx and %ecx. */ |
| 138 | # ifdef SHARED |
| 139 | # define LOAD_FUNC_GOT_EAX(func) \ |
| 140 | leal func@GOTOFF(%edx), %eax |
| 141 | # if IS_IN (rtld) /* LOAD_PIC_REG (defined elsewhere) is applied to dx; _rtld_local_ro is then reached via @GOTOFF(%edx). */ |
| 142 | # define LOAD_GOT_AND_RTLD_GLOBAL_RO \ |
| 143 | LOAD_PIC_REG(dx) |
| 144 | # define HAS_FEATURE(offset, field, name) \ |
| 145 | testl $(bit_##field##_##name), \ |
| 146 | offset+(index_##field##_##name)+_rtld_local_ro@GOTOFF(%edx) |
| 147 | # else /* Also load the GOT entry of _rtld_global_ro into %ecx; HAS_FEATURE addresses through %ecx. */ |
| 148 | # define LOAD_GOT_AND_RTLD_GLOBAL_RO \ |
| 149 | LOAD_PIC_REG(dx); \ |
| 150 | mov _rtld_global_ro@GOT(%edx), %ecx |
| 151 | # define HAS_FEATURE(offset, field, name) \ |
| 152 | testl $(bit_##field##_##name), \ |
| 153 | RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+offset+(index_##field##_##name)(%ecx) |
| 154 | # endif |
| 155 | # else /* SHARED */ /* Non-SHARED build: symbols are referenced directly and the load macro is empty. */ |
| 156 | # define LOAD_FUNC_GOT_EAX(func) \ |
| 157 | leal func, %eax |
| 158 | # define LOAD_GOT_AND_RTLD_GLOBAL_RO |
| 159 | # define HAS_FEATURE(offset, field, name) \ |
| 160 | testl $(bit_##field##_##name), \ |
| 161 | _dl_x86_cpu_features+offset+(index_##field##_##name) |
| 162 | # endif /* !SHARED */ |
| 163 | # endif /* !__x86_64__ */ |
| 164 | # else /* _LIBC && !nonlib */ |
| 165 | # error "Sorry, <cpu-features.h> is unimplemented for assembler" |
| 166 | # endif /* !_LIBC || nonlib */ |
| 167 | |
| 168 | /* HAS_* tests whether we may use the feature at runtime. In assembler each expands to the testl from HAS_FEATURE, so the result is left in the flags rather than in a value. CPUID_OFFSET and FEATURE_OFFSET are not defined in this header. */ |
| 169 | # define HAS_CPU_FEATURE(name) HAS_FEATURE (CPUID_OFFSET, cpu, name) |
| 170 | # define HAS_ARCH_FEATURE(name) HAS_FEATURE (FEATURE_OFFSET, arch, name) |
| 171 | |
| 172 | #else /* __ASSEMBLER__ */ |
| 173 | /* Indices into the cpuid[] member of struct cpu_features; COMMON_CPUID_INDEX_MAX is the array length. The index_cpu_* macros map each feature to one of these. */ |
| 174 | enum |
| 175 | { |
| 176 | COMMON_CPUID_INDEX_1 = 0, |
| 177 | COMMON_CPUID_INDEX_7, |
| 178 | COMMON_CPUID_INDEX_80000001, /* for AMD */ |
| 179 | /* Keep the following line at the end. */ |
| 180 | COMMON_CPUID_INDEX_MAX |
| 181 | }; |
| 182 | /* Saved CPUID register values plus the internal bit_arch_* flags. NOTE(review): assembler code reaches cpuid[] and feature[] through CPUID_OFFSET and FEATURE_OFFSET, so the member layout presumably must stay in sync with <cpu-features-offsets.h> -- confirm before reordering. */ |
| 183 | struct cpu_features |
| 184 | { |
| 185 | enum cpu_features_kind |
| 186 | { |
| 187 | arch_kind_unknown = 0, |
| 188 | arch_kind_intel, |
| 189 | arch_kind_amd, |
| 190 | arch_kind_other |
| 191 | } kind; |
| 192 | int max_cpuid; /* NOTE(review): presumably the highest supported basic CPUID leaf -- confirm in cpu-features.c. */ |
| 193 | struct cpuid_registers |
| 194 | { |
| 195 | unsigned int eax; |
| 196 | unsigned int ebx; |
| 197 | unsigned int ecx; |
| 198 | unsigned int edx; |
| 199 | } cpuid[COMMON_CPUID_INDEX_MAX]; /* One eax/ebx/ecx/edx set per COMMON_CPUID_INDEX_* value. */ |
| 200 | unsigned int family; |
| 201 | unsigned int model; |
| 202 | unsigned int feature[FEATURE_INDEX_MAX]; /* bit_arch_* flags, indexed by index_arch_*. */ |
| 203 | }; |
| 204 | /* Returns a pointer to a const struct cpu_features and takes no arguments; declared with __attribute__ ((const)). Inside libc a macro of the same name, defined further down in this header, replaces calls to it. */ |
| 205 | /* Used from outside of glibc to get access to the CPU features |
| 206 | structure. */ |
| 207 | extern const struct cpu_features *__get_cpu_features (void) |
| 208 | __attribute__ ((const)); |
| 209 | |
| 210 | # if defined (_LIBC) && !IS_IN (nonlib) /* Only when building libc itself, outside the nonlib parts. */ |
| 211 | /* Unused for x86. */ |
| 212 | # define INIT_ARCH() |
| 213 | # define __get_cpu_features() (&GLRO(dl_x86_cpu_features)) /* Takes the address of the GLRO object instead of calling the extern function; GLRO is defined elsewhere. */ |
| 214 | # endif |
| 215 | |
| 216 | /* Test one feature bit through PTR, an expression pointing to a struct cpu_features; NAME is the feature suffix shared by the index_*, reg_* and bit_* macros. Each yields 1 if the bit is set and 0 otherwise. PTR is parenthesized so that arguments such as &obj or p + 1 expand with the intended precedence. */ |
| 217 | /* Only used directly in cpu-features.c. */ |
| 218 | # define CPU_FEATURES_CPU_P(ptr, name) \ |
| 219 | (((ptr)->cpuid[index_cpu_##name].reg_##name & (bit_cpu_##name)) != 0) |
| 220 | # define CPU_FEATURES_ARCH_P(ptr, name) \ |
| 221 | (((ptr)->feature[index_arch_##name] & (bit_arch_##name)) != 0) |
| 222 | |
| 223 | /* HAS_* evaluates to true if we may use the feature at runtime. Both apply the CPU_FEATURES_*_P test to the pointer obtained from __get_cpu_features (). */ |
| 224 | # define HAS_CPU_FEATURE(name) \ |
| 225 | CPU_FEATURES_CPU_P (__get_cpu_features (), name) |
| 226 | # define HAS_ARCH_FEATURE(name) \ |
| 227 | CPU_FEATURES_ARCH_P (__get_cpu_features (), name) |
| 228 | /* C view: index_cpu_NAME selects the cpuid[] element that CPU_FEATURES_CPU_P reads for NAME. */ |
| 229 | # define index_cpu_CX8 COMMON_CPUID_INDEX_1 |
| 230 | # define index_cpu_CMOV COMMON_CPUID_INDEX_1 |
| 231 | # define index_cpu_SSE2 COMMON_CPUID_INDEX_1 |
| 232 | # define index_cpu_SSSE3 COMMON_CPUID_INDEX_1 |
| 233 | # define index_cpu_SSE4_1 COMMON_CPUID_INDEX_1 |
| 234 | # define index_cpu_SSE4_2 COMMON_CPUID_INDEX_1 |
| 235 | # define index_cpu_AVX COMMON_CPUID_INDEX_1 |
| 236 | # define index_cpu_AVX2 COMMON_CPUID_INDEX_7 |
| 237 | # define index_cpu_AVX512F COMMON_CPUID_INDEX_7 |
| 238 | # define index_cpu_AVX512DQ COMMON_CPUID_INDEX_7 |
| 239 | # define index_cpu_ERMS COMMON_CPUID_INDEX_7 |
| 240 | # define index_cpu_RTM COMMON_CPUID_INDEX_7 |
| 241 | # define index_cpu_FMA COMMON_CPUID_INDEX_1 |
| 242 | # define index_cpu_FMA4 COMMON_CPUID_INDEX_80000001 /* The only entry in this list that uses COMMON_CPUID_INDEX_80000001. */ |
| 243 | # define index_cpu_POPCOUNT COMMON_CPUID_INDEX_1 |
| 244 | # define index_cpu_OSXSAVE COMMON_CPUID_INDEX_1 |
| 245 | # define index_cpu_HTT COMMON_CPUID_INDEX_1 |
| 246 | /* reg_NAME names the member of struct cpuid_registers that CPU_FEATURES_CPU_P masks with bit_cpu_NAME. */ |
| 247 | # define reg_CX8 edx |
| 248 | # define reg_CMOV edx |
| 249 | # define reg_SSE2 edx |
| 250 | # define reg_SSSE3 ecx |
| 251 | # define reg_SSE4_1 ecx |
| 252 | # define reg_SSE4_2 ecx |
| 253 | # define reg_AVX ecx |
| 254 | # define reg_AVX2 ebx |
| 255 | # define reg_AVX512F ebx |
| 256 | # define reg_AVX512DQ ebx |
| 257 | # define reg_ERMS ebx |
| 258 | # define reg_RTM ebx |
| 259 | # define reg_FMA ecx |
| 260 | # define reg_FMA4 ecx |
| 261 | # define reg_POPCOUNT ecx |
| 262 | # define reg_OSXSAVE ecx |
| 263 | # define reg_HTT edx |
| 264 | /* C view of the arch flags: every flag is read from feature[FEATURE_INDEX_1] by CPU_FEATURES_ARCH_P. */ |
| 265 | # define index_arch_Fast_Rep_String FEATURE_INDEX_1 |
| 266 | # define index_arch_Fast_Copy_Backward FEATURE_INDEX_1 |
| 267 | # define index_arch_Slow_BSF FEATURE_INDEX_1 |
| 268 | # define index_arch_Fast_Unaligned_Load FEATURE_INDEX_1 |
| 269 | # define index_arch_Prefer_PMINUB_for_stringop FEATURE_INDEX_1 |
| 270 | # define index_arch_AVX_Usable FEATURE_INDEX_1 |
| 271 | # define index_arch_FMA_Usable FEATURE_INDEX_1 |
| 272 | # define index_arch_FMA4_Usable FEATURE_INDEX_1 |
| 273 | # define index_arch_Slow_SSE4_2 FEATURE_INDEX_1 |
| 274 | # define index_arch_AVX2_Usable FEATURE_INDEX_1 |
| 275 | # define index_arch_AVX_Fast_Unaligned_Load FEATURE_INDEX_1 |
| 276 | # define index_arch_AVX512F_Usable FEATURE_INDEX_1 |
| 277 | # define index_arch_AVX512DQ_Usable FEATURE_INDEX_1 |
| 278 | # define index_arch_I586 FEATURE_INDEX_1 |
| 279 | # define index_arch_I686 FEATURE_INDEX_1 |
| 280 | # define index_arch_Prefer_MAP_32BIT_EXEC FEATURE_INDEX_1 |
| 281 | # define index_arch_Prefer_No_VZEROUPPER FEATURE_INDEX_1 |
| 282 | # define index_arch_Fast_Unaligned_Copy FEATURE_INDEX_1 |
| 283 | # define index_arch_Prefer_ERMS FEATURE_INDEX_1 |
| 284 | # define index_arch_Use_dl_runtime_resolve_opt FEATURE_INDEX_1 |
| 285 | # define index_arch_Use_dl_runtime_resolve_slow FEATURE_INDEX_1 |
| 286 | |
| 287 | #endif /* !__ASSEMBLER__ */ |
| 288 | /* Baseline selection from compiler-predefined target macros: HAS_CPUID, HAS_I586 and HAS_I686 become constants where the target settles the answer and HAS_ARCH_FEATURE checks otherwise. */ |
| 289 | #ifdef __x86_64__ |
| 290 | # define HAS_CPUID 1 /* HAS_I586 and HAS_I686 are not defined in this branch. */ |
| 291 | #elif defined __i586__ || defined __pentium__ |
| 292 | # define HAS_CPUID 1 |
| 293 | # define HAS_I586 1 |
| 294 | # define HAS_I686 HAS_ARCH_FEATURE (I686) /* Still decided through the I686 arch flag. */ |
| 295 | #elif (defined __i686__ || defined __pentiumpro__ \ |
| 296 | || defined __pentium4__ || defined __nocona__ \ |
| 297 | || defined __atom__ || defined __core2__ \ |
| 298 | || defined __corei7__ || defined __corei7_avx__ \ |
| 299 | || defined __core_avx2__ || defined __nehalem__ \ |
| 300 | || defined __sandybridge__ || defined __haswell__ \ |
| 301 | || defined __knl__ || defined __bonnell__ \ |
| 302 | || defined __silvermont__ \ |
| 303 | || defined __k6__ || defined __k8__ \ |
| 304 | || defined __athlon__ || defined __amdfam10__ \ |
| 305 | || defined __bdver1__ || defined __bdver2__ \ |
| 306 | || defined __bdver3__ || defined __bdver4__ \ |
| 307 | || defined __btver1__ || defined __btver2__) |
| 308 | # define HAS_CPUID 1 |
| 309 | # define HAS_I586 1 |
| 310 | # define HAS_I686 1 |
| 311 | #else /* None of the target macros above is defined: HAS_CPUID is 0 and both levels go through the arch flags. */ |
| 312 | # define HAS_CPUID 0 |
| 313 | # define HAS_I586 HAS_ARCH_FEATURE (I586) |
| 314 | # define HAS_I686 HAS_ARCH_FEATURE (I686) |
| 315 | #endif |
| 316 | |
| 317 | #endif /* cpu_features_h */ |
| 318 | |