| 1 | /* |
| 2 | * Copyright (c) 2003-2016 Apple Inc. All rights reserved. |
| 3 | * |
| 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
| 5 | * |
| 6 | * This file contains Original Code and/or Modifications of Original Code |
| 7 | * as defined in and that are subject to the Apple Public Source License |
| 8 | * Version 2.0 (the 'License'). You may not use this file except in |
| 9 | * compliance with the License. The rights granted to you under the License |
| 10 | * may not be used to create, or enable the creation or redistribution of, |
| 11 | * unlawful or unlicensed copies of an Apple operating system, or to |
| 12 | * circumvent, violate, or enable the circumvention or violation of, any |
| 13 | * terms of an Apple operating system software license agreement. |
| 14 | * |
| 15 | * Please obtain a copy of the License at |
| 16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
| 17 | * |
| 18 | * The Original Code and all software distributed under the License are |
| 19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
| 20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
| 21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
| 22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
| 23 | * Please see the License for the specific language governing rights and |
| 24 | * limitations under the License. |
| 25 | * |
| 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
| 27 | */ |
| 28 | /* |
| 29 | * @OSF_COPYRIGHT@ |
| 30 | */ |
| 31 | /* |
| 32 | * Mach Operating System |
| 33 | * Copyright (c) 1991,1990,1989, 1988 Carnegie Mellon University |
| 34 | * All Rights Reserved. |
| 35 | * |
| 36 | * Permission to use, copy, modify and distribute this software and its |
| 37 | * documentation is hereby granted, provided that both the copyright |
| 38 | * notice and this permission notice appear in all copies of the |
| 39 | * software, derivative works or modified versions, and any portions |
| 40 | * thereof, and that both notices appear in supporting documentation. |
| 41 | * |
| 42 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" |
| 43 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR |
| 44 | * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. |
| 45 | * |
| 46 | * Carnegie Mellon requests users of this software to return to |
| 47 | * |
| 48 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU |
| 49 | * School of Computer Science |
| 50 | * Carnegie Mellon University |
| 51 | * Pittsburgh PA 15213-3890 |
| 52 | * |
| 53 | * any improvements or extensions that they make and grant Carnegie Mellon |
| 54 | * the rights to redistribute these changes. |
| 55 | */ |
| 56 | |
| 57 | |
| 58 | #include <mach/i386/vm_param.h> |
| 59 | |
| 60 | #include <string.h> |
| 61 | #include <mach/vm_param.h> |
| 62 | #include <mach/vm_prot.h> |
| 63 | #include <mach/machine.h> |
| 64 | #include <mach/time_value.h> |
| 65 | #include <kern/spl.h> |
| 66 | #include <kern/assert.h> |
| 67 | #include <kern/debug.h> |
| 68 | #include <kern/misc_protos.h> |
| 69 | #include <kern/startup.h> |
| 70 | #include <kern/clock.h> |
| 71 | #include <kern/pms.h> |
| 72 | #include <kern/xpr.h> |
| 73 | #include <kern/cpu_data.h> |
| 74 | #include <kern/processor.h> |
| 75 | #include <sys/kdebug.h> |
| 76 | #include <console/serial_protos.h> |
| 77 | #include <vm/vm_page.h> |
| 78 | #include <vm/pmap.h> |
| 79 | #include <vm/vm_kern.h> |
| 80 | #include <machine/pal_routines.h> |
| 81 | #include <i386/fpu.h> |
| 82 | #include <i386/pmap.h> |
| 83 | #include <i386/misc_protos.h> |
| 84 | #include <i386/cpu_threads.h> |
| 85 | #include <i386/cpuid.h> |
| 86 | #include <i386/lapic.h> |
| 87 | #include <i386/mp.h> |
| 88 | #include <i386/mp_desc.h> |
| 89 | #if CONFIG_MTRR |
| 90 | #include <i386/mtrr.h> |
| 91 | #endif |
| 92 | #include <i386/machine_routines.h> |
| 93 | #if CONFIG_MCA |
| 94 | #include <i386/machine_check.h> |
| 95 | #endif |
| 96 | #include <i386/ucode.h> |
| 97 | #include <i386/postcode.h> |
| 98 | #include <i386/Diagnostics.h> |
| 99 | #include <i386/pmCPU.h> |
| 100 | #include <i386/tsc.h> |
| 101 | #include <i386/locks.h> /* LcksOpts */ |
| 102 | #if DEBUG |
| 103 | #include <machine/pal_routines.h> |
| 104 | #endif |
| 105 | |
| 106 | #if MONOTONIC |
| 107 | #include <kern/monotonic.h> |
| 108 | #endif /* MONOTONIC */ |
| 109 | |
| 110 | #include <san/kasan.h> |
| 111 | |
| 112 | #if DEBUG |
| 113 | #define DBG(x...) kprintf(x) |
| 114 | #else |
| 115 | #define DBG(x...) |
| 116 | #endif |
| 117 | |
/* Debugging hook; not referenced in this file — TODO confirm consumer */
int debug_task;

/* Booter-provided boot_args block: initially the physical pointer passed
 * to vstart(), rebased to a kernel virtual address once static mappings
 * are available (see vstart()). */
static boot_args *kernelBootArgs;

extern int disableConsoleOutput;
extern const char version[];
extern const char version_variant[];
extern int nx_enabled;

/*
 * Set initial values so that ml_phys_* routines can use the booter's ID mapping
 * to touch physical space before the kernel's physical aperture exists.
 * Overwritten with the slid values in physmap_init().
 */
uint64_t physmap_base = 0;
uint64_t physmap_max = 4*GB;

/* Bootstrap ("idle") page-table levels, allocated in Idle_PTs_init() */
pd_entry_t *KPTphys; /* level 1 */
pd_entry_t *IdlePTD; /* level 2 */
pdpt_entry_t *IdlePDPT; /* level 3 */
pml4_entry_t *IdlePML4; /* level 4 */

/* Next free physical address for early allocations; see ALLOCPAGES() */
char *physfree;
void idt64_remap(void);
| 142 | /* |
| 143 | * Note: ALLOCPAGES() can only be used safely within Idle_PTs_init() |
| 144 | * due to the mutation of physfree. |
| 145 | */ |
| 146 | static void * |
| 147 | ALLOCPAGES(int npages) |
| 148 | { |
| 149 | uintptr_t tmp = (uintptr_t)physfree; |
| 150 | bzero(physfree, npages * PAGE_SIZE); |
| 151 | physfree += npages * PAGE_SIZE; |
| 152 | tmp += VM_MIN_KERNEL_ADDRESS & ~LOW_4GB_MASK; |
| 153 | return (void *)tmp; |
| 154 | } |
| 155 | |
| 156 | static void |
| 157 | fillkpt(pt_entry_t *base, int prot, uintptr_t src, int index, int count) |
| 158 | { |
| 159 | int i; |
| 160 | for (i=0; i<count; i++) { |
| 161 | base[index] = src | prot | INTEL_PTE_VALID; |
| 162 | src += PAGE_SIZE; |
| 163 | index++; |
| 164 | } |
| 165 | } |
| 166 | |
extern pmap_paddr_t first_avail;

/* Debugging hook; not referenced in this file — TODO confirm consumer */
int break_kprintf = 0;
| 170 | |
/*
 * Prepare page tables for sleep: mirror the kernel's PML4 slot into
 * slot 0 so low (identity) addresses remain valid across the
 * transition, then point CR3 at IdlePML4.  Returns the previous CR3
 * for x86_64_post_sleep() to restore.
 * NOTE(review): the (uint32_t) cast truncates the physical address —
 * presumably IdlePML4 resides below 4GB at this point; confirm.
 */
uint64_t
x86_64_pre_sleep(void)
{
	IdlePML4[0] = IdlePML4[KERNEL_PML4_INDEX];
	uint64_t oldcr3 = get_cr3_raw();
	set_cr3_raw((uint32_t) (uintptr_t)ID_MAP_VTOP(IdlePML4));
	return oldcr3;
}
| 179 | |
/*
 * Undo x86_64_pre_sleep(): remove the temporary low-address alias from
 * PML4 slot 0 and restore the caller-supplied CR3.
 * NOTE(review): the (uint32_t) cast truncates new_cr3 — presumably the
 * page tables in use across sleep sit below 4GB; confirm.
 */
void
x86_64_post_sleep(uint64_t new_cr3)
{
	IdlePML4[0] = 0;
	set_cr3_raw((uint32_t) new_cr3);
}
| 186 | |
| 187 | |
| 188 | |
| 189 | |
// Set up the physical mapping - NPHYSMAP GB of memory mapped at a high address.
// NPHYSMAP is determined by the maximum supported RAM size plus 4GB to account
// for the PCI hole (which is less than 4GB but not more).
| 193 | |
/* Compile-time guard: NPHYSMAP is capped to 256GiB (half the 512 L3
 * entries), accounting for randomisation of the L3 start index.
 * Declaring a negative array size forces a compile error if exceeded.
 */
extern int maxphymapsupported[NPHYSMAP <= (PTE_PER_PAGE/2) ? 1 : -1];
| 198 | |
/*
 * Build the physical aperture ("physmap"): a range in the
 * KERNEL_PHYSMAP_PML4_INDEX slot through which all physical memory is
 * addressable with 2MiB large pages.  The L3 start index is slid by
 * one byte of early entropy; physmap_base/physmap_max are published
 * for the ml_phys_* accessors.
 */
static void
physmap_init(void)
{
	pt_entry_t *physmapL3 = ALLOCPAGES(1);
	struct {
		pt_entry_t entries[PTE_PER_PAGE];
	} * physmapL2 = ALLOCPAGES(NPHYSMAP);

	uint64_t i;
	/* One byte of entropy slides the physmap within its PML4 slot */
	uint8_t phys_random_L3 = early_random() & 0xFF;

	/* We assume NX support. Mark all levels of the PHYSMAP NX
	 * to avoid granting executability via a single bit flip.
	 */
#if DEVELOPMENT || DEBUG
	uint32_t reg[4];
	do_cpuid(0x80000000, reg);
	if (reg[eax] >= 0x80000001) {
		do_cpuid(0x80000001, reg);
		assert(reg[edx] & CPUID_EXTFEATURE_XD);
	}
#endif /* DEVELOPMENT || DEBUG */

	for(i = 0; i < NPHYSMAP; i++) {
		/* One L3 (1GiB) entry per L2 page allocated above */
		physmapL3[i + phys_random_L3] =
				((uintptr_t)ID_MAP_VTOP(&physmapL2[i]))
				| INTEL_PTE_VALID
				| INTEL_PTE_NX
				| INTEL_PTE_WRITE;

		uint64_t j;
		for(j = 0; j < PTE_PER_PAGE; j++) {
			/* 2MiB large pages mapping physical memory linearly */
			physmapL2[i].entries[j] =
				((i * PTE_PER_PAGE + j) << PDSHIFT)
				| INTEL_PTE_PS
				| INTEL_PTE_VALID
				| INTEL_PTE_NX
				| INTEL_PTE_WRITE;
		}
	}

	IdlePML4[KERNEL_PHYSMAP_PML4_INDEX] =
		((uintptr_t)ID_MAP_VTOP(physmapL3))
		| INTEL_PTE_VALID
		| INTEL_PTE_NX
		| INTEL_PTE_WRITE;

	/* Publish the slid aperture bounds for ml_phys_* */
	physmap_base = KVADDR(KERNEL_PHYSMAP_PML4_INDEX, phys_random_L3, 0, 0);
	physmap_max = physmap_base + NPHYSMAP * GB;
	DBG("Physical address map base: 0x%qx\n" , physmap_base);
	DBG("Physical map idlepml4[%d]: 0x%llx\n" ,
		KERNEL_PHYSMAP_PML4_INDEX, IdlePML4[KERNEL_PHYSMAP_PML4_INDEX]);
}
| 252 | |
| 253 | void doublemap_init(void); |
| 254 | |
/*
 * Allocate and populate the bootstrap ("idle") kernel page tables,
 * build the physical aperture and double map, then switch CR3 onto
 * them.  Boot CPU only; runs on the booter's identity-mapped tables.
 */
static void
Idle_PTs_init(void)
{
	/* Allocate the "idle" kernel page tables: */
	KPTphys = ALLOCPAGES(NKPT); /* level 1 */
	IdlePTD = ALLOCPAGES(NPGPTD); /* level 2 */
	IdlePDPT = ALLOCPAGES(1); /* level 3 */
	IdlePML4 = ALLOCPAGES(1); /* level 4 */

	// Fill the lowest level with everything up to physfree
	fillkpt(KPTphys,
	    INTEL_PTE_WRITE, 0, 0, (int)(((uintptr_t)physfree) >> PAGE_SHIFT));

	/* IdlePTD: level-2 entries point at the level-1 pages */
	fillkpt(IdlePTD,
	    INTEL_PTE_WRITE, (uintptr_t)ID_MAP_VTOP(KPTphys), 0, NKPT);

	// IdlePDPT entries point at the level-2 pages
	fillkpt(IdlePDPT,
	    INTEL_PTE_WRITE, (uintptr_t)ID_MAP_VTOP(IdlePTD), 0, NPGPTD);

	// IdlePML4 single entry for kernel space.
	fillkpt(IdlePML4 + KERNEL_PML4_INDEX,
	    INTEL_PTE_WRITE, (uintptr_t)ID_MAP_VTOP(IdlePDPT), 0, 1);

	postcode(VSTART_PHYSMAP_INIT);

	/* Build the physical aperture and the double-mapped trampoline
	 * region, then rebase the IDT handler offsets into the latter. */
	physmap_init();
	doublemap_init();
	idt64_remap();

	postcode(VSTART_SET_CR3);

	// Switch to the page tables..
	set_cr3_raw((uintptr_t)ID_MAP_VTOP(IdlePML4));

}
| 292 | |
/* Common bootstrap trap handler (assembly entry); declared as an object
 * so its address can be taken in the descriptor initializer below. */
extern void vstart_trap_handler;

/* Designated initializer for one 64-bit interrupt gate at vector t:
 * kernel code segment, ring 0, pointing at the common handler. */
#define BOOT_TRAP_VECTOR(t) \
	[t] = { \
		(uintptr_t) &vstart_trap_handler, \
		KERNEL64_CS, \
		0, \
		ACC_P|ACC_PL_K|ACC_INTR_GATE, \
		0 \
	},

/* Recursive macro to iterate 0..31 */
#define L0(x,n) x(n)
#define L1(x,n) L0(x,n-1) L0(x,n)
#define L2(x,n) L1(x,n-2) L1(x,n)
#define L3(x,n) L2(x,n-4) L2(x,n)
#define L4(x,n) L3(x,n-8) L3(x,n)
#define L5(x,n) L4(x,n-16) L4(x,n)
#define FOR_0_TO_31(x) L5(x,31)

/*
 * Bootstrap IDT. Active only during early startup.
 * Only the trap vectors (0..31) are defined since interrupts are masked.
 * All traps point to a common handler.  Placed in __HIB so it remains
 * mapped in the double-mapped/trampoline region.
 */
struct fake_descriptor64 master_boot_idt64[IDTSZ]
	__attribute__((section("__HIB,__desc" )))
	__attribute__((aligned(PAGE_SIZE))) = {
	FOR_0_TO_31(BOOT_TRAP_VECTOR)
};
| 323 | |
/*
 * Install the bootstrap IDT so early traps during page-table setup are
 * caught instead of triple-faulting.  Only vectors 0..31 exist in
 * master_boot_idt64 (see FOR_0_TO_31), hence the count of 32 passed to
 * fix_desc64(), which converts the fake descriptors in place.
 */
static void
vstart_idt_init(void)
{
	x86_64_desc_register_t vstart_idt = {
		sizeof(master_boot_idt64),
		master_boot_idt64 };

	fix_desc64(master_boot_idt64, 32);
	lidt((void *)&vstart_idt);
}
| 334 | |
| 335 | /* |
| 336 | * vstart() is called in the natural mode (64bit for K64, 32 for K32) |
| 337 | * on a set of bootstrap pagetables which use large, 2MB pages to map |
| 338 | * all of physical memory in both. See idle_pt.c for details. |
| 339 | * |
| 340 | * In K64 this identity mapping is mirrored the top and bottom 512GB |
| 341 | * slots of PML4. |
| 342 | * |
| 343 | * The bootstrap processor called with argument boot_args_start pointing to |
| 344 | * the boot-args block. The kernel's (4K page) page tables are allocated and |
| 345 | * initialized before switching to these. |
| 346 | * |
| 347 | * Non-bootstrap processors are called with argument boot_args_start NULL. |
| 348 | * These processors switch immediately to the existing kernel page tables. |
| 349 | */ |
__attribute__((noreturn))
void
vstart(vm_offset_t boot_args_start)
{
	/* Non-zero boot_args_start distinguishes the bootstrap CPU from APs */
	boolean_t is_boot_cpu = !(boot_args_start == 0);
	int cpu = 0;
	uint32_t lphysfree;

	postcode(VSTART_ENTRY);

	if (is_boot_cpu) {
		/*
		 * Set-up temporary trap handlers during page-table set-up.
		 */
		vstart_idt_init();
		postcode(VSTART_IDT_INIT);

		/*
		 * Get startup parameters.
		 */
		kernelBootArgs = (boot_args *)boot_args_start;
		lphysfree = kernelBootArgs->kaddr + kernelBootArgs->ksize;
		/* First free physical address, rounded up to a page boundary */
		physfree = (void *)(uintptr_t)((lphysfree + PAGE_SIZE - 1) &~ (PAGE_SIZE - 1));

#if DEVELOPMENT || DEBUG
		pal_serial_init();
#endif
		DBG("revision 0x%x\n" , kernelBootArgs->Revision);
		DBG("version 0x%x\n" , kernelBootArgs->Version);
		DBG("command line %s\n" , kernelBootArgs->CommandLine);
		DBG("memory map 0x%x\n" , kernelBootArgs->MemoryMap);
		DBG("memory map sz 0x%x\n" , kernelBootArgs->MemoryMapSize);
		DBG("kaddr 0x%x\n" , kernelBootArgs->kaddr);
		DBG("ksize 0x%x\n" , kernelBootArgs->ksize);
		DBG("physfree %p\n" , physfree);
		DBG("bootargs: %p, &ksize: %p &kaddr: %p\n" ,
			kernelBootArgs,
			&kernelBootArgs->ksize,
			&kernelBootArgs->kaddr);
		DBG("SMBIOS mem sz 0x%llx\n" , kernelBootArgs->PhysicalMemorySize);

		/*
		 * Setup boot args given the physical start address.
		 * Note: PE_init_platform needs to be called before Idle_PTs_init
		 * because access to the DeviceTree is required to read the
		 * random seed before generating a random physical map slide.
		 */
		kernelBootArgs = (boot_args *)
		    ml_static_ptovirt(boot_args_start);
		DBG("i386_init(0x%lx) kernelBootArgs=%p\n" ,
		    (unsigned long)boot_args_start, kernelBootArgs);

#if KASAN
		kasan_reserve_memory(kernelBootArgs);
#endif

		PE_init_platform(FALSE, kernelBootArgs);
		postcode(PE_INIT_PLATFORM_D);

		/* Build and switch to the kernel's own page tables */
		Idle_PTs_init();
		postcode(VSTART_IDLE_PTS_INIT);

#if KASAN
		/* Init kasan and map whatever was stolen from physfree */
		kasan_init();
		kasan_notify_stolen((uintptr_t)ml_static_ptovirt((vm_offset_t)physfree));
#endif

#if MONOTONIC
		mt_early_init();
#endif /* MONOTONIC */

		/* Everything below physfree is now in use by early allocations */
		first_avail = (vm_offset_t)ID_MAP_VTOP(physfree);

		cpu_data_alloc(TRUE);

		cpu_desc_init(cpu_datap(0));
		postcode(VSTART_CPU_DESC_INIT);
		cpu_desc_load(cpu_datap(0));

		postcode(VSTART_CPU_MODE_INIT);
		cpu_syscall_init(cpu_datap(0)); /* cpu_syscall_init() will be
						 * invoked on the APs
						 * via i386_init_slave()
						 */
	} else {
		/* Switch to kernel's page tables (from the Boot PTs) */
		set_cr3_raw((uintptr_t)ID_MAP_VTOP(IdlePML4));
		/* Find our logical cpu number */
		cpu = lapic_to_cpu[(LAPIC_READ(ID)>>LAPIC_ID_SHIFT) & LAPIC_ID_MASK];
		DBG("CPU: %d, GSBASE initial value: 0x%llx\n" , cpu, rdmsr64(MSR_IA32_GS_BASE));
		cpu_desc_load(cpu_datap(cpu));
	}

	postcode(VSTART_EXIT);
	/* Continue on the CPU's interrupt stack; does not return */
	x86_init_wrapper(is_boot_cpu ? (uintptr_t) i386_init
				     : (uintptr_t) i386_init_slave,
			 cpu_datap(cpu)->cpu_int_stack_top);
}
| 449 | |
/*
 * No-op stub invoked at init milestones in i386_init(); presumably a
 * hook/anchor point for performance-state tracing or external probes —
 * nothing in this file attaches behavior to it.
 */
void
pstate_trace(void)
{
}
| 454 | |
| 455 | /* |
| 456 | * Cpu initialization. Running virtual, but without MACH VM |
| 457 | * set up. |
| 458 | */ |
| 459 | void |
| 460 | i386_init(void) |
| 461 | { |
| 462 | unsigned int maxmem; |
| 463 | uint64_t maxmemtouse; |
| 464 | unsigned int cpus = 0; |
| 465 | boolean_t fidn; |
| 466 | boolean_t IA32e = TRUE; |
| 467 | |
| 468 | postcode(I386_INIT_ENTRY); |
| 469 | |
| 470 | pal_i386_init(); |
| 471 | tsc_init(); |
| 472 | rtclock_early_init(); /* mach_absolute_time() now functionsl */ |
| 473 | |
| 474 | kernel_debug_string_early("i386_init" ); |
| 475 | pstate_trace(); |
| 476 | |
| 477 | #if CONFIG_MCA |
| 478 | /* Initialize machine-check handling */ |
| 479 | mca_cpu_init(); |
| 480 | #endif |
| 481 | |
| 482 | master_cpu = 0; |
| 483 | cpu_init(); |
| 484 | |
| 485 | postcode(CPU_INIT_D); |
| 486 | |
| 487 | printf_init(); /* Init this in case we need debugger */ |
| 488 | panic_init(); /* Init this in case we need debugger */ |
| 489 | |
| 490 | /* setup debugging output if one has been chosen */ |
| 491 | kernel_debug_string_early("PE_init_kprintf" ); |
| 492 | PE_init_kprintf(FALSE); |
| 493 | |
| 494 | kernel_debug_string_early("kernel_early_bootstrap" ); |
| 495 | kernel_early_bootstrap(); |
| 496 | |
| 497 | if (!PE_parse_boot_argn("diag" , &dgWork.dgFlags, sizeof (dgWork.dgFlags))) |
| 498 | dgWork.dgFlags = 0; |
| 499 | |
| 500 | serialmode = 0; |
| 501 | if (PE_parse_boot_argn("serial" , &serialmode, sizeof(serialmode))) { |
| 502 | /* We want a serial keyboard and/or console */ |
| 503 | kprintf("Serial mode specified: %08X\n" , serialmode); |
| 504 | int force_sync = serialmode & SERIALMODE_SYNCDRAIN; |
| 505 | if (force_sync || PE_parse_boot_argn("drain_uart_sync" , &force_sync, sizeof(force_sync))) { |
| 506 | if (force_sync) { |
| 507 | serialmode |= SERIALMODE_SYNCDRAIN; |
| 508 | kprintf( |
| 509 | "WARNING: Forcing uart driver to output synchronously." |
| 510 | "printf()s/IOLogs will impact kernel performance.\n" |
| 511 | "You are advised to avoid using 'drain_uart_sync' boot-arg.\n" ); |
| 512 | } |
| 513 | } |
| 514 | } |
| 515 | if (serialmode & SERIALMODE_OUTPUT) { |
| 516 | (void)switch_to_serial_console(); |
| 517 | disableConsoleOutput = FALSE; /* Allow printfs to happen */ |
| 518 | } |
| 519 | |
| 520 | /* setup console output */ |
| 521 | kernel_debug_string_early("PE_init_printf" ); |
| 522 | PE_init_printf(FALSE); |
| 523 | |
| 524 | kprintf("version_variant = %s\n" , version_variant); |
| 525 | kprintf("version = %s\n" , version); |
| 526 | |
| 527 | if (!PE_parse_boot_argn("maxmem" , &maxmem, sizeof (maxmem))) |
| 528 | maxmemtouse = 0; |
| 529 | else |
| 530 | maxmemtouse = ((uint64_t)maxmem) * MB; |
| 531 | |
| 532 | if (PE_parse_boot_argn("cpus" , &cpus, sizeof (cpus))) { |
| 533 | if ((0 < cpus) && (cpus < max_ncpus)) |
| 534 | max_ncpus = cpus; |
| 535 | } |
| 536 | |
| 537 | /* |
| 538 | * debug support for > 4G systems |
| 539 | */ |
| 540 | PE_parse_boot_argn("himemory_mode" , &vm_himemory_mode, sizeof (vm_himemory_mode)); |
| 541 | if (vm_himemory_mode != 0) |
| 542 | kprintf("himemory_mode: %d\n" , vm_himemory_mode); |
| 543 | |
| 544 | if (!PE_parse_boot_argn("immediate_NMI" , &fidn, sizeof (fidn))) |
| 545 | force_immediate_debugger_NMI = FALSE; |
| 546 | else |
| 547 | force_immediate_debugger_NMI = fidn; |
| 548 | |
| 549 | #if DEBUG |
| 550 | nanoseconds_to_absolutetime(URGENCY_NOTIFICATION_ASSERT_NS, &urgency_notification_assert_abstime_threshold); |
| 551 | #endif |
| 552 | PE_parse_boot_argn("urgency_notification_abstime" , |
| 553 | &urgency_notification_assert_abstime_threshold, |
| 554 | sizeof(urgency_notification_assert_abstime_threshold)); |
| 555 | |
| 556 | if (!(cpuid_extfeatures() & CPUID_EXTFEATURE_XD)) |
| 557 | nx_enabled = 0; |
| 558 | |
| 559 | /* |
| 560 | * VM initialization, after this we're using page tables... |
| 561 | * Thn maximum number of cpus must be set beforehand. |
| 562 | */ |
| 563 | kernel_debug_string_early("i386_vm_init" ); |
| 564 | i386_vm_init(maxmemtouse, IA32e, kernelBootArgs); |
| 565 | |
| 566 | /* create the console for verbose or pretty mode */ |
| 567 | /* Note: doing this prior to tsc_init() allows for graceful panic! */ |
| 568 | PE_init_platform(TRUE, kernelBootArgs); |
| 569 | PE_create_console(); |
| 570 | |
| 571 | kernel_debug_string_early("power_management_init" ); |
| 572 | power_management_init(); |
| 573 | processor_bootstrap(); |
| 574 | thread_bootstrap(); |
| 575 | |
| 576 | pstate_trace(); |
| 577 | kernel_debug_string_early("machine_startup" ); |
| 578 | machine_startup(); |
| 579 | pstate_trace(); |
| 580 | } |
| 581 | |
/*
 * Common AP bring-up / wake path.  'fast_restart' skips per-CPU state
 * that survives a fast restart (caching, LAPIC, FPU, MTRR, microcode).
 * Ends in slave_main(), which must not return.
 */
static void
do_init_slave(boolean_t fast_restart)
{
	void *init_param = FULL_SLAVE_INIT;

	postcode(I386_INIT_SLAVE);

	if (!fast_restart) {
		/* Ensure that caching and write-through are enabled */
		set_cr0(get_cr0() & ~(CR0_NW|CR0_CD));

		DBG("i386_init_slave() CPU%d: phys (%d) active.\n" ,
		    get_cpu_number(), get_cpu_phys_number());

		assert(!ml_get_interrupts_enabled());

		cpu_syscall_init(current_cpu_datap());
		pmap_cpu_init();

#if CONFIG_MCA
		mca_cpu_init();
#endif

		/* Local APIC bring-up for this CPU */
		LAPIC_INIT();
		lapic_configure();
		LAPIC_DUMP();
		LAPIC_CPU_MAP_DUMP();

		init_fpu();

#if CONFIG_MTRR
		mtrr_update_cpu();
#endif
		/* update CPU microcode */
		ucode_update_wake();
	} else
		init_param = FAST_SLAVE_INIT;

#if CONFIG_VMX
	/* resume VT operation */
	vmx_resume(FALSE);
#endif

#if CONFIG_MTRR
	if (!fast_restart)
		pat_init();
#endif

	cpu_thread_init(); /* not strictly necessary */

	cpu_init(); /* Sets cpu_running which starter cpu waits for */
	slave_main(init_param);

	panic("do_init_slave() returned from slave_main()" );
}
| 637 | |
| 638 | /* |
| 639 | * i386_init_slave() is called from pstart. |
| 640 | * We're in the cpu's interrupt stack with interrupts disabled. |
| 641 | * At this point we are in legacy mode. We need to switch on IA32e |
| 642 | * if the mode is set to 64-bits. |
| 643 | */ |
| 644 | void |
| 645 | i386_init_slave(void) |
| 646 | { |
| 647 | do_init_slave(FALSE); |
| 648 | } |
| 649 | |
| 650 | /* |
| 651 | * i386_init_slave_fast() is called from pmCPUHalt. |
| 652 | * We're running on the idle thread and need to fix up |
| 653 | * some accounting and get it so that the scheduler sees this |
| 654 | * CPU again. |
| 655 | */ |
| 656 | void |
| 657 | i386_init_slave_fast(void) |
| 658 | { |
| 659 | do_init_slave(TRUE); |
| 660 | } |
| 661 | |
| 662 | #include <libkern/kernel_mach_header.h> |
| 663 | |
/* TODO: Evaluate global PTEs for the double-mapped translations */

/* Bounds of the double-mapped (trampoline) region in kernel VA space;
 * set in doublemap_init(), extended by dyn_dblmap(). */
uint64_t dblmap_base, dblmap_max;
/* The kernel's __HIB segment, mirrored into the double map */
kernel_segment_command_t *hdescseg;

pt_entry_t *dblmapL3;
/* Count of early pages allocated to the double-map subtree */
unsigned int dblallocs;
/* Offset added to a kernel VA to obtain its double-mapped alias */
uint64_t dblmap_dist;
extern uint64_t idt64_hndl_table0[];
| 674 | |
/*
 * Construct the "double map": an alias of the kernel's __HIB segment
 * in the KERNEL_DBLMAP_PML4_INDEX PML4 slot.  The __HIB __text section
 * is mirrored executable; the remainder of the segment is writable and
 * NX.  Entries of idt64_hndl_table0 are patched so trampoline code can
 * locate its double-mapped data.
 */
void doublemap_init(void) {
	dblmapL3 = ALLOCPAGES(1); // for 512 1GiB entries
	dblallocs++;

	struct {
		pt_entry_t entries[PTE_PER_PAGE];
	} * dblmapL2 = ALLOCPAGES(1); // for 512 2MiB entries
	dblallocs++;

	dblmapL3[0] = ((uintptr_t)ID_MAP_VTOP(&dblmapL2[0]))
	    | INTEL_PTE_VALID
	    | INTEL_PTE_WRITE;

	/* Locate the __HIB segment and its executable __text subrange */
	hdescseg = getsegbynamefromheader(&_mh_execute_header, "__HIB" );

	vm_offset_t hdescb = hdescseg->vmaddr;
	unsigned long hdescsz = hdescseg->vmsize;
	unsigned long hdescszr = round_page_64(hdescsz);
	vm_offset_t hdescc = hdescb, hdesce = hdescb + hdescszr;

	kernel_section_t *thdescsect = getsectbynamefromheader(&_mh_execute_header, "__HIB" , "__text" );
	vm_offset_t thdescb = thdescsect->addr;
	unsigned long thdescsz = thdescsect->size;
	unsigned long thdescszr = round_page_64(thdescsz);
	vm_offset_t thdesce = thdescb + thdescszr;

	assert((hdescb & 0xFFF) == 0);
	/* Mirror HIB translations into the double-mapped pagetable subtree*/
	for(int i = 0; hdescc < hdesce; i++) {
		struct {
			pt_entry_t entries[PTE_PER_PAGE];
		} * dblmapL1 = ALLOCPAGES(1);
		dblallocs++;
		dblmapL2[0].entries[i] = ((uintptr_t)ID_MAP_VTOP(&dblmapL1[0])) | INTEL_PTE_VALID | INTEL_PTE_WRITE | INTEL_PTE_REF;
		int hdescn = (int) ((hdesce - hdescc) / PAGE_SIZE);
		for (int j = 0; j < MIN(PTE_PER_PAGE, hdescn); j++) {
			uint64_t template = INTEL_PTE_VALID;
			if ((hdescc >= thdescb) && (hdescc < thdesce)) {
				/* executable */
			} else {
				template |= INTEL_PTE_WRITE | INTEL_PTE_NX ; /* Writeable, NX */
			}
			dblmapL1[0].entries[j] = ((uintptr_t)ID_MAP_VTOP(hdescc)) | template;
			hdescc += PAGE_SIZE;
		}
	}

	IdlePML4[KERNEL_DBLMAP_PML4_INDEX] = ((uintptr_t)ID_MAP_VTOP(dblmapL3)) | INTEL_PTE_VALID | INTEL_PTE_WRITE | INTEL_PTE_REF;

	/* NOTE(review): dblmapL3 (a pointer) is passed as the PDPT-index
	 * argument here; presumably KVADDR masks it to the slot width —
	 * confirm against the KVADDR definition. */
	dblmap_base = KVADDR(KERNEL_DBLMAP_PML4_INDEX, dblmapL3, 0, 0);
	dblmap_max = dblmap_base + hdescszr;
	/* Calculate the double-map distance, which accounts for the current
	 * KASLR slide
	 */

	dblmap_dist = dblmap_base - hdescb;
	idt64_hndl_table0[1] = DBLMAP(idt64_hndl_table0[1]);
	idt64_hndl_table0[6] = (uint64_t)(uintptr_t)&kernel_stack_mask;

	extern cpu_data_t cpshadows[], scdatas[];
	uintptr_t cd1 = (uintptr_t) &cpshadows[0];
	uintptr_t cd2 = (uintptr_t) &scdatas[0];
	/* Record the displacement from the kernel's per-CPU data pointer, eventually
	 * programmed into GSBASE, to the "shadows" in the doublemapped
	 * region. These are not aliases, but separate physical allocations
	 * containing data required in the doublemapped trampolines.
	 */
	idt64_hndl_table0[2] = dblmap_dist + cd1 - cd2;

	DBG("Double map base: 0x%qx\n" , dblmap_base);
	DBG("double map idlepml4[%d]: 0x%llx\n" , KERNEL_DBLMAP_PML4_INDEX, IdlePML4[KERNEL_DBLMAP_PML4_INDEX]);
	assert(LDTSZ > LDTSZ_MIN);
}
| 748 | |
| 749 | vm_offset_t dyn_dblmap(vm_offset_t, vm_offset_t); |
| 750 | |
| 751 | #include <i386/pmap_internal.h> |
| 752 | |
| 753 | /* Use of this routine is expected to be synchronized by callers |
| 754 | * Creates non-executable aliases. |
| 755 | */ |
| 756 | vm_offset_t dyn_dblmap(vm_offset_t cva, vm_offset_t sz) { |
| 757 | vm_offset_t ava = dblmap_max; |
| 758 | |
| 759 | assert((sz & PAGE_MASK) == 0); |
| 760 | assert(cva != 0); |
| 761 | |
| 762 | pmap_alias(ava, cva, cva + sz, VM_PROT_READ | VM_PROT_WRITE, PMAP_EXPAND_OPTIONS_ALIASMAP); |
| 763 | dblmap_max += sz; |
| 764 | return (ava - cva); |
| 765 | } |
| 766 | /* Adjust offsets interior to the bootstrap interrupt descriptor table to redirect |
| 767 | * control to the double-mapped interrupt vectors. The IDTR proper will be |
| 768 | * programmed via cpu_desc_load() |
| 769 | */ |
| 770 | void idt64_remap(void) { |
| 771 | for (int i = 0; i < IDTSZ; i++) { |
| 772 | master_idt64[i].offset64 = DBLMAP(master_idt64[i].offset64); |
| 773 | } |
| 774 | } |
| 775 | |