1 | /* |
2 | * Copyright (c) 2003-2016 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | /* |
29 | * @OSF_COPYRIGHT@ |
30 | */ |
31 | /* |
32 | * Mach Operating System |
33 | * Copyright (c) 1991,1990,1989, 1988 Carnegie Mellon University |
34 | * All Rights Reserved. |
35 | * |
36 | * Permission to use, copy, modify and distribute this software and its |
37 | * documentation is hereby granted, provided that both the copyright |
38 | * notice and this permission notice appear in all copies of the |
39 | * software, derivative works or modified versions, and any portions |
40 | * thereof, and that both notices appear in supporting documentation. |
41 | * |
42 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" |
43 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR |
44 | * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. |
45 | * |
46 | * Carnegie Mellon requests users of this software to return to |
47 | * |
48 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU |
49 | * School of Computer Science |
50 | * Carnegie Mellon University |
51 | * Pittsburgh PA 15213-3890 |
52 | * |
53 | * any improvements or extensions that they make and grant Carnegie Mellon |
54 | * the rights to redistribute these changes. |
55 | */ |
56 | |
57 | |
58 | #include <mach/i386/vm_param.h> |
59 | |
60 | #include <string.h> |
61 | #include <mach/vm_param.h> |
62 | #include <mach/vm_prot.h> |
63 | #include <mach/machine.h> |
64 | #include <mach/time_value.h> |
65 | #include <kern/spl.h> |
66 | #include <kern/assert.h> |
67 | #include <kern/debug.h> |
68 | #include <kern/misc_protos.h> |
69 | #include <kern/startup.h> |
70 | #include <kern/clock.h> |
71 | #include <kern/pms.h> |
72 | #include <kern/xpr.h> |
73 | #include <kern/cpu_data.h> |
74 | #include <kern/processor.h> |
75 | #include <sys/kdebug.h> |
76 | #include <console/serial_protos.h> |
77 | #include <vm/vm_page.h> |
78 | #include <vm/pmap.h> |
79 | #include <vm/vm_kern.h> |
80 | #include <machine/pal_routines.h> |
81 | #include <i386/fpu.h> |
82 | #include <i386/pmap.h> |
83 | #include <i386/misc_protos.h> |
84 | #include <i386/cpu_threads.h> |
85 | #include <i386/cpuid.h> |
86 | #include <i386/lapic.h> |
87 | #include <i386/mp.h> |
88 | #include <i386/mp_desc.h> |
89 | #if CONFIG_MTRR |
90 | #include <i386/mtrr.h> |
91 | #endif |
92 | #include <i386/machine_routines.h> |
93 | #if CONFIG_MCA |
94 | #include <i386/machine_check.h> |
95 | #endif |
96 | #include <i386/ucode.h> |
97 | #include <i386/postcode.h> |
98 | #include <i386/Diagnostics.h> |
99 | #include <i386/pmCPU.h> |
100 | #include <i386/tsc.h> |
101 | #include <i386/locks.h> /* LcksOpts */ |
102 | #if DEBUG |
103 | #include <machine/pal_routines.h> |
104 | #endif |
105 | |
106 | #if MONOTONIC |
107 | #include <kern/monotonic.h> |
108 | #endif /* MONOTONIC */ |
109 | |
110 | #include <san/kasan.h> |
111 | |
112 | #if DEBUG |
113 | #define DBG(x...) kprintf(x) |
114 | #else |
115 | #define DBG(x...) |
116 | #endif |
117 | |
/* NOTE(review): debug knob; not referenced elsewhere in this file -- confirm users. */
int debug_task;

/* Boot-args block handed over by the booter.  Initially a physical
 * (ID-mapped) pointer; rebased to a kernel virtual pointer in vstart().
 */
static boot_args *kernelBootArgs;

extern int disableConsoleOutput;
extern const char version[];
extern const char version_variant[];
extern int nx_enabled;

/*
 * Set initial values so that ml_phys_* routines can use the booter's ID mapping
 * to touch physical space before the kernel's physical aperture exists.
 */
uint64_t physmap_base = 0;
uint64_t physmap_max = 4*GB;

/* Bootstrap ("idle") page table levels, allocated in Idle_PTs_init(). */
pd_entry_t *KPTphys;			/* level 1 */
pd_entry_t *IdlePTD;			/* level 2 */
pdpt_entry_t *IdlePDPT;			/* level 3 */
pml4_entry_t *IdlePML4;			/* level 4 */

/* First free physical address during early boot; advanced by ALLOCPAGES(). */
char *physfree;
void idt64_remap(void);
141 | |
142 | /* |
143 | * Note: ALLOCPAGES() can only be used safely within Idle_PTs_init() |
144 | * due to the mutation of physfree. |
145 | */ |
146 | static void * |
147 | ALLOCPAGES(int npages) |
148 | { |
149 | uintptr_t tmp = (uintptr_t)physfree; |
150 | bzero(physfree, npages * PAGE_SIZE); |
151 | physfree += npages * PAGE_SIZE; |
152 | tmp += VM_MIN_KERNEL_ADDRESS & ~LOW_4GB_MASK; |
153 | return (void *)tmp; |
154 | } |
155 | |
156 | static void |
157 | fillkpt(pt_entry_t *base, int prot, uintptr_t src, int index, int count) |
158 | { |
159 | int i; |
160 | for (i=0; i<count; i++) { |
161 | base[index] = src | prot | INTEL_PTE_VALID; |
162 | src += PAGE_SIZE; |
163 | index++; |
164 | } |
165 | } |
166 | |
/* Lowest free physical address handed to the VM layer (set in vstart()). */
extern pmap_paddr_t first_avail;

/* NOTE(review): apparently a debug knob; never written in this file -- confirm users. */
int break_kprintf = 0;
170 | |
/*
 * Prepare paging for S3 sleep: alias the kernel's PML4 entry into slot 0
 * so low-address (identity-mapped) wake code can execute, then switch
 * CR3 to the bootstrap IdlePML4.  Returns the prior CR3 value so that
 * x86_64_post_sleep() can restore it.
 */
uint64_t
x86_64_pre_sleep(void)
{
	IdlePML4[0] = IdlePML4[KERNEL_PML4_INDEX];
	uint64_t oldcr3 = get_cr3_raw();
	/* NOTE(review): the 32-bit cast assumes IdlePML4's physical address
	 * is below 4GB (ALLOCPAGES carves it from early low memory) -- confirm.
	 */
	set_cr3_raw((uint32_t) (uintptr_t)ID_MAP_VTOP(IdlePML4));
	return oldcr3;
}
179 | |
/*
 * Undo x86_64_pre_sleep(): clear the temporary identity alias in IdlePML4
 * slot 0 and restore the CR3 saved before sleep.
 * NOTE(review): new_cr3 is truncated to 32 bits, presumably because the
 * restored page tables live below 4GB -- confirm against the sleep path.
 */
void
x86_64_post_sleep(uint64_t new_cr3)
{
	IdlePML4[0] = 0;
	set_cr3_raw((uint32_t) new_cr3);
}
186 | |
187 | |
188 | |
189 | |
// Set up the physical mapping - NPHYSMAP GB of memory mapped at a high address
// NPHYSMAP is determined by the maximum supported RAM size plus 4GB to account
// for the PCI hole (which is less than 4GB but not more).

/* Compile-time guard: NPHYSMAP is capped to 256GiB, accounting for
 * randomisation.  A negative array size fails the build if exceeded.
 */
extern int maxphymapsupported[NPHYSMAP <= (PTE_PER_PAGE/2) ? 1 : -1];
198 | |
/*
 * Build the physmap: a linear virtual alias of physical memory, NPHYSMAP
 * GB mapped with 2MB (PS) pages under the KERNEL_PHYSMAP_PML4_INDEX slot.
 * The starting L3 index is randomized by up to 255 entries as a slide;
 * the compile-time guard above (NPHYSMAP <= 256) keeps index+NPHYSMAP
 * within the 512-entry L3 table.
 */
static void
physmap_init(void)
{
	pt_entry_t *physmapL3 = ALLOCPAGES(1);
	struct {
		pt_entry_t entries[PTE_PER_PAGE];
	} * physmapL2 = ALLOCPAGES(NPHYSMAP);

	uint64_t i;
	uint8_t phys_random_L3 = early_random() & 0xFF;	/* KASLR slide for the L3 index */

	/* We assume NX support. Mark all levels of the PHYSMAP NX
	 * to avoid granting executability via a single bit flip.
	 */
#if DEVELOPMENT || DEBUG
	uint32_t reg[4];
	do_cpuid(0x80000000, reg);
	if (reg[eax] >= 0x80000001) {
		do_cpuid(0x80000001, reg);
		assert(reg[edx] & CPUID_EXTFEATURE_XD);
	}
#endif /* DEVELOPMENT || DEBUG */

	for(i = 0; i < NPHYSMAP; i++) {
		/* One L3 entry per GB, pointing at an L2 table of 2MB pages. */
		physmapL3[i + phys_random_L3] =
		    ((uintptr_t)ID_MAP_VTOP(&physmapL2[i]))
		    | INTEL_PTE_VALID
		    | INTEL_PTE_NX
		    | INTEL_PTE_WRITE;

		uint64_t j;
		for(j = 0; j < PTE_PER_PAGE; j++) {
			/* 2MB "large page" mapping of physical address
			 * (i * PTE_PER_PAGE + j) << PDSHIFT; writable, NX.
			 */
			physmapL2[i].entries[j] =
			    ((i * PTE_PER_PAGE + j) << PDSHIFT)
			    | INTEL_PTE_PS
			    | INTEL_PTE_VALID
			    | INTEL_PTE_NX
			    | INTEL_PTE_WRITE;
		}
	}

	/* Hook the physmap subtree into the bootstrap PML4. */
	IdlePML4[KERNEL_PHYSMAP_PML4_INDEX] =
	    ((uintptr_t)ID_MAP_VTOP(physmapL3))
	    | INTEL_PTE_VALID
	    | INTEL_PTE_NX
	    | INTEL_PTE_WRITE;

	/* Publish the randomized window for the ml_phys_* accessors. */
	physmap_base = KVADDR(KERNEL_PHYSMAP_PML4_INDEX, phys_random_L3, 0, 0);
	physmap_max = physmap_base + NPHYSMAP * GB;
	DBG("Physical address map base: 0x%qx\n", physmap_base);
	DBG("Physical map idlepml4[%d]: 0x%llx\n",
	    KERNEL_PHYSMAP_PML4_INDEX, IdlePML4[KERNEL_PHYSMAP_PML4_INDEX]);
}
252 | |
253 | void doublemap_init(void); |
254 | |
/*
 * Build the bootstrap ("idle") 4-level kernel page tables covering
 * physical memory up to physfree, install the physmap and doublemap
 * subtrees, remap the IDT, and finally switch CR3 onto the new tables.
 */
static void
Idle_PTs_init(void)
{
	/* Allocate the "idle" kernel page tables: */
	KPTphys  = ALLOCPAGES(NKPT);		/* level 1 */
	IdlePTD  = ALLOCPAGES(NPGPTD);		/* level 2 */
	IdlePDPT = ALLOCPAGES(1);		/* level 3 */
	IdlePML4 = ALLOCPAGES(1);		/* level 4 */

	// Fill the lowest level with everything up to physfree
	fillkpt(KPTphys,
		INTEL_PTE_WRITE, 0, 0, (int)(((uintptr_t)physfree) >> PAGE_SHIFT));

	/* IdlePTD: level-2 entries pointing at the KPTphys pages */
	fillkpt(IdlePTD,
		INTEL_PTE_WRITE, (uintptr_t)ID_MAP_VTOP(KPTphys), 0, NKPT);

	// IdlePDPT entries
	fillkpt(IdlePDPT,
		INTEL_PTE_WRITE, (uintptr_t)ID_MAP_VTOP(IdlePTD), 0, NPGPTD);

	// IdlePML4 single entry for kernel space.
	fillkpt(IdlePML4 + KERNEL_PML4_INDEX,
		INTEL_PTE_WRITE, (uintptr_t)ID_MAP_VTOP(IdlePDPT), 0, 1);

	postcode(VSTART_PHYSMAP_INIT);

	physmap_init();
	doublemap_init();
	idt64_remap();

	postcode(VSTART_SET_CR3);

	// Switch to the page tables..
	set_cr3_raw((uintptr_t)ID_MAP_VTOP(IdlePML4));

}
292 | |
/* Common early trap entry point (assembly symbol; type is nominal). */
extern void vstart_trap_handler;

/* Build one 64-bit interrupt-gate descriptor in the kernel code segment,
 * pointing vector 't' at the shared vstart_trap_handler.
 */
#define BOOT_TRAP_VECTOR(t) \
	[t] = { \
		(uintptr_t) &vstart_trap_handler, \
		KERNEL64_CS, \
		0, \
		ACC_P|ACC_PL_K|ACC_INTR_GATE, \
		0 \
	},

/* Recursive macro to iterate 0..31 (the architectural exception vectors) */
#define L0(x,n) x(n)
#define L1(x,n) L0(x,n-1) L0(x,n)
#define L2(x,n) L1(x,n-2) L1(x,n)
#define L3(x,n) L2(x,n-4) L2(x,n)
#define L4(x,n) L3(x,n-8) L3(x,n)
#define L5(x,n) L4(x,n-16) L4(x,n)
#define FOR_0_TO_31(x) L5(x,31)

/*
 * Bootstrap IDT. Active only during early startup.
 * Only the trap vectors are defined since interrupts are masked.
 * All traps point to a common handler.
 * Resides, page-aligned, in the __HIB,__desc section.
 */
struct fake_descriptor64 master_boot_idt64[IDTSZ]
	__attribute__((section("__HIB,__desc")))
	__attribute__((aligned(PAGE_SIZE))) = {
	FOR_0_TO_31(BOOT_TRAP_VECTOR)
};
323 | |
/*
 * Fix up and load the bootstrap IDT so early traps (taken while the real
 * page tables and descriptors are being built) reach vstart_trap_handler
 * instead of triple-faulting.
 */
static void
vstart_idt_init(void)
{
	x86_64_desc_register_t	vstart_idt = {
		sizeof(master_boot_idt64),
		master_boot_idt64 };

	/* 32 == number of vectors populated via FOR_0_TO_31 above. */
	fix_desc64(master_boot_idt64, 32);
	lidt((void *)&vstart_idt);
}
334 | |
335 | /* |
336 | * vstart() is called in the natural mode (64bit for K64, 32 for K32) |
337 | * on a set of bootstrap pagetables which use large, 2MB pages to map |
338 | * all of physical memory in both. See idle_pt.c for details. |
339 | * |
 * In K64 this identity mapping is mirrored into the top and bottom 512GB
 * slots of the PML4.
 *
 * The bootstrap processor is called with argument boot_args_start pointing to
 * the boot-args block. The kernel's (4K page) page tables are allocated and
345 | * initialized before switching to these. |
346 | * |
347 | * Non-bootstrap processors are called with argument boot_args_start NULL. |
348 | * These processors switch immediately to the existing kernel page tables. |
349 | */ |
__attribute__((noreturn))
void
vstart(vm_offset_t boot_args_start)
{
	boolean_t	is_boot_cpu = !(boot_args_start == 0);	/* APs pass NULL */
	int		cpu = 0;
	uint32_t	lphysfree;

	postcode(VSTART_ENTRY);

	if (is_boot_cpu) {
		/*
		 * Set-up temporary trap handlers during page-table set-up.
		 */
		vstart_idt_init();
		postcode(VSTART_IDT_INIT);

		/*
		 * Get startup parameters.  physfree starts at the first
		 * page-aligned address past the loaded kernel image.
		 */
		kernelBootArgs = (boot_args *)boot_args_start;
		lphysfree = kernelBootArgs->kaddr + kernelBootArgs->ksize;
		physfree = (void *)(uintptr_t)((lphysfree + PAGE_SIZE - 1) &~ (PAGE_SIZE - 1));

#if DEVELOPMENT || DEBUG
		pal_serial_init();
#endif
		DBG("revision 0x%x\n", kernelBootArgs->Revision);
		DBG("version 0x%x\n", kernelBootArgs->Version);
		DBG("command line %s\n", kernelBootArgs->CommandLine);
		DBG("memory map 0x%x\n", kernelBootArgs->MemoryMap);
		DBG("memory map sz 0x%x\n", kernelBootArgs->MemoryMapSize);
		DBG("kaddr 0x%x\n", kernelBootArgs->kaddr);
		DBG("ksize 0x%x\n", kernelBootArgs->ksize);
		DBG("physfree %p\n", physfree);
		DBG("bootargs: %p, &ksize: %p &kaddr: %p\n",
			kernelBootArgs,
			&kernelBootArgs->ksize,
			&kernelBootArgs->kaddr);
		DBG("SMBIOS mem sz 0x%llx\n", kernelBootArgs->PhysicalMemorySize);

		/*
		 * Setup boot args given the physical start address.
		 * Note: PE_init_platform needs to be called before Idle_PTs_init
		 * because access to the DeviceTree is required to read the
		 * random seed before generating a random physical map slide.
		 */
		kernelBootArgs = (boot_args *)
		    ml_static_ptovirt(boot_args_start);
		DBG("i386_init(0x%lx) kernelBootArgs=%p\n",
		    (unsigned long)boot_args_start, kernelBootArgs);

#if KASAN
		kasan_reserve_memory(kernelBootArgs);
#endif

		PE_init_platform(FALSE, kernelBootArgs);
		postcode(PE_INIT_PLATFORM_D);

		/* Build and switch to the kernel's own page tables. */
		Idle_PTs_init();
		postcode(VSTART_IDLE_PTS_INIT);

#if KASAN
		/* Init kasan and map whatever was stolen from physfree */
		kasan_init();
		kasan_notify_stolen((uintptr_t)ml_static_ptovirt((vm_offset_t)physfree));
#endif

#if MONOTONIC
		mt_early_init();
#endif /* MONOTONIC */

		/* Hand the remaining low memory to the VM layer. */
		first_avail = (vm_offset_t)ID_MAP_VTOP(physfree);

		cpu_data_alloc(TRUE);

		/* Build and load descriptor tables for the boot CPU. */
		cpu_desc_init(cpu_datap(0));
		postcode(VSTART_CPU_DESC_INIT);
		cpu_desc_load(cpu_datap(0));

		postcode(VSTART_CPU_MODE_INIT);
		cpu_syscall_init(cpu_datap(0)); /* cpu_syscall_init() will be
						 * invoked on the APs
						 * via i386_init_slave()
						 */
	} else {
		/* Switch to kernel's page tables (from the Boot PTs) */
		set_cr3_raw((uintptr_t)ID_MAP_VTOP(IdlePML4));
		/* Find our logical cpu number */
		cpu = lapic_to_cpu[(LAPIC_READ(ID)>>LAPIC_ID_SHIFT) & LAPIC_ID_MASK];
		DBG("CPU: %d, GSBASE initial value: 0x%llx\n", cpu, rdmsr64(MSR_IA32_GS_BASE));
		cpu_desc_load(cpu_datap(cpu));
	}

	postcode(VSTART_EXIT);
	/* Jump to the C entry point on this CPU's interrupt stack; never returns. */
	x86_init_wrapper(is_boot_cpu ? (uintptr_t) i386_init
				     : (uintptr_t) i386_init_slave,
			 cpu_datap(cpu)->cpu_int_stack_top);
}
449 | |
/*
 * Intentionally empty; invoked around init milestones in i386_init().
 * NOTE(review): presumably a tracing/probe anchor point -- confirm.
 */
void
pstate_trace(void)
{
}
454 | |
/*
 * Cpu initialization.  Running virtual, but without MACH VM
 * set up.  Parses early boot-args, initializes consoles and the VM,
 * and ends by entering machine_startup() (which does not return here).
 */
void
i386_init(void)
{
	unsigned int	maxmem;
	uint64_t	maxmemtouse;
	unsigned int	cpus = 0;
	boolean_t	fidn;
	boolean_t	IA32e = TRUE;

	postcode(I386_INIT_ENTRY);

	pal_i386_init();
	tsc_init();
	rtclock_early_init();	/* mach_absolute_time() now functional */

	kernel_debug_string_early("i386_init");
	pstate_trace();

#if CONFIG_MCA
	/* Initialize machine-check handling */
	mca_cpu_init();
#endif

	master_cpu = 0;
	cpu_init();

	postcode(CPU_INIT_D);

	printf_init();		/* Init this in case we need debugger */
	panic_init();		/* Init this in case we need debugger */

	/* setup debugging output if one has been chosen */
	kernel_debug_string_early("PE_init_kprintf");
	PE_init_kprintf(FALSE);

	kernel_debug_string_early("kernel_early_bootstrap");
	kernel_early_bootstrap();

	/* "diag" boot-arg: diagnostics flags; default to none. */
	if (!PE_parse_boot_argn("diag", &dgWork.dgFlags, sizeof (dgWork.dgFlags)))
		dgWork.dgFlags = 0;

	serialmode = 0;
	if (PE_parse_boot_argn("serial", &serialmode, sizeof(serialmode))) {
		/* We want a serial keyboard and/or console */
		kprintf("Serial mode specified: %08X\n", serialmode);
		int force_sync = serialmode & SERIALMODE_SYNCDRAIN;
		if (force_sync || PE_parse_boot_argn("drain_uart_sync", &force_sync, sizeof(force_sync))) {
			if (force_sync) {
				serialmode |= SERIALMODE_SYNCDRAIN;
				kprintf(
				    "WARNING: Forcing uart driver to output synchronously."
				    "printf()s/IOLogs will impact kernel performance.\n"
				    "You are advised to avoid using 'drain_uart_sync' boot-arg.\n");
			}
		}
	}
	if (serialmode & SERIALMODE_OUTPUT) {
		(void)switch_to_serial_console();
		disableConsoleOutput = FALSE;	/* Allow printfs to happen */
	}

	/* setup console output */
	kernel_debug_string_early("PE_init_printf");
	PE_init_printf(FALSE);

	kprintf("version_variant = %s\n", version_variant);
	kprintf("version = %s\n", version);

	/* "maxmem" boot-arg is given in MB; 0 means no cap. */
	if (!PE_parse_boot_argn("maxmem", &maxmem, sizeof (maxmem)))
		maxmemtouse = 0;
	else
		maxmemtouse = ((uint64_t)maxmem) * MB;

	/* "cpus" boot-arg can only lower the CPU count, never raise it. */
	if (PE_parse_boot_argn("cpus", &cpus, sizeof (cpus))) {
		if ((0 < cpus) && (cpus < max_ncpus))
			max_ncpus = cpus;
	}

	/*
	 * debug support for > 4G systems
	 */
	PE_parse_boot_argn("himemory_mode", &vm_himemory_mode, sizeof (vm_himemory_mode));
	if (vm_himemory_mode != 0)
		kprintf("himemory_mode: %d\n", vm_himemory_mode);

	if (!PE_parse_boot_argn("immediate_NMI", &fidn, sizeof (fidn)))
		force_immediate_debugger_NMI = FALSE;
	else
		force_immediate_debugger_NMI = fidn;

#if DEBUG
	nanoseconds_to_absolutetime(URGENCY_NOTIFICATION_ASSERT_NS, &urgency_notification_assert_abstime_threshold);
#endif
	PE_parse_boot_argn("urgency_notification_abstime",
	    &urgency_notification_assert_abstime_threshold,
	    sizeof(urgency_notification_assert_abstime_threshold));

	/* Disable NX tracking if the CPU lacks the XD feature bit. */
	if (!(cpuid_extfeatures() & CPUID_EXTFEATURE_XD))
		nx_enabled = 0;

	/*
	 * VM initialization, after this we're using page tables...
	 * The maximum number of cpus must be set beforehand.
	 */
	kernel_debug_string_early("i386_vm_init");
	i386_vm_init(maxmemtouse, IA32e, kernelBootArgs);

	/* create the console for verbose or pretty mode */
	/* Note: doing this prior to tsc_init() allows for graceful panic! */
	PE_init_platform(TRUE, kernelBootArgs);
	PE_create_console();

	kernel_debug_string_early("power_management_init");
	power_management_init();
	processor_bootstrap();
	thread_bootstrap();

	pstate_trace();
	kernel_debug_string_early("machine_startup");
	machine_startup();
	pstate_trace();
}
581 | |
/*
 * Common bring-up path for non-boot (slave) CPUs.
 *
 * fast_restart: TRUE when the CPU is being resurrected from a halt
 * (i386_init_slave_fast) and can skip the hardware re-initialization
 * (CR0 cache bits, LAPIC, FPU, MTRR, microcode) performed on a cold
 * start.  Ends in slave_main(), which must not return.
 */
static void
do_init_slave(boolean_t fast_restart)
{
	void	*init_param	= FULL_SLAVE_INIT;

	postcode(I386_INIT_SLAVE);

	if (!fast_restart) {
		/* Ensure that caching and write-through are enabled */
		set_cr0(get_cr0() & ~(CR0_NW|CR0_CD));

		DBG("i386_init_slave() CPU%d: phys (%d) active.\n",
		    get_cpu_number(), get_cpu_phys_number());

		assert(!ml_get_interrupts_enabled());

		cpu_syscall_init(current_cpu_datap());
		pmap_cpu_init();

#if CONFIG_MCA
		mca_cpu_init();
#endif

		LAPIC_INIT();
		lapic_configure();
		LAPIC_DUMP();
		LAPIC_CPU_MAP_DUMP();

		init_fpu();

#if CONFIG_MTRR
		mtrr_update_cpu();
#endif
		/* update CPU microcode */
		ucode_update_wake();
	} else
		init_param = FAST_SLAVE_INIT;

#if CONFIG_VMX
	/* resume VT operation */
	vmx_resume(FALSE);
#endif

#if CONFIG_MTRR
	if (!fast_restart)
		pat_init();
#endif

	cpu_thread_init();	/* not strictly necessary */

	cpu_init();	/* Sets cpu_running which starter cpu waits for */
	slave_main(init_param);

	panic("do_init_slave() returned from slave_main()");
}
637 | |
/*
 * i386_init_slave() is called from pstart.
 * We're in the cpu's interrupt stack with interrupts disabled.
 * At this point we are in legacy mode. We need to switch on IA32e
 * if the mode is set to 64-bits.
 */
void
i386_init_slave(void)
{
	do_init_slave(FALSE);	/* full cold-start bring-up */
}
649 | |
/*
 * i386_init_slave_fast() is called from pmCPUHalt.
 * We're running on the idle thread and need to fix up
 * some accounting and get it so that the scheduler sees this
 * CPU again.
 */
void
i386_init_slave_fast(void)
{
	do_init_slave(TRUE);	/* skip hardware re-initialization */
}
661 | |
662 | #include <libkern/kernel_mach_header.h> |
663 | |
/* TODO: Evaluate global PTEs for the double-mapped translations */

/* Virtual bounds of the double-mapped (high alias) region; dblmap_max
 * also serves as the allocation cursor for dyn_dblmap().
 */
uint64_t dblmap_base, dblmap_max;
kernel_segment_command_t *hdescseg;	/* the __HIB segment of the kernel image */

pt_entry_t *dblmapL3;			/* L3 table anchoring the doublemap subtree */
unsigned int dblallocs;			/* pages taken from ALLOCPAGES() for the doublemap */
/* Offset added to a __HIB kernel VA to reach its double-mapped alias. */
uint64_t dblmap_dist;
extern uint64_t idt64_hndl_table0[];
674 | |
/*
 * Build the "double map": a second, high-address alias of the kernel's
 * __HIB segment under its own PML4 slot.  The __HIB __text section keeps
 * an executable mapping; the rest of the segment is mapped writable+NX.
 * Also patches idt64_hndl_table0 so the trampolines use the alias.
 */
void doublemap_init(void) {
	dblmapL3 = ALLOCPAGES(1); // for 512 1GiB entries
	dblallocs++;

	struct {
		pt_entry_t entries[PTE_PER_PAGE];
	} * dblmapL2 = ALLOCPAGES(1); // for 512 2MiB entries
	dblallocs++;

	dblmapL3[0] = ((uintptr_t)ID_MAP_VTOP(&dblmapL2[0]))
	    | INTEL_PTE_VALID
	    | INTEL_PTE_WRITE;

	hdescseg = getsegbynamefromheader(&_mh_execute_header, "__HIB");

	/* Whole __HIB segment, rounded to whole pages. */
	vm_offset_t hdescb = hdescseg->vmaddr;
	unsigned long hdescsz = hdescseg->vmsize;
	unsigned long hdescszr = round_page_64(hdescsz);
	vm_offset_t hdescc = hdescb, hdesce = hdescb + hdescszr;

	/* The executable __HIB,__text subrange. */
	kernel_section_t *thdescsect = getsectbynamefromheader(&_mh_execute_header, "__HIB", "__text");
	vm_offset_t thdescb = thdescsect->addr;
	unsigned long thdescsz = thdescsect->size;
	unsigned long thdescszr = round_page_64(thdescsz);
	vm_offset_t thdesce = thdescb + thdescszr;

	assert((hdescb & 0xFFF) == 0);
	/* Mirror HIB translations into the double-mapped pagetable subtree */
	for(int i = 0; hdescc < hdesce; i++) {
		struct {
			pt_entry_t entries[PTE_PER_PAGE];
		} * dblmapL1 = ALLOCPAGES(1);
		dblallocs++;
		dblmapL2[0].entries[i] = ((uintptr_t)ID_MAP_VTOP(&dblmapL1[0])) | INTEL_PTE_VALID | INTEL_PTE_WRITE | INTEL_PTE_REF;
		int hdescn = (int) ((hdesce - hdescc) / PAGE_SIZE);
		for (int j = 0; j < MIN(PTE_PER_PAGE, hdescn); j++) {
			uint64_t template = INTEL_PTE_VALID;
			if ((hdescc >= thdescb) && (hdescc < thdesce)) {
				/* executable */
			} else {
				template |= INTEL_PTE_WRITE | INTEL_PTE_NX ; /* Writeable, NX */
			}
			dblmapL1[0].entries[j] = ((uintptr_t)ID_MAP_VTOP(hdescc)) | template;
			hdescc += PAGE_SIZE;
		}
	}

	IdlePML4[KERNEL_DBLMAP_PML4_INDEX] = ((uintptr_t)ID_MAP_VTOP(dblmapL3)) | INTEL_PTE_VALID | INTEL_PTE_WRITE | INTEL_PTE_REF;

	/* NOTE(review): dblmapL3 (a pointer) is passed as KVADDR's second
	 * (L3-index) argument -- presumably relying on KVADDR masking it to
	 * an index; verify against the KVADDR definition.
	 */
	dblmap_base = KVADDR(KERNEL_DBLMAP_PML4_INDEX, dblmapL3, 0, 0);
	dblmap_max = dblmap_base + hdescszr;
	/* Calculate the double-map distance, which accounts for the current
	 * KASLR slide
	 */

	dblmap_dist = dblmap_base - hdescb;
	idt64_hndl_table0[1] = DBLMAP(idt64_hndl_table0[1]);
	idt64_hndl_table0[6] = (uint64_t)(uintptr_t)&kernel_stack_mask;

	extern cpu_data_t cpshadows[], scdatas[];
	uintptr_t cd1 = (uintptr_t) &cpshadows[0];
	uintptr_t cd2 = (uintptr_t) &scdatas[0];
	/* Record the displacement from the kernel's per-CPU data pointer, eventually
	 * programmed into GSBASE, to the "shadows" in the doublemapped
	 * region. These are not aliases, but separate physical allocations
	 * containing data required in the doublemapped trampolines.
	 */
	idt64_hndl_table0[2] = dblmap_dist + cd1 - cd2;

	DBG("Double map base: 0x%qx\n", dblmap_base);
	DBG("double map idlepml4[%d]: 0x%llx\n", KERNEL_DBLMAP_PML4_INDEX, IdlePML4[KERNEL_DBLMAP_PML4_INDEX]);
	assert(LDTSZ > LDTSZ_MIN);
}
748 | |
749 | vm_offset_t dyn_dblmap(vm_offset_t, vm_offset_t); |
750 | |
751 | #include <i386/pmap_internal.h> |
752 | |
753 | /* Use of this routine is expected to be synchronized by callers |
754 | * Creates non-executable aliases. |
755 | */ |
756 | vm_offset_t dyn_dblmap(vm_offset_t cva, vm_offset_t sz) { |
757 | vm_offset_t ava = dblmap_max; |
758 | |
759 | assert((sz & PAGE_MASK) == 0); |
760 | assert(cva != 0); |
761 | |
762 | pmap_alias(ava, cva, cva + sz, VM_PROT_READ | VM_PROT_WRITE, PMAP_EXPAND_OPTIONS_ALIASMAP); |
763 | dblmap_max += sz; |
764 | return (ava - cva); |
765 | } |
766 | /* Adjust offsets interior to the bootstrap interrupt descriptor table to redirect |
767 | * control to the double-mapped interrupt vectors. The IDTR proper will be |
768 | * programmed via cpu_desc_load() |
769 | */ |
770 | void idt64_remap(void) { |
771 | for (int i = 0; i < IDTSZ; i++) { |
772 | master_idt64[i].offset64 = DBLMAP(master_idt64[i].offset64); |
773 | } |
774 | } |
775 | |