1 | /* |
2 | * Copyright (c) 2000-2012 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | |
29 | #include <i386/machine_routines.h> |
30 | #include <i386/io_map_entries.h> |
31 | #include <i386/cpuid.h> |
32 | #include <i386/fpu.h> |
33 | #include <mach/processor.h> |
34 | #include <kern/processor.h> |
35 | #include <kern/machine.h> |
36 | |
37 | #include <kern/cpu_number.h> |
38 | #include <kern/thread.h> |
39 | #include <kern/thread_call.h> |
40 | #include <kern/policy_internal.h> |
41 | |
42 | #include <prng/random.h> |
43 | #include <i386/machine_cpu.h> |
44 | #include <i386/lapic.h> |
45 | #include <i386/bit_routines.h> |
46 | #include <i386/mp_events.h> |
47 | #include <i386/pmCPU.h> |
48 | #include <i386/trap.h> |
49 | #include <i386/tsc.h> |
50 | #include <i386/cpu_threads.h> |
51 | #include <i386/proc_reg.h> |
52 | #include <mach/vm_param.h> |
53 | #include <i386/pmap.h> |
54 | #include <i386/pmap_internal.h> |
55 | #include <i386/misc_protos.h> |
56 | #include <kern/timer_queue.h> |
57 | #if KPC |
58 | #include <kern/kpc.h> |
59 | #endif |
60 | #include <architecture/i386/pio.h> |
61 | #include <i386/cpu_data.h> |
62 | #if DEBUG |
63 | #define DBG(x...) kprintf("DBG: " x) |
64 | #else |
65 | #define DBG(x...) |
66 | #endif |
67 | |
68 | #if MONOTONIC |
69 | #include <kern/monotonic.h> |
70 | #endif /* MONOTONIC */ |
71 | |
72 | extern void wakeup(void *); |
73 | |
74 | static int max_cpus_initialized = 0; |
75 | |
76 | uint64_t LockTimeOut; |
77 | uint64_t TLBTimeOut; |
78 | uint64_t LockTimeOutTSC; |
79 | uint32_t LockTimeOutUsec; |
80 | uint64_t MutexSpin; |
81 | uint64_t LastDebuggerEntryAllowance; |
82 | uint64_t delay_spin_threshold; |
83 | |
84 | extern uint64_t panic_restart_timeout; |
85 | |
86 | boolean_t virtualized = FALSE; |
87 | |
88 | decl_simple_lock_data(static, ml_timer_evaluation_slock); |
89 | uint32_t ml_timer_eager_evaluations; |
90 | uint64_t ml_timer_eager_evaluation_max; |
91 | static boolean_t ml_timer_evaluation_in_progress = FALSE; |
92 | |
93 | |
94 | #define MAX_CPUS_SET 0x1 |
95 | #define MAX_CPUS_WAIT 0x2 |
96 | |
97 | /* IO memory map services */ |
98 | |
99 | /* Map memory map IO space */ |
100 | vm_offset_t ml_io_map( |
101 | vm_offset_t phys_addr, |
102 | vm_size_t size) |
103 | { |
104 | return(io_map(phys_addr,size,VM_WIMG_IO)); |
105 | } |
106 | |
107 | /* boot memory allocation */ |
108 | vm_offset_t ml_static_malloc( |
109 | __unused vm_size_t size) |
110 | { |
111 | return((vm_offset_t)NULL); |
112 | } |
113 | |
114 | |
115 | void ml_get_bouncepool_info(vm_offset_t *phys_addr, vm_size_t *size) |
116 | { |
117 | *phys_addr = 0; |
118 | *size = 0; |
119 | } |
120 | |
121 | |
122 | vm_offset_t |
123 | ml_static_ptovirt( |
124 | vm_offset_t paddr) |
125 | { |
126 | #if defined(__x86_64__) |
127 | return (vm_offset_t)(((unsigned long) paddr) | VM_MIN_KERNEL_ADDRESS); |
128 | #else |
129 | return (vm_offset_t)((paddr) | LINEAR_KERNEL_ADDRESS); |
130 | #endif |
131 | } |
132 | |
133 | vm_offset_t |
134 | ml_static_slide( |
135 | vm_offset_t vaddr) |
136 | { |
137 | return VM_KERNEL_SLIDE(vaddr); |
138 | } |
139 | |
140 | vm_offset_t |
141 | ml_static_unslide( |
142 | vm_offset_t vaddr) |
143 | { |
144 | return VM_KERNEL_UNSLIDE(vaddr); |
145 | } |
146 | |
147 | |
148 | /* |
149 | * Routine: ml_static_mfree |
150 | * Function: |
151 | */ |
152 | void |
153 | ml_static_mfree( |
154 | vm_offset_t vaddr, |
155 | vm_size_t size) |
156 | { |
157 | addr64_t vaddr_cur; |
158 | ppnum_t ppn; |
159 | uint32_t freed_pages = 0; |
160 | assert(vaddr >= VM_MIN_KERNEL_ADDRESS); |
161 | |
162 | assert((vaddr & (PAGE_SIZE-1)) == 0); /* must be page aligned */ |
163 | |
164 | for (vaddr_cur = vaddr; |
165 | vaddr_cur < round_page_64(vaddr+size); |
166 | vaddr_cur += PAGE_SIZE) { |
167 | ppn = pmap_find_phys(kernel_pmap, vaddr_cur); |
168 | if (ppn != (vm_offset_t)NULL) { |
169 | kernel_pmap->stats.resident_count++; |
170 | if (kernel_pmap->stats.resident_count > |
171 | kernel_pmap->stats.resident_max) { |
172 | kernel_pmap->stats.resident_max = |
173 | kernel_pmap->stats.resident_count; |
174 | } |
175 | pmap_remove(kernel_pmap, vaddr_cur, vaddr_cur+PAGE_SIZE); |
176 | assert(pmap_valid_page(ppn)); |
177 | if (IS_MANAGED_PAGE(ppn)) { |
178 | vm_page_create(ppn,(ppn+1)); |
179 | freed_pages++; |
180 | } |
181 | } |
182 | } |
183 | vm_page_lockspin_queues(); |
184 | vm_page_wire_count -= freed_pages; |
185 | vm_page_wire_count_initial -= freed_pages; |
186 | vm_page_unlock_queues(); |
187 | |
188 | #if DEBUG |
189 | kprintf("ml_static_mfree: Released 0x%x pages at VA %p, size:0x%llx, last ppn: 0x%x\n" , freed_pages, (void *)vaddr, (uint64_t)size, ppn); |
190 | #endif |
191 | } |
192 | |
193 | |
194 | /* virtual to physical on wired pages */ |
195 | vm_offset_t ml_vtophys( |
196 | vm_offset_t vaddr) |
197 | { |
198 | return (vm_offset_t)kvtophys(vaddr); |
199 | } |
200 | |
201 | /* |
202 | * Routine: ml_nofault_copy |
203 | * Function: Perform a physical mode copy if the source and |
204 | * destination have valid translations in the kernel pmap. |
205 | * If translations are present, they are assumed to |
206 | * be wired; i.e. no attempt is made to guarantee that the |
 *		translations obtained remain valid for
208 | * the duration of the copy process. |
209 | */ |
210 | |
211 | vm_size_t ml_nofault_copy( |
212 | vm_offset_t virtsrc, vm_offset_t virtdst, vm_size_t size) |
213 | { |
214 | addr64_t cur_phys_dst, cur_phys_src; |
215 | uint32_t count, nbytes = 0; |
216 | |
217 | while (size > 0) { |
218 | if (!(cur_phys_src = kvtophys(virtsrc))) |
219 | break; |
220 | if (!(cur_phys_dst = kvtophys(virtdst))) |
221 | break; |
222 | if (!pmap_valid_page(i386_btop(cur_phys_dst)) || !pmap_valid_page(i386_btop(cur_phys_src))) |
223 | break; |
224 | count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK)); |
225 | if (count > (PAGE_SIZE - (cur_phys_dst & PAGE_MASK))) |
226 | count = (uint32_t)(PAGE_SIZE - (cur_phys_dst & PAGE_MASK)); |
227 | if (count > size) |
228 | count = (uint32_t)size; |
229 | |
230 | bcopy_phys(cur_phys_src, cur_phys_dst, count); |
231 | |
232 | nbytes += count; |
233 | virtsrc += count; |
234 | virtdst += count; |
235 | size -= count; |
236 | } |
237 | |
238 | return nbytes; |
239 | } |
240 | |
241 | /* |
242 | * Routine: ml_validate_nofault |
 * Function:	Validate that this address range has valid translations
 *		in the kernel pmap. If translations are present, they are
 *		assumed to be wired; i.e. no attempt is made to guarantee
 *		that the translations persist after the check.
247 | * Returns: TRUE if the range is mapped and will not cause a fault, |
248 | * FALSE otherwise. |
249 | */ |
250 | |
251 | boolean_t ml_validate_nofault( |
252 | vm_offset_t virtsrc, vm_size_t size) |
253 | { |
254 | addr64_t cur_phys_src; |
255 | uint32_t count; |
256 | |
257 | while (size > 0) { |
258 | if (!(cur_phys_src = kvtophys(virtsrc))) |
259 | return FALSE; |
260 | if (!pmap_valid_page(i386_btop(cur_phys_src))) |
261 | return FALSE; |
262 | count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK)); |
263 | if (count > size) |
264 | count = (uint32_t)size; |
265 | |
266 | virtsrc += count; |
267 | size -= count; |
268 | } |
269 | |
270 | return TRUE; |
271 | } |
272 | |
273 | /* Interrupt handling */ |
274 | |
275 | /* Initialize Interrupts */ |
276 | void ml_init_interrupt(void) |
277 | { |
278 | (void) ml_set_interrupts_enabled(TRUE); |
279 | } |
280 | |
281 | |
282 | /* Get Interrupts Enabled */ |
283 | boolean_t ml_get_interrupts_enabled(void) |
284 | { |
285 | unsigned long flags; |
286 | |
287 | __asm__ volatile("pushf; pop %0" : "=r" (flags)); |
288 | return (flags & EFL_IF) != 0; |
289 | } |
290 | |
291 | /* Set Interrupts Enabled */ |
292 | boolean_t ml_set_interrupts_enabled(boolean_t enable) |
293 | { |
294 | unsigned long flags; |
295 | boolean_t istate; |
296 | |
297 | __asm__ volatile("pushf; pop %0" : "=r" (flags)); |
298 | |
299 | assert(get_interrupt_level() ? (enable == FALSE) : TRUE); |
300 | |
301 | istate = ((flags & EFL_IF) != 0); |
302 | |
303 | if (enable) { |
304 | __asm__ volatile("sti;nop" ); |
305 | |
306 | if ((get_preemption_level() == 0) && (*ast_pending() & AST_URGENT)) |
307 | __asm__ volatile ("int %0" :: "N" (T_PREEMPT)); |
308 | } |
309 | else { |
310 | if (istate) |
311 | __asm__ volatile("cli" ); |
312 | } |
313 | |
314 | return istate; |
315 | } |
316 | |
317 | /* Check if running at interrupt context */ |
318 | boolean_t ml_at_interrupt_context(void) |
319 | { |
320 | return get_interrupt_level() != 0; |
321 | } |
322 | |
323 | void ml_get_power_state(boolean_t *icp, boolean_t *pidlep) { |
324 | *icp = (get_interrupt_level() != 0); |
325 | /* These will be technically inaccurate for interrupts that occur |
326 | * successively within a single "idle exit" event, but shouldn't |
327 | * matter statistically. |
328 | */ |
329 | *pidlep = (current_cpu_datap()->lcpu.package->num_idle == topoParms.nLThreadsPerPackage); |
330 | } |
331 | |
332 | /* Generate a fake interrupt */ |
333 | void ml_cause_interrupt(void) |
334 | { |
335 | panic("ml_cause_interrupt not defined yet on Intel" ); |
336 | } |
337 | |
338 | /* |
339 | * TODO: transition users of this to kernel_thread_start_priority |
340 | * ml_thread_policy is an unsupported KPI |
341 | */ |
342 | void ml_thread_policy( |
343 | thread_t thread, |
344 | __unused unsigned policy_id, |
345 | unsigned policy_info) |
346 | { |
347 | if (policy_info & MACHINE_NETWORK_WORKLOOP) { |
348 | thread_precedence_policy_data_t info; |
349 | __assert_only kern_return_t kret; |
350 | |
351 | info.importance = 1; |
352 | |
353 | kret = thread_policy_set_internal(thread, THREAD_PRECEDENCE_POLICY, |
354 | (thread_policy_t)&info, |
355 | THREAD_PRECEDENCE_POLICY_COUNT); |
356 | assert(kret == KERN_SUCCESS); |
357 | } |
358 | } |
359 | |
/* Install an interrupt handler */
361 | void ml_install_interrupt_handler( |
362 | void *nub, |
363 | int source, |
364 | void *target, |
365 | IOInterruptHandler handler, |
366 | void *refCon) |
367 | { |
368 | boolean_t current_state; |
369 | |
370 | current_state = ml_set_interrupts_enabled(FALSE); |
371 | |
372 | PE_install_interrupt_handler(nub, source, target, |
373 | (IOInterruptHandler) handler, refCon); |
374 | |
375 | (void) ml_set_interrupts_enabled(current_state); |
376 | |
377 | initialize_screen(NULL, kPEAcquireScreen); |
378 | } |
379 | |
380 | |
381 | void |
382 | machine_signal_idle( |
383 | processor_t processor) |
384 | { |
385 | cpu_interrupt(processor->cpu_id); |
386 | } |
387 | |
388 | void |
389 | machine_signal_idle_deferred( |
390 | __unused processor_t processor) |
391 | { |
392 | panic("Unimplemented" ); |
393 | } |
394 | |
395 | void |
396 | machine_signal_idle_cancel( |
397 | __unused processor_t processor) |
398 | { |
399 | panic("Unimplemented" ); |
400 | } |
401 | |
402 | static kern_return_t |
403 | register_cpu( |
404 | uint32_t lapic_id, |
405 | processor_t *processor_out, |
406 | boolean_t boot_cpu ) |
407 | { |
408 | int target_cpu; |
409 | cpu_data_t *this_cpu_datap; |
410 | |
411 | this_cpu_datap = cpu_data_alloc(boot_cpu); |
412 | if (this_cpu_datap == NULL) { |
413 | return KERN_FAILURE; |
414 | } |
415 | target_cpu = this_cpu_datap->cpu_number; |
416 | assert((boot_cpu && (target_cpu == 0)) || |
417 | (!boot_cpu && (target_cpu != 0))); |
418 | |
419 | lapic_cpu_map(lapic_id, target_cpu); |
420 | |
	/* The cpu_id is not known at the registration phase. Just use the
	 * lapic_id for now.
	 */
424 | this_cpu_datap->cpu_phys_number = lapic_id; |
425 | |
426 | this_cpu_datap->cpu_console_buf = console_cpu_alloc(boot_cpu); |
427 | if (this_cpu_datap->cpu_console_buf == NULL) |
428 | goto failed; |
429 | |
430 | #if KPC |
431 | if (kpc_register_cpu(this_cpu_datap) != TRUE) |
432 | goto failed; |
433 | #endif |
434 | |
435 | if (!boot_cpu) { |
436 | cpu_thread_alloc(this_cpu_datap->cpu_number); |
437 | if (this_cpu_datap->lcpu.core == NULL) |
438 | goto failed; |
439 | |
440 | #if NCOPY_WINDOWS > 0 |
441 | this_cpu_datap->cpu_pmap = pmap_cpu_alloc(boot_cpu); |
442 | if (this_cpu_datap->cpu_pmap == NULL) |
443 | goto failed; |
444 | #endif |
445 | |
446 | this_cpu_datap->cpu_processor = cpu_processor_alloc(boot_cpu); |
447 | if (this_cpu_datap->cpu_processor == NULL) |
448 | goto failed; |
449 | /* |
450 | * processor_init() deferred to topology start |
451 | * because "slot numbers" a.k.a. logical processor numbers |
452 | * are not yet finalized. |
453 | */ |
454 | } |
455 | |
456 | *processor_out = this_cpu_datap->cpu_processor; |
457 | |
458 | return KERN_SUCCESS; |
459 | |
460 | failed: |
461 | cpu_processor_free(this_cpu_datap->cpu_processor); |
462 | #if NCOPY_WINDOWS > 0 |
463 | pmap_cpu_free(this_cpu_datap->cpu_pmap); |
464 | #endif |
465 | console_cpu_free(this_cpu_datap->cpu_console_buf); |
466 | #if KPC |
467 | kpc_unregister_cpu(this_cpu_datap); |
468 | #endif /* KPC */ |
469 | |
470 | return KERN_FAILURE; |
471 | } |
472 | |
473 | |
474 | kern_return_t |
475 | ml_processor_register( |
476 | cpu_id_t cpu_id, |
477 | uint32_t lapic_id, |
478 | processor_t *processor_out, |
479 | boolean_t boot_cpu, |
480 | boolean_t start ) |
481 | { |
482 | static boolean_t done_topo_sort = FALSE; |
483 | static uint32_t num_registered = 0; |
484 | |
485 | /* Register all CPUs first, and track max */ |
486 | if( start == FALSE ) |
487 | { |
488 | num_registered++; |
489 | |
490 | DBG( "registering CPU lapic id %d\n" , lapic_id ); |
491 | |
492 | return register_cpu( lapic_id, processor_out, boot_cpu ); |
493 | } |
494 | |
495 | /* Sort by topology before we start anything */ |
496 | if( !done_topo_sort ) |
497 | { |
498 | DBG( "about to start CPUs. %d registered\n" , num_registered ); |
499 | |
500 | cpu_topology_sort( num_registered ); |
501 | done_topo_sort = TRUE; |
502 | } |
503 | |
504 | /* Assign the cpu ID */ |
505 | uint32_t cpunum = -1; |
506 | cpu_data_t *this_cpu_datap = NULL; |
507 | |
508 | /* find cpu num and pointer */ |
509 | cpunum = ml_get_cpuid( lapic_id ); |
510 | |
511 | if( cpunum == 0xFFFFFFFF ) /* never heard of it? */ |
512 | panic( "trying to start invalid/unregistered CPU %d\n" , lapic_id ); |
513 | |
514 | this_cpu_datap = cpu_datap(cpunum); |
515 | |
516 | /* fix the CPU id */ |
517 | this_cpu_datap->cpu_id = cpu_id; |
518 | |
519 | /* allocate and initialize other per-cpu structures */ |
520 | if (!boot_cpu) { |
521 | mp_cpus_call_cpu_init(cpunum); |
522 | early_random_cpu_init(cpunum); |
523 | } |
524 | |
525 | /* output arg */ |
526 | *processor_out = this_cpu_datap->cpu_processor; |
527 | |
528 | /* OK, try and start this CPU */ |
529 | return cpu_topology_start_cpu( cpunum ); |
530 | } |
531 | |
532 | |
533 | void |
534 | ml_cpu_get_info(ml_cpu_info_t *cpu_infop) |
535 | { |
536 | boolean_t os_supports_sse; |
537 | i386_cpu_info_t *cpuid_infop; |
538 | |
539 | if (cpu_infop == NULL) |
540 | return; |
541 | |
542 | /* |
543 | * Are we supporting MMX/SSE/SSE2/SSE3? |
544 | * As distinct from whether the cpu has these capabilities. |
545 | */ |
546 | os_supports_sse = !!(get_cr4() & CR4_OSXMM); |
547 | |
548 | if (ml_fpu_avx_enabled()) |
549 | cpu_infop->vector_unit = 9; |
550 | else if ((cpuid_features() & CPUID_FEATURE_SSE4_2) && os_supports_sse) |
551 | cpu_infop->vector_unit = 8; |
552 | else if ((cpuid_features() & CPUID_FEATURE_SSE4_1) && os_supports_sse) |
553 | cpu_infop->vector_unit = 7; |
554 | else if ((cpuid_features() & CPUID_FEATURE_SSSE3) && os_supports_sse) |
555 | cpu_infop->vector_unit = 6; |
556 | else if ((cpuid_features() & CPUID_FEATURE_SSE3) && os_supports_sse) |
557 | cpu_infop->vector_unit = 5; |
558 | else if ((cpuid_features() & CPUID_FEATURE_SSE2) && os_supports_sse) |
559 | cpu_infop->vector_unit = 4; |
560 | else if ((cpuid_features() & CPUID_FEATURE_SSE) && os_supports_sse) |
561 | cpu_infop->vector_unit = 3; |
562 | else if (cpuid_features() & CPUID_FEATURE_MMX) |
563 | cpu_infop->vector_unit = 2; |
564 | else |
565 | cpu_infop->vector_unit = 0; |
566 | |
567 | cpuid_infop = cpuid_info(); |
568 | |
569 | cpu_infop->cache_line_size = cpuid_infop->cache_linesize; |
570 | |
571 | cpu_infop->l1_icache_size = cpuid_infop->cache_size[L1I]; |
572 | cpu_infop->l1_dcache_size = cpuid_infop->cache_size[L1D]; |
573 | |
574 | if (cpuid_infop->cache_size[L2U] > 0) { |
575 | cpu_infop->l2_settings = 1; |
576 | cpu_infop->l2_cache_size = cpuid_infop->cache_size[L2U]; |
577 | } else { |
578 | cpu_infop->l2_settings = 0; |
579 | cpu_infop->l2_cache_size = 0xFFFFFFFF; |
580 | } |
581 | |
582 | if (cpuid_infop->cache_size[L3U] > 0) { |
583 | cpu_infop->l3_settings = 1; |
584 | cpu_infop->l3_cache_size = cpuid_infop->cache_size[L3U]; |
585 | } else { |
586 | cpu_infop->l3_settings = 0; |
587 | cpu_infop->l3_cache_size = 0xFFFFFFFF; |
588 | } |
589 | } |
590 | |
591 | void |
592 | ml_init_max_cpus(unsigned long max_cpus) |
593 | { |
594 | boolean_t current_state; |
595 | |
596 | current_state = ml_set_interrupts_enabled(FALSE); |
597 | if (max_cpus_initialized != MAX_CPUS_SET) { |
598 | if (max_cpus > 0 && max_cpus <= MAX_CPUS) { |
599 | /* |
600 | * Note: max_cpus is the number of enabled processors |
601 | * that ACPI found; max_ncpus is the maximum number |
602 | * that the kernel supports or that the "cpus=" |
			 * boot-arg has set. Here we take the minimum.
604 | */ |
605 | machine_info.max_cpus = (integer_t)MIN(max_cpus, max_ncpus); |
606 | } |
607 | if (max_cpus_initialized == MAX_CPUS_WAIT) |
608 | wakeup((event_t)&max_cpus_initialized); |
609 | max_cpus_initialized = MAX_CPUS_SET; |
610 | } |
611 | (void) ml_set_interrupts_enabled(current_state); |
612 | } |
613 | |
614 | int |
615 | ml_get_max_cpus(void) |
616 | { |
617 | boolean_t current_state; |
618 | |
619 | current_state = ml_set_interrupts_enabled(FALSE); |
620 | if (max_cpus_initialized != MAX_CPUS_SET) { |
621 | max_cpus_initialized = MAX_CPUS_WAIT; |
622 | assert_wait((event_t)&max_cpus_initialized, THREAD_UNINT); |
623 | (void)thread_block(THREAD_CONTINUE_NULL); |
624 | } |
625 | (void) ml_set_interrupts_enabled(current_state); |
626 | return(machine_info.max_cpus); |
627 | } |
628 | |
629 | boolean_t |
630 | ml_wants_panic_trap_to_debugger(void) |
631 | { |
632 | return FALSE; |
633 | } |
634 | |
635 | void |
636 | ml_panic_trap_to_debugger(__unused const char *panic_format_str, |
637 | __unused va_list *panic_args, |
638 | __unused unsigned int reason, |
639 | __unused void *ctx, |
640 | __unused uint64_t panic_options_mask, |
641 | __unused unsigned long panic_caller) |
642 | { |
643 | return; |
644 | } |
645 | |
646 | /* |
647 | * Routine: ml_init_lock_timeout |
648 | * Function: |
649 | */ |
650 | void |
651 | ml_init_lock_timeout(void) |
652 | { |
653 | uint64_t abstime; |
654 | uint32_t mtxspin; |
655 | #if DEVELOPMENT || DEBUG |
656 | uint64_t default_timeout_ns = NSEC_PER_SEC>>2; |
657 | #else |
658 | uint64_t default_timeout_ns = NSEC_PER_SEC>>1; |
659 | #endif |
660 | uint32_t slto; |
661 | uint32_t prt; |
662 | |
663 | if (PE_parse_boot_argn("slto_us" , &slto, sizeof (slto))) |
664 | default_timeout_ns = slto * NSEC_PER_USEC; |
665 | |
666 | /* |
	 * LockTimeOut is in absolute time units, LockTimeOutTSC is in TSC ticks,
	 * and LockTimeOutUsec is a 32-bit value in microseconds.
669 | */ |
670 | LockTimeOutUsec = (uint32_t) (default_timeout_ns / NSEC_PER_USEC); |
671 | nanoseconds_to_absolutetime(default_timeout_ns, &abstime); |
672 | LockTimeOut = abstime; |
673 | LockTimeOutTSC = tmrCvt(abstime, tscFCvtn2t); |
674 | |
675 | /* |
676 | * TLBTimeOut dictates the TLB flush timeout period. It defaults to |
	 * LockTimeOut but can be overridden separately. In particular, a
	 * zero value inhibits the timeout-panic and cuts a trace event instead
679 | * - see pmap_flush_tlbs(). |
680 | */ |
681 | if (PE_parse_boot_argn("tlbto_us" , &slto, sizeof (slto))) { |
682 | default_timeout_ns = slto * NSEC_PER_USEC; |
683 | nanoseconds_to_absolutetime(default_timeout_ns, &abstime); |
		TLBTimeOut = abstime;	/* TLBTimeOut is 64-bit; don't truncate */
685 | } else { |
686 | TLBTimeOut = LockTimeOut; |
687 | } |
688 | |
689 | #if DEVELOPMENT || DEBUG |
690 | reportphyreaddelayabs = LockTimeOut >> 1; |
691 | #endif |
692 | if (PE_parse_boot_argn("phyreadmaxus" , &slto, sizeof (slto))) { |
693 | default_timeout_ns = slto * NSEC_PER_USEC; |
694 | nanoseconds_to_absolutetime(default_timeout_ns, &abstime); |
695 | reportphyreaddelayabs = abstime; |
696 | } |
697 | |
698 | if (PE_parse_boot_argn("mtxspin" , &mtxspin, sizeof (mtxspin))) { |
699 | if (mtxspin > USEC_PER_SEC>>4) |
700 | mtxspin = USEC_PER_SEC>>4; |
701 | nanoseconds_to_absolutetime(mtxspin*NSEC_PER_USEC, &abstime); |
702 | } else { |
703 | nanoseconds_to_absolutetime(10*NSEC_PER_USEC, &abstime); |
704 | } |
	MutexSpin = abstime;	/* MutexSpin is 64-bit; don't truncate */
706 | |
707 | nanoseconds_to_absolutetime(4ULL * NSEC_PER_SEC, &LastDebuggerEntryAllowance); |
708 | if (PE_parse_boot_argn("panic_restart_timeout" , &prt, sizeof (prt))) |
709 | nanoseconds_to_absolutetime(prt * NSEC_PER_SEC, &panic_restart_timeout); |
710 | |
711 | virtualized = ((cpuid_features() & CPUID_FEATURE_VMM) != 0); |
712 | if (virtualized) { |
713 | int vti; |
714 | |
715 | if (!PE_parse_boot_argn("vti" , &vti, sizeof (vti))) |
716 | vti = 6; |
717 | printf("Timeouts adjusted for virtualization (<<%d)\n" , vti); |
718 | kprintf("Timeouts adjusted for virtualization (<<%d):\n" , vti); |
719 | #define VIRTUAL_TIMEOUT_INFLATE64(_timeout) \ |
720 | MACRO_BEGIN \ |
721 | kprintf("%24s: 0x%016llx ", #_timeout, _timeout); \ |
722 | _timeout <<= vti; \ |
723 | kprintf("-> 0x%016llx\n", _timeout); \ |
724 | MACRO_END |
725 | #define VIRTUAL_TIMEOUT_INFLATE32(_timeout) \ |
726 | MACRO_BEGIN \ |
727 | kprintf("%24s: 0x%08x ", #_timeout, _timeout); \ |
728 | if ((_timeout <<vti) >> vti == _timeout) \ |
729 | _timeout <<= vti; \ |
730 | else \ |
731 | _timeout = ~0; /* cap rather than overflow */ \ |
732 | kprintf("-> 0x%08x\n", _timeout); \ |
733 | MACRO_END |
734 | VIRTUAL_TIMEOUT_INFLATE32(LockTimeOutUsec); |
735 | VIRTUAL_TIMEOUT_INFLATE64(LockTimeOut); |
736 | VIRTUAL_TIMEOUT_INFLATE64(LockTimeOutTSC); |
737 | VIRTUAL_TIMEOUT_INFLATE64(TLBTimeOut); |
738 | VIRTUAL_TIMEOUT_INFLATE64(MutexSpin); |
739 | VIRTUAL_TIMEOUT_INFLATE64(reportphyreaddelayabs); |
740 | } |
741 | |
742 | interrupt_latency_tracker_setup(); |
743 | simple_lock_init(&ml_timer_evaluation_slock, 0); |
744 | } |
745 | |
746 | /* |
747 | * Threshold above which we should attempt to block |
748 | * instead of spinning for clock_delay_until(). |
749 | */ |
750 | |
751 | void |
752 | ml_init_delay_spin_threshold(int threshold_us) |
753 | { |
754 | nanoseconds_to_absolutetime(threshold_us * NSEC_PER_USEC, &delay_spin_threshold); |
755 | } |
756 | |
757 | boolean_t |
758 | ml_delay_should_spin(uint64_t interval) |
759 | { |
760 | return (interval < delay_spin_threshold) ? TRUE : FALSE; |
761 | } |
762 | |
763 | /* |
764 | * This is called from the machine-independent layer |
765 | * to perform machine-dependent info updates. Defer to cpu_thread_init(). |
766 | */ |
767 | void |
768 | ml_cpu_up(void) |
769 | { |
770 | return; |
771 | } |
772 | |
773 | /* |
774 | * This is called from the machine-independent layer |
775 | * to perform machine-dependent info updates. |
776 | */ |
777 | void |
778 | ml_cpu_down(void) |
779 | { |
780 | i386_deactivate_cpu(); |
781 | |
782 | return; |
783 | } |
784 | |
785 | /* |
786 | * The following are required for parts of the kernel |
787 | * that cannot resolve these functions as inlines: |
788 | */ |
789 | extern thread_t current_act(void); |
790 | thread_t |
791 | current_act(void) |
792 | { |
793 | return(current_thread_fast()); |
794 | } |
795 | |
796 | #undef current_thread |
797 | extern thread_t current_thread(void); |
798 | thread_t |
799 | current_thread(void) |
800 | { |
801 | return(current_thread_fast()); |
802 | } |
803 | |
804 | |
805 | boolean_t ml_is64bit(void) { |
806 | |
807 | return (cpu_mode_is64bit()); |
808 | } |
809 | |
810 | |
811 | boolean_t ml_thread_is64bit(thread_t thread) { |
812 | |
813 | return (thread_is_64bit_addr(thread)); |
814 | } |
815 | |
816 | |
817 | boolean_t ml_state_is64bit(void *saved_state) { |
818 | |
819 | return is_saved_state64(saved_state); |
820 | } |
821 | |
822 | void ml_cpu_set_ldt(int selector) |
823 | { |
824 | /* |
825 | * Avoid loading the LDT |
826 | * if we're setting the KERNEL LDT and it's already set. |
827 | */ |
828 | if (selector == KERNEL_LDT && |
829 | current_cpu_datap()->cpu_ldt == KERNEL_LDT) |
830 | return; |
831 | |
832 | lldt(selector); |
833 | current_cpu_datap()->cpu_ldt = selector; |
834 | } |
835 | |
836 | void ml_fp_setvalid(boolean_t value) |
837 | { |
838 | fp_setvalid(value); |
839 | } |
840 | |
841 | uint64_t ml_cpu_int_event_time(void) |
842 | { |
843 | return current_cpu_datap()->cpu_int_event_time; |
844 | } |
845 | |
846 | vm_offset_t ml_stack_remaining(void) |
847 | { |
848 | uintptr_t local = (uintptr_t) &local; |
849 | |
850 | if (ml_at_interrupt_context() != 0) { |
851 | return (local - (current_cpu_datap()->cpu_int_stack_top - INTSTACK_SIZE)); |
852 | } else { |
853 | return (local - current_thread()->kernel_stack); |
854 | } |
855 | } |
856 | |
857 | #if KASAN |
858 | vm_offset_t ml_stack_base(void); |
859 | vm_size_t ml_stack_size(void); |
860 | |
861 | vm_offset_t |
862 | ml_stack_base(void) |
863 | { |
864 | if (ml_at_interrupt_context()) { |
865 | return current_cpu_datap()->cpu_int_stack_top - INTSTACK_SIZE; |
866 | } else { |
867 | return current_thread()->kernel_stack; |
868 | } |
869 | } |
870 | |
871 | vm_size_t |
872 | ml_stack_size(void) |
873 | { |
874 | if (ml_at_interrupt_context()) { |
875 | return INTSTACK_SIZE; |
876 | } else { |
877 | return kernel_stack_size; |
878 | } |
879 | } |
880 | #endif |
881 | |
882 | void |
883 | kernel_preempt_check(void) |
884 | { |
885 | boolean_t intr; |
886 | unsigned long flags; |
887 | |
888 | assert(get_preemption_level() == 0); |
889 | |
890 | if (__improbable(*ast_pending() & AST_URGENT)) { |
891 | /* |
892 | * can handle interrupts and preemptions |
893 | * at this point |
894 | */ |
895 | __asm__ volatile("pushf; pop %0" : "=r" (flags)); |
896 | |
897 | intr = ((flags & EFL_IF) != 0); |
898 | |
899 | /* |
900 | * now cause the PRE-EMPTION trap |
901 | */ |
902 | if (intr == TRUE){ |
903 | __asm__ volatile ("int %0" :: "N" (T_PREEMPT)); |
904 | } |
905 | } |
906 | } |
907 | |
908 | boolean_t machine_timeout_suspended(void) { |
909 | return (pmap_tlb_flush_timeout || spinlock_timed_out || panic_active() || mp_recent_debugger_activity() || ml_recent_wake()); |
910 | } |
911 | |
912 | /* Eagerly evaluate all pending timer and thread callouts |
913 | */ |
914 | void ml_timer_evaluate(void) { |
915 | KERNEL_DEBUG_CONSTANT(DECR_TIMER_RESCAN|DBG_FUNC_START, 0, 0, 0, 0, 0); |
916 | |
917 | uint64_t te_end, te_start = mach_absolute_time(); |
918 | simple_lock(&ml_timer_evaluation_slock); |
919 | ml_timer_evaluation_in_progress = TRUE; |
920 | thread_call_delayed_timer_rescan_all(); |
921 | mp_cpus_call(CPUMASK_ALL, ASYNC, timer_queue_expire_rescan, NULL); |
922 | ml_timer_evaluation_in_progress = FALSE; |
923 | ml_timer_eager_evaluations++; |
924 | te_end = mach_absolute_time(); |
925 | ml_timer_eager_evaluation_max = MAX(ml_timer_eager_evaluation_max, (te_end - te_start)); |
926 | simple_unlock(&ml_timer_evaluation_slock); |
927 | |
928 | KERNEL_DEBUG_CONSTANT(DECR_TIMER_RESCAN|DBG_FUNC_END, 0, 0, 0, 0, 0); |
929 | } |
930 | |
931 | boolean_t |
932 | ml_timer_forced_evaluation(void) { |
933 | return ml_timer_evaluation_in_progress; |
934 | } |
935 | |
936 | /* 32-bit right-rotate n bits */ |
937 | static inline uint32_t ror32(uint32_t val, const unsigned int n) |
938 | { |
939 | __asm__ volatile("rorl %%cl,%0" : "=r" (val) : "0" (val), "c" (n)); |
940 | return val; |
941 | } |
942 | |
943 | void |
944 | ml_entropy_collect(void) |
945 | { |
946 | uint32_t tsc_lo, tsc_hi; |
947 | uint32_t *ep; |
948 | |
949 | assert(cpu_number() == master_cpu); |
950 | |
951 | /* update buffer pointer cyclically */ |
952 | if (EntropyData.index_ptr - EntropyData.buffer == ENTROPY_BUFFER_SIZE) |
953 | ep = EntropyData.index_ptr = EntropyData.buffer; |
954 | else |
955 | ep = EntropyData.index_ptr++; |
956 | |
957 | rdtsc_nofence(tsc_lo, tsc_hi); |
958 | *ep = ror32(*ep, 9) ^ tsc_lo; |
959 | } |
960 | |
961 | uint64_t |
962 | ml_energy_stat(__unused thread_t t) { |
963 | return 0; |
964 | } |
965 | |
966 | void |
967 | ml_gpu_stat_update(uint64_t gpu_ns_delta) { |
968 | current_thread()->machine.thread_gpu_ns += gpu_ns_delta; |
969 | } |
970 | |
971 | uint64_t |
972 | ml_gpu_stat(thread_t t) { |
973 | return t->machine.thread_gpu_ns; |
974 | } |
975 | |
976 | int plctrace_enabled = 0; |
977 | |
978 | void _disable_preemption(void) { |
979 | disable_preemption_internal(); |
980 | } |
981 | |
982 | void _enable_preemption(void) { |
983 | enable_preemption_internal(); |
984 | } |
985 | |
986 | void plctrace_disable(void) { |
987 | plctrace_enabled = 0; |
988 | } |
989 | |
990 | static boolean_t ml_quiescing; |
991 | |
992 | void ml_set_is_quiescing(boolean_t quiescing) |
993 | { |
994 | assert(FALSE == ml_get_interrupts_enabled()); |
995 | ml_quiescing = quiescing; |
996 | } |
997 | |
998 | boolean_t ml_is_quiescing(void) |
999 | { |
1000 | assert(FALSE == ml_get_interrupts_enabled()); |
1001 | return (ml_quiescing); |
1002 | } |
1003 | |
1004 | uint64_t ml_get_booter_memory_size(void) |
1005 | { |
1006 | return (0); |
1007 | } |
1008 | |