1 | /* |
2 | * Copyright (c) 2000-2017 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | /* |
29 | * @OSF_COPYRIGHT@ |
30 | */ |
31 | /* |
32 | * Mach Operating System |
33 | * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University |
34 | * All Rights Reserved. |
35 | * |
36 | * Permission to use, copy, modify and distribute this software and its |
37 | * documentation is hereby granted, provided that both the copyright |
38 | * notice and this permission notice appear in all copies of the |
39 | * software, derivative works or modified versions, and any portions |
40 | * thereof, and that both notices appear in supporting documentation. |
41 | * |
42 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" |
43 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR |
44 | * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. |
45 | * |
46 | * Carnegie Mellon requests users of this software to return to |
47 | * |
48 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU |
49 | * School of Computer Science |
50 | * Carnegie Mellon University |
51 | * Pittsburgh PA 15213-3890 |
52 | * |
53 | * any improvements or extensions that they make and grant Carnegie Mellon |
54 | * the rights to redistribute these changes. |
55 | */ |
56 | /* |
57 | */ |
58 | |
59 | /* |
60 | * File: pmap.h |
61 | * |
62 | * Authors: Avadis Tevanian, Jr., Michael Wayne Young |
63 | * Date: 1985 |
64 | * |
65 | * Machine-dependent structures for the physical map module. |
66 | */ |
67 | #ifdef KERNEL_PRIVATE |
68 | #ifndef _PMAP_MACHINE_ |
69 | #define _PMAP_MACHINE_ 1 |
70 | |
71 | #ifndef ASSEMBLER |
72 | |
73 | #include <mach/kern_return.h> |
74 | #include <mach/machine/vm_types.h> |
75 | #include <mach/vm_prot.h> |
76 | #include <mach/vm_statistics.h> |
77 | #include <mach/machine/vm_param.h> |
78 | #include <kern/kern_types.h> |
79 | #include <kern/thread.h> |
80 | #include <kern/simple_lock.h> |
81 | |
82 | #include <i386/mp.h> |
83 | #include <i386/proc_reg.h> |
84 | |
85 | #include <i386/pal_routines.h> |
86 | |
/*
 * Define the generic in terms of the specific.
 *
 * The INTEL_xxx / intel_xxx names are straight aliases for the
 * I386_xxx / i386_xxx page-size constants and conversion macros of the
 * same suffix; they add no behavior of their own.
 */

#define INTEL_PGBYTES		I386_PGBYTES
#define INTEL_PGSHIFT		I386_PGSHIFT
#define intel_btop(x)		i386_btop(x)
#define intel_ptob(x)		i386_ptob(x)
#define intel_round_page(x)	i386_round_page(x)
#define intel_trunc_page(x)	i386_trunc_page(x)
97 | |
98 | /* |
99 | * i386/i486/i860 Page Table Entry |
100 | */ |
101 | |
102 | #endif /* ASSEMBLER */ |
103 | |
/*
 * Page-directory / page-table geometry constants.
 * These sit outside the ASSEMBLER guard, so they are visible to every
 * includer of this header.
 */
#define NPGPTD		4ULL		/* page count used to size NBPTD */
#define PDESHIFT	21ULL		/* log2(NBPDE): bytes spanned by one PDE */
#define PTEMASK		0x1ffULL	/* index mask applied by ptenum() */
#define PTEINDX		3ULL

#define PTESHIFT	12ULL		/* shift applied by ptenum() */

/* Keeps only the low 32 bits of an address (used by ID_MAP_VTOP). */
#define LOW_4GB_MASK	((vm_offset_t)0x00000000FFFFFFFFUL)

#define PDESIZE		sizeof(pd_entry_t)	/* for assembly files */
#define PTESIZE		sizeof(pt_entry_t)	/* for assembly files */

/* Byte-offset masks within a base page and within a large page. */
#define INTEL_OFFMASK	(I386_PGBYTES - 1)
#define INTEL_LOFFMASK	(I386_LPGBYTES - 1)
/* Physical frame bits of an entry: bits 12 through 51. */
#define PG_FRAME	0x000FFFFFFFFFF000ULL
/* Entries that fit in one page, per entry type. */
#define NPTEPG		(PAGE_SIZE / sizeof(pt_entry_t))
#define NPTDPG		(PAGE_SIZE / sizeof(pd_entry_t))

#define NBPTD		(NPGPTD << PAGE_SHIFT)		/* NPGPTD pages, in bytes */
#define NPDEPTD		(NBPTD / sizeof(pd_entry_t))	/* PDEs held in NBPTD bytes */
#define NPDEPG		(PAGE_SIZE / sizeof(pd_entry_t))
#define NBPDE		(1ULL << PDESHIFT)		/* bytes spanned by one PDE */
#define PDEMASK		(NBPDE - 1)			/* byte-offset mask within NBPDE */
127 | |
#define PTE_PER_PAGE	512	/* number of PTE's per page on any level */

/*
 * Cleanly define parameters for all the page table levels.
 *
 * Each level gets the same family of names:
 *   N<level>PG      entries of that type per page
 *   <level>SHIFT    log2 of the bytes spanned by one entry
 *   <level>PGSHIFT  9 at every level
 *   NB<level>       bytes spanned by one entry
 *   <level>MASK     byte-offset mask within that span
 *   <level>_ENTRY_NULL  typed null pointer
 */

/* PML4 level */
typedef uint64_t pml4_entry_t;
#define NPML4PG		(PAGE_SIZE / sizeof(pml4_entry_t))
#define PML4SHIFT	39
#define PML4PGSHIFT	9
#define NBPML4		(1ULL << PML4SHIFT)
#define PML4MASK	(NBPML4 - 1)
#define PML4_ENTRY_NULL	((pml4_entry_t *) 0)

/* PDPT level */
typedef uint64_t pdpt_entry_t;
#define NPDPTPG		(PAGE_SIZE / sizeof(pdpt_entry_t))
#define PDPTSHIFT	30
#define PDPTPGSHIFT	9
#define NBPDPT		(1ULL << PDPTSHIFT)
#define PDPTMASK	(NBPDPT - 1)
#define PDPT_ENTRY_NULL	((pdpt_entry_t *) 0)

/* Page-directory level */
typedef uint64_t pd_entry_t;
#define NPDPG		(PAGE_SIZE / sizeof(pd_entry_t))
#define PDSHIFT		21
#define PDPGSHIFT	9
#define NBPD		(1ULL << PDSHIFT)
#define PDMASK		(NBPD - 1)
#define PD_ENTRY_NULL	((pd_entry_t *) 0)

/* Page-table level */
typedef uint64_t pt_entry_t;
#define NPTPG		(PAGE_SIZE / sizeof(pt_entry_t))
#define PTSHIFT		12
#define PTPGSHIFT	9
#define NBPT		(1ULL << PTSHIFT)
#define PTMASK		(NBPT - 1)
#define PT_ENTRY_NULL	((pt_entry_t *) 0)

/* Physical address as seen by the pmap layer. */
typedef uint64_t pmap_paddr_t;
164 | |
/*
 * pmap-specific assertions.
 *
 * PMAP_ASSERT is defined only for DEVELOPMENT or DEBUG builds.  When it
 * is not set, both macros below expand to nothing and their arguments
 * are never evaluated.
 */
#if DEVELOPMENT || DEBUG
#define PMAP_ASSERT 1
extern int pmap_asserts_enabled;	/* run-time gate for both macros */
extern int pmap_asserts_traced;		/* pmap_assert2: trace instead of panic */
#endif

#if PMAP_ASSERT
/*
 * pmap_assert(ex): when pmap_asserts_enabled is non-zero and (ex) is
 * false, call Assert() with the file, line and text of the expression.
 * (ex) is not evaluated while assertions are disabled.
 */
#define pmap_assert(ex)							\
	((pmap_asserts_enabled && !(ex)) ? Assert(__FILE__, __LINE__, # ex) : (void)0)

/*
 * pmap_assert2(ex, fmt, args...): like pmap_assert, with a printf-style
 * message appended.  On failure:
 *   - pmap_asserts_traced == 0: kprintf() then panic() with the message;
 *   - pmap_asserts_traced != 0: emit a kdebug event carrying the caller
 *     address and line, then clear kdebug_enable; no panic.
 */
#define pmap_assert2(ex, fmt, args...)					\
	do {								\
		if (__improbable(pmap_asserts_enabled && !(ex))) {	\
			if (!pmap_asserts_traced) {			\
				kprintf("Assertion %s failed (%s:%d, caller %p) " fmt , #ex, __FILE__, __LINE__, __builtin_return_address(0), ##args); \
				panic("Assertion %s failed (%s:%d, caller %p) " fmt , #ex, __FILE__, __LINE__, __builtin_return_address(0), ##args); \
			} else {					\
				KERNEL_DEBUG_CONSTANT(0xDEAD1000, __builtin_return_address(0), __LINE__, 0, 0, 0); \
				kdebug_enable = 0;			\
			}						\
		}							\
	} while(0)
#else
#define pmap_assert(ex)
#define pmap_assert2(ex, fmt, args...)
#endif
190 | |
191 | /* superpages */ |
192 | #define SUPERPAGE_NBASEPAGES 512 |
193 | |
194 | /* |
195 | * Atomic 64-bit store of a page table entry. |
196 | */ |
197 | static inline void |
198 | pmap_store_pte(pt_entry_t *entryp, pt_entry_t value) |
199 | { |
200 | /* |
201 | * In the 32-bit kernel a compare-and-exchange loop was |
202 | * required to provide atomicity. For K64, life is easier: |
203 | */ |
204 | *entryp = value; |
205 | } |
206 | |
/*
 * In 64 bit spaces, the number of each type of page in the page tables.
 * Each count is the previous level's count multiplied by the number of
 * entries per page at that level; the leading 1ULL forces 64-bit math.
 */
#define NPML4PGS	(1ULL * (PAGE_SIZE / sizeof(pml4_entry_t)))
#define NPDPTPGS	(NPML4PGS * (PAGE_SIZE / sizeof(pdpt_entry_t)))
#define NPDEPGS		(NPDPTPGS * (PAGE_SIZE / sizeof(pd_entry_t)))
#define NPTEPGS		(NPDEPGS * (PAGE_SIZE / sizeof(pt_entry_t)))

/* Fixed PML4 slot assignments. */
#define KERNEL_PML4_INDEX		511
#define KERNEL_KEXTS_INDEX		510	/* Home of KEXTs - the basement */
#define KERNEL_PHYSMAP_PML4_INDEX	509	/* virtual to physical map */
#define KERNEL_KASAN_PML4_INDEX0	508
#define KERNEL_KASAN_PML4_INDEX1	507
#define KERNEL_DBLMAP_PML4_INDEX	(506)
/* The last NBPML4 bytes of the 64-bit address space. */
#define KERNEL_BASE			(0ULL - NBPML4)
/* The NBPML4 bytes immediately below KERNEL_BASE. */
#define KERNEL_BASEMENT			(KERNEL_BASE - NBPML4)
221 | |
/*
 * VM_WIMG_xxx cache-mode names, each expressed as a combination of the
 * VM_MEM_xxx attribute bits.
 */
#define VM_WIMG_COPYBACK	VM_MEM_COHERENT
#define VM_WIMG_COPYBACKLW	VM_WIMG_COPYBACK
#define VM_WIMG_DEFAULT		VM_MEM_COHERENT
/* ?? intel ?? */
#define VM_WIMG_IO		(VM_MEM_COHERENT | VM_MEM_NOT_CACHEABLE | VM_MEM_GUARDED)
#define VM_WIMG_POSTED		VM_WIMG_IO
#define VM_WIMG_WTHRU		(VM_MEM_WRITE_THROUGH | VM_MEM_COHERENT | VM_MEM_GUARDED)
/* write combining mode, aka store gather */
#define VM_WIMG_WCOMB		(VM_MEM_NOT_CACHEABLE | VM_MEM_COHERENT)
#define VM_WIMG_INNERWBACK	VM_MEM_COHERENT
/*
 * Pte related macros
 */

/*
 * KVADDR(pmi, pdpi, pdi, pti): build an address from the four table
 * indices.  Bits 47 and above are all set, and each index is shifted
 * into place with PML4SHIFT, PDPTSHIFT, PDESHIFT and PTESHIFT.
 * The indices are not range-checked; an oversized index spills into the
 * neighbouring field.
 */
#define KVADDR(pmi, pdpi, pdi, pti)			\
	((vm_offset_t)					\
	    ((uint64_t) -1 << 47)			|	\
	    ((uint64_t)(pmi)  << PML4SHIFT)		|	\
	    ((uint64_t)(pdpi) << PDPTSHIFT)		|	\
	    ((uint64_t)(pdi)  << PDESHIFT)		|	\
	    ((uint64_t)(pti)  << PTESHIFT))
243 | |
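/*
 * Size of Kernel address space. This is the number of page table pages
 * (4MB each) to use for the kernel. 256 pages == 1 Gigabyte.
 * NOTE: the "4MB each" / "256 pages == 1 Gigabyte" figures describe a
 * layout with 4MB per page directory entry; with PDESHIFT == 21 as
 * defined above, one page directory entry spans NBPDE == 2MB.
 * This **MUST** be a multiple of 4 (eg: 252, 256, 260, etc).
 */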
/* Each default below applies only when the name is not already defined. */
#if !defined(KVA_PAGES)
#define KVA_PAGES	1024
#endif

#if !defined(NKPT)
#define NKPT		500	/* actual number of kernel page tables */
#endif

#if !defined(NKPDE)
#define NKPDE		(KVA_PAGES - 1)	/* addressable number of page tables/pde's */
#endif
259 | |
260 | |
261 | |
/*
 * Convert address offset to page descriptor index.
 *
 * The pmap argument of every macro in this group is unused.
 * pdptnum()/pdenum() shift the address and then apply PDPTMASK/PDEMASK,
 * which are defined as (NBPDPT - 1) and (NBPDE - 1).
 */
#define pdptnum(pmap, a)	(((vm_offset_t)(a) >> PDPTSHIFT) & PDPTMASK)
#define pdenum(pmap, a)		(((vm_offset_t)(a) >> PDESHIFT) & PDEMASK)
#define PMAP_INVALID_PDPTNUM	(~0ULL)

/*
 * Index of the entry covering address a, counted across the low 48
 * address bits at the PD, PDPT and PML4 level respectively.
 * The argument is used without a cast, so it must already be an integer.
 */
#define pdeidx(pmap, a)		(((a) >> PDSHIFT)   & ((1ULL << (48 - PDSHIFT))   - 1))
#define pdptidx(pmap, a)	(((a) >> PDPTSHIFT) & ((1ULL << (48 - PDPTSHIFT)) - 1))
#define pml4idx(pmap, a)	(((a) >> PML4SHIFT) & ((1ULL << (48 - PML4SHIFT)) - 1))


/*
 * Convert page descriptor index to user virtual address
 */
#define pdetova(a)		((vm_offset_t)(a) << PDESHIFT)

/*
 * Convert address offset to page table index
 */
#define ptenum(a)		(((vm_offset_t)(a) >> PTESHIFT) & PTEMASK)
283 | |
/*
 * Hardware pte bit definitions (to be used directly on the ptes
 * without using the bit fields).
 *
 * Single-bit flags are written as shifts so the bit position is explicit.
 * Several names deliberately share a bit (WRITE/RW, PS/PTA).
 */

#define INTEL_PTE_VALID		(1ULL << 0)
#define INTEL_PTE_WRITE		(1ULL << 1)
#define INTEL_PTE_RW		(1ULL << 1)
#define INTEL_PTE_USER		(1ULL << 2)
#define INTEL_PTE_WTHRU		(1ULL << 3)
#define INTEL_PTE_NCACHE	(1ULL << 4)
#define INTEL_PTE_REF		(1ULL << 5)
#define INTEL_PTE_MOD		(1ULL << 6)
#define INTEL_PTE_PS		(1ULL << 7)
#define INTEL_PTE_PTA		(1ULL << 7)
#define INTEL_PTE_GLOBAL	(1ULL << 8)
#define INTEL_PTE_WIRED		(1ULL << 10)
#define INTEL_PDPTE_NESTED	(1ULL << 11)
#define INTEL_PTE_PFN		PG_FRAME

#define INTEL_PTE_NX		(1ULL << 63)

#define INTEL_PTE_INVALID	0
/* This is conservative, but suffices */
#define INTEL_PTE_RSVD		((1ULL << 10) | (1ULL << 11) | (0x1FFULL << 54))

#define INTEL_PTE_COMPRESSED	(1ULL << 62) /* marker, for invalid PTE only -- ignored by hardware for both regular/EPT entries*/
#define INTEL_PTE_COMPRESSED_ALT (1ULL << 61) /* compressed but with "alternate accounting" */

#define INTEL_PTE_COMPRESSED_MASK	(INTEL_PTE_COMPRESSED | INTEL_PTE_COMPRESSED_ALT)

/*
 * PTE_IS_COMPRESSED(x): true when x is an invalid PTE that carries the
 * "compressed" marker and no bits outside INTEL_PTE_COMPRESSED_MASK.
 * An invalid, marked PTE with any other bit set panics instead.
 * x must be an lvalue (its address is printed) and is evaluated more
 * than once.
 */
#define PTE_IS_COMPRESSED(x)						\
	((((x) & INTEL_PTE_VALID) == 0) && /* PTE is not valid... */	\
	 ((x) & INTEL_PTE_COMPRESSED) && /* ...has "compressed" marker */ \
	 ((!((x) & ~INTEL_PTE_COMPRESSED_MASK)) || /* ...no other bits */ \
	  (panic("compressed PTE %p 0x%llx has extra bits 0x%llx: corrupted?", \
		 &(x), (x), (x) & ~INTEL_PTE_COMPRESSED_MASK), FALSE)))

/* Physical address <-> PTE frame field; both simply mask with INTEL_PTE_PFN. */
#define pa_to_pte(a)		((a) & INTEL_PTE_PFN) /* XXX */
#define pte_to_pa(p)		((p) & INTEL_PTE_PFN) /* XXX */
/* Advance p, in place, by one base page (INTEL_OFFMASK + 1 bytes). */
#define pte_increment_pa(p)	((p) += INTEL_OFFMASK+1)

/* Build a valid PTE for physical address p with the named access. */
#define pte_kernel_rw(p)	((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID | INTEL_PTE_RW))
#define pte_kernel_ro(p)	((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID))
#define pte_user_rw(p)		((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID | INTEL_PTE_USER | INTEL_PTE_RW))
#define pte_user_ro(p)		((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID | INTEL_PTE_USER))
330 | |
#define PMAP_INVEPT_SINGLE_CONTEXT	1


#define INTEL_EPTP_AD		(1ULL << 6)

/* EPT entry flag bits. */
#define INTEL_EPT_READ		(1ULL << 0)
#define INTEL_EPT_WRITE		(1ULL << 1)
#define INTEL_EPT_EX		(1ULL << 2)
#define INTEL_EPT_IPTA		(1ULL << 6)
#define INTEL_EPT_PS		(1ULL << 7)
#define INTEL_EPT_REF		(1ULL << 8)
#define INTEL_EPT_MOD		(1ULL << 9)

/*
 * EPT cache-type field: a 3-bit value held in bits 3..5
 * (INTEL_EPT_CACHE_MASK).  Each type below is that value shifted into
 * the field.
 */
#define INTEL_EPT_CACHE_MASK	(7ULL << 3)
#define INTEL_EPT_NCACHE	(0ULL << 3)
#define INTEL_EPT_WC		(1ULL << 3)
#define INTEL_EPT_WTHRU		(4ULL << 3)
#define INTEL_EPT_WP		(5ULL << 3)
#define INTEL_EPT_WB		(6ULL << 3)
350 | |
351 | /* |
352 | * Routines to filter correct bits depending on the pmap type |
353 | */ |
354 | |
355 | static inline pt_entry_t |
356 | pte_remove_ex(pt_entry_t pte, boolean_t is_ept) |
357 | { |
358 | if (__probable(!is_ept)) { |
359 | return (pte | INTEL_PTE_NX); |
360 | } |
361 | |
362 | return (pte & (~INTEL_EPT_EX)); |
363 | } |
364 | |
365 | static inline pt_entry_t |
366 | pte_set_ex(pt_entry_t pte, boolean_t is_ept) |
367 | { |
368 | if (__probable(!is_ept)) { |
369 | return (pte & (~INTEL_PTE_NX)); |
370 | } |
371 | |
372 | return (pte | INTEL_EPT_EX); |
373 | } |
374 | |
375 | static inline pt_entry_t |
376 | physmap_refmod_to_ept(pt_entry_t physmap_pte) |
377 | { |
378 | pt_entry_t ept_pte = 0; |
379 | |
380 | if (physmap_pte & INTEL_PTE_MOD) { |
381 | ept_pte |= INTEL_EPT_MOD; |
382 | } |
383 | |
384 | if (physmap_pte & INTEL_PTE_REF) { |
385 | ept_pte |= INTEL_EPT_REF; |
386 | } |
387 | |
388 | return ept_pte; |
389 | } |
390 | |
391 | static inline pt_entry_t |
392 | ept_refmod_to_physmap(pt_entry_t ept_pte) |
393 | { |
394 | pt_entry_t physmap_pte = 0; |
395 | |
396 | assert((ept_pte & ~(INTEL_EPT_REF | INTEL_EPT_MOD)) == 0); |
397 | |
398 | if (ept_pte & INTEL_EPT_REF) { |
399 | physmap_pte |= INTEL_PTE_REF; |
400 | } |
401 | |
402 | if (ept_pte & INTEL_EPT_MOD) { |
403 | physmap_pte |= INTEL_PTE_MOD; |
404 | } |
405 | |
406 | return physmap_pte; |
407 | } |
408 | |
/*
 * Note: Not all Intel processors support EPT referenced access and dirty bits.
 * During pmap_init() we check the VMX capability for the current hardware
 * and update this variable accordingly.
 *
 * Declared here only; the definition lives outside this header.
 */
extern boolean_t pmap_ept_support_ad;
415 | |
/*
 * Format-neutral names for PTE bits.  The macros that take is_ept
 * select the EPT encoding when it is non-zero and the regular encoding
 * otherwise; the others have a single encoding.
 */
#define PTE_VALID_MASK(is_ept)	((is_ept) ? (INTEL_EPT_READ | INTEL_EPT_WRITE | INTEL_EPT_EX) : INTEL_PTE_VALID)
#define PTE_READ(is_ept)	((is_ept) ? INTEL_EPT_READ   : INTEL_PTE_VALID)
#define PTE_WRITE(is_ept)	((is_ept) ? INTEL_EPT_WRITE  : INTEL_PTE_WRITE)
#define PTE_PS			INTEL_PTE_PS
#define PTE_COMPRESSED		INTEL_PTE_COMPRESSED
#define PTE_COMPRESSED_ALT	INTEL_PTE_COMPRESSED_ALT
#define PTE_NCACHE(is_ept)	((is_ept) ? INTEL_EPT_NCACHE : INTEL_PTE_NCACHE)
#define PTE_WTHRU(is_ept)	((is_ept) ? INTEL_EPT_WTHRU  : INTEL_PTE_WTHRU)
#define PTE_REF(is_ept)		((is_ept) ? INTEL_EPT_REF    : INTEL_PTE_REF)
#define PTE_MOD(is_ept)		((is_ept) ? INTEL_EPT_MOD    : INTEL_PTE_MOD)
#define PTE_WIRED		INTEL_PTE_WIRED
427 | |
428 | |
/* Cache-control request values: zero for the default, otherwise single bits. */
#define PMAP_DEFAULT_CACHE	0x0
#define PMAP_INHIBIT_CACHE	0x1
#define PMAP_GUARDED_CACHE	0x2
#define PMAP_ACTIVATE_CACHE	0x4
#define PMAP_NO_GUARD_CACHE	0x8

/*
 * Per-pmap ledger operations: forward to ledger_debit()/ledger_credit()
 * on the pmap's own ledger field.
 *   p - pmap pointer, e - ledger entry, a - amount.
 */
#define pmap_ledger_debit(p, e, a)	ledger_debit((p)->ledger, e, a)
#define pmap_ledger_credit(p, e, a)	ledger_credit((p)->ledger, e, a)
438 | |
439 | #ifndef ASSEMBLER |
440 | |
441 | #include <sys/queue.h> |
442 | |
/*
 * Address of current and alternate address space page table maps
 * and directories.
 *
 * All of the following are declarations only; the objects are defined
 * outside this header.
 */

extern pt_entry_t	*PTmap;
extern pdpt_entry_t	*IdlePDPT;
extern pml4_entry_t	*IdlePML4;
extern boolean_t	no_shared_cr3;	/* consulted by set_dirbase() */
extern pd_entry_t	*IdlePTD;	/* physical addr of "Idle" state PTD */

/* NOTE(review): presumably PV hash-list statistics and a kernel-text
 * page-size setting, judging by the names -- confirm against the definitions. */
extern uint64_t		pmap_pv_hashlist_walks;
extern uint64_t		pmap_pv_hashlist_cnts;
extern uint32_t		pmap_pv_hashlist_max;
extern uint32_t		pmap_kernel_text_ps;
458 | |
459 | #define ID_MAP_VTOP(x) ((void *)(((uint64_t)(x)) & LOW_4GB_MASK)) |
460 | |
461 | extern uint64_t physmap_base, physmap_max; |
462 | |
463 | #define NPHYSMAP (MAX(K64_MAXMEM/GB + 4, 4)) |
464 | |
465 | static inline boolean_t physmap_enclosed(addr64_t a) { |
466 | return (a < (NPHYSMAP * GB)); |
467 | } |
468 | |
469 | static inline void * PHYSMAP_PTOV_check(void *paddr) { |
470 | uint64_t pvaddr = (uint64_t)paddr + physmap_base; |
471 | |
472 | if (__improbable(pvaddr >= physmap_max)) |
473 | panic("PHYSMAP_PTOV bounds exceeded, 0x%qx, 0x%qx, 0x%qx" , |
474 | pvaddr, physmap_base, physmap_max); |
475 | |
476 | return (void *)pvaddr; |
477 | } |
478 | |
479 | #define PHYSMAP_PTOV(x) (PHYSMAP_PTOV_check((void*) (x))) |
#if MACH_KERNEL_PRIVATE
/* Bounds of the double-map window and the offset added by DBLMAP_CHECK(). */
extern uint64_t dblmap_base, dblmap_max, dblmap_dist;

/*
 * Return x + dblmap_dist.
 * Panics unless the result lies in [dblmap_base, dblmap_max).
 */
static inline uint64_t
DBLMAP_CHECK(uintptr_t x)
{
	uint64_t dbladdr = (uint64_t)x + dblmap_dist;

	if (__improbable((dbladdr >= dblmap_max) || (dbladdr < dblmap_base))) {
		panic("DBLMAP bounds exceeded, 0x%qx, 0x%qx 0x%qx, 0x%qx",
		    (uint64_t)x, dbladdr, dblmap_base, dblmap_max);
	}
	return dbladdr;
}
/*
 * The argument is parenthesized before the cast so that an expression
 * such as DBLMAP(p + n) converts the whole sum, not just p.
 */
#define DBLMAP(x)	(DBLMAP_CHECK((uint64_t) (x)))

/* Offset added by LDTALIAS_CHECK(). */
extern uint64_t ldt_alias_offset;

/*
 * Return x + ldt_alias_offset.
 * Panics unless the result lies in [dblmap_base, dblmap_max), the same
 * window DBLMAP_CHECK() enforces.
 */
static inline uint64_t
LDTALIAS_CHECK(uintptr_t x)
{
	uint64_t dbladdr = (uint64_t)x + ldt_alias_offset;

	if (__improbable((dbladdr >= dblmap_max) || (dbladdr < dblmap_base))) {
		panic("LDTALIAS: bounds exceeded, 0x%qx, 0x%qx 0x%qx, 0x%qx",
		    (uint64_t)x, dbladdr, dblmap_base, dblmap_max);
	}
	return dbladdr;
}
/* Argument parenthesized for the same reason as in DBLMAP(). */
#define LDTALIAS(x)	(LDTALIAS_CHECK((uint64_t) (x)))
#endif
504 | |
/*
 * For KASLR, we alias the master processor's IDT and GDT at fixed
 * virtual addresses to defeat SIDT/SGDT address leakage.
 * And non-boot processor's GDT aliases likewise (skipping LOWGLOBAL_ALIAS)
 * The low global vector page is mapped at a fixed alias also.
 *
 * LOWGLOBAL_ALIAS is that fixed alias: 0x2000 bytes above
 * VM_MIN_KERNEL_ADDRESS.
 */
#define LOWGLOBAL_ALIAS		(VM_MIN_KERNEL_ADDRESS + 0x2000)

/*
 * This indicates (roughly) where there is free space for the VM
 * to use for the heap; this does not need to be precise.
 */
#define KERNEL_PMAP_HEAP_RANGE_START	VM_MIN_KERNEL_AND_KEXT_ADDRESS
518 | |
519 | #include <vm/vm_page.h> |
520 | |
521 | /* |
522 | * For each vm_page_t, there is a list of all currently |
523 | * valid virtual mappings of that page. An entry is |
524 | * a pv_entry_t; the list is the pv_table. |
525 | */ |
526 | |
/*
 * Machine-dependent physical map.
 *
 * A pmap is either a regular pmap (pm_cr3 != 0, pm_eptp == 0) or an EPT
 * pmap (pm_cr3 == 0, pm_eptp != 0); is_ept_pmap() asserts exactly this.
 * Field order is part of the layout -- do not reorder.
 */
struct pmap {
	decl_simple_lock_data(,lock)	/* lock on map */
	pmap_paddr_t	pm_cr3;		/* Kernel+user shared PML4 physical*/
	pmap_paddr_t	pm_ucr3;	/* Mirrored user PML4 physical */
	task_map_t	pm_task_map;	/* copied to the per-cpu cpu_task_map by set_dirbase() */
	boolean_t	pm_shared;
	boolean_t	pagezero_accessible;	/* read by set_dirbase() when choosing the cr3 to load */
#define	PMAP_PCID_MAX_CPUS	MAX_CPUS	/* Must be a multiple of 8 */
	/* NOTE(review): presumably per-cpu PCID state, indexed by cpu number -- confirm in pmap_pcid.h */
	pcid_t		pmap_pcid_cpus[PMAP_PCID_MAX_CPUS];
	volatile uint8_t pmap_pcid_coherency_vector[PMAP_PCID_MAX_CPUS];
	struct pmap_statistics	stats;	/* map statistics */
	int		ref_count;	/* reference count */
	int		nx_enabled;
	pml4_entry_t	*pm_pml4;	/* VKA of top level */
	pml4_entry_t	*pm_upml4;	/* Shadow VKA of top level */
	vm_object_t	pm_obj;		/* object to hold pde's */
	vm_object_t	pm_obj_pdpt;	/* holds pdpt pages */
	vm_object_t	pm_obj_pml4;	/* holds pml4 pages */
	pmap_paddr_t	pm_eptp;	/* EPTP */
	ledger_t	ledger;		/* ledger tracking phys mappings */
#if MACH_ASSERT
	/* Present only in MACH_ASSERT builds. */
	boolean_t	pmap_stats_assert;
	int		pmap_pid;
	char		pmap_procname[17];
#endif /* MACH_ASSERT */
};
553 | |
554 | static inline boolean_t |
555 | is_ept_pmap(pmap_t p) |
556 | { |
557 | if (__probable(p->pm_cr3 != 0)) { |
558 | assert(p->pm_eptp == 0); |
559 | return FALSE; |
560 | } |
561 | |
562 | assert(p->pm_eptp != 0); |
563 | |
564 | return TRUE; |
565 | } |
566 | |
567 | void hv_ept_pmap_create(void **ept_pmap, void **eptp); |
568 | |
#if NCOPY_WINDOWS > 0
/*
 * Per-cpu mapping windows, compiled only when NCOPY_WINDOWS > 0.
 * The window array is laid out as three groups of four (PDPT, PDE, PTE)
 * followed by PMAP_WINDOW_SIZE further slots starting at
 * PMAP_NWINDOWS_FIRSTFREE.
 */
#define PMAP_PDPT_FIRST_WINDOW	0
#define PMAP_PDPT_NWINDOWS	4
#define PMAP_PDE_FIRST_WINDOW	(PMAP_PDPT_NWINDOWS)
#define PMAP_PDE_NWINDOWS	4
#define PMAP_PTE_FIRST_WINDOW	(PMAP_PDE_FIRST_WINDOW + PMAP_PDE_NWINDOWS)
#define PMAP_PTE_NWINDOWS	4

#define PMAP_NWINDOWS_FIRSTFREE	(PMAP_PTE_FIRST_WINDOW + PMAP_PTE_NWINDOWS)
#define PMAP_WINDOW_SIZE	8
#define PMAP_NWINDOWS		(PMAP_NWINDOWS_FIRSTFREE + PMAP_WINDOW_SIZE)

/* One mapping window. */
typedef struct {
	pt_entry_t	*prv_CMAP;	/* NOTE(review): presumably the PTE backing the window -- confirm */
	caddr_t		prv_CADDR;	/* NOTE(review): presumably the window's virtual address -- confirm */
} mapwindow_t;

/* Per-cpu window state: one index per table level plus the window array. */
typedef struct cpu_pmap {
	int		pdpt_window_index;
	int		pde_window_index;
	int		pte_window_index;
	mapwindow_t	mapwindow[PMAP_NWINDOWS];
} cpu_pmap_t;


extern mapwindow_t *pmap_get_mapwindow(pt_entry_t pentry);
extern void	pmap_put_mapwindow(mapwindow_t *map);
#endif
597 | |
/*
 * Description of one region of physical memory, in page numbers.
 */
typedef struct pmap_memory_regions {
	ppnum_t		base;		/* first page of this region */
	ppnum_t		alloc_up;	/* pages below this one have been "stolen" */
	ppnum_t		alloc_down;	/* pages above this one have been "stolen" */
	ppnum_t		end;		/* last page of this region */
	uint32_t	type;
	uint64_t	attribute;
} pmap_memory_region_t;

extern unsigned pmap_memory_region_count;
extern unsigned pmap_memory_region_current;

/* NOTE(review): presumably the capacity of pmap_memory_regions[] -- the
 * array is declared below without a bound, so confirm at its definition. */
#define PMAP_MEMORY_REGIONS_SIZE 128

extern pmap_memory_region_t pmap_memory_regions[];
613 | #include <i386/pmap_pcid.h> |
614 | |
/*
 * Make tpmap the address space in use on cpu my_cpu.
 *
 * tpmap:  pmap to activate.
 * thread: thread being switched to; only its CopyIOActive flag is read.
 * my_cpu: number of the executing cpu (asserted to equal cpu_number()).
 *
 * Records tpmap's cr3 values and task map in both the cpu data and its
 * shadow copy, then loads CR3 (directly or through pmap_pcid_activate())
 * when needed.  The caller must have preemption or interrupts disabled;
 * this is asserted.
 */
static inline void
set_dirbase(pmap_t tpmap, thread_t thread, int my_cpu) {
	int ccpu = my_cpu;
	uint64_t pcr3 = tpmap->pm_cr3, ucr3 = tpmap->pm_ucr3;
	/* Publish the task's cr3 values to the cpu data and its shadow. */
	cpu_datap(ccpu)->cpu_task_cr3 = pcr3;
	cpu_shadowp(ccpu)->cpu_task_cr3 = pcr3;

	cpu_datap(ccpu)->cpu_ucr3 = ucr3;
	cpu_shadowp(ccpu)->cpu_ucr3 = ucr3;

	cpu_datap(ccpu)->cpu_task_map = cpu_shadowp(ccpu)->cpu_task_map =
	    tpmap->pm_task_map;

	assert((get_preemption_level() > 0) || (ml_get_interrupts_enabled() == FALSE));
	assert(ccpu == cpu_number());
	/*
	 * Switch cr3 if necessary
	 * - unless running with no_shared_cr3 debugging mode
	 * and we're not on the kernel's cr3 (after pre-empted copyio)
	 */
	/* Despite its name, nopagezero holds the pmap's pagezero_accessible flag. */
	boolean_t nopagezero = tpmap->pagezero_accessible;
	/* Remember the previous per-cpu setting before overwriting it. */
	boolean_t priorpagezero = cpu_datap(ccpu)->cpu_pagezero_mapped;
	cpu_datap(ccpu)->cpu_pagezero_mapped = nopagezero;

	if (__probable(!no_shared_cr3)) {
		if (__improbable(nopagezero)) {
			/*
			 * Flag set: run on the pmap's cr3 only while a
			 * copyio is active, otherwise on the kernel cr3.
			 */
			boolean_t copyio_active = ((thread->machine.specFlags & CopyIOActive) != 0);
			if (pmap_pcid_ncpus) {
				pmap_pcid_activate(tpmap, ccpu, TRUE, copyio_active);
			} else {
				if (copyio_active) {
					if (get_cr3_base() != tpmap->pm_cr3) {
						set_cr3_raw(tpmap->pm_cr3);
					}
				} else if (get_cr3_base() != cpu_datap(ccpu)->cpu_kernel_cr3) {
					set_cr3_raw(cpu_datap(ccpu)->cpu_kernel_cr3);
				}
			}
		} else if ((get_cr3_base() != tpmap->pm_cr3) || priorpagezero) {
			/*
			 * Flag clear: load the pmap's cr3 if it is not
			 * already current, or if the previous pmap on this
			 * cpu had the flag set.
			 */
			if (pmap_pcid_ncpus) {
				pmap_pcid_activate(tpmap, ccpu, FALSE, FALSE);
			} else {
				set_cr3_raw(tpmap->pm_cr3);
			}
		}
	} else {
		/* no_shared_cr3 mode: stay on (or return to) the kernel cr3. */
		if (get_cr3_base() != cpu_datap(ccpu)->cpu_kernel_cr3)
			set_cr3_raw(cpu_datap(ccpu)->cpu_kernel_cr3);
	}
}
665 | |
/*
 * External declarations for PMAP_ACTIVATE.
 *
 * Everything in this section is a prototype only; the definitions live
 * outside this header.
 */

extern void		process_pmap_updates(void);	/* called by MARK_CPU_ACTIVE() */
extern void		pmap_update_interrupt(void);

/* The name is parenthesized, which keeps a function-like macro of the
 * same name (if one exists) from being expanded in this declaration. */
extern addr64_t		(kvtophys)(
	vm_offset_t	addr);

extern kern_return_t	pmap_expand(
	pmap_t		pmap,
	vm_map_offset_t	addr,
	unsigned int	options);
extern vm_offset_t	pmap_map(
	vm_offset_t	virt,
	vm_map_offset_t	start,
	vm_map_offset_t	end,
	vm_prot_t	prot,
	unsigned int	flags);

extern vm_offset_t	pmap_map_bd(
	vm_offset_t	virt,
	vm_map_offset_t	start,
	vm_map_offset_t	end,
	vm_prot_t	prot,
	unsigned int	flags);
extern void		pmap_bootstrap(
	vm_offset_t	load_start,
	boolean_t	IA32e);

extern boolean_t	pmap_valid_page(
	ppnum_t		pn);

extern int		pmap_list_resident_pages(
	struct pmap	*pmap,
	vm_offset_t	*listp,
	int		space);
extern void		x86_filter_TLB_coherency_interrupts(boolean_t);
/*
 * Get cache attributes (as pagetable bits) for the specified phys page
 */
extern unsigned		pmap_get_cache_attributes(ppnum_t, boolean_t is_ept);
#if NCOPY_WINDOWS > 0
/* Per-cpu window state allocation; only declared when NCOPY_WINDOWS > 0. */
extern struct cpu_pmap	*pmap_cpu_alloc(
	boolean_t	is_boot_cpu);
extern void		pmap_cpu_free(
	struct cpu_pmap	*cp);
#endif

extern kern_return_t	pmap_map_block(
	pmap_t		pmap,
	addr64_t	va,
	ppnum_t		pa,
	uint32_t	size,
	vm_prot_t	prot,
	int		attr,
	unsigned int	flags);

extern void		invalidate_icache(vm_offset_t addr, unsigned cnt, int phys);
extern void		flush_dcache(vm_offset_t addr, unsigned count, int phys);
extern ppnum_t		pmap_find_phys(pmap_t map, addr64_t va);

extern void		pmap_cpu_init(void);
extern void		pmap_disable_NX(pmap_t pmap);

extern void		pt_fake_zone_init(int);
extern void		pt_fake_zone_info(int *, vm_size_t *, vm_size_t *, vm_size_t *, vm_size_t *,
	uint64_t *, int *, int *, int *);
/* Takes a printf-like logging callback. */
extern void		pmap_pagetable_corruption_msg_log(int (*)(const char * fmt, ...)__printflike(1,2));
736 | |
737 | /* |
738 | * Macros for speed. |
739 | */ |
740 | |
741 | |
742 | #include <kern/spl.h> |
743 | |
744 | |
/*
 * PMAP_ACTIVATE_MAP(map, thread, my_cpu): look up the pmap of a vm_map
 * with vm_map_pmap() and hand it to set_dirbase() for the given thread
 * and cpu.  Expands to a braced block, so it is statement-only.
 */
#define PMAP_ACTIVATE_MAP(map, thread, my_cpu)	{			\
	pmap_t		tpmap = vm_map_pmap(map);			\
									\
	set_dirbase(tpmap, thread, my_cpu);				\
}
751 | |
/*
 * PMAP_DEACTIVATE_MAP(map, thread, ccpu)
 *
 * On x86_64 this only asserts (via pmap_assert2) that, when PCIDs are in
 * use, the PCID expected for this pmap/thread/cpu equals the low 12 bits
 * of the raw CR3; the state is dumped in the message on failure.  The
 * expansion already ends in a semicolon.
 *
 * On other targets it expands to nothing.  That stub accepts an optional
 * third argument so that the same three-argument call compiles on every
 * target; existing two-argument uses remain valid.
 */
#if defined(__x86_64__)
#define PMAP_DEACTIVATE_MAP(map, thread, ccpu)				\
	pmap_assert2((pmap_pcid_ncpus ? (pcid_for_pmap_cpu_tuple(map->pmap, thread, ccpu) == (get_cr3_raw() & 0xFFF)) : TRUE),"PCIDs: 0x%x, active PCID: 0x%x, CR3: 0x%lx, pmap_cr3: 0x%llx, kernel_cr3: 0x%llx, kernel pmap cr3: 0x%llx, CPU active PCID: 0x%x, CPU kernel PCID: 0x%x, specflags: 0x%x, pagezero: 0x%x", pmap_pcid_ncpus, pcid_for_pmap_cpu_tuple(map->pmap, thread, ccpu), get_cr3_raw(), map->pmap->pm_cr3, cpu_datap(ccpu)->cpu_kernel_cr3, kernel_pmap->pm_cr3, cpu_datap(ccpu)->cpu_active_pcid, cpu_datap(ccpu)->cpu_kernel_pcid, thread->machine.specFlags, map->pmap->pagezero_accessible);
#else
#define PMAP_DEACTIVATE_MAP(map, thread, ...)
#endif

/*
 * PMAP_SWITCH_USER(th, new_map, my_cpu): at splhigh, deactivate the
 * thread's current map, install new_map in th->map and activate it on
 * my_cpu.  The NCOPY_WINDOWS variant additionally invalidates the
 * thread's copy windows after restoring the spl.
 *
 * Both variants pass my_cpu through: PMAP_ACTIVATE_MAP always takes
 * three arguments, so the former two-argument calls could not compile.
 */
#if NCOPY_WINDOWS > 0
#define PMAP_SWITCH_USER(th, new_map, my_cpu) {				\
	spl_t		spl;						\
									\
	spl = splhigh();						\
	PMAP_DEACTIVATE_MAP(th->map, th, my_cpu);			\
	th->map = new_map;						\
	PMAP_ACTIVATE_MAP(th->map, th, my_cpu);				\
	splx(spl);							\
	inval_copy_windows(th);						\
}
#else
#define PMAP_SWITCH_USER(th, new_map, my_cpu) {				\
	spl_t		spl;						\
									\
	spl = splhigh();						\
	PMAP_DEACTIVATE_MAP(th->map, th, my_cpu);			\
	th->map = new_map;						\
	PMAP_ACTIVATE_MAP(th->map, th, my_cpu);				\
	splx(spl);							\
}
#endif
781 | |
/*
 * Marking the current cpu's cr3 inactive is achieved by setting its lsb.
 * Marking the current cpu's cr3 active once more involves clearing this bit.
 * Note that valid page tables are page-aligned and so the bottom 12 bits
 * are normally zero, modulo PCID.
 * We can only mark the current cpu active/inactive but we can test any cpu.
 */
/* Set bit 0 of the current cpu's cpu_active_cr3. */
#define CPU_CR3_MARK_INACTIVE()						\
	(current_cpu_datap()->cpu_active_cr3 |= 1)

/* Clear bit 0 of the current cpu's cpu_active_cr3. */
#define CPU_CR3_MARK_ACTIVE()						\
	(current_cpu_datap()->cpu_active_cr3 &= ~1)

/* Non-zero when bit 0 of the given cpu's cpu_active_cr3 is clear. */
#define CPU_CR3_IS_ACTIVE(cpu)						\
	((cpu_datap(cpu)->cpu_active_cr3 & 1) == 0)

/* The given cpu's cpu_active_cr3 with bit 0 masked off. */
#define CPU_GET_ACTIVE_CR3(cpu)						\
	(cpu_datap(cpu)->cpu_active_cr3 & ~1)

/* The given cpu's cpu_task_cr3, as last stored by set_dirbase(). */
#define CPU_GET_TASK_CR3(cpu)						\
	(cpu_datap(cpu)->cpu_task_cr3)
803 | |
/*
 * Mark this cpu idle, and remove it from the active set,
 * since it is not actively using any pmap.  Signal_cpus
 * will notice that it is idle, and avoid signaling it,
 * but will queue the update request for when the cpu
 * becomes active.
 *
 * Both macros assert that interrupts are disabled, ignore their my_cpu
 * argument (they always act on the current cpu), and expand to a braced
 * block.
 */
#define MARK_CPU_IDLE(my_cpu)	{					\
	assert(ml_get_interrupts_enabled() == FALSE);			\
	CPU_CR3_MARK_INACTIVE();					\
	mfence();							\
}

#define MARK_CPU_ACTIVE(my_cpu)	{					\
	assert(ml_get_interrupts_enabled() == FALSE);			\
	/*								\
	 *	If a kernel_pmap update was requested while this cpu	\
	 *	was idle, process it as if we got the interrupt.	\
	 *	Before doing so, remove this cpu from the idle set.	\
	 *	Since we do not grab any pmap locks while we flush	\
	 *	our TLB, another cpu may start an update operation	\
	 *	before we finish.  Removing this cpu from the idle	\
	 *	set assures that we will receive another update		\
	 *	interrupt if this happens.				\
	 */								\
	CPU_CR3_MARK_ACTIVE();						\
	mfence();							\
									\
	if (current_cpu_datap()->cpu_tlb_invalid) {			\
		process_pmap_updates();					\
	}								\
}
835 | |
/* No-op on this architecture. */
#define PMAP_CONTEXT(pmap, thread)

/*
 * Non-zero when VA lies in the inclusive range
 * [vm_min_kernel_address, vm_max_kernel_address].
 * VA may be evaluated twice.
 */
#define pmap_kernel_va(VA)						\
	(!((((vm_offset_t) (VA)) < vm_min_kernel_address) ||		\
	   (((vm_offset_t) (VA)) > vm_max_kernel_address)))


/* Direct accessors for fields of the pmap's statistics block. */
#define pmap_compressed(pmap)		((pmap)->stats.compressed)
#define pmap_resident_count(pmap)	((pmap)->stats.resident_count)
#define pmap_resident_max(pmap)		((pmap)->stats.resident_max)
/* pmap_copy is a no-op here; the attribute calls always fail. */
#define pmap_copy(dst_pmap,src_pmap,dst_addr,len,src_addr)
#define pmap_attribute(pmap,addr,size,attr,value)			\
					(KERN_INVALID_ADDRESS)
#define pmap_attribute_cache_sync(addr,size,attr,value)			\
					(KERN_INVALID_ADDRESS)
851 | |
/* Advertises that this architecture supplies pmap_is_empty(). */
#define MACHINE_PMAP_IS_EMPTY	1
/* NOTE(review): presumably reports whether [start, end) of pmap holds no
 * mappings -- confirm against the definition. */
extern boolean_t pmap_is_empty(pmap_t		pmap,
			       vm_map_offset_t	start,
			       vm_map_offset_t	end);

#define MACHINE_BOOTSTRAPPTD	1	/* Static bootstrap page-tables */

/* Prototype only; the parameters are unnamed here, so see the definition
 * for their meaning. */
kern_return_t
pmap_permissions_verify(pmap_t, vm_map_t, vm_offset_t, vm_offset_t);
861 | |
/*
 * PMAP_STATS_ASSERTF((cond, fmt, ...)): in MACH_ASSERT builds, forward
 * the parenthesized argument list to assertf, but only while the global
 * pmap_stats_assert is non-zero.  Expands to nothing otherwise, in which
 * case the arguments are not evaluated.
 */
#if MACH_ASSERT
extern int pmap_stats_assert;
#define PMAP_STATS_ASSERTF(args)		\
	MACRO_BEGIN				\
	if (pmap_stats_assert) {		\
		assertf args;			\
	}					\
	MACRO_END
#else /* MACH_ASSERT */
#define PMAP_STATS_ASSERTF(args)
#endif /* MACH_ASSERT */
871 | #endif /* ASSEMBLER */ |
872 | #endif /* _PMAP_MACHINE_ */ |
873 | #endif /* KERNEL_PRIVATE */ |
874 | |