| 1 | /* |
| 2 | * Copyright (c) 2000-2016 Apple Inc. All rights reserved. |
| 3 | * |
| 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
| 5 | * |
| 6 | * This file contains Original Code and/or Modifications of Original Code |
| 7 | * as defined in and that are subject to the Apple Public Source License |
| 8 | * Version 2.0 (the 'License'). You may not use this file except in |
| 9 | * compliance with the License. The rights granted to you under the License |
| 10 | * may not be used to create, or enable the creation or redistribution of, |
| 11 | * unlawful or unlicensed copies of an Apple operating system, or to |
| 12 | * circumvent, violate, or enable the circumvention or violation of, any |
| 13 | * terms of an Apple operating system software license agreement. |
| 14 | * |
| 15 | * Please obtain a copy of the License at |
| 16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
| 17 | * |
| 18 | * The Original Code and all software distributed under the License are |
| 19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
| 20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
| 21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
| 22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
| 23 | * Please see the License for the specific language governing rights and |
| 24 | * limitations under the License. |
| 25 | * |
| 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
| 27 | */ |
| 28 | /* |
| 29 | * @OSF_COPYRIGHT@ |
| 30 | */ |
| 31 | /* |
| 32 | * Mach Operating System |
| 33 | * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University |
| 34 | * All Rights Reserved. |
| 35 | * |
| 36 | * Permission to use, copy, modify and distribute this software and its |
| 37 | * documentation is hereby granted, provided that both the copyright |
| 38 | * notice and this permission notice appear in all copies of the |
| 39 | * software, derivative works or modified versions, and any portions |
| 40 | * thereof, and that both notices appear in supporting documentation. |
| 41 | * |
| 42 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" |
| 43 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR |
| 44 | * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. |
| 45 | * |
| 46 | * Carnegie Mellon requests users of this software to return to |
| 47 | * |
| 48 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU |
| 49 | * School of Computer Science |
| 50 | * Carnegie Mellon University |
| 51 | * Pittsburgh PA 15213-3890 |
| 52 | * |
| 53 | * any improvements or extensions that they make and grant Carnegie Mellon |
| 54 | * the rights to redistribute these changes. |
| 55 | */ |
| 56 | /* |
| 57 | */ |
| 58 | /* |
| 59 | * File: kern/zalloc.c |
| 60 | * Author: Avadis Tevanian, Jr. |
| 61 | * |
| 62 | * Zone-based memory allocator. A zone is a collection of fixed size |
| 63 | * data blocks for which quick allocation/deallocation is possible. |
| 64 | */ |
| 65 | #include <zone_debug.h> |
| 66 | |
| 67 | #include <mach/mach_types.h> |
| 68 | #include <mach/vm_param.h> |
| 69 | #include <mach/kern_return.h> |
| 70 | #include <mach/mach_host_server.h> |
| 71 | #include <mach/task_server.h> |
| 72 | #include <mach/machine/vm_types.h> |
| 73 | #include <mach/vm_map.h> |
| 74 | #include <mach/sdt.h> |
| 75 | |
| 76 | #include <kern/bits.h> |
| 77 | #include <kern/kern_types.h> |
| 78 | #include <kern/assert.h> |
| 79 | #include <kern/backtrace.h> |
| 80 | #include <kern/host.h> |
| 81 | #include <kern/macro_help.h> |
| 82 | #include <kern/sched.h> |
| 83 | #include <kern/locks.h> |
| 84 | #include <kern/sched_prim.h> |
| 85 | #include <kern/misc_protos.h> |
| 86 | #include <kern/thread_call.h> |
| 87 | #include <kern/zalloc.h> |
| 88 | #include <kern/kalloc.h> |
| 89 | |
| 90 | #include <prng/random.h> |
| 91 | |
| 92 | #include <vm/pmap.h> |
| 93 | #include <vm/vm_map.h> |
| 94 | #include <vm/vm_kern.h> |
| 95 | #include <vm/vm_page.h> |
| 96 | |
| 97 | #include <pexpert/pexpert.h> |
| 98 | |
| 99 | #include <machine/machparam.h> |
| 100 | #include <machine/machine_routines.h> /* ml_cpu_get_info */ |
| 101 | |
| 102 | #include <libkern/OSDebug.h> |
| 103 | #include <libkern/OSAtomic.h> |
| 104 | #include <libkern/section_keywords.h> |
| 105 | #include <sys/kdebug.h> |
| 106 | |
| 107 | #include <san/kasan.h> |
| 108 | |
| 109 | /* |
| 110 | * ZONE_ALIAS_ADDR (deprecated) |
| 111 | */ |
| 112 | |
| 113 | #define from_zone_map(addr, size) \ |
| 114 | ((vm_offset_t)(addr) >= zone_map_min_address && \ |
| 115 | ((vm_offset_t)(addr) + size - 1) < zone_map_max_address ) |
| 116 | |
| 117 | /* |
| 118 | * Zone Corruption Debugging |
| 119 | * |
| 120 | * We use three techniques to detect modification of a zone element |
| 121 | * after it's been freed. |
| 122 | * |
| 123 | * (1) Check the freelist next pointer for sanity. |
| 124 | * (2) Store a backup of the next pointer at the end of the element, |
| 125 | * and compare it to the primary next pointer when the element is allocated |
| 126 | * to detect corruption of the freelist due to use-after-free bugs. |
| 127 | * The backup pointer is also XORed with a per-boot random cookie. |
| 128 | * (3) Poison the freed element by overwriting it with 0xdeadbeef, |
| 129 | * and check for that value when the element is being reused to make sure |
| 130 | * no part of the element has been modified while it was on the freelist. |
| 131 | * This will also help catch read-after-frees, as code will now dereference |
| 132 | * 0xdeadbeef instead of a valid but freed pointer. |
| 133 | * |
| 134 | * (1) and (2) occur for every allocation and free to a zone. |
| 135 | * This is done to make it slightly more difficult for an attacker to |
| 136 | * manipulate the freelist to behave in a specific way. |
| 137 | * |
| 138 | * Poisoning (3) occurs periodically for every N frees (counted per-zone) |
| 139 | * and on every free for zones smaller than a cacheline. If -zp |
| 140 | * is passed as a boot arg, poisoning occurs for every free. |
| 141 | * |
| 142 | * Performance slowdown is inversely proportional to the sampling period N, |
| 143 | * with a 4-5% hit around N=1, down to ~0.3% at N=16 and just "noise" at N=32 |
| 144 | * and higher. You can expect to find a 100% reproducible bug in an average of |
| 145 | * N tries, with a standard deviation of about N, but you will want to set |
| 146 | * "-zp" to always poison every free if you are attempting to reproduce |
| 147 | * a known bug. |
| 148 | * |
| 149 | * For a more heavyweight, but finer-grained method of detecting misuse |
| 150 | * of zone memory, look up the "Guard mode" zone allocator in gzalloc.c. |
| 151 | * |
| 152 | * Zone Corruption Logging |
| 153 | * |
| 154 | * You can also track where corruptions come from by using the boot-arguments |
| 155 | * "zlog=<zone name to log> -zc". Search for "Zone corruption logging" later |
| 156 | * in this document for more implementation and usage information. |
| 157 | * |
| 158 | * Zone Leak Detection |
| 159 | * |
| 160 | * To debug leaks of zone memory, use the zone leak detection tool 'zleaks' |
| 161 | * found later in this file, via the showtopztrace and showz* macros in kgmacros, |
| 162 | * or use zlog without the -zc argument. |
| 163 | * |
| 164 | */ |
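/*
 * A minimal sketch of the backup-pointer scheme described above (techniques
 * (1) and (2)).  This is illustration only: `zone`, `elem`, `next` and
 * `poison` are placeholders, and the real logic lives in free_to_zone() and
 * try_alloc_from_zone() further down in this file.
 */
#if 0	/* illustration only, not compiled */
vm_offset_t *primary = (vm_offset_t *)elem;			/* next pointer lives at offset 0 */
vm_offset_t *backup  = get_backup_ptr(zone->elem_size, primary);	/* and again at the end */

/* zfree side: store the obfuscated next pointer twice */
*primary = next ^ zp_nopoison_cookie;
*backup  = next ^ (poison ? zp_poisoned_cookie : zp_nopoison_cookie);

/* zalloc side: recover the primary and cross-check it against the backup */
vm_offset_t recovered = *primary ^ zp_nopoison_cookie;
if (recovered != (*backup ^ zp_nopoison_cookie) &&
    recovered != (*backup ^ zp_poisoned_cookie)) {
	/* neither cookie reproduces the primary: the freelist was corrupted */
	backup_ptr_mismatch_panic(zone, elem, *primary, *backup);
}
#endif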
| 165 | |
| 166 | /* Returns TRUE if we rolled over the counter at factor */ |
| 167 | static inline boolean_t |
| 168 | sample_counter(volatile uint32_t * count_p, uint32_t factor) |
| 169 | { |
| 170 | uint32_t old_count, new_count; |
| 171 | boolean_t rolled_over; |
| 172 | |
| 173 | do { |
| 174 | new_count = old_count = *count_p; |
| 175 | |
| 176 | if (++new_count >= factor) { |
| 177 | rolled_over = TRUE; |
| 178 | new_count = 0; |
| 179 | } else { |
| 180 | rolled_over = FALSE; |
| 181 | } |
| 182 | |
| 183 | } while (!OSCompareAndSwap(old_count, new_count, count_p)); |
| 184 | |
| 185 | return rolled_over; |
| 186 | } |
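/*
 * Hedged usage sketch for sample_counter(): poison roughly one out of every
 * zp_factor frees of a zone.  `zp_count` stands for a per-zone sampling
 * counter and is illustrative; the real decision in zfree_poison_element()
 * also takes zp_tiny_zone_limit and zp_scale into account.
 */
#if 0	/* illustration only, not compiled */
boolean_t poison = FALSE;
if (zp_factor != 0 && sample_counter(&zone->zp_count, zp_factor)) {
	poison = TRUE;	/* the counter rolled over: poison this element */
}
#endif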
| 187 | |
| 188 | #if defined(__LP64__) |
| 189 | #define ZP_POISON 0xdeadbeefdeadbeef |
| 190 | #else |
| 191 | #define ZP_POISON 0xdeadbeef |
| 192 | #endif |
| 193 | |
| 194 | boolean_t zfree_poison_element(zone_t zone, vm_offset_t elem); |
| 195 | void zalloc_poison_element(boolean_t check_poison, zone_t zone, vm_offset_t addr); |
| 196 | |
| 197 | #define ZP_DEFAULT_SAMPLING_FACTOR 16 |
| 198 | #define ZP_DEFAULT_SCALE_FACTOR 4 |
| 199 | |
| 200 | /* |
| 201 | * A zp_factor of 0 indicates zone poisoning is disabled; |
| 202 | * however, we still poison zones smaller than zp_tiny_zone_limit (a cacheline). |
| 203 | * Passing the -no-zp boot-arg disables even this behavior. |
| 204 | * In all cases, we record and check the integrity of a backup pointer. |
| 205 | */ |
| 206 | |
| 207 | /* set by zp-factor=N boot arg, zero indicates non-tiny poisoning disabled */ |
| 208 | #if DEBUG |
| 209 | #define DEFAULT_ZP_FACTOR (1) |
| 210 | #else |
| 211 | #define DEFAULT_ZP_FACTOR (0) |
| 212 | #endif |
| 213 | uint32_t zp_factor = DEFAULT_ZP_FACTOR; |
| 214 | |
| 215 | /* set by zp-scale=N boot arg, scales zp_factor by zone size */ |
| 216 | uint32_t zp_scale = 0; |
| 217 | |
| 218 | /* set in zp_init, zero indicates -no-zp boot-arg */ |
| 219 | vm_size_t zp_tiny_zone_limit = 0; |
| 220 | |
| 221 | /* initialized to a per-boot random value in zp_init */ |
| 222 | uintptr_t zp_poisoned_cookie = 0; |
| 223 | uintptr_t zp_nopoison_cookie = 0; |
| 224 | |
| 225 | #if VM_MAX_TAG_ZONES |
| 226 | boolean_t zone_tagging_on; |
| 227 | #endif /* VM_MAX_TAG_ZONES */ |
| 228 | |
| 229 | SECURITY_READ_ONLY_LATE(boolean_t) copyio_zalloc_check = TRUE; |
| 230 | static struct bool_gen zone_bool_gen; |
| 231 | |
| 232 | /* |
| 233 | * initialize zone poisoning |
| 234 | * called from zone_bootstrap before any allocations are made from zalloc |
| 235 | */ |
| 236 | static inline void |
| 237 | zp_init(void) |
| 238 | { |
| 239 | char temp_buf[16]; |
| 240 | |
| 241 | /* |
| 242 | * Initialize backup pointer random cookie for poisoned elements |
| 243 | * Try not to call early_random() back to back; it may return |
| 244 | * the same value if mach_absolute_time doesn't have sufficient time |
| 245 | * to tick over between calls. <rdar://problem/11597395> |
| 246 | * (This is only a problem on embedded devices) |
| 247 | */ |
| 248 | zp_poisoned_cookie = (uintptr_t) early_random(); |
| 249 | |
| 250 | /* |
| 251 | * Always poison zones smaller than a cacheline, |
| 252 | * because it's pretty close to free |
| 253 | */ |
| 254 | ml_cpu_info_t cpu_info; |
| 255 | ml_cpu_get_info(&cpu_info); |
| 256 | zp_tiny_zone_limit = (vm_size_t) cpu_info.cache_line_size; |
| 257 | |
| 258 | zp_factor = ZP_DEFAULT_SAMPLING_FACTOR; |
| 259 | zp_scale = ZP_DEFAULT_SCALE_FACTOR; |
| 260 | |
| 261 | //TODO: Bigger permutation? |
| 262 | /* |
| 263 | * Permute the default factor +/- 1 to make it less predictable |
| 264 | * This adds or subtracts ~4 poisoned objects per 1000 frees. |
| 265 | */ |
| 266 | if (zp_factor != 0) { |
| 267 | uint32_t rand_bits = early_random() & 0x3; |
| 268 | |
| 269 | if (rand_bits == 0x1) |
| 270 | zp_factor += 1; |
| 271 | else if (rand_bits == 0x2) |
| 272 | zp_factor -= 1; |
| 273 | /* if 0x0 or 0x3, leave it alone */ |
| 274 | } |
| 275 | |
| 276 | /* -zp: enable poisoning for every alloc and free */ |
| 277 | if (PE_parse_boot_argn("-zp" , temp_buf, sizeof(temp_buf))) { |
| 278 | zp_factor = 1; |
| 279 | } |
| 280 | |
| 281 | /* -no-zp: disable poisoning completely even for tiny zones */ |
| 282 | if (PE_parse_boot_argn("-no-zp" , temp_buf, sizeof(temp_buf))) { |
| 283 | zp_factor = 0; |
| 284 | zp_tiny_zone_limit = 0; |
| 285 | printf("Zone poisoning disabled\n" ); |
| 286 | } |
| 287 | |
| 288 | /* zp-factor=XXXX: override how often to poison freed zone elements */ |
| 289 | if (PE_parse_boot_argn("zp-factor" , &zp_factor, sizeof(zp_factor))) { |
| 290 | printf("Zone poisoning factor override: %u\n" , zp_factor); |
| 291 | } |
| 292 | |
| 293 | /* zp-scale=XXXX: override how much zone size scales zp-factor by */ |
| 294 | if (PE_parse_boot_argn("zp-scale" , &zp_scale, sizeof(zp_scale))) { |
| 295 | printf("Zone poisoning scale factor override: %u\n" , zp_scale); |
| 296 | } |
| 297 | |
| 298 | /* Initialize backup pointer random cookie for unpoisoned elements */ |
| 299 | zp_nopoison_cookie = (uintptr_t) early_random(); |
| 300 | |
| 301 | #if MACH_ASSERT |
| 302 | if (zp_poisoned_cookie == zp_nopoison_cookie) |
| 303 | panic("early_random() is broken: %p and %p are not random\n" , |
| 304 | (void *) zp_poisoned_cookie, (void *) zp_nopoison_cookie); |
| 305 | #endif |
| 306 | |
| 307 | /* |
| 308 | * Use the last bit in the backup pointer to hint poisoning state |
| 309 | * to backup_ptr_mismatch_panic. Valid zone pointers are aligned, so |
| 310 | * the low bits are zero. |
| 311 | */ |
| 312 | zp_poisoned_cookie |= (uintptr_t)0x1ULL; |
| 313 | zp_nopoison_cookie &= ~((uintptr_t)0x1ULL); |
| 314 | |
| 315 | #if defined(__LP64__) |
| 316 | /* |
| 317 | * Make backup pointers more obvious in GDB for 64 bit |
| 318 | * by making 0xFFFFFF... ^ cookie = 0xFACADE... |
| 319 | * (0xFACADE = 0xFFFFFF ^ 0x053521) |
| 320 | * (0xC0FFEE = 0xFFFFFF ^ 0x3f0011) |
| 321 | * The high 3 bytes of a zone pointer are always 0xFFFFFF, and are checked |
| 322 | * by the sanity check, so it's OK for that part of the cookie to be predictable. |
| 323 | * |
| 324 | * TODO: Use #defines, xors, and shifts |
| 325 | */ |
| 326 | |
| 327 | zp_poisoned_cookie &= 0x000000FFFFFFFFFF; |
| 328 | zp_poisoned_cookie |= 0x0535210000000000; /* 0xFACADE */ |
| 329 | |
| 330 | zp_nopoison_cookie &= 0x000000FFFFFFFFFF; |
| 331 | zp_nopoison_cookie |= 0x3f00110000000000; /* 0xC0FFEE */ |
| 332 | #endif |
| 333 | } |
| 334 | |
| 335 | /* |
| 336 | * These macros are used to keep track of the number |
| 337 | * of pages being used by the zone currently. The |
| 338 | * z->page_count is not protected by the zone lock. |
| 339 | */ |
| 340 | #define ZONE_PAGE_COUNT_INCR(z, count) \ |
| 341 | { \ |
| 342 | OSAddAtomic64(count, &(z->page_count)); \ |
| 343 | } |
| 344 | |
| 345 | #define ZONE_PAGE_COUNT_DECR(z, count) \ |
| 346 | { \ |
| 347 | OSAddAtomic64(-count, &(z->page_count)); \ |
| 348 | } |
| 349 | |
| 350 | vm_map_t zone_map = VM_MAP_NULL; |
| 351 | |
| 352 | /* for is_sane_zone_element and garbage collection */ |
| 353 | |
| 354 | vm_offset_t zone_map_min_address = 0; /* initialized in zone_init */ |
| 355 | vm_offset_t zone_map_max_address = 0; |
| 356 | |
| 357 | /* Globals for random boolean generator for elements in free list */ |
| 358 | #define MAX_ENTROPY_PER_ZCRAM 4 |
| 359 | |
| 360 | /* VM region for all metadata structures */ |
| 361 | vm_offset_t zone_metadata_region_min = 0; |
| 362 | vm_offset_t zone_metadata_region_max = 0; |
| 363 | decl_lck_mtx_data(static, zone_metadata_region_lck) |
| 364 | lck_attr_t zone_metadata_lock_attr; |
| 365 | lck_mtx_ext_t zone_metadata_region_lck_ext; |
| 366 | |
| 367 | /* Helpful for walking through a zone's free element list. */ |
| 368 | struct zone_free_element { |
| 369 | struct zone_free_element *next; |
| 370 | /* ... */ |
| 371 | /* void *backup_ptr; */ |
| 372 | }; |
| 373 | |
| 374 | #if CONFIG_ZCACHE |
| 375 | |
| 376 | #if !CONFIG_GZALLOC |
| 377 | bool use_caching = TRUE; |
| 378 | #else |
| 379 | bool use_caching = FALSE; |
| 380 | #endif /* !CONFIG_GZALLOC */ |
| 381 | |
| 382 | /* |
| 383 | * Decides whether per-cpu zone caching is to be enabled for all zones. |
| 384 | * Can be set to TRUE via the boot-arg '-zcache_all'. |
| 385 | */ |
| 386 | bool cache_all_zones = FALSE; |
| 387 | |
| 388 | /* |
| 389 | * Specifies a single zone to enable CPU caching for. |
| 390 | * Can be set using boot-args: zcc_enable_for_zone_name=<zone> |
| 391 | */ |
| 392 | static char cache_zone_name[MAX_ZONE_NAME]; |
| 393 | |
| 394 | static inline bool zone_caching_enabled(zone_t z) |
| 395 | { |
| 396 | return (z->cpu_cache_enabled && !z->tags && !z->zleak_on); |
| 397 | } |
| 398 | |
| 399 | #endif /* CONFIG_ZCACHE */ |
| 400 | |
| 401 | /* |
| 402 | * Protects zone_array, num_zones, num_zones_in_use, and zone_empty_bitmap |
| 403 | */ |
| 404 | decl_simple_lock_data(, all_zones_lock) |
| 405 | unsigned int num_zones_in_use; |
| 406 | unsigned int num_zones; |
| 407 | |
| 408 | #define MAX_ZONES 320 |
| 409 | struct zone zone_array[MAX_ZONES]; |
| 410 | |
| 411 | /* Used to keep track of empty slots in the zone_array */ |
| 412 | bitmap_t zone_empty_bitmap[BITMAP_LEN(MAX_ZONES)]; |
| 413 | |
| 414 | #if DEBUG || DEVELOPMENT |
| 415 | /* |
| 416 | * Used for the sysctl kern.run_zone_test, which is not thread-safe. Ensure only one thread goes through at a time; |
| 417 | * otherwise we can end up with multiple test zones (if a second zinit() comes through before zdestroy()), which could |
| 418 | * lead us to run out of zones. |
| 419 | */ |
| 420 | decl_simple_lock_data(, zone_test_lock) |
| 421 | static boolean_t zone_test_running = FALSE; |
| 422 | static zone_t test_zone_ptr = NULL; |
| 423 | #endif /* DEBUG || DEVELOPMENT */ |
| 424 | |
| 425 | #define PAGE_METADATA_GET_ZINDEX(page_meta) \ |
| 426 | (page_meta->zindex) |
| 427 | |
| 428 | #define PAGE_METADATA_GET_ZONE(page_meta) \ |
| 429 | (&(zone_array[page_meta->zindex])) |
| 430 | |
| 431 | #define PAGE_METADATA_SET_ZINDEX(page_meta, index) \ |
| 432 | page_meta->zindex = (index); |
| 433 | |
| 434 | struct zone_page_metadata { |
| 435 | queue_chain_t pages; /* linkage pointer for metadata lists */ |
| 436 | |
| 437 | /* Union for maintaining start of element free list and real metadata (for multipage allocations) */ |
| 438 | union { |
| 439 | /* |
| 440 | * The start of the freelist can be maintained as a 32-bit offset instead of a pointer because |
| 441 | * the free elements would be at max ZONE_MAX_ALLOC_SIZE bytes away from the metadata. Offset |
| 442 | * from start of the allocation chunk to free element list head. |
| 443 | */ |
| 444 | uint32_t freelist_offset; |
| 445 | /* |
| 446 | * This field is used to lookup the real metadata for multipage allocations, where we mark the |
| 447 | * metadata for all pages except the first as "fake" metadata using MULTIPAGE_METADATA_MAGIC. |
| 448 | * Offset from this fake metadata back to the real metadata of the allocation chunk (applied as a negative offset). |
| 449 | */ |
| 450 | uint32_t real_metadata_offset; |
| 451 | }; |
| 452 | |
| 453 | /* |
| 454 | * For the first page in the allocation chunk, this represents the total number of free elements in |
| 455 | * the chunk. |
| 456 | */ |
| 457 | uint16_t free_count; |
| 458 | unsigned zindex : ZINDEX_BITS; /* Zone index within the zone_array */ |
| 459 | unsigned page_count : PAGECOUNT_BITS; /* Count of pages within the allocation chunk */ |
| 460 | }; |
| 461 | |
| 462 | /* Macro to get page index (within zone_map) of page containing element */ |
| 463 | #define PAGE_INDEX_FOR_ELEMENT(element) \ |
| 464 | (((vm_offset_t)trunc_page(element) - zone_map_min_address) / PAGE_SIZE) |
| 465 | |
| 466 | /* Macro to get metadata structure given a page index in zone_map */ |
| 467 | #define PAGE_METADATA_FOR_PAGE_INDEX(index) \ |
| 468 | (zone_metadata_region_min + ((index) * sizeof(struct zone_page_metadata))) |
| 469 | |
| 470 | /* Macro to get index (within zone_map) for given metadata */ |
| 471 | #define PAGE_INDEX_FOR_METADATA(page_meta) \ |
| 472 | (((vm_offset_t)page_meta - zone_metadata_region_min) / sizeof(struct zone_page_metadata)) |
| 473 | |
| 474 | /* Macro to get page for given page index in zone_map */ |
| 475 | #define PAGE_FOR_PAGE_INDEX(index) \ |
| 476 | (zone_map_min_address + (PAGE_SIZE * (index))) |
| 477 | |
| 478 | /* Macro to get the actual metadata for a given address */ |
| 479 | #define PAGE_METADATA_FOR_ELEMENT(element) \ |
| 480 | (struct zone_page_metadata *)(PAGE_METADATA_FOR_PAGE_INDEX(PAGE_INDEX_FOR_ELEMENT(element))) |
| 481 | |
| 482 | /* Magic value to indicate empty element free list */ |
| 483 | #define PAGE_METADATA_EMPTY_FREELIST ((uint32_t)(~0)) |
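/*
 * Worked example (illustration only) of how the macros above compose for an
 * element inside the zone_map: the element's page index selects a fixed-size
 * slot in the metadata region, so the metadata lookup is O(1) pointer
 * arithmetic rather than a search.  `elem` is a placeholder.
 */
#if 0	/* illustration only, not compiled */
vm_offset_t elem;	/* some element inside [zone_map_min_address, zone_map_max_address) */
vm_offset_t pgidx = PAGE_INDEX_FOR_ELEMENT(elem);
struct zone_page_metadata *meta =
    (struct zone_page_metadata *)PAGE_METADATA_FOR_PAGE_INDEX(pgidx);
assert(meta == PAGE_METADATA_FOR_ELEMENT(elem));	/* same result as the combined macro */
#endif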
| 484 | |
| 485 | vm_map_copy_t create_vm_map_copy(vm_offset_t start_addr, vm_size_t total_size, vm_size_t used_size); |
| 486 | boolean_t get_zone_info(zone_t z, mach_zone_name_t *zn, mach_zone_info_t *zi); |
| 487 | boolean_t is_zone_map_nearing_exhaustion(void); |
| 488 | extern void vm_pageout_garbage_collect(int collect); |
| 489 | |
| 490 | static inline void * |
| 491 | page_metadata_get_freelist(struct zone_page_metadata *page_meta) |
| 492 | { |
| 493 | assert(PAGE_METADATA_GET_ZINDEX(page_meta) != MULTIPAGE_METADATA_MAGIC); |
| 494 | if (page_meta->freelist_offset == PAGE_METADATA_EMPTY_FREELIST) |
| 495 | return NULL; |
| 496 | else { |
| 497 | if (from_zone_map(page_meta, sizeof(struct zone_page_metadata))) |
| 498 | return (void *)(PAGE_FOR_PAGE_INDEX(PAGE_INDEX_FOR_METADATA(page_meta)) + page_meta->freelist_offset); |
| 499 | else |
| 500 | return (void *)((vm_offset_t)page_meta + page_meta->freelist_offset); |
| 501 | } |
| 502 | } |
| 503 | |
| 504 | static inline void |
| 505 | page_metadata_set_freelist(struct zone_page_metadata *page_meta, void *addr) |
| 506 | { |
| 507 | assert(PAGE_METADATA_GET_ZINDEX(page_meta) != MULTIPAGE_METADATA_MAGIC); |
| 508 | if (addr == NULL) |
| 509 | page_meta->freelist_offset = PAGE_METADATA_EMPTY_FREELIST; |
| 510 | else { |
| 511 | if (from_zone_map(page_meta, sizeof(struct zone_page_metadata))) |
| 512 | page_meta->freelist_offset = (uint32_t)((vm_offset_t)(addr) - PAGE_FOR_PAGE_INDEX(PAGE_INDEX_FOR_METADATA(page_meta))); |
| 513 | else |
| 514 | page_meta->freelist_offset = (uint32_t)((vm_offset_t)(addr) - (vm_offset_t)page_meta); |
| 515 | } |
| 516 | } |
| 517 | |
| 518 | static inline struct zone_page_metadata * |
| 519 | page_metadata_get_realmeta(struct zone_page_metadata *page_meta) |
| 520 | { |
| 521 | assert(PAGE_METADATA_GET_ZINDEX(page_meta) == MULTIPAGE_METADATA_MAGIC); |
| 522 | return (struct zone_page_metadata *)((vm_offset_t)page_meta - page_meta->real_metadata_offset); |
| 523 | } |
| 524 | |
| 525 | static inline void |
| 526 | page_metadata_set_realmeta(struct zone_page_metadata *page_meta, struct zone_page_metadata *real_meta) |
| 527 | { |
| 528 | assert(PAGE_METADATA_GET_ZINDEX(page_meta) == MULTIPAGE_METADATA_MAGIC); |
| 529 | assert(PAGE_METADATA_GET_ZINDEX(real_meta) != MULTIPAGE_METADATA_MAGIC); |
| 530 | assert((vm_offset_t)page_meta > (vm_offset_t)real_meta); |
| 531 | vm_offset_t offset = (vm_offset_t)page_meta - (vm_offset_t)real_meta; |
| 532 | assert(offset <= UINT32_MAX); |
| 533 | page_meta->real_metadata_offset = (uint32_t)offset; |
| 534 | } |
| 535 | |
| 536 | /* The backup pointer is stored in the last pointer-sized location in an element. */ |
| 537 | static inline vm_offset_t * |
| 538 | get_backup_ptr(vm_size_t elem_size, |
| 539 | vm_offset_t *element) |
| 540 | { |
| 541 | return (vm_offset_t *) ((vm_offset_t)element + elem_size - sizeof(vm_offset_t)); |
| 542 | } |
| 543 | |
| 544 | /* |
| 545 | * Routine to populate a page backing metadata in the zone_metadata_region. |
| 546 | * Must be called without the zone lock held as it might potentially block. |
| 547 | */ |
| 548 | static inline void |
| 549 | zone_populate_metadata_page(struct zone_page_metadata *page_meta) |
| 550 | { |
| 551 | vm_offset_t page_metadata_begin = trunc_page(page_meta); |
| 552 | vm_offset_t page_metadata_end = trunc_page((vm_offset_t)page_meta + sizeof(struct zone_page_metadata)); |
| 553 | |
| 554 | for(;page_metadata_begin <= page_metadata_end; page_metadata_begin += PAGE_SIZE) { |
| 555 | #if !KASAN |
| 556 | /* |
| 557 | * This can race with another thread doing a populate on the same metadata |
| 558 | * page, where we see an updated pmap but unmapped KASan shadow, causing a |
| 559 | * fault in the shadow when we first access the metadata page. Avoid this |
| 560 | * by always synchronizing on the zone_metadata_region lock with KASan. |
| 561 | */ |
| 562 | if (pmap_find_phys(kernel_pmap, (vm_map_address_t)page_metadata_begin)) |
| 563 | continue; |
| 564 | #endif |
| 565 | /* All updates to the zone_metadata_region are done under the zone_metadata_region_lck */ |
| 566 | lck_mtx_lock(&zone_metadata_region_lck); |
| 567 | if (0 == pmap_find_phys(kernel_pmap, (vm_map_address_t)page_metadata_begin)) { |
| 568 | kern_return_t __assert_only ret = kernel_memory_populate(zone_map, |
| 569 | page_metadata_begin, |
| 570 | PAGE_SIZE, |
| 571 | KMA_KOBJECT, |
| 572 | VM_KERN_MEMORY_OSFMK); |
| 573 | |
| 574 | /* should not fail with the given arguments */ |
| 575 | assert(ret == KERN_SUCCESS); |
| 576 | } |
| 577 | lck_mtx_unlock(&zone_metadata_region_lck); |
| 578 | } |
| 579 | return; |
| 580 | } |
| 581 | |
| 582 | static inline uint16_t |
| 583 | get_metadata_alloc_count(struct zone_page_metadata *page_meta) |
| 584 | { |
| 585 | assert(PAGE_METADATA_GET_ZINDEX(page_meta) != MULTIPAGE_METADATA_MAGIC); |
| 586 | struct zone *z = PAGE_METADATA_GET_ZONE(page_meta); |
| 587 | return ((page_meta->page_count * PAGE_SIZE) / z->elem_size); |
| 588 | } |
| 589 | |
| 590 | /* |
| 591 | * Routine to lookup metadata for any given address. |
| 592 | * If init is marked as TRUE, this should be called without holding the zone lock |
| 593 | * since the initialization might block. |
| 594 | */ |
| 595 | static inline struct zone_page_metadata * |
| 596 | get_zone_page_metadata(struct zone_free_element *element, boolean_t init) |
| 597 | { |
| 598 | struct zone_page_metadata *page_meta = 0; |
| 599 | |
| 600 | if (from_zone_map(element, sizeof(struct zone_free_element))) { |
| 601 | page_meta = (struct zone_page_metadata *)(PAGE_METADATA_FOR_ELEMENT(element)); |
| 602 | if (init) |
| 603 | zone_populate_metadata_page(page_meta); |
| 604 | } else { |
| 605 | page_meta = (struct zone_page_metadata *)(trunc_page((vm_offset_t)element)); |
| 606 | } |
| 607 | if (init) { |
| 608 | bzero((char *)page_meta, sizeof(struct zone_page_metadata)); |
| 609 | } |
| 610 | return ((PAGE_METADATA_GET_ZINDEX(page_meta) != MULTIPAGE_METADATA_MAGIC) ? page_meta : page_metadata_get_realmeta(page_meta)); |
| 611 | } |
| 612 | |
| 613 | /* Routine to get the page for a given metadata */ |
| 614 | static inline vm_offset_t |
| 615 | get_zone_page(struct zone_page_metadata *page_meta) |
| 616 | { |
| 617 | if (from_zone_map(page_meta, sizeof(struct zone_page_metadata))) |
| 618 | return (vm_offset_t)(PAGE_FOR_PAGE_INDEX(PAGE_INDEX_FOR_METADATA(page_meta))); |
| 619 | else |
| 620 | return (vm_offset_t)(trunc_page(page_meta)); |
| 621 | } |
| 622 | |
| 623 | /* |
| 624 | * ZTAGS |
| 625 | */ |
| 626 | |
| 627 | #if VM_MAX_TAG_ZONES |
| 628 | |
| 629 | // for zones with tagging enabled: |
| 630 | |
| 631 | // calculate a pointer to the tag base entry, |
| 632 | // holding either a uint32_t index of the first tag for a page in the zone map, |
| 633 | // or two uint16_t tags stored inline if the page can only hold one or two elements |
| 634 | |
| 635 | #define ZTAGBASE(zone, element) \ |
| 636 | (&((uint32_t *)zone_tagbase_min)[atop((element) - zone_map_min_address)]) |
| 637 | |
| 638 | // pointer to the tag for an element |
| 639 | #define ZTAG(zone, element) \ |
| 640 | ({ \ |
| 641 | vm_tag_t * result; \ |
| 642 | if ((zone)->tags_inline) { \ |
| 643 | result = (vm_tag_t *) ZTAGBASE((zone), (element)); \ |
| 644 | if ((page_mask & element) >= (zone)->elem_size) result++; \ |
| 645 | } else { \ |
| 646 | result = &((vm_tag_t *)zone_tags_min)[ZTAGBASE((zone), (element))[0] + ((element) & page_mask) / (zone)->elem_size]; \ |
| 647 | } \ |
| 648 | result; \ |
| 649 | }) |
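/*
 * Interpretation of the two tag layouts selected by zone->tags_inline
 * (a reading guide for the macros above, not new behavior):
 *
 *   tags_inline == TRUE:  the ZTAGBASE slot itself holds one or two vm_tag_t
 *                         values, one per element, which is only possible
 *                         when at most two elements fit on a page.
 *
 *   tags_inline == FALSE: the ZTAGBASE slot holds an index into the
 *                         zone_tags_map heap; the element's tag lives at that
 *                         index plus the element's ordinal within its page.
 */
#if 0	/* illustration only, not compiled */
vm_tag_t tag = ZTAG(zone, element)[0] >> 1;	/* bit 0 is kept clear so the tag block stays marked in-use */
#endif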
| 650 | |
| 651 | |
| 652 | static vm_offset_t zone_tagbase_min; |
| 653 | static vm_offset_t zone_tagbase_max; |
| 654 | static vm_offset_t zone_tagbase_map_size; |
| 655 | static vm_map_t zone_tagbase_map; |
| 656 | |
| 657 | static vm_offset_t zone_tags_min; |
| 658 | static vm_offset_t zone_tags_max; |
| 659 | static vm_offset_t zone_tags_map_size; |
| 660 | static vm_map_t zone_tags_map; |
| 661 | |
| 662 | // simple heap allocator for allocating the tags for new memory |
| 663 | |
| 664 | decl_lck_mtx_data(,ztLock) /* heap lock */ |
| 665 | enum |
| 666 | { |
| 667 | ztFreeIndexCount = 8, |
| 668 | ztFreeIndexMax = (ztFreeIndexCount - 1), |
| 669 | ztTagsPerBlock = 4 |
| 670 | }; |
| 671 | |
| 672 | struct ztBlock |
| 673 | { |
| 674 | #if __LITTLE_ENDIAN__ |
| 675 | uint64_t free:1, |
| 676 | next:21, |
| 677 | prev:21, |
| 678 | size:21; |
| 679 | #else |
| 680 | // ztBlock needs free bit least significant |
| 681 | #error !__LITTLE_ENDIAN__ |
| 682 | #endif |
| 683 | }; |
| 684 | typedef struct ztBlock ztBlock; |
| 685 | |
| 686 | static ztBlock * ztBlocks; |
| 687 | static uint32_t ztBlocksCount; |
| 688 | static uint32_t ztBlocksFree; |
| 689 | |
| 690 | static uint32_t |
| 691 | ztLog2up(uint32_t size) |
| 692 | { |
| 693 | if (1 == size) size = 0; |
| 694 | else size = 32 - __builtin_clz(size - 1); |
| 695 | return (size); |
| 696 | } |
| 697 | |
| 698 | static uint32_t |
| 699 | ztLog2down(uint32_t size) |
| 700 | { |
| 701 | size = 31 - __builtin_clz(size); |
| 702 | return (size); |
| 703 | } |
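/*
 * Worked examples: ztLog2up() rounds up to the next power-of-two exponent and
 * ztLog2down() rounds down, e.g. ztLog2up(1) == 0, ztLog2up(5) == 3,
 * ztLog2up(8) == 3, while ztLog2down(5) == 2 and ztLog2down(8) == 3.
 */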
| 704 | |
| 705 | static void |
| 706 | ztFault(vm_map_t map, const void * address, size_t size, uint32_t flags) |
| 707 | { |
| 708 | vm_map_offset_t addr = (vm_map_offset_t) address; |
| 709 | vm_map_offset_t page, end; |
| 710 | |
| 711 | page = trunc_page(addr); |
| 712 | end = round_page(addr + size); |
| 713 | |
| 714 | for (; page < end; page += page_size) |
| 715 | { |
| 716 | if (!pmap_find_phys(kernel_pmap, page)) |
| 717 | { |
| 718 | kern_return_t __unused |
| 719 | ret = kernel_memory_populate(map, page, PAGE_SIZE, |
| 720 | KMA_KOBJECT | flags, VM_KERN_MEMORY_DIAG); |
| 721 | assert(ret == KERN_SUCCESS); |
| 722 | } |
| 723 | } |
| 724 | } |
| 725 | |
| 726 | static boolean_t |
| 727 | ztPresent(const void * address, size_t size) |
| 728 | { |
| 729 | vm_map_offset_t addr = (vm_map_offset_t) address; |
| 730 | vm_map_offset_t page, end; |
| 731 | boolean_t result; |
| 732 | |
| 733 | page = trunc_page(addr); |
| 734 | end = round_page(addr + size); |
| 735 | for (result = TRUE; (page < end); page += page_size) |
| 736 | { |
| 737 | result = pmap_find_phys(kernel_pmap, page); |
| 738 | if (!result) break; |
| 739 | } |
| 740 | return (result); |
| 741 | } |
| 742 | |
| 743 | |
| 744 | void __unused |
| 745 | ztDump(boolean_t sanity); |
| 746 | void __unused |
| 747 | ztDump(boolean_t sanity) |
| 748 | { |
| 749 | uint32_t q, cq, p; |
| 750 | |
| 751 | for (q = 0; q <= ztFreeIndexMax; q++) |
| 752 | { |
| 753 | p = q; |
| 754 | do |
| 755 | { |
| 756 | if (sanity) |
| 757 | { |
| 758 | cq = ztLog2down(ztBlocks[p].size); |
| 759 | if (cq > ztFreeIndexMax) cq = ztFreeIndexMax; |
| 760 | if (!ztBlocks[p].free |
| 761 | || ((p != q) && (q != cq)) |
| 762 | || (ztBlocks[ztBlocks[p].next].prev != p) |
| 763 | || (ztBlocks[ztBlocks[p].prev].next != p)) |
| 764 | { |
| 765 | kprintf("zterror at %d" , p); |
| 766 | ztDump(FALSE); |
| 767 | kprintf("zterror at %d" , p); |
| 768 | assert(FALSE); |
| 769 | } |
| 770 | continue; |
| 771 | } |
| 772 | kprintf("zt[%03d]%c %d, %d, %d\n" , |
| 773 | p, ztBlocks[p].free ? 'F' : 'A', |
| 774 | ztBlocks[p].next, ztBlocks[p].prev, |
| 775 | ztBlocks[p].size); |
| 776 | p = ztBlocks[p].next; |
| 777 | if (p == q) break; |
| 778 | } |
| 779 | while (p != q); |
| 780 | if (!sanity) printf("\n" ); |
| 781 | } |
| 782 | if (!sanity) printf("-----------------------\n" ); |
| 783 | } |
| 784 | |
| 785 | |
| 786 | |
| 787 | #define ZTBDEQ(idx) \ |
| 788 | ztBlocks[ztBlocks[(idx)].prev].next = ztBlocks[(idx)].next; \ |
| 789 | ztBlocks[ztBlocks[(idx)].next].prev = ztBlocks[(idx)].prev; |
| 790 | |
| 791 | static void |
| 792 | ztFree(zone_t zone __unused, uint32_t index, uint32_t count) |
| 793 | { |
| 794 | uint32_t q, w, p, size, merge; |
| 795 | |
| 796 | assert(count); |
| 797 | ztBlocksFree += count; |
| 798 | |
| 799 | // merge with the following block (at index + count) |
| 800 | merge = (index + count); |
| 801 | if ((merge < ztBlocksCount) |
| 802 | && ztPresent(&ztBlocks[merge], sizeof(ztBlocks[merge])) |
| 803 | && ztBlocks[merge].free) |
| 804 | { |
| 805 | ZTBDEQ(merge); |
| 806 | count += ztBlocks[merge].size; |
| 807 | } |
| 808 | |
| 809 | // merge with the preceding block (ending at index - 1) |
| 810 | merge = (index - 1); |
| 811 | if ((merge > ztFreeIndexMax) |
| 812 | && ztPresent(&ztBlocks[merge], sizeof(ztBlocks[merge])) |
| 813 | && ztBlocks[merge].free) |
| 814 | { |
| 815 | size = ztBlocks[merge].size; |
| 816 | count += size; |
| 817 | index -= size; |
| 818 | ZTBDEQ(index); |
| 819 | } |
| 820 | |
| 821 | q = ztLog2down(count); |
| 822 | if (q > ztFreeIndexMax) q = ztFreeIndexMax; |
| 823 | w = q; |
| 824 | // queue in order of size |
| 825 | while (TRUE) |
| 826 | { |
| 827 | p = ztBlocks[w].next; |
| 828 | if (p == q) break; |
| 829 | if (ztBlocks[p].size >= count) break; |
| 830 | w = p; |
| 831 | } |
| 832 | ztBlocks[p].prev = index; |
| 833 | ztBlocks[w].next = index; |
| 834 | |
| 835 | // fault in first |
| 836 | ztFault(zone_tags_map, &ztBlocks[index], sizeof(ztBlocks[index]), 0); |
| 837 | |
| 838 | // mark first & last with free flag and size |
| 839 | ztBlocks[index].free = TRUE; |
| 840 | ztBlocks[index].size = count; |
| 841 | ztBlocks[index].prev = w; |
| 842 | ztBlocks[index].next = p; |
| 843 | if (count > 1) |
| 844 | { |
| 845 | index += (count - 1); |
| 846 | // fault in last |
| 847 | ztFault(zone_tags_map, &ztBlocks[index], sizeof(ztBlocks[index]), 0); |
| 848 | ztBlocks[index].free = TRUE; |
| 849 | ztBlocks[index].size = count; |
| 850 | } |
| 851 | } |
| 852 | |
| 853 | static uint32_t |
| 854 | ztAlloc(zone_t zone, uint32_t count) |
| 855 | { |
| 856 | uint32_t q, w, p, leftover; |
| 857 | |
| 858 | assert(count); |
| 859 | |
| 860 | q = ztLog2up(count); |
| 861 | if (q > ztFreeIndexMax) q = ztFreeIndexMax; |
| 862 | do |
| 863 | { |
| 864 | w = q; |
| 865 | while (TRUE) |
| 866 | { |
| 867 | p = ztBlocks[w].next; |
| 868 | if (p == q) break; |
| 869 | if (ztBlocks[p].size >= count) |
| 870 | { |
| 871 | // dequeue, mark both ends allocated |
| 872 | ztBlocks[w].next = ztBlocks[p].next; |
| 873 | ztBlocks[ztBlocks[p].next].prev = w; |
| 874 | ztBlocks[p].free = FALSE; |
| 875 | ztBlocksFree -= ztBlocks[p].size; |
| 876 | if (ztBlocks[p].size > 1) ztBlocks[p + ztBlocks[p].size - 1].free = FALSE; |
| 877 | |
| 878 | // fault all the allocation |
| 879 | ztFault(zone_tags_map, &ztBlocks[p], count * sizeof(ztBlocks[p]), 0); |
| 880 | // mark last as allocated |
| 881 | if (count > 1) ztBlocks[p + count - 1].free = FALSE; |
| 882 | // free remainder |
| 883 | leftover = ztBlocks[p].size - count; |
| 884 | if (leftover) ztFree(zone, p + ztBlocks[p].size - leftover, leftover); |
| 885 | |
| 886 | return (p); |
| 887 | } |
| 888 | w = p; |
| 889 | } |
| 890 | q++; |
| 891 | } |
| 892 | while (q <= ztFreeIndexMax); |
| 893 | |
| 894 | return (-1U); |
| 895 | } |
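/*
 * Hedged usage sketch for the tag-heap allocator above: a caller reserves a
 * run of blocks for the out-of-line tags of newly crammed zone memory and
 * returns it when that memory goes away.  `nblocks` is illustrative; the real
 * call sites are ztMemoryAdd() and ztMemoryRemove() below.
 */
#if 0	/* illustration only, not compiled */
lck_mtx_lock(&ztLock);
uint32_t block = ztAlloc(zone, nblocks);	/* -1U means no free run is large enough */
assert(block != -1U);
/* ... stash `block` (e.g. in the per-page tagbase) so the tags can be found later ... */
ztFree(NULL, block, nblocks);			/* coalesces with free neighbors */
lck_mtx_unlock(&ztLock);
#endif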
| 896 | |
| 897 | static void |
| 898 | ztInit(vm_size_t max_zonemap_size, lck_grp_t * group) |
| 899 | { |
| 900 | kern_return_t ret; |
| 901 | vm_map_kernel_flags_t vmk_flags; |
| 902 | uint32_t idx; |
| 903 | |
| 904 | lck_mtx_init(&ztLock, group, LCK_ATTR_NULL); |
| 905 | |
| 906 | // allocate submaps VM_KERN_MEMORY_DIAG |
| 907 | |
| 908 | zone_tagbase_map_size = atop(max_zonemap_size) * sizeof(uint32_t); |
| 909 | vmk_flags = VM_MAP_KERNEL_FLAGS_NONE; |
| 910 | vmk_flags.vmkf_permanent = TRUE; |
| 911 | ret = kmem_suballoc(kernel_map, &zone_tagbase_min, zone_tagbase_map_size, |
| 912 | FALSE, VM_FLAGS_ANYWHERE, vmk_flags, VM_KERN_MEMORY_DIAG, |
| 913 | &zone_tagbase_map); |
| 914 | |
| 915 | if (ret != KERN_SUCCESS) panic("zone_init: kmem_suballoc failed" ); |
| 916 | zone_tagbase_max = zone_tagbase_min + round_page(zone_tagbase_map_size); |
| 917 | |
| 918 | zone_tags_map_size = 2048*1024 * sizeof(vm_tag_t); |
| 919 | vmk_flags = VM_MAP_KERNEL_FLAGS_NONE; |
| 920 | vmk_flags.vmkf_permanent = TRUE; |
| 921 | ret = kmem_suballoc(kernel_map, &zone_tags_min, zone_tags_map_size, |
| 922 | FALSE, VM_FLAGS_ANYWHERE, vmk_flags, VM_KERN_MEMORY_DIAG, |
| 923 | &zone_tags_map); |
| 924 | |
| 925 | if (ret != KERN_SUCCESS) panic("zone_init: kmem_suballoc failed" ); |
| 926 | zone_tags_max = zone_tags_min + round_page(zone_tags_map_size); |
| 927 | |
| 928 | ztBlocks = (ztBlock *) zone_tags_min; |
| 929 | ztBlocksCount = (uint32_t)(zone_tags_map_size / sizeof(ztBlock)); |
| 930 | |
| 931 | // initialize the qheads |
| 932 | lck_mtx_lock(&ztLock); |
| 933 | |
| 934 | ztFault(zone_tags_map, &ztBlocks[0], sizeof(ztBlocks[0]), 0); |
| 935 | for (idx = 0; idx < ztFreeIndexCount; idx++) |
| 936 | { |
| 937 | ztBlocks[idx].free = TRUE; |
| 938 | ztBlocks[idx].next = idx; |
| 939 | ztBlocks[idx].prev = idx; |
| 940 | ztBlocks[idx].size = 0; |
| 941 | } |
| 942 | // free remaining space |
| 943 | ztFree(NULL, ztFreeIndexCount, ztBlocksCount - ztFreeIndexCount); |
| 944 | |
| 945 | lck_mtx_unlock(&ztLock); |
| 946 | } |
| 947 | |
| 948 | static void |
| 949 | ztMemoryAdd(zone_t zone, vm_offset_t mem, vm_size_t size) |
| 950 | { |
| 951 | uint32_t * tagbase; |
| 952 | uint32_t count, block, blocks, idx; |
| 953 | size_t pages; |
| 954 | |
| 955 | pages = atop(size); |
| 956 | tagbase = ZTAGBASE(zone, mem); |
| 957 | |
| 958 | lck_mtx_lock(&ztLock); |
| 959 | |
| 960 | // fault tagbase |
| 961 | ztFault(zone_tagbase_map, tagbase, pages * sizeof(uint32_t), 0); |
| 962 | |
| 963 | if (!zone->tags_inline) |
| 964 | { |
| 965 | // allocate tags |
| 966 | count = (uint32_t)(size / zone->elem_size); |
| 967 | blocks = ((count + ztTagsPerBlock - 1) / ztTagsPerBlock); |
| 968 | block = ztAlloc(zone, blocks); |
| 969 | if (-1U == block) ztDump(false); |
| 970 | assert(-1U != block); |
| 971 | } |
| 972 | |
| 973 | lck_mtx_unlock(&ztLock); |
| 974 | |
| 975 | if (!zone->tags_inline) |
| 976 | { |
| 977 | // set tag base for each page |
| 978 | block *= ztTagsPerBlock; |
| 979 | for (idx = 0; idx < pages; idx++) |
| 980 | { |
| 981 | tagbase[idx] = block + (uint32_t)((ptoa(idx) + (zone->elem_size - 1)) / zone->elem_size); |
| 982 | } |
| 983 | } |
| 984 | } |
| 985 | |
| 986 | static void |
| 987 | ztMemoryRemove(zone_t zone, vm_offset_t mem, vm_size_t size) |
| 988 | { |
| 989 | uint32_t * tagbase; |
| 990 | uint32_t count, block, blocks, idx; |
| 991 | size_t pages; |
| 992 | |
| 993 | // set tag base for each page |
| 994 | pages = atop(size); |
| 995 | tagbase = ZTAGBASE(zone, mem); |
| 996 | block = tagbase[0]; |
| 997 | for (idx = 0; idx < pages; idx++) |
| 998 | { |
| 999 | tagbase[idx] = 0xFFFFFFFF; |
| 1000 | } |
| 1001 | |
| 1002 | lck_mtx_lock(&ztLock); |
| 1003 | if (!zone->tags_inline) |
| 1004 | { |
| 1005 | count = (uint32_t)(size / zone->elem_size); |
| 1006 | blocks = ((count + ztTagsPerBlock - 1) / ztTagsPerBlock); |
| 1007 | assert(block != 0xFFFFFFFF); |
| 1008 | block /= ztTagsPerBlock; |
| 1009 | ztFree(NULL /* zone is unlocked */, block, blocks); |
| 1010 | } |
| 1011 | |
| 1012 | lck_mtx_unlock(&ztLock); |
| 1013 | } |
| 1014 | |
| 1015 | uint32_t |
| 1016 | zone_index_from_tag_index(uint32_t tag_zone_index, vm_size_t * elem_size) |
| 1017 | { |
| 1018 | zone_t z; |
| 1019 | uint32_t idx; |
| 1020 | |
| 1021 | simple_lock(&all_zones_lock); |
| 1022 | |
| 1023 | for (idx = 0; idx < num_zones; idx++) |
| 1024 | { |
| 1025 | z = &(zone_array[idx]); |
| 1026 | if (!z->tags) continue; |
| 1027 | if (tag_zone_index != z->tag_zone_index) continue; |
| 1028 | *elem_size = z->elem_size; |
| 1029 | break; |
| 1030 | } |
| 1031 | |
| 1032 | simple_unlock(&all_zones_lock); |
| 1033 | |
| 1034 | if (idx == num_zones) idx = -1U; |
| 1035 | |
| 1036 | return (idx); |
| 1037 | } |
| 1038 | |
| 1039 | #endif /* VM_MAX_TAG_ZONES */ |
| 1040 | |
| 1041 | /* Routine to get the size of a zone-allocated address. If the address doesn't belong to the |
| 1042 | * zone_map, returns 0. |
| 1043 | */ |
| 1044 | vm_size_t |
| 1045 | zone_element_size(void *addr, zone_t *z) |
| 1046 | { |
| 1047 | struct zone *src_zone; |
| 1048 | if (from_zone_map(addr, sizeof(void *))) { |
| 1049 | struct zone_page_metadata *page_meta = get_zone_page_metadata((struct zone_free_element *)addr, FALSE); |
| 1050 | src_zone = PAGE_METADATA_GET_ZONE(page_meta); |
| 1051 | if (z) { |
| 1052 | *z = src_zone; |
| 1053 | } |
| 1054 | return (src_zone->elem_size); |
| 1055 | } else { |
| 1056 | #if CONFIG_GZALLOC |
| 1057 | vm_size_t gzsize; |
| 1058 | if (gzalloc_element_size(addr, z, &gzsize)) { |
| 1059 | return gzsize; |
| 1060 | } |
| 1061 | #endif /* CONFIG_GZALLOC */ |
| 1062 | |
| 1063 | return 0; |
| 1064 | } |
| 1065 | } |
| 1066 | |
| 1067 | #if DEBUG || DEVELOPMENT |
| 1068 | |
| 1069 | vm_size_t |
| 1070 | zone_element_info(void *addr, vm_tag_t * ptag) |
| 1071 | { |
| 1072 | vm_size_t size = 0; |
| 1073 | vm_tag_t tag = VM_KERN_MEMORY_NONE; |
| 1074 | struct zone * src_zone; |
| 1075 | |
| 1076 | if (from_zone_map(addr, sizeof(void *))) { |
| 1077 | struct zone_page_metadata *page_meta = get_zone_page_metadata((struct zone_free_element *)addr, FALSE); |
| 1078 | src_zone = PAGE_METADATA_GET_ZONE(page_meta); |
| 1079 | #if VM_MAX_TAG_ZONES |
| 1080 | if (__improbable(src_zone->tags)) { |
| 1081 | tag = (ZTAG(src_zone, (vm_offset_t) addr)[0] >> 1); |
| 1082 | } |
| 1083 | #endif /* VM_MAX_TAG_ZONES */ |
| 1084 | size = src_zone->elem_size; |
| 1085 | } else { |
| 1086 | #if CONFIG_GZALLOC |
| 1087 | gzalloc_element_size(addr, NULL, &size); |
| 1088 | #endif /* CONFIG_GZALLOC */ |
| 1089 | } |
| 1090 | *ptag = tag; |
| 1091 | return size; |
| 1092 | } |
| 1093 | |
| 1094 | #endif /* DEBUG || DEVELOPMENT */ |
| 1095 | |
| 1096 | /* |
| 1097 | * Zone checking helper function. |
| 1098 | * A pointer that satisfies these conditions is OK to be a freelist next pointer. |
| 1099 | * A pointer that doesn't satisfy these conditions indicates corruption. |
| 1100 | */ |
| 1101 | static inline boolean_t |
| 1102 | is_sane_zone_ptr(zone_t zone, |
| 1103 | vm_offset_t addr, |
| 1104 | size_t obj_size) |
| 1105 | { |
| 1106 | /* Must be aligned to pointer boundary */ |
| 1107 | if (__improbable((addr & (sizeof(vm_offset_t) - 1)) != 0)) |
| 1108 | return FALSE; |
| 1109 | |
| 1110 | /* Must be a kernel address */ |
| 1111 | if (__improbable(!pmap_kernel_va(addr))) |
| 1112 | return FALSE; |
| 1113 | |
| 1114 | /* Must be from zone map if the zone only uses memory from the zone_map */ |
| 1115 | /* |
| 1116 | * TODO: Remove the zone->collectable check when every |
| 1117 | * zone using foreign memory is properly tagged with allows_foreign |
| 1118 | */ |
| 1119 | if (zone->collectable && !zone->allows_foreign) { |
| 1120 | /* check if addr is from zone map */ |
| 1121 | if (addr >= zone_map_min_address && |
| 1122 | (addr + obj_size - 1) < zone_map_max_address ) |
| 1123 | return TRUE; |
| 1124 | |
| 1125 | return FALSE; |
| 1126 | } |
| 1127 | |
| 1128 | return TRUE; |
| 1129 | } |
| 1130 | |
| 1131 | static inline boolean_t |
| 1132 | is_sane_zone_page_metadata(zone_t zone, |
| 1133 | vm_offset_t page_meta) |
| 1134 | { |
| 1135 | /* NULL page metadata structures are invalid */ |
| 1136 | if (page_meta == 0) |
| 1137 | return FALSE; |
| 1138 | return is_sane_zone_ptr(zone, page_meta, sizeof(struct zone_page_metadata)); |
| 1139 | } |
| 1140 | |
| 1141 | static inline boolean_t |
| 1142 | is_sane_zone_element(zone_t zone, |
| 1143 | vm_offset_t addr) |
| 1144 | { |
| 1145 | /* NULL is OK because it indicates the tail of the list */ |
| 1146 | if (addr == 0) |
| 1147 | return TRUE; |
| 1148 | return is_sane_zone_ptr(zone, addr, zone->elem_size); |
| 1149 | } |
| 1150 | |
| 1151 | /* Someone wrote to freed memory. */ |
| 1152 | static inline void /* noreturn */ |
| 1153 | zone_element_was_modified_panic(zone_t zone, |
| 1154 | vm_offset_t element, |
| 1155 | vm_offset_t found, |
| 1156 | vm_offset_t expected, |
| 1157 | vm_offset_t offset) |
| 1158 | { |
| 1159 | panic("a freed zone element has been modified in zone %s: expected %p but found %p, bits changed %p, at offset %d of %d in element %p, cookies %p %p" , |
| 1160 | zone->zone_name, |
| 1161 | (void *) expected, |
| 1162 | (void *) found, |
| 1163 | (void *) (expected ^ found), |
| 1164 | (uint32_t) offset, |
| 1165 | (uint32_t) zone->elem_size, |
| 1166 | (void *) element, |
| 1167 | (void *) zp_nopoison_cookie, |
| 1168 | (void *) zp_poisoned_cookie); |
| 1169 | } |
| 1170 | |
| 1171 | /* |
| 1172 | * The primary and backup pointers don't match. |
| 1173 | * Determine which one was likely the corrupted pointer, find out what it |
| 1174 | * probably should have been, and panic. |
| 1175 | * I would like to mark this as noreturn, but panic() isn't marked noreturn. |
| 1176 | */ |
| 1177 | static void /* noreturn */ |
| 1178 | backup_ptr_mismatch_panic(zone_t zone, |
| 1179 | vm_offset_t element, |
| 1180 | vm_offset_t primary, |
| 1181 | vm_offset_t backup) |
| 1182 | { |
| 1183 | vm_offset_t likely_backup; |
| 1184 | vm_offset_t likely_primary; |
| 1185 | |
| 1186 | likely_primary = primary ^ zp_nopoison_cookie; |
| 1187 | boolean_t sane_backup; |
| 1188 | boolean_t sane_primary = is_sane_zone_element(zone, likely_primary); |
| 1189 | boolean_t element_was_poisoned = (backup & 0x1) ? TRUE : FALSE; |
| 1190 | |
| 1191 | #if defined(__LP64__) |
| 1192 | /* We can inspect the tag in the upper bits for additional confirmation */ |
| 1193 | if ((backup & 0xFFFFFF0000000000) == 0xFACADE0000000000) |
| 1194 | element_was_poisoned = TRUE; |
| 1195 | else if ((backup & 0xFFFFFF0000000000) == 0xC0FFEE0000000000) |
| 1196 | element_was_poisoned = FALSE; |
| 1197 | #endif |
| 1198 | |
| 1199 | if (element_was_poisoned) { |
| 1200 | likely_backup = backup ^ zp_poisoned_cookie; |
| 1201 | sane_backup = is_sane_zone_element(zone, likely_backup); |
| 1202 | } else { |
| 1203 | likely_backup = backup ^ zp_nopoison_cookie; |
| 1204 | sane_backup = is_sane_zone_element(zone, likely_backup); |
| 1205 | } |
| 1206 | |
| 1207 | /* The primary is definitely the corrupted one */ |
| 1208 | if (!sane_primary && sane_backup) |
| 1209 | zone_element_was_modified_panic(zone, element, primary, (likely_backup ^ zp_nopoison_cookie), 0); |
| 1210 | |
| 1211 | /* The backup is definitely the corrupted one */ |
| 1212 | if (sane_primary && !sane_backup) |
| 1213 | zone_element_was_modified_panic(zone, element, backup, |
| 1214 | (likely_primary ^ (element_was_poisoned ? zp_poisoned_cookie : zp_nopoison_cookie)), |
| 1215 | zone->elem_size - sizeof(vm_offset_t)); |
| 1216 | |
| 1217 | /* |
| 1218 | * Not sure which is the corrupted one. |
| 1219 | * It's less likely that the backup pointer was overwritten with |
| 1220 | * ( (sane address) ^ (valid cookie) ), so we'll guess that the |
| 1221 | * primary pointer has been overwritten with a sane but incorrect address. |
| 1222 | */ |
| 1223 | if (sane_primary && sane_backup) |
| 1224 | zone_element_was_modified_panic(zone, element, primary, (likely_backup ^ zp_nopoison_cookie), 0); |
| 1225 | |
| 1226 | /* Neither are sane, so just guess. */ |
| 1227 | zone_element_was_modified_panic(zone, element, primary, (likely_backup ^ zp_nopoison_cookie), 0); |
| 1228 | } |
| 1229 | |
| 1230 | /* |
| 1231 | * Adds the element to the head of the zone's free list |
| 1232 | * Keeps a backup next-pointer at the end of the element |
| 1233 | */ |
| 1234 | static inline void |
| 1235 | free_to_zone(zone_t zone, |
| 1236 | vm_offset_t element, |
| 1237 | boolean_t poison) |
| 1238 | { |
| 1239 | vm_offset_t old_head; |
| 1240 | struct zone_page_metadata *page_meta; |
| 1241 | |
| 1242 | vm_offset_t *primary = (vm_offset_t *) element; |
| 1243 | vm_offset_t *backup = get_backup_ptr(zone->elem_size, primary); |
| 1244 | |
| 1245 | page_meta = get_zone_page_metadata((struct zone_free_element *)element, FALSE); |
| 1246 | assert(PAGE_METADATA_GET_ZONE(page_meta) == zone); |
| 1247 | old_head = (vm_offset_t)page_metadata_get_freelist(page_meta); |
| 1248 | |
| 1249 | if (__improbable(!is_sane_zone_element(zone, old_head))) |
| 1250 | panic("zfree: invalid head pointer %p for freelist of zone %s\n" , |
| 1251 | (void *) old_head, zone->zone_name); |
| 1252 | |
| 1253 | if (__improbable(!is_sane_zone_element(zone, element))) |
| 1254 | panic("zfree: freeing invalid pointer %p to zone %s\n" , |
| 1255 | (void *) element, zone->zone_name); |
| 1256 | |
| 1257 | if (__improbable(old_head == element)) |
| 1258 | panic("zfree: double free of %p to zone %s\n" , |
| 1259 | (void *) element, zone->zone_name); |
| 1260 | /* |
| 1261 | * Always write a redundant next pointer. |
| 1262 | * So that it is more difficult to forge, xor it with a random cookie. |
| 1263 | * A poisoned element is indicated by using zp_poisoned_cookie |
| 1264 | * instead of zp_nopoison_cookie. |
| 1265 | */ |
| 1266 | |
| 1267 | *backup = old_head ^ (poison ? zp_poisoned_cookie : zp_nopoison_cookie); |
| 1268 | |
| 1269 | /* |
| 1270 | * Insert this element at the head of the free list. We also xor the |
| 1271 | * primary pointer with the zp_nopoison_cookie to make sure a free |
| 1272 | * element does not provide the location of the next free element directly. |
| 1273 | */ |
| 1274 | *primary = old_head ^ zp_nopoison_cookie; |
| 1275 | page_metadata_set_freelist(page_meta, (struct zone_free_element *)element); |
| 1276 | page_meta->free_count++; |
| 1277 | if (zone->allows_foreign && !from_zone_map(element, zone->elem_size)) { |
| 1278 | if (page_meta->free_count == 1) { |
| 1279 | /* first foreign element freed on page, move from all_used */ |
| 1280 | re_queue_tail(&zone->pages.any_free_foreign, &(page_meta->pages)); |
| 1281 | } else { |
| 1282 | /* no other list transitions */ |
| 1283 | } |
| 1284 | } else if (page_meta->free_count == get_metadata_alloc_count(page_meta)) { |
| 1285 | /* whether the page was on the intermediate or all_used queue, move it to free */ |
| 1286 | re_queue_tail(&zone->pages.all_free, &(page_meta->pages)); |
| 1287 | zone->count_all_free_pages += page_meta->page_count; |
| 1288 | } else if (page_meta->free_count == 1) { |
| 1289 | /* first free element on page, move from all_used */ |
| 1290 | re_queue_tail(&zone->pages.intermediate, &(page_meta->pages)); |
| 1291 | } |
| 1292 | zone->count--; |
| 1293 | zone->countfree++; |
| 1294 | |
| 1295 | #if KASAN_ZALLOC |
| 1296 | kasan_poison_range(element, zone->elem_size, ASAN_HEAP_FREED); |
| 1297 | #endif |
| 1298 | } |
| 1299 | |
| 1300 | |
| 1301 | /* |
| 1302 | * Removes an element from the zone's free list, returning 0 if the free list is empty. |
| 1303 | * Verifies that the next-pointer and backup next-pointer are intact, |
| 1304 | * and verifies that a poisoned element hasn't been modified. |
| 1305 | */ |
| 1306 | static inline vm_offset_t |
| 1307 | try_alloc_from_zone(zone_t zone, |
| 1308 | vm_tag_t tag __unused, |
| 1309 | boolean_t* check_poison) |
| 1310 | { |
| 1311 | vm_offset_t element; |
| 1312 | struct zone_page_metadata *page_meta; |
| 1313 | |
| 1314 | *check_poison = FALSE; |
| 1315 | |
| 1316 | /* if zone is empty, bail */ |
| 1317 | if (zone->allows_foreign && !queue_empty(&zone->pages.any_free_foreign)) |
| 1318 | page_meta = (struct zone_page_metadata *)queue_first(&zone->pages.any_free_foreign); |
| 1319 | else if (!queue_empty(&zone->pages.intermediate)) |
| 1320 | page_meta = (struct zone_page_metadata *)queue_first(&zone->pages.intermediate); |
| 1321 | else if (!queue_empty(&zone->pages.all_free)) { |
| 1322 | page_meta = (struct zone_page_metadata *)queue_first(&zone->pages.all_free); |
| 1323 | assert(zone->count_all_free_pages >= page_meta->page_count); |
| 1324 | zone->count_all_free_pages -= page_meta->page_count; |
| 1325 | } else { |
| 1326 | return 0; |
| 1327 | } |
| 1328 | /* Check if page_meta passes is_sane_zone_page_metadata */ |
| 1329 | if (__improbable(!is_sane_zone_page_metadata(zone, (vm_offset_t)page_meta))) |
| 1330 | panic("zalloc: invalid metadata structure %p for freelist of zone %s\n" , |
| 1331 | (void *) page_meta, zone->zone_name); |
| 1332 | assert(PAGE_METADATA_GET_ZONE(page_meta) == zone); |
| 1333 | element = (vm_offset_t)page_metadata_get_freelist(page_meta); |
| 1334 | |
| 1335 | if (__improbable(!is_sane_zone_ptr(zone, element, zone->elem_size))) |
| 1336 | panic("zfree: invalid head pointer %p for freelist of zone %s\n" , |
| 1337 | (void *) element, zone->zone_name); |
| 1338 | |
| 1339 | vm_offset_t *primary = (vm_offset_t *) element; |
| 1340 | vm_offset_t *backup = get_backup_ptr(zone->elem_size, primary); |
| 1341 | |
| 1342 | /* |
| 1343 | * Since the primary next pointer is xor'ed with zp_nopoison_cookie |
| 1344 | * for obfuscation, retrieve the original value back |
| 1345 | */ |
| 1346 | vm_offset_t next_element = *primary ^ zp_nopoison_cookie; |
| 1347 | vm_offset_t next_element_primary = *primary; |
| 1348 | vm_offset_t next_element_backup = *backup; |
| 1349 | |
| 1350 | /* |
| 1351 | * backup_ptr_mismatch_panic will determine what next_element |
| 1352 | * should have been, and print it appropriately |
| 1353 | */ |
| 1354 | if (__improbable(!is_sane_zone_element(zone, next_element))) |
| 1355 | backup_ptr_mismatch_panic(zone, element, next_element_primary, next_element_backup); |
| 1356 | |
| 1357 | /* Check the backup pointer for the regular cookie */ |
| 1358 | if (__improbable(next_element != (next_element_backup ^ zp_nopoison_cookie))) { |
| 1359 | |
| 1360 | /* Check for the poisoned cookie instead */ |
| 1361 | if (__improbable(next_element != (next_element_backup ^ zp_poisoned_cookie))) |
| 1362 | /* Neither cookie is valid, corruption has occurred */ |
| 1363 | backup_ptr_mismatch_panic(zone, element, next_element_primary, next_element_backup); |
| 1364 | |
| 1365 | /* |
| 1366 | * Element was marked as poisoned, so check its integrity before using it. |
| 1367 | */ |
| 1368 | *check_poison = TRUE; |
| 1369 | } |
| 1370 | |
| 1371 | /* Make sure the page_meta is at the correct offset from the start of page */ |
| 1372 | if (__improbable(page_meta != get_zone_page_metadata((struct zone_free_element *)element, FALSE))) |
| 1373 | panic("zalloc: Incorrect metadata %p found in zone %s page queue. Expected metadata: %p\n" , |
| 1374 | page_meta, zone->zone_name, get_zone_page_metadata((struct zone_free_element *)element, FALSE)); |
| 1375 | |
| 1376 | /* Make sure next_element belongs to the same page as page_meta */ |
| 1377 | if (next_element) { |
| 1378 | if (__improbable(page_meta != get_zone_page_metadata((struct zone_free_element *)next_element, FALSE))) |
| 1379 | panic("zalloc: next element pointer %p for element %p points to invalid element for zone %s\n" , |
| 1380 | (void *)next_element, (void *)element, zone->zone_name); |
| 1381 | } |
| 1382 | |
| 1383 | /* Remove this element from the free list */ |
| 1384 | page_metadata_set_freelist(page_meta, (struct zone_free_element *)next_element); |
| 1385 | page_meta->free_count--; |
| 1386 | |
| 1387 | if (page_meta->free_count == 0) { |
| 1388 | /* move to all used */ |
| 1389 | re_queue_tail(&zone->pages.all_used, &(page_meta->pages)); |
| 1390 | } else { |
| 1391 | if (!zone->allows_foreign || from_zone_map(element, zone->elem_size)) { |
| 1392 | if (get_metadata_alloc_count(page_meta) == page_meta->free_count + 1) { |
| 1393 | /* remove from free, move to intermediate */ |
| 1394 | re_queue_tail(&zone->pages.intermediate, &(page_meta->pages)); |
| 1395 | } |
| 1396 | } |
| 1397 | } |
| 1398 | zone->countfree--; |
| 1399 | zone->count++; |
| 1400 | zone->sum_count++; |
| 1401 | |
| 1402 | #if VM_MAX_TAG_ZONES |
| 1403 | if (__improbable(zone->tags)) { |
| 1404 | // set the tag with b0 clear so the block remains inuse |
| 1405 | ZTAG(zone, element)[0] = (tag << 1); |
| 1406 | } |
| 1407 | #endif /* VM_MAX_TAG_ZONES */ |
| 1408 | |
| 1409 | |
| 1410 | #if KASAN_ZALLOC |
| 1411 | kasan_poison_range(element, zone->elem_size, ASAN_VALID); |
| 1412 | #endif |
| 1413 | |
| 1414 | return element; |
| 1415 | } |
| 1416 | |
| 1417 | /* |
| 1418 | * End of zone poisoning |
| 1419 | */ |
| 1420 | |
| 1421 | /* |
| 1422 | * Zone info options |
| 1423 | */ |
| 1424 | #define ZINFO_SLOTS MAX_ZONES /* for now */ |
| 1425 | |
| 1426 | zone_t zone_find_largest(void); |
| 1427 | |
| 1428 | /* |
| 1429 | * Async allocation of zones |
| 1430 | * This mechanism allows for bootstrapping an empty zone which is setup with |
| 1431 | * non-blocking flags. The first call to zalloc_noblock() will kick off a thread_call |
| 1432 | * to zalloc_async. We perform a zalloc() (which may block) and then an immediate free. |
| 1433 | * This will prime the zone for the next use. |
| 1434 | * |
| 1435 | * Currently the thread_callout function (zalloc_async) will loop through all zones |
| 1436 | * looking for any zone with async_pending set and do the work for it. |
| 1437 | * |
| 1438 | * NOTE: If the calling thread for zalloc_noblock is lower priority than thread_call, |
| 1439 | * then zalloc_noblock to an empty zone may succeed. |
| 1440 | */ |
| 1441 | void zalloc_async( |
| 1442 | thread_call_param_t p0, |
| 1443 | thread_call_param_t p1); |
| 1444 | |
| 1445 | static thread_call_data_t call_async_alloc; |
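| | 
| | /*
| | * Illustrative sketch only (not part of the build): the priming pattern described
| | * above. zalloc()/zfree() are the real allocator entry points; the function name
| | * below is made up for illustration.
| | */
| | #if 0
| | static void
| | zalloc_async_prime_sketch(zone_t z)
| | {
| | void *elt = zalloc(z); /* may block; expands the zone if it is empty */
| | if (elt != NULL)
| | zfree(z, elt); /* return it immediately, leaving the zone primed for zalloc_noblock() */
| | }
| | #endif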
| 1446 | |
| 1447 | /* |
| 1448 | * Align elements that use the zone page list to 32 byte boundaries. |
| 1449 | */ |
| 1450 | #define ZONE_ELEMENT_ALIGNMENT 32 |
| 1451 | |
| 1452 | #define zone_wakeup(zone) thread_wakeup((event_t)(zone)) |
| 1453 | #define zone_sleep(zone) \ |
| 1454 | (void) lck_mtx_sleep(&(zone)->lock, LCK_SLEEP_SPIN_ALWAYS, (event_t)(zone), THREAD_UNINT); |
| 1455 | |
| 1456 | /* |
| 1457 | * The zone_locks_grp allows for collecting lock statistics. |
| 1458 | * All locks are associated to this group in zinit. |
| 1459 | * Look at tools/lockstat for debugging lock contention. |
| 1460 | */ |
| 1461 | |
| 1462 | lck_grp_t zone_locks_grp; |
| 1463 | lck_grp_attr_t zone_locks_grp_attr; |
| 1464 | |
| 1465 | #define lock_zone_init(zone) \ |
| 1466 | MACRO_BEGIN \ |
| 1467 | lck_attr_setdefault(&(zone)->lock_attr); \ |
| 1468 | lck_mtx_init_ext(&(zone)->lock, &(zone)->lock_ext, \ |
| 1469 | &zone_locks_grp, &(zone)->lock_attr); \ |
| 1470 | MACRO_END |
| 1471 | |
| 1472 | #define lock_try_zone(zone) lck_mtx_try_lock_spin(&zone->lock) |
| 1473 | |
| 1474 | /* |
| 1475 | * Exclude more than one concurrent garbage collection |
| 1476 | */ |
| 1477 | decl_lck_mtx_data(, zone_gc_lock) |
| 1478 | |
| 1479 | lck_attr_t zone_gc_lck_attr; |
| 1480 | lck_grp_t zone_gc_lck_grp; |
| 1481 | lck_grp_attr_t zone_gc_lck_grp_attr; |
| 1482 | lck_mtx_ext_t zone_gc_lck_ext; |
| 1483 | |
| 1484 | boolean_t zone_gc_allowed = TRUE; |
| 1485 | boolean_t panic_include_zprint = FALSE; |
| 1486 | |
| 1487 | mach_memory_info_t *panic_kext_memory_info = NULL; |
| 1488 | vm_size_t panic_kext_memory_size = 0; |
| 1489 | |
| 1490 | #define ZALLOC_DEBUG_ZONEGC 0x00000001 |
| 1491 | #define ZALLOC_DEBUG_ZCRAM 0x00000002 |
| 1492 | uint32_t zalloc_debug = 0; |
| 1493 | |
| 1494 | /* |
| 1495 | * Zone leak debugging code |
| 1496 | * |
| 1497 | * When enabled, this code keeps a log to track allocations to a particular zone that have not |
| 1498 | * yet been freed. Examining this log will reveal the source of a zone leak. The log is allocated |
| 1499 | * only when logging is enabled, so there is no effect on the system when it's turned off. Logging is |
| 1500 | * off by default. |
| 1501 | * |
| 1502 | * Enable the logging via the boot-args. Add the parameter "zlog=<zone>" to boot-args where <zone> |
| 1503 | * is the name of the zone you wish to log. |
| 1504 | * |
| 1505 | * This code only tracks one zone, so you need to identify which one is leaking first. |
| 1506 | * Generally, you'll know you have a leak when you get a "zalloc retry failed 3" panic from the zone |
| 1507 | * garbage collector. Note that the zone name printed in the panic message is not necessarily the one |
| 1508 | * containing the leak. So do a zprint from gdb and locate the zone with the bloated size. This |
| 1509 | * is most likely the problem zone, so set zlog in boot-args to this zone name, reboot and re-run the test. The |
| 1510 | * next time it panics with this message, examine the log using the kgmacros zstack, findoldest and countpcs. |
| 1511 | * See the help in the kgmacros for usage info. |
| 1512 | * |
| 1513 | * |
| 1514 | * Zone corruption logging |
| 1515 | * |
| 1516 | * Logging can also be used to help identify the source of a zone corruption. First, identify the zone |
| 1517 | * that is being corrupted, then add "-zc zlog=<zone name>" to the boot-args. When -zc is used in conjunction |
| 1518 | * with zlog, it changes the logging style to track both allocations and frees to the zone. So when the |
| 1519 | * corruption is detected, examining the log will show you the stack traces of the callers who last allocated |
| 1520 | * and freed any particular element in the zone. Use the findelem kgmacro with the address of the element that's been |
| 1521 | * corrupted to examine its history. This should lead to the source of the corruption. |
| 1522 | */ |
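| | 
| | /*
| | * Example boot-args (illustrative only; the zone names below are placeholders):
| | *
| | * zlog=kalloc.64 zrecs=8000 enables leak logging for one zone with 8000 records
| | * -zc zlog=my.zone enables corruption logging (allocs and frees)
| | *
| | * Remember that a period in the zlog name stands for a space in the zone name,
| | * as described below in track_this_zone().
| | */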
| 1523 | |
| 1524 | static boolean_t log_records_init = FALSE; |
| 1525 | static int log_records; /* size of the log, expressed in number of records */ |
| 1526 | |
| 1527 | #define MAX_NUM_ZONES_ALLOWED_LOGGING 10 /* Maximum 10 zones can be logged at once */ |
| 1528 | |
| 1529 | static int max_num_zones_to_log = MAX_NUM_ZONES_ALLOWED_LOGGING; |
| 1530 | static int num_zones_logged = 0; |
| 1531 | |
| 1532 | static char zone_name_to_log[MAX_ZONE_NAME] = "" ; /* the zone name we're logging, if any */ |
| 1533 | |
| 1534 | /* Log allocations and frees to help debug a zone element corruption */ |
| 1535 | boolean_t corruption_debug_flag = DEBUG; /* enabled by "-zc" boot-arg */ |
| 1536 | /* Make pointer-scanning leak detection possible for all zones */
| 1537 | |
| 1538 | #if DEBUG || DEVELOPMENT |
| 1539 | boolean_t leak_scan_debug_flag = FALSE; /* enabled by "-zl" boot-arg */ |
| 1540 | #endif /* DEBUG || DEVELOPMENT */ |
| 1541 | |
| 1542 | |
| 1543 | /* |
| 1544 | * The number of records in the log is configurable via the zrecs parameter in boot-args. Set this to |
| 1545 | * the number of records you want in the log. For example, "zrecs=10" sets it to 10 records. Since this |
| 1546 | * is the number of stacks suspected of leaking, we don't need many records. |
| 1547 | */ |
| 1548 | |
| 1549 | #if defined(__LP64__) |
| 1550 | #define ZRECORDS_MAX 2560 /* Max records allowed in the log */ |
| 1551 | #else |
| 1552 | #define ZRECORDS_MAX 1536 /* Max records allowed in the log */ |
| 1553 | #endif |
| 1554 | #define ZRECORDS_DEFAULT 1024 /* default records in log if zrecs is not specified in boot-args */
| 1555 | |
| 1556 | /* |
| 1557 | * Each record in the log contains a pointer to the zone element it refers to, |
| 1558 | * and a small array to hold the pc's from the stack trace. A |
| 1559 | * record is added to the log each time a zalloc() is done in the zone_of_interest. For leak debugging, |
| 1560 | * the record is cleared when a zfree() is done. For corruption debugging, the log tracks both allocs and frees. |
| 1561 | * If the log fills, old records are replaced as if it were a circular buffer. |
| 1562 | */ |
| 1563 | |
| 1564 | |
| 1565 | /* |
| 1566 | * Decide if we want to log this zone by doing a string compare between a zone name and the name |
| 1567 | * of the zone to log. Return true if the strings are equal, false otherwise. Because it's not |
| 1568 | * possible to include spaces in strings passed in via the boot-args, a period in the logname will |
| 1569 | * match a space in the zone name. |
| 1570 | */ |
| 1571 | |
| 1572 | int |
| 1573 | track_this_zone(const char *zonename, const char *logname) |
| 1574 | { |
| 1575 | unsigned int len; |
| 1576 | const char *zc = zonename; |
| 1577 | const char *lc = logname; |
| 1578 | |
| 1579 | /* |
| 1580 | * Compare the strings. We bound the compare by MAX_ZONE_NAME. |
| 1581 | */ |
| 1582 | |
| 1583 | for (len = 1; len <= MAX_ZONE_NAME; zc++, lc++, len++) { |
| 1584 | |
| 1585 | /* |
| 1586 | * If the current characters don't match, check for a space in
| 1587 | * the zone name and a corresponding period in the log name.
| 1588 | * If that's not there, then the strings don't match. |
| 1589 | */ |
| 1590 | |
| 1591 | if (*zc != *lc && !(*zc == ' ' && *lc == '.')) |
| 1592 | break; |
| 1593 | |
| 1594 | /* |
| 1595 | * The strings are equal so far. If we're at the end, then it's a match. |
| 1596 | */ |
| 1597 | |
| 1598 | if (*zc == '\0') |
| 1599 | return TRUE; |
| 1600 | } |
| 1601 | |
| 1602 | return FALSE; |
| 1603 | } |
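| | 
| | /*
| | * Example (illustrative, hypothetical names): for a zone named "my zone",
| | * track_this_zone("my zone", "my.zone") returns TRUE because the period in the
| | * boot-arg stands in for the space, while track_this_zone("my zone", "my")
| | * returns FALSE because the log name ends before the zone name does.
| | */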
| 1604 | |
| 1605 | |
| 1606 | /* |
| 1607 | * Test if we want to log this zalloc/zfree event. We log if this is the zone we're interested in and |
| 1608 | * the buffer for the records has been allocated. |
| 1609 | */ |
| 1610 | |
| 1611 | #define DO_LOGGING(z) (z->zone_logging == TRUE && z->zlog_btlog) |
| 1612 | |
| 1613 | extern boolean_t kmem_alloc_ready; |
| 1614 | |
| 1615 | #if CONFIG_ZLEAKS |
| 1616 | #pragma mark - |
| 1617 | #pragma mark Zone Leak Detection |
| 1618 | |
| 1619 | /* |
| 1620 | * The zone leak detector, abbreviated 'zleak', keeps track of a subset of the currently outstanding |
| 1621 | * allocations made by the zone allocator. Every zleak_sample_factor allocations in each zone, we capture a |
| 1622 | * backtrace. Every free, we examine the table and determine if the allocation was being tracked, |
| 1623 | * and stop tracking it if it was being tracked. |
| 1624 | * |
| 1625 | * We track the allocations in the zallocations hash table, which stores the address that was returned from |
| 1626 | * the zone allocator. Each stored entry in the zallocations table points to an entry in the ztraces table, which |
| 1627 | * stores the backtrace associated with that allocation. This provides uniquing for the relatively large |
| 1628 | * backtraces - we don't store them more than once. |
| 1629 | * |
| 1630 | * Data collection begins when the zone map is 50% full, and only occurs for zones that are taking up |
| 1631 | * a large amount of virtual space. |
| 1632 | */ |
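| | 
| | /*
| | * Illustrative sketch (not part of the build): the shape of the hooks described
| | * above. zleak_log() and zleak_free() are defined below; the sampling test and
| | * the function names here are simplifications/placeholders, not the exact logic
| | * used by the real zalloc()/zfree() paths.
| | */
| | #if 0
| | static void
| | zleak_alloc_hook_sketch(zone_t zone, uintptr_t addr)
| | {
| | /* sample roughly one in every zleak_sample_factor allocations per zone */
| | if ((zone->zleak_capture++ % zleak_sample_factor) == 0) {
| | uintptr_t zbt[MAX_ZTRACE_DEPTH];
| | uint32_t depth = (uint32_t) backtrace(zbt, MAX_ZTRACE_DEPTH);
| | (void) zleak_log(zbt, addr, depth, zone->elem_size);
| | }
| | }
| | 
| | static void
| | zleak_free_hook_sketch(zone_t zone, uintptr_t addr)
| | {
| | /* drop the record, if any, for this address */
| | zleak_free(addr, zone->elem_size);
| | }
| | #endif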
| 1633 | #define ZLEAK_STATE_ENABLED 0x01 /* Zone leak monitoring should be turned on if zone_map fills up. */ |
| 1634 | #define ZLEAK_STATE_ACTIVE 0x02 /* We are actively collecting traces. */ |
| 1635 | #define ZLEAK_STATE_ACTIVATING 0x04 /* Some thread is doing setup; others should move along. */ |
| 1636 | #define ZLEAK_STATE_FAILED 0x08 /* Attempt to allocate tables failed. We will not try again. */ |
| 1637 | uint32_t zleak_state = 0; /* State of collection, as above */ |
| 1638 | |
| 1639 | boolean_t panic_include_ztrace = FALSE; /* Enable zleak logging on panic */ |
| 1640 | vm_size_t zleak_global_tracking_threshold; /* Size of zone map at which to start collecting data */ |
| 1641 | vm_size_t zleak_per_zone_tracking_threshold; /* Size a zone will have before we will collect data on it */ |
| 1642 | unsigned int zleak_sample_factor = 1000; /* Allocations per sample attempt */ |
| 1643 | |
| 1644 | /* |
| 1645 | * Counters for allocation statistics. |
| 1646 | */ |
| 1647 | |
| 1648 | /* Times two active records want to occupy the same spot */ |
| 1649 | unsigned int z_alloc_collisions = 0; |
| 1650 | unsigned int z_trace_collisions = 0; |
| 1651 | |
| 1652 | /* Times a new record lands on a spot previously occupied by a freed allocation */ |
| 1653 | unsigned int z_alloc_overwrites = 0; |
| 1654 | unsigned int z_trace_overwrites = 0; |
| 1655 | |
| 1656 | /* Times a new alloc or trace is put into the hash table */ |
| 1657 | unsigned int z_alloc_recorded = 0; |
| 1658 | unsigned int z_trace_recorded = 0; |
| 1659 | |
| 1660 | /* Times zleak_log returned false due to not being able to acquire the lock */ |
| 1661 | unsigned int z_total_conflicts = 0; |
| 1662 | |
| 1663 | |
| 1664 | #pragma mark struct zallocation |
| 1665 | /* |
| 1666 | * Structure for keeping track of an allocation |
| 1667 | * An allocation bucket is in use if its element is not NULL |
| 1668 | */ |
| 1669 | struct zallocation { |
| 1670 | uintptr_t za_element; /* the element that was zalloc'ed or zfree'ed, NULL if bucket unused */ |
| 1671 | vm_size_t za_size; /* how much memory did this allocation take up? */ |
| 1672 | uint32_t za_trace_index; /* index into ztraces for backtrace associated with allocation */ |
| 1673 | /* TODO: #if this out */ |
| 1674 | uint32_t za_hit_count; /* for determining effectiveness of hash function */ |
| 1675 | }; |
| 1676 | |
| 1677 | /* Size must be a power of two for the zhash to be able to just mask off bits instead of mod */ |
| 1678 | uint32_t zleak_alloc_buckets = CONFIG_ZLEAK_ALLOCATION_MAP_NUM; |
| 1679 | uint32_t zleak_trace_buckets = CONFIG_ZLEAK_TRACE_MAP_NUM; |
| 1680 | |
| 1681 | vm_size_t zleak_max_zonemap_size; |
| 1682 | |
| 1683 | /* Hashmaps of allocations and their corresponding traces */ |
| 1684 | static struct zallocation* zallocations; |
| 1685 | static struct ztrace* ztraces; |
| 1686 | |
| 1687 | /* not static so that panic can see this, see kern/debug.c */ |
| 1688 | struct ztrace* top_ztrace; |
| 1689 | |
| 1690 | /* Lock to protect zallocations, ztraces, and top_ztrace from concurrent modification. */ |
| 1691 | static lck_spin_t zleak_lock; |
| 1692 | static lck_attr_t zleak_lock_attr; |
| 1693 | static lck_grp_t zleak_lock_grp; |
| 1694 | static lck_grp_attr_t zleak_lock_grp_attr; |
| 1695 | |
| 1696 | /* |
| 1697 | * Initializes the zone leak monitor. Called from zone_init() |
| 1698 | */ |
| 1699 | static void |
| 1700 | zleak_init(vm_size_t max_zonemap_size) |
| 1701 | { |
| 1702 | char scratch_buf[16]; |
| 1703 | boolean_t zleak_enable_flag = FALSE; |
| 1704 | |
| 1705 | zleak_max_zonemap_size = max_zonemap_size; |
| 1706 | zleak_global_tracking_threshold = max_zonemap_size / 2; |
| 1707 | zleak_per_zone_tracking_threshold = zleak_global_tracking_threshold / 8; |
| 1708 | |
| 1709 | #if CONFIG_EMBEDDED |
| 1710 | if (PE_parse_boot_argn("-zleakon" , scratch_buf, sizeof(scratch_buf))) { |
| 1711 | zleak_enable_flag = TRUE; |
| 1712 | printf("zone leak detection enabled\n" ); |
| 1713 | } else { |
| 1714 | zleak_enable_flag = FALSE; |
| 1715 | printf("zone leak detection disabled\n" ); |
| 1716 | } |
| 1717 | #else /* CONFIG_EMBEDDED */ |
| 1718 | /* -zleakoff (flag to disable zone leak monitor) */ |
| 1719 | if (PE_parse_boot_argn("-zleakoff" , scratch_buf, sizeof(scratch_buf))) { |
| 1720 | zleak_enable_flag = FALSE; |
| 1721 | printf("zone leak detection disabled\n" ); |
| 1722 | } else { |
| 1723 | zleak_enable_flag = TRUE; |
| 1724 | printf("zone leak detection enabled\n" ); |
| 1725 | } |
| 1726 | #endif /* CONFIG_EMBEDDED */ |
| 1727 | |
| 1728 | /* zfactor=XXXX (override how often to sample the zone allocator) */ |
| 1729 | if (PE_parse_boot_argn("zfactor" , &zleak_sample_factor, sizeof(zleak_sample_factor))) { |
| 1730 | printf("Zone leak factor override: %u\n" , zleak_sample_factor); |
| 1731 | } |
| 1732 | |
| 1733 | /* zleak-allocs=XXXX (override number of buckets in zallocations) */ |
| 1734 | if (PE_parse_boot_argn("zleak-allocs" , &zleak_alloc_buckets, sizeof(zleak_alloc_buckets))) { |
| 1735 | printf("Zone leak alloc buckets override: %u\n" , zleak_alloc_buckets); |
| 1736 | /* uses 'is power of 2' trick: (0x01000 & 0x00FFF == 0) */ |
| 1737 | if (zleak_alloc_buckets == 0 || (zleak_alloc_buckets & (zleak_alloc_buckets-1))) { |
| 1738 | printf("Override isn't a power of two, bad things might happen!\n" ); |
| 1739 | } |
| 1740 | } |
| 1741 | |
| 1742 | /* zleak-traces=XXXX (override number of buckets in ztraces) */ |
| 1743 | if (PE_parse_boot_argn("zleak-traces" , &zleak_trace_buckets, sizeof(zleak_trace_buckets))) { |
| 1744 | printf("Zone leak trace buckets override: %u\n" , zleak_trace_buckets); |
| 1745 | /* uses 'is power of 2' trick: (0x01000 & 0x00FFF == 0) */ |
| 1746 | if (zleak_trace_buckets == 0 || (zleak_trace_buckets & (zleak_trace_buckets-1))) { |
| 1747 | printf("Override isn't a power of two, bad things might happen!\n" ); |
| 1748 | } |
| 1749 | } |
| 1750 | |
| 1751 | /* allocate the zleak_lock */ |
| 1752 | lck_grp_attr_setdefault(&zleak_lock_grp_attr); |
| 1753 | lck_grp_init(&zleak_lock_grp, "zleak_lock" , &zleak_lock_grp_attr); |
| 1754 | lck_attr_setdefault(&zleak_lock_attr); |
| 1755 | lck_spin_init(&zleak_lock, &zleak_lock_grp, &zleak_lock_attr); |
| 1756 | |
| 1757 | if (zleak_enable_flag) { |
| 1758 | zleak_state = ZLEAK_STATE_ENABLED; |
| 1759 | } |
| 1760 | } |
| 1761 | |
| 1762 | #if CONFIG_ZLEAKS |
| 1763 | |
| 1764 | /* |
| 1765 | * Support for kern.zleak.active sysctl - a simplified |
| 1766 | * version of the zleak_state variable. |
| 1767 | */ |
| 1768 | int |
| 1769 | get_zleak_state(void) |
| 1770 | { |
| 1771 | if (zleak_state & ZLEAK_STATE_FAILED) |
| 1772 | return (-1); |
| 1773 | if (zleak_state & ZLEAK_STATE_ACTIVE) |
| 1774 | return (1); |
| 1775 | return (0); |
| 1776 | } |
| 1777 | |
| 1778 | #endif |
| 1779 | |
| 1780 | |
| 1781 | kern_return_t |
| 1782 | zleak_activate(void) |
| 1783 | { |
| 1784 | kern_return_t retval; |
| 1785 | vm_size_t z_alloc_size = zleak_alloc_buckets * sizeof(struct zallocation); |
| 1786 | vm_size_t z_trace_size = zleak_trace_buckets * sizeof(struct ztrace); |
| 1787 | void *allocations_ptr = NULL; |
| 1788 | void *traces_ptr = NULL; |
| 1789 | |
| 1790 | /* Only one thread attempts to activate at a time */ |
| 1791 | if (zleak_state & (ZLEAK_STATE_ACTIVE | ZLEAK_STATE_ACTIVATING | ZLEAK_STATE_FAILED)) { |
| 1792 | return KERN_SUCCESS; |
| 1793 | } |
| 1794 | |
| 1795 | /* Indicate that we're doing the setup */ |
| 1796 | lck_spin_lock(&zleak_lock); |
| 1797 | if (zleak_state & (ZLEAK_STATE_ACTIVE | ZLEAK_STATE_ACTIVATING | ZLEAK_STATE_FAILED)) { |
| 1798 | lck_spin_unlock(&zleak_lock); |
| 1799 | return KERN_SUCCESS; |
| 1800 | } |
| 1801 | |
| 1802 | zleak_state |= ZLEAK_STATE_ACTIVATING; |
| 1803 | lck_spin_unlock(&zleak_lock); |
| 1804 | |
| 1805 | /* Allocate and zero tables */ |
| 1806 | retval = kmem_alloc_kobject(kernel_map, (vm_offset_t*)&allocations_ptr, z_alloc_size, VM_KERN_MEMORY_OSFMK); |
| 1807 | if (retval != KERN_SUCCESS) { |
| 1808 | goto fail; |
| 1809 | } |
| 1810 | |
| 1811 | retval = kmem_alloc_kobject(kernel_map, (vm_offset_t*)&traces_ptr, z_trace_size, VM_KERN_MEMORY_OSFMK); |
| 1812 | if (retval != KERN_SUCCESS) { |
| 1813 | goto fail; |
| 1814 | } |
| 1815 | |
| 1816 | bzero(allocations_ptr, z_alloc_size); |
| 1817 | bzero(traces_ptr, z_trace_size); |
| 1818 | |
| 1819 | /* Everything's set. Install tables, mark active. */ |
| 1820 | zallocations = allocations_ptr; |
| 1821 | ztraces = traces_ptr; |
| 1822 | |
| 1823 | /* |
| 1824 | * Initialize the top_ztrace to the first entry in ztraces, |
| 1825 | * so we don't have to check for null in zleak_log |
| 1826 | */ |
| 1827 | top_ztrace = &ztraces[0]; |
| 1828 | |
| 1829 | /* |
| 1830 | * Note that we do need a barrier between installing |
| 1831 | * the tables and setting the active flag, because the zfree() |
| 1832 | * path accesses the table without a lock if we're active. |
| 1833 | */ |
| 1834 | lck_spin_lock(&zleak_lock); |
| 1835 | zleak_state |= ZLEAK_STATE_ACTIVE; |
| 1836 | zleak_state &= ~ZLEAK_STATE_ACTIVATING; |
| 1837 | lck_spin_unlock(&zleak_lock); |
| 1838 | |
| 1839 | return 0; |
| 1840 | |
| 1841 | fail: |
| 1842 | /* |
| 1843 | * If we fail to allocate memory, don't further tax |
| 1844 | * the system by trying again. |
| 1845 | */ |
| 1846 | lck_spin_lock(&zleak_lock); |
| 1847 | zleak_state |= ZLEAK_STATE_FAILED; |
| 1848 | zleak_state &= ~ZLEAK_STATE_ACTIVATING; |
| 1849 | lck_spin_unlock(&zleak_lock); |
| 1850 | |
| 1851 | if (allocations_ptr != NULL) { |
| 1852 | kmem_free(kernel_map, (vm_offset_t)allocations_ptr, z_alloc_size); |
| 1853 | } |
| 1854 | |
| 1855 | if (traces_ptr != NULL) { |
| 1856 | kmem_free(kernel_map, (vm_offset_t)traces_ptr, z_trace_size); |
| 1857 | } |
| 1858 | |
| 1859 | return retval; |
| 1860 | } |
| 1861 | |
| 1862 | /* |
| 1863 | * TODO: What about allocations that never get deallocated, |
| 1864 | * especially ones with unique backtraces? Should we wait to record |
| 1865 | * until after boot has completed? |
| 1866 | * (How many persistent zallocs are there?) |
| 1867 | */ |
| 1868 | |
| 1869 | /* |
| 1870 | * This function records the allocation in the allocations table, |
| 1871 | * and stores the associated backtrace in the traces table |
| 1872 | * (or just increments the refcount if the trace is already recorded) |
| 1873 | * If the allocation slot is in use, the old allocation is replaced with the new allocation, and |
| 1874 | * the associated trace's refcount is decremented. |
| 1875 | * If the trace slot is in use, it returns. |
| 1876 | * The refcount is incremented by the amount of memory the allocation consumes. |
| 1877 | * The return value indicates whether to try again next time. |
| 1878 | */ |
| 1879 | static boolean_t |
| 1880 | zleak_log(uintptr_t* bt, |
| 1881 | uintptr_t addr, |
| 1882 | uint32_t depth, |
| 1883 | vm_size_t allocation_size) |
| 1884 | { |
| 1885 | /* Quit if there's someone else modifying the hash tables */ |
| 1886 | if (!lck_spin_try_lock(&zleak_lock)) { |
| 1887 | z_total_conflicts++; |
| 1888 | return FALSE; |
| 1889 | } |
| 1890 | |
| 1891 | struct zallocation* allocation = &zallocations[hashaddr(addr, zleak_alloc_buckets)]; |
| 1892 | |
| 1893 | uint32_t trace_index = hashbacktrace(bt, depth, zleak_trace_buckets); |
| 1894 | struct ztrace* trace = &ztraces[trace_index]; |
| 1895 | |
| 1896 | allocation->za_hit_count++; |
| 1897 | trace->zt_hit_count++; |
| 1898 | |
| 1899 | /* |
| 1900 | * If the allocation bucket we want to be in is occupied, and if the occupier |
| 1901 | * has the same trace as us, just bail. |
| 1902 | */ |
| 1903 | if (allocation->za_element != (uintptr_t) 0 && trace_index == allocation->za_trace_index) { |
| 1904 | z_alloc_collisions++; |
| 1905 | |
| 1906 | lck_spin_unlock(&zleak_lock); |
| 1907 | return TRUE; |
| 1908 | } |
| 1909 | |
| 1910 | /* STEP 1: Store the backtrace in the traces array. */ |
| 1911 | /* A size of zero indicates that the trace bucket is free. */ |
| 1912 | |
| 1913 | if (trace->zt_size > 0 && bcmp(trace->zt_stack, bt, (depth * sizeof(uintptr_t))) != 0 ) { |
| 1914 | /* |
| 1915 | * Different unique trace with same hash! |
| 1916 | * Just bail - if we're trying to record the leaker, hopefully the other trace will be deallocated |
| 1917 | * and get out of the way for later chances |
| 1918 | */ |
| 1919 | trace->zt_collisions++; |
| 1920 | z_trace_collisions++; |
| 1921 | |
| 1922 | lck_spin_unlock(&zleak_lock); |
| 1923 | return TRUE; |
| 1924 | } else if (trace->zt_size > 0) { |
| 1925 | /* Same trace, already added, so increment refcount */ |
| 1926 | trace->zt_size += allocation_size; |
| 1927 | } else { |
| 1928 | /* Found an unused trace bucket, record the trace here! */ |
| 1929 | if (trace->zt_depth != 0) /* if this slot was previously used but not currently in use */ |
| 1930 | z_trace_overwrites++; |
| 1931 | |
| 1932 | z_trace_recorded++; |
| 1933 | trace->zt_size = allocation_size; |
| 1934 | memcpy(trace->zt_stack, bt, (depth * sizeof(uintptr_t)) ); |
| 1935 | |
| 1936 | trace->zt_depth = depth; |
| 1937 | trace->zt_collisions = 0; |
| 1938 | } |
| 1939 | |
| 1940 | /* STEP 2: Store the allocation record in the allocations array. */ |
| 1941 | |
| 1942 | if (allocation->za_element != (uintptr_t) 0) { |
| 1943 | /* |
| 1944 | * Straight up replace any allocation record that was there. We don't want to do the work |
| 1945 | * to preserve the allocation entries that were there, because we only record a subset of the |
| 1946 | * allocations anyways. |
| 1947 | */ |
| 1948 | |
| 1949 | z_alloc_collisions++; |
| 1950 | |
| 1951 | struct ztrace* associated_trace = &ztraces[allocation->za_trace_index]; |
| 1952 | /* Knock off old allocation's size, not the new allocation */ |
| 1953 | associated_trace->zt_size -= allocation->za_size; |
| 1954 | } else if (allocation->za_trace_index != 0) { |
| 1955 | /* Slot previously used but not currently in use */ |
| 1956 | z_alloc_overwrites++; |
| 1957 | } |
| 1958 | |
| 1959 | allocation->za_element = addr; |
| 1960 | allocation->za_trace_index = trace_index; |
| 1961 | allocation->za_size = allocation_size; |
| 1962 | |
| 1963 | z_alloc_recorded++; |
| 1964 | |
| 1965 | if (top_ztrace->zt_size < trace->zt_size) |
| 1966 | top_ztrace = trace; |
| 1967 | |
| 1968 | lck_spin_unlock(&zleak_lock); |
| 1969 | return TRUE; |
| 1970 | } |
| 1971 | |
| 1972 | /* |
| 1973 | * Free the allocation record and release the stacktrace. |
| 1974 | * This should be as fast as possible because it will be called for every free. |
| 1975 | */ |
| 1976 | static void |
| 1977 | zleak_free(uintptr_t addr, |
| 1978 | vm_size_t allocation_size) |
| 1979 | { |
| 1980 | if (addr == (uintptr_t) 0) |
| 1981 | return; |
| 1982 | |
| 1983 | struct zallocation* allocation = &zallocations[hashaddr(addr, zleak_alloc_buckets)]; |
| 1984 | |
| 1985 | /* Double-checked locking: check to find out if we're interested, lock, check to make |
| 1986 | * sure it hasn't changed, then modify it, and release the lock. |
| 1987 | */ |
| 1988 | |
| 1989 | if (allocation->za_element == addr && allocation->za_trace_index < zleak_trace_buckets) { |
| 1990 | /* if the allocation was the one, grab the lock, check again, then delete it */ |
| 1991 | lck_spin_lock(&zleak_lock); |
| 1992 | |
| 1993 | if (allocation->za_element == addr && allocation->za_trace_index < zleak_trace_buckets) { |
| 1994 | struct ztrace *trace; |
| 1995 | |
| 1996 | /* allocation_size had better match what was passed into zleak_log - otherwise someone is freeing into the wrong zone! */ |
| 1997 | if (allocation->za_size != allocation_size) { |
| 1998 | panic("Freeing as size %lu memory that was allocated with size %lu\n" , |
| 1999 | (uintptr_t)allocation_size, (uintptr_t)allocation->za_size); |
| 2000 | } |
| 2001 | |
| 2002 | trace = &ztraces[allocation->za_trace_index]; |
| 2003 | |
| 2004 | /* size of 0 indicates trace bucket is unused */ |
| 2005 | if (trace->zt_size > 0) { |
| 2006 | trace->zt_size -= allocation_size; |
| 2007 | } |
| 2008 | |
| 2009 | /* A NULL element means the allocation bucket is unused */ |
| 2010 | allocation->za_element = 0; |
| 2011 | } |
| 2012 | lck_spin_unlock(&zleak_lock); |
| 2013 | } |
| 2014 | } |
| 2015 | |
| 2016 | #endif /* CONFIG_ZLEAKS */ |
| 2017 | |
| 2018 | /* These functions outside of CONFIG_ZLEAKS because they are also used in |
| 2019 | * mbuf.c for mbuf leak-detection. This is why they lack the z_ prefix. |
| 2020 | */ |
| 2021 | |
| 2022 | /* "Thomas Wang's 32/64 bit mix functions." http://www.concentric.net/~Ttwang/tech/inthash.htm */ |
| 2023 | uintptr_t |
| 2024 | hash_mix(uintptr_t x) |
| 2025 | { |
| 2026 | #ifndef __LP64__ |
| 2027 | x += ~(x << 15); |
| 2028 | x ^= (x >> 10); |
| 2029 | x += (x << 3 ); |
| 2030 | x ^= (x >> 6 ); |
| 2031 | x += ~(x << 11); |
| 2032 | x ^= (x >> 16); |
| 2033 | #else |
| 2034 | x += ~(x << 32); |
| 2035 | x ^= (x >> 22); |
| 2036 | x += ~(x << 13); |
| 2037 | x ^= (x >> 8 ); |
| 2038 | x += (x << 3 ); |
| 2039 | x ^= (x >> 15); |
| 2040 | x += ~(x << 27); |
| 2041 | x ^= (x >> 31); |
| 2042 | #endif |
| 2043 | return x; |
| 2044 | } |
| 2045 | |
| 2046 | uint32_t |
| 2047 | hashbacktrace(uintptr_t* bt, uint32_t depth, uint32_t max_size) |
| 2048 | { |
| 2049 | |
| 2050 | uintptr_t hash = 0; |
| 2051 | uintptr_t mask = max_size - 1; |
| 2052 | |
| 2053 | while (depth) { |
| 2054 | hash += bt[--depth]; |
| 2055 | } |
| 2056 | |
| 2057 | hash = hash_mix(hash) & mask; |
| 2058 | |
| 2059 | assert(hash < max_size); |
| 2060 | |
| 2061 | return (uint32_t) hash; |
| 2062 | } |
| 2063 | |
| 2064 | /* |
| 2065 | * TODO: Determine how well distributed this is |
| 2066 | * max_size must be a power of 2, e.g. 0x10000, because 0x10000-1 is 0x0FFFF, which is a great bitmask
| 2067 | */ |
| 2068 | uint32_t |
| 2069 | hashaddr(uintptr_t pt, uint32_t max_size) |
| 2070 | { |
| 2071 | uintptr_t hash = 0; |
| 2072 | uintptr_t mask = max_size - 1; |
| 2073 | |
| 2074 | hash = hash_mix(pt) & mask; |
| 2075 | |
| 2076 | assert(hash < max_size); |
| 2077 | |
| 2078 | return (uint32_t) hash; |
| 2079 | } |
| 2080 | |
| 2081 | /* End of all leak-detection code */ |
| 2082 | #pragma mark - |
| 2083 | |
| 2084 | #define ZONE_MAX_ALLOC_SIZE (32 * 1024) |
| 2085 | #define ZONE_ALLOC_FRAG_PERCENT(alloc_size, ele_size) (((alloc_size % ele_size) * 100) / alloc_size) |
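| | 
| | /*
| | * Worked example (illustrative numbers, assuming 4K pages): for a 1536-byte
| | * element, a 4096-byte (one page) chunk leaves 4096 % 1536 = 1024 bytes unused,
| | * i.e. 25% waste, while a 12288-byte (three page) chunk holds exactly 8 elements
| | * with 0% waste, so the search in zinit() below settles on the larger chunk size.
| | */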
| 2086 | |
| 2087 | /* Used to manage copying in of new zone names */ |
| 2088 | static vm_offset_t zone_names_start; |
| 2089 | static vm_offset_t zone_names_next; |
| 2090 | |
| 2091 | static vm_size_t |
| 2092 | compute_element_size(vm_size_t requested_size) |
| 2093 | { |
| 2094 | vm_size_t element_size = requested_size; |
| 2095 | |
| 2096 | /* Zone elements must fit both a next pointer and a backup pointer */ |
| 2097 | vm_size_t minimum_element_size = sizeof(vm_offset_t) * 2; |
| 2098 | if (element_size < minimum_element_size) |
| 2099 | element_size = minimum_element_size; |
| 2100 | |
| 2101 | /* |
| 2102 | * Round element size to a multiple of sizeof(pointer) |
| 2103 | * This also enforces that allocations will be aligned on pointer boundaries |
| 2104 | */ |
| 2105 | element_size = ((element_size-1) + sizeof(vm_offset_t)) - |
| 2106 | ((element_size-1) % sizeof(vm_offset_t)); |
| 2107 | |
| 2108 | return element_size; |
| 2109 | } |
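| | 
| | /*
| | * Worked example (illustrative, assuming 8-byte pointers): a request for 12 bytes
| | * is first raised to the 16-byte minimum (room for the next and backup freelist
| | * pointers), while a request for 20 bytes is rounded up to 24 so that every
| | * element stays pointer-aligned.
| | */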
| 2110 | |
| 2111 | #if KASAN_ZALLOC |
| 2112 | |
| 2113 | /* |
| 2114 | * Called from zinit(). |
| 2115 | * |
| 2116 | * Fixes up the zone's element size to incorporate the redzones. |
| 2117 | */ |
| 2118 | static void |
| 2119 | kasan_update_element_size_for_redzone( |
| 2120 | zone_t zone, /* the zone that needs to be updated */ |
| 2121 | vm_size_t *size, /* requested zone element size */ |
| 2122 | vm_size_t *max, /* maximum memory to use */ |
| 2123 | const char *name) /* zone name */ |
| 2124 | { |
| 2125 | /* Expand the zone allocation size to include the redzones. For page-multiple |
| 2126 | * zones add a full guard page because they likely require alignment. kalloc |
| 2127 | * and fakestack handle their own KASan state, so ignore those zones. */
| 2128 | /* XXX: remove this when zinit_with_options() is a thing */ |
| 2129 | const char *kalloc_name = "kalloc." ; |
| 2130 | const char *fakestack_name = "fakestack." ; |
| 2131 | if (strncmp(name, kalloc_name, strlen(kalloc_name)) == 0) { |
| 2132 | zone->kasan_redzone = 0; |
| 2133 | } else if (strncmp(name, fakestack_name, strlen(fakestack_name)) == 0) { |
| 2134 | zone->kasan_redzone = 0; |
| 2135 | } else { |
| 2136 | if ((*size % PAGE_SIZE) != 0) { |
| 2137 | zone->kasan_redzone = KASAN_GUARD_SIZE; |
| 2138 | } else { |
| 2139 | zone->kasan_redzone = PAGE_SIZE; |
| 2140 | } |
| 2141 | *max = (*max / *size) * (*size + zone->kasan_redzone * 2); |
| 2142 | *size += zone->kasan_redzone * 2; |
| 2143 | } |
| 2144 | } |
| 2145 | |
| 2146 | /* |
| 2147 | * Called from zalloc_internal() to fix up the address of the newly |
| 2148 | * allocated element. |
| 2149 | * |
| 2150 | * Returns the element address skipping over the redzone on the left. |
| 2151 | */ |
| 2152 | static vm_offset_t |
| 2153 | kasan_fixup_allocated_element_address( |
| 2154 | zone_t zone, /* the zone the element belongs to */ |
| 2155 | vm_offset_t addr) /* address of the element, including the redzone */ |
| 2156 | { |
| 2157 | /* Fixup the return address to skip the redzone */ |
| 2158 | if (zone->kasan_redzone) { |
| 2159 | addr = kasan_alloc(addr, zone->elem_size, |
| 2160 | zone->elem_size - 2 * zone->kasan_redzone, zone->kasan_redzone); |
| 2161 | } |
| 2162 | return addr; |
| 2163 | } |
| 2164 | |
| 2165 | /* |
| 2166 | * Called from zfree() to add the element being freed to the KASan quarantine. |
| 2167 | * |
| 2168 | * Returns true if the newly-freed element made it into the quarantine without |
| 2169 | * displacing another, false otherwise. In the latter case, addrp points to the |
| 2170 | * address of the displaced element, which will be freed by the zone. |
| 2171 | */ |
| 2172 | static bool |
| 2173 | kasan_quarantine_freed_element( |
| 2174 | zone_t *zonep, /* the zone the element is being freed to */ |
| 2175 | void **addrp) /* address of the element being freed */ |
| 2176 | { |
| 2177 | zone_t zone = *zonep; |
| 2178 | void *addr = *addrp; |
| 2179 | |
| 2180 | /* |
| 2181 | * Resize back to the real allocation size and hand off to the KASan |
| 2182 | * quarantine. `addr` may then point to a different allocation, if the |
| 2183 | * current element replaced another in the quarantine. The zone then |
| 2184 | * takes ownership of the swapped out free element. |
| 2185 | */ |
| 2186 | vm_size_t usersz = zone->elem_size - 2 * zone->kasan_redzone; |
| 2187 | vm_size_t sz = usersz; |
| 2188 | |
| 2189 | if (addr && zone->kasan_redzone) { |
| 2190 | kasan_check_free((vm_address_t)addr, usersz, KASAN_HEAP_ZALLOC); |
| 2191 | addr = (void *)kasan_dealloc((vm_address_t)addr, &sz); |
| 2192 | assert(sz == zone->elem_size); |
| 2193 | } |
| 2194 | if (addr && zone->kasan_quarantine) { |
| 2195 | kasan_free(&addr, &sz, KASAN_HEAP_ZALLOC, zonep, usersz, true); |
| 2196 | if (!addr) { |
| 2197 | return TRUE; |
| 2198 | } |
| 2199 | } |
| 2200 | *addrp = addr; |
| 2201 | return FALSE; |
| 2202 | } |
| 2203 | |
| 2204 | #endif /* KASAN_ZALLOC */ |
| 2205 | |
| 2206 | /* |
| 2207 | * zinit initializes a new zone. The zone data structures themselves |
| 2208 | * are stored in a zone, which is initially a static structure that |
| 2209 | * is initialized by zone_init. |
| 2210 | */ |
| 2211 | |
| 2212 | zone_t |
| 2213 | zinit( |
| 2214 | vm_size_t size, /* the size of an element */ |
| 2215 | vm_size_t max, /* maximum memory to use */ |
| 2216 | vm_size_t alloc, /* allocation size */ |
| 2217 | const char *name) /* a name for the zone */ |
| 2218 | { |
| 2219 | zone_t z; |
| 2220 | |
| 2221 | size = compute_element_size(size); |
| 2222 | |
| 2223 | simple_lock(&all_zones_lock); |
| 2224 | |
| 2225 | assert(num_zones < MAX_ZONES); |
| 2226 | assert(num_zones_in_use <= num_zones); |
| 2227 | |
| 2228 | /* If possible, find a previously zdestroy'ed zone in the zone_array that we can reuse instead of initializing a new zone. */ |
| 2229 | for (int index = bitmap_first(zone_empty_bitmap, MAX_ZONES); |
| 2230 | index >= 0 && index < (int)num_zones; |
| 2231 | index = bitmap_next(zone_empty_bitmap, index)) { |
| 2232 | z = &(zone_array[index]); |
| 2233 | |
| 2234 | /* |
| 2235 | * If the zone name and the element size are the same, we can just reuse the old zone struct. |
| 2236 | * Otherwise hand out a new zone from the zone_array. |
| 2237 | */ |
| 2238 | if (!strcmp(z->zone_name, name)) { |
| 2239 | vm_size_t old_size = z->elem_size; |
| 2240 | #if KASAN_ZALLOC |
| 2241 | old_size -= z->kasan_redzone * 2; |
| 2242 | #endif |
| 2243 | if (old_size == size) { |
| 2244 | /* Clear the empty bit for this zone, increment num_zones_in_use, and mark the zone as valid again. */ |
| 2245 | bitmap_clear(zone_empty_bitmap, index); |
| 2246 | num_zones_in_use++; |
| 2247 | z->zone_valid = TRUE; |
| 2248 | |
| 2249 | /* All other state is already set up since the zone was previously in use. Return early. */ |
| 2250 | simple_unlock(&all_zones_lock); |
| 2251 | return (z); |
| 2252 | } |
| 2253 | } |
| 2254 | } |
| 2255 | |
| 2256 | /* If we're here, it means we didn't find a zone above that we could simply reuse. Set up a new zone. */ |
| 2257 | |
| 2258 | /* Clear the empty bit for the new zone */ |
| 2259 | bitmap_clear(zone_empty_bitmap, num_zones); |
| 2260 | |
| 2261 | z = &(zone_array[num_zones]); |
| 2262 | z->index = num_zones; |
| 2263 | |
| 2264 | num_zones++; |
| 2265 | num_zones_in_use++; |
| 2266 | |
| 2267 | /* |
| 2268 | * Initialize the zone lock here before dropping the all_zones_lock. Otherwise we could race with |
| 2269 | * zalloc_async() and try to grab the zone lock before it has been initialized, causing a panic. |
| 2270 | */ |
| 2271 | lock_zone_init(z); |
| 2272 | |
| 2273 | simple_unlock(&all_zones_lock); |
| 2274 | |
| 2275 | #if KASAN_ZALLOC |
| 2276 | kasan_update_element_size_for_redzone(z, &size, &max, name); |
| 2277 | #endif |
| 2278 | |
| 2279 | max = round_page(max); |
| 2280 | |
| 2281 | vm_size_t best_alloc = PAGE_SIZE; |
| 2282 | |
| 2283 | if ((size % PAGE_SIZE) == 0) { |
| 2284 | /* zero fragmentation by definition */ |
| 2285 | best_alloc = size; |
| 2286 | } else { |
| 2287 | vm_size_t alloc_size; |
| 2288 | for (alloc_size = (2 * PAGE_SIZE); alloc_size <= ZONE_MAX_ALLOC_SIZE; alloc_size += PAGE_SIZE) { |
| 2289 | if (ZONE_ALLOC_FRAG_PERCENT(alloc_size, size) < ZONE_ALLOC_FRAG_PERCENT(best_alloc, size)) { |
| 2290 | best_alloc = alloc_size; |
| 2291 | } |
| 2292 | } |
| 2293 | } |
| 2294 | |
| 2295 | alloc = best_alloc; |
| 2296 | if (max && (max < alloc)) |
| 2297 | max = alloc; |
| 2298 | |
| 2299 | z->free_elements = NULL; |
| 2300 | queue_init(&z->pages.any_free_foreign); |
| 2301 | queue_init(&z->pages.all_free); |
| 2302 | queue_init(&z->pages.intermediate); |
| 2303 | queue_init(&z->pages.all_used); |
| 2304 | z->cur_size = 0; |
| 2305 | z->page_count = 0; |
| 2306 | z->max_size = max; |
| 2307 | z->elem_size = size; |
| 2308 | z->alloc_size = alloc; |
| 2309 | z->count = 0; |
| 2310 | z->countfree = 0; |
| 2311 | z->count_all_free_pages = 0; |
| 2312 | z->sum_count = 0LL; |
| 2313 | z->doing_alloc_without_vm_priv = FALSE; |
| 2314 | z->doing_alloc_with_vm_priv = FALSE; |
| 2315 | z->exhaustible = FALSE; |
| 2316 | z->collectable = TRUE; |
| 2317 | z->allows_foreign = FALSE; |
| 2318 | z->expandable = TRUE; |
| 2319 | z->waiting = FALSE; |
| 2320 | z->async_pending = FALSE; |
| 2321 | z->caller_acct = TRUE; |
| 2322 | z->noencrypt = FALSE; |
| 2323 | z->no_callout = FALSE; |
| 2324 | z->async_prio_refill = FALSE; |
| 2325 | z->gzalloc_exempt = FALSE; |
| 2326 | z->alignment_required = FALSE; |
| 2327 | z->zone_replenishing = FALSE; |
| 2328 | z->prio_refill_watermark = 0; |
| 2329 | z->zone_replenish_thread = NULL; |
| 2330 | z->zp_count = 0; |
| 2331 | z->kasan_quarantine = TRUE; |
| 2332 | z->zone_valid = TRUE; |
| 2333 | z->cpu_cache_enabled = FALSE; |
| 2334 | |
| 2335 | #if CONFIG_ZLEAKS |
| 2336 | z->zleak_capture = 0; |
| 2337 | z->zleak_on = FALSE; |
| 2338 | #endif /* CONFIG_ZLEAKS */ |
| 2339 | |
| 2340 | /* |
| 2341 | * If the VM is ready to handle kmem_alloc requests, copy the zone name passed in. |
| 2342 | * |
| 2343 | * Else simply maintain a pointer to the name string. The only zones we'll actually have |
| 2344 | * to do this for would be the VM-related zones that are created very early on before any |
| 2345 | * kexts can be loaded or unloaded. So we should be fine with just a pointer in this case.
| 2346 | */ |
| 2347 | if (kmem_alloc_ready) { |
| 2348 | size_t len = MIN(strlen(name)+1, MACH_ZONE_NAME_MAX_LEN); |
| 2349 | |
| 2350 | if (zone_names_start == 0 || ((zone_names_next - zone_names_start) + len) > PAGE_SIZE) { |
| 2351 | printf("zalloc: allocating memory for zone names buffer\n" ); |
| 2352 | kern_return_t retval = kmem_alloc_kobject(kernel_map, &zone_names_start, |
| 2353 | PAGE_SIZE, VM_KERN_MEMORY_OSFMK); |
| 2354 | if (retval != KERN_SUCCESS) { |
| 2355 | panic("zalloc: zone_names memory allocation failed" ); |
| 2356 | } |
| 2357 | bzero((char *)zone_names_start, PAGE_SIZE); |
| 2358 | zone_names_next = zone_names_start; |
| 2359 | } |
| 2360 | |
| 2361 | strlcpy((char *)zone_names_next, name, len); |
| 2362 | z->zone_name = (char *)zone_names_next; |
| 2363 | zone_names_next += len; |
| 2364 | } else { |
| 2365 | z->zone_name = name; |
| 2366 | } |
| 2367 | |
| 2368 | /* |
| 2369 | * Check for and set up zone leak detection if requested via boot-args. We recognize two
| 2370 | * boot-args: |
| 2371 | * |
| 2372 | * zlog=<zone_to_log> |
| 2373 | * zrecs=<num_records_in_log> |
| 2374 | * |
| 2375 | * The zlog arg is used to specify the zone name that should be logged, and zrecs is used to |
| 2376 | * control the size of the log. If zrecs is not specified, a default value is used. |
| 2377 | */ |
| 2378 | |
| 2379 | if (num_zones_logged < max_num_zones_to_log) { |
| 2380 | |
| 2381 | int i = 1; /* zlog0 isn't allowed. */ |
| 2382 | boolean_t zone_logging_enabled = FALSE; |
| 2383 | char zlog_name[MAX_ZONE_NAME] = "" ; /* Temp. buffer to create the strings zlog1, zlog2 etc... */ |
| 2384 | |
| 2385 | while (i <= max_num_zones_to_log) { |
| 2386 | |
| 2387 | snprintf(zlog_name, MAX_ZONE_NAME, "zlog%d" , i); |
| 2388 | |
| 2389 | if (PE_parse_boot_argn(zlog_name, zone_name_to_log, sizeof(zone_name_to_log)) == TRUE) { |
| 2390 | if (track_this_zone(z->zone_name, zone_name_to_log)) { |
| 2391 | if (z->zone_valid) { |
| 2392 | z->zone_logging = TRUE; |
| 2393 | zone_logging_enabled = TRUE; |
| 2394 | num_zones_logged++; |
| 2395 | break; |
| 2396 | } |
| 2397 | } |
| 2398 | } |
| 2399 | i++; |
| 2400 | } |
| 2401 | |
| 2402 | if (zone_logging_enabled == FALSE) { |
| 2403 | /* |
| 2404 | * Backwards compat. with the old boot-arg used to specify single zone logging i.e. zlog |
| 2405 | * Needs to happen after the newer zlogn checks because the prefix will match all the zlogn |
| 2406 | * boot-args. |
| 2407 | */ |
| 2408 | if (PE_parse_boot_argn("zlog" , zone_name_to_log, sizeof(zone_name_to_log)) == TRUE) { |
| 2409 | if (track_this_zone(z->zone_name, zone_name_to_log)) { |
| 2410 | if (z->zone_valid) { |
| 2411 | z->zone_logging = TRUE; |
| 2412 | zone_logging_enabled = TRUE; |
| 2413 | num_zones_logged++; |
| 2414 | } |
| 2415 | } |
| 2416 | } |
| 2417 | } |
| 2418 | |
| 2419 | if (log_records_init == FALSE && zone_logging_enabled == TRUE) { |
| 2420 | if (PE_parse_boot_argn("zrecs" , &log_records, sizeof(log_records)) == TRUE) { |
| 2421 | /* |
| 2422 | * Don't allow more than ZRECORDS_MAX records even if the user asked for more. |
| 2423 | * This prevents accidentally hogging too much kernel memory and making the system |
| 2424 | * unusable. |
| 2425 | */ |
| 2426 | |
| 2427 | log_records = MIN(ZRECORDS_MAX, log_records); |
| 2428 | log_records_init = TRUE; |
| 2429 | } else { |
| 2430 | log_records = ZRECORDS_DEFAULT; |
| 2431 | log_records_init = TRUE; |
| 2432 | } |
| 2433 | } |
| 2434 | |
| 2435 | /* |
| 2436 | * If we want to log a zone, see if we need to allocate buffer space for the log. Some vm related zones are |
| 2437 | * zinit'ed before we can do a kmem_alloc, so we have to defer allocation in that case. kmem_alloc_ready is set to |
| 2438 | * TRUE once enough of the VM system is up and running to allow a kmem_alloc to work. If we want to log one |
| 2439 | * of the VM related zones that's set up early on, we will skip allocation of the log until zinit is called again |
| 2440 | * later on some other zone. So note we may be allocating a buffer to log a zone other than the one being initialized |
| 2441 | * right now. |
| 2442 | */ |
| 2443 | if (kmem_alloc_ready) { |
| 2444 | |
| 2445 | zone_t curr_zone = NULL; |
| 2446 | unsigned int max_zones = 0, zone_idx = 0; |
| 2447 | |
| 2448 | simple_lock(&all_zones_lock); |
| 2449 | max_zones = num_zones; |
| 2450 | simple_unlock(&all_zones_lock); |
| 2451 | |
| 2452 | for (zone_idx = 0; zone_idx < max_zones; zone_idx++) { |
| 2453 | |
| 2454 | curr_zone = &(zone_array[zone_idx]); |
| 2455 | |
| 2456 | if (!curr_zone->zone_valid) { |
| 2457 | continue; |
| 2458 | } |
| 2459 | |
| 2460 | /* |
| 2461 | * We work with the zone unlocked here because we could end up needing the zone lock to |
| 2462 | * enable logging for this zone e.g. need a VM object to allocate memory to enable logging for the |
| 2463 | * VM objects zone. |
| 2464 | * |
| 2465 | * We don't expect these zones to be needed at this early a time in boot and so take this chance. |
| 2466 | */ |
| 2467 | if (curr_zone->zone_logging && curr_zone->zlog_btlog == NULL) { |
| 2468 | |
| 2469 | curr_zone->zlog_btlog = btlog_create(log_records, MAX_ZTRACE_DEPTH, (corruption_debug_flag == FALSE) /* caller_will_remove_entries_for_element? */); |
| 2470 | |
| 2471 | if (curr_zone->zlog_btlog) { |
| 2472 | |
| 2473 | printf("zone: logging started for zone %s\n" , curr_zone->zone_name); |
| 2474 | } else { |
| 2475 | printf("zone: couldn't allocate memory for zrecords, turning off zleak logging\n" ); |
| 2476 | curr_zone->zone_logging = FALSE; |
| 2477 | } |
| 2478 | } |
| 2479 | |
| 2480 | } |
| 2481 | } |
| 2482 | } |
| 2483 | |
| 2484 | #if CONFIG_GZALLOC |
| 2485 | gzalloc_zone_init(z); |
| 2486 | #endif |
| 2487 | |
| 2488 | #if CONFIG_ZCACHE |
| 2489 | /* Check if boot-arg specified it should have a cache */ |
| 2490 | if (cache_all_zones || track_this_zone(name, cache_zone_name)) { |
| 2491 | zone_change(z, Z_CACHING_ENABLED, TRUE); |
| 2492 | } |
| 2493 | #endif |
| 2494 | |
| 2495 | return(z); |
| 2496 | } |
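| | 
| | /*
| | * Illustrative sketch only (not part of the build): a typical zinit() call for a
| | * hypothetical fixed-size object. "my widget", struct my_widget and
| | * my_widget_zone are made-up names; zone_change() with Z_NOENCRYPT is the real
| | * tuning interface.
| | */
| | #if 0
| | struct my_widget {
| | uint64_t w_id;
| | void *w_data;
| | };
| | 
| | static zone_t my_widget_zone;
| | 
| | static void
| | my_widget_zone_init(void)
| | {
| | my_widget_zone = zinit(sizeof(struct my_widget),
| | 8192 * sizeof(struct my_widget), /* maximum memory to devote to the zone */
| | 0, /* alloc size hint; zinit() picks a low-fragmentation size itself */
| | "my widget");
| | zone_change(my_widget_zone, Z_NOENCRYPT, TRUE);
| | }
| | #endif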
| 2497 | unsigned zone_replenish_loops, zone_replenish_wakeups, zone_replenish_wakeups_initiated, zone_replenish_throttle_count; |
| 2498 | |
| 2499 | static void zone_replenish_thread(zone_t); |
| 2500 | |
| 2501 | /* High priority VM privileged thread used to asynchronously refill a designated |
| 2502 | * zone, such as the reserved VM map entry zone. |
| 2503 | */ |
| 2504 | __attribute__((noreturn)) |
| 2505 | static void |
| 2506 | zone_replenish_thread(zone_t z) |
| 2507 | { |
| 2508 | vm_size_t free_size; |
| 2509 | current_thread()->options |= TH_OPT_VMPRIV; |
| 2510 | |
| 2511 | for (;;) { |
| 2512 | lock_zone(z); |
| 2513 | assert(z->zone_valid); |
| 2514 | z->zone_replenishing = TRUE; |
| 2515 | assert(z->prio_refill_watermark != 0); |
| 2516 | while ((free_size = (z->cur_size - (z->count * z->elem_size))) < (z->prio_refill_watermark * z->elem_size)) { |
| 2517 | assert(z->doing_alloc_without_vm_priv == FALSE); |
| 2518 | assert(z->doing_alloc_with_vm_priv == FALSE); |
| 2519 | assert(z->async_prio_refill == TRUE); |
| 2520 | |
| 2521 | unlock_zone(z); |
| 2522 | int zflags = KMA_KOBJECT|KMA_NOPAGEWAIT; |
| 2523 | vm_offset_t space, alloc_size; |
| 2524 | kern_return_t kr; |
| 2525 | |
| 2526 | if (vm_pool_low()) |
| 2527 | alloc_size = round_page(z->elem_size); |
| 2528 | else |
| 2529 | alloc_size = z->alloc_size; |
| 2530 | |
| 2531 | if (z->noencrypt) |
| 2532 | zflags |= KMA_NOENCRYPT; |
| 2533 | |
| 2534 | /* Trigger jetsams via the vm_pageout_garbage_collect thread if we're running out of zone memory */ |
| 2535 | if (is_zone_map_nearing_exhaustion()) { |
| 2536 | thread_wakeup((event_t) &vm_pageout_garbage_collect); |
| 2537 | } |
| 2538 | |
| 2539 | kr = kernel_memory_allocate(zone_map, &space, alloc_size, 0, zflags, VM_KERN_MEMORY_ZONE); |
| 2540 | |
| 2541 | if (kr == KERN_SUCCESS) { |
| 2542 | zcram(z, space, alloc_size); |
| 2543 | } else if (kr == KERN_RESOURCE_SHORTAGE) { |
| 2544 | VM_PAGE_WAIT(); |
| 2545 | } else if (kr == KERN_NO_SPACE) { |
| 2546 | kr = kernel_memory_allocate(kernel_map, &space, alloc_size, 0, zflags, VM_KERN_MEMORY_ZONE); |
| 2547 | if (kr == KERN_SUCCESS) { |
| 2548 | zcram(z, space, alloc_size); |
| 2549 | } else { |
| 2550 | assert_wait_timeout(&z->zone_replenish_thread, THREAD_UNINT, 1, 100 * NSEC_PER_USEC); |
| 2551 | thread_block(THREAD_CONTINUE_NULL); |
| 2552 | } |
| 2553 | } |
| 2554 | |
| 2555 | lock_zone(z); |
| 2556 | assert(z->zone_valid); |
| 2557 | zone_replenish_loops++; |
| 2558 | } |
| 2559 | |
| 2560 | z->zone_replenishing = FALSE; |
| 2561 | /* Signal any potential throttled consumers, terminating |
| 2562 | * their timer-bounded waits. |
| 2563 | */ |
| 2564 | thread_wakeup(z); |
| 2565 | |
| 2566 | assert_wait(&z->zone_replenish_thread, THREAD_UNINT); |
| 2567 | unlock_zone(z); |
| 2568 | thread_block(THREAD_CONTINUE_NULL); |
| 2569 | zone_replenish_wakeups++; |
| 2570 | } |
| 2571 | } |
| 2572 | |
| 2573 | void |
| 2574 | zone_prio_refill_configure(zone_t z, vm_size_t low_water_mark) { |
| 2575 | z->prio_refill_watermark = low_water_mark; |
| 2576 | |
| 2577 | z->async_prio_refill = TRUE; |
| 2578 | OSMemoryBarrier(); |
| 2579 | kern_return_t tres = kernel_thread_start_priority((thread_continue_t)zone_replenish_thread, z, MAXPRI_KERNEL, &z->zone_replenish_thread); |
| 2580 | |
| 2581 | if (tres != KERN_SUCCESS) { |
| 2582 | panic("zone_prio_refill_configure, thread create: 0x%x" , tres); |
| 2583 | } |
| 2584 | |
| 2585 | thread_deallocate(z->zone_replenish_thread); |
| 2586 | } |
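| | 
| | /*
| | * Example (illustrative): a subsystem whose allocations must not fail, such as the
| | * reserved VM map entry zone mentioned above, keeps a reserve of elements with a
| | * call like
| | *
| | * zone_prio_refill_configure(my_reserved_zone, 100);
| | *
| | * where my_reserved_zone is a placeholder name and the watermark is a count of
| | * elements, not bytes.
| | */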
| 2587 | |
| 2588 | void |
| 2589 | zdestroy(zone_t z) |
| 2590 | { |
| 2591 | unsigned int zindex; |
| 2592 | |
| 2593 | assert(z != NULL); |
| 2594 | |
| 2595 | lock_zone(z); |
| 2596 | assert(z->zone_valid); |
| 2597 | |
| 2598 | /* Assert that the zone does not have any allocations in flight */ |
| 2599 | assert(z->doing_alloc_without_vm_priv == FALSE); |
| 2600 | assert(z->doing_alloc_with_vm_priv == FALSE); |
| 2601 | assert(z->async_pending == FALSE); |
| 2602 | assert(z->waiting == FALSE); |
| 2603 | assert(z->async_prio_refill == FALSE); |
| 2604 | |
| 2605 | #if !KASAN_ZALLOC |
| 2606 | /* |
| 2607 | * Unset the valid bit. We'll hit an assert failure on further operations on this zone, until zinit() is called again. |
| 2608 | * Leave the zone valid for KASan as we will see zfree's on quarantined free elements even after the zone is destroyed. |
| 2609 | */ |
| 2610 | z->zone_valid = FALSE; |
| 2611 | #endif |
| 2612 | unlock_zone(z); |
| 2613 | |
| 2614 | #if CONFIG_ZCACHE |
| 2615 | /* Per-cpu caches are not drained here; a zone with caching enabled must not be destroyed. */
| 2616 | if (zone_caching_enabled(z)) { |
| 2617 | panic("zdestroy: Zone caching enabled for zone %s" , z->zone_name); |
| 2618 | } |
| 2619 | #endif /* CONFIG_ZCACHE */ |
| 2620 | |
| 2621 | /* Dump all the free elements */ |
| 2622 | drop_free_elements(z); |
| 2623 | |
| 2624 | #if CONFIG_GZALLOC |
| 2625 | /* If the zone is gzalloc managed dump all the elements in the free cache */ |
| 2626 | gzalloc_empty_free_cache(z); |
| 2627 | #endif |
| 2628 | |
| 2629 | lock_zone(z); |
| 2630 | |
| 2631 | #if !KASAN_ZALLOC |
| 2632 | /* Assert that all counts are zero */ |
| 2633 | assert(z->count == 0); |
| 2634 | assert(z->countfree == 0); |
| 2635 | assert(z->cur_size == 0); |
| 2636 | assert(z->page_count == 0); |
| 2637 | assert(z->count_all_free_pages == 0); |
| 2638 | |
| 2639 | /* Assert that all queues except the foreign queue are empty. The zone allocator doesn't know how to free up foreign memory. */ |
| 2640 | assert(queue_empty(&z->pages.all_used)); |
| 2641 | assert(queue_empty(&z->pages.intermediate)); |
| 2642 | assert(queue_empty(&z->pages.all_free)); |
| 2643 | #endif |
| 2644 | |
| 2645 | zindex = z->index; |
| 2646 | |
| 2647 | unlock_zone(z); |
| 2648 | |
| 2649 | simple_lock(&all_zones_lock); |
| 2650 | |
| 2651 | assert(!bitmap_test(zone_empty_bitmap, zindex)); |
| 2652 | /* Mark the zone as empty in the bitmap */ |
| 2653 | bitmap_set(zone_empty_bitmap, zindex); |
| 2654 | num_zones_in_use--; |
| 2655 | assert(num_zones_in_use > 0); |
| 2656 | |
| 2657 | simple_unlock(&all_zones_lock); |
| 2658 | } |
| 2659 | |
| 2660 | /* Initialize the metadata for an allocation chunk */ |
| 2661 | static inline void |
| 2662 | zcram_metadata_init(vm_offset_t newmem, vm_size_t size, struct zone_page_metadata *chunk_metadata) |
| 2663 | { |
| 2664 | struct zone_page_metadata *page_metadata; |
| 2665 | |
| 2666 | /* The first page is the real metadata for this allocation chunk. We mark the others as fake metadata */ |
| 2667 | size -= PAGE_SIZE; |
| 2668 | newmem += PAGE_SIZE; |
| 2669 | |
| 2670 | for (; size > 0; newmem += PAGE_SIZE, size -= PAGE_SIZE) { |
| 2671 | page_metadata = get_zone_page_metadata((struct zone_free_element *)newmem, TRUE); |
| 2672 | assert(page_metadata != chunk_metadata); |
| 2673 | PAGE_METADATA_SET_ZINDEX(page_metadata, MULTIPAGE_METADATA_MAGIC); |
| 2674 | page_metadata_set_realmeta(page_metadata, chunk_metadata); |
| 2675 | page_metadata->free_count = 0; |
| 2676 | } |
| 2677 | return; |
| 2678 | } |
| 2679 | |
| 2680 | |
| 2681 | static void |
| 2682 | random_free_to_zone( |
| 2683 | zone_t zone, |
| 2684 | vm_offset_t newmem, |
| 2685 | vm_offset_t first_element_offset, |
| 2686 | int element_count, |
| 2687 | unsigned int *entropy_buffer) |
| 2688 | { |
| 2689 | vm_offset_t last_element_offset; |
| 2690 | vm_offset_t element_addr; |
| 2691 | vm_size_t elem_size; |
| 2692 | int index; |
| 2693 | |
| 2694 | assert(element_count && element_count <= ZONE_CHUNK_MAXELEMENTS); |
| 2695 | elem_size = zone->elem_size; |
| 2696 | last_element_offset = first_element_offset + ((element_count * elem_size) - elem_size); |
| 2697 | for (index = 0; index < element_count; index++) { |
| 2698 | assert(first_element_offset <= last_element_offset); |
| 2699 | if ( |
| 2700 | #if DEBUG || DEVELOPMENT |
| 2701 | leak_scan_debug_flag || __improbable(zone->tags) || |
| 2702 | #endif /* DEBUG || DEVELOPMENT */ |
| 2703 | random_bool_gen_bits(&zone_bool_gen, entropy_buffer, MAX_ENTROPY_PER_ZCRAM, 1)) { |
| 2704 | element_addr = newmem + first_element_offset; |
| 2705 | first_element_offset += elem_size; |
| 2706 | } else { |
| 2707 | element_addr = newmem + last_element_offset; |
| 2708 | last_element_offset -= elem_size; |
| 2709 | } |
| 2710 | if (element_addr != (vm_offset_t)zone) { |
| 2711 | zone->count++; /* compensate for free_to_zone */ |
| 2712 | free_to_zone(zone, element_addr, FALSE); |
| 2713 | } |
| 2714 | zone->cur_size += elem_size; |
| 2715 | } |
| 2716 | } |
| 2717 | |
| 2718 | /* |
| 2719 | * Cram the given memory into the specified zone. Update the zone page count accordingly. |
| 2720 | */ |
| 2721 | void |
| 2722 | zcram( |
| 2723 | zone_t zone, |
| 2724 | vm_offset_t newmem, |
| 2725 | vm_size_t size) |
| 2726 | { |
| 2727 | vm_size_t elem_size; |
| 2728 | boolean_t from_zm = FALSE; |
| 2729 | int element_count; |
| 2730 | unsigned int entropy_buffer[MAX_ENTROPY_PER_ZCRAM] = { 0 }; |
| 2731 | |
| 2732 | /* Basic sanity checks */ |
| 2733 | assert(zone != ZONE_NULL && newmem != (vm_offset_t)0); |
| 2734 | assert(!zone->collectable || zone->allows_foreign |
| 2735 | || (from_zone_map(newmem, size))); |
| 2736 | |
| 2737 | elem_size = zone->elem_size; |
| 2738 | |
| 2739 | KDBG(MACHDBG_CODE(DBG_MACH_ZALLOC, ZALLOC_ZCRAM) | DBG_FUNC_START, zone->index, size); |
| 2740 | |
| 2741 | if (from_zone_map(newmem, size)) |
| 2742 | from_zm = TRUE; |
| 2743 | |
| 2744 | if (!from_zm) { |
| 2745 | /* We cannot support elements larger than page size for foreign memory because we |
| 2746 | * put metadata on the page itself for each page of foreign memory. We need to do |
| 2747 | * this in order to be able to reach the metadata when any element is freed |
| 2748 | */ |
| 2749 | assert((zone->allows_foreign == TRUE) && (zone->elem_size <= (PAGE_SIZE - sizeof(struct zone_page_metadata)))); |
| 2750 | } |
| 2751 | |
| 2752 | if (zalloc_debug & ZALLOC_DEBUG_ZCRAM) |
| 2753 | kprintf("zcram(%p[%s], 0x%lx%s, 0x%lx)\n" , zone, zone->zone_name, |
| 2754 | (unsigned long)newmem, from_zm ? "" : "[F]" , (unsigned long)size); |
| 2755 | |
| 2756 | ZONE_PAGE_COUNT_INCR(zone, (size / PAGE_SIZE)); |
| 2757 | |
| 2758 | /* |
| 2759 | * Initialize the metadata for all pages. We don't need the zone lock
| 2760 | * here because we are not manipulating any zone related state yet. |
| 2761 | */ |
| 2762 | |
| 2763 | struct zone_page_metadata *chunk_metadata; |
| 2764 | size_t zone_page_metadata_size = sizeof(struct zone_page_metadata); |
| 2765 | |
| 2766 | assert((newmem & PAGE_MASK) == 0); |
| 2767 | assert((size & PAGE_MASK) == 0); |
| 2768 | |
| 2769 | chunk_metadata = get_zone_page_metadata((struct zone_free_element *)newmem, TRUE); |
| 2770 | chunk_metadata->pages.next = NULL; |
| 2771 | chunk_metadata->pages.prev = NULL; |
| 2772 | page_metadata_set_freelist(chunk_metadata, 0); |
| 2773 | PAGE_METADATA_SET_ZINDEX(chunk_metadata, zone->index); |
| 2774 | chunk_metadata->free_count = 0; |
| 2775 | assert((size / PAGE_SIZE) <= ZONE_CHUNK_MAXPAGES); |
| 2776 | chunk_metadata->page_count = (unsigned)(size / PAGE_SIZE); |
| 2777 | |
| 2778 | zcram_metadata_init(newmem, size, chunk_metadata); |
| 2779 | |
| 2780 | #if VM_MAX_TAG_ZONES |
| 2781 | if (__improbable(zone->tags)) { |
| 2782 | assert(from_zm); |
| 2783 | ztMemoryAdd(zone, newmem, size); |
| 2784 | } |
| 2785 | #endif /* VM_MAX_TAG_ZONES */ |
| 2786 | |
| 2787 | lock_zone(zone); |
| 2788 | assert(zone->zone_valid); |
| 2789 | enqueue_tail(&zone->pages.all_used, &(chunk_metadata->pages)); |
| 2790 | |
| 2791 | if (!from_zm) { |
| 2792 | /* We cannot support elements larger than page size for foreign memory because we |
| 2793 | * put metadata on the page itself for each page of foreign memory. We need to do |
| 2794 | * this in order to be able to reach the metadata when any element is freed |
| 2795 | */ |
| 2796 | |
| 2797 | for (; size > 0; newmem += PAGE_SIZE, size -= PAGE_SIZE) { |
| 2798 | vm_offset_t first_element_offset = 0; |
| 2799 | if (zone_page_metadata_size % ZONE_ELEMENT_ALIGNMENT == 0){ |
| 2800 | first_element_offset = zone_page_metadata_size; |
| 2801 | } else { |
| 2802 | first_element_offset = zone_page_metadata_size + (ZONE_ELEMENT_ALIGNMENT - (zone_page_metadata_size % ZONE_ELEMENT_ALIGNMENT)); |
| 2803 | } |
| 2804 | element_count = (unsigned int)((PAGE_SIZE - first_element_offset) / elem_size); |
| 2805 | random_free_to_zone(zone, newmem, first_element_offset, element_count, entropy_buffer); |
| 2806 | } |
| 2807 | } else { |
| 2808 | element_count = (unsigned int)(size / elem_size); |
| 2809 | random_free_to_zone(zone, newmem, 0, element_count, entropy_buffer); |
| 2810 | } |
| 2811 | unlock_zone(zone); |
| 2812 | |
| 2813 | KDBG(MACHDBG_CODE(DBG_MACH_ZALLOC, ZALLOC_ZCRAM) | DBG_FUNC_END, zone->index); |
| 2814 | |
| 2815 | } |
| 2816 | |
| 2817 | /* |
| 2818 | * Fill a zone with enough memory to contain at least nelem elements. |
| 2819 | * Return the number of elements actually put into the zone, which may |
| 2820 | * be more than the caller asked for since the memory allocation is |
| 2821 | * rounded up to the next zone allocation size. |
| 2822 | */ |
| 2823 | int |
| 2824 | zfill( |
| 2825 | zone_t zone, |
| 2826 | int nelem) |
| 2827 | { |
| 2828 | kern_return_t kr; |
| 2829 | vm_offset_t memory; |
| 2830 | |
| 2831 | vm_size_t alloc_size = zone->alloc_size; |
| 2832 | vm_size_t elem_per_alloc = alloc_size / zone->elem_size; |
| 2833 | vm_size_t nalloc = (nelem + elem_per_alloc - 1) / elem_per_alloc; |
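	/*
	 * For illustration (hypothetical sizes): with alloc_size == 4 * PAGE_SIZE
	 * (16 KB with 4 KB pages) and elem_size == 128, elem_per_alloc == 128, so a
	 * request for nelem == 300 rounds up to nalloc == 3 allocations and 384
	 * elements are reported back to the caller.
	 */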
| 2834 | |
| 2835 | /* Don't mix-and-match zfill with foreign memory */ |
| 2836 | assert(!zone->allows_foreign); |
| 2837 | |
| 2838 | /* Trigger jetsams via the vm_pageout_garbage_collect thread if we're running out of zone memory */ |
| 2839 | if (is_zone_map_nearing_exhaustion()) { |
| 2840 | thread_wakeup((event_t) &vm_pageout_garbage_collect); |
| 2841 | } |
| 2842 | |
| 2843 | kr = kernel_memory_allocate(zone_map, &memory, nalloc * alloc_size, 0, KMA_KOBJECT, VM_KERN_MEMORY_ZONE); |
| 2844 | if (kr != KERN_SUCCESS) { |
| 2845 | printf("%s: kernel_memory_allocate() of %lu bytes failed\n" , |
| 2846 | __func__, (unsigned long)(nalloc * alloc_size)); |
| 2847 | return 0; |
| 2848 | } |
| 2849 | |
| 2850 | for (vm_size_t i = 0; i < nalloc; i++) { |
| 2851 | zcram(zone, memory + i * alloc_size, alloc_size); |
| 2852 | } |
| 2853 | |
| 2854 | return (int)(nalloc * elem_per_alloc); |
| 2855 | } |
| 2856 | |
| 2857 | /* |
| 2858 | * Initialize the "zone of zones" which uses fixed memory allocated |
| 2859 | * earlier in memory initialization. zone_bootstrap is called |
| 2860 | * before zone_init. |
| 2861 | */ |
| 2862 | void |
| 2863 | zone_bootstrap(void) |
| 2864 | { |
| 2865 | char temp_buf[16]; |
| 2866 | |
| 2867 | if (!PE_parse_boot_argn("zalloc_debug" , &zalloc_debug, sizeof(zalloc_debug))) |
| 2868 | zalloc_debug = 0; |
| 2869 | |
| 2870 | /* Set up zone element poisoning */ |
| 2871 | zp_init(); |
| 2872 | |
| 2873 | random_bool_init(&zone_bool_gen); |
| 2874 | |
	/* should zlog be used to debug zone corruption instead of leaks? */
| 2876 | if (PE_parse_boot_argn("-zc" , temp_buf, sizeof(temp_buf))) { |
| 2877 | corruption_debug_flag = TRUE; |
| 2878 | } |
| 2879 | |
| 2880 | #if DEBUG || DEVELOPMENT |
	/* should we perform zone element size checking in copyin/copyout? */
| 2882 | if (PE_parse_boot_argn("-no-copyio-zalloc-check" , temp_buf, sizeof(temp_buf))) { |
| 2883 | copyio_zalloc_check = FALSE; |
| 2884 | } |
| 2885 | #if VM_MAX_TAG_ZONES |
	/* enable tags for zones that ask for them */
| 2887 | if (PE_parse_boot_argn("-zt" , temp_buf, sizeof(temp_buf))) { |
| 2888 | zone_tagging_on = TRUE; |
| 2889 | } |
| 2890 | #endif /* VM_MAX_TAG_ZONES */ |
| 2891 | /* disable element location randomization in a page */ |
| 2892 | if (PE_parse_boot_argn("-zl" , temp_buf, sizeof(temp_buf))) { |
| 2893 | leak_scan_debug_flag = TRUE; |
| 2894 | } |
| 2895 | #endif |
| 2896 | |
| 2897 | simple_lock_init(&all_zones_lock, 0); |
| 2898 | |
| 2899 | num_zones_in_use = 0; |
| 2900 | num_zones = 0; |
| 2901 | /* Mark all zones as empty */ |
| 2902 | bitmap_full(zone_empty_bitmap, BITMAP_LEN(MAX_ZONES)); |
| 2903 | zone_names_next = zone_names_start = 0; |
| 2904 | |
| 2905 | #if DEBUG || DEVELOPMENT |
| 2906 | simple_lock_init(&zone_test_lock, 0); |
| 2907 | #endif /* DEBUG || DEVELOPMENT */ |
| 2908 | |
| 2909 | thread_call_setup(&call_async_alloc, zalloc_async, NULL); |
| 2910 | |
	/* initialize the global lock group for zones */
| 2912 | lck_grp_attr_setdefault(&zone_locks_grp_attr); |
| 2913 | lck_grp_init(&zone_locks_grp, "zone_locks" , &zone_locks_grp_attr); |
| 2914 | |
| 2915 | lck_attr_setdefault(&zone_metadata_lock_attr); |
| 2916 | lck_mtx_init_ext(&zone_metadata_region_lck, &zone_metadata_region_lck_ext, &zone_locks_grp, &zone_metadata_lock_attr); |
| 2917 | |
| 2918 | #if CONFIG_ZCACHE |
| 2919 | /* zcc_enable_for_zone_name=<zone>: enable per-cpu zone caching for <zone>. */ |
| 2920 | if (PE_parse_boot_arg_str("zcc_enable_for_zone_name" , cache_zone_name, sizeof(cache_zone_name))) { |
| 2921 | printf("zcache: caching enabled for zone %s\n" , cache_zone_name); |
| 2922 | } |
| 2923 | |
| 2924 | /* -zcache_all: enable per-cpu zone caching for all zones, overrides 'zcc_enable_for_zone_name'. */ |
| 2925 | if (PE_parse_boot_argn("-zcache_all" , temp_buf, sizeof(temp_buf))) { |
| 2926 | cache_all_zones = TRUE; |
| 2927 | printf("zcache: caching enabled for all zones\n" ); |
| 2928 | } |
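	/*
	 * For example (hypothetical zone name): booting with
	 * zcc_enable_for_zone_name=kalloc.128 enables per-cpu caching for just that
	 * zone, while -zcache_all enables it for every zone.
	 */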
| 2929 | #endif /* CONFIG_ZCACHE */ |
| 2930 | } |
| 2931 | |
| 2932 | /* |
| 2933 | * We're being very conservative here and picking a value of 95%. We might need to lower this if |
| 2934 | * we find that we're not catching the problem and are still hitting zone map exhaustion panics. |
| 2935 | */ |
| 2936 | #define ZONE_MAP_JETSAM_LIMIT_DEFAULT 95 |
| 2937 | |
| 2938 | /* |
| 2939 | * Trigger zone-map-exhaustion jetsams if the zone map is X% full, where X=zone_map_jetsam_limit. |
| 2940 | * Can be set via boot-arg "zone_map_jetsam_limit". Set to 95% by default. |
| 2941 | */ |
| 2942 | unsigned int zone_map_jetsam_limit = ZONE_MAP_JETSAM_LIMIT_DEFAULT; |
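/*
 * For illustration (hypothetical capacity): with a 2048 MB zone map and the
 * default limit of 95, is_zone_map_nearing_exhaustion() reports TRUE once
 * zone_map->size exceeds (2048 MB * 95) / 100, i.e. roughly 1945 MB.
 */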
| 2943 | |
| 2944 | /* |
 * Returns the pid of the task with the largest number of VM map entries.
| 2946 | */ |
| 2947 | extern pid_t find_largest_process_vm_map_entries(void); |
| 2948 | |
| 2949 | /* |
| 2950 | * Callout to jetsam. If pid is -1, we wake up the memorystatus thread to do asynchronous kills. |
| 2951 | * For any other pid we try to kill that process synchronously. |
| 2952 | */ |
| 2953 | boolean_t memorystatus_kill_on_zone_map_exhaustion(pid_t pid); |
| 2954 | |
| 2955 | void get_zone_map_size(uint64_t *current_size, uint64_t *capacity) |
| 2956 | { |
| 2957 | *current_size = zone_map->size; |
| 2958 | *capacity = vm_map_max(zone_map) - vm_map_min(zone_map); |
| 2959 | } |
| 2960 | |
| 2961 | void get_largest_zone_info(char *zone_name, size_t zone_name_len, uint64_t *zone_size) |
| 2962 | { |
| 2963 | zone_t largest_zone = zone_find_largest(); |
| 2964 | strlcpy(zone_name, largest_zone->zone_name, zone_name_len); |
| 2965 | *zone_size = largest_zone->cur_size; |
| 2966 | } |
| 2967 | |
| 2968 | boolean_t is_zone_map_nearing_exhaustion(void) |
| 2969 | { |
| 2970 | uint64_t size = zone_map->size; |
| 2971 | uint64_t capacity = vm_map_max(zone_map) - vm_map_min(zone_map); |
| 2972 | if (size > ((capacity * zone_map_jetsam_limit) / 100)) { |
| 2973 | return TRUE; |
| 2974 | } |
| 2975 | return FALSE; |
| 2976 | } |
| 2977 | |
| 2978 | extern zone_t vm_map_entry_zone; |
| 2979 | extern zone_t vm_object_zone; |
| 2980 | |
| 2981 | #define VMENTRY_TO_VMOBJECT_COMPARISON_RATIO 98 |
| 2982 | |
| 2983 | /* |
| 2984 | * Tries to kill a single process if it can attribute one to the largest zone. If not, wakes up the memorystatus thread |
| 2985 | * to walk through the jetsam priority bands and kill processes. |
| 2986 | */ |
| 2987 | static void kill_process_in_largest_zone(void) |
| 2988 | { |
| 2989 | pid_t pid = -1; |
| 2990 | zone_t largest_zone = zone_find_largest(); |
| 2991 | |
| 2992 | printf("zone_map_exhaustion: Zone map size %lld, capacity %lld [jetsam limit %d%%]\n" , (uint64_t)zone_map->size, |
| 2993 | (uint64_t)(vm_map_max(zone_map) - vm_map_min(zone_map)), zone_map_jetsam_limit); |
| 2994 | printf("zone_map_exhaustion: Largest zone %s, size %lu\n" , largest_zone->zone_name, (uintptr_t)largest_zone->cur_size); |
| 2995 | |
| 2996 | /* |
	 * Make sure this is never called from a user task's context; otherwise we could
	 * end up trying to synchronously kill the process whose context we're in,
	 * causing the system to hang.
| 2999 | */ |
| 3000 | assert(current_task() == kernel_task); |
| 3001 | |
| 3002 | /* |
| 3003 | * If vm_object_zone is the largest, check to see if the number of elements in vm_map_entry_zone is comparable. If so, consider |
| 3004 | * vm_map_entry_zone as the largest. This lets us target a specific process to jetsam to quickly recover from the zone map bloat. |
| 3005 | */ |
| 3006 | if (largest_zone == vm_object_zone) { |
| 3007 | unsigned int vm_object_zone_count = vm_object_zone->count; |
| 3008 | unsigned int vm_map_entry_zone_count = vm_map_entry_zone->count; |
| 3009 | /* Is the VM map entries zone count >= 98% of the VM objects zone count? */ |
| 3010 | if (vm_map_entry_zone_count >= ((vm_object_zone_count * VMENTRY_TO_VMOBJECT_COMPARISON_RATIO) / 100)) { |
| 3011 | largest_zone = vm_map_entry_zone; |
| 3012 | printf("zone_map_exhaustion: Picking VM map entries as the zone to target, size %lu\n" , (uintptr_t)largest_zone->cur_size); |
| 3013 | } |
| 3014 | } |
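	/*
	 * For illustration: if vm_object_zone held 1,000,000 elements, vm_map_entry_zone
	 * would be picked instead whenever it held at least
	 * (1000000 * 98) / 100 = 980,000 elements.
	 */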
| 3015 | |
| 3016 | /* TODO: Extend this to check for the largest process in other zones as well. */ |
| 3017 | if (largest_zone == vm_map_entry_zone) { |
| 3018 | pid = find_largest_process_vm_map_entries(); |
| 3019 | } else { |
| 3020 | printf("zone_map_exhaustion: Nothing to do for the largest zone [%s]. Waking up memorystatus thread.\n" , largest_zone->zone_name); |
| 3021 | } |
| 3022 | if (!memorystatus_kill_on_zone_map_exhaustion(pid)) { |
| 3023 | printf("zone_map_exhaustion: Call to memorystatus failed, victim pid: %d\n" , pid); |
| 3024 | } |
| 3025 | } |
| 3026 | |
| 3027 | /* Global initialization of Zone Allocator. |
| 3028 | * Runs after zone_bootstrap. |
| 3029 | */ |
| 3030 | void |
| 3031 | zone_init( |
| 3032 | vm_size_t max_zonemap_size) |
| 3033 | { |
| 3034 | kern_return_t retval; |
| 3035 | vm_offset_t zone_min; |
| 3036 | vm_offset_t zone_max; |
| 3037 | vm_offset_t zone_metadata_space; |
| 3038 | unsigned int zone_pages; |
| 3039 | vm_map_kernel_flags_t vmk_flags; |
| 3040 | |
| 3041 | #if VM_MAX_TAG_ZONES |
| 3042 | if (zone_tagging_on) ztInit(max_zonemap_size, &zone_locks_grp); |
| 3043 | #endif |
| 3044 | |
| 3045 | vmk_flags = VM_MAP_KERNEL_FLAGS_NONE; |
| 3046 | vmk_flags.vmkf_permanent = TRUE; |
| 3047 | retval = kmem_suballoc(kernel_map, &zone_min, max_zonemap_size, |
| 3048 | FALSE, VM_FLAGS_ANYWHERE, vmk_flags, VM_KERN_MEMORY_ZONE, |
| 3049 | &zone_map); |
| 3050 | |
| 3051 | if (retval != KERN_SUCCESS) |
| 3052 | panic("zone_init: kmem_suballoc failed" ); |
| 3053 | zone_max = zone_min + round_page(max_zonemap_size); |
| 3054 | #if CONFIG_GZALLOC |
| 3055 | gzalloc_init(max_zonemap_size); |
| 3056 | #endif |
| 3057 | |
| 3058 | /* |
| 3059 | * Setup garbage collection information: |
| 3060 | */ |
| 3061 | zone_map_min_address = zone_min; |
| 3062 | zone_map_max_address = zone_max; |
| 3063 | |
| 3064 | zone_pages = (unsigned int)atop_kernel(zone_max - zone_min); |
| 3065 | zone_metadata_space = round_page(zone_pages * sizeof(struct zone_page_metadata)); |
| 3066 | retval = kernel_memory_allocate(zone_map, &zone_metadata_region_min, zone_metadata_space, |
| 3067 | 0, KMA_KOBJECT | KMA_VAONLY | KMA_PERMANENT, VM_KERN_MEMORY_OSFMK); |
| 3068 | if (retval != KERN_SUCCESS) |
| 3069 | panic("zone_init: zone_metadata_region initialization failed!" ); |
| 3070 | zone_metadata_region_max = zone_metadata_region_min + zone_metadata_space; |
| 3071 | |
| 3072 | #if defined(__LP64__) |
| 3073 | /* |
	 * ensure that any vm_page_t that gets created from
	 * the vm_page zone can be packed properly (see vm_page.h
	 * for the packing requirements).
| 3077 | */ |
| 3078 | if ((vm_page_t)(VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(zone_metadata_region_max))) != (vm_page_t)zone_metadata_region_max) |
| 3079 | panic("VM_PAGE_PACK_PTR failed on zone_metadata_region_max - %p" , (void *)zone_metadata_region_max); |
| 3080 | |
| 3081 | if ((vm_page_t)(VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(zone_map_max_address))) != (vm_page_t)zone_map_max_address) |
| 3082 | panic("VM_PAGE_PACK_PTR failed on zone_map_max_address - %p" , (void *)zone_map_max_address); |
| 3083 | #endif |
| 3084 | |
| 3085 | lck_grp_attr_setdefault(&zone_gc_lck_grp_attr); |
| 3086 | lck_grp_init(&zone_gc_lck_grp, "zone_gc" , &zone_gc_lck_grp_attr); |
| 3087 | lck_attr_setdefault(&zone_gc_lck_attr); |
| 3088 | lck_mtx_init_ext(&zone_gc_lock, &zone_gc_lck_ext, &zone_gc_lck_grp, &zone_gc_lck_attr); |
| 3089 | |
| 3090 | #if CONFIG_ZLEAKS |
| 3091 | /* |
| 3092 | * Initialize the zone leak monitor |
| 3093 | */ |
| 3094 | zleak_init(max_zonemap_size); |
| 3095 | #endif /* CONFIG_ZLEAKS */ |
| 3096 | |
| 3097 | #if VM_MAX_TAG_ZONES |
| 3098 | if (zone_tagging_on) vm_allocation_zones_init(); |
| 3099 | #endif |
| 3100 | |
| 3101 | int jetsam_limit_temp = 0; |
| 3102 | if (PE_parse_boot_argn("zone_map_jetsam_limit" , &jetsam_limit_temp, sizeof (jetsam_limit_temp)) && |
| 3103 | jetsam_limit_temp > 0 && jetsam_limit_temp <= 100) |
| 3104 | zone_map_jetsam_limit = jetsam_limit_temp; |
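	/* e.g. booting with zone_map_jetsam_limit=90 lowers the trigger to 90% of the
	 * zone map; values outside the range (0, 100] are ignored and the default of
	 * 95 is kept. */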
| 3105 | } |
| 3106 | |
| 3107 | #pragma mark - |
| 3108 | #pragma mark zalloc_canblock |
| 3109 | |
| 3110 | extern boolean_t early_boot_complete; |
| 3111 | |
| 3112 | void |
| 3113 | zalloc_poison_element(boolean_t check_poison, zone_t zone, vm_offset_t addr) |
| 3114 | { |
| 3115 | vm_offset_t inner_size = zone->elem_size; |
| 3116 | if (__improbable(check_poison && addr)) { |
| 3117 | vm_offset_t *element_cursor = ((vm_offset_t *) addr) + 1; |
| 3118 | vm_offset_t *backup = get_backup_ptr(inner_size, (vm_offset_t *) addr); |
| 3119 | |
| 3120 | for ( ; element_cursor < backup ; element_cursor++) |
| 3121 | if (__improbable(*element_cursor != ZP_POISON)) |
| 3122 | zone_element_was_modified_panic(zone, |
| 3123 | addr, |
| 3124 | *element_cursor, |
| 3125 | ZP_POISON, |
| 3126 | ((vm_offset_t)element_cursor) - addr); |
| 3127 | } |
| 3128 | |
| 3129 | if (addr) { |
| 3130 | /* |
		 * Clear out the old next pointer and backup to avoid leaking the cookie,
		 * and so that only values on the freelist have a valid cookie.
| 3133 | */ |
| 3134 | |
| 3135 | vm_offset_t *primary = (vm_offset_t *) addr; |
| 3136 | vm_offset_t *backup = get_backup_ptr(inner_size, primary); |
| 3137 | |
| 3138 | *primary = ZP_POISON; |
| 3139 | *backup = ZP_POISON; |
| 3140 | } |
| 3141 | } |
| 3142 | |
| 3143 | /* |
| 3144 | * zalloc returns an element from the specified zone. |
| 3145 | */ |
| 3146 | static void * |
| 3147 | zalloc_internal( |
| 3148 | zone_t zone, |
| 3149 | boolean_t canblock, |
| 3150 | boolean_t nopagewait, |
| 3151 | vm_size_t |
| 3152 | #if !VM_MAX_TAG_ZONES |
| 3153 | __unused |
| 3154 | #endif |
| 3155 | reqsize, |
| 3156 | vm_tag_t tag) |
| 3157 | { |
| 3158 | vm_offset_t addr = 0; |
| 3159 | kern_return_t retval; |
| 3160 | uintptr_t zbt[MAX_ZTRACE_DEPTH]; /* used in zone leak logging and zone leak detection */ |
| 3161 | unsigned int numsaved = 0; |
| 3162 | boolean_t zone_replenish_wakeup = FALSE, zone_alloc_throttle = FALSE; |
| 3163 | thread_t thr = current_thread(); |
| 3164 | boolean_t check_poison = FALSE; |
| 3165 | boolean_t set_doing_alloc_with_vm_priv = FALSE; |
| 3166 | |
| 3167 | #if CONFIG_ZLEAKS |
| 3168 | uint32_t zleak_tracedepth = 0; /* log this allocation if nonzero */ |
| 3169 | #endif /* CONFIG_ZLEAKS */ |
| 3170 | |
| 3171 | #if KASAN |
| 3172 | /* |
	 * KASan uses zalloc() for its fakestack, and those allocations can happen
	 * anywhere. However, we make sure these calls can never block.
| 3175 | */ |
| 3176 | boolean_t irq_safe = FALSE; |
| 3177 | const char *fakestack_name = "fakestack." ; |
| 3178 | if (strncmp(zone->zone_name, fakestack_name, strlen(fakestack_name)) == 0) { |
| 3179 | irq_safe = TRUE; |
| 3180 | } |
| 3181 | #elif MACH_ASSERT |
| 3182 | /* In every other case, zalloc() from interrupt context is unsafe. */ |
| 3183 | const boolean_t irq_safe = FALSE; |
| 3184 | #endif |
| 3185 | |
| 3186 | assert(zone != ZONE_NULL); |
| 3187 | assert(irq_safe || ml_get_interrupts_enabled() || ml_is_quiescing() || debug_mode_active() || !early_boot_complete); |
| 3188 | |
| 3189 | #if CONFIG_GZALLOC |
| 3190 | addr = gzalloc_alloc(zone, canblock); |
| 3191 | #endif |
| 3192 | /* |
| 3193 | * If zone logging is turned on and this is the zone we're tracking, grab a backtrace. |
| 3194 | */ |
| 3195 | if (__improbable(DO_LOGGING(zone))) |
| 3196 | numsaved = OSBacktrace((void*) zbt, MAX_ZTRACE_DEPTH); |
| 3197 | |
| 3198 | #if CONFIG_ZLEAKS |
| 3199 | /* |
| 3200 | * Zone leak detection: capture a backtrace every zleak_sample_factor |
| 3201 | * allocations in this zone. |
| 3202 | */ |
| 3203 | if (__improbable(zone->zleak_on && sample_counter(&zone->zleak_capture, zleak_sample_factor) == TRUE)) { |
| 3204 | /* Avoid backtracing twice if zone logging is on */ |
| 3205 | if (numsaved == 0) |
| 3206 | zleak_tracedepth = backtrace(zbt, MAX_ZTRACE_DEPTH); |
| 3207 | else |
| 3208 | zleak_tracedepth = numsaved; |
| 3209 | } |
| 3210 | #endif /* CONFIG_ZLEAKS */ |
| 3211 | |
| 3212 | #if VM_MAX_TAG_ZONES |
| 3213 | if (__improbable(zone->tags)) vm_tag_will_update_zone(tag, zone->tag_zone_index); |
| 3214 | #endif /* VM_MAX_TAG_ZONES */ |
| 3215 | |
| 3216 | #if CONFIG_ZCACHE |
| 3217 | if (__probable(addr == 0)) { |
| 3218 | if (zone_caching_enabled(zone)) { |
| 3219 | addr = zcache_alloc_from_cpu_cache(zone); |
| 3220 | if (addr) { |
| 3221 | #if KASAN_ZALLOC |
| 3222 | addr = kasan_fixup_allocated_element_address(zone, addr); |
| 3223 | #endif |
| 3224 | DTRACE_VM2(zalloc, zone_t, zone, void*, addr); |
| 3225 | return((void *)addr); |
| 3226 | } |
| 3227 | } |
| 3228 | } |
| 3229 | #endif /* CONFIG_ZCACHE */ |
| 3230 | |
| 3231 | lock_zone(zone); |
| 3232 | assert(zone->zone_valid); |
| 3233 | |
| 3234 | if (zone->async_prio_refill && zone->zone_replenish_thread) { |
| 3235 | vm_size_t zfreec = (zone->cur_size - (zone->count * zone->elem_size)); |
| 3236 | vm_size_t zrefillwm = zone->prio_refill_watermark * zone->elem_size; |
| 3237 | zone_replenish_wakeup = (zfreec < zrefillwm); |
| 3238 | zone_alloc_throttle = (((zfreec < (zrefillwm / 2)) && ((thr->options & TH_OPT_VMPRIV) == 0)) || (zfreec == 0)); |
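		/*
		 * For illustration (hypothetical sizes): with elem_size == 4096 and
		 * prio_refill_watermark == 100, zrefillwm is 400 KB; the replenish thread
		 * is woken once free space drops below that, and non-VM-privileged callers
		 * are throttled once it drops below 200 KB (or hits zero).
		 */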
| 3239 | |
| 3240 | do { |
| 3241 | if (zone_replenish_wakeup) { |
| 3242 | zone_replenish_wakeups_initiated++; |
| 3243 | /* Signal the potentially waiting |
| 3244 | * refill thread. |
| 3245 | */ |
| 3246 | thread_wakeup(&zone->zone_replenish_thread); |
| 3247 | |
| 3248 | /* We don't want to wait around for zone_replenish_thread to bump up the free count |
| 3249 | * if we're in zone_gc(). This keeps us from deadlocking with zone_replenish_thread. |
| 3250 | */ |
| 3251 | if (thr->options & TH_OPT_ZONE_GC) |
| 3252 | break; |
| 3253 | |
| 3254 | unlock_zone(zone); |
				/* Scheduling latencies etc. may prevent
				 * the refill thread from keeping up
				 * with demand. Throttle consumers
				 * when we fall below half the
				 * watermark, unless the thread is
				 * VM-privileged.
				 */
| 3261 | if (zone_alloc_throttle) { |
| 3262 | zone_replenish_throttle_count++; |
| 3263 | assert_wait_timeout(zone, THREAD_UNINT, 1, NSEC_PER_MSEC); |
| 3264 | thread_block(THREAD_CONTINUE_NULL); |
| 3265 | } |
| 3266 | lock_zone(zone); |
| 3267 | assert(zone->zone_valid); |
| 3268 | } |
| 3269 | |
| 3270 | zfreec = (zone->cur_size - (zone->count * zone->elem_size)); |
| 3271 | zrefillwm = zone->prio_refill_watermark * zone->elem_size; |
| 3272 | zone_replenish_wakeup = (zfreec < zrefillwm); |
| 3273 | zone_alloc_throttle = (((zfreec < (zrefillwm / 2)) && ((thr->options & TH_OPT_VMPRIV) == 0)) || (zfreec == 0)); |
| 3274 | |
| 3275 | } while (zone_alloc_throttle == TRUE); |
| 3276 | } |
| 3277 | |
| 3278 | if (__probable(addr == 0)) |
| 3279 | addr = try_alloc_from_zone(zone, tag, &check_poison); |
| 3280 | |
| 3281 | /* If we're here because of zone_gc(), we didn't wait for zone_replenish_thread to finish. |
| 3282 | * So we need to ensure that we did successfully grab an element. And we only need to assert |
| 3283 | * this for zones that have a replenish thread configured (in this case, the Reserved VM map |
| 3284 | * entries zone). |
| 3285 | */ |
| 3286 | if (thr->options & TH_OPT_ZONE_GC && zone->async_prio_refill) |
| 3287 | assert(addr != 0); |
| 3288 | |
| 3289 | while ((addr == 0) && canblock) { |
| 3290 | /* |
| 3291 | * zone is empty, try to expand it |
| 3292 | * |
		 * Note that we now allow up to 2 threads (1 vm_privileged and 1 non-vm_privileged)
		 * to expand the zone concurrently... this is necessary to keep
		 * vm_privileged threads, which run critical code needed to continue
		 * compressing/swapping pages (i.e. making new free pages), from stalling
		 * behind non-vm_privileged threads waiting to acquire free pages when
		 * the vm_page_free_count is below the vm_page_free_reserved limit.
| 3299 | */ |
| 3300 | if ((zone->doing_alloc_without_vm_priv || zone->doing_alloc_with_vm_priv) && |
| 3301 | (((thr->options & TH_OPT_VMPRIV) == 0) || zone->doing_alloc_with_vm_priv)) { |
| 3302 | /* |
| 3303 | * This is a non-vm_privileged thread and a non-vm_privileged or |
| 3304 | * a vm_privileged thread is already expanding the zone... |
| 3305 | * OR |
| 3306 | * this is a vm_privileged thread and a vm_privileged thread is |
| 3307 | * already expanding the zone... |
| 3308 | * |
| 3309 | * In either case wait for a thread to finish, then try again. |
| 3310 | */ |
| 3311 | zone->waiting = TRUE; |
| 3312 | zone_sleep(zone); |
| 3313 | } else { |
| 3314 | vm_offset_t space; |
| 3315 | vm_size_t alloc_size; |
| 3316 | int retry = 0; |
| 3317 | |
| 3318 | if ((zone->cur_size + zone->elem_size) > |
| 3319 | zone->max_size) { |
| 3320 | if (zone->exhaustible) |
| 3321 | break; |
| 3322 | if (zone->expandable) { |
| 3323 | /* |
| 3324 | * We're willing to overflow certain |
| 3325 | * zones, but not without complaining. |
| 3326 | * |
| 3327 | * This is best used in conjunction |
| 3328 | * with the collectable flag. What we |
| 3329 | * want is an assurance we can get the |
| 3330 | * memory back, assuming there's no |
| 3331 | * leak. |
| 3332 | */ |
| 3333 | zone->max_size += (zone->max_size >> 1); |
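					/* For illustration: a hypothetical 1 MB max_size grows to
					 * 1.5 MB, then 2.25 MB, and so on (+50% on each overflow). */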
| 3334 | } else { |
| 3335 | unlock_zone(zone); |
| 3336 | |
| 3337 | panic_include_zprint = TRUE; |
| 3338 | #if CONFIG_ZLEAKS |
| 3339 | if (zleak_state & ZLEAK_STATE_ACTIVE) |
| 3340 | panic_include_ztrace = TRUE; |
| 3341 | #endif /* CONFIG_ZLEAKS */ |
| 3342 | panic("zalloc: zone \"%s\" empty." , zone->zone_name); |
| 3343 | } |
| 3344 | } |
| 3345 | /* |
			 * It is possible that a BG thread is refilling/expanding the zone
			 * and gets preempted during that operation. That blocks all other
			 * threads from making progress, leading to a watchdog timeout. To
			 * avoid that, boost the thread priority using the rwlock boost.
| 3350 | */ |
| 3351 | set_thread_rwlock_boost(); |
| 3352 | |
| 3353 | if ((thr->options & TH_OPT_VMPRIV)) { |
| 3354 | zone->doing_alloc_with_vm_priv = TRUE; |
| 3355 | set_doing_alloc_with_vm_priv = TRUE; |
| 3356 | } else { |
| 3357 | zone->doing_alloc_without_vm_priv = TRUE; |
| 3358 | } |
| 3359 | unlock_zone(zone); |
| 3360 | |
| 3361 | for (;;) { |
| 3362 | int zflags = KMA_KOBJECT|KMA_NOPAGEWAIT; |
| 3363 | |
| 3364 | if (vm_pool_low() || retry >= 1) |
| 3365 | alloc_size = |
| 3366 | round_page(zone->elem_size); |
| 3367 | else |
| 3368 | alloc_size = zone->alloc_size; |
| 3369 | |
| 3370 | if (zone->noencrypt) |
| 3371 | zflags |= KMA_NOENCRYPT; |
| 3372 | |
| 3373 | /* Trigger jetsams via the vm_pageout_garbage_collect thread if we're running out of zone memory */ |
| 3374 | if (is_zone_map_nearing_exhaustion()) { |
| 3375 | thread_wakeup((event_t) &vm_pageout_garbage_collect); |
| 3376 | } |
| 3377 | |
| 3378 | retval = kernel_memory_allocate(zone_map, &space, alloc_size, 0, zflags, VM_KERN_MEMORY_ZONE); |
| 3379 | if (retval == KERN_SUCCESS) { |
| 3380 | #if CONFIG_ZLEAKS |
| 3381 | if ((zleak_state & (ZLEAK_STATE_ENABLED | ZLEAK_STATE_ACTIVE)) == ZLEAK_STATE_ENABLED) { |
| 3382 | if (zone_map->size >= zleak_global_tracking_threshold) { |
| 3383 | kern_return_t kr; |
| 3384 | |
| 3385 | kr = zleak_activate(); |
| 3386 | if (kr != KERN_SUCCESS) { |
| 3387 | printf("Failed to activate live zone leak debugging (%d).\n" , kr); |
| 3388 | } |
| 3389 | } |
| 3390 | } |
| 3391 | |
| 3392 | if ((zleak_state & ZLEAK_STATE_ACTIVE) && !(zone->zleak_on)) { |
| 3393 | if (zone->cur_size > zleak_per_zone_tracking_threshold) { |
| 3394 | zone->zleak_on = TRUE; |
| 3395 | } |
| 3396 | } |
| 3397 | #endif /* CONFIG_ZLEAKS */ |
| 3398 | zcram(zone, space, alloc_size); |
| 3399 | |
| 3400 | break; |
| 3401 | } else if (retval != KERN_RESOURCE_SHORTAGE) { |
| 3402 | retry++; |
| 3403 | |
| 3404 | if (retry == 3) { |
| 3405 | panic_include_zprint = TRUE; |
| 3406 | #if CONFIG_ZLEAKS |
| 3407 | if ((zleak_state & ZLEAK_STATE_ACTIVE)) { |
| 3408 | panic_include_ztrace = TRUE; |
| 3409 | } |
| 3410 | #endif /* CONFIG_ZLEAKS */ |
| 3411 | if (retval == KERN_NO_SPACE) { |
| 3412 | zone_t zone_largest = zone_find_largest(); |
| 3413 | panic("zalloc: zone map exhausted while allocating from zone %s, likely due to memory leak in zone %s (%lu total bytes, %d elements allocated)" , |
| 3414 | zone->zone_name, zone_largest->zone_name, |
| 3415 | (unsigned long)zone_largest->cur_size, zone_largest->count); |
| 3416 | |
| 3417 | } |
| 3418 | panic("zalloc: \"%s\" (%d elements) retry fail %d" , zone->zone_name, zone->count, retval); |
| 3419 | } |
| 3420 | } else { |
| 3421 | break; |
| 3422 | } |
| 3423 | } |
| 3424 | lock_zone(zone); |
| 3425 | assert(zone->zone_valid); |
| 3426 | |
| 3427 | if (set_doing_alloc_with_vm_priv == TRUE) |
| 3428 | zone->doing_alloc_with_vm_priv = FALSE; |
| 3429 | else |
| 3430 | zone->doing_alloc_without_vm_priv = FALSE; |
| 3431 | |
| 3432 | if (zone->waiting) { |
| 3433 | zone->waiting = FALSE; |
| 3434 | zone_wakeup(zone); |
| 3435 | } |
| 3436 | clear_thread_rwlock_boost(); |
| 3437 | |
| 3438 | addr = try_alloc_from_zone(zone, tag, &check_poison); |
| 3439 | if (addr == 0 && |
| 3440 | retval == KERN_RESOURCE_SHORTAGE) { |
| 3441 | if (nopagewait == TRUE) |
| 3442 | break; /* out of the main while loop */ |
| 3443 | unlock_zone(zone); |
| 3444 | |
| 3445 | VM_PAGE_WAIT(); |
| 3446 | lock_zone(zone); |
| 3447 | assert(zone->zone_valid); |
| 3448 | } |
| 3449 | } |
| 3450 | if (addr == 0) |
| 3451 | addr = try_alloc_from_zone(zone, tag, &check_poison); |
| 3452 | } |
| 3453 | |
| 3454 | #if CONFIG_ZLEAKS |
| 3455 | /* Zone leak detection: |
| 3456 | * If we're sampling this allocation, add it to the zleaks hash table. |
| 3457 | */ |
| 3458 | if (addr && zleak_tracedepth > 0) { |
| 3459 | /* Sampling can fail if another sample is happening at the same time in a different zone. */ |
| 3460 | if (!zleak_log(zbt, addr, zleak_tracedepth, zone->elem_size)) { |
| 3461 | /* If it failed, roll back the counter so we sample the next allocation instead. */ |
| 3462 | zone->zleak_capture = zleak_sample_factor; |
| 3463 | } |
| 3464 | } |
| 3465 | #endif /* CONFIG_ZLEAKS */ |
| 3466 | |
| 3467 | |
| 3468 | if ((addr == 0) && (!canblock || nopagewait) && (zone->async_pending == FALSE) && (zone->no_callout == FALSE) && (zone->exhaustible == FALSE) && (!vm_pool_low())) { |
| 3469 | zone->async_pending = TRUE; |
| 3470 | unlock_zone(zone); |
| 3471 | thread_call_enter(&call_async_alloc); |
| 3472 | lock_zone(zone); |
| 3473 | assert(zone->zone_valid); |
| 3474 | addr = try_alloc_from_zone(zone, tag, &check_poison); |
| 3475 | } |
| 3476 | |
| 3477 | #if VM_MAX_TAG_ZONES |
| 3478 | if (__improbable(zone->tags) && addr) { |
| 3479 | if (reqsize) reqsize = zone->elem_size - reqsize; |
| 3480 | vm_tag_update_zone_size(tag, zone->tag_zone_index, zone->elem_size, reqsize); |
| 3481 | } |
| 3482 | #endif /* VM_MAX_TAG_ZONES */ |
| 3483 | |
| 3484 | unlock_zone(zone); |
| 3485 | |
| 3486 | if (__improbable(DO_LOGGING(zone) && addr)) { |
| 3487 | btlog_add_entry(zone->zlog_btlog, (void *)addr, ZOP_ALLOC, (void **)zbt, numsaved); |
| 3488 | } |
| 3489 | |
| 3490 | zalloc_poison_element(check_poison, zone, addr); |
| 3491 | |
| 3492 | if (addr) { |
| 3493 | #if DEBUG || DEVELOPMENT |
| 3494 | if (__improbable(leak_scan_debug_flag && !(zone->elem_size & (sizeof(uintptr_t) - 1)))) { |
| 3495 | unsigned int count, idx; |
| 3496 | /* Fill element, from tail, with backtrace in reverse order */ |
| 3497 | if (numsaved == 0) numsaved = backtrace(zbt, MAX_ZTRACE_DEPTH); |
| 3498 | count = (unsigned int)(zone->elem_size / sizeof(uintptr_t)); |
| 3499 | if (count >= numsaved) count = numsaved - 1; |
| 3500 | for (idx = 0; idx < count; idx++) ((uintptr_t *)addr)[count - 1 - idx] = zbt[idx + 1]; |
| 3501 | } |
| 3502 | #endif /* DEBUG || DEVELOPMENT */ |
| 3503 | } |
| 3504 | |
| 3505 | TRACE_MACHLEAKS(ZALLOC_CODE, ZALLOC_CODE_2, zone->elem_size, addr); |
| 3506 | |
| 3507 | |
| 3508 | #if KASAN_ZALLOC |
| 3509 | addr = kasan_fixup_allocated_element_address(zone, addr); |
| 3510 | #endif |
| 3511 | |
| 3512 | DTRACE_VM2(zalloc, zone_t, zone, void*, addr); |
| 3513 | |
| 3514 | return((void *)addr); |
| 3515 | } |
| 3516 | |
| 3517 | void * |
| 3518 | zalloc(zone_t zone) |
| 3519 | { |
| 3520 | return (zalloc_internal(zone, TRUE, FALSE, 0, VM_KERN_MEMORY_NONE)); |
| 3521 | } |
| 3522 | |
| 3523 | void * |
| 3524 | zalloc_noblock(zone_t zone) |
| 3525 | { |
| 3526 | return (zalloc_internal(zone, FALSE, FALSE, 0, VM_KERN_MEMORY_NONE)); |
| 3527 | } |
| 3528 | |
| 3529 | void * |
| 3530 | zalloc_nopagewait(zone_t zone) |
| 3531 | { |
| 3532 | return (zalloc_internal(zone, TRUE, TRUE, 0, VM_KERN_MEMORY_NONE)); |
| 3533 | } |
| 3534 | |
| 3535 | void * |
| 3536 | zalloc_canblock_tag(zone_t zone, boolean_t canblock, vm_size_t reqsize, vm_tag_t tag) |
| 3537 | { |
| 3538 | return (zalloc_internal(zone, canblock, FALSE, reqsize, tag)); |
| 3539 | } |
| 3540 | |
| 3541 | void * |
| 3542 | zalloc_canblock(zone_t zone, boolean_t canblock) |
| 3543 | { |
| 3544 | return (zalloc_internal(zone, canblock, FALSE, 0, VM_KERN_MEMORY_NONE)); |
| 3545 | } |
| 3546 | |
| 3547 | void * |
| 3548 | zalloc_attempt(zone_t zone) |
| 3549 | { |
| 3550 | boolean_t check_poison = FALSE; |
| 3551 | vm_offset_t addr = try_alloc_from_zone(zone, VM_KERN_MEMORY_NONE, &check_poison); |
| 3552 | zalloc_poison_element(check_poison, zone, addr); |
| 3553 | return (void *)addr; |
| 3554 | } |
| 3555 | |
| 3556 | void |
| 3557 | zfree_direct(zone_t zone, vm_offset_t elem) |
| 3558 | { |
| 3559 | boolean_t poison = zfree_poison_element(zone, elem); |
| 3560 | free_to_zone(zone, elem, poison); |
| 3561 | } |
| 3562 | |
| 3563 | |
| 3564 | void |
| 3565 | zalloc_async( |
| 3566 | __unused thread_call_param_t p0, |
| 3567 | __unused thread_call_param_t p1) |
| 3568 | { |
| 3569 | zone_t current_z = NULL; |
| 3570 | unsigned int max_zones, i; |
| 3571 | void *elt = NULL; |
| 3572 | boolean_t pending = FALSE; |
| 3573 | |
| 3574 | simple_lock(&all_zones_lock); |
| 3575 | max_zones = num_zones; |
| 3576 | simple_unlock(&all_zones_lock); |
| 3577 | for (i = 0; i < max_zones; i++) { |
| 3578 | current_z = &(zone_array[i]); |
| 3579 | |
| 3580 | if (current_z->no_callout == TRUE) { |
| 3581 | /* async_pending will never be set */ |
| 3582 | continue; |
| 3583 | } |
| 3584 | |
| 3585 | lock_zone(current_z); |
| 3586 | if (current_z->zone_valid && current_z->async_pending == TRUE) { |
| 3587 | current_z->async_pending = FALSE; |
| 3588 | pending = TRUE; |
| 3589 | } |
| 3590 | unlock_zone(current_z); |
| 3591 | |
| 3592 | if (pending == TRUE) { |
| 3593 | elt = zalloc_canblock_tag(current_z, TRUE, 0, VM_KERN_MEMORY_OSFMK); |
| 3594 | zfree(current_z, elt); |
| 3595 | pending = FALSE; |
| 3596 | } |
| 3597 | } |
| 3598 | } |
| 3599 | |
| 3600 | /* |
 * zget returns an element from the specified zone, or NULL immediately
 * if none is available; it never blocks.
| 3603 | */ |
| 3604 | void * |
| 3605 | zget( |
| 3606 | zone_t zone) |
| 3607 | { |
| 3608 | return zalloc_internal(zone, FALSE, TRUE, 0, VM_KERN_MEMORY_NONE); |
| 3609 | } |
| 3610 | |
/* Keep this FALSE by default. Large-memory machines run orders of magnitude
   slower in debug mode when it is TRUE. Use the debugger to enable if needed. */
| 3613 | /* static */ boolean_t zone_check = FALSE; |
| 3614 | |
| 3615 | static void zone_check_freelist(zone_t zone, vm_offset_t elem) |
| 3616 | { |
| 3617 | struct zone_free_element *this; |
| 3618 | struct zone_page_metadata *thispage; |
| 3619 | |
| 3620 | if (zone->allows_foreign) { |
| 3621 | for (thispage = (struct zone_page_metadata *)queue_first(&zone->pages.any_free_foreign); |
| 3622 | !queue_end(&zone->pages.any_free_foreign, &(thispage->pages)); |
| 3623 | thispage = (struct zone_page_metadata *)queue_next(&(thispage->pages))) { |
| 3624 | for (this = page_metadata_get_freelist(thispage); |
| 3625 | this != NULL; |
| 3626 | this = this->next) { |
| 3627 | if (!is_sane_zone_element(zone, (vm_address_t)this) || (vm_address_t)this == elem) |
| 3628 | panic("zone_check_freelist" ); |
| 3629 | } |
| 3630 | } |
| 3631 | } |
| 3632 | for (thispage = (struct zone_page_metadata *)queue_first(&zone->pages.all_free); |
| 3633 | !queue_end(&zone->pages.all_free, &(thispage->pages)); |
| 3634 | thispage = (struct zone_page_metadata *)queue_next(&(thispage->pages))) { |
| 3635 | for (this = page_metadata_get_freelist(thispage); |
| 3636 | this != NULL; |
| 3637 | this = this->next) { |
| 3638 | if (!is_sane_zone_element(zone, (vm_address_t)this) || (vm_address_t)this == elem) |
| 3639 | panic("zone_check_freelist" ); |
| 3640 | } |
| 3641 | } |
| 3642 | for (thispage = (struct zone_page_metadata *)queue_first(&zone->pages.intermediate); |
| 3643 | !queue_end(&zone->pages.intermediate, &(thispage->pages)); |
| 3644 | thispage = (struct zone_page_metadata *)queue_next(&(thispage->pages))) { |
| 3645 | for (this = page_metadata_get_freelist(thispage); |
| 3646 | this != NULL; |
| 3647 | this = this->next) { |
| 3648 | if (!is_sane_zone_element(zone, (vm_address_t)this) || (vm_address_t)this == elem) |
| 3649 | panic("zone_check_freelist" ); |
| 3650 | } |
| 3651 | } |
| 3652 | } |
| 3653 | |
| 3654 | boolean_t |
| 3655 | zfree_poison_element(zone_t zone, vm_offset_t elem) |
| 3656 | { |
| 3657 | boolean_t poison = FALSE; |
| 3658 | if (zp_factor != 0 || zp_tiny_zone_limit != 0) { |
| 3659 | /* |
| 3660 | * Poison the memory before it ends up on the freelist to catch |
| 3661 | * use-after-free and use of uninitialized memory |
| 3662 | * |
| 3663 | * Always poison tiny zones' elements (limit is 0 if -no-zp is set) |
| 3664 | * Also poison larger elements periodically |
| 3665 | */ |
| 3666 | |
| 3667 | vm_offset_t inner_size = zone->elem_size; |
| 3668 | |
| 3669 | uint32_t sample_factor = zp_factor + (((uint32_t)inner_size) >> zp_scale); |
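		/*
		 * For illustration (hypothetical tunables): with zp_factor == 16 and
		 * zp_scale == 4, a 256-byte element gives sample_factor == 16 + (256 >> 4)
		 * == 32, so roughly one in every 32 frees of that zone is poisoned.
		 */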
| 3670 | |
| 3671 | if (inner_size <= zp_tiny_zone_limit) |
| 3672 | poison = TRUE; |
| 3673 | else if (zp_factor != 0 && sample_counter(&zone->zp_count, sample_factor) == TRUE) |
| 3674 | poison = TRUE; |
| 3675 | |
| 3676 | if (__improbable(poison)) { |
| 3677 | |
| 3678 | /* memset_pattern{4|8} could help make this faster: <rdar://problem/4662004> */ |
| 3679 | /* Poison everything but primary and backup */ |
| 3680 | vm_offset_t *element_cursor = ((vm_offset_t *) elem) + 1; |
| 3681 | vm_offset_t *backup = get_backup_ptr(inner_size, (vm_offset_t *)elem); |
| 3682 | |
| 3683 | for ( ; element_cursor < backup; element_cursor++) |
| 3684 | *element_cursor = ZP_POISON; |
| 3685 | } |
| 3686 | } |
| 3687 | return poison; |
| 3688 | } |
| 3689 | void |
| 3690 | zfree( |
| 3691 | zone_t zone, |
| 3692 | void *addr) |
| 3693 | { |
| 3694 | vm_offset_t elem = (vm_offset_t) addr; |
| 3695 | uintptr_t zbt[MAX_ZTRACE_DEPTH]; /* only used if zone logging is enabled via boot-args */ |
| 3696 | unsigned int numsaved = 0; |
| 3697 | boolean_t gzfreed = FALSE; |
| 3698 | boolean_t poison = FALSE; |
| 3699 | #if VM_MAX_TAG_ZONES |
| 3700 | vm_tag_t tag; |
| 3701 | #endif /* VM_MAX_TAG_ZONES */ |
| 3702 | |
| 3703 | assert(zone != ZONE_NULL); |
| 3704 | DTRACE_VM2(zfree, zone_t, zone, void*, addr); |
| 3705 | #if KASAN_ZALLOC |
| 3706 | if (kasan_quarantine_freed_element(&zone, &addr)) { |
| 3707 | return; |
| 3708 | } |
| 3709 | elem = (vm_offset_t)addr; |
| 3710 | #endif |
| 3711 | |
| 3712 | /* |
| 3713 | * If zone logging is turned on and this is the zone we're tracking, grab a backtrace. |
| 3714 | */ |
| 3715 | |
| 3716 | if (__improbable(DO_LOGGING(zone) && corruption_debug_flag)) |
| 3717 | numsaved = OSBacktrace((void *)zbt, MAX_ZTRACE_DEPTH); |
| 3718 | |
| 3719 | #if MACH_ASSERT |
| 3720 | /* Basic sanity checks */ |
| 3721 | if (zone == ZONE_NULL || elem == (vm_offset_t)0) |
| 3722 | panic("zfree: NULL" ); |
| 3723 | #endif |
| 3724 | |
| 3725 | #if CONFIG_GZALLOC |
| 3726 | gzfreed = gzalloc_free(zone, addr); |
| 3727 | #endif |
| 3728 | |
| 3729 | if (!gzfreed) { |
| 3730 | struct zone_page_metadata *page_meta = get_zone_page_metadata((struct zone_free_element *)addr, FALSE); |
| 3731 | if (zone != PAGE_METADATA_GET_ZONE(page_meta)) { |
| 3732 | panic("Element %p from zone %s caught being freed to wrong zone %s\n" , addr, PAGE_METADATA_GET_ZONE(page_meta)->zone_name, zone->zone_name); |
| 3733 | } |
| 3734 | } |
| 3735 | |
| 3736 | TRACE_MACHLEAKS(ZFREE_CODE, ZFREE_CODE_2, zone->elem_size, (uintptr_t)addr); |
| 3737 | |
| 3738 | if (__improbable(!gzfreed && zone->collectable && !zone->allows_foreign && |
| 3739 | !from_zone_map(elem, zone->elem_size))) { |
| 3740 | panic("zfree: non-allocated memory in collectable zone!" ); |
| 3741 | } |
| 3742 | |
| 3743 | if (!gzfreed) { |
| 3744 | poison = zfree_poison_element(zone, elem); |
| 3745 | } |
| 3746 | |
| 3747 | /* |
| 3748 | * See if we're doing logging on this zone. There are two styles of logging used depending on |
| 3749 | * whether we're trying to catch a leak or corruption. See comments above in zalloc for details. |
| 3750 | */ |
| 3751 | |
| 3752 | if (__improbable(DO_LOGGING(zone))) { |
| 3753 | if (corruption_debug_flag) { |
| 3754 | /* |
| 3755 | * We're logging to catch a corruption. Add a record of this zfree operation |
			 * to the log.
| 3757 | */ |
| 3758 | btlog_add_entry(zone->zlog_btlog, (void *)addr, ZOP_FREE, (void **)zbt, numsaved); |
| 3759 | } else { |
| 3760 | /* |
| 3761 | * We're logging to catch a leak. Remove any record we might have for this |
| 3762 | * element since it's being freed. Note that we may not find it if the buffer |
| 3763 | * overflowed and that's OK. Since the log is of a limited size, old records |
| 3764 | * get overwritten if there are more zallocs than zfrees. |
| 3765 | */ |
| 3766 | btlog_remove_entries_for_element(zone->zlog_btlog, (void *)addr); |
| 3767 | } |
| 3768 | } |
| 3769 | |
| 3770 | #if CONFIG_ZCACHE |
| 3771 | if (zone_caching_enabled(zone)) { |
| 3772 | int __assert_only ret = zcache_free_to_cpu_cache(zone, addr); |
| 3773 | assert(ret != FALSE); |
| 3774 | return; |
| 3775 | } |
| 3776 | #endif /* CONFIG_ZCACHE */ |
| 3777 | |
| 3778 | lock_zone(zone); |
| 3779 | assert(zone->zone_valid); |
| 3780 | |
| 3781 | if (zone_check) { |
| 3782 | zone_check_freelist(zone, elem); |
| 3783 | } |
| 3784 | |
| 3785 | if (__probable(!gzfreed)) { |
| 3786 | #if VM_MAX_TAG_ZONES |
| 3787 | if (__improbable(zone->tags)) { |
| 3788 | tag = (ZTAG(zone, elem)[0] >> 1); |
			// set the tag with bit 0 clear so the block remains in use
| 3790 | ZTAG(zone, elem)[0] = 0xFFFE; |
| 3791 | } |
| 3792 | #endif /* VM_MAX_TAG_ZONES */ |
| 3793 | free_to_zone(zone, elem, poison); |
| 3794 | } |
| 3795 | |
| 3796 | if (__improbable(zone->count < 0)) { |
| 3797 | panic("zfree: zone count underflow in zone %s while freeing element %p, possible cause: double frees or freeing memory that did not come from this zone" , |
| 3798 | zone->zone_name, addr); |
| 3799 | } |
| 3800 | |
| 3801 | #if CONFIG_ZLEAKS |
| 3802 | /* |
| 3803 | * Zone leak detection: un-track the allocation |
| 3804 | */ |
| 3805 | if (zone->zleak_on) { |
| 3806 | zleak_free(elem, zone->elem_size); |
| 3807 | } |
| 3808 | #endif /* CONFIG_ZLEAKS */ |
| 3809 | |
| 3810 | #if VM_MAX_TAG_ZONES |
| 3811 | if (__improbable(zone->tags) && __probable(!gzfreed)) { |
| 3812 | vm_tag_update_zone_size(tag, zone->tag_zone_index, -((int64_t)zone->elem_size), 0); |
| 3813 | } |
| 3814 | #endif /* VM_MAX_TAG_ZONES */ |
| 3815 | |
| 3816 | unlock_zone(zone); |
| 3817 | } |
| 3818 | |
| 3819 | /* Change a zone's flags. |
| 3820 | * This routine must be called immediately after zinit. |
| 3821 | */ |
| 3822 | void |
| 3823 | zone_change( |
| 3824 | zone_t zone, |
| 3825 | unsigned int item, |
| 3826 | boolean_t value) |
| 3827 | { |
| 3828 | assert( zone != ZONE_NULL ); |
| 3829 | assert( value == TRUE || value == FALSE ); |
| 3830 | |
| 3831 | switch(item){ |
| 3832 | case Z_NOENCRYPT: |
| 3833 | zone->noencrypt = value; |
| 3834 | break; |
| 3835 | case Z_EXHAUST: |
| 3836 | zone->exhaustible = value; |
| 3837 | break; |
| 3838 | case Z_COLLECT: |
| 3839 | zone->collectable = value; |
| 3840 | break; |
| 3841 | case Z_EXPAND: |
| 3842 | zone->expandable = value; |
| 3843 | break; |
| 3844 | case Z_FOREIGN: |
| 3845 | zone->allows_foreign = value; |
| 3846 | break; |
| 3847 | case Z_CALLERACCT: |
| 3848 | zone->caller_acct = value; |
| 3849 | break; |
| 3850 | case Z_NOCALLOUT: |
| 3851 | zone->no_callout = value; |
| 3852 | break; |
| 3853 | case Z_TAGS_ENABLED: |
| 3854 | #if VM_MAX_TAG_ZONES |
| 3855 | { |
| 3856 | static int tag_zone_index; |
| 3857 | zone->tags = TRUE; |
| 3858 | zone->tags_inline = (((page_size + zone->elem_size - 1) / zone->elem_size) <= (sizeof(uint32_t) / sizeof(uint16_t))); |
| 3859 | zone->tag_zone_index = OSAddAtomic(1, &tag_zone_index); |
| 3860 | } |
| 3861 | #endif /* VM_MAX_TAG_ZONES */ |
| 3862 | break; |
| 3863 | case Z_GZALLOC_EXEMPT: |
| 3864 | zone->gzalloc_exempt = value; |
| 3865 | #if CONFIG_GZALLOC |
| 3866 | gzalloc_reconfigure(zone); |
| 3867 | #endif |
| 3868 | break; |
| 3869 | case Z_ALIGNMENT_REQUIRED: |
| 3870 | zone->alignment_required = value; |
| 3871 | #if KASAN_ZALLOC |
| 3872 | if (zone->kasan_redzone == KASAN_GUARD_SIZE) { |
| 3873 | /* Don't disturb alignment with the redzone for zones with |
| 3874 | * specific alignment requirements. */ |
| 3875 | zone->elem_size -= zone->kasan_redzone * 2; |
| 3876 | zone->kasan_redzone = 0; |
| 3877 | } |
| 3878 | #endif |
| 3879 | #if CONFIG_GZALLOC |
| 3880 | gzalloc_reconfigure(zone); |
| 3881 | #endif |
| 3882 | break; |
| 3883 | case Z_KASAN_QUARANTINE: |
| 3884 | zone->kasan_quarantine = value; |
| 3885 | break; |
| 3886 | case Z_CACHING_ENABLED: |
| 3887 | #if CONFIG_ZCACHE |
| 3888 | if (value == TRUE && use_caching) { |
| 3889 | if (zcache_ready()) { |
| 3890 | zcache_init(zone); |
| 3891 | } else { |
| 3892 | zone->cpu_cache_enable_when_ready = TRUE; |
| 3893 | } |
| 3894 | |
| 3895 | } |
| 3896 | #endif |
| 3897 | break; |
| 3898 | default: |
| 3899 | panic("Zone_change: Wrong Item Type!" ); |
| 3900 | /* break; */ |
| 3901 | } |
| 3902 | } |
| 3903 | |
| 3904 | /* |
| 3905 | * Return the expected number of free elements in the zone. |
| 3906 | * This calculation will be incorrect if items are zfree'd that |
| 3907 | * were never zalloc'd/zget'd. The correct way to stuff memory |
| 3908 | * into a zone is by zcram. |
| 3909 | */ |
| 3910 | |
| 3911 | integer_t |
| 3912 | zone_free_count(zone_t zone) |
| 3913 | { |
| 3914 | integer_t free_count; |
| 3915 | |
| 3916 | lock_zone(zone); |
| 3917 | free_count = zone->countfree; |
| 3918 | unlock_zone(zone); |
| 3919 | |
| 3920 | assert(free_count >= 0); |
| 3921 | |
| 3922 | return(free_count); |
| 3923 | } |
| 3924 | |
| 3925 | /* Drops the elements in the free queue of a zone. Called by zone_gc() on each zone, and when a zone is zdestroy'ed. */ |
| 3926 | void |
| 3927 | drop_free_elements(zone_t z) |
| 3928 | { |
| 3929 | vm_size_t elt_size, size_freed; |
| 3930 | unsigned int total_freed_pages = 0; |
| 3931 | uint64_t old_all_free_count; |
| 3932 | struct zone_page_metadata *page_meta; |
| 3933 | queue_head_t page_meta_head; |
| 3934 | |
| 3935 | lock_zone(z); |
| 3936 | if (queue_empty(&z->pages.all_free)) { |
| 3937 | unlock_zone(z); |
| 3938 | return; |
| 3939 | } |
| 3940 | |
| 3941 | /* |
| 3942 | * Snatch all of the free elements away from the zone. |
| 3943 | */ |
| 3944 | elt_size = z->elem_size; |
| 3945 | old_all_free_count = z->count_all_free_pages; |
| 3946 | queue_new_head(&z->pages.all_free, &page_meta_head, struct zone_page_metadata *, pages); |
| 3947 | queue_init(&z->pages.all_free); |
| 3948 | z->count_all_free_pages = 0; |
| 3949 | unlock_zone(z); |
| 3950 | |
	/* Iterate through the snatched pages to total the size and count of the elements on them */
| 3952 | size_freed = 0; |
| 3953 | queue_iterate(&page_meta_head, page_meta, struct zone_page_metadata *, pages) { |
| 3954 | assert(from_zone_map((vm_address_t)page_meta, sizeof(*page_meta))); /* foreign elements should be in any_free_foreign */ |
| 3955 | size_freed += elt_size * page_meta->free_count; |
| 3956 | } |
| 3957 | |
| 3958 | /* Update the zone size and free element count */ |
| 3959 | lock_zone(z); |
| 3960 | z->cur_size -= size_freed; |
| 3961 | z->countfree -= size_freed/elt_size; |
| 3962 | unlock_zone(z); |
| 3963 | |
| 3964 | while ((page_meta = (struct zone_page_metadata *)dequeue_head(&page_meta_head)) != NULL) { |
| 3965 | vm_address_t free_page_address; |
| 3966 | /* Free the pages for metadata and account for them */ |
| 3967 | free_page_address = get_zone_page(page_meta); |
| 3968 | ZONE_PAGE_COUNT_DECR(z, page_meta->page_count); |
| 3969 | total_freed_pages += page_meta->page_count; |
| 3970 | old_all_free_count -= page_meta->page_count; |
| 3971 | #if KASAN_ZALLOC |
| 3972 | kasan_poison_range(free_page_address, page_meta->page_count * PAGE_SIZE, ASAN_VALID); |
| 3973 | #endif |
| 3974 | #if VM_MAX_TAG_ZONES |
| 3975 | if (z->tags) ztMemoryRemove(z, free_page_address, (page_meta->page_count * PAGE_SIZE)); |
| 3976 | #endif /* VM_MAX_TAG_ZONES */ |
| 3977 | kmem_free(zone_map, free_page_address, (page_meta->page_count * PAGE_SIZE)); |
| 3978 | if (current_thread()->options & TH_OPT_ZONE_GC) { |
| 3979 | thread_yield_to_preemption(); |
| 3980 | } |
| 3981 | } |
| 3982 | |
| 3983 | /* We freed all the pages from the all_free list for this zone */ |
| 3984 | assert(old_all_free_count == 0); |
| 3985 | |
| 3986 | if (zalloc_debug & ZALLOC_DEBUG_ZONEGC) |
| 3987 | kprintf("zone_gc() of zone %s freed %lu elements, %d pages\n" , z->zone_name, (unsigned long)size_freed/elt_size, total_freed_pages); |
| 3988 | } |
| 3989 | |
| 3990 | /* Zone garbage collection |
| 3991 | * |
| 3992 | * zone_gc will walk through all the free elements in all the |
| 3993 | * zones that are marked collectable looking for reclaimable |
| 3994 | * pages. zone_gc is called by consider_zone_gc when the system |
| 3995 | * begins to run out of memory. |
| 3996 | * |
| 3997 | * We should ensure that zone_gc never blocks. |
| 3998 | */ |
| 3999 | void |
| 4000 | zone_gc(boolean_t consider_jetsams) |
| 4001 | { |
| 4002 | unsigned int max_zones; |
| 4003 | zone_t z; |
| 4004 | unsigned int i; |
| 4005 | |
| 4006 | if (consider_jetsams) { |
| 4007 | kill_process_in_largest_zone(); |
| 4008 | /* |
| 4009 | * If we do end up jetsamming something, we need to do a zone_gc so that |
| 4010 | * we can reclaim free zone elements and update the zone map size. |
| 4011 | * Fall through. |
| 4012 | */ |
| 4013 | } |
| 4014 | |
| 4015 | lck_mtx_lock(&zone_gc_lock); |
| 4016 | |
| 4017 | current_thread()->options |= TH_OPT_ZONE_GC; |
| 4018 | |
| 4019 | simple_lock(&all_zones_lock); |
| 4020 | max_zones = num_zones; |
| 4021 | simple_unlock(&all_zones_lock); |
| 4022 | |
| 4023 | if (zalloc_debug & ZALLOC_DEBUG_ZONEGC) |
| 4024 | kprintf("zone_gc() starting...\n" ); |
| 4025 | |
| 4026 | for (i = 0; i < max_zones; i++) { |
| 4027 | z = &(zone_array[i]); |
| 4028 | assert(z != ZONE_NULL); |
| 4029 | |
| 4030 | if (!z->collectable) { |
| 4031 | continue; |
| 4032 | } |
| 4033 | #if CONFIG_ZCACHE |
| 4034 | if (zone_caching_enabled(z)) { |
| 4035 | zcache_drain_depot(z); |
| 4036 | } |
| 4037 | #endif /* CONFIG_ZCACHE */ |
| 4038 | if (queue_empty(&z->pages.all_free)) { |
| 4039 | continue; |
| 4040 | } |
| 4041 | |
| 4042 | drop_free_elements(z); |
| 4043 | } |
| 4044 | |
| 4045 | current_thread()->options &= ~TH_OPT_ZONE_GC; |
| 4046 | |
| 4047 | lck_mtx_unlock(&zone_gc_lock); |
| 4048 | } |
| 4049 | |
| 4050 | extern vm_offset_t kmapoff_kaddr; |
| 4051 | extern unsigned int kmapoff_pgcnt; |
| 4052 | |
| 4053 | /* |
| 4054 | * consider_zone_gc: |
| 4055 | * |
| 4056 | * Called by the pageout daemon when the system needs more free pages. |
| 4057 | */ |
| 4058 | |
| 4059 | void |
| 4060 | consider_zone_gc(boolean_t consider_jetsams) |
| 4061 | { |
| 4062 | if (kmapoff_kaddr != 0) { |
| 4063 | /* |
| 4064 | * One-time reclaim of kernel_map resources we allocated in |
| 4065 | * early boot. |
| 4066 | */ |
| 4067 | (void) vm_deallocate(kernel_map, |
| 4068 | kmapoff_kaddr, kmapoff_pgcnt * PAGE_SIZE_64); |
| 4069 | kmapoff_kaddr = 0; |
| 4070 | } |
| 4071 | |
| 4072 | if (zone_gc_allowed) |
| 4073 | zone_gc(consider_jetsams); |
| 4074 | } |
| 4075 | |
| 4076 | /* |
| 4077 | * Creates a vm_map_copy_t to return to the caller of mach_* MIG calls |
| 4078 | * requesting zone information. |
 * Frees unused pages towards the end of the region, and zeroes out unused
| 4080 | * space on the last page. |
| 4081 | */ |
| 4082 | vm_map_copy_t |
| 4083 | create_vm_map_copy( |
| 4084 | vm_offset_t start_addr, |
| 4085 | vm_size_t total_size, |
| 4086 | vm_size_t used_size) |
| 4087 | { |
| 4088 | kern_return_t kr; |
| 4089 | vm_offset_t end_addr; |
| 4090 | vm_size_t free_size; |
| 4091 | vm_map_copy_t copy; |
| 4092 | |
| 4093 | if (used_size != total_size) { |
| 4094 | end_addr = start_addr + used_size; |
| 4095 | free_size = total_size - (round_page(end_addr) - start_addr); |
| 4096 | |
| 4097 | if (free_size >= PAGE_SIZE) { |
| 4098 | kmem_free(ipc_kernel_map, |
| 4099 | round_page(end_addr), free_size); |
| 4100 | } |
| 4101 | bzero((char *) end_addr, round_page(end_addr) - end_addr); |
| 4102 | } |
| 4103 | |
| 4104 | kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)start_addr, |
| 4105 | (vm_map_size_t)used_size, TRUE, ©); |
| 4106 | assert(kr == KERN_SUCCESS); |
| 4107 | |
| 4108 | return copy; |
| 4109 | } |
| 4110 | |
| 4111 | boolean_t |
| 4112 | get_zone_info( |
| 4113 | zone_t z, |
| 4114 | mach_zone_name_t *zn, |
| 4115 | mach_zone_info_t *zi) |
| 4116 | { |
| 4117 | struct zone zcopy; |
| 4118 | |
| 4119 | assert(z != ZONE_NULL); |
| 4120 | lock_zone(z); |
| 4121 | if (!z->zone_valid) { |
| 4122 | unlock_zone(z); |
| 4123 | return FALSE; |
| 4124 | } |
| 4125 | zcopy = *z; |
| 4126 | unlock_zone(z); |
| 4127 | |
| 4128 | if (zn != NULL) { |
| 4129 | /* assuming here the name data is static */ |
| 4130 | (void) __nosan_strlcpy(zn->mzn_name, zcopy.zone_name, |
| 4131 | strlen(zcopy.zone_name)+1); |
| 4132 | } |
| 4133 | |
| 4134 | if (zi != NULL) { |
| 4135 | zi->mzi_count = (uint64_t)zcopy.count; |
| 4136 | zi->mzi_cur_size = ptoa_64(zcopy.page_count); |
| 4137 | zi->mzi_max_size = (uint64_t)zcopy.max_size; |
| 4138 | zi->mzi_elem_size = (uint64_t)zcopy.elem_size; |
| 4139 | zi->mzi_alloc_size = (uint64_t)zcopy.alloc_size; |
| 4140 | zi->mzi_sum_size = zcopy.sum_count * zcopy.elem_size; |
| 4141 | zi->mzi_exhaustible = (uint64_t)zcopy.exhaustible; |
| 4142 | zi->mzi_collectable = 0; |
| 4143 | if (zcopy.collectable) { |
| 4144 | SET_MZI_COLLECTABLE_BYTES(zi->mzi_collectable, ((uint64_t)zcopy.count_all_free_pages * PAGE_SIZE)); |
| 4145 | SET_MZI_COLLECTABLE_FLAG(zi->mzi_collectable, TRUE); |
| 4146 | } |
| 4147 | } |
| 4148 | |
| 4149 | return TRUE; |
| 4150 | } |
| 4151 | |
| 4152 | kern_return_t |
| 4153 | task_zone_info( |
| 4154 | __unused task_t task, |
| 4155 | __unused mach_zone_name_array_t *namesp, |
| 4156 | __unused mach_msg_type_number_t *namesCntp, |
| 4157 | __unused task_zone_info_array_t *infop, |
| 4158 | __unused mach_msg_type_number_t *infoCntp) |
| 4159 | { |
| 4160 | return KERN_FAILURE; |
| 4161 | } |
| 4162 | |
| 4163 | kern_return_t |
| 4164 | mach_zone_info( |
| 4165 | host_priv_t host, |
| 4166 | mach_zone_name_array_t *namesp, |
| 4167 | mach_msg_type_number_t *namesCntp, |
| 4168 | mach_zone_info_array_t *infop, |
| 4169 | mach_msg_type_number_t *infoCntp) |
| 4170 | { |
| 4171 | return (mach_memory_info(host, namesp, namesCntp, infop, infoCntp, NULL, NULL)); |
| 4172 | } |
| 4173 | |
| 4174 | |
| 4175 | kern_return_t |
| 4176 | mach_memory_info( |
| 4177 | host_priv_t host, |
| 4178 | mach_zone_name_array_t *namesp, |
| 4179 | mach_msg_type_number_t *namesCntp, |
| 4180 | mach_zone_info_array_t *infop, |
| 4181 | mach_msg_type_number_t *infoCntp, |
| 4182 | mach_memory_info_array_t *memoryInfop, |
| 4183 | mach_msg_type_number_t *memoryInfoCntp) |
| 4184 | { |
| 4185 | mach_zone_name_t *names; |
| 4186 | vm_offset_t names_addr; |
| 4187 | vm_size_t names_size; |
| 4188 | |
| 4189 | mach_zone_info_t *info; |
| 4190 | vm_offset_t info_addr; |
| 4191 | vm_size_t info_size; |
| 4192 | |
| 4193 | mach_memory_info_t *memory_info; |
| 4194 | vm_offset_t memory_info_addr; |
| 4195 | vm_size_t memory_info_size; |
| 4196 | vm_size_t memory_info_vmsize; |
| 4197 | unsigned int num_info; |
| 4198 | |
| 4199 | unsigned int max_zones, used_zones, i; |
| 4200 | mach_zone_name_t *zn; |
| 4201 | mach_zone_info_t *zi; |
| 4202 | kern_return_t kr; |
| 4203 | |
| 4204 | uint64_t zones_collectable_bytes = 0; |
| 4205 | |
| 4206 | if (host == HOST_NULL) |
| 4207 | return KERN_INVALID_HOST; |
| 4208 | #if CONFIG_DEBUGGER_FOR_ZONE_INFO |
| 4209 | if (!PE_i_can_has_debugger(NULL)) |
| 4210 | return KERN_INVALID_HOST; |
| 4211 | #endif |
| 4212 | |
| 4213 | /* |
| 4214 | * We assume that zones aren't freed once allocated. |
| 4215 | * We won't pick up any zones that are allocated later. |
| 4216 | */ |
| 4217 | |
| 4218 | simple_lock(&all_zones_lock); |
| 4219 | max_zones = (unsigned int)(num_zones); |
| 4220 | simple_unlock(&all_zones_lock); |
| 4221 | |
| 4222 | names_size = round_page(max_zones * sizeof *names); |
| 4223 | kr = kmem_alloc_pageable(ipc_kernel_map, |
| 4224 | &names_addr, names_size, VM_KERN_MEMORY_IPC); |
| 4225 | if (kr != KERN_SUCCESS) |
| 4226 | return kr; |
| 4227 | names = (mach_zone_name_t *) names_addr; |
| 4228 | |
| 4229 | info_size = round_page(max_zones * sizeof *info); |
| 4230 | kr = kmem_alloc_pageable(ipc_kernel_map, |
| 4231 | &info_addr, info_size, VM_KERN_MEMORY_IPC); |
| 4232 | if (kr != KERN_SUCCESS) { |
| 4233 | kmem_free(ipc_kernel_map, |
| 4234 | names_addr, names_size); |
| 4235 | return kr; |
| 4236 | } |
| 4237 | info = (mach_zone_info_t *) info_addr; |
| 4238 | |
| 4239 | zn = &names[0]; |
| 4240 | zi = &info[0]; |
| 4241 | |
| 4242 | used_zones = max_zones; |
| 4243 | for (i = 0; i < max_zones; i++) { |
| 4244 | if (!get_zone_info(&(zone_array[i]), zn, zi)) { |
| 4245 | used_zones--; |
| 4246 | continue; |
| 4247 | } |
| 4248 | zones_collectable_bytes += GET_MZI_COLLECTABLE_BYTES(zi->mzi_collectable); |
| 4249 | zn++; |
| 4250 | zi++; |
| 4251 | } |
| 4252 | |
| 4253 | *namesp = (mach_zone_name_t *) create_vm_map_copy(names_addr, names_size, used_zones * sizeof *names); |
| 4254 | *namesCntp = used_zones; |
| 4255 | |
| 4256 | *infop = (mach_zone_info_t *) create_vm_map_copy(info_addr, info_size, used_zones * sizeof *info); |
| 4257 | *infoCntp = used_zones; |
| 4258 | |
| 4259 | num_info = 0; |
| 4260 | memory_info_addr = 0; |
| 4261 | |
| 4262 | if (memoryInfop && memoryInfoCntp) |
| 4263 | { |
| 4264 | vm_map_copy_t copy; |
| 4265 | num_info = vm_page_diagnose_estimate(); |
| 4266 | memory_info_size = num_info * sizeof(*memory_info); |
| 4267 | memory_info_vmsize = round_page(memory_info_size); |
| 4268 | kr = kmem_alloc_pageable(ipc_kernel_map, |
| 4269 | &memory_info_addr, memory_info_vmsize, VM_KERN_MEMORY_IPC); |
| 4270 | if (kr != KERN_SUCCESS) { |
| 4271 | return kr; |
| 4272 | } |
| 4273 | |
| 4274 | kr = vm_map_wire_kernel(ipc_kernel_map, memory_info_addr, memory_info_addr + memory_info_vmsize, |
| 4275 | VM_PROT_READ|VM_PROT_WRITE, VM_KERN_MEMORY_IPC, FALSE); |
| 4276 | assert(kr == KERN_SUCCESS); |
| 4277 | |
| 4278 | memory_info = (mach_memory_info_t *) memory_info_addr; |
| 4279 | vm_page_diagnose(memory_info, num_info, zones_collectable_bytes); |
| 4280 | |
| 4281 | kr = vm_map_unwire(ipc_kernel_map, memory_info_addr, memory_info_addr + memory_info_vmsize, FALSE); |
| 4282 | assert(kr == KERN_SUCCESS); |
| 4283 | |
| 4284 | kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)memory_info_addr, |
| 4285 | (vm_map_size_t)memory_info_size, TRUE, ©); |
| 4286 | assert(kr == KERN_SUCCESS); |
| 4287 | |
| 4288 | *memoryInfop = (mach_memory_info_t *) copy; |
| 4289 | *memoryInfoCntp = num_info; |
| 4290 | } |
| 4291 | |
| 4292 | return KERN_SUCCESS; |
| 4293 | } |
| 4294 | |
| 4295 | kern_return_t |
| 4296 | mach_zone_info_for_zone( |
| 4297 | host_priv_t host, |
| 4298 | mach_zone_name_t name, |
| 4299 | mach_zone_info_t *infop) |
| 4300 | { |
| 4301 | unsigned int max_zones, i; |
| 4302 | zone_t zone_ptr; |
| 4303 | |
| 4304 | if (host == HOST_NULL) |
| 4305 | return KERN_INVALID_HOST; |
| 4306 | #if CONFIG_DEBUGGER_FOR_ZONE_INFO |
| 4307 | if (!PE_i_can_has_debugger(NULL)) |
| 4308 | return KERN_INVALID_HOST; |
| 4309 | #endif |
| 4310 | |
| 4311 | if (infop == NULL) { |
| 4312 | return KERN_INVALID_ARGUMENT; |
| 4313 | } |
| 4314 | |
| 4315 | simple_lock(&all_zones_lock); |
| 4316 | max_zones = (unsigned int)(num_zones); |
| 4317 | simple_unlock(&all_zones_lock); |
| 4318 | |
| 4319 | zone_ptr = ZONE_NULL; |
| 4320 | for (i = 0; i < max_zones; i++) { |
| 4321 | zone_t z = &(zone_array[i]); |
| 4322 | assert(z != ZONE_NULL); |
| 4323 | |
| 4324 | /* Find the requested zone by name */ |
| 4325 | if (track_this_zone(z->zone_name, name.mzn_name)) { |
| 4326 | zone_ptr = z; |
| 4327 | break; |
| 4328 | } |
| 4329 | } |
| 4330 | |
| 4331 | /* No zones found with the requested zone name */ |
| 4332 | if (zone_ptr == ZONE_NULL) { |
| 4333 | return KERN_INVALID_ARGUMENT; |
| 4334 | } |
| 4335 | |
| 4336 | if (get_zone_info(zone_ptr, NULL, infop)) { |
| 4337 | return KERN_SUCCESS; |
| 4338 | } |
| 4339 | return KERN_FAILURE; |
| 4340 | } |
| 4341 | |
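|  | /* |
|  | * mach_zone_info_for_largest_zone: |
|  | * Return the name and statistics of the zone with the largest current size, |
|  | * as selected by zone_find_largest(). |
|  | */ |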
| 4342 | kern_return_t |
| 4343 | mach_zone_info_for_largest_zone( |
| 4344 | host_priv_t host, |
| 4345 | mach_zone_name_t *namep, |
| 4346 | mach_zone_info_t *infop) |
| 4347 | { |
| 4348 | if (host == HOST_NULL) |
| 4349 | return KERN_INVALID_HOST; |
| 4350 | #if CONFIG_DEBUGGER_FOR_ZONE_INFO |
| 4351 | if (!PE_i_can_has_debugger(NULL)) |
| 4352 | return KERN_INVALID_HOST; |
| 4353 | #endif |
| 4354 | |
| 4355 | if (namep == NULL || infop == NULL) { |
| 4356 | return KERN_INVALID_ARGUMENT; |
| 4357 | } |
| 4358 | |
| 4359 | if (get_zone_info(zone_find_largest(), namep, infop)) { |
| 4360 | return KERN_SUCCESS; |
| 4361 | } |
| 4362 | return KERN_FAILURE; |
| 4363 | } |
| 4364 | |
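|  | /* |
|  | * get_zones_collectable_bytes: |
|  | * Sum the collectable bytes reported by every zone; callers can treat the |
|  | * result as an estimate of how much memory a zone garbage collection could |
|  | * reclaim. |
|  | */ |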
| 4365 | uint64_t |
| 4366 | get_zones_collectable_bytes(void) |
| 4367 | { |
| 4368 | unsigned int i, max_zones; |
| 4369 | uint64_t zones_collectable_bytes = 0; |
| 4370 | mach_zone_info_t zi; |
| 4371 | |
| 4372 | simple_lock(&all_zones_lock); |
| 4373 | max_zones = (unsigned int)(num_zones); |
| 4374 | simple_unlock(&all_zones_lock); |
| 4375 | |
| 4376 | for (i = 0; i < max_zones; i++) { |
| 4377 | if (get_zone_info(&(zone_array[i]), NULL, &zi)) { |
| 4378 | zones_collectable_bytes += GET_MZI_COLLECTABLE_BYTES(zi.mzi_collectable); |
| 4379 | } |
| 4380 | } |
| 4381 | |
| 4382 | return zones_collectable_bytes; |
| 4383 | } |
| 4384 | |
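|  | /* |
|  | * mach_zone_get_zlog_zones: |
|  | * Copy out the names of all zones that currently have btlog-based zone |
|  | * logging enabled.  Only available on DEBUG/DEVELOPMENT kernels; otherwise |
|  | * returns KERN_FAILURE. |
|  | */ |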
| 4385 | kern_return_t |
| 4386 | mach_zone_get_zlog_zones( |
| 4387 | host_priv_t host, |
| 4388 | mach_zone_name_array_t *namesp, |
| 4389 | mach_msg_type_number_t *namesCntp) |
| 4390 | { |
| 4391 | #if DEBUG || DEVELOPMENT |
| 4392 | unsigned int max_zones, logged_zones, i; |
| 4393 | kern_return_t kr; |
| 4394 | zone_t zone_ptr; |
| 4395 | mach_zone_name_t *names; |
| 4396 | vm_offset_t names_addr; |
| 4397 | vm_size_t names_size; |
| 4398 | |
| 4399 | if (host == HOST_NULL) |
| 4400 | return KERN_INVALID_HOST; |
| 4401 | |
| 4402 | if (namesp == NULL || namesCntp == NULL) |
| 4403 | return KERN_INVALID_ARGUMENT; |
| 4404 | |
| 4405 | simple_lock(&all_zones_lock); |
| 4406 | max_zones = (unsigned int)(num_zones); |
| 4407 | simple_unlock(&all_zones_lock); |
| 4408 | |
| 4409 | names_size = round_page(max_zones * sizeof *names); |
| 4410 | kr = kmem_alloc_pageable(ipc_kernel_map, |
| 4411 | &names_addr, names_size, VM_KERN_MEMORY_IPC); |
| 4412 | if (kr != KERN_SUCCESS) |
| 4413 | return kr; |
| 4414 | names = (mach_zone_name_t *) names_addr; |
| 4415 | |
| 4416 | zone_ptr = ZONE_NULL; |
| 4417 | logged_zones = 0; |
| 4418 | for (i = 0; i < max_zones; i++) { |
| 4419 | zone_t z = &(zone_array[i]); |
| 4420 | assert(z != ZONE_NULL); |
| 4421 | |
| 4422 | /* Copy out the zone name if zone logging is enabled */ |
| 4423 | if (z->zlog_btlog) { |
| 4424 | get_zone_info(z, &names[logged_zones], NULL); |
| 4425 | logged_zones++; |
| 4426 | } |
| 4427 | } |
| 4428 | |
| 4429 | *namesp = (mach_zone_name_t *) create_vm_map_copy(names_addr, names_size, logged_zones * sizeof *names); |
| 4430 | *namesCntp = logged_zones; |
| 4431 | |
| 4432 | return KERN_SUCCESS; |
| 4433 | |
| 4434 | #else /* DEBUG || DEVELOPMENT */ |
| 4435 | #pragma unused(host, namesp, namesCntp) |
| 4436 | return KERN_FAILURE; |
| 4437 | #endif /* DEBUG || DEVELOPMENT */ |
| 4438 | } |
| 4439 | |
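|  | /* |
|  | * mach_zone_get_btlog_records: |
|  | * Copy out the backtrace log records for the named zone.  Fails if the zone |
|  | * does not exist or does not have logging enabled.  Only available on |
|  | * DEBUG/DEVELOPMENT kernels. |
|  | */ |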
| 4440 | kern_return_t |
| 4441 | mach_zone_get_btlog_records( |
| 4442 | host_priv_t host, |
| 4443 | mach_zone_name_t name, |
| 4444 | zone_btrecord_array_t *recsp, |
| 4445 | mach_msg_type_number_t *recsCntp) |
| 4446 | { |
| 4447 | #if DEBUG || DEVELOPMENT |
| 4448 | unsigned int max_zones, i, numrecs = 0; |
| 4449 | zone_btrecord_t *recs; |
| 4450 | kern_return_t kr; |
| 4451 | zone_t zone_ptr; |
| 4452 | vm_offset_t recs_addr; |
| 4453 | vm_size_t recs_size; |
| 4454 | |
| 4455 | if (host == HOST_NULL) |
| 4456 | return KERN_INVALID_HOST; |
| 4457 | |
| 4458 | if (recsp == NULL || recsCntp == NULL) |
| 4459 | return KERN_INVALID_ARGUMENT; |
| 4460 | |
| 4461 | simple_lock(&all_zones_lock); |
| 4462 | max_zones = (unsigned int)(num_zones); |
| 4463 | simple_unlock(&all_zones_lock); |
| 4464 | |
| 4465 | zone_ptr = ZONE_NULL; |
| 4466 | for (i = 0; i < max_zones; i++) { |
| 4467 | zone_t z = &(zone_array[i]); |
| 4468 | assert(z != ZONE_NULL); |
| 4469 | |
| 4470 | /* Find the requested zone by name */ |
| 4471 | if (track_this_zone(z->zone_name, name.mzn_name)) { |
| 4472 | zone_ptr = z; |
| 4473 | break; |
| 4474 | } |
| 4475 | } |
| 4476 | |
| 4477 | /* No zones found with the requested zone name */ |
| 4478 | if (zone_ptr == ZONE_NULL) { |
| 4479 | return KERN_INVALID_ARGUMENT; |
| 4480 | } |
| 4481 | |
| 4482 | /* Logging not turned on for the requested zone */ |
| 4483 | if (!DO_LOGGING(zone_ptr)) { |
| 4484 | return KERN_FAILURE; |
| 4485 | } |
| 4486 | |
| 4487 | /* Allocate memory for btlog records */ |
| 4488 | numrecs = (unsigned int)(get_btlog_records_count(zone_ptr->zlog_btlog)); |
| 4489 | recs_size = round_page(numrecs * sizeof *recs); |
| 4490 | |
| 4491 | kr = kmem_alloc_pageable(ipc_kernel_map, &recs_addr, recs_size, VM_KERN_MEMORY_IPC); |
| 4492 | if (kr != KERN_SUCCESS) { |
| 4493 | return kr; |
| 4494 | } |
| 4495 | |
| 4496 | /* |
| 4497 | * We will call get_btlog_records() below which populates this region while holding a spinlock |
| 4498 | * (the btlog lock). So these pages need to be wired. |
| 4499 | */ |
| 4500 | kr = vm_map_wire_kernel(ipc_kernel_map, recs_addr, recs_addr + recs_size, |
| 4501 | VM_PROT_READ|VM_PROT_WRITE, VM_KERN_MEMORY_IPC, FALSE); |
| 4502 | assert(kr == KERN_SUCCESS); |
| 4503 | |
| 4504 | recs = (zone_btrecord_t *)recs_addr; |
| 4505 | get_btlog_records(zone_ptr->zlog_btlog, recs, &numrecs); |
| 4506 | |
| 4507 | kr = vm_map_unwire(ipc_kernel_map, recs_addr, recs_addr + recs_size, FALSE); |
| 4508 | assert(kr == KERN_SUCCESS); |
| 4509 | |
| 4510 | *recsp = (zone_btrecord_t *) create_vm_map_copy(recs_addr, recs_size, numrecs * sizeof *recs); |
| 4511 | *recsCntp = numrecs; |
| 4512 | |
| 4513 | return KERN_SUCCESS; |
| 4514 | |
| 4515 | #else /* DEBUG || DEVELOPMENT */ |
| 4516 | #pragma unused(host, name, recsp, recsCntp) |
| 4517 | return KERN_FAILURE; |
| 4518 | #endif /* DEBUG || DEVELOPMENT */ |
| 4519 | } |
| 4520 | |
| 4521 | |
| 4522 | #if DEBUG || DEVELOPMENT |
| 4523 | |
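|  | /* |
|  | * mach_memory_info_check: |
|  | * Sanity check the wired-memory accounting: sum the per-site wired sizes |
|  | * reported by vm_page_diagnose() plus the pages owned by all zones, and |
|  | * print how far that total falls short of the top-level wired page count. |
|  | */ |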
| 4524 | kern_return_t |
| 4525 | mach_memory_info_check(void) |
| 4526 | { |
| 4527 | mach_memory_info_t * memory_info; |
| 4528 | mach_memory_info_t * info; |
| 4529 | zone_t zone; |
| 4530 | unsigned int idx, num_info, max_zones; |
| 4531 | vm_offset_t memory_info_addr; |
| 4532 | kern_return_t kr; |
| 4533 | size_t memory_info_size, memory_info_vmsize; |
| 4534 | uint64_t top_wired, zonestotal, total; |
| 4535 | |
| 4536 | num_info = vm_page_diagnose_estimate(); |
| 4537 | memory_info_size = num_info * sizeof(*memory_info); |
| 4538 | memory_info_vmsize = round_page(memory_info_size); |
| 4539 | kr = kmem_alloc(kernel_map, &memory_info_addr, memory_info_vmsize, VM_KERN_MEMORY_DIAG); |
| 4540 | assert (kr == KERN_SUCCESS); |
| 4541 | |
| 4542 | memory_info = (mach_memory_info_t *) memory_info_addr; |
| 4543 | vm_page_diagnose(memory_info, num_info, 0); |
| 4544 | |
| 4545 | simple_lock(&all_zones_lock); |
| 4546 | max_zones = num_zones; |
| 4547 | simple_unlock(&all_zones_lock); |
| 4548 | |
| 4549 | top_wired = total = zonestotal = 0; |
| 4550 | for (idx = 0; idx < max_zones; idx++) |
| 4551 | { |
| 4552 | zone = &(zone_array[idx]); |
| 4553 | assert(zone != ZONE_NULL); |
| 4554 | lock_zone(zone); |
| 4555 | zonestotal += ptoa_64(zone->page_count); |
| 4556 | unlock_zone(zone); |
| 4557 | } |
| 4558 | for (idx = 0; idx < num_info; idx++) |
| 4559 | { |
| 4560 | info = &memory_info[idx]; |
| 4561 | if (!info->size) continue; |
| 4562 | if (VM_KERN_COUNT_WIRED == info->site) top_wired = info->size; |
| 4563 | if (VM_KERN_SITE_HIDE & info->flags) continue; |
| 4564 | if (!(VM_KERN_SITE_WIRED & info->flags)) continue; |
| 4565 | total += info->size; |
| 4566 | } |
| 4567 | total += zonestotal; |
| 4568 | |
| 4569 | printf("vm_page_diagnose_check %qd of %qd, zones %qd, short 0x%qx\n" , total, top_wired, zonestotal, top_wired - total); |
| 4570 | |
| 4571 | kmem_free(kernel_map, memory_info_addr, memory_info_vmsize); |
| 4572 | |
| 4573 | return (kr); |
| 4574 | } |
| 4575 | |
| 4576 | extern boolean_t (* volatile consider_buffer_cache_collect)(int); |
| 4577 | |
| 4578 | #endif /* DEBUG || DEVELOPMENT */ |
| 4579 | |
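|  | /* |
|  | * mach_zone_force_gc: |
|  | * Trigger a zone garbage collection (DEBUG/DEVELOPMENT only), first giving |
|  | * the registered buffer-cache callout a chance to drop cached elements so |
|  | * that more zone pages become reclaimable. |
|  | * |
|  | * Minimal user-space sketch, assuming the MIG-generated mach_zone_force_gc() |
|  | * stub is available to the caller via <mach/mach_host.h>: |
|  | * |
|  | *     kern_return_t kr = mach_zone_force_gc(mach_host_self()); |
|  | *     if (kr != KERN_SUCCESS) |
|  | *         printf("zone GC request failed: %d\n", kr); |
|  | */ |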
| 4580 | kern_return_t |
| 4581 | mach_zone_force_gc( |
| 4582 | host_t host) |
| 4583 | { |
| 4584 | if (host == HOST_NULL) |
| 4585 | return KERN_INVALID_HOST; |
| 4586 | |
| 4587 | #if DEBUG || DEVELOPMENT |
| 4588 | /* Callout to buffer cache GC to drop elements in the apfs zones */ |
| 4589 | if (consider_buffer_cache_collect != NULL) { |
| 4590 | (void)(*consider_buffer_cache_collect)(0); |
| 4591 | } |
| 4592 | consider_zone_gc(FALSE); |
| 4593 | #endif /* DEBUG || DEVELOPMENT */ |
| 4594 | return (KERN_SUCCESS); |
| 4595 | } |
| 4596 | |
| 4597 | extern unsigned int stack_total; |
| 4598 | extern unsigned long long stack_allocs; |
| 4599 | |
| 4600 | #if defined(__i386__) || defined (__x86_64__) |
| 4601 | extern unsigned int inuse_ptepages_count; |
| 4602 | extern long long alloc_ptepages_count; |
| 4603 | #endif |
| 4604 | |
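|  | /* |
|  | * zone_find_largest: |
|  | * Walk the zone array and return the zone with the largest cur_size. |
|  | * The sizes are read without holding each zone's lock, so the result is |
|  | * only a snapshot. |
|  | */ |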
| 4605 | zone_t |
| 4606 | zone_find_largest(void) |
| 4607 | { |
| 4608 | unsigned int i; |
| 4609 | unsigned int max_zones; |
| 4610 | zone_t the_zone; |
| 4611 | zone_t zone_largest; |
| 4612 | |
| 4613 | simple_lock(&all_zones_lock); |
| 4614 | max_zones = num_zones; |
| 4615 | simple_unlock(&all_zones_lock); |
| 4616 | |
| 4617 | zone_largest = &(zone_array[0]); |
| 4618 | for (i = 0; i < max_zones; i++) { |
| 4619 | the_zone = &(zone_array[i]); |
| 4620 | if (the_zone->cur_size > zone_largest->cur_size) { |
| 4621 | zone_largest = the_zone; |
| 4622 | } |
| 4623 | } |
| 4624 | return zone_largest; |
| 4625 | } |
| 4626 | |
| 4627 | #if ZONE_DEBUG |
| 4628 | |
| 4629 | /* should we care about locks here? */ |
| 4630 | |
| 4631 | #define zone_in_use(z) ( z->count || z->free_elements \ |
| 4632 | || !queue_empty(&z->pages.all_free) \ |
| 4633 | || !queue_empty(&z->pages.intermediate) \ |
| 4634 | || (z->allows_foreign && !queue_empty(&z->pages.any_free_foreign))) |
| 4635 | |
| 4636 | |
| 4637 | #endif /* ZONE_DEBUG */ |
| 4638 | |
| 4639 | |
| 4640 | /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ |
| 4641 | |
| 4642 | #if DEBUG || DEVELOPMENT |
| 4643 | |
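|  | /* |
|  | * zone_copy_all_allocations_inqueue: |
|  | * For every zone_page_metadata entry on the given queue, append the |
|  | * addresses of its allocated (non-free) elements to the elems array and |
|  | * return the advanced cursor.  Expected to be called with the zone locked. |
|  | */ |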
| 4644 | static uintptr_t * |
| 4645 | zone_copy_all_allocations_inqueue(zone_t z, queue_head_t * queue, uintptr_t * elems) |
| 4646 | { |
| 4647 | struct zone_page_metadata *page_meta; |
| 4648 | vm_offset_t free, elements; |
| 4649 | vm_offset_t idx, numElements, freeCount, bytesAvail, metaSize; |
| 4650 | |
| 4651 | queue_iterate(queue, page_meta, struct zone_page_metadata *, pages) |
| 4652 | { |
| 4653 | elements = get_zone_page(page_meta); |
| 4654 | bytesAvail = ptoa(page_meta->page_count); |
| 4655 | freeCount = 0; |
| 4656 | if (z->allows_foreign && !from_zone_map(elements, z->elem_size)) |
| 4657 | { |
| 4658 | metaSize = (sizeof(struct zone_page_metadata) + ZONE_ELEMENT_ALIGNMENT - 1) & ~(ZONE_ELEMENT_ALIGNMENT - 1); |
| 4659 | bytesAvail -= metaSize; |
| 4660 | elements += metaSize; |
| 4661 | } |
| 4662 | numElements = bytesAvail / z->elem_size; |
| 4663 | // construct array of all possible elements |
| 4664 | for (idx = 0; idx < numElements; idx++) |
| 4665 | { |
| 4666 | elems[idx] = INSTANCE_PUT(elements + idx * z->elem_size); |
| 4667 | } |
| 4668 | // remove all free elements from the array |
| 4669 | free = (vm_offset_t)page_metadata_get_freelist(page_meta); |
| 4670 | while (free) |
| 4671 | { |
| 4672 | // find idx of free element |
| 4673 | for (idx = 0; (idx < numElements) && (elems[idx] != INSTANCE_PUT(free)); idx++) {} |
| 4674 | assert(idx < numElements); |
| 4675 | // remove it |
| 4676 | bcopy(&elems[idx + 1], &elems[idx], (numElements - (idx + 1)) * sizeof(elems[0])); |
| 4677 | numElements--; |
| 4678 | freeCount++; |
| 4679 | // next free element |
| 4680 | vm_offset_t *primary = (vm_offset_t *) free; |
| 4681 | free = *primary ^ zp_nopoison_cookie; |
| 4682 | } |
| 4683 | elems += numElements; |
| 4684 | } |
| 4685 | |
| 4686 | return (elems); |
| 4687 | } |
| 4688 | |
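|  | /* |
|  | * zone_leaks: |
|  | * Collect the outstanding allocations of the named zone and report their |
|  | * allocation backtraces to the supplied leak_site_proc - taken from the |
|  | * zone's btlog when available, otherwise recovered from the element |
|  | * contents or summarized under a single fake zalloc frame. |
|  | */ |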
| 4689 | kern_return_t |
| 4690 | zone_leaks(const char * zoneName, uint32_t nameLen, leak_site_proc proc, void * refCon) |
| 4691 | { |
| 4692 | uintptr_t zbt[MAX_ZTRACE_DEPTH]; |
| 4693 | zone_t zone; |
| 4694 | uintptr_t * array; |
| 4695 | uintptr_t * next; |
| 4696 | uintptr_t element, bt; |
| 4697 | uint32_t idx, count, found; |
| 4698 | uint32_t btidx, btcount, nobtcount, btfound; |
| 4699 | uint32_t elemSize; |
| 4700 | uint64_t maxElems; |
| 4701 | unsigned int max_zones; |
| 4702 | kern_return_t kr; |
| 4703 | |
| 4704 | simple_lock(&all_zones_lock); |
| 4705 | max_zones = num_zones; |
| 4706 | simple_unlock(&all_zones_lock); |
| 4707 | |
| 4708 | for (idx = 0; idx < max_zones; idx++) |
| 4709 | { |
| 4710 | if (!strncmp(zoneName, zone_array[idx].zone_name, nameLen)) break; |
| 4711 | } |
| 4712 | if (idx >= max_zones) return (KERN_INVALID_NAME); |
| 4713 | zone = &zone_array[idx]; |
| 4714 | |
| 4715 | elemSize = (uint32_t) zone->elem_size; |
| 4716 | maxElems = ptoa(zone->page_count) / elemSize; |
| 4717 | |
| 4718 | if ((zone->alloc_size % elemSize) |
| 4719 | && !leak_scan_debug_flag) return (KERN_INVALID_CAPABILITY); |
| 4720 | |
| 4721 | kr = kmem_alloc_kobject(kernel_map, (vm_offset_t *) &array, |
| 4722 | maxElems * sizeof(uintptr_t), VM_KERN_MEMORY_DIAG); |
| 4723 | if (KERN_SUCCESS != kr) return (kr); |
| 4724 | |
| 4725 | lock_zone(zone); |
| 4726 | |
| 4727 | next = array; |
| 4728 | next = zone_copy_all_allocations_inqueue(zone, &zone->pages.any_free_foreign, next); |
| 4729 | next = zone_copy_all_allocations_inqueue(zone, &zone->pages.intermediate, next); |
| 4730 | next = zone_copy_all_allocations_inqueue(zone, &zone->pages.all_used, next); |
| 4731 | count = (uint32_t)(next - array); |
| 4732 | |
| 4733 | unlock_zone(zone); |
| 4734 | |
| 4735 | zone_leaks_scan(array, count, (uint32_t)zone->elem_size, &found); |
| 4736 | assert(found <= count); |
| 4737 | |
| 4738 | for (idx = 0; idx < count; idx++) |
| 4739 | { |
| 4740 | element = array[idx]; |
| 4741 | if (kInstanceFlagReferenced & element) continue; |
| 4742 | element = INSTANCE_PUT(element) & ~kInstanceFlags; |
| 4743 | } |
| 4744 | |
| 4745 | if (zone->zlog_btlog && !corruption_debug_flag) |
| 4746 | { |
| 4747 | // btlog_copy_backtraces_for_elements will set kInstanceFlagReferenced on the elements it finds |
| 4748 | btlog_copy_backtraces_for_elements(zone->zlog_btlog, array, &count, elemSize, proc, refCon); |
| 4749 | } |
| 4750 | |
| 4751 | for (nobtcount = idx = 0; idx < count; idx++) |
| 4752 | { |
| 4753 | element = array[idx]; |
| 4754 | if (!element) continue; |
| 4755 | if (kInstanceFlagReferenced & element) continue; |
| 4756 | element = INSTANCE_PUT(element) & ~kInstanceFlags; |
| 4757 | |
| 4758 | // see if we can find any backtrace left in the element |
| 4759 | btcount = (typeof(btcount)) (zone->elem_size / sizeof(uintptr_t)); |
| 4760 | if (btcount >= MAX_ZTRACE_DEPTH) btcount = MAX_ZTRACE_DEPTH - 1; |
| 4761 | for (btfound = btidx = 0; btidx < btcount; btidx++) |
| 4762 | { |
| 4763 | bt = ((uintptr_t *)element)[btcount - 1 - btidx]; |
| 4764 | if (!VM_KERNEL_IS_SLID(bt)) break; |
| 4765 | zbt[btfound++] = bt; |
| 4766 | } |
| 4767 | if (btfound) (*proc)(refCon, 1, elemSize, &zbt[0], btfound); |
| 4768 | else nobtcount++; |
| 4769 | } |
| 4770 | if (nobtcount) |
| 4771 | { |
| 4772 | // fake backtrace when we found nothing |
| 4773 | zbt[0] = (uintptr_t) &zalloc; |
| 4774 | (*proc)(refCon, nobtcount, elemSize, &zbt[0], 1); |
| 4775 | } |
| 4776 | |
| 4777 | kmem_free(kernel_map, (vm_offset_t) array, maxElems * sizeof(uintptr_t)); |
| 4778 | |
| 4779 | return (KERN_SUCCESS); |
| 4780 | } |
| 4781 | |
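|  | /* |
|  | * kdp_is_in_zone: |
|  | * Debugger helper: returns TRUE if addr lies within an element of the zone |
|  | * with the given name. |
|  | */ |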
| 4782 | boolean_t |
| 4783 | kdp_is_in_zone(void *addr, const char *zone_name) |
| 4784 | { |
| 4785 | zone_t z; |
| 4786 | return (zone_element_size(addr, &z) && !strcmp(z->zone_name, zone_name)); |
| 4787 | } |
| 4788 | |
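|  | /* |
|  | * run_zone_test: |
|  | * Basic zone allocator self-test: repeatedly create (zinit), exercise |
|  | * (zalloc/zfree) and destroy (zdestroy) a zone named "test_zone_sysctl", |
|  | * verifying that the same zone structure is handed back each time it is |
|  | * re-created. |
|  | */ |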
| 4789 | boolean_t |
| 4790 | run_zone_test(void) |
| 4791 | { |
| 4792 | unsigned int i = 0, max_iter = 5; |
| 4793 | void * test_ptr; |
| 4794 | zone_t test_zone; |
| 4795 | |
| 4796 | simple_lock(&zone_test_lock); |
| 4797 | if (!zone_test_running) { |
| 4798 | zone_test_running = TRUE; |
| 4799 | } else { |
| 4800 | simple_unlock(&zone_test_lock); |
| 4801 | printf("run_zone_test: Test already running.\n" ); |
| 4802 | return FALSE; |
| 4803 | } |
| 4804 | simple_unlock(&zone_test_lock); |
| 4805 | |
| 4806 | printf("run_zone_test: Testing zinit(), zalloc(), zfree() and zdestroy() on zone \"test_zone_sysctl\"\n" ); |
| 4807 | |
| 4808 | /* zinit() and zdestroy() a zone with the same name a bunch of times, verify that we get back the same zone each time */ |
| 4809 | do { |
| 4810 | test_zone = zinit(sizeof(uint64_t), 100 * sizeof(uint64_t), sizeof(uint64_t), "test_zone_sysctl"); |
| 4811 | if (test_zone == NULL) { |
| 4812 | printf("run_zone_test: zinit() failed\n"); |
| 4813 | return FALSE; |
| 4814 | } |
| 4815 | |
| 4816 | #if KASAN_ZALLOC |
| 4817 | if (test_zone_ptr == NULL && zone_free_count(test_zone) != 0) { |
| 4818 | #else |
| 4819 | if (zone_free_count(test_zone) != 0) { |
| 4820 | #endif |
| 4821 | printf("run_zone_test: free count is not zero\n" ); |
| 4822 | return FALSE; |
| 4823 | } |
| 4824 | |
| 4825 | if (test_zone_ptr == NULL) { |
| 4826 | /* Stash the zone pointer returned on the first zinit */ |
| 4827 | printf("run_zone_test: zone created for the first time\n"); |
| 4828 | test_zone_ptr = test_zone; |
| 4829 | } else if (test_zone != test_zone_ptr) { |
| 4830 | printf("run_zone_test: old zone pointer and new zone pointer don't match\n"); |
| 4831 | return FALSE; |
| 4832 | } |
| 4833 | |
| 4834 | test_ptr = zalloc(test_zone); |
| 4835 | if (test_ptr == NULL) { |
| 4836 | printf("run_zone_test: zalloc() failed\n" ); |
| 4837 | return FALSE; |
| 4838 | } |
| 4839 | zfree(test_zone, test_ptr); |
| 4840 | |
| 4841 | zdestroy(test_zone); |
| 4842 | i++; |
| 4843 | |
| 4844 | printf("run_zone_test: Iteration %d successful\n" , i); |
| 4845 | } while (i < max_iter); |
| 4846 | |
| 4847 | printf("run_zone_test: Test passed\n" ); |
| 4848 | |
| 4849 | simple_lock(&zone_test_lock); |
| 4850 | zone_test_running = FALSE; |
| 4851 | simple_unlock(&zone_test_lock); |
| 4852 | |
| 4853 | return TRUE; |
| 4854 | } |
| 4855 | |
| 4856 | #endif /* DEBUG || DEVELOPMENT */ |
| 4857 | |