| 1 | /* |
| 2 | * Copyright (c) 2017 Apple Inc. All rights reserved. |
| 3 | * |
| 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
| 5 | * |
| 6 | * This file contains Original Code and/or Modifications of Original Code |
| 7 | * as defined in and that are subject to the Apple Public Source License |
| 8 | * Version 2.0 (the 'License'). You may not use this file except in |
| 9 | * compliance with the License. The rights granted to you under the License |
| 10 | * may not be used to create, or enable the creation or redistribution of, |
| 11 | * unlawful or unlicensed copies of an Apple operating system, or to |
| 12 | * circumvent, violate, or enable the circumvention or violation of, any |
| 13 | * terms of an Apple operating system software license agreement. |
| 14 | * |
| 15 | * Please obtain a copy of the License at |
| 16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
| 17 | * |
| 18 | * The Original Code and all software distributed under the License are |
| 19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
| 20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
| 21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
| 22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
| 23 | * Please see the License for the specific language governing rights and |
| 24 | * limitations under the License. |
| 25 | * |
| 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
| 27 | */ |
| 28 | /* |
| 29 | * Below is a diagram of the caching system. This design is based of the |
| 30 | * paper "Magazines and Vmem: Extending the Slab Allocator to Many CPUs and |
| 31 | * Arbitrary Resources" by Jeff Bonwick and Jonathan Adams. It is divided into 3 |
| 32 | * layers: the Per-cpu Layer, the Depot Layer, and the Zone Allocator. The |
| 33 | * Per-CPU and Depot layers store elements using arrays we call magazines. |
| 34 | * |
| 35 | * Magazines function like a stack (we push and pop elements) and can be |
| 36 | * moved around for bulk operations. |
| 37 | * _________ _________ _________ |
| 38 | * | CPU 1 | | CPU 2 | | CPU 3 | |
| 39 | * | _ _ | | _ _ | | _ _ | |
| 40 | * | |#| | | | | | | |#| | | |#| |#| | Per-CPU Layer |
| 41 | * | |#| |_| | | |_| |#| | | |#| |#| | |
| 42 | * |_________| |_________| |_________| |
| 43 | * |
| 44 | * ______________________________________________ |
| 45 | * | _ _ _ _ _ _ | |
| 46 | * | |#| |#| |#| | | | | | | | Depot Layer |
| 47 | * | |#| |#| |#| |_| |_| |_| | |
| 48 | * |______________________________________________| |
| 49 | * |
| 50 | * _______________________________________________ |
| 51 | * | # | # | # | # | # | # | # | # | # | # | # | # | Zone Allocator |
| 52 | * |_______________________________________________| |
| 53 | * |
| 54 | * The top layer is the per-cpu cache and consists of a current and |
| 55 | * previous magazine for each CPU. The current magazine is the one we always try |
| 56 | * to allocate from and free to first. Only if we are unable, do we check the |
| 57 | * previous magazine. If the previous magazine can satisfy the allocate or free, |
| 58 | * then we switch the two and allocate from the new current magazine. This layer |
| 59 | * requires no locking, so we can access multiple CPU's caches concurrently. |
| 60 | * This is the main source of the speedup. |
| 61 | * |
| 62 | * We have two magazines here to prevent thrashing when swapping magazines |
| 63 | * with the depot layer. If a certain pattern of alloc and free are called we |
| 64 | * can waste a lot of time swapping magazines to and from the depot layer. We |
| 65 | * prevent this by dividing the per-cpu cache into two separate magazines. |
| 66 | * |
| 67 | * The middle layer is the magazine depot. This layer consists of a |
| 68 | * collection of full and empty magazines. These are used to reload the per-cpu |
| 69 | * caches when needed. This is implemented as an array of magazines which are |
| 70 | * initially all empty and as we fill up magazines we increment the index to |
| 71 | * point at the first empty magazine. Since this layer is per-zone, it allows us |
| 72 | * to balance the cache between cpus, but does require taking a lock. |
| 73 | * |
| 74 | * When neither the current nor previous magazine for a given CPU can |
| 75 | * satisfy the free or allocation, we look to the depot layer. If there are |
| 76 | * magazines in the depot that can satisfy the free or allocation we swap |
| 77 | * that magazine into the current position. In the example below, to allocate on |
| 78 | * the given CPU we must lock the depot layer and swap magazine A with magazine |
| 79 | * B and decrement the depot index. |
| 80 | * |
| 81 | * _____________________ _______________________________________ |
| 82 | * | Per-CPU Cache | | Depot Layer | |
| 83 | * | | | | |
| 84 | * | A___ ____ | | ____ B___ ____ ____ | |
| 85 | * | | | | | | | | ## | | ## | | | | | | |
| 86 | * | | | | | | | | ## | | ## | | | | | | |
| 87 | * | | | | | | | | ## | | ## | | | | | | |
| 88 | * | | | | | | | | ## | | ## | | | | | | |
| 89 | * | |____| |____| | | |_##_| |_##_| |____| |____| | |
| 90 | * | Current Previous | | | |
| 91 | * |_____________________| |_______________________________________| |
| 92 | * |
| 93 | * The bottom layer is the Zone Allocator. This is already implemented in |
| 94 | * XNU and will remain mostly unchanged. Implementation for this can be found |
| 95 | * in zalloc.c and zalloc.h. We will only use the zone if all other layers are |
| 96 | * unable to satisfy the allocation or free. When we do use the zone, we will |
| 97 | * try to allocate an entire magazine of elements or free an entire magazine of |
| 98 | * elements at once. |
| 99 | * |
| 100 | * Caching must be enabled explicitly, by calling zone_change() with the |
| 101 | * Z_CACHING_ENABLED flag, for every zone you want to cache elements for. Zones |
| 102 | * which are good candidates for this are ones with highly contended zone locks. |
| 103 | * |
| 104 | * Some good potential candidates are kalloc.16, kalloc.48, Vm objects, VM map |
| 105 | * entries, ipc vouchers, and ipc ports. |
| 106 | * |
| 107 | * |
| 108 | * Some factors can be tuned by boot-arg: |
| 109 | * zcc_enable_for_zone_name name of a single zone to enable caching for |
| 110 | * (replace space characters with '.') |
| 111 | * |
| 112 | * zcc_magazine_element_count integer value for magazine size used for all |
| 113 | * zones (default 8 is used if not specified) |
| 114 | * |
| 115 | * zcc_depot_element_count integer value for how many full and empty |
| 116 | * magazines to store in the depot, if N specified |
| 117 | * depot will have N full and N empty magazines |
| 118 | * (default 16 used if not specified) |
| 119 | */ |
| 120 | #include <kern/kern_types.h> |
| 121 | #include <vm/vm_kern.h> |
| 122 | |
| 123 | |
| 124 | /* |
| 125 | * zcache_ready |
| 126 | * |
| 127 | * Description: returns whether or not the zone caches are ready to use |
| 128 | * |
| 129 | */ |
| 130 | bool zcache_ready(void); |
| 131 | |
| 132 | |
| 133 | /* |
| 134 | * zcache_bootstrap |
| 135 | * |
| 136 | * Description: initializes zone to allocate magazines from |
| 137 | * |
| 138 | */ |
| 139 | void zcache_bootstrap(void); |
| 140 | |
| 141 | |
| 142 | /* |
| 143 | * zcache_init |
| 144 | * |
| 145 | * Description: Initializes all parts of the per-cpu caches for a given zone |
| 146 | * |
| 147 | * Parameters: zone pointer to zone on which to iniitalize caching |
| 148 | * |
| 149 | */ |
| 150 | void zcache_init(zone_t zone); |
| 151 | |
| 152 | |
| 153 | /* |
| 154 | * zcache_free_to_cpu_cache |
| 155 | * |
| 156 | * Description: Checks per-cpu caches to free element there if possible |
| 157 | * |
| 158 | * Parameters: zone pointer to zone for which element comes from |
| 159 | * addr pointer to element to free |
| 160 | * |
| 161 | * Returns: TRUE if successfull, FALSE otherwise |
| 162 | * |
| 163 | * Precondition: check that caching is enabled for zone |
| 164 | */ |
| 165 | bool zcache_free_to_cpu_cache(zone_t zone, void *addr); |
| 166 | |
| 167 | |
| 168 | /* |
| 169 | * zcache_alloc_from_cpu_cache |
| 170 | * |
| 171 | * Description: Checks per-cpu caches to allocate element from there if possible |
| 172 | * |
| 173 | * Parameters: zone pointer to zone for which element will come from |
| 174 | * |
| 175 | * Returns: pointer to usable element |
| 176 | * |
| 177 | * Precondition: check that caching is enabled for zone |
| 178 | */ |
| 179 | vm_offset_t zcache_alloc_from_cpu_cache(zone_t zone); |
| 180 | |
| 181 | /* |
| 182 | * zcache_drain_depot |
| 183 | * |
| 184 | * Description: Frees all the full magazines from the depot layer to the zone allocator |
| 185 | * Invoked by zone_gc() |
| 186 | * |
| 187 | * Parameters: zone pointer to zone for which the depot layer needs to be drained |
| 188 | * |
| 189 | * Returns: None |
| 190 | * |
| 191 | */ |
| 192 | void zcache_drain_depot(zone_t zone); |
| 193 | |