| 1 | /* |
| 2 | * Copyright (c) 2006-2017 Apple Inc. All rights reserved. |
| 3 | * |
| 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
| 5 | * |
| 6 | * This file contains Original Code and/or Modifications of Original Code |
| 7 | * as defined in and that are subject to the Apple Public Source License |
| 8 | * Version 2.0 (the 'License'). You may not use this file except in |
| 9 | * compliance with the License. The rights granted to you under the License |
| 10 | * may not be used to create, or enable the creation or redistribution of, |
| 11 | * unlawful or unlicensed copies of an Apple operating system, or to |
| 12 | * circumvent, violate, or enable the circumvention or violation of, any |
| 13 | * terms of an Apple operating system software license agreement. |
| 14 | * |
| 15 | * Please obtain a copy of the License at |
| 16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
| 17 | * |
| 18 | * The Original Code and all software distributed under the License are |
| 19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
| 20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
| 21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
| 22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
| 23 | * Please see the License for the specific language governing rights and |
| 24 | * limitations under the License. |
| 25 | * |
| 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
| 27 | */ |
| 28 | #ifndef _SYS_MCACHE_H |
| 29 | #define _SYS_MCACHE_H |
| 30 | |
| 31 | #ifdef KERNEL_PRIVATE |
| 32 | |
| 33 | #ifdef __cplusplus |
| 34 | extern "C" { |
| 35 | #endif |
| 36 | |
| 37 | #include <sys/types.h> |
| 38 | #include <sys/queue.h> |
| 39 | #include <mach/boolean.h> |
| 40 | #include <kern/locks.h> |
| 41 | #include <libkern/OSAtomic.h> |
| 42 | |
/*
 * Discard any VERIFY/ASSERT definitions that are already in effect so that
 * the ones below always win.
 */
#if defined(VERIFY)
#undef VERIFY
#endif

#if defined(ASSERT)
#undef ASSERT
#endif

/*
 * VERIFY(EX) is compiled in on every build: when EX evaluates to false,
 * assfail() is called with the stringified expression, the file name and
 * the line number.
 *
 * ASSERT(EX) is the same as VERIFY(EX) on DEBUG/DEVELOPMENT builds only;
 * on any other build it expands to a no-op and EX is not evaluated.
 */
#define VERIFY(EX)	\
	((void)(__probable((EX)) || assfail(#EX, __FILE__, __LINE__)))

#if !(DEBUG || DEVELOPMENT)
#define ASSERT(EX)	((void)0)
#else
#define ASSERT(EX)	VERIFY(EX)
#endif /* !(DEBUG || DEVELOPMENT) */
| 61 | |
/*
 * Build-time assertion: the translation unit fails to compile unless x is
 * a non-zero integer constant expression.  This should be on its own
 * someday.
 */
#define _CASSERT(x)	\
	_Static_assert(x, "compile-time assertion failed")
| 66 | |
/*
 * Atomic macros; these should be on their own someday.
 *
 * atomic_add_N(a, n) adds n to the N-bit integer that a points to by way
 * of the OSAddAtomic*() primitives.  The "_ov" variants evaluate to the
 * value returned by the primitive, converted to the matching unsigned
 * type; the plain variants discard that value.
 *
 * The pointer argument is parenthesized before it is cast, so that a
 * compound expression such as (base + off) is converted as a whole rather
 * than having the cast bind to its first operand only.
 */
#define atomic_add_16_ov(a, n)						\
	((u_int16_t) OSAddAtomic16((n), (volatile SInt16 *)(a)))

#define atomic_add_16(a, n)						\
	((void) atomic_add_16_ov(a, n))

#define atomic_add_32_ov(a, n)						\
	((u_int32_t) OSAddAtomic((n), (volatile SInt32 *)(a)))

#define atomic_add_32(a, n)						\
	((void) atomic_add_32_ov(a, n))

#define atomic_add_64_ov(a, n)						\
	((u_int64_t) OSAddAtomic64((n), (volatile SInt64 *)(a)))

#define atomic_add_64(a, n)						\
	((void) atomic_add_64_ov(a, n))
| 87 | |
/*
 * atomic_test_set_N(a, o, n) hands o, n and the pointer a to the
 * OSCompareAndSwap*() primitive and evaluates to its result.
 *
 * atomic_set_N(a, n) keeps retrying atomic_test_set_N() with the value
 * currently read from *(a) as the expected one, until the primitive
 * reports success.  Note that a is evaluated more than once, so it must
 * not have side effects.
 *
 * The pointer argument is parenthesized wherever it is cast or
 * dereferenced; without that, atomic_set_32(p + 1, v) would compare
 * against *p + 1 instead of *(p + 1).
 */
#define atomic_test_set_32(a, o, n)					\
	OSCompareAndSwap((o), (n), (volatile UInt32 *)(a))

#define atomic_set_32(a, n) do {					\
	while (!atomic_test_set_32(a, *(a), n))				\
		;							\
} while (0)

#define atomic_test_set_64(a, o, n)					\
	OSCompareAndSwap64((o), (n), (volatile UInt64 *)(a))

#define atomic_set_64(a, n) do {					\
	while (!atomic_test_set_64(a, *(a), n))				\
		;							\
} while (0)
| 103 | |
/*
 * atomic_get_64(n, a): store into n the 64-bit value that a points to.
 * When __LP64__ is defined this is an ordinary load; otherwise the value
 * is obtained by atomically adding zero through atomic_add_64_ov().
 */
#if !defined(__LP64__)
#define atomic_get_64(n, a) do {					\
	(n) = atomic_add_64_ov(a, 0);					\
} while (0)
#else /* __LP64__ */
#define atomic_get_64(n, a) do {					\
	(n) = *(a);							\
} while (0)
#endif /* !__LP64__ */
| 113 | |
/*
 * Pointer-sized counterparts of atomic_test_set_N()/atomic_set_N(), built
 * on OSCompareAndSwapPtr().  atomic_set_ptr() retries until the primitive
 * reports success, using the value currently read from *(a) as the
 * expected one; a is evaluated more than once.
 *
 * The pointer argument is parenthesized wherever it is cast or
 * dereferenced so that compound expressions are handled as a whole.
 */
#define atomic_test_set_ptr(a, o, n)					\
	OSCompareAndSwapPtr((o), (n), (void * volatile *)(a))

#define atomic_set_ptr(a, n) do {					\
	while (!atomic_test_set_ptr(a, *(a), n))			\
		;							\
} while (0)
| 121 | |
/*
 * atomic_or_N(a, n) ORs n into the N-bit integer that a points to by way
 * of the OSBitOrAtomic*() primitives.  The "_ov" variants evaluate to the
 * value returned by the primitive, converted to the matching unsigned
 * type; the plain variants discard it.  atomic_bitset_N() is another name
 * for atomic_or_N().
 *
 * The pointer argument is parenthesized before it is cast, so that a
 * compound expression is converted as a whole.
 */
#define atomic_or_8_ov(a, n)						\
	((u_int8_t) OSBitOrAtomic8((n), (volatile UInt8 *)(a)))

#define atomic_or_8(a, n)						\
	((void) atomic_or_8_ov(a, n))

#define atomic_bitset_8(a, n)						\
	atomic_or_8(a, n)

#define atomic_or_16_ov(a, n)						\
	((u_int16_t) OSBitOrAtomic16((n), (volatile UInt16 *)(a)))

#define atomic_or_16(a, n)						\
	((void) atomic_or_16_ov(a, n))

#define atomic_bitset_16(a, n)						\
	atomic_or_16(a, n)

#define atomic_or_32_ov(a, n)						\
	((u_int32_t) OSBitOrAtomic((n), (volatile UInt32 *)(a)))

#define atomic_or_32(a, n)						\
	((void) atomic_or_32_ov(a, n))

#define atomic_bitset_32(a, n)						\
	atomic_or_32(a, n)

#define atomic_bitset_32_ov(a, n)					\
	atomic_or_32_ov(a, n)
| 151 | |
/*
 * atomic_and_N(a, n) ANDs n into the N-bit integer that a points to by
 * way of the OSBitAndAtomic*() primitives.  The "_ov" variants evaluate to
 * the value returned by the primitive, converted to the matching unsigned
 * type; the plain variants discard it.  atomic_bitclear_N(a, n) clears the
 * bits set in n by ANDing with ~(n).
 *
 * The pointer argument is parenthesized before it is cast, so that a
 * compound expression is converted as a whole.
 */
#define atomic_and_8_ov(a, n)						\
	((u_int8_t) OSBitAndAtomic8((n), (volatile UInt8 *)(a)))

#define atomic_and_8(a, n)						\
	((void) atomic_and_8_ov(a, n))

#define atomic_bitclear_8(a, n)						\
	atomic_and_8(a, ~(n))

#define atomic_and_16_ov(a, n)						\
	((u_int16_t) OSBitAndAtomic16((n), (volatile UInt16 *)(a)))

#define atomic_and_16(a, n)						\
	((void) atomic_and_16_ov(a, n))

#define atomic_bitclear_16(a, n)					\
	atomic_and_16(a, ~(n))

#define atomic_and_32_ov(a, n)						\
	((u_int32_t) OSBitAndAtomic((n), (volatile UInt32 *)(a)))

#define atomic_and_32(a, n)						\
	((void) atomic_and_32_ov(a, n))

#define atomic_bitclear_32(a, n)					\
	atomic_and_32(a, ~(n))

/* membar_sync is an alias for OSMemoryBarrier. */
#define membar_sync	OSMemoryBarrier
| 180 | |
/*
 * CPU_CACHE_LINE_SIZE expands to a call to mcache_cache_line_size(), while
 * MAX_CPU_CACHE_LINE_SIZE is a fixed constant.  Prefer the former, unless
 * wasting space is of no concern.
 */
#define CPU_CACHE_LINE_SIZE	mcache_cache_line_size()
#define MAX_CPU_CACHE_LINE_SIZE	128
| 187 | |
/*
 * Power-of-two alignment helpers.  Each macro is defined here only when no
 * definition is already in effect.  Every argument is converted to
 * uintptr_t as a whole before any arithmetic is done on it; the bit tricks
 * used are only meaningful when the alignment is a power of two.
 */

/* Non-zero when v is a multiple of a. */
#ifndef IS_P2ALIGNED
#define IS_P2ALIGNED(v, a)						\
	((((uintptr_t)(v)) & ((uintptr_t)(a) - 1)) == 0)
#endif /* IS_P2ALIGNED */

/*
 * Round x up to the next multiple of align.  The align argument must be
 * parenthesized before the cast: otherwise an unsigned int expression such
 * as (cond ? 8u : 16u) would be negated at 32 bits and then zero-extended,
 * yielding a mask with its upper bits clear on LP64.
 */
#ifndef P2ROUNDUP
#define P2ROUNDUP(x, align)						\
	(-(-((uintptr_t)(x)) & -((uintptr_t)(align))))
#endif /* P2ROUNDUP */

/* Round x down to a multiple of align (mask built as ~(align - 1)). */
#ifndef P2ROUNDDOWN
#define P2ROUNDDOWN(x, align)						\
	(((uintptr_t)(x)) & ~((uintptr_t)(align) - 1))
#endif /* P2ROUNDDOWN */

/* Round x down to a multiple of align (mask built as -align). */
#ifndef P2ALIGN
#define P2ALIGN(x, align)						\
	((uintptr_t)(x) & -((uintptr_t)(align)))
#endif /* P2ALIGN */
| 207 | |
/*
 * 64-bit fill patterns.  NOTE(review): judging by the names these mark
 * freed and uninitialized buffers respectively -- confirm against the
 * callers of mcache_set_pattern()/mcache_verify_pattern().
 */
#define MCACHE_UNINITIALIZED_PATTERN	0xBADDCAFEBADDCAFEULL
#define MCACHE_FREE_PATTERN		0xDEADBEEFDEADBEEFULL
| 210 | |
| 211 | /* |
| 212 | * mcache allocation request flags. |
| 213 | * |
| 214 | * MCR_NOSLEEP and MCR_FAILOK are mutually exclusive. The latter is used |
| 215 | * by the mbuf allocator to handle the implementation of several caches that |
| 216 | * involve multiple layers of mcache. It implies a best effort blocking |
| 217 | * allocation request; if the request cannot be satisfied, the caller will |
| 218 | * be blocked until further notice, similar to MCR_SLEEP, except that upon |
| 219 | * a wake up it will return immediately to the caller regardless of whether |
 * the request has been fulfilled.
| 221 | * |
| 222 | * MCR_TRYHARD implies a non-blocking allocation request, regardless of |
| 223 | * whether MCR_NOSLEEP is set. It informs the allocator that the request |
| 224 | * should not cause the calling thread to block, and that it must have |
| 225 | * exhausted all possible schemes to fulfill the request, including doing |
| 226 | * reclaims and/or purges, before returning to the caller. |
| 227 | * |
| 228 | * Regular mcache clients should only use MCR_SLEEP or MCR_NOSLEEP. |
| 229 | */ |
#define MCR_SLEEP	0		/* 0x0000; same as M_WAITOK */
#define MCR_NOSLEEP	(1 << 0)	/* 0x0001; same as M_NOWAIT */
#define MCR_FAILOK	(1 << 8)	/* 0x0100; private, for internal use only */
#define MCR_TRYHARD	(1 << 9)	/* 0x0200; private, for internal use only */
#define MCR_USR1	(1 << 12)	/* 0x1000; private, for internal use only */

/* Union of the NOSLEEP, FAILOK and TRYHARD request flags. */
#define MCR_NONBLOCKING	(MCR_TRYHARD | MCR_FAILOK | MCR_NOSLEEP)
| 237 | |
/*
 * Singly-linked list node.  mcache_alloc_ext() requests use it to string
 * the allocated objects together before they are handed back to the
 * caller.
 */
struct mcache_obj {
	struct mcache_obj	*obj_next;
};
typedef struct mcache_obj mcache_obj_t;
| 246 | |
/*
 * A bucket: a link to the next bucket in the list, followed by an array of
 * object pointers that is declared with a single element.
 */
struct mcache_bkt {
	/* next bucket in list */
	void	*bkt_next;
	/* one or more objects */
	void	*bkt_obj[1];
};
typedef struct mcache_bkt mcache_bkt_t;
| 251 | |
| 252 | typedef struct mcache_bktlist { |
| 253 | mcache_bkt_t *bl_list; /* bucket list */ |
| 254 | u_int32_t bl_total; /* number of buckets */ |
| 255 | u_int32_t bl_min; /* min since last update */ |
| 256 | u_int32_t bl_reaplimit; /* max reapable buckets */ |
| 257 | u_int64_t bl_alloc; /* allocations from this list */ |
| 258 | } mcache_bktlist_t; |
| 259 | |
/*
 * Describes one bucket type: how many elements a bucket holds, the range
 * of buffer sizes it applies to, and the cache its buckets come from.
 */
struct mcache_bkttype {
	/* bucket size (number of elements) */
	int		bt_bktsize;
	/* all smaller buffers qualify */
	size_t		bt_minbuf;
	/* no larger buffers qualify */
	size_t		bt_maxbuf;
	/* bucket cache */
	struct mcache	*bt_cache;
};
typedef struct mcache_bkttype mcache_bkttype_t;
| 266 | |
| 267 | typedef struct mcache_cpu { |
| 268 | decl_lck_mtx_data(, cc_lock); |
| 269 | mcache_bkt_t *cc_filled; /* the currently filled bucket */ |
| 270 | mcache_bkt_t *cc_pfilled; /* the previously filled bucket */ |
| 271 | u_int64_t cc_alloc; /* allocations from this cpu */ |
| 272 | u_int64_t cc_free; /* frees to this cpu */ |
| 273 | int cc_objs; /* number of objects in filled bkt */ |
| 274 | int cc_pobjs; /* number of objects in previous bkt */ |
| 275 | int cc_bktsize; /* number of elements in a full bkt */ |
| 276 | } __attribute__((aligned(MAX_CPU_CACHE_LINE_SIZE))) mcache_cpu_t; |
| 277 | |
| 278 | typedef unsigned int (*mcache_allocfn_t)(void *, mcache_obj_t ***, |
| 279 | unsigned int, int); |
| 280 | typedef void (*mcache_freefn_t)(void *, mcache_obj_t *, boolean_t); |
| 281 | typedef void (*mcache_auditfn_t)(void *, mcache_obj_t *, boolean_t); |
| 282 | typedef void (*mcache_logfn_t)(u_int32_t, mcache_obj_t *, boolean_t); |
| 283 | typedef void (*mcache_notifyfn_t)(void *, u_int32_t); |
| 284 | |
| 285 | typedef struct mcache { |
| 286 | /* |
| 287 | * Cache properties |
| 288 | */ |
| 289 | LIST_ENTRY(mcache) mc_list; /* cache linkage */ |
| 290 | char mc_name[32]; /* cache name */ |
| 291 | struct zone *mc_slab_zone; /* backend zone allocator */ |
| 292 | mcache_allocfn_t mc_slab_alloc; /* slab layer allocate callback */ |
| 293 | mcache_freefn_t mc_slab_free; /* slab layer free callback */ |
| 294 | mcache_auditfn_t mc_slab_audit; /* slab layer audit callback */ |
| 295 | mcache_logfn_t mc_slab_log; /* slab layer log callback */ |
| 296 | mcache_notifyfn_t mc_slab_notify; /* slab layer notify callback */ |
| 297 | void *mc_private; /* opaque arg to callbacks */ |
| 298 | size_t mc_bufsize; /* object size */ |
| 299 | size_t mc_align; /* object alignment */ |
| 300 | u_int32_t mc_flags; /* cache creation flags */ |
| 301 | u_int32_t mc_purge_cnt; /* # of purges requested by slab */ |
| 302 | u_int32_t mc_enable_cnt; /* # of reenables due to purges */ |
| 303 | u_int32_t mc_waiter_cnt; /* # of slab layer waiters */ |
| 304 | u_int32_t mc_wretry_cnt; /* # of wait retries */ |
| 305 | u_int32_t mc_nwretry_cnt; /* # of no-wait retry attempts */ |
| 306 | u_int32_t mc_nwfail_cnt; /* # of no-wait retries that failed */ |
| 307 | decl_lck_mtx_data(, mc_sync_lock); /* protects purges and reenables */ |
| 308 | lck_attr_t *mc_sync_lock_attr; |
| 309 | lck_grp_t *mc_sync_lock_grp; |
| 310 | lck_grp_attr_t *mc_sync_lock_grp_attr; |
| 311 | /* |
| 312 | * Keep CPU and buckets layers lock statistics separate. |
| 313 | */ |
| 314 | lck_attr_t *mc_cpu_lock_attr; |
| 315 | lck_grp_t *mc_cpu_lock_grp; |
| 316 | lck_grp_attr_t *mc_cpu_lock_grp_attr; |
| 317 | |
| 318 | /* |
| 319 | * Bucket layer common to all CPUs |
| 320 | */ |
| 321 | decl_lck_mtx_data(, mc_bkt_lock); |
| 322 | lck_attr_t *mc_bkt_lock_attr; |
| 323 | lck_grp_t *mc_bkt_lock_grp; |
| 324 | lck_grp_attr_t *mc_bkt_lock_grp_attr; |
| 325 | mcache_bkttype_t *cache_bkttype; /* bucket type */ |
| 326 | mcache_bktlist_t mc_full; /* full buckets */ |
| 327 | mcache_bktlist_t mc_empty; /* empty buckets */ |
| 328 | size_t mc_chunksize; /* bufsize + alignment */ |
| 329 | u_int32_t mc_bkt_contention; /* lock contention count */ |
| 330 | u_int32_t mc_bkt_contention_prev; /* previous snapshot */ |
| 331 | |
| 332 | /* |
| 333 | * Per-CPU layer, aligned at cache line boundary |
| 334 | */ |
| 335 | mcache_cpu_t mc_cpu[1] |
| 336 | __attribute__((aligned(MAX_CPU_CACHE_LINE_SIZE))); |
| 337 | } mcache_t; |
| 338 | |
/* default guaranteed alignment */
#define MCACHE_ALIGN		8

/*
 * Valid values for mc_flags
 */
#define MCF_VERIFY	(1 << 0)	/* 0x001; enable verification */
#define MCF_TRACE	(1 << 1)	/* 0x002; enable transaction auditing */
#define MCF_NOCPUCACHE	(1 << 4)	/* 0x010; disable CPU layer caching */
#define MCF_NOLEAKLOG	(1 << 8)	/* 0x100; disable leak logging */
#define MCF_EXPLEAKLOG	(1 << 9)	/* 0x200; expose leak info to user space */

/* Both debugging flags at once. */
#define MCF_DEBUG	(MCF_TRACE | MCF_VERIFY)

/* Every flag defined above. */
#define MCF_FLAGS_MASK							\
	(MCF_EXPLEAKLOG | MCF_NOLEAKLOG | MCF_NOCPUCACHE | MCF_DEBUG)

/*
 * Valid values for notify callback
 */
#define MCN_RETRYALLOC	(1 << 0)	/* Allocation should be retried */

/* Size of the mca_stack[] array in each recorded transaction. */
#define MCACHE_STACK_DEPTH	16

/* Number of transactions to record */
#define MCA_TRN_MAX		2
| 358 | |
| 359 | typedef struct mcache_audit { |
| 360 | struct mcache_audit *mca_next; /* next audit struct */ |
| 361 | void *mca_addr; /* address of buffer */ |
| 362 | mcache_t *mca_cache; /* parent cache of the buffer */ |
| 363 | size_t mca_contents_size; /* size of saved contents */ |
| 364 | void *mca_contents; /* user-specific saved contents */ |
| 365 | void *mca_uptr; /* user-specific pointer */ |
| 366 | uint32_t mca_uflags; /* user-specific flags */ |
| 367 | uint32_t mca_next_trn; |
| 368 | struct mca_trn { |
| 369 | struct thread *mca_thread; /* thread doing transaction */ |
| 370 | uint32_t mca_tstamp; |
| 371 | uint16_t mca_depth; |
| 372 | void *mca_stack[MCACHE_STACK_DEPTH]; |
| 373 | } mca_trns[MCA_TRN_MAX]; |
| 374 | } mcache_audit_t; |
| 375 | |
| 376 | __private_extern__ int assfail(const char *, const char *, int); |
| 377 | __private_extern__ void mcache_init(void); |
| 378 | __private_extern__ unsigned int mcache_getflags(void); |
| 379 | __private_extern__ unsigned int mcache_cache_line_size(void); |
| 380 | __private_extern__ mcache_t *mcache_create(const char *, size_t, |
| 381 | size_t, u_int32_t, int); |
| 382 | __private_extern__ void *mcache_alloc(mcache_t *, int); |
| 383 | __private_extern__ void mcache_free(mcache_t *, void *); |
| 384 | __private_extern__ mcache_t *mcache_create_ext(const char *, size_t, |
| 385 | mcache_allocfn_t, mcache_freefn_t, mcache_auditfn_t, mcache_logfn_t, |
| 386 | mcache_notifyfn_t, void *, u_int32_t, int); |
| 387 | __private_extern__ void mcache_destroy(mcache_t *); |
| 388 | __private_extern__ unsigned int mcache_alloc_ext(mcache_t *, mcache_obj_t **, |
| 389 | unsigned int, int); |
| 390 | __private_extern__ void mcache_free_ext(mcache_t *, mcache_obj_t *); |
| 391 | __private_extern__ void mcache_reap(void); |
| 392 | __private_extern__ void mcache_reap_now(mcache_t *, boolean_t); |
| 393 | __private_extern__ boolean_t mcache_purge_cache(mcache_t *, boolean_t); |
| 394 | __private_extern__ void mcache_waiter_inc(mcache_t *); |
| 395 | __private_extern__ void mcache_waiter_dec(mcache_t *); |
| 396 | __private_extern__ boolean_t mcache_bkt_isempty(mcache_t *); |
| 397 | |
| 398 | __private_extern__ void mcache_buffer_log(mcache_audit_t *, void *, mcache_t *, |
| 399 | struct timeval *); |
| 400 | __private_extern__ void mcache_set_pattern(u_int64_t, void *, size_t); |
| 401 | __private_extern__ void *mcache_verify_pattern(u_int64_t, void *, size_t); |
| 402 | __private_extern__ void *mcache_verify_set_pattern(u_int64_t, u_int64_t, |
| 403 | void *, size_t); |
| 404 | __private_extern__ void mcache_audit_free_verify(mcache_audit_t *, |
| 405 | void *, size_t, size_t); |
| 406 | __private_extern__ void mcache_audit_free_verify_set(mcache_audit_t *, |
| 407 | void *, size_t, size_t); |
| 408 | __private_extern__ char *mcache_dump_mca(mcache_audit_t *); |
| 409 | __private_extern__ void mcache_audit_panic(mcache_audit_t *, void *, size_t, |
| 410 | int64_t, int64_t); |
| 411 | |
| 412 | extern int32_t total_sbmb_cnt; |
| 413 | extern int32_t total_sbmb_cnt_floor; |
| 414 | extern int32_t total_sbmb_cnt_peak; |
| 415 | extern int64_t sbmb_limreached; |
| 416 | extern mcache_t *mcache_audit_cache; |
| 417 | |
| 418 | #ifdef __cplusplus |
| 419 | } |
| 420 | #endif |
| 421 | |
| 422 | #endif /* KERNEL_PRIVATE */ |
| 423 | |
| 424 | #endif /* _SYS_MCACHE_H */ |
| 425 | |