| 1 | /* |
| 2 | * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. |
| 3 | * |
| 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
| 5 | * |
| 6 | * This file contains Original Code and/or Modifications of Original Code |
| 7 | * as defined in and that are subject to the Apple Public Source License |
| 8 | * Version 2.0 (the 'License'). You may not use this file except in |
| 9 | * compliance with the License. The rights granted to you under the License |
| 10 | * may not be used to create, or enable the creation or redistribution of, |
| 11 | * unlawful or unlicensed copies of an Apple operating system, or to |
| 12 | * circumvent, violate, or enable the circumvention or violation of, any |
| 13 | * terms of an Apple operating system software license agreement. |
| 14 | * |
| 15 | * Please obtain a copy of the License at |
| 16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
| 17 | * |
| 18 | * The Original Code and all software distributed under the License are |
| 19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
| 20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
| 21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
| 22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
| 23 | * Please see the License for the specific language governing rights and |
| 24 | * limitations under the License. |
| 25 | * |
| 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
| 27 | */ |
| 28 | /* |
| 29 | * Copyright (c) 1999,2000 Jonathan Lemon <jlemon@FreeBSD.org> |
| 30 | * All rights reserved. |
| 31 | * |
| 32 | * Redistribution and use in source and binary forms, with or without |
| 33 | * modification, are permitted provided that the following conditions |
| 34 | * are met: |
| 35 | * 1. Redistributions of source code must retain the above copyright |
| 36 | * notice, this list of conditions and the following disclaimer. |
| 37 | * 2. Redistributions in binary form must reproduce the above copyright |
| 38 | * notice, this list of conditions and the following disclaimer in the |
| 39 | * documentation and/or other materials provided with the distribution. |
| 40 | * |
| 41 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
| 42 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 43 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 44 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
| 45 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 46 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| 47 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 48 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 49 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| 50 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 51 | * SUCH DAMAGE. |
| 52 | * |
| 53 | * $FreeBSD: src/sys/sys/eventvar.h,v 1.1.2.2 2000/07/18 21:49:12 jlemon Exp $ |
| 54 | */ |
| 55 | |
| 56 | #ifndef _SYS_EVENTVAR_H_ |
| 57 | #define _SYS_EVENTVAR_H_ |
| 58 | |
| 59 | #include <sys/event.h> |
| 60 | #include <sys/select.h> |
| 61 | #include <kern/kern_types.h> |
| 62 | #include <kern/waitq.h> |
| 63 | |
| 64 | #if defined(XNU_KERNEL_PRIVATE) |
| 65 | |
| 66 | typedef int (*kevent_callback_t)(struct kqueue *, struct kevent_internal_s *, void *); /* callback type taken by kqueue_scan(): receives the kqueue, a kevent_internal_s and an opaque pointer, returns an int (presumably an errno-style status -- confirm against callers) */ |
| 67 | typedef void (*kqueue_continue_t)(struct kqueue *, void *, int); /* second callback type taken by kqueue_scan(): receives the kqueue, an opaque pointer and an int (presumably a continuation invoked with the scan's result -- confirm) */ |
| 68 | |
| 69 | #include <stdint.h> |
| 70 | #include <kern/locks.h> |
| 71 | #include <mach/thread_policy.h> |
| 72 | #include <pthread/workqueue_internal.h> |
| 73 | |
| 74 | /* |
| 75 | * Lock ordering: |
| 76 | * |
| 77 | * The kqueue locking order can follow a few different patterns: |
| 78 | * |
| 79 | * Standard file-based kqueues (from above): |
| 80 | * proc fd lock -> kq lock -> kq-waitq-set lock -> thread lock |
| 81 | * |
| 82 | * WorkQ/WorkLoop kqueues (from above): |
| 83 | * proc fd lock -> kq lock -> kq-request lock -> pthread kext locks -> thread lock |
| 84 | * |
| 85 | * Whenever a kqueue interacts with source locks, it drops all of its own |
| 86 | * locks in exchange for a use-reference on the knote used to synchronize |
| 87 | * with the source code. When those sources post events from below, they |
| 88 | * have the following lock hierarchy: |
| 89 | * |
| 90 | * Standard file-based kqueues (from below): |
| 91 | * XXX lock -> kq lock -> kq-waitq-set lock -> thread lock |
| 92 | * Standard file-based kqueues with non-kq-aware sources (from below): |
| 93 | * XXX lock -> kq-waitq-set lock -> thread lock |
| 94 | * |
| 95 | * WorkQ/WorkLoop kqueues (from below): |
| 96 | * XXX lock -> kq lock -> kq-request lock -> pthread kext locks -> thread lock |
| 97 | * WorkQ/WorkLoop kqueues with non-kq-aware sources (from below): |
| 98 | * XXX lock -> kq-waitq-set lock -> kq-request lock -> pthread kext locks -> thread lock |
| 99 | */ |
| 100 | |
| 101 | #define KQEXTENT 256 /* linear growth by this amount (NOTE(review): what is grown is not visible in this header -- presumably a knote table; confirm at the use site) */ |
| 102 | |
| 103 | struct knote_lock_ctx { /* bookkeeping record for one knote lock; KNOTE_LOCK_CTX() declares one on the stack */ |
| 104 | struct knote *knlc_knote; /* knote this context refers to */ |
| 105 | thread_t knlc_thread; /* thread associated with this context (presumably the locking or waiting thread -- confirm) */ |
| 106 | // TODO: knlc_turnstile |
| 107 | TAILQ_HEAD(, knote_lock_ctx) knlc_head; /* tail queue of other contexts hanging off this one (presumably waiters, linked through knlc_tqe -- confirm) */ |
| 108 | union { /* the two linkages share storage, so a context can be on only one kind of queue at a time */ |
| 109 | LIST_ENTRY(knote_lock_ctx) knlc_le; /* LIST linkage, as needed by a struct knote_locks list such as kq_knlocks */ |
| 110 | TAILQ_ENTRY(knote_lock_ctx) knlc_tqe; /* TAILQ linkage, as needed by a knlc_head queue */ |
| 111 | }; |
| 112 | #if DEBUG || DEVELOPMENT |
| 113 | #define KNOTE_LOCK_CTX_UNLOCKED 0 |
| 114 | #define KNOTE_LOCK_CTX_LOCKED 1 |
| 115 | #define KNOTE_LOCK_CTX_WAITING 2 |
| 116 | int knlc_state; /* one of the KNOTE_LOCK_CTX_* values above; this field only exists in DEBUG/DEVELOPMENT builds, so the struct size differs between build flavors */ |
| 117 | #endif |
| 118 | }; |
| 119 | LIST_HEAD(knote_locks, knote_lock_ctx); /* defines struct knote_locks: a list head of knote_lock_ctx entries */ |
| 120 | |
| 121 | #if DEBUG || DEVELOPMENT |
| 122 | /* |
| 123 | * KNOTE_LOCK_CTX(name) is a convenience macro to define a knote lock context on |
| 124 | * the stack named `name`. In development kernels, it uses tricks to make sure |
| 125 | * no lock is still held when exiting the C-scope that contains this context: the variable carries the `cleanup` attribute, so knote_lock_ctx_chk() is called with its address when it goes out of scope. The development expansion is a declaration followed by an assignment to knlc_state, so it is only usable where both are allowed (block scope), and the caller supplies the final `;`. |
| 126 | */ |
| 127 | __attribute__((noinline,not_tail_called)) |
| 128 | void knote_lock_ctx_chk(struct knote_lock_ctx *ctx); |
| 129 | #define KNOTE_LOCK_CTX(n) \ |
| 130 | struct knote_lock_ctx n __attribute__((cleanup(knote_lock_ctx_chk))); \ |
| 131 | n.knlc_state = KNOTE_LOCK_CTX_UNLOCKED |
| 132 | #else /* other kernels: plain uninitialized declaration, no scope-exit check */ |
| 133 | #define KNOTE_LOCK_CTX(n) \ |
| 134 | struct knote_lock_ctx n |
| 135 | #endif /* DEBUG || DEVELOPMENT */ |
| 136 | |
| 137 | /* |
| 138 | * kqueue - common core definition of a kqueue |
| 139 | * |
| 140 | * No real structures are allocated of this type. They are |
| 141 | * either kqfile, kqworkq or kqworkloop objects - each of which is |
| 142 | * derived from this definition by embedding it as its first member, immediately followed by its own struct kqtailq queue(s). kq_queue is a zero-length array placed at the very end of the core, so it starts where the derived type's queue member begins (kqf_queue, kqwq_queue[], kqwl_queue[]). |
| 143 | */ |
| 144 | struct kqueue { |
| 145 | struct { |
| 146 | struct waitq_set kq_wqs; /* private waitq set */ |
| 147 | lck_spin_t kq_lock; /* kqueue lock */ |
| 148 | uint16_t kq_state; /* state of the kq (presumably KQ_* flag bits -- confirm) */ |
| 149 | uint16_t kq_level; /* nesting level of the kq */ |
| 150 | uint32_t kq_count; /* number of queued events */ |
| 151 | struct proc *kq_p; /* process containing kqueue */ |
| 152 | struct knote_locks kq_knlocks; /* list of knote locks held (list of struct knote_lock_ctx) */ |
| 153 | lck_spin_t kq_reqlock; /* kqueue request lock */ |
| 154 | }; /* make sure struct padding is put before kq_queue: the fields are wrapped in an anonymous struct so any tail padding lands inside it */ |
| 155 | struct kqtailq kq_queue[0]; /* variable array of queues; contributes no storage here, the derived type provides it */ |
| 156 | }; |
| 157 | |
| 158 | #define KQ_SEL 0x001 /* select was recorded for kq (the KQ_* values are distinct single bits; the largest, 0x800, fits a 16-bit field -- presumably kq_state, confirm) */ |
| 159 | #define KQ_SLEEP 0x002 /* thread is waiting for events */ |
| 160 | #define KQ_PROCWAIT 0x004 /* thread waiting for processing */ |
| 161 | #define KQ_KEV32 0x008 /* kq is used with 32-bit events */ |
| 162 | #define KQ_KEV64 0x010 /* kq is used with 64-bit events */ |
| 163 | #define KQ_KEV_QOS 0x020 /* kq events carry QoS info */ |
| 164 | #define KQ_WORKQ 0x040 /* kq is bound to process workq */ |
| 165 | #define KQ_WORKLOOP 0x080 /* kq is part of a workloop */ |
| 166 | #define KQ_PROCESSING 0x100 /* kq is being processed */ |
| 167 | #define KQ_DRAIN 0x200 /* kq is draining */ |
| 168 | #define KQ_WAKEUP 0x400 /* kq awakened while processing */ |
| 169 | #define KQ_DYNAMIC 0x800 /* kqueue is dynamically managed */ |
| 170 | /* |
| 171 | * kqfile - definition of a typical kqueue opened as a file descriptor |
| 172 | * via the kqueue() system call. |
| 173 | * |
| 174 | * Adds selinfo support to the base kqueue definition, as these |
| 175 | * fds can be fed into select(). Unlike kqworkq and kqworkloop, it has a single queue rather than an array of buckets. |
| 176 | */ |
| 177 | struct kqfile { |
| 178 | struct kqueue kqf_kqueue; /* common kqueue core */ |
| 179 | struct kqtailq kqf_queue; /* queue of woken up knotes; declared right after the core, i.e. where kq_queue[0] begins */ |
| 180 | struct kqtailq kqf_suppressed; /* suppression queue */ |
| 181 | struct selinfo kqf_sel; /* parent select/kqueue info */ |
| 182 | }; |
| 183 | |
| 184 | #define kqf_wqs kqf_kqueue.kq_wqs /* kqf_* shorthands: name a field of the embedded kqueue core through a struct kqfile */ |
| 185 | #define kqf_lock kqf_kqueue.kq_lock |
| 186 | #define kqf_state kqf_kqueue.kq_state |
| 187 | #define kqf_level kqf_kqueue.kq_level |
| 188 | #define kqf_count kqf_kqueue.kq_count |
| 189 | #define kqf_p kqf_kqueue.kq_p |
| 190 | |
| 191 | #define QOS_INDEX_KQFILE 0 /* number of qos levels in a file kq -- NOTE(review): the name and the value 0 read as a queue index rather than a count; confirm the intended meaning */ |
| 192 | |
| 193 | /* |
| 194 | * kqrequest - per-QoS thread request status. A struct kqworkq holds one per bucket (kqwq_request[KQWQ_NBUCKETS]); a struct kqworkloop holds a single one (kqwl_request). |
| 195 | */ |
| 196 | struct kqrequest { |
| 197 | struct workq_threadreq_s kqr_req; /* used when request outstanding */ |
| 198 | struct kqtailq kqr_suppressed; /* Per-QoS suppression queues */ |
| 199 | thread_t kqr_thread; /* thread to satisfy request */ |
| 200 | uint8_t kqr_state; /* KQ/workq interaction state (presumably KQR_* flag bits -- confirm) */ |
| 201 | #define KQWL_STAYACTIVE_FIRED_BIT (1 << 0) |
| 202 | uint8_t kqr_wakeup_indexes; /* QoS/override levels that woke (NOTE(review): KQWL_STAYACTIVE_FIRED_BIT above is presumably a bit of this field -- confirm) */ |
| 203 | uint16_t kqr_dsync_waiters; /* number of dispatch sync waiters */ |
| 204 | kq_index_t kqr_stayactive_qos; /* max QoS of stayactive knotes */ |
| 205 | kq_index_t kqr_override_index; /* highest wakeup override index */ |
| 206 | kq_index_t kqr_qos_index; /* QoS for the thread request */ |
| 207 | }; |
| 208 | |
| 209 | |
| 210 | #define KQR_WORKLOOP 0x01 /* owner is a workloop (the KQR_* values are distinct single bits that fit in 8 bits -- presumably stored in kqr_state, confirm) */ |
| 211 | #define KQR_THREQUESTED 0x02 /* thread has been requested from workq */ |
| 212 | #define KQR_WAKEUP 0x04 /* wakeup called during processing */ |
| 213 | #define KQR_THOVERCOMMIT 0x08 /* overcommit needed for thread requests */ |
| 214 | #define KQR_R2K_NOTIF_ARMED 0x10 /* ast notifications armed */ |
| 215 | #define KQR_ALLOCATED_TURNSTILE 0x20 /* kqwl_turnstile is allocated */ |
| 216 | |
| 217 | /* |
| 218 | * WorkQ kqueues need to request threads to service the triggered |
| 219 | * knotes in the queue. These threads are brought up on an |
| 220 | * effective-requested-QoS basis. Knotes are segregated based on |
| 221 | * that value - calculated by computing max(event-QoS, kevent-QoS). |
| 222 | * Only one servicing thread is requested at a time for all the |
| 223 | * knotes at a given effective-requested-QoS. |
| 224 | */ |
| 225 | |
| 226 | #if !defined(KQWQ_QOS_MANAGER) /* only defined here if nothing defined it earlier */ |
| 227 | #define KQWQ_QOS_MANAGER (THREAD_QOS_LAST) /* bucket index equal to THREAD_QOS_LAST (presumably the workq manager's bucket -- confirm) */ |
| 228 | #endif |
| 229 | |
| 230 | #if !defined(KQWQ_NBUCKETS) /* only defined here if nothing defined it earlier */ |
| 231 | #define KQWQ_NBUCKETS (KQWQ_QOS_MANAGER + 1) /* bucket count; KQWQ_QOS_MANAGER is therefore the last valid index */ |
| 232 | #endif |
| 233 | |
| 234 | /* |
| 235 | * kqworkq - definition of a private kqueue used to coordinate event |
| 236 | * handling for pthread work queues. |
| 237 | * |
| 238 | * These have per-qos processing queues and state to coordinate with |
| 239 | * the pthread kext to ask for threads at corresponding pthread priority |
| 240 | * values. The queue and request arrays are both sized KQWQ_NBUCKETS, so the same bucket index addresses a queue and its request state. |
| 241 | */ |
| 242 | struct kqworkq { |
| 243 | struct kqueue kqwq_kqueue; /* common kqueue core */ |
| 244 | struct kqtailq kqwq_queue[KQWQ_NBUCKETS]; /* array of queues; declared right after the core, i.e. where kq_queue[0] begins */ |
| 245 | struct kqrequest kqwq_request[KQWQ_NBUCKETS]; /* per-QoS request states */ |
| 246 | }; |
| 247 | |
| 248 | #define kqwq_wqs kqwq_kqueue.kq_wqs /* kqwq_* shorthands: name a field of the embedded kqueue core through a struct kqworkq */ |
| 249 | #define kqwq_lock kqwq_kqueue.kq_lock |
| 250 | #define kqwq_state kqwq_kqueue.kq_state |
| 251 | #define kqwq_level kqwq_kqueue.kq_level |
| 252 | #define kqwq_count kqwq_kqueue.kq_count |
| 253 | #define kqwq_p kqwq_kqueue.kq_p |
| 254 | |
| 255 | /* |
| 256 | * WorkLoop kqueues need to request a thread to service the triggered |
| 257 | * knotes in the queue. The thread is brought up on an |
| 258 | * effective-requested-QoS basis. Knotes are segregated based on |
| 259 | * that value. Once a request is made, it cannot be undone. If |
| 260 | * events with higher QoS arrive after, they are stored in their |
| 261 | * own queues and an override applied to the original request based |
| 262 | * on the delta between the two QoS values. |
| 263 | */ |
| 264 | |
| 265 | /* |
| 266 | * "Stay-active" knotes are held in a separate bucket that indicates |
| 267 | * special handling required. They are kept separate because the |
| 268 | * wakeups issued to them don't have context to tell us where to go |
| 269 | * to find and process them. All processing of them happens at the |
| 270 | * highest QoS. Unlike WorkQ kqueues, there is no special singular |
| 271 | * "manager thread" for a process. We simply request a servicing |
| 272 | * thread at the highest known QoS when these are woken (or override |
| 273 | * an existing request to that). |
| 274 | */ |
| 275 | #define KQWL_BUCKET_STAYACTIVE (THREAD_QOS_LAST) /* index of the stay-active bucket: same value as THREAD_QOS_LAST */ |
| 276 | |
| 277 | #if !defined(KQWL_NBUCKETS) /* only defined here if nothing defined it earlier */ |
| 278 | #define KQWL_NBUCKETS (KQWL_BUCKET_STAYACTIVE + 1) /* bucket count; the stay-active bucket is therefore the last valid index */ |
| 279 | #endif |
| 280 | |
| 281 | /* |
| 282 | * kqworkloop - definition of a private kqueue used to coordinate event |
| 283 | * handling for pthread workloops. |
| 284 | * |
| 285 | * Workloops vary from workqs in that only a single thread is ever |
| 286 | * requested to service a workloop at a time. But unlike workqs, |
| 287 | * workloops may be "owned" by user-space threads that are |
| 288 | * synchronously draining an event off the workloop. In those cases, |
| 289 | * any overrides have to be applied to the owner until it relinquishes |
| 290 | * ownership. |
| 291 | * |
| 292 | * NOTE: "lane" support is TBD. |
| 293 | */ |
| 294 | struct kqworkloop { |
| 295 | struct kqueue kqwl_kqueue; /* common kqueue core (queue of events) */ |
| 296 | struct kqtailq kqwl_queue[KQWL_NBUCKETS]; /* array of queues; declared right after the core, i.e. where kq_queue[0] begins */ |
| 297 | struct kqrequest kqwl_request; /* thread request state (a single request, unlike kqworkq's per-bucket array) */ |
| 298 | lck_mtx_t kqwl_statelock; /* state/debounce lock */ |
| 299 | thread_t kqwl_owner; /* current [sync] owner thread */ |
| 300 | uint32_t kqwl_retains; /* retain references (32-bit count; see KQ_WORKLOOP_RETAINS_MAX) */ |
| 301 | kqueue_id_t kqwl_dynamicid; /* dynamic identity */ |
| 302 | uint64_t kqwl_params; /* additional parameters (immutable after creation, per the note on kqueue_threadreq_workloop_param()) */ |
| 303 | struct turnstile *kqwl_turnstile; /* turnstile for sync IPC/waiters */ |
| 304 | SLIST_ENTRY(kqworkloop) kqwl_hashlink; /* linkage for search list (a struct kqlist) */ |
| 305 | #if CONFIG_WORKLOOP_DEBUG /* debug-only history ring; everything down to the #endif changes the struct size */ |
| 306 | #define KQWL_HISTORY_COUNT 32 /* number of slots in kqwl_history[] */ |
| 307 | #define KQWL_HISTORY_WRITE_ENTRY(kqwl, ...) ({ \ |
| 308 | struct kqworkloop *__kqwl = (kqwl); \ |
| 309 | unsigned int __index = os_atomic_inc_orig(&__kqwl->kqwl_index, relaxed); \ |
| 310 | __kqwl->kqwl_history[__index % KQWL_HISTORY_COUNT] = \ |
| 311 | (struct kqwl_history)__VA_ARGS__; \ |
| 312 | }) |
| 313 | struct kqwl_history { /* one history record; KQWL_HISTORY_WRITE_ENTRY builds it as a compound literal, so the macro's variadic arguments must form a braced initializer */ |
| 314 | thread_t updater; /* Note: updates can be reordered */ |
| 315 | thread_t servicer; |
| 316 | thread_t old_owner; |
| 317 | thread_t new_owner; |
| 318 | |
| 319 | uint64_t kev_ident; |
| 320 | int16_t error; |
| 321 | uint16_t kev_flags; |
| 322 | uint32_t kev_fflags; |
| 323 | |
| 324 | uint64_t kev_mask; |
| 325 | uint64_t kev_value; |
| 326 | uint64_t in_value; |
| 327 | } kqwl_history[KQWL_HISTORY_COUNT]; /* written at slot (index % KQWL_HISTORY_COUNT), so later entries overwrite earlier ones */ |
| 328 | unsigned int kqwl_index; /* bumped by KQWL_HISTORY_WRITE_ENTRY with a relaxed atomic increment (os_atomic_inc_orig presumably yields the pre-increment value -- confirm) */ |
| 329 | #endif // CONFIG_WORKLOOP_DEBUG |
| 330 | }; |
| 331 | |
| 332 | typedef union { /* pointer to any flavor of kqueue; with transparent_union, a function whose parameter is a kqueue_t can be called with any of the member pointer types without a cast */ |
| 333 | struct kqueue *kq; /* common core view */ |
| 334 | struct kqworkq *kqwq; |
| 335 | struct kqfile *kqf; |
| 336 | struct kqworkloop *kqwl; |
| 337 | } __attribute__((transparent_union)) kqueue_t; |
| 338 | |
| 339 | SLIST_HEAD(kqlist, kqworkloop); /* defines struct kqlist: a singly-linked list head of kqworkloop, linked through kqwl_hashlink */ |
| 340 | |
| 341 | #define kqwl_wqs kqwl_kqueue.kq_wqs /* kqwl_* shorthands: name a field of the embedded kqueue core through a struct kqworkloop */ |
| 342 | #define kqwl_lock kqwl_kqueue.kq_lock |
| 343 | #define kqwl_state kqwl_kqueue.kq_state |
| 344 | #define kqwl_level kqwl_kqueue.kq_level |
| 345 | #define kqwl_count kqwl_kqueue.kq_count |
| 346 | #define kqwl_p kqwl_kqueue.kq_p |
| 347 | |
| 348 | #define KQ_WORKLOOP_RETAINS_MAX UINT32_MAX /* largest value the uint32_t kqwl_retains field can hold */ |
| 349 | |
| 350 | extern void kqueue_threadreq_unbind(struct proc *p, struct kqrequest *kqr); /* takes the kqrequest itself, whereas the bind/cancel calls below take a workq_threadreq_t; locking requirements are not stated in this header */ |
| 351 | |
| 352 | // kqueue_threadreq_bind(): called with the kq req held (presumably the kq-request lock, kq_reqlock -- confirm) |
| 353 | #define KQUEUE_THREADERQ_BIND_NO_INHERITOR_UPDATE 0x1 /* presumably a bit for the `flags` argument of kqueue_threadreq_bind() -- confirm; the THREADERQ spelling is the declared name, keep it */ |
| 354 | extern void kqueue_threadreq_bind(struct proc *p, workq_threadreq_t req, |
| 355 | thread_t thread, unsigned int flags); |
| 356 | |
| 357 | // kqueue_threadreq_bind_prepost(): called with the wq lock held |
| 358 | extern void kqueue_threadreq_bind_prepost(struct proc *p, workq_threadreq_t req, thread_t thread); |
| 359 | |
| 360 | // kqueue_threadreq_bind_commit(): called with no lock held |
| 361 | extern void kqueue_threadreq_bind_commit(struct proc *p, thread_t thread); |
| 362 | |
| 363 | extern void kqueue_threadreq_cancel(struct proc *p, workq_threadreq_t req); /* locking requirements are not stated in this header */ |
| 364 | |
| 365 | // kqueue_threadreq_workloop_param(): lock not held as kqwl_params is immutable after creation |
| 366 | extern workq_threadreq_param_t kqueue_threadreq_workloop_param(workq_threadreq_t req); |
| 367 | |
| 368 | extern struct kqueue *kqueue_alloc(struct proc *, unsigned int); /* returns a struct kqueue pointer; the unsigned int is presumably a flags word -- confirm at the definition */ |
| 369 | extern void kqueue_dealloc(struct kqueue *); /* counterpart of kqueue_alloc() by name */ |
| 370 | |
| 371 | extern void knotes_dealloc(struct proc *); /* per-process: takes only the proc */ |
| 372 | extern void kqworkloops_dealloc(struct proc *); /* per-process: takes only the proc */ |
| 373 | |
| 374 | extern int kevent_register(struct kqueue *, struct kevent_internal_s *, |
| 375 | struct knote_lock_ctx *); /* the last argument is a caller-provided knote lock context (see KNOTE_LOCK_CTX()) */ |
| 376 | extern int kqueue_scan(struct kqueue *, kevent_callback_t, kqueue_continue_t, |
| 377 | void *, struct filt_process_s *, struct timeval *, struct proc *); /* takes both callback types declared at the top of this header; the void * is presumably the opaque pointer handed back to them -- confirm */ |
| 378 | extern int kqueue_stat(struct kqueue *, void *, int, proc_t); |
| 379 | |
| 380 | #endif /* XNU_KERNEL_PRIVATE */ |
| 381 | |
| 382 | #endif /* !_SYS_EVENTVAR_H_ */ |
| 383 | |