timer_call.c source code [codebrowser/osfmk/kern/timer_call.c]

1	/*
2	* Copyright (c) 1993-2008 Apple Inc. All rights reserved.
3	*
4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5	*
6	* This file contains Original Code and/or Modifications of Original Code
7	* as defined in and that are subject to the Apple Public Source License
8	* Version 2.0 (the 'License'). You may not use this file except in
9	* compliance with the License. The rights granted to you under the License
10	* may not be used to create, or enable the creation or redistribution of,
11	* unlawful or unlicensed copies of an Apple operating system, or to
12	* circumvent, violate, or enable the circumvention or violation of, any
13	* terms of an Apple operating system software license agreement.
14	*
15	* Please obtain a copy of the License at
16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
17	*
18	* The Original Code and all software distributed under the License are
19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23	* Please see the License for the specific language governing rights and
24	* limitations under the License.
25	*
26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27	*/
28	/*
29	* Timer interrupt callout module.
30	*/
31
32	#include <mach/mach_types.h>
33
34	#include <kern/clock.h>
35	#include <kern/smp.h>
36	#include <kern/processor.h>
37	#include <kern/timer_call.h>
38	#include <kern/timer_queue.h>
39	#include <kern/call_entry.h>
40	#include <kern/thread.h>
41	#include <kern/policy_internal.h>
42
43	#include <sys/kdebug.h>
44
45	#if CONFIG_DTRACE
46	#include <mach/sdt.h>
47	#endif
48
49
/*
 * Build-time debug knobs for this file.
 *
 * TIMER_ASSERT selects the lock-checking variants of the queue helpers and is
 * switched on automatically for DEBUG builds. It and TIMER_DBG can also be
 * forced on by un-commenting the defines below.
 */
#if DEBUG
#define TIMER_ASSERT	1
#endif

//#define TIMER_ASSERT	1
//#define TIMER_DBG	1

/* DBG(): kprintf()-based logging, compiled out unless TIMER_DBG is set. */
#if TIMER_DBG
#define DBG(x...) kprintf("DBG: " x);
#else
#define DBG(x...)
#endif

/* TIMER_KDEBUG_TRACE(): tracepoints, compiled out unless TIMER_TRACE is set. */
#if TIMER_TRACE
#define TIMER_KDEBUG_TRACE	KERNEL_DEBUG_CONSTANT_IST
#else
#define TIMER_KDEBUG_TRACE(x...)
#endif
68
69
70	lck_grp_t timer_call_lck_grp;
71	lck_attr_t timer_call_lck_attr;
72	lck_grp_attr_t timer_call_lck_grp_attr;
73
74	lck_grp_t timer_longterm_lck_grp;
75	lck_attr_t timer_longterm_lck_attr;
76	lck_grp_attr_t timer_longterm_lck_grp_attr;
77
/*
 * Timer queue lock must be acquired with interrupts disabled (under splclock()).
 *
 * On SMP builds these take/release the queue's lock_data mutex in spin mode;
 * on non-SMP builds they are no-op expressions. The macro argument is
 * parenthesised so that any pointer expression may be passed.
 */
#if __SMP__
#define timer_queue_lock_spin(queue)					\
	lck_mtx_lock_spin_always(&(queue)->lock_data)

#define timer_queue_unlock(queue)					\
	lck_mtx_unlock_always(&(queue)->lock_data)
#else
#define timer_queue_lock_spin(queue)	(void)1
#define timer_queue_unlock(queue)	(void)1
#endif
89
/*
 * Casting helpers between the generic queue/call-entry types and the
 * timer-specific ones. TCE() yields the address of the call_entry member of
 * a timer call; its argument is parenthesised so that expressions such as
 * TCE(p + 1) expand correctly.
 */
#define QUEUE(x)	((queue_t)(x))
#define MPQUEUE(x)	((mpqueue_head_t *)(x))
#define TIMER_CALL(x)	((timer_call_t)(x))
#define TCE(x)		(&((x)->call_entry))
94	/*
95	* The longterm timer object is a global structure holding all timers
96	* beyond the short-term, local timer queue threshold. The boot processor
97	* is responsible for moving each timer to its local timer queue
98	* if and when that timer becomes due within the threshold.
99	*/
100
/* Sentinel for "no time set": */
#define TIMER_LONGTERM_NONE		EndOfAllTime
/* The default threshold is the delta above which a timer is "long-term" */
#if defined(__x86_64__)
#define TIMER_LONGTERM_THRESHOLD	(1ULL * NSEC_PER_SEC)	/* 1 sec */
#else
#define TIMER_LONGTERM_THRESHOLD	TIMER_LONGTERM_NONE	/* disabled */
#endif

/*
 * The scan_limit throttles processing of the longterm queue.
 * If the scan time exceeds this limit, we terminate, unlock
 * and defer for scan_interval. This prevents unbounded holding of
 * timer queue locks with interrupts masked.
 */
#define TIMER_LONGTERM_SCAN_LIMIT	(100ULL * NSEC_PER_USEC)	/* 100 us */
#define TIMER_LONGTERM_SCAN_INTERVAL	(100ULL * NSEC_PER_USEC)	/* 100 us */
/* Sentinel for "scan limit exceeded": */
#define TIMER_LONGTERM_SCAN_AGAIN	0
120
121	typedef struct {
122	uint64_t interval; / longterm timer interval /
123	uint64_t margin; / fudge factor (10% of interval /
124	uint64_t deadline; / first/soonest longterm deadline /
125	uint64_t preempted; / sooner timer has pre-empted /
126	timer_call_t call; / first/soonest longterm timer call /
127	uint64_t deadline_set; / next timer set /
128	timer_call_data_t timer; / timer used by threshold management /
129	/ Stats: /
130	uint64_t scans; / num threshold timer scans /
131	uint64_t preempts; / num threshold reductions /
132	uint64_t latency; / average threshold latency /
133	uint64_t latency_min; / minimum threshold latency /
134	uint64_t latency_max; / maximum threshold latency /
135	} threshold_t;
136
137	typedef struct {
138	mpqueue_head_t queue; / longterm timer list /
139	uint64_t enqueues; / num timers queued /
140	uint64_t dequeues; / num timers dequeued /
141	uint64_t escalates; / num timers becoming shortterm /
142	uint64_t scan_time; / last time the list was scanned /
143	threshold_t threshold; / longterm timer threshold /
144	uint64_t scan_limit; / maximum scan time /
145	uint64_t scan_interval; / interval between LT "escalation" scans /
146	uint64_t scan_pauses; / num scans exceeding time limit /
147	} timer_longterm_t;
148
149	timer_longterm_t timer_longterm = {
150	.scan_limit = TIMER_LONGTERM_SCAN_LIMIT,
151	.scan_interval = TIMER_LONGTERM_SCAN_INTERVAL,
152	};
153
154	static mpqueue_head_t *timer_longterm_queue = NULL;
155
156	static void timer_longterm_init(void);
157	static void timer_longterm_callout(
158	timer_call_param_t p0,
159	timer_call_param_t p1);
160	extern void timer_longterm_scan(
161	timer_longterm_t *tlp,
162	uint64_t now);
163	static void timer_longterm_update(
164	timer_longterm_t *tlp);
165	static void timer_longterm_update_locked(
166	timer_longterm_t *tlp);
167	static mpqueue_head_t * timer_longterm_enqueue_unlocked(
168	timer_call_t call,
169	uint64_t now,
170	uint64_t deadline,
171	mpqueue_head_t ** old_queue,
172	uint64_t soft_deadline,
173	uint64_t ttd,
174	timer_call_param_t param1,
175	uint32_t callout_flags);
176	static void timer_longterm_dequeued_locked(
177	timer_call_t call);
178
/*
 * Statistics about timers armed with a deadline that had already passed;
 * maintained by past_deadline_timer_handle().
 */
uint64_t past_deadline_timers;			/* number of such timers */
uint64_t past_deadline_deltas;			/* running sum of (now - deadline) */
uint64_t past_deadline_longest;
uint64_t past_deadline_shortest = ~0ULL;	/* starts at the maximum value */
enum {PAST_DEADLINE_TIMER_ADJUSTMENT_NS = 10 * 1000};

/* PAST_DEADLINE_TIMER_ADJUSTMENT_NS in absolute-time units; set by timer_call_init_abstime(). */
uint64_t past_deadline_timer_adjustment;
186
187	static boolean_t timer_call_enter_internal(timer_call_t call, timer_call_param_t param1, uint64_t deadline, uint64_t leeway, uint32_t flags, boolean_t ratelimited);
188	boolean_t mach_timer_coalescing_enabled = TRUE;
189
190	mpqueue_head_t *timer_call_enqueue_deadline_unlocked(
191	timer_call_t call,
192	mpqueue_head_t *queue,
193	uint64_t deadline,
194	uint64_t soft_deadline,
195	uint64_t ttd,
196	timer_call_param_t param1,
197	uint32_t flags);
198
199	mpqueue_head_t *timer_call_dequeue_unlocked(
200	timer_call_t call);
201
202	timer_coalescing_priority_params_t tcoal_prio_params;
203
/*
 * Optional per-class counters. TCOAL_PRIO_STAT(x) increments counter x when
 * TCOAL_PRIO_STATS is enabled and expands to nothing otherwise. The argument
 * is parenthesised so any lvalue expression can be passed.
 */
#if TCOAL_PRIO_STATS
int32_t nc_tcl, rt_tcl, bg_tcl, kt_tcl, fp_tcl, ts_tcl, qos_tcl;
#define TCOAL_PRIO_STAT(x) ((x)++)
#else
#define TCOAL_PRIO_STAT(x)
#endif
210
211	static void
212	timer_call_init_abstime(void)
213	{
214	int i;
215	uint64_t result;
216	timer_coalescing_priority_params_ns_t * tcoal_prio_params_init = timer_call_get_priority_params();
217	nanoseconds_to_absolutetime(PAST_DEADLINE_TIMER_ADJUSTMENT_NS, &past_deadline_timer_adjustment);
218	nanoseconds_to_absolutetime(tcoal_prio_params_init->idle_entry_timer_processing_hdeadline_threshold_ns, &result);
219	tcoal_prio_params.idle_entry_timer_processing_hdeadline_threshold_abstime = (uint32_t)result;
220	nanoseconds_to_absolutetime(tcoal_prio_params_init->interrupt_timer_coalescing_ilat_threshold_ns, &result);
221	tcoal_prio_params.interrupt_timer_coalescing_ilat_threshold_abstime = (uint32_t)result;
222	nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_resort_threshold_ns, &result);
223	tcoal_prio_params.timer_resort_threshold_abstime = (uint32_t)result;
224	tcoal_prio_params.timer_coalesce_rt_shift = tcoal_prio_params_init->timer_coalesce_rt_shift;
225	tcoal_prio_params.timer_coalesce_bg_shift = tcoal_prio_params_init->timer_coalesce_bg_shift;
226	tcoal_prio_params.timer_coalesce_kt_shift = tcoal_prio_params_init->timer_coalesce_kt_shift;
227	tcoal_prio_params.timer_coalesce_fp_shift = tcoal_prio_params_init->timer_coalesce_fp_shift;
228	tcoal_prio_params.timer_coalesce_ts_shift = tcoal_prio_params_init->timer_coalesce_ts_shift;
229
230	nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_rt_ns_max,
231	&tcoal_prio_params.timer_coalesce_rt_abstime_max);
232	nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_bg_ns_max,
233	&tcoal_prio_params.timer_coalesce_bg_abstime_max);
234	nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_kt_ns_max,
235	&tcoal_prio_params.timer_coalesce_kt_abstime_max);
236	nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_fp_ns_max,
237	&tcoal_prio_params.timer_coalesce_fp_abstime_max);
238	nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_ts_ns_max,
239	&tcoal_prio_params.timer_coalesce_ts_abstime_max);
240
241	for (i = `0`; i < NUM_LATENCY_QOS_TIERS; i++) {
242	tcoal_prio_params.latency_qos_scale[i] = tcoal_prio_params_init->latency_qos_scale[i];
243	nanoseconds_to_absolutetime(tcoal_prio_params_init->latency_qos_ns_max[i],
244	&tcoal_prio_params.latency_qos_abstime_max[i]);
245	tcoal_prio_params.latency_tier_rate_limited[i] = tcoal_prio_params_init->latency_tier_rate_limited[i];
246	}
247	}
248
249
250	void
251	timer_call_init(void)
252	{
253	lck_attr_setdefault(&timer_call_lck_attr);
254	lck_grp_attr_setdefault(&timer_call_lck_grp_attr);
255	lck_grp_init(&timer_call_lck_grp, "timer_call", &timer_call_lck_grp_attr);
256
257	timer_longterm_init();
258	timer_call_init_abstime();
259	}
260
261
262	void
263	timer_call_queue_init(mpqueue_head_t *queue)
264	{
265	DBG("timer_call_queue_init(%p)\n", queue);
266	mpqueue_init(queue, &timer_call_lck_grp, &timer_call_lck_attr);
267	}
268
269
270	void
271	timer_call_setup(
272	timer_call_t call,
273	timer_call_func_t func,
274	timer_call_param_t param0)
275	{
276	DBG("timer_call_setup(%p,%p,%p)\n", call, func, param0);
277	call_entry_setup(TCE(call), func, param0);
278	simple_lock_init(&(call)->lock, `0`);
279	call->async_dequeue = FALSE;
280	}
281	#if TIMER_ASSERT
282	static __inline__ mpqueue_head_t *
283	timer_call_entry_dequeue(
284	timer_call_t entry)
285	{
286	mpqueue_head_t *old_queue = MPQUEUE(TCE(entry)->queue);
287
288	if (!hw_lock_held((hw_lock_t)&entry->lock))
289	panic("_call_entry_dequeue() "
290	"entry %p is not locked\n", entry);
291	/*
292	* XXX The queue lock is actually a mutex in spin mode
293	* but there's no way to test for it being held
294	* so we pretend it's a spinlock!
295	*/
296	if (!hw_lock_held((hw_lock_t)&old_queue->lock_data))
297	panic("_call_entry_dequeue() "
298	"queue %p is not locked\n", old_queue);
299
300	call_entry_dequeue(TCE(entry));
301	old_queue->count--;
302
303	return (old_queue);
304	}
305
306	static __inline__ mpqueue_head_t *
307	timer_call_entry_enqueue_deadline(
308	timer_call_t entry,
309	mpqueue_head_t *queue,
310	uint64_t deadline)
311	{
312	mpqueue_head_t *old_queue = MPQUEUE(TCE(entry)->queue);
313
314	if (!hw_lock_held((hw_lock_t)&entry->lock))
315	panic("_call_entry_enqueue_deadline() "
316	"entry %p is not locked\n", entry);
317	/ XXX More lock pretense: /
318	if (!hw_lock_held((hw_lock_t)&queue->lock_data))
319	panic("_call_entry_enqueue_deadline() "
320	"queue %p is not locked\n", queue);
321	if (old_queue != NULL && old_queue != queue)
322	panic("_call_entry_enqueue_deadline() "
323	"old_queue %p != queue", old_queue);
324
325	call_entry_enqueue_deadline(TCE(entry), QUEUE(queue), deadline);
326
327	/ For efficiency, track the earliest soft deadline on the queue, so that*
328	* fuzzy decisions can be made without lock acquisitions.
329	*/
330	timer_call_t thead = (timer_call_t)queue_first(&queue->head);
331
332	queue->earliest_soft_deadline = thead->flags & TIMER_CALL_RATELIMITED ? TCE(thead)->deadline : thead->soft_deadline;
333
334	if (old_queue)
335	old_queue->count--;
336	queue->count++;
337
338	return (old_queue);
339	}
340
341	#else
342
343	static __inline__ mpqueue_head_t *
344	timer_call_entry_dequeue(
345	timer_call_t entry)
346	{
347	mpqueue_head_t *old_queue = MPQUEUE(TCE(entry)->queue);
348
349	call_entry_dequeue(TCE(entry));
350	old_queue->count--;
351
352	return old_queue;
353	}
354
355	static __inline__ mpqueue_head_t *
356	timer_call_entry_enqueue_deadline(
357	timer_call_t entry,
358	mpqueue_head_t *queue,
359	uint64_t deadline)
360	{
361	mpqueue_head_t *old_queue = MPQUEUE(TCE(entry)->queue);
362
363	call_entry_enqueue_deadline(TCE(entry), QUEUE(queue), deadline);
364
365	/ For efficiency, track the earliest soft deadline on the queue,*
366	* so that fuzzy decisions can be made without lock acquisitions.
367	*/
368
369	timer_call_t thead = (timer_call_t)queue_first(&queue->head);
370	queue->earliest_soft_deadline = thead->flags & TIMER_CALL_RATELIMITED ? TCE(thead)->deadline : thead->soft_deadline;
371
372	if (old_queue)
373	old_queue->count--;
374	queue->count++;
375
376	return old_queue;
377	}
378
379	#endif
380
381	static __inline__ void
382	timer_call_entry_enqueue_tail(
383	timer_call_t entry,
384	mpqueue_head_t *queue)
385	{
386	call_entry_enqueue_tail(TCE(entry), QUEUE(queue));
387	queue->count++;
388	return;
389	}
390
391	/*
392	* Remove timer entry from its queue but don't change the queue pointer
393	* and set the async_dequeue flag. This is locking case 2b.
394	*/
395	static __inline__ void
396	timer_call_entry_dequeue_async(
397	timer_call_t entry)
398	{
399	mpqueue_head_t *old_queue = MPQUEUE(TCE(entry)->queue);
400	if (old_queue) {
401	old_queue->count--;
402	(void) remque(qe(entry));
403	entry->async_dequeue = TRUE;
404	}
405	return;
406	}
407
408	#if TIMER_ASSERT
409	unsigned timer_call_enqueue_deadline_unlocked_async1;
410	unsigned timer_call_enqueue_deadline_unlocked_async2;
411	#endif
412	/*
413	* Assumes call_entry and queues unlocked, interrupts disabled.
414	*/
415	__inline__ mpqueue_head_t *
416	timer_call_enqueue_deadline_unlocked(
417	timer_call_t call,
418	mpqueue_head_t *queue,
419	uint64_t deadline,
420	uint64_t soft_deadline,
421	uint64_t ttd,
422	timer_call_param_t param1,
423	uint32_t callout_flags)
424	{
425	call_entry_t entry = TCE(call);
426	mpqueue_head_t *old_queue;
427
428	DBG("timer_call_enqueue_deadline_unlocked(%p,%p,)\n", call, queue);
429
430	simple_lock(&call->lock);
431
432	old_queue = MPQUEUE(entry->queue);
433
434	if (old_queue != NULL) {
435	timer_queue_lock_spin(old_queue);
436	if (call->async_dequeue) {
437	/ collision (1c): timer already dequeued, clear flag /
438	#if TIMER_ASSERT
439	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
440	DECR_TIMER_ASYNC_DEQ \| DBG_FUNC_NONE,
441	VM_KERNEL_UNSLIDE_OR_PERM(call),
442	call->async_dequeue,
443	VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue),
444	`0x1c`, `0`);
445	timer_call_enqueue_deadline_unlocked_async1++;
446	#endif
447	call->async_dequeue = FALSE;
448	entry->queue = NULL;
449	} else if (old_queue != queue) {
450	timer_call_entry_dequeue(call);
451	#if TIMER_ASSERT
452	timer_call_enqueue_deadline_unlocked_async2++;
453	#endif
454	}
455	if (old_queue == timer_longterm_queue)
456	timer_longterm_dequeued_locked(call);
457	if (old_queue != queue) {
458	timer_queue_unlock(old_queue);
459	timer_queue_lock_spin(queue);
460	}
461	} else {
462	timer_queue_lock_spin(queue);
463	}
464
465	call->soft_deadline = soft_deadline;
466	call->flags = callout_flags;
467	TCE(call)->param1 = param1;
468	call->ttd = ttd;
469
470	timer_call_entry_enqueue_deadline(call, queue, deadline);
471	timer_queue_unlock(queue);
472	simple_unlock(&call->lock);
473
474	return (old_queue);
475	}
476
477	#if TIMER_ASSERT
478	unsigned timer_call_dequeue_unlocked_async1;
479	unsigned timer_call_dequeue_unlocked_async2;
480	#endif
481	mpqueue_head_t *
482	timer_call_dequeue_unlocked(
483	timer_call_t call)
484	{
485	call_entry_t entry = TCE(call);
486	mpqueue_head_t *old_queue;
487
488	DBG("timer_call_dequeue_unlocked(%p)\n", call);
489
490	simple_lock(&call->lock);
491	old_queue = MPQUEUE(entry->queue);
492	#if TIMER_ASSERT
493	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
494	DECR_TIMER_ASYNC_DEQ \| DBG_FUNC_NONE,
495	VM_KERNEL_UNSLIDE_OR_PERM(call),
496	call->async_dequeue,
497	VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue),
498	`0`, `0`);
499	#endif
500	if (old_queue != NULL) {
501	timer_queue_lock_spin(old_queue);
502	if (call->async_dequeue) {
503	/ collision (1c): timer already dequeued, clear flag /
504	#if TIMER_ASSERT
505	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
506	DECR_TIMER_ASYNC_DEQ \| DBG_FUNC_NONE,
507	VM_KERNEL_UNSLIDE_OR_PERM(call),
508	call->async_dequeue,
509	VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue),
510	`0x1c`, `0`);
511	timer_call_dequeue_unlocked_async1++;
512	#endif
513	call->async_dequeue = FALSE;
514	entry->queue = NULL;
515	} else {
516	timer_call_entry_dequeue(call);
517	}
518	if (old_queue == timer_longterm_queue)
519	timer_longterm_dequeued_locked(call);
520	timer_queue_unlock(old_queue);
521	}
522	simple_unlock(&call->lock);
523	return (old_queue);
524	}
525
526	static uint64_t
527	past_deadline_timer_handle(uint64_t deadline, uint64_t ctime)
528	{
529	uint64_t delta = (ctime - deadline);
530
531	past_deadline_timers++;
532	past_deadline_deltas += delta;
533	if (delta > past_deadline_longest)
534	past_deadline_longest = deadline;
535	if (delta < past_deadline_shortest)
536	past_deadline_shortest = delta;
537
538	return (ctime + past_deadline_timer_adjustment);
539	}
540
541	/*
542	* Timer call entry locking model
543	* ==============================
544	*
545	* Timer call entries are linked on per-cpu timer queues which are protected
546	* by the queue lock and the call entry lock. The locking protocol is:
547	*
548	* 0) The canonical locking order is timer call entry followed by queue.
549	*
550	* 1) With only the entry lock held, entry.queue is valid:
551	* 1a) NULL: the entry is not queued, or
552	* 1b) non-NULL: this queue must be locked before the entry is modified.
553	* After locking the queue, the call.async_dequeue flag must be checked:
554	* 1c) TRUE: the entry was removed from the queue by another thread
555	* and we must NULL the entry.queue and reset this flag, or
556	* 1d) FALSE: (ie. queued), the entry can be manipulated.
557	*
558	* 2) If a queue lock is obtained first, the queue is stable:
559	* 2a) If a try-lock of a queued entry succeeds, the call can be operated on
560	* and dequeued.
561	* 2b) If a try-lock fails, it indicates that another thread is attempting
562	* to change the entry and move it to a different position in this queue
563	* or to a different queue. The entry can be dequeued but it should not be
564	* operated upon since it is being changed. Furthermore, we don't null
565	* the entry.queue pointer (protected by the entry lock we don't own).
566	* Instead, we set the async_dequeue flag -- see (1c).
567	* 2c) Same as 2b but occurring when a longterm timer is matured.
568	* 3) A callout's parameters (deadline, flags, parameters, soft deadline &c.)
569	* should be manipulated with the appropriate timer queue lock held,
570	* to prevent queue traversal observations from observing inconsistent
571	* updates to an in-flight callout.
572	*/
573
574	/*
575	* Inlines timer_call_entry_dequeue() and timer_call_entry_enqueue_deadline()
576	* cast between pointer types (mpqueue_head_t *) and (queue_t) so that
577	* we can use the call_entry_dequeue() and call_entry_enqueue_deadline()
578	* methods to operate on timer_call structs as if they are call_entry structs.
579	* These structures are identical except for their queue head pointer fields.
580	*
581	* In the debug case, we assert that the timer call locking protocol
582	* is being obeyed.
583	*/
584
585	static boolean_t
586	timer_call_enter_internal(
587	timer_call_t call,
588	timer_call_param_t param1,
589	uint64_t deadline,
590	uint64_t leeway,
591	uint32_t flags,
592	boolean_t ratelimited)
593	{
594	mpqueue_head_t *queue = NULL;
595	mpqueue_head_t *old_queue;
596	spl_t s;
597	uint64_t slop;
598	uint32_t urgency;
599	uint64_t sdeadline, ttd;
600
601	assert(call->call_entry.func != NULL);
602	s = splclock();
603
604	sdeadline = deadline;
605	uint64_t ctime = mach_absolute_time();
606
607	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
608	DECR_TIMER_ENTER \| DBG_FUNC_START,
609	VM_KERNEL_UNSLIDE_OR_PERM(call),
610	VM_KERNEL_ADDRHIDE(param1), deadline, flags, `0`);
611
612	urgency = (flags & TIMER_CALL_URGENCY_MASK);
613
614	boolean_t slop_ratelimited = FALSE;
615	slop = timer_call_slop(deadline, ctime, urgency, current_thread(), &slop_ratelimited);
616
617	if ((flags & TIMER_CALL_LEEWAY) != `0` && leeway > slop)
618	slop = leeway;
619
620	if (UINT64_MAX - deadline <= slop) {
621	deadline = UINT64_MAX;
622	} else {
623	deadline += slop;
624	}
625
626	if (__improbable(deadline < ctime)) {
627	deadline = past_deadline_timer_handle(deadline, ctime);
628	sdeadline = deadline;
629	}
630
631	if (ratelimited \|\| slop_ratelimited) {
632	flags \|= TIMER_CALL_RATELIMITED;
633	} else {
634	flags &= ~TIMER_CALL_RATELIMITED;
635	}
636
637	ttd = sdeadline - ctime;
638	#if CONFIG_DTRACE
639	DTRACE_TMR7(callout__create, timer_call_func_t, TCE(call)->func,
640	timer_call_param_t, TCE(call)->param0, uint32_t, flags,
641	(deadline - sdeadline),
642	(ttd >> `32`), (unsigned) (ttd & `0xFFFFFFFF`), call);
643	#endif
644
645	/ Program timer callout parameters under the appropriate per-CPU or*
646	* longterm queue lock. The callout may have been previously enqueued
647	* and in-flight on this or another timer queue.
648	*/
649	if (!ratelimited && !slop_ratelimited) {
650	queue = timer_longterm_enqueue_unlocked(call, ctime, deadline, &old_queue, sdeadline, ttd, param1, flags);
651	}
652
653	if (queue == NULL) {
654	queue = timer_queue_assign(deadline);
655	old_queue = timer_call_enqueue_deadline_unlocked(call, queue, deadline, sdeadline, ttd, param1, flags);
656	}
657
658	#if TIMER_TRACE
659	TCE(call)->entry_time = ctime;
660	#endif
661
662	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
663	DECR_TIMER_ENTER \| DBG_FUNC_END,
664	VM_KERNEL_UNSLIDE_OR_PERM(call),
665	(old_queue != NULL), deadline, queue->count, `0`);
666
667	splx(s);
668
669	return (old_queue != NULL);
670	}
671
672	/*
673	* timer_call_*()
674	* return boolean indicating whether the call was previously queued.
675	*/
676	boolean_t
677	timer_call_enter(
678	timer_call_t call,
679	uint64_t deadline,
680	uint32_t flags)
681	{
682	return timer_call_enter_internal(call, NULL, deadline, `0`, flags, FALSE);
683	}
684
685	boolean_t
686	timer_call_enter1(
687	timer_call_t call,
688	timer_call_param_t param1,
689	uint64_t deadline,
690	uint32_t flags)
691	{
692	return timer_call_enter_internal(call, param1, deadline, `0`, flags, FALSE);
693	}
694
695	boolean_t
696	timer_call_enter_with_leeway(
697	timer_call_t call,
698	timer_call_param_t param1,
699	uint64_t deadline,
700	uint64_t leeway,
701	uint32_t flags,
702	boolean_t ratelimited)
703	{
704	return timer_call_enter_internal(call, param1, deadline, leeway, flags, ratelimited);
705	}
706
707	boolean_t
708	timer_call_quantum_timer_enter(
709	timer_call_t call,
710	timer_call_param_t param1,
711	uint64_t deadline,
712	uint64_t ctime)
713	{
714	assert(call->call_entry.func != NULL);
715	assert(ml_get_interrupts_enabled() == FALSE);
716
717	uint32_t flags = TIMER_CALL_SYS_CRITICAL \| TIMER_CALL_LOCAL;
718
719	TIMER_KDEBUG_TRACE(KDEBUG_TRACE, DECR_TIMER_ENTER \| DBG_FUNC_START,
720	VM_KERNEL_UNSLIDE_OR_PERM(call),
721	VM_KERNEL_ADDRHIDE(param1), deadline,
722	flags, `0`);
723
724	if (__improbable(deadline < ctime)) {
725	deadline = past_deadline_timer_handle(deadline, ctime);
726	}
727
728	uint64_t ttd = deadline - ctime;
729	#if CONFIG_DTRACE
730	DTRACE_TMR7(callout__create, timer_call_func_t, TCE(call)->func,
731	timer_call_param_t, TCE(call)->param0, uint32_t, flags, `0`,
732	(ttd >> `32`), (unsigned) (ttd & `0xFFFFFFFF`), call);
733	#endif
734
735	quantum_timer_set_deadline(deadline);
736	TCE(call)->deadline = deadline;
737	TCE(call)->param1 = param1;
738	call->ttd = ttd;
739	call->flags = flags;
740
741	#if TIMER_TRACE
742	TCE(call)->entry_time = ctime;
743	#endif
744
745	TIMER_KDEBUG_TRACE(KDEBUG_TRACE, DECR_TIMER_ENTER \| DBG_FUNC_END,
746	VM_KERNEL_UNSLIDE_OR_PERM(call),
747	`1`, deadline, `0`, `0`);
748
749	return true;
750	}
751
752
753	boolean_t
754	timer_call_quantum_timer_cancel(
755	timer_call_t call)
756	{
757	assert(ml_get_interrupts_enabled() == FALSE);
758
759	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
760	DECR_TIMER_CANCEL \| DBG_FUNC_START,
761	VM_KERNEL_UNSLIDE_OR_PERM(call), TCE(call)->deadline,
762	`0`, call->flags, `0`);
763
764	TCE(call)->deadline = `0`;
765	quantum_timer_set_deadline(`0`);
766
767	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
768	DECR_TIMER_CANCEL \| DBG_FUNC_END,
769	VM_KERNEL_UNSLIDE_OR_PERM(call), `0`,
770	TCE(call)->deadline - mach_absolute_time(),
771	TCE(call)->deadline - TCE(call)->entry_time, `0`);
772
773	#if CONFIG_DTRACE
774	DTRACE_TMR6(callout__cancel, timer_call_func_t, TCE(call)->func,
775	timer_call_param_t, TCE(call)->param0, uint32_t, call->flags, `0`,
776	(call->ttd >> `32`), (unsigned) (call->ttd & `0xFFFFFFFF`));
777	#endif
778
779	return true;
780	}
781
782	boolean_t
783	timer_call_cancel(
784	timer_call_t call)
785	{
786	mpqueue_head_t *old_queue;
787	spl_t s;
788
789	s = splclock();
790
791	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
792	DECR_TIMER_CANCEL \| DBG_FUNC_START,
793	VM_KERNEL_UNSLIDE_OR_PERM(call),
794	TCE(call)->deadline, call->soft_deadline, call->flags, `0`);
795
796	old_queue = timer_call_dequeue_unlocked(call);
797
798	if (old_queue != NULL) {
799	timer_queue_lock_spin(old_queue);
800	if (!queue_empty(&old_queue->head)) {
801	timer_queue_cancel(old_queue, TCE(call)->deadline, CE(queue_first(&old_queue->head))->deadline);
802	timer_call_t thead = (timer_call_t)queue_first(&old_queue->head);
803	old_queue->earliest_soft_deadline = thead->flags & TIMER_CALL_RATELIMITED ? TCE(thead)->deadline : thead->soft_deadline;
804	}
805	else {
806	timer_queue_cancel(old_queue, TCE(call)->deadline, UINT64_MAX);
807	old_queue->earliest_soft_deadline = UINT64_MAX;
808	}
809	timer_queue_unlock(old_queue);
810	}
811	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
812	DECR_TIMER_CANCEL \| DBG_FUNC_END,
813	VM_KERNEL_UNSLIDE_OR_PERM(call),
814	VM_KERNEL_UNSLIDE_OR_PERM(old_queue),
815	TCE(call)->deadline - mach_absolute_time(),
816	TCE(call)->deadline - TCE(call)->entry_time, `0`);
817	splx(s);
818
819	#if CONFIG_DTRACE
820	DTRACE_TMR6(callout__cancel, timer_call_func_t, TCE(call)->func,
821	timer_call_param_t, TCE(call)->param0, uint32_t, call->flags, `0`,
822	(call->ttd >> `32`), (unsigned) (call->ttd & `0xFFFFFFFF`));
823	#endif
824
825	return (old_queue != NULL);
826	}
827
828	static uint32_t timer_queue_shutdown_lock_skips;
829	static uint32_t timer_queue_shutdown_discarded;
830
831	void
832	timer_queue_shutdown(
833	mpqueue_head_t *queue)
834	{
835	timer_call_t call;
836	mpqueue_head_t *new_queue;
837	spl_t s;
838
839
840	DBG("timer_queue_shutdown(%p)\n", queue);
841
842	s = splclock();
843
844	/ Note comma operator in while expression re-locking each iteration /
845	while ((void)timer_queue_lock_spin(queue), !queue_empty(&queue->head)) {
846	call = TIMER_CALL(queue_first(&queue->head));
847
848	if (!simple_lock_try(&call->lock)) {
849	/*
850	* case (2b) lock order inversion, dequeue and skip
851	* Don't change the call_entry queue back-pointer
852	* but set the async_dequeue field.
853	*/
854	timer_queue_shutdown_lock_skips++;
855	timer_call_entry_dequeue_async(call);
856	#if TIMER_ASSERT
857	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
858	DECR_TIMER_ASYNC_DEQ \| DBG_FUNC_NONE,
859	VM_KERNEL_UNSLIDE_OR_PERM(call),
860	call->async_dequeue,
861	VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue),
862	`0x2b`, `0`);
863	#endif
864	timer_queue_unlock(queue);
865	continue;
866	}
867
868	boolean_t call_local = ((call->flags & TIMER_CALL_LOCAL) != `0`);
869
870	/ remove entry from old queue /
871	timer_call_entry_dequeue(call);
872	timer_queue_unlock(queue);
873
874	if (call_local == FALSE) {
875	/ and queue it on new, discarding LOCAL timers /
876	new_queue = timer_queue_assign(TCE(call)->deadline);
877	timer_queue_lock_spin(new_queue);
878	timer_call_entry_enqueue_deadline(
879	call, new_queue, TCE(call)->deadline);
880	timer_queue_unlock(new_queue);
881	} else {
882	timer_queue_shutdown_discarded++;
883	}
884
885	assert(call_local == FALSE);
886	simple_unlock(&call->lock);
887	}
888
889	timer_queue_unlock(queue);
890	splx(s);
891	}
892
893
894	void
895	quantum_timer_expire(
896	uint64_t deadline)
897	{
898	processor_t processor = current_processor();
899	timer_call_t call = TIMER_CALL(&(processor->quantum_timer));
900
901	if (__improbable(TCE(call)->deadline > deadline))
902	panic("CPU quantum timer deadlin out of sync with timer call deadline");
903
904	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
905	DECR_TIMER_EXPIRE \| DBG_FUNC_NONE,
906	VM_KERNEL_UNSLIDE_OR_PERM(call),
907	TCE(call)->deadline,
908	TCE(call)->deadline,
909	TCE(call)->entry_time, `0`);
910
911	timer_call_func_t func = TCE(call)->func;
912	timer_call_param_t param0 = TCE(call)->param0;
913	timer_call_param_t param1 = TCE(call)->param1;
914
915	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
916	DECR_TIMER_CALLOUT \| DBG_FUNC_START,
917	VM_KERNEL_UNSLIDE_OR_PERM(call), VM_KERNEL_UNSLIDE(func),
918	VM_KERNEL_ADDRHIDE(param0),
919	VM_KERNEL_ADDRHIDE(param1),
920	`0`);
921
922	#if CONFIG_DTRACE
923	DTRACE_TMR7(callout__start, timer_call_func_t, func,
924	timer_call_param_t, param0, unsigned, call->flags,
925	`0`, (call->ttd >> `32`),
926	(unsigned) (call->ttd & `0xFFFFFFFF`), call);
927	#endif
928	(*func)(param0, param1);
929
930	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
931	DECR_TIMER_CALLOUT \| DBG_FUNC_END,
932	VM_KERNEL_UNSLIDE_OR_PERM(call), VM_KERNEL_UNSLIDE(func),
933	VM_KERNEL_ADDRHIDE(param0),
934	VM_KERNEL_ADDRHIDE(param1),
935	`0`);
936	}
937
938	static uint32_t timer_queue_expire_lock_skips;
939	uint64_t
940	timer_queue_expire_with_options(
941	mpqueue_head_t *queue,
942	uint64_t deadline,
943	boolean_t rescan)
944	{
945	timer_call_t call = NULL;
946	uint32_t tc_iterations = `0`;
947	DBG("timer_queue_expire(%p,)\n", queue);
948
949	uint64_t cur_deadline = deadline;
950	timer_queue_lock_spin(queue);
951
952	while (!queue_empty(&queue->head)) {
953	/ Upon processing one or more timer calls, refresh the*
954	* deadline to account for time elapsed in the callout
955	*/
956	if (++tc_iterations > `1`)
957	cur_deadline = mach_absolute_time();
958
959	if (call == NULL)
960	call = TIMER_CALL(queue_first(&queue->head));
961
962	if (call->soft_deadline <= cur_deadline) {
963	timer_call_func_t func;
964	timer_call_param_t param0, param1;
965
966	TCOAL_DEBUG(`0xDDDD0000`, queue->earliest_soft_deadline, call->soft_deadline, `0`, `0`, `0`);
967	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
968	DECR_TIMER_EXPIRE \| DBG_FUNC_NONE,
969	VM_KERNEL_UNSLIDE_OR_PERM(call),
970	call->soft_deadline,
971	TCE(call)->deadline,
972	TCE(call)->entry_time, `0`);
973
974	if ((call->flags & TIMER_CALL_RATELIMITED) &&
975	(TCE(call)->deadline > cur_deadline)) {
976	if (rescan == FALSE)
977	break;
978	}
979
980	if (!simple_lock_try(&call->lock)) {
981	/ case (2b) lock inversion, dequeue and skip /
982	timer_queue_expire_lock_skips++;
983	timer_call_entry_dequeue_async(call);
984	call = NULL;
985	continue;
986	}
987
988	timer_call_entry_dequeue(call);
989
990	func = TCE(call)->func;
991	param0 = TCE(call)->param0;
992	param1 = TCE(call)->param1;
993
994	simple_unlock(&call->lock);
995	timer_queue_unlock(queue);
996
997	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
998	DECR_TIMER_CALLOUT \| DBG_FUNC_START,
999	VM_KERNEL_UNSLIDE_OR_PERM(call), VM_KERNEL_UNSLIDE(func),
1000	VM_KERNEL_ADDRHIDE(param0),
1001	VM_KERNEL_ADDRHIDE(param1),
1002	`0`);
1003
1004	#if CONFIG_DTRACE
1005	DTRACE_TMR7(callout__start, timer_call_func_t, func,
1006	timer_call_param_t, param0, unsigned, call->flags,
1007	`0`, (call->ttd >> `32`),
1008	(unsigned) (call->ttd & `0xFFFFFFFF`), call);
1009	#endif
1010	/ Maintain time-to-deadline in per-processor data*
1011	* structure for thread wakeup deadline statistics.
1012	*/
1013	uint64_t *ttdp = &(PROCESSOR_DATA(current_processor(), timer_call_ttd));
1014	*ttdp = call->ttd;
1015	(*func)(param0, param1);
1016	*ttdp = `0`;
1017	#if CONFIG_DTRACE
1018	DTRACE_TMR4(callout__end, timer_call_func_t, func,
1019	param0, param1, call);
1020	#endif
1021
1022	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1023	DECR_TIMER_CALLOUT \| DBG_FUNC_END,
1024	VM_KERNEL_UNSLIDE_OR_PERM(call), VM_KERNEL_UNSLIDE(func),
1025	VM_KERNEL_ADDRHIDE(param0),
1026	VM_KERNEL_ADDRHIDE(param1),
1027	`0`);
1028	call = NULL;
1029	timer_queue_lock_spin(queue);
1030	} else {
1031	if (__probable(rescan == FALSE)) {
1032	break;
1033	} else {
1034	int64_t skew = TCE(call)->deadline - call->soft_deadline;
1035	assert(TCE(call)->deadline >= call->soft_deadline);
1036
1037	/ DRK: On a latency quality-of-service level change,*
1038	* re-sort potentially rate-limited timers. The platform
1039	* layer determines which timers require
1040	* this. In the absence of the per-callout
1041	* synchronization requirement, a global resort could
1042	* be more efficient. The re-sort effectively
1043	* annuls all timer adjustments, i.e. the "soft
1044	* deadline" is the sort key.
1045	*/
1046
1047	if (timer_resort_threshold(skew)) {
1048	if (__probable(simple_lock_try(&call->lock))) {
1049	timer_call_entry_dequeue(call);
1050	timer_call_entry_enqueue_deadline(call, queue, call->soft_deadline);
1051	simple_unlock(&call->lock);
1052	call = NULL;
1053	}
1054	}
1055	if (call) {
1056	call = TIMER_CALL(queue_next(qe(call)));
1057	if (queue_end(&queue->head, qe(call)))
1058	break;
1059	}
1060	}
1061	}
1062	}
1063
1064	if (!queue_empty(&queue->head)) {
1065	call = TIMER_CALL(queue_first(&queue->head));
1066	cur_deadline = TCE(call)->deadline;
1067	queue->earliest_soft_deadline = (call->flags & TIMER_CALL_RATELIMITED) ? TCE(call)->deadline: call->soft_deadline;
1068	} else {
1069	queue->earliest_soft_deadline = cur_deadline = UINT64_MAX;
1070	}
1071
1072	timer_queue_unlock(queue);
1073
1074	return (cur_deadline);
1075	}
1076
1077	uint64_t
1078	timer_queue_expire(
1079	mpqueue_head_t *queue,
1080	uint64_t deadline)
1081	{
1082	return timer_queue_expire_with_options(queue, deadline, FALSE);
1083	}
1084
1085	extern int serverperfmode;
1086	static uint32_t timer_queue_migrate_lock_skips;
1087	/*
1088	* timer_queue_migrate() is called by timer_queue_migrate_cpu()
1089	* to move timer requests from the local processor (queue_from)
1090	* to a target processor's (queue_to).
1091	*/
1092	int
1093	timer_queue_migrate(mpqueue_head_t queue_from, mpqueue_head_t queue_to)
1094	{
1095	timer_call_t call;
1096	timer_call_t head_to;
1097	int timers_migrated = `0`;
1098
1099	DBG("timer_queue_migrate(%p,%p)\n", queue_from, queue_to);
1100
1101	assert(!ml_get_interrupts_enabled());
1102	assert(queue_from != queue_to);
1103
1104	if (serverperfmode) {
1105	/*
1106	* if we're running a high end server
1107	* avoid migrations... they add latency
1108	* and don't save us power under typical
1109	* server workloads
1110	*/
1111	return -`4`;
1112	}
1113
1114	/*
1115	* Take both local (from) and target (to) timer queue locks while
1116	* moving the timers from the local queue to the target processor.
1117	* We assume that the target is always the boot processor.
1118	* But only move if all of the following is true:
1119	* - the target queue is non-empty
1120	* - the local queue is non-empty
1121	* - the local queue's first deadline is later than the target's
1122	* - the local queue contains no non-migrateable "local" call
1123	* so that we need not have the target resync.
1124	*/
1125
1126	timer_queue_lock_spin(queue_to);
1127
1128	head_to = TIMER_CALL(queue_first(&queue_to->head));
1129	if (queue_empty(&queue_to->head)) {
1130	timers_migrated = -`1`;
1131	goto abort1;
1132	}
1133
1134	timer_queue_lock_spin(queue_from);
1135
1136	if (queue_empty(&queue_from->head)) {
1137	timers_migrated = -`2`;
1138	goto abort2;
1139	}
1140
1141	call = TIMER_CALL(queue_first(&queue_from->head));
1142	if (TCE(call)->deadline < TCE(head_to)->deadline) {
1143	timers_migrated = `0`;
1144	goto abort2;
1145	}
1146
1147	/ perform scan for non-migratable timers /
1148	do {
1149	if (call->flags & TIMER_CALL_LOCAL) {
1150	timers_migrated = -`3`;
1151	goto abort2;
1152	}
1153	call = TIMER_CALL(queue_next(qe(call)));
1154	} while (!queue_end(&queue_from->head, qe(call)));
1155
1156	/ migration loop itself -- both queues are locked /
1157	while (!queue_empty(&queue_from->head)) {
1158	call = TIMER_CALL(queue_first(&queue_from->head));
1159	if (!simple_lock_try(&call->lock)) {
1160	/ case (2b) lock order inversion, dequeue only /
1161	#ifdef TIMER_ASSERT
1162	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1163	DECR_TIMER_ASYNC_DEQ \| DBG_FUNC_NONE,
1164	VM_KERNEL_UNSLIDE_OR_PERM(call),
1165	VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue),
1166	VM_KERNEL_UNSLIDE_OR_PERM(call->lock.interlock.lock_data),
1167	`0x2b`, `0`);
1168	#endif
1169	timer_queue_migrate_lock_skips++;
1170	timer_call_entry_dequeue_async(call);
1171	continue;
1172	}
1173	timer_call_entry_dequeue(call);
1174	timer_call_entry_enqueue_deadline(
1175	call, queue_to, TCE(call)->deadline);
1176	timers_migrated++;
1177	simple_unlock(&call->lock);
1178	}
1179	queue_from->earliest_soft_deadline = UINT64_MAX;
1180	abort2:
1181	timer_queue_unlock(queue_from);
1182	abort1:
1183	timer_queue_unlock(queue_to);
1184
1185	return timers_migrated;
1186	}
1187
1188	void
1189	timer_queue_trace_cpu(int ncpu)
1190	{
1191	timer_call_nosync_cpu(
1192	ncpu,
1193	(void()(void* *))timer_queue_trace,
1194	(void*) timer_queue_cpu(ncpu));
1195	}
1196
1197	void
1198	timer_queue_trace(
1199	mpqueue_head_t *queue)
1200	{
1201	timer_call_t call;
1202	spl_t s;
1203
1204	if (!kdebug_enable)
1205	return;
1206
1207	s = splclock();
1208	timer_queue_lock_spin(queue);
1209
1210	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1211	DECR_TIMER_QUEUE \| DBG_FUNC_START,
1212	queue->count, mach_absolute_time(), `0`, `0`, `0`);
1213
1214	if (!queue_empty(&queue->head)) {
1215	call = TIMER_CALL(queue_first(&queue->head));
1216	do {
1217	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1218	DECR_TIMER_QUEUE \| DBG_FUNC_NONE,
1219	call->soft_deadline,
1220	TCE(call)->deadline,
1221	TCE(call)->entry_time,
1222	VM_KERNEL_UNSLIDE(TCE(call)->func),
1223	`0`);
1224	call = TIMER_CALL(queue_next(qe(call)));
1225	} while (!queue_end(&queue->head, qe(call)));
1226	}
1227
1228	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1229	DECR_TIMER_QUEUE \| DBG_FUNC_END,
1230	queue->count, mach_absolute_time(), `0`, `0`, `0`);
1231
1232	timer_queue_unlock(queue);
1233	splx(s);
1234	}
1235
1236	void
1237	timer_longterm_dequeued_locked(timer_call_t call)
1238	{
1239	timer_longterm_t *tlp = &timer_longterm;
1240
1241	tlp->dequeues++;
1242	if (call == tlp->threshold.call)
1243	tlp->threshold.call = NULL;
1244	}
1245
1246	/*
1247	* Place a timer call in the longterm list
1248	* and adjust the next timer callout deadline if the new timer is first.
1249	*/
1250	mpqueue_head_t *
1251	timer_longterm_enqueue_unlocked(timer_call_t call,
1252	uint64_t now,
1253	uint64_t deadline,
1254	mpqueue_head_t **old_queue,
1255	uint64_t soft_deadline,
1256	uint64_t ttd,
1257	timer_call_param_t param1,
1258	uint32_t callout_flags)
1259	{
1260	timer_longterm_t *tlp = &timer_longterm;
1261	boolean_t update_required = FALSE;
1262	uint64_t longterm_threshold;
1263
1264	longterm_threshold = now + tlp->threshold.interval;
1265
1266	/*
1267	* Return NULL without doing anything if:
1268	* - this timer is local, or
1269	* - the longterm mechanism is disabled, or
1270	* - this deadline is too short.
1271	*/
1272	if ((callout_flags & TIMER_CALL_LOCAL) != `0` \|\|
1273	(tlp->threshold.interval == TIMER_LONGTERM_NONE) \|\|
1274	(deadline <= longterm_threshold))
1275	return NULL;
1276
1277	/*
1278	* Remove timer from its current queue, if any.
1279	*/
1280	*old_queue = timer_call_dequeue_unlocked(call);
1281
1282	/*
1283	* Lock the longterm queue, queue timer and determine
1284	* whether an update is necessary.
1285	*/
1286	assert(!ml_get_interrupts_enabled());
1287	simple_lock(&call->lock);
1288	timer_queue_lock_spin(timer_longterm_queue);
1289	TCE(call)->deadline = deadline;
1290	TCE(call)->param1 = param1;
1291	call->ttd = ttd;
1292	call->soft_deadline = soft_deadline;
1293	call->flags = callout_flags;
1294	timer_call_entry_enqueue_tail(call, timer_longterm_queue);
1295
1296	tlp->enqueues++;
1297
1298	/*
1299	* We'll need to update the currently set threshold timer
1300	* if the new deadline is sooner and no sooner update is in flight.
1301	*/
1302	if (deadline < tlp->threshold.deadline &&
1303	deadline < tlp->threshold.preempted) {
1304	tlp->threshold.preempted = deadline;
1305	tlp->threshold.call = call;
1306	update_required = TRUE;
1307	}
1308	timer_queue_unlock(timer_longterm_queue);
1309	simple_unlock(&call->lock);
1310
1311	if (update_required) {
1312	/*
1313	* Note: this call expects that calling the master cpu
1314	* alone does not involve locking the topo lock.
1315	*/
1316	timer_call_nosync_cpu(
1317	master_cpu,
1318	(void ()(void* *)) timer_longterm_update,
1319	(void *)tlp);
1320	}
1321
1322	return timer_longterm_queue;
1323	}
1324
1325	/*
1326	* Scan for timers below the longterm threshold.
1327	* Move these to the local timer queue (of the boot processor on which the
1328	* calling thread is running).
1329	* Both the local (boot) queue and the longterm queue are locked.
1330	* The scan is similar to the timer migrate sequence but is performed by
1331	* successively examining each timer on the longterm queue:
1332	* - if within the short-term threshold
1333	* - enter on the local queue (unless being deleted),
1334	* - otherwise:
1335	* - if sooner, deadline becomes the next threshold deadline.
1336	* The total scan time is limited to TIMER_LONGTERM_SCAN_LIMIT. Should this be
1337	* exceeded, we abort and reschedule again so that we don't shut others from
1338	* the timer queues. Longterm timers firing late is not critical.
1339	*/
1340	void
1341	timer_longterm_scan(timer_longterm_t *tlp,
1342	uint64_t time_start)
1343	{
1344	queue_entry_t qe;
1345	timer_call_t call;
1346	uint64_t threshold;
1347	uint64_t deadline;
1348	uint64_t time_limit = time_start + tlp->scan_limit;
1349	mpqueue_head_t *timer_master_queue;
1350
1351	assert(!ml_get_interrupts_enabled());
1352	assert(cpu_number() == master_cpu);
1353
1354	if (tlp->threshold.interval != TIMER_LONGTERM_NONE)
1355	threshold = time_start + tlp->threshold.interval;
1356
1357	tlp->threshold.deadline = TIMER_LONGTERM_NONE;
1358	tlp->threshold.call = NULL;
1359
1360	if (queue_empty(&timer_longterm_queue->head))
1361	return;
1362
1363	timer_master_queue = timer_queue_cpu(master_cpu);
1364	timer_queue_lock_spin(timer_master_queue);
1365
1366	qe = queue_first(&timer_longterm_queue->head);
1367	while (!queue_end(&timer_longterm_queue->head, qe)) {
1368	call = TIMER_CALL(qe);
1369	deadline = call->soft_deadline;
1370	qe = queue_next(qe);
1371	if (!simple_lock_try(&call->lock)) {
1372	/ case (2c) lock order inversion, dequeue only /
1373	#ifdef TIMER_ASSERT
1374	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1375	DECR_TIMER_ASYNC_DEQ \| DBG_FUNC_NONE,
1376	VM_KERNEL_UNSLIDE_OR_PERM(call),
1377	VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue),
1378	VM_KERNEL_UNSLIDE_OR_PERM(call->lock.interlock.lock_data),
1379	`0x2c`, `0`);
1380	#endif
1381	timer_call_entry_dequeue_async(call);
1382	continue;
1383	}
1384	if (deadline < threshold) {
1385	/*
1386	* This timer needs moving (escalating)
1387	* to the local (boot) processor's queue.
1388	*/
1389	#ifdef TIMER_ASSERT
1390	if (deadline < time_start)
1391	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1392	DECR_TIMER_OVERDUE \| DBG_FUNC_NONE,
1393	VM_KERNEL_UNSLIDE_OR_PERM(call),
1394	deadline,
1395	time_start,
1396	threshold,
1397	`0`);
1398	#endif
1399	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1400	DECR_TIMER_ESCALATE \| DBG_FUNC_NONE,
1401	VM_KERNEL_UNSLIDE_OR_PERM(call),
1402	TCE(call)->deadline,
1403	TCE(call)->entry_time,
1404	VM_KERNEL_UNSLIDE(TCE(call)->func),
1405	`0`);
1406	tlp->escalates++;
1407	timer_call_entry_dequeue(call);
1408	timer_call_entry_enqueue_deadline(
1409	call, timer_master_queue, TCE(call)->deadline);
1410	/*
1411	* A side-effect of the following call is to update
1412	* the actual hardware deadline if required.
1413	*/
1414	(void) timer_queue_assign(deadline);
1415	} else {
1416	if (deadline < tlp->threshold.deadline) {
1417	tlp->threshold.deadline = deadline;
1418	tlp->threshold.call = call;
1419	}
1420	}
1421	simple_unlock(&call->lock);
1422
1423	/ Abort scan if we're taking too long. /
1424	if (mach_absolute_time() > time_limit) {
1425	tlp->threshold.deadline = TIMER_LONGTERM_SCAN_AGAIN;
1426	tlp->scan_pauses++;
1427	DBG("timer_longterm_scan() paused %llu, qlen: %llu\n",
1428	time_limit, tlp->queue.count);
1429	break;
1430	}
1431	}
1432
1433	timer_queue_unlock(timer_master_queue);
1434	}
1435
1436	void
1437	timer_longterm_callout(timer_call_param_t p0, __unused timer_call_param_t p1)
1438	{
1439	timer_longterm_t tlp = (timer_longterm_t ) p0;
1440
1441	timer_longterm_update(tlp);
1442	}
1443
1444	void
1445	timer_longterm_update_locked(timer_longterm_t *tlp)
1446	{
1447	uint64_t latency;
1448
1449	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1450	DECR_TIMER_UPDATE \| DBG_FUNC_START,
1451	VM_KERNEL_UNSLIDE_OR_PERM(&tlp->queue),
1452	tlp->threshold.deadline,
1453	tlp->threshold.preempted,
1454	tlp->queue.count, `0`);
1455
1456	tlp->scan_time = mach_absolute_time();
1457	if (tlp->threshold.preempted != TIMER_LONGTERM_NONE) {
1458	tlp->threshold.preempts++;
1459	tlp->threshold.deadline = tlp->threshold.preempted;
1460	tlp->threshold.preempted = TIMER_LONGTERM_NONE;
1461	/*
1462	* Note: in the unlikely event that a pre-empted timer has
1463	* itself been cancelled, we'll simply re-scan later at the
1464	* time of the preempted/cancelled timer.
1465	*/
1466	} else {
1467	tlp->threshold.scans++;
1468
1469	/*
1470	* Maintain a moving average of our wakeup latency.
1471	* Clamp latency to 0 and ignore above threshold interval.
1472	*/
1473	if (tlp->scan_time > tlp->threshold.deadline_set)
1474	latency = tlp->scan_time - tlp->threshold.deadline_set;
1475	else
1476	latency = `0`;
1477	if (latency < tlp->threshold.interval) {
1478	tlp->threshold.latency_min =
1479	MIN(tlp->threshold.latency_min, latency);
1480	tlp->threshold.latency_max =
1481	MAX(tlp->threshold.latency_max, latency);
1482	tlp->threshold.latency =
1483	(tlp->threshold.latency*`99` + latency) / `100`;
1484	}
1485
1486	timer_longterm_scan(tlp, tlp->scan_time);
1487	}
1488
1489	tlp->threshold.deadline_set = tlp->threshold.deadline;
1490	/ The next deadline timer to be set is adjusted /
1491	if (tlp->threshold.deadline != TIMER_LONGTERM_NONE &&
1492	tlp->threshold.deadline != TIMER_LONGTERM_SCAN_AGAIN) {
1493	tlp->threshold.deadline_set -= tlp->threshold.margin;
1494	tlp->threshold.deadline_set -= tlp->threshold.latency;
1495	}
1496
1497	/ Throttle next scan time /
1498	uint64_t scan_clamp = mach_absolute_time() + tlp->scan_interval;
1499	if (tlp->threshold.deadline_set < scan_clamp)
1500	tlp->threshold.deadline_set = scan_clamp;
1501
1502	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1503	DECR_TIMER_UPDATE \| DBG_FUNC_END,
1504	VM_KERNEL_UNSLIDE_OR_PERM(&tlp->queue),
1505	tlp->threshold.deadline,
1506	tlp->threshold.scans,
1507	tlp->queue.count, `0`);
1508	}
1509
1510	void
1511	timer_longterm_update(timer_longterm_t *tlp)
1512	{
1513	spl_t s = splclock();
1514
1515	timer_queue_lock_spin(timer_longterm_queue);
1516
1517	if (cpu_number() != master_cpu)
1518	panic("timer_longterm_update_master() on non-boot cpu");
1519
1520	timer_longterm_update_locked(tlp);
1521
1522	if (tlp->threshold.deadline != TIMER_LONGTERM_NONE)
1523	timer_call_enter(
1524	&tlp->threshold.timer,
1525	tlp->threshold.deadline_set,
1526	TIMER_CALL_LOCAL \| TIMER_CALL_SYS_CRITICAL);
1527
1528	timer_queue_unlock(timer_longterm_queue);
1529	splx(s);
1530	}
1531
1532	void
1533	timer_longterm_init(void)
1534	{
1535	uint32_t longterm;
1536	timer_longterm_t *tlp = &timer_longterm;
1537
1538	DBG("timer_longterm_init() tlp: %p, queue: %p\n", tlp, &tlp->queue);
1539
1540	/*
1541	* Set the longterm timer threshold. Defaults to TIMER_LONGTERM_THRESHOLD
1542	* or TIMER_LONGTERM_NONE (disabled) for server;
1543	* overridden longterm boot-arg
1544	*/
1545	tlp->threshold.interval = serverperfmode ? TIMER_LONGTERM_NONE
1546	: TIMER_LONGTERM_THRESHOLD;
1547	if (PE_parse_boot_argn("longterm", &longterm, sizeof (longterm))) {
1548	tlp->threshold.interval = (longterm == `0`) ?
1549	TIMER_LONGTERM_NONE :
1550	longterm * NSEC_PER_MSEC;
1551	}
1552	if (tlp->threshold.interval != TIMER_LONGTERM_NONE) {
1553	printf("Longterm timer threshold: %llu ms\n",
1554	tlp->threshold.interval / NSEC_PER_MSEC);
1555	kprintf("Longterm timer threshold: %llu ms\n",
1556	tlp->threshold.interval / NSEC_PER_MSEC);
1557	nanoseconds_to_absolutetime(tlp->threshold.interval,
1558	&tlp->threshold.interval);
1559	tlp->threshold.margin = tlp->threshold.interval / `10`;
1560	tlp->threshold.latency_min = EndOfAllTime;
1561	tlp->threshold.latency_max = `0`;
1562	}
1563
1564	tlp->threshold.preempted = TIMER_LONGTERM_NONE;
1565	tlp->threshold.deadline = TIMER_LONGTERM_NONE;
1566
1567	lck_attr_setdefault(&timer_longterm_lck_attr);
1568	lck_grp_attr_setdefault(&timer_longterm_lck_grp_attr);
1569	lck_grp_init(&timer_longterm_lck_grp,
1570	"timer_longterm", &timer_longterm_lck_grp_attr);
1571	mpqueue_init(&tlp->queue,
1572	&timer_longterm_lck_grp, &timer_longterm_lck_attr);
1573
1574	timer_call_setup(&tlp->threshold.timer,
1575	timer_longterm_callout, (timer_call_param_t) tlp);
1576
1577	timer_longterm_queue = &tlp->queue;
1578	}
1579
1580	enum {
1581	THRESHOLD, QCOUNT,
1582	ENQUEUES, DEQUEUES, ESCALATES, SCANS, PREEMPTS,
1583	LATENCY, LATENCY_MIN, LATENCY_MAX, SCAN_LIMIT, SCAN_INTERVAL, PAUSES
1584	};
1585	uint64_t
1586	timer_sysctl_get(int oid)
1587	{
1588	timer_longterm_t *tlp = &timer_longterm;
1589
1590	switch (oid) {
1591	case THRESHOLD:
1592	return (tlp->threshold.interval == TIMER_LONGTERM_NONE) ?
1593	`0` : tlp->threshold.interval / NSEC_PER_MSEC;
1594	case QCOUNT:
1595	return tlp->queue.count;
1596	case ENQUEUES:
1597	return tlp->enqueues;
1598	case DEQUEUES:
1599	return tlp->dequeues;
1600	case ESCALATES:
1601	return tlp->escalates;
1602	case SCANS:
1603	return tlp->threshold.scans;
1604	case PREEMPTS:
1605	return tlp->threshold.preempts;
1606	case LATENCY:
1607	return tlp->threshold.latency;
1608	case LATENCY_MIN:
1609	return tlp->threshold.latency_min;
1610	case LATENCY_MAX:
1611	return tlp->threshold.latency_max;
1612	case SCAN_LIMIT:
1613	return tlp->scan_limit;
1614	case SCAN_INTERVAL:
1615	return tlp->scan_interval;
1616	case PAUSES:
1617	return tlp->scan_pauses;
1618	default:
1619	return `0`;
1620	}
1621	}
1622
1623	/*
1624	* timer_master_scan() is the inverse of timer_longterm_scan()
1625	* since it un-escalates timers to the longterm queue.
1626	*/
1627	static void
1628	timer_master_scan(timer_longterm_t *tlp,
1629	uint64_t now)
1630	{
1631	queue_entry_t qe;
1632	timer_call_t call;
1633	uint64_t threshold;
1634	uint64_t deadline;
1635	mpqueue_head_t *timer_master_queue;
1636
1637	if (tlp->threshold.interval != TIMER_LONGTERM_NONE)
1638	threshold = now + tlp->threshold.interval;
1639	else
1640	threshold = TIMER_LONGTERM_NONE;
1641
1642	timer_master_queue = timer_queue_cpu(master_cpu);
1643	timer_queue_lock_spin(timer_master_queue);
1644
1645	qe = queue_first(&timer_master_queue->head);
1646	while (!queue_end(&timer_master_queue->head, qe)) {
1647	call = TIMER_CALL(qe);
1648	deadline = TCE(call)->deadline;
1649	qe = queue_next(qe);
1650	if ((call->flags & TIMER_CALL_LOCAL) != `0`)
1651	continue;
1652	if (!simple_lock_try(&call->lock)) {
1653	/ case (2c) lock order inversion, dequeue only /
1654	timer_call_entry_dequeue_async(call);
1655	continue;
1656	}
1657	if (deadline > threshold) {
1658	/ move from master to longterm /
1659	timer_call_entry_dequeue(call);
1660	timer_call_entry_enqueue_tail(call, timer_longterm_queue);
1661	if (deadline < tlp->threshold.deadline) {
1662	tlp->threshold.deadline = deadline;
1663	tlp->threshold.call = call;
1664	}
1665	}
1666	simple_unlock(&call->lock);
1667	}
1668	timer_queue_unlock(timer_master_queue);
1669	}
1670
1671	static void
1672	timer_sysctl_set_threshold(uint64_t value)
1673	{
1674	timer_longterm_t *tlp = &timer_longterm;
1675	spl_t s = splclock();
1676	boolean_t threshold_increase;
1677
1678	timer_queue_lock_spin(timer_longterm_queue);
1679
1680	timer_call_cancel(&tlp->threshold.timer);
1681
1682	/*
1683	* Set the new threshold and note whther it's increasing.
1684	*/
1685	if (value == `0`) {
1686	tlp->threshold.interval = TIMER_LONGTERM_NONE;
1687	threshold_increase = TRUE;
1688	timer_call_cancel(&tlp->threshold.timer);
1689	} else {
1690	uint64_t old_interval = tlp->threshold.interval;
1691	tlp->threshold.interval = value * NSEC_PER_MSEC;
1692	nanoseconds_to_absolutetime(tlp->threshold.interval,
1693	&tlp->threshold.interval);
1694	tlp->threshold.margin = tlp->threshold.interval / `10`;
1695	if (old_interval == TIMER_LONGTERM_NONE)
1696	threshold_increase = FALSE;
1697	else
1698	threshold_increase = (tlp->threshold.interval > old_interval);
1699	}
1700
1701	if (threshold_increase / or removal /) {
1702	/ Escalate timers from the longterm queue /
1703	timer_longterm_scan(tlp, mach_absolute_time());
1704	} else / decrease or addition / {
1705	/*
1706	* We scan the local/master queue for timers now longterm.
1707	* To be strictly correct, we should scan all processor queues
1708	* but timer migration results in most timers gravitating to the
1709	* master processor in any case.
1710	*/
1711	timer_master_scan(tlp, mach_absolute_time());
1712	}
1713
1714	/ Set new timer accordingly /
1715	tlp->threshold.deadline_set = tlp->threshold.deadline;
1716	if (tlp->threshold.deadline != TIMER_LONGTERM_NONE) {
1717	tlp->threshold.deadline_set -= tlp->threshold.margin;
1718	tlp->threshold.deadline_set -= tlp->threshold.latency;
1719	timer_call_enter(
1720	&tlp->threshold.timer,
1721	tlp->threshold.deadline_set,
1722	TIMER_CALL_LOCAL \| TIMER_CALL_SYS_CRITICAL);
1723	}
1724
1725	/ Reset stats /
1726	tlp->enqueues = `0`;
1727	tlp->dequeues = `0`;
1728	tlp->escalates = `0`;
1729	tlp->scan_pauses = `0`;
1730	tlp->threshold.scans = `0`;
1731	tlp->threshold.preempts = `0`;
1732	tlp->threshold.latency = `0`;
1733	tlp->threshold.latency_min = EndOfAllTime;
1734	tlp->threshold.latency_max = `0`;
1735
1736	timer_queue_unlock(timer_longterm_queue);
1737	splx(s);
1738	}
1739
1740	int
1741	timer_sysctl_set(int oid, uint64_t value)
1742	{
1743	switch (oid) {
1744	case THRESHOLD:
1745	timer_call_cpu(
1746	master_cpu,
1747	(void ()(void* *)) timer_sysctl_set_threshold,
1748	(void *) value);
1749	return KERN_SUCCESS;
1750	case SCAN_LIMIT:
1751	timer_longterm.scan_limit = value;
1752	return KERN_SUCCESS;
1753	case SCAN_INTERVAL:
1754	timer_longterm.scan_interval = value;
1755	return KERN_SUCCESS;
1756	default:
1757	return KERN_INVALID_ARGUMENT;
1758	}
1759	}
1760
1761
1762	/ Select timer coalescing window based on per-task quality-of-service hints /
1763	static boolean_t tcoal_qos_adjust(thread_t t, int32_t tshift, uint64_t tmax_abstime, boolean_t *pratelimited) {
1764	uint32_t latency_qos;
1765	boolean_t adjusted = FALSE;
1766	task_t ctask = t->task;
1767
1768	if (ctask) {
1769	latency_qos = proc_get_effective_thread_policy(t, TASK_POLICY_LATENCY_QOS);
1770
1771	assert(latency_qos <= NUM_LATENCY_QOS_TIERS);
1772
1773	if (latency_qos) {
1774	*tshift = tcoal_prio_params.latency_qos_scale[latency_qos - `1`];
1775	*tmax_abstime = tcoal_prio_params.latency_qos_abstime_max[latency_qos - `1`];
1776	*pratelimited = tcoal_prio_params.latency_tier_rate_limited[latency_qos - `1`];
1777	adjusted = TRUE;
1778	}
1779	}
1780	return adjusted;
1781	}
1782
1783
1784	/ Adjust timer deadlines based on priority of the thread and the*
1785	* urgency value provided at timeout establishment. With this mechanism,
1786	* timers are no longer necessarily sorted in order of soft deadline
1787	* on a given timer queue, i.e. they may be differentially skewed.
1788	* In the current scheme, this could lead to fewer pending timers
1789	* processed than is technically possible when the HW deadline arrives.
1790	*/
1791	static void
1792	timer_compute_leeway(thread_t cthread, int32_t urgency, int32_t tshift, uint64_t tmax_abstime, boolean_t *pratelimited) {
1793	int16_t tpri = cthread->sched_pri;
1794	if ((urgency & TIMER_CALL_USER_MASK) != `0`) {
1795	if (tpri >= BASEPRI_RTQUEUES \|\|
1796	urgency == TIMER_CALL_USER_CRITICAL) {
1797	*tshift = tcoal_prio_params.timer_coalesce_rt_shift;
1798	*tmax_abstime = tcoal_prio_params.timer_coalesce_rt_abstime_max;
1799	TCOAL_PRIO_STAT(rt_tcl);
1800	} else if (proc_get_effective_thread_policy(cthread, TASK_POLICY_DARWIN_BG) \|\|
1801	(urgency == TIMER_CALL_USER_BACKGROUND)) {
1802	/ Determine if timer should be subjected to a lower QoS /
1803	if (tcoal_qos_adjust(cthread, tshift, tmax_abstime, pratelimited)) {
1804	if (*tmax_abstime > tcoal_prio_params.timer_coalesce_bg_abstime_max) {
1805	return;
1806	} else {
1807	*pratelimited = FALSE;
1808	}
1809	}
1810	*tshift = tcoal_prio_params.timer_coalesce_bg_shift;
1811	*tmax_abstime = tcoal_prio_params.timer_coalesce_bg_abstime_max;
1812	TCOAL_PRIO_STAT(bg_tcl);
1813	} else if (tpri >= MINPRI_KERNEL) {
1814	*tshift = tcoal_prio_params.timer_coalesce_kt_shift;
1815	*tmax_abstime = tcoal_prio_params.timer_coalesce_kt_abstime_max;
1816	TCOAL_PRIO_STAT(kt_tcl);
1817	} else if (cthread->sched_mode == TH_MODE_FIXED) {
1818	*tshift = tcoal_prio_params.timer_coalesce_fp_shift;
1819	*tmax_abstime = tcoal_prio_params.timer_coalesce_fp_abstime_max;
1820	TCOAL_PRIO_STAT(fp_tcl);
1821	} else if (tcoal_qos_adjust(cthread, tshift, tmax_abstime, pratelimited)) {
1822	TCOAL_PRIO_STAT(qos_tcl);
1823	} else if (cthread->sched_mode == TH_MODE_TIMESHARE) {
1824	*tshift = tcoal_prio_params.timer_coalesce_ts_shift;
1825	*tmax_abstime = tcoal_prio_params.timer_coalesce_ts_abstime_max;
1826	TCOAL_PRIO_STAT(ts_tcl);
1827	} else {
1828	TCOAL_PRIO_STAT(nc_tcl);
1829	}
1830	} else if (urgency == TIMER_CALL_SYS_BACKGROUND) {
1831	*tshift = tcoal_prio_params.timer_coalesce_bg_shift;
1832	*tmax_abstime = tcoal_prio_params.timer_coalesce_bg_abstime_max;
1833	TCOAL_PRIO_STAT(bg_tcl);
1834	} else {
1835	*tshift = tcoal_prio_params.timer_coalesce_kt_shift;
1836	*tmax_abstime = tcoal_prio_params.timer_coalesce_kt_abstime_max;
1837	TCOAL_PRIO_STAT(kt_tcl);
1838	}
1839	}
1840
1841
1842	int timer_user_idle_level;
1843
1844	uint64_t
1845	timer_call_slop(uint64_t deadline, uint64_t now, uint32_t flags, thread_t cthread, boolean_t *pratelimited)
1846	{
1847	int32_t tcs_shift = `0`;
1848	uint64_t tcs_max_abstime = `0`;
1849	uint64_t adjval;
1850	uint32_t urgency = (flags & TIMER_CALL_URGENCY_MASK);
1851
1852	if (mach_timer_coalescing_enabled &&
1853	(deadline > now) && (urgency != TIMER_CALL_SYS_CRITICAL)) {
1854	timer_compute_leeway(cthread, urgency, &tcs_shift, &tcs_max_abstime, pratelimited);
1855
1856	if (tcs_shift >= `0`)
1857	adjval = MIN((deadline - now) >> tcs_shift, tcs_max_abstime);
1858	else
1859	adjval = MIN((deadline - now) << (-tcs_shift), tcs_max_abstime);
1860	/ Apply adjustments derived from "user idle level" heuristic /
1861	adjval += (adjval * timer_user_idle_level) >> `7`;
1862	return adjval;
1863	} else {
1864	return `0`;
1865	}
1866	}
1867
1868	int
1869	timer_get_user_idle_level(void) {
1870	return timer_user_idle_level;
1871	}
1872
1873	kern_return_t timer_set_user_idle_level(int ilevel) {
1874	boolean_t do_reeval = FALSE;
1875
1876	if ((ilevel < `0`) \|\| (ilevel > `128`))
1877	return KERN_INVALID_ARGUMENT;
1878
1879	if (ilevel < timer_user_idle_level) {
1880	do_reeval = TRUE;
1881	}
1882
1883	timer_user_idle_level = ilevel;
1884
1885	if (do_reeval)
1886	ml_timer_evaluate();
1887
1888	return KERN_SUCCESS;
1889	}
1890

/* Browse the source code of codebrowser/osfmk/kern/timer_call.c */