sched_multiq.c source code [codebrowser/osfmk/kern/sched_multiq.c]

1	/*
2	* Copyright (c) 2013 Apple Inc. All rights reserved.
3	*
4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5	*
6	* This file contains Original Code and/or Modifications of Original Code
7	* as defined in and that are subject to the Apple Public Source License
8	* Version 2.0 (the 'License'). You may not use this file except in
9	* compliance with the License. The rights granted to you under the License
10	* may not be used to create, or enable the creation or redistribution of,
11	* unlawful or unlicensed copies of an Apple operating system, or to
12	* circumvent, violate, or enable the circumvention or violation of, any
13	* terms of an Apple operating system software license agreement.
14	*
15	* Please obtain a copy of the License at
16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
17	*
18	* The Original Code and all software distributed under the License are
19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23	* Please see the License for the specific language governing rights and
24	* limitations under the License.
25	*
26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27	*/
28
29	#include <mach/mach_types.h>
30	#include <mach/machine.h>
31
32	#include <machine/machine_routines.h>
33	#include <machine/sched_param.h>
34	#include <machine/machine_cpu.h>
35
36	#include <kern/kern_types.h>
37	#include <kern/debug.h>
38	#include <kern/mach_param.h>
39	#include <kern/machine.h>
40	#include <kern/misc_protos.h>
41	#include <kern/processor.h>
42	#include <kern/queue.h>
43	#include <kern/sched.h>
44	#include <kern/sched_prim.h>
45	#include <kern/task.h>
46	#include <kern/thread.h>
47
48	#include <sys/kdebug.h>
49
50	/*
51	* Theory Statement
52	*
53	* How does the task scheduler work?
54	*
55	* It schedules threads across a few levels.
56	*
57	* RT threads are dealt with above us
58	* Bound threads go into the per-processor runq
59	* Non-bound threads are linked on their task's sched_group's runq
60	* sched_groups' sched_entries are linked on the pset's runq
61	*
62	* TODO: make this explicit - bound threads should have a different enqueue fxn
63	*
64	* When we choose a new thread, we will decide whether to look at the bound runqueue, the global runqueue
65	* or the current group's runqueue, then dequeue the next thread in that runqueue.
66	*
67	* We then manipulate the sched_entries to reflect the invariant that:
68	* Each non-empty priority level in a group's runq is represented by one sched_entry enqueued in the global
69	* runqueue.
70	*
71	* A sched_entry represents a chance at running - for each priority in each task, there is one chance of getting
72	* to run. This reduces the excess contention bonus given to processes which have work spread among many threads
73	* as compared to processes which do the same amount of work under fewer threads.
74	*
75	* NOTE: Currently, the multiq scheduler only supports one pset.
76	*
77	* NOTE ABOUT thread->sched_pri:
78	*
79	* It can change after enqueue - it's changed without pset lock but with thread lock if thread->runq is 0.
80	* Therefore we can only depend on it not changing during the enqueue and remove path, not the dequeue.
81	*
82	* TODO: Future features:
83	*
84	* Decouple the task priority from the sched_entry priority, allowing for:
85	* fast task priority change without having to iterate and re-dispatch all threads in the task.
86	* i.e. task-wide priority, task-wide boosting
87	* fancier group decay features
88	*
89	* Group (or task) decay:
90	* Decay is used for a few different things:
91	* Prioritizing latency-needing threads over throughput-needing threads for time-to-running
92	* Balancing work between threads in a process
93	* Balancing work done at the same priority between different processes
94	* Recovering from priority inversions between two threads in the same process
95	* Recovering from priority inversions between two threads in different processes
96	* Simulating a proportional share scheduler by allowing lower priority threads
97	* to run for a certain percentage of the time
98	*
99	* Task decay lets us separately address the 'same process' and 'different process' needs,
100	* which will allow us to make smarter tradeoffs in different cases.
101	* For example, we could resolve priority inversion in the same process by reordering threads without dropping the
102	* process below low priority threads in other processes.
103	*
104	* One lock to rule them all (or at least all the runqueues) instead of the pset locks
105	*
106	* Shrink sched_entry size to the size of a queue_chain_t by inferring priority, group, and perhaps runq field.
107	* The entries array is 5K currently so it'd be really great to reduce.
108	* One way to get sched_group below 4K without a new runq structure would be to remove the extra queues above realtime.
109	*
110	* When preempting a processor, store a flag saying if the preemption
111	* was from a thread in the same group or different group,
112	* and tell choose_thread about it.
113	*
114	* When choosing a processor, bias towards those running in the same
115	* group as I am running (at the same priority, or within a certain band?).
116	*
117	* Decide if we need to support psets.
118	* Decide how to support psets - do we need duplicate entries for each pset,
119	* or can we get away with putting the entry in either one or the other pset?
120	*
121	* Consider the right way to handle runq count - I don't want to iterate groups.
122	* Perhaps keep a global counter.
123	* Alternate option - remove it from choose_processor. It doesn't add much value
124	* now that we have global runq.
125	*
126	* Need a better way of finding group to target instead of looking at current_task.
127	* Perhaps choose_thread could pass in the current thread?
128	*
129	* Consider unifying runq copy-pastes.
130	*
131	* Thoughts on having a group central quantum bucket:
132	*
133	* I see two algorithms to decide quanta:
134	* A) Hand off only when switching thread to thread in the same group
135	* B) Allocate and return quanta to the group's pool
136	*
137	* Issues:
138	* If a task blocks completely, should it come back with the leftover quanta
139	* or brand new quanta?
140	*
141	* Should I put a flag saying zero out a quanta you grab when youre dispatched'?
142	*
143	* Resolution:
144	* Handing off quanta between threads will help with jumping around in the current task
145	* but will not help when a thread from a different task is involved.
146	* Need an algorithm that works with round robin-ing between threads in different tasks
147	*
148	* But wait - round robining can only be triggered by quantum expire or blocking.
149	* We need something that works with preemption or yielding - that's the more interesting idea.
150	*
151	* Existing algorithm - preemption doesn't re-set quantum, puts thread on head of runq.
152	* Blocking or quantum expiration does re-set quantum, puts thread on tail of runq.
153	*
154	* New algorithm -
155	* Hand off quanta when hopping between threads with same sched_group
156	* Even if thread was blocked it uses last thread remaining quanta when it starts.
157	*
158	* If we use the only cycle entry at quantum algorithm, then the quantum pool starts getting
159	* interesting.
160	*
161	* A thought - perhaps the handoff approach doesn't work so well in the presence of
162	* non-handoff wakeups i.e. wake other thread then wait then block - doesn't mean that
163	* woken thread will be what I switch to - other processor may have stolen it.
164	* What do we do there?
165	*
166	* Conclusions:
167	* We currently don't know of a scenario where quantum buckets on the task is beneficial.
168	* We will instead handoff quantum between threads in the task, and keep quantum
169	* on the preempted thread if it's preempted by something outside the task.
170	*
171	*/
172
173	#if DEBUG \|\| DEVELOPMENT
174	#define MULTIQ_SANITY_CHECK
175	#endif
176
177	typedef struct sched_entry {
178	queue_chain_t entry_links;
179	int16_t sched_pri; / scheduled (current) priority /
180	int16_t runq;
181	int32_t pad;
182	} *sched_entry_t;
183
184	typedef run_queue_t entry_queue_t; / A run queue that holds sched_entries instead of threads /
185	typedef run_queue_t group_runq_t; / A run queue that is part of a sched_group /
186
187	#define SCHED_ENTRY_NULL ((sched_entry_t) 0)
188	#define MULTIQ_ERUNQ (-4) /* Indicates entry is on the main runq */
189
190	/ Each level in the run queue corresponds to one entry in the entries array /
191	struct sched_group {
192	struct sched_entry entries[NRQS];
193	struct run_queue runq;
194	queue_chain_t sched_groups;
195	};
196
197	/*
198	* Keep entry on the head of the runqueue while dequeueing threads.
199	* Only cycle it to the end of the runqueue when a thread in the task
200	* hits its quantum.
201	*/
202	static boolean_t deep_drain = FALSE;
203
204	/ Verify the consistency of the runq before touching it /
205	static boolean_t multiq_sanity_check = FALSE;
206
207	/*
208	* Draining threads from the current task is preferred
209	* when they're less than X steps below the current
210	* global highest priority
211	*/
212	#define DEFAULT_DRAIN_BAND_LIMIT MAXPRI
213	static integer_t drain_band_limit;
214
215	/*
216	* Don't go below this priority level if there is something above it in another task
217	*/
218	#define DEFAULT_DRAIN_DEPTH_LIMIT MAXPRI_THROTTLE
219	static integer_t drain_depth_limit;
220
221	/*
222	* Don't favor the task when there's something above this priority in another task.
223	*/
224	#define DEFAULT_DRAIN_CEILING BASEPRI_FOREGROUND
225	static integer_t drain_ceiling;
226
227	static struct zone *sched_group_zone;
228
229	static uint64_t num_sched_groups = `0`;
230	static queue_head_t sched_groups;
231
232	static lck_attr_t sched_groups_lock_attr;
233	static lck_grp_t sched_groups_lock_grp;
234	static lck_grp_attr_t sched_groups_lock_grp_attr;
235
236	static lck_mtx_t sched_groups_lock;
237
238
239	static void
240	sched_multiq_init(void);
241
242	static thread_t
243	sched_multiq_steal_thread(processor_set_t pset);
244
245	static void
246	sched_multiq_thread_update_scan(sched_update_scan_context_t scan_context);
247
248	static boolean_t
249	sched_multiq_processor_enqueue(processor_t processor, thread_t thread, integer_t options);
250
251	static boolean_t
252	sched_multiq_processor_queue_remove(processor_t processor, thread_t thread);
253
254	void
255	sched_multiq_quantum_expire(thread_t thread);
256
257	static ast_t
258	sched_multiq_processor_csw_check(processor_t processor);
259
260	static boolean_t
261	sched_multiq_processor_queue_has_priority(processor_t processor, int priority, boolean_t gte);
262
263	static int
264	sched_multiq_runq_count(processor_t processor);
265
266	static boolean_t
267	sched_multiq_processor_queue_empty(processor_t processor);
268
269	static uint64_t
270	sched_multiq_runq_stats_count_sum(processor_t processor);
271
272	static int
273	sched_multiq_processor_bound_count(processor_t processor);
274
275	static void
276	sched_multiq_pset_init(processor_set_t pset);
277
278	static void
279	sched_multiq_processor_init(processor_t processor);
280
281	static thread_t
282	sched_multiq_choose_thread(processor_t processor, int priority, ast_t reason);
283
284	static void
285	sched_multiq_processor_queue_shutdown(processor_t processor);
286
287	static sched_mode_t
288	sched_multiq_initial_thread_sched_mode(task_t parent_task);
289
290	static bool
291	sched_multiq_thread_avoid_processor(processor_t processor, thread_t thread);
292
293	const struct sched_dispatch_table sched_multiq_dispatch = {
294	.sched_name = "multiq",
295	.init = sched_multiq_init,
296	.timebase_init = sched_timeshare_timebase_init,
297	.processor_init = sched_multiq_processor_init,
298	.pset_init = sched_multiq_pset_init,
299	.maintenance_continuation = sched_timeshare_maintenance_continue,
300	.choose_thread = sched_multiq_choose_thread,
301	.steal_thread_enabled = FALSE,
302	.steal_thread = sched_multiq_steal_thread,
303	.compute_timeshare_priority = sched_compute_timeshare_priority,
304	.choose_processor = choose_processor,
305	.processor_enqueue = sched_multiq_processor_enqueue,
306	.processor_queue_shutdown = sched_multiq_processor_queue_shutdown,
307	.processor_queue_remove = sched_multiq_processor_queue_remove,
308	.processor_queue_empty = sched_multiq_processor_queue_empty,
309	.priority_is_urgent = priority_is_urgent,
310	.processor_csw_check = sched_multiq_processor_csw_check,
311	.processor_queue_has_priority = sched_multiq_processor_queue_has_priority,
312	.initial_quantum_size = sched_timeshare_initial_quantum_size,
313	.initial_thread_sched_mode = sched_multiq_initial_thread_sched_mode,
314	.can_update_priority = can_update_priority,
315	.update_priority = update_priority,
316	.lightweight_update_priority = lightweight_update_priority,
317	.quantum_expire = sched_multiq_quantum_expire,
318	.processor_runq_count = sched_multiq_runq_count,
319	.processor_runq_stats_count_sum = sched_multiq_runq_stats_count_sum,
320	.processor_bound_count = sched_multiq_processor_bound_count,
321	.thread_update_scan = sched_multiq_thread_update_scan,
322	.direct_dispatch_to_idle_processors = FALSE,
323	.multiple_psets_enabled = FALSE,
324	.sched_groups_enabled = TRUE,
325	.avoid_processor_enabled = TRUE,
326	.thread_avoid_processor = sched_multiq_thread_avoid_processor,
327	.processor_balance = sched_SMT_balance,
328
329	.rt_runq = sched_rtglobal_runq,
330	.rt_init = sched_rtglobal_init,
331	.rt_queue_shutdown = sched_rtglobal_queue_shutdown,
332	.rt_runq_scan = sched_rtglobal_runq_scan,
333	.rt_runq_count_sum = sched_rtglobal_runq_count_sum,
334
335	.qos_max_parallelism = sched_qos_max_parallelism,
336	.check_spill = sched_check_spill,
337	.ipi_policy = sched_ipi_policy,
338	.thread_should_yield = sched_thread_should_yield,
339	};
340
341
342	static void
343	sched_multiq_init(void)
344	{
345	#if defined(MULTIQ_SANITY_CHECK)
346	PE_parse_boot_argn("-multiq-sanity-check", &multiq_sanity_check, sizeof(multiq_sanity_check));
347	#endif
348
349	PE_parse_boot_argn("-multiq-deep-drain", &deep_drain, sizeof(deep_drain));
350
351	if (!PE_parse_boot_argn("multiq_drain_ceiling", &drain_ceiling, sizeof(drain_ceiling))) {
352	drain_ceiling = DEFAULT_DRAIN_CEILING;
353	}
354
355	if (!PE_parse_boot_argn("multiq_drain_depth_limit", &drain_depth_limit, sizeof(drain_depth_limit))) {
356	drain_depth_limit = DEFAULT_DRAIN_DEPTH_LIMIT;
357	}
358
359	if (!PE_parse_boot_argn("multiq_drain_band_limit", &drain_band_limit, sizeof(drain_band_limit))) {
360	drain_band_limit = DEFAULT_DRAIN_BAND_LIMIT;
361	}
362
363	printf("multiq scheduler config: deep-drain %d, ceiling %d, depth limit %d, band limit %d, sanity check %d\n",
364	deep_drain, drain_ceiling, drain_depth_limit, drain_band_limit, multiq_sanity_check);
365
366	sched_group_zone = zinit(
367	sizeof(struct sched_group),
368	task_max * sizeof(struct sched_group),
369	PAGE_SIZE,
370	"sched groups");
371
372	zone_change(sched_group_zone, Z_NOENCRYPT, TRUE);
373	zone_change(sched_group_zone, Z_NOCALLOUT, TRUE);
374
375	queue_init(&sched_groups);
376
377	lck_grp_attr_setdefault(&sched_groups_lock_grp_attr);
378	lck_grp_init(&sched_groups_lock_grp, "sched_groups", &sched_groups_lock_grp_attr);
379	lck_attr_setdefault(&sched_groups_lock_attr);
380	lck_mtx_init(&sched_groups_lock, &sched_groups_lock_grp, &sched_groups_lock_attr);
381
382	sched_timeshare_init();
383	}
384
385	static void
386	sched_multiq_processor_init(processor_t processor)
387	{
388	run_queue_init(&processor->runq);
389	}
390
391	static void
392	sched_multiq_pset_init(processor_set_t pset)
393	{
394	run_queue_init(&pset->pset_runq);
395	}
396
397	static sched_mode_t
398	sched_multiq_initial_thread_sched_mode(task_t parent_task)
399	{
400	if (parent_task == kernel_task)
401	return TH_MODE_FIXED;
402	else
403	return TH_MODE_TIMESHARE;
404	}
405
406	sched_group_t
407	sched_group_create(void)
408	{
409	sched_group_t sched_group;
410
411	if (!SCHED(sched_groups_enabled))
412	return SCHED_GROUP_NULL;
413
414	sched_group = (sched_group_t)zalloc(sched_group_zone);
415
416	bzero(sched_group, sizeof(struct sched_group));
417
418	run_queue_init(&sched_group->runq);
419
420	for (int i = `0`; i < NRQS; i++) {
421	sched_group->entries[i].runq = `0`;
422	sched_group->entries[i].sched_pri = i;
423	}
424
425	lck_mtx_lock(&sched_groups_lock);
426	queue_enter(&sched_groups, sched_group, sched_group_t, sched_groups);
427	num_sched_groups++;
428	lck_mtx_unlock(&sched_groups_lock);
429
430	return (sched_group);
431	}
432
433	void
434	sched_group_destroy(sched_group_t sched_group)
435	{
436	if (!SCHED(sched_groups_enabled)) {
437	assert(sched_group == SCHED_GROUP_NULL);
438	return;
439	}
440
441	assert(sched_group != SCHED_GROUP_NULL);
442	assert(sched_group->runq.count == `0`);
443
444	for (int i = `0`; i < NRQS; i++) {
445	assert(sched_group->entries[i].runq == `0`);
446	assert(sched_group->entries[i].sched_pri == i);
447	}
448
449	lck_mtx_lock(&sched_groups_lock);
450	queue_remove(&sched_groups, sched_group, sched_group_t, sched_groups);
451	num_sched_groups--;
452	lck_mtx_unlock(&sched_groups_lock);
453
454	zfree(sched_group_zone, sched_group);
455	}
456
457	__attribute__((always_inline))
458	static inline entry_queue_t
459	multiq_main_entryq(processor_t processor)
460	{
461	return (entry_queue_t)&processor->processor_set->pset_runq;
462	}
463
464	__attribute__((always_inline))
465	static inline run_queue_t
466	multiq_bound_runq(processor_t processor)
467	{
468	return &processor->runq;
469	}
470
471	__attribute__((always_inline))
472	static inline sched_entry_t
473	group_entry_for_pri(sched_group_t group, integer_t pri)
474	{
475	return &group->entries[pri];
476	}
477
478	__attribute__((always_inline))
479	static inline sched_group_t
480	group_for_entry(sched_entry_t entry)
481	{
482	#pragma clang diagnostic push
483	#pragma clang diagnostic ignored "-Wcast-align"
484	sched_group_t group = (sched_group_t)(entry - entry->sched_pri);
485	#pragma clang diagnostic pop
486	return group;
487	}
488
489	/ Peek at the head of the runqueue /
490	static sched_entry_t
491	entry_queue_first_entry(entry_queue_t rq)
492	{
493	assert(rq->count != `0`);
494
495	queue_t queue = &rq->queues[rq->highq];
496
497	sched_entry_t entry = qe_queue_first(queue, struct sched_entry, entry_links);
498
499	assert(entry->sched_pri == rq->highq);
500
501	return entry;
502	}
503
504	#if defined(MULTIQ_SANITY_CHECK)
505
506	#if MACH_ASSERT
507	__attribute__((always_inline))
508	static inline boolean_t
509	queue_chain_linked(queue_chain_t* chain)
510	{
511	if (chain->next != NULL) {
512	assert(chain->prev != NULL);
513	return TRUE;
514	} else {
515	assert(chain->prev == NULL);
516	return FALSE;
517	}
518	}
519	#endif /* MACH_ASSERT */
520
521	static thread_t
522	group_first_thread(sched_group_t group)
523	{
524	group_runq_t rq = &group->runq;
525
526	assert(rq->count != `0`);
527
528	queue_t queue = &rq->queues[rq->highq];
529
530	thread_t thread = qe_queue_first(queue, struct thread, runq_links);
531
532	assert(thread != THREAD_NULL);
533	assert_thread_magic(thread);
534
535	assert(thread->sched_group == group);
536
537	/ TODO: May not be safe /
538	assert(thread->sched_pri == rq->highq);
539
540	return thread;
541	}
542
543	/ Asserts if entry is not in entry runq at pri /
544	static void
545	entry_queue_check_entry(entry_queue_t runq, sched_entry_t entry, int expected_pri)
546	{
547	queue_t q;
548	sched_entry_t elem;
549
550	assert(queue_chain_linked(&entry->entry_links));
551	assert(entry->runq == MULTIQ_ERUNQ);
552
553	q = &runq->queues[expected_pri];
554
555	qe_foreach_element(elem, q, entry_links) {
556	if (elem == entry)
557	return;
558	}
559
560	panic("runq %p doesn't contain entry %p at pri %d", runq, entry, expected_pri);
561	}
562
563	/ Asserts if thread is not in group at its priority /
564	static void
565	sched_group_check_thread(sched_group_t group, thread_t thread)
566	{
567	queue_t q;
568	thread_t elem;
569	int pri = thread->sched_pri;
570
571	assert(thread->runq != PROCESSOR_NULL);
572
573	q = &group->runq.queues[pri];
574
575	qe_foreach_element(elem, q, runq_links) {
576	if (elem == thread)
577	return;
578	}
579
580	panic("group %p doesn't contain thread %p at pri %d", group, thread, pri);
581	}
582
583	static void
584	global_check_entry_queue(entry_queue_t main_entryq)
585	{
586	if (main_entryq->count == `0`)
587	return;
588
589	sched_entry_t entry = entry_queue_first_entry(main_entryq);
590
591	assert(entry->runq == MULTIQ_ERUNQ);
592
593	sched_group_t group = group_for_entry(entry);
594
595	thread_t thread = group_first_thread(group);
596
597	__assert_only sched_entry_t thread_entry = group_entry_for_pri(thread->sched_group, thread->sched_pri);
598
599	assert(entry->sched_pri == group->runq.highq);
600
601	assert(entry == thread_entry);
602	assert(thread->runq != PROCESSOR_NULL);
603	}
604
605	static void
606	group_check_run_queue(entry_queue_t main_entryq, sched_group_t group)
607	{
608	if (group->runq.count == `0`)
609	return;
610
611	thread_t thread = group_first_thread(group);
612
613	assert(thread->runq != PROCESSOR_NULL);
614
615	sched_entry_t sched_entry = group_entry_for_pri(thread->sched_group, thread->sched_pri);
616
617	entry_queue_check_entry(main_entryq, sched_entry, thread->sched_pri);
618
619	assert(sched_entry->sched_pri == thread->sched_pri);
620	assert(sched_entry->runq == MULTIQ_ERUNQ);
621	}
622
623	#endif /* defined(MULTIQ_SANITY_CHECK) */
624
625	/*
626	* The run queue must not be empty.
627	*/
628	static sched_entry_t
629	entry_queue_dequeue_entry(entry_queue_t rq)
630	{
631	sched_entry_t sched_entry;
632	queue_t queue = &rq->queues[rq->highq];
633
634	assert(rq->count > `0`);
635	assert(!queue_empty(queue));
636
637	sched_entry = qe_dequeue_head(queue, struct sched_entry, entry_links);
638
639	SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
640	rq->count--;
641	if (SCHED(priority_is_urgent)(rq->highq)) {
642	rq->urgency--; assert(rq->urgency >= `0`);
643	}
644	if (queue_empty(queue)) {
645	rq_bitmap_clear(rq->bitmap, rq->highq);
646	rq->highq = bitmap_first(rq->bitmap, NRQS);
647	}
648
649	sched_entry->runq = `0`;
650
651	return (sched_entry);
652	}
653
654	/*
655	* The run queue must not be empty.
656	*/
657	static boolean_t
658	entry_queue_enqueue_entry(
659	entry_queue_t rq,
660	sched_entry_t entry,
661	integer_t options)
662	{
663	int sched_pri = entry->sched_pri;
664	queue_t queue = &rq->queues[sched_pri];
665	boolean_t result = FALSE;
666
667	assert(entry->runq == `0`);
668
669	if (queue_empty(queue)) {
670	enqueue_tail(queue, &entry->entry_links);
671
672	rq_bitmap_set(rq->bitmap, sched_pri);
673	if (sched_pri > rq->highq) {
674	rq->highq = sched_pri;
675	result = TRUE;
676	}
677	} else {
678	if (options & SCHED_TAILQ)
679	enqueue_tail(queue, &entry->entry_links);
680	else
681	enqueue_head(queue, &entry->entry_links);
682	}
683	if (SCHED(priority_is_urgent)(sched_pri))
684	rq->urgency++;
685	SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
686	rq->count++;
687
688	entry->runq = MULTIQ_ERUNQ;
689
690	return (result);
691	}
692
693	/*
694	* The entry must be in this runqueue.
695	*/
696	static void
697	entry_queue_remove_entry(
698	entry_queue_t rq,
699	sched_entry_t entry)
700	{
701	int sched_pri = entry->sched_pri;
702
703	#if defined(MULTIQ_SANITY_CHECK)
704	if (multiq_sanity_check) {
705	entry_queue_check_entry(rq, entry, sched_pri);
706	}
707	#endif
708
709	remqueue(&entry->entry_links);
710
711	SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
712	rq->count--;
713	if (SCHED(priority_is_urgent)(sched_pri)) {
714	rq->urgency--; assert(rq->urgency >= `0`);
715	}
716
717	if (queue_empty(&rq->queues[sched_pri])) {
718	/ update run queue status /
719	rq_bitmap_clear(rq->bitmap, sched_pri);
720	rq->highq = bitmap_first(rq->bitmap, NRQS);
721	}
722
723	entry->runq = `0`;
724	}
725
726	static void
727	entry_queue_change_entry(
728	entry_queue_t rq,
729	sched_entry_t entry,
730	integer_t options)
731	{
732	int sched_pri = entry->sched_pri;
733	queue_t queue = &rq->queues[sched_pri];
734
735	#if defined(MULTIQ_SANITY_CHECK)
736	if (multiq_sanity_check) {
737	entry_queue_check_entry(rq, entry, sched_pri);
738	}
739	#endif
740
741	if (options & SCHED_TAILQ)
742	re_queue_tail(queue, &entry->entry_links);
743	else
744	re_queue_head(queue, &entry->entry_links);
745	}
746	/*
747	* The run queue must not be empty.
748	*
749	* sets queue_empty to TRUE if queue is now empty at thread_pri
750	*/
751	static thread_t
752	group_run_queue_dequeue_thread(
753	group_runq_t rq,
754	integer_t *thread_pri,
755	boolean_t *queue_empty)
756	{
757	thread_t thread;
758	queue_t queue = &rq->queues[rq->highq];
759
760	assert(rq->count > `0`);
761	assert(!queue_empty(queue));
762
763	*thread_pri = rq->highq;
764
765	thread = qe_dequeue_head(queue, struct thread, runq_links);
766	assert_thread_magic(thread);
767
768	SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
769	rq->count--;
770	if (SCHED(priority_is_urgent)(rq->highq)) {
771	rq->urgency--; assert(rq->urgency >= `0`);
772	}
773	if (queue_empty(queue)) {
774	rq_bitmap_clear(rq->bitmap, rq->highq);
775	rq->highq = bitmap_first(rq->bitmap, NRQS);
776	*queue_empty = TRUE;
777	} else {
778	*queue_empty = FALSE;
779	}
780
781	return thread;
782	}
783
784	/*
785	* The run queue must not be empty.
786	* returns TRUE if queue was empty at thread_pri
787	*/
788	static boolean_t
789	group_run_queue_enqueue_thread(
790	group_runq_t rq,
791	thread_t thread,
792	integer_t thread_pri,
793	integer_t options)
794	{
795	queue_t queue = &rq->queues[thread_pri];
796	boolean_t result = FALSE;
797
798	assert(thread->runq == PROCESSOR_NULL);
799	assert_thread_magic(thread);
800
801	if (queue_empty(queue)) {
802	enqueue_tail(queue, &thread->runq_links);
803
804	rq_bitmap_set(rq->bitmap, thread_pri);
805	if (thread_pri > rq->highq) {
806	rq->highq = thread_pri;
807	}
808	result = TRUE;
809	} else {
810	if (options & SCHED_TAILQ)
811	enqueue_tail(queue, &thread->runq_links);
812	else
813	enqueue_head(queue, &thread->runq_links);
814	}
815	if (SCHED(priority_is_urgent)(thread_pri))
816	rq->urgency++;
817	SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
818	rq->count++;
819
820	return (result);
821	}
822
823	/*
824	* The thread must be in this runqueue.
825	* returns TRUE if queue is now empty at thread_pri
826	*/
827	static boolean_t
828	group_run_queue_remove_thread(
829	group_runq_t rq,
830	thread_t thread,
831	integer_t thread_pri)
832	{
833	boolean_t result = FALSE;
834
835	assert_thread_magic(thread);
836	assert(thread->runq != PROCESSOR_NULL);
837
838	remqueue(&thread->runq_links);
839
840	SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
841	rq->count--;
842	if (SCHED(priority_is_urgent)(thread_pri)) {
843	rq->urgency--; assert(rq->urgency >= `0`);
844	}
845
846	if (queue_empty(&rq->queues[thread_pri])) {
847	/ update run queue status /
848	rq_bitmap_clear(rq->bitmap, thread_pri);
849	rq->highq = bitmap_first(rq->bitmap, NRQS);
850	result = TRUE;
851	}
852
853	thread->runq = PROCESSOR_NULL;
854
855	return result;
856	}
857
858	/*
859	* A thread's sched pri may change out from under us because
860	* we're clearing thread->runq here without the thread locked.
861	* Do not rely on it to be the same as when we enqueued.
862	*/
863	static thread_t
864	sched_global_dequeue_thread(entry_queue_t main_entryq)
865	{
866	boolean_t pri_level_empty = FALSE;
867	sched_entry_t entry;
868	group_runq_t group_runq;
869	thread_t thread;
870	integer_t thread_pri;
871	sched_group_t group;
872
873	assert(main_entryq->count > `0`);
874
875	entry = entry_queue_dequeue_entry(main_entryq);
876
877	group = group_for_entry(entry);
878	group_runq = &group->runq;
879
880	thread = group_run_queue_dequeue_thread(group_runq, &thread_pri, &pri_level_empty);
881
882	thread->runq = PROCESSOR_NULL;
883
884	if (!pri_level_empty) {
885	entry_queue_enqueue_entry(main_entryq, entry, SCHED_TAILQ);
886	}
887
888	return thread;
889	}
890
891	/ Dequeue a thread from the global runq without moving the entry /
892	static thread_t
893	sched_global_deep_drain_dequeue_thread(entry_queue_t main_entryq)
894	{
895	boolean_t pri_level_empty = FALSE;
896	sched_entry_t entry;
897	group_runq_t group_runq;
898	thread_t thread;
899	integer_t thread_pri;
900	sched_group_t group;
901
902	assert(main_entryq->count > `0`);
903
904	entry = entry_queue_first_entry(main_entryq);
905
906	group = group_for_entry(entry);
907	group_runq = &group->runq;
908
909	thread = group_run_queue_dequeue_thread(group_runq, &thread_pri, &pri_level_empty);
910
911	thread->runq = PROCESSOR_NULL;
912
913	if (pri_level_empty) {
914	entry_queue_remove_entry(main_entryq, entry);
915	}
916
917	return thread;
918	}
919
920
921	static thread_t
922	sched_group_dequeue_thread(
923	entry_queue_t main_entryq,
924	sched_group_t group)
925	{
926	group_runq_t group_runq = &group->runq;
927	boolean_t pri_level_empty = FALSE;
928	thread_t thread;
929	integer_t thread_pri;
930
931	thread = group_run_queue_dequeue_thread(group_runq, &thread_pri, &pri_level_empty);
932
933	thread->runq = PROCESSOR_NULL;
934
935	if (pri_level_empty) {
936	entry_queue_remove_entry(main_entryq, group_entry_for_pri(group, thread_pri));
937	}
938
939	return thread;
940	}
941
942	static void
943	sched_group_remove_thread(
944	entry_queue_t main_entryq,
945	sched_group_t group,
946	thread_t thread)
947	{
948	integer_t thread_pri = thread->sched_pri;
949	sched_entry_t sched_entry = group_entry_for_pri(group, thread_pri);
950
951	#if defined(MULTIQ_SANITY_CHECK)
952	if (multiq_sanity_check) {
953	global_check_entry_queue(main_entryq);
954	group_check_run_queue(main_entryq, group);
955
956	sched_group_check_thread(group, thread);
957	entry_queue_check_entry(main_entryq, sched_entry, thread_pri);
958	}
959	#endif
960
961	boolean_t pri_level_empty = group_run_queue_remove_thread(&group->runq, thread, thread_pri);
962
963	if (pri_level_empty) {
964	entry_queue_remove_entry(main_entryq, sched_entry);
965	}
966
967	#if defined(MULTIQ_SANITY_CHECK)
968	if (multiq_sanity_check) {
969	global_check_entry_queue(main_entryq);
970	group_check_run_queue(main_entryq, group);
971	}
972	#endif
973	}
974
975	static void
976	sched_group_enqueue_thread(
977	entry_queue_t main_entryq,
978	sched_group_t group,
979	thread_t thread,
980	integer_t options)
981	{
982	#if defined(MULTIQ_SANITY_CHECK)
983	if (multiq_sanity_check) {
984	global_check_entry_queue(main_entryq);
985	group_check_run_queue(main_entryq, group);
986	}
987	#endif
988
989	int sched_pri = thread->sched_pri;
990
991	boolean_t pri_level_was_empty = group_run_queue_enqueue_thread(&group->runq, thread, sched_pri, options);
992
993	if (pri_level_was_empty) {
994	/*
995	* TODO: Need to figure out if passing options here is a good idea or not
996	* What effects would it have?
997	*/
998	entry_queue_enqueue_entry(main_entryq, &group->entries[sched_pri], options);
999	} else if (options & SCHED_HEADQ) {
1000	/ The thread should be at the head of the line - move its entry to the front /
1001	entry_queue_change_entry(main_entryq, &group->entries[sched_pri], options);
1002	}
1003	}
1004
1005	/*
1006	* Locate a thread to execute from the run queue and return it.
1007	* Only choose a thread with greater or equal priority.
1008	*
1009	* pset is locked, thread is not locked.
1010	*
1011	* Returns THREAD_NULL if it cannot find a valid thread.
1012	*
1013	* Note: we cannot rely on the value of thread->sched_pri in this path because
1014	* we don't have the thread locked.
1015	*
1016	* TODO: Remove tracepoints
1017	*/
1018	static thread_t
1019	sched_multiq_choose_thread(
1020	processor_t processor,
1021	int priority,
1022	ast_t reason)
1023	{
1024	entry_queue_t main_entryq = multiq_main_entryq(processor);
1025	run_queue_t bound_runq = multiq_bound_runq(processor);
1026
1027	boolean_t choose_bound_runq = FALSE;
1028
1029	if (bound_runq->highq < priority &&
1030	main_entryq->highq < priority)
1031	return THREAD_NULL;
1032
1033	if (bound_runq->count && main_entryq->count) {
1034	if (bound_runq->highq >= main_entryq->highq) {
1035	choose_bound_runq = TRUE;
1036	} else {
1037	/ Use main runq /
1038	}
1039	} else if (bound_runq->count) {
1040	choose_bound_runq = TRUE;
1041	} else if (main_entryq->count) {
1042	/ Use main runq /
1043	} else {
1044	return (THREAD_NULL);
1045	}
1046
1047	if (choose_bound_runq) {
1048	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1049	MACHDBG_CODE(DBG_MACH_SCHED, MACH_MULTIQ_DEQUEUE) \| DBG_FUNC_NONE,
1050	MACH_MULTIQ_BOUND, main_entryq->highq, bound_runq->highq, `0`, `0`);
1051
1052	return run_queue_dequeue(bound_runq, SCHED_HEADQ);
1053	}
1054
1055	sched_group_t group = current_thread()->sched_group;
1056
1057	#if defined(MULTIQ_SANITY_CHECK)
1058	if (multiq_sanity_check) {
1059	global_check_entry_queue(main_entryq);
1060	group_check_run_queue(main_entryq, group);
1061	}
1062	#endif
1063
1064	/*
1065	* Determine if we should look at the group or the global queue
1066	*
1067	* TODO:
1068	* Perhaps pass reason as a 'should look inside' argument to choose_thread
1069	* Should YIELD AST override drain limit?
1070	*/
1071	if (group->runq.count != `0` && (reason & AST_PREEMPTION) == `0`) {
1072	boolean_t favor_group = TRUE;
1073
1074	integer_t global_pri = main_entryq->highq;
1075	integer_t group_pri = group->runq.highq;
1076
1077	/*
1078	* Favor the current group if the group is still the globally highest.
1079	*
1080	* Otherwise, consider choosing a thread from the current group
1081	* even if it's lower priority than the global highest priority.
1082	*/
1083	if (global_pri > group_pri) {
1084	/*
1085	* If there's something elsewhere above the depth limit,
1086	* don't pick a thread below the limit.
1087	*/
1088	if (global_pri > drain_depth_limit && group_pri <= drain_depth_limit)
1089	favor_group = FALSE;
1090
1091	/*
1092	* If there's something at or above the ceiling,
1093	* don't favor the group.
1094	*/
1095	if (global_pri >= drain_ceiling)
1096	favor_group = FALSE;
1097
1098	/*
1099	* Don't go more than X steps below the global highest
1100	*/
1101	if ((global_pri - group_pri) >= drain_band_limit)
1102	favor_group = FALSE;
1103	}
1104
1105	if (favor_group) {
1106	/ Pull from local runq /
1107	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1108	MACHDBG_CODE(DBG_MACH_SCHED, MACH_MULTIQ_DEQUEUE) \| DBG_FUNC_NONE,
1109	MACH_MULTIQ_GROUP, global_pri, group_pri, `0`, `0`);
1110
1111	return sched_group_dequeue_thread(main_entryq, group);
1112	}
1113	}
1114
1115	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1116	MACHDBG_CODE(DBG_MACH_SCHED, MACH_MULTIQ_DEQUEUE) \| DBG_FUNC_NONE,
1117	MACH_MULTIQ_GLOBAL, main_entryq->highq, group->runq.highq, `0`, `0`);
1118
1119	/ Couldn't pull from local runq, pull from global runq instead /
1120	if (deep_drain) {
1121	return sched_global_deep_drain_dequeue_thread(main_entryq);
1122	} else {
1123	return sched_global_dequeue_thread(main_entryq);
1124	}
1125	}
1126
1127
1128	/*
1129	* Thread must be locked, and not already be on a run queue.
1130	* pset is locked.
1131	*/
1132	static boolean_t
1133	sched_multiq_processor_enqueue(
1134	processor_t processor,
1135	thread_t thread,
1136	integer_t options)
1137	{
1138	boolean_t result;
1139
1140	assert(processor == thread->chosen_processor);
1141
1142	if (thread->bound_processor != PROCESSOR_NULL) {
1143	assert(thread->bound_processor == processor);
1144
1145	result = run_queue_enqueue(multiq_bound_runq(processor), thread, options);
1146	thread->runq = processor;
1147
1148	return result;
1149	}
1150
1151	sched_group_enqueue_thread(multiq_main_entryq(processor),
1152	thread->sched_group,
1153	thread, options);
1154
1155	thread->runq = processor;
1156
1157	return (FALSE);
1158	}
1159
1160	/*
1161	* Called in the context of thread with thread and pset unlocked,
1162	* after updating thread priority but before propagating that priority
1163	* to the processor
1164	*/
1165	void
1166	sched_multiq_quantum_expire(thread_t thread)
1167	{
1168	if (deep_drain) {
1169	/*
1170	* Move the entry at this priority to the end of the queue,
1171	* to allow the next task a shot at running.
1172	*/
1173
1174	processor_t processor = thread->last_processor;
1175	processor_set_t pset = processor->processor_set;
1176	entry_queue_t entryq = multiq_main_entryq(processor);
1177
1178	pset_lock(pset);
1179
1180	sched_entry_t entry = group_entry_for_pri(thread->sched_group, processor->current_pri);
1181
1182	if (entry->runq == MULTIQ_ERUNQ) {
1183	entry_queue_change_entry(entryq, entry, SCHED_TAILQ);
1184	}
1185
1186	pset_unlock(pset);
1187	}
1188	}
1189
1190	static boolean_t
1191	sched_multiq_processor_queue_empty(processor_t processor)
1192	{
1193	return multiq_main_entryq(processor)->count == `0` &&
1194	multiq_bound_runq(processor)->count == `0`;
1195	}
1196
1197	static ast_t
1198	sched_multiq_processor_csw_check(processor_t processor)
1199	{
1200	boolean_t has_higher;
1201	int pri;
1202
1203	if (sched_multiq_thread_avoid_processor(processor, current_thread())) {
1204	return (AST_PREEMPT \| AST_URGENT);
1205	}
1206
1207	entry_queue_t main_entryq = multiq_main_entryq(processor);
1208	run_queue_t bound_runq = multiq_bound_runq(processor);
1209
1210	assert(processor->active_thread != NULL);
1211
1212	pri = MAX(main_entryq->highq, bound_runq->highq);
1213
1214	if (processor->first_timeslice) {
1215	has_higher = (pri > processor->current_pri);
1216	} else {
1217	has_higher = (pri >= processor->current_pri);
1218	}
1219
1220	if (has_higher) {
1221	if (main_entryq->urgency > `0`)
1222	return (AST_PREEMPT \| AST_URGENT);
1223
1224	if (bound_runq->urgency > `0`)
1225	return (AST_PREEMPT \| AST_URGENT);
1226
1227	return AST_PREEMPT;
1228	}
1229
1230	return AST_NONE;
1231	}
1232
1233	static boolean_t
1234	sched_multiq_processor_queue_has_priority(
1235	processor_t processor,
1236	int priority,
1237	boolean_t gte)
1238	{
1239	run_queue_t main_runq = multiq_main_entryq(processor);
1240	run_queue_t bound_runq = multiq_bound_runq(processor);
1241
1242	int qpri = MAX(main_runq->highq, bound_runq->highq);
1243
1244	if (gte)
1245	return qpri >= priority;
1246	else
1247	return qpri > priority;
1248	}
1249
1250	static int
1251	sched_multiq_runq_count(processor_t processor)
1252	{
1253	/*
1254	* TODO: Decide whether to keep a count of runnable threads in the pset
1255	* or just return something less than the true count.
1256	*
1257	* This needs to be fast, so no iterating the whole runq.
1258	*
1259	* Another possible decision is to remove this - with global runq
1260	* it doesn't make much sense.
1261	*/
1262	return multiq_main_entryq(processor)->count + multiq_bound_runq(processor)->count;
1263	}
1264
1265	static uint64_t
1266	sched_multiq_runq_stats_count_sum(processor_t processor)
1267	{
1268	/*
1269	* TODO: This one does need to go through all the runqueues, but it's only needed for
1270	* the sched stats tool
1271	*/
1272
1273	uint64_t bound_sum = multiq_bound_runq(processor)->runq_stats.count_sum;
1274
1275	if (processor->cpu_id == processor->processor_set->cpu_set_low)
1276	return bound_sum + multiq_main_entryq(processor)->runq_stats.count_sum;
1277	else
1278	return bound_sum;
1279	}
1280
1281	static int
1282	sched_multiq_processor_bound_count(processor_t processor)
1283	{
1284	return multiq_bound_runq(processor)->count;
1285	}
1286
1287	static void
1288	sched_multiq_processor_queue_shutdown(processor_t processor)
1289	{
1290	processor_set_t pset = processor->processor_set;
1291	entry_queue_t main_entryq = multiq_main_entryq(processor);
1292	thread_t thread;
1293	queue_head_t tqueue;
1294
1295	/ We only need to migrate threads if this is the last active processor in the pset /
1296	if (pset->online_processor_count > `0`) {
1297	pset_unlock(pset);
1298	return;
1299	}
1300
1301	queue_init(&tqueue);
1302
1303	/ Note that we do not remove bound threads from the queues here /
1304
1305	while (main_entryq->count > `0`) {
1306	thread = sched_global_dequeue_thread(main_entryq);
1307	enqueue_tail(&tqueue, &thread->runq_links);
1308	}
1309
1310	pset_unlock(pset);
1311
1312	qe_foreach_element_safe(thread, &tqueue, runq_links) {
1313
1314	remqueue(&thread->runq_links);
1315
1316	thread_lock(thread);
1317
1318	thread_setrun(thread, SCHED_TAILQ);
1319
1320	thread_unlock(thread);
1321	}
1322	}
1323
1324	/*
1325	* Thread is locked
1326	*
1327	* This is why we can never read sched_pri unless we have the thread locked.
1328	* Which we do in the enqueue and remove cases, but not the dequeue case.
1329	*/
1330	static boolean_t
1331	sched_multiq_processor_queue_remove(
1332	processor_t processor,
1333	thread_t thread)
1334	{
1335	boolean_t removed = FALSE;
1336	processor_set_t pset = processor->processor_set;
1337
1338	pset_lock(pset);
1339
1340	if (thread->runq != PROCESSOR_NULL) {
1341	/*
1342	* Thread is on a run queue and we have a lock on
1343	* that run queue.
1344	*/
1345
1346	assert(thread->runq == processor);
1347
1348	if (thread->bound_processor != PROCESSOR_NULL) {
1349	assert(processor == thread->bound_processor);
1350	run_queue_remove(multiq_bound_runq(processor), thread);
1351	thread->runq = PROCESSOR_NULL;
1352	} else {
1353	sched_group_remove_thread(multiq_main_entryq(processor),
1354	thread->sched_group,
1355	thread);
1356	}
1357
1358	removed = TRUE;
1359	}
1360
1361	pset_unlock(pset);
1362
1363	return removed;
1364	}
1365
1366	/ pset is locked, returned unlocked /
1367	static thread_t
1368	sched_multiq_steal_thread(processor_set_t pset)
1369	{
1370	pset_unlock(pset);
1371	return (THREAD_NULL);
1372	}
1373
1374	/*
1375	* Scan the global queue for candidate groups, and scan those groups for
1376	* candidate threads.
1377	*
1378	* TODO: This iterates every group runq in its entirety for each entry it has in the runq, which is O(N^2)
1379	* Instead, iterate only the queue in the group runq matching the priority of the entry.
1380	*
1381	* Returns TRUE if retry is needed.
1382	*/
1383	static boolean_t
1384	group_scan(entry_queue_t runq, sched_update_scan_context_t scan_context) {
1385	int count = runq->count;
1386	int queue_index;
1387
1388	assert(count >= `0`);
1389
1390	if (count == `0`)
1391	return FALSE;
1392
1393	for (queue_index = bitmap_first(runq->bitmap, NRQS);
1394	queue_index >= `0`;
1395	queue_index = bitmap_next(runq->bitmap, queue_index)) {
1396
1397	sched_entry_t entry;
1398
1399	qe_foreach_element(entry, &runq->queues[queue_index], entry_links) {
1400	assert(count > `0`);
1401
1402	sched_group_t group = group_for_entry(entry);
1403	if (group->runq.count > `0`) {
1404	if (runq_scan(&group->runq, scan_context))
1405	return (TRUE);
1406	}
1407	count--;
1408	}
1409	}
1410
1411	return (FALSE);
1412	}
1413
1414	static void
1415	sched_multiq_thread_update_scan(sched_update_scan_context_t scan_context)
1416	{
1417	boolean_t restart_needed = FALSE;
1418	processor_t processor = processor_list;
1419	processor_set_t pset;
1420	thread_t thread;
1421	spl_t s;
1422
1423	/*
1424	* We update the threads associated with each processor (bound and idle threads)
1425	* and then update the threads in each pset runqueue.
1426	*/
1427
1428	do {
1429	do {
1430	pset = processor->processor_set;
1431
1432	s = splsched();
1433	pset_lock(pset);
1434
1435	restart_needed = runq_scan(multiq_bound_runq(processor), scan_context);
1436
1437	pset_unlock(pset);
1438	splx(s);
1439
1440	if (restart_needed)
1441	break;
1442
1443	thread = processor->idle_thread;
1444	if (thread != THREAD_NULL && thread->sched_stamp != sched_tick) {
1445	if (thread_update_add_thread(thread) == FALSE) {
1446	restart_needed = TRUE;
1447	break;
1448	}
1449	}
1450	} while ((processor = processor->processor_list) != NULL);
1451
1452	/ Ok, we now have a collection of candidates -- fix them. /
1453	thread_update_process_threads();
1454
1455	} while (restart_needed);
1456
1457	pset = &pset0;
1458
1459	do {
1460	do {
1461	s = splsched();
1462	pset_lock(pset);
1463
1464	restart_needed = group_scan(&pset->pset_runq, scan_context);
1465
1466	pset_unlock(pset);
1467	splx(s);
1468
1469	if (restart_needed)
1470	break;
1471	} while ((pset = pset->pset_list) != NULL);
1472
1473	/ Ok, we now have a collection of candidates -- fix them. /
1474	thread_update_process_threads();
1475
1476	} while (restart_needed);
1477	}
1478
1479	extern int sched_allow_rt_smt;
1480
1481	/ Return true if this thread should not continue running on this processor /
1482	static bool
1483	sched_multiq_thread_avoid_processor(processor_t processor, thread_t thread)
1484	{
1485	if (processor->processor_primary != processor) {
1486	/*
1487	* This is a secondary SMT processor. If the primary is running
1488	* a realtime thread, only allow realtime threads on the secondary.
1489	*/
1490	if ((processor->processor_primary->current_pri >= BASEPRI_RTQUEUES) && ((thread->sched_pri < BASEPRI_RTQUEUES) \|\| !sched_allow_rt_smt)) {
1491	return true;
1492	}
1493	}
1494
1495	return false;
1496	}
1497

Browse the source code of codebrowser/osfmk/kern/sched_multiq.c