| 1 | /* |
| 2 | * Copyright (c) 2000-2016 Apple Inc. All rights reserved. |
| 3 | * |
| 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
| 5 | * |
| 6 | * This file contains Original Code and/or Modifications of Original Code |
| 7 | * as defined in and that are subject to the Apple Public Source License |
| 8 | * Version 2.0 (the 'License'). You may not use this file except in |
| 9 | * compliance with the License. The rights granted to you under the License |
| 10 | * may not be used to create, or enable the creation or redistribution of, |
| 11 | * unlawful or unlicensed copies of an Apple operating system, or to |
| 12 | * circumvent, violate, or enable the circumvention or violation of, any |
| 13 | * terms of an Apple operating system software license agreement. |
| 14 | * |
| 15 | * Please obtain a copy of the License at |
| 16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
| 17 | * |
| 18 | * The Original Code and all software distributed under the License are |
| 19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
| 20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
| 21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
| 22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
| 23 | * Please see the License for the specific language governing rights and |
| 24 | * limitations under the License. |
| 25 | * |
| 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
| 27 | */ |
| 28 | /* |
| 29 | * @OSF_COPYRIGHT@ |
| 30 | */ |
| 31 | /* |
| 32 | * Mach Operating System |
| 33 | * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University |
| 34 | * All Rights Reserved. |
| 35 | * |
| 36 | * Permission to use, copy, modify and distribute this software and its |
| 37 | * documentation is hereby granted, provided that both the copyright |
| 38 | * notice and this permission notice appear in all copies of the |
| 39 | * software, derivative works or modified versions, and any portions |
| 40 | * thereof, and that both notices appear in supporting documentation. |
| 41 | * |
| 42 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" |
| 43 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR |
| 44 | * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. |
| 45 | * |
| 46 | * Carnegie Mellon requests users of this software to return to |
| 47 | * |
| 48 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU |
| 49 | * School of Computer Science |
| 50 | * Carnegie Mellon University |
| 51 | * Pittsburgh PA 15213-3890 |
| 52 | * |
| 53 | * any improvements or extensions that they make and grant Carnegie Mellon |
| 54 | * the rights to redistribute these changes. |
| 55 | */ |
| 56 | |
| 57 | #define ATOMIC_PRIVATE 1 |
| 58 | #define LOCK_PRIVATE 1 |
| 59 | |
| 60 | #include <mach_ldebug.h> |
| 61 | #include <debug.h> |
| 62 | |
| 63 | #include <mach/kern_return.h> |
| 64 | #include <mach/mach_host_server.h> |
| 65 | #include <mach_debug/lockgroup_info.h> |
| 66 | |
| 67 | #include <kern/locks.h> |
| 68 | #include <kern/misc_protos.h> |
| 69 | #include <kern/kalloc.h> |
| 70 | #include <kern/thread.h> |
| 71 | #include <kern/processor.h> |
| 72 | #include <kern/sched_prim.h> |
| 73 | #include <kern/debug.h> |
| 74 | #include <libkern/section_keywords.h> |
| 75 | #include <machine/atomic.h> |
| 76 | #include <machine/machine_cpu.h> |
| 77 | #include <string.h> |
| 78 | |
| 79 | #include <sys/kdebug.h> |
| 80 | |
| 81 | #if CONFIG_DTRACE |
| 82 | /* |
| 83 | * We need only enough declarations from the BSD-side to be able to |
| 84 | * test if our probe is active, and to call __dtrace_probe(). Setting |
| 85 | * NEED_DTRACE_DEFS gets a local copy of those definitions pulled in. |
| 86 | */ |
| 87 | #define NEED_DTRACE_DEFS |
| 88 | #include <../bsd/sys/lockstat.h> |
| 89 | #endif |
| 90 | |
| 91 | #define LCK_MTX_SLEEP_CODE 0 |
| 92 | #define LCK_MTX_SLEEP_DEADLINE_CODE 1 |
| 93 | #define LCK_MTX_LCK_WAIT_CODE 2 |
| 94 | #define LCK_MTX_UNLCK_WAKEUP_CODE 3 |
| 95 | |
| 96 | #if MACH_LDEBUG |
| 97 | #define ALIGN_TEST(p,t) do{if((uintptr_t)p&(sizeof(t)-1)) __builtin_trap();}while(0) |
| 98 | #else |
| 99 | #define ALIGN_TEST(p,t) do{}while(0) |
| 100 | #endif |
| 101 | |
| 102 | /* Silence the volatile to _Atomic cast warning */ |
| 103 | #define ATOMIC_CAST(t,p) ((_Atomic t*)(uintptr_t)(p)) |
| 104 | |
| 105 | /* Enforce program order of loads and stores. */ |
| 106 | #define ordered_load(target, type) \ |
| 107 | __c11_atomic_load((_Atomic type *)(target), memory_order_relaxed) |
| 108 | #define ordered_store(target, type, value) \ |
| 109 | __c11_atomic_store((_Atomic type *)(target), value, memory_order_relaxed) |
| 110 | |
| 111 | #define ordered_load_hw(lock) ordered_load(&(lock)->lock_data, uintptr_t) |
| 112 | #define ordered_store_hw(lock, value) ordered_store(&(lock)->lock_data, uintptr_t, (value)) |
| 113 | |
| 114 | #define NOINLINE __attribute__((noinline)) |
| 115 | |
| 116 | |
| 117 | static queue_head_t lck_grp_queue; |
| 118 | static unsigned int lck_grp_cnt; |
| 119 | |
| 120 | decl_lck_mtx_data(static,lck_grp_lock) |
| 121 | static lck_mtx_ext_t lck_grp_lock_ext; |
| 122 | |
| 123 | SECURITY_READ_ONLY_LATE(boolean_t) spinlock_timeout_panic = TRUE; |
| 124 | |
| 125 | lck_grp_attr_t LockDefaultGroupAttr; |
| 126 | lck_grp_t LockCompatGroup; |
| 127 | lck_attr_t LockDefaultLckAttr; |
| 128 | |
| 129 | #if CONFIG_DTRACE && __SMP__ |
| 130 | #if defined (__x86_64__) |
| 131 | uint64_t dtrace_spin_threshold = 500; // 500ns |
| 132 | #elif defined(__arm__) || defined(__arm64__) |
| 133 | uint64_t dtrace_spin_threshold = LOCK_PANIC_TIMEOUT / 1000000; // 500ns |
| 134 | #endif |
| 135 | #endif |
| 136 | |
| 137 | uintptr_t |
| 138 | unslide_for_kdebug(void* object) { |
| 139 | if (__improbable(kdebug_enable)) |
| 140 | return VM_KERNEL_UNSLIDE_OR_PERM(object); |
| 141 | else |
| 142 | return 0; |
| 143 | } |
| 144 | |
| 145 | /* |
| 146 | * Routine: lck_mod_init |
| 147 | */ |
| 148 | |
| 149 | void |
| 150 | lck_mod_init( |
| 151 | void) |
| 152 | { |
| 153 | /* |
	 * Obtain "lcks" options: this currently controls lock statistics.
| 155 | */ |
| 156 | if (!PE_parse_boot_argn("lcks" , &LcksOpts, sizeof (LcksOpts))) |
| 157 | LcksOpts = 0; |
| 158 | |
| 159 | |
| 160 | #if (DEVELOPMENT || DEBUG) && defined(__x86_64__) |
| 161 | if (!PE_parse_boot_argn("-disable_mtx_chk" , &LckDisablePreemptCheck, sizeof (LckDisablePreemptCheck))) |
| 162 | LckDisablePreemptCheck = 0; |
| 163 | #endif /* (DEVELOPMENT || DEBUG) && defined(__x86_64__) */ |
| 164 | |
| 165 | queue_init(&lck_grp_queue); |
| 166 | |
| 167 | /* |
| 168 | * Need to bootstrap the LockCompatGroup instead of calling lck_grp_init() here. This avoids |
| 169 | * grabbing the lck_grp_lock before it is initialized. |
| 170 | */ |
| 171 | |
| 172 | bzero(&LockCompatGroup, sizeof(lck_grp_t)); |
| 173 | (void) strncpy(LockCompatGroup.lck_grp_name, "Compatibility APIs" , LCK_GRP_MAX_NAME); |
| 174 | |
| 175 | if (LcksOpts & enaLkStat) |
| 176 | LockCompatGroup.lck_grp_attr = LCK_GRP_ATTR_STAT; |
| 177 | else |
| 178 | LockCompatGroup.lck_grp_attr = LCK_ATTR_NONE; |
| 179 | |
| 180 | LockCompatGroup.lck_grp_refcnt = 1; |
| 181 | |
| 182 | enqueue_tail(&lck_grp_queue, (queue_entry_t)&LockCompatGroup); |
| 183 | lck_grp_cnt = 1; |
| 184 | |
| 185 | lck_grp_attr_setdefault(&LockDefaultGroupAttr); |
| 186 | lck_attr_setdefault(&LockDefaultLckAttr); |
| 187 | |
| 188 | lck_mtx_init_ext(&lck_grp_lock, &lck_grp_lock_ext, &LockCompatGroup, &LockDefaultLckAttr); |
| 189 | } |
| 190 | |
| 191 | /* |
| 192 | * Routine: lck_grp_attr_alloc_init |
| 193 | */ |
| 194 | |
| 195 | lck_grp_attr_t * |
| 196 | lck_grp_attr_alloc_init( |
| 197 | void) |
| 198 | { |
| 199 | lck_grp_attr_t *attr; |
| 200 | |
| 201 | if ((attr = (lck_grp_attr_t *)kalloc(sizeof(lck_grp_attr_t))) != 0) |
| 202 | lck_grp_attr_setdefault(attr); |
| 203 | |
| 204 | return(attr); |
| 205 | } |
| 206 | |
| 207 | |
| 208 | /* |
| 209 | * Routine: lck_grp_attr_setdefault |
| 210 | */ |
| 211 | |
| 212 | void |
| 213 | lck_grp_attr_setdefault( |
| 214 | lck_grp_attr_t *attr) |
| 215 | { |
| 216 | if (LcksOpts & enaLkStat) |
| 217 | attr->grp_attr_val = LCK_GRP_ATTR_STAT; |
| 218 | else |
| 219 | attr->grp_attr_val = 0; |
| 220 | } |
| 221 | |
| 222 | |
| 223 | /* |
| 224 | * Routine: lck_grp_attr_setstat |
| 225 | */ |
| 226 | |
| 227 | void |
| 228 | lck_grp_attr_setstat( |
| 229 | lck_grp_attr_t *attr) |
| 230 | { |
| 231 | (void)hw_atomic_or(&attr->grp_attr_val, LCK_GRP_ATTR_STAT); |
| 232 | } |
| 233 | |
| 234 | |
| 235 | /* |
| 236 | * Routine: lck_grp_attr_free |
| 237 | */ |
| 238 | |
| 239 | void |
| 240 | lck_grp_attr_free( |
| 241 | lck_grp_attr_t *attr) |
| 242 | { |
| 243 | kfree(attr, sizeof(lck_grp_attr_t)); |
| 244 | } |
| 245 | |
| 246 | |
| 247 | /* |
| 248 | * Routine: lck_grp_alloc_init |
| 249 | */ |
| 250 | |
| 251 | lck_grp_t * |
| 252 | lck_grp_alloc_init( |
| 253 | const char* grp_name, |
| 254 | lck_grp_attr_t *attr) |
| 255 | { |
| 256 | lck_grp_t *grp; |
| 257 | |
| 258 | if ((grp = (lck_grp_t *)kalloc(sizeof(lck_grp_t))) != 0) |
| 259 | lck_grp_init(grp, grp_name, attr); |
| 260 | |
| 261 | return(grp); |
| 262 | } |
| 263 | |
| 264 | /* |
| 265 | * Routine: lck_grp_init |
| 266 | */ |
| 267 | |
| 268 | void |
| 269 | lck_grp_init(lck_grp_t * grp, const char * grp_name, lck_grp_attr_t * attr) |
| 270 | { |
| 271 | /* make sure locking infrastructure has been initialized */ |
| 272 | assert(lck_grp_cnt > 0); |
| 273 | |
| 274 | bzero((void *)grp, sizeof(lck_grp_t)); |
| 275 | |
| 276 | (void)strlcpy(grp->lck_grp_name, grp_name, LCK_GRP_MAX_NAME); |
| 277 | |
| 278 | if (attr != LCK_GRP_ATTR_NULL) |
| 279 | grp->lck_grp_attr = attr->grp_attr_val; |
| 280 | else if (LcksOpts & enaLkStat) |
| 281 | grp->lck_grp_attr = LCK_GRP_ATTR_STAT; |
| 282 | else |
| 283 | grp->lck_grp_attr = LCK_ATTR_NONE; |
| 284 | |
| 285 | grp->lck_grp_refcnt = 1; |
| 286 | |
| 287 | lck_mtx_lock(&lck_grp_lock); |
| 288 | enqueue_tail(&lck_grp_queue, (queue_entry_t)grp); |
| 289 | lck_grp_cnt++; |
| 290 | lck_mtx_unlock(&lck_grp_lock); |
| 291 | } |
| 292 | |
| 293 | /* |
| 294 | * Routine: lck_grp_free |
| 295 | */ |
| 296 | |
| 297 | void |
| 298 | lck_grp_free( |
| 299 | lck_grp_t *grp) |
| 300 | { |
| 301 | lck_mtx_lock(&lck_grp_lock); |
| 302 | lck_grp_cnt--; |
| 303 | (void)remque((queue_entry_t)grp); |
| 304 | lck_mtx_unlock(&lck_grp_lock); |
| 305 | lck_grp_deallocate(grp); |
| 306 | } |
| 307 | |
| 308 | |
| 309 | /* |
| 310 | * Routine: lck_grp_reference |
| 311 | */ |
| 312 | |
| 313 | void |
| 314 | lck_grp_reference( |
| 315 | lck_grp_t *grp) |
| 316 | { |
| 317 | (void)hw_atomic_add(&grp->lck_grp_refcnt, 1); |
| 318 | } |
| 319 | |
| 320 | |
| 321 | /* |
| 322 | * Routine: lck_grp_deallocate |
| 323 | */ |
| 324 | |
| 325 | void |
| 326 | lck_grp_deallocate( |
| 327 | lck_grp_t *grp) |
| 328 | { |
| 329 | if (hw_atomic_sub(&grp->lck_grp_refcnt, 1) == 0) |
| 330 | kfree(grp, sizeof(lck_grp_t)); |
| 331 | } |
| 332 | |
| 333 | /* |
| 334 | * Routine: lck_grp_lckcnt_incr |
| 335 | */ |
| 336 | |
| 337 | void |
| 338 | lck_grp_lckcnt_incr( |
| 339 | lck_grp_t *grp, |
| 340 | lck_type_t lck_type) |
| 341 | { |
| 342 | unsigned int *lckcnt; |
| 343 | |
| 344 | switch (lck_type) { |
| 345 | case LCK_TYPE_SPIN: |
| 346 | lckcnt = &grp->lck_grp_spincnt; |
| 347 | break; |
| 348 | case LCK_TYPE_MTX: |
| 349 | lckcnt = &grp->lck_grp_mtxcnt; |
| 350 | break; |
| 351 | case LCK_TYPE_RW: |
| 352 | lckcnt = &grp->lck_grp_rwcnt; |
| 353 | break; |
| 354 | default: |
		panic("lck_grp_lckcnt_incr(): invalid lock type: %d\n", lck_type);
		return;
| 356 | } |
| 357 | |
| 358 | (void)hw_atomic_add(lckcnt, 1); |
| 359 | } |
| 360 | |
| 361 | /* |
| 362 | * Routine: lck_grp_lckcnt_decr |
| 363 | */ |
| 364 | |
| 365 | void |
| 366 | lck_grp_lckcnt_decr( |
| 367 | lck_grp_t *grp, |
| 368 | lck_type_t lck_type) |
| 369 | { |
| 370 | unsigned int *lckcnt; |
| 371 | int updated; |
| 372 | |
| 373 | switch (lck_type) { |
| 374 | case LCK_TYPE_SPIN: |
| 375 | lckcnt = &grp->lck_grp_spincnt; |
| 376 | break; |
| 377 | case LCK_TYPE_MTX: |
| 378 | lckcnt = &grp->lck_grp_mtxcnt; |
| 379 | break; |
| 380 | case LCK_TYPE_RW: |
| 381 | lckcnt = &grp->lck_grp_rwcnt; |
| 382 | break; |
| 383 | default: |
| 384 | panic("lck_grp_lckcnt_decr(): invalid lock type: %d\n" , lck_type); |
| 385 | return; |
| 386 | } |
| 387 | |
| 388 | updated = (int)hw_atomic_sub(lckcnt, 1); |
| 389 | assert(updated >= 0); |
| 390 | } |
| 391 | |
| 392 | /* |
| 393 | * Routine: lck_attr_alloc_init |
| 394 | */ |
| 395 | |
| 396 | lck_attr_t * |
| 397 | lck_attr_alloc_init( |
| 398 | void) |
| 399 | { |
| 400 | lck_attr_t *attr; |
| 401 | |
| 402 | if ((attr = (lck_attr_t *)kalloc(sizeof(lck_attr_t))) != 0) |
| 403 | lck_attr_setdefault(attr); |
| 404 | |
| 405 | return(attr); |
| 406 | } |
| 407 | |
| 408 | |
| 409 | /* |
| 410 | * Routine: lck_attr_setdefault |
| 411 | */ |
| 412 | |
| 413 | void |
| 414 | lck_attr_setdefault( |
| 415 | lck_attr_t *attr) |
| 416 | { |
| 417 | #if __arm__ || __arm64__ |
| 418 | /* <rdar://problem/4404579>: Using LCK_ATTR_DEBUG here causes panic at boot time for arm */ |
| 419 | attr->lck_attr_val = LCK_ATTR_NONE; |
| 420 | #elif __i386__ || __x86_64__ |
| 421 | #if !DEBUG |
| 422 | if (LcksOpts & enaLkDeb) |
| 423 | attr->lck_attr_val = LCK_ATTR_DEBUG; |
| 424 | else |
| 425 | attr->lck_attr_val = LCK_ATTR_NONE; |
| 426 | #else |
| 427 | attr->lck_attr_val = LCK_ATTR_DEBUG; |
| 428 | #endif /* !DEBUG */ |
| 429 | #else |
| 430 | #error Unknown architecture. |
#endif	/* __arm__ || __arm64__ */
| 432 | } |
| 433 | |
| 434 | |
| 435 | /* |
| 436 | * Routine: lck_attr_setdebug |
| 437 | */ |
| 438 | void |
| 439 | lck_attr_setdebug( |
| 440 | lck_attr_t *attr) |
| 441 | { |
| 442 | (void)hw_atomic_or(&attr->lck_attr_val, LCK_ATTR_DEBUG); |
| 443 | } |
| 444 | |
| 445 | /* |
 * Routine:	lck_attr_cleardebug
| 447 | */ |
| 448 | void |
| 449 | lck_attr_cleardebug( |
| 450 | lck_attr_t *attr) |
| 451 | { |
| 452 | (void)hw_atomic_and(&attr->lck_attr_val, ~LCK_ATTR_DEBUG); |
| 453 | } |
| 454 | |
| 455 | |
| 456 | /* |
| 457 | * Routine: lck_attr_rw_shared_priority |
| 458 | */ |
| 459 | void |
| 460 | lck_attr_rw_shared_priority( |
| 461 | lck_attr_t *attr) |
| 462 | { |
| 463 | (void)hw_atomic_or(&attr->lck_attr_val, LCK_ATTR_RW_SHARED_PRIORITY); |
| 464 | } |
| 465 | |
| 466 | |
| 467 | /* |
| 468 | * Routine: lck_attr_free |
| 469 | */ |
| 470 | void |
| 471 | lck_attr_free( |
| 472 | lck_attr_t *attr) |
| 473 | { |
| 474 | kfree(attr, sizeof(lck_attr_t)); |
| 475 | } |
| 476 | |
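/*
 * Illustrative only (kept in a comment so nothing new is compiled): the
 * typical sequence a kernel subsystem uses to set up the group/attribute
 * objects above and then allocate a mutex with them.  The "mydrv_*" names
 * are hypothetical; lck_mtx_alloc_init()/lck_mtx_free() are the standard
 * allocators declared in kern/locks.h.
 *
 *	static lck_grp_attr_t	*mydrv_grp_attr;
 *	static lck_grp_t	*mydrv_grp;
 *	static lck_attr_t	*mydrv_attr;
 *	static lck_mtx_t	*mydrv_mtx;
 *
 *	void
 *	mydrv_locks_init(void)
 *	{
 *		mydrv_grp_attr = lck_grp_attr_alloc_init();
 *		lck_grp_attr_setstat(mydrv_grp_attr);	// opt in to lock statistics
 *		mydrv_grp = lck_grp_alloc_init("mydrv", mydrv_grp_attr);
 *
 *		mydrv_attr = lck_attr_alloc_init();
 *		mydrv_mtx = lck_mtx_alloc_init(mydrv_grp, mydrv_attr);
 *	}
 *
 *	void
 *	mydrv_locks_fini(void)
 *	{
 *		lck_mtx_free(mydrv_mtx, mydrv_grp);
 *		lck_attr_free(mydrv_attr);
 *		lck_grp_attr_free(mydrv_grp_attr);
 *		lck_grp_free(mydrv_grp);
 *	}
 */
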
| 477 | /* |
| 478 | * Routine: hw_lock_init |
| 479 | * |
| 480 | * Initialize a hardware lock. |
| 481 | */ |
| 482 | void |
| 483 | hw_lock_init(hw_lock_t lock) |
| 484 | { |
| 485 | ordered_store_hw(lock, 0); |
| 486 | } |
| 487 | |
| 488 | /* |
| 489 | * Routine: hw_lock_lock_contended |
| 490 | * |
| 491 | * Spin until lock is acquired or timeout expires. |
| 492 | * timeout is in mach_absolute_time ticks. Called with |
| 493 | * preemption disabled. |
| 494 | */ |
| 495 | |
| 496 | #if __SMP__ |
| 497 | static unsigned int NOINLINE |
| 498 | hw_lock_lock_contended(hw_lock_t lock, uintptr_t data, uint64_t timeout, boolean_t do_panic) |
| 499 | { |
| 500 | uint64_t end = 0; |
| 501 | uintptr_t holder = lock->lock_data; |
| 502 | int i; |
| 503 | |
| 504 | if (timeout == 0) |
| 505 | timeout = LOCK_PANIC_TIMEOUT; |
| 506 | #if CONFIG_DTRACE |
| 507 | uint64_t begin; |
| 508 | boolean_t dtrace_enabled = lockstat_probemap[LS_LCK_SPIN_LOCK_SPIN] != 0; |
| 509 | if (__improbable(dtrace_enabled)) |
| 510 | begin = mach_absolute_time(); |
| 511 | #endif |
| 512 | for ( ; ; ) { |
| 513 | for (i = 0; i < LOCK_SNOOP_SPINS; i++) { |
| 514 | cpu_pause(); |
| 515 | #if (!__ARM_ENABLE_WFE_) || (LOCK_PRETEST) |
| 516 | holder = ordered_load_hw(lock); |
| 517 | if (holder != 0) |
| 518 | continue; |
| 519 | #endif |
| 520 | if (atomic_compare_exchange(&lock->lock_data, 0, data, |
| 521 | memory_order_acquire_smp, TRUE)) { |
| 522 | #if CONFIG_DTRACE |
| 523 | if (__improbable(dtrace_enabled)) { |
| 524 | uint64_t spintime = mach_absolute_time() - begin; |
| 525 | if (spintime > dtrace_spin_threshold) |
| 526 | LOCKSTAT_RECORD2(LS_LCK_SPIN_LOCK_SPIN, lock, spintime, dtrace_spin_threshold); |
| 527 | } |
| 528 | #endif |
| 529 | return 1; |
| 530 | } |
| 531 | } |
| 532 | if (end == 0) { |
| 533 | end = ml_get_timebase() + timeout; |
| 534 | } |
| 535 | else if (ml_get_timebase() >= end) |
| 536 | break; |
| 537 | } |
| 538 | if (do_panic) { |
| 539 | // Capture the actual time spent blocked, which may be higher than the timeout |
| 540 | // if a misbehaving interrupt stole this thread's CPU time. |
| 541 | panic("Spinlock timeout after %llu ticks, %p = %lx" , |
| 542 | (ml_get_timebase() - end + timeout), lock, holder); |
| 543 | } |
| 544 | return 0; |
| 545 | } |
| 546 | #endif // __SMP__ |
| 547 | |
| 548 | static inline void |
| 549 | hw_lock_lock_internal(hw_lock_t lock, thread_t thread) |
| 550 | { |
| 551 | uintptr_t state; |
| 552 | |
| 553 | state = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK; |
| 554 | #if __SMP__ |
| 555 | |
| 556 | #if LOCK_PRETEST |
| 557 | if (ordered_load_hw(lock)) |
| 558 | goto contended; |
| 559 | #endif // LOCK_PRETEST |
| 560 | if (atomic_compare_exchange(&lock->lock_data, 0, state, |
| 561 | memory_order_acquire_smp, TRUE)) { |
| 562 | goto end; |
| 563 | } |
| 564 | #if LOCK_PRETEST |
| 565 | contended: |
| 566 | #endif // LOCK_PRETEST |
| 567 | hw_lock_lock_contended(lock, state, 0, spinlock_timeout_panic); |
| 568 | end: |
| 569 | #else // __SMP__ |
| 570 | if (lock->lock_data) |
| 571 | panic("Spinlock held %p" , lock); |
| 572 | lock->lock_data = state; |
| 573 | #endif // __SMP__ |
| 574 | #if CONFIG_DTRACE |
| 575 | LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, lock, 0); |
| 576 | #endif |
| 577 | return; |
| 578 | } |
| 579 | |
| 580 | /* |
| 581 | * Routine: hw_lock_lock |
| 582 | * |
| 583 | * Acquire lock, spinning until it becomes available, |
| 584 | * return with preemption disabled. |
| 585 | */ |
| 586 | void |
| 587 | hw_lock_lock(hw_lock_t lock) |
| 588 | { |
| 589 | thread_t thread = current_thread(); |
| 590 | disable_preemption_for_thread(thread); |
| 591 | hw_lock_lock_internal(lock, thread); |
| 592 | } |
| 593 | |
| 594 | /* |
| 595 | * Routine: hw_lock_lock_nopreempt |
| 596 | * |
| 597 | * Acquire lock, spinning until it becomes available. |
| 598 | */ |
| 599 | void |
| 600 | hw_lock_lock_nopreempt(hw_lock_t lock) |
| 601 | { |
| 602 | thread_t thread = current_thread(); |
| 603 | if (__improbable(!preemption_disabled_for_thread(thread))) |
| 604 | panic("Attempt to take no-preempt spinlock %p in preemptible context" , lock); |
| 605 | hw_lock_lock_internal(lock, thread); |
| 606 | } |
| 607 | |
| 608 | /* |
| 609 | * Routine: hw_lock_to |
| 610 | * |
| 611 | * Acquire lock, spinning until it becomes available or timeout. |
| 612 | * Timeout is in mach_absolute_time ticks, return with |
| 613 | * preemption disabled. |
| 614 | */ |
| 615 | unsigned int |
| 616 | hw_lock_to(hw_lock_t lock, uint64_t timeout) |
| 617 | { |
| 618 | thread_t thread; |
| 619 | uintptr_t state; |
| 620 | unsigned int success = 0; |
| 621 | |
| 622 | thread = current_thread(); |
| 623 | disable_preemption_for_thread(thread); |
| 624 | state = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK; |
| 625 | #if __SMP__ |
| 626 | |
| 627 | #if LOCK_PRETEST |
| 628 | if (ordered_load_hw(lock)) |
| 629 | goto contended; |
| 630 | #endif // LOCK_PRETEST |
| 631 | if (atomic_compare_exchange(&lock->lock_data, 0, state, |
| 632 | memory_order_acquire_smp, TRUE)) { |
| 633 | success = 1; |
| 634 | goto end; |
| 635 | } |
| 636 | #if LOCK_PRETEST |
| 637 | contended: |
| 638 | #endif // LOCK_PRETEST |
| 639 | success = hw_lock_lock_contended(lock, state, timeout, FALSE); |
| 640 | end: |
| 641 | #else // __SMP__ |
| 642 | (void)timeout; |
| 643 | if (ordered_load_hw(lock) == 0) { |
| 644 | ordered_store_hw(lock, state); |
| 645 | success = 1; |
| 646 | } |
| 647 | #endif // __SMP__ |
| 648 | #if CONFIG_DTRACE |
| 649 | if (success) |
| 650 | LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, lock, 0); |
| 651 | #endif |
| 652 | return success; |
| 653 | } |
| 654 | |
| 655 | /* |
| 656 | * Routine: hw_lock_try |
| 657 | * |
| 658 | * returns with preemption disabled on success. |
| 659 | */ |
| 660 | static inline unsigned int |
| 661 | hw_lock_try_internal(hw_lock_t lock, thread_t thread) |
| 662 | { |
| 663 | int success = 0; |
| 664 | |
| 665 | #if __SMP__ |
| 666 | #if LOCK_PRETEST |
| 667 | if (ordered_load_hw(lock)) |
| 668 | goto failed; |
| 669 | #endif // LOCK_PRETEST |
| 670 | success = atomic_compare_exchange(&lock->lock_data, 0, LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK, |
| 671 | memory_order_acquire_smp, FALSE); |
| 672 | #else |
| 673 | if (lock->lock_data == 0) { |
| 674 | lock->lock_data = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK; |
| 675 | success = 1; |
| 676 | } |
| 677 | #endif // __SMP__ |
| 678 | |
| 679 | #if LOCK_PRETEST |
| 680 | failed: |
| 681 | #endif // LOCK_PRETEST |
| 682 | #if CONFIG_DTRACE |
| 683 | if (success) |
| 684 | LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, lock, 0); |
| 685 | #endif |
| 686 | return success; |
| 687 | } |
| 688 | |
| 689 | unsigned int |
| 690 | hw_lock_try(hw_lock_t lock) |
| 691 | { |
| 692 | thread_t thread = current_thread(); |
| 693 | disable_preemption_for_thread(thread); |
| 694 | unsigned int success = hw_lock_try_internal(lock, thread); |
| 695 | if (!success) |
| 696 | enable_preemption(); |
| 697 | return success; |
| 698 | } |
| 699 | |
| 700 | unsigned int |
| 701 | hw_lock_try_nopreempt(hw_lock_t lock) |
| 702 | { |
| 703 | thread_t thread = current_thread(); |
| 704 | if (__improbable(!preemption_disabled_for_thread(thread))) |
| 705 | panic("Attempt to test no-preempt spinlock %p in preemptible context" , lock); |
| 706 | return hw_lock_try_internal(lock, thread); |
| 707 | } |
| 708 | |
| 709 | /* |
| 710 | * Routine: hw_lock_unlock |
| 711 | * |
| 712 | * Unconditionally release lock, release preemption level. |
| 713 | */ |
| 714 | static inline void |
| 715 | hw_lock_unlock_internal(hw_lock_t lock) |
| 716 | { |
| 717 | __c11_atomic_store((_Atomic uintptr_t *)&lock->lock_data, 0, memory_order_release_smp); |
| 718 | #if __arm__ || __arm64__ |
| 719 | // ARM tests are only for open-source exclusion |
| 720 | set_event(); |
| 721 | #endif // __arm__ || __arm64__ |
| 722 | #if CONFIG_DTRACE |
| 723 | LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, 0); |
| 724 | #endif /* CONFIG_DTRACE */ |
| 725 | } |
| 726 | |
| 727 | void |
| 728 | hw_lock_unlock(hw_lock_t lock) |
| 729 | { |
| 730 | hw_lock_unlock_internal(lock); |
| 731 | enable_preemption(); |
| 732 | } |
| 733 | |
| 734 | void |
| 735 | hw_lock_unlock_nopreempt(hw_lock_t lock) |
| 736 | { |
| 737 | if (__improbable(!preemption_disabled_for_thread(current_thread()))) |
| 738 | panic("Attempt to release no-preempt spinlock %p in preemptible context" , lock); |
| 739 | hw_lock_unlock_internal(lock); |
| 740 | } |
| 741 | |
| 742 | /* |
| 743 | * Routine hw_lock_held, doesn't change preemption state. |
| 744 | * N.B. Racy, of course. |
| 745 | */ |
| 746 | unsigned int |
| 747 | hw_lock_held(hw_lock_t lock) |
| 748 | { |
| 749 | return (ordered_load_hw(lock) != 0); |
| 750 | } |
| 751 | |
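/*
 * Illustrative only (comment, not compiled): how the hw_lock interface
 * above is meant to be used.  "my_hw_lock" is a hypothetical instance of
 * the machine-dependent hw_lock_data_t type.
 *
 *	static hw_lock_data_t my_hw_lock;
 *
 *	hw_lock_init(&my_hw_lock);
 *
 *	hw_lock_lock(&my_hw_lock);	// spins, returns with preemption disabled
 *	... short, non-blocking critical section ...
 *	hw_lock_unlock(&my_hw_lock);	// releases, re-enables preemption
 *
 *	// Bounded variant: give up after 'timeout' mach_absolute_time ticks.
 *	if (hw_lock_to(&my_hw_lock, timeout)) {
 *		...
 *		hw_lock_unlock(&my_hw_lock);
 *	}
 *
 * The _nopreempt variants assume the caller has already disabled
 * preemption and panic if it has not.
 */
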
| 752 | /* |
| 753 | * Routine: lck_spin_sleep |
| 754 | */ |
| 755 | wait_result_t |
| 756 | lck_spin_sleep( |
| 757 | lck_spin_t *lck, |
| 758 | lck_sleep_action_t lck_sleep_action, |
| 759 | event_t event, |
| 760 | wait_interrupt_t interruptible) |
| 761 | { |
| 762 | wait_result_t res; |
| 763 | |
| 764 | if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) |
| 765 | panic("Invalid lock sleep action %x\n" , lck_sleep_action); |
| 766 | |
| 767 | res = assert_wait(event, interruptible); |
| 768 | if (res == THREAD_WAITING) { |
| 769 | lck_spin_unlock(lck); |
| 770 | res = thread_block(THREAD_CONTINUE_NULL); |
| 771 | if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) |
| 772 | lck_spin_lock(lck); |
| 773 | } |
| 774 | else |
| 775 | if (lck_sleep_action & LCK_SLEEP_UNLOCK) |
| 776 | lck_spin_unlock(lck); |
| 777 | |
| 778 | return res; |
| 779 | } |
| 780 | |
| 781 | |
| 782 | /* |
| 783 | * Routine: lck_spin_sleep_deadline |
| 784 | */ |
| 785 | wait_result_t |
| 786 | lck_spin_sleep_deadline( |
| 787 | lck_spin_t *lck, |
| 788 | lck_sleep_action_t lck_sleep_action, |
| 789 | event_t event, |
| 790 | wait_interrupt_t interruptible, |
| 791 | uint64_t deadline) |
| 792 | { |
| 793 | wait_result_t res; |
| 794 | |
| 795 | if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) |
| 796 | panic("Invalid lock sleep action %x\n" , lck_sleep_action); |
| 797 | |
| 798 | res = assert_wait_deadline(event, interruptible, deadline); |
| 799 | if (res == THREAD_WAITING) { |
| 800 | lck_spin_unlock(lck); |
| 801 | res = thread_block(THREAD_CONTINUE_NULL); |
| 802 | if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) |
| 803 | lck_spin_lock(lck); |
| 804 | } |
| 805 | else |
| 806 | if (lck_sleep_action & LCK_SLEEP_UNLOCK) |
| 807 | lck_spin_unlock(lck); |
| 808 | |
| 809 | return res; |
| 810 | } |
| 811 | |
| 812 | /* |
| 813 | * Routine: lck_mtx_sleep |
| 814 | */ |
| 815 | wait_result_t |
| 816 | lck_mtx_sleep( |
| 817 | lck_mtx_t *lck, |
| 818 | lck_sleep_action_t lck_sleep_action, |
| 819 | event_t event, |
| 820 | wait_interrupt_t interruptible) |
| 821 | { |
| 822 | wait_result_t res; |
| 823 | thread_t thread = current_thread(); |
| 824 | |
| 825 | KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_START, |
| 826 | VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0); |
| 827 | |
| 828 | if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) |
| 829 | panic("Invalid lock sleep action %x\n" , lck_sleep_action); |
| 830 | |
| 831 | if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) { |
| 832 | /* |
| 833 | * We overload the RW lock promotion to give us a priority ceiling |
| 834 | * during the time that this thread is asleep, so that when it |
| 835 | * is re-awakened (and not yet contending on the mutex), it is |
| 836 | * runnable at a reasonably high priority. |
| 837 | */ |
| 838 | thread->rwlock_count++; |
| 839 | } |
| 840 | |
| 841 | res = assert_wait(event, interruptible); |
| 842 | if (res == THREAD_WAITING) { |
| 843 | lck_mtx_unlock(lck); |
| 844 | res = thread_block(THREAD_CONTINUE_NULL); |
| 845 | if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) { |
| 846 | if ((lck_sleep_action & LCK_SLEEP_SPIN)) |
| 847 | lck_mtx_lock_spin(lck); |
| 848 | else if ((lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS)) |
| 849 | lck_mtx_lock_spin_always(lck); |
| 850 | else |
| 851 | lck_mtx_lock(lck); |
| 852 | } |
| 853 | } |
| 854 | else |
| 855 | if (lck_sleep_action & LCK_SLEEP_UNLOCK) |
| 856 | lck_mtx_unlock(lck); |
| 857 | |
| 858 | if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) { |
| 859 | if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) { |
| 860 | /* sched_flags checked without lock, but will be rechecked while clearing */ |
| 861 | lck_rw_clear_promotion(thread, unslide_for_kdebug(event)); |
| 862 | } |
| 863 | } |
| 864 | |
| 865 | KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0); |
| 866 | |
| 867 | return res; |
| 868 | } |
| 869 | |
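/*
 * Illustrative only (comment, not compiled): the usual condition-wait
 * pattern built on lck_mtx_sleep().  "my_mtx", "my_flag" and "my_event"
 * are hypothetical names.
 *
 *	lck_mtx_lock(my_mtx);
 *	while (my_flag == 0) {
 *		// Drops my_mtx while blocked on &my_event and re-takes it
 *		// before returning (LCK_SLEEP_DEFAULT).
 *		(void) lck_mtx_sleep(my_mtx, LCK_SLEEP_DEFAULT,
 *		    (event_t)&my_event, THREAD_UNINT);
 *	}
 *	... consume the state protected by my_mtx ...
 *	lck_mtx_unlock(my_mtx);
 *
 * The waking side sets my_flag under my_mtx and calls
 * thread_wakeup((event_t)&my_event).  lck_mtx_sleep_deadline() below is
 * the same pattern with an absolute timeout.
 */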
| 870 | |
| 871 | /* |
| 872 | * Routine: lck_mtx_sleep_deadline |
| 873 | */ |
| 874 | wait_result_t |
| 875 | lck_mtx_sleep_deadline( |
| 876 | lck_mtx_t *lck, |
| 877 | lck_sleep_action_t lck_sleep_action, |
| 878 | event_t event, |
| 879 | wait_interrupt_t interruptible, |
| 880 | uint64_t deadline) |
| 881 | { |
| 882 | wait_result_t res; |
| 883 | thread_t thread = current_thread(); |
| 884 | |
| 885 | KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_START, |
| 886 | VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0); |
| 887 | |
| 888 | if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) |
| 889 | panic("Invalid lock sleep action %x\n" , lck_sleep_action); |
| 890 | |
| 891 | if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) { |
| 892 | /* |
| 893 | * See lck_mtx_sleep(). |
| 894 | */ |
| 895 | thread->rwlock_count++; |
| 896 | } |
| 897 | |
| 898 | res = assert_wait_deadline(event, interruptible, deadline); |
| 899 | if (res == THREAD_WAITING) { |
| 900 | lck_mtx_unlock(lck); |
| 901 | res = thread_block(THREAD_CONTINUE_NULL); |
| 902 | if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) { |
| 903 | if ((lck_sleep_action & LCK_SLEEP_SPIN)) |
| 904 | lck_mtx_lock_spin(lck); |
| 905 | else |
| 906 | lck_mtx_lock(lck); |
| 907 | } |
| 908 | } |
| 909 | else |
| 910 | if (lck_sleep_action & LCK_SLEEP_UNLOCK) |
| 911 | lck_mtx_unlock(lck); |
| 912 | |
| 913 | if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) { |
| 914 | if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) { |
| 915 | /* sched_flags checked without lock, but will be rechecked while clearing */ |
| 916 | lck_rw_clear_promotion(thread, unslide_for_kdebug(event)); |
| 917 | } |
| 918 | } |
| 919 | |
| 920 | KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0); |
| 921 | |
| 922 | return res; |
| 923 | } |
| 924 | |
| 925 | /* |
| 926 | * Lock Boosting Invariants: |
| 927 | * |
| 928 | * The lock owner is always promoted to the max priority of all its waiters. |
| 929 | * Max priority is capped at MAXPRI_PROMOTE. |
| 930 | * |
| 931 | * lck_mtx_pri being set implies that the lock owner is promoted to at least lck_mtx_pri |
| 932 | * This prevents the thread from dropping in priority while holding a mutex |
| 933 | * (note: Intel locks currently don't do this, to avoid thread lock churn) |
| 934 | * |
| 935 | * thread->promotions has a +1 for every mutex currently promoting the thread |
 * and a +1 for was_promoted_on_wakeup being set.
| 937 | * TH_SFLAG_PROMOTED is set on a thread whenever it has any promotions |
| 938 | * from any mutex (i.e. thread->promotions != 0) |
| 939 | * |
| 940 | * was_promoted_on_wakeup is set on a thread which is woken up by a mutex when |
| 941 | * it raises the priority of the woken thread to match lck_mtx_pri. |
| 942 | * It can be set for multiple iterations of wait, fail to acquire, re-wait, etc |
| 943 | * was_promoted_on_wakeup being set always implies a +1 promotions count. |
| 944 | * |
| 945 | * The last waiter is not given a promotion when it wakes up or acquires the lock. |
| 946 | * When the last waiter is waking up, a new contender can always come in and |
| 947 | * steal the lock without having to wait for the last waiter to make forward progress. |
| 948 | * |
| 949 | * lck_mtx_waiters has a +1 for every waiter currently between wait and acquire |
| 950 | * This prevents us from asserting that every wakeup wakes up a thread. |
| 951 | * This also causes excess thread_wakeup calls in the unlock path. |
| 952 | * It can only be fooled into thinking there are more waiters than are |
| 953 | * actually blocked, not less. |
 * It does allow us to reduce the complexity of the lock state.
| 955 | * |
| 956 | * This also means that a starved bg thread as the last waiter could end up |
| 957 | * keeping the lock in the contended state for a long period of time, which |
| 958 | * may keep lck_mtx_pri artificially high for a very long time even though |
| 959 | * it is not participating or blocking anyone else. |
| 960 | * Intel locks don't have this problem because they can go uncontended |
| 961 | * as soon as there are no blocked threads involved. |
| 962 | */ |
| 963 | |
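/*
 * A minimal sketch (an assumption for illustration, not the kernel's
 * actual assert_promotions_invariant(), which is defined elsewhere) of
 * the two rules stated above:
 *
 *	static inline void
 *	sketch_assert_promotions_invariant(thread_t thread)
 *	{
 *		// TH_SFLAG_PROMOTED tracks "any promotions at all"
 *		if (thread->promotions != 0)
 *			assert(thread->sched_flags & TH_SFLAG_PROMOTED);
 *		else
 *			assert(!(thread->sched_flags & TH_SFLAG_PROMOTED));
 *
 *		// was_promoted_on_wakeup always carries a +1 promotion
 *		if (thread->was_promoted_on_wakeup)
 *			assert(thread->promotions > 0);
 *	}
 */
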
| 964 | /* |
| 965 | * Routine: lck_mtx_lock_wait |
| 966 | * |
| 967 | * Invoked in order to wait on contention. |
| 968 | * |
| 969 | * Called with the interlock locked and |
| 970 | * returns it unlocked. |
| 971 | * |
| 972 | * Always aggressively sets the owning thread to promoted, |
| 973 | * even if it's the same or higher priority |
| 974 | * This prevents it from lowering its own priority while holding a lock |
| 975 | * |
| 976 | * TODO: Come up with a more efficient way to handle same-priority promotions |
| 977 | * <rdar://problem/30737670> ARM mutex contention logic could avoid taking the thread lock |
| 978 | */ |
| 979 | void |
| 980 | lck_mtx_lock_wait ( |
| 981 | lck_mtx_t *lck, |
| 982 | thread_t holder) |
| 983 | { |
| 984 | thread_t self = current_thread(); |
| 985 | lck_mtx_t *mutex; |
| 986 | __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck); |
| 987 | |
| 988 | #if CONFIG_DTRACE |
| 989 | uint64_t sleep_start = 0; |
| 990 | |
| 991 | if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) { |
| 992 | sleep_start = mach_absolute_time(); |
| 993 | } |
| 994 | #endif |
| 995 | |
| 996 | if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) |
| 997 | mutex = lck; |
| 998 | else |
| 999 | mutex = &lck->lck_mtx_ptr->lck_mtx; |
| 1000 | |
| 1001 | KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START, |
		     trace_lck, (uintptr_t)thread_tid(self), 0, 0, 0);
| 1003 | |
| 1004 | spl_t s = splsched(); |
| 1005 | thread_lock(holder); |
| 1006 | |
| 1007 | assert_promotions_invariant(holder); |
| 1008 | |
| 1009 | if ((holder->sched_flags & TH_SFLAG_DEPRESS) == 0) |
| 1010 | assert(holder->sched_pri >= mutex->lck_mtx_pri); |
| 1011 | |
| 1012 | integer_t priority = self->sched_pri; |
| 1013 | priority = MAX(priority, self->base_pri); |
| 1014 | priority = MAX(priority, BASEPRI_DEFAULT); |
| 1015 | priority = MIN(priority, MAXPRI_PROMOTE); |
| 1016 | |
| 1017 | if (mutex->lck_mtx_pri == 0) { |
| 1018 | /* This is the first promotion for this mutex */ |
| 1019 | if (holder->promotions++ == 0) { |
| 1020 | /* This is the first promotion for holder */ |
| 1021 | sched_thread_promote_to_pri(holder, priority, trace_lck); |
| 1022 | } else { |
| 1023 | /* Holder was previously promoted due to a different mutex, raise to match this one */ |
| 1024 | sched_thread_update_promotion_to_pri(holder, priority, trace_lck); |
| 1025 | } |
| 1026 | } else { |
| 1027 | /* Holder was previously promoted due to this mutex, check if the pri needs to go up */ |
| 1028 | sched_thread_update_promotion_to_pri(holder, priority, trace_lck); |
| 1029 | } |
| 1030 | |
| 1031 | assert(holder->promotions > 0); |
| 1032 | assert(holder->promotion_priority >= priority); |
| 1033 | |
| 1034 | if ((holder->sched_flags & TH_SFLAG_DEPRESS) == 0) |
| 1035 | assert(holder->sched_pri >= mutex->lck_mtx_pri); |
| 1036 | |
| 1037 | assert_promotions_invariant(holder); |
| 1038 | |
| 1039 | thread_unlock(holder); |
| 1040 | splx(s); |
| 1041 | |
| 1042 | if (mutex->lck_mtx_pri < priority) |
| 1043 | mutex->lck_mtx_pri = priority; |
| 1044 | |
| 1045 | if (self->waiting_for_mutex == NULL) { |
| 1046 | self->waiting_for_mutex = mutex; |
| 1047 | mutex->lck_mtx_waiters++; |
| 1048 | } |
| 1049 | |
| 1050 | assert(self->waiting_for_mutex == mutex); |
| 1051 | |
| 1052 | thread_set_pending_block_hint(self, kThreadWaitKernelMutex); |
| 1053 | assert_wait(LCK_MTX_EVENT(mutex), THREAD_UNINT | THREAD_WAIT_NOREPORT_USER); |
| 1054 | lck_mtx_ilk_unlock(mutex); |
| 1055 | |
| 1056 | thread_block(THREAD_CONTINUE_NULL); |
| 1057 | |
| 1058 | assert(mutex->lck_mtx_waiters > 0); |
| 1059 | |
| 1060 | KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0); |
| 1061 | #if CONFIG_DTRACE |
| 1062 | /* |
| 1063 | * Record the DTrace lockstat probe for blocking, block time |
| 1064 | * measured from when we were entered. |
| 1065 | */ |
| 1066 | if (sleep_start) { |
| 1067 | if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) { |
| 1068 | LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, lck, |
| 1069 | mach_absolute_time() - sleep_start); |
| 1070 | } else { |
| 1071 | LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, lck, |
| 1072 | mach_absolute_time() - sleep_start); |
| 1073 | } |
| 1074 | } |
| 1075 | #endif |
| 1076 | } |
| 1077 | |
| 1078 | /* |
| 1079 | * Routine: lck_mtx_lock_acquire |
| 1080 | * |
| 1081 | * Invoked on acquiring the mutex when there is |
| 1082 | * contention. |
| 1083 | * |
| 1084 | * Returns the current number of waiters. |
| 1085 | * |
| 1086 | * Called with the interlock locked. |
| 1087 | */ |
| 1088 | int |
| 1089 | lck_mtx_lock_acquire( |
| 1090 | lck_mtx_t *lck) |
| 1091 | { |
| 1092 | thread_t thread = current_thread(); |
| 1093 | lck_mtx_t *mutex; |
| 1094 | integer_t priority; |
| 1095 | |
| 1096 | if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) |
| 1097 | mutex = lck; |
| 1098 | else |
| 1099 | mutex = &lck->lck_mtx_ptr->lck_mtx; |
| 1100 | |
| 1101 | /* |
| 1102 | * If waiting_for_mutex is set, then this thread was previously blocked waiting on this lock |
| 1103 | * If it's un-set, then this thread stole the lock from another waiter. |
| 1104 | */ |
| 1105 | if (thread->waiting_for_mutex == mutex) { |
| 1106 | assert(mutex->lck_mtx_waiters > 0); |
| 1107 | |
| 1108 | thread->waiting_for_mutex = NULL; |
| 1109 | mutex->lck_mtx_waiters--; |
| 1110 | } |
| 1111 | |
| 1112 | assert(thread->waiting_for_mutex == NULL); |
| 1113 | |
| 1114 | if (mutex->lck_mtx_waiters > 0) { |
| 1115 | priority = mutex->lck_mtx_pri; |
| 1116 | } else { |
| 1117 | /* I was the last waiter, so the mutex is no longer promoted or contended */ |
| 1118 | mutex->lck_mtx_pri = 0; |
| 1119 | priority = 0; |
| 1120 | } |
| 1121 | |
| 1122 | if (priority || thread->was_promoted_on_wakeup) { |
| 1123 | __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck); |
| 1124 | |
| 1125 | /* |
| 1126 | * Note: was_promoted_on_wakeup can happen for multiple wakeups in a row without |
| 1127 | * an intervening acquire if a thread keeps failing to acquire the lock |
| 1128 | * |
		 * If priority is nonzero but was_promoted_on_wakeup is not set,
		 * then this is a lock steal of a promoted mutex, so it needs a ++ of promotions.
		 *
		 * If was_promoted_on_wakeup is set but priority is zero,
		 * then this thread was the last waiter, and the last waiter does not need a promotion.
| 1134 | */ |
| 1135 | |
| 1136 | spl_t s = splsched(); |
| 1137 | thread_lock(thread); |
| 1138 | |
| 1139 | assert_promotions_invariant(thread); |
| 1140 | |
| 1141 | if (thread->was_promoted_on_wakeup) |
| 1142 | assert(thread->promotions > 0); |
| 1143 | |
| 1144 | if (priority) { |
| 1145 | if (thread->promotions++ == 0) { |
| 1146 | /* This is the first promotion for holder */ |
| 1147 | sched_thread_promote_to_pri(thread, priority, trace_lck); |
| 1148 | } else { |
| 1149 | /* |
| 1150 | * Holder was previously promoted due to a different mutex, raise to match this one |
| 1151 | * Or, this thread was promoted on wakeup but someone else later contended on mutex |
| 1152 | * at higher priority before we got here |
| 1153 | */ |
| 1154 | sched_thread_update_promotion_to_pri(thread, priority, trace_lck); |
| 1155 | } |
| 1156 | } |
| 1157 | |
| 1158 | if (thread->was_promoted_on_wakeup) { |
| 1159 | thread->was_promoted_on_wakeup = 0; |
| 1160 | if (--thread->promotions == 0) |
| 1161 | sched_thread_unpromote(thread, trace_lck); |
| 1162 | } |
| 1163 | |
| 1164 | assert_promotions_invariant(thread); |
| 1165 | |
| 1166 | if (priority && (thread->sched_flags & TH_SFLAG_DEPRESS) == 0) |
| 1167 | assert(thread->sched_pri >= priority); |
| 1168 | |
| 1169 | thread_unlock(thread); |
| 1170 | splx(s); |
| 1171 | } |
| 1172 | |
| 1173 | #if CONFIG_DTRACE |
| 1174 | if (lockstat_probemap[LS_LCK_MTX_LOCK_ACQUIRE] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_ACQUIRE]) { |
| 1175 | if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) { |
| 1176 | LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lck, 0); |
| 1177 | } else { |
| 1178 | LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_ACQUIRE, lck, 0); |
| 1179 | } |
| 1180 | } |
| 1181 | #endif |
| 1182 | return (mutex->lck_mtx_waiters); |
| 1183 | } |
| 1184 | |
| 1185 | /* |
| 1186 | * Routine: lck_mtx_unlock_wakeup |
| 1187 | * |
| 1188 | * Invoked on unlock when there is contention. |
| 1189 | * |
| 1190 | * Called with the interlock locked. |
| 1191 | * |
| 1192 | * TODO: the 'waiters' flag does not indicate waiters exist on the waitqueue, |
| 1193 | * it indicates waiters exist between wait and acquire. |
| 1194 | * This means that here we may do extra unneeded wakeups. |
| 1195 | */ |
| 1196 | void |
| 1197 | lck_mtx_unlock_wakeup ( |
| 1198 | lck_mtx_t *lck, |
| 1199 | thread_t holder) |
| 1200 | { |
| 1201 | thread_t thread = current_thread(); |
| 1202 | lck_mtx_t *mutex; |
| 1203 | __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck); |
| 1204 | |
| 1205 | if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) |
| 1206 | mutex = lck; |
| 1207 | else |
| 1208 | mutex = &lck->lck_mtx_ptr->lck_mtx; |
| 1209 | |
| 1210 | if (thread != holder) |
| 1211 | panic("lck_mtx_unlock_wakeup: mutex %p holder %p\n" , mutex, holder); |
| 1212 | |
| 1213 | KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_START, |
| 1214 | trace_lck, (uintptr_t)thread_tid(thread), 0, 0, 0); |
| 1215 | |
| 1216 | assert(mutex->lck_mtx_waiters > 0); |
| 1217 | assert(thread->was_promoted_on_wakeup == 0); |
| 1218 | assert(thread->waiting_for_mutex == NULL); |
| 1219 | |
| 1220 | /* |
| 1221 | * The waiters count does not precisely match the number of threads on the waitqueue, |
| 1222 | * therefore we cannot assert that we actually wake up a thread here |
| 1223 | */ |
| 1224 | if (mutex->lck_mtx_waiters > 1) |
| 1225 | thread_wakeup_one_with_pri(LCK_MTX_EVENT(lck), lck->lck_mtx_pri); |
| 1226 | else |
| 1227 | thread_wakeup_one(LCK_MTX_EVENT(lck)); |
| 1228 | |
	/* When mutex->lck_mtx_pri is set, it means that I, as the owner, have a promotion. */
| 1230 | if (mutex->lck_mtx_pri) { |
| 1231 | spl_t s = splsched(); |
| 1232 | thread_lock(thread); |
| 1233 | |
| 1234 | assert(thread->promotions > 0); |
| 1235 | |
| 1236 | assert_promotions_invariant(thread); |
| 1237 | |
| 1238 | if (--thread->promotions == 0) |
| 1239 | sched_thread_unpromote(thread, trace_lck); |
| 1240 | |
| 1241 | assert_promotions_invariant(thread); |
| 1242 | |
| 1243 | thread_unlock(thread); |
| 1244 | splx(s); |
| 1245 | } |
| 1246 | |
| 1247 | KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0); |
| 1248 | } |
| 1249 | |
| 1250 | /* |
| 1251 | * Callout from the waitqueue code from inside thread_wakeup_one_with_pri |
| 1252 | * At splsched, thread is pulled from waitq, still locked, not on runqueue yet |
| 1253 | * |
| 1254 | * We always make sure to set the promotion flag, even if the thread is already at this priority, |
| 1255 | * so that it doesn't go down. |
| 1256 | */ |
| 1257 | void |
| 1258 | lck_mtx_wakeup_adjust_pri(thread_t thread, integer_t priority) |
| 1259 | { |
| 1260 | assert(priority <= MAXPRI_PROMOTE); |
| 1261 | assert(thread->waiting_for_mutex != NULL); |
| 1262 | |
| 1263 | __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(thread->waiting_for_mutex); |
| 1264 | |
| 1265 | assert_promotions_invariant(thread); |
| 1266 | |
| 1267 | if (thread->was_promoted_on_wakeup) { |
| 1268 | /* Thread was previously promoted, but contended again */ |
| 1269 | sched_thread_update_promotion_to_pri(thread, priority, trace_lck); |
| 1270 | return; |
| 1271 | } |
| 1272 | |
| 1273 | if (thread->promotions > 0 && priority <= thread->promotion_priority) { |
| 1274 | /* |
| 1275 | * Thread is already promoted to the right level, no need to do more |
| 1276 | * I can draft off of another promotion here, which is OK |
| 1277 | * because I know the thread will soon run acquire to get its own promotion |
| 1278 | */ |
| 1279 | assert((thread->sched_flags & TH_SFLAG_PROMOTED) == TH_SFLAG_PROMOTED); |
| 1280 | return; |
| 1281 | } |
| 1282 | |
| 1283 | thread->was_promoted_on_wakeup = 1; |
| 1284 | |
| 1285 | if (thread->promotions++ == 0) { |
| 1286 | /* This is the first promotion for this thread */ |
| 1287 | sched_thread_promote_to_pri(thread, priority, trace_lck); |
| 1288 | } else { |
| 1289 | /* Holder was previously promoted due to a different mutex, raise to match this one */ |
| 1290 | sched_thread_update_promotion_to_pri(thread, priority, trace_lck); |
| 1291 | } |
| 1292 | |
| 1293 | assert_promotions_invariant(thread); |
| 1294 | } |
| 1295 | |
| 1296 | |
| 1297 | /* |
| 1298 | * Routine: mutex_pause |
| 1299 | * |
| 1300 | * Called by former callers of simple_lock_pause(). |
| 1301 | */ |
| 1302 | #define MAX_COLLISION_COUNTS 32 |
| 1303 | #define MAX_COLLISION 8 |
| 1304 | |
| 1305 | unsigned int max_collision_count[MAX_COLLISION_COUNTS]; |
| 1306 | |
| 1307 | uint32_t collision_backoffs[MAX_COLLISION] = { |
| 1308 | 10, 50, 100, 200, 400, 600, 800, 1000 |
| 1309 | }; |
| 1310 | |
| 1311 | |
| 1312 | void |
| 1313 | mutex_pause(uint32_t collisions) |
| 1314 | { |
| 1315 | wait_result_t wait_result; |
| 1316 | uint32_t back_off; |
| 1317 | |
| 1318 | if (collisions >= MAX_COLLISION_COUNTS) |
| 1319 | collisions = MAX_COLLISION_COUNTS - 1; |
| 1320 | max_collision_count[collisions]++; |
| 1321 | |
| 1322 | if (collisions >= MAX_COLLISION) |
| 1323 | collisions = MAX_COLLISION - 1; |
| 1324 | back_off = collision_backoffs[collisions]; |
| 1325 | |
| 1326 | wait_result = assert_wait_timeout((event_t)mutex_pause, THREAD_UNINT, back_off, NSEC_PER_USEC); |
| 1327 | assert(wait_result == THREAD_WAITING); |
| 1328 | |
| 1329 | wait_result = thread_block(THREAD_CONTINUE_NULL); |
| 1330 | assert(wait_result == THREAD_TIMED_OUT); |
| 1331 | } |
| 1332 | |
| 1333 | |
| 1334 | unsigned int mutex_yield_wait = 0; |
| 1335 | unsigned int mutex_yield_no_wait = 0; |
| 1336 | |
| 1337 | void |
| 1338 | lck_mtx_yield( |
| 1339 | lck_mtx_t *lck) |
| 1340 | { |
| 1341 | int waiters; |
| 1342 | |
| 1343 | #if DEBUG |
| 1344 | lck_mtx_assert(lck, LCK_MTX_ASSERT_OWNED); |
| 1345 | #endif /* DEBUG */ |
| 1346 | |
| 1347 | if (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT) |
| 1348 | waiters = lck->lck_mtx_ptr->lck_mtx.lck_mtx_waiters; |
| 1349 | else |
| 1350 | waiters = lck->lck_mtx_waiters; |
| 1351 | |
| 1352 | if ( !waiters) { |
| 1353 | mutex_yield_no_wait++; |
| 1354 | } else { |
| 1355 | mutex_yield_wait++; |
| 1356 | lck_mtx_unlock(lck); |
| 1357 | mutex_pause(0); |
| 1358 | lck_mtx_lock(lck); |
| 1359 | } |
| 1360 | } |
| 1361 | |
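/*
 * Illustrative only (comment, not compiled): a long mutex-protected scan
 * can call lck_mtx_yield() periodically so that contending threads get a
 * chance to run.  "my_mtx" and the element list are hypothetical.
 *
 *	lck_mtx_lock(my_mtx);
 *	for (e = list_head; e != NULL; e = e->next) {
 *		... work on e ...
 *		lck_mtx_yield(my_mtx);	// drops and re-takes my_mtx only
 *					// if there are waiters
 *	}
 *	lck_mtx_unlock(my_mtx);
 */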
| 1362 | |
| 1363 | /* |
| 1364 | * Routine: lck_rw_sleep |
| 1365 | */ |
| 1366 | wait_result_t |
| 1367 | lck_rw_sleep( |
| 1368 | lck_rw_t *lck, |
| 1369 | lck_sleep_action_t lck_sleep_action, |
| 1370 | event_t event, |
| 1371 | wait_interrupt_t interruptible) |
| 1372 | { |
| 1373 | wait_result_t res; |
| 1374 | lck_rw_type_t lck_rw_type; |
| 1375 | thread_t thread = current_thread(); |
| 1376 | |
| 1377 | if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) |
| 1378 | panic("Invalid lock sleep action %x\n" , lck_sleep_action); |
| 1379 | |
| 1380 | if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) { |
| 1381 | /* |
| 1382 | * Although we are dropping the RW lock, the intent in most cases |
| 1383 | * is that this thread remains as an observer, since it may hold |
| 1384 | * some secondary resource, but must yield to avoid deadlock. In |
| 1385 | * this situation, make sure that the thread is boosted to the |
| 1386 | * RW lock ceiling while blocked, so that it can re-acquire the |
| 1387 | * RW lock at that priority. |
| 1388 | */ |
| 1389 | thread->rwlock_count++; |
| 1390 | } |
| 1391 | |
| 1392 | res = assert_wait(event, interruptible); |
| 1393 | if (res == THREAD_WAITING) { |
| 1394 | lck_rw_type = lck_rw_done(lck); |
| 1395 | res = thread_block(THREAD_CONTINUE_NULL); |
| 1396 | if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) { |
| 1397 | if (!(lck_sleep_action & (LCK_SLEEP_SHARED|LCK_SLEEP_EXCLUSIVE))) |
| 1398 | lck_rw_lock(lck, lck_rw_type); |
| 1399 | else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) |
| 1400 | lck_rw_lock_exclusive(lck); |
| 1401 | else |
| 1402 | lck_rw_lock_shared(lck); |
| 1403 | } |
| 1404 | } |
| 1405 | else |
| 1406 | if (lck_sleep_action & LCK_SLEEP_UNLOCK) |
| 1407 | (void)lck_rw_done(lck); |
| 1408 | |
| 1409 | if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) { |
| 1410 | if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) { |
| 1411 | /* sched_flags checked without lock, but will be rechecked while clearing */ |
| 1412 | |
| 1413 | /* Only if the caller wanted the lck_rw_t returned unlocked should we drop to 0 */ |
| 1414 | assert(lck_sleep_action & LCK_SLEEP_UNLOCK); |
| 1415 | |
| 1416 | lck_rw_clear_promotion(thread, unslide_for_kdebug(event)); |
| 1417 | } |
| 1418 | } |
| 1419 | |
| 1420 | return res; |
| 1421 | } |
| 1422 | |
| 1423 | |
| 1424 | /* |
| 1425 | * Routine: lck_rw_sleep_deadline |
| 1426 | */ |
| 1427 | wait_result_t |
| 1428 | lck_rw_sleep_deadline( |
| 1429 | lck_rw_t *lck, |
| 1430 | lck_sleep_action_t lck_sleep_action, |
| 1431 | event_t event, |
| 1432 | wait_interrupt_t interruptible, |
| 1433 | uint64_t deadline) |
| 1434 | { |
| 1435 | wait_result_t res; |
| 1436 | lck_rw_type_t lck_rw_type; |
| 1437 | thread_t thread = current_thread(); |
| 1438 | |
| 1439 | if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) |
| 1440 | panic("Invalid lock sleep action %x\n" , lck_sleep_action); |
| 1441 | |
| 1442 | if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) { |
| 1443 | thread->rwlock_count++; |
| 1444 | } |
| 1445 | |
| 1446 | res = assert_wait_deadline(event, interruptible, deadline); |
| 1447 | if (res == THREAD_WAITING) { |
| 1448 | lck_rw_type = lck_rw_done(lck); |
| 1449 | res = thread_block(THREAD_CONTINUE_NULL); |
| 1450 | if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) { |
| 1451 | if (!(lck_sleep_action & (LCK_SLEEP_SHARED|LCK_SLEEP_EXCLUSIVE))) |
| 1452 | lck_rw_lock(lck, lck_rw_type); |
| 1453 | else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) |
| 1454 | lck_rw_lock_exclusive(lck); |
| 1455 | else |
| 1456 | lck_rw_lock_shared(lck); |
| 1457 | } |
| 1458 | } |
| 1459 | else |
| 1460 | if (lck_sleep_action & LCK_SLEEP_UNLOCK) |
| 1461 | (void)lck_rw_done(lck); |
| 1462 | |
| 1463 | if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) { |
| 1464 | if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) { |
| 1465 | /* sched_flags checked without lock, but will be rechecked while clearing */ |
| 1466 | |
| 1467 | /* Only if the caller wanted the lck_rw_t returned unlocked should we drop to 0 */ |
| 1468 | assert(lck_sleep_action & LCK_SLEEP_UNLOCK); |
| 1469 | |
| 1470 | lck_rw_clear_promotion(thread, unslide_for_kdebug(event)); |
| 1471 | } |
| 1472 | } |
| 1473 | |
| 1474 | return res; |
| 1475 | } |
| 1476 | |
| 1477 | /* |
| 1478 | * Reader-writer lock promotion |
| 1479 | * |
| 1480 | * We support a limited form of reader-writer |
| 1481 | * lock promotion whose effects are: |
| 1482 | * |
| 1483 | * * Qualifying threads have decay disabled |
| 1484 | * * Scheduler priority is reset to a floor of |
 *     their statically assigned priority
| 1486 | * or MINPRI_RWLOCK |
| 1487 | * |
| 1488 | * The rationale is that lck_rw_ts do not have |
| 1489 | * a single owner, so we cannot apply a directed |
| 1490 | * priority boost from all waiting threads |
| 1491 | * to all holding threads without maintaining |
| 1492 | * lists of all shared owners and all waiting |
| 1493 | * threads for every lock. |
| 1494 | * |
| 1495 | * Instead (and to preserve the uncontended fast- |
| 1496 | * path), acquiring (or attempting to acquire) |
 * an RW lock in shared or exclusive mode increments
| 1498 | * a per-thread counter. Only if that thread stops |
| 1499 | * making forward progress (for instance blocking |
| 1500 | * on a mutex, or being preempted) do we consult |
| 1501 | * the counter and apply the priority floor. |
| 1502 | * When the thread becomes runnable again (or in |
| 1503 | * the case of preemption it never stopped being |
| 1504 | * runnable), it has the priority boost and should |
| 1505 | * be in a good position to run on the CPU and |
| 1506 | * release all RW locks (at which point the priority |
| 1507 | * boost is cleared). |
| 1508 | * |
| 1509 | * Care must be taken to ensure that priority |
| 1510 | * boosts are not retained indefinitely, since unlike |
| 1511 | * mutex priority boosts (where the boost is tied |
| 1512 | * to the mutex lifecycle), the boost is tied |
| 1513 | * to the thread and independent of any particular |
| 1514 | * lck_rw_t. Assertions are in place on return |
| 1515 | * to userspace so that the boost is not held |
| 1516 | * indefinitely. |
| 1517 | * |
| 1518 | * The routines that increment/decrement the |
| 1519 | * per-thread counter should err on the side of |
| 1520 | * incrementing any time a preemption is possible |
| 1521 | * and the lock would be visible to the rest of the |
| 1522 | * system as held (so it should be incremented before |
| 1523 | * interlocks are dropped/preemption is enabled, or |
| 1524 | * before a CAS is executed to acquire the lock). |
| 1525 | * |
| 1526 | */ |
| 1527 | |
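/*
 * A minimal sketch (illustration only, following the rules above rather
 * than the real per-architecture rwlock code) of where the per-thread
 * counter is maintained:
 *
 *	// acquire side: count the lock before it can be observed as held
 *	current_thread()->rwlock_count++;
 *	... interlock / CAS work that actually takes the lck_rw_t ...
 *
 *	// release side: on the last unlock, drop any promotion we picked up
 *	thread_t thread = current_thread();
 *	if ((thread->rwlock_count-- == 1) &&	// field now 0
 *	    (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
 *		lck_rw_clear_promotion(thread, 0);
 *	}
 */
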
| 1528 | /* |
| 1529 | * lck_rw_clear_promotion: Undo priority promotions when the last RW |
| 1530 | * lock is released by a thread (if a promotion was active) |
| 1531 | */ |
| 1532 | void lck_rw_clear_promotion(thread_t thread, uintptr_t trace_obj) |
| 1533 | { |
| 1534 | assert(thread->rwlock_count == 0); |
| 1535 | |
| 1536 | /* Cancel any promotions if the thread had actually blocked while holding a RW lock */ |
| 1537 | spl_t s = splsched(); |
| 1538 | thread_lock(thread); |
| 1539 | |
| 1540 | if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) |
| 1541 | sched_thread_unpromote_reason(thread, TH_SFLAG_RW_PROMOTED, trace_obj); |
| 1542 | |
| 1543 | thread_unlock(thread); |
| 1544 | splx(s); |
| 1545 | } |
| 1546 | |
| 1547 | /* |
| 1548 | * Callout from context switch if the thread goes |
| 1549 | * off core with a positive rwlock_count |
| 1550 | * |
| 1551 | * Called at splsched with the thread locked |
| 1552 | */ |
| 1553 | void |
| 1554 | lck_rw_set_promotion_locked(thread_t thread) |
| 1555 | { |
| 1556 | if (LcksOpts & disLkRWPrio) |
| 1557 | return; |
| 1558 | |
| 1559 | assert(thread->rwlock_count > 0); |
| 1560 | |
| 1561 | if (!(thread->sched_flags & TH_SFLAG_RW_PROMOTED)) |
| 1562 | sched_thread_promote_reason(thread, TH_SFLAG_RW_PROMOTED, 0); |
| 1563 | } |
| 1564 | |
| 1565 | kern_return_t |
| 1566 | host_lockgroup_info( |
| 1567 | host_t host, |
| 1568 | lockgroup_info_array_t *lockgroup_infop, |
| 1569 | mach_msg_type_number_t *lockgroup_infoCntp) |
| 1570 | { |
| 1571 | lockgroup_info_t *lockgroup_info_base; |
| 1572 | lockgroup_info_t *lockgroup_info; |
| 1573 | vm_offset_t lockgroup_info_addr; |
| 1574 | vm_size_t lockgroup_info_size; |
| 1575 | vm_size_t lockgroup_info_vmsize; |
| 1576 | lck_grp_t *lck_grp; |
| 1577 | unsigned int i; |
| 1578 | vm_map_copy_t copy; |
| 1579 | kern_return_t kr; |
| 1580 | |
| 1581 | if (host == HOST_NULL) |
| 1582 | return KERN_INVALID_HOST; |
| 1583 | |
| 1584 | lck_mtx_lock(&lck_grp_lock); |
| 1585 | |
| 1586 | lockgroup_info_size = lck_grp_cnt * sizeof(*lockgroup_info); |
| 1587 | lockgroup_info_vmsize = round_page(lockgroup_info_size); |
| 1588 | kr = kmem_alloc_pageable(ipc_kernel_map, |
| 1589 | &lockgroup_info_addr, lockgroup_info_vmsize, VM_KERN_MEMORY_IPC); |
| 1590 | if (kr != KERN_SUCCESS) { |
| 1591 | lck_mtx_unlock(&lck_grp_lock); |
| 1592 | return(kr); |
| 1593 | } |
| 1594 | |
| 1595 | lockgroup_info_base = (lockgroup_info_t *) lockgroup_info_addr; |
| 1596 | lck_grp = (lck_grp_t *)queue_first(&lck_grp_queue); |
| 1597 | lockgroup_info = lockgroup_info_base; |
| 1598 | |
| 1599 | for (i = 0; i < lck_grp_cnt; i++) { |
| 1600 | |
| 1601 | lockgroup_info->lock_spin_cnt = lck_grp->lck_grp_spincnt; |
| 1602 | lockgroup_info->lock_spin_util_cnt = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_util_cnt; |
| 1603 | lockgroup_info->lock_spin_held_cnt = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_held_cnt; |
| 1604 | lockgroup_info->lock_spin_miss_cnt = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_miss_cnt; |
| 1605 | lockgroup_info->lock_spin_held_max = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_held_max; |
| 1606 | lockgroup_info->lock_spin_held_cum = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_held_cum; |
| 1607 | |
| 1608 | lockgroup_info->lock_mtx_cnt = lck_grp->lck_grp_mtxcnt; |
| 1609 | lockgroup_info->lock_mtx_util_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_util_cnt; |
| 1610 | lockgroup_info->lock_mtx_held_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_held_cnt; |
| 1611 | lockgroup_info->lock_mtx_miss_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_miss_cnt; |
| 1612 | lockgroup_info->lock_mtx_wait_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_cnt; |
| 1613 | lockgroup_info->lock_mtx_held_max = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_held_max; |
| 1614 | lockgroup_info->lock_mtx_held_cum = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_held_cum; |
| 1615 | lockgroup_info->lock_mtx_wait_max = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_max; |
| 1616 | lockgroup_info->lock_mtx_wait_cum = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_cum; |
| 1617 | |
| 1618 | lockgroup_info->lock_rw_cnt = lck_grp->lck_grp_rwcnt; |
| 1619 | lockgroup_info->lock_rw_util_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_util_cnt; |
| 1620 | lockgroup_info->lock_rw_held_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_held_cnt; |
| 1621 | lockgroup_info->lock_rw_miss_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_miss_cnt; |
| 1622 | lockgroup_info->lock_rw_wait_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cnt; |
| 1623 | lockgroup_info->lock_rw_held_max = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_held_max; |
| 1624 | lockgroup_info->lock_rw_held_cum = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_held_cum; |
| 1625 | lockgroup_info->lock_rw_wait_max = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_max; |
| 1626 | lockgroup_info->lock_rw_wait_cum = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cum; |
| 1627 | |
| 1628 | (void) strncpy(lockgroup_info->lockgroup_name,lck_grp->lck_grp_name, LOCKGROUP_MAX_NAME); |
| 1629 | |
| 1630 | lck_grp = (lck_grp_t *)(queue_next((queue_entry_t)(lck_grp))); |
| 1631 | lockgroup_info++; |
| 1632 | } |
| 1633 | |
| 1634 | *lockgroup_infoCntp = lck_grp_cnt; |
| 1635 | lck_mtx_unlock(&lck_grp_lock); |
| 1636 | |
| 1637 | if (lockgroup_info_size != lockgroup_info_vmsize) |
| 1638 | bzero((char *)lockgroup_info, lockgroup_info_vmsize - lockgroup_info_size); |
| 1639 | |
| 1640 | kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)lockgroup_info_addr, |
| 1641 | (vm_map_size_t)lockgroup_info_size, TRUE, ©); |
| 1642 | assert(kr == KERN_SUCCESS); |
| 1643 | |
| 1644 | *lockgroup_infop = (lockgroup_info_t *) copy; |
| 1645 | |
| 1646 | return(KERN_SUCCESS); |
| 1647 | } |
| 1648 | |
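/*
 * Illustrative only (comment, not compiled): the user-space view of the
 * MIG routine above, assuming the generated stubs in <mach/mach_host.h>;
 * error handling is elided.
 *
 *	lockgroup_info_array_t	info;
 *	mach_msg_type_number_t	count;
 *
 *	if (host_lockgroup_info(mach_host_self(), &info, &count) == KERN_SUCCESS) {
 *		for (unsigned i = 0; i < count; i++)
 *			printf("%s: %llu mutexes\n",
 *			    info[i].lockgroup_name,
 *			    (unsigned long long)info[i].lock_mtx_cnt);
 *		vm_deallocate(mach_task_self(), (vm_address_t)info,
 *		    count * sizeof(*info));
 *	}
 */
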
| 1649 | /* |
| 1650 | * Atomic primitives, prototyped in kern/simple_lock.h |
| 1651 | * Noret versions are more efficient on some architectures |
| 1652 | */ |
| 1653 | |
| 1654 | uint32_t |
| 1655 | hw_atomic_add(volatile uint32_t *dest, uint32_t delt) |
| 1656 | { |
| 1657 | ALIGN_TEST(dest,uint32_t); |
| 1658 | return __c11_atomic_fetch_add(ATOMIC_CAST(uint32_t,dest), delt, memory_order_relaxed) + delt; |
| 1659 | } |
| 1660 | |
| 1661 | uint32_t |
| 1662 | hw_atomic_sub(volatile uint32_t *dest, uint32_t delt) |
| 1663 | { |
| 1664 | ALIGN_TEST(dest,uint32_t); |
| 1665 | return __c11_atomic_fetch_sub(ATOMIC_CAST(uint32_t,dest), delt, memory_order_relaxed) - delt; |
| 1666 | } |
| 1667 | |
| 1668 | uint32_t |
| 1669 | hw_atomic_or(volatile uint32_t *dest, uint32_t mask) |
| 1670 | { |
| 1671 | ALIGN_TEST(dest,uint32_t); |
| 1672 | return __c11_atomic_fetch_or(ATOMIC_CAST(uint32_t,dest), mask, memory_order_relaxed) | mask; |
| 1673 | } |
| 1674 | |
| 1675 | void |
| 1676 | hw_atomic_or_noret(volatile uint32_t *dest, uint32_t mask) |
| 1677 | { |
| 1678 | ALIGN_TEST(dest,uint32_t); |
| 1679 | __c11_atomic_fetch_or(ATOMIC_CAST(uint32_t,dest), mask, memory_order_relaxed); |
| 1680 | } |
| 1681 | |
| 1682 | uint32_t |
| 1683 | hw_atomic_and(volatile uint32_t *dest, uint32_t mask) |
| 1684 | { |
| 1685 | ALIGN_TEST(dest,uint32_t); |
| 1686 | return __c11_atomic_fetch_and(ATOMIC_CAST(uint32_t,dest), mask, memory_order_relaxed) & mask; |
| 1687 | } |
| 1688 | |
| 1689 | void |
| 1690 | hw_atomic_and_noret(volatile uint32_t *dest, uint32_t mask) |
| 1691 | { |
| 1692 | ALIGN_TEST(dest,uint32_t); |
| 1693 | __c11_atomic_fetch_and(ATOMIC_CAST(uint32_t,dest), mask, memory_order_relaxed); |
| 1694 | } |
| 1695 | |
| 1696 | uint32_t |
| 1697 | hw_compare_and_store(uint32_t oldval, uint32_t newval, volatile uint32_t *dest) |
| 1698 | { |
| 1699 | ALIGN_TEST(dest,uint32_t); |
| 1700 | return __c11_atomic_compare_exchange_strong(ATOMIC_CAST(uint32_t,dest), &oldval, newval, |
| 1701 | memory_order_acq_rel_smp, memory_order_relaxed); |
| 1702 | } |
| 1703 | |
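/*
 * Illustrative only (comment, not compiled): a simple reference count
 * built on the primitives above.  "my_refcnt" and the my_obj_* routines
 * are hypothetical.
 *
 *	static uint32_t my_refcnt = 1;
 *
 *	void
 *	my_obj_reference(void)
 *	{
 *		(void) hw_atomic_add(&my_refcnt, 1);
 *	}
 *
 *	void
 *	my_obj_release(void)
 *	{
 *		if (hw_atomic_sub(&my_refcnt, 1) == 0) {
 *			... last reference gone: tear the object down ...
 *		}
 *	}
 *
 * hw_compare_and_store(old, new, &dest) returns non-zero only if dest
 * still contained 'old' and was atomically replaced with 'new'; callers
 * typically retry in a loop when it fails.
 */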
| 1704 | |