/* pthread_cond_common -- shared code for condition variable.
   Copyright (C) 2016-2017 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <atomic.h>
#include <stdint.h>
#include <pthread.h>
#include <libc-internal.h>

/* We need 3 least-significant bits on __wrefs for something else.  */
#define __PTHREAD_COND_MAX_GROUP_SIZE ((unsigned) 1 << 29)

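/* For illustration: if __wrefs keeps its reference count in the bits above
   the 3 flag bits (i.e., roughly as count << 3), then a 32-bit __wrefs word
   can track at most 1 << 29 references, which is where this group size limit
   comes from.  */
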
#if __HAVE_64B_ATOMICS == 1

static uint64_t __attribute__ ((unused))
__condvar_load_wseq_relaxed (pthread_cond_t *cond)
{
  return atomic_load_relaxed (&cond->__data.__wseq);
}

static uint64_t __attribute__ ((unused))
__condvar_fetch_add_wseq_acquire (pthread_cond_t *cond, unsigned int val)
{
  return atomic_fetch_add_acquire (&cond->__data.__wseq, val);
}

static uint64_t __attribute__ ((unused))
__condvar_fetch_xor_wseq_release (pthread_cond_t *cond, unsigned int val)
{
  return atomic_fetch_xor_release (&cond->__data.__wseq, val);
}

static uint64_t __attribute__ ((unused))
__condvar_load_g1_start_relaxed (pthread_cond_t *cond)
{
  return atomic_load_relaxed (&cond->__data.__g1_start);
}

static void __attribute__ ((unused))
__condvar_add_g1_start_relaxed (pthread_cond_t *cond, unsigned int val)
{
  atomic_store_relaxed (&cond->__data.__g1_start,
                        atomic_load_relaxed (&cond->__data.__g1_start) + val);
}

#else

/* We use two 64b counters: __wseq and __g1_start.  They are monotonically
   increasing and single-writer-multiple-readers counters, so we can implement
   load, fetch-and-add, and fetch-and-xor operations even when we just have
   32b atomics.  Values we add or xor are less than or equal to 1<<31 (*),
   so we only have to make overflow-and-addition atomic with respect to
   concurrent load operations and xor operations.  To do that, we split each
   counter into two 32b values of which we reserve the MSB of each to
   represent an overflow from the lower-order half to the higher-order half.

   In the common case, the state is (higher-order / lower-order half, and . is
   basically concatenation of the bits):
   0.h / 0.l  = h.l

   When we add a value of x that overflows (i.e., 0.l + x == 1.L), we run the
   following steps S1-S4 (the values these represent are on the right-hand
   side):
   S1:  0.h     / 1.L == (h+1).L
   S2:  1.(h+1) / 1.L == (h+1).L
   S3:  1.(h+1) / 0.L == (h+1).L
   S4:  0.(h+1) / 0.L == (h+1).L
   If the LSB of the higher-order half is set, readers will ignore the
   overflow bit in the lower-order half.

   To get an atomic snapshot in load operations, we exploit that the
   higher-order half is monotonically increasing; if we load a value V from
   it, then read the lower-order half, and then read the higher-order half
   again and see the same value V, we know that both halves have existed in
   the sequence of values the full counter had.  This is similar to the
   validated reads in the time-based STMs in GCC's libitm (e.g.,
   method_ml_wt).

   The xor operation needs to be an atomic read-modify-write.  The write
   itself is not an issue as it affects just the lower-order half but not bits
   used in the add operation.  To make the full fetch-and-xor atomic, we
   exploit that concurrently, the value can increase by at most 1<<31 (*): The
   xor operation is only called while having acquired the lock, so not more
   than __PTHREAD_COND_MAX_GROUP_SIZE waiters can enter concurrently and thus
   increment __wseq.  Therefore, if the xor operation observes a value of
   __wseq, then the value it applies the modification to later on can be
   derived (see below).

   One benefit of this scheme is that it makes load operations
   obstruction-free: unlike a scheme that simply locks the counter, readers
   can almost always interpret a snapshot of the two halves.  Readers can be
   forced to read a new snapshot only when the read is concurrent with an
   overflow.  However, overflows will happen infrequently, so load operations
   are practically lock-free.

   (*) The highest value we add is __PTHREAD_COND_MAX_GROUP_SIZE << 2 to
   __g1_start (the two extra bits are for the lock in the two LSBs of
   __g1_start).  */

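/* A concrete example of the scheme above (illustrative values only):
   assume the logical 64b value is V == 5 * 2^31 + 0x7fffffff, stored as
   high == 5 and low == 0x7fffffff, and we fetch-and-add 1:
     S1: low becomes 0x80000000; a reader that sees this treats the overflow
         bit as +2^31 and thus reads V + 1 == 6 * 2^31.
     S2: high becomes 0x80000006 (overflow flag set, value 6); readers now
         ignore the overflow bit in low.
     S3: low becomes 0x00000000.
     S4: high becomes 6.
   At every step, a validated snapshot yields the same logical value
   V + 1 == 6 * 2^31.  */
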
typedef struct
{
  unsigned int low;
  unsigned int high;
} _condvar_lohi;

static uint64_t
__condvar_fetch_add_64_relaxed (_condvar_lohi *lh, unsigned int op)
{
  /* S1.  Note that this is an atomic read-modify-write so it extends the
     release sequence of release MO store at S3.  */
  unsigned int l = atomic_fetch_add_relaxed (&lh->low, op);
  unsigned int h = atomic_load_relaxed (&lh->high);
  uint64_t result = ((uint64_t) h << 31) | l;
  l += op;
  if ((l >> 31) > 0)
    {
      /* Overflow.  Need to increment higher-order half.  Note that all
         add operations are ordered in happens-before.  */
      h++;
      /* S2.  Release MO to synchronize with the loads of the higher-order
         half in the load operation.  See __condvar_load_64_relaxed.  */
      atomic_store_release (&lh->high, h | ((unsigned int) 1 << 31));
      l ^= (unsigned int) 1 << 31;
      /* S3.  See __condvar_load_64_relaxed.  */
      atomic_store_release (&lh->low, l);
      /* S4.  Likewise.  */
      atomic_store_release (&lh->high, h);
    }
  return result;
}

static uint64_t
__condvar_load_64_relaxed (_condvar_lohi *lh)
{
  unsigned int h, l, h2;
  do
    {
      /* This load and the second one below to the same location read from the
         stores in the overflow handling of the add operation or the
         initializing stores (which is a simple special case because
         initialization always completely happens before further use).
         Because no two stores to the higher-order half write the same value,
         the loop ensures that if we continue to use the snapshot, this load
         and the second one read from the same store operation.  All candidate
         store operations have release MO.
         If we read from S2 in the first load, then we will see the value of
         S1 on the next load (because we synchronize with S2), or a value
         later in modification order.  We correctly ignore the lower-half's
         overflow bit in this case.  If we read from S4, then we will see the
         value of S3 in the next load (or a later value), which does not have
         the overflow bit set anymore.  */
      h = atomic_load_acquire (&lh->high);
      /* This will read from the release sequence of S3 (i.e., either the S3
         store or the read-modify-writes at S1 following S3 in modification
         order).  Thus, the read synchronizes with S3, and the following load
         of the higher-order half will read from the matching S2 (or a later
         value).
         Thus, if we read a lower-half value here that already overflowed and
         belongs to an increased higher-order half value, we will see the
         latter and h and h2 will not be equal.  */
      l = atomic_load_acquire (&lh->low);
      /* See above.  */
      h2 = atomic_load_relaxed (&lh->high);
    }
  while (h != h2);
  if (((l >> 31) > 0) && ((h >> 31) > 0))
    l ^= (unsigned int) 1 << 31;
  return ((uint64_t) (h & ~((unsigned int) 1 << 31)) << 31) + l;
}

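/* Continuing the example above: a reader that loads high == 5, is then
   preempted while the writer performs S2-S4, and finally reads
   low == 0x00000000 and high == 6 will see h != h2 and retry; the second
   attempt reads a consistent pair (6, 0) and returns 6 * 2^31.  Such a retry
   can only be forced while an overflow is in progress, which is why loads
   are practically lock-free.  */
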
static uint64_t __attribute__ ((unused))
__condvar_load_wseq_relaxed (pthread_cond_t *cond)
{
  return __condvar_load_64_relaxed ((_condvar_lohi *) &cond->__data.__wseq32);
}

static uint64_t __attribute__ ((unused))
__condvar_fetch_add_wseq_acquire (pthread_cond_t *cond, unsigned int val)
{
  uint64_t r = __condvar_fetch_add_64_relaxed
      ((_condvar_lohi *) &cond->__data.__wseq32, val);
  atomic_thread_fence_acquire ();
  return r;
}

static uint64_t __attribute__ ((unused))
__condvar_fetch_xor_wseq_release (pthread_cond_t *cond, unsigned int val)
{
  _condvar_lohi *lh = (_condvar_lohi *) &cond->__data.__wseq32;
  /* First, get the current value.  See __condvar_load_64_relaxed.  */
  unsigned int h, l, h2;
  do
    {
      h = atomic_load_acquire (&lh->high);
      l = atomic_load_acquire (&lh->low);
      h2 = atomic_load_relaxed (&lh->high);
    }
  while (h != h2);
  if (((l >> 31) > 0) && ((h >> 31) == 0))
    h++;
  h &= ~((unsigned int) 1 << 31);
  l &= ~((unsigned int) 1 << 31);

  /* Now modify.  Due to the coherence rules, the prior load will read a value
     earlier in modification order than the following fetch-xor.
     This uses release MO to make the full operation have release semantics
     (all other operations access the lower-order half).  */
  unsigned int l2 = atomic_fetch_xor_release (&lh->low, val)
      & ~((unsigned int) 1 << 31);
  if (l2 < l)
    /* The lower-order half overflowed in the meantime.  This happened exactly
       once due to the limit on concurrent waiters (see above).  */
    h++;
  return ((uint64_t) h << 31) + l2;
}

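/* Example of the recovery step above (illustrative values only): the snapshot
   reads l == 0x7ffffff0, then concurrent waiters add 0x20 so the lower-order
   half overflows before our fetch-xor runs.  The fetch-xor then observes
   0x00000010, so l2 < l and we increment h to reconstruct the higher-order
   half that the observed lower-order half belongs to.  At most one such
   overflow can happen because concurrent additions are bounded by
   __PTHREAD_COND_MAX_GROUP_SIZE.  */
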
static uint64_t __attribute__ ((unused))
__condvar_load_g1_start_relaxed (pthread_cond_t *cond)
{
  return __condvar_load_64_relaxed
      ((_condvar_lohi *) &cond->__data.__g1_start32);
}

static void __attribute__ ((unused))
__condvar_add_g1_start_relaxed (pthread_cond_t *cond, unsigned int val)
{
  ignore_value (__condvar_fetch_add_64_relaxed
      ((_condvar_lohi *) &cond->__data.__g1_start32, val));
}

#endif /* !__HAVE_64B_ATOMICS  */


/* The lock that signalers use.  See pthread_cond_wait_common for uses.
   The lock is our normal three-state lock: not acquired (0) / acquired (1) /
   acquired-with-futex_wake-request (2).  However, we need to preserve the
   other bits in the unsigned int used for the lock, and therefore it is a
   little more complex.  */
static void __attribute__ ((unused))
__condvar_acquire_lock (pthread_cond_t *cond, int private)
{
  unsigned int s = atomic_load_relaxed (&cond->__data.__g1_orig_size);
  while ((s & 3) == 0)
    {
      if (atomic_compare_exchange_weak_acquire (&cond->__data.__g1_orig_size,
                                                &s, s | 1))
        return;
      /* TODO Spinning and back-off.  */
    }
  /* The lock is currently acquired by someone else, so we cannot simply
     change it from not acquired to acquired.  Instead, try to change it to
     acquired-with-futex-wake-request and do a futex wait; if the CAS happens
     to observe the not-acquired state after all, we have acquired the
     lock.  */
  while (1)
    {
      while ((s & 3) != 2)
        {
          if (atomic_compare_exchange_weak_acquire
              (&cond->__data.__g1_orig_size, &s, (s & ~(unsigned int) 3) | 2))
            {
              if ((s & 3) == 0)
                return;
              break;
            }
          /* TODO Back off.  */
        }
      futex_wait_simple (&cond->__data.__g1_orig_size,
                         (s & ~(unsigned int) 3) | 2, private);
      /* Reload so we see a recent value.  */
      s = atomic_load_relaxed (&cond->__data.__g1_orig_size);
    }
}

/* See __condvar_acquire_lock.  */
static void __attribute__ ((unused))
__condvar_release_lock (pthread_cond_t *cond, int private)
{
  if ((atomic_fetch_and_release (&cond->__data.__g1_orig_size,
                                 ~(unsigned int) 3) & 3)
      == 2)
    futex_wake (&cond->__data.__g1_orig_size, 1, private);
}

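/* To make the encoding concrete (illustrative only): __g1_orig_size packs the
   size of the old G1 into the bits above the two LSBs and the lock state into
   the two LSBs, i.e., roughly (orig_size << 2) | lock with lock in {0, 1, 2}.
   __condvar_release_lock clears the two lock bits and wakes a blocked
   signaler only if it observed state 2 (a wake request).  */
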
/* Only use this when having acquired the lock.  */
static unsigned int __attribute__ ((unused))
__condvar_get_orig_size (pthread_cond_t *cond)
{
  return atomic_load_relaxed (&cond->__data.__g1_orig_size) >> 2;
}

/* Only use this when having acquired the lock.  */
static void __attribute__ ((unused))
__condvar_set_orig_size (pthread_cond_t *cond, unsigned int size)
{
  /* We have acquired the lock, but might get one concurrent update due to a
     lock state change from acquired to acquired-with-futex_wake-request.
     The store with relaxed MO is fine because there will be no further
     changes to the lock bits nor the size, and we will subsequently release
     the lock with release MO.  */
  unsigned int s;
  s = (atomic_load_relaxed (&cond->__data.__g1_orig_size) & 3)
      | (size << 2);
  if ((atomic_exchange_relaxed (&cond->__data.__g1_orig_size, s) & 3)
      != (s & 3))
    atomic_store_relaxed (&cond->__data.__g1_orig_size, (size << 2) | 2);
}

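/* Example of the race handled above (illustrative values only): we hold the
   lock (lock bits == 1) and compute s == (size << 2) | 1.  If a waiter
   concurrently upgrades the lock to acquired-with-futex_wake-request, the
   exchange returns a value whose lock bits are 2, so we store
   (size << 2) | 2 again to make sure the wake request is not lost.  */
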
/* Returns FUTEX_SHARED or FUTEX_PRIVATE based on the provided __wrefs
   value.  */
static int __attribute__ ((unused))
__condvar_get_private (int flags)
{
  if ((flags & __PTHREAD_COND_SHARED_MASK) == 0)
    return FUTEX_PRIVATE;
  else
    return FUTEX_SHARED;
}

/* This closes G1 (whose index is in G1INDEX), waits for all futex waiters to
   leave G1, converts G1 into a fresh G2, and then switches group roles so that
   the former G2 becomes the new G1 ending at the current __wseq value when we
   eventually make the switch (WSEQ is just an observation of __wseq by the
   signaler).
   If G2 is empty, it will not switch groups because then it would create an
   empty G1 which would require switching groups again on the next signal.
   Returns false iff groups were not switched because G2 was empty.  */
static bool __attribute__ ((unused))
__condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq,
                                 unsigned int *g1index, int private)
{
  const unsigned int maxspin = 0;
  unsigned int g1 = *g1index;

  /* If there is no waiter in G2, we don't do anything.  The expression may
     look odd but remember that __g_size might hold a negative value, so
     putting the expression this way avoids relying on implementation-defined
     behavior.
     Note that this works correctly for a zero-initialized condvar too.  */
  unsigned int old_orig_size = __condvar_get_orig_size (cond);
  uint64_t old_g1_start = __condvar_load_g1_start_relaxed (cond) >> 1;
  if (((unsigned) (wseq - old_g1_start - old_orig_size)
       + cond->__data.__g_size[g1 ^ 1]) == 0)
    return false;

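  /* As an example of the check above (illustrative numbers only): with
     old_g1_start == 100 and old_orig_size == 10, a wseq of 112 means that two
     waiters have entered G2 since it became G2.  __g_size[g1 ^ 1] is zero or
     negative and accounts for waiters that already left G2 (e.g., through
     cancellation), so it would be -2 if both of them are gone, making the sum
     zero and causing us to skip the group switch.  */
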
  /* Now try to close and quiesce G1.  We have to consider the following kinds
     of waiters:
     * Waiters from less recent groups than G1 are not affected because
       nothing will change for them apart from __g1_start getting larger.
     * New waiters arriving concurrently with the group switching will all go
       into G2 until we atomically make the switch.  Waiters existing in G2
       are not affected.
     * Waiters in G1 will be closed out immediately by setting a flag in
       __g_signals, which prevents them from blocking using a futex on
       __g_signals and also notifies them that the group is closed.  As a
       result, they will eventually remove their group reference, allowing us
       to switch group roles.  */

  /* First, set the closed flag on __g_signals.  This tells waiters that are
     about to wait that they shouldn't do that anymore.  This basically
     serves as an advance notification of the upcoming change to __g1_start;
     waiters interpret it as if __g1_start was larger than their waiter
     sequence position.  This allows us to change __g1_start after waiting
     for all existing waiters with group references to leave, which in turn
     makes recovery after stealing a signal simpler because it then can be
     skipped if __g1_start indicates that the group is closed (otherwise,
     we would always have to recover because waiters don't know how big their
     groups are).  Relaxed MO is fine.  */
  atomic_fetch_or_relaxed (cond->__data.__g_signals + g1, 1);

  /* Wait until there are no group references anymore.  The fetch-or operation
     injects us into the modification order of __g_refs; release MO ensures
     that waiters incrementing __g_refs after our fetch-or see the previous
     changes to __g_signals and to __g1_start that had to happen before we can
     switch this G1 and alias with an older group (we have two groups, so
     aliasing requires switching group roles twice).  Note that nobody else
     can have set the wake-request flag, so we do not have to act upon it.

     Also note that it is harmless if older waiters or waiters from this G1
     get a group reference after we have quiesced the group because it will
     remain closed for them either because of the closed flag in __g_signals
     or the later update to __g1_start.  New waiters will never arrive here
     but instead continue to go into the still current G2.  */
  unsigned r = atomic_fetch_or_release (cond->__data.__g_refs + g1, 0);
  while ((r >> 1) > 0)
    {
      for (unsigned int spin = maxspin; ((r >> 1) > 0) && (spin > 0); spin--)
        {
          /* TODO Back off.  */
          r = atomic_load_relaxed (cond->__data.__g_refs + g1);
        }
      if ((r >> 1) > 0)
        {
          /* There is still a waiter after spinning.  Set the wake-request
             flag and block.  Relaxed MO is fine because this is just about
             this futex word.  */
          r = atomic_fetch_or_relaxed (cond->__data.__g_refs + g1, 1);

          if ((r >> 1) > 0)
            futex_wait_simple (cond->__data.__g_refs + g1, r, private);
          /* Reload here so we eventually see the most recent value even if we
             do not spin.  */
          r = atomic_load_relaxed (cond->__data.__g_refs + g1);
        }
    }
  /* Acquire MO so that we synchronize with the release operation that waiters
     use to decrement __g_refs and thus happen after the waiters we waited
     for.  */
  atomic_thread_fence_acquire ();

  /* Update __g1_start, which finishes closing this group.  The value we add
     will never be negative because old_orig_size can only be zero when we
     switch groups the first time after a condvar was initialized, in which
     case G1 will be at index 1 and we will add a value of 1.  See above for
     why this takes place after waiting for quiescence of the group.
     Relaxed MO is fine because the change comes with no additional
     constraints that others would have to observe.  */
  __condvar_add_g1_start_relaxed (cond,
      (old_orig_size << 1) + (g1 == 1 ? 1 : - 1));

  /* Now reopen the group, thus enabling waiters to again block using the
     futex controlled by __g_signals.  Release MO so that observers that see
     no signals (and thus can block) also see the write to __g1_start and thus
     that this is now a new group (see __pthread_cond_wait_common for the
     matching acquire MO loads).  */
  atomic_store_release (cond->__data.__g_signals + g1, 0);

  /* At this point, the old G1 is now a valid new G2 (but not in use yet).
     No old waiter can either grab a signal or acquire a reference without
     noticing that __g1_start is larger.
     We can now publish the group switch by flipping the G2 index in __wseq.
     Release MO so that this synchronizes with the acquire MO operation
     waiters use to obtain a position in the waiter sequence.  */
  wseq = __condvar_fetch_xor_wseq_release (cond, 1) >> 1;
  g1 ^= 1;
  *g1index ^= 1;

  /* These values are just observed by signalers, and thus protected by the
     lock.  */
  unsigned int orig_size = wseq - (old_g1_start + old_orig_size);
  __condvar_set_orig_size (cond, orig_size);
  /* Use an addition so that we do not lose track of cancellations in what
     was previously G2.  */
  cond->__data.__g_size[g1] += orig_size;

  /* The new G1's size may be zero because of cancellations during its time
     as G2.  If this happens, there are no waiters that have to receive a
     signal, so we do not need to add any and return false.  */
  if (cond->__data.__g_size[g1] == 0)
    return false;

  return true;
}
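
/* For intuition, a sketch of the bookkeeping above with illustrative numbers:
   if the old G1 started at waiter sequence position 100 with 10 waiters
   (old_g1_start == 100, old_orig_size == 10) and the fetch-xor observes
   wseq == 115, then 115 - (100 + 10) == 5 waiters entered the old G2, so the
   new G1 gets an original size of 5.  Adding (rather than assigning) that
   value to __g_size[g1] preserves any negative adjustments made by waiters
   that were cancelled while the group was still G2; if all 5 were cancelled,
   __g_size[g1] ends up as 0 and we return false just as if G2 had been
   empty.  */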