/* Copyright (C) 2003-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Martin Schwidefsky <schwidefsky@de.ibm.com>, 2003.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <errno.h>
#include <sysdep.h>
#include <futex-internal.h>
#include <pthreadP.h>
#include <shlib-compat.h>


/* Wait on the barrier.

   In each round, we wait for a fixed number of threads to enter the barrier
   (COUNT).  Once that has happened, exactly these threads are allowed to
   leave the barrier.  Note that POSIX does not require that only COUNT
   threads can attempt to block using the barrier concurrently.

   We count the number of threads that have entered (IN).  Each thread
   increments IN when entering, thus getting a position in the sequence of
   threads that are or have been waiting (starting with 1, so the position
   is the number of threads that have entered so far including the current
   thread).
   CURRENT_ROUND designates the position of the most recent thread whose
   round has been detected as complete.  When a thread detects that enough
   threads have entered to make a round complete, it finishes this round by
   effectively adding COUNT to CURRENT_ROUND atomically.  Threads that
   believe their round is not yet complete wait until CURRENT_ROUND is at
   least as large as their position.
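
   For example, with COUNT == 3: the first three threads to enter get
   positions 1, 2, and 3; the thread at position 3 completes the first round
   by advancing CURRENT_ROUND from 0 to 3, which releases all three.
   Positions 4 to 6 form the second round, which is complete once
   CURRENT_ROUND reaches 6, and so on.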

   A barrier can be destroyed as soon as no threads are blocked on the
   barrier.  This is already the case if just one thread from the last round
   has stopped waiting and returned to the caller (the assumption is that
   all threads from the round are unblocked atomically, even though they may
   return at different times from the respective calls to
   pthread_barrier_wait).  Thus, a valid call to pthread_barrier_destroy can
   be concurrent with other threads still figuring out that their round has
   been completed.  Therefore, threads need to confirm that they have left
   the barrier by incrementing OUT, and pthread_barrier_destroy needs to wait
   until OUT equals IN.

   To avoid an ABA issue for futex_wait on CURRENT_ROUND and for archs with
   32b-only atomics, we additionally reset the barrier when IN reaches
   a threshold to avoid overflow.  We assume that the total number of threads
   is less than UINT_MAX/2, and set the threshold accordingly so that we can
   use a simple atomic_fetch_add on IN instead of a CAS when entering.  The
   threshold is always set to the end of a round, so all threads that have
   entered are either pre-reset threads or post-reset threads (i.e., have a
   position larger than the threshold).
   Pre-reset threads just run the algorithm explained above.  Post-reset
   threads wait until IN is reset to a pre-threshold value.
   When the last pre-reset thread leaves the barrier (i.e., OUT equals the
   threshold), it resets the barrier to its initial state.  Other (post-reset)
   threads wait for the reset to have finished by waiting until IN is less
   than the threshold and then restart by trying to enter the barrier again.
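
   For example, if the threshold were 10 with COUNT == 3 (hypothetical small
   numbers; the real threshold is derived from UINT_MAX), rounds would be
   cut off at position 9, the end of the third round: threads at positions 1
   to 9 are pre-reset threads, and a thread that gets position 10 or higher
   blocks until the reset has stored 0 to IN and then re-enters.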

   We reuse the reset mechanism in pthread_barrier_destroy to get notified
   when all threads have left the barrier: We trigger an artificial reset and
   wait for the last pre-reset thread to finish reset, thus notifying the
   thread that is about to destroy the barrier.

   Blocking using futexes is straightforward: pre-reset threads wait for
   completion of their round using CURRENT_ROUND as futex word, and post-reset
   threads and pthread_barrier_destroy use IN as futex word.

   Further notes:
   * It is not simple to let some of the post-reset threads help with the
     reset because of the ABA issues that arise; therefore, we simply make
     the last thread to leave responsible for the reset.
   * POSIX leaves it unspecified whether a signal handler running in a thread
     that has been unblocked (because its round is complete) can stall all
     other threads and prevent them from returning from the barrier.  In this
     implementation, other threads will return.  However,
     pthread_barrier_destroy will of course wait for the signal handler thread
     to confirm that it left the barrier.

   TODO We should add spinning with back-off.  Once we do that, we could also
   try to avoid the futex_wake syscall when a round is detected as finished.
   If we do not spin, it is quite likely that at least some other threads will
   have called futex_wait already.  */
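
/* An illustrative usage sketch (not part of this file): a program can use
   the PTHREAD_BARRIER_SERIAL_THREAD return value to elect exactly one
   thread per round for cleanup.  As explained above, destroying the barrier
   right after one's own round completed is valid here because
   pthread_barrier_destroy waits until OUT equals IN:

     static pthread_barrier_t b;  // initialized with
                                  // pthread_barrier_init (&b, NULL, nthreads)

     static void *
     worker (void *arg)
     {
       // ... per-thread work ...
       if (pthread_barrier_wait (&b) == PTHREAD_BARRIER_SERIAL_THREAD)
         pthread_barrier_destroy (&b);
       return NULL;
     }  */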
int
___pthread_barrier_wait (pthread_barrier_t *barrier)
{
  struct pthread_barrier *bar = (struct pthread_barrier *) barrier;

  /* How many threads entered so far, including ourselves.  */
  unsigned int i;

 reset_restart:
  /* Try to enter the barrier.  We need acquire MO (1) to ensure that if we
     observe that our round can be completed (see below for our attempt to do
     so), all pre-barrier-entry effects of all threads in our round happen
     before us completing the round, and (2) to make our use of the barrier
     happen after a potential reset.  We need release MO to make sure that our
     pre-barrier-entry effects happen before threads in this round leaving the
     barrier.  */
  i = atomic_fetch_add_acq_rel (&bar->in, 1) + 1;
  /* These loads are after the fetch_add so that we're less likely to first
     pull in the cache line as shared.  */
  unsigned int count = bar->count;
  /* This is the number of threads that can enter before we need to reset.
     It is always the end of a round.  */
  unsigned int max_in_before_reset = BARRIER_IN_THRESHOLD
                                     - BARRIER_IN_THRESHOLD % count;
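  /* For example, with a hypothetical BARRIER_IN_THRESHOLD of 1000 and
     COUNT == 7, MAX_IN_BEFORE_RESET is 1000 - 1000 % 7 == 994, the end of
     round 142.  */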

  if (i > max_in_before_reset)
    {
      /* We're in a reset round.  Just wait for a reset to finish; do not
         help finishing previous rounds because this could happen
         concurrently with a reset.  */
      while (i > max_in_before_reset)
        {
          futex_wait_simple (&bar->in, i, bar->shared);
          /* Relaxed MO is fine here because we just need an indication for
             when we should retry to enter (which will use acquire MO, see
             above).  */
          i = atomic_load_relaxed (&bar->in);
        }
      goto reset_restart;
    }

  /* Look at the current round.  At this point, we are just interested in
     whether we can complete rounds, based on the information we obtained
     through our acquire-MO load of IN.  Nonetheless, if we notice that
     our round has been completed using this load, we use the acquire-MO
     fence below to make sure that all pre-barrier-entry effects of all
     threads in our round happen before us leaving the barrier.  Therefore,
     relaxed MO is sufficient.  */
  unsigned int cr = atomic_load_relaxed (&bar->current_round);

  /* Try to finish previous rounds and/or the current round.  We simply
     consider just our position here and do not try to do the work of threads
     that entered more recently.  */
  while (cr + count <= i)
    {
      /* Calculate the new current round based on how many threads entered.
         NEWCR must be larger than CR because CR+COUNT ends a round.  */
      unsigned int newcr = i - i % count;
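      /* For example, with COUNT == 3 and I == 8, NEWCR == 6: the rounds
         ending at positions 3 and 6 are then complete.  */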
      /* Try to complete previous and/or the current round.  We need release
         MO to propagate the happens-before that we observed through reading
         with acquire MO from IN to other threads.  If the CAS fails, it
         is like the relaxed-MO load of CURRENT_ROUND above.  */
      if (atomic_compare_exchange_weak_release (&bar->current_round, &cr,
                                                newcr))
        {
          /* Update CR with the modification we just did.  */
          cr = newcr;
          /* Wake threads belonging to the rounds we just finished.  We may
             wake more threads than necessary if more than COUNT threads try
             to block concurrently on the barrier, but this is not a typical
             use of barriers.
             Note that we can still access SHARED because we haven't yet
             confirmed to have left the barrier.  */
          futex_wake (&bar->current_round, INT_MAX, bar->shared);
          /* We did as much as we could based on our position.  If we advanced
             the current round to a round sufficient for us, do not wait for
             that to happen and skip the acquire fence (we already
             synchronize-with all other threads in our round through the
             initial acquire MO fetch_add of IN).  */
          if (i <= cr)
            goto ready_to_leave;
          else
            break;
        }
    }
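
  /* Example trace (COUNT == 3): a thread with position I == 7 that read
     CR == 3 observes 3 + 3 <= 7, advances CURRENT_ROUND to NEWCR == 6, and
     wakes the waiters of that round; because 7 > 6, its own round is not
     complete yet, so it continues below and waits.  */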

  /* Wait until the current round is more recent than the round we are in.  */
  while (i > cr)
    {
      /* Wait for the current round to finish.  */
      futex_wait_simple (&bar->current_round, cr, bar->shared);
      /* See the fence below.  */
      cr = atomic_load_relaxed (&bar->current_round);
    }
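  /* (A single futex_wait is not enough: it can also return because an
     intermediate round completed and woke all waiters, or spuriously, which
     is why we recheck CURRENT_ROUND in a loop.)  */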

  /* Our round finished.  Use the acquire MO fence to synchronize-with the
     thread that finished the round, either through the initial load of
     CURRENT_ROUND above or a failed CAS in the loop above.  */
  atomic_thread_fence_acquire ();

  /* Now signal that we left.  */
  unsigned int o;
 ready_to_leave:
  /* We need release MO here so that our use of the barrier happens before
     reset or memory reuse after pthread_barrier_destroy.  */
  o = atomic_fetch_add_release (&bar->out, 1) + 1;
  if (o == max_in_before_reset)
    {
      /* Perform a reset if we are the last pre-reset thread leaving.  All
         other threads accessing the barrier are post-reset threads and are
         incrementing or spinning on IN.  Thus, resetting IN as the last step
         of reset ensures that the reset is not concurrent with actual use of
         the barrier.  We need the acquire MO fence so that the reset happens
         after use of the barrier by all earlier pre-reset threads.  */
      atomic_thread_fence_acquire ();
      atomic_store_relaxed (&bar->current_round, 0);
      atomic_store_relaxed (&bar->out, 0);
      /* When destroying the barrier, we wait for a reset to happen.  Thus,
         we must load SHARED now so that this happens before the barrier is
         destroyed.  */
      int shared = bar->shared;
      atomic_store_release (&bar->in, 0);
      futex_wake (&bar->in, INT_MAX, shared);
    }

  /* Return a special value for exactly one thread per round.  */
  return i % count == 0 ? PTHREAD_BARRIER_SERIAL_THREAD : 0;
}
versioned_symbol (libc, ___pthread_barrier_wait, pthread_barrier_wait,
                  GLIBC_2_34);
libc_hidden_ver (___pthread_barrier_wait, __pthread_barrier_wait)
#ifndef SHARED
strong_alias (___pthread_barrier_wait, __pthread_barrier_wait)
#endif

#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_2, GLIBC_2_34)
compat_symbol (libpthread, ___pthread_barrier_wait, pthread_barrier_wait,
               GLIBC_2_2);
#endif