1 | /* sem_waitcommon -- wait on a semaphore, shared code. |
2 | Copyright (C) 2003-2019 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | Contributed by Paul Mackerras <paulus@au.ibm.com>, 2003. |
5 | |
6 | The GNU C Library is free software; you can redistribute it and/or |
7 | modify it under the terms of the GNU Lesser General Public |
8 | License as published by the Free Software Foundation; either |
9 | version 2.1 of the License, or (at your option) any later version. |
10 | |
11 | The GNU C Library is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | Lesser General Public License for more details. |
15 | |
16 | You should have received a copy of the GNU Lesser General Public |
17 | License along with the GNU C Library; if not, see |
18 | <http://www.gnu.org/licenses/>. */ |
19 | |
20 | #include <kernel-features.h> |
21 | #include <errno.h> |
22 | #include <sysdep.h> |
23 | #include <futex-internal.h> |
24 | #include <internaltypes.h> |
25 | #include <semaphore.h> |
26 | #include <sys/time.h> |
27 | |
28 | #include <pthreadP.h> |
29 | #include <shlib-compat.h> |
30 | #include <atomic.h> |
31 | |
32 | |
33 | /* The semaphore provides two main operations: sem_post adds a token to the |
34 | semaphore; sem_wait grabs a token from the semaphore, potentially waiting |
35 | until there is a token available. A sem_wait needs to synchronize with |
the sem_post that provided the token, so that whatever led to the sem_post
happens before the code after sem_wait.
38 | |
39 | Conceptually, available tokens can simply be counted; let's call that the |
40 | value of the semaphore. However, we also want to know whether there might |
41 | be a sem_wait that is blocked on the value because it was zero (using a |
42 | futex with the value being the futex variable); if there is no blocked |
43 | sem_wait, sem_post does not need to execute a futex_wake call. Therefore, |
44 | we also need to count the number of potentially blocked sem_wait calls |
45 | (which we call nwaiters). |
46 | |
47 | What makes this tricky is that POSIX requires that a semaphore can be |
48 | destroyed as soon as the last remaining sem_wait has returned, and no |
49 | other sem_wait or sem_post calls are executing concurrently. However, the |
50 | sem_post call whose token was consumed by the last sem_wait is considered |
51 | to have finished once it provided the token to the sem_wait. |
52 | Thus, sem_post must not access the semaphore struct anymore after it has |
53 | made a token available; IOW, it needs to be able to atomically provide |
54 | a token and check whether any blocked sem_wait calls might exist. |
55 | |
56 | This is straightforward to do if the architecture provides 64b atomics |
57 | because we can just put both the value and nwaiters into one variable that |
58 | we access atomically: This is the data field, the value is in the |
59 | least-significant 32 bits, and nwaiters in the other bits. When sem_post |
60 | makes a value available, it can atomically check nwaiters. |
61 | |
62 | If we have only 32b atomics available, we cannot put both nwaiters and |
63 | value into one 32b value because then we might have too few bits for both |
64 | of those counters. Therefore, we need to use two distinct fields. |
65 | |
66 | To allow sem_post to atomically make a token available and check for |
67 | blocked sem_wait calls, we use one bit in value to indicate whether |
68 | nwaiters is nonzero. That allows sem_post to use basically the same |
69 | algorithm as with 64b atomics, but requires sem_wait to update the bit; it |
70 | can't do this atomically with another access to nwaiters, but it can compute |
71 | a conservative value for the bit because it's benign if the bit is set |
72 | even if nwaiters is zero (all we get is an unnecessary futex wake call by |
73 | sem_post). |
74 | Specifically, sem_wait will unset the bit speculatively if it believes that |
75 | there is no other concurrently executing sem_wait. If it misspeculated, |
76 | it will have to clean up by waking any other sem_wait call (i.e., what |
77 | sem_post would do otherwise). This does not conflict with the destruction |
78 | requirement because the semaphore must not be destructed while any sem_wait |
79 | is still executing. */ |
80 | |
81 | #if !__HAVE_64B_ATOMICS |
82 | static void |
83 | __sem_wait_32_finish (struct new_sem *sem); |
84 | #endif |
85 | |
/* Cancellation cleanup handler installed around the blocking futex wait:
   deregister the calling thread as a waiter on the semaphore.  ARG is the
   struct new_sem * passed to pthread_cleanup_push.  Without this, a
   cancelled sem_wait would leave nwaiters permanently too high.  */
static void
__sem_wait_cleanup (void *arg)
{
  struct new_sem *sem = (struct new_sem *) arg;

#if __HAVE_64B_ATOMICS
  /* Stop being registered as a waiter by undoing the earlier increment of
     the nwaiters field (upper bits of data).  See below for MO.  */
  atomic_fetch_add_relaxed (&sem->data, -((uint64_t) 1 << SEM_NWAITERS_SHIFT));
#else
  /* The 32b variant needs the full deregistration protocol (it may also
     have to clear the nwaiters bit and wake other waiters).  */
  __sem_wait_32_finish (sem);
#endif
}
98 | |
/* Wait until at least one token is available, possibly with a timeout.
   This is in a separate function in order to make sure gcc
   puts the call site into an exception region, and thus the
   cleanups get properly run.  TODO still necessary?  Other futex_wait
   users don't seem to need it.
   Returns 0 on wake-up, or an error code such as ETIMEDOUT, EINTR, or
   EAGAIN (see the callers' handling of the result, and the futex_wait
   documentation referenced there).  This is a cancellation point.  */
static int
__attribute__ ((noinline))
do_futex_wait (struct new_sem *sem, const struct timespec *abstime)
{
  int err;

#if __HAVE_64B_ATOMICS
  /* Sleep on the 32b value half of the combined data word; we block only
     while the value field reads as 0 (no token available).  */
  err = futex_abstimed_wait_cancelable (
      (unsigned int *) &sem->data + SEM_VALUE_OFFSET, 0, abstime,
      sem->private);
#else
  /* Sleep while the value bits are zero and the nwaiters bit is set, so
     the expected futex word is exactly SEM_NWAITERS_MASK (the caller has
     set the bit before blocking).  */
  err = futex_abstimed_wait_cancelable (&sem->value, SEM_NWAITERS_MASK,
					abstime, sem->private);
#endif

  return err;
}
121 | |
/* Fast path: Try to grab a token without blocking.
   Returns 0 if a token was grabbed, -1 if none was available (errno is
   not set here).  If DEFINITIVE_RESULT is nonzero, retry after spurious
   weak-CAS failures so that -1 definitely means "no token was available";
   otherwise a single attempt is made.  */
static int
__new_sem_wait_fast (struct new_sem *sem, int definitive_result)
{
  /* We need acquire MO if we actually grab a token, so that this
     synchronizes with all token providers (i.e., the RMW operation we read
     from or all those before it in modification order; also see sem_post).
     We do not need to guarantee any ordering if we observed that there is
     no token (POSIX leaves it unspecified whether functions that fail
     synchronize memory); thus, relaxed MO is sufficient for the initial load
     and the failure path of the CAS.  If the weak CAS fails and we need a
     definitive result, retry.  */
#if __HAVE_64B_ATOMICS
  uint64_t d = atomic_load_relaxed (&sem->data);
  do
    {
      /* Value field (low 32 bits of data) is zero: no token available.  */
      if ((d & SEM_VALUE_MASK) == 0)
	break;
      /* Grab a token by decrementing the value; nwaiters (upper bits) is
	 left untouched.  */
      if (atomic_compare_exchange_weak_acquire (&sem->data, &d, d - 1))
	return 0;
    }
  while (definitive_result);
  return -1;
#else
  unsigned int v = atomic_load_relaxed (&sem->value);
  do
    {
      /* Value occupies the bits above SEM_VALUE_SHIFT; zero means no
	 token available.  */
      if ((v >> SEM_VALUE_SHIFT) == 0)
	break;
      /* Grab a token; the nwaiters bit below the shift is preserved.  */
      if (atomic_compare_exchange_weak_acquire (&sem->value,
	  &v, v - (1 << SEM_VALUE_SHIFT)))
	return 0;
    }
  while (definitive_result);
  return -1;
#endif
}
159 | |
/* Slow path that blocks: register as a waiter, then loop between sleeping
   on the futex and attempting to grab a token.  Returns 0 on success; on
   timeout or interruption by a signal, returns -1 with errno set to
   ETIMEDOUT or EINTR, respectively.  ABSTIME is forwarded unchanged to
   do_futex_wait (NOTE(review): presumably NULL means wait indefinitely —
   confirm against futex_abstimed_wait_cancelable).  This is a
   cancellation point via do_futex_wait; the pushed cleanup handler keeps
   nwaiters consistent on cancellation.  */
static int
__attribute__ ((noinline))
__new_sem_wait_slow (struct new_sem *sem, const struct timespec *abstime)
{
  int err = 0;

#if __HAVE_64B_ATOMICS
  /* Add a waiter.  Relaxed MO is sufficient because we can rely on the
     ordering provided by the RMW operations we use.  */
  uint64_t d = atomic_fetch_add_relaxed (&sem->data,
      (uint64_t) 1 << SEM_NWAITERS_SHIFT);

  pthread_cleanup_push (__sem_wait_cleanup, sem);

  /* Wait for a token to be available.  Retry until we can grab one.  */
  for (;;)
    {
      /* If there is no token available, sleep until there is.  */
      if ((d & SEM_VALUE_MASK) == 0)
	{
	  err = do_futex_wait (sem, abstime);
	  /* A futex return value of 0 or EAGAIN is due to a real or spurious
	     wake-up, or due to a change in the number of tokens.  We retry in
	     these cases.
	     If we timed out, forward this to the caller.
	     EINTR is returned if we are interrupted by a signal; we
	     forward this to the caller.  (See futex_wait and related
	     documentation.  Before Linux 2.6.22, EINTR was also returned on
	     spurious wake-ups; we only support more recent Linux versions,
	     so do not need to consider this here.)  */
	  if (err == ETIMEDOUT || err == EINTR)
	    {
	      __set_errno (err);
	      err = -1;
	      /* Stop being registered as a waiter.  */
	      atomic_fetch_add_relaxed (&sem->data,
		  -((uint64_t) 1 << SEM_NWAITERS_SHIFT));
	      break;
	    }
	  /* Relaxed MO is sufficient; see below.  */
	  d = atomic_load_relaxed (&sem->data);
	}
      else
	{
	  /* Try to grab both a token and stop being a waiter.  We need
	     acquire MO so this synchronizes with all token providers (i.e.,
	     the RMW operation we read from or all those before it in
	     modification order; also see sem_post).  On the failure path,
	     relaxed MO is sufficient because we only eventually need the
	     up-to-date value; the futex_wait or the CAS perform the real
	     work.  */
	  if (atomic_compare_exchange_weak_acquire (&sem->data,
	      &d, d - 1 - ((uint64_t) 1 << SEM_NWAITERS_SHIFT)))
	    {
	      err = 0;
	      break;
	    }
	}
    }

  pthread_cleanup_pop (0);
#else
  /* The main difference to the 64b-atomics implementation is that we need to
     access value and nwaiters in separate steps, and that the nwaiters bit
     in the value can temporarily not be set even if nwaiters is nonzero.
     We work around incorrectly unsetting the nwaiters bit by letting sem_wait
     set the bit again and waking the number of waiters that could grab a
     token.  There are two additional properties we need to ensure:
     (1) We make sure that whenever unsetting the bit, we see the increment of
     nwaiters by the other thread that set the bit.  IOW, we will notice if
     we make a mistake.
     (2) When setting the nwaiters bit, we make sure that we see the unsetting
     of the bit by another waiter that happened before us.  This avoids having
     to blindly set the bit whenever we need to block on it.  We set/unset
     the bit while having incremented nwaiters (i.e., are a registered
     waiter), and the problematic case only happens when one waiter indeed
     followed another (i.e., nwaiters was never larger than 1); thus, this
     works similarly as with a critical section using nwaiters (see the MOs
     and related comments below).

     An alternative approach would be to unset the bit after decrementing
     nwaiters; however, that would result in needing Dekker-like
     synchronization and thus full memory barriers.  We also would not be able
     to prevent misspeculation, so this alternative scheme does not seem
     beneficial.  */
  unsigned int v;

  /* Add a waiter.  We need acquire MO so this synchronizes with the release
     MO we use when decrementing nwaiters below; it ensures that if another
     waiter unset the bit before us, we see that and set it again.  Also see
     property (2) above.  */
  atomic_fetch_add_acquire (&sem->nwaiters, 1);

  pthread_cleanup_push (__sem_wait_cleanup, sem);

  /* Wait for a token to be available.  Retry until we can grab one.  */
  /* We do not need any ordering wrt. to this load's reads-from, so relaxed
     MO is sufficient.  The acquire MO above ensures that in the problematic
     case, we do see the unsetting of the bit by another waiter.  */
  v = atomic_load_relaxed (&sem->value);
  do
    {
      do
	{
	  /* We are about to block, so make sure that the nwaiters bit is
	     set.  We need release MO on the CAS to ensure that when another
	     waiter unsets the nwaiters bit, it will also observe that we
	     incremented nwaiters in the meantime (also see the unsetting of
	     the bit below).  Relaxed MO on CAS failure is sufficient (see
	     above).  */
	  do
	    {
	      if ((v & SEM_NWAITERS_MASK) != 0)
		break;
	    }
	  while (!atomic_compare_exchange_weak_release (&sem->value,
	      &v, v | SEM_NWAITERS_MASK));
	  /* If there is no token, wait.  */
	  if ((v >> SEM_VALUE_SHIFT) == 0)
	    {
	      /* See __HAVE_64B_ATOMICS variant.  */
	      err = do_futex_wait(sem, abstime);
	      if (err == ETIMEDOUT || err == EINTR)
		{
		  __set_errno (err);
		  err = -1;
		  goto error;
		}
	      err = 0;
	      /* We blocked, so there might be a token now.  Relaxed MO is
		 sufficient (see above).  */
	      v = atomic_load_relaxed (&sem->value);
	    }
	}
      /* If there is no token, we must not try to grab one.  */
      while ((v >> SEM_VALUE_SHIFT) == 0);
    }
  /* Try to grab a token.  We need acquire MO so this synchronizes with
     all token providers (i.e., the RMW operation we read from or all those
     before it in modification order; also see sem_post).  */
  while (!atomic_compare_exchange_weak_acquire (&sem->value,
      &v, v - (1 << SEM_VALUE_SHIFT)));

error:
  pthread_cleanup_pop (0);

  /* Deregister as a waiter (and, if necessary, clear the nwaiters bit and
     wake other waiters); runs on both the success and error paths.  */
  __sem_wait_32_finish (sem);
#endif

  return err;
}
312 | |
/* Stop being a registered waiter (non-64b-atomics code only): decrement
   nwaiters and, if we appear to be the last waiter, speculatively clear
   the nwaiters bit in value.  If the speculation turns out wrong (another
   waiter registered concurrently), repair by re-setting the bit and
   waking as many waiters as there are available tokens.  Called on every
   exit path of the 32b __new_sem_wait_slow, including cancellation.  */
#if !__HAVE_64B_ATOMICS
static void
__sem_wait_32_finish (struct new_sem *sem)
{
  /* The nwaiters bit is still set, try to unset it now if this seems
     necessary.  We do this before decrementing nwaiters so that the unsetting
     is visible to other waiters entering after us.  Relaxed MO is sufficient
     because we are just speculating here; a stronger MO would not prevent
     misspeculation.  */
  unsigned int wguess = atomic_load_relaxed (&sem->nwaiters);
  if (wguess == 1)
    /* We might be the last waiter, so unset.  This needs acquire MO so that
       it synchronizes with the release MO when setting the bit above; if we
       overwrite someone else that set the bit, we'll read in the following
       decrement of nwaiters at least from that release sequence, so we'll
       see if the other waiter is still active or if another writer entered
       in the meantime (i.e., using the check below).  */
    atomic_fetch_and_acquire (&sem->value, ~SEM_NWAITERS_MASK);

  /* Now stop being a waiter, and see whether our guess was correct.
     This needs release MO so that it synchronizes with the acquire MO when
     a waiter increments nwaiters; this makes sure that newer writers see that
     we reset the waiters_present bit.  */
  unsigned int wfinal = atomic_fetch_add_release (&sem->nwaiters, -1);
  if (wfinal > 1 && wguess == 1)
    {
      /* We guessed wrong, and so need to clean up after the mistake and
         unblock any waiters that could have not been woken.  There is no
         additional ordering that we need to set up, so relaxed MO is
         sufficient.  */
      unsigned int v = atomic_fetch_or_relaxed (&sem->value,
						SEM_NWAITERS_MASK);
      /* If there are available tokens, then wake as many waiters.  If there
         aren't any, then there is no need to wake anyone because there is
         none to grab for another waiter.  If tokens become available
         subsequently, then the respective sem_post calls will do the wake-up
         due to us having set the nwaiters bit again.  */
      v >>= SEM_VALUE_SHIFT;
      if (v > 0)
	futex_wake (&sem->value, v, sem->private);
    }
}
#endif
357 | |