| 1 | /* sem_waitcommon -- wait on a semaphore, shared code. |
| 2 | Copyright (C) 2003-2023 Free Software Foundation, Inc. |
| 3 | This file is part of the GNU C Library. |
| 4 | |
| 5 | The GNU C Library is free software; you can redistribute it and/or |
| 6 | modify it under the terms of the GNU Lesser General Public |
| 7 | License as published by the Free Software Foundation; either |
| 8 | version 2.1 of the License, or (at your option) any later version. |
| 9 | |
| 10 | The GNU C Library is distributed in the hope that it will be useful, |
| 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 13 | Lesser General Public License for more details. |
| 14 | |
| 15 | You should have received a copy of the GNU Lesser General Public |
| 16 | License along with the GNU C Library; if not, see |
| 17 | <https://www.gnu.org/licenses/>. */ |
| 18 | |
| 19 | #include <kernel-features.h> |
| 20 | #include <errno.h> |
| 21 | #include <sysdep.h> |
| 22 | #include <futex-internal.h> |
| 23 | #include <internaltypes.h> |
| 24 | #include <semaphore.h> |
| 25 | #include <sys/time.h> |
| 26 | |
| 27 | #include <pthreadP.h> |
| 28 | #include <shlib-compat.h> |
| 29 | #include <atomic.h> |
| 30 | |
| 31 | |
| 32 | /* The semaphore provides two main operations: sem_post adds a token to the |
| 33 | semaphore; sem_wait grabs a token from the semaphore, potentially waiting |
| 34 | until there is a token available. A sem_wait needs to synchronize with |
   the sem_post that provided the token, so that whatever led to the sem_post
| 36 | happens before the code after sem_wait. |
| 37 | |
| 38 | Conceptually, available tokens can simply be counted; let's call that the |
| 39 | value of the semaphore. However, we also want to know whether there might |
| 40 | be a sem_wait that is blocked on the value because it was zero (using a |
| 41 | futex with the value being the futex variable); if there is no blocked |
| 42 | sem_wait, sem_post does not need to execute a futex_wake call. Therefore, |
| 43 | we also need to count the number of potentially blocked sem_wait calls |
| 44 | (which we call nwaiters). |
| 45 | |
| 46 | What makes this tricky is that POSIX requires that a semaphore can be |
| 47 | destroyed as soon as the last remaining sem_wait has returned, and no |
| 48 | other sem_wait or sem_post calls are executing concurrently. However, the |
| 49 | sem_post call whose token was consumed by the last sem_wait is considered |
| 50 | to have finished once it provided the token to the sem_wait. |
| 51 | Thus, sem_post must not access the semaphore struct anymore after it has |
| 52 | made a token available; IOW, it needs to be able to atomically provide |
| 53 | a token and check whether any blocked sem_wait calls might exist. |
| 54 | |
| 55 | This is straightforward to do if the architecture provides 64b atomics |
| 56 | because we can just put both the value and nwaiters into one variable that |
| 57 | we access atomically: This is the data field, the value is in the |
| 58 | least-significant 32 bits, and nwaiters in the other bits. When sem_post |
| 59 | makes a value available, it can atomically check nwaiters. |
| 60 | |
| 61 | If we have only 32b atomics available, we cannot put both nwaiters and |
| 62 | value into one 32b value because then we might have too few bits for both |
| 63 | of those counters. Therefore, we need to use two distinct fields. |
| 64 | |
| 65 | To allow sem_post to atomically make a token available and check for |
| 66 | blocked sem_wait calls, we use one bit in value to indicate whether |
| 67 | nwaiters is nonzero. That allows sem_post to use basically the same |
| 68 | algorithm as with 64b atomics, but requires sem_wait to update the bit; it |
| 69 | can't do this atomically with another access to nwaiters, but it can compute |
| 70 | a conservative value for the bit because it's benign if the bit is set |
| 71 | even if nwaiters is zero (all we get is an unnecessary futex wake call by |
| 72 | sem_post). |
| 73 | Specifically, sem_wait will unset the bit speculatively if it believes that |
| 74 | there is no other concurrently executing sem_wait. If it misspeculated, |
| 75 | it will have to clean up by waking any other sem_wait call (i.e., what |
| 76 | sem_post would do otherwise). This does not conflict with the destruction |
| 77 | requirement because the semaphore must not be destructed while any sem_wait |
| 78 | is still executing. */ |
| 79 | |
| 80 | #if !__HAVE_64B_ATOMICS |
| 81 | static void |
| 82 | __sem_wait_32_finish (struct new_sem *sem); |
| 83 | #endif |
| 84 | |
/* Cancellation cleanup handler installed around the blocking futex wait in
   __new_sem_wait_slow64.  Deregisters the calling thread as a potentially
   blocked waiter so that sem_post does not count it anymore.  ARG is the
   semaphore passed to pthread_cleanup_push.  */
static void
__sem_wait_cleanup (void *arg)
{
  struct new_sem *sem = (struct new_sem *) arg;

#if __HAVE_64B_ATOMICS
  /* Stop being registered as a waiter.  See below for MO.  */
  atomic_fetch_add_relaxed (&sem->data, -((uint64_t) 1 << SEM_NWAITERS_SHIFT));
#else
  /* The 32b variant also has to fix up the nwaiters bit in the value;
     delegate to the shared finish routine.  */
  __sem_wait_32_finish (sem);
#endif
}
| 97 | |
/* Wait until at least one token is available, possibly with a timeout:
   block on the semaphore's futex word while it still holds the "no tokens
   available" value the caller observed.  ABSTIME (may be NULL for no
   timeout) is measured against CLOCKID.  Returns the futex error code:
   0 or EAGAIN on (possibly spurious) wake-up, or ETIMEDOUT / EINTR /
   EOVERFLOW as documented for __futex_abstimed_wait_cancelable64.
   This is in a separate function in order to make sure gcc
   puts the call site into an exception region, and thus the
   cleanups get properly run.  TODO still necessary?  Other futex_wait
   users don't seem to need it.  */
static int
__attribute__ ((noinline))
do_futex_wait (struct new_sem *sem, clockid_t clockid,
	       const struct __timespec64 *abstime)
{
  int err;

#if __HAVE_64B_ATOMICS
  /* Block while the 32b value half of the 64b data word is 0, i.e., while
     there are no tokens.  */
  err = __futex_abstimed_wait_cancelable64 (
      (unsigned int *) &sem->data + SEM_VALUE_OFFSET, 0,
      clockid, abstime,
      sem->private);
#else
  /* Block while the value is exactly SEM_NWAITERS_MASK, i.e., zero tokens
     with the waiters-present bit set (the caller ensures the bit is set
     before sleeping).  */
  err = __futex_abstimed_wait_cancelable64 (&sem->value, SEM_NWAITERS_MASK,
					    clockid, abstime, sem->private);
#endif

  return err;
}
| 122 | |
/* Fast path: Try to grab a token without blocking.  Returns 0 if a token
   was grabbed, -1 otherwise.  If DEFINITIVE_RESULT is nonzero, a failed
   weak CAS is retried, so -1 definitively means no token was available at
   some point during the call; otherwise -1 may also mean the weak CAS
   failed spuriously.  */
static int
__new_sem_wait_fast (struct new_sem *sem, int definitive_result)
{
  /* We need acquire MO if we actually grab a token, so that this
     synchronizes with all token providers (i.e., the RMW operation we read
     from or all those before it in modification order; also see sem_post).
     We do not need to guarantee any ordering if we observed that there is
     no token (POSIX leaves it unspecified whether functions that fail
     synchronize memory); thus, relaxed MO is sufficient for the initial load
     and the failure path of the CAS.  If the weak CAS fails and we need a
     definitive result, retry.  */
#if __HAVE_64B_ATOMICS
  uint64_t d = atomic_load_relaxed (&sem->data);
  do
    {
      /* Value (tokens) lives in the low 32 bits of data.  */
      if ((d & SEM_VALUE_MASK) == 0)
	break;
      if (atomic_compare_exchange_weak_acquire (&sem->data, &d, d - 1))
	return 0;
    }
  while (definitive_result);
  return -1;
#else
  unsigned int v = atomic_load_relaxed (&sem->value);
  do
    {
      /* Tokens live above SEM_VALUE_SHIFT; below it is the nwaiters bit.  */
      if ((v >> SEM_VALUE_SHIFT) == 0)
	break;
      if (atomic_compare_exchange_weak_acquire (&sem->value,
	  &v, v - (1 << SEM_VALUE_SHIFT)))
	return 0;
    }
  while (definitive_result);
  return -1;
#endif
}
| 160 | |
| 161 | /* Slow path that blocks. */ |
| 162 | static int |
| 163 | __attribute__ ((noinline)) |
| 164 | __new_sem_wait_slow64 (struct new_sem *sem, clockid_t clockid, |
| 165 | const struct __timespec64 *abstime) |
| 166 | { |
| 167 | int err = 0; |
| 168 | |
| 169 | #if __HAVE_64B_ATOMICS |
| 170 | /* Add a waiter. Relaxed MO is sufficient because we can rely on the |
| 171 | ordering provided by the RMW operations we use. */ |
| 172 | uint64_t d = atomic_fetch_add_relaxed (&sem->data, |
| 173 | (uint64_t) 1 << SEM_NWAITERS_SHIFT); |
| 174 | |
| 175 | pthread_cleanup_push (__sem_wait_cleanup, sem); |
| 176 | |
| 177 | /* Wait for a token to be available. Retry until we can grab one. */ |
| 178 | for (;;) |
| 179 | { |
| 180 | /* If there is no token available, sleep until there is. */ |
| 181 | if ((d & SEM_VALUE_MASK) == 0) |
| 182 | { |
| 183 | err = do_futex_wait (sem, clockid, abstime); |
| 184 | /* A futex return value of 0 or EAGAIN is due to a real or spurious |
| 185 | wake-up, or due to a change in the number of tokens. We retry in |
| 186 | these cases. |
| 187 | If we timed out, forward this to the caller. |
| 188 | EINTR is returned if we are interrupted by a signal; we |
| 189 | forward this to the caller. (See futex_wait and related |
| 190 | documentation. Before Linux 2.6.22, EINTR was also returned on |
| 191 | spurious wake-ups; we only support more recent Linux versions, |
| 192 | so do not need to consider this here.) */ |
| 193 | if (err == ETIMEDOUT || err == EINTR || err == EOVERFLOW) |
| 194 | { |
| 195 | __set_errno (err); |
| 196 | err = -1; |
| 197 | /* Stop being registered as a waiter. */ |
| 198 | atomic_fetch_add_relaxed (&sem->data, |
| 199 | -((uint64_t) 1 << SEM_NWAITERS_SHIFT)); |
| 200 | break; |
| 201 | } |
| 202 | /* Relaxed MO is sufficient; see below. */ |
| 203 | d = atomic_load_relaxed (&sem->data); |
| 204 | } |
| 205 | else |
| 206 | { |
| 207 | /* Try to grab both a token and stop being a waiter. We need |
| 208 | acquire MO so this synchronizes with all token providers (i.e., |
| 209 | the RMW operation we read from or all those before it in |
| 210 | modification order; also see sem_post). On the failure path, |
| 211 | relaxed MO is sufficient because we only eventually need the |
| 212 | up-to-date value; the futex_wait or the CAS perform the real |
| 213 | work. */ |
| 214 | if (atomic_compare_exchange_weak_acquire (&sem->data, |
| 215 | &d, d - 1 - ((uint64_t) 1 << SEM_NWAITERS_SHIFT))) |
| 216 | { |
| 217 | err = 0; |
| 218 | break; |
| 219 | } |
| 220 | } |
| 221 | } |
| 222 | |
| 223 | pthread_cleanup_pop (0); |
| 224 | #else |
| 225 | /* The main difference to the 64b-atomics implementation is that we need to |
| 226 | access value and nwaiters in separate steps, and that the nwaiters bit |
| 227 | in the value can temporarily not be set even if nwaiters is nonzero. |
| 228 | We work around incorrectly unsetting the nwaiters bit by letting sem_wait |
| 229 | set the bit again and waking the number of waiters that could grab a |
| 230 | token. There are two additional properties we need to ensure: |
| 231 | (1) We make sure that whenever unsetting the bit, we see the increment of |
| 232 | nwaiters by the other thread that set the bit. IOW, we will notice if |
| 233 | we make a mistake. |
| 234 | (2) When setting the nwaiters bit, we make sure that we see the unsetting |
| 235 | of the bit by another waiter that happened before us. This avoids having |
| 236 | to blindly set the bit whenever we need to block on it. We set/unset |
| 237 | the bit while having incremented nwaiters (i.e., are a registered |
| 238 | waiter), and the problematic case only happens when one waiter indeed |
| 239 | followed another (i.e., nwaiters was never larger than 1); thus, this |
| 240 | works similarly as with a critical section using nwaiters (see the MOs |
| 241 | and related comments below). |
| 242 | |
| 243 | An alternative approach would be to unset the bit after decrementing |
| 244 | nwaiters; however, that would result in needing Dekker-like |
| 245 | synchronization and thus full memory barriers. We also would not be able |
| 246 | to prevent misspeculation, so this alternative scheme does not seem |
| 247 | beneficial. */ |
| 248 | unsigned int v; |
| 249 | |
| 250 | /* Add a waiter. We need acquire MO so this synchronizes with the release |
| 251 | MO we use when decrementing nwaiters below; it ensures that if another |
| 252 | waiter unset the bit before us, we see that and set it again. Also see |
| 253 | property (2) above. */ |
| 254 | atomic_fetch_add_acquire (&sem->nwaiters, 1); |
| 255 | |
| 256 | pthread_cleanup_push (__sem_wait_cleanup, sem); |
| 257 | |
| 258 | /* Wait for a token to be available. Retry until we can grab one. */ |
| 259 | /* We do not need any ordering wrt. to this load's reads-from, so relaxed |
| 260 | MO is sufficient. The acquire MO above ensures that in the problematic |
| 261 | case, we do see the unsetting of the bit by another waiter. */ |
| 262 | v = atomic_load_relaxed (&sem->value); |
| 263 | do |
| 264 | { |
| 265 | do |
| 266 | { |
| 267 | /* We are about to block, so make sure that the nwaiters bit is |
| 268 | set. We need release MO on the CAS to ensure that when another |
| 269 | waiter unsets the nwaiters bit, it will also observe that we |
| 270 | incremented nwaiters in the meantime (also see the unsetting of |
| 271 | the bit below). Relaxed MO on CAS failure is sufficient (see |
| 272 | above). */ |
| 273 | do |
| 274 | { |
| 275 | if ((v & SEM_NWAITERS_MASK) != 0) |
| 276 | break; |
| 277 | } |
| 278 | while (!atomic_compare_exchange_weak_release (&sem->value, |
| 279 | &v, v | SEM_NWAITERS_MASK)); |
| 280 | /* If there is no token, wait. */ |
| 281 | if ((v >> SEM_VALUE_SHIFT) == 0) |
| 282 | { |
| 283 | /* See __HAVE_64B_ATOMICS variant. */ |
| 284 | err = do_futex_wait (sem, clockid, abstime); |
| 285 | if (err == ETIMEDOUT || err == EINTR) |
| 286 | { |
| 287 | __set_errno (err); |
| 288 | err = -1; |
| 289 | goto error; |
| 290 | } |
| 291 | err = 0; |
| 292 | /* We blocked, so there might be a token now. Relaxed MO is |
| 293 | sufficient (see above). */ |
| 294 | v = atomic_load_relaxed (&sem->value); |
| 295 | } |
| 296 | } |
| 297 | /* If there is no token, we must not try to grab one. */ |
| 298 | while ((v >> SEM_VALUE_SHIFT) == 0); |
| 299 | } |
| 300 | /* Try to grab a token. We need acquire MO so this synchronizes with |
| 301 | all token providers (i.e., the RMW operation we read from or all those |
| 302 | before it in modification order; also see sem_post). */ |
| 303 | while (!atomic_compare_exchange_weak_acquire (&sem->value, |
| 304 | &v, v - (1 << SEM_VALUE_SHIFT))); |
| 305 | |
| 306 | error: |
| 307 | pthread_cleanup_pop (0); |
| 308 | |
| 309 | __sem_wait_32_finish (sem); |
| 310 | #endif |
| 311 | |
| 312 | return err; |
| 313 | } |
| 314 | |
/* Stop being a registered waiter (non-64b-atomics code only): speculatively
   unset the nwaiters bit in the value if we appear to be the last waiter,
   then decrement nwaiters; if the guess turns out wrong, set the bit again
   and wake as many waiters as there are tokens available.  */
#if !__HAVE_64B_ATOMICS
static void
__sem_wait_32_finish (struct new_sem *sem)
{
  /* The nwaiters bit is still set, try to unset it now if this seems
     necessary.  We do this before decrementing nwaiters so that the unsetting
     is visible to other waiters entering after us.  Relaxed MO is sufficient
     because we are just speculating here; a stronger MO would not prevent
     misspeculation.  */
  unsigned int wguess = atomic_load_relaxed (&sem->nwaiters);
  if (wguess == 1)
    /* We might be the last waiter, so unset.  This needs acquire MO so that
       it synchronizes with the release MO when setting the bit above; if we
       overwrite someone else that set the bit, we'll read in the following
       decrement of nwaiters at least from that release sequence, so we'll
       see if the other waiter is still active or if another waiter entered
       in the meantime (i.e., using the check below).  */
    atomic_fetch_and_acquire (&sem->value, ~SEM_NWAITERS_MASK);

  /* Now stop being a waiter, and see whether our guess was correct.
     This needs release MO so that it synchronizes with the acquire MO when
     a waiter increments nwaiters; this makes sure that newer waiters see that
     we reset the waiters_present bit.  */
  unsigned int wfinal = atomic_fetch_add_release (&sem->nwaiters, -1);
  if (wfinal > 1 && wguess == 1)
    {
      /* We guessed wrong, and so need to clean up after the mistake and
         unblock any waiters that could have not been woken.  There is no
         additional ordering that we need to set up, so relaxed MO is
         sufficient.  */
      unsigned int v = atomic_fetch_or_relaxed (&sem->value,
						SEM_NWAITERS_MASK);
      /* If there are available tokens, then wake as many waiters.  If there
         aren't any, then there is no need to wake anyone because there is
         none to grab for another waiter.  If tokens become available
         subsequently, then the respective sem_post calls will do the wake-up
         due to us having set the nwaiters bit again.  */
      v >>= SEM_VALUE_SHIFT;
      if (v > 0)
	futex_wake (&sem->value, v, sem->private);
    }
}
#endif
| 359 | |