/* pthread_cond_common -- shared code for condition variable.
   Copyright (C) 2016-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <atomic.h>
#include <atomic_wide_counter.h>
#include <stdint.h>
#include <pthread.h>

/* We need 3 least-significant bits on __wrefs for something else.
   This also matches __atomic_wide_counter requirements: The highest
   value we add is __PTHREAD_COND_MAX_GROUP_SIZE << 2 to __g1_start
   (the two extra bits are for the lock in the two LSBs of
   __g1_start).  */
#define __PTHREAD_COND_MAX_GROUP_SIZE ((unsigned) 1 << 29)
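/* As a concrete check of the bound above: the largest addend is
   __PTHREAD_COND_MAX_GROUP_SIZE << 2 == (1 << 29) << 2 == 1 << 31, which is
   also the maximum concurrent increase that the fetch-xor emulation below
   assumes for __wseq.  */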

static inline uint64_t
__condvar_load_wseq_relaxed (pthread_cond_t *cond)
{
  return __atomic_wide_counter_load_relaxed (&cond->__data.__wseq);
}

static inline uint64_t
__condvar_fetch_add_wseq_acquire (pthread_cond_t *cond, unsigned int val)
{
  return __atomic_wide_counter_fetch_add_acquire (&cond->__data.__wseq, val);
}

static inline uint64_t
__condvar_load_g1_start_relaxed (pthread_cond_t *cond)
{
  return __atomic_wide_counter_load_relaxed (&cond->__data.__g1_start);
}

static inline void
__condvar_add_g1_start_relaxed (pthread_cond_t *cond, unsigned int val)
{
  __atomic_wide_counter_add_relaxed (&cond->__data.__g1_start, val);
}

#if __HAVE_64B_ATOMICS == 1

static inline uint64_t
__condvar_fetch_xor_wseq_release (pthread_cond_t *cond, unsigned int val)
{
  return atomic_fetch_xor_release (&cond->__data.__wseq.__value64, val);
}

#else /* !__HAVE_64B_ATOMICS */

/* The xor operation needs to be an atomic read-modify-write.  The write
   itself is not an issue as it affects just the lower-order half but not bits
   used in the add operation.  To make the full fetch-and-xor atomic, we
   exploit the fact that, concurrently, the value can increase by at most
   1 << 31: the xor operation is only called while having acquired the lock,
   so no more than __PTHREAD_COND_MAX_GROUP_SIZE waiters can enter
   concurrently and thus increment __wseq.  Therefore, if the xor operation
   observes a value of __wseq, then the value it applies the modification to
   later on can be derived.  */
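
/* A concrete illustration of the derivation (example values only): suppose
   the initial load yields h == 2 and l == 0x7ffffff0 after masking, and
   concurrent waiters then advance the low half past its 31-bit wrap-around
   before our fetch-xor runs.  The fetch-xor might then return l2 == 0x10;
   since l2 < l, the low half must have wrapped exactly once (more is
   impossible given the 1 << 31 bound), so the high half we observed is
   stale by exactly one increment, and we return
   ((uint64_t) 3 << 31) + 0x10.  */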

static uint64_t __attribute__ ((unused))
__condvar_fetch_xor_wseq_release (pthread_cond_t *cond, unsigned int val)
{
  /* First, get the current value.  See __atomic_wide_counter_load_relaxed.  */
  unsigned int h, l, h2;
  do
    {
      h = atomic_load_acquire (&cond->__data.__wseq.__value32.__high);
      l = atomic_load_acquire (&cond->__data.__wseq.__value32.__low);
      h2 = atomic_load_relaxed (&cond->__data.__wseq.__value32.__high);
    }
  while (h != h2);
  if (((l >> 31) > 0) && ((h >> 31) == 0))
    h++;
  h &= ~((unsigned int) 1 << 31);
  l &= ~((unsigned int) 1 << 31);

  /* Now modify.  Due to the coherence rules, the prior load will read a value
     earlier in modification order than the following fetch-xor.
     This uses release MO to make the full operation have release semantics
     (all other operations access the lower-order half).  */
  unsigned int l2
    = (atomic_fetch_xor_release (&cond->__data.__wseq.__value32.__low, val)
       & ~((unsigned int) 1 << 31));
  if (l2 < l)
    /* The lower-order half overflowed in the meantime.  This happened exactly
       once due to the limit on concurrent waiters (see above).  */
    h++;
  return ((uint64_t) h << 31) + l2;
}

#endif /* !__HAVE_64B_ATOMICS */

/* The lock that signalers use.  See pthread_cond_wait_common for uses.
   The lock is our normal three-state lock: not acquired (0) / acquired (1) /
   acquired-with-futex_wake-request (2).  However, we need to preserve the
   other bits in the unsigned int used for the lock, and therefore it is a
   little more complex.  */
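/* Concretely, the layout of __g1_orig_size assumed throughout:

     bits 1..0:  lock state (0 == not acquired, 1 == acquired,
                 2 == acquired with a futex_wake request)
     bits 31..2: size of G1 when it became G1 (see __condvar_get_orig_size
                 and __condvar_set_orig_size, which shift by 2).  */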
static void __attribute__ ((unused))
__condvar_acquire_lock (pthread_cond_t *cond, int private)
{
  unsigned int s = atomic_load_relaxed (&cond->__data.__g1_orig_size);
  while ((s & 3) == 0)
    {
      if (atomic_compare_exchange_weak_acquire (&cond->__data.__g1_orig_size,
                                                &s, s | 1))
        return;
      /* TODO Spinning and back-off.  */
    }
  /* We can't change from not acquired to acquired, so try to change to
     acquired-with-futex-wake-request and do a futex wait if we cannot change
     from not acquired.  */
  while (1)
    {
      while ((s & 3) != 2)
        {
          if (atomic_compare_exchange_weak_acquire
              (&cond->__data.__g1_orig_size, &s, (s & ~(unsigned int) 3) | 2))
            {
              if ((s & 3) == 0)
                return;
              break;
            }
          /* TODO Back off.  */
        }
      futex_wait_simple (&cond->__data.__g1_orig_size,
                         (s & ~(unsigned int) 3) | 2, private);
      /* Reload so we see a recent value.  */
      s = atomic_load_relaxed (&cond->__data.__g1_orig_size);
    }
}

/* See __condvar_acquire_lock.  */
static void __attribute__ ((unused))
__condvar_release_lock (pthread_cond_t *cond, int private)
{
  if ((atomic_fetch_and_release (&cond->__data.__g1_orig_size,
                                 ~(unsigned int) 3) & 3)
      == 2)
    futex_wake (&cond->__data.__g1_orig_size, 1, private);
}
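
/* A sketch of how a signaler combines the helpers above and below
   (illustrative only, not a verbatim call site; see the uses referenced in
   the comments above):

     __condvar_acquire_lock (cond, private);
     unsigned int size = __condvar_get_orig_size (cond);
     ... update group bookkeeping, possibly switch group roles ...
     __condvar_set_orig_size (cond, size);
     __condvar_release_lock (cond, private);  */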

/* Only use this when having acquired the lock.  */
static unsigned int __attribute__ ((unused))
__condvar_get_orig_size (pthread_cond_t *cond)
{
  return atomic_load_relaxed (&cond->__data.__g1_orig_size) >> 2;
}

/* Only use this when having acquired the lock.  */
static void __attribute__ ((unused))
__condvar_set_orig_size (pthread_cond_t *cond, unsigned int size)
{
  /* We have acquired the lock, but might get one concurrent update due to a
     lock state change from acquired to acquired-with-futex_wake-request.
     The store with relaxed MO is fine because there will be no further
     changes to the lock bits nor the size, and we will subsequently release
     the lock with release MO.  */
  unsigned int s;
  s = (atomic_load_relaxed (&cond->__data.__g1_orig_size) & 3)
      | (size << 2);
  if ((atomic_exchange_relaxed (&cond->__data.__g1_orig_size, s) & 3)
      != (s & 3))
    atomic_store_relaxed (&cond->__data.__g1_orig_size, (size << 2) | 2);
}

/* Returns FUTEX_SHARED or FUTEX_PRIVATE based on the provided __wrefs
   value.  */
static int __attribute__ ((unused))
__condvar_get_private (int flags)
{
  if ((flags & __PTHREAD_COND_SHARED_MASK) == 0)
    return FUTEX_PRIVATE;
  else
    return FUTEX_SHARED;
}
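
/* Background for __condvar_quiesce_and_switch_g1 below, summarizing the
   scheme it operates on: the condvar maintains two waiter groups that swap
   roles.  Waiters in G1 are the ones eligible to consume signals; new
   waiters always enter G2.  Once all waiters in G1 have left, G1 is closed
   and becomes a fresh, empty G2, while the former G2 becomes the new G1
   that subsequent signals go to.  Which slot currently acts as G2 is
   recorded in the LSB of __wseq, which is why the switch is published by a
   fetch-xor of 1 on __wseq.  */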

/* This closes G1 (whose index is in G1INDEX), waits for all futex waiters to
   leave G1, converts G1 into a fresh G2, and then switches group roles so that
   the former G2 becomes the new G1 ending at the current __wseq value when we
   eventually make the switch (WSEQ is just an observation of __wseq by the
   signaler).
   If G2 is empty, it will not switch groups because then it would create an
   empty G1 which would require switching groups again on the next signal.
   Returns false iff there are no waiters that need a signal (either because
   G2 was empty and groups were not switched, or because every waiter in what
   became the new G1 had been cancelled).  */
static bool __attribute__ ((unused))
__condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq,
                                 unsigned int *g1index, int private)
{
  const unsigned int maxspin = 0;
  unsigned int g1 = *g1index;

  /* If there is no waiter in G2, we don't do anything.  The expression may
     look odd but remember that __g_size might hold a negative value, so
     putting the expression this way avoids relying on implementation-defined
     behavior.
     Note that this works correctly for a zero-initialized condvar too.  */
  unsigned int old_orig_size = __condvar_get_orig_size (cond);
  uint64_t old_g1_start = __condvar_load_g1_start_relaxed (cond) >> 1;
  if (((unsigned) (wseq - old_g1_start - old_orig_size)
       + cond->__data.__g_size[g1 ^ 1]) == 0)
    return false;
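
  /* To make the check above concrete with example numbers: with wseq == 12,
     old_g1_start == 4, and old_orig_size == 6, two waiters have entered G2.
     If both of them have been cancelled, __g_size[g1 ^ 1] holds
     (unsigned) -2, so the unsigned addition wraps around to exactly 0 and we
     correctly treat G2 as empty.  Adding the two terms instead of comparing
     them is what keeps the conceptually negative __g_size value harmless.  */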

  /* Now try to close and quiesce G1.  We have to consider the following kinds
     of waiters:
     * Waiters from less recent groups than G1 are not affected because
       nothing will change for them apart from __g1_start getting larger.
     * New waiters arriving concurrently with the group switching will all go
       into G2 until we atomically make the switch.  Waiters existing in G2
       are not affected.
     * Waiters in G1 will be closed out immediately by setting a flag in
       __g_signals, which will prevent waiters from blocking using a futex on
       __g_signals and also notifies them that the group is closed.  As a
       result, they will eventually remove their group reference, allowing us
       to close this G1 and switch group roles.  */

  /* First, set the closed flag on __g_signals.  This tells waiters that are
     about to wait that they shouldn't do that anymore.  This basically
     serves as an advance notification of the upcoming change to __g1_start;
     waiters interpret it as if __g1_start was larger than their waiter
     sequence position.  This allows us to change __g1_start after waiting
     for all existing waiters with group references to leave, which in turn
     makes recovery after stealing a signal simpler because it then can be
     skipped if __g1_start indicates that the group is closed (otherwise, we
     would always have to recover because waiters don't know how big their
     groups are).  Relaxed MO is fine.  */
  atomic_fetch_or_relaxed (cond->__data.__g_signals + g1, 1);

  /* Wait until there are no group references anymore.  The fetch-or operation
     injects us into the modification order of __g_refs; release MO ensures
     that waiters incrementing __g_refs after our fetch-or see the previous
     changes to __g_signals and to __g1_start that had to happen before we can
     switch this G1 and alias with an older group (we have two groups, so
     aliasing requires switching group roles twice).  Note that nobody else
     can have set the wake-request flag, so we do not have to act upon it.

     Also note that it is harmless if older waiters or waiters from this G1
     get a group reference after we have quiesced the group because it will
     remain closed for them either because of the closed flag in __g_signals
     or the later update to __g1_start.  New waiters will never arrive here
     but instead continue to go into the still current G2.  */
  unsigned r = atomic_fetch_or_release (cond->__data.__g_refs + g1, 0);
  while ((r >> 1) > 0)
    {
      for (unsigned int spin = maxspin; ((r >> 1) > 0) && (spin > 0); spin--)
        {
          /* TODO Back off.  */
          r = atomic_load_relaxed (cond->__data.__g_refs + g1);
        }
      if ((r >> 1) > 0)
        {
          /* There is still a waiter after spinning.  Set the wake-request
             flag and block.  Relaxed MO is fine because this is just about
             this futex word.

             Update r to include the set wake-request flag so that the upcoming
             futex_wait only blocks if the flag is still set (otherwise, we'd
             violate the basic client-side futex protocol).  */
          r = atomic_fetch_or_relaxed (cond->__data.__g_refs + g1, 1) | 1;

          if ((r >> 1) > 0)
            futex_wait_simple (cond->__data.__g_refs + g1, r, private);
          /* Reload here so we eventually see the most recent value even if we
             do not spin.  */
          r = atomic_load_relaxed (cond->__data.__g_refs + g1);
        }
    }
  /* Acquire MO so that we synchronize with the release operation that waiters
     use to decrement __g_refs and thus happen after the waiters we waited
     for.  */
  atomic_thread_fence_acquire ();

  /* Update __g1_start, which finishes closing this group.  The value we add
     will never be negative because old_orig_size can only be zero when we
     switch groups the first time after a condvar was initialized, in which
     case G1 will be at index 1 and we will add a value of 1.  See above for
     why this takes place after waiting for quiescence of the group.
     Relaxed MO is fine because the change comes with no additional
     constraints that others would have to observe.  */
  __condvar_add_g1_start_relaxed (cond,
                                  (old_orig_size << 1) + (g1 == 1 ? 1 : - 1));
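
  /* The addend above packs two updates into one: the bits above the LSB
     advance the waiter-sequence part of __g1_start by old_orig_size (hence
     the shift by 1), while the +1 / -1 flips the LSB, which holds the index
     of the current G2, from g1 ^ 1 to g1 in preparation for the role switch
     published below.  */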

  /* Now reopen the group, thus enabling waiters to again block using the
     futex controlled by __g_signals.  Release MO so that observers that see
     no signals (and thus can block) also see the write to __g1_start and
     thus that this is now a new group (see __pthread_cond_wait_common for
     the matching acquire MO loads).  */
  atomic_store_release (cond->__data.__g_signals + g1, 0);

  /* At this point, the old G1 is now a valid new G2 (but not in use yet).
     No old waiter can grab a signal or acquire a reference without noticing
     that __g1_start is larger.
     We can now publish the group switch by flipping the G2 index in __wseq.
     Release MO so that this synchronizes with the acquire MO operation
     waiters use to obtain a position in the waiter sequence.  */
  wseq = __condvar_fetch_xor_wseq_release (cond, 1) >> 1;
  g1 ^= 1;
  *g1index ^= 1;

  /* These values are just observed by signalers, and thus protected by the
     lock.  */
  unsigned int orig_size = wseq - (old_g1_start + old_orig_size);
  __condvar_set_orig_size (cond, orig_size);
  /* Use an addition so as not to lose track of cancellations in what was
     previously G2.  */
  cond->__data.__g_size[g1] += orig_size;

  /* The new G1's size may be zero because of cancellations during its time
     as G2.  If this happens, there are no waiters that have to receive a
     signal, so we do not need to add any and return false.  */
  if (cond->__data.__g_size[g1] == 0)
    return false;

  return true;
}