1 | /* Copyright (C) 2003-2023 Free Software Foundation, Inc. |
2 | This file is part of the GNU C Library. |
3 | |
4 | The GNU C Library is free software; you can redistribute it and/or |
5 | modify it under the terms of the GNU Lesser General Public |
6 | License as published by the Free Software Foundation; either |
7 | version 2.1 of the License, or (at your option) any later version. |
8 | |
9 | The GNU C Library is distributed in the hope that it will be useful, |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | Lesser General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU Lesser General Public |
15 | License along with the GNU C Library; if not, see |
16 | <https://www.gnu.org/licenses/>. */ |
17 | |
18 | #include <endian.h> |
19 | #include <errno.h> |
20 | #include <sysdep.h> |
21 | #include <futex-internal.h> |
22 | #include <pthread.h> |
23 | #include <pthreadP.h> |
24 | #include <sys/time.h> |
25 | #include <atomic.h> |
26 | #include <stdint.h> |
27 | #include <stdbool.h> |
28 | |
29 | #include <shlib-compat.h> |
30 | #include <stap-probe.h> |
31 | #include <time.h> |
32 | |
33 | #include "pthread_cond_common.c" |
34 | |
35 | |
/* Argument block handed to __condvar_cleanup_waiting through the
   cancellation clean-up handler registered around the futex wait.  */
struct _condvar_cleanup_buffer
{
  /* Raw value obtained from the waiter sequence: the LSB is the group slot
     index, the remaining bits (wseq >> 1) are the waiter's position.  */
  uint64_t wseq;
  /* Condition variable the cancelled thread was waiting on.  */
  pthread_cond_t *cond;
  /* Mutex to re-acquire before the clean-up handler returns.  */
  pthread_mutex_t *mutex;
  /* Value passed as the PRIVATE argument to the futex and condvar helpers.  */
  int private;
};
43 | |
44 | |
/* Drop the waiter reference that was taken on COND when the wait started.
   PRIVATE is forwarded to futex_wake.  */
static void
__condvar_confirm_wakeup (pthread_cond_t *cond, int private)
{
  /* The reference count lives above the low three bits of __wrefs, so one
     reference is worth 8.  Release MO synchronizes with threads blocked in
     pthread_cond_destroy.  */
  unsigned int prev = atomic_fetch_add_release (&cond->__data.__wrefs, -8);

  /* Only the last waiter (previous count of exactly one, i.e. 1 << 3) that
     also observes the wake-request flag (bit 2) has anything left to do.  */
  if ((prev >> 2) != 3)
    return;

  /* Destruction is pending: wake everybody waiting on __wrefs.  We do not
     bother clearing the wake-request flag.  */
  futex_wake (&cond->__data.__wrefs, INT_MAX, private);
}
56 | |
57 | |
58 | /* Cancel waiting after having registered as a waiter previously. SEQ is our |
59 | position and G is our group index. |
60 | The goal of cancellation is to make our group smaller if that is still |
61 | possible. If we are in a closed group, this is not possible anymore; in |
62 | this case, we need to send a replacement signal for the one we effectively |
63 | consumed because the signal should have gotten consumed by another waiter |
64 | instead; we must not both cancel waiting and consume a signal. |
65 | |
66 | Must not be called while still holding a reference on the group. |
67 | |
68 | Returns true iff we consumed a signal. |
69 | |
70 | On some kind of timeouts, we may be able to pretend that a signal we |
71 | effectively consumed happened before the timeout (i.e., similarly to first |
72 | spinning on signals before actually checking whether the timeout has |
73 | passed already). Doing this would allow us to skip sending a replacement |
74 | signal, but this case might happen rarely because the end of the timeout |
75 | must race with someone else sending a signal. Therefore, we don't bother |
76 | trying to optimize this. */ |
77 | static void |
78 | __condvar_cancel_waiting (pthread_cond_t *cond, uint64_t seq, unsigned int g, |
79 | int private) |
80 | { |
81 | bool consumed_signal = false; |
82 | |
83 | /* No deadlock with group switching is possible here because we do |
84 | not hold a reference on the group. */ |
85 | __condvar_acquire_lock (cond, private); |
86 | |
87 | uint64_t g1_start = __condvar_load_g1_start_relaxed (cond) >> 1; |
88 | if (g1_start > seq) |
89 | { |
90 | /* Our group is closed, so someone provided enough signals for it. |
91 | Thus, we effectively consumed a signal. */ |
92 | consumed_signal = true; |
93 | } |
94 | else |
95 | { |
96 | if (g1_start + __condvar_get_orig_size (cond) <= seq) |
97 | { |
98 | /* We are in the current G2 and thus cannot have consumed a signal. |
99 | Reduce its effective size or handle overflow. Remember that in |
100 | G2, unsigned int size is zero or a negative value. */ |
101 | if (cond->__data.__g_size[g] + __PTHREAD_COND_MAX_GROUP_SIZE > 0) |
102 | { |
103 | cond->__data.__g_size[g]--; |
104 | } |
105 | else |
106 | { |
107 | /* Cancellations would overflow the maximum group size. Just |
108 | wake up everyone spuriously to create a clean state. This |
109 | also means we do not consume a signal someone else sent. */ |
110 | __condvar_release_lock (cond, private); |
111 | __pthread_cond_broadcast (cond); |
112 | return; |
113 | } |
114 | } |
115 | else |
116 | { |
117 | /* We are in current G1. If the group's size is zero, someone put |
118 | a signal in the group that nobody else but us can consume. */ |
119 | if (cond->__data.__g_size[g] == 0) |
120 | consumed_signal = true; |
121 | else |
122 | { |
123 | /* Otherwise, we decrease the size of the group. This is |
124 | equivalent to atomically putting in a signal just for us and |
125 | consuming it right away. We do not consume a signal sent |
126 | by someone else. We also cannot have consumed a futex |
127 | wake-up because if we were cancelled or timed out in a futex |
128 | call, the futex will wake another waiter. */ |
129 | cond->__data.__g_size[g]--; |
130 | } |
131 | } |
132 | } |
133 | |
134 | __condvar_release_lock (cond, private); |
135 | |
136 | if (consumed_signal) |
137 | { |
138 | /* We effectively consumed a signal even though we didn't want to. |
139 | Therefore, we need to send a replacement signal. |
140 | If we would want to optimize this, we could do what |
141 | pthread_cond_signal does right in the critical section above. */ |
142 | __pthread_cond_signal (cond); |
143 | } |
144 | } |
145 | |
/* Release our reference on group slot G of COND and, if a signaler asked
   for it, wake the signalers waiting for the group to become unused.  */
static void
__condvar_dec_grefs (pthread_cond_t *cond, unsigned int g, int private)
{
  /* One reference is worth 2; the LSB is the wake-request flag.  A previous
     value of 3 therefore means "last reference, wake-up requested".
     Release MO synchronizes-with the acquire load in
     __condvar_quiesce_and_switch_g1.  */
  if (atomic_fetch_add_release (cond->__data.__g_refs + g, -2) != 3)
    return;

  /* Reset the wake-request flag before waking.  Relaxed MO is enough, and
     it is harmless to do this for an aliased group because every futex
     waiter is woken right afterwards.  */
  atomic_fetch_and_relaxed (cond->__data.__g_refs + g, ~(unsigned int) 1);
  futex_wake (cond->__data.__g_refs + g, INT_MAX, private);
}
162 | |
163 | /* Clean-up for cancellation of waiters waiting for normal signals. We cancel |
164 | our registration as a waiter, confirm we have woken up, and re-acquire the |
165 | mutex. */ |
166 | static void |
167 | __condvar_cleanup_waiting (void *arg) |
168 | { |
169 | struct _condvar_cleanup_buffer *cbuffer = |
170 | (struct _condvar_cleanup_buffer *) arg; |
171 | pthread_cond_t *cond = cbuffer->cond; |
172 | unsigned g = cbuffer->wseq & 1; |
173 | |
174 | __condvar_dec_grefs (cond, g, cbuffer->private); |
175 | |
176 | __condvar_cancel_waiting (cond, cbuffer->wseq >> 1, g, cbuffer->private); |
177 | /* FIXME With the current cancellation implementation, it is possible that |
178 | a thread is cancelled after it has returned from a syscall. This could |
179 | result in a cancelled waiter consuming a futex wake-up that is then |
180 | causing another waiter in the same group to not wake up. To work around |
181 | this issue until we have fixed cancellation, just add a futex wake-up |
182 | conservatively. */ |
183 | futex_wake (cond->__data.__g_signals + g, 1, cbuffer->private); |
184 | |
185 | __condvar_confirm_wakeup (cond, cbuffer->private); |
186 | |
187 | /* XXX If locking the mutex fails, should we just stop execution? This |
188 | might be better than silently ignoring the error. */ |
189 | __pthread_mutex_cond_lock (cbuffer->mutex); |
190 | } |
191 | |
192 | /* This condvar implementation guarantees that all calls to signal and |
193 | broadcast and all of the three virtually atomic parts of each call to wait |
194 | (i.e., (1) releasing the mutex and blocking, (2) unblocking, and (3) re- |
195 | acquiring the mutex) happen in some total order that is consistent with the |
196 | happens-before relations in the calling program. However, this order does |
197 | not necessarily result in additional happens-before relations being |
198 | established (which aligns well with spurious wake-ups being allowed). |
199 | |
200 | All waiters acquire a certain position in a 64b waiter sequence (__wseq). |
201 | This sequence determines which waiters are allowed to consume signals. |
202 | A broadcast is equal to sending as many signals as are unblocked waiters. |
203 | When a signal arrives, it samples the current value of __wseq with a |
204 | relaxed-MO load (i.e., the position the next waiter would get). (This is |
205 | sufficient because it is consistent with happens-before; the caller can |
206 | enforce stronger ordering constraints by calling signal while holding the |
207 | mutex.) Only waiters with a position less than the __wseq value observed |
208 | by the signal are eligible to consume this signal. |
209 | |
210 | This would be straight-forward to implement if waiters would just spin but |
211 | we need to let them block using futexes. Futexes give no guarantee of |
212 | waking in FIFO order, so we cannot reliably wake eligible waiters if we |
213 | just use a single futex. Also, futex words are 32b in size, but we need |
214 | to distinguish more than 1<<32 states because we need to represent the |
215 | order of wake-up (and thus which waiters are eligible to consume signals); |
216 | blocking in a futex is not atomic with a waiter determining its position in |
217 | the waiter sequence, so we need the futex word to reliably notify waiters |
218 | that they should not attempt to block anymore because they have been |
219 | already signaled in the meantime. While an ABA issue on a 32b value will |
220 | be rare, ignoring it when we are aware of it is not the right thing to do |
221 | either. |
222 | |
223 | Therefore, we use a 64b counter to represent the waiter sequence (on |
224 | architectures which only support 32b atomics, we use a few bits less). |
225 | To deal with the blocking using futexes, we maintain two groups of waiters: |
226 | * Group G1 consists of waiters that are all eligible to consume signals; |
227 | incoming signals will always signal waiters in this group until all |
228 | waiters in G1 have been signaled. |
229 | * Group G2 consists of waiters that arrive when a G1 is present and still |
230 | contains waiters that have not been signaled. When all waiters in G1 |
231 | are signaled and a new signal arrives, the new signal will convert G2 |
232 | into the new G1 and create a new G2 for future waiters. |
233 | |
234 | We cannot allocate new memory because of process-shared condvars, so we |
235 | have just two slots of groups that change their role between G1 and G2. |
236 | Each has a separate futex word, a number of signals available for |
237 | consumption, a size (number of waiters in the group that have not been |
238 | signaled), and a reference count. |
239 | |
240 | The group reference count is used to maintain the number of waiters that |
241 | are using the group's futex. Before a group can change its role, the |
242 | reference count must show that no waiters are using the futex anymore; this |
243 | prevents ABA issues on the futex word. |
244 | |
245 | To represent which intervals in the waiter sequence the groups cover (and |
246 | thus also which group slot contains G1 or G2), we use a 64b counter to |
247 | designate the start position of G1 (inclusive), and a single bit in the |
248 | waiter sequence counter to represent which group slot currently contains |
249 | G2. This allows us to switch group roles atomically wrt. waiters obtaining |
250 | a position in the waiter sequence. The G1 start position allows waiters to |
251 | figure out whether they are in a group that has already been completely |
252 | signaled (i.e., if the current G1 starts at a later position than the |
253 | waiter's position). Waiters cannot determine whether they are currently |
254 | in G2 or G1 -- but they do not have to because all they are interested in |
255 | is whether there are available signals, and they always start in G2 (whose |
256 | group slot they know because of the bit in the waiter sequence). Signalers |
257 | will simply fill the right group until it is completely signaled and can |
258 | be closed (they do not switch group roles until they really have to to |
259 | decrease the likelihood of having to wait for waiters still holding a |
260 | reference on the now-closed G1). |
261 | |
262 | Signalers maintain the initial size of G1 to be able to determine where |
263 | G2 starts (G2 is always open-ended until it becomes G1). They track the |
264 | remaining size of a group; when waiters cancel waiting (due to PThreads |
265 | cancellation or timeouts), they will decrease this remaining size as well. |
266 | |
267 | To implement condvar destruction requirements (i.e., that |
268 | pthread_cond_destroy can be called as soon as all waiters have been |
269 | signaled), waiters increment a reference count before starting to wait and |
270 | decrement it after they stopped waiting but right before they acquire the |
271 | mutex associated with the condvar. |
272 | |
273 | pthread_cond_t thus consists of the following (bits that are used for |
274 | flags and are not part of the primary value of each field but necessary |
275 | to make some things atomic or because there was no space for them |
276 | elsewhere in the data structure): |
277 | |
278 | __wseq: Waiter sequence counter |
279 | * LSB is index of current G2. |
280 | * Waiters fetch-add while having acquired the mutex associated with the |
281 | condvar. Signalers load it and fetch-xor it concurrently. |
282 | __g1_start: Starting position of G1 (inclusive) |
283 | * LSB is index of current G2. |
284 | * Modified by signalers while having acquired the condvar-internal lock |
285 | and observed concurrently by waiters. |
286 | __g1_orig_size: Initial size of G1 |
287 | * The two least-significant bits represent the condvar-internal lock. |
288 | * Only accessed while having acquired the condvar-internal lock. |
289 | __wrefs: Waiter reference counter. |
290 | * Bit 2 is true if waiters should run futex_wake when they remove the |
291 | last reference. pthread_cond_destroy uses this as futex word. |
292 | * Bit 1 is the clock ID (0 == CLOCK_REALTIME, 1 == CLOCK_MONOTONIC). |
293 | * Bit 0 is true iff this is a process-shared condvar. |
294 | * Simple reference count used by both waiters and pthread_cond_destroy. |
295 | (If the format of __wrefs is changed, update nptl_lock_constants.pysym |
296 | and the pretty printers.) |
297 | For each of the two groups, we have: |
298 | __g_refs: Futex waiter reference count. |
299 | * LSB is true if waiters should run futex_wake when they remove the |
300 | last reference. |
301 | * Reference count used by waiters concurrently with signalers that have |
302 | acquired the condvar-internal lock. |
303 | __g_signals: The number of signals that can still be consumed. |
304 | * Used as a futex word by waiters. Used concurrently by waiters and |
305 | signalers. |
306 | * LSB is true iff this group has been completely signaled (i.e., it is |
307 | closed). |
308 | __g_size: Waiters remaining in this group (i.e., which have not been |
309 | signaled yet). |
310 | * Accessed by signalers and waiters that cancel waiting (both do so only |
311 | when having acquired the condvar-internal lock). |
312 | * The size of G2 is always zero because it cannot be determined until |
313 | the group becomes G1. |
314 | * Although this is of unsigned type, we rely on using unsigned overflow |
315 | rules to make this hold effectively negative values too (in |
316 | particular, when waiters in G2 cancel waiting). |
317 | |
318 | A PTHREAD_COND_INITIALIZER condvar has all fields set to zero, which yields |
319 | a condvar that has G2 starting at position 0 and a G1 that is closed. |
320 | |
321 | Because waiters do not claim ownership of a group right when obtaining a |
322 | position in __wseq but only reference count the group when using futexes |
323 | to block, it can happen that a group gets closed before a waiter can |
324 | increment the reference count. Therefore, waiters have to check whether |
325 | their group is already closed using __g1_start. They also have to perform |
326 | this check when spinning when trying to grab a signal from __g_signals. |
327 | Note that for these checks, using relaxed MO to load __g1_start is |
328 | sufficient because if a waiter can see a sufficiently large value, it could |
329 | have also consumed a signal in the waiter's group. |
330 | |
331 | Waiters try to grab a signal from __g_signals without holding a reference |
332 | count, which can lead to stealing a signal from a more recent group after |
333 | their own group was already closed. They cannot always detect whether they |
334 | in fact did because they do not know when they stole, but they can |
335 | conservatively add a signal back to the group they stole from; if they |
336 | did so unnecessarily, all that happens is a spurious wake-up. To make this |
337 | even less likely, __g1_start contains the index of the current g2 too, |
338 | which allows waiters to check if there is aliasing on the group slots; if |
339 | there wasn't, they didn't steal from the current G1, which means that the |
340 | G1 they stole from must have been already closed and they do not need to |
341 | fix anything. |
342 | |
343 | It is essential that the last field in pthread_cond_t is __g_signals[1]: |
344 | The previous condvar used a pointer-sized field in pthread_cond_t, so a |
345 | PTHREAD_COND_INITIALIZER from that condvar implementation might only |
346 | initialize 4 bytes to zero instead of the 8 bytes we need (i.e., 44 bytes |
347 | in total instead of the 48 we need). __g_signals[1] is not accessed before |
348 | the first group switch (G2 starts at index 0), which will set its value to |
349 | zero after a harmless fetch-or whose return value is ignored. This |
350 | effectively completes initialization. |
351 | |
352 | |
353 | Limitations: |
354 | * This condvar isn't designed to allow for more than |
355 | __PTHREAD_COND_MAX_GROUP_SIZE * (1 << 31) calls to __pthread_cond_wait. |
356 | * More than __PTHREAD_COND_MAX_GROUP_SIZE concurrent waiters are not |
357 | supported. |
358 | * Beyond what is allowed as errors by POSIX or documented, we can also |
359 | return the following errors: |
360 | * EPERM if MUTEX is a recursive mutex and the caller doesn't own it. |
361 | * EOWNERDEAD or ENOTRECOVERABLE when using robust mutexes. Unlike |
362 | for other errors, this can happen when we re-acquire the mutex; this |
363 | isn't allowed by POSIX (which requires all errors to virtually happen |
364 | before we release the mutex or change the condvar state), but there's |
365 | nothing we can do really. |
366 | * When using PTHREAD_MUTEX_PP_* mutexes, we can also return all errors |
367 | returned by __pthread_tpp_change_priority. We will already have |
368 | released the mutex in such cases, so the caller cannot expect to own |
369 | MUTEX. |
370 | |
371 | Other notes: |
372 | * Instead of the normal mutex unlock / lock functions, we use |
373 | __pthread_mutex_unlock_usercnt(m, 0) / __pthread_mutex_cond_lock(m) |
374 | because those will not change the mutex-internal users count, so that it |
375 | can be detected when a condvar is still associated with a particular |
376 | mutex because there is a waiter blocked on this condvar using this mutex. |
377 | */ |
378 | static __always_inline int |
379 | __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, |
380 | clockid_t clockid, const struct __timespec64 *abstime) |
381 | { |
382 | const int maxspin = 0; |
383 | int err; |
384 | int result = 0; |
385 | |
386 | LIBC_PROBE (cond_wait, 2, cond, mutex); |
387 | |
388 | /* clockid will already have been checked by |
389 | __pthread_cond_clockwait or pthread_condattr_setclock, or we |
390 | don't use it if abstime is NULL, so we don't need to check it |
391 | here. */ |
392 | |
393 | /* Acquire a position (SEQ) in the waiter sequence (WSEQ). We use an |
394 | atomic operation because signals and broadcasts may update the group |
395 | switch without acquiring the mutex. We do not need release MO here |
396 | because we do not need to establish any happens-before relation with |
397 | signalers (see __pthread_cond_signal); modification order alone |
398 | establishes a total order of waiters/signals. We do need acquire MO |
399 | to synchronize with group reinitialization in |
400 | __condvar_quiesce_and_switch_g1. */ |
401 | uint64_t wseq = __condvar_fetch_add_wseq_acquire (cond, 2); |
402 | /* Find our group's index. We always go into what was G2 when we acquired |
403 | our position. */ |
404 | unsigned int g = wseq & 1; |
405 | uint64_t seq = wseq >> 1; |
406 | |
407 | /* Increase the waiter reference count. Relaxed MO is sufficient because |
408 | we only need to synchronize when decrementing the reference count. */ |
409 | unsigned int flags = atomic_fetch_add_relaxed (&cond->__data.__wrefs, 8); |
410 | int private = __condvar_get_private (flags); |
411 | |
412 | /* Now that we are registered as a waiter, we can release the mutex. |
413 | Waiting on the condvar must be atomic with releasing the mutex, so if |
414 | the mutex is used to establish a happens-before relation with any |
415 | signaler, the waiter must be visible to the latter; thus, we release the |
416 | mutex after registering as waiter. |
417 | If releasing the mutex fails, we just cancel our registration as a |
418 | waiter and confirm that we have woken up. */ |
419 | err = __pthread_mutex_unlock_usercnt (mutex, 0); |
420 | if (__glibc_unlikely (err != 0)) |
421 | { |
422 | __condvar_cancel_waiting (cond, seq, g, private); |
423 | __condvar_confirm_wakeup (cond, private); |
424 | return err; |
425 | } |
426 | |
427 | /* Now wait until a signal is available in our group or it is closed. |
428 | Acquire MO so that if we observe a value of zero written after group |
429 | switching in __condvar_quiesce_and_switch_g1, we synchronize with that |
430 | store and will see the prior update of __g1_start done while switching |
431 | groups too. */ |
432 | unsigned int signals = atomic_load_acquire (cond->__data.__g_signals + g); |
433 | |
434 | do |
435 | { |
436 | while (1) |
437 | { |
438 | /* Spin-wait first. |
439 | Note that spinning first without checking whether a timeout |
440 | passed might lead to what looks like a spurious wake-up even |
441 | though we should return ETIMEDOUT (e.g., if the caller provides |
442 | an absolute timeout that is clearly in the past). However, |
443 | (1) spurious wake-ups are allowed, (2) it seems unlikely that a |
444 | user will (ab)use pthread_cond_wait as a check for whether a |
445 | point in time is in the past, and (3) spinning first without |
446 | having to compare against the current time seems to be the right |
447 | choice from a performance perspective for most use cases. */ |
448 | unsigned int spin = maxspin; |
449 | while (signals == 0 && spin > 0) |
450 | { |
451 | /* Check that we are not spinning on a group that's already |
452 | closed. */ |
453 | if (seq < (__condvar_load_g1_start_relaxed (cond) >> 1)) |
454 | goto done; |
455 | |
456 | /* TODO Back off. */ |
457 | |
458 | /* Reload signals. See above for MO. */ |
459 | signals = atomic_load_acquire (cond->__data.__g_signals + g); |
460 | spin--; |
461 | } |
462 | |
463 | /* If our group will be closed as indicated by the flag on signals, |
464 | don't bother grabbing a signal. */ |
465 | if (signals & 1) |
466 | goto done; |
467 | |
468 | /* If there is an available signal, don't block. */ |
469 | if (signals != 0) |
470 | break; |
471 | |
472 | /* No signals available after spinning, so prepare to block. |
473 | We first acquire a group reference and use acquire MO for that so |
474 | that we synchronize with the dummy read-modify-write in |
475 | __condvar_quiesce_and_switch_g1 if we read from that. In turn, |
476 | in this case this will make us see the closed flag on __g_signals |
477 | that designates a concurrent attempt to reuse the group's slot. |
478 | We use acquire MO for the __g_signals check to make the |
479 | __g1_start check work (see spinning above). |
480 | Note that the group reference acquisition will not mask the |
481 | release MO when decrementing the reference count because we use |
482 | an atomic read-modify-write operation and thus extend the release |
483 | sequence. */ |
484 | atomic_fetch_add_acquire (cond->__data.__g_refs + g, 2); |
485 | if (((atomic_load_acquire (cond->__data.__g_signals + g) & 1) != 0) |
486 | || (seq < (__condvar_load_g1_start_relaxed (cond) >> 1))) |
487 | { |
488 | /* Our group is closed. Wake up any signalers that might be |
489 | waiting. */ |
490 | __condvar_dec_grefs (cond, g, private); |
491 | goto done; |
492 | } |
493 | |
494 | // Now block. |
495 | struct _pthread_cleanup_buffer buffer; |
496 | struct _condvar_cleanup_buffer cbuffer; |
497 | cbuffer.wseq = wseq; |
498 | cbuffer.cond = cond; |
499 | cbuffer.mutex = mutex; |
500 | cbuffer.private = private; |
501 | __pthread_cleanup_push (&buffer, __condvar_cleanup_waiting, &cbuffer); |
502 | |
503 | err = __futex_abstimed_wait_cancelable64 ( |
504 | cond->__data.__g_signals + g, 0, clockid, abstime, private); |
505 | |
506 | __pthread_cleanup_pop (&buffer, 0); |
507 | |
508 | if (__glibc_unlikely (err == ETIMEDOUT || err == EOVERFLOW)) |
509 | { |
510 | __condvar_dec_grefs (cond, g, private); |
511 | /* If we timed out, we effectively cancel waiting. Note that |
512 | we have decremented __g_refs before cancellation, so that a |
513 | deadlock between waiting for quiescence of our group in |
514 | __condvar_quiesce_and_switch_g1 and us trying to acquire |
515 | the lock during cancellation is not possible. */ |
516 | __condvar_cancel_waiting (cond, seq, g, private); |
517 | result = err; |
518 | goto done; |
519 | } |
520 | else |
521 | __condvar_dec_grefs (cond, g, private); |
522 | |
523 | /* Reload signals. See above for MO. */ |
524 | signals = atomic_load_acquire (cond->__data.__g_signals + g); |
525 | } |
526 | |
527 | } |
528 | /* Try to grab a signal. Use acquire MO so that we see an up-to-date value |
529 | of __g1_start below (see spinning above for a similar case). In |
530 | particular, if we steal from a more recent group, we will also see a |
531 | more recent __g1_start below. */ |
532 | while (!atomic_compare_exchange_weak_acquire (cond->__data.__g_signals + g, |
533 | &signals, signals - 2)); |
534 | |
535 | /* We consumed a signal but we could have consumed from a more recent group |
536 | that aliased with ours due to being in the same group slot. If this |
537 | might be the case our group must be closed as visible through |
538 | __g1_start. */ |
539 | uint64_t g1_start = __condvar_load_g1_start_relaxed (cond); |
540 | if (seq < (g1_start >> 1)) |
541 | { |
542 | /* We potentially stole a signal from a more recent group but we do not |
543 | know which group we really consumed from. |
544 | We do not care about groups older than current G1 because they are |
545 | closed; we could have stolen from these, but then we just add a |
546 | spurious wake-up for the current groups. |
547 | We will never steal a signal from current G2 that was really intended |
548 | for G2 because G2 never receives signals (until it becomes G1). We |
549 | could have stolen a signal from G2 that was conservatively added by a |
550 | previous waiter that also thought it stole a signal -- but given that |
551 | that signal was added unnecessarily, it's not a problem if we steal |
552 | it. |
553 | Thus, the remaining case is that we could have stolen from the current |
554 | G1, where "current" means the __g1_start value we observed. However, |
555 | if the current G1 does not have the same slot index as we do, we did |
556 | not steal from it and do not need to undo that. This is the reason |
557 | for putting a bit with G2's index into__g1_start as well. */ |
558 | if (((g1_start & 1) ^ 1) == g) |
559 | { |
560 | /* We have to conservatively undo our potential mistake of stealing |
561 | a signal. We can stop trying to do that when the current G1 |
562 | changes because other spinning waiters will notice this too and |
563 | __condvar_quiesce_and_switch_g1 has checked that there are no |
564 | futex waiters anymore before switching G1. |
565 | Relaxed MO is fine for the __g1_start load because we need to |
566 | merely be able to observe this fact and not have to observe |
567 | something else as well. |
568 | ??? Would it help to spin for a little while to see whether the |
569 | current G1 gets closed? This might be worthwhile if the group is |
570 | small or close to being closed. */ |
571 | unsigned int s = atomic_load_relaxed (cond->__data.__g_signals + g); |
572 | while (__condvar_load_g1_start_relaxed (cond) == g1_start) |
573 | { |
574 | /* Try to add a signal. We don't need to acquire the lock |
575 | because at worst we can cause a spurious wake-up. If the |
576 | group is in the process of being closed (LSB is true), this |
577 | has an effect similar to us adding a signal. */ |
578 | if (((s & 1) != 0) |
579 | || atomic_compare_exchange_weak_relaxed |
580 | (cond->__data.__g_signals + g, &s, s + 2)) |
581 | { |
582 | /* If we added a signal, we also need to add a wake-up on |
583 | the futex. We also need to do that if we skipped adding |
584 | a signal because the group is being closed because |
585 | while __condvar_quiesce_and_switch_g1 could have closed |
586 | the group, it might stil be waiting for futex waiters to |
587 | leave (and one of those waiters might be the one we stole |
588 | the signal from, which cause it to block using the |
589 | futex). */ |
590 | futex_wake (cond->__data.__g_signals + g, 1, private); |
591 | break; |
592 | } |
593 | /* TODO Back off. */ |
594 | } |
595 | } |
596 | } |
597 | |
598 | done: |
599 | |
600 | /* Confirm that we have been woken. We do that before acquiring the mutex |
601 | to allow for execution of pthread_cond_destroy while having acquired the |
602 | mutex. */ |
603 | __condvar_confirm_wakeup (cond, private); |
604 | |
605 | /* Woken up; now re-acquire the mutex. If this doesn't fail, return RESULT, |
606 | which is set to ETIMEDOUT if a timeout occured, or zero otherwise. */ |
607 | err = __pthread_mutex_cond_lock (mutex); |
608 | /* XXX Abort on errors that are disallowed by POSIX? */ |
609 | return (err != 0) ? err : result; |
610 | } |
611 | |
612 | |
613 | /* See __pthread_cond_wait_common. */ |
614 | int |
615 | ___pthread_cond_wait (pthread_cond_t *cond, pthread_mutex_t *mutex) |
616 | { |
617 | /* clockid is unused when abstime is NULL. */ |
618 | return __pthread_cond_wait_common (cond, mutex, 0, NULL); |
619 | } |
620 | |
621 | versioned_symbol (libc, ___pthread_cond_wait, pthread_cond_wait, |
622 | GLIBC_2_3_2); |
623 | libc_hidden_ver (___pthread_cond_wait, __pthread_cond_wait) |
624 | #ifndef SHARED |
625 | strong_alias (___pthread_cond_wait, __pthread_cond_wait) |
626 | #endif |
627 | |
628 | /* See __pthread_cond_wait_common. */ |
629 | int |
630 | ___pthread_cond_timedwait64 (pthread_cond_t *cond, pthread_mutex_t *mutex, |
631 | const struct __timespec64 *abstime) |
632 | { |
633 | /* Check parameter validity. This should also tell the compiler that |
634 | it can assume that abstime is not NULL. */ |
635 | if (! valid_nanoseconds (abstime->tv_nsec)) |
636 | return EINVAL; |
637 | |
638 | /* Relaxed MO is suffice because clock ID bit is only modified |
639 | in condition creation. */ |
640 | unsigned int flags = atomic_load_relaxed (&cond->__data.__wrefs); |
641 | clockid_t clockid = (flags & __PTHREAD_COND_CLOCK_MONOTONIC_MASK) |
642 | ? CLOCK_MONOTONIC : CLOCK_REALTIME; |
643 | return __pthread_cond_wait_common (cond, mutex, clockid, abstime); |
644 | } |
645 | |
646 | #if __TIMESIZE == 64 |
647 | strong_alias (___pthread_cond_timedwait64, ___pthread_cond_timedwait) |
648 | #else |
649 | strong_alias (___pthread_cond_timedwait64, __pthread_cond_timedwait64) |
650 | libc_hidden_def (__pthread_cond_timedwait64) |
651 | |
652 | int |
653 | ___pthread_cond_timedwait (pthread_cond_t *cond, pthread_mutex_t *mutex, |
654 | const struct timespec *abstime) |
655 | { |
656 | struct __timespec64 ts64 = valid_timespec_to_timespec64 (*abstime); |
657 | |
658 | return __pthread_cond_timedwait64 (cond, mutex, &ts64); |
659 | } |
660 | #endif /* __TIMESIZE == 64 */ |
661 | versioned_symbol (libc, ___pthread_cond_timedwait, |
662 | pthread_cond_timedwait, GLIBC_2_3_2); |
663 | libc_hidden_ver (___pthread_cond_timedwait, __pthread_cond_timedwait) |
664 | #ifndef SHARED |
665 | strong_alias (___pthread_cond_timedwait, __pthread_cond_timedwait) |
666 | #endif |
667 | |
668 | /* See __pthread_cond_wait_common. */ |
669 | int |
670 | ___pthread_cond_clockwait64 (pthread_cond_t *cond, pthread_mutex_t *mutex, |
671 | clockid_t clockid, |
672 | const struct __timespec64 *abstime) |
673 | { |
674 | /* Check parameter validity. This should also tell the compiler that |
675 | it can assume that abstime is not NULL. */ |
676 | if (! valid_nanoseconds (abstime->tv_nsec)) |
677 | return EINVAL; |
678 | |
679 | if (!futex_abstimed_supported_clockid (clockid)) |
680 | return EINVAL; |
681 | |
682 | return __pthread_cond_wait_common (cond, mutex, clockid, abstime); |
683 | } |
684 | |
685 | #if __TIMESIZE == 64 |
686 | strong_alias (___pthread_cond_clockwait64, ___pthread_cond_clockwait) |
687 | #else |
688 | strong_alias (___pthread_cond_clockwait64, __pthread_cond_clockwait64); |
689 | libc_hidden_def (__pthread_cond_clockwait64) |
690 | |
691 | int |
692 | ___pthread_cond_clockwait (pthread_cond_t *cond, pthread_mutex_t *mutex, |
693 | clockid_t clockid, |
694 | const struct timespec *abstime) |
695 | { |
696 | struct __timespec64 ts64 = valid_timespec_to_timespec64 (*abstime); |
697 | |
698 | return __pthread_cond_clockwait64 (cond, mutex, clockid, &ts64); |
699 | } |
700 | #endif /* __TIMESIZE == 64 */ |
701 | libc_hidden_ver (___pthread_cond_clockwait, __pthread_cond_clockwait) |
702 | #ifndef SHARED |
703 | strong_alias (___pthread_cond_clockwait, __pthread_cond_clockwait) |
704 | #endif |
705 | versioned_symbol (libc, ___pthread_cond_clockwait, |
706 | pthread_cond_clockwait, GLIBC_2_34); |
707 | #if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_30, GLIBC_2_34) |
708 | compat_symbol (libpthread, ___pthread_cond_clockwait, |
709 | pthread_cond_clockwait, GLIBC_2_30); |
710 | #endif |
711 | |