| 1 | /* Register destructors for C++ TLS variables declared with thread_local. |
| 2 | Copyright (C) 2013-2021 Free Software Foundation, Inc. |
| 3 | This file is part of the GNU C Library. |
| 4 | |
| 5 | The GNU C Library is free software; you can redistribute it and/or |
| 6 | modify it under the terms of the GNU Lesser General Public |
| 7 | License as published by the Free Software Foundation; either |
| 8 | version 2.1 of the License, or (at your option) any later version. |
| 9 | |
| 10 | The GNU C Library is distributed in the hope that it will be useful, |
| 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 13 | Lesser General Public License for more details. |
| 14 | |
| 15 | You should have received a copy of the GNU Lesser General Public |
| 16 | License along with the GNU C Library; if not, see |
| 17 | <https://www.gnu.org/licenses/>. */ |
| 18 | |
| 19 | /* CONCURRENCY NOTES: |
| 20 | |
| 21 | This documents concurrency for the non-POD TLS destructor registration, |
| 22 | calling and destruction. The functions __cxa_thread_atexit_impl, |
| 23 | _dl_close_worker and __call_tls_dtors are the three main routines that may |
| 24 | run concurrently and access shared data. The shared data in all possible |
| 25 | combinations of all three functions are the link map list, a link map for a |
| 26 | DSO and the link map member l_tls_dtor_count. |
| 27 | |
| 28 | __cxa_thread_atexit_impl acquires the dl_load_lock before accessing any |
| 29 | shared state and hence multiple of its instances can safely execute |
| 30 | concurrently. |
| 31 | |
| 32 | _dl_close_worker acquires the dl_load_lock before accessing any shared state |
| 33 | as well and hence can concurrently execute multiple of its own instances as |
| 34 | well as those of __cxa_thread_atexit_impl safely. Not all accesses to |
| 35 | l_tls_dtor_count are protected by the dl_load_lock, so we need to |
| 36 | synchronize using atomics. |
| 37 | |
| 38 | __call_tls_dtors accesses the l_tls_dtor_count without taking the lock; it |
| 39 | decrements the value by one. It does not need the big lock because it does |
| 40 | not access any other shared state except for the current DSO link map and |
| 41 | its member l_tls_dtor_count. |
| 42 | |
| 43 | Correspondingly, _dl_close_worker loads l_tls_dtor_count and if it is zero, |
| 44 | unloads the DSO, thus deallocating the current link map. This is the goal |
| 45 | of maintaining l_tls_dtor_count - to unload the DSO and free resources if |
| 46 | there are no pending destructors to be called. |
| 47 | |
| 48 | We want to eliminate the inconsistent state where the DSO is unloaded in |
| 49 | _dl_close_worker before it is used in __call_tls_dtors. This could happen |
| 50 | if __call_tls_dtors uses the link map after it sets l_tls_dtor_count to 0, |
| 51 | since _dl_close_worker will conclude from the 0 l_tls_dtor_count value that |
| 52 | it is safe to unload the DSO. Hence, to ensure that this does not happen, |
| 53 | the following conditions must be met: |
| 54 | |
| 55 | 1. In _dl_close_worker, the l_tls_dtor_count load happens before the DSO is |
| 56 | unloaded and its link map is freed |
| 57 | 2. The link map dereference in __call_tls_dtors happens before the |
| 58 | l_tls_dtor_count decrement. |
| 59 | |
| 60 | To ensure this, the l_tls_dtor_count decrement in __call_tls_dtors should |
| 61 | have release semantics and the load in _dl_close_worker should have acquire |
| 62 | semantics. |
| 63 | |
| 64 | Concurrent executions of __call_tls_dtors should only ensure that the value |
| 65 | is accessed atomically; no reordering constraints need to be considered. |
| 66 | Likewise for the increment of l_tls_dtor_count in __cxa_thread_atexit_impl. |
| 67 | |
| 68 | There is still a possibility on concurrent execution of _dl_close_worker and |
| 69 | __call_tls_dtors where _dl_close_worker reads the value of l_tls_dtor_count |
| 70 | as 1, __call_tls_dtors decrements the value of l_tls_dtor_count but |
| 71 | _dl_close_worker does not unload the DSO, having read the old value. This |
| 72 | is not very different from a case where __call_tls_dtors is called after |
| 73 | _dl_close_worker on the DSO and hence is an accepted execution. */ |
| 74 | |
| 75 | #include <stdlib.h> |
| 76 | #include <ldsodefs.h> |
| 77 | |
/* Type of a registered destructor: it is called with the object pointer
   that was supplied at registration.  */
typedef void (*dtor_func) (void *);

/* One node of a thread's singly linked list of pending destructors.  */
struct dtor_list
{
  /* Destructor to call; stored mangled when PTR_MANGLE is available.  */
  dtor_func func;
  /* Argument passed to FUNC.  */
  void *obj;
  /* Link map whose l_tls_dtor_count was incremented for this entry.  */
  struct link_map *map;
  /* Next node, i.e. the entry that was registered just before this one.  */
  struct dtor_list *next;
};
| 87 | |
/* Head of the calling thread's list of pending destructors; the most
   recently registered entry comes first.  */
static __thread struct dtor_list *tls_dtor_list;
/* Compared against DSO_SYMBOL in __cxa_thread_atexit_impl to decide whether
   the link map lookup can be skipped.  NOTE(review): no assignment to this
   variable is visible in this file -- confirm whether that is intended.  */
static __thread void *dso_symbol_cache;
/* Link map selected by the most recent lookup in
   __cxa_thread_atexit_impl.  */
static __thread struct link_map *lm_cache;
| 91 | |
| 92 | /* Register a destructor for TLS variables declared with the 'thread_local' |
| 93 | keyword. This function is only called from code generated by the C++ |
| 94 | compiler. FUNC is the destructor function and OBJ is the object to be |
| 95 | passed to the destructor. DSO_SYMBOL is the __dso_handle symbol that each |
| 96 | DSO has at a unique address in its map, added from crtbegin.o during the |
| 97 | linking phase. */ |
| 98 | int |
| 99 | __cxa_thread_atexit_impl (dtor_func func, void *obj, void *dso_symbol) |
| 100 | { |
| 101 | #ifdef PTR_MANGLE |
| 102 | PTR_MANGLE (func); |
| 103 | #endif |
| 104 | |
| 105 | /* Prepend. */ |
| 106 | struct dtor_list *new = calloc (1, sizeof (struct dtor_list)); |
| 107 | new->func = func; |
| 108 | new->obj = obj; |
| 109 | new->next = tls_dtor_list; |
| 110 | tls_dtor_list = new; |
| 111 | |
| 112 | /* We have to acquire the big lock to prevent a racing dlclose from pulling |
| 113 | our DSO from underneath us while we're setting up our destructor. */ |
| 114 | __rtld_lock_lock_recursive (GL(dl_load_lock)); |
| 115 | |
| 116 | /* See if we already encountered the DSO. */ |
| 117 | if (__glibc_unlikely (dso_symbol_cache != dso_symbol)) |
| 118 | { |
| 119 | ElfW(Addr) caller = (ElfW(Addr)) dso_symbol; |
| 120 | |
| 121 | struct link_map *l = _dl_find_dso_for_object (caller); |
| 122 | |
| 123 | /* If the address is not recognized the call comes from the main |
| 124 | program (we hope). */ |
| 125 | lm_cache = l ? l : GL(dl_ns)[LM_ID_BASE]._ns_loaded; |
| 126 | } |
| 127 | |
| 128 | /* This increment may only be concurrently observed either by the decrement |
| 129 | in __call_tls_dtors since the other l_tls_dtor_count access in |
| 130 | _dl_close_worker is protected by the dl_load_lock. The execution in |
| 131 | __call_tls_dtors does not really depend on this value beyond the fact that |
| 132 | it should be atomic, so Relaxed MO should be sufficient. */ |
| 133 | atomic_fetch_add_relaxed (&lm_cache->l_tls_dtor_count, 1); |
| 134 | __rtld_lock_unlock_recursive (GL(dl_load_lock)); |
| 135 | |
| 136 | new->map = lm_cache; |
| 137 | |
| 138 | return 0; |
| 139 | } |
| 140 | |
| 141 | /* Call the destructors. This is called either when a thread returns from the |
| 142 | initial function or when the process exits via the exit function. */ |
| 143 | void |
| 144 | __call_tls_dtors (void) |
| 145 | { |
| 146 | while (tls_dtor_list) |
| 147 | { |
| 148 | struct dtor_list *cur = tls_dtor_list; |
| 149 | dtor_func func = cur->func; |
| 150 | #ifdef PTR_DEMANGLE |
| 151 | PTR_DEMANGLE (func); |
| 152 | #endif |
| 153 | |
| 154 | tls_dtor_list = tls_dtor_list->next; |
| 155 | func (cur->obj); |
| 156 | |
| 157 | /* Ensure that the MAP dereference happens before |
| 158 | l_tls_dtor_count decrement. That way, we protect this access from a |
| 159 | potential DSO unload in _dl_close_worker, which happens when |
| 160 | l_tls_dtor_count is 0. See CONCURRENCY NOTES for more detail. */ |
| 161 | atomic_fetch_add_release (&cur->map->l_tls_dtor_count, -1); |
| 162 | free (cur); |
| 163 | } |
| 164 | } |
| 165 | libc_hidden_def (__call_tls_dtors) |
| 166 | |