/* Register destructors for C++ TLS variables declared with thread_local.
   Copyright (C) 2013-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

/* CONCURRENCY NOTES:

   This documents concurrency for the non-POD TLS destructor registration,
   calling and destruction.  The functions __cxa_thread_atexit_impl,
   _dl_close_worker and __call_tls_dtors are the three main routines that may
   run concurrently and access shared data.  The data shared between any
   combination of these three functions are the link map list, the link map
   of a DSO and the link map member l_tls_dtor_count.

   __cxa_thread_atexit_impl acquires the dl_load_lock before accessing any
   shared state, so multiple instances of it can safely execute concurrently.

   _dl_close_worker also acquires the dl_load_lock before accessing any
   shared state, so multiple instances of it can safely execute concurrently
   with each other and with __cxa_thread_atexit_impl.  However, not all
   accesses to l_tls_dtor_count are protected by the dl_load_lock, so those
   accesses need to be synchronized using atomics.

   __call_tls_dtors accesses l_tls_dtor_count without taking the lock; it
   decrements the value by one.  It does not need the big lock because it
   accesses no shared state other than the current DSO's link map and its
   member l_tls_dtor_count.

   Correspondingly, _dl_close_worker loads l_tls_dtor_count and, if it is
   zero, unloads the DSO, thus deallocating the current link map.  This is
   the goal of maintaining l_tls_dtor_count: the DSO may be unloaded and its
   resources freed only when there are no pending destructors to be called.

   We want to eliminate the inconsistent state where the DSO is unloaded in
   _dl_close_worker while __call_tls_dtors is still using its link map.  This
   could happen if __call_tls_dtors used the link map after decrementing
   l_tls_dtor_count to 0, since _dl_close_worker would conclude from the 0
   l_tls_dtor_count value that it is safe to unload the DSO.  Hence, to
   ensure that this does not happen, the following conditions must be met:

   1. In _dl_close_worker, the l_tls_dtor_count load happens before the DSO
      is unloaded and its link map is freed.
   2. In __call_tls_dtors, the link map dereference happens before the
      l_tls_dtor_count decrement.

   To ensure this, the l_tls_dtor_count decrement in __call_tls_dtors should
   have release semantics and the load in _dl_close_worker should have
   acquire semantics.

   Concurrent executions of __call_tls_dtors only need to ensure that the
   value is accessed atomically; no ordering constraints need to be
   considered.  Likewise for the increment of l_tls_dtor_count in
   __cxa_thread_atexit_impl.

   There is still a possible execution, when _dl_close_worker and
   __call_tls_dtors run concurrently, in which _dl_close_worker reads the
   value of l_tls_dtor_count as 1 and __call_tls_dtors then decrements it,
   but _dl_close_worker does not unload the DSO, having read the old value.
   This is not very different from a case where __call_tls_dtors is called
   after _dl_close_worker on the DSO and hence is an accepted execution.  */

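/* For illustration, the two sides of this pairing look roughly as follows.
   This is a simplified sketch: the acquire side lives in _dl_close_worker in
   elf/dl-close.c and is not part of this file.

     // In __call_tls_dtors: all uses of cur->map happen before this release
     // decrement, so they are visible to an acquire load that observes the
     // decremented value.
     atomic_fetch_add_release (&cur->map->l_tls_dtor_count, -1);

     // In _dl_close_worker: if this acquire load observes 0, the link map
     // uses by already-completed destructor calls have happened before it,
     // so the DSO may be unloaded and its link map freed.
     if (atomic_load_acquire (&l->l_tls_dtor_count) == 0
         && the DSO is otherwise unused)
       unload the DSO and free its link map.  */
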
#include <stdlib.h>
#include <ldsodefs.h>

typedef void (*dtor_func) (void *);

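/* A single registered destructor: the destructor function (stored in
   PTR_MANGLE'd form where pointer mangling is available), the object to pass
   to it, the link map of the DSO the object belongs to, and the link to the
   next entry in the per-thread list.  */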
struct dtor_list
{
  dtor_func func;
  void *obj;
  struct link_map *map;
  struct dtor_list *next;
};

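/* Head of the per-thread list of registered destructors, newest first.
   dso_symbol_cache and lm_cache are intended to cache the most recently
   resolved __dso_handle and its link map, so that the lookup in
   __cxa_thread_atexit_impl can be skipped for repeated registrations from
   the same DSO.  */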
static __thread struct dtor_list *tls_dtor_list;
static __thread void *dso_symbol_cache;
static __thread struct link_map *lm_cache;

/* Register a destructor for TLS variables declared with the 'thread_local'
   keyword.  This function is only called from code generated by the C++
   compiler.  FUNC is the destructor function and OBJ is the object to be
   passed to the destructor.  DSO_SYMBOL is the __dso_handle symbol that each
   DSO has at a unique address in its map, added from crtbegin.o during the
   linking phase.  */
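
/* For illustration only: given a C++ translation unit containing, say,

     thread_local Widget w;   // Widget has a non-trivial destructor

   the compiler-generated initialization code registers the destructor
   roughly as

     __cxa_thread_atexit (cleanup, &w, &__dso_handle);

   where cleanup is a compiler-emitted helper that runs w's destructor, and
   the C++ runtime's __cxa_thread_atexit forwards to this function when glibc
   provides it.  Widget, w and cleanup are hypothetical names used only in
   this sketch.  */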
int
__cxa_thread_atexit_impl (dtor_func func, void *obj, void *dso_symbol)
{
#ifdef PTR_MANGLE
  PTR_MANGLE (func);
#endif

  /* Prepend.  Report failure to the caller if the registration record
     cannot be allocated.  */
  struct dtor_list *new = calloc (1, sizeof (struct dtor_list));
  if (new == NULL)
    return -1;
  new->func = func;
  new->obj = obj;
  new->next = tls_dtor_list;
  tls_dtor_list = new;

  /* We have to acquire the big lock to prevent a racing dlclose from pulling
     our DSO from underneath us while we're setting up our destructor.  */
  __rtld_lock_lock_recursive (GL(dl_load_lock));

  /* See if we already encountered the DSO.  */
  if (__glibc_unlikely (dso_symbol_cache != dso_symbol))
    {
      ElfW(Addr) caller = (ElfW(Addr)) dso_symbol;

      struct link_map *l = _dl_find_dso_for_object (caller);

      /* If the address is not recognized the call comes from the main
         program (we hope).  */
      lm_cache = l ? l : GL(dl_ns)[LM_ID_BASE]._ns_loaded;
    }

  /* This increment may only be concurrently observed by the decrement in
     __call_tls_dtors, since the other l_tls_dtor_count access in
     _dl_close_worker is protected by the dl_load_lock.  The execution in
     __call_tls_dtors does not really depend on this value beyond the fact
     that it should be atomic, so relaxed MO should be sufficient.  */
  atomic_fetch_add_relaxed (&lm_cache->l_tls_dtor_count, 1);
  __rtld_lock_unlock_recursive (GL(dl_load_lock));

  new->map = lm_cache;

  return 0;
}

/* Call the destructors.  This is called either when a thread returns from
   the initial function or when the process exits via the exit function.  */
void
__call_tls_dtors (void)
{
  while (tls_dtor_list)
    {
      struct dtor_list *cur = tls_dtor_list;
      dtor_func func = cur->func;
#ifdef PTR_DEMANGLE
      PTR_DEMANGLE (func);
#endif

      tls_dtor_list = tls_dtor_list->next;
      func (cur->obj);

      /* Ensure that the MAP dereference happens before the
         l_tls_dtor_count decrement.  That way, we protect this access from a
         potential DSO unload in _dl_close_worker, which happens when
         l_tls_dtor_count is 0.  See CONCURRENCY NOTES for more detail.  */
      atomic_fetch_add_release (&cur->map->l_tls_dtor_count, -1);
      free (cur);
    }
}
libc_hidden_def (__call_tls_dtors)
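
/* For reference, a simplified sketch of the two call sites described in the
   comment above.  The real code lives outside this file, in
   nptl/pthread_create.c and stdlib/exit.c respectively.

     // In start_thread, once the thread's start routine has returned:
     __call_tls_dtors ();

     // In __run_exit_handlers, before the atexit/on_exit handlers run:
     if (run_dtors)
       __call_tls_dtors ();  */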