1 | /* Copyright (C) 2002-2021 Free Software Foundation, Inc. |
2 | This file is part of the GNU C Library. |
3 | Contributed by Ulrich Drepper <drepper@redhat.com>, 2002. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #include <assert.h> |
20 | #include <errno.h> |
21 | #include <signal.h> |
22 | #include <stdint.h> |
23 | #include <string.h> |
24 | #include <unistd.h> |
25 | #include <sys/mman.h> |
26 | #include <sys/param.h> |
27 | #include <dl-sysdep.h> |
28 | #include <dl-tls.h> |
29 | #include <tls.h> |
30 | #include <list.h> |
31 | #include <lowlevellock.h> |
32 | #include <futex-internal.h> |
33 | #include <kernel-features.h> |
34 | #include <stack-aliasing.h> |
35 | |
36 | |
37 | #ifndef NEED_SEPARATE_REGISTER_STACK |
38 | |
39 | /* Most architectures have exactly one stack pointer. Some have more. */ |
40 | # define STACK_VARIABLES void *stackaddr = NULL |
41 | |
42 | /* How to pass the values to the 'create_thread' function. */ |
43 | # define STACK_VARIABLES_ARGS stackaddr |
44 | |
/* How to declare a function which receives these parameters.  */
46 | # define STACK_VARIABLES_PARMS void *stackaddr |
47 | |
48 | /* How to declare allocate_stack. */ |
49 | # define ALLOCATE_STACK_PARMS void **stack |
50 | |
51 | /* This is how the function is called. We do it this way to allow |
52 | other variants of the function to have more parameters. */ |
53 | # define ALLOCATE_STACK(attr, pd) allocate_stack (attr, pd, &stackaddr) |
54 | |
55 | #else |
56 | |
57 | /* We need two stacks. The kernel will place them but we have to tell |
58 | the kernel about the size of the reserved address space. */ |
59 | # define STACK_VARIABLES void *stackaddr = NULL; size_t stacksize = 0 |
60 | |
61 | /* How to pass the values to the 'create_thread' function. */ |
62 | # define STACK_VARIABLES_ARGS stackaddr, stacksize |
63 | |
/* How to declare a function which receives these parameters.  */
65 | # define STACK_VARIABLES_PARMS void *stackaddr, size_t stacksize |
66 | |
67 | /* How to declare allocate_stack. */ |
68 | # define ALLOCATE_STACK_PARMS void **stack, size_t *stacksize |
69 | |
70 | /* This is how the function is called. We do it this way to allow |
71 | other variants of the function to have more parameters. */ |
72 | # define ALLOCATE_STACK(attr, pd) \ |
73 | allocate_stack (attr, pd, &stackaddr, &stacksize) |
74 | |
75 | #endif |
76 | |
77 | |
78 | /* Default alignment of stack. */ |
79 | #ifndef STACK_ALIGN |
80 | # define STACK_ALIGN __alignof__ (long double) |
81 | #endif |
82 | |
83 | /* Default value for minimal stack size after allocating thread |
84 | descriptor and guard. */ |
85 | #ifndef MINIMAL_REST_STACK |
86 | # define MINIMAL_REST_STACK 4096 |
87 | #endif |
88 | |
89 | |
90 | /* Newer kernels have the MAP_STACK flag to indicate a mapping is used for |
91 | a stack. Use it when possible. */ |
92 | #ifndef MAP_STACK |
93 | # define MAP_STACK 0 |
94 | #endif |
95 | |
/* This yields the value that the TLS support code uses as the thread pointer.  */
97 | #if TLS_TCB_AT_TP |
98 | # define TLS_TPADJ(pd) (pd) |
99 | #elif TLS_DTV_AT_TP |
100 | # define TLS_TPADJ(pd) ((struct pthread *)((char *) (pd) + TLS_PRE_TCB_SIZE)) |
101 | #endif |
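/* An illustrative (not normative) sketch of the two layouts handled
   here; exact sizes and alignment vary by architecture:

   TLS_TCB_AT_TP (e.g. x86_64): the thread pointer is the TCB, which is
   the start of struct pthread, placed at the top of the stack block
   with the static TLS data just below it, so TLS_TPADJ (pd) == pd.

   TLS_DTV_AT_TP (e.g. powerpc, arm): the thread pointer points at the
   TCB, which sits TLS_PRE_TCB_SIZE bytes above the start of struct
   pthread, with the static TLS data above it, so TLS_TPADJ has to skip
   over the descriptor.  */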
102 | |
/* Cache handling for not-yet-freed stacks.  */
104 | |
/* Maximum size of the stack cache, in bytes.  */
static size_t stack_cache_maxsize = 40 * 1024 * 1024; /* 40 MiB by default.  */
107 | static size_t stack_cache_actsize; |
108 | |
/* List of queued (cached) stacks.  */
110 | static LIST_HEAD (stack_cache); |
111 | |
/* We record which list operation is in progress so that, in case of
   an asynchronous interruption due to a fork() call, we can complete
   or undo the work in the child.  */
115 | static uintptr_t in_flight_stack; |
116 | |
117 | /* Check whether the stack is still used or not. */ |
118 | #define FREE_P(descr) ((descr)->tid <= 0) |
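/* The TID is cleared by the kernel when the thread exits (via the
   CLONE_CHILD_CLEARTID mechanism set up at clone time) and reset to
   zero by __reclaim_stacks after fork, so a non-positive value means
   no live thread owns this stack anymore.  */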
119 | |
120 | |
121 | static void |
122 | stack_list_del (list_t *elem) |
123 | { |
124 | in_flight_stack = (uintptr_t) elem; |
125 | |
126 | atomic_write_barrier (); |
127 | |
128 | list_del (elem); |
129 | |
130 | atomic_write_barrier (); |
131 | |
132 | in_flight_stack = 0; |
133 | } |
134 | |
135 | |
136 | static void |
137 | stack_list_add (list_t *elem, list_t *list) |
138 | { |
139 | in_flight_stack = (uintptr_t) elem | 1; |
140 | |
141 | atomic_write_barrier (); |
142 | |
143 | list_add (elem, list); |
144 | |
145 | atomic_write_barrier (); |
146 | |
147 | in_flight_stack = 0; |
148 | } |
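/* Note the encoding used above: bit 0 of in_flight_stack says whether
   the operation in flight is an addition (set) or a removal (clear),
   and the remaining bits identify the list element involved.  If a
   fork() interrupts one of these functions, __reclaim_stacks uses this
   record to replay or complete the list operation in the child.  */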
149 | |
150 | |
/* We create a doubly linked list of all cache entries.  Doubly linked
   because this allows removing entries from the end.  */
153 | |
154 | |
/* Get a stack from the cache.  We have to match by size since some
   blocks might be too small or far too large.  */
157 | static struct pthread * |
158 | get_cached_stack (size_t *sizep, void **memp) |
159 | { |
160 | size_t size = *sizep; |
161 | struct pthread *result = NULL; |
162 | list_t *entry; |
163 | |
164 | lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE); |
165 | |
166 | /* Search the cache for a matching entry. We search for the |
167 | smallest stack which has at least the required size. Note that |
168 | in normal situations the size of all allocated stacks is the |
     same.  At the very least there are only a few different sizes.
170 | Therefore this loop will exit early most of the time with an |
171 | exact match. */ |
172 | list_for_each (entry, &stack_cache) |
173 | { |
174 | struct pthread *curr; |
175 | |
176 | curr = list_entry (entry, struct pthread, list); |
177 | if (FREE_P (curr) && curr->stackblock_size >= size) |
178 | { |
179 | if (curr->stackblock_size == size) |
180 | { |
181 | result = curr; |
182 | break; |
183 | } |
184 | |
185 | if (result == NULL |
186 | || result->stackblock_size > curr->stackblock_size) |
187 | result = curr; |
188 | } |
189 | } |
190 | |
191 | if (__builtin_expect (result == NULL, 0) |
192 | /* Make sure the size difference is not too excessive. In that |
193 | case we do not use the block. */ |
194 | || __builtin_expect (result->stackblock_size > 4 * size, 0)) |
195 | { |
196 | /* Release the lock. */ |
197 | lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); |
198 | |
199 | return NULL; |
200 | } |
201 | |
202 | /* Don't allow setxid until cloned. */ |
203 | result->setxid_futex = -1; |
204 | |
205 | /* Dequeue the entry. */ |
206 | stack_list_del (&result->list); |
207 | |
208 | /* And add to the list of stacks in use. */ |
209 | stack_list_add (&result->list, &GL (dl_stack_used)); |
210 | |
211 | /* And decrease the cache size. */ |
212 | stack_cache_actsize -= result->stackblock_size; |
213 | |
214 | /* Release the lock early. */ |
215 | lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); |
216 | |
217 | /* Report size and location of the stack to the caller. */ |
218 | *sizep = result->stackblock_size; |
219 | *memp = result->stackblock; |
220 | |
221 | /* Cancellation handling is back to the default. */ |
222 | result->cancelhandling = 0; |
223 | result->cleanup = NULL; |
224 | |
225 | /* No pending event. */ |
226 | result->nextevent = NULL; |
227 | |
228 | result->tls_state = (struct tls_internal_t) { 0 }; |
229 | |
  /* Clear the DTV.  Free any dynamically allocated TLS blocks it still
     references, then zero all entries; dtv[-1].counter holds the number
     of DTV slots.  */
231 | dtv_t *dtv = GET_DTV (TLS_TPADJ (result)); |
232 | for (size_t cnt = 0; cnt < dtv[-1].counter; ++cnt) |
233 | free (dtv[1 + cnt].pointer.to_free); |
234 | memset (dtv, '\0', (dtv[-1].counter + 1) * sizeof (dtv_t)); |
235 | |
236 | /* Re-initialize the TLS. */ |
237 | _dl_allocate_tls_init (TLS_TPADJ (result)); |
238 | |
239 | return result; |
240 | } |
241 | |
242 | |
243 | /* Free stacks until cache size is lower than LIMIT. */ |
244 | static void |
245 | free_stacks (size_t limit) |
246 | { |
247 | /* We reduce the size of the cache. Remove the last entries until |
248 | the size is below the limit. */ |
249 | list_t *entry; |
250 | list_t *prev; |
251 | |
252 | /* Search from the end of the list. */ |
253 | list_for_each_prev_safe (entry, prev, &stack_cache) |
254 | { |
255 | struct pthread *curr; |
256 | |
257 | curr = list_entry (entry, struct pthread, list); |
258 | if (FREE_P (curr)) |
259 | { |
260 | /* Unlink the block. */ |
261 | stack_list_del (entry); |
262 | |
263 | /* Account for the freed memory. */ |
264 | stack_cache_actsize -= curr->stackblock_size; |
265 | |
266 | /* Free the memory associated with the ELF TLS. */ |
267 | _dl_deallocate_tls (TLS_TPADJ (curr), false); |
268 | |
269 | /* Remove this block. This should never fail. If it does |
270 | something is really wrong. */ |
271 | if (__munmap (curr->stackblock, curr->stackblock_size) != 0) |
272 | abort (); |
273 | |
274 | /* Maybe we have freed enough. */ |
275 | if (stack_cache_actsize <= limit) |
276 | break; |
277 | } |
278 | } |
279 | } |
280 | |
281 | /* Free all the stacks on cleanup. */ |
282 | void |
283 | __nptl_stacks_freeres (void) |
284 | { |
285 | free_stacks (0); |
286 | } |
287 | |
/* Add a stack which is not used anymore to the cache.  Must be
   called with the cache lock held.  */
290 | static inline void |
291 | __attribute ((always_inline)) |
292 | queue_stack (struct pthread *stack) |
293 | { |
294 | /* We unconditionally add the stack to the list. The memory may |
295 | still be in use but it will not be reused until the kernel marks |
296 | the stack as not used anymore. */ |
297 | stack_list_add (&stack->list, &stack_cache); |
298 | |
299 | stack_cache_actsize += stack->stackblock_size; |
300 | if (__glibc_unlikely (stack_cache_actsize > stack_cache_maxsize)) |
301 | free_stacks (stack_cache_maxsize); |
302 | } |
303 | |
304 | |
305 | static int |
306 | change_stack_perm (struct pthread *pd |
307 | #ifdef NEED_SEPARATE_REGISTER_STACK |
308 | , size_t pagemask |
309 | #endif |
310 | ) |
311 | { |
312 | #ifdef NEED_SEPARATE_REGISTER_STACK |
313 | void *stack = (pd->stackblock |
314 | + (((((pd->stackblock_size - pd->guardsize) / 2) |
315 | & pagemask) + pd->guardsize) & pagemask)); |
316 | size_t len = pd->stackblock + pd->stackblock_size - stack; |
317 | #elif _STACK_GROWS_DOWN |
318 | void *stack = pd->stackblock + pd->guardsize; |
319 | size_t len = pd->stackblock_size - pd->guardsize; |
320 | #elif _STACK_GROWS_UP |
321 | void *stack = pd->stackblock; |
322 | size_t len = (uintptr_t) pd - pd->guardsize - (uintptr_t) pd->stackblock; |
323 | #else |
324 | # error "Define either _STACK_GROWS_DOWN or _STACK_GROWS_UP" |
325 | #endif |
326 | if (__mprotect (stack, len, PROT_READ | PROT_WRITE | PROT_EXEC) != 0) |
327 | return errno; |
328 | |
329 | return 0; |
330 | } |
331 | |
/* Return the guard page position on the allocated stack.  */
333 | static inline char * |
334 | __attribute ((always_inline)) |
335 | guard_position (void *mem, size_t size, size_t guardsize, struct pthread *pd, |
336 | size_t pagesize_m1) |
337 | { |
338 | #ifdef NEED_SEPARATE_REGISTER_STACK |
339 | return mem + (((size - guardsize) / 2) & ~pagesize_m1); |
340 | #elif _STACK_GROWS_DOWN |
341 | return mem; |
342 | #elif _STACK_GROWS_UP |
343 | return (char *) (((uintptr_t) pd - guardsize) & ~pagesize_m1); |
344 | #endif |
345 | } |
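/* A rough summary of where the guard returned above ends up:
   _STACK_GROWS_DOWN: at the lowest addresses of the block (MEM itself),
   so running off the bottom of the stack faults.
   _STACK_GROWS_UP: just below the thread descriptor at the top of the
   block, so growing past the usable area faults before reaching it.
   NEED_SEPARATE_REGISTER_STACK (ia64): roughly in the middle, between
   the register backing store growing up from the bottom and the memory
   stack growing down from the top.  */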
346 | |
/* Given a stack allocated with PROT_NONE, set up the required portions
   with the 'prot' flags based on the guard page position.  */
349 | static inline int |
350 | setup_stack_prot (char *mem, size_t size, char *guard, size_t guardsize, |
351 | const int prot) |
352 | { |
353 | char *guardend = guard + guardsize; |
354 | #if _STACK_GROWS_DOWN && !defined(NEED_SEPARATE_REGISTER_STACK) |
  /* As defined in guard_position, for architectures with a
     downward-growing stack the guard page is always at the start of the
     allocated area.  */
357 | if (__mprotect (guardend, size - guardsize, prot) != 0) |
358 | return errno; |
359 | #else |
360 | size_t mprots1 = (uintptr_t) guard - (uintptr_t) mem; |
361 | if (__mprotect (mem, mprots1, prot) != 0) |
362 | return errno; |
363 | size_t mprots2 = ((uintptr_t) mem + size) - (uintptr_t) guardend; |
364 | if (__mprotect (guardend, mprots2, prot) != 0) |
365 | return errno; |
366 | #endif |
367 | return 0; |
368 | } |
369 | |
/* Tell the kernel that the unused part of the stack memory can be
   reclaimed.  Everything is freed except the space used for the TCB
   itself and a minimal amount of stack.  */
372 | static __always_inline void |
373 | advise_stack_range (void *mem, size_t size, uintptr_t pd, size_t guardsize) |
374 | { |
375 | uintptr_t sp = (uintptr_t) CURRENT_STACK_FRAME; |
376 | size_t pagesize_m1 = __getpagesize () - 1; |
377 | #if _STACK_GROWS_DOWN && !defined(NEED_SEPARATE_REGISTER_STACK) |
378 | size_t freesize = (sp - (uintptr_t) mem) & ~pagesize_m1; |
379 | assert (freesize < size); |
380 | if (freesize > PTHREAD_STACK_MIN) |
381 | __madvise (mem, freesize - PTHREAD_STACK_MIN, MADV_DONTNEED); |
382 | #else |
383 | /* Page aligned start of memory to free (higher than or equal |
384 | to current sp plus the minimum stack size). */ |
385 | uintptr_t freeblock = (sp + PTHREAD_STACK_MIN + pagesize_m1) & ~pagesize_m1; |
386 | uintptr_t free_end = (pd - guardsize) & ~pagesize_m1; |
387 | if (free_end > freeblock) |
388 | { |
389 | size_t freesize = free_end - freeblock; |
390 | assert (freesize < size); |
391 | __madvise ((void*) freeblock, freesize, MADV_DONTNEED); |
392 | } |
393 | #endif |
394 | } |
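/* The madvise calls above only tell the kernel it may discard the pages
   backing the unused part of the stack; the mapping itself stays in
   place.  Roughly speaking, the thread descriptor and static TLS, the
   guard, the frames currently in use, and PTHREAD_STACK_MIN bytes of
   headroom next to the stack pointer are all kept, so a stack that
   later lands in the cache does not pin its full RSS.  */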
395 | |
396 | /* Returns a usable stack for a new thread either by allocating a |
397 | new stack or reusing a cached stack of sufficient size. |
398 | ATTR must be non-NULL and point to a valid pthread_attr. |
399 | PDP must be non-NULL. */ |
400 | static int |
401 | allocate_stack (const struct pthread_attr *attr, struct pthread **pdp, |
402 | ALLOCATE_STACK_PARMS) |
403 | { |
404 | struct pthread *pd; |
405 | size_t size; |
406 | size_t pagesize_m1 = __getpagesize () - 1; |
407 | |
408 | assert (powerof2 (pagesize_m1 + 1)); |
409 | assert (TCB_ALIGNMENT >= STACK_ALIGN); |
410 | |
411 | /* Get the stack size from the attribute if it is set. Otherwise we |
412 | use the default we determined at start time. */ |
413 | if (attr->stacksize != 0) |
414 | size = attr->stacksize; |
415 | else |
416 | { |
417 | lll_lock (__default_pthread_attr_lock, LLL_PRIVATE); |
418 | size = __default_pthread_attr.internal.stacksize; |
419 | lll_unlock (__default_pthread_attr_lock, LLL_PRIVATE); |
420 | } |
421 | |
422 | /* Get memory for the stack. */ |
423 | if (__glibc_unlikely (attr->flags & ATTR_FLAG_STACKADDR)) |
424 | { |
425 | uintptr_t adj; |
426 | char *stackaddr = (char *) attr->stackaddr; |
427 | |
428 | /* Assume the same layout as the _STACK_GROWS_DOWN case, with struct |
429 | pthread at the top of the stack block. Later we adjust the guard |
430 | location and stack address to match the _STACK_GROWS_UP case. */ |
431 | if (_STACK_GROWS_UP) |
432 | stackaddr += attr->stacksize; |
433 | |
      /* If the user also specified the size of the stack, make sure it
         is large enough.  */
436 | if (attr->stacksize != 0 |
437 | && attr->stacksize < (__static_tls_size + MINIMAL_REST_STACK)) |
438 | return EINVAL; |
439 | |
440 | /* Adjust stack size for alignment of the TLS block. */ |
441 | #if TLS_TCB_AT_TP |
442 | adj = ((uintptr_t) stackaddr - TLS_TCB_SIZE) |
443 | & __static_tls_align_m1; |
444 | assert (size > adj + TLS_TCB_SIZE); |
445 | #elif TLS_DTV_AT_TP |
446 | adj = ((uintptr_t) stackaddr - __static_tls_size) |
447 | & __static_tls_align_m1; |
448 | assert (size > adj); |
449 | #endif |
450 | |
451 | /* The user provided some memory. Let's hope it matches the |
452 | size... We do not allocate guard pages if the user provided |
453 | the stack. It is the user's responsibility to do this if it |
454 | is wanted. */ |
455 | #if TLS_TCB_AT_TP |
456 | pd = (struct pthread *) ((uintptr_t) stackaddr |
457 | - TLS_TCB_SIZE - adj); |
458 | #elif TLS_DTV_AT_TP |
459 | pd = (struct pthread *) (((uintptr_t) stackaddr |
460 | - __static_tls_size - adj) |
461 | - TLS_PRE_TCB_SIZE); |
462 | #endif |
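      /* At this point PD sits just below the (aligned) top of the
         user-supplied memory, in the same layout an allocated stack
         would use: the descriptor, TCB and static TLS occupy the top of
         the region and the space below remains the usable stack.  */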
463 | |
      /* The user-provided stack memory needs to be cleared.  */
465 | memset (pd, '\0', sizeof (struct pthread)); |
466 | |
467 | /* The first TSD block is included in the TCB. */ |
468 | pd->specific[0] = pd->specific_1stblock; |
469 | |
470 | /* Remember the stack-related values. */ |
471 | pd->stackblock = (char *) stackaddr - size; |
472 | pd->stackblock_size = size; |
473 | |
474 | /* This is a user-provided stack. It will not be queued in the |
475 | stack cache nor will the memory (except the TLS memory) be freed. */ |
476 | pd->user_stack = true; |
477 | |
478 | /* This is at least the second thread. */ |
479 | pd->header.multiple_threads = 1; |
480 | #ifndef TLS_MULTIPLE_THREADS_IN_TCB |
481 | __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1; |
482 | #endif |
483 | |
484 | #ifdef NEED_DL_SYSINFO |
485 | SETUP_THREAD_SYSINFO (pd); |
486 | #endif |
487 | |
488 | /* Don't allow setxid until cloned. */ |
489 | pd->setxid_futex = -1; |
490 | |
491 | /* Allocate the DTV for this thread. */ |
492 | if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL) |
493 | { |
494 | /* Something went wrong. */ |
495 | assert (errno == ENOMEM); |
496 | return errno; |
497 | } |
498 | |
499 | |
500 | /* Prepare to modify global data. */ |
501 | lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE); |
502 | |
503 | /* And add to the list of stacks in use. */ |
504 | list_add (&pd->list, &GL (dl_stack_user)); |
505 | |
506 | lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); |
507 | } |
508 | else |
509 | { |
510 | /* Allocate some anonymous memory. If possible use the cache. */ |
511 | size_t guardsize; |
512 | size_t reported_guardsize; |
513 | size_t reqsize; |
514 | void *mem; |
515 | const int prot = (PROT_READ | PROT_WRITE |
516 | | ((GL(dl_stack_flags) & PF_X) ? PROT_EXEC : 0)); |
517 | |
518 | /* Adjust the stack size for alignment. */ |
519 | size &= ~__static_tls_align_m1; |
520 | assert (size != 0); |
521 | |
      /* Make sure the size of the stack is enough for the guard and
         the thread descriptor.  On some targets there is
524 | a minimum guard size requirement, ARCH_MIN_GUARD_SIZE, so |
525 | internally enforce it (unless the guard was disabled), but |
526 | report the original guard size for backward compatibility: |
527 | before POSIX 2008 the guardsize was specified to be one page |
528 | by default which is observable via pthread_attr_getguardsize |
529 | and pthread_getattr_np. */ |
530 | guardsize = (attr->guardsize + pagesize_m1) & ~pagesize_m1; |
531 | reported_guardsize = guardsize; |
532 | if (guardsize > 0 && guardsize < ARCH_MIN_GUARD_SIZE) |
533 | guardsize = ARCH_MIN_GUARD_SIZE; |
534 | if (guardsize < attr->guardsize || size + guardsize < guardsize) |
535 | /* Arithmetic overflow. */ |
536 | return EINVAL; |
537 | size += guardsize; |
538 | if (__builtin_expect (size < ((guardsize + __static_tls_size |
539 | + MINIMAL_REST_STACK + pagesize_m1) |
540 | & ~pagesize_m1), |
541 | 0)) |
542 | /* The stack is too small (or the guard too large). */ |
543 | return EINVAL; |
544 | |
545 | /* Try to get a stack from the cache. */ |
546 | reqsize = size; |
547 | pd = get_cached_stack (&size, &mem); |
548 | if (pd == NULL) |
549 | { |
550 | /* To avoid aliasing effects on a larger scale than pages we |
551 | adjust the allocated stack size if necessary. This way |
552 | allocations directly following each other will not have |
553 | aliasing problems. */ |
554 | #if MULTI_PAGE_ALIASING != 0 |
555 | if ((size % MULTI_PAGE_ALIASING) == 0) |
556 | size += pagesize_m1 + 1; |
557 | #endif |
558 | |
          /* If a guard page is required, avoid committing memory by first
             allocating with PROT_NONE and then setting the required
             permissions on everything except the guard area.  */
562 | mem = __mmap (NULL, size, (guardsize == 0) ? prot : PROT_NONE, |
563 | MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); |
564 | |
565 | if (__glibc_unlikely (mem == MAP_FAILED)) |
566 | return errno; |
567 | |
568 | /* SIZE is guaranteed to be greater than zero. |
569 | So we can never get a null pointer back from mmap. */ |
570 | assert (mem != NULL); |
571 | |
572 | /* Place the thread descriptor at the end of the stack. */ |
573 | #if TLS_TCB_AT_TP |
574 | pd = (struct pthread *) ((((uintptr_t) mem + size) |
575 | - TLS_TCB_SIZE) |
576 | & ~__static_tls_align_m1); |
577 | #elif TLS_DTV_AT_TP |
578 | pd = (struct pthread *) ((((uintptr_t) mem + size |
579 | - __static_tls_size) |
580 | & ~__static_tls_align_m1) |
581 | - TLS_PRE_TCB_SIZE); |
582 | #endif |
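          /* A sketch of the resulting block for the common downward-growing,
             TLS_TCB_AT_TP case (other configurations permute the pieces):
               mem: [ guard | usable stack ... | static TLS | TCB == pd ] :mem+size
             When a guard was requested, everything is still PROT_NONE at
             this point; the next step re-enables access on all but the
             guard area.  */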
583 | |
584 | /* Now mprotect the required region excluding the guard area. */ |
585 | if (__glibc_likely (guardsize > 0)) |
586 | { |
587 | char *guard = guard_position (mem, size, guardsize, pd, |
588 | pagesize_m1); |
589 | if (setup_stack_prot (mem, size, guard, guardsize, prot) != 0) |
590 | { |
591 | __munmap (mem, size); |
592 | return errno; |
593 | } |
594 | } |
595 | |
596 | /* Remember the stack-related values. */ |
597 | pd->stackblock = mem; |
598 | pd->stackblock_size = size; |
          /* Record the guard size of the newly allocated stack to avoid
             an extra mprotect call in the guard resize code below.  */
601 | pd->guardsize = guardsize; |
602 | |
          /* We allocated the first block of the thread-specific data array.
604 | This address will not change for the lifetime of this |
605 | descriptor. */ |
606 | pd->specific[0] = pd->specific_1stblock; |
607 | |
608 | /* This is at least the second thread. */ |
609 | pd->header.multiple_threads = 1; |
610 | #ifndef TLS_MULTIPLE_THREADS_IN_TCB |
611 | __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1; |
612 | #endif |
613 | |
614 | #ifdef NEED_DL_SYSINFO |
615 | SETUP_THREAD_SYSINFO (pd); |
616 | #endif |
617 | |
618 | /* Don't allow setxid until cloned. */ |
619 | pd->setxid_futex = -1; |
620 | |
621 | /* Allocate the DTV for this thread. */ |
622 | if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL) |
623 | { |
624 | /* Something went wrong. */ |
625 | assert (errno == ENOMEM); |
626 | |
627 | /* Free the stack memory we just allocated. */ |
628 | (void) __munmap (mem, size); |
629 | |
630 | return errno; |
631 | } |
632 | |
633 | |
634 | /* Prepare to modify global data. */ |
635 | lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE); |
636 | |
637 | /* And add to the list of stacks in use. */ |
638 | stack_list_add (&pd->list, &GL (dl_stack_used)); |
639 | |
640 | lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); |
641 | |
642 | |
643 | /* There might have been a race. Another thread might have |
644 | caused the stacks to get exec permission while this new |
645 | stack was prepared. Detect if this was possible and |
646 | change the permission if necessary. */ |
647 | if (__builtin_expect ((GL(dl_stack_flags) & PF_X) != 0 |
648 | && (prot & PROT_EXEC) == 0, 0)) |
649 | { |
650 | int err = change_stack_perm (pd |
651 | #ifdef NEED_SEPARATE_REGISTER_STACK |
652 | , ~pagesize_m1 |
653 | #endif |
654 | ); |
655 | if (err != 0) |
656 | { |
657 | /* Free the stack memory we just allocated. */ |
658 | (void) __munmap (mem, size); |
659 | |
660 | return err; |
661 | } |
662 | } |
663 | |
664 | |
          /* Note that all of the stack and the thread descriptor are
666 | zeroed. This means we do not have to initialize fields |
667 | with initial value zero. This is specifically true for |
668 | the 'tid' field which is always set back to zero once the |
669 | stack is not used anymore and for the 'guardsize' field |
670 | which will be read next. */ |
671 | } |
672 | |
673 | /* Create or resize the guard area if necessary. */ |
674 | if (__glibc_unlikely (guardsize > pd->guardsize)) |
675 | { |
676 | char *guard = guard_position (mem, size, guardsize, pd, |
677 | pagesize_m1); |
678 | if (__mprotect (guard, guardsize, PROT_NONE) != 0) |
679 | { |
680 | mprot_error: |
681 | lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE); |
682 | |
683 | /* Remove the thread from the list. */ |
684 | stack_list_del (&pd->list); |
685 | |
686 | lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); |
687 | |
688 | /* Get rid of the TLS block we allocated. */ |
689 | _dl_deallocate_tls (TLS_TPADJ (pd), false); |
690 | |
          /* Free the stack memory regardless of whether the size of the
             cache is over the limit or not.  If this piece of memory
             caused problems we had better not use it anymore.  Possible
             errors are ignored; there is nothing we could do.  */
696 | (void) __munmap (mem, size); |
697 | |
698 | return errno; |
699 | } |
700 | |
701 | pd->guardsize = guardsize; |
702 | } |
703 | else if (__builtin_expect (pd->guardsize - guardsize > size - reqsize, |
704 | 0)) |
705 | { |
706 | /* The old guard area is too large. */ |
707 | |
708 | #ifdef NEED_SEPARATE_REGISTER_STACK |
709 | char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1); |
710 | char *oldguard = mem + (((size - pd->guardsize) / 2) & ~pagesize_m1); |
711 | |
712 | if (oldguard < guard |
713 | && __mprotect (oldguard, guard - oldguard, prot) != 0) |
714 | goto mprot_error; |
715 | |
716 | if (__mprotect (guard + guardsize, |
717 | oldguard + pd->guardsize - guard - guardsize, |
718 | prot) != 0) |
719 | goto mprot_error; |
720 | #elif _STACK_GROWS_DOWN |
721 | if (__mprotect ((char *) mem + guardsize, pd->guardsize - guardsize, |
722 | prot) != 0) |
723 | goto mprot_error; |
724 | #elif _STACK_GROWS_UP |
725 | char *new_guard = (char *)(((uintptr_t) pd - guardsize) |
726 | & ~pagesize_m1); |
727 | char *old_guard = (char *)(((uintptr_t) pd - pd->guardsize) |
728 | & ~pagesize_m1); |
729 | /* The guard size difference might be > 0, but once rounded |
730 | to the nearest page the size difference might be zero. */ |
731 | if (new_guard > old_guard |
732 | && __mprotect (old_guard, new_guard - old_guard, prot) != 0) |
733 | goto mprot_error; |
734 | #endif |
735 | |
736 | pd->guardsize = guardsize; |
737 | } |
      /* pthread_getattr_np() needs to report the guard size requested
         in the attribute, regardless of how large the actually used
         guard is.  */
741 | pd->reported_guardsize = reported_guardsize; |
742 | } |
743 | |
744 | /* Initialize the lock. We have to do this unconditionally since the |
745 | stillborn thread could be canceled while the lock is taken. */ |
746 | pd->lock = LLL_LOCK_INITIALIZER; |
747 | |
748 | /* The robust mutex lists also need to be initialized |
749 | unconditionally because the cleanup for the previous stack owner |
750 | might have happened in the kernel. */ |
751 | pd->robust_head.futex_offset = (offsetof (pthread_mutex_t, __data.__lock) |
752 | - offsetof (pthread_mutex_t, |
753 | __data.__list.__next)); |
754 | pd->robust_head.list_op_pending = NULL; |
755 | #if __PTHREAD_MUTEX_HAVE_PREV |
756 | pd->robust_prev = &pd->robust_head; |
757 | #endif |
758 | pd->robust_head.list = &pd->robust_head; |
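  /* This is the handshake the kernel's robust futex support expects:
     robust_head is registered with set_robust_list during thread
     startup, and on thread exit the kernel walks the circular list,
     adding futex_offset to each entry to find the lock word and marking
     it with FUTEX_OWNER_DIED so waiters can recover the mutex.  An
     empty list is the head pointing to itself, as set up above.  */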
759 | |
760 | /* We place the thread descriptor at the end of the stack. */ |
761 | *pdp = pd; |
762 | |
763 | #if _STACK_GROWS_DOWN |
764 | void *stacktop; |
765 | |
766 | # if TLS_TCB_AT_TP |
767 | /* The stack begins before the TCB and the static TLS block. */ |
768 | stacktop = ((char *) (pd + 1) - __static_tls_size); |
769 | # elif TLS_DTV_AT_TP |
770 | stacktop = (char *) (pd - 1); |
771 | # endif |
772 | |
773 | # ifdef NEED_SEPARATE_REGISTER_STACK |
774 | *stack = pd->stackblock; |
775 | *stacksize = stacktop - *stack; |
776 | # else |
777 | *stack = stacktop; |
778 | # endif |
779 | #else |
780 | *stack = pd->stackblock; |
781 | #endif |
782 | |
783 | return 0; |
784 | } |
785 | |
786 | |
787 | void |
788 | __deallocate_stack (struct pthread *pd) |
789 | { |
790 | lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE); |
791 | |
  /* Remove the thread from whichever list of stacks it is on.  */
794 | stack_list_del (&pd->list); |
795 | |
796 | /* Not much to do. Just free the mmap()ed memory. Note that we do |
797 | not reset the 'used' flag in the 'tid' field. This is done by |
798 | the kernel. If no thread has been created yet this field is |
799 | still zero. */ |
800 | if (__glibc_likely (! pd->user_stack)) |
801 | (void) queue_stack (pd); |
802 | else |
803 | /* Free the memory associated with the ELF TLS. */ |
804 | _dl_deallocate_tls (TLS_TPADJ (pd), false); |
805 | |
806 | lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); |
807 | } |
808 | |
809 | |
810 | int |
811 | __make_stacks_executable (void **stack_endp) |
812 | { |
813 | /* First the main thread's stack. */ |
814 | int err = _dl_make_stack_executable (stack_endp); |
815 | if (err != 0) |
816 | return err; |
817 | |
818 | #ifdef NEED_SEPARATE_REGISTER_STACK |
819 | const size_t pagemask = ~(__getpagesize () - 1); |
820 | #endif |
821 | |
822 | lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE); |
823 | |
824 | list_t *runp; |
825 | list_for_each (runp, &GL (dl_stack_used)) |
826 | { |
827 | err = change_stack_perm (list_entry (runp, struct pthread, list) |
828 | #ifdef NEED_SEPARATE_REGISTER_STACK |
829 | , pagemask |
830 | #endif |
831 | ); |
832 | if (err != 0) |
833 | break; |
834 | } |
835 | |
836 | /* Also change the permission for the currently unused stacks. This |
837 | might be wasted time but better spend it here than adding a check |
838 | in the fast path. */ |
839 | if (err == 0) |
840 | list_for_each (runp, &stack_cache) |
841 | { |
842 | err = change_stack_perm (list_entry (runp, struct pthread, list) |
843 | #ifdef NEED_SEPARATE_REGISTER_STACK |
844 | , pagemask |
845 | #endif |
846 | ); |
847 | if (err != 0) |
848 | break; |
849 | } |
850 | |
851 | lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); |
852 | |
853 | return err; |
854 | } |
855 | |
856 | |
857 | /* In case of a fork() call the memory allocation in the child will be |
858 | the same but only one thread is running. All stacks except that of |
859 | the one running thread are not used anymore. We have to recycle |
860 | them. */ |
861 | void |
862 | __reclaim_stacks (void) |
863 | { |
864 | struct pthread *self = (struct pthread *) THREAD_SELF; |
865 | |
  /* No locking necessary.  The calling thread's stack is the only one in use.  But
867 | we have to be aware that we might have interrupted a list |
868 | operation. */ |
869 | |
870 | if (in_flight_stack != 0) |
871 | { |
872 | bool add_p = in_flight_stack & 1; |
873 | list_t *elem = (list_t *) (in_flight_stack & ~(uintptr_t) 1); |
874 | |
875 | if (add_p) |
876 | { |
877 | /* We always add at the beginning of the list. So in this case we |
878 | only need to check the beginning of these lists to see if the |
879 | pointers at the head of the list are inconsistent. */ |
880 | list_t *l = NULL; |
881 | |
882 | if (GL (dl_stack_used).next->prev != &GL (dl_stack_used)) |
883 | l = &GL (dl_stack_used); |
884 | else if (stack_cache.next->prev != &stack_cache) |
885 | l = &stack_cache; |
886 | |
887 | if (l != NULL) |
888 | { |
889 | assert (l->next->prev == elem); |
890 | elem->next = l->next; |
891 | elem->prev = l; |
892 | l->next = elem; |
893 | } |
894 | } |
895 | else |
896 | { |
897 | /* We can simply always replay the delete operation. */ |
898 | elem->next->prev = elem->prev; |
899 | elem->prev->next = elem->next; |
900 | } |
901 | } |
902 | |
903 | /* Mark all stacks except the still running one as free. */ |
904 | list_t *runp; |
905 | list_for_each (runp, &GL (dl_stack_used)) |
906 | { |
907 | struct pthread *curp = list_entry (runp, struct pthread, list); |
908 | if (curp != self) |
909 | { |
910 | /* This marks the stack as free. */ |
911 | curp->tid = 0; |
912 | |
913 | /* Account for the size of the stack. */ |
914 | stack_cache_actsize += curp->stackblock_size; |
915 | |
916 | if (curp->specific_used) |
917 | { |
918 | /* Clear the thread-specific data. */ |
919 | memset (curp->specific_1stblock, '\0', |
920 | sizeof (curp->specific_1stblock)); |
921 | |
922 | curp->specific_used = false; |
923 | |
924 | for (size_t cnt = 1; cnt < PTHREAD_KEY_1STLEVEL_SIZE; ++cnt) |
925 | if (curp->specific[cnt] != NULL) |
926 | { |
927 | memset (curp->specific[cnt], '\0', |
928 | sizeof (curp->specific_1stblock)); |
929 | |
930 | /* We have allocated the block which we do not |
931 | free here so re-set the bit. */ |
932 | curp->specific_used = true; |
933 | } |
934 | } |
935 | } |
936 | } |
937 | |
  /* Add the stacks of all formerly running threads to the cache.  */
939 | list_splice (&GL (dl_stack_used), &stack_cache); |
940 | |
  /* Remove the entry for the current thread from the cache list and
     add it to the appropriate list of threads.  Which of the two lists
     is used is decided by the user_stack flag.  */
944 | stack_list_del (&self->list); |
945 | |
946 | /* Re-initialize the lists for all the threads. */ |
947 | INIT_LIST_HEAD (&GL (dl_stack_used)); |
948 | INIT_LIST_HEAD (&GL (dl_stack_user)); |
949 | |
950 | if (__glibc_unlikely (THREAD_GETMEM (self, user_stack))) |
951 | list_add (&self->list, &GL (dl_stack_user)); |
952 | else |
953 | list_add (&self->list, &GL (dl_stack_used)); |
954 | |
955 | /* There is one thread running. */ |
956 | __nptl_nthreads = 1; |
957 | |
958 | in_flight_stack = 0; |
959 | |
960 | /* Initialize locks. */ |
961 | GL (dl_stack_cache_lock) = LLL_LOCK_INITIALIZER; |
962 | __default_pthread_attr_lock = LLL_LOCK_INITIALIZER; |
963 | } |
964 | |
965 | |
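/* A rough summary of the setxid_futex states used by the functions
   below (the creation-side transitions happen elsewhere):
     -1  descriptor set up, thread not yet cloned
     -2  not yet cloned and a setxid operation is waiting for the clone
      0  cloned; the thread must not exit while setxid is in progress
      1  released, either because the thread is exiting or because the
         setxid operation is done with it.  */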
966 | static void |
967 | setxid_mark_thread (struct xid_command *cmdp, struct pthread *t) |
968 | { |
969 | int ch; |
970 | |
971 | /* Wait until this thread is cloned. */ |
972 | if (t->setxid_futex == -1 |
973 | && ! atomic_compare_and_exchange_bool_acq (&t->setxid_futex, -2, -1)) |
974 | do |
975 | futex_wait_simple (&t->setxid_futex, -2, FUTEX_PRIVATE); |
976 | while (t->setxid_futex == -2); |
977 | |
978 | /* Don't let the thread exit before the setxid handler runs. */ |
979 | t->setxid_futex = 0; |
980 | |
981 | do |
982 | { |
983 | ch = t->cancelhandling; |
984 | |
985 | /* If the thread is exiting right now, ignore it. */ |
986 | if ((ch & EXITING_BITMASK) != 0) |
987 | { |
988 | /* Release the futex if there is no other setxid in |
989 | progress. */ |
990 | if ((ch & SETXID_BITMASK) == 0) |
991 | { |
992 | t->setxid_futex = 1; |
993 | futex_wake (&t->setxid_futex, 1, FUTEX_PRIVATE); |
994 | } |
995 | return; |
996 | } |
997 | } |
998 | while (atomic_compare_and_exchange_bool_acq (&t->cancelhandling, |
999 | ch | SETXID_BITMASK, ch)); |
1000 | } |
1001 | |
1002 | |
1003 | static void |
1004 | setxid_unmark_thread (struct xid_command *cmdp, struct pthread *t) |
1005 | { |
1006 | int ch; |
1007 | |
1008 | do |
1009 | { |
1010 | ch = t->cancelhandling; |
1011 | if ((ch & SETXID_BITMASK) == 0) |
1012 | return; |
1013 | } |
1014 | while (atomic_compare_and_exchange_bool_acq (&t->cancelhandling, |
1015 | ch & ~SETXID_BITMASK, ch)); |
1016 | |
1017 | /* Release the futex just in case. */ |
1018 | t->setxid_futex = 1; |
1019 | futex_wake (&t->setxid_futex, 1, FUTEX_PRIVATE); |
1020 | } |
1021 | |
1022 | |
1023 | static int |
1024 | setxid_signal_thread (struct xid_command *cmdp, struct pthread *t) |
1025 | { |
1026 | if ((t->cancelhandling & SETXID_BITMASK) == 0) |
1027 | return 0; |
1028 | |
1029 | int val; |
1030 | pid_t pid = __getpid (); |
1031 | val = INTERNAL_SYSCALL_CALL (tgkill, pid, t->tid, SIGSETXID); |
1032 | |
  /* If this failed, the thread must not have started yet or else have
     already exited.  */
1034 | if (!INTERNAL_SYSCALL_ERROR_P (val)) |
1035 | { |
1036 | atomic_increment (&cmdp->cntr); |
1037 | return 1; |
1038 | } |
1039 | else |
1040 | return 0; |
1041 | } |
1042 | |
1043 | /* Check for consistency across set*id system call results. The abort |
   should not happen as long as all privilege changes happen through
1045 | the glibc wrappers. ERROR must be 0 (no error) or an errno |
1046 | code. */ |
1047 | void |
1048 | attribute_hidden |
1049 | __nptl_setxid_error (struct xid_command *cmdp, int error) |
1050 | { |
1051 | do |
1052 | { |
1053 | int olderror = cmdp->error; |
1054 | if (olderror == error) |
1055 | break; |
1056 | if (olderror != -1) |
1057 | { |
1058 | /* Mismatch between current and previous results. Save the |
             error value to memory so that it is not clobbered by the
             abort function and is preserved in core dumps.  */
1061 | volatile int xid_err __attribute__((unused)) = error; |
1062 | abort (); |
1063 | } |
1064 | } |
1065 | while (atomic_compare_and_exchange_bool_acq (&cmdp->error, error, -1)); |
1066 | } |
1067 | |
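/* Linux set*id system calls affect only the calling thread, but POSIX
   requires the change to apply to the whole process.  Therefore every
   other thread is marked, signalled with SIGSETXID so that its handler
   performs the same system call, and waited for; only then does the
   calling thread issue the system call itself.  */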
1068 | int |
1069 | attribute_hidden |
1070 | __nptl_setxid (struct xid_command *cmdp) |
1071 | { |
1072 | int signalled; |
1073 | int result; |
1074 | lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE); |
1075 | |
1076 | __xidcmd = cmdp; |
1077 | cmdp->cntr = 0; |
1078 | cmdp->error = -1; |
1079 | |
1080 | struct pthread *self = THREAD_SELF; |
1081 | |
1082 | /* Iterate over the list with system-allocated threads first. */ |
1083 | list_t *runp; |
1084 | list_for_each (runp, &GL (dl_stack_used)) |
1085 | { |
1086 | struct pthread *t = list_entry (runp, struct pthread, list); |
1087 | if (t == self) |
1088 | continue; |
1089 | |
1090 | setxid_mark_thread (cmdp, t); |
1091 | } |
1092 | |
1093 | /* Now the list with threads using user-allocated stacks. */ |
1094 | list_for_each (runp, &GL (dl_stack_user)) |
1095 | { |
1096 | struct pthread *t = list_entry (runp, struct pthread, list); |
1097 | if (t == self) |
1098 | continue; |
1099 | |
1100 | setxid_mark_thread (cmdp, t); |
1101 | } |
1102 | |
1103 | /* Iterate until we don't succeed in signalling anyone. That means |
1104 | we have gotten all running threads, and their children will be |
1105 | automatically correct once started. */ |
1106 | do |
1107 | { |
1108 | signalled = 0; |
1109 | |
1110 | list_for_each (runp, &GL (dl_stack_used)) |
1111 | { |
1112 | struct pthread *t = list_entry (runp, struct pthread, list); |
1113 | if (t == self) |
1114 | continue; |
1115 | |
1116 | signalled += setxid_signal_thread (cmdp, t); |
1117 | } |
1118 | |
1119 | list_for_each (runp, &GL (dl_stack_user)) |
1120 | { |
1121 | struct pthread *t = list_entry (runp, struct pthread, list); |
1122 | if (t == self) |
1123 | continue; |
1124 | |
1125 | signalled += setxid_signal_thread (cmdp, t); |
1126 | } |
1127 | |
1128 | int cur = cmdp->cntr; |
1129 | while (cur != 0) |
1130 | { |
1131 | futex_wait_simple ((unsigned int *) &cmdp->cntr, cur, |
1132 | FUTEX_PRIVATE); |
1133 | cur = cmdp->cntr; |
1134 | } |
1135 | } |
1136 | while (signalled != 0); |
1137 | |
1138 | /* Clean up flags, so that no thread blocks during exit waiting |
1139 | for a signal which will never come. */ |
1140 | list_for_each (runp, &GL (dl_stack_used)) |
1141 | { |
1142 | struct pthread *t = list_entry (runp, struct pthread, list); |
1143 | if (t == self) |
1144 | continue; |
1145 | |
1146 | setxid_unmark_thread (cmdp, t); |
1147 | } |
1148 | |
1149 | list_for_each (runp, &GL (dl_stack_user)) |
1150 | { |
1151 | struct pthread *t = list_entry (runp, struct pthread, list); |
1152 | if (t == self) |
1153 | continue; |
1154 | |
1155 | setxid_unmark_thread (cmdp, t); |
1156 | } |
1157 | |
  /* This must be last, otherwise the current thread might not have
     permission to send the SIGSETXID signal to the other threads.  */
1160 | result = INTERNAL_SYSCALL_NCS (cmdp->syscall_no, 3, |
1161 | cmdp->id[0], cmdp->id[1], cmdp->id[2]); |
1162 | int error = 0; |
1163 | if (__glibc_unlikely (INTERNAL_SYSCALL_ERROR_P (result))) |
1164 | { |
1165 | error = INTERNAL_SYSCALL_ERRNO (result); |
1166 | __set_errno (error); |
1167 | result = -1; |
1168 | } |
1169 | __nptl_setxid_error (cmdp, error); |
1170 | |
1171 | lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); |
1172 | return result; |
1173 | } |
1174 | |
1175 | static inline void __attribute__((always_inline)) |
1176 | init_one_static_tls (struct pthread *curp, struct link_map *map) |
1177 | { |
1178 | # if TLS_TCB_AT_TP |
1179 | void *dest = (char *) curp - map->l_tls_offset; |
1180 | # elif TLS_DTV_AT_TP |
1181 | void *dest = (char *) curp + map->l_tls_offset + TLS_PRE_TCB_SIZE; |
1182 | # else |
1183 | # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined" |
1184 | # endif |
1185 | |
  /* Initialize the memory: copy the TLS initialization image and
     clear the rest of the block.  */
1187 | memset (__mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size), |
1188 | '\0', map->l_tls_blocksize - map->l_tls_initimage_size); |
1189 | } |
1190 | |
1191 | void |
1192 | attribute_hidden |
1193 | __pthread_init_static_tls (struct link_map *map) |
1194 | { |
1195 | lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE); |
1196 | |
1197 | /* Iterate over the list with system-allocated threads first. */ |
1198 | list_t *runp; |
1199 | list_for_each (runp, &GL (dl_stack_used)) |
1200 | init_one_static_tls (list_entry (runp, struct pthread, list), map); |
1201 | |
1202 | /* Now the list with threads using user-allocated stacks. */ |
1203 | list_for_each (runp, &GL (dl_stack_user)) |
1204 | init_one_static_tls (list_entry (runp, struct pthread, list), map); |
1205 | |
1206 | lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); |
1207 | } |
1208 | |