allocatestack.c source code [glibc/nptl/allocatestack.c]

/* Copyright (C) 2002-2016 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
18
19	#include <assert.h>
20	#include <errno.h>
21	#include <signal.h>
22	#include <stdint.h>
23	#include <string.h>
24	#include <unistd.h>
25	#include <sys/mman.h>
26	#include <sys/param.h>
27	#include <dl-sysdep.h>
28	#include <dl-tls.h>
29	#include <tls.h>
30	#include <list.h>
31	#include <lowlevellock.h>
32	#include <futex-internal.h>
33	#include <kernel-features.h>
34	#include <stack-aliasing.h>
35
36
#ifndef NEED_SEPARATE_REGISTER_STACK

/* Most architectures have exactly one stack pointer.  Some have more.  */
# define STACK_VARIABLES void *stackaddr = NULL

/* How to pass the values to the 'create_thread' function.  */
# define STACK_VARIABLES_ARGS stackaddr

/* How to declare a function which gets these parameters.  */
# define STACK_VARIABLES_PARMS void *stackaddr

/* How to declare allocate_stack.  */
# define ALLOCATE_STACK_PARMS void **stack

/* This is how the function is called.  We do it this way to allow
   other variants of the function to have more parameters.  */
# define ALLOCATE_STACK(attr, pd) allocate_stack (attr, pd, &stackaddr)

#else

/* We need two stacks.  The kernel will place them but we have to tell
   the kernel about the size of the reserved address space.  */
# define STACK_VARIABLES void *stackaddr = NULL; size_t stacksize = 0

/* How to pass the values to the 'create_thread' function.  */
# define STACK_VARIABLES_ARGS stackaddr, stacksize

/* How to declare a function which gets these parameters.  */
# define STACK_VARIABLES_PARMS void *stackaddr, size_t stacksize

/* How to declare allocate_stack.  Both values are output parameters,
   matching the addresses passed by ALLOCATE_STACK below.  */
# define ALLOCATE_STACK_PARMS void **stack, size_t *stacksize

/* This is how the function is called.  We do it this way to allow
   other variants of the function to have more parameters.  */
# define ALLOCATE_STACK(attr, pd) \
  allocate_stack (attr, pd, &stackaddr, &stacksize)

#endif
76
77
/* Default alignment of stack.  */
#ifndef STACK_ALIGN
# define STACK_ALIGN __alignof__ (long double)
#endif

/* Default value for minimal stack size after allocating thread
   descriptor and guard.  */
#ifndef MINIMAL_REST_STACK
# define MINIMAL_REST_STACK 4096
#endif


/* Newer kernels have the MAP_STACK flag to indicate a mapping is used for
   a stack.  Use it when possible.  When the headers do not provide it
   the flag degrades to 0 and has no effect on the mmap call.  */
#ifndef MAP_STACK
# define MAP_STACK 0
#endif

/* This yields the pointer that TLS support code calls the thread pointer.
   With TLS_TCB_AT_TP it is the descriptor itself; with TLS_DTV_AT_TP it
   lies TLS_PRE_TCB_SIZE bytes past the start of the descriptor.  */
#if TLS_TCB_AT_TP
# define TLS_TPADJ(pd) (pd)
#elif TLS_DTV_AT_TP
# define TLS_TPADJ(pd) ((struct pthread *) ((char *) (pd) + TLS_PRE_TCB_SIZE))
#endif
102
103	/ Cache handling for not-yet free stacks. /
104
105	/ Maximum size in kB of cache. /
106	static size_t stack_cache_maxsize = `40` * `1024` * `1024`; / 40MiBi by default. /
107	static size_t stack_cache_actsize;
108
109	/ Mutex protecting this variable. /
110	static int stack_cache_lock = LLL_LOCK_INITIALIZER;
111
112	/ List of queued stack frames. /
113	static LIST_HEAD (stack_cache);
114
115	/ List of the stacks in use. /
116	static LIST_HEAD (stack_used);
117
118	/ We need to record what list operations we are going to do so that,*
119	in case of an asynchronous interruption due to a fork() call, we
120	can correct for the work. /*
121	static uintptr_t in_flight_stack;
122
123	/ List of the threads with user provided stacks in use. No need to*
124	initialize this, since it's done in __pthread_initialize_minimal. /*
125	list_t __stack_user __attribute__ ((nocommon));
126	hidden_data_def (__stack_user)
127
128	#if COLORING_INCREMENT != 0
129	/ Number of threads created. /
130	static unsigned int nptl_ncreated;
131	#endif
132
133
134	/ Check whether the stack is still used or not. /
135	#define FREE_P(descr) ((descr)->tid <= 0)
136
137
138	static void
139	stack_list_del (list_t *elem)
140	{
141	in_flight_stack = (uintptr_t) elem;
142
143	atomic_write_barrier ();
144
145	list_del (elem);
146
147	atomic_write_barrier ();
148
149	in_flight_stack = `0`;
150	}
151
152
153	static void
154	stack_list_add (list_t elem, list_t list)
155	{
156	in_flight_stack = (uintptr_t) elem \| `1`;
157
158	atomic_write_barrier ();
159
160	list_add (elem, list);
161
162	atomic_write_barrier ();
163
164	in_flight_stack = `0`;
165	}
166
167
/* We create a doubly linked list of all cache entries.  Doubly linked
   because this allows removing entries from the end.  */
170
171
172	/ Get a stack frame from the cache. We have to match by size since*
173	some blocks might be too small or far too large. /*
174	static struct pthread *
175	get_cached_stack (size_t sizep, void* **memp)
176	{
177	size_t size = *sizep;
178	struct pthread *result = NULL;
179	list_t *entry;
180
181	lll_lock (stack_cache_lock, LLL_PRIVATE);
182
183	/ Search the cache for a matching entry. We search for the*
184	smallest stack which has at least the required size. Note that
185	in normal situations the size of all allocated stacks is the
186	same. As the very least there are only a few different sizes.
187	Therefore this loop will exit early most of the time with an
188	exact match. /*
189	list_for_each (entry, &stack_cache)
190	{
191	struct pthread *curr;
192
193	curr = list_entry (entry, struct pthread, list);
194	if (FREE_P (curr) && curr->stackblock_size >= size)
195	{
196	if (curr->stackblock_size == size)
197	{
198	result = curr;
199	break;
200	}
201
202	if (result == NULL
203	\|\| result->stackblock_size > curr->stackblock_size)
204	result = curr;
205	}
206	}
207
208	if (__builtin_expect (result == NULL, `0`)
209	/ Make sure the size difference is not too excessive. In that*
210	case we do not use the block. /*
211	\|\| __builtin_expect (result->stackblock_size > `4` * size, `0`))
212	{
213	/ Release the lock. /
214	lll_unlock (stack_cache_lock, LLL_PRIVATE);
215
216	return NULL;
217	}
218
219	/ Don't allow setxid until cloned. /
220	result->setxid_futex = -`1`;
221
222	/ Dequeue the entry. /
223	stack_list_del (&result->list);
224
225	/ And add to the list of stacks in use. /
226	stack_list_add (&result->list, &stack_used);
227
228	/ And decrease the cache size. /
229	stack_cache_actsize -= result->stackblock_size;
230
231	/ Release the lock early. /
232	lll_unlock (stack_cache_lock, LLL_PRIVATE);
233
234	/ Report size and location of the stack to the caller. /
235	*sizep = result->stackblock_size;
236	*memp = result->stackblock;
237
238	/ Cancellation handling is back to the default. /
239	result->cancelhandling = `0`;
240	result->cleanup = NULL;
241
242	/ No pending event. /
243	result->nextevent = NULL;
244
245	/ Clear the DTV. /
246	dtv_t *dtv = GET_DTV (TLS_TPADJ (result));
247	for (size_t cnt = `0`; cnt < dtv[-`1`].counter; ++cnt)
248	if (! dtv[`1` + cnt].pointer.is_static
249	&& dtv[`1` + cnt].pointer.val != TLS_DTV_UNALLOCATED)
250	free (dtv[`1` + cnt].pointer.val);
251	memset (dtv, `'\0'`, (dtv[-`1`].counter + `1`) * sizeof (dtv_t));
252
253	/ Re-initialize the TLS. /
254	_dl_allocate_tls_init (TLS_TPADJ (result));
255
256	return result;
257	}
258
259
260	/ Free stacks until cache size is lower than LIMIT. /
261	void
262	__free_stacks (size_t limit)
263	{
264	/ We reduce the size of the cache. Remove the last entries until*
265	the size is below the limit. /*
266	list_t *entry;
267	list_t *prev;
268
269	/ Search from the end of the list. /
270	list_for_each_prev_safe (entry, prev, &stack_cache)
271	{
272	struct pthread *curr;
273
274	curr = list_entry (entry, struct pthread, list);
275	if (FREE_P (curr))
276	{
277	/ Unlink the block. /
278	stack_list_del (entry);
279
280	/ Account for the freed memory. /
281	stack_cache_actsize -= curr->stackblock_size;
282
283	/ Free the memory associated with the ELF TLS. /
284	_dl_deallocate_tls (TLS_TPADJ (curr), false);
285
286	/ Remove this block. This should never fail. If it does*
287	something is really wrong. /*
288	if (munmap (curr->stackblock, curr->stackblock_size) != `0`)
289	abort ();
290
291	/ Maybe we have freed enough. /
292	if (stack_cache_actsize <= limit)
293	break;
294	}
295	}
296	}
297
298
299	/ Add a stack frame which is not used anymore to the stack. Must be*
300	called with the cache lock held. /*
301	static inline void
302	__attribute ((always_inline))
303	queue_stack (struct pthread *stack)
304	{
305	/ We unconditionally add the stack to the list. The memory may*
306	still be in use but it will not be reused until the kernel marks
307	the stack as not used anymore. /*
308	stack_list_add (&stack->list, &stack_cache);
309
310	stack_cache_actsize += stack->stackblock_size;
311	if (__glibc_unlikely (stack_cache_actsize > stack_cache_maxsize))
312	__free_stacks (stack_cache_maxsize);
313	}
314
315
316	static int
317	internal_function
318	change_stack_perm (struct pthread *pd
319	#ifdef NEED_SEPARATE_REGISTER_STACK
320	, size_t pagemask
321	#endif
322	)
323	{
324	#ifdef NEED_SEPARATE_REGISTER_STACK
325	void *stack = (pd->stackblock
326	+ (((((pd->stackblock_size - pd->guardsize) / `2`)
327	& pagemask) + pd->guardsize) & pagemask));
328	size_t len = pd->stackblock + pd->stackblock_size - stack;
329	#elif _STACK_GROWS_DOWN
330	void *stack = pd->stackblock + pd->guardsize;
331	size_t len = pd->stackblock_size - pd->guardsize;
332	#elif _STACK_GROWS_UP
333	void *stack = pd->stackblock;
334	size_t len = (uintptr_t) pd - pd->guardsize - (uintptr_t) pd->stackblock;
335	#else
336	# error "Define either _STACK_GROWS_DOWN or _STACK_GROWS_UP"
337	#endif
338	if (mprotect (stack, len, PROT_READ \| PROT_WRITE \| PROT_EXEC) != `0`)
339	return errno;
340
341	return `0`;
342	}
343
344
345	/ Returns a usable stack for a new thread either by allocating a*
346	new stack or reusing a cached stack of sufficient size.
347	ATTR must be non-NULL and point to a valid pthread_attr.
348	PDP must be non-NULL. /*
349	static int
350	allocate_stack (const struct pthread_attr attr, struct* pthread **pdp,
351	ALLOCATE_STACK_PARMS)
352	{
353	struct pthread *pd;
354	size_t size;
355	size_t pagesize_m1 = __getpagesize () - `1`;
356
357	assert (powerof2 (pagesize_m1 + `1`));
358	assert (TCB_ALIGNMENT >= STACK_ALIGN);
359
360	/ Get the stack size from the attribute if it is set. Otherwise we*
361	use the default we determined at start time. /*
362	if (attr->stacksize != `0`)
363	size = attr->stacksize;
364	else
365	{
366	lll_lock (__default_pthread_attr_lock, LLL_PRIVATE);
367	size = __default_pthread_attr.stacksize;
368	lll_unlock (__default_pthread_attr_lock, LLL_PRIVATE);
369	}
370
371	/ Get memory for the stack. /
372	if (__glibc_unlikely (attr->flags & ATTR_FLAG_STACKADDR))
373	{
374	uintptr_t adj;
375	char stackaddr = (char* *) attr->stackaddr;
376
377	/ Assume the same layout as the _STACK_GROWS_DOWN case, with struct*
378	pthread at the top of the stack block. Later we adjust the guard
379	location and stack address to match the _STACK_GROWS_UP case. /*
380	if (_STACK_GROWS_UP)
381	stackaddr += attr->stacksize;
382
383	/ If the user also specified the size of the stack make sure it*
384	is large enough. /*
385	if (attr->stacksize != `0`
386	&& attr->stacksize < (__static_tls_size + MINIMAL_REST_STACK))
387	return EINVAL;
388
389	/ Adjust stack size for alignment of the TLS block. /
390	#if TLS_TCB_AT_TP
391	adj = ((uintptr_t) stackaddr - TLS_TCB_SIZE)
392	& __static_tls_align_m1;
393	assert (size > adj + TLS_TCB_SIZE);
394	#elif TLS_DTV_AT_TP
395	adj = ((uintptr_t) stackaddr - __static_tls_size)
396	& __static_tls_align_m1;
397	assert (size > adj);
398	#endif
399
400	/ The user provided some memory. Let's hope it matches the*
401	size... We do not allocate guard pages if the user provided
402	the stack. It is the user's responsibility to do this if it
403	is wanted. /*
404	#if TLS_TCB_AT_TP
405	pd = (struct pthread *) ((uintptr_t) stackaddr
406	- TLS_TCB_SIZE - adj);
407	#elif TLS_DTV_AT_TP
408	pd = (struct pthread *) (((uintptr_t) stackaddr
409	- __static_tls_size - adj)
410	- TLS_PRE_TCB_SIZE);
411	#endif
412
413	/ The user provided stack memory needs to be cleared. /
414	memset (pd, `'\0'`, sizeof (struct pthread));
415
416	/ The first TSD block is included in the TCB. /
417	pd->specific[`0`] = pd->specific_1stblock;
418
419	/ Remember the stack-related values. /
420	pd->stackblock = (char *) stackaddr - size;
421	pd->stackblock_size = size;
422
423	/ This is a user-provided stack. It will not be queued in the*
424	stack cache nor will the memory (except the TLS memory) be freed. /*
425	pd->user_stack = true;
426
427	/ This is at least the second thread. /
428	pd->header.multiple_threads = `1`;
429	#ifndef TLS_MULTIPLE_THREADS_IN_TCB
430	__pthread_multiple_threads = *__libc_multiple_threads_ptr = `1`;
431	#endif
432
433	#ifndef __ASSUME_PRIVATE_FUTEX
434	/ The thread must know when private futexes are supported. /
435	pd->header.private_futex = THREAD_GETMEM (THREAD_SELF,
436	header.private_futex);
437	#endif
438
439	#ifdef NEED_DL_SYSINFO
440	SETUP_THREAD_SYSINFO (pd);
441	#endif
442
443	/ The process ID is also the same as that of the caller. /
444	pd->pid = THREAD_GETMEM (THREAD_SELF, pid);
445
446	/ Don't allow setxid until cloned. /
447	pd->setxid_futex = -`1`;
448
449	/ Allocate the DTV for this thread. /
450	if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
451	{
452	/ Something went wrong. /
453	assert (errno == ENOMEM);
454	return errno;
455	}
456
457
458	/ Prepare to modify global data. /
459	lll_lock (stack_cache_lock, LLL_PRIVATE);
460
461	/ And add to the list of stacks in use. /
462	list_add (&pd->list, &__stack_user);
463
464	lll_unlock (stack_cache_lock, LLL_PRIVATE);
465	}
466	else
467	{
468	/ Allocate some anonymous memory. If possible use the cache. /
469	size_t guardsize;
470	size_t reqsize;
471	void *mem;
472	const int prot = (PROT_READ \| PROT_WRITE
473	\| ((GL(dl_stack_flags) & PF_X) ? PROT_EXEC : `0`));
474
475	#if COLORING_INCREMENT != 0
476	/ Add one more page for stack coloring. Don't do it for stacks*
477	with 16 times pagesize or larger. This might just cause
478	unnecessary misalignment. /*
479	if (size <= `16` * pagesize_m1)
480	size += pagesize_m1 + `1`;
481	#endif
482
483	/ Adjust the stack size for alignment. /
484	size &= ~__static_tls_align_m1;
485	assert (size != `0`);
486
487	/ Make sure the size of the stack is enough for the guard and*
488	eventually the thread descriptor. /*
489	guardsize = (attr->guardsize + pagesize_m1) & ~pagesize_m1;
490	if (__builtin_expect (size < ((guardsize + __static_tls_size
491	+ MINIMAL_REST_STACK + pagesize_m1)
492	& ~pagesize_m1),
493	`0`))
494	/ The stack is too small (or the guard too large). /
495	return EINVAL;
496
497	/ Try to get a stack from the cache. /
498	reqsize = size;
499	pd = get_cached_stack (&size, &mem);
500	if (pd == NULL)
501	{
502	/ To avoid aliasing effects on a larger scale than pages we*
503	adjust the allocated stack size if necessary. This way
504	allocations directly following each other will not have
505	aliasing problems. /*
506	#if MULTI_PAGE_ALIASING != 0
507	if ((size % MULTI_PAGE_ALIASING) == `0`)
508	size += pagesize_m1 + `1`;
509	#endif
510
511	mem = mmap (NULL, size, prot,
512	MAP_PRIVATE \| MAP_ANONYMOUS \| MAP_STACK, -`1`, `0`);
513
514	if (__glibc_unlikely (mem == MAP_FAILED))
515	return errno;
516
517	/ SIZE is guaranteed to be greater than zero.*
518	So we can never get a null pointer back from mmap. /*
519	assert (mem != NULL);
520
521	#if COLORING_INCREMENT != 0
522	/ Atomically increment NCREATED. /
523	unsigned int ncreated = atomic_increment_val (&nptl_ncreated);
524
525	/ We chose the offset for coloring by incrementing it for*
526	every new thread by a fixed amount. The offset used
527	module the page size. Even if coloring would be better
528	relative to higher alignment values it makes no sense to
529	do it since the mmap() interface does not allow us to
530	specify any alignment for the returned memory block. /*
531	size_t coloring = (ncreated * COLORING_INCREMENT) & pagesize_m1;
532
533	/ Make sure the coloring offsets does not disturb the alignment*
534	of the TCB and static TLS block. /*
535	if (__glibc_unlikely ((coloring & __static_tls_align_m1) != `0`))
536	coloring = (((coloring + __static_tls_align_m1)
537	& ~(__static_tls_align_m1))
538	& ~pagesize_m1);
539	#else
540	/ Unless specified we do not make any adjustments. /
541	# define coloring 0
542	#endif
543
544	/ Place the thread descriptor at the end of the stack. /
545	#if TLS_TCB_AT_TP
546	pd = (struct pthread ) ((char* *) mem + size - coloring) - `1`;
547	#elif TLS_DTV_AT_TP
548	pd = (struct pthread *) ((((uintptr_t) mem + size - coloring
549	- __static_tls_size)
550	& ~__static_tls_align_m1)
551	- TLS_PRE_TCB_SIZE);
552	#endif
553
554	/ Remember the stack-related values. /
555	pd->stackblock = mem;
556	pd->stackblock_size = size;
557
558	/ We allocated the first block thread-specific data array.*
559	This address will not change for the lifetime of this
560	descriptor. /*
561	pd->specific[`0`] = pd->specific_1stblock;
562
563	/ This is at least the second thread. /
564	pd->header.multiple_threads = `1`;
565	#ifndef TLS_MULTIPLE_THREADS_IN_TCB
566	__pthread_multiple_threads = *__libc_multiple_threads_ptr = `1`;
567	#endif
568
569	#ifndef __ASSUME_PRIVATE_FUTEX
570	/ The thread must know when private futexes are supported. /
571	pd->header.private_futex = THREAD_GETMEM (THREAD_SELF,
572	header.private_futex);
573	#endif
574
575	#ifdef NEED_DL_SYSINFO
576	SETUP_THREAD_SYSINFO (pd);
577	#endif
578
579	/ Don't allow setxid until cloned. /
580	pd->setxid_futex = -`1`;
581
582	/ The process ID is also the same as that of the caller. /
583	pd->pid = THREAD_GETMEM (THREAD_SELF, pid);
584
585	/ Allocate the DTV for this thread. /
586	if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
587	{
588	/ Something went wrong. /
589	assert (errno == ENOMEM);
590
591	/ Free the stack memory we just allocated. /
592	(void) munmap (mem, size);
593
594	return errno;
595	}
596
597
598	/ Prepare to modify global data. /
599	lll_lock (stack_cache_lock, LLL_PRIVATE);
600
601	/ And add to the list of stacks in use. /
602	stack_list_add (&pd->list, &stack_used);
603
604	lll_unlock (stack_cache_lock, LLL_PRIVATE);
605
606
607	/ There might have been a race. Another thread might have*
608	caused the stacks to get exec permission while this new
609	stack was prepared. Detect if this was possible and
610	change the permission if necessary. /*
611	if (__builtin_expect ((GL(dl_stack_flags) & PF_X) != `0`
612	&& (prot & PROT_EXEC) == `0`, `0`))
613	{
614	int err = change_stack_perm (pd
615	#ifdef NEED_SEPARATE_REGISTER_STACK
616	, ~pagesize_m1
617	#endif
618	);
619	if (err != `0`)
620	{
621	/ Free the stack memory we just allocated. /
622	(void) munmap (mem, size);
623
624	return err;
625	}
626	}
627
628
629	/ Note that all of the stack and the thread descriptor is*
630	zeroed. This means we do not have to initialize fields
631	with initial value zero. This is specifically true for
632	the 'tid' field which is always set back to zero once the
633	stack is not used anymore and for the 'guardsize' field
634	which will be read next. /*
635	}
636
637	/ Create or resize the guard area if necessary. /
638	if (__glibc_unlikely (guardsize > pd->guardsize))
639	{
640	#ifdef NEED_SEPARATE_REGISTER_STACK
641	char *guard = mem + (((size - guardsize) / `2`) & ~pagesize_m1);
642	#elif _STACK_GROWS_DOWN
643	char *guard = mem;
644	#elif _STACK_GROWS_UP
645	char guard = (char* *) (((uintptr_t) pd - guardsize) & ~pagesize_m1);
646	#endif
647	if (mprotect (guard, guardsize, PROT_NONE) != `0`)
648	{
649	mprot_error:
650	lll_lock (stack_cache_lock, LLL_PRIVATE);
651
652	/ Remove the thread from the list. /
653	stack_list_del (&pd->list);
654
655	lll_unlock (stack_cache_lock, LLL_PRIVATE);
656
657	/ Get rid of the TLS block we allocated. /
658	_dl_deallocate_tls (TLS_TPADJ (pd), false);
659
660	/ Free the stack memory regardless of whether the size*
661	of the cache is over the limit or not. If this piece
662	of memory caused problems we better do not use it
663	anymore. Uh, and we ignore possible errors. There
664	is nothing we could do. /*
665	(void) munmap (mem, size);
666
667	return errno;
668	}
669
670	pd->guardsize = guardsize;
671	}
672	else if (__builtin_expect (pd->guardsize - guardsize > size - reqsize,
673	`0`))
674	{
675	/ The old guard area is too large. /
676
677	#ifdef NEED_SEPARATE_REGISTER_STACK
678	char *guard = mem + (((size - guardsize) / `2`) & ~pagesize_m1);
679	char *oldguard = mem + (((size - pd->guardsize) / `2`) & ~pagesize_m1);
680
681	if (oldguard < guard
682	&& mprotect (oldguard, guard - oldguard, prot) != `0`)
683	goto mprot_error;
684
685	if (mprotect (guard + guardsize,
686	oldguard + pd->guardsize - guard - guardsize,
687	prot) != `0`)
688	goto mprot_error;
689	#elif _STACK_GROWS_DOWN
690	if (mprotect ((char *) mem + guardsize, pd->guardsize - guardsize,
691	prot) != `0`)
692	goto mprot_error;
693	#elif _STACK_GROWS_UP
694	if (mprotect ((char *) pd - pd->guardsize,
695	pd->guardsize - guardsize, prot) != `0`)
696	goto mprot_error;
697	#endif
698
699	pd->guardsize = guardsize;
700	}
701	/ The pthread_getattr_np() calls need to get passed the size*
702	requested in the attribute, regardless of how large the
703	actually used guardsize is. /*
704	pd->reported_guardsize = guardsize;
705	}
706
707	/ Initialize the lock. We have to do this unconditionally since the*
708	stillborn thread could be canceled while the lock is taken. /*
709	pd->lock = LLL_LOCK_INITIALIZER;
710
711	/ The robust mutex lists also need to be initialized*
712	unconditionally because the cleanup for the previous stack owner
713	might have happened in the kernel. /*
714	pd->robust_head.futex_offset = (offsetof (pthread_mutex_t, __data.__lock)
715	- offsetof (pthread_mutex_t,
716	__data.__list.__next));
717	pd->robust_head.list_op_pending = NULL;
718	#ifdef __PTHREAD_MUTEX_HAVE_PREV
719	pd->robust_prev = &pd->robust_head;
720	#endif
721	pd->robust_head.list = &pd->robust_head;
722
723	/ We place the thread descriptor at the end of the stack. /
724	*pdp = pd;
725
726	#if _STACK_GROWS_DOWN
727	void *stacktop;
728
729	# if TLS_TCB_AT_TP
730	/ The stack begins before the TCB and the static TLS block. /
731	stacktop = ((char *) (pd + `1`) - __static_tls_size);
732	# elif TLS_DTV_AT_TP
733	stacktop = (char *) (pd - `1`);
734	# endif
735
736	# ifdef NEED_SEPARATE_REGISTER_STACK
737	*stack = pd->stackblock;
738	stacksize = stacktop - stack;
739	# else
740	*stack = stacktop;
741	# endif
742	#else
743	*stack = pd->stackblock;
744	#endif
745
746	return `0`;
747	}
748
749
750	void
751	internal_function
752	__deallocate_stack (struct pthread *pd)
753	{
754	lll_lock (stack_cache_lock, LLL_PRIVATE);
755
756	/ Remove the thread from the list of threads with user defined*
757	stacks. /*
758	stack_list_del (&pd->list);
759
760	/ Not much to do. Just free the mmap()ed memory. Note that we do*
761	not reset the 'used' flag in the 'tid' field. This is done by
762	the kernel. If no thread has been created yet this field is
763	still zero. /*
764	if (__glibc_likely (! pd->user_stack))
765	(void) queue_stack (pd);
766	else
767	/ Free the memory associated with the ELF TLS. /
768	_dl_deallocate_tls (TLS_TPADJ (pd), false);
769
770	lll_unlock (stack_cache_lock, LLL_PRIVATE);
771	}
772
773
774	int
775	internal_function
776	__make_stacks_executable (void **stack_endp)
777	{
778	/ First the main thread's stack. /
779	int err = _dl_make_stack_executable (stack_endp);
780	if (err != `0`)
781	return err;
782
783	#ifdef NEED_SEPARATE_REGISTER_STACK
784	const size_t pagemask = ~(__getpagesize () - `1`);
785	#endif
786
787	lll_lock (stack_cache_lock, LLL_PRIVATE);
788
789	list_t *runp;
790	list_for_each (runp, &stack_used)
791	{
792	err = change_stack_perm (list_entry (runp, struct pthread, list)
793	#ifdef NEED_SEPARATE_REGISTER_STACK
794	, pagemask
795	#endif
796	);
797	if (err != `0`)
798	break;
799	}
800
801	/ Also change the permission for the currently unused stacks. This*
802	might be wasted time but better spend it here than adding a check
803	in the fast path. /*
804	if (err == `0`)
805	list_for_each (runp, &stack_cache)
806	{
807	err = change_stack_perm (list_entry (runp, struct pthread, list)
808	#ifdef NEED_SEPARATE_REGISTER_STACK
809	, pagemask
810	#endif
811	);
812	if (err != `0`)
813	break;
814	}
815
816	lll_unlock (stack_cache_lock, LLL_PRIVATE);
817
818	return err;
819	}
820
821
822	/ In case of a fork() call the memory allocation in the child will be*
823	the same but only one thread is running. All stacks except that of
824	the one running thread are not used anymore. We have to recycle
825	them. /*
826	void
827	__reclaim_stacks (void)
828	{
829	struct pthread self = (struct* pthread *) THREAD_SELF;
830
831	/ No locking necessary. The caller is the only stack in use. But*
832	we have to be aware that we might have interrupted a list
833	operation. /*
834
835	if (in_flight_stack != `0`)
836	{
837	bool add_p = in_flight_stack & `1`;
838	list_t elem = (list_t ) (in_flight_stack & ~(uintptr_t) `1`);
839
840	if (add_p)
841	{
842	/ We always add at the beginning of the list. So in this case we*
843	only need to check the beginning of these lists to see if the
844	pointers at the head of the list are inconsistent. /*
845	list_t *l = NULL;
846
847	if (stack_used.next->prev != &stack_used)
848	l = &stack_used;
849	else if (stack_cache.next->prev != &stack_cache)
850	l = &stack_cache;
851
852	if (l != NULL)
853	{
854	assert (l->next->prev == elem);
855	elem->next = l->next;
856	elem->prev = l;
857	l->next = elem;
858	}
859	}
860	else
861	{
862	/ We can simply always replay the delete operation. /
863	elem->next->prev = elem->prev;
864	elem->prev->next = elem->next;
865	}
866	}
867
868	/ Mark all stacks except the still running one as free. /
869	list_t *runp;
870	list_for_each (runp, &stack_used)
871	{
872	struct pthread curp = list_entry (runp, struct* pthread, list);
873	if (curp != self)
874	{
875	/ This marks the stack as free. /
876	curp->tid = `0`;
877
878	/ The PID field must be initialized for the new process. /
879	curp->pid = self->pid;
880
881	/ Account for the size of the stack. /
882	stack_cache_actsize += curp->stackblock_size;
883
884	if (curp->specific_used)
885	{
886	/ Clear the thread-specific data. /
887	memset (curp->specific_1stblock, `'\0'`,
888	sizeof (curp->specific_1stblock));
889
890	curp->specific_used = false;
891
892	for (size_t cnt = `1`; cnt < PTHREAD_KEY_1STLEVEL_SIZE; ++cnt)
893	if (curp->specific[cnt] != NULL)
894	{
895	memset (curp->specific[cnt], `'\0'`,
896	sizeof (curp->specific_1stblock));
897
898	/ We have allocated the block which we do not*
899	free here so re-set the bit. /*
900	curp->specific_used = true;
901	}
902	}
903	}
904	}
905
906	/ Reset the PIDs in any cached stacks. /
907	list_for_each (runp, &stack_cache)
908	{
909	struct pthread curp = list_entry (runp, struct* pthread, list);
910	curp->pid = self->pid;
911	}
912
913	/ Add the stack of all running threads to the cache. /
914	list_splice (&stack_used, &stack_cache);
915
916	/ Remove the entry for the current thread to from the cache list*
917	and add it to the list of running threads. Which of the two
918	lists is decided by the user_stack flag. /*
919	stack_list_del (&self->list);
920
921	/ Re-initialize the lists for all the threads. /
922	INIT_LIST_HEAD (&stack_used);
923	INIT_LIST_HEAD (&__stack_user);
924
925	if (__glibc_unlikely (THREAD_GETMEM (self, user_stack)))
926	list_add (&self->list, &__stack_user);
927	else
928	list_add (&self->list, &stack_used);
929
930	/ There is one thread running. /
931	__nptl_nthreads = `1`;
932
933	in_flight_stack = `0`;
934
935	/ Initialize locks. /
936	stack_cache_lock = LLL_LOCK_INITIALIZER;
937	__default_pthread_attr_lock = LLL_LOCK_INITIALIZER;
938	}
939
940
#if HP_TIMING_AVAIL
# undef __find_thread_by_id
/* Find a thread given the thread ID.  Both the list of stacks in use
   and the list of threads with user-provided stacks are searched while
   holding the cache lock.  Returns the descriptor whose 'tid' equals
   TID, or NULL if there is none.  */
attribute_hidden
struct pthread *
__find_thread_by_id (pid_t tid)
{
  struct pthread *result = NULL;

  lll_lock (stack_cache_lock, LLL_PRIVATE);

  /* Iterate over the list with system-allocated threads first.  */
  list_t *runp;
  list_for_each (runp, &stack_used)
    {
      struct pthread *curp;

      curp = list_entry (runp, struct pthread, list);

      if (curp->tid == tid)
        {
          result = curp;
          goto out;
        }
    }

  /* Now the list with threads using user-allocated stacks.  */
  list_for_each (runp, &__stack_user)
    {
      struct pthread *curp;

      curp = list_entry (runp, struct pthread, list);

      if (curp->tid == tid)
        {
          result = curp;
          goto out;
        }
    }

 out:
  lll_unlock (stack_cache_lock, LLL_PRIVATE);

  return result;
}
#endif
987
988
989	#ifdef SIGSETXID
990	static void
991	internal_function
992	setxid_mark_thread (struct xid_command cmdp, struct* pthread *t)
993	{
994	int ch;
995
996	/ Wait until this thread is cloned. /
997	if (t->setxid_futex == -`1`
998	&& ! atomic_compare_and_exchange_bool_acq (&t->setxid_futex, -`2`, -`1`))
999	do
1000	futex_wait_simple (&t->setxid_futex, -`2`, FUTEX_PRIVATE);
1001	while (t->setxid_futex == -`2`);
1002
1003	/ Don't let the thread exit before the setxid handler runs. /
1004	t->setxid_futex = `0`;
1005
1006	do
1007	{
1008	ch = t->cancelhandling;
1009
1010	/ If the thread is exiting right now, ignore it. /
1011	if ((ch & EXITING_BITMASK) != `0`)
1012	{
1013	/ Release the futex if there is no other setxid in*
1014	progress. /*
1015	if ((ch & SETXID_BITMASK) == `0`)
1016	{
1017	t->setxid_futex = `1`;
1018	futex_wake (&t->setxid_futex, `1`, FUTEX_PRIVATE);
1019	}
1020	return;
1021	}
1022	}
1023	while (atomic_compare_and_exchange_bool_acq (&t->cancelhandling,
1024	ch \| SETXID_BITMASK, ch));
1025	}
1026
1027
1028	static void
1029	internal_function
1030	setxid_unmark_thread (struct xid_command cmdp, struct* pthread *t)
1031	{
1032	int ch;
1033
1034	do
1035	{
1036	ch = t->cancelhandling;
1037	if ((ch & SETXID_BITMASK) == `0`)
1038	return;
1039	}
1040	while (atomic_compare_and_exchange_bool_acq (&t->cancelhandling,
1041	ch & ~SETXID_BITMASK, ch));
1042
1043	/ Release the futex just in case. /
1044	t->setxid_futex = `1`;
1045	futex_wake (&t->setxid_futex, `1`, FUTEX_PRIVATE);
1046	}
1047
1048
1049	static int
1050	internal_function
1051	setxid_signal_thread (struct xid_command cmdp, struct* pthread *t)
1052	{
1053	if ((t->cancelhandling & SETXID_BITMASK) == `0`)
1054	return `0`;
1055
1056	int val;
1057	INTERNAL_SYSCALL_DECL (err);
1058	val = INTERNAL_SYSCALL (tgkill, err, `3`, THREAD_GETMEM (THREAD_SELF, pid),
1059	t->tid, SIGSETXID);
1060
1061	/ If this failed, it must have had not started yet or else exited. /
1062	if (!INTERNAL_SYSCALL_ERROR_P (val, err))
1063	{
1064	atomic_increment (&cmdp->cntr);
1065	return `1`;
1066	}
1067	else
1068	return `0`;
1069	}
1070
1071	/ Check for consistency across setid system call results. The abort
1072	should not happen as long as all privileges changes happen through
1073	the glibc wrappers. ERROR must be 0 (no error) or an errno
1074	code. /*
1075	void
1076	attribute_hidden
1077	__nptl_setxid_error (struct xid_command cmdp, int* error)
1078	{
1079	do
1080	{
1081	int olderror = cmdp->error;
1082	if (olderror == error)
1083	break;
1084	if (olderror != -`1`)
1085	/ Mismatch between current and previous results. /
1086	abort ();
1087	}
1088	while (atomic_compare_and_exchange_bool_acq (&cmdp->error, error, -`1`));
1089	}
1090
1091	int
1092	attribute_hidden
1093	__nptl_setxid (struct xid_command *cmdp)
1094	{
1095	int signalled;
1096	int result;
1097	lll_lock (stack_cache_lock, LLL_PRIVATE);
1098
1099	__xidcmd = cmdp;
1100	cmdp->cntr = `0`;
1101	cmdp->error = -`1`;
1102
1103	struct pthread *self = THREAD_SELF;
1104
1105	/ Iterate over the list with system-allocated threads first. /
1106	list_t *runp;
1107	list_for_each (runp, &stack_used)
1108	{
1109	struct pthread t = list_entry (runp, struct* pthread, list);
1110	if (t == self)
1111	continue;
1112
1113	setxid_mark_thread (cmdp, t);
1114	}
1115
1116	/ Now the list with threads using user-allocated stacks. /
1117	list_for_each (runp, &__stack_user)
1118	{
1119	struct pthread t = list_entry (runp, struct* pthread, list);
1120	if (t == self)
1121	continue;
1122
1123	setxid_mark_thread (cmdp, t);
1124	}
1125
1126	/ Iterate until we don't succeed in signalling anyone. That means*
1127	we have gotten all running threads, and their children will be
1128	automatically correct once started. /*
1129	do
1130	{
1131	signalled = `0`;
1132
1133	list_for_each (runp, &stack_used)
1134	{
1135	struct pthread t = list_entry (runp, struct* pthread, list);
1136	if (t == self)
1137	continue;
1138
1139	signalled += setxid_signal_thread (cmdp, t);
1140	}
1141
1142	list_for_each (runp, &__stack_user)
1143	{
1144	struct pthread t = list_entry (runp, struct* pthread, list);
1145	if (t == self)
1146	continue;
1147
1148	signalled += setxid_signal_thread (cmdp, t);
1149	}
1150
1151	int cur = cmdp->cntr;
1152	while (cur != `0`)
1153	{
1154	futex_wait_simple ((unsigned int *) &cmdp->cntr, cur,
1155	FUTEX_PRIVATE);
1156	cur = cmdp->cntr;
1157	}
1158	}
1159	while (signalled != `0`);
1160
1161	/ Clean up flags, so that no thread blocks during exit waiting*
1162	for a signal which will never come. /*
1163	list_for_each (runp, &stack_used)
1164	{
1165	struct pthread t = list_entry (runp, struct* pthread, list);
1166	if (t == self)
1167	continue;
1168
1169	setxid_unmark_thread (cmdp, t);
1170	}
1171
1172	list_for_each (runp, &__stack_user)
1173	{
1174	struct pthread t = list_entry (runp, struct* pthread, list);
1175	if (t == self)
1176	continue;
1177
1178	setxid_unmark_thread (cmdp, t);
1179	}
1180
1181	/ This must be last, otherwise the current thread might not have*
1182	permissions to send SIGSETXID syscall to the other threads. /*
1183	INTERNAL_SYSCALL_DECL (err);
1184	result = INTERNAL_SYSCALL_NCS (cmdp->syscall_no, err, `3`,
1185	cmdp->id[`0`], cmdp->id[`1`], cmdp->id[`2`]);
1186	int error = `0`;
1187	if (__glibc_unlikely (INTERNAL_SYSCALL_ERROR_P (result, err)))
1188	{
1189	error = INTERNAL_SYSCALL_ERRNO (result, err);
1190	__set_errno (error);
1191	result = -`1`;
1192	}
1193	__nptl_setxid_error (cmdp, error);
1194
1195	lll_unlock (stack_cache_lock, LLL_PRIVATE);
1196	return result;
1197	}
1198	#endif /* SIGSETXID. */
1199
1200
1201	static inline void __attribute__((always_inline))
1202	init_one_static_tls (struct pthread curp, struct* link_map *map)
1203	{
1204	# if TLS_TCB_AT_TP
1205	void dest = (char* *) curp - map->l_tls_offset;
1206	# elif TLS_DTV_AT_TP
1207	void dest = (char* *) curp + map->l_tls_offset + TLS_PRE_TCB_SIZE;
1208	# else
1209	# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
1210	# endif
1211
1212	/ We cannot delay the initialization of the Static TLS area, since*
1213	it can be accessed with LE or IE, but since the DTV is only used
1214	by GD and LD, we can delay its update to avoid a race. /*
1215	memset (__mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size),
1216	`'\0'`, map->l_tls_blocksize - map->l_tls_initimage_size);
1217	}
1218
1219	void
1220	attribute_hidden
1221	__pthread_init_static_tls (struct link_map *map)
1222	{
1223	lll_lock (stack_cache_lock, LLL_PRIVATE);
1224
1225	/ Iterate over the list with system-allocated threads first. /
1226	list_t *runp;
1227	list_for_each (runp, &stack_used)
1228	init_one_static_tls (list_entry (runp, struct pthread, list), map);
1229
1230	/ Now the list with threads using user-allocated stacks. /
1231	list_for_each (runp, &__stack_user)
1232	init_one_static_tls (list_entry (runp, struct pthread, list), map);
1233
1234	lll_unlock (stack_cache_lock, LLL_PRIVATE);
1235	}
1236
1237
1238	void
1239	attribute_hidden
1240	__wait_lookup_done (void)
1241	{
1242	lll_lock (stack_cache_lock, LLL_PRIVATE);
1243
1244	struct pthread *self = THREAD_SELF;
1245
1246	/ Iterate over the list with system-allocated threads first. /
1247	list_t *runp;
1248	list_for_each (runp, &stack_used)
1249	{
1250	struct pthread t = list_entry (runp, struct* pthread, list);
1251	if (t == self \|\| t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED)
1252	continue;
1253
1254	int *const gscope_flagp = &t->header.gscope_flag;
1255
1256	/ We have to wait until this thread is done with the global*
1257	scope. First tell the thread that we are waiting and
1258	possibly have to be woken. /*
1259	if (atomic_compare_and_exchange_bool_acq (gscope_flagp,
1260	THREAD_GSCOPE_FLAG_WAIT,
1261	THREAD_GSCOPE_FLAG_USED))
1262	continue;
1263
1264	do
1265	futex_wait_simple ((unsigned int *) gscope_flagp,
1266	THREAD_GSCOPE_FLAG_WAIT, FUTEX_PRIVATE);
1267	while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT);
1268	}
1269
1270	/ Now the list with threads using user-allocated stacks. /
1271	list_for_each (runp, &__stack_user)
1272	{
1273	struct pthread t = list_entry (runp, struct* pthread, list);
1274	if (t == self \|\| t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED)
1275	continue;
1276
1277	int *const gscope_flagp = &t->header.gscope_flag;
1278
1279	/ We have to wait until this thread is done with the global*
1280	scope. First tell the thread that we are waiting and
1281	possibly have to be woken. /*
1282	if (atomic_compare_and_exchange_bool_acq (gscope_flagp,
1283	THREAD_GSCOPE_FLAG_WAIT,
1284	THREAD_GSCOPE_FLAG_USED))
1285	continue;
1286
1287	do
1288	futex_wait_simple ((unsigned int *) gscope_flagp,
1289	THREAD_GSCOPE_FLAG_WAIT, FUTEX_PRIVATE);
1290	while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT);
1291	}
1292
1293	lll_unlock (stack_cache_lock, LLL_PRIVATE);
1294	}
1295

Browse the source code of glibc/nptl/allocatestack.c