kern_exec.c source code [codebrowser/bsd/kern/kern_exec.c]

1	/*
2	* Copyright (c) 2000-2013 Apple Inc. All rights reserved.
3	*
4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5	*
6	* This file contains Original Code and/or Modifications of Original Code
7	* as defined in and that are subject to the Apple Public Source License
8	* Version 2.0 (the 'License'). You may not use this file except in
9	* compliance with the License. The rights granted to you under the License
10	* may not be used to create, or enable the creation or redistribution of,
11	* unlawful or unlicensed copies of an Apple operating system, or to
12	* circumvent, violate, or enable the circumvention or violation of, any
13	* terms of an Apple operating system software license agreement.
14	*
15	* Please obtain a copy of the License at
16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
17	*
18	* The Original Code and all software distributed under the License are
19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23	* Please see the License for the specific language governing rights and
24	* limitations under the License.
25	*
26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27	*/
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29	/*
30	* Mach Operating System
31	* Copyright (c) 1987 Carnegie-Mellon University
32	* All rights reserved. The CMU software License Agreement specifies
33	* the terms and conditions for use and redistribution.
34	*/
35
/*-
37	* Copyright (c) 1982, 1986, 1991, 1993
38	* The Regents of the University of California. All rights reserved.
39	* (c) UNIX System Laboratories, Inc.
40	* All or some portions of this file are derived from material licensed
41	* to the University of California by American Telephone and Telegraph
42	* Co. or Unix System Laboratories, Inc. and are reproduced herein with
43	* the permission of UNIX System Laboratories, Inc.
44	*
45	* Redistribution and use in source and binary forms, with or without
46	* modification, are permitted provided that the following conditions
47	* are met:
48	* 1. Redistributions of source code must retain the above copyright
49	* notice, this list of conditions and the following disclaimer.
50	* 2. Redistributions in binary form must reproduce the above copyright
51	* notice, this list of conditions and the following disclaimer in the
52	* documentation and/or other materials provided with the distribution.
53	* 3. All advertising materials mentioning features or use of this software
54	* must display the following acknowledgement:
55	* This product includes software developed by the University of
56	* California, Berkeley and its contributors.
57	* 4. Neither the name of the University nor the names of its contributors
58	* may be used to endorse or promote products derived from this software
59	* without specific prior written permission.
60	*
61	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
62	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
63	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
64	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
65	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
66	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
67	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
68	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
69	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
70	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
71	* SUCH DAMAGE.
72	*
73	* from: @(#)kern_exec.c 8.1 (Berkeley) 6/10/93
74	*/
75	/*
76	* NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
77	* support for mandatory and extensible security protections. This notice
78	* is included in support of clause 2.2 (b) of the Apple Public License,
79	* Version 2.0.
80	*/
81	#include <machine/reg.h>
82	#include <machine/cpu_capabilities.h>
83
84	#include <sys/param.h>
85	#include <sys/systm.h>
86	#include <sys/filedesc.h>
87	#include <sys/kernel.h>
88	#include <sys/proc_internal.h>
89	#include <sys/kauth.h>
90	#include <sys/user.h>
91	#include <sys/socketvar.h>
92	#include <sys/malloc.h>
93	#include <sys/namei.h>
94	#include <sys/mount_internal.h>
95	#include <sys/vnode_internal.h>
96	#include <sys/file_internal.h>
97	#include <sys/stat.h>
98	#include <sys/uio_internal.h>
99	#include <sys/acct.h>
100	#include <sys/exec.h>
101	#include <sys/kdebug.h>
102	#include <sys/signal.h>
103	#include <sys/aio_kern.h>
104	#include <sys/sysproto.h>
105	#include <sys/persona.h>
106	#include <sys/reason.h>
107	#if SYSV_SHM
108	#include <sys/shm_internal.h> /* shmexec() */
109	#endif
110	#include <sys/ubc_internal.h> /* ubc_map() */
111	#include <sys/spawn.h>
112	#include <sys/spawn_internal.h>
113	#include <sys/process_policy.h>
114	#include <sys/codesign.h>
115	#include <sys/random.h>
116	#include <crypto/sha1.h>
117
118	#include <libkern/libkern.h>
119
120	#include <security/audit/audit.h>
121
122	#include <ipc/ipc_types.h>
123
124	#include <mach/mach_types.h>
125	#include <mach/port.h>
126	#include <mach/task.h>
127	#include <mach/task_access.h>
128	#include <mach/thread_act.h>
129	#include <mach/vm_map.h>
130	#include <mach/mach_vm.h>
131	#include <mach/vm_param.h>
132
133	#include <kern/sched_prim.h> /* thread_wakeup() */
134	#include <kern/affinity.h>
135	#include <kern/assert.h>
136	#include <kern/task.h>
137	#include <kern/coalition.h>
138	#include <kern/policy_internal.h>
139	#include <kern/kalloc.h>
140
141	#include <os/log.h>
142
143	#if CONFIG_MACF
144	#include <security/mac_framework.h>
145	#include <security/mac_mach_internal.h>
146	#endif
147
148	#include <vm/vm_map.h>
149	#include <vm/vm_kern.h>
150	#include <vm/vm_protos.h>
151	#include <vm/vm_kern.h>
152	#include <vm/vm_fault.h>
153	#include <vm/vm_pageout.h>
154
155	#include <kdp/kdp_dyld.h>
156
157	#include <machine/pal_routines.h>
158
159	#include <pexpert/pexpert.h>
160
161	#if CONFIG_MEMORYSTATUS
162	#include <sys/kern_memorystatus.h>
163	#endif
164
165	extern boolean_t vm_darkwake_mode;
166
167	#if CONFIG_DTRACE
168	/ Do not include dtrace.h, it redefines kmem_[alloc/free] /
169	extern void dtrace_proc_exec(proc_t);
170	extern void (*dtrace_proc_waitfor_exec_ptr)(proc_t);
171
172	/*
173	* Since dtrace_proc_waitfor_exec_ptr can be added/removed in dtrace_subr.c,
174	* we will store its value before actually calling it.
175	*/
176	static void (*dtrace_proc_waitfor_hook)(proc_t) = NULL;
177
178	#include <sys/dtrace_ptss.h>
179	#endif
180
181	/ support for child creation in exec after vfork /
182	thread_t fork_create_child(task_t parent_task,
183	coalition_t *parent_coalition,
184	proc_t child_proc,
185	int inherit_memory,
186	int is_64bit_addr,
187	int is_64bit_data,
188	int in_exec);
189	void vfork_exit(proc_t p, int rv);
190	extern void proc_apply_task_networkbg_internal(proc_t, thread_t);
191	extern void task_set_did_exec_flag(task_t task);
192	extern void task_clear_exec_copy_flag(task_t task);
193	proc_t proc_exec_switch_task(proc_t p, task_t old_task, task_t new_task, thread_t new_thread);
194	boolean_t task_is_active(task_t);
195	boolean_t thread_is_active(thread_t thread);
196	void thread_copy_resource_info(thread_t dst_thread, thread_t src_thread);
197	void *ipc_importance_exec_switch_task(task_t old_task, task_t new_task);
198	extern void ipc_importance_release(void *elem);
199
200	/*
201	* Mach things for which prototypes are unavailable from Mach headers
202	*/
203	void ipc_task_reset(
204	task_t task);
205	void ipc_thread_reset(
206	thread_t thread);
207	kern_return_t ipc_object_copyin(
208	ipc_space_t space,
209	mach_port_name_t name,
210	mach_msg_type_name_t msgt_name,
211	ipc_object_t *objectp);
212	void ipc_port_release_send(ipc_port_t);
213
214	#if DEVELOPMENT \|\| DEBUG
215	void task_importance_update_owner_info(task_t);
216	#endif
217
218	extern struct savearea *get_user_regs(thread_t);
219
220	__attribute__((noinline)) int __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__(mach_port_t task_access_port, int32_t new_pid);
221
222	#include <kern/thread.h>
223	#include <kern/task.h>
224	#include <kern/ast.h>
225	#include <kern/mach_loader.h>
226	#include <kern/mach_fat.h>
227	#include <mach-o/fat.h>
228	#include <mach-o/loader.h>
229	#include <machine/vmparam.h>
230	#include <sys/imgact.h>
231
232	#include <sys/sdt.h>
233
234
/*
 * EAI_ITERLIMIT	The maximum number of times to iterate an image
 *			activator in exec_activate_image() before treating
 *			it as malformed/corrupt.
 */
#define EAI_ITERLIMIT		3
241
/*
 * For #! interpreter parsing
 *
 * IS_WHITESPACE: true for a space or a horizontal tab.
 * IS_EOL:        true for '#' (comment start) or a newline.
 *
 * Both macros evaluate their argument twice; do not pass an expression
 * with side effects.
 */
#define IS_WHITESPACE(ch)	(((ch) == ' ') || ((ch) == '\t'))
#define IS_EOL(ch)		(((ch) == '#') || ((ch) == '\n'))
247
248	extern vm_map_t bsd_pageable_map;
249	extern const struct fileops vnops;
250
251	#define USER_ADDR_ALIGN(addr, val) \
252	( ( (user_addr_t)(addr) + (val) - 1) \
253	& ~((val) - 1) )
254
255	/ Platform Code Exec Logging /
256	static int platform_exec_logging = `0`;
257
258	SYSCTL_DECL(_security_mac);
259
260	SYSCTL_INT(_security_mac, OID_AUTO, platform_exec_logging, CTLFLAG_RW, &platform_exec_logging, `0`,
261	"log cdhashes for all platform binary executions");
262
263	static os_log_t peLog = OS_LOG_DEFAULT;
264
265	struct image_params; / Forward /
266	static int exec_activate_image(struct image_params *imgp);
267	static int exec_copyout_strings(struct image_params imgp, user_addr_t stackp);
268	static int load_return_to_errno(load_return_t lrtn);
269	static int execargs_alloc(struct image_params *imgp);
270	static int execargs_free(struct image_params *imgp);
271	static int exec_check_permissions(struct image_params *imgp);
272	static int exec_extract_strings(struct image_params *imgp);
273	static int exec_add_apple_strings(struct image_params imgp, const* load_result_t *load_result);
274	static int exec_handle_sugid(struct image_params *imgp);
275	static int sugid_scripts = `0`;
276	SYSCTL_INT (_kern, OID_AUTO, sugid_scripts, CTLFLAG_RW \| CTLFLAG_LOCKED, &sugid_scripts, `0`, "");
277	static kern_return_t create_unix_stack(vm_map_t map, load_result_t* load_result, proc_t p);
278	static int copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size);
279	static void exec_resettextvp(proc_t, struct image_params *);
280	static int check_for_signature(proc_t, struct image_params *);
281	static void exec_prefault_data(proc_t, struct image_params , load_result_t );
282	static errno_t exec_handle_port_actions(struct image_params imgp, boolean_t portwatch_present, ipc_port_t * portwatch_ports);
283	static errno_t exec_handle_spawnattr_policy(proc_t p, int psa_apptype, uint64_t psa_qos_clamp, uint64_t psa_darwin_role,
284	ipc_port_t * portwatch_ports, int portwatch_count);
285
286	/*
287	* exec_add_user_string
288	*
289	* Add the requested string to the string space area.
290	*
291	* Parameters; struct image_params * image parameter block
292	* user_addr_t string to add to strings area
293	* int segment from which string comes
294	* boolean_t TRUE if string contributes to NCARGS
295	*
296	* Returns: 0 Success
297	* !0 Failure errno from copyinstr()
298	*
299	* Implicit returns:
300	* (imgp->ip_strendp) updated location of next add, if any
301	* (imgp->ip_strspace) updated byte count of space remaining
302	* (imgp->ip_argspace) updated byte count of space in NCARGS
303	*/
304	static int
305	exec_add_user_string(struct image_params imgp, user_addr_t str, int* seg, boolean_t is_ncargs)
306	{
307	int error = `0`;
308
309	do {
310	size_t len = `0`;
311	int space;
312
313	if (is_ncargs)
314	space = imgp->ip_argspace; / by definition smaller than ip_strspace /
315	else
316	space = imgp->ip_strspace;
317
318	if (space <= `0`) {
319	error = E2BIG;
320	break;
321	}
322
323	if (!UIO_SEG_IS_USER_SPACE(seg)) {
324	char kstr = CAST_DOWN(char* ,str); /* SAFE /
325	error = copystr(kstr, imgp->ip_strendp, space, &len);
326	} else {
327	error = copyinstr(str, imgp->ip_strendp, space, &len);
328	}
329
330	imgp->ip_strendp += len;
331	imgp->ip_strspace -= len;
332	if (is_ncargs)
333	imgp->ip_argspace -= len;
334
335	} while (error == ENAMETOOLONG);
336
337	return error;
338	}
339
/*
 * dyld is now passed the executable path as a getenv-like variable
 * in the same fashion as the stack_guard and malloc_entropy keys.
 */
#define EXECUTABLE_KEY "executable_path="
345
346	/*
347	* exec_save_path
348	*
349	* To support new app package launching for Mac OS X, the dyld needs the
350	* first argument to execve() stored on the user stack.
351	*
352	* Save the executable path name at the bottom of the strings area and set
353	* the argument vector pointer to the location following that to indicate
354	* the start of the argument and environment tuples, setting the remaining
355	* string space count to the size of the string area minus the path length.
356	*
357	* Parameters; struct image_params * image parameter block
358	* char * path used to invoke program
359	* int segment from which path comes
360	*
361	* Returns: int 0 Success
362	* EFAULT Bad address
363	* copy[in]str:EFAULT Bad address
364	* copy[in]str:ENAMETOOLONG Filename too long
365	*
366	* Implicit returns:
367	* (imgp->ip_strings) saved path
368	* (imgp->ip_strspace) space remaining in ip_strings
369	* (imgp->ip_strendp) start of remaining copy area
370	* (imgp->ip_argspace) space remaining of NCARGS
371	* (imgp->ip_applec) Initial applev[0]
372	*
373	* Note: We have to do this before the initial namei() since in the
374	* path contains symbolic links, namei() will overwrite the
375	* original path buffer contents. If the last symbolic link
376	* resolved was a relative pathname, we would lose the original
377	* "path", which could be an absolute pathname. This might be
378	* unacceptable for dyld.
379	*/
380	static int
381	exec_save_path(struct image_params imgp, user_addr_t path, int* seg, const char **excpath)
382	{
383	int error;
384	size_t len;
385	char *kpath;
386
387	// imgp->ip_strings can come out of a cache, so we need to obliterate the
388	// old path.
389	memset(imgp->ip_strings, `'\0'`, strlen(EXECUTABLE_KEY) + MAXPATHLEN);
390
391	len = MIN(MAXPATHLEN, imgp->ip_strspace);
392
393	switch(seg) {
394	case UIO_USERSPACE32:
395	case UIO_USERSPACE64: / Same for copyin()... /
396	error = copyinstr(path, imgp->ip_strings + strlen(EXECUTABLE_KEY), len, &len);
397	break;
398	case UIO_SYSSPACE:
399	kpath = CAST_DOWN(char ,path); /* SAFE /
400	error = copystr(kpath, imgp->ip_strings + strlen(EXECUTABLE_KEY), len, &len);
401	break;
402	default:
403	error = EFAULT;
404	break;
405	}
406
407	if (!error) {
408	bcopy(EXECUTABLE_KEY, imgp->ip_strings, strlen(EXECUTABLE_KEY));
409	len += strlen(EXECUTABLE_KEY);
410
411	imgp->ip_strendp += len;
412	imgp->ip_strspace -= len;
413
414	if (excpath) {
415	*excpath = imgp->ip_strings + strlen(EXECUTABLE_KEY);
416	}
417	}
418
419	return(error);
420	}
421
422	/*
423	* exec_reset_save_path
424	*
425	* If we detect a shell script, we need to reset the string area
426	* state so that the interpreter can be saved onto the stack.
427
428	* Parameters; struct image_params * image parameter block
429	*
430	* Returns: int 0 Success
431	*
432	* Implicit returns:
433	* (imgp->ip_strings) saved path
434	* (imgp->ip_strspace) space remaining in ip_strings
435	* (imgp->ip_strendp) start of remaining copy area
436	* (imgp->ip_argspace) space remaining of NCARGS
437	*
438	*/
439	static int
440	exec_reset_save_path(struct image_params *imgp)
441	{
442	imgp->ip_strendp = imgp->ip_strings;
443	imgp->ip_argspace = NCARGS;
444	imgp->ip_strspace = ( NCARGS + PAGE_SIZE );
445
446	return (`0`);
447	}
448
449	/*
450	* exec_shell_imgact
451	*
452	* Image activator for interpreter scripts. If the image begins with
453	* the characters "#!", then it is an interpreter script. Verify the
454	* length of the script line indicating the interpreter is not in
455	* excess of the maximum allowed size. If this is the case, then
456	* break out the arguments, if any, which are separated by white
457	* space, and copy them into the argument save area as if they were
458	* provided on the command line before all other arguments. The line
459	* ends when we encounter a comment character ('#') or newline.
460	*
461	* Parameters; struct image_params * image parameter block
462	*
463	* Returns: -1 not an interpreter (keep looking)
464	* -3 Success: interpreter: relookup
465	* >0 Failure: interpreter: error number
466	*
467	* A return value other than -1 indicates subsequent image activators should
468	* not be given the opportunity to attempt to activate the image.
469	*/
470	static int
471	exec_shell_imgact(struct image_params *imgp)
472	{
473	char *vdata = imgp->ip_vdata;
474	char *ihp;
475	char line_startp, line_endp;
476	char *interp;
477
478	/*
479	* Make sure it's a shell script. If we've already redirected
480	* from an interpreted file once, don't do it again.
481	*/
482	if (vdata[`0`] != `'#'` \|\|
483	vdata[`1`] != `'!'` \|\|
484	(imgp->ip_flags & IMGPF_INTERPRET) != `0`) {
485	return (-`1`);
486	}
487
488	if (imgp->ip_origcputype != `0`) {
489	/ Fat header previously matched, don't allow shell script inside /
490	return (-`1`);
491	}
492
493	imgp->ip_flags \|= IMGPF_INTERPRET;
494	imgp->ip_interp_sugid_fd = -`1`;
495	imgp->ip_interp_buffer[`0`] = `'\0'`;
496
497	/ Check to see if SUGID scripts are permitted. If they aren't then*
498	* clear the SUGID bits.
499	* imgp->ip_vattr is known to be valid.
500	*/
501	if (sugid_scripts == `0`) {
502	imgp->ip_origvattr->va_mode &= ~(VSUID \| VSGID);
503	}
504
505	/ Try to find the first non-whitespace character /
506	for( ihp = &vdata[`2`]; ihp < &vdata[IMG_SHSIZE]; ihp++ ) {
507	if (IS_EOL(*ihp)) {
508	/ Did not find interpreter, "#!\n" /
509	return (ENOEXEC);
510	} else if (IS_WHITESPACE(*ihp)) {
511	/ Whitespace, like "#! /bin/sh\n", keep going. /
512	} else {
513	/ Found start of interpreter /
514	break;
515	}
516	}
517
518	if (ihp == &vdata[IMG_SHSIZE]) {
519	/ All whitespace, like "#! " /
520	return (ENOEXEC);
521	}
522
523	line_startp = ihp;
524
525	/ Try to find the end of the interpreter+args string /
526	for ( ; ihp < &vdata[IMG_SHSIZE]; ihp++ ) {
527	if (IS_EOL(*ihp)) {
528	/ Got it /
529	break;
530	} else {
531	/ Still part of interpreter or args /
532	}
533	}
534
535	if (ihp == &vdata[IMG_SHSIZE]) {
536	/ A long line, like "#! blah blah blah" without end /
537	return (ENOEXEC);
538	}
539
540	/ Backtrack until we find the last non-whitespace /
541	while (IS_EOL(ihp) \|\| IS_WHITESPACE(ihp)) {
542	ihp--;
543	}
544
545	/ The character after the last non-whitespace is our logical end of line /
546	line_endp = ihp + `1`;
547
548	/*
549	* Now we have pointers to the usable part of:
550	*
551	* "#! /usr/bin/int first second third \n"
552	* ^ line_startp ^ line_endp
553	*/
554
555	/ copy the interpreter name /
556	interp = imgp->ip_interp_buffer;
557	for ( ihp = line_startp; (ihp < line_endp) && !IS_WHITESPACE(*ihp); ihp++)
558	interp++ = ihp;
559	*interp = `'\0'`;
560
561	exec_reset_save_path(imgp);
562	exec_save_path(imgp, CAST_USER_ADDR_T(imgp->ip_interp_buffer),
563	UIO_SYSSPACE, NULL);
564
565	/ Copy the entire interpreter + args for later processing into argv[] /
566	interp = imgp->ip_interp_buffer;
567	for ( ihp = line_startp; (ihp < line_endp); ihp++)
568	interp++ = ihp;
569	*interp = `'\0'`;
570
571	#if !SECURE_KERNEL
572	/*
573	* If we have an SUID or SGID script, create a file descriptor
574	* from the vnode and pass /dev/fd/%d instead of the actual
575	* path name so that the script does not get opened twice
576	*/
577	if (imgp->ip_origvattr->va_mode & (VSUID \| VSGID)) {
578	proc_t p;
579	struct fileproc *fp;
580	int fd;
581	int error;
582
583	p = vfs_context_proc(imgp->ip_vfs_context);
584	error = falloc(p, &fp, &fd, imgp->ip_vfs_context);
585	if (error)
586	return(error);
587
588	fp->f_fglob->fg_flag = FREAD;
589	fp->f_fglob->fg_ops = &vnops;
590	fp->f_fglob->fg_data = (caddr_t)imgp->ip_vp;
591
592	proc_fdlock(p);
593	procfdtbl_releasefd(p, fd, NULL);
594	fp_drop(p, fd, fp, `1`);
595	proc_fdunlock(p);
596	vnode_ref(imgp->ip_vp);
597
598	imgp->ip_interp_sugid_fd = fd;
599	}
600	#endif
601
602	return (-`3`);
603	}
604
605
606
607	/*
608	* exec_fat_imgact
609	*
610	* Image activator for fat 1.0 binaries. If the binary is fat, then we
611	* need to select an image from it internally, and make that the image
612	* we are going to attempt to execute. At present, this consists of
613	* reloading the first page for the image with a first page from the
614	* offset location indicated by the fat header.
615	*
616	* Parameters; struct image_params * image parameter block
617	*
618	* Returns: -1 not a fat binary (keep looking)
619	* -2 Success: encapsulated binary: reread
620	* >0 Failure: error number
621	*
622	* Important: This image activator is byte order neutral.
623	*
624	* Note: A return value other than -1 indicates subsequent image
625	* activators should not be given the opportunity to attempt
626	* to activate the image.
627	*
628	* If we find an encapsulated binary, we make no assertions
629	* about its validity; instead, we leave that up to a rescan
630	* for an activator to claim it, and, if it is claimed by one,
631	* that activator is responsible for determining validity.
632	*/
633	static int
634	exec_fat_imgact(struct image_params *imgp)
635	{
636	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
637	kauth_cred_t cred = kauth_cred_proc_ref(p);
638	struct fat_header fat_header = (struct* fat_header *)imgp->ip_vdata;
639	struct _posix_spawnattr *psa = NULL;
640	struct fat_arch fat_arch;
641	int resid, error;
642	load_return_t lret;
643
644	if (imgp->ip_origcputype != `0`) {
645	/ Fat header previously matched, don't allow another fat file inside /
646	error = -`1`; / not claimed /
647	goto bad;
648	}
649
650	/ Make sure it's a fat binary /
651	if (OSSwapBigToHostInt32(fat_header->magic) != FAT_MAGIC) {
652	error = -`1`; / not claimed /
653	goto bad;
654	}
655
656	/ imgp->ip_vdata has PAGE_SIZE, zerofilled if the file is smaller /
657	lret = fatfile_validate_fatarches((vm_offset_t)fat_header, PAGE_SIZE);
658	if (lret != LOAD_SUCCESS) {
659	error = load_return_to_errno(lret);
660	goto bad;
661	}
662
663	/ If posix_spawn binprefs exist, respect those prefs. /
664	psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
665	if (psa != NULL && psa->psa_binprefs[`0`] != `0`) {
666	uint32_t pr = `0`;
667
668	/ Check each preference listed against all arches in header /
669	for (pr = `0`; pr < NBINPREFS; pr++) {
670	cpu_type_t pref = psa->psa_binprefs[pr];
671	if (pref == `0`) {
672	/ No suitable arch in the pref list /
673	error = EBADARCH;
674	goto bad;
675	}
676
677	if (pref == CPU_TYPE_ANY) {
678	/ Fall through to regular grading /
679	goto regular_grading;
680	}
681
682	lret = fatfile_getbestarch_for_cputype(pref,
683	(vm_offset_t)fat_header,
684	PAGE_SIZE,
685	&fat_arch);
686	if (lret == LOAD_SUCCESS) {
687	goto use_arch;
688	}
689	}
690
691	/ Requested binary preference was not honored /
692	error = EBADEXEC;
693	goto bad;
694	}
695
696	regular_grading:
697	/ Look up our preferred architecture in the fat file. /
698	lret = fatfile_getbestarch((vm_offset_t)fat_header,
699	PAGE_SIZE,
700	&fat_arch);
701	if (lret != LOAD_SUCCESS) {
702	error = load_return_to_errno(lret);
703	goto bad;
704	}
705
706	use_arch:
707	/ Read the Mach-O header out of fat_arch /
708	error = vn_rdwr(UIO_READ, imgp->ip_vp, imgp->ip_vdata,
709	PAGE_SIZE, fat_arch.offset,
710	UIO_SYSSPACE, (IO_UNIT\|IO_NODELOCKED),
711	cred, &resid, p);
712	if (error) {
713	goto bad;
714	}
715
716	if (resid) {
717	memset(imgp->ip_vdata + (PAGE_SIZE - resid), `0x0`, resid);
718	}
719
720	/ Success. Indicate we have identified an encapsulated binary /
721	error = -`2`;
722	imgp->ip_arch_offset = (user_size_t)fat_arch.offset;
723	imgp->ip_arch_size = (user_size_t)fat_arch.size;
724	imgp->ip_origcputype = fat_arch.cputype;
725	imgp->ip_origcpusubtype = fat_arch.cpusubtype;
726
727	bad:
728	kauth_cred_unref(&cred);
729	return (error);
730	}
731
732	static int
733	activate_exec_state(task_t task, proc_t p, thread_t thread, load_result_t *result)
734	{
735	int ret;
736
737	task_set_dyld_info(task, MACH_VM_MIN_ADDRESS, `0`);
738	task_set_64bit(task, result->is_64bit_addr, result->is_64bit_data);
739	if (result->is_64bit_addr) {
740	OSBitOrAtomic(P_LP64, &p->p_flag);
741	} else {
742	OSBitAndAtomic(~((uint32_t)P_LP64), &p->p_flag);
743	}
744
745	ret = thread_state_initialize(thread);
746	if (ret != KERN_SUCCESS) {
747	return ret;
748	}
749
750	if (result->threadstate) {
751	uint32_t *ts = result->threadstate;
752	uint32_t total_size = result->threadstate_sz;
753
754	while (total_size > `0`) {
755	uint32_t flavor = *ts++;
756	uint32_t size = *ts++;
757
758	ret = thread_setstatus(thread, flavor, (thread_state_t)ts, size);
759	if (ret) {
760	return ret;
761	}
762	ts += size;
763	total_size -= (size + `2`) * sizeof(uint32_t);
764	}
765	}
766
767	thread_setentrypoint(thread, result->entry_point);
768
769	return KERN_SUCCESS;
770	}
771
772
773	/*
774	* Set p->p_comm and p->p_name to the name passed to exec
775	*/
776	static void
777	set_proc_name(struct image_params *imgp, proc_t p)
778	{
779	int p_name_len = sizeof(p->p_name) - `1`;
780
781	if (imgp->ip_ndp->ni_cnd.cn_namelen > p_name_len) {
782	imgp->ip_ndp->ni_cnd.cn_namelen = p_name_len;
783	}
784
785	bcopy((caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, (caddr_t)p->p_name,
786	(unsigned)imgp->ip_ndp->ni_cnd.cn_namelen);
787	p->p_name[imgp->ip_ndp->ni_cnd.cn_namelen] = `'\0'`;
788
789	if (imgp->ip_ndp->ni_cnd.cn_namelen > MAXCOMLEN) {
790	imgp->ip_ndp->ni_cnd.cn_namelen = MAXCOMLEN;
791	}
792
793	bcopy((caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, (caddr_t)p->p_comm,
794	(unsigned)imgp->ip_ndp->ni_cnd.cn_namelen);
795	p->p_comm[imgp->ip_ndp->ni_cnd.cn_namelen] = `'\0'`;
796	}
797
798	static uint64_t get_va_fsid(struct vnode_attr *vap)
799	{
800	if (VATTR_IS_SUPPORTED(vap, va_fsid64)) {
801	return (uint64_t )&vap->va_fsid64;
802	} else {
803	return vap->va_fsid;
804	}
805	}
806
807	/*
808	* exec_mach_imgact
809	*
810	* Image activator for mach-o 1.0 binaries.
811	*
812	* Parameters; struct image_params * image parameter block
813	*
814	* Returns: -1 not a fat binary (keep looking)
815	* -2 Success: encapsulated binary: reread
816	* >0 Failure: error number
817	* EBADARCH Mach-o binary, but with an unrecognized
818	* architecture
819	* ENOMEM No memory for child process after -
820	* can only happen after vfork()
821	*
822	* Important: This image activator is NOT byte order neutral.
823	*
824	* Note: A return value other than -1 indicates subsequent image
825	* activators should not be given the opportunity to attempt
826	* to activate the image.
827	*
828	* TODO: More gracefully handle failures after vfork
829	*/
830	static int
831	exec_mach_imgact(struct image_params *imgp)
832	{
833	struct mach_header mach_header = (struct* mach_header *)imgp->ip_vdata;
834	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
835	int error = `0`;
836	task_t task;
837	task_t new_task = NULL; / protected by vfexec /
838	thread_t thread;
839	struct uthread *uthread;
840	vm_map_t old_map = VM_MAP_NULL;
841	vm_map_t map = VM_MAP_NULL;
842	load_return_t lret;
843	load_result_t load_result = {};
844	struct _posix_spawnattr *psa = NULL;
845	int spawn = (imgp->ip_flags & IMGPF_SPAWN);
846	int vfexec = (imgp->ip_flags & IMGPF_VFORK_EXEC);
847	int exec = (imgp->ip_flags & IMGPF_EXEC);
848	os_reason_t exec_failure_reason = OS_REASON_NULL;
849
850	/*
851	* make sure it's a Mach-O 1.0 or Mach-O 2.0 binary; the difference
852	* is a reserved field on the end, so for the most part, we can
853	* treat them as if they were identical. Reverse-endian Mach-O
854	* binaries are recognized but not compatible.
855	*/
856	if ((mach_header->magic == MH_CIGAM) \|\|
857	(mach_header->magic == MH_CIGAM_64)) {
858	error = EBADARCH;
859	goto bad;
860	}
861
862	if ((mach_header->magic != MH_MAGIC) &&
863	(mach_header->magic != MH_MAGIC_64)) {
864	error = -`1`;
865	goto bad;
866	}
867
868	if (mach_header->filetype != MH_EXECUTE) {
869	error = -`1`;
870	goto bad;
871	}
872
873	if (imgp->ip_origcputype != `0`) {
874	/ Fat header previously had an idea about this thin file /
875	if (imgp->ip_origcputype != mach_header->cputype \|\|
876	imgp->ip_origcpusubtype != mach_header->cpusubtype) {
877	error = EBADARCH;
878	goto bad;
879	}
880	} else {
881	imgp->ip_origcputype = mach_header->cputype;
882	imgp->ip_origcpusubtype = mach_header->cpusubtype;
883	}
884
885	task = current_task();
886	thread = current_thread();
887	uthread = get_bsdthread_info(thread);
888
889	if ((mach_header->cputype & CPU_ARCH_ABI64) == CPU_ARCH_ABI64) {
890	imgp->ip_flags \|= IMGPF_IS_64BIT_ADDR \| IMGPF_IS_64BIT_DATA;
891	}
892
893	/ If posix_spawn binprefs exist, respect those prefs. /
894	psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
895	if (psa != NULL && psa->psa_binprefs[`0`] != `0`) {
896	int pr = `0`;
897	for (pr = `0`; pr < NBINPREFS; pr++) {
898	cpu_type_t pref = psa->psa_binprefs[pr];
899	if (pref == `0`) {
900	/ No suitable arch in the pref list /
901	error = EBADARCH;
902	goto bad;
903	}
904
905	if (pref == CPU_TYPE_ANY) {
906	/ Jump to regular grading /
907	goto grade;
908	}
909
910	if (pref == imgp->ip_origcputype) {
911	/ We have a match! /
912	goto grade;
913	}
914	}
915	error = EBADARCH;
916	goto bad;
917	}
918	grade:
919	if (!grade_binary(imgp->ip_origcputype, imgp->ip_origcpusubtype & ~CPU_SUBTYPE_MASK)) {
920	error = EBADARCH;
921	goto bad;
922	}
923
924
925
926	/ Copy in arguments/environment from the old process /
927	error = exec_extract_strings(imgp);
928	if (error)
929	goto bad;
930
931	AUDIT_ARG(argv, imgp->ip_startargv, imgp->ip_argc,
932	imgp->ip_endargv - imgp->ip_startargv);
933	AUDIT_ARG(envv, imgp->ip_endargv, imgp->ip_envc,
934	imgp->ip_endenvv - imgp->ip_endargv);
935
936	/*
937	* We are being called to activate an image subsequent to a vfork()
938	* operation; in this case, we know that our task, thread, and
939	* uthread are actually those of our parent, and our proc, which we
940	* obtained indirectly from the image_params vfs_context_t, is the
941	* new child process.
942	*/
943	if (vfexec) {
944	imgp->ip_new_thread = fork_create_child(task,
945	NULL,
946	p,
947	FALSE,
948	(imgp->ip_flags & IMGPF_IS_64BIT_ADDR),
949	(imgp->ip_flags & IMGPF_IS_64BIT_DATA),
950	FALSE);
951	/ task and thread ref returned, will be released in __mac_execve /
952	if (imgp->ip_new_thread == NULL) {
953	error = ENOMEM;
954	goto bad;
955	}
956	}
957
958
959	/ reset local idea of thread, uthread, task /
960	thread = imgp->ip_new_thread;
961	uthread = get_bsdthread_info(thread);
962	task = new_task = get_threadtask(thread);
963
964	/*
965	* Load the Mach-O file.
966	*
967	* NOTE: An error after this point indicates we have potentially
968	* destroyed or overwritten some process state while attempting an
969	* execve() following a vfork(), which is an unrecoverable condition.
970	* We send the new process an immediate SIGKILL to avoid it executing
971	* any instructions in the mutated address space. For true spawns,
972	* this is not the case, and "too late" is still not too late to
973	* return an error code to the parent process.
974	*/
975
976	/*
977	* Actually load the image file we previously decided to load.
978	*/
979	lret = load_machfile(imgp, mach_header, thread, &map, &load_result);
980	if (lret != LOAD_SUCCESS) {
981	error = load_return_to_errno(lret);
982
983	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) \| DBG_FUNC_NONE,
984	p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_BAD_MACHO, `0`, `0`);
985	if (lret == LOAD_BADMACHO_UPX) {
986	/ set anything that might be useful in the crash report /
987	set_proc_name(imgp, p);
988
989	exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_UPX);
990	exec_failure_reason->osr_flags \|= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
991	exec_failure_reason->osr_flags \|= OS_REASON_FLAG_CONSISTENT_FAILURE;
992	} else if (lret == LOAD_BADARCH_X86) {
993	/ set anything that might be useful in the crash report /
994	set_proc_name(imgp, p);
995
996	exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_NO32EXEC);
997	exec_failure_reason->osr_flags \|= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
998	exec_failure_reason->osr_flags \|= OS_REASON_FLAG_CONSISTENT_FAILURE;
999	} else {
1000	exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_BAD_MACHO);
1001	}
1002
1003	goto badtoolate;
1004	}
1005
1006	proc_lock(p);
1007	p->p_cputype = imgp->ip_origcputype;
1008	p->p_cpusubtype = imgp->ip_origcpusubtype;
1009	proc_unlock(p);
1010
1011	vm_map_set_user_wire_limit(map, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
1012
1013	/*
1014	* Set code-signing flags if this binary is signed, or if parent has
1015	* requested them on exec.
1016	*/
1017	if (load_result.csflags & CS_VALID) {
1018	imgp->ip_csflags \|= load_result.csflags &
1019	(CS_VALID\|CS_SIGNED\|CS_DEV_CODE\|
1020	CS_HARD\|CS_KILL\|CS_RESTRICT\|CS_ENFORCEMENT\|CS_REQUIRE_LV\|
1021	CS_FORCED_LV\|CS_ENTITLEMENTS_VALIDATED\|CS_DYLD_PLATFORM\|CS_RUNTIME\|
1022	CS_ENTITLEMENT_FLAGS\|
1023	CS_EXEC_SET_HARD\|CS_EXEC_SET_KILL\|CS_EXEC_SET_ENFORCEMENT);
1024	} else {
1025	imgp->ip_csflags &= ~CS_VALID;
1026	}
1027
1028	if (p->p_csflags & CS_EXEC_SET_HARD)
1029	imgp->ip_csflags \|= CS_HARD;
1030	if (p->p_csflags & CS_EXEC_SET_KILL)
1031	imgp->ip_csflags \|= CS_KILL;
1032	if (p->p_csflags & CS_EXEC_SET_ENFORCEMENT)
1033	imgp->ip_csflags \|= CS_ENFORCEMENT;
1034	if (p->p_csflags & CS_EXEC_INHERIT_SIP) {
1035	if (p->p_csflags & CS_INSTALLER)
1036	imgp->ip_csflags \|= CS_INSTALLER;
1037	if (p->p_csflags & CS_DATAVAULT_CONTROLLER)
1038	imgp->ip_csflags \|= CS_DATAVAULT_CONTROLLER;
1039	if (p->p_csflags & CS_NVRAM_UNRESTRICTED)
1040	imgp->ip_csflags \|= CS_NVRAM_UNRESTRICTED;
1041	}
1042
1043	/*
1044	* Set up the system reserved areas in the new address space.
1045	*/
1046	int cpu_subtype;
1047	cpu_subtype = `0`; / all cpu_subtypes use the same shared region /
1048	vm_map_exec(map, task, load_result.is_64bit_addr, (void *)p->p_fd->fd_rdir, cpu_type(), cpu_subtype);
1049
1050	/*
1051	* Close file descriptors which specify close-on-exec.
1052	*/
1053	fdexec(p, psa != NULL ? psa->psa_flags : `0`, exec);
1054
1055	/*
1056	* deal with set[ug]id.
1057	*/
1058	error = exec_handle_sugid(imgp);
1059	if (error) {
1060	vm_map_deallocate(map);
1061
1062	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) \| DBG_FUNC_NONE,
1063	p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_SUGID_FAILURE, `0`, `0`);
1064	exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_SUGID_FAILURE);
1065	goto badtoolate;
1066	}
1067
1068	/*
1069	* Commit to new map.
1070	*
1071	* Swap the new map for the old for target task, which consumes
1072	* our new map reference but each leaves us responsible for the
1073	* old_map reference. That lets us get off the pmap associated
1074	* with it, and then we can release it.
1075	*
1076	* The map needs to be set on the target task which is different
1077	* than current task, thus swap_task_map is used instead of
1078	* vm_map_switch.
1079	*/
1080	old_map = swap_task_map(task, thread, map);
1081	vm_map_deallocate(old_map);
1082	old_map = NULL;
1083
1084	lret = activate_exec_state(task, p, thread, &load_result);
1085	if (lret != KERN_SUCCESS) {
1086
1087	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) \| DBG_FUNC_NONE,
1088	p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_ACTV_THREADSTATE, `0`, `0`);
1089	exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_ACTV_THREADSTATE);
1090	goto badtoolate;
1091	}
1092
1093	/*
1094	* deal with voucher on exec-calling thread.
1095	*/
1096	if (imgp->ip_new_thread == NULL)
1097	thread_set_mach_voucher(current_thread(), IPC_VOUCHER_NULL);
1098
1099	/ Make sure we won't interrupt ourself signalling a partial process /
1100	if (!vfexec && !spawn && (p->p_lflag & P_LTRACED))
1101	psignal(p, SIGTRAP);
1102
1103	if (load_result.unixproc &&
1104	create_unix_stack(get_task_map(task),
1105	&load_result,
1106	p) != KERN_SUCCESS) {
1107	error = load_return_to_errno(LOAD_NOSPACE);
1108
1109	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) \| DBG_FUNC_NONE,
1110	p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_STACK_ALLOC, `0`, `0`);
1111	exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_STACK_ALLOC);
1112	goto badtoolate;
1113	}
1114
1115	error = exec_add_apple_strings(imgp, &load_result);
1116	if (error) {
1117
1118	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) \| DBG_FUNC_NONE,
1119	p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_APPLE_STRING_INIT, `0`, `0`);
1120	exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_APPLE_STRING_INIT);
1121	goto badtoolate;
1122	}
1123
1124	/ Switch to target task's map to copy out strings /
1125	old_map = vm_map_switch(get_task_map(task));
1126
1127	if (load_result.unixproc) {
1128	user_addr_t ap;
1129
1130	/*
1131	* Copy the strings area out into the new process address
1132	* space.
1133	*/
1134	ap = p->user_stack;
1135	error = exec_copyout_strings(imgp, &ap);
1136	if (error) {
1137	vm_map_switch(old_map);
1138
1139	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) \| DBG_FUNC_NONE,
1140	p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_STRINGS, `0`, `0`);
1141	exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_STRINGS);
1142	goto badtoolate;
1143	}
1144	/ Set the stack /
1145	thread_setuserstack(thread, ap);
1146	}
1147
1148	if (load_result.dynlinker) {
1149	uint64_t ap;
1150	int new_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT_ADDR) ? `8` : `4`;
1151
1152	/ Adjust the stack /
1153	ap = thread_adjuserstack(thread, -new_ptr_size);
1154	error = copyoutptr(load_result.mach_header, ap, new_ptr_size);
1155
1156	if (error) {
1157	vm_map_switch(old_map);
1158
1159	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) \| DBG_FUNC_NONE,
1160	p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_DYNLINKER, `0`, `0`);
1161	exec_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_DYNLINKER);
1162	goto badtoolate;
1163	}
1164	task_set_dyld_info(task, load_result.all_image_info_addr,
1165	load_result.all_image_info_size);
1166	}
1167
1168	/ Avoid immediate VM faults back into kernel /
1169	exec_prefault_data(p, imgp, &load_result);
1170
1171	vm_map_switch(old_map);
1172
1173	/ Stop profiling /
1174	stopprofclock(p);
1175
1176	/*
1177	* Reset signal state.
1178	*/
1179	execsigs(p, thread);
1180
1181	/*
1182	* need to cancel async IO requests that can be cancelled and wait for those
1183	* already active. MAY BLOCK!
1184	*/
1185	_aio_exec( p );
1186
1187	#if SYSV_SHM
1188	/ FIXME: Till vmspace inherit is fixed: /
1189	if (!vfexec && p->vm_shm)
1190	shmexec(p);
1191	#endif
1192	#if SYSV_SEM
1193	/ Clean up the semaphores /
1194	semexit(p);
1195	#endif
1196
1197	/*
1198	* Remember file name for accounting.
1199	*/
1200	p->p_acflag &= ~AFORK;
1201
1202	set_proc_name(imgp, p);
1203
1204	#if CONFIG_SECLUDED_MEMORY
1205	if (secluded_for_apps &&
1206	load_result.platform_binary) {
1207	if (strncmp(p->p_name,
1208	"Camera",
1209	sizeof (p->p_name)) == `0`) {
1210	task_set_could_use_secluded_mem(task, TRUE);
1211	} else {
1212	task_set_could_use_secluded_mem(task, FALSE);
1213	}
1214	if (strncmp(p->p_name,
1215	"mediaserverd",
1216	sizeof (p->p_name)) == `0`) {
1217	task_set_could_also_use_secluded_mem(task, TRUE);
1218	}
1219	}
1220	#endif /* CONFIG_SECLUDED_MEMORY */
1221
1222	#if __arm64__
1223	if (load_result.legacy_footprint) {
1224	task_set_legacy_footprint(task, TRUE);
1225	}
1226	#endif /* __arm64__ */
1227
1228	pal_dbg_set_task_name(task);
1229
1230	/*
1231	* The load result will have already been munged by AMFI to include the
1232	* platform binary flag if boot-args dictated it (AMFI will mark anything
1233	* that doesn't go through the upcall path as a platform binary if its
1234	* enforcement is disabled).
1235	*/
1236	if (load_result.platform_binary) {
1237	if (cs_debug) {
1238	printf("setting platform binary on task: pid = %d\n", p->p_pid);
1239	}
1240
1241	/*
1242	* We must use 'task' here because the proc's task has not yet been
1243	* switched to the new one.
1244	*/
1245	task_set_platform_binary(task, TRUE);
1246	} else {
1247	if (cs_debug) {
1248	printf("clearing platform binary on task: pid = %d\n", p->p_pid);
1249	}
1250
1251	task_set_platform_binary(task, FALSE);
1252	}
1253
1254	#if DEVELOPMENT \|\| DEBUG
1255	/*
1256	* Update the pid an proc name for importance base if any
1257	*/
1258	task_importance_update_owner_info(task);
1259	#endif
1260
1261	memcpy(&p->p_uuid[`0`], &load_result.uuid[`0`], sizeof(p->p_uuid));
1262
1263	#if CONFIG_DTRACE
1264	dtrace_proc_exec(p);
1265	#endif
1266
1267	if (kdebug_enable) {
1268	long args[`4`] = {};
1269
1270	uintptr_t fsid = `0`, fileid = `0`;
1271	if (imgp->ip_vattr) {
1272	uint64_t fsid64 = get_va_fsid(imgp->ip_vattr);
1273	fsid = fsid64;
1274	fileid = imgp->ip_vattr->va_fileid;
1275	// check for (unexpected) overflow and trace zero in that case
1276	if (fsid != fsid64 \|\| fileid != imgp->ip_vattr->va_fileid) {
1277	fsid = fileid = `0`;
1278	}
1279	}
1280	KERNEL_DEBUG_CONSTANT_IST1(TRACE_DATA_EXEC, p->p_pid, fsid, fileid, `0`,
1281	(uintptr_t)thread_tid(thread));
1282
1283	/*
1284	* Collect the pathname for tracing
1285	*/
1286	kdbg_trace_string(p, &args[`0`], &args[`1`], &args[`2`], &args[`3`]);
1287	KERNEL_DEBUG_CONSTANT_IST1(TRACE_STRING_EXEC, args[`0`], args[`1`],
1288	args[`2`], args[`3`], (uintptr_t)thread_tid(thread));
1289	}
1290
1291	/*
1292	* If posix_spawned with the START_SUSPENDED flag, stop the
1293	* process before it runs.
1294	*/
1295	if (imgp->ip_px_sa != NULL) {
1296	psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
1297	if (psa->psa_flags & POSIX_SPAWN_START_SUSPENDED) {
1298	proc_lock(p);
1299	p->p_stat = SSTOP;
1300	proc_unlock(p);
1301	(void) task_suspend_internal(task);
1302	}
1303	}
1304
1305	/*
1306	* mark as execed, wakeup the process that vforked (if any) and tell
1307	* it that it now has its own resources back
1308	*/
1309	OSBitOrAtomic(P_EXEC, &p->p_flag);
1310	proc_resetregister(p);
1311	if (p->p_pptr && (p->p_lflag & P_LPPWAIT)) {
1312	proc_lock(p);
1313	p->p_lflag &= ~P_LPPWAIT;
1314	proc_unlock(p);
1315	wakeup((caddr_t)p->p_pptr);
1316	}
1317
1318	/*
1319	* Pay for our earlier safety; deliver the delayed signals from
1320	* the incomplete vfexec process now that it's complete.
1321	*/
1322	if (vfexec && (p->p_lflag & P_LTRACED)) {
1323	psignal_vfork(p, new_task, thread, SIGTRAP);
1324	}
1325
1326	goto done;
1327
1328	badtoolate:
1329	/ Don't allow child process to execute any instructions /
1330	if (!spawn) {
1331	if (vfexec) {
1332	assert(exec_failure_reason != OS_REASON_NULL);
1333	psignal_vfork_with_reason(p, new_task, thread, SIGKILL, exec_failure_reason);
1334	exec_failure_reason = OS_REASON_NULL;
1335	} else {
1336	assert(exec_failure_reason != OS_REASON_NULL);
1337	psignal_with_reason(p, SIGKILL, exec_failure_reason);
1338	exec_failure_reason = OS_REASON_NULL;
1339
1340	if (exec) {
1341	/ Terminate the exec copy task /
1342	task_terminate_internal(task);
1343	}
1344	}
1345
1346	/ We can't stop this system call at this point, so just pretend we succeeded /
1347	error = `0`;
1348	} else {
1349	os_reason_free(exec_failure_reason);
1350	exec_failure_reason = OS_REASON_NULL;
1351	}
1352
1353	done:
1354	if (load_result.threadstate) {
1355	kfree(load_result.threadstate, load_result.threadstate_sz);
1356	load_result.threadstate = NULL;
1357	}
1358
1359	bad:
1360	/ If we hit this, we likely would have leaked an exit reason /
1361	assert(exec_failure_reason == OS_REASON_NULL);
1362	return(error);
1363	}
1364
1365
1366
1367
1368	/*
1369	* Our image activator table; this is the table of the image types we are
1370	* capable of loading. We list them in order of preference to ensure the
1371	* fastest image load speed.
1372	*
1373	* XXX hardcoded, for now; should use linker sets
1374	*/
1375	struct execsw {
1376	int (ex_imgact)(struct* image_params *);
1377	const char *ex_name;
1378	} execsw[] = {
1379	{ exec_mach_imgact, "Mach-o Binary" },
1380	{ exec_fat_imgact, "Fat Binary" },
1381	{ exec_shell_imgact, "Interpreter Script" },
1382	{ NULL, NULL}
1383	};
1384
1385
1386	/*
1387	* exec_activate_image
1388	*
1389	* Description: Iterate through the available image activators, and activate
1390	* the image associated with the imgp structure. We start with
1391	* the activator for Mach-o binaries followed by that for Fat binaries
1392	* for Interpreter scripts.
1393	*
1394	* Parameters: struct image_params * Image parameter block
1395	*
1396	* Returns: 0 Success
1397	* EBADEXEC The executable is corrupt/unknown
1398	* execargs_alloc:EINVAL Invalid argument
1399	* execargs_alloc:EACCES Permission denied
1400	* execargs_alloc:EINTR Interrupted function
1401	* execargs_alloc:ENOMEM Not enough space
1402	* exec_save_path:EFAULT Bad address
1403	* exec_save_path:ENAMETOOLONG Filename too long
1404	* exec_check_permissions:EACCES Permission denied
1405	* exec_check_permissions:ENOEXEC Executable file format error
1406	* exec_check_permissions:ETXTBSY Text file busy [misuse of error code]
1407	* exec_check_permissions:???
1408	* namei:???
1409	* vn_rdwr:??? [anything vn_rdwr can return]
1410	* <ex_imgact>:??? [anything an imgact can return]
1411	* EDEADLK Process is being terminated
1412	*/
1413	static int
1414	exec_activate_image(struct image_params *imgp)
1415	{
1416	struct nameidata *ndp = NULL;
1417	const char *excpath;
1418	int error;
1419	int resid;
1420	int once = `1`; / save SGUID-ness for interpreted files /
1421	int i;
1422	int itercount = `0`;
1423	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
1424
1425	error = execargs_alloc(imgp);
1426	if (error)
1427	goto bad_notrans;
1428
1429	error = exec_save_path(imgp, imgp->ip_user_fname, imgp->ip_seg, &excpath);
1430	if (error) {
1431	goto bad_notrans;
1432	}
1433
1434	/ Use excpath, which contains the copyin-ed exec path /
1435	DTRACE_PROC1(exec, uintptr_t, excpath);
1436
1437	MALLOC(ndp, struct nameidata , sizeof(ndp), M_TEMP, M_WAITOK \| M_ZERO);
1438	if (ndp == NULL) {
1439	error = ENOMEM;
1440	goto bad_notrans;
1441	}
1442
1443	NDINIT(ndp, LOOKUP, OP_LOOKUP, FOLLOW \| LOCKLEAF \| AUDITVNPATH1,
1444	UIO_SYSSPACE, CAST_USER_ADDR_T(excpath), imgp->ip_vfs_context);
1445
1446	again:
1447	error = namei(ndp);
1448	if (error)
1449	goto bad_notrans;
1450	imgp->ip_ndp = ndp; / successful namei(); call nameidone() later /
1451	imgp->ip_vp = ndp->ni_vp; / if set, need to vnode_put() at some point /
1452
1453	/*
1454	* Before we start the transition from binary A to binary B, make
1455	* sure another thread hasn't started exiting the process. We grab
1456	* the proc lock to check p_lflag initially, and the transition
1457	* mechanism ensures that the value doesn't change after we release
1458	* the lock.
1459	*/
1460	proc_lock(p);
1461	if (p->p_lflag & P_LEXIT) {
1462	error = EDEADLK;
1463	proc_unlock(p);
1464	goto bad_notrans;
1465	}
1466	error = proc_transstart(p, `1`, `0`);
1467	proc_unlock(p);
1468	if (error)
1469	goto bad_notrans;
1470
1471	error = exec_check_permissions(imgp);
1472	if (error)
1473	goto bad;
1474
1475	/ Copy; avoid invocation of an interpreter overwriting the original /
1476	if (once) {
1477	once = `0`;
1478	imgp->ip_origvattr = imgp->ip_vattr;
1479	}
1480
1481	error = vn_rdwr(UIO_READ, imgp->ip_vp, imgp->ip_vdata, PAGE_SIZE, `0`,
1482	UIO_SYSSPACE, IO_NODELOCKED,
1483	vfs_context_ucred(imgp->ip_vfs_context),
1484	&resid, vfs_context_proc(imgp->ip_vfs_context));
1485	if (error)
1486	goto bad;
1487
1488	if (resid) {
1489	memset(imgp->ip_vdata + (PAGE_SIZE - resid), `0x0`, resid);
1490	}
1491
1492	encapsulated_binary:
1493	/ Limit the number of iterations we will attempt on each binary /
1494	if (++itercount > EAI_ITERLIMIT) {
1495	error = EBADEXEC;
1496	goto bad;
1497	}
1498	error = -`1`;
1499	for(i = `0`; error == -`1` && execsw[i].ex_imgact != NULL; i++) {
1500
1501	error = (*execsw[i].ex_imgact)(imgp);
1502
1503	switch (error) {
1504	/ case -1: not claimed: continue /
1505	case -`2`: / Encapsulated binary, imgp->ip_XXX set for next iteration /
1506	goto encapsulated_binary;
1507
1508	case -`3`: / Interpreter /
1509	#if CONFIG_MACF
1510	/*
1511	* Copy the script label for later use. Note that
1512	* the label can be different when the script is
1513	* actually read by the interpreter.
1514	*/
1515	if (imgp->ip_scriptlabelp)
1516	mac_vnode_label_free(imgp->ip_scriptlabelp);
1517	imgp->ip_scriptlabelp = mac_vnode_label_alloc();
1518	if (imgp->ip_scriptlabelp == NULL) {
1519	error = ENOMEM;
1520	break;
1521	}
1522	mac_vnode_label_copy(imgp->ip_vp->v_label,
1523	imgp->ip_scriptlabelp);
1524
1525	/*
1526	* Take a ref of the script vnode for later use.
1527	*/
1528	if (imgp->ip_scriptvp)
1529	vnode_put(imgp->ip_scriptvp);
1530	if (vnode_getwithref(imgp->ip_vp) == `0`)
1531	imgp->ip_scriptvp = imgp->ip_vp;
1532	#endif
1533
1534	nameidone(ndp);
1535
1536	vnode_put(imgp->ip_vp);
1537	imgp->ip_vp = NULL; / already put /
1538	imgp->ip_ndp = NULL; / already nameidone /
1539
1540	/ Use excpath, which exec_shell_imgact reset to the interpreter /
1541	NDINIT(ndp, LOOKUP, OP_LOOKUP, FOLLOW \| LOCKLEAF,
1542	UIO_SYSSPACE, CAST_USER_ADDR_T(excpath), imgp->ip_vfs_context);
1543
1544	proc_transend(p, `0`);
1545	goto again;
1546
1547	default:
1548	break;
1549	}
1550	}
1551
1552	if (error == `0`) {
1553	if (imgp->ip_flags & IMGPF_INTERPRET && ndp->ni_vp) {
1554	AUDIT_ARG(vnpath, ndp->ni_vp, ARG_VNODE2);
1555	}
1556
1557	/*
1558	* Call out to allow 3rd party notification of exec.
1559	* Ignore result of kauth_authorize_fileop call.
1560	*/
1561	if (kauth_authorize_fileop_has_listeners()) {
1562	kauth_authorize_fileop(vfs_context_ucred(imgp->ip_vfs_context),
1563	KAUTH_FILEOP_EXEC,
1564	(uintptr_t)ndp->ni_vp, `0`);
1565	}
1566	}
1567	bad:
1568	proc_transend(p, `0`);
1569
1570	bad_notrans:
1571	if (imgp->ip_strings)
1572	execargs_free(imgp);
1573	if (imgp->ip_ndp)
1574	nameidone(imgp->ip_ndp);
1575	if (ndp)
1576	FREE(ndp, M_TEMP);
1577
1578	return (error);
1579	}
1580
1581
1582	/*
1583	* exec_handle_spawnattr_policy
1584	*
1585	* Description: Decode and apply the posix_spawn apptype, qos clamp, and watchport ports to the task.
1586	*
1587	* Parameters: proc_t p process to apply attributes to
1588	* int psa_apptype posix spawn attribute apptype
1589	*
1590	* Returns: 0 Success
1591	*/
1592	static errno_t
1593	exec_handle_spawnattr_policy(proc_t p, int psa_apptype, uint64_t psa_qos_clamp, uint64_t psa_darwin_role,
1594	ipc_port_t * portwatch_ports, int portwatch_count)
1595	{
1596	int apptype = TASK_APPTYPE_NONE;
1597	int qos_clamp = THREAD_QOS_UNSPECIFIED;
1598	int role = TASK_UNSPECIFIED;
1599
1600	if ((psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK) != `0`) {
1601	int proctype = psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK;
1602
1603	switch(proctype) {
1604	case POSIX_SPAWN_PROC_TYPE_DAEMON_INTERACTIVE:
1605	apptype = TASK_APPTYPE_DAEMON_INTERACTIVE;
1606	break;
1607	case POSIX_SPAWN_PROC_TYPE_DAEMON_STANDARD:
1608	apptype = TASK_APPTYPE_DAEMON_STANDARD;
1609	break;
1610	case POSIX_SPAWN_PROC_TYPE_DAEMON_ADAPTIVE:
1611	apptype = TASK_APPTYPE_DAEMON_ADAPTIVE;
1612	break;
1613	case POSIX_SPAWN_PROC_TYPE_DAEMON_BACKGROUND:
1614	apptype = TASK_APPTYPE_DAEMON_BACKGROUND;
1615	break;
1616	case POSIX_SPAWN_PROC_TYPE_APP_DEFAULT:
1617	apptype = TASK_APPTYPE_APP_DEFAULT;
1618	break;
1619	#if !CONFIG_EMBEDDED
1620	case POSIX_SPAWN_PROC_TYPE_APP_TAL:
1621	apptype = TASK_APPTYPE_APP_TAL;
1622	break;
1623	#endif /* !CONFIG_EMBEDDED */
1624	default:
1625	apptype = TASK_APPTYPE_NONE;
1626	/ TODO: Should an invalid value here fail the spawn? /
1627	break;
1628	}
1629	}
1630
1631	if (psa_qos_clamp != POSIX_SPAWN_PROC_CLAMP_NONE) {
1632	switch (psa_qos_clamp) {
1633	case POSIX_SPAWN_PROC_CLAMP_UTILITY:
1634	qos_clamp = THREAD_QOS_UTILITY;
1635	break;
1636	case POSIX_SPAWN_PROC_CLAMP_BACKGROUND:
1637	qos_clamp = THREAD_QOS_BACKGROUND;
1638	break;
1639	case POSIX_SPAWN_PROC_CLAMP_MAINTENANCE:
1640	qos_clamp = THREAD_QOS_MAINTENANCE;
1641	break;
1642	default:
1643	qos_clamp = THREAD_QOS_UNSPECIFIED;
1644	/ TODO: Should an invalid value here fail the spawn? /
1645	break;
1646	}
1647	}
1648
1649	if (psa_darwin_role != PRIO_DARWIN_ROLE_DEFAULT) {
1650	proc_darwin_role_to_task_role(psa_darwin_role, &role);
1651	}
1652
1653	if (apptype != TASK_APPTYPE_NONE \|\|
1654	qos_clamp != THREAD_QOS_UNSPECIFIED \|\|
1655	role != TASK_UNSPECIFIED) {
1656	proc_set_task_spawnpolicy(p->task, apptype, qos_clamp, role,
1657	portwatch_ports, portwatch_count);
1658	}
1659
1660	return (`0`);
1661	}
1662
1663
1664	/*
1665	* exec_handle_port_actions
1666	*
1667	* Description: Go through the _posix_port_actions_t contents,
1668	* calling task_set_special_port, task_set_exception_ports
1669	* and/or audit_session_spawnjoin for the current task.
1670	*
1671	* Parameters: struct image_params * Image parameter block
1672	*
1673	* Returns: 0 Success
1674	* EINVAL Failure
1675	* ENOTSUP Illegal posix_spawn attr flag was set
1676	*/
1677	static errno_t
1678	exec_handle_port_actions(struct image_params imgp, boolean_t portwatch_present,
1679	ipc_port_t * portwatch_ports)
1680	{
1681	_posix_spawn_port_actions_t pacts = imgp->ip_px_spa;
1682	#if CONFIG_AUDIT
1683	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
1684	#endif
1685	_ps_port_action_t *act = NULL;
1686	task_t task = get_threadtask(imgp->ip_new_thread);
1687	ipc_port_t port = NULL;
1688	errno_t ret = `0`;
1689	int i;
1690	kern_return_t kr;
1691
1692	*portwatch_present = FALSE;
1693
1694	for (i = `0`; i < pacts->pspa_count; i++) {
1695	act = &pacts->pspa_actions[i];
1696
1697	if (MACH_PORT_VALID(act->new_port)) {
1698	kr = ipc_object_copyin(get_task_ipcspace(current_task()),
1699	act->new_port, MACH_MSG_TYPE_COPY_SEND,
1700	(ipc_object_t *) &port);
1701
1702	if (kr != KERN_SUCCESS) {
1703	ret = EINVAL;
1704	goto done;
1705	}
1706	} else {
1707	/ it's NULL or DEAD /
1708	port = CAST_MACH_NAME_TO_PORT(act->new_port);
1709	}
1710
1711	switch (act->port_type) {
1712	case PSPA_SPECIAL:
1713	kr = task_set_special_port(task, act->which, port);
1714
1715	if (kr != KERN_SUCCESS)
1716	ret = EINVAL;
1717	break;
1718
1719	case PSPA_EXCEPTION:
1720	kr = task_set_exception_ports(task, act->mask, port,
1721	act->behavior, act->flavor);
1722	if (kr != KERN_SUCCESS)
1723	ret = EINVAL;
1724	break;
1725	#if CONFIG_AUDIT
1726	case PSPA_AU_SESSION:
1727	ret = audit_session_spawnjoin(p, task, port);
1728	if (ret) {
1729	/ audit_session_spawnjoin() has already dropped the reference in case of error. /
1730	goto done;
1731	}
1732
1733	break;
1734	#endif
1735	case PSPA_IMP_WATCHPORTS:
1736	if (portwatch_ports != NULL && IPC_PORT_VALID(port)) {
1737	*portwatch_present = TRUE;
1738	/ hold on to this till end of spawn /
1739	portwatch_ports[i] = port;
1740	} else {
1741	ipc_port_release_send(port);
1742	}
1743
1744	break;
1745	default:
1746	ret = EINVAL;
1747	break;
1748	}
1749
1750	if (ret) {
1751	/ action failed, so release port resources /
1752	ipc_port_release_send(port);
1753	break;
1754	}
1755	}
1756
1757	done:
1758	if (`0` != ret)
1759	DTRACE_PROC1(spawn__port__failure, mach_port_name_t, act->new_port);
1760	return (ret);
1761	}
1762
1763	/*
1764	* exec_handle_file_actions
1765	*
1766	* Description: Go through the _posix_file_actions_t contents applying the
1767	* open, close, and dup2 operations to the open file table for
1768	* the current process.
1769	*
1770	* Parameters: struct image_params * Image parameter block
1771	*
1772	* Returns: 0 Success
1773	* ???
1774	*
1775	* Note: Actions are applied in the order specified, with the credential
1776	* of the parent process. This is done to permit the parent
1777	* process to utilize POSIX_SPAWN_RESETIDS to drop privilege in
1778	* the child following operations the child may in fact not be
1779	* normally permitted to perform.
1780	*/
1781	static int
1782	exec_handle_file_actions(struct image_params imgp, short* psa_flags)
1783	{
1784	int error = `0`;
1785	int action;
1786	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
1787	_posix_spawn_file_actions_t px_sfap = imgp->ip_px_sfa;
1788	int ival[`2`]; / dummy retval for system calls) /
1789
1790	for (action = `0`; action < px_sfap->psfa_act_count; action++) {
1791	_psfa_action_t *psfa = &px_sfap->psfa_act_acts[ action];
1792
1793	switch(psfa->psfaa_type) {
1794	case PSFA_OPEN: {
1795	/*
1796	* Open is different, in that it requires the use of
1797	* a path argument, which is normally copied in from
1798	* user space; because of this, we have to support an
1799	* open from kernel space that passes an address space
1800	* context of UIO_SYSSPACE, and casts the address
1801	* argument to a user_addr_t.
1802	*/
1803	char *bufp = NULL;
1804	struct vnode_attr *vap;
1805	struct nameidata *ndp;
1806	int mode = psfa->psfaa_openargs.psfao_mode;
1807	struct dup2_args dup2a;
1808	struct close_nocancel_args ca;
1809	int origfd;
1810
1811	MALLOC(bufp, char , sizeof(vap) + sizeof(*ndp), M_TEMP, M_WAITOK \| M_ZERO);
1812	if (bufp == NULL) {
1813	error = ENOMEM;
1814	break;
1815	}
1816
1817	vap = (struct vnode_attr *) bufp;
1818	ndp = (struct nameidata ) (bufp + sizeof(vap));
1819
1820	VATTR_INIT(vap);
1821	/ Mask off all but regular access permissions /
1822	mode = ((mode &~ p->p_fd->fd_cmask) & ALLPERMS) & ~S_ISTXT;
1823	VATTR_SET(vap, va_mode, mode & ACCESSPERMS);
1824
1825	NDINIT(ndp, LOOKUP, OP_OPEN, FOLLOW \| AUDITVNPATH1, UIO_SYSSPACE,
1826	CAST_USER_ADDR_T(psfa->psfaa_openargs.psfao_path),
1827	imgp->ip_vfs_context);
1828
1829	error = open1(imgp->ip_vfs_context,
1830	ndp,
1831	psfa->psfaa_openargs.psfao_oflag,
1832	vap,
1833	fileproc_alloc_init, NULL,
1834	ival);
1835
1836	FREE(bufp, M_TEMP);
1837
1838	/*
1839	* If there's an error, or we get the right fd by
1840	* accident, then drop out here. This is easier than
1841	* reworking all the open code to preallocate fd
1842	* slots, and internally taking one as an argument.
1843	*/
1844	if (error \|\| ival[`0`] == psfa->psfaa_filedes)
1845	break;
1846
1847	origfd = ival[`0`];
1848	/*
1849	* If we didn't fall out from an error, we ended up
1850	* with the wrong fd; so now we've got to try to dup2
1851	* it to the right one.
1852	*/
1853	dup2a.from = origfd;
1854	dup2a.to = psfa->psfaa_filedes;
1855
1856	/*
1857	* The dup2() system call implementation sets
1858	* ival to newfd in the success case, but we
1859	* can ignore that, since if we didn't get the
1860	* fd we wanted, the error will stop us.
1861	*/
1862	error = dup2(p, &dup2a, ival);
1863	if (error)
1864	break;
1865
1866	/*
1867	* Finally, close the original fd.
1868	*/
1869	ca.fd = origfd;
1870
1871	error = close_nocancel(p, &ca, ival);
1872	}
1873	break;
1874
1875	case PSFA_DUP2: {
1876	struct dup2_args dup2a;
1877
1878	dup2a.from = psfa->psfaa_filedes;
1879	dup2a.to = psfa->psfaa_openargs.psfao_oflag;
1880
1881	/*
1882	* The dup2() system call implementation sets
1883	* ival to newfd in the success case, but we
1884	* can ignore that, since if we didn't get the
1885	* fd we wanted, the error will stop us.
1886	*/
1887	error = dup2(p, &dup2a, ival);
1888	}
1889	break;
1890
1891	case PSFA_CLOSE: {
1892	struct close_nocancel_args ca;
1893
1894	ca.fd = psfa->psfaa_filedes;
1895
1896	error = close_nocancel(p, &ca, ival);
1897	}
1898	break;
1899
1900	case PSFA_INHERIT: {
1901	struct fcntl_nocancel_args fcntla;
1902
1903	/*
1904	* Check to see if the descriptor exists, and
1905	* ensure it's -not- marked as close-on-exec.
1906	*
1907	* Attempting to "inherit" a guarded fd will
1908	* result in a error.
1909	*/
1910	fcntla.fd = psfa->psfaa_filedes;
1911	fcntla.cmd = F_GETFD;
1912	if ((error = fcntl_nocancel(p, &fcntla, ival)) != `0`)
1913	break;
1914
1915	if ((ival[`0`] & FD_CLOEXEC) == FD_CLOEXEC) {
1916	fcntla.fd = psfa->psfaa_filedes;
1917	fcntla.cmd = F_SETFD;
1918	fcntla.arg = ival[`0`] & ~FD_CLOEXEC;
1919	error = fcntl_nocancel(p, &fcntla, ival);
1920	}
1921
1922	}
1923	break;
1924
1925	default:
1926	error = EINVAL;
1927	break;
1928	}
1929
1930	/ All file actions failures are considered fatal, per POSIX /
1931
1932	if (error) {
1933	if (PSFA_OPEN == psfa->psfaa_type) {
1934	DTRACE_PROC1(spawn__open__failure, uintptr_t,
1935	psfa->psfaa_openargs.psfao_path);
1936	} else {
1937	DTRACE_PROC1(spawn__fd__failure, int, psfa->psfaa_filedes);
1938	}
1939	break;
1940	}
1941	}
1942
1943	if (error != `0` \|\| (psa_flags & POSIX_SPAWN_CLOEXEC_DEFAULT) == `0`)
1944	return (error);
1945
1946	/*
1947	* If POSIX_SPAWN_CLOEXEC_DEFAULT is set, behave (during
1948	* this spawn only) as if "close on exec" is the default
1949	* disposition of all pre-existing file descriptors. In this case,
1950	* the list of file descriptors mentioned in the file actions
1951	* are the only ones that can be inherited, so mark them now.
1952	*
1953	* The actual closing part comes later, in fdexec().
1954	*/
1955	proc_fdlock(p);
1956	for (action = `0`; action < px_sfap->psfa_act_count; action++) {
1957	_psfa_action_t *psfa = &px_sfap->psfa_act_acts[action];
1958	int fd = psfa->psfaa_filedes;
1959
1960	switch (psfa->psfaa_type) {
1961	case PSFA_DUP2:
1962	fd = psfa->psfaa_openargs.psfao_oflag;
1963	/FALLTHROUGH/
1964	case PSFA_OPEN:
1965	case PSFA_INHERIT:
1966	*fdflags(p, fd) \|= UF_INHERIT;
1967	break;
1968
1969	case PSFA_CLOSE:
1970	break;
1971	}
1972	}
1973	proc_fdunlock(p);
1974
1975	return (`0`);
1976	}
1977
1978	#if CONFIG_MACF
1979	/*
1980	* exec_spawnattr_getmacpolicyinfo
1981	*/
1982	void *
1983	exec_spawnattr_getmacpolicyinfo(const void macextensions, const* char policyname, size_t lenp)
1984	{
1985	const struct _posix_spawn_mac_policy_extensions *psmx = macextensions;
1986	int i;
1987
1988	if (psmx == NULL)
1989	return NULL;
1990
1991	for (i = `0`; i < psmx->psmx_count; i++) {
1992	const _ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[i];
1993	if (strncmp(extension->policyname, policyname, sizeof(extension->policyname)) == `0`) {
1994	if (lenp != NULL)
1995	*lenp = extension->datalen;
1996	return extension->datap;
1997	}
1998	}
1999
2000	if (lenp != NULL)
2001	*lenp = `0`;
2002	return NULL;
2003	}
2004
2005	static int
2006	spawn_copyin_macpolicyinfo(const struct user__posix_spawn_args_desc px_args, _posix_spawn_mac_policy_extensions_t psmxp)
2007	{
2008	_posix_spawn_mac_policy_extensions_t psmx = NULL;
2009	int error = `0`;
2010	int copycnt = `0`;
2011	int i = `0`;
2012
2013	*psmxp = NULL;
2014
2015	if (px_args->mac_extensions_size < PS_MAC_EXTENSIONS_SIZE(`1`) \|\|
2016	px_args->mac_extensions_size > PAGE_SIZE) {
2017	error = EINVAL;
2018	goto bad;
2019	}
2020
2021	MALLOC(psmx, _posix_spawn_mac_policy_extensions_t, px_args->mac_extensions_size, M_TEMP, M_WAITOK);
2022	if ((error = copyin(px_args->mac_extensions, psmx, px_args->mac_extensions_size)) != `0`)
2023	goto bad;
2024
2025	size_t extsize = PS_MAC_EXTENSIONS_SIZE(psmx->psmx_count);
2026	if (extsize == `0` \|\| extsize > px_args->mac_extensions_size) {
2027	error = EINVAL;
2028	goto bad;
2029	}
2030
2031	for (i = `0`; i < psmx->psmx_count; i++) {
2032	_ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[i];
2033	if (extension->datalen == `0` \|\| extension->datalen > PAGE_SIZE) {
2034	error = EINVAL;
2035	goto bad;
2036	}
2037	}
2038
2039	for (copycnt = `0`; copycnt < psmx->psmx_count; copycnt++) {
2040	_ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[copycnt];
2041	void *data = NULL;
2042
2043	MALLOC(data, void *, extension->datalen, M_TEMP, M_WAITOK);
2044	if ((error = copyin(extension->data, data, extension->datalen)) != `0`) {
2045	FREE(data, M_TEMP);
2046	goto bad;
2047	}
2048	extension->datap = data;
2049	}
2050
2051	*psmxp = psmx;
2052	return `0`;
2053
2054	bad:
2055	if (psmx != NULL) {
2056	for (i = `0`; i < copycnt; i++)
2057	FREE(psmx->psmx_extensions[i].datap, M_TEMP);
2058	FREE(psmx, M_TEMP);
2059	}
2060	return error;
2061	}
2062
2063	static void
2064	spawn_free_macpolicyinfo(_posix_spawn_mac_policy_extensions_t psmx)
2065	{
2066	int i;
2067
2068	if (psmx == NULL)
2069	return;
2070	for (i = `0`; i < psmx->psmx_count; i++)
2071	FREE(psmx->psmx_extensions[i].datap, M_TEMP);
2072	FREE(psmx, M_TEMP);
2073	}
2074	#endif /* CONFIG_MACF */
2075
2076	#if CONFIG_COALITIONS
2077	static inline void spawn_coalitions_release_all(coalition_t coal[COALITION_NUM_TYPES])
2078	{
2079	for (int c = `0`; c < COALITION_NUM_TYPES; c++) {
2080	if (coal[c]) {
2081	coalition_remove_active(coal[c]);
2082	coalition_release(coal[c]);
2083	}
2084	}
2085	}
2086	#endif
2087
2088	#if CONFIG_PERSONAS
2089	static int spawn_validate_persona(struct _posix_spawn_persona_info *px_persona)
2090	{
2091	int error = `0`;
2092	struct persona *persona = NULL;
2093	int verify = px_persona->pspi_flags & POSIX_SPAWN_PERSONA_FLAGS_VERIFY;
2094
2095	/*
2096	* TODO: rdar://problem/19981151
2097	* Add entitlement check!
2098	*/
2099	if (!kauth_cred_issuser(kauth_cred_get()))
2100	return EPERM;
2101
2102	persona = persona_lookup(px_persona->pspi_id);
2103	if (!persona) {
2104	error = ESRCH;
2105	goto out;
2106	}
2107
2108	if (verify) {
2109	if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_UID) {
2110	if (px_persona->pspi_uid != persona_get_uid(persona)) {
2111	error = EINVAL;
2112	goto out;
2113	}
2114	}
2115	if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_GID) {
2116	if (px_persona->pspi_gid != persona_get_gid(persona)) {
2117	error = EINVAL;
2118	goto out;
2119	}
2120	}
2121	if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_GROUPS) {
2122	unsigned ngroups = `0`;
2123	gid_t groups[NGROUPS_MAX];
2124
2125	if (persona_get_groups(persona, &ngroups, groups,
2126	px_persona->pspi_ngroups) != `0`) {
2127	error = EINVAL;
2128	goto out;
2129	}
2130	if (ngroups != px_persona->pspi_ngroups) {
2131	error = EINVAL;
2132	goto out;
2133	}
2134	while (ngroups--) {
2135	if (px_persona->pspi_groups[ngroups] != groups[ngroups]) {
2136	error = EINVAL;
2137	goto out;
2138	}
2139	}
2140	if (px_persona->pspi_gmuid != persona_get_gmuid(persona)) {
2141	error = EINVAL;
2142	goto out;
2143	}
2144	}
2145	}
2146
2147	out:
2148	if (persona)
2149	persona_put(persona);
2150
2151	return error;
2152	}
2153
2154	static int spawn_persona_adopt(proc_t p, struct _posix_spawn_persona_info *px_persona)
2155	{
2156	int ret;
2157	kauth_cred_t cred;
2158	struct persona *persona = NULL;
2159	int override = !!(px_persona->pspi_flags & POSIX_SPAWN_PERSONA_FLAGS_OVERRIDE);
2160
2161	if (!override)
2162	return persona_proc_adopt_id(p, px_persona->pspi_id, NULL);
2163
2164	/*
2165	* we want to spawn into the given persona, but we want to override
2166	* the kauth with a different UID/GID combo
2167	*/
2168	persona = persona_lookup(px_persona->pspi_id);
2169	if (!persona)
2170	return ESRCH;
2171
2172	cred = persona_get_cred(persona);
2173	if (!cred) {
2174	ret = EINVAL;
2175	goto out;
2176	}
2177
2178	if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_UID) {
2179	cred = kauth_cred_setresuid(cred,
2180	px_persona->pspi_uid,
2181	px_persona->pspi_uid,
2182	px_persona->pspi_uid,
2183	KAUTH_UID_NONE);
2184	}
2185
2186	if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_GID) {
2187	cred = kauth_cred_setresgid(cred,
2188	px_persona->pspi_gid,
2189	px_persona->pspi_gid,
2190	px_persona->pspi_gid);
2191	}
2192
2193	if (px_persona->pspi_flags & POSIX_SPAWN_PERSONA_GROUPS) {
2194	cred = kauth_cred_setgroups(cred,
2195	px_persona->pspi_groups,
2196	px_persona->pspi_ngroups,
2197	px_persona->pspi_gmuid);
2198	}
2199
2200	ret = persona_proc_adopt(p, persona, cred);
2201
2202	out:
2203	persona_put(persona);
2204	return ret;
2205	}
2206	#endif
2207
2208	/*
2209	* posix_spawn
2210	*
2211	* Parameters: uap->pid Pointer to pid return area
2212	* uap->path File name to exec
2213	* uap->argv Argument list
2214	* uap->envp Environment list
2215	*
2216	* Returns: 0 Success
2217	* EINVAL Invalid argument
2218	* ENOTSUP Not supported
2219	* ENOEXEC Executable file format error
2220	* exec_activate_image:EINVAL Invalid argument
2221	* exec_activate_image:EACCES Permission denied
2222	* exec_activate_image:EINTR Interrupted function
2223	* exec_activate_image:ENOMEM Not enough space
2224	* exec_activate_image:EFAULT Bad address
2225	* exec_activate_image:ENAMETOOLONG Filename too long
2226	* exec_activate_image:ENOEXEC Executable file format error
2227	* exec_activate_image:ETXTBSY Text file busy [misuse of error code]
2228	* exec_activate_image:EBADEXEC The executable is corrupt/unknown
2229	* exec_activate_image:???
2230	* mac_execve_enter:???
2231	*
2232	* TODO: Expect to need __mac_posix_spawn() at some point...
2233	* Handle posix_spawnattr_t
2234	* Handle posix_spawn_file_actions_t
2235	*/
2236	int
2237	posix_spawn(proc_t ap, struct posix_spawn_args uap, int32_t retval)
2238	{
2239	proc_t p = ap; / quiet bogus GCC vfork() warning /
2240	user_addr_t pid = uap->pid;
2241	int ival[`2`]; / dummy retval for setpgid() /
2242	char *bufp = NULL;
2243	struct image_params *imgp;
2244	struct vnode_attr *vap;
2245	struct vnode_attr *origvap;
2246	struct uthread uthread = `0`; /* compiler complains if not set to 0/
2247	int error, sig;
2248	int is_64 = IS_64BIT_PROCESS(p);
2249	struct vfs_context context;
2250	struct user__posix_spawn_args_desc px_args;
2251	struct _posix_spawnattr px_sa;
2252	_posix_spawn_file_actions_t px_sfap = NULL;
2253	_posix_spawn_port_actions_t px_spap = NULL;
2254	struct __kern_sigaction vec;
2255	boolean_t spawn_no_exec = FALSE;
2256	boolean_t proc_transit_set = TRUE;
2257	boolean_t exec_done = FALSE;
2258	int portwatch_count = `0`;
2259	ipc_port_t * portwatch_ports = NULL;
2260	vm_size_t px_sa_offset = offsetof(struct _posix_spawnattr, psa_ports);
2261	task_t old_task = current_task();
2262	task_t new_task = NULL;
2263	boolean_t should_release_proc_ref = FALSE;
2264	void *inherit = NULL;
2265	#if CONFIG_PERSONAS
2266	struct _posix_spawn_persona_info *px_persona = NULL;
2267	#endif
2268
2269	/*
2270	* Allocate a big chunk for locals instead of using stack since these
2271	* structures are pretty big.
2272	*/
2273	MALLOC(bufp, char , (sizeof(imgp) + sizeof(vap) + sizeof(origvap)), M_TEMP, M_WAITOK \| M_ZERO);
2274	imgp = (struct image_params *) bufp;
2275	if (bufp == NULL) {
2276	error = ENOMEM;
2277	goto bad;
2278	}
2279	vap = (struct vnode_attr ) (bufp + sizeof(imgp));
2280	origvap = (struct vnode_attr ) (bufp + sizeof(imgp) + sizeof(*vap));
2281
2282	/ Initialize the common data in the image_params structure /
2283	imgp->ip_user_fname = uap->path;
2284	imgp->ip_user_argv = uap->argv;
2285	imgp->ip_user_envv = uap->envp;
2286	imgp->ip_vattr = vap;
2287	imgp->ip_origvattr = origvap;
2288	imgp->ip_vfs_context = &context;
2289	imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT_ADDR : IMGPF_NONE);
2290	imgp->ip_seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32);
2291	imgp->ip_mac_return = `0`;
2292	imgp->ip_px_persona = NULL;
2293	imgp->ip_cs_error = OS_REASON_NULL;
2294
2295	if (uap->adesc != USER_ADDR_NULL) {
2296	if(is_64) {
2297	error = copyin(uap->adesc, &px_args, sizeof(px_args));
2298	} else {
2299	struct user32__posix_spawn_args_desc px_args32;
2300
2301	error = copyin(uap->adesc, &px_args32, sizeof(px_args32));
2302
2303	/*
2304	* Convert arguments descriptor from external 32 bit
2305	* representation to internal 64 bit representation
2306	*/
2307	px_args.attr_size = px_args32.attr_size;
2308	px_args.attrp = CAST_USER_ADDR_T(px_args32.attrp);
2309	px_args.file_actions_size = px_args32.file_actions_size;
2310	px_args.file_actions = CAST_USER_ADDR_T(px_args32.file_actions);
2311	px_args.port_actions_size = px_args32.port_actions_size;
2312	px_args.port_actions = CAST_USER_ADDR_T(px_args32.port_actions);
2313	px_args.mac_extensions_size = px_args32.mac_extensions_size;
2314	px_args.mac_extensions = CAST_USER_ADDR_T(px_args32.mac_extensions);
2315	px_args.coal_info_size = px_args32.coal_info_size;
2316	px_args.coal_info = CAST_USER_ADDR_T(px_args32.coal_info);
2317	px_args.persona_info_size = px_args32.persona_info_size;
2318	px_args.persona_info = CAST_USER_ADDR_T(px_args32.persona_info);
2319	}
2320	if (error)
2321	goto bad;
2322
2323	if (px_args.attr_size != `0`) {
2324	/*
2325	* We are not copying the port_actions pointer,
2326	* because we already have it from px_args.
2327	* This is a bit fragile: <rdar://problem/16427422>
2328	*/
2329
2330	if ((error = copyin(px_args.attrp, &px_sa, px_sa_offset)) != `0`) {
2331	goto bad;
2332	}
2333
2334	bzero( (void )( (unsigned* long) &px_sa + px_sa_offset), sizeof(px_sa) - px_sa_offset );
2335
2336	imgp->ip_px_sa = &px_sa;
2337	}
2338	if (px_args.file_actions_size != `0`) {
2339	/ Limit file_actions to allowed number of open files /
2340	int maxfa = (p->p_limit ? p->p_rlimit[RLIMIT_NOFILE].rlim_cur : NOFILE);
2341	size_t maxfa_size = PSF_ACTIONS_SIZE(maxfa);
2342	if (px_args.file_actions_size < PSF_ACTIONS_SIZE(`1`) \|\|
2343	maxfa_size == `0` \|\| px_args.file_actions_size > maxfa_size) {
2344	error = EINVAL;
2345	goto bad;
2346	}
2347	MALLOC(px_sfap, _posix_spawn_file_actions_t, px_args.file_actions_size, M_TEMP, M_WAITOK);
2348	if (px_sfap == NULL) {
2349	error = ENOMEM;
2350	goto bad;
2351	}
2352	imgp->ip_px_sfa = px_sfap;
2353
2354	if ((error = copyin(px_args.file_actions, px_sfap,
2355	px_args.file_actions_size)) != `0`)
2356	goto bad;
2357
2358	/ Verify that the action count matches the struct size /
2359	size_t psfsize = PSF_ACTIONS_SIZE(px_sfap->psfa_act_count);
2360	if (psfsize == `0` \|\| psfsize != px_args.file_actions_size) {
2361	error = EINVAL;
2362	goto bad;
2363	}
2364	}
2365	if (px_args.port_actions_size != `0`) {
2366	/ Limit port_actions to one page of data /
2367	if (px_args.port_actions_size < PS_PORT_ACTIONS_SIZE(`1`) \|\|
2368	px_args.port_actions_size > PAGE_SIZE) {
2369	error = EINVAL;
2370	goto bad;
2371	}
2372
2373	MALLOC(px_spap, _posix_spawn_port_actions_t,
2374	px_args.port_actions_size, M_TEMP, M_WAITOK);
2375	if (px_spap == NULL) {
2376	error = ENOMEM;
2377	goto bad;
2378	}
2379	imgp->ip_px_spa = px_spap;
2380
2381	if ((error = copyin(px_args.port_actions, px_spap,
2382	px_args.port_actions_size)) != `0`)
2383	goto bad;
2384
2385	/ Verify that the action count matches the struct size /
2386	size_t pasize = PS_PORT_ACTIONS_SIZE(px_spap->pspa_count);
2387	if (pasize == `0` \|\| pasize != px_args.port_actions_size) {
2388	error = EINVAL;
2389	goto bad;
2390	}
2391	}
2392	#if CONFIG_PERSONAS
2393	/ copy in the persona info /
2394	if (px_args.persona_info_size != `0` && px_args.persona_info != `0`) {
2395	/ for now, we need the exact same struct in user space /
2396	if (px_args.persona_info_size != sizeof(*px_persona)) {
2397	error = ERANGE;
2398	goto bad;
2399	}
2400
2401	MALLOC(px_persona, struct _posix_spawn_persona_info *, px_args.persona_info_size, M_TEMP, M_WAITOK\|M_ZERO);
2402	if (px_persona == NULL) {
2403	error = ENOMEM;
2404	goto bad;
2405	}
2406	imgp->ip_px_persona = px_persona;
2407
2408	if ((error = copyin(px_args.persona_info, px_persona,
2409	px_args.persona_info_size)) != `0`)
2410	goto bad;
2411	if ((error = spawn_validate_persona(px_persona)) != `0`)
2412	goto bad;
2413	}
2414	#endif
2415	#if CONFIG_MACF
2416	if (px_args.mac_extensions_size != `0`) {
2417	if ((error = spawn_copyin_macpolicyinfo(&px_args, (_posix_spawn_mac_policy_extensions_t *)&imgp->ip_px_smpx)) != `0`)
2418	goto bad;
2419	}
2420	#endif /* CONFIG_MACF */
2421	}
2422
2423	/ set uthread to parent /
2424	uthread = get_bsdthread_info(current_thread());
2425
2426	/*
2427	* <rdar://6640530>; this does not result in a behaviour change
2428	* relative to Leopard, so there should not be any existing code
2429	* which depends on it.
2430	*/
2431	if (uthread->uu_flag & UT_VFORK) {
2432	error = EINVAL;
2433	goto bad;
2434	}
2435
2436	/*
2437	* If we don't have the extension flag that turns "posix_spawn()"
2438	* into "execve() with options", then we will be creating a new
2439	* process which does not inherit memory from the parent process,
2440	* which is one of the most expensive things about using fork()
2441	* and execve().
2442	*/
2443	if (imgp->ip_px_sa == NULL \|\| !(px_sa.psa_flags & POSIX_SPAWN_SETEXEC)){
2444
2445	/ Set the new task's coalition, if it is requested. /
2446	coalition_t coal[COALITION_NUM_TYPES] = { COALITION_NULL };
2447	#if CONFIG_COALITIONS
2448	int i, ncoals;
2449	kern_return_t kr = KERN_SUCCESS;
2450	struct _posix_spawn_coalition_info coal_info;
2451	int coal_role[COALITION_NUM_TYPES];
2452
2453	if (imgp->ip_px_sa == NULL \|\| !px_args.coal_info)
2454	goto do_fork1;
2455
2456	memset(&coal_info, `0`, sizeof(coal_info));
2457
2458	if (px_args.coal_info_size > sizeof(coal_info))
2459	px_args.coal_info_size = sizeof(coal_info);
2460	error = copyin(px_args.coal_info,
2461	&coal_info, px_args.coal_info_size);
2462	if (error != `0`)
2463	goto bad;
2464
2465	ncoals = `0`;
2466	for (i = `0`; i < COALITION_NUM_TYPES; i++) {
2467	uint64_t cid = coal_info.psci_info[i].psci_id;
2468	if (cid != `0`) {
2469	/*
2470	* don't allow tasks which are not in a
2471	* privileged coalition to spawn processes
2472	* into coalitions other than their own
2473	*/
2474	if (!task_is_in_privileged_coalition(p->task, i)) {
2475	coal_dbg("ERROR: %d not in privilegd "
2476	"coalition of type %d",
2477	p->p_pid, i);
2478	spawn_coalitions_release_all(coal);
2479	error = EPERM;
2480	goto bad;
2481	}
2482
2483	coal_dbg("searching for coalition id:%llu", cid);
2484	/*
2485	* take a reference and activation on the
2486	* coalition to guard against free-while-spawn
2487	* races
2488	*/
2489	coal[i] = coalition_find_and_activate_by_id(cid);
2490	if (coal[i] == COALITION_NULL) {
2491	coal_dbg("could not find coalition id:%llu "
2492	"(perhaps it has been terminated or reaped)", cid);
2493	/*
2494	* release any other coalition's we
2495	* may have a reference to
2496	*/
2497	spawn_coalitions_release_all(coal);
2498	error = ESRCH;
2499	goto bad;
2500	}
2501	if (coalition_type(coal[i]) != i) {
2502	coal_dbg("coalition with id:%lld is not of type:%d"
2503	" (it's type:%d)", cid, i, coalition_type(coal[i]));
2504	error = ESRCH;
2505	goto bad;
2506	}
2507	coal_role[i] = coal_info.psci_info[i].psci_role;
2508	ncoals++;
2509	}
2510	}
2511	if (ncoals < COALITION_NUM_TYPES) {
2512	/*
2513	* If the user is attempting to spawn into a subset of
2514	* the known coalition types, then make sure they have
2515	* _at_least_ specified a resource coalition. If not,
2516	* the following fork1() call will implicitly force an
2517	* inheritance from 'p' and won't actually spawn the
2518	* new task into the coalitions the user specified.
2519	* (also the call to coalitions_set_roles will panic)
2520	*/
2521	if (coal[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
2522	spawn_coalitions_release_all(coal);
2523	error = EINVAL;
2524	goto bad;
2525	}
2526	}
2527	do_fork1:
2528	#endif /* CONFIG_COALITIONS */
2529
2530	/*
2531	* note that this will implicitly inherit the
2532	* caller's persona (if it exists)
2533	*/
2534	error = fork1(p, &imgp->ip_new_thread, PROC_CREATE_SPAWN, coal);
2535	/ returns a thread and task reference /
2536
2537	if (error == `0`) {
2538	new_task = get_threadtask(imgp->ip_new_thread);
2539	}
2540	#if CONFIG_COALITIONS
2541	/ set the roles of this task within each given coalition /
2542	if (error == `0`) {
2543	kr = coalitions_set_roles(coal, new_task, coal_role);
2544	if (kr != KERN_SUCCESS)
2545	error = EINVAL;
2546	if (kdebug_debugid_enabled(MACHDBG_CODE(DBG_MACH_COALITION,
2547	MACH_COALITION_ADOPT))) {
2548	for (i = `0`; i < COALITION_NUM_TYPES; i++) {
2549	if (coal[i] != COALITION_NULL) {
2550	/*
2551	* On 32-bit targets, uniqueid
2552	* will get truncated to 32 bits
2553	*/
2554	KDBG_RELEASE(MACHDBG_CODE(
2555	DBG_MACH_COALITION,
2556	MACH_COALITION_ADOPT),
2557	coalition_id(coal[i]),
2558	get_task_uniqueid(new_task));
2559	}
2560	}
2561	}
2562	}
2563
2564	/ drop our references and activations - fork1() now holds them /
2565	spawn_coalitions_release_all(coal);
2566	#endif /* CONFIG_COALITIONS */
2567	if (error != `0`) {
2568	goto bad;
2569	}
2570	imgp->ip_flags \|= IMGPF_SPAWN; / spawn w/o exec /
2571	spawn_no_exec = TRUE; / used in later tests /
2572
2573	#if CONFIG_PERSONAS
2574	/*
2575	* If the parent isn't in a persona (launchd), and
2576	* hasn't specified a new persona for the process,
2577	* then we'll put the process into the system persona
2578	*
2579	* TODO: this will have to be re-worked because as of
2580	* now, without any launchd adoption, the resulting
2581	* xpcproxy process will not have sufficient
2582	* privileges to setuid/gid.
2583	*/
2584	#if 0
2585	if (!proc_has_persona(p) && imgp->ip_px_persona == NULL) {
2586	MALLOC(px_persona, struct _posix_spawn_persona_info *,
2587	sizeof(*px_persona), M_TEMP, M_WAITOK\|M_ZERO);
2588	if (px_persona == NULL) {
2589	error = ENOMEM;
2590	goto bad;
2591	}
2592	px_persona->pspi_id = persona_get_id(g_system_persona);
2593	imgp->ip_px_persona = px_persona;
2594	}
2595	#endif /* 0 */
2596	#endif /* CONFIG_PERSONAS */
2597	} else {
2598	/*
2599	* For execve case, create a new task and thread
2600	* which points to current_proc. The current_proc will point
2601	* to the new task after image activation and proc ref drain.
2602	*
2603	* proc (current_proc) <----- old_task (current_task)
2604	* ^ \| ^
2605	* \| \| \|
2606	* \| ----------------------------------
2607	* \|
2608	* --------- new_task (task marked as TF_EXEC_COPY)
2609	*
2610	* After image activation, the proc will point to the new task
2611	* and would look like following.
2612	*
2613	* proc (current_proc) <----- old_task (current_task, marked as TPF_DID_EXEC)
2614	* ^ \|
2615	* \| \|
2616	* \| ----------> new_task
2617	* \| \|
2618	* -----------------
2619	*
2620	* During exec any transition from new_task -> proc is fine, but don't allow
2621	* transition from proc->task, since it will modify old_task.
2622	*/
2623	imgp->ip_new_thread = fork_create_child(old_task,
2624	NULL,
2625	p,
2626	FALSE,
2627	p->p_flag & P_LP64,
2628	task_get_64bit_data(old_task),
2629	TRUE);
2630	/ task and thread ref returned by fork_create_child /
2631	if (imgp->ip_new_thread == NULL) {
2632	error = ENOMEM;
2633	goto bad;
2634	}
2635
2636	new_task = get_threadtask(imgp->ip_new_thread);
2637	imgp->ip_flags \|= IMGPF_EXEC;
2638	}
2639
2640	if (spawn_no_exec) {
2641	p = (proc_t)get_bsdthreadtask_info(imgp->ip_new_thread);
2642
2643	/*
2644	* We had to wait until this point before firing the
2645	* proc:::create probe, otherwise p would not point to the
2646	* child process.
2647	*/
2648	DTRACE_PROC1(create, proc_t, p);
2649	}
2650	assert(p != NULL);
2651
2652	context.vc_thread = imgp->ip_new_thread;
2653	context.vc_ucred = p->p_ucred; / XXX must NOT be kauth_cred_get() /
2654
2655	/*
2656	* Post fdcopy(), pre exec_handle_sugid() - this is where we want
2657	* to handle the file_actions. Since vfork() also ends up setting
2658	* us into the parent process group, and saved off the signal flags,
2659	* this is also where we want to handle the spawn flags.
2660	*/
2661
2662	/ Has spawn file actions? /
2663	if (imgp->ip_px_sfa != NULL) {
2664	/*
2665	* The POSIX_SPAWN_CLOEXEC_DEFAULT flag
2666	* is handled in exec_handle_file_actions().
2667	*/
2668	if ((error = exec_handle_file_actions(imgp,
2669	imgp->ip_px_sa != NULL ? px_sa.psa_flags : `0`)) != `0`)
2670	goto bad;
2671	}
2672
2673	/ Has spawn port actions? /
2674	if (imgp->ip_px_spa != NULL) {
2675	boolean_t is_adaptive = FALSE;
2676	boolean_t portwatch_present = FALSE;
2677
2678	/ Will this process become adaptive? The apptype isn't ready yet, so we can't look there. /
2679	if (imgp->ip_px_sa != NULL && px_sa.psa_apptype == POSIX_SPAWN_PROC_TYPE_DAEMON_ADAPTIVE)
2680	is_adaptive = TRUE;
2681
2682	/*
2683	* portwatch only:
2684	* Allocate a place to store the ports we want to bind to the new task
2685	* We can't bind them until after the apptype is set.
2686	*/
2687	if (px_spap->pspa_count != `0` && is_adaptive) {
2688	portwatch_count = px_spap->pspa_count;
2689	MALLOC(portwatch_ports, ipc_port_t , (sizeof(ipc_port_t) portwatch_count), M_TEMP, M_WAITOK \| M_ZERO);
2690	} else {
2691	portwatch_ports = NULL;
2692	}
2693
2694	if ((error = exec_handle_port_actions(imgp, &portwatch_present, portwatch_ports)) != `0`)
2695	goto bad;
2696
2697	if (portwatch_present == FALSE && portwatch_ports != NULL) {
2698	FREE(portwatch_ports, M_TEMP);
2699	portwatch_ports = NULL;
2700	portwatch_count = `0`;
2701	}
2702	}
2703
2704	/ Has spawn attr? /
2705	if (imgp->ip_px_sa != NULL) {
2706	/*
2707	* Set the process group ID of the child process; this has
2708	* to happen before the image activation.
2709	*/
2710	if (px_sa.psa_flags & POSIX_SPAWN_SETPGROUP) {
2711	struct setpgid_args spga;
2712	spga.pid = p->p_pid;
2713	spga.pgid = px_sa.psa_pgroup;
2714	/*
2715	* Effectively, call setpgid() system call; works
2716	* because there are no pointer arguments.
2717	*/
2718	if((error = setpgid(p, &spga, ival)) != `0`)
2719	goto bad;
2720	}
2721
2722	/*
2723	* Reset UID/GID to parent's RUID/RGID; This works only
2724	* because the operation occurs after the vfork() and
2725	* before the call to exec_handle_sugid() by the image
2726	* activator called from exec_activate_image(). POSIX
2727	* requires that any setuid/setgid bits on the process
2728	* image will take precedence over the spawn attributes
2729	* (re)setting them.
2730	*
2731	* Modifications to p_ucred must be guarded using the
2732	* proc's ucred lock. This prevents others from accessing
2733	* a garbage credential.
2734	*/
2735	while (px_sa.psa_flags & POSIX_SPAWN_RESETIDS) {
2736	kauth_cred_t my_cred = kauth_cred_proc_ref(p);
2737	kauth_cred_t my_new_cred = kauth_cred_setuidgid(my_cred, kauth_cred_getruid(my_cred), kauth_cred_getrgid(my_cred));
2738
2739	if (my_cred == my_new_cred) {
2740	kauth_cred_unref(&my_cred);
2741	break;
2742	}
2743
2744	/ update cred on proc /
2745	proc_ucred_lock(p);
2746
2747	if (p->p_ucred != my_cred) {
2748	proc_ucred_unlock(p);
2749	kauth_cred_unref(&my_new_cred);
2750	continue;
2751	}
2752
2753	/ donate cred reference on my_new_cred to p->p_ucred /
2754	p->p_ucred = my_new_cred;
2755	PROC_UPDATE_CREDS_ONPROC(p);
2756	proc_ucred_unlock(p);
2757
2758	/ drop additional reference that was taken on the previous cred /
2759	kauth_cred_unref(&my_cred);
2760	}
2761
2762	#if CONFIG_PERSONAS
2763	if (spawn_no_exec && imgp->ip_px_persona != NULL) {
2764	/*
2765	* If we were asked to spawn a process into a new persona,
2766	* do the credential switch now (which may override the UID/GID
2767	* inherit done just above). It's important to do this switch
2768	* before image activation both for reasons stated above, and
2769	* to ensure that the new persona has access to the image/file
2770	* being executed.
2771	*/
2772	error = spawn_persona_adopt(p, imgp->ip_px_persona);
2773	if (error != `0`)
2774	goto bad;
2775	}
2776	#endif /* CONFIG_PERSONAS */
2777	#if !SECURE_KERNEL
2778	/*
2779	* Disable ASLR for the spawned process.
2780	*
2781	* But only do so if we are not embedded + RELEASE.
2782	* While embedded allows for a boot-arg (-disable_aslr)
2783	* to deal with this (which itself is only honored on
2784	* DEVELOPMENT or DEBUG builds of xnu), it is often
2785	* useful or necessary to disable ASLR on a per-process
2786	* basis for unit testing and debugging.
2787	*/
2788	if (px_sa.psa_flags & _POSIX_SPAWN_DISABLE_ASLR)
2789	OSBitOrAtomic(P_DISABLE_ASLR, &p->p_flag);
2790	#endif /* !SECURE_KERNEL */
2791
2792	/ Randomize high bits of ASLR slide /
2793	if (px_sa.psa_flags & _POSIX_SPAWN_HIGH_BITS_ASLR)
2794	imgp->ip_flags \|= IMGPF_HIGH_BITS_ASLR;
2795
2796	/*
2797	* Forcibly disallow execution from data pages for the spawned process
2798	* even if it would otherwise be permitted by the architecture default.
2799	*/
2800	if (px_sa.psa_flags & _POSIX_SPAWN_ALLOW_DATA_EXEC)
2801	imgp->ip_flags \|= IMGPF_ALLOW_DATA_EXEC;
2802	}
2803
2804	/*
2805	* Disable ASLR during image activation. This occurs either if the
2806	* _POSIX_SPAWN_DISABLE_ASLR attribute was found above or if
2807	* P_DISABLE_ASLR was inherited from the parent process.
2808	*/
2809	if (p->p_flag & P_DISABLE_ASLR)
2810	imgp->ip_flags \|= IMGPF_DISABLE_ASLR;
2811
2812	/*
2813	* Clear transition flag so we won't hang if exec_activate_image() causes
2814	* an automount (and launchd does a proc sysctl to service it).
2815	*
2816	* <rdar://problem/6848672>, <rdar://problem/5959568>.
2817	*/
2818	if (spawn_no_exec) {
2819	proc_transend(p, `0`);
2820	proc_transit_set = `0`;
2821	}
2822
2823	#if MAC_SPAWN /* XXX */
2824	if (uap->mac_p != USER_ADDR_NULL) {
2825	error = mac_execve_enter(uap->mac_p, imgp);
2826	if (error)
2827	goto bad;
2828	}
2829	#endif
2830
2831	/*
2832	* Activate the image
2833	*/
2834	error = exec_activate_image(imgp);
2835
2836	if (error == `0` && !spawn_no_exec) {
2837	p = proc_exec_switch_task(p, old_task, new_task, imgp->ip_new_thread);
2838	/ proc ref returned /
2839	should_release_proc_ref = TRUE;
2840
2841	/*
2842	* Need to transfer pending watch port boosts to the new task while still making
2843	* sure that the old task remains in the importance linkage. Create an importance
2844	* linkage from old task to new task, then switch the task importance base
2845	* of old task and new task. After the switch the port watch boost will be
2846	* boosting the new task and new task will be donating importance to old task.
2847	*/
2848	inherit = ipc_importance_exec_switch_task(old_task, new_task);
2849	}
2850
2851	if (error == `0`) {
2852	/ process completed the exec /
2853	exec_done = TRUE;
2854	} else if (error == -`1`) {
2855	/ Image not claimed by any activator? /
2856	error = ENOEXEC;
2857	}
2858
2859	/*
2860	* If we have a spawn attr, and it contains signal related flags,
2861	* then we need to process them in the "context" of the new child
2862	* process, so we have to process it following image activation,
2863	* prior to making the thread runnable in user space. This is
2864	* necessitated by some signal information being per-thread rather
2865	* than per-process, and we don't have the new allocation in hand
2866	* until after the image is activated.
2867	*/
2868	if (!error && imgp->ip_px_sa != NULL) {
2869	thread_t child_thread = imgp->ip_new_thread;
2870	uthread_t child_uthread = get_bsdthread_info(child_thread);
2871
2872	/*
2873	* Mask a list of signals, instead of them being unmasked, if
2874	* they were unmasked in the parent; note that some signals
2875	* are not maskable.
2876	*/
2877	if (px_sa.psa_flags & POSIX_SPAWN_SETSIGMASK)
2878	child_uthread->uu_sigmask = (px_sa.psa_sigmask & ~sigcantmask);
2879	/*
2880	* Default a list of signals instead of ignoring them, if
2881	* they were ignored in the parent. Note that we pass
2882	* spawn_no_exec to setsigvec() to indicate that we called
2883	* fork1() and therefore do not need to call proc_signalstart()
2884	* internally.
2885	*/
2886	if (px_sa.psa_flags & POSIX_SPAWN_SETSIGDEF) {
2887	vec.sa_handler = SIG_DFL;
2888	vec.sa_tramp = `0`;
2889	vec.sa_mask = `0`;
2890	vec.sa_flags = `0`;
2891	for (sig = `1`; sig < NSIG; sig++)
2892	if (px_sa.psa_sigdefault & (`1` << (sig-`1`))) {
2893	error = setsigvec(p, child_thread, sig, &vec, spawn_no_exec);
2894	}
2895	}
2896
2897	/*
2898	* Activate the CPU usage monitor, if requested. This is done via a task-wide, per-thread CPU
2899	* usage limit, which will generate a resource exceeded exception if any one thread exceeds the
2900	* limit.
2901	*
2902	* Userland gives us interval in seconds, and the kernel SPI expects nanoseconds.
2903	*/
2904	if (px_sa.psa_cpumonitor_percent != `0`) {
2905	/*
2906	* Always treat a CPU monitor activation coming from spawn as entitled. Requiring
2907	* an entitlement to configure the monitor a certain way seems silly, since
2908	* whomever is turning it on could just as easily choose not to do so.
2909	*/
2910	error = proc_set_task_ruse_cpu(p->task,
2911	TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC,
2912	px_sa.psa_cpumonitor_percent,
2913	px_sa.psa_cpumonitor_interval * NSEC_PER_SEC,
2914	`0`, TRUE);
2915	}
2916	}
2917
2918	bad:
2919
2920	if (error == `0`) {
2921	/ reset delay idle sleep status if set /
2922	#if !CONFIG_EMBEDDED
2923	if ((p->p_flag & P_DELAYIDLESLEEP) == P_DELAYIDLESLEEP)
2924	OSBitAndAtomic(~((uint32_t)P_DELAYIDLESLEEP), &p->p_flag);
2925	#endif /* !CONFIG_EMBEDDED */
2926	/ upon successful spawn, re/set the proc control state /
2927	if (imgp->ip_px_sa != NULL) {
2928	switch (px_sa.psa_pcontrol) {
2929	case POSIX_SPAWN_PCONTROL_THROTTLE:
2930	p->p_pcaction = P_PCTHROTTLE;
2931	break;
2932	case POSIX_SPAWN_PCONTROL_SUSPEND:
2933	p->p_pcaction = P_PCSUSP;
2934	break;
2935	case POSIX_SPAWN_PCONTROL_KILL:
2936	p->p_pcaction = P_PCKILL;
2937	break;
2938	case POSIX_SPAWN_PCONTROL_NONE:
2939	default:
2940	p->p_pcaction = `0`;
2941	break;
2942	};
2943	}
2944	exec_resettextvp(p, imgp);
2945
2946	#if CONFIG_MEMORYSTATUS
2947	/ Has jetsam attributes? /
2948	if (imgp->ip_px_sa != NULL && (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_SET)) {
2949	/*
2950	* With 2-level high-water-mark support, POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND is no
2951	* longer relevant, as background limits are described via the inactive limit slots.
2952	*
2953	* That said, however, if the POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND is passed in,
2954	* we attempt to mimic previous behavior by forcing the BG limit data into the
2955	* inactive/non-fatal mode and force the active slots to hold system_wide/fatal mode.
2956	*/
2957	if (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND) {
2958	memorystatus_update(p, px_sa.psa_priority, `0`,
2959	(px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY),
2960	TRUE,
2961	-`1`, TRUE,
2962	px_sa.psa_memlimit_inactive, FALSE);
2963	} else {
2964	memorystatus_update(p, px_sa.psa_priority, `0`,
2965	(px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY),
2966	TRUE,
2967	px_sa.psa_memlimit_active,
2968	(px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_MEMLIMIT_ACTIVE_FATAL),
2969	px_sa.psa_memlimit_inactive,
2970	(px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_MEMLIMIT_INACTIVE_FATAL));
2971	}
2972
2973	}
2974	#endif /* CONFIG_MEMORYSTATUS */
2975	if (imgp->ip_px_sa != NULL && px_sa.psa_thread_limit > `0`) {
2976	task_set_thread_limit(new_task, (uint16_t)px_sa.psa_thread_limit);
2977	}
2978	}
2979
2980	/*
2981	* If we successfully called fork1(), we always need to do this;
2982	* we identify this case by noting the IMGPF_SPAWN flag. This is
2983	* because we come back from that call with signals blocked in the
2984	* child, and we have to unblock them, but we want to wait until
2985	* after we've performed any spawn actions. This has to happen
2986	* before check_for_signature(), which uses psignal.
2987	*/
2988	if (spawn_no_exec) {
2989	if (proc_transit_set)
2990	proc_transend(p, `0`);
2991
2992	/*
2993	* Drop the signal lock on the child which was taken on our
2994	* behalf by forkproc()/cloneproc() to prevent signals being
2995	* received by the child in a partially constructed state.
2996	*/
2997	proc_signalend(p, `0`);
2998
2999	/ flag the 'fork' has occurred /
3000	proc_knote(p->p_pptr, NOTE_FORK \| p->p_pid);
3001	}
3002
3003	/ flag exec has occurred, notify only if it has not failed due to FP Key error /
3004	if (!error && ((p->p_lflag & P_LTERM_DECRYPTFAIL) == `0`))
3005	proc_knote(p, NOTE_EXEC);
3006
3007
3008	if (error == `0`) {
3009	/*
3010	* We need to initialize the bank context behind the protection of
3011	* the proc_trans lock to prevent a race with exit. We can't do this during
3012	* exec_activate_image because task_bank_init checks entitlements that
3013	* aren't loaded until subsequent calls (including exec_resettextvp).
3014	*/
3015	error = proc_transstart(p, `0`, `0`);
3016
3017	if (error == `0`) {
3018	task_bank_init(new_task);
3019	proc_transend(p, `0`);
3020	}
3021	}
3022
3023	/ Inherit task role from old task to new task for exec /
3024	if (error == `0` && !spawn_no_exec) {
3025	proc_inherit_task_role(new_task, old_task);
3026	}
3027
3028	/*
3029	* Apply the spawnattr policy, apptype (which primes the task for importance donation),
3030	* and bind any portwatch ports to the new task.
3031	* This must be done after the exec so that the child's thread is ready,
3032	* and after the in transit state has been released, because priority is
3033	* dropped here so we need to be prepared for a potentially long preemption interval
3034	*
3035	* TODO: Consider splitting this up into separate phases
3036	*/
3037	if (error == `0` && imgp->ip_px_sa != NULL) {
3038	struct _posix_spawnattr psa = (struct* _posix_spawnattr *) imgp->ip_px_sa;
3039
3040	exec_handle_spawnattr_policy(p, psa->psa_apptype, psa->psa_qos_clamp, psa->psa_darwin_role,
3041	portwatch_ports, portwatch_count);
3042	}
3043
3044	/*
3045	* Apply the requested maximum address.
3046	*/
3047	if (error == `0` && imgp->ip_px_sa != NULL) {
3048	struct _posix_spawnattr psa = (struct* _posix_spawnattr *) imgp->ip_px_sa;
3049
3050	if (psa->psa_max_addr) {
3051	vm_map_set_max_addr(get_task_map(new_task), psa->psa_max_addr);
3052	}
3053	}
3054
3055	if (error == `0`) {
3056	/ Apply the main thread qos /
3057	thread_t main_thread = imgp->ip_new_thread;
3058	task_set_main_thread_qos(new_task, main_thread);
3059
3060	#if CONFIG_MACF
3061	/*
3062	* Processes with the MAP_JIT entitlement are permitted to have
3063	* a jumbo-size map.
3064	*/
3065	if (mac_proc_check_map_anon(p, `0`, `0`, `0`, MAP_JIT, NULL) == `0`) {
3066	vm_map_set_jumbo(get_task_map(new_task));
3067	}
3068	#endif /* CONFIG_MACF */
3069	}
3070
3071	/*
3072	* Release any ports we kept around for binding to the new task
3073	* We need to release the rights even if the posix_spawn has failed.
3074	*/
3075	if (portwatch_ports != NULL) {
3076	for (int i = `0`; i < portwatch_count; i++) {
3077	ipc_port_t port = NULL;
3078	if ((port = portwatch_ports[i]) != NULL) {
3079	ipc_port_release_send(port);
3080	}
3081	}
3082	FREE(portwatch_ports, M_TEMP);
3083	portwatch_ports = NULL;
3084	portwatch_count = `0`;
3085	}
3086
3087	/*
3088	* We have to delay operations which might throw a signal until after
3089	* the signals have been unblocked; however, we want that to happen
3090	* after exec_resettextvp() so that the textvp is correct when they
3091	* fire.
3092	*/
3093	if (error == `0`) {
3094	error = check_for_signature(p, imgp);
3095
3096	/*
3097	* Pay for our earlier safety; deliver the delayed signals from
3098	* the incomplete spawn process now that it's complete.
3099	*/
3100	if (imgp != NULL && spawn_no_exec && (p->p_lflag & P_LTRACED)) {
3101	psignal_vfork(p, p->task, imgp->ip_new_thread, SIGTRAP);
3102	}
3103
3104	if (error == `0` && !spawn_no_exec)
3105	KDBG(BSDDBG_CODE(DBG_BSD_PROC,BSD_PROC_EXEC),
3106	p->p_pid);
3107	}
3108
3109
3110	if (imgp != NULL) {
3111	if (imgp->ip_vp)
3112	vnode_put(imgp->ip_vp);
3113	if (imgp->ip_scriptvp)
3114	vnode_put(imgp->ip_scriptvp);
3115	if (imgp->ip_strings)
3116	execargs_free(imgp);
3117	if (imgp->ip_px_sfa != NULL)
3118	FREE(imgp->ip_px_sfa, M_TEMP);
3119	if (imgp->ip_px_spa != NULL)
3120	FREE(imgp->ip_px_spa, M_TEMP);
3121	#if CONFIG_PERSONAS
3122	if (imgp->ip_px_persona != NULL)
3123	FREE(imgp->ip_px_persona, M_TEMP);
3124	#endif
3125	#if CONFIG_MACF
3126	if (imgp->ip_px_smpx != NULL)
3127	spawn_free_macpolicyinfo(imgp->ip_px_smpx);
3128	if (imgp->ip_execlabelp)
3129	mac_cred_label_free(imgp->ip_execlabelp);
3130	if (imgp->ip_scriptlabelp)
3131	mac_vnode_label_free(imgp->ip_scriptlabelp);
3132	if (imgp->ip_cs_error != OS_REASON_NULL) {
3133	os_reason_free(imgp->ip_cs_error);
3134	imgp->ip_cs_error = OS_REASON_NULL;
3135	}
3136	#endif
3137	}
3138
3139	#if CONFIG_DTRACE
3140	if (spawn_no_exec) {
3141	/*
3142	* In the original DTrace reference implementation,
3143	* posix_spawn() was a libc routine that just
3144	* did vfork(2) then exec(2). Thus the proc::: probes
3145	* are very fork/exec oriented. The details of this
3146	* in-kernel implementation of posix_spawn() is different
3147	* (while producing the same process-observable effects)
3148	* particularly w.r.t. errors, and which thread/process
3149	* is constructing what on behalf of whom.
3150	*/
3151	if (error) {
3152	DTRACE_PROC1(spawn__failure, int, error);
3153	} else {
3154	DTRACE_PROC(spawn__success);
3155	/*
3156	* Some DTrace scripts, e.g. newproc.d in
3157	* /usr/bin, rely on the 'exec-success'
3158	* probe being fired in the child after the
3159	* new process image has been constructed
3160	* in order to determine the associated pid.
3161	*
3162	* So, even though the parent built the image
3163	* here, for compatibility, mark the new thread
3164	* so 'exec-success' fires on it as it leaves
3165	* the kernel.
3166	*/
3167	dtrace_thread_didexec(imgp->ip_new_thread);
3168	}
3169	} else {
3170	if (error) {
3171	DTRACE_PROC1(exec__failure, int, error);
3172	} else {
3173	dtrace_thread_didexec(imgp->ip_new_thread);
3174	}
3175	}
3176
3177	if ((dtrace_proc_waitfor_hook = dtrace_proc_waitfor_exec_ptr) != NULL) {
3178	(*dtrace_proc_waitfor_hook)(p);
3179	}
3180	#endif
3181
3182	#if CONFIG_AUDIT
3183	if (!error && AUDIT_ENABLED() && p) {
3184	/ Add the CDHash of the new process to the audit record /
3185	uint8_t *cdhash = cs_get_cdhash(p);
3186	if (cdhash) {
3187	AUDIT_ARG(data, cdhash, sizeof(uint8_t), CS_CDHASH_LEN);
3188	}
3189	}
3190	#endif
3191
3192	/*
3193	* clear bsd_info from old task if it did exec.
3194	*/
3195	if (task_did_exec(old_task)) {
3196	set_bsdtask_info(old_task, NULL);
3197	}
3198
3199	/ clear bsd_info from new task and terminate it if exec failed /
3200	if (new_task != NULL && task_is_exec_copy(new_task)) {
3201	set_bsdtask_info(new_task, NULL);
3202	task_terminate_internal(new_task);
3203	}
3204
3205	/ Return to both the parent and the child? /
3206	if (imgp != NULL && spawn_no_exec) {
3207	/*
3208	* If the parent wants the pid, copy it out
3209	*/
3210	if (pid != USER_ADDR_NULL)
3211	(void)suword(pid, p->p_pid);
3212	retval[`0`] = error;
3213
3214	/*
3215	* If we had an error, perform an internal reap ; this is
3216	* entirely safe, as we have a real process backing us.
3217	*/
3218	if (error) {
3219	proc_list_lock();
3220	p->p_listflag \|= P_LIST_DEADPARENT;
3221	proc_list_unlock();
3222	proc_lock(p);
3223	/ make sure no one else has killed it off... /
3224	if (p->p_stat != SZOMB && p->exit_thread == NULL) {
3225	p->exit_thread = current_thread();
3226	proc_unlock(p);
3227	exit1(p, `1`, (int *)NULL);
3228	} else {
3229	/ someone is doing it for us; just skip it /
3230	proc_unlock(p);
3231	}
3232	}
3233	}
3234
3235	/*
3236	* Do not terminate the current task, if proc_exec_switch_task did not
3237	* switch the tasks, terminating the current task without the switch would
3238	* result in losing the SIGKILL status.
3239	*/
3240	if (task_did_exec(old_task)) {
3241	/ Terminate the current task, since exec will start in new task /
3242	task_terminate_internal(old_task);
3243	}
3244
3245	/ Release the thread ref returned by fork_create_child/fork1 /
3246	if (imgp != NULL && imgp->ip_new_thread) {
3247	/ wake up the new thread /
3248	task_clear_return_wait(get_threadtask(imgp->ip_new_thread));
3249	thread_deallocate(imgp->ip_new_thread);
3250	imgp->ip_new_thread = NULL;
3251	}
3252
3253	/ Release the ref returned by fork_create_child/fork1 /
3254	if (new_task) {
3255	task_deallocate(new_task);
3256	new_task = NULL;
3257	}
3258
3259	if (should_release_proc_ref) {
3260	proc_rele(p);
3261	}
3262
3263	if (bufp != NULL) {
3264	FREE(bufp, M_TEMP);
3265	}
3266
3267	if (inherit != NULL) {
3268	ipc_importance_release(inherit);
3269	}
3270
3271	return(error);
3272	}
3273
3274	/*
3275	* proc_exec_switch_task
3276	*
3277	* Parameters: p proc
3278	* old_task task before exec
3279	* new_task task after exec
3280	* new_thread thread in new task
3281	*
3282	* Returns: proc.
3283	*
3284	* Note: The function will switch the task pointer of proc
3285	* from old task to new task. The switch needs to happen
3286	* after draining all proc refs and inside a proc translock.
3287	* In the case of failure to switch the task, which might happen
3288	* if the process received a SIGKILL or jetsam killed it, it will make
3289	* sure that the new tasks terminates. User proc ref returned
3290	* to caller.
3291	*
3292	* This function is called after point of no return, in the case
3293	* failure to switch, it will terminate the new task and swallow the
3294	* error and let the terminated process complete exec and die.
3295	*/
3296	proc_t
3297	proc_exec_switch_task(proc_t p, task_t old_task, task_t new_task, thread_t new_thread)
3298	{
3299	int error = `0`;
3300	boolean_t task_active;
3301	boolean_t proc_active;
3302	boolean_t thread_active;
3303	thread_t old_thread = current_thread();
3304
3305	/*
3306	* Switch the task pointer of proc to new task.
3307	* Before switching the task, wait for proc_refdrain.
3308	* After the switch happens, the proc can disappear,
3309	* take a ref before it disappears. Waiting for
3310	* proc_refdrain in exec will block all other threads
3311	* trying to take a proc ref, boost the current thread
3312	* to avoid priority inversion.
3313	*/
3314	thread_set_exec_promotion(old_thread);
3315	p = proc_refdrain_with_refwait(p, TRUE);
3316	/ extra proc ref returned to the caller /
3317
3318	assert(get_threadtask(new_thread) == new_task);
3319	task_active = task_is_active(new_task);
3320
3321	/ Take the proc_translock to change the task ptr /
3322	proc_lock(p);
3323	proc_active = !(p->p_lflag & P_LEXIT);
3324
3325	/ Check if the current thread is not aborted due to SIGKILL /
3326	thread_active = thread_is_active(old_thread);
3327
3328	/*
3329	* Do not switch the task if the new task or proc is already terminated
3330	* as a result of error in exec past point of no return
3331	*/
3332	if (proc_active && task_active && thread_active) {
3333	error = proc_transstart(p, `1`, `0`);
3334	if (error == `0`) {
3335	uthread_t new_uthread = get_bsdthread_info(new_thread);
3336	uthread_t old_uthread = get_bsdthread_info(current_thread());
3337
3338	/*
3339	* bsd_info of old_task will get cleared in execve and posix_spawn
3340	* after firing exec-success/error dtrace probe.
3341	*/
3342	p->task = new_task;
3343
3344	/ Clear dispatchqueue and workloop ast offset /
3345	p->p_dispatchqueue_offset = `0`;
3346	p->p_dispatchqueue_serialno_offset = `0`;
3347	p->p_return_to_kernel_offset = `0`;
3348
3349	/ Copy the signal state, dtrace state and set bsd ast on new thread /
3350	act_set_astbsd(new_thread);
3351	new_uthread->uu_siglist = old_uthread->uu_siglist;
3352	new_uthread->uu_sigwait = old_uthread->uu_sigwait;
3353	new_uthread->uu_sigmask = old_uthread->uu_sigmask;
3354	new_uthread->uu_oldmask = old_uthread->uu_oldmask;
3355	new_uthread->uu_vforkmask = old_uthread->uu_vforkmask;
3356	new_uthread->uu_exit_reason = old_uthread->uu_exit_reason;
3357	#if CONFIG_DTRACE
3358	new_uthread->t_dtrace_sig = old_uthread->t_dtrace_sig;
3359	new_uthread->t_dtrace_stop = old_uthread->t_dtrace_stop;
3360	new_uthread->t_dtrace_resumepid = old_uthread->t_dtrace_resumepid;
3361	assert(new_uthread->t_dtrace_scratch == NULL);
3362	new_uthread->t_dtrace_scratch = old_uthread->t_dtrace_scratch;
3363
3364	old_uthread->t_dtrace_sig = `0`;
3365	old_uthread->t_dtrace_stop = `0`;
3366	old_uthread->t_dtrace_resumepid = `0`;
3367	old_uthread->t_dtrace_scratch = NULL;
3368	#endif
3369	/ Copy the resource accounting info /
3370	thread_copy_resource_info(new_thread, current_thread());
3371
3372	/ Clear the exit reason and signal state on old thread /
3373	old_uthread->uu_exit_reason = NULL;
3374	old_uthread->uu_siglist = `0`;
3375
3376	/ Add the new uthread to proc uthlist and remove the old one /
3377	TAILQ_INSERT_TAIL(&p->p_uthlist, new_uthread, uu_list);
3378	TAILQ_REMOVE(&p->p_uthlist, old_uthread, uu_list);
3379
3380	task_set_did_exec_flag(old_task);
3381	task_clear_exec_copy_flag(new_task);
3382
3383	task_copy_fields_for_exec(new_task, old_task);
3384
3385	proc_transend(p, `1`);
3386	}
3387	}
3388
3389	proc_unlock(p);
3390	proc_refwake(p);
3391	thread_clear_exec_promotion(old_thread);
3392
3393	if (error != `0` \|\| !task_active \|\| !proc_active \|\| !thread_active) {
3394	task_terminate_internal(new_task);
3395	}
3396
3397	return p;
3398	}
3399
3400	/*
3401	* execve
3402	*
3403	* Parameters: uap->fname File name to exec
3404	* uap->argp Argument list
3405	* uap->envp Environment list
3406	*
3407	* Returns: 0 Success
3408	* __mac_execve:EINVAL Invalid argument
3409	* __mac_execve:ENOTSUP Invalid argument
3410	* __mac_execve:EACCES Permission denied
3411	* __mac_execve:EINTR Interrupted function
3412	* __mac_execve:ENOMEM Not enough space
3413	* __mac_execve:EFAULT Bad address
3414	* __mac_execve:ENAMETOOLONG Filename too long
3415	* __mac_execve:ENOEXEC Executable file format error
3416	* __mac_execve:ETXTBSY Text file busy [misuse of error code]
3417	* __mac_execve:???
3418	*
3419	* TODO: Dynamic linker header address on stack is copied via suword()
3420	*/
3421	/ ARGSUSED /
3422	int
3423	execve(proc_t p, struct execve_args uap, int32_t retval)
3424	{
3425	struct __mac_execve_args muap;
3426	int err;
3427
3428	memoryshot(VM_EXECVE, DBG_FUNC_NONE);
3429
3430	muap.fname = uap->fname;
3431	muap.argp = uap->argp;
3432	muap.envp = uap->envp;
3433	muap.mac_p = USER_ADDR_NULL;
3434	err = __mac_execve(p, &muap, retval);
3435
3436	return(err);
3437	}
3438
3439	/*
3440	* __mac_execve
3441	*
3442	* Parameters: uap->fname File name to exec
3443	* uap->argp Argument list
3444	* uap->envp Environment list
3445	* uap->mac_p MAC label supplied by caller
3446	*
3447	* Returns: 0 Success
3448	* EINVAL Invalid argument
3449	* ENOTSUP Not supported
3450	* ENOEXEC Executable file format error
3451	* exec_activate_image:EINVAL Invalid argument
3452	* exec_activate_image:EACCES Permission denied
3453	* exec_activate_image:EINTR Interrupted function
3454	* exec_activate_image:ENOMEM Not enough space
3455	* exec_activate_image:EFAULT Bad address
3456	* exec_activate_image:ENAMETOOLONG Filename too long
3457	* exec_activate_image:ENOEXEC Executable file format error
3458	* exec_activate_image:ETXTBSY Text file busy [misuse of error code]
3459	* exec_activate_image:EBADEXEC The executable is corrupt/unknown
3460	* exec_activate_image:???
3461	* mac_execve_enter:???
3462	*
3463	* TODO: Dynamic linker header address on stack is copied via suword()
3464	*/
3465	int
3466	__mac_execve(proc_t p, struct __mac_execve_args uap, int32_t retval)
3467	{
3468	char *bufp = NULL;
3469	struct image_params *imgp;
3470	struct vnode_attr *vap;
3471	struct vnode_attr *origvap;
3472	int error;
3473	int is_64 = IS_64BIT_PROCESS(p);
3474	struct vfs_context context;
3475	struct uthread *uthread;
3476	task_t old_task = current_task();
3477	task_t new_task = NULL;
3478	boolean_t should_release_proc_ref = FALSE;
3479	boolean_t exec_done = FALSE;
3480	boolean_t in_vfexec = FALSE;
3481	void *inherit = NULL;
3482
3483	context.vc_thread = current_thread();
3484	context.vc_ucred = kauth_cred_proc_ref(p); / XXX must NOT be kauth_cred_get() /
3485
3486	/ Allocate a big chunk for locals instead of using stack since these*
3487	* structures a pretty big.
3488	*/
3489	MALLOC(bufp, char , (sizeof(imgp) + sizeof(vap) + sizeof(origvap)), M_TEMP, M_WAITOK \| M_ZERO);
3490	imgp = (struct image_params *) bufp;
3491	if (bufp == NULL) {
3492	error = ENOMEM;
3493	goto exit_with_error;
3494	}
3495	vap = (struct vnode_attr ) (bufp + sizeof(imgp));
3496	origvap = (struct vnode_attr ) (bufp + sizeof(imgp) + sizeof(*vap));
3497
3498	/ Initialize the common data in the image_params structure /
3499	imgp->ip_user_fname = uap->fname;
3500	imgp->ip_user_argv = uap->argp;
3501	imgp->ip_user_envv = uap->envp;
3502	imgp->ip_vattr = vap;
3503	imgp->ip_origvattr = origvap;
3504	imgp->ip_vfs_context = &context;
3505	imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT_ADDR : IMGPF_NONE) \| ((p->p_flag & P_DISABLE_ASLR) ? IMGPF_DISABLE_ASLR : IMGPF_NONE);
3506	imgp->ip_seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32);
3507	imgp->ip_mac_return = `0`;
3508	imgp->ip_cs_error = OS_REASON_NULL;
3509
3510	#if CONFIG_MACF
3511	if (uap->mac_p != USER_ADDR_NULL) {
3512	error = mac_execve_enter(uap->mac_p, imgp);
3513	if (error) {
3514	kauth_cred_unref(&context.vc_ucred);
3515	goto exit_with_error;
3516	}
3517	}
3518	#endif
3519	uthread = get_bsdthread_info(current_thread());
3520	if (uthread->uu_flag & UT_VFORK) {
3521	imgp->ip_flags \|= IMGPF_VFORK_EXEC;
3522	in_vfexec = TRUE;
3523	} else {
3524	imgp->ip_flags \|= IMGPF_EXEC;
3525
3526	/*
3527	* For execve case, create a new task and thread
3528	* which points to current_proc. The current_proc will point
3529	* to the new task after image activation and proc ref drain.
3530	*
3531	* proc (current_proc) <----- old_task (current_task)
3532	* ^ \| ^
3533	* \| \| \|
3534	* \| ----------------------------------
3535	* \|
3536	* --------- new_task (task marked as TF_EXEC_COPY)
3537	*
3538	* After image activation, the proc will point to the new task
3539	* and would look like following.
3540	*
3541	* proc (current_proc) <----- old_task (current_task, marked as TPF_DID_EXEC)
3542	* ^ \|
3543	* \| \|
3544	* \| ----------> new_task
3545	* \| \|
3546	* -----------------
3547	*
3548	* During exec any transition from new_task -> proc is fine, but don't allow
3549	* transition from proc->task, since it will modify old_task.
3550	*/
3551	imgp->ip_new_thread = fork_create_child(old_task,
3552	NULL,
3553	p,
3554	FALSE,
3555	p->p_flag & P_LP64,
3556	task_get_64bit_data(old_task),
3557	TRUE);
3558	/ task and thread ref returned by fork_create_child /
3559	if (imgp->ip_new_thread == NULL) {
3560	error = ENOMEM;
3561	goto exit_with_error;
3562	}
3563
3564	new_task = get_threadtask(imgp->ip_new_thread);
3565	context.vc_thread = imgp->ip_new_thread;
3566	}
3567
3568	error = exec_activate_image(imgp);
3569	/ thread and task ref returned for vfexec case /
3570
3571	if (imgp->ip_new_thread != NULL) {
3572	/*
3573	* task reference might be returned by exec_activate_image
3574	* for vfexec.
3575	*/
3576	new_task = get_threadtask(imgp->ip_new_thread);
3577	}
3578
3579	if (!error && !in_vfexec) {
3580	p = proc_exec_switch_task(p, old_task, new_task, imgp->ip_new_thread);
3581	/ proc ref returned /
3582	should_release_proc_ref = TRUE;
3583
3584	/*
3585	* Need to transfer pending watch port boosts to the new task while still making
3586	* sure that the old task remains in the importance linkage. Create an importance
3587	* linkage from old task to new task, then switch the task importance base
3588	* of old task and new task. After the switch the port watch boost will be
3589	* boosting the new task and new task will be donating importance to old task.
3590	*/
3591	inherit = ipc_importance_exec_switch_task(old_task, new_task);
3592	}
3593
3594	kauth_cred_unref(&context.vc_ucred);
3595
3596	/ Image not claimed by any activator? /
3597	if (error == -`1`)
3598	error = ENOEXEC;
3599
3600	if (!error) {
3601	exec_done = TRUE;
3602	assert(imgp->ip_new_thread != NULL);
3603
3604	exec_resettextvp(p, imgp);
3605	error = check_for_signature(p, imgp);
3606	}
3607
3608	/ flag exec has occurred, notify only if it has not failed due to FP Key error /
3609	if (exec_done && ((p->p_lflag & P_LTERM_DECRYPTFAIL) == `0`))
3610	proc_knote(p, NOTE_EXEC);
3611
3612	if (imgp->ip_vp != NULLVP)
3613	vnode_put(imgp->ip_vp);
3614	if (imgp->ip_scriptvp != NULLVP)
3615	vnode_put(imgp->ip_scriptvp);
3616	if (imgp->ip_strings)
3617	execargs_free(imgp);
3618	#if CONFIG_MACF
3619	if (imgp->ip_execlabelp)
3620	mac_cred_label_free(imgp->ip_execlabelp);
3621	if (imgp->ip_scriptlabelp)
3622	mac_vnode_label_free(imgp->ip_scriptlabelp);
3623	#endif
3624	if (imgp->ip_cs_error != OS_REASON_NULL) {
3625	os_reason_free(imgp->ip_cs_error);
3626	imgp->ip_cs_error = OS_REASON_NULL;
3627	}
3628
3629	if (!error) {
3630	/*
3631	* We need to initialize the bank context behind the protection of
3632	* the proc_trans lock to prevent a race with exit. We can't do this during
3633	* exec_activate_image because task_bank_init checks entitlements that
3634	* aren't loaded until subsequent calls (including exec_resettextvp).
3635	*/
3636	error = proc_transstart(p, `0`, `0`);
3637	}
3638
3639	if (!error) {
3640	task_bank_init(new_task);
3641	proc_transend(p, `0`);
3642
3643	/ Sever any extant thread affinity /
3644	thread_affinity_exec(current_thread());
3645
3646	/ Inherit task role from old task to new task for exec /
3647	if (!in_vfexec) {
3648	proc_inherit_task_role(new_task, old_task);
3649	}
3650
3651	thread_t main_thread = imgp->ip_new_thread;
3652
3653	task_set_main_thread_qos(new_task, main_thread);
3654
3655	#if CONFIG_MACF
3656	/*
3657	* Processes with the MAP_JIT entitlement are permitted to have
3658	* a jumbo-size map.
3659	*/
3660	if (mac_proc_check_map_anon(p, `0`, `0`, `0`, MAP_JIT, NULL) == `0`) {
3661	vm_map_set_jumbo(get_task_map(new_task));
3662	}
3663	#endif /* CONFIG_MACF */
3664
3665	if (vm_darkwake_mode == TRUE) {
3666	/*
3667	* This process is being launched when the system
3668	* is in darkwake. So mark it specially. This will
3669	* cause all its pages to be entered in the background Q.
3670	*/
3671	task_set_darkwake_mode(new_task, vm_darkwake_mode);
3672	}
3673
3674	#if CONFIG_DTRACE
3675	dtrace_thread_didexec(imgp->ip_new_thread);
3676
3677	if ((dtrace_proc_waitfor_hook = dtrace_proc_waitfor_exec_ptr) != NULL)
3678	(*dtrace_proc_waitfor_hook)(p);
3679	#endif
3680
3681	#if CONFIG_AUDIT
3682	if (!error && AUDIT_ENABLED() && p) {
3683	/ Add the CDHash of the new process to the audit record /
3684	uint8_t *cdhash = cs_get_cdhash(p);
3685	if (cdhash) {
3686	AUDIT_ARG(data, cdhash, sizeof(uint8_t), CS_CDHASH_LEN);
3687	}
3688	}
3689	#endif
3690
3691	if (in_vfexec) {
3692	vfork_return(p, retval, p->p_pid);
3693	}
3694	} else {
3695	DTRACE_PROC1(exec__failure, int, error);
3696	}
3697
3698	exit_with_error:
3699
3700	/*
3701	* clear bsd_info from old task if it did exec.
3702	*/
3703	if (task_did_exec(old_task)) {
3704	set_bsdtask_info(old_task, NULL);
3705	}
3706
3707	/ clear bsd_info from new task and terminate it if exec failed /
3708	if (new_task != NULL && task_is_exec_copy(new_task)) {
3709	set_bsdtask_info(new_task, NULL);
3710	task_terminate_internal(new_task);
3711	}
3712
3713	if (imgp != NULL) {
3714	/*
3715	* Do not terminate the current task, if proc_exec_switch_task did not
3716	* switch the tasks, terminating the current task without the switch would
3717	* result in loosing the SIGKILL status.
3718	*/
3719	if (task_did_exec(old_task)) {
3720	/ Terminate the current task, since exec will start in new task /
3721	task_terminate_internal(old_task);
3722	}
3723
3724	/ Release the thread ref returned by fork_create_child /
3725	if (imgp->ip_new_thread) {
3726	/ wake up the new exec thread /
3727	task_clear_return_wait(get_threadtask(imgp->ip_new_thread));
3728	thread_deallocate(imgp->ip_new_thread);
3729	imgp->ip_new_thread = NULL;
3730	}
3731	}
3732
3733	/ Release the ref returned by fork_create_child /
3734	if (new_task) {
3735	task_deallocate(new_task);
3736	new_task = NULL;
3737	}
3738
3739	if (should_release_proc_ref) {
3740	proc_rele(p);
3741	}
3742
3743	if (bufp != NULL) {
3744	FREE(bufp, M_TEMP);
3745	}
3746
3747	if (inherit != NULL) {
3748	ipc_importance_release(inherit);
3749	}
3750
3751	return(error);
3752	}
3753
3754
3755	/*
3756	* copyinptr
3757	*
3758	* Description: Copy a pointer in from user space to a user_addr_t in kernel
3759	* space, based on 32/64 bitness of the user space
3760	*
3761	* Parameters: froma User space address
3762	* toptr Address of kernel space user_addr_t
3763	* ptr_size 4/8, based on 'froma' address space
3764	*
3765	* Returns: 0 Success
3766	* EFAULT Bad 'froma'
3767	*
3768	* Implicit returns:
3769	* *ptr_size Modified
3770	*/
3771	static int
3772	copyinptr(user_addr_t froma, user_addr_t toptr, int* ptr_size)
3773	{
3774	int error;
3775
3776	if (ptr_size == `4`) {
3777	/ 64 bit value containing 32 bit address /
3778	unsigned int i;
3779
3780	error = copyin(froma, &i, `4`);
3781	toptr = CAST_USER_ADDR_T(i); /* SAFE /
3782	} else {
3783	error = copyin(froma, toptr, `8`);
3784	}
3785	return (error);
3786	}
3787
3788
3789	/*
3790	* copyoutptr
3791	*
3792	* Description: Copy a pointer out from a user_addr_t in kernel space to
3793	* user space, based on 32/64 bitness of the user space
3794	*
3795	* Parameters: ua User space address to copy to
3796	* ptr Address of kernel space user_addr_t
3797	* ptr_size 4/8, based on 'ua' address space
3798	*
3799	* Returns: 0 Success
3800	* EFAULT Bad 'ua'
3801	*
3802	*/
3803	static int
3804	copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size)
3805	{
3806	int error;
3807
3808	if (ptr_size == `4`) {
3809	/ 64 bit value containing 32 bit address /
3810	unsigned int i = CAST_DOWN_EXPLICIT(unsigned int,ua); / SAFE /
3811
3812	error = copyout(&i, ptr, `4`);
3813	} else {
3814	error = copyout(&ua, ptr, `8`);
3815	}
3816	return (error);
3817	}
3818
3819
3820	/*
3821	* exec_copyout_strings
3822	*
3823	* Copy out the strings segment to user space. The strings segment is put
3824	* on a preinitialized stack frame.
3825	*
3826	* Parameters: struct image_params * the image parameter block
3827	* int * a pointer to the stack offset variable
3828	*
3829	* Returns: 0 Success
3830	* !0 Faiure: errno
3831	*
3832	* Implicit returns:
3833	* (*stackp) The stack offset, modified
3834	*
3835	* Note: The strings segment layout is backward, from the beginning
3836	* of the top of the stack to consume the minimal amount of
3837	* space possible; the returned stack pointer points to the
3838	* end of the area consumed (stacks grow downward).
3839	*
3840	* argc is an int; arg[i] are pointers; env[i] are pointers;
3841	* the 0's are (void *)NULL's
3842	*
3843	* The stack frame layout is:
3844	*
3845	* +-------------+ <- p->user_stack
3846	* \| 16b \|
3847	* +-------------+
3848	* \| STRING AREA \|
3849	* \| : \|
3850	* \| : \|
3851	* \| : \|
3852	* +- -- -- -- --+
3853	* \| PATH AREA \|
3854	* +-------------+
3855	* \| 0 \|
3856	* +-------------+
3857	* \| applev[n] \|
3858	* +-------------+
3859	* :
3860	* :
3861	* +-------------+
3862	* \| applev[1] \|
3863	* +-------------+
3864	* \| exec_path / \|
3865	* \| applev[0] \|
3866	* +-------------+
3867	* \| 0 \|
3868	* +-------------+
3869	* \| env[n] \|
3870	* +-------------+
3871	* :
3872	* :
3873	* +-------------+
3874	* \| env[0] \|
3875	* +-------------+
3876	* \| 0 \|
3877	* +-------------+
3878	* \| arg[argc-1] \|
3879	* +-------------+
3880	* :
3881	* :
3882	* +-------------+
3883	* \| arg[0] \|
3884	* +-------------+
3885	* \| argc \|
3886	* sp-> +-------------+
3887	*
3888	* Although technically a part of the STRING AREA, we treat the PATH AREA as
3889	* a separate entity. This allows us to align the beginning of the PATH AREA
3890	* to a pointer boundary so that the exec_path, env[i], and argv[i] pointers
3891	* which preceed it on the stack are properly aligned.
3892	*/
3893
3894	static int
3895	exec_copyout_strings(struct image_params imgp, user_addr_t stackp)
3896	{
3897	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
3898	int ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT_ADDR) ? `8` : `4`;
3899	int ptr_area_size;
3900	void ptr_buffer_start, ptr_buffer;
3901	int string_size;
3902
3903	user_addr_t string_area; / argv[], env[] /
3904	user_addr_t ptr_area; / argv[], env[], applev[] /
3905	user_addr_t argc_area; / argc /
3906	user_addr_t stack;
3907	int error;
3908
3909	unsigned i;
3910	struct copyout_desc {
3911	char *start_string;
3912	int count;
3913	#if CONFIG_DTRACE
3914	user_addr_t *dtrace_cookie;
3915	#endif
3916	boolean_t null_term;
3917	} descriptors[] = {
3918	{
3919	.start_string = imgp->ip_startargv,
3920	.count = imgp->ip_argc,
3921	#if CONFIG_DTRACE
3922	.dtrace_cookie = &p->p_dtrace_argv,
3923	#endif
3924	.null_term = TRUE
3925	},
3926	{
3927	.start_string = imgp->ip_endargv,
3928	.count = imgp->ip_envc,
3929	#if CONFIG_DTRACE
3930	.dtrace_cookie = &p->p_dtrace_envp,
3931	#endif
3932	.null_term = TRUE
3933	},
3934	{
3935	.start_string = imgp->ip_strings,
3936	.count = `1`,
3937	#if CONFIG_DTRACE
3938	.dtrace_cookie = NULL,
3939	#endif
3940	.null_term = FALSE
3941	},
3942	{
3943	.start_string = imgp->ip_endenvv,
3944	.count = imgp->ip_applec - `1`, / exec_path handled above /
3945	#if CONFIG_DTRACE
3946	.dtrace_cookie = NULL,
3947	#endif
3948	.null_term = TRUE
3949	}
3950	};
3951
3952	stack = *stackp;
3953
3954	/*
3955	* All previous contributors to the string area
3956	* should have aligned their sub-area
3957	*/
3958	if (imgp->ip_strspace % ptr_size != `0`) {
3959	error = EINVAL;
3960	goto bad;
3961	}
3962
3963	/ Grow the stack down for the strings we've been building up /
3964	string_size = imgp->ip_strendp - imgp->ip_strings;
3965	stack -= string_size;
3966	string_area = stack;
3967
3968	/*
3969	* Need room for one pointer for each string, plus
3970	* one for the NULLs terminating the argv, envv, and apple areas.
3971	*/
3972	ptr_area_size = (imgp->ip_argc + imgp->ip_envc + imgp->ip_applec + `3`) * ptr_size;
3973	stack -= ptr_area_size;
3974	ptr_area = stack;
3975
3976	/ We'll construct all the pointer arrays in our string buffer,*
3977	* which we already know is aligned properly, and ip_argspace
3978	* was used to verify we have enough space.
3979	*/
3980	ptr_buffer_start = ptr_buffer = (void *)imgp->ip_strendp;
3981
3982	/*
3983	* Need room for pointer-aligned argc slot.
3984	*/
3985	stack -= ptr_size;
3986	argc_area = stack;
3987
3988	/*
3989	* Record the size of the arguments area so that sysctl_procargs()
3990	* can return the argument area without having to parse the arguments.
3991	*/
3992	proc_lock(p);
3993	p->p_argc = imgp->ip_argc;
3994	p->p_argslen = (int)(*stackp - string_area);
3995	proc_unlock(p);
3996
3997	/ Return the initial stack address: the location of argc /
3998	*stackp = stack;
3999
4000	/*
4001	* Copy out the entire strings area.
4002	*/
4003	error = copyout(imgp->ip_strings, string_area,
4004	string_size);
4005	if (error)
4006	goto bad;
4007
4008	for (i = `0`; i < sizeof(descriptors)/sizeof(descriptors[`0`]); i++) {
4009	char *cur_string = descriptors[i].start_string;
4010	int j;
4011
4012	#if CONFIG_DTRACE
4013	if (descriptors[i].dtrace_cookie) {
4014	proc_lock(p);
4015	descriptors[i].dtrace_cookie = ptr_area + ((uintptr_t)ptr_buffer - (uintptr_t)ptr_buffer_start); /* dtrace convenience /
4016	proc_unlock(p);
4017	}
4018	#endif /* CONFIG_DTRACE */
4019
4020	/*
4021	* For each segment (argv, envv, applev), copy as many pointers as requested
4022	* to our pointer buffer.
4023	*/
4024	for (j = `0`; j < descriptors[i].count; j++) {
4025	user_addr_t cur_address = string_area + (cur_string - imgp->ip_strings);
4026
4027	/ Copy out the pointer to the current string. Alignment has been verified /
4028	if (ptr_size == `8`) {
4029	(uint64_t )ptr_buffer = (uint64_t)cur_address;
4030	} else {
4031	(uint32_t )ptr_buffer = (uint32_t)cur_address;
4032	}
4033
4034	ptr_buffer = (void *)((uintptr_t)ptr_buffer + ptr_size);
4035	cur_string += strlen(cur_string) + `1`; / Only a NUL between strings in the same area /
4036	}
4037
4038	if (descriptors[i].null_term) {
4039	if (ptr_size == `8`) {
4040	(uint64_t )ptr_buffer = `0ULL`;
4041	} else {
4042	(uint32_t )ptr_buffer = `0`;
4043	}
4044
4045	ptr_buffer = (void *)((uintptr_t)ptr_buffer + ptr_size);
4046	}
4047	}
4048
4049	/*
4050	* Copy out all our pointer arrays in bulk.
4051	*/
4052	error = copyout(ptr_buffer_start, ptr_area,
4053	ptr_area_size);
4054	if (error)
4055	goto bad;
4056
4057	/ argc (int32, stored in a ptr_size area) /
4058	error = copyoutptr((user_addr_t)imgp->ip_argc, argc_area, ptr_size);
4059	if (error)
4060	goto bad;
4061
4062	bad:
4063	return(error);
4064	}
4065
4066
4067	/*
4068	* exec_extract_strings
4069	*
4070	* Copy arguments and environment from user space into work area; we may
4071	* have already copied some early arguments into the work area, and if
4072	* so, any arguments opied in are appended to those already there.
4073	* This function is the primary manipulator of ip_argspace, since
4074	* these are the arguments the client of execve(2) knows about. After
4075	* each argv[]/envv[] string is copied, we charge the string length
4076	* and argv[]/envv[] pointer slot to ip_argspace, so that we can
4077	* full preflight the arg list size.
4078	*
4079	* Parameters: struct image_params * the image parameter block
4080	*
4081	* Returns: 0 Success
4082	* !0 Failure: errno
4083	*
4084	* Implicit returns;
4085	* (imgp->ip_argc) Count of arguments, updated
4086	* (imgp->ip_envc) Count of environment strings, updated
4087	* (imgp->ip_argspace) Count of remaining of NCARGS
4088	* (imgp->ip_interp_buffer) Interpreter and args (mutated in place)
4089	*
4090	*
4091	* Note: The argument and environment vectors are user space pointers
4092	* to arrays of user space pointers.
4093	*/
4094	static int
4095	exec_extract_strings(struct image_params *imgp)
4096	{
4097	int error = `0`;
4098	int ptr_size = (imgp->ip_flags & IMGPF_WAS_64BIT_ADDR) ? `8` : `4`;
4099	int new_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT_ADDR) ? `8` : `4`;
4100	user_addr_t argv = imgp->ip_user_argv;
4101	user_addr_t envv = imgp->ip_user_envv;
4102
4103	/*
4104	* Adjust space reserved for the path name by however much padding it
4105	* needs. Doing this here since we didn't know if this would be a 32-
4106	* or 64-bit process back in exec_save_path.
4107	*/
4108	while (imgp->ip_strspace % new_ptr_size != `0`) {
4109	*imgp->ip_strendp++ = `'\0'`;
4110	imgp->ip_strspace--;
4111	/ imgp->ip_argspace--; not counted towards exec args total /
4112	}
4113
4114	/*
4115	* From now on, we start attributing string space to ip_argspace
4116	*/
4117	imgp->ip_startargv = imgp->ip_strendp;
4118	imgp->ip_argc = `0`;
4119
4120	if((imgp->ip_flags & IMGPF_INTERPRET) != `0`) {
4121	user_addr_t arg;
4122	char argstart, ch;
4123
4124	/ First, the arguments in the "#!" string are tokenized and extracted. /
4125	argstart = imgp->ip_interp_buffer;
4126	while (argstart) {
4127	ch = argstart;
4128	while (ch && !IS_WHITESPACE(ch)) {
4129	ch++;
4130	}
4131
4132	if (*ch == `'\0'`) {
4133	/ last argument, no need to NUL-terminate /
4134	error = exec_add_user_string(imgp, CAST_USER_ADDR_T(argstart), UIO_SYSSPACE, TRUE);
4135	argstart = NULL;
4136	} else {
4137	/ NUL-terminate /
4138	*ch = `'\0'`;
4139	error = exec_add_user_string(imgp, CAST_USER_ADDR_T(argstart), UIO_SYSSPACE, TRUE);
4140
4141	/*
4142	* Find the next string. We know spaces at the end of the string have already
4143	* been stripped.
4144	*/
4145	argstart = ch + `1`;
4146	while (IS_WHITESPACE(*argstart)) {
4147	argstart++;
4148	}
4149	}
4150
4151	/ Error-check, regardless of whether this is the last interpreter arg or not /
4152	if (error)
4153	goto bad;
4154	if (imgp->ip_argspace < new_ptr_size) {
4155	error = E2BIG;
4156	goto bad;
4157	}
4158	imgp->ip_argspace -= new_ptr_size; / to hold argv[] entry /
4159	imgp->ip_argc++;
4160	}
4161
4162	if (argv != `0LL`) {
4163	/*
4164	* If we are running an interpreter, replace the av[0] that was
4165	* passed to execve() with the path name that was
4166	* passed to execve() for interpreters which do not use the PATH
4167	* to locate their script arguments.
4168	*/
4169	error = copyinptr(argv, &arg, ptr_size);
4170	if (error)
4171	goto bad;
4172	if (arg != `0LL`) {
4173	argv += ptr_size; / consume without using /
4174	}
4175	}
4176
4177	if (imgp->ip_interp_sugid_fd != -`1`) {
4178	char temp[`19`]; / "/dev/fd/" + 10 digits + NUL /
4179	snprintf(temp, sizeof(temp), "/dev/fd/%d", imgp->ip_interp_sugid_fd);
4180	error = exec_add_user_string(imgp, CAST_USER_ADDR_T(temp), UIO_SYSSPACE, TRUE);
4181	} else {
4182	error = exec_add_user_string(imgp, imgp->ip_user_fname, imgp->ip_seg, TRUE);
4183	}
4184
4185	if (error)
4186	goto bad;
4187	if (imgp->ip_argspace < new_ptr_size) {
4188	error = E2BIG;
4189	goto bad;
4190	}
4191	imgp->ip_argspace -= new_ptr_size; / to hold argv[] entry /
4192	imgp->ip_argc++;
4193	}
4194
4195	while (argv != `0LL`) {
4196	user_addr_t arg;
4197
4198	error = copyinptr(argv, &arg, ptr_size);
4199	if (error)
4200	goto bad;
4201
4202	if (arg == `0LL`) {
4203	break;
4204	}
4205
4206	argv += ptr_size;
4207
4208	/*
4209	* av[n...] = arg[n]
4210	*/
4211	error = exec_add_user_string(imgp, arg, imgp->ip_seg, TRUE);
4212	if (error)
4213	goto bad;
4214	if (imgp->ip_argspace < new_ptr_size) {
4215	error = E2BIG;
4216	goto bad;
4217	}
4218	imgp->ip_argspace -= new_ptr_size; / to hold argv[] entry /
4219	imgp->ip_argc++;
4220	}
4221
4222	/ Save space for argv[] NULL terminator /
4223	if (imgp->ip_argspace < new_ptr_size) {
4224	error = E2BIG;
4225	goto bad;
4226	}
4227	imgp->ip_argspace -= new_ptr_size;
4228
4229	/ Note where the args ends and env begins. /
4230	imgp->ip_endargv = imgp->ip_strendp;
4231	imgp->ip_envc = `0`;
4232
4233	/ Now, get the environment /
4234	while (envv != `0LL`) {
4235	user_addr_t env;
4236
4237	error = copyinptr(envv, &env, ptr_size);
4238	if (error)
4239	goto bad;
4240
4241	envv += ptr_size;
4242	if (env == `0LL`) {
4243	break;
4244	}
4245	/*
4246	* av[n...] = env[n]
4247	*/
4248	error = exec_add_user_string(imgp, env, imgp->ip_seg, TRUE);
4249	if (error)
4250	goto bad;
4251	if (imgp->ip_argspace < new_ptr_size) {
4252	error = E2BIG;
4253	goto bad;
4254	}
4255	imgp->ip_argspace -= new_ptr_size; / to hold envv[] entry /
4256	imgp->ip_envc++;
4257	}
4258
4259	/ Save space for envv[] NULL terminator /
4260	if (imgp->ip_argspace < new_ptr_size) {
4261	error = E2BIG;
4262	goto bad;
4263	}
4264	imgp->ip_argspace -= new_ptr_size;
4265
4266	/ Align the tail of the combined argv+envv area /
4267	while (imgp->ip_strspace % new_ptr_size != `0`) {
4268	if (imgp->ip_argspace < `1`) {
4269	error = E2BIG;
4270	goto bad;
4271	}
4272	*imgp->ip_strendp++ = `'\0'`;
4273	imgp->ip_strspace--;
4274	imgp->ip_argspace--;
4275	}
4276
4277	/ Note where the envv ends and applev begins. /
4278	imgp->ip_endenvv = imgp->ip_strendp;
4279
4280	/*
4281	* From now on, we are no longer charging argument
4282	* space to ip_argspace.
4283	*/
4284
4285	bad:
4286	return error;
4287	}
4288
4289	/*
4290	* Libc has an 8-element array set up for stack guard values. It only fills
4291	* in one of those entries, and both gcc and llvm seem to use only a single
4292	* 8-byte guard. Until somebody needs more than an 8-byte guard value, don't
4293	* do the work to construct them.
4294	*/
4295	#define GUARD_VALUES 1
4296	#define GUARD_KEY "stack_guard="
4297
4298	/*
4299	* System malloc needs some entropy when it is initialized.
4300	*/
4301	#define ENTROPY_VALUES 2
4302	#define ENTROPY_KEY "malloc_entropy="
4303
4304	/*
4305	* libplatform needs a random pointer-obfuscation value when it is initialized.
4306	*/
4307	#define PTR_MUNGE_VALUES 1
4308	#define PTR_MUNGE_KEY "ptr_munge="
4309
4310	/*
4311	* System malloc engages nanozone for UIAPP.
4312	*/
4313	#define NANO_ENGAGE_KEY "MallocNanoZone=1"
4314
4315	#define PFZ_KEY "pfz="
4316	extern user32_addr_t commpage_text32_location;
4317	extern user64_addr_t commpage_text64_location;
4318
4319	#define MAIN_STACK_VALUES 4
4320	#define MAIN_STACK_KEY "main_stack="
4321
4322	#define FSID_KEY "executable_file="
4323	#define DYLD_FSID_KEY "dyld_file="
4324	#define CDHASH_KEY "executable_cdhash="
4325
4326	#define FSID_MAX_STRING "0x1234567890abcdef,0x1234567890abcdef"
4327
4328	#define HEX_STR_LEN 18 // 64-bit hex value "0x0123456701234567"
4329
4330	static int
4331	exec_add_entropy_key(struct image_params *imgp,
4332	const char *key,
4333	int values,
4334	boolean_t embedNUL)
4335	{
4336	const int limit = `8`;
4337	uint64_t entropy[limit];
4338	char str[strlen(key) + (HEX_STR_LEN + `1`) * limit + `1`];
4339	if (values > limit) {
4340	values = limit;
4341	}
4342
4343	read_random(entropy, sizeof(entropy[`0`]) * values);
4344
4345	if (embedNUL) {
4346	entropy[`0`] &= ~(`0xffull` << `8`);
4347	}
4348
4349	int len = snprintf(str, sizeof(str), "%s0x%llx", key, entropy[`0`]);
4350	int remaining = sizeof(str) - len;
4351	for (int i = `1`; i < values && remaining > `0`; ++i) {
4352	int start = sizeof(str) - remaining;
4353	len = snprintf(&str[start], remaining, ",0x%llx", entropy[i]);
4354	remaining -= len;
4355	}
4356
4357	return exec_add_user_string(imgp, CAST_USER_ADDR_T(str), UIO_SYSSPACE, FALSE);
4358	}
4359
4360	/*
4361	* Build up the contents of the apple[] string vector
4362	*/
4363	static int
4364	exec_add_apple_strings(struct image_params *imgp,
4365	const load_result_t *load_result)
4366	{
4367	int error;
4368	int img_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT_ADDR) ? `8` : `4`;
4369
4370	/ exec_save_path stored the first string /
4371	imgp->ip_applec = `1`;
4372
4373	/ adding the pfz string /
4374	{
4375	char pfz_string[strlen(PFZ_KEY) + HEX_STR_LEN + `1`];
4376
4377	if (img_ptr_size == `8`) {
4378	snprintf(pfz_string, sizeof(pfz_string), PFZ_KEY "0x%llx", commpage_text64_location);
4379	} else {
4380	snprintf(pfz_string, sizeof(pfz_string), PFZ_KEY "0x%x", commpage_text32_location);
4381	}
4382	error = exec_add_user_string(imgp, CAST_USER_ADDR_T(pfz_string), UIO_SYSSPACE, FALSE);
4383	if (error) {
4384	goto bad;
4385	}
4386	imgp->ip_applec++;
4387	}
4388
4389	/ adding the NANO_ENGAGE_KEY key /
4390	if (imgp->ip_px_sa) {
4391	int proc_flags = (((struct _posix_spawnattr *) imgp->ip_px_sa)->psa_flags);
4392
4393	if ((proc_flags & _POSIX_SPAWN_NANO_ALLOCATOR) == _POSIX_SPAWN_NANO_ALLOCATOR) {
4394	const char *nano_string = NANO_ENGAGE_KEY;
4395	error = exec_add_user_string(imgp, CAST_USER_ADDR_T(nano_string), UIO_SYSSPACE, FALSE);
4396	if (error){
4397	goto bad;
4398	}
4399	imgp->ip_applec++;
4400	}
4401	}
4402
4403	/*
4404	* Supply libc with a collection of random values to use when
4405	* implementing -fstack-protector.
4406	*
4407	* (The first random string always contains an embedded NUL so that
4408	* __stack_chk_guard also protects against C string vulnerabilities)
4409	*/
4410	error = exec_add_entropy_key(imgp, GUARD_KEY, GUARD_VALUES, TRUE);
4411	if (error) {
4412	goto bad;
4413	}
4414	imgp->ip_applec++;
4415
4416	/*
4417	* Supply libc with entropy for system malloc.
4418	*/
4419	error = exec_add_entropy_key(imgp, ENTROPY_KEY, ENTROPY_VALUES, FALSE);
4420	if (error) {
4421	goto bad;
4422	}
4423	imgp->ip_applec++;
4424
4425	/*
4426	* Supply libpthread & libplatform with a random value to use for pointer
4427	* obfuscation.
4428	*/
4429	error = exec_add_entropy_key(imgp, PTR_MUNGE_KEY, PTR_MUNGE_VALUES, FALSE);
4430	if (error) {
4431	goto bad;
4432	}
4433	imgp->ip_applec++;
4434
4435	/*
4436	* Add MAIN_STACK_KEY: Supplies the address and size of the main thread's
4437	* stack if it was allocated by the kernel.
4438	*
4439	* The guard page is not included in this stack size as libpthread
4440	* expects to add it back in after receiving this value.
4441	*/
4442	if (load_result->unixproc) {
4443	char stack_string[strlen(MAIN_STACK_KEY) + (HEX_STR_LEN + `1`) * MAIN_STACK_VALUES + `1`];
4444	snprintf(stack_string, sizeof(stack_string),
4445	MAIN_STACK_KEY "0x%llx,0x%llx,0x%llx,0x%llx",
4446	(uint64_t)load_result->user_stack,
4447	(uint64_t)load_result->user_stack_size,
4448	(uint64_t)load_result->user_stack_alloc,
4449	(uint64_t)load_result->user_stack_alloc_size);
4450	error = exec_add_user_string(imgp, CAST_USER_ADDR_T(stack_string), UIO_SYSSPACE, FALSE);
4451	if (error) {
4452	goto bad;
4453	}
4454	imgp->ip_applec++;
4455	}
4456
4457	if (imgp->ip_vattr) {
4458	uint64_t fsid = get_va_fsid(imgp->ip_vattr);
4459	uint64_t fsobjid = imgp->ip_vattr->va_fileid;
4460
4461	char fsid_string[strlen(FSID_KEY) + strlen(FSID_MAX_STRING) + `1`];
4462	snprintf(fsid_string, sizeof(fsid_string),
4463	FSID_KEY "0x%llx,0x%llx", fsid, fsobjid);
4464	error = exec_add_user_string(imgp, CAST_USER_ADDR_T(fsid_string), UIO_SYSSPACE, FALSE);
4465	if (error) {
4466	goto bad;
4467	}
4468	imgp->ip_applec++;
4469	}
4470
4471	if (imgp->ip_dyld_fsid \|\| imgp->ip_dyld_fsobjid ) {
4472	char fsid_string[strlen(DYLD_FSID_KEY) + strlen(FSID_MAX_STRING) + `1`];
4473	snprintf(fsid_string, sizeof(fsid_string),
4474	DYLD_FSID_KEY "0x%llx,0x%llx", imgp->ip_dyld_fsid, imgp->ip_dyld_fsobjid);
4475	error = exec_add_user_string(imgp, CAST_USER_ADDR_T(fsid_string), UIO_SYSSPACE, FALSE);
4476	if (error) {
4477	goto bad;
4478	}
4479	imgp->ip_applec++;
4480	}
4481
4482	uint8_t cdhash[SHA1_RESULTLEN];
4483	int cdhash_errror = ubc_cs_getcdhash(imgp->ip_vp, imgp->ip_arch_offset, cdhash);
4484	if (cdhash_errror == `0`) {
4485	char hash_string[strlen(CDHASH_KEY) + `2`*SHA1_RESULTLEN + `1`];
4486	strncpy(hash_string, CDHASH_KEY, sizeof(hash_string));
4487	char p = hash_string + sizeof*(CDHASH_KEY) - `1`;
4488	for (int i = `0`; i < SHA1_RESULTLEN; i++) {
4489	snprintf(p, `3`, "%02x", (int) cdhash[i]);
4490	p += `2`;
4491	}
4492	error = exec_add_user_string(imgp, CAST_USER_ADDR_T(hash_string), UIO_SYSSPACE, FALSE);
4493	if (error) {
4494	goto bad;
4495	}
4496	imgp->ip_applec++;
4497	}
4498
4499	/ Align the tail of the combined applev area /
4500	while (imgp->ip_strspace % img_ptr_size != `0`) {
4501	*imgp->ip_strendp++ = `'\0'`;
4502	imgp->ip_strspace--;
4503	}
4504
4505	bad:
4506	return error;
4507	}
4508
/*
 * Current (soft) RLIMIT_STACK of process p.  The argument is parenthesized
 * so that any pointer-valued expression may be passed.
 */
#define unix_stack_size(p)	((p)->p_rlimit[RLIMIT_STACK].rlim_cur)
4510
4511	/*
4512	* exec_check_permissions
4513	*
4514	* Description: Verify that the file that is being attempted to be executed
4515	* is in fact allowed to be executed based on it POSIX file
4516	* permissions and other access control criteria
4517	*
4518	* Parameters: struct image_params * the image parameter block
4519	*
4520	* Returns: 0 Success
4521	* EACCES Permission denied
4522	* ENOEXEC Executable file format error
4523	* ETXTBSY Text file busy [misuse of error code]
4524	* vnode_getattr:???
4525	* vnode_authorize:???
4526	*/
4527	static int
4528	exec_check_permissions(struct image_params *imgp)
4529	{
4530	struct vnode *vp = imgp->ip_vp;
4531	struct vnode_attr *vap = imgp->ip_vattr;
4532	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
4533	int error;
4534	kauth_action_t action;
4535
4536	/ Only allow execution of regular files /
4537	if (!vnode_isreg(vp))
4538	return (EACCES);
4539
4540	/ Get the file attributes that we will be using here and elsewhere /
4541	VATTR_INIT(vap);
4542	VATTR_WANTED(vap, va_uid);
4543	VATTR_WANTED(vap, va_gid);
4544	VATTR_WANTED(vap, va_mode);
4545	VATTR_WANTED(vap, va_fsid);
4546	VATTR_WANTED(vap, va_fsid64);
4547	VATTR_WANTED(vap, va_fileid);
4548	VATTR_WANTED(vap, va_data_size);
4549	if ((error = vnode_getattr(vp, vap, imgp->ip_vfs_context)) != `0`)
4550	return (error);
4551
4552	/*
4553	* Ensure that at least one execute bit is on - otherwise root
4554	* will always succeed, and we don't want to happen unless the
4555	* file really is executable.
4556	*/
4557	if (!vfs_authopaque(vnode_mount(vp)) && ((vap->va_mode & (S_IXUSR \| S_IXGRP \| S_IXOTH)) == `0`))
4558	return (EACCES);
4559
4560	/ Disallow zero length files /
4561	if (vap->va_data_size == `0`)
4562	return (ENOEXEC);
4563
4564	imgp->ip_arch_offset = (user_size_t)`0`;
4565	imgp->ip_arch_size = vap->va_data_size;
4566
4567	/ Disable setuid-ness for traced programs or if MNT_NOSUID /
4568	if ((vp->v_mount->mnt_flag & MNT_NOSUID) \|\| (p->p_lflag & P_LTRACED))
4569	vap->va_mode &= ~(VSUID \| VSGID);
4570
4571	/*
4572	* Disable _POSIX_SPAWN_ALLOW_DATA_EXEC and _POSIX_SPAWN_DISABLE_ASLR
4573	* flags for setuid/setgid binaries.
4574	*/
4575	if (vap->va_mode & (VSUID \| VSGID))
4576	imgp->ip_flags &= ~(IMGPF_ALLOW_DATA_EXEC \| IMGPF_DISABLE_ASLR);
4577
4578	#if CONFIG_MACF
4579	error = mac_vnode_check_exec(imgp->ip_vfs_context, vp, imgp);
4580	if (error)
4581	return (error);
4582	#endif
4583
4584	/ Check for execute permission /
4585	action = KAUTH_VNODE_EXECUTE;
4586	/ Traced images must also be readable /
4587	if (p->p_lflag & P_LTRACED)
4588	action \|= KAUTH_VNODE_READ_DATA;
4589	if ((error = vnode_authorize(vp, NULL, action, imgp->ip_vfs_context)) != `0`)
4590	return (error);
4591
4592	#if 0
4593	/ Don't let it run if anyone had it open for writing /
4594	vnode_lock(vp);
4595	if (vp->v_writecount) {
4596	panic("going to return ETXTBSY %x", vp);
4597	vnode_unlock(vp);
4598	return (ETXTBSY);
4599	}
4600	vnode_unlock(vp);
4601	#endif
4602
4603
4604	/ XXX May want to indicate to underlying FS that vnode is open /
4605
4606	return (error);
4607	}
4608
4609
4610	/*
4611	* exec_handle_sugid
4612	*
4613	* Initially clear the P_SUGID in the process flags; if an SUGID process is
4614	* exec'ing a non-SUGID image, then this is the point of no return.
4615	*
4616	* If the image being activated is SUGID, then replace the credential with a
4617	* copy, disable tracing (unless the tracing process is root), reset the
4618	* mach task port to revoke it, set the P_SUGID bit,
4619	*
4620	* If the saved user and group ID will be changing, then make sure it happens
4621	* to a new credential, rather than a shared one.
4622	*
4623	* Set the security token (this is probably obsolete, given that the token
4624	* should not technically be separate from the credential itself).
4625	*
4626	* Parameters: struct image_params * the image parameter block
4627	*
4628	* Returns: void No failure indication
4629	*
4630	* Implicit returns:
4631	* <process credential> Potentially modified/replaced
4632	* <task port> Potentially revoked
4633	* <process flags> P_SUGID bit potentially modified
4634	* <security token> Potentially modified
4635	*/
4636	static int
4637	exec_handle_sugid(struct image_params *imgp)
4638	{
4639	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
4640	kauth_cred_t cred = vfs_context_ucred(imgp->ip_vfs_context);
4641	kauth_cred_t my_cred, my_new_cred;
4642	int i;
4643	int leave_sugid_clear = `0`;
4644	int mac_reset_ipc = `0`;
4645	int error = `0`;
4646	task_t task = NULL;
4647	#if CONFIG_MACF
4648	int mac_transition, disjoint_cred = `0`;
4649	int label_update_return = `0`;
4650
4651	/*
4652	* Determine whether a call to update the MAC label will result in the
4653	* credential changing.
4654	*
4655	* Note: MAC policies which do not actually end up modifying
4656	* the label subsequently are strongly encouraged to
4657	* return 0 for this check, since a non-zero answer will
4658	* slow down the exec fast path for normal binaries.
4659	*/
4660	mac_transition = mac_cred_check_label_update_execve(
4661	imgp->ip_vfs_context,
4662	imgp->ip_vp,
4663	imgp->ip_arch_offset,
4664	imgp->ip_scriptvp,
4665	imgp->ip_scriptlabelp,
4666	imgp->ip_execlabelp,
4667	p,
4668	imgp->ip_px_smpx);
4669	#endif
4670
4671	OSBitAndAtomic(~((uint32_t)P_SUGID), &p->p_flag);
4672
4673	/*
4674	* Order of the following is important; group checks must go last,
4675	* as we use the success of the 'ismember' check combined with the
4676	* failure of the explicit match to indicate that we will be setting
4677	* the egid of the process even though the new process did not
4678	* require VSUID/VSGID bits in order for it to set the new group as
4679	* its egid.
4680	*
4681	* Note: Technically, by this we are implying a call to
4682	* setegid() in the new process, rather than implying
4683	* it used its VSGID bit to set the effective group,
4684	* even though there is no code in that process to make
4685	* such a call.
4686	*/
4687	if (((imgp->ip_origvattr->va_mode & VSUID) != `0` &&
4688	kauth_cred_getuid(cred) != imgp->ip_origvattr->va_uid) \|\|
4689	((imgp->ip_origvattr->va_mode & VSGID) != `0` &&
4690	((kauth_cred_ismember_gid(cred, imgp->ip_origvattr->va_gid, &leave_sugid_clear) \|\| !leave_sugid_clear) \|\|
4691	(kauth_cred_getgid(cred) != imgp->ip_origvattr->va_gid)))) {
4692
4693	#if CONFIG_MACF
4694	/ label for MAC transition and neither VSUID nor VSGID /
4695	handle_mac_transition:
4696	#endif
4697
4698	#if !SECURE_KERNEL
4699	/*
4700	* Replace the credential with a copy of itself if euid or
4701	* egid change.
4702	*
4703	* Note: setuid binaries will automatically opt out of
4704	* group resolver participation as a side effect
4705	* of this operation. This is an intentional
4706	* part of the security model, which requires a
4707	* participating credential be established by
4708	* escalating privilege, setting up all other
4709	* aspects of the credential including whether
4710	* or not to participate in external group
4711	* membership resolution, then dropping their
4712	* effective privilege to that of the desired
4713	* final credential state.
4714	*
4715	* Modifications to p_ucred must be guarded using the
4716	* proc's ucred lock. This prevents others from accessing
4717	* a garbage credential.
4718	*/
4719	while (imgp->ip_origvattr->va_mode & VSUID) {
4720	my_cred = kauth_cred_proc_ref(p);
4721	my_new_cred = kauth_cred_setresuid(my_cred, KAUTH_UID_NONE, imgp->ip_origvattr->va_uid, imgp->ip_origvattr->va_uid, KAUTH_UID_NONE);
4722
4723	if (my_new_cred == my_cred) {
4724	kauth_cred_unref(&my_cred);
4725	break;
4726	}
4727
4728	/ update cred on proc /
4729	proc_ucred_lock(p);
4730
4731	if (p->p_ucred != my_cred) {
4732	proc_ucred_unlock(p);
4733	kauth_cred_unref(&my_new_cred);
4734	continue;
4735	}
4736
4737	/ donate cred reference on my_new_cred to p->p_ucred /
4738	p->p_ucred = my_new_cred;
4739	PROC_UPDATE_CREDS_ONPROC(p);
4740	proc_ucred_unlock(p);
4741
4742	/ drop additional reference that was taken on the previous cred /
4743	kauth_cred_unref(&my_cred);
4744
4745	break;
4746	}
4747
4748	while (imgp->ip_origvattr->va_mode & VSGID) {
4749	my_cred = kauth_cred_proc_ref(p);
4750	my_new_cred = kauth_cred_setresgid(my_cred, KAUTH_GID_NONE, imgp->ip_origvattr->va_gid, imgp->ip_origvattr->va_gid);
4751
4752	if (my_new_cred == my_cred) {
4753	kauth_cred_unref(&my_cred);
4754	break;
4755	}
4756
4757	/ update cred on proc /
4758	proc_ucred_lock(p);
4759
4760	if (p->p_ucred != my_cred) {
4761	proc_ucred_unlock(p);
4762	kauth_cred_unref(&my_new_cred);
4763	continue;
4764	}
4765
4766	/ donate cred reference on my_new_cred to p->p_ucred /
4767	p->p_ucred = my_new_cred;
4768	PROC_UPDATE_CREDS_ONPROC(p);
4769	proc_ucred_unlock(p);
4770
4771	/ drop additional reference that was taken on the previous cred /
4772	kauth_cred_unref(&my_cred);
4773
4774	break;
4775	}
4776	#endif /* !SECURE_KERNEL */
4777
4778	#if CONFIG_MACF
4779	/*
4780	* If a policy has indicated that it will transition the label,
4781	* before making the call into the MAC policies, get a new
4782	* duplicate credential, so they can modify it without
4783	* modifying any others sharing it.
4784	*/
4785	if (mac_transition) {
4786	/*
4787	* This hook may generate upcalls that require
4788	* importance donation from the kernel.
4789	* (23925818)
4790	*/
4791	thread_t thread = current_thread();
4792	thread_enable_send_importance(thread, TRUE);
4793	kauth_proc_label_update_execve(p,
4794	imgp->ip_vfs_context,
4795	imgp->ip_vp,
4796	imgp->ip_arch_offset,
4797	imgp->ip_scriptvp,
4798	imgp->ip_scriptlabelp,
4799	imgp->ip_execlabelp,
4800	&imgp->ip_csflags,
4801	imgp->ip_px_smpx,
4802	&disjoint_cred, / will be non zero if disjoint /
4803	&label_update_return);
4804	thread_enable_send_importance(thread, FALSE);
4805
4806	if (disjoint_cred) {
4807	/*
4808	* If updating the MAC label resulted in a
4809	* disjoint credential, flag that we need to
4810	* set the P_SUGID bit. This protects
4811	* against debuggers being attached by an
4812	* insufficiently privileged process onto the
4813	* result of a transition to a more privileged
4814	* credential.
4815	*/
4816	leave_sugid_clear = `0`;
4817	}
4818
4819	imgp->ip_mac_return = label_update_return;
4820	}
4821
4822	mac_reset_ipc = mac_proc_check_inherit_ipc_ports(p, p->p_textvp, p->p_textoff, imgp->ip_vp, imgp->ip_arch_offset, imgp->ip_scriptvp);
4823
4824	#endif /* CONFIG_MACF */
4825
4826	/*
4827	* If 'leave_sugid_clear' is non-zero, then we passed the
4828	* VSUID and MACF checks, and successfully determined that
4829	* the previous cred was a member of the VSGID group, but
4830	* that it was not the default at the time of the execve,
4831	* and that the post-labelling credential was not disjoint.
4832	* So we don't set the P_SUGID or reset mach ports and fds
4833	* on the basis of simply running this code.
4834	*/
4835	if (mac_reset_ipc \|\| !leave_sugid_clear) {
4836	/*
4837	* Have mach reset the task and thread ports.
4838	* We don't want anyone who had the ports before
4839	* a setuid exec to be able to access/control the
4840	* task/thread after.
4841	*/
4842	ipc_task_reset((imgp->ip_new_thread != NULL) ?
4843	get_threadtask(imgp->ip_new_thread) : p->task);
4844	ipc_thread_reset((imgp->ip_new_thread != NULL) ?
4845	imgp->ip_new_thread : current_thread());
4846	}
4847
4848	if (!leave_sugid_clear) {
4849	/*
4850	* Flag the process as setuid.
4851	*/
4852	OSBitOrAtomic(P_SUGID, &p->p_flag);
4853
4854	/*
4855	* Radar 2261856; setuid security hole fix
4856	* XXX For setuid processes, attempt to ensure that
4857	* stdin, stdout, and stderr are already allocated.
4858	* We do not want userland to accidentally allocate
4859	* descriptors in this range which has implied meaning
4860	* to libc.
4861	*/
4862	for (i = `0`; i < `3`; i++) {
4863
4864	if (p->p_fd->fd_ofiles[i] != NULL)
4865	continue;
4866
4867	/*
4868	* Do the kernel equivalent of
4869	*
4870	* if i == 0
4871	* (void) open("/dev/null", O_RDONLY);
4872	* else
4873	* (void) open("/dev/null", O_WRONLY);
4874	*/
4875
4876	struct fileproc *fp;
4877	int indx;
4878	int flag;
4879	struct nameidata *ndp = NULL;
4880
4881	if (i == `0`)
4882	flag = FREAD;
4883	else
4884	flag = FWRITE;
4885
4886	if ((error = falloc(p,
4887	&fp, &indx, imgp->ip_vfs_context)) != `0`)
4888	continue;
4889
4890	MALLOC(ndp, struct nameidata , sizeof(ndp), M_TEMP, M_WAITOK \| M_ZERO);
4891	if (ndp == NULL) {
4892	fp_free(p, indx, fp);
4893	error = ENOMEM;
4894	break;
4895	}
4896
4897	NDINIT(ndp, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE,
4898	CAST_USER_ADDR_T("/dev/null"),
4899	imgp->ip_vfs_context);
4900
4901	if ((error = vn_open(ndp, flag, `0`)) != `0`) {
4902	fp_free(p, indx, fp);
4903	FREE(ndp, M_TEMP);
4904	break;
4905	}
4906
4907	struct fileglob *fg = fp->f_fglob;
4908
4909	fg->fg_flag = flag;
4910	fg->fg_ops = &vnops;
4911	fg->fg_data = ndp->ni_vp;
4912
4913	vnode_put(ndp->ni_vp);
4914
4915	proc_fdlock(p);
4916	procfdtbl_releasefd(p, indx, NULL);
4917	fp_drop(p, indx, fp, `1`);
4918	proc_fdunlock(p);
4919
4920	FREE(ndp, M_TEMP);
4921	}
4922	}
4923	}
4924	#if CONFIG_MACF
4925	else {
4926	/*
4927	* We are here because we were told that the MAC label will
4928	* be transitioned, and the binary is not VSUID or VSGID; to
4929	* deal with this case, we could either duplicate a lot of
4930	* code, or we can indicate we want to default the P_SUGID
4931	* bit clear and jump back up.
4932	*/
4933	if (mac_transition) {
4934	leave_sugid_clear = `1`;
4935	goto handle_mac_transition;
4936	}
4937	}
4938
4939	#endif /* CONFIG_MACF */
4940
4941	/*
4942	* Implement the semantic where the effective user and group become
4943	* the saved user and group in exec'ed programs.
4944	*
4945	* Modifications to p_ucred must be guarded using the
4946	* proc's ucred lock. This prevents others from accessing
4947	* a garbage credential.
4948	*/
4949	for (;;) {
4950	my_cred = kauth_cred_proc_ref(p);
4951	my_new_cred = kauth_cred_setsvuidgid(my_cred, kauth_cred_getuid(my_cred), kauth_cred_getgid(my_cred));
4952
4953	if (my_new_cred == my_cred) {
4954	kauth_cred_unref(&my_cred);
4955	break;
4956	}
4957
4958	/ update cred on proc /
4959	proc_ucred_lock(p);
4960
4961	if (p->p_ucred != my_cred) {
4962	proc_ucred_unlock(p);
4963	kauth_cred_unref(&my_new_cred);
4964	continue;
4965	}
4966
4967	/ donate cred reference on my_new_cred to p->p_ucred /
4968	p->p_ucred = my_new_cred;
4969	PROC_UPDATE_CREDS_ONPROC(p);
4970	proc_ucred_unlock(p);
4971
4972	/ drop additional reference that was taken on the previous cred /
4973	kauth_cred_unref(&my_cred);
4974
4975	break;
4976	}
4977
4978
4979	/ Update the process' identity version and set the security token /
4980	p->p_idversion++;
4981
4982	if (imgp->ip_new_thread != NULL) {
4983	task = get_threadtask(imgp->ip_new_thread);
4984	} else {
4985	task = p->task;
4986	}
4987	set_security_token_task_internal(p, task);
4988
4989	return(error);
4990	}
4991
4992
4993	/*
4994	* create_unix_stack
4995	*
4996	* Description: Set the user stack address for the process to the provided
4997	* address. If a custom stack was not set as a result of the
4998	* load process (i.e. as specified by the image file for the
4999	* executable), then allocate the stack in the provided map and
5000	* set up appropriate guard pages for enforcing administrative
5001	* limits on stack growth, if they end up being needed.
5002	*
5003	* Parameters: p Process to set stack on
5004	* load_result Information from mach-o load commands
5005	* map Address map in which to allocate the new stack
5006	*
5007	* Returns: KERN_SUCCESS Stack successfully created
5008	* !KERN_SUCCESS Mach failure code
5009	*/
5010	static kern_return_t
5011	create_unix_stack(vm_map_t map, load_result_t* load_result,
5012	proc_t p)
5013	{
5014	mach_vm_size_t size, prot_size;
5015	mach_vm_offset_t addr, prot_addr;
5016	kern_return_t kr;
5017
5018	mach_vm_address_t user_stack = load_result->user_stack;
5019
5020	proc_lock(p);
5021	p->user_stack = user_stack;
5022	proc_unlock(p);
5023
5024	if (load_result->user_stack_alloc_size > `0`) {
5025	/*
5026	* Allocate enough space for the maximum stack size we
5027	* will ever authorize and an extra page to act as
5028	* a guard page for stack overflows. For default stacks,
5029	* vm_initial_limit_stack takes care of the extra guard page.
5030	* Otherwise we must allocate it ourselves.
5031	*/
5032	if (mach_vm_round_page_overflow(load_result->user_stack_alloc_size, &size)) {
5033	return KERN_INVALID_ARGUMENT;
5034	}
5035	addr = mach_vm_trunc_page(load_result->user_stack - size);
5036	kr = mach_vm_allocate_kernel(map, &addr, size,
5037	VM_FLAGS_FIXED, VM_MEMORY_STACK);
5038	if (kr != KERN_SUCCESS) {
5039	// Can't allocate at default location, try anywhere
5040	addr = `0`;
5041	kr = mach_vm_allocate_kernel(map, &addr, size,
5042	VM_FLAGS_ANYWHERE, VM_MEMORY_STACK);
5043	if (kr != KERN_SUCCESS) {
5044	return kr;
5045	}
5046
5047	user_stack = addr + size;
5048	load_result->user_stack = user_stack;
5049
5050	proc_lock(p);
5051	p->user_stack = user_stack;
5052	proc_unlock(p);
5053	}
5054
5055	load_result->user_stack_alloc = addr;
5056
5057	/*
5058	* And prevent access to what's above the current stack
5059	* size limit for this process.
5060	*/
5061	if (load_result->user_stack_size == `0`) {
5062	load_result->user_stack_size = unix_stack_size(p);
5063	prot_size = mach_vm_trunc_page(size - load_result->user_stack_size);
5064	} else {
5065	prot_size = PAGE_SIZE;
5066	}
5067
5068	prot_addr = addr;
5069	kr = mach_vm_protect(map,
5070	prot_addr,
5071	prot_size,
5072	FALSE,
5073	VM_PROT_NONE);
5074	if (kr != KERN_SUCCESS) {
5075	(void)mach_vm_deallocate(map, addr, size);
5076	return kr;
5077	}
5078	}
5079
5080	return KERN_SUCCESS;
5081	}
5082
5083	#include <sys/reboot.h>
5084
5085	/*
5086	* load_init_program_at_path
5087	*
5088	* Description: Load the "init" program; in most cases, this will be "launchd"
5089	*
5090	* Parameters: p Process to call execve() to create
5091	* the "init" program
5092	* scratch_addr Page in p, scratch space
5093	* path NULL terminated path
5094	*
5095	* Returns: KERN_SUCCESS Success
5096	* !KERN_SUCCESS See execve/mac_execve for error codes
5097	*
5098	* Notes: The process that is passed in is the first manufactured
5099	* process on the system, and gets here via bsd_ast() firing
5100	* for the first time. This is done to ensure that bsd_init()
5101	* has run to completion.
5102	*
5103	* The address map of the first manufactured process matches the
5104	* word width of the kernel. Once the self-exec completes, the
5105	* initproc might be different.
5106	*/
5107	static int
5108	load_init_program_at_path(proc_t p, user_addr_t scratch_addr, const char* path)
5109	{
5110	int retval[`2`];
5111	int error;
5112	struct execve_args init_exec_args;
5113	user_addr_t argv0 = USER_ADDR_NULL, argv1 = USER_ADDR_NULL;
5114
5115	/*
5116	* Validate inputs and pre-conditions
5117	*/
5118	assert(p);
5119	assert(scratch_addr);
5120	assert(path);
5121
5122	/*
5123	* Copy out program name.
5124	*/
5125	size_t path_length = strlen(path) + `1`;
5126	argv0 = scratch_addr;
5127	error = copyout(path, argv0, path_length);
5128	if (error)
5129	return error;
5130
5131	scratch_addr = USER_ADDR_ALIGN(scratch_addr + path_length, sizeof(user_addr_t));
5132
5133	/*
5134	* Put out first (and only) argument, similarly.
5135	* Assumes everything fits in a page as allocated above.
5136	*/
5137	if (boothowto & RB_SINGLE) {
5138	const char *init_args = "-s";
5139	size_t init_args_length = strlen(init_args)+`1`;
5140
5141	argv1 = scratch_addr;
5142	error = copyout(init_args, argv1, init_args_length);
5143	if (error)
5144	return error;
5145
5146	scratch_addr = USER_ADDR_ALIGN(scratch_addr + init_args_length, sizeof(user_addr_t));
5147	}
5148
5149	if (proc_is64bit(p)) {
5150	user64_addr_t argv64bit[`3`] = {};
5151
5152	argv64bit[`0`] = argv0;
5153	argv64bit[`1`] = argv1;
5154	argv64bit[`2`] = USER_ADDR_NULL;
5155
5156	error = copyout(argv64bit, scratch_addr, sizeof(argv64bit));
5157	if (error)
5158	return error;
5159	} else {
5160	user32_addr_t argv32bit[`3`] = {};
5161
5162	argv32bit[`0`] = (user32_addr_t)argv0;
5163	argv32bit[`1`] = (user32_addr_t)argv1;
5164	argv32bit[`2`] = USER_ADDR_NULL;
5165
5166	error = copyout(argv32bit, scratch_addr, sizeof(argv32bit));
5167	if (error)
5168	return error;
5169	}
5170
5171	/*
5172	* Set up argument block for fake call to execve.
5173	*/
5174	init_exec_args.fname = argv0;
5175	init_exec_args.argp = scratch_addr;
5176	init_exec_args.envp = USER_ADDR_NULL;
5177
5178	/*
5179	* So that init task is set with uid,gid 0 token
5180	*/
5181	set_security_token(p);
5182
5183	return execve(p, &init_exec_args, retval);
5184	}
5185
/*
 * Candidate paths for the "init" program, in the order load_init_program()
 * walks them.  The build-specific variants are only compiled in for DEBUG
 * and DEVELOPMENT kernels; "/sbin/launchd" is always the last entry.
 */
static const char * init_programs[] = {
#if DEBUG
	"/usr/local/sbin/launchd.debug",
#endif
#if DEVELOPMENT || DEBUG
	"/usr/local/sbin/launchd.development",
#endif
	"/sbin/launchd",
};
5195
5196	/*
5197	* load_init_program
5198	*
5199	* Description: Load the "init" program; in most cases, this will be "launchd"
5200	*
5201	* Parameters: p Process to call execve() to create
5202	* the "init" program
5203	*
5204	* Returns: (void)
5205	*
5206	* Notes: The process that is passed in is the first manufactured
5207	* process on the system, and gets here via bsd_ast() firing
5208	* for the first time. This is done to ensure that bsd_init()
5209	* has run to completion.
5210	*
5211	* In DEBUG & DEVELOPMENT builds, the launchdsuffix boot-arg
5212	* may be used to select a specific launchd executable. As with
5213	* the kcsuffix boot-arg, setting launchdsuffix to "" or "release"
5214	* will force /sbin/launchd to be selected.
5215	*
5216	* Search order by build:
5217	*
5218	* DEBUG DEVELOPMENT RELEASE PATH
5219	* ----------------------------------------------------------------------------------
5220	* 1 1 NA /usr/local/sbin/launchd.$LAUNCHDSUFFIX
5221	* 2 NA NA /usr/local/sbin/launchd.debug
5222	* 3 2 NA /usr/local/sbin/launchd.development
5223	* 4 3 1 /sbin/launchd
5224	*/
5225	void
5226	load_init_program(proc_t p)
5227	{
5228	uint32_t i;
5229	int error;
5230	vm_map_t map = current_map();
5231	mach_vm_offset_t scratch_addr = `0`;
5232	mach_vm_size_t map_page_size = vm_map_page_size(map);
5233
5234	(void) mach_vm_allocate_kernel(map, &scratch_addr, map_page_size, VM_FLAGS_ANYWHERE, VM_KERN_MEMORY_NONE);
5235	#if CONFIG_MEMORYSTATUS
5236	(void) memorystatus_init_at_boot_snapshot();
5237	#endif /* CONFIG_MEMORYSTATUS */
5238
5239	#if DEBUG \|\| DEVELOPMENT
5240	/ Check for boot-arg suffix first /
5241	char launchd_suffix[`64`];
5242	if (PE_parse_boot_argn("launchdsuffix", launchd_suffix, sizeof(launchd_suffix))) {
5243	char launchd_path[`128`];
5244	boolean_t is_release_suffix = ((launchd_suffix[`0`] == `0`) \|\|
5245	(strcmp(launchd_suffix, "release") == `0`));
5246
5247	if (is_release_suffix) {
5248	printf("load_init_program: attempting to load /sbin/launchd\n");
5249	error = load_init_program_at_path(p, (user_addr_t)scratch_addr, "/sbin/launchd");
5250	if (!error)
5251	return;
5252
5253	panic("Process 1 exec of launchd.release failed, errno %d", error);
5254	} else {
5255	strlcpy(launchd_path, "/usr/local/sbin/launchd.", sizeof(launchd_path));
5256	strlcat(launchd_path, launchd_suffix, sizeof(launchd_path));
5257
5258	printf("load_init_program: attempting to load %s\n", launchd_path);
5259	error = load_init_program_at_path(p, (user_addr_t)scratch_addr, launchd_path);
5260	if (!error) {
5261	return;
5262	} else {
5263	printf("load_init_program: failed loading %s: errno %d\n", launchd_path, error);
5264	}
5265	}
5266	}
5267	#endif
5268
5269	error = ENOENT;
5270	for (i = `0`; i < sizeof(init_programs)/sizeof(init_programs[`0`]); i++) {
5271	printf("load_init_program: attempting to load %s\n", init_programs[i]);
5272	error = load_init_program_at_path(p, (user_addr_t)scratch_addr, init_programs[i]);
5273	if (!error) {
5274	return;
5275	} else {
5276	printf("load_init_program: failed loading %s: errno %d\n", init_programs[i], error);
5277	}
5278	}
5279
5280	panic("Process 1 exec of %s failed, errno %d", ((i == `0`) ? "<null>" : init_programs[i-`1`]), error);
5281	}
5282
5283	/*
5284	* load_return_to_errno
5285	*
5286	* Description: Convert a load_return_t (Mach error) to an errno (BSD error)
5287	*
5288	* Parameters: lrtn Mach error number
5289	*
5290	* Returns: (int) BSD error number
5291	* 0 Success
5292	* EBADARCH Bad architecture
5293	* EBADMACHO Bad Mach object file
5294	* ESHLIBVERS Bad shared library version
5295	* ENOMEM Out of memory/resource shortage
5296	* EACCES Access denied
5297	* ENOENT Entry not found (usually "file does
5298	* does not exist")
5299	* EIO An I/O error occurred
5300	* EBADEXEC The executable is corrupt/unknown
5301	*/
5302	static int
5303	load_return_to_errno(load_return_t lrtn)
5304	{
5305	switch (lrtn) {
5306	case LOAD_SUCCESS:
5307	return `0`;
5308	case LOAD_BADARCH:
5309	case LOAD_BADARCH_X86:
5310	return EBADARCH;
5311	case LOAD_BADMACHO:
5312	case LOAD_BADMACHO_UPX:
5313	return EBADMACHO;
5314	case LOAD_SHLIB:
5315	return ESHLIBVERS;
5316	case LOAD_NOSPACE:
5317	case LOAD_RESOURCE:
5318	return ENOMEM;
5319	case LOAD_PROTECT:
5320	return EACCES;
5321	case LOAD_ENOENT:
5322	return ENOENT;
5323	case LOAD_IOERROR:
5324	return EIO;
5325	case LOAD_FAILURE:
5326	case LOAD_DECRYPTFAIL:
5327	default:
5328	return EBADEXEC;
5329	}
5330	}
5331
5332	#include <mach/mach_types.h>
5333	#include <mach/vm_prot.h>
5334	#include <mach/semaphore.h>
5335	#include <mach/sync_policy.h>
5336	#include <kern/clock.h>
5337	#include <mach/kern_return.h>
5338
/*
 * execargs_alloc
 *
 * Description:	Allocate the block of memory used by the execve arguments.
 *		At the same time, we allocate a page so that we can read in
 *		the first page of the image.
 *
 * Parameters:	struct image_params *	the image parameter block
 *
 * Returns:	0			Success
 *		EINVAL			Invalid argument
 *		EACCES			Permission denied
 *		EINTR			Interrupted function
 *		ENOMEM			Not enough space
 *
 * Notes:	This is a temporary allocation into the kernel address space
 *		to enable us to copy arguments in from user space.  This is
 *		necessitated by not mapping the process calling execve() into
 *		the kernel address space during the execve() system call.
 *
 *		We assemble the argument and environment, etc., into this
 *		region before copying it as a single block into the child
 *		process address space (at the top or bottom of the stack,
 *		depending on which way the stack grows; see the function
 *		exec_copyout_strings() for details).
 *
 *		This ends up with a second (possibly unnecessary) copy compared
 *		with assembling the data directly into the child address space,
 *		instead, but since we cannot be guaranteed that the parent has
 *		not modified its environment, we can't really know that it's
 *		really a block there as well.
 */

5371
5372
5373	static int execargs_waiters = `0`;
5374	lck_mtx_t *execargs_cache_lock;
5375
5376	static void
5377	execargs_lock_lock(void) {
5378	lck_mtx_lock_spin(execargs_cache_lock);
5379	}
5380
5381	static void
5382	execargs_lock_unlock(void) {
5383	lck_mtx_unlock(execargs_cache_lock);
5384	}
5385
5386	static wait_result_t
5387	execargs_lock_sleep(void) {
5388	return(lck_mtx_sleep(execargs_cache_lock, LCK_SLEEP_DEFAULT, &execargs_free_count, THREAD_INTERRUPTIBLE));
5389	}
5390
5391	static kern_return_t
5392	execargs_purgeable_allocate(char **execarg_address) {
5393	kern_return_t kr = vm_allocate_kernel(bsd_pageable_map, (vm_offset_t *)execarg_address, BSD_PAGEABLE_SIZE_PER_EXEC, VM_FLAGS_ANYWHERE \| VM_FLAGS_PURGABLE, VM_KERN_MEMORY_NONE);
5394	assert(kr == KERN_SUCCESS);
5395	return kr;
5396	}
5397
5398	static kern_return_t
5399	execargs_purgeable_reference(void *execarg_address) {
5400	int state = VM_PURGABLE_NONVOLATILE;
5401	kern_return_t kr = vm_purgable_control(bsd_pageable_map, (vm_offset_t) execarg_address, VM_PURGABLE_SET_STATE, &state);
5402
5403	assert(kr == KERN_SUCCESS);
5404	return kr;
5405	}
5406
5407	static kern_return_t
5408	execargs_purgeable_volatilize(void *execarg_address) {
5409	int state = VM_PURGABLE_VOLATILE \| VM_PURGABLE_ORDERING_OBSOLETE;
5410	kern_return_t kr;
5411	kr = vm_purgable_control(bsd_pageable_map, (vm_offset_t) execarg_address, VM_PURGABLE_SET_STATE, &state);
5412
5413	assert(kr == KERN_SUCCESS);
5414
5415	return kr;
5416	}
5417
5418	static void
5419	execargs_wakeup_waiters(void) {
5420	thread_wakeup(&execargs_free_count);
5421	}
5422
5423	static int
5424	execargs_alloc(struct image_params *imgp)
5425	{
5426	kern_return_t kret;
5427	wait_result_t res;
5428	int i, cache_index = -`1`;
5429
5430	execargs_lock_lock();
5431
5432	while (execargs_free_count == `0`) {
5433	execargs_waiters++;
5434	res = execargs_lock_sleep();
5435	execargs_waiters--;
5436	if (res != THREAD_AWAKENED) {
5437	execargs_lock_unlock();
5438	return (EINTR);
5439	}
5440	}
5441
5442	execargs_free_count--;
5443
5444	for (i = `0`; i < execargs_cache_size; i++) {
5445	vm_offset_t element = execargs_cache[i];
5446	if (element) {
5447	cache_index = i;
5448	imgp->ip_strings = (char *)(execargs_cache[i]);
5449	execargs_cache[i] = `0`;
5450	break;
5451	}
5452	}
5453
5454	assert(execargs_free_count >= `0`);
5455
5456	execargs_lock_unlock();
5457
5458	if (cache_index == -`1`) {
5459	kret = execargs_purgeable_allocate(&imgp->ip_strings);
5460	}
5461	else
5462	kret = execargs_purgeable_reference(imgp->ip_strings);
5463
5464	assert(kret == KERN_SUCCESS);
5465	if (kret != KERN_SUCCESS) {
5466	return (ENOMEM);
5467	}
5468
5469	/ last page used to read in file headers /
5470	imgp->ip_vdata = imgp->ip_strings + ( NCARGS + PAGE_SIZE );
5471	imgp->ip_strendp = imgp->ip_strings;
5472	imgp->ip_argspace = NCARGS;
5473	imgp->ip_strspace = ( NCARGS + PAGE_SIZE );
5474
5475	return (`0`);
5476	}
5477
5478	/*
5479	* execargs_free
5480	*
5481	* Description: Free the block of memory used by the execve arguments and the
5482	* first page of the executable by a previous call to the function
5483	* execargs_alloc().
5484	*
5485	* Parameters: struct image_params * the image parameter block
5486	*
5487	* Returns: 0 Success
5488	* EINVAL Invalid argument
5489	* EINTR Oeration interrupted
5490	*/
5491	static int
5492	execargs_free(struct image_params *imgp)
5493	{
5494	kern_return_t kret;
5495	int i;
5496	boolean_t needs_wakeup = FALSE;
5497
5498	kret = execargs_purgeable_volatilize(imgp->ip_strings);
5499
5500	execargs_lock_lock();
5501	execargs_free_count++;
5502
5503	for (i = `0`; i < execargs_cache_size; i++) {
5504	vm_offset_t element = execargs_cache[i];
5505	if (element == `0`) {
5506	execargs_cache[i] = (vm_offset_t) imgp->ip_strings;
5507	imgp->ip_strings = NULL;
5508	break;
5509	}
5510	}
5511
5512	assert(imgp->ip_strings == NULL);
5513
5514	if (execargs_waiters > `0`)
5515	needs_wakeup = TRUE;
5516
5517	execargs_lock_unlock();
5518
5519	if (needs_wakeup == TRUE)
5520	execargs_wakeup_waiters();
5521
5522	return ((kret == KERN_SUCCESS ? `0` : EINVAL));
5523	}
5524
5525	static void
5526	exec_resettextvp(proc_t p, struct image_params *imgp)
5527	{
5528	vnode_t vp;
5529	off_t offset;
5530	vnode_t tvp = p->p_textvp;
5531	int ret;
5532
5533	vp = imgp->ip_vp;
5534	offset = imgp->ip_arch_offset;
5535
5536	if (vp == NULLVP)
5537	panic("exec_resettextvp: expected valid vp");
5538
5539	ret = vnode_ref(vp);
5540	proc_lock(p);
5541	if (ret == `0`) {
5542	p->p_textvp = vp;
5543	p->p_textoff = offset;
5544	} else {
5545	p->p_textvp = NULLVP; / this is paranoia /
5546	p->p_textoff = `0`;
5547	}
5548	proc_unlock(p);
5549
5550	if ( tvp != NULLVP) {
5551	if (vnode_getwithref(tvp) == `0`) {
5552	vnode_rele(tvp);
5553	vnode_put(tvp);
5554	}
5555	}
5556
5557	}
5558
// Includes the 0-byte (therefore "SIZE" instead of "LEN").
static const size_t CS_CDHASH_STRING_SIZE = CS_CDHASH_LEN * 2 + 1;

/*
 * cdhash_to_string
 *
 * Description:	Render a CS_CDHASH_LEN-byte cdhash as a NUL-terminated,
 *		lowercase hexadecimal string.
 *
 * Parameters:	str			output buffer of at least
 *					CS_CDHASH_STRING_SIZE bytes
 *		cdhash			CS_CDHASH_LEN bytes of input
 */
static void cdhash_to_string(char str[CS_CDHASH_STRING_SIZE], uint8_t const * const cdhash) {
	static char const nibble[] = "0123456789abcdef";

	/* Apparently still the safest way to get a hex representation
	 * of binary data.
	 * xnu's printf routines have %*D/%20D in theory, but "not really", see:
	 * <rdar://problem/33328859> confusion around %*D/%nD in printf
	 */
	for (int i = 0; i < CS_CDHASH_LEN; ++i) {
		/* high nibble first, then low nibble */
		str[i*2] = nibble[(cdhash[i] & 0xf0) >> 4];
		str[i*2+1] = nibble[cdhash[i] & 0x0f];
	}
	str[CS_CDHASH_STRING_SIZE - 1] = 0;
}
5576
5577	/*
5578	* __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__
5579	*
5580	* Description: Waits for the userspace daemon to respond to the request
5581	* we made. Function declared non inline to be visible in
5582	* stackshots and spindumps as well as debugging.
5583	*/
5584	__attribute__((noinline)) int
5585	__EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__(mach_port_t task_access_port, int32_t new_pid)
5586	{
5587	return find_code_signature(task_access_port, new_pid);
5588	}
5589
5590	static int
5591	check_for_signature(proc_t p, struct image_params *imgp)
5592	{
5593	mach_port_t port = NULL;
5594	kern_return_t kr = KERN_FAILURE;
5595	int error = EACCES;
5596	boolean_t unexpected_failure = FALSE;
5597	struct cs_blob *csb;
5598	boolean_t require_success = FALSE;
5599	int spawn = (imgp->ip_flags & IMGPF_SPAWN);
5600	int vfexec = (imgp->ip_flags & IMGPF_VFORK_EXEC);
5601	os_reason_t signature_failure_reason = OS_REASON_NULL;
5602
5603	/*
5604	* Override inherited code signing flags with the
5605	* ones for the process that is being successfully
5606	* loaded
5607	*/
5608	proc_lock(p);
5609	p->p_csflags = imgp->ip_csflags;
5610	proc_unlock(p);
5611
5612	/ Set the switch_protect flag on the map /
5613	if(p->p_csflags & (CS_HARD\|CS_KILL)) {
5614	vm_map_switch_protect(get_task_map(p->task), TRUE);
5615	}
5616
5617	/*
5618	* image activation may be failed due to policy
5619	* which is unexpected but security framework does not
5620	* approve of exec, kill and return immediately.
5621	*/
5622	if (imgp->ip_mac_return != `0`) {
5623
5624	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) \| DBG_FUNC_NONE,
5625	p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_SECURITY_POLICY, `0`, `0`);
5626	signature_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_SECURITY_POLICY);
5627	error = imgp->ip_mac_return;
5628	unexpected_failure = TRUE;
5629	goto done;
5630	}
5631
5632	if (imgp->ip_cs_error != OS_REASON_NULL) {
5633	signature_failure_reason = imgp->ip_cs_error;
5634	imgp->ip_cs_error = OS_REASON_NULL;
5635	error = EACCES;
5636	goto done;
5637	}
5638
5639	/ If the code signature came through the image activation path, we skip the*
5640	* taskgated / externally attached path. */
5641	if (imgp->ip_csflags & CS_SIGNED) {
5642	error = `0`;
5643	goto done;
5644	}
5645
5646	/ The rest of the code is for signatures that either already have been externally*
5647	* attached (likely, but not necessarily by a previous run through the taskgated
5648	* path), or that will now be attached by taskgated. */
5649
5650	kr = task_get_task_access_port(p->task, &port);
5651	if (KERN_SUCCESS != kr \|\| !IPC_PORT_VALID(port)) {
5652	error = `0`;
5653	if (require_success) {
5654	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) \| DBG_FUNC_NONE,
5655	p->p_pid, OS_REASON_CODESIGNING, CODESIGNING_EXIT_REASON_TASK_ACCESS_PORT, `0`, `0`);
5656	signature_failure_reason = os_reason_create(OS_REASON_CODESIGNING, CODESIGNING_EXIT_REASON_TASK_ACCESS_PORT);
5657	error = EACCES;
5658	}
5659	goto done;
5660	}
5661
5662	/*
5663	* taskgated returns KERN_SUCCESS if it has completed its work
5664	* and the exec should continue, KERN_FAILURE if the exec should
5665	* fail, or it may error out with different error code in an
5666	* event of mig failure (e.g. process was signalled during the
5667	* rpc call, taskgated died, mig server died etc.).
5668	*/
5669
5670	kr = __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__(port, p->p_pid);
5671	switch (kr) {
5672	case KERN_SUCCESS:
5673	error = `0`;
5674	break;
5675	case KERN_FAILURE:
5676	error = EACCES;
5677
5678	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) \| DBG_FUNC_NONE,
5679	p->p_pid, OS_REASON_CODESIGNING, CODESIGNING_EXIT_REASON_TASKGATED_INVALID_SIG, `0`, `0`);
5680	signature_failure_reason = os_reason_create(OS_REASON_CODESIGNING, CODESIGNING_EXIT_REASON_TASKGATED_INVALID_SIG);
5681	goto done;
5682	default:
5683	error = EACCES;
5684
5685	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) \| DBG_FUNC_NONE,
5686	p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_TASKGATED_OTHER, `0`, `0`);
5687	signature_failure_reason = os_reason_create(OS_REASON_EXEC, EXEC_EXIT_REASON_TASKGATED_OTHER);
5688	unexpected_failure = TRUE;
5689	goto done;
5690	}
5691
5692	/ Only do this if exec_resettextvp() did not fail /
5693	if (p->p_textvp != NULLVP) {
5694	csb = ubc_cs_blob_get(p->p_textvp, -`1`, p->p_textoff);
5695
5696	if (csb != NULL) {
5697	/ As the enforcement we can do here is very limited, we only allow things that*
5698	* are the only reason why this code path still exists:
5699	* Adhoc signed non-platform binaries without special cs_flags and without any
5700	* entitlements (unrestricted ones still pass AMFI). */
5701	if (
5702	/ Revalidate the blob if necessary through bumped generation count. /
5703	(ubc_cs_generation_check(p->p_textvp) == `0` \|\|
5704	ubc_cs_blob_revalidate(p->p_textvp, csb, imgp, `0`) == `0`) &&
5705	/ Only CS_ADHOC, no CS_KILL, CS_HARD etc. /
5706	(csb->csb_flags & CS_ALLOWED_MACHO) == CS_ADHOC &&
5707	/ If it has a CMS blob, it's not adhoc. The CS_ADHOC flag can lie. /
5708	csblob_find_blob_bytes((const uint8_t *)csb->csb_mem_kaddr, csb->csb_mem_size,
5709	CSSLOT_SIGNATURESLOT,
5710	CSMAGIC_BLOBWRAPPER) == NULL &&
5711	/ It could still be in a trust cache (unlikely with CS_ADHOC), or a magic path. /
5712	csb->csb_platform_binary == `0` &&
5713	/ No entitlements, not even unrestricted ones. /
5714	csb->csb_entitlements_blob == NULL) {
5715
5716	proc_lock(p);
5717	p->p_csflags \|= CS_SIGNED \| CS_VALID;
5718	proc_unlock(p);
5719
5720	} else {
5721	uint8_t cdhash[CS_CDHASH_LEN];
5722	char cdhash_string[CS_CDHASH_STRING_SIZE];
5723	proc_getcdhash(p, cdhash);
5724	cdhash_to_string(cdhash_string, cdhash);
5725	printf("ignoring detached code signature on '%s' with cdhash '%s' "
5726	"because it is invalid, or not a simple adhoc signature.\n",
5727	p->p_name, cdhash_string);
5728	}
5729
5730	}
5731	}
5732
5733	done:
5734	if (`0` == error) {
5735	/ The process's code signature related properties are*
5736	* fully set up, so this is an opportune moment to log
5737	* platform binary execution, if desired. */
5738	if (platform_exec_logging != `0` && csproc_get_platform_binary(p)) {
5739	uint8_t cdhash[CS_CDHASH_LEN];
5740	char cdhash_string[CS_CDHASH_STRING_SIZE];
5741	proc_getcdhash(p, cdhash);
5742	cdhash_to_string(cdhash_string, cdhash);
5743
5744	os_log(peLog, "CS Platform Exec Logging: Executing platform signed binary "
5745	"'%s' with cdhash %s\n", p->p_name, cdhash_string);
5746	}
5747	} else {
5748	if (!unexpected_failure)
5749	p->p_csflags \|= CS_KILLED;
5750	/ make very sure execution fails /
5751	if (vfexec \|\| spawn) {
5752	assert(signature_failure_reason != OS_REASON_NULL);
5753	psignal_vfork_with_reason(p, p->task, imgp->ip_new_thread,
5754	SIGKILL, signature_failure_reason);
5755	signature_failure_reason = OS_REASON_NULL;
5756	error = `0`;
5757	} else {
5758	assert(signature_failure_reason != OS_REASON_NULL);
5759	psignal_with_reason(p, SIGKILL, signature_failure_reason);
5760	signature_failure_reason = OS_REASON_NULL;
5761	}
5762	}
5763
5764	/ If we hit this, we likely would have leaked an exit reason /
5765	assert(signature_failure_reason == OS_REASON_NULL);
5766	return error;
5767	}
5768
5769	/*
5770	* Typically as soon as we start executing this process, the
5771	* first instruction will trigger a VM fault to bring the text
5772	* pages (as executable) into the address space, followed soon
5773	* thereafter by dyld data structures (for dynamic executable).
5774	* To optimize this, as well as improve support for hardware
5775	* debuggers that can only access resident pages present
5776	* in the process' page tables, we prefault some pages if
5777	* possible. Errors are non-fatal.
5778	*/
5779	static void exec_prefault_data(proc_t p __unused, struct image_params imgp, load_result_t load_result)
5780	{
5781	int ret;
5782	size_t expected_all_image_infos_size;
5783
5784	/*
5785	* Prefault executable or dyld entry point.
5786	*/
5787	vm_fault(current_map(),
5788	vm_map_trunc_page(load_result->entry_point,
5789	vm_map_page_mask(current_map())),
5790	VM_PROT_READ \| VM_PROT_EXECUTE,
5791	FALSE, VM_KERN_MEMORY_NONE,
5792	THREAD_UNINT, NULL, `0`);
5793
5794	if (imgp->ip_flags & IMGPF_IS_64BIT_ADDR) {
5795	expected_all_image_infos_size = sizeof(struct user64_dyld_all_image_infos);
5796	} else {
5797	expected_all_image_infos_size = sizeof(struct user32_dyld_all_image_infos);
5798	}
5799
5800	/ Decode dyld anchor structure from <mach-o/dyld_images.h> /
5801	if (load_result->dynlinker &&
5802	load_result->all_image_info_addr &&
5803	load_result->all_image_info_size >= expected_all_image_infos_size) {
5804	union {
5805	struct user64_dyld_all_image_infos infos64;
5806	struct user32_dyld_all_image_infos infos32;
5807	} all_image_infos;
5808
5809	/*
5810	* Pre-fault to avoid copyin() going through the trap handler
5811	* and recovery path.
5812	*/
5813	vm_fault(current_map(),
5814	vm_map_trunc_page(load_result->all_image_info_addr,
5815	vm_map_page_mask(current_map())),
5816	VM_PROT_READ \| VM_PROT_WRITE,
5817	FALSE, VM_KERN_MEMORY_NONE,
5818	THREAD_UNINT, NULL, `0`);
5819	if ((load_result->all_image_info_addr & PAGE_MASK) + expected_all_image_infos_size > PAGE_SIZE) {
5820	/ all_image_infos straddles a page /
5821	vm_fault(current_map(),
5822	vm_map_trunc_page(load_result->all_image_info_addr + expected_all_image_infos_size - `1`,
5823	vm_map_page_mask(current_map())),
5824	VM_PROT_READ \| VM_PROT_WRITE,
5825	FALSE, VM_KERN_MEMORY_NONE,
5826	THREAD_UNINT, NULL, `0`);
5827	}
5828
5829	ret = copyin(load_result->all_image_info_addr,
5830	&all_image_infos,
5831	expected_all_image_infos_size);
5832	if (ret == `0` && all_image_infos.infos32.version >= DYLD_ALL_IMAGE_INFOS_ADDRESS_MINIMUM_VERSION) {
5833
5834	user_addr_t notification_address;
5835	user_addr_t dyld_image_address;
5836	user_addr_t dyld_version_address;
5837	user_addr_t dyld_all_image_infos_address;
5838	user_addr_t dyld_slide_amount;
5839
5840	if (imgp->ip_flags & IMGPF_IS_64BIT_ADDR) {
5841	notification_address = all_image_infos.infos64.notification;
5842	dyld_image_address = all_image_infos.infos64.dyldImageLoadAddress;
5843	dyld_version_address = all_image_infos.infos64.dyldVersion;
5844	dyld_all_image_infos_address = all_image_infos.infos64.dyldAllImageInfosAddress;
5845	} else {
5846	notification_address = all_image_infos.infos32.notification;
5847	dyld_image_address = all_image_infos.infos32.dyldImageLoadAddress;
5848	dyld_version_address = all_image_infos.infos32.dyldVersion;
5849	dyld_all_image_infos_address = all_image_infos.infos32.dyldAllImageInfosAddress;
5850	}
5851
5852	/*
5853	* dyld statically sets up the all_image_infos in its Mach-O
5854	* binary at static link time, with pointers relative to its default
5855	* load address. Since ASLR might slide dyld before its first
5856	* instruction is executed, "dyld_slide_amount" tells us how far
5857	* dyld was loaded compared to its default expected load address.
5858	* All other pointers into dyld's image should be adjusted by this
5859	* amount. At some point later, dyld will fix up pointers to take
5860	* into account the slide, at which point the all_image_infos_address
5861	* field in the structure will match the runtime load address, and
5862	* "dyld_slide_amount" will be 0, if we were to consult it again.
5863	*/
5864
5865	dyld_slide_amount = load_result->all_image_info_addr - dyld_all_image_infos_address;
5866
5867	#if 0
5868	kprintf("exec_prefault: 0x%016llx 0x%08x 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n",
5869	(uint64_t)load_result->all_image_info_addr,
5870	all_image_infos.infos32.version,
5871	(uint64_t)notification_address,
5872	(uint64_t)dyld_image_address,
5873	(uint64_t)dyld_version_address,
5874	(uint64_t)dyld_all_image_infos_address);
5875	#endif
5876
5877	vm_fault(current_map(),
5878	vm_map_trunc_page(notification_address + dyld_slide_amount,
5879	vm_map_page_mask(current_map())),
5880	VM_PROT_READ \| VM_PROT_EXECUTE,
5881	FALSE, VM_KERN_MEMORY_NONE,
5882	THREAD_UNINT, NULL, `0`);
5883	vm_fault(current_map(),
5884	vm_map_trunc_page(dyld_image_address + dyld_slide_amount,
5885	vm_map_page_mask(current_map())),
5886	VM_PROT_READ \| VM_PROT_EXECUTE,
5887	FALSE, VM_KERN_MEMORY_NONE,
5888	THREAD_UNINT, NULL, `0`);
5889	vm_fault(current_map(),
5890	vm_map_trunc_page(dyld_version_address + dyld_slide_amount,
5891	vm_map_page_mask(current_map())),
5892	VM_PROT_READ,
5893	FALSE, VM_KERN_MEMORY_NONE,
5894	THREAD_UNINT, NULL, `0`);
5895	vm_fault(current_map(),
5896	vm_map_trunc_page(dyld_all_image_infos_address + dyld_slide_amount,
5897	vm_map_page_mask(current_map())),
5898	VM_PROT_READ \| VM_PROT_WRITE,
5899	FALSE, VM_KERN_MEMORY_NONE,
5900	THREAD_UNINT, NULL, `0`);
5901	}
5902	}
5903	}
5904

Browse the source code of codebrowser/bsd/kern/kern_exec.c