task.c source code [codebrowser/osfmk/kern/task.c]

1	/*
2	* Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3	*
4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5	*
6	* This file contains Original Code and/or Modifications of Original Code
7	* as defined in and that are subject to the Apple Public Source License
8	* Version 2.0 (the 'License'). You may not use this file except in
9	* compliance with the License. The rights granted to you under the License
10	* may not be used to create, or enable the creation or redistribution of,
11	* unlawful or unlicensed copies of an Apple operating system, or to
12	* circumvent, violate, or enable the circumvention or violation of, any
13	* terms of an Apple operating system software license agreement.
14	*
15	* Please obtain a copy of the License at
16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
17	*
18	* The Original Code and all software distributed under the License are
19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23	* Please see the License for the specific language governing rights and
24	* limitations under the License.
25	*
26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27	*/
28	/*
29	* @OSF_FREE_COPYRIGHT@
30	*/
31	/*
32	* Mach Operating System
33	* Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34	* All Rights Reserved.
35	*
36	* Permission to use, copy, modify and distribute this software and its
37	* documentation is hereby granted, provided that both the copyright
38	* notice and this permission notice appear in all copies of the
39	* software, derivative works or modified versions, and any portions
40	* thereof, and that both notices appear in supporting documentation.
41	*
42	* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43	* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44	* ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45	*
46	* Carnegie Mellon requests users of this software to return to
47	*
48	* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49	* School of Computer Science
50	* Carnegie Mellon University
51	* Pittsburgh PA 15213-3890
52	*
53	* any improvements or extensions that they make and grant Carnegie Mellon
54	* the rights to redistribute these changes.
55	*/
56	/*
57	* File: kern/task.c
58	* Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub,
59	* David Black
60	*
61	* Task management primitives implementation.
62	*/
63	/*
64	* Copyright (c) 1993 The University of Utah and
65	* the Computer Systems Laboratory (CSL). All rights reserved.
66	*
67	* Permission to use, copy, modify and distribute this software and its
68	* documentation is hereby granted, provided that both the copyright
69	* notice and this permission notice appear in all copies of the
70	* software, derivative works or modified versions, and any portions
71	* thereof, and that both notices appear in supporting documentation.
72	*
73	* THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
74	* IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
75	* ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
76	*
77	* CSL requests users of this software to return to csl-dist@cs.utah.edu any
78	* improvements that they make and grant CSL redistribution rights.
79	*
80	*/
81	/*
82	* NOTICE: This file was modified by McAfee Research in 2004 to introduce
83	* support for mandatory and extensible security protections. This notice
84	* is included in support of clause 2.2 (b) of the Apple Public License,
85	* Version 2.0.
86	* Copyright (c) 2005 SPARTA, Inc.
87	*/
88
89	#include <mach/mach_types.h>
90	#include <mach/boolean.h>
91	#include <mach/host_priv.h>
92	#include <mach/machine/vm_types.h>
93	#include <mach/vm_param.h>
94	#include <mach/mach_vm.h>
95	#include <mach/semaphore.h>
96	#include <mach/task_info.h>
97	#include <mach/task_inspect.h>
98	#include <mach/task_special_ports.h>
99	#include <mach/sdt.h>
100
101	#include <ipc/ipc_importance.h>
102	#include <ipc/ipc_types.h>
103	#include <ipc/ipc_space.h>
104	#include <ipc/ipc_entry.h>
105	#include <ipc/ipc_hash.h>
106
107	#include <kern/kern_types.h>
108	#include <kern/mach_param.h>
109	#include <kern/misc_protos.h>
110	#include <kern/task.h>
111	#include <kern/thread.h>
112	#include <kern/coalition.h>
113	#include <kern/zalloc.h>
114	#include <kern/kalloc.h>
115	#include <kern/kern_cdata.h>
116	#include <kern/processor.h>
117	#include <kern/sched_prim.h> /* for thread_wakeup */
118	#include <kern/ipc_tt.h>
119	#include <kern/host.h>
120	#include <kern/clock.h>
121	#include <kern/timer.h>
122	#include <kern/assert.h>
123	#include <kern/sync_lock.h>
124	#include <kern/affinity.h>
125	#include <kern/exc_resource.h>
126	#include <kern/machine.h>
127	#include <kern/policy_internal.h>
128
129	#include <corpses/task_corpse.h>
130	#if CONFIG_TELEMETRY
131	#include <kern/telemetry.h>
132	#endif
133
134	#if MONOTONIC
135	#include <kern/monotonic.h>
136	#include <machine/monotonic.h>
137	#endif /* MONOTONIC */
138
139	#include <os/log.h>
140
141	#include <vm/pmap.h>
142	#include <vm/vm_map.h>
143	#include <vm/vm_kern.h> /* for kernel_map, ipc_kernel_map */
144	#include <vm/vm_pageout.h>
145	#include <vm/vm_protos.h>
146	#include <vm/vm_purgeable_internal.h>
147
148	#include <sys/resource.h>
149	#include <sys/signalvar.h> /* for coredump */
150
151	/*
152	* Exported interfaces
153	*/
154
155	#include <mach/task_server.h>
156	#include <mach/mach_host_server.h>
157	#include <mach/host_security_server.h>
158	#include <mach/mach_port_server.h>
159
160	#include <vm/vm_shared_region.h>
161
162	#include <libkern/OSDebug.h>
163	#include <libkern/OSAtomic.h>
164	#include <libkern/section_keywords.h>
165
166	#if CONFIG_ATM
167	#include <atm/atm_internal.h>
168	#endif
169
170	#include <kern/sfi.h> /* picks up ledger.h */
171
172	#if CONFIG_MACF
173	#include <security/mac_mach_internal.h>
174	#endif
175
176	#if KPERF
177	extern int kpc_force_all_ctrs(task_t, int);
178	#endif
179
180	task_t kernel_task;
181	zone_t task_zone;
182	lck_attr_t task_lck_attr;
183	lck_grp_t task_lck_grp;
184	lck_grp_attr_t task_lck_grp_attr;
185
186	extern int exc_via_corpse_forking;
187	extern int corpse_for_fatal_memkill;
188	extern boolean_t proc_send_synchronous_EXC_RESOURCE(void *p);
189
190	/ Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. /
191	int audio_active = `0`;
192
193	zinfo_usage_store_t tasks_tkm_private;
194	zinfo_usage_store_t tasks_tkm_shared;
195
196	/ A container to accumulate statistics for expired tasks /
197	expired_task_statistics_t dead_task_statistics;
198	lck_spin_t dead_task_statistics_lock;
199
200	ledger_template_t task_ledger_template = NULL;
201
202	SECURITY_READ_ONLY_LATE(struct _task_ledger_indices) task_ledgers __attribute__((used)) =
203	{.cpu_time = -`1`,
204	.tkm_private = -`1`,
205	.tkm_shared = -`1`,
206	.phys_mem = -`1`,
207	.wired_mem = -`1`,
208	.internal = -`1`,
209	.iokit_mapped = -`1`,
210	.alternate_accounting = -`1`,
211	.alternate_accounting_compressed = -`1`,
212	.page_table = -`1`,
213	.phys_footprint = -`1`,
214	.internal_compressed = -`1`,
215	.purgeable_volatile = -`1`,
216	.purgeable_nonvolatile = -`1`,
217	.purgeable_volatile_compressed = -`1`,
218	.purgeable_nonvolatile_compressed = -`1`,
219	.network_volatile = -`1`,
220	.network_nonvolatile = -`1`,
221	.network_volatile_compressed = -`1`,
222	.network_nonvolatile_compressed = -`1`,
223	.platform_idle_wakeups = -`1`,
224	.interrupt_wakeups = -`1`,
225	#if !CONFIG_EMBEDDED
226	.sfi_wait_times = { `0` / initialized at runtime /},
227	#endif /* !CONFIG_EMBEDDED */
228	.cpu_time_billed_to_me = -`1`,
229	.cpu_time_billed_to_others = -`1`,
230	.physical_writes = -`1`,
231	.logical_writes = -`1`,
232	.energy_billed_to_me = -`1`,
233	.energy_billed_to_others = -`1`
234	};
235
236	/ System sleep state /
237	boolean_t tasks_suspend_state;
238
239
240	void init_task_ledgers(void);
241	void task_footprint_exceeded(int warning, __unused const void param0, __unused const* void *param1);
242	void task_wakeups_rate_exceeded(int warning, __unused const void param0, __unused const* void *param1);
243	void task_io_rate_exceeded(int warning, const void param0, __unused const* void *param1);
244	void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void);
245	void __attribute__((noinline)) PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal);
246	void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor);
247
248	kern_return_t task_suspend_internal(task_t);
249	kern_return_t task_resume_internal(task_t);
250	static kern_return_t task_start_halt_locked(task_t task, boolean_t should_mark_corpse);
251
252	extern kern_return_t iokit_task_terminate(task_t task);
253
254	extern kern_return_t exception_deliver(thread_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t, struct exception_action , lck_mtx_t );
255	extern void bsd_copythreadname(void dst_uth, void* *src_uth);
256	extern kern_return_t thread_resume(thread_t thread);
257
258	// Warn tasks when they hit 80% of their memory limit.
259	#define PHYS_FOOTPRINT_WARNING_LEVEL 80
260
261	#define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT 150 /* wakeups per second */
262	#define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL 300 /* in seconds. */
263
264	/*
265	* Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry.
266	*
267	* (ie when the task's wakeups rate exceeds 70% of the limit, start taking user
268	* stacktraces, aka micro-stackshots)
269	*/
270	#define TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER 70
271
272	int task_wakeups_monitor_interval; / In seconds. Time period over which wakeups rate is observed /
273	int task_wakeups_monitor_rate; / In hz. Maximum allowable wakeups per task before EXC_RESOURCE is sent /
274
275	int task_wakeups_monitor_ustackshots_trigger_pct; / Percentage. Level at which we start gathering telemetry. /
276
277	int disable_exc_resource; / Global override to supress EXC_RESOURCE for resource monitor violations. /
278
279	ledger_amount_t max_task_footprint = `0`; / Per-task limit on physical memory consumption in bytes /
280	int max_task_footprint_warning_level = `0`; / Per-task limit warning percentage /
281	int max_task_footprint_mb = `0`; / Per-task limit on physical memory consumption in megabytes /
282
283	/ I/O Monitor Limits /
284	#define IOMON_DEFAULT_LIMIT (20480ull) /* MB of logical/physical I/O */
285	#define IOMON_DEFAULT_INTERVAL (86400ull) /* in seconds */
286
287	uint64_t task_iomon_limit_mb; / Per-task I/O monitor limit in MBs /
288	uint64_t task_iomon_interval_secs; / Per-task I/O monitor interval in secs /
289
290	#define IO_TELEMETRY_DEFAULT_LIMIT (10ll * 1024ll * 1024ll)
291	int64_t io_telemetry_limit; / Threshold to take a microstackshot (0 indicated I/O telemetry is turned off) /
292	int64_t global_logical_writes_count = `0`; / Global count for logical writes /
293	static boolean_t global_update_logical_writes(int64_t);
294
295	#define TASK_MAX_THREAD_LIMIT 256
296
297	#if MACH_ASSERT
298	int pmap_ledgers_panic = `1`;
299	int pmap_ledgers_panic_leeway = `3`;
300	#endif /* MACH_ASSERT */
301
302	int task_max = CONFIG_TASK_MAX; / Max number of tasks /
303
304	#if CONFIG_COREDUMP
305	int hwm_user_cores = `0`; / high watermark violations generate user core files /
306	#endif
307
#ifdef MACH_BSD
/* BSD-side helpers used by this file; they are defined outside osfmk. */
extern void proc_getexecutableuuid(void *, unsigned char *, unsigned long);
extern int proc_pid(struct proc *p);
extern int proc_selfpid(void);
extern struct proc *current_proc(void);
extern char *proc_name_address(struct proc *p);
extern uint64_t get_dispatchqueue_offset_from_proc(void *);
extern int kevent_proc_copy_uptrs(void *proc, uint64_t *buf, int bufsize);
extern void workq_proc_suspended(struct proc *p);
extern void workq_proc_resumed(struct proc *p);

#if CONFIG_MEMORYSTATUS
extern void proc_memstat_terminated(struct proc *p, boolean_t set);
extern void memorystatus_on_ledger_footprint_exceeded(int warning, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal);
extern void memorystatus_log_exception(const int max_footprint_mb, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal);
extern boolean_t memorystatus_allowed_vm_map_fork(task_t task);

#if DEVELOPMENT || DEBUG
extern void memorystatus_abort_vm_map_fork(task_t);
#endif

#endif /* CONFIG_MEMORYSTATUS */

#endif /* MACH_BSD */
332
333	#if DEVELOPMENT \|\| DEBUG
334	int exc_resource_threads_enabled;
335	#endif /* DEVELOPMENT \|\| DEBUG */
336
337	#if (DEVELOPMENT \|\| DEBUG) && TASK_EXC_GUARD_DELIVER_CORPSE
338	uint32_t task_exc_guard_default = TASK_EXC_GUARD_MP_DELIVER \| TASK_EXC_GUARD_MP_CORPSE \|
339	TASK_EXC_GUARD_VM_DELIVER \| TASK_EXC_GUARD_VM_CORPSE;
340	#else
341	uint32_t task_exc_guard_default = `0`;
342	#endif
343
344	/ Forwards /
345
346	static void task_hold_locked(task_t task);
347	static void task_wait_locked(task_t task, boolean_t until_not_runnable);
348	static void task_release_locked(task_t task);
349
350	static void task_synchronizer_destroy_all(task_t task);
351
352
353	void
354	task_set_64bit(
355	task_t task,
356	boolean_t is_64bit,
357	boolean_t is_64bit_data)
358	{
359	#if defined(__i386__) \|\| defined(__x86_64__) \|\| defined(__arm64__)
360	thread_t thread;
361	#endif /* defined(__i386__) \|\| defined(__x86_64__) \|\| defined(__arm64__) */
362
363	task_lock(task);
364
365	/*
366	* Switching to/from 64-bit address spaces
367	*/
368	if (is_64bit) {
369	if (!task_has_64Bit_addr(task)) {
370	task_set_64Bit_addr(task);
371	}
372	} else {
373	if (task_has_64Bit_addr(task)) {
374	task_clear_64Bit_addr(task);
375	}
376	}
377
378	/*
379	* Switching to/from 64-bit register state.
380	*/
381	if (is_64bit_data) {
382	if (task_has_64Bit_data(task))
383	goto out;
384
385	task_set_64Bit_data(task);
386	} else {
387	if ( !task_has_64Bit_data(task))
388	goto out;
389
390	task_clear_64Bit_data(task);
391	}
392
393	/ FIXME: On x86, the thread save state flavor can diverge from the*
394	* task's 64-bit feature flag due to the 32-bit/64-bit register save
395	* state dichotomy. Since we can be pre-empted in this interval,
396	* certain routines may observe the thread as being in an inconsistent
397	* state with respect to its task's 64-bitness.
398	*/
399
400	#if defined(__x86_64__) \|\| defined(__arm64__)
401	queue_iterate(&task->threads, thread, thread_t, task_threads) {
402	thread_mtx_lock(thread);
403	machine_thread_switch_addrmode(thread);
404	thread_mtx_unlock(thread);
405
406	#if defined(__arm64__)
407	/ specifically, if running on H9 /
408	if (thread == current_thread()) {
409	uint64_t arg1, arg2;
410	int urgency;
411	spl_t spl = splsched();
412	/*
413	* This call tell that the current thread changed it's 32bitness.
414	* Other thread were no more on core when 32bitness was changed,
415	* but current_thread() is on core and the previous call to
416	* machine_thread_going_on_core() gave 32bitness which is now wrong.
417	*
418	* This is needed for bring-up, a different callback should be used
419	* in the future.
420	*
421	* TODO: Remove this callout when we no longer support 32-bit code on H9
422	*/
423	thread_lock(thread);
424	urgency = thread_get_urgency(thread, &arg1, &arg2);
425	machine_thread_going_on_core(thread, urgency, `0`, `0`, mach_approximate_time());
426	thread_unlock(thread);
427	splx(spl);
428	}
429	#endif /* defined(__arm64__) */
430	}
431	#endif /* defined(__x86_64__) \|\| defined(__arm64__) */
432
433	out:
434	task_unlock(task);
435	}
436
437	boolean_t
438	task_get_64bit_data(task_t task)
439	{
440	return task_has_64Bit_data(task);
441	}
442
443	void
444	task_set_platform_binary(
445	task_t task,
446	boolean_t is_platform)
447	{
448	task_lock(task);
449	if (is_platform) {
450	task->t_flags \|= TF_PLATFORM;
451	} else {
452	task->t_flags &= ~(TF_PLATFORM);
453	}
454	task_unlock(task);
455	}
456
457	/*
458	* Set or clear per-task TF_CA_CLIENT_WI flag according to specified argument.
459	* Returns "false" if flag is already set, and "true" in other cases.
460	*/
461	bool
462	task_set_ca_client_wi(
463	task_t task,
464	boolean_t set_or_clear)
465	{
466	bool ret = true;
467	task_lock(task);
468	if (set_or_clear) {
469	/ Tasks can have only one CA_CLIENT work interval /
470	if (task->t_flags & TF_CA_CLIENT_WI)
471	ret = false;
472	else
473	task->t_flags \|= TF_CA_CLIENT_WI;
474	} else {
475	task->t_flags &= ~TF_CA_CLIENT_WI;
476	}
477	task_unlock(task);
478	return ret;
479	}
480
481	void
482	task_set_dyld_info(
483	task_t task,
484	mach_vm_address_t addr,
485	mach_vm_size_t size)
486	{
487	task_lock(task);
488	task->all_image_info_addr = addr;
489	task->all_image_info_size = size;
490	task_unlock(task);
491	}
492
493	void
494	task_atm_reset(__unused task_t task) {
495
496	#if CONFIG_ATM
497	if (task->atm_context != NULL) {
498	atm_task_descriptor_destroy(task->atm_context);
499	task->atm_context = NULL;
500	}
501	#endif
502
503	}
504
505	void
506	task_bank_reset(__unused task_t task) {
507
508	if (task->bank_context != NULL) {
509	bank_task_destroy(task);
510	}
511	}
512
513	/*
514	* NOTE: This should only be called when the P_LINTRANSIT
515	* flag is set (the proc_trans lock is held) on the
516	* proc associated with the task.
517	*/
518	void
519	task_bank_init(__unused task_t task) {
520
521	if (task->bank_context != NULL) {
522	panic("Task bank init called with non null bank context for task: %p and bank_context: %p", task, task->bank_context);
523	}
524	bank_task_initialize(task);
525	}
526
527	void
528	task_set_did_exec_flag(task_t task)
529	{
530	task->t_procflags \|= TPF_DID_EXEC;
531	}
532
533	void
534	task_clear_exec_copy_flag(task_t task)
535	{
536	task->t_procflags &= ~TPF_EXEC_COPY;
537	}
538
539	/*
540	* This wait event is t_procflags instead of t_flags because t_flags is volatile
541	*
542	* TODO: store the flags in the same place as the event
543	* rdar://problem/28501994
544	*/
545	event_t
546	task_get_return_wait_event(task_t task)
547	{
548	return (event_t)&task->t_procflags;
549	}
550
551	void
552	task_clear_return_wait(task_t task)
553	{
554	task_lock(task);
555
556	task->t_flags &= ~TF_LRETURNWAIT;
557
558	if (task->t_flags & TF_LRETURNWAITER) {
559	thread_wakeup(task_get_return_wait_event(task));
560	task->t_flags &= ~TF_LRETURNWAITER;
561	}
562
563	task_unlock(task);
564	}
565
566	void __attribute__((noreturn))
567	task_wait_to_return(void)
568	{
569	task_t task;
570
571	task = current_task();
572	task_lock(task);
573
574	if (task->t_flags & TF_LRETURNWAIT) {
575	do {
576	task->t_flags \|= TF_LRETURNWAITER;
577	assert_wait(task_get_return_wait_event(task), THREAD_UNINT);
578	task_unlock(task);
579
580	thread_block(THREAD_CONTINUE_NULL);
581
582	task_lock(task);
583	} while (task->t_flags & TF_LRETURNWAIT);
584	}
585
586	task_unlock(task);
587
588	#if CONFIG_MACF
589	/*
590	* Before jumping to userspace and allowing this process to execute any code,
591	* notify any interested parties.
592	*/
593	mac_proc_notify_exec_complete(current_proc());
594	#endif
595
596	thread_bootstrap_return();
597	}
598
599	#ifdef CONFIG_32BIT_TELEMETRY
600	boolean_t
601	task_consume_32bit_log_flag(task_t task)
602	{
603	if ((task->t_procflags & TPF_LOG_32BIT_TELEMETRY) != `0`) {
604	task->t_procflags &= ~TPF_LOG_32BIT_TELEMETRY;
605	return TRUE;
606	} else {
607	return FALSE;
608	}
609	}
610
611	void
612	task_set_32bit_log_flag(task_t task)
613	{
614	task->t_procflags \|= TPF_LOG_32BIT_TELEMETRY;
615	}
616	#endif /* CONFIG_32BIT_TELEMETRY */
617
618	boolean_t
619	task_is_exec_copy(task_t task)
620	{
621	return task_is_exec_copy_internal(task);
622	}
623
624	boolean_t
625	task_did_exec(task_t task)
626	{
627	return task_did_exec_internal(task);
628	}
629
630	boolean_t
631	task_is_active(task_t task)
632	{
633	return task->active;
634	}
635
636	boolean_t
637	task_is_halting(task_t task)
638	{
639	return task->halting;
640	}
641
642	#if TASK_REFERENCE_LEAK_DEBUG
643	#include <kern/btlog.h>
644
645	static btlog_t *task_ref_btlog;
646	#define TASK_REF_OP_INCR 0x1
647	#define TASK_REF_OP_DECR 0x2
648
649	#define TASK_REF_NUM_RECORDS 100000
650	#define TASK_REF_BTDEPTH 7
651
652	void
653	task_reference_internal(task_t task)
654	{
655	void * bt[TASK_REF_BTDEPTH];
656	int numsaved = `0`;
657
658	numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
659
660	(void)hw_atomic_add(&(task)->ref_count, `1`);
661	btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_INCR,
662	bt, numsaved);
663	}
664
665	uint32_t
666	task_deallocate_internal(task_t task)
667	{
668	void * bt[TASK_REF_BTDEPTH];
669	int numsaved = `0`;
670
671	numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
672
673	btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_DECR,
674	bt, numsaved);
675	return hw_atomic_sub(&(task)->ref_count, `1`);
676	}
677
678	#endif /* TASK_REFERENCE_LEAK_DEBUG */
679
680	void
681	task_init(void)
682	{
683
684	lck_grp_attr_setdefault(&task_lck_grp_attr);
685	lck_grp_init(&task_lck_grp, "task", &task_lck_grp_attr);
686	lck_attr_setdefault(&task_lck_attr);
687	lck_mtx_init(&tasks_threads_lock, &task_lck_grp, &task_lck_attr);
688	lck_mtx_init(&tasks_corpse_lock, &task_lck_grp, &task_lck_attr);
689
690	task_zone = zinit(
691	sizeof(struct task),
692	task_max * sizeof(struct task),
693	TASK_CHUNK * sizeof(struct task),
694	"tasks");
695
696	zone_change(task_zone, Z_NOENCRYPT, TRUE);
697
698	#if CONFIG_EMBEDDED
699	task_watch_init();
700	#endif /* CONFIG_EMBEDDED */
701
702	/*
703	* Configure per-task memory limit.
704	* The boot-arg is interpreted as Megabytes,
705	* and takes precedence over the device tree.
706	* Setting the boot-arg to 0 disables task limits.
707	*/
708	if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint_mb,
709	sizeof (max_task_footprint_mb))) {
710	/*
711	* No limit was found in boot-args, so go look in the device tree.
712	*/
713	if (!PE_get_default("kern.max_task_pmem", &max_task_footprint_mb,
714	sizeof(max_task_footprint_mb))) {
715	/*
716	* No limit was found in device tree.
717	*/
718	max_task_footprint_mb = `0`;
719	}
720	}
721
722	if (max_task_footprint_mb != `0`) {
723	#if CONFIG_MEMORYSTATUS
724	if (max_task_footprint_mb < `50`) {
725	printf("Warning: max_task_pmem %d below minimum.\n",
726	max_task_footprint_mb);
727	max_task_footprint_mb = `50`;
728	}
729	printf("Limiting task physical memory footprint to %d MB\n",
730	max_task_footprint_mb);
731
732	max_task_footprint = (ledger_amount_t)max_task_footprint_mb * `1024` * `1024`; // Convert MB to bytes
733
734	/*
735	* Configure the per-task memory limit warning level.
736	* This is computed as a percentage.
737	*/
738	max_task_footprint_warning_level = `0`;
739
740	if (max_mem < `0x40000000`) {
741	/*
742	* On devices with < 1GB of memory:
743	* -- set warnings to 50MB below the per-task limit.
744	*/
745	if (max_task_footprint_mb > `50`) {
746	max_task_footprint_warning_level = ((max_task_footprint_mb - `50`) * `100`) / max_task_footprint_mb;
747	}
748	} else {
749	/*
750	* On devices with >= 1GB of memory:
751	* -- set warnings to 100MB below the per-task limit.
752	*/
753	if (max_task_footprint_mb > `100`) {
754	max_task_footprint_warning_level = ((max_task_footprint_mb - `100`) * `100`) / max_task_footprint_mb;
755	}
756	}
757
758	/*
759	* Never allow warning level to land below the default.
760	*/
761	if (max_task_footprint_warning_level < PHYS_FOOTPRINT_WARNING_LEVEL) {
762	max_task_footprint_warning_level = PHYS_FOOTPRINT_WARNING_LEVEL;
763	}
764
765	printf("Limiting task physical memory warning to %d%%\n", max_task_footprint_warning_level);
766
767	#else
768	printf("Warning: max_task_pmem specified, but jetsam not configured; ignoring.\n");
769	#endif /* CONFIG_MEMORYSTATUS */
770	}
771
772	#if DEVELOPMENT \|\| DEBUG
773	if (!PE_parse_boot_argn("exc_resource_threads",
774	&exc_resource_threads_enabled,
775	sizeof(exc_resource_threads_enabled))) {
776	exc_resource_threads_enabled = `1`;
777	}
778	PE_parse_boot_argn("task_exc_guard_default",
779	&task_exc_guard_default,
780	sizeof(task_exc_guard_default));
781	#endif /* DEVELOPMENT \|\| DEBUG */
782
783	#if CONFIG_COREDUMP
784	if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores,
785	sizeof (hwm_user_cores))) {
786	hwm_user_cores = `0`;
787	}
788	#endif
789
790	proc_init_cpumon_params();
791
792	if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof (task_wakeups_monitor_rate))) {
793	task_wakeups_monitor_rate = TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT;
794	}
795
796	if (!PE_parse_boot_argn("task_wakeups_monitor_interval", &task_wakeups_monitor_interval, sizeof (task_wakeups_monitor_interval))) {
797	task_wakeups_monitor_interval = TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL;
798	}
799
800	if (!PE_parse_boot_argn("task_wakeups_monitor_ustackshots_trigger_pct", &task_wakeups_monitor_ustackshots_trigger_pct,
801	sizeof (task_wakeups_monitor_ustackshots_trigger_pct))) {
802	task_wakeups_monitor_ustackshots_trigger_pct = TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER;
803	}
804
805	if (!PE_parse_boot_argn("disable_exc_resource", &disable_exc_resource,
806	sizeof (disable_exc_resource))) {
807	disable_exc_resource = `0`;
808	}
809
810	if (!PE_parse_boot_argn("task_iomon_limit_mb", &task_iomon_limit_mb, sizeof (task_iomon_limit_mb))) {
811	task_iomon_limit_mb = IOMON_DEFAULT_LIMIT;
812	}
813
814	if (!PE_parse_boot_argn("task_iomon_interval_secs", &task_iomon_interval_secs, sizeof (task_iomon_interval_secs))) {
815	task_iomon_interval_secs = IOMON_DEFAULT_INTERVAL;
816	}
817
818	if (!PE_parse_boot_argn("io_telemetry_limit", &io_telemetry_limit, sizeof (io_telemetry_limit))) {
819	io_telemetry_limit = IO_TELEMETRY_DEFAULT_LIMIT;
820	}
821
822	/*
823	* If we have coalitions, coalition_init() will call init_task_ledgers() as it
824	* sets up the ledgers for the default coalition. If we don't have coalitions,
825	* then we have to call it now.
826	*/
827	#if CONFIG_COALITIONS
828	assert(task_ledger_template);
829	#else /* CONFIG_COALITIONS */
830	init_task_ledgers();
831	#endif /* CONFIG_COALITIONS */
832
833	#if TASK_REFERENCE_LEAK_DEBUG
834	task_ref_btlog = btlog_create(TASK_REF_NUM_RECORDS, TASK_REF_BTDEPTH, TRUE / caller_will_remove_entries_for_element? /);
835	assert(task_ref_btlog);
836	#endif
837
838	/*
839	* Create the kernel task as the first task.
840	*/
841	#ifdef __LP64__
842	if (task_create_internal(TASK_NULL, NULL, FALSE, TRUE, TRUE, TF_NONE, TPF_NONE, &kernel_task) != KERN_SUCCESS)
843	#else
844	if (task_create_internal(TASK_NULL, NULL, FALSE, FALSE, FALSE, TF_NONE, TPF_NONE, &kernel_task) != KERN_SUCCESS)
845	#endif
846	panic("task_init\n");
847
848
849	vm_map_deallocate(kernel_task->map);
850	kernel_task->map = kernel_map;
851	lck_spin_init(&dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
852	}
853
854	/*
855	* Create a task running in the kernel address space. It may
856	* have its own map of size mem_size and may have ipc privileges.
857	*/
858	kern_return_t
859	kernel_task_create(
860	__unused task_t parent_task,
861	__unused vm_offset_t map_base,
862	__unused vm_size_t map_size,
863	__unused task_t *child_task)
864	{
865	return (KERN_INVALID_ARGUMENT);
866	}
867
868	kern_return_t
869	task_create(
870	task_t parent_task,
871	__unused ledger_port_array_t ledger_ports,
872	__unused mach_msg_type_number_t num_ledger_ports,
873	__unused boolean_t inherit_memory,
874	__unused task_t child_task) /* OUT /
875	{
876	if (parent_task == TASK_NULL)
877	return(KERN_INVALID_ARGUMENT);
878
879	/*
880	* No longer supported: too many calls assume that a task has a valid
881	* process attached.
882	*/
883	return(KERN_FAILURE);
884	}
885
886	kern_return_t
887	host_security_create_task_token(
888	host_security_t host_security,
889	task_t parent_task,
890	__unused security_token_t sec_token,
891	__unused audit_token_t audit_token,
892	__unused host_priv_t host_priv,
893	__unused ledger_port_array_t ledger_ports,
894	__unused mach_msg_type_number_t num_ledger_ports,
895	__unused boolean_t inherit_memory,
896	__unused task_t child_task) /* OUT /
897	{
898	if (parent_task == TASK_NULL)
899	return(KERN_INVALID_ARGUMENT);
900
901	if (host_security == HOST_NULL)
902	return(KERN_INVALID_SECURITY);
903
904	/*
905	* No longer supported.
906	*/
907	return(KERN_FAILURE);
908	}
909
910	/*
911	* Task ledgers
912	* ------------
913	*
914	* phys_footprint
915	* Physical footprint: This is the sum of:
916	* + (internal - alternate_accounting)
917	* + (internal_compressed - alternate_accounting_compressed)
918	* + iokit_mapped
919	* + purgeable_nonvolatile
920	* + purgeable_nonvolatile_compressed
921	* + page_table
922	*
923	* internal
924	* The task's anonymous memory, which on iOS is always resident.
925	*
926	* internal_compressed
927	* Amount of this task's internal memory which is held by the compressor.
928	* Such memory is no longer actually resident for the task [i.e., resident in its pmap],
929	* and could be either decompressed back into memory, or paged out to storage, depending
930	* on our implementation.
931	*
932	* iokit_mapped
 * IOKit mappings: The total size of all IOKit mappings in this task, regardless of
 * clean/dirty or internal/external state.
935	*
936	* alternate_accounting
937	* The number of internal dirty pages which are part of IOKit mappings. By definition, these pages
938	* are counted in both internal and iokit_mapped, so we must subtract them from the total to avoid
939	* double counting.
940	*/
941	void
942	init_task_ledgers(void)
943	{
944	ledger_template_t t;
945
946	assert(task_ledger_template == NULL);
947	assert(kernel_task == TASK_NULL);
948
949	#if MACH_ASSERT
950	PE_parse_boot_argn("pmap_ledgers_panic",
951	&pmap_ledgers_panic,
952	sizeof (pmap_ledgers_panic));
953	PE_parse_boot_argn("pmap_ledgers_panic_leeway",
954	&pmap_ledgers_panic_leeway,
955	sizeof (pmap_ledgers_panic_leeway));
956	#endif /* MACH_ASSERT */
957
958	if ((t = ledger_template_create("Per-task ledger")) == NULL)
959	panic("couldn't create task ledger template");
960
961	task_ledgers.cpu_time = ledger_entry_add(t, "cpu_time", "sched", "ns");
962	task_ledgers.tkm_private = ledger_entry_add(t, "tkm_private",
963	"physmem", "bytes");
964	task_ledgers.tkm_shared = ledger_entry_add(t, "tkm_shared", "physmem",
965	"bytes");
966	task_ledgers.phys_mem = ledger_entry_add(t, "phys_mem", "physmem",
967	"bytes");
968	task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem",
969	"bytes");
970	task_ledgers.internal = ledger_entry_add(t, "internal", "physmem",
971	"bytes");
972	task_ledgers.iokit_mapped = ledger_entry_add(t, "iokit_mapped", "mappings",
973	"bytes");
974	task_ledgers.alternate_accounting = ledger_entry_add(t, "alternate_accounting", "physmem",
975	"bytes");
976	task_ledgers.alternate_accounting_compressed = ledger_entry_add(t, "alternate_accounting_compressed", "physmem",
977	"bytes");
978	task_ledgers.page_table = ledger_entry_add(t, "page_table", "physmem",
979	"bytes");
980	task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem",
981	"bytes");
982	task_ledgers.internal_compressed = ledger_entry_add(t, "internal_compressed", "physmem",
983	"bytes");
984	task_ledgers.purgeable_volatile = ledger_entry_add(t, "purgeable_volatile", "physmem", "bytes");
985	task_ledgers.purgeable_nonvolatile = ledger_entry_add(t, "purgeable_nonvolatile", "physmem", "bytes");
986	task_ledgers.purgeable_volatile_compressed = ledger_entry_add(t, "purgeable_volatile_compress", "physmem", "bytes");
987	task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add(t, "purgeable_nonvolatile_compress", "physmem", "bytes");
988
989	task_ledgers.network_volatile = ledger_entry_add(t, "network_volatile", "physmem", "bytes");
990	task_ledgers.network_nonvolatile = ledger_entry_add(t, "network_nonvolatile", "physmem", "bytes");
991	task_ledgers.network_volatile_compressed = ledger_entry_add(t, "network_volatile_compressed", "physmem", "bytes");
992	task_ledgers.network_nonvolatile_compressed = ledger_entry_add(t, "network_nonvolatile_compressed", "physmem", "bytes");
993
994	task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power",
995	"count");
996	task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power",
997	"count");
998
999	#if CONFIG_SCHED_SFI
1000	sfi_class_id_t class_id, ledger_alias;
1001	for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
1002	task_ledgers.sfi_wait_times[class_id] = -`1`;
1003	}
1004
1005	/ don't account for UNSPECIFIED /
1006	for (class_id = SFI_CLASS_UNSPECIFIED + `1`; class_id < MAX_SFI_CLASS_ID; class_id++) {
1007	ledger_alias = sfi_get_ledger_alias_for_class(class_id);
1008	if (ledger_alias != SFI_CLASS_UNSPECIFIED) {
1009	/ Check to see if alias has been registered yet /
1010	if (task_ledgers.sfi_wait_times[ledger_alias] != -`1`) {
1011	task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias];
1012	} else {
1013	/ Otherwise, initialize it first /
1014	task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias] = sfi_ledger_entry_add(t, ledger_alias);
1015	}
1016	} else {
1017	task_ledgers.sfi_wait_times[class_id] = sfi_ledger_entry_add(t, class_id);
1018	}
1019
1020	if (task_ledgers.sfi_wait_times[class_id] < `0`) {
1021	panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id);
1022	}
1023	}
1024
1025	assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID -`1`] != -`1`);
1026	#endif /* CONFIG_SCHED_SFI */
1027
1028	task_ledgers.cpu_time_billed_to_me = ledger_entry_add(t, "cpu_time_billed_to_me", "sched", "ns");
1029	task_ledgers.cpu_time_billed_to_others = ledger_entry_add(t, "cpu_time_billed_to_others", "sched", "ns");
1030	task_ledgers.physical_writes = ledger_entry_add(t, "physical_writes", "res", "bytes");
1031	task_ledgers.logical_writes = ledger_entry_add(t, "logical_writes", "res", "bytes");
1032	task_ledgers.energy_billed_to_me = ledger_entry_add(t, "energy_billed_to_me", "power", "nj");
1033	task_ledgers.energy_billed_to_others = ledger_entry_add(t, "energy_billed_to_others", "power", "nj");
1034
1035	if ((task_ledgers.cpu_time < `0`) \|\|
1036	(task_ledgers.tkm_private < `0`) \|\|
1037	(task_ledgers.tkm_shared < `0`) \|\|
1038	(task_ledgers.phys_mem < `0`) \|\|
1039	(task_ledgers.wired_mem < `0`) \|\|
1040	(task_ledgers.internal < `0`) \|\|
1041	(task_ledgers.iokit_mapped < `0`) \|\|
1042	(task_ledgers.alternate_accounting < `0`) \|\|
1043	(task_ledgers.alternate_accounting_compressed < `0`) \|\|
1044	(task_ledgers.page_table < `0`) \|\|
1045	(task_ledgers.phys_footprint < `0`) \|\|
1046	(task_ledgers.internal_compressed < `0`) \|\|
1047	(task_ledgers.purgeable_volatile < `0`) \|\|
1048	(task_ledgers.purgeable_nonvolatile < `0`) \|\|
1049	(task_ledgers.purgeable_volatile_compressed < `0`) \|\|
1050	(task_ledgers.purgeable_nonvolatile_compressed < `0`) \|\|
1051	(task_ledgers.network_volatile < `0`) \|\|
1052	(task_ledgers.network_nonvolatile < `0`) \|\|
1053	(task_ledgers.network_volatile_compressed < `0`) \|\|
1054	(task_ledgers.network_nonvolatile_compressed < `0`) \|\|
1055	(task_ledgers.platform_idle_wakeups < `0`) \|\|
1056	(task_ledgers.interrupt_wakeups < `0`) \|\|
1057	(task_ledgers.cpu_time_billed_to_me < `0`) \|\| (task_ledgers.cpu_time_billed_to_others < `0`) \|\|
1058	(task_ledgers.physical_writes < `0`) \|\|
1059	(task_ledgers.logical_writes < `0`) \|\|
1060	(task_ledgers.energy_billed_to_me < `0`) \|\|
1061	(task_ledgers.energy_billed_to_others < `0`)
1062	) {
1063	panic("couldn't create entries for task ledger template");
1064	}
1065
1066	ledger_track_credit_only(t, task_ledgers.phys_footprint);
1067	ledger_track_credit_only(t, task_ledgers.page_table);
1068	ledger_track_credit_only(t, task_ledgers.internal);
1069	ledger_track_credit_only(t, task_ledgers.internal_compressed);
1070	ledger_track_credit_only(t, task_ledgers.iokit_mapped);
1071	ledger_track_credit_only(t, task_ledgers.alternate_accounting);
1072	ledger_track_credit_only(t, task_ledgers.alternate_accounting_compressed);
1073	ledger_track_credit_only(t, task_ledgers.purgeable_volatile);
1074	ledger_track_credit_only(t, task_ledgers.purgeable_nonvolatile);
1075	ledger_track_credit_only(t, task_ledgers.purgeable_volatile_compressed);
1076	ledger_track_credit_only(t, task_ledgers.purgeable_nonvolatile_compressed);
1077
1078	ledger_track_credit_only(t, task_ledgers.network_volatile);
1079	ledger_track_credit_only(t, task_ledgers.network_nonvolatile);
1080	ledger_track_credit_only(t, task_ledgers.network_volatile_compressed);
1081	ledger_track_credit_only(t, task_ledgers.network_nonvolatile_compressed);
1082
1083	ledger_track_maximum(t, task_ledgers.phys_footprint, `60`);
1084	#if MACH_ASSERT
1085	if (pmap_ledgers_panic) {
1086	ledger_panic_on_negative(t, task_ledgers.phys_footprint);
1087	ledger_panic_on_negative(t, task_ledgers.page_table);
1088	ledger_panic_on_negative(t, task_ledgers.internal);
1089	ledger_panic_on_negative(t, task_ledgers.internal_compressed);
1090	ledger_panic_on_negative(t, task_ledgers.iokit_mapped);
1091	ledger_panic_on_negative(t, task_ledgers.alternate_accounting);
1092	ledger_panic_on_negative(t, task_ledgers.alternate_accounting_compressed);
1093	ledger_panic_on_negative(t, task_ledgers.purgeable_volatile);
1094	ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile);
1095	ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed);
1096	ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed);
1097
1098	ledger_panic_on_negative(t, task_ledgers.network_volatile);
1099	ledger_panic_on_negative(t, task_ledgers.network_nonvolatile);
1100	ledger_panic_on_negative(t, task_ledgers.network_volatile_compressed);
1101	ledger_panic_on_negative(t, task_ledgers.network_nonvolatile_compressed);
1102	}
1103	#endif /* MACH_ASSERT */
1104
1105	#if CONFIG_MEMORYSTATUS
1106	ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL);
1107	#endif /* CONFIG_MEMORYSTATUS */
1108
1109	ledger_set_callback(t, task_ledgers.interrupt_wakeups,
1110	task_wakeups_rate_exceeded, NULL, NULL);
1111	ledger_set_callback(t, task_ledgers.physical_writes, task_io_rate_exceeded, (void *)FLAVOR_IO_PHYSICAL_WRITES, NULL);
1112	ledger_set_callback(t, task_ledgers.logical_writes, task_io_rate_exceeded, (void *)FLAVOR_IO_LOGICAL_WRITES, NULL);
1113
1114	ledger_template_complete(t);
1115	task_ledger_template = t;
1116	}
1117
1118	kern_return_t
1119	task_create_internal(
1120	task_t parent_task,
1121	coalition_t *parent_coalitions __unused,
1122	boolean_t inherit_memory,
1123	__unused boolean_t is_64bit,
1124	boolean_t is_64bit_data,
1125	uint32_t t_flags,
1126	uint32_t t_procflags,
1127	task_t child_task) /* OUT /
1128	{
1129	task_t new_task;
1130	vm_shared_region_t shared_region;
1131	ledger_t ledger = NULL;
1132
1133	new_task = (task_t) zalloc(task_zone);
1134
1135	if (new_task == TASK_NULL)
1136	return(KERN_RESOURCE_SHORTAGE);
1137
1138	/ one ref for just being alive; one for our caller /
1139	new_task->ref_count = `2`;
1140
1141	/ allocate with active entries /
1142	assert(task_ledger_template != NULL);
1143	if ((ledger = ledger_instantiate(task_ledger_template,
1144	LEDGER_CREATE_ACTIVE_ENTRIES)) == NULL) {
1145	zfree(task_zone, new_task);
1146	return(KERN_RESOURCE_SHORTAGE);
1147	}
1148
1149
1150	new_task->ledger = ledger;
1151
1152	#if defined(CONFIG_SCHED_MULTIQ)
1153	new_task->sched_group = sched_group_create();
1154	#endif
1155
1156	/ if inherit_memory is true, parent_task MUST not be NULL /
1157	if (!(t_flags & TF_CORPSE_FORK) && inherit_memory)
1158	new_task->map = vm_map_fork(ledger, parent_task->map, `0`);
1159	else
1160	new_task->map = vm_map_create(pmap_create(ledger, `0`, is_64bit),
1161	(vm_map_offset_t)(VM_MIN_ADDRESS),
1162	(vm_map_offset_t)(VM_MAX_ADDRESS), TRUE);
1163
1164	/ Inherit memlock limit from parent /
1165	if (parent_task)
1166	vm_map_set_user_wire_limit(new_task->map, (vm_size_t)parent_task->map->user_wire_limit);
1167
1168	lck_mtx_init(&new_task->lock, &task_lck_grp, &task_lck_attr);
1169	queue_init(&new_task->threads);
1170	new_task->suspend_count = `0`;
1171	new_task->thread_count = `0`;
1172	new_task->active_thread_count = `0`;
1173	new_task->user_stop_count = `0`;
1174	new_task->legacy_stop_count = `0`;
1175	new_task->active = TRUE;
1176	new_task->halting = FALSE;
1177	new_task->priv_flags = `0`;
1178	new_task->t_flags = t_flags;
1179	new_task->t_procflags = t_procflags;
1180	new_task->importance = `0`;
1181	new_task->crashed_thread_id = `0`;
1182	new_task->exec_token = `0`;
1183
1184	new_task->task_exc_guard = task_exc_guard_default;
1185
1186	#if CONFIG_ATM
1187	new_task->atm_context = NULL;
1188	#endif
1189	new_task->bank_context = NULL;
1190
1191	#ifdef MACH_BSD
1192	new_task->bsd_info = NULL;
1193	new_task->corpse_info = NULL;
1194	#endif /* MACH_BSD */
1195
1196	#if CONFIG_MACF
1197	new_task->crash_label = NULL;
1198	#endif
1199
1200	#if CONFIG_MEMORYSTATUS
1201	if (max_task_footprint != `0`) {
1202	ledger_set_limit(ledger, task_ledgers.phys_footprint, max_task_footprint, PHYS_FOOTPRINT_WARNING_LEVEL);
1203	}
1204	#endif /* CONFIG_MEMORYSTATUS */
1205
1206	if (task_wakeups_monitor_rate != `0`) {
1207	uint32_t flags = WAKEMON_ENABLE \| WAKEMON_SET_DEFAULTS;
1208	int32_t rate; // Ignored because of WAKEMON_SET_DEFAULTS
1209	task_wakeups_monitor_ctl(new_task, &flags, &rate);
1210	}
1211
1212	#if CONFIG_IO_ACCOUNTING
1213	uint32_t flags = IOMON_ENABLE;
1214	task_io_monitor_ctl(new_task, &flags);
1215	#endif /* CONFIG_IO_ACCOUNTING */
1216
1217	machine_task_init(new_task, parent_task, inherit_memory);
1218
1219	new_task->task_debug = NULL;
1220
1221	#if DEVELOPMENT \|\| DEBUG
1222	new_task->task_unnested = FALSE;
1223	new_task->task_disconnected_count = `0`;
1224	#endif
1225	queue_init(&new_task->semaphore_list);
1226	new_task->semaphores_owned = `0`;
1227
1228	ipc_task_init(new_task, parent_task);
1229
1230	new_task->vtimers = `0`;
1231
1232	new_task->shared_region = NULL;
1233
1234	new_task->affinity_space = NULL;
1235
1236	new_task->t_kpc = `0`;
1237
1238	new_task->pidsuspended = FALSE;
1239	new_task->frozen = FALSE;
1240	new_task->changing_freeze_state = FALSE;
1241	new_task->rusage_cpu_flags = `0`;
1242	new_task->rusage_cpu_percentage = `0`;
1243	new_task->rusage_cpu_interval = `0`;
1244	new_task->rusage_cpu_deadline = `0`;
1245	new_task->rusage_cpu_callt = NULL;
1246	#if MACH_ASSERT
1247	new_task->suspends_outstanding = `0`;
1248	#endif
1249
1250	#if HYPERVISOR
1251	new_task->hv_task_target = NULL;
1252	#endif /* HYPERVISOR */
1253
1254	#if CONFIG_EMBEDDED
1255	queue_init(&new_task->task_watchers);
1256	new_task->num_taskwatchers = `0`;
1257	new_task->watchapplying = `0`;
1258	#endif /* CONFIG_EMBEDDED */
1259
1260	new_task->mem_notify_reserved = `0`;
1261	new_task->memlimit_attrs_reserved = `0`;
1262
1263	new_task->requested_policy = default_task_requested_policy;
1264	new_task->effective_policy = default_task_effective_policy;
1265
1266	task_importance_init_from_parent(new_task, parent_task);
1267
1268	if (parent_task != TASK_NULL) {
1269	new_task->sec_token = parent_task->sec_token;
1270	new_task->audit_token = parent_task->audit_token;
1271
1272	/ inherit the parent's shared region /
1273	shared_region = vm_shared_region_get(parent_task);
1274	vm_shared_region_set(new_task, shared_region);
1275
1276	if(task_has_64Bit_addr(parent_task)) {
1277	task_set_64Bit_addr(new_task);
1278	}
1279
1280	if(task_has_64Bit_data(parent_task)) {
1281	task_set_64Bit_data(new_task);
1282	}
1283
1284	new_task->all_image_info_addr = parent_task->all_image_info_addr;
1285	new_task->all_image_info_size = parent_task->all_image_info_size;
1286
1287	if (inherit_memory && parent_task->affinity_space)
1288	task_affinity_create(parent_task, new_task);
1289
1290	new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);
1291
1292	new_task->priority = BASEPRI_DEFAULT;
1293	new_task->max_priority = MAXPRI_USER;
1294
1295	task_policy_create(new_task, parent_task);
1296	} else {
1297	new_task->sec_token = KERNEL_SECURITY_TOKEN;
1298	new_task->audit_token = KERNEL_AUDIT_TOKEN;
1299	#ifdef __LP64__
1300	if(is_64bit) {
1301	task_set_64Bit_addr(new_task);
1302	}
1303	#endif
1304
1305	if(is_64bit_data) {
1306	task_set_64Bit_data(new_task);
1307	}
1308
1309	new_task->all_image_info_addr = (mach_vm_address_t)`0`;
1310	new_task->all_image_info_size = (mach_vm_size_t)`0`;
1311
1312	new_task->pset_hint = PROCESSOR_SET_NULL;
1313
1314	if (kernel_task == TASK_NULL) {
1315	new_task->priority = BASEPRI_KERNEL;
1316	new_task->max_priority = MAXPRI_KERNEL;
1317	} else {
1318	new_task->priority = BASEPRI_DEFAULT;
1319	new_task->max_priority = MAXPRI_USER;
1320	}
1321	}
1322
1323	bzero(new_task->coalition, sizeof(new_task->coalition));
1324	for (int i = `0`; i < COALITION_NUM_TYPES; i++)
1325	queue_chain_init(new_task->task_coalition[i]);
1326
1327	/ Allocate I/O Statistics /
1328	new_task->task_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
1329	assert(new_task->task_io_stats != NULL);
1330	bzero(new_task->task_io_stats, sizeof(struct io_stat_info));
1331
1332	bzero(&(new_task->cpu_time_eqos_stats), sizeof(new_task->cpu_time_eqos_stats));
1333	bzero(&(new_task->cpu_time_rqos_stats), sizeof(new_task->cpu_time_rqos_stats));
1334
1335	bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
1336
1337	/ Copy resource acc. info from Parent for Corpe Forked task. /
1338	if (parent_task != NULL && (t_flags & TF_CORPSE_FORK)) {
1339	task_rollup_accounting_info(new_task, parent_task);
1340	} else {
1341	/ Initialize to zero for standard fork/spawn case /
1342	new_task->total_user_time = `0`;
1343	new_task->total_system_time = `0`;
1344	new_task->total_ptime = `0`;
1345	new_task->total_runnable_time = `0`;
1346	new_task->faults = `0`;
1347	new_task->pageins = `0`;
1348	new_task->cow_faults = `0`;
1349	new_task->messages_sent = `0`;
1350	new_task->messages_received = `0`;
1351	new_task->syscalls_mach = `0`;
1352	new_task->syscalls_unix = `0`;
1353	new_task->c_switch = `0`;
1354	new_task->p_switch = `0`;
1355	new_task->ps_switch = `0`;
1356	new_task->low_mem_notified_warn = `0`;
1357	new_task->low_mem_notified_critical = `0`;
1358	new_task->purged_memory_warn = `0`;
1359	new_task->purged_memory_critical = `0`;
1360	new_task->low_mem_privileged_listener = `0`;
1361	new_task->memlimit_is_active = `0`;
1362	new_task->memlimit_is_fatal = `0`;
1363	new_task->memlimit_active_exc_resource = `0`;
1364	new_task->memlimit_inactive_exc_resource = `0`;
1365	new_task->task_timer_wakeups_bin_1 = `0`;
1366	new_task->task_timer_wakeups_bin_2 = `0`;
1367	new_task->task_gpu_ns = `0`;
1368	new_task->task_immediate_writes = `0`;
1369	new_task->task_deferred_writes = `0`;
1370	new_task->task_invalidated_writes = `0`;
1371	new_task->task_metadata_writes = `0`;
1372	new_task->task_energy = `0`;
1373	#if MONOTONIC
1374	memset(&new_task->task_monotonic, `0`, sizeof(new_task->task_monotonic));
1375	#endif /* MONOTONIC */
1376	}
1377
1378
1379	#if CONFIG_COALITIONS
1380	if (!(t_flags & TF_CORPSE_FORK)) {
1381	/ TODO: there is no graceful failure path here... /
1382	if (parent_coalitions && parent_coalitions[COALITION_TYPE_RESOURCE]) {
1383	coalitions_adopt_task(parent_coalitions, new_task);
1384	} else if (parent_task && parent_task->coalition[COALITION_TYPE_RESOURCE]) {
1385	/*
1386	* all tasks at least have a resource coalition, so
1387	* if the parent has one then inherit all coalitions
1388	* the parent is a part of
1389	*/
1390	coalitions_adopt_task(parent_task->coalition, new_task);
1391	} else {
1392	/ TODO: assert that new_task will be PID 1 (launchd) /
1393	coalitions_adopt_init_task(new_task);
1394	}
1395	/*
1396	* on exec, we need to transfer the coalition roles from the
1397	* parent task to the exec copy task.
1398	*/
1399	if (parent_task && (t_procflags & TPF_EXEC_COPY)) {
1400	int coal_roles[COALITION_NUM_TYPES];
1401	task_coalition_roles(parent_task, coal_roles);
1402	(void)coalitions_set_roles(new_task->coalition, new_task, coal_roles);
1403	}
1404	} else {
1405	coalitions_adopt_corpse_task(new_task);
1406	}
1407
1408	if (new_task->coalition[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
1409	panic("created task is not a member of a resource coalition");
1410	}
1411	#endif /* CONFIG_COALITIONS */
1412
1413	new_task->dispatchqueue_offset = `0`;
1414	if (parent_task != NULL) {
1415	new_task->dispatchqueue_offset = parent_task->dispatchqueue_offset;
1416	}
1417
1418	new_task->task_volatile_objects = `0`;
1419	new_task->task_nonvolatile_objects = `0`;
1420	new_task->task_purgeable_disowning = FALSE;
1421	new_task->task_purgeable_disowned = FALSE;
1422	queue_init(&new_task->task_objq);
1423	task_objq_lock_init(new_task);
1424
1425	#if __arm64__
1426	new_task->task_legacy_footprint = FALSE;
1427	#endif /* __arm64__ */
1428	new_task->task_region_footprint = FALSE;
1429	new_task->task_has_crossed_thread_limit = FALSE;
1430	new_task->task_thread_limit = `0`;
1431	#if CONFIG_SECLUDED_MEMORY
1432	new_task->task_can_use_secluded_mem = FALSE;
1433	new_task->task_could_use_secluded_mem = FALSE;
1434	new_task->task_could_also_use_secluded_mem = FALSE;
1435	new_task->task_suppressed_secluded = FALSE;
1436	#endif /* CONFIG_SECLUDED_MEMORY */
1437
1438	/*
1439	* t_flags is set up above. But since we don't
1440	* support darkwake mode being set that way
1441	* currently, we clear it out here explicitly.
1442	*/
1443	new_task->t_flags &= ~(TF_DARKWAKE_MODE);
1444
1445	queue_init(&new_task->io_user_clients);
1446
1447	ipc_task_enable(new_task);
1448
1449	lck_mtx_lock(&tasks_threads_lock);
1450	queue_enter(&tasks, new_task, task_t, tasks);
1451	tasks_count++;
1452	if (tasks_suspend_state) {
1453	task_suspend_internal(new_task);
1454	}
1455	lck_mtx_unlock(&tasks_threads_lock);
1456
1457	*child_task = new_task;
1458	return(KERN_SUCCESS);
1459	}
1460
1461	/*
1462	* task_rollup_accounting_info
1463	*
1464	* Roll up accounting stats. Used to rollup stats
1465	* for exec copy task and corpse fork.
1466	*/
1467	void
1468	task_rollup_accounting_info(task_t to_task, task_t from_task)
1469	{
1470	assert(from_task != to_task);
1471
1472	to_task->total_user_time = from_task->total_user_time;
1473	to_task->total_system_time = from_task->total_system_time;
1474	to_task->total_ptime = from_task->total_ptime;
1475	to_task->total_runnable_time = from_task->total_runnable_time;
1476	to_task->faults = from_task->faults;
1477	to_task->pageins = from_task->pageins;
1478	to_task->cow_faults = from_task->cow_faults;
1479	to_task->messages_sent = from_task->messages_sent;
1480	to_task->messages_received = from_task->messages_received;
1481	to_task->syscalls_mach = from_task->syscalls_mach;
1482	to_task->syscalls_unix = from_task->syscalls_unix;
1483	to_task->c_switch = from_task->c_switch;
1484	to_task->p_switch = from_task->p_switch;
1485	to_task->ps_switch = from_task->ps_switch;
1486	to_task->extmod_statistics = from_task->extmod_statistics;
1487	to_task->low_mem_notified_warn = from_task->low_mem_notified_warn;
1488	to_task->low_mem_notified_critical = from_task->low_mem_notified_critical;
1489	to_task->purged_memory_warn = from_task->purged_memory_warn;
1490	to_task->purged_memory_critical = from_task->purged_memory_critical;
1491	to_task->low_mem_privileged_listener = from_task->low_mem_privileged_listener;
1492	to_task->task_io_stats = from_task->task_io_stats;
1493	to_task->cpu_time_eqos_stats = from_task->cpu_time_eqos_stats;
1494	to_task->cpu_time_rqos_stats = from_task->cpu_time_rqos_stats;
1495	to_task->task_timer_wakeups_bin_1 = from_task->task_timer_wakeups_bin_1;
1496	to_task->task_timer_wakeups_bin_2 = from_task->task_timer_wakeups_bin_2;
1497	to_task->task_gpu_ns = from_task->task_gpu_ns;
1498	to_task->task_immediate_writes = from_task->task_immediate_writes;
1499	to_task->task_deferred_writes = from_task->task_deferred_writes;
1500	to_task->task_invalidated_writes = from_task->task_invalidated_writes;
1501	to_task->task_metadata_writes = from_task->task_metadata_writes;
1502	to_task->task_energy = from_task->task_energy;
1503
1504	/ Skip ledger roll up for memory accounting entries /
1505	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time);
1506	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.platform_idle_wakeups);
1507	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.interrupt_wakeups);
1508	#if CONFIG_SCHED_SFI
1509	for (sfi_class_id_t class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
1510	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.sfi_wait_times[class_id]);
1511	}
1512	#endif
1513	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_me);
1514	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_others);
1515	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.physical_writes);
1516	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.logical_writes);
1517	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_me);
1518	ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_others);
1519	}
1520
1521	int task_dropped_imp_count = `0`;
1522
1523	/*
1524	* task_deallocate:
1525	*
1526	* Drop a reference on a task.
1527	*/
1528	void
1529	task_deallocate(
1530	task_t task)
1531	{
1532	ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups;
1533	uint32_t refs;
1534
1535	if (task == TASK_NULL)
1536	return;
1537
1538	refs = task_deallocate_internal(task);
1539
1540	#if IMPORTANCE_INHERITANCE
1541	if (refs > `1`)
1542	return;
1543
1544	atomic_load_explicit(&task->ref_count, memory_order_acquire);
1545
1546	if (refs == `1`) {
1547	/*
1548	* If last ref potentially comes from the task's importance,
1549	* disconnect it. But more task refs may be added before
1550	* that completes, so wait for the reference to go to zero
1551	* naturually (it may happen on a recursive task_deallocate()
1552	* from the ipc_importance_disconnect_task() call).
1553	*/
1554	if (IIT_NULL != task->task_imp_base)
1555	ipc_importance_disconnect_task(task);
1556	return;
1557	}
1558	#else
1559	if (refs > `0`)
1560	return;
1561
1562	atomic_load_explicit(&task->ref_count, memory_order_acquire);
1563
1564	#endif /* IMPORTANCE_INHERITANCE */
1565
1566	lck_mtx_lock(&tasks_threads_lock);
1567	queue_remove(&terminated_tasks, task, task_t, tasks);
1568	terminated_tasks_count--;
1569	lck_mtx_unlock(&tasks_threads_lock);
1570
1571	/*
1572	* remove the reference on atm descriptor
1573	*/
1574	task_atm_reset(task);
1575
1576	/*
1577	* remove the reference on bank context
1578	*/
1579	task_bank_reset(task);
1580
1581	if (task->task_io_stats)
1582	kfree(task->task_io_stats, sizeof(struct io_stat_info));
1583
1584	/*
1585	* Give the machine dependent code a chance
1586	* to perform cleanup before ripping apart
1587	* the task.
1588	*/
1589	machine_task_terminate(task);
1590
1591	ipc_task_terminate(task);
1592
1593	/ let iokit know /
1594	iokit_task_terminate(task);
1595
1596	if (task->affinity_space)
1597	task_affinity_deallocate(task);
1598
1599	#if MACH_ASSERT
1600	if (task->ledger != NULL &&
1601	task->map != NULL &&
1602	task->map->pmap != NULL &&
1603	task->map->pmap->ledger != NULL) {
1604	assert(task->ledger == task->map->pmap->ledger);
1605	}
1606	#endif /* MACH_ASSERT */
1607
1608	vm_purgeable_disown(task);
1609	assert(task->task_purgeable_disowned);
1610	if (task->task_volatile_objects != `0` \|\|
1611	task->task_nonvolatile_objects != `0`) {
1612	panic("task_deallocate(%p): "
1613	"volatile_objects=%d nonvolatile_objects=%d\n",
1614	task,
1615	task->task_volatile_objects,
1616	task->task_nonvolatile_objects);
1617	}
1618
1619	vm_map_deallocate(task->map);
1620	is_release(task->itk_space);
1621
1622	ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
1623	&interrupt_wakeups, &debit);
1624	ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
1625	&platform_idle_wakeups, &debit);
1626
1627	#if defined(CONFIG_SCHED_MULTIQ)
1628	sched_group_destroy(task->sched_group);
1629	#endif
1630
1631	/ Accumulate statistics for dead tasks /
1632	lck_spin_lock(&dead_task_statistics_lock);
1633	dead_task_statistics.total_user_time += task->total_user_time;
1634	dead_task_statistics.total_system_time += task->total_system_time;
1635
1636	dead_task_statistics.task_interrupt_wakeups += interrupt_wakeups;
1637	dead_task_statistics.task_platform_idle_wakeups += platform_idle_wakeups;
1638
1639	dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1;
1640	dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2;
1641	dead_task_statistics.total_ptime += task->total_ptime;
1642	dead_task_statistics.total_pset_switches += task->ps_switch;
1643	dead_task_statistics.task_gpu_ns += task->task_gpu_ns;
1644	dead_task_statistics.task_energy += task->task_energy;
1645
1646	lck_spin_unlock(&dead_task_statistics_lock);
1647	lck_mtx_destroy(&task->lock, &task_lck_grp);
1648
1649	if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit,
1650	&debit)) {
1651	OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
1652	OSAddAtomic64(debit, (int64_t *)&tasks_tkm_private.free);
1653	}
1654	if (!ledger_get_entries(task->ledger, task_ledgers.tkm_shared, &credit,
1655	&debit)) {
1656	OSAddAtomic64(credit, (int64_t *)&tasks_tkm_shared.alloc);
1657	OSAddAtomic64(debit, (int64_t *)&tasks_tkm_shared.free);
1658	}
1659	ledger_dereference(task->ledger);
1660
1661	#if TASK_REFERENCE_LEAK_DEBUG
1662	btlog_remove_entries_for_element(task_ref_btlog, task);
1663	#endif
1664
1665	#if CONFIG_COALITIONS
1666	task_release_coalitions(task);
1667	#endif /* CONFIG_COALITIONS */
1668
1669	bzero(task->coalition, sizeof(task->coalition));
1670
1671	#if MACH_BSD
1672	/ clean up collected information since last reference to task is gone /
1673	if (task->corpse_info) {
1674	void *corpse_info_kernel = kcdata_memory_get_begin_addr(task->corpse_info);
1675	task_crashinfo_destroy(task->corpse_info);
1676	task->corpse_info = NULL;
1677	if (corpse_info_kernel) {
1678	kfree(corpse_info_kernel, CORPSEINFO_ALLOCATION_SIZE);
1679	}
1680	}
1681	#endif
1682
1683	#if CONFIG_MACF
1684	if (task->crash_label) {
1685	mac_exc_free_label(task->crash_label);
1686	task->crash_label = NULL;
1687	}
1688	#endif
1689
1690	assert(queue_empty(&task->task_objq));
1691
1692	zfree(task_zone, task);
1693	}
1694
1695	/*
1696	* task_name_deallocate:
1697	*
1698	* Drop a reference on a task name.
1699	*/
1700	void
1701	task_name_deallocate(
1702	task_name_t task_name)
1703	{
1704	return(task_deallocate((task_t)task_name));
1705	}
1706
1707	/*
1708	* task_inspect_deallocate:
1709	*
1710	* Drop a task inspection reference.
1711	*/
1712	void
1713	task_inspect_deallocate(
1714	task_inspect_t task_inspect)
1715	{
1716	return(task_deallocate((task_t)task_inspect));
1717	}
1718
1719	/*
1720	* task_suspension_token_deallocate:
1721	*
1722	* Drop a reference on a task suspension token.
1723	*/
1724	void
1725	task_suspension_token_deallocate(
1726	task_suspension_token_t token)
1727	{
1728	return(task_deallocate((task_t)token));
1729	}
1730
1731
1732	/*
1733	* task_collect_crash_info:
1734	*
1735	* collect crash info from bsd and mach based data
1736	*/
1737	kern_return_t
1738	task_collect_crash_info(
1739	task_t task,
1740	#ifdef CONFIG_MACF
1741	struct label *crash_label,
1742	#endif
1743	int is_corpse_fork)
1744	{
1745	kern_return_t kr = KERN_SUCCESS;
1746
1747	kcdata_descriptor_t crash_data = NULL;
1748	kcdata_descriptor_t crash_data_release = NULL;
1749	mach_msg_type_number_t size = CORPSEINFO_ALLOCATION_SIZE;
1750	mach_vm_offset_t crash_data_ptr = `0`;
1751	void *crash_data_kernel = NULL;
1752	void *crash_data_kernel_release = NULL;
1753	#if CONFIG_MACF
1754	struct label label, free_label;
1755	#endif
1756
1757	if (!corpses_enabled()) {
1758	return KERN_NOT_SUPPORTED;
1759	}
1760
1761	#if CONFIG_MACF
1762	free_label = label = mac_exc_create_label();
1763	#endif
1764
1765	task_lock(task);
1766
1767	assert(is_corpse_fork \|\| task->bsd_info != NULL);
1768	if (task->corpse_info == NULL && (is_corpse_fork \|\| task->bsd_info != NULL)) {
1769	#if CONFIG_MACF
1770	/ Set the crash label, used by the exception delivery mac hook /
1771	free_label = task->crash_label; // Most likely NULL.
1772	task->crash_label = label;
1773	mac_exc_update_task_crash_label(task, crash_label);
1774	#endif
1775	task_unlock(task);
1776
1777	crash_data_kernel = (void *) kalloc(CORPSEINFO_ALLOCATION_SIZE);
1778	if (crash_data_kernel == NULL) {
1779	kr = KERN_RESOURCE_SHORTAGE;
1780	goto out_no_lock;
1781	}
1782	bzero(crash_data_kernel, CORPSEINFO_ALLOCATION_SIZE);
1783	crash_data_ptr = (mach_vm_offset_t) crash_data_kernel;
1784
1785	/ Do not get a corpse ref for corpse fork /
1786	crash_data = task_crashinfo_alloc_init((mach_vm_address_t)crash_data_ptr, size,
1787	is_corpse_fork ? `0` : CORPSE_CRASHINFO_HAS_REF,
1788	KCFLAG_USE_MEMCOPY);
1789	if (crash_data) {
1790	task_lock(task);
1791	crash_data_release = task->corpse_info;
1792	crash_data_kernel_release = kcdata_memory_get_begin_addr(crash_data_release);
1793	task->corpse_info = crash_data;
1794
1795	task_unlock(task);
1796	kr = KERN_SUCCESS;
1797	} else {
1798	kfree(crash_data_kernel, CORPSEINFO_ALLOCATION_SIZE);
1799	kr = KERN_FAILURE;
1800	}
1801
1802	if (crash_data_release != NULL) {
1803	task_crashinfo_destroy(crash_data_release);
1804	}
1805	if (crash_data_kernel_release != NULL) {
1806	kfree(crash_data_kernel_release, CORPSEINFO_ALLOCATION_SIZE);
1807	}
1808	} else {
1809	task_unlock(task);
1810	}
1811
1812	out_no_lock:
1813	#if CONFIG_MACF
1814	if (free_label != NULL) {
1815	mac_exc_free_label(free_label);
1816	}
1817	#endif
1818	return kr;
1819	}
1820
1821	/*
1822	* task_deliver_crash_notification:
1823	*
1824	* Makes outcall to registered host port for a corpse.
1825	*/
1826	kern_return_t
1827	task_deliver_crash_notification(
1828	task_t task,
1829	thread_t thread,
1830	exception_type_t etype,
1831	mach_exception_subcode_t subcode)
1832	{
1833	kcdata_descriptor_t crash_info = task->corpse_info;
1834	thread_t th_iter = NULL;
1835	kern_return_t kr = KERN_SUCCESS;
1836	wait_interrupt_t wsave;
1837	mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
1838	ipc_port_t task_port, old_notify;
1839
1840	if (crash_info == NULL)
1841	return KERN_FAILURE;
1842
1843	task_lock(task);
1844	if (task_is_a_corpse_fork(task)) {
1845	/ Populate code with EXC_{RESOURCE,GUARD} for corpse fork /
1846	code[`0`] = etype;
1847	code[`1`] = subcode;
1848	} else {
1849	/ Populate code with EXC_CRASH for corpses /
1850	code[`0`] = EXC_CRASH;
1851	code[`1`] = `0`;
1852	/ Update the code[1] if the boot-arg corpse_for_fatal_memkill is set /
1853	if (corpse_for_fatal_memkill) {
1854	code[`1`] = subcode;
1855	}
1856	}
1857
1858	queue_iterate(&task->threads, th_iter, thread_t, task_threads)
1859	{
1860	if (th_iter->corpse_dup == FALSE) {
1861	ipc_thread_reset(th_iter);
1862	}
1863	}
1864	task_unlock(task);
1865
1866	/ Arm the no-sender notification for taskport /
1867	task_reference(task);
1868	task_port = convert_task_to_port(task);
1869	ip_lock(task_port);
1870	assert(ip_active(task_port));
1871	ipc_port_nsrequest(task_port, task_port->ip_mscount, ipc_port_make_sonce_locked(task_port), &old_notify);
1872	/ port unlocked /
1873	assert(IP_NULL == old_notify);
1874
1875	wsave = thread_interrupt_level(THREAD_UNINT);
1876	kr = exception_triage_thread(EXC_CORPSE_NOTIFY, code, EXCEPTION_CODE_MAX, thread);
1877	if (kr != KERN_SUCCESS) {
1878	printf("Failed to send exception EXC_CORPSE_NOTIFY. error code: %d for pid %d\n", kr, task_pid(task));
1879	}
1880
1881	(void)thread_interrupt_level(wsave);
1882
1883	/*
1884	* Drop the send right on task port, will fire the
1885	* no-sender notification if exception deliver failed.
1886	*/
1887	ipc_port_release_send(task_port);
1888	return kr;
1889	}
1890
1891	/*
1892	* task_terminate:
1893	*
1894	* Terminate the specified task. See comments on thread_terminate
1895	* (kern/thread.c) about problems with terminating the "current task."
1896	*/
1897
1898	kern_return_t
1899	task_terminate(
1900	task_t task)
1901	{
1902	if (task == TASK_NULL)
1903	return (KERN_INVALID_ARGUMENT);
1904
1905	if (task->bsd_info)
1906	return (KERN_FAILURE);
1907
1908	return (task_terminate_internal(task));
1909	}
1910
#if MACH_ASSERT
/*
 * BSD-side helpers used by task_terminate_internal() to label a pmap
 * with the pid and name of its process.  proc_name_kdp() is called there
 * with a char array and its sizeof, hence (char *buf, int size).
 */
extern int proc_pid(struct proc *);
extern void proc_name_kdp(task_t t, char *buf, int size);
#endif /* MACH_ASSERT */
1915
1916	#define VM_MAP_PARTIAL_REAP 0x54 /* 0x150 */
1917	static void
1918	__unused task_partial_reap(task_t task, __unused int pid)
1919	{
1920	unsigned int reclaimed_resident = `0`;
1921	unsigned int reclaimed_compressed = `0`;
1922	uint64_t task_page_count;
1923
1924	task_page_count = (get_task_phys_footprint(task) / PAGE_SIZE_64);
1925
1926	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) \| DBG_FUNC_START),
1927	pid, task_page_count, `0`, `0`, `0`);
1928
1929	vm_map_partial_reap(task->map, &reclaimed_resident, &reclaimed_compressed);
1930
1931	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) \| DBG_FUNC_END),
1932	pid, reclaimed_resident, reclaimed_compressed, `0`, `0`);
1933	}
1934
1935	kern_return_t
1936	task_mark_corpse(task_t task)
1937	{
1938	kern_return_t kr = KERN_SUCCESS;
1939	thread_t self_thread;
1940	(void) self_thread;
1941	wait_interrupt_t wsave;
1942	#if CONFIG_MACF
1943	struct label *crash_label = NULL;
1944	#endif
1945
1946	assert(task != kernel_task);
1947	assert(task == current_task());
1948	assert(!task_is_a_corpse(task));
1949
1950	#if CONFIG_MACF
1951	crash_label = mac_exc_create_label_for_proc((struct proc*)task->bsd_info);
1952	#endif
1953
1954	kr = task_collect_crash_info(task,
1955	#if CONFIG_MACF
1956	crash_label,
1957	#endif
1958	FALSE);
1959	if (kr != KERN_SUCCESS) {
1960	goto out;
1961	}
1962
1963	self_thread = current_thread();
1964
1965	wsave = thread_interrupt_level(THREAD_UNINT);
1966	task_lock(task);
1967
1968	task_set_corpse_pending_report(task);
1969	task_set_corpse(task);
1970	task->crashed_thread_id = thread_tid(self_thread);
1971
1972	kr = task_start_halt_locked(task, TRUE);
1973	assert(kr == KERN_SUCCESS);
1974
1975	ipc_task_reset(task);
1976	/ Remove the naked send right for task port, needed to arm no sender notification /
1977	task_set_special_port(task, TASK_KERNEL_PORT, IPC_PORT_NULL);
1978	ipc_task_enable(task);
1979
1980	task_unlock(task);
1981	/ terminate the ipc space /
1982	ipc_space_terminate(task->itk_space);
1983
1984	/ Add it to global corpse task list /
1985	task_add_to_corpse_task_list(task);
1986
1987	task_start_halt(task);
1988	thread_terminate_internal(self_thread);
1989
1990	(void) thread_interrupt_level(wsave);
1991	assert(task->halting == TRUE);
1992
1993	out:
1994	#if CONFIG_MACF
1995	mac_exc_free_label(crash_label);
1996	#endif
1997	return kr;
1998	}
1999
2000	/*
2001	* task_clear_corpse
2002	*
2003	* Clears the corpse pending bit on task.
2004	* Removes inspection bit on the threads.
2005	*/
2006	void
2007	task_clear_corpse(task_t task)
2008	{
2009	thread_t th_iter = NULL;
2010
2011	task_lock(task);
2012	queue_iterate(&task->threads, th_iter, thread_t, task_threads)
2013	{
2014	thread_mtx_lock(th_iter);
2015	th_iter->inspection = FALSE;
2016	thread_mtx_unlock(th_iter);
2017	}
2018
2019	thread_terminate_crashed_threads();
2020	/ remove the pending corpse report flag /
2021	task_clear_corpse_pending_report(task);
2022
2023	task_unlock(task);
2024	}
2025
2026	/*
2027	* task_port_notify
2028	*
2029	* Called whenever the Mach port system detects no-senders on
2030	* the task port of a corpse.
2031	* Each notification that comes in should terminate the task (corpse).
2032	*/
2033	void
2034	task_port_notify(mach_msg_header_t *msg)
2035	{
2036	mach_no_senders_notification_t notification = (void* *)msg;
2037	ipc_port_t port = notification->not_header.msgh_remote_port;
2038	task_t task;
2039
2040	assert(ip_active(port));
2041	assert(IKOT_TASK == ip_kotype(port));
2042	task = (task_t) port->ip_kobject;
2043
2044	assert(task_is_a_corpse(task));
2045
2046	/ Remove the task from global corpse task list /
2047	task_remove_from_corpse_task_list(task);
2048
2049	task_clear_corpse(task);
2050	task_terminate_internal(task);
2051	}
2052
2053	/*
2054	* task_wait_till_threads_terminate_locked
2055	*
2056	* Wait till all the threads in the task are terminated.
2057	* Might release the task lock and re-acquire it.
2058	*/
2059	void
2060	task_wait_till_threads_terminate_locked(task_t task)
2061	{
2062	/ wait for all the threads in the task to terminate /
2063	while (task->active_thread_count != `0`) {
2064	assert_wait((event_t)&task->active_thread_count, THREAD_UNINT);
2065	task_unlock(task);
2066	thread_block(THREAD_CONTINUE_NULL);
2067
2068	task_lock(task);
2069	}
2070	}
2071
2072	/*
2073	* task_duplicate_map_and_threads
2074	*
2075	* Copy vmmap of source task.
2076	* Copy active threads from source task to destination task.
2077	* Source task would be suspended during the copy.
2078	*/
2079	kern_return_t
2080	task_duplicate_map_and_threads(
2081	task_t task,
2082	void *p,
2083	task_t new_task,
2084	thread_t *thread_ret,
2085	uint64_t **udata_buffer,
2086	int *size,
2087	int *num_udata)
2088	{
2089	kern_return_t kr = KERN_SUCCESS;
2090	int active;
2091	thread_t thread, self, thread_return = THREAD_NULL;
2092	thread_t new_thread = THREAD_NULL, first_thread = THREAD_NULL;
2093	thread_t *thread_array;
2094	uint32_t active_thread_count = `0`, array_count = `0`, i;
2095	vm_map_t oldmap;
2096	uint64_t *buffer = NULL;
2097	int buf_size = `0`;
2098	int est_knotes = `0`, num_knotes = `0`;
2099
2100	self = current_thread();
2101
2102	/*
2103	* Suspend the task to copy thread state, use the internal
2104	* variant so that no user-space process can resume
2105	* the task from under us
2106	*/
2107	kr = task_suspend_internal(task);
2108	if (kr != KERN_SUCCESS) {
2109	return kr;
2110	}
2111
2112	if (task->map->disable_vmentry_reuse == TRUE) {
2113	/*
2114	* Quite likely GuardMalloc (or some debugging tool)
2115	* is being used on this task. And it has gone through
2116	* its limit. Making a corpse will likely encounter
2117	* a lot of VM entries that will need COW.
2118	*
2119	* Skip it.
2120	*/
2121	#if DEVELOPMENT \|\| DEBUG
2122	memorystatus_abort_vm_map_fork(task);
2123	#endif
2124	task_resume_internal(task);
2125	return KERN_FAILURE;
2126	}
2127
2128	/ Check with VM if vm_map_fork is allowed for this task /
2129	if (memorystatus_allowed_vm_map_fork(task)) {
2130
2131	/ Setup new task's vmmap, switch from parent task's map to it COW map /
2132	oldmap = new_task->map;
2133	new_task->map = vm_map_fork(new_task->ledger,
2134	task->map,
2135	(VM_MAP_FORK_SHARE_IF_INHERIT_NONE \|
2136	VM_MAP_FORK_PRESERVE_PURGEABLE \|
2137	VM_MAP_FORK_CORPSE_FOOTPRINT));
2138	vm_map_deallocate(oldmap);
2139
2140	/ copy ledgers that impact the memory footprint /
2141	vm_map_copy_footprint_ledgers(task, new_task);
2142
2143	/ Get all the udata pointers from kqueue /
2144	est_knotes = kevent_proc_copy_uptrs(p, NULL, `0`);
2145	if (est_knotes > `0`) {
2146	buf_size = (est_knotes + `32`) * sizeof(uint64_t);
2147	buffer = (uint64_t *) kalloc(buf_size);
2148	num_knotes = kevent_proc_copy_uptrs(p, buffer, buf_size);
2149	if (num_knotes > est_knotes + `32`) {
2150	num_knotes = est_knotes + `32`;
2151	}
2152	}
2153	}
2154
2155	active_thread_count = task->active_thread_count;
2156	if (active_thread_count == `0`) {
2157	if (buffer != NULL) {
2158	kfree(buffer, buf_size);
2159	}
2160	task_resume_internal(task);
2161	return KERN_FAILURE;
2162	}
2163
2164	thread_array = (thread_t ) kalloc(sizeof(thread_t) active_thread_count);
2165
2166	/ Iterate all the threads and drop the task lock before calling thread_create_with_continuation /
2167	task_lock(task);
2168	queue_iterate(&task->threads, thread, thread_t, task_threads) {
2169	/ Skip inactive threads /
2170	active = thread->active;
2171	if (!active) {
2172	continue;
2173	}
2174
2175	if (array_count >= active_thread_count) {
2176	break;
2177	}
2178
2179	thread_array[array_count++] = thread;
2180	thread_reference(thread);
2181	}
2182	task_unlock(task);
2183
2184	for (i = `0`; i < array_count; i++) {
2185
2186	kr = thread_create_with_continuation(new_task, &new_thread, (thread_continue_t)thread_corpse_continue);
2187	if (kr != KERN_SUCCESS) {
2188	break;
2189	}
2190
2191	/ Equivalent of current thread in corpse /
2192	if (thread_array[i] == self) {
2193	thread_return = new_thread;
2194	new_task->crashed_thread_id = thread_tid(new_thread);
2195	} else if (first_thread == NULL) {
2196	first_thread = new_thread;
2197	} else {
2198	/ drop the extra ref returned by thread_create_with_continuation /
2199	thread_deallocate(new_thread);
2200	}
2201
2202	kr = thread_dup2(thread_array[i], new_thread);
2203	if (kr != KERN_SUCCESS) {
2204	thread_mtx_lock(new_thread);
2205	new_thread->corpse_dup = TRUE;
2206	thread_mtx_unlock(new_thread);
2207	continue;
2208	}
2209
2210	/ Copy thread name /
2211	bsd_copythreadname(new_thread->uthread, thread_array[i]->uthread);
2212	new_thread->thread_tag = thread_array[i]->thread_tag;
2213	thread_copy_resource_info(new_thread, thread_array[i]);
2214	}
2215
2216	/ return the first thread if we couldn't find the equivalent of current /
2217	if (thread_return == THREAD_NULL) {
2218	thread_return = first_thread;
2219	}
2220	else if (first_thread != THREAD_NULL) {
2221	/ drop the extra ref returned by thread_create_with_continuation /
2222	thread_deallocate(first_thread);
2223	}
2224
2225	task_resume_internal(task);
2226
2227	for (i = `0`; i < array_count; i++) {
2228	thread_deallocate(thread_array[i]);
2229	}
2230	kfree(thread_array, sizeof(thread_t) * active_thread_count);
2231
2232	if (kr == KERN_SUCCESS) {
2233	*thread_ret = thread_return;
2234	*udata_buffer = buffer;
2235	*size = buf_size;
2236	*num_udata = num_knotes;
2237	} else {
2238	if (thread_return != THREAD_NULL) {
2239	thread_deallocate(thread_return);
2240	}
2241	if (buffer != NULL) {
2242	kfree(buffer, buf_size);
2243	}
2244	}
2245
2246	return kr;
2247	}
2248
2249	#if CONFIG_SECLUDED_MEMORY
2250	extern void task_set_can_use_secluded_mem_locked(
2251	task_t task,
2252	boolean_t can_use_secluded_mem);
2253	#endif /* CONFIG_SECLUDED_MEMORY */
2254
2255	kern_return_t
2256	task_terminate_internal(
2257	task_t task)
2258	{
2259	thread_t thread, self;
2260	task_t self_task;
2261	boolean_t interrupt_save;
2262	int pid = `0`;
2263
2264	assert(task != kernel_task);
2265
2266	self = current_thread();
2267	self_task = self->task;
2268
2269	/*
2270	* Get the task locked and make sure that we are not racing
2271	* with someone else trying to terminate us.
2272	*/
2273	if (task == self_task)
2274	task_lock(task);
2275	else
2276	if (task < self_task) {
2277	task_lock(task);
2278	task_lock(self_task);
2279	}
2280	else {
2281	task_lock(self_task);
2282	task_lock(task);
2283	}
2284
2285	#if CONFIG_SECLUDED_MEMORY
2286	if (task->task_can_use_secluded_mem) {
2287	task_set_can_use_secluded_mem_locked(task, FALSE);
2288	}
2289	task->task_could_use_secluded_mem = FALSE;
2290	task->task_could_also_use_secluded_mem = FALSE;
2291
2292	if (task->task_suppressed_secluded) {
2293	stop_secluded_suppression(task);
2294	}
2295	#endif /* CONFIG_SECLUDED_MEMORY */
2296
2297	if (!task->active) {
2298	/*
2299	* Task is already being terminated.
2300	* Just return an error. If we are dying, this will
2301	* just get us to our AST special handler and that
2302	* will get us to finalize the termination of ourselves.
2303	*/
2304	task_unlock(task);
2305	if (self_task != task)
2306	task_unlock(self_task);
2307
2308	return (KERN_FAILURE);
2309	}
2310
2311	if (task_corpse_pending_report(task)) {
2312	/*
2313	* Task is marked for reporting as corpse.
2314	* Just return an error. This will
2315	* just get us to our AST special handler and that
2316	* will get us to finish the path to death
2317	*/
2318	task_unlock(task);
2319	if (self_task != task)
2320	task_unlock(self_task);
2321
2322	return (KERN_FAILURE);
2323	}
2324
2325	if (self_task != task)
2326	task_unlock(self_task);
2327
2328	/*
2329	* Make sure the current thread does not get aborted out of
2330	* the waits inside these operations.
2331	*/
2332	interrupt_save = thread_interrupt_level(THREAD_UNINT);
2333
2334	/*
2335	* Indicate that we want all the threads to stop executing
2336	* at user space by holding the task (we would have held
2337	* each thread independently in thread_terminate_internal -
2338	* but this way we may be more likely to already find it
2339	* held there). Mark the task inactive, and prevent
2340	* further task operations via the task port.
2341	*/
2342	task_hold_locked(task);
2343	task->active = FALSE;
2344	ipc_task_disable(task);
2345
2346	#if CONFIG_TELEMETRY
2347	/*
2348	* Notify telemetry that this task is going away.
2349	*/
2350	telemetry_task_ctl_locked(task, TF_TELEMETRY, `0`);
2351	#endif
2352
2353	/*
2354	* Terminate each thread in the task.
2355	*/
2356	queue_iterate(&task->threads, thread, thread_t, task_threads) {
2357	thread_terminate_internal(thread);
2358	}
2359
2360	#ifdef MACH_BSD
2361	if (task->bsd_info != NULL && !task_is_exec_copy(task)) {
2362	pid = proc_pid(task->bsd_info);
2363	}
2364	#endif /* MACH_BSD */
2365
2366	task_unlock(task);
2367
2368	proc_set_task_policy(task, TASK_POLICY_ATTRIBUTE,
2369	TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
2370
2371	/ Early object reap phase /
2372
2373	// PR-17045188: Revisit implementation
2374	// task_partial_reap(task, pid);
2375
2376	#if CONFIG_EMBEDDED
2377	/*
2378	* remove all task watchers
2379	*/
2380	task_removewatchers(task);
2381
2382	#endif /* CONFIG_EMBEDDED */
2383
2384	/*
2385	* Destroy all synchronizers owned by the task.
2386	*/
2387	task_synchronizer_destroy_all(task);
2388
2389	/*
2390	* Destroy the IPC space, leaving just a reference for it.
2391	*/
2392	ipc_space_terminate(task->itk_space);
2393
2394	#if 00
2395	/ if some ledgers go negative on tear-down again... /
2396	ledger_disable_panic_on_negative(task->map->pmap->ledger,
2397	task_ledgers.phys_footprint);
2398	ledger_disable_panic_on_negative(task->map->pmap->ledger,
2399	task_ledgers.internal);
2400	ledger_disable_panic_on_negative(task->map->pmap->ledger,
2401	task_ledgers.internal_compressed);
2402	ledger_disable_panic_on_negative(task->map->pmap->ledger,
2403	task_ledgers.iokit_mapped);
2404	ledger_disable_panic_on_negative(task->map->pmap->ledger,
2405	task_ledgers.alternate_accounting);
2406	ledger_disable_panic_on_negative(task->map->pmap->ledger,
2407	task_ledgers.alternate_accounting_compressed);
2408	#endif
2409
2410	/*
2411	* If the current thread is a member of the task
2412	* being terminated, then the last reference to
2413	* the task will not be dropped until the thread
2414	* is finally reaped. To avoid incurring the
2415	* expense of removing the address space regions
2416	* at reap time, we do it explictly here.
2417	*/
2418
2419	vm_map_lock(task->map);
2420	vm_map_disable_hole_optimization(task->map);
2421	vm_map_unlock(task->map);
2422
2423	#if MACH_ASSERT
2424	/*
2425	* Identify the pmap's process, in case the pmap ledgers drift
2426	* and we have to report it.
2427	*/
2428	char procname[`17`];
2429	if (task->bsd_info && !task_is_exec_copy(task)) {
2430	pid = proc_pid(task->bsd_info);
2431	proc_name_kdp(task, procname, sizeof (procname));
2432	} else {
2433	pid = `0`;
2434	strlcpy(procname, "<unknown>", sizeof (procname));
2435	}
2436	pmap_set_process(task->map->pmap, pid, procname);
2437	#endif /* MACH_ASSERT */
2438
2439	vm_map_remove(task->map,
2440	task->map->min_offset,
2441	task->map->max_offset,
2442	/*
2443	* Final cleanup:
2444	* + no unnesting
2445	* + remove immutable mappings
2446	* + allow gaps in range
2447	*/
2448	(VM_MAP_REMOVE_NO_UNNESTING \|
2449	VM_MAP_REMOVE_IMMUTABLE \|
2450	VM_MAP_REMOVE_GAPS_OK));
2451
2452	/ release our shared region /
2453	vm_shared_region_set(task, NULL);
2454
2455
2456	lck_mtx_lock(&tasks_threads_lock);
2457	queue_remove(&tasks, task, task_t, tasks);
2458	queue_enter(&terminated_tasks, task, task_t, tasks);
2459	tasks_count--;
2460	terminated_tasks_count++;
2461	lck_mtx_unlock(&tasks_threads_lock);
2462
2463	/*
2464	* We no longer need to guard against being aborted, so restore
2465	* the previous interruptible state.
2466	*/
2467	thread_interrupt_level(interrupt_save);
2468
2469	#if KPC
2470	/ force the task to release all ctrs /
2471	if (task->t_kpc & TASK_KPC_FORCED_ALL_CTRS)
2472	kpc_force_all_ctrs(task, `0`);
2473	#endif /* KPC */
2474
2475	#if CONFIG_COALITIONS
2476	/*
2477	* Leave our coalitions. (drop activation but not reference)
2478	*/
2479	coalitions_remove_task(task);
2480	#endif
2481
2482	/*
2483	* Get rid of the task active reference on itself.
2484	*/
2485	task_deallocate(task);
2486
2487	return (KERN_SUCCESS);
2488	}
2489
2490	void
2491	tasks_system_suspend(boolean_t suspend)
2492	{
2493	task_t task;
2494
2495	lck_mtx_lock(&tasks_threads_lock);
2496	assert(tasks_suspend_state != suspend);
2497	tasks_suspend_state = suspend;
2498	queue_iterate(&tasks, task, task_t, tasks) {
2499	if (task == kernel_task) {
2500	continue;
2501	}
2502	suspend ? task_suspend_internal(task) : task_resume_internal(task);
2503	}
2504	lck_mtx_unlock(&tasks_threads_lock);
2505	}
2506
2507	/*
2508	* task_start_halt:
2509	*
2510	* Shut the current task down (except for the current thread) in
2511	* preparation for dramatic changes to the task (probably exec).
2512	* We hold the task and mark all other threads in the task for
2513	* termination.
2514	*/
2515	kern_return_t
2516	task_start_halt(task_t task)
2517	{
2518	kern_return_t kr = KERN_SUCCESS;
2519	task_lock(task);
2520	kr = task_start_halt_locked(task, FALSE);
2521	task_unlock(task);
2522	return kr;
2523	}
2524
2525	static kern_return_t
2526	task_start_halt_locked(task_t task, boolean_t should_mark_corpse)
2527	{
2528	thread_t thread, self;
2529	uint64_t dispatchqueue_offset;
2530
2531	assert(task != kernel_task);
2532
2533	self = current_thread();
2534
2535	if (task != self->task && !task_is_a_corpse_fork(task))
2536	return (KERN_INVALID_ARGUMENT);
2537
2538	if (task->halting \|\| !task->active \|\| !self->active) {
2539	/*
2540	* Task or current thread is already being terminated.
2541	* Hurry up and return out of the current kernel context
2542	* so that we run our AST special handler to terminate
2543	* ourselves.
2544	*/
2545	return (KERN_FAILURE);
2546	}
2547
2548	task->halting = TRUE;
2549
2550	/*
2551	* Mark all the threads to keep them from starting any more
2552	* user-level execution. The thread_terminate_internal code
2553	* would do this on a thread by thread basis anyway, but this
2554	* gives us a better chance of not having to wait there.
2555	*/
2556	task_hold_locked(task);
2557	dispatchqueue_offset = get_dispatchqueue_offset_from_proc(task->bsd_info);
2558
2559	/*
2560	* Terminate all the other threads in the task.
2561	*/
2562	queue_iterate(&task->threads, thread, thread_t, task_threads)
2563	{
2564	if (should_mark_corpse) {
2565	thread_mtx_lock(thread);
2566	thread->inspection = TRUE;
2567	thread_mtx_unlock(thread);
2568	}
2569	if (thread != self)
2570	thread_terminate_internal(thread);
2571	}
2572	task->dispatchqueue_offset = dispatchqueue_offset;
2573
2574	task_release_locked(task);
2575
2576	return KERN_SUCCESS;
2577	}
2578
2579
2580	/*
2581	* task_complete_halt:
2582	*
2583	* Complete task halt by waiting for threads to terminate, then clean
2584	* up task resources (VM, port namespace, etc...) and then let the
2585	* current thread go in the (practically empty) task context.
2586	*
2587	* Note: task->halting flag is not cleared in order to avoid creation
2588	* of new thread in old exec'ed task.
2589	*/
2590	void
2591	task_complete_halt(task_t task)
2592	{
2593	task_lock(task);
2594	assert(task->halting);
2595	assert(task == current_task());
2596
2597	/*
2598	* Wait for the other threads to get shut down.
2599	* When the last other thread is reaped, we'll be
2600	* woken up.
2601	*/
2602	if (task->thread_count > `1`) {
2603	assert_wait((event_t)&task->halting, THREAD_UNINT);
2604	task_unlock(task);
2605	thread_block(THREAD_CONTINUE_NULL);
2606	} else {
2607	task_unlock(task);
2608	}
2609
2610	/*
2611	* Give the machine dependent code a chance
2612	* to perform cleanup of task-level resources
2613	* associated with the current thread before
2614	* ripping apart the task.
2615	*/
2616	machine_task_terminate(task);
2617
2618	/*
2619	* Destroy all synchronizers owned by the task.
2620	*/
2621	task_synchronizer_destroy_all(task);
2622
2623	/*
2624	* Destroy the contents of the IPC space, leaving just
2625	* a reference for it.
2626	*/
2627	ipc_space_clean(task->itk_space);
2628
2629	/*
2630	* Clean out the address space, as we are going to be
2631	* getting a new one.
2632	*/
2633	vm_map_remove(task->map, task->map->min_offset,
2634	task->map->max_offset,
2635	/*
2636	* Final cleanup:
2637	* + no unnesting
2638	* + remove immutable mappings
2639	* + allow gaps in the range
2640	*/
2641	(VM_MAP_REMOVE_NO_UNNESTING \|
2642	VM_MAP_REMOVE_IMMUTABLE \|
2643	VM_MAP_REMOVE_GAPS_OK));
2644
2645	/*
2646	* Kick out any IOKitUser handles to the task. At best they're stale,
2647	* at worst someone is racing a SUID exec.
2648	*/
2649	iokit_task_terminate(task);
2650	}
2651
2652	/*
2653	* task_hold_locked:
2654	*
2655	* Suspend execution of the specified task.
2656	* This is a recursive-style suspension of the task, a count of
2657	* suspends is maintained.
2658	*
2659	* CONDITIONS: the task is locked and active.
2660	*/
2661	void
2662	task_hold_locked(
2663	task_t task)
2664	{
2665	thread_t thread;
2666
2667	assert(task->active);
2668
2669	if (task->suspend_count++ > `0`)
2670	return;
2671
2672	if (task->bsd_info) {
2673	workq_proc_suspended(task->bsd_info);
2674	}
2675
2676	/*
2677	* Iterate through all the threads and hold them.
2678	*/
2679	queue_iterate(&task->threads, thread, thread_t, task_threads) {
2680	thread_mtx_lock(thread);
2681	thread_hold(thread);
2682	thread_mtx_unlock(thread);
2683	}
2684	}
2685
2686	/*
2687	* task_hold:
2688	*
2689	* Same as the internal routine above, except that is must lock
2690	* and verify that the task is active. This differs from task_suspend
2691	* in that it places a kernel hold on the task rather than just a
2692	* user-level hold. This keeps users from over resuming and setting
2693	* it running out from under the kernel.
2694	*
2695	* CONDITIONS: the caller holds a reference on the task
2696	*/
2697	kern_return_t
2698	task_hold(
2699	task_t task)
2700	{
2701	if (task == TASK_NULL)
2702	return (KERN_INVALID_ARGUMENT);
2703
2704	task_lock(task);
2705
2706	if (!task->active) {
2707	task_unlock(task);
2708
2709	return (KERN_FAILURE);
2710	}
2711
2712	task_hold_locked(task);
2713	task_unlock(task);
2714
2715	return (KERN_SUCCESS);
2716	}
2717
2718	kern_return_t
2719	task_wait(
2720	task_t task,
2721	boolean_t until_not_runnable)
2722	{
2723	if (task == TASK_NULL)
2724	return (KERN_INVALID_ARGUMENT);
2725
2726	task_lock(task);
2727
2728	if (!task->active) {
2729	task_unlock(task);
2730
2731	return (KERN_FAILURE);
2732	}
2733
2734	task_wait_locked(task, until_not_runnable);
2735	task_unlock(task);
2736
2737	return (KERN_SUCCESS);
2738	}
2739
2740	/*
2741	* task_wait_locked:
2742	*
2743	* Wait for all threads in task to stop.
2744	*
2745	* Conditions:
2746	* Called with task locked, active, and held.
2747	*/
2748	void
2749	task_wait_locked(
2750	task_t task,
2751	boolean_t until_not_runnable)
2752	{
2753	thread_t thread, self;
2754
2755	assert(task->active);
2756	assert(task->suspend_count > `0`);
2757
2758	self = current_thread();
2759
2760	/*
2761	* Iterate through all the threads and wait for them to
2762	* stop. Do not wait for the current thread if it is within
2763	* the task.
2764	*/
2765	queue_iterate(&task->threads, thread, thread_t, task_threads) {
2766	if (thread != self)
2767	thread_wait(thread, until_not_runnable);
2768	}
2769	}
2770
2771	/*
2772	* task_release_locked:
2773	*
2774	* Release a kernel hold on a task.
2775	*
2776	* CONDITIONS: the task is locked and active
2777	*/
2778	void
2779	task_release_locked(
2780	task_t task)
2781	{
2782	thread_t thread;
2783
2784	assert(task->active);
2785	assert(task->suspend_count > `0`);
2786
2787	if (--task->suspend_count > `0`)
2788	return;
2789
2790	if (task->bsd_info) {
2791	workq_proc_resumed(task->bsd_info);
2792	}
2793
2794	queue_iterate(&task->threads, thread, thread_t, task_threads) {
2795	thread_mtx_lock(thread);
2796	thread_release(thread);
2797	thread_mtx_unlock(thread);
2798	}
2799	}
2800
2801	/*
2802	* task_release:
2803	*
2804	* Same as the internal routine above, except that it must lock
2805	* and verify that the task is active.
2806	*
2807	* CONDITIONS: The caller holds a reference to the task
2808	*/
2809	kern_return_t
2810	task_release(
2811	task_t task)
2812	{
2813	if (task == TASK_NULL)
2814	return (KERN_INVALID_ARGUMENT);
2815
2816	task_lock(task);
2817
2818	if (!task->active) {
2819	task_unlock(task);
2820
2821	return (KERN_FAILURE);
2822	}
2823
2824	task_release_locked(task);
2825	task_unlock(task);
2826
2827	return (KERN_SUCCESS);
2828	}
2829
2830	kern_return_t
2831	task_threads(
2832	task_t task,
2833	thread_act_array_t *threads_out,
2834	mach_msg_type_number_t *count)
2835	{
2836	mach_msg_type_number_t actual;
2837	thread_t *thread_list;
2838	thread_t thread;
2839	vm_size_t size, size_needed;
2840	void *addr;
2841	unsigned int i, j;
2842
2843	if (task == TASK_NULL)
2844	return (KERN_INVALID_ARGUMENT);
2845
2846	size = `0`; addr = NULL;
2847
2848	for (;;) {
2849	task_lock(task);
2850	if (!task->active) {
2851	task_unlock(task);
2852
2853	if (size != `0`)
2854	kfree(addr, size);
2855
2856	return (KERN_FAILURE);
2857	}
2858
2859	actual = task->thread_count;
2860
2861	/ do we have the memory we need? /
2862	size_needed = actual * sizeof (mach_port_t);
2863	if (size_needed <= size)
2864	break;
2865
2866	/ unlock the task and allocate more memory /
2867	task_unlock(task);
2868
2869	if (size != `0`)
2870	kfree(addr, size);
2871
2872	assert(size_needed > `0`);
2873	size = size_needed;
2874
2875	addr = kalloc(size);
2876	if (addr == `0`)
2877	return (KERN_RESOURCE_SHORTAGE);
2878	}
2879
2880	/ OK, have memory and the task is locked & active /
2881	thread_list = (thread_t *)addr;
2882
2883	i = j = `0`;
2884
2885	for (thread = (thread_t)queue_first(&task->threads); i < actual;
2886	++i, thread = (thread_t)queue_next(&thread->task_threads)) {
2887	thread_reference_internal(thread);
2888	thread_list[j++] = thread;
2889	}
2890
2891	assert(queue_end(&task->threads, (queue_entry_t)thread));
2892
2893	actual = j;
2894	size_needed = actual * sizeof (mach_port_t);
2895
2896	/ can unlock task now that we've got the thread refs /
2897	task_unlock(task);
2898
2899	if (actual == `0`) {
2900	/ no threads, so return null pointer and deallocate memory /
2901
2902	*threads_out = NULL;
2903	*count = `0`;
2904
2905	if (size != `0`)
2906	kfree(addr, size);
2907	}
2908	else {
2909	/ if we allocated too much, must copy /
2910
2911	if (size_needed < size) {
2912	void *newaddr;
2913
2914	newaddr = kalloc(size_needed);
2915	if (newaddr == `0`) {
2916	for (i = `0`; i < actual; ++i)
2917	thread_deallocate(thread_list[i]);
2918	kfree(addr, size);
2919	return (KERN_RESOURCE_SHORTAGE);
2920	}
2921
2922	bcopy(addr, newaddr, size_needed);
2923	kfree(addr, size);
2924	thread_list = (thread_t *)newaddr;
2925	}
2926
2927	*threads_out = thread_list;
2928	*count = actual;
2929
2930	/ do the conversion that Mig should handle /
2931
2932	for (i = `0`; i < actual; ++i)
2933	((ipc_port_t *) thread_list)[i] = convert_thread_to_port(thread_list[i]);
2934	}
2935
2936	return (KERN_SUCCESS);
2937	}
2938
2939	#define TASK_HOLD_NORMAL 0
2940	#define TASK_HOLD_PIDSUSPEND 1
2941	#define TASK_HOLD_LEGACY 2
2942	#define TASK_HOLD_LEGACY_ALL 3
2943
2944	static kern_return_t
2945	place_task_hold (
2946	task_t task,
2947	int mode)
2948	{
2949	if (!task->active && !task_is_a_corpse(task)) {
2950	return (KERN_FAILURE);
2951	}
2952
2953	/ Return success for corpse task /
2954	if (task_is_a_corpse(task)) {
2955	return KERN_SUCCESS;
2956	}
2957
2958	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2959	MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_SUSPEND) \| DBG_FUNC_NONE,
2960	task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
2961	task->user_stop_count, task->user_stop_count + `1`, `0`);
2962
2963	#if MACH_ASSERT
2964	current_task()->suspends_outstanding++;
2965	#endif
2966
2967	if (mode == TASK_HOLD_LEGACY)
2968	task->legacy_stop_count++;
2969
2970	if (task->user_stop_count++ > `0`) {
2971	/*
2972	* If the stop count was positive, the task is
2973	* already stopped and we can exit.
2974	*/
2975	return (KERN_SUCCESS);
2976	}
2977
2978	/*
2979	* Put a kernel-level hold on the threads in the task (all
2980	* user-level task suspensions added together represent a
2981	* single kernel-level hold). We then wait for the threads
2982	* to stop executing user code.
2983	*/
2984	task_hold_locked(task);
2985	task_wait_locked(task, FALSE);
2986
2987	return (KERN_SUCCESS);
2988	}
2989
2990	static kern_return_t
2991	release_task_hold (
2992	task_t task,
2993	int mode)
2994	{
2995	boolean_t release = FALSE;
2996
2997	if (!task->active && !task_is_a_corpse(task)) {
2998	return (KERN_FAILURE);
2999	}
3000
3001	/ Return success for corpse task /
3002	if (task_is_a_corpse(task)) {
3003	return KERN_SUCCESS;
3004	}
3005
3006	if (mode == TASK_HOLD_PIDSUSPEND) {
3007	if (task->pidsuspended == FALSE) {
3008	return (KERN_FAILURE);
3009	}
3010	task->pidsuspended = FALSE;
3011	}
3012
3013	if (task->user_stop_count > (task->pidsuspended ? `1` : `0`)) {
3014
3015	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3016	MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_RESUME) \| DBG_FUNC_NONE,
3017	task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
3018	task->user_stop_count, mode, task->legacy_stop_count);
3019
3020	#if MACH_ASSERT
3021	/*
3022	* This is obviously not robust; if we suspend one task and then resume a different one,
3023	* we'll fly under the radar. This is only meant to catch the common case of a crashed
3024	* or buggy suspender.
3025	*/
3026	current_task()->suspends_outstanding--;
3027	#endif
3028
3029	if (mode == TASK_HOLD_LEGACY_ALL) {
3030	if (task->legacy_stop_count >= task->user_stop_count) {
3031	task->user_stop_count = `0`;
3032	release = TRUE;
3033	} else {
3034	task->user_stop_count -= task->legacy_stop_count;
3035	}
3036	task->legacy_stop_count = `0`;
3037	} else {
3038	if (mode == TASK_HOLD_LEGACY && task->legacy_stop_count > `0`)
3039	task->legacy_stop_count--;
3040	if (--task->user_stop_count == `0`)
3041	release = TRUE;
3042	}
3043	}
3044	else {
3045	return (KERN_FAILURE);
3046	}
3047
3048	/*
3049	* Release the task if necessary.
3050	*/
3051	if (release)
3052	task_release_locked(task);
3053
3054	return (KERN_SUCCESS);
3055	}
3056
3057
3058	/*
3059	* task_suspend:
3060	*
3061	* Implement an (old-fashioned) user-level suspension on a task.
3062	*
3063	* Because the user isn't expecting to have to manage a suspension
3064	* token, we'll track it for him in the kernel in the form of a naked
3065	* send right to the task's resume port. All such send rights
3066	* account for a single suspension against the task (unlike task_suspend2()
3067	* where each caller gets a unique suspension count represented by a
3068	* unique send-once right).
3069	*
3070	* Conditions:
3071	* The caller holds a reference to the task
3072	*/
3073	kern_return_t
3074	task_suspend(
3075	task_t task)
3076	{
3077	kern_return_t kr;
3078	mach_port_t port, send, old_notify;
3079	mach_port_name_t name;
3080
3081	if (task == TASK_NULL \|\| task == kernel_task)
3082	return (KERN_INVALID_ARGUMENT);
3083
3084	task_lock(task);
3085
3086	/*
3087	* Claim a send right on the task resume port, and request a no-senders
3088	* notification on that port (if none outstanding).
3089	*/
3090	if (task->itk_resume == IP_NULL) {
3091	task->itk_resume = ipc_port_alloc_kernel();
3092	if (!IP_VALID(task->itk_resume))
3093	panic("failed to create resume port");
3094	ipc_kobject_set(task->itk_resume, (ipc_kobject_t)task, IKOT_TASK_RESUME);
3095	}
3096
3097	port = task->itk_resume;
3098	ip_lock(port);
3099	assert(ip_active(port));
3100
3101	send = ipc_port_make_send_locked(port);
3102	assert(IP_VALID(send));
3103
3104	if (port->ip_nsrequest == IP_NULL) {
3105	ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
3106	assert(old_notify == IP_NULL);
3107	/ port unlocked /
3108	} else {
3109	ip_unlock(port);
3110	}
3111
3112	/*
3113	* place a legacy hold on the task.
3114	*/
3115	kr = place_task_hold(task, TASK_HOLD_LEGACY);
3116	if (kr != KERN_SUCCESS) {
3117	task_unlock(task);
3118	ipc_port_release_send(send);
3119	return kr;
3120	}
3121
3122	task_unlock(task);
3123
3124	/*
3125	* Copyout the send right into the calling task's IPC space. It won't know it is there,
3126	* but we'll look it up when calling a traditional resume. Any IPC operations that
3127	* deallocate the send right will auto-release the suspension.
3128	*/
3129	if ((kr = ipc_kmsg_copyout_object(current_task()->itk_space, (ipc_object_t)send,
3130	MACH_MSG_TYPE_MOVE_SEND, &name)) != KERN_SUCCESS) {
3131	printf("warning: %s(%d) failed to copyout suspension token for pid %d with error: %d\n",
3132	proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
3133	task_pid(task), kr);
3134	return (kr);
3135	}
3136
3137	return (kr);
3138	}
3139
3140	/*
3141	* task_resume:
3142	* Release a user hold on a task.
3143	*
3144	* Conditions:
3145	* The caller holds a reference to the task
3146	*/
3147	kern_return_t
3148	task_resume(
3149	task_t task)
3150	{
3151	kern_return_t kr;
3152	mach_port_name_t resume_port_name;
3153	ipc_entry_t resume_port_entry;
3154	ipc_space_t space = current_task()->itk_space;
3155
3156	if (task == TASK_NULL \|\| task == kernel_task )
3157	return (KERN_INVALID_ARGUMENT);
3158
3159	/ release a legacy task hold /
3160	task_lock(task);
3161	kr = release_task_hold(task, TASK_HOLD_LEGACY);
3162	task_unlock(task);
3163
3164	is_write_lock(space);
3165	if (is_active(space) && IP_VALID(task->itk_resume) &&
3166	ipc_hash_lookup(space, (ipc_object_t)task->itk_resume, &resume_port_name, &resume_port_entry) == TRUE) {
3167	/*
3168	* We found a suspension token in the caller's IPC space. Release a send right to indicate that
3169	* we are holding one less legacy hold on the task from this caller. If the release failed,
3170	* go ahead and drop all the rights, as someone either already released our holds or the task
3171	* is gone.
3172	*/
3173	if (kr == KERN_SUCCESS)
3174	ipc_right_dealloc(space, resume_port_name, resume_port_entry);
3175	else
3176	ipc_right_destroy(space, resume_port_name, resume_port_entry, FALSE, `0`);
3177	/ space unlocked /
3178	} else {
3179	is_write_unlock(space);
3180	if (kr == KERN_SUCCESS)
3181	printf("warning: %s(%d) performed out-of-band resume on pid %d\n",
3182	proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
3183	task_pid(task));
3184	}
3185
3186	return kr;
3187	}
3188
3189	/*
3190	* Suspend the target task.
3191	* Making/holding a token/reference/port is the callers responsibility.
3192	*/
3193	kern_return_t
3194	task_suspend_internal(task_t task)
3195	{
3196	kern_return_t kr;
3197
3198	if (task == TASK_NULL \|\| task == kernel_task)
3199	return (KERN_INVALID_ARGUMENT);
3200
3201	task_lock(task);
3202	kr = place_task_hold(task, TASK_HOLD_NORMAL);
3203	task_unlock(task);
3204	return (kr);
3205	}
3206
3207	/*
3208	* Suspend the target task, and return a suspension token. The token
3209	* represents a reference on the suspended task.
3210	*/
3211	kern_return_t
3212	task_suspend2(
3213	task_t task,
3214	task_suspension_token_t *suspend_token)
3215	{
3216	kern_return_t kr;
3217
3218	kr = task_suspend_internal(task);
3219	if (kr != KERN_SUCCESS) {
3220	*suspend_token = TASK_NULL;
3221	return (kr);
3222	}
3223
3224	/*
3225	* Take a reference on the target task and return that to the caller
3226	* as a "suspension token," which can be converted into an SO right to
3227	* the now-suspended task's resume port.
3228	*/
3229	task_reference_internal(task);
3230	*suspend_token = task;
3231
3232	return (KERN_SUCCESS);
3233	}
3234
3235	/*
3236	* Resume the task
3237	* (reference/token/port management is caller's responsibility).
3238	*/
3239	kern_return_t
3240	task_resume_internal(
3241	task_suspension_token_t task)
3242	{
3243	kern_return_t kr;
3244
3245	if (task == TASK_NULL \|\| task == kernel_task)
3246	return (KERN_INVALID_ARGUMENT);
3247
3248	task_lock(task);
3249	kr = release_task_hold(task, TASK_HOLD_NORMAL);
3250	task_unlock(task);
3251	return (kr);
3252	}
3253
3254	/*
3255	* Resume the task using a suspension token. Consumes the token's ref.
3256	*/
3257	kern_return_t
3258	task_resume2(
3259	task_suspension_token_t task)
3260	{
3261	kern_return_t kr;
3262
3263	kr = task_resume_internal(task);
3264	task_suspension_token_deallocate(task);
3265
3266	return (kr);
3267	}
3268
3269	boolean_t
3270	task_suspension_notify(mach_msg_header_t *request_header)
3271	{
3272	ipc_port_t port = (ipc_port_t) request_header->msgh_remote_port;
3273	task_t task = convert_port_to_task_suspension_token(port);
3274	mach_msg_type_number_t not_count;
3275
3276	if (task == TASK_NULL \|\| task == kernel_task)
3277	return TRUE; / nothing to do /
3278
3279	switch (request_header->msgh_id) {
3280
3281	case MACH_NOTIFY_SEND_ONCE:
3282	/ release the hold held by this specific send-once right /
3283	task_lock(task);
3284	release_task_hold(task, TASK_HOLD_NORMAL);
3285	task_unlock(task);
3286	break;
3287
3288	case MACH_NOTIFY_NO_SENDERS:
3289	not_count = ((mach_no_senders_notification_t *)request_header)->not_count;
3290
3291	task_lock(task);
3292	ip_lock(port);
3293	if (port->ip_mscount == not_count) {
3294
3295	/ release all the [remaining] outstanding legacy holds /
3296	assert(port->ip_nsrequest == IP_NULL);
3297	ip_unlock(port);
3298	release_task_hold(task, TASK_HOLD_LEGACY_ALL);
3299	task_unlock(task);
3300
3301	} else if (port->ip_nsrequest == IP_NULL) {
3302	ipc_port_t old_notify;
3303
3304	task_unlock(task);
3305	/ new send rights, re-arm notification at current make-send count /
3306	ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
3307	assert(old_notify == IP_NULL);
3308	/ port unlocked /
3309	} else {
3310	ip_unlock(port);
3311	task_unlock(task);
3312	}
3313	break;
3314
3315	default:
3316	break;
3317	}
3318
3319	task_suspension_token_deallocate(task); / drop token reference /
3320	return TRUE;
3321	}
3322
3323	kern_return_t
3324	task_pidsuspend_locked(task_t task)
3325	{
3326	kern_return_t kr;
3327
3328	if (task->pidsuspended) {
3329	kr = KERN_FAILURE;
3330	goto out;
3331	}
3332
3333	task->pidsuspended = TRUE;
3334
3335	kr = place_task_hold(task, TASK_HOLD_PIDSUSPEND);
3336	if (kr != KERN_SUCCESS) {
3337	task->pidsuspended = FALSE;
3338	}
3339	out:
3340	return(kr);
3341	}
3342
3343
3344	/*
3345	* task_pidsuspend:
3346	*
3347	* Suspends a task by placing a hold on its threads.
3348	*
3349	* Conditions:
3350	* The caller holds a reference to the task
3351	*/
3352	kern_return_t
3353	task_pidsuspend(
3354	task_t task)
3355	{
3356	kern_return_t kr;
3357
3358	if (task == TASK_NULL \|\| task == kernel_task)
3359	return (KERN_INVALID_ARGUMENT);
3360
3361	task_lock(task);
3362
3363	kr = task_pidsuspend_locked(task);
3364
3365	task_unlock(task);
3366
3367	return (kr);
3368	}
3369
3370	/*
3371	* task_pidresume:
3372	* Resumes a previously suspended task.
3373	*
3374	* Conditions:
3375	* The caller holds a reference to the task
3376	*/
3377	kern_return_t
3378	task_pidresume(
3379	task_t task)
3380	{
3381	kern_return_t kr;
3382
3383	if (task == TASK_NULL \|\| task == kernel_task)
3384	return (KERN_INVALID_ARGUMENT);
3385
3386	task_lock(task);
3387
3388	#if CONFIG_FREEZE
3389
3390	while (task->changing_freeze_state) {
3391
3392	assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
3393	task_unlock(task);
3394	thread_block(THREAD_CONTINUE_NULL);
3395
3396	task_lock(task);
3397	}
3398	task->changing_freeze_state = TRUE;
3399	#endif
3400
3401	kr = release_task_hold(task, TASK_HOLD_PIDSUSPEND);
3402
3403	task_unlock(task);
3404
3405	#if CONFIG_FREEZE
3406
3407	task_lock(task);
3408
3409	if (kr == KERN_SUCCESS)
3410	task->frozen = FALSE;
3411	task->changing_freeze_state = FALSE;
3412	thread_wakeup(&task->changing_freeze_state);
3413
3414	task_unlock(task);
3415	#endif
3416
3417	return (kr);
3418	}
3419
3420
#if DEVELOPMENT || DEBUG

extern void IOSleep(int);

/*
 * Strip all of the mappings from the pmap of the specified task so that
 * it has to re-fault every page it is actively using; this lets us
 * approximate the task's true working set.
 *
 * We only engage while at least one thread in the task is runnable, and
 * we keep sweeping for a while (arbitrarily capped at 100 sweeps, to be
 * revisited with experience) to get a better view into what areas within
 * a page are being visited, rather than only seeing the first fault of a
 * page after the task becomes runnable.  A future version may block until
 * a thread in the task is made runnable; for now the user-level debug
 * tool driving the sysctl polls periodically.
 *
 * Returns KERN_INVALID_ARGUMENT for a null task or the kernel task,
 * KERN_SUCCESS otherwise.
 */
kern_return_t
task_disconnect_page_mappings(task_t task)
{
	int	sweep = 0;

	if (task == TASK_NULL || task == kernel_task) {
		return (KERN_INVALID_ARGUMENT);
	}

	while (sweep < 100) {
		thread_t	thread;
		boolean_t	runnable = FALSE;
		boolean_t	do_unnest = FALSE;
		int		page_count;

		task_lock(task);

		/* is any thread of the task currently runnable? */
		queue_iterate(&task->threads, thread, thread_t, task_threads) {

			if (thread->state & TH_RUN) {
				runnable = TRUE;
				break;
			}
		}

		/* count one disconnect per call, not one per sweep */
		if (sweep == 0) {
			task->task_disconnected_count++;
		}

		/* request unnesting once: the first time we see the task runnable */
		if (runnable == TRUE && task->task_unnested == FALSE) {
			task->task_unnested = TRUE;
			do_unnest = TRUE;
		}
		task_unlock(task);

		if (runnable == FALSE) {
			break;
		}

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_START,
		    task, do_unnest, task->task_disconnected_count, 0, 0);

		page_count = vm_map_disconnect_page_mappings(task->map, do_unnest);

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_END,
		    task, page_count, 0, 0, 0);

		/* pause briefly after every fifth sweep */
		if ((sweep % 5) == 4) {
			IOSleep(1);
		}

		sweep++;
	}

	return (KERN_SUCCESS);
}

#endif
3496
3497
3498	#if CONFIG_FREEZE
3499
3500	/*
3501	* task_freeze:
3502	*
3503	* Freeze a task.
3504	*
3505	* Conditions:
3506	* The caller holds a reference to the task
3507	*/
3508	extern void vm_wake_compactor_swapper(void);
3509	extern queue_head_t c_swapout_list_head;
3510
3511	kern_return_t
3512	task_freeze(
3513	task_t task,
3514	uint32_t *purgeable_count,
3515	uint32_t *wired_count,
3516	uint32_t *clean_count,
3517	uint32_t *dirty_count,
3518	uint32_t dirty_budget,
3519	uint32_t *shared_count,
3520	int *freezer_error_code,
3521	boolean_t eval_only)
3522	{
3523	kern_return_t kr = KERN_SUCCESS;
3524
3525	if (task == TASK_NULL \|\| task == kernel_task)
3526	return (KERN_INVALID_ARGUMENT);
3527
3528	task_lock(task);
3529
3530	while (task->changing_freeze_state) {
3531
3532	assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
3533	task_unlock(task);
3534	thread_block(THREAD_CONTINUE_NULL);
3535
3536	task_lock(task);
3537	}
3538	if (task->frozen) {
3539	task_unlock(task);
3540	return (KERN_FAILURE);
3541	}
3542	task->changing_freeze_state = TRUE;
3543
3544	task_unlock(task);
3545
3546	kr = vm_map_freeze(task->map,
3547	purgeable_count,
3548	wired_count,
3549	clean_count,
3550	dirty_count,
3551	dirty_budget,
3552	shared_count,
3553	freezer_error_code,
3554	eval_only);
3555
3556	task_lock(task);
3557
3558	if ((kr == KERN_SUCCESS) && (eval_only == FALSE)) {
3559	task->frozen = TRUE;
3560	}
3561
3562	task->changing_freeze_state = FALSE;
3563	thread_wakeup(&task->changing_freeze_state);
3564
3565	task_unlock(task);
3566
3567	if (VM_CONFIG_COMPRESSOR_IS_PRESENT &&
3568	(eval_only == FALSE)) {
3569	vm_wake_compactor_swapper();
3570	/*
3571	* We do an explicit wakeup of the swapout thread here
3572	* because the compact_and_swap routines don't have
3573	* knowledge about these kind of "per-task packed c_segs"
3574	* and so will not be evaluating whether we need to do
3575	* a wakeup there.
3576	*/
3577	thread_wakeup((event_t)&c_swapout_list_head);
3578	}
3579
3580	return (kr);
3581	}
3582
3583	/*
3584	* task_thaw:
3585	*
3586	* Thaw a currently frozen task.
3587	*
3588	* Conditions:
3589	* The caller holds a reference to the task
3590	*/
3591	kern_return_t
3592	task_thaw(
3593	task_t task)
3594	{
3595	if (task == TASK_NULL \|\| task == kernel_task)
3596	return (KERN_INVALID_ARGUMENT);
3597
3598	task_lock(task);
3599
3600	while (task->changing_freeze_state) {
3601
3602	assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
3603	task_unlock(task);
3604	thread_block(THREAD_CONTINUE_NULL);
3605
3606	task_lock(task);
3607	}
3608	if (!task->frozen) {
3609	task_unlock(task);
3610	return (KERN_FAILURE);
3611	}
3612	task->frozen = FALSE;
3613
3614	task_unlock(task);
3615
3616	return (KERN_SUCCESS);
3617	}
3618
3619	#endif /* CONFIG_FREEZE */
3620
3621	kern_return_t
3622	host_security_set_task_token(
3623	host_security_t host_security,
3624	task_t task,
3625	security_token_t sec_token,
3626	audit_token_t audit_token,
3627	host_priv_t host_priv)
3628	{
3629	ipc_port_t host_port;
3630	kern_return_t kr;
3631
3632	if (task == TASK_NULL)
3633	return(KERN_INVALID_ARGUMENT);
3634
3635	if (host_security == HOST_NULL)
3636	return(KERN_INVALID_SECURITY);
3637
3638	task_lock(task);
3639	task->sec_token = sec_token;
3640	task->audit_token = audit_token;
3641
3642	task_unlock(task);
3643
3644	if (host_priv != HOST_PRIV_NULL) {
3645	kr = host_get_host_priv_port(host_priv, &host_port);
3646	} else {
3647	kr = host_get_host_port(host_priv_self(), &host_port);
3648	}
3649	assert(kr == KERN_SUCCESS);
3650	kr = task_set_special_port(task, TASK_HOST_PORT, host_port);
3651	return(kr);
3652	}
3653
3654	kern_return_t
3655	task_send_trace_memory(
3656	task_t target_task,
3657	__unused uint32_t pid,
3658	__unused uint64_t uniqueid)
3659	{
3660	kern_return_t kr = KERN_INVALID_ARGUMENT;
3661	if (target_task == TASK_NULL)
3662	return (KERN_INVALID_ARGUMENT);
3663
3664	#if CONFIG_ATM
3665	kr = atm_send_proc_inspect_notification(target_task,
3666	pid,
3667	uniqueid);
3668
3669	#endif
3670	return (kr);
3671	}
3672	/*
3673	* This routine was added, pretty much exclusively, for registering the
3674	* RPC glue vector for in-kernel short circuited tasks. Rather than
3675	* removing it completely, I have only disabled that feature (which was
3676	* the only feature at the time). It just appears that we are going to
3677	* want to add some user data to tasks in the future (i.e. bsd info,
3678	* task names, etc...), so I left it in the formal task interface.
3679	*/
3680	kern_return_t
3681	task_set_info(
3682	task_t task,
3683	task_flavor_t flavor,
3684	__unused task_info_t task_info_in, / pointer to IN array /
3685	__unused mach_msg_type_number_t task_info_count)
3686	{
3687	if (task == TASK_NULL)
3688	return(KERN_INVALID_ARGUMENT);
3689
3690	switch (flavor) {
3691
3692	#if CONFIG_ATM
3693	case TASK_TRACE_MEMORY_INFO:
3694	{
3695	if (task_info_count != TASK_TRACE_MEMORY_INFO_COUNT)
3696	return (KERN_INVALID_ARGUMENT);
3697
3698	assert(task_info_in != NULL);
3699	task_trace_memory_info_t mem_info;
3700	mem_info = (task_trace_memory_info_t) task_info_in;
3701	kern_return_t kr = atm_register_trace_memory(task,
3702	mem_info->user_memory_address,
3703	mem_info->buffer_size);
3704	return kr;
3705	}
3706
3707	#endif
3708	default:
3709	return (KERN_INVALID_ARGUMENT);
3710	}
3711	return (KERN_SUCCESS);
3712	}
3713
3714	int radar_20146450 = `1`;
3715	kern_return_t
3716	task_info(
3717	task_t task,
3718	task_flavor_t flavor,
3719	task_info_t task_info_out,
3720	mach_msg_type_number_t *task_info_count)
3721	{
3722	kern_return_t error = KERN_SUCCESS;
3723	mach_msg_type_number_t original_task_info_count;
3724
3725	if (task == TASK_NULL)
3726	return (KERN_INVALID_ARGUMENT);
3727
3728	original_task_info_count = *task_info_count;
3729	task_lock(task);
3730
3731	if ((task != current_task()) && (!task->active)) {
3732	task_unlock(task);
3733	return (KERN_INVALID_ARGUMENT);
3734	}
3735
3736	switch (flavor) {
3737
3738	case TASK_BASIC_INFO_32:
3739	case TASK_BASIC2_INFO_32:
3740	#if defined(__arm__) \|\| defined(__arm64__)
3741	case TASK_BASIC_INFO_64:
3742	#endif
3743	{
3744	task_basic_info_32_t basic_info;
3745	vm_map_t map;
3746	clock_sec_t secs;
3747	clock_usec_t usecs;
3748
3749	if (*task_info_count < TASK_BASIC_INFO_32_COUNT) {
3750	error = KERN_INVALID_ARGUMENT;
3751	break;
3752	}
3753
3754	basic_info = (task_basic_info_32_t)task_info_out;
3755
3756	map = (task == kernel_task)? kernel_map: task->map;
3757	basic_info->virtual_size = (typeof(basic_info->virtual_size))map->size;
3758	if (flavor == TASK_BASIC2_INFO_32) {
3759	/*
3760	* The "BASIC2" flavor gets the maximum resident
3761	* size instead of the current resident size...
3762	*/
3763	basic_info->resident_size = pmap_resident_max(map->pmap);
3764	} else {
3765	basic_info->resident_size = pmap_resident_count(map->pmap);
3766	}
3767	basic_info->resident_size *= PAGE_SIZE;
3768
3769	basic_info->policy = ((task != kernel_task)?
3770	POLICY_TIMESHARE: POLICY_RR);
3771	basic_info->suspend_count = task->user_stop_count;
3772
3773	absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
3774	basic_info->user_time.seconds =
3775	(typeof(basic_info->user_time.seconds))secs;
3776	basic_info->user_time.microseconds = usecs;
3777
3778	absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
3779	basic_info->system_time.seconds =
3780	(typeof(basic_info->system_time.seconds))secs;
3781	basic_info->system_time.microseconds = usecs;
3782
3783	*task_info_count = TASK_BASIC_INFO_32_COUNT;
3784	break;
3785	}
3786
3787	#if defined(__arm__) \|\| defined(__arm64__)
3788	case TASK_BASIC_INFO_64_2:
3789	{
3790	task_basic_info_64_2_t basic_info;
3791	vm_map_t map;
3792	clock_sec_t secs;
3793	clock_usec_t usecs;
3794
3795	if (*task_info_count < TASK_BASIC_INFO_64_2_COUNT) {
3796	error = KERN_INVALID_ARGUMENT;
3797	break;
3798	}
3799
3800	basic_info = (task_basic_info_64_2_t)task_info_out;
3801
3802	map = (task == kernel_task)? kernel_map: task->map;
3803	basic_info->virtual_size = map->size;
3804	basic_info->resident_size =
3805	(mach_vm_size_t)(pmap_resident_count(map->pmap))
3806	* PAGE_SIZE_64;
3807
3808	basic_info->policy = ((task != kernel_task)?
3809	POLICY_TIMESHARE: POLICY_RR);
3810	basic_info->suspend_count = task->user_stop_count;
3811
3812	absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
3813	basic_info->user_time.seconds =
3814	(typeof(basic_info->user_time.seconds))secs;
3815	basic_info->user_time.microseconds = usecs;
3816
3817	absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
3818	basic_info->system_time.seconds =
3819	(typeof(basic_info->system_time.seconds))secs;
3820	basic_info->system_time.microseconds = usecs;
3821
3822	*task_info_count = TASK_BASIC_INFO_64_2_COUNT;
3823	break;
3824	}
3825
3826	#else /* defined(__arm__) \|\| defined(__arm64__) */
3827	case TASK_BASIC_INFO_64:
3828	{
3829	task_basic_info_64_t basic_info;
3830	vm_map_t map;
3831	clock_sec_t secs;
3832	clock_usec_t usecs;
3833
3834	if (*task_info_count < TASK_BASIC_INFO_64_COUNT) {
3835	error = KERN_INVALID_ARGUMENT;
3836	break;
3837	}
3838
3839	basic_info = (task_basic_info_64_t)task_info_out;
3840
3841	map = (task == kernel_task)? kernel_map: task->map;
3842	basic_info->virtual_size = map->size;
3843	basic_info->resident_size =
3844	(mach_vm_size_t)(pmap_resident_count(map->pmap))
3845	* PAGE_SIZE_64;
3846
3847	basic_info->policy = ((task != kernel_task)?
3848	POLICY_TIMESHARE: POLICY_RR);
3849	basic_info->suspend_count = task->user_stop_count;
3850
3851	absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
3852	basic_info->user_time.seconds =
3853	(typeof(basic_info->user_time.seconds))secs;
3854	basic_info->user_time.microseconds = usecs;
3855
3856	absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
3857	basic_info->system_time.seconds =
3858	(typeof(basic_info->system_time.seconds))secs;
3859	basic_info->system_time.microseconds = usecs;
3860
3861	*task_info_count = TASK_BASIC_INFO_64_COUNT;
3862	break;
3863	}
3864	#endif /* defined(__arm__) \|\| defined(__arm64__) */
3865
3866	case MACH_TASK_BASIC_INFO:
3867	{
3868	mach_task_basic_info_t basic_info;
3869	vm_map_t map;
3870	clock_sec_t secs;
3871	clock_usec_t usecs;
3872
3873	if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) {
3874	error = KERN_INVALID_ARGUMENT;
3875	break;
3876	}
3877
3878	basic_info = (mach_task_basic_info_t)task_info_out;
3879
3880	map = (task == kernel_task) ? kernel_map : task->map;
3881
3882	basic_info->virtual_size = map->size;
3883
3884	basic_info->resident_size =
3885	(mach_vm_size_t)(pmap_resident_count(map->pmap));
3886	basic_info->resident_size *= PAGE_SIZE_64;
3887
3888	basic_info->resident_size_max =
3889	(mach_vm_size_t)(pmap_resident_max(map->pmap));
3890	basic_info->resident_size_max *= PAGE_SIZE_64;
3891
3892	basic_info->policy = ((task != kernel_task) ?
3893	POLICY_TIMESHARE : POLICY_RR);
3894
3895	basic_info->suspend_count = task->user_stop_count;
3896
3897	absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
3898	basic_info->user_time.seconds =
3899	(typeof(basic_info->user_time.seconds))secs;
3900	basic_info->user_time.microseconds = usecs;
3901
3902	absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
3903	basic_info->system_time.seconds =
3904	(typeof(basic_info->system_time.seconds))secs;
3905	basic_info->system_time.microseconds = usecs;
3906
3907	*task_info_count = MACH_TASK_BASIC_INFO_COUNT;
3908	break;
3909	}
3910
3911	case TASK_THREAD_TIMES_INFO:
3912	{
3913	task_thread_times_info_t times_info;
3914	thread_t thread;
3915
3916	if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) {
3917	error = KERN_INVALID_ARGUMENT;
3918	break;
3919	}
3920
3921	times_info = (task_thread_times_info_t) task_info_out;
3922	times_info->user_time.seconds = `0`;
3923	times_info->user_time.microseconds = `0`;
3924	times_info->system_time.seconds = `0`;
3925	times_info->system_time.microseconds = `0`;
3926
3927
3928	queue_iterate(&task->threads, thread, thread_t, task_threads) {
3929	time_value_t user_time, system_time;
3930
3931	if (thread->options & TH_OPT_IDLE_THREAD)
3932	continue;
3933
3934	thread_read_times(thread, &user_time, &system_time, NULL);
3935
3936	time_value_add(&times_info->user_time, &user_time);
3937	time_value_add(&times_info->system_time, &system_time);
3938	}
3939
3940	*task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
3941	break;
3942	}
3943
3944	case TASK_ABSOLUTETIME_INFO:
3945	{
3946	task_absolutetime_info_t info;
3947	thread_t thread;
3948
3949	if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) {
3950	error = KERN_INVALID_ARGUMENT;
3951	break;
3952	}
3953
3954	info = (task_absolutetime_info_t)task_info_out;
3955	info->threads_user = info->threads_system = `0`;
3956
3957
3958	info->total_user = task->total_user_time;
3959	info->total_system = task->total_system_time;
3960
3961	queue_iterate(&task->threads, thread, thread_t, task_threads) {
3962	uint64_t tval;
3963	spl_t x;
3964
3965	if (thread->options & TH_OPT_IDLE_THREAD)
3966	continue;
3967
3968	x = splsched();
3969	thread_lock(thread);
3970
3971	tval = timer_grab(&thread->user_timer);
3972	info->threads_user += tval;
3973	info->total_user += tval;
3974
3975	tval = timer_grab(&thread->system_timer);
3976	if (thread->precise_user_kernel_time) {
3977	info->threads_system += tval;
3978	info->total_system += tval;
3979	} else {
3980	/ system_timer may represent either sys or user /
3981	info->threads_user += tval;
3982	info->total_user += tval;
3983	}
3984
3985	thread_unlock(thread);
3986	splx(x);
3987	}
3988
3989
3990	*task_info_count = TASK_ABSOLUTETIME_INFO_COUNT;
3991	break;
3992	}
3993
3994	case TASK_DYLD_INFO:
3995	{
3996	task_dyld_info_t info;
3997
3998	/*
3999	* We added the format field to TASK_DYLD_INFO output. For
4000	* temporary backward compatibility, accept the fact that
4001	* clients may ask for the old version - distinguished by the
4002	* size of the expected result structure.
4003	*/
4004	#define TASK_LEGACY_DYLD_INFO_COUNT \
4005	offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)
4006
4007	if (*task_info_count < TASK_LEGACY_DYLD_INFO_COUNT) {
4008	error = KERN_INVALID_ARGUMENT;
4009	break;
4010	}
4011
4012	info = (task_dyld_info_t)task_info_out;
4013	info->all_image_info_addr = task->all_image_info_addr;
4014	info->all_image_info_size = task->all_image_info_size;
4015
4016	/ only set format on output for those expecting it /
4017	if (*task_info_count >= TASK_DYLD_INFO_COUNT) {
4018	info->all_image_info_format = task_has_64Bit_addr(task) ?
4019	TASK_DYLD_ALL_IMAGE_INFO_64 :
4020	TASK_DYLD_ALL_IMAGE_INFO_32 ;
4021	*task_info_count = TASK_DYLD_INFO_COUNT;
4022	} else {
4023	*task_info_count = TASK_LEGACY_DYLD_INFO_COUNT;
4024	}
4025	break;
4026	}
4027
4028	case TASK_EXTMOD_INFO:
4029	{
4030	task_extmod_info_t info;
4031	void *p;
4032
4033	if (*task_info_count < TASK_EXTMOD_INFO_COUNT) {
4034	error = KERN_INVALID_ARGUMENT;
4035	break;
4036	}
4037
4038	info = (task_extmod_info_t)task_info_out;
4039
4040	p = get_bsdtask_info(task);
4041	if (p) {
4042	proc_getexecutableuuid(p, info->task_uuid, sizeof(info->task_uuid));
4043	} else {
4044	bzero(info->task_uuid, sizeof(info->task_uuid));
4045	}
4046	info->extmod_statistics = task->extmod_statistics;
4047	*task_info_count = TASK_EXTMOD_INFO_COUNT;
4048
4049	break;
4050	}
4051
4052	case TASK_KERNELMEMORY_INFO:
4053	{
4054	task_kernelmemory_info_t tkm_info;
4055	ledger_amount_t credit, debit;
4056
4057	if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
4058	error = KERN_INVALID_ARGUMENT;
4059	break;
4060	}
4061
4062	tkm_info = (task_kernelmemory_info_t) task_info_out;
4063	tkm_info->total_palloc = `0`;
4064	tkm_info->total_pfree = `0`;
4065	tkm_info->total_salloc = `0`;
4066	tkm_info->total_sfree = `0`;
4067
4068	if (task == kernel_task) {
4069	/*
4070	* All shared allocs/frees from other tasks count against
4071	* the kernel private memory usage. If we are looking up
4072	* info for the kernel task, gather from everywhere.
4073	*/
4074	task_unlock(task);
4075
4076	/ start by accounting for all the terminated tasks against the kernel /
4077	tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
4078	tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;
4079
4080	/ count all other task/thread shared alloc/free against the kernel /
4081	lck_mtx_lock(&tasks_threads_lock);
4082
4083	/ XXX this really shouldn't be using the function parameter 'task' as a local var! /
4084	queue_iterate(&tasks, task, task_t, tasks) {
4085	if (task == kernel_task) {
4086	if (ledger_get_entries(task->ledger,
4087	task_ledgers.tkm_private, &credit,
4088	&debit) == KERN_SUCCESS) {
4089	tkm_info->total_palloc += credit;
4090	tkm_info->total_pfree += debit;
4091	}
4092	}
4093	if (!ledger_get_entries(task->ledger,
4094	task_ledgers.tkm_shared, &credit, &debit)) {
4095	tkm_info->total_palloc += credit;
4096	tkm_info->total_pfree += debit;
4097	}
4098	}
4099	lck_mtx_unlock(&tasks_threads_lock);
4100	} else {
4101	if (!ledger_get_entries(task->ledger,
4102	task_ledgers.tkm_private, &credit, &debit)) {
4103	tkm_info->total_palloc = credit;
4104	tkm_info->total_pfree = debit;
4105	}
4106	if (!ledger_get_entries(task->ledger,
4107	task_ledgers.tkm_shared, &credit, &debit)) {
4108	tkm_info->total_salloc = credit;
4109	tkm_info->total_sfree = debit;
4110	}
4111	task_unlock(task);
4112	}
4113
4114	*task_info_count = TASK_KERNELMEMORY_INFO_COUNT;
4115	return KERN_SUCCESS;
4116	}
4117
4118	/ OBSOLETE /
4119	case TASK_SCHED_FIFO_INFO:
4120	{
4121
4122	if (*task_info_count < POLICY_FIFO_BASE_COUNT) {
4123	error = KERN_INVALID_ARGUMENT;
4124	break;
4125	}
4126
4127	error = KERN_INVALID_POLICY;
4128	break;
4129	}
4130
4131	/ OBSOLETE /
4132	case TASK_SCHED_RR_INFO:
4133	{
4134	policy_rr_base_t rr_base;
4135	uint32_t quantum_time;
4136	uint64_t quantum_ns;
4137
4138	if (*task_info_count < POLICY_RR_BASE_COUNT) {
4139	error = KERN_INVALID_ARGUMENT;
4140	break;
4141	}
4142
4143	rr_base = (policy_rr_base_t) task_info_out;
4144
4145	if (task != kernel_task) {
4146	error = KERN_INVALID_POLICY;
4147	break;
4148	}
4149
4150	rr_base->base_priority = task->priority;
4151
4152	quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
4153	absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
4154
4155	rr_base->quantum = (uint32_t)(quantum_ns / `1000` / `1000`);
4156
4157	*task_info_count = POLICY_RR_BASE_COUNT;
4158	break;
4159	}
4160
4161	/ OBSOLETE /
4162	case TASK_SCHED_TIMESHARE_INFO:
4163	{
4164	policy_timeshare_base_t ts_base;
4165
4166	if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) {
4167	error = KERN_INVALID_ARGUMENT;
4168	break;
4169	}
4170
4171	ts_base = (policy_timeshare_base_t) task_info_out;
4172
4173	if (task == kernel_task) {
4174	error = KERN_INVALID_POLICY;
4175	break;
4176	}
4177
4178	ts_base->base_priority = task->priority;
4179
4180	*task_info_count = POLICY_TIMESHARE_BASE_COUNT;
4181	break;
4182	}
4183
4184	case TASK_SECURITY_TOKEN:
4185	{
4186	security_token_t *sec_token_p;
4187
4188	if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) {
4189	error = KERN_INVALID_ARGUMENT;
4190	break;
4191	}
4192
4193	sec_token_p = (security_token_t *) task_info_out;
4194
4195	*sec_token_p = task->sec_token;
4196
4197	*task_info_count = TASK_SECURITY_TOKEN_COUNT;
4198	break;
4199	}
4200
4201	case TASK_AUDIT_TOKEN:
4202	{
4203	audit_token_t *audit_token_p;
4204
4205	if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) {
4206	error = KERN_INVALID_ARGUMENT;
4207	break;
4208	}
4209
4210	audit_token_p = (audit_token_t *) task_info_out;
4211
4212	*audit_token_p = task->audit_token;
4213
4214	*task_info_count = TASK_AUDIT_TOKEN_COUNT;
4215	break;
4216	}
4217
4218	case TASK_SCHED_INFO:
4219	error = KERN_INVALID_ARGUMENT;
4220	break;
4221
4222	case TASK_EVENTS_INFO:
4223	{
4224	task_events_info_t events_info;
4225	thread_t thread;
4226
4227	if (*task_info_count < TASK_EVENTS_INFO_COUNT) {
4228	error = KERN_INVALID_ARGUMENT;
4229	break;
4230	}
4231
4232	events_info = (task_events_info_t) task_info_out;
4233
4234
4235	events_info->faults = task->faults;
4236	events_info->pageins = task->pageins;
4237	events_info->cow_faults = task->cow_faults;
4238	events_info->messages_sent = task->messages_sent;
4239	events_info->messages_received = task->messages_received;
4240	events_info->syscalls_mach = task->syscalls_mach;
4241	events_info->syscalls_unix = task->syscalls_unix;
4242
4243	events_info->csw = task->c_switch;
4244
4245	queue_iterate(&task->threads, thread, thread_t, task_threads) {
4246	events_info->csw += thread->c_switch;
4247	events_info->syscalls_mach += thread->syscalls_mach;
4248	events_info->syscalls_unix += thread->syscalls_unix;
4249	}
4250
4251
4252	*task_info_count = TASK_EVENTS_INFO_COUNT;
4253	break;
4254	}
4255	case TASK_AFFINITY_TAG_INFO:
4256	{
4257	if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) {
4258	error = KERN_INVALID_ARGUMENT;
4259	break;
4260	}
4261
4262	error = task_affinity_info(task, task_info_out, task_info_count);
4263	break;
4264	}
4265	case TASK_POWER_INFO:
4266	{
4267	if (*task_info_count < TASK_POWER_INFO_COUNT) {
4268	error = KERN_INVALID_ARGUMENT;
4269	break;
4270	}
4271
4272	task_power_info_locked(task, (task_power_info_t)task_info_out, NULL, NULL);
4273	break;
4274	}
4275
4276	case TASK_POWER_INFO_V2:
4277	{
4278	if (*task_info_count < TASK_POWER_INFO_V2_COUNT_OLD) {
4279	error = KERN_INVALID_ARGUMENT;
4280	break;
4281	}
4282	task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out;
4283	task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy, tpiv2);
4284	break;
4285	}
4286
4287	case TASK_VM_INFO:
4288	case TASK_VM_INFO_PURGEABLE:
4289	{
4290	task_vm_info_t vm_info;
4291	vm_map_t map;
4292
4293	if (*task_info_count < TASK_VM_INFO_REV0_COUNT) {
4294	error = KERN_INVALID_ARGUMENT;
4295	break;
4296	}
4297
4298	vm_info = (task_vm_info_t)task_info_out;
4299
4300	if (task == kernel_task) {
4301	map = kernel_map;
4302	/ no lock /
4303	} else {
4304	map = task->map;
4305	vm_map_lock_read(map);
4306	}
4307
4308	vm_info->virtual_size = (typeof(vm_info->virtual_size))map->size;
4309	vm_info->region_count = map->hdr.nentries;
4310	vm_info->page_size = vm_map_page_size(map);
4311
4312	vm_info->resident_size = pmap_resident_count(map->pmap);
4313	vm_info->resident_size *= PAGE_SIZE;
4314	vm_info->resident_size_peak = pmap_resident_max(map->pmap);
4315	vm_info->resident_size_peak *= PAGE_SIZE;
4316
4317	#define _VM_INFO(_name) \
4318	vm_info->_name = ((mach_vm_size_t) map->pmap->stats._name) * PAGE_SIZE
4319
4320	_VM_INFO(device);
4321	_VM_INFO(device_peak);
4322	_VM_INFO(external);
4323	_VM_INFO(external_peak);
4324	_VM_INFO(internal);
4325	_VM_INFO(internal_peak);
4326	_VM_INFO(reusable);
4327	_VM_INFO(reusable_peak);
4328	_VM_INFO(compressed);
4329	_VM_INFO(compressed_peak);
4330	_VM_INFO(compressed_lifetime);
4331
4332	vm_info->purgeable_volatile_pmap = `0`;
4333	vm_info->purgeable_volatile_resident = `0`;
4334	vm_info->purgeable_volatile_virtual = `0`;
4335	if (task == kernel_task) {
4336	/*
4337	* We do not maintain the detailed stats for the
4338	* kernel_pmap, so just count everything as
4339	* "internal"...
4340	*/
4341	vm_info->internal = vm_info->resident_size;
4342	/*
4343	* ... but since the memory held by the VM compressor
4344	* in the kernel address space ought to be attributed
4345	* to user-space tasks, we subtract it from "internal"
4346	* to give memory reporting tools a more accurate idea
4347	* of what the kernel itself is actually using, instead
4348	* of making it look like the kernel is leaking memory
4349	* when the system is under memory pressure.
4350	*/
4351	vm_info->internal -= (VM_PAGE_COMPRESSOR_COUNT *
4352	PAGE_SIZE);
4353	} else {
4354	mach_vm_size_t volatile_virtual_size;
4355	mach_vm_size_t volatile_resident_size;
4356	mach_vm_size_t volatile_compressed_size;
4357	mach_vm_size_t volatile_pmap_size;
4358	mach_vm_size_t volatile_compressed_pmap_size;
4359	kern_return_t kr;
4360
4361	if (flavor == TASK_VM_INFO_PURGEABLE) {
4362	kr = vm_map_query_volatile(
4363	map,
4364	&volatile_virtual_size,
4365	&volatile_resident_size,
4366	&volatile_compressed_size,
4367	&volatile_pmap_size,
4368	&volatile_compressed_pmap_size);
4369	if (kr == KERN_SUCCESS) {
4370	vm_info->purgeable_volatile_pmap =
4371	volatile_pmap_size;
4372	if (radar_20146450) {
4373	vm_info->compressed -=
4374	volatile_compressed_pmap_size;
4375	}
4376	vm_info->purgeable_volatile_resident =
4377	volatile_resident_size;
4378	vm_info->purgeable_volatile_virtual =
4379	volatile_virtual_size;
4380	}
4381	}
4382	}
4383	*task_info_count = TASK_VM_INFO_REV0_COUNT;
4384
4385	if (original_task_info_count >= TASK_VM_INFO_REV1_COUNT) {
4386	vm_info->phys_footprint =
4387	(mach_vm_size_t) get_task_phys_footprint(task);
4388	*task_info_count = TASK_VM_INFO_REV1_COUNT;
4389	}
4390	if (original_task_info_count >= TASK_VM_INFO_REV2_COUNT) {
4391	vm_info->min_address = map->min_offset;
4392	vm_info->max_address = map->max_offset;
4393	*task_info_count = TASK_VM_INFO_REV2_COUNT;
4394	}
4395
4396	if (task != kernel_task) {
4397	vm_map_unlock_read(map);
4398	}
4399
4400	break;
4401	}
4402
4403	case TASK_WAIT_STATE_INFO:
4404	{
4405	/*
4406	* Deprecated flavor. Currently allowing some results until all users
4407	* stop calling it. The results may not be accurate.
4408	*/
4409	task_wait_state_info_t wait_state_info;
4410	uint64_t total_sfi_ledger_val = `0`;
4411
4412	if (*task_info_count < TASK_WAIT_STATE_INFO_COUNT) {
4413	error = KERN_INVALID_ARGUMENT;
4414	break;
4415	}
4416
4417	wait_state_info = (task_wait_state_info_t) task_info_out;
4418
4419	wait_state_info->total_wait_state_time = `0`;
4420	bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved));
4421
4422	#if CONFIG_SCHED_SFI
4423	int i, prev_lentry = -`1`;
4424	int64_t val_credit, val_debit;
4425
4426	for (i = `0`; i < MAX_SFI_CLASS_ID; i++){
4427	val_credit =`0`;
4428	/*
4429	* checking with prev_lentry != entry ensures adjacent classes
4430	* which share the same ledger do not add wait times twice.
4431	* Note: Use ledger() call to get data for each individual sfi class.
4432	*/
4433	if (prev_lentry != task_ledgers.sfi_wait_times[i] &&
4434	KERN_SUCCESS == ledger_get_entries(task->ledger,
4435	task_ledgers.sfi_wait_times[i], &val_credit, &val_debit)) {
4436	total_sfi_ledger_val += val_credit;
4437	}
4438	prev_lentry = task_ledgers.sfi_wait_times[i];
4439	}
4440
4441	#endif /* CONFIG_SCHED_SFI */
4442	wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val;
4443	*task_info_count = TASK_WAIT_STATE_INFO_COUNT;
4444
4445	break;
4446	}
4447	case TASK_VM_INFO_PURGEABLE_ACCOUNT:
4448	{
4449	#if DEVELOPMENT \|\| DEBUG
4450	pvm_account_info_t acnt_info;
4451
4452	if (*task_info_count < PVM_ACCOUNT_INFO_COUNT) {
4453	error = KERN_INVALID_ARGUMENT;
4454	break;
4455	}
4456
4457	if (task_info_out == NULL) {
4458	error = KERN_INVALID_ARGUMENT;
4459	break;
4460	}
4461
4462	acnt_info = (pvm_account_info_t) task_info_out;
4463
4464	error = vm_purgeable_account(task, acnt_info);
4465
4466	*task_info_count = PVM_ACCOUNT_INFO_COUNT;
4467
4468	break;
4469	#else /* DEVELOPMENT \|\| DEBUG */
4470	error = KERN_NOT_SUPPORTED;
4471	break;
4472	#endif /* DEVELOPMENT \|\| DEBUG */
4473	}
4474	case TASK_FLAGS_INFO:
4475	{
4476	task_flags_info_t flags_info;
4477
4478	if (*task_info_count < TASK_FLAGS_INFO_COUNT) {
4479	error = KERN_INVALID_ARGUMENT;
4480	break;
4481	}
4482
4483	flags_info = (task_flags_info_t)task_info_out;
4484
4485	/ only publish the 64-bit flag of the task /
4486	flags_info->flags = task->t_flags & (TF_64B_ADDR \| TF_64B_DATA);
4487
4488	*task_info_count = TASK_FLAGS_INFO_COUNT;
4489	break;
4490	}
4491
4492	case TASK_DEBUG_INFO_INTERNAL:
4493	{
4494	#if DEVELOPMENT \|\| DEBUG
4495	task_debug_info_internal_t dbg_info;
4496	if (*task_info_count < TASK_DEBUG_INFO_INTERNAL_COUNT) {
4497	error = KERN_NOT_SUPPORTED;
4498	break;
4499	}
4500
4501	if (task_info_out == NULL) {
4502	error = KERN_INVALID_ARGUMENT;
4503	break;
4504	}
4505	dbg_info = (task_debug_info_internal_t) task_info_out;
4506	dbg_info->ipc_space_size = `0`;
4507	if (task->itk_space){
4508	dbg_info->ipc_space_size = task->itk_space->is_table_size;
4509	}
4510
4511	dbg_info->suspend_count = task->suspend_count;
4512
4513	error = KERN_SUCCESS;
4514	*task_info_count = TASK_DEBUG_INFO_INTERNAL_COUNT;
4515	break;
4516	#else /* DEVELOPMENT \|\| DEBUG */
4517	error = KERN_NOT_SUPPORTED;
4518	break;
4519	#endif /* DEVELOPMENT \|\| DEBUG */
4520	}
4521	default:
4522	error = KERN_INVALID_ARGUMENT;
4523	}
4524
4525	task_unlock(task);
4526	return (error);
4527	}
4528
4529	/*
4530	* task_info_from_user
4531	*
4532	* When calling task_info from user space,
4533	* this function will be executed as mig server side
4534	* instead of calling directly into task_info.
4535	* This gives the possibility to perform more security
4536	* checks on task_port.
4537	*
4538	* In the case of TASK_DYLD_INFO, we require the more
4539	* privileged task_port not the less-privileged task_name_port.
4540	*
4541	*/
4542	kern_return_t
4543	task_info_from_user(
4544	mach_port_t task_port,
4545	task_flavor_t flavor,
4546	task_info_t task_info_out,
4547	mach_msg_type_number_t *task_info_count)
4548	{
4549	task_t task;
4550	kern_return_t ret;
4551
4552	if (flavor == TASK_DYLD_INFO)
4553	task = convert_port_to_task(task_port);
4554	else
4555	task = convert_port_to_task_name(task_port);
4556
4557	ret = task_info(task, flavor, task_info_out, task_info_count);
4558
4559	task_deallocate(task);
4560
4561	return ret;
4562	}
4563
4564	/*
4565	* task_power_info
4566	*
4567	* Returns power stats for the task.
4568	* Note: Called with task locked.
4569	*/
4570	void
4571	task_power_info_locked(
4572	task_t task,
4573	task_power_info_t info,
4574	gpu_energy_data_t ginfo,
4575	task_power_info_v2_t infov2)
4576	{
4577	thread_t thread;
4578	ledger_amount_t tmp;
4579
4580	task_lock_assert_owned(task);
4581
4582	ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
4583	(ledger_amount_t *)&info->task_interrupt_wakeups, &tmp);
4584	ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
4585	(ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp);
4586
4587	info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1;
4588	info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2;
4589
4590	info->total_user = task->total_user_time;
4591	info->total_system = task->total_system_time;
4592
4593	#if CONFIG_EMBEDDED
4594	if (infov2) {
4595	infov2->task_energy = task->task_energy;
4596	}
4597	#endif
4598
4599	if (ginfo) {
4600	ginfo->task_gpu_utilisation = task->task_gpu_ns;
4601	}
4602
4603	if (infov2) {
4604	infov2->task_ptime = task->total_ptime;
4605	infov2->task_pset_switches = task->ps_switch;
4606	}
4607
4608	queue_iterate(&task->threads, thread, thread_t, task_threads) {
4609	uint64_t tval;
4610	spl_t x;
4611
4612	if (thread->options & TH_OPT_IDLE_THREAD)
4613	continue;
4614
4615	x = splsched();
4616	thread_lock(thread);
4617
4618	info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
4619	info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
4620
4621	#if CONFIG_EMBEDDED
4622	if (infov2) {
4623	infov2->task_energy += ml_energy_stat(thread);
4624	}
4625	#endif
4626
4627	tval = timer_grab(&thread->user_timer);
4628	info->total_user += tval;
4629
4630	if (infov2) {
4631	tval = timer_grab(&thread->ptime);
4632	infov2->task_ptime += tval;
4633	infov2->task_pset_switches += thread->ps_switch;
4634	}
4635
4636	tval = timer_grab(&thread->system_timer);
4637	if (thread->precise_user_kernel_time) {
4638	info->total_system += tval;
4639	} else {
4640	/ system_timer may represent either sys or user /
4641	info->total_user += tval;
4642	}
4643
4644	if (ginfo) {
4645	ginfo->task_gpu_utilisation += ml_gpu_stat(thread);
4646	}
4647	thread_unlock(thread);
4648	splx(x);
4649	}
4650	}
4651
4652	/*
4653	* task_gpu_utilisation
4654	*
4655	* Returns the total gpu time used by the all the threads of the task
4656	* (both dead and alive)
4657	*/
4658	uint64_t
4659	task_gpu_utilisation(
4660	task_t task)
4661	{
4662	uint64_t gpu_time = `0`;
4663	#if !CONFIG_EMBEDDED
4664	thread_t thread;
4665
4666	task_lock(task);
4667	gpu_time += task->task_gpu_ns;
4668
4669	queue_iterate(&task->threads, thread, thread_t, task_threads) {
4670	spl_t x;
4671	x = splsched();
4672	thread_lock(thread);
4673	gpu_time += ml_gpu_stat(thread);
4674	thread_unlock(thread);
4675	splx(x);
4676	}
4677
4678	task_unlock(task);
4679	#else /* CONFIG_EMBEDDED */
4680	/ silence compiler warning /
4681	(void)task;
4682	#endif /* !CONFIG_EMBEDDED */
4683	return gpu_time;
4684	}
4685
4686	/*
4687	* task_energy
4688	*
4689	* Returns the total energy used by the all the threads of the task
4690	* (both dead and alive)
4691	*/
4692	uint64_t
4693	task_energy(
4694	task_t task)
4695	{
4696	uint64_t energy = `0`;
4697	thread_t thread;
4698
4699	task_lock(task);
4700	energy += task->task_energy;
4701
4702	queue_iterate(&task->threads, thread, thread_t, task_threads) {
4703	spl_t x;
4704	x = splsched();
4705	thread_lock(thread);
4706	energy += ml_energy_stat(thread);
4707	thread_unlock(thread);
4708	splx(x);
4709	}
4710
4711	task_unlock(task);
4712	return energy;
4713	}
4714
4715
4716	uint64_t
4717	task_cpu_ptime(
4718	__unused task_t task)
4719	{
4720	return `0`;
4721	}
4722
4723
4724	/ This function updates the cpu time in the arrays for each*
4725	* effective and requested QoS class
4726	*/
4727	void
4728	task_update_cpu_time_qos_stats(
4729	task_t task,
4730	uint64_t *eqos_stats,
4731	uint64_t *rqos_stats)
4732	{
4733	if (!eqos_stats && !rqos_stats) {
4734	return;
4735	}
4736
4737	task_lock(task);
4738	thread_t thread;
4739	queue_iterate(&task->threads, thread, thread_t, task_threads) {
4740	if (thread->options & TH_OPT_IDLE_THREAD) {
4741	continue;
4742	}
4743
4744	thread_update_qos_cpu_time(thread);
4745	}
4746
4747	if (eqos_stats) {
4748	eqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_eqos_stats.cpu_time_qos_default;
4749	eqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_eqos_stats.cpu_time_qos_maintenance;
4750	eqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_eqos_stats.cpu_time_qos_background;
4751	eqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_eqos_stats.cpu_time_qos_utility;
4752	eqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_eqos_stats.cpu_time_qos_legacy;
4753	eqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_eqos_stats.cpu_time_qos_user_initiated;
4754	eqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_eqos_stats.cpu_time_qos_user_interactive;
4755	}
4756
4757	if (rqos_stats) {
4758	rqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_rqos_stats.cpu_time_qos_default;
4759	rqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_rqos_stats.cpu_time_qos_maintenance;
4760	rqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_rqos_stats.cpu_time_qos_background;
4761	rqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_rqos_stats.cpu_time_qos_utility;
4762	rqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_rqos_stats.cpu_time_qos_legacy;
4763	rqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_rqos_stats.cpu_time_qos_user_initiated;
4764	rqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_rqos_stats.cpu_time_qos_user_interactive;
4765	}
4766
4767	task_unlock(task);
4768	}
4769
4770	kern_return_t
4771	task_purgable_info(
4772	task_t task,
4773	task_purgable_info_t *stats)
4774	{
4775	if (task == TASK_NULL \|\| stats == NULL)
4776	return KERN_INVALID_ARGUMENT;
4777	/ Take task reference /
4778	task_reference(task);
4779	vm_purgeable_stats((vm_purgeable_info_t)stats, task);
4780	/ Drop task reference /
4781	task_deallocate(task);
4782	return KERN_SUCCESS;
4783	}
4784
4785	void
4786	task_vtimer_set(
4787	task_t task,
4788	integer_t which)
4789	{
4790	thread_t thread;
4791	spl_t x;
4792
4793	task_lock(task);
4794
4795	task->vtimers \|= which;
4796
4797	switch (which) {
4798
4799	case TASK_VTIMER_USER:
4800	queue_iterate(&task->threads, thread, thread_t, task_threads) {
4801	x = splsched();
4802	thread_lock(thread);
4803	if (thread->precise_user_kernel_time)
4804	thread->vtimer_user_save = timer_grab(&thread->user_timer);
4805	else
4806	thread->vtimer_user_save = timer_grab(&thread->system_timer);
4807	thread_unlock(thread);
4808	splx(x);
4809	}
4810	break;
4811
4812	case TASK_VTIMER_PROF:
4813	queue_iterate(&task->threads, thread, thread_t, task_threads) {
4814	x = splsched();
4815	thread_lock(thread);
4816	thread->vtimer_prof_save = timer_grab(&thread->user_timer);
4817	thread->vtimer_prof_save += timer_grab(&thread->system_timer);
4818	thread_unlock(thread);
4819	splx(x);
4820	}
4821	break;
4822
4823	case TASK_VTIMER_RLIM:
4824	queue_iterate(&task->threads, thread, thread_t, task_threads) {
4825	x = splsched();
4826	thread_lock(thread);
4827	thread->vtimer_rlim_save = timer_grab(&thread->user_timer);
4828	thread->vtimer_rlim_save += timer_grab(&thread->system_timer);
4829	thread_unlock(thread);
4830	splx(x);
4831	}
4832	break;
4833	}
4834
4835	task_unlock(task);
4836	}
4837
4838	void
4839	task_vtimer_clear(
4840	task_t task,
4841	integer_t which)
4842	{
4843	assert(task == current_task());
4844
4845	task_lock(task);
4846
4847	task->vtimers &= ~which;
4848
4849	task_unlock(task);
4850	}
4851
4852	void
4853	task_vtimer_update(
4854	__unused
4855	task_t task,
4856	integer_t which,
4857	uint32_t *microsecs)
4858	{
4859	thread_t thread = current_thread();
4860	uint32_t tdelt = `0`;
4861	clock_sec_t secs = `0`;
4862	uint64_t tsum;
4863
4864	assert(task == current_task());
4865
4866	spl_t s = splsched();
4867	thread_lock(thread);
4868
4869	if ((task->vtimers & which) != (uint32_t)which) {
4870	thread_unlock(thread);
4871	splx(s);
4872	return;
4873	}
4874
4875	switch (which) {
4876
4877	case TASK_VTIMER_USER:
4878	if (thread->precise_user_kernel_time) {
4879	tdelt = (uint32_t)timer_delta(&thread->user_timer,
4880	&thread->vtimer_user_save);
4881	} else {
4882	tdelt = (uint32_t)timer_delta(&thread->system_timer,
4883	&thread->vtimer_user_save);
4884	}
4885	absolutetime_to_microtime(tdelt, &secs, microsecs);
4886	break;
4887
4888	case TASK_VTIMER_PROF:
4889	tsum = timer_grab(&thread->user_timer);
4890	tsum += timer_grab(&thread->system_timer);
4891	tdelt = (uint32_t)(tsum - thread->vtimer_prof_save);
4892	absolutetime_to_microtime(tdelt, &secs, microsecs);
4893	/ if the time delta is smaller than a usec, ignore /
4894	if (*microsecs != `0`)
4895	thread->vtimer_prof_save = tsum;
4896	break;
4897
4898	case TASK_VTIMER_RLIM:
4899	tsum = timer_grab(&thread->user_timer);
4900	tsum += timer_grab(&thread->system_timer);
4901	tdelt = (uint32_t)(tsum - thread->vtimer_rlim_save);
4902	thread->vtimer_rlim_save = tsum;
4903	absolutetime_to_microtime(tdelt, &secs, microsecs);
4904	break;
4905	}
4906
4907	thread_unlock(thread);
4908	splx(s);
4909	}
4910
4911	/*
4912	* task_assign:
4913	*
4914	* Change the assigned processor set for the task
4915	*/
4916	kern_return_t
4917	task_assign(
4918	__unused task_t task,
4919	__unused processor_set_t new_pset,
4920	__unused boolean_t assign_threads)
4921	{
4922	return(KERN_FAILURE);
4923	}
4924
4925	/*
4926	* task_assign_default:
4927	*
4928	* Version of task_assign to assign to default processor set.
4929	*/
4930	kern_return_t
4931	task_assign_default(
4932	task_t task,
4933	boolean_t assign_threads)
4934	{
4935	return (task_assign(task, &pset0, assign_threads));
4936	}
4937
4938	/*
4939	* task_get_assignment
4940	*
4941	* Return name of processor set that task is assigned to.
4942	*/
4943	kern_return_t
4944	task_get_assignment(
4945	task_t task,
4946	processor_set_t *pset)
4947	{
4948	if (!task \|\| !task->active)
4949	return KERN_FAILURE;
4950
4951	*pset = &pset0;
4952
4953	return KERN_SUCCESS;
4954	}
4955
4956	uint64_t
4957	get_task_dispatchqueue_offset(
4958	task_t task)
4959	{
4960	return task->dispatchqueue_offset;
4961	}
4962
4963	/*
4964	* task_policy
4965	*
4966	* Set scheduling policy and parameters, both base and limit, for
4967	* the given task. Policy must be a policy which is enabled for the
4968	* processor set. Change contained threads if requested.
4969	*/
4970	kern_return_t
4971	task_policy(
4972	__unused task_t task,
4973	__unused policy_t policy_id,
4974	__unused policy_base_t base,
4975	__unused mach_msg_type_number_t count,
4976	__unused boolean_t set_limit,
4977	__unused boolean_t change)
4978	{
4979	return(KERN_FAILURE);
4980	}
4981
4982	/*
4983	* task_set_policy
4984	*
4985	* Set scheduling policy and parameters, both base and limit, for
4986	* the given task. Policy can be any policy implemented by the
4987	* processor set, whether enabled or not. Change contained threads
4988	* if requested.
4989	*/
4990	kern_return_t
4991	task_set_policy(
4992	__unused task_t task,
4993	__unused processor_set_t pset,
4994	__unused policy_t policy_id,
4995	__unused policy_base_t base,
4996	__unused mach_msg_type_number_t base_count,
4997	__unused policy_limit_t limit,
4998	__unused mach_msg_type_number_t limit_count,
4999	__unused boolean_t change)
5000	{
5001	return(KERN_FAILURE);
5002	}
5003
5004	kern_return_t
5005	task_set_ras_pc(
5006	__unused task_t task,
5007	__unused vm_offset_t pc,
5008	__unused vm_offset_t endpc)
5009	{
5010	return KERN_FAILURE;
5011	}
5012
5013	void
5014	task_synchronizer_destroy_all(task_t task)
5015	{
5016	/*
5017	* Destroy owned semaphores
5018	*/
5019	semaphore_destroy_all(task);
5020	}
5021
5022	/*
5023	* Install default (machine-dependent) initial thread state
5024	* on the task. Subsequent thread creation will have this initial
5025	* state set on the thread by machine_thread_inherit_taskwide().
5026	* Flavors and structures are exactly the same as those to thread_set_state()
5027	*/
5028	kern_return_t
5029	task_set_state(
5030	task_t task,
5031	int flavor,
5032	thread_state_t state,
5033	mach_msg_type_number_t state_count)
5034	{
5035	kern_return_t ret;
5036
5037	if (task == TASK_NULL) {
5038	return (KERN_INVALID_ARGUMENT);
5039	}
5040
5041	task_lock(task);
5042
5043	if (!task->active) {
5044	task_unlock(task);
5045	return (KERN_FAILURE);
5046	}
5047
5048	ret = machine_task_set_state(task, flavor, state, state_count);
5049
5050	task_unlock(task);
5051	return ret;
5052	}
5053
5054	/*
5055	* Examine the default (machine-dependent) initial thread state
5056	* on the task, as set by task_set_state(). Flavors and structures
5057	* are exactly the same as those passed to thread_get_state().
5058	*/
5059	kern_return_t
5060	task_get_state(
5061	task_t task,
5062	int flavor,
5063	thread_state_t state,
5064	mach_msg_type_number_t *state_count)
5065	{
5066	kern_return_t ret;
5067
5068	if (task == TASK_NULL) {
5069	return (KERN_INVALID_ARGUMENT);
5070	}
5071
5072	task_lock(task);
5073
5074	if (!task->active) {
5075	task_unlock(task);
5076	return (KERN_FAILURE);
5077	}
5078
5079	ret = machine_task_get_state(task, flavor, state, state_count);
5080
5081	task_unlock(task);
5082	return ret;
5083	}
5084
5085
5086	static kern_return_t __attribute__((noinline,not_tail_called))
5087	PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND(
5088	mach_exception_code_t code,
5089	mach_exception_subcode_t subcode,
5090	void *reason)
5091	{
5092	#ifdef MACH_BSD
5093	if (`1` == proc_selfpid())
5094	return KERN_NOT_SUPPORTED; // initproc is immune
5095	#endif
5096	mach_exception_data_type_t codes[EXCEPTION_CODE_MAX] = {
5097	[`0`] = code,
5098	[`1`] = subcode,
5099	};
5100	task_t task = current_task();
5101	kern_return_t kr;
5102
5103	/ (See jetsam-related comments below) /
5104
5105	proc_memstat_terminated(task->bsd_info, TRUE);
5106	kr = task_enqueue_exception_with_corpse(task, EXC_GUARD, codes, `2`, reason);
5107	proc_memstat_terminated(task->bsd_info, FALSE);
5108	return kr;
5109	}
5110
5111	kern_return_t
5112	task_violated_guard(
5113	mach_exception_code_t code,
5114	mach_exception_subcode_t subcode,
5115	void *reason)
5116	{
5117	return PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND(code, subcode, reason);
5118	}
5119
5120
5121	#if CONFIG_MEMORYSTATUS
5122
5123	boolean_t
5124	task_get_memlimit_is_active(task_t task)
5125	{
5126	assert (task != NULL);
5127
5128	if (task->memlimit_is_active == `1`) {
5129	return(TRUE);
5130	} else {
5131	return (FALSE);
5132	}
5133	}
5134
5135	void
5136	task_set_memlimit_is_active(task_t task, boolean_t memlimit_is_active)
5137	{
5138	assert (task != NULL);
5139
5140	if (memlimit_is_active) {
5141	task->memlimit_is_active = `1`;
5142	} else {
5143	task->memlimit_is_active = `0`;
5144	}
5145	}
5146
5147	boolean_t
5148	task_get_memlimit_is_fatal(task_t task)
5149	{
5150	assert(task != NULL);
5151
5152	if (task->memlimit_is_fatal == `1`) {
5153	return(TRUE);
5154	} else {
5155	return(FALSE);
5156	}
5157	}
5158
5159	void
5160	task_set_memlimit_is_fatal(task_t task, boolean_t memlimit_is_fatal)
5161	{
5162	assert (task != NULL);
5163
5164	if (memlimit_is_fatal) {
5165	task->memlimit_is_fatal = `1`;
5166	} else {
5167	task->memlimit_is_fatal = `0`;
5168	}
5169	}
5170
5171	boolean_t
5172	task_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
5173	{
5174	boolean_t triggered = FALSE;
5175
5176	assert(task == current_task());
5177
5178	/*
5179	* Returns true, if task has already triggered an exc_resource exception.
5180	*/
5181
5182	if (memlimit_is_active) {
5183	triggered = (task->memlimit_active_exc_resource ? TRUE : FALSE);
5184	} else {
5185	triggered = (task->memlimit_inactive_exc_resource ? TRUE : FALSE);
5186	}
5187
5188	return(triggered);
5189	}
5190
5191	void
5192	task_mark_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
5193	{
5194	assert(task == current_task());
5195
5196	/*
5197	* We allow one exc_resource per process per active/inactive limit.
5198	* The limit's fatal attribute does not come into play.
5199	*/
5200
5201	if (memlimit_is_active) {
5202	task->memlimit_active_exc_resource = `1`;
5203	} else {
5204	task->memlimit_inactive_exc_resource = `1`;
5205	}
5206	}
5207
5208	#define HWM_USERCORE_MINSPACE 250 // free space (in MB) required after core file creation
5209
5210	void __attribute__((noinline))
5211	PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal)
5212	{
5213	task_t task = current_task();
5214	int pid = `0`;
5215	const char *procname = "unknown";
5216	mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
5217	boolean_t send_sync_exc_resource = FALSE;
5218
5219	#ifdef MACH_BSD
5220	pid = proc_selfpid();
5221
5222	if (pid == `1`) {
5223	/*
5224	* Cannot have ReportCrash analyzing
5225	* a suspended initproc.
5226	*/
5227	return;
5228	}
5229
5230	if (task->bsd_info != NULL) {
5231	procname = proc_name_address(current_task()->bsd_info);
5232	send_sync_exc_resource = proc_send_synchronous_EXC_RESOURCE(current_task()->bsd_info);
5233	}
5234	#endif
5235	#if CONFIG_COREDUMP
5236	if (hwm_user_cores) {
5237	int error;
5238	uint64_t starttime, end;
5239	clock_sec_t secs = `0`;
5240	uint32_t microsecs = `0`;
5241
5242	starttime = mach_absolute_time();
5243	/*
5244	* Trigger a coredump of this process. Don't proceed unless we know we won't
5245	* be filling up the disk; and ignore the core size resource limit for this
5246	* core file.
5247	*/
5248	if ((error = coredump(current_task()->bsd_info, HWM_USERCORE_MINSPACE, COREDUMP_IGNORE_ULIMIT)) != `0`) {
5249	printf("couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
5250	}
5251	/*
5252	* coredump() leaves the task suspended.
5253	*/
5254	task_resume_internal(current_task());
5255
5256	end = mach_absolute_time();
5257	absolutetime_to_microtime(end - starttime, &secs, &microsecs);
5258	printf("coredump of %s[%d] taken in %d secs %d microsecs\n",
5259	proc_name_address(current_task()->bsd_info), pid, (int)secs, microsecs);
5260	}
5261	#endif /* CONFIG_COREDUMP */
5262
5263	if (disable_exc_resource) {
5264	printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
5265	"supressed by a boot-arg.\n", procname, pid, max_footprint_mb);
5266	return;
5267	}
5268
5269	/*
5270	* A task that has triggered an EXC_RESOURCE, should not be
5271	* jetsammed when the device is under memory pressure. Here
5272	* we set the P_MEMSTAT_TERMINATED flag so that the process
5273	* will be skipped if the memorystatus_thread wakes up.
5274	*/
5275	proc_memstat_terminated(current_task()->bsd_info, TRUE);
5276
5277	code[`0`] = code[`1`] = `0`;
5278	EXC_RESOURCE_ENCODE_TYPE(code[`0`], RESOURCE_TYPE_MEMORY);
5279	EXC_RESOURCE_ENCODE_FLAVOR(code[`0`], FLAVOR_HIGH_WATERMARK);
5280	EXC_RESOURCE_HWM_ENCODE_LIMIT(code[`0`], max_footprint_mb);
5281
5282	/*
5283	* Do not generate a corpse fork if the violation is a fatal one
5284	* or the process wants synchronous EXC_RESOURCE exceptions.
5285	*/
5286	if (is_fatal \|\| send_sync_exc_resource \|\| exc_via_corpse_forking == `0`) {
5287	/ Do not send a EXC_RESOURCE if corpse_for_fatal_memkill is set /
5288	if (send_sync_exc_resource \|\| corpse_for_fatal_memkill == `0`) {
5289	/*
5290	* Use the _internal_ variant so that no user-space
5291	* process can resume our task from under us.
5292	*/
5293	task_suspend_internal(task);
5294	exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
5295	task_resume_internal(task);
5296	}
5297	} else {
5298	if (audio_active) {
5299	printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
5300	"supressed due to audio playback.\n", procname, pid, max_footprint_mb);
5301	} else {
5302	task_enqueue_exception_with_corpse(task, EXC_RESOURCE,
5303	code, EXCEPTION_CODE_MAX, NULL);
5304	}
5305	}
5306
5307	/*
5308	* After the EXC_RESOURCE has been handled, we must clear the
5309	* P_MEMSTAT_TERMINATED flag so that the process can again be
5310	* considered for jetsam if the memorystatus_thread wakes up.
5311	*/
5312	proc_memstat_terminated(current_task()->bsd_info, FALSE); / clear the flag /
5313	}
5314
5315	/*
5316	* Callback invoked when a task exceeds its physical footprint limit.
5317	*/
5318	void
5319	task_footprint_exceeded(int warning, __unused const void param0, __unused const* void *param1)
5320	{
5321	ledger_amount_t max_footprint, max_footprint_mb;
5322	task_t task;
5323	boolean_t is_warning;
5324	boolean_t memlimit_is_active;
5325	boolean_t memlimit_is_fatal;
5326
5327	if (warning == LEDGER_WARNING_DIPPED_BELOW) {
5328	/*
5329	* Task memory limits only provide a warning on the way up.
5330	*/
5331	return;
5332	} else if (warning == LEDGER_WARNING_ROSE_ABOVE) {
5333	/*
5334	* This task is in danger of violating a memory limit,
5335	* It has exceeded a percentage level of the limit.
5336	*/
5337	is_warning = TRUE;
5338	} else {
5339	/*
5340	* The task has exceeded the physical footprint limit.
5341	* This is not a warning but a true limit violation.
5342	*/
5343	is_warning = FALSE;
5344	}
5345
5346	task = current_task();
5347
5348	ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &max_footprint);
5349	max_footprint_mb = max_footprint >> `20`;
5350
5351	memlimit_is_active = task_get_memlimit_is_active(task);
5352	memlimit_is_fatal = task_get_memlimit_is_fatal(task);
5353
5354	/*
5355	* If this is an actual violation (not a warning), then generate EXC_RESOURCE exception.
5356	* We only generate the exception once per process per memlimit (active/inactive limit).
5357	* To enforce this, we monitor state based on the memlimit's active/inactive attribute
5358	* and we disable it by marking that memlimit as exception triggered.
5359	*/
5360	if ((is_warning == FALSE) && (!task_has_triggered_exc_resource(task, memlimit_is_active))) {
5361	PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)max_footprint_mb, memlimit_is_fatal);
5362	memorystatus_log_exception((int)max_footprint_mb, memlimit_is_active, memlimit_is_fatal);
5363	task_mark_has_triggered_exc_resource(task, memlimit_is_active);
5364	}
5365
5366	memorystatus_on_ledger_footprint_exceeded(is_warning, memlimit_is_active, memlimit_is_fatal);
5367	}
5368
5369	extern int proc_check_footprint_priv(void);
5370
5371	kern_return_t
5372	task_set_phys_footprint_limit(
5373	task_t task,
5374	int new_limit_mb,
5375	int *old_limit_mb)
5376	{
5377	kern_return_t error;
5378
5379	boolean_t memlimit_is_active;
5380	boolean_t memlimit_is_fatal;
5381
5382	if ((error = proc_check_footprint_priv())) {
5383	return (KERN_NO_ACCESS);
5384	}
5385
5386	/*
5387	* This call should probably be obsoleted.
5388	* But for now, we default to current state.
5389	*/
5390	memlimit_is_active = task_get_memlimit_is_active(task);
5391	memlimit_is_fatal = task_get_memlimit_is_fatal(task);
5392
5393	return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, memlimit_is_active, memlimit_is_fatal);
5394	}
5395
5396	kern_return_t
5397	task_convert_phys_footprint_limit(
5398	int limit_mb,
5399	int *converted_limit_mb)
5400	{
5401	if (limit_mb == -`1`) {
5402	/*
5403	* No limit
5404	*/
5405	if (max_task_footprint != `0`) {
5406	converted_limit_mb = (int)(max_task_footprint / `1024` / `1024`); /* bytes to MB /
5407	} else {
5408	converted_limit_mb = (int*)(LEDGER_LIMIT_INFINITY >> `20`);
5409	}
5410	} else {
5411	/ nothing to convert /
5412	*converted_limit_mb = limit_mb;
5413	}
5414	return (KERN_SUCCESS);
5415	}
5416
5417
5418	kern_return_t
5419	task_set_phys_footprint_limit_internal(
5420	task_t task,
5421	int new_limit_mb,
5422	int *old_limit_mb,
5423	boolean_t memlimit_is_active,
5424	boolean_t memlimit_is_fatal)
5425	{
5426	ledger_amount_t old;
5427
5428	ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &old);
5429
5430	/*
5431	* Check that limit >> 20 will not give an "unexpected" 32-bit
5432	* result. There are, however, implicit assumptions that -1 mb limit
5433	* equates to LEDGER_LIMIT_INFINITY.
5434	*/
5435	assert(((old & `0xFFF0000000000000LL`) == `0`) \|\| (old == LEDGER_LIMIT_INFINITY));
5436
5437	if (old_limit_mb) {
5438	old_limit_mb = (int*)(old >> `20`);
5439	}
5440
5441	if (new_limit_mb == -`1`) {
5442	/*
5443	* Caller wishes to remove the limit.
5444	*/
5445	ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
5446	max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
5447	max_task_footprint ? max_task_footprint_warning_level : `0`);
5448
5449	task_lock(task);
5450	task_set_memlimit_is_active(task, memlimit_is_active);
5451	task_set_memlimit_is_fatal(task, memlimit_is_fatal);
5452	task_unlock(task);
5453
5454	return (KERN_SUCCESS);
5455	}
5456
5457	#ifdef CONFIG_NOMONITORS
5458	return (KERN_SUCCESS);
5459	#endif /* CONFIG_NOMONITORS */
5460
5461	task_lock(task);
5462
5463	if ((memlimit_is_active == task_get_memlimit_is_active(task)) &&
5464	(memlimit_is_fatal == task_get_memlimit_is_fatal(task)) &&
5465	(((ledger_amount_t)new_limit_mb << `20`) == old)) {
5466	/*
5467	* memlimit state is not changing
5468	*/
5469	task_unlock(task);
5470	return(KERN_SUCCESS);
5471	}
5472
5473	task_set_memlimit_is_active(task, memlimit_is_active);
5474	task_set_memlimit_is_fatal(task, memlimit_is_fatal);
5475
5476	ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
5477	(ledger_amount_t)new_limit_mb << `20`, PHYS_FOOTPRINT_WARNING_LEVEL);
5478
5479	if (task == current_task()) {
5480	ledger_check_new_balance(current_thread(), task->ledger,
5481	task_ledgers.phys_footprint);
5482	}
5483
5484	task_unlock(task);
5485
5486	return (KERN_SUCCESS);
5487	}
5488
5489	kern_return_t
5490	task_get_phys_footprint_limit(
5491	task_t task,
5492	int *limit_mb)
5493	{
5494	ledger_amount_t limit;
5495
5496	ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &limit);
5497	/*
5498	* Check that limit >> 20 will not give an "unexpected" signed, 32-bit
5499	* result. There are, however, implicit assumptions that -1 mb limit
5500	* equates to LEDGER_LIMIT_INFINITY.
5501	*/
5502	assert(((limit & `0xFFF0000000000000LL`) == `0`) \|\| (limit == LEDGER_LIMIT_INFINITY));
5503	limit_mb = (int*)(limit >> `20`);
5504
5505	return (KERN_SUCCESS);
5506	}
5507	#else /* CONFIG_MEMORYSTATUS */
5508	kern_return_t
5509	task_set_phys_footprint_limit(
5510	__unused task_t task,
5511	__unused int new_limit_mb,
5512	__unused int *old_limit_mb)
5513	{
5514	return (KERN_FAILURE);
5515	}
5516
5517	kern_return_t
5518	task_get_phys_footprint_limit(
5519	__unused task_t task,
5520	__unused int *limit_mb)
5521	{
5522	return (KERN_FAILURE);
5523	}
5524	#endif /* CONFIG_MEMORYSTATUS */
5525
5526	void
5527	task_set_thread_limit(task_t task, uint16_t thread_limit)
5528	{
5529	assert(task != kernel_task);
5530	if (thread_limit <= TASK_MAX_THREAD_LIMIT) {
5531	task_lock(task);
5532	task->task_thread_limit = thread_limit;
5533	task_unlock(task);
5534	}
5535	}
5536
5537	/*
5538	* We need to export some functions to other components that
5539	* are currently implemented in macros within the osfmk
5540	* component. Just export them as functions of the same name.
5541	*/
5542	boolean_t is_kerneltask(task_t t)
5543	{
5544	if (t == kernel_task)
5545	return (TRUE);
5546
5547	return (FALSE);
5548	}
5549
5550	boolean_t is_corpsetask(task_t t)
5551	{
5552	return (task_is_a_corpse(t));
5553	}
5554
5555	#undef current_task
5556	task_t current_task(void);
5557	task_t current_task(void)
5558	{
5559	return (current_task_fast());
5560	}
5561
5562	#undef task_reference
5563	void task_reference(task_t task);
5564	void
5565	task_reference(
5566	task_t task)
5567	{
5568	if (task != TASK_NULL)
5569	task_reference_internal(task);
5570	}
5571
5572	/ defined in bsd/kern/kern_prot.c /
5573	extern int get_audit_token_pid(audit_token_t *audit_token);
5574
5575	int task_pid(task_t task)
5576	{
5577	if (task)
5578	return get_audit_token_pid(&task->audit_token);
5579	return -`1`;
5580	}
5581
5582
5583	/*
5584	* This routine finds a thread in a task by its unique id
5585	* Returns a referenced thread or THREAD_NULL if the thread was not found
5586	*
5587	* TODO: This is super inefficient - it's an O(threads in task) list walk!
5588	* We should make a tid hash, or transition all tid clients to thread ports
5589	*
5590	* Precondition: No locks held (will take task lock)
5591	*/
5592	thread_t
5593	task_findtid(task_t task, uint64_t tid)
5594	{
5595	thread_t self = current_thread();
5596	thread_t found_thread = THREAD_NULL;
5597	thread_t iter_thread = THREAD_NULL;
5598
5599	/ Short-circuit the lookup if we're looking up ourselves /
5600	if (tid == self->thread_id \|\| tid == TID_NULL) {
5601	assert(self->task == task);
5602
5603	thread_reference(self);
5604
5605	return self;
5606	}
5607
5608	task_lock(task);
5609
5610	queue_iterate(&task->threads, iter_thread, thread_t, task_threads) {
5611	if (iter_thread->thread_id == tid) {
5612	found_thread = iter_thread;
5613	thread_reference(found_thread);
5614	break;
5615	}
5616	}
5617
5618	task_unlock(task);
5619
5620	return (found_thread);
5621	}
5622
5623	int pid_from_task(task_t task)
5624	{
5625	int pid = -`1`;
5626
5627	if (task->bsd_info) {
5628	pid = proc_pid(task->bsd_info);
5629	} else {
5630	pid = task_pid(task);
5631	}
5632
5633	return pid;
5634	}
5635
5636	/*
5637	* Control the CPU usage monitor for a task.
5638	*/
5639	kern_return_t
5640	task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags)
5641	{
5642	int error = KERN_SUCCESS;
5643
5644	if (*flags & CPUMON_MAKE_FATAL) {
5645	task->rusage_cpu_flags \|= TASK_RUSECPU_FLAGS_FATAL_CPUMON;
5646	} else {
5647	error = KERN_INVALID_ARGUMENT;
5648	}
5649
5650	return error;
5651	}
5652
5653	/*
5654	* Control the wakeups monitor for a task.
5655	*/
5656	kern_return_t
5657	task_wakeups_monitor_ctl(task_t task, uint32_t flags, int32_t rate_hz)
5658	{
5659	ledger_t ledger = task->ledger;
5660
5661	task_lock(task);
5662	if (*flags & WAKEMON_GET_PARAMS) {
5663	ledger_amount_t limit;
5664	uint64_t period;
5665
5666	ledger_get_limit(ledger, task_ledgers.interrupt_wakeups, &limit);
5667	ledger_get_period(ledger, task_ledgers.interrupt_wakeups, &period);
5668
5669	if (limit != LEDGER_LIMIT_INFINITY) {
5670	/*
5671	* An active limit means the wakeups monitor is enabled.
5672	*/
5673	*rate_hz = (int32_t)(limit / (int64_t)(period / NSEC_PER_SEC));
5674	*flags = WAKEMON_ENABLE;
5675	if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
5676	*flags \|= WAKEMON_MAKE_FATAL;
5677	}
5678	} else {
5679	*flags = WAKEMON_DISABLE;
5680	*rate_hz = -`1`;
5681	}
5682
5683	/*
5684	* If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored.
5685	*/
5686	task_unlock(task);
5687	return KERN_SUCCESS;
5688	}
5689
5690	if (*flags & WAKEMON_ENABLE) {
5691	if (*flags & WAKEMON_SET_DEFAULTS) {
5692	*rate_hz = task_wakeups_monitor_rate;
5693	}
5694
5695	#ifndef CONFIG_NOMONITORS
5696	if (*flags & WAKEMON_MAKE_FATAL) {
5697	task->rusage_cpu_flags \|= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
5698	}
5699	#endif /* CONFIG_NOMONITORS */
5700
5701	if (*rate_hz <= `0`) {
5702	task_unlock(task);
5703	return KERN_INVALID_ARGUMENT;
5704	}
5705
5706	#ifndef CONFIG_NOMONITORS
5707	ledger_set_limit(ledger, task_ledgers.interrupt_wakeups, rate_hz task_wakeups_monitor_interval,
5708	task_wakeups_monitor_ustackshots_trigger_pct);
5709	ledger_set_period(ledger, task_ledgers.interrupt_wakeups, task_wakeups_monitor_interval * NSEC_PER_SEC);
5710	ledger_enable_callback(ledger, task_ledgers.interrupt_wakeups);
5711	#endif /* CONFIG_NOMONITORS */
5712	} else if (*flags & WAKEMON_DISABLE) {
5713	/*
5714	* Caller wishes to disable wakeups monitor on the task.
5715	*
5716	* Disable telemetry if it was triggered by the wakeups monitor, and
5717	* remove the limit & callback on the wakeups ledger entry.
5718	*/
5719	#if CONFIG_TELEMETRY
5720	telemetry_task_ctl_locked(task, TF_WAKEMON_WARNING, `0`);
5721	#endif
5722	ledger_disable_refill(ledger, task_ledgers.interrupt_wakeups);
5723	ledger_disable_callback(ledger, task_ledgers.interrupt_wakeups);
5724	}
5725
5726	task_unlock(task);
5727	return KERN_SUCCESS;
5728	}
5729
5730	void
5731	task_wakeups_rate_exceeded(int warning, __unused const void param0, __unused const* void *param1)
5732	{
5733	if (warning == LEDGER_WARNING_ROSE_ABOVE) {
5734	#if CONFIG_TELEMETRY
5735	/*
5736	* This task is in danger of violating the wakeups monitor. Enable telemetry on this task
5737	* so there are micro-stackshots available if and when EXC_RESOURCE is triggered.
5738	*/
5739	telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, `1`);
5740	#endif
5741	return;
5742	}
5743
5744	#if CONFIG_TELEMETRY
5745	/*
5746	* If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
5747	* exceeded the limit, turn telemetry off for the task.
5748	*/
5749	telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, `0`);
5750	#endif
5751
5752	if (warning == `0`) {
5753	SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS();
5754	}
5755	}
5756
5757	void __attribute__((noinline))
5758	SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void)
5759	{
5760	task_t task = current_task();
5761	int pid = `0`;
5762	const char *procname = "unknown";
5763	boolean_t fatal;
5764	kern_return_t kr;
5765	#ifdef EXC_RESOURCE_MONITORS
5766	mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
5767	#endif /* EXC_RESOURCE_MONITORS */
5768	struct ledger_entry_info lei;
5769
5770	#ifdef MACH_BSD
5771	pid = proc_selfpid();
5772	if (task->bsd_info != NULL)
5773	procname = proc_name_address(current_task()->bsd_info);
5774	#endif
5775
5776	ledger_get_entry_info(task->ledger, task_ledgers.interrupt_wakeups, &lei);
5777
5778	/*
5779	* Disable the exception notification so we don't overwhelm
5780	* the listener with an endless stream of redundant exceptions.
5781	* TODO: detect whether another thread is already reporting the violation.
5782	*/
5783	uint32_t flags = WAKEMON_DISABLE;
5784	task_wakeups_monitor_ctl(task, &flags, NULL);
5785
5786	fatal = task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
5787	trace_resource_violation(RMON_CPUWAKES_VIOLATED, &lei);
5788	os_log(OS_LOG_DEFAULT, "process %s[%d] caught waking the CPU %llu times "
5789	"over ~%llu seconds, averaging %llu wakes / second and "
5790	"violating a %slimit of %llu wakes over %llu seconds.\n",
5791	procname, pid,
5792	lei.lei_balance, lei.lei_last_refill / NSEC_PER_SEC,
5793	lei.lei_last_refill == `0` ? `0` :
5794	(NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill),
5795	fatal ? "FATAL " : "",
5796	lei.lei_limit, lei.lei_refill_period / NSEC_PER_SEC);
5797
5798	kr = send_resource_violation(send_cpu_wakes_violation, task, &lei,
5799	fatal ? kRNFatalLimitFlag : `0`);
5800	if (kr) {
5801	printf("send_resource_violation(CPU wakes, ...): error %#x\n", kr);
5802	}
5803
5804	#ifdef EXC_RESOURCE_MONITORS
5805	if (disable_exc_resource) {
5806	printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
5807	"supressed by a boot-arg\n", procname, pid);
5808	return;
5809	}
5810	if (audio_active) {
5811	os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
5812	"supressed due to audio playback\n", procname, pid);
5813	return;
5814	}
5815	if (lei.lei_last_refill == `0`) {
5816	os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
5817	"supressed due to lei.lei_last_refill = 0 \n", procname, pid);
5818	}
5819
5820	code[`0`] = code[`1`] = `0`;
5821	EXC_RESOURCE_ENCODE_TYPE(code[`0`], RESOURCE_TYPE_WAKEUPS);
5822	EXC_RESOURCE_ENCODE_FLAVOR(code[`0`], FLAVOR_WAKEUPS_MONITOR);
5823	EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code[`0`],
5824	NSEC_PER_SEC * lei.lei_limit / lei.lei_refill_period);
5825	EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code[`0`],
5826	lei.lei_last_refill);
5827	EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code[`1`],
5828	NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill);
5829	exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
5830	#endif /* EXC_RESOURCE_MONITORS */
5831
5832	if (fatal) {
5833	task_terminate_internal(task);
5834	}
5835	}
5836
5837	static boolean_t
5838	global_update_logical_writes(int64_t io_delta)
5839	{
5840	int64_t old_count, new_count;
5841	boolean_t needs_telemetry;
5842
5843	do {
5844	new_count = old_count = global_logical_writes_count;
5845	new_count += io_delta;
5846	if (new_count >= io_telemetry_limit) {
5847	new_count = `0`;
5848	needs_telemetry = TRUE;
5849	} else {
5850	needs_telemetry = FALSE;
5851	}
5852	} while(!OSCompareAndSwap64(old_count, new_count, &global_logical_writes_count));
5853	return needs_telemetry;
5854	}
5855
5856	void task_update_logical_writes(task_t task, uint32_t io_size, int flags, void *vp)
5857	{
5858	int64_t io_delta = `0`;
5859	boolean_t needs_telemetry = FALSE;
5860
5861	if ((!task) \|\| (!io_size) \|\| (!vp))
5862	return;
5863
5864	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_DATA_WRITE)) \| DBG_FUNC_NONE,
5865	task_pid(task), io_size, flags, (uintptr_t)VM_KERNEL_ADDRPERM(vp), `0`);
5866	DTRACE_IO4(logical_writes, struct task , task, uint32_t, io_size, int, flags, vnode , vp);
5867	switch(flags) {
5868	case TASK_WRITE_IMMEDIATE:
5869	OSAddAtomic64(io_size, (SInt64 *)&(task->task_immediate_writes));
5870	ledger_credit(task->ledger, task_ledgers.logical_writes, io_size);
5871	break;
5872	case TASK_WRITE_DEFERRED:
5873	OSAddAtomic64(io_size, (SInt64 *)&(task->task_deferred_writes));
5874	ledger_credit(task->ledger, task_ledgers.logical_writes, io_size);
5875	break;
5876	case TASK_WRITE_INVALIDATED:
5877	OSAddAtomic64(io_size, (SInt64 *)&(task->task_invalidated_writes));
5878	ledger_debit(task->ledger, task_ledgers.logical_writes, io_size);
5879	break;
5880	case TASK_WRITE_METADATA:
5881	OSAddAtomic64(io_size, (SInt64 *)&(task->task_metadata_writes));
5882	ledger_credit(task->ledger, task_ledgers.logical_writes, io_size);
5883	break;
5884	}
5885
5886	io_delta = (flags == TASK_WRITE_INVALIDATED) ? ((int64_t)io_size * -`1ll`) : ((int64_t)io_size);
5887	if (io_telemetry_limit != `0`) {
5888	/ If io_telemetry_limit is 0, disable global updates and I/O telemetry /
5889	needs_telemetry = global_update_logical_writes(io_delta);
5890	if (needs_telemetry) {
5891	act_set_io_telemetry_ast(current_thread());
5892	}
5893	}
5894	}
5895
5896	/*
5897	* Control the I/O monitor for a task.
5898	*/
5899	kern_return_t
5900	task_io_monitor_ctl(task_t task, uint32_t *flags)
5901	{
5902	ledger_t ledger = task->ledger;
5903
5904	task_lock(task);
5905	if (*flags & IOMON_ENABLE) {
5906	/ Configure the physical I/O ledger /
5907	ledger_set_limit(ledger, task_ledgers.physical_writes, (task_iomon_limit_mb * `1024` * `1024`), `0`);
5908	ledger_set_period(ledger, task_ledgers.physical_writes, (task_iomon_interval_secs * NSEC_PER_SEC));
5909
5910	/ Configure the logical I/O ledger /
5911	ledger_set_limit(ledger, task_ledgers.logical_writes, (task_iomon_limit_mb * `1024` * `1024`), `0`);
5912	ledger_set_period(ledger, task_ledgers.logical_writes, (task_iomon_interval_secs * NSEC_PER_SEC));
5913
5914	} else if (*flags & IOMON_DISABLE) {
5915	/*
5916	* Caller wishes to disable I/O monitor on the task.
5917	*/
5918	ledger_disable_refill(ledger, task_ledgers.physical_writes);
5919	ledger_disable_callback(ledger, task_ledgers.physical_writes);
5920	ledger_disable_refill(ledger, task_ledgers.logical_writes);
5921	ledger_disable_callback(ledger, task_ledgers.logical_writes);
5922	}
5923
5924	task_unlock(task);
5925	return KERN_SUCCESS;
5926	}
5927
5928	void
5929	task_io_rate_exceeded(int warning, const void param0, __unused const* void *param1)
5930	{
5931	if (warning == `0`) {
5932	SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO((int)param0);
5933	}
5934	}
5935
5936	void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor)
5937	{
5938	int pid = `0`;
5939	task_t task = current_task();
5940	#ifdef EXC_RESOURCE_MONITORS
5941	mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
5942	#endif /* EXC_RESOURCE_MONITORS */
5943	struct ledger_entry_info lei;
5944	kern_return_t kr;
5945
5946	#ifdef MACH_BSD
5947	pid = proc_selfpid();
5948	#endif
5949	/*
5950	* Get the ledger entry info. We need to do this before disabling the exception
5951	* to get correct values for all fields.
5952	*/
5953	switch(flavor) {
5954	case FLAVOR_IO_PHYSICAL_WRITES:
5955	ledger_get_entry_info(task->ledger, task_ledgers.physical_writes, &lei);
5956	break;
5957	case FLAVOR_IO_LOGICAL_WRITES:
5958	ledger_get_entry_info(task->ledger, task_ledgers.logical_writes, &lei);
5959	break;
5960	}
5961
5962
5963	/*
5964	* Disable the exception notification so we don't overwhelm
5965	* the listener with an endless stream of redundant exceptions.
5966	* TODO: detect whether another thread is already reporting the violation.
5967	*/
5968	uint32_t flags = IOMON_DISABLE;
5969	task_io_monitor_ctl(task, &flags);
5970
5971	if (flavor == FLAVOR_IO_LOGICAL_WRITES) {
5972	trace_resource_violation(RMON_LOGWRITES_VIOLATED, &lei);
5973	}
5974	os_log(OS_LOG_DEFAULT, "process [%d] caught causing excessive I/O (flavor: %d). Task I/O: %lld MB. [Limit : %lld MB per %lld secs]\n",
5975	pid, flavor, (lei.lei_balance / (`1024` * `1024`)), (lei.lei_limit / (`1024` * `1024`)), (lei.lei_refill_period / NSEC_PER_SEC));
5976
5977	kr = send_resource_violation(send_disk_writes_violation, task, &lei, kRNFlagsNone);
5978	if (kr) {
5979	printf("send_resource_violation(disk_writes, ...): error %#x\n", kr);
5980	}
5981
5982	#ifdef EXC_RESOURCE_MONITORS
5983	code[`0`] = code[`1`] = `0`;
5984	EXC_RESOURCE_ENCODE_TYPE(code[`0`], RESOURCE_TYPE_IO);
5985	EXC_RESOURCE_ENCODE_FLAVOR(code[`0`], flavor);
5986	EXC_RESOURCE_IO_ENCODE_INTERVAL(code[`0`], (lei.lei_refill_period / NSEC_PER_SEC));
5987	EXC_RESOURCE_IO_ENCODE_LIMIT(code[`0`], (lei.lei_limit / (`1024` * `1024`)));
5988	EXC_RESOURCE_IO_ENCODE_OBSERVED(code[`1`], (lei.lei_balance / (`1024` * `1024`)));
5989	exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
5990	#endif /* EXC_RESOURCE_MONITORS */
5991	}
5992
/* Placeholders for the task set/get voucher interfaces */
5994	kern_return_t
5995	task_get_mach_voucher(
5996	task_t task,
5997	mach_voucher_selector_t __unused which,
5998	ipc_voucher_t *voucher)
5999	{
6000	if (TASK_NULL == task)
6001	return KERN_INVALID_TASK;
6002
6003	*voucher = NULL;
6004	return KERN_SUCCESS;
6005	}
6006
6007	kern_return_t
6008	task_set_mach_voucher(
6009	task_t task,
6010	ipc_voucher_t __unused voucher)
6011	{
6012	if (TASK_NULL == task)
6013	return KERN_INVALID_TASK;
6014
6015	return KERN_SUCCESS;
6016	}
6017
6018	kern_return_t
6019	task_swap_mach_voucher(
6020	task_t task,
6021	ipc_voucher_t new_voucher,
6022	ipc_voucher_t *in_out_old_voucher)
6023	{
6024	if (TASK_NULL == task)
6025	return KERN_INVALID_TASK;
6026
6027	*in_out_old_voucher = new_voucher;
6028	return KERN_SUCCESS;
6029	}
6030
6031	void task_set_gpu_denied(task_t task, boolean_t denied)
6032	{
6033	task_lock(task);
6034
6035	if (denied) {
6036	task->t_flags \|= TF_GPU_DENIED;
6037	} else {
6038	task->t_flags &= ~TF_GPU_DENIED;
6039	}
6040
6041	task_unlock(task);
6042	}
6043
6044	boolean_t task_is_gpu_denied(task_t task)
6045	{
6046	/ We don't need the lock to read this flag /
6047	return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE;
6048	}
6049
6050
6051	uint64_t get_task_memory_region_count(task_t task)
6052	{
6053	vm_map_t map;
6054	map = (task == kernel_task) ? kernel_map: task->map;
6055	return((uint64_t)get_map_nentries(map));
6056	}
6057
6058	static void
6059	kdebug_trace_dyld_internal(uint32_t base_code,
6060	struct dyld_kernel_image_info *info)
6061	{
6062	static_assert(sizeof(info->uuid) >= `16`);
6063
6064	#if defined(__LP64__)
6065	uint64_t uuid = (uint64_t )&(info->uuid);
6066
6067	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6068	KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code), uuid[`0`],
6069	uuid[`1`], info->load_addr,
6070	(uint64_t)info->fsid.val[`0`] \| ((uint64_t)info->fsid.val[`1`] << `32`),
6071	`0`);
6072	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6073	KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + `1`),
6074	(uint64_t)info->fsobjid.fid_objno \|
6075	((uint64_t)info->fsobjid.fid_generation << `32`),
6076	`0`, `0`, `0`, `0`);
6077	#else /* defined(__LP64__) */
6078	uint32_t uuid = (uint32_t )&(info->uuid);
6079
6080	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6081	KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + `2`), uuid[`0`],
6082	uuid[`1`], uuid[`2`], uuid[`3`], `0`);
6083	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6084	KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + `3`),
6085	(uint32_t)info->load_addr, info->fsid.val[`0`], info->fsid.val[`1`],
6086	info->fsobjid.fid_objno, `0`);
6087	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6088	KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + `4`),
6089	info->fsobjid.fid_generation, `0`, `0`, `0`, `0`);
6090	#endif /* !defined(__LP64__) */
6091	}
6092
6093	static kern_return_t
6094	kdebug_trace_dyld(task_t task, uint32_t base_code,
6095	vm_map_copy_t infos_copy, mach_msg_type_number_t infos_len)
6096	{
6097	kern_return_t kr;
6098	dyld_kernel_image_info_array_t infos;
6099	vm_map_offset_t map_data;
6100	vm_offset_t data;
6101
6102	if (!infos_copy) {
6103	return KERN_INVALID_ADDRESS;
6104	}
6105
6106	if (!kdebug_enable \|\|
6107	!kdebug_debugid_enabled(KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, `0`)))
6108	{
6109	vm_map_copy_discard(infos_copy);
6110	return KERN_SUCCESS;
6111	}
6112
6113	if (task == NULL \|\| task != current_task()) {
6114	return KERN_INVALID_TASK;
6115	}
6116
6117	kr = vm_map_copyout(ipc_kernel_map, &map_data, (vm_map_copy_t)infos_copy);
6118	if (kr != KERN_SUCCESS) {
6119	return kr;
6120	}
6121
6122	infos = CAST_DOWN(dyld_kernel_image_info_array_t, map_data);
6123
6124	for (mach_msg_type_number_t i = `0`; i < infos_len; i++) {
6125	kdebug_trace_dyld_internal(base_code, &(infos[i]));
6126	}
6127
6128	data = CAST_DOWN(vm_offset_t, map_data);
6129	mach_vm_deallocate(ipc_kernel_map, data, infos_len * sizeof(infos[`0`]));
6130	return KERN_SUCCESS;
6131	}
6132
6133	kern_return_t
6134	task_register_dyld_image_infos(task_t task,
6135	dyld_kernel_image_info_array_t infos_copy,
6136	mach_msg_type_number_t infos_len)
6137	{
6138	return kdebug_trace_dyld(task, DBG_DYLD_UUID_MAP_A,
6139	(vm_map_copy_t)infos_copy, infos_len);
6140	}
6141
6142	kern_return_t
6143	task_unregister_dyld_image_infos(task_t task,
6144	dyld_kernel_image_info_array_t infos_copy,
6145	mach_msg_type_number_t infos_len)
6146	{
6147	return kdebug_trace_dyld(task, DBG_DYLD_UUID_UNMAP_A,
6148	(vm_map_copy_t)infos_copy, infos_len);
6149	}
6150
6151	kern_return_t
6152	task_get_dyld_image_infos(__unused task_t task,
6153	__unused dyld_kernel_image_info_array_t * dyld_images,
6154	__unused mach_msg_type_number_t * dyld_imagesCnt)
6155	{
6156	return KERN_NOT_SUPPORTED;
6157	}
6158
6159	kern_return_t
6160	task_register_dyld_shared_cache_image_info(task_t task,
6161	dyld_kernel_image_info_t cache_img,
6162	__unused boolean_t no_cache,
6163	__unused boolean_t private_cache)
6164	{
6165	if (task == NULL \|\| task != current_task()) {
6166	return KERN_INVALID_TASK;
6167	}
6168
6169	kdebug_trace_dyld_internal(DBG_DYLD_UUID_SHARED_CACHE_A, &cache_img);
6170	return KERN_SUCCESS;
6171	}
6172
6173	kern_return_t
6174	task_register_dyld_set_dyld_state(__unused task_t task,
6175	__unused uint8_t dyld_state)
6176	{
6177	return KERN_NOT_SUPPORTED;
6178	}
6179
6180	kern_return_t
6181	task_register_dyld_get_process_state(__unused task_t task,
6182	__unused dyld_kernel_process_info_t * dyld_process_state)
6183	{
6184	return KERN_NOT_SUPPORTED;
6185	}
6186
6187	kern_return_t
6188	task_inspect(task_inspect_t task_insp, task_inspect_flavor_t flavor,
6189	task_inspect_info_t info_out, mach_msg_type_number_t *size_in_out)
6190	{
6191	#if MONOTONIC
6192	task_t task = (task_t)task_insp;
6193	kern_return_t kr = KERN_SUCCESS;
6194	mach_msg_type_number_t size;
6195
6196	if (task == TASK_NULL) {
6197	return KERN_INVALID_ARGUMENT;
6198	}
6199
6200	size = *size_in_out;
6201
6202	switch (flavor) {
6203	case TASK_INSPECT_BASIC_COUNTS: {
6204	struct task_inspect_basic_counts *bc;
6205	uint64_t task_counts[MT_CORE_NFIXED];
6206
6207	if (size < TASK_INSPECT_BASIC_COUNTS_COUNT) {
6208	kr = KERN_INVALID_ARGUMENT;
6209	break;
6210	}
6211
6212	mt_fixed_task_counts(task, task_counts);
6213	bc = (struct task_inspect_basic_counts *)info_out;
6214	#ifdef MT_CORE_INSTRS
6215	bc->instructions = task_counts[MT_CORE_INSTRS];
6216	#else /* defined(MT_CORE_INSTRS) */
6217	bc->instructions = `0`;
6218	#endif /* !defined(MT_CORE_INSTRS) */
6219	bc->cycles = task_counts[MT_CORE_CYCLES];
6220	size = TASK_INSPECT_BASIC_COUNTS_COUNT;
6221	break;
6222	}
6223	default:
6224	kr = KERN_INVALID_ARGUMENT;
6225	break;
6226	}
6227
6228	if (kr == KERN_SUCCESS) {
6229	*size_in_out = size;
6230	}
6231	return kr;
6232	#else /* MONOTONIC */
6233	#pragma unused(task_insp, flavor, info_out, size_in_out)
6234	return KERN_NOT_SUPPORTED;
6235	#endif /* !MONOTONIC */
6236	}
6237
6238	#if CONFIG_SECLUDED_MEMORY
6239	int num_tasks_can_use_secluded_mem = `0`;
6240
6241	void
6242	task_set_can_use_secluded_mem(
6243	task_t task,
6244	boolean_t can_use_secluded_mem)
6245	{
6246	if (!task->task_could_use_secluded_mem) {
6247	return;
6248	}
6249	task_lock(task);
6250	task_set_can_use_secluded_mem_locked(task, can_use_secluded_mem);
6251	task_unlock(task);
6252	}
6253
6254	void
6255	task_set_can_use_secluded_mem_locked(
6256	task_t task,
6257	boolean_t can_use_secluded_mem)
6258	{
6259	assert(task->task_could_use_secluded_mem);
6260	if (can_use_secluded_mem &&
6261	secluded_for_apps && / global boot-arg /
6262	!task->task_can_use_secluded_mem) {
6263	assert(num_tasks_can_use_secluded_mem >= `0`);
6264	OSAddAtomic(+`1`,
6265	(volatile SInt32 *)&num_tasks_can_use_secluded_mem);
6266	task->task_can_use_secluded_mem = TRUE;
6267	} else if (!can_use_secluded_mem &&
6268	task->task_can_use_secluded_mem) {
6269	assert(num_tasks_can_use_secluded_mem > `0`);
6270	OSAddAtomic(-`1`,
6271	(volatile SInt32 *)&num_tasks_can_use_secluded_mem);
6272	task->task_can_use_secluded_mem = FALSE;
6273	}
6274	}
6275
6276	void
6277	task_set_could_use_secluded_mem(
6278	task_t task,
6279	boolean_t could_use_secluded_mem)
6280	{
6281	task->task_could_use_secluded_mem = could_use_secluded_mem;
6282	}
6283
6284	void
6285	task_set_could_also_use_secluded_mem(
6286	task_t task,
6287	boolean_t could_also_use_secluded_mem)
6288	{
6289	task->task_could_also_use_secluded_mem = could_also_use_secluded_mem;
6290	}
6291
6292	boolean_t
6293	task_can_use_secluded_mem(
6294	task_t task,
6295	boolean_t is_alloc)
6296	{
6297	if (task->task_can_use_secluded_mem) {
6298	assert(task->task_could_use_secluded_mem);
6299	assert(num_tasks_can_use_secluded_mem > `0`);
6300	return TRUE;
6301	}
6302	if (task->task_could_also_use_secluded_mem &&
6303	num_tasks_can_use_secluded_mem > `0`) {
6304	assert(num_tasks_can_use_secluded_mem > `0`);
6305	return TRUE;
6306	}
6307
6308	/*
6309	* If a single task is using more than some amount of
6310	* memory, allow it to dip into secluded and also begin
6311	* suppression of secluded memory until the tasks exits.
6312	*/
6313	if (is_alloc && secluded_shutoff_trigger != `0`) {
6314	uint64_t phys_used = get_task_phys_footprint(task);
6315	if (phys_used > secluded_shutoff_trigger) {
6316	start_secluded_suppression(task);
6317	return TRUE;
6318	}
6319	}
6320
6321	return FALSE;
6322	}
6323
6324	boolean_t
6325	task_could_use_secluded_mem(
6326	task_t task)
6327	{
6328	return task->task_could_use_secluded_mem;
6329	}
6330	#endif /* CONFIG_SECLUDED_MEMORY */
6331
6332	queue_head_t *
6333	task_io_user_clients(task_t task)
6334	{
6335	return (&task->io_user_clients);
6336	}
6337
6338	void
6339	task_copy_fields_for_exec(task_t dst_task, task_t src_task)
6340	{
6341	dst_task->vtimers = src_task->vtimers;
6342	}
6343
6344	#if DEVELOPMENT \|\| DEBUG
6345	int vm_region_footprint = `0`;
6346	#endif /* DEVELOPMENT \|\| DEBUG */
6347
6348	boolean_t
6349	task_self_region_footprint(void)
6350	{
6351	#if DEVELOPMENT \|\| DEBUG
6352	if (vm_region_footprint) {
6353	/ system-wide override /
6354	return TRUE;
6355	}
6356	#endif /* DEVELOPMENT \|\| DEBUG */
6357	return current_task()->task_region_footprint;
6358	}
6359
6360	void
6361	task_self_region_footprint_set(
6362	boolean_t newval)
6363	{
6364	task_t curtask;
6365
6366	curtask = current_task();
6367	task_lock(curtask);
6368	if (newval) {
6369	curtask->task_region_footprint = TRUE;
6370	} else {
6371	curtask->task_region_footprint = FALSE;
6372	}
6373	task_unlock(curtask);
6374	}
6375
6376	void
6377	task_set_darkwake_mode(task_t task, boolean_t set_mode)
6378	{
6379	assert(task);
6380
6381	task_lock(task);
6382
6383	if (set_mode) {
6384	task->t_flags \|= TF_DARKWAKE_MODE;
6385	} else {
6386	task->t_flags &= ~(TF_DARKWAKE_MODE);
6387	}
6388
6389	task_unlock(task);
6390	}
6391
6392	boolean_t
6393	task_get_darkwake_mode(task_t task)
6394	{
6395	assert(task);
6396	return ((task->t_flags & TF_DARKWAKE_MODE) != `0`);
6397	}
6398
#if __arm64__
/* Store new_val in the task's task_legacy_footprint field, under the task lock. */
void
task_set_legacy_footprint(
	task_t		task,
	boolean_t	new_val)
{
	task_lock(task);
	task->task_legacy_footprint = new_val;
	task_unlock(task);
}
#endif /* __arm64__ */
6410

Browse the source code of codebrowser/osfmk/kern/task.c