1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26/* #pragma ident "@(#)systrace.c 1.6 06/09/19 SMI" */
27
28#ifdef KERNEL
29#ifndef _KERNEL
30#define _KERNEL /* Solaris vs. Darwin */
31#endif
32#endif
33
34#include <kern/thread.h>
35#include <mach/thread_status.h>
36
37/* XXX All of these should really be derived from syscall_sw.h */
38#if defined (__x86_64__)
39#define SYSCALL_CLASS_SHIFT 24
40#define SYSCALL_CLASS_MASK (0xFF << SYSCALL_CLASS_SHIFT)
41#define SYSCALL_NUMBER_MASK (~SYSCALL_CLASS_MASK)
42#define I386_SYSCALL_NUMBER_MASK (0xFFFF)
43#endif
44
45#include <sys/param.h>
46#include <sys/systm.h>
47#include <sys/proc.h>
48#include <sys/errno.h>
49#include <sys/ioctl.h>
50#include <sys/conf.h>
51#include <sys/fcntl.h>
52#include <sys/syscall.h>
53#include <miscfs/devfs/devfs.h>
54
55#include <sys/dtrace.h>
56#include <sys/dtrace_impl.h>
57#include <sys/systrace_args.h>
58#include "systrace.h"
59#include <sys/stat.h>
60#include <sys/systm.h>
61#include <sys/conf.h>
62#include <sys/user.h>
63
64#include <machine/pal_routines.h>
65
66#if defined (__x86_64__)
67#define SYSTRACE_ARTIFICIAL_FRAMES 2
68#define MACHTRACE_ARTIFICIAL_FRAMES 3
69#elif defined(__arm__) || defined(__arm64__)
70#define SYSTRACE_ARTIFICIAL_FRAMES 2
71#define MACHTRACE_ARTIFICIAL_FRAMES 3
72#else
73#error Unknown Architecture
74#endif
75
76#define SYSTRACE_NARGS (int)(sizeof(((uthread_t)NULL)->uu_arg) / sizeof(((uthread_t)NULL)->uu_arg[0]))
77
78#include <sys/sysent.h>
79#define sy_callc sy_call /* Map Solaris slot name to Darwin's */
80#define NSYSCALL nsysent /* and is less than 500 or so */
81
82extern const char *syscallnames[];
83
84#include <sys/dtrace_glue.h>
85#define casptr dtrace_casptr
86#define membar_enter dtrace_membar_producer
87
88#define LOADABLE_SYSCALL(a) 0 /* Not pertinent to Darwin. */
89#define LOADED_SYSCALL(a) 1 /* Not pertinent to Darwin. */
90
91extern lck_attr_t* dtrace_lck_attr;
92extern lck_grp_t* dtrace_lck_grp;
93static lck_mtx_t dtrace_systrace_lock; /* probe state lock */
94
95systrace_sysent_t *systrace_sysent = NULL;
96void (*systrace_probe)(dtrace_id_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t);
97
98static uint64_t systrace_getargval(void *, dtrace_id_t, void *, int, int);
99static void systrace_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *);
100
101void
102systrace_stub(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
103 uint64_t arg2, uint64_t arg3, uint64_t arg4)
104{
105#pragma unused(id,arg0,arg1,arg2,arg3,arg4)
106}
107
108int32_t
109dtrace_systrace_syscall(struct proc *pp, void *uap, int *rv)
110{
111 unsigned short code; /* The system call number */
112
113 systrace_sysent_t *sy;
114 dtrace_id_t id;
115 int32_t rval;
116 syscall_arg_t *ip = (syscall_arg_t *)uap;
117 uint64_t uargs[SYSTRACE_NARGS] = {0};
118
119#if defined (__x86_64__)
120 {
121 pal_register_cache_state(current_thread(), VALID);
122 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
123
124 if (is_saved_state64(tagged_regs)) {
125 x86_saved_state64_t *regs = saved_state64(tagged_regs);
126 code = regs->rax & SYSCALL_NUMBER_MASK;
127 /*
128 * Check for indirect system call... system call number
129 * passed as 'arg0'
130 */
131 if (code == 0) {
132 code = regs->rdi;
133 }
134 } else {
135 code = saved_state32(tagged_regs)->eax & I386_SYSCALL_NUMBER_MASK;
136
137 if (code == 0) {
138 vm_offset_t params = (vm_offset_t) (saved_state32(tagged_regs)->uesp + sizeof (int));
139 code = fuword(params);
140 }
141 }
142 }
143#elif defined(__arm__)
144 {
145 /*
146 * On arm, syscall numbers depend on a flavor (indirect or not)
147 * and can be in either r0 or r12 (always u32)
148 */
149
150 /* See bsd/dev/arm/systemcalls.c:arm_get_syscall_number */
151 arm_saved_state_t *arm_regs = (arm_saved_state_t *) find_user_regs(current_thread());
152
153 /* Check for indirect system call */
154 if (arm_regs->r[12] != 0)
155 code = arm_regs->r[12];
156 else
157 code = arm_regs->r[0];
158 }
159#elif defined(__arm64__)
160 {
161 /*
162 * On arm64, syscall numbers depend on a flavor (indirect or not)
163 * ... and for u32 can be in either r0 or r12
164 * ... and for u64 can be in either x0 or x16
165 */
166
167 /* see bsd/dev/arm/systemcalls.c:arm_get_syscall_number */
168 arm_saved_state_t *arm_regs = (arm_saved_state_t *) find_user_regs(current_thread());
169
170 if (is_saved_state32(arm_regs)) {
171 /* Check for indirect system call */
172 if (saved_state32(arm_regs)->r[12] != 0) {
173 code = saved_state32(arm_regs)->r[12];
174 }
175 else {
176 code = saved_state32(arm_regs)->r[0];
177 }
178 } else {
179 /* Check for indirect system call */
180 if (saved_state64(arm_regs)->x[ARM64_SYSCALL_CODE_REG_NUM] != 0 ) {
181 code = saved_state64(arm_regs)->x[ARM64_SYSCALL_CODE_REG_NUM];
182 }
183 else {
184 code = saved_state64(arm_regs)->x[0];
185 }
186 }
187 }
188#else
189#error Unknown Architecture
190#endif
191
192 // Bounds "check" the value of code a la unix_syscall
193 sy = (code >= nsysent) ? &systrace_sysent[SYS_invalid] : &systrace_sysent[code];
194
195 systrace_args(code, ip, uargs);
196
197 if ((id = sy->stsy_entry) != DTRACE_IDNONE) {
198 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
199 if (uthread)
200 uthread->t_dtrace_syscall_args = uargs;
201
202 static_assert(SYSTRACE_NARGS >= 5, "not enough system call arguments");
203 (*systrace_probe)(id, uargs[0], uargs[1], uargs[2], uargs[3], uargs[4]);
204
205 if (uthread)
206 uthread->t_dtrace_syscall_args = NULL;
207 }
208
209
210
211#if 0 /* XXX */
212 /*
213 * APPLE NOTE: Not implemented.
214 * We want to explicitly allow DTrace consumers to stop a process
215 * before it actually executes the meat of the syscall.
216 */
217 p = ttoproc(curthread);
218 mutex_enter(&p->p_lock);
219 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) {
220 curthread->t_dtrace_stop = 0;
221 stop(PR_REQUESTED, 0);
222 }
223 mutex_exit(&p->p_lock);
224#endif
225
226 rval = (*sy->stsy_underlying)(pp, uap, rv);
227
228 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
229 uint64_t munged_rv0, munged_rv1;
230 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
231
232 if (uthread)
233 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */
234
235 /*
236 * "Decode" rv for use in the call to dtrace_probe()
237 */
238 if (rval == ERESTART) {
239 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
240 munged_rv1 = -1LL;
241 } else if (rval != EJUSTRETURN) {
242 if (rval) {
243 munged_rv0 = -1LL; /* Mimic what libc will do. */
244 munged_rv1 = -1LL;
245 } else {
246 switch (sy->stsy_return_type) {
247 case _SYSCALL_RET_INT_T:
248 munged_rv0 = rv[0];
249 munged_rv1 = rv[1];
250 break;
251 case _SYSCALL_RET_UINT_T:
252 munged_rv0 = ((u_int)rv[0]);
253 munged_rv1 = ((u_int)rv[1]);
254 break;
255 case _SYSCALL_RET_OFF_T:
256 case _SYSCALL_RET_UINT64_T:
257 munged_rv0 = *(u_int64_t *)rv;
258 munged_rv1 = 0LL;
259 break;
260 case _SYSCALL_RET_ADDR_T:
261 case _SYSCALL_RET_SIZE_T:
262 case _SYSCALL_RET_SSIZE_T:
263 munged_rv0 = *(user_addr_t *)rv;
264 munged_rv1 = 0LL;
265 break;
266 case _SYSCALL_RET_NONE:
267 munged_rv0 = 0LL;
268 munged_rv1 = 0LL;
269 break;
270 default:
271 munged_rv0 = 0LL;
272 munged_rv1 = 0LL;
273 break;
274 }
275 }
276 } else {
277 munged_rv0 = 0LL;
278 munged_rv1 = 0LL;
279 }
280
281 /*
282 * <http://mail.opensolaris.org/pipermail/dtrace-discuss/2007-January/003276.html> says:
283 *
284 * "This is a bit of an historical artifact. At first, the syscall provider just
285 * had its return value in arg0, and the fbt and pid providers had their return
286 * values in arg1 (so that we could use arg0 for the offset of the return site).
287 *
288 * We inevitably started writing scripts where we wanted to see the return
289 * values from probes in all three providers, and we made this script easier
290 * to write by replicating the syscall return values in arg1 to match fbt and
291 * pid. We debated briefly about removing the return value from arg0, but
292 * decided that it would be less confusing to have the same data in two places
293 * than to have some non-helpful, non-intuitive value in arg0.
294 *
295 * This change was made 4/23/2003 according to the DTrace project's putback log."
296 */
297 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0);
298 }
299
300 return (rval);
301}
302
303void
304dtrace_systrace_syscall_return(unsigned short code, int rval, int *rv)
305{
306 systrace_sysent_t *sy;
307 dtrace_id_t id;
308
309 // Bounds "check" the value of code a la unix_syscall_return
310 sy = (code >= nsysent) ? &systrace_sysent[SYS_invalid] : &systrace_sysent[code];
311
312 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
313 uint64_t munged_rv0, munged_rv1;
314 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
315
316 if (uthread)
317 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */
318
319 /*
320 * "Decode" rv for use in the call to dtrace_probe()
321 */
322 if (rval == ERESTART) {
323 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
324 munged_rv1 = -1LL;
325 } else if (rval != EJUSTRETURN) {
326 if (rval) {
327 munged_rv0 = -1LL; /* Mimic what libc will do. */
328 munged_rv1 = -1LL;
329 } else {
330 switch (sy->stsy_return_type) {
331 case _SYSCALL_RET_INT_T:
332 munged_rv0 = rv[0];
333 munged_rv1 = rv[1];
334 break;
335 case _SYSCALL_RET_UINT_T:
336 munged_rv0 = ((u_int)rv[0]);
337 munged_rv1 = ((u_int)rv[1]);
338 break;
339 case _SYSCALL_RET_OFF_T:
340 case _SYSCALL_RET_UINT64_T:
341 munged_rv0 = *(u_int64_t *)rv;
342 munged_rv1 = 0LL;
343 break;
344 case _SYSCALL_RET_ADDR_T:
345 case _SYSCALL_RET_SIZE_T:
346 case _SYSCALL_RET_SSIZE_T:
347 munged_rv0 = *(user_addr_t *)rv;
348 munged_rv1 = 0LL;
349 break;
350 case _SYSCALL_RET_NONE:
351 munged_rv0 = 0LL;
352 munged_rv1 = 0LL;
353 break;
354 default:
355 munged_rv0 = 0LL;
356 munged_rv1 = 0LL;
357 break;
358 }
359 }
360 } else {
361 munged_rv0 = 0LL;
362 munged_rv1 = 0LL;
363 }
364
365 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0);
366 }
367}
368
369#define SYSTRACE_SHIFT 16
370#define SYSTRACE_ISENTRY(x) ((int)(x) >> SYSTRACE_SHIFT)
371#define SYSTRACE_SYSNUM(x) ((int)(x) & ((1 << SYSTRACE_SHIFT) - 1))
372#define SYSTRACE_ENTRY(id) ((1 << SYSTRACE_SHIFT) | (id))
373#define SYSTRACE_RETURN(id) (id)
374
375#if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
376#error 1 << SYSTRACE_SHIFT must exceed number of system calls
377#endif
378
379static dtrace_provider_id_t systrace_id;
380
381/*
382 * APPLE NOTE: Avoid name clash with Darwin automagic conf symbol.
383 * See balanced undef below.
384 */
385#define systrace_init _systrace_init
386
387static void
388systrace_init(struct sysent *actual, systrace_sysent_t **interposed)
389{
390
391 systrace_sysent_t *ssysent = *interposed; /* Avoid sysent shadow warning
392 from bsd/sys/sysent.h */
393 unsigned int i;
394
395 if (ssysent == NULL) {
396 *interposed = ssysent = kmem_zalloc(sizeof (systrace_sysent_t) *
397 NSYSCALL, KM_SLEEP);
398 }
399
400 for (i = 0; i < NSYSCALL; i++) {
401 struct sysent *a = &actual[i];
402 systrace_sysent_t *s = &ssysent[i];
403
404 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a))
405 continue;
406
407 if (a->sy_callc == dtrace_systrace_syscall)
408 continue;
409
410 s->stsy_underlying = a->sy_callc;
411 s->stsy_return_type = a->sy_return_type;
412 }
413 lck_mtx_init(&dtrace_systrace_lock, dtrace_lck_grp, dtrace_lck_attr);
414}
415
416
417/*ARGSUSED*/
418static void
419systrace_provide(void *arg, const dtrace_probedesc_t *desc)
420{
421#pragma unused(arg) /* __APPLE__ */
422 unsigned int i;
423
424 if (desc != NULL)
425 return;
426
427 systrace_init(sysent, &systrace_sysent);
428
429 for (i = 0; i < NSYSCALL; i++) {
430 if (systrace_sysent[i].stsy_underlying == NULL)
431 continue;
432
433 if (dtrace_probe_lookup(systrace_id, NULL,
434 syscallnames[i], "entry") != 0)
435 continue;
436
437 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
438 "entry", SYSTRACE_ARTIFICIAL_FRAMES,
439 (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
440 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
441 "return", SYSTRACE_ARTIFICIAL_FRAMES,
442 (void *)((uintptr_t)SYSTRACE_RETURN(i)));
443
444 systrace_sysent[i].stsy_entry = DTRACE_IDNONE;
445 systrace_sysent[i].stsy_return = DTRACE_IDNONE;
446 }
447}
448#undef systrace_init
449
450/*ARGSUSED*/
451static void
452systrace_destroy(void *arg, dtrace_id_t id, void *parg)
453{
454#pragma unused(arg,id) /* __APPLE__ */
455
456 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
457
458#pragma unused(sysnum) /* __APPLE__ */
459 /*
460 * There's nothing to do here but assert that we have actually been
461 * disabled.
462 */
463 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
464 ASSERT(systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
465 } else {
466 ASSERT(systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
467 }
468}
469
470/*ARGSUSED*/
471static int
472systrace_enable(void *arg, dtrace_id_t id, void *parg)
473{
474#pragma unused(arg) /* __APPLE__ */
475
476 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
477 int enabled = (systrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
478 systrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
479
480 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
481 systrace_sysent[sysnum].stsy_entry = id;
482 } else {
483 systrace_sysent[sysnum].stsy_return = id;
484 }
485
486 if (enabled) {
487 ASSERT(sysent[sysnum].sy_callc == dtrace_systrace_syscall);
488 return(0);
489 }
490
491 lck_mtx_lock(&dtrace_systrace_lock);
492 if (sysent[sysnum].sy_callc == systrace_sysent[sysnum].stsy_underlying) {
493 vm_offset_t dss = (vm_offset_t)&dtrace_systrace_syscall;
494 ml_nofault_copy((vm_offset_t)&dss, (vm_offset_t)&sysent[sysnum].sy_callc, sizeof(vm_offset_t));
495 }
496 lck_mtx_unlock(&dtrace_systrace_lock);
497 return (0);
498}
499
500/*ARGSUSED*/
501static void
502systrace_disable(void *arg, dtrace_id_t id, void *parg)
503{
504#pragma unused(arg,id) /* __APPLE__ */
505
506 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
507 int disable = (systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
508 systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
509
510 if (disable) {
511 lck_mtx_lock(&dtrace_systrace_lock);
512 if (sysent[sysnum].sy_callc == dtrace_systrace_syscall)
513 ml_nofault_copy((vm_offset_t)&systrace_sysent[sysnum].stsy_underlying, (vm_offset_t)&sysent[sysnum].sy_callc, sizeof(systrace_sysent[sysnum].stsy_underlying));
514 lck_mtx_unlock(&dtrace_systrace_lock);
515
516 }
517
518 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
519 systrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
520 } else {
521 systrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
522 }
523}
524
/*
 * Stability attributes of the "syscall" provider, passed to
 * dtrace_register() by systrace_attach().
 *
 * NOTE(review): the five rows presumably describe, in order, the provider,
 * module, function, name and argument attributes -- confirm against the
 * definition of dtrace_pattr_t.
 */
static dtrace_pattr_t systrace_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
};
532
533static dtrace_pops_t systrace_pops = {
534 .dtps_provide = systrace_provide,
535 .dtps_provide_module = NULL,
536 .dtps_enable = systrace_enable,
537 .dtps_disable = systrace_disable,
538 .dtps_suspend = NULL,
539 .dtps_resume = NULL,
540 .dtps_getargdesc = systrace_getargdesc,
541 .dtps_getargval = systrace_getargval,
542 .dtps_usermode = NULL,
543 .dtps_destroy = systrace_destroy
544};
545
546static int
547systrace_attach(dev_info_t *devi)
548{
549 systrace_probe = (void*)&dtrace_probe;
550 membar_enter();
551
552 if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0,
553 DDI_PSEUDO, 0) == DDI_FAILURE ||
554 dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL,
555 &systrace_pops, NULL, &systrace_id) != 0) {
556 systrace_probe = systrace_stub;
557 ddi_remove_minor_node(devi, NULL);
558 return (DDI_FAILURE);
559 }
560
561 return (DDI_SUCCESS);
562}
563
564
565/*
566 * APPLE NOTE: systrace_detach not implemented
567 */
568#if !defined(__APPLE__)
569static int
570systrace_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
571{
572 switch (cmd) {
573 case DDI_DETACH:
574 break;
575 case DDI_SUSPEND:
576 return (DDI_SUCCESS);
577 default:
578 return (DDI_FAILURE);
579 }
580
581 if (dtrace_unregister(systrace_id) != 0)
582 return (DDI_FAILURE);
583
584 ddi_remove_minor_node(devi, NULL);
585 systrace_probe = systrace_stub;
586 return (DDI_SUCCESS);
587}
588#endif /* __APPLE__ */
589
590
591typedef kern_return_t (*mach_call_t)(void *);
592
593/* APPLE NOTE: From #include <kern/syscall_sw.h> which may be changed for 64 bit! */
594typedef void mach_munge_t(void *);
595
/*
 * Local declaration of one Mach trap table entry (see the APPLE NOTE about
 * <kern/syscall_sw.h>).  Within this file only mach_trap_function is
 * accessed: machtrace saves it, replaces it with the interposer and later
 * restores it.
 *
 * NOTE(review): the layout has to match the kernel's own definition of
 * mach_trap_t, including the conditional members -- confirm whenever
 * syscall_sw.h changes.
 */
typedef struct {
	int			mach_trap_arg_count;
	kern_return_t		(*mach_trap_function)(void *);	/* trap handler */
#if defined(__arm64__) || defined(__x86_64__)
	mach_munge_t		*mach_trap_arg_munge32; /* system call arguments for 32-bit */
#endif
	int			mach_trap_u32_words;
#if	MACH_ASSERT
	const char*		mach_trap_name;
#endif /* MACH_ASSERT */
} mach_trap_t;
607
608extern const mach_trap_t mach_trap_table[]; /* syscall_sw.h now declares this as const */
609extern int mach_trap_count;
610
611extern const char *mach_syscall_name_table[];
612
/*
 * XXX From osfmk/i386/bsd_i386.c
 *
 * Argument block handed to a Mach trap handler: nine consecutive
 * syscall_arg_t slots.  dtrace_machtrace_syscall() casts a pointer to this
 * structure to syscall_arg_t * and indexes it as an array.
 */
struct mach_call_args {
	syscall_arg_t arg1;
	syscall_arg_t arg2;
	syscall_arg_t arg3;
	syscall_arg_t arg4;
	syscall_arg_t arg5;
	syscall_arg_t arg6;
	syscall_arg_t arg7;
	syscall_arg_t arg8;
	syscall_arg_t arg9;
};
625
626#undef NSYSCALL
627#define NSYSCALL mach_trap_count
628
629#if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
630#error 1 << SYSTRACE_SHIFT must exceed number of Mach traps
631#endif
632
/*
 * Per-trap bookkeeping of the "mach_trap" provider; one element per Mach
 * trap table slot.
 */
typedef struct machtrace_sysent {
	dtrace_id_t	stsy_entry;		/* id of the enabled "entry" probe, or DTRACE_IDNONE */
	dtrace_id_t	stsy_return;		/* id of the enabled "return" probe, or DTRACE_IDNONE */
	kern_return_t	(*stsy_underlying)(void *);	/* original trap handler saved by machtrace_init() */
	int32_t		stsy_return_type;	/* NOTE(review): never assigned by the machtrace code visible here */
} machtrace_sysent_t;
639
640static machtrace_sysent_t *machtrace_sysent = NULL;
641
642void (*machtrace_probe)(dtrace_id_t, uint64_t, uint64_t,
643 uint64_t, uint64_t, uint64_t);
644
645static uint64_t machtrace_getarg(void *, dtrace_id_t, void *, int, int);
646
647static dtrace_provider_id_t machtrace_id;
648
649static kern_return_t
650dtrace_machtrace_syscall(struct mach_call_args *args)
651{
652 int code; /* The mach call number */
653
654 machtrace_sysent_t *sy;
655 dtrace_id_t id;
656 kern_return_t rval;
657#if 0 /* XXX */
658 proc_t *p;
659#endif
660 syscall_arg_t *ip = (syscall_arg_t *)args;
661 mach_call_t mach_call;
662
663#if defined (__x86_64__)
664 {
665 pal_register_cache_state(current_thread(), VALID);
666 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
667
668 if (is_saved_state64(tagged_regs)) {
669 code = saved_state64(tagged_regs)->rax & SYSCALL_NUMBER_MASK;
670 } else {
671 code = -saved_state32(tagged_regs)->eax;
672 }
673 }
674#elif defined(__arm__)
675 {
676 /* r12 has the machcall number, but it is -ve */
677 arm_saved_state_t *arm_regs = (arm_saved_state_t *) find_user_regs(current_thread());
678 code = (int)arm_regs->r[12];
679 ASSERT(code < 0); /* Otherwise it would be a Unix syscall */
680 code = -code;
681 }
682#elif defined(__arm64__)
683 {
684 /* From arm/thread_status.h:get_saved_state_svc_number */
685 arm_saved_state_t *arm_regs = (arm_saved_state_t *) find_user_regs(current_thread());
686 if (is_saved_state32(arm_regs)) {
687 code = (int)saved_state32(arm_regs)->r[12];
688 } else {
689 code = (int)saved_state64(arm_regs)->x[ARM64_SYSCALL_CODE_REG_NUM];
690 }
691
692 /* From bsd/arm64.c:mach_syscall */
693 ASSERT(code < 0); /* Otherwise it would be a Unix syscall */
694 code = -code;
695 }
696#else
697#error Unknown Architecture
698#endif
699
700 sy = &machtrace_sysent[code];
701
702 if ((id = sy->stsy_entry) != DTRACE_IDNONE) {
703 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
704
705 if (uthread)
706 uthread->t_dtrace_syscall_args = (void *)ip;
707
708 (*machtrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4));
709
710 if (uthread)
711 uthread->t_dtrace_syscall_args = (void *)0;
712 }
713
714#if 0 /* XXX */
715 /*
716 * APPLE NOTE: Not implemented.
717 * We want to explicitly allow DTrace consumers to stop a process
718 * before it actually executes the meat of the syscall.
719 */
720 p = ttoproc(curthread);
721 mutex_enter(&p->p_lock);
722 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) {
723 curthread->t_dtrace_stop = 0;
724 stop(PR_REQUESTED, 0);
725 }
726 mutex_exit(&p->p_lock);
727#endif
728
729 mach_call = (mach_call_t)(*sy->stsy_underlying);
730 rval = mach_call(args);
731
732 if ((id = sy->stsy_return) != DTRACE_IDNONE)
733 (*machtrace_probe)(id, (uint64_t)rval, 0, 0, 0, 0);
734
735 return (rval);
736}
737
738static void
739machtrace_init(const mach_trap_t *actual, machtrace_sysent_t **interposed)
740{
741 machtrace_sysent_t *msysent = *interposed;
742 int i;
743
744 if (msysent == NULL) {
745 *interposed = msysent = kmem_zalloc(sizeof (machtrace_sysent_t) *
746 NSYSCALL, KM_SLEEP);
747 }
748
749 for (i = 0; i < NSYSCALL; i++) {
750 const mach_trap_t *a = &actual[i];
751 machtrace_sysent_t *s = &msysent[i];
752
753 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a))
754 continue;
755
756 if (a->mach_trap_function == (mach_call_t)(dtrace_machtrace_syscall))
757 continue;
758
759 s->stsy_underlying = a->mach_trap_function;
760 }
761}
762
763/*ARGSUSED*/
764static void
765machtrace_provide(void *arg, const dtrace_probedesc_t *desc)
766{
767#pragma unused(arg) /* __APPLE__ */
768
769 int i;
770
771 if (desc != NULL)
772 return;
773
774 machtrace_init(mach_trap_table, &machtrace_sysent);
775
776 for (i = 0; i < NSYSCALL; i++) {
777
778 if (machtrace_sysent[i].stsy_underlying == NULL)
779 continue;
780
781 if (dtrace_probe_lookup(machtrace_id, NULL,
782 mach_syscall_name_table[i], "entry") != 0)
783 continue;
784
785 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i],
786 "entry", MACHTRACE_ARTIFICIAL_FRAMES,
787 (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
788 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i],
789 "return", MACHTRACE_ARTIFICIAL_FRAMES,
790 (void *)((uintptr_t)SYSTRACE_RETURN(i)));
791
792 machtrace_sysent[i].stsy_entry = DTRACE_IDNONE;
793 machtrace_sysent[i].stsy_return = DTRACE_IDNONE;
794 }
795}
796
797/*ARGSUSED*/
798static void
799machtrace_destroy(void *arg, dtrace_id_t id, void *parg)
800{
801#pragma unused(arg,id) /* __APPLE__ */
802 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
803
804#pragma unused(sysnum) /* __APPLE__ */
805
806 /*
807 * There's nothing to do here but assert that we have actually been
808 * disabled.
809 */
810 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
811 ASSERT(machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
812 } else {
813 ASSERT(machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
814 }
815}
816
817/*ARGSUSED*/
818static int
819machtrace_enable(void *arg, dtrace_id_t id, void *parg)
820{
821#pragma unused(arg) /* __APPLE__ */
822
823 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
824 int enabled = (machtrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
825 machtrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
826
827 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
828 machtrace_sysent[sysnum].stsy_entry = id;
829 } else {
830 machtrace_sysent[sysnum].stsy_return = id;
831 }
832
833 if (enabled) {
834 ASSERT(mach_trap_table[sysnum].mach_trap_function == (void *)dtrace_machtrace_syscall);
835 return(0);
836 }
837
838 lck_mtx_lock(&dtrace_systrace_lock);
839
840 if (mach_trap_table[sysnum].mach_trap_function == machtrace_sysent[sysnum].stsy_underlying) {
841 vm_offset_t dss = (vm_offset_t)&dtrace_machtrace_syscall;
842 ml_nofault_copy((vm_offset_t)&dss, (vm_offset_t)&mach_trap_table[sysnum].mach_trap_function, sizeof(vm_offset_t));
843 }
844
845 lck_mtx_unlock(&dtrace_systrace_lock);
846
847 return(0);
848}
849
850/*ARGSUSED*/
851static void
852machtrace_disable(void *arg, dtrace_id_t id, void *parg)
853{
854#pragma unused(arg,id) /* __APPLE__ */
855
856 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
857 int disable = (machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
858 machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
859
860 if (disable) {
861
862 lck_mtx_lock(&dtrace_systrace_lock);
863
864 if (mach_trap_table[sysnum].mach_trap_function == (mach_call_t)dtrace_machtrace_syscall) {
865 ml_nofault_copy((vm_offset_t)&machtrace_sysent[sysnum].stsy_underlying, (vm_offset_t)&mach_trap_table[sysnum].mach_trap_function, sizeof(vm_offset_t));
866 }
867 lck_mtx_unlock(&dtrace_systrace_lock);
868 }
869
870 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
871 machtrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
872 } else {
873 machtrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
874 }
875}
876
/*
 * Stability attributes of the "mach_trap" provider, passed to
 * dtrace_register() by machtrace_attach().
 *
 * NOTE(review): the five rows presumably describe, in order, the provider,
 * module, function, name and argument attributes -- confirm against the
 * definition of dtrace_pattr_t.
 */
static dtrace_pattr_t machtrace_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
};
884
885static dtrace_pops_t machtrace_pops = {
886 .dtps_provide = machtrace_provide,
887 .dtps_provide_module = NULL,
888 .dtps_enable = machtrace_enable,
889 .dtps_disable = machtrace_disable,
890 .dtps_suspend = NULL,
891 .dtps_resume = NULL,
892 .dtps_getargdesc = NULL,
893 .dtps_getargval = machtrace_getarg,
894 .dtps_usermode = NULL,
895 .dtps_destroy = machtrace_destroy
896};
897
898static int
899machtrace_attach(dev_info_t *devi)
900{
901 machtrace_probe = dtrace_probe;
902 membar_enter();
903
904 if (ddi_create_minor_node(devi, "machtrace", S_IFCHR, 0,
905 DDI_PSEUDO, 0) == DDI_FAILURE ||
906 dtrace_register("mach_trap", &machtrace_attr, DTRACE_PRIV_USER, NULL,
907 &machtrace_pops, NULL, &machtrace_id) != 0) {
908 machtrace_probe = (void*)&systrace_stub;
909 ddi_remove_minor_node(devi, NULL);
910 return (DDI_FAILURE);
911 }
912
913 return (DDI_SUCCESS);
914}
915
916d_open_t _systrace_open;
917
/*
 * open entry of the systrace character device switch.  It has nothing to
 * set up: all arguments are ignored and 0 is always returned.
 */
int _systrace_open(dev_t dev, int flags, int devtype, struct proc *p)
{
	(void)dev;
	(void)flags;
	(void)devtype;
	(void)p;

	return 0;
}
923
924#define SYSTRACE_MAJOR -24 /* let the kernel pick the device number */
925
/*
 * Character device switch for the systrace device, registered through
 * cdevsw_add() in systrace_init().  Only the open slot has a handler of its
 * own (_systrace_open); every other slot is filled with an eno_* or nulldev
 * placeholder, or NULL.
 */
static struct cdevsw systrace_cdevsw =
{
	_systrace_open,		/* open */
	eno_opcl,			/* close */
	eno_rdwrt,			/* read */
	eno_rdwrt,			/* write */
	eno_ioctl,			/* ioctl */
	(stop_fcn_t *)nulldev, /* stop */
	(reset_fcn_t *)nulldev, /* reset */
	NULL,				/* tty's */
	eno_select,			/* select */
	eno_mmap,			/* mmap */
	eno_strat,			/* strategy */
	eno_getc,			/* getc */
	eno_putc,			/* putc */
	0					/* type */
};
947
948void systrace_init( void );
949
950void systrace_init( void )
951{
952 if (dtrace_sdt_probes_restricted()) {
953 return;
954 }
955
956 int majdevno = cdevsw_add(SYSTRACE_MAJOR, &systrace_cdevsw);
957
958 if (majdevno < 0) {
959 printf("systrace_init: failed to allocate a major number!\n");
960 return;
961 }
962
963 systrace_attach((dev_info_t*)(uintptr_t)majdevno);
964 machtrace_attach((dev_info_t*)(uintptr_t)majdevno);
965}
966#undef SYSTRACE_MAJOR
967
968static uint64_t
969systrace_getargval(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
970{
971#pragma unused(arg,id,parg,aframes) /* __APPLE__ */
972 uint64_t val = 0;
973 uint64_t *uargs = NULL;
974
975 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
976
977 if (uthread)
978 uargs = uthread->t_dtrace_syscall_args;
979 if (!uargs)
980 return(0);
981 if (argno < 0 || argno >= SYSTRACE_NARGS)
982 return(0);
983
984 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
985 val = uargs[argno];
986 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
987 return (val);
988}
989
990static void
991systrace_getargdesc(void *arg, dtrace_id_t id, void *parg,
992 dtrace_argdesc_t *desc)
993{
994#pragma unused(arg, id)
995 int sysnum = SYSTRACE_SYSNUM(parg);
996 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
997 uint64_t *uargs = NULL;
998
999 if (!uthread) {
1000 desc->dtargd_ndx = DTRACE_ARGNONE;
1001 return;
1002 }
1003
1004 uargs = uthread->t_dtrace_syscall_args;
1005
1006 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
1007 systrace_entry_setargdesc(sysnum, desc->dtargd_ndx,
1008 desc->dtargd_native, sizeof(desc->dtargd_native));
1009 }
1010 else {
1011 systrace_return_setargdesc(sysnum, desc->dtargd_ndx,
1012 desc->dtargd_native, sizeof(desc->dtargd_native));
1013 }
1014
1015 if (desc->dtargd_native[0] == '\0')
1016 desc->dtargd_ndx = DTRACE_ARGNONE;
1017}
1018
1019static uint64_t
1020machtrace_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
1021{
1022#pragma unused(arg,id,parg,aframes) /* __APPLE__ */
1023 uint64_t val = 0;
1024 syscall_arg_t *stack = (syscall_arg_t *)NULL;
1025
1026 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
1027
1028 if (uthread)
1029 stack = (syscall_arg_t *)uthread->t_dtrace_syscall_args;
1030
1031 if (!stack)
1032 return(0);
1033
1034 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
1035 /* dtrace_probe arguments arg0 .. arg4 are 64bits wide */
1036 val = (uint64_t)*(stack+argno);
1037 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
1038 return (val);
1039}
1040
1041