1 | /* |
2 | * CDDL HEADER START |
3 | * |
4 | * The contents of this file are subject to the terms of the |
5 | * Common Development and Distribution License (the "License"). |
6 | * You may not use this file except in compliance with the License. |
7 | * |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
9 | * or http://www.opensolaris.org/os/licensing. |
10 | * See the License for the specific language governing permissions |
11 | * and limitations under the License. |
12 | * |
13 | * When distributing Covered Code, include this CDDL HEADER in each |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
15 | * If applicable, add the following below this CDDL HEADER, with the |
16 | * fields enclosed by brackets "[]" replaced with your own identifying |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] |
18 | * |
19 | * CDDL HEADER END |
20 | */ |
21 | /* |
22 | * Copyright 2009 Sun Microsystems, Inc. All rights reserved. |
23 | * Use is subject to license terms. |
24 | */ |
25 | |
26 | /* #pragma ident "@(#)systrace.c 1.6 06/09/19 SMI" */ |
27 | |
28 | #ifdef KERNEL |
29 | #ifndef _KERNEL |
30 | #define _KERNEL /* Solaris vs. Darwin */ |
31 | #endif |
32 | #endif |
33 | |
34 | #include <kern/thread.h> |
35 | #include <mach/thread_status.h> |
36 | |
/*
 * Masks used below to pull the system call number out of the saved user
 * register state on x86_64.
 *
 * XXX All of these should really be derived from syscall_sw.h
 */
#if defined (__x86_64__)
/* Bit position of the 8-bit "class" field within the 64-bit syscall register. */
#define SYSCALL_CLASS_SHIFT 24
#define SYSCALL_CLASS_MASK (0xFF << SYSCALL_CLASS_SHIFT)
/* Everything except the class byte; applied to rax for 64-bit callers. */
#define SYSCALL_NUMBER_MASK (~SYSCALL_CLASS_MASK)
/* Applied to eax for 32-bit callers. */
#define I386_SYSCALL_NUMBER_MASK (0xFFFF)
#endif
44 | |
45 | #include <sys/param.h> |
46 | #include <sys/systm.h> |
47 | #include <sys/proc.h> |
48 | #include <sys/errno.h> |
49 | #include <sys/ioctl.h> |
50 | #include <sys/conf.h> |
51 | #include <sys/fcntl.h> |
52 | #include <sys/syscall.h> |
53 | #include <miscfs/devfs/devfs.h> |
54 | |
55 | #include <sys/dtrace.h> |
56 | #include <sys/dtrace_impl.h> |
57 | #include <sys/systrace_args.h> |
58 | #include "systrace.h" |
59 | #include <sys/stat.h> |
60 | #include <sys/systm.h> |
61 | #include <sys/conf.h> |
62 | #include <sys/user.h> |
63 | |
64 | #include <machine/pal_routines.h> |
65 | |
/*
 * Values handed to dtrace_probe_create() as the "artificial frames"
 * argument for the syscall and mach_trap providers respectively.
 * NOTE(review): presumably the number of provider-internal stack frames
 * DTrace should skip when walking the stack -- confirm against sys/dtrace.h.
 */
#if defined (__x86_64__)
#define SYSTRACE_ARTIFICIAL_FRAMES 2
#define MACHTRACE_ARTIFICIAL_FRAMES 3
#elif defined(__arm__) || defined(__arm64__)
#define SYSTRACE_ARTIFICIAL_FRAMES 2
#define MACHTRACE_ARTIFICIAL_FRAMES 3
#else
#error Unknown Architecture
#endif

/*
 * Number of elements in the uthread's uu_arg[] array; used as the size of
 * the per-call argument scratch buffer and as the upper bound for argument
 * indexes in systrace_getargval().
 */
#define SYSTRACE_NARGS (int)(sizeof(((uthread_t)NULL)->uu_arg) / sizeof(((uthread_t)NULL)->uu_arg[0]))
77 | |
78 | #include <sys/sysent.h> |
/*
 * Compatibility shims so the code below can keep using the Solaris names.
 */
#define sy_callc sy_call /* Map Solaris slot name to Darwin's */
#define NSYSCALL nsysent /* and is less than 500 or so */

/* Table of system call names, indexed by system call number. */
extern const char *syscallnames[];

#include <sys/dtrace_glue.h>
#define casptr dtrace_casptr
#define membar_enter dtrace_membar_producer

/*
 * Both predicates are constants here, so the
 * "LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a)" tests below are always false.
 */
#define LOADABLE_SYSCALL(a) 0 /* Not pertinent to Darwin. */
#define LOADED_SYSCALL(a) 1 /* Not pertinent to Darwin. */
90 | |
/* Lock attribute/group used when initializing dtrace_systrace_lock. */
extern lck_attr_t* dtrace_lck_attr;
extern lck_grp_t* dtrace_lck_grp;
/*
 * Held while the enable/disable routines of both providers in this file
 * rewrite a dispatch table slot.
 */
static lck_mtx_t dtrace_systrace_lock; /* probe state lock */

/* Shadow table, one entry per system call; allocated on first provide. */
systrace_sysent_t *systrace_sysent = NULL;
/* Probe entry point; set to dtrace_probe on attach, systrace_stub on failure. */
void (*systrace_probe)(dtrace_id_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t);

/* Forward declarations for the provider ops table (systrace_pops). */
static uint64_t systrace_getargval(void *, dtrace_id_t, void *, int, int);
static void systrace_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *);
100 | |
101 | void |
102 | systrace_stub(dtrace_id_t id, uint64_t arg0, uint64_t arg1, |
103 | uint64_t arg2, uint64_t arg3, uint64_t arg4) |
104 | { |
105 | #pragma unused(id,arg0,arg1,arg2,arg3,arg4) |
106 | } |
107 | |
108 | int32_t |
109 | dtrace_systrace_syscall(struct proc *pp, void *uap, int *rv) |
110 | { |
111 | unsigned short code; /* The system call number */ |
112 | |
113 | systrace_sysent_t *sy; |
114 | dtrace_id_t id; |
115 | int32_t rval; |
116 | syscall_arg_t *ip = (syscall_arg_t *)uap; |
117 | uint64_t uargs[SYSTRACE_NARGS] = {0}; |
118 | |
119 | #if defined (__x86_64__) |
120 | { |
121 | pal_register_cache_state(current_thread(), VALID); |
122 | x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread()); |
123 | |
124 | if (is_saved_state64(tagged_regs)) { |
125 | x86_saved_state64_t *regs = saved_state64(tagged_regs); |
126 | code = regs->rax & SYSCALL_NUMBER_MASK; |
127 | /* |
128 | * Check for indirect system call... system call number |
129 | * passed as 'arg0' |
130 | */ |
131 | if (code == 0) { |
132 | code = regs->rdi; |
133 | } |
134 | } else { |
135 | code = saved_state32(tagged_regs)->eax & I386_SYSCALL_NUMBER_MASK; |
136 | |
137 | if (code == 0) { |
138 | vm_offset_t params = (vm_offset_t) (saved_state32(tagged_regs)->uesp + sizeof (int)); |
139 | code = fuword(params); |
140 | } |
141 | } |
142 | } |
143 | #elif defined(__arm__) |
144 | { |
145 | /* |
146 | * On arm, syscall numbers depend on a flavor (indirect or not) |
147 | * and can be in either r0 or r12 (always u32) |
148 | */ |
149 | |
150 | /* See bsd/dev/arm/systemcalls.c:arm_get_syscall_number */ |
151 | arm_saved_state_t *arm_regs = (arm_saved_state_t *) find_user_regs(current_thread()); |
152 | |
153 | /* Check for indirect system call */ |
154 | if (arm_regs->r[12] != 0) |
155 | code = arm_regs->r[12]; |
156 | else |
157 | code = arm_regs->r[0]; |
158 | } |
159 | #elif defined(__arm64__) |
160 | { |
161 | /* |
162 | * On arm64, syscall numbers depend on a flavor (indirect or not) |
163 | * ... and for u32 can be in either r0 or r12 |
164 | * ... and for u64 can be in either x0 or x16 |
165 | */ |
166 | |
167 | /* see bsd/dev/arm/systemcalls.c:arm_get_syscall_number */ |
168 | arm_saved_state_t *arm_regs = (arm_saved_state_t *) find_user_regs(current_thread()); |
169 | |
170 | if (is_saved_state32(arm_regs)) { |
171 | /* Check for indirect system call */ |
172 | if (saved_state32(arm_regs)->r[12] != 0) { |
173 | code = saved_state32(arm_regs)->r[12]; |
174 | } |
175 | else { |
176 | code = saved_state32(arm_regs)->r[0]; |
177 | } |
178 | } else { |
179 | /* Check for indirect system call */ |
180 | if (saved_state64(arm_regs)->x[ARM64_SYSCALL_CODE_REG_NUM] != 0 ) { |
181 | code = saved_state64(arm_regs)->x[ARM64_SYSCALL_CODE_REG_NUM]; |
182 | } |
183 | else { |
184 | code = saved_state64(arm_regs)->x[0]; |
185 | } |
186 | } |
187 | } |
188 | #else |
189 | #error Unknown Architecture |
190 | #endif |
191 | |
192 | // Bounds "check" the value of code a la unix_syscall |
193 | sy = (code >= nsysent) ? &systrace_sysent[SYS_invalid] : &systrace_sysent[code]; |
194 | |
195 | systrace_args(code, ip, uargs); |
196 | |
197 | if ((id = sy->stsy_entry) != DTRACE_IDNONE) { |
198 | uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); |
199 | if (uthread) |
200 | uthread->t_dtrace_syscall_args = uargs; |
201 | |
202 | static_assert(SYSTRACE_NARGS >= 5, "not enough system call arguments" ); |
203 | (*systrace_probe)(id, uargs[0], uargs[1], uargs[2], uargs[3], uargs[4]); |
204 | |
205 | if (uthread) |
206 | uthread->t_dtrace_syscall_args = NULL; |
207 | } |
208 | |
209 | |
210 | |
211 | #if 0 /* XXX */ |
212 | /* |
213 | * APPLE NOTE: Not implemented. |
214 | * We want to explicitly allow DTrace consumers to stop a process |
215 | * before it actually executes the meat of the syscall. |
216 | */ |
217 | p = ttoproc(curthread); |
218 | mutex_enter(&p->p_lock); |
219 | if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) { |
220 | curthread->t_dtrace_stop = 0; |
221 | stop(PR_REQUESTED, 0); |
222 | } |
223 | mutex_exit(&p->p_lock); |
224 | #endif |
225 | |
226 | rval = (*sy->stsy_underlying)(pp, uap, rv); |
227 | |
228 | if ((id = sy->stsy_return) != DTRACE_IDNONE) { |
229 | uint64_t munged_rv0, munged_rv1; |
230 | uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); |
231 | |
232 | if (uthread) |
233 | uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */ |
234 | |
235 | /* |
236 | * "Decode" rv for use in the call to dtrace_probe() |
237 | */ |
238 | if (rval == ERESTART) { |
239 | munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */ |
240 | munged_rv1 = -1LL; |
241 | } else if (rval != EJUSTRETURN) { |
242 | if (rval) { |
243 | munged_rv0 = -1LL; /* Mimic what libc will do. */ |
244 | munged_rv1 = -1LL; |
245 | } else { |
246 | switch (sy->stsy_return_type) { |
247 | case _SYSCALL_RET_INT_T: |
248 | munged_rv0 = rv[0]; |
249 | munged_rv1 = rv[1]; |
250 | break; |
251 | case _SYSCALL_RET_UINT_T: |
252 | munged_rv0 = ((u_int)rv[0]); |
253 | munged_rv1 = ((u_int)rv[1]); |
254 | break; |
255 | case _SYSCALL_RET_OFF_T: |
256 | case _SYSCALL_RET_UINT64_T: |
257 | munged_rv0 = *(u_int64_t *)rv; |
258 | munged_rv1 = 0LL; |
259 | break; |
260 | case _SYSCALL_RET_ADDR_T: |
261 | case _SYSCALL_RET_SIZE_T: |
262 | case _SYSCALL_RET_SSIZE_T: |
263 | munged_rv0 = *(user_addr_t *)rv; |
264 | munged_rv1 = 0LL; |
265 | break; |
266 | case _SYSCALL_RET_NONE: |
267 | munged_rv0 = 0LL; |
268 | munged_rv1 = 0LL; |
269 | break; |
270 | default: |
271 | munged_rv0 = 0LL; |
272 | munged_rv1 = 0LL; |
273 | break; |
274 | } |
275 | } |
276 | } else { |
277 | munged_rv0 = 0LL; |
278 | munged_rv1 = 0LL; |
279 | } |
280 | |
281 | /* |
282 | * <http://mail.opensolaris.org/pipermail/dtrace-discuss/2007-January/003276.html> says: |
283 | * |
284 | * "This is a bit of an historical artifact. At first, the syscall provider just |
285 | * had its return value in arg0, and the fbt and pid providers had their return |
286 | * values in arg1 (so that we could use arg0 for the offset of the return site). |
287 | * |
288 | * We inevitably started writing scripts where we wanted to see the return |
289 | * values from probes in all three providers, and we made this script easier |
290 | * to write by replicating the syscall return values in arg1 to match fbt and |
291 | * pid. We debated briefly about removing the return value from arg0, but |
292 | * decided that it would be less confusing to have the same data in two places |
293 | * than to have some non-helpful, non-intuitive value in arg0. |
294 | * |
295 | * This change was made 4/23/2003 according to the DTrace project's putback log." |
296 | */ |
297 | (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0); |
298 | } |
299 | |
300 | return (rval); |
301 | } |
302 | |
303 | void |
304 | dtrace_systrace_syscall_return(unsigned short code, int rval, int *rv) |
305 | { |
306 | systrace_sysent_t *sy; |
307 | dtrace_id_t id; |
308 | |
309 | // Bounds "check" the value of code a la unix_syscall_return |
310 | sy = (code >= nsysent) ? &systrace_sysent[SYS_invalid] : &systrace_sysent[code]; |
311 | |
312 | if ((id = sy->stsy_return) != DTRACE_IDNONE) { |
313 | uint64_t munged_rv0, munged_rv1; |
314 | uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); |
315 | |
316 | if (uthread) |
317 | uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */ |
318 | |
319 | /* |
320 | * "Decode" rv for use in the call to dtrace_probe() |
321 | */ |
322 | if (rval == ERESTART) { |
323 | munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */ |
324 | munged_rv1 = -1LL; |
325 | } else if (rval != EJUSTRETURN) { |
326 | if (rval) { |
327 | munged_rv0 = -1LL; /* Mimic what libc will do. */ |
328 | munged_rv1 = -1LL; |
329 | } else { |
330 | switch (sy->stsy_return_type) { |
331 | case _SYSCALL_RET_INT_T: |
332 | munged_rv0 = rv[0]; |
333 | munged_rv1 = rv[1]; |
334 | break; |
335 | case _SYSCALL_RET_UINT_T: |
336 | munged_rv0 = ((u_int)rv[0]); |
337 | munged_rv1 = ((u_int)rv[1]); |
338 | break; |
339 | case _SYSCALL_RET_OFF_T: |
340 | case _SYSCALL_RET_UINT64_T: |
341 | munged_rv0 = *(u_int64_t *)rv; |
342 | munged_rv1 = 0LL; |
343 | break; |
344 | case _SYSCALL_RET_ADDR_T: |
345 | case _SYSCALL_RET_SIZE_T: |
346 | case _SYSCALL_RET_SSIZE_T: |
347 | munged_rv0 = *(user_addr_t *)rv; |
348 | munged_rv1 = 0LL; |
349 | break; |
350 | case _SYSCALL_RET_NONE: |
351 | munged_rv0 = 0LL; |
352 | munged_rv1 = 0LL; |
353 | break; |
354 | default: |
355 | munged_rv0 = 0LL; |
356 | munged_rv1 = 0LL; |
357 | break; |
358 | } |
359 | } |
360 | } else { |
361 | munged_rv0 = 0LL; |
362 | munged_rv1 = 0LL; |
363 | } |
364 | |
365 | (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0); |
366 | } |
367 | } |
368 | |
/*
 * Encoding of the per-probe cookie (the "parg" pointer handed to
 * dtrace_probe_create): the low SYSTRACE_SHIFT bits hold the system call
 * number, and bit SYSTRACE_SHIFT is set for an entry probe and clear for a
 * return probe.
 */
#define SYSTRACE_SHIFT 16
/* Non-zero when the cookie denotes an entry probe. */
#define SYSTRACE_ISENTRY(x) ((int)(x) >> SYSTRACE_SHIFT)
/* System call (or Mach trap) number stored in the cookie. */
#define SYSTRACE_SYSNUM(x) ((int)(x) & ((1 << SYSTRACE_SHIFT) - 1))
/* Build the cookie for an entry / return probe of call number id. */
#define SYSTRACE_ENTRY(id) ((1 << SYSTRACE_SHIFT) | (id))
#define SYSTRACE_RETURN(id) (id)

/*
 * NOTE(review): NSYSCALL expands to nsysent, which does not look like a
 * preprocessor constant; if so the preprocessor evaluates it as 0 and this
 * check can never fire -- confirm.
 */
#if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
#error 1 << SYSTRACE_SHIFT must exceed number of system calls
#endif

/* Provider handle filled in by dtrace_register() in systrace_attach(). */
static dtrace_provider_id_t systrace_id;
380 | |
381 | /* |
382 | * APPLE NOTE: Avoid name clash with Darwin automagic conf symbol. |
383 | * See balanced undef below. |
384 | */ |
385 | #define systrace_init _systrace_init |
386 | |
387 | static void |
388 | systrace_init(struct sysent *actual, systrace_sysent_t **interposed) |
389 | { |
390 | |
391 | systrace_sysent_t *ssysent = *interposed; /* Avoid sysent shadow warning |
392 | from bsd/sys/sysent.h */ |
393 | unsigned int i; |
394 | |
395 | if (ssysent == NULL) { |
396 | *interposed = ssysent = kmem_zalloc(sizeof (systrace_sysent_t) * |
397 | NSYSCALL, KM_SLEEP); |
398 | } |
399 | |
400 | for (i = 0; i < NSYSCALL; i++) { |
401 | struct sysent *a = &actual[i]; |
402 | systrace_sysent_t *s = &ssysent[i]; |
403 | |
404 | if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a)) |
405 | continue; |
406 | |
407 | if (a->sy_callc == dtrace_systrace_syscall) |
408 | continue; |
409 | |
410 | s->stsy_underlying = a->sy_callc; |
411 | s->stsy_return_type = a->sy_return_type; |
412 | } |
413 | lck_mtx_init(&dtrace_systrace_lock, dtrace_lck_grp, dtrace_lck_attr); |
414 | } |
415 | |
416 | |
417 | /*ARGSUSED*/ |
418 | static void |
419 | systrace_provide(void *arg, const dtrace_probedesc_t *desc) |
420 | { |
421 | #pragma unused(arg) /* __APPLE__ */ |
422 | unsigned int i; |
423 | |
424 | if (desc != NULL) |
425 | return; |
426 | |
427 | systrace_init(sysent, &systrace_sysent); |
428 | |
429 | for (i = 0; i < NSYSCALL; i++) { |
430 | if (systrace_sysent[i].stsy_underlying == NULL) |
431 | continue; |
432 | |
433 | if (dtrace_probe_lookup(systrace_id, NULL, |
434 | syscallnames[i], "entry" ) != 0) |
435 | continue; |
436 | |
437 | (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i], |
438 | "entry" , SYSTRACE_ARTIFICIAL_FRAMES, |
439 | (void *)((uintptr_t)SYSTRACE_ENTRY(i))); |
440 | (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i], |
441 | "return" , SYSTRACE_ARTIFICIAL_FRAMES, |
442 | (void *)((uintptr_t)SYSTRACE_RETURN(i))); |
443 | |
444 | systrace_sysent[i].stsy_entry = DTRACE_IDNONE; |
445 | systrace_sysent[i].stsy_return = DTRACE_IDNONE; |
446 | } |
447 | } |
448 | #undef systrace_init |
449 | |
450 | /*ARGSUSED*/ |
451 | static void |
452 | systrace_destroy(void *arg, dtrace_id_t id, void *parg) |
453 | { |
454 | #pragma unused(arg,id) /* __APPLE__ */ |
455 | |
456 | int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); |
457 | |
458 | #pragma unused(sysnum) /* __APPLE__ */ |
459 | /* |
460 | * There's nothing to do here but assert that we have actually been |
461 | * disabled. |
462 | */ |
463 | if (SYSTRACE_ISENTRY((uintptr_t)parg)) { |
464 | ASSERT(systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE); |
465 | } else { |
466 | ASSERT(systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE); |
467 | } |
468 | } |
469 | |
470 | /*ARGSUSED*/ |
471 | static int |
472 | systrace_enable(void *arg, dtrace_id_t id, void *parg) |
473 | { |
474 | #pragma unused(arg) /* __APPLE__ */ |
475 | |
476 | int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); |
477 | int enabled = (systrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE || |
478 | systrace_sysent[sysnum].stsy_return != DTRACE_IDNONE); |
479 | |
480 | if (SYSTRACE_ISENTRY((uintptr_t)parg)) { |
481 | systrace_sysent[sysnum].stsy_entry = id; |
482 | } else { |
483 | systrace_sysent[sysnum].stsy_return = id; |
484 | } |
485 | |
486 | if (enabled) { |
487 | ASSERT(sysent[sysnum].sy_callc == dtrace_systrace_syscall); |
488 | return(0); |
489 | } |
490 | |
491 | lck_mtx_lock(&dtrace_systrace_lock); |
492 | if (sysent[sysnum].sy_callc == systrace_sysent[sysnum].stsy_underlying) { |
493 | vm_offset_t dss = (vm_offset_t)&dtrace_systrace_syscall; |
494 | ml_nofault_copy((vm_offset_t)&dss, (vm_offset_t)&sysent[sysnum].sy_callc, sizeof(vm_offset_t)); |
495 | } |
496 | lck_mtx_unlock(&dtrace_systrace_lock); |
497 | return (0); |
498 | } |
499 | |
500 | /*ARGSUSED*/ |
501 | static void |
502 | systrace_disable(void *arg, dtrace_id_t id, void *parg) |
503 | { |
504 | #pragma unused(arg,id) /* __APPLE__ */ |
505 | |
506 | int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); |
507 | int disable = (systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE || |
508 | systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE); |
509 | |
510 | if (disable) { |
511 | lck_mtx_lock(&dtrace_systrace_lock); |
512 | if (sysent[sysnum].sy_callc == dtrace_systrace_syscall) |
513 | ml_nofault_copy((vm_offset_t)&systrace_sysent[sysnum].stsy_underlying, (vm_offset_t)&sysent[sysnum].sy_callc, sizeof(systrace_sysent[sysnum].stsy_underlying)); |
514 | lck_mtx_unlock(&dtrace_systrace_lock); |
515 | |
516 | } |
517 | |
518 | if (SYSTRACE_ISENTRY((uintptr_t)parg)) { |
519 | systrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE; |
520 | } else { |
521 | systrace_sysent[sysnum].stsy_return = DTRACE_IDNONE; |
522 | } |
523 | } |
524 | |
/*
 * Stability attributes registered for the "syscall" provider.
 * NOTE(review): rows presumably follow the dtrace_pattr_t member order
 * (provider, module, function, name, args) -- confirm against sys/dtrace.h.
 */
static dtrace_pattr_t systrace_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
};
532 | |
/*
 * Provider operations vector passed to dtrace_register() for the "syscall"
 * provider.  Callbacks left NULL are not implemented by this provider.
 */
static dtrace_pops_t systrace_pops = {
	.dtps_provide = systrace_provide,
	.dtps_provide_module = NULL,
	.dtps_enable = systrace_enable,
	.dtps_disable = systrace_disable,
	.dtps_suspend = NULL,
	.dtps_resume = NULL,
	.dtps_getargdesc = systrace_getargdesc,
	.dtps_getargval = systrace_getargval,
	.dtps_usermode = NULL,
	.dtps_destroy = systrace_destroy
};
545 | |
546 | static int |
547 | systrace_attach(dev_info_t *devi) |
548 | { |
549 | systrace_probe = (void*)&dtrace_probe; |
550 | membar_enter(); |
551 | |
552 | if (ddi_create_minor_node(devi, "systrace" , S_IFCHR, 0, |
553 | DDI_PSEUDO, 0) == DDI_FAILURE || |
554 | dtrace_register("syscall" , &systrace_attr, DTRACE_PRIV_USER, NULL, |
555 | &systrace_pops, NULL, &systrace_id) != 0) { |
556 | systrace_probe = systrace_stub; |
557 | ddi_remove_minor_node(devi, NULL); |
558 | return (DDI_FAILURE); |
559 | } |
560 | |
561 | return (DDI_SUCCESS); |
562 | } |
563 | |
564 | |
565 | /* |
566 | * APPLE NOTE: systrace_detach not implemented |
567 | */ |
568 | #if !defined(__APPLE__) |
569 | static int |
570 | systrace_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) |
571 | { |
572 | switch (cmd) { |
573 | case DDI_DETACH: |
574 | break; |
575 | case DDI_SUSPEND: |
576 | return (DDI_SUCCESS); |
577 | default: |
578 | return (DDI_FAILURE); |
579 | } |
580 | |
581 | if (dtrace_unregister(systrace_id) != 0) |
582 | return (DDI_FAILURE); |
583 | |
584 | ddi_remove_minor_node(devi, NULL); |
585 | systrace_probe = systrace_stub; |
586 | return (DDI_SUCCESS); |
587 | } |
588 | #endif /* __APPLE__ */ |
589 | |
590 | |
/* Signature of a Mach trap handler as stored in the trap table. */
typedef kern_return_t (*mach_call_t)(void *);

/* APPLE NOTE: From #include <kern/syscall_sw.h> which may be changed for 64 bit! */
typedef void mach_munge_t(void *);

/*
 * Local copy of the Mach trap table entry layout.
 * NOTE(review): must stay in sync with the definition in kern/syscall_sw.h.
 */
typedef struct {
	int mach_trap_arg_count;
	kern_return_t (*mach_trap_function)(void *); /* slot patched by machtrace_enable/disable */
#if defined(__arm64__) || defined(__x86_64__)
	mach_munge_t *mach_trap_arg_munge32; /* system call arguments for 32-bit */
#endif
	int mach_trap_u32_words;
#if MACH_ASSERT
	const char* mach_trap_name;
#endif /* MACH_ASSERT */
} mach_trap_t;

extern const mach_trap_t mach_trap_table[]; /* syscall_sw.h now declares this as const */
extern int mach_trap_count;

/* Mach trap names, indexed by trap number; used as probe function names. */
extern const char *mach_syscall_name_table[];
612 | |
/*
 * Argument block handed to a Mach trap handler: nine consecutive
 * syscall_arg_t words.  The machtrace code below views it as a flat
 * syscall_arg_t array.
 */
/* XXX From osfmk/i386/bsd_i386.c */
struct mach_call_args {
	syscall_arg_t arg1;
	syscall_arg_t arg2;
	syscall_arg_t arg3;
	syscall_arg_t arg4;
	syscall_arg_t arg5;
	syscall_arg_t arg6;
	syscall_arg_t arg7;
	syscall_arg_t arg8;
	syscall_arg_t arg9;
};
625 | |
/* From here on NSYSCALL means the number of Mach traps. */
#undef NSYSCALL
#define NSYSCALL mach_trap_count

/*
 * NOTE(review): mach_trap_count is declared above as an extern int, not a
 * preprocessor constant, so the preprocessor evaluates it as 0 and this
 * check can never fire -- confirm.
 */
#if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
#error 1 << SYSTRACE_SHIFT must exceed number of Mach traps
#endif

/* Shadow table entry for one Mach trap. */
typedef struct machtrace_sysent {
	dtrace_id_t stsy_entry; /* entry probe id, or DTRACE_IDNONE */
	dtrace_id_t stsy_return; /* return probe id, or DTRACE_IDNONE */
	kern_return_t (*stsy_underlying)(void *); /* real trap handler */
	int32_t stsy_return_type;
} machtrace_sysent_t;

/* Shadow table, one entry per Mach trap; allocated on first provide. */
static machtrace_sysent_t *machtrace_sysent = NULL;

/* Probe entry point; set to dtrace_probe on attach, systrace_stub on failure. */
void (*machtrace_probe)(dtrace_id_t, uint64_t, uint64_t,
    uint64_t, uint64_t, uint64_t);

static uint64_t machtrace_getarg(void *, dtrace_id_t, void *, int, int);

/* Provider handle filled in by dtrace_register() in machtrace_attach(). */
static dtrace_provider_id_t machtrace_id;
648 | |
649 | static kern_return_t |
650 | dtrace_machtrace_syscall(struct mach_call_args *args) |
651 | { |
652 | int code; /* The mach call number */ |
653 | |
654 | machtrace_sysent_t *sy; |
655 | dtrace_id_t id; |
656 | kern_return_t rval; |
657 | #if 0 /* XXX */ |
658 | proc_t *p; |
659 | #endif |
660 | syscall_arg_t *ip = (syscall_arg_t *)args; |
661 | mach_call_t mach_call; |
662 | |
663 | #if defined (__x86_64__) |
664 | { |
665 | pal_register_cache_state(current_thread(), VALID); |
666 | x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread()); |
667 | |
668 | if (is_saved_state64(tagged_regs)) { |
669 | code = saved_state64(tagged_regs)->rax & SYSCALL_NUMBER_MASK; |
670 | } else { |
671 | code = -saved_state32(tagged_regs)->eax; |
672 | } |
673 | } |
674 | #elif defined(__arm__) |
675 | { |
676 | /* r12 has the machcall number, but it is -ve */ |
677 | arm_saved_state_t *arm_regs = (arm_saved_state_t *) find_user_regs(current_thread()); |
678 | code = (int)arm_regs->r[12]; |
679 | ASSERT(code < 0); /* Otherwise it would be a Unix syscall */ |
680 | code = -code; |
681 | } |
682 | #elif defined(__arm64__) |
683 | { |
684 | /* From arm/thread_status.h:get_saved_state_svc_number */ |
685 | arm_saved_state_t *arm_regs = (arm_saved_state_t *) find_user_regs(current_thread()); |
686 | if (is_saved_state32(arm_regs)) { |
687 | code = (int)saved_state32(arm_regs)->r[12]; |
688 | } else { |
689 | code = (int)saved_state64(arm_regs)->x[ARM64_SYSCALL_CODE_REG_NUM]; |
690 | } |
691 | |
692 | /* From bsd/arm64.c:mach_syscall */ |
693 | ASSERT(code < 0); /* Otherwise it would be a Unix syscall */ |
694 | code = -code; |
695 | } |
696 | #else |
697 | #error Unknown Architecture |
698 | #endif |
699 | |
700 | sy = &machtrace_sysent[code]; |
701 | |
702 | if ((id = sy->stsy_entry) != DTRACE_IDNONE) { |
703 | uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); |
704 | |
705 | if (uthread) |
706 | uthread->t_dtrace_syscall_args = (void *)ip; |
707 | |
708 | (*machtrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4)); |
709 | |
710 | if (uthread) |
711 | uthread->t_dtrace_syscall_args = (void *)0; |
712 | } |
713 | |
714 | #if 0 /* XXX */ |
715 | /* |
716 | * APPLE NOTE: Not implemented. |
717 | * We want to explicitly allow DTrace consumers to stop a process |
718 | * before it actually executes the meat of the syscall. |
719 | */ |
720 | p = ttoproc(curthread); |
721 | mutex_enter(&p->p_lock); |
722 | if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) { |
723 | curthread->t_dtrace_stop = 0; |
724 | stop(PR_REQUESTED, 0); |
725 | } |
726 | mutex_exit(&p->p_lock); |
727 | #endif |
728 | |
729 | mach_call = (mach_call_t)(*sy->stsy_underlying); |
730 | rval = mach_call(args); |
731 | |
732 | if ((id = sy->stsy_return) != DTRACE_IDNONE) |
733 | (*machtrace_probe)(id, (uint64_t)rval, 0, 0, 0, 0); |
734 | |
735 | return (rval); |
736 | } |
737 | |
738 | static void |
739 | machtrace_init(const mach_trap_t *actual, machtrace_sysent_t **interposed) |
740 | { |
741 | machtrace_sysent_t *msysent = *interposed; |
742 | int i; |
743 | |
744 | if (msysent == NULL) { |
745 | *interposed = msysent = kmem_zalloc(sizeof (machtrace_sysent_t) * |
746 | NSYSCALL, KM_SLEEP); |
747 | } |
748 | |
749 | for (i = 0; i < NSYSCALL; i++) { |
750 | const mach_trap_t *a = &actual[i]; |
751 | machtrace_sysent_t *s = &msysent[i]; |
752 | |
753 | if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a)) |
754 | continue; |
755 | |
756 | if (a->mach_trap_function == (mach_call_t)(dtrace_machtrace_syscall)) |
757 | continue; |
758 | |
759 | s->stsy_underlying = a->mach_trap_function; |
760 | } |
761 | } |
762 | |
763 | /*ARGSUSED*/ |
764 | static void |
765 | machtrace_provide(void *arg, const dtrace_probedesc_t *desc) |
766 | { |
767 | #pragma unused(arg) /* __APPLE__ */ |
768 | |
769 | int i; |
770 | |
771 | if (desc != NULL) |
772 | return; |
773 | |
774 | machtrace_init(mach_trap_table, &machtrace_sysent); |
775 | |
776 | for (i = 0; i < NSYSCALL; i++) { |
777 | |
778 | if (machtrace_sysent[i].stsy_underlying == NULL) |
779 | continue; |
780 | |
781 | if (dtrace_probe_lookup(machtrace_id, NULL, |
782 | mach_syscall_name_table[i], "entry" ) != 0) |
783 | continue; |
784 | |
785 | (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i], |
786 | "entry" , MACHTRACE_ARTIFICIAL_FRAMES, |
787 | (void *)((uintptr_t)SYSTRACE_ENTRY(i))); |
788 | (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i], |
789 | "return" , MACHTRACE_ARTIFICIAL_FRAMES, |
790 | (void *)((uintptr_t)SYSTRACE_RETURN(i))); |
791 | |
792 | machtrace_sysent[i].stsy_entry = DTRACE_IDNONE; |
793 | machtrace_sysent[i].stsy_return = DTRACE_IDNONE; |
794 | } |
795 | } |
796 | |
797 | /*ARGSUSED*/ |
798 | static void |
799 | machtrace_destroy(void *arg, dtrace_id_t id, void *parg) |
800 | { |
801 | #pragma unused(arg,id) /* __APPLE__ */ |
802 | int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); |
803 | |
804 | #pragma unused(sysnum) /* __APPLE__ */ |
805 | |
806 | /* |
807 | * There's nothing to do here but assert that we have actually been |
808 | * disabled. |
809 | */ |
810 | if (SYSTRACE_ISENTRY((uintptr_t)parg)) { |
811 | ASSERT(machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE); |
812 | } else { |
813 | ASSERT(machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE); |
814 | } |
815 | } |
816 | |
817 | /*ARGSUSED*/ |
818 | static int |
819 | machtrace_enable(void *arg, dtrace_id_t id, void *parg) |
820 | { |
821 | #pragma unused(arg) /* __APPLE__ */ |
822 | |
823 | int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); |
824 | int enabled = (machtrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE || |
825 | machtrace_sysent[sysnum].stsy_return != DTRACE_IDNONE); |
826 | |
827 | if (SYSTRACE_ISENTRY((uintptr_t)parg)) { |
828 | machtrace_sysent[sysnum].stsy_entry = id; |
829 | } else { |
830 | machtrace_sysent[sysnum].stsy_return = id; |
831 | } |
832 | |
833 | if (enabled) { |
834 | ASSERT(mach_trap_table[sysnum].mach_trap_function == (void *)dtrace_machtrace_syscall); |
835 | return(0); |
836 | } |
837 | |
838 | lck_mtx_lock(&dtrace_systrace_lock); |
839 | |
840 | if (mach_trap_table[sysnum].mach_trap_function == machtrace_sysent[sysnum].stsy_underlying) { |
841 | vm_offset_t dss = (vm_offset_t)&dtrace_machtrace_syscall; |
842 | ml_nofault_copy((vm_offset_t)&dss, (vm_offset_t)&mach_trap_table[sysnum].mach_trap_function, sizeof(vm_offset_t)); |
843 | } |
844 | |
845 | lck_mtx_unlock(&dtrace_systrace_lock); |
846 | |
847 | return(0); |
848 | } |
849 | |
850 | /*ARGSUSED*/ |
851 | static void |
852 | machtrace_disable(void *arg, dtrace_id_t id, void *parg) |
853 | { |
854 | #pragma unused(arg,id) /* __APPLE__ */ |
855 | |
856 | int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); |
857 | int disable = (machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE || |
858 | machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE); |
859 | |
860 | if (disable) { |
861 | |
862 | lck_mtx_lock(&dtrace_systrace_lock); |
863 | |
864 | if (mach_trap_table[sysnum].mach_trap_function == (mach_call_t)dtrace_machtrace_syscall) { |
865 | ml_nofault_copy((vm_offset_t)&machtrace_sysent[sysnum].stsy_underlying, (vm_offset_t)&mach_trap_table[sysnum].mach_trap_function, sizeof(vm_offset_t)); |
866 | } |
867 | lck_mtx_unlock(&dtrace_systrace_lock); |
868 | } |
869 | |
870 | if (SYSTRACE_ISENTRY((uintptr_t)parg)) { |
871 | machtrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE; |
872 | } else { |
873 | machtrace_sysent[sysnum].stsy_return = DTRACE_IDNONE; |
874 | } |
875 | } |
876 | |
/*
 * Stability attributes registered for the "mach_trap" provider.
 * NOTE(review): rows presumably follow the dtrace_pattr_t member order
 * (provider, module, function, name, args) -- confirm against sys/dtrace.h.
 */
static dtrace_pattr_t machtrace_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
};

/*
 * Provider operations vector passed to dtrace_register() for the
 * "mach_trap" provider.  Unlike the syscall provider, no argument
 * description callback is supplied.
 */
static dtrace_pops_t machtrace_pops = {
	.dtps_provide = machtrace_provide,
	.dtps_provide_module = NULL,
	.dtps_enable = machtrace_enable,
	.dtps_disable = machtrace_disable,
	.dtps_suspend = NULL,
	.dtps_resume = NULL,
	.dtps_getargdesc = NULL,
	.dtps_getargval = machtrace_getarg,
	.dtps_usermode = NULL,
	.dtps_destroy = machtrace_destroy
};
897 | |
898 | static int |
899 | machtrace_attach(dev_info_t *devi) |
900 | { |
901 | machtrace_probe = dtrace_probe; |
902 | membar_enter(); |
903 | |
904 | if (ddi_create_minor_node(devi, "machtrace" , S_IFCHR, 0, |
905 | DDI_PSEUDO, 0) == DDI_FAILURE || |
906 | dtrace_register("mach_trap" , &machtrace_attr, DTRACE_PRIV_USER, NULL, |
907 | &machtrace_pops, NULL, &machtrace_id) != 0) { |
908 | machtrace_probe = (void*)&systrace_stub; |
909 | ddi_remove_minor_node(devi, NULL); |
910 | return (DDI_FAILURE); |
911 | } |
912 | |
913 | return (DDI_SUCCESS); |
914 | } |
915 | |
916 | d_open_t _systrace_open; |
917 | |
918 | int _systrace_open(dev_t dev, int flags, int devtype, struct proc *p) |
919 | { |
920 | #pragma unused(dev,flags,devtype,p) |
921 | return 0; |
922 | } |
923 | |
#define SYSTRACE_MAJOR -24 /* let the kernel pick the device number */

/*
 * A struct describing which functions will get invoked for certain
 * actions.  Only open is implemented (and always succeeds); the remaining
 * slots are filled with the eno_* / nulldev placeholder handlers.
 */
static struct cdevsw systrace_cdevsw =
{
	_systrace_open, /* open */
	eno_opcl, /* close */
	eno_rdwrt, /* read */
	eno_rdwrt, /* write */
	eno_ioctl, /* ioctl */
	(stop_fcn_t *)nulldev, /* stop */
	(reset_fcn_t *)nulldev, /* reset */
	NULL, /* tty's */
	eno_select, /* select */
	eno_mmap, /* mmap */
	eno_strat, /* strategy */
	eno_getc, /* getc */
	eno_putc, /* putc */
	0 /* type */
};
947 | |
948 | void systrace_init( void ); |
949 | |
950 | void systrace_init( void ) |
951 | { |
952 | if (dtrace_sdt_probes_restricted()) { |
953 | return; |
954 | } |
955 | |
956 | int majdevno = cdevsw_add(SYSTRACE_MAJOR, &systrace_cdevsw); |
957 | |
958 | if (majdevno < 0) { |
959 | printf("systrace_init: failed to allocate a major number!\n" ); |
960 | return; |
961 | } |
962 | |
963 | systrace_attach((dev_info_t*)(uintptr_t)majdevno); |
964 | machtrace_attach((dev_info_t*)(uintptr_t)majdevno); |
965 | } |
966 | #undef SYSTRACE_MAJOR |
967 | |
968 | static uint64_t |
969 | systrace_getargval(void *arg, dtrace_id_t id, void *parg, int argno, int aframes) |
970 | { |
971 | #pragma unused(arg,id,parg,aframes) /* __APPLE__ */ |
972 | uint64_t val = 0; |
973 | uint64_t *uargs = NULL; |
974 | |
975 | uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); |
976 | |
977 | if (uthread) |
978 | uargs = uthread->t_dtrace_syscall_args; |
979 | if (!uargs) |
980 | return(0); |
981 | if (argno < 0 || argno >= SYSTRACE_NARGS) |
982 | return(0); |
983 | |
984 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); |
985 | val = uargs[argno]; |
986 | DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); |
987 | return (val); |
988 | } |
989 | |
990 | static void |
991 | systrace_getargdesc(void *arg, dtrace_id_t id, void *parg, |
992 | dtrace_argdesc_t *desc) |
993 | { |
994 | #pragma unused(arg, id) |
995 | int sysnum = SYSTRACE_SYSNUM(parg); |
996 | uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); |
997 | uint64_t *uargs = NULL; |
998 | |
999 | if (!uthread) { |
1000 | desc->dtargd_ndx = DTRACE_ARGNONE; |
1001 | return; |
1002 | } |
1003 | |
1004 | uargs = uthread->t_dtrace_syscall_args; |
1005 | |
1006 | if (SYSTRACE_ISENTRY((uintptr_t)parg)) { |
1007 | systrace_entry_setargdesc(sysnum, desc->dtargd_ndx, |
1008 | desc->dtargd_native, sizeof(desc->dtargd_native)); |
1009 | } |
1010 | else { |
1011 | systrace_return_setargdesc(sysnum, desc->dtargd_ndx, |
1012 | desc->dtargd_native, sizeof(desc->dtargd_native)); |
1013 | } |
1014 | |
1015 | if (desc->dtargd_native[0] == '\0') |
1016 | desc->dtargd_ndx = DTRACE_ARGNONE; |
1017 | } |
1018 | |
1019 | static uint64_t |
1020 | machtrace_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes) |
1021 | { |
1022 | #pragma unused(arg,id,parg,aframes) /* __APPLE__ */ |
1023 | uint64_t val = 0; |
1024 | syscall_arg_t *stack = (syscall_arg_t *)NULL; |
1025 | |
1026 | uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); |
1027 | |
1028 | if (uthread) |
1029 | stack = (syscall_arg_t *)uthread->t_dtrace_syscall_args; |
1030 | |
1031 | if (!stack) |
1032 | return(0); |
1033 | |
1034 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); |
1035 | /* dtrace_probe arguments arg0 .. arg4 are 64bits wide */ |
1036 | val = (uint64_t)*(stack+argno); |
1037 | DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); |
1038 | return (val); |
1039 | } |
1040 | |
1041 | |