1 | /* |
2 | * CDDL HEADER START |
3 | * |
4 | * The contents of this file are subject to the terms of the |
5 | * Common Development and Distribution License (the "License"). |
6 | * You may not use this file except in compliance with the License. |
7 | * |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
9 | * or http://www.opensolaris.org/os/licensing. |
10 | * See the License for the specific language governing permissions |
11 | * and limitations under the License. |
12 | * |
13 | * When distributing Covered Code, include this CDDL HEADER in each |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
15 | * If applicable, add the following below this CDDL HEADER, with the |
16 | * fields enclosed by brackets "[]" replaced with your own identifying |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] |
18 | * |
19 | * CDDL HEADER END |
20 | */ |
21 | /* |
22 | * Copyright 2009 Sun Microsystems, Inc. All rights reserved. |
23 | * Use is subject to license terms. |
24 | */ |
25 | |
26 | /* #pragma ident "@(#)fbt.c 1.18 07/01/10 SMI" */ |
27 | |
28 | #ifdef KERNEL |
29 | #ifndef _KERNEL |
30 | #define _KERNEL /* Solaris vs. Darwin */ |
31 | #endif |
32 | #endif |
33 | |
34 | #include <mach-o/loader.h> |
35 | #include <libkern/kernel_mach_header.h> |
36 | |
37 | #include <sys/param.h> |
38 | #include <sys/systm.h> |
39 | #include <sys/sysctl.h> |
40 | #include <sys/errno.h> |
41 | #include <sys/stat.h> |
42 | #include <sys/ioctl.h> |
43 | #include <sys/conf.h> |
44 | #include <sys/fcntl.h> |
45 | #include <miscfs/devfs/devfs.h> |
46 | #include <pexpert/pexpert.h> |
47 | |
48 | #include <sys/dtrace.h> |
49 | #include <sys/dtrace_impl.h> |
50 | #include <sys/fbt.h> |
51 | |
52 | #include <sys/dtrace_glue.h> |
53 | #include <san/kasan.h> |
54 | |
55 | /* #include <machine/trap.h> */ |
/*
 * Forward declaration only: this file never looks inside the structure,
 * it just carries pointers to it in the trap-hook signatures below.
 */
struct savearea_t; /* Used anonymously */

/*
 * perfCallback is the type of the hook stored in tempDTraceTrapHook.
 * fbt_enable() and fbt_resume() try to install fbt_perfCallback into that
 * hook before patching any instruction.  Only the third parameter differs
 * between the ARM and x86_64 variants.
 */
#if defined(__arm__) || defined(__arm64__)
typedef kern_return_t (*perfCallback)(int, struct savearea_t *, __unused int, __unused int);
extern perfCallback tempDTraceTrapHook;
extern kern_return_t fbt_perfCallback(int, struct savearea_t *, __unused int, __unused int);
#elif defined(__x86_64__)
typedef kern_return_t (*perfCallback)(int, struct savearea_t *, uintptr_t *, __unused int);
extern perfCallback tempDTraceTrapHook;
extern kern_return_t fbt_perfCallback(int, struct savearea_t *, uintptr_t *, __unused int);
#else
#error Unknown architecture
#endif

/*
 * Prototype for the qsort() that fbt_provide_module_kernel_syms() uses to
 * order its private copy of the symbol table.
 */
__private_extern__
void
qsort(void *a, size_t n, size_t es, int (*cmp)(const void *, const void *));
73 | |
/*
 * Map a patch-point address to a bucket index in fbt_probetab: the low
 * four address bits are dropped and the remainder is masked with
 * fbt_probetab_mask (which fbt_attach() sets to the table size minus one).
 */
#define FBT_ADDR2NDX(addr) ((((uintptr_t)(addr)) >> 4) & fbt_probetab_mask)
#define FBT_PROBETAB_SIZE 0x8000 /* 32k entries -- 128K total with 4-byte pointers, 256K with 8-byte pointers */

static int fbt_probetab_size; /* bucket count; defaulted to FBT_PROBETAB_SIZE in fbt_attach() */
dtrace_provider_id_t fbt_id; /* filled in by dtrace_register() in fbt_attach() */
fbt_probe_t **fbt_probetab; /* hash buckets, chained through fbtp_hashnext */
int fbt_probetab_mask; /* fbt_probetab_size - 1 while attached, 0 after fbt_cleanup() */
static int fbt_verbose = 0; /* non-zero: cmn_err() a note whenever a probe is skipped */

/*
 * Non-zero disables every exclusion check in fbt_module_excluded() and
 * fbt_excluded().  Set from the "IgnoreFBTBlacklist" boot-arg in fbt_init()
 * or through the ignore_fbt_blacklist sysctl.
 */
int ignore_fbt_blacklist = 0;

extern int dtrace_kernel_symbol_mode;


void fbt_init( void );
89 | |
/*
 * Critical routines that must not be probed. PR_5221096, PR_5379018.
 * The blacklist must be kept in alphabetic order for purposes of bsearch().
 *
 * Every entry MUST be followed by a comma (except the last).  Adjacent
 * string literals are concatenated by the compiler, so a missing comma
 * silently merges two names into one string that matches neither routine,
 * leaving both of them probe-able.
 */
static const char * critical_blacklist[] =
{
	"Call_DebuggerC",
	"DebuggerCall",
	"DebuggerTrapWithState",
	"DebuggerXCallEnter",
	"IOCPURunPlatformPanicActions",
	"PEARMDebugPanicHook",
	"PEHaltRestart",
	"SavePanicInfo",
	"SysChoked",
	"_ZN9IOService14newTemperatureElPS_", /* IOService::newTemperature */
	"_ZN9IOService26temperatureCriticalForZoneEPS_", /* IOService::temperatureCriticalForZone */
	"_ZNK6OSData14getBytesNoCopyEv", /* Data::getBytesNoCopy, IOHibernateSystemWake path */
	"__ZN16IOPlatformExpert11haltRestartEj",
	"__ZN18IODTPlatformExpert11haltRestartEj",
	"__ZN9IODTNVRAM13savePanicInfoEPhy",
	"_disable_preemption",
	"_enable_preemption",
	"alternate_debugger_enter",
	"bcopy_phys",
	"console_cpu_alloc",
	"console_cpu_free",
	"cpu_IA32e_disable",
	"cpu_IA32e_enable",
	"cpu_NMI_interrupt",
	"cpu_control",
	"cpu_data_alloc",
	"cpu_desc_init",
	"cpu_desc_init64",
	"cpu_desc_load",
	"cpu_desc_load64",
	"cpu_exit_wait",
	"cpu_info",
	"cpu_info_count",
	"cpu_init",
	"cpu_interrupt",
	"cpu_machine_init",
	"cpu_mode_init",
	"cpu_processor_alloc",
	"cpu_processor_free",
	"cpu_signal_handler",
	"cpu_sleep",
	"cpu_start",
	"cpu_subtype",
	"cpu_thread_alloc",
	"cpu_thread_halt",
	"cpu_thread_init",
	"cpu_threadtype",
	"cpu_to_processor",
	"cpu_topology_sort",
	"cpu_topology_start_cpu",
	"cpu_type",
	"cpuid_cpu_display",
	"cpuid_extfeatures",
	"dtrace_invop",
	"enter_lohandler",
	"fbt_invop",
	"fbt_perfCallback",
	"get_preemption_level",
	"get_threadtask",
	"handle_pending_TLB_flushes",
	"hw_compare_and_store",
	"interrupt",
	"is_saved_state32",
	"kernel_preempt_check",
	"kernel_trap",
	"kprintf",
	"ks_dispatch_kernel",
	"ks_dispatch_user",
	"ks_kernel_trap",
	"lo_alltraps",
	"lock_debugger",
	"machine_idle_cstate",
	"machine_thread_get_kern_state",
	"mca_cpu_alloc",
	"mca_cpu_init",
	"ml_nofault_copy",
	"nanoseconds_to_absolutetime",
	"nanotime_to_absolutetime",
	"packA",
	"panic",
	"phystokv",
	"phystokv_range",
	"pltrace",
	"pmKextRegister",
	"pmMarkAllCPUsOff",
	"pmSafeMode",
	"pmTimerRestore",
	"pmTimerSave",
	"pmUnRegister",
	"pmap_cpu_alloc",
	"pmap_cpu_free",
	"pmap_cpu_high_map_vaddr",
	"pmap_cpu_high_shared_remap",
	"pmap_cpu_init",
	"power_management_init",
	"preemption_underflow_panic",
	"register_cpu_setup_func",
	"ret64_iret",
	"ret_to_user",
	"return_to_kernel",
	"return_to_user",
	"saved_state64",
	"sdt_invop",
	"sprlock",
	"sprunlock",
	"strlen",
	"strncmp",
	"t_invop",
	"tmrCvt",
	"trap_from_kernel",
	"uart_putc",
	"unlock_debugger",
	"unpackA",
	"unregister_cpu_setup_func",
	"uread",
	"uwrite",
	"vstart"
};

/* Number of entries in critical_blacklist, as passed to bsearch(). */
#define CRITICAL_BLACKLIST_COUNT (sizeof(critical_blacklist)/sizeof(critical_blacklist[0]))
216 | |
/*
 * Entry points reachable from probe context (their transitive closure),
 * other than the routines whose names start with dtrace_.  fbt_excluded()
 * looks names up here with bsearch(), so the entries must stay in strcmp()
 * order and each one needs its trailing comma.
 */
static const char * probe_ctx_closure[] =
{
	/* Upper case and underscore sort ahead of lower case. */
	"ClearIdlePop",
	"Debugger",
	"IS_64BIT_PROCESS",
	"OSCompareAndSwap",
	"SetIdlePop",
	"__dtrace_probe",

	/* a - c */
	"absolutetime_to_microtime",
	"act_set_astbsd",
	"arm_init_idle_cpu",
	"ast_dtrace_on",
	"ast_pending",
	"clean_dcache",
	"clean_mmu_dcache",
	"clock_get_calendar_nanotime_nowait",
	"copyin",
	"copyin_kern",
	"copyin_user",
	"copyinstr",
	"copyout",
	"copyoutstr",
	"cpu_number",
	"current_proc",
	"current_processor",
	"current_task",
	"current_thread",

	/* d - k */
	"debug_enter",
	"drain_write_buffer",
	"find_user_regs",
	"flush_dcache",
	"flush_tlb64",
	"get_bsdtask_info",
	"get_bsdthread_info",
	"hertz_tick",
	"hw_atomic_and",
	"invalidate_mmu_icache",
	"kauth_cred_get",
	"kauth_getgid",
	"kauth_getuid",
	"kernel_preempt_check",
	"kvtophys",

	/* m - o */
	"mach_absolute_time",
	"max_valid_stack_address",
	"memcpy",
	"memmove",
	"ml_at_interrupt_context",
	"ml_phys_write_byte_64",
	"ml_phys_write_half_64",
	"ml_phys_write_word_64",
	"ml_set_interrupts_enabled",
	"mt_core_snap",
	"mt_cur_cpu_cycles",
	"mt_cur_cpu_instrs",
	"mt_cur_thread_cycles",
	"mt_cur_thread_instrs",
	"mt_fixed_counts",
	"mt_fixed_counts_internal",
	"mt_mtc_update_count",
	"mt_update_thread",
	"ovbcopy",

	/* p - t */
	"panic",
	"pmap64_pde",
	"pmap64_pdpt",
	"pmap_find_phys",
	"pmap_get_mapwindow",
	"pmap_pde",
	"pmap_pte",
	"pmap_put_mapwindow",
	"pmap_valid_page",
	"prf",
	"proc_is64bit",
	"proc_selfname",
	"psignal_lock",
	"rtc_nanotime_load",
	"rtc_nanotime_read",
	"sdt_getargdesc",
	"setPop",
	"strlcpy",
	"sync_iss_to_iks_unconditionally",
	"systrace_stub",
	"timer_grab"
};
/* Number of entries in probe_ctx_closure, as passed to bsearch(). */
#define PROBE_CTX_CLOSURE_COUNT (sizeof(probe_ctx_closure)/sizeof(probe_ctx_closure[0]))
305 | |
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wcast-qual"
/*
 * bsearch() comparator for the name tables in this file.
 *
 * a: the key, a NUL-terminated name.
 * b: pointer to a table slot, i.e. a pointer to a (const char *).
 *
 * The comparison length is strlen(key) + 1, so the key's terminating NUL
 * takes part in the comparison and only an exact match returns 0.
 */
static int
_cmp(const void *a, const void *b)
{
	const char *key = (const char *)a;
	const char *entry = *(const char **)b;
	size_t span = strlen(key) + 1;

	return strncmp(key, entry, span);
}
#pragma clang diagnostic pop
313 | /* |
314 | * Module validation |
315 | */ |
316 | int |
317 | fbt_module_excluded(struct modctl* ctl) |
318 | { |
319 | ASSERT(!MOD_FBT_DONE(ctl)); |
320 | |
321 | if (ctl->mod_address == 0 || ctl->mod_size == 0) { |
322 | return TRUE; |
323 | } |
324 | |
325 | if (ctl->mod_loaded == 0) { |
326 | return TRUE; |
327 | } |
328 | |
329 | /* |
330 | * If the user sets this, trust they know what they are doing. |
331 | */ |
332 | if (ignore_fbt_blacklist) |
333 | return FALSE; |
334 | |
335 | /* |
336 | * These drivers control low level functions that when traced |
337 | * cause problems often in the sleep/wake paths as well as |
338 | * critical debug and panic paths. |
339 | * If somebody really wants to drill in on one of these kexts, then |
340 | * they can override blacklisting using the boot-arg above. |
341 | */ |
342 | |
343 | #ifdef __x86_64__ |
344 | if (strstr(ctl->mod_modname, "AppleACPIEC" ) != NULL) |
345 | return TRUE; |
346 | |
347 | if (strstr(ctl->mod_modname, "AppleACPIPlatform" ) != NULL) |
348 | return TRUE; |
349 | |
350 | if (strstr(ctl->mod_modname, "AppleRTC" ) != NULL) |
351 | return TRUE; |
352 | |
353 | if (strstr(ctl->mod_modname, "IOACPIFamily" ) != NULL) |
354 | return TRUE; |
355 | |
356 | if (strstr(ctl->mod_modname, "AppleIntelCPUPowerManagement" ) != NULL) |
357 | return TRUE; |
358 | |
359 | if (strstr(ctl->mod_modname, "AppleProfile" ) != NULL) |
360 | return TRUE; |
361 | |
362 | if (strstr(ctl->mod_modname, "AppleIntelProfile" ) != NULL) |
363 | return TRUE; |
364 | |
365 | if (strstr(ctl->mod_modname, "AppleEFI" ) != NULL) |
366 | return TRUE; |
367 | |
368 | #elif __arm__ || __arm64__ |
369 | if (LIT_STRNEQL(ctl->mod_modname, "com.apple.driver.AppleARMPlatform" ) || |
370 | LIT_STRNEQL(ctl->mod_modname, "com.apple.driver.AppleARMPL192VIC" ) || |
371 | LIT_STRNEQL(ctl->mod_modname, "com.apple.driver.AppleInterruptController" )) |
372 | return TRUE; |
373 | #endif |
374 | |
375 | return FALSE; |
376 | } |
377 | |
378 | /* |
379 | * FBT probe name validation |
380 | */ |
381 | int |
382 | fbt_excluded(const char* name) |
383 | { |
384 | /* |
385 | * If the user set this, trust they know what they are doing. |
386 | */ |
387 | if (ignore_fbt_blacklist) |
388 | return FALSE; |
389 | |
390 | if (LIT_STRNSTART(name, "dtrace_" ) && !LIT_STRNSTART(name, "dtrace_safe_" )) { |
391 | /* |
392 | * Anything beginning with "dtrace_" may be called |
393 | * from probe context unless it explitly indicates |
394 | * that it won't be called from probe context by |
395 | * using the prefix "dtrace_safe_". |
396 | */ |
397 | return TRUE; |
398 | } |
399 | |
400 | /* |
401 | * Place no probes on critical routines (5221096) |
402 | */ |
403 | if (bsearch( name, critical_blacklist, CRITICAL_BLACKLIST_COUNT, sizeof(name), _cmp ) != NULL) |
404 | return TRUE; |
405 | |
406 | /* |
407 | * Place no probes that could be hit in probe context. |
408 | */ |
409 | if (bsearch( name, probe_ctx_closure, PROBE_CTX_CLOSURE_COUNT, sizeof(name), _cmp ) != NULL) { |
410 | return TRUE; |
411 | } |
412 | |
413 | /* |
414 | * Place no probes that could be hit in probe context. |
415 | * In the interests of safety, some of these may be overly cautious. |
416 | * Also exclude very low-level "firmware" class calls. |
417 | */ |
418 | if (LIT_STRNSTART(name, "cpu_" ) || /* Coarse */ |
419 | LIT_STRNSTART(name, "platform_" ) || /* Coarse */ |
420 | LIT_STRNSTART(name, "machine_" ) || /* Coarse */ |
421 | LIT_STRNSTART(name, "ml_" ) || /* Coarse */ |
422 | LIT_STRNSTART(name, "PE_" ) || /* Coarse */ |
423 | LIT_STRNSTART(name, "rtc_" ) || /* Coarse */ |
424 | LIT_STRNSTART(name, "_rtc_" ) || |
425 | LIT_STRNSTART(name, "rtclock_" ) || |
426 | LIT_STRNSTART(name, "clock_" ) || |
427 | LIT_STRNSTART(name, "bcopy" ) || |
428 | LIT_STRNSTART(name, "pmap_" ) || |
429 | LIT_STRNSTART(name, "hw_" ) || /* Coarse */ |
430 | LIT_STRNSTART(name, "lapic_" ) || /* Coarse */ |
431 | LIT_STRNSTART(name, "OSAdd" ) || |
432 | LIT_STRNSTART(name, "OSBit" ) || |
433 | LIT_STRNSTART(name, "OSDecrement" ) || |
434 | LIT_STRNSTART(name, "OSIncrement" ) || |
435 | LIT_STRNSTART(name, "OSCompareAndSwap" ) || |
436 | LIT_STRNSTART(name, "etimer_" ) || |
437 | LIT_STRNSTART(name, "dtxnu_kern_" ) || |
438 | LIT_STRNSTART(name, "flush_mmu_tlb_" )) |
439 | return TRUE; |
440 | /* |
441 | * Fasttrap inner-workings we can't instrument |
442 | * on Intel (6230149) |
443 | */ |
444 | if (LIT_STRNSTART(name, "fasttrap_" ) || |
445 | LIT_STRNSTART(name, "fuword" ) || |
446 | LIT_STRNSTART(name, "suword" )) |
447 | return TRUE; |
448 | |
449 | if (LIT_STRNSTART(name, "_dtrace" )) |
450 | return TRUE; /* Shims in dtrace.c */ |
451 | |
452 | if (LIT_STRNSTART(name, "hibernate_" )) |
453 | return TRUE; |
454 | |
455 | /* |
456 | * Place no probes in the exception handling path |
457 | */ |
458 | #if __arm__ || __arm64__ |
459 | if (LIT_STRNSTART(name, "fleh_" ) || |
460 | LIT_STRNSTART(name, "sleh_" ) || |
461 | LIT_STRNSTART(name, "timer_state_event" ) || |
462 | LIT_STRNEQL(name, "get_vfp_enabled" )) |
463 | return TRUE; |
464 | |
465 | if (LIT_STRNSTART(name, "_ZNK15OSMetaClassBase8metaCastEPK11OSMetaClass" ) || |
466 | LIT_STRNSTART(name, "_ZN15OSMetaClassBase12safeMetaCastEPKS_PK11OSMetaClass" ) || |
467 | LIT_STRNSTART(name, "_ZNK11OSMetaClass13checkMetaCastEPK15OSMetaClassBase" )) |
468 | return TRUE; |
469 | #endif |
470 | |
471 | #ifdef __x86_64__ |
472 | if (LIT_STRNSTART(name, "machine_" ) || |
473 | LIT_STRNSTART(name, "idt64" ) || |
474 | LIT_STRNSTART(name, "ks_" ) || |
475 | LIT_STRNSTART(name, "hndl_" ) || |
476 | LIT_STRNSTART(name, "_intr_" ) || |
477 | LIT_STRNSTART(name, "mapping_" ) || |
478 | LIT_STRNSTART(name, "tsc_" ) || |
479 | LIT_STRNSTART(name, "pmCPU" ) || |
480 | LIT_STRNSTART(name, "pms" ) || |
481 | LIT_STRNSTART(name, "usimple_" ) || |
482 | LIT_STRNSTART(name, "lck_spin_lock" ) || |
483 | LIT_STRNSTART(name, "lck_spin_unlock" ) || |
484 | LIT_STRNSTART(name, "absolutetime_to_" ) || |
485 | LIT_STRNSTART(name, "commpage_" ) || |
486 | LIT_STRNSTART(name, "ml_" ) || |
487 | LIT_STRNSTART(name, "PE_" ) || |
488 | LIT_STRNSTART(name, "act_machine" ) || |
489 | LIT_STRNSTART(name, "acpi_" ) || |
490 | LIT_STRNSTART(name, "pal_" )) { |
491 | return TRUE; |
492 | } |
493 | // Don't Steal Mac OS X |
494 | if (LIT_STRNSTART(name, "dsmos_" )) |
495 | return TRUE; |
496 | |
497 | #endif |
498 | |
499 | /* |
500 | * Place no probes that could be hit on the way to the debugger. |
501 | */ |
502 | if (LIT_STRNSTART(name, "kdp_" ) || |
503 | LIT_STRNSTART(name, "kdb_" ) || |
504 | LIT_STRNSTART(name, "debug_" )) { |
505 | return TRUE; |
506 | } |
507 | |
508 | #if KASAN |
509 | if (LIT_STRNSTART(name, "kasan" ) || |
510 | LIT_STRNSTART(name, "__kasan" ) || |
511 | LIT_STRNSTART(name, "__asan" )) { |
512 | return TRUE; |
513 | } |
514 | #endif |
515 | |
516 | /* |
517 | * Place no probes that could be hit on the way to a panic. |
518 | */ |
519 | if (NULL != strstr(name, "panic_" )) |
520 | return TRUE; |
521 | |
522 | return FALSE; |
523 | } |
524 | |
525 | |
526 | /*ARGSUSED*/ |
527 | static void |
528 | fbt_destroy(void *arg, dtrace_id_t id, void *parg) |
529 | { |
530 | #pragma unused(arg,id) |
531 | fbt_probe_t *fbt = parg, *next, *hash, *last; |
532 | int ndx; |
533 | |
534 | do { |
535 | /* |
536 | * Now we need to remove this probe from the fbt_probetab. |
537 | */ |
538 | ndx = FBT_ADDR2NDX(fbt->fbtp_patchpoint); |
539 | last = NULL; |
540 | hash = fbt_probetab[ndx]; |
541 | |
542 | while (hash != fbt) { |
543 | ASSERT(hash != NULL); |
544 | last = hash; |
545 | hash = hash->fbtp_hashnext; |
546 | } |
547 | |
548 | if (last != NULL) { |
549 | last->fbtp_hashnext = fbt->fbtp_hashnext; |
550 | } else { |
551 | fbt_probetab[ndx] = fbt->fbtp_hashnext; |
552 | } |
553 | |
554 | next = fbt->fbtp_next; |
555 | kmem_free(fbt, sizeof (fbt_probe_t)); |
556 | |
557 | fbt = next; |
558 | } while (fbt != NULL); |
559 | } |
560 | |
561 | /*ARGSUSED*/ |
562 | int |
563 | fbt_enable(void *arg, dtrace_id_t id, void *parg) |
564 | { |
565 | #pragma unused(arg,id) |
566 | fbt_probe_t *fbt = parg; |
567 | struct modctl *ctl = NULL; |
568 | |
569 | for (; fbt != NULL; fbt = fbt->fbtp_next) { |
570 | |
571 | ctl = fbt->fbtp_ctl; |
572 | |
573 | if (!ctl->mod_loaded) { |
574 | if (fbt_verbose) { |
575 | cmn_err(CE_NOTE, "fbt is failing for probe %s " |
576 | "(module %s unloaded)" , |
577 | fbt->fbtp_name, ctl->mod_modname); |
578 | } |
579 | |
580 | continue; |
581 | } |
582 | |
583 | /* |
584 | * Now check that our modctl has the expected load count. If it |
585 | * doesn't, this module must have been unloaded and reloaded -- and |
586 | * we're not going to touch it. |
587 | */ |
588 | if (ctl->mod_loadcnt != fbt->fbtp_loadcnt) { |
589 | if (fbt_verbose) { |
590 | cmn_err(CE_NOTE, "fbt is failing for probe %s " |
591 | "(module %s reloaded)" , |
592 | fbt->fbtp_name, ctl->mod_modname); |
593 | } |
594 | |
595 | continue; |
596 | } |
597 | |
598 | dtrace_casptr(&tempDTraceTrapHook, NULL, fbt_perfCallback); |
599 | if (tempDTraceTrapHook != (perfCallback)fbt_perfCallback) { |
600 | if (fbt_verbose) { |
601 | cmn_err(CE_NOTE, "fbt_enable is failing for probe %s " |
602 | "in module %s: tempDTraceTrapHook already occupied." , |
603 | fbt->fbtp_name, ctl->mod_modname); |
604 | } |
605 | continue; |
606 | } |
607 | |
608 | if (fbt->fbtp_currentval != fbt->fbtp_patchval) { |
609 | #if KASAN |
610 | /* Since dtrace probes can call into KASan and vice versa, things can get |
611 | * very slow if we have a lot of probes. This call will disable the KASan |
612 | * fakestack after a threshold of probes is reached. */ |
613 | kasan_fakestack_suspend(); |
614 | #endif |
615 | |
616 | (void)ml_nofault_copy( (vm_offset_t)&fbt->fbtp_patchval, (vm_offset_t)fbt->fbtp_patchpoint, |
617 | sizeof(fbt->fbtp_patchval)); |
618 | /* |
619 | * Make the patched instruction visible via a data + instruction |
620 | * cache flush for the platforms that need it |
621 | */ |
622 | flush_dcache((vm_offset_t)fbt->fbtp_patchpoint,(vm_size_t)sizeof(fbt->fbtp_patchval), 0); |
623 | invalidate_icache((vm_offset_t)fbt->fbtp_patchpoint,(vm_size_t)sizeof(fbt->fbtp_patchval), 0); |
624 | fbt->fbtp_currentval = fbt->fbtp_patchval; |
625 | |
626 | ctl->mod_nenabled++; |
627 | } |
628 | |
629 | } |
630 | |
631 | dtrace_membar_consumer(); |
632 | |
633 | return (0); |
634 | } |
635 | |
636 | /*ARGSUSED*/ |
637 | static void |
638 | fbt_disable(void *arg, dtrace_id_t id, void *parg) |
639 | { |
640 | #pragma unused(arg,id) |
641 | fbt_probe_t *fbt = parg; |
642 | struct modctl *ctl = NULL; |
643 | |
644 | for (; fbt != NULL; fbt = fbt->fbtp_next) { |
645 | ctl = fbt->fbtp_ctl; |
646 | |
647 | if (!ctl->mod_loaded || (ctl->mod_loadcnt != fbt->fbtp_loadcnt)) |
648 | continue; |
649 | |
650 | if (fbt->fbtp_currentval != fbt->fbtp_savedval) { |
651 | (void)ml_nofault_copy( (vm_offset_t)&fbt->fbtp_savedval, (vm_offset_t)fbt->fbtp_patchpoint, |
652 | sizeof(fbt->fbtp_savedval)); |
653 | /* |
654 | * Make the patched instruction visible via a data + instruction |
655 | * cache flush for the platforms that need it |
656 | */ |
657 | flush_dcache((vm_offset_t)fbt->fbtp_patchpoint,(vm_size_t)sizeof(fbt->fbtp_patchval), 0); |
658 | invalidate_icache((vm_offset_t)fbt->fbtp_patchpoint,(vm_size_t)sizeof(fbt->fbtp_patchval), 0); |
659 | |
660 | fbt->fbtp_currentval = fbt->fbtp_savedval; |
661 | ASSERT(ctl->mod_nenabled > 0); |
662 | ctl->mod_nenabled--; |
663 | |
664 | #if KASAN |
665 | kasan_fakestack_resume(); |
666 | #endif |
667 | } |
668 | } |
669 | dtrace_membar_consumer(); |
670 | } |
671 | |
672 | /*ARGSUSED*/ |
673 | static void |
674 | fbt_suspend(void *arg, dtrace_id_t id, void *parg) |
675 | { |
676 | #pragma unused(arg,id) |
677 | fbt_probe_t *fbt = parg; |
678 | struct modctl *ctl = NULL; |
679 | |
680 | for (; fbt != NULL; fbt = fbt->fbtp_next) { |
681 | ctl = fbt->fbtp_ctl; |
682 | |
683 | ASSERT(ctl->mod_nenabled > 0); |
684 | if (!ctl->mod_loaded || (ctl->mod_loadcnt != fbt->fbtp_loadcnt)) |
685 | continue; |
686 | |
687 | (void)ml_nofault_copy( (vm_offset_t)&fbt->fbtp_savedval, (vm_offset_t)fbt->fbtp_patchpoint, |
688 | sizeof(fbt->fbtp_savedval)); |
689 | |
690 | /* |
691 | * Make the patched instruction visible via a data + instruction |
692 | * cache flush for the platforms that need it |
693 | */ |
694 | flush_dcache((vm_offset_t)fbt->fbtp_patchpoint,(vm_size_t)sizeof(fbt->fbtp_savedval), 0); |
695 | invalidate_icache((vm_offset_t)fbt->fbtp_patchpoint,(vm_size_t)sizeof(fbt->fbtp_savedval), 0); |
696 | |
697 | fbt->fbtp_currentval = fbt->fbtp_savedval; |
698 | } |
699 | |
700 | dtrace_membar_consumer(); |
701 | } |
702 | |
703 | /*ARGSUSED*/ |
704 | static void |
705 | fbt_resume(void *arg, dtrace_id_t id, void *parg) |
706 | { |
707 | #pragma unused(arg,id) |
708 | fbt_probe_t *fbt = parg; |
709 | struct modctl *ctl = NULL; |
710 | |
711 | for (; fbt != NULL; fbt = fbt->fbtp_next) { |
712 | ctl = fbt->fbtp_ctl; |
713 | |
714 | ASSERT(ctl->mod_nenabled > 0); |
715 | if (!ctl->mod_loaded || (ctl->mod_loadcnt != fbt->fbtp_loadcnt)) |
716 | continue; |
717 | |
718 | dtrace_casptr(&tempDTraceTrapHook, NULL, fbt_perfCallback); |
719 | if (tempDTraceTrapHook != (perfCallback)fbt_perfCallback) { |
720 | if (fbt_verbose) { |
721 | cmn_err(CE_NOTE, "fbt_resume is failing for probe %s " |
722 | "in module %s: tempDTraceTrapHook already occupied." , |
723 | fbt->fbtp_name, ctl->mod_modname); |
724 | } |
725 | return; |
726 | } |
727 | |
728 | (void)ml_nofault_copy( (vm_offset_t)&fbt->fbtp_patchval, (vm_offset_t)fbt->fbtp_patchpoint, |
729 | sizeof(fbt->fbtp_patchval)); |
730 | |
731 | /* |
732 | * Make the patched instruction visible via a data + instruction cache flush. |
733 | */ |
734 | flush_dcache((vm_offset_t)fbt->fbtp_patchpoint,(vm_size_t)sizeof(fbt->fbtp_patchval), 0); |
735 | invalidate_icache((vm_offset_t)fbt->fbtp_patchpoint,(vm_size_t)sizeof(fbt->fbtp_patchval), 0); |
736 | |
737 | fbt->fbtp_currentval = fbt->fbtp_patchval; |
738 | } |
739 | |
740 | dtrace_membar_consumer(); |
741 | } |
742 | |
743 | static void |
744 | fbt_provide_module_user_syms(struct modctl *ctl) |
745 | { |
746 | unsigned int i; |
747 | char *modname = ctl->mod_modname; |
748 | |
749 | dtrace_module_symbols_t* module_symbols = ctl->mod_user_symbols; |
750 | if (module_symbols) { |
751 | for (i=0; i<module_symbols->dtmodsyms_count; i++) { |
752 | |
753 | /* |
754 | * symbol->dtsym_addr (the symbol address) passed in from |
755 | * user space, is already slid for both kexts and kernel. |
756 | */ |
757 | dtrace_symbol_t* symbol = &module_symbols->dtmodsyms_symbols[i]; |
758 | |
759 | char* name = symbol->dtsym_name; |
760 | |
761 | /* Lop off omnipresent leading underscore. */ |
762 | if (*name == '_') |
763 | name += 1; |
764 | |
765 | if (MOD_IS_MACH_KERNEL(ctl) && fbt_excluded(name)) |
766 | continue; |
767 | |
768 | /* |
769 | * Ignore symbols with a null address |
770 | */ |
771 | if (!symbol->dtsym_addr) |
772 | continue; |
773 | |
774 | /* |
775 | * Ignore symbols not part of this module |
776 | */ |
777 | if (!dtrace_addr_in_module((void*)symbol->dtsym_addr, ctl)) |
778 | continue; |
779 | |
780 | fbt_provide_probe(ctl, modname, name, (machine_inst_t*)(uintptr_t)symbol->dtsym_addr, (machine_inst_t*)(uintptr_t)(symbol->dtsym_addr + symbol->dtsym_size)); |
781 | } |
782 | } |
783 | } |
784 | static void |
785 | fbt_provide_kernel_section(struct modctl *ctl, kernel_section_t *sect, kernel_nlist_t *sym, uint32_t nsyms, const char *strings) |
786 | { |
787 | uintptr_t sect_start = (uintptr_t)sect->addr; |
788 | uintptr_t sect_end = (uintptr_t)sect->size + sect->addr; |
789 | unsigned int i; |
790 | |
791 | if ((sect->flags & S_ATTR_PURE_INSTRUCTIONS) != S_ATTR_PURE_INSTRUCTIONS) { |
792 | return; |
793 | } |
794 | |
795 | for (i = 0; i < nsyms; i++) { |
796 | uint8_t n_type = sym[i].n_type & (N_TYPE | N_EXT); |
797 | const char *name = strings + sym[i].n_un.n_strx; |
798 | uint64_t limit; |
799 | |
800 | if (sym[i].n_value < sect_start || sym[i].n_value > sect_end) |
801 | continue; |
802 | |
803 | /* Check that the symbol is a global and that it has a name. */ |
804 | if (((N_SECT | N_EXT) != n_type && (N_ABS | N_EXT) != n_type)) |
805 | continue; |
806 | |
807 | if (0 == sym[i].n_un.n_strx) /* iff a null, "", name. */ |
808 | continue; |
809 | |
810 | /* Lop off omnipresent leading underscore. */ |
811 | if (*name == '_') |
812 | name += 1; |
813 | |
814 | #if defined(__arm__) |
815 | // Skip non-thumb functions on arm32 |
816 | if (sym[i].n_sect == 1 && !(sym[i].n_desc & N_ARM_THUMB_DEF)) { |
817 | continue; |
818 | } |
819 | #endif /* defined(__arm__) */ |
820 | |
821 | if (MOD_IS_MACH_KERNEL(ctl) && fbt_excluded(name)) |
822 | continue; |
823 | |
824 | /* |
825 | * Find the function boundary by looking at either the |
826 | * end of the section or the beginning of the next symbol |
827 | */ |
828 | if (i == nsyms - 1) { |
829 | limit = sect_end; |
830 | } |
831 | else { |
832 | limit = sym[i + 1].n_value; |
833 | } |
834 | |
835 | fbt_provide_probe(ctl, ctl->mod_modname, name, (machine_inst_t*)sym[i].n_value, (machine_inst_t*)limit); |
836 | } |
837 | |
838 | } |
839 | |
840 | static int |
841 | fbt_sym_cmp(const void *ap, const void *bp) |
842 | { |
843 | return (int)(((const kernel_nlist_t*)ap)->n_value - ((const kernel_nlist_t*)bp)->n_value); |
844 | } |
845 | |
846 | static void |
847 | fbt_provide_module_kernel_syms(struct modctl *ctl) |
848 | { |
849 | kernel_mach_header_t *mh = (kernel_mach_header_t *)(ctl->mod_address); |
850 | kernel_segment_command_t *seg; |
851 | struct load_command *cmd; |
852 | kernel_segment_command_t *linkedit = NULL; |
853 | struct symtab_command *symtab = NULL; |
854 | kernel_nlist_t *syms = NULL, *sorted_syms = NULL; |
855 | const char *strings; |
856 | unsigned int i; |
857 | size_t symlen; |
858 | |
859 | if (mh->magic != MH_MAGIC_KERNEL) |
860 | return; |
861 | |
862 | cmd = (struct load_command *) &mh[1]; |
863 | for (i = 0; i < mh->ncmds; i++) { |
864 | if (cmd->cmd == LC_SEGMENT_KERNEL) { |
865 | kernel_segment_command_t *orig_sg = (kernel_segment_command_t *) cmd; |
866 | if (LIT_STRNEQL(orig_sg->segname, SEG_LINKEDIT)) |
867 | linkedit = orig_sg; |
868 | } else if (cmd->cmd == LC_SYMTAB) { |
869 | symtab = (struct symtab_command *) cmd; |
870 | } |
871 | if (symtab && linkedit) { |
872 | break; |
873 | } |
874 | cmd = (struct load_command *) ((caddr_t) cmd + cmd->cmdsize); |
875 | } |
876 | |
877 | if ((symtab == NULL) || (linkedit == NULL)) { |
878 | return; |
879 | } |
880 | |
881 | syms = (kernel_nlist_t *)(linkedit->vmaddr + symtab->symoff - linkedit->fileoff); |
882 | strings = (const char *)(linkedit->vmaddr + symtab->stroff - linkedit->fileoff); |
883 | |
884 | /* |
885 | * Make a copy of the symbol table and sort it to not cross into the next function |
886 | * when disassembling the function |
887 | */ |
888 | symlen = sizeof(kernel_nlist_t) * symtab->nsyms; |
889 | sorted_syms = kmem_alloc(symlen, KM_SLEEP); |
890 | bcopy(syms, sorted_syms, symlen); |
891 | qsort(sorted_syms, symtab->nsyms, sizeof(kernel_nlist_t), fbt_sym_cmp); |
892 | |
893 | for (seg = firstsegfromheader(mh); seg != NULL; seg = nextsegfromheader(mh, seg)) { |
894 | kernel_section_t *sect = firstsect(seg); |
895 | |
896 | if (strcmp(seg->segname, "__KLD" ) == 0) { |
897 | continue; |
898 | } |
899 | |
900 | for (sect = firstsect(seg); sect != NULL; sect = nextsect(seg, sect)) { |
901 | fbt_provide_kernel_section(ctl, sect, sorted_syms, symtab->nsyms, strings); |
902 | } |
903 | } |
904 | |
905 | kmem_free(sorted_syms, symlen); |
906 | } |
907 | |
908 | void |
909 | fbt_provide_module(void *arg, struct modctl *ctl) |
910 | { |
911 | #pragma unused(arg) |
912 | ASSERT(ctl != NULL); |
913 | ASSERT(dtrace_kernel_symbol_mode != DTRACE_KERNEL_SYMBOLS_NEVER); |
914 | LCK_MTX_ASSERT(&mod_lock, LCK_MTX_ASSERT_OWNED); |
915 | |
916 | // Update the "ignore blacklist" bit |
917 | if (ignore_fbt_blacklist) |
918 | ctl->mod_flags |= MODCTL_FBT_PROVIDE_BLACKLISTED_PROBES; |
919 | |
920 | if (MOD_FBT_DONE(ctl)) |
921 | return; |
922 | |
923 | if (fbt_module_excluded(ctl)) { |
924 | ctl->mod_flags |= MODCTL_FBT_INVALID; |
925 | return; |
926 | } |
927 | |
928 | if (MOD_HAS_KERNEL_SYMBOLS(ctl)) { |
929 | fbt_provide_module_kernel_syms(ctl); |
930 | ctl->mod_flags |= MODCTL_FBT_PROBES_PROVIDED; |
931 | if (MOD_FBT_PROVIDE_BLACKLISTED_PROBES(ctl)) |
932 | ctl->mod_flags |= MODCTL_FBT_BLACKLISTED_PROBES_PROVIDED; |
933 | return; |
934 | } |
935 | |
936 | if (MOD_HAS_USERSPACE_SYMBOLS(ctl)) { |
937 | fbt_provide_module_user_syms(ctl); |
938 | ctl->mod_flags |= MODCTL_FBT_PROBES_PROVIDED; |
939 | if (MOD_FBT_PROVIDE_PRIVATE_PROBES(ctl)) |
940 | ctl->mod_flags |= MODCTL_FBT_PRIVATE_PROBES_PROVIDED; |
941 | if (MOD_FBT_PROVIDE_BLACKLISTED_PROBES(ctl)) |
942 | ctl->mod_flags |= MODCTL_FBT_BLACKLISTED_PROBES_PROVIDED; |
943 | return; |
944 | } |
945 | } |
946 | |
/*
 * Stability attributes passed to dtrace_register() in fbt_attach().
 * Each row is a { name stability, data stability, dependency class }
 * triple.  NOTE(review): the five rows presumably describe, in order, the
 * provider, module, function, name and args -- confirm against the
 * dtrace_pattr_t definition in sys/dtrace.h.
 */
static dtrace_pattr_t fbt_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
};
954 | |
955 | static dtrace_pops_t fbt_pops = { |
956 | .dtps_provide = NULL, |
957 | .dtps_provide_module = fbt_provide_module, |
958 | .dtps_enable = fbt_enable, |
959 | .dtps_disable = fbt_disable, |
960 | .dtps_suspend = fbt_suspend, |
961 | .dtps_resume = fbt_resume, |
962 | .dtps_getargdesc = NULL, /* APPLE NOTE: fbt_getargdesc implemented in userspace */ |
963 | .dtps_getargval = NULL, |
964 | .dtps_usermode = NULL, |
965 | .dtps_destroy = fbt_destroy |
966 | }; |
967 | |
968 | static void |
969 | fbt_cleanup(dev_info_t *devi) |
970 | { |
971 | dtrace_invop_remove(fbt_invop); |
972 | ddi_remove_minor_node(devi, NULL); |
973 | kmem_free(fbt_probetab, fbt_probetab_size * sizeof (fbt_probe_t *)); |
974 | fbt_probetab = NULL; |
975 | fbt_probetab_mask = 0; |
976 | } |
977 | |
978 | static int |
979 | fbt_attach(dev_info_t *devi) |
980 | { |
981 | if (fbt_probetab_size == 0) |
982 | fbt_probetab_size = FBT_PROBETAB_SIZE; |
983 | |
984 | fbt_probetab_mask = fbt_probetab_size - 1; |
985 | fbt_probetab = |
986 | kmem_zalloc(fbt_probetab_size * sizeof (fbt_probe_t *), KM_SLEEP); |
987 | |
988 | dtrace_invop_add(fbt_invop); |
989 | |
990 | if (ddi_create_minor_node(devi, "fbt" , S_IFCHR, 0, |
991 | DDI_PSEUDO, 0) == DDI_FAILURE || |
992 | dtrace_register("fbt" , &fbt_attr, DTRACE_PRIV_KERNEL, NULL, |
993 | &fbt_pops, NULL, &fbt_id) != 0) { |
994 | fbt_cleanup(devi); |
995 | return (DDI_FAILURE); |
996 | } |
997 | |
998 | return (DDI_SUCCESS); |
999 | } |
1000 | |
1001 | static d_open_t _fbt_open; |
1002 | |
1003 | static int |
1004 | _fbt_open(dev_t dev, int flags, int devtype, struct proc *p) |
1005 | { |
1006 | #pragma unused(dev,flags,devtype,p) |
1007 | return 0; |
1008 | } |
1009 | |
1010 | #define FBT_MAJOR -24 /* let the kernel pick the device number */ |
1011 | |
1012 | SYSCTL_DECL(_kern_dtrace); |
1013 | |
1014 | static int |
1015 | sysctl_dtrace_ignore_fbt_blacklist SYSCTL_HANDLER_ARGS |
1016 | { |
1017 | #pragma unused(oidp, arg2) |
1018 | int err; |
1019 | int value = *(int*)arg1; |
1020 | |
1021 | err = sysctl_io_number(req, value, sizeof(value), &value, NULL); |
1022 | if (err) |
1023 | return (err); |
1024 | if (req->newptr) { |
1025 | if (!(value == 0 || value == 1)) |
1026 | return (ERANGE); |
1027 | |
1028 | /* |
1029 | * We do not allow setting the blacklist back to on, as we have no way |
1030 | * of knowing if those unsafe probes are still used. |
1031 | * |
1032 | * If we are using kernel symbols, we also do not allow any change, |
1033 | * since the symbols are jettison'd after the first pass. |
1034 | * |
1035 | * We do not need to take any locks here because those symbol modes |
1036 | * are permanent and do not change after boot. |
1037 | */ |
1038 | if (value != 1 || dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_NEVER || |
1039 | dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_ALWAYS_FROM_KERNEL) |
1040 | return (EPERM); |
1041 | |
1042 | ignore_fbt_blacklist = 1; |
1043 | } |
1044 | |
1045 | return (0); |
1046 | } |
1047 | |
1048 | SYSCTL_PROC(_kern_dtrace, OID_AUTO, ignore_fbt_blacklist, |
1049 | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, |
1050 | &ignore_fbt_blacklist, 0, |
1051 | sysctl_dtrace_ignore_fbt_blacklist, "I" , "fbt provider ignore blacklist" ); |
1052 | |
/*
 * A struct describing which functions will get invoked for certain
 * actions.
 *
 * Registered with cdevsw_add() in fbt_init().  Only open is implemented
 * by this file (_fbt_open); every other slot is filled with an
 * eno_* or nulldev entry, NULL, or 0.
 */
static struct cdevsw fbt_cdevsw =
{
	_fbt_open, /* open */
	eno_opcl, /* close */
	eno_rdwrt, /* read */
	eno_rdwrt, /* write */
	eno_ioctl, /* ioctl */
	(stop_fcn_t *)nulldev, /* stop */
	(reset_fcn_t *)nulldev, /* reset */
	NULL, /* tty's */
	eno_select, /* select */
	eno_mmap, /* mmap */
	eno_strat, /* strategy */
	eno_getc, /* getc */
	eno_putc, /* putc */
	0 /* type */
};
1074 | |
1075 | #undef kmem_alloc /* from its binding to dt_kmem_alloc glue */ |
1076 | #undef kmem_free /* from its binding to dt_kmem_free glue */ |
1077 | #include <vm/vm_kern.h> |
1078 | |
1079 | void |
1080 | fbt_init( void ) |
1081 | { |
1082 | int majdevno = cdevsw_add(FBT_MAJOR, &fbt_cdevsw); |
1083 | |
1084 | if (majdevno < 0) { |
1085 | printf("fbt_init: failed to allocate a major number!\n" ); |
1086 | return; |
1087 | } |
1088 | |
1089 | PE_parse_boot_argn("IgnoreFBTBlacklist" , &ignore_fbt_blacklist, sizeof (ignore_fbt_blacklist)); |
1090 | |
1091 | fbt_attach((dev_info_t*)(uintptr_t)majdevno); |
1092 | } |
1093 | #undef FBT_MAJOR |
1094 | |