1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/* #pragma ident "@(#)fbt.c 1.15 05/09/19 SMI" */
28
29#ifdef KERNEL
30#ifndef _KERNEL
31#define _KERNEL /* Solaris vs. Darwin */
32#endif
33#endif
34
35#define MACH__POSIX_C_SOURCE_PRIVATE 1 /* pulls in suitable savearea from mach/ppc/thread_status.h */
36#include <kern/thread.h>
37#include <mach/thread_status.h>
38#include <mach/vm_param.h>
39#include <mach-o/loader.h>
40#include <mach-o/nlist.h>
41#include <libkern/kernel_mach_header.h>
42#include <libkern/OSAtomic.h>
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/errno.h>
47#include <sys/stat.h>
48#include <sys/ioctl.h>
49#include <sys/conf.h>
50#include <sys/fcntl.h>
51#include <miscfs/devfs/devfs.h>
52
53#include <sys/dtrace.h>
54#include <sys/dtrace_impl.h>
55#include <sys/fbt.h>
56
57#include <sys/dtrace_glue.h>
58
59#include <san/kasan.h>
60
/*
 * Emulation codes and skip lengths.
 *
 * fbt_invop() returns the code stored in a probe's fbtp_rval, and
 * fbt_perfCallback() switches on the value it gets back from dtrace_invop().
 * Each *_SKIP value is the number of bytes the saved rip is advanced past the
 * instruction being emulated.
 * DTRACE_INVOP_MOVL_ESP_EBP and its skip length are not referenced by the
 * code visible in this file section.
 */
#define DTRACE_INVOP_NOP_SKIP 1
#define DTRACE_INVOP_MOVL_ESP_EBP 10
#define DTRACE_INVOP_MOVL_ESP_EBP_SKIP 2
#define DTRACE_INVOP_MOV_RSP_RBP 11
#define DTRACE_INVOP_MOV_RSP_RBP_SKIP 3
#define DTRACE_INVOP_POP_RBP 12
#define DTRACE_INVOP_POP_RBP_SKIP 1
#define DTRACE_INVOP_LEAVE_SKIP 1

/*
 * Opcode bytes for the 32-bit prologue forms (pushl %ebp; movl %esp,%ebp in
 * its two encodings). Only used by the disabled (#if 0) code path in
 * fbt_provide_probe().
 */
#define FBT_PUSHL_EBP 0x55
#define FBT_MOVL_ESP_EBP0_V0 0x8b
#define FBT_MOVL_ESP_EBP1_V0 0xec
#define FBT_MOVL_ESP_EBP0_V1 0x89
#define FBT_MOVL_ESP_EBP1_V1 0xe5

/*
 * Opcode bytes for the 64-bit prologue/epilogue that fbt_provide_probe()
 * matches: push %rbp; REX.W mov %rsp,%rbp (three bytes); pop %rbp.
 */
#define FBT_PUSH_RBP 0x55
#define FBT_REX_RSP_RBP 0x48
#define FBT_MOV_RSP_RBP0 0x89
#define FBT_MOV_RSP_RBP1 0xe5
#define FBT_POP_RBP 0x5d

/*
 * Opcode bytes, and the instruction lengths fbt_provide_probe() expects from
 * dtrace_instr_size(), for the instructions that may follow a
 * pop %rbp / leave at a return site.
 */
#define FBT_POPL_EBP 0x5d
#define FBT_RET 0xc3
#define FBT_RET_IMM16 0xc2
#define FBT_LEAVE 0xc9
#define FBT_JMP_SHORT_REL 0xeb /* Jump short, relative, displacement relative to next instr. */
#define FBT_JMP_NEAR_REL 0xe9 /* Jump near, relative, displacement relative to next instr. */
#define FBT_JMP_FAR_ABS 0xea /* Jump far, absolute, address given in operand */
#define FBT_RET_LEN 1
#define FBT_RET_IMM16_LEN 3
#define FBT_JMP_SHORT_REL_LEN 2
#define FBT_JMP_NEAR_REL_LEN 5
#define FBT_JMP_FAR_ABS_LEN 5

/*
 * FBT_PATCHVAL is recorded as every probe's fbtp_patchval.
 * NOTE(review): presumably this byte is what gets written over the patch
 * point so that executing it raises the invalid-opcode trap handled by
 * fbt_perfCallback() -- confirm against fbt_enable().
 * FBT_AFRAMES_* are passed to dtrace_probe_create() as the aframes argument
 * for entry and return probes respectively.
 */
#define FBT_PATCHVAL 0xf0
#define FBT_AFRAMES_ENTRY 7
#define FBT_AFRAMES_RETURN 6

/* Probe names registered for function entry and function return. */
#define FBT_ENTRY "entry"
#define FBT_RETURN "return"
/* Hash a patch-point address into an fbt_probetab[] bucket index. */
#define FBT_ADDR2NDX(addr) ((((uintptr_t)(addr)) >> 4) & fbt_probetab_mask)
102
/*
 * Provider state defined outside this file: the provider id handed to the
 * dtrace_probe_*() calls, the hash table of probes (bucketed by
 * FBT_ADDR2NDX() and chained through fbtp_hashnext), and the table's mask.
 */
extern dtrace_provider_id_t fbt_id;
extern fbt_probe_t **fbt_probetab;
extern int fbt_probetab_mask;

/* Forward declaration of the trap callback defined later in this file. */
kern_return_t fbt_perfCallback(int, x86_saved_state_t *, uintptr_t *, __unused int);
108
109int
110fbt_invop(uintptr_t addr, uintptr_t *state, uintptr_t rval)
111{
112 fbt_probe_t *fbt = fbt_probetab[FBT_ADDR2NDX(addr)];
113
114 for (; fbt != NULL; fbt = fbt->fbtp_hashnext) {
115 if ((uintptr_t)fbt->fbtp_patchpoint == addr) {
116
117 if (fbt->fbtp_roffset == 0) {
118 x86_saved_state64_t *regs = (x86_saved_state64_t *)state;
119
120 CPU->cpu_dtrace_caller = *(uintptr_t *)(((uintptr_t)(regs->isf.rsp))+sizeof(uint64_t)); // 8(%rsp)
121 /* 64-bit ABI, arguments passed in registers. */
122 dtrace_probe(fbt->fbtp_id, regs->rdi, regs->rsi, regs->rdx, regs->rcx, regs->r8);
123 CPU->cpu_dtrace_caller = 0;
124 } else {
125
126 dtrace_probe(fbt->fbtp_id, fbt->fbtp_roffset, rval, 0, 0, 0);
127 CPU->cpu_dtrace_caller = 0;
128 }
129
130 return (fbt->fbtp_rval);
131 }
132 }
133
134 return (0);
135}
136
/*
 * True when `regs` is non-NULL and the saved code-segment selector has a
 * nonzero privilege level in its low two bits (i.e. the trap did not come
 * from ring 0). The argument is fully parenthesized so that expressions such
 * as `c ? a : b` may be passed safely; note that it is evaluated twice.
 */
#define IS_USER_TRAP(regs) ((regs) && (((regs)->isf.cs & 3) != 0))
/* Trap number that fbt_perfCallback() treats as an FBT probe firing. */
#define T_INVALID_OPCODE 6
#define FBT_EXCEPTION_CODE T_INVALID_OPCODE
/* Trap number written back into the saved state by fbt_perfCallback(). */
#define T_PREEMPT 255
141
142kern_return_t
143fbt_perfCallback(
144 int trapno,
145 x86_saved_state_t *tagged_regs,
146 uintptr_t *lo_spp,
147 __unused int unused2)
148{
149 kern_return_t retval = KERN_FAILURE;
150 x86_saved_state64_t *saved_state = saved_state64(tagged_regs);
151
152 if (FBT_EXCEPTION_CODE == trapno && !IS_USER_TRAP(saved_state)) {
153 boolean_t oldlevel;
154 uint64_t rsp_probe, fp, delta = 0;
155 uintptr_t old_sp;
156 uint32_t *pDst;
157 int emul;
158
159
160 oldlevel = ml_set_interrupts_enabled(FALSE);
161
162 /* Calculate where the stack pointer was when the probe instruction "fired." */
163 rsp_probe = saved_state->isf.rsp; /* Easy, x86_64 establishes this value in idt64.s */
164
165 __asm__ volatile(
166 "Ldtrace_invop_callsite_pre_label:\n"
167 ".data\n"
168 ".private_extern _dtrace_invop_callsite_pre\n"
169 "_dtrace_invop_callsite_pre:\n"
170 " .quad Ldtrace_invop_callsite_pre_label\n"
171 ".text\n"
172 );
173
174 emul = dtrace_invop( saved_state->isf.rip, (uintptr_t *)saved_state, saved_state->rax );
175
176 __asm__ volatile(
177 "Ldtrace_invop_callsite_post_label:\n"
178 ".data\n"
179 ".private_extern _dtrace_invop_callsite_post\n"
180 "_dtrace_invop_callsite_post:\n"
181 " .quad Ldtrace_invop_callsite_post_label\n"
182 ".text\n"
183 );
184
185 switch (emul) {
186 case DTRACE_INVOP_NOP:
187 saved_state->isf.rip += DTRACE_INVOP_NOP_SKIP; /* Skip over the patched NOP (planted by sdt). */
188 retval = KERN_SUCCESS;
189 break;
190
191 case DTRACE_INVOP_MOV_RSP_RBP:
192 saved_state->rbp = rsp_probe; /* Emulate patched mov %rsp,%rbp */
193 saved_state->isf.rip += DTRACE_INVOP_MOV_RSP_RBP_SKIP; /* Skip over the bytes of the patched mov %rsp,%rbp */
194 retval = KERN_SUCCESS;
195 break;
196
197 case DTRACE_INVOP_POP_RBP:
198 case DTRACE_INVOP_LEAVE:
199/*
200 * Emulate first micro-op of patched leave: mov %rbp,%rsp
201 * fp points just below the return address slot for target's ret
202 * and at the slot holding the frame pointer saved by the target's prologue.
203 */
204 fp = saved_state->rbp;
205/* Emulate second micro-op of patched leave: patched pop %rbp
206 * savearea rbp is set for the frame of the caller to target
207 * The *live* %rsp will be adjusted below for pop increment(s)
208 */
209 saved_state->rbp = *(uint64_t *)fp;
210/* Skip over the patched leave */
211 saved_state->isf.rip += DTRACE_INVOP_LEAVE_SKIP;
212/*
213 * Lift the stack to account for the emulated leave
214 * Account for words local in this frame
215 * (in "case DTRACE_INVOP_POPL_EBP:" this is zero.)
216 */
217 delta = ((uint32_t *)fp) - ((uint32_t *)rsp_probe); /* delta is a *word* increment */
218/* Account for popping off the rbp (just accomplished by the emulation
219 * above...)
220 */
221 delta += 2;
222 saved_state->isf.rsp += (delta << 2);
223/* Obtain the stack pointer recorded by the trampolines */
224 old_sp = *lo_spp;
225/* Shift contents of stack */
226 for (pDst = (uint32_t *)fp;
227 pDst > (((uint32_t *)old_sp));
228 pDst--)
229 *pDst = pDst[-delta];
230
231#if KASAN
232 /*
233 * The above has moved stack objects so they are no longer in sync
234 * with the shadow.
235 */
236 uintptr_t base = (uintptr_t)((uint32_t *)old_sp - delta);
237 uintptr_t size = (uintptr_t)fp - base;
238 if (base >= VM_MIN_KERNEL_AND_KEXT_ADDRESS) {
239 kasan_unpoison_stack(base, size);
240 }
241#endif
242
243/* Track the stack lift in "saved_state". */
244 saved_state = (x86_saved_state64_t *) (((uintptr_t)saved_state) + (delta << 2));
245/* Adjust the stack pointer utilized by the trampolines */
246 *lo_spp = old_sp + (delta << 2);
247
248 retval = KERN_SUCCESS;
249 break;
250
251 default:
252 retval = KERN_FAILURE;
253 break;
254 }
255
256 /* Trick trap_from_kernel into not attempting to handle pending AST_URGENT */
257 saved_state->isf.trapno = T_PREEMPT;
258
259 ml_set_interrupts_enabled(oldlevel);
260 }
261
262 return retval;
263}
264
265void
266fbt_provide_probe(struct modctl *ctl, const char *modname, const char* symbolName, machine_inst_t* symbolStart, machine_inst_t* instrHigh)
267{
268 unsigned int j;
269 unsigned int doenable = 0;
270 dtrace_id_t thisid;
271
272 fbt_probe_t *newfbt, *retfbt, *entryfbt;
273 machine_inst_t *instr, *limit, theInstr, i1, i2, i3;
274 int size;
275
276 /*
277 * Guard against null symbols
278 */
279 if (!symbolStart || !instrHigh || instrHigh < symbolStart) {
280 kprintf("dtrace: %s has an invalid address\n", symbolName);
281 return;
282 }
283
284 for (j = 0, instr = symbolStart, theInstr = 0;
285 (j < 4) && (instrHigh > (instr + 2)); j++) {
286 theInstr = instr[0];
287 if (theInstr == FBT_PUSH_RBP || theInstr == FBT_RET || theInstr == FBT_RET_IMM16)
288 break;
289
290 if ((size = dtrace_instr_size(instr)) <= 0)
291 break;
292
293 instr += size;
294 }
295
296 if (theInstr != FBT_PUSH_RBP)
297 return;
298
299 i1 = instr[1];
300 i2 = instr[2];
301 i3 = instr[3];
302
303 limit = (machine_inst_t *)instrHigh;
304
305 if (i1 == FBT_REX_RSP_RBP && i2 == FBT_MOV_RSP_RBP0 && i3 == FBT_MOV_RSP_RBP1) {
306 instr += 1; /* Advance to the mov %rsp,%rbp */
307 theInstr = i1;
308 } else {
309 return;
310 }
311#if 0
312 else {
313 /*
314 * Sometimes, the compiler will schedule an intervening instruction
315 * in the function prologue. Example:
316 *
317 * _mach_vm_read:
318 * 000006d8 pushl %ebp
319 * 000006d9 movl $0x00000004,%edx
320 * 000006de movl %esp,%ebp
321 *
322 * Try the next instruction, to see if it is a movl %esp,%ebp
323 */
324
325 instr += 1; /* Advance past the pushl %ebp */
326 if ((size = dtrace_instr_size(instr)) <= 0)
327 return;
328
329 instr += size;
330
331 if ((instr + 1) >= limit)
332 return;
333
334 i1 = instr[0];
335 i2 = instr[1];
336
337 if (!(i1 == FBT_MOVL_ESP_EBP0_V0 && i2 == FBT_MOVL_ESP_EBP1_V0) &&
338 !(i1 == FBT_MOVL_ESP_EBP0_V1 && i2 == FBT_MOVL_ESP_EBP1_V1))
339 return;
340
341 /* instr already points at the movl %esp,%ebp */
342 theInstr = i1;
343 }
344#endif
345 thisid = dtrace_probe_lookup(fbt_id, modname, symbolName, FBT_ENTRY);
346 newfbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
347 strlcpy( (char *)&(newfbt->fbtp_name), symbolName, MAX_FBTP_NAME_CHARS );
348
349 if (thisid != 0) {
350 /*
351 * The dtrace_probe previously existed, so we have to hook
352 * the newfbt entry onto the end of the existing fbt's chain.
353 * If we find an fbt entry that was previously patched to
354 * fire, (as indicated by the current patched value), then
355 * we want to enable this newfbt on the spot.
356 */
357 entryfbt = dtrace_probe_arg (fbt_id, thisid);
358 ASSERT (entryfbt != NULL);
359 for(; entryfbt != NULL; entryfbt = entryfbt->fbtp_next) {
360 if (entryfbt->fbtp_currentval == entryfbt->fbtp_patchval)
361 doenable++;
362
363 if (entryfbt->fbtp_next == NULL) {
364 entryfbt->fbtp_next = newfbt;
365 newfbt->fbtp_id = entryfbt->fbtp_id;
366 break;
367 }
368 }
369 }
370 else {
371 /*
372 * The dtrace_probe did not previously exist, so we
373 * create it and hook in the newfbt. Since the probe is
374 * new, we obviously do not need to enable it on the spot.
375 */
376 newfbt->fbtp_id = dtrace_probe_create(fbt_id, modname, symbolName, FBT_ENTRY, FBT_AFRAMES_ENTRY, newfbt);
377 doenable = 0;
378 }
379
380 newfbt->fbtp_patchpoint = instr;
381 newfbt->fbtp_ctl = ctl;
382 newfbt->fbtp_loadcnt = ctl->mod_loadcnt;
383 newfbt->fbtp_rval = DTRACE_INVOP_MOV_RSP_RBP;
384 newfbt->fbtp_savedval = theInstr;
385 newfbt->fbtp_patchval = FBT_PATCHVAL;
386 newfbt->fbtp_currentval = 0;
387 newfbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
388 fbt_probetab[FBT_ADDR2NDX(instr)] = newfbt;
389
390 if (doenable)
391 fbt_enable(NULL, newfbt->fbtp_id, newfbt);
392
393 /*
394 * The fbt entry chain is in place, one entry point per symbol.
395 * The fbt return chain can have multiple return points per symbol.
396 * Here we find the end of the fbt return chain.
397 */
398
399 doenable=0;
400
401 thisid = dtrace_probe_lookup(fbt_id, modname, symbolName, FBT_RETURN);
402 if (thisid != 0) {
403 /* The dtrace_probe previously existed, so we have to
404 * find the end of the existing fbt chain. If we find
405 * an fbt return that was previously patched to fire,
406 * (as indicated by the currrent patched value), then
407 * we want to enable any new fbts on the spot.
408 */
409 retfbt = dtrace_probe_arg (fbt_id, thisid);
410 ASSERT(retfbt != NULL);
411 for (; retfbt != NULL; retfbt = retfbt->fbtp_next) {
412 if (retfbt->fbtp_currentval == retfbt->fbtp_patchval)
413 doenable++;
414 if(retfbt->fbtp_next == NULL)
415 break;
416 }
417 }
418 else {
419 doenable = 0;
420 retfbt = NULL;
421 }
422
423again:
424 if (instr >= limit)
425 return;
426
427 /*
428 * If this disassembly fails, then we've likely walked off into
429 * a jump table or some other unsuitable area. Bail out of the
430 * disassembly now.
431 */
432 if ((size = dtrace_instr_size(instr)) <= 0)
433 return;
434
435 /*
436 * We (desperately) want to avoid erroneously instrumenting a
437 * jump table, especially given that our markers are pretty
438 * short: two bytes on x86, and just one byte on amd64. To
439 * determine if we're looking at a true instruction sequence
440 * or an inline jump table that happens to contain the same
441 * byte sequences, we resort to some heuristic sleeze: we
442 * treat this instruction as being contained within a pointer,
443 * and see if that pointer points to within the body of the
444 * function. If it does, we refuse to instrument it.
445 */
446 for (j = 0; j < sizeof (uintptr_t); j++) {
447 uintptr_t check = (uintptr_t)instr - j;
448 uint8_t *ptr;
449
450 if (check < (uintptr_t)symbolStart)
451 break;
452
453 if (check + sizeof (uintptr_t) > (uintptr_t)limit)
454 continue;
455
456 ptr = *(uint8_t **)check;
457
458 if (ptr >= (uint8_t *)symbolStart && ptr < limit) {
459 instr += size;
460 goto again;
461 }
462 }
463
464 /*
465 * OK, it's an instruction.
466 */
467 theInstr = instr[0];
468
469 /* Walked onto the start of the next routine? If so, bail out of this function. */
470 if (theInstr == FBT_PUSH_RBP)
471 return;
472
473 if (!(size == 1 && (theInstr == FBT_POP_RBP || theInstr == FBT_LEAVE))) {
474 instr += size;
475 goto again;
476 }
477
478 /*
479 * Found the pop %rbp; or leave.
480 */
481 machine_inst_t *patch_instr = instr;
482
483 /*
484 * Scan forward for a "ret", or "jmp".
485 */
486 instr += size;
487 if (instr >= limit)
488 return;
489
490 size = dtrace_instr_size(instr);
491 if (size <= 0) /* Failed instruction decode? */
492 return;
493
494 theInstr = instr[0];
495
496 if (!(size == FBT_RET_LEN && (theInstr == FBT_RET)) &&
497 !(size == FBT_RET_IMM16_LEN && (theInstr == FBT_RET_IMM16)) &&
498 !(size == FBT_JMP_SHORT_REL_LEN && (theInstr == FBT_JMP_SHORT_REL)) &&
499 !(size == FBT_JMP_NEAR_REL_LEN && (theInstr == FBT_JMP_NEAR_REL)) &&
500 !(size == FBT_JMP_FAR_ABS_LEN && (theInstr == FBT_JMP_FAR_ABS)))
501 return;
502
503 /*
504 * pop %rbp; ret; or leave; ret; or leave; jmp tailCalledFun; -- We have a winner!
505 */
506 newfbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
507 strlcpy( (char *)&(newfbt->fbtp_name), symbolName, MAX_FBTP_NAME_CHARS );
508
509 if (retfbt == NULL) {
510 newfbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
511 symbolName, FBT_RETURN, FBT_AFRAMES_RETURN, newfbt);
512 } else {
513 retfbt->fbtp_next = newfbt;
514 newfbt->fbtp_id = retfbt->fbtp_id;
515 }
516
517 retfbt = newfbt;
518 newfbt->fbtp_patchpoint = patch_instr;
519 newfbt->fbtp_ctl = ctl;
520 newfbt->fbtp_loadcnt = ctl->mod_loadcnt;
521
522 if (*patch_instr == FBT_POP_RBP) {
523 newfbt->fbtp_rval = DTRACE_INVOP_POP_RBP;
524 } else {
525 ASSERT(*patch_instr == FBT_LEAVE);
526 newfbt->fbtp_rval = DTRACE_INVOP_LEAVE;
527 }
528 newfbt->fbtp_roffset =
529 (uintptr_t)(patch_instr - (uint8_t *)symbolStart);
530
531 newfbt->fbtp_savedval = *patch_instr;
532 newfbt->fbtp_patchval = FBT_PATCHVAL;
533 newfbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(patch_instr)];
534 fbt_probetab[FBT_ADDR2NDX(patch_instr)] = newfbt;
535
536 if (doenable)
537 fbt_enable(NULL, newfbt->fbtp_id, newfbt);
538
539 instr += size;
540 goto again;
541}
542
543