| 1 | /* |
| 2 | * CDDL HEADER START |
| 3 | * |
| 4 | * The contents of this file are subject to the terms of the |
| 5 | * Common Development and Distribution License (the "License"). |
| 6 | * You may not use this file except in compliance with the License. |
| 7 | * |
| 8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
| 9 | * or http://www.opensolaris.org/os/licensing. |
| 10 | * See the License for the specific language governing permissions |
| 11 | * and limitations under the License. |
| 12 | * |
| 13 | * When distributing Covered Code, include this CDDL HEADER in each |
| 14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
| 15 | * If applicable, add the following below this CDDL HEADER, with the |
| 16 | * fields enclosed by brackets "[]" replaced with your own identifying |
| 17 | * information: Portions Copyright [yyyy] [name of copyright owner] |
| 18 | * |
| 19 | * CDDL HEADER END |
| 20 | */ |
| 21 | |
| 22 | /* |
| 23 | * Copyright 2007 Sun Microsystems, Inc. All rights reserved. |
| 24 | * Use is subject to license terms. |
| 25 | */ |
| 26 | |
| 27 | /* |
| 28 | * #pragma ident "@(#)dtrace_subr.c 1.16 07/09/18 SMI" |
| 29 | */ |
| 30 | |
| 31 | #include <sys/dtrace.h> |
| 32 | #include <sys/dtrace_glue.h> |
| 33 | #include <sys/dtrace_impl.h> |
| 34 | #include <sys/fasttrap.h> |
| 35 | #include <sys/vm.h> |
| 36 | #include <sys/user.h> |
| 37 | #include <sys/kauth.h> |
| 38 | #include <kern/debug.h> |
| 39 | |
| 40 | int (*dtrace_pid_probe_ptr)(x86_saved_state_t *); |
| 41 | int (*dtrace_return_probe_ptr)(x86_saved_state_t *); |
| 42 | |
| 43 | /* |
| 44 | * HACK! There doesn't seem to be an easy way to include trap.h from |
| 45 | * here. FIXME! |
| 46 | */ |
| 47 | #define T_INT3 3 /* int 3 instruction */ |
| 48 | #define T_DTRACE_RET 0x7f /* DTrace pid return */ |
| 49 | |
| 50 | kern_return_t |
| 51 | dtrace_user_probe(x86_saved_state_t *); |
| 52 | |
| 53 | kern_return_t |
| 54 | dtrace_user_probe(x86_saved_state_t *regs) |
| 55 | { |
| 56 | x86_saved_state64_t *regs64; |
| 57 | x86_saved_state32_t *regs32; |
| 58 | int trapno; |
| 59 | |
| 60 | /* |
| 61 | * FIXME! |
| 62 | * |
| 63 | * The only call path into this method is always a user trap. |
| 64 | * We don't need to test for user trap, but should assert it. |
| 65 | */ |
| 66 | boolean_t user_mode = TRUE; |
| 67 | |
| 68 | if (is_saved_state64(regs) == TRUE) { |
| 69 | regs64 = saved_state64(regs); |
| 70 | regs32 = NULL; |
| 71 | trapno = regs64->isf.trapno; |
| 72 | user_mode = TRUE; // By default, because xnu is 32 bit only |
| 73 | } else { |
| 74 | regs64 = NULL; |
| 75 | regs32 = saved_state32(regs); |
| 76 | if (regs32->cs & 0x03) user_mode = TRUE; |
| 77 | trapno = regs32->trapno; |
| 78 | } |
| 79 | |
| 80 | lck_rw_t *rwp; |
| 81 | struct proc *p = current_proc(); |
| 82 | |
| 83 | uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); |
| 84 | if (user_mode /*|| (rp->r_ps & PS_VM)*/) { |
| 85 | /* |
| 86 | * DTrace accesses t_cred in probe context. t_cred |
| 87 | * must always be either NULL, or point to a valid, |
| 88 | * allocated cred structure. |
| 89 | */ |
| 90 | kauth_cred_uthread_update(uthread, p); |
| 91 | } |
| 92 | |
| 93 | if (trapno == T_DTRACE_RET) { |
| 94 | uint8_t step = uthread->t_dtrace_step; |
| 95 | uint8_t ret = uthread->t_dtrace_ret; |
| 96 | user_addr_t npc = uthread->t_dtrace_npc; |
| 97 | |
| 98 | if (uthread->t_dtrace_ast) { |
| 99 | printf("dtrace_user_probe() should be calling aston()\n" ); |
| 100 | // aston(uthread); |
| 101 | // uthread->t_sig_check = 1; |
| 102 | } |
| 103 | |
| 104 | /* |
| 105 | * Clear all user tracing flags. |
| 106 | */ |
| 107 | uthread->t_dtrace_ft = 0; |
| 108 | |
| 109 | /* |
| 110 | * If we weren't expecting to take a return probe trap, kill |
| 111 | * the process as though it had just executed an unassigned |
| 112 | * trap instruction. |
| 113 | */ |
| 114 | if (step == 0) { |
| 115 | /* |
| 116 | * APPLE NOTE: We're returning KERN_FAILURE, which causes |
| 117 | * the generic signal handling code to take over, which will effectively |
| 118 | * deliver a EXC_BAD_INSTRUCTION to the user process. |
| 119 | */ |
| 120 | return KERN_FAILURE; |
| 121 | } |
| 122 | |
| 123 | /* |
| 124 | * If we hit this trap unrelated to a return probe, we're |
| 125 | * just here to reset the AST flag since we deferred a signal |
| 126 | * until after we logically single-stepped the instruction we |
| 127 | * copied out. |
| 128 | */ |
| 129 | if (ret == 0) { |
| 130 | if (regs64) { |
| 131 | regs64->isf.rip = npc; |
| 132 | } else { |
| 133 | regs32->eip = npc; |
| 134 | } |
| 135 | return KERN_SUCCESS; |
| 136 | } |
| 137 | |
| 138 | /* |
| 139 | * We need to wait until after we've called the |
| 140 | * dtrace_return_probe_ptr function pointer to set %pc. |
| 141 | */ |
| 142 | rwp = &CPU->cpu_ft_lock; |
| 143 | lck_rw_lock_shared(rwp); |
| 144 | |
| 145 | if (dtrace_return_probe_ptr != NULL) |
| 146 | (void) (*dtrace_return_probe_ptr)(regs); |
| 147 | lck_rw_unlock_shared(rwp); |
| 148 | |
| 149 | if (regs64) { |
| 150 | regs64->isf.rip = npc; |
| 151 | } else { |
| 152 | regs32->eip = npc; |
| 153 | } |
| 154 | |
| 155 | return KERN_SUCCESS; |
| 156 | } else if (trapno == T_INT3) { |
| 157 | uint8_t instr, instr2; |
| 158 | rwp = &CPU->cpu_ft_lock; |
| 159 | |
| 160 | /* |
| 161 | * The DTrace fasttrap provider uses the breakpoint trap |
| 162 | * (int 3). We let DTrace take the first crack at handling |
| 163 | * this trap; if it's not a probe that DTrace knowns about, |
| 164 | * we call into the trap() routine to handle it like a |
| 165 | * breakpoint placed by a conventional debugger. |
| 166 | */ |
| 167 | |
| 168 | /* |
| 169 | * APPLE NOTE: I believe the purpose of the reader/writers lock |
| 170 | * is thus: There are times which dtrace needs to prevent calling |
| 171 | * dtrace_pid_probe_ptr(). Sun's original impl grabbed a plain |
| 172 | * mutex here. However, that serialized all probe calls, and |
| 173 | * destroyed MP behavior. So now they use a RW lock, with probes |
| 174 | * as readers, and the top level synchronization as a writer. |
| 175 | */ |
| 176 | lck_rw_lock_shared(rwp); |
| 177 | if (dtrace_pid_probe_ptr != NULL && |
| 178 | (*dtrace_pid_probe_ptr)(regs) == 0) { |
| 179 | lck_rw_unlock_shared(rwp); |
| 180 | return KERN_SUCCESS; |
| 181 | } |
| 182 | lck_rw_unlock_shared(rwp); |
| 183 | |
| 184 | |
| 185 | /* |
| 186 | * If the instruction that caused the breakpoint trap doesn't |
| 187 | * look like an int 3 anymore, it may be that this tracepoint |
| 188 | * was removed just after the user thread executed it. In |
| 189 | * that case, return to user land to retry the instuction. |
| 190 | */ |
| 191 | user_addr_t pc = (regs64) ? regs64->isf.rip : (user_addr_t)regs32->eip; |
| 192 | if (fuword8(pc - 1, &instr) == 0 && instr != FASTTRAP_INSTR && // neither single-byte INT3 (0xCC) |
| 193 | !(instr == 3 && fuword8(pc - 2, &instr2) == 0 && instr2 == 0xCD)) { // nor two-byte INT 3 (0xCD03) |
| 194 | if (regs64) { |
| 195 | regs64->isf.rip--; |
| 196 | } else { |
| 197 | regs32->eip--; |
| 198 | } |
| 199 | return KERN_SUCCESS; |
| 200 | } |
| 201 | |
| 202 | } |
| 203 | |
| 204 | return KERN_FAILURE; |
| 205 | } |
| 206 | |
/*
 * dtrace_safe_synchronous_signal
 *
 * Currently a no-op: the whole body is compiled out with "#if 0".  The
 * disabled code below is kept as a reference for the logic that is not
 * yet enabled here.
 */
void
dtrace_safe_synchronous_signal(void)
{
#if 0
	kthread_t *t = curthread;
	struct regs *rp = lwptoregs(ttolwp(t));
	size_t isz = t->t_dtrace_npc - t->t_dtrace_pc;

	ASSERT(t->t_dtrace_on);

	/*
	 * If we're not in the range of scratch addresses, we're not actually
	 * tracing user instructions so turn off the flags. If the instruction
	 * we copied out caused a synchronous trap, reset the pc back to its
	 * original value and turn off the flags.
	 */
	if (rp->r_pc < t->t_dtrace_scrpc ||
	    rp->r_pc > t->t_dtrace_astpc + isz) {
		t->t_dtrace_ft = 0;
	} else if (rp->r_pc == t->t_dtrace_scrpc ||
	    rp->r_pc == t->t_dtrace_astpc) {
		rp->r_pc = t->t_dtrace_pc;
		t->t_dtrace_ft = 0;
	}
#endif /* 0 */
}
| 233 | |
/*
 * dtrace_safe_defer_signal
 *
 * Always returns 0.  The logic that could return 1 is compiled out with
 * "#if 0" and is kept below only as a reference.
 */
int
dtrace_safe_defer_signal(void)
{
#if 0
	kthread_t *t = curthread;
	struct regs *rp = lwptoregs(ttolwp(t));
	size_t isz = t->t_dtrace_npc - t->t_dtrace_pc;

	ASSERT(t->t_dtrace_on);

	/*
	 * If we're not in the range of scratch addresses, we're not actually
	 * tracing user instructions so turn off the flags.
	 */
	if (rp->r_pc < t->t_dtrace_scrpc ||
	    rp->r_pc > t->t_dtrace_astpc + isz) {
		t->t_dtrace_ft = 0;
		return (0);
	}

	/*
	 * If we've executed the original instruction, but haven't performed
	 * the jmp back to t->t_dtrace_npc or the clean up of any registers
	 * used to emulate %rip-relative instructions in 64-bit mode, do that
	 * here and take the signal right away. We detect this condition by
	 * seeing if the program counter is the range [scrpc + isz, astpc).
	 */
	if (t->t_dtrace_astpc - rp->r_pc <
	    t->t_dtrace_astpc - t->t_dtrace_scrpc - isz) {
#ifdef __sol64
		/*
		 * If there is a scratch register and we're on the
		 * instruction immediately after the modified instruction,
		 * restore the value of that scratch register.
		 */
		if (t->t_dtrace_reg != 0 &&
		    rp->r_pc == t->t_dtrace_scrpc + isz) {
			switch (t->t_dtrace_reg) {
			case REG_RAX:
				rp->r_rax = t->t_dtrace_regv;
				break;
			case REG_RCX:
				rp->r_rcx = t->t_dtrace_regv;
				break;
			case REG_R8:
				rp->r_r8 = t->t_dtrace_regv;
				break;
			case REG_R9:
				rp->r_r9 = t->t_dtrace_regv;
				break;
			}
		}
#endif
		rp->r_pc = t->t_dtrace_npc;
		t->t_dtrace_ft = 0;
		return (0);
	}

	/*
	 * Otherwise, make sure we'll return to the kernel after executing
	 * the copied out instruction and defer the signal.
	 */
	if (!t->t_dtrace_step) {
		ASSERT(rp->r_pc < t->t_dtrace_astpc);
		rp->r_pc += t->t_dtrace_astpc - t->t_dtrace_scrpc;
		t->t_dtrace_step = 1;
	}

	t->t_dtrace_ast = 1;

	return (1);

#endif /* 0 */

	/* With the reference logic disabled, the result is always 0. */
	return 0;
}
| 310 | |
/*
 * dtrace_flush_caches
 *
 * Intentionally empty: takes no arguments, does nothing, returns nothing.
 */
void
dtrace_flush_caches(void)
{
	/* Nothing to do. */
}
| 316 | |