1 | /* |
2 | * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | |
29 | #include <sys/types.h> |
30 | #include <sys/proc.h> |
31 | #include <sys/proc_internal.h> |
32 | #include <sys/systm.h> |
33 | #include <sys/user.h> |
34 | #include <sys/dtrace_ptss.h> |
35 | |
36 | #include <mach/vm_map.h> |
37 | #include <mach/vm_param.h> |
38 | #include <mach/mach_vm.h> |
39 | |
40 | #include <kern/task.h> |
41 | |
42 | #include <vm/vm_map.h> |
43 | |
44 | /* |
45 | * This function requires the sprlock to be held |
46 | * |
47 | * In general, it will not block. If it needs to allocate a new |
48 | * page of memory, the underlying kernel _MALLOC may block. |
49 | */ |
50 | struct dtrace_ptss_page_entry* |
51 | dtrace_ptss_claim_entry_locked(struct proc* p) { |
52 | LCK_MTX_ASSERT(&p->p_dtrace_sprlock, LCK_MTX_ASSERT_OWNED); |
53 | |
54 | struct dtrace_ptss_page_entry* entry = NULL; |
55 | |
56 | while (TRUE) { |
57 | struct dtrace_ptss_page_entry* temp = p->p_dtrace_ptss_free_list; |
58 | |
59 | if (temp == NULL) { |
60 | // Nothing on the free list. Allocate a new page, its okay if multiple threads race here. |
61 | struct dtrace_ptss_page* page = dtrace_ptss_allocate_page(p); |
62 | |
63 | // Make sure we actually got a page |
64 | if (page == NULL) |
65 | return NULL; |
66 | |
67 | // Add the page to the page list |
68 | page->next = p->p_dtrace_ptss_pages; |
69 | p->p_dtrace_ptss_pages = page; |
70 | |
71 | // CAS the entries onto the free list. |
72 | do { |
73 | page->entries[DTRACE_PTSS_ENTRIES_PER_PAGE-1].next = p->p_dtrace_ptss_free_list; |
74 | } while (!OSCompareAndSwapPtr((void *)page->entries[DTRACE_PTSS_ENTRIES_PER_PAGE-1].next, |
75 | (void *)&page->entries[0], |
76 | (void * volatile *)&p->p_dtrace_ptss_free_list)); |
77 | |
78 | // Now that we've added to the free list, try again. |
79 | continue; |
80 | } |
81 | |
82 | // Claim temp |
83 | if (!OSCompareAndSwapPtr((void *)temp, (void *)temp->next, (void * volatile *)&p->p_dtrace_ptss_free_list)) |
84 | continue; |
85 | |
86 | // At this point, we own temp. |
87 | entry = temp; |
88 | |
89 | break; |
90 | } |
91 | |
92 | return entry; |
93 | } |
94 | |
95 | /* |
96 | * This function does not require any locks to be held on entry. |
97 | */ |
98 | struct dtrace_ptss_page_entry* |
99 | dtrace_ptss_claim_entry(struct proc* p) { |
100 | // Verify no locks held on entry |
101 | LCK_MTX_ASSERT(&p->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED); |
102 | LCK_MTX_ASSERT(&p->p_mlock, LCK_MTX_ASSERT_NOTOWNED); |
103 | |
104 | struct dtrace_ptss_page_entry* entry = NULL; |
105 | |
106 | while (TRUE) { |
107 | struct dtrace_ptss_page_entry* temp = p->p_dtrace_ptss_free_list; |
108 | |
109 | if (temp == NULL) { |
110 | lck_mtx_lock(&p->p_dtrace_sprlock); |
111 | temp = dtrace_ptss_claim_entry_locked(p); |
112 | lck_mtx_unlock(&p->p_dtrace_sprlock); |
113 | return temp; |
114 | } |
115 | |
116 | // Claim temp |
117 | if (!OSCompareAndSwapPtr((void *)temp, (void *)temp->next, (void * volatile *)&p->p_dtrace_ptss_free_list)) |
118 | continue; |
119 | |
120 | // At this point, we own temp. |
121 | entry = temp; |
122 | |
123 | break; |
124 | } |
125 | |
126 | return entry; |
127 | } |
128 | |
129 | /* |
130 | * This function does not require any locks to be held on entry. |
131 | * |
132 | * (PR-11138709) A NULL p->p_dtrace_ptss_pages means the entry can |
133 | * no longer be referenced safely. When found in this state, the chore |
134 | * of releasing an entry to the free list is ignored. |
135 | */ |
136 | void |
137 | dtrace_ptss_release_entry(struct proc* p, struct dtrace_ptss_page_entry* e) { |
138 | if (p && p->p_dtrace_ptss_pages && e) { |
139 | do { |
140 | e->next = p->p_dtrace_ptss_free_list; |
141 | } while (!OSCompareAndSwapPtr((void *)e->next, (void *)e, (void * volatile *)&p->p_dtrace_ptss_free_list)); |
142 | } |
143 | } |
144 | |
145 | /* |
146 | * This function allocates a new page in the target process's address space. |
147 | * |
148 | * It returns a dtrace_ptss_page that has its entries chained, with the last |
149 | * entries next field set to NULL. It does not add the page or the entries to |
150 | * the process's page/entry lists. |
151 | * |
152 | * This function does not require that any locks be held when it is invoked. |
153 | */ |
154 | struct dtrace_ptss_page* |
155 | dtrace_ptss_allocate_page(struct proc* p) |
156 | { |
157 | // Allocate the kernel side data |
158 | struct dtrace_ptss_page* ptss_page = _MALLOC(sizeof(struct dtrace_ptss_page), M_TEMP, M_ZERO | M_WAITOK); |
159 | if (ptss_page == NULL) |
160 | return NULL; |
161 | |
162 | // Now allocate a page in user space and set its protections to allow execute. |
163 | task_t task = p->task; |
164 | vm_map_t map = get_task_map_reference(task); |
165 | if (map == NULL) |
166 | goto err; |
167 | |
168 | mach_vm_size_t size = PAGE_MAX_SIZE; |
169 | mach_vm_offset_t addr = 0; |
170 | mach_vm_offset_t write_addr = 0; |
171 | /* |
172 | * The embedded OS has extra permissions for writable and executable pages. |
173 | * To ensure correct permissions, we must set the page protections separately. |
174 | */ |
175 | vm_prot_t cur_protection = VM_PROT_READ|VM_PROT_EXECUTE; |
176 | vm_prot_t max_protection = VM_PROT_READ|VM_PROT_EXECUTE|VM_PROT_WRITE; |
177 | |
178 | kern_return_t kr = mach_vm_map_kernel(map, &addr, size, 0, VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_NONE, IPC_PORT_NULL, 0, FALSE, cur_protection, max_protection, VM_INHERIT_DEFAULT); |
179 | if (kr != KERN_SUCCESS) { |
180 | goto err; |
181 | } |
182 | /* |
183 | * If on embedded, remap the scratch space as writable at another |
184 | * virtual address |
185 | */ |
186 | kr = mach_vm_remap_kernel(map, &write_addr, size, 0, VM_FLAGS_ANYWHERE, VM_KERN_MEMORY_NONE, map, addr, FALSE, &cur_protection, &max_protection, VM_INHERIT_DEFAULT); |
187 | if (kr != KERN_SUCCESS || !(max_protection & VM_PROT_WRITE)) |
188 | goto err; |
189 | |
190 | kr = mach_vm_protect (map, (mach_vm_offset_t)write_addr, (mach_vm_size_t)size, 0, VM_PROT_READ | VM_PROT_WRITE); |
191 | if (kr != KERN_SUCCESS) |
192 | goto err; |
193 | |
194 | // Chain the page entries. |
195 | int i; |
196 | for (i=0; i<DTRACE_PTSS_ENTRIES_PER_PAGE; i++) { |
197 | ptss_page->entries[i].addr = addr + (i * DTRACE_PTSS_SCRATCH_SPACE_PER_THREAD); |
198 | ptss_page->entries[i].write_addr = write_addr + (i * DTRACE_PTSS_SCRATCH_SPACE_PER_THREAD); |
199 | ptss_page->entries[i].next = &ptss_page->entries[i+1]; |
200 | } |
201 | |
202 | // The last entry should point to NULL |
203 | ptss_page->entries[DTRACE_PTSS_ENTRIES_PER_PAGE-1].next = NULL; |
204 | |
205 | vm_map_deallocate(map); |
206 | |
207 | return ptss_page; |
208 | |
209 | err: |
210 | _FREE(ptss_page, M_TEMP); |
211 | |
212 | if (map) |
213 | vm_map_deallocate(map); |
214 | |
215 | return NULL; |
216 | } |
217 | |
218 | /* |
219 | * This function frees an existing page in the target process's address space. |
220 | * |
221 | * It does not alter any of the process's page/entry lists. |
222 | * |
223 | * TODO: Inline in dtrace_ptrace_exec_exit? |
224 | */ |
225 | void |
226 | dtrace_ptss_free_page(struct proc* p, struct dtrace_ptss_page* ptss_page) |
227 | { |
228 | // Grab the task and get a reference to its vm_map |
229 | task_t task = p->task; |
230 | vm_map_t map = get_task_map_reference(task); |
231 | |
232 | mach_vm_address_t addr = ptss_page->entries[0].addr; |
233 | mach_vm_size_t size = PAGE_SIZE; // We need some way to assert that this matches vm_map_round_page() !!! |
234 | |
235 | // Silent failures, no point in checking return code. |
236 | mach_vm_deallocate(map, addr, size); |
237 | |
238 | mach_vm_address_t write_addr = ptss_page->entries[0].write_addr; |
239 | mach_vm_deallocate(map, write_addr, size); |
240 | |
241 | vm_map_deallocate(map); |
242 | } |
243 | |
244 | /* |
245 | * This function assumes that the target process has been |
246 | * suspended, and the proc_lock & sprlock is held |
247 | */ |
248 | void |
249 | dtrace_ptss_enable(struct proc* p) { |
250 | LCK_MTX_ASSERT(&p->p_dtrace_sprlock, LCK_MTX_ASSERT_OWNED); |
251 | LCK_MTX_ASSERT(&p->p_mlock, LCK_MTX_ASSERT_OWNED); |
252 | |
253 | struct uthread* uth; |
254 | /* |
255 | * XXX There has been a concern raised about holding the proc_lock |
256 | * while calling dtrace_ptss_claim_entry(), due to the fact |
257 | * that dtrace_ptss_claim_entry() can potentially malloc. |
258 | */ |
259 | TAILQ_FOREACH(uth, &p->p_uthlist, uu_list) { |
260 | uth->t_dtrace_scratch = dtrace_ptss_claim_entry_locked(p); |
261 | } |
262 | } |
263 | |
264 | /* |
265 | * This function is not thread safe. |
266 | * |
267 | * It assumes the sprlock is held, and the proc_lock is not. |
268 | */ |
269 | void |
270 | dtrace_ptss_exec_exit(struct proc* p) { |
271 | /* |
272 | * Should hold sprlock to touch the pages list. Must not |
273 | * hold the proc lock to avoid deadlock. |
274 | */ |
275 | LCK_MTX_ASSERT(&p->p_dtrace_sprlock, LCK_MTX_ASSERT_OWNED); |
276 | LCK_MTX_ASSERT(&p->p_mlock, LCK_MTX_ASSERT_NOTOWNED); |
277 | |
278 | p->p_dtrace_ptss_free_list = NULL; |
279 | |
280 | struct dtrace_ptss_page* temp = p->p_dtrace_ptss_pages; |
281 | p->p_dtrace_ptss_pages = NULL; |
282 | |
283 | while (temp != NULL) { |
284 | struct dtrace_ptss_page* next = temp->next; |
285 | |
286 | // Do we need to specifically mach_vm_deallocate the user pages? |
287 | // This can be called when the process is exiting, I believe the proc's |
288 | // vm_map_t may already be toast. |
289 | |
290 | // Must be certain to free the kernel memory! |
291 | _FREE(temp, M_TEMP); |
292 | temp = next; |
293 | } |
294 | } |
295 | |
296 | /* |
297 | * This function is not thread safe. It is not used for vfork. |
298 | * |
299 | * The child proc ptss fields are initialized to NULL at fork time. |
300 | * Pages allocated in the parent are copied as part of the vm_map copy, though. |
301 | * We need to deallocate those pages. |
302 | * |
303 | * Parent and child sprlock should be held, and proc_lock must NOT be held. |
304 | */ |
305 | void |
306 | dtrace_ptss_fork(struct proc* parent, struct proc* child) { |
307 | // The child should not have any pages/entries allocated at this point. |
308 | // ASSERT(child->p_dtrace_ptss_pages == NULL); |
309 | // ASSERT(child->p_dtrace_ptss_free_list == NULL); |
310 | |
311 | /* |
312 | * The parent's sprlock should be held, to protect its pages list |
313 | * from changing while the child references it. The child's sprlock |
314 | * must also be held, because we are modifying its pages list. |
315 | * Finally, to prevent a deadlock with the fasttrap cleanup code, |
316 | * neither the parent or child proc_lock should be held. |
317 | */ |
318 | LCK_MTX_ASSERT(&parent->p_dtrace_sprlock, LCK_MTX_ASSERT_OWNED); |
319 | LCK_MTX_ASSERT(&parent->p_mlock, LCK_MTX_ASSERT_NOTOWNED); |
320 | LCK_MTX_ASSERT(&child->p_dtrace_sprlock, LCK_MTX_ASSERT_OWNED); |
321 | LCK_MTX_ASSERT(&child->p_mlock, LCK_MTX_ASSERT_NOTOWNED); |
322 | |
323 | // Get page list from *PARENT* |
324 | struct dtrace_ptss_page* temp = parent->p_dtrace_ptss_pages; |
325 | |
326 | while (temp != NULL) { |
327 | // Freeing the page in the *CHILD* |
328 | dtrace_ptss_free_page(child, temp); |
329 | |
330 | // Do not free the kernel memory, it belong to the parent. |
331 | temp = temp->next; |
332 | } |
333 | } |
334 | |