1 | /* |
2 | * Copyright (c) 2016 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. Please obtain a copy of the License at |
10 | * http://www.opensource.apple.com/apsl/ and read it before using this |
11 | * file. |
12 | * |
13 | * The Original Code and all software distributed under the License are |
14 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
15 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
16 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
17 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
18 | * Please see the License for the specific language governing rights and |
19 | * limitations under the License. |
20 | * |
21 | * @APPLE_LICENSE_HEADER_END@ |
22 | */ |
23 | |
24 | /*- |
25 | * Portions Copyright (c) 1992, 1993 |
26 | * The Regents of the University of California. All rights reserved. |
27 | * |
28 | * This code is derived from software contributed to Berkeley by |
29 | * John Heidemann of the UCLA Ficus project. |
30 | * |
31 | * Redistribution and use in source and binary forms, with or without |
32 | * modification, are permitted provided that the following conditions |
33 | * are met: |
34 | * 1. Redistributions of source code must retain the above copyright |
35 | * notice, this list of conditions and the following disclaimer. |
36 | * 2. Redistributions in binary form must reproduce the above copyright |
37 | * notice, this list of conditions and the following disclaimer in the |
38 | * documentation and/or other materials provided with the distribution. |
39 | * 4. Neither the name of the University nor the names of its contributors |
40 | * may be used to endorse or promote products derived from this software |
41 | * without specific prior written permission. |
42 | * |
43 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
44 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
45 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
46 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
47 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
48 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
49 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
50 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
51 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
52 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
53 | * SUCH DAMAGE. |
54 | * |
55 | * @(#)null_vnops.c 8.6 (Berkeley) 5/27/95 |
56 | * |
57 | * Ancestors: |
58 | * @(#)lofs_vnops.c 1.2 (Berkeley) 6/18/92 |
59 | * ...and... |
60 | * @(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project |
61 | * |
62 | * $FreeBSD$ |
63 | */ |
64 | |
65 | #include <sys/param.h> |
66 | #include <sys/systm.h> |
67 | #include <sys/conf.h> |
68 | #include <sys/kernel.h> |
69 | #include <sys/lock.h> |
70 | #include <sys/malloc.h> |
71 | #include <sys/mount.h> |
72 | #include <sys/mount_internal.h> |
73 | #include <sys/namei.h> |
74 | #include <sys/sysctl.h> |
75 | #include <sys/vnode.h> |
76 | #include <sys/xattr.h> |
77 | #include <sys/ubc.h> |
78 | #include <sys/types.h> |
79 | #include <sys/dirent.h> |
80 | |
81 | #include "nullfs.h" |
82 | |
83 | #define NULL_ROOT_INO 2 |
84 | #define NULL_SECOND_INO 3 |
85 | #define NULL_THIRD_INO 4 |
86 | |
87 | vop_t * nullfs_vnodeop_p = NULL; |
88 | |
89 | /* the mountpoint lock should be held going into this function */ |
90 | static int |
91 | nullfs_isspecialvp(struct vnode * vp) |
92 | { |
93 | struct null_mount * null_mp; |
94 | |
95 | null_mp = MOUNTTONULLMOUNT(vnode_mount(vp)); |
96 | |
97 | /* only check for root and second here, third is special in a different way, |
98 | * related only to lookup and readdir */ |
99 | if (vp && (vp == null_mp->nullm_rootvp || vp == null_mp->nullm_secondvp)) { |
100 | return 1; |
101 | } |
102 | return 0; |
103 | } |
104 | |
105 | /* helper function to handle locking where possible */ |
106 | static int |
107 | nullfs_checkspecialvp(struct vnode* vp) |
108 | { |
109 | int result = 0; |
110 | struct null_mount * null_mp; |
111 | |
112 | null_mp = MOUNTTONULLMOUNT(vnode_mount(vp)); |
113 | |
114 | lck_mtx_lock(&null_mp->nullm_lock); |
115 | result = (nullfs_isspecialvp(vp)); |
116 | lck_mtx_unlock(&null_mp->nullm_lock); |
117 | |
118 | return result; |
119 | } |
120 | |
121 | static int |
122 | nullfs_default(__unused struct vnop_generic_args * args) |
123 | { |
124 | NULLFSDEBUG("%s (default)\n" , ((struct vnodeop_desc_fake *)args->a_desc)->vdesc_name); |
125 | return ENOTSUP; |
126 | } |
127 | |
128 | static int |
129 | nullfs_special_getattr(struct vnop_getattr_args * args) |
130 | { |
131 | mount_t mp = vnode_mount(args->a_vp); |
132 | struct null_mount * null_mp = MOUNTTONULLMOUNT(mp); |
133 | |
134 | ino_t ino = NULL_ROOT_INO; |
135 | struct vnode_attr covered_rootattr; |
136 | vnode_t checkvp = null_mp->nullm_lowerrootvp; |
137 | |
138 | VATTR_INIT(&covered_rootattr); |
139 | VATTR_WANTED(&covered_rootattr, va_uid); |
140 | VATTR_WANTED(&covered_rootattr, va_gid); |
141 | VATTR_WANTED(&covered_rootattr, va_create_time); |
142 | VATTR_WANTED(&covered_rootattr, va_modify_time); |
143 | VATTR_WANTED(&covered_rootattr, va_access_time); |
144 | |
145 | /* prefer to get this from the lower root vp, but if not (i.e. forced unmount |
146 | * of lower fs) try the mount point covered vnode */ |
147 | if (vnode_getwithvid(checkvp, null_mp->nullm_lowerrootvid)) { |
148 | checkvp = vfs_vnodecovered(mp); |
149 | if (checkvp == NULL) { |
150 | return EIO; |
151 | } |
152 | } |
153 | |
154 | int error = vnode_getattr(checkvp, &covered_rootattr, args->a_context); |
155 | |
156 | vnode_put(checkvp); |
157 | if (error) { |
158 | /* we should have been able to get attributes fore one of the two choices so |
159 | * fail if we didn't */ |
160 | return error; |
161 | } |
162 | |
163 | /* we got the attributes of the vnode we cover so plow ahead */ |
164 | if (args->a_vp == null_mp->nullm_secondvp) { |
165 | ino = NULL_SECOND_INO; |
166 | } |
167 | |
168 | VATTR_RETURN(args->a_vap, va_type, vnode_vtype(args->a_vp)); |
169 | VATTR_RETURN(args->a_vap, va_rdev, 0); |
170 | VATTR_RETURN(args->a_vap, va_nlink, 3); /* always just ., .., and the child */ |
171 | VATTR_RETURN(args->a_vap, va_total_size, 0); // hoping this is ok |
172 | |
173 | VATTR_RETURN(args->a_vap, va_data_size, 0); // hoping this is ok |
174 | VATTR_RETURN(args->a_vap, va_data_alloc, 0); |
175 | VATTR_RETURN(args->a_vap, va_iosize, vfs_statfs(mp)->f_iosize); |
176 | VATTR_RETURN(args->a_vap, va_fileid, ino); |
177 | VATTR_RETURN(args->a_vap, va_linkid, ino); |
178 | VATTR_RETURN(args->a_vap, va_fsid, vfs_statfs(mp)->f_fsid.val[0]); // return the fsid of the mount point |
179 | VATTR_RETURN(args->a_vap, va_filerev, 0); |
180 | VATTR_RETURN(args->a_vap, va_gen, 0); |
181 | VATTR_RETURN(args->a_vap, va_flags, UF_HIDDEN); /* mark our fake directories as hidden. People |
182 | shouldn't be enocouraged to poke around in them */ |
183 | |
184 | if (ino == NULL_SECOND_INO) { |
185 | VATTR_RETURN(args->a_vap, va_parentid, NULL_ROOT_INO); /* no parent at the root, so |
186 | the only other vnode that |
187 | goes through this path is |
188 | second and its parent is |
189 | 1.*/ |
190 | } |
191 | |
192 | if (VATTR_IS_ACTIVE(args->a_vap, va_mode)) { |
193 | /* force dr_xr_xr_x */ |
194 | VATTR_RETURN(args->a_vap, va_mode, S_IFDIR | S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); |
195 | } |
196 | if (VATTR_IS_ACTIVE(args->a_vap, va_uid)) { |
197 | VATTR_RETURN(args->a_vap, va_uid, covered_rootattr.va_uid); |
198 | } |
199 | if (VATTR_IS_ACTIVE(args->a_vap, va_gid)) { |
200 | VATTR_RETURN(args->a_vap, va_gid, covered_rootattr.va_gid); |
201 | } |
202 | |
203 | if (VATTR_IS_ACTIVE(args->a_vap, va_create_time)) { |
204 | VATTR_SET_SUPPORTED(args->a_vap, va_create_time); |
205 | args->a_vap->va_create_time.tv_sec = covered_rootattr.va_create_time.tv_sec; |
206 | args->a_vap->va_create_time.tv_nsec = covered_rootattr.va_create_time.tv_nsec; |
207 | } |
208 | if (VATTR_IS_ACTIVE(args->a_vap, va_modify_time)) { |
209 | VATTR_SET_SUPPORTED(args->a_vap, va_modify_time); |
210 | args->a_vap->va_modify_time.tv_sec = covered_rootattr.va_modify_time.tv_sec; |
211 | args->a_vap->va_modify_time.tv_nsec = covered_rootattr.va_modify_time.tv_nsec; |
212 | } |
213 | if (VATTR_IS_ACTIVE(args->a_vap, va_access_time)) { |
214 | VATTR_SET_SUPPORTED(args->a_vap, va_access_time); |
215 | args->a_vap->va_modify_time.tv_sec = covered_rootattr.va_access_time.tv_sec; |
216 | args->a_vap->va_modify_time.tv_nsec = covered_rootattr.va_access_time.tv_nsec; |
217 | } |
218 | |
219 | return 0; |
220 | } |
221 | |
222 | static int |
223 | nullfs_getattr(struct vnop_getattr_args * args) |
224 | { |
225 | int error; |
226 | struct null_mount * null_mp = MOUNTTONULLMOUNT(vnode_mount(args->a_vp)); |
227 | NULLFSDEBUG("%s %p\n" , __FUNCTION__, args->a_vp); |
228 | |
229 | lck_mtx_lock(&null_mp->nullm_lock); |
230 | if (nullfs_isspecialvp(args->a_vp)) { |
231 | error = nullfs_special_getattr(args); |
232 | lck_mtx_unlock(&null_mp->nullm_lock); |
233 | return error; |
234 | } |
235 | lck_mtx_unlock(&null_mp->nullm_lock); |
236 | |
237 | /* this will return a different inode for third than read dir will */ |
238 | struct vnode * lowervp = NULLVPTOLOWERVP(args->a_vp); |
239 | |
240 | error = vnode_getwithref(lowervp); |
241 | if (error == 0) { |
242 | error = VNOP_GETATTR(lowervp, args->a_vap, args->a_context); |
243 | vnode_put(lowervp); |
244 | |
245 | if (error == 0) { |
246 | /* fix up fsid so it doesn't say the underlying fs*/ |
247 | VATTR_RETURN(args->a_vap, va_fsid, vfs_statfs(vnode_mount(args->a_vp))->f_fsid.val[0]); |
248 | } |
249 | } |
250 | |
251 | return error; |
252 | } |
253 | |
254 | static int |
255 | nullfs_open(struct vnop_open_args * args) |
256 | { |
257 | int error; |
258 | struct vnode *vp, *lvp; |
259 | |
260 | NULLFSDEBUG("%s %p\n" , __FUNCTION__, args->a_vp); |
261 | |
262 | if (nullfs_checkspecialvp(args->a_vp)) { |
263 | return 0; /* nothing extra needed */ |
264 | } |
265 | |
266 | vp = args->a_vp; |
267 | lvp = NULLVPTOLOWERVP(vp); |
268 | error = vnode_getwithref(lvp); |
269 | if (error == 0) { |
270 | error = VNOP_OPEN(lvp, args->a_mode, args->a_context); |
271 | vnode_put(lvp); |
272 | } |
273 | |
274 | return error; |
275 | } |
276 | |
277 | static int |
278 | nullfs_close(struct vnop_close_args * args) |
279 | { |
280 | int error; |
281 | struct vnode *vp, *lvp; |
282 | |
283 | NULLFSDEBUG("%s %p\n" , __FUNCTION__, args->a_vp); |
284 | |
285 | if (nullfs_checkspecialvp(args->a_vp)) { |
286 | return 0; /* nothing extra needed */ |
287 | } |
288 | |
289 | vp = args->a_vp; |
290 | lvp = NULLVPTOLOWERVP(vp); |
291 | |
292 | error = vnode_getwithref(lvp); |
293 | if (error == 0) { |
294 | error = VNOP_CLOSE(lvp, args->a_fflag, args->a_context); |
295 | vnode_put(lvp); |
296 | } |
297 | return error; |
298 | } |
299 | |
300 | /* get lvp's parent, if possible, even if it isn't set. |
301 | |
302 | lvp is expected to have an iocount before and after this call. |
303 | |
304 | if a dvpp is populated the returned vnode has an iocount. */ |
305 | static int |
306 | null_get_lowerparent(vnode_t lvp, vnode_t * dvpp, vfs_context_t ctx) |
307 | { |
308 | int error = 0; |
309 | struct vnode_attr va; |
310 | mount_t mp = vnode_mount(lvp); |
311 | vnode_t dvp = vnode_parent(lvp); |
312 | |
313 | if (dvp) { |
314 | error = vnode_get(dvp); |
315 | goto end; |
316 | } |
317 | |
318 | error = ENOENT; |
319 | if (!(mp->mnt_kern_flag & MNTK_PATH_FROM_ID)) { |
320 | goto end; |
321 | } |
322 | |
323 | VATTR_INIT(&va); |
324 | VATTR_WANTED(&va, va_parentid); |
325 | |
326 | error = vnode_getattr(lvp, &va, ctx); |
327 | |
328 | if (error || !VATTR_IS_SUPPORTED(&va, va_parentid)) { |
329 | goto end; |
330 | } |
331 | |
332 | error = VFS_VGET(mp, (ino64_t)va.va_parentid, &dvp, ctx); |
333 | |
334 | end: |
335 | if (error == 0) { |
336 | *dvpp = dvp; |
337 | } |
338 | return error; |
339 | } |
340 | |
341 | /* the mountpoint lock should be held going into this function */ |
342 | static int |
343 | null_special_lookup(struct vnop_lookup_args * ap) |
344 | { |
345 | struct componentname * cnp = ap->a_cnp; |
346 | struct vnode * dvp = ap->a_dvp; |
347 | struct vnode * ldvp = NULL; |
348 | struct vnode * lvp = NULL; |
349 | struct vnode * vp = NULL; |
350 | struct mount * mp = vnode_mount(dvp); |
351 | struct null_mount * null_mp = MOUNTTONULLMOUNT(mp); |
352 | int error = ENOENT; |
353 | |
354 | if (dvp == null_mp->nullm_rootvp) { |
355 | /* handle . and .. */ |
356 | if (cnp->cn_nameptr[0] == '.') { |
357 | if (cnp->cn_namelen == 1 || (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.')) { |
358 | /* this is the root so both . and .. give back the root */ |
359 | vp = dvp; |
360 | error = vnode_get(vp); |
361 | goto end; |
362 | } |
363 | } |
364 | |
365 | /* our virtual wrapper directory should be d but D is acceptable if the |
366 | * lower file system is case insensitive */ |
367 | if (cnp->cn_namelen == 1 && |
368 | (cnp->cn_nameptr[0] == 'd' || (null_mp->nullm_flags & NULLM_CASEINSENSITIVE ? cnp->cn_nameptr[0] == 'D' : 0))) { |
369 | error = 0; |
370 | if (null_mp->nullm_secondvp == NULL) { |
371 | error = null_getnewvnode(mp, NULL, dvp, &vp, cnp, 0); |
372 | if (error) { |
373 | goto end; |
374 | } |
375 | |
376 | null_mp->nullm_secondvp = vp; |
377 | } else { |
378 | vp = null_mp->nullm_secondvp; |
379 | error = vnode_get(vp); |
380 | } |
381 | } |
382 | |
383 | } else if (dvp == null_mp->nullm_secondvp) { |
384 | /* handle . and .. */ |
385 | if (cnp->cn_nameptr[0] == '.') { |
386 | if (cnp->cn_namelen == 1) { |
387 | vp = dvp; |
388 | error = vnode_get(vp); |
389 | goto end; |
390 | } else if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { |
391 | /* parent here is the root vp */ |
392 | vp = null_mp->nullm_rootvp; |
393 | error = vnode_get(vp); |
394 | goto end; |
395 | } |
396 | } |
397 | /* nullmp->nullm_lowerrootvp was set at mount time so don't need to lock to |
398 | * access it */ |
399 | /* v_name should be null terminated but cn_nameptr is not necessarily. |
400 | cn_namelen is the number of characters before the null in either case */ |
401 | error = vnode_getwithvid(null_mp->nullm_lowerrootvp, null_mp->nullm_lowerrootvid); |
402 | if (error) { |
403 | goto end; |
404 | } |
405 | |
406 | /* We don't want to mess with case insensitivity and unicode, so the plan to |
407 | check here is |
408 | 1. try to get the lower root's parent |
409 | 2. If we get a parent, then perform a lookup on the lower file system |
410 | using the parent and the passed in cnp |
411 | 3. If that worked and we got a vp, then see if the vp is lowerrootvp. If |
412 | so we got a match |
413 | 4. Anything else results in ENOENT. |
414 | */ |
415 | error = null_get_lowerparent(null_mp->nullm_lowerrootvp, &ldvp, ap->a_context); |
416 | |
417 | if (error == 0) { |
418 | error = VNOP_LOOKUP(ldvp, &lvp, cnp, ap->a_context); |
419 | vnode_put(ldvp); |
420 | |
421 | if (error == 0) { |
422 | if (lvp == null_mp->nullm_lowerrootvp) { |
423 | /* always check the hashmap for a vnode for this, the root of the |
424 | * mirrored system */ |
425 | error = null_nodeget(mp, lvp, dvp, &vp, cnp, 0); |
426 | |
427 | if (error == 0 && null_mp->nullm_thirdcovervp == NULL) { |
428 | /* if nodeget succeeded then vp has an iocount*/ |
429 | null_mp->nullm_thirdcovervp = vp; |
430 | } |
431 | } else { |
432 | error = ENOENT; |
433 | } |
434 | vnode_put(lvp); |
435 | } |
436 | } |
437 | vnode_put(null_mp->nullm_lowerrootvp); |
438 | } |
439 | |
440 | end: |
441 | if (error == 0) { |
442 | *ap->a_vpp = vp; |
443 | } |
444 | return error; |
445 | } |
446 | |
447 | /* |
448 | * We have to carry on the locking protocol on the null layer vnodes |
449 | * as we progress through the tree. We also have to enforce read-only |
450 | * if this layer is mounted read-only. |
451 | */ |
452 | static int |
453 | null_lookup(struct vnop_lookup_args * ap) |
454 | { |
455 | struct componentname * cnp = ap->a_cnp; |
456 | struct vnode * dvp = ap->a_dvp; |
457 | struct vnode *vp, *ldvp, *lvp; |
458 | struct mount * mp; |
459 | struct null_mount * null_mp; |
460 | int error; |
461 | |
462 | NULLFSDEBUG("%s parent: %p component: %.*s\n" , __FUNCTION__, ap->a_dvp, cnp->cn_namelen, cnp->cn_nameptr); |
463 | |
464 | mp = vnode_mount(dvp); |
465 | /* rename and delete are not allowed. this is a read only file system */ |
466 | if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME || cnp->cn_nameiop == CREATE) { |
467 | return (EROFS); |
468 | } |
469 | null_mp = MOUNTTONULLMOUNT(mp); |
470 | |
471 | lck_mtx_lock(&null_mp->nullm_lock); |
472 | if (nullfs_isspecialvp(dvp)) { |
473 | error = null_special_lookup(ap); |
474 | lck_mtx_unlock(&null_mp->nullm_lock); |
475 | return error; |
476 | } |
477 | lck_mtx_unlock(&null_mp->nullm_lock); |
478 | |
479 | // . and .. handling |
480 | if (cnp->cn_nameptr[0] == '.') { |
481 | if (cnp->cn_namelen == 1) { |
482 | vp = dvp; |
483 | } else if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { |
484 | /* mount point crossing is handled in null_special_lookup */ |
485 | vp = vnode_parent(dvp); |
486 | } else { |
487 | goto notdot; |
488 | } |
489 | |
490 | error = vp ? vnode_get(vp) : ENOENT; |
491 | |
492 | if (error == 0) { |
493 | *ap->a_vpp = vp; |
494 | } |
495 | |
496 | return error; |
497 | } |
498 | |
499 | notdot: |
500 | ldvp = NULLVPTOLOWERVP(dvp); |
501 | vp = lvp = NULL; |
502 | |
503 | /* |
504 | * Hold ldvp. The reference on it, owned by dvp, is lost in |
505 | * case of dvp reclamation. |
506 | */ |
507 | error = vnode_getwithref(ldvp); |
508 | if (error) { |
509 | return error; |
510 | } |
511 | |
512 | error = VNOP_LOOKUP(ldvp, &lvp, cnp, ap->a_context); |
513 | |
514 | vnode_put(ldvp); |
515 | |
516 | if ((error == 0 || error == EJUSTRETURN) && lvp != NULL) { |
517 | if (ldvp == lvp) { |
518 | vp = dvp; |
519 | error = vnode_get(vp); |
520 | } else { |
521 | error = null_nodeget(mp, lvp, dvp, &vp, cnp, 0); |
522 | } |
523 | if (error == 0) { |
524 | *ap->a_vpp = vp; |
525 | } |
526 | } |
527 | |
528 | /* if we got lvp, drop the iocount from VNOP_LOOKUP */ |
529 | if (lvp != NULL) { |
530 | vnode_put(lvp); |
531 | } |
532 | |
533 | return (error); |
534 | } |
535 | |
536 | /* |
537 | * Don't think this needs to do anything |
538 | */ |
539 | static int |
540 | null_inactive(__unused struct vnop_inactive_args * ap) |
541 | { |
542 | NULLFSDEBUG("%s %p\n" , __FUNCTION__, ap->a_vp); |
543 | |
544 | return (0); |
545 | } |
546 | |
547 | static int |
548 | null_reclaim(struct vnop_reclaim_args * ap) |
549 | { |
550 | struct vnode * vp; |
551 | struct null_node * xp; |
552 | struct vnode * lowervp; |
553 | struct null_mount * null_mp = MOUNTTONULLMOUNT(vnode_mount(ap->a_vp)); |
554 | |
555 | NULLFSDEBUG("%s %p\n" , __FUNCTION__, ap->a_vp); |
556 | |
557 | vp = ap->a_vp; |
558 | |
559 | xp = VTONULL(vp); |
560 | lowervp = xp->null_lowervp; |
561 | |
562 | lck_mtx_lock(&null_mp->nullm_lock); |
563 | |
564 | vnode_removefsref(vp); |
565 | |
566 | if (lowervp != NULL) { |
567 | /* root and second don't have a lowervp, so nothing to release and nothing |
568 | * got hashed */ |
569 | if (xp->null_flags & NULL_FLAG_HASHED) { |
570 | /* only call this if we actually made it into the hash list. reclaim gets |
571 | called also to |
572 | clean up a vnode that got created when it didn't need to under race |
573 | conditions */ |
574 | null_hashrem(xp); |
575 | } |
576 | vnode_getwithref(lowervp); |
577 | vnode_rele(lowervp); |
578 | vnode_put(lowervp); |
579 | } |
580 | |
581 | if (vp == null_mp->nullm_rootvp) { |
582 | null_mp->nullm_rootvp = NULL; |
583 | } else if (vp == null_mp->nullm_secondvp) { |
584 | null_mp->nullm_secondvp = NULL; |
585 | } else if (vp == null_mp->nullm_thirdcovervp) { |
586 | null_mp->nullm_thirdcovervp = NULL; |
587 | } |
588 | |
589 | lck_mtx_unlock(&null_mp->nullm_lock); |
590 | |
591 | cache_purge(vp); |
592 | vnode_clearfsnode(vp); |
593 | |
594 | FREE(xp, M_TEMP); |
595 | |
596 | return 0; |
597 | } |
598 | |
599 | #define DIRENT_SZ(dp) ((sizeof(struct dirent) - NAME_MAX) + (((dp)->d_namlen + 1 + 3) & ~3)) |
600 | |
601 | static int |
602 | store_entry_special(ino_t ino, const char * name, struct uio * uio) |
603 | { |
604 | struct dirent e; |
605 | size_t namelen = strlen(name); |
606 | int error = EINVAL; |
607 | |
608 | if (namelen + 1 <= NAME_MAX) { |
609 | memset(&e, 0, sizeof(e)); |
610 | |
611 | e.d_ino = ino; |
612 | e.d_type = DT_DIR; |
613 | |
614 | e.d_namlen = namelen; /* don't include NUL */ |
615 | e.d_reclen = DIRENT_SZ(&e); |
616 | if (uio_resid(uio) >= e.d_reclen) { |
617 | strlcpy(e.d_name, name, NAME_MAX); |
618 | error = uiomove((caddr_t)&e, e.d_reclen, uio); |
619 | } else { |
620 | error = EMSGSIZE; |
621 | } |
622 | } |
623 | return error; |
624 | } |
625 | |
626 | static int |
627 | nullfs_special_readdir(struct vnop_readdir_args * ap) |
628 | { |
629 | struct vnode * vp = ap->a_vp; |
630 | struct uio * uio = ap->a_uio; |
631 | struct null_mount * null_mp = MOUNTTONULLMOUNT(vnode_mount(vp)); |
632 | off_t offset = uio_offset(uio); |
633 | int error = ERANGE; |
634 | int items = 0; |
635 | ino_t ino = 0; |
636 | const char * name = NULL; |
637 | |
638 | if (ap->a_flags & (VNODE_READDIR_EXTENDED | VNODE_READDIR_REQSEEKOFF)) |
639 | return (EINVAL); |
640 | |
641 | if (offset == 0) { |
642 | /* . case */ |
643 | if (vp == null_mp->nullm_rootvp) { |
644 | ino = NULL_ROOT_INO; |
645 | } else /* only get here if vp matches nullm_rootvp or nullm_secondvp */ |
646 | { |
647 | ino = NULL_SECOND_INO; |
648 | } |
649 | error = store_entry_special(ino, "." , uio); |
650 | if (error) { |
651 | goto out; |
652 | } |
653 | offset++; |
654 | items++; |
655 | } |
656 | if (offset == 1) { |
657 | /* .. case */ |
658 | /* only get here if vp matches nullm_rootvp or nullm_secondvp */ |
659 | ino = NULL_ROOT_INO; |
660 | |
661 | error = store_entry_special(ino, ".." , uio); |
662 | if (error) { |
663 | goto out; |
664 | } |
665 | offset++; |
666 | items++; |
667 | } |
668 | if (offset == 2) { |
669 | /* the directory case */ |
670 | if (vp == null_mp->nullm_rootvp) { |
671 | ino = NULL_SECOND_INO; |
672 | name = "d" ; |
673 | } else /* only get here if vp matches nullm_rootvp or nullm_secondvp */ |
674 | { |
675 | ino = NULL_THIRD_INO; |
676 | if (vnode_getwithvid(null_mp->nullm_lowerrootvp, null_mp->nullm_lowerrootvid)) { |
677 | /* In this case the lower file system has been ripped out from under us, |
678 | but we don't want to error out |
679 | Instead we just want d to look empty. */ |
680 | error = 0; |
681 | goto out; |
682 | } |
683 | name = vnode_getname_printable(null_mp->nullm_lowerrootvp); |
684 | } |
685 | error = store_entry_special(ino, name, uio); |
686 | |
687 | if (ino == NULL_THIRD_INO) { |
688 | vnode_putname_printable(name); |
689 | vnode_put(null_mp->nullm_lowerrootvp); |
690 | } |
691 | |
692 | if (error) { |
693 | goto out; |
694 | } |
695 | offset++; |
696 | items++; |
697 | } |
698 | |
699 | out: |
700 | if (error == EMSGSIZE) { |
701 | error = 0; /* return success if we ran out of space, but we wanted to make |
702 | sure that we didn't update offset and items incorrectly */ |
703 | } |
704 | uio_setoffset(uio, offset); |
705 | if (ap->a_numdirent) { |
706 | *ap->a_numdirent = items; |
707 | } |
708 | return error; |
709 | } |
710 | |
711 | static int |
712 | nullfs_readdir(struct vnop_readdir_args * ap) |
713 | { |
714 | struct vnode *vp, *lvp; |
715 | int error; |
716 | struct null_mount * null_mp = MOUNTTONULLMOUNT(vnode_mount(ap->a_vp)); |
717 | |
718 | NULLFSDEBUG("%s %p\n" , __FUNCTION__, ap->a_vp); |
719 | /* assumption is that any vp that comes through here had to go through lookup |
720 | */ |
721 | |
722 | lck_mtx_lock(&null_mp->nullm_lock); |
723 | if (nullfs_isspecialvp(ap->a_vp)) { |
724 | error = nullfs_special_readdir(ap); |
725 | lck_mtx_unlock(&null_mp->nullm_lock); |
726 | return error; |
727 | } |
728 | lck_mtx_unlock(&null_mp->nullm_lock); |
729 | |
730 | vp = ap->a_vp; |
731 | lvp = NULLVPTOLOWERVP(vp); |
732 | error = vnode_getwithref(lvp); |
733 | if (error == 0) { |
734 | error = VNOP_READDIR(lvp, ap->a_uio, ap->a_flags, ap->a_eofflag, ap->a_numdirent, ap->a_context); |
735 | vnode_put(lvp); |
736 | } |
737 | |
738 | return error; |
739 | } |
740 | |
741 | static int |
742 | nullfs_readlink(struct vnop_readlink_args * ap) |
743 | { |
744 | NULLFSDEBUG("%s %p\n" , __FUNCTION__, ap->a_vp); |
745 | int error; |
746 | struct vnode *vp, *lvp; |
747 | |
748 | if (nullfs_checkspecialvp(ap->a_vp)) { |
749 | return ENOTSUP; /* the special vnodes aren't links */ |
750 | } |
751 | |
752 | vp = ap->a_vp; |
753 | lvp = NULLVPTOLOWERVP(vp); |
754 | |
755 | error = vnode_getwithref(lvp); |
756 | if (error == 0) { |
757 | error = VNOP_READLINK(lvp, ap->a_uio, ap->a_context); |
758 | vnode_put(lvp); |
759 | |
760 | if (error) { |
761 | NULLFSDEBUG("readlink failed: %d\n" , error); |
762 | } |
763 | } |
764 | |
765 | return error; |
766 | } |
767 | |
768 | static int |
769 | nullfs_pathconf(__unused struct vnop_pathconf_args * args) |
770 | { |
771 | NULLFSDEBUG("%s %p\n" , __FUNCTION__, args->a_vp); |
772 | return EINVAL; |
773 | } |
774 | |
775 | static int |
776 | nullfs_fsync(__unused struct vnop_fsync_args * args) |
777 | { |
778 | NULLFSDEBUG("%s %p\n" , __FUNCTION__, args->a_vp); |
779 | return 0; |
780 | } |
781 | |
782 | static int |
783 | nullfs_mmap(struct vnop_mmap_args * args) |
784 | { |
785 | int error; |
786 | struct vnode *vp, *lvp; |
787 | |
788 | NULLFSDEBUG("%s %p\n" , __FUNCTION__, args->a_vp); |
789 | |
790 | if (nullfs_checkspecialvp(args->a_vp)) { |
791 | return 0; /* nothing extra needed */ |
792 | } |
793 | |
794 | vp = args->a_vp; |
795 | lvp = NULLVPTOLOWERVP(vp); |
796 | error = vnode_getwithref(lvp); |
797 | if (error == 0) { |
798 | error = VNOP_MMAP(lvp, args->a_fflags, args->a_context); |
799 | vnode_put(lvp); |
800 | } |
801 | |
802 | return error; |
803 | } |
804 | |
805 | static int |
806 | nullfs_mnomap(struct vnop_mnomap_args * args) |
807 | { |
808 | int error; |
809 | struct vnode *vp, *lvp; |
810 | |
811 | NULLFSDEBUG("%s %p\n" , __FUNCTION__, args->a_vp); |
812 | |
813 | if (nullfs_checkspecialvp(args->a_vp)) { |
814 | return 0; /* nothing extra needed */ |
815 | } |
816 | |
817 | vp = args->a_vp; |
818 | lvp = NULLVPTOLOWERVP(vp); |
819 | error = vnode_getwithref(lvp); |
820 | if (error == 0) { |
821 | error = VNOP_MNOMAP(lvp, args->a_context); |
822 | vnode_put(lvp); |
823 | } |
824 | |
825 | return error; |
826 | } |
827 | |
828 | static int |
829 | nullfs_getxattr(struct vnop_getxattr_args * args) |
830 | { |
831 | int error; |
832 | struct vnode *vp, *lvp; |
833 | |
834 | NULLFSDEBUG("%s %p\n" , __FUNCTION__, args->a_vp); |
835 | |
836 | if (nullfs_checkspecialvp(args->a_vp)) { |
837 | return 0; /* nothing extra needed */ |
838 | } |
839 | |
840 | vp = args->a_vp; |
841 | lvp = NULLVPTOLOWERVP(vp); |
842 | error = vnode_getwithref(lvp); |
843 | if (error == 0) { |
844 | error = VNOP_GETXATTR(lvp, args->a_name, args->a_uio, args->a_size, args->a_options, args->a_context); |
845 | vnode_put(lvp); |
846 | } |
847 | |
848 | return error; |
849 | } |
850 | |
851 | static int |
852 | nullfs_listxattr(struct vnop_listxattr_args * args) |
853 | { |
854 | int error; |
855 | struct vnode *vp, *lvp; |
856 | |
857 | NULLFSDEBUG("%s %p\n" , __FUNCTION__, args->a_vp); |
858 | |
859 | if (nullfs_checkspecialvp(args->a_vp)) { |
860 | return 0; /* nothing extra needed */ |
861 | } |
862 | |
863 | vp = args->a_vp; |
864 | lvp = NULLVPTOLOWERVP(vp); |
865 | error = vnode_getwithref(lvp); |
866 | if (error == 0) { |
867 | error = VNOP_LISTXATTR(lvp, args->a_uio, args->a_size, args->a_options, args->a_context); |
868 | vnode_put(lvp); |
869 | } |
870 | |
871 | return error; |
872 | } |
873 | |
874 | /* relies on v1 paging */ |
875 | static int |
876 | nullfs_pagein(struct vnop_pagein_args * ap) |
877 | { |
878 | int error = EIO; |
879 | struct vnode *vp, *lvp; |
880 | |
881 | NULLFSDEBUG("%s %p\n" , __FUNCTION__, ap->a_vp); |
882 | |
883 | vp = ap->a_vp; |
884 | lvp = NULLVPTOLOWERVP(vp); |
885 | |
886 | if (vnode_vtype(vp) != VREG) { |
887 | return ENOTSUP; |
888 | } |
889 | |
890 | /* |
891 | * Ask VM/UBC/VFS to do our bidding |
892 | */ |
893 | if (vnode_getwithvid(lvp, NULLVPTOLOWERVID(vp)) == 0) { |
894 | vm_offset_t ioaddr; |
895 | uio_t auio; |
896 | kern_return_t kret; |
897 | off_t bytes_to_commit; |
898 | off_t lowersize; |
899 | upl_t upl = ap->a_pl; |
900 | user_ssize_t bytes_remaining = 0; |
901 | |
902 | auio = uio_create(1, ap->a_f_offset, UIO_SYSSPACE, UIO_READ); |
903 | if (auio == NULL) { |
904 | error = EIO; |
905 | goto exit_no_unmap; |
906 | } |
907 | |
908 | kret = ubc_upl_map(upl, &ioaddr); |
909 | if (KERN_SUCCESS != kret) { |
910 | panic("nullfs_pagein: ubc_upl_map() failed with (%d)" , kret); |
911 | } |
912 | |
913 | ioaddr += ap->a_pl_offset; |
914 | |
915 | error = uio_addiov(auio, (user_addr_t)ioaddr, ap->a_size); |
916 | if (error) { |
917 | goto exit; |
918 | } |
919 | |
920 | lowersize = ubc_getsize(lvp); |
921 | if (lowersize != ubc_getsize(vp)) { |
922 | (void)ubc_setsize(vp, lowersize); /* ignore failures, nothing can be done */ |
923 | } |
924 | |
925 | error = VNOP_READ(lvp, auio, ((ap->a_flags & UPL_IOSYNC) ? IO_SYNC : 0), ap->a_context); |
926 | |
927 | bytes_remaining = uio_resid(auio); |
928 | if (bytes_remaining > 0 && bytes_remaining <= (user_ssize_t)ap->a_size) |
929 | { |
930 | /* zero bytes that weren't read in to the upl */ |
931 | bzero((void*)((uintptr_t)(ioaddr + ap->a_size - bytes_remaining)), (size_t) bytes_remaining); |
932 | } |
933 | |
934 | exit: |
935 | kret = ubc_upl_unmap(upl); |
936 | if (KERN_SUCCESS != kret) { |
937 | panic("nullfs_pagein: ubc_upl_unmap() failed with (%d)" , kret); |
938 | } |
939 | |
940 | if (auio != NULL) { |
941 | uio_free(auio); |
942 | } |
943 | |
944 | exit_no_unmap: |
945 | if ((ap->a_flags & UPL_NOCOMMIT) == 0) { |
946 | if (!error && (bytes_remaining >= 0) && (bytes_remaining <= (user_ssize_t)ap->a_size)) { |
947 | /* only commit what was read in (page aligned)*/ |
948 | bytes_to_commit = ap->a_size - bytes_remaining; |
949 | if (bytes_to_commit) |
950 | { |
951 | /* need to make sure bytes_to_commit and byte_remaining are page aligned before calling ubc_upl_commit_range*/ |
952 | if (bytes_to_commit & PAGE_MASK) |
953 | { |
954 | bytes_to_commit = (bytes_to_commit & (~PAGE_MASK)) + (PAGE_MASK + 1); |
955 | assert(bytes_to_commit <= (off_t)ap->a_size); |
956 | |
957 | bytes_remaining = ap->a_size - bytes_to_commit; |
958 | } |
959 | ubc_upl_commit_range(upl, ap->a_pl_offset, (upl_size_t)bytes_to_commit, UPL_COMMIT_FREE_ON_EMPTY); |
960 | } |
961 | |
962 | /* abort anything thats left */ |
963 | if (bytes_remaining) { |
964 | ubc_upl_abort_range(upl, ap->a_pl_offset + bytes_to_commit, (upl_size_t)bytes_remaining, UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY); |
965 | } |
966 | } else { |
967 | ubc_upl_abort_range(upl, ap->a_pl_offset, (upl_size_t)ap->a_size, UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY); |
968 | } |
969 | } |
970 | vnode_put(lvp); |
971 | } else if((ap->a_flags & UPL_NOCOMMIT) == 0) { |
972 | ubc_upl_abort_range(ap->a_pl, ap->a_pl_offset, (upl_size_t)ap->a_size, UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY); |
973 | } |
974 | return error; |
975 | } |
976 | |
977 | static int |
978 | nullfs_read(struct vnop_read_args * ap) |
979 | { |
980 | int error = EIO; |
981 | |
982 | struct vnode *vp, *lvp; |
983 | |
984 | NULLFSDEBUG("%s %p\n" , __FUNCTION__, ap->a_vp); |
985 | |
986 | if (nullfs_checkspecialvp(ap->a_vp)) { |
987 | return ENOTSUP; /* the special vnodes can't be read */ |
988 | } |
989 | |
990 | vp = ap->a_vp; |
991 | lvp = NULLVPTOLOWERVP(vp); |
992 | |
993 | /* |
994 | * First some house keeping |
995 | */ |
996 | if (vnode_getwithvid(lvp, NULLVPTOLOWERVID(vp)) == 0) { |
997 | if (!vnode_isreg(lvp) && !vnode_islnk(lvp)) { |
998 | error = EPERM; |
999 | goto end; |
1000 | } |
1001 | |
1002 | if (uio_resid(ap->a_uio) == 0) { |
1003 | error = 0; |
1004 | goto end; |
1005 | } |
1006 | |
1007 | /* |
1008 | * Now ask VM/UBC/VFS to do our bidding |
1009 | */ |
1010 | |
1011 | error = VNOP_READ(lvp, ap->a_uio, ap->a_ioflag, ap->a_context); |
1012 | if (error) { |
1013 | NULLFSDEBUG("VNOP_READ failed: %d\n" , error); |
1014 | } |
1015 | end: |
1016 | vnode_put(lvp); |
1017 | } |
1018 | return error; |
1019 | } |
1020 | |
1021 | /* |
1022 | * Global vfs data structures |
1023 | */ |
1024 | |
1025 | static struct vnodeopv_entry_desc nullfs_vnodeop_entries[] = { |
1026 | {&vnop_default_desc, (vop_t)nullfs_default}, {&vnop_getattr_desc, (vop_t)nullfs_getattr}, |
1027 | {&vnop_open_desc, (vop_t)nullfs_open}, {&vnop_close_desc, (vop_t)nullfs_close}, |
1028 | {&vnop_inactive_desc, (vop_t)null_inactive}, {&vnop_reclaim_desc, (vop_t)null_reclaim}, |
1029 | {&vnop_lookup_desc, (vop_t)null_lookup}, {&vnop_readdir_desc, (vop_t)nullfs_readdir}, |
1030 | {&vnop_readlink_desc, (vop_t)nullfs_readlink}, {&vnop_pathconf_desc, (vop_t)nullfs_pathconf}, |
1031 | {&vnop_fsync_desc, (vop_t)nullfs_fsync}, {&vnop_mmap_desc, (vop_t)nullfs_mmap}, |
1032 | {&vnop_mnomap_desc, (vop_t)nullfs_mnomap}, {&vnop_getxattr_desc, (vop_t)nullfs_getxattr}, |
1033 | {&vnop_pagein_desc, (vop_t)nullfs_pagein}, {&vnop_read_desc, (vop_t)nullfs_read}, |
1034 | {&vnop_listxattr_desc, (vop_t)nullfs_listxattr}, {NULL, NULL}, |
1035 | }; |
1036 | |
1037 | struct vnodeopv_desc nullfs_vnodeop_opv_desc = {&nullfs_vnodeop_p, nullfs_vnodeop_entries}; |
1038 | |
1039 | //NULLFS Specific helper function |
1040 | |
1041 | int |
1042 | nullfs_getbackingvnode(vnode_t in_vp, vnode_t* out_vpp) |
1043 | { |
1044 | int result = EINVAL; |
1045 | |
1046 | if (out_vpp == NULL || in_vp == NULL) { |
1047 | goto end; |
1048 | } |
1049 | |
1050 | struct vfsstatfs * sp = NULL; |
1051 | mount_t mp = vnode_mount(in_vp); |
1052 | |
1053 | sp = vfs_statfs(mp); |
1054 | //If this isn't a nullfs vnode or it is but it's a special vnode |
1055 | if (strcmp(sp->f_fstypename, "nullfs" ) != 0 || nullfs_checkspecialvp(in_vp)) { |
1056 | *out_vpp = NULLVP; |
1057 | result = ENOENT; |
1058 | goto end; |
1059 | } |
1060 | |
1061 | vnode_t lvp = NULLVPTOLOWERVP(in_vp); |
1062 | if ((result = vnode_getwithvid(lvp, NULLVPTOLOWERVID(in_vp)))) { |
1063 | goto end; |
1064 | } |
1065 | |
1066 | *out_vpp = lvp; |
1067 | |
1068 | end: |
1069 | return result; |
1070 | } |
1071 | |