1 | /* |
2 | * Copyright (c) 2002-2016 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | /*- |
29 | * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved. |
30 | * |
31 | * Redistribution and use in source and binary forms, with or without |
32 | * modification, are permitted provided that the following conditions |
33 | * are met: |
34 | * 1. Redistributions of source code must retain the above copyright |
35 | * notice, this list of conditions and the following disclaimer. |
36 | * 2. Redistributions in binary form must reproduce the above copyright |
37 | * notice, this list of conditions and the following disclaimer in the |
38 | * documentation and/or other materials provided with the distribution. |
39 | * 3. Berkeley Software Design Inc's name may not be used to endorse or |
40 | * promote products derived from this software without specific prior |
41 | * written permission. |
42 | * |
43 | * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND |
44 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
45 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
46 | * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE |
47 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
48 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
49 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
50 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
51 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
52 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
53 | * SUCH DAMAGE. |
54 | * |
55 | * from BSDI nfs_lock.c,v 2.4 1998/12/14 23:49:56 jch Exp |
56 | */ |
57 | |
58 | #include <sys/cdefs.h> |
59 | #include <sys/param.h> |
60 | #include <sys/systm.h> |
61 | #include <sys/fcntl.h> |
62 | #include <sys/kernel.h> /* for hz */ |
63 | #include <sys/file_internal.h> |
64 | #include <sys/malloc.h> |
65 | #include <sys/lockf.h> /* for hz */ /* Must come after sys/malloc.h */ |
66 | #include <sys/kpi_mbuf.h> |
67 | #include <sys/mount_internal.h> |
68 | #include <sys/proc_internal.h> /* for p_start */ |
69 | #include <sys/kauth.h> |
70 | #include <sys/resourcevar.h> |
71 | #include <sys/socket.h> |
72 | #include <sys/unistd.h> |
73 | #include <sys/user.h> |
74 | #include <sys/vnode_internal.h> |
75 | |
76 | #include <kern/thread.h> |
77 | #include <kern/host.h> |
78 | |
79 | #include <machine/limits.h> |
80 | |
81 | #include <net/if.h> |
82 | |
83 | #include <nfs/rpcv2.h> |
84 | #include <nfs/nfsproto.h> |
85 | #include <nfs/nfs.h> |
86 | #include <nfs/nfs_gss.h> |
87 | #include <nfs/nfsmount.h> |
88 | #include <nfs/nfsnode.h> |
89 | #include <nfs/nfs_lock.h> |
90 | |
91 | #include <mach/host_priv.h> |
92 | #include <mach/mig_errors.h> |
93 | #include <mach/host_special_ports.h> |
94 | #include <lockd/lockd_mach.h> |
95 | |
96 | extern void ipc_port_release_send(ipc_port_t); |
97 | |
98 | /* |
99 | * pending lock request messages are kept in this queue which is |
100 | * kept sorted by transaction ID (xid). |
101 | */ |
102 | static uint64_t nfs_lockxid = 0; |
103 | static LOCKD_MSG_QUEUE nfs_pendlockq; |
104 | |
105 | /* list of mounts that are (potentially) making lockd requests */ |
106 | TAILQ_HEAD(nfs_lockd_mount_list,nfsmount) nfs_lockd_mount_list; |
107 | |
108 | static lck_grp_t *nfs_lock_lck_grp; |
109 | static lck_mtx_t *nfs_lock_mutex; |
110 | |
111 | void nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *); |
112 | void nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *); |
113 | int nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST *, struct lockd_ans *); |
114 | LOCKD_MSG_REQUEST *nfs_lockdmsg_find_by_answer(struct lockd_ans *); |
115 | LOCKD_MSG_REQUEST *nfs_lockdmsg_find_by_xid(uint64_t); |
116 | uint64_t nfs_lockxid_get(void); |
117 | int nfs_lockd_send_request(LOCKD_MSG *, int); |
118 | |
119 | /* |
120 | * initialize global nfs lock state |
121 | */ |
122 | void |
123 | nfs_lockinit(void) |
124 | { |
125 | TAILQ_INIT(&nfs_pendlockq); |
126 | TAILQ_INIT(&nfs_lockd_mount_list); |
127 | |
128 | nfs_lock_lck_grp = lck_grp_alloc_init("nfs_lock" , LCK_GRP_ATTR_NULL); |
129 | nfs_lock_mutex = lck_mtx_alloc_init(nfs_lock_lck_grp, LCK_ATTR_NULL); |
130 | } |
131 | |
132 | /* |
133 | * Register a mount as (potentially) making lockd requests. |
134 | */ |
135 | void |
136 | nfs_lockd_mount_register(struct nfsmount *nmp) |
137 | { |
138 | lck_mtx_lock(nfs_lock_mutex); |
139 | TAILQ_INSERT_HEAD(&nfs_lockd_mount_list, nmp, nm_ldlink); |
140 | nfs_lockd_mounts++; |
141 | lck_mtx_unlock(nfs_lock_mutex); |
142 | } |
143 | |
144 | /* |
145 | * Unregister a mount as (potentially) making lockd requests. |
146 | * |
147 | * When the lockd mount count drops to zero, then send a shutdown request to |
148 | * lockd if we've sent any requests to it. |
149 | */ |
150 | void |
151 | nfs_lockd_mount_unregister(struct nfsmount *nmp) |
152 | { |
153 | int send_shutdown; |
154 | mach_port_t lockd_port = IPC_PORT_NULL; |
155 | kern_return_t kr; |
156 | |
157 | lck_mtx_lock(nfs_lock_mutex); |
158 | if (nmp->nm_ldlink.tqe_next == NFSNOLIST) { |
159 | lck_mtx_unlock(nfs_lock_mutex); |
160 | return; |
161 | } |
162 | |
163 | TAILQ_REMOVE(&nfs_lockd_mount_list, nmp, nm_ldlink); |
164 | nmp->nm_ldlink.tqe_next = NFSNOLIST; |
165 | |
166 | nfs_lockd_mounts--; |
167 | |
168 | /* send a shutdown request if there are no more lockd mounts */ |
169 | send_shutdown = ((nfs_lockd_mounts == 0) && nfs_lockd_request_sent); |
170 | if (send_shutdown) |
171 | nfs_lockd_request_sent = 0; |
172 | |
173 | lck_mtx_unlock(nfs_lock_mutex); |
174 | |
175 | if (!send_shutdown) |
176 | return; |
177 | |
178 | /* |
179 | * Let lockd know that it is no longer needed for any NFS mounts |
180 | */ |
181 | kr = host_get_lockd_port(host_priv_self(), &lockd_port); |
182 | if ((kr != KERN_SUCCESS) || !IPC_PORT_VALID(lockd_port)) { |
183 | printf("nfs_lockd_mount_change: shutdown couldn't get port, kr %d, port %s\n" , |
184 | kr, (lockd_port == IPC_PORT_NULL) ? "NULL" : |
185 | (lockd_port == IPC_PORT_DEAD) ? "DEAD" : "VALID" ); |
186 | return; |
187 | } |
188 | |
189 | kr = lockd_shutdown(lockd_port); |
190 | if (kr != KERN_SUCCESS) |
191 | printf("nfs_lockd_mount_change: shutdown %d\n" , kr); |
192 | |
193 | ipc_port_release_send(lockd_port); |
194 | } |
195 | |
196 | /* |
197 | * insert a lock request message into the pending queue |
198 | * (nfs_lock_mutex must be held) |
199 | */ |
200 | void |
201 | nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *msgreq) |
202 | { |
203 | LOCKD_MSG_REQUEST *mr; |
204 | |
205 | mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue); |
206 | if (!mr || (msgreq->lmr_msg.lm_xid > mr->lmr_msg.lm_xid)) { |
207 | /* fast path: empty queue or new largest xid */ |
208 | TAILQ_INSERT_TAIL(&nfs_pendlockq, msgreq, lmr_next); |
209 | return; |
210 | } |
211 | /* slow path: need to walk list to find insertion point */ |
212 | while (mr && (msgreq->lmr_msg.lm_xid > mr->lmr_msg.lm_xid)) { |
213 | mr = TAILQ_PREV(mr, nfs_lock_msg_queue, lmr_next); |
214 | } |
215 | if (mr) { |
216 | TAILQ_INSERT_AFTER(&nfs_pendlockq, mr, msgreq, lmr_next); |
217 | } else { |
218 | TAILQ_INSERT_HEAD(&nfs_pendlockq, msgreq, lmr_next); |
219 | } |
220 | } |
221 | |
222 | /* |
223 | * remove a lock request message from the pending queue |
224 | * (nfs_lock_mutex must be held) |
225 | */ |
226 | void |
227 | nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *msgreq) |
228 | { |
229 | TAILQ_REMOVE(&nfs_pendlockq, msgreq, lmr_next); |
230 | } |
231 | |
232 | /* |
233 | * find a pending lock request message by xid |
234 | * |
235 | * We search from the head of the list assuming that the message we're |
236 | * looking for is for an older request (because we have an answer to it). |
237 | * This assumes that lock request will be answered primarily in FIFO order. |
238 | * However, this may not be the case if there are blocked requests. We may |
239 | * want to move blocked requests to a separate queue (but that'll complicate |
240 | * duplicate xid checking). |
241 | * |
242 | * (nfs_lock_mutex must be held) |
243 | */ |
244 | LOCKD_MSG_REQUEST * |
245 | nfs_lockdmsg_find_by_xid(uint64_t lockxid) |
246 | { |
247 | LOCKD_MSG_REQUEST *mr; |
248 | |
249 | TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) { |
250 | if (mr->lmr_msg.lm_xid == lockxid) |
251 | return mr; |
252 | if (mr->lmr_msg.lm_xid > lockxid) |
253 | return NULL; |
254 | } |
255 | return mr; |
256 | } |
257 | |
258 | /* |
259 | * Because we can't depend on nlm_granted messages containing the same |
260 | * cookie we sent with the original lock request, we need code to test |
261 | * if an nlm_granted answer matches the lock request. We also need code |
262 | * that can find a lockd message based solely on the nlm_granted answer. |
263 | */ |
264 | |
265 | /* |
266 | * compare lockd message to answer |
267 | * |
268 | * returns 0 on equality and 1 if different |
269 | */ |
270 | int |
271 | nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST *msgreq, struct lockd_ans *ansp) |
272 | { |
273 | if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO)) |
274 | return 1; |
275 | if (msgreq->lmr_msg.lm_fl.l_pid != ansp->la_pid) |
276 | return 1; |
277 | if (msgreq->lmr_msg.lm_fl.l_start != ansp->la_start) |
278 | return 1; |
279 | if (msgreq->lmr_msg.lm_fl.l_len != ansp->la_len) |
280 | return 1; |
281 | if (msgreq->lmr_msg.lm_fh_len != ansp->la_fh_len) |
282 | return 1; |
283 | if (bcmp(msgreq->lmr_msg.lm_fh, ansp->la_fh, ansp->la_fh_len)) |
284 | return 1; |
285 | return 0; |
286 | } |
287 | |
288 | /* |
289 | * find a pending lock request message based on the lock info provided |
290 | * in the lockd_ans/nlm_granted data. We need this because we can't |
291 | * depend on nlm_granted messages containing the same cookie we sent |
292 | * with the original lock request. |
293 | * |
294 | * We search from the head of the list assuming that the message we're |
295 | * looking for is for an older request (because we have an answer to it). |
296 | * This assumes that lock request will be answered primarily in FIFO order. |
297 | * However, this may not be the case if there are blocked requests. We may |
298 | * want to move blocked requests to a separate queue (but that'll complicate |
299 | * duplicate xid checking). |
300 | * |
301 | * (nfs_lock_mutex must be held) |
302 | */ |
303 | LOCKD_MSG_REQUEST * |
304 | nfs_lockdmsg_find_by_answer(struct lockd_ans *ansp) |
305 | { |
306 | LOCKD_MSG_REQUEST *mr; |
307 | |
308 | if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO)) |
309 | return NULL; |
310 | TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) { |
311 | if (!nfs_lockdmsg_compare_to_answer(mr, ansp)) |
312 | break; |
313 | } |
314 | return mr; |
315 | } |
316 | |
317 | /* |
318 | * return the next unique lock request transaction ID |
319 | * (nfs_lock_mutex must be held) |
320 | */ |
321 | uint64_t |
322 | nfs_lockxid_get(void) |
323 | { |
324 | LOCKD_MSG_REQUEST *mr; |
325 | |
326 | /* derive initial lock xid from system time */ |
327 | if (!nfs_lockxid) { |
328 | /* |
329 | * Note: it's OK if this code inits nfs_lockxid to 0 (for example, |
330 | * due to a broken clock) because we immediately increment it |
331 | * and we guarantee to never use xid 0. So, nfs_lockxid should only |
332 | * ever be 0 the first time this function is called. |
333 | */ |
334 | struct timeval tv; |
335 | microtime(&tv); |
336 | nfs_lockxid = (uint64_t)tv.tv_sec << 12; |
337 | } |
338 | |
339 | /* make sure we get a unique xid */ |
340 | do { |
341 | /* Skip zero xid if it should ever happen. */ |
342 | if (++nfs_lockxid == 0) |
343 | nfs_lockxid++; |
344 | if (!(mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue)) || |
345 | (mr->lmr_msg.lm_xid < nfs_lockxid)) { |
346 | /* fast path: empty queue or new largest xid */ |
347 | break; |
348 | } |
349 | /* check if xid is already in use */ |
350 | } while (nfs_lockdmsg_find_by_xid(nfs_lockxid)); |
351 | |
352 | return nfs_lockxid; |
353 | } |
354 | |
355 | #define MACH_MAX_TRIES 3 |
356 | |
357 | int |
358 | nfs_lockd_send_request(LOCKD_MSG *msg, int interruptable) |
359 | { |
360 | kern_return_t kr; |
361 | int retries = 0; |
362 | mach_port_t lockd_port = IPC_PORT_NULL; |
363 | |
364 | kr = host_get_lockd_port(host_priv_self(), &lockd_port); |
365 | if (kr != KERN_SUCCESS || !IPC_PORT_VALID(lockd_port)) |
366 | return (ENOTSUP); |
367 | |
368 | do { |
369 | /* In the kernel all mach messaging is interruptable */ |
370 | do { |
371 | kr = lockd_request( |
372 | lockd_port, |
373 | msg->lm_version, |
374 | msg->lm_flags, |
375 | msg->lm_xid, |
376 | msg->lm_fl.l_start, |
377 | msg->lm_fl.l_len, |
378 | msg->lm_fl.l_pid, |
379 | msg->lm_fl.l_type, |
380 | msg->lm_fl.l_whence, |
381 | (uint32_t *)&msg->lm_addr, |
382 | (uint32_t *)&msg->lm_cred, |
383 | msg->lm_fh_len, |
384 | msg->lm_fh); |
385 | if (kr != KERN_SUCCESS) |
386 | printf("lockd_request received %d!\n" , kr); |
387 | } while (!interruptable && kr == MACH_SEND_INTERRUPTED); |
388 | } while (kr == MIG_SERVER_DIED && retries++ < MACH_MAX_TRIES); |
389 | |
390 | ipc_port_release_send(lockd_port); |
391 | switch (kr) { |
392 | case MACH_SEND_INTERRUPTED: |
393 | return (EINTR); |
394 | default: |
395 | /* |
396 | * Other MACH or MIG errors we will retry. Eventually |
397 | * we will call nfs_down and allow the user to disable |
398 | * locking. |
399 | */ |
400 | return (EAGAIN); |
401 | } |
402 | } |
403 | |
404 | /* |
405 | * NFS advisory byte-level locks (client) |
406 | */ |
407 | int |
408 | nfs3_lockd_request( |
409 | nfsnode_t np, |
410 | int type, |
411 | LOCKD_MSG_REQUEST *msgreq, |
412 | int flags, |
413 | thread_t thd) |
414 | { |
415 | LOCKD_MSG *msg = &msgreq->lmr_msg; |
416 | int error, error2; |
417 | int interruptable, slpflag; |
418 | struct nfsmount *nmp; |
419 | struct timeval now; |
420 | int timeo, starttime, endtime, lastmsg, wentdown = 0; |
421 | struct timespec ts; |
422 | struct sockaddr *saddr; |
423 | |
424 | nmp = NFSTONMP(np); |
425 | if (!nmp || !nmp->nm_saddr) |
426 | return (ENXIO); |
427 | |
428 | lck_mtx_lock(&nmp->nm_lock); |
429 | saddr = nmp->nm_saddr; |
430 | bcopy(saddr, &msg->lm_addr, min(sizeof msg->lm_addr, saddr->sa_len)); |
431 | if (nmp->nm_vers == NFS_VER3) |
432 | msg->lm_flags |= LOCKD_MSG_NFSV3; |
433 | |
434 | if (nmp->nm_sotype != SOCK_DGRAM) |
435 | msg->lm_flags |= LOCKD_MSG_TCP; |
436 | |
437 | microuptime(&now); |
438 | starttime = now.tv_sec; |
439 | lastmsg = now.tv_sec - ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay)); |
440 | interruptable = NMFLAG(nmp, INTR); |
441 | lck_mtx_unlock(&nmp->nm_lock); |
442 | |
443 | lck_mtx_lock(nfs_lock_mutex); |
444 | |
445 | /* allocate unique xid */ |
446 | msg->lm_xid = nfs_lockxid_get(); |
447 | nfs_lockdmsg_enqueue(msgreq); |
448 | |
449 | timeo = 4; |
450 | |
451 | for (;;) { |
452 | nfs_lockd_request_sent = 1; |
453 | |
454 | /* need to drop nfs_lock_mutex while calling nfs_lockd_send_request() */ |
455 | lck_mtx_unlock(nfs_lock_mutex); |
456 | error = nfs_lockd_send_request(msg, interruptable); |
457 | lck_mtx_lock(nfs_lock_mutex); |
458 | if (error && error != EAGAIN) |
459 | break; |
460 | |
461 | /* |
462 | * Always wait for an answer. Not waiting for unlocks could |
463 | * cause a lock to be left if the unlock request gets dropped. |
464 | */ |
465 | |
466 | /* |
467 | * Retry if it takes too long to get a response. |
468 | * |
469 | * The timeout numbers were picked out of thin air... they start |
470 | * at 4 and double each timeout with a max of 30 seconds. |
471 | * |
472 | * In order to maintain responsiveness, we pass a small timeout |
473 | * to msleep and calculate the timeouts ourselves. This allows |
474 | * us to pick up on mount changes quicker. |
475 | */ |
476 | wait_for_granted: |
477 | error = EWOULDBLOCK; |
478 | slpflag = (interruptable && (type != F_UNLCK)) ? PCATCH : 0; |
479 | ts.tv_sec = 2; |
480 | ts.tv_nsec = 0; |
481 | microuptime(&now); |
482 | endtime = now.tv_sec + timeo; |
483 | while (now.tv_sec < endtime) { |
484 | error = error2 = 0; |
485 | if (!msgreq->lmr_answered) { |
486 | error = msleep(msgreq, nfs_lock_mutex, slpflag | PUSER, "lockd" , &ts); |
487 | slpflag = 0; |
488 | } |
489 | if (msgreq->lmr_answered) { |
490 | /* |
491 | * Note: it's possible to have a lock granted at |
492 | * essentially the same time that we get interrupted. |
493 | * Since the lock may be granted, we can't return an |
494 | * error from this request or we might not unlock the |
495 | * lock that's been granted. |
496 | */ |
497 | nmp = NFSTONMP(np); |
498 | if ((msgreq->lmr_errno == ENOTSUP) && nmp && |
499 | (nmp->nm_state & NFSSTA_LOCKSWORK)) { |
500 | /* |
501 | * We have evidence that locks work, yet lockd |
502 | * returned ENOTSUP. This is probably because |
503 | * it was unable to contact the server's lockd |
504 | * to send it the request. |
505 | * |
506 | * Because we know locks work, we'll consider |
507 | * this failure to be a timeout. |
508 | */ |
509 | error = EWOULDBLOCK; |
510 | } else { |
511 | error = 0; |
512 | } |
513 | break; |
514 | } |
515 | if (error != EWOULDBLOCK) |
516 | break; |
517 | /* check that we still have our mount... */ |
518 | /* ...and that we still support locks */ |
519 | /* ...and that there isn't a recovery pending */ |
520 | nmp = NFSTONMP(np); |
521 | if ((error2 = nfs_sigintr(nmp, NULL, NULL, 0))) { |
522 | error = error2; |
523 | if (type == F_UNLCK) |
524 | printf("nfs3_lockd_request: aborting unlock request, error %d\n" , error); |
525 | break; |
526 | } |
527 | lck_mtx_lock(&nmp->nm_lock); |
528 | if (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED) { |
529 | lck_mtx_unlock(&nmp->nm_lock); |
530 | break; |
531 | } |
532 | if ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) { |
533 | /* recovery pending... return an error that'll get this operation restarted */ |
534 | error = NFSERR_GRACE; |
535 | lck_mtx_unlock(&nmp->nm_lock); |
536 | break; |
537 | } |
538 | interruptable = NMFLAG(nmp, INTR); |
539 | lck_mtx_unlock(&nmp->nm_lock); |
540 | microuptime(&now); |
541 | } |
542 | if (error) { |
543 | /* check that we still have our mount... */ |
544 | nmp = NFSTONMP(np); |
545 | if ((error2 = nfs_sigintr(nmp, NULL, NULL, 0))) { |
546 | error = error2; |
547 | if (error2 != EINTR) { |
548 | if (type == F_UNLCK) |
549 | printf("nfs3_lockd_request: aborting unlock request, error %d\n" , error); |
550 | break; |
551 | } |
552 | } |
553 | /* ...and that we still support locks */ |
554 | lck_mtx_lock(&nmp->nm_lock); |
555 | if (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED) { |
556 | if (error == EWOULDBLOCK) |
557 | error = ENOTSUP; |
558 | lck_mtx_unlock(&nmp->nm_lock); |
559 | break; |
560 | } |
561 | /* ...and that there isn't a recovery pending */ |
562 | if ((error == EWOULDBLOCK) && (nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) { |
563 | /* recovery pending... return to allow recovery to occur */ |
564 | error = NFSERR_DENIED; |
565 | lck_mtx_unlock(&nmp->nm_lock); |
566 | break; |
567 | } |
568 | interruptable = NMFLAG(nmp, INTR); |
569 | if ((error != EWOULDBLOCK) || |
570 | ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) || |
571 | ((flags & R_RECOVER) && ((now.tv_sec - starttime) > 30))) { |
572 | if ((error == EWOULDBLOCK) && (flags & R_RECOVER)) { |
573 | /* give up if this is for recovery and taking too long */ |
574 | error = ETIMEDOUT; |
575 | } else if ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) { |
576 | /* recovery pending... return an error that'll get this operation restarted */ |
577 | error = NFSERR_GRACE; |
578 | } |
579 | lck_mtx_unlock(&nmp->nm_lock); |
580 | /* |
581 | * We're going to bail on this request. |
582 | * If we were a blocked lock request, send a cancel. |
583 | */ |
584 | if ((msgreq->lmr_errno == EINPROGRESS) && |
585 | !(msg->lm_flags & LOCKD_MSG_CANCEL)) { |
586 | /* set this request up as a cancel */ |
587 | msg->lm_flags |= LOCKD_MSG_CANCEL; |
588 | nfs_lockdmsg_dequeue(msgreq); |
589 | msg->lm_xid = nfs_lockxid_get(); |
590 | nfs_lockdmsg_enqueue(msgreq); |
591 | msgreq->lmr_saved_errno = error; |
592 | msgreq->lmr_errno = 0; |
593 | msgreq->lmr_answered = 0; |
594 | /* reset timeout */ |
595 | timeo = 2; |
596 | /* send cancel request */ |
597 | continue; |
598 | } |
599 | break; |
600 | } |
601 | |
602 | /* warn if we're not getting any response */ |
603 | microuptime(&now); |
604 | if ((msgreq->lmr_errno != EINPROGRESS) && |
605 | !(msg->lm_flags & LOCKD_MSG_DENIED_GRACE) && |
606 | (nmp->nm_tprintf_initial_delay != 0) && |
607 | ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) { |
608 | lck_mtx_unlock(&nmp->nm_lock); |
609 | lastmsg = now.tv_sec; |
610 | nfs_down(nmp, thd, 0, NFSSTA_LOCKTIMEO, "lockd not responding" , 1); |
611 | wentdown = 1; |
612 | } else |
613 | lck_mtx_unlock(&nmp->nm_lock); |
614 | |
615 | if (msgreq->lmr_errno == EINPROGRESS) { |
616 | /* |
617 | * We've got a blocked lock request that we are |
618 | * going to retry. First, we'll want to try to |
619 | * send a cancel for the previous request. |
620 | * |
621 | * Clear errno so if we don't get a response |
622 | * to the resend we'll call nfs_down(). |
623 | * Also reset timeout because we'll expect a |
624 | * quick response to the cancel/resend (even if |
625 | * it is NLM_BLOCKED). |
626 | */ |
627 | msg->lm_flags |= LOCKD_MSG_CANCEL; |
628 | nfs_lockdmsg_dequeue(msgreq); |
629 | msg->lm_xid = nfs_lockxid_get(); |
630 | nfs_lockdmsg_enqueue(msgreq); |
631 | msgreq->lmr_saved_errno = msgreq->lmr_errno; |
632 | msgreq->lmr_errno = 0; |
633 | msgreq->lmr_answered = 0; |
634 | timeo = 2; |
635 | /* send cancel then resend request */ |
636 | continue; |
637 | } |
638 | |
639 | /* |
640 | * We timed out, so we will resend the request. |
641 | */ |
642 | if (!(flags & R_RECOVER)) |
643 | timeo *= 2; |
644 | if (timeo > 30) |
645 | timeo = 30; |
646 | /* resend request */ |
647 | continue; |
648 | } |
649 | |
650 | /* we got a reponse, so the server's lockd is OK */ |
651 | nfs_up(NFSTONMP(np), thd, NFSSTA_LOCKTIMEO, |
652 | wentdown ? "lockd alive again" : NULL); |
653 | wentdown = 0; |
654 | |
655 | if (msgreq->lmr_answered && (msg->lm_flags & LOCKD_MSG_DENIED_GRACE)) { |
656 | /* |
657 | * The lock request was denied because the server lockd is |
658 | * still in its grace period. So, we need to try the |
659 | * request again in a little bit. Return the GRACE error so |
660 | * the higher levels can perform the retry. |
661 | */ |
662 | msgreq->lmr_saved_errno = msgreq->lmr_errno = error = NFSERR_GRACE; |
663 | } |
664 | |
665 | if (msgreq->lmr_errno == EINPROGRESS) { |
666 | /* got NLM_BLOCKED response */ |
667 | /* need to wait for NLM_GRANTED */ |
668 | timeo = 30; |
669 | msgreq->lmr_answered = 0; |
670 | goto wait_for_granted; |
671 | } |
672 | |
673 | if ((msg->lm_flags & LOCKD_MSG_CANCEL) && |
674 | (msgreq->lmr_saved_errno == EINPROGRESS)) { |
675 | /* |
676 | * We just got a successful reply to the |
677 | * cancel of the previous blocked lock request. |
678 | * Now, go ahead and return a DENIED error so the |
679 | * higher levels can resend the request. |
680 | */ |
681 | msg->lm_flags &= ~LOCKD_MSG_CANCEL; |
682 | error = NFSERR_DENIED; |
683 | /* Will dequeue msgreq after the following break at the end of this routine */ |
684 | break; |
685 | } |
686 | |
687 | /* |
688 | * If the blocked lock request was cancelled. |
689 | * Restore the error condition from when we |
690 | * originally bailed on the request. |
691 | */ |
692 | if (msg->lm_flags & LOCKD_MSG_CANCEL) { |
693 | msg->lm_flags &= ~LOCKD_MSG_CANCEL; |
694 | error = msgreq->lmr_saved_errno; |
695 | } else { |
696 | error = msgreq->lmr_errno; |
697 | } |
698 | |
699 | nmp = NFSTONMP(np); |
700 | if ((error == ENOTSUP) && nmp && !(nmp->nm_state & NFSSTA_LOCKSWORK)) { |
701 | /* |
702 | * We have NO evidence that locks work and lockd |
703 | * returned ENOTSUP. Let's take this as a hint |
704 | * that locks aren't supported and disable them |
705 | * for this mount. |
706 | */ |
707 | nfs_lockdmsg_dequeue(msgreq); |
708 | lck_mtx_unlock(nfs_lock_mutex); |
709 | lck_mtx_lock(&nmp->nm_lock); |
710 | if (nmp->nm_lockmode == NFS_LOCK_MODE_ENABLED) { |
711 | nmp->nm_lockmode = NFS_LOCK_MODE_DISABLED; |
712 | nfs_lockd_mount_unregister(nmp); |
713 | } |
714 | nmp->nm_state &= ~NFSSTA_LOCKTIMEO; |
715 | lck_mtx_unlock(&nmp->nm_lock); |
716 | printf("lockd returned ENOTSUP, disabling locks for nfs server: %s\n" , |
717 | vfs_statfs(nmp->nm_mountp)->f_mntfromname); |
718 | return (error); |
719 | } |
720 | if (!error) { |
721 | /* record that NFS file locking has worked on this mount */ |
722 | if (nmp) { |
723 | lck_mtx_lock(&nmp->nm_lock); |
724 | if (!(nmp->nm_state & NFSSTA_LOCKSWORK)) |
725 | nmp->nm_state |= NFSSTA_LOCKSWORK; |
726 | lck_mtx_unlock(&nmp->nm_lock); |
727 | } |
728 | } |
729 | break; |
730 | } |
731 | |
732 | nfs_lockdmsg_dequeue(msgreq); |
733 | |
734 | lck_mtx_unlock(nfs_lock_mutex); |
735 | |
736 | return (error); |
737 | } |
738 | |
739 | /* |
740 | * Send an NLM LOCK message to the server |
741 | */ |
742 | int |
743 | nfs3_setlock_rpc( |
744 | nfsnode_t np, |
745 | struct nfs_open_file *nofp, |
746 | struct nfs_file_lock *nflp, |
747 | int reclaim, |
748 | int flags, |
749 | thread_t thd, |
750 | kauth_cred_t cred) |
751 | { |
752 | struct nfs_lock_owner *nlop = nflp->nfl_owner; |
753 | struct nfsmount *nmp; |
754 | int error; |
755 | LOCKD_MSG_REQUEST msgreq; |
756 | LOCKD_MSG *msg; |
757 | |
758 | nmp = NFSTONMP(np); |
759 | if (nfs_mount_gone(nmp)) |
760 | return (ENXIO); |
761 | |
762 | if (!nlop->nlo_open_owner) { |
763 | nfs_open_owner_ref(nofp->nof_owner); |
764 | nlop->nlo_open_owner = nofp->nof_owner; |
765 | } |
766 | if ((error = nfs_lock_owner_set_busy(nlop, thd))) |
767 | return (error); |
768 | |
769 | /* set up lock message request structure */ |
770 | bzero(&msgreq, sizeof(msgreq)); |
771 | msg = &msgreq.lmr_msg; |
772 | msg->lm_version = LOCKD_MSG_VERSION; |
773 | if ((nflp->nfl_flags & NFS_FILE_LOCK_WAIT) && !reclaim) |
774 | msg->lm_flags |= LOCKD_MSG_BLOCK; |
775 | if (reclaim) |
776 | msg->lm_flags |= LOCKD_MSG_RECLAIM; |
777 | msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize; |
778 | bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len); |
779 | cru2x(cred, &msg->lm_cred); |
780 | |
781 | msg->lm_fl.l_whence = SEEK_SET; |
782 | msg->lm_fl.l_start = nflp->nfl_start; |
783 | msg->lm_fl.l_len = NFS_FLOCK_LENGTH(nflp->nfl_start, nflp->nfl_end); |
784 | msg->lm_fl.l_type = nflp->nfl_type; |
785 | msg->lm_fl.l_pid = nlop->nlo_pid; |
786 | |
787 | error = nfs3_lockd_request(np, 0, &msgreq, flags, thd); |
788 | |
789 | nfs_lock_owner_clear_busy(nlop); |
790 | return (error); |
791 | } |
792 | |
793 | /* |
794 | * Send an NLM UNLOCK message to the server |
795 | */ |
796 | int |
797 | nfs3_unlock_rpc( |
798 | nfsnode_t np, |
799 | struct nfs_lock_owner *nlop, |
800 | __unused int type, |
801 | uint64_t start, |
802 | uint64_t end, |
803 | int flags, |
804 | thread_t thd, |
805 | kauth_cred_t cred) |
806 | { |
807 | struct nfsmount *nmp; |
808 | LOCKD_MSG_REQUEST msgreq; |
809 | LOCKD_MSG *msg; |
810 | |
811 | nmp = NFSTONMP(np); |
812 | if (!nmp) |
813 | return (ENXIO); |
814 | |
815 | /* set up lock message request structure */ |
816 | bzero(&msgreq, sizeof(msgreq)); |
817 | msg = &msgreq.lmr_msg; |
818 | msg->lm_version = LOCKD_MSG_VERSION; |
819 | msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize; |
820 | bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len); |
821 | cru2x(cred, &msg->lm_cred); |
822 | |
823 | msg->lm_fl.l_whence = SEEK_SET; |
824 | msg->lm_fl.l_start = start; |
825 | msg->lm_fl.l_len = NFS_FLOCK_LENGTH(start, end); |
826 | msg->lm_fl.l_type = F_UNLCK; |
827 | msg->lm_fl.l_pid = nlop->nlo_pid; |
828 | |
829 | return (nfs3_lockd_request(np, F_UNLCK, &msgreq, flags, thd)); |
830 | } |
831 | |
832 | /* |
833 | * Send an NLM LOCK TEST message to the server |
834 | */ |
835 | int |
836 | nfs3_getlock_rpc( |
837 | nfsnode_t np, |
838 | struct nfs_lock_owner *nlop, |
839 | struct flock *fl, |
840 | uint64_t start, |
841 | uint64_t end, |
842 | vfs_context_t ctx) |
843 | { |
844 | struct nfsmount *nmp; |
845 | int error; |
846 | LOCKD_MSG_REQUEST msgreq; |
847 | LOCKD_MSG *msg; |
848 | |
849 | nmp = NFSTONMP(np); |
850 | if (nfs_mount_gone(nmp)) |
851 | return (ENXIO); |
852 | |
853 | /* set up lock message request structure */ |
854 | bzero(&msgreq, sizeof(msgreq)); |
855 | msg = &msgreq.lmr_msg; |
856 | msg->lm_version = LOCKD_MSG_VERSION; |
857 | msg->lm_flags |= LOCKD_MSG_TEST; |
858 | msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize; |
859 | bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len); |
860 | cru2x(vfs_context_ucred(ctx), &msg->lm_cred); |
861 | |
862 | msg->lm_fl.l_whence = SEEK_SET; |
863 | msg->lm_fl.l_start = start; |
864 | msg->lm_fl.l_len = NFS_FLOCK_LENGTH(start, end); |
865 | msg->lm_fl.l_type = fl->l_type; |
866 | msg->lm_fl.l_pid = nlop->nlo_pid; |
867 | |
868 | error = nfs3_lockd_request(np, 0, &msgreq, 0, vfs_context_thread(ctx)); |
869 | |
870 | if (!error && (msg->lm_flags & LOCKD_MSG_TEST) && !msgreq.lmr_errno) { |
871 | if (msg->lm_fl.l_type != F_UNLCK) { |
872 | fl->l_type = msg->lm_fl.l_type; |
873 | fl->l_pid = msg->lm_fl.l_pid; |
874 | fl->l_start = msg->lm_fl.l_start; |
875 | fl->l_len = msg->lm_fl.l_len; |
876 | fl->l_whence = SEEK_SET; |
877 | } else |
878 | fl->l_type = F_UNLCK; |
879 | } |
880 | |
881 | return (error); |
882 | } |
883 | |
884 | /* |
885 | * nfslockdans -- |
886 | * NFS advisory byte-level locks answer from the lock daemon. |
887 | */ |
888 | int |
889 | nfslockdans(proc_t p, struct lockd_ans *ansp) |
890 | { |
891 | LOCKD_MSG_REQUEST *msgreq; |
892 | int error; |
893 | |
894 | /* Let root make this call. */ |
895 | error = proc_suser(p); |
896 | if (error) |
897 | return (error); |
898 | |
899 | /* the version should match, or we're out of sync */ |
900 | if (ansp->la_version != LOCKD_ANS_VERSION) |
901 | return (EINVAL); |
902 | |
903 | lck_mtx_lock(nfs_lock_mutex); |
904 | |
905 | /* try to find the lockd message by transaction id (cookie) */ |
906 | msgreq = nfs_lockdmsg_find_by_xid(ansp->la_xid); |
907 | if (ansp->la_flags & LOCKD_ANS_GRANTED) { |
908 | /* |
909 | * We can't depend on the granted message having our cookie, |
910 | * so we check the answer against the lockd message found. |
911 | * If no message was found or it doesn't match the answer, |
912 | * we look for the lockd message by the answer's lock info. |
913 | */ |
914 | if (!msgreq || nfs_lockdmsg_compare_to_answer(msgreq, ansp)) |
915 | msgreq = nfs_lockdmsg_find_by_answer(ansp); |
916 | /* |
917 | * We need to make sure this request isn't being cancelled |
918 | * If it is, we don't want to accept the granted message. |
919 | */ |
920 | if (msgreq && (msgreq->lmr_msg.lm_flags & LOCKD_MSG_CANCEL)) |
921 | msgreq = NULL; |
922 | } |
923 | if (!msgreq) { |
924 | lck_mtx_unlock(nfs_lock_mutex); |
925 | return (EPIPE); |
926 | } |
927 | |
928 | msgreq->lmr_errno = ansp->la_errno; |
929 | if ((msgreq->lmr_msg.lm_flags & LOCKD_MSG_TEST) && msgreq->lmr_errno == 0) { |
930 | if (ansp->la_flags & LOCKD_ANS_LOCK_INFO) { |
931 | if (ansp->la_flags & LOCKD_ANS_LOCK_EXCL) |
932 | msgreq->lmr_msg.lm_fl.l_type = F_WRLCK; |
933 | else |
934 | msgreq->lmr_msg.lm_fl.l_type = F_RDLCK; |
935 | msgreq->lmr_msg.lm_fl.l_pid = ansp->la_pid; |
936 | msgreq->lmr_msg.lm_fl.l_start = ansp->la_start; |
937 | msgreq->lmr_msg.lm_fl.l_len = ansp->la_len; |
938 | } else { |
939 | msgreq->lmr_msg.lm_fl.l_type = F_UNLCK; |
940 | } |
941 | } |
942 | if (ansp->la_flags & LOCKD_ANS_DENIED_GRACE) |
943 | msgreq->lmr_msg.lm_flags |= LOCKD_MSG_DENIED_GRACE; |
944 | |
945 | msgreq->lmr_answered = 1; |
946 | lck_mtx_unlock(nfs_lock_mutex); |
947 | wakeup(msgreq); |
948 | |
949 | return (0); |
950 | } |
951 | |
952 | /* |
953 | * nfslockdnotify -- |
954 | * NFS host restart notification from the lock daemon. |
955 | * |
956 | * Used to initiate reclaiming of held locks when a server we |
957 | * have mounted reboots. |
958 | */ |
959 | int |
960 | nfslockdnotify(proc_t p, user_addr_t argp) |
961 | { |
962 | int error, i, headsize; |
963 | struct lockd_notify ln; |
964 | struct nfsmount *nmp; |
965 | struct sockaddr *saddr; |
966 | |
967 | /* Let root make this call. */ |
968 | error = proc_suser(p); |
969 | if (error) |
970 | return (error); |
971 | |
972 | headsize = (char*)&ln.ln_addr[0] - (char*)&ln.ln_version; |
973 | error = copyin(argp, &ln, headsize); |
974 | if (error) |
975 | return (error); |
976 | if (ln.ln_version != LOCKD_NOTIFY_VERSION) |
977 | return (EINVAL); |
978 | if ((ln.ln_addrcount < 1) || (ln.ln_addrcount > 128)) |
979 | return (EINVAL); |
980 | argp += headsize; |
981 | saddr = (struct sockaddr *)&ln.ln_addr[0]; |
982 | |
983 | lck_mtx_lock(nfs_lock_mutex); |
984 | |
985 | for (i=0; i < ln.ln_addrcount; i++) { |
986 | error = copyin(argp, &ln.ln_addr[0], sizeof(ln.ln_addr[0])); |
987 | if (error) |
988 | break; |
989 | argp += sizeof(ln.ln_addr[0]); |
990 | /* scan lockd mount list for match to this address */ |
991 | TAILQ_FOREACH(nmp, &nfs_lockd_mount_list, nm_ldlink) { |
992 | /* check if address matches this mount's server address */ |
993 | if (!nmp->nm_saddr || nfs_sockaddr_cmp(saddr, nmp->nm_saddr)) |
994 | continue; |
995 | /* We have a match! Mark it as needing recovery. */ |
996 | lck_mtx_lock(&nmp->nm_lock); |
997 | nfs_need_recover(nmp, 0); |
998 | lck_mtx_unlock(&nmp->nm_lock); |
999 | } |
1000 | } |
1001 | |
1002 | lck_mtx_unlock(nfs_lock_mutex); |
1003 | |
1004 | return (error); |
1005 | } |
1006 | |
1007 | |