1 | /* |
2 | * Copyright (c) 2000-2015 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ |
29 | /* |
30 | * Copyright (c) 1989, 1991, 1993, 1995 |
31 | * The Regents of the University of California. All rights reserved. |
32 | * |
33 | * This code is derived from software contributed to Berkeley by |
34 | * Rick Macklem at The University of Guelph. |
35 | * |
36 | * Redistribution and use in source and binary forms, with or without |
37 | * modification, are permitted provided that the following conditions |
38 | * are met: |
39 | * 1. Redistributions of source code must retain the above copyright |
40 | * notice, this list of conditions and the following disclaimer. |
41 | * 2. Redistributions in binary form must reproduce the above copyright |
42 | * notice, this list of conditions and the following disclaimer in the |
43 | * documentation and/or other materials provided with the distribution. |
44 | * 3. All advertising materials mentioning features or use of this software |
45 | * must display the following acknowledgement: |
46 | * This product includes software developed by the University of |
47 | * California, Berkeley and its contributors. |
48 | * 4. Neither the name of the University nor the names of its contributors |
49 | * may be used to endorse or promote products derived from this software |
50 | * without specific prior written permission. |
51 | * |
52 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
53 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
54 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
55 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
56 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
57 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
58 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
59 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
60 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
61 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
62 | * SUCH DAMAGE. |
63 | * |
64 | * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95 |
65 | * FreeBSD-Id: nfs_socket.c,v 1.30 1997/10/28 15:59:07 bde Exp $ |
66 | */ |
67 | |
68 | /* |
69 | * Socket operations for use by nfs |
70 | */ |
71 | |
72 | #include <sys/param.h> |
73 | #include <sys/systm.h> |
74 | #include <sys/proc.h> |
75 | #include <sys/signalvar.h> |
76 | #include <sys/kauth.h> |
77 | #include <sys/mount_internal.h> |
78 | #include <sys/kernel.h> |
79 | #include <sys/kpi_mbuf.h> |
80 | #include <sys/malloc.h> |
81 | #include <sys/vnode.h> |
82 | #include <sys/domain.h> |
83 | #include <sys/protosw.h> |
84 | #include <sys/socket.h> |
85 | #include <sys/syslog.h> |
86 | #include <sys/tprintf.h> |
87 | #include <libkern/OSAtomic.h> |
88 | |
89 | #include <sys/time.h> |
90 | #include <kern/clock.h> |
91 | #include <kern/task.h> |
92 | #include <kern/thread.h> |
93 | #include <kern/thread_call.h> |
94 | #include <sys/user.h> |
95 | #include <sys/acct.h> |
96 | |
97 | #include <netinet/in.h> |
98 | #include <netinet/tcp.h> |
99 | |
100 | #include <nfs/rpcv2.h> |
101 | #include <nfs/krpc.h> |
102 | #include <nfs/nfsproto.h> |
103 | #include <nfs/nfs.h> |
104 | #include <nfs/xdr_subs.h> |
105 | #include <nfs/nfsm_subs.h> |
106 | #include <nfs/nfs_gss.h> |
107 | #include <nfs/nfsmount.h> |
108 | #include <nfs/nfsnode.h> |
109 | |
/*
 * Debug logging for the socket code in this file: forwards its arguments
 * to NFS_DBG with the NFS_FAC_SOCK facility and a fixed second argument
 * of 7 (presumably the debug level -- confirm against NFS_DBG).
 */
#define NFS_SOCK_DBG(...) NFS_DBG(NFS_FAC_SOCK, 7, ## __VA_ARGS__)

/* XXX: prototypes for kernel thread routines, declared locally in this file */
boolean_t current_thread_aborted(void);
kern_return_t thread_terminate(thread_t);


#if NFSSERVER
int nfsrv_sock_max_rec_queue_length = 128; /* max # RPC records queued on (UDP) socket */

/* NFS server routines/data declared here; they are defined outside this part of the file */
int nfsrv_getstream(struct nfsrv_sock *,int);
int nfsrv_getreq(struct nfsrv_descript *);
extern int nfsv3_procid[NFS_NPROCS];
#endif /* NFSSERVER */
124 | |
125 | /* |
126 | * compare two sockaddr structures |
127 | */ |
128 | int |
129 | nfs_sockaddr_cmp(struct sockaddr *sa1, struct sockaddr *sa2) |
130 | { |
131 | if (!sa1) |
132 | return (-1); |
133 | if (!sa2) |
134 | return (1); |
135 | if (sa1->sa_family != sa2->sa_family) |
136 | return ((sa1->sa_family < sa2->sa_family) ? -1 : 1); |
137 | if (sa1->sa_len != sa2->sa_len) |
138 | return ((sa1->sa_len < sa2->sa_len) ? -1 : 1); |
139 | if (sa1->sa_family == AF_INET) |
140 | return (bcmp(&((struct sockaddr_in*)sa1)->sin_addr, |
141 | &((struct sockaddr_in*)sa2)->sin_addr, sizeof(((struct sockaddr_in*)sa1)->sin_addr))); |
142 | if (sa1->sa_family == AF_INET6) |
143 | return (bcmp(&((struct sockaddr_in6*)sa1)->sin6_addr, |
144 | &((struct sockaddr_in6*)sa2)->sin6_addr, sizeof(((struct sockaddr_in6*)sa1)->sin6_addr))); |
145 | return (-1); |
146 | } |
147 | |
148 | #if NFSCLIENT |
149 | |
/*
 * Forward declarations for NFS client socket routines.
 */

/* socket search/connect helpers */
int nfs_connect_search_new_socket(struct nfsmount *, struct nfs_socket_search *, struct timeval *);
int nfs_connect_search_socket_connect(struct nfsmount *, struct nfs_socket *, int);
int nfs_connect_search_ping(struct nfsmount *, struct nfs_socket *, struct timeval *);
void nfs_connect_search_socket_found(struct nfsmount *, struct nfs_socket_search *, struct nfs_socket *);
void nfs_connect_search_socket_reap(struct nfsmount *, struct nfs_socket_search *, struct timeval *);
int nfs_connect_search_check(struct nfsmount *, struct nfs_socket_search *, struct timeval *);
/* (re)connection and per-mount socket handling */
int nfs_reconnect(struct nfsmount *);
int nfs_connect_setup(struct nfsmount *);
void nfs_mount_sock_thread(void *, wait_result_t);
void nfs_udp_rcv(socket_t, void*, int);
void nfs_tcp_rcv(socket_t, void*, int);
void nfs_sock_poke(struct nfsmount *);
/* request handling */
void nfs_request_match_reply(struct nfsmount *, mbuf_t);
void nfs_reqdequeue(struct nfsreq *);
void nfs_reqbusy(struct nfsreq *);
struct nfsreq *nfs_reqnext(struct nfsreq *);
int nfs_wait_reply(struct nfsreq *);
void nfs_softterm(struct nfsreq *);
/* mount state queries */
int nfs_can_squish(struct nfsmount *);
int nfs_is_squishy(struct nfsmount *);
int nfs_is_dead(int, struct nfsmount *);
171 | |
/*
 * Estimate rto for an nfs rpc sent via an unreliable datagram.
 * Use the mean and mean deviation of rtt for the appropriate type of rpc
 * for the frequent rpcs and a default for the others.
 * The justification for doing "other" this way is that these rpcs
 * happen so infrequently that timer est. would probably be stale.
 * Also, since many of these rpcs are
 * non-idempotent, a conservative timeout is desired.
 * getattr, lookup - A+2D
 * read, write - A+4D
 * other - nm_timeo
 *
 * NFS_RTO(n, t): "n" is the mount and "t" the timer class (0 selects
 * nm_timeo, 1..4 select slot t-1 of nm_srtt/nm_sdrtt).  Both arguments
 * are evaluated more than once, so they must be free of side effects.
 * Every use of an argument is parenthesized so that an expression
 * argument (e.g. "c ? 1 : 2") indexes the intended slot.
 *
 * NFS_SRTT(r)/NFS_SDRTT(r): the srtt/sdrtt slot for request "r", chosen
 * through proct[].  They expand to lvalues.  Because one is subtracted
 * from the proct[] entry, they are only meaningful for procedures whose
 * entry is non-zero.
 */
#define NFS_RTO(n, t) \
	((t) == 0 ? (n)->nm_timeo : \
	 ((t) < 3 ? \
	  (((((n)->nm_srtt[(t) - 1] + 3) >> 2) + (n)->nm_sdrtt[(t) - 1] + 1) >> 1) : \
	  ((((n)->nm_srtt[(t) - 1] + 7) >> 3) + (n)->nm_sdrtt[(t) - 1] + 1)))
#define NFS_SRTT(r)	((r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1])
#define NFS_SDRTT(r)	((r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1])
191 | |
/*
 * Defines which timer to use for the procnum.
 * The table is indexed by RPC procedure number; each entry is a timer class:
 * 0 - default
 * 1 - getattr
 * 2 - lookup
 * 3 - read
 * 4 - write
 *
 * NFS_SRTT()/NFS_SDRTT() subtract one from the entry to index
 * nm_srtt[]/nm_sdrtt[], so class 0 has no slot in those arrays.
 * Entries not listed in the initializer are implicitly 0 (default).
 */
static int proct[NFS_NPROCS] = {
	0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0
};
203 | |
/*
 * There is a congestion window for outstanding rpcs maintained per mount
 * point. The cwnd size is adjusted in roughly the way that:
 * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
 * SIGCOMM '88". ACM, August 1988.
 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
 * of rpcs is in progress.
 * (The sent count and cwnd are scaled for integer arith.)
 * Variants of "slow start" were tried and were found to be too much of a
 * performance hit (ave. rtt 3 times larger),
 * I suspect due to the large rtt that nfs rpcs have.
 */
/* scale factor applied to the sent count and cwnd (see above) */
#define NFS_CWNDSCALE 256
/* upper bound for the scaled cwnd: 32 times the scale factor */
#define NFS_MAXCWND (NFS_CWNDSCALE * 32)
/*
 * Eight doubling factors, 2 through 256.  The code that uses this table is
 * outside this part of the file -- presumably retransmit timeout backoff
 * multipliers; confirm against the users.
 */
static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
220 | |
221 | /* |
222 | * Increment location index to next address/server/location. |
223 | */ |
224 | void |
225 | nfs_location_next(struct nfs_fs_locations *nlp, struct nfs_location_index *nlip) |
226 | { |
227 | uint8_t loc = nlip->nli_loc; |
228 | uint8_t serv = nlip->nli_serv; |
229 | uint8_t addr = nlip->nli_addr; |
230 | |
231 | /* move to next address */ |
232 | addr++; |
233 | if (addr >= nlp->nl_locations[loc]->nl_servers[serv]->ns_addrcount) { |
234 | /* no more addresses on current server, go to first address of next server */ |
235 | next_server: |
236 | addr = 0; |
237 | serv++; |
238 | if (serv >= nlp->nl_locations[loc]->nl_servcount) { |
239 | /* no more servers on current location, go to first server of next location */ |
240 | serv = 0; |
241 | loc++; |
242 | if (loc >= nlp->nl_numlocs) |
243 | loc = 0; /* after last location, wrap back around to first location */ |
244 | } |
245 | } |
246 | /* |
247 | * It's possible for this next server to not have any addresses. |
248 | * Check for that here and go to the next server. |
249 | * But bail out if we've managed to come back around to the original |
250 | * location that was passed in. (That would mean no servers had any |
251 | * addresses. And we don't want to spin here forever.) |
252 | */ |
253 | if ((loc == nlip->nli_loc) && (serv == nlip->nli_serv) && (addr == nlip->nli_addr)) |
254 | return; |
255 | if (addr >= nlp->nl_locations[loc]->nl_servers[serv]->ns_addrcount) |
256 | goto next_server; |
257 | |
258 | nlip->nli_loc = loc; |
259 | nlip->nli_serv = serv; |
260 | nlip->nli_addr = addr; |
261 | } |
262 | |
263 | /* |
264 | * Compare two location indices. |
265 | */ |
266 | int |
267 | nfs_location_index_cmp(struct nfs_location_index *nlip1, struct nfs_location_index *nlip2) |
268 | { |
269 | if (nlip1->nli_loc != nlip2->nli_loc) |
270 | return (nlip1->nli_loc - nlip2->nli_loc); |
271 | if (nlip1->nli_serv != nlip2->nli_serv) |
272 | return (nlip1->nli_serv - nlip2->nli_serv); |
273 | return (nlip1->nli_addr - nlip2->nli_addr); |
274 | } |
275 | |
276 | /* |
277 | * Get the mntfromname (or path portion only) for a given location. |
278 | */ |
279 | void |
280 | nfs_location_mntfromname(struct nfs_fs_locations *locs, struct nfs_location_index idx, char *s, int size, int pathonly) |
281 | { |
282 | struct nfs_fs_location *fsl = locs->nl_locations[idx.nli_loc]; |
283 | char *p; |
284 | int cnt, i; |
285 | |
286 | p = s; |
287 | if (!pathonly) { |
288 | cnt = snprintf(p, size, "%s:" , fsl->nl_servers[idx.nli_serv]->ns_name); |
289 | p += cnt; |
290 | size -= cnt; |
291 | } |
292 | if (fsl->nl_path.np_compcount == 0) { |
293 | /* mounting root export on server */ |
294 | if (size > 0) { |
295 | *p++ = '/'; |
296 | *p++ = '\0'; |
297 | } |
298 | return; |
299 | } |
300 | /* append each server path component */ |
301 | for (i=0; (size > 0) && (i < (int)fsl->nl_path.np_compcount); i++) { |
302 | cnt = snprintf(p, size, "/%s" , fsl->nl_path.np_components[i]); |
303 | p += cnt; |
304 | size -= cnt; |
305 | } |
306 | } |
307 | |
/*
 * NFS client connect socket upcall.
 * (Used only during socket connect/search.)
 *
 * "arg" is the nfs_socket the upcall was registered for.
 *
 * While the socket is still connecting, the upcall only wakes whoever
 * sleeps on nso_wake.  Otherwise, if a ping is outstanding, it receives
 * (non-blocking) and parses the RPC reply to that ping: on a good reply
 * the socket is marked NSO_VERIFIED, on a bad one it is marked NSO_DEAD
 * with nso_error set.  Receive failures other than EWOULDBLOCK also mark
 * the socket dead.
 */
void
nfs_connect_upcall(socket_t so, void *arg, __unused int waitflag)
{
	struct nfs_socket *nso = arg;
	size_t rcvlen;
	mbuf_t m;
	int error = 0, recv = 1;

	/* still connecting: just nudge the waiter (flag is checked without the lock) */
	if (nso->nso_flags & NSO_CONNECTING) {
		NFS_SOCK_DBG("nfs connect - socket %p upcall - connecting\n" , nso);
		wakeup(nso->nso_wake);
		return;
	}

	lck_mtx_lock(&nso->nso_lock);
	/* nothing to do if an upcall is already active, the socket is going away, or no ping is pending */
	if ((nso->nso_flags & (NSO_UPCALL|NSO_DISCONNECTING|NSO_DEAD)) || !(nso->nso_flags & NSO_PINGING)) {
		NFS_SOCK_DBG("nfs connect - socket %p upcall - nevermind\n" , nso);
		lck_mtx_unlock(&nso->nso_lock);
		return;
	}
	NFS_SOCK_DBG("nfs connect - socket %p upcall\n" , nso);
	nso->nso_flags |= NSO_UPCALL;

	/* loop while we make error-free progress */
	while (!error && recv) {
		/* make sure we're still interested in this socket */
		if (nso->nso_flags & (NSO_DISCONNECTING|NSO_DEAD))
			break;
		/* the lock is dropped around the receive */
		lck_mtx_unlock(&nso->nso_lock);
		m = NULL;
		if (nso->nso_sotype == SOCK_STREAM) {
			error = nfs_rpc_record_read(so, &nso->nso_rrs, MSG_DONTWAIT, &recv, &m);
		} else {
			rcvlen = 1000000;
			error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
			recv = m ? 1 : 0;
		}
		lck_mtx_lock(&nso->nso_lock);
		if (m) {
			/* match response with request */
			struct nfsm_chain nmrep;
			uint32_t reply = 0, rxid = 0, verf_type, verf_len;
			uint32_t reply_status, rejected_status, accepted_status;

			/*
			 * NOTE(review): the nfsm_chain_* macros take the running "error"
			 * as their first argument and nfsmout_if() presumably jumps to
			 * the nfsmout label below when it is set -- confirm in nfsm_subs.h.
			 */
			nfsm_chain_dissect_init(error, &nmrep, m);
			nfsm_chain_get_32(error, &nmrep, rxid);
			nfsm_chain_get_32(error, &nmrep, reply);
			/* must be a reply, and it must carry the xid of our ping */
			if (!error && ((reply != RPC_REPLY) || (rxid != nso->nso_pingxid)))
				error = EBADRPC;
			nfsm_chain_get_32(error, &nmrep, reply_status);
			if (!error && (reply_status == RPC_MSGDENIED)) {
				nfsm_chain_get_32(error, &nmrep, rejected_status);
				if (!error)
					error = (rejected_status == RPC_MISMATCH) ? ERPCMISMATCH : EACCES;
			}
			nfsm_chain_get_32(error, &nmrep, verf_type); /* verifier flavor */
			nfsm_chain_get_32(error, &nmrep, verf_len); /* verifier length */
			nfsmout_if(error);
			/* skip over the verifier body */
			if (verf_len)
				nfsm_chain_adv(error, &nmrep, nfsm_rndup(verf_len));
			nfsm_chain_get_32(error, &nmrep, accepted_status);
			nfsmout_if(error);
			/* version mismatch while no version is fixed yet: try to pick one the server offers */
			if ((accepted_status == RPC_PROGMISMATCH) && !nso->nso_version) {
				uint32_t minvers, maxvers;
				nfsm_chain_get_32(error, &nmrep, minvers);
				nfsm_chain_get_32(error, &nmrep, maxvers);
				nfsmout_if(error);
				if (nso->nso_protocol == PMAPPROG) {
					if ((minvers > RPCBVERS4) || (maxvers < PMAPVERS))
						error = EPROGMISMATCH;
					else if ((nso->nso_saddr->sa_family == AF_INET) &&
						 (PMAPVERS >= minvers) && (PMAPVERS <= maxvers))
						nso->nso_version = PMAPVERS;
					else if (nso->nso_saddr->sa_family == AF_INET6) {
						if ((RPCBVERS4 >= minvers) && (RPCBVERS4 <= maxvers))
							nso->nso_version = RPCBVERS4;
						else if ((RPCBVERS3 >= minvers) && (RPCBVERS3 <= maxvers))
							nso->nso_version = RPCBVERS3;
					}
				} else if (nso->nso_protocol == NFS_PROG) {
					int vers;

					/*
					 * N.B. Both portmapper and rpcbind V3 are happy to return
					 * addresses for other versions than the one you ask (getport or
					 * getaddr) and thus we may have fallen to this code path. So if
					 * we get a version that we support, use highest supported
					 * version. This assumes that the server supports all versions
					 * between minvers and maxvers. Note for IPv6 we will try and
					 * use rpcbind V4 which has getversaddr and we should not get
					 * here if that was successful.
					 */
					for (vers = nso->nso_nfs_max_vers; vers >= (int)nso->nso_nfs_min_vers; vers--) {
						if (vers >= (int)minvers && vers <= (int)maxvers)
							break;
					}
					/* the loop ran off the bottom: no common version */
					nso->nso_version = (vers < (int)nso->nso_nfs_min_vers) ? 0 : vers;
				}
				/* a usable version was found, so treat the reply as a success */
				if (!error && nso->nso_version)
					accepted_status = RPC_SUCCESS;
			}
			/* translate the accept status into an errno */
			if (!error) {
				switch (accepted_status) {
				case RPC_SUCCESS:
					error = 0;
					break;
				case RPC_PROGUNAVAIL:
					error = EPROGUNAVAIL;
					break;
				case RPC_PROGMISMATCH:
					error = EPROGMISMATCH;
					break;
				case RPC_PROCUNAVAIL:
					error = EPROCUNAVAIL;
					break;
				case RPC_GARBAGE:
					error = EBADRPC;
					break;
				case RPC_SYSTEM_ERR:
				default:
					error = EIO;
					break;
				}
			}
nfsmout:
			/* the ping has been answered, one way or the other */
			nso->nso_flags &= ~NSO_PINGING;
			if (error) {
				nso->nso_error = error;
				nso->nso_flags |= NSO_DEAD;
			} else {
				nso->nso_flags |= NSO_VERIFIED;
			}
			mbuf_freem(m);
			/* wake up search thread */
			wakeup(nso->nso_wake);
			break;
		}
	}

	nso->nso_flags &= ~NSO_UPCALL;
	/* EWOULDBLOCK just means there was nothing (more) to read */
	if ((error != EWOULDBLOCK) && (error || !recv)) {
		/* problems with the socket... */
		nso->nso_error = error ? error : EPIPE;
		nso->nso_flags |= NSO_DEAD;
		wakeup(nso->nso_wake);
	}
	/* nfs_socket_destroy() sleeps on &nso_flags while waiting for the upcall to finish */
	if (nso->nso_flags & NSO_DISCONNECTING)
		wakeup(&nso->nso_flags);
	lck_mtx_unlock(&nso->nso_lock);
}
462 | |
463 | /* |
464 | * Create/initialize an nfs_socket structure. |
465 | */ |
466 | int |
467 | nfs_socket_create( |
468 | struct nfsmount *nmp, |
469 | struct sockaddr *sa, |
470 | int sotype, |
471 | in_port_t port, |
472 | uint32_t protocol, |
473 | uint32_t vers, |
474 | int resvport, |
475 | struct nfs_socket **nsop) |
476 | { |
477 | struct nfs_socket *nso; |
478 | struct timeval now; |
479 | int error; |
480 | #ifdef NFS_SOCKET_DEBUGGING |
481 | char naddr[MAX_IPv6_STR_LEN]; |
482 | void *sinaddr; |
483 | |
484 | if (sa->sa_family == AF_INET) |
485 | sinaddr = &((struct sockaddr_in*)sa)->sin_addr; |
486 | else |
487 | sinaddr = &((struct sockaddr_in6*)sa)->sin6_addr; |
488 | if (inet_ntop(sa->sa_family, sinaddr, naddr, sizeof(naddr)) != naddr) |
489 | strlcpy(naddr, "<unknown>" , sizeof(naddr)); |
490 | #else |
491 | char naddr[1] = { 0 }; |
492 | #endif |
493 | |
494 | *nsop = NULL; |
495 | |
496 | /* Create the socket. */ |
497 | MALLOC(nso, struct nfs_socket *, sizeof(struct nfs_socket), M_TEMP, M_WAITOK|M_ZERO); |
498 | if (nso) |
499 | MALLOC(nso->nso_saddr, struct sockaddr *, sa->sa_len, M_SONAME, M_WAITOK|M_ZERO); |
500 | if (!nso || !nso->nso_saddr) { |
501 | if (nso) |
502 | FREE(nso, M_TEMP); |
503 | return (ENOMEM); |
504 | } |
505 | lck_mtx_init(&nso->nso_lock, nfs_request_grp, LCK_ATTR_NULL); |
506 | nso->nso_sotype = sotype; |
507 | if (nso->nso_sotype == SOCK_STREAM) |
508 | nfs_rpc_record_state_init(&nso->nso_rrs); |
509 | microuptime(&now); |
510 | nso->nso_timestamp = now.tv_sec; |
511 | bcopy(sa, nso->nso_saddr, sa->sa_len); |
512 | if (sa->sa_family == AF_INET) |
513 | ((struct sockaddr_in*)nso->nso_saddr)->sin_port = htons(port); |
514 | else if (sa->sa_family == AF_INET6) |
515 | ((struct sockaddr_in6*)nso->nso_saddr)->sin6_port = htons(port); |
516 | nso->nso_protocol = protocol; |
517 | nso->nso_version = vers; |
518 | nso->nso_nfs_min_vers = PVER2MAJOR(nmp->nm_min_vers); |
519 | nso->nso_nfs_max_vers = PVER2MAJOR(nmp->nm_max_vers); |
520 | |
521 | error = sock_socket(sa->sa_family, nso->nso_sotype, 0, NULL, NULL, &nso->nso_so); |
522 | |
523 | /* Some servers require that the client port be a reserved port number. */ |
524 | if (!error && resvport && ((sa->sa_family == AF_INET) || (sa->sa_family == AF_INET6))) { |
525 | struct sockaddr_storage ss; |
526 | int level = (sa->sa_family == AF_INET) ? IPPROTO_IP : IPPROTO_IPV6; |
527 | int optname = (sa->sa_family == AF_INET) ? IP_PORTRANGE : IPV6_PORTRANGE; |
528 | int portrange = IP_PORTRANGE_LOW; |
529 | |
530 | error = sock_setsockopt(nso->nso_so, level, optname, &portrange, sizeof(portrange)); |
531 | if (!error) { /* bind now to check for failure */ |
532 | ss.ss_len = sa->sa_len; |
533 | ss.ss_family = sa->sa_family; |
534 | if (ss.ss_family == AF_INET) { |
535 | ((struct sockaddr_in*)&ss)->sin_addr.s_addr = INADDR_ANY; |
536 | ((struct sockaddr_in*)&ss)->sin_port = htons(0); |
537 | } else if (ss.ss_family == AF_INET6) { |
538 | ((struct sockaddr_in6*)&ss)->sin6_addr = in6addr_any; |
539 | ((struct sockaddr_in6*)&ss)->sin6_port = htons(0); |
540 | } else { |
541 | error = EINVAL; |
542 | } |
543 | if (!error) |
544 | error = sock_bind(nso->nso_so, (struct sockaddr*)&ss); |
545 | } |
546 | } |
547 | |
548 | if (error) { |
549 | NFS_SOCK_DBG("nfs connect %s error %d creating socket %p %s type %d%s port %d prot %d %d\n" , |
550 | vfs_statfs(nmp->nm_mountp)->f_mntfromname, error, nso, naddr, sotype, |
551 | resvport ? "r" : "" , port, protocol, vers); |
552 | nfs_socket_destroy(nso); |
553 | } else { |
554 | NFS_SOCK_DBG("nfs connect %s created socket %p %s type %d%s port %d prot %d %d\n" , |
555 | vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, naddr, |
556 | sotype, resvport ? "r" : "" , port, protocol, vers); |
557 | *nsop = nso; |
558 | } |
559 | return (error); |
560 | } |
561 | |
562 | /* |
563 | * Destroy an nfs_socket structure. |
564 | */ |
565 | void |
566 | nfs_socket_destroy(struct nfs_socket *nso) |
567 | { |
568 | struct timespec ts = { 4, 0 }; |
569 | |
570 | lck_mtx_lock(&nso->nso_lock); |
571 | nso->nso_flags |= NSO_DISCONNECTING; |
572 | if (nso->nso_flags & NSO_UPCALL) /* give upcall a chance to complete */ |
573 | msleep(&nso->nso_flags, &nso->nso_lock, PZERO-1, "nfswaitupcall" , &ts); |
574 | lck_mtx_unlock(&nso->nso_lock); |
575 | sock_shutdown(nso->nso_so, SHUT_RDWR); |
576 | sock_close(nso->nso_so); |
577 | if (nso->nso_sotype == SOCK_STREAM) |
578 | nfs_rpc_record_state_cleanup(&nso->nso_rrs); |
579 | lck_mtx_destroy(&nso->nso_lock, nfs_request_grp); |
580 | if (nso->nso_saddr) |
581 | FREE(nso->nso_saddr, M_SONAME); |
582 | if (nso->nso_saddr2) |
583 | FREE(nso->nso_saddr2, M_SONAME); |
584 | NFS_SOCK_DBG("nfs connect - socket %p destroyed\n" , nso); |
585 | FREE(nso, M_TEMP); |
586 | } |
587 | |
588 | /* |
589 | * Set common socket options on an nfs_socket. |
590 | */ |
591 | void |
592 | nfs_socket_options(struct nfsmount *nmp, struct nfs_socket *nso) |
593 | { |
594 | /* |
595 | * Set socket send/receive timeouts |
596 | * - Receive timeout shouldn't matter because most receives are performed |
597 | * in the socket upcall non-blocking. |
598 | * - Send timeout should allow us to react to a blocked socket. |
599 | * Soft mounts will want to abort sooner. |
600 | */ |
601 | struct timeval timeo; |
602 | int on = 1, proto; |
603 | |
604 | timeo.tv_usec = 0; |
605 | timeo.tv_sec = (NMFLAG(nmp, SOFT) || nfs_can_squish(nmp)) ? 5 : 60; |
606 | sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo)); |
607 | sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo)); |
608 | if (nso->nso_sotype == SOCK_STREAM) { |
609 | /* Assume that SOCK_STREAM always requires a connection */ |
610 | sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on)); |
611 | /* set nodelay for TCP */ |
612 | sock_gettype(nso->nso_so, NULL, NULL, &proto); |
613 | if (proto == IPPROTO_TCP) |
614 | sock_setsockopt(nso->nso_so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on)); |
615 | } |
616 | if (nso->nso_sotype == SOCK_DGRAM) { /* set socket buffer sizes for UDP */ |
617 | int reserve = NFS_UDPSOCKBUF; |
618 | sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve)); |
619 | sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve)); |
620 | } |
621 | /* set SO_NOADDRERR to detect network changes ASAP */ |
622 | sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on)); |
623 | /* just playin' it safe with upcalls */ |
624 | sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on)); |
625 | /* socket should be interruptible if the mount is */ |
626 | if (!NMFLAG(nmp, INTR)) |
627 | sock_nointerrupt(nso->nso_so, 1); |
628 | } |
629 | |
630 | /* |
631 | * Release resources held in an nfs_socket_search. |
632 | */ |
633 | void |
634 | nfs_socket_search_cleanup(struct nfs_socket_search *nss) |
635 | { |
636 | struct nfs_socket *nso, *nsonext; |
637 | |
638 | TAILQ_FOREACH_SAFE(nso, &nss->nss_socklist, nso_link, nsonext) { |
639 | TAILQ_REMOVE(&nss->nss_socklist, nso, nso_link); |
640 | nss->nss_sockcnt--; |
641 | nfs_socket_destroy(nso); |
642 | } |
643 | if (nss->nss_sock) { |
644 | nfs_socket_destroy(nss->nss_sock); |
645 | nss->nss_sock = NULL; |
646 | } |
647 | } |
648 | |
649 | /* |
650 | * Prefer returning certain errors over others. |
651 | * This function returns a ranking of the given error. |
652 | */ |
653 | int |
654 | nfs_connect_error_class(int error) |
655 | { |
656 | switch (error) { |
657 | case 0: |
658 | return (0); |
659 | case ETIMEDOUT: |
660 | case EAGAIN: |
661 | return (1); |
662 | case EPIPE: |
663 | case EADDRNOTAVAIL: |
664 | case ENETDOWN: |
665 | case ENETUNREACH: |
666 | case ENETRESET: |
667 | case ECONNABORTED: |
668 | case ECONNRESET: |
669 | case EISCONN: |
670 | case ENOTCONN: |
671 | case ESHUTDOWN: |
672 | case ECONNREFUSED: |
673 | case EHOSTDOWN: |
674 | case EHOSTUNREACH: |
675 | return (2); |
676 | case ERPCMISMATCH: |
677 | case EPROCUNAVAIL: |
678 | case EPROGMISMATCH: |
679 | case EPROGUNAVAIL: |
680 | return (3); |
681 | case EBADRPC: |
682 | return (4); |
683 | default: |
684 | return (5); |
685 | } |
686 | } |
687 | |
688 | /* |
689 | * Make sure a socket search returns the best error. |
690 | */ |
691 | void |
692 | nfs_socket_search_update_error(struct nfs_socket_search *nss, int error) |
693 | { |
694 | if (nfs_connect_error_class(error) >= nfs_connect_error_class(nss->nss_error)) |
695 | nss->nss_error = error; |
696 | } |
697 | |
698 | /* nfs_connect_search_new_socket: |
699 | * Given a socket search structure for an nfs mount try to find a new socket from the set of addresses specified |
700 | * by nss. |
701 | * |
702 | * nss_last is set to -1 at initialization to indicate the first time. Its set to -2 if address was found but |
703 | * could not be used or if a socket timed out. |
704 | */ |
705 | int |
706 | nfs_connect_search_new_socket(struct nfsmount *nmp, struct nfs_socket_search *nss, struct timeval *now) |
707 | { |
708 | struct nfs_fs_location *fsl; |
709 | struct nfs_fs_server *fss; |
710 | struct sockaddr_storage ss; |
711 | struct nfs_socket *nso; |
712 | char *addrstr; |
713 | int error = 0; |
714 | |
715 | |
716 | NFS_SOCK_DBG("nfs connect %s nss_addrcnt = %d\n" , |
717 | vfs_statfs(nmp->nm_mountp)->f_mntfromname, nss->nss_addrcnt); |
718 | |
719 | /* |
720 | * while there are addresses and: |
721 | * we have no sockets or |
722 | * the last address failed and did not produce a socket (nss_last < 0) or |
723 | * Its been a while (2 seconds) and we have less than the max number of concurrent sockets to search (4) |
724 | * then attempt to create a socket with the current address. |
725 | */ |
726 | while (nss->nss_addrcnt > 0 && ((nss->nss_last < 0) || (nss->nss_sockcnt == 0) || |
727 | ((nss->nss_sockcnt < 4) && (now->tv_sec >= (nss->nss_last + 2))))) { |
728 | if (nmp->nm_sockflags & NMSOCK_UNMOUNT) |
729 | return (EINTR); |
730 | /* Can we convert the address to a sockaddr? */ |
731 | fsl = nmp->nm_locations.nl_locations[nss->nss_nextloc.nli_loc]; |
732 | fss = fsl->nl_servers[nss->nss_nextloc.nli_serv]; |
733 | addrstr = fss->ns_addresses[nss->nss_nextloc.nli_addr]; |
734 | if (!nfs_uaddr2sockaddr(addrstr, (struct sockaddr*)&ss)) { |
735 | nfs_location_next(&nmp->nm_locations, &nss->nss_nextloc); |
736 | nss->nss_addrcnt -= 1; |
737 | nss->nss_last = -2; |
738 | continue; |
739 | } |
740 | /* Check that socket family is acceptable. */ |
741 | if (nmp->nm_sofamily && (ss.ss_family != nmp->nm_sofamily)) { |
742 | nfs_location_next(&nmp->nm_locations, &nss->nss_nextloc); |
743 | nss->nss_addrcnt -= 1; |
744 | nss->nss_last = -2; |
745 | continue; |
746 | } |
747 | |
748 | /* Create the socket. */ |
749 | error = nfs_socket_create(nmp, (struct sockaddr*)&ss, nss->nss_sotype, |
750 | nss->nss_port, nss->nss_protocol, nss->nss_version, |
751 | ((nss->nss_protocol == NFS_PROG) && NMFLAG(nmp, RESVPORT)), &nso); |
752 | if (error) |
753 | return (error); |
754 | |
755 | nso->nso_location = nss->nss_nextloc; |
756 | nso->nso_wake = nss; |
757 | error = sock_setupcall(nso->nso_so, nfs_connect_upcall, nso); |
758 | if (error) { |
759 | lck_mtx_lock(&nso->nso_lock); |
760 | nso->nso_error = error; |
761 | nso->nso_flags |= NSO_DEAD; |
762 | lck_mtx_unlock(&nso->nso_lock); |
763 | } |
764 | |
765 | TAILQ_INSERT_TAIL(&nss->nss_socklist, nso, nso_link); |
766 | nss->nss_sockcnt++; |
767 | nfs_location_next(&nmp->nm_locations, &nss->nss_nextloc); |
768 | nss->nss_addrcnt -= 1; |
769 | |
770 | nss->nss_last = now->tv_sec; |
771 | } |
772 | |
773 | if (nss->nss_addrcnt == 0 && nss->nss_last < 0) |
774 | nss->nss_last = now->tv_sec; |
775 | |
776 | return (error); |
777 | } |
778 | |
779 | /* |
780 | * nfs_connect_search_socket_connect: Connect an nfs socket nso for nfsmount nmp. |
781 | * If successful set the socket options for the socket as require from the mount. |
782 | * |
783 | * Assumes: nso->nso_lock is held on entry and return. |
784 | */ |
785 | int |
786 | nfs_connect_search_socket_connect(struct nfsmount *nmp, struct nfs_socket *nso, int verbose) |
787 | { |
788 | int error; |
789 | |
790 | if ((nso->nso_sotype != SOCK_STREAM) && NMFLAG(nmp, NOCONNECT)) { |
791 | /* no connection needed, just say it's already connected */ |
792 | NFS_SOCK_DBG("nfs connect %s UDP socket %p noconnect\n" , |
793 | vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso); |
794 | nso->nso_flags |= NSO_CONNECTED; |
795 | nfs_socket_options(nmp, nso); |
796 | return (1); /* Socket is connected and setup */ |
797 | } else if (!(nso->nso_flags & NSO_CONNECTING)) { |
798 | /* initiate the connection */ |
799 | nso->nso_flags |= NSO_CONNECTING; |
800 | lck_mtx_unlock(&nso->nso_lock); |
801 | NFS_SOCK_DBG("nfs connect %s connecting socket %p\n" , |
802 | vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso); |
803 | error = sock_connect(nso->nso_so, nso->nso_saddr, MSG_DONTWAIT); |
804 | lck_mtx_lock(&nso->nso_lock); |
805 | if (error && (error != EINPROGRESS)) { |
806 | nso->nso_error = error; |
807 | nso->nso_flags |= NSO_DEAD; |
808 | return (0); |
809 | } |
810 | } |
811 | if (nso->nso_flags & NSO_CONNECTING) { |
812 | /* check the connection */ |
813 | if (sock_isconnected(nso->nso_so)) { |
814 | NFS_SOCK_DBG("nfs connect %s socket %p is connected\n" , |
815 | vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso); |
816 | nso->nso_flags &= ~NSO_CONNECTING; |
817 | nso->nso_flags |= NSO_CONNECTED; |
818 | nfs_socket_options(nmp, nso); |
819 | return (1); /* Socket is connected and setup */ |
820 | } else { |
821 | int optlen = sizeof(error); |
822 | error = 0; |
823 | sock_getsockopt(nso->nso_so, SOL_SOCKET, SO_ERROR, &error, &optlen); |
824 | if (error) { /* we got an error on the socket */ |
825 | NFS_SOCK_DBG("nfs connect %s socket %p connection error %d\n" , |
826 | vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error); |
827 | if (verbose) |
828 | printf("nfs connect socket error %d for %s\n" , |
829 | error, vfs_statfs(nmp->nm_mountp)->f_mntfromname); |
830 | nso->nso_error = error; |
831 | nso->nso_flags |= NSO_DEAD; |
832 | return (0); |
833 | } |
834 | } |
835 | } |
836 | |
837 | return (0); /* Waiting to be connected */ |
838 | } |
839 | |
840 | /* |
841 | * nfs_connect_search_ping: Send a null proc on the nso socket. |
842 | */ |
843 | int |
844 | nfs_connect_search_ping(struct nfsmount *nmp, struct nfs_socket *nso, struct timeval *now) |
845 | { |
846 | /* initiate a NULL RPC request */ |
847 | uint64_t xid = nso->nso_pingxid; |
848 | mbuf_t m, mreq = NULL; |
849 | struct msghdr msg; |
850 | size_t reqlen, sentlen; |
851 | uint32_t vers = nso->nso_version; |
852 | int error; |
853 | |
854 | if (!vers) { |
855 | if (nso->nso_protocol == PMAPPROG) |
856 | vers = (nso->nso_saddr->sa_family == AF_INET) ? PMAPVERS : RPCBVERS4; |
857 | else if (nso->nso_protocol == NFS_PROG) |
858 | vers = PVER2MAJOR(nmp->nm_max_vers); |
859 | } |
860 | lck_mtx_unlock(&nso->nso_lock); |
861 | error = nfsm_rpchead2(nmp, nso->nso_sotype, nso->nso_protocol, vers, 0, RPCAUTH_SYS, |
862 | vfs_context_ucred(vfs_context_kernel()), NULL, NULL, &xid, &mreq); |
863 | lck_mtx_lock(&nso->nso_lock); |
864 | if (!error) { |
865 | nso->nso_flags |= NSO_PINGING; |
866 | nso->nso_pingxid = R_XID32(xid); |
867 | nso->nso_reqtimestamp = now->tv_sec; |
868 | bzero(&msg, sizeof(msg)); |
869 | if ((nso->nso_sotype != SOCK_STREAM) && !sock_isconnected(nso->nso_so)) { |
870 | msg.msg_name = nso->nso_saddr; |
871 | msg.msg_namelen = nso->nso_saddr->sa_len; |
872 | } |
873 | for (reqlen=0, m=mreq; m; m = mbuf_next(m)) |
874 | reqlen += mbuf_len(m); |
875 | lck_mtx_unlock(&nso->nso_lock); |
876 | error = sock_sendmbuf(nso->nso_so, &msg, mreq, 0, &sentlen); |
877 | NFS_SOCK_DBG("nfs connect %s verifying socket %p send rv %d\n" , |
878 | vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error); |
879 | lck_mtx_lock(&nso->nso_lock); |
880 | if (!error && (sentlen != reqlen)) |
881 | error = ETIMEDOUT; |
882 | } |
883 | if (error) { |
884 | nso->nso_error = error; |
885 | nso->nso_flags |= NSO_DEAD; |
886 | return (0); |
887 | } |
888 | |
889 | return (1); |
890 | } |
891 | |
892 | /* |
893 | * nfs_connect_search_socket_found: Take the found socket of the socket search list and assign it to the searched socket. |
894 | * Set the nfs socket protocol and version if needed. |
895 | */ |
896 | void |
897 | nfs_connect_search_socket_found(struct nfsmount *nmp, struct nfs_socket_search *nss, struct nfs_socket *nso) |
898 | { |
899 | NFS_SOCK_DBG("nfs connect %s socket %p verified\n" , |
900 | vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso); |
901 | if (!nso->nso_version) { |
902 | /* If the version isn't set, the default must have worked. */ |
903 | if (nso->nso_protocol == PMAPPROG) |
904 | nso->nso_version = (nso->nso_saddr->sa_family == AF_INET) ? PMAPVERS : RPCBVERS4; |
905 | if (nso->nso_protocol == NFS_PROG) |
906 | nso->nso_version = PVER2MAJOR(nmp->nm_max_vers); |
907 | } |
908 | TAILQ_REMOVE(&nss->nss_socklist, nso, nso_link); |
909 | nss->nss_sockcnt--; |
910 | nss->nss_sock = nso; |
911 | } |
912 | |
913 | /* |
914 | * nfs_connect_search_socket_reap: For each socket in the search list mark any timed out socket as dead and remove from |
915 | * the list. Dead socket are then destroyed. |
916 | */ |
917 | void |
918 | nfs_connect_search_socket_reap(struct nfsmount *nmp __unused, struct nfs_socket_search *nss, struct timeval *now) |
919 | { |
920 | struct nfs_socket *nso, *nsonext; |
921 | |
922 | TAILQ_FOREACH_SAFE(nso, &nss->nss_socklist, nso_link, nsonext) { |
923 | lck_mtx_lock(&nso->nso_lock); |
924 | if (now->tv_sec >= (nso->nso_timestamp + nss->nss_timeo)) { |
925 | /* took too long */ |
926 | NFS_SOCK_DBG("nfs connect %s socket %p timed out\n" , |
927 | vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso); |
928 | nso->nso_error = ETIMEDOUT; |
929 | nso->nso_flags |= NSO_DEAD; |
930 | } |
931 | if (!(nso->nso_flags & NSO_DEAD)) { |
932 | lck_mtx_unlock(&nso->nso_lock); |
933 | continue; |
934 | } |
935 | lck_mtx_unlock(&nso->nso_lock); |
936 | NFS_SOCK_DBG("nfs connect %s reaping socket %p %d\n" , |
937 | vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, nso->nso_error); |
938 | nfs_socket_search_update_error(nss, nso->nso_error); |
939 | TAILQ_REMOVE(&nss->nss_socklist, nso, nso_link); |
940 | nss->nss_sockcnt--; |
941 | nfs_socket_destroy(nso); |
942 | /* If there are more sockets to try, force the starting of another socket */ |
943 | if (nss->nss_addrcnt > 0) |
944 | nss->nss_last = -2; |
945 | } |
946 | } |
947 | |
948 | /* |
949 | * nfs_connect_search_check: Check on the status of search and wait for replies if needed. |
950 | */ |
951 | int |
952 | nfs_connect_search_check(struct nfsmount *nmp, struct nfs_socket_search *nss, struct timeval *now) |
953 | { |
954 | int error; |
955 | |
956 | /* log a warning if connect is taking a while */ |
957 | if (((now->tv_sec - nss->nss_timestamp) >= 8) && ((nss->nss_flags & (NSS_VERBOSE|NSS_WARNED)) == NSS_VERBOSE)) { |
958 | printf("nfs_connect: socket connect taking a while for %s\n" , vfs_statfs(nmp->nm_mountp)->f_mntfromname); |
959 | nss->nss_flags |= NSS_WARNED; |
960 | } |
961 | if (nmp->nm_sockflags & NMSOCK_UNMOUNT) |
962 | return (EINTR); |
963 | if ((error = nfs_sigintr(nmp, NULL, current_thread(), 0))) |
964 | return (error); |
965 | |
966 | /* If we were succesfull at sending a ping, wait up to a second for a reply */ |
967 | if (nss->nss_last >= 0) |
968 | tsleep(nss, PSOCK, "nfs_connect_search_wait" , hz); |
969 | |
970 | return (0); |
971 | } |
972 | |
973 | |
974 | /* |
975 | * Continue the socket search until we have something to report. |
976 | */ |
977 | int |
978 | nfs_connect_search_loop(struct nfsmount *nmp, struct nfs_socket_search *nss) |
979 | { |
980 | struct nfs_socket *nso; |
981 | struct timeval now; |
982 | int error; |
983 | int verbose = (nss->nss_flags & NSS_VERBOSE); |
984 | |
985 | loop: |
986 | microuptime(&now); |
987 | NFS_SOCK_DBG("nfs connect %s search %ld\n" , vfs_statfs(nmp->nm_mountp)->f_mntfromname, now.tv_sec); |
988 | |
989 | /* add a new socket to the socket list if needed and available */ |
990 | error = nfs_connect_search_new_socket(nmp, nss, &now); |
991 | if (error) { |
992 | NFS_SOCK_DBG("nfs connect returned %d\n" , error); |
993 | return (error); |
994 | } |
995 | |
996 | /* check each active socket on the list and try to push it along */ |
997 | TAILQ_FOREACH(nso, &nss->nss_socklist, nso_link) { |
998 | lck_mtx_lock(&nso->nso_lock); |
999 | |
1000 | /* If not connected connect it */ |
1001 | if (!(nso->nso_flags & NSO_CONNECTED)) { |
1002 | if (!nfs_connect_search_socket_connect(nmp, nso, verbose)) { |
1003 | lck_mtx_unlock(&nso->nso_lock); |
1004 | continue; |
1005 | } |
1006 | } |
1007 | |
1008 | /* If the socket hasn't been verified or in a ping, ping it. We also handle UDP retransmits */ |
1009 | if (!(nso->nso_flags & (NSO_PINGING|NSO_VERIFIED)) || |
1010 | ((nso->nso_sotype == SOCK_DGRAM) && (now.tv_sec >= nso->nso_reqtimestamp+2))) { |
1011 | if (!nfs_connect_search_ping(nmp, nso, &now)) { |
1012 | lck_mtx_unlock(&nso->nso_lock); |
1013 | continue; |
1014 | } |
1015 | } |
1016 | |
1017 | /* Has the socket been verified by the up call routine? */ |
1018 | if (nso->nso_flags & NSO_VERIFIED) { |
1019 | /* WOOHOO!! This socket looks good! */ |
1020 | nfs_connect_search_socket_found(nmp, nss, nso); |
1021 | lck_mtx_unlock(&nso->nso_lock); |
1022 | break; |
1023 | } |
1024 | lck_mtx_unlock(&nso->nso_lock); |
1025 | } |
1026 | |
1027 | /* Check for timed out sockets and mark as dead and then remove all dead sockets. */ |
1028 | nfs_connect_search_socket_reap(nmp, nss, &now); |
1029 | |
1030 | /* |
1031 | * Keep looping if we haven't found a socket yet and we have more |
1032 | * sockets to (continue to) try. |
1033 | */ |
1034 | error = 0; |
1035 | if (!nss->nss_sock && (!TAILQ_EMPTY(&nss->nss_socklist) || nss->nss_addrcnt)) { |
1036 | error = nfs_connect_search_check(nmp, nss, &now); |
1037 | if (!error) |
1038 | goto loop; |
1039 | } |
1040 | |
1041 | NFS_SOCK_DBG("nfs connect %s returning %d\n" , vfs_statfs(nmp->nm_mountp)->f_mntfromname, error); |
1042 | return (error); |
1043 | } |
1044 | |
1045 | /* |
1046 | * Initialize a new NFS connection. |
1047 | * |
1048 | * Search for a location to connect a socket to and initialize the connection. |
1049 | * |
1050 | * An NFS mount may have multiple locations/servers/addresses available. |
1051 | * We attempt to connect to each one asynchronously and will start |
1052 | * several sockets in parallel if other locations are slow to answer. |
1053 | * We'll use the first NFS socket we can successfully set up. |
1054 | * |
1055 | * The search may involve contacting the portmapper service first. |
1056 | * |
1057 | * A mount's initial connection may require negotiating some parameters such |
1058 | * as socket type and NFS version. |
1059 | */ |
1060 | |
1061 | int |
1062 | nfs_connect(struct nfsmount *nmp, int verbose, int timeo) |
1063 | { |
1064 | struct nfs_socket_search nss; |
1065 | struct nfs_socket *nso, *nsonfs; |
1066 | struct sockaddr_storage ss; |
1067 | struct sockaddr *saddr, *oldsaddr; |
1068 | sock_upcall upcall; |
1069 | struct timeval now, start; |
1070 | int error, savederror, nfsvers; |
1071 | int tryv4 = 1; |
1072 | uint8_t sotype = nmp->nm_sotype ? nmp->nm_sotype : SOCK_STREAM; |
1073 | fhandle_t *fh = NULL; |
1074 | char *path = NULL; |
1075 | in_port_t port; |
1076 | int addrtotal = 0; |
1077 | |
1078 | /* paranoia... check that we have at least one address in the locations */ |
1079 | uint32_t loc, serv; |
1080 | for (loc=0; loc < nmp->nm_locations.nl_numlocs; loc++) { |
1081 | for (serv=0; serv < nmp->nm_locations.nl_locations[loc]->nl_servcount; serv++) { |
1082 | addrtotal += nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_addrcount; |
1083 | if (nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_addrcount == 0) |
1084 | NFS_SOCK_DBG("nfs connect %s search, server %s has no addresses\n" , |
1085 | vfs_statfs(nmp->nm_mountp)->f_mntfromname, |
1086 | nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_name); |
1087 | } |
1088 | } |
1089 | |
1090 | if (addrtotal == 0) { |
1091 | NFS_SOCK_DBG("nfs connect %s search failed, no addresses\n" , |
1092 | vfs_statfs(nmp->nm_mountp)->f_mntfromname); |
1093 | return (EINVAL); |
1094 | } else |
1095 | NFS_SOCK_DBG("nfs connect %s has %d addresses\n" , |
1096 | vfs_statfs(nmp->nm_mountp)->f_mntfromname, addrtotal); |
1097 | |
1098 | lck_mtx_lock(&nmp->nm_lock); |
1099 | nmp->nm_sockflags |= NMSOCK_CONNECTING; |
1100 | nmp->nm_nss = &nss; |
1101 | lck_mtx_unlock(&nmp->nm_lock); |
1102 | microuptime(&start); |
1103 | savederror = error = 0; |
1104 | |
1105 | tryagain: |
1106 | /* initialize socket search state */ |
1107 | bzero(&nss, sizeof(nss)); |
1108 | nss.nss_addrcnt = addrtotal; |
1109 | nss.nss_error = savederror; |
1110 | TAILQ_INIT(&nss.nss_socklist); |
1111 | nss.nss_sotype = sotype; |
1112 | nss.nss_startloc = nmp->nm_locations.nl_current; |
1113 | nss.nss_timestamp = start.tv_sec; |
1114 | nss.nss_timeo = timeo; |
1115 | if (verbose) |
1116 | nss.nss_flags |= NSS_VERBOSE; |
1117 | |
1118 | /* First time connecting, we may need to negotiate some things */ |
1119 | if (!(nmp->nm_sockflags & NMSOCK_HASCONNECTED)) { |
1120 | if (!nmp->nm_vers) { |
1121 | /* No NFS version specified... */ |
1122 | if (!nmp->nm_nfsport || (!NM_OMATTR_GIVEN(nmp, FH) && !nmp->nm_mountport)) { |
1123 | if (PVER2MAJOR(nmp->nm_max_vers) >= NFS_VER4 && tryv4) { |
1124 | nss.nss_port = NFS_PORT; |
1125 | nss.nss_protocol = NFS_PROG; |
1126 | nss.nss_version = 4; |
1127 | nss.nss_flags |= NSS_FALLBACK2PMAP; |
1128 | } else { |
1129 | /* ...connect to portmapper first if we (may) need any ports. */ |
1130 | nss.nss_port = PMAPPORT; |
1131 | nss.nss_protocol = PMAPPROG; |
1132 | nss.nss_version = 0; |
1133 | } |
1134 | } else { |
1135 | /* ...connect to NFS port first. */ |
1136 | nss.nss_port = nmp->nm_nfsport; |
1137 | nss.nss_protocol = NFS_PROG; |
1138 | nss.nss_version = 0; |
1139 | } |
1140 | } else if (nmp->nm_vers >= NFS_VER4) { |
1141 | if (tryv4) { |
1142 | /* For NFSv4, we use the given (or default) port. */ |
1143 | nss.nss_port = nmp->nm_nfsport ? nmp->nm_nfsport : NFS_PORT; |
1144 | nss.nss_protocol = NFS_PROG; |
1145 | nss.nss_version = 4; |
1146 | /* |
1147 | * set NSS_FALLBACK2PMAP here to pick up any non standard port |
1148 | * if no port is specified on the mount; |
1149 | * Note nm_vers is set so we will only try NFS_VER4. |
1150 | */ |
1151 | if (!nmp->nm_nfsport) |
1152 | nss.nss_flags |= NSS_FALLBACK2PMAP; |
1153 | } else { |
1154 | nss.nss_port = PMAPPORT; |
1155 | nss.nss_protocol = PMAPPROG; |
1156 | nss.nss_version = 0; |
1157 | } |
1158 | } else { |
1159 | /* For NFSv3/v2... */ |
1160 | if (!nmp->nm_nfsport || (!NM_OMATTR_GIVEN(nmp, FH) && !nmp->nm_mountport)) { |
1161 | /* ...connect to portmapper first if we need any ports. */ |
1162 | nss.nss_port = PMAPPORT; |
1163 | nss.nss_protocol = PMAPPROG; |
1164 | nss.nss_version = 0; |
1165 | } else { |
1166 | /* ...connect to NFS port first. */ |
1167 | nss.nss_port = nmp->nm_nfsport; |
1168 | nss.nss_protocol = NFS_PROG; |
1169 | nss.nss_version = nmp->nm_vers; |
1170 | } |
1171 | } |
1172 | NFS_SOCK_DBG("nfs connect first %s, so type %d port %d prot %d %d\n" , |
1173 | vfs_statfs(nmp->nm_mountp)->f_mntfromname, nss.nss_sotype, nss.nss_port, |
1174 | nss.nss_protocol, nss.nss_version); |
1175 | } else { |
1176 | /* we've connected before, just connect to NFS port */ |
1177 | if (!nmp->nm_nfsport) { |
1178 | /* need to ask portmapper which port that would be */ |
1179 | nss.nss_port = PMAPPORT; |
1180 | nss.nss_protocol = PMAPPROG; |
1181 | nss.nss_version = 0; |
1182 | } else { |
1183 | nss.nss_port = nmp->nm_nfsport; |
1184 | nss.nss_protocol = NFS_PROG; |
1185 | nss.nss_version = nmp->nm_vers; |
1186 | } |
1187 | NFS_SOCK_DBG("nfs connect %s, so type %d port %d prot %d %d\n" , |
1188 | vfs_statfs(nmp->nm_mountp)->f_mntfromname, nss.nss_sotype, nss.nss_port, |
1189 | nss.nss_protocol, nss.nss_version); |
1190 | } |
1191 | |
1192 | /* Set next location to first valid location. */ |
1193 | /* If start location is invalid, find next location. */ |
1194 | nss.nss_nextloc = nss.nss_startloc; |
1195 | if ((nss.nss_nextloc.nli_serv >= nmp->nm_locations.nl_locations[nss.nss_nextloc.nli_loc]->nl_servcount) || |
1196 | (nss.nss_nextloc.nli_addr >= nmp->nm_locations.nl_locations[nss.nss_nextloc.nli_loc]->nl_servers[nss.nss_nextloc.nli_serv]->ns_addrcount)) { |
1197 | nfs_location_next(&nmp->nm_locations, &nss.nss_nextloc); |
1198 | if (!nfs_location_index_cmp(&nss.nss_nextloc, &nss.nss_startloc)) { |
1199 | NFS_SOCK_DBG("nfs connect %s search failed, couldn't find a valid location index\n" , |
1200 | vfs_statfs(nmp->nm_mountp)->f_mntfromname); |
1201 | return (ENOENT); |
1202 | } |
1203 | } |
1204 | nss.nss_last = -1; |
1205 | |
1206 | keepsearching: |
1207 | |
1208 | error = nfs_connect_search_loop(nmp, &nss); |
1209 | if (error || !nss.nss_sock) { |
1210 | /* search failed */ |
1211 | nfs_socket_search_cleanup(&nss); |
1212 | if (nss.nss_flags & NSS_FALLBACK2PMAP) { |
1213 | tryv4 = 0; |
1214 | NFS_SOCK_DBG("nfs connect %s TCP failed for V4 %d %d, trying PORTMAP\n" , |
1215 | vfs_statfs(nmp->nm_mountp)->f_mntfromname, error, nss.nss_error); |
1216 | goto tryagain; |
1217 | } |
1218 | |
1219 | if (!error && (nss.nss_sotype == SOCK_STREAM) && !nmp->nm_sotype && (nmp->nm_vers < NFS_VER4)) { |
1220 | /* Try using UDP */ |
1221 | sotype = SOCK_DGRAM; |
1222 | savederror = nss.nss_error; |
1223 | NFS_SOCK_DBG("nfs connect %s TCP failed %d %d, trying UDP\n" , |
1224 | vfs_statfs(nmp->nm_mountp)->f_mntfromname, error, nss.nss_error); |
1225 | goto tryagain; |
1226 | } |
1227 | if (!error) |
1228 | error = nss.nss_error ? nss.nss_error : ETIMEDOUT; |
1229 | lck_mtx_lock(&nmp->nm_lock); |
1230 | nmp->nm_sockflags &= ~NMSOCK_CONNECTING; |
1231 | nmp->nm_nss = NULL; |
1232 | lck_mtx_unlock(&nmp->nm_lock); |
1233 | if (nss.nss_flags & NSS_WARNED) |
1234 | log(LOG_INFO, "nfs_connect: socket connect aborted for %s\n" , |
1235 | vfs_statfs(nmp->nm_mountp)->f_mntfromname); |
1236 | if (fh) |
1237 | FREE(fh, M_TEMP); |
1238 | if (path) |
1239 | FREE_ZONE(path, MAXPATHLEN, M_NAMEI); |
1240 | NFS_SOCK_DBG("nfs connect %s search failed, returning %d\n" , |
1241 | vfs_statfs(nmp->nm_mountp)->f_mntfromname, error); |
1242 | return (error); |
1243 | } |
1244 | |
1245 | /* try to use nss_sock */ |
1246 | nso = nss.nss_sock; |
1247 | nss.nss_sock = NULL; |
1248 | |
1249 | /* We may be speaking to portmap first... to determine port(s). */ |
1250 | if (nso->nso_saddr->sa_family == AF_INET) |
1251 | port = ntohs(((struct sockaddr_in*)nso->nso_saddr)->sin_port); |
1252 | else |
1253 | port = ntohs(((struct sockaddr_in6*)nso->nso_saddr)->sin6_port); |
1254 | if (port == PMAPPORT) { |
1255 | /* Use this portmapper port to get the port #s we need. */ |
1256 | NFS_SOCK_DBG("nfs connect %s got portmapper socket %p\n" , |
1257 | vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso); |
1258 | |
1259 | /* remove the connect upcall so nfs_portmap_lookup() can use this socket */ |
1260 | sock_setupcall(nso->nso_so, NULL, NULL); |
1261 | |
1262 | /* Set up socket address and port for NFS socket. */ |
1263 | bcopy(nso->nso_saddr, &ss, nso->nso_saddr->sa_len); |
1264 | |
1265 | /* If NFS version not set, try nm_max_vers down to nm_min_vers */ |
1266 | nfsvers = nmp->nm_vers ? nmp->nm_vers : PVER2MAJOR(nmp->nm_max_vers); |
1267 | if (!(port = nmp->nm_nfsport)) { |
1268 | if (ss.ss_family == AF_INET) |
1269 | ((struct sockaddr_in*)&ss)->sin_port = htons(0); |
1270 | else if (ss.ss_family == AF_INET6) |
1271 | ((struct sockaddr_in6*)&ss)->sin6_port = htons(0); |
1272 | for (; nfsvers >= (int)PVER2MAJOR(nmp->nm_min_vers); nfsvers--) { |
1273 | if (nmp->nm_vers && nmp->nm_vers != nfsvers) |
1274 | continue; /* Wrong version */ |
1275 | if (nfsvers == NFS_VER4 && nso->nso_sotype == SOCK_DGRAM) |
1276 | continue; /* NFSv4 does not do UDP */ |
1277 | error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss, |
1278 | nso->nso_so, NFS_PROG, nfsvers, |
1279 | (nso->nso_sotype == SOCK_DGRAM) ? IPPROTO_UDP : IPPROTO_TCP, timeo); |
1280 | if (!error) { |
1281 | if (ss.ss_family == AF_INET) |
1282 | port = ntohs(((struct sockaddr_in*)&ss)->sin_port); |
1283 | else if (ss.ss_family == AF_INET6) |
1284 | port = ntohs(((struct sockaddr_in6*)&ss)->sin6_port); |
1285 | if (!port) |
1286 | error = EPROGUNAVAIL; |
1287 | if (port == NFS_PORT && nfsvers == NFS_VER4 && tryv4 == 0) |
1288 | continue; /* We already tried this */ |
1289 | } |
1290 | if (!error) |
1291 | break; |
1292 | } |
1293 | if (nfsvers < (int)PVER2MAJOR(nmp->nm_min_vers) && error == 0) |
1294 | error = EPROGUNAVAIL; |
1295 | if (error) { |
1296 | nfs_socket_search_update_error(&nss, error); |
1297 | nfs_socket_destroy(nso); |
1298 | goto keepsearching; |
1299 | } |
1300 | } |
1301 | /* Create NFS protocol socket and add it to the list of sockets. */ |
1302 | /* N.B. If nfsvers is NFS_VER4 at this point then we're on a non standard port */ |
1303 | error = nfs_socket_create(nmp, (struct sockaddr*)&ss, nso->nso_sotype, port, |
1304 | NFS_PROG, nfsvers, NMFLAG(nmp, RESVPORT), &nsonfs); |
1305 | if (error) { |
1306 | nfs_socket_search_update_error(&nss, error); |
1307 | nfs_socket_destroy(nso); |
1308 | goto keepsearching; |
1309 | } |
1310 | nsonfs->nso_location = nso->nso_location; |
1311 | nsonfs->nso_wake = &nss; |
1312 | error = sock_setupcall(nsonfs->nso_so, nfs_connect_upcall, nsonfs); |
1313 | if (error) { |
1314 | nfs_socket_search_update_error(&nss, error); |
1315 | nfs_socket_destroy(nsonfs); |
1316 | nfs_socket_destroy(nso); |
1317 | goto keepsearching; |
1318 | } |
1319 | TAILQ_INSERT_TAIL(&nss.nss_socklist, nsonfs, nso_link); |
1320 | nss.nss_sockcnt++; |
1321 | if ((nfsvers < NFS_VER4) && !(nmp->nm_sockflags & NMSOCK_HASCONNECTED) && !NM_OMATTR_GIVEN(nmp, FH)) { |
1322 | /* Set up socket address and port for MOUNT socket. */ |
1323 | error = 0; |
1324 | bcopy(nso->nso_saddr, &ss, nso->nso_saddr->sa_len); |
1325 | port = nmp->nm_mountport; |
1326 | if (ss.ss_family == AF_INET) |
1327 | ((struct sockaddr_in*)&ss)->sin_port = htons(port); |
1328 | else if (ss.ss_family == AF_INET6) |
1329 | ((struct sockaddr_in6*)&ss)->sin6_port = htons(port); |
1330 | if (!port) { |
1331 | /* Get port/sockaddr for MOUNT version corresponding to NFS version. */ |
1332 | /* If NFS version is unknown, optimistically choose for NFSv3. */ |
1333 | int mntvers = (nfsvers == NFS_VER2) ? RPCMNT_VER1 : RPCMNT_VER3; |
1334 | int mntproto = (NM_OMFLAG(nmp, MNTUDP) || (nso->nso_sotype == SOCK_DGRAM)) ? IPPROTO_UDP : IPPROTO_TCP; |
1335 | error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss, |
1336 | nso->nso_so, RPCPROG_MNT, mntvers, mntproto, timeo); |
1337 | } |
1338 | if (!error) { |
1339 | if (ss.ss_family == AF_INET) |
1340 | port = ntohs(((struct sockaddr_in*)&ss)->sin_port); |
1341 | else if (ss.ss_family == AF_INET6) |
1342 | port = ntohs(((struct sockaddr_in6*)&ss)->sin6_port); |
1343 | if (!port) |
1344 | error = EPROGUNAVAIL; |
1345 | } |
1346 | /* create sockaddr for MOUNT */ |
1347 | if (!error) |
1348 | MALLOC(nsonfs->nso_saddr2, struct sockaddr *, ss.ss_len, M_SONAME, M_WAITOK|M_ZERO); |
1349 | if (!error && !nsonfs->nso_saddr2) |
1350 | error = ENOMEM; |
1351 | if (!error) |
1352 | bcopy(&ss, nsonfs->nso_saddr2, ss.ss_len); |
1353 | if (error) { |
1354 | lck_mtx_lock(&nsonfs->nso_lock); |
1355 | nsonfs->nso_error = error; |
1356 | nsonfs->nso_flags |= NSO_DEAD; |
1357 | lck_mtx_unlock(&nsonfs->nso_lock); |
1358 | } |
1359 | } |
1360 | nfs_socket_destroy(nso); |
1361 | goto keepsearching; |
1362 | } |
1363 | |
1364 | /* nso is an NFS socket */ |
1365 | NFS_SOCK_DBG("nfs connect %s got NFS socket %p\n" , vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso); |
1366 | |
1367 | /* If NFS version wasn't specified, it was determined during the connect. */ |
1368 | nfsvers = nmp->nm_vers ? nmp->nm_vers : (int)nso->nso_version; |
1369 | |
1370 | /* Perform MOUNT call for initial NFSv2/v3 connection/mount. */ |
1371 | if ((nfsvers < NFS_VER4) && !(nmp->nm_sockflags & NMSOCK_HASCONNECTED) && !NM_OMATTR_GIVEN(nmp, FH)) { |
1372 | error = 0; |
1373 | saddr = nso->nso_saddr2; |
1374 | if (!saddr) { |
1375 | /* Need sockaddr for MOUNT port */ |
1376 | bcopy(nso->nso_saddr, &ss, nso->nso_saddr->sa_len); |
1377 | port = nmp->nm_mountport; |
1378 | if (ss.ss_family == AF_INET) |
1379 | ((struct sockaddr_in*)&ss)->sin_port = htons(port); |
1380 | else if (ss.ss_family == AF_INET6) |
1381 | ((struct sockaddr_in6*)&ss)->sin6_port = htons(port); |
1382 | if (!port) { |
1383 | /* Get port/sockaddr for MOUNT version corresponding to NFS version. */ |
1384 | int mntvers = (nfsvers == NFS_VER2) ? RPCMNT_VER1 : RPCMNT_VER3; |
1385 | int mntproto = (NM_OMFLAG(nmp, MNTUDP) || (nso->nso_sotype == SOCK_DGRAM)) ? IPPROTO_UDP : IPPROTO_TCP; |
1386 | error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss, |
1387 | NULL, RPCPROG_MNT, mntvers, mntproto, timeo); |
1388 | if (ss.ss_family == AF_INET) |
1389 | port = ntohs(((struct sockaddr_in*)&ss)->sin_port); |
1390 | else if (ss.ss_family == AF_INET6) |
1391 | port = ntohs(((struct sockaddr_in6*)&ss)->sin6_port); |
1392 | } |
1393 | if (!error) { |
1394 | if (port) |
1395 | saddr = (struct sockaddr*)&ss; |
1396 | else |
1397 | error = EPROGUNAVAIL; |
1398 | } |
1399 | } |
1400 | if (saddr) |
1401 | MALLOC(fh, fhandle_t *, sizeof(fhandle_t), M_TEMP, M_WAITOK|M_ZERO); |
1402 | if (saddr && fh) |
1403 | MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); |
1404 | if (!saddr || !fh || !path) { |
1405 | if (!error) |
1406 | error = ENOMEM; |
1407 | if (fh) |
1408 | FREE(fh, M_TEMP); |
1409 | if (path) |
1410 | FREE_ZONE(path, MAXPATHLEN, M_NAMEI); |
1411 | fh = NULL; |
1412 | path = NULL; |
1413 | nfs_socket_search_update_error(&nss, error); |
1414 | nfs_socket_destroy(nso); |
1415 | goto keepsearching; |
1416 | } |
1417 | nfs_location_mntfromname(&nmp->nm_locations, nso->nso_location, path, MAXPATHLEN, 1); |
1418 | error = nfs3_mount_rpc(nmp, saddr, nso->nso_sotype, nfsvers, |
1419 | path, vfs_context_current(), timeo, fh, &nmp->nm_servsec); |
1420 | NFS_SOCK_DBG("nfs connect %s socket %p mount %d\n" , |
1421 | vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error); |
1422 | if (!error) { |
1423 | /* Make sure we can agree on a security flavor. */ |
1424 | int o, s; /* indices into mount option and server security flavor lists */ |
1425 | int found = 0; |
1426 | |
1427 | if ((nfsvers == NFS_VER3) && !nmp->nm_servsec.count) { |
1428 | /* Some servers return an empty list to indicate RPCAUTH_SYS? */ |
1429 | nmp->nm_servsec.count = 1; |
1430 | nmp->nm_servsec.flavors[0] = RPCAUTH_SYS; |
1431 | } |
1432 | if (nmp->nm_sec.count) { |
1433 | /* Choose the first flavor in our list that the server supports. */ |
1434 | if (!nmp->nm_servsec.count) { |
1435 | /* we don't know what the server supports, just use our first choice */ |
1436 | nmp->nm_auth = nmp->nm_sec.flavors[0]; |
1437 | found = 1; |
1438 | } |
1439 | for (o=0; !found && (o < nmp->nm_sec.count); o++) |
1440 | for (s=0; !found && (s < nmp->nm_servsec.count); s++) |
1441 | if (nmp->nm_sec.flavors[o] == nmp->nm_servsec.flavors[s]) { |
1442 | nmp->nm_auth = nmp->nm_sec.flavors[o]; |
1443 | found = 1; |
1444 | } |
1445 | } else { |
1446 | /* Choose the first one we support from the server's list. */ |
1447 | if (!nmp->nm_servsec.count) { |
1448 | nmp->nm_auth = RPCAUTH_SYS; |
1449 | found = 1; |
1450 | } |
1451 | for (s=0; s < nmp->nm_servsec.count; s++) |
1452 | switch (nmp->nm_servsec.flavors[s]) { |
1453 | case RPCAUTH_SYS: |
1454 | /* prefer RPCAUTH_SYS to RPCAUTH_NONE */ |
1455 | if (found && (nmp->nm_auth == RPCAUTH_NONE)) |
1456 | found = 0; |
1457 | case RPCAUTH_NONE: |
1458 | case RPCAUTH_KRB5: |
1459 | case RPCAUTH_KRB5I: |
1460 | case RPCAUTH_KRB5P: |
1461 | if (!found) { |
1462 | nmp->nm_auth = nmp->nm_servsec.flavors[s]; |
1463 | found = 1; |
1464 | } |
1465 | break; |
1466 | } |
1467 | } |
1468 | error = !found ? EAUTH : 0; |
1469 | } |
1470 | FREE_ZONE(path, MAXPATHLEN, M_NAMEI); |
1471 | path = NULL; |
1472 | if (error) { |
1473 | nfs_socket_search_update_error(&nss, error); |
1474 | FREE(fh, M_TEMP); |
1475 | fh = NULL; |
1476 | nfs_socket_destroy(nso); |
1477 | goto keepsearching; |
1478 | } |
1479 | if (nmp->nm_fh) |
1480 | FREE(nmp->nm_fh, M_TEMP); |
1481 | nmp->nm_fh = fh; |
1482 | fh = NULL; |
1483 | NFS_BITMAP_SET(nmp->nm_flags, NFS_MFLAG_CALLUMNT); |
1484 | } |
1485 | |
1486 | /* put the real upcall in place */ |
1487 | upcall = (nso->nso_sotype == SOCK_STREAM) ? nfs_tcp_rcv : nfs_udp_rcv; |
1488 | error = sock_setupcall(nso->nso_so, upcall, nmp); |
1489 | if (error) { |
1490 | nfs_socket_search_update_error(&nss, error); |
1491 | nfs_socket_destroy(nso); |
1492 | goto keepsearching; |
1493 | } |
1494 | |
1495 | if (!(nmp->nm_sockflags & NMSOCK_HASCONNECTED)) { |
1496 | /* set mntfromname to this location */ |
1497 | if (!NM_OMATTR_GIVEN(nmp, MNTFROM)) |
1498 | nfs_location_mntfromname(&nmp->nm_locations, nso->nso_location, |
1499 | vfs_statfs(nmp->nm_mountp)->f_mntfromname, |
1500 | sizeof(vfs_statfs(nmp->nm_mountp)->f_mntfromname), 0); |
1501 | /* some negotiated values need to remain unchanged for the life of the mount */ |
1502 | if (!nmp->nm_sotype) |
1503 | nmp->nm_sotype = nso->nso_sotype; |
1504 | if (!nmp->nm_vers) { |
1505 | nmp->nm_vers = nfsvers; |
1506 | /* If we negotiated NFSv4, set nm_nfsport if we ended up on the standard NFS port */ |
1507 | if ((nfsvers >= NFS_VER4) && !NFS_BITMAP_ISSET(nmp->nm_mattrs, NFS_MATTR_NFS_PORT)) { |
1508 | if (nso->nso_saddr->sa_family == AF_INET) |
1509 | port = ((struct sockaddr_in*)nso->nso_saddr)->sin_port = htons(port); |
1510 | else if (nso->nso_saddr->sa_family == AF_INET6) |
1511 | port = ((struct sockaddr_in6*)nso->nso_saddr)->sin6_port = htons(port); |
1512 | else |
1513 | port = 0; |
1514 | if (port == NFS_PORT) |
1515 | nmp->nm_nfsport = NFS_PORT; |
1516 | } |
1517 | } |
1518 | /* do some version-specific pre-mount set up */ |
1519 | if (nmp->nm_vers >= NFS_VER4) { |
1520 | microtime(&now); |
1521 | nmp->nm_mounttime = ((uint64_t)now.tv_sec << 32) | now.tv_usec; |
1522 | if (!NMFLAG(nmp, NOCALLBACK)) |
1523 | nfs4_mount_callback_setup(nmp); |
1524 | } |
1525 | } |
1526 | |
1527 | /* Initialize NFS socket state variables */ |
1528 | lck_mtx_lock(&nmp->nm_lock); |
1529 | nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = |
1530 | nmp->nm_srtt[3] = (NFS_TIMEO << 3); |
1531 | nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] = |
1532 | nmp->nm_sdrtt[3] = 0; |
1533 | if (nso->nso_sotype == SOCK_DGRAM) { |
1534 | nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */ |
1535 | nmp->nm_sent = 0; |
1536 | } else if (nso->nso_sotype == SOCK_STREAM) { |
1537 | nmp->nm_timeouts = 0; |
1538 | } |
1539 | nmp->nm_sockflags &= ~NMSOCK_CONNECTING; |
1540 | nmp->nm_sockflags |= NMSOCK_SETUP; |
1541 | /* move the socket to the mount structure */ |
1542 | nmp->nm_nso = nso; |
1543 | oldsaddr = nmp->nm_saddr; |
1544 | nmp->nm_saddr = nso->nso_saddr; |
1545 | lck_mtx_unlock(&nmp->nm_lock); |
1546 | error = nfs_connect_setup(nmp); |
1547 | lck_mtx_lock(&nmp->nm_lock); |
1548 | nmp->nm_sockflags &= ~NMSOCK_SETUP; |
1549 | if (!error) { |
1550 | nmp->nm_sockflags |= NMSOCK_READY; |
1551 | wakeup(&nmp->nm_sockflags); |
1552 | } |
1553 | if (error) { |
1554 | NFS_SOCK_DBG("nfs connect %s socket %p setup failed %d\n" , |
1555 | vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error); |
1556 | nfs_socket_search_update_error(&nss, error); |
1557 | nmp->nm_saddr = oldsaddr; |
1558 | if (!(nmp->nm_sockflags & NMSOCK_HASCONNECTED)) { |
1559 | /* undo settings made prior to setup */ |
1560 | if (!NFS_BITMAP_ISSET(nmp->nm_mattrs, NFS_MATTR_SOCKET_TYPE)) |
1561 | nmp->nm_sotype = 0; |
1562 | if (!NFS_BITMAP_ISSET(nmp->nm_mattrs, NFS_MATTR_NFS_VERSION)) { |
1563 | if (nmp->nm_vers >= NFS_VER4) { |
1564 | if (!NFS_BITMAP_ISSET(nmp->nm_mattrs, NFS_MATTR_NFS_PORT)) |
1565 | nmp->nm_nfsport = 0; |
1566 | if (nmp->nm_cbid) |
1567 | nfs4_mount_callback_shutdown(nmp); |
1568 | if (IS_VALID_CRED(nmp->nm_mcred)) |
1569 | kauth_cred_unref(&nmp->nm_mcred); |
1570 | bzero(&nmp->nm_un, sizeof(nmp->nm_un)); |
1571 | } |
1572 | nmp->nm_vers = 0; |
1573 | } |
1574 | } |
1575 | lck_mtx_unlock(&nmp->nm_lock); |
1576 | nmp->nm_nso = NULL; |
1577 | nfs_socket_destroy(nso); |
1578 | goto keepsearching; |
1579 | } |
1580 | |
1581 | /* update current location */ |
1582 | if ((nmp->nm_locations.nl_current.nli_flags & NLI_VALID) && |
1583 | (nmp->nm_locations.nl_current.nli_serv != nso->nso_location.nli_serv)) { |
1584 | /* server has changed, we should initiate failover/recovery */ |
1585 | // XXX |
1586 | } |
1587 | nmp->nm_locations.nl_current = nso->nso_location; |
1588 | nmp->nm_locations.nl_current.nli_flags |= NLI_VALID; |
1589 | |
1590 | if (!(nmp->nm_sockflags & NMSOCK_HASCONNECTED)) { |
1591 | /* We have now successfully connected... make a note of it. */ |
1592 | nmp->nm_sockflags |= NMSOCK_HASCONNECTED; |
1593 | } |
1594 | |
1595 | lck_mtx_unlock(&nmp->nm_lock); |
1596 | if (oldsaddr) |
1597 | FREE(oldsaddr, M_SONAME); |
1598 | |
1599 | if (nss.nss_flags & NSS_WARNED) |
1600 | log(LOG_INFO, "nfs_connect: socket connect completed for %s\n" , |
1601 | vfs_statfs(nmp->nm_mountp)->f_mntfromname); |
1602 | |
1603 | nmp->nm_nss = NULL; |
1604 | nfs_socket_search_cleanup(&nss); |
1605 | if (fh) |
1606 | FREE(fh, M_TEMP); |
1607 | if (path) |
1608 | FREE_ZONE(path, MAXPATHLEN, M_NAMEI); |
1609 | NFS_SOCK_DBG("nfs connect %s success\n" , vfs_statfs(nmp->nm_mountp)->f_mntfromname); |
1610 | return (0); |
1611 | } |
1612 | |
1613 | |
1614 | /* setup & confirm socket connection is functional */ |
1615 | int |
1616 | nfs_connect_setup(struct nfsmount *nmp) |
1617 | { |
1618 | int error = 0; |
1619 | |
1620 | if (nmp->nm_vers >= NFS_VER4) { |
1621 | if (nmp->nm_state & NFSSTA_CLIENTID) { |
1622 | /* first, try to renew our current state */ |
1623 | error = nfs4_renew(nmp, R_SETUP); |
1624 | if ((error == NFSERR_ADMIN_REVOKED) || |
1625 | (error == NFSERR_CB_PATH_DOWN) || |
1626 | (error == NFSERR_EXPIRED) || |
1627 | (error == NFSERR_LEASE_MOVED) || |
1628 | (error == NFSERR_STALE_CLIENTID)) { |
1629 | lck_mtx_lock(&nmp->nm_lock); |
1630 | nfs_need_recover(nmp, error); |
1631 | lck_mtx_unlock(&nmp->nm_lock); |
1632 | } |
1633 | } |
1634 | error = nfs4_setclientid(nmp); |
1635 | } |
1636 | return (error); |
1637 | } |
1638 | |
1639 | /* |
1640 | * NFS socket reconnect routine: |
1641 | * Called when a connection is broken. |
1642 | * - disconnect the old socket |
1643 | * - nfs_connect() again |
1644 | * - set R_MUSTRESEND for all outstanding requests on mount point |
1645 | * If this fails the mount point is DEAD! |
1646 | */ |
1647 | int |
1648 | nfs_reconnect(struct nfsmount *nmp) |
1649 | { |
1650 | struct nfsreq *rq; |
1651 | struct timeval now; |
1652 | thread_t thd = current_thread(); |
1653 | int error, wentdown = 0, verbose = 1; |
1654 | time_t lastmsg; |
1655 | int timeo; |
1656 | |
1657 | microuptime(&now); |
1658 | lastmsg = now.tv_sec - (nmp->nm_tprintf_delay - nmp->nm_tprintf_initial_delay); |
1659 | |
1660 | nfs_disconnect(nmp); |
1661 | |
1662 | |
1663 | lck_mtx_lock(&nmp->nm_lock); |
1664 | timeo = nfs_is_squishy(nmp) ? 8 : 30; |
1665 | lck_mtx_unlock(&nmp->nm_lock); |
1666 | |
1667 | while ((error = nfs_connect(nmp, verbose, timeo))) { |
1668 | verbose = 0; |
1669 | nfs_disconnect(nmp); |
1670 | if ((error == EINTR) || (error == ERESTART)) |
1671 | return (EINTR); |
1672 | if (error == EIO) |
1673 | return (EIO); |
1674 | microuptime(&now); |
1675 | if ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec) { |
1676 | lastmsg = now.tv_sec; |
1677 | nfs_down(nmp, thd, error, NFSSTA_TIMEO, "can not connect" , 0); |
1678 | wentdown = 1; |
1679 | } |
1680 | lck_mtx_lock(&nmp->nm_lock); |
1681 | if (!(nmp->nm_state & NFSSTA_MOUNTED)) { |
1682 | /* we're not yet completely mounted and */ |
1683 | /* we can't reconnect, so we fail */ |
1684 | lck_mtx_unlock(&nmp->nm_lock); |
1685 | NFS_SOCK_DBG("Not mounted returning %d\n" , error); |
1686 | return (error); |
1687 | } |
1688 | |
1689 | if (nfs_mount_check_dead_timeout(nmp)) { |
1690 | nfs_mount_make_zombie(nmp); |
1691 | lck_mtx_unlock(&nmp->nm_lock); |
1692 | return (ENXIO); |
1693 | } |
1694 | |
1695 | if ((error = nfs_sigintr(nmp, NULL, thd, 1))) { |
1696 | lck_mtx_unlock(&nmp->nm_lock); |
1697 | return (error); |
1698 | } |
1699 | lck_mtx_unlock(&nmp->nm_lock); |
1700 | tsleep(nfs_reconnect, PSOCK, "nfs_reconnect_delay" , 2*hz); |
1701 | if ((error = nfs_sigintr(nmp, NULL, thd, 0))) |
1702 | return (error); |
1703 | } |
1704 | |
1705 | if (wentdown) |
1706 | nfs_up(nmp, thd, NFSSTA_TIMEO, "connected" ); |
1707 | |
1708 | /* |
1709 | * Loop through outstanding request list and mark all requests |
1710 | * as needing a resend. (Though nfs_need_reconnect() probably |
1711 | * marked them all already.) |
1712 | */ |
1713 | lck_mtx_lock(nfs_request_mutex); |
1714 | TAILQ_FOREACH(rq, &nfs_reqq, r_chain) { |
1715 | if (rq->r_nmp == nmp) { |
1716 | lck_mtx_lock(&rq->r_mtx); |
1717 | if (!rq->r_error && !rq->r_nmrep.nmc_mhead && !(rq->r_flags & R_MUSTRESEND)) { |
1718 | rq->r_flags |= R_MUSTRESEND; |
1719 | rq->r_rtt = -1; |
1720 | wakeup(rq); |
1721 | if ((rq->r_flags & (R_IOD|R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC) |
1722 | nfs_asyncio_resend(rq); |
1723 | } |
1724 | lck_mtx_unlock(&rq->r_mtx); |
1725 | } |
1726 | } |
1727 | lck_mtx_unlock(nfs_request_mutex); |
1728 | return (0); |
1729 | } |
1730 | |
1731 | /* |
1732 | * NFS disconnect. Clean up and unlink. |
1733 | */ |
1734 | void |
1735 | nfs_disconnect(struct nfsmount *nmp) |
1736 | { |
1737 | struct nfs_socket *nso; |
1738 | |
1739 | lck_mtx_lock(&nmp->nm_lock); |
1740 | tryagain: |
1741 | if (nmp->nm_nso) { |
1742 | struct timespec ts = { 1, 0 }; |
1743 | if (nmp->nm_state & NFSSTA_SENDING) { /* wait for sending to complete */ |
1744 | nmp->nm_state |= NFSSTA_WANTSND; |
1745 | msleep(&nmp->nm_state, &nmp->nm_lock, PZERO-1, "nfswaitsending" , &ts); |
1746 | goto tryagain; |
1747 | } |
1748 | if (nmp->nm_sockflags & NMSOCK_POKE) { /* wait for poking to complete */ |
1749 | msleep(&nmp->nm_sockflags, &nmp->nm_lock, PZERO-1, "nfswaitpoke" , &ts); |
1750 | goto tryagain; |
1751 | } |
1752 | nmp->nm_sockflags |= NMSOCK_DISCONNECTING; |
1753 | nmp->nm_sockflags &= ~NMSOCK_READY; |
1754 | nso = nmp->nm_nso; |
1755 | nmp->nm_nso = NULL; |
1756 | if (nso->nso_saddr == nmp->nm_saddr) |
1757 | nso->nso_saddr = NULL; |
1758 | lck_mtx_unlock(&nmp->nm_lock); |
1759 | nfs_socket_destroy(nso); |
1760 | lck_mtx_lock(&nmp->nm_lock); |
1761 | nmp->nm_sockflags &= ~NMSOCK_DISCONNECTING; |
1762 | lck_mtx_unlock(&nmp->nm_lock); |
1763 | } else { |
1764 | lck_mtx_unlock(&nmp->nm_lock); |
1765 | } |
1766 | } |
1767 | |
1768 | /* |
1769 | * mark an NFS mount as needing a reconnect/resends. |
1770 | */ |
1771 | void |
1772 | nfs_need_reconnect(struct nfsmount *nmp) |
1773 | { |
1774 | struct nfsreq *rq; |
1775 | |
1776 | lck_mtx_lock(&nmp->nm_lock); |
1777 | nmp->nm_sockflags &= ~(NMSOCK_READY|NMSOCK_SETUP); |
1778 | lck_mtx_unlock(&nmp->nm_lock); |
1779 | |
1780 | /* |
1781 | * Loop through outstanding request list and |
1782 | * mark all requests as needing a resend. |
1783 | */ |
1784 | lck_mtx_lock(nfs_request_mutex); |
1785 | TAILQ_FOREACH(rq, &nfs_reqq, r_chain) { |
1786 | if (rq->r_nmp == nmp) { |
1787 | lck_mtx_lock(&rq->r_mtx); |
1788 | if (!rq->r_error && !rq->r_nmrep.nmc_mhead && !(rq->r_flags & R_MUSTRESEND)) { |
1789 | rq->r_flags |= R_MUSTRESEND; |
1790 | rq->r_rtt = -1; |
1791 | wakeup(rq); |
1792 | if ((rq->r_flags & (R_IOD|R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC) |
1793 | nfs_asyncio_resend(rq); |
1794 | } |
1795 | lck_mtx_unlock(&rq->r_mtx); |
1796 | } |
1797 | } |
1798 | lck_mtx_unlock(nfs_request_mutex); |
1799 | } |
1800 | |
1801 | |
/*
 * thread to handle miscellaneous async NFS socket work (reconnects/resends)
 *
 * The main loop keeps running while the socket is not ready, requests are
 * queued on nm_resendq, nodes are on nm_monlist, a dead timeout is running
 * (nm_deadto_start), state recovery is pending (NFSSTA_RECOVER), or (NFSv4)
 * delegations are queued on nm_dreturnq.  Each pass, in order:
 *   - reconnects the socket if it is not ready
 *   - performs state recovery if requested
 *   - returns queued NFSv4 delegations
 *   - resends (or finishes) requests on the resend queue
 *   - checks the dead timeout, making the mount a zombie if it expired
 *   - refreshes the attributes of monitored nodes
 *   - sleeps on nm_sockthd (only when the socket is ready, or recovery or
 *     an unmount is in progress)
 * The loop exits on NMSOCK_UNMOUNT, NFSSTA_FORCE/NFSSTA_DEAD, or an expired
 * dead timeout.  On the way out the thread sends the unmount RPC if
 * requested, clears nm_sockthd, wakes any waiter, and terminates itself.
 *
 * nm_lock is held while mount state is examined and is dropped before
 * calling out to do the actual work.
 *
 * arg is the struct nfsmount this thread serves.
 */
void
nfs_mount_sock_thread(void *arg, __unused wait_result_t wr)
{
	struct nfsmount *nmp = arg;
	struct timespec ts = { 30, 0 };	/* tv_sec is reset to 1 or 5 before each sleep */
	thread_t thd = current_thread();
	struct nfsreq *req;
	struct timeval now;
	int error, dofinish;
	nfsnode_t np;
	int do_reconnect_sleep = 0;	/* count of consecutive EIO/EINTR reconnect failures */

	lck_mtx_lock(&nmp->nm_lock);
	while (!(nmp->nm_sockflags & NMSOCK_READY) ||
	       !TAILQ_EMPTY(&nmp->nm_resendq) ||
	       !LIST_EMPTY(&nmp->nm_monlist) ||
	       nmp->nm_deadto_start ||
	       (nmp->nm_state & NFSSTA_RECOVER) ||
	       ((nmp->nm_vers >= NFS_VER4) && !TAILQ_EMPTY(&nmp->nm_dreturnq)))
	{
		if (nmp->nm_sockflags & NMSOCK_UNMOUNT)
			break;
		/* do reconnect, if necessary */
		if (!(nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD))) {
			/* note when this run of reconnect attempts started */
			if (nmp->nm_reconnect_start <= 0) {
				microuptime(&now);
				nmp->nm_reconnect_start = now.tv_sec;
			}
			lck_mtx_unlock(&nmp->nm_lock);
			NFS_SOCK_DBG("nfs reconnect %s\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			/*
			 * XXX We don't want to call reconnect again right away if returned errors
			 * before that may not have blocked. This has caused spamming null procs
			 * from machines in the past.
			 */
			if (do_reconnect_sleep)
				tsleep(nfs_mount_sock_thread, PSOCK, "nfs_reconnect_sock_thread_delay", hz);
			error = nfs_reconnect(nmp);
			if (error) {
				int lvl = 7;
				if (error == EIO || error == EINTR) {
					/* use level 0 for the first and every 600th such failure, 7 otherwise */
					lvl = (do_reconnect_sleep++ % 600) ? 7 : 0;
				}
				nfs_printf(NFS_FAC_SOCK, lvl, "nfs reconnect %s: returned %d\n",
					vfs_statfs(nmp->nm_mountp)->f_mntfromname, error);
			} else {
				nmp->nm_reconnect_start = 0;
				do_reconnect_sleep = 0;
			}
			lck_mtx_lock(&nmp->nm_lock);
		}
		if ((nmp->nm_sockflags & NMSOCK_READY) &&
		    (nmp->nm_state & NFSSTA_RECOVER) &&
		    !(nmp->nm_sockflags & NMSOCK_UNMOUNT) &&
		    !(nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD))) {
			/* perform state recovery */
			lck_mtx_unlock(&nmp->nm_lock);
			nfs_recover(nmp);
			lck_mtx_lock(&nmp->nm_lock);
		}
		/* handle NFSv4 delegation returns */
		while ((nmp->nm_vers >= NFS_VER4) && !(nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD)) &&
		       (nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & NFSSTA_RECOVER) &&
		       ((np = TAILQ_FIRST(&nmp->nm_dreturnq)))) {
			lck_mtx_unlock(&nmp->nm_lock);
			nfs4_delegation_return(np, R_RECOVER, thd, nmp->nm_mcred);
			lck_mtx_lock(&nmp->nm_lock);
		}
		/* do resends, if necessary/possible */
		while ((((nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & NFSSTA_RECOVER)) ||
			(nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD))) &&
		       ((req = TAILQ_FIRST(&nmp->nm_resendq)))) {
			if (req->r_resendtime)
				microuptime(&now);
			/* skip requests whose resend time has not arrived (unless forced/dead) */
			while (req && !(nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD)) && req->r_resendtime && (now.tv_sec < req->r_resendtime))
				req = TAILQ_NEXT(req, r_rchain);
			if (!req)
				break;
			/* take the request off the resend queue */
			TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
			req->r_rchain.tqe_next = NFSREQNOLIST;
			lck_mtx_unlock(&nmp->nm_lock);
			lck_mtx_lock(&req->r_mtx);
			/* Note that we have a reference on the request that was taken nfs_asyncio_resend */
			if (req->r_error || req->r_nmrep.nmc_mhead) {
				/* request already has an error or a reply: no resend needed */
				dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
				req->r_flags &= ~R_RESENDQ;
				wakeup(req);
				lck_mtx_unlock(&req->r_mtx);
				if (dofinish)
					nfs_asyncio_finish(req);
				nfs_request_rele(req);
				lck_mtx_lock(&nmp->nm_lock);
				continue;
			}
			if ((req->r_flags & R_RESTART) || nfs_request_using_gss(req)) {
				/* restart path: re-add the header and send the request again */
				req->r_flags &= ~R_RESTART;
				req->r_resendtime = 0;
				lck_mtx_unlock(&req->r_mtx);
				/* async RPCs on GSS mounts need to be rebuilt and resent. */
				nfs_reqdequeue(req);
				if (nfs_request_using_gss(req)) {
					nfs_gss_clnt_rpcdone(req);
					error = nfs_gss_clnt_args_restore(req);
					if (error == ENEEDAUTH)
						req->r_xid = 0;
				}
				NFS_SOCK_DBG("nfs async%s restart: p %d x 0x%llx f 0x%x rtt %d\n",
					nfs_request_using_gss(req) ? " gss" : "", req->r_procnum, req->r_xid,
					req->r_flags, req->r_rtt);
				error = nfs_sigintr(nmp, req, req->r_thread, 0);
				if (!error)
					error = nfs_request_add_header(req);
				if (!error)
					error = nfs_request_send(req, 0);
				lck_mtx_lock(&req->r_mtx);
				if (req->r_flags & R_RESENDQ)
					req->r_flags &= ~R_RESENDQ;
				if (error)
					req->r_error = error;
				wakeup(req);
				/* on this path the async callback is only run when the restart failed */
				dofinish = error && req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
				lck_mtx_unlock(&req->r_mtx);
				if (dofinish)
					nfs_asyncio_finish(req);
				nfs_request_rele(req);
				lck_mtx_lock(&nmp->nm_lock);
				error = 0;
				continue;
			}
			/* plain resend of the request */
			NFS_SOCK_DBG("nfs async resend: p %d x 0x%llx f 0x%x rtt %d\n",
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt);
			error = nfs_sigintr(nmp, req, req->r_thread, 0);
			if (!error) {
				req->r_flags |= R_SENDING;
				lck_mtx_unlock(&req->r_mtx);
				error = nfs_send(req, 0);
				lck_mtx_lock(&req->r_mtx);
				if (!error) {
					if (req->r_flags & R_RESENDQ)
						req->r_flags &= ~R_RESENDQ;
					wakeup(req);
					lck_mtx_unlock(&req->r_mtx);
					nfs_request_rele(req);
					lck_mtx_lock(&nmp->nm_lock);
					continue;
				}
			}
			/* the resend failed (or was interrupted): record the error on the request */
			req->r_error = error;
			if (req->r_flags & R_RESENDQ)
				req->r_flags &= ~R_RESENDQ;
			wakeup(req);
			dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT);
			lck_mtx_unlock(&req->r_mtx);
			if (dofinish)
				nfs_asyncio_finish(req);
			nfs_request_rele(req);
			lck_mtx_lock(&nmp->nm_lock);
		}
		if (nfs_mount_check_dead_timeout(nmp)) {
			nfs_mount_make_zombie(nmp);
			break;
		}

		if (nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD))
			break;
		/* check monitored nodes, if necessary/possible */
		if (!LIST_EMPTY(&nmp->nm_monlist)) {
			nmp->nm_state |= NFSSTA_MONITOR_SCAN;
			LIST_FOREACH(np, &nmp->nm_monlist, n_monlink) {
				if (!(nmp->nm_sockflags & NMSOCK_READY) ||
				    (nmp->nm_state & (NFSSTA_RECOVER|NFSSTA_UNMOUNTING|NFSSTA_FORCE|NFSSTA_DEAD)))
					break;
				/* flag the node as being scanned while nm_lock is dropped */
				np->n_mflag |= NMMONSCANINPROG;
				lck_mtx_unlock(&nmp->nm_lock);
				error = nfs_getattr(np, NULL, vfs_context_kernel(), (NGA_UNCACHED|NGA_MONITOR));
				if (!error && ISSET(np->n_flag, NUPDATESIZE)) /* update quickly to avoid multiple events */
					nfs_data_update_size(np, 0);
				lck_mtx_lock(&nmp->nm_lock);
				np->n_mflag &= ~NMMONSCANINPROG;
				/* wake anyone who asked to be told when this node's scan finished */
				if (np->n_mflag & NMMONSCANWANT) {
					np->n_mflag &= ~NMMONSCANWANT;
					wakeup(&np->n_mflag);
				}
				if (error || !(nmp->nm_sockflags & NMSOCK_READY) ||
				    (nmp->nm_state & (NFSSTA_RECOVER|NFSSTA_UNMOUNTING|NFSSTA_FORCE|NFSSTA_DEAD)))
					break;
			}
			nmp->nm_state &= ~NFSSTA_MONITOR_SCAN;
			if (nmp->nm_state & NFSSTA_UNMOUNTING)
				wakeup(&nmp->nm_state); /* let unmounting thread know scan is done */
		}
		if ((nmp->nm_sockflags & NMSOCK_READY) || (nmp->nm_state & (NFSSTA_RECOVER|NFSSTA_UNMOUNTING))) {
			/* sleep 1 second when there is pending work, otherwise 5 */
			if (nmp->nm_deadto_start || !TAILQ_EMPTY(&nmp->nm_resendq) ||
			    (nmp->nm_state & NFSSTA_RECOVER))
				ts.tv_sec = 1;
			else
				ts.tv_sec = 5;
			msleep(&nmp->nm_sockthd, &nmp->nm_lock, PSOCK, "nfssockthread", &ts);
		}
	}

	/* If we're unmounting, send the unmount RPC, if requested/appropriate. */
	if ((nmp->nm_sockflags & NMSOCK_UNMOUNT) &&
	    (nmp->nm_state & NFSSTA_MOUNTED) && NMFLAG(nmp, CALLUMNT) &&
	    (nmp->nm_vers < NFS_VER4) && !(nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD))) {
		lck_mtx_unlock(&nmp->nm_lock);
		nfs3_umount_rpc(nmp, vfs_context_kernel(),
			(nmp->nm_sockflags & NMSOCK_READY) ? 6 : 2);
		lck_mtx_lock(&nmp->nm_lock);
	}

	/* only clear nm_sockthd if it still refers to this thread */
	if (nmp->nm_sockthd == thd)
		nmp->nm_sockthd = NULL;
	lck_mtx_unlock(&nmp->nm_lock);
	wakeup(&nmp->nm_sockthd);
	thread_terminate(thd);
}
2022 | |
2023 | /* start or wake a mount's socket thread */ |
2024 | void |
2025 | nfs_mount_sock_thread_wake(struct nfsmount *nmp) |
2026 | { |
2027 | if (nmp->nm_sockthd) |
2028 | wakeup(&nmp->nm_sockthd); |
2029 | else if (kernel_thread_start(nfs_mount_sock_thread, nmp, &nmp->nm_sockthd) == KERN_SUCCESS) |
2030 | thread_deallocate(nmp->nm_sockthd); |
2031 | } |
2032 | |
2033 | /* |
2034 | * Check if we should mark the mount dead because the |
2035 | * unresponsive mount has reached the dead timeout. |
2036 | * (must be called with nmp locked) |
2037 | */ |
2038 | int |
2039 | nfs_mount_check_dead_timeout(struct nfsmount *nmp) |
2040 | { |
2041 | struct timeval now; |
2042 | |
2043 | if (nmp->nm_state & NFSSTA_DEAD) |
2044 | return 1; |
2045 | if (nmp->nm_deadto_start == 0) |
2046 | return 0; |
2047 | nfs_is_squishy(nmp); |
2048 | if (nmp->nm_curdeadtimeout <= 0) |
2049 | return 0; |
2050 | microuptime(&now); |
2051 | if ((now.tv_sec - nmp->nm_deadto_start) < nmp->nm_curdeadtimeout) |
2052 | return 0; |
2053 | return 1; |
2054 | } |
2055 | |
2056 | /* |
2057 | * Call nfs_mount_zombie to remove most of the |
2058 | * nfs state for the mount, and then ask to be forcibly unmounted. |
2059 | * |
2060 | * Assumes the nfs mount structure lock nm_lock is held. |
2061 | */ |
2062 | |
2063 | void |
2064 | nfs_mount_make_zombie(struct nfsmount *nmp) |
2065 | { |
2066 | fsid_t fsid; |
2067 | |
2068 | if (!nmp) |
2069 | return; |
2070 | |
2071 | if (nmp->nm_state & NFSSTA_DEAD) |
2072 | return; |
2073 | |
2074 | printf("nfs server %s: %sdead\n" , vfs_statfs(nmp->nm_mountp)->f_mntfromname, |
2075 | (nmp->nm_curdeadtimeout != nmp->nm_deadtimeout) ? "squished " : "" ); |
2076 | fsid = vfs_statfs(nmp->nm_mountp)->f_fsid; |
2077 | lck_mtx_unlock(&nmp->nm_lock); |
2078 | nfs_mount_zombie(nmp, NFSSTA_DEAD); |
2079 | vfs_event_signal(&fsid, VQ_DEAD, 0); |
2080 | lck_mtx_lock(&nmp->nm_lock); |
2081 | } |
2082 | |
2083 | |
/*
 * NFS callback channel socket state
 *
 * One of these is allocated by nfs4_cb_accept() for each accepted
 * callback connection and linked onto the nfs4_cb_socks list.  It is
 * freed by nfs4_callback_timer() or nfs4_mount_callback_shutdown().
 */
struct nfs_callback_socket
{
	TAILQ_ENTRY(nfs_callback_socket) ncbs_link;	/* entry on the nfs4_cb_socks list */
	socket_t			ncbs_so;	/* the socket */
	struct sockaddr_storage		ncbs_saddr;	/* socket address */
	struct nfs_rpc_record_state	ncbs_rrs;	/* RPC record parsing state */
	time_t				ncbs_stamp;	/* last accessed at */
	uint32_t			ncbs_flags;	/* see below */
};
/* ncbs_flags values */
#define NCBSOCK_UPCALL		0x0001	/* a receive upcall (nfs4_cb_rcv) is in progress */
#define NCBSOCK_UPCALLWANT	0x0002	/* another upcall is waiting for the one in progress */
#define NCBSOCK_DEAD		0x0004	/* socket is unwanted/closed; the cleanup timer reaps it */
2099 | |
/*
 * NFS callback channel state
 *
 * One listening socket for accepting socket connections from servers and
 * a list of connected sockets to handle callback requests on.
 * Mounts registered with the callback channel are assigned IDs and
 * put on a list so that the callback request handling code can match
 * the requests up with mounts.
 *
 * nfs4_mount_callback_setup() and nfs4_mount_callback_shutdown() update
 * these while holding nfs_global_mutex.
 */
socket_t nfs4_cb_so = NULL;		/* IPv4 listening socket */
socket_t nfs4_cb_so6 = NULL;		/* IPv6 listening socket */
in_port_t nfs4_cb_port = 0;		/* port the IPv4 listening socket is bound to */
in_port_t nfs4_cb_port6 = 0;		/* port the IPv6 listening socket is bound to */
uint32_t nfs4_cb_id = 0;		/* next callback ID to assign; 0 until the lists are initialized */
uint32_t nfs4_cb_so_usecount = 0;	/* number of mounts registered with the callback channel */
TAILQ_HEAD(nfs4_cb_sock_list,nfs_callback_socket) nfs4_cb_socks;	/* accepted callback sockets */
TAILQ_HEAD(nfs4_cb_mount_list,nfsmount) nfs4_cb_mounts;		/* mounts registered for callbacks */

/* handle one callback request received on a callback socket */
int nfs4_cb_handler(struct nfs_callback_socket *, mbuf_t);
2119 | |
2120 | /* |
2121 | * Set up the callback channel for the NFS mount. |
2122 | * |
2123 | * Initializes the callback channel socket state and |
2124 | * assigns a callback ID to the mount. |
2125 | */ |
2126 | void |
2127 | nfs4_mount_callback_setup(struct nfsmount *nmp) |
2128 | { |
2129 | struct sockaddr_in sin; |
2130 | struct sockaddr_in6 sin6; |
2131 | socket_t so = NULL; |
2132 | socket_t so6 = NULL; |
2133 | struct timeval timeo; |
2134 | int error, on = 1; |
2135 | in_port_t port; |
2136 | |
2137 | lck_mtx_lock(nfs_global_mutex); |
2138 | if (nfs4_cb_id == 0) { |
2139 | TAILQ_INIT(&nfs4_cb_mounts); |
2140 | TAILQ_INIT(&nfs4_cb_socks); |
2141 | nfs4_cb_id++; |
2142 | } |
2143 | nmp->nm_cbid = nfs4_cb_id++; |
2144 | if (nmp->nm_cbid == 0) |
2145 | nmp->nm_cbid = nfs4_cb_id++; |
2146 | nfs4_cb_so_usecount++; |
2147 | TAILQ_INSERT_HEAD(&nfs4_cb_mounts, nmp, nm_cblink); |
2148 | |
2149 | if (nfs4_cb_so) { |
2150 | lck_mtx_unlock(nfs_global_mutex); |
2151 | return; |
2152 | } |
2153 | |
2154 | /* IPv4 */ |
2155 | error = sock_socket(AF_INET, SOCK_STREAM, IPPROTO_TCP, nfs4_cb_accept, NULL, &nfs4_cb_so); |
2156 | if (error) { |
2157 | log(LOG_INFO, "nfs callback setup: error %d creating listening IPv4 socket\n" , error); |
2158 | goto fail; |
2159 | } |
2160 | so = nfs4_cb_so; |
2161 | |
2162 | sock_setsockopt(so, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)); |
2163 | sin.sin_len = sizeof(struct sockaddr_in); |
2164 | sin.sin_family = AF_INET; |
2165 | sin.sin_addr.s_addr = htonl(INADDR_ANY); |
2166 | sin.sin_port = htons(nfs_callback_port); /* try to use specified port */ |
2167 | error = sock_bind(so, (struct sockaddr *)&sin); |
2168 | if (error) { |
2169 | log(LOG_INFO, "nfs callback setup: error %d binding listening IPv4 socket\n" , error); |
2170 | goto fail; |
2171 | } |
2172 | error = sock_getsockname(so, (struct sockaddr *)&sin, sin.sin_len); |
2173 | if (error) { |
2174 | log(LOG_INFO, "nfs callback setup: error %d getting listening IPv4 socket port\n" , error); |
2175 | goto fail; |
2176 | } |
2177 | nfs4_cb_port = ntohs(sin.sin_port); |
2178 | |
2179 | error = sock_listen(so, 32); |
2180 | if (error) { |
2181 | log(LOG_INFO, "nfs callback setup: error %d on IPv4 listen\n" , error); |
2182 | goto fail; |
2183 | } |
2184 | |
2185 | /* receive timeout shouldn't matter. If timeout on send, we'll want to drop the socket */ |
2186 | timeo.tv_usec = 0; |
2187 | timeo.tv_sec = 60; |
2188 | error = sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo)); |
2189 | if (error) |
2190 | log(LOG_INFO, "nfs callback setup: error %d setting IPv4 socket rx timeout\n" , error); |
2191 | error = sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo)); |
2192 | if (error) |
2193 | log(LOG_INFO, "nfs callback setup: error %d setting IPv4 socket tx timeout\n" , error); |
2194 | sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on)); |
2195 | sock_setsockopt(so, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on)); |
2196 | sock_setsockopt(so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on)); |
2197 | error = 0; |
2198 | |
2199 | /* IPv6 */ |
2200 | error = sock_socket(AF_INET6, SOCK_STREAM, IPPROTO_TCP, nfs4_cb_accept, NULL, &nfs4_cb_so6); |
2201 | if (error) { |
2202 | log(LOG_INFO, "nfs callback setup: error %d creating listening IPv6 socket\n" , error); |
2203 | goto fail; |
2204 | } |
2205 | so6 = nfs4_cb_so6; |
2206 | |
2207 | sock_setsockopt(so6, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)); |
2208 | sock_setsockopt(so6, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof(on)); |
2209 | /* try to use specified port or same port as IPv4 */ |
2210 | port = nfs_callback_port ? nfs_callback_port : nfs4_cb_port; |
2211 | ipv6_bind_again: |
2212 | sin6.sin6_len = sizeof(struct sockaddr_in6); |
2213 | sin6.sin6_family = AF_INET6; |
2214 | sin6.sin6_addr = in6addr_any; |
2215 | sin6.sin6_port = htons(port); |
2216 | error = sock_bind(so6, (struct sockaddr *)&sin6); |
2217 | if (error) { |
2218 | if (port != nfs_callback_port) { |
2219 | /* if we simply tried to match the IPv4 port, then try any port */ |
2220 | port = 0; |
2221 | goto ipv6_bind_again; |
2222 | } |
2223 | log(LOG_INFO, "nfs callback setup: error %d binding listening IPv6 socket\n" , error); |
2224 | goto fail; |
2225 | } |
2226 | error = sock_getsockname(so6, (struct sockaddr *)&sin6, sin6.sin6_len); |
2227 | if (error) { |
2228 | log(LOG_INFO, "nfs callback setup: error %d getting listening IPv6 socket port\n" , error); |
2229 | goto fail; |
2230 | } |
2231 | nfs4_cb_port6 = ntohs(sin6.sin6_port); |
2232 | |
2233 | error = sock_listen(so6, 32); |
2234 | if (error) { |
2235 | log(LOG_INFO, "nfs callback setup: error %d on IPv6 listen\n" , error); |
2236 | goto fail; |
2237 | } |
2238 | |
2239 | /* receive timeout shouldn't matter. If timeout on send, we'll want to drop the socket */ |
2240 | timeo.tv_usec = 0; |
2241 | timeo.tv_sec = 60; |
2242 | error = sock_setsockopt(so6, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo)); |
2243 | if (error) |
2244 | log(LOG_INFO, "nfs callback setup: error %d setting IPv6 socket rx timeout\n" , error); |
2245 | error = sock_setsockopt(so6, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo)); |
2246 | if (error) |
2247 | log(LOG_INFO, "nfs callback setup: error %d setting IPv6 socket tx timeout\n" , error); |
2248 | sock_setsockopt(so6, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on)); |
2249 | sock_setsockopt(so6, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on)); |
2250 | sock_setsockopt(so6, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on)); |
2251 | error = 0; |
2252 | |
2253 | fail: |
2254 | if (error) { |
2255 | nfs4_cb_so = nfs4_cb_so6 = NULL; |
2256 | lck_mtx_unlock(nfs_global_mutex); |
2257 | if (so) { |
2258 | sock_shutdown(so, SHUT_RDWR); |
2259 | sock_close(so); |
2260 | } |
2261 | if (so6) { |
2262 | sock_shutdown(so6, SHUT_RDWR); |
2263 | sock_close(so6); |
2264 | } |
2265 | } else { |
2266 | lck_mtx_unlock(nfs_global_mutex); |
2267 | } |
2268 | } |
2269 | |
2270 | /* |
2271 | * Shut down the callback channel for the NFS mount. |
2272 | * |
2273 | * Clears the mount's callback ID and releases the mounts |
2274 | * reference on the callback socket. Last reference dropped |
2275 | * will also shut down the callback socket(s). |
2276 | */ |
2277 | void |
2278 | nfs4_mount_callback_shutdown(struct nfsmount *nmp) |
2279 | { |
2280 | struct nfs_callback_socket *ncbsp; |
2281 | socket_t so, so6; |
2282 | struct nfs4_cb_sock_list cb_socks; |
2283 | struct timespec ts = {1,0}; |
2284 | |
2285 | lck_mtx_lock(nfs_global_mutex); |
2286 | TAILQ_REMOVE(&nfs4_cb_mounts, nmp, nm_cblink); |
2287 | /* wait for any callbacks in progress to complete */ |
2288 | while (nmp->nm_cbrefs) |
2289 | msleep(&nmp->nm_cbrefs, nfs_global_mutex, PSOCK, "cbshutwait" , &ts); |
2290 | nmp->nm_cbid = 0; |
2291 | if (--nfs4_cb_so_usecount) { |
2292 | lck_mtx_unlock(nfs_global_mutex); |
2293 | return; |
2294 | } |
2295 | so = nfs4_cb_so; |
2296 | so6 = nfs4_cb_so6; |
2297 | nfs4_cb_so = nfs4_cb_so6 = NULL; |
2298 | TAILQ_INIT(&cb_socks); |
2299 | TAILQ_CONCAT(&cb_socks, &nfs4_cb_socks, ncbs_link); |
2300 | lck_mtx_unlock(nfs_global_mutex); |
2301 | if (so) { |
2302 | sock_shutdown(so, SHUT_RDWR); |
2303 | sock_close(so); |
2304 | } |
2305 | if (so6) { |
2306 | sock_shutdown(so6, SHUT_RDWR); |
2307 | sock_close(so6); |
2308 | } |
2309 | while ((ncbsp = TAILQ_FIRST(&cb_socks))) { |
2310 | TAILQ_REMOVE(&cb_socks, ncbsp, ncbs_link); |
2311 | sock_shutdown(ncbsp->ncbs_so, SHUT_RDWR); |
2312 | sock_close(ncbsp->ncbs_so); |
2313 | nfs_rpc_record_state_cleanup(&ncbsp->ncbs_rrs); |
2314 | FREE(ncbsp, M_TEMP); |
2315 | } |
2316 | } |
2317 | |
2318 | /* |
2319 | * Check periodically for stale/unused nfs callback sockets |
2320 | */ |
2321 | #define NFS4_CB_TIMER_PERIOD 30 |
2322 | #define NFS4_CB_IDLE_MAX 300 |
2323 | void |
2324 | nfs4_callback_timer(__unused void *param0, __unused void *param1) |
2325 | { |
2326 | struct nfs_callback_socket *ncbsp, *nextncbsp; |
2327 | struct timeval now; |
2328 | |
2329 | loop: |
2330 | lck_mtx_lock(nfs_global_mutex); |
2331 | if (TAILQ_EMPTY(&nfs4_cb_socks)) { |
2332 | nfs4_callback_timer_on = 0; |
2333 | lck_mtx_unlock(nfs_global_mutex); |
2334 | return; |
2335 | } |
2336 | microuptime(&now); |
2337 | TAILQ_FOREACH_SAFE(ncbsp, &nfs4_cb_socks, ncbs_link, nextncbsp) { |
2338 | if (!(ncbsp->ncbs_flags & NCBSOCK_DEAD) && |
2339 | (now.tv_sec < (ncbsp->ncbs_stamp + NFS4_CB_IDLE_MAX))) |
2340 | continue; |
2341 | TAILQ_REMOVE(&nfs4_cb_socks, ncbsp, ncbs_link); |
2342 | lck_mtx_unlock(nfs_global_mutex); |
2343 | sock_shutdown(ncbsp->ncbs_so, SHUT_RDWR); |
2344 | sock_close(ncbsp->ncbs_so); |
2345 | nfs_rpc_record_state_cleanup(&ncbsp->ncbs_rrs); |
2346 | FREE(ncbsp, M_TEMP); |
2347 | goto loop; |
2348 | } |
2349 | nfs4_callback_timer_on = 1; |
2350 | nfs_interval_timer_start(nfs4_callback_timer_call, |
2351 | NFS4_CB_TIMER_PERIOD * 1000); |
2352 | lck_mtx_unlock(nfs_global_mutex); |
2353 | } |
2354 | |
2355 | /* |
2356 | * Accept a new callback socket. |
2357 | */ |
2358 | void |
2359 | nfs4_cb_accept(socket_t so, __unused void *arg, __unused int waitflag) |
2360 | { |
2361 | socket_t newso = NULL; |
2362 | struct nfs_callback_socket *ncbsp; |
2363 | struct nfsmount *nmp; |
2364 | struct timeval timeo, now; |
2365 | int error, on = 1, ip; |
2366 | |
2367 | if (so == nfs4_cb_so) |
2368 | ip = 4; |
2369 | else if (so == nfs4_cb_so6) |
2370 | ip = 6; |
2371 | else |
2372 | return; |
2373 | |
2374 | /* allocate/initialize a new nfs_callback_socket */ |
2375 | MALLOC(ncbsp, struct nfs_callback_socket *, sizeof(struct nfs_callback_socket), M_TEMP, M_WAITOK); |
2376 | if (!ncbsp) { |
2377 | log(LOG_ERR, "nfs callback accept: no memory for new socket\n" ); |
2378 | return; |
2379 | } |
2380 | bzero(ncbsp, sizeof(*ncbsp)); |
2381 | ncbsp->ncbs_saddr.ss_len = (ip == 4) ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6); |
2382 | nfs_rpc_record_state_init(&ncbsp->ncbs_rrs); |
2383 | |
2384 | /* accept a new socket */ |
2385 | error = sock_accept(so, (struct sockaddr*)&ncbsp->ncbs_saddr, |
2386 | ncbsp->ncbs_saddr.ss_len, MSG_DONTWAIT, |
2387 | nfs4_cb_rcv, ncbsp, &newso); |
2388 | if (error) { |
2389 | log(LOG_INFO, "nfs callback accept: error %d accepting IPv%d socket\n" , error, ip); |
2390 | FREE(ncbsp, M_TEMP); |
2391 | return; |
2392 | } |
2393 | |
2394 | /* set up the new socket */ |
2395 | /* receive timeout shouldn't matter. If timeout on send, we'll want to drop the socket */ |
2396 | timeo.tv_usec = 0; |
2397 | timeo.tv_sec = 60; |
2398 | error = sock_setsockopt(newso, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo)); |
2399 | if (error) |
2400 | log(LOG_INFO, "nfs callback socket: error %d setting IPv%d socket rx timeout\n" , error, ip); |
2401 | error = sock_setsockopt(newso, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo)); |
2402 | if (error) |
2403 | log(LOG_INFO, "nfs callback socket: error %d setting IPv%d socket tx timeout\n" , error, ip); |
2404 | sock_setsockopt(newso, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on)); |
2405 | sock_setsockopt(newso, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)); |
2406 | sock_setsockopt(newso, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on)); |
2407 | sock_setsockopt(newso, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on)); |
2408 | |
2409 | ncbsp->ncbs_so = newso; |
2410 | microuptime(&now); |
2411 | ncbsp->ncbs_stamp = now.tv_sec; |
2412 | |
2413 | lck_mtx_lock(nfs_global_mutex); |
2414 | |
2415 | /* add it to the list */ |
2416 | TAILQ_INSERT_HEAD(&nfs4_cb_socks, ncbsp, ncbs_link); |
2417 | |
2418 | /* verify it's from a host we have mounted */ |
2419 | TAILQ_FOREACH(nmp, &nfs4_cb_mounts, nm_cblink) { |
2420 | /* check if socket's source address matches this mount's server address */ |
2421 | if (!nmp->nm_saddr) |
2422 | continue; |
2423 | if (nfs_sockaddr_cmp((struct sockaddr*)&ncbsp->ncbs_saddr, nmp->nm_saddr) == 0) |
2424 | break; |
2425 | } |
2426 | if (!nmp) /* we don't want this socket, mark it dead */ |
2427 | ncbsp->ncbs_flags |= NCBSOCK_DEAD; |
2428 | |
2429 | /* make sure the callback socket cleanup timer is running */ |
2430 | /* (shorten the timer if we've got a socket we don't want) */ |
2431 | if (!nfs4_callback_timer_on) { |
2432 | nfs4_callback_timer_on = 1; |
2433 | nfs_interval_timer_start(nfs4_callback_timer_call, |
2434 | !nmp ? 500 : (NFS4_CB_TIMER_PERIOD * 1000)); |
2435 | } else if (!nmp && (nfs4_callback_timer_on < 2)) { |
2436 | nfs4_callback_timer_on = 2; |
2437 | thread_call_cancel(nfs4_callback_timer_call); |
2438 | nfs_interval_timer_start(nfs4_callback_timer_call, 500); |
2439 | } |
2440 | |
2441 | lck_mtx_unlock(nfs_global_mutex); |
2442 | } |
2443 | |
2444 | /* |
2445 | * Receive mbufs from callback sockets into RPC records and process each record. |
2446 | * Detect connection has been closed and shut down. |
2447 | */ |
2448 | void |
2449 | nfs4_cb_rcv(socket_t so, void *arg, __unused int waitflag) |
2450 | { |
2451 | struct nfs_callback_socket *ncbsp = arg; |
2452 | struct timespec ts = {1,0}; |
2453 | struct timeval now; |
2454 | mbuf_t m; |
2455 | int error = 0, recv = 1; |
2456 | |
2457 | lck_mtx_lock(nfs_global_mutex); |
2458 | while (ncbsp->ncbs_flags & NCBSOCK_UPCALL) { |
2459 | /* wait if upcall is already in progress */ |
2460 | ncbsp->ncbs_flags |= NCBSOCK_UPCALLWANT; |
2461 | msleep(ncbsp, nfs_global_mutex, PSOCK, "cbupcall" , &ts); |
2462 | } |
2463 | ncbsp->ncbs_flags |= NCBSOCK_UPCALL; |
2464 | lck_mtx_unlock(nfs_global_mutex); |
2465 | |
2466 | /* loop while we make error-free progress */ |
2467 | while (!error && recv) { |
2468 | error = nfs_rpc_record_read(so, &ncbsp->ncbs_rrs, MSG_DONTWAIT, &recv, &m); |
2469 | if (m) /* handle the request */ |
2470 | error = nfs4_cb_handler(ncbsp, m); |
2471 | } |
2472 | |
2473 | /* note: no error and no data indicates server closed its end */ |
2474 | if ((error != EWOULDBLOCK) && (error || !recv)) { |
2475 | /* |
2476 | * Socket is either being closed or should be. |
2477 | * We can't close the socket in the context of the upcall. |
2478 | * So we mark it as dead and leave it for the cleanup timer to reap. |
2479 | */ |
2480 | ncbsp->ncbs_stamp = 0; |
2481 | ncbsp->ncbs_flags |= NCBSOCK_DEAD; |
2482 | } else { |
2483 | microuptime(&now); |
2484 | ncbsp->ncbs_stamp = now.tv_sec; |
2485 | } |
2486 | |
2487 | lck_mtx_lock(nfs_global_mutex); |
2488 | ncbsp->ncbs_flags &= ~NCBSOCK_UPCALL; |
2489 | lck_mtx_unlock(nfs_global_mutex); |
2490 | wakeup(ncbsp); |
2491 | } |
2492 | |
2493 | /* |
2494 | * Handle an NFS callback channel request. |
2495 | */ |
2496 | int |
2497 | nfs4_cb_handler(struct nfs_callback_socket *ncbsp, mbuf_t mreq) |
2498 | { |
2499 | socket_t so = ncbsp->ncbs_so; |
2500 | struct nfsm_chain nmreq, nmrep; |
2501 | mbuf_t mhead = NULL, mrest = NULL, m; |
2502 | struct msghdr msg; |
2503 | struct nfsmount *nmp; |
2504 | fhandle_t fh; |
2505 | nfsnode_t np; |
2506 | nfs_stateid stateid; |
2507 | uint32_t bitmap[NFS_ATTR_BITMAP_LEN], rbitmap[NFS_ATTR_BITMAP_LEN], bmlen, truncate, attrbytes; |
2508 | uint32_t val, xid, procnum, taglen, cbid, numops, op, status; |
2509 | uint32_t auth_type, auth_len; |
2510 | uint32_t numres, *pnumres; |
2511 | int error = 0, replen, len; |
2512 | size_t sentlen = 0; |
2513 | |
2514 | xid = numops = op = status = procnum = taglen = cbid = 0; |
2515 | |
2516 | nfsm_chain_dissect_init(error, &nmreq, mreq); |
2517 | nfsm_chain_get_32(error, &nmreq, xid); // RPC XID |
2518 | nfsm_chain_get_32(error, &nmreq, val); // RPC Call |
2519 | nfsm_assert(error, (val == RPC_CALL), EBADRPC); |
2520 | nfsm_chain_get_32(error, &nmreq, val); // RPC Version |
2521 | nfsm_assert(error, (val == RPC_VER2), ERPCMISMATCH); |
2522 | nfsm_chain_get_32(error, &nmreq, val); // RPC Program Number |
2523 | nfsm_assert(error, (val == NFS4_CALLBACK_PROG), EPROGUNAVAIL); |
2524 | nfsm_chain_get_32(error, &nmreq, val); // NFS Callback Program Version Number |
2525 | nfsm_assert(error, (val == NFS4_CALLBACK_PROG_VERSION), EPROGMISMATCH); |
2526 | nfsm_chain_get_32(error, &nmreq, procnum); // NFS Callback Procedure Number |
2527 | nfsm_assert(error, (procnum <= NFSPROC4_CB_COMPOUND), EPROCUNAVAIL); |
2528 | |
2529 | /* Handle authentication */ |
2530 | /* XXX just ignore auth for now - handling kerberos may be tricky */ |
2531 | nfsm_chain_get_32(error, &nmreq, auth_type); // RPC Auth Flavor |
2532 | nfsm_chain_get_32(error, &nmreq, auth_len); // RPC Auth Length |
2533 | nfsm_assert(error, (auth_len <= RPCAUTH_MAXSIZ), EBADRPC); |
2534 | if (!error && (auth_len > 0)) |
2535 | nfsm_chain_adv(error, &nmreq, nfsm_rndup(auth_len)); |
2536 | nfsm_chain_adv(error, &nmreq, NFSX_UNSIGNED); // verifier flavor (should be AUTH_NONE) |
2537 | nfsm_chain_get_32(error, &nmreq, auth_len); // verifier length |
2538 | nfsm_assert(error, (auth_len <= RPCAUTH_MAXSIZ), EBADRPC); |
2539 | if (!error && (auth_len > 0)) |
2540 | nfsm_chain_adv(error, &nmreq, nfsm_rndup(auth_len)); |
2541 | if (error) { |
2542 | status = error; |
2543 | error = 0; |
2544 | goto nfsmout; |
2545 | } |
2546 | |
2547 | switch (procnum) { |
2548 | case NFSPROC4_CB_NULL: |
2549 | status = NFSERR_RETVOID; |
2550 | break; |
2551 | case NFSPROC4_CB_COMPOUND: |
2552 | /* tag, minorversion, cb ident, numops, op array */ |
2553 | nfsm_chain_get_32(error, &nmreq, taglen); /* tag length */ |
2554 | nfsm_assert(error, (val <= NFS4_OPAQUE_LIMIT), EBADRPC); |
2555 | |
2556 | /* start building the body of the response */ |
2557 | nfsm_mbuf_get(error, &mrest, nfsm_rndup(taglen) + 5*NFSX_UNSIGNED); |
2558 | nfsm_chain_init(&nmrep, mrest); |
2559 | |
2560 | /* copy tag from request to response */ |
2561 | nfsm_chain_add_32(error, &nmrep, taglen); /* tag length */ |
2562 | for (len = (int)taglen; !error && (len > 0); len -= NFSX_UNSIGNED) { |
2563 | nfsm_chain_get_32(error, &nmreq, val); |
2564 | nfsm_chain_add_32(error, &nmrep, val); |
2565 | } |
2566 | |
2567 | /* insert number of results placeholder */ |
2568 | numres = 0; |
2569 | nfsm_chain_add_32(error, &nmrep, numres); |
2570 | pnumres = (uint32_t*)(nmrep.nmc_ptr - NFSX_UNSIGNED); |
2571 | |
2572 | nfsm_chain_get_32(error, &nmreq, val); /* minorversion */ |
2573 | nfsm_assert(error, (val == 0), NFSERR_MINOR_VERS_MISMATCH); |
2574 | nfsm_chain_get_32(error, &nmreq, cbid); /* callback ID */ |
2575 | nfsm_chain_get_32(error, &nmreq, numops); /* number of operations */ |
2576 | if (error) { |
2577 | if ((error == EBADRPC) || (error == NFSERR_MINOR_VERS_MISMATCH)) |
2578 | status = error; |
2579 | else if ((error == ENOBUFS) || (error == ENOMEM)) |
2580 | status = NFSERR_RESOURCE; |
2581 | else |
2582 | status = NFSERR_SERVERFAULT; |
2583 | error = 0; |
2584 | nfsm_chain_null(&nmrep); |
2585 | goto nfsmout; |
2586 | } |
2587 | /* match the callback ID to a registered mount */ |
2588 | lck_mtx_lock(nfs_global_mutex); |
2589 | TAILQ_FOREACH(nmp, &nfs4_cb_mounts, nm_cblink) { |
2590 | if (nmp->nm_cbid != cbid) |
2591 | continue; |
2592 | /* verify socket's source address matches this mount's server address */ |
2593 | if (!nmp->nm_saddr) |
2594 | continue; |
2595 | if (nfs_sockaddr_cmp((struct sockaddr*)&ncbsp->ncbs_saddr, nmp->nm_saddr) == 0) |
2596 | break; |
2597 | } |
2598 | /* mark the NFS mount as busy */ |
2599 | if (nmp) |
2600 | nmp->nm_cbrefs++; |
2601 | lck_mtx_unlock(nfs_global_mutex); |
2602 | if (!nmp) { |
2603 | /* if no mount match, just drop socket. */ |
2604 | error = EPERM; |
2605 | nfsm_chain_null(&nmrep); |
2606 | goto out; |
2607 | } |
2608 | |
2609 | /* process ops, adding results to mrest */ |
2610 | while (numops > 0) { |
2611 | numops--; |
2612 | nfsm_chain_get_32(error, &nmreq, op); |
2613 | if (error) |
2614 | break; |
2615 | switch (op) { |
2616 | case NFS_OP_CB_GETATTR: |
2617 | // (FH, BITMAP) -> (STATUS, BITMAP, ATTRS) |
2618 | np = NULL; |
2619 | nfsm_chain_get_fh(error, &nmreq, NFS_VER4, &fh); |
2620 | bmlen = NFS_ATTR_BITMAP_LEN; |
2621 | nfsm_chain_get_bitmap(error, &nmreq, bitmap, bmlen); |
2622 | if (error) { |
2623 | status = error; |
2624 | error = 0; |
2625 | numops = 0; /* don't process any more ops */ |
2626 | } else { |
2627 | /* find the node for the file handle */ |
2628 | error = nfs_nget(nmp->nm_mountp, NULL, NULL, fh.fh_data, fh.fh_len, NULL, NULL, RPCAUTH_UNKNOWN, NG_NOCREATE, &np); |
2629 | if (error || !np) { |
2630 | status = NFSERR_BADHANDLE; |
2631 | error = 0; |
2632 | np = NULL; |
2633 | numops = 0; /* don't process any more ops */ |
2634 | } |
2635 | } |
2636 | nfsm_chain_add_32(error, &nmrep, op); |
2637 | nfsm_chain_add_32(error, &nmrep, status); |
2638 | if (!error && (status == EBADRPC)) |
2639 | error = status; |
2640 | if (np) { |
2641 | /* only allow returning size, change, and mtime attrs */ |
2642 | NFS_CLEAR_ATTRIBUTES(&rbitmap); |
2643 | attrbytes = 0; |
2644 | if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_CHANGE)) { |
2645 | NFS_BITMAP_SET(&rbitmap, NFS_FATTR_CHANGE); |
2646 | attrbytes += 2 * NFSX_UNSIGNED; |
2647 | } |
2648 | if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_SIZE)) { |
2649 | NFS_BITMAP_SET(&rbitmap, NFS_FATTR_SIZE); |
2650 | attrbytes += 2 * NFSX_UNSIGNED; |
2651 | } |
2652 | if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_TIME_MODIFY)) { |
2653 | NFS_BITMAP_SET(&rbitmap, NFS_FATTR_TIME_MODIFY); |
2654 | attrbytes += 3 * NFSX_UNSIGNED; |
2655 | } |
2656 | nfsm_chain_add_bitmap(error, &nmrep, rbitmap, NFS_ATTR_BITMAP_LEN); |
2657 | nfsm_chain_add_32(error, &nmrep, attrbytes); |
2658 | if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_CHANGE)) |
2659 | nfsm_chain_add_64(error, &nmrep, |
2660 | np->n_vattr.nva_change + ((np->n_flag & NMODIFIED) ? 1 : 0)); |
2661 | if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_SIZE)) |
2662 | nfsm_chain_add_64(error, &nmrep, np->n_size); |
2663 | if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_TIME_MODIFY)) { |
2664 | nfsm_chain_add_64(error, &nmrep, np->n_vattr.nva_timesec[NFSTIME_MODIFY]); |
2665 | nfsm_chain_add_32(error, &nmrep, np->n_vattr.nva_timensec[NFSTIME_MODIFY]); |
2666 | } |
2667 | nfs_node_unlock(np); |
2668 | vnode_put(NFSTOV(np)); |
2669 | np = NULL; |
2670 | } |
2671 | /* |
2672 | * If we hit an error building the reply, we can't easily back up. |
2673 | * So we'll just update the status and hope the server ignores the |
2674 | * extra garbage. |
2675 | */ |
2676 | break; |
2677 | case NFS_OP_CB_RECALL: |
2678 | // (STATEID, TRUNCATE, FH) -> (STATUS) |
2679 | np = NULL; |
2680 | nfsm_chain_get_stateid(error, &nmreq, &stateid); |
2681 | nfsm_chain_get_32(error, &nmreq, truncate); |
2682 | nfsm_chain_get_fh(error, &nmreq, NFS_VER4, &fh); |
2683 | if (error) { |
2684 | status = error; |
2685 | error = 0; |
2686 | numops = 0; /* don't process any more ops */ |
2687 | } else { |
2688 | /* find the node for the file handle */ |
2689 | error = nfs_nget(nmp->nm_mountp, NULL, NULL, fh.fh_data, fh.fh_len, NULL, NULL, RPCAUTH_UNKNOWN, NG_NOCREATE, &np); |
2690 | if (error || !np) { |
2691 | status = NFSERR_BADHANDLE; |
2692 | error = 0; |
2693 | np = NULL; |
2694 | numops = 0; /* don't process any more ops */ |
2695 | } else if (!(np->n_openflags & N_DELEG_MASK) || |
2696 | bcmp(&np->n_dstateid, &stateid, sizeof(stateid))) { |
2697 | /* delegation stateid state doesn't match */ |
2698 | status = NFSERR_BAD_STATEID; |
2699 | numops = 0; /* don't process any more ops */ |
2700 | } |
2701 | if (!status) /* add node to recall queue, and wake socket thread */ |
2702 | nfs4_delegation_return_enqueue(np); |
2703 | if (np) { |
2704 | nfs_node_unlock(np); |
2705 | vnode_put(NFSTOV(np)); |
2706 | } |
2707 | } |
2708 | nfsm_chain_add_32(error, &nmrep, op); |
2709 | nfsm_chain_add_32(error, &nmrep, status); |
2710 | if (!error && (status == EBADRPC)) |
2711 | error = status; |
2712 | break; |
2713 | case NFS_OP_CB_ILLEGAL: |
2714 | default: |
2715 | nfsm_chain_add_32(error, &nmrep, NFS_OP_CB_ILLEGAL); |
2716 | status = NFSERR_OP_ILLEGAL; |
2717 | nfsm_chain_add_32(error, &nmrep, status); |
2718 | numops = 0; /* don't process any more ops */ |
2719 | break; |
2720 | } |
2721 | numres++; |
2722 | } |
2723 | |
2724 | if (!status && error) { |
2725 | if (error == EBADRPC) |
2726 | status = error; |
2727 | else if ((error == ENOBUFS) || (error == ENOMEM)) |
2728 | status = NFSERR_RESOURCE; |
2729 | else |
2730 | status = NFSERR_SERVERFAULT; |
2731 | error = 0; |
2732 | } |
2733 | |
2734 | /* Now, set the numres field */ |
2735 | *pnumres = txdr_unsigned(numres); |
2736 | nfsm_chain_build_done(error, &nmrep); |
2737 | nfsm_chain_null(&nmrep); |
2738 | |
2739 | /* drop the callback reference on the mount */ |
2740 | lck_mtx_lock(nfs_global_mutex); |
2741 | nmp->nm_cbrefs--; |
2742 | if (!nmp->nm_cbid) |
2743 | wakeup(&nmp->nm_cbrefs); |
2744 | lck_mtx_unlock(nfs_global_mutex); |
2745 | break; |
2746 | } |
2747 | |
2748 | nfsmout: |
2749 | if (status == EBADRPC) |
2750 | OSAddAtomic64(1, &nfsstats.rpcinvalid); |
2751 | |
2752 | /* build reply header */ |
2753 | error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mhead); |
2754 | nfsm_chain_init(&nmrep, mhead); |
2755 | nfsm_chain_add_32(error, &nmrep, 0); /* insert space for an RPC record mark */ |
2756 | nfsm_chain_add_32(error, &nmrep, xid); |
2757 | nfsm_chain_add_32(error, &nmrep, RPC_REPLY); |
2758 | if ((status == ERPCMISMATCH) || (status & NFSERR_AUTHERR)) { |
2759 | nfsm_chain_add_32(error, &nmrep, RPC_MSGDENIED); |
2760 | if (status & NFSERR_AUTHERR) { |
2761 | nfsm_chain_add_32(error, &nmrep, RPC_AUTHERR); |
2762 | nfsm_chain_add_32(error, &nmrep, (status & ~NFSERR_AUTHERR)); |
2763 | } else { |
2764 | nfsm_chain_add_32(error, &nmrep, RPC_MISMATCH); |
2765 | nfsm_chain_add_32(error, &nmrep, RPC_VER2); |
2766 | nfsm_chain_add_32(error, &nmrep, RPC_VER2); |
2767 | } |
2768 | } else { |
2769 | /* reply status */ |
2770 | nfsm_chain_add_32(error, &nmrep, RPC_MSGACCEPTED); |
2771 | /* XXX RPCAUTH_NULL verifier */ |
2772 | nfsm_chain_add_32(error, &nmrep, RPCAUTH_NULL); |
2773 | nfsm_chain_add_32(error, &nmrep, 0); |
2774 | /* accepted status */ |
2775 | switch (status) { |
2776 | case EPROGUNAVAIL: |
2777 | nfsm_chain_add_32(error, &nmrep, RPC_PROGUNAVAIL); |
2778 | break; |
2779 | case EPROGMISMATCH: |
2780 | nfsm_chain_add_32(error, &nmrep, RPC_PROGMISMATCH); |
2781 | nfsm_chain_add_32(error, &nmrep, NFS4_CALLBACK_PROG_VERSION); |
2782 | nfsm_chain_add_32(error, &nmrep, NFS4_CALLBACK_PROG_VERSION); |
2783 | break; |
2784 | case EPROCUNAVAIL: |
2785 | nfsm_chain_add_32(error, &nmrep, RPC_PROCUNAVAIL); |
2786 | break; |
2787 | case EBADRPC: |
2788 | nfsm_chain_add_32(error, &nmrep, RPC_GARBAGE); |
2789 | break; |
2790 | default: |
2791 | nfsm_chain_add_32(error, &nmrep, RPC_SUCCESS); |
2792 | if (status != NFSERR_RETVOID) |
2793 | nfsm_chain_add_32(error, &nmrep, status); |
2794 | break; |
2795 | } |
2796 | } |
2797 | nfsm_chain_build_done(error, &nmrep); |
2798 | if (error) { |
2799 | nfsm_chain_null(&nmrep); |
2800 | goto out; |
2801 | } |
2802 | error = mbuf_setnext(nmrep.nmc_mcur, mrest); |
2803 | if (error) { |
2804 | printf("nfs cb: mbuf_setnext failed %d\n" , error); |
2805 | goto out; |
2806 | } |
2807 | mrest = NULL; |
2808 | /* Calculate the size of the reply */ |
2809 | replen = 0; |
2810 | for (m = nmrep.nmc_mhead; m; m = mbuf_next(m)) |
2811 | replen += mbuf_len(m); |
2812 | mbuf_pkthdr_setlen(mhead, replen); |
2813 | error = mbuf_pkthdr_setrcvif(mhead, NULL); |
2814 | nfsm_chain_set_recmark(error, &nmrep, (replen - NFSX_UNSIGNED) | 0x80000000); |
2815 | nfsm_chain_null(&nmrep); |
2816 | |
2817 | /* send the reply */ |
2818 | bzero(&msg, sizeof(msg)); |
2819 | error = sock_sendmbuf(so, &msg, mhead, 0, &sentlen); |
2820 | mhead = NULL; |
2821 | if (!error && ((int)sentlen != replen)) |
2822 | error = EWOULDBLOCK; |
2823 | if (error == EWOULDBLOCK) /* inability to send response is considered fatal */ |
2824 | error = ETIMEDOUT; |
2825 | out: |
2826 | if (error) |
2827 | nfsm_chain_cleanup(&nmrep); |
2828 | if (mhead) |
2829 | mbuf_freem(mhead); |
2830 | if (mrest) |
2831 | mbuf_freem(mrest); |
2832 | if (mreq) |
2833 | mbuf_freem(mreq); |
2834 | return (error); |
2835 | } |
2836 | |
2837 | |
2838 | /* |
2839 | * Initialize an nfs_rpc_record_state structure. |
2840 | */ |
2841 | void |
2842 | nfs_rpc_record_state_init(struct nfs_rpc_record_state *nrrsp) |
2843 | { |
2844 | bzero(nrrsp, sizeof(*nrrsp)); |
2845 | nrrsp->nrrs_markerleft = sizeof(nrrsp->nrrs_fragleft); |
2846 | } |
2847 | |
2848 | /* |
2849 | * Clean up an nfs_rpc_record_state structure. |
2850 | */ |
2851 | void |
2852 | nfs_rpc_record_state_cleanup(struct nfs_rpc_record_state *nrrsp) |
2853 | { |
2854 | if (nrrsp->nrrs_m) { |
2855 | mbuf_freem(nrrsp->nrrs_m); |
2856 | nrrsp->nrrs_m = nrrsp->nrrs_mlast = NULL; |
2857 | } |
2858 | } |
2859 | |
2860 | /* |
2861 | * Read the next (marked) RPC record from the socket. |
2862 | * |
2863 | * *recvp returns if any data was received. |
2864 | * *mp returns the next complete RPC record |
2865 | */ |
2866 | int |
2867 | nfs_rpc_record_read(socket_t so, struct nfs_rpc_record_state *nrrsp, int flags, int *recvp, mbuf_t *mp) |
2868 | { |
2869 | struct iovec aio; |
2870 | struct msghdr msg; |
2871 | size_t rcvlen; |
2872 | int error = 0; |
2873 | mbuf_t m; |
2874 | |
2875 | *recvp = 0; |
2876 | *mp = NULL; |
2877 | |
2878 | /* read the TCP RPC record marker */ |
2879 | while (!error && nrrsp->nrrs_markerleft) { |
2880 | aio.iov_base = ((char*)&nrrsp->nrrs_fragleft + |
2881 | sizeof(nrrsp->nrrs_fragleft) - nrrsp->nrrs_markerleft); |
2882 | aio.iov_len = nrrsp->nrrs_markerleft; |
2883 | bzero(&msg, sizeof(msg)); |
2884 | msg.msg_iov = &aio; |
2885 | msg.msg_iovlen = 1; |
2886 | error = sock_receive(so, &msg, flags, &rcvlen); |
2887 | if (error || !rcvlen) |
2888 | break; |
2889 | *recvp = 1; |
2890 | nrrsp->nrrs_markerleft -= rcvlen; |
2891 | if (nrrsp->nrrs_markerleft) |
2892 | continue; |
2893 | /* record marker complete */ |
2894 | nrrsp->nrrs_fragleft = ntohl(nrrsp->nrrs_fragleft); |
2895 | if (nrrsp->nrrs_fragleft & 0x80000000) { |
2896 | nrrsp->nrrs_lastfrag = 1; |
2897 | nrrsp->nrrs_fragleft &= ~0x80000000; |
2898 | } |
2899 | nrrsp->nrrs_reclen += nrrsp->nrrs_fragleft; |
2900 | if (nrrsp->nrrs_reclen > NFS_MAXPACKET) { |
2901 | /* This is SERIOUS! We are out of sync with the sender. */ |
2902 | log(LOG_ERR, "impossible RPC record length (%d) on callback" , nrrsp->nrrs_reclen); |
2903 | error = EFBIG; |
2904 | } |
2905 | } |
2906 | |
2907 | /* read the TCP RPC record fragment */ |
2908 | while (!error && !nrrsp->nrrs_markerleft && nrrsp->nrrs_fragleft) { |
2909 | m = NULL; |
2910 | rcvlen = nrrsp->nrrs_fragleft; |
2911 | error = sock_receivembuf(so, NULL, &m, flags, &rcvlen); |
2912 | if (error || !rcvlen || !m) |
2913 | break; |
2914 | *recvp = 1; |
2915 | /* append mbufs to list */ |
2916 | nrrsp->nrrs_fragleft -= rcvlen; |
2917 | if (!nrrsp->nrrs_m) { |
2918 | nrrsp->nrrs_m = m; |
2919 | } else { |
2920 | error = mbuf_setnext(nrrsp->nrrs_mlast, m); |
2921 | if (error) { |
2922 | printf("nfs tcp rcv: mbuf_setnext failed %d\n" , error); |
2923 | mbuf_freem(m); |
2924 | break; |
2925 | } |
2926 | } |
2927 | while (mbuf_next(m)) |
2928 | m = mbuf_next(m); |
2929 | nrrsp->nrrs_mlast = m; |
2930 | } |
2931 | |
2932 | /* done reading fragment? */ |
2933 | if (!error && !nrrsp->nrrs_markerleft && !nrrsp->nrrs_fragleft) { |
2934 | /* reset socket fragment parsing state */ |
2935 | nrrsp->nrrs_markerleft = sizeof(nrrsp->nrrs_fragleft); |
2936 | if (nrrsp->nrrs_lastfrag) { |
2937 | /* RPC record complete */ |
2938 | *mp = nrrsp->nrrs_m; |
2939 | /* reset socket record parsing state */ |
2940 | nrrsp->nrrs_reclen = 0; |
2941 | nrrsp->nrrs_m = nrrsp->nrrs_mlast = NULL; |
2942 | nrrsp->nrrs_lastfrag = 0; |
2943 | } |
2944 | } |
2945 | |
2946 | return (error); |
2947 | } |
2948 | |
2949 | |
2950 | |
/*
 * The NFS client send routine.
 *
 * Send the given NFS request out the mount's socket.
 * Holds nfs_sndlock() for the duration of this call.
 *
 * - check for request termination (sigintr)
 * - wait for reconnect, if necessary
 * - UDP: check the congestion window
 * - make a copy of the request to send
 * - UDP: update the congestion window
 * - send the request
 *
 * If sent successfully, R_MUSTRESEND and R_RESENDERR are cleared.
 * rexmit count is also updated if this isn't the first send.
 *
 * If the send is not successful, make sure R_MUSTRESEND is set.
 * If this wasn't the first transmit, set R_RESENDERR.
 * Also, undo any UDP congestion window changes made.
 *
 * If the error appears to indicate that the socket should
 * be reconnected, mark the socket for reconnection.
 *
 * Only return errors when the request should be aborted.
 *
 * Parameters:
 *   req  - the request to send; its mbuf chain (r_mhead) is copied and
 *          the copy is what gets handed to the socket
 *   wait - if zero, don't wait for a reconnect or for room in the UDP
 *          congestion window: just leave the request marked R_MUSTRESEND
 *          and return 0; the mbuf copy is also made with MBUF_DONTWAIT
 *
 * R_SENDING is cleared in the request's flags on every return path.
 *
 * Returns 0 when the request was sent or has been left to be resent.
 * Errors from nfs_sndlock()/nfs_sigintr() are returned as-is; after a
 * failed send only EINTR, ERESTART, EIO, ENXIO, or ETIMEDOUT are returned.
 */
int
nfs_send(struct nfsreq *req, int wait)
{
	struct nfsmount *nmp;
	struct nfs_socket *nso;
	int error, error2, sotype, rexmit, slpflag = 0, needrecon;
	struct msghdr msg;
	struct sockaddr *sendnam;
	mbuf_t mreqcopy;
	size_t sentlen = 0;
	struct timespec ts = { 2, 0 };

	/* we come back here to start over after waiting for a reconnect or for cwnd room */
again:
	error = nfs_sndlock(req);
	if (error) {
		lck_mtx_lock(&req->r_mtx);
		req->r_error = error;
		req->r_flags &= ~R_SENDING;
		lck_mtx_unlock(&req->r_mtx);
		return (error);
	}

	/* check whether the request should be terminated */
	error = nfs_sigintr(req->r_nmp, req, NULL, 0);
	if (error) {
		nfs_sndunlock(req);
		lck_mtx_lock(&req->r_mtx);
		req->r_error = error;
		req->r_flags &= ~R_SENDING;
		lck_mtx_unlock(&req->r_mtx);
		return (error);
	}
	nmp = req->r_nmp;
	sotype = nmp->nm_sotype;

	/*
	 * If it's a setup RPC but we're not in SETUP... must need reconnect.
	 * If it's a recovery RPC but the socket's not ready... must need reconnect.
	 */
	if (((req->r_flags & R_SETUP) && !(nmp->nm_sockflags & NMSOCK_SETUP)) ||
	    ((req->r_flags & R_RECOVER) && !(nmp->nm_sockflags & NMSOCK_READY))) {
		error = ETIMEDOUT;
		nfs_sndunlock(req);
		lck_mtx_lock(&req->r_mtx);
		req->r_error = error;
		req->r_flags &= ~R_SENDING;
		lck_mtx_unlock(&req->r_mtx);
		return (error);
	}

	/* If the socket needs reconnection, do that now. */
	/* wait until socket is ready - unless this request is part of setup */
	lck_mtx_lock(&nmp->nm_lock);
	if (!(nmp->nm_sockflags & NMSOCK_READY) &&
	    !((nmp->nm_sockflags & NMSOCK_SETUP) && (req->r_flags & R_SETUP))) {
		if (NMFLAG(nmp, INTR) && !(req->r_flags & R_NOINTR))
			slpflag |= PCATCH;
		lck_mtx_unlock(&nmp->nm_lock);
		nfs_sndunlock(req);
		if (!wait) {
			/* caller doesn't want to wait: leave the request marked for resend */
			lck_mtx_lock(&req->r_mtx);
			req->r_flags &= ~R_SENDING;
			req->r_flags |= R_MUSTRESEND;
			req->r_rtt = 0;
			lck_mtx_unlock(&req->r_mtx);
			return (0);
		}
		NFS_SOCK_DBG("nfs_send: 0x%llx wait reconnect\n" , req->r_xid);
		lck_mtx_lock(&req->r_mtx);
		req->r_flags &= ~R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		lck_mtx_lock(&nmp->nm_lock);
		while (!(nmp->nm_sockflags & NMSOCK_READY)) {
			/* don't bother waiting if the socket thread won't be reconnecting it */
			if (nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD)) {
				error = EIO;
				break;
			}
			if ((NMFLAG(nmp, SOFT) || (req->r_flags & R_SOFT)) && (nmp->nm_reconnect_start > 0)) {
				struct timeval now;
				microuptime(&now);
				if ((now.tv_sec - nmp->nm_reconnect_start) >= 8) {
					/* soft mount in reconnect for a while... terminate ASAP */
					OSAddAtomic64(1, &nfsstats.rpctimeouts);
					req->r_flags |= R_SOFTTERM;
					req->r_error = error = ETIMEDOUT;
					break;
				}
			}
			/* make sure socket thread is running, then wait */
			nfs_mount_sock_thread_wake(nmp);
			if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 1)))
				break;
			msleep(req, &nmp->nm_lock, slpflag|PSOCK, "nfsconnectwait" , &ts);
			/* PCATCH (if set) applies to the first sleep only */
			slpflag = 0;
		}
		lck_mtx_unlock(&nmp->nm_lock);
		if (error) {
			lck_mtx_lock(&req->r_mtx);
			req->r_error = error;
			req->r_flags &= ~R_SENDING;
			lck_mtx_unlock(&req->r_mtx);
			return (error);
		}
		/* socket is ready now: start over (we dropped the send lock above) */
		goto again;
	}
	nso = nmp->nm_nso;
	/* note that we're using the mount's socket to do the send */
	nmp->nm_state |= NFSSTA_SENDING;  /* will be cleared by nfs_sndunlock() */
	lck_mtx_unlock(&nmp->nm_lock);
	if (!nso) {
		/* no socket to send on: leave the request marked for resend */
		nfs_sndunlock(req);
		lck_mtx_lock(&req->r_mtx);
		req->r_flags &= ~R_SENDING;
		req->r_flags |= R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		return (0);
	}

	lck_mtx_lock(&req->r_mtx);
	/* nonzero if this request has already been sent before (i.e. this is a retransmit) */
	rexmit = (req->r_flags & R_SENT);

	if (sotype == SOCK_DGRAM) {
		lck_mtx_lock(&nmp->nm_lock);
		if (!(req->r_flags & R_CWND) && (nmp->nm_sent >= nmp->nm_cwnd)) {
			/* if we can't send this out yet, wait on the cwnd queue */
			slpflag = (NMFLAG(nmp, INTR) && req->r_thread) ? PCATCH : 0;
			lck_mtx_unlock(&nmp->nm_lock);
			nfs_sndunlock(req);
			req->r_flags &= ~R_SENDING;
			req->r_flags |= R_MUSTRESEND;
			lck_mtx_unlock(&req->r_mtx);
			if (!wait) {
				req->r_rtt = 0;
				return (0);
			}
			lck_mtx_lock(&nmp->nm_lock);
			while (nmp->nm_sent >= nmp->nm_cwnd) {
				if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 1)))
					break;
				TAILQ_INSERT_TAIL(&nmp->nm_cwndq, req, r_cchain);
				msleep(req, &nmp->nm_lock, slpflag | (PZERO - 1), "nfswaitcwnd" , &ts);
				slpflag = 0;
				/* take ourselves back off the queue if we're still on it */
				if ((req->r_cchain.tqe_next != NFSREQNOLIST)) {
					TAILQ_REMOVE(&nmp->nm_cwndq, req, r_cchain);
					req->r_cchain.tqe_next = NFSREQNOLIST;
				}
			}
			lck_mtx_unlock(&nmp->nm_lock);
			/* start over; any termination error gets picked up by the checks at the top */
			goto again;
		}
		/*
		 * We update these *before* the send to avoid racing
		 * against others who may be looking to send requests.
		 */
		if (!rexmit) {
			/* first transmit */
			req->r_flags |= R_CWND;
			nmp->nm_sent += NFS_CWNDSCALE;
		} else {
			/*
			 * When retransmitting, turn timing off
			 * and divide congestion window by 2.
			 */
			req->r_flags &= ~R_TIMING;
			nmp->nm_cwnd >>= 1;
			if (nmp->nm_cwnd < NFS_CWNDSCALE)
				nmp->nm_cwnd = NFS_CWNDSCALE;
		}
		lck_mtx_unlock(&nmp->nm_lock);
	}

	req->r_flags &= ~R_MUSTRESEND;
	lck_mtx_unlock(&req->r_mtx);

	/* send a copy so that the original request is kept around */
	error = mbuf_copym(req->r_mhead, 0, MBUF_COPYALL,
			wait ? MBUF_WAITOK : MBUF_DONTWAIT, &mreqcopy);
	if (error) {
		if (wait)
			log(LOG_INFO, "nfs_send: mbuf copy failed %d\n" , error);
		/* couldn't copy the request: leave it marked for resend */
		nfs_sndunlock(req);
		lck_mtx_lock(&req->r_mtx);
		req->r_flags &= ~R_SENDING;
		req->r_flags |= R_MUSTRESEND;
		req->r_rtt = 0;
		lck_mtx_unlock(&req->r_mtx);
		return (0);
	}

	bzero(&msg, sizeof(msg));
	/* an unconnected, non-stream socket needs the destination address supplied */
	if ((sotype != SOCK_STREAM) && !sock_isconnected(nso->nso_so) && ((sendnam = nmp->nm_saddr))) {
		msg.msg_name = (caddr_t)sendnam;
		msg.msg_namelen = sendnam->sa_len;
	}
	error = sock_sendmbuf(nso->nso_so, &msg, mreqcopy, 0, &sentlen);
	if (error || (sentlen != req->r_mreqlen)) {
		NFS_SOCK_DBG("nfs_send: 0x%llx sent %d/%d error %d\n" ,
			req->r_xid, (int)sentlen, (int)req->r_mreqlen, error);
	}

	/* a short send counts as a failure */
	if (!error && (sentlen != req->r_mreqlen))
		error = EWOULDBLOCK;
	/* a partial send on a stream socket always calls for a reconnect */
	needrecon = ((sotype == SOCK_STREAM) && sentlen && (sentlen != req->r_mreqlen));

	lck_mtx_lock(&req->r_mtx);
	req->r_flags &= ~R_SENDING;
	req->r_rtt = 0;
	/* count the retransmit, capping the count at NFS_MAXREXMIT */
	if (rexmit && (++req->r_rexmit > NFS_MAXREXMIT))
		req->r_rexmit = NFS_MAXREXMIT;

	if (!error) {
		/* SUCCESS */
		req->r_flags &= ~R_RESENDERR;
		if (rexmit)
			OSAddAtomic64(1, &nfsstats.rpcretries);
		req->r_flags |= R_SENT;
		if (req->r_flags & R_WAITSENT) {
			/* someone is waiting for this request to be sent */
			req->r_flags &= ~R_WAITSENT;
			wakeup(req);
		}
		nfs_sndunlock(req);
		lck_mtx_unlock(&req->r_mtx);
		return (0);
	}

	/* send failed */
	req->r_flags |= R_MUSTRESEND;
	if (rexmit)
		req->r_flags |= R_RESENDERR;
	if ((error == EINTR) || (error == ERESTART))
		req->r_error = error;
	lck_mtx_unlock(&req->r_mtx);

	if (sotype == SOCK_DGRAM) {
		/*
		 * Note: even though a first send may fail, we consider
		 * the request sent for congestion window purposes.
		 * So we don't need to undo any of the changes made above.
		 */
		/*
		 * Socket errors ignored for connectionless sockets??
		 * For now, ignore them all
		 */
		if ((error != EINTR) && (error != ERESTART) &&
		    (error != EWOULDBLOCK) && (error != EIO) && (nso == nmp->nm_nso)) {
			int clearerror = 0, optlen = sizeof(clearerror);
			/* fetch SO_ERROR from the socket; the value is only used for debug output */
			sock_getsockopt(nso->nso_so, SOL_SOCKET, SO_ERROR, &clearerror, &optlen);
#ifdef NFS_SOCKET_DEBUGGING
			if (clearerror)
				NFS_SOCK_DBG("nfs_send: ignoring UDP socket error %d so %d\n" ,
					error, clearerror);
#endif
		}
	}

	/* check if it appears we should reconnect the socket */
	switch (error) {
	case EWOULDBLOCK:
		/* if send timed out, reconnect if on TCP */
		if (sotype != SOCK_STREAM)
			break;
		/* FALLTHROUGH (TCP only) */
	case EPIPE:
	case EADDRNOTAVAIL:
	case ENETDOWN:
	case ENETUNREACH:
	case ENETRESET:
	case ECONNABORTED:
	case ECONNRESET:
	case ENOTCONN:
	case ESHUTDOWN:
	case ECONNREFUSED:
	case EHOSTDOWN:
	case EHOSTUNREACH:
	/* case ECANCELED??? */
		needrecon = 1;
		break;
	}
	/* only mark the mount for reconnect if it is still using the socket we sent on */
	if (needrecon && (nso == nmp->nm_nso)) { /* mark socket as needing reconnect */
		NFS_SOCK_DBG("nfs_send: 0x%llx need reconnect %d\n" , req->r_xid, error);
		nfs_need_reconnect(nmp);
	}

	nfs_sndunlock(req);

	if (nfs_is_dead(error, nmp))
		error = EIO;

	/*
	 * Don't log some errors:
	 * EPIPE errors may be common with servers that drop idle connections.
	 * EADDRNOTAVAIL may occur on network transitions.
	 * ENOTCONN may occur under some network conditions.
	 */
	if ((error == EPIPE) || (error == EADDRNOTAVAIL) || (error == ENOTCONN))
		error = 0;
	if (error && (error != EINTR) && (error != ERESTART))
		log(LOG_INFO, "nfs send error %d for server %s\n" , error,
			!req->r_nmp ? "<unmounted>" :
			vfs_statfs(req->r_nmp->nm_mountp)->f_mntfromname);

	/* prefer request termination error over other errors */
	error2 = nfs_sigintr(req->r_nmp, req, req->r_thread, 0);
	if (error2)
		error = error2;

	/* only allow the following errors to be returned */
	if ((error != EINTR) && (error != ERESTART) && (error != EIO) &&
	    (error != ENXIO) && (error != ETIMEDOUT))
		/*
		 * We got some error we don't know what do do with,
		 * i.e., we're not reconnecting, we map it to
		 * EIO. Presumably our send failed and we better tell
		 * the caller so they don't wait for a reply that is
		 * never going to come.  If we are reconnecting we
		 * return 0 and the request will be resent.
		 */
		error = needrecon ? 0 : EIO;
	return (error);
}
3296 | |
3297 | /* |
3298 | * NFS client socket upcalls |
3299 | * |
3300 | * Pull RPC replies out of an NFS mount's socket and match them |
3301 | * up with the pending request. |
3302 | * |
3303 | * The datagram code is simple because we always get whole |
3304 | * messages out of the socket. |
3305 | * |
3306 | * The stream code is more involved because we have to parse |
3307 | * the RPC records out of the stream. |
3308 | */ |
3309 | |
3310 | /* NFS client UDP socket upcall */ |
3311 | void |
3312 | nfs_udp_rcv(socket_t so, void *arg, __unused int waitflag) |
3313 | { |
3314 | struct nfsmount *nmp = arg; |
3315 | struct nfs_socket *nso = nmp->nm_nso; |
3316 | size_t rcvlen; |
3317 | mbuf_t m; |
3318 | int error = 0; |
3319 | |
3320 | if (nmp->nm_sockflags & NMSOCK_CONNECTING) |
3321 | return; |
3322 | |
3323 | do { |
3324 | /* make sure we're on the current socket */ |
3325 | if (!nso || (nso->nso_so != so)) |
3326 | return; |
3327 | |
3328 | m = NULL; |
3329 | rcvlen = 1000000; |
3330 | error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen); |
3331 | if (m) |
3332 | nfs_request_match_reply(nmp, m); |
3333 | } while (m && !error); |
3334 | |
3335 | if (error && (error != EWOULDBLOCK)) { |
3336 | /* problems with the socket... mark for reconnection */ |
3337 | NFS_SOCK_DBG("nfs_udp_rcv: need reconnect %d\n" , error); |
3338 | nfs_need_reconnect(nmp); |
3339 | } |
3340 | } |
3341 | |
3342 | /* NFS client TCP socket upcall */ |
3343 | void |
3344 | nfs_tcp_rcv(socket_t so, void *arg, __unused int waitflag) |
3345 | { |
3346 | struct nfsmount *nmp = arg; |
3347 | struct nfs_socket *nso = nmp->nm_nso; |
3348 | struct nfs_rpc_record_state nrrs; |
3349 | mbuf_t m; |
3350 | int error = 0; |
3351 | int recv = 1; |
3352 | int wup = 0; |
3353 | |
3354 | if (nmp->nm_sockflags & NMSOCK_CONNECTING) |
3355 | return; |
3356 | |
3357 | /* make sure we're on the current socket */ |
3358 | lck_mtx_lock(&nmp->nm_lock); |
3359 | nso = nmp->nm_nso; |
3360 | if (!nso || (nso->nso_so != so) || (nmp->nm_sockflags & (NMSOCK_DISCONNECTING))) { |
3361 | lck_mtx_unlock(&nmp->nm_lock); |
3362 | return; |
3363 | } |
3364 | lck_mtx_unlock(&nmp->nm_lock); |
3365 | |
3366 | /* make sure this upcall should be trying to do work */ |
3367 | lck_mtx_lock(&nso->nso_lock); |
3368 | if (nso->nso_flags & (NSO_UPCALL|NSO_DISCONNECTING|NSO_DEAD)) { |
3369 | lck_mtx_unlock(&nso->nso_lock); |
3370 | return; |
3371 | } |
3372 | nso->nso_flags |= NSO_UPCALL; |
3373 | nrrs = nso->nso_rrs; |
3374 | lck_mtx_unlock(&nso->nso_lock); |
3375 | |
3376 | /* loop while we make error-free progress */ |
3377 | while (!error && recv) { |
3378 | error = nfs_rpc_record_read(so, &nrrs, MSG_DONTWAIT, &recv, &m); |
3379 | if (m) /* match completed response with request */ |
3380 | nfs_request_match_reply(nmp, m); |
3381 | } |
3382 | |
3383 | /* Update the sockets's rpc parsing state */ |
3384 | lck_mtx_lock(&nso->nso_lock); |
3385 | nso->nso_rrs = nrrs; |
3386 | if (nso->nso_flags & NSO_DISCONNECTING) |
3387 | wup = 1; |
3388 | nso->nso_flags &= ~NSO_UPCALL; |
3389 | lck_mtx_unlock(&nso->nso_lock); |
3390 | if (wup) |
3391 | wakeup(&nso->nso_flags); |
3392 | |
3393 | #ifdef NFS_SOCKET_DEBUGGING |
3394 | if (!recv && (error != EWOULDBLOCK)) |
3395 | NFS_SOCK_DBG("nfs_tcp_rcv: got nothing, error %d, got FIN?\n" , error); |
3396 | #endif |
3397 | /* note: no error and no data indicates server closed its end */ |
3398 | if ((error != EWOULDBLOCK) && (error || !recv)) { |
3399 | /* problems with the socket... mark for reconnection */ |
3400 | NFS_SOCK_DBG("nfs_tcp_rcv: need reconnect %d\n" , error); |
3401 | nfs_need_reconnect(nmp); |
3402 | } |
3403 | } |
3404 | |
3405 | /* |
3406 | * "poke" a socket to try to provoke any pending errors |
3407 | */ |
3408 | void |
3409 | nfs_sock_poke(struct nfsmount *nmp) |
3410 | { |
3411 | struct iovec aio; |
3412 | struct msghdr msg; |
3413 | size_t len; |
3414 | int error = 0; |
3415 | int dummy; |
3416 | |
3417 | lck_mtx_lock(&nmp->nm_lock); |
3418 | if ((nmp->nm_sockflags & NMSOCK_UNMOUNT) || |
3419 | !(nmp->nm_sockflags & NMSOCK_READY) || !nmp->nm_nso || !nmp->nm_nso->nso_so) { |
3420 | /* Nothing to poke */ |
3421 | nmp->nm_sockflags &= ~NMSOCK_POKE; |
3422 | wakeup(&nmp->nm_sockflags); |
3423 | lck_mtx_unlock(&nmp->nm_lock); |
3424 | return; |
3425 | } |
3426 | lck_mtx_unlock(&nmp->nm_lock); |
3427 | aio.iov_base = &dummy; |
3428 | aio.iov_len = 0; |
3429 | len = 0; |
3430 | bzero(&msg, sizeof(msg)); |
3431 | msg.msg_iov = &aio; |
3432 | msg.msg_iovlen = 1; |
3433 | error = sock_send(nmp->nm_nso->nso_so, &msg, MSG_DONTWAIT, &len); |
3434 | NFS_SOCK_DBG("nfs_sock_poke: error %d\n" , error); |
3435 | lck_mtx_lock(&nmp->nm_lock); |
3436 | nmp->nm_sockflags &= ~NMSOCK_POKE; |
3437 | wakeup(&nmp->nm_sockflags); |
3438 | lck_mtx_unlock(&nmp->nm_lock); |
3439 | nfs_is_dead(error, nmp); |
3440 | } |
3441 | |
/*
 * Match an RPC reply with the corresponding request.
 *
 * nmp  - mount whose socket delivered the reply
 * mrep - mbuf chain holding the reply
 *
 * The first two 32-bit words of the reply are read as the xid and the RPC
 * message type.  If that fails, or the message is not an RPC_REPLY, the
 * chain is freed and nfsstats.rpcinvalid is bumped.
 *
 * Otherwise the global request queue (nfs_reqq) is searched for a request
 * that has no reply attached yet and whose 32-bit xid matches.  On a match
 * the dissected chain is stored in req->r_nmrep (mrep is not freed here),
 * the mount's congestion window and round-trip estimates are updated, and
 * anyone sleeping on the request is woken.  If nothing matches, the chain
 * is freed and nfsstats.rpcunexpected is bumped.
 *
 * Locks are taken in the order nfs_request_mutex, req->r_mtx, nmp->nm_lock.
 */
void
nfs_request_match_reply(struct nfsmount *nmp, mbuf_t mrep)
{
	struct nfsreq *req;
	struct nfsm_chain nmrep;
	u_int32_t reply = 0, rxid = 0;
	int error = 0, asyncioq, t1;

	/* Get the xid and check that it is an rpc reply */
	nfsm_chain_dissect_init(error, &nmrep, mrep);
	nfsm_chain_get_32(error, &nmrep, rxid);
	nfsm_chain_get_32(error, &nmrep, reply);
	if (error || (reply != RPC_REPLY)) {
		OSAddAtomic64(1, &nfsstats.rpcinvalid);
		mbuf_freem(mrep);
		return;
	}

	/*
	 * Loop through the request list to match up the reply
	 * Iff no match, just drop it.
	 */
	lck_mtx_lock(nfs_request_mutex);
	TAILQ_FOREACH(req, &nfs_reqq, r_chain) {
		/* cheap unlocked pre-check: skip requests that already have a reply or a different xid */
		if (req->r_nmrep.nmc_mhead || (rxid != R_XID32(req->r_xid)))
			continue;
		/* looks like we have it, grab lock and double check */
		lck_mtx_lock(&req->r_mtx);
		if (req->r_nmrep.nmc_mhead || (rxid != R_XID32(req->r_xid))) {
			lck_mtx_unlock(&req->r_mtx);
			continue;
		}
		/* Found it.. */
		req->r_nmrep = nmrep;
		lck_mtx_lock(&nmp->nm_lock);
		if (nmp->nm_sotype == SOCK_DGRAM) {
			/*
			 * Update congestion window.
			 * Do the additive increase of one rpc/rtt.
			 */
			FSDBG(530, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
			if (nmp->nm_cwnd <= nmp->nm_sent) {
				nmp->nm_cwnd +=
				    ((NFS_CWNDSCALE * NFS_CWNDSCALE) +
				    (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
				if (nmp->nm_cwnd > NFS_MAXCWND)
					nmp->nm_cwnd = NFS_MAXCWND;
			}
			/* this request no longer counts against the window */
			if (req->r_flags & R_CWND) {
				nmp->nm_sent -= NFS_CWNDSCALE;
				req->r_flags &= ~R_CWND;
			}
			if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
				/* congestion window is open, poke the cwnd queue */
				struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
				TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
				/* NFSREQNOLIST marks the request as being on no cwnd queue */
				req2->r_cchain.tqe_next = NFSREQNOLIST;
				wakeup(req2);
			}
		}
		/*
		 * Update rtt using a gain of 0.125 on the mean
		 * and a gain of 0.25 on the deviation.
		 */
		if (req->r_flags & R_TIMING) {
			/*
			 * Since the timer resolution of
			 * NFS_HZ is so coarse, it can often
			 * result in r_rtt == 0. Since
			 * r_rtt == N means that the actual
			 * rtt is between N+dt and N+2-dt ticks,
			 * add 1.
			 */
			if (proct[req->r_procnum] == 0)
				panic("nfs_request_match_reply: proct[%d] is zero", req->r_procnum);
			t1 = req->r_rtt + 1;
			t1 -= (NFS_SRTT(req) >> 3);
			NFS_SRTT(req) += t1;
			/* the deviation estimate is fed the absolute error */
			if (t1 < 0)
				t1 = -t1;
			t1 -= (NFS_SDRTT(req) >> 2);
			NFS_SDRTT(req) += t1;
		}
		/* a reply arrived, so reset the mount's timeout count */
		nmp->nm_timeouts = 0;
		lck_mtx_unlock(&nmp->nm_lock);
		/* signal anyone waiting on this request */
		wakeup(req);
		/* sample the callback while r_mtx is still held; it is used after the locks are dropped */
		asyncioq = (req->r_callback.rcb_func != NULL);
		if (nfs_request_using_gss(req))
			nfs_gss_clnt_rpcdone(req);
		lck_mtx_unlock(&req->r_mtx);
		lck_mtx_unlock(nfs_request_mutex);
		/* if it's an async RPC with a callback, queue it up */
		if (asyncioq)
			nfs_asyncio_finish(req);
		/* both locks were already released above; leave the loop with req non-NULL */
		break;
	}

	/* req is NULL only when the loop ran off the end of the list, so nfs_request_mutex is still held */
	if (!req) {
		/* not matched to a request, so drop it. */
		lck_mtx_unlock(nfs_request_mutex);
		OSAddAtomic64(1, &nfsstats.rpcunexpected);
		mbuf_freem(mrep);
	}
}
3550 | |
3551 | /* |
3552 | * Wait for the reply for a given request... |
3553 | * ...potentially resending the request if necessary. |
3554 | */ |
3555 | int |
3556 | nfs_wait_reply(struct nfsreq *req) |
3557 | { |
3558 | struct timespec ts = { 2, 0 }; |
3559 | int error = 0, slpflag, first = 1; |
3560 | |
3561 | if (req->r_nmp && NMFLAG(req->r_nmp, INTR) && req->r_thread && !(req->r_flags & R_NOINTR)) |
3562 | slpflag = PCATCH; |
3563 | else |
3564 | slpflag = 0; |
3565 | |
3566 | lck_mtx_lock(&req->r_mtx); |
3567 | while (!req->r_nmrep.nmc_mhead) { |
3568 | if ((error = nfs_sigintr(req->r_nmp, req, first ? NULL : req->r_thread, 0))) |
3569 | break; |
3570 | if (((error = req->r_error)) || req->r_nmrep.nmc_mhead) |
3571 | break; |
3572 | /* check if we need to resend */ |
3573 | if (req->r_flags & R_MUSTRESEND) { |
3574 | NFS_SOCK_DBG("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d\n" , |
3575 | req->r_procnum, req->r_xid, req->r_flags, req->r_rtt); |
3576 | req->r_flags |= R_SENDING; |
3577 | lck_mtx_unlock(&req->r_mtx); |
3578 | if (nfs_request_using_gss(req)) { |
3579 | /* |
3580 | * It's an RPCSEC_GSS request. |
3581 | * Can't just resend the original request |
3582 | * without bumping the cred sequence number. |
3583 | * Go back and re-build the request. |
3584 | */ |
3585 | lck_mtx_lock(&req->r_mtx); |
3586 | req->r_flags &= ~R_SENDING; |
3587 | lck_mtx_unlock(&req->r_mtx); |
3588 | return (EAGAIN); |
3589 | } |
3590 | error = nfs_send(req, 1); |
3591 | lck_mtx_lock(&req->r_mtx); |
3592 | NFS_SOCK_DBG("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d err %d\n" , |
3593 | req->r_procnum, req->r_xid, req->r_flags, req->r_rtt, error); |
3594 | if (error) |
3595 | break; |
3596 | if (((error = req->r_error)) || req->r_nmrep.nmc_mhead) |
3597 | break; |
3598 | } |
3599 | /* need to poll if we're P_NOREMOTEHANG */ |
3600 | if (nfs_noremotehang(req->r_thread)) |
3601 | ts.tv_sec = 1; |
3602 | msleep(req, &req->r_mtx, slpflag | (PZERO - 1), "nfswaitreply" , &ts); |
3603 | first = slpflag = 0; |
3604 | } |
3605 | lck_mtx_unlock(&req->r_mtx); |
3606 | |
3607 | return (error); |
3608 | } |
3609 | |
3610 | /* |
3611 | * An NFS request goes something like this: |
3612 | * (nb: always frees up mreq mbuf list) |
3613 | * nfs_request_create() |
3614 | * - allocates a request struct if one is not provided |
3615 | * - initial fill-in of the request struct |
3616 | * nfs_request_add_header() |
3617 | * - add the RPC header |
3618 | * nfs_request_send() |
3619 | * - link it into list |
3620 | * - call nfs_send() for first transmit |
3621 | * nfs_request_wait() |
3622 | * - call nfs_wait_reply() to wait for the reply |
3623 | * nfs_request_finish() |
3624 | * - break down rpc header and return with error or nfs reply |
3625 | * pointed to by nmrep. |
3626 | * nfs_request_rele() |
3627 | * nfs_request_destroy() |
3628 | * - clean up the request struct |
3629 | * - free the request struct if it was allocated by nfs_request_create() |
3630 | */ |
3631 | |
3632 | /* |
3633 | * Set up an NFS request struct (allocating if no request passed in). |
3634 | */ |
3635 | int |
3636 | nfs_request_create( |
3637 | nfsnode_t np, |
3638 | mount_t mp, /* used only if !np */ |
3639 | struct nfsm_chain *nmrest, |
3640 | int procnum, |
3641 | thread_t thd, |
3642 | kauth_cred_t cred, |
3643 | struct nfsreq **reqp) |
3644 | { |
3645 | struct nfsreq *req, *newreq = NULL; |
3646 | struct nfsmount *nmp; |
3647 | |
3648 | req = *reqp; |
3649 | if (!req) { |
3650 | /* allocate a new NFS request structure */ |
3651 | MALLOC_ZONE(newreq, struct nfsreq*, sizeof(*newreq), M_NFSREQ, M_WAITOK); |
3652 | if (!newreq) { |
3653 | mbuf_freem(nmrest->nmc_mhead); |
3654 | nmrest->nmc_mhead = NULL; |
3655 | return (ENOMEM); |
3656 | } |
3657 | req = newreq; |
3658 | } |
3659 | |
3660 | bzero(req, sizeof(*req)); |
3661 | if (req == newreq) |
3662 | req->r_flags = R_ALLOCATED; |
3663 | |
3664 | nmp = VFSTONFS(np ? NFSTOMP(np) : mp); |
3665 | if (nfs_mount_gone(nmp)) { |
3666 | if (newreq) |
3667 | FREE_ZONE(newreq, sizeof(*newreq), M_NFSREQ); |
3668 | return (ENXIO); |
3669 | } |
3670 | lck_mtx_lock(&nmp->nm_lock); |
3671 | if ((nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD)) && |
3672 | (nmp->nm_state & NFSSTA_TIMEO)) { |
3673 | lck_mtx_unlock(&nmp->nm_lock); |
3674 | mbuf_freem(nmrest->nmc_mhead); |
3675 | nmrest->nmc_mhead = NULL; |
3676 | if (newreq) |
3677 | FREE_ZONE(newreq, sizeof(*newreq), M_NFSREQ); |
3678 | return (ENXIO); |
3679 | } |
3680 | |
3681 | if ((nmp->nm_vers != NFS_VER4) && (procnum >= 0) && (procnum < NFS_NPROCS)) |
3682 | OSAddAtomic64(1, &nfsstats.rpccnt[procnum]); |
3683 | if ((nmp->nm_vers == NFS_VER4) && (procnum != NFSPROC4_COMPOUND) && (procnum != NFSPROC4_NULL)) |
3684 | panic("nfs_request: invalid NFSv4 RPC request %d\n" , procnum); |
3685 | |
3686 | lck_mtx_init(&req->r_mtx, nfs_request_grp, LCK_ATTR_NULL); |
3687 | req->r_nmp = nmp; |
3688 | nmp->nm_ref++; |
3689 | req->r_np = np; |
3690 | req->r_thread = thd; |
3691 | if (!thd) |
3692 | req->r_flags |= R_NOINTR; |
3693 | if (IS_VALID_CRED(cred)) { |
3694 | kauth_cred_ref(cred); |
3695 | req->r_cred = cred; |
3696 | } |
3697 | req->r_procnum = procnum; |
3698 | if (proct[procnum] > 0) |
3699 | req->r_flags |= R_TIMING; |
3700 | req->r_nmrep.nmc_mhead = NULL; |
3701 | SLIST_INIT(&req->r_gss_seqlist); |
3702 | req->r_achain.tqe_next = NFSREQNOLIST; |
3703 | req->r_rchain.tqe_next = NFSREQNOLIST; |
3704 | req->r_cchain.tqe_next = NFSREQNOLIST; |
3705 | |
3706 | /* set auth flavor to use for request */ |
3707 | if (!req->r_cred) |
3708 | req->r_auth = RPCAUTH_NONE; |
3709 | else if (req->r_np && (req->r_np->n_auth != RPCAUTH_INVALID)) |
3710 | req->r_auth = req->r_np->n_auth; |
3711 | else |
3712 | req->r_auth = nmp->nm_auth; |
3713 | |
3714 | lck_mtx_unlock(&nmp->nm_lock); |
3715 | |
3716 | /* move the request mbuf chain to the nfsreq */ |
3717 | req->r_mrest = nmrest->nmc_mhead; |
3718 | nmrest->nmc_mhead = NULL; |
3719 | |
3720 | req->r_flags |= R_INITTED; |
3721 | req->r_refs = 1; |
3722 | if (newreq) |
3723 | *reqp = req; |
3724 | return (0); |
3725 | } |
3726 | |
/*
 * Clean up and free an NFS request structure.
 *
 * Does nothing if req is NULL or was never marked R_INITTED.  Otherwise the
 * request is removed from every queue it may still be on (the global request
 * queue, the mount's async I/O, resend and congestion-window queues), its
 * congestion-window and jukebox accounting on the mount is undone, and the
 * mbufs, credential, GSS state and wrongsec array it holds are released.
 * Finally the mount reference is dropped, the request mutex is destroyed,
 * and the structure itself is freed if it is marked R_ALLOCATED.
 */
void
nfs_request_destroy(struct nfsreq *req)
{
	struct nfsmount *nmp;
	struct gss_seq *gsp, *ngsp;
	int clearjbtimeo = 0;

	if (!req || !(req->r_flags & R_INITTED))
		return;
	nmp = req->r_nmp;
	/* clear R_INITTED first so that a second destroy call returns early */
	req->r_flags &= ~R_INITTED;
	if (req->r_lflags & RL_QUEUED)
		nfs_reqdequeue(req);

	if (req->r_achain.tqe_next != NFSREQNOLIST) {
		/*
		 * Still on an async I/O queue?
		 * %%% But which one, we may be on a local iod.
		 */
		lck_mtx_lock(nfsiod_mutex);
		/* re-check under nfsiod_mutex: the unlocked test above may be stale */
		if (nmp && req->r_achain.tqe_next != NFSREQNOLIST) {
			TAILQ_REMOVE(&nmp->nm_iodq, req, r_achain);
			req->r_achain.tqe_next = NFSREQNOLIST;
		}
		lck_mtx_unlock(nfsiod_mutex);
	}

	/* per-mount bookkeeping: r_mtx is taken first, then nm_lock */
	lck_mtx_lock(&req->r_mtx);
	if (nmp) {
		lck_mtx_lock(&nmp->nm_lock);
		if (req->r_flags & R_CWND) {
			/* Decrement the outstanding request count. */
			req->r_flags &= ~R_CWND;
			nmp->nm_sent -= NFS_CWNDSCALE;
			if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
				/* congestion window is open, poke the cwnd queue */
				struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
				TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
				req2->r_cchain.tqe_next = NFSREQNOLIST;
				wakeup(req2);
			}
		}
		assert((req->r_flags & R_RESENDQ) == 0);
		/* XXX should we just remove this conditional, we should have a reference if we're resending */
		if (req->r_rchain.tqe_next != NFSREQNOLIST) {
			TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
			req->r_rchain.tqe_next = NFSREQNOLIST;
			if (req->r_flags & R_RESENDQ)
				req->r_flags &= ~R_RESENDQ;
		}
		/* still waiting for the congestion window to open? take it off that queue */
		if (req->r_cchain.tqe_next != NFSREQNOLIST) {
			TAILQ_REMOVE(&nmp->nm_cwndq, req, r_cchain);
			req->r_cchain.tqe_next = NFSREQNOLIST;
		}
		/* drop this request's jukebox count; the last one out clears the mount's jukebox timeout state */
		if (req->r_flags & R_JBTPRINTFMSG) {
			req->r_flags &= ~R_JBTPRINTFMSG;
			nmp->nm_jbreqs--;
			clearjbtimeo = (nmp->nm_jbreqs == 0) ? NFSSTA_JUKEBOXTIMEO : 0;
		}
		lck_mtx_unlock(&nmp->nm_lock);
	}
	lck_mtx_unlock(&req->r_mtx);

	/* nfs_up() is called only after both locks have been released */
	if (clearjbtimeo)
		nfs_up(nmp, req->r_thread, clearjbtimeo, NULL);
	/*
	 * NOTE(review): r_mhead, when set, presumably heads a chain that
	 * already includes r_mrest, which would explain why only one of the
	 * two is freed - confirm against nfsm_rpchead().
	 */
	if (req->r_mhead)
		mbuf_freem(req->r_mhead);
	else if (req->r_mrest)
		mbuf_freem(req->r_mrest);
	/* free any reply that was attached but never handed to a caller */
	if (req->r_nmrep.nmc_mhead)
		mbuf_freem(req->r_nmrep.nmc_mhead);
	if (IS_VALID_CRED(req->r_cred))
		kauth_cred_unref(&req->r_cred);
	if (nfs_request_using_gss(req))
		nfs_gss_clnt_rpcdone(req);
	/* the _SAFE iterator is needed because each entry is freed while walking the list */
	SLIST_FOREACH_SAFE(gsp, &req->r_gss_seqlist, gss_seqnext, ngsp)
		FREE(gsp, M_TEMP);
	if (req->r_gss_ctx)
		nfs_gss_clnt_ctx_unref(req);
	if (req->r_wrongsec)
		FREE(req->r_wrongsec, M_TEMP);
	/* release the mount reference held by the request */
	if (nmp)
		nfs_mount_rele(nmp);
	lck_mtx_destroy(&req->r_mtx, nfs_request_grp);
	/* only free the structure if nfs_request_create() allocated it */
	if (req->r_flags & R_ALLOCATED)
		FREE_ZONE(req, sizeof(*req), M_NFSREQ);
}
3817 | |
3818 | void |
3819 | nfs_request_ref(struct nfsreq *req, int locked) |
3820 | { |
3821 | if (!locked) |
3822 | lck_mtx_lock(&req->r_mtx); |
3823 | if (req->r_refs <= 0) |
3824 | panic("nfsreq reference error" ); |
3825 | req->r_refs++; |
3826 | if (!locked) |
3827 | lck_mtx_unlock(&req->r_mtx); |
3828 | } |
3829 | |
3830 | void |
3831 | nfs_request_rele(struct nfsreq *req) |
3832 | { |
3833 | int destroy; |
3834 | |
3835 | lck_mtx_lock(&req->r_mtx); |
3836 | if (req->r_refs <= 0) |
3837 | panic("nfsreq reference underflow" ); |
3838 | req->r_refs--; |
3839 | destroy = (req->r_refs == 0); |
3840 | lck_mtx_unlock(&req->r_mtx); |
3841 | if (destroy) |
3842 | nfs_request_destroy(req); |
3843 | } |
3844 | |
3845 | |
3846 | /* |
3847 | * Add an (updated) RPC header with authorization to an NFS request. |
3848 | */ |
3849 | int |
3850 | (struct nfsreq *req) |
3851 | { |
3852 | struct nfsmount *nmp; |
3853 | int error = 0; |
3854 | mbuf_t m; |
3855 | |
3856 | /* free up any previous header */ |
3857 | if ((m = req->r_mhead)) { |
3858 | while (m && (m != req->r_mrest)) |
3859 | m = mbuf_free(m); |
3860 | req->r_mhead = NULL; |
3861 | } |
3862 | |
3863 | nmp = req->r_nmp; |
3864 | if (nfs_mount_gone(nmp)) |
3865 | return (ENXIO); |
3866 | |
3867 | error = nfsm_rpchead(req, req->r_mrest, &req->r_xid, &req->r_mhead); |
3868 | if (error) |
3869 | return (error); |
3870 | |
3871 | req->r_mreqlen = mbuf_pkthdr_len(req->r_mhead); |
3872 | nmp = req->r_nmp; |
3873 | if (nfs_mount_gone(nmp)) |
3874 | return (ENXIO); |
3875 | lck_mtx_lock(&nmp->nm_lock); |
3876 | if (NMFLAG(nmp, SOFT) || (req->r_flags & R_SOFT)) |
3877 | req->r_retry = nmp->nm_retry; |
3878 | else |
3879 | req->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */ |
3880 | lck_mtx_unlock(&nmp->nm_lock); |
3881 | |
3882 | return (error); |
3883 | } |
3884 | |
3885 | |
3886 | /* |
3887 | * Queue an NFS request up and send it out. |
3888 | */ |
3889 | int |
3890 | nfs_request_send(struct nfsreq *req, int wait) |
3891 | { |
3892 | struct nfsmount *nmp; |
3893 | struct timeval now; |
3894 | |
3895 | lck_mtx_lock(&req->r_mtx); |
3896 | req->r_flags |= R_SENDING; |
3897 | lck_mtx_unlock(&req->r_mtx); |
3898 | |
3899 | lck_mtx_lock(nfs_request_mutex); |
3900 | |
3901 | nmp = req->r_nmp; |
3902 | if (nfs_mount_gone(nmp)) { |
3903 | lck_mtx_unlock(nfs_request_mutex); |
3904 | return (ENXIO); |
3905 | } |
3906 | |
3907 | microuptime(&now); |
3908 | if (!req->r_start) { |
3909 | req->r_start = now.tv_sec; |
3910 | req->r_lastmsg = now.tv_sec - |
3911 | ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay)); |
3912 | } |
3913 | |
3914 | OSAddAtomic64(1, &nfsstats.rpcrequests); |
3915 | |
3916 | /* |
3917 | * Chain request into list of outstanding requests. Be sure |
3918 | * to put it LAST so timer finds oldest requests first. |
3919 | * Make sure that the request queue timer is running |
3920 | * to check for possible request timeout. |
3921 | */ |
3922 | TAILQ_INSERT_TAIL(&nfs_reqq, req, r_chain); |
3923 | req->r_lflags |= RL_QUEUED; |
3924 | if (!nfs_request_timer_on) { |
3925 | nfs_request_timer_on = 1; |
3926 | nfs_interval_timer_start(nfs_request_timer_call, |
3927 | NFS_REQUESTDELAY); |
3928 | } |
3929 | lck_mtx_unlock(nfs_request_mutex); |
3930 | |
3931 | /* Send the request... */ |
3932 | return (nfs_send(req, wait)); |
3933 | } |
3934 | |
3935 | /* |
3936 | * Call nfs_wait_reply() to wait for the reply. |
3937 | */ |
3938 | void |
3939 | nfs_request_wait(struct nfsreq *req) |
3940 | { |
3941 | req->r_error = nfs_wait_reply(req); |
3942 | } |
3943 | |
3944 | /* |
3945 | * Finish up an NFS request by dequeueing it and |
3946 | * doing the initial NFS request reply processing. |
3947 | */ |
3948 | int |
3949 | nfs_request_finish( |
3950 | struct nfsreq *req, |
3951 | struct nfsm_chain *nmrepp, |
3952 | int *status) |
3953 | { |
3954 | struct nfsmount *nmp; |
3955 | mbuf_t mrep; |
3956 | int verf_type = 0; |
3957 | uint32_t verf_len = 0; |
3958 | uint32_t reply_status = 0; |
3959 | uint32_t rejected_status = 0; |
3960 | uint32_t auth_status = 0; |
3961 | uint32_t accepted_status = 0; |
3962 | struct nfsm_chain nmrep; |
3963 | int error, clearjbtimeo; |
3964 | |
3965 | error = req->r_error; |
3966 | |
3967 | if (nmrepp) |
3968 | nmrepp->nmc_mhead = NULL; |
3969 | |
3970 | /* RPC done, unlink the request. */ |
3971 | nfs_reqdequeue(req); |
3972 | |
3973 | mrep = req->r_nmrep.nmc_mhead; |
3974 | |
3975 | nmp = req->r_nmp; |
3976 | |
3977 | if ((req->r_flags & R_CWND) && nmp) { |
3978 | /* |
3979 | * Decrement the outstanding request count. |
3980 | */ |
3981 | req->r_flags &= ~R_CWND; |
3982 | lck_mtx_lock(&nmp->nm_lock); |
3983 | FSDBG(273, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd); |
3984 | nmp->nm_sent -= NFS_CWNDSCALE; |
3985 | if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) { |
3986 | /* congestion window is open, poke the cwnd queue */ |
3987 | struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq); |
3988 | TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain); |
3989 | req2->r_cchain.tqe_next = NFSREQNOLIST; |
3990 | wakeup(req2); |
3991 | } |
3992 | lck_mtx_unlock(&nmp->nm_lock); |
3993 | } |
3994 | |
3995 | if (nfs_request_using_gss(req)) { |
3996 | /* |
3997 | * If the request used an RPCSEC_GSS credential |
3998 | * then reset its sequence number bit in the |
3999 | * request window. |
4000 | */ |
4001 | nfs_gss_clnt_rpcdone(req); |
4002 | |
4003 | /* |
4004 | * If we need to re-send, go back and re-build the |
4005 | * request based on a new sequence number. |
4006 | * Note that we're using the original XID. |
4007 | */ |
4008 | if (error == EAGAIN) { |
4009 | req->r_error = 0; |
4010 | if (mrep) |
4011 | mbuf_freem(mrep); |
4012 | error = nfs_gss_clnt_args_restore(req); // remove any trailer mbufs |
4013 | req->r_nmrep.nmc_mhead = NULL; |
4014 | req->r_flags |= R_RESTART; |
4015 | if (error == ENEEDAUTH) { |
4016 | req->r_xid = 0; // get a new XID |
4017 | error = 0; |
4018 | } |
4019 | goto nfsmout; |
4020 | } |
4021 | } |
4022 | |
4023 | /* |
4024 | * If there was a successful reply, make sure to mark the mount as up. |
4025 | * If a tprintf message was given (or if this is a timed-out soft mount) |
4026 | * then post a tprintf message indicating the server is alive again. |
4027 | */ |
4028 | if (!error) { |
4029 | if ((req->r_flags & R_TPRINTFMSG) || |
4030 | (nmp && (NMFLAG(nmp, SOFT) || (req->r_flags & R_SOFT)) && |
4031 | ((nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_FORCE|NFSSTA_DEAD)) == NFSSTA_TIMEO))) |
4032 | nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, "is alive again" ); |
4033 | else |
4034 | nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, NULL); |
4035 | } |
4036 | if (!error && !nmp) |
4037 | error = ENXIO; |
4038 | nfsmout_if(error); |
4039 | |
4040 | /* |
4041 | * break down the RPC header and check if ok |
4042 | */ |
4043 | nmrep = req->r_nmrep; |
4044 | nfsm_chain_get_32(error, &nmrep, reply_status); |
4045 | nfsmout_if(error); |
4046 | if (reply_status == RPC_MSGDENIED) { |
4047 | nfsm_chain_get_32(error, &nmrep, rejected_status); |
4048 | nfsmout_if(error); |
4049 | if (rejected_status == RPC_MISMATCH) { |
4050 | error = ENOTSUP; |
4051 | goto nfsmout; |
4052 | } |
4053 | nfsm_chain_get_32(error, &nmrep, auth_status); |
4054 | nfsmout_if(error); |
4055 | switch (auth_status) { |
4056 | case RPCSEC_GSS_CREDPROBLEM: |
4057 | case RPCSEC_GSS_CTXPROBLEM: |
4058 | /* |
4059 | * An RPCSEC_GSS cred or context problem. |
4060 | * We can't use it anymore. |
4061 | * Restore the args, renew the context |
4062 | * and set up for a resend. |
4063 | */ |
4064 | error = nfs_gss_clnt_args_restore(req); |
4065 | if (error && error != ENEEDAUTH) |
4066 | break; |
4067 | |
4068 | if (!error) { |
4069 | error = nfs_gss_clnt_ctx_renew(req); |
4070 | if (error) |
4071 | break; |
4072 | } |
4073 | mbuf_freem(mrep); |
4074 | req->r_nmrep.nmc_mhead = NULL; |
4075 | req->r_xid = 0; // get a new XID |
4076 | req->r_flags |= R_RESTART; |
4077 | goto nfsmout; |
4078 | default: |
4079 | error = EACCES; |
4080 | break; |
4081 | } |
4082 | goto nfsmout; |
4083 | } |
4084 | |
4085 | /* Now check the verifier */ |
4086 | nfsm_chain_get_32(error, &nmrep, verf_type); // verifier flavor |
4087 | nfsm_chain_get_32(error, &nmrep, verf_len); // verifier length |
4088 | nfsmout_if(error); |
4089 | |
4090 | switch (req->r_auth) { |
4091 | case RPCAUTH_NONE: |
4092 | case RPCAUTH_SYS: |
4093 | /* Any AUTH_SYS verifier is ignored */ |
4094 | if (verf_len > 0) |
4095 | nfsm_chain_adv(error, &nmrep, nfsm_rndup(verf_len)); |
4096 | nfsm_chain_get_32(error, &nmrep, accepted_status); |
4097 | break; |
4098 | case RPCAUTH_KRB5: |
4099 | case RPCAUTH_KRB5I: |
4100 | case RPCAUTH_KRB5P: |
4101 | error = nfs_gss_clnt_verf_get(req, &nmrep, |
4102 | verf_type, verf_len, &accepted_status); |
4103 | break; |
4104 | } |
4105 | nfsmout_if(error); |
4106 | |
4107 | switch (accepted_status) { |
4108 | case RPC_SUCCESS: |
4109 | if (req->r_procnum == NFSPROC_NULL) { |
4110 | /* |
4111 | * The NFS null procedure is unique, |
4112 | * in not returning an NFS status. |
4113 | */ |
4114 | *status = NFS_OK; |
4115 | } else { |
4116 | nfsm_chain_get_32(error, &nmrep, *status); |
4117 | nfsmout_if(error); |
4118 | } |
4119 | |
4120 | if ((nmp->nm_vers != NFS_VER2) && (*status == NFSERR_TRYLATER)) { |
4121 | /* |
4122 | * It's a JUKEBOX error - delay and try again |
4123 | */ |
4124 | int delay, slpflag = (NMFLAG(nmp, INTR) && !(req->r_flags & R_NOINTR)) ? PCATCH : 0; |
4125 | |
4126 | mbuf_freem(mrep); |
4127 | req->r_nmrep.nmc_mhead = NULL; |
4128 | if ((req->r_delay >= 30) && !(nmp->nm_state & NFSSTA_MOUNTED)) { |
4129 | /* we're not yet completely mounted and */ |
4130 | /* we can't complete an RPC, so we fail */ |
4131 | OSAddAtomic64(1, &nfsstats.rpctimeouts); |
4132 | nfs_softterm(req); |
4133 | error = req->r_error; |
4134 | goto nfsmout; |
4135 | } |
4136 | req->r_delay = !req->r_delay ? NFS_TRYLATERDEL : (req->r_delay * 2); |
4137 | if (req->r_delay > 30) |
4138 | req->r_delay = 30; |
4139 | if (nmp->nm_tprintf_initial_delay && (req->r_delay >= nmp->nm_tprintf_initial_delay)) { |
4140 | if (!(req->r_flags & R_JBTPRINTFMSG)) { |
4141 | req->r_flags |= R_JBTPRINTFMSG; |
4142 | lck_mtx_lock(&nmp->nm_lock); |
4143 | nmp->nm_jbreqs++; |
4144 | lck_mtx_unlock(&nmp->nm_lock); |
4145 | } |
4146 | nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_JUKEBOXTIMEO, |
4147 | "resource temporarily unavailable (jukebox)" , 0); |
4148 | } |
4149 | if ((NMFLAG(nmp, SOFT) || (req->r_flags & R_SOFT)) && (req->r_delay == 30) && |
4150 | !(req->r_flags & R_NOINTR)) { |
4151 | /* for soft mounts, just give up after a short while */ |
4152 | OSAddAtomic64(1, &nfsstats.rpctimeouts); |
4153 | nfs_softterm(req); |
4154 | error = req->r_error; |
4155 | goto nfsmout; |
4156 | } |
4157 | delay = req->r_delay; |
4158 | if (req->r_callback.rcb_func) { |
4159 | struct timeval now; |
4160 | microuptime(&now); |
4161 | req->r_resendtime = now.tv_sec + delay; |
4162 | } else { |
4163 | do { |
4164 | if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0))) |
4165 | goto nfsmout; |
4166 | tsleep(nfs_request_finish, PSOCK|slpflag, "nfs_jukebox_trylater" , hz); |
4167 | slpflag = 0; |
4168 | } while (--delay > 0); |
4169 | } |
4170 | req->r_xid = 0; // get a new XID |
4171 | req->r_flags |= R_RESTART; |
4172 | req->r_start = 0; |
4173 | FSDBG(273, R_XID32(req->r_xid), nmp, req, NFSERR_TRYLATER); |
4174 | return (0); |
4175 | } |
4176 | |
4177 | if (req->r_flags & R_JBTPRINTFMSG) { |
4178 | req->r_flags &= ~R_JBTPRINTFMSG; |
4179 | lck_mtx_lock(&nmp->nm_lock); |
4180 | nmp->nm_jbreqs--; |
4181 | clearjbtimeo = (nmp->nm_jbreqs == 0) ? NFSSTA_JUKEBOXTIMEO : 0; |
4182 | lck_mtx_unlock(&nmp->nm_lock); |
4183 | nfs_up(nmp, req->r_thread, clearjbtimeo, "resource available again" ); |
4184 | } |
4185 | |
4186 | if ((nmp->nm_vers >= NFS_VER4) && (*status == NFSERR_WRONGSEC)) { |
4187 | /* |
4188 | * Hmmm... we need to try a different security flavor. |
4189 | * The first time a request hits this, we will allocate an array |
4190 | * to track flavors to try. We fill the array with the mount's |
4191 | * preferred flavors or the server's preferred flavors or just the |
4192 | * flavors we support. |
4193 | */ |
4194 | uint32_t srvflavors[NX_MAX_SEC_FLAVORS]; |
4195 | int srvcount, i, j; |
4196 | |
4197 | /* Call SECINFO to try to get list of flavors from server. */ |
4198 | srvcount = NX_MAX_SEC_FLAVORS; |
4199 | nfs4_secinfo_rpc(nmp, &req->r_secinfo, req->r_cred, srvflavors, &srvcount); |
4200 | |
4201 | if (!req->r_wrongsec) { |
4202 | /* first time... set up flavor array */ |
4203 | MALLOC(req->r_wrongsec, uint32_t*, NX_MAX_SEC_FLAVORS*sizeof(uint32_t), M_TEMP, M_WAITOK); |
4204 | if (!req->r_wrongsec) { |
4205 | error = EACCES; |
4206 | goto nfsmout; |
4207 | } |
4208 | i=0; |
4209 | if (nmp->nm_sec.count) { /* use the mount's preferred list of flavors */ |
4210 | for(; i < nmp->nm_sec.count; i++) |
4211 | req->r_wrongsec[i] = nmp->nm_sec.flavors[i]; |
4212 | } else if (srvcount) { /* otherwise use the server's list of flavors */ |
4213 | for(; i < srvcount; i++) |
4214 | req->r_wrongsec[i] = srvflavors[i]; |
4215 | } else { /* otherwise, just try the flavors we support. */ |
4216 | req->r_wrongsec[i++] = RPCAUTH_KRB5P; |
4217 | req->r_wrongsec[i++] = RPCAUTH_KRB5I; |
4218 | req->r_wrongsec[i++] = RPCAUTH_KRB5; |
4219 | req->r_wrongsec[i++] = RPCAUTH_SYS; |
4220 | req->r_wrongsec[i++] = RPCAUTH_NONE; |
4221 | } |
4222 | for(; i < NX_MAX_SEC_FLAVORS; i++) /* invalidate any remaining slots */ |
4223 | req->r_wrongsec[i] = RPCAUTH_INVALID; |
4224 | } |
4225 | |
4226 | /* clear the current flavor from the list */ |
4227 | for(i=0; i < NX_MAX_SEC_FLAVORS; i++) |
4228 | if (req->r_wrongsec[i] == req->r_auth) |
4229 | req->r_wrongsec[i] = RPCAUTH_INVALID; |
4230 | |
4231 | /* find the next flavor to try */ |
4232 | for(i=0; i < NX_MAX_SEC_FLAVORS; i++) |
4233 | if (req->r_wrongsec[i] != RPCAUTH_INVALID) { |
4234 | if (!srvcount) /* no server list, just try it */ |
4235 | break; |
4236 | /* check that it's in the server's list */ |
4237 | for(j=0; j < srvcount; j++) |
4238 | if (req->r_wrongsec[i] == srvflavors[j]) |
4239 | break; |
4240 | if (j < srvcount) /* found */ |
4241 | break; |
4242 | /* not found in server list */ |
4243 | req->r_wrongsec[i] = RPCAUTH_INVALID; |
4244 | } |
4245 | if (i == NX_MAX_SEC_FLAVORS) { |
4246 | /* nothing left to try! */ |
4247 | error = EACCES; |
4248 | goto nfsmout; |
4249 | } |
4250 | |
4251 | /* retry with the next auth flavor */ |
4252 | req->r_auth = req->r_wrongsec[i]; |
4253 | req->r_xid = 0; // get a new XID |
4254 | req->r_flags |= R_RESTART; |
4255 | req->r_start = 0; |
4256 | FSDBG(273, R_XID32(req->r_xid), nmp, req, NFSERR_WRONGSEC); |
4257 | return (0); |
4258 | } |
4259 | if ((nmp->nm_vers >= NFS_VER4) && req->r_wrongsec) { |
4260 | /* |
4261 | * We renegotiated security for this request; so update the |
4262 | * default security flavor for the associated node. |
4263 | */ |
4264 | if (req->r_np) |
4265 | req->r_np->n_auth = req->r_auth; |
4266 | } |
4267 | |
4268 | if (*status == NFS_OK) { |
4269 | /* |
4270 | * Successful NFS request |
4271 | */ |
4272 | *nmrepp = nmrep; |
4273 | req->r_nmrep.nmc_mhead = NULL; |
4274 | break; |
4275 | } |
4276 | /* Got an NFS error of some kind */ |
4277 | |
4278 | /* |
4279 | * If the File Handle was stale, invalidate the |
4280 | * lookup cache, just in case. |
4281 | */ |
4282 | if ((*status == ESTALE) && req->r_np) { |
4283 | cache_purge(NFSTOV(req->r_np)); |
4284 | /* if monitored, also send delete event */ |
4285 | if (vnode_ismonitored(NFSTOV(req->r_np))) |
4286 | nfs_vnode_notify(req->r_np, (VNODE_EVENT_ATTRIB|VNODE_EVENT_DELETE)); |
4287 | } |
4288 | if (nmp->nm_vers == NFS_VER2) |
4289 | mbuf_freem(mrep); |
4290 | else |
4291 | *nmrepp = nmrep; |
4292 | req->r_nmrep.nmc_mhead = NULL; |
4293 | error = 0; |
4294 | break; |
4295 | case RPC_PROGUNAVAIL: |
4296 | error = EPROGUNAVAIL; |
4297 | break; |
4298 | case RPC_PROGMISMATCH: |
4299 | error = ERPCMISMATCH; |
4300 | break; |
4301 | case RPC_PROCUNAVAIL: |
4302 | error = EPROCUNAVAIL; |
4303 | break; |
4304 | case RPC_GARBAGE: |
4305 | error = EBADRPC; |
4306 | break; |
4307 | case RPC_SYSTEM_ERR: |
4308 | default: |
4309 | error = EIO; |
4310 | break; |
4311 | } |
4312 | nfsmout: |
4313 | if (req->r_flags & R_JBTPRINTFMSG) { |
4314 | req->r_flags &= ~R_JBTPRINTFMSG; |
4315 | lck_mtx_lock(&nmp->nm_lock); |
4316 | nmp->nm_jbreqs--; |
4317 | clearjbtimeo = (nmp->nm_jbreqs == 0) ? NFSSTA_JUKEBOXTIMEO : 0; |
4318 | lck_mtx_unlock(&nmp->nm_lock); |
4319 | if (clearjbtimeo) |
4320 | nfs_up(nmp, req->r_thread, clearjbtimeo, NULL); |
4321 | } |
4322 | FSDBG(273, R_XID32(req->r_xid), nmp, req, |
4323 | (!error && (*status == NFS_OK)) ? 0xf0f0f0f0 : error); |
4324 | return (error); |
4325 | } |
4326 | |
4327 | /* |
4328 | * NFS request using a GSS/Kerberos security flavor? |
4329 | */ |
4330 | int |
4331 | nfs_request_using_gss(struct nfsreq *req) |
4332 | { |
4333 | if (!req->r_gss_ctx) |
4334 | return (0); |
4335 | switch (req->r_auth) { |
4336 | case RPCAUTH_KRB5: |
4337 | case RPCAUTH_KRB5I: |
4338 | case RPCAUTH_KRB5P: |
4339 | return (1); |
4340 | } |
4341 | return (0); |
4342 | } |
4343 | |
4344 | /* |
4345 | * Perform an NFS request synchronously. |
4346 | */ |
4347 | |
4348 | int |
4349 | nfs_request( |
4350 | nfsnode_t np, |
4351 | mount_t mp, /* used only if !np */ |
4352 | struct nfsm_chain *nmrest, |
4353 | int procnum, |
4354 | vfs_context_t ctx, |
4355 | struct nfsreq_secinfo_args *si, |
4356 | struct nfsm_chain *nmrepp, |
4357 | u_int64_t *xidp, |
4358 | int *status) |
4359 | { |
4360 | return nfs_request2(np, mp, nmrest, procnum, |
4361 | vfs_context_thread(ctx), vfs_context_ucred(ctx), |
4362 | si, 0, nmrepp, xidp, status); |
4363 | } |
4364 | |
4365 | int |
4366 | nfs_request2( |
4367 | nfsnode_t np, |
4368 | mount_t mp, /* used only if !np */ |
4369 | struct nfsm_chain *nmrest, |
4370 | int procnum, |
4371 | thread_t thd, |
4372 | kauth_cred_t cred, |
4373 | struct nfsreq_secinfo_args *si, |
4374 | int flags, |
4375 | struct nfsm_chain *nmrepp, |
4376 | u_int64_t *xidp, |
4377 | int *status) |
4378 | { |
4379 | struct nfsreq rq, *req = &rq; |
4380 | int error; |
4381 | |
4382 | if ((error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, &req))) |
4383 | return (error); |
4384 | req->r_flags |= (flags & (R_OPTMASK | R_SOFT)); |
4385 | if (si) |
4386 | req->r_secinfo = *si; |
4387 | |
4388 | FSDBG_TOP(273, R_XID32(req->r_xid), np, procnum, 0); |
4389 | do { |
4390 | req->r_error = 0; |
4391 | req->r_flags &= ~R_RESTART; |
4392 | if ((error = nfs_request_add_header(req))) |
4393 | break; |
4394 | if (xidp) |
4395 | *xidp = req->r_xid; |
4396 | if ((error = nfs_request_send(req, 1))) |
4397 | break; |
4398 | nfs_request_wait(req); |
4399 | if ((error = nfs_request_finish(req, nmrepp, status))) |
4400 | break; |
4401 | } while (req->r_flags & R_RESTART); |
4402 | |
4403 | FSDBG_BOT(273, R_XID32(req->r_xid), np, procnum, error); |
4404 | nfs_request_rele(req); |
4405 | return (error); |
4406 | } |
4407 | |
4408 | |
4409 | /* |
4410 | * Set up a new null proc request to exchange GSS context tokens with the |
4411 | * server. Associate the context that we are setting up with the request that we |
4412 | * are sending. |
4413 | */ |
4414 | |
4415 | int |
4416 | nfs_request_gss( |
4417 | mount_t mp, |
4418 | struct nfsm_chain *nmrest, |
4419 | thread_t thd, |
4420 | kauth_cred_t cred, |
4421 | int flags, |
4422 | struct nfs_gss_clnt_ctx *cp, /* Set to gss context to renew or setup */ |
4423 | struct nfsm_chain *nmrepp, |
4424 | int *status) |
4425 | { |
4426 | struct nfsreq rq, *req = &rq; |
4427 | int error, wait = 1; |
4428 | |
4429 | if ((error = nfs_request_create(NULL, mp, nmrest, NFSPROC_NULL, thd, cred, &req))) |
4430 | return (error); |
4431 | req->r_flags |= (flags & R_OPTMASK); |
4432 | |
4433 | if (cp == NULL) { |
4434 | printf("nfs_request_gss request has no context\n" ); |
4435 | nfs_request_rele(req); |
4436 | return (NFSERR_EAUTH); |
4437 | } |
4438 | nfs_gss_clnt_ctx_ref(req, cp); |
4439 | |
4440 | /* |
4441 | * Don't wait for a reply to a context destroy advisory |
4442 | * to avoid hanging on a dead server. |
4443 | */ |
4444 | if (cp->gss_clnt_proc == RPCSEC_GSS_DESTROY) |
4445 | wait = 0; |
4446 | |
4447 | FSDBG_TOP(273, R_XID32(req->r_xid), NULL, NFSPROC_NULL, 0); |
4448 | do { |
4449 | req->r_error = 0; |
4450 | req->r_flags &= ~R_RESTART; |
4451 | if ((error = nfs_request_add_header(req))) |
4452 | break; |
4453 | |
4454 | if ((error = nfs_request_send(req, wait))) |
4455 | break; |
4456 | if (!wait) |
4457 | break; |
4458 | |
4459 | nfs_request_wait(req); |
4460 | if ((error = nfs_request_finish(req, nmrepp, status))) |
4461 | break; |
4462 | } while (req->r_flags & R_RESTART); |
4463 | |
4464 | FSDBG_BOT(273, R_XID32(req->r_xid), NULL, NFSPROC_NULL, error); |
4465 | |
4466 | nfs_gss_clnt_ctx_unref(req); |
4467 | nfs_request_rele(req); |
4468 | |
4469 | return (error); |
4470 | } |
4471 | |
4472 | /* |
4473 | * Create and start an asynchronous NFS request. |
4474 | */ |
4475 | int |
4476 | nfs_request_async( |
4477 | nfsnode_t np, |
4478 | mount_t mp, /* used only if !np */ |
4479 | struct nfsm_chain *nmrest, |
4480 | int procnum, |
4481 | thread_t thd, |
4482 | kauth_cred_t cred, |
4483 | struct nfsreq_secinfo_args *si, |
4484 | int flags, |
4485 | struct nfsreq_cbinfo *cb, |
4486 | struct nfsreq **reqp) |
4487 | { |
4488 | struct nfsreq *req; |
4489 | struct nfsmount *nmp; |
4490 | int error, sent; |
4491 | |
4492 | error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, reqp); |
4493 | req = *reqp; |
4494 | FSDBG(274, (req ? R_XID32(req->r_xid) : 0), np, procnum, error); |
4495 | if (error) |
4496 | return (error); |
4497 | req->r_flags |= (flags & R_OPTMASK); |
4498 | req->r_flags |= R_ASYNC; |
4499 | if (si) |
4500 | req->r_secinfo = *si; |
4501 | if (cb) |
4502 | req->r_callback = *cb; |
4503 | error = nfs_request_add_header(req); |
4504 | if (!error) { |
4505 | req->r_flags |= R_WAITSENT; |
4506 | if (req->r_callback.rcb_func) |
4507 | nfs_request_ref(req, 0); |
4508 | error = nfs_request_send(req, 1); |
4509 | lck_mtx_lock(&req->r_mtx); |
4510 | if (!error && !(req->r_flags & R_SENT) && req->r_callback.rcb_func) { |
4511 | /* make sure to wait until this async I/O request gets sent */ |
4512 | int slpflag = (req->r_nmp && NMFLAG(req->r_nmp, INTR) && req->r_thread && !(req->r_flags & R_NOINTR)) ? PCATCH : 0; |
4513 | struct timespec ts = { 2, 0 }; |
4514 | while (!(req->r_flags & R_SENT)) { |
4515 | nmp = req->r_nmp; |
4516 | if ((req->r_flags & R_RESENDQ) && !nfs_mount_gone(nmp)) { |
4517 | lck_mtx_lock(&nmp->nm_lock); |
4518 | if ((nmp->nm_state & NFSSTA_RECOVER) && (req->r_rchain.tqe_next != NFSREQNOLIST)) { |
4519 | /* |
4520 | * It's not going to get off the resend queue if we're in recovery. |
4521 | * So, just take it off ourselves. We could be holding mount state |
4522 | * busy and thus holding up the start of recovery. |
4523 | */ |
4524 | TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain); |
4525 | req->r_rchain.tqe_next = NFSREQNOLIST; |
4526 | if (req->r_flags & R_RESENDQ) |
4527 | req->r_flags &= ~R_RESENDQ; |
4528 | lck_mtx_unlock(&nmp->nm_lock); |
4529 | req->r_flags |= R_SENDING; |
4530 | lck_mtx_unlock(&req->r_mtx); |
4531 | error = nfs_send(req, 1); |
4532 | /* Remove the R_RESENDQ reference */ |
4533 | nfs_request_rele(req); |
4534 | lck_mtx_lock(&req->r_mtx); |
4535 | if (error) |
4536 | break; |
4537 | continue; |
4538 | } |
4539 | lck_mtx_unlock(&nmp->nm_lock); |
4540 | } |
4541 | if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0))) |
4542 | break; |
4543 | msleep(req, &req->r_mtx, slpflag | (PZERO - 1), "nfswaitsent" , &ts); |
4544 | slpflag = 0; |
4545 | } |
4546 | } |
4547 | sent = req->r_flags & R_SENT; |
4548 | lck_mtx_unlock(&req->r_mtx); |
4549 | if (error && req->r_callback.rcb_func && !sent) { |
4550 | nfs_request_rele(req); |
4551 | } |
4552 | } |
4553 | FSDBG(274, R_XID32(req->r_xid), np, procnum, error); |
4554 | if (error || req->r_callback.rcb_func) |
4555 | nfs_request_rele(req); |
4556 | |
4557 | return (error); |
4558 | } |
4559 | |
4560 | /* |
4561 | * Wait for and finish an asynchronous NFS request. |
4562 | */ |
4563 | int |
4564 | nfs_request_async_finish( |
4565 | struct nfsreq *req, |
4566 | struct nfsm_chain *nmrepp, |
4567 | u_int64_t *xidp, |
4568 | int *status) |
4569 | { |
4570 | int error = 0, asyncio = req->r_callback.rcb_func ? 1 : 0; |
4571 | struct nfsmount *nmp; |
4572 | |
4573 | lck_mtx_lock(&req->r_mtx); |
4574 | if (!asyncio) |
4575 | req->r_flags |= R_ASYNCWAIT; |
4576 | while (req->r_flags & R_RESENDQ) { /* wait until the request is off the resend queue */ |
4577 | struct timespec ts = { 2, 0 }; |
4578 | |
4579 | if ((nmp = req->r_nmp)) { |
4580 | lck_mtx_lock(&nmp->nm_lock); |
4581 | if ((nmp->nm_state & NFSSTA_RECOVER) && (req->r_rchain.tqe_next != NFSREQNOLIST)) { |
4582 | /* |
4583 | * It's not going to get off the resend queue if we're in recovery. |
4584 | * So, just take it off ourselves. We could be holding mount state |
4585 | * busy and thus holding up the start of recovery. |
4586 | */ |
4587 | TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain); |
4588 | req->r_rchain.tqe_next = NFSREQNOLIST; |
4589 | if (req->r_flags & R_RESENDQ) |
4590 | req->r_flags &= ~R_RESENDQ; |
4591 | /* Remove the R_RESENDQ reference */ |
4592 | assert(req->r_refs > 0); |
4593 | req->r_refs--; |
4594 | lck_mtx_unlock(&nmp->nm_lock); |
4595 | break; |
4596 | } |
4597 | lck_mtx_unlock(&nmp->nm_lock); |
4598 | } |
4599 | if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0))) |
4600 | break; |
4601 | msleep(req, &req->r_mtx, PZERO-1, "nfsresendqwait" , &ts); |
4602 | } |
4603 | lck_mtx_unlock(&req->r_mtx); |
4604 | |
4605 | if (!error) { |
4606 | nfs_request_wait(req); |
4607 | error = nfs_request_finish(req, nmrepp, status); |
4608 | } |
4609 | |
4610 | while (!error && (req->r_flags & R_RESTART)) { |
4611 | if (asyncio) { |
4612 | assert(req->r_achain.tqe_next == NFSREQNOLIST); |
4613 | lck_mtx_lock(&req->r_mtx); |
4614 | req->r_flags &= ~R_IOD; |
4615 | if (req->r_resendtime) { /* send later */ |
4616 | nfs_asyncio_resend(req); |
4617 | lck_mtx_unlock(&req->r_mtx); |
4618 | return (EINPROGRESS); |
4619 | } |
4620 | lck_mtx_unlock(&req->r_mtx); |
4621 | } |
4622 | req->r_error = 0; |
4623 | req->r_flags &= ~R_RESTART; |
4624 | if ((error = nfs_request_add_header(req))) |
4625 | break; |
4626 | if ((error = nfs_request_send(req, !asyncio))) |
4627 | break; |
4628 | if (asyncio) |
4629 | return (EINPROGRESS); |
4630 | nfs_request_wait(req); |
4631 | if ((error = nfs_request_finish(req, nmrepp, status))) |
4632 | break; |
4633 | } |
4634 | if (xidp) |
4635 | *xidp = req->r_xid; |
4636 | |
4637 | FSDBG(275, R_XID32(req->r_xid), req->r_np, req->r_procnum, error); |
4638 | nfs_request_rele(req); |
4639 | return (error); |
4640 | } |
4641 | |
4642 | /* |
4643 | * Cancel a pending asynchronous NFS request. |
4644 | */ |
4645 | void |
4646 | nfs_request_async_cancel(struct nfsreq *req) |
4647 | { |
4648 | FSDBG(275, R_XID32(req->r_xid), req->r_np, req->r_procnum, 0xD1ED1E); |
4649 | nfs_request_rele(req); |
4650 | } |
4651 | |
4652 | /* |
4653 | * Flag a request as being terminated. |
4654 | */ |
4655 | void |
4656 | nfs_softterm(struct nfsreq *req) |
4657 | { |
4658 | struct nfsmount *nmp = req->r_nmp; |
4659 | req->r_flags |= R_SOFTTERM; |
4660 | req->r_error = ETIMEDOUT; |
4661 | if (!(req->r_flags & R_CWND) || nfs_mount_gone(nmp)) |
4662 | return; |
4663 | /* update congestion window */ |
4664 | req->r_flags &= ~R_CWND; |
4665 | lck_mtx_lock(&nmp->nm_lock); |
4666 | FSDBG(532, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd); |
4667 | nmp->nm_sent -= NFS_CWNDSCALE; |
4668 | if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) { |
4669 | /* congestion window is open, poke the cwnd queue */ |
4670 | struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq); |
4671 | TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain); |
4672 | req2->r_cchain.tqe_next = NFSREQNOLIST; |
4673 | wakeup(req2); |
4674 | } |
4675 | lck_mtx_unlock(&nmp->nm_lock); |
4676 | } |
4677 | |
4678 | /* |
4679 | * Ensure req isn't in use by the timer, then dequeue it. |
4680 | */ |
4681 | void |
4682 | nfs_reqdequeue(struct nfsreq *req) |
4683 | { |
4684 | lck_mtx_lock(nfs_request_mutex); |
4685 | while (req->r_lflags & RL_BUSY) { |
4686 | req->r_lflags |= RL_WAITING; |
4687 | msleep(&req->r_lflags, nfs_request_mutex, PSOCK, "reqdeq" , NULL); |
4688 | } |
4689 | if (req->r_lflags & RL_QUEUED) { |
4690 | TAILQ_REMOVE(&nfs_reqq, req, r_chain); |
4691 | req->r_lflags &= ~RL_QUEUED; |
4692 | } |
4693 | lck_mtx_unlock(nfs_request_mutex); |
4694 | } |
4695 | |
4696 | /* |
4697 | * Busy (lock) a nfsreq, used by the nfs timer to make sure it's not |
4698 | * free()'d out from under it. |
4699 | */ |
4700 | void |
4701 | nfs_reqbusy(struct nfsreq *req) |
4702 | { |
4703 | if (req->r_lflags & RL_BUSY) |
4704 | panic("req locked" ); |
4705 | req->r_lflags |= RL_BUSY; |
4706 | } |
4707 | |
4708 | /* |
4709 | * Unbusy the nfsreq passed in, return the next nfsreq in the chain busied. |
4710 | */ |
4711 | struct nfsreq * |
4712 | nfs_reqnext(struct nfsreq *req) |
4713 | { |
4714 | struct nfsreq * nextreq; |
4715 | |
4716 | if (req == NULL) |
4717 | return (NULL); |
4718 | /* |
4719 | * We need to get and busy the next req before signalling the |
4720 | * current one, otherwise wakeup() may block us and we'll race to |
4721 | * grab the next req. |
4722 | */ |
4723 | nextreq = TAILQ_NEXT(req, r_chain); |
4724 | if (nextreq != NULL) |
4725 | nfs_reqbusy(nextreq); |
4726 | /* unbusy and signal. */ |
4727 | req->r_lflags &= ~RL_BUSY; |
4728 | if (req->r_lflags & RL_WAITING) { |
4729 | req->r_lflags &= ~RL_WAITING; |
4730 | wakeup(&req->r_lflags); |
4731 | } |
4732 | return (nextreq); |
4733 | } |
4734 | |
/*
 * NFS request queue timer routine
 *
 * Scan the NFS request queue for any requests that have timed out.
 *
 * Alert the system of unresponsive servers.
 * Mark expired requests on soft mounts as terminated.
 * For UDP, mark/signal requests for retransmission.
 *
 * Both parameters are unused.
 *
 * The scan runs with nfs_request_mutex held; each request is marked RL_BUSY
 * (nfs_reqbusy/nfs_reqnext) while it is being examined.  If the queue is
 * empty the timer turns itself off (nfs_request_timer_on = 0) and is not
 * rearmed.  Otherwise, after the scan, mounts collected on the local poke
 * queue are poked with the request mutex dropped and the timer is rearmed
 * via nfs_interval_timer_start().
 *
 * If a mount is found to have passed its dead timeout, the mount is made a
 * zombie and the whole scan is restarted from the head of the queue.
 */
void
nfs_request_timer(__unused void *param0, __unused void *param1)
{
	struct nfsreq *req;
	struct nfsmount *nmp;
	int timeo, maxtime, finish_asyncio, error;
	struct timeval now;
	/* mounts to poke once the request mutex has been dropped; survives "goto restart" */
	TAILQ_HEAD(nfs_mount_pokeq, nfsmount) nfs_mount_poke_queue;
	TAILQ_INIT(&nfs_mount_poke_queue);

restart:
	lck_mtx_lock(nfs_request_mutex);
	req = TAILQ_FIRST(&nfs_reqq);
	if (req == NULL) {	/* no requests - turn timer off */
		nfs_request_timer_on = 0;
		lck_mtx_unlock(nfs_request_mutex);
		return;
	}

	nfs_reqbusy(req);

	/* one timestamp is used for the whole scan */
	microuptime(&now);
	for ( ; req != NULL ; req = nfs_reqnext(req)) {
		nmp = req->r_nmp;
		if (nmp == NULL) {
			NFS_SOCK_DBG("Found a request with out a mount!\n" );
			continue;
		}
		/* skip requests that already have an error or a reply mbuf chain attached */
		if (req->r_error || req->r_nmrep.nmc_mhead)
			continue;
		if ((error = nfs_sigintr(nmp, req, req->r_thread, 0))) {
			if (req->r_callback.rcb_func != NULL) {
				/* async I/O RPC needs to be finished */
				lck_mtx_lock(&req->r_mtx);
				req->r_error = error;
				/* if R_WAITSENT is set, only wake the waiter; don't finish here */
				finish_asyncio = !(req->r_flags & R_WAITSENT);
				wakeup(req);
				lck_mtx_unlock(&req->r_mtx);
				if (finish_asyncio)
					nfs_asyncio_finish(req);
			}
			continue;
		}

		lck_mtx_lock(&req->r_mtx);

		/*
		 * Report an unresponsive server ("not responding") once the request
		 * has been retransmitted more than twice or has hit a resend error,
		 * but no more often than every nm_tprintf_delay seconds per request.
		 */
		if (nmp->nm_tprintf_initial_delay &&
		    ((req->r_rexmit > 2) || (req->r_flags & R_RESENDERR)) &&
		    ((req->r_lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) {
			req->r_lastmsg = now.tv_sec;
			nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_TIMEO,
				"not responding" , 1);
			req->r_flags |= R_TPRINTFMSG;
			lck_mtx_lock(&nmp->nm_lock);
			if (!(nmp->nm_state & NFSSTA_MOUNTED)) {
				lck_mtx_unlock(&nmp->nm_lock);
				/* we're not yet completely mounted and */
				/* we can't complete an RPC, so we fail */
				OSAddAtomic64(1, &nfsstats.rpctimeouts);
				nfs_softterm(req);
				finish_asyncio = ((req->r_callback.rcb_func != NULL) && !(req->r_flags & R_WAITSENT));
				wakeup(req);
				lck_mtx_unlock(&req->r_mtx);
				if (finish_asyncio)
					nfs_asyncio_finish(req);
				continue;
			}
			lck_mtx_unlock(&nmp->nm_lock);
		}

		/*
		 * Put a reasonable limit on the maximum timeout,
		 * and reduce that limit when soft mounts get timeouts or are in reconnect.
		 *
		 * Three cases, in order:
		 *  - not soft (neither mount nor request) and not squishable: NFS_MAXTIMEO
		 *  - setup/recover request, or no reconnect in progress, or reconnect
		 *    started less than 8 seconds ago: scaled down by nm_timeouts
		 *  - otherwise: NFS_MINTIMEO/4
		 */
		if (!(NMFLAG(nmp, SOFT) || (req->r_flags & R_SOFT)) && !nfs_can_squish(nmp))
			maxtime = NFS_MAXTIMEO;
		else if ((req->r_flags & (R_SETUP|R_RECOVER)) ||
			 ((nmp->nm_reconnect_start <= 0) || ((now.tv_sec - nmp->nm_reconnect_start) < 8)))
			maxtime = (NFS_MAXTIMEO / (nmp->nm_timeouts+1))/2;
		else
			maxtime = NFS_MINTIMEO/4;

		/*
		 * Check for request timeout.
		 * (A negative r_rtt -- it is set to -1 below when a resend is
		 * requested -- skips this accounting entirely.)
		 */
		if (req->r_rtt >= 0) {
			req->r_rtt++;
			lck_mtx_lock(&nmp->nm_lock);
			if (req->r_flags & R_RESENDERR) {
				/* with resend errors, retry every few seconds */
				timeo = 4*hz;
			} else {
				if (req->r_procnum == NFSPROC_NULL && req->r_gss_ctx != NULL)
					timeo = NFS_MINIDEMTIMEO; // gss context setup
				else if (NMFLAG(nmp, DUMBTIMER))
					timeo = nmp->nm_timeo;
				else
					timeo = NFS_RTO(nmp, proct[req->r_procnum]);

				/* ensure 62.5 ms floor */
				while (16 * timeo < hz)
					timeo *= 2;
				/* back off according to how many timeouts the mount has seen (nm_timeouts is capped at 8 below) */
				if (nmp->nm_timeouts > 0)
					timeo *= nfs_backoff[nmp->nm_timeouts - 1];
			}
			/* limit timeout to max */
			if (timeo > maxtime)
				timeo = maxtime;
			if (req->r_rtt <= timeo) {
				NFS_SOCK_DBG("nfs timeout: req time %d and timeo is %d continue\n" , req->r_rtt, timeo);
				lck_mtx_unlock(&nmp->nm_lock);
				lck_mtx_unlock(&req->r_mtx);
				continue;
			}
			/* The request has timed out */
			NFS_SOCK_DBG("nfs timeout: proc %d %d xid %llx rtt %d to %d # %d, t %ld/%d\n" ,
				req->r_procnum, proct[req->r_procnum],
				req->r_xid, req->r_rtt, timeo, nmp->nm_timeouts,
				(now.tv_sec - req->r_start)*NFS_HZ, maxtime);
			if (nmp->nm_timeouts < 8)
				nmp->nm_timeouts++;
			if (nfs_mount_check_dead_timeout(nmp)) {
				/* Unbusy this request */
				req->r_lflags &= ~RL_BUSY;
				if (req->r_lflags & RL_WAITING) {
					req->r_lflags &= ~RL_WAITING;
					wakeup(&req->r_lflags);
				}
				lck_mtx_unlock(&req->r_mtx);

				/* No need to poke this mount */
				/*
				 * NOTE(review): a mount reference (nm_ref++) is taken when nmp
				 * is put on nfs_mount_poke_queue below, and is normally dropped
				 * after nfs_sock_poke().  Removing nmp from the queue here does
				 * not visibly drop that reference -- confirm whether it is
				 * accounted for elsewhere.
				 */
				if (nmp->nm_sockflags & NMSOCK_POKE) {
					nmp->nm_sockflags &= ~NMSOCK_POKE;
					TAILQ_REMOVE(&nfs_mount_poke_queue, nmp, nm_pokeq);
				}
				/* Release our lock state, so we can become a zombie */
				lck_mtx_unlock(nfs_request_mutex);

				/*
				 * Note nfs_mount_make_zombie(nmp) must be
				 * called with nm_lock held. After doing some
				 * work we release nm_lock in
				 * nfs_make_mount_zombie with out acquiring any
				 * other locks. (Later, in nfs_mount_zombie we
				 * will acquire nfs_request_mutex, r_mtx,
				 * nm_lock in that order). So we should not be
				 * introducing deadlock here. We take a reference
				 * on the mount so that it's still there when we
				 * release the lock.
				 */
				nmp->nm_ref++;
				nfs_mount_make_zombie(nmp);
				lck_mtx_unlock(&nmp->nm_lock);
				nfs_mount_rele(nmp);

				/*
				 * All the request for this mount have now been
				 * removed from the request queue. Restart to
				 * process the remaining mounts
				 */
				goto restart;
			}

			/* if it's been a few seconds, try poking the socket */
			if ((nmp->nm_sotype == SOCK_STREAM) &&
			    ((now.tv_sec - req->r_start) >= 3) &&
			    !(nmp->nm_sockflags & (NMSOCK_POKE|NMSOCK_UNMOUNT)) &&
			    (nmp->nm_sockflags & NMSOCK_READY)) {
				nmp->nm_sockflags |= NMSOCK_POKE;
				/*
				 * We take a ref on the mount so that we know the mount will still be there
				 * when we process the nfs_mount_poke_queue. An unmount request will block
				 * in nfs_mount_drain_and_cleanup until after the poke is finished. We release
				 * the reference after calling nfs_sock_poke below;
				 */
				nmp->nm_ref++;
				TAILQ_INSERT_TAIL(&nfs_mount_poke_queue, nmp, nm_pokeq);
			}
			lck_mtx_unlock(&nmp->nm_lock);
		}

		/* For soft mounts (& SETUPs/RECOVERs), check for too many retransmits/timeout. */
		if ((NMFLAG(nmp, SOFT) || (req->r_flags & (R_SETUP|R_RECOVER|R_SOFT))) &&
		    ((req->r_rexmit >= req->r_retry) || /* too many */
		     ((now.tv_sec - req->r_start)*NFS_HZ > maxtime))) { /* too long */
			OSAddAtomic64(1, &nfsstats.rpctimeouts);
			lck_mtx_lock(&nmp->nm_lock);
			if (!(nmp->nm_state & NFSSTA_TIMEO)) {
				lck_mtx_unlock(&nmp->nm_lock);
				/* make sure we note the unresponsive server */
				/* (maxtime may be less than tprintf delay) */
				nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_TIMEO,
					"not responding" , 1);
				req->r_lastmsg = now.tv_sec;
				req->r_flags |= R_TPRINTFMSG;
			} else {
				lck_mtx_unlock(&nmp->nm_lock);
			}
			if (req->r_flags & R_NOINTR) {
				/* don't terminate nointr requests on timeout */
				lck_mtx_unlock(&req->r_mtx);
				continue;
			}
			NFS_SOCK_DBG("nfs timer TERMINATE: p %d x 0x%llx f 0x%x rtt %d t %ld\n" ,
				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt,
				now.tv_sec - req->r_start);
			nfs_softterm(req);
			finish_asyncio = ((req->r_callback.rcb_func != NULL) && !(req->r_flags & R_WAITSENT));
			wakeup(req);
			lck_mtx_unlock(&req->r_mtx);
			if (finish_asyncio)
				nfs_asyncio_finish(req);
			continue;
		}

		/* for TCP, only resend if explicitly requested */
		if ((nmp->nm_sotype == SOCK_STREAM) && !(req->r_flags & R_MUSTRESEND)) {
			/* count the timeout (clamped at NFS_MAXREXMIT) and restart the rtt clock */
			if (++req->r_rexmit > NFS_MAXREXMIT)
				req->r_rexmit = NFS_MAXREXMIT;
			req->r_rtt = 0;
			lck_mtx_unlock(&req->r_mtx);
			continue;
		}

		/*
		 * The request needs to be (re)sent.  Kick the requester to resend it.
		 * (unless it's already marked as needing a resend)
		 */
		if ((req->r_flags & R_MUSTRESEND) && (req->r_rtt == -1)) {
			lck_mtx_unlock(&req->r_mtx);
			continue;
		}
		NFS_SOCK_DBG("nfs timer mark resend: p %d x 0x%llx f 0x%x rtt %d\n" ,
			req->r_procnum, req->r_xid, req->r_flags, req->r_rtt);
		req->r_flags |= R_MUSTRESEND;
		req->r_rtt = -1;
		wakeup(req);
		/* queue for async resend only if R_ASYNC is the sole flag set among IOD/ASYNC/ASYNCWAIT/SENDING */
		if ((req->r_flags & (R_IOD|R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC)
			nfs_asyncio_resend(req);
		lck_mtx_unlock(&req->r_mtx);
	}

	lck_mtx_unlock(nfs_request_mutex);

	/* poke any sockets */
	while ((nmp = TAILQ_FIRST(&nfs_mount_poke_queue))) {
		TAILQ_REMOVE(&nfs_mount_poke_queue, nmp, nm_pokeq);
		nfs_sock_poke(nmp);
		/* drop the reference taken when the mount was queued for a poke */
		nfs_mount_rele(nmp);
	}

	/* rearm the timer for the next scan */
	nfs_interval_timer_start(nfs_request_timer_call, NFS_REQUESTDELAY);
}
4997 | |
4998 | /* |
4999 | * check a thread's proc for the "noremotehang" flag. |
5000 | */ |
5001 | int |
5002 | nfs_noremotehang(thread_t thd) |
5003 | { |
5004 | proc_t p = thd ? get_bsdthreadtask_info(thd) : NULL; |
5005 | return (p && proc_noremotehang(p)); |
5006 | } |
5007 | |
5008 | /* |
5009 | * Test for a termination condition pending on the process. |
5010 | * This is used to determine if we need to bail on a mount. |
5011 | * ETIMEDOUT is returned if there has been a soft timeout. |
5012 | * EINTR is returned if there is a signal pending that is not being ignored |
5013 | * and the mount is interruptable, or if we are a thread that is in the process |
5014 | * of cancellation (also SIGKILL posted). |
5015 | */ |
5016 | extern int sigprop[NSIG+1]; |
5017 | int |
5018 | nfs_sigintr(struct nfsmount *nmp, struct nfsreq *req, thread_t thd, int nmplocked) |
5019 | { |
5020 | proc_t p; |
5021 | int error = 0; |
5022 | |
5023 | if (!nmp) |
5024 | return (ENXIO); |
5025 | |
5026 | if (req && (req->r_flags & R_SOFTTERM)) |
5027 | return (ETIMEDOUT); /* request has been terminated. */ |
5028 | if (req && (req->r_flags & R_NOINTR)) |
5029 | thd = NULL; /* don't check for signal on R_NOINTR */ |
5030 | |
5031 | if (!nmplocked) |
5032 | lck_mtx_lock(&nmp->nm_lock); |
5033 | if (nmp->nm_state & NFSSTA_FORCE) { |
5034 | /* If a force unmount is in progress then fail. */ |
5035 | error = EIO; |
5036 | } else if (vfs_isforce(nmp->nm_mountp)) { |
5037 | /* Someone is unmounting us, go soft and mark it. */ |
5038 | NFS_BITMAP_SET(nmp->nm_flags, NFS_MFLAG_SOFT); |
5039 | nmp->nm_state |= NFSSTA_FORCE; |
5040 | } |
5041 | |
5042 | /* Check if the mount is marked dead. */ |
5043 | if (!error && (nmp->nm_state & NFSSTA_DEAD)) |
5044 | error = ENXIO; |
5045 | |
5046 | /* |
5047 | * If the mount is hung and we've requested not to hang |
5048 | * on remote filesystems, then bail now. |
5049 | */ |
5050 | if (current_proc() != kernproc && |
5051 | !error && (nmp->nm_state & NFSSTA_TIMEO) && nfs_noremotehang(thd)) |
5052 | error = EIO; |
5053 | |
5054 | if (!nmplocked) |
5055 | lck_mtx_unlock(&nmp->nm_lock); |
5056 | if (error) |
5057 | return (error); |
5058 | |
5059 | /* may not have a thread for async I/O */ |
5060 | if (thd == NULL || current_proc() == kernproc) |
5061 | return (0); |
5062 | |
5063 | /* |
5064 | * Check if the process is aborted, but don't interrupt if we |
5065 | * were killed by a signal and this is the exiting thread which |
5066 | * is attempting to dump core. |
5067 | */ |
5068 | if (((p = current_proc()) != kernproc) && current_thread_aborted() && |
5069 | (!(p->p_acflag & AXSIG) || (p->exit_thread != current_thread()) || |
5070 | (p->p_sigacts == NULL) || |
5071 | (p->p_sigacts->ps_sig < 1) || (p->p_sigacts->ps_sig > NSIG) || |
5072 | !(sigprop[p->p_sigacts->ps_sig] & SA_CORE))) |
5073 | return (EINTR); |
5074 | |
5075 | /* mask off thread and process blocked signals. */ |
5076 | if (NMFLAG(nmp, INTR) && ((p = get_bsdthreadtask_info(thd))) && |
5077 | proc_pendingsignals(p, NFSINT_SIGMASK)) |
5078 | return (EINTR); |
5079 | return (0); |
5080 | } |
5081 | |
5082 | /* |
5083 | * Lock a socket against others. |
5084 | * Necessary for STREAM sockets to ensure you get an entire rpc request/reply |
5085 | * and also to avoid race conditions between the processes with nfs requests |
5086 | * in progress when a reconnect is necessary. |
5087 | */ |
5088 | int |
5089 | nfs_sndlock(struct nfsreq *req) |
5090 | { |
5091 | struct nfsmount *nmp = req->r_nmp; |
5092 | int *statep; |
5093 | int error = 0, slpflag = 0; |
5094 | struct timespec ts = { 0, 0 }; |
5095 | |
5096 | if (nfs_mount_gone(nmp)) |
5097 | return (ENXIO); |
5098 | |
5099 | lck_mtx_lock(&nmp->nm_lock); |
5100 | statep = &nmp->nm_state; |
5101 | |
5102 | if (NMFLAG(nmp, INTR) && req->r_thread && !(req->r_flags & R_NOINTR)) |
5103 | slpflag = PCATCH; |
5104 | while (*statep & NFSSTA_SNDLOCK) { |
5105 | if ((error = nfs_sigintr(nmp, req, req->r_thread, 1))) |
5106 | break; |
5107 | *statep |= NFSSTA_WANTSND; |
5108 | if (nfs_noremotehang(req->r_thread)) |
5109 | ts.tv_sec = 1; |
5110 | msleep(statep, &nmp->nm_lock, slpflag | (PZERO - 1), "nfsndlck" , &ts); |
5111 | if (slpflag == PCATCH) { |
5112 | slpflag = 0; |
5113 | ts.tv_sec = 2; |
5114 | } |
5115 | } |
5116 | if (!error) |
5117 | *statep |= NFSSTA_SNDLOCK; |
5118 | lck_mtx_unlock(&nmp->nm_lock); |
5119 | return (error); |
5120 | } |
5121 | |
5122 | /* |
5123 | * Unlock the stream socket for others. |
5124 | */ |
5125 | void |
5126 | nfs_sndunlock(struct nfsreq *req) |
5127 | { |
5128 | struct nfsmount *nmp = req->r_nmp; |
5129 | int *statep, wake = 0; |
5130 | |
5131 | if (!nmp) |
5132 | return; |
5133 | lck_mtx_lock(&nmp->nm_lock); |
5134 | statep = &nmp->nm_state; |
5135 | if ((*statep & NFSSTA_SNDLOCK) == 0) |
5136 | panic("nfs sndunlock" ); |
5137 | *statep &= ~(NFSSTA_SNDLOCK|NFSSTA_SENDING); |
5138 | if (*statep & NFSSTA_WANTSND) { |
5139 | *statep &= ~NFSSTA_WANTSND; |
5140 | wake = 1; |
5141 | } |
5142 | lck_mtx_unlock(&nmp->nm_lock); |
5143 | if (wake) |
5144 | wakeup(statep); |
5145 | } |
5146 | |
5147 | int |
5148 | nfs_aux_request( |
5149 | struct nfsmount *nmp, |
5150 | thread_t thd, |
5151 | struct sockaddr *saddr, |
5152 | socket_t so, |
5153 | int sotype, |
5154 | mbuf_t mreq, |
5155 | uint32_t xid, |
5156 | int bindresv, |
5157 | int timeo, |
5158 | struct nfsm_chain *nmrep) |
5159 | { |
5160 | int error = 0, on = 1, try, sendat = 2, soproto, recv, optlen, restoreto = 0; |
5161 | socket_t newso = NULL; |
5162 | struct sockaddr_storage ss; |
5163 | struct timeval orig_rcvto, orig_sndto, tv = { 1, 0 }; |
5164 | mbuf_t m, mrep = NULL; |
5165 | struct msghdr msg; |
5166 | uint32_t rxid = 0, reply = 0, reply_status, rejected_status; |
5167 | uint32_t verf_type, verf_len, accepted_status; |
5168 | size_t readlen, sentlen; |
5169 | struct nfs_rpc_record_state nrrs; |
5170 | |
5171 | if (!so) { |
5172 | /* create socket and set options */ |
5173 | soproto = (sotype == SOCK_DGRAM) ? IPPROTO_UDP : IPPROTO_TCP; |
5174 | if ((error = sock_socket(saddr->sa_family, sotype, soproto, NULL, NULL, &newso))) |
5175 | goto nfsmout; |
5176 | |
5177 | if (bindresv) { |
5178 | int level = (saddr->sa_family == AF_INET) ? IPPROTO_IP : IPPROTO_IPV6; |
5179 | int optname = (saddr->sa_family == AF_INET) ? IP_PORTRANGE : IPV6_PORTRANGE; |
5180 | int portrange = IP_PORTRANGE_LOW; |
5181 | error = sock_setsockopt(newso, level, optname, &portrange, sizeof(portrange)); |
5182 | nfsmout_if(error); |
5183 | ss.ss_len = saddr->sa_len; |
5184 | ss.ss_family = saddr->sa_family; |
5185 | if (ss.ss_family == AF_INET) { |
5186 | ((struct sockaddr_in*)&ss)->sin_addr.s_addr = INADDR_ANY; |
5187 | ((struct sockaddr_in*)&ss)->sin_port = htons(0); |
5188 | } else if (ss.ss_family == AF_INET6) { |
5189 | ((struct sockaddr_in6*)&ss)->sin6_addr = in6addr_any; |
5190 | ((struct sockaddr_in6*)&ss)->sin6_port = htons(0); |
5191 | } else { |
5192 | error = EINVAL; |
5193 | } |
5194 | if (!error) |
5195 | error = sock_bind(newso, (struct sockaddr *)&ss); |
5196 | nfsmout_if(error); |
5197 | } |
5198 | |
5199 | if (sotype == SOCK_STREAM) { |
5200 | # define NFS_AUX_CONNECTION_TIMEOUT 4 /* 4 second timeout for connections */ |
5201 | int count = 0; |
5202 | |
5203 | error = sock_connect(newso, saddr, MSG_DONTWAIT); |
5204 | if (error == EINPROGRESS) |
5205 | error = 0; |
5206 | nfsmout_if(error); |
5207 | |
5208 | while ((error = sock_connectwait(newso, &tv)) == EINPROGRESS) { |
5209 | /* After NFS_AUX_CONNECTION_TIMEOUT bail */ |
5210 | if (++count >= NFS_AUX_CONNECTION_TIMEOUT) { |
5211 | error = ETIMEDOUT; |
5212 | break; |
5213 | } |
5214 | } |
5215 | nfsmout_if(error); |
5216 | } |
5217 | if (((error = sock_setsockopt(newso, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))) || |
5218 | ((error = sock_setsockopt(newso, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)))) || |
5219 | ((error = sock_setsockopt(newso, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on))))) |
5220 | goto nfsmout; |
5221 | so = newso; |
5222 | } else { |
5223 | /* make sure socket is using a one second timeout in this function */ |
5224 | optlen = sizeof(orig_rcvto); |
5225 | error = sock_getsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &orig_rcvto, &optlen); |
5226 | if (!error) { |
5227 | optlen = sizeof(orig_sndto); |
5228 | error = sock_getsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &orig_sndto, &optlen); |
5229 | } |
5230 | if (!error) { |
5231 | sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)); |
5232 | sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)); |
5233 | restoreto = 1; |
5234 | } |
5235 | } |
5236 | |
5237 | if (sotype == SOCK_STREAM) { |
5238 | sendat = 0; /* we only resend the request for UDP */ |
5239 | nfs_rpc_record_state_init(&nrrs); |
5240 | } |
5241 | |
5242 | for (try=0; try < timeo; try++) { |
5243 | if ((error = nfs_sigintr(nmp, NULL, !try ? NULL : thd, 0))) |
5244 | break; |
5245 | if (!try || (try == sendat)) { |
5246 | /* send the request (resending periodically for UDP) */ |
5247 | if ((error = mbuf_copym(mreq, 0, MBUF_COPYALL, MBUF_WAITOK, &m))) |
5248 | goto nfsmout; |
5249 | bzero(&msg, sizeof(msg)); |
5250 | if ((sotype == SOCK_DGRAM) && !sock_isconnected(so)) { |
5251 | msg.msg_name = saddr; |
5252 | msg.msg_namelen = saddr->sa_len; |
5253 | } |
5254 | if ((error = sock_sendmbuf(so, &msg, m, 0, &sentlen))) |
5255 | goto nfsmout; |
5256 | sendat *= 2; |
5257 | if (sendat > 30) |
5258 | sendat = 30; |
5259 | } |
5260 | /* wait for the response */ |
5261 | if (sotype == SOCK_STREAM) { |
5262 | /* try to read (more of) record */ |
5263 | error = nfs_rpc_record_read(so, &nrrs, 0, &recv, &mrep); |
5264 | /* if we don't have the whole record yet, we'll keep trying */ |
5265 | } else { |
5266 | readlen = 1<<18; |
5267 | bzero(&msg, sizeof(msg)); |
5268 | error = sock_receivembuf(so, &msg, &mrep, 0, &readlen); |
5269 | } |
5270 | if (error == EWOULDBLOCK) |
5271 | continue; |
5272 | nfsmout_if(error); |
5273 | /* parse the response */ |
5274 | nfsm_chain_dissect_init(error, nmrep, mrep); |
5275 | nfsm_chain_get_32(error, nmrep, rxid); |
5276 | nfsm_chain_get_32(error, nmrep, reply); |
5277 | nfsmout_if(error); |
5278 | if ((rxid != xid) || (reply != RPC_REPLY)) |
5279 | error = EBADRPC; |
5280 | nfsm_chain_get_32(error, nmrep, reply_status); |
5281 | nfsmout_if(error); |
5282 | if (reply_status == RPC_MSGDENIED) { |
5283 | nfsm_chain_get_32(error, nmrep, rejected_status); |
5284 | nfsmout_if(error); |
5285 | error = (rejected_status == RPC_MISMATCH) ? ERPCMISMATCH : EACCES; |
5286 | goto nfsmout; |
5287 | } |
5288 | nfsm_chain_get_32(error, nmrep, verf_type); /* verifier flavor */ |
5289 | nfsm_chain_get_32(error, nmrep, verf_len); /* verifier length */ |
5290 | nfsmout_if(error); |
5291 | if (verf_len) |
5292 | nfsm_chain_adv(error, nmrep, nfsm_rndup(verf_len)); |
5293 | nfsm_chain_get_32(error, nmrep, accepted_status); |
5294 | nfsmout_if(error); |
5295 | switch (accepted_status) { |
5296 | case RPC_SUCCESS: |
5297 | error = 0; |
5298 | break; |
5299 | case RPC_PROGUNAVAIL: |
5300 | error = EPROGUNAVAIL; |
5301 | break; |
5302 | case RPC_PROGMISMATCH: |
5303 | error = EPROGMISMATCH; |
5304 | break; |
5305 | case RPC_PROCUNAVAIL: |
5306 | error = EPROCUNAVAIL; |
5307 | break; |
5308 | case RPC_GARBAGE: |
5309 | error = EBADRPC; |
5310 | break; |
5311 | case RPC_SYSTEM_ERR: |
5312 | default: |
5313 | error = EIO; |
5314 | break; |
5315 | } |
5316 | break; |
5317 | } |
5318 | nfsmout: |
5319 | if (restoreto) { |
5320 | sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &orig_rcvto, sizeof(tv)); |
5321 | sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &orig_sndto, sizeof(tv)); |
5322 | } |
5323 | if (newso) { |
5324 | sock_shutdown(newso, SHUT_RDWR); |
5325 | sock_close(newso); |
5326 | } |
5327 | mbuf_freem(mreq); |
5328 | return (error); |
5329 | } |
5330 | |
5331 | int |
5332 | nfs_portmap_lookup( |
5333 | struct nfsmount *nmp, |
5334 | vfs_context_t ctx, |
5335 | struct sockaddr *sa, |
5336 | socket_t so, |
5337 | uint32_t protocol, |
5338 | uint32_t vers, |
5339 | uint32_t ipproto, |
5340 | int timeo) |
5341 | { |
5342 | thread_t thd = vfs_context_thread(ctx); |
5343 | kauth_cred_t cred = vfs_context_ucred(ctx); |
5344 | struct sockaddr_storage ss; |
5345 | struct sockaddr *saddr = (struct sockaddr*)&ss; |
5346 | struct nfsm_chain nmreq, nmrep; |
5347 | mbuf_t mreq; |
5348 | int error = 0, ip, pmprog, pmvers, pmproc; |
5349 | uint32_t ualen = 0; |
5350 | uint32_t port; |
5351 | uint64_t xid = 0; |
5352 | char uaddr[MAX_IPv6_STR_LEN+16]; |
5353 | |
5354 | bcopy(sa, saddr, min(sizeof(ss), sa->sa_len)); |
5355 | if (saddr->sa_family == AF_INET) { |
5356 | ip = 4; |
5357 | pmprog = PMAPPROG; |
5358 | pmvers = PMAPVERS; |
5359 | pmproc = PMAPPROC_GETPORT; |
5360 | } else if (saddr->sa_family == AF_INET6) { |
5361 | ip = 6; |
5362 | pmprog = RPCBPROG; |
5363 | pmvers = RPCBVERS4; |
5364 | pmproc = RPCBPROC_GETVERSADDR; |
5365 | } else { |
5366 | return (EINVAL); |
5367 | } |
5368 | nfsm_chain_null(&nmreq); |
5369 | nfsm_chain_null(&nmrep); |
5370 | |
5371 | tryagain: |
5372 | /* send portmapper request to get port/uaddr */ |
5373 | if (ip == 4) |
5374 | ((struct sockaddr_in*)saddr)->sin_port = htons(PMAPPORT); |
5375 | else |
5376 | ((struct sockaddr_in6*)saddr)->sin6_port = htons(PMAPPORT); |
5377 | nfsm_chain_build_alloc_init(error, &nmreq, 8*NFSX_UNSIGNED); |
5378 | nfsm_chain_add_32(error, &nmreq, protocol); |
5379 | nfsm_chain_add_32(error, &nmreq, vers); |
5380 | if (ip == 4) { |
5381 | nfsm_chain_add_32(error, &nmreq, ipproto); |
5382 | nfsm_chain_add_32(error, &nmreq, 0); |
5383 | } else { |
5384 | if (ipproto == IPPROTO_TCP) |
5385 | nfsm_chain_add_string(error, &nmreq, "tcp6" , 4); |
5386 | else |
5387 | nfsm_chain_add_string(error, &nmreq, "udp6" , 4); |
5388 | nfsm_chain_add_string(error, &nmreq, "" , 0); /* uaddr */ |
5389 | nfsm_chain_add_string(error, &nmreq, "" , 0); /* owner */ |
5390 | } |
5391 | nfsm_chain_build_done(error, &nmreq); |
5392 | nfsmout_if(error); |
5393 | error = nfsm_rpchead2(nmp, (ipproto == IPPROTO_UDP) ? SOCK_DGRAM : SOCK_STREAM, |
5394 | pmprog, pmvers, pmproc, RPCAUTH_SYS, cred, NULL, nmreq.nmc_mhead, |
5395 | &xid, &mreq); |
5396 | nfsmout_if(error); |
5397 | nmreq.nmc_mhead = NULL; |
5398 | error = nfs_aux_request(nmp, thd, saddr, so, (ipproto == IPPROTO_UDP) ? SOCK_DGRAM : SOCK_STREAM, |
5399 | mreq, R_XID32(xid), 0, timeo, &nmrep); |
5400 | |
5401 | /* grab port from portmap response */ |
5402 | if (ip == 4) { |
5403 | nfsm_chain_get_32(error, &nmrep, port); |
5404 | if (!error) |
5405 | ((struct sockaddr_in*)sa)->sin_port = htons(port); |
5406 | } else { |
5407 | /* get uaddr string and convert to sockaddr */ |
5408 | nfsm_chain_get_32(error, &nmrep, ualen); |
5409 | if (!error) { |
5410 | if (ualen > (sizeof(uaddr)-1)) |
5411 | error = EIO; |
5412 | if (ualen < 1) { |
5413 | /* program is not available, just return a zero port */ |
5414 | bcopy(sa, saddr, min(sizeof(ss), sa->sa_len)); |
5415 | ((struct sockaddr_in6*)saddr)->sin6_port = htons(0); |
5416 | } else { |
5417 | nfsm_chain_get_opaque(error, &nmrep, ualen, uaddr); |
5418 | if (!error) { |
5419 | uaddr[ualen] = '\0'; |
5420 | if (!nfs_uaddr2sockaddr(uaddr, saddr)) |
5421 | error = EIO; |
5422 | } |
5423 | } |
5424 | } |
5425 | if ((error == EPROGMISMATCH) || (error == EPROCUNAVAIL) || (error == EIO) || (error == EBADRPC)) { |
5426 | /* remote doesn't support rpcbind version or proc (or we couldn't parse uaddr) */ |
5427 | if (pmvers == RPCBVERS4) { |
5428 | /* fall back to v3 and GETADDR */ |
5429 | pmvers = RPCBVERS3; |
5430 | pmproc = RPCBPROC_GETADDR; |
5431 | nfsm_chain_cleanup(&nmreq); |
5432 | nfsm_chain_cleanup(&nmrep); |
5433 | bcopy(sa, saddr, min(sizeof(ss), sa->sa_len)); |
5434 | xid = 0; |
5435 | error = 0; |
5436 | goto tryagain; |
5437 | } |
5438 | } |
5439 | if (!error) |
5440 | bcopy(saddr, sa, min(saddr->sa_len, sa->sa_len)); |
5441 | } |
5442 | nfsmout: |
5443 | nfsm_chain_cleanup(&nmreq); |
5444 | nfsm_chain_cleanup(&nmrep); |
5445 | return (error); |
5446 | } |
5447 | |
5448 | int |
5449 | nfs_msg(thread_t thd, |
5450 | const char *server, |
5451 | const char *msg, |
5452 | int error) |
5453 | { |
5454 | proc_t p = thd ? get_bsdthreadtask_info(thd) : NULL; |
5455 | tpr_t tpr; |
5456 | |
5457 | if (p) |
5458 | tpr = tprintf_open(p); |
5459 | else |
5460 | tpr = NULL; |
5461 | if (error) |
5462 | tprintf(tpr, "nfs server %s: %s, error %d\n" , server, msg, error); |
5463 | else |
5464 | tprintf(tpr, "nfs server %s: %s\n" , server, msg); |
5465 | tprintf_close(tpr); |
5466 | return (0); |
5467 | } |
5468 | |
#define NFS_SQUISH_MOBILE_ONLY 0x0001 /* Squish mounts only on mobile machines */
#define NFS_SQUISH_AUTOMOUNTED_ONLY 0x0002 /* Squish mounts only if they are automounted */
#define NFS_SQUISH_SOFT 0x0004 /* Treat all soft mounts as though they were on a mobile machine */
#define NFS_SQUISH_QUICK 0x0008 /* Try to squish mounts more quickly. */
#define NFS_SQUISH_SHUTDOWN 0x1000 /* Squish all mounts on shutdown. Currently not implemented */

/* Squish policy bits (NFS_SQUISH_*) consulted by nfs_can_squish(), nfs_is_squishy() and nfs_is_dead() */
uint32_t nfs_squishy_flags = NFS_SQUISH_MOBILE_ONLY | NFS_SQUISH_AUTOMOUNTED_ONLY | NFS_SQUISH_QUICK;
/* Nonzero when this machine is treated as mobile by the squish and cache checks */
int32_t nfs_is_mobile;

#define NFS_SQUISHY_DEADTIMEOUT 8 /* Dead time out (seconds) for squishy mounts */
#define NFS_SQUISHY_QUICKTIMEOUT 4 /* Quicker dead time out (seconds) when the NFS_SQUISH_QUICK bit of nfs_squishy_flags is set */
5480 | |
5481 | /* |
5482 | * Could this mount be squished? |
5483 | */ |
5484 | int |
5485 | nfs_can_squish(struct nfsmount *nmp) |
5486 | { |
5487 | uint64_t flags = vfs_flags(nmp->nm_mountp); |
5488 | int softsquish = ((nfs_squishy_flags & NFS_SQUISH_SOFT) & NMFLAG(nmp, SOFT)); |
5489 | |
5490 | if (!softsquish && (nfs_squishy_flags & NFS_SQUISH_MOBILE_ONLY) && nfs_is_mobile == 0) |
5491 | return (0); |
5492 | |
5493 | if ((nfs_squishy_flags & NFS_SQUISH_AUTOMOUNTED_ONLY) && (flags & MNT_AUTOMOUNTED) == 0) |
5494 | return (0); |
5495 | |
5496 | return (1); |
5497 | } |
5498 | |
5499 | /* |
5500 | * NFS mounts default to "rw,hard" - but frequently on mobile clients |
5501 | * the mount may become "not responding". It's desirable to be able |
5502 | * to unmount these dead mounts, but only if there is no risk of |
5503 | * losing data or crashing applications. A "squishy" NFS mount is one |
5504 | * that can be force unmounted with little risk of harm. |
5505 | * |
5506 | * nfs_is_squishy checks if a mount is in a squishy state. A mount is |
5507 | * in a squishy state iff it is allowed to be squishy and there are no |
5508 | * dirty pages and there are no mmapped files and there are no files |
 * open for write. Whether mounts are allowed to be squishy is controlled
 * by the settings of nfs_squishy_flags and the machine's mobility state.
 * These flags can be set by sysctls.
5512 | * |
5513 | * If nfs_is_squishy determines that we are in a squishy state we will |
5514 | * update the current dead timeout to at least NFS_SQUISHY_DEADTIMEOUT |
5515 | * (or NFS_SQUISHY_QUICKTIMEOUT if NFS_SQUISH_QUICK is set) (see |
5516 | * above) or 1/8th of the mount's nm_deadtimeout value, otherwise we just |
5517 | * update the current dead timeout with the mount's nm_deadtimeout |
5518 | * value set at mount time. |
5519 | * |
5520 | * Assumes that nm_lock is held. |
5521 | * |
 * Note this routine is racy, but its effects on setting the
 * dead timeout only have effects when we're in trouble and are likely
 * to stay that way. Since by default it's only for automounted
 * volumes on mobile machines, this is a reasonable trade-off between
 * data integrity and user experience. It can be disabled or set via
 * nfs.conf file.
5528 | */ |
5529 | |
5530 | int |
5531 | nfs_is_squishy(struct nfsmount *nmp) |
5532 | { |
5533 | mount_t mp = nmp->nm_mountp; |
5534 | int squishy = 0; |
5535 | int timeo = (nfs_squishy_flags & NFS_SQUISH_QUICK) ? NFS_SQUISHY_QUICKTIMEOUT : NFS_SQUISHY_DEADTIMEOUT; |
5536 | |
5537 | NFS_SOCK_DBG("%s: nm_curdeadtimeout = %d, nfs_is_mobile = %d\n" , |
5538 | vfs_statfs(mp)->f_mntfromname, nmp->nm_curdeadtimeout, nfs_is_mobile); |
5539 | |
5540 | if (!nfs_can_squish(nmp)) |
5541 | goto out; |
5542 | |
5543 | timeo = (nmp->nm_deadtimeout > timeo) ? max(nmp->nm_deadtimeout/8, timeo) : timeo; |
5544 | NFS_SOCK_DBG("nm_writers = %d nm_mappers = %d timeo = %d\n" , nmp->nm_writers, nmp->nm_mappers, timeo); |
5545 | |
5546 | if (nmp->nm_writers == 0 && nmp->nm_mappers == 0) { |
5547 | uint64_t flags = mp ? vfs_flags(mp) : 0; |
5548 | squishy = 1; |
5549 | |
5550 | /* |
5551 | * Walk the nfs nodes and check for dirty buffers it we're not |
5552 | * RDONLY and we've not already been declared as squishy since |
5553 | * this can be a bit expensive. |
5554 | */ |
5555 | if (!(flags & MNT_RDONLY) && !(nmp->nm_state & NFSSTA_SQUISHY)) |
5556 | squishy = !nfs_mount_is_dirty(mp); |
5557 | } |
5558 | |
5559 | out: |
5560 | if (squishy) |
5561 | nmp->nm_state |= NFSSTA_SQUISHY; |
5562 | else |
5563 | nmp->nm_state &= ~NFSSTA_SQUISHY; |
5564 | |
5565 | nmp->nm_curdeadtimeout = squishy ? timeo : nmp->nm_deadtimeout; |
5566 | |
5567 | NFS_SOCK_DBG("nm_curdeadtimeout = %d\n" , nmp->nm_curdeadtimeout); |
5568 | |
5569 | return (squishy); |
5570 | } |
5571 | |
5572 | /* |
5573 | * On a send operation, if we can't reach the server and we've got only one server to talk to |
5574 | * and NFS_SQUISH_QUICK flag is set and we are in a squishy state then mark the mount as dead |
5575 | * and ask to be forcibly unmounted. Return 1 if we're dead and 0 otherwise. |
5576 | */ |
5577 | int |
5578 | nfs_is_dead(int error, struct nfsmount *nmp) |
5579 | { |
5580 | fsid_t fsid; |
5581 | |
5582 | lck_mtx_lock(&nmp->nm_lock); |
5583 | if (nmp->nm_state & NFSSTA_DEAD) { |
5584 | lck_mtx_unlock(&nmp->nm_lock); |
5585 | return (1); |
5586 | } |
5587 | |
5588 | if ((error != ENETUNREACH && error != EHOSTUNREACH && error != EADDRNOTAVAIL) || |
5589 | !(nmp->nm_locations.nl_numlocs == 1 && nmp->nm_locations.nl_locations[0]->nl_servcount == 1)) { |
5590 | lck_mtx_unlock(&nmp->nm_lock); |
5591 | return (0); |
5592 | } |
5593 | |
5594 | if ((nfs_squishy_flags & NFS_SQUISH_QUICK) && nfs_is_squishy(nmp)) { |
5595 | printf("nfs_is_dead: nfs server %s: unreachable. Squished dead\n" , vfs_statfs(nmp->nm_mountp)->f_mntfromname); |
5596 | fsid = vfs_statfs(nmp->nm_mountp)->f_fsid; |
5597 | lck_mtx_unlock(&nmp->nm_lock); |
5598 | nfs_mount_zombie(nmp, NFSSTA_DEAD); |
5599 | vfs_event_signal(&fsid, VQ_DEAD, 0); |
5600 | return (1); |
5601 | } |
5602 | lck_mtx_unlock(&nmp->nm_lock); |
5603 | return (0); |
5604 | } |
5605 | |
5606 | /* |
5607 | * If we've experienced timeouts and we're not really a |
5608 | * classic hard mount, then just return cached data to |
5609 | * the caller instead of likely hanging on an RPC. |
5610 | */ |
5611 | int |
5612 | nfs_use_cache(struct nfsmount *nmp) |
5613 | { |
5614 | /* |
5615 | *%%% We always let mobile users goto the cache, |
5616 | * perhaps we should not even require them to have |
5617 | * a timeout? |
5618 | */ |
5619 | int cache_ok = (nfs_is_mobile || NMFLAG(nmp, SOFT) || |
5620 | nfs_can_squish(nmp) || nmp->nm_deadtimeout); |
5621 | |
5622 | int timeoutmask = NFSSTA_TIMEO | NFSSTA_LOCKTIMEO | NFSSTA_JUKEBOXTIMEO; |
5623 | |
5624 | /* |
5625 | * So if we have a timeout and we're not really a hard hard-mount, |
5626 | * return 1 to not get things out of the cache. |
5627 | */ |
5628 | |
5629 | return ((nmp->nm_state & timeoutmask) && cache_ok); |
5630 | } |
5631 | |
5632 | /* |
5633 | * Log a message that nfs or lockd server is unresponsive. Check if we |
5634 | * can be squished and if we can, or that our dead timeout has |
5635 | * expired, and we're not holding state, set our mount as dead, remove |
5636 | * our mount state and ask to be unmounted. If we are holding state |
5637 | * we're being called from the nfs_request_timer and will soon detect |
5638 | * that we need to unmount. |
5639 | */ |
5640 | void |
5641 | nfs_down(struct nfsmount *nmp, thread_t thd, int error, int flags, const char *msg, int holding_state) |
5642 | { |
5643 | int timeoutmask, wasunresponsive, unresponsive, softnobrowse; |
5644 | uint32_t do_vfs_signal = 0; |
5645 | struct timeval now; |
5646 | |
5647 | if (nfs_mount_gone(nmp)) |
5648 | return; |
5649 | |
5650 | lck_mtx_lock(&nmp->nm_lock); |
5651 | |
5652 | timeoutmask = NFSSTA_TIMEO | NFSSTA_LOCKTIMEO | NFSSTA_JUKEBOXTIMEO; |
5653 | if (NMFLAG(nmp, MUTEJUKEBOX)) /* jukebox timeouts don't count as unresponsive if muted */ |
5654 | timeoutmask &= ~NFSSTA_JUKEBOXTIMEO; |
5655 | wasunresponsive = (nmp->nm_state & timeoutmask); |
5656 | |
5657 | /* XXX don't allow users to know about/disconnect unresponsive, soft, nobrowse mounts */ |
5658 | softnobrowse = (NMFLAG(nmp, SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE)); |
5659 | |
5660 | if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO)) |
5661 | nmp->nm_state |= NFSSTA_TIMEO; |
5662 | if ((flags & NFSSTA_LOCKTIMEO) && !(nmp->nm_state & NFSSTA_LOCKTIMEO)) |
5663 | nmp->nm_state |= NFSSTA_LOCKTIMEO; |
5664 | if ((flags & NFSSTA_JUKEBOXTIMEO) && !(nmp->nm_state & NFSSTA_JUKEBOXTIMEO)) |
5665 | nmp->nm_state |= NFSSTA_JUKEBOXTIMEO; |
5666 | |
5667 | unresponsive = (nmp->nm_state & timeoutmask); |
5668 | |
5669 | nfs_is_squishy(nmp); |
5670 | |
5671 | if (unresponsive && (nmp->nm_curdeadtimeout > 0)) { |
5672 | microuptime(&now); |
5673 | if (!wasunresponsive) { |
5674 | nmp->nm_deadto_start = now.tv_sec; |
5675 | nfs_mount_sock_thread_wake(nmp); |
5676 | } else if ((now.tv_sec - nmp->nm_deadto_start) > nmp->nm_curdeadtimeout && !holding_state) { |
5677 | if (!(nmp->nm_state & NFSSTA_DEAD)) |
5678 | printf("nfs server %s: %sdead\n" , vfs_statfs(nmp->nm_mountp)->f_mntfromname, |
5679 | (nmp->nm_curdeadtimeout != nmp->nm_deadtimeout) ? "squished " : "" ); |
5680 | do_vfs_signal = VQ_DEAD; |
5681 | } |
5682 | } |
5683 | lck_mtx_unlock(&nmp->nm_lock); |
5684 | |
5685 | if (do_vfs_signal == VQ_DEAD && !(nmp->nm_state & NFSSTA_DEAD)) |
5686 | nfs_mount_zombie(nmp, NFSSTA_DEAD); |
5687 | else if (softnobrowse || wasunresponsive || !unresponsive) |
5688 | do_vfs_signal = 0; |
5689 | else |
5690 | do_vfs_signal = VQ_NOTRESP; |
5691 | if (do_vfs_signal) |
5692 | vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, do_vfs_signal, 0); |
5693 | |
5694 | nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, error); |
5695 | } |
5696 | |
5697 | void |
5698 | nfs_up(struct nfsmount *nmp, thread_t thd, int flags, const char *msg) |
5699 | { |
5700 | int timeoutmask, wasunresponsive, unresponsive, softnobrowse; |
5701 | int do_vfs_signal; |
5702 | |
5703 | if (nfs_mount_gone(nmp)) |
5704 | return; |
5705 | |
5706 | if (msg) |
5707 | nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, 0); |
5708 | |
5709 | lck_mtx_lock(&nmp->nm_lock); |
5710 | |
5711 | timeoutmask = NFSSTA_TIMEO | NFSSTA_LOCKTIMEO | NFSSTA_JUKEBOXTIMEO; |
5712 | if (NMFLAG(nmp, MUTEJUKEBOX)) /* jukebox timeouts don't count as unresponsive if muted */ |
5713 | timeoutmask &= ~NFSSTA_JUKEBOXTIMEO; |
5714 | wasunresponsive = (nmp->nm_state & timeoutmask); |
5715 | |
5716 | /* XXX don't allow users to know about/disconnect unresponsive, soft, nobrowse mounts */ |
5717 | softnobrowse = (NMFLAG(nmp, SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE)); |
5718 | |
5719 | if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO)) |
5720 | nmp->nm_state &= ~NFSSTA_TIMEO; |
5721 | if ((flags & NFSSTA_LOCKTIMEO) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) |
5722 | nmp->nm_state &= ~NFSSTA_LOCKTIMEO; |
5723 | if ((flags & NFSSTA_JUKEBOXTIMEO) && (nmp->nm_state & NFSSTA_JUKEBOXTIMEO)) |
5724 | nmp->nm_state &= ~NFSSTA_JUKEBOXTIMEO; |
5725 | |
5726 | unresponsive = (nmp->nm_state & timeoutmask); |
5727 | |
5728 | nmp->nm_deadto_start = 0; |
5729 | nmp->nm_curdeadtimeout = nmp->nm_deadtimeout; |
5730 | nmp->nm_state &= ~NFSSTA_SQUISHY; |
5731 | lck_mtx_unlock(&nmp->nm_lock); |
5732 | |
5733 | if (softnobrowse) |
5734 | do_vfs_signal = 0; |
5735 | else |
5736 | do_vfs_signal = (wasunresponsive && !unresponsive); |
5737 | if (do_vfs_signal) |
5738 | vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESP, 1); |
5739 | } |
5740 | |
5741 | |
5742 | #endif /* NFSCLIENT */ |
5743 | |
5744 | #if NFSSERVER |
5745 | |
5746 | /* |
5747 | * Generate the rpc reply header |
5748 | * siz arg. is used to decide if adding a cluster is worthwhile |
5749 | */ |
5750 | int |
5751 | nfsrv_rephead( |
5752 | struct nfsrv_descript *nd, |
5753 | __unused struct nfsrv_sock *slp, |
5754 | struct nfsm_chain *nmrepp, |
5755 | size_t siz) |
5756 | { |
5757 | mbuf_t mrep; |
5758 | u_int32_t *tl; |
5759 | struct nfsm_chain nmrep; |
5760 | int err, error; |
5761 | |
5762 | err = nd->nd_repstat; |
5763 | if (err && (nd->nd_vers == NFS_VER2)) |
5764 | siz = 0; |
5765 | |
5766 | /* |
5767 | * If this is a big reply, use a cluster else |
5768 | * try and leave leading space for the lower level headers. |
5769 | */ |
5770 | siz += RPC_REPLYSIZ; |
5771 | if (siz >= nfs_mbuf_minclsize) { |
5772 | error = mbuf_getpacket(MBUF_WAITOK, &mrep); |
5773 | } else { |
5774 | error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mrep); |
5775 | } |
5776 | if (error) { |
5777 | /* unable to allocate packet */ |
5778 | /* XXX should we keep statistics for these errors? */ |
5779 | return (error); |
5780 | } |
5781 | if (siz < nfs_mbuf_minclsize) { |
5782 | /* leave space for lower level headers */ |
5783 | tl = mbuf_data(mrep); |
5784 | tl += 80/sizeof(*tl); /* XXX max_hdr? XXX */ |
5785 | mbuf_setdata(mrep, tl, 6 * NFSX_UNSIGNED); |
5786 | } |
5787 | nfsm_chain_init(&nmrep, mrep); |
5788 | nfsm_chain_add_32(error, &nmrep, nd->nd_retxid); |
5789 | nfsm_chain_add_32(error, &nmrep, RPC_REPLY); |
5790 | if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) { |
5791 | nfsm_chain_add_32(error, &nmrep, RPC_MSGDENIED); |
5792 | if (err & NFSERR_AUTHERR) { |
5793 | nfsm_chain_add_32(error, &nmrep, RPC_AUTHERR); |
5794 | nfsm_chain_add_32(error, &nmrep, (err & ~NFSERR_AUTHERR)); |
5795 | } else { |
5796 | nfsm_chain_add_32(error, &nmrep, RPC_MISMATCH); |
5797 | nfsm_chain_add_32(error, &nmrep, RPC_VER2); |
5798 | nfsm_chain_add_32(error, &nmrep, RPC_VER2); |
5799 | } |
5800 | } else { |
5801 | /* reply status */ |
5802 | nfsm_chain_add_32(error, &nmrep, RPC_MSGACCEPTED); |
5803 | if (nd->nd_gss_context != NULL) { |
5804 | /* RPCSEC_GSS verifier */ |
5805 | error = nfs_gss_svc_verf_put(nd, &nmrep); |
5806 | if (error) { |
5807 | nfsm_chain_add_32(error, &nmrep, RPC_SYSTEM_ERR); |
5808 | goto done; |
5809 | } |
5810 | } else { |
5811 | /* RPCAUTH_NULL verifier */ |
5812 | nfsm_chain_add_32(error, &nmrep, RPCAUTH_NULL); |
5813 | nfsm_chain_add_32(error, &nmrep, 0); |
5814 | } |
5815 | /* accepted status */ |
5816 | switch (err) { |
5817 | case EPROGUNAVAIL: |
5818 | nfsm_chain_add_32(error, &nmrep, RPC_PROGUNAVAIL); |
5819 | break; |
5820 | case EPROGMISMATCH: |
5821 | nfsm_chain_add_32(error, &nmrep, RPC_PROGMISMATCH); |
5822 | /* XXX hard coded versions? */ |
5823 | nfsm_chain_add_32(error, &nmrep, NFS_VER2); |
5824 | nfsm_chain_add_32(error, &nmrep, NFS_VER3); |
5825 | break; |
5826 | case EPROCUNAVAIL: |
5827 | nfsm_chain_add_32(error, &nmrep, RPC_PROCUNAVAIL); |
5828 | break; |
5829 | case EBADRPC: |
5830 | nfsm_chain_add_32(error, &nmrep, RPC_GARBAGE); |
5831 | break; |
5832 | default: |
5833 | nfsm_chain_add_32(error, &nmrep, RPC_SUCCESS); |
5834 | if (nd->nd_gss_context != NULL) |
5835 | error = nfs_gss_svc_prepare_reply(nd, &nmrep); |
5836 | if (err != NFSERR_RETVOID) |
5837 | nfsm_chain_add_32(error, &nmrep, |
5838 | (err ? nfsrv_errmap(nd, err) : 0)); |
5839 | break; |
5840 | } |
5841 | } |
5842 | |
5843 | done: |
5844 | nfsm_chain_build_done(error, &nmrep); |
5845 | if (error) { |
5846 | /* error composing reply header */ |
5847 | /* XXX should we keep statistics for these errors? */ |
5848 | mbuf_freem(mrep); |
5849 | return (error); |
5850 | } |
5851 | |
5852 | *nmrepp = nmrep; |
5853 | if ((err != 0) && (err != NFSERR_RETVOID)) |
5854 | OSAddAtomic64(1, &nfsstats.srvrpc_errs); |
5855 | return (0); |
5856 | } |
5857 | |
5858 | /* |
5859 | * The nfs server send routine. |
5860 | * |
5861 | * - return EINTR or ERESTART if interrupted by a signal |
5862 | * - return EPIPE if a connection is lost for connection based sockets (TCP...) |
5863 | * - do any cleanup required by recoverable socket errors (???) |
5864 | */ |
5865 | int |
5866 | nfsrv_send(struct nfsrv_sock *slp, mbuf_t nam, mbuf_t top) |
5867 | { |
5868 | int error; |
5869 | socket_t so = slp->ns_so; |
5870 | struct sockaddr *sendnam; |
5871 | struct msghdr msg; |
5872 | |
5873 | bzero(&msg, sizeof(msg)); |
5874 | if (nam && !sock_isconnected(so) && (slp->ns_sotype != SOCK_STREAM)) { |
5875 | if ((sendnam = mbuf_data(nam))) { |
5876 | msg.msg_name = (caddr_t)sendnam; |
5877 | msg.msg_namelen = sendnam->sa_len; |
5878 | } |
5879 | } |
5880 | error = sock_sendmbuf(so, &msg, top, 0, NULL); |
5881 | if (!error) |
5882 | return (0); |
5883 | log(LOG_INFO, "nfsd send error %d\n" , error); |
5884 | |
5885 | if ((error == EWOULDBLOCK) && (slp->ns_sotype == SOCK_STREAM)) |
5886 | error = EPIPE; /* zap TCP sockets if they time out on send */ |
5887 | |
5888 | /* Handle any recoverable (soft) socket errors here. (???) */ |
5889 | if (error != EINTR && error != ERESTART && error != EIO && |
5890 | error != EWOULDBLOCK && error != EPIPE) |
5891 | error = 0; |
5892 | |
5893 | return (error); |
5894 | } |
5895 | |
5896 | /* |
5897 | * Socket upcall routine for the nfsd sockets. |
5898 | * The caddr_t arg is a pointer to the "struct nfsrv_sock". |
5899 | * Essentially do as much as possible non-blocking, else punt and it will |
5900 | * be called with MBUF_WAITOK from an nfsd. |
5901 | */ |
5902 | void |
5903 | nfsrv_rcv(socket_t so, void *arg, int waitflag) |
5904 | { |
5905 | struct nfsrv_sock *slp = arg; |
5906 | |
5907 | if (!nfsd_thread_count || !(slp->ns_flag & SLP_VALID)) |
5908 | return; |
5909 | |
5910 | lck_rw_lock_exclusive(&slp->ns_rwlock); |
5911 | nfsrv_rcv_locked(so, slp, waitflag); |
5912 | /* Note: ns_rwlock gets dropped when called with MBUF_DONTWAIT */ |
5913 | } |
5914 | void |
5915 | nfsrv_rcv_locked(socket_t so, struct nfsrv_sock *slp, int waitflag) |
5916 | { |
5917 | mbuf_t m, mp, mhck, m2; |
5918 | int ns_flag=0, error; |
5919 | struct msghdr msg; |
5920 | size_t bytes_read; |
5921 | |
5922 | if ((slp->ns_flag & SLP_VALID) == 0) { |
5923 | if (waitflag == MBUF_DONTWAIT) |
5924 | lck_rw_done(&slp->ns_rwlock); |
5925 | return; |
5926 | } |
5927 | |
5928 | #ifdef notdef |
5929 | /* |
5930 | * Define this to test for nfsds handling this under heavy load. |
5931 | */ |
5932 | if (waitflag == MBUF_DONTWAIT) { |
5933 | ns_flag = SLP_NEEDQ; |
5934 | goto dorecs; |
5935 | } |
5936 | #endif |
5937 | if (slp->ns_sotype == SOCK_STREAM) { |
5938 | /* |
5939 | * If there are already records on the queue, defer soreceive() |
5940 | * to an(other) nfsd so that there is feedback to the TCP layer that |
5941 | * the nfs servers are heavily loaded. |
5942 | */ |
5943 | if (slp->ns_rec) { |
5944 | ns_flag = SLP_NEEDQ; |
5945 | goto dorecs; |
5946 | } |
5947 | |
5948 | /* |
5949 | * Do soreceive(). |
5950 | */ |
5951 | bytes_read = 1000000000; |
5952 | error = sock_receivembuf(so, NULL, &mp, MSG_DONTWAIT, &bytes_read); |
5953 | if (error || mp == NULL) { |
5954 | if (error == EWOULDBLOCK) |
5955 | ns_flag = (waitflag == MBUF_DONTWAIT) ? SLP_NEEDQ : 0; |
5956 | else |
5957 | ns_flag = SLP_DISCONN; |
5958 | goto dorecs; |
5959 | } |
5960 | m = mp; |
5961 | if (slp->ns_rawend) { |
5962 | if ((error = mbuf_setnext(slp->ns_rawend, m))) |
5963 | panic("nfsrv_rcv: mbuf_setnext failed %d\n" , error); |
5964 | slp->ns_cc += bytes_read; |
5965 | } else { |
5966 | slp->ns_raw = m; |
5967 | slp->ns_cc = bytes_read; |
5968 | } |
5969 | while ((m2 = mbuf_next(m))) |
5970 | m = m2; |
5971 | slp->ns_rawend = m; |
5972 | |
5973 | /* |
5974 | * Now try and parse record(s) out of the raw stream data. |
5975 | */ |
5976 | error = nfsrv_getstream(slp, waitflag); |
5977 | if (error) { |
5978 | if (error == EPERM) |
5979 | ns_flag = SLP_DISCONN; |
5980 | else |
5981 | ns_flag = SLP_NEEDQ; |
5982 | } |
5983 | } else { |
5984 | struct sockaddr_storage nam; |
5985 | |
5986 | if (slp->ns_reccnt >= nfsrv_sock_max_rec_queue_length) { |
5987 | /* already have max # RPC records queued on this socket */ |
5988 | ns_flag = SLP_NEEDQ; |
5989 | goto dorecs; |
5990 | } |
5991 | |
5992 | bzero(&msg, sizeof(msg)); |
5993 | msg.msg_name = (caddr_t)&nam; |
5994 | msg.msg_namelen = sizeof(nam); |
5995 | |
5996 | do { |
5997 | bytes_read = 1000000000; |
5998 | error = sock_receivembuf(so, &msg, &mp, MSG_DONTWAIT | MSG_NEEDSA, &bytes_read); |
5999 | if (mp) { |
6000 | if (msg.msg_name && (mbuf_get(MBUF_WAITOK, MBUF_TYPE_SONAME, &mhck) == 0)) { |
6001 | mbuf_setlen(mhck, nam.ss_len); |
6002 | bcopy(&nam, mbuf_data(mhck), nam.ss_len); |
6003 | m = mhck; |
6004 | if (mbuf_setnext(m, mp)) { |
6005 | /* trouble... just drop it */ |
6006 | printf("nfsrv_rcv: mbuf_setnext failed\n" ); |
6007 | mbuf_free(mhck); |
6008 | m = mp; |
6009 | } |
6010 | } else { |
6011 | m = mp; |
6012 | } |
6013 | if (slp->ns_recend) |
6014 | mbuf_setnextpkt(slp->ns_recend, m); |
6015 | else { |
6016 | slp->ns_rec = m; |
6017 | slp->ns_flag |= SLP_DOREC; |
6018 | } |
6019 | slp->ns_recend = m; |
6020 | mbuf_setnextpkt(m, NULL); |
6021 | slp->ns_reccnt++; |
6022 | } |
6023 | } while (mp); |
6024 | } |
6025 | |
6026 | /* |
6027 | * Now try and process the request records, non-blocking. |
6028 | */ |
6029 | dorecs: |
6030 | if (ns_flag) |
6031 | slp->ns_flag |= ns_flag; |
6032 | if (waitflag == MBUF_DONTWAIT) { |
6033 | int wake = (slp->ns_flag & SLP_WORKTODO); |
6034 | lck_rw_done(&slp->ns_rwlock); |
6035 | if (wake && nfsd_thread_count) { |
6036 | lck_mtx_lock(nfsd_mutex); |
6037 | nfsrv_wakenfsd(slp); |
6038 | lck_mtx_unlock(nfsd_mutex); |
6039 | } |
6040 | } |
6041 | } |
6042 | |
/*
 * Try and extract an RPC request from the mbuf data list received on a
 * stream socket.  The "waitflag" argument indicates whether or not it
 * can sleep.
 *
 * Raw stream bytes are held on slp->ns_raw, with slp->ns_cc counting how
 * many are buffered.  Each pass of the loop:
 *   1. if no fragment is in progress (slp->ns_reclen == 0), consumes an
 *      NFSX_UNSIGNED-byte record mark: the 0x80000000 bit selects
 *      SLP_LASTFRAG and the remaining bits become slp->ns_reclen;
 *   2. detaches slp->ns_reclen bytes from the raw chain as "recm";
 *   3. appends recm to slp->ns_frag and, when SLP_LASTFRAG is set, moves
 *      the accumulated chain onto the slp->ns_rec/ns_recend packet queue.
 *
 * slp->ns_flag carries SLP_GETSTREAM for the duration of the call; the
 * function panics if it is already set on entry and clears it before
 * every return.
 *
 * Returns:
 *   0            - not enough buffered data to make further progress
 *   EPERM        - record mark length is <= 0 or exceeds NFS_MAXPACKET
 *   EWOULDBLOCK  - mbuf_copym() or mbuf_setnext() failed while splitting
 *                  the raw chain
 * The loop has no other exit; it keeps extracting fragments until one of
 * the above conditions is hit.
 */
int
nfsrv_getstream(struct nfsrv_sock *slp, int waitflag)
{
	mbuf_t m;
	char *cp1, *cp2, *mdata;
	int len, mlen, error;
	mbuf_t om, m2, recm;
	u_int32_t recmark;

	/* SLP_GETSTREAM guards against a second concurrent/nested parse */
	if (slp->ns_flag & SLP_GETSTREAM)
		panic("nfs getstream" );
	slp->ns_flag |= SLP_GETSTREAM;
	for (;;) {
		if (slp->ns_reclen == 0) {
			/* no fragment in progress: need a full record mark first */
			if (slp->ns_cc < NFSX_UNSIGNED) {
				slp->ns_flag &= ~SLP_GETSTREAM;
				return (0);
			}
			m = slp->ns_raw;
			mdata = mbuf_data(m);
			mlen = mbuf_len(m);
			if (mlen >= NFSX_UNSIGNED) {
				/* fast path: the whole mark sits in the first mbuf */
				bcopy(mdata, (caddr_t)&recmark, NFSX_UNSIGNED);
				mdata += NFSX_UNSIGNED;
				mlen -= NFSX_UNSIGNED;
				mbuf_setdata(m, mdata, mlen);
			} else {
				/*
				 * The mark straddles mbufs: gather it a byte at a
				 * time, stepping over drained mbufs and trimming
				 * each consumed byte off the mbuf it came from.
				 */
				cp1 = (caddr_t)&recmark;
				cp2 = mdata;
				while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
					while (mlen == 0) {
						m = mbuf_next(m);
						cp2 = mbuf_data(m);
						mlen = mbuf_len(m);
					}
					*cp1++ = *cp2++;
					mlen--;
					mbuf_setdata(m, cp2, mlen);
				}
			}
			/* the mark bytes no longer count as buffered payload */
			slp->ns_cc -= NFSX_UNSIGNED;
			recmark = ntohl(recmark);
			/* low 31 bits: fragment length; high bit: last fragment */
			slp->ns_reclen = recmark & ~0x80000000;
			if (recmark & 0x80000000)
				slp->ns_flag |= SLP_LASTFRAG;
			else
				slp->ns_flag &= ~SLP_LASTFRAG;
			if (slp->ns_reclen <= 0 || slp->ns_reclen > NFS_MAXPACKET) {
				slp->ns_flag &= ~SLP_GETSTREAM;
				return (EPERM);
			}
		}

		/*
		 * Now get the record part.
		 *
		 * Note that slp->ns_reclen may be 0.  Linux sometimes
		 * generates 0-length RPCs
		 *
		 * NOTE(review): the record mark check above returns EPERM when
		 * ns_reclen <= 0, so a zero length does not appear to reach this
		 * point via that path -- confirm whether the note above is stale.
		 */
		recm = NULL;
		if (slp->ns_cc == slp->ns_reclen) {
			/* buffered data is exactly one fragment: take the whole chain */
			recm = slp->ns_raw;
			slp->ns_raw = slp->ns_rawend = NULL;
			slp->ns_cc = slp->ns_reclen = 0;
		} else if (slp->ns_cc > slp->ns_reclen) {
			/*
			 * More data than this fragment needs: walk the chain
			 * counting bytes in "len" until the fragment boundary,
			 * then cut the chain there.  "om" trails one mbuf
			 * behind "m".
			 */
			len = 0;
			m = slp->ns_raw;
			mlen = mbuf_len(m);
			mdata = mbuf_data(m);
			om = NULL;
			while (len < slp->ns_reclen) {
				if ((len + mlen) > slp->ns_reclen) {
					/*
					 * Boundary falls inside this mbuf: copy the
					 * leading bytes into m2 for the record and
					 * leave the remainder in m for the next one.
					 */
					if (mbuf_copym(m, 0, slp->ns_reclen - len, waitflag, &m2)) {
						slp->ns_flag &= ~SLP_GETSTREAM;
						return (EWOULDBLOCK);
					}
					if (om) {
						/* hang the copy off the last whole mbuf */
						if (mbuf_setnext(om, m2)) {
							/* trouble... just drop it */
							printf("nfsrv_getstream: mbuf_setnext failed\n" );
							mbuf_freem(m2);
							slp->ns_flag &= ~SLP_GETSTREAM;
							return (EWOULDBLOCK);
						}
						recm = slp->ns_raw;
					} else {
						/* fragment lies entirely within the first mbuf */
						recm = m2;
					}
					/* trim the copied bytes off the front of m */
					mdata += slp->ns_reclen - len;
					mlen -= slp->ns_reclen - len;
					mbuf_setdata(m, mdata, mlen);
					len = slp->ns_reclen;
				} else if ((len + mlen) == slp->ns_reclen) {
					/* boundary is exactly at the end of this mbuf */
					om = m;
					len += mlen;
					m = mbuf_next(m);
					recm = slp->ns_raw;
					/* terminate the record's chain after om */
					if (mbuf_setnext(om, NULL)) {
						printf("nfsrv_getstream: mbuf_setnext failed 2\n" );
						slp->ns_flag &= ~SLP_GETSTREAM;
						return (EWOULDBLOCK);
					}
					mlen = mbuf_len(m);
					mdata = mbuf_data(m);
				} else {
					/* whole mbuf belongs to the fragment; keep walking */
					om = m;
					len += mlen;
					m = mbuf_next(m);
					mlen = mbuf_len(m);
					mdata = mbuf_data(m);
				}
			}
			/* m now heads the data that follows this fragment */
			slp->ns_raw = m;
			slp->ns_cc -= len;
			slp->ns_reclen = 0;
		} else {
			/* fragment not fully buffered yet; ns_reclen is kept for next call */
			slp->ns_flag &= ~SLP_GETSTREAM;
			return (0);
		}

		/*
		 * Accumulate the fragments into a record.
		 */
		if (slp->ns_frag == NULL) {
			slp->ns_frag = recm;
		} else {
			/* find the tail of the fragments gathered so far */
			m = slp->ns_frag;
			while ((m2 = mbuf_next(m)))
				m = m2;
			if ((error = mbuf_setnext(m, recm)))
				panic("nfsrv_getstream: mbuf_setnext failed 3, %d\n" , error);
		}
		if (slp->ns_flag & SLP_LASTFRAG) {
			/* record complete: append it to the ns_rec packet queue */
			if (slp->ns_recend)
				mbuf_setnextpkt(slp->ns_recend, slp->ns_frag);
			else {
				slp->ns_rec = slp->ns_frag;
				slp->ns_flag |= SLP_DOREC;
			}
			slp->ns_recend = slp->ns_frag;
			slp->ns_frag = NULL;
		}
	}
}
6192 | |
6193 | /* |
6194 | * Parse an RPC header. |
6195 | */ |
6196 | int |
6197 | nfsrv_dorec( |
6198 | struct nfsrv_sock *slp, |
6199 | struct nfsd *nfsd, |
6200 | struct nfsrv_descript **ndp) |
6201 | { |
6202 | mbuf_t m; |
6203 | mbuf_t nam; |
6204 | struct nfsrv_descript *nd; |
6205 | int error = 0; |
6206 | |
6207 | *ndp = NULL; |
6208 | if (!(slp->ns_flag & (SLP_VALID|SLP_DOREC)) || (slp->ns_rec == NULL)) |
6209 | return (ENOBUFS); |
6210 | MALLOC_ZONE(nd, struct nfsrv_descript *, |
6211 | sizeof (struct nfsrv_descript), M_NFSRVDESC, M_WAITOK); |
6212 | if (!nd) |
6213 | return (ENOMEM); |
6214 | m = slp->ns_rec; |
6215 | slp->ns_rec = mbuf_nextpkt(m); |
6216 | if (slp->ns_rec) |
6217 | mbuf_setnextpkt(m, NULL); |
6218 | else { |
6219 | slp->ns_flag &= ~SLP_DOREC; |
6220 | slp->ns_recend = NULL; |
6221 | } |
6222 | slp->ns_reccnt--; |
6223 | if (mbuf_type(m) == MBUF_TYPE_SONAME) { |
6224 | nam = m; |
6225 | m = mbuf_next(m); |
6226 | if ((error = mbuf_setnext(nam, NULL))) |
6227 | panic("nfsrv_dorec: mbuf_setnext failed %d\n" , error); |
6228 | } else |
6229 | nam = NULL; |
6230 | nd->nd_nam2 = nam; |
6231 | nfsm_chain_dissect_init(error, &nd->nd_nmreq, m); |
6232 | if (!error) |
6233 | error = nfsrv_getreq(nd); |
6234 | if (error) { |
6235 | if (nam) |
6236 | mbuf_freem(nam); |
6237 | if (nd->nd_gss_context) |
6238 | nfs_gss_svc_ctx_deref(nd->nd_gss_context); |
6239 | FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC); |
6240 | return (error); |
6241 | } |
6242 | nd->nd_mrep = NULL; |
6243 | *ndp = nd; |
6244 | nfsd->nfsd_nd = nd; |
6245 | return (0); |
6246 | } |
6247 | |
6248 | /* |
6249 | * Parse an RPC request |
6250 | * - verify it |
6251 | * - fill in the cred struct. |
6252 | */ |
6253 | int |
6254 | nfsrv_getreq(struct nfsrv_descript *nd) |
6255 | { |
6256 | struct nfsm_chain *nmreq; |
6257 | int len, i; |
6258 | u_int32_t nfsvers, auth_type; |
6259 | int error = 0; |
6260 | uid_t user_id; |
6261 | gid_t group_id; |
6262 | int ngroups; |
6263 | uint32_t val; |
6264 | |
6265 | nd->nd_cr = NULL; |
6266 | nd->nd_gss_context = NULL; |
6267 | nd->nd_gss_seqnum = 0; |
6268 | nd->nd_gss_mb = NULL; |
6269 | |
6270 | user_id = group_id = -2; |
6271 | val = auth_type = len = 0; |
6272 | |
6273 | nmreq = &nd->nd_nmreq; |
6274 | nfsm_chain_get_32(error, nmreq, nd->nd_retxid); // XID |
6275 | nfsm_chain_get_32(error, nmreq, val); // RPC Call |
6276 | if (!error && (val != RPC_CALL)) |
6277 | error = EBADRPC; |
6278 | nfsmout_if(error); |
6279 | nd->nd_repstat = 0; |
6280 | nfsm_chain_get_32(error, nmreq, val); // RPC Version |
6281 | nfsmout_if(error); |
6282 | if (val != RPC_VER2) { |
6283 | nd->nd_repstat = ERPCMISMATCH; |
6284 | nd->nd_procnum = NFSPROC_NOOP; |
6285 | return (0); |
6286 | } |
6287 | nfsm_chain_get_32(error, nmreq, val); // RPC Program Number |
6288 | nfsmout_if(error); |
6289 | if (val != NFS_PROG) { |
6290 | nd->nd_repstat = EPROGUNAVAIL; |
6291 | nd->nd_procnum = NFSPROC_NOOP; |
6292 | return (0); |
6293 | } |
6294 | nfsm_chain_get_32(error, nmreq, nfsvers);// NFS Version Number |
6295 | nfsmout_if(error); |
6296 | if ((nfsvers < NFS_VER2) || (nfsvers > NFS_VER3)) { |
6297 | nd->nd_repstat = EPROGMISMATCH; |
6298 | nd->nd_procnum = NFSPROC_NOOP; |
6299 | return (0); |
6300 | } |
6301 | nd->nd_vers = nfsvers; |
6302 | nfsm_chain_get_32(error, nmreq, nd->nd_procnum);// NFS Procedure Number |
6303 | nfsmout_if(error); |
6304 | if ((nd->nd_procnum >= NFS_NPROCS) || |
6305 | ((nd->nd_vers == NFS_VER2) && (nd->nd_procnum > NFSV2PROC_STATFS))) { |
6306 | nd->nd_repstat = EPROCUNAVAIL; |
6307 | nd->nd_procnum = NFSPROC_NOOP; |
6308 | return (0); |
6309 | } |
6310 | if (nfsvers != NFS_VER3) |
6311 | nd->nd_procnum = nfsv3_procid[nd->nd_procnum]; |
6312 | nfsm_chain_get_32(error, nmreq, auth_type); // Auth Flavor |
6313 | nfsm_chain_get_32(error, nmreq, len); // Auth Length |
6314 | if (!error && (len < 0 || len > RPCAUTH_MAXSIZ)) |
6315 | error = EBADRPC; |
6316 | nfsmout_if(error); |
6317 | |
6318 | /* Handle authentication */ |
6319 | if (auth_type == RPCAUTH_SYS) { |
6320 | struct posix_cred temp_pcred; |
6321 | if (nd->nd_procnum == NFSPROC_NULL) |
6322 | return (0); |
6323 | nd->nd_sec = RPCAUTH_SYS; |
6324 | nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED); // skip stamp |
6325 | nfsm_chain_get_32(error, nmreq, len); // hostname length |
6326 | if (len < 0 || len > NFS_MAXNAMLEN) |
6327 | error = EBADRPC; |
6328 | nfsm_chain_adv(error, nmreq, nfsm_rndup(len)); // skip hostname |
6329 | nfsmout_if(error); |
6330 | |
6331 | /* create a temporary credential using the bits from the wire */ |
6332 | bzero(&temp_pcred, sizeof(temp_pcred)); |
6333 | nfsm_chain_get_32(error, nmreq, user_id); |
6334 | nfsm_chain_get_32(error, nmreq, group_id); |
6335 | temp_pcred.cr_groups[0] = group_id; |
6336 | nfsm_chain_get_32(error, nmreq, len); // extra GID count |
6337 | if ((len < 0) || (len > RPCAUTH_UNIXGIDS)) |
6338 | error = EBADRPC; |
6339 | nfsmout_if(error); |
6340 | for (i = 1; i <= len; i++) |
6341 | if (i < NGROUPS) |
6342 | nfsm_chain_get_32(error, nmreq, temp_pcred.cr_groups[i]); |
6343 | else |
6344 | nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED); |
6345 | nfsmout_if(error); |
6346 | ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1); |
6347 | if (ngroups > 1) |
6348 | nfsrv_group_sort(&temp_pcred.cr_groups[0], ngroups); |
6349 | nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED); // verifier flavor (should be AUTH_NONE) |
6350 | nfsm_chain_get_32(error, nmreq, len); // verifier length |
6351 | if (len < 0 || len > RPCAUTH_MAXSIZ) |
6352 | error = EBADRPC; |
6353 | if (len > 0) |
6354 | nfsm_chain_adv(error, nmreq, nfsm_rndup(len)); |
6355 | |
6356 | /* request creation of a real credential */ |
6357 | temp_pcred.cr_uid = user_id; |
6358 | temp_pcred.cr_ngroups = ngroups; |
6359 | nd->nd_cr = posix_cred_create(&temp_pcred); |
6360 | if (nd->nd_cr == NULL) { |
6361 | nd->nd_repstat = ENOMEM; |
6362 | nd->nd_procnum = NFSPROC_NOOP; |
6363 | return (0); |
6364 | } |
6365 | } else if (auth_type == RPCSEC_GSS) { |
6366 | error = nfs_gss_svc_cred_get(nd, nmreq); |
6367 | if (error) { |
6368 | if (error == EINVAL) |
6369 | goto nfsmout; // drop the request |
6370 | nd->nd_repstat = error; |
6371 | nd->nd_procnum = NFSPROC_NOOP; |
6372 | return (0); |
6373 | } |
6374 | } else { |
6375 | if (nd->nd_procnum == NFSPROC_NULL) // assume it's AUTH_NONE |
6376 | return (0); |
6377 | nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED); |
6378 | nd->nd_procnum = NFSPROC_NOOP; |
6379 | return (0); |
6380 | } |
6381 | return (0); |
6382 | nfsmout: |
6383 | if (IS_VALID_CRED(nd->nd_cr)) |
6384 | kauth_cred_unref(&nd->nd_cr); |
6385 | nfsm_chain_cleanup(nmreq); |
6386 | return (error); |
6387 | } |
6388 | |
6389 | /* |
6390 | * Search for a sleeping nfsd and wake it up. |
6391 | * SIDE EFFECT: If none found, make sure the socket is queued up so that one |
6392 | * of the running nfsds will go look for the work in the nfsrv_sockwait list. |
6393 | * Note: Must be called with nfsd_mutex held. |
6394 | */ |
6395 | void |
6396 | nfsrv_wakenfsd(struct nfsrv_sock *slp) |
6397 | { |
6398 | struct nfsd *nd; |
6399 | |
6400 | if ((slp->ns_flag & SLP_VALID) == 0) |
6401 | return; |
6402 | |
6403 | lck_rw_lock_exclusive(&slp->ns_rwlock); |
6404 | /* if there's work to do on this socket, make sure it's queued up */ |
6405 | if ((slp->ns_flag & SLP_WORKTODO) && !(slp->ns_flag & SLP_QUEUED)) { |
6406 | TAILQ_INSERT_TAIL(&nfsrv_sockwait, slp, ns_svcq); |
6407 | slp->ns_flag |= SLP_WAITQ; |
6408 | } |
6409 | lck_rw_done(&slp->ns_rwlock); |
6410 | |
6411 | /* wake up a waiting nfsd, if possible */ |
6412 | nd = TAILQ_FIRST(&nfsd_queue); |
6413 | if (!nd) |
6414 | return; |
6415 | |
6416 | TAILQ_REMOVE(&nfsd_queue, nd, nfsd_queue); |
6417 | nd->nfsd_flag &= ~NFSD_WAITING; |
6418 | wakeup(nd); |
6419 | } |
6420 | |
6421 | #endif /* NFSSERVER */ |
6422 | |