1 | /* |
2 | * Copyright (c) 2000-2016 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ |
29 | /* |
30 | * Copyright (c) 1989, 1993 |
31 | * The Regents of the University of California. All rights reserved. |
32 | * |
33 | * This code is derived from software contributed to Berkeley by |
34 | * Rick Macklem at The University of Guelph. |
35 | * |
36 | * Redistribution and use in source and binary forms, with or without |
37 | * modification, are permitted provided that the following conditions |
38 | * are met: |
39 | * 1. Redistributions of source code must retain the above copyright |
40 | * notice, this list of conditions and the following disclaimer. |
41 | * 2. Redistributions in binary form must reproduce the above copyright |
42 | * notice, this list of conditions and the following disclaimer in the |
43 | * documentation and/or other materials provided with the distribution. |
44 | * 3. All advertising materials mentioning features or use of this software |
45 | * must display the following acknowledgement: |
46 | * This product includes software developed by the University of |
47 | * California, Berkeley and its contributors. |
48 | * 4. Neither the name of the University nor the names of its contributors |
49 | * may be used to endorse or promote products derived from this software |
50 | * without specific prior written permission. |
51 | * |
52 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
53 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
54 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
55 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
56 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
57 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
58 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
59 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
60 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
61 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
62 | * SUCH DAMAGE. |
63 | * |
64 | * @(#)nfs_syscalls.c 8.5 (Berkeley) 3/30/95 |
65 | * FreeBSD-Id: nfs_syscalls.c,v 1.32 1997/11/07 08:53:25 phk Exp $ |
66 | */ |
67 | /* |
68 | * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce |
69 | * support for mandatory and extensible security protections. This notice |
70 | * is included in support of clause 2.2 (b) of the Apple Public License, |
71 | * Version 2.0. |
72 | */ |
73 | |
74 | #include <sys/param.h> |
75 | #include <sys/systm.h> |
76 | #include <sys/kernel.h> |
77 | #include <sys/file_internal.h> |
78 | #include <sys/filedesc.h> |
79 | #include <sys/stat.h> |
80 | #include <sys/vnode_internal.h> |
81 | #include <sys/mount_internal.h> |
82 | #include <sys/proc_internal.h> /* for fdflags */ |
83 | #include <sys/kauth.h> |
84 | #include <sys/sysctl.h> |
85 | #include <sys/ubc.h> |
86 | #include <sys/uio.h> |
87 | #include <sys/malloc.h> |
88 | #include <sys/kpi_mbuf.h> |
89 | #include <sys/socket.h> |
90 | #include <sys/socketvar.h> |
91 | #include <sys/domain.h> |
92 | #include <sys/protosw.h> |
93 | #include <sys/fcntl.h> |
94 | #include <sys/lockf.h> |
95 | #include <sys/syslog.h> |
96 | #include <sys/user.h> |
97 | #include <sys/sysproto.h> |
98 | #include <sys/kpi_socket.h> |
99 | #include <sys/fsevents.h> |
100 | #include <libkern/OSAtomic.h> |
101 | #include <kern/thread_call.h> |
102 | #include <kern/task.h> |
103 | |
104 | #include <security/audit/audit.h> |
105 | |
106 | #include <netinet/in.h> |
107 | #include <netinet/tcp.h> |
108 | #include <nfs/xdr_subs.h> |
109 | #include <nfs/rpcv2.h> |
110 | #include <nfs/nfsproto.h> |
111 | #include <nfs/nfs.h> |
112 | #include <nfs/nfsm_subs.h> |
113 | #include <nfs/nfsrvcache.h> |
114 | #include <nfs/nfs_gss.h> |
115 | #include <nfs/nfsmount.h> |
116 | #include <nfs/nfsnode.h> |
117 | #include <nfs/nfs_lock.h> |
118 | #if CONFIG_MACF |
119 | #include <security/mac_framework.h> |
120 | #endif |
121 | |
/* Local prototype for thread_terminate(), which the nfsiod code below calls to end the current thread. */
kern_return_t thread_terminate(thread_t); /* XXX */

#if NFSSERVER

/* NFS server procedure table (NFS_NPROCS entries), defined elsewhere. */
extern const nfsrv_proc_t nfsrv_procs[NFS_NPROCS];

/* Defined elsewhere; exported below as the server sysctls "wg_delay" and "wg_delay_v3". */
extern int nfsrv_wg_delay;
extern int nfsrv_wg_delay_v3;

/* Exported below as the read/write server sysctl "require_resv_port"; off (0) by default. */
static int nfsrv_require_resv_port = 0;
/* NOTE(review): presumably records when the idle-socket timer was armed; not used in this part of the file - confirm. */
static time_t nfsrv_idlesock_timer_on = 0;
/* Exported below as the read-only server sysctl "nfsd_tcp_connections". */
static int nfsrv_sock_tcp_cnt = 0;
/* NOTE(review): presumably the lower bound, in seconds, for nfsrv_sock_idle_timeout - confirm against its users. */
#define NFSD_MIN_IDLE_TIMEOUT 30
/* Exported below as the read/write server sysctl "nfsd_sock_idle_timeout". */
static int nfsrv_sock_idle_timeout = 3600; /* One hour */

/* Helpers for the nfssvc() pseudo system call and NFS server socket management. */
int nfssvc_export(user_addr_t argp);
int nfssvc_nfsd(void);
int nfssvc_addsock(socket_t, mbuf_t);
void nfsrv_zapsock(struct nfsrv_sock *);
void nfsrv_slpderef(struct nfsrv_sock *);
void nfsrv_slpfree(struct nfsrv_sock *);

#endif /* NFSSERVER */
145 | |
/*
 * sysctl stuff
 *
 * Registers the "nfs" node under vfs.generic, with a "client" sub-node
 * (when NFSCLIENT is configured) and a "server" sub-node (when NFSSERVER
 * is configured).  Each leaf below exposes one kernel variable:
 * CTLFLAG_RW leaves are readable and writable, CTLFLAG_RD leaves are
 * read-only.
 */
SYSCTL_DECL(_vfs_generic);
SYSCTL_NODE(_vfs_generic, OID_AUTO, nfs, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs hinge");

#if NFSCLIENT
/* vfs.generic.nfs.client.* : NFS client tunables and counters */
SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, client, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs client hinge");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, initialdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_initial_delay, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nextdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_delay, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, iosize, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_iosize, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_cache_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_cache_timeout, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, allow_async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_allow_async, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, statfs_rate_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_statfs_rate_limit, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsiod_thread_max, 0, "");
/* read-only: number of nfsiod threads currently accounted for */
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsiod_thread_count, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, lockd_mounts, CTLFLAG_RD | CTLFLAG_LOCKED, &nfs_lockd_mounts, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, max_async_writes, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_max_async_writes, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_delete, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_delete, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_dotzfs, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_dotzfs, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_for_getattr, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_for_getattr, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, idmap_ctrl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_idmap_ctrl, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, callback_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_callback_port, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, is_mobile, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_is_mobile, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, squishy_flags, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_squishy_flags, 0, "");
SYSCTL_UINT(_vfs_generic_nfs_client, OID_AUTO, debug_ctl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_debug_ctl, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, readlink_nocache, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_readlink_nocache, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, root_steals_gss_context, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_root_steals_ctx, 0, "");
/* string-valued: backed directly by the nfs4_default_domain buffer */
SYSCTL_STRING(_vfs_generic_nfs_client, OID_AUTO, default_nfs4domain, CTLFLAG_RW | CTLFLAG_LOCKED, nfs4_default_domain, sizeof(nfs4_default_domain), "");
#endif /* NFSCLIENT */

#if NFSSERVER
/* vfs.generic.nfs.server.* : NFS server tunables and counters */
SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, server, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs server hinge");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay_v3, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay_v3, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, require_resv_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_require_resv_port, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_async, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, export_hash_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_export_hash_size, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, reqcache_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_reqcache_size, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, request_queue_length, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_max_rec_queue_length, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, user_stats, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_user_stat_enabled, 0, "");
SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, gss_context_ttl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_gss_context_ttl, 0, "");
#if CONFIG_FSE
/* only present when the kernel is built with CONFIG_FSE */
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, fsevents, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_fsevents_enabled, 0, "");
#endif
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsd_thread_max, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsd_thread_count, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_sock_idle_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_idle_timeout, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_tcp_connections, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsrv_sock_tcp_cnt, 0, "");
#ifdef NFS_UC_Q_DEBUG
/* upcall-queue knobs, only compiled in when NFS_UC_Q_DEBUG is defined */
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, use_upcall_svc, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_use_proxy, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_limit, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_max_seen, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_max_seen, 0, "");
/* __DECONST strips a qualifier from &nfsrv_uc_queue_count so it can be passed as int * */
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_count, CTLFLAG_RD | CTLFLAG_LOCKED, __DECONST(int *, &nfsrv_uc_queue_count), 0, "");
#endif
#endif /* NFSSERVER */
202 | |
203 | |
204 | #if NFSCLIENT |
205 | |
206 | static int |
207 | mapname2id(struct nfs_testmapid *map) |
208 | { |
209 | int error; |
210 | |
211 | error = nfs4_id2guid(map->ntm_name, &map->ntm_guid, map->ntm_grpflag); |
212 | if (error) |
213 | return (error); |
214 | |
215 | if (map->ntm_grpflag) |
216 | error = kauth_cred_guid2gid(&map->ntm_guid, (gid_t *)&map->ntm_id); |
217 | else |
218 | error = kauth_cred_guid2uid(&map->ntm_guid, (uid_t *)&map->ntm_id); |
219 | |
220 | return (error); |
221 | } |
222 | |
223 | static int |
224 | mapid2name(struct nfs_testmapid *map) |
225 | { |
226 | int error; |
227 | size_t len = sizeof(map->ntm_name); |
228 | |
229 | if (map->ntm_grpflag) |
230 | error = kauth_cred_gid2guid((gid_t)map->ntm_id, &map->ntm_guid); |
231 | else |
232 | error = kauth_cred_uid2guid((uid_t)map->ntm_id, &map->ntm_guid); |
233 | |
234 | if (error) |
235 | return (error); |
236 | |
237 | error = nfs4_guid2id(&map->ntm_guid, map->ntm_name, &len, map->ntm_grpflag); |
238 | |
239 | return (error); |
240 | |
241 | } |
242 | |
243 | static int |
244 | nfsclnt_testidmap(proc_t p, user_addr_t argp) |
245 | { |
246 | struct nfs_testmapid mapid; |
247 | int error, coerror; |
248 | size_t len = sizeof(mapid.ntm_name); |
249 | |
250 | /* Let root make this call. */ |
251 | error = proc_suser(p); |
252 | if (error) |
253 | return (error); |
254 | |
255 | error = copyin(argp, &mapid, sizeof(mapid)); |
256 | if (error) |
257 | return (error); |
258 | switch (mapid.ntm_lookup) { |
259 | case NTM_NAME2ID: |
260 | error = mapname2id(&mapid); |
261 | break; |
262 | case NTM_ID2NAME: |
263 | error = mapid2name(&mapid); |
264 | break; |
265 | case NTM_NAME2GUID: |
266 | error = nfs4_id2guid(mapid.ntm_name, &mapid.ntm_guid, mapid.ntm_grpflag); |
267 | break; |
268 | case NTM_GUID2NAME: |
269 | error = nfs4_guid2id(&mapid.ntm_guid, mapid.ntm_name, &len, mapid.ntm_grpflag); |
270 | break; |
271 | default: |
272 | return (EINVAL); |
273 | } |
274 | |
275 | coerror = copyout(&mapid, argp, sizeof(mapid)); |
276 | |
277 | return (error ? error : coerror); |
278 | } |
279 | |
280 | int |
281 | nfsclnt(proc_t p, struct nfsclnt_args *uap, __unused int *retval) |
282 | { |
283 | struct lockd_ans la; |
284 | int error; |
285 | |
286 | switch (uap->flag) { |
287 | case NFSCLNT_LOCKDANS: |
288 | error = copyin(uap->argp, &la, sizeof(la)); |
289 | if (!error) |
290 | error = nfslockdans(p, &la); |
291 | break; |
292 | case NFSCLNT_LOCKDNOTIFY: |
293 | error = nfslockdnotify(p, uap->argp); |
294 | break; |
295 | case NFSCLNT_TESTIDMAP: |
296 | error = nfsclnt_testidmap(p, uap->argp); |
297 | break; |
298 | default: |
299 | error = EINVAL; |
300 | } |
301 | return (error); |
302 | } |
303 | |
304 | |
305 | /* |
306 | * Asynchronous I/O threads for client NFS. |
307 | * They do read-ahead and write-behind operations on the block I/O cache. |
308 | * |
 * Up to nfsiod_thread_max threads are launched on demand, and each exits
 * when unused for a while. There are as many nfsiod structs as there are
311 | * nfsiod threads; however there's no strict tie between a thread and a struct. |
312 | * Each thread puts an nfsiod on the free list and sleeps on it. When it wakes |
313 | * up, it removes the next struct nfsiod from the queue and services it. Then |
 * it will put the struct at the head of the free list and sleep on it.
315 | * Async requests will pull the next struct nfsiod from the head of the free list, |
316 | * put it on the work queue, and wake whatever thread is waiting on that struct. |
317 | */ |
318 | |
319 | /* |
320 | * nfsiod thread exit routine |
321 | * |
322 | * Must be called with nfsiod_mutex held so that the |
323 | * decision to terminate is atomic with the termination. |
324 | */ |
325 | void |
326 | nfsiod_terminate(struct nfsiod *niod) |
327 | { |
328 | nfsiod_thread_count--; |
329 | lck_mtx_unlock(nfsiod_mutex); |
330 | if (niod) |
331 | FREE(niod, M_TEMP); |
332 | else |
333 | printf("nfsiod: terminating without niod\n" ); |
334 | thread_terminate(current_thread()); |
335 | /*NOTREACHED*/ |
336 | } |
337 | |
338 | /* nfsiod thread startup routine */ |
339 | void |
340 | nfsiod_thread(void) |
341 | { |
342 | struct nfsiod *niod; |
343 | int error; |
344 | |
345 | MALLOC(niod, struct nfsiod *, sizeof(struct nfsiod), M_TEMP, M_WAITOK); |
346 | if (!niod) { |
347 | lck_mtx_lock(nfsiod_mutex); |
348 | nfsiod_thread_count--; |
349 | wakeup(current_thread()); |
350 | lck_mtx_unlock(nfsiod_mutex); |
351 | thread_terminate(current_thread()); |
352 | /*NOTREACHED*/ |
353 | } |
354 | bzero(niod, sizeof(*niod)); |
355 | lck_mtx_lock(nfsiod_mutex); |
356 | TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link); |
357 | wakeup(current_thread()); |
358 | error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod" , NFS_ASYNCTHREADMAXIDLE*hz, nfsiod_continue); |
359 | /* shouldn't return... so we have an error */ |
360 | /* remove an old nfsiod struct and terminate */ |
361 | lck_mtx_lock(nfsiod_mutex); |
362 | if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) |
363 | TAILQ_REMOVE(&nfsiodfree, niod, niod_link); |
364 | nfsiod_terminate(niod); |
365 | /*NOTREACHED*/ |
366 | } |
367 | |
368 | /* |
369 | * Start up another nfsiod thread. |
370 | * (unless we're already maxed out and there are nfsiods running) |
371 | */ |
372 | int |
373 | nfsiod_start(void) |
374 | { |
375 | thread_t thd = THREAD_NULL; |
376 | |
377 | lck_mtx_lock(nfsiod_mutex); |
378 | if ((nfsiod_thread_count >= NFSIOD_MAX) && (nfsiod_thread_count > 0)) { |
379 | lck_mtx_unlock(nfsiod_mutex); |
380 | return (EBUSY); |
381 | } |
382 | nfsiod_thread_count++; |
383 | if (kernel_thread_start((thread_continue_t)nfsiod_thread, NULL, &thd) != KERN_SUCCESS) { |
384 | lck_mtx_unlock(nfsiod_mutex); |
385 | return (EBUSY); |
386 | } |
387 | /* wait for the thread to complete startup */ |
388 | msleep(thd, nfsiod_mutex, PWAIT | PDROP, "nfsiodw" , NULL); |
389 | thread_deallocate(thd); |
390 | return (0); |
391 | } |
392 | |
393 | /* |
394 | * Continuation for Asynchronous I/O threads for NFS client. |
395 | * |
396 | * Grab an nfsiod struct to work on, do some work, then drop it |
397 | */ |
398 | int |
399 | nfsiod_continue(int error) |
400 | { |
401 | struct nfsiod *niod; |
402 | struct nfsmount *nmp; |
403 | struct nfsreq *req, *treq; |
404 | struct nfs_reqqhead iodq; |
405 | int morework; |
406 | |
407 | lck_mtx_lock(nfsiod_mutex); |
408 | niod = TAILQ_FIRST(&nfsiodwork); |
409 | if (!niod) { |
410 | /* there's no work queued up */ |
411 | /* remove an old nfsiod struct and terminate */ |
412 | if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) |
413 | TAILQ_REMOVE(&nfsiodfree, niod, niod_link); |
414 | nfsiod_terminate(niod); |
415 | /*NOTREACHED*/ |
416 | } |
417 | TAILQ_REMOVE(&nfsiodwork, niod, niod_link); |
418 | |
419 | worktodo: |
420 | while ((nmp = niod->niod_nmp)) { |
421 | if (nmp == NULL){ |
422 | niod->niod_nmp = NULL; |
423 | break; |
424 | } |
425 | |
426 | /* |
427 | * Service this mount's async I/O queue. |
428 | * |
429 | * In order to ensure some level of fairness between mounts, |
430 | * we grab all the work up front before processing it so any |
431 | * new work that arrives will be serviced on a subsequent |
432 | * iteration - and we have a chance to see if other work needs |
433 | * to be done (e.g. the delayed write queue needs to be pushed |
434 | * or other mounts are waiting for an nfsiod). |
435 | */ |
436 | /* grab the current contents of the queue */ |
437 | TAILQ_INIT(&iodq); |
438 | TAILQ_CONCAT(&iodq, &nmp->nm_iodq, r_achain); |
439 | /* Mark each iod request as being managed by an iod */ |
440 | TAILQ_FOREACH(req, &iodq, r_achain) { |
441 | lck_mtx_lock(&req->r_mtx); |
442 | assert(!(req->r_flags & R_IOD)); |
443 | req->r_flags |= R_IOD; |
444 | lck_mtx_unlock(&req->r_mtx); |
445 | } |
446 | lck_mtx_unlock(nfsiod_mutex); |
447 | |
448 | /* process the queue */ |
449 | TAILQ_FOREACH_SAFE(req, &iodq, r_achain, treq) { |
450 | TAILQ_REMOVE(&iodq, req, r_achain); |
451 | req->r_achain.tqe_next = NFSREQNOLIST; |
452 | req->r_callback.rcb_func(req); |
453 | } |
454 | |
455 | /* now check if there's more/other work to be done */ |
456 | lck_mtx_lock(nfsiod_mutex); |
457 | morework = !TAILQ_EMPTY(&nmp->nm_iodq); |
458 | if (!morework || !TAILQ_EMPTY(&nfsiodmounts)) { |
459 | /* |
460 | * we're going to stop working on this mount but if the |
461 | * mount still needs more work so queue it up |
462 | */ |
463 | if (morework && nmp->nm_iodlink.tqe_next == NFSNOLIST) |
464 | TAILQ_INSERT_TAIL(&nfsiodmounts, nmp, nm_iodlink); |
465 | nmp->nm_niod = NULL; |
466 | niod->niod_nmp = NULL; |
467 | } |
468 | } |
469 | |
470 | /* loop if there's still a mount to work on */ |
471 | if (!niod->niod_nmp && !TAILQ_EMPTY(&nfsiodmounts)) { |
472 | niod->niod_nmp = TAILQ_FIRST(&nfsiodmounts); |
473 | TAILQ_REMOVE(&nfsiodmounts, niod->niod_nmp, nm_iodlink); |
474 | niod->niod_nmp->nm_iodlink.tqe_next = NFSNOLIST; |
475 | } |
476 | if (niod->niod_nmp) |
477 | goto worktodo; |
478 | |
479 | /* queue ourselves back up - if there aren't too many threads running */ |
480 | if (nfsiod_thread_count <= NFSIOD_MAX) { |
481 | TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link); |
482 | error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod" , NFS_ASYNCTHREADMAXIDLE*hz, nfsiod_continue); |
483 | /* shouldn't return... so we have an error */ |
484 | /* remove an old nfsiod struct and terminate */ |
485 | lck_mtx_lock(nfsiod_mutex); |
486 | if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) |
487 | TAILQ_REMOVE(&nfsiodfree, niod, niod_link); |
488 | } |
489 | nfsiod_terminate(niod); |
490 | /*NOTREACHED*/ |
491 | return (0); |
492 | } |
493 | |
494 | #endif /* NFSCLIENT */ |
495 | |
496 | |
497 | #if NFSSERVER |
498 | |
499 | /* |
500 | * NFS server system calls |
501 | * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c |
502 | */ |
503 | |
504 | /* |
505 | * Get file handle system call |
506 | */ |
507 | int |
508 | getfh(proc_t p, struct getfh_args *uap, __unused int *retval) |
509 | { |
510 | vnode_t vp; |
511 | struct nfs_filehandle nfh; |
512 | int error, fhlen, fidlen; |
513 | struct nameidata nd; |
514 | char path[MAXPATHLEN], *ptr; |
515 | size_t pathlen; |
516 | struct nfs_exportfs *nxfs; |
517 | struct nfs_export *nx; |
518 | |
519 | /* |
520 | * Must be super user |
521 | */ |
522 | error = proc_suser(p); |
523 | if (error) |
524 | return (error); |
525 | |
526 | error = copyinstr(uap->fname, path, MAXPATHLEN, &pathlen); |
527 | if (!error) |
528 | error = copyin(uap->fhp, &fhlen, sizeof(fhlen)); |
529 | if (error) |
530 | return (error); |
531 | /* limit fh size to length specified (or v3 size by default) */ |
532 | if ((fhlen != NFSV2_MAX_FH_SIZE) && (fhlen != NFSV3_MAX_FH_SIZE)) |
533 | fhlen = NFSV3_MAX_FH_SIZE; |
534 | fidlen = fhlen - sizeof(struct nfs_exphandle); |
535 | |
536 | if (!nfsrv_is_initialized()) |
537 | return (EINVAL); |
538 | |
539 | NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, |
540 | UIO_SYSSPACE, CAST_USER_ADDR_T(path), vfs_context_current()); |
541 | error = namei(&nd); |
542 | if (error) |
543 | return (error); |
544 | nameidone(&nd); |
545 | |
546 | vp = nd.ni_vp; |
547 | |
548 | // find exportfs that matches f_mntonname |
549 | lck_rw_lock_shared(&nfsrv_export_rwlock); |
550 | ptr = vnode_mount(vp)->mnt_vfsstat.f_mntonname; |
551 | LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) { |
552 | if (!strncmp(nxfs->nxfs_path, ptr, MAXPATHLEN)) |
553 | break; |
554 | } |
555 | if (!nxfs || strncmp(nxfs->nxfs_path, path, strlen(nxfs->nxfs_path))) { |
556 | error = EINVAL; |
557 | goto out; |
558 | } |
559 | // find export that best matches remainder of path |
560 | ptr = path + strlen(nxfs->nxfs_path); |
561 | while (*ptr && (*ptr == '/')) |
562 | ptr++; |
563 | LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) { |
564 | int len = strlen(nx->nx_path); |
565 | if (len == 0) // we've hit the export entry for the root directory |
566 | break; |
567 | if (!strncmp(nx->nx_path, ptr, len)) |
568 | break; |
569 | } |
570 | if (!nx) { |
571 | error = EINVAL; |
572 | goto out; |
573 | } |
574 | |
575 | bzero(&nfh, sizeof(nfh)); |
576 | nfh.nfh_xh.nxh_version = htonl(NFS_FH_VERSION); |
577 | nfh.nfh_xh.nxh_fsid = htonl(nxfs->nxfs_id); |
578 | nfh.nfh_xh.nxh_expid = htonl(nx->nx_id); |
579 | nfh.nfh_xh.nxh_flags = 0; |
580 | nfh.nfh_xh.nxh_reserved = 0; |
581 | nfh.nfh_len = fidlen; |
582 | error = VFS_VPTOFH(vp, (int*)&nfh.nfh_len, &nfh.nfh_fid[0], NULL); |
583 | if (nfh.nfh_len > (uint32_t)fidlen) |
584 | error = EOVERFLOW; |
585 | nfh.nfh_xh.nxh_fidlen = nfh.nfh_len; |
586 | nfh.nfh_len += sizeof(nfh.nfh_xh); |
587 | nfh.nfh_fhp = (u_char*)&nfh.nfh_xh; |
588 | |
589 | out: |
590 | lck_rw_done(&nfsrv_export_rwlock); |
591 | vnode_put(vp); |
592 | if (error) |
593 | return (error); |
594 | /* |
595 | * At first blush, this may appear to leak a kernel stack |
596 | * address, but the copyout() never reaches &nfh.nfh_fhp |
597 | * (sizeof(fhandle_t) < sizeof(nfh)). |
598 | */ |
599 | error = copyout((caddr_t)&nfh, uap->fhp, sizeof(fhandle_t)); |
600 | return (error); |
601 | } |
602 | |
603 | extern const struct fileops vnops; |
604 | |
605 | /* |
606 | * syscall for the rpc.lockd to use to translate a NFS file handle into |
607 | * an open descriptor. |
608 | * |
609 | * warning: do not remove the suser() call or this becomes one giant |
610 | * security hole. |
611 | */ |
612 | int |
613 | fhopen( proc_t p, |
614 | struct fhopen_args *uap, |
615 | int32_t *retval) |
616 | { |
617 | vnode_t vp; |
618 | struct nfs_filehandle nfh; |
619 | struct nfs_export *nx; |
620 | struct nfs_export_options *nxo; |
621 | struct flock lf; |
622 | struct fileproc *fp, *nfp; |
623 | int fmode, error, type; |
624 | int indx; |
625 | vfs_context_t ctx = vfs_context_current(); |
626 | kauth_action_t action; |
627 | |
628 | /* |
629 | * Must be super user |
630 | */ |
631 | error = suser(vfs_context_ucred(ctx), 0); |
632 | if (error) { |
633 | return (error); |
634 | } |
635 | |
636 | if (!nfsrv_is_initialized()) { |
637 | return (EINVAL); |
638 | } |
639 | |
640 | fmode = FFLAGS(uap->flags); |
641 | /* why not allow a non-read/write open for our lockd? */ |
642 | if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) |
643 | return (EINVAL); |
644 | |
645 | error = copyin(uap->u_fhp, &nfh.nfh_len, sizeof(nfh.nfh_len)); |
646 | if (error) |
647 | return (error); |
648 | if ((nfh.nfh_len < (int)sizeof(struct nfs_exphandle)) || |
649 | (nfh.nfh_len > (int)NFSV3_MAX_FH_SIZE)) |
650 | return (EINVAL); |
651 | error = copyin(uap->u_fhp, &nfh, sizeof(nfh.nfh_len) + nfh.nfh_len); |
652 | if (error) |
653 | return (error); |
654 | nfh.nfh_fhp = (u_char*)&nfh.nfh_xh; |
655 | |
656 | lck_rw_lock_shared(&nfsrv_export_rwlock); |
657 | /* now give me my vnode, it gets returned to me with a reference */ |
658 | error = nfsrv_fhtovp(&nfh, NULL, &vp, &nx, &nxo); |
659 | lck_rw_done(&nfsrv_export_rwlock); |
660 | if (error) { |
661 | if (error == NFSERR_TRYLATER) |
662 | error = EAGAIN; // XXX EBUSY? Or just leave as TRYLATER? |
663 | return (error); |
664 | } |
665 | |
666 | /* |
667 | * From now on we have to make sure not |
668 | * to forget about the vnode. |
669 | * Any error that causes an abort must vnode_put(vp). |
670 | * Just set error = err and 'goto bad;'. |
671 | */ |
672 | |
673 | /* |
674 | * from vn_open |
675 | */ |
676 | if (vnode_vtype(vp) == VSOCK) { |
677 | error = EOPNOTSUPP; |
678 | goto bad; |
679 | } |
680 | |
681 | /* disallow write operations on directories */ |
682 | if (vnode_isdir(vp) && (fmode & (FWRITE | O_TRUNC))) { |
683 | error = EISDIR; |
684 | goto bad; |
685 | } |
686 | |
687 | #if CONFIG_MACF |
688 | if ((error = mac_vnode_check_open(ctx, vp, fmode))) |
689 | goto bad; |
690 | #endif |
691 | |
692 | /* compute action to be authorized */ |
693 | action = 0; |
694 | if (fmode & FREAD) |
695 | action |= KAUTH_VNODE_READ_DATA; |
696 | if (fmode & (FWRITE | O_TRUNC)) |
697 | action |= KAUTH_VNODE_WRITE_DATA; |
698 | if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0) |
699 | goto bad; |
700 | |
701 | if ((error = VNOP_OPEN(vp, fmode, ctx))) |
702 | goto bad; |
703 | if ((error = vnode_ref_ext(vp, fmode, 0))) |
704 | goto bad; |
705 | |
706 | /* |
707 | * end of vn_open code |
708 | */ |
709 | |
710 | // starting here... error paths should call vn_close/vnode_put |
711 | if ((error = falloc(p, &nfp, &indx, ctx)) != 0) { |
712 | vn_close(vp, fmode & FMASK, ctx); |
713 | goto bad; |
714 | } |
715 | fp = nfp; |
716 | |
717 | fp->f_fglob->fg_flag = fmode & FMASK; |
718 | fp->f_fglob->fg_ops = &vnops; |
719 | fp->f_fglob->fg_data = (caddr_t)vp; |
720 | |
721 | // XXX do we really need to support this with fhopen()? |
722 | if (fmode & (O_EXLOCK | O_SHLOCK)) { |
723 | lf.l_whence = SEEK_SET; |
724 | lf.l_start = 0; |
725 | lf.l_len = 0; |
726 | if (fmode & O_EXLOCK) |
727 | lf.l_type = F_WRLCK; |
728 | else |
729 | lf.l_type = F_RDLCK; |
730 | type = F_FLOCK; |
731 | if ((fmode & FNONBLOCK) == 0) |
732 | type |= F_WAIT; |
733 | if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL))) { |
734 | struct vfs_context context = *vfs_context_current(); |
735 | /* Modify local copy (to not damage thread copy) */ |
736 | context.vc_ucred = fp->f_fglob->fg_cred; |
737 | |
738 | vn_close(vp, fp->f_fglob->fg_flag, &context); |
739 | fp_free(p, indx, fp); |
740 | return (error); |
741 | } |
742 | fp->f_fglob->fg_flag |= FHASLOCK; |
743 | } |
744 | |
745 | vnode_put(vp); |
746 | |
747 | proc_fdlock(p); |
748 | procfdtbl_releasefd(p, indx, NULL); |
749 | fp_drop(p, indx, fp, 1); |
750 | proc_fdunlock(p); |
751 | |
752 | *retval = indx; |
753 | return (0); |
754 | |
755 | bad: |
756 | vnode_put(vp); |
757 | return (error); |
758 | } |
759 | |
760 | /* |
761 | * NFS server pseudo system call |
762 | */ |
763 | int |
764 | nfssvc(proc_t p, struct nfssvc_args *uap, __unused int *retval) |
765 | { |
766 | mbuf_t nam; |
767 | struct user_nfsd_args user_nfsdarg; |
768 | socket_t so; |
769 | int error; |
770 | |
771 | AUDIT_ARG(cmd, uap->flag); |
772 | |
773 | /* |
774 | * Must be super user for most operations (export ops checked later). |
775 | */ |
776 | if ((uap->flag != NFSSVC_EXPORT) && ((error = proc_suser(p)))) |
777 | return (error); |
778 | #if CONFIG_MACF |
779 | error = mac_system_check_nfsd(kauth_cred_get()); |
780 | if (error) |
781 | return (error); |
782 | #endif |
783 | |
784 | /* make sure NFS server data structures have been initialized */ |
785 | nfsrv_init(); |
786 | |
787 | if (uap->flag & NFSSVC_ADDSOCK) { |
788 | if (IS_64BIT_PROCESS(p)) { |
789 | error = copyin(uap->argp, (caddr_t)&user_nfsdarg, sizeof(user_nfsdarg)); |
790 | } else { |
791 | struct nfsd_args tmp_args; |
792 | error = copyin(uap->argp, (caddr_t)&tmp_args, sizeof(tmp_args)); |
793 | if (error == 0) { |
794 | user_nfsdarg.sock = tmp_args.sock; |
795 | user_nfsdarg.name = CAST_USER_ADDR_T(tmp_args.name); |
796 | user_nfsdarg.namelen = tmp_args.namelen; |
797 | } |
798 | } |
799 | if (error) |
800 | return (error); |
801 | /* get the socket */ |
802 | error = file_socket(user_nfsdarg.sock, &so); |
803 | if (error) |
804 | return (error); |
805 | /* Get the client address for connected sockets. */ |
806 | if (user_nfsdarg.name == USER_ADDR_NULL || user_nfsdarg.namelen == 0) { |
807 | nam = NULL; |
808 | } else { |
809 | error = sockargs(&nam, user_nfsdarg.name, user_nfsdarg.namelen, MBUF_TYPE_SONAME); |
810 | if (error) { |
811 | /* drop the iocount file_socket() grabbed on the file descriptor */ |
812 | file_drop(user_nfsdarg.sock); |
813 | return (error); |
814 | } |
815 | } |
816 | /* |
817 | * nfssvc_addsock() will grab a retain count on the socket |
818 | * to keep the socket from being closed when nfsd closes its |
819 | * file descriptor for it. |
820 | */ |
821 | error = nfssvc_addsock(so, nam); |
822 | /* drop the iocount file_socket() grabbed on the file descriptor */ |
823 | file_drop(user_nfsdarg.sock); |
824 | } else if (uap->flag & NFSSVC_NFSD) { |
825 | error = nfssvc_nfsd(); |
826 | } else if (uap->flag & NFSSVC_EXPORT) { |
827 | error = nfssvc_export(uap->argp); |
828 | } else { |
829 | error = EINVAL; |
830 | } |
831 | if (error == EINTR || error == ERESTART) |
832 | error = 0; |
833 | return (error); |
834 | } |
835 | |
836 | /* |
837 | * Adds a socket to the list for servicing by nfsds. |
838 | */ |
839 | int |
840 | nfssvc_addsock(socket_t so, mbuf_t mynam) |
841 | { |
842 | struct nfsrv_sock *slp; |
843 | int error = 0, sodomain, sotype, soprotocol, on = 1; |
844 | int first; |
845 | struct timeval timeo; |
846 | |
847 | /* make sure mbuf constants are set up */ |
848 | if (!nfs_mbuf_mhlen) |
849 | nfs_mbuf_init(); |
850 | |
851 | sock_gettype(so, &sodomain, &sotype, &soprotocol); |
852 | |
853 | /* There should be only one UDP socket for each of IPv4 and IPv6 */ |
854 | if ((sodomain == AF_INET) && (soprotocol == IPPROTO_UDP) && nfsrv_udpsock) { |
855 | mbuf_freem(mynam); |
856 | return (EEXIST); |
857 | } |
858 | if ((sodomain == AF_INET6) && (soprotocol == IPPROTO_UDP) && nfsrv_udp6sock) { |
859 | mbuf_freem(mynam); |
860 | return (EEXIST); |
861 | } |
862 | |
863 | /* Set protocol options and reserve some space (for UDP). */ |
864 | if (sotype == SOCK_STREAM) { |
865 | error = nfsrv_check_exports_allow_address(mynam); |
866 | if (error) |
867 | return (error); |
868 | sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on)); |
869 | } |
870 | if ((sodomain == AF_INET) && (soprotocol == IPPROTO_TCP)) |
871 | sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on)); |
872 | if (sotype == SOCK_DGRAM) { /* set socket buffer sizes for UDP */ |
873 | int reserve = NFS_UDPSOCKBUF; |
874 | error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve)); |
875 | error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve)); |
876 | if (error) { |
877 | log(LOG_INFO, "nfssvc_addsock: UDP socket buffer setting error(s) %d\n" , error); |
878 | error = 0; |
879 | } |
880 | } |
881 | sock_nointerrupt(so, 0); |
882 | |
883 | /* |
884 | * Set socket send/receive timeouts. |
885 | * Receive timeout shouldn't matter, but setting the send timeout |
886 | * will make sure that an unresponsive client can't hang the server. |
887 | */ |
888 | timeo.tv_usec = 0; |
889 | timeo.tv_sec = 1; |
890 | error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo)); |
891 | timeo.tv_sec = 30; |
892 | error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo)); |
893 | if (error) { |
894 | log(LOG_INFO, "nfssvc_addsock: socket timeout setting error(s) %d\n" , error); |
895 | error = 0; |
896 | } |
897 | |
898 | MALLOC(slp, struct nfsrv_sock *, sizeof(struct nfsrv_sock), M_NFSSVC, M_WAITOK); |
899 | if (!slp) { |
900 | mbuf_freem(mynam); |
901 | return (ENOMEM); |
902 | } |
903 | bzero((caddr_t)slp, sizeof (struct nfsrv_sock)); |
904 | lck_rw_init(&slp->ns_rwlock, nfsrv_slp_rwlock_group, LCK_ATTR_NULL); |
905 | lck_mtx_init(&slp->ns_wgmutex, nfsrv_slp_mutex_group, LCK_ATTR_NULL); |
906 | |
907 | lck_mtx_lock(nfsd_mutex); |
908 | |
909 | if (soprotocol == IPPROTO_UDP) { |
910 | if (sodomain == AF_INET) { |
911 | /* There should be only one UDP/IPv4 socket */ |
912 | if (nfsrv_udpsock) { |
913 | lck_mtx_unlock(nfsd_mutex); |
914 | nfsrv_slpfree(slp); |
915 | mbuf_freem(mynam); |
916 | return (EEXIST); |
917 | } |
918 | nfsrv_udpsock = slp; |
919 | } |
920 | if (sodomain == AF_INET6) { |
921 | /* There should be only one UDP/IPv6 socket */ |
922 | if (nfsrv_udp6sock) { |
923 | lck_mtx_unlock(nfsd_mutex); |
924 | nfsrv_slpfree(slp); |
925 | mbuf_freem(mynam); |
926 | return (EEXIST); |
927 | } |
928 | nfsrv_udp6sock = slp; |
929 | } |
930 | } |
931 | |
932 | /* add the socket to the list */ |
933 | first = TAILQ_EMPTY(&nfsrv_socklist); |
934 | TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain); |
935 | if (soprotocol == IPPROTO_TCP) { |
936 | nfsrv_sock_tcp_cnt++; |
937 | if (nfsrv_sock_idle_timeout < 0) |
938 | nfsrv_sock_idle_timeout = 0; |
939 | if (nfsrv_sock_idle_timeout && (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT)) |
940 | nfsrv_sock_idle_timeout = NFSD_MIN_IDLE_TIMEOUT; |
941 | /* |
942 | * Possibly start or stop the idle timer. We only start the idle timer when |
943 | * we have more than 2 * nfsd_thread_max connections. If the idle timer is |
944 | * on then we may need to turn it off based on the nvsrv_sock_idle_timeout or |
945 | * the number of connections. |
946 | */ |
947 | if ((nfsrv_sock_tcp_cnt > 2 * nfsd_thread_max) || nfsrv_idlesock_timer_on) { |
948 | if (nfsrv_sock_idle_timeout == 0 || nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) { |
949 | if (nfsrv_idlesock_timer_on) { |
950 | thread_call_cancel(nfsrv_idlesock_timer_call); |
951 | nfsrv_idlesock_timer_on = 0; |
952 | } |
953 | } else { |
954 | struct nfsrv_sock *old_slp; |
955 | struct timeval now; |
956 | time_t time_to_wait = nfsrv_sock_idle_timeout; |
957 | /* |
958 | * Get the oldest tcp socket and calculate the |
959 | * earliest time for the next idle timer to fire |
960 | * based on the possibly updated nfsrv_sock_idle_timeout |
961 | */ |
962 | TAILQ_FOREACH(old_slp, &nfsrv_socklist, ns_chain) { |
963 | if (old_slp->ns_sotype == SOCK_STREAM) { |
964 | microuptime(&now); |
965 | time_to_wait -= now.tv_sec - old_slp->ns_timestamp; |
966 | if (time_to_wait < 1) |
967 | time_to_wait = 1; |
968 | break; |
969 | } |
970 | } |
971 | /* |
972 | * If we have a timer scheduled, but if its going to fire too late, |
973 | * turn it off. |
974 | */ |
975 | if (nfsrv_idlesock_timer_on > now.tv_sec + time_to_wait) { |
976 | thread_call_cancel(nfsrv_idlesock_timer_call); |
977 | nfsrv_idlesock_timer_on = 0; |
978 | } |
979 | /* Schedule the idle thread if it isn't already */ |
980 | if (!nfsrv_idlesock_timer_on) { |
981 | nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000); |
982 | nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait; |
983 | } |
984 | } |
985 | } |
986 | } |
987 | |
988 | sock_retain(so); /* grab a retain count on the socket */ |
989 | slp->ns_so = so; |
990 | slp->ns_sotype = sotype; |
991 | slp->ns_nam = mynam; |
992 | |
993 | /* set up the socket up-call */ |
994 | nfsrv_uc_addsock(slp, first); |
995 | |
996 | /* mark that the socket is not in the nfsrv_sockwg list */ |
997 | slp->ns_wgq.tqe_next = SLPNOLIST; |
998 | |
999 | slp->ns_flag = SLP_VALID | SLP_NEEDQ; |
1000 | |
1001 | nfsrv_wakenfsd(slp); |
1002 | lck_mtx_unlock(nfsd_mutex); |
1003 | |
1004 | return (0); |
1005 | } |
1006 | |
1007 | /* |
1008 | * nfssvc_nfsd() |
1009 | * |
1010 | * nfsd theory of operation: |
1011 | * |
1012 | * The first nfsd thread stays in user mode accepting new TCP connections |
1013 | * which are then added via the "addsock" call. The rest of the nfsd threads |
1014 | * simply call into the kernel and remain there in a loop handling NFS |
1015 | * requests until killed by a signal. |
1016 | * |
1017 | * There's a list of nfsd threads (nfsd_head). |
1018 | * There's an nfsd queue that contains only those nfsds that are |
1019 | * waiting for work to do (nfsd_queue). |
1020 | * |
1021 | * There's a list of all NFS sockets (nfsrv_socklist) and two queues for |
1022 | * managing the work on the sockets: |
1023 | * nfsrv_sockwait - sockets w/new data waiting to be worked on |
1024 | * nfsrv_sockwork - sockets being worked on which may have more work to do |
1025 | * nfsrv_sockwg -- sockets which have pending write gather data |
1026 | * When a socket receives data, if it is not currently queued, it |
1027 | * will be placed at the end of the "wait" queue. |
1028 | * Whenever a socket needs servicing we make sure it is queued and |
1029 | * wake up a waiting nfsd (if there is one). |
1030 | * |
1031 | * nfsds will service at most 8 requests from the same socket before |
1032 | * defecting to work on another socket. |
1033 | * nfsds will defect immediately if there are any sockets in the "wait" queue |
1034 | * nfsds looking for a socket to work on check the "wait" queue first and |
1035 | * then check the "work" queue. |
1036 | * When an nfsd starts working on a socket, it removes it from the head of |
1037 | * the queue it's currently on and moves it to the end of the "work" queue. |
1038 | * When nfsds are checking the queues for work, any sockets found not to |
1039 | * have any work are simply dropped from the queue. |
1040 | * |
1041 | */ |
/*
 * Body of an nfsd service thread.
 *
 * Allocates a struct nfsd for the caller, links it onto nfsd_head and then
 * loops: find a socket with work, pull a request off it with nfsrv_dorec(),
 * run the request (or flush gathered writes) and send the reply.
 *
 * The loop is left when nfsd_thread_max is zero or negative, when the sleep
 * waiting for work fails with anything other than EWOULDBLOCK, when there
 * is nothing to do and more threads exist than nfsd_thread_max allows, or
 * when sending a reply fails with EINTR/ERESTART.
 *
 * Returns ENOMEM if the nfsd structure cannot be allocated, otherwise the
 * value "error" had when the loop was left.
 */
int
nfssvc_nfsd(void)
{
	mbuf_t m, mrep;
	struct nfsrv_sock *slp;
	struct nfsd *nfsd;
	struct nfsrv_descript *nd = NULL;
	int error = 0, cacherep, writes_todo;
	int siz, procrastinate, opcnt = 0;
	u_quad_t cur_usec;
	struct timeval now;
	struct vfs_context context;
	struct timespec to;

#ifndef nolint
	cacherep = RC_DOIT;
	writes_todo = 0;
#endif

	MALLOC(nfsd, struct nfsd *, sizeof(struct nfsd), M_NFSD, M_WAITOK);
	if (!nfsd)
		return (ENOMEM);
	bzero(nfsd, sizeof(struct nfsd));
	lck_mtx_lock(nfsd_mutex);
	/* the first nfsd thread to get here initializes the request cache */
	if (nfsd_thread_count++ == 0)
		nfsrv_initcache();		/* Init the server request cache */

	TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
	lck_mtx_unlock(nfsd_mutex);

	context.vc_thread = current_thread();

	/* Set time out so that nfsd threads can wake up and see if they are still needed. */
	to.tv_sec = 5;
	to.tv_nsec = 0;

	/*
	 * Loop getting rpc requests until SIGKILL.
	 */
	for (;;) {
		if (nfsd_thread_max <= 0) {
			/* NFS server shutting down, get out ASAP */
			error = EINTR;
			slp = nfsd->nfsd_slp;
		} else if (nfsd->nfsd_flag & NFSD_REQINPROG) {
			/* already have some work to do */
			error = 0;
			slp = nfsd->nfsd_slp;
		} else {
			/* need to find work to do */
			error = 0;
			lck_mtx_lock(nfsd_mutex);
			/* sleep while we have no socket and both service queues are empty */
			while (!nfsd->nfsd_slp && TAILQ_EMPTY(&nfsrv_sockwait) && TAILQ_EMPTY(&nfsrv_sockwork)) {
				if (nfsd_thread_count > nfsd_thread_max) {
					/*
					 * If we have no socket and there are more
					 * nfsd threads than configured, let's exit.
					 */
					error = 0;
					goto done;
				}
				nfsd->nfsd_flag |= NFSD_WAITING;
				TAILQ_INSERT_HEAD(&nfsd_queue, nfsd, nfsd_queue);
				error = msleep(nfsd, nfsd_mutex, PSOCK | PCATCH, "nfsd" , &to);
				if (error) {
					/* still flagged as waiting: take ourselves off the idle queue */
					if (nfsd->nfsd_flag & NFSD_WAITING) {
						TAILQ_REMOVE(&nfsd_queue, nfsd, nfsd_queue);
						nfsd->nfsd_flag &= ~NFSD_WAITING;
					}
					/* EWOULDBLOCK just goes around the wait loop again */
					if (error == EWOULDBLOCK)
						continue;
					/* any other error ends this thread (nfsd_mutex is held) */
					goto done;
				}
			}
			slp = nfsd->nfsd_slp;
			if (!slp && !TAILQ_EMPTY(&nfsrv_sockwait)) {
				/* look for a socket to work on in the wait queue */
				while ((slp = TAILQ_FIRST(&nfsrv_sockwait))) {
					lck_rw_lock_exclusive(&slp->ns_rwlock);
					/* remove from the head of the queue */
					TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
					slp->ns_flag &= ~SLP_WAITQ;
					/* usable socket found: leave the loop with ns_rwlock still held */
					if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO))
						break;
					/* nothing to do, so skip this socket */
					lck_rw_done(&slp->ns_rwlock);
				}
			}
			if (!slp && !TAILQ_EMPTY(&nfsrv_sockwork)) {
				/* look for a socket to work on in the work queue */
				while ((slp = TAILQ_FIRST(&nfsrv_sockwork))) {
					lck_rw_lock_exclusive(&slp->ns_rwlock);
					/* remove from the head of the queue */
					TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
					slp->ns_flag &= ~SLP_WORKQ;
					/* usable socket found: leave the loop with ns_rwlock still held */
					if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO))
						break;
					/* nothing to do, so skip this socket */
					lck_rw_done(&slp->ns_rwlock);
				}
			}
			if (!nfsd->nfsd_slp && slp) {
				/* we found a socket to work on, grab a reference */
				slp->ns_sref++;
				microuptime(&now);
				slp->ns_timestamp = now.tv_sec;
				/* We keep the socket list in least recently used order for reaping idle sockets */
				TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
				TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
				nfsd->nfsd_slp = slp;
				opcnt = 0;
				/* and put it at the back of the work queue */
				TAILQ_INSERT_TAIL(&nfsrv_sockwork, slp, ns_svcq);
				slp->ns_flag |= SLP_WORKQ;
				/* releases the lock taken by the queue scan above */
				lck_rw_done(&slp->ns_rwlock);
			}
			lck_mtx_unlock(nfsd_mutex);
			/* nothing usable on either queue: start over */
			if (!slp)
				continue;
			lck_rw_lock_exclusive(&slp->ns_rwlock);
			if (slp->ns_flag & SLP_VALID) {
				/* the socket needs a receive pass and has not been disconnected */
				if ((slp->ns_flag & (SLP_NEEDQ|SLP_DISCONN)) == SLP_NEEDQ) {
					slp->ns_flag &= ~SLP_NEEDQ;
					nfsrv_rcv_locked(slp->ns_so, slp, MBUF_WAITOK);
				}
				if (slp->ns_flag & SLP_DISCONN)
					nfsrv_zapsock(slp);
				/* try to get a request descriptor (nd) from the socket */
				error = nfsrv_dorec(slp, nfsd, &nd);
				if (error == EINVAL) {	// RPCSEC_GSS drop
					if (slp->ns_sotype == SOCK_STREAM)
						nfsrv_zapsock(slp); // drop connection
				}
				writes_todo = 0;
				/*
				 * No request was obtained, but the socket has a write
				 * gather time set or is flagged SLP_DOWRITES: if that
				 * time has been reached, clear the error and go flush
				 * the gathered writes instead.
				 */
				if (error && (slp->ns_wgtime || (slp->ns_flag & SLP_DOWRITES))) {
					microuptime(&now);
					cur_usec = (u_quad_t)now.tv_sec * 1000000 +
						(u_quad_t)now.tv_usec;
					if (slp->ns_wgtime <= cur_usec) {
						error = 0;
						cacherep = RC_DOIT;
						writes_todo = 1;
					}
					slp->ns_flag &= ~SLP_DOWRITES;
				}
				nfsd->nfsd_flag |= NFSD_REQINPROG;
			}
			lck_rw_done(&slp->ns_rwlock);
		}
		/*
		 * On any error, or if the socket is no longer valid, release the
		 * request descriptor (if any) and our hold on the socket, then
		 * either leave the loop (server shutting down) or look for more work.
		 */
		if (error || (slp && !(slp->ns_flag & SLP_VALID))) {
			if (nd) {
				nfsm_chain_cleanup(&nd->nd_nmreq);
				if (nd->nd_nam2)
					mbuf_freem(nd->nd_nam2);
				if (IS_VALID_CRED(nd->nd_cr))
					kauth_cred_unref(&nd->nd_cr);
				if (nd->nd_gss_context)
					nfs_gss_svc_ctx_deref(nd->nd_gss_context);
				FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
				nd = NULL;
			}
			nfsd->nfsd_slp = NULL;
			nfsd->nfsd_flag &= ~NFSD_REQINPROG;
			if (slp)
				nfsrv_slpderef(slp);
			if (nfsd_thread_max <= 0)
				break;
			continue;
		}
		if (nd) {
			microuptime(&nd->nd_starttime);
			/* use the per-request address if there is one, else the socket's */
			if (nd->nd_nam2)
				nd->nd_nam = nd->nd_nam2;
			else
				nd->nd_nam = slp->ns_nam;

			cacherep = nfsrv_getcache(nd, slp, &mrep);

			if (nfsrv_require_resv_port) {
				/* Check if source port is a reserved port */
				in_port_t port = 0;
				struct sockaddr *saddr = mbuf_data(nd->nd_nam);

				if (saddr->sa_family == AF_INET)
					port = ntohs(((struct sockaddr_in*)saddr)->sin_port);
				else if (saddr->sa_family == AF_INET6)
					port = ntohs(((struct sockaddr_in6*)saddr)->sin6_port);
				/* non-reserved port: turn anything but a NULL call into an AUTH_TOOWEAK reply */
				if ((port >= IPPORT_RESERVED) && (nd->nd_procnum != NFSPROC_NULL)) {
					nd->nd_procnum = NFSPROC_NOOP;
					nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
					cacherep = RC_DOIT;
				}
			}

		}

		/*
		 * Loop to get all the write RPC replies that have been
		 * gathered together.
		 */
		do {
			switch (cacherep) {
			case RC_DOIT:
				/* pick the write gather delay matching the request's NFS version */
				if (nd && (nd->nd_vers == NFS_VER3))
					procrastinate = nfsrv_wg_delay_v3;
				else
					procrastinate = nfsrv_wg_delay;
				lck_rw_lock_shared(&nfsrv_export_rwlock);
				context.vc_ucred = NULL;
				/*
				 * Pending gathered writes, and WRITE requests while a
				 * gather delay is configured, go through the write
				 * gathering code; everything else is dispatched through
				 * the nfsrv_procs[] table.
				 */
				if (writes_todo || ((nd->nd_procnum == NFSPROC_WRITE) && (procrastinate > 0)))
					error = nfsrv_writegather(&nd, slp, &context, &mrep);
				else
					error = (*(nfsrv_procs[nd->nd_procnum]))(nd, slp, &context, &mrep);
				lck_rw_done(&nfsrv_export_rwlock);
				if (mrep == NULL) {
					/*
					 * If this is a stream socket and we are not going
					 * to send a reply we better close the connection
					 * so the client doesn't hang.
					 */
					if (error && slp->ns_sotype == SOCK_STREAM) {
						lck_rw_lock_exclusive(&slp->ns_rwlock);
						nfsrv_zapsock(slp);
						lck_rw_done(&slp->ns_rwlock);
						printf("NFS server: NULL reply from proc = %d error = %d\n" ,
							nd->nd_procnum, error);
					}
					break;

				}
				if (error) {
					/* count the failure and update the cache entry; no reply is sent from here */
					OSAddAtomic64(1, &nfsstats.srv_errs);
					nfsrv_updatecache(nd, FALSE, mrep);
					if (nd->nd_nam2) {
						mbuf_freem(nd->nd_nam2);
						nd->nd_nam2 = NULL;
					}
					break;
				}
				OSAddAtomic64(1, &nfsstats.srvrpccnt[nd->nd_procnum]);
				nfsrv_updatecache(nd, TRUE, mrep);
				/* FALLTHRU */

			case RC_REPLY:
				if (nd->nd_gss_mb != NULL) { // It's RPCSEC_GSS
					/*
					 * Need to checksum or encrypt the reply
					 */
					error = nfs_gss_svc_protect_reply(nd, mrep);
					if (error) {
						mbuf_freem(mrep);
						break;
					}
				}

				/*
				 * Get the total size of the reply
				 */
				m = mrep;
				siz = 0;
				while (m) {
					siz += mbuf_len(m);
					m = mbuf_next(m);
				}
				if (siz <= 0 || siz > NFS_MAXPACKET) {
					printf("mbuf siz=%d\n" ,siz);
					panic("Bad nfs svc reply" );
				}
				m = mrep;
				mbuf_pkthdr_setlen(m, siz);
				error = mbuf_pkthdr_setrcvif(m, NULL);
				if (error)
					panic("nfsd setrcvif failed: %d" , error);
				/*
				 * For stream protocols, prepend a Sun RPC
				 * Record Mark.
				 */
				if (slp->ns_sotype == SOCK_STREAM) {
					error = mbuf_prepend(&m, NFSX_UNSIGNED, MBUF_WAITOK);
					if (!error)
						*(u_int32_t*)mbuf_data(m) = htonl(0x80000000 | siz);
				}
				if (!error) {
					/* only send on a socket that is still valid */
					if (slp->ns_flag & SLP_VALID) {
						error = nfsrv_send(slp, nd->nd_nam2, m);
					} else {
						error = EPIPE;
						mbuf_freem(m);
					}
				} else {
					mbuf_freem(m);
				}
				/* the reply chain is not referenced from here on */
				mrep = NULL;
				if (nd->nd_nam2) {
					mbuf_freem(nd->nd_nam2);
					nd->nd_nam2 = NULL;
				}
				if (error == EPIPE) {
					lck_rw_lock_exclusive(&slp->ns_rwlock);
					nfsrv_zapsock(slp);
					lck_rw_done(&slp->ns_rwlock);
				}
				/*
				 * Interrupted while replying: release the request and
				 * our socket reference and leave the service loop.
				 * The "done" label expects nfsd_mutex to be held.
				 */
				if (error == EINTR || error == ERESTART) {
					nfsm_chain_cleanup(&nd->nd_nmreq);
					if (IS_VALID_CRED(nd->nd_cr))
						kauth_cred_unref(&nd->nd_cr);
					if (nd->nd_gss_context)
						nfs_gss_svc_ctx_deref(nd->nd_gss_context);
					FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
					nfsrv_slpderef(slp);
					lck_mtx_lock(nfsd_mutex);
					goto done;
				}
				break;
			case RC_DROPIT:
				/* no reply is sent for this request; just release its address mbuf */
				mbuf_freem(nd->nd_nam2);
				nd->nd_nam2 = NULL;
				break;
			};
			opcnt++;
			/* done with this request: release everything the descriptor holds */
			if (nd) {
				nfsm_chain_cleanup(&nd->nd_nmreq);
				if (nd->nd_nam2)
					mbuf_freem(nd->nd_nam2);
				if (IS_VALID_CRED(nd->nd_cr))
					kauth_cred_unref(&nd->nd_cr);
				if (nd->nd_gss_context)
					nfs_gss_svc_ctx_deref(nd->nd_gss_context);
				FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
				nd = NULL;
			}

			/*
			 * Check to see if there are outstanding writes that
			 * need to be serviced.
			 */
			writes_todo = 0;
			if (slp->ns_wgtime) {
				microuptime(&now);
				cur_usec = (u_quad_t)now.tv_sec * 1000000 +
					(u_quad_t)now.tv_usec;
				if (slp->ns_wgtime <= cur_usec) {
					cacherep = RC_DOIT;
					writes_todo = 1;
				}
			}
		} while (writes_todo);

		nd = NULL;
		/*
		 * Stay on this socket for another request only while nothing is
		 * sitting in the wait queue and fewer than 8 operations have
		 * been done on it in a row.
		 */
		if (TAILQ_EMPTY(&nfsrv_sockwait) && (opcnt < 8)) {
			lck_rw_lock_exclusive(&slp->ns_rwlock);
			error = nfsrv_dorec(slp, nfsd, &nd);
			if (error == EINVAL) {	// RPCSEC_GSS drop
				if (slp->ns_sotype == SOCK_STREAM)
					nfsrv_zapsock(slp); // drop connection
			}
			lck_rw_done(&slp->ns_rwlock);
		}
		if (!nd) {
			/* drop our reference on the socket */
			nfsd->nfsd_flag &= ~NFSD_REQINPROG;
			nfsd->nfsd_slp = NULL;
			nfsrv_slpderef(slp);
		}
	}
	lck_mtx_lock(nfsd_mutex);
done:
	/* reached with nfsd_mutex held: unlink and free this thread's nfsd */
	TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain);
	FREE(nfsd, M_NFSD);
	/* the last nfsd thread to leave tears the server state down */
	if (--nfsd_thread_count == 0)
		nfsrv_cleanup();
	lck_mtx_unlock(nfsd_mutex);
	return (error);
}
1415 | |
1416 | int |
1417 | nfssvc_export(user_addr_t argp) |
1418 | { |
1419 | int error = 0, is_64bit; |
1420 | struct user_nfs_export_args unxa; |
1421 | vfs_context_t ctx = vfs_context_current(); |
1422 | |
1423 | is_64bit = IS_64BIT_PROCESS(vfs_context_proc(ctx)); |
1424 | |
1425 | /* copy in pointers to path and export args */ |
1426 | if (is_64bit) { |
1427 | error = copyin(argp, (caddr_t)&unxa, sizeof(unxa)); |
1428 | } else { |
1429 | struct nfs_export_args tnxa; |
1430 | error = copyin(argp, (caddr_t)&tnxa, sizeof(tnxa)); |
1431 | if (error == 0) { |
1432 | /* munge into LP64 version of nfs_export_args structure */ |
1433 | unxa.nxa_fsid = tnxa.nxa_fsid; |
1434 | unxa.nxa_expid = tnxa.nxa_expid; |
1435 | unxa.nxa_fspath = CAST_USER_ADDR_T(tnxa.nxa_fspath); |
1436 | unxa.nxa_exppath = CAST_USER_ADDR_T(tnxa.nxa_exppath); |
1437 | unxa.nxa_flags = tnxa.nxa_flags; |
1438 | unxa.nxa_netcount = tnxa.nxa_netcount; |
1439 | unxa.nxa_nets = CAST_USER_ADDR_T(tnxa.nxa_nets); |
1440 | } |
1441 | } |
1442 | if (error) |
1443 | return (error); |
1444 | |
1445 | error = nfsrv_export(&unxa, ctx); |
1446 | |
1447 | return (error); |
1448 | } |
1449 | |
1450 | /* |
1451 | * Shut down a socket associated with an nfsrv_sock structure. |
1452 | * Should be called with the send lock set, if required. |
1453 | * The trick here is to increment the sref at the start, so that the nfsds |
1454 | * will stop using it and clear ns_flag at the end so that it will not be |
1455 | * reassigned during cleanup. |
1456 | */ |
1457 | void |
1458 | nfsrv_zapsock(struct nfsrv_sock *slp) |
1459 | { |
1460 | socket_t so; |
1461 | |
1462 | if ((slp->ns_flag & SLP_VALID) == 0) |
1463 | return; |
1464 | slp->ns_flag &= ~SLP_ALLFLAGS; |
1465 | |
1466 | so = slp->ns_so; |
1467 | if (so == NULL) |
1468 | return; |
1469 | |
1470 | sock_setupcall(so, NULL, NULL); |
1471 | sock_shutdown(so, SHUT_RDWR); |
1472 | |
1473 | /* |
1474 | * Remove from the up-call queue |
1475 | */ |
1476 | nfsrv_uc_dequeue(slp); |
1477 | } |
1478 | |
1479 | /* |
1480 | * cleanup and release a server socket structure. |
1481 | */ |
1482 | void |
1483 | nfsrv_slpfree(struct nfsrv_sock *slp) |
1484 | { |
1485 | struct nfsrv_descript *nwp, *nnwp; |
1486 | |
1487 | if (slp->ns_so) { |
1488 | sock_release(slp->ns_so); |
1489 | slp->ns_so = NULL; |
1490 | } |
1491 | if (slp->ns_nam) |
1492 | mbuf_free(slp->ns_nam); |
1493 | if (slp->ns_raw) |
1494 | mbuf_freem(slp->ns_raw); |
1495 | if (slp->ns_rec) |
1496 | mbuf_freem(slp->ns_rec); |
1497 | if (slp->ns_frag) |
1498 | mbuf_freem(slp->ns_frag); |
1499 | slp->ns_nam = slp->ns_raw = slp->ns_rec = slp->ns_frag = NULL; |
1500 | slp->ns_reccnt = 0; |
1501 | |
1502 | for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) { |
1503 | nnwp = nwp->nd_tq.le_next; |
1504 | LIST_REMOVE(nwp, nd_tq); |
1505 | nfsm_chain_cleanup(&nwp->nd_nmreq); |
1506 | if (nwp->nd_mrep) |
1507 | mbuf_freem(nwp->nd_mrep); |
1508 | if (nwp->nd_nam2) |
1509 | mbuf_freem(nwp->nd_nam2); |
1510 | if (IS_VALID_CRED(nwp->nd_cr)) |
1511 | kauth_cred_unref(&nwp->nd_cr); |
1512 | if (nwp->nd_gss_context) |
1513 | nfs_gss_svc_ctx_deref(nwp->nd_gss_context); |
1514 | FREE_ZONE(nwp, sizeof(*nwp), M_NFSRVDESC); |
1515 | } |
1516 | LIST_INIT(&slp->ns_tq); |
1517 | |
1518 | lck_rw_destroy(&slp->ns_rwlock, nfsrv_slp_rwlock_group); |
1519 | lck_mtx_destroy(&slp->ns_wgmutex, nfsrv_slp_mutex_group); |
1520 | FREE(slp, M_NFSSVC); |
1521 | } |
1522 | |
1523 | /* |
1524 | * Derefence a server socket structure. If it has no more references and |
1525 | * is no longer valid, you can throw it away. |
1526 | */ |
1527 | static void |
1528 | nfsrv_slpderef_locked(struct nfsrv_sock *slp) |
1529 | { |
1530 | lck_rw_lock_exclusive(&slp->ns_rwlock); |
1531 | slp->ns_sref--; |
1532 | |
1533 | if (slp->ns_sref || (slp->ns_flag & SLP_VALID)) { |
1534 | if ((slp->ns_flag & SLP_QUEUED) && !(slp->ns_flag & SLP_WORKTODO)) { |
1535 | /* remove socket from queue since there's no work */ |
1536 | if (slp->ns_flag & SLP_WAITQ) |
1537 | TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq); |
1538 | else |
1539 | TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq); |
1540 | slp->ns_flag &= ~SLP_QUEUED; |
1541 | } |
1542 | lck_rw_done(&slp->ns_rwlock); |
1543 | return; |
1544 | } |
1545 | |
1546 | /* This socket is no longer valid, so we'll get rid of it */ |
1547 | |
1548 | if (slp->ns_flag & SLP_QUEUED) { |
1549 | if (slp->ns_flag & SLP_WAITQ) |
1550 | TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq); |
1551 | else |
1552 | TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq); |
1553 | slp->ns_flag &= ~SLP_QUEUED; |
1554 | } |
1555 | lck_rw_done(&slp->ns_rwlock); |
1556 | |
1557 | TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain); |
1558 | if (slp->ns_sotype == SOCK_STREAM) |
1559 | nfsrv_sock_tcp_cnt--; |
1560 | |
1561 | /* now remove from the write gather socket list */ |
1562 | if (slp->ns_wgq.tqe_next != SLPNOLIST) { |
1563 | TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq); |
1564 | slp->ns_wgq.tqe_next = SLPNOLIST; |
1565 | } |
1566 | nfsrv_slpfree(slp); |
1567 | } |
1568 | |
/*
 * Drop one reference on a server socket structure.  This is
 * nfsrv_slpderef_locked() with nfsd_mutex taken around the call; slp may
 * have been freed by the time this returns.
 */
void
nfsrv_slpderef(struct nfsrv_sock *slp)
{
	lck_mtx_lock(nfsd_mutex);
	nfsrv_slpderef_locked(slp);
	lck_mtx_unlock(nfsd_mutex);
}
1576 | |
1577 | /* |
1578 | * Check periodically for idle sockest if needed and |
1579 | * zap them. |
1580 | */ |
1581 | void |
1582 | nfsrv_idlesock_timer(__unused void *param0, __unused void *param1) |
1583 | { |
1584 | struct nfsrv_sock *slp, *tslp; |
1585 | struct timeval now; |
1586 | time_t time_to_wait = nfsrv_sock_idle_timeout; |
1587 | |
1588 | microuptime(&now); |
1589 | lck_mtx_lock(nfsd_mutex); |
1590 | |
1591 | /* Turn off the timer if we're suppose to and get out */ |
1592 | if (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT) |
1593 | nfsrv_sock_idle_timeout = 0; |
1594 | if ((nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) || (nfsrv_sock_idle_timeout == 0)) { |
1595 | nfsrv_idlesock_timer_on = 0; |
1596 | lck_mtx_unlock(nfsd_mutex); |
1597 | return; |
1598 | } |
1599 | |
1600 | TAILQ_FOREACH_SAFE(slp, &nfsrv_socklist, ns_chain, tslp) { |
1601 | lck_rw_lock_exclusive(&slp->ns_rwlock); |
1602 | /* Skip udp and referenced sockets */ |
1603 | if (slp->ns_sotype == SOCK_DGRAM || slp->ns_sref) { |
1604 | lck_rw_done(&slp->ns_rwlock); |
1605 | continue; |
1606 | } |
1607 | /* |
1608 | * If this is the first non-referenced socket that hasn't idle out, |
1609 | * use its time stamp to calculate the earlist time in the future |
1610 | * to start the next invocation of the timer. Since the nfsrv_socklist |
1611 | * is sorted oldest access to newest. Once we find the first one, |
1612 | * we're done and break out of the loop. |
1613 | */ |
1614 | if (((slp->ns_timestamp + nfsrv_sock_idle_timeout) > now.tv_sec) || |
1615 | nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) { |
1616 | time_to_wait -= now.tv_sec - slp->ns_timestamp; |
1617 | if (time_to_wait < 1) |
1618 | time_to_wait = 1; |
1619 | lck_rw_done(&slp->ns_rwlock); |
1620 | break; |
1621 | } |
1622 | /* |
1623 | * Bump the ref count. nfsrv_slpderef below will destroy |
1624 | * the socket, since nfsrv_zapsock has closed it. |
1625 | */ |
1626 | slp->ns_sref++; |
1627 | nfsrv_zapsock(slp); |
1628 | lck_rw_done(&slp->ns_rwlock); |
1629 | nfsrv_slpderef_locked(slp); |
1630 | } |
1631 | |
1632 | /* Start ourself back up */ |
1633 | nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000); |
1634 | /* Remember when the next timer will fire for nfssvc_addsock. */ |
1635 | nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait; |
1636 | lck_mtx_unlock(nfsd_mutex); |
1637 | } |
1638 | |
1639 | /* |
1640 | * Clean up the data structures for the server. |
1641 | */ |
1642 | void |
1643 | nfsrv_cleanup(void) |
1644 | { |
1645 | struct nfsrv_sock *slp, *nslp; |
1646 | struct timeval now; |
1647 | #if CONFIG_FSE |
1648 | struct nfsrv_fmod *fp, *nfp; |
1649 | int i; |
1650 | #endif |
1651 | |
1652 | microuptime(&now); |
1653 | for (slp = TAILQ_FIRST(&nfsrv_socklist); slp != 0; slp = nslp) { |
1654 | nslp = TAILQ_NEXT(slp, ns_chain); |
1655 | lck_rw_lock_exclusive(&slp->ns_rwlock); |
1656 | slp->ns_sref++; |
1657 | if (slp->ns_flag & SLP_VALID) |
1658 | nfsrv_zapsock(slp); |
1659 | lck_rw_done(&slp->ns_rwlock); |
1660 | nfsrv_slpderef_locked(slp); |
1661 | } |
1662 | # |
1663 | #if CONFIG_FSE |
1664 | /* |
1665 | * Flush pending file write fsevents |
1666 | */ |
1667 | lck_mtx_lock(nfsrv_fmod_mutex); |
1668 | for (i = 0; i < NFSRVFMODHASHSZ; i++) { |
1669 | for (fp = LIST_FIRST(&nfsrv_fmod_hashtbl[i]); fp; fp = nfp) { |
1670 | /* |
1671 | * Fire off the content modified fsevent for each |
1672 | * entry, remove it from the list, and free it. |
1673 | */ |
1674 | if (nfsrv_fsevents_enabled) { |
1675 | fp->fm_context.vc_thread = current_thread(); |
1676 | add_fsevent(FSE_CONTENT_MODIFIED, &fp->fm_context, |
1677 | FSE_ARG_VNODE, fp->fm_vp, |
1678 | FSE_ARG_DONE); |
1679 | } |
1680 | vnode_put(fp->fm_vp); |
1681 | kauth_cred_unref(&fp->fm_context.vc_ucred); |
1682 | nfp = LIST_NEXT(fp, fm_link); |
1683 | LIST_REMOVE(fp, fm_link); |
1684 | FREE(fp, M_TEMP); |
1685 | } |
1686 | } |
1687 | nfsrv_fmod_pending = 0; |
1688 | lck_mtx_unlock(nfsrv_fmod_mutex); |
1689 | #endif |
1690 | |
1691 | nfsrv_uc_cleanup(); /* Stop nfs socket up-call threads */ |
1692 | |
1693 | nfs_gss_svc_cleanup(); /* Remove any RPCSEC_GSS contexts */ |
1694 | |
1695 | nfsrv_cleancache(); /* And clear out server cache */ |
1696 | |
1697 | nfsrv_udpsock = NULL; |
1698 | nfsrv_udp6sock = NULL; |
1699 | } |
1700 | |
1701 | #endif /* NFS_NOSERVER */ |
1702 | |