/* Copyright (C) 1998-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <stdint.h>
#include <sys/mman.h>
#include <sys/param.h>
#include <sys/poll.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/uio.h>
#include <sys/un.h>
#include <not-cancel.h>
#include <kernel-features.h>
#include <nss.h>
#include <struct___timespec64.h>

#include "nscd-client.h"

/* Extra time we wait if the socket is still receiving data.  This
   value is in milliseconds.  Note that the other side is nscd on the
   local machine and it is already transmitting data.  So the wait
   time need not be long.  */
#define EXTRA_RECEIVE_TIME 200


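/* Wait for SOCK to become readable.  USECTMO is, despite its name, a
   timeout in milliseconds; it is handed straight to __poll.  Returns
   the poll result: positive if the descriptor is ready, zero on
   timeout, -1 on error.  */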
static int
wait_on_socket (int sock, long int usectmo)
{
  struct pollfd fds[1];
  fds[0].fd = sock;
  fds[0].events = POLLIN | POLLERR | POLLHUP;
  int n = __poll (fds, 1, usectmo);
  if (n == -1 && __builtin_expect (errno == EINTR, 0))
    {
      /* Handle the case where the poll() call is interrupted by a
         signal.  We cannot just use TEMP_FAILURE_RETRY since it might
         lead to infinite loops.  */
      struct __timespec64 now;
      __clock_gettime64 (CLOCK_REALTIME, &now);
      int64_t end = (now.tv_sec * 1000 + usectmo
                     + (now.tv_nsec + 500000) / 1000000);
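      /* END is the absolute deadline, expressed in milliseconds since
         the epoch with the nanosecond part rounded to the nearest
         millisecond.  After every interrupted poll the remaining
         timeout is recomputed against this deadline.  */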
      long int timeout = usectmo;
      while (1)
        {
          n = __poll (fds, 1, timeout);
          if (n != -1 || errno != EINTR)
            break;

          /* Recompute the timeout time.  */
          __clock_gettime64 (CLOCK_REALTIME, &now);
          timeout = end - ((now.tv_sec * 1000
                            + (now.tv_nsec + 500000) / 1000000));
        }
    }

  return n;
}


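/* Read up to LEN bytes from FD into BUF, retrying short reads until
   the full amount has arrived.  If the descriptor would block, wait
   EXTRA_RECEIVE_TIME milliseconds for more data before giving up.
   Returns the number of bytes read, or a negative value on a read
   failure.  */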
ssize_t
__readall (int fd, void *buf, size_t len)
{
  size_t n = len;
  ssize_t ret;
  do
    {
    again:
      ret = TEMP_FAILURE_RETRY (__read (fd, buf, n));
      if (ret <= 0)
        {
          if (__builtin_expect (ret < 0 && errno == EAGAIN, 0)
              /* The socket is still receiving data.  Wait a bit more.  */
              && wait_on_socket (fd, EXTRA_RECEIVE_TIME) > 0)
            goto again;

          break;
        }
      buf = (char *) buf + ret;
      n -= ret;
    }
  while (n > 0);
  return ret < 0 ? ret : len - n;
}


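/* Like __readall, but scatter the data into the IOVCNT buffers
   described by IOV.  After a short read the iovec array is adjusted
   in a local copy so that already-filled buffers are skipped on the
   next __readv call.  */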
ssize_t
__readvall (int fd, const struct iovec *iov, int iovcnt)
{
  ssize_t ret = TEMP_FAILURE_RETRY (__readv (fd, iov, iovcnt));
  if (ret <= 0)
    {
      if (__glibc_likely (ret == 0 || errno != EAGAIN))
        /* A genuine error or no data to read.  */
        return ret;

      /* Not all of the data has been received yet.  Proceed as if
         nothing had been read so far.  */
      ret = 0;
    }

  size_t total = 0;
  for (int i = 0; i < iovcnt; ++i)
    total += iov[i].iov_len;

  if (ret < total)
    {
      struct iovec iov_buf[iovcnt];
      ssize_t r = ret;

      struct iovec *iovp = memcpy (iov_buf, iov, iovcnt * sizeof (*iov));
      do
        {
          while (iovp->iov_len <= r)
            {
              r -= iovp->iov_len;
              --iovcnt;
              ++iovp;
            }
          iovp->iov_base = (char *) iovp->iov_base + r;
          iovp->iov_len -= r;
        again:
          r = TEMP_FAILURE_RETRY (__readv (fd, iovp, iovcnt));
          if (r <= 0)
            {
              if (__builtin_expect (r < 0 && errno == EAGAIN, 0)
                  /* The socket is still receiving data.  Wait a bit more.  */
                  && wait_on_socket (fd, EXTRA_RECEIVE_TIME) > 0)
                goto again;

              break;
            }
          ret += r;
        }
      while (ret < total);
      if (r < 0)
        ret = r;
    }
  return ret;
}


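/* Open a nonblocking UNIX stream socket, connect it to the nscd
   socket at _PATH_NSCDSOCKET, and transmit a request consisting of
   the request header for TYPE followed by the KEYLEN bytes of KEY.
   While the daemon is busy the send is retried for up to five
   seconds.  Returns the connected socket, or -1 on failure.  */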
static int
open_socket (request_type type, const char *key, size_t keylen)
{
  int sock;

  sock = __socket (PF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
  if (sock < 0)
    return -1;

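  /* Assemble the request header and the key in one contiguous stack
     buffer so that a single send call can transmit both.  */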
  size_t real_sizeof_reqdata = sizeof (request_header) + keylen;
  struct
  {
    request_header req;
    char key[];
  } *reqdata = alloca (real_sizeof_reqdata);

  struct sockaddr_un sun;
  sun.sun_family = AF_UNIX;
  strcpy (sun.sun_path, _PATH_NSCDSOCKET);
  if (__connect (sock, (struct sockaddr *) &sun, sizeof (sun)) < 0
      && errno != EINPROGRESS)
    goto out;

  reqdata->req.version = NSCD_VERSION;
  reqdata->req.type = type;
  reqdata->req.key_len = keylen;

  memcpy (reqdata->key, key, keylen);

  bool first_try = true;
  struct __timespec64 tvend = { 0, 0 };
  while (1)
    {
#ifndef MSG_NOSIGNAL
# define MSG_NOSIGNAL 0
#endif
      ssize_t wres = TEMP_FAILURE_RETRY (__send (sock, reqdata,
                                                 real_sizeof_reqdata,
                                                 MSG_NOSIGNAL));
      if (__glibc_likely (wres == (ssize_t) real_sizeof_reqdata))
        /* We managed to send the request.  */
        return sock;

      if (wres != -1 || errno != EAGAIN)
        /* Something is really wrong, no chance to continue.  */
        break;

      /* The daemon is busy; wait for it.  */
      int to;
      struct __timespec64 now;
      __clock_gettime64 (CLOCK_REALTIME, &now);
      if (first_try)
        {
          tvend.tv_nsec = now.tv_nsec;
          tvend.tv_sec = now.tv_sec + 5;
          to = 5 * 1000;
          first_try = false;
        }
      else
        to = ((tvend.tv_sec - now.tv_sec) * 1000
              + (tvend.tv_nsec - now.tv_nsec) / 1000000);

      struct pollfd fds[1];
      fds[0].fd = sock;
      fds[0].events = POLLOUT | POLLERR | POLLHUP;
      if (__poll (fds, 1, to) <= 0)
        /* The connection timed out or broke down.  */
        break;

      /* We try to write again.  */
    }

 out:
  __close_nocancel_nostatus (sock);

  return -1;
}


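/* Release MAPPED once its reference counter has dropped to zero:
   unmap the database and free the bookkeeping record.  */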
void
__nscd_unmap (struct mapped_database *mapped)
{
  assert (mapped->counter == 0);
  __munmap ((void *) mapped->head, mapped->mapsize);
  free (mapped);
}


/* Try to get a file descriptor for the shared memory segment
   containing the database.  */
struct mapped_database *
__nscd_get_mapping (request_type type, const char *key,
                    struct mapped_database **mappedp)
{
  struct mapped_database *result = NO_MAPPING;
#ifdef SCM_RIGHTS
  const size_t keylen = strlen (key) + 1;
  int saved_errno = errno;

  int mapfd = -1;
  char resdata[keylen];

  /* Open a socket and send the request.  */
  int sock = open_socket (type, key, keylen);
  if (sock < 0)
    goto out;

  /* Room for the data sent along with the file descriptor.  We expect
     the key name back.  */
  uint64_t mapsize;
  struct iovec iov[2];
  iov[0].iov_base = resdata;
  iov[0].iov_len = keylen;
  iov[1].iov_base = &mapsize;
  iov[1].iov_len = sizeof (mapsize);

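  /* Ancillary-data buffer sized to receive exactly one file
     descriptor; the union guarantees the alignment struct cmsghdr
     requires.  */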
  union
  {
    struct cmsghdr hdr;
    char bytes[CMSG_SPACE (sizeof (int))];
  } buf;
  struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
                        .msg_control = buf.bytes,
                        .msg_controllen = sizeof (buf) };
  struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);

  cmsg->cmsg_level = SOL_SOCKET;
  cmsg->cmsg_type = SCM_RIGHTS;
  cmsg->cmsg_len = CMSG_LEN (sizeof (int));

  /* This access is well-aligned since BUF is correctly aligned for an
     int and CMSG_DATA preserves this alignment.  */
  memset (CMSG_DATA (cmsg), '\xff', sizeof (int));

  msg.msg_controllen = cmsg->cmsg_len;

  if (wait_on_socket (sock, 5 * 1000) <= 0)
    goto out_close2;

# ifndef MSG_CMSG_CLOEXEC
#  define MSG_CMSG_CLOEXEC 0
# endif
  ssize_t n = TEMP_FAILURE_RETRY (__recvmsg (sock, &msg, MSG_CMSG_CLOEXEC));

  if (__builtin_expect (CMSG_FIRSTHDR (&msg) == NULL
                        || (CMSG_FIRSTHDR (&msg)->cmsg_len
                            != CMSG_LEN (sizeof (int))), 0))
    goto out_close2;

  int *ip = (void *) CMSG_DATA (cmsg);
  mapfd = *ip;

  if (__glibc_unlikely (n != keylen && n != keylen + sizeof (mapsize)))
    goto out_close;

  if (__glibc_unlikely (strcmp (resdata, key) != 0))
    goto out_close;

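  /* If only the key came back the daemon did not transmit the mapping
     size (presumably an older nscd); determine it with fstat on the
     received descriptor instead.  */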
  if (__glibc_unlikely (n == keylen))
    {
      struct stat64 st;
      if (__builtin_expect (__fstat64 (mapfd, &st) != 0, 0)
          || __builtin_expect (st.st_size < sizeof (struct database_pers_head),
                               0))
        goto out_close;

      mapsize = st.st_size;
    }

  /* The file is large enough, map it now.  */
  void *mapping = __mmap (NULL, mapsize, PROT_READ, MAP_SHARED, mapfd, 0);
  if (__glibc_likely (mapping != MAP_FAILED))
    {
      /* Check whether the database is correct and up-to-date.  */
      struct database_pers_head *head = mapping;

      if (__builtin_expect (head->version != DB_VERSION, 0)
          || __builtin_expect (head->header_size != sizeof (*head), 0)
          /* Catch some misconfiguration.  The server should catch
             them now but some older versions did not.  */
          || __builtin_expect (head->module == 0, 0)
          /* This really should not happen but who knows, maybe the update
             thread got stuck.  */
          || __builtin_expect (! head->nscd_certainly_running
                               && (head->timestamp + MAPPING_TIMEOUT
                                   < time_now ()), 0))
        {
        out_unmap:
          __munmap (mapping, mapsize);
          goto out_close;
        }

      size_t size = (sizeof (*head) + roundup (head->module * sizeof (ref_t),
                                               ALIGN)
                     + head->data_size);

      if (__glibc_unlikely (mapsize < size))
        goto out_unmap;

      /* Allocate a record for the mapping.  */
      struct mapped_database *newp = malloc (sizeof (*newp));
      if (newp == NULL)
        /* Ugh, after all we went through the memory allocation failed.  */
        goto out_unmap;

      newp->head = mapping;
      newp->data = ((char *) mapping + head->header_size
                    + roundup (head->module * sizeof (ref_t), ALIGN));
      newp->mapsize = size;
      newp->datasize = head->data_size;
      /* Set counter to 1 to show it is usable.  */
      newp->counter = 1;

      result = newp;
    }

 out_close:
  __close (mapfd);
 out_close2:
  __close (sock);
 out:
  __set_errno (saved_errno);
#endif /* SCM_RIGHTS */

  struct mapped_database *oldval = *mappedp;
  *mappedp = result;

  if (oldval != NULL && atomic_decrement_val (&oldval->counter) == 0)
    __nscd_unmap (oldval);

  return result;
}

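/* Return a reference to the current mapping for request TYPE,
   refreshing it via __nscd_get_mapping when it is missing or stale.
   On success the mapping's reference counter has been incremented and
   *GC_CYCLEP holds the garbage-collection cycle that was observed;
   NO_MAPPING is returned while garbage collection is in progress or
   no usable mapping exists.  */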
struct mapped_database *
__nscd_get_map_ref (request_type type, const char *name,
                    volatile struct locked_map_ptr *mapptr, int *gc_cyclep)
{
  struct mapped_database *cur = mapptr->mapped;
  if (cur == NO_MAPPING)
    return cur;

  if (!__nscd_acquire_maplock (mapptr))
    return NO_MAPPING;

  cur = mapptr->mapped;

  if (__glibc_likely (cur != NO_MAPPING))
    {
      /* If not mapped or timestamp not updated, request new map.  */
      if (cur == NULL
          || (cur->head->nscd_certainly_running == 0
              && cur->head->timestamp + MAPPING_TIMEOUT < time_now ())
          || cur->head->data_size > cur->datasize)
        cur = __nscd_get_mapping (type, name,
                                  (struct mapped_database **) &mapptr->mapped);

      if (__glibc_likely (cur != NO_MAPPING))
        {
          if (__builtin_expect (((*gc_cyclep = cur->head->gc_cycle) & 1) != 0,
                                0))
            cur = NO_MAPPING;
          else
            atomic_increment (&cur->counter);
        }
    }

  mapptr->lock = 0;

  return cur;
}


/* Using sizeof (hashentry) is not always correct to determine the size of
   the data structure as found in the nscd cache.  The program could be
   a 64-bit process and nscd could be a 32-bit process.  In this case
   sizeof (hashentry) would overestimate the size.  The following is
   the minimum size of such an entry, good enough for our tests here.  */
#define MINIMUM_HASHENTRY_SIZE \
  (offsetof (struct hashentry, dellist) + sizeof (int32_t))

/* Don't return const struct datahead *: even though the record is
   normally constant, it can change arbitrarily during nscd garbage
   collection.  */
struct datahead *
__nscd_cache_search (request_type type, const char *key, size_t keylen,
                     const struct mapped_database *mapped, size_t datalen)
{
  unsigned long int hash = __nss_hash (key, keylen) % mapped->head->module;
  size_t datasize = mapped->datasize;

  ref_t trail = mapped->head->array[hash];
  trail = atomic_forced_read (trail);
  ref_t work = trail;
  size_t loop_cnt = datasize / (MINIMUM_HASHENTRY_SIZE
                                + offsetof (struct datahead, data) / 2);
  int tick = 0;
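  /* WORK walks the hash chain while TRAIL follows at half speed,
     advancing only on every other iteration (controlled by TICK).  If
     the chain has been corrupted into a cycle, WORK must eventually
     catch up with TRAIL; together with LOOP_CNT this bounds the
     traversal.  This is the classic two-pointer cycle-detection
     scheme.  */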

  while (work != ENDREF && work + MINIMUM_HASHENTRY_SIZE <= datasize)
    {
      struct hashentry *here = (struct hashentry *) (mapped->data + work);
      ref_t here_key, here_packet;

#if !_STRING_ARCH_unaligned
      /* Although during garbage collection when moving struct hashentry
         records around we first copy from old to new location and then
         adjust pointer from previous hashentry to it, there is no barrier
         between those memory writes.  It is very unlikely to hit it,
         so check alignment only if a misaligned load can crash the
         application.  */
      if ((uintptr_t) here & (__alignof__ (*here) - 1))
        return NULL;
#endif

      if (type == here->type
          && keylen == here->len
          && (here_key = atomic_forced_read (here->key)) + keylen <= datasize
          && memcmp (key, mapped->data + here_key, keylen) == 0
          && ((here_packet = atomic_forced_read (here->packet))
              + sizeof (struct datahead) <= datasize))
        {
          /* We found the entry.  Increment the appropriate counter.  */
          struct datahead *dh
            = (struct datahead *) (mapped->data + here_packet);

#if !_STRING_ARCH_unaligned
          if ((uintptr_t) dh & (__alignof__ (*dh) - 1))
            return NULL;
#endif

          /* See whether we must ignore the entry or whether something
             is wrong because garbage collection is in progress.  */
          if (dh->usable
              && here_packet + dh->allocsize <= datasize
              && (here_packet + offsetof (struct datahead, data) + datalen
                  <= datasize))
            return dh;
        }

      work = atomic_forced_read (here->next);
      /* Prevent endless loops.  This should never happen but perhaps
         the database got corrupted, accidentally or deliberately.  */
      if (work == trail || loop_cnt-- == 0)
        break;
      if (tick)
        {
          struct hashentry *trailelem;
          trailelem = (struct hashentry *) (mapped->data + trail);

#if !_STRING_ARCH_unaligned
          /* We have to redo the checks.  Maybe the data changed.  */
          if ((uintptr_t) trailelem & (__alignof__ (*trailelem) - 1))
            return NULL;
#endif

          if (trail + MINIMUM_HASHENTRY_SIZE > datasize)
            return NULL;

          trail = atomic_forced_read (trailelem->next);
        }
      tick = 1 - tick;
    }

  return NULL;
}


/* Open a socket to nscd, send the request for KEY, and read a
   response of RESPONSELEN bytes into RESPONSE.  Returns the connected
   socket on success, or -1 on failure.  */
int
__nscd_open_socket (const char *key, size_t keylen, request_type type,
                    void *response, size_t responselen)
{
  /* This should never happen and it is something the nscd daemon
     enforces, too.  Here it helps to limit the amount of stack
     used.  */
  if (keylen > MAXKEYLEN)
    return -1;

  int saved_errno = errno;

  int sock = open_socket (type, key, keylen);
  if (sock >= 0)
    {
      /* Wait for data.  */
      if (wait_on_socket (sock, 5 * 1000) > 0)
        {
          ssize_t nbytes = TEMP_FAILURE_RETRY (__read (sock, response,
                                                       responselen));
          if (nbytes == (ssize_t) responselen)
            return sock;
        }

      __close_nocancel_nostatus (sock);
    }

  __set_errno (saved_errno);

  return -1;
}