/* Cache memory handling.
   Copyright (C) 2004-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <https://www.gnu.org/licenses/>.  */

#include <assert.h>
#include <errno.h>
#include <error.h>
#include <fcntl.h>
#include <inttypes.h>
#include <libintl.h>
#include <limits.h>
#include <obstack.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/param.h>

#include "dbg_log.h"
#include "nscd.h"


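/* qsort comparison function: order hash table entries by the address
   of the entry itself in the data area.  */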
static int
sort_he (const void *p1, const void *p2)
{
  struct hashentry *h1 = *(struct hashentry **) p1;
  struct hashentry *h2 = *(struct hashentry **) p2;

  if (h1 < h2)
    return -1;
  if (h1 > h2)
    return 1;
  return 0;
}


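/* qsort comparison function: order hash table entries by the offset of
   the data record (packet) they reference, so all entries sharing one
   datahead end up adjacent.  */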
static int
sort_he_data (const void *p1, const void *p2)
{
  struct hashentry *h1 = *(struct hashentry **) p1;
  struct hashentry *h2 = *(struct hashentry **) p2;

  if (h1->packet < h2->packet)
    return -1;
  if (h1->packet > h2->packet)
    return 1;
  return 0;
}


/* Basic definitions for the bitmap implementation.  Only BITMAP_T
   needs to be changed to choose a different word size.  */
#define BITMAP_T uint8_t
#define BITS (CHAR_BIT * sizeof (BITMAP_T))
#define ALLBITS ((((BITMAP_T) 1) << BITS) - 1)
#define HIGHBIT (((BITMAP_T) 1) << (BITS - 1))


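/* Set the bits in MARK which correspond to the BLOCK_ALIGN-sized blocks
   covering the region of LEN bytes starting at offset START.  */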
static void
markrange (BITMAP_T *mark, ref_t start, size_t len)
{
  /* Adjust parameters for block alignment.  */
  assert ((start & BLOCK_ALIGN_M1) == 0);
  start /= BLOCK_ALIGN;
  len = (len + BLOCK_ALIGN_M1) / BLOCK_ALIGN;

  size_t elem = start / BITS;

  if (start % BITS != 0)
    {
      if (start % BITS + len <= BITS)
        {
          /* All fits in the partial byte.  */
          mark[elem] |= (ALLBITS >> (BITS - len)) << (start % BITS);
          return;
        }

      mark[elem++] |= ALLBITS << (start % BITS);
      len -= BITS - (start % BITS);
    }

  while (len >= BITS)
    {
      mark[elem++] = ALLBITS;
      len -= BITS;
    }

  if (len > 0)
    mark[elem] |= ALLBITS >> (BITS - len);
}


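/* Garbage collect the data area of DB: mark every block which is still
   referenced from the hash table, then compact the used blocks towards
   the beginning of the area and adjust all stored offsets accordingly.  */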
void
gc (struct database_dyn *db)
{
  /* We need write access.  */
  pthread_rwlock_wrlock (&db->lock);

  /* And the memory handling lock.  */
  pthread_mutex_lock (&db->memlock);

  /* We need an array representing the data area.  All memory
     allocation is BLOCK_ALIGN aligned so this is the level at which
     we have to look at the memory.  We use a mark and sweep algorithm
     where the marks are placed in this array.  */
  assert (db->head->first_free % BLOCK_ALIGN == 0);

  BITMAP_T *mark;
  bool mark_use_malloc;
  /* Our caller, prune_cache, also uses a dynamically allocated array
     (assumed here to hold db->head->module bools).  If it fit on the
     stack it counts against MAX_STACK_USE; if it was too large the
     caller malloc'ed it instead and no stack space is in use.  */
  size_t stack_used = sizeof (bool) * db->head->module;
  if (__glibc_unlikely (stack_used > MAX_STACK_USE))
    stack_used = 0;
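  /* NMARK is the number of BITMAP_T words needed to hold one bit per
     BLOCK_ALIGN-sized block of the currently used data area.  */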
  size_t nmark = (db->head->first_free / BLOCK_ALIGN + BITS - 1) / BITS;
  size_t memory_needed = nmark * sizeof (BITMAP_T);
  if (__glibc_likely (stack_used + memory_needed <= MAX_STACK_USE))
    {
      mark = (BITMAP_T *) alloca_account (memory_needed, stack_used);
      mark_use_malloc = false;
      memset (mark, '\0', memory_needed);
    }
  else
    {
      mark = (BITMAP_T *) xcalloc (1, memory_needed);
      mark_use_malloc = true;
    }
  /* Create an array which can hold pointers to all the hash entries.
     Twice the space is allocated; the second half, HE_DATA, holds the
     same pointers sorted by data offset.  */
  memory_needed = 2 * db->head->nentries * sizeof (struct hashentry *);
  struct hashentry **he;
  struct hashentry **he_data;
  bool he_use_malloc;
  if (__glibc_likely (stack_used + memory_needed <= MAX_STACK_USE))
    {
      he = alloca_account (memory_needed, stack_used);
      he_use_malloc = false;
    }
  else
    {
      he = xmalloc (memory_needed);
      he_use_malloc = true;
    }
  he_data = &he[db->head->nentries];

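  /* Walk all hash chains: record a pointer to every hash entry, set up
     the PREVP back pointers, and mark the blocks occupied by the entry
     itself and, for the FIRST entry of each data record, by the data.  */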
  size_t cnt = 0;
  for (size_t idx = 0; idx < db->head->module; ++idx)
    {
      ref_t *prevp = &db->head->array[idx];
      ref_t run = *prevp;

      while (run != ENDREF)
        {
          assert (cnt < db->head->nentries);
          he[cnt] = (struct hashentry *) (db->data + run);

          he[cnt]->prevp = prevp;
          prevp = &he[cnt]->next;

          /* This is the hash entry itself.  */
          markrange (mark, run, sizeof (struct hashentry));

          /* Add the information for the data itself.  We do this
             only for the one special entry marked with FIRST.  */
          if (he[cnt]->first)
            {
              struct datahead *dh
                = (struct datahead *) (db->data + he[cnt]->packet);
              markrange (mark, he[cnt]->packet, dh->allocsize);
            }

          run = he[cnt]->next;

          ++cnt;
        }
    }
  assert (cnt == db->head->nentries);

  /* Sort the entries by the addresses of the referenced data.  All
     the entries pointing to the same DATAHEAD object will have the
     same key.  Stability of the sorting is unimportant.  */
  memcpy (he_data, he, cnt * sizeof (struct hashentry *));
  qsort (he_data, cnt, sizeof (struct hashentry *), sort_he_data);

  /* Sort the entries by their address.  */
  qsort (he, cnt, sizeof (struct hashentry *), sort_he);

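  /* The obstack provides storage for the moveinfo records created below
     once the stack budget (MAX_STACK_USE) is exhausted.  */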
#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free free
  struct obstack ob;
  obstack_init (&ob);

  /* Determine the highest used address.  */
  size_t high = nmark;
  while (high > 0 && mark[high - 1] == 0)
    --high;

  /* No memory used.  */
  if (high == 0)
    {
      db->head->first_free = 0;
      goto out;
    }

  /* Determine the highest offset.  */
  BITMAP_T mask = HIGHBIT;
  ref_t highref = (high * BITS - 1) * BLOCK_ALIGN;
  while ((mark[high - 1] & mask) == 0)
    {
      mask >>= 1;
      highref -= BLOCK_ALIGN;
    }

  /* Now we can iterate over the MARK array and find bits which are not
     set.  These represent memory which can be recovered.  */
  size_t byte = 0;
  /* Find the first gap.  */
  while (byte < high && mark[byte] == ALLBITS)
    ++byte;

  if (byte == high
      || (byte == high - 1 && (mark[byte] & ~(mask | (mask - 1))) == 0))
    /* No gap.  */
    goto out;

  mask = 1;
  cnt = 0;
  while ((mark[byte] & mask) != 0)
    {
      ++cnt;
      mask <<= 1;
    }
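  /* OFF_FREE is the offset of the first unused block; the compaction
     below will copy used regions down to this point.  */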
  ref_t off_free = (byte * BITS + cnt) * BLOCK_ALIGN;
  assert (off_free <= db->head->first_free);

  struct hashentry **next_hash = he;
  struct hashentry **next_data = he_data;

  /* Skip over the hash entries in the first block which does not get
     moved.  */
  while (next_hash < &he[db->head->nentries]
         && *next_hash < (struct hashentry *) (db->data + off_free))
    ++next_hash;

  while (next_data < &he_data[db->head->nentries]
         && (*next_data)->packet < off_free)
    ++next_data;


  /* Now we start modifying the data.  Make sure all readers of the
     data are aware of this and temporarily don't use the data.  */
  atomic_fetch_add_relaxed (&db->head->gc_cycle, 1);
  assert ((db->head->gc_cycle & 1) == 1);


  /* We do not perform the move operations right away since the
     he_data array is not sorted by the address of the data.  */
  struct moveinfo
  {
    void *from;
    void *to;
    size_t size;
    struct moveinfo *next;
  } *moves = NULL;

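  /* Scan the bitmap for maximal runs of used blocks.  For each run a
     move down to the current OFF_FREE position is recorded (and carried
     out later), and all hash chain references as well as the packet and
     key offsets of the affected entries are adjusted by the displacement
     right away.  */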
  while (byte < high)
    {
      /* Search for the next filled block.  BYTE is the index of the
         entry in MARK, MASK is the bit, and CNT is the bit number.
         OFF_ALLOC is the corresponding offset.  */
      if ((mark[byte] & ~(mask - 1)) == 0)
        {
          /* No other bit set in the same element of MARK.  Search in the
             following memory.  */
          do
            ++byte;
          while (byte < high && mark[byte] == 0);

          if (byte == high)
            /* That was it.  */
            break;

          mask = 1;
          cnt = 0;
        }
      /* Find the exact bit.  */
      while ((mark[byte] & mask) == 0)
        {
          ++cnt;
          mask <<= 1;
        }

      ref_t off_alloc = (byte * BITS + cnt) * BLOCK_ALIGN;
      assert (off_alloc <= db->head->first_free);

      /* Find the end of the used area.  */
      if ((mark[byte] & ~(mask - 1)) == (BITMAP_T) ~(mask - 1))
        {
          /* All other bits set.  Search the next bytes in MARK.  */
          do
            ++byte;
          while (byte < high && mark[byte] == ALLBITS);

          mask = 1;
          cnt = 0;
        }
      if (byte < high)
        {
          /* Find the exact bit.  */
          while ((mark[byte] & mask) != 0)
            {
              ++cnt;
              mask <<= 1;
            }
        }

      ref_t off_allocend = (byte * BITS + cnt) * BLOCK_ALIGN;
      assert (off_allocend <= db->head->first_free);
      /* Now we know that we can copy the area from OFF_ALLOC to
         OFF_ALLOCEND (not included) to the memory starting at
         OFF_FREE.  First fix up all the entries for the
         displacement.  */
      ref_t disp = off_alloc - off_free;

      struct moveinfo *new_move;
      if (__builtin_expect (stack_used + sizeof (*new_move) <= MAX_STACK_USE,
                            1))
        new_move = alloca_account (sizeof (*new_move), stack_used);
      else
        new_move = obstack_alloc (&ob, sizeof (*new_move));
      new_move->from = db->data + off_alloc;
      new_move->to = db->data + off_free;
      new_move->size = off_allocend - off_alloc;
      /* Create a circular list to be always able to append at the end.  */
      if (moves == NULL)
        moves = new_move->next = new_move;
      else
        {
          new_move->next = moves->next;
          moves = moves->next = new_move;
        }

      /* The following loop will prepare to move this much data.  */
      off_free += off_allocend - off_alloc;

      while (off_alloc < off_allocend)
        {
          /* Determine whether the next entry is for a hash entry or
             the data.  */
          if ((struct hashentry *) (db->data + off_alloc) == *next_hash)
            {
              /* Just correct the forward reference.  */
              *(*next_hash++)->prevp -= disp;

              off_alloc += ((sizeof (struct hashentry) + BLOCK_ALIGN_M1)
                            & ~BLOCK_ALIGN_M1);
            }
          else
            {
              assert (next_data < &he_data[db->head->nentries]);
              assert ((*next_data)->packet == off_alloc);

              struct datahead *dh = (struct datahead *) (db->data + off_alloc);
              do
                {
                  assert ((*next_data)->key >= (*next_data)->packet);
                  assert ((*next_data)->key + (*next_data)->len
                          <= (*next_data)->packet + dh->allocsize);

                  (*next_data)->packet -= disp;
                  (*next_data)->key -= disp;
                  ++next_data;
                }
              while (next_data < &he_data[db->head->nentries]
                     && (*next_data)->packet == off_alloc);

              off_alloc += (dh->allocsize + BLOCK_ALIGN_M1) & ~BLOCK_ALIGN_M1;
            }
        }
      assert (off_alloc == off_allocend);

      assert (off_alloc <= db->head->first_free);
      if (off_alloc == db->head->first_free)
        /* We are done, that was the last block.  */
        break;
    }
  assert (next_hash == &he[db->head->nentries]);
  assert (next_data == &he_data[db->head->nentries]);

  /* Now perform the actual moves.  */
  if (moves != NULL)
    {
      struct moveinfo *runp = moves->next;
      do
        {
          assert ((char *) runp->to >= db->data);
          assert ((char *) runp->to + runp->size
                  <= db->data + db->head->first_free);
          assert ((char *) runp->from >= db->data);
          assert ((char *) runp->from + runp->size
                  <= db->data + db->head->first_free);

          /* The regions may overlap.  */
          memmove (runp->to, runp->from, runp->size);
          runp = runp->next;
        }
      while (runp != moves->next);

      if (__glibc_unlikely (debug_level >= 3))
        dbg_log (_("freed %zu bytes in %s cache"),
                 (size_t) (db->head->first_free
                           - ((char *) moves->to + moves->size - db->data)),
                 dbnames[db - dbs]);

      /* The byte past the end of the last copied block is the next
         available byte.  */
      db->head->first_free = (char *) moves->to + moves->size - db->data;

      /* Consistency check.  */
      if (__glibc_unlikely (debug_level >= 3))
        {
          for (size_t idx = 0; idx < db->head->module; ++idx)
            {
              ref_t run = db->head->array[idx];
              size_t cnt = 0;

              while (run != ENDREF)
                {
                  if (run + sizeof (struct hashentry) > db->head->first_free)
                    {
                      dbg_log ("entry %zu in hash bucket %zu out of bounds: "
                               "%" PRIu32 "+%zu > %zu\n",
                               cnt, idx, run, sizeof (struct hashentry),
                               (size_t) db->head->first_free);
                      break;
                    }

                  struct hashentry *he = (struct hashentry *) (db->data + run);

                  if (he->key + he->len > db->head->first_free)
                    dbg_log ("key of entry %zu in hash bucket %zu out of "
                             "bounds: %" PRIu32 "+%zu > %zu\n",
                             cnt, idx, he->key, (size_t) he->len,
                             (size_t) db->head->first_free);

                  if (he->packet + sizeof (struct datahead)
                      > db->head->first_free)
                    dbg_log ("packet of entry %zu in hash bucket %zu out of "
                             "bounds: %" PRIu32 "+%zu > %zu\n",
                             cnt, idx, he->packet, sizeof (struct datahead),
                             (size_t) db->head->first_free);
                  else
                    {
                      struct datahead *dh = (struct datahead *) (db->data
                                                                 + he->packet);
                      if (he->packet + dh->allocsize
                          > db->head->first_free)
                        dbg_log ("full key of entry %zu in hash bucket %zu "
                                 "out of bounds: %" PRIu32 "+%zu > %zu",
                                 cnt, idx, he->packet, (size_t) dh->allocsize,
                                 (size_t) db->head->first_free);
                    }

                  run = he->next;
                  ++cnt;
                }
            }
        }
    }

  /* Make sure the data on disk is updated.  */
  if (db->persistent)
    msync (db->head, db->data + db->head->first_free - (char *) db->head,
           MS_ASYNC);


  /* Now we are done modifying the data.  */
  atomic_fetch_add_relaxed (&db->head->gc_cycle, 1);
  assert ((db->head->gc_cycle & 1) == 0);

  /* We are done.  */
 out:
  pthread_mutex_unlock (&db->memlock);
  pthread_rwlock_unlock (&db->lock);

  if (he_use_malloc)
    free (he);
  if (mark_use_malloc)
    free (mark);

  obstack_free (&ob, NULL);
}


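/* Allocate LEN bytes (rounded up to a multiple of BLOCK_ALIGN) from the
   data area of DB by advancing FIRST_FREE.  If the area is full, one
   attempt is made to grow the underlying file.  With DATA_ALLOC nonzero
   the database read lock is acquired; it is released again only if the
   allocation fails, otherwise the caller is responsible for dropping it.
   Returns a pointer to the new memory or NULL on failure.  */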
void *
mempool_alloc (struct database_dyn *db, size_t len, int data_alloc)
{
  /* Make sure LEN is a multiple of our maximum alignment so we can
     keep track of used memory in multiples of this alignment value.  */
  if ((len & BLOCK_ALIGN_M1) != 0)
    len += BLOCK_ALIGN - (len & BLOCK_ALIGN_M1);

  if (data_alloc)
    pthread_rwlock_rdlock (&db->lock);

  pthread_mutex_lock (&db->memlock);

  assert ((db->head->first_free & BLOCK_ALIGN_M1) == 0);

  bool tried_resize = false;
  void *res;
 retry:
  res = db->data + db->head->first_free;

  if (__glibc_unlikely (db->head->first_free + len > db->head->data_size))
    {
      if (! tried_resize)
        {
          /* Try to resize the database.  Grow the data area by 2 * LEN
             or by 1/8th of its current size, whichever is larger.  */
          size_t oldtotal = (sizeof (struct database_pers_head)
                             + roundup (db->head->module * sizeof (ref_t),
                                        ALIGN)
                             + db->head->data_size);
          size_t new_data_size = (db->head->data_size
                                  + MAX (2 * len, db->head->data_size / 8));
          size_t newtotal = (sizeof (struct database_pers_head)
                             + roundup (db->head->module * sizeof (ref_t),
                                        ALIGN)
                             + new_data_size);
          if (newtotal > db->max_db_size)
            {
              new_data_size -= newtotal - db->max_db_size;
              newtotal = db->max_db_size;
            }

          if (db->mmap_used && newtotal > oldtotal
              /* We only have to adjust the file size.  The new pages
                 become magically available.  */
              && TEMP_FAILURE_RETRY_VAL (posix_fallocate (db->wr_fd, oldtotal,
                                                          newtotal
                                                          - oldtotal)) == 0)
            {
              db->head->data_size = new_data_size;
              tried_resize = true;
              goto retry;
            }
        }

      if (data_alloc)
        pthread_rwlock_unlock (&db->lock);

      if (! db->last_alloc_failed)
        {
          dbg_log (_("no more memory for database '%s'"), dbnames[db - dbs]);

          db->last_alloc_failed = true;
        }

      ++db->head->addfailed;

      /* No luck.  */
      res = NULL;
    }
  else
    {
      db->head->first_free += len;

      db->last_alloc_failed = false;
    }

  pthread_mutex_unlock (&db->memlock);

  return res;
}