/* Cache memory handling.
   Copyright (C) 2004-2017 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@redhat.com>, 2004.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.  */

#include <assert.h>
#include <errno.h>
#include <error.h>
#include <fcntl.h>
#include <inttypes.h>
#include <libintl.h>
#include <limits.h>
#include <obstack.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/param.h>

#include "dbg_log.h"
#include "nscd.h"


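/* Comparison functions for qsort.  The first orders hash entries by
   their address in the data area, the second by the address of the
   data record (packet) they reference.  */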
static int
sort_he (const void *p1, const void *p2)
{
  struct hashentry *h1 = *(struct hashentry **) p1;
  struct hashentry *h2 = *(struct hashentry **) p2;

  if (h1 < h2)
    return -1;
  if (h1 > h2)
    return 1;
  return 0;
}


static int
sort_he_data (const void *p1, const void *p2)
{
  struct hashentry *h1 = *(struct hashentry **) p1;
  struct hashentry *h2 = *(struct hashentry **) p2;

  if (h1->packet < h2->packet)
    return -1;
  if (h1->packet > h2->packet)
    return 1;
  return 0;
}


/* Basic definitions for the bitmap implementation.  Only BITMAP_T
   needs to be changed to choose a different word size.  */
#define BITMAP_T uint8_t
#define BITS (CHAR_BIT * sizeof (BITMAP_T))
#define ALLBITS ((((BITMAP_T) 1) << BITS) - 1)
#define HIGHBIT (((BITMAP_T) 1) << (BITS - 1))


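/* Set the bits in MARK which correspond to the BLOCK_ALIGN-sized
   blocks covered by the LEN bytes at offset START.  Illustrative
   example, assuming BLOCK_ALIGN is 8: markrange (mark, 16, 20) rounds
   the 20 bytes up to three blocks starting at block 2 and therefore
   sets bits 2, 3, and 4 of mark[0].  */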
static void
markrange (BITMAP_T *mark, ref_t start, size_t len)
{
  /* Adjust parameters for block alignment.  */
  assert ((start & BLOCK_ALIGN_M1) == 0);
  start /= BLOCK_ALIGN;
  len = (len + BLOCK_ALIGN_M1) / BLOCK_ALIGN;

  size_t elem = start / BITS;

  if (start % BITS != 0)
    {
      if (start % BITS + len <= BITS)
        {
          /* All fits in the partial byte.  */
          mark[elem] |= (ALLBITS >> (BITS - len)) << (start % BITS);
          return;
        }

      mark[elem++] |= ALLBITS << (start % BITS);
      len -= BITS - (start % BITS);
    }

  while (len >= BITS)
    {
      mark[elem++] = ALLBITS;
      len -= BITS;
    }

  if (len > 0)
    mark[elem] |= ALLBITS >> (BITS - len);
}


void
gc (struct database_dyn *db)
{
  /* We need write access.  */
  pthread_rwlock_wrlock (&db->lock);

  /* And the memory handling lock.  */
  pthread_mutex_lock (&db->memlock);

  /* We need an array representing the data area.  All memory
     allocation is BLOCK_ALIGN aligned so this is the level at which
     we have to look at the memory.  We use a mark and sweep algorithm
     where the marks are placed in this array.  */
  assert (db->head->first_free % BLOCK_ALIGN == 0);

  BITMAP_T *mark;
  bool mark_use_malloc;
  /* The caller, prune_cache, keeps an array on the stack as well;
     account for its stack usage here.  If that array was too large it
     was malloc'ed instead and used no stack space.  */
  size_t stack_used = sizeof (bool) * db->head->module;
  if (__glibc_unlikely (stack_used > MAX_STACK_USE))
    stack_used = 0;
  size_t nmark = (db->head->first_free / BLOCK_ALIGN + BITS - 1) / BITS;
  size_t memory_needed = nmark * sizeof (BITMAP_T);
  if (__glibc_likely (stack_used + memory_needed <= MAX_STACK_USE))
    {
      mark = (BITMAP_T *) alloca_account (memory_needed, stack_used);
      mark_use_malloc = false;
      memset (mark, '\0', memory_needed);
    }
  else
    {
      mark = (BITMAP_T *) xcalloc (1, memory_needed);
      mark_use_malloc = true;
    }

  /* Create an array which can hold pointers to all the hash
     entries.  */
  memory_needed = 2 * db->head->nentries * sizeof (struct hashentry *);
  struct hashentry **he;
  struct hashentry **he_data;
  bool he_use_malloc;
  if (__glibc_likely (stack_used + memory_needed <= MAX_STACK_USE))
    {
      he = alloca_account (memory_needed, stack_used);
      he_use_malloc = false;
    }
  else
    {
      he = xmalloc (memory_needed);
      he_use_malloc = true;
    }
  he_data = &he[db->head->nentries];
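  /* HE_DATA points at the second half of the allocation; it will hold
     the copy of the pointers sorted by the address of the referenced
     data (see the memcpy below).  */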

  size_t cnt = 0;
  for (size_t idx = 0; idx < db->head->module; ++idx)
    {
      ref_t *prevp = &db->head->array[idx];
      ref_t run = *prevp;

      while (run != ENDREF)
        {
          assert (cnt < db->head->nentries);
          he[cnt] = (struct hashentry *) (db->data + run);

          he[cnt]->prevp = prevp;
          prevp = &he[cnt]->next;

          /* This is the hash entry itself.  */
          markrange (mark, run, sizeof (struct hashentry));

          /* Add the information for the data itself.  We do this
             only for the one special entry marked with FIRST.  */
          if (he[cnt]->first)
            {
              struct datahead *dh
                = (struct datahead *) (db->data + he[cnt]->packet);
              markrange (mark, he[cnt]->packet, dh->allocsize);
            }

          run = he[cnt]->next;

          ++cnt;
        }
    }
  assert (cnt == db->head->nentries);

  /* Sort the entries by the addresses of the referenced data.  All
     the entries pointing to the same DATAHEAD object will have the
     same key.  Stability of the sorting is unimportant.  */
  memcpy (he_data, he, cnt * sizeof (struct hashentry *));
  qsort (he_data, cnt, sizeof (struct hashentry *), sort_he_data);

  /* Sort the entries by their address.  */
  qsort (he, cnt, sizeof (struct hashentry *), sort_he);

#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free free
  struct obstack ob;
  obstack_init (&ob);
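  /* The obstack provides memory for the move records below should the
     stack budget run out.  */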

  /* Determine the highest used address.  */
  size_t high = nmark;
  while (high > 0 && mark[high - 1] == 0)
    --high;

  /* No memory used.  */
  if (high == 0)
    {
      db->head->first_free = 0;
      goto out;
    }

  /* Determine the highest offset.  */
  BITMAP_T mask = HIGHBIT;
  ref_t highref = (high * BITS - 1) * BLOCK_ALIGN;
  while ((mark[high - 1] & mask) == 0)
    {
      mask >>= 1;
      highref -= BLOCK_ALIGN;
    }
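  /* HIGHREF now is the offset of the highest block in use.  */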

  /* Now we can iterate over the MARK array and find bits which are not
     set.  These represent memory which can be recovered.  */
  size_t byte = 0;
  /* Find the first gap.  */
  while (byte < high && mark[byte] == ALLBITS)
    ++byte;

  if (byte == high
      || (byte == high - 1 && (mark[byte] & ~(mask | (mask - 1))) == 0))
    /* No gap.  */
    goto out;

  mask = 1;
  cnt = 0;
  while ((mark[byte] & mask) != 0)
    {
      ++cnt;
      mask <<= 1;
    }
  ref_t off_free = (byte * BITS + cnt) * BLOCK_ALIGN;
  assert (off_free <= db->head->first_free);

  struct hashentry **next_hash = he;
  struct hashentry **next_data = he_data;

  /* Skip over the hash entries in the initial area below OFF_FREE,
     which does not get moved.  */
  while (next_hash < &he[db->head->nentries]
         && *next_hash < (struct hashentry *) (db->data + off_free))
    ++next_hash;

  while (next_data < &he_data[db->head->nentries]
         && (*next_data)->packet < off_free)
    ++next_data;


  /* Now we start modifying the data.  Make sure all readers of the
     data are aware of this and temporarily don't use the data.  */
  ++db->head->gc_cycle;
  assert ((db->head->gc_cycle & 1) == 1);
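  /* Clients using the shared memory mapping check GC_CYCLE: an odd
     value, or a value that changed while they were reading, means the
     area is in flux and they are expected to fall back to querying
     the daemon over the socket.  */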


  /* We do not perform the move operations right away: the pointers in
     HE and HE_DATA still reference the old locations, and all the
     fix-up work below must be complete before any bytes move.  */
  struct moveinfo
  {
    void *from;
    void *to;
    size_t size;
    struct moveinfo *next;
  } *moves = NULL;

  while (byte < high)
    {
      /* Search for the next filled block.  BYTE is the index of the
         entry in MARK, MASK is the bit, and CNT is the bit number.
         OFF_ALLOC is the corresponding offset.  */
      if ((mark[byte] & ~(mask - 1)) == 0)
        {
          /* No other bit set in the same element of MARK.  Search in the
             following memory.  */
          do
            ++byte;
          while (byte < high && mark[byte] == 0);

          if (byte == high)
            /* That was it.  */
            break;

          mask = 1;
          cnt = 0;
        }
      /* Find the exact bit.  */
      while ((mark[byte] & mask) == 0)
        {
          ++cnt;
          mask <<= 1;
        }

      ref_t off_alloc = (byte * BITS + cnt) * BLOCK_ALIGN;
      assert (off_alloc <= db->head->first_free);

      /* Find the end of the used area.  */
      if ((mark[byte] & ~(mask - 1)) == (BITMAP_T) ~(mask - 1))
        {
          /* All other bits set.  Search the next bytes in MARK.  */
          do
            ++byte;
          while (byte < high && mark[byte] == ALLBITS);

          mask = 1;
          cnt = 0;
        }
      if (byte < high)
        {
          /* Find the exact bit.  */
          while ((mark[byte] & mask) != 0)
            {
              ++cnt;
              mask <<= 1;
            }
        }

      ref_t off_allocend = (byte * BITS + cnt) * BLOCK_ALIGN;
      assert (off_allocend <= db->head->first_free);
      /* Now we know that we can copy the area from OFF_ALLOC to
         OFF_ALLOCEND (not included) to the memory starting at
         OFF_FREE.  First fix up all the entries for the
         displacement.  */
      ref_t disp = off_alloc - off_free;

      struct moveinfo *new_move;
      if (__glibc_likely (stack_used + sizeof (*new_move) <= MAX_STACK_USE))
        new_move = alloca_account (sizeof (*new_move), stack_used);
      else
        new_move = obstack_alloc (&ob, sizeof (*new_move));
      new_move->from = db->data + off_alloc;
      new_move->to = db->data + off_free;
      new_move->size = off_allocend - off_alloc;
      /* Keep the records in a circular list; MOVES points at the tail
         so we can always append at the end cheaply.  */
      if (moves == NULL)
        moves = new_move->next = new_move;
      else
        {
          new_move->next = moves->next;
          moves = moves->next = new_move;
        }

      /* The following loop will prepare to move this much data.  */
      off_free += off_allocend - off_alloc;

      while (off_alloc < off_allocend)
        {
          /* Determine whether the next entry is for a hash entry or
             the data.  */
          if ((struct hashentry *) (db->data + off_alloc) == *next_hash)
            {
              /* Just correct the forward reference.  */
              *(*next_hash++)->prevp -= disp;

              off_alloc += ((sizeof (struct hashentry) + BLOCK_ALIGN_M1)
                            & ~BLOCK_ALIGN_M1);
            }
          else
            {
              assert (next_data < &he_data[db->head->nentries]);
              assert ((*next_data)->packet == off_alloc);

              struct datahead *dh
                = (struct datahead *) (db->data + off_alloc);
              do
                {
                  assert ((*next_data)->key >= (*next_data)->packet);
                  assert ((*next_data)->key + (*next_data)->len
                          <= (*next_data)->packet + dh->allocsize);

                  (*next_data)->packet -= disp;
                  (*next_data)->key -= disp;
                  ++next_data;
                }
              while (next_data < &he_data[db->head->nentries]
                     && (*next_data)->packet == off_alloc);

              off_alloc += (dh->allocsize + BLOCK_ALIGN_M1) & ~BLOCK_ALIGN_M1;
            }
        }
      assert (off_alloc == off_allocend);

      assert (off_alloc <= db->head->first_free);
      if (off_alloc == db->head->first_free)
        /* We are done, that was the last block.  */
        break;
    }
  assert (next_hash == &he[db->head->nentries]);
  assert (next_data == &he_data[db->head->nentries]);

  /* Now perform the actual moves.  */
  if (moves != NULL)
    {
      struct moveinfo *runp = moves->next;
      do
        {
          assert ((char *) runp->to >= db->data);
          assert ((char *) runp->to + runp->size
                  <= db->data + db->head->first_free);
          assert ((char *) runp->from >= db->data);
          assert ((char *) runp->from + runp->size
                  <= db->data + db->head->first_free);

          /* The regions may overlap.  */
          memmove (runp->to, runp->from, runp->size);
          runp = runp->next;
        }
      while (runp != moves->next);

      if (__glibc_unlikely (debug_level >= 3))
        dbg_log (_("freed %zu bytes in %s cache"),
                 (size_t) (db->head->first_free
                           - ((char *) moves->to + moves->size - db->data)),
                 dbnames[db - dbs]);

      /* The byte past the end of the last copied block is the next
         available byte.  */
      db->head->first_free = (char *) moves->to + moves->size - db->data;

      /* Consistency check.  */
      if (__glibc_unlikely (debug_level >= 3))
        {
          for (size_t idx = 0; idx < db->head->module; ++idx)
            {
              ref_t run = db->head->array[idx];
              size_t cnt = 0;

              while (run != ENDREF)
                {
                  if (run + sizeof (struct hashentry) > db->head->first_free)
                    {
                      dbg_log ("entry %zu in hash bucket %zu out of bounds: "
                               "%" PRIu32 "+%zu > %zu\n",
                               cnt, idx, run, sizeof (struct hashentry),
                               (size_t) db->head->first_free);
                      break;
                    }

                  struct hashentry *he
                    = (struct hashentry *) (db->data + run);

                  if (he->key + he->len > db->head->first_free)
                    dbg_log ("key of entry %zu in hash bucket %zu out of "
                             "bounds: %" PRIu32 "+%zu > %zu\n",
                             cnt, idx, he->key, (size_t) he->len,
                             (size_t) db->head->first_free);

                  if (he->packet + sizeof (struct datahead)
                      > db->head->first_free)
                    dbg_log ("packet of entry %zu in hash bucket %zu out of "
                             "bounds: %" PRIu32 "+%zu > %zu\n",
                             cnt, idx, he->packet, sizeof (struct datahead),
                             (size_t) db->head->first_free);
                  else
                    {
                      struct datahead *dh
                        = (struct datahead *) (db->data + he->packet);
                      if (he->packet + dh->allocsize > db->head->first_free)
                        dbg_log ("full key of entry %zu in hash bucket %zu "
                                 "out of bounds: %" PRIu32 "+%zu > %zu",
                                 cnt, idx, he->packet, (size_t) dh->allocsize,
                                 (size_t) db->head->first_free);
                    }

                  run = he->next;
                  ++cnt;
                }
            }
        }
    }

  /* Make sure the data on disk is updated.  */
  if (db->persistent)
    msync (db->head, db->data + db->head->first_free - (char *) db->head,
           MS_ASYNC);
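  /* Note that MS_ASYNC only schedules the write-back; the daemon does
     not wait for the pages to hit the disk.  */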


  /* Now we are done modifying the data.  */
  ++db->head->gc_cycle;
  assert ((db->head->gc_cycle & 1) == 0);

  /* We are done.  */
 out:
  pthread_mutex_unlock (&db->memlock);
  pthread_rwlock_unlock (&db->lock);

  if (he_use_malloc)
    free (he);
  if (mark_use_malloc)
    free (mark);

  obstack_free (&ob, NULL);
}


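/* Allocate LEN bytes of memory from the DB data area.  If DATA_ALLOC
   is nonzero the data area read lock is acquired as well; note that
   on success it stays held and the caller has to release it.  Returns
   NULL if no memory is available.  */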
void *
mempool_alloc (struct database_dyn *db, size_t len, int data_alloc)
{
  /* Make sure LEN is a multiple of our maximum alignment so we can
     keep track of used memory in multiples of this alignment value.  */
  if ((len & BLOCK_ALIGN_M1) != 0)
    len += BLOCK_ALIGN - (len & BLOCK_ALIGN_M1);
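  /* For example, assuming BLOCK_ALIGN is 8, a request for 13 bytes
     gets padded to 16.  */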

  if (data_alloc)
    pthread_rwlock_rdlock (&db->lock);

  pthread_mutex_lock (&db->memlock);

  assert ((db->head->first_free & BLOCK_ALIGN_M1) == 0);

  bool tried_resize = false;
  void *res;
 retry:
  res = db->data + db->head->first_free;

  if (__glibc_unlikely (db->head->first_free + len > db->head->data_size))
    {
      if (! tried_resize)
        {
          /* Try to resize the database.  Grow the data area by 1/8th
             of its current size, but by at least 2 * LEN.  */
          size_t oldtotal = (sizeof (struct database_pers_head)
                             + roundup (db->head->module * sizeof (ref_t),
                                        ALIGN)
                             + db->head->data_size);
          size_t new_data_size = (db->head->data_size
                                  + MAX (2 * len, db->head->data_size / 8));
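          /* For example, a 1 MiB data area grows by 128 KiB unless
             2 * LEN is larger; the new total is then capped at the
             configured maximum database size.  */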
          size_t newtotal = (sizeof (struct database_pers_head)
                             + roundup (db->head->module * sizeof (ref_t),
                                        ALIGN)
                             + new_data_size);
          if (newtotal > db->max_db_size)
            {
              new_data_size -= newtotal - db->max_db_size;
              newtotal = db->max_db_size;
            }

          if (db->mmap_used && newtotal > oldtotal
              /* We only have to adjust the file size.  The new pages
                 become magically available.  */
              && TEMP_FAILURE_RETRY_VAL (posix_fallocate (db->wr_fd, oldtotal,
                                                          newtotal
                                                          - oldtotal)) == 0)
            {
              db->head->data_size = new_data_size;
              tried_resize = true;
              goto retry;
            }
        }

      if (data_alloc)
        pthread_rwlock_unlock (&db->lock);

      if (! db->last_alloc_failed)
        {
          dbg_log (_("no more memory for database '%s'"), dbnames[db - dbs]);

          db->last_alloc_failed = true;
        }

      ++db->head->addfailed;

      /* No luck.  */
      res = NULL;
    }
  else
    {
      db->head->first_free += len;

      db->last_alloc_failed = false;
    }

  pthread_mutex_unlock (&db->memlock);

  return res;
}