/* Copyright (c) 1998-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <https://www.gnu.org/licenses/>.  */

#include <assert.h>
#include <atomic.h>
#include <errno.h>
#include <error.h>
#include <inttypes.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <libintl.h>
#include <arpa/inet.h>
#include <sys/mman.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <sys/uio.h>
#include <nss.h>

#include "nscd.h"
#include "dbg_log.h"


/* Wrapper functions with error checking for standard functions.  */
extern void *xcalloc (size_t n, size_t s);


/* Number of times a value is reloaded without being used.  UINT_MAX
   means unlimited.  */
unsigned int reload_count = DEFAULT_RELOAD_LIMIT;
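
/* Illustration (not compiled): prune_cache below consults this limit
   roughly as

     if (reload_count != UINT_MAX && dh->nreloads >= reload_count)
       dh->usable = false;	-- discard instead of reloading

   so an entry is refreshed at most reload_count times without an
   intervening cache hit; cache_search resets dh->nreloads on a hit.  */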


static time_t (*const readdfcts[LASTREQ]) (struct database_dyn *,
                                           struct hashentry *,
                                           struct datahead *) =
{
  [GETPWBYNAME] = readdpwbyname,
  [GETPWBYUID] = readdpwbyuid,
  [GETGRBYNAME] = readdgrbyname,
  [GETGRBYGID] = readdgrbygid,
  [GETHOSTBYNAME] = readdhstbyname,
  [GETHOSTBYNAMEv6] = readdhstbynamev6,
  [GETHOSTBYADDR] = readdhstbyaddr,
  [GETHOSTBYADDRv6] = readdhstbyaddrv6,
  [GETAI] = readdhstai,
  [INITGROUPS] = readdinitgroups,
  [GETSERVBYNAME] = readdservbyname,
  [GETSERVBYPORT] = readdservbyport,
  [GETNETGRENT] = readdgetnetgrent,
  [INNETGR] = readdinnetgr
};
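
/* The array above is a dispatch table indexed by request type, using
   C99 designated initializers; request types without a reload function
   are implicitly NULL.  prune_cache below dispatches through it as

     assert (runp->type < LASTREQ && readdfcts[runp->type] != NULL);
     time_t timeout = readdfcts[runp->type] (table, runp, dh);

   where each readd* helper re-runs the lookup, refreshes the cached
   record, and returns the new timeout.  */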


/* Search the cache for a matching entry and return it when found.
   Negative entries (recording that a lookup failed) are returned as
   well; the caller can recognize them by DH->notfound.  If no usable
   entry is found return NULL.

   This function must be called with the read-lock held.  */
struct datahead *
cache_search (request_type type, const void *key, size_t len,
              struct database_dyn *table, uid_t owner)
{
  unsigned long int hash = __nss_hash (key, len) % table->head->module;

  unsigned long int nsearched = 0;
  struct datahead *result = NULL;

  ref_t work = table->head->array[hash];
  while (work != ENDREF)
    {
      ++nsearched;

      struct hashentry *here = (struct hashentry *) (table->data + work);

      if (type == here->type && len == here->len
          && memcmp (key, table->data + here->key, len) == 0
          && here->owner == owner)
        {
          /* We found the entry.  Increment the appropriate counter.  */
          struct datahead *dh
            = (struct datahead *) (table->data + here->packet);

          /* See whether we must ignore the entry.  */
          if (dh->usable)
            {
              /* We do not synchronize the memory here.  The statistics
                 data are not crucial; we synchronize only once in a
                 while, in the cleanup threads.  */
              if (dh->notfound)
                ++table->head->neghit;
              else
                {
                  ++table->head->poshit;

                  if (dh->nreloads != 0)
                    dh->nreloads = 0;
                }

              result = dh;
              break;
            }
        }

      work = here->next;
    }

  if (nsearched > table->head->maxnsearched)
    table->head->maxnsearched = nsearched;

  return result;
}
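
/* Illustration (not compiled): a typical caller in one of the
   per-database lookup paths holds the read-lock around the search,
   along the lines of

     pthread_rwlock_rdlock (&db->lock);
     struct datahead *dh = cache_search (GETPWBYNAME, key, strlen (key) + 1,
                                         db, uid);
     ...  on a hit, write dh->data out to the client socket  ...
     pthread_rwlock_unlock (&db->lock);

   Note that string keys include the terminating NUL in LEN.  */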

/* Add a new entry to the cache.  The return value is zero if the
   function call was successful, and -1 if the hash entry could not
   be allocated.

   This function must be called with the read-lock held.

   We modify the table but nevertheless only acquire a read-lock.
   This is OK because the operations we use would be safe even without
   any locking as long as `prune_cache' does not run at the same time,
   and holding the read-lock keeps out `prune_cache', which takes the
   write-lock for its modifications.  Using the read-lock reduces the
   chance of conflicts.  */
int
cache_add (int type, const void *key, size_t len, struct datahead *packet,
           bool first, struct database_dyn *table,
           uid_t owner, bool prune_wakeup)
{
  if (__glibc_unlikely (debug_level >= 2))
    {
      const char *str;
      char buf[INET6_ADDRSTRLEN + 1];
      if (type == GETHOSTBYADDR || type == GETHOSTBYADDRv6)
        str = inet_ntop (type == GETHOSTBYADDR ? AF_INET : AF_INET6,
                         key, buf, sizeof (buf));
      else
        str = key;

      dbg_log (_("add new entry \"%s\" of type %s for %s to cache%s"),
               str, serv2str[type], dbnames[table - dbs],
               first ? _(" (first)") : "");
    }

  unsigned long int hash = __nss_hash (key, len) % table->head->module;
  struct hashentry *newp;

  newp = mempool_alloc (table, sizeof (struct hashentry), 0);
  /* If we cannot allocate memory, just do not do anything.  */
  if (newp == NULL)
    {
      /* If necessary mark the entry as unusable so that lookups will
         not use it.  */
      if (first)
        packet->usable = false;

      return -1;
    }

  newp->type = type;
  newp->first = first;
  newp->len = len;
  newp->key = (char *) key - table->data;
  assert (newp->key + newp->len <= table->head->first_free);
  newp->owner = owner;
  newp->packet = (char *) packet - table->data;
  assert ((newp->packet & BLOCK_ALIGN_M1) == 0);

  /* Put the new entry in the first position.  */
  /* TODO Review concurrency.  Use atomic_exchange_release.  */
  newp->next = atomic_load_relaxed (&table->head->array[hash]);
  while (!atomic_compare_exchange_weak_release (&table->head->array[hash],
                                                (ref_t *) &newp->next,
                                                (ref_t) ((char *) newp
                                                         - table->data)));
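
  /* The loop above is a lock-free push onto the bucket's singly linked
     list: when the CAS fails, atomic_compare_exchange_weak_release has
     already stored the current list head into newp->next, so the retry
     simply re-links the new entry in front of whatever another thread
     managed to insert in the meantime.  */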

  /* Update the statistics.  */
  if (packet->notfound)
    ++table->head->negmiss;
  else if (first)
    ++table->head->posmiss;

  /* We depend on this value being correct and at least as high as the
     real number of entries.  */
  atomic_increment (&table->head->nentries);

  /* It does not matter that we are not loading the just-incremented
     value; this is only used for statistics.  */
  unsigned long int nentries = table->head->nentries;
  if (nentries > table->head->maxnentries)
    table->head->maxnentries = nentries;

  if (table->persistent)
    // XXX async OK?
    msync ((void *) table->head,
           (char *) &table->head->array[hash] - (char *) table->head
           + sizeof (ref_t), MS_ASYNC);

  /* We do not have to worry about the pruning thread if we are
     re-adding the data since this is done by the pruning thread.  We
     also do not have to do anything in case this is not the first
     time the data is entered since different data heads all have the
     same timeout.  */
  if (first && prune_wakeup)
    {
      /* The prune thread for this table might not be scheduled to run
         for a long time.  Wake it if necessary.  */
      pthread_mutex_lock (&table->prune_lock);
      time_t next_wakeup = table->wakeup_time;
      bool do_wakeup = false;
      if (next_wakeup > packet->timeout + CACHE_PRUNE_INTERVAL)
        {
          table->wakeup_time = packet->timeout;
          do_wakeup = true;
        }
      pthread_mutex_unlock (&table->prune_lock);
      if (do_wakeup)
        pthread_cond_signal (&table->prune_cond);
    }

  return 0;
}
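
/* Illustration (not compiled; names are schematic): the per-service
   add* helpers reserve room for the reply in the mapped data area,
   fill it in, and only then publish it, roughly

     struct datahead *dh = mempool_alloc (table, total, 1);
     ...  fill in dh->data, dh->timeout, dh->notfound, ...  ...
     (void) cache_add (req->type, key_copy, req->key_len, dh, true,
                       table, uid, prune_wakeup);

   A failing cache_add only means the hash entry could not be
   allocated; the data block stays in the pool until the next garbage
   collection reclaims it.  */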

/* Walk through the table and remove all entries whose lifetime has
   ended.

   We have a problem here.  To actually remove the entries we must get
   the write-lock.  But since we want to keep the time we hold the
   lock as short as possible we cannot simply acquire the lock when we
   start looking for timed-out entries.

   Therefore we do it in two stages: first we look for entries which
   must be invalidated and remember them.  Then we get the lock and
   actually remove them.  This is complicated by the way we have to
   free the data structures since some hash table entries share the same
   data.  */
time_t
prune_cache (struct database_dyn *table, time_t now, int fd)
{
  size_t cnt = table->head->module;

  /* If this table is not actually used don't do anything.  */
  if (cnt == 0)
    {
      if (fd != -1)
        {
          /* Reply to the INVALIDATE initiator.  */
          int32_t resp = 0;
          writeall (fd, &resp, sizeof (resp));
        }

      /* No need to do this again anytime soon.  */
      return 24 * 60 * 60;
    }

  /* If we are supposed to track modification of the underlying files,
     a changed file likewise invalidates the entries.  */
  if (table->check_file && now != LONG_MAX)
    {
      struct traced_file *runp = table->traced_files;

      while (runp != NULL)
        {
#ifdef HAVE_INOTIFY
          if (runp->inotify_descr[TRACED_FILE] == -1)
#endif
            {
              struct stat64 st;

              if (stat64 (runp->fname, &st) < 0)
                {
                  /* Print a diagnostic that the traced file was missing.
                     We must not disable tracing since the file might return
                     shortly and we want to reload it at the next pruning.
                     Disabling tracing here would go against the configuration
                     as specified by the user via check-files.  */
                  char buf[128];
                  dbg_log (_("checking for monitored file `%s': %s"),
                           runp->fname, strerror_r (errno, buf, sizeof (buf)));
                }
              else
                {
                  /* This must be `!=' to also catch cases where users
                     turn the clocks back: we want to detect any
                     difference in mtime, not only increases.  */
                  if (st.st_mtime != runp->mtime)
                    {
                      dbg_log (_("monitored file `%s` changed (mtime)"),
                               runp->fname);
                      /* The file changed.  Invalidate all entries.  */
                      now = LONG_MAX;
                      runp->mtime = st.st_mtime;
#ifdef HAVE_INOTIFY
                      /* Attempt to install a watch on the file.  */
                      install_watches (runp);
#endif
                    }
                }
            }

          runp = runp->next;
        }
    }

  /* We run through the table and find values which are not valid anymore.

     Note that for the initial step, finding the entries to be removed,
     we don't need to get any lock.  It is at all times assured that the
     linked lists are set up correctly and that no second thread prunes
     the cache.  */
  bool *mark;
  size_t memory_needed = cnt * sizeof (bool);
  bool mark_use_alloca;
  if (__glibc_likely (memory_needed <= MAX_STACK_USE))
    {
      mark = alloca (cnt * sizeof (bool));
      memset (mark, '\0', memory_needed);
      mark_use_alloca = true;
    }
  else
    {
      mark = xcalloc (1, memory_needed);
      mark_use_alloca = false;
    }
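
  /* MARK is one flag per hash bucket.  Using alloca for the common
     case avoids heap traffic on every prune pass; MAX_STACK_USE merely
     bounds the stack cost for databases with a very large hash
     module.  */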
  size_t first = cnt + 1;
  size_t last = 0;
  char *const data = table->data;
  bool any = false;

  if (__glibc_unlikely (debug_level > 2))
    dbg_log (_("pruning %s cache; time %ld"),
             dbnames[table - dbs], (long int) now);

#define NO_TIMEOUT LONG_MAX
  time_t next_timeout = NO_TIMEOUT;
  do
    {
      ref_t run = table->head->array[--cnt];

      while (run != ENDREF)
        {
          struct hashentry *runp = (struct hashentry *) (data + run);
          struct datahead *dh = (struct datahead *) (data + runp->packet);

          /* Some debug support.  */
          if (__glibc_unlikely (debug_level > 2))
            {
              char buf[INET6_ADDRSTRLEN];
              const char *str;

              if (runp->type == GETHOSTBYADDR || runp->type == GETHOSTBYADDRv6)
                {
                  inet_ntop (runp->type == GETHOSTBYADDR ? AF_INET : AF_INET6,
                             data + runp->key, buf, sizeof (buf));
                  str = buf;
                }
              else
                str = data + runp->key;

              dbg_log (_("considering %s entry \"%s\", timeout %" PRIu64),
                       serv2str[runp->type], str, dh->timeout);
            }

          /* Check whether the entry timed out.  */
          if (dh->timeout < now)
            {
              /* This hash bucket could contain entries which need to
                 be looked at.  */
              mark[cnt] = true;

              first = MIN (first, cnt);
              last = MAX (last, cnt);

              /* We only have to look at the data of the first entries
                 since the count information is kept in the data part
                 which is shared.  */
              if (runp->first)
                {
                  /* At this point there are two choices: we reload the
                     value or we discard it.  NRELOADS is left untouched
                     unless we reload the record.  */
                  if ((reload_count != UINT_MAX
                       && __builtin_expect (dh->nreloads >= reload_count, 0))
                      /* We always remove negative entries.  */
                      || dh->notfound
                      /* Discard everything if the user explicitly
                         requests it.  */
                      || now == LONG_MAX)
                    {
                      /* Remove the value.  */
                      dh->usable = false;

                      /* We definitely have some garbage entries now.  */
                      any = true;
                    }
                  else
                    {
                      /* Reload the value.  We do this only for the
                         initially used key, not the additionally
                         added derived value.  */
                      assert (runp->type < LASTREQ
                              && readdfcts[runp->type] != NULL);

                      time_t timeout = readdfcts[runp->type] (table, runp, dh);
                      next_timeout = MIN (next_timeout, timeout);

                      /* If the entry has been replaced, we might need
                         cleanup.  */
                      any |= !dh->usable;
                    }
                }
            }
          else
            {
              assert (dh->usable);
              next_timeout = MIN (next_timeout, dh->timeout);
            }

          run = runp->next;
        }
    }
  while (cnt > 0);

  if (__glibc_unlikely (fd != -1))
    {
      /* Reply to the INVALIDATE initiator that the cache has been
         invalidated.  */
      int32_t resp = 0;
      writeall (fd, &resp, sizeof (resp));
    }

  if (first <= last)
    {
      struct hashentry *head = NULL;

      /* Now we have to get the write-lock since we are about to modify
         the table.  */
      if (__glibc_unlikely (pthread_rwlock_trywrlock (&table->lock) != 0))
        {
          ++table->head->wrlockdelayed;
          pthread_rwlock_wrlock (&table->lock);
        }

      /* Now we start modifying the data.  Make sure all readers of the
         data are aware of this and temporarily don't use the data.  */
      atomic_fetch_add_relaxed (&table->head->gc_cycle, 1);
      assert ((table->head->gc_cycle & 1) == 1);
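
      /* gc_cycle acts as a seqlock-style generation counter for clients
         that search the mmap'ed database directly: it is odd while this
         writer rearranges the table and even otherwise.  A reader
         validates its lookup roughly like

           uint32_t c = head->gc_cycle;
           if (c & 1)
             ... writer active: fall back or retry ...
           ... perform the search ...
           if (head->gc_cycle != c)
             ... table changed underneath us: retry ...

         so it never trusts pointers read while entries are being
         unlinked.  */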

      while (first <= last)
        {
          if (mark[first])
            {
              ref_t *old = &table->head->array[first];
              ref_t run = table->head->array[first];

              assert (run != ENDREF);
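              /* Classic unlink via a pointer to the previous link: OLD
                 always points at the reference that leads to RUN, so an
                 entry is removed with a single store through OLD and no
                 separate predecessor bookkeeping is needed.  */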
              do
                {
                  struct hashentry *runp = (struct hashentry *) (data + run);
                  struct datahead *dh
                    = (struct datahead *) (data + runp->packet);

                  if (! dh->usable)
                    {
                      /* We need the list only for debugging but it is
                         more costly to avoid creating the list than
                         doing it.  */
                      runp->dellist = head;
                      head = runp;

                      /* No need for an atomic operation, we have the
                         write lock.  */
                      --table->head->nentries;

                      run = *old = runp->next;
                    }
                  else
                    {
                      old = &runp->next;
                      run = runp->next;
                    }
                }
              while (run != ENDREF);
            }

          ++first;
        }

      /* Now we are done modifying the data.  */
      atomic_fetch_add_relaxed (&table->head->gc_cycle, 1);
      assert ((table->head->gc_cycle & 1) == 0);

      /* It's all done.  */
      pthread_rwlock_unlock (&table->lock);

      /* Make sure the data is saved to disk.  */
      if (table->persistent)
        msync (table->head,
               data + table->head->first_free - (char *) table->head,
               MS_ASYNC);

      /* One extra pass if we do debugging.  */
      if (__glibc_unlikely (debug_level > 0))
        {
          struct hashentry *runp = head;

          while (runp != NULL)
            {
              char buf[INET6_ADDRSTRLEN];
              const char *str;

              if (runp->type == GETHOSTBYADDR || runp->type == GETHOSTBYADDRv6)
                {
                  inet_ntop (runp->type == GETHOSTBYADDR ? AF_INET : AF_INET6,
                             data + runp->key, buf, sizeof (buf));
                  str = buf;
                }
              else
                str = data + runp->key;

              dbg_log ("remove %s entry \"%s\"", serv2str[runp->type], str);

              runp = runp->dellist;
            }
        }
    }

  if (__glibc_unlikely (! mark_use_alloca))
    free (mark);

  /* Run garbage collection if any entry has been removed or replaced.  */
  if (any)
    gc (table);

  /* If there is no entry in the database and we therefore have no new
     timeout value, tell the caller to wake up in 24 hours.  */
  return next_timeout == NO_TIMEOUT ? 24 * 60 * 60 : next_timeout - now;
}
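
/* Illustration (not compiled): each database has a dedicated prune
   thread that drives this function in a loop along the lines of

     for (;;)
       {
         time_t wait = prune_cache (table, time (NULL), -1);
         ... block on table->prune_cond for up to WAIT seconds, or
             until cache_add wakes us for an earlier timeout ...
       }

   An explicit INVALIDATE request instead calls prune_cache with
   NOW == LONG_MAX and the client socket as FD, which discards every
   entry and acknowledges the requester.  */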