1 | /* Code to load locale data from the locale archive file. |
2 | Copyright (C) 2002-2022 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #include <locale.h> |
20 | #include <stddef.h> |
21 | #include <stdlib.h> |
22 | #include <stdbool.h> |
23 | #include <errno.h> |
24 | #include <assert.h> |
25 | #include <string.h> |
26 | #include <fcntl.h> |
27 | #include <unistd.h> |
28 | #include <stdint.h> |
29 | #include <sys/mman.h> |
30 | #include <sys/stat.h> |
31 | #include <sys/param.h> |
32 | |
33 | #include "localeinfo.h" |
34 | #include "locarchive.h" |
35 | #include <not-cancel.h> |
36 | |
37 | /* Define the hash function. We define the function as static inline. */ |
38 | #define compute_hashval static inline compute_hashval |
39 | #define hashval_t uint32_t |
40 | #include "hashval.h" |
41 | #undef compute_hashval |
42 | |
43 | |
44 | /* Name of the locale archive file. */ |
45 | static const char archfname[] = COMPLOCALEDIR "/locale-archive" ; |
46 | |
47 | /* Size of initial mapping window, optimal if large enough to |
48 | cover the header plus the initial locale. */ |
49 | #define ARCHIVE_MAPPING_WINDOW (2 * 1024 * 1024) |
50 | |
51 | #ifndef MAP_COPY |
52 | /* This is not quite as good as MAP_COPY since unexamined pages |
53 | can change out from under us and give us inconsistent data. |
54 | But we rely on the user not to diddle the system's live archive. |
55 | Even though we only ever use PROT_READ, using MAP_SHARED would |
56 | not give the system sufficient freedom to e.g. let the on disk |
57 | file go away because it doesn't know we won't call mprotect later. */ |
58 | # define MAP_COPY MAP_PRIVATE |
59 | #endif |
60 | #ifndef MAP_FILE |
61 | /* Some systems do not have this flag; it is superfluous. */ |
62 | # define MAP_FILE 0 |
63 | #endif |
64 | |
/* Record of contiguous pages already mapped from the locale archive.
   The records form a singly linked list chained through NEXT.  */
struct archmapped
{
  void *ptr;			/* Address at which the pages are mapped.  */
  uint32_t from;		/* Offset in the archive file where the
				   mapping begins.  */
  uint32_t len;			/* Length of the mapping in bytes.  */
  struct archmapped *next;	/* Next record in the list, or NULL.  */
};
/* Head of the list of mapping records.  This is NULL until the first
   attempt to open the archive, which sets it to the address of HEADMAP;
   _nl_archive_subfreeres resets it to NULL.  */
static struct archmapped *archmapped;
74 | |
/* This describes the mapping at the beginning of the file that contains
   the header data.  There could be data in the following partial page,
   so this is searched like any other.  Once the archive has been used,
   ARCHMAPPED points to this; if mapping the archive header failed,
   then headmap.ptr is null.  Being a static object, all its fields
   start out as zero, which is why headmap.from is never assigned.  */
static struct archmapped headmap;
/* Result of fstat on the archive, taken when the header was mapped.
   When the archive has to be reopened later, the size, modification
   time, device and inode of the reopened file are compared against
   this to detect that the file was replaced.  */
static struct __stat64_t64 archive_stat;
82 | |
/* Record of locales that we have already loaded from the archive.
   The records form a singly linked list chained through NEXT.  */
struct locale_in_archive
{
  struct locale_in_archive *next;	/* Next record, or NULL.  */
  char *name;				/* Heap-allocated copy of the locale
					   name; released by
					   _nl_archive_subfreeres.  */
  struct __locale_data *data[__LC_LAST];	/* Per-category data, indexed by
						   category number.  An entry is
						   NULL when the data for that
						   category could not be
						   interned.  The LC_ALL slot is
						   never assigned.  */
};
/* Head of the list of loaded locales; new records are pushed at the
   front.  */
static struct locale_in_archive *archloaded;
91 | |
92 | |
/* Local structure and subroutine of _nl_load_locale_from_archive, see
   below.  A struct range describes where the data of one locale category
   is located in the archive file.  */
struct range
{
  uint32_t from;	/* Offset of the category data in the archive.  */
  uint32_t len;		/* Length of the category data in bytes.  */
  int category;		/* Category number the data belongs to.  */
  void *result;		/* Set to NULL when the range is recorded.  */
};

/* Comparison function for qsort: order two struct range objects by
   ascending FROM offset.  Returns a negative value, zero, or a positive
   value if *P1 starts before, at, or after *P2 respectively.

   The offsets are compared explicitly rather than subtracted: the
   difference of two uint32_t values does not fit in an int, so
   subtraction would report the wrong sign whenever the offsets are
   more than INT_MAX apart.  */
static int
rangecmp (const void *p1, const void *p2)
{
  uint32_t from1 = ((const struct range *) p1)->from;
  uint32_t from2 = ((const struct range *) p2)->from;

  return (from1 > from2) - (from1 < from2);
}
107 | |
108 | |
109 | /* Calculate the amount of space needed for all the tables described |
110 | by the given header. Note we do not include the empty table space |
111 | that has been preallocated in the file, so our mapping may not be |
112 | large enough if localedef adds data to the file in place. However, |
113 | doing that would permute the header fields while we are accessing |
114 | them and thus not be safe anyway, so we don't allow for that. */ |
115 | static inline off_t |
116 | calculate_head_size (const struct locarhead *h) |
117 | { |
118 | off_t namehash_end = (h->namehash_offset |
119 | + h->namehash_size * sizeof (struct namehashent)); |
120 | off_t string_end = h->string_offset + h->string_used; |
121 | off_t locrectab_end = (h->locrectab_offset |
122 | + h->locrectab_used * sizeof (struct locrecent)); |
123 | return MAX (namehash_end, MAX (string_end, locrectab_end)); |
124 | } |
125 | |
126 | |
127 | /* Find the locale *NAMEP in the locale archive, and return the |
128 | internalized data structure for its CATEGORY data. If this locale has |
129 | already been loaded from the archive, just returns the existing data |
130 | structure. If successful, sets *NAMEP to point directly into the mapped |
131 | archive string table; that way, the next call can short-circuit strcmp. */ |
132 | struct __locale_data * |
133 | _nl_load_locale_from_archive (int category, const char **namep) |
134 | { |
135 | const char *name = *namep; |
136 | struct |
137 | { |
138 | void *addr; |
139 | size_t len; |
140 | } results[__LC_LAST]; |
141 | struct locale_in_archive *lia; |
142 | struct locarhead *head; |
143 | struct namehashent *namehashtab; |
144 | struct locrecent *locrec; |
145 | struct archmapped *mapped; |
146 | struct archmapped *last; |
147 | unsigned long int hval; |
148 | size_t idx; |
149 | size_t incr; |
150 | struct range ranges[__LC_LAST - 1]; |
151 | int nranges; |
152 | int cnt; |
153 | size_t ps = __sysconf (_SC_PAGE_SIZE); |
154 | int fd = -1; |
155 | |
156 | /* Check if we have already loaded this locale from the archive. |
157 | If we previously loaded the locale but found bogons in the data, |
158 | then we will have stored a null pointer to return here. */ |
159 | for (lia = archloaded; lia != NULL; lia = lia->next) |
160 | if (name == lia->name || !strcmp (name, lia->name)) |
161 | { |
162 | *namep = lia->name; |
163 | return lia->data[category]; |
164 | } |
165 | |
166 | { |
167 | /* If the name contains a codeset, then we normalize the name before |
168 | doing the lookup. */ |
169 | const char *p = strchr (name, '.'); |
170 | if (p != NULL && p[1] != '@' && p[1] != '\0') |
171 | { |
172 | const char *rest = __strchrnul (++p, '@'); |
173 | const char *normalized_codeset = _nl_normalize_codeset (p, rest - p); |
174 | if (normalized_codeset == NULL) /* malloc failure */ |
175 | return NULL; |
176 | if (strncmp (normalized_codeset, p, rest - p) != 0 |
177 | || normalized_codeset[rest - p] != '\0') |
178 | { |
179 | /* There is a normalized codeset name that is different from |
180 | what was specified; reconstruct a new locale name using it. */ |
181 | size_t normlen = strlen (normalized_codeset); |
182 | size_t restlen = strlen (rest) + 1; |
183 | char *newname = alloca (p - name + normlen + restlen); |
184 | memcpy (__mempcpy (__mempcpy (newname, name, p - name), |
185 | normalized_codeset, normlen), |
186 | rest, restlen); |
187 | name = newname; |
188 | } |
189 | free ((char *) normalized_codeset); |
190 | } |
191 | } |
192 | |
193 | /* Make sure the archive is loaded. */ |
194 | if (archmapped == NULL) |
195 | { |
196 | void *result; |
197 | size_t headsize, mapsize; |
198 | |
199 | /* We do this early as a sign that we have tried to open the archive. |
200 | If headmap.ptr remains null, that's an indication that we tried |
201 | and failed, so we won't try again. */ |
202 | archmapped = &headmap; |
203 | |
204 | /* The archive has never been opened. */ |
205 | fd = __open_nocancel (archfname, O_RDONLY|O_LARGEFILE|O_CLOEXEC); |
206 | if (fd < 0) |
207 | /* Cannot open the archive, for whatever reason. */ |
208 | return NULL; |
209 | |
210 | if (__fstat64_time64 (fd, &archive_stat) == -1) |
211 | { |
212 | /* stat failed, very strange. */ |
213 | close_and_out: |
214 | if (fd >= 0) |
215 | __close_nocancel_nostatus (fd); |
216 | return NULL; |
217 | } |
218 | |
219 | |
220 | /* Map an initial window probably large enough to cover the header |
221 | and the first locale's data. With a large address space, we can |
222 | just map the whole file and be sure everything is covered. */ |
223 | |
224 | mapsize = (sizeof (void *) > 4 ? archive_stat.st_size |
225 | : MIN (archive_stat.st_size, ARCHIVE_MAPPING_WINDOW)); |
226 | |
227 | result = __mmap64 (NULL, mapsize, PROT_READ, MAP_FILE|MAP_COPY, fd, 0); |
228 | if (result == MAP_FAILED) |
229 | goto close_and_out; |
230 | |
231 | /* Check whether the file is large enough for the sizes given in |
232 | the header. Theoretically an archive could be so large that |
233 | just the header fails to fit in our initial mapping window. */ |
234 | headsize = calculate_head_size ((const struct locarhead *) result); |
235 | if (headsize > mapsize) |
236 | { |
237 | (void) __munmap (result, mapsize); |
238 | if (sizeof (void *) > 4 || headsize > archive_stat.st_size) |
239 | /* The file is not big enough for the header. Bogus. */ |
240 | goto close_and_out; |
241 | |
242 | /* Freakishly long header. */ |
243 | /* XXX could use mremap when available */ |
244 | mapsize = (headsize + ps - 1) & ~(ps - 1); |
245 | result = __mmap64 (NULL, mapsize, PROT_READ, MAP_FILE|MAP_COPY, |
246 | fd, 0); |
247 | if (result == MAP_FAILED) |
248 | goto close_and_out; |
249 | } |
250 | |
251 | if (sizeof (void *) > 4 || mapsize >= archive_stat.st_size) |
252 | { |
253 | /* We've mapped the whole file already, so we can be |
254 | sure we won't need this file descriptor later. */ |
255 | __close_nocancel_nostatus (fd); |
256 | fd = -1; |
257 | } |
258 | |
259 | headmap.ptr = result; |
260 | /* headmap.from already initialized to zero. */ |
261 | headmap.len = mapsize; |
262 | } |
263 | |
264 | /* If there is no archive or it cannot be loaded for some reason fail. */ |
265 | if (__glibc_unlikely (headmap.ptr == NULL)) |
266 | goto close_and_out; |
267 | |
268 | /* We have the archive available. To find the name we first have to |
269 | determine its hash value. */ |
270 | hval = compute_hashval (name, strlen (name)); |
271 | |
272 | head = headmap.ptr; |
273 | namehashtab = (struct namehashent *) ((char *) head |
274 | + head->namehash_offset); |
275 | |
276 | /* Avoid division by 0 if the file is corrupted. */ |
277 | if (__glibc_unlikely (head->namehash_size <= 2)) |
278 | goto close_and_out; |
279 | |
280 | idx = hval % head->namehash_size; |
281 | incr = 1 + hval % (head->namehash_size - 2); |
282 | |
283 | /* If the name_offset field is zero this means this is a |
284 | deleted entry and therefore no entry can be found. */ |
285 | while (1) |
286 | { |
287 | if (namehashtab[idx].name_offset == 0) |
288 | /* Not found. */ |
289 | goto close_and_out; |
290 | |
291 | if (namehashtab[idx].hashval == hval |
292 | && strcmp (name, headmap.ptr + namehashtab[idx].name_offset) == 0) |
293 | /* Found the entry. */ |
294 | break; |
295 | |
296 | idx += incr; |
297 | if (idx >= head->namehash_size) |
298 | idx -= head->namehash_size; |
299 | } |
300 | |
301 | /* We found an entry. It might be a placeholder for a removed one. */ |
302 | if (namehashtab[idx].locrec_offset == 0) |
303 | goto close_and_out; |
304 | |
305 | locrec = (struct locrecent *) (headmap.ptr + namehashtab[idx].locrec_offset); |
306 | |
307 | if (sizeof (void *) > 4 /* || headmap.len == archive_stat.st_size */) |
308 | { |
309 | /* We already have the whole locale archive mapped in. */ |
310 | assert (headmap.len == archive_stat.st_size); |
311 | for (cnt = 0; cnt < __LC_LAST; ++cnt) |
312 | if (cnt != LC_ALL) |
313 | { |
314 | if (locrec->record[cnt].offset + locrec->record[cnt].len |
315 | > headmap.len) |
316 | /* The archive locrectab contains bogus offsets. */ |
317 | goto close_and_out; |
318 | results[cnt].addr = headmap.ptr + locrec->record[cnt].offset; |
319 | results[cnt].len = locrec->record[cnt].len; |
320 | } |
321 | } |
322 | else |
323 | { |
324 | /* Get the offsets of the data files and sort them. */ |
325 | for (cnt = nranges = 0; cnt < __LC_LAST; ++cnt) |
326 | if (cnt != LC_ALL) |
327 | { |
328 | ranges[nranges].from = locrec->record[cnt].offset; |
329 | ranges[nranges].len = locrec->record[cnt].len; |
330 | ranges[nranges].category = cnt; |
331 | ranges[nranges].result = NULL; |
332 | |
333 | ++nranges; |
334 | } |
335 | |
336 | qsort (ranges, nranges, sizeof (ranges[0]), rangecmp); |
337 | |
338 | /* The information about mmap'd blocks is kept in a list. |
339 | Skip over the blocks which are before the data we need. */ |
340 | last = mapped = archmapped; |
341 | for (cnt = 0; cnt < nranges; ++cnt) |
342 | { |
343 | int upper; |
344 | size_t from; |
345 | size_t to; |
346 | void *addr; |
347 | struct archmapped *newp; |
348 | |
349 | /* Determine whether the appropriate page is already mapped. */ |
350 | while (mapped != NULL |
351 | && (mapped->from + mapped->len |
352 | <= ranges[cnt].from + ranges[cnt].len)) |
353 | { |
354 | last = mapped; |
355 | mapped = mapped->next; |
356 | } |
357 | |
358 | /* Do we have a match? */ |
359 | if (mapped != NULL |
360 | && mapped->from <= ranges[cnt].from |
361 | && (ranges[cnt].from + ranges[cnt].len |
362 | <= mapped->from + mapped->len)) |
363 | { |
364 | /* Yep, already loaded. */ |
365 | results[ranges[cnt].category].addr = ((char *) mapped->ptr |
366 | + ranges[cnt].from |
367 | - mapped->from); |
368 | results[ranges[cnt].category].len = ranges[cnt].len; |
369 | continue; |
370 | } |
371 | |
372 | /* Map the range with the locale data from the file. We will |
373 | try to cover as much of the locale as possible. I.e., if the |
374 | next category (next as in "next offset") is on the current or |
375 | immediately following page we use it as well. */ |
376 | assert (powerof2 (ps)); |
377 | from = ranges[cnt].from & ~(ps - 1); |
378 | upper = cnt; |
379 | do |
380 | { |
381 | to = ranges[upper].from + ranges[upper].len; |
382 | if (to > (size_t) archive_stat.st_size) |
383 | /* The archive locrectab contains bogus offsets. */ |
384 | goto close_and_out; |
385 | to = (to + ps - 1) & ~(ps - 1); |
386 | |
387 | /* If a range is already mmaped in, stop. */ |
388 | if (mapped != NULL && ranges[upper].from >= mapped->from) |
389 | break; |
390 | |
391 | ++upper; |
392 | } |
393 | /* Loop while still in contiguous pages. */ |
394 | while (upper < nranges && ranges[upper].from < to + ps); |
395 | |
396 | /* Open the file if it hasn't happened yet. */ |
397 | if (fd == -1) |
398 | { |
399 | struct __stat64_t64 st; |
400 | fd = __open_nocancel (archfname, |
401 | O_RDONLY|O_LARGEFILE|O_CLOEXEC); |
402 | if (fd == -1) |
403 | /* Cannot open the archive, for whatever reason. */ |
404 | return NULL; |
405 | /* Now verify we think this is really the same archive file |
406 | we opened before. If it has been changed we cannot trust |
407 | the header we read previously. */ |
408 | if (__fstat64_time64 (fd, &st) < 0 |
409 | || st.st_size != archive_stat.st_size |
410 | || st.st_mtime != archive_stat.st_mtime |
411 | || st.st_dev != archive_stat.st_dev |
412 | || st.st_ino != archive_stat.st_ino) |
413 | goto close_and_out; |
414 | } |
415 | |
416 | /* Map the range from the archive. */ |
417 | addr = __mmap64 (NULL, to - from, PROT_READ, MAP_FILE|MAP_COPY, |
418 | fd, from); |
419 | if (addr == MAP_FAILED) |
420 | goto close_and_out; |
421 | |
422 | /* Allocate a record for this mapping. */ |
423 | newp = (struct archmapped *) malloc (sizeof (struct archmapped)); |
424 | if (newp == NULL) |
425 | { |
426 | (void) __munmap (addr, to - from); |
427 | goto close_and_out; |
428 | } |
429 | |
430 | /* And queue it. */ |
431 | newp->ptr = addr; |
432 | newp->from = from; |
433 | newp->len = to - from; |
434 | assert (last->next == mapped); |
435 | newp->next = mapped; |
436 | last->next = newp; |
437 | last = newp; |
438 | |
439 | /* Determine the load addresses for the category data. */ |
440 | do |
441 | { |
442 | assert (ranges[cnt].from >= from); |
443 | results[ranges[cnt].category].addr = ((char *) addr |
444 | + ranges[cnt].from - from); |
445 | results[ranges[cnt].category].len = ranges[cnt].len; |
446 | } |
447 | while (++cnt < upper); |
448 | --cnt; /* The 'for' will increase 'cnt' again. */ |
449 | } |
450 | } |
451 | |
452 | /* We don't need the file descriptor any longer. */ |
453 | if (fd >= 0) |
454 | __close_nocancel_nostatus (fd); |
455 | fd = -1; |
456 | |
457 | /* We succeeded in mapping all the necessary regions of the archive. |
458 | Now we need the expected data structures to point into the data. */ |
459 | |
460 | lia = malloc (sizeof *lia); |
461 | if (__glibc_unlikely (lia == NULL)) |
462 | return NULL; |
463 | |
464 | lia->name = __strdup (*namep); |
465 | if (__glibc_unlikely (lia->name == NULL)) |
466 | { |
467 | free (lia); |
468 | return NULL; |
469 | } |
470 | |
471 | lia->next = archloaded; |
472 | archloaded = lia; |
473 | |
474 | for (cnt = 0; cnt < __LC_LAST; ++cnt) |
475 | if (cnt != LC_ALL) |
476 | { |
477 | lia->data[cnt] = _nl_intern_locale_data (cnt, |
478 | results[cnt].addr, |
479 | results[cnt].len); |
480 | if (__glibc_likely (lia->data[cnt] != NULL)) |
481 | { |
482 | /* _nl_intern_locale_data leaves us these fields to initialize. */ |
483 | lia->data[cnt]->alloc = ld_archive; |
484 | lia->data[cnt]->name = lia->name; |
485 | |
486 | /* We do this instead of bumping the count each time we return |
487 | this data because the mappings stay around forever anyway |
488 | and we might as well hold on to a little more memory and not |
489 | have to rebuild it on the next lookup of the same thing. |
490 | If we were to maintain the usage_count normally and let the |
491 | structures be freed, we would have to remove the elements |
492 | from archloaded too. */ |
493 | lia->data[cnt]->usage_count = UNDELETABLE; |
494 | } |
495 | } |
496 | |
497 | *namep = lia->name; |
498 | return lia->data[category]; |
499 | } |
500 | |
501 | void __libc_freeres_fn_section |
502 | _nl_archive_subfreeres (void) |
503 | { |
504 | struct locale_in_archive *lia; |
505 | struct archmapped *am; |
506 | |
507 | /* Toss out our cached locales. */ |
508 | lia = archloaded; |
509 | while (lia != NULL) |
510 | { |
511 | int category; |
512 | struct locale_in_archive *dead = lia; |
513 | lia = lia->next; |
514 | |
515 | free (dead->name); |
516 | for (category = 0; category < __LC_LAST; ++category) |
517 | if (category != LC_ALL && dead->data[category] != NULL) |
518 | _nl_unload_locale (category, dead->data[category]); |
519 | free (dead); |
520 | } |
521 | archloaded = NULL; |
522 | |
523 | if (archmapped != NULL) |
524 | { |
525 | /* Now toss all the mapping windows, which we know nothing is using any |
526 | more because we just tossed all the locales that point into them. */ |
527 | |
528 | assert (archmapped == &headmap); |
529 | archmapped = NULL; |
530 | (void) __munmap (headmap.ptr, headmap.len); |
531 | am = headmap.next; |
532 | while (am != NULL) |
533 | { |
534 | struct archmapped *dead = am; |
535 | am = am->next; |
536 | (void) __munmap (dead->ptr, dead->len); |
537 | free (dead); |
538 | } |
539 | } |
540 | } |
541 | |