1 | /* Generate fastloading iconv module configuration files. |
2 | Copyright (C) 2000-2021 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | Contributed by Ulrich Drepper <drepper@redhat.com>, 2000. |
5 | |
6 | This program is free software; you can redistribute it and/or modify |
7 | it under the terms of the GNU General Public License as published |
8 | by the Free Software Foundation; version 2 of the License, or |
9 | (at your option) any later version. |
10 | |
11 | This program is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | GNU General Public License for more details. |
15 | |
16 | You should have received a copy of the GNU General Public License |
17 | along with this program; if not, see <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #include <argp.h> |
20 | #include <assert.h> |
21 | #include <error.h> |
22 | #include <errno.h> |
23 | #include <fcntl.h> |
24 | #include <libintl.h> |
25 | #include <locale.h> |
26 | #include <mcheck.h> |
27 | #include <search.h> |
28 | #include <stdint.h> |
29 | #include <stdbool.h> |
30 | #include <stdio.h> |
31 | #include <stdio_ext.h> |
32 | #include <stdlib.h> |
33 | #include <string.h> |
34 | #include <unistd.h> |
35 | #include <sys/cdefs.h> |
36 | #include <sys/uio.h> |
37 | |
38 | #include "iconvconfig.h" |
39 | #include <gconv_parseconfdir.h> |
40 | |
41 | /* Get libc version number. */ |
42 | #include "../version.h" |
43 | |
44 | #define PACKAGE _libc_intl_domainname |
45 | |
46 | |
47 | /* The hashing function we use. */ |
48 | #include "../intl/hash-string.h" |
49 | |
50 | |
51 | /* Types used. */ |
/* One conversion module, created from a `module' configuration line or
   from a builtin transformation.  The record is allocated with extra
   room behind it: TONAME is stored first in that trailing storage and
   FROMNAME and FILENAME point to the strings placed right after it.  */
struct module
{
  char *fromname;                   /* Name of the source charset.  */
  struct Strent *fromname_strent;   /* String table entry of FROMNAME.  */
  char *filename;                   /* File name of the module.  */
  struct Strent *filename_strent;   /* String table entry of FILENAME.  */
  const char *directory;            /* Directory the module belongs to.  */
  struct Strent *directory_strent;  /* String table entry of DIRECTORY.  */
  struct module *next;              /* Next module with the same FROMNAME.  */
  int cost;                         /* Cost of using this module.  */
  struct Strent *toname_strent;     /* String table entry of TONAME.  */
  /* Name of the destination charset.  A C99 flexible array member is used
     instead of the GNU zero-length array extension; the structure size
     and member offsets are unchanged.  */
  char toname[];
};
65 | |
/* One alias.  The record is allocated with extra room behind it: TONAME
   is stored first in that trailing storage and FROMNAME points to the
   string placed right after it.  */
struct alias
{
  char *fromname;           /* The alias name.  */
  struct Strent *froment;   /* String table entry of FROMNAME.  */
  struct module *module;    /* Initialized to NULL when the alias is created.  */
  struct Strent *toent;     /* String table entry of TONAME.  */
  /* The name the alias stands for.  A C99 flexible array member is used
     instead of the GNU zero-length array extension; the structure size
     and member offsets are unchanged.  */
  char toname[];
};
74 | |
/* Node of the NAMES search tree: maps a charset or alias name to the
   index of its entry in the NAME_INFO array.  */
struct name
{
  /* The name itself; the tree is ordered by it.  */
  const char *name;
  /* String table entry of NAME.  */
  struct Strent *strent;
  /* Index into NAME_INFO, or -1 as long as none has been assigned.  */
  int module_idx;
  /* Hash value of NAME, used when the hashing table is filled.  */
  uint32_t hashval;
};
82 | |
83 | struct name_info |
84 | { |
85 | const char *canonical_name; |
86 | struct Strent *canonical_strent; |
87 | |
88 | struct module *from_internal; |
89 | struct module *to_internal; |
90 | |
91 | struct other_conv_list |
92 | { |
93 | int dest_idx; |
94 | struct other_conv |
95 | { |
96 | gidx_t module_idx; |
97 | struct module *module; |
98 | struct other_conv *next; |
99 | } other_conv; |
100 | struct other_conv_list *next; |
101 | } *other_conv_list; |
102 | }; |
103 | |
104 | |
105 | /* Name and version of program. */ |
106 | static void print_version (FILE *stream, struct argp_state *state); |
107 | void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version; |
108 | |
109 | /* Short description of program. */ |
110 | static const char doc[] = N_("\ |
111 | Create fastloading iconv module configuration file." ); |
112 | |
113 | /* Strings for arguments in help texts. */ |
114 | static const char args_doc[] = N_("[DIR...]" ); |
115 | |
116 | /* Prototype for option handler. */ |
117 | static error_t parse_opt (int key, char *arg, struct argp_state *state); |
118 | |
119 | /* Function to print some extra text in the help message. */ |
120 | static char *more_help (int key, const char *text, void *input); |
121 | |
122 | /* Definitions of arguments for argp functions. */ |
123 | #define OPT_PREFIX 300 |
124 | #define OPT_NOSTDLIB 301 |
125 | static const struct argp_option options[] = |
126 | { |
127 | { "prefix" , OPT_PREFIX, N_("PATH" ), 0, |
128 | N_("Prefix used for all file accesses" ) }, |
129 | { "output" , 'o', N_("FILE" ), 0, N_("\ |
130 | Put output in FILE instead of installed location\ |
131 | (--prefix does not apply to FILE)" ) }, |
132 | { "nostdlib" , OPT_NOSTDLIB, NULL, 0, |
133 | N_("Do not search standard directories, only those on the command line" ) }, |
134 | { NULL, 0, NULL, 0, NULL } |
135 | }; |
136 | |
137 | /* Data structure to communicate with argp functions. */ |
138 | static struct argp argp = |
139 | { |
140 | options, parse_opt, args_doc, doc, NULL, more_help |
141 | }; |
142 | |
143 | |
/* Read the configuration found in directory DIR.  Returns zero on
   success and nonzero if no configuration could be read.  */
static int handle_dir (const char *dir);

/* Enter the builtin aliases and transformations.  */
static void add_builtins (void);

/* Collect all aliases from the search tree in ALIAS_LIST.  */
static void get_aliases (void);

/* Collect all modules from the search tree in MODULE_LIST.  */
static void get_modules (void);

/* Build the search tree of all names used by the modules.  */
static void generate_name_list (void);

/* Gather the per-name information in NAME_INFO.  */
static void generate_name_info (void);

/* Write the output file.  */
static int write_output (void);
164 | |
165 | |
/* Prefix given with --prefix (empty by default) and its length.  */
static const char *prefix = "";
static size_t prefix_len;

/* Output file name given with -o, NULL if the option was not used, and
   the length of that name.  */
static const char *output_file;
static size_t output_file_len;

/* Set by --nostdlib: only the directories named on the command line are
   processed, the GCONV_PATH directories are left out.  */
static bool nostdlib;

/* Roots of the search trees for modules, aliases, and the name to index
   mapping.  */
static void *modules;
static void *aliases;
static void *names;

/* Number of `struct name' objects created so far.  */
static int nnames;

/* Growable array of all aliases: storage, used and allocated elements.  */
static struct alias **alias_list;
static size_t nalias_list;
static size_t nalias_list_max;

/* Growable array of all modules: storage, used and allocated elements.  */
static struct module **module_list;
static size_t nmodule_list;
static size_t nmodule_list_max;

/* Per-name information and the number of entries in use.  */
static struct name_info *name_info;
static size_t nname_info;
204 | |
/* Number of translations not from or to INTERNAL.  It is incremented
   while the name information is generated and determines the size of
   the table for these conversions in the output file.  */
static size_t nextra_modules;
207 | |
208 | |
209 | /* Names and aliases for the builtin transformations. */ |
210 | static struct |
211 | { |
212 | const char *from; |
213 | const char *to; |
214 | } builtin_alias[] = |
215 | { |
216 | #define BUILTIN_ALIAS(alias, real) \ |
217 | { .from = alias, .to = real }, |
218 | #define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \ |
219 | MinF, MaxF, MinT, MaxT) |
220 | #include <gconv_builtin.h> |
221 | }; |
222 | #undef BUILTIN_ALIAS |
223 | #undef BUILTIN_TRANSFORMATION |
224 | #define nbuiltin_alias (sizeof (builtin_alias) / sizeof (builtin_alias[0])) |
225 | |
226 | static struct |
227 | { |
228 | const char *from; |
229 | const char *to; |
230 | const char *module; |
231 | int cost; |
232 | } builtin_trans[] = |
233 | { |
234 | #define BUILTIN_ALIAS(alias, real) |
235 | #define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \ |
236 | MinF, MaxF, MinT, MaxT) \ |
237 | { .from = From, .to = To, .module = Name, .cost = Cost }, |
238 | #include <gconv_builtin.h> |
239 | }; |
240 | #undef BUILTIN_ALIAS |
241 | #undef BUILTIN_TRANSFORMATION |
242 | #define nbuiltin_trans (sizeof (builtin_trans) / sizeof (builtin_trans[0])) |
243 | |
244 | |
/* Extension of the module file names.  The build may predefine
   MODULE_EXT; ".so" is used otherwise.  */
#if !defined MODULE_EXT
# define MODULE_EXT ".so"
#endif
static const char gconv_module_ext[] = MODULE_EXT;
250 | |
251 | |
252 | #include <programs/xmalloc.h> |
253 | #include <programs/xasprintf.h> |
254 | |
255 | |
/* Interface of the C string table implementation, which is defined
   elsewhere.  Both types are opaque here.  */
struct Strtab;
struct Strent;

/* Return a newly created, empty string table.  */
extern struct Strtab *strtabinit (void);

/* Release all resources held by string table ST.  */
extern void strtabfree (struct Strtab *st);

/* Enter string STR of length LEN (which must not be zero) in ST.  */
extern struct Strent *strtabadd (struct Strtab *st, const char *str,
                                 size_t len);

/* Finalize ST; the size is stored in *SIZE and a pointer is returned.  */
extern void *strtabfinalize (struct Strtab *st, size_t *size);

/* Return the offset in the string table of the string belonging to SE.  */
extern size_t strtaboffset (struct Strent *se);

/* The string table this program constructs.  */
static struct Strtab *strtab;
278 | |
279 | |
280 | |
281 | int |
282 | main (int argc, char *argv[]) |
283 | { |
284 | int remaining; |
285 | int status = 0; |
286 | |
287 | /* Enable memory use testing. */ |
288 | /* mcheck_pedantic (NULL); */ |
289 | mtrace (); |
290 | |
291 | /* Set locale via LC_ALL. */ |
292 | setlocale (LC_ALL, "" ); |
293 | |
294 | /* Set the text message domain. */ |
295 | textdomain (_libc_intl_domainname); |
296 | |
297 | /* Parse and process arguments. */ |
298 | argp_parse (&argp, argc, argv, 0, &remaining, NULL); |
299 | |
300 | if (nostdlib && remaining == argc) |
301 | error (2, 0, _("Directory arguments required when using --nostdlib" )); |
302 | |
303 | /* Initialize the string table. */ |
304 | strtab = strtabinit (); |
305 | |
306 | /* Handle all directories mentioned. */ |
307 | while (remaining < argc) |
308 | status |= handle_dir (argv[remaining++]); |
309 | |
310 | if (! nostdlib) |
311 | { |
312 | /* In any case also handle the standard directory. */ |
313 | char *path = strdupa (GCONV_PATH), *tp = strsep (&path, ":" ); |
314 | while (tp != NULL) |
315 | { |
316 | status |= handle_dir (tp); |
317 | |
318 | tp = strsep (&path, ":" ); |
319 | } |
320 | } |
321 | |
322 | /* Add the builtin transformations and aliases without overwriting |
323 | anything. */ |
324 | add_builtins (); |
325 | |
326 | /* Store aliases in an array. */ |
327 | get_aliases (); |
328 | |
329 | /* Get list of all modules. */ |
330 | get_modules (); |
331 | |
332 | /* Generate list of all the names we know to handle in some way. */ |
333 | generate_name_list (); |
334 | |
335 | /* Now we know all the names we will handle, collect information |
336 | about them. */ |
337 | generate_name_info (); |
338 | |
339 | /* Write the output file, but only if we haven't seen any error. */ |
340 | if (status == 0) |
341 | status = write_output (); |
342 | else |
343 | error (1, 0, _("no output file produced because warnings were issued" )); |
344 | |
345 | return status; |
346 | } |
347 | |
348 | |
349 | /* Handle program arguments. */ |
350 | static error_t |
351 | parse_opt (int key, char *arg, struct argp_state *state) |
352 | { |
353 | switch (key) |
354 | { |
355 | case OPT_PREFIX: |
356 | prefix = arg; |
357 | prefix_len = strlen (prefix); |
358 | break; |
359 | case 'o': |
360 | output_file = arg; |
361 | output_file_len = strlen (output_file); |
362 | break; |
363 | case OPT_NOSTDLIB: |
364 | nostdlib = true; |
365 | break; |
366 | default: |
367 | return ARGP_ERR_UNKNOWN; |
368 | } |
369 | return 0; |
370 | } |
371 | |
372 | |
373 | static char * |
374 | more_help (int key, const char *text, void *input) |
375 | { |
376 | char *tp = NULL; |
377 | switch (key) |
378 | { |
379 | case ARGP_KEY_HELP_EXTRA: |
380 | /* We print some extra information. */ |
381 | if (asprintf (&tp, gettext ("\ |
382 | For bug reporting instructions, please see:\n\ |
383 | %s.\n" ), REPORT_BUGS_TO) < 0) |
384 | return NULL; |
385 | return tp; |
386 | default: |
387 | break; |
388 | } |
389 | return (char *) text; |
390 | } |
391 | |
392 | |
393 | /* Print the version information. */ |
394 | static void |
395 | print_version (FILE *stream, struct argp_state *state) |
396 | { |
397 | fprintf (stream, "iconvconfig %s%s\n" , PKGVERSION, VERSION); |
398 | fprintf (stream, gettext ("\ |
399 | Copyright (C) %s Free Software Foundation, Inc.\n\ |
400 | This is free software; see the source for copying conditions. There is NO\n\ |
401 | warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\ |
402 | " ), "2021" ); |
403 | fprintf (stream, gettext ("Written by %s.\n" ), "Ulrich Drepper" ); |
404 | } |
405 | |
406 | |
407 | static int |
408 | alias_compare (const void *p1, const void *p2) |
409 | { |
410 | const struct alias *a1 = (const struct alias *) p1; |
411 | const struct alias *a2 = (const struct alias *) p2; |
412 | |
413 | return strcmp (a1->fromname, a2->fromname); |
414 | } |
415 | |
416 | |
417 | static void |
418 | new_alias (const char *fromname, size_t fromlen, const char *toname, |
419 | size_t tolen) |
420 | { |
421 | struct alias *newp; |
422 | void **inserted; |
423 | |
424 | newp = (struct alias *) xmalloc (sizeof (struct alias) + fromlen + tolen); |
425 | |
426 | newp->fromname = mempcpy (newp->toname, toname, tolen); |
427 | memcpy (newp->fromname, fromname, fromlen); |
428 | newp->module = NULL; |
429 | |
430 | inserted = (void **) tsearch (newp, &aliases, alias_compare); |
431 | if (inserted == NULL) |
432 | error (EXIT_FAILURE, errno, gettext ("while inserting in search tree" )); |
433 | if (*inserted != newp) |
434 | /* Something went wrong, free this entry. */ |
435 | free (newp); |
436 | else |
437 | { |
438 | newp->froment = strtabadd (strtab, newp->fromname, fromlen); |
439 | newp->toent = strtabadd (strtab, newp->toname, tolen); |
440 | } |
441 | } |
442 | |
443 | |
/* Add a new alias.  RP points to the rest of a configuration line, which
   is modified in place.  Two more strings are expected: the alias name
   and the name it stands for.  Both are normalized (converted to upper
   case) and stored in the alias database.  Lines which lack either
   string are ignored.  */
static void
add_alias (char *rp)
{
  char *from;
  char *to;
  char *wp;

  /* The <ctype.h> functions require an argument representable as
     unsigned char (or EOF), hence the casts: plain char may be signed
     and the line may contain bytes with the high bit set.  */
  while (isspace ((unsigned char) *rp))
    ++rp;
  from = wp = rp;
  while (*rp != '\0' && !isspace ((unsigned char) *rp))
    *wp++ = toupper ((unsigned char) *rp++);
  if (*rp == '\0')
    /* There is no `to' string on the line.  Ignore it.  */
    return;
  *wp++ = '\0';
  to = ++rp;
  while (isspace ((unsigned char) *rp))
    ++rp;
  /* The `to' string is moved down so that it directly follows the NUL
     byte which terminates the `from' string.  */
  while (*rp != '\0' && !isspace ((unsigned char) *rp))
    *wp++ = toupper ((unsigned char) *rp++);
  if (to == wp)
    /* No `to' string, ignore the line.  */
    return;
  *wp++ = '\0';

  assert (strlen (from) + 1 == (size_t) (to - from));
  assert (strlen (to) + 1 == (size_t) (wp - to));

  /* Both lengths include the terminating NUL byte.  */
  new_alias (from, to - from, to, wp - to);
}
478 | |
479 | |
480 | static void |
481 | append_alias (const void *nodep, VISIT value, int level) |
482 | { |
483 | if (value != leaf && value != postorder) |
484 | return; |
485 | |
486 | if (nalias_list_max == nalias_list) |
487 | { |
488 | nalias_list_max += 50; |
489 | alias_list = (struct alias **) xrealloc (alias_list, |
490 | (nalias_list_max |
491 | * sizeof (struct alias *))); |
492 | } |
493 | |
494 | alias_list[nalias_list++] = *(struct alias **) nodep; |
495 | } |
496 | |
497 | |
498 | static void |
499 | get_aliases (void) |
500 | { |
501 | twalk (aliases, append_alias); |
502 | } |
503 | |
504 | |
505 | static int |
506 | module_compare (const void *p1, const void *p2) |
507 | { |
508 | const struct module *m1 = (const struct module *) p1; |
509 | const struct module *m2 = (const struct module *) p2; |
510 | int result; |
511 | |
512 | result = strcmp (m1->fromname, m2->fromname); |
513 | if (result == 0) |
514 | result = strcmp (m1->toname, m2->toname); |
515 | |
516 | return result; |
517 | } |
518 | |
519 | |
520 | /* Create new module record. */ |
521 | static void |
522 | new_module (const char *fromname, size_t fromlen, const char *toname, |
523 | size_t tolen, const char *dir_in, |
524 | const char *filename, size_t filelen, int cost, size_t need_ext) |
525 | { |
526 | struct module *new_module; |
527 | size_t dirlen = strlen (dir_in) + 1; |
528 | const char *directory = xstrdup (dir_in); |
529 | char *tmp; |
530 | void **inserted; |
531 | |
532 | new_module = (struct module *) xmalloc (sizeof (struct module) |
533 | + fromlen + tolen + filelen |
534 | + need_ext); |
535 | |
536 | new_module->fromname = mempcpy (new_module->toname, toname, tolen); |
537 | |
538 | new_module->filename = mempcpy (new_module->fromname, fromname, fromlen); |
539 | |
540 | new_module->cost = cost; |
541 | new_module->next = NULL; |
542 | |
543 | tmp = mempcpy (new_module->filename, filename, filelen); |
544 | if (need_ext) |
545 | { |
546 | memcpy (tmp - 1, gconv_module_ext, need_ext + 1); |
547 | filelen += need_ext; |
548 | } |
549 | new_module->directory = directory; |
550 | |
551 | /* Now insert the new module data structure in our search tree. */ |
552 | inserted = (void **) tsearch (new_module, &modules, module_compare); |
553 | if (inserted == NULL) |
554 | error (EXIT_FAILURE, errno, "while inserting in search tree" ); |
555 | if (*inserted != new_module) |
556 | free (new_module); |
557 | else |
558 | { |
559 | new_module->fromname_strent = strtabadd (strtab, new_module->fromname, |
560 | fromlen); |
561 | new_module->toname_strent = strtabadd (strtab, new_module->toname, |
562 | tolen); |
563 | new_module->filename_strent = strtabadd (strtab, new_module->filename, |
564 | filelen); |
565 | new_module->directory_strent = strtabadd (strtab, directory, dirlen); |
566 | } |
567 | } |
568 | |
569 | |
570 | /* Add new module. */ |
571 | static void |
572 | add_module (char *rp, const char *directory, |
573 | size_t dirlen __attribute__ ((__unused__)), |
574 | int modcount __attribute__ ((__unused__))) |
575 | { |
576 | /* We expect now |
577 | 1. `from' name |
578 | 2. `to' name |
579 | 3. filename of the module |
580 | 4. an optional cost value |
581 | */ |
582 | char *from; |
583 | char *to; |
584 | char *module; |
585 | char *wp; |
586 | int need_ext; |
587 | int cost; |
588 | |
589 | while (isspace (*rp)) |
590 | ++rp; |
591 | from = rp; |
592 | while (*rp != '\0' && !isspace (*rp)) |
593 | { |
594 | *rp = toupper (*rp); |
595 | ++rp; |
596 | } |
597 | if (*rp == '\0') |
598 | return; |
599 | *rp++ = '\0'; |
600 | to = wp = rp; |
601 | while (isspace (*rp)) |
602 | ++rp; |
603 | while (*rp != '\0' && !isspace (*rp)) |
604 | *wp++ = toupper (*rp++); |
605 | if (*rp == '\0') |
606 | return; |
607 | *wp++ = '\0'; |
608 | do |
609 | ++rp; |
610 | while (isspace (*rp)); |
611 | module = wp; |
612 | while (*rp != '\0' && !isspace (*rp)) |
613 | *wp++ = *rp++; |
614 | if (*rp == '\0') |
615 | { |
616 | /* There is no cost, use one by default. */ |
617 | *wp++ = '\0'; |
618 | cost = 1; |
619 | } |
620 | else |
621 | { |
622 | /* There might be a cost value. */ |
623 | char *endp; |
624 | |
625 | *wp++ = '\0'; |
626 | cost = strtol (rp, &endp, 10); |
627 | if (rp == endp || cost < 1) |
628 | /* No useful information. */ |
629 | cost = 1; |
630 | } |
631 | |
632 | if (module[0] == '\0') |
633 | /* No module name given. */ |
634 | return; |
635 | |
636 | /* See whether we must add the ending. */ |
637 | need_ext = 0; |
638 | if ((size_t) (wp - module) < sizeof (gconv_module_ext) |
639 | || memcmp (wp - sizeof (gconv_module_ext), gconv_module_ext, |
640 | sizeof (gconv_module_ext)) != 0) |
641 | /* We must add the module extension. */ |
642 | need_ext = sizeof (gconv_module_ext) - 1; |
643 | |
644 | assert (strlen (from) + 1 == (size_t) (to - from)); |
645 | assert (strlen (to) + 1 == (size_t) (module - to)); |
646 | assert (strlen (module) + 1 == (size_t) (wp - module)); |
647 | |
648 | new_module (from, to - from, to, module - to, directory, module, wp - module, |
649 | cost, need_ext); |
650 | } |
651 | |
652 | /* Read config files and add the data for this directory to cache. */ |
653 | static int |
654 | handle_dir (const char *dir) |
655 | { |
656 | size_t dirlen = strlen (dir); |
657 | bool found = false; |
658 | |
659 | char *fulldir = xasprintf ("%s%s%s" , dir[0] == '/' ? prefix : "" , |
660 | dir, dir[dirlen - 1] != '/' ? "/" : "" ); |
661 | |
662 | found = gconv_parseconfdir (fulldir, strlen (fulldir)); |
663 | |
664 | if (!found) |
665 | { |
666 | error (0, errno, "failed to open gconv configuration files in `%s'" , |
667 | dir); |
668 | error (0, 0, |
669 | "ensure that the directory contains either a valid " |
670 | "gconv-modules file or a gconv-modules.d directory with " |
671 | "configuration files with names ending in .conf." ); |
672 | } |
673 | |
674 | free (fulldir); |
675 | |
676 | return found ? 0 : 1; |
677 | } |
678 | |
679 | |
680 | static void |
681 | append_module (const void *nodep, VISIT value, int level) |
682 | { |
683 | struct module *mo; |
684 | |
685 | if (value != leaf && value != postorder) |
686 | return; |
687 | |
688 | mo = *(struct module **) nodep; |
689 | |
690 | if (nmodule_list > 0 |
691 | && strcmp (module_list[nmodule_list - 1]->fromname, mo->fromname) == 0) |
692 | { |
693 | /* Same name. */ |
694 | mo->next = module_list[nmodule_list - 1]; |
695 | module_list[nmodule_list - 1] = mo; |
696 | |
697 | return; |
698 | } |
699 | |
700 | if (nmodule_list_max == nmodule_list) |
701 | { |
702 | nmodule_list_max += 50; |
703 | module_list = (struct module **) xrealloc (module_list, |
704 | (nmodule_list_max |
705 | * sizeof (struct module *))); |
706 | } |
707 | |
708 | module_list[nmodule_list++] = mo; |
709 | } |
710 | |
711 | |
712 | static void |
713 | get_modules (void) |
714 | { |
715 | twalk (modules, append_module); |
716 | } |
717 | |
718 | |
719 | static void |
720 | add_builtins (void) |
721 | { |
722 | size_t cnt; |
723 | |
724 | /* Add all aliases. */ |
725 | for (cnt = 0; cnt < nbuiltin_alias; ++cnt) |
726 | new_alias (builtin_alias[cnt].from, |
727 | strlen (builtin_alias[cnt].from) + 1, |
728 | builtin_alias[cnt].to, |
729 | strlen (builtin_alias[cnt].to) + 1); |
730 | |
731 | /* add the builtin transformations. */ |
732 | for (cnt = 0; cnt < nbuiltin_trans; ++cnt) |
733 | new_module (builtin_trans[cnt].from, |
734 | strlen (builtin_trans[cnt].from) + 1, |
735 | builtin_trans[cnt].to, |
736 | strlen (builtin_trans[cnt].to) + 1, |
737 | "" , builtin_trans[cnt].module, |
738 | strlen (builtin_trans[cnt].module) + 1, |
739 | builtin_trans[cnt].cost, 0); |
740 | } |
741 | |
742 | |
743 | static int |
744 | name_compare (const void *p1, const void *p2) |
745 | { |
746 | const struct name *n1 = (const struct name *) p1; |
747 | const struct name *n2 = (const struct name *) p2; |
748 | |
749 | return strcmp (n1->name, n2->name); |
750 | } |
751 | |
752 | |
753 | static struct name * |
754 | new_name (const char *str, struct Strent *strent) |
755 | { |
756 | struct name *newp = (struct name *) xmalloc (sizeof (struct name)); |
757 | |
758 | newp->name = str; |
759 | newp->strent = strent; |
760 | newp->module_idx = -1; |
761 | newp->hashval = __hash_string (str); |
762 | |
763 | ++nnames; |
764 | |
765 | return newp; |
766 | } |
767 | |
768 | |
769 | static void |
770 | generate_name_list (void) |
771 | { |
772 | size_t i; |
773 | |
774 | /* A name we always need. */ |
775 | tsearch (new_name ("INTERNAL" , strtabadd (strtab, "INTERNAL" , |
776 | sizeof ("INTERNAL" ))), |
777 | &names, name_compare); |
778 | |
779 | for (i = 0; i < nmodule_list; ++i) |
780 | { |
781 | struct module *runp; |
782 | |
783 | if (strcmp (module_list[i]->fromname, "INTERNAL" ) != 0) |
784 | tsearch (new_name (module_list[i]->fromname, |
785 | module_list[i]->fromname_strent), |
786 | &names, name_compare); |
787 | |
788 | for (runp = module_list[i]; runp != NULL; runp = runp->next) |
789 | if (strcmp (runp->toname, "INTERNAL" ) != 0) |
790 | tsearch (new_name (runp->toname, runp->toname_strent), |
791 | &names, name_compare); |
792 | } |
793 | } |
794 | |
795 | |
796 | static int |
797 | name_to_module_idx (const char *name, int add) |
798 | { |
799 | struct name **res; |
800 | struct name fake_name = { .name = name }; |
801 | int idx; |
802 | |
803 | res = (struct name **) tfind (&fake_name, &names, name_compare); |
804 | if (res == NULL) |
805 | abort (); |
806 | |
807 | idx = (*res)->module_idx; |
808 | if (idx == -1 && add) |
809 | /* No module index assigned yet. */ |
810 | idx = (*res)->module_idx = nname_info++; |
811 | |
812 | return idx; |
813 | } |
814 | |
815 | |
816 | static void |
817 | generate_name_info (void) |
818 | { |
819 | size_t i; |
820 | int idx; |
821 | |
822 | name_info = (struct name_info *) xcalloc (nmodule_list + 1, |
823 | sizeof (struct name_info)); |
824 | |
825 | /* First add a special entry for the INTERNAL name. This must have |
826 | index zero. */ |
827 | idx = name_to_module_idx ("INTERNAL" , 1); |
828 | name_info[0].canonical_name = "INTERNAL" ; |
829 | name_info[0].canonical_strent = strtabadd (strtab, "INTERNAL" , |
830 | sizeof ("INTERNAL" )); |
831 | assert (nname_info == 1); |
832 | |
833 | for (i = 0; i < nmodule_list; ++i) |
834 | { |
835 | struct module *runp; |
836 | |
837 | for (runp = module_list[i]; runp != NULL; runp = runp->next) |
838 | if (strcmp (runp->fromname, "INTERNAL" ) == 0) |
839 | { |
840 | idx = name_to_module_idx (runp->toname, 1); |
841 | name_info[idx].from_internal = runp; |
842 | assert (name_info[idx].canonical_name == NULL |
843 | || strcmp (name_info[idx].canonical_name, |
844 | runp->toname) == 0); |
845 | name_info[idx].canonical_name = runp->toname; |
846 | name_info[idx].canonical_strent = runp->toname_strent; |
847 | } |
848 | else if (strcmp (runp->toname, "INTERNAL" ) == 0) |
849 | { |
850 | idx = name_to_module_idx (runp->fromname, 1); |
851 | name_info[idx].to_internal = runp; |
852 | assert (name_info[idx].canonical_name == NULL |
853 | || strcmp (name_info[idx].canonical_name, |
854 | runp->fromname) == 0); |
855 | name_info[idx].canonical_name = runp->fromname; |
856 | name_info[idx].canonical_strent = runp->fromname_strent; |
857 | } |
858 | else |
859 | { |
860 | /* This is a transformation not to or from the INTERNAL |
861 | encoding. */ |
862 | int from_idx = name_to_module_idx (runp->fromname, 1); |
863 | int to_idx = name_to_module_idx (runp->toname, 1); |
864 | struct other_conv_list *newp; |
865 | |
866 | newp = (struct other_conv_list *) |
867 | xmalloc (sizeof (struct other_conv_list)); |
868 | newp->other_conv.module_idx = to_idx; |
869 | newp->other_conv.module = runp; |
870 | newp->other_conv.next = NULL; /* XXX Allow multiple module sequence */ |
871 | newp->dest_idx = to_idx; |
872 | newp->next = name_info[from_idx].other_conv_list; |
873 | name_info[from_idx].other_conv_list = newp; |
874 | assert (name_info[from_idx].canonical_name == NULL |
875 | || strcmp (name_info[from_idx].canonical_name, |
876 | runp->fromname) == 0); |
877 | name_info[from_idx].canonical_name = runp->fromname; |
878 | name_info[from_idx].canonical_strent = runp->fromname_strent; |
879 | |
880 | ++nextra_modules; |
881 | } |
882 | } |
883 | |
884 | /* Now add the module index information for all the aliases. */ |
885 | for (i = 0; i < nalias_list; ++i) |
886 | { |
887 | struct name fake_name = { .name = alias_list[i]->toname }; |
888 | struct name **tonamep; |
889 | |
890 | tonamep = (struct name **) tfind (&fake_name, &names, name_compare); |
891 | if (tonamep != NULL) |
892 | { |
893 | struct name *newp = new_name (alias_list[i]->fromname, |
894 | alias_list[i]->froment); |
895 | newp->module_idx = (*tonamep)->module_idx; |
896 | tsearch (newp, &names, name_compare); |
897 | } |
898 | } |
899 | } |
900 | |
901 | |
/* Return nonzero if CANDIDATE is prime.  The callers only pass odd
   numbers which are not less than 10, therefore only odd divisors
   starting with 3 are tried.  */
static int
is_prime (unsigned long int candidate)
{
  unsigned long int divisor = 3;
  /* Always the square of DIVISOR.  */
  unsigned long int square = 9;

  for (;;)
    {
      if (square >= candidate || candidate % divisor == 0)
        break;

      /* Advance to the next odd divisor D + 2 and update the square:
         (D + 2)^2 = D^2 + 4 * (D + 1).  */
      square += 4 * (divisor + 1);
      divisor += 2;
    }

  return candidate % divisor != 0;
}
918 | |
919 | |
/* Return the first number not smaller than SEED, made odd, which
   is_prime accepts.  */
static uint32_t
next_prime (uint32_t seed)
{
  /* Setting the lowest bit makes the start value odd; only odd numbers
     are tried from there.  */
  for (seed |= 1; !is_prime (seed); seed += 2)
    continue;

  return seed;
}
931 | |
932 | |
933 | /* Format of the output file. |
934 | |
935 | Offset Length Description |
936 | 0000 4 Magic header bytes |
937 | 0004 2 Offset of string table (stoff) |
938 | 0006 2 Offset of name hashing table (hoff) |
939 | 0008 2 Hashing table size (hsize) |
940 | 000A 2 Offset of module table (moff) |
941 | 000C 2 Offset of other conversion module table (ooff) |
942 | |
943 | stoff ??? String table |
944 | |
945 | hoff 8*hsize Array of tuples |
946 | string table offset |
947 | module index |
948 | |
949 | moff ??? Array of tuples |
950 | canonical name offset |
951 | from-internal module dir name offset |
		from-internal module name offset
953 | to-internal module dir name offset |
954 | to-internal module name offset |
955 | offset into other conversion table |
956 | |
957 | ooff ??? One or more of |
958 | number of steps/modules |
959 | one or more of tuple |
960 | canonical name offset for output |
961 | module dir name offset |
962 | module name offset |
963 | (following last entry with step count 0) |
964 | */ |
965 | |
/* The name hashing table written to the output file and the number of
   its entries.  */
static struct hash_entry *hash_table;
static size_t hash_size;
968 | |
969 | /* Function to insert the names. */ |
970 | static void name_insert (const void *nodep, VISIT value, int level) |
971 | { |
972 | struct name *name; |
973 | unsigned int idx; |
974 | unsigned int hval2; |
975 | |
976 | if (value != leaf && value != postorder) |
977 | return; |
978 | |
979 | name = *(struct name **) nodep; |
980 | idx = name->hashval % hash_size; |
981 | hval2 = 1 + name->hashval % (hash_size - 2); |
982 | |
983 | while (hash_table[idx].string_offset != 0) |
984 | if ((idx += hval2) >= hash_size) |
985 | idx -= hash_size; |
986 | |
987 | hash_table[idx].string_offset = strtaboffset (name->strent); |
988 | |
989 | assert (name->module_idx != -1); |
990 | hash_table[idx].module_idx = name->module_idx; |
991 | } |
992 | |
993 | static int |
994 | write_output (void) |
995 | { |
996 | int fd; |
997 | char *string_table; |
998 | size_t string_table_size; |
999 | struct gconvcache_header ; |
1000 | struct module_entry *module_table; |
1001 | char *; |
1002 | char *; |
1003 | size_t n; |
1004 | int idx; |
1005 | struct iovec iov[6]; |
1006 | static const gidx_t null_word; |
1007 | size_t total; |
1008 | char finalname[prefix_len + sizeof GCONV_MODULES_CACHE]; |
1009 | char tmpfname[(output_file == NULL ? sizeof finalname : output_file_len + 1) |
1010 | + strlen (".XXXXXX" )]; |
1011 | |
1012 | /* Open the output file. */ |
1013 | if (output_file == NULL) |
1014 | { |
1015 | assert (GCONV_MODULES_CACHE[0] == '/'); |
1016 | strcpy (stpcpy (mempcpy (tmpfname, prefix, prefix_len), |
1017 | GCONV_MODULES_CACHE), |
1018 | ".XXXXXX" ); |
1019 | strcpy (mempcpy (finalname, prefix, prefix_len), GCONV_MODULES_CACHE); |
1020 | } |
1021 | else |
1022 | strcpy (mempcpy (tmpfname, output_file, output_file_len), ".XXXXXX" ); |
1023 | fd = mkstemp (tmpfname); |
1024 | if (fd == -1) |
1025 | return 1; |
1026 | |
1027 | /* Create the string table. */ |
1028 | string_table = strtabfinalize (strtab, &string_table_size); |
1029 | |
1030 | /* Create the hashing table. We know how many strings we have. |
1031 | Creating a perfect hash table is not reasonable here. Therefore |
1032 | we use open hashing and a table size which is the next prime 50% |
1033 | larger than the number of strings. */ |
1034 | hash_size = next_prime (nnames + (nnames >> 1)); |
1035 | hash_table = (struct hash_entry *) xcalloc (hash_size, |
1036 | sizeof (struct hash_entry)); |
1037 | /* Fill the hash table. */ |
1038 | twalk (names, name_insert); |
1039 | |
1040 | /* Create the section for the module list. */ |
1041 | module_table = (struct module_entry *) xcalloc (sizeof (struct module_entry), |
1042 | nname_info); |
1043 | |
1044 | /* Allocate memory for the non-INTERNAL conversions. The allocated |
1045 | memory can be more than is actually needed. */ |
1046 | extra_table = (char *) xcalloc (sizeof (struct extra_entry) |
1047 | + sizeof (gidx_t) |
1048 | + sizeof (struct extra_entry_module), |
1049 | nextra_modules); |
1050 | cur_extra_table = extra_table; |
1051 | |
1052 | /* Fill in the module information. */ |
1053 | for (n = 0; n < nname_info; ++n) |
1054 | { |
1055 | module_table[n].canonname_offset = |
1056 | strtaboffset (name_info[n].canonical_strent); |
1057 | |
1058 | if (name_info[n].from_internal == NULL) |
1059 | { |
1060 | module_table[n].fromdir_offset = 0; |
1061 | module_table[n].fromname_offset = 0; |
1062 | } |
1063 | else |
1064 | { |
1065 | module_table[n].fromdir_offset = |
1066 | strtaboffset (name_info[n].from_internal->directory_strent); |
1067 | module_table[n].fromname_offset = |
1068 | strtaboffset (name_info[n].from_internal->filename_strent); |
1069 | } |
1070 | |
1071 | if (name_info[n].to_internal == NULL) |
1072 | { |
1073 | module_table[n].todir_offset = 0; |
1074 | module_table[n].toname_offset = 0; |
1075 | } |
1076 | else |
1077 | { |
1078 | module_table[n].todir_offset = |
1079 | strtaboffset (name_info[n].to_internal->directory_strent); |
1080 | module_table[n].toname_offset = |
1081 | strtaboffset (name_info[n].to_internal->filename_strent); |
1082 | } |
1083 | |
1084 | if (name_info[n].other_conv_list != NULL) |
1085 | { |
1086 | struct other_conv_list *other = name_info[n].other_conv_list; |
1087 | |
1088 | /* Store the reference. We add 1 to distinguish the entry |
1089 | at offset zero from the case where no extra modules are |
1090 | available. The file reader has to account for the |
1091 | offset. */ |
1092 | module_table[n].extra_offset = 1 + cur_extra_table - extra_table; |
1093 | |
1094 | do |
1095 | { |
1096 | struct other_conv *runp; |
1097 | struct extra_entry *; |
1098 | |
1099 | /* Allocate new entry. */ |
1100 | extra = (struct extra_entry *) cur_extra_table; |
1101 | cur_extra_table += sizeof (struct extra_entry); |
1102 | extra->module_cnt = 0; |
1103 | |
1104 | runp = &other->other_conv; |
1105 | do |
1106 | { |
1107 | cur_extra_table += sizeof (struct extra_entry_module); |
1108 | extra->module[extra->module_cnt].outname_offset = |
1109 | runp->next == NULL |
1110 | ? other->dest_idx : runp->next->module_idx; |
1111 | extra->module[extra->module_cnt].dir_offset = |
1112 | strtaboffset (runp->module->directory_strent); |
1113 | extra->module[extra->module_cnt].name_offset = |
1114 | strtaboffset (runp->module->filename_strent); |
1115 | ++extra->module_cnt; |
1116 | |
1117 | runp = runp->next; |
1118 | } |
1119 | while (runp != NULL); |
1120 | |
1121 | other = other->next; |
1122 | } |
1123 | while (other != NULL); |
1124 | |
1125 | /* Final module_cnt is zero. */ |
1126 | *((gidx_t *) cur_extra_table) = 0; |
1127 | cur_extra_table += sizeof (gidx_t); |
1128 | } |
1129 | } |
1130 | |
1131 | /* Clear padding. */ |
1132 | memset (&header, 0, sizeof (struct gconvcache_header)); |
1133 | |
1134 | header.magic = GCONVCACHE_MAGIC; |
1135 | |
1136 | iov[0].iov_base = &header; |
1137 | iov[0].iov_len = sizeof (struct gconvcache_header); |
1138 | total = iov[0].iov_len; |
1139 | |
1140 | header.string_offset = total; |
1141 | iov[1].iov_base = string_table; |
1142 | iov[1].iov_len = string_table_size; |
1143 | total += iov[1].iov_len; |
1144 | |
1145 | idx = 2; |
1146 | if ((string_table_size & (sizeof (gidx_t) - 1)) != 0) |
1147 | { |
1148 | iov[2].iov_base = (void *) &null_word; |
1149 | iov[2].iov_len = (sizeof (gidx_t) |
1150 | - (string_table_size & (sizeof (gidx_t) - 1))); |
1151 | total += iov[2].iov_len; |
1152 | ++idx; |
1153 | } |
1154 | |
1155 | header.hash_offset = total; |
1156 | header.hash_size = hash_size; |
1157 | iov[idx].iov_base = hash_table; |
1158 | iov[idx].iov_len = hash_size * sizeof (struct hash_entry); |
1159 | total += iov[idx].iov_len; |
1160 | ++idx; |
1161 | |
1162 | header.module_offset = total; |
1163 | iov[idx].iov_base = module_table; |
1164 | iov[idx].iov_len = nname_info * sizeof (struct module_entry); |
1165 | total += iov[idx].iov_len; |
1166 | ++idx; |
1167 | |
1168 | assert ((size_t) (cur_extra_table - extra_table) |
1169 | <= ((sizeof (struct extra_entry) + sizeof (gidx_t) |
1170 | + sizeof (struct extra_entry_module)) |
1171 | * nextra_modules)); |
1172 | header.otherconv_offset = total; |
1173 | iov[idx].iov_base = extra_table; |
1174 | iov[idx].iov_len = cur_extra_table - extra_table; |
1175 | total += iov[idx].iov_len; |
1176 | ++idx; |
1177 | |
1178 | if ((size_t) TEMP_FAILURE_RETRY (writev (fd, iov, idx)) != total |
1179 | /* The file was created with mode 0600. Make it world-readable. */ |
1180 | || fchmod (fd, 0644) != 0 |
1181 | /* Rename the file, possibly replacing an old one. */ |
1182 | || rename (tmpfname, output_file ?: finalname) != 0) |
1183 | { |
1184 | int save_errno = errno; |
1185 | close (fd); |
1186 | unlink (tmpfname); |
1187 | error (EXIT_FAILURE, save_errno, |
1188 | gettext ("cannot generate output file" )); |
1189 | } |
1190 | |
1191 | close (fd); |
1192 | |
1193 | return 0; |
1194 | } |
1195 | |