| 1 | /* Handle configuration data. |
| 2 | Copyright (C) 1997-2022 Free Software Foundation, Inc. |
| 3 | This file is part of the GNU C Library. |
| 4 | |
| 5 | The GNU C Library is free software; you can redistribute it and/or |
| 6 | modify it under the terms of the GNU Lesser General Public |
| 7 | License as published by the Free Software Foundation; either |
| 8 | version 2.1 of the License, or (at your option) any later version. |
| 9 | |
| 10 | The GNU C Library is distributed in the hope that it will be useful, |
| 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 13 | Lesser General Public License for more details. |
| 14 | |
| 15 | You should have received a copy of the GNU Lesser General Public |
| 16 | License along with the GNU C Library; if not, see |
| 17 | <https://www.gnu.org/licenses/>. */ |
| 18 | |
| 19 | #include <assert.h> |
| 20 | #include <ctype.h> |
| 21 | #include <errno.h> |
| 22 | #include <limits.h> |
| 23 | #include <locale.h> |
| 24 | #include <search.h> |
| 25 | #include <stddef.h> |
| 26 | #include <stdio.h> |
| 27 | #include <stdio_ext.h> |
| 28 | #include <stdlib.h> |
| 29 | #include <string.h> |
| 30 | #include <unistd.h> |
| 31 | #include <sys/param.h> |
| 32 | |
| 33 | #include <libc-lock.h> |
| 34 | #include <gconv_int.h> |
| 35 | #include <gconv_parseconfdir.h> |
| 36 | |
/* This is the default path where we look for module lists.  GCONV_PATH
   is not defined in this file and has to be supplied from outside
   (presumably by the build system -- TODO confirm).  The value must be
   an absolute path: __gconv_get_path asserts that its first character
   is '/'.  */
static const char default_gconv_path[] = GCONV_PATH;
| 39 | |
/* Type to represent one element of the search path.  */
struct path_elem
{
  /* Directory name.  __gconv_get_path stores it NUL-terminated and with
     a trailing slash; a NULL name marks the end of an array of
     elements.  */
  const char *name;
  /* Length of NAME, counting the trailing slash but not the terminating
     NUL byte.  */
  size_t len;
};
| 46 | |
/* The path elements, as determined by the __gconv_get_path function.
   All path elements end in a slash.  The array is terminated by an
   element whose name is NULL.  After __gconv_get_path has run this
   points either to malloc'ed memory or to empty_path_elem below.  */
struct path_elem *__gconv_path_elem;
/* Maximum length of a single path element in __gconv_path_elem.  */
size_t __gconv_max_path_elem_len;

/* We use the following struct if we couldn't allocate memory.  Its NULL
   name makes it look like an empty, already terminated list.  */
static const struct path_elem empty_path_elem = { NULL, 0 };

/* Filename extension for the modules.  add_module appends it to module
   file names which do not already end in it.  */
#ifndef MODULE_EXT
# define MODULE_EXT ".so"
#endif
static const char gconv_module_ext[] = MODULE_EXT;
| 61 | |
/* We have a few builtin transformations.  The table is produced by
   including gconv_builtin.h with BUILTIN_TRANSFORMATION expanding to
   one initializer per entry and BUILTIN_ALIAS expanding to nothing.
   Only the two names, the cost and the module name are recorded; the
   Fct, BtowcFct, MinF, MaxF, MinT and MaxT arguments are ignored here.
   cost_lo is INT_MAX so that, for equal cost_hi, insert_module never
   prefers a builtin entry over one that is already in the tree.  */
static struct gconv_module builtin_modules[] =
{
#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
                               MinF, MaxF, MinT, MaxT) \
  { \
    .from_string = From, \
    .to_string = To, \
    .cost_hi = Cost, \
    .cost_lo = INT_MAX, \
    .module_name = Name \
  },
#define BUILTIN_ALIAS(From, To)

#include "gconv_builtin.h"

#undef BUILTIN_TRANSFORMATION
#undef BUILTIN_ALIAS
};
| 81 | |
/* The builtin aliases, packed into one character array.  Including
   gconv_builtin.h with BUILTIN_ALIAS expanding to From "\0" To "\0"
   (and BUILTIN_TRANSFORMATION expanding to nothing) concatenates all
   pairs into a single string literal.  The literal's own terminating
   NUL byte therefore follows the last pair, which is the end marker the
   loop in __gconv_read_conf tests for.  */
static const char builtin_aliases[] =
{
#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
                               MinF, MaxF, MinT, MaxT)
#define BUILTIN_ALIAS(From, To) From "\0" To "\0"

#include "gconv_builtin.h"

#undef BUILTIN_TRANSFORMATION
#undef BUILTIN_ALIAS
};
| 93 | |
| 94 | |
/* Value of the GCONV_PATH environment variable.  __gconv_get_path
   treats NULL as "no user-defined path" and then uses only the default
   path.  The variable is not assigned in the code visible here, so it
   has to be set up elsewhere before the configuration is loaded.  */
const char *__gconv_path_envvar;
| 97 | |
| 98 | |
| 99 | /* Test whether there is already a matching module known. */ |
| 100 | static int |
| 101 | detect_conflict (const char *alias) |
| 102 | { |
| 103 | struct gconv_module *node = __gconv_modules_db; |
| 104 | |
| 105 | while (node != NULL) |
| 106 | { |
| 107 | int cmpres = strcmp (alias, node->from_string); |
| 108 | |
| 109 | if (cmpres == 0) |
| 110 | /* We have a conflict. */ |
| 111 | return 1; |
| 112 | else if (cmpres < 0) |
| 113 | node = node->left; |
| 114 | else |
| 115 | node = node->right; |
| 116 | } |
| 117 | |
| 118 | return node != NULL; |
| 119 | } |
| 120 | |
| 121 | |
| 122 | /* The actual code to add aliases. */ |
| 123 | static void |
| 124 | add_alias2 (const char *from, const char *to, const char *wp) |
| 125 | { |
| 126 | /* Test whether this alias conflicts with any available module. */ |
| 127 | if (detect_conflict (from)) |
| 128 | /* It does conflict, don't add the alias. */ |
| 129 | return; |
| 130 | |
| 131 | struct gconv_alias *new_alias = (struct gconv_alias *) |
| 132 | malloc (sizeof (struct gconv_alias) + (wp - from)); |
| 133 | if (new_alias != NULL) |
| 134 | { |
| 135 | void **inserted; |
| 136 | |
| 137 | new_alias->fromname = memcpy ((char *) new_alias |
| 138 | + sizeof (struct gconv_alias), |
| 139 | from, wp - from); |
| 140 | new_alias->toname = new_alias->fromname + (to - from); |
| 141 | |
| 142 | inserted = (void **) __tsearch (new_alias, &__gconv_alias_db, |
| 143 | __gconv_alias_compare); |
| 144 | if (inserted == NULL || *inserted != new_alias) |
| 145 | /* Something went wrong, free this entry. */ |
| 146 | free (new_alias); |
| 147 | } |
| 148 | } |
| 149 | |
| 150 | |
| 151 | /* Add new alias. */ |
| 152 | static void |
| 153 | add_alias (char *rp) |
| 154 | { |
| 155 | /* We now expect two more string. The strings are normalized |
| 156 | (converted to UPPER case) and strored in the alias database. */ |
| 157 | char *from, *to, *wp; |
| 158 | |
| 159 | while (__isspace_l (*rp, _nl_C_locobj_ptr)) |
| 160 | ++rp; |
| 161 | from = wp = rp; |
| 162 | while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr)) |
| 163 | *wp++ = __toupper_l (*rp++, _nl_C_locobj_ptr); |
| 164 | if (*rp == '\0') |
| 165 | /* There is no `to' string on the line. Ignore it. */ |
| 166 | return; |
| 167 | *wp++ = '\0'; |
| 168 | to = ++rp; |
| 169 | while (__isspace_l (*rp, _nl_C_locobj_ptr)) |
| 170 | ++rp; |
| 171 | while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr)) |
| 172 | *wp++ = __toupper_l (*rp++, _nl_C_locobj_ptr); |
| 173 | if (to == wp) |
| 174 | /* No `to' string, ignore the line. */ |
| 175 | return; |
| 176 | *wp++ = '\0'; |
| 177 | |
| 178 | add_alias2 (from, to, wp); |
| 179 | } |
| 180 | |
| 181 | |
| 182 | /* Insert a data structure for a new module in the search tree. */ |
| 183 | static void |
| 184 | insert_module (struct gconv_module *newp, int tobefreed) |
| 185 | { |
| 186 | struct gconv_module **rootp = &__gconv_modules_db; |
| 187 | |
| 188 | while (*rootp != NULL) |
| 189 | { |
| 190 | struct gconv_module *root = *rootp; |
| 191 | int cmpres; |
| 192 | |
| 193 | cmpres = strcmp (newp->from_string, root->from_string); |
| 194 | if (cmpres == 0) |
| 195 | { |
| 196 | /* Both strings are identical. Insert the string at the |
| 197 | end of the `same' list if it is not already there. */ |
| 198 | while (strcmp (newp->from_string, root->from_string) != 0 |
| 199 | || strcmp (newp->to_string, root->to_string) != 0) |
| 200 | { |
| 201 | rootp = &root->same; |
| 202 | root = *rootp; |
| 203 | if (root == NULL) |
| 204 | break; |
| 205 | } |
| 206 | |
| 207 | if (root != NULL) |
| 208 | { |
| 209 | /* This is a no new conversion. But maybe the cost is |
| 210 | better. */ |
| 211 | if (newp->cost_hi < root->cost_hi |
| 212 | || (newp->cost_hi == root->cost_hi |
| 213 | && newp->cost_lo < root->cost_lo)) |
| 214 | { |
| 215 | newp->left = root->left; |
| 216 | newp->right = root->right; |
| 217 | newp->same = root->same; |
| 218 | *rootp = newp; |
| 219 | |
| 220 | free (root); |
| 221 | } |
| 222 | else if (tobefreed) |
| 223 | free (newp); |
| 224 | return; |
| 225 | } |
| 226 | |
| 227 | break; |
| 228 | } |
| 229 | else if (cmpres < 0) |
| 230 | rootp = &root->left; |
| 231 | else |
| 232 | rootp = &root->right; |
| 233 | } |
| 234 | |
| 235 | /* Plug in the new node here. */ |
| 236 | *rootp = newp; |
| 237 | } |
| 238 | |
| 239 | |
| 240 | /* Add new module. */ |
| 241 | static void |
| 242 | add_module (char *rp, const char *directory, size_t dir_len, int modcounter) |
| 243 | { |
| 244 | /* We expect now |
| 245 | 1. `from' name |
| 246 | 2. `to' name |
| 247 | 3. filename of the module |
| 248 | 4. an optional cost value |
| 249 | */ |
| 250 | struct gconv_alias fake_alias; |
| 251 | struct gconv_module *new_module; |
| 252 | char *from, *to, *module, *wp; |
| 253 | int need_ext; |
| 254 | int cost_hi; |
| 255 | |
| 256 | while (__isspace_l (*rp, _nl_C_locobj_ptr)) |
| 257 | ++rp; |
| 258 | from = rp; |
| 259 | while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr)) |
| 260 | { |
| 261 | *rp = __toupper_l (*rp, _nl_C_locobj_ptr); |
| 262 | ++rp; |
| 263 | } |
| 264 | if (*rp == '\0') |
| 265 | return; |
| 266 | *rp++ = '\0'; |
| 267 | to = wp = rp; |
| 268 | while (__isspace_l (*rp, _nl_C_locobj_ptr)) |
| 269 | ++rp; |
| 270 | while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr)) |
| 271 | *wp++ = __toupper_l (*rp++, _nl_C_locobj_ptr); |
| 272 | if (*rp == '\0') |
| 273 | return; |
| 274 | *wp++ = '\0'; |
| 275 | do |
| 276 | ++rp; |
| 277 | while (__isspace_l (*rp, _nl_C_locobj_ptr)); |
| 278 | module = wp; |
| 279 | while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr)) |
| 280 | *wp++ = *rp++; |
| 281 | if (*rp == '\0') |
| 282 | { |
| 283 | /* There is no cost, use one by default. */ |
| 284 | *wp++ = '\0'; |
| 285 | cost_hi = 1; |
| 286 | } |
| 287 | else |
| 288 | { |
| 289 | /* There might be a cost value. */ |
| 290 | char *endp; |
| 291 | |
| 292 | *wp++ = '\0'; |
| 293 | cost_hi = strtol (rp, &endp, 10); |
| 294 | if (rp == endp || cost_hi < 1) |
| 295 | /* No useful information. */ |
| 296 | cost_hi = 1; |
| 297 | } |
| 298 | |
| 299 | if (module[0] == '\0') |
| 300 | /* No module name given. */ |
| 301 | return; |
| 302 | if (module[0] == '/') |
| 303 | dir_len = 0; |
| 304 | |
| 305 | /* See whether we must add the ending. */ |
| 306 | need_ext = 0; |
| 307 | if (wp - module < (ptrdiff_t) sizeof (gconv_module_ext) |
| 308 | || memcmp (wp - sizeof (gconv_module_ext), gconv_module_ext, |
| 309 | sizeof (gconv_module_ext)) != 0) |
| 310 | /* We must add the module extension. */ |
| 311 | need_ext = sizeof (gconv_module_ext) - 1; |
| 312 | |
| 313 | /* See whether we have already an alias with this name defined. */ |
| 314 | fake_alias.fromname = strndupa (from, to - from); |
| 315 | |
| 316 | if (__tfind (&fake_alias, &__gconv_alias_db, __gconv_alias_compare) != NULL) |
| 317 | /* This module duplicates an alias. */ |
| 318 | return; |
| 319 | |
| 320 | new_module = (struct gconv_module *) calloc (1, |
| 321 | sizeof (struct gconv_module) |
| 322 | + (wp - from) |
| 323 | + dir_len + need_ext); |
| 324 | if (new_module != NULL) |
| 325 | { |
| 326 | char *tmp; |
| 327 | |
| 328 | new_module->from_string = tmp = (char *) (new_module + 1); |
| 329 | tmp = __mempcpy (tmp, from, to - from); |
| 330 | |
| 331 | new_module->to_string = tmp; |
| 332 | tmp = __mempcpy (tmp, to, module - to); |
| 333 | |
| 334 | new_module->cost_hi = cost_hi; |
| 335 | new_module->cost_lo = modcounter; |
| 336 | |
| 337 | new_module->module_name = tmp; |
| 338 | |
| 339 | if (dir_len != 0) |
| 340 | tmp = __mempcpy (tmp, directory, dir_len); |
| 341 | |
| 342 | tmp = __mempcpy (tmp, module, wp - module); |
| 343 | |
| 344 | if (need_ext) |
| 345 | memcpy (tmp - 1, gconv_module_ext, sizeof (gconv_module_ext)); |
| 346 | |
| 347 | /* Now insert the new module data structure in our search tree. */ |
| 348 | insert_module (new_module, 1); |
| 349 | } |
| 350 | } |
| 351 | |
| 352 | |
| 353 | /* Determine the directories we are looking for data in. This function should |
| 354 | only be called from __gconv_read_conf. */ |
| 355 | static void |
| 356 | __gconv_get_path (void) |
| 357 | { |
| 358 | struct path_elem *result; |
| 359 | |
| 360 | /* This function is only ever called when __gconv_path_elem is NULL. */ |
| 361 | result = __gconv_path_elem; |
| 362 | assert (result == NULL); |
| 363 | |
| 364 | /* Determine the complete path first. */ |
| 365 | char *gconv_path; |
| 366 | size_t gconv_path_len; |
| 367 | char *elem; |
| 368 | char *oldp; |
| 369 | char *cp; |
| 370 | int nelems; |
| 371 | char *cwd; |
| 372 | size_t cwdlen; |
| 373 | |
| 374 | if (__gconv_path_envvar == NULL) |
| 375 | { |
| 376 | /* No user-defined path. Make a modifiable copy of the |
| 377 | default path. */ |
| 378 | gconv_path = strdupa (default_gconv_path); |
| 379 | gconv_path_len = sizeof (default_gconv_path); |
| 380 | cwd = NULL; |
| 381 | cwdlen = 0; |
| 382 | } |
| 383 | else |
| 384 | { |
| 385 | /* Append the default path to the user-defined path. */ |
| 386 | size_t user_len = strlen (__gconv_path_envvar); |
| 387 | |
| 388 | gconv_path_len = user_len + 1 + sizeof (default_gconv_path); |
| 389 | gconv_path = alloca (gconv_path_len); |
| 390 | __mempcpy (__mempcpy (__mempcpy (gconv_path, __gconv_path_envvar, |
| 391 | user_len), |
| 392 | ":" , 1), |
| 393 | default_gconv_path, sizeof (default_gconv_path)); |
| 394 | cwd = __getcwd (NULL, 0); |
| 395 | cwdlen = __glibc_unlikely (cwd == NULL) ? 0 : strlen (cwd); |
| 396 | } |
| 397 | assert (default_gconv_path[0] == '/'); |
| 398 | |
| 399 | /* In a first pass we calculate the number of elements. */ |
| 400 | oldp = NULL; |
| 401 | cp = strchr (gconv_path, ':'); |
| 402 | nelems = 1; |
| 403 | while (cp != NULL) |
| 404 | { |
| 405 | if (cp != oldp + 1) |
| 406 | ++nelems; |
| 407 | oldp = cp; |
| 408 | cp = strchr (cp + 1, ':'); |
| 409 | } |
| 410 | |
| 411 | /* Allocate the memory for the result. */ |
| 412 | result = malloc ((nelems + 1) |
| 413 | * sizeof (struct path_elem) |
| 414 | + gconv_path_len + nelems |
| 415 | + (nelems - 1) * (cwdlen + 1)); |
| 416 | if (result != NULL) |
| 417 | { |
| 418 | char *strspace = (char *) &result[nelems + 1]; |
| 419 | int n = 0; |
| 420 | |
| 421 | /* Separate the individual parts. */ |
| 422 | __gconv_max_path_elem_len = 0; |
| 423 | elem = __strtok_r (gconv_path, ":" , &gconv_path); |
| 424 | assert (elem != NULL); |
| 425 | do |
| 426 | { |
| 427 | result[n].name = strspace; |
| 428 | if (elem[0] != '/') |
| 429 | { |
| 430 | assert (cwd != NULL); |
| 431 | strspace = __mempcpy (strspace, cwd, cwdlen); |
| 432 | *strspace++ = '/'; |
| 433 | } |
| 434 | strspace = __stpcpy (strspace, elem); |
| 435 | if (strspace[-1] != '/') |
| 436 | *strspace++ = '/'; |
| 437 | |
| 438 | result[n].len = strspace - result[n].name; |
| 439 | if (result[n].len > __gconv_max_path_elem_len) |
| 440 | __gconv_max_path_elem_len = result[n].len; |
| 441 | |
| 442 | *strspace++ = '\0'; |
| 443 | ++n; |
| 444 | } |
| 445 | while ((elem = __strtok_r (NULL, ":" , &gconv_path)) != NULL); |
| 446 | |
| 447 | result[n].name = NULL; |
| 448 | result[n].len = 0; |
| 449 | } |
| 450 | |
| 451 | __gconv_path_elem = result ?: (struct path_elem *) &empty_path_elem; |
| 452 | |
| 453 | free (cwd); |
| 454 | } |
| 455 | |
| 456 | |
| 457 | /* Read all configuration files found in the user-specified and the default |
| 458 | path. This function should only be called once during the program's |
| 459 | lifetime. It disregards locking and synchronization because its only |
| 460 | caller, __gconv_load_conf, handles this. */ |
| 461 | static void |
| 462 | __gconv_read_conf (void) |
| 463 | { |
| 464 | int save_errno = errno; |
| 465 | size_t cnt; |
| 466 | |
| 467 | /* First see whether we should use the cache. */ |
| 468 | if (__gconv_load_cache () == 0) |
| 469 | { |
| 470 | /* Yes, we are done. */ |
| 471 | __set_errno (save_errno); |
| 472 | return; |
| 473 | } |
| 474 | |
| 475 | #ifndef STATIC_GCONV |
| 476 | /* Find out where we have to look. */ |
| 477 | __gconv_get_path (); |
| 478 | |
| 479 | for (cnt = 0; __gconv_path_elem[cnt].name != NULL; ++cnt) |
| 480 | gconv_parseconfdir (NULL, __gconv_path_elem[cnt].name, |
| 481 | __gconv_path_elem[cnt].len); |
| 482 | #endif |
| 483 | |
| 484 | /* Add the internal modules. */ |
| 485 | for (cnt = 0; cnt < sizeof (builtin_modules) / sizeof (builtin_modules[0]); |
| 486 | ++cnt) |
| 487 | { |
| 488 | struct gconv_alias fake_alias; |
| 489 | |
| 490 | fake_alias.fromname = (char *) builtin_modules[cnt].from_string; |
| 491 | |
| 492 | if (__tfind (&fake_alias, &__gconv_alias_db, __gconv_alias_compare) |
| 493 | != NULL) |
| 494 | /* It'll conflict so don't add it. */ |
| 495 | continue; |
| 496 | |
| 497 | insert_module (&builtin_modules[cnt], 0); |
| 498 | } |
| 499 | |
| 500 | /* Add aliases for builtin conversions. */ |
| 501 | const char *cp = builtin_aliases; |
| 502 | do |
| 503 | { |
| 504 | const char *from = cp; |
| 505 | const char *to = __rawmemchr (from, '\0') + 1; |
| 506 | cp = __rawmemchr (to, '\0') + 1; |
| 507 | |
| 508 | add_alias2 (from, to, cp); |
| 509 | } |
| 510 | while (*cp != '\0'); |
| 511 | |
| 512 | /* Restore the error number. */ |
| 513 | __set_errno (save_errno); |
| 514 | } |
| 515 | |
| 516 | |
/* This "once" variable is used to do a one-time load of the
   configuration.  It is the control object handed to __libc_once in
   __gconv_load_conf.  */
__libc_once_define (static, once);
| 519 | |
| 520 | |
/* Read all configuration files found in the user-specified and the default
   path, but do it only "once" using __gconv_read_conf to do the actual
   work.  This is the function that must be called when reading iconv
   configuration.  */
void
__gconv_load_conf (void)
{
  /* __gconv_read_conf does no locking of its own and relies on this
     call to be run a single time.  */
  __libc_once (once, __gconv_read_conf);
}
| 530 | |
| 531 | |
| 532 | /* Free all resources if necessary. */ |
| 533 | libc_freeres_fn (free_mem) |
| 534 | { |
| 535 | if (__gconv_path_elem != NULL && __gconv_path_elem != &empty_path_elem) |
| 536 | free ((void *) __gconv_path_elem); |
| 537 | } |
| 538 | |