1 | /* Copyright (C) 1996-2021 Free Software Foundation, Inc. |
2 | This file is part of the GNU C Library. |
3 | Contributed by Ulrich Drepper <drepper@gnu.org>, 1996. |
4 | |
5 | This program is free software; you can redistribute it and/or modify |
6 | it under the terms of the GNU General Public License as published |
7 | by the Free Software Foundation; version 2 of the License, or |
8 | (at your option) any later version. |
9 | |
10 | This program is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | GNU General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU General Public License |
16 | along with this program; if not, see <https://www.gnu.org/licenses/>. */ |
17 | |
18 | #ifdef HAVE_CONFIG_H |
19 | # include <config.h> |
20 | #endif |
21 | |
22 | #include <assert.h> |
23 | #include <ctype.h> |
24 | #include <errno.h> |
25 | #include <libintl.h> |
26 | #include <stdarg.h> |
27 | #include <stdlib.h> |
28 | #include <string.h> |
29 | #include <stdint.h> |
30 | |
31 | #include "localedef.h" |
32 | #include "charmap.h" |
33 | #include "error.h" |
34 | #include "linereader.h" |
35 | #include "locfile.h" |
36 | |
/* Prototypes for the file-local token scanners.  lr_token reads the
   first character of a token and then hands the rest of the work to
   one of these; each of them fills in and returns &lr->token.  */
static struct token *get_toplvl_escape (struct linereader *lr);
static struct token *get_symname (struct linereader *lr);
static struct token *get_ident (struct linereader *lr);
static struct token *get_string (struct linereader *lr,
				 const struct charmap_t *charmap,
				 struct localedef_t *locale,
				 const struct repertoire_t *repertoire,
				 int verbose);
46 | |
47 | |
48 | struct linereader * |
49 | lr_open (const char *fname, kw_hash_fct_t hf) |
50 | { |
51 | FILE *fp; |
52 | |
53 | if (fname == NULL || strcmp (fname, "-" ) == 0 |
54 | || strcmp (fname, "/dev/stdin" ) == 0) |
55 | return lr_create (stdin, "<stdin>" , hf); |
56 | else |
57 | { |
58 | fp = fopen (fname, "rm" ); |
59 | if (fp == NULL) |
60 | return NULL; |
61 | return lr_create (fp, fname, hf); |
62 | } |
63 | } |
64 | |
65 | struct linereader * |
66 | lr_create (FILE *fp, const char *fname, kw_hash_fct_t hf) |
67 | { |
68 | struct linereader *result; |
69 | int n; |
70 | |
71 | result = (struct linereader *) xmalloc (sizeof (*result)); |
72 | |
73 | result->fp = fp; |
74 | result->fname = xstrdup (fname); |
75 | result->buf = NULL; |
76 | result->bufsize = 0; |
77 | result->lineno = 1; |
78 | result->idx = 0; |
79 | result->comment_char = '#'; |
80 | result->escape_char = '\\'; |
81 | result->translate_strings = 1; |
82 | result->return_widestr = 0; |
83 | |
84 | n = getdelim (&result->buf, &result->bufsize, '\n', result->fp); |
85 | if (n < 0) |
86 | { |
87 | int save = errno; |
88 | fclose (result->fp); |
89 | free ((char *) result->fname); |
90 | free (result); |
91 | errno = save; |
92 | return NULL; |
93 | } |
94 | |
95 | if (n > 1 && result->buf[n - 2] == '\\' && result->buf[n - 1] == '\n') |
96 | n -= 2; |
97 | |
98 | result->buf[n] = '\0'; |
99 | result->bufact = n; |
100 | result->hash_fct = hf; |
101 | |
102 | return result; |
103 | } |
104 | |
105 | |
106 | int |
107 | lr_eof (struct linereader *lr) |
108 | { |
109 | return lr->bufact = 0; |
110 | } |
111 | |
112 | |
113 | void |
114 | lr_ignore_rest (struct linereader *lr, int verbose) |
115 | { |
116 | if (verbose) |
117 | { |
118 | while (isspace (lr->buf[lr->idx]) && lr->buf[lr->idx] != '\n' |
119 | && lr->buf[lr->idx] != lr->comment_char) |
120 | if (lr->buf[lr->idx] == '\0') |
121 | { |
122 | if (lr_next (lr) < 0) |
123 | return; |
124 | } |
125 | else |
126 | ++lr->idx; |
127 | |
128 | if (lr->buf[lr->idx] != '\n' && ! feof (lr->fp) |
129 | && lr->buf[lr->idx] != lr->comment_char) |
130 | lr_error (lr, _("trailing garbage at end of line" )); |
131 | } |
132 | |
133 | /* Ignore continued line. */ |
134 | while (lr->bufact > 0 && lr->buf[lr->bufact - 1] != '\n') |
135 | if (lr_next (lr) < 0) |
136 | break; |
137 | |
138 | lr->idx = lr->bufact; |
139 | } |
140 | |
141 | |
142 | void |
143 | lr_close (struct linereader *lr) |
144 | { |
145 | fclose (lr->fp); |
146 | free (lr->buf); |
147 | free (lr); |
148 | } |
149 | |
150 | |
151 | int |
152 | lr_next (struct linereader *lr) |
153 | { |
154 | int n; |
155 | |
156 | n = getdelim (&lr->buf, &lr->bufsize, '\n', lr->fp); |
157 | if (n < 0) |
158 | return -1; |
159 | |
160 | ++lr->lineno; |
161 | |
162 | if (n > 1 && lr->buf[n - 2] == lr->escape_char && lr->buf[n - 1] == '\n') |
163 | { |
164 | #if 0 |
165 | /* XXX Is this correct? */ |
166 | /* An escaped newline character is substituted with a single <SP>. */ |
167 | --n; |
168 | lr->buf[n - 1] = ' '; |
169 | #else |
170 | n -= 2; |
171 | #endif |
172 | } |
173 | |
174 | lr->buf[n] = '\0'; |
175 | lr->bufact = n; |
176 | lr->idx = 0; |
177 | |
178 | return 0; |
179 | } |
180 | |
181 | |
/* Defined in error.c.  */
/* Counter that is incremented each time `error' is called.  */
extern unsigned int error_message_count;

/* The calling program should define program_name and set it to the
   name of the executing program.  */
extern char *program_name;
189 | |
190 | |
191 | struct token * |
192 | lr_token (struct linereader *lr, const struct charmap_t *charmap, |
193 | struct localedef_t *locale, const struct repertoire_t *repertoire, |
194 | int verbose) |
195 | { |
196 | int ch; |
197 | |
198 | while (1) |
199 | { |
200 | do |
201 | { |
202 | ch = lr_getc (lr); |
203 | |
204 | if (ch == EOF) |
205 | { |
206 | lr->token.tok = tok_eof; |
207 | return &lr->token; |
208 | }; |
209 | |
210 | if (ch == '\n') |
211 | { |
212 | lr->token.tok = tok_eol; |
213 | return &lr->token; |
214 | } |
215 | } |
216 | while (isspace (ch)); |
217 | |
218 | if (ch != lr->comment_char) |
219 | break; |
220 | |
221 | /* Is there an newline at the end of the buffer? */ |
222 | if (lr->buf[lr->bufact - 1] != '\n') |
223 | { |
224 | /* No. Some people want this to mean that only the line in |
225 | the file not the logical, concatenated line is ignored. |
226 | Let's try this. */ |
227 | lr->idx = lr->bufact; |
228 | continue; |
229 | } |
230 | |
231 | /* Ignore rest of line. */ |
232 | lr_ignore_rest (lr, 0); |
233 | lr->token.tok = tok_eol; |
234 | return &lr->token; |
235 | } |
236 | |
237 | /* Match escape sequences. */ |
238 | if (ch == lr->escape_char) |
239 | return get_toplvl_escape (lr); |
240 | |
241 | /* Match ellipsis. */ |
242 | if (ch == '.') |
243 | { |
244 | if (strncmp (&lr->buf[lr->idx], "...(2)...." , 10) == 0) |
245 | { |
246 | int cnt; |
247 | for (cnt = 0; cnt < 10; ++cnt) |
248 | lr_getc (lr); |
249 | lr->token.tok = tok_ellipsis4_2; |
250 | return &lr->token; |
251 | } |
252 | if (strncmp (&lr->buf[lr->idx], "..." , 3) == 0) |
253 | { |
254 | lr_getc (lr); |
255 | lr_getc (lr); |
256 | lr_getc (lr); |
257 | lr->token.tok = tok_ellipsis4; |
258 | return &lr->token; |
259 | } |
260 | if (strncmp (&lr->buf[lr->idx], ".." , 2) == 0) |
261 | { |
262 | lr_getc (lr); |
263 | lr_getc (lr); |
264 | lr->token.tok = tok_ellipsis3; |
265 | return &lr->token; |
266 | } |
267 | if (strncmp (&lr->buf[lr->idx], ".(2).." , 6) == 0) |
268 | { |
269 | int cnt; |
270 | for (cnt = 0; cnt < 6; ++cnt) |
271 | lr_getc (lr); |
272 | lr->token.tok = tok_ellipsis2_2; |
273 | return &lr->token; |
274 | } |
275 | if (lr->buf[lr->idx] == '.') |
276 | { |
277 | lr_getc (lr); |
278 | lr->token.tok = tok_ellipsis2; |
279 | return &lr->token; |
280 | } |
281 | } |
282 | |
283 | switch (ch) |
284 | { |
285 | case '<': |
286 | return get_symname (lr); |
287 | |
288 | case '0' ... '9': |
289 | lr->token.tok = tok_number; |
290 | lr->token.val.num = ch - '0'; |
291 | |
292 | while (isdigit (ch = lr_getc (lr))) |
293 | { |
294 | lr->token.val.num *= 10; |
295 | lr->token.val.num += ch - '0'; |
296 | } |
297 | if (isalpha (ch)) |
298 | lr_error (lr, _("garbage at end of number" )); |
299 | lr_ungetn (lr, 1); |
300 | |
301 | return &lr->token; |
302 | |
303 | case ';': |
304 | lr->token.tok = tok_semicolon; |
305 | return &lr->token; |
306 | |
307 | case ',': |
308 | lr->token.tok = tok_comma; |
309 | return &lr->token; |
310 | |
311 | case '(': |
312 | lr->token.tok = tok_open_brace; |
313 | return &lr->token; |
314 | |
315 | case ')': |
316 | lr->token.tok = tok_close_brace; |
317 | return &lr->token; |
318 | |
319 | case '"': |
320 | return get_string (lr, charmap, locale, repertoire, verbose); |
321 | |
322 | case '-': |
323 | ch = lr_getc (lr); |
324 | if (ch == '1') |
325 | { |
326 | lr->token.tok = tok_minus1; |
327 | return &lr->token; |
328 | } |
329 | lr_ungetn (lr, 2); |
330 | break; |
331 | } |
332 | |
333 | return get_ident (lr); |
334 | } |
335 | |
336 | |
337 | static struct token * |
338 | get_toplvl_escape (struct linereader *lr) |
339 | { |
340 | /* This is supposed to be a numeric value. We return the |
341 | numerical value and the number of bytes. */ |
342 | size_t start_idx = lr->idx - 1; |
343 | unsigned char *bytes = lr->token.val.charcode.bytes; |
344 | size_t nbytes = 0; |
345 | int ch; |
346 | |
347 | do |
348 | { |
349 | unsigned int byte = 0; |
350 | unsigned int base = 8; |
351 | |
352 | ch = lr_getc (lr); |
353 | |
354 | if (ch == 'd') |
355 | { |
356 | base = 10; |
357 | ch = lr_getc (lr); |
358 | } |
359 | else if (ch == 'x') |
360 | { |
361 | base = 16; |
362 | ch = lr_getc (lr); |
363 | } |
364 | |
365 | if ((base == 16 && !isxdigit (ch)) |
366 | || (base != 16 && (ch < '0' || ch >= (int) ('0' + base)))) |
367 | { |
368 | esc_error: |
369 | lr->token.val.str.startmb = &lr->buf[start_idx]; |
370 | |
371 | while (ch != EOF && !isspace (ch)) |
372 | ch = lr_getc (lr); |
373 | lr->token.val.str.lenmb = lr->idx - start_idx; |
374 | |
375 | lr->token.tok = tok_error; |
376 | return &lr->token; |
377 | } |
378 | |
379 | if (isdigit (ch)) |
380 | byte = ch - '0'; |
381 | else |
382 | byte = tolower (ch) - 'a' + 10; |
383 | |
384 | ch = lr_getc (lr); |
385 | if ((base == 16 && !isxdigit (ch)) |
386 | || (base != 16 && (ch < '0' || ch >= (int) ('0' + base)))) |
387 | goto esc_error; |
388 | |
389 | byte *= base; |
390 | if (isdigit (ch)) |
391 | byte += ch - '0'; |
392 | else |
393 | byte += tolower (ch) - 'a' + 10; |
394 | |
395 | ch = lr_getc (lr); |
396 | if (base != 16 && isdigit (ch)) |
397 | { |
398 | byte *= base; |
399 | byte += ch - '0'; |
400 | |
401 | ch = lr_getc (lr); |
402 | } |
403 | |
404 | bytes[nbytes++] = byte; |
405 | } |
406 | while (ch == lr->escape_char |
407 | && nbytes < (int) sizeof (lr->token.val.charcode.bytes)); |
408 | |
409 | if (!isspace (ch)) |
410 | lr_error (lr, _("garbage at end of character code specification" )); |
411 | |
412 | lr_ungetn (lr, 1); |
413 | |
414 | lr->token.tok = tok_charcode; |
415 | lr->token.val.charcode.nbytes = nbytes; |
416 | |
417 | return &lr->token; |
418 | } |
419 | |
420 | |
/* Append the single byte CH to the growing byte buffer of the enclosing
   function.  The locals `buf' (heap block), `bufact' (bytes in use) and
   `bufmax' (bytes allocated) must be in scope; the block is doubled
   with xrealloc whenever it is full.  */
#define ADDC(ch) \
  do									      \
    {									      \
      if (bufmax == bufact)						      \
	{								      \
	  bufmax += bufmax;						      \
	  buf = xrealloc (buf, bufmax);					      \
	}								      \
      buf[bufact] = (ch);						      \
      ++bufact;								      \
    }									      \
  while (0)
432 | |
433 | |
/* Append the L bytes starting at S to the growing byte buffer of the
   enclosing function.  The locals `buf' (heap block), `bufact' (bytes
   in use) and `bufmax' (bytes allocated) must be in scope.

   The capacity is doubled until the new data fits; one doubling is not
   necessarily enough for a long sequence.  Only `bufmax' is adjusted
   while growing: `bufact' is the length of the data collected so far
   and must change by exactly L.  */
#define ADDS(s, l) \
  do									      \
    {									      \
      size_t _l = (l);							      \
      if (bufact + _l > bufmax)						      \
	{								      \
	  do								      \
	    bufmax = bufmax != 0 ? 2 * bufmax : 16;			      \
	  while (bufact + _l > bufmax);					      \
	  buf = xrealloc (buf, bufmax);					      \
	}								      \
      memcpy (&buf[bufact], (s), _l);					      \
      bufact += _l;							      \
    }									      \
  while (0)
449 | |
450 | |
/* Append the wide character CH to the growing wide character buffer of
   the enclosing function.  The locals `buf2' (heap block), `buf2act'
   (elements in use) and `buf2max' (elements allocated) must be in
   scope.  Both counters are in elements, not bytes; the byte size
   passed to xrealloc is derived from the element type of `buf2' rather
   than from a hard-coded width.  */
#define ADDWC(ch) \
  do									      \
    {									      \
      if (buf2act == buf2max)						      \
	{								      \
	  buf2max *= 2;							      \
	  buf2 = xrealloc (buf2, buf2max * sizeof (*buf2));		      \
	}								      \
      buf2[buf2act++] = (ch);						      \
    }									      \
  while (0)
462 | |
463 | |
464 | static struct token * |
465 | get_symname (struct linereader *lr) |
466 | { |
467 | /* Symbol in brackets. We must distinguish three kinds: |
468 | 1. reserved words |
469 | 2. ISO 10646 position values |
470 | 3. all other. */ |
471 | char *buf; |
472 | size_t bufact = 0; |
473 | size_t bufmax = 56; |
474 | const struct keyword_t *kw; |
475 | int ch; |
476 | |
477 | buf = (char *) xmalloc (bufmax); |
478 | |
479 | do |
480 | { |
481 | ch = lr_getc (lr); |
482 | if (ch == lr->escape_char) |
483 | { |
484 | int c2 = lr_getc (lr); |
485 | ADDC (c2); |
486 | |
487 | if (c2 == '\n') |
488 | ch = '\n'; |
489 | } |
490 | else |
491 | ADDC (ch); |
492 | } |
493 | while (ch != '>' && ch != '\n'); |
494 | |
495 | if (ch == '\n') |
496 | lr_error (lr, _("unterminated symbolic name" )); |
497 | |
498 | /* Test for ISO 10646 position value. */ |
499 | if (buf[0] == 'U' && (bufact == 6 || bufact == 10)) |
500 | { |
501 | char *cp = buf + 1; |
502 | while (cp < &buf[bufact - 1] && isxdigit (*cp)) |
503 | ++cp; |
504 | |
505 | if (cp == &buf[bufact - 1]) |
506 | { |
507 | /* Yes, it is. */ |
508 | lr->token.tok = tok_ucs4; |
509 | lr->token.val.ucs4 = strtoul (buf + 1, NULL, 16); |
510 | |
511 | return &lr->token; |
512 | } |
513 | } |
514 | |
515 | /* It is a symbolic name. Test for reserved words. */ |
516 | kw = lr->hash_fct (buf, bufact - 1); |
517 | |
518 | if (kw != NULL && kw->symname_or_ident == 1) |
519 | { |
520 | lr->token.tok = kw->token; |
521 | free (buf); |
522 | } |
523 | else |
524 | { |
525 | lr->token.tok = tok_bsymbol; |
526 | |
527 | buf = xrealloc (buf, bufact + 1); |
528 | buf[bufact] = '\0'; |
529 | |
530 | lr->token.val.str.startmb = buf; |
531 | lr->token.val.str.lenmb = bufact - 1; |
532 | } |
533 | |
534 | return &lr->token; |
535 | } |
536 | |
537 | |
538 | static struct token * |
539 | get_ident (struct linereader *lr) |
540 | { |
541 | char *buf; |
542 | size_t bufact; |
543 | size_t bufmax = 56; |
544 | const struct keyword_t *kw; |
545 | int ch; |
546 | |
547 | buf = xmalloc (bufmax); |
548 | bufact = 0; |
549 | |
550 | ADDC (lr->buf[lr->idx - 1]); |
551 | |
552 | while (!isspace ((ch = lr_getc (lr))) && ch != '"' && ch != ';' |
553 | && ch != '<' && ch != ',' && ch != EOF) |
554 | { |
555 | if (ch == lr->escape_char) |
556 | { |
557 | ch = lr_getc (lr); |
558 | if (ch == '\n' || ch == EOF) |
559 | { |
560 | lr_error (lr, _("invalid escape sequence" )); |
561 | break; |
562 | } |
563 | } |
564 | ADDC (ch); |
565 | } |
566 | |
567 | lr_ungetc (lr, ch); |
568 | |
569 | kw = lr->hash_fct (buf, bufact); |
570 | |
571 | if (kw != NULL && kw->symname_or_ident == 0) |
572 | { |
573 | lr->token.tok = kw->token; |
574 | free (buf); |
575 | } |
576 | else |
577 | { |
578 | lr->token.tok = tok_ident; |
579 | |
580 | buf = xrealloc (buf, bufact + 1); |
581 | buf[bufact] = '\0'; |
582 | |
583 | lr->token.val.str.startmb = buf; |
584 | lr->token.val.str.lenmb = bufact; |
585 | } |
586 | |
587 | return &lr->token; |
588 | } |
589 | |
590 | |
591 | static struct token * |
592 | get_string (struct linereader *lr, const struct charmap_t *charmap, |
593 | struct localedef_t *locale, const struct repertoire_t *repertoire, |
594 | int verbose) |
595 | { |
596 | int return_widestr = lr->return_widestr; |
597 | char *buf; |
598 | wchar_t *buf2 = NULL; |
599 | size_t bufact; |
600 | size_t bufmax = 56; |
601 | |
602 | /* We must return two different strings. */ |
603 | buf = xmalloc (bufmax); |
604 | bufact = 0; |
605 | |
606 | /* We know it'll be a string. */ |
607 | lr->token.tok = tok_string; |
608 | |
609 | /* If we need not translate the strings (i.e., expand <...> parts) |
610 | we can run a simple loop. */ |
611 | if (!lr->translate_strings) |
612 | { |
613 | int ch; |
614 | |
615 | buf2 = NULL; |
616 | while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF) |
617 | ADDC (ch); |
618 | |
619 | /* Catch errors with trailing escape character. */ |
620 | if (bufact > 0 && buf[bufact - 1] == lr->escape_char |
621 | && (bufact == 1 || buf[bufact - 2] != lr->escape_char)) |
622 | { |
623 | lr_error (lr, _("illegal escape sequence at end of string" )); |
624 | --bufact; |
625 | } |
626 | else if (ch == '\n' || ch == EOF) |
627 | lr_error (lr, _("unterminated string" )); |
628 | |
629 | ADDC ('\0'); |
630 | } |
631 | else |
632 | { |
633 | int illegal_string = 0; |
634 | size_t buf2act = 0; |
635 | size_t buf2max = 56 * sizeof (uint32_t); |
636 | int ch; |
637 | |
638 | /* We have to provide the wide character result as well. */ |
639 | if (return_widestr) |
640 | buf2 = xmalloc (buf2max); |
641 | |
642 | /* Read until the end of the string (or end of the line or file). */ |
643 | while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF) |
644 | { |
645 | size_t startidx; |
646 | uint32_t wch; |
647 | struct charseq *seq; |
648 | |
649 | if (ch != '<') |
650 | { |
651 | /* The standards leave it up to the implementation to decide |
652 | what to do with character which stand for themself. We |
653 | could jump through hoops to find out the value relative to |
654 | the charmap and the repertoire map, but instead we leave |
655 | it up to the locale definition author to write a better |
656 | definition. We assume here that every character which |
657 | stands for itself is encoded using ISO 8859-1. Using the |
658 | escape character is allowed. */ |
659 | if (ch == lr->escape_char) |
660 | { |
661 | ch = lr_getc (lr); |
662 | if (ch == '\n' || ch == EOF) |
663 | break; |
664 | } |
665 | |
666 | ADDC (ch); |
667 | if (return_widestr) |
668 | ADDWC ((uint32_t) ch); |
669 | |
670 | continue; |
671 | } |
672 | |
673 | /* Now we have to search for the end of the symbolic name, i.e., |
674 | the closing '>'. */ |
675 | startidx = bufact; |
676 | while ((ch = lr_getc (lr)) != '>' && ch != '\n' && ch != EOF) |
677 | { |
678 | if (ch == lr->escape_char) |
679 | { |
680 | ch = lr_getc (lr); |
681 | if (ch == '\n' || ch == EOF) |
682 | break; |
683 | } |
684 | ADDC (ch); |
685 | } |
686 | if (ch == '\n' || ch == EOF) |
687 | /* Not a correct string. */ |
688 | break; |
689 | if (bufact == startidx) |
690 | { |
691 | /* <> is no correct name. Ignore it and also signal an |
692 | error. */ |
693 | illegal_string = 1; |
694 | continue; |
695 | } |
696 | |
697 | /* It might be a Uxxxx symbol. */ |
698 | if (buf[startidx] == 'U' |
699 | && (bufact - startidx == 5 || bufact - startidx == 9)) |
700 | { |
701 | char *cp = buf + startidx + 1; |
702 | while (cp < &buf[bufact] && isxdigit (*cp)) |
703 | ++cp; |
704 | |
705 | if (cp == &buf[bufact]) |
706 | { |
707 | char utmp[10]; |
708 | |
709 | /* Yes, it is. */ |
710 | ADDC ('\0'); |
711 | wch = strtoul (buf + startidx + 1, NULL, 16); |
712 | |
713 | /* Now forget about the name we just added. */ |
714 | bufact = startidx; |
715 | |
716 | if (return_widestr) |
717 | ADDWC (wch); |
718 | |
719 | /* See whether the charmap contains the Uxxxxxxxx names. */ |
720 | snprintf (utmp, sizeof (utmp), "U%08X" , wch); |
721 | seq = charmap_find_value (charmap, utmp, 9); |
722 | |
723 | if (seq == NULL) |
724 | { |
725 | /* No, this isn't the case. Now determine from |
726 | the repertoire the name of the character and |
727 | find it in the charmap. */ |
728 | if (repertoire != NULL) |
729 | { |
730 | const char *symbol; |
731 | |
732 | symbol = repertoire_find_symbol (repertoire, wch); |
733 | |
734 | if (symbol != NULL) |
735 | seq = charmap_find_value (charmap, symbol, |
736 | strlen (symbol)); |
737 | } |
738 | |
739 | if (seq == NULL) |
740 | { |
741 | #ifndef NO_TRANSLITERATION |
742 | /* Transliterate if possible. */ |
743 | if (locale != NULL) |
744 | { |
745 | uint32_t *translit; |
746 | |
747 | if ((locale->avail & CTYPE_LOCALE) == 0) |
748 | { |
749 | /* Load the CTYPE data now. */ |
750 | int old_needed = locale->needed; |
751 | |
752 | locale->needed = 0; |
753 | locale = load_locale (LC_CTYPE, |
754 | locale->name, |
755 | locale->repertoire_name, |
756 | charmap, locale); |
757 | locale->needed = old_needed; |
758 | } |
759 | |
760 | if ((locale->avail & CTYPE_LOCALE) != 0 |
761 | && ((translit = find_translit (locale, |
762 | charmap, wch)) |
763 | != NULL)) |
764 | /* The CTYPE data contains a matching |
765 | transliteration. */ |
766 | { |
767 | int i; |
768 | |
769 | for (i = 0; translit[i] != 0; ++i) |
770 | { |
771 | char utmp[10]; |
772 | |
773 | snprintf (utmp, sizeof (utmp), "U%08X" , |
774 | translit[i]); |
775 | seq = charmap_find_value (charmap, utmp, |
776 | 9); |
777 | assert (seq != NULL); |
778 | ADDS (seq->bytes, seq->nbytes); |
779 | } |
780 | |
781 | continue; |
782 | } |
783 | } |
784 | #endif /* NO_TRANSLITERATION */ |
785 | |
786 | /* Not a known name. */ |
787 | illegal_string = 1; |
788 | } |
789 | } |
790 | |
791 | if (seq != NULL) |
792 | ADDS (seq->bytes, seq->nbytes); |
793 | |
794 | continue; |
795 | } |
796 | } |
797 | |
798 | /* We now have the symbolic name in buf[startidx] to |
799 | buf[bufact-1]. Now find out the value for this character |
800 | in the charmap as well as in the repertoire map (in this |
801 | order). */ |
802 | seq = charmap_find_value (charmap, &buf[startidx], |
803 | bufact - startidx); |
804 | |
805 | if (seq == NULL) |
806 | { |
807 | /* This name is not in the charmap. */ |
808 | lr_error (lr, _("symbol `%.*s' not in charmap" ), |
809 | (int) (bufact - startidx), &buf[startidx]); |
810 | illegal_string = 1; |
811 | } |
812 | |
813 | if (return_widestr) |
814 | { |
815 | /* Now the same for the multibyte representation. */ |
816 | if (seq != NULL && seq->ucs4 != UNINITIALIZED_CHAR_VALUE) |
817 | wch = seq->ucs4; |
818 | else |
819 | { |
820 | wch = repertoire_find_value (repertoire, &buf[startidx], |
821 | bufact - startidx); |
822 | if (seq != NULL) |
823 | seq->ucs4 = wch; |
824 | } |
825 | |
826 | if (wch == ILLEGAL_CHAR_VALUE) |
827 | { |
828 | /* This name is not in the repertoire map. */ |
829 | lr_error (lr, _("symbol `%.*s' not in repertoire map" ), |
830 | (int) (bufact - startidx), &buf[startidx]); |
831 | illegal_string = 1; |
832 | } |
833 | else |
834 | ADDWC (wch); |
835 | } |
836 | |
837 | /* Now forget about the name we just added. */ |
838 | bufact = startidx; |
839 | |
840 | /* And copy the bytes. */ |
841 | if (seq != NULL) |
842 | ADDS (seq->bytes, seq->nbytes); |
843 | } |
844 | |
845 | if (ch == '\n' || ch == EOF) |
846 | { |
847 | lr_error (lr, _("unterminated string" )); |
848 | illegal_string = 1; |
849 | } |
850 | |
851 | if (illegal_string) |
852 | { |
853 | free (buf); |
854 | free (buf2); |
855 | lr->token.val.str.startmb = NULL; |
856 | lr->token.val.str.lenmb = 0; |
857 | lr->token.val.str.startwc = NULL; |
858 | lr->token.val.str.lenwc = 0; |
859 | |
860 | return &lr->token; |
861 | } |
862 | |
863 | ADDC ('\0'); |
864 | |
865 | if (return_widestr) |
866 | { |
867 | ADDWC (0); |
868 | lr->token.val.str.startwc = xrealloc (buf2, |
869 | buf2act * sizeof (uint32_t)); |
870 | lr->token.val.str.lenwc = buf2act; |
871 | } |
872 | } |
873 | |
874 | lr->token.val.str.startmb = xrealloc (buf, bufact); |
875 | lr->token.val.str.lenmb = bufact; |
876 | |
877 | return &lr->token; |
878 | } |
879 | |