1/* Copyright (C) 1995-2021 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, see <https://www.gnu.org/licenses/>. */
17
18#ifdef HAVE_CONFIG_H
19# include <config.h>
20#endif
21
22#include <errno.h>
23#include <stdlib.h>
24#include <wchar.h>
25#include <stdint.h>
26#include <sys/param.h>
27
28#include "localedef.h"
29#include "charmap.h"
30#include "localeinfo.h"
31#include "linereader.h"
32#include "locfile.h"
33#include "elem-hash.h"
34
35/* Uncomment the following line in the production version. */
36/* #define NDEBUG 1 */
37#include <assert.h>
38
39#define obstack_chunk_alloc malloc
40#define obstack_chunk_free free
41
/* Append DATA as one 32-bit word to the object currently being grown in
   OBSTACK.  The value is passed through maybe_swap_uint32 before it is
   stored, and the object must satisfy LOCFILE_ALIGNED_P on entry.  */
static inline void
__attribute ((always_inline))
obstack_int32_grow (struct obstack *obstack, int32_t data)
{
  assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));

  const int32_t word = maybe_swap_uint32 (data);

  /* The int-sized primitive can only be used when int has exactly the
     size of int32_t; otherwise the bytes are copied explicitly.  */
  if (sizeof (int32_t) != sizeof (int))
    obstack_grow (obstack, &word, sizeof (int32_t));
  else
    obstack_int_grow (obstack, word);
}
53
/* Variant of obstack_int32_grow that uses obstack_int_grow_fast when int
   has the size of int32_t.  When it does not, the ordinary obstack_grow
   is used, exactly as in the non-fast variant.  */
static inline void
__attribute ((always_inline))
obstack_int32_grow_fast (struct obstack *obstack, int32_t data)
{
  assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));

  const int32_t word = maybe_swap_uint32 (data);

  if (sizeof (int32_t) != sizeof (int))
    obstack_grow (obstack, &word, sizeof (int32_t));
  else
    obstack_int_grow_fast (obstack, word);
}
65
/* Forward declaration. */
struct element_t;

/* One section of the collation definition.  Every node carries two
   independent links (DEF_NEXT and NEXT), so it can be a member of two
   lists at the same time.  */
struct section_list
{
  /* Successor in the known_sections list. */
  struct section_list *def_next;
  /* Successor in the sections list. */
  struct section_list *next;
  /* Name of the section.  make_seclist_elem stores the pointer it is
     given; the string itself is not copied there. */
  const char *name;
  /* First element of this section; NULL as long as the section is empty. */
  struct element_t *first;
  /* Last element of this section. */
  struct element_t *last;
  /* These are the rules for this section.  read_directions stores an
     array here with one entry per weight level. */
  enum coll_sort_rule *rules;
  /* Index of the rule set in the appropriate section of the output file. */
  int ruleidx;
};
87
struct element_t;

/* The weight of an element on a single level: a counted array of
   element pointers.  */
struct element_list_t
{
  /* Number of elements. */
  int cnt;

  /* Array with CNT entries.  insert_weights stores a null pointer for an
     ignored level and ELEMENT_ELLIPSIS2 as a placeholder on ellipsis
     lines. */
  struct element_t **w;
};
97
/* Data type for collating element. */
struct element_t
{
  /* Symbolic name, or NULL if the element was created without one.
     new_element stores its own NUL-terminated copy. */
  const char *name;

  /* Multibyte sequence and its length in bytes; NULL and 0 while the
     sequence is not known. */
  const char *mbs;
  size_t nmbs;
  /* Zero-terminated wide character sequence and the number of wide
     characters in it; NULL and 0 while not known. */
  const uint32_t *wcs;
  size_t nwcs;
  /* NOTE(review): presumably the positions assigned to the element for
     the multibyte and wide character output; new_element only resets
     them -- confirm against the code that fills them in. */
  int *mborder;
  int wcorder;

  /* The following is a bit mask which bits are set if this element is
     used in the appropriate level. Interesting for the singlebyte
     weight computation.

     XXX The type here restricts the number of levels to 32. It could
     be changed if necessary but I doubt this is necessary. */
  unsigned int used_in_level;

  /* Weights of this element: an array with one entry per level,
     allocated by insert_weights.  NULL until then. */
  struct element_list_t *weights;

  /* Nonzero if this is a real character definition. */
  int is_character;

  /* Order of the character in the sequence. This information will
     be used in range expressions. */
  int mbseqorder;
  int wcseqorder;

  /* Where does the definition come from. */
  const char *file;
  size_t line;

  /* Which section does this belong to. */
  struct section_list *section;

  /* Predecessor and successor in the order list. */
  struct element_t *last;
  struct element_t *next;

  /* Next element in multibyte output list.
     NOTE(review): mblast is presumably the matching back link -- confirm. */
  struct element_t *mbnext;
  struct element_t *mblast;

  /* Next element in wide character output list.
     NOTE(review): wclast is presumably the matching back link -- confirm. */
  struct element_t *wcnext;
  struct element_t *wclast;
};
147
/* Special element values.  These are small integers cast to element
   pointers, so they must only be compared and never dereferenced.
   insert_weights stores ELEMENT_ELLIPSIS2 as the placeholder weight on
   ellipsis lines. */
#define ELEMENT_ELLIPSIS2 ((struct element_t *) 1)
#define ELEMENT_ELLIPSIS3 ((struct element_t *) 2)
#define ELEMENT_ELLIPSIS4 ((struct element_t *) 3)
152
/* Data type for collating symbol. */
struct symbol_t
{
  /* Name of the symbol; new_symbol stores its own NUL-terminated copy. */
  const char *name;

  /* Point to place in the order list.  NULL until an element has been
     created for the symbol. */
  struct element_t *order;

  /* Where does the definition come from. */
  const char *file;
  size_t line;
};
165
/* "3level.h" is included three times below.  Each inclusion is
   parameterized by the TABLE, ELEMENT and DEFAULT macros defined right
   before it, which select the table name, the stored type and the
   default value.  */

/* Sparse table of struct element_t *.
   NOTE(review): ITERATE and NO_ADD_LOCALE are options interpreted by
   3level.h; their exact effect is not visible here -- confirm there. */
#define TABLE wchead_table
#define ELEMENT struct element_t *
#define DEFAULT NULL
#define ITERATE
#define NO_ADD_LOCALE
#include "3level.h"

/* Sparse table of int32_t. */
#define TABLE collidx_table
#define ELEMENT int32_t
#define DEFAULT 0
#include "3level.h"

/* Sparse table of uint32_t.  The default is the all-ones value. */
#define TABLE collseq_table
#define ELEMENT uint32_t
#define DEFAULT ~((uint32_t) 0)
#include "3level.h"
185
186
/* Simple name list for the preprocessor.  Each node is followed directly
   by the characters of its name, so a node has to be allocated with room
   for the string after the structure itself. */
struct name_list
{
  /* Next node in the list. */
  struct name_list *next;
  /* The name, stored inline.  This is a C99 flexible array member; it
     has the same layout as the GNU zero-length array it replaces. */
  char str[];
};
193
194
/* The real definition of the struct for the LC_COLLATE locale. */
struct locale_collate_t
{
  /* NOTE(review): presumably limits for the number of weights; not
     used in the part of the file reviewed here -- confirm. */
  int col_weight_max;
  int cur_weight_max;

  /* List of known scripts. */
  struct section_list *known_sections;
  /* List of used sections. */
  struct section_list *sections;
  /* Current section using definition.  New elements are assigned to
     this section, and read_directions stores the rules in it. */
  struct section_list *current_section;
  /* There always can be an unnamed section. */
  struct section_list unnamed_section;
  /* Flag whether the unnamed section has been defined. */
  bool unnamed_section_defined;
  /* To make handling of errors easier we have another section. */
  struct section_list error_section;
  /* Sometimes we are defining the values for collating symbols before
     the first actual section. */
  struct section_list symbol_section;

  /* Start of the order list. */
  struct element_t *start;

  /* The undefined element. */
  struct element_t undefined;

  /* This is the cursor for `reorder_after' insertions.  insert_weights
     links each new element in right after the cursor and then moves the
     cursor to it. */
  struct element_t *cursor;

  /* This value is used when handling ellipsis. */
  struct element_t ellipsis_weight;

  /* Known collating elements. */
  hash_table elem_table;

  /* Known collating symbols. */
  hash_table sym_table;

  /* Known collation sequences. */
  hash_table seq_table;

  /* Memory pool from which the elements, symbols, sections and their
     strings are allocated. */
  struct obstack mempool;

  /* The LC_COLLATE category is a bit special as it is sometimes possible
     that the definitions from more than one input file contains information.
     Therefore we keep all relevant input in a list. */
  struct locale_collate_t *next;

  /* Arrays with heads of the list for each of the leading bytes in
     the multibyte sequences. */
  struct element_t *mbheads[256];

  /* Sparse table with the list heads for the wide character side.
     NOTE(review): presumably indexed by wide character value, as the
     counterpart of mbheads -- confirm against the users of the table. */
  struct wchead_table wcheads;

  /* The arrays with the collation sequence order. */
  unsigned char mbseqorder[256];
  struct collseq_table wcseqorder;

  /* State of the preprocessor. */
  enum
    {
      else_none = 0,
      else_ignore,
      else_seen
    }
    else_action;
};
266
267
/* We have a few global variables which are used for reading all
   LC_COLLATE category descriptions in all files.

   NRULES is the number of sorting rules (weight levels).  It is zero
   until read_directions has processed the first specification; later
   specifications must match it. */
static uint32_t nrules;

/* List of defined preprocessor symbols. */
static struct name_list *defined;
274
275
/* We need UTF-8 encoding of numbers.  Store the encoding of VAL at BUF
   without a terminating NUL and return the number of bytes written,
   which is between 1 and 6.  */
static inline int
__attribute ((always_inline))
utf8_encode (char *buf, int val)
{
  int nbytes;
  int idx;

  /* Everything below 0x80 is stored as a single byte.  */
  if (val < 0x80)
    {
      buf[0] = (char) val;
      return 1;
    }

  /* A sequence of N bytes has room for 5 * N + 1 payload bits.  Take the
     shortest sequence the value fits into, with six bytes as the upper
     limit.  */
  nbytes = 2;
  while (nbytes < 6 && (val & (~(uint32_t)0 << (5 * nbytes + 1))) != 0)
    ++nbytes;

  /* Marker bits of the leading byte.  */
  buf[0] = (unsigned char) (~0xff >> nbytes);

  /* Fill in the continuation bytes from the last one backwards; each of
     them takes the six lowest remaining bits.  */
  for (idx = nbytes - 1; idx > 0; --idx)
    {
      buf[idx] = 0x80 | (val & 0x3f);
      val >>= 6;
    }

  /* The bits left over belong into the leading byte.  */
  buf[0] |= val;

  return nbytes;
}
310
311
312static struct section_list *
313make_seclist_elem (struct locale_collate_t *collate, const char *string,
314 struct section_list *next)
315{
316 struct section_list *newp;
317
318 newp = (struct section_list *) obstack_alloc (&collate->mempool,
319 sizeof (*newp));
320 newp->next = next;
321 newp->name = string;
322 newp->first = NULL;
323 newp->last = NULL;
324
325 return newp;
326}
327
328
329static struct element_t *
330new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
331 const uint32_t *wcs, const char *name, size_t namelen,
332 int is_character)
333{
334 struct element_t *newp;
335
336 newp = (struct element_t *) obstack_alloc (&collate->mempool,
337 sizeof (*newp));
338 newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
339 name, namelen);
340 if (mbs != NULL)
341 {
342 newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
343 newp->nmbs = mbslen;
344 }
345 else
346 {
347 newp->mbs = NULL;
348 newp->nmbs = 0;
349 }
350 if (wcs != NULL)
351 {
352 size_t nwcs = wcslen ((wchar_t *) wcs);
353 uint32_t zero = 0;
354 /* Handle <U0000> as a single character. */
355 if (nwcs == 0)
356 nwcs = 1;
357 obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
358 obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
359 newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
360 newp->nwcs = nwcs;
361 }
362 else
363 {
364 newp->wcs = NULL;
365 newp->nwcs = 0;
366 }
367 newp->mborder = NULL;
368 newp->wcorder = 0;
369 newp->used_in_level = 0;
370 newp->is_character = is_character;
371
372 /* Will be assigned later. XXX */
373 newp->mbseqorder = 0;
374 newp->wcseqorder = 0;
375
376 /* Will be allocated later. */
377 newp->weights = NULL;
378
379 newp->file = NULL;
380 newp->line = 0;
381
382 newp->section = collate->current_section;
383
384 newp->last = NULL;
385 newp->next = NULL;
386
387 newp->mbnext = NULL;
388 newp->mblast = NULL;
389
390 newp->wcnext = NULL;
391 newp->wclast = NULL;
392
393 return newp;
394}
395
396
397static struct symbol_t *
398new_symbol (struct locale_collate_t *collate, const char *name, size_t len)
399{
400 struct symbol_t *newp;
401
402 newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp));
403
404 newp->name = obstack_copy0 (&collate->mempool, name, len);
405 newp->order = NULL;
406
407 newp->file = NULL;
408 newp->line = 0;
409
410 return newp;
411}
412
413
414/* Test whether this name is already defined somewhere. */
415static int
416check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate,
417 const struct charmap_t *charmap,
418 struct repertoire_t *repertoire, const char *symbol,
419 size_t symbol_len)
420{
421 void *ignore = NULL;
422
423 if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0)
424 {
425 lr_error (ldfile, _("`%.*s' already defined in charmap"),
426 (int) symbol_len, symbol);
427 return 1;
428 }
429
430 if (repertoire != NULL
431 && (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore)
432 == 0))
433 {
434 lr_error (ldfile, _("`%.*s' already defined in repertoire"),
435 (int) symbol_len, symbol);
436 return 1;
437 }
438
439 if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0)
440 {
441 lr_error (ldfile, _("`%.*s' already defined as collating symbol"),
442 (int) symbol_len, symbol);
443 return 1;
444 }
445
446 if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0)
447 {
448 lr_error (ldfile, _("`%.*s' already defined as collating element"),
449 (int) symbol_len, symbol);
450 return 1;
451 }
452
453 return 0;
454}
455
456
457/* Read the direction specification. */
458static void
459read_directions (struct linereader *ldfile, struct token *arg,
460 const struct charmap_t *charmap,
461 struct repertoire_t *repertoire, struct localedef_t *result)
462{
463 int cnt = 0;
464 int max = nrules ?: 10;
465 enum coll_sort_rule *rules = calloc (max, sizeof (*rules));
466 int warned = 0;
467 struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
468
469 while (1)
470 {
471 int valid = 0;
472
473 if (arg->tok == tok_forward)
474 {
475 if (rules[cnt] & sort_backward)
476 {
477 if (! warned)
478 {
479 lr_error (ldfile, _("\
480%s: `forward' and `backward' are mutually excluding each other"),
481 "LC_COLLATE");
482 warned = 1;
483 }
484 }
485 else if (rules[cnt] & sort_forward)
486 {
487 if (! warned)
488 {
489 lr_error (ldfile, _("\
490%s: `%s' mentioned more than once in definition of weight %d"),
491 "LC_COLLATE", "forward", cnt + 1);
492 }
493 }
494 else
495 rules[cnt] |= sort_forward;
496
497 valid = 1;
498 }
499 else if (arg->tok == tok_backward)
500 {
501 if (rules[cnt] & sort_forward)
502 {
503 if (! warned)
504 {
505 lr_error (ldfile, _("\
506%s: `forward' and `backward' are mutually excluding each other"),
507 "LC_COLLATE");
508 warned = 1;
509 }
510 }
511 else if (rules[cnt] & sort_backward)
512 {
513 if (! warned)
514 {
515 lr_error (ldfile, _("\
516%s: `%s' mentioned more than once in definition of weight %d"),
517 "LC_COLLATE", "backward", cnt + 1);
518 }
519 }
520 else
521 rules[cnt] |= sort_backward;
522
523 valid = 1;
524 }
525 else if (arg->tok == tok_position)
526 {
527 if (rules[cnt] & sort_position)
528 {
529 if (! warned)
530 {
531 lr_error (ldfile, _("\
532%s: `%s' mentioned more than once in definition of weight %d"),
533 "LC_COLLATE", "position", cnt + 1);
534 }
535 }
536 else
537 rules[cnt] |= sort_position;
538
539 valid = 1;
540 }
541
542 if (valid)
543 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
544
545 if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma
546 || arg->tok == tok_semicolon)
547 {
548 if (! valid && ! warned)
549 {
550 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
551 warned = 1;
552 }
553
554 /* See whether we have to increment the counter. */
555 if (arg->tok != tok_comma && rules[cnt] != 0)
556 {
557 /* Add the default `forward' if we have seen only `position'. */
558 if (rules[cnt] == sort_position)
559 rules[cnt] = sort_position | sort_forward;
560
561 ++cnt;
562 }
563
564 if (arg->tok == tok_eof || arg->tok == tok_eol)
565 /* End of line or file, so we exit the loop. */
566 break;
567
568 if (nrules == 0)
569 {
570 /* See whether we have enough room in the array. */
571 if (cnt == max)
572 {
573 max += 10;
574 rules = (enum coll_sort_rule *) xrealloc (rules,
575 max
576 * sizeof (*rules));
577 memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules));
578 }
579 }
580 else
581 {
582 if (cnt == nrules)
583 {
584 /* There must not be any more rule. */
585 if (! warned)
586 {
587 lr_error (ldfile, _("\
588%s: too many rules; first entry only had %d"),
589 "LC_COLLATE", nrules);
590 warned = 1;
591 }
592
593 lr_ignore_rest (ldfile, 0);
594 break;
595 }
596 }
597 }
598 else
599 {
600 if (! warned)
601 {
602 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
603 warned = 1;
604 }
605 }
606
607 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
608 }
609
610 if (nrules == 0)
611 {
612 /* Now we know how many rules we have. */
613 nrules = cnt;
614 rules = (enum coll_sort_rule *) xrealloc (rules,
615 nrules * sizeof (*rules));
616 }
617 else
618 {
619 if (cnt < nrules)
620 {
621 /* Not enough rules in this specification. */
622 if (! warned)
623 lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
624
625 do
626 rules[cnt] = sort_forward;
627 while (++cnt < nrules);
628 }
629 }
630
631 collate->current_section->rules = rules;
632}
633
634
635static struct element_t *
636find_element (struct linereader *ldfile, struct locale_collate_t *collate,
637 const char *str, size_t len)
638{
639 void *result = NULL;
640
641 /* Search for the entries among the collation sequences already define. */
642 if (find_entry (&collate->seq_table, str, len, &result) != 0)
643 {
644 /* Nope, not define yet. So we see whether it is a
645 collation symbol. */
646 void *ptr;
647
648 if (find_entry (&collate->sym_table, str, len, &ptr) == 0)
649 {
650 /* It's a collation symbol. */
651 struct symbol_t *sym = (struct symbol_t *) ptr;
652 result = sym->order;
653
654 if (result == NULL)
655 result = sym->order = new_element (collate, NULL, 0, NULL,
656 NULL, 0, 0);
657 }
658 else if (find_entry (&collate->elem_table, str, len, &result) != 0)
659 {
660 /* It's also no collation element. So it is a character
661 element defined later. */
662 result = new_element (collate, NULL, 0, NULL, str, len, 1);
663 /* Insert it into the sequence table. */
664 insert_entry (&collate->seq_table, str, len, result);
665 }
666 }
667
668 return (struct element_t *) result;
669}
670
671
672static void
673unlink_element (struct locale_collate_t *collate)
674{
675 if (collate->cursor == collate->start)
676 {
677 assert (collate->cursor->next == NULL);
678 assert (collate->cursor->last == NULL);
679 collate->cursor = NULL;
680 }
681 else
682 {
683 if (collate->cursor->next != NULL)
684 collate->cursor->next->last = collate->cursor->last;
685 if (collate->cursor->last != NULL)
686 collate->cursor->last->next = collate->cursor->next;
687 collate->cursor = collate->cursor->last;
688 }
689}
690
691
692static void
693insert_weights (struct linereader *ldfile, struct element_t *elem,
694 const struct charmap_t *charmap,
695 struct repertoire_t *repertoire, struct localedef_t *result,
696 enum token_t ellipsis)
697{
698 int weight_cnt;
699 struct token *arg;
700 struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
701
702 /* Initialize all the fields. */
703 elem->file = ldfile->fname;
704 elem->line = ldfile->lineno;
705
706 elem->last = collate->cursor;
707 elem->next = collate->cursor ? collate->cursor->next : NULL;
708 if (collate->cursor != NULL && collate->cursor->next != NULL)
709 collate->cursor->next->last = elem;
710 if (collate->cursor != NULL)
711 collate->cursor->next = elem;
712 if (collate->start == NULL)
713 {
714 assert (collate->cursor == NULL);
715 collate->start = elem;
716 }
717
718 elem->section = collate->current_section;
719
720 if (collate->current_section->first == NULL)
721 collate->current_section->first = elem;
722 if (collate->current_section->last == collate->cursor)
723 collate->current_section->last = elem;
724
725 collate->cursor = elem;
726
727 elem->weights = (struct element_list_t *)
728 obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
729 memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
730
731 weight_cnt = 0;
732
733 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
734 do
735 {
736 if (arg->tok == tok_eof || arg->tok == tok_eol)
737 break;
738
739 if (arg->tok == tok_ignore)
740 {
741 /* The weight for this level has to be ignored. We use the
742 null pointer to indicate this. */
743 elem->weights[weight_cnt].w = (struct element_t **)
744 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
745 elem->weights[weight_cnt].w[0] = NULL;
746 elem->weights[weight_cnt].cnt = 1;
747 }
748 else if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
749 {
750 char ucs4str[10];
751 struct element_t *val;
752 char *symstr;
753 size_t symlen;
754
755 if (arg->tok == tok_bsymbol)
756 {
757 symstr = arg->val.str.startmb;
758 symlen = arg->val.str.lenmb;
759 }
760 else
761 {
762 snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4);
763 symstr = ucs4str;
764 symlen = 9;
765 }
766
767 val = find_element (ldfile, collate, symstr, symlen);
768 if (val == NULL)
769 break;
770
771 elem->weights[weight_cnt].w = (struct element_t **)
772 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
773 elem->weights[weight_cnt].w[0] = val;
774 elem->weights[weight_cnt].cnt = 1;
775 }
776 else if (arg->tok == tok_string)
777 {
778 /* Split the string up in the individual characters and put
779 the element definitions in the list. */
780 const char *cp = arg->val.str.startmb;
781 int cnt = 0;
782 struct element_t *charelem;
783 struct element_t **weights = NULL;
784 int max = 0;
785
786 if (*cp == '\0')
787 {
788 lr_error (ldfile, _("%s: empty weight string not allowed"),
789 "LC_COLLATE");
790 lr_ignore_rest (ldfile, 0);
791 break;
792 }
793
794 do
795 {
796 if (*cp == '<')
797 {
798 /* Ahh, it's a bsymbol or an UCS4 value. If it's
799 the latter we have to unify the name. */
800 const char *startp = ++cp;
801 size_t len;
802
803 while (*cp != '>')
804 {
805 if (*cp == ldfile->escape_char)
806 ++cp;
807 if (*cp == '\0')
808 /* It's a syntax error. */
809 goto syntax;
810
811 ++cp;
812 }
813
814 if (cp - startp == 5 && startp[0] == 'U'
815 && isxdigit (startp[1]) && isxdigit (startp[2])
816 && isxdigit (startp[3]) && isxdigit (startp[4]))
817 {
818 unsigned int ucs4 = strtoul (startp + 1, NULL, 16);
819 char *newstr;
820
821 newstr = (char *) xmalloc (10);
822 snprintf (newstr, 10, "U%08X", ucs4);
823 startp = newstr;
824
825 len = 9;
826 }
827 else
828 len = cp - startp;
829
830 charelem = find_element (ldfile, collate, startp, len);
831 ++cp;
832 }
833 else
834 {
835 /* People really shouldn't use characters directly in
836 the string. Especially since it's not really clear
837 what this means. We interpret all characters in the
838 string as if that would be bsymbols. Otherwise we
839 would have to match back to bsymbols somehow and this
840 is normally not what people normally expect. */
841 charelem = find_element (ldfile, collate, cp++, 1);
842 }
843
844 if (charelem == NULL)
845 {
846 /* We ignore the rest of the line. */
847 lr_ignore_rest (ldfile, 0);
848 break;
849 }
850
851 /* Add the pointer. */
852 if (cnt >= max)
853 {
854 struct element_t **newp;
855 max += 10;
856 newp = (struct element_t **)
857 alloca (max * sizeof (struct element_t *));
858 memcpy (newp, weights, cnt * sizeof (struct element_t *));
859 weights = newp;
860 }
861 weights[cnt++] = charelem;
862 }
863 while (*cp != '\0');
864
865 /* Now store the information. */
866 elem->weights[weight_cnt].w = (struct element_t **)
867 obstack_alloc (&collate->mempool,
868 cnt * sizeof (struct element_t *));
869 memcpy (elem->weights[weight_cnt].w, weights,
870 cnt * sizeof (struct element_t *));
871 elem->weights[weight_cnt].cnt = cnt;
872
873 /* We don't need the string anymore. */
874 free (arg->val.str.startmb);
875 }
876 else if (ellipsis != tok_none
877 && (arg->tok == tok_ellipsis2
878 || arg->tok == tok_ellipsis3
879 || arg->tok == tok_ellipsis4))
880 {
881 /* It must be the same ellipsis as used in the initial column. */
882 if (arg->tok != ellipsis)
883 lr_error (ldfile, _("\
884%s: weights must use the same ellipsis symbol as the name"),
885 "LC_COLLATE");
886
887 /* The weight for this level will depend on the element
888 iterating over the range. Put a placeholder. */
889 elem->weights[weight_cnt].w = (struct element_t **)
890 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
891 elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
892 elem->weights[weight_cnt].cnt = 1;
893 }
894 else
895 {
896 syntax:
897 /* It's a syntax error. */
898 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
899 lr_ignore_rest (ldfile, 0);
900 break;
901 }
902
903 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
904 /* This better should be the end of the line or a semicolon. */
905 if (arg->tok == tok_semicolon)
906 /* OK, ignore this and read the next token. */
907 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
908 else if (arg->tok != tok_eof && arg->tok != tok_eol)
909 {
910 /* It's a syntax error. */
911 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
912 lr_ignore_rest (ldfile, 0);
913 break;
914 }
915 }
916 while (++weight_cnt < nrules);
917
918 if (weight_cnt < nrules)
919 {
920 /* This means the rest of the line uses the current element as
921 the weight. */
922 do
923 {
924 elem->weights[weight_cnt].w = (struct element_t **)
925 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
926 if (ellipsis == tok_none)
927 elem->weights[weight_cnt].w[0] = elem;
928 else
929 elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
930 elem->weights[weight_cnt].cnt = 1;
931 }
932 while (++weight_cnt < nrules);
933 }
934 else
935 {
936 if (arg->tok == tok_ignore || arg->tok == tok_bsymbol)
937 {
938 /* Too many rule values. */
939 lr_error (ldfile, _("%s: too many values"), "LC_COLLATE");
940 lr_ignore_rest (ldfile, 0);
941 }
942 else
943 lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof);
944 }
945}
946
947
948static int
949insert_value (struct linereader *ldfile, const char *symstr, size_t symlen,
950 const struct charmap_t *charmap, struct repertoire_t *repertoire,
951 struct localedef_t *result)
952{
953 /* First find out what kind of symbol this is. */
954 struct charseq *seq;
955 uint32_t wc;
956 struct element_t *elem = NULL;
957 struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
958
959 /* Try to find the character in the charmap. */
960 seq = charmap_find_value (charmap, symstr, symlen);
961
962 /* Determine the wide character. */
963 if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
964 {
965 wc = repertoire_find_value (repertoire, symstr, symlen);
966 if (seq != NULL)
967 seq->ucs4 = wc;
968 }
969 else
970 wc = seq->ucs4;
971
972 if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
973 {
974 /* It's no character, so look through the collation elements and
975 symbol list. */
976 void *ptr = elem;
977 if (find_entry (&collate->elem_table, symstr, symlen, &ptr) != 0)
978 {
979 void *result;
980 struct symbol_t *sym = NULL;
981
982 /* It's also collation element. Therefore it's either a
983 collating symbol or it's a character which is not
984 supported by the character set. In the later case we
985 simply create a dummy entry. */
986 if (find_entry (&collate->sym_table, symstr, symlen, &result) == 0)
987 {
988 /* It's a collation symbol. */
989 sym = (struct symbol_t *) result;
990
991 elem = sym->order;
992 }
993
994 if (elem == NULL)
995 {
996 elem = new_element (collate, NULL, 0, NULL, symstr, symlen, 0);
997
998 if (sym != NULL)
999 sym->order = elem;
1000 else
1001 /* Enter a fake element in the sequence table. This
1002 won't cause anything in the output since there is
1003 no multibyte or wide character associated with
1004 it. */
1005 insert_entry (&collate->seq_table, symstr, symlen, elem);
1006 }
1007 }
1008 else
1009 /* Copy the result back. */
1010 elem = ptr;
1011 }
1012 else
1013 {
1014 /* Otherwise the symbols stands for a character. */
1015 void *ptr = elem;
1016 if (find_entry (&collate->seq_table, symstr, symlen, &ptr) != 0)
1017 {
1018 uint32_t wcs[2] = { wc, 0 };
1019
1020 /* We have to allocate an entry. */
1021 elem = new_element (collate,
1022 seq != NULL ? (char *) seq->bytes : NULL,
1023 seq != NULL ? seq->nbytes : 0,
1024 wc == ILLEGAL_CHAR_VALUE ? NULL : wcs,
1025 symstr, symlen, 1);
1026
1027 /* And add it to the table. */
1028 if (insert_entry (&collate->seq_table, symstr, symlen, elem) != 0)
1029 /* This cannot happen. */
1030 assert (! "Internal error");
1031 }
1032 else
1033 {
1034 /* Copy the result back. */
1035 elem = ptr;
1036
1037 /* Maybe the character was used before the definition. In this case
1038 we have to insert the byte sequences now. */
1039 if (elem->mbs == NULL && seq != NULL)
1040 {
1041 elem->mbs = obstack_copy0 (&collate->mempool,
1042 seq->bytes, seq->nbytes);
1043 elem->nmbs = seq->nbytes;
1044 }
1045
1046 if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE)
1047 {
1048 uint32_t wcs[2] = { wc, 0 };
1049
1050 elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs));
1051 elem->nwcs = 1;
1052 }
1053 }
1054 }
1055
1056 /* Test whether this element is not already in the list. */
1057 if (elem->next != NULL || elem == collate->cursor)
1058 {
1059 lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"),
1060 (int) symlen, symstr, elem->file, elem->line);
1061 lr_ignore_rest (ldfile, 0);
1062 return 1;
1063 }
1064
1065 insert_weights (ldfile, elem, charmap, repertoire, result, tok_none);
1066
1067 return 0;
1068}
1069
1070
1071static void
1072handle_ellipsis (struct linereader *ldfile, const char *symstr, size_t symlen,
1073 enum token_t ellipsis, const struct charmap_t *charmap,
1074 struct repertoire_t *repertoire,
1075 struct localedef_t *result)
1076{
1077 struct element_t *startp;
1078 struct element_t *endp;
1079 struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
1080
1081 /* Unlink the entry added for the ellipsis. */
1082 unlink_element (collate);
1083 startp = collate->cursor;
1084
1085 /* Process and add the end-entry. */
1086 if (symstr != NULL
1087 && insert_value (ldfile, symstr, symlen, charmap, repertoire, result))
1088 /* Something went wrong with inserting the to-value. This means
1089 we cannot process the ellipsis. */
1090 return;
1091
1092 /* Reset the cursor. */
1093 collate->cursor = startp;
1094
1095 /* Now we have to handle many different situations:
1096 - we have to distinguish between the three different ellipsis forms
1097 - the is the ellipsis at the beginning, in the middle, or at the end.
1098 */
1099 endp = collate->cursor->next;
1100 assert (symstr == NULL || endp != NULL);
1101
1102 /* XXX The following is probably very wrong since also collating symbols
1103 can appear in ranges. But do we want/can refine the test for that? */
1104#if 0
1105 /* Both, the start and the end symbol, must stand for characters. */
1106 if ((startp != NULL && (startp->name == NULL || ! startp->is_character))
1107 || (endp != NULL && (endp->name == NULL|| ! endp->is_character)))
1108 {
1109 lr_error (ldfile, _("\
1110%s: the start and the end symbol of a range must stand for characters"),
1111 "LC_COLLATE");
1112 return;
1113 }
1114#endif
1115
1116 if (ellipsis == tok_ellipsis3)
1117 {
1118 /* One requirement we make here: the length of the byte
1119 sequences for the first and end character must be the same.
1120 This is mainly to prevent unwanted effects and this is often
1121 not what is wanted. */
1122 size_t len = (startp->mbs != NULL ? startp->nmbs
1123 : (endp->mbs != NULL ? endp->nmbs : 0));
1124 char mbcnt[len + 1];
1125 char mbend[len + 1];
1126
1127 /* Well, this should be caught somewhere else already. Just to
1128 make sure. */
1129 assert (startp == NULL || startp->wcs == NULL || startp->wcs[1] == 0);
1130 assert (endp == NULL || endp->wcs == NULL || endp->wcs[1] == 0);
1131
1132 if (startp != NULL && endp != NULL
1133 && startp->mbs != NULL && endp->mbs != NULL
1134 && startp->nmbs != endp->nmbs)
1135 {
1136 lr_error (ldfile, _("\
1137%s: byte sequences of first and last character must have the same length"),
1138 "LC_COLLATE");
1139 return;
1140 }
1141
1142 /* Determine whether we have to generate multibyte sequences. */
1143 if ((startp == NULL || startp->mbs != NULL)
1144 && (endp == NULL || endp->mbs != NULL))
1145 {
1146 int cnt;
1147 int ret;
1148
1149 /* Prepare the beginning byte sequence. This is either from the
1150 beginning byte sequence or it is all nulls if it was an
1151 initial ellipsis. */
1152 if (startp == NULL || startp->mbs == NULL)
1153 memset (mbcnt, '\0', len);
1154 else
1155 {
1156 memcpy (mbcnt, startp->mbs, len);
1157
1158 /* And increment it so that the value is the first one we will
1159 try to insert. */
1160 for (cnt = len - 1; cnt >= 0; --cnt)
1161 if (++mbcnt[cnt] != '\0')
1162 break;
1163 }
1164 mbcnt[len] = '\0';
1165
1166 /* And the end sequence. */
1167 if (endp == NULL || endp->mbs == NULL)
1168 memset (mbend, '\0', len);
1169 else
1170 memcpy (mbend, endp->mbs, len);
1171 mbend[len] = '\0';
1172
1173 /* Test whether we have a correct range. */
1174 ret = memcmp (mbcnt, mbend, len);
1175 if (ret >= 0)
1176 {
1177 if (ret > 0)
1178 lr_error (ldfile, _("%s: byte sequence of first character of \
1179range is not lower than that of the last character"), "LC_COLLATE");
1180 return;
1181 }
1182
1183 /* Generate the byte sequences data. */
1184 while (1)
1185 {
1186 struct charseq *seq;
1187
1188 /* Quite a bit of work ahead. We have to find the character
1189 definition for the byte sequence and then determine the
1190 wide character belonging to it. */
1191 seq = charmap_find_symbol (charmap, mbcnt, len);
1192 if (seq != NULL)
1193 {
1194 struct element_t *elem;
1195 size_t namelen;
1196
1197 /* I don't think this can ever happen. */
1198 assert (seq->name != NULL);
1199 namelen = strlen (seq->name);
1200
1201 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1202 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1203 namelen);
1204
1205 /* Now we are ready to insert the new value in the
1206 sequence. Find out whether the element is
1207 already known. */
1208 void *ptr;
1209 if (find_entry (&collate->seq_table, seq->name, namelen,
1210 &ptr) != 0)
1211 {
1212 uint32_t wcs[2] = { seq->ucs4, 0 };
1213
1214 /* We have to allocate an entry. */
1215 elem = new_element (collate, mbcnt, len,
1216 seq->ucs4 == ILLEGAL_CHAR_VALUE
1217 ? NULL : wcs, seq->name,
1218 namelen, 1);
1219
1220 /* And add it to the table. */
1221 if (insert_entry (&collate->seq_table, seq->name,
1222 namelen, elem) != 0)
1223 /* This cannot happen. */
1224 assert (! "Internal error");
1225 }
1226 else
1227 /* Copy the result. */
1228 elem = ptr;
1229
1230 /* Test whether this element is not already in the list. */
1231 if (elem->next != NULL || (collate->cursor != NULL
1232 && elem->next == collate->cursor))
1233 {
1234 lr_error (ldfile, _("\
1235order for `%.*s' already defined at %s:%Zu"),
1236 (int) namelen, seq->name,
1237 elem->file, elem->line);
1238 goto increment;
1239 }
1240
1241 /* Enqueue the new element. */
1242 elem->last = collate->cursor;
1243 if (collate->cursor == NULL)
1244 elem->next = NULL;
1245 else
1246 {
1247 elem->next = collate->cursor->next;
1248 elem->last->next = elem;
1249 if (elem->next != NULL)
1250 elem->next->last = elem;
1251 }
1252 if (collate->start == NULL)
1253 {
1254 assert (collate->cursor == NULL);
1255 collate->start = elem;
1256 }
1257 collate->cursor = elem;
1258
1259 /* Add the weight value. We take them from the
1260 `ellipsis_weights' member of `collate'. */
1261 elem->weights = (struct element_list_t *)
1262 obstack_alloc (&collate->mempool,
1263 nrules * sizeof (struct element_list_t));
1264 for (cnt = 0; cnt < nrules; ++cnt)
1265 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1266 && (collate->ellipsis_weight.weights[cnt].w[0]
1267 == ELEMENT_ELLIPSIS2))
1268 {
1269 elem->weights[cnt].w = (struct element_t **)
1270 obstack_alloc (&collate->mempool,
1271 sizeof (struct element_t *));
1272 elem->weights[cnt].w[0] = elem;
1273 elem->weights[cnt].cnt = 1;
1274 }
1275 else
1276 {
1277 /* Simply use the weight from `ellipsis_weight'. */
1278 elem->weights[cnt].w =
1279 collate->ellipsis_weight.weights[cnt].w;
1280 elem->weights[cnt].cnt =
1281 collate->ellipsis_weight.weights[cnt].cnt;
1282 }
1283 }
1284
1285 /* Increment for the next round. */
1286 increment:
1287 for (cnt = len - 1; cnt >= 0; --cnt)
1288 if (++mbcnt[cnt] != '\0')
1289 break;
1290
1291 /* Find out whether this was all. */
1292 if (cnt < 0 || memcmp (mbcnt, mbend, len) >= 0)
1293 /* Yep, that's all. */
1294 break;
1295 }
1296 }
1297 }
1298 else
1299 {
1300 /* For symbolic range we naturally must have a beginning and an
1301 end specified by the user. */
1302 if (startp == NULL)
1303 lr_error (ldfile, _("\
1304%s: symbolic range ellipsis must not directly follow `order_start'"),
1305 "LC_COLLATE");
1306 else if (endp == NULL)
1307 lr_error (ldfile, _("\
1308%s: symbolic range ellipsis must not be directly followed by `order_end'"),
1309 "LC_COLLATE");
1310 else
1311 {
1312 /* Determine the range. To do so we have to determine the
1313 common prefix of the both names and then the numeric
1314 values of both ends. */
1315 size_t lenfrom = strlen (startp->name);
1316 size_t lento = strlen (endp->name);
1317 char buf[lento + 1];
1318 int preflen = 0;
1319 long int from;
1320 long int to;
1321 char *cp;
1322 int base = ellipsis == tok_ellipsis2 ? 16 : 10;
1323
1324 if (lenfrom != lento)
1325 {
1326 invalid_range:
1327 lr_error (ldfile, _("\
1328`%s' and `%.*s' are not valid names for symbolic range"),
1329 startp->name, (int) lento, endp->name);
1330 return;
1331 }
1332
1333 while (startp->name[preflen] == endp->name[preflen])
1334 if (startp->name[preflen] == '\0')
1335 /* Nothing to be done. The start and end point are identical
1336 and while inserting the end point we have already given
1337 the user an error message. */
1338 return;
1339 else
1340 ++preflen;
1341
1342 errno = 0;
1343 from = strtol (startp->name + preflen, &cp, base);
1344 if ((from == UINT_MAX && errno == ERANGE) || *cp != '\0')
1345 goto invalid_range;
1346
1347 errno = 0;
1348 to = strtol (endp->name + preflen, &cp, base);
1349 if ((to == UINT_MAX && errno == ERANGE) || *cp != '\0')
1350 goto invalid_range;
1351
1352 /* Copy the prefix. */
1353 memcpy (buf, startp->name, preflen);
1354
1355 /* Loop over all values. */
1356 for (++from; from < to; ++from)
1357 {
1358 struct element_t *elem = NULL;
1359 struct charseq *seq;
1360 uint32_t wc;
1361 int cnt;
1362
1363 /* Generate the name. */
1364 sprintf (buf + preflen, base == 10 ? "%0*ld" : "%0*lX",
1365 (int) (lenfrom - preflen), from);
1366
1367 /* Look whether this name is already defined. */
1368 void *ptr;
1369 if (find_entry (&collate->seq_table, buf, symlen, &ptr) == 0)
1370 {
1371 /* Copy back the result. */
1372 elem = ptr;
1373
1374 if (elem->next != NULL || (collate->cursor != NULL
1375 && elem->next == collate->cursor))
1376 {
1377 lr_error (ldfile, _("\
1378%s: order for `%.*s' already defined at %s:%Zu"),
1379 "LC_COLLATE", (int) lenfrom, buf,
1380 elem->file, elem->line);
1381 continue;
1382 }
1383
1384 if (elem->name == NULL)
1385 {
1386 lr_error (ldfile, _("%s: `%s' must be a character"),
1387 "LC_COLLATE", buf);
1388 continue;
1389 }
1390 }
1391
1392 if (elem == NULL || (elem->mbs == NULL && elem->wcs == NULL))
1393 {
1394 /* Search for a character of this name. */
1395 seq = charmap_find_value (charmap, buf, lenfrom);
1396 if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1397 {
1398 wc = repertoire_find_value (repertoire, buf, lenfrom);
1399
1400 if (seq != NULL)
1401 seq->ucs4 = wc;
1402 }
1403 else
1404 wc = seq->ucs4;
1405
1406 if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
1407 /* We don't know anything about a character with this
1408 name. XXX Should we warn? */
1409 continue;
1410
1411 if (elem == NULL)
1412 {
1413 uint32_t wcs[2] = { wc, 0 };
1414
1415 /* We have to allocate an entry. */
1416 elem = new_element (collate,
1417 seq != NULL
1418 ? (char *) seq->bytes : NULL,
1419 seq != NULL ? seq->nbytes : 0,
1420 wc == ILLEGAL_CHAR_VALUE
1421 ? NULL : wcs, buf, lenfrom, 1);
1422 }
1423 else
1424 {
1425 /* Update the element. */
1426 if (seq != NULL)
1427 {
1428 elem->mbs = obstack_copy0 (&collate->mempool,
1429 seq->bytes, seq->nbytes);
1430 elem->nmbs = seq->nbytes;
1431 }
1432
1433 if (wc != ILLEGAL_CHAR_VALUE)
1434 {
1435 uint32_t zero = 0;
1436
1437 obstack_grow (&collate->mempool,
1438 &wc, sizeof (uint32_t));
1439 obstack_grow (&collate->mempool,
1440 &zero, sizeof (uint32_t));
1441 elem->wcs = obstack_finish (&collate->mempool);
1442 elem->nwcs = 1;
1443 }
1444 }
1445
1446 elem->file = ldfile->fname;
1447 elem->line = ldfile->lineno;
1448 elem->section = collate->current_section;
1449 }
1450
1451 /* Enqueue the new element. */
1452 elem->last = collate->cursor;
1453 elem->next = collate->cursor->next;
1454 elem->last->next = elem;
1455 if (elem->next != NULL)
1456 elem->next->last = elem;
1457 collate->cursor = elem;
1458
1459 /* Now add the weights. They come from the `ellipsis_weights'
1460 member of `collate'. */
1461 elem->weights = (struct element_list_t *)
1462 obstack_alloc (&collate->mempool,
1463 nrules * sizeof (struct element_list_t));
1464 for (cnt = 0; cnt < nrules; ++cnt)
1465 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1466 && (collate->ellipsis_weight.weights[cnt].w[0]
1467 == ELEMENT_ELLIPSIS2))
1468 {
1469 elem->weights[cnt].w = (struct element_t **)
1470 obstack_alloc (&collate->mempool,
1471 sizeof (struct element_t *));
1472 elem->weights[cnt].w[0] = elem;
1473 elem->weights[cnt].cnt = 1;
1474 }
1475 else
1476 {
                  /* Simply use the weight from `ellipsis_weight'.  */
1478 elem->weights[cnt].w =
1479 collate->ellipsis_weight.weights[cnt].w;
1480 elem->weights[cnt].cnt =
1481 collate->ellipsis_weight.weights[cnt].cnt;
1482 }
1483 }
1484 }
1485 }
1486 /* Move the cursor to the last entry in the ellipsis.
1487 Subsequent operations need to start from the last entry. */
1488 collate->cursor = endp;
1489}
1490
1491
1492static void
1493collate_startup (struct linereader *ldfile, struct localedef_t *locale,
1494 struct localedef_t *copy_locale, int ignore_content)
1495{
1496 if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL)
1497 {
1498 struct locale_collate_t *collate;
1499
1500 if (copy_locale == NULL)
1501 {
1502 collate = locale->categories[LC_COLLATE].collate =
1503 (struct locale_collate_t *)
1504 xcalloc (1, sizeof (struct locale_collate_t));
1505
1506 /* Init the various data structures. */
1507 init_hash (&collate->elem_table, 100);
1508 init_hash (&collate->sym_table, 100);
1509 init_hash (&collate->seq_table, 500);
1510 obstack_init (&collate->mempool);
1511
1512 collate->col_weight_max = -1;
1513 }
1514 else
1515 /* Reuse the copy_locale's data structures. */
1516 collate = locale->categories[LC_COLLATE].collate =
1517 copy_locale->categories[LC_COLLATE].collate;
1518 }
1519
1520 ldfile->translate_strings = 0;
1521 ldfile->return_widestr = 0;
1522}
1523
1524
1525void
1526collate_finish (struct localedef_t *locale, const struct charmap_t *charmap)
1527{
1528 /* Now is the time when we can assign the individual collation
1529 values for all the symbols. We have possibly different values
1530 for the wide- and the multibyte-character symbols. This is done
1531 since it might make a difference in the encoding if there is in
1532 some cases no multibyte-character but there are wide-characters.
1533 (The other way around it is not important since theencoded
1534 collation value in the wide-character case is 32 bits wide and
1535 therefore requires no encoding).
1536
1537 The lowest collation value assigned is 2. Zero is reserved for
1538 the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1539 functions and 1 is used to separate the individual passes for the
1540 different rules.
1541
1542 We also have to construct is list with all the bytes/words which
1543 can come first in a sequence, followed by all the elements which
1544 also start with this byte/word. The order is reverse which has
1545 among others the important effect that longer strings are located
1546 first in the list. This is required for the output data since
1547 the algorithm used in `strcoll' etc depends on this.
1548
1549 The multibyte case is easy. We simply sort into an array with
1550 256 elements. */
1551 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1552 int mbact[nrules];
1553 int wcact;
1554 int mbseqact;
1555 int wcseqact;
1556 struct element_t *runp;
1557 int i;
1558 int need_undefined = 0;
1559 struct section_list *sect;
1560 int ruleidx;
1561 int nr_wide_elems = 0;
1562
1563 if (collate == NULL)
1564 {
1565 /* No data, no check. Issue a warning. */
1566 record_warning (_("No definition for %s category found"),
1567 "LC_COLLATE");
1568 return;
1569 }
1570
1571 /* If this assertion is hit change the type in `element_t'. */
1572 assert (nrules <= sizeof (runp->used_in_level) * 8);
1573
1574 /* Make sure that the `position' rule is used either in all sections
1575 or in none. */
1576 for (i = 0; i < nrules; ++i)
1577 for (sect = collate->sections; sect != NULL; sect = sect->next)
1578 if (sect != collate->current_section
1579 && sect->rules != NULL
1580 && ((sect->rules[i] & sort_position)
1581 != (collate->current_section->rules[i] & sort_position)))
1582 {
1583 record_error (0, 0, _("\
1584%s: `position' must be used for a specific level in all sections or none"),
1585 "LC_COLLATE");
1586 break;
1587 }
1588
1589 /* Find out which elements are used at which level. At the same
1590 time we find out whether we have any undefined symbols. */
1591 runp = collate->start;
1592 while (runp != NULL)
1593 {
1594 if (runp->mbs != NULL)
1595 {
1596 for (i = 0; i < nrules; ++i)
1597 {
1598 int j;
1599
1600 for (j = 0; j < runp->weights[i].cnt; ++j)
1601 /* A NULL pointer as the weight means IGNORE. */
1602 if (runp->weights[i].w[j] != NULL)
1603 {
1604 if (runp->weights[i].w[j]->weights == NULL)
1605 {
1606 record_error_at_line (0, 0, runp->file, runp->line,
1607 _("symbol `%s' not defined"),
1608 runp->weights[i].w[j]->name);
1609
1610 need_undefined = 1;
1611 runp->weights[i].w[j] = &collate->undefined;
1612 }
1613 else
1614 /* Set the bit for the level. */
1615 runp->weights[i].w[j]->used_in_level |= 1 << i;
1616 }
1617 }
1618 }
1619
1620 /* Up to the next entry. */
1621 runp = runp->next;
1622 }
1623
1624 /* Walk through the list of defined sequences and assign weights. Also
1625 create the data structure which will allow generating the single byte
1626 character based tables.
1627
1628 Since at each time only the weights for each of the rules are
1629 only compared to other weights for this rule it is possible to
1630 assign more compact weight values than simply counting all
1631 weights in sequence. We can assign weights from 3, one for each
1632 rule individually and only for those elements, which are actually
1633 used for this rule.
1634
1635 Why is this important? It is not for the wide char table. But
1636 it is for the singlebyte output since here larger numbers have to
1637 be encoded to make it possible to emit the value as a byte
1638 string. */
1639 for (i = 0; i < nrules; ++i)
1640 mbact[i] = 2;
1641 wcact = 2;
1642 mbseqact = 0;
1643 wcseqact = 0;
1644 runp = collate->start;
1645 while (runp != NULL)
1646 {
1647 /* Determine the order. */
1648 if (runp->used_in_level != 0)
1649 {
1650 runp->mborder = (int *) obstack_alloc (&collate->mempool,
1651 nrules * sizeof (int));
1652
1653 for (i = 0; i < nrules; ++i)
1654 if ((runp->used_in_level & (1 << i)) != 0)
1655 runp->mborder[i] = mbact[i]++;
1656 else
1657 runp->mborder[i] = 0;
1658 }
1659
1660 if (runp->mbs != NULL)
1661 {
1662 struct element_t **eptr;
1663 struct element_t *lastp = NULL;
1664
1665 /* Find the point where to insert in the list. */
1666 eptr = &collate->mbheads[((unsigned char *) runp->mbs)[0]];
1667 while (*eptr != NULL)
1668 {
1669 if ((*eptr)->nmbs < runp->nmbs)
1670 break;
1671
1672 if ((*eptr)->nmbs == runp->nmbs)
1673 {
1674 int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs);
1675
1676 if (c == 0)
1677 {
1678 /* This should not happen. It means that we have
1679 to symbols with the same byte sequence. It is
1680 of course an error. */
1681 record_error_at_line (0, 0, (*eptr)->file,
1682 (*eptr)->line,
1683 _("\
1684symbol `%s' has the same encoding as"), (*eptr)->name);
1685
1686 record_error_at_line (0, 0, runp->file, runp->line,
1687 _("symbol `%s'"), runp->name);
1688 goto dont_insert;
1689 }
1690 else if (c < 0)
1691 /* Insert it here. */
1692 break;
1693 }
1694
1695 /* To the next entry. */
1696 lastp = *eptr;
1697 eptr = &(*eptr)->mbnext;
1698 }
1699
1700 /* Set the pointers. */
1701 runp->mbnext = *eptr;
1702 runp->mblast = lastp;
1703 if (*eptr != NULL)
1704 (*eptr)->mblast = runp;
1705 *eptr = runp;
1706 dont_insert:
1707 ;
1708 }
1709
1710 if (runp->used_in_level)
1711 {
1712 runp->wcorder = wcact++;
1713
1714 /* We take the opportunity to count the elements which have
1715 wide characters. */
1716 ++nr_wide_elems;
1717 }
1718
1719 if (runp->is_character)
1720 {
1721 if (runp->nmbs == 1)
1722 collate->mbseqorder[((unsigned char *) runp->mbs)[0]] = mbseqact++;
1723
1724 runp->wcseqorder = wcseqact++;
1725 }
1726 else if (runp->mbs != NULL && runp->weights != NULL)
1727 /* This is for collation elements. */
1728 runp->wcseqorder = wcseqact++;
1729
1730 /* Up to the next entry. */
1731 runp = runp->next;
1732 }
1733
1734 /* Find out whether any of the `mbheads' entries is unset. In this
1735 case we use the UNDEFINED entry. */
1736 for (i = 1; i < 256; ++i)
1737 if (collate->mbheads[i] == NULL)
1738 {
1739 need_undefined = 1;
1740 collate->mbheads[i] = &collate->undefined;
1741 }
1742
1743 /* Now to the wide character case. */
1744 collate->wcheads.p = 6;
1745 collate->wcheads.q = 10;
1746 wchead_table_init (&collate->wcheads);
1747
1748 collate->wcseqorder.p = 6;
1749 collate->wcseqorder.q = 10;
1750 collseq_table_init (&collate->wcseqorder);
1751
1752 /* Start adding. */
1753 runp = collate->start;
1754 while (runp != NULL)
1755 {
1756 if (runp->wcs != NULL)
1757 {
1758 struct element_t *e;
1759 struct element_t **eptr;
1760 struct element_t *lastp;
1761
1762 /* Insert the collation sequence value. */
1763 if (runp->is_character)
1764 collseq_table_add (&collate->wcseqorder, runp->wcs[0],
1765 runp->wcseqorder);
1766
1767 /* Find the point where to insert in the list. */
1768 e = wchead_table_get (&collate->wcheads, runp->wcs[0]);
1769 eptr = &e;
1770 lastp = NULL;
1771 while (*eptr != NULL)
1772 {
1773 if ((*eptr)->nwcs < runp->nwcs)
1774 break;
1775
1776 if ((*eptr)->nwcs == runp->nwcs)
1777 {
1778 int c = wmemcmp ((wchar_t *) (*eptr)->wcs,
1779 (wchar_t *) runp->wcs, runp->nwcs);
1780
1781 if (c == 0)
1782 {
1783 /* This should not happen. It means that we have
1784 two symbols with the same byte sequence. It is
1785 of course an error. */
1786 record_error_at_line (0, 0, (*eptr)->file,
1787 (*eptr)->line,
1788 _("\
1789symbol `%s' has the same encoding as"), (*eptr)->name);
1790
1791 record_error_at_line (0, 0, runp->file, runp->line,
1792 _("symbol `%s'"), runp->name);
1793 goto dont_insertwc;
1794 }
1795 else if (c < 0)
1796 /* Insert it here. */
1797 break;
1798 }
1799
1800 /* To the next entry. */
1801 lastp = *eptr;
1802 eptr = &(*eptr)->wcnext;
1803 }
1804
1805 /* Set the pointers. */
1806 runp->wcnext = *eptr;
1807 runp->wclast = lastp;
1808 if (*eptr != NULL)
1809 (*eptr)->wclast = runp;
1810 *eptr = runp;
1811 if (eptr == &e)
1812 wchead_table_add (&collate->wcheads, runp->wcs[0], e);
1813 dont_insertwc:
1814 ;
1815 }
1816
1817 /* Up to the next entry. */
1818 runp = runp->next;
1819 }
1820
1821 /* Now determine whether the UNDEFINED entry is needed and if yes,
1822 whether it was defined. */
1823 collate->undefined.used_in_level = need_undefined ? ~0ul : 0;
1824 if (collate->undefined.file == NULL)
1825 {
1826 if (need_undefined)
1827 {
1828 /* This seems not to be enforced by recent standards. Don't
1829 emit an error, simply append UNDEFINED at the end. */
1830 collate->undefined.mborder =
1831 (int *) obstack_alloc (&collate->mempool, nrules * sizeof (int));
1832
1833 for (i = 0; i < nrules; ++i)
1834 collate->undefined.mborder[i] = mbact[i]++;
1835 }
1836
1837 /* In any case we will need the definition for the wide character
1838 case. But we will not complain that it is missing since the
1839 specification strangely enough does not seem to account for
1840 this. */
1841 collate->undefined.wcorder = wcact++;
1842 }
1843
1844 /* Finally, try to unify the rules for the sections. Whenever the rules
1845 for a section are the same as those for another section give the
1846 ruleset the same index. Since there are never many section we can
1847 use an O(n^2) algorithm here. */
1848 sect = collate->sections;
1849 while (sect != NULL && sect->rules == NULL)
1850 sect = sect->next;
1851
1852 /* Bail out if we have no sections because of earlier errors. */
1853 if (sect == NULL)
1854 {
1855 record_error (EXIT_FAILURE, 0, _("too many errors; giving up"));
1856 return;
1857 }
1858
1859 ruleidx = 0;
1860 do
1861 {
1862 struct section_list *osect = collate->sections;
1863
1864 while (osect != sect)
1865 if (osect->rules != NULL
1866 && memcmp (osect->rules, sect->rules,
1867 nrules * sizeof (osect->rules[0])) == 0)
1868 break;
1869 else
1870 osect = osect->next;
1871
1872 if (osect == sect)
1873 sect->ruleidx = ruleidx++;
1874 else
1875 sect->ruleidx = osect->ruleidx;
1876
1877 /* Next section. */
1878 do
1879 sect = sect->next;
1880 while (sect != NULL && sect->rules == NULL);
1881 }
1882 while (sect != NULL);
1883 /* We are currently not prepared for more than 128 rulesets. But this
1884 should never really be a problem. */
1885 assert (ruleidx <= 128);
1886}
1887
1888
1889static int32_t
1890output_weight (struct obstack *pool, struct locale_collate_t *collate,
1891 struct element_t *elem)
1892{
1893 size_t cnt;
1894 int32_t retval;
1895
1896 /* Optimize the use of UNDEFINED. */
1897 if (elem == &collate->undefined)
1898 /* The weights are already inserted. */
1899 return 0;
1900
1901 /* This byte can start exactly one collation element and this is
1902 a single byte. We can directly give the index to the weights. */
1903 retval = obstack_object_size (pool);
1904
1905 /* Construct the weight. */
1906 for (cnt = 0; cnt < nrules; ++cnt)
1907 {
1908 char buf[elem->weights[cnt].cnt * 7];
1909 int len = 0;
1910 int i;
1911
1912 for (i = 0; i < elem->weights[cnt].cnt; ++i)
1913 /* Encode the weight value. We do nothing for IGNORE entries. */
1914 if (elem->weights[cnt].w[i] != NULL)
1915 len += utf8_encode (&buf[len],
1916 elem->weights[cnt].w[i]->mborder[cnt]);
1917
1918 /* And add the buffer content. */
1919 obstack_1grow (pool, len);
1920 obstack_grow (pool, buf, len);
1921 }
1922
1923 return retval | ((elem->section->ruleidx & 0x7f) << 24);
1924}
1925
1926
1927static int32_t
1928output_weightwc (struct obstack *pool, struct locale_collate_t *collate,
1929 struct element_t *elem)
1930{
1931 size_t cnt;
1932 int32_t retval;
1933
1934 /* Optimize the use of UNDEFINED. */
1935 if (elem == &collate->undefined)
1936 /* The weights are already inserted. */
1937 return 0;
1938
1939 /* This byte can start exactly one collation element and this is
1940 a single byte. We can directly give the index to the weights. */
1941 retval = obstack_object_size (pool) / sizeof (int32_t);
1942
1943 /* Construct the weight. */
1944 for (cnt = 0; cnt < nrules; ++cnt)
1945 {
1946 int32_t buf[elem->weights[cnt].cnt];
1947 int i;
1948 int32_t j;
1949
1950 for (i = 0, j = 0; i < elem->weights[cnt].cnt; ++i)
1951 if (elem->weights[cnt].w[i] != NULL)
1952 buf[j++] = elem->weights[cnt].w[i]->wcorder;
1953
1954 /* And add the buffer content. */
1955 obstack_int32_grow (pool, j);
1956
1957 obstack_grow (pool, buf, j * sizeof (int32_t));
1958 maybe_swap_uint32_obstack (pool, j);
1959 }
1960
1961 return retval | ((elem->section->ruleidx & 0x7f) << 24);
1962}
1963
/* Context used by `add_to_tablewc', which receives only the character
   and the element as arguments.  If localedef is ever threaded, this
   would need to be a __thread variable.  */
static struct
{
  /* Pool passed to `output_weightwc' to receive the weight data.  */
  struct obstack *weightpool;
  /* Pool receiving the entries for characters which need more than a
     plain weight index.  */
  struct obstack *extrapool;
  /* Pool receiving the weight indices of series of consecutive
     entries.  */
  struct obstack *indpool;
  /* Collation data passed to `output_weightwc'.  */
  struct locale_collate_t *collate;
  /* Table the per-character values are added to.  */
  struct collidx_table *tablewc;
} atwc;
1973
1974static void add_to_tablewc (uint32_t ch, struct element_t *runp);
1975
1976static void
1977add_to_tablewc (uint32_t ch, struct element_t *runp)
1978{
1979 if (runp->wcnext == NULL && runp->nwcs == 1)
1980 {
1981 int32_t weigthidx = output_weightwc (atwc.weightpool, atwc.collate,
1982 runp);
1983 collidx_table_add (atwc.tablewc, ch, weigthidx);
1984 }
1985 else
1986 {
1987 /* As for the singlebyte table, we recognize sequences and
1988 compress them. */
1989
1990 collidx_table_add (atwc.tablewc, ch,
1991 -(obstack_object_size (atwc.extrapool)
1992 / sizeof (uint32_t)));
1993
1994 do
1995 {
1996 /* Store the current index in the weight table. We know that
1997 the current position in the `extrapool' is aligned on a
1998 32-bit address. */
1999 int32_t weightidx;
2000 int added;
2001
2002 /* Find out wether this is a single entry or we have more than
2003 one consecutive entry. */
2004 if (runp->wcnext != NULL
2005 && runp->nwcs == runp->wcnext->nwcs
2006 && wmemcmp ((wchar_t *) runp->wcs,
2007 (wchar_t *)runp->wcnext->wcs,
2008 runp->nwcs - 1) == 0
2009 && (runp->wcs[runp->nwcs - 1]
2010 == runp->wcnext->wcs[runp->nwcs - 1] + 1))
2011 {
2012 int i;
2013 struct element_t *series_startp = runp;
2014 struct element_t *curp;
2015
2016 /* Now add first the initial byte sequence. */
2017 added = (1 + 1 + 2 * (runp->nwcs - 1)) * sizeof (int32_t);
2018 if (sizeof (int32_t) == sizeof (int))
2019 obstack_make_room (atwc.extrapool, added);
2020
2021 /* More than one consecutive entry. We mark this by having
2022 a negative index into the indirect table. */
2023 obstack_int32_grow_fast (atwc.extrapool,
2024 -(obstack_object_size (atwc.indpool)
2025 / sizeof (int32_t)));
2026 obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
2027
2028 do
2029 runp = runp->wcnext;
2030 while (runp->wcnext != NULL
2031 && runp->nwcs == runp->wcnext->nwcs
2032 && wmemcmp ((wchar_t *) runp->wcs,
2033 (wchar_t *)runp->wcnext->wcs,
2034 runp->nwcs - 1) == 0
2035 && (runp->wcs[runp->nwcs - 1]
2036 == runp->wcnext->wcs[runp->nwcs - 1] + 1));
2037
2038 /* Now walk backward from here to the beginning. */
2039 curp = runp;
2040
2041 for (i = 1; i < runp->nwcs; ++i)
2042 obstack_int32_grow_fast (atwc.extrapool, curp->wcs[i]);
2043
2044 /* Now find the end of the consecutive sequence and
2045 add all the indices in the indirect pool. */
2046 do
2047 {
2048 weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2049 curp);
2050 obstack_int32_grow (atwc.indpool, weightidx);
2051
2052 curp = curp->wclast;
2053 }
2054 while (curp != series_startp);
2055
2056 /* Add the final weight. */
2057 weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2058 curp);
2059 obstack_int32_grow (atwc.indpool, weightidx);
2060
2061 /* And add the end byte sequence. Without length this
2062 time. */
2063 for (i = 1; i < curp->nwcs; ++i)
2064 obstack_int32_grow (atwc.extrapool, curp->wcs[i]);
2065 }
2066 else
2067 {
2068 /* A single entry. Simply add the index and the length and
2069 string (except for the first character which is already
2070 tested for). */
2071 int i;
2072
2073 /* Output the weight info. */
2074 weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2075 runp);
2076
2077 assert (runp->nwcs > 0);
2078 added = (1 + 1 + runp->nwcs - 1) * sizeof (int32_t);
2079 if (sizeof (int) == sizeof (int32_t))
2080 obstack_make_room (atwc.extrapool, added);
2081
2082 obstack_int32_grow_fast (atwc.extrapool, weightidx);
2083 obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
2084 for (i = 1; i < runp->nwcs; ++i)
2085 obstack_int32_grow_fast (atwc.extrapool, runp->wcs[i]);
2086 }
2087
2088 /* Next entry. */
2089 runp = runp->wcnext;
2090 }
2091 while (runp != NULL);
2092 }
2093}
2094
2095void
2096collate_output (struct localedef_t *locale, const struct charmap_t *charmap,
2097 const char *output_path)
2098{
2099 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
2100 const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
2101 struct locale_file file;
2102 size_t ch;
2103 int32_t tablemb[256];
2104 struct obstack weightpool;
2105 struct obstack extrapool;
2106 struct obstack indirectpool;
2107 struct section_list *sect;
2108 struct collidx_table tablewc;
2109 uint32_t elem_size;
2110 uint32_t *elem_table;
2111 int i;
2112 struct element_t *runp;
2113
2114 init_locale_data (&file, nelems);
2115 add_locale_uint32 (&file, nrules);
2116
2117 /* If we have no LC_COLLATE data emit only the number of rules as zero. */
2118 if (collate == NULL)
2119 {
2120 size_t idx;
2121 for (idx = 1; idx < nelems; idx++)
2122 {
2123 /* The words have to be handled specially. */
2124 if (idx == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
2125 add_locale_uint32 (&file, 0);
2126 else
2127 add_locale_empty (&file);
2128 }
2129 write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file);
2130 return;
2131 }
2132
2133 obstack_init (&weightpool);
2134 obstack_init (&extrapool);
2135 obstack_init (&indirectpool);
2136
2137 /* Since we are using the sign of an integer to mark indirection the
2138 offsets in the arrays we are indirectly referring to must not be
2139 zero since -0 == 0. Therefore we add a bit of dummy content. */
2140 obstack_int32_grow (&extrapool, 0);
2141 obstack_int32_grow (&indirectpool, 0);
2142
2143 /* Prepare the ruleset table. */
2144 for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
2145 if (sect->rules != NULL && sect->ruleidx == i)
2146 {
2147 int j;
2148
2149 obstack_make_room (&weightpool, nrules);
2150
2151 for (j = 0; j < nrules; ++j)
2152 obstack_1grow_fast (&weightpool, sect->rules[j]);
2153 ++i;
2154 }
2155 /* And align the output. */
2156 i = (nrules * i) % LOCFILE_ALIGN;
2157 if (i > 0)
2158 do
2159 obstack_1grow (&weightpool, '\0');
2160 while (++i < LOCFILE_ALIGN);
2161
2162 add_locale_raw_obstack (&file, &weightpool);
2163
2164 /* Generate the 8-bit table. Walk through the lists of sequences
2165 starting with the same byte and add them one after the other to
2166 the table. In case we have more than one sequence starting with
2167 the same byte we have to use extra indirection.
2168
2169 First add a record for the NUL byte. This entry will never be used
2170 so it does not matter. */
2171 tablemb[0] = 0;
2172
2173 /* Now insert the `UNDEFINED' value if it is used. Since this value
2174 will probably be used more than once it is good to store the
2175 weights only once. */
2176 if (collate->undefined.used_in_level != 0)
2177 output_weight (&weightpool, collate, &collate->undefined);
2178
2179 for (ch = 1; ch < 256; ++ch)
2180 if (collate->mbheads[ch]->mbnext == NULL
2181 && collate->mbheads[ch]->nmbs <= 1)
2182 {
2183 tablemb[ch] = output_weight (&weightpool, collate,
2184 collate->mbheads[ch]);
2185 }
2186 else
2187 {
2188 /* The entries in the list are sorted by length and then
2189 alphabetically. This is the order in which we will add the
2190 elements to the collation table. This allows simply walking
2191 the table in sequence and stopping at the first matching
2192 entry. Since the longer sequences are coming first in the
2193 list they have the possibility to match first, just as it
2194 has to be. In the worst case we are walking to the end of
2195 the list where we put, if no singlebyte sequence is defined
2196 in the locale definition, the weights for UNDEFINED.
2197
2198 To reduce the length of the search list we compress them a bit.
2199 This happens by collecting sequences of consecutive byte
2200 sequences in one entry (having and begin and end byte sequence)
2201 and add only one index into the weight table. We can find the
2202 consecutive entries since they are also consecutive in the list. */
2203 struct element_t *runp = collate->mbheads[ch];
2204 struct element_t *lastp;
2205
2206 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2207
2208 tablemb[ch] = -obstack_object_size (&extrapool);
2209
2210 do
2211 {
2212 /* Store the current index in the weight table. We know that
2213 the current position in the `extrapool' is aligned on a
2214 32-bit address. */
2215 int32_t weightidx;
2216 int added;
2217
              /* Find out whether this is a single entry or we have more
                 than one consecutive entry.  */
2220 if (runp->mbnext != NULL
2221 && runp->nmbs == runp->mbnext->nmbs
2222 && memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - 1) == 0
2223 && (runp->mbs[runp->nmbs - 1]
2224 == runp->mbnext->mbs[runp->nmbs - 1] + 1))
2225 {
2226 int i;
2227 struct element_t *series_startp = runp;
2228 struct element_t *curp;
2229
2230 /* Compute how much space we will need. */
2231 added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1
2232 + 2 * (runp->nmbs - 1));
2233 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2234 obstack_make_room (&extrapool, added);
2235
2236 /* More than one consecutive entry. We mark this by having
2237 a negative index into the indirect table. */
2238 obstack_int32_grow_fast (&extrapool,
2239 -(obstack_object_size (&indirectpool)
2240 / sizeof (int32_t)));
2241
2242 /* Now search first the end of the series. */
2243 do
2244 runp = runp->mbnext;
2245 while (runp->mbnext != NULL
2246 && runp->nmbs == runp->mbnext->nmbs
2247 && memcmp (runp->mbs, runp->mbnext->mbs,
2248 runp->nmbs - 1) == 0
2249 && (runp->mbs[runp->nmbs - 1]
2250 == runp->mbnext->mbs[runp->nmbs - 1] + 1));
2251
2252 /* Now walk backward from here to the beginning. */
2253 curp = runp;
2254
2255 assert (runp->nmbs <= 256);
2256 obstack_1grow_fast (&extrapool, curp->nmbs - 1);
2257 for (i = 1; i < curp->nmbs; ++i)
2258 obstack_1grow_fast (&extrapool, curp->mbs[i]);
2259
2260 /* Now find the end of the consecutive sequence and
2261 add all the indices in the indirect pool. */
2262 do
2263 {
2264 weightidx = output_weight (&weightpool, collate, curp);
2265 obstack_int32_grow (&indirectpool, weightidx);
2266
2267 curp = curp->mblast;
2268 }
2269 while (curp != series_startp);
2270
2271 /* Add the final weight. */
2272 weightidx = output_weight (&weightpool, collate, curp);
2273 obstack_int32_grow (&indirectpool, weightidx);
2274
2275 /* And add the end byte sequence. Without length this
2276 time. */
2277 for (i = 1; i < curp->nmbs; ++i)
2278 obstack_1grow_fast (&extrapool, curp->mbs[i]);
2279 }
2280 else
2281 {
2282 /* A single entry. Simply add the index and the length and
2283 string (except for the first character which is already
2284 tested for). */
2285 int i;
2286
2287 /* Output the weight info. */
2288 weightidx = output_weight (&weightpool, collate, runp);
2289
2290 added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1
2291 + runp->nmbs - 1);
2292 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2293 obstack_make_room (&extrapool, added);
2294
2295 obstack_int32_grow_fast (&extrapool, weightidx);
2296 assert (runp->nmbs <= 256);
2297 obstack_1grow_fast (&extrapool, runp->nmbs - 1);
2298
2299 for (i = 1; i < runp->nmbs; ++i)
2300 obstack_1grow_fast (&extrapool, runp->mbs[i]);
2301 }
2302
2303 /* Add alignment bytes if necessary. */
2304 while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)))
2305 obstack_1grow_fast (&extrapool, '\0');
2306
2307 /* Next entry. */
2308 lastp = runp;
2309 runp = runp->mbnext;
2310 }
2311 while (runp != NULL);
2312
2313 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2314
2315 /* If the final entry in the list is not a single character we
2316 add an UNDEFINED entry here. */
2317 if (lastp->nmbs != 1)
2318 {
2319 int added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1 + 1);
2320 obstack_make_room (&extrapool, added);
2321
2322 obstack_int32_grow_fast (&extrapool, 0);
2323 /* XXX What rule? We just pick the first. */
2324 obstack_1grow_fast (&extrapool, 0);
2325 /* Length is zero. */
2326 obstack_1grow_fast (&extrapool, 0);
2327
2328 /* Add alignment bytes if necessary. */
2329 while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)))
2330 obstack_1grow_fast (&extrapool, '\0');
2331 }
2332 }
2333
2334 /* Add padding to the tables if necessary. */
2335 while (!LOCFILE_ALIGNED_P (obstack_object_size (&weightpool)))
2336 obstack_1grow (&weightpool, 0);
2337
2338 /* Now add the four tables. */
2339 add_locale_uint32_array (&file, (const uint32_t *) tablemb, 256);
2340 add_locale_raw_obstack (&file, &weightpool);
2341 add_locale_raw_obstack (&file, &extrapool);
2342 add_locale_raw_obstack (&file, &indirectpool);
2343
2344 /* Now the same for the wide character table. We need to store some
2345 more information here. */
2346 add_locale_empty (&file);
2347 add_locale_empty (&file);
2348 add_locale_empty (&file);
2349
2350 /* Since we are using the sign of an integer to mark indirection the
2351 offsets in the arrays we are indirectly referring to must not be
2352 zero since -0 == 0. Therefore we add a bit of dummy content. */
2353 obstack_int32_grow (&extrapool, 0);
2354 obstack_int32_grow (&indirectpool, 0);
2355
2356 /* Now insert the `UNDEFINED' value if it is used. Since this value
2357 will probably be used more than once it is good to store the
2358 weights only once. */
2359 if (output_weightwc (&weightpool, collate, &collate->undefined) != 0)
2360 abort ();
2361
2362 /* Generate the table. Walk through the lists of sequences starting
2363 with the same wide character and add them one after the other to
2364 the table. In case we have more than one sequence starting with
2365 the same byte we have to use extra indirection. */
2366 tablewc.p = 6;
2367 tablewc.q = 10;
2368 collidx_table_init (&tablewc);
2369
2370 atwc.weightpool = &weightpool;
2371 atwc.extrapool = &extrapool;
2372 atwc.indpool = &indirectpool;
2373 atwc.collate = collate;
2374 atwc.tablewc = &tablewc;
2375
2376 wchead_table_iterate (&collate->wcheads, add_to_tablewc);
2377
2378 memset (&atwc, 0, sizeof (atwc));
2379
2380 /* Now add the four tables. */
2381 add_locale_collidx_table (&file, &tablewc);
2382 add_locale_raw_obstack (&file, &weightpool);
2383 add_locale_raw_obstack (&file, &extrapool);
2384 add_locale_raw_obstack (&file, &indirectpool);
2385
2386 /* Finally write the table with collation element names out. It is
2387 a hash table with a simple function which gets the name of the
2388 character as the input. One character might have many names. The
2389 value associated with the name is an index into the weight table
2390 where we are then interested in the first-level weight value.
2391
2392 To determine how large the table should be we are counting the
2393 elements have to put in. Since we are using internal chaining
2394 using a secondary hash function we have to make the table a bit
2395 larger to avoid extremely long search times. We can achieve
2396 good results with a 40% larger table than there are entries. */
2397 elem_size = 0;
2398 runp = collate->start;
2399 while (runp != NULL)
2400 {
2401 if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2402 /* Yep, the element really counts. */
2403 ++elem_size;
2404
2405 runp = runp->next;
2406 }
2407 /* Add 50% and find the next prime number. */
2408 elem_size = next_prime (elem_size + (elem_size >> 1));
2409
2410 /* Allocate the table. Each entry consists of two words: the hash
2411 value and an index in a secondary table which provides the index
2412 into the weight table and the string itself (so that a match can
2413 be determined). */
2414 elem_table = (uint32_t *) obstack_alloc (&extrapool,
2415 elem_size * 2 * sizeof (uint32_t));
2416 memset (elem_table, '\0', elem_size * 2 * sizeof (uint32_t));
2417
2418 /* Now add the elements. */
2419 runp = collate->start;
2420 while (runp != NULL)
2421 {
2422 if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2423 {
2424 /* Compute the hash value of the name. */
2425 uint32_t namelen = strlen (runp->name);
2426 uint32_t hash = elem_hash (runp->name, namelen);
2427 size_t idx = hash % elem_size;
2428#ifndef NDEBUG
2429 size_t start_idx = idx;
2430#endif
2431
2432 if (elem_table[idx * 2] != 0)
2433 {
2434 /* The spot is already taken. Try iterating using the value
2435 from the secondary hashing function. */
2436 size_t iter = hash % (elem_size - 2) + 1;
2437
2438 do
2439 {
2440 idx += iter;
2441 if (idx >= elem_size)
2442 idx -= elem_size;
2443 assert (idx != start_idx);
2444 }
2445 while (elem_table[idx * 2] != 0);
2446 }
2447 /* This is the spot where we will insert the value. */
2448 elem_table[idx * 2] = hash;
2449 elem_table[idx * 2 + 1] = obstack_object_size (&extrapool);
2450
2451 /* The string itself including length. */
2452 obstack_1grow (&extrapool, namelen);
2453 obstack_grow (&extrapool, runp->name, namelen);
2454
2455 /* And the multibyte representation. */
2456 obstack_1grow (&extrapool, runp->nmbs);
2457 obstack_grow (&extrapool, runp->mbs, runp->nmbs);
2458
2459 /* And align again to 32 bits. */
2460 if ((1 + namelen + 1 + runp->nmbs) % sizeof (int32_t) != 0)
2461 obstack_grow (&extrapool, "\0\0",
2462 (sizeof (int32_t)
2463 - ((1 + namelen + 1 + runp->nmbs)
2464 % sizeof (int32_t))));
2465
2466 /* Now some 32-bit values: multibyte collation sequence,
2467 wide char string (including length), and wide char
2468 collation sequence. */
2469 obstack_int32_grow (&extrapool, runp->mbseqorder);
2470
2471 obstack_int32_grow (&extrapool, runp->nwcs);
2472 obstack_grow (&extrapool, runp->wcs,
2473 runp->nwcs * sizeof (uint32_t));
2474 maybe_swap_uint32_obstack (&extrapool, runp->nwcs);
2475
2476 obstack_int32_grow (&extrapool, runp->wcseqorder);
2477 }
2478
2479 runp = runp->next;
2480 }
2481
2482 /* Prepare to write out this data. */
2483 add_locale_uint32 (&file, elem_size);
2484 add_locale_uint32_array (&file, elem_table, 2 * elem_size);
2485 add_locale_raw_obstack (&file, &extrapool);
2486 add_locale_raw_data (&file, collate->mbseqorder, 256);
2487 add_locale_collseq_table (&file, &collate->wcseqorder);
2488 add_locale_string (&file, charmap->code_set_name);
2489 write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file);
2490
2491 obstack_free (&weightpool, NULL);
2492 obstack_free (&extrapool, NULL);
2493 obstack_free (&indirectpool, NULL);
2494}
2495
2496
2497static enum token_t
2498skip_to (struct linereader *ldfile, struct locale_collate_t *collate,
2499 const struct charmap_t *charmap, int to_endif)
2500{
2501 while (1)
2502 {
2503 struct token *now = lr_token (ldfile, charmap, NULL, NULL, 0);
2504 enum token_t nowtok = now->tok;
2505
2506 if (nowtok == tok_eof || nowtok == tok_end)
2507 return nowtok;
2508
2509 if (nowtok == tok_ifdef || nowtok == tok_ifndef)
2510 {
2511 lr_error (ldfile, _("%s: nested conditionals not supported"),
2512 "LC_COLLATE");
2513 nowtok = skip_to (ldfile, collate, charmap, tok_endif);
2514 if (nowtok == tok_eof || nowtok == tok_end)
2515 return nowtok;
2516 }
2517 else if (nowtok == tok_endif || (!to_endif && nowtok == tok_else))
2518 {
2519 lr_ignore_rest (ldfile, 1);
2520 return nowtok;
2521 }
2522 else if (!to_endif && (nowtok == tok_elifdef || nowtok == tok_elifndef))
2523 {
2524 /* Do not read the rest of the line. */
2525 return nowtok;
2526 }
2527 else if (nowtok == tok_else)
2528 {
2529 lr_error (ldfile, _("%s: more than one 'else'"), "LC_COLLATE");
2530 }
2531
2532 lr_ignore_rest (ldfile, 0);
2533 }
2534}
2535
2536
2537void
2538collate_read (struct linereader *ldfile, struct localedef_t *result,
2539 const struct charmap_t *charmap, const char *repertoire_name,
2540 int ignore_content)
2541{
2542 struct repertoire_t *repertoire = NULL;
2543 struct locale_collate_t *collate;
2544 struct token *now;
2545 struct token *arg = NULL;
2546 enum token_t nowtok;
2547 enum token_t was_ellipsis = tok_none;
2548 struct localedef_t *copy_locale = NULL;
2549 /* Parsing state:
2550 0 - start
2551 1 - between `order-start' and `order-end'
2552 2 - after `order-end'
2553 3 - after `reorder-after', waiting for `reorder-end'
2554 4 - after `reorder-end'
2555 5 - after `reorder-sections-after', waiting for `reorder-sections-end'
2556 6 - after `reorder-sections-end'
2557 */
2558 int state = 0;
2559
2560 /* Get the repertoire we have to use. */
2561 if (repertoire_name != NULL)
2562 repertoire = repertoire_read (repertoire_name);
2563
2564 /* The rest of the line containing `LC_COLLATE' must be free. */
2565 lr_ignore_rest (ldfile, 1);
2566
2567 while (1)
2568 {
2569 do
2570 {
2571 now = lr_token (ldfile, charmap, result, NULL, verbose);
2572 nowtok = now->tok;
2573 }
2574 while (nowtok == tok_eol);
2575
2576 if (nowtok != tok_define)
2577 break;
2578
2579 if (ignore_content)
2580 lr_ignore_rest (ldfile, 0);
2581 else
2582 {
2583 arg = lr_token (ldfile, charmap, result, NULL, verbose);
2584 if (arg->tok != tok_ident)
2585 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2586 else
2587 {
2588 /* Simply add the new symbol. */
2589 struct name_list *newsym = xmalloc (sizeof (*newsym)
2590 + arg->val.str.lenmb + 1);
2591 memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
2592 newsym->str[arg->val.str.lenmb] = '\0';
2593 newsym->next = defined;
2594 defined = newsym;
2595
2596 lr_ignore_rest (ldfile, 1);
2597 }
2598 }
2599 }
2600
2601 if (nowtok == tok_copy)
2602 {
2603 now = lr_token (ldfile, charmap, result, NULL, verbose);
2604 if (now->tok != tok_string)
2605 {
2606 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2607
2608 skip_category:
2609 do
2610 now = lr_token (ldfile, charmap, result, NULL, verbose);
2611 while (now->tok != tok_eof && now->tok != tok_end);
2612
2613 if (now->tok != tok_eof
2614 || (now = lr_token (ldfile, charmap, result, NULL, verbose),
2615 now->tok == tok_eof))
2616 lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
2617 else if (now->tok != tok_lc_collate)
2618 {
2619 lr_error (ldfile, _("\
2620%1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2621 lr_ignore_rest (ldfile, 0);
2622 }
2623 else
2624 lr_ignore_rest (ldfile, 1);
2625
2626 return;
2627 }
2628
2629 if (! ignore_content)
2630 {
2631 /* Get the locale definition. */
2632 copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
2633 repertoire_name, charmap, NULL);
2634 if ((copy_locale->avail & COLLATE_LOCALE) == 0)
2635 {
2636 /* Not yet loaded. So do it now. */
2637 if (locfile_read (copy_locale, charmap) != 0)
2638 goto skip_category;
2639 }
2640
2641 if (copy_locale->categories[LC_COLLATE].collate == NULL)
2642 return;
2643 }
2644
2645 lr_ignore_rest (ldfile, 1);
2646
2647 now = lr_token (ldfile, charmap, result, NULL, verbose);
2648 nowtok = now->tok;
2649 }
2650
2651 /* Prepare the data structures. */
2652 collate_startup (ldfile, result, copy_locale, ignore_content);
2653 collate = result->categories[LC_COLLATE].collate;
2654
2655 while (1)
2656 {
2657 char ucs4buf[10];
2658 char *symstr;
2659 size_t symlen;
2660
2661 /* Of course we don't proceed beyond the end of file. */
2662 if (nowtok == tok_eof)
2663 break;
2664
2665 /* Ingore empty lines. */
2666 if (nowtok == tok_eol)
2667 {
2668 now = lr_token (ldfile, charmap, result, NULL, verbose);
2669 nowtok = now->tok;
2670 continue;
2671 }
2672
2673 switch (nowtok)
2674 {
2675 case tok_copy:
2676 /* Allow copying other locales. */
2677 now = lr_token (ldfile, charmap, result, NULL, verbose);
2678 if (now->tok != tok_string)
2679 goto err_label;
2680
2681 if (! ignore_content)
2682 load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name,
2683 charmap, result);
2684
2685 lr_ignore_rest (ldfile, 1);
2686 break;
2687
2688 case tok_coll_weight_max:
2689 /* Ignore the rest of the line if we don't need the input of
2690 this line. */
2691 if (ignore_content)
2692 {
2693 lr_ignore_rest (ldfile, 0);
2694 break;
2695 }
2696
2697 if (state != 0)
2698 goto err_label;
2699
2700 arg = lr_token (ldfile, charmap, result, NULL, verbose);
2701 if (arg->tok != tok_number)
2702 goto err_label;
2703 if (collate->col_weight_max != -1)
2704 lr_error (ldfile, _("%s: duplicate definition of `%s'"),
2705 "LC_COLLATE", "col_weight_max");
2706 else
2707 collate->col_weight_max = arg->val.num;
2708 lr_ignore_rest (ldfile, 1);
2709 break;
2710
2711 case tok_section_symbol:
2712 /* Ignore the rest of the line if we don't need the input of
2713 this line. */
2714 if (ignore_content)
2715 {
2716 lr_ignore_rest (ldfile, 0);
2717 break;
2718 }
2719
2720 if (state != 0)
2721 goto err_label;
2722
2723 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2724 if (arg->tok != tok_bsymbol)
2725 goto err_label;
2726 else if (!ignore_content)
2727 {
2728 /* Check whether this section is already known. */
2729 struct section_list *known = collate->sections;
2730 while (known != NULL)
2731 {
2732 if (strcmp (known->name, arg->val.str.startmb) == 0)
2733 break;
2734 known = known->next;
2735 }
2736
2737 if (known != NULL)
2738 {
2739 lr_error (ldfile,
2740 _("%s: duplicate declaration of section `%s'"),
2741 "LC_COLLATE", arg->val.str.startmb);
2742 free (arg->val.str.startmb);
2743 }
2744 else
2745 collate->sections = make_seclist_elem (collate,
2746 arg->val.str.startmb,
2747 collate->sections);
2748
2749 lr_ignore_rest (ldfile, known == NULL);
2750 }
2751 else
2752 {
2753 free (arg->val.str.startmb);
2754 lr_ignore_rest (ldfile, 0);
2755 }
2756 break;
2757
2758 case tok_collating_element:
2759 /* Ignore the rest of the line if we don't need the input of
2760 this line. */
2761 if (ignore_content)
2762 {
2763 lr_ignore_rest (ldfile, 0);
2764 break;
2765 }
2766
2767 if (state != 0 && state != 2)
2768 goto err_label;
2769
2770 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2771 if (arg->tok != tok_bsymbol)
2772 goto err_label;
2773 else
2774 {
2775 const char *symbol = arg->val.str.startmb;
2776 size_t symbol_len = arg->val.str.lenmb;
2777
2778 /* Next the `from' keyword. */
2779 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2780 if (arg->tok != tok_from)
2781 {
2782 free ((char *) symbol);
2783 goto err_label;
2784 }
2785
2786 ldfile->return_widestr = 1;
2787 ldfile->translate_strings = 1;
2788
2789 /* Finally the string with the replacement. */
2790 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2791
2792 ldfile->return_widestr = 0;
2793 ldfile->translate_strings = 0;
2794
2795 if (arg->tok != tok_string)
2796 goto err_label;
2797
2798 if (!ignore_content && symbol != NULL)
2799 {
2800 /* The name is already defined. */
2801 if (check_duplicate (ldfile, collate, charmap,
2802 repertoire, symbol, symbol_len))
2803 goto col_elem_free;
2804
2805 if (arg->val.str.startmb != NULL)
2806 insert_entry (&collate->elem_table, symbol, symbol_len,
2807 new_element (collate,
2808 arg->val.str.startmb,
2809 arg->val.str.lenmb - 1,
2810 arg->val.str.startwc,
2811 symbol, symbol_len, 0));
2812 }
2813 else
2814 {
2815 col_elem_free:
2816 free ((char *) symbol);
2817 free (arg->val.str.startmb);
2818 free (arg->val.str.startwc);
2819 }
2820 lr_ignore_rest (ldfile, 1);
2821 }
2822 break;
2823
2824 case tok_collating_symbol:
2825 /* Ignore the rest of the line if we don't need the input of
2826 this line. */
2827 if (ignore_content)
2828 {
2829 lr_ignore_rest (ldfile, 0);
2830 break;
2831 }
2832
2833 if (state != 0 && state != 2)
2834 goto err_label;
2835
2836 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2837 if (arg->tok != tok_bsymbol)
2838 goto err_label;
2839 else
2840 {
2841 char *symbol = arg->val.str.startmb;
2842 size_t symbol_len = arg->val.str.lenmb;
2843 char *endsymbol = NULL;
2844 size_t endsymbol_len = 0;
2845 enum token_t ellipsis = tok_none;
2846
2847 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2848 if (arg->tok == tok_ellipsis2 || arg->tok == tok_ellipsis4)
2849 {
2850 ellipsis = arg->tok;
2851
2852 arg = lr_token (ldfile, charmap, result, repertoire,
2853 verbose);
2854 if (arg->tok != tok_bsymbol)
2855 {
2856 free (symbol);
2857 goto err_label;
2858 }
2859
2860 endsymbol = arg->val.str.startmb;
2861 endsymbol_len = arg->val.str.lenmb;
2862
2863 lr_ignore_rest (ldfile, 1);
2864 }
2865 else if (arg->tok != tok_eol)
2866 {
2867 free (symbol);
2868 goto err_label;
2869 }
2870
2871 if (!ignore_content)
2872 {
2873 if (symbol == NULL
2874 || (ellipsis != tok_none && endsymbol == NULL))
2875 {
2876 lr_error (ldfile, _("\
2877%s: unknown character in collating symbol name"),
2878 "LC_COLLATE");
2879 goto col_sym_free;
2880 }
2881 else if (ellipsis == tok_none)
2882 {
2883 /* A single symbol, no ellipsis. */
2884 if (check_duplicate (ldfile, collate, charmap,
2885 repertoire, symbol, symbol_len))
2886 /* The name is already defined. */
2887 goto col_sym_free;
2888
2889 insert_entry (&collate->sym_table, symbol, symbol_len,
2890 new_symbol (collate, symbol, symbol_len));
2891 }
2892 else if (symbol_len != endsymbol_len)
2893 {
2894 col_sym_inv_range:
2895 lr_error (ldfile,
2896 _("invalid names for character range"));
2897 goto col_sym_free;
2898 }
2899 else
2900 {
2901 /* Oh my, we have to handle an ellipsis. First, as
2902 usual, determine the common prefix and then
2903 convert the rest into a range. */
2904 size_t prefixlen;
2905 unsigned long int from;
2906 unsigned long int to;
2907 char *endp;
2908
2909 for (prefixlen = 0; prefixlen < symbol_len; ++prefixlen)
2910 if (symbol[prefixlen] != endsymbol[prefixlen])
2911 break;
2912
2913 /* Convert the rest into numbers. */
2914 symbol[symbol_len] = '\0';
2915 from = strtoul (&symbol[prefixlen], &endp,
2916 ellipsis == tok_ellipsis2 ? 16 : 10);
2917 if (*endp != '\0')
2918 goto col_sym_inv_range;
2919
2920 endsymbol[symbol_len] = '\0';
2921 to = strtoul (&endsymbol[prefixlen], &endp,
2922 ellipsis == tok_ellipsis2 ? 16 : 10);
2923 if (*endp != '\0')
2924 goto col_sym_inv_range;
2925
2926 if (from > to)
2927 goto col_sym_inv_range;
2928
2929 /* Now loop over all entries. */
2930 while (from <= to)
2931 {
2932 char *symbuf;
2933
2934 symbuf = (char *) obstack_alloc (&collate->mempool,
2935 symbol_len + 1);
2936
2937 /* Create the name. */
2938 sprintf (symbuf,
2939 ellipsis == tok_ellipsis2
2940 ? "%.*s%.*lX" : "%.*s%.*lu",
2941 (int) prefixlen, symbol,
2942 (int) (symbol_len - prefixlen), from);
2943
2944 if (check_duplicate (ldfile, collate, charmap,
2945 repertoire, symbuf, symbol_len))
2946 /* The name is already defined. */
2947 goto col_sym_free;
2948
2949 insert_entry (&collate->sym_table, symbuf,
2950 symbol_len,
2951 new_symbol (collate, symbuf,
2952 symbol_len));
2953
2954 /* Increment the counter. */
2955 ++from;
2956 }
2957
2958 goto col_sym_free;
2959 }
2960 }
2961 else
2962 {
2963 col_sym_free:
2964 free (symbol);
2965 free (endsymbol);
2966 }
2967 }
2968 break;
2969
2970 case tok_symbol_equivalence:
2971 /* Ignore the rest of the line if we don't need the input of
2972 this line. */
2973 if (ignore_content)
2974 {
2975 lr_ignore_rest (ldfile, 0);
2976 break;
2977 }
2978
2979 if (state != 0)
2980 goto err_label;
2981
2982 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2983 if (arg->tok != tok_bsymbol)
2984 goto err_label;
2985 else
2986 {
2987 const char *newname = arg->val.str.startmb;
2988 size_t newname_len = arg->val.str.lenmb;
2989 const char *symname;
2990 size_t symname_len;
2991 void *symval; /* Actually struct symbol_t* */
2992
2993 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2994 if (arg->tok != tok_bsymbol)
2995 {
2996 free ((char *) newname);
2997 goto err_label;
2998 }
2999
3000 symname = arg->val.str.startmb;
3001 symname_len = arg->val.str.lenmb;
3002
3003 if (newname == NULL)
3004 {
3005 lr_error (ldfile, _("\
3006%s: unknown character in equivalent definition name"),
3007 "LC_COLLATE");
3008
3009 sym_equiv_free:
3010 free ((char *) newname);
3011 free ((char *) symname);
3012 break;
3013 }
3014 if (symname == NULL)
3015 {
3016 lr_error (ldfile, _("\
3017%s: unknown character in equivalent definition value"),
3018 "LC_COLLATE");
3019 goto sym_equiv_free;
3020 }
3021
3022 /* See whether the symbol name is already defined. */
3023 if (find_entry (&collate->sym_table, symname, symname_len,
3024 &symval) != 0)
3025 {
3026 lr_error (ldfile, _("\
3027%s: unknown symbol `%s' in equivalent definition"),
3028 "LC_COLLATE", symname);
3029 goto sym_equiv_free;
3030 }
3031
3032 if (insert_entry (&collate->sym_table,
3033 newname, newname_len, symval) < 0)
3034 {
3035 lr_error (ldfile, _("\
3036error while adding equivalent collating symbol"));
3037 goto sym_equiv_free;
3038 }
3039
3040 free ((char *) symname);
3041 }
3042 lr_ignore_rest (ldfile, 1);
3043 break;
3044
3045 case tok_script:
3046 /* Ignore the rest of the line if we don't need the input of
3047 this line. */
3048 if (ignore_content)
3049 {
3050 lr_ignore_rest (ldfile, 0);
3051 break;
3052 }
3053
3054 /* We get told about the scripts we know. */
3055 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3056 if (arg->tok != tok_bsymbol)
3057 goto err_label;
3058 else
3059 {
3060 struct section_list *runp = collate->known_sections;
3061 char *name;
3062
3063 while (runp != NULL)
3064 if (strncmp (runp->name, arg->val.str.startmb,
3065 arg->val.str.lenmb) == 0
3066 && runp->name[arg->val.str.lenmb] == '\0')
3067 break;
3068 else
3069 runp = runp->def_next;
3070
3071 if (runp != NULL)
3072 {
3073 lr_error (ldfile, _("duplicate definition of script `%s'"),
3074 runp->name);
3075 lr_ignore_rest (ldfile, 0);
3076 break;
3077 }
3078
3079 runp = (struct section_list *) xcalloc (1, sizeof (*runp));
3080 name = (char *) xmalloc (arg->val.str.lenmb + 1);
3081 memcpy (name, arg->val.str.startmb, arg->val.str.lenmb);
3082 name[arg->val.str.lenmb] = '\0';
3083 runp->name = name;
3084
3085 runp->def_next = collate->known_sections;
3086 collate->known_sections = runp;
3087 }
3088 lr_ignore_rest (ldfile, 1);
3089 break;
3090
3091 case tok_order_start:
3092 /* Ignore the rest of the line if we don't need the input of
3093 this line. */
3094 if (ignore_content)
3095 {
3096 lr_ignore_rest (ldfile, 0);
3097 break;
3098 }
3099
3100 if (state != 0 && state != 1 && state != 2)
3101 goto err_label;
3102 state = 1;
3103
3104 /* The 14652 draft does not specify whether all `order_start' lines
3105 must contain the same number of sort-rules, but 14651 does. So
3106 we require this here as well. */
3107 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3108 if (arg->tok == tok_bsymbol)
3109 {
3110 /* This better should be a section name. */
3111 struct section_list *sp = collate->known_sections;
3112 while (sp != NULL
3113 && (sp->name == NULL
3114 || strncmp (sp->name, arg->val.str.startmb,
3115 arg->val.str.lenmb) != 0
3116 || sp->name[arg->val.str.lenmb] != '\0'))
3117 sp = sp->def_next;
3118
3119 if (sp == NULL)
3120 {
3121 lr_error (ldfile, _("\
3122%s: unknown section name `%.*s'"),
3123 "LC_COLLATE", (int) arg->val.str.lenmb,
3124 arg->val.str.startmb);
3125 /* We use the error section. */
3126 collate->current_section = &collate->error_section;
3127
3128 if (collate->error_section.first == NULL)
3129 {
3130 /* Insert &collate->error_section at the end of
3131 the collate->sections list. */
3132 if (collate->sections == NULL)
3133 collate->sections = &collate->error_section;
3134 else
3135 {
3136 sp = collate->sections;
3137 while (sp->next != NULL)
3138 sp = sp->next;
3139
3140 sp->next = &collate->error_section;
3141 }
3142 collate->error_section.next = NULL;
3143 }
3144 }
3145 else
3146 {
3147 /* One should not be allowed to open the same
3148 section twice. */
3149 if (sp->first != NULL)
3150 lr_error (ldfile, _("\
3151%s: multiple order definitions for section `%s'"),
3152 "LC_COLLATE", sp->name);
3153 else
3154 {
3155 /* Insert sp in the collate->sections list,
3156 right after collate->current_section. */
3157 if (collate->current_section != NULL)
3158 {
3159 sp->next = collate->current_section->next;
3160 collate->current_section->next = sp;
3161 }
3162 else if (collate->sections == NULL)
3163 /* This is the first section to be defined. */
3164 collate->sections = sp;
3165
3166 collate->current_section = sp;
3167 }
3168
3169 /* Next should come the end of the line or a semicolon. */
3170 arg = lr_token (ldfile, charmap, result, repertoire,
3171 verbose);
3172 if (arg->tok == tok_eol)
3173 {
3174 uint32_t cnt;
3175
3176 /* This means we have exactly one rule: `forward'. */
3177 if (nrules > 1)
3178 lr_error (ldfile, _("\
3179%s: invalid number of sorting rules"),
3180 "LC_COLLATE");
3181 else
3182 nrules = 1;
3183 sp->rules = obstack_alloc (&collate->mempool,
3184 (sizeof (enum coll_sort_rule)
3185 * nrules));
3186 for (cnt = 0; cnt < nrules; ++cnt)
3187 sp->rules[cnt] = sort_forward;
3188
3189 /* Next line. */
3190 break;
3191 }
3192
3193 /* Get the next token. */
3194 arg = lr_token (ldfile, charmap, result, repertoire,
3195 verbose);
3196 }
3197 }
3198 else
3199 {
3200 /* There is no section symbol. Therefore we use the unnamed
3201 section. */
3202 collate->current_section = &collate->unnamed_section;
3203
3204 if (collate->unnamed_section_defined)
3205 lr_error (ldfile, _("\
3206%s: multiple order definitions for unnamed section"),
3207 "LC_COLLATE");
3208 else
3209 {
3210 /* Insert &collate->unnamed_section at the beginning of
3211 the collate->sections list. */
3212 collate->unnamed_section.next = collate->sections;
3213 collate->sections = &collate->unnamed_section;
3214 collate->unnamed_section_defined = true;
3215 }
3216 }
3217
3218 /* Now read the direction names. */
3219 read_directions (ldfile, arg, charmap, repertoire, result);
3220
3221 /* From now we need the strings untranslated. */
3222 ldfile->translate_strings = 0;
3223 break;
3224
3225 case tok_order_end:
3226 /* Ignore the rest of the line if we don't need the input of
3227 this line. */
3228 if (ignore_content)
3229 {
3230 lr_ignore_rest (ldfile, 0);
3231 break;
3232 }
3233
3234 if (state != 1)
3235 goto err_label;
3236
3237 /* Handle ellipsis at end of list. */
3238 if (was_ellipsis != tok_none)
3239 {
3240 handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3241 repertoire, result);
3242 was_ellipsis = tok_none;
3243 }
3244
3245 state = 2;
3246 lr_ignore_rest (ldfile, 1);
3247 break;
3248
3249 case tok_reorder_after:
3250 /* Ignore the rest of the line if we don't need the input of
3251 this line. */
3252 if (ignore_content)
3253 {
3254 lr_ignore_rest (ldfile, 0);
3255 break;
3256 }
3257
3258 if (state == 1)
3259 {
3260 lr_error (ldfile, _("%s: missing `order_end' keyword"),
3261 "LC_COLLATE");
3262 state = 2;
3263
3264 /* Handle ellipsis at end of list. */
3265 if (was_ellipsis != tok_none)
3266 {
3267 handle_ellipsis (ldfile, arg->val.str.startmb,
3268 arg->val.str.lenmb, was_ellipsis, charmap,
3269 repertoire, result);
3270 was_ellipsis = tok_none;
3271 }
3272 }
3273 else if (state == 0 && copy_locale == NULL)
3274 goto err_label;
3275 else if (state != 0 && state != 2 && state != 3)
3276 goto err_label;
3277 state = 3;
3278
3279 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3280 if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
3281 {
3282 /* Find this symbol in the sequence table. */
3283 char ucsbuf[10];
3284 char *startmb;
3285 size_t lenmb;
3286 struct element_t *insp;
3287 int no_error = 1;
3288 void *ptr;
3289
3290 if (arg->tok == tok_bsymbol)
3291 {
3292 startmb = arg->val.str.startmb;
3293 lenmb = arg->val.str.lenmb;
3294 }
3295 else
3296 {
3297 sprintf (ucsbuf, "U%08X", arg->val.ucs4);
3298 startmb = ucsbuf;
3299 lenmb = 9;
3300 }
3301
3302 if (find_entry (&collate->seq_table, startmb, lenmb, &ptr) == 0)
3303 /* Yes, the symbol exists. Simply point the cursor
3304 to it. */
3305 collate->cursor = (struct element_t *) ptr;
3306 else
3307 {
3308 struct symbol_t *symbp;
3309 void *ptr;
3310
3311 if (find_entry (&collate->sym_table, startmb, lenmb,
3312 &ptr) == 0)
3313 {
3314 symbp = ptr;
3315
3316 if (symbp->order->last != NULL
3317 || symbp->order->next != NULL)
3318 collate->cursor = symbp->order;
3319 else
3320 {
3321 /* This is a collating symbol but its position
3322 is not yet defined. */
3323 lr_error (ldfile, _("\
3324%s: order for collating symbol %.*s not yet defined"),
3325 "LC_COLLATE", (int) lenmb, startmb);
3326 collate->cursor = NULL;
3327 no_error = 0;
3328 }
3329 }
3330 else if (find_entry (&collate->elem_table, startmb, lenmb,
3331 &ptr) == 0)
3332 {
3333 insp = (struct element_t *) ptr;
3334
3335 if (insp->last != NULL || insp->next != NULL)
3336 collate->cursor = insp;
3337 else
3338 {
3339 /* This is a collating element but its position
3340 is not yet defined. */
3341 lr_error (ldfile, _("\
3342%s: order for collating element %.*s not yet defined"),
3343 "LC_COLLATE", (int) lenmb, startmb);
3344 collate->cursor = NULL;
3345 no_error = 0;
3346 }
3347 }
3348 else
3349 {
3350 /* This is bad. The symbol after which we have to
3351 insert does not exist. */
3352 lr_error (ldfile, _("\
3353%s: cannot reorder after %.*s: symbol not known"),
3354 "LC_COLLATE", (int) lenmb, startmb);
3355 collate->cursor = NULL;
3356 no_error = 0;
3357 }
3358 }
3359
3360 lr_ignore_rest (ldfile, no_error);
3361 }
3362 else
3363 /* This must not happen. */
3364 goto err_label;
3365 break;
3366
3367 case tok_reorder_end:
3368 /* Ignore the rest of the line if we don't need the input of
3369 this line. */
3370 if (ignore_content)
3371 break;
3372
3373 if (state != 3)
3374 goto err_label;
3375 state = 4;
3376 lr_ignore_rest (ldfile, 1);
3377 break;
3378
3379 case tok_reorder_sections_after:
3380 /* Ignore the rest of the line if we don't need the input of
3381 this line. */
3382 if (ignore_content)
3383 {
3384 lr_ignore_rest (ldfile, 0);
3385 break;
3386 }
3387
3388 if (state == 1)
3389 {
3390 lr_error (ldfile, _("%s: missing `order_end' keyword"),
3391 "LC_COLLATE");
3392 state = 2;
3393
3394 /* Handle ellipsis at end of list. */
3395 if (was_ellipsis != tok_none)
3396 {
3397 handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3398 repertoire, result);
3399 was_ellipsis = tok_none;
3400 }
3401 }
3402 else if (state == 3)
3403 {
3404 record_error (0, 0, _("\
3405%s: missing `reorder-end' keyword"), "LC_COLLATE");
3406 state = 4;
3407 }
3408 else if (state != 2 && state != 4)
3409 goto err_label;
3410 state = 5;
3411
3412 /* Get the name of the sections we are adding after. */
3413 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3414 if (arg->tok == tok_bsymbol)
3415 {
3416 /* Now find a section with this name. */
3417 struct section_list *runp = collate->sections;
3418
3419 while (runp != NULL)
3420 {
3421 if (runp->name != NULL
3422 && strlen (runp->name) == arg->val.str.lenmb
3423 && memcmp (runp->name, arg->val.str.startmb,
3424 arg->val.str.lenmb) == 0)
3425 break;
3426
3427 runp = runp->next;
3428 }
3429
3430 if (runp != NULL)
3431 collate->current_section = runp;
3432 else
3433 {
3434 /* This is bad. The section after which we have to
3435 reorder does not exist. Therefore we cannot
3436 process the whole rest of this reorder
3437 specification. */
3438 lr_error (ldfile, _("%s: section `%.*s' not known"),
3439 "LC_COLLATE", (int) arg->val.str.lenmb,
3440 arg->val.str.startmb);
3441
3442 do
3443 {
3444 lr_ignore_rest (ldfile, 0);
3445
3446 now = lr_token (ldfile, charmap, result, NULL, verbose);
3447 }
3448 while (now->tok == tok_reorder_sections_after
3449 || now->tok == tok_reorder_sections_end
3450 || now->tok == tok_end);
3451
3452 /* Process the token we just saw. */
3453 nowtok = now->tok;
3454 continue;
3455 }
3456 }
3457 else
3458 /* This must not happen. */
3459 goto err_label;
3460 break;
3461
3462 case tok_reorder_sections_end:
3463 /* Ignore the rest of the line if we don't need the input of
3464 this line. */
3465 if (ignore_content)
3466 break;
3467
3468 if (state != 5)
3469 goto err_label;
3470 state = 6;
3471 lr_ignore_rest (ldfile, 1);
3472 break;
3473
3474 case tok_bsymbol:
3475 case tok_ucs4:
3476 /* Ignore the rest of the line if we don't need the input of
3477 this line. */
3478 if (ignore_content)
3479 {
3480 lr_ignore_rest (ldfile, 0);
3481 break;
3482 }
3483
3484 if (state != 0 && state != 1 && state != 3 && state != 5)
3485 goto err_label;
3486
3487 if ((state == 0 || state == 5) && nowtok == tok_ucs4)
3488 goto err_label;
3489
3490 if (nowtok == tok_ucs4)
3491 {
3492 snprintf (ucs4buf, sizeof (ucs4buf), "U%08X", now->val.ucs4);
3493 symstr = ucs4buf;
3494 symlen = 9;
3495 }
3496 else if (arg != NULL)
3497 {
3498 symstr = arg->val.str.startmb;
3499 symlen = arg->val.str.lenmb;
3500 }
3501 else
3502 {
3503 lr_error (ldfile, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
3504 (int) ldfile->token.val.str.lenmb,
3505 ldfile->token.val.str.startmb);
3506 break;
3507 }
3508
3509 struct element_t *seqp;
3510 if (state == 0)
3511 {
3512 /* We are outside an `order_start' region. This means
3513 we must only accept definitions of values for
3514 collation symbols since these are purely abstract
3515 values and don't need directions associated. */
3516 void *ptr;
3517
3518 if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
3519 {
3520 seqp = ptr;
3521
3522 /* It's already defined. First check whether this
3523 is really a collating symbol. */
3524 if (seqp->is_character)
3525 goto err_label;
3526
3527 goto move_entry;
3528 }
3529 else
3530 {
3531 void *result;
3532
3533 if (find_entry (&collate->sym_table, symstr, symlen,
3534 &result) != 0)
3535 /* No collating symbol, it's an error. */
3536 goto err_label;
3537
3538 /* Maybe this is the first time we define a symbol
3539 value and it is before the first actual section. */
3540 if (collate->sections == NULL)
3541 collate->sections = collate->current_section =
3542 &collate->symbol_section;
3543 }
3544
3545 if (was_ellipsis != tok_none)
3546 {
3547 handle_ellipsis (ldfile, symstr, symlen, was_ellipsis,
3548 charmap, repertoire, result);
3549
3550 /* Remember that we processed the ellipsis. */
3551 was_ellipsis = tok_none;
3552
3553 /* And don't add the value a second time. */
3554 break;
3555 }
3556 }
3557 else if (state == 3)
3558 {
3559 /* It is possible that we already have this collation sequence.
3560 In this case we move the entry. */
3561 void *sym;
3562 void *ptr;
3563
3564 /* If the symbol after which we have to insert was not found
3565 ignore all entries. */
3566 if (collate->cursor == NULL)
3567 {
3568 lr_ignore_rest (ldfile, 0);
3569 break;
3570 }
3571
3572 if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
3573 {
3574 seqp = (struct element_t *) ptr;
3575 goto move_entry;
3576 }
3577
3578 if (find_entry (&collate->sym_table, symstr, symlen, &sym) == 0
3579 && (seqp = ((struct symbol_t *) sym)->order) != NULL)
3580 goto move_entry;
3581
3582 if (find_entry (&collate->elem_table, symstr, symlen, &ptr) == 0
3583 && (seqp = (struct element_t *) ptr,
3584 seqp->last != NULL || seqp->next != NULL
3585 || (collate->start != NULL && seqp == collate->start)))
3586 {
3587 move_entry:
3588 /* Remove the entry from the old position. */
3589 if (seqp->last == NULL)
3590 collate->start = seqp->next;
3591 else
3592 seqp->last->next = seqp->next;
3593 if (seqp->next != NULL)
3594 seqp->next->last = seqp->last;
3595
3596 /* We also have to check whether this entry is the
3597 first or last of a section. */
3598 if (seqp->section->first == seqp)
3599 {
3600 if (seqp->section->first == seqp->section->last)
3601 /* This section has no content anymore. */
3602 seqp->section->first = seqp->section->last = NULL;
3603 else
3604 seqp->section->first = seqp->next;
3605 }
3606 else if (seqp->section->last == seqp)
3607 seqp->section->last = seqp->last;
3608
3609 /* Now insert it in the new place. */
3610 insert_weights (ldfile, seqp, charmap, repertoire, result,
3611 tok_none);
3612 break;
3613 }
3614
3615 /* Otherwise we just add a new entry. */
3616 }
3617 else if (state == 5)
3618 {
3619 /* We are reordering sections. Find the named section. */
3620 struct section_list *runp = collate->sections;
3621 struct section_list *prevp = NULL;
3622
3623 while (runp != NULL)
3624 {
3625 if (runp->name != NULL
3626 && strlen (runp->name) == symlen
3627 && memcmp (runp->name, symstr, symlen) == 0)
3628 break;
3629
3630 prevp = runp;
3631 runp = runp->next;
3632 }
3633
3634 if (runp == NULL)
3635 {
3636 lr_error (ldfile, _("%s: section `%.*s' not known"),
3637 "LC_COLLATE", (int) symlen, symstr);
3638 lr_ignore_rest (ldfile, 0);
3639 }
3640 else
3641 {
3642 if (runp != collate->current_section)
3643 {
3644 /* Remove the named section from the old place and
3645 insert it in the new one. */
3646 prevp->next = runp->next;
3647
3648 runp->next = collate->current_section->next;
3649 collate->current_section->next = runp;
3650 collate->current_section = runp;
3651 }
3652
3653 /* Process the rest of the line which might change
3654 the collation rules. */
3655 arg = lr_token (ldfile, charmap, result, repertoire,
3656 verbose);
3657 if (arg->tok != tok_eof && arg->tok != tok_eol)
3658 read_directions (ldfile, arg, charmap, repertoire,
3659 result);
3660 }
3661 break;
3662 }
3663 else if (was_ellipsis != tok_none)
3664 {
3665 /* Using the information in the `ellipsis_weight'
3666 element and this and the last value we have to handle
3667 the ellipsis now. */
3668 assert (state == 1);
3669
3670 handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, charmap,
3671 repertoire, result);
3672
3673 /* Remember that we processed the ellipsis. */
3674 was_ellipsis = tok_none;
3675
3676 /* And don't add the value a second time. */
3677 break;
3678 }
3679
3680 /* Now insert in the new place. */
3681 insert_value (ldfile, symstr, symlen, charmap, repertoire, result);
3682 break;
3683
3684 case tok_undefined:
3685 /* Ignore the rest of the line if we don't need the input of
3686 this line. */
3687 if (ignore_content)
3688 {
3689 lr_ignore_rest (ldfile, 0);
3690 break;
3691 }
3692
3693 if (state != 1)
3694 goto err_label;
3695
3696 if (was_ellipsis != tok_none)
3697 {
3698 lr_error (ldfile,
3699 _("%s: cannot have `%s' as end of ellipsis range"),
3700 "LC_COLLATE", "UNDEFINED");
3701
3702 unlink_element (collate);
3703 was_ellipsis = tok_none;
3704 }
3705
3706 /* See whether UNDEFINED already appeared somewhere. */
3707 if (collate->undefined.next != NULL
3708 || &collate->undefined == collate->cursor)
3709 {
3710 lr_error (ldfile,
3711 _("%s: order for `%.*s' already defined at %s:%Zu"),
3712 "LC_COLLATE", 9, "UNDEFINED",
3713 collate->undefined.file,
3714 collate->undefined.line);
3715 lr_ignore_rest (ldfile, 0);
3716 }
3717 else
3718 /* Parse the weights. */
3719 insert_weights (ldfile, &collate->undefined, charmap,
3720 repertoire, result, tok_none);
3721 break;
3722
3723 case tok_ellipsis2: /* symbolic hexadecimal ellipsis */
3724 case tok_ellipsis3: /* absolute ellipsis */
3725 case tok_ellipsis4: /* symbolic decimal ellipsis */
3726 /* This is the symbolic (decimal or hexadecimal) or absolute
3727 ellipsis. */
3728 if (was_ellipsis != tok_none)
3729 goto err_label;
3730
3731 if (state != 0 && state != 1 && state != 3)
3732 goto err_label;
3733
3734 was_ellipsis = nowtok;
3735
3736 insert_weights (ldfile, &collate->ellipsis_weight, charmap,
3737 repertoire, result, nowtok);
3738 break;
3739
3740 case tok_end:
3741 seen_end:
3742 /* Next we assume `LC_COLLATE'. */
3743 if (!ignore_content)
3744 {
3745 if (state == 0 && copy_locale == NULL)
3746 /* We must either see a copy statement or have
3747 ordering values. */
3748 lr_error (ldfile,
3749 _("%s: empty category description not allowed"),
3750 "LC_COLLATE");
3751 else if (state == 1)
3752 {
3753 lr_error (ldfile, _("%s: missing `order_end' keyword"),
3754 "LC_COLLATE");
3755
3756 /* Handle ellipsis at end of list. */
3757 if (was_ellipsis != tok_none)
3758 {
3759 handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3760 repertoire, result);
3761 was_ellipsis = tok_none;
3762 }
3763 }
3764 else if (state == 3)
3765 record_error (0, 0, _("\
3766%s: missing `reorder-end' keyword"), "LC_COLLATE");
3767 else if (state == 5)
3768 record_error (0, 0, _("\
3769%s: missing `reorder-sections-end' keyword"), "LC_COLLATE");
3770 }
3771 arg = lr_token (ldfile, charmap, result, NULL, verbose);
3772 if (arg->tok == tok_eof)
3773 break;
3774 if (arg->tok == tok_eol)
3775 lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
3776 else if (arg->tok != tok_lc_collate)
3777 lr_error (ldfile, _("\
3778%1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3779 lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
3780 return;
3781
3782 case tok_define:
3783 if (ignore_content)
3784 {
3785 lr_ignore_rest (ldfile, 0);
3786 break;
3787 }
3788
3789 arg = lr_token (ldfile, charmap, result, NULL, verbose);
3790 if (arg->tok != tok_ident)
3791 goto err_label;
3792
3793 /* Simply add the new symbol. */
3794 struct name_list *newsym = xmalloc (sizeof (*newsym)
3795 + arg->val.str.lenmb + 1);
3796 memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
3797 newsym->str[arg->val.str.lenmb] = '\0';
3798 newsym->next = defined;
3799 defined = newsym;
3800
3801 lr_ignore_rest (ldfile, 1);
3802 break;
3803
3804 case tok_undef:
3805 if (ignore_content)
3806 {
3807 lr_ignore_rest (ldfile, 0);
3808 break;
3809 }
3810
3811 arg = lr_token (ldfile, charmap, result, NULL, verbose);
3812 if (arg->tok != tok_ident)
3813 goto err_label;
3814
3815 /* Remove _all_ occurrences of the symbol from the list. */
3816 struct name_list *prevdef = NULL;
3817 struct name_list *curdef = defined;
3818 while (curdef != NULL)
3819 if (strncmp (arg->val.str.startmb, curdef->str,
3820 arg->val.str.lenmb) == 0
3821 && curdef->str[arg->val.str.lenmb] == '\0')
3822 {
3823 if (prevdef == NULL)
3824 defined = curdef->next;
3825 else
3826 prevdef->next = curdef->next;
3827
3828 struct name_list *olddef = curdef;
3829 curdef = curdef->next;
3830
3831 free (olddef);
3832 }
3833 else
3834 {
3835 prevdef = curdef;
3836 curdef = curdef->next;
3837 }
3838
3839 lr_ignore_rest (ldfile, 1);
3840 break;
3841
3842 case tok_ifdef:
3843 case tok_ifndef:
3844 if (ignore_content)
3845 {
3846 lr_ignore_rest (ldfile, 0);
3847 break;
3848 }
3849
3850 found_ifdef:
3851 arg = lr_token (ldfile, charmap, result, NULL, verbose);
3852 if (arg->tok != tok_ident)
3853 goto err_label;
3854 lr_ignore_rest (ldfile, 1);
3855
3856 if (collate->else_action == else_none)
3857 {
3858 curdef = defined;
3859 while (curdef != NULL)
3860 if (strncmp (arg->val.str.startmb, curdef->str,
3861 arg->val.str.lenmb) == 0
3862 && curdef->str[arg->val.str.lenmb] == '\0')
3863 break;
3864 else
3865 curdef = curdef->next;
3866
3867 if ((nowtok == tok_ifdef && curdef != NULL)
3868 || (nowtok == tok_ifndef && curdef == NULL))
3869 {
3870 /* We have to use the if-branch. */
3871 collate->else_action = else_ignore;
3872 }
3873 else
3874 {
3875 /* We have to use the else-branch, if there is one. */
3876 nowtok = skip_to (ldfile, collate, charmap, 0);
3877 if (nowtok == tok_else)
3878 collate->else_action = else_seen;
3879 else if (nowtok == tok_elifdef)
3880 {
3881 nowtok = tok_ifdef;
3882 goto found_ifdef;
3883 }
3884 else if (nowtok == tok_elifndef)
3885 {
3886 nowtok = tok_ifndef;
3887 goto found_ifdef;
3888 }
3889 else if (nowtok == tok_eof)
3890 goto seen_eof;
3891 else if (nowtok == tok_end)
3892 goto seen_end;
3893 }
3894 }
3895 else
3896 {
3897 /* XXX Should it really become necessary to support nested
3898 preprocessor handling we will push the state here. */
3899 lr_error (ldfile, _("%s: nested conditionals not supported"),
3900 "LC_COLLATE");
3901 nowtok = skip_to (ldfile, collate, charmap, 1);
3902 if (nowtok == tok_eof)
3903 goto seen_eof;
3904 else if (nowtok == tok_end)
3905 goto seen_end;
3906 }
3907 break;
3908
3909 case tok_elifdef:
3910 case tok_elifndef:
3911 case tok_else:
3912 if (ignore_content)
3913 {
3914 lr_ignore_rest (ldfile, 0);
3915 break;
3916 }
3917
3918 lr_ignore_rest (ldfile, 1);
3919
3920 if (collate->else_action == else_ignore)
3921 {
3922 /* Ignore everything until the endif. */
3923 nowtok = skip_to (ldfile, collate, charmap, 1);
3924 if (nowtok == tok_eof)
3925 goto seen_eof;
3926 else if (nowtok == tok_end)
3927 goto seen_end;
3928 }
3929 else
3930 {
3931 assert (collate->else_action == else_none);
3932 lr_error (ldfile, _("\
3933%s: '%s' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE",
3934 nowtok == tok_else ? "else"
3935 : nowtok == tok_elifdef ? "elifdef" : "elifndef");
3936 }
3937 break;
3938
3939 case tok_endif:
3940 if (ignore_content)
3941 {
3942 lr_ignore_rest (ldfile, 0);
3943 break;
3944 }
3945
3946 lr_ignore_rest (ldfile, 1);
3947
3948 if (collate->else_action != else_ignore
3949 && collate->else_action != else_seen)
3950 lr_error (ldfile, _("\
3951%s: 'endif' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE");
3952
3953 /* XXX If we support nested preprocessor directives we pop
3954 the state here. */
3955 collate->else_action = else_none;
3956 break;
3957
3958 default:
3959 err_label:
3960 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
3961 }
3962
3963 /* Prepare for the next round. */
3964 now = lr_token (ldfile, charmap, result, NULL, verbose);
3965 nowtok = now->tok;
3966 }
3967
3968 seen_eof:
3969 /* When we come here we reached the end of the file. */
3970 lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
3971}
3972