ld-collate.c source code [glibc/locale/programs/ld-collate.c]

/* Copyright (C) 1995-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <https://www.gnu.org/licenses/>.  */
17
18	#ifdef HAVE_CONFIG_H
19	# include <config.h>
20	#endif
21
22	#include <errno.h>
23	#include <stdlib.h>
24	#include <wchar.h>
25	#include <stdint.h>
26	#include <sys/param.h>
27
28	#include "localedef.h"
29	#include "charmap.h"
30	#include "localeinfo.h"
31	#include "linereader.h"
32	#include "locfile.h"
33	#include "elem-hash.h"
34
/* Uncomment the following line in the production version.  */
/* #define NDEBUG 1 */
37	#include <assert.h>
38
39	#define obstack_chunk_alloc malloc
40	#define obstack_chunk_free free
41
/* Append the 32-bit value DATA to the object currently growing in
   OBSTACK.  The current object size must satisfy LOCFILE_ALIGNED_P.
   DATA is first passed through maybe_swap_uint32 (presumably a
   byte-order adjustment for the output file -- confirm in locfile.h).  */
static inline void
__attribute ((always_inline))
obstack_int32_grow (struct obstack *obstack, int32_t data)
{
  assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));

  data = maybe_swap_uint32 (data);

  /* When int is not 32 bits wide the int-sized helper cannot be used,
     so copy the raw bytes instead.  */
  if (sizeof (int32_t) != sizeof (int))
    {
      obstack_grow (obstack, &data, sizeof (int32_t));
      return;
    }

  obstack_int_grow (obstack, data);
}
53
/* Like obstack_int32_grow, but uses obstack_int_grow_fast when int is
   32 bits wide.  The caller is then responsible for having made room
   in OBSTACK beforehand, as the obstack "fast" interface requires.
   When int has another width the ordinary, checking obstack_grow is
   used.  */
static inline void
__attribute ((always_inline))
obstack_int32_grow_fast (struct obstack *obstack, int32_t data)
{
  assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));

  data = maybe_swap_uint32 (data);

  if (sizeof (int32_t) != sizeof (int))
    {
      obstack_grow (obstack, &data, sizeof (int32_t));
      return;
    }

  obstack_int_grow_fast (obstack, data);
}
65
/* Forward declaration.  */
struct element_t;

/* Data type for list of strings.  Each node describes one section of
   the collation definition and is a member of up to two singly linked
   lists at once (see DEF_NEXT and NEXT).  */
struct section_list
{
  /* Successor in the known_sections list.  */
  struct section_list *def_next;
  /* Successor in the sections list.  */
  struct section_list *next;
  /* Name of the section.  */
  const char *name;
  /* First element of this section.  */
  struct element_t *first;
  /* Last element of this section.  */
  struct element_t *last;
  /* These are the rules for this section.  */
  enum coll_sort_rule *rules;
  /* Index of the rule set in the appropriate section of the output file.  */
  int ruleidx;
};
87
struct element_t;

/* The weight of an element at one level: a counted array of pointers
   to the elements which make up the weight.  */
struct element_list_t
{
  /* Number of elements.  */
  int cnt;

  /* Array with CNT entries.  A null pointer entry stands for an
     ignored weight (see insert_weights).  */
  struct element_t **w;
};
97
/* Data type for collating element.  */
struct element_t
{
  /* Symbolic name; a null pointer for anonymous elements.  */
  const char *name;

  /* Multibyte sequence and its length in bytes (null pointer and 0 if
     there is none).  */
  const char *mbs;
  size_t nmbs;
  /* Zero-terminated wide character sequence and the number of
     characters in it (null pointer and 0 if there is none).  */
  const uint32_t *wcs;
  size_t nwcs;
  int *mborder;
  int wcorder;

  /* The following is a bit mask which bits are set if this element is
     used in the appropriate level.  Interesting for the singlebyte
     weight computation.

     XXX The type here restricts the number of levels to 32.  It could
     be changed if necessary but I doubt this is necessary.  */
  unsigned int used_in_level;

  /* One entry per sorting rule; allocated in insert_weights.  */
  struct element_list_t *weights;

  /* Nonzero if this is a real character definition.  */
  int is_character;

  /* Order of the character in the sequence.  This information will
     be used in range expressions.  */
  int mbseqorder;
  int wcseqorder;

  /* Where does the definition come from.  */
  const char *file;
  size_t line;

  /* Which section does this belong to.  */
  struct section_list *section;

  /* Predecessor and successor in the order list.  */
  struct element_t *last;
  struct element_t *next;

  /* Next element in multibyte output list.  */
  struct element_t *mbnext;
  struct element_t *mblast;

  /* Next element in wide character output list.  */
  struct element_t *wcnext;
  struct element_t *wclast;
};
147
/* Special element values.  These are never dereferenced; they are
   stored as placeholders in weight lists where the real weight
   depends on the element iterating over an ellipsis range.  */
#define ELEMENT_ELLIPSIS2 ((struct element_t *) 1)
#define ELEMENT_ELLIPSIS3 ((struct element_t *) 2)
#define ELEMENT_ELLIPSIS4 ((struct element_t *) 3)
152
/* Data type for collating symbol.  */
struct symbol_t
{
  /* Name of the symbol.  */
  const char *name;

  /* Point to place in the order list.  A null pointer until an
     element has been associated with the symbol.  */
  struct element_t *order;

  /* Where does the definition come from.  */
  const char *file;
  size_t line;
};
165
166	/ Sparse table of struct element_t . /*
167	#define TABLE wchead_table
168	#define ELEMENT struct element_t *
169	#define DEFAULT NULL
170	#define ITERATE
171	#define NO_ADD_LOCALE
172	#include "3level.h"
173
174	/ Sparse table of int32_t. /
175	#define TABLE collidx_table
176	#define ELEMENT int32_t
177	#define DEFAULT 0
178	#include "3level.h"
179
180	/ Sparse table of uint32_t. /
181	#define TABLE collseq_table
182	#define ELEMENT uint32_t
183	#define DEFAULT ~((uint32_t) 0)
184	#include "3level.h"
185
186
/* Simple name list for the preprocessor.  A node is allocated with
   room for the NUL-terminated name directly behind the header.  */
struct name_list
{
  struct name_list *next;
  /* The name itself, stored inline (C99 flexible array member).  */
  char str[];
};
193
194
195	/ The real definition of the struct for the LC_COLLATE locale. /
196	struct locale_collate_t
197	{
198	int col_weight_max;
199	int cur_weight_max;
200
201	/ List of known scripts. /
202	struct section_list *known_sections;
203	/ List of used sections. /
204	struct section_list *sections;
205	/ Current section using definition. /
206	struct section_list *current_section;
207	/ There always can be an unnamed section. /
208	struct section_list unnamed_section;
209	/ Flag whether the unnamed section has been defined. /
210	bool unnamed_section_defined;
211	/ To make handling of errors easier we have another section. /
212	struct section_list error_section;
213	/ Sometimes we are defining the values for collating symbols before*
214	the first actual section. /*
215	struct section_list symbol_section;
216
217	/ Start of the order list. /
218	struct element_t *start;
219
220	/ The undefined element. /
221	struct element_t undefined;
222
223	/ This is the cursor for `reorder_after' insertions. /
224	struct element_t *cursor;
225
226	/ This value is used when handling ellipsis. /
227	struct element_t ellipsis_weight;
228
229	/ Known collating elements. /
230	hash_table elem_table;
231
232	/ Known collating symbols. /
233	hash_table sym_table;
234
235	/ Known collation sequences. /
236	hash_table seq_table;
237
238	struct obstack mempool;
239
240	/ The LC_COLLATE category is a bit special as it is sometimes possible*
241	that the definitions from more than one input file contains information.
242	Therefore we keep all relevant input in a list. /*
243	struct locale_collate_t *next;
244
245	/ Arrays with heads of the list for each of the leading bytes in*
246	the multibyte sequences. /*
247	struct element_t *mbheads[`256`];
248
249	/ Arrays with heads of the list for each of the leading bytes in*
250	the multibyte sequences. /*
251	struct wchead_table wcheads;
252
253	/ The arrays with the collation sequence order. /
254	unsigned char mbseqorder[`256`];
255	struct collseq_table wcseqorder;
256
257	/ State of the preprocessor. /
258	enum
259	{
260	else_none = `0`,
261	else_ignore,
262	else_seen
263	}
264	else_action;
265	};
266
267
/* We have a few global variables which are used for reading all
   LC_COLLATE category descriptions in all files.  */

/* Number of sorting rules (levels).  Zero until the first direction
   specification has been read by read_directions.  */
static uint32_t nrules;

/* List of defined preprocessor symbols.  */
static struct name_list *defined;
274
275
/* We need UTF-8 encoding of numbers.

   Store the UTF-8 encoding of VAL in BUF and return the number of
   bytes written (1 to 6; the extended five and six byte forms are
   produced for values which need them).  BUF must have room for the
   longest sequence; no terminating NUL is written.  Values below 0x80,
   which includes negative ones, are stored as one byte truncated to
   char.  */
static inline int
__attribute ((always_inline))
utf8_encode (char *buf, int val)
{
  int retval;

  if (val < 0x80)
    {
      *buf = (char) val;
      retval = 1;
    }
  else
    {
      int step;

      /* A sequence of STEP bytes carries 5 * STEP + 1 payload bits.
         Find the shortest one which can hold VAL.  */
      for (step = 2; step < 6; ++step)
        if ((val & (~(uint32_t) 0 << (5 * step + 1))) == 0)
          break;
      retval = step;

      /* The lead byte starts with STEP one bits followed by a zero.
         Shift an unsigned constant; right-shifting the negative value
         ~0xff would be implementation-defined.  */
      *buf = (unsigned char) (0xff00u >> step);
      --step;
      /* Fill the continuation bytes back to front, six bits each.  */
      do
        {
          buf[step] = 0x80 | (val & 0x3f);
          val >>= 6;
        }
      while (--step > 0);
      /* What is left of VAL goes into the free bits of the lead byte.  */
      *buf |= val;
    }

  return retval;
}
310
311
312	static struct section_list *
313	make_seclist_elem (struct locale_collate_t collate, const* char *string,
314	struct section_list *next)
315	{
316	struct section_list *newp;
317
318	newp = (struct section_list *) obstack_alloc (&collate->mempool,
319	sizeof (*newp));
320	newp->next = next;
321	newp->name = string;
322	newp->first = NULL;
323	newp->last = NULL;
324
325	return newp;
326	}
327
328
329	static struct element_t *
330	new_element (struct locale_collate_t collate, const* char *mbs, size_t mbslen,
331	const uint32_t wcs, const* char *name, size_t namelen,
332	int is_character)
333	{
334	struct element_t *newp;
335
336	newp = (struct element_t *) obstack_alloc (&collate->mempool,
337	sizeof (*newp));
338	newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
339	name, namelen);
340	if (mbs != NULL)
341	{
342	newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
343	newp->nmbs = mbslen;
344	}
345	else
346	{
347	newp->mbs = NULL;
348	newp->nmbs = `0`;
349	}
350	if (wcs != NULL)
351	{
352	size_t nwcs = wcslen ((wchar_t *) wcs);
353	uint32_t zero = `0`;
354	/ Handle <U0000> as a single character. /
355	if (nwcs == `0`)
356	nwcs = `1`;
357	obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
358	obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
359	newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
360	newp->nwcs = nwcs;
361	}
362	else
363	{
364	newp->wcs = NULL;
365	newp->nwcs = `0`;
366	}
367	newp->mborder = NULL;
368	newp->wcorder = `0`;
369	newp->used_in_level = `0`;
370	newp->is_character = is_character;
371
372	/ Will be assigned later. XXX /
373	newp->mbseqorder = `0`;
374	newp->wcseqorder = `0`;
375
376	/ Will be allocated later. /
377	newp->weights = NULL;
378
379	newp->file = NULL;
380	newp->line = `0`;
381
382	newp->section = collate->current_section;
383
384	newp->last = NULL;
385	newp->next = NULL;
386
387	newp->mbnext = NULL;
388	newp->mblast = NULL;
389
390	newp->wcnext = NULL;
391	newp->wclast = NULL;
392
393	return newp;
394	}
395
396
397	static struct symbol_t *
398	new_symbol (struct locale_collate_t collate, const* char *name, size_t len)
399	{
400	struct symbol_t *newp;
401
402	newp = (struct symbol_t ) obstack_alloc (&collate->mempool, sizeof* (*newp));
403
404	newp->name = obstack_copy0 (&collate->mempool, name, len);
405	newp->order = NULL;
406
407	newp->file = NULL;
408	newp->line = `0`;
409
410	return newp;
411	}
412
413
414	/ Test whether this name is already defined somewhere. /
415	static int
416	check_duplicate (struct linereader ldfile, struct* locale_collate_t *collate,
417	const struct charmap_t *charmap,
418	struct repertoire_t repertoire, const* char *symbol,
419	size_t symbol_len)
420	{
421	void *ignore = NULL;
422
423	if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == `0`)
424	{
425	lr_error (ldfile, _("`%.*s' already defined in charmap"),
426	(int) symbol_len, symbol);
427	return `1`;
428	}
429
430	if (repertoire != NULL
431	&& (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore)
432	== `0`))
433	{
434	lr_error (ldfile, _("`%.*s' already defined in repertoire"),
435	(int) symbol_len, symbol);
436	return `1`;
437	}
438
439	if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == `0`)
440	{
441	lr_error (ldfile, _("`%.*s' already defined as collating symbol"),
442	(int) symbol_len, symbol);
443	return `1`;
444	}
445
446	if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == `0`)
447	{
448	lr_error (ldfile, _("`%.*s' already defined as collating element"),
449	(int) symbol_len, symbol);
450	return `1`;
451	}
452
453	return `0`;
454	}
455
456
457	/ Read the direction specification. /
458	static void
459	read_directions (struct linereader ldfile, struct* token *arg,
460	const struct charmap_t *charmap,
461	struct repertoire_t repertoire, struct* localedef_t *result)
462	{
463	int cnt = `0`;
464	int max = nrules ?: `10`;
465	enum coll_sort_rule rules = calloc (max, sizeof* (*rules));
466	int warned = `0`;
467	struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
468
469	while (`1`)
470	{
471	int valid = `0`;
472
473	if (arg->tok == tok_forward)
474	{
475	if (rules[cnt] & sort_backward)
476	{
477	if (! warned)
478	{
479	lr_error (ldfile, _("\
480	%s: `forward' and `backward' are mutually excluding each other"),
481	"LC_COLLATE");
482	warned = `1`;
483	}
484	}
485	else if (rules[cnt] & sort_forward)
486	{
487	if (! warned)
488	{
489	lr_error (ldfile, _("\
490	%s: `%s' mentioned more than once in definition of weight %d"),
491	"LC_COLLATE", "forward", cnt + `1`);
492	}
493	}
494	else
495	rules[cnt] \|= sort_forward;
496
497	valid = `1`;
498	}
499	else if (arg->tok == tok_backward)
500	{
501	if (rules[cnt] & sort_forward)
502	{
503	if (! warned)
504	{
505	lr_error (ldfile, _("\
506	%s: `forward' and `backward' are mutually excluding each other"),
507	"LC_COLLATE");
508	warned = `1`;
509	}
510	}
511	else if (rules[cnt] & sort_backward)
512	{
513	if (! warned)
514	{
515	lr_error (ldfile, _("\
516	%s: `%s' mentioned more than once in definition of weight %d"),
517	"LC_COLLATE", "backward", cnt + `1`);
518	}
519	}
520	else
521	rules[cnt] \|= sort_backward;
522
523	valid = `1`;
524	}
525	else if (arg->tok == tok_position)
526	{
527	if (rules[cnt] & sort_position)
528	{
529	if (! warned)
530	{
531	lr_error (ldfile, _("\
532	%s: `%s' mentioned more than once in definition of weight %d"),
533	"LC_COLLATE", "position", cnt + `1`);
534	}
535	}
536	else
537	rules[cnt] \|= sort_position;
538
539	valid = `1`;
540	}
541
542	if (valid)
543	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
544
545	if (arg->tok == tok_eof \|\| arg->tok == tok_eol \|\| arg->tok == tok_comma
546	\|\| arg->tok == tok_semicolon)
547	{
548	if (! valid && ! warned)
549	{
550	lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
551	warned = `1`;
552	}
553
554	/ See whether we have to increment the counter. /
555	if (arg->tok != tok_comma && rules[cnt] != `0`)
556	{
557	/ Add the default `forward' if we have seen only `position'. /
558	if (rules[cnt] == sort_position)
559	rules[cnt] = sort_position \| sort_forward;
560
561	++cnt;
562	}
563
564	if (arg->tok == tok_eof \|\| arg->tok == tok_eol)
565	/ End of line or file, so we exit the loop. /
566	break;
567
568	if (nrules == `0`)
569	{
570	/ See whether we have enough room in the array. /
571	if (cnt == max)
572	{
573	max += `10`;
574	rules = (enum coll_sort_rule *) xrealloc (rules,
575	max
576	* sizeof (*rules));
577	memset (&rules[cnt], `'\0'`, (max - cnt) * sizeof (*rules));
578	}
579	}
580	else
581	{
582	if (cnt == nrules)
583	{
584	/ There must not be any more rule. /
585	if (! warned)
586	{
587	lr_error (ldfile, _("\
588	%s: too many rules; first entry only had %d"),
589	"LC_COLLATE", nrules);
590	warned = `1`;
591	}
592
593	lr_ignore_rest (ldfile, `0`);
594	break;
595	}
596	}
597	}
598	else
599	{
600	if (! warned)
601	{
602	lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
603	warned = `1`;
604	}
605	}
606
607	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
608	}
609
610	if (nrules == `0`)
611	{
612	/ Now we know how many rules we have. /
613	nrules = cnt;
614	rules = (enum coll_sort_rule *) xrealloc (rules,
615	nrules * sizeof (*rules));
616	}
617	else
618	{
619	if (cnt < nrules)
620	{
621	/ Not enough rules in this specification. /
622	if (! warned)
623	lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
624
625	do
626	rules[cnt] = sort_forward;
627	while (++cnt < nrules);
628	}
629	}
630
631	collate->current_section->rules = rules;
632	}
633
634
635	static struct element_t *
636	find_element (struct linereader ldfile, struct* locale_collate_t *collate,
637	const char *str, size_t len)
638	{
639	void *result = NULL;
640
641	/ Search for the entries among the collation sequences already define. /
642	if (find_entry (&collate->seq_table, str, len, &result) != `0`)
643	{
644	/ Nope, not define yet. So we see whether it is a*
645	collation symbol. /*
646	void *ptr;
647
648	if (find_entry (&collate->sym_table, str, len, &ptr) == `0`)
649	{
650	/ It's a collation symbol. /
651	struct symbol_t sym = (struct* symbol_t *) ptr;
652	result = sym->order;
653
654	if (result == NULL)
655	result = sym->order = new_element (collate, NULL, `0`, NULL,
656	NULL, `0`, `0`);
657	}
658	else if (find_entry (&collate->elem_table, str, len, &result) != `0`)
659	{
660	/ It's also no collation element. So it is a character*
661	element defined later. /*
662	result = new_element (collate, NULL, `0`, NULL, str, len, `1`);
663	/ Insert it into the sequence table. /
664	insert_entry (&collate->seq_table, str, len, result);
665	}
666	}
667
668	return (struct element_t *) result;
669	}
670
671
672	static void
673	unlink_element (struct locale_collate_t *collate)
674	{
675	if (collate->cursor == collate->start)
676	{
677	assert (collate->cursor->next == NULL);
678	assert (collate->cursor->last == NULL);
679	collate->cursor = NULL;
680	}
681	else
682	{
683	if (collate->cursor->next != NULL)
684	collate->cursor->next->last = collate->cursor->last;
685	if (collate->cursor->last != NULL)
686	collate->cursor->last->next = collate->cursor->next;
687	collate->cursor = collate->cursor->last;
688	}
689	}
690
691
692	static void
693	insert_weights (struct linereader ldfile, struct* element_t *elem,
694	const struct charmap_t *charmap,
695	struct repertoire_t repertoire, struct* localedef_t *result,
696	enum token_t ellipsis)
697	{
698	int weight_cnt;
699	struct token *arg;
700	struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
701
702	/ Initialize all the fields. /
703	elem->file = ldfile->fname;
704	elem->line = ldfile->lineno;
705
706	elem->last = collate->cursor;
707	elem->next = collate->cursor ? collate->cursor->next : NULL;
708	if (collate->cursor != NULL && collate->cursor->next != NULL)
709	collate->cursor->next->last = elem;
710	if (collate->cursor != NULL)
711	collate->cursor->next = elem;
712	if (collate->start == NULL)
713	{
714	assert (collate->cursor == NULL);
715	collate->start = elem;
716	}
717
718	elem->section = collate->current_section;
719
720	if (collate->current_section->first == NULL)
721	collate->current_section->first = elem;
722	if (collate->current_section->last == collate->cursor)
723	collate->current_section->last = elem;
724
725	collate->cursor = elem;
726
727	elem->weights = (struct element_list_t *)
728	obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
729	memset (elem->weights, `'\0'`, nrules * sizeof (struct element_list_t));
730
731	weight_cnt = `0`;
732
733	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
734	do
735	{
736	if (arg->tok == tok_eof \|\| arg->tok == tok_eol)
737	break;
738
739	if (arg->tok == tok_ignore)
740	{
741	/ The weight for this level has to be ignored. We use the*
742	null pointer to indicate this. /*
743	elem->weights[weight_cnt].w = (struct element_t **)
744	obstack_alloc (&collate->mempool, sizeof (struct element_t *));
745	elem->weights[weight_cnt].w[`0`] = NULL;
746	elem->weights[weight_cnt].cnt = `1`;
747	}
748	else if (arg->tok == tok_bsymbol \|\| arg->tok == tok_ucs4)
749	{
750	char ucs4str[`10`];
751	struct element_t *val;
752	char *symstr;
753	size_t symlen;
754
755	if (arg->tok == tok_bsymbol)
756	{
757	symstr = arg->val.str.startmb;
758	symlen = arg->val.str.lenmb;
759	}
760	else
761	{
762	snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4);
763	symstr = ucs4str;
764	symlen = `9`;
765	}
766
767	val = find_element (ldfile, collate, symstr, symlen);
768	if (val == NULL)
769	break;
770
771	elem->weights[weight_cnt].w = (struct element_t **)
772	obstack_alloc (&collate->mempool, sizeof (struct element_t *));
773	elem->weights[weight_cnt].w[`0`] = val;
774	elem->weights[weight_cnt].cnt = `1`;
775	}
776	else if (arg->tok == tok_string)
777	{
778	/ Split the string up in the individual characters and put*
779	the element definitions in the list. /*
780	const char *cp = arg->val.str.startmb;
781	int cnt = `0`;
782	struct element_t *charelem;
783	struct element_t **weights = NULL;
784	int max = `0`;
785
786	if (*cp == `'\0'`)
787	{
788	lr_error (ldfile, _("%s: empty weight string not allowed"),
789	"LC_COLLATE");
790	lr_ignore_rest (ldfile, `0`);
791	break;
792	}
793
794	do
795	{
796	if (*cp == `'<'`)
797	{
798	/ Ahh, it's a bsymbol or an UCS4 value. If it's*
799	the latter we have to unify the name. /*
800	const char *startp = ++cp;
801	size_t len;
802
803	while (*cp != `'>'`)
804	{
805	if (*cp == ldfile->escape_char)
806	++cp;
807	if (*cp == `'\0'`)
808	/ It's a syntax error. /
809	goto syntax;
810
811	++cp;
812	}
813
814	if (cp - startp == `5` && startp[`0`] == `'U'`
815	&& isxdigit (startp[`1`]) && isxdigit (startp[`2`])
816	&& isxdigit (startp[`3`]) && isxdigit (startp[`4`]))
817	{
818	unsigned int ucs4 = strtoul (startp + `1`, NULL, `16`);
819	char *newstr;
820
821	newstr = (char *) xmalloc (`10`);
822	snprintf (newstr, `10`, "U%08X", ucs4);
823	startp = newstr;
824
825	len = `9`;
826	}
827	else
828	len = cp - startp;
829
830	charelem = find_element (ldfile, collate, startp, len);
831	++cp;
832	}
833	else
834	{
835	/ People really shouldn't use characters directly in*
836	the string. Especially since it's not really clear
837	what this means. We interpret all characters in the
838	string as if that would be bsymbols. Otherwise we
839	would have to match back to bsymbols somehow and this
840	is normally not what people normally expect. /*
841	charelem = find_element (ldfile, collate, cp++, `1`);
842	}
843
844	if (charelem == NULL)
845	{
846	/ We ignore the rest of the line. /
847	lr_ignore_rest (ldfile, `0`);
848	break;
849	}
850
851	/ Add the pointer. /
852	if (cnt >= max)
853	{
854	struct element_t **newp;
855	max += `10`;
856	newp = (struct element_t **)
857	alloca (max * sizeof (struct element_t *));
858	memcpy (newp, weights, cnt * sizeof (struct element_t *));
859	weights = newp;
860	}
861	weights[cnt++] = charelem;
862	}
863	while (*cp != `'\0'`);
864
865	/ Now store the information. /
866	elem->weights[weight_cnt].w = (struct element_t **)
867	obstack_alloc (&collate->mempool,
868	cnt * sizeof (struct element_t *));
869	memcpy (elem->weights[weight_cnt].w, weights,
870	cnt * sizeof (struct element_t *));
871	elem->weights[weight_cnt].cnt = cnt;
872
873	/ We don't need the string anymore. /
874	free (arg->val.str.startmb);
875	}
876	else if (ellipsis != tok_none
877	&& (arg->tok == tok_ellipsis2
878	\|\| arg->tok == tok_ellipsis3
879	\|\| arg->tok == tok_ellipsis4))
880	{
881	/ It must be the same ellipsis as used in the initial column. /
882	if (arg->tok != ellipsis)
883	lr_error (ldfile, _("\
884	%s: weights must use the same ellipsis symbol as the name"),
885	"LC_COLLATE");
886
887	/ The weight for this level will depend on the element*
888	iterating over the range. Put a placeholder. /*
889	elem->weights[weight_cnt].w = (struct element_t **)
890	obstack_alloc (&collate->mempool, sizeof (struct element_t *));
891	elem->weights[weight_cnt].w[`0`] = ELEMENT_ELLIPSIS2;
892	elem->weights[weight_cnt].cnt = `1`;
893	}
894	else
895	{
896	syntax:
897	/ It's a syntax error. /
898	lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
899	lr_ignore_rest (ldfile, `0`);
900	break;
901	}
902
903	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
904	/ This better should be the end of the line or a semicolon. /
905	if (arg->tok == tok_semicolon)
906	/ OK, ignore this and read the next token. /
907	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
908	else if (arg->tok != tok_eof && arg->tok != tok_eol)
909	{
910	/ It's a syntax error. /
911	lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
912	lr_ignore_rest (ldfile, `0`);
913	break;
914	}
915	}
916	while (++weight_cnt < nrules);
917
918	if (weight_cnt < nrules)
919	{
920	/ This means the rest of the line uses the current element as*
921	the weight. /*
922	do
923	{
924	elem->weights[weight_cnt].w = (struct element_t **)
925	obstack_alloc (&collate->mempool, sizeof (struct element_t *));
926	if (ellipsis == tok_none)
927	elem->weights[weight_cnt].w[`0`] = elem;
928	else
929	elem->weights[weight_cnt].w[`0`] = ELEMENT_ELLIPSIS2;
930	elem->weights[weight_cnt].cnt = `1`;
931	}
932	while (++weight_cnt < nrules);
933	}
934	else
935	{
936	if (arg->tok == tok_ignore \|\| arg->tok == tok_bsymbol)
937	{
938	/ Too many rule values. /
939	lr_error (ldfile, _("%s: too many values"), "LC_COLLATE");
940	lr_ignore_rest (ldfile, `0`);
941	}
942	else
943	lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof);
944	}
945	}
946
947
948	static int
949	insert_value (struct linereader ldfile, const* char *symstr, size_t symlen,
950	const struct charmap_t charmap, struct* repertoire_t *repertoire,
951	struct localedef_t *result)
952	{
953	/ First find out what kind of symbol this is. /
954	struct charseq *seq;
955	uint32_t wc;
956	struct element_t *elem = NULL;
957	struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
958
959	/ Try to find the character in the charmap. /
960	seq = charmap_find_value (charmap, symstr, symlen);
961
962	/ Determine the wide character. /
963	if (seq == NULL \|\| seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
964	{
965	wc = repertoire_find_value (repertoire, symstr, symlen);
966	if (seq != NULL)
967	seq->ucs4 = wc;
968	}
969	else
970	wc = seq->ucs4;
971
972	if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
973	{
974	/ It's no character, so look through the collation elements and*
975	symbol list. /*
976	void *ptr = elem;
977	if (find_entry (&collate->elem_table, symstr, symlen, &ptr) != `0`)
978	{
979	void *result;
980	struct symbol_t *sym = NULL;
981
982	/ It's also collation element. Therefore it's either a*
983	collating symbol or it's a character which is not
984	supported by the character set. In the later case we
985	simply create a dummy entry. /*
986	if (find_entry (&collate->sym_table, symstr, symlen, &result) == `0`)
987	{
988	/ It's a collation symbol. /
989	sym = (struct symbol_t *) result;
990
991	elem = sym->order;
992	}
993
994	if (elem == NULL)
995	{
996	elem = new_element (collate, NULL, `0`, NULL, symstr, symlen, `0`);
997
998	if (sym != NULL)
999	sym->order = elem;
1000	else
1001	/ Enter a fake element in the sequence table. This*
1002	won't cause anything in the output since there is
1003	no multibyte or wide character associated with
1004	it. /*
1005	insert_entry (&collate->seq_table, symstr, symlen, elem);
1006	}
1007	}
1008	else
1009	/ Copy the result back. /
1010	elem = ptr;
1011	}
1012	else
1013	{
1014	/ Otherwise the symbols stands for a character. /
1015	void *ptr = elem;
1016	if (find_entry (&collate->seq_table, symstr, symlen, &ptr) != `0`)
1017	{
1018	uint32_t wcs[`2`] = { wc, `0` };
1019
1020	/ We have to allocate an entry. /
1021	elem = new_element (collate,
1022	seq != NULL ? (char *) seq->bytes : NULL,
1023	seq != NULL ? seq->nbytes : `0`,
1024	wc == ILLEGAL_CHAR_VALUE ? NULL : wcs,
1025	symstr, symlen, `1`);
1026
1027	/ And add it to the table. /
1028	if (insert_entry (&collate->seq_table, symstr, symlen, elem) != `0`)
1029	/ This cannot happen. /
1030	assert (! "Internal error");
1031	}
1032	else
1033	{
1034	/ Copy the result back. /
1035	elem = ptr;
1036
1037	/ Maybe the character was used before the definition. In this case*
1038	we have to insert the byte sequences now. /*
1039	if (elem->mbs == NULL && seq != NULL)
1040	{
1041	elem->mbs = obstack_copy0 (&collate->mempool,
1042	seq->bytes, seq->nbytes);
1043	elem->nmbs = seq->nbytes;
1044	}
1045
1046	if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE)
1047	{
1048	uint32_t wcs[`2`] = { wc, `0` };
1049
1050	elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs));
1051	elem->nwcs = `1`;
1052	}
1053	}
1054	}
1055
1056	/ Test whether this element is not already in the list. /
1057	if (elem->next != NULL \|\| elem == collate->cursor)
1058	{
1059	lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"),
1060	(int) symlen, symstr, elem->file, elem->line);
1061	lr_ignore_rest (ldfile, `0`);
1062	return `1`;
1063	}
1064
1065	insert_weights (ldfile, elem, charmap, repertoire, result, tok_none);
1066
1067	return `0`;
1068	}
1069
1070
1071	static void
1072	handle_ellipsis (struct linereader ldfile, const* char *symstr, size_t symlen,
1073	enum token_t ellipsis, const struct charmap_t *charmap,
1074	struct repertoire_t *repertoire,
1075	struct localedef_t *result)
1076	{
1077	struct element_t *startp;
1078	struct element_t *endp;
1079	struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
1080
1081	/ Unlink the entry added for the ellipsis. /
1082	unlink_element (collate);
1083	startp = collate->cursor;
1084
1085	/ Process and add the end-entry. /
1086	if (symstr != NULL
1087	&& insert_value (ldfile, symstr, symlen, charmap, repertoire, result))
1088	/ Something went wrong with inserting the to-value. This means*
1089	we cannot process the ellipsis. /*
1090	return;
1091
1092	/ Reset the cursor. /
1093	collate->cursor = startp;
1094
1095	/ Now we have to handle many different situations:*
1096	- we have to distinguish between the three different ellipsis forms
1097	- the is the ellipsis at the beginning, in the middle, or at the end.
1098	*/
1099	endp = collate->cursor->next;
1100	assert (symstr == NULL \|\| endp != NULL);
1101
1102	/ XXX The following is probably very wrong since also collating symbols*
1103	can appear in ranges. But do we want/can refine the test for that? /*
1104	#if 0
1105	/ Both, the start and the end symbol, must stand for characters. /
1106	if ((startp != NULL && (startp->name == NULL \|\| ! startp->is_character))
1107	\|\| (endp != NULL && (endp->name == NULL\|\| ! endp->is_character)))
1108	{
1109	lr_error (ldfile, _("\
1110	%s: the start and the end symbol of a range must stand for characters"),
1111	"LC_COLLATE");
1112	return;
1113	}
1114	#endif
1115
1116	if (ellipsis == tok_ellipsis3)
1117	{
1118	/ One requirement we make here: the length of the byte*
1119	sequences for the first and end character must be the same.
1120	This is mainly to prevent unwanted effects and this is often
1121	not what is wanted. /*
1122	size_t len = (startp->mbs != NULL ? startp->nmbs
1123	: (endp->mbs != NULL ? endp->nmbs : `0`));
1124	char mbcnt[len + `1`];
1125	char mbend[len + `1`];
1126
1127	/ Well, this should be caught somewhere else already. Just to*
1128	make sure. /*
1129	assert (startp == NULL \|\| startp->wcs == NULL \|\| startp->wcs[`1`] == `0`);
1130	assert (endp == NULL \|\| endp->wcs == NULL \|\| endp->wcs[`1`] == `0`);
1131
1132	if (startp != NULL && endp != NULL
1133	&& startp->mbs != NULL && endp->mbs != NULL
1134	&& startp->nmbs != endp->nmbs)
1135	{
1136	lr_error (ldfile, _("\
1137	%s: byte sequences of first and last character must have the same length"),
1138	"LC_COLLATE");
1139	return;
1140	}
1141
1142	/ Determine whether we have to generate multibyte sequences. /
1143	if ((startp == NULL \|\| startp->mbs != NULL)
1144	&& (endp == NULL \|\| endp->mbs != NULL))
1145	{
1146	int cnt;
1147	int ret;
1148
1149	/ Prepare the beginning byte sequence. This is either from the*
1150	beginning byte sequence or it is all nulls if it was an
1151	initial ellipsis. /*
1152	if (startp == NULL \|\| startp->mbs == NULL)
1153	memset (mbcnt, `'\0'`, len);
1154	else
1155	{
1156	memcpy (mbcnt, startp->mbs, len);
1157
1158	/ And increment it so that the value is the first one we will*
1159	try to insert. /*
1160	for (cnt = len - `1`; cnt >= `0`; --cnt)
1161	if (++mbcnt[cnt] != `'\0'`)
1162	break;
1163	}
1164	mbcnt[len] = `'\0'`;
1165
1166	/ And the end sequence. /
1167	if (endp == NULL \|\| endp->mbs == NULL)
1168	memset (mbend, `'\0'`, len);
1169	else
1170	memcpy (mbend, endp->mbs, len);
1171	mbend[len] = `'\0'`;
1172
1173	/ Test whether we have a correct range. /
1174	ret = memcmp (mbcnt, mbend, len);
1175	if (ret >= `0`)
1176	{
1177	if (ret > `0`)
1178	lr_error (ldfile, _("%s: byte sequence of first character of \
1179	range is not lower than that of the last character"), "LC_COLLATE");
1180	return;
1181	}
1182
1183	/ Generate the byte sequences data. /
1184	while (`1`)
1185	{
1186	struct charseq *seq;
1187
1188	/ Quite a bit of work ahead. We have to find the character*
1189	definition for the byte sequence and then determine the
1190	wide character belonging to it. /*
1191	seq = charmap_find_symbol (charmap, mbcnt, len);
1192	if (seq != NULL)
1193	{
1194	struct element_t *elem;
1195	size_t namelen;
1196
1197	/ I don't think this can ever happen. /
1198	assert (seq->name != NULL);
1199	namelen = strlen (seq->name);
1200
1201	if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1202	seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1203	namelen);
1204
1205	/ Now we are ready to insert the new value in the*
1206	sequence. Find out whether the element is
1207	already known. /*
1208	void *ptr;
1209	if (find_entry (&collate->seq_table, seq->name, namelen,
1210	&ptr) != `0`)
1211	{
1212	uint32_t wcs[`2`] = { seq->ucs4, `0` };
1213
1214	/ We have to allocate an entry. /
1215	elem = new_element (collate, mbcnt, len,
1216	seq->ucs4 == ILLEGAL_CHAR_VALUE
1217	? NULL : wcs, seq->name,
1218	namelen, `1`);
1219
1220	/ And add it to the table. /
1221	if (insert_entry (&collate->seq_table, seq->name,
1222	namelen, elem) != `0`)
1223	/ This cannot happen. /
1224	assert (! "Internal error");
1225	}
1226	else
1227	/ Copy the result. /
1228	elem = ptr;
1229
1230	/ Test whether this element is not already in the list. /
1231	if (elem->next != NULL \|\| (collate->cursor != NULL
1232	&& elem->next == collate->cursor))
1233	{
1234	lr_error (ldfile, _("\
1235	order for `%.*s' already defined at %s:%Zu"),
1236	(int) namelen, seq->name,
1237	elem->file, elem->line);
1238	goto increment;
1239	}
1240
1241	/ Enqueue the new element. /
1242	elem->last = collate->cursor;
1243	if (collate->cursor == NULL)
1244	elem->next = NULL;
1245	else
1246	{
1247	elem->next = collate->cursor->next;
1248	elem->last->next = elem;
1249	if (elem->next != NULL)
1250	elem->next->last = elem;
1251	}
1252	if (collate->start == NULL)
1253	{
1254	assert (collate->cursor == NULL);
1255	collate->start = elem;
1256	}
1257	collate->cursor = elem;
1258
1259	/ Add the weight value. We take them from the*
1260	`ellipsis_weights' member of `collate'. /*
1261	elem->weights = (struct element_list_t *)
1262	obstack_alloc (&collate->mempool,
1263	nrules * sizeof (struct element_list_t));
1264	for (cnt = `0`; cnt < nrules; ++cnt)
1265	if (collate->ellipsis_weight.weights[cnt].cnt == `1`
1266	&& (collate->ellipsis_weight.weights[cnt].w[`0`]
1267	== ELEMENT_ELLIPSIS2))
1268	{
1269	elem->weights[cnt].w = (struct element_t **)
1270	obstack_alloc (&collate->mempool,
1271	sizeof (struct element_t *));
1272	elem->weights[cnt].w[`0`] = elem;
1273	elem->weights[cnt].cnt = `1`;
1274	}
1275	else
1276	{
1277	/ Simply use the weight from `ellipsis_weight'. /
1278	elem->weights[cnt].w =
1279	collate->ellipsis_weight.weights[cnt].w;
1280	elem->weights[cnt].cnt =
1281	collate->ellipsis_weight.weights[cnt].cnt;
1282	}
1283	}
1284
1285	/ Increment for the next round. /
1286	increment:
1287	for (cnt = len - `1`; cnt >= `0`; --cnt)
1288	if (++mbcnt[cnt] != `'\0'`)
1289	break;
1290
1291	/ Find out whether this was all. /
1292	if (cnt < `0` \|\| memcmp (mbcnt, mbend, len) >= `0`)
1293	/ Yep, that's all. /
1294	break;
1295	}
1296	}
1297	}
1298	else
1299	{
1300	/ For symbolic range we naturally must have a beginning and an*
1301	end specified by the user. /*
1302	if (startp == NULL)
1303	lr_error (ldfile, _("\
1304	%s: symbolic range ellipsis must not directly follow `order_start'"),
1305	"LC_COLLATE");
1306	else if (endp == NULL)
1307	lr_error (ldfile, _("\
1308	%s: symbolic range ellipsis must not be directly followed by `order_end'"),
1309	"LC_COLLATE");
1310	else
1311	{
1312	/ Determine the range. To do so we have to determine the*
1313	common prefix of the both names and then the numeric
1314	values of both ends. /*
1315	size_t lenfrom = strlen (startp->name);
1316	size_t lento = strlen (endp->name);
1317	char buf[lento + `1`];
1318	int preflen = `0`;
1319	long int from;
1320	long int to;
1321	char *cp;
1322	int base = ellipsis == tok_ellipsis2 ? `16` : `10`;
1323
1324	if (lenfrom != lento)
1325	{
1326	invalid_range:
1327	lr_error (ldfile, _("\
1328	`%s' and `%.*s' are not valid names for symbolic range"),
1329	startp->name, (int) lento, endp->name);
1330	return;
1331	}
1332
1333	while (startp->name[preflen] == endp->name[preflen])
1334	if (startp->name[preflen] == `'\0'`)
1335	/ Nothing to be done. The start and end point are identical*
1336	and while inserting the end point we have already given
1337	the user an error message. /*
1338	return;
1339	else
1340	++preflen;
1341
1342	errno = `0`;
1343	from = strtol (startp->name + preflen, &cp, base);
1344	if ((from == UINT_MAX && errno == ERANGE) \|\| *cp != `'\0'`)
1345	goto invalid_range;
1346
1347	errno = `0`;
1348	to = strtol (endp->name + preflen, &cp, base);
1349	if ((to == UINT_MAX && errno == ERANGE) \|\| *cp != `'\0'`)
1350	goto invalid_range;
1351
1352	/ Copy the prefix. /
1353	memcpy (buf, startp->name, preflen);
1354
1355	/ Loop over all values. /
1356	for (++from; from < to; ++from)
1357	{
1358	struct element_t *elem = NULL;
1359	struct charseq *seq;
1360	uint32_t wc;
1361	int cnt;
1362
1363	/ Generate the name. /
1364	sprintf (buf + preflen, base == `10` ? "%0ld" : "%0lX",
1365	(int) (lenfrom - preflen), from);
1366
1367	/ Look whether this name is already defined. /
1368	void *ptr;
1369	if (find_entry (&collate->seq_table, buf, symlen, &ptr) == `0`)
1370	{
1371	/ Copy back the result. /
1372	elem = ptr;
1373
1374	if (elem->next != NULL \|\| (collate->cursor != NULL
1375	&& elem->next == collate->cursor))
1376	{
1377	lr_error (ldfile, _("\
1378	%s: order for `%.*s' already defined at %s:%Zu"),
1379	"LC_COLLATE", (int) lenfrom, buf,
1380	elem->file, elem->line);
1381	continue;
1382	}
1383
1384	if (elem->name == NULL)
1385	{
1386	lr_error (ldfile, _("%s: `%s' must be a character"),
1387	"LC_COLLATE", buf);
1388	continue;
1389	}
1390	}
1391
1392	if (elem == NULL \|\| (elem->mbs == NULL && elem->wcs == NULL))
1393	{
1394	/ Search for a character of this name. /
1395	seq = charmap_find_value (charmap, buf, lenfrom);
1396	if (seq == NULL \|\| seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1397	{
1398	wc = repertoire_find_value (repertoire, buf, lenfrom);
1399
1400	if (seq != NULL)
1401	seq->ucs4 = wc;
1402	}
1403	else
1404	wc = seq->ucs4;
1405
1406	if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
1407	/ We don't know anything about a character with this*
1408	name. XXX Should we warn? /*
1409	continue;
1410
1411	if (elem == NULL)
1412	{
1413	uint32_t wcs[`2`] = { wc, `0` };
1414
1415	/ We have to allocate an entry. /
1416	elem = new_element (collate,
1417	seq != NULL
1418	? (char *) seq->bytes : NULL,
1419	seq != NULL ? seq->nbytes : `0`,
1420	wc == ILLEGAL_CHAR_VALUE
1421	? NULL : wcs, buf, lenfrom, `1`);
1422	}
1423	else
1424	{
1425	/ Update the element. /
1426	if (seq != NULL)
1427	{
1428	elem->mbs = obstack_copy0 (&collate->mempool,
1429	seq->bytes, seq->nbytes);
1430	elem->nmbs = seq->nbytes;
1431	}
1432
1433	if (wc != ILLEGAL_CHAR_VALUE)
1434	{
1435	uint32_t zero = `0`;
1436
1437	obstack_grow (&collate->mempool,
1438	&wc, sizeof (uint32_t));
1439	obstack_grow (&collate->mempool,
1440	&zero, sizeof (uint32_t));
1441	elem->wcs = obstack_finish (&collate->mempool);
1442	elem->nwcs = `1`;
1443	}
1444	}
1445
1446	elem->file = ldfile->fname;
1447	elem->line = ldfile->lineno;
1448	elem->section = collate->current_section;
1449	}
1450
1451	/ Enqueue the new element. /
1452	elem->last = collate->cursor;
1453	elem->next = collate->cursor->next;
1454	elem->last->next = elem;
1455	if (elem->next != NULL)
1456	elem->next->last = elem;
1457	collate->cursor = elem;
1458
1459	/ Now add the weights. They come from the `ellipsis_weights'*
1460	member of `collate'. /*
1461	elem->weights = (struct element_list_t *)
1462	obstack_alloc (&collate->mempool,
1463	nrules * sizeof (struct element_list_t));
1464	for (cnt = `0`; cnt < nrules; ++cnt)
1465	if (collate->ellipsis_weight.weights[cnt].cnt == `1`
1466	&& (collate->ellipsis_weight.weights[cnt].w[`0`]
1467	== ELEMENT_ELLIPSIS2))
1468	{
1469	elem->weights[cnt].w = (struct element_t **)
1470	obstack_alloc (&collate->mempool,
1471	sizeof (struct element_t *));
1472	elem->weights[cnt].w[`0`] = elem;
1473	elem->weights[cnt].cnt = `1`;
1474	}
1475	else
1476	{
/* Simply use the weight from `ellipsis_weight'.  */
1478	elem->weights[cnt].w =
1479	collate->ellipsis_weight.weights[cnt].w;
1480	elem->weights[cnt].cnt =
1481	collate->ellipsis_weight.weights[cnt].cnt;
1482	}
1483	}
1484	}
1485	}
1486	}
1487
1488
1489	static void
1490	collate_startup (struct linereader ldfile, struct* localedef_t *locale,
1491	struct localedef_t copy_locale, int* ignore_content)
1492	{
1493	if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL)
1494	{
1495	struct locale_collate_t *collate;
1496
1497	if (copy_locale == NULL)
1498	{
1499	collate = locale->categories[LC_COLLATE].collate =
1500	(struct locale_collate_t *)
1501	xcalloc (`1`, sizeof (struct locale_collate_t));
1502
1503	/ Init the various data structures. /
1504	init_hash (&collate->elem_table, `100`);
1505	init_hash (&collate->sym_table, `100`);
1506	init_hash (&collate->seq_table, `500`);
1507	obstack_init (&collate->mempool);
1508
1509	collate->col_weight_max = -`1`;
1510	}
1511	else
1512	/ Reuse the copy_locale's data structures. /
1513	collate = locale->categories[LC_COLLATE].collate =
1514	copy_locale->categories[LC_COLLATE].collate;
1515	}
1516
1517	ldfile->translate_strings = `0`;
1518	ldfile->return_widestr = `0`;
1519	}
1520
1521
1522	void
1523	collate_finish (struct localedef_t locale, const* struct charmap_t *charmap)
1524	{
1525	/ Now is the time when we can assign the individual collation*
1526	values for all the symbols. We have possibly different values
1527	for the wide- and the multibyte-character symbols. This is done
1528	since it might make a difference in the encoding if there is in
1529	some cases no multibyte-character but there are wide-characters.
1530	(The other way around it is not important since theencoded
1531	collation value in the wide-character case is 32 bits wide and
1532	therefore requires no encoding).
1533
1534	The lowest collation value assigned is 2. Zero is reserved for
1535	the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1536	functions and 1 is used to separate the individual passes for the
1537	different rules.
1538
1539	We also have to construct is list with all the bytes/words which
1540	can come first in a sequence, followed by all the elements which
1541	also start with this byte/word. The order is reverse which has
1542	among others the important effect that longer strings are located
1543	first in the list. This is required for the output data since
1544	the algorithm used in `strcoll' etc depends on this.
1545
1546	The multibyte case is easy. We simply sort into an array with
1547	256 elements. /*
1548	struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1549	int mbact[nrules];
1550	int wcact;
1551	int mbseqact;
1552	int wcseqact;
1553	struct element_t *runp;
1554	int i;
1555	int need_undefined = `0`;
1556	struct section_list *sect;
1557	int ruleidx;
1558	int nr_wide_elems = `0`;
1559
1560	if (collate == NULL)
1561	{
1562	/ No data, no check. Issue a warning. /
1563	record_warning (_("No definition for %s category found"),
1564	"LC_COLLATE");
1565	return;
1566	}
1567
1568	/ If this assertion is hit change the type in `element_t'. /
1569	assert (nrules <= sizeof (runp->used_in_level) * `8`);
1570
1571	/ Make sure that the `position' rule is used either in all sections*
1572	or in none. /*
1573	for (i = `0`; i < nrules; ++i)
1574	for (sect = collate->sections; sect != NULL; sect = sect->next)
1575	if (sect != collate->current_section
1576	&& sect->rules != NULL
1577	&& ((sect->rules[i] & sort_position)
1578	!= (collate->current_section->rules[i] & sort_position)))
1579	{
1580	record_error (`0`, `0`, _("\
1581	%s: `position' must be used for a specific level in all sections or none"),
1582	"LC_COLLATE");
1583	break;
1584	}
1585
1586	/ Find out which elements are used at which level. At the same*
1587	time we find out whether we have any undefined symbols. /*
1588	runp = collate->start;
1589	while (runp != NULL)
1590	{
1591	if (runp->mbs != NULL)
1592	{
1593	for (i = `0`; i < nrules; ++i)
1594	{
1595	int j;
1596
1597	for (j = `0`; j < runp->weights[i].cnt; ++j)
1598	/ A NULL pointer as the weight means IGNORE. /
1599	if (runp->weights[i].w[j] != NULL)
1600	{
1601	if (runp->weights[i].w[j]->weights == NULL)
1602	{
1603	record_error_at_line (`0`, `0`, runp->file, runp->line,
1604	_("symbol `%s' not defined"),
1605	runp->weights[i].w[j]->name);
1606
1607	need_undefined = `1`;
1608	runp->weights[i].w[j] = &collate->undefined;
1609	}
1610	else
1611	/ Set the bit for the level. /
1612	runp->weights[i].w[j]->used_in_level \|= `1` << i;
1613	}
1614	}
1615	}
1616
1617	/ Up to the next entry. /
1618	runp = runp->next;
1619	}
1620
1621	/ Walk through the list of defined sequences and assign weights. Also*
1622	create the data structure which will allow generating the single byte
1623	character based tables.
1624
1625	Since at each time only the weights for each of the rules are
1626	only compared to other weights for this rule it is possible to
1627	assign more compact weight values than simply counting all
1628	weights in sequence. We can assign weights from 3, one for each
1629	rule individually and only for those elements, which are actually
1630	used for this rule.
1631
1632	Why is this important? It is not for the wide char table. But
1633	it is for the singlebyte output since here larger numbers have to
1634	be encoded to make it possible to emit the value as a byte
1635	string. /*
1636	for (i = `0`; i < nrules; ++i)
1637	mbact[i] = `2`;
1638	wcact = `2`;
1639	mbseqact = `0`;
1640	wcseqact = `0`;
1641	runp = collate->start;
1642	while (runp != NULL)
1643	{
1644	/ Determine the order. /
1645	if (runp->used_in_level != `0`)
1646	{
1647	runp->mborder = (int *) obstack_alloc (&collate->mempool,
1648	nrules * sizeof (int));
1649
1650	for (i = `0`; i < nrules; ++i)
1651	if ((runp->used_in_level & (`1` << i)) != `0`)
1652	runp->mborder[i] = mbact[i]++;
1653	else
1654	runp->mborder[i] = `0`;
1655	}
1656
1657	if (runp->mbs != NULL)
1658	{
1659	struct element_t **eptr;
1660	struct element_t *lastp = NULL;
1661
1662	/ Find the point where to insert in the list. /
1663	eptr = &collate->mbheads[((unsigned char *) runp->mbs)[`0`]];
1664	while (*eptr != NULL)
1665	{
1666	if ((*eptr)->nmbs < runp->nmbs)
1667	break;
1668
1669	if ((*eptr)->nmbs == runp->nmbs)
1670	{
1671	int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs);
1672
1673	if (c == `0`)
1674	{
1675	/ This should not happen. It means that we have*
1676	to symbols with the same byte sequence. It is
1677	of course an error. /*
1678	record_error_at_line (`0`, `0`, (*eptr)->file,
1679	(*eptr)->line,
1680	_("\
1681	symbol `%s' has the same encoding as"), (*eptr)->name);
1682
1683	record_error_at_line (`0`, `0`, runp->file, runp->line,
1684	_("symbol `%s'"), runp->name);
1685	goto dont_insert;
1686	}
1687	else if (c < `0`)
1688	/ Insert it here. /
1689	break;
1690	}
1691
1692	/ To the next entry. /
1693	lastp = *eptr;
1694	eptr = &(*eptr)->mbnext;
1695	}
1696
1697	/ Set the pointers. /
1698	runp->mbnext = *eptr;
1699	runp->mblast = lastp;
1700	if (*eptr != NULL)
1701	(*eptr)->mblast = runp;
1702	*eptr = runp;
1703	dont_insert:
1704	;
1705	}
1706
1707	if (runp->used_in_level)
1708	{
1709	runp->wcorder = wcact++;
1710
1711	/ We take the opportunity to count the elements which have*
1712	wide characters. /*
1713	++nr_wide_elems;
1714	}
1715
1716	if (runp->is_character)
1717	{
1718	if (runp->nmbs == `1`)
1719	collate->mbseqorder[((unsigned char *) runp->mbs)[`0`]] = mbseqact++;
1720
1721	runp->wcseqorder = wcseqact++;
1722	}
1723	else if (runp->mbs != NULL && runp->weights != NULL)
1724	/ This is for collation elements. /
1725	runp->wcseqorder = wcseqact++;
1726
1727	/ Up to the next entry. /
1728	runp = runp->next;
1729	}
1730
1731	/ Find out whether any of the `mbheads' entries is unset. In this*
1732	case we use the UNDEFINED entry. /*
1733	for (i = `1`; i < `256`; ++i)
1734	if (collate->mbheads[i] == NULL)
1735	{
1736	need_undefined = `1`;
1737	collate->mbheads[i] = &collate->undefined;
1738	}
1739
1740	/ Now to the wide character case. /
1741	collate->wcheads.p = `6`;
1742	collate->wcheads.q = `10`;
1743	wchead_table_init (&collate->wcheads);
1744
1745	collate->wcseqorder.p = `6`;
1746	collate->wcseqorder.q = `10`;
1747	collseq_table_init (&collate->wcseqorder);
1748
1749	/ Start adding. /
1750	runp = collate->start;
1751	while (runp != NULL)
1752	{
1753	if (runp->wcs != NULL)
1754	{
1755	struct element_t *e;
1756	struct element_t **eptr;
1757	struct element_t *lastp;
1758
1759	/ Insert the collation sequence value. /
1760	if (runp->is_character)
1761	collseq_table_add (&collate->wcseqorder, runp->wcs[`0`],
1762	runp->wcseqorder);
1763
1764	/ Find the point where to insert in the list. /
1765	e = wchead_table_get (&collate->wcheads, runp->wcs[`0`]);
1766	eptr = &e;
1767	lastp = NULL;
1768	while (*eptr != NULL)
1769	{
1770	if ((*eptr)->nwcs < runp->nwcs)
1771	break;
1772
1773	if ((*eptr)->nwcs == runp->nwcs)
1774	{
1775	int c = wmemcmp ((wchar_t ) (eptr)->wcs,
1776	(wchar_t *) runp->wcs, runp->nwcs);
1777
1778	if (c == `0`)
1779	{
1780	/ This should not happen. It means that we have*
1781	two symbols with the same byte sequence. It is
1782	of course an error. /*
1783	record_error_at_line (`0`, `0`, (*eptr)->file,
1784	(*eptr)->line,
1785	_("\
1786	symbol `%s' has the same encoding as"), (*eptr)->name);
1787
1788	record_error_at_line (`0`, `0`, runp->file, runp->line,
1789	_("symbol `%s'"), runp->name);
1790	goto dont_insertwc;
1791	}
1792	else if (c < `0`)
1793	/ Insert it here. /
1794	break;
1795	}
1796
1797	/ To the next entry. /
1798	lastp = *eptr;
1799	eptr = &(*eptr)->wcnext;
1800	}
1801
1802	/ Set the pointers. /
1803	runp->wcnext = *eptr;
1804	runp->wclast = lastp;
1805	if (*eptr != NULL)
1806	(*eptr)->wclast = runp;
1807	*eptr = runp;
1808	if (eptr == &e)
1809	wchead_table_add (&collate->wcheads, runp->wcs[`0`], e);
1810	dont_insertwc:
1811	;
1812	}
1813
1814	/ Up to the next entry. /
1815	runp = runp->next;
1816	}
1817
1818	/ Now determine whether the UNDEFINED entry is needed and if yes,*
1819	whether it was defined. /*
1820	collate->undefined.used_in_level = need_undefined ? ~`0ul` : `0`;
1821	if (collate->undefined.file == NULL)
1822	{
1823	if (need_undefined)
1824	{
1825	/ This seems not to be enforced by recent standards. Don't*
1826	emit an error, simply append UNDEFINED at the end. /*
1827	collate->undefined.mborder =
1828	(int ) obstack_alloc (&collate->mempool, nrules sizeof (int));
1829
1830	for (i = `0`; i < nrules; ++i)
1831	collate->undefined.mborder[i] = mbact[i]++;
1832	}
1833
1834	/ In any case we will need the definition for the wide character*
1835	case. But we will not complain that it is missing since the
1836	specification strangely enough does not seem to account for
1837	this. /*
1838	collate->undefined.wcorder = wcact++;
1839	}
1840
1841	/ Finally, try to unify the rules for the sections. Whenever the rules*
1842	for a section are the same as those for another section give the
1843	ruleset the same index. Since there are never many section we can
1844	use an O(n^2) algorithm here. /*
1845	sect = collate->sections;
1846	while (sect != NULL && sect->rules == NULL)
1847	sect = sect->next;
1848
1849	/ Bail out if we have no sections because of earlier errors. /
1850	if (sect == NULL)
1851	{
1852	record_error (EXIT_FAILURE, `0`, _("too many errors; giving up"));
1853	return;
1854	}
1855
1856	ruleidx = `0`;
1857	do
1858	{
1859	struct section_list *osect = collate->sections;
1860
1861	while (osect != sect)
1862	if (osect->rules != NULL
1863	&& memcmp (osect->rules, sect->rules,
1864	nrules * sizeof (osect->rules[`0`])) == `0`)
1865	break;
1866	else
1867	osect = osect->next;
1868
1869	if (osect == sect)
1870	sect->ruleidx = ruleidx++;
1871	else
1872	sect->ruleidx = osect->ruleidx;
1873
1874	/ Next section. /
1875	do
1876	sect = sect->next;
1877	while (sect != NULL && sect->rules == NULL);
1878	}
1879	while (sect != NULL);
1880	/ We are currently not prepared for more than 128 rulesets. But this*
1881	should never really be a problem. /*
1882	assert (ruleidx <= `128`);
1883	}
1884
1885
1886	static int32_t
1887	output_weight (struct obstack pool, struct* locale_collate_t *collate,
1888	struct element_t *elem)
1889	{
1890	size_t cnt;
1891	int32_t retval;
1892
1893	/ Optimize the use of UNDEFINED. /
1894	if (elem == &collate->undefined)
1895	/ The weights are already inserted. /
1896	return `0`;
1897
1898	/ This byte can start exactly one collation element and this is*
1899	a single byte. We can directly give the index to the weights. /*
1900	retval = obstack_object_size (pool);
1901
1902	/ Construct the weight. /
1903	for (cnt = `0`; cnt < nrules; ++cnt)
1904	{
1905	char buf[elem->weights[cnt].cnt * `7`];
1906	int len = `0`;
1907	int i;
1908
1909	for (i = `0`; i < elem->weights[cnt].cnt; ++i)
1910	/ Encode the weight value. We do nothing for IGNORE entries. /
1911	if (elem->weights[cnt].w[i] != NULL)
1912	len += utf8_encode (&buf[len],
1913	elem->weights[cnt].w[i]->mborder[cnt]);
1914
1915	/ And add the buffer content. /
1916	obstack_1grow (pool, len);
1917	obstack_grow (pool, buf, len);
1918	}
1919
1920	return retval \| ((elem->section->ruleidx & `0x7f`) << `24`);
1921	}
1922
1923
1924	static int32_t
1925	output_weightwc (struct obstack pool, struct* locale_collate_t *collate,
1926	struct element_t *elem)
1927	{
1928	size_t cnt;
1929	int32_t retval;
1930
1931	/ Optimize the use of UNDEFINED. /
1932	if (elem == &collate->undefined)
1933	/ The weights are already inserted. /
1934	return `0`;
1935
1936	/ This byte can start exactly one collation element and this is*
1937	a single byte. We can directly give the index to the weights. /*
1938	retval = obstack_object_size (pool) / sizeof (int32_t);
1939
1940	/ Construct the weight. /
1941	for (cnt = `0`; cnt < nrules; ++cnt)
1942	{
1943	int32_t buf[elem->weights[cnt].cnt];
1944	int i;
1945	int32_t j;
1946
1947	for (i = `0`, j = `0`; i < elem->weights[cnt].cnt; ++i)
1948	if (elem->weights[cnt].w[i] != NULL)
1949	buf[j++] = elem->weights[cnt].w[i]->wcorder;
1950
1951	/ And add the buffer content. /
1952	obstack_int32_grow (pool, j);
1953
1954	obstack_grow (pool, buf, j * sizeof (int32_t));
1955	maybe_swap_uint32_obstack (pool, j);
1956	}
1957
1958	return retval \| ((elem->section->ruleidx & `0x7f`) << `24`);
1959	}
1960
/* If localedef is ever threaded, this would need to be a __thread var.  */
/* File-scope state read by add_to_tablewc, which receives only the
   character and the element as arguments.  */
static struct
{
  /* Obstack passed to output_weightwc to receive the weight records.  */
  struct obstack *weightpool;
  /* Obstack receiving the entries for characters that start more than
     one sequence.  */
  struct obstack *extrapool;
  /* Obstack receiving the weight indices of runs of consecutive
     sequences.  */
  struct obstack *indpool;
  /* Collation data passed on to output_weightwc.  */
  struct locale_collate_t *collate;
  /* Table into which add_to_tablewc enters one index per character.  */
  struct collidx_table *tablewc;
} atwc;

/* Enter the index for character CH, whose list of elements starts at
   RUNP, into atwc.tablewc.  */
static void add_to_tablewc (uint32_t ch, struct element_t *runp);
1972
1973	static void
1974	add_to_tablewc (uint32_t ch, struct element_t *runp)
1975	{
1976	if (runp->wcnext == NULL && runp->nwcs == `1`)
1977	{
1978	int32_t weigthidx = output_weightwc (atwc.weightpool, atwc.collate,
1979	runp);
1980	collidx_table_add (atwc.tablewc, ch, weigthidx);
1981	}
1982	else
1983	{
1984	/ As for the singlebyte table, we recognize sequences and*
1985	compress them. /*
1986
1987	collidx_table_add (atwc.tablewc, ch,
1988	-(obstack_object_size (atwc.extrapool)
1989	/ sizeof (uint32_t)));
1990
1991	do
1992	{
1993	/ Store the current index in the weight table. We know that*
1994	the current position in the `extrapool' is aligned on a
1995	32-bit address. /*
1996	int32_t weightidx;
1997	int added;
1998
1999	/ Find out wether this is a single entry or we have more than*
2000	one consecutive entry. /*
2001	if (runp->wcnext != NULL
2002	&& runp->nwcs == runp->wcnext->nwcs
2003	&& wmemcmp ((wchar_t *) runp->wcs,
2004	(wchar_t *)runp->wcnext->wcs,
2005	runp->nwcs - `1`) == `0`
2006	&& (runp->wcs[runp->nwcs - `1`]
2007	== runp->wcnext->wcs[runp->nwcs - `1`] + `1`))
2008	{
2009	int i;
2010	struct element_t *series_startp = runp;
2011	struct element_t *curp;
2012
2013	/ Now add first the initial byte sequence. /
2014	added = (`1` + `1` + `2` * (runp->nwcs - `1`)) * sizeof (int32_t);
2015	if (sizeof (int32_t) == sizeof (int))
2016	obstack_make_room (atwc.extrapool, added);
2017
2018	/ More than one consecutive entry. We mark this by having*
2019	a negative index into the indirect table. /*
2020	obstack_int32_grow_fast (atwc.extrapool,
2021	-(obstack_object_size (atwc.indpool)
2022	/ sizeof (int32_t)));
2023	obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - `1`);
2024
2025	do
2026	runp = runp->wcnext;
2027	while (runp->wcnext != NULL
2028	&& runp->nwcs == runp->wcnext->nwcs
2029	&& wmemcmp ((wchar_t *) runp->wcs,
2030	(wchar_t *)runp->wcnext->wcs,
2031	runp->nwcs - `1`) == `0`
2032	&& (runp->wcs[runp->nwcs - `1`]
2033	== runp->wcnext->wcs[runp->nwcs - `1`] + `1`));
2034
2035	/ Now walk backward from here to the beginning. /
2036	curp = runp;
2037
2038	for (i = `1`; i < runp->nwcs; ++i)
2039	obstack_int32_grow_fast (atwc.extrapool, curp->wcs[i]);
2040
2041	/ Now find the end of the consecutive sequence and*
2042	add all the indices in the indirect pool. /*
2043	do
2044	{
2045	weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2046	curp);
2047	obstack_int32_grow (atwc.indpool, weightidx);
2048
2049	curp = curp->wclast;
2050	}
2051	while (curp != series_startp);
2052
2053	/ Add the final weight. /
2054	weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2055	curp);
2056	obstack_int32_grow (atwc.indpool, weightidx);
2057
2058	/ And add the end byte sequence. Without length this*
2059	time. /*
2060	for (i = `1`; i < curp->nwcs; ++i)
2061	obstack_int32_grow (atwc.extrapool, curp->wcs[i]);
2062	}
2063	else
2064	{
2065	/ A single entry. Simply add the index and the length and*
2066	string (except for the first character which is already
2067	tested for). /*
2068	int i;
2069
2070	/ Output the weight info. /
2071	weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2072	runp);
2073
2074	assert (runp->nwcs > `0`);
2075	added = (`1` + `1` + runp->nwcs - `1`) * sizeof (int32_t);
2076	if (sizeof (int) == sizeof (int32_t))
2077	obstack_make_room (atwc.extrapool, added);
2078
2079	obstack_int32_grow_fast (atwc.extrapool, weightidx);
2080	obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - `1`);
2081	for (i = `1`; i < runp->nwcs; ++i)
2082	obstack_int32_grow_fast (atwc.extrapool, runp->wcs[i]);
2083	}
2084
2085	/ Next entry. /
2086	runp = runp->wcnext;
2087	}
2088	while (runp != NULL);
2089	}
2090	}
2091
2092	void
2093	collate_output (struct localedef_t locale, const* struct charmap_t *charmap,
2094	const char *output_path)
2095	{
2096	struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
2097	const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
2098	struct locale_file file;
2099	size_t ch;
2100	int32_t tablemb[`256`];
2101	struct obstack weightpool;
2102	struct obstack extrapool;
2103	struct obstack indirectpool;
2104	struct section_list *sect;
2105	struct collidx_table tablewc;
2106	uint32_t elem_size;
2107	uint32_t *elem_table;
2108	int i;
2109	struct element_t *runp;
2110
2111	init_locale_data (&file, nelems);
2112	add_locale_uint32 (&file, nrules);
2113
2114	/ If we have no LC_COLLATE data emit only the number of rules as zero. /
2115	if (collate == NULL)
2116	{
2117	size_t idx;
2118	for (idx = `1`; idx < nelems; idx++)
2119	{
2120	/ The words have to be handled specially. /
2121	if (idx == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
2122	add_locale_uint32 (&file, `0`);
2123	else
2124	add_locale_empty (&file);
2125	}
2126	write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file);
2127	return;
2128	}
2129
2130	obstack_init (&weightpool);
2131	obstack_init (&extrapool);
2132	obstack_init (&indirectpool);
2133
2134	/ Since we are using the sign of an integer to mark indirection the*
2135	offsets in the arrays we are indirectly referring to must not be
2136	zero since -0 == 0. Therefore we add a bit of dummy content. /*
2137	obstack_int32_grow (&extrapool, `0`);
2138	obstack_int32_grow (&indirectpool, `0`);
2139
2140	/ Prepare the ruleset table. /
2141	for (sect = collate->sections, i = `0`; sect != NULL; sect = sect->next)
2142	if (sect->rules != NULL && sect->ruleidx == i)
2143	{
2144	int j;
2145
2146	obstack_make_room (&weightpool, nrules);
2147
2148	for (j = `0`; j < nrules; ++j)
2149	obstack_1grow_fast (&weightpool, sect->rules[j]);
2150	++i;
2151	}
2152	/ And align the output. /
2153	i = (nrules * i) % LOCFILE_ALIGN;
2154	if (i > `0`)
2155	do
2156	obstack_1grow (&weightpool, `'\0'`);
2157	while (++i < LOCFILE_ALIGN);
2158
2159	add_locale_raw_obstack (&file, &weightpool);
2160
2161	/ Generate the 8-bit table. Walk through the lists of sequences*
2162	starting with the same byte and add them one after the other to
2163	the table. In case we have more than one sequence starting with
2164	the same byte we have to use extra indirection.
2165
2166	First add a record for the NUL byte. This entry will never be used
2167	so it does not matter. /*
2168	tablemb[`0`] = `0`;
2169
2170	/ Now insert the `UNDEFINED' value if it is used. Since this value*
2171	will probably be used more than once it is good to store the
2172	weights only once. /*
2173	if (collate->undefined.used_in_level != `0`)
2174	output_weight (&weightpool, collate, &collate->undefined);
2175
2176	for (ch = `1`; ch < `256`; ++ch)
2177	if (collate->mbheads[ch]->mbnext == NULL
2178	&& collate->mbheads[ch]->nmbs <= `1`)
2179	{
2180	tablemb[ch] = output_weight (&weightpool, collate,
2181	collate->mbheads[ch]);
2182	}
2183	else
2184	{
2185	/ The entries in the list are sorted by length and then*
2186	alphabetically. This is the order in which we will add the
2187	elements to the collation table. This allows simply walking
2188	the table in sequence and stopping at the first matching
2189	entry. Since the longer sequences are coming first in the
2190	list they have the possibility to match first, just as it
2191	has to be. In the worst case we are walking to the end of
2192	the list where we put, if no singlebyte sequence is defined
2193	in the locale definition, the weights for UNDEFINED.
2194
2195	To reduce the length of the search list we compress them a bit.
2196	This happens by collecting sequences of consecutive byte
2197	sequences in one entry (having and begin and end byte sequence)
2198	and add only one index into the weight table. We can find the
2199	consecutive entries since they are also consecutive in the list. /*
2200	struct element_t *runp = collate->mbheads[ch];
2201	struct element_t *lastp;
2202
2203	assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2204
2205	tablemb[ch] = -obstack_object_size (&extrapool);
2206
2207	do
2208	{
2209	/ Store the current index in the weight table. We know that*
2210	the current position in the `extrapool' is aligned on a
2211	32-bit address. /*
2212	int32_t weightidx;
2213	int added;
2214
/* Find out whether this is a single entry or we have more than
one consecutive entry.  */
2217	if (runp->mbnext != NULL
2218	&& runp->nmbs == runp->mbnext->nmbs
2219	&& memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - `1`) == `0`
2220	&& (runp->mbs[runp->nmbs - `1`]
2221	== runp->mbnext->mbs[runp->nmbs - `1`] + `1`))
2222	{
2223	int i;
2224	struct element_t *series_startp = runp;
2225	struct element_t *curp;
2226
2227	/ Compute how much space we will need. /
2228	added = LOCFILE_ALIGN_UP (sizeof (int32_t) + `1`
2229	+ `2` * (runp->nmbs - `1`));
2230	assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2231	obstack_make_room (&extrapool, added);
2232
2233	/ More than one consecutive entry. We mark this by having*
2234	a negative index into the indirect table. /*
2235	obstack_int32_grow_fast (&extrapool,
2236	-(obstack_object_size (&indirectpool)
2237	/ sizeof (int32_t)));
2238
2239	/ Now search first the end of the series. /
2240	do
2241	runp = runp->mbnext;
2242	while (runp->mbnext != NULL
2243	&& runp->nmbs == runp->mbnext->nmbs
2244	&& memcmp (runp->mbs, runp->mbnext->mbs,
2245	runp->nmbs - `1`) == `0`
2246	&& (runp->mbs[runp->nmbs - `1`]
2247	== runp->mbnext->mbs[runp->nmbs - `1`] + `1`));
2248
2249	/ Now walk backward from here to the beginning. /
2250	curp = runp;
2251
2252	assert (runp->nmbs <= `256`);
2253	obstack_1grow_fast (&extrapool, curp->nmbs - `1`);
2254	for (i = `1`; i < curp->nmbs; ++i)
2255	obstack_1grow_fast (&extrapool, curp->mbs[i]);
2256
2257	/ Now find the end of the consecutive sequence and*
2258	add all the indices in the indirect pool. /*
2259	do
2260	{
2261	weightidx = output_weight (&weightpool, collate, curp);
2262	obstack_int32_grow (&indirectpool, weightidx);
2263
2264	curp = curp->mblast;
2265	}
2266	while (curp != series_startp);
2267
2268	/ Add the final weight. /
2269	weightidx = output_weight (&weightpool, collate, curp);
2270	obstack_int32_grow (&indirectpool, weightidx);
2271
2272	/ And add the end byte sequence. Without length this*
2273	time. /*
2274	for (i = `1`; i < curp->nmbs; ++i)
2275	obstack_1grow_fast (&extrapool, curp->mbs[i]);
2276	}
2277	else
2278	{
2279	/ A single entry. Simply add the index and the length and*
2280	string (except for the first character which is already
2281	tested for). /*
2282	int i;
2283
2284	/ Output the weight info. /
2285	weightidx = output_weight (&weightpool, collate, runp);
2286
2287	added = LOCFILE_ALIGN_UP (sizeof (int32_t) + `1`
2288	+ runp->nmbs - `1`);
2289	assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2290	obstack_make_room (&extrapool, added);
2291
2292	obstack_int32_grow_fast (&extrapool, weightidx);
2293	assert (runp->nmbs <= `256`);
2294	obstack_1grow_fast (&extrapool, runp->nmbs - `1`);
2295
2296	for (i = `1`; i < runp->nmbs; ++i)
2297	obstack_1grow_fast (&extrapool, runp->mbs[i]);
2298	}
2299
2300	/ Add alignment bytes if necessary. /
2301	while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)))
2302	obstack_1grow_fast (&extrapool, `'\0'`);
2303
2304	/ Next entry. /
2305	lastp = runp;
2306	runp = runp->mbnext;
2307	}
2308	while (runp != NULL);
2309
2310	assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2311
2312	/ If the final entry in the list is not a single character we*
2313	add an UNDEFINED entry here. /*
2314	if (lastp->nmbs != `1`)
2315	{
2316	int added = LOCFILE_ALIGN_UP (sizeof (int32_t) + `1` + `1`);
2317	obstack_make_room (&extrapool, added);
2318
2319	obstack_int32_grow_fast (&extrapool, `0`);
2320	/ XXX What rule? We just pick the first. /
2321	obstack_1grow_fast (&extrapool, `0`);
2322	/ Length is zero. /
2323	obstack_1grow_fast (&extrapool, `0`);
2324
2325	/ Add alignment bytes if necessary. /
2326	while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)))
2327	obstack_1grow_fast (&extrapool, `'\0'`);
2328	}
2329	}
2330
2331	/ Add padding to the tables if necessary. /
2332	while (!LOCFILE_ALIGNED_P (obstack_object_size (&weightpool)))
2333	obstack_1grow (&weightpool, `0`);
2334
2335	/ Now add the four tables. /
2336	add_locale_uint32_array (&file, (const uint32_t *) tablemb, `256`);
2337	add_locale_raw_obstack (&file, &weightpool);
2338	add_locale_raw_obstack (&file, &extrapool);
2339	add_locale_raw_obstack (&file, &indirectpool);
2340
2341	/ Now the same for the wide character table. We need to store some*
2342	more information here. /*
2343	add_locale_empty (&file);
2344	add_locale_empty (&file);
2345	add_locale_empty (&file);
2346
2347	/ Since we are using the sign of an integer to mark indirection the*
2348	offsets in the arrays we are indirectly referring to must not be
2349	zero since -0 == 0. Therefore we add a bit of dummy content. /*
2350	obstack_int32_grow (&extrapool, `0`);
2351	obstack_int32_grow (&indirectpool, `0`);
2352
2353	/ Now insert the `UNDEFINED' value if it is used. Since this value*
2354	will probably be used more than once it is good to store the
2355	weights only once. /*
2356	if (output_weightwc (&weightpool, collate, &collate->undefined) != `0`)
2357	abort ();
2358
2359	/ Generate the table. Walk through the lists of sequences starting*
2360	with the same wide character and add them one after the other to
2361	the table. In case we have more than one sequence starting with
2362	the same byte we have to use extra indirection. /*
2363	tablewc.p = `6`;
2364	tablewc.q = `10`;
2365	collidx_table_init (&tablewc);
2366
2367	atwc.weightpool = &weightpool;
2368	atwc.extrapool = &extrapool;
2369	atwc.indpool = &indirectpool;
2370	atwc.collate = collate;
2371	atwc.tablewc = &tablewc;
2372
2373	wchead_table_iterate (&collate->wcheads, add_to_tablewc);
2374
2375	memset (&atwc, `0`, sizeof (atwc));
2376
2377	/ Now add the four tables. /
2378	add_locale_collidx_table (&file, &tablewc);
2379	add_locale_raw_obstack (&file, &weightpool);
2380	add_locale_raw_obstack (&file, &extrapool);
2381	add_locale_raw_obstack (&file, &indirectpool);
2382
2383	/* Finally write the table with collation element names out. It is
2384	a hash table with a simple function which gets the name of the
2385	character as the input. One character might have many names. The
2386	value associated with the name is an index into the weight table
2387	where we are then interested in the first-level weight value.
2388
2389	To determine how large the table should be we are counting the
2390	elements we have to put in. Since we are using internal chaining
2391	using a secondary hash function we have to make the table a bit
2392	larger to avoid extremely long search times. We can achieve
2393	good results with a 40% larger table than there are entries. */
2394	elem_size = `0`;
2395	runp = collate->start;
2396	while (runp != NULL)
2397	{
2398	if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2399	/ Yep, the element really counts. /
2400	++elem_size;
2401
2402	runp = runp->next;
2403	}
2404	/ Add 50% and find the next prime number. /
2405	elem_size = next_prime (elem_size + (elem_size >> `1`));
2406
2407	/ Allocate the table. Each entry consists of two words: the hash*
2408	value and an index in a secondary table which provides the index
2409	into the weight table and the string itself (so that a match can
2410	be determined). /*
2411	elem_table = (uint32_t *) obstack_alloc (&extrapool,
2412	elem_size * `2` * sizeof (uint32_t));
2413	memset (elem_table, `'\0'`, elem_size * `2` * sizeof (uint32_t));
2414
2415	/ Now add the elements. /
2416	runp = collate->start;
2417	while (runp != NULL)
2418	{
2419	if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2420	{
2421	/ Compute the hash value of the name. /
2422	uint32_t namelen = strlen (runp->name);
2423	uint32_t hash = elem_hash (runp->name, namelen);
2424	size_t idx = hash % elem_size;
2425	#ifndef NDEBUG
2426	size_t start_idx = idx;
2427	#endif
2428
2429	if (elem_table[idx * `2`] != `0`)
2430	{
2431	/ The spot is already taken. Try iterating using the value*
2432	from the secondary hashing function. /*
2433	size_t iter = hash % (elem_size - `2`) + `1`;
2434
2435	do
2436	{
2437	idx += iter;
2438	if (idx >= elem_size)
2439	idx -= elem_size;
2440	assert (idx != start_idx);
2441	}
2442	while (elem_table[idx * `2`] != `0`);
2443	}
2444	/ This is the spot where we will insert the value. /
2445	elem_table[idx * `2`] = hash;
2446	elem_table[idx * `2` + `1`] = obstack_object_size (&extrapool);
2447
2448	/ The string itself including length. /
2449	obstack_1grow (&extrapool, namelen);
2450	obstack_grow (&extrapool, runp->name, namelen);
2451
2452	/ And the multibyte representation. /
2453	obstack_1grow (&extrapool, runp->nmbs);
2454	obstack_grow (&extrapool, runp->mbs, runp->nmbs);
2455
2456	/ And align again to 32 bits. /
2457	if ((`1` + namelen + `1` + runp->nmbs) % sizeof (int32_t) != `0`)
2458	obstack_grow (&extrapool, "\0\0",
2459	(sizeof (int32_t)
2460	- ((`1` + namelen + `1` + runp->nmbs)
2461	% sizeof (int32_t))));
2462
2463	/ Now some 32-bit values: multibyte collation sequence,*
2464	wide char string (including length), and wide char
2465	collation sequence. /*
2466	obstack_int32_grow (&extrapool, runp->mbseqorder);
2467
2468	obstack_int32_grow (&extrapool, runp->nwcs);
2469	obstack_grow (&extrapool, runp->wcs,
2470	runp->nwcs * sizeof (uint32_t));
2471	maybe_swap_uint32_obstack (&extrapool, runp->nwcs);
2472
2473	obstack_int32_grow (&extrapool, runp->wcseqorder);
2474	}
2475
2476	runp = runp->next;
2477	}
2478
2479	/ Prepare to write out this data. /
2480	add_locale_uint32 (&file, elem_size);
2481	add_locale_uint32_array (&file, elem_table, `2` * elem_size);
2482	add_locale_raw_obstack (&file, &extrapool);
2483	add_locale_raw_data (&file, collate->mbseqorder, `256`);
2484	add_locale_collseq_table (&file, &collate->wcseqorder);
2485	add_locale_string (&file, charmap->code_set_name);
2486	write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file);
2487
2488	obstack_free (&weightpool, NULL);
2489	obstack_free (&extrapool, NULL);
2490	obstack_free (&indirectpool, NULL);
2491	}
2492
2493
2494	static enum token_t
2495	skip_to (struct linereader ldfile, struct* locale_collate_t *collate,
2496	const struct charmap_t charmap, int* to_endif)
2497	{
2498	while (`1`)
2499	{
2500	struct token *now = lr_token (ldfile, charmap, NULL, NULL, `0`);
2501	enum token_t nowtok = now->tok;
2502
2503	if (nowtok == tok_eof \|\| nowtok == tok_end)
2504	return nowtok;
2505
2506	if (nowtok == tok_ifdef \|\| nowtok == tok_ifndef)
2507	{
2508	lr_error (ldfile, _("%s: nested conditionals not supported"),
2509	"LC_COLLATE");
2510	nowtok = skip_to (ldfile, collate, charmap, tok_endif);
2511	if (nowtok == tok_eof \|\| nowtok == tok_end)
2512	return nowtok;
2513	}
2514	else if (nowtok == tok_endif \|\| (!to_endif && nowtok == tok_else))
2515	{
2516	lr_ignore_rest (ldfile, `1`);
2517	return nowtok;
2518	}
2519	else if (!to_endif && (nowtok == tok_elifdef \|\| nowtok == tok_elifndef))
2520	{
2521	/ Do not read the rest of the line. /
2522	return nowtok;
2523	}
2524	else if (nowtok == tok_else)
2525	{
2526	lr_error (ldfile, _("%s: more than one 'else'"), "LC_COLLATE");
2527	}
2528
2529	lr_ignore_rest (ldfile, `0`);
2530	}
2531	}
2532
2533
2534	void
2535	collate_read (struct linereader ldfile, struct* localedef_t *result,
2536	const struct charmap_t charmap, const* char *repertoire_name,
2537	int ignore_content)
2538	{
2539	struct repertoire_t *repertoire = NULL;
2540	struct locale_collate_t *collate;
2541	struct token *now;
2542	struct token *arg = NULL;
2543	enum token_t nowtok;
2544	enum token_t was_ellipsis = tok_none;
2545	struct localedef_t *copy_locale = NULL;
2546	/ Parsing state:*
2547	0 - start
2548	1 - between `order-start' and `order-end'
2549	2 - after `order-end'
2550	3 - after `reorder-after', waiting for `reorder-end'
2551	4 - after `reorder-end'
2552	5 - after `reorder-sections-after', waiting for `reorder-sections-end'
2553	6 - after `reorder-sections-end'
2554	*/
2555	int state = `0`;
2556
2557	/ Get the repertoire we have to use. /
2558	if (repertoire_name != NULL)
2559	repertoire = repertoire_read (repertoire_name);
2560
2561	/ The rest of the line containing `LC_COLLATE' must be free. /
2562	lr_ignore_rest (ldfile, `1`);
2563
2564	while (`1`)
2565	{
2566	do
2567	{
2568	now = lr_token (ldfile, charmap, result, NULL, verbose);
2569	nowtok = now->tok;
2570	}
2571	while (nowtok == tok_eol);
2572
2573	if (nowtok != tok_define)
2574	break;
2575
2576	if (ignore_content)
2577	lr_ignore_rest (ldfile, `0`);
2578	else
2579	{
2580	arg = lr_token (ldfile, charmap, result, NULL, verbose);
2581	if (arg->tok != tok_ident)
2582	SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2583	else
2584	{
2585	/ Simply add the new symbol. /
2586	struct name_list newsym = xmalloc (sizeof* (*newsym)
2587	+ arg->val.str.lenmb + `1`);
2588	memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
2589	newsym->str[arg->val.str.lenmb] = `'\0'`;
2590	newsym->next = defined;
2591	defined = newsym;
2592
2593	lr_ignore_rest (ldfile, `1`);
2594	}
2595	}
2596	}
2597
2598	if (nowtok == tok_copy)
2599	{
2600	now = lr_token (ldfile, charmap, result, NULL, verbose);
2601	if (now->tok != tok_string)
2602	{
2603	SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2604
2605	skip_category:
2606	do
2607	now = lr_token (ldfile, charmap, result, NULL, verbose);
2608	while (now->tok != tok_eof && now->tok != tok_end);
2609
2610	if (now->tok != tok_eof
2611	\|\| (now = lr_token (ldfile, charmap, result, NULL, verbose),
2612	now->tok == tok_eof))
2613	lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
2614	else if (now->tok != tok_lc_collate)
2615	{
2616	lr_error (ldfile, _("\
2617	%1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2618	lr_ignore_rest (ldfile, `0`);
2619	}
2620	else
2621	lr_ignore_rest (ldfile, `1`);
2622
2623	return;
2624	}
2625
2626	if (! ignore_content)
2627	{
2628	/ Get the locale definition. /
2629	copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
2630	repertoire_name, charmap, NULL);
2631	if ((copy_locale->avail & COLLATE_LOCALE) == `0`)
2632	{
2633	/ Not yet loaded. So do it now. /
2634	if (locfile_read (copy_locale, charmap) != `0`)
2635	goto skip_category;
2636	}
2637
2638	if (copy_locale->categories[LC_COLLATE].collate == NULL)
2639	return;
2640	}
2641
2642	lr_ignore_rest (ldfile, `1`);
2643
2644	now = lr_token (ldfile, charmap, result, NULL, verbose);
2645	nowtok = now->tok;
2646	}
2647
2648	/ Prepare the data structures. /
2649	collate_startup (ldfile, result, copy_locale, ignore_content);
2650	collate = result->categories[LC_COLLATE].collate;
2651
2652	while (`1`)
2653	{
2654	char ucs4buf[`10`];
2655	char *symstr;
2656	size_t symlen;
2657
2658	/ Of course we don't proceed beyond the end of file. /
2659	if (nowtok == tok_eof)
2660	break;
2661
2662	/* Ignore empty lines. */
2663	if (nowtok == tok_eol)
2664	{
2665	now = lr_token (ldfile, charmap, result, NULL, verbose);
2666	nowtok = now->tok;
2667	continue;
2668	}
2669
2670	switch (nowtok)
2671	{
2672	case tok_copy:
2673	/ Allow copying other locales. /
2674	now = lr_token (ldfile, charmap, result, NULL, verbose);
2675	if (now->tok != tok_string)
2676	goto err_label;
2677
2678	if (! ignore_content)
2679	load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name,
2680	charmap, result);
2681
2682	lr_ignore_rest (ldfile, `1`);
2683	break;
2684
2685	case tok_coll_weight_max:
2686	/ Ignore the rest of the line if we don't need the input of*
2687	this line. /*
2688	if (ignore_content)
2689	{
2690	lr_ignore_rest (ldfile, `0`);
2691	break;
2692	}
2693
2694	if (state != `0`)
2695	goto err_label;
2696
2697	arg = lr_token (ldfile, charmap, result, NULL, verbose);
2698	if (arg->tok != tok_number)
2699	goto err_label;
2700	if (collate->col_weight_max != -`1`)
2701	lr_error (ldfile, _("%s: duplicate definition of `%s'"),
2702	"LC_COLLATE", "col_weight_max");
2703	else
2704	collate->col_weight_max = arg->val.num;
2705	lr_ignore_rest (ldfile, `1`);
2706	break;
2707
2708	case tok_section_symbol:
2709	/ Ignore the rest of the line if we don't need the input of*
2710	this line. /*
2711	if (ignore_content)
2712	{
2713	lr_ignore_rest (ldfile, `0`);
2714	break;
2715	}
2716
2717	if (state != `0`)
2718	goto err_label;
2719
2720	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2721	if (arg->tok != tok_bsymbol)
2722	goto err_label;
2723	else if (!ignore_content)
2724	{
2725	/ Check whether this section is already known. /
2726	struct section_list *known = collate->sections;
2727	while (known != NULL)
2728	{
2729	if (strcmp (known->name, arg->val.str.startmb) == `0`)
2730	break;
2731	known = known->next;
2732	}
2733
2734	if (known != NULL)
2735	{
2736	lr_error (ldfile,
2737	_("%s: duplicate declaration of section `%s'"),
2738	"LC_COLLATE", arg->val.str.startmb);
2739	free (arg->val.str.startmb);
2740	}
2741	else
2742	collate->sections = make_seclist_elem (collate,
2743	arg->val.str.startmb,
2744	collate->sections);
2745
2746	lr_ignore_rest (ldfile, known == NULL);
2747	}
2748	else
2749	{
2750	free (arg->val.str.startmb);
2751	lr_ignore_rest (ldfile, `0`);
2752	}
2753	break;
2754
2755	case tok_collating_element:
2756	/ Ignore the rest of the line if we don't need the input of*
2757	this line. /*
2758	if (ignore_content)
2759	{
2760	lr_ignore_rest (ldfile, `0`);
2761	break;
2762	}
2763
2764	if (state != `0` && state != `2`)
2765	goto err_label;
2766
2767	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2768	if (arg->tok != tok_bsymbol)
2769	goto err_label;
2770	else
2771	{
2772	const char *symbol = arg->val.str.startmb;
2773	size_t symbol_len = arg->val.str.lenmb;
2774
2775	/ Next the `from' keyword. /
2776	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2777	if (arg->tok != tok_from)
2778	{
2779	free ((char *) symbol);
2780	goto err_label;
2781	}
2782
2783	ldfile->return_widestr = `1`;
2784	ldfile->translate_strings = `1`;
2785
2786	/ Finally the string with the replacement. /
2787	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2788
2789	ldfile->return_widestr = `0`;
2790	ldfile->translate_strings = `0`;
2791
2792	if (arg->tok != tok_string)
2793	goto err_label;
2794
2795	if (!ignore_content && symbol != NULL)
2796	{
2797	/ The name is already defined. /
2798	if (check_duplicate (ldfile, collate, charmap,
2799	repertoire, symbol, symbol_len))
2800	goto col_elem_free;
2801
2802	if (arg->val.str.startmb != NULL)
2803	insert_entry (&collate->elem_table, symbol, symbol_len,
2804	new_element (collate,
2805	arg->val.str.startmb,
2806	arg->val.str.lenmb - `1`,
2807	arg->val.str.startwc,
2808	symbol, symbol_len, `0`));
2809	}
2810	else
2811	{
2812	col_elem_free:
2813	free ((char *) symbol);
2814	free (arg->val.str.startmb);
2815	free (arg->val.str.startwc);
2816	}
2817	lr_ignore_rest (ldfile, `1`);
2818	}
2819	break;
2820
2821	case tok_collating_symbol:
2822	/ Ignore the rest of the line if we don't need the input of*
2823	this line. /*
2824	if (ignore_content)
2825	{
2826	lr_ignore_rest (ldfile, `0`);
2827	break;
2828	}
2829
2830	if (state != `0` && state != `2`)
2831	goto err_label;
2832
2833	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2834	if (arg->tok != tok_bsymbol)
2835	goto err_label;
2836	else
2837	{
2838	char *symbol = arg->val.str.startmb;
2839	size_t symbol_len = arg->val.str.lenmb;
2840	char *endsymbol = NULL;
2841	size_t endsymbol_len = `0`;
2842	enum token_t ellipsis = tok_none;
2843
2844	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2845	if (arg->tok == tok_ellipsis2 \|\| arg->tok == tok_ellipsis4)
2846	{
2847	ellipsis = arg->tok;
2848
2849	arg = lr_token (ldfile, charmap, result, repertoire,
2850	verbose);
2851	if (arg->tok != tok_bsymbol)
2852	{
2853	free (symbol);
2854	goto err_label;
2855	}
2856
2857	endsymbol = arg->val.str.startmb;
2858	endsymbol_len = arg->val.str.lenmb;
2859
2860	lr_ignore_rest (ldfile, `1`);
2861	}
2862	else if (arg->tok != tok_eol)
2863	{
2864	free (symbol);
2865	goto err_label;
2866	}
2867
2868	if (!ignore_content)
2869	{
2870	if (symbol == NULL
2871	\|\| (ellipsis != tok_none && endsymbol == NULL))
2872	{
2873	lr_error (ldfile, _("\
2874	%s: unknown character in collating symbol name"),
2875	"LC_COLLATE");
2876	goto col_sym_free;
2877	}
2878	else if (ellipsis == tok_none)
2879	{
2880	/ A single symbol, no ellipsis. /
2881	if (check_duplicate (ldfile, collate, charmap,
2882	repertoire, symbol, symbol_len))
2883	/ The name is already defined. /
2884	goto col_sym_free;
2885
2886	insert_entry (&collate->sym_table, symbol, symbol_len,
2887	new_symbol (collate, symbol, symbol_len));
2888	}
2889	else if (symbol_len != endsymbol_len)
2890	{
2891	col_sym_inv_range:
2892	lr_error (ldfile,
2893	_("invalid names for character range"));
2894	goto col_sym_free;
2895	}
2896	else
2897	{
2898	/ Oh my, we have to handle an ellipsis. First, as*
2899	usual, determine the common prefix and then
2900	convert the rest into a range. /*
2901	size_t prefixlen;
2902	unsigned long int from;
2903	unsigned long int to;
2904	char *endp;
2905
2906	for (prefixlen = `0`; prefixlen < symbol_len; ++prefixlen)
2907	if (symbol[prefixlen] != endsymbol[prefixlen])
2908	break;
2909
2910	/ Convert the rest into numbers. /
2911	symbol[symbol_len] = `'\0'`;
2912	from = strtoul (&symbol[prefixlen], &endp,
2913	ellipsis == tok_ellipsis2 ? `16` : `10`);
2914	if (*endp != `'\0'`)
2915	goto col_sym_inv_range;
2916
2917	endsymbol[symbol_len] = `'\0'`;
2918	to = strtoul (&endsymbol[prefixlen], &endp,
2919	ellipsis == tok_ellipsis2 ? `16` : `10`);
2920	if (*endp != `'\0'`)
2921	goto col_sym_inv_range;
2922
2923	if (from > to)
2924	goto col_sym_inv_range;
2925
2926	/ Now loop over all entries. /
2927	while (from <= to)
2928	{
2929	char *symbuf;
2930
2931	symbuf = (char *) obstack_alloc (&collate->mempool,
2932	symbol_len + `1`);
2933
2934	/ Create the name. /
2935	sprintf (symbuf,
2936	ellipsis == tok_ellipsis2
2937	? "%.s%.lX" : "%.s%.lu",
2938	(int) prefixlen, symbol,
2939	(int) (symbol_len - prefixlen), from);
2940
2941	if (check_duplicate (ldfile, collate, charmap,
2942	repertoire, symbuf, symbol_len))
2943	/ The name is already defined. /
2944	goto col_sym_free;
2945
2946	insert_entry (&collate->sym_table, symbuf,
2947	symbol_len,
2948	new_symbol (collate, symbuf,
2949	symbol_len));
2950
2951	/ Increment the counter. /
2952	++from;
2953	}
2954
2955	goto col_sym_free;
2956	}
2957	}
2958	else
2959	{
2960	col_sym_free:
2961	free (symbol);
2962	free (endsymbol);
2963	}
2964	}
2965	break;
2966
2967	case tok_symbol_equivalence:
2968	/ Ignore the rest of the line if we don't need the input of*
2969	this line. /*
2970	if (ignore_content)
2971	{
2972	lr_ignore_rest (ldfile, `0`);
2973	break;
2974	}
2975
2976	if (state != `0`)
2977	goto err_label;
2978
2979	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2980	if (arg->tok != tok_bsymbol)
2981	goto err_label;
2982	else
2983	{
2984	const char *newname = arg->val.str.startmb;
2985	size_t newname_len = arg->val.str.lenmb;
2986	const char *symname;
2987	size_t symname_len;
2988	void symval; /* Actually struct symbol_t* /
2989
2990	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2991	if (arg->tok != tok_bsymbol)
2992	{
2993	free ((char *) newname);
2994	goto err_label;
2995	}
2996
2997	symname = arg->val.str.startmb;
2998	symname_len = arg->val.str.lenmb;
2999
3000	if (newname == NULL)
3001	{
3002	lr_error (ldfile, _("\
3003	%s: unknown character in equivalent definition name"),
3004	"LC_COLLATE");
3005
3006	sym_equiv_free:
3007	free ((char *) newname);
3008	free ((char *) symname);
3009	break;
3010	}
3011	if (symname == NULL)
3012	{
3013	lr_error (ldfile, _("\
3014	%s: unknown character in equivalent definition value"),
3015	"LC_COLLATE");
3016	goto sym_equiv_free;
3017	}
3018
3019	/ See whether the symbol name is already defined. /
3020	if (find_entry (&collate->sym_table, symname, symname_len,
3021	&symval) != `0`)
3022	{
3023	lr_error (ldfile, _("\
3024	%s: unknown symbol `%s' in equivalent definition"),
3025	"LC_COLLATE", symname);
3026	goto sym_equiv_free;
3027	}
3028
3029	if (insert_entry (&collate->sym_table,
3030	newname, newname_len, symval) < `0`)
3031	{
3032	lr_error (ldfile, _("\
3033	error while adding equivalent collating symbol"));
3034	goto sym_equiv_free;
3035	}
3036
3037	free ((char *) symname);
3038	}
3039	lr_ignore_rest (ldfile, `1`);
3040	break;
3041
3042	case tok_script:
3043	/ Ignore the rest of the line if we don't need the input of*
3044	this line. /*
3045	if (ignore_content)
3046	{
3047	lr_ignore_rest (ldfile, `0`);
3048	break;
3049	}
3050
3051	/ We get told about the scripts we know. /
3052	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3053	if (arg->tok != tok_bsymbol)
3054	goto err_label;
3055	else
3056	{
3057	struct section_list *runp = collate->known_sections;
3058	char *name;
3059
3060	while (runp != NULL)
3061	if (strncmp (runp->name, arg->val.str.startmb,
3062	arg->val.str.lenmb) == `0`
3063	&& runp->name[arg->val.str.lenmb] == `'\0'`)
3064	break;
3065	else
3066	runp = runp->def_next;
3067
3068	if (runp != NULL)
3069	{
3070	lr_error (ldfile, _("duplicate definition of script `%s'"),
3071	runp->name);
3072	lr_ignore_rest (ldfile, `0`);
3073	break;
3074	}
3075
3076	runp = (struct section_list ) xcalloc (`1`, sizeof* (*runp));
3077	name = (char *) xmalloc (arg->val.str.lenmb + `1`);
3078	memcpy (name, arg->val.str.startmb, arg->val.str.lenmb);
3079	name[arg->val.str.lenmb] = `'\0'`;
3080	runp->name = name;
3081
3082	runp->def_next = collate->known_sections;
3083	collate->known_sections = runp;
3084	}
3085	lr_ignore_rest (ldfile, `1`);
3086	break;
3087
3088	case tok_order_start:
3089	/ Ignore the rest of the line if we don't need the input of*
3090	this line. /*
3091	if (ignore_content)
3092	{
3093	lr_ignore_rest (ldfile, `0`);
3094	break;
3095	}
3096
3097	if (state != `0` && state != `1` && state != `2`)
3098	goto err_label;
3099	state = `1`;
3100
3101	/ The 14652 draft does not specify whether all `order_start' lines*
3102	must contain the same number of sort-rules, but 14651 does. So
3103	we require this here as well. /*
3104	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3105	if (arg->tok == tok_bsymbol)
3106	{
3107	/ This better should be a section name. /
3108	struct section_list *sp = collate->known_sections;
3109	while (sp != NULL
3110	&& (sp->name == NULL
3111	\|\| strncmp (sp->name, arg->val.str.startmb,
3112	arg->val.str.lenmb) != `0`
3113	\|\| sp->name[arg->val.str.lenmb] != `'\0'`))
3114	sp = sp->def_next;
3115
3116	if (sp == NULL)
3117	{
3118	lr_error (ldfile, _("\
3119	%s: unknown section name `%.*s'"),
3120	"LC_COLLATE", (int) arg->val.str.lenmb,
3121	arg->val.str.startmb);
3122	/ We use the error section. /
3123	collate->current_section = &collate->error_section;
3124
3125	if (collate->error_section.first == NULL)
3126	{
3127	/ Insert &collate->error_section at the end of*
3128	the collate->sections list. /*
3129	if (collate->sections == NULL)
3130	collate->sections = &collate->error_section;
3131	else
3132	{
3133	sp = collate->sections;
3134	while (sp->next != NULL)
3135	sp = sp->next;
3136
3137	sp->next = &collate->error_section;
3138	}
3139	collate->error_section.next = NULL;
3140	}
3141	}
3142	else
3143	{
3144	/ One should not be allowed to open the same*
3145	section twice. /*
3146	if (sp->first != NULL)
3147	lr_error (ldfile, _("\
3148	%s: multiple order definitions for section `%s'"),
3149	"LC_COLLATE", sp->name);
3150	else
3151	{
3152	/ Insert sp in the collate->sections list,*
3153	right after collate->current_section. /*
3154	if (collate->current_section != NULL)
3155	{
3156	sp->next = collate->current_section->next;
3157	collate->current_section->next = sp;
3158	}
3159	else if (collate->sections == NULL)
3160	/ This is the first section to be defined. /
3161	collate->sections = sp;
3162
3163	collate->current_section = sp;
3164	}
3165
3166	/ Next should come the end of the line or a semicolon. /
3167	arg = lr_token (ldfile, charmap, result, repertoire,
3168	verbose);
3169	if (arg->tok == tok_eol)
3170	{
3171	uint32_t cnt;
3172
3173	/ This means we have exactly one rule: `forward'. /
3174	if (nrules > `1`)
3175	lr_error (ldfile, _("\
3176	%s: invalid number of sorting rules"),
3177	"LC_COLLATE");
3178	else
3179	nrules = `1`;
3180	sp->rules = obstack_alloc (&collate->mempool,
3181	(sizeof (enum coll_sort_rule)
3182	* nrules));
3183	for (cnt = `0`; cnt < nrules; ++cnt)
3184	sp->rules[cnt] = sort_forward;
3185
3186	/ Next line. /
3187	break;
3188	}
3189
3190	/ Get the next token. /
3191	arg = lr_token (ldfile, charmap, result, repertoire,
3192	verbose);
3193	}
3194	}
3195	else
3196	{
3197	/ There is no section symbol. Therefore we use the unnamed*
3198	section. /*
3199	collate->current_section = &collate->unnamed_section;
3200
3201	if (collate->unnamed_section_defined)
3202	lr_error (ldfile, _("\
3203	%s: multiple order definitions for unnamed section"),
3204	"LC_COLLATE");
3205	else
3206	{
3207	/ Insert &collate->unnamed_section at the beginning of*
3208	the collate->sections list. /*
3209	collate->unnamed_section.next = collate->sections;
3210	collate->sections = &collate->unnamed_section;
3211	collate->unnamed_section_defined = true;
3212	}
3213	}
3214
3215	/ Now read the direction names. /
3216	read_directions (ldfile, arg, charmap, repertoire, result);
3217
3218	/ From now we need the strings untranslated. /
3219	ldfile->translate_strings = `0`;
3220	break;
3221
3222	case tok_order_end:
3223	/ Ignore the rest of the line if we don't need the input of*
3224	this line. /*
3225	if (ignore_content)
3226	{
3227	lr_ignore_rest (ldfile, `0`);
3228	break;
3229	}
3230
3231	if (state != `1`)
3232	goto err_label;
3233
3234	/ Handle ellipsis at end of list. /
3235	if (was_ellipsis != tok_none)
3236	{
3237	handle_ellipsis (ldfile, NULL, `0`, was_ellipsis, charmap,
3238	repertoire, result);
3239	was_ellipsis = tok_none;
3240	}
3241
3242	state = `2`;
3243	lr_ignore_rest (ldfile, `1`);
3244	break;
3245
3246	case tok_reorder_after:
3247	/ Ignore the rest of the line if we don't need the input of*
3248	this line. /*
3249	if (ignore_content)
3250	{
3251	lr_ignore_rest (ldfile, `0`);
3252	break;
3253	}
3254
3255	if (state == `1`)
3256	{
3257	lr_error (ldfile, _("%s: missing `order_end' keyword"),
3258	"LC_COLLATE");
3259	state = `2`;
3260
3261	/ Handle ellipsis at end of list. /
3262	if (was_ellipsis != tok_none)
3263	{
3264	handle_ellipsis (ldfile, arg->val.str.startmb,
3265	arg->val.str.lenmb, was_ellipsis, charmap,
3266	repertoire, result);
3267	was_ellipsis = tok_none;
3268	}
3269	}
3270	else if (state == `0` && copy_locale == NULL)
3271	goto err_label;
3272	else if (state != `0` && state != `2` && state != `3`)
3273	goto err_label;
3274	state = `3`;
3275
3276	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3277	if (arg->tok == tok_bsymbol \|\| arg->tok == tok_ucs4)
3278	{
3279	/ Find this symbol in the sequence table. /
3280	char ucsbuf[`10`];
3281	char *startmb;
3282	size_t lenmb;
3283	struct element_t *insp;
3284	int no_error = `1`;
3285	void *ptr;
3286
3287	if (arg->tok == tok_bsymbol)
3288	{
3289	startmb = arg->val.str.startmb;
3290	lenmb = arg->val.str.lenmb;
3291	}
3292	else
3293	{
3294	sprintf (ucsbuf, "U%08X", arg->val.ucs4);
3295	startmb = ucsbuf;
3296	lenmb = `9`;
3297	}
3298
3299	if (find_entry (&collate->seq_table, startmb, lenmb, &ptr) == `0`)
3300	/ Yes, the symbol exists. Simply point the cursor*
3301	to it. /*
3302	collate->cursor = (struct element_t *) ptr;
3303	else
3304	{
3305	struct symbol_t *symbp;
3306	void *ptr;
3307
3308	if (find_entry (&collate->sym_table, startmb, lenmb,
3309	&ptr) == `0`)
3310	{
3311	symbp = ptr;
3312
3313	if (symbp->order->last != NULL
3314	\|\| symbp->order->next != NULL)
3315	collate->cursor = symbp->order;
3316	else
3317	{
3318	/ This is a collating symbol but its position*
3319	is not yet defined. /*
3320	lr_error (ldfile, _("\
3321	%s: order for collating symbol %.*s not yet defined"),
3322	"LC_COLLATE", (int) lenmb, startmb);
3323	collate->cursor = NULL;
3324	no_error = `0`;
3325	}
3326	}
3327	else if (find_entry (&collate->elem_table, startmb, lenmb,
3328	&ptr) == `0`)
3329	{
3330	insp = (struct element_t *) ptr;
3331
3332	if (insp->last != NULL \|\| insp->next != NULL)
3333	collate->cursor = insp;
3334	else
3335	{
3336	/ This is a collating element but its position*
3337	is not yet defined. /*
3338	lr_error (ldfile, _("\
3339	%s: order for collating element %.*s not yet defined"),
3340	"LC_COLLATE", (int) lenmb, startmb);
3341	collate->cursor = NULL;
3342	no_error = `0`;
3343	}
3344	}
3345	else
3346	{
3347	/ This is bad. The symbol after which we have to*
3348	insert does not exist. /*
3349	lr_error (ldfile, _("\
3350	%s: cannot reorder after %.*s: symbol not known"),
3351	"LC_COLLATE", (int) lenmb, startmb);
3352	collate->cursor = NULL;
3353	no_error = `0`;
3354	}
3355	}
3356
3357	lr_ignore_rest (ldfile, no_error);
3358	}
3359	else
3360	/ This must not happen. /
3361	goto err_label;
3362	break;
3363
3364	case tok_reorder_end:
3365	/ Ignore the rest of the line if we don't need the input of*
3366	this line. /*
3367	if (ignore_content)
3368	break;
3369
3370	if (state != `3`)
3371	goto err_label;
3372	state = `4`;
3373	lr_ignore_rest (ldfile, `1`);
3374	break;
3375
3376	case tok_reorder_sections_after:
3377	/ Ignore the rest of the line if we don't need the input of*
3378	this line. /*
3379	if (ignore_content)
3380	{
3381	lr_ignore_rest (ldfile, `0`);
3382	break;
3383	}
3384
3385	if (state == `1`)
3386	{
3387	lr_error (ldfile, _("%s: missing `order_end' keyword"),
3388	"LC_COLLATE");
3389	state = `2`;
3390
3391	/ Handle ellipsis at end of list. /
3392	if (was_ellipsis != tok_none)
3393	{
3394	handle_ellipsis (ldfile, NULL, `0`, was_ellipsis, charmap,
3395	repertoire, result);
3396	was_ellipsis = tok_none;
3397	}
3398	}
3399	else if (state == `3`)
3400	{
3401	record_error (`0`, `0`, _("\
3402	%s: missing `reorder-end' keyword"), "LC_COLLATE");
3403	state = `4`;
3404	}
3405	else if (state != `2` && state != `4`)
3406	goto err_label;
3407	state = `5`;
3408
3409	/ Get the name of the sections we are adding after. /
3410	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3411	if (arg->tok == tok_bsymbol)
3412	{
3413	/ Now find a section with this name. /
3414	struct section_list *runp = collate->sections;
3415
3416	while (runp != NULL)
3417	{
3418	if (runp->name != NULL
3419	&& strlen (runp->name) == arg->val.str.lenmb
3420	&& memcmp (runp->name, arg->val.str.startmb,
3421	arg->val.str.lenmb) == `0`)
3422	break;
3423
3424	runp = runp->next;
3425	}
3426
3427	if (runp != NULL)
3428	collate->current_section = runp;
3429	else
3430	{
3431	/ This is bad. The section after which we have to*
3432	reorder does not exist. Therefore we cannot
3433	process the whole rest of this reorder
3434	specification. /*
3435	lr_error (ldfile, _("%s: section `%.*s' not known"),
3436	"LC_COLLATE", (int) arg->val.str.lenmb,
3437	arg->val.str.startmb);
3438
3439	do
3440	{
3441	lr_ignore_rest (ldfile, `0`);
3442
3443	now = lr_token (ldfile, charmap, result, NULL, verbose);
3444	}
3445	while (now->tok == tok_reorder_sections_after
3446	\|\| now->tok == tok_reorder_sections_end
3447	\|\| now->tok == tok_end);
3448
3449	/ Process the token we just saw. /
3450	nowtok = now->tok;
3451	continue;
3452	}
3453	}
3454	else
3455	/ This must not happen. /
3456	goto err_label;
3457	break;
3458
3459	case tok_reorder_sections_end:
3460	/ Ignore the rest of the line if we don't need the input of*
3461	this line. /*
3462	if (ignore_content)
3463	break;
3464
3465	if (state != `5`)
3466	goto err_label;
3467	state = `6`;
3468	lr_ignore_rest (ldfile, `1`);
3469	break;
3470
3471	case tok_bsymbol:
3472	case tok_ucs4:
3473	/ Ignore the rest of the line if we don't need the input of*
3474	this line. /*
3475	if (ignore_content)
3476	{
3477	lr_ignore_rest (ldfile, `0`);
3478	break;
3479	}
3480
3481	if (state != `0` && state != `1` && state != `3` && state != `5`)
3482	goto err_label;
3483
3484	if ((state == `0` \|\| state == `5`) && nowtok == tok_ucs4)
3485	goto err_label;
3486
3487	if (nowtok == tok_ucs4)
3488	{
3489	snprintf (ucs4buf, sizeof (ucs4buf), "U%08X", now->val.ucs4);
3490	symstr = ucs4buf;
3491	symlen = `9`;
3492	}
3493	else if (arg != NULL)
3494	{
3495	symstr = arg->val.str.startmb;
3496	symlen = arg->val.str.lenmb;
3497	}
3498	else
3499	{
3500	lr_error (ldfile, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
3501	(int) ldfile->token.val.str.lenmb,
3502	ldfile->token.val.str.startmb);
3503	break;
3504	}
3505
3506	struct element_t *seqp;
3507	if (state == `0`)
3508	{
3509	/ We are outside an `order_start' region. This means*
3510	we must only accept definitions of values for
3511	collation symbols since these are purely abstract
3512	values and don't need directions associated. /*
3513	void *ptr;
3514
3515	if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == `0`)
3516	{
3517	seqp = ptr;
3518
3519	/ It's already defined. First check whether this*
3520	is really a collating symbol. /*
3521	if (seqp->is_character)
3522	goto err_label;
3523
3524	goto move_entry;
3525	}
3526	else
3527	{
3528	void *result;
3529
3530	if (find_entry (&collate->sym_table, symstr, symlen,
3531	&result) != `0`)
3532	/ No collating symbol, it's an error. /
3533	goto err_label;
3534
3535	/ Maybe this is the first time we define a symbol*
3536	value and it is before the first actual section. /*
3537	if (collate->sections == NULL)
3538	collate->sections = collate->current_section =
3539	&collate->symbol_section;
3540	}
3541
3542	if (was_ellipsis != tok_none)
3543	{
3544	handle_ellipsis (ldfile, symstr, symlen, was_ellipsis,
3545	charmap, repertoire, result);
3546
3547	/ Remember that we processed the ellipsis. /
3548	was_ellipsis = tok_none;
3549
3550	/ And don't add the value a second time. /
3551	break;
3552	}
3553	}
3554	else if (state == `3`)
3555	{
3556	/ It is possible that we already have this collation sequence.*
3557	In this case we move the entry. /*
3558	void *sym;
3559	void *ptr;
3560
3561	/ If the symbol after which we have to insert was not found*
3562	ignore all entries. /*
3563	if (collate->cursor == NULL)
3564	{
3565	lr_ignore_rest (ldfile, `0`);
3566	break;
3567	}
3568
3569	if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == `0`)
3570	{
3571	seqp = (struct element_t *) ptr;
3572	goto move_entry;
3573	}
3574
3575	if (find_entry (&collate->sym_table, symstr, symlen, &sym) == `0`
3576	&& (seqp = ((struct symbol_t *) sym)->order) != NULL)
3577	goto move_entry;
3578
3579	if (find_entry (&collate->elem_table, symstr, symlen, &ptr) == `0`
3580	&& (seqp = (struct element_t *) ptr,
3581	seqp->last != NULL \|\| seqp->next != NULL
3582	\|\| (collate->start != NULL && seqp == collate->start)))
3583	{
3584	move_entry:
3585	/ Remove the entry from the old position. /
3586	if (seqp->last == NULL)
3587	collate->start = seqp->next;
3588	else
3589	seqp->last->next = seqp->next;
3590	if (seqp->next != NULL)
3591	seqp->next->last = seqp->last;
3592
3593	/ We also have to check whether this entry is the*
3594	first or last of a section. /*
3595	if (seqp->section->first == seqp)
3596	{
3597	if (seqp->section->first == seqp->section->last)
3598	/ This section has no content anymore. /
3599	seqp->section->first = seqp->section->last = NULL;
3600	else
3601	seqp->section->first = seqp->next;
3602	}
3603	else if (seqp->section->last == seqp)
3604	seqp->section->last = seqp->last;
3605
3606	/ Now insert it in the new place. /
3607	insert_weights (ldfile, seqp, charmap, repertoire, result,
3608	tok_none);
3609	break;
3610	}
3611
3612	/ Otherwise we just add a new entry. /
3613	}
3614	else if (state == `5`)
3615	{
3616	/ We are reordering sections. Find the named section. /
3617	struct section_list *runp = collate->sections;
3618	struct section_list *prevp = NULL;
3619
3620	while (runp != NULL)
3621	{
3622	if (runp->name != NULL
3623	&& strlen (runp->name) == symlen
3624	&& memcmp (runp->name, symstr, symlen) == `0`)
3625	break;
3626
3627	prevp = runp;
3628	runp = runp->next;
3629	}
3630
3631	if (runp == NULL)
3632	{
3633	lr_error (ldfile, _("%s: section `%.*s' not known"),
3634	"LC_COLLATE", (int) symlen, symstr);
3635	lr_ignore_rest (ldfile, `0`);
3636	}
3637	else
3638	{
3639	if (runp != collate->current_section)
3640	{
3641	/ Remove the named section from the old place and*
3642	insert it in the new one. /*
3643	prevp->next = runp->next;
3644
3645	runp->next = collate->current_section->next;
3646	collate->current_section->next = runp;
3647	collate->current_section = runp;
3648	}
3649
3650	/ Process the rest of the line which might change*
3651	the collation rules. /*
3652	arg = lr_token (ldfile, charmap, result, repertoire,
3653	verbose);
3654	if (arg->tok != tok_eof && arg->tok != tok_eol)
3655	read_directions (ldfile, arg, charmap, repertoire,
3656	result);
3657	}
3658	break;
3659	}
3660	else if (was_ellipsis != tok_none)
3661	{
3662	/ Using the information in the `ellipsis_weight'*
3663	element and this and the last value we have to handle
3664	the ellipsis now. /*
3665	assert (state == `1`);
3666
3667	handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, charmap,
3668	repertoire, result);
3669
3670	/ Remember that we processed the ellipsis. /
3671	was_ellipsis = tok_none;
3672
3673	/ And don't add the value a second time. /
3674	break;
3675	}
3676
3677	/ Now insert in the new place. /
3678	insert_value (ldfile, symstr, symlen, charmap, repertoire, result);
3679	break;
3680
3681	case tok_undefined:
3682	/ Ignore the rest of the line if we don't need the input of*
3683	this line. /*
3684	if (ignore_content)
3685	{
3686	lr_ignore_rest (ldfile, `0`);
3687	break;
3688	}
3689
3690	if (state != `1`)
3691	goto err_label;
3692
3693	if (was_ellipsis != tok_none)
3694	{
3695	lr_error (ldfile,
3696	_("%s: cannot have `%s' as end of ellipsis range"),
3697	"LC_COLLATE", "UNDEFINED");
3698
3699	unlink_element (collate);
3700	was_ellipsis = tok_none;
3701	}
3702
3703	/ See whether UNDEFINED already appeared somewhere. /
3704	if (collate->undefined.next != NULL
3705	\|\| &collate->undefined == collate->cursor)
3706	{
3707	lr_error (ldfile,
3708	_("%s: order for `%.*s' already defined at %s:%Zu"),
3709	"LC_COLLATE", `9`, "UNDEFINED",
3710	collate->undefined.file,
3711	collate->undefined.line);
3712	lr_ignore_rest (ldfile, `0`);
3713	}
3714	else
3715	/ Parse the weights. /
3716	insert_weights (ldfile, &collate->undefined, charmap,
3717	repertoire, result, tok_none);
3718	break;
3719
3720	case tok_ellipsis2: / symbolic hexadecimal ellipsis /
3721	case tok_ellipsis3: / absolute ellipsis /
3722	case tok_ellipsis4: / symbolic decimal ellipsis /
3723	/ This is the symbolic (decimal or hexadecimal) or absolute*
3724	ellipsis. /*
3725	if (was_ellipsis != tok_none)
3726	goto err_label;
3727
3728	if (state != `0` && state != `1` && state != `3`)
3729	goto err_label;
3730
3731	was_ellipsis = nowtok;
3732
3733	insert_weights (ldfile, &collate->ellipsis_weight, charmap,
3734	repertoire, result, nowtok);
3735	break;
3736
3737	case tok_end:
3738	seen_end:
3739	/ Next we assume `LC_COLLATE'. /
3740	if (!ignore_content)
3741	{
3742	if (state == `0` && copy_locale == NULL)
3743	/ We must either see a copy statement or have*
3744	ordering values. /*
3745	lr_error (ldfile,
3746	_("%s: empty category description not allowed"),
3747	"LC_COLLATE");
3748	else if (state == `1`)
3749	{
3750	lr_error (ldfile, _("%s: missing `order_end' keyword"),
3751	"LC_COLLATE");
3752
3753	/ Handle ellipsis at end of list. /
3754	if (was_ellipsis != tok_none)
3755	{
3756	handle_ellipsis (ldfile, NULL, `0`, was_ellipsis, charmap,
3757	repertoire, result);
3758	was_ellipsis = tok_none;
3759	}
3760	}
3761	else if (state == `3`)
3762	record_error (`0`, `0`, _("\
3763	%s: missing `reorder-end' keyword"), "LC_COLLATE");
3764	else if (state == `5`)
3765	record_error (`0`, `0`, _("\
3766	%s: missing `reorder-sections-end' keyword"), "LC_COLLATE");
3767	}
3768	arg = lr_token (ldfile, charmap, result, NULL, verbose);
3769	if (arg->tok == tok_eof)
3770	break;
3771	if (arg->tok == tok_eol)
3772	lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
3773	else if (arg->tok != tok_lc_collate)
3774	lr_error (ldfile, _("\
3775	%1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3776	lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
3777	return;
3778
3779	case tok_define:
3780	if (ignore_content)
3781	{
3782	lr_ignore_rest (ldfile, `0`);
3783	break;
3784	}
3785
3786	arg = lr_token (ldfile, charmap, result, NULL, verbose);
3787	if (arg->tok != tok_ident)
3788	goto err_label;
3789
3790	/ Simply add the new symbol. /
3791	struct name_list newsym = xmalloc (sizeof* (*newsym)
3792	+ arg->val.str.lenmb + `1`);
3793	memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
3794	newsym->str[arg->val.str.lenmb] = `'\0'`;
3795	newsym->next = defined;
3796	defined = newsym;
3797
3798	lr_ignore_rest (ldfile, `1`);
3799	break;
3800
3801	case tok_undef:
3802	if (ignore_content)
3803	{
3804	lr_ignore_rest (ldfile, `0`);
3805	break;
3806	}
3807
3808	arg = lr_token (ldfile, charmap, result, NULL, verbose);
3809	if (arg->tok != tok_ident)
3810	goto err_label;
3811
3812	/ Remove _all_ occurrences of the symbol from the list. /
3813	struct name_list *prevdef = NULL;
3814	struct name_list *curdef = defined;
3815	while (curdef != NULL)
3816	if (strncmp (arg->val.str.startmb, curdef->str,
3817	arg->val.str.lenmb) == `0`
3818	&& curdef->str[arg->val.str.lenmb] == `'\0'`)
3819	{
3820	if (prevdef == NULL)
3821	defined = curdef->next;
3822	else
3823	prevdef->next = curdef->next;
3824
3825	struct name_list *olddef = curdef;
3826	curdef = curdef->next;
3827
3828	free (olddef);
3829	}
3830	else
3831	{
3832	prevdef = curdef;
3833	curdef = curdef->next;
3834	}
3835
3836	lr_ignore_rest (ldfile, `1`);
3837	break;
3838
3839	case tok_ifdef:
3840	case tok_ifndef:
3841	if (ignore_content)
3842	{
3843	lr_ignore_rest (ldfile, `0`);
3844	break;
3845	}
3846
3847	found_ifdef:
3848	arg = lr_token (ldfile, charmap, result, NULL, verbose);
3849	if (arg->tok != tok_ident)
3850	goto err_label;
3851	lr_ignore_rest (ldfile, `1`);
3852
3853	if (collate->else_action == else_none)
3854	{
3855	curdef = defined;
3856	while (curdef != NULL)
3857	if (strncmp (arg->val.str.startmb, curdef->str,
3858	arg->val.str.lenmb) == `0`
3859	&& curdef->str[arg->val.str.lenmb] == `'\0'`)
3860	break;
3861	else
3862	curdef = curdef->next;
3863
3864	if ((nowtok == tok_ifdef && curdef != NULL)
3865	\|\| (nowtok == tok_ifndef && curdef == NULL))
3866	{
3867	/ We have to use the if-branch. /
3868	collate->else_action = else_ignore;
3869	}
3870	else
3871	{
3872	/ We have to use the else-branch, if there is one. /
3873	nowtok = skip_to (ldfile, collate, charmap, `0`);
3874	if (nowtok == tok_else)
3875	collate->else_action = else_seen;
3876	else if (nowtok == tok_elifdef)
3877	{
3878	nowtok = tok_ifdef;
3879	goto found_ifdef;
3880	}
3881	else if (nowtok == tok_elifndef)
3882	{
3883	nowtok = tok_ifndef;
3884	goto found_ifdef;
3885	}
3886	else if (nowtok == tok_eof)
3887	goto seen_eof;
3888	else if (nowtok == tok_end)
3889	goto seen_end;
3890	}
3891	}
3892	else
3893	{
3894	/ XXX Should it really become necessary to support nested*
3895	preprocessor handling we will push the state here. /*
3896	lr_error (ldfile, _("%s: nested conditionals not supported"),
3897	"LC_COLLATE");
3898	nowtok = skip_to (ldfile, collate, charmap, `1`);
3899	if (nowtok == tok_eof)
3900	goto seen_eof;
3901	else if (nowtok == tok_end)
3902	goto seen_end;
3903	}
3904	break;
3905
3906	case tok_elifdef:
3907	case tok_elifndef:
3908	case tok_else:
3909	if (ignore_content)
3910	{
3911	lr_ignore_rest (ldfile, `0`);
3912	break;
3913	}
3914
3915	lr_ignore_rest (ldfile, `1`);
3916
3917	if (collate->else_action == else_ignore)
3918	{
3919	/ Ignore everything until the endif. /
3920	nowtok = skip_to (ldfile, collate, charmap, `1`);
3921	if (nowtok == tok_eof)
3922	goto seen_eof;
3923	else if (nowtok == tok_end)
3924	goto seen_end;
3925	}
3926	else
3927	{
3928	assert (collate->else_action == else_none);
3929	lr_error (ldfile, _("\
3930	%s: '%s' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE",
3931	nowtok == tok_else ? "else"
3932	: nowtok == tok_elifdef ? "elifdef" : "elifndef");
3933	}
3934	break;
3935
3936	case tok_endif:
3937	if (ignore_content)
3938	{
3939	lr_ignore_rest (ldfile, `0`);
3940	break;
3941	}
3942
3943	lr_ignore_rest (ldfile, `1`);
3944
3945	if (collate->else_action != else_ignore
3946	&& collate->else_action != else_seen)
3947	lr_error (ldfile, _("\
3948	%s: 'endif' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE");
3949
3950	/ XXX If we support nested preprocessor directives we pop*
3951	the state here. /*
3952	collate->else_action = else_none;
3953	break;
3954
3955	default:
3956	err_label:
3957	SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
3958	}
3959
3960	/ Prepare for the next round. /
3961	now = lr_token (ldfile, charmap, result, NULL, verbose);
3962	nowtok = now->tok;
3963	}
3964
3965	seen_eof:
3966	/ When we come here we reached the end of the file. /
3967	lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
3968	}
3969

Browse the source code of glibc/locale/programs/ld-collate.c