ld-collate.c source code [glibc/locale/programs/ld-collate.c]

/* Copyright (C) 1995-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <https://www.gnu.org/licenses/>.  */
17
18	#ifdef HAVE_CONFIG_H
19	# include <config.h>
20	#endif
21
22	#include <errno.h>
23	#include <stdlib.h>
24	#include <wchar.h>
25	#include <stdint.h>
26	#include <sys/param.h>
27
28	#include "localedef.h"
29	#include "charmap.h"
30	#include "localeinfo.h"
31	#include "linereader.h"
32	#include "locfile.h"
33	#include "elem-hash.h"
34
/* Uncomment the following line in the production version.  */
/* #define NDEBUG 1 */
37	#include <assert.h>
38
39	#define obstack_chunk_alloc malloc
40	#define obstack_chunk_free free
41
/* Append the 32-bit value DATA, after passing it through
   maybe_swap_uint32, to the object currently being grown in OBSTACK.
   The object's current size must satisfy LOCFILE_ALIGNED_P.  */
static inline void
__attribute ((always_inline))
obstack_int32_grow (struct obstack *obstack, int32_t data)
{
  assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));

  int32_t swapped = maybe_swap_uint32 (data);

  if (sizeof (swapped) != sizeof (int))
    /* No int-sized primitive fits; append the raw bytes instead.  */
    obstack_grow (obstack, &swapped, sizeof (swapped));
  else
    obstack_int_grow (obstack, swapped);
}
53
/* Like obstack_int32_grow, but uses obstack_int_grow_fast when int32_t
   and int have the same size.  NOTE(review): presumably the caller must
   already have made room in OBSTACK for the fast path -- confirm at the
   call sites.  */
static inline void
__attribute ((always_inline))
obstack_int32_grow_fast (struct obstack *obstack, int32_t data)
{
  assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));

  int32_t swapped = maybe_swap_uint32 (data);

  if (sizeof (swapped) != sizeof (int))
    /* No int-sized primitive fits; append the raw bytes instead.  */
    obstack_grow (obstack, &swapped, sizeof (swapped));
  else
    obstack_int_grow_fast (obstack, swapped);
}
65
/* Forward declaration.  */
struct element_t;

/* Data type for list of strings.  Each node describes one section and
   can be linked into two lists at once through DEF_NEXT and NEXT.  */
struct section_list
{
  /* Successor in the known_sections list.  */
  struct section_list *def_next;
  /* Successor in the sections list.  */
  struct section_list *next;
  /* Name of the section.  */
  const char *name;
  /* First element of this section.  */
  struct element_t *first;
  /* Last element of this section.  */
  struct element_t *last;
  /* These are the rules for this section.  */
  enum coll_sort_rule *rules;
  /* Index of the rule set in the appropriate section of the output file.  */
  int ruleidx;
};
87
struct element_t;

/* A counted array of element pointers; used for the weights of one
   level of a collating element.  */
struct element_list_t
{
  /* Number of elements.  */
  int cnt;

  /* The CNT element pointers.  */
  struct element_t **w;
};
97
/* Data type for collating element.  */
struct element_t
{
  const char *name;

  /* Multibyte and wide character representation with their lengths.  */
  const char *mbs;
  size_t nmbs;
  const uint32_t *wcs;
  size_t nwcs;
  int *mborder;
  int wcorder;

  /* The following is a bit mask which bits are set if this element is
     used in the appropriate level.  Interesting for the singlebyte
     weight computation.

     XXX The type here restricts the number of levels to 32.  It could
     be changed if necessary but I doubt this is necessary.  */
  unsigned int used_in_level;

  struct element_list_t *weights;

  /* Nonzero if this is a real character definition.  */
  int is_character;

  /* Order of the character in the sequence.  This information will
     be used in range expressions.  */
  int mbseqorder;
  int wcseqorder;

  /* Where does the definition come from.  */
  const char *file;
  size_t line;

  /* Which section does this belong to.  */
  struct section_list *section;

  /* Predecessor and successor in the order list.  */
  struct element_t *last;
  struct element_t *next;

  /* Next element in multibyte output list.  */
  struct element_t *mbnext;
  struct element_t *mblast;

  /* Next element in wide character output list.  */
  struct element_t *wcnext;
  struct element_t *wclast;
};
147
/* Special element values.  These small integers cast to pointers serve
   as placeholders in weight lists and must never be dereferenced.  */
#define ELEMENT_ELLIPSIS2 ((struct element_t *) 1)
#define ELEMENT_ELLIPSIS3 ((struct element_t *) 2)
#define ELEMENT_ELLIPSIS4 ((struct element_t *) 3)
152
/* Data type for collating symbol.  */
struct symbol_t
{
  const char *name;

  /* Point to place in the order list.  */
  struct element_t *order;

  /* Where does the definition come from.  */
  const char *file;
  size_t line;
};
165
166	/ Sparse table of struct element_t . /*
167	#define TABLE wchead_table
168	#define ELEMENT struct element_t *
169	#define DEFAULT NULL
170	#define ITERATE
171	#define NO_ADD_LOCALE
172	#include "3level.h"
173
174	/ Sparse table of int32_t. /
175	#define TABLE collidx_table
176	#define ELEMENT int32_t
177	#define DEFAULT 0
178	#include "3level.h"
179
180	/ Sparse table of uint32_t. /
181	#define TABLE collseq_table
182	#define ELEMENT uint32_t
183	#define DEFAULT ~((uint32_t) 0)
184	#include "3level.h"
185
186
/* Simple name list for the preprocessor.  The name is stored inline,
   directly after the node, so a node must be allocated with room for
   the string and its terminating NUL.  */
struct name_list
{
  struct name_list *next;
  /* C99 flexible array member instead of the GNU zero-length array.  */
  char str[];
};
193
194
195	/ The real definition of the struct for the LC_COLLATE locale. /
196	struct locale_collate_t
197	{
198	int col_weight_max;
199	int cur_weight_max;
200
201	/ List of known scripts. /
202	struct section_list *known_sections;
203	/ List of used sections. /
204	struct section_list *sections;
205	/ Current section using definition. /
206	struct section_list *current_section;
207	/ There always can be an unnamed section. /
208	struct section_list unnamed_section;
209	/ Flag whether the unnamed section has been defined. /
210	bool unnamed_section_defined;
211	/ To make handling of errors easier we have another section. /
212	struct section_list error_section;
213	/ Sometimes we are defining the values for collating symbols before*
214	the first actual section. /*
215	struct section_list symbol_section;
216
217	/ Start of the order list. /
218	struct element_t *start;
219
220	/ The undefined element. /
221	struct element_t undefined;
222
223	/ This is the cursor for `reorder_after' insertions. /
224	struct element_t *cursor;
225
226	/ This value is used when handling ellipsis. /
227	struct element_t ellipsis_weight;
228
229	/ Known collating elements. /
230	hash_table elem_table;
231
232	/ Known collating symbols. /
233	hash_table sym_table;
234
235	/ Known collation sequences. /
236	hash_table seq_table;
237
238	struct obstack mempool;
239
240	/ The LC_COLLATE category is a bit special as it is sometimes possible*
241	that the definitions from more than one input file contains information.
242	Therefore we keep all relevant input in a list. /*
243	struct locale_collate_t *next;
244
245	/ Arrays with heads of the list for each of the leading bytes in*
246	the multibyte sequences. /*
247	struct element_t *mbheads[`256`];
248
249	/ Arrays with heads of the list for each of the leading bytes in*
250	the multibyte sequences. /*
251	struct wchead_table wcheads;
252
253	/ The arrays with the collation sequence order. /
254	unsigned char mbseqorder[`256`];
255	struct collseq_table wcseqorder;
256
257	/ State of the preprocessor. /
258	enum
259	{
260	else_none = `0`,
261	else_ignore,
262	else_seen
263	}
264	else_action;
265	};
266
267
/* We have a few global variables which are used for reading all
   LC_COLLATE category descriptions in all files.  */

/* Number of sorting rules (weight levels); zero until the first
   direction specification has been read.  */
static uint32_t nrules;

/* List of defined preprocessor symbols.  */
static struct name_list *defined;
274
275
/* We need UTF-8 encoding of numbers.

   Store the UTF-8 style encoding of VAL in BUF and return the number of
   bytes written (1 to 6).  BUF must have room for that many bytes; no
   terminating NUL is written.  Every value below 0x80, negative ones
   included, is stored as a single byte.  */
static inline int
__attribute ((always_inline))
utf8_encode (char *buf, int val)
{
  int retval;

  if (val < 0x80)
    {
      *buf = (char) val;
      retval = 1;
    }
  else
    {
      int step;

      /* A sequence of STEP bytes carries 5 * STEP + 1 payload bits.  */
      for (step = 2; step < 6; ++step)
        if ((val & (~(uint32_t) 0 << (5 * step + 1))) == 0)
          break;
      retval = step;

      /* Lead byte: STEP one-bits followed by a zero bit.  A positive
         constant is shifted here because right-shifting the negative
         value ~0xff is implementation-defined.  */
      *buf = (unsigned char) (0xff00 >> step);
      --step;
      /* Fill the continuation bytes from the last one backwards.  */
      do
        {
          buf[step] = 0x80 | (val & 0x3f);
          val >>= 6;
        }
      while (--step > 0);
      /* The remaining high bits go into the lead byte.  */
      *buf |= val;
    }

  return retval;
}
310
311
312	static struct section_list *
313	make_seclist_elem (struct locale_collate_t collate, const* char *string,
314	struct section_list *next)
315	{
316	struct section_list *newp;
317
318	newp = (struct section_list *) obstack_alloc (&collate->mempool,
319	sizeof (*newp));
320	newp->next = next;
321	newp->name = string;
322	newp->first = NULL;
323	newp->last = NULL;
324
325	return newp;
326	}
327
328
329	static struct element_t *
330	new_element (struct locale_collate_t collate, const* char *mbs, size_t mbslen,
331	const uint32_t wcs, const* char *name, size_t namelen,
332	int is_character)
333	{
334	struct element_t *newp;
335
336	newp = (struct element_t *) obstack_alloc (&collate->mempool,
337	sizeof (*newp));
338	newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
339	name, namelen);
340	if (mbs != NULL)
341	{
342	newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
343	newp->nmbs = mbslen;
344	}
345	else
346	{
347	newp->mbs = NULL;
348	newp->nmbs = `0`;
349	}
350	if (wcs != NULL)
351	{
352	size_t nwcs = wcslen ((wchar_t *) wcs);
353	uint32_t zero = `0`;
354	/ Handle <U0000> as a single character. /
355	if (nwcs == `0`)
356	nwcs = `1`;
357	obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
358	obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
359	newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
360	newp->nwcs = nwcs;
361	}
362	else
363	{
364	newp->wcs = NULL;
365	newp->nwcs = `0`;
366	}
367	newp->mborder = NULL;
368	newp->wcorder = `0`;
369	newp->used_in_level = `0`;
370	newp->is_character = is_character;
371
372	/ Will be assigned later. XXX /
373	newp->mbseqorder = `0`;
374	newp->wcseqorder = `0`;
375
376	/ Will be allocated later. /
377	newp->weights = NULL;
378
379	newp->file = NULL;
380	newp->line = `0`;
381
382	newp->section = collate->current_section;
383
384	newp->last = NULL;
385	newp->next = NULL;
386
387	newp->mbnext = NULL;
388	newp->mblast = NULL;
389
390	newp->wcnext = NULL;
391	newp->wclast = NULL;
392
393	return newp;
394	}
395
396
397	static struct symbol_t *
398	new_symbol (struct locale_collate_t collate, const* char *name, size_t len)
399	{
400	struct symbol_t *newp;
401
402	newp = (struct symbol_t ) obstack_alloc (&collate->mempool, sizeof* (*newp));
403
404	newp->name = obstack_copy0 (&collate->mempool, name, len);
405	newp->order = NULL;
406
407	newp->file = NULL;
408	newp->line = `0`;
409
410	return newp;
411	}
412
413
414	/ Test whether this name is already defined somewhere. /
415	static int
416	check_duplicate (struct linereader ldfile, struct* locale_collate_t *collate,
417	const struct charmap_t *charmap,
418	struct repertoire_t repertoire, const* char *symbol,
419	size_t symbol_len)
420	{
421	void *ignore = NULL;
422
423	if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == `0`)
424	{
425	lr_error (ldfile, _("`%.*s' already defined in charmap"),
426	(int) symbol_len, symbol);
427	return `1`;
428	}
429
430	if (repertoire != NULL
431	&& (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore)
432	== `0`))
433	{
434	lr_error (ldfile, _("`%.*s' already defined in repertoire"),
435	(int) symbol_len, symbol);
436	return `1`;
437	}
438
439	if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == `0`)
440	{
441	lr_error (ldfile, _("`%.*s' already defined as collating symbol"),
442	(int) symbol_len, symbol);
443	return `1`;
444	}
445
446	if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == `0`)
447	{
448	lr_error (ldfile, _("`%.*s' already defined as collating element"),
449	(int) symbol_len, symbol);
450	return `1`;
451	}
452
453	return `0`;
454	}
455
456
457	/ Read the direction specification. /
458	static void
459	read_directions (struct linereader ldfile, struct* token *arg,
460	const struct charmap_t *charmap,
461	struct repertoire_t repertoire, struct* localedef_t *result)
462	{
463	int cnt = `0`;
464	int max = nrules ?: `10`;
465	enum coll_sort_rule rules = calloc (max, sizeof* (*rules));
466	int warned = `0`;
467	struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
468
469	while (`1`)
470	{
471	int valid = `0`;
472
473	if (arg->tok == tok_forward)
474	{
475	if (rules[cnt] & sort_backward)
476	{
477	if (! warned)
478	{
479	lr_error (ldfile, _("\
480	%s: `forward' and `backward' are mutually excluding each other"),
481	"LC_COLLATE");
482	warned = `1`;
483	}
484	}
485	else if (rules[cnt] & sort_forward)
486	{
487	if (! warned)
488	{
489	lr_error (ldfile, _("\
490	%s: `%s' mentioned more than once in definition of weight %d"),
491	"LC_COLLATE", "forward", cnt + `1`);
492	}
493	}
494	else
495	rules[cnt] \|= sort_forward;
496
497	valid = `1`;
498	}
499	else if (arg->tok == tok_backward)
500	{
501	if (rules[cnt] & sort_forward)
502	{
503	if (! warned)
504	{
505	lr_error (ldfile, _("\
506	%s: `forward' and `backward' are mutually excluding each other"),
507	"LC_COLLATE");
508	warned = `1`;
509	}
510	}
511	else if (rules[cnt] & sort_backward)
512	{
513	if (! warned)
514	{
515	lr_error (ldfile, _("\
516	%s: `%s' mentioned more than once in definition of weight %d"),
517	"LC_COLLATE", "backward", cnt + `1`);
518	}
519	}
520	else
521	rules[cnt] \|= sort_backward;
522
523	valid = `1`;
524	}
525	else if (arg->tok == tok_position)
526	{
527	if (rules[cnt] & sort_position)
528	{
529	if (! warned)
530	{
531	lr_error (ldfile, _("\
532	%s: `%s' mentioned more than once in definition of weight %d"),
533	"LC_COLLATE", "position", cnt + `1`);
534	}
535	}
536	else
537	rules[cnt] \|= sort_position;
538
539	valid = `1`;
540	}
541
542	if (valid)
543	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
544
545	if (arg->tok == tok_eof \|\| arg->tok == tok_eol \|\| arg->tok == tok_comma
546	\|\| arg->tok == tok_semicolon)
547	{
548	if (! valid && ! warned)
549	{
550	lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
551	warned = `1`;
552	}
553
554	/ See whether we have to increment the counter. /
555	if (arg->tok != tok_comma && rules[cnt] != `0`)
556	{
557	/ Add the default `forward' if we have seen only `position'. /
558	if (rules[cnt] == sort_position)
559	rules[cnt] = sort_position \| sort_forward;
560
561	++cnt;
562	}
563
564	if (arg->tok == tok_eof \|\| arg->tok == tok_eol)
565	/ End of line or file, so we exit the loop. /
566	break;
567
568	if (nrules == `0`)
569	{
570	/ See whether we have enough room in the array. /
571	if (cnt == max)
572	{
573	max += `10`;
574	rules = (enum coll_sort_rule *) xrealloc (rules,
575	max
576	* sizeof (*rules));
577	memset (&rules[cnt], `'\0'`, (max - cnt) * sizeof (*rules));
578	}
579	}
580	else
581	{
582	if (cnt == nrules)
583	{
584	/ There must not be any more rule. /
585	if (! warned)
586	{
587	lr_error (ldfile, _("\
588	%s: too many rules; first entry only had %d"),
589	"LC_COLLATE", nrules);
590	warned = `1`;
591	}
592
593	lr_ignore_rest (ldfile, `0`);
594	break;
595	}
596	}
597	}
598	else
599	{
600	if (! warned)
601	{
602	lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
603	warned = `1`;
604	}
605	}
606
607	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
608	}
609
610	if (nrules == `0`)
611	{
612	/ Now we know how many rules we have. /
613	nrules = cnt;
614	rules = (enum coll_sort_rule *) xrealloc (rules,
615	nrules * sizeof (*rules));
616	}
617	else
618	{
619	if (cnt < nrules)
620	{
621	/ Not enough rules in this specification. /
622	if (! warned)
623	lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
624
625	do
626	rules[cnt] = sort_forward;
627	while (++cnt < nrules);
628	}
629	}
630
631	collate->current_section->rules = rules;
632	}
633
634
635	static struct element_t *
636	find_element (struct linereader ldfile, struct* locale_collate_t *collate,
637	const char *str, size_t len)
638	{
639	void *result = NULL;
640
641	/ Search for the entries among the collation sequences already define. /
642	if (find_entry (&collate->seq_table, str, len, &result) != `0`)
643	{
644	/ Nope, not define yet. So we see whether it is a*
645	collation symbol. /*
646	void *ptr;
647
648	if (find_entry (&collate->sym_table, str, len, &ptr) == `0`)
649	{
650	/ It's a collation symbol. /
651	struct symbol_t sym = (struct* symbol_t *) ptr;
652	result = sym->order;
653
654	if (result == NULL)
655	result = sym->order = new_element (collate, NULL, `0`, NULL,
656	NULL, `0`, `0`);
657	}
658	else if (find_entry (&collate->elem_table, str, len, &result) != `0`)
659	{
660	/ It's also no collation element. So it is a character*
661	element defined later. /*
662	result = new_element (collate, NULL, `0`, NULL, str, len, `1`);
663	/ Insert it into the sequence table. /
664	insert_entry (&collate->seq_table, str, len, result);
665	}
666	}
667
668	return (struct element_t *) result;
669	}
670
671
672	static void
673	unlink_element (struct locale_collate_t *collate)
674	{
675	if (collate->cursor == collate->start)
676	{
677	assert (collate->cursor->next == NULL);
678	assert (collate->cursor->last == NULL);
679	collate->cursor = NULL;
680	}
681	else
682	{
683	if (collate->cursor->next != NULL)
684	collate->cursor->next->last = collate->cursor->last;
685	if (collate->cursor->last != NULL)
686	collate->cursor->last->next = collate->cursor->next;
687	collate->cursor = collate->cursor->last;
688	}
689	}
690
691
692	static void
693	insert_weights (struct linereader ldfile, struct* element_t *elem,
694	const struct charmap_t *charmap,
695	struct repertoire_t repertoire, struct* localedef_t *result,
696	enum token_t ellipsis)
697	{
698	int weight_cnt;
699	struct token *arg;
700	struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
701
702	/ Initialize all the fields. /
703	elem->file = ldfile->fname;
704	elem->line = ldfile->lineno;
705
706	elem->last = collate->cursor;
707	elem->next = collate->cursor ? collate->cursor->next : NULL;
708	if (collate->cursor != NULL && collate->cursor->next != NULL)
709	collate->cursor->next->last = elem;
710	if (collate->cursor != NULL)
711	collate->cursor->next = elem;
712	if (collate->start == NULL)
713	{
714	assert (collate->cursor == NULL);
715	collate->start = elem;
716	}
717
718	elem->section = collate->current_section;
719
720	if (collate->current_section->first == NULL)
721	collate->current_section->first = elem;
722	if (collate->current_section->last == collate->cursor)
723	collate->current_section->last = elem;
724
725	collate->cursor = elem;
726
727	elem->weights = (struct element_list_t *)
728	obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
729	memset (elem->weights, `'\0'`, nrules * sizeof (struct element_list_t));
730
731	weight_cnt = `0`;
732
733	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
734	do
735	{
736	if (arg->tok == tok_eof \|\| arg->tok == tok_eol)
737	break;
738
739	if (arg->tok == tok_ignore)
740	{
741	/ The weight for this level has to be ignored. We use the*
742	null pointer to indicate this. /*
743	elem->weights[weight_cnt].w = (struct element_t **)
744	obstack_alloc (&collate->mempool, sizeof (struct element_t *));
745	elem->weights[weight_cnt].w[`0`] = NULL;
746	elem->weights[weight_cnt].cnt = `1`;
747	}
748	else if (arg->tok == tok_bsymbol \|\| arg->tok == tok_ucs4)
749	{
750	char ucs4str[`10`];
751	struct element_t *val;
752	char *symstr;
753	size_t symlen;
754
755	if (arg->tok == tok_bsymbol)
756	{
757	symstr = arg->val.str.startmb;
758	symlen = arg->val.str.lenmb;
759	}
760	else
761	{
762	snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4);
763	symstr = ucs4str;
764	symlen = `9`;
765	}
766
767	val = find_element (ldfile, collate, symstr, symlen);
768	if (val == NULL)
769	break;
770
771	elem->weights[weight_cnt].w = (struct element_t **)
772	obstack_alloc (&collate->mempool, sizeof (struct element_t *));
773	elem->weights[weight_cnt].w[`0`] = val;
774	elem->weights[weight_cnt].cnt = `1`;
775	}
776	else if (arg->tok == tok_string)
777	{
778	/ Split the string up in the individual characters and put*
779	the element definitions in the list. /*
780	const char *cp = arg->val.str.startmb;
781	int cnt = `0`;
782	struct element_t *charelem;
783	struct element_t **weights = NULL;
784	int max = `0`;
785
786	if (*cp == `'\0'`)
787	{
788	lr_error (ldfile, _("%s: empty weight string not allowed"),
789	"LC_COLLATE");
790	lr_ignore_rest (ldfile, `0`);
791	break;
792	}
793
794	do
795	{
796	if (*cp == `'<'`)
797	{
798	/ Ahh, it's a bsymbol or an UCS4 value. If it's*
799	the latter we have to unify the name. /*
800	const char *startp = ++cp;
801	size_t len;
802
803	while (*cp != `'>'`)
804	{
805	if (*cp == ldfile->escape_char)
806	++cp;
807	if (*cp == `'\0'`)
808	/ It's a syntax error. /
809	goto syntax;
810
811	++cp;
812	}
813
814	if (cp - startp == `5` && startp[`0`] == `'U'`
815	&& isxdigit (startp[`1`]) && isxdigit (startp[`2`])
816	&& isxdigit (startp[`3`]) && isxdigit (startp[`4`]))
817	{
818	unsigned int ucs4 = strtoul (startp + `1`, NULL, `16`);
819	char *newstr;
820
821	newstr = (char *) xmalloc (`10`);
822	snprintf (newstr, `10`, "U%08X", ucs4);
823	startp = newstr;
824
825	len = `9`;
826	}
827	else
828	len = cp - startp;
829
830	charelem = find_element (ldfile, collate, startp, len);
831	++cp;
832	}
833	else
834	{
835	/ People really shouldn't use characters directly in*
836	the string. Especially since it's not really clear
837	what this means. We interpret all characters in the
838	string as if that would be bsymbols. Otherwise we
839	would have to match back to bsymbols somehow and this
840	is normally not what people normally expect. /*
841	charelem = find_element (ldfile, collate, cp++, `1`);
842	}
843
844	if (charelem == NULL)
845	{
846	/ We ignore the rest of the line. /
847	lr_ignore_rest (ldfile, `0`);
848	break;
849	}
850
851	/ Add the pointer. /
852	if (cnt >= max)
853	{
854	struct element_t **newp;
855	max += `10`;
856	newp = (struct element_t **)
857	alloca (max * sizeof (struct element_t *));
858	memcpy (newp, weights, cnt * sizeof (struct element_t *));
859	weights = newp;
860	}
861	weights[cnt++] = charelem;
862	}
863	while (*cp != `'\0'`);
864
865	/ Now store the information. /
866	elem->weights[weight_cnt].w = (struct element_t **)
867	obstack_alloc (&collate->mempool,
868	cnt * sizeof (struct element_t *));
869	memcpy (elem->weights[weight_cnt].w, weights,
870	cnt * sizeof (struct element_t *));
871	elem->weights[weight_cnt].cnt = cnt;
872
873	/ We don't need the string anymore. /
874	free (arg->val.str.startmb);
875	}
876	else if (ellipsis != tok_none
877	&& (arg->tok == tok_ellipsis2
878	\|\| arg->tok == tok_ellipsis3
879	\|\| arg->tok == tok_ellipsis4))
880	{
881	/ It must be the same ellipsis as used in the initial column. /
882	if (arg->tok != ellipsis)
883	lr_error (ldfile, _("\
884	%s: weights must use the same ellipsis symbol as the name"),
885	"LC_COLLATE");
886
887	/ The weight for this level will depend on the element*
888	iterating over the range. Put a placeholder. /*
889	elem->weights[weight_cnt].w = (struct element_t **)
890	obstack_alloc (&collate->mempool, sizeof (struct element_t *));
891	elem->weights[weight_cnt].w[`0`] = ELEMENT_ELLIPSIS2;
892	elem->weights[weight_cnt].cnt = `1`;
893	}
894	else
895	{
896	syntax:
897	/ It's a syntax error. /
898	lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
899	lr_ignore_rest (ldfile, `0`);
900	break;
901	}
902
903	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
904	/ This better should be the end of the line or a semicolon. /
905	if (arg->tok == tok_semicolon)
906	/ OK, ignore this and read the next token. /
907	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
908	else if (arg->tok != tok_eof && arg->tok != tok_eol)
909	{
910	/ It's a syntax error. /
911	lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
912	lr_ignore_rest (ldfile, `0`);
913	break;
914	}
915	}
916	while (++weight_cnt < nrules);
917
918	if (weight_cnt < nrules)
919	{
920	/ This means the rest of the line uses the current element as*
921	the weight. /*
922	do
923	{
924	elem->weights[weight_cnt].w = (struct element_t **)
925	obstack_alloc (&collate->mempool, sizeof (struct element_t *));
926	if (ellipsis == tok_none)
927	elem->weights[weight_cnt].w[`0`] = elem;
928	else
929	elem->weights[weight_cnt].w[`0`] = ELEMENT_ELLIPSIS2;
930	elem->weights[weight_cnt].cnt = `1`;
931	}
932	while (++weight_cnt < nrules);
933	}
934	else
935	{
936	if (arg->tok == tok_ignore \|\| arg->tok == tok_bsymbol)
937	{
938	/ Too many rule values. /
939	lr_error (ldfile, _("%s: too many values"), "LC_COLLATE");
940	lr_ignore_rest (ldfile, `0`);
941	}
942	else
943	lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof);
944	}
945	}
946
947
948	static int
949	insert_value (struct linereader ldfile, const* char *symstr, size_t symlen,
950	const struct charmap_t charmap, struct* repertoire_t *repertoire,
951	struct localedef_t *result)
952	{
953	/ First find out what kind of symbol this is. /
954	struct charseq *seq;
955	uint32_t wc;
956	struct element_t *elem = NULL;
957	struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
958
959	/ Try to find the character in the charmap. /
960	seq = charmap_find_value (charmap, symstr, symlen);
961
962	/ Determine the wide character. /
963	if (seq == NULL \|\| seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
964	{
965	wc = repertoire_find_value (repertoire, symstr, symlen);
966	if (seq != NULL)
967	seq->ucs4 = wc;
968	}
969	else
970	wc = seq->ucs4;
971
972	if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
973	{
974	/ It's no character, so look through the collation elements and*
975	symbol list. /*
976	void *ptr = elem;
977	if (find_entry (&collate->elem_table, symstr, symlen, &ptr) != `0`)
978	{
979	void *result;
980	struct symbol_t *sym = NULL;
981
982	/ It's also collation element. Therefore it's either a*
983	collating symbol or it's a character which is not
984	supported by the character set. In the later case we
985	simply create a dummy entry. /*
986	if (find_entry (&collate->sym_table, symstr, symlen, &result) == `0`)
987	{
988	/ It's a collation symbol. /
989	sym = (struct symbol_t *) result;
990
991	elem = sym->order;
992	}
993
994	if (elem == NULL)
995	{
996	elem = new_element (collate, NULL, `0`, NULL, symstr, symlen, `0`);
997
998	if (sym != NULL)
999	sym->order = elem;
1000	else
1001	/ Enter a fake element in the sequence table. This*
1002	won't cause anything in the output since there is
1003	no multibyte or wide character associated with
1004	it. /*
1005	insert_entry (&collate->seq_table, symstr, symlen, elem);
1006	}
1007	}
1008	else
1009	/ Copy the result back. /
1010	elem = ptr;
1011	}
1012	else
1013	{
1014	/ Otherwise the symbols stands for a character. /
1015	void *ptr = elem;
1016	if (find_entry (&collate->seq_table, symstr, symlen, &ptr) != `0`)
1017	{
1018	uint32_t wcs[`2`] = { wc, `0` };
1019
1020	/ We have to allocate an entry. /
1021	elem = new_element (collate,
1022	seq != NULL ? (char *) seq->bytes : NULL,
1023	seq != NULL ? seq->nbytes : `0`,
1024	wc == ILLEGAL_CHAR_VALUE ? NULL : wcs,
1025	symstr, symlen, `1`);
1026
1027	/ And add it to the table. /
1028	if (insert_entry (&collate->seq_table, symstr, symlen, elem) != `0`)
1029	/ This cannot happen. /
1030	assert (! "Internal error");
1031	}
1032	else
1033	{
1034	/ Copy the result back. /
1035	elem = ptr;
1036
1037	/ Maybe the character was used before the definition. In this case*
1038	we have to insert the byte sequences now. /*
1039	if (elem->mbs == NULL && seq != NULL)
1040	{
1041	elem->mbs = obstack_copy0 (&collate->mempool,
1042	seq->bytes, seq->nbytes);
1043	elem->nmbs = seq->nbytes;
1044	}
1045
1046	if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE)
1047	{
1048	uint32_t wcs[`2`] = { wc, `0` };
1049
1050	elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs));
1051	elem->nwcs = `1`;
1052	}
1053	}
1054	}
1055
1056	/ Test whether this element is not already in the list. /
1057	if (elem->next != NULL \|\| elem == collate->cursor)
1058	{
1059	lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"),
1060	(int) symlen, symstr, elem->file, elem->line);
1061	lr_ignore_rest (ldfile, `0`);
1062	return `1`;
1063	}
1064
1065	insert_weights (ldfile, elem, charmap, repertoire, result, tok_none);
1066
1067	return `0`;
1068	}
1069
1070
1071	static void
1072	handle_ellipsis (struct linereader ldfile, const* char *symstr, size_t symlen,
1073	enum token_t ellipsis, const struct charmap_t *charmap,
1074	struct repertoire_t *repertoire,
1075	struct localedef_t *result)
1076	{
1077	struct element_t *startp;
1078	struct element_t *endp;
1079	struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
1080
1081	/ Unlink the entry added for the ellipsis. /
1082	unlink_element (collate);
1083	startp = collate->cursor;
1084
1085	/ Process and add the end-entry. /
1086	if (symstr != NULL
1087	&& insert_value (ldfile, symstr, symlen, charmap, repertoire, result))
1088	/ Something went wrong with inserting the to-value. This means*
1089	we cannot process the ellipsis. /*
1090	return;
1091
1092	/ Reset the cursor. /
1093	collate->cursor = startp;
1094
1095	/ Now we have to handle many different situations:*
1096	- we have to distinguish between the three different ellipsis forms
1097	- the is the ellipsis at the beginning, in the middle, or at the end.
1098	*/
1099	endp = collate->cursor->next;
1100	assert (symstr == NULL \|\| endp != NULL);
1101
1102	/ XXX The following is probably very wrong since also collating symbols*
1103	can appear in ranges. But do we want/can refine the test for that? /*
1104	#if 0
1105	/ Both, the start and the end symbol, must stand for characters. /
1106	if ((startp != NULL && (startp->name == NULL \|\| ! startp->is_character))
1107	\|\| (endp != NULL && (endp->name == NULL\|\| ! endp->is_character)))
1108	{
1109	lr_error (ldfile, _("\
1110	%s: the start and the end symbol of a range must stand for characters"),
1111	"LC_COLLATE");
1112	return;
1113	}
1114	#endif
1115
1116	if (ellipsis == tok_ellipsis3)
1117	{
1118	/ One requirement we make here: the length of the byte*
1119	sequences for the first and end character must be the same.
1120	This is mainly to prevent unwanted effects and this is often
1121	not what is wanted. /*
1122	size_t len = (startp->mbs != NULL ? startp->nmbs
1123	: (endp->mbs != NULL ? endp->nmbs : `0`));
1124	char mbcnt[len + `1`];
1125	char mbend[len + `1`];
1126
1127	/ Well, this should be caught somewhere else already. Just to*
1128	make sure. /*
1129	assert (startp == NULL \|\| startp->wcs == NULL \|\| startp->wcs[`1`] == `0`);
1130	assert (endp == NULL \|\| endp->wcs == NULL \|\| endp->wcs[`1`] == `0`);
1131
1132	if (startp != NULL && endp != NULL
1133	&& startp->mbs != NULL && endp->mbs != NULL
1134	&& startp->nmbs != endp->nmbs)
1135	{
1136	lr_error (ldfile, _("\
1137	%s: byte sequences of first and last character must have the same length"),
1138	"LC_COLLATE");
1139	return;
1140	}
1141
1142	/ Determine whether we have to generate multibyte sequences. /
1143	if ((startp == NULL \|\| startp->mbs != NULL)
1144	&& (endp == NULL \|\| endp->mbs != NULL))
1145	{
1146	int cnt;
1147	int ret;
1148
1149	/ Prepare the beginning byte sequence. This is either from the*
1150	beginning byte sequence or it is all nulls if it was an
1151	initial ellipsis. /*
1152	if (startp == NULL \|\| startp->mbs == NULL)
1153	memset (mbcnt, `'\0'`, len);
1154	else
1155	{
1156	memcpy (mbcnt, startp->mbs, len);
1157
1158	/ And increment it so that the value is the first one we will*
1159	try to insert. /*
1160	for (cnt = len - `1`; cnt >= `0`; --cnt)
1161	if (++mbcnt[cnt] != `'\0'`)
1162	break;
1163	}
1164	mbcnt[len] = `'\0'`;
1165
1166	/ And the end sequence. /
1167	if (endp == NULL \|\| endp->mbs == NULL)
1168	memset (mbend, `'\0'`, len);
1169	else
1170	memcpy (mbend, endp->mbs, len);
1171	mbend[len] = `'\0'`;
1172
1173	/ Test whether we have a correct range. /
1174	ret = memcmp (mbcnt, mbend, len);
1175	if (ret >= `0`)
1176	{
1177	if (ret > `0`)
1178	lr_error (ldfile, _("%s: byte sequence of first character of \
1179	range is not lower than that of the last character"), "LC_COLLATE");
1180	return;
1181	}
1182
1183	/ Generate the byte sequences data. /
1184	while (`1`)
1185	{
1186	struct charseq *seq;
1187
1188	/ Quite a bit of work ahead. We have to find the character*
1189	definition for the byte sequence and then determine the
1190	wide character belonging to it. /*
1191	seq = charmap_find_symbol (charmap, mbcnt, len);
1192	if (seq != NULL)
1193	{
1194	struct element_t *elem;
1195	size_t namelen;
1196
1197	/ I don't think this can ever happen. /
1198	assert (seq->name != NULL);
1199	namelen = strlen (seq->name);
1200
1201	if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1202	seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1203	namelen);
1204
1205	/ Now we are ready to insert the new value in the*
1206	sequence. Find out whether the element is
1207	already known. /*
1208	void *ptr;
1209	if (find_entry (&collate->seq_table, seq->name, namelen,
1210	&ptr) != `0`)
1211	{
1212	uint32_t wcs[`2`] = { seq->ucs4, `0` };
1213
1214	/ We have to allocate an entry. /
1215	elem = new_element (collate, mbcnt, len,
1216	seq->ucs4 == ILLEGAL_CHAR_VALUE
1217	? NULL : wcs, seq->name,
1218	namelen, `1`);
1219
1220	/ And add it to the table. /
1221	if (insert_entry (&collate->seq_table, seq->name,
1222	namelen, elem) != `0`)
1223	/ This cannot happen. /
1224	assert (! "Internal error");
1225	}
1226	else
1227	/ Copy the result. /
1228	elem = ptr;
1229
1230	/ Test whether this element is not already in the list. /
1231	if (elem->next != NULL \|\| (collate->cursor != NULL
1232	&& elem->next == collate->cursor))
1233	{
1234	lr_error (ldfile, _("\
1235	order for `%.*s' already defined at %s:%Zu"),
1236	(int) namelen, seq->name,
1237	elem->file, elem->line);
1238	goto increment;
1239	}
1240
1241	/ Enqueue the new element. /
1242	elem->last = collate->cursor;
1243	if (collate->cursor == NULL)
1244	elem->next = NULL;
1245	else
1246	{
1247	elem->next = collate->cursor->next;
1248	elem->last->next = elem;
1249	if (elem->next != NULL)
1250	elem->next->last = elem;
1251	}
1252	if (collate->start == NULL)
1253	{
1254	assert (collate->cursor == NULL);
1255	collate->start = elem;
1256	}
1257	collate->cursor = elem;
1258
1259	/ Add the weight value. We take them from the*
1260	`ellipsis_weights' member of `collate'. /*
1261	elem->weights = (struct element_list_t *)
1262	obstack_alloc (&collate->mempool,
1263	nrules * sizeof (struct element_list_t));
1264	for (cnt = `0`; cnt < nrules; ++cnt)
1265	if (collate->ellipsis_weight.weights[cnt].cnt == `1`
1266	&& (collate->ellipsis_weight.weights[cnt].w[`0`]
1267	== ELEMENT_ELLIPSIS2))
1268	{
1269	elem->weights[cnt].w = (struct element_t **)
1270	obstack_alloc (&collate->mempool,
1271	sizeof (struct element_t *));
1272	elem->weights[cnt].w[`0`] = elem;
1273	elem->weights[cnt].cnt = `1`;
1274	}
1275	else
1276	{
1277	/ Simply use the weight from `ellipsis_weight'. /
1278	elem->weights[cnt].w =
1279	collate->ellipsis_weight.weights[cnt].w;
1280	elem->weights[cnt].cnt =
1281	collate->ellipsis_weight.weights[cnt].cnt;
1282	}
1283	}
1284
1285	/ Increment for the next round. /
1286	increment:
1287	for (cnt = len - `1`; cnt >= `0`; --cnt)
1288	if (++mbcnt[cnt] != `'\0'`)
1289	break;
1290
1291	/ Find out whether this was all. /
1292	if (cnt < `0` \|\| memcmp (mbcnt, mbend, len) >= `0`)
1293	/ Yep, that's all. /
1294	break;
1295	}
1296	}
1297	}
1298	else
1299	{
1300	/ For symbolic range we naturally must have a beginning and an*
1301	end specified by the user. /*
1302	if (startp == NULL)
1303	lr_error (ldfile, _("\
1304	%s: symbolic range ellipsis must not directly follow `order_start'"),
1305	"LC_COLLATE");
1306	else if (endp == NULL)
1307	lr_error (ldfile, _("\
1308	%s: symbolic range ellipsis must not be directly followed by `order_end'"),
1309	"LC_COLLATE");
1310	else
1311	{
1312	/ Determine the range. To do so we have to determine the*
1313	common prefix of the both names and then the numeric
1314	values of both ends. /*
1315	size_t lenfrom = strlen (startp->name);
1316	size_t lento = strlen (endp->name);
1317	char buf[lento + `1`];
1318	int preflen = `0`;
1319	long int from;
1320	long int to;
1321	char *cp;
1322	int base = ellipsis == tok_ellipsis2 ? `16` : `10`;
1323
1324	if (lenfrom != lento)
1325	{
1326	invalid_range:
1327	lr_error (ldfile, _("\
1328	`%s' and `%.*s' are not valid names for symbolic range"),
1329	startp->name, (int) lento, endp->name);
1330	return;
1331	}
1332
1333	while (startp->name[preflen] == endp->name[preflen])
1334	if (startp->name[preflen] == `'\0'`)
1335	/ Nothing to be done. The start and end point are identical*
1336	and while inserting the end point we have already given
1337	the user an error message. /*
1338	return;
1339	else
1340	++preflen;
1341
1342	errno = `0`;
1343	from = strtol (startp->name + preflen, &cp, base);
1344	if ((from == UINT_MAX && errno == ERANGE) \|\| *cp != `'\0'`)
1345	goto invalid_range;
1346
1347	errno = `0`;
1348	to = strtol (endp->name + preflen, &cp, base);
1349	if ((to == UINT_MAX && errno == ERANGE) \|\| *cp != `'\0'`)
1350	goto invalid_range;
1351
1352	/ Copy the prefix. /
1353	memcpy (buf, startp->name, preflen);
1354
1355	/ Loop over all values. /
1356	for (++from; from < to; ++from)
1357	{
1358	struct element_t *elem = NULL;
1359	struct charseq *seq;
1360	uint32_t wc;
1361	int cnt;
1362
1363	/ Generate the name. /
1364	sprintf (buf + preflen, base == `10` ? "%0ld" : "%0lX",
1365	(int) (lenfrom - preflen), from);
1366
1367	/ Look whether this name is already defined. /
1368	void *ptr;
1369	if (find_entry (&collate->seq_table, buf, symlen, &ptr) == `0`)
1370	{
1371	/ Copy back the result. /
1372	elem = ptr;
1373
1374	if (elem->next != NULL \|\| (collate->cursor != NULL
1375	&& elem->next == collate->cursor))
1376	{
1377	lr_error (ldfile, _("\
1378	%s: order for `%.*s' already defined at %s:%Zu"),
1379	"LC_COLLATE", (int) lenfrom, buf,
1380	elem->file, elem->line);
1381	continue;
1382	}
1383
1384	if (elem->name == NULL)
1385	{
1386	lr_error (ldfile, _("%s: `%s' must be a character"),
1387	"LC_COLLATE", buf);
1388	continue;
1389	}
1390	}
1391
1392	if (elem == NULL \|\| (elem->mbs == NULL && elem->wcs == NULL))
1393	{
1394	/ Search for a character of this name. /
1395	seq = charmap_find_value (charmap, buf, lenfrom);
1396	if (seq == NULL \|\| seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1397	{
1398	wc = repertoire_find_value (repertoire, buf, lenfrom);
1399
1400	if (seq != NULL)
1401	seq->ucs4 = wc;
1402	}
1403	else
1404	wc = seq->ucs4;
1405
1406	if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
1407	/ We don't know anything about a character with this*
1408	name. XXX Should we warn? /*
1409	continue;
1410
1411	if (elem == NULL)
1412	{
1413	uint32_t wcs[`2`] = { wc, `0` };
1414
1415	/ We have to allocate an entry. /
1416	elem = new_element (collate,
1417	seq != NULL
1418	? (char *) seq->bytes : NULL,
1419	seq != NULL ? seq->nbytes : `0`,
1420	wc == ILLEGAL_CHAR_VALUE
1421	? NULL : wcs, buf, lenfrom, `1`);
1422	}
1423	else
1424	{
1425	/ Update the element. /
1426	if (seq != NULL)
1427	{
1428	elem->mbs = obstack_copy0 (&collate->mempool,
1429	seq->bytes, seq->nbytes);
1430	elem->nmbs = seq->nbytes;
1431	}
1432
1433	if (wc != ILLEGAL_CHAR_VALUE)
1434	{
1435	uint32_t zero = `0`;
1436
1437	obstack_grow (&collate->mempool,
1438	&wc, sizeof (uint32_t));
1439	obstack_grow (&collate->mempool,
1440	&zero, sizeof (uint32_t));
1441	elem->wcs = obstack_finish (&collate->mempool);
1442	elem->nwcs = `1`;
1443	}
1444	}
1445
1446	elem->file = ldfile->fname;
1447	elem->line = ldfile->lineno;
1448	elem->section = collate->current_section;
1449	}
1450
1451	/ Enqueue the new element. /
1452	elem->last = collate->cursor;
1453	elem->next = collate->cursor->next;
1454	elem->last->next = elem;
1455	if (elem->next != NULL)
1456	elem->next->last = elem;
1457	collate->cursor = elem;
1458
1459	/ Now add the weights. They come from the `ellipsis_weights'*
1460	member of `collate'. /*
1461	elem->weights = (struct element_list_t *)
1462	obstack_alloc (&collate->mempool,
1463	nrules * sizeof (struct element_list_t));
1464	for (cnt = `0`; cnt < nrules; ++cnt)
1465	if (collate->ellipsis_weight.weights[cnt].cnt == `1`
1466	&& (collate->ellipsis_weight.weights[cnt].w[`0`]
1467	== ELEMENT_ELLIPSIS2))
1468	{
1469	elem->weights[cnt].w = (struct element_t **)
1470	obstack_alloc (&collate->mempool,
1471	sizeof (struct element_t *));
1472	elem->weights[cnt].w[`0`] = elem;
1473	elem->weights[cnt].cnt = `1`;
1474	}
1475	else
1476	{
1477	/ Simly use the weight from `ellipsis_weight'. /
1478	elem->weights[cnt].w =
1479	collate->ellipsis_weight.weights[cnt].w;
1480	elem->weights[cnt].cnt =
1481	collate->ellipsis_weight.weights[cnt].cnt;
1482	}
1483	}
1484	}
1485	}
1486	/ Move the cursor to the last entry in the ellipsis.*
1487	Subsequent operations need to start from the last entry. /*
1488	collate->cursor = endp;
1489	}
1490
1491
1492	static void
1493	collate_startup (struct linereader ldfile, struct* localedef_t *locale,
1494	struct localedef_t copy_locale, int* ignore_content)
1495	{
1496	if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL)
1497	{
1498	struct locale_collate_t *collate;
1499
1500	if (copy_locale == NULL)
1501	{
1502	collate = locale->categories[LC_COLLATE].collate =
1503	(struct locale_collate_t *)
1504	xcalloc (`1`, sizeof (struct locale_collate_t));
1505
1506	/ Init the various data structures. /
1507	init_hash (&collate->elem_table, `100`);
1508	init_hash (&collate->sym_table, `100`);
1509	init_hash (&collate->seq_table, `500`);
1510	obstack_init (&collate->mempool);
1511
1512	collate->col_weight_max = -`1`;
1513	}
1514	else
1515	/ Reuse the copy_locale's data structures. /
1516	collate = locale->categories[LC_COLLATE].collate =
1517	copy_locale->categories[LC_COLLATE].collate;
1518	}
1519
1520	ldfile->translate_strings = `0`;
1521	ldfile->return_widestr = `0`;
1522	}
1523
1524
1525	void
1526	collate_finish (struct localedef_t locale, const* struct charmap_t *charmap)
1527	{
1528	/ Now is the time when we can assign the individual collation*
1529	values for all the symbols. We have possibly different values
1530	for the wide- and the multibyte-character symbols. This is done
1531	since it might make a difference in the encoding if there is in
1532	some cases no multibyte-character but there are wide-characters.
1533	(The other way around it is not important since theencoded
1534	collation value in the wide-character case is 32 bits wide and
1535	therefore requires no encoding).
1536
1537	The lowest collation value assigned is 2. Zero is reserved for
1538	the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1539	functions and 1 is used to separate the individual passes for the
1540	different rules.
1541
1542	We also have to construct is list with all the bytes/words which
1543	can come first in a sequence, followed by all the elements which
1544	also start with this byte/word. The order is reverse which has
1545	among others the important effect that longer strings are located
1546	first in the list. This is required for the output data since
1547	the algorithm used in `strcoll' etc depends on this.
1548
1549	The multibyte case is easy. We simply sort into an array with
1550	256 elements. /*
1551	struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1552	int mbact[nrules];
1553	int wcact;
1554	int mbseqact;
1555	int wcseqact;
1556	struct element_t *runp;
1557	int i;
1558	int need_undefined = `0`;
1559	struct section_list *sect;
1560	int ruleidx;
1561	int nr_wide_elems = `0`;
1562
1563	if (collate == NULL)
1564	{
1565	/ No data, no check. Issue a warning. /
1566	record_warning (_("No definition for %s category found"),
1567	"LC_COLLATE");
1568	return;
1569	}
1570
1571	/ If this assertion is hit change the type in `element_t'. /
1572	assert (nrules <= sizeof (runp->used_in_level) * `8`);
1573
1574	/ Make sure that the `position' rule is used either in all sections*
1575	or in none. /*
1576	for (i = `0`; i < nrules; ++i)
1577	for (sect = collate->sections; sect != NULL; sect = sect->next)
1578	if (sect != collate->current_section
1579	&& sect->rules != NULL
1580	&& ((sect->rules[i] & sort_position)
1581	!= (collate->current_section->rules[i] & sort_position)))
1582	{
1583	record_error (`0`, `0`, _("\
1584	%s: `position' must be used for a specific level in all sections or none"),
1585	"LC_COLLATE");
1586	break;
1587	}
1588
1589	/ Find out which elements are used at which level. At the same*
1590	time we find out whether we have any undefined symbols. /*
1591	runp = collate->start;
1592	while (runp != NULL)
1593	{
1594	if (runp->mbs != NULL)
1595	{
1596	for (i = `0`; i < nrules; ++i)
1597	{
1598	int j;
1599
1600	for (j = `0`; j < runp->weights[i].cnt; ++j)
1601	/ A NULL pointer as the weight means IGNORE. /
1602	if (runp->weights[i].w[j] != NULL)
1603	{
1604	if (runp->weights[i].w[j]->weights == NULL)
1605	{
1606	record_error_at_line (`0`, `0`, runp->file, runp->line,
1607	_("symbol `%s' not defined"),
1608	runp->weights[i].w[j]->name);
1609
1610	need_undefined = `1`;
1611	runp->weights[i].w[j] = &collate->undefined;
1612	}
1613	else
1614	/ Set the bit for the level. /
1615	runp->weights[i].w[j]->used_in_level \|= `1` << i;
1616	}
1617	}
1618	}
1619
1620	/ Up to the next entry. /
1621	runp = runp->next;
1622	}
1623
1624	/ Walk through the list of defined sequences and assign weights. Also*
1625	create the data structure which will allow generating the single byte
1626	character based tables.
1627
1628	Since at each time only the weights for each of the rules are
1629	only compared to other weights for this rule it is possible to
1630	assign more compact weight values than simply counting all
1631	weights in sequence. We can assign weights from 3, one for each
1632	rule individually and only for those elements, which are actually
1633	used for this rule.
1634
1635	Why is this important? It is not for the wide char table. But
1636	it is for the singlebyte output since here larger numbers have to
1637	be encoded to make it possible to emit the value as a byte
1638	string. /*
1639	for (i = `0`; i < nrules; ++i)
1640	mbact[i] = `2`;
1641	wcact = `2`;
1642	mbseqact = `0`;
1643	wcseqact = `0`;
1644	runp = collate->start;
1645	while (runp != NULL)
1646	{
1647	/ Determine the order. /
1648	if (runp->used_in_level != `0`)
1649	{
1650	runp->mborder = (int *) obstack_alloc (&collate->mempool,
1651	nrules * sizeof (int));
1652
1653	for (i = `0`; i < nrules; ++i)
1654	if ((runp->used_in_level & (`1` << i)) != `0`)
1655	runp->mborder[i] = mbact[i]++;
1656	else
1657	runp->mborder[i] = `0`;
1658	}
1659
1660	if (runp->mbs != NULL)
1661	{
1662	struct element_t **eptr;
1663	struct element_t *lastp = NULL;
1664
1665	/ Find the point where to insert in the list. /
1666	eptr = &collate->mbheads[((unsigned char *) runp->mbs)[`0`]];
1667	while (*eptr != NULL)
1668	{
1669	if ((*eptr)->nmbs < runp->nmbs)
1670	break;
1671
1672	if ((*eptr)->nmbs == runp->nmbs)
1673	{
1674	int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs);
1675
1676	if (c == `0`)
1677	{
1678	/ This should not happen. It means that we have*
1679	to symbols with the same byte sequence. It is
1680	of course an error. /*
1681	record_error_at_line (`0`, `0`, (*eptr)->file,
1682	(*eptr)->line,
1683	_("\
1684	symbol `%s' has the same encoding as"), (*eptr)->name);
1685
1686	record_error_at_line (`0`, `0`, runp->file, runp->line,
1687	_("symbol `%s'"), runp->name);
1688	goto dont_insert;
1689	}
1690	else if (c < `0`)
1691	/ Insert it here. /
1692	break;
1693	}
1694
1695	/ To the next entry. /
1696	lastp = *eptr;
1697	eptr = &(*eptr)->mbnext;
1698	}
1699
1700	/ Set the pointers. /
1701	runp->mbnext = *eptr;
1702	runp->mblast = lastp;
1703	if (*eptr != NULL)
1704	(*eptr)->mblast = runp;
1705	*eptr = runp;
1706	dont_insert:
1707	;
1708	}
1709
1710	if (runp->used_in_level)
1711	{
1712	runp->wcorder = wcact++;
1713
1714	/ We take the opportunity to count the elements which have*
1715	wide characters. /*
1716	++nr_wide_elems;
1717	}
1718
1719	if (runp->is_character)
1720	{
1721	if (runp->nmbs == `1`)
1722	collate->mbseqorder[((unsigned char *) runp->mbs)[`0`]] = mbseqact++;
1723
1724	runp->wcseqorder = wcseqact++;
1725	}
1726	else if (runp->mbs != NULL && runp->weights != NULL)
1727	/ This is for collation elements. /
1728	runp->wcseqorder = wcseqact++;
1729
1730	/ Up to the next entry. /
1731	runp = runp->next;
1732	}
1733
1734	/ Find out whether any of the `mbheads' entries is unset. In this*
1735	case we use the UNDEFINED entry. /*
1736	for (i = `1`; i < `256`; ++i)
1737	if (collate->mbheads[i] == NULL)
1738	{
1739	need_undefined = `1`;
1740	collate->mbheads[i] = &collate->undefined;
1741	}
1742
1743	/ Now to the wide character case. /
1744	collate->wcheads.p = `6`;
1745	collate->wcheads.q = `10`;
1746	wchead_table_init (&collate->wcheads);
1747
1748	collate->wcseqorder.p = `6`;
1749	collate->wcseqorder.q = `10`;
1750	collseq_table_init (&collate->wcseqorder);
1751
1752	/ Start adding. /
1753	runp = collate->start;
1754	while (runp != NULL)
1755	{
1756	if (runp->wcs != NULL)
1757	{
1758	struct element_t *e;
1759	struct element_t **eptr;
1760	struct element_t *lastp;
1761
1762	/ Insert the collation sequence value. /
1763	if (runp->is_character)
1764	collseq_table_add (&collate->wcseqorder, runp->wcs[`0`],
1765	runp->wcseqorder);
1766
1767	/ Find the point where to insert in the list. /
1768	e = wchead_table_get (&collate->wcheads, runp->wcs[`0`]);
1769	eptr = &e;
1770	lastp = NULL;
1771	while (*eptr != NULL)
1772	{
1773	if ((*eptr)->nwcs < runp->nwcs)
1774	break;
1775
1776	if ((*eptr)->nwcs == runp->nwcs)
1777	{
1778	int c = wmemcmp ((wchar_t ) (eptr)->wcs,
1779	(wchar_t *) runp->wcs, runp->nwcs);
1780
1781	if (c == `0`)
1782	{
1783	/ This should not happen. It means that we have*
1784	two symbols with the same byte sequence. It is
1785	of course an error. /*
1786	record_error_at_line (`0`, `0`, (*eptr)->file,
1787	(*eptr)->line,
1788	_("\
1789	symbol `%s' has the same encoding as"), (*eptr)->name);
1790
1791	record_error_at_line (`0`, `0`, runp->file, runp->line,
1792	_("symbol `%s'"), runp->name);
1793	goto dont_insertwc;
1794	}
1795	else if (c < `0`)
1796	/ Insert it here. /
1797	break;
1798	}
1799
1800	/ To the next entry. /
1801	lastp = *eptr;
1802	eptr = &(*eptr)->wcnext;
1803	}
1804
1805	/ Set the pointers. /
1806	runp->wcnext = *eptr;
1807	runp->wclast = lastp;
1808	if (*eptr != NULL)
1809	(*eptr)->wclast = runp;
1810	*eptr = runp;
1811	if (eptr == &e)
1812	wchead_table_add (&collate->wcheads, runp->wcs[`0`], e);
1813	dont_insertwc:
1814	;
1815	}
1816
1817	/ Up to the next entry. /
1818	runp = runp->next;
1819	}
1820
1821	/ Now determine whether the UNDEFINED entry is needed and if yes,*
1822	whether it was defined. /*
1823	collate->undefined.used_in_level = need_undefined ? ~`0ul` : `0`;
1824	if (collate->undefined.file == NULL)
1825	{
1826	if (need_undefined)
1827	{
1828	/ This seems not to be enforced by recent standards. Don't*
1829	emit an error, simply append UNDEFINED at the end. /*
1830	collate->undefined.mborder =
1831	(int ) obstack_alloc (&collate->mempool, nrules sizeof (int));
1832
1833	for (i = `0`; i < nrules; ++i)
1834	collate->undefined.mborder[i] = mbact[i]++;
1835	}
1836
1837	/ In any case we will need the definition for the wide character*
1838	case. But we will not complain that it is missing since the
1839	specification strangely enough does not seem to account for
1840	this. /*
1841	collate->undefined.wcorder = wcact++;
1842	}
1843
1844	/ Finally, try to unify the rules for the sections. Whenever the rules*
1845	for a section are the same as those for another section give the
1846	ruleset the same index. Since there are never many section we can
1847	use an O(n^2) algorithm here. /*
1848	sect = collate->sections;
1849	while (sect != NULL && sect->rules == NULL)
1850	sect = sect->next;
1851
1852	/ Bail out if we have no sections because of earlier errors. /
1853	if (sect == NULL)
1854	{
1855	record_error (EXIT_FAILURE, `0`, _("too many errors; giving up"));
1856	return;
1857	}
1858
1859	ruleidx = `0`;
1860	do
1861	{
1862	struct section_list *osect = collate->sections;
1863
1864	while (osect != sect)
1865	if (osect->rules != NULL
1866	&& memcmp (osect->rules, sect->rules,
1867	nrules * sizeof (osect->rules[`0`])) == `0`)
1868	break;
1869	else
1870	osect = osect->next;
1871
1872	if (osect == sect)
1873	sect->ruleidx = ruleidx++;
1874	else
1875	sect->ruleidx = osect->ruleidx;
1876
1877	/ Next section. /
1878	do
1879	sect = sect->next;
1880	while (sect != NULL && sect->rules == NULL);
1881	}
1882	while (sect != NULL);
1883	/ We are currently not prepared for more than 128 rulesets. But this*
1884	should never really be a problem. /*
1885	assert (ruleidx <= `128`);
1886	}
1887
1888
1889	static int32_t
1890	output_weight (struct obstack pool, struct* locale_collate_t *collate,
1891	struct element_t *elem)
1892	{
1893	size_t cnt;
1894	int32_t retval;
1895
1896	/ Optimize the use of UNDEFINED. /
1897	if (elem == &collate->undefined)
1898	/ The weights are already inserted. /
1899	return `0`;
1900
1901	/ This byte can start exactly one collation element and this is*
1902	a single byte. We can directly give the index to the weights. /*
1903	retval = obstack_object_size (pool);
1904
1905	/ Construct the weight. /
1906	for (cnt = `0`; cnt < nrules; ++cnt)
1907	{
1908	char buf[elem->weights[cnt].cnt * `7`];
1909	int len = `0`;
1910	int i;
1911
1912	for (i = `0`; i < elem->weights[cnt].cnt; ++i)
1913	/ Encode the weight value. We do nothing for IGNORE entries. /
1914	if (elem->weights[cnt].w[i] != NULL)
1915	len += utf8_encode (&buf[len],
1916	elem->weights[cnt].w[i]->mborder[cnt]);
1917
1918	/ And add the buffer content. /
1919	obstack_1grow (pool, len);
1920	obstack_grow (pool, buf, len);
1921	}
1922
1923	return retval \| ((elem->section->ruleidx & `0x7f`) << `24`);
1924	}
1925
1926
1927	static int32_t
1928	output_weightwc (struct obstack pool, struct* locale_collate_t *collate,
1929	struct element_t *elem)
1930	{
1931	size_t cnt;
1932	int32_t retval;
1933
1934	/ Optimize the use of UNDEFINED. /
1935	if (elem == &collate->undefined)
1936	/ The weights are already inserted. /
1937	return `0`;
1938
1939	/ This byte can start exactly one collation element and this is*
1940	a single byte. We can directly give the index to the weights. /*
1941	retval = obstack_object_size (pool) / sizeof (int32_t);
1942
1943	/ Construct the weight. /
1944	for (cnt = `0`; cnt < nrules; ++cnt)
1945	{
1946	int32_t buf[elem->weights[cnt].cnt];
1947	int i;
1948	int32_t j;
1949
1950	for (i = `0`, j = `0`; i < elem->weights[cnt].cnt; ++i)
1951	if (elem->weights[cnt].w[i] != NULL)
1952	buf[j++] = elem->weights[cnt].w[i]->wcorder;
1953
1954	/ And add the buffer content. /
1955	obstack_int32_grow (pool, j);
1956
1957	obstack_grow (pool, buf, j * sizeof (int32_t));
1958	maybe_swap_uint32_obstack (pool, j);
1959	}
1960
1961	return retval \| ((elem->section->ruleidx & `0x7f`) << `24`);
1962	}
1963
/* Context for `add_to_tablewc', which only receives the character and
   the element as arguments and reads everything else from here.
   If localedef is ever threaded, this would need to be a __thread
   variable.  */
static struct
{
  /* Pool handed to `output_weightwc' to receive the weight records.  */
  struct obstack *weightpool;
  /* Pool receiving the 32-bit entries which describe the elements
     sharing a first wide character.  */
  struct obstack *extrapool;
  /* Pool receiving the weight indices of runs of consecutive
     entries.  */
  struct obstack *indpool;
  /* Collation data passed through to `output_weightwc'.  */
  struct locale_collate_t *collate;
  /* Table which gets one entry per call of `add_to_tablewc'.  */
  struct collidx_table *tablewc;
} atwc;

/* Forward declaration; the definition follows.  */
static void add_to_tablewc (uint32_t ch, struct element_t *runp);
1975
1976	static void
1977	add_to_tablewc (uint32_t ch, struct element_t *runp)
1978	{
1979	if (runp->wcnext == NULL && runp->nwcs == `1`)
1980	{
1981	int32_t weigthidx = output_weightwc (atwc.weightpool, atwc.collate,
1982	runp);
1983	collidx_table_add (atwc.tablewc, ch, weigthidx);
1984	}
1985	else
1986	{
1987	/ As for the singlebyte table, we recognize sequences and*
1988	compress them. /*
1989
1990	collidx_table_add (atwc.tablewc, ch,
1991	-(obstack_object_size (atwc.extrapool)
1992	/ sizeof (uint32_t)));
1993
1994	do
1995	{
1996	/ Store the current index in the weight table. We know that*
1997	the current position in the `extrapool' is aligned on a
1998	32-bit address. /*
1999	int32_t weightidx;
2000	int added;
2001
2002	/ Find out wether this is a single entry or we have more than*
2003	one consecutive entry. /*
2004	if (runp->wcnext != NULL
2005	&& runp->nwcs == runp->wcnext->nwcs
2006	&& wmemcmp ((wchar_t *) runp->wcs,
2007	(wchar_t *)runp->wcnext->wcs,
2008	runp->nwcs - `1`) == `0`
2009	&& (runp->wcs[runp->nwcs - `1`]
2010	== runp->wcnext->wcs[runp->nwcs - `1`] + `1`))
2011	{
2012	int i;
2013	struct element_t *series_startp = runp;
2014	struct element_t *curp;
2015
2016	/ Now add first the initial byte sequence. /
2017	added = (`1` + `1` + `2` * (runp->nwcs - `1`)) * sizeof (int32_t);
2018	if (sizeof (int32_t) == sizeof (int))
2019	obstack_make_room (atwc.extrapool, added);
2020
2021	/ More than one consecutive entry. We mark this by having*
2022	a negative index into the indirect table. /*
2023	obstack_int32_grow_fast (atwc.extrapool,
2024	-(obstack_object_size (atwc.indpool)
2025	/ sizeof (int32_t)));
2026	obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - `1`);
2027
2028	do
2029	runp = runp->wcnext;
2030	while (runp->wcnext != NULL
2031	&& runp->nwcs == runp->wcnext->nwcs
2032	&& wmemcmp ((wchar_t *) runp->wcs,
2033	(wchar_t *)runp->wcnext->wcs,
2034	runp->nwcs - `1`) == `0`
2035	&& (runp->wcs[runp->nwcs - `1`]
2036	== runp->wcnext->wcs[runp->nwcs - `1`] + `1`));
2037
2038	/ Now walk backward from here to the beginning. /
2039	curp = runp;
2040
2041	for (i = `1`; i < runp->nwcs; ++i)
2042	obstack_int32_grow_fast (atwc.extrapool, curp->wcs[i]);
2043
2044	/ Now find the end of the consecutive sequence and*
2045	add all the indices in the indirect pool. /*
2046	do
2047	{
2048	weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2049	curp);
2050	obstack_int32_grow (atwc.indpool, weightidx);
2051
2052	curp = curp->wclast;
2053	}
2054	while (curp != series_startp);
2055
2056	/ Add the final weight. /
2057	weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2058	curp);
2059	obstack_int32_grow (atwc.indpool, weightidx);
2060
2061	/ And add the end byte sequence. Without length this*
2062	time. /*
2063	for (i = `1`; i < curp->nwcs; ++i)
2064	obstack_int32_grow (atwc.extrapool, curp->wcs[i]);
2065	}
2066	else
2067	{
2068	/ A single entry. Simply add the index and the length and*
2069	string (except for the first character which is already
2070	tested for). /*
2071	int i;
2072
2073	/ Output the weight info. /
2074	weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2075	runp);
2076
2077	assert (runp->nwcs > `0`);
2078	added = (`1` + `1` + runp->nwcs - `1`) * sizeof (int32_t);
2079	if (sizeof (int) == sizeof (int32_t))
2080	obstack_make_room (atwc.extrapool, added);
2081
2082	obstack_int32_grow_fast (atwc.extrapool, weightidx);
2083	obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - `1`);
2084	for (i = `1`; i < runp->nwcs; ++i)
2085	obstack_int32_grow_fast (atwc.extrapool, runp->wcs[i]);
2086	}
2087
2088	/ Next entry. /
2089	runp = runp->wcnext;
2090	}
2091	while (runp != NULL);
2092	}
2093	}
2094
2095	void
2096	collate_output (struct localedef_t locale, const* struct charmap_t *charmap,
2097	const char *output_path)
2098	{
2099	struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
2100	const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
2101	struct locale_file file;
2102	size_t ch;
2103	int32_t tablemb[`256`];
2104	struct obstack weightpool;
2105	struct obstack extrapool;
2106	struct obstack indirectpool;
2107	struct section_list *sect;
2108	struct collidx_table tablewc;
2109	uint32_t elem_size;
2110	uint32_t *elem_table;
2111	int i;
2112	struct element_t *runp;
2113
2114	init_locale_data (&file, nelems);
2115	add_locale_uint32 (&file, nrules);
2116
2117	/ If we have no LC_COLLATE data emit only the number of rules as zero. /
2118	if (collate == NULL)
2119	{
2120	size_t idx;
2121	for (idx = `1`; idx < nelems; idx++)
2122	{
2123	/ The words have to be handled specially. /
2124	if (idx == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
2125	add_locale_uint32 (&file, `0`);
2126	else
2127	add_locale_empty (&file);
2128	}
2129	write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file);
2130	return;
2131	}
2132
2133	obstack_init (&weightpool);
2134	obstack_init (&extrapool);
2135	obstack_init (&indirectpool);
2136
2137	/ Since we are using the sign of an integer to mark indirection the*
2138	offsets in the arrays we are indirectly referring to must not be
2139	zero since -0 == 0. Therefore we add a bit of dummy content. /*
2140	obstack_int32_grow (&extrapool, `0`);
2141	obstack_int32_grow (&indirectpool, `0`);
2142
2143	/ Prepare the ruleset table. /
2144	for (sect = collate->sections, i = `0`; sect != NULL; sect = sect->next)
2145	if (sect->rules != NULL && sect->ruleidx == i)
2146	{
2147	int j;
2148
2149	obstack_make_room (&weightpool, nrules);
2150
2151	for (j = `0`; j < nrules; ++j)
2152	obstack_1grow_fast (&weightpool, sect->rules[j]);
2153	++i;
2154	}
2155	/ And align the output. /
2156	i = (nrules * i) % LOCFILE_ALIGN;
2157	if (i > `0`)
2158	do
2159	obstack_1grow (&weightpool, `'\0'`);
2160	while (++i < LOCFILE_ALIGN);
2161
2162	add_locale_raw_obstack (&file, &weightpool);
2163
2164	/ Generate the 8-bit table. Walk through the lists of sequences*
2165	starting with the same byte and add them one after the other to
2166	the table. In case we have more than one sequence starting with
2167	the same byte we have to use extra indirection.
2168
2169	First add a record for the NUL byte. This entry will never be used
2170	so it does not matter. /*
2171	tablemb[`0`] = `0`;
2172
2173	/ Now insert the `UNDEFINED' value if it is used. Since this value*
2174	will probably be used more than once it is good to store the
2175	weights only once. /*
2176	if (collate->undefined.used_in_level != `0`)
2177	output_weight (&weightpool, collate, &collate->undefined);
2178
2179	for (ch = `1`; ch < `256`; ++ch)
2180	if (collate->mbheads[ch]->mbnext == NULL
2181	&& collate->mbheads[ch]->nmbs <= `1`)
2182	{
2183	tablemb[ch] = output_weight (&weightpool, collate,
2184	collate->mbheads[ch]);
2185	}
2186	else
2187	{
2188	/ The entries in the list are sorted by length and then*
2189	alphabetically. This is the order in which we will add the
2190	elements to the collation table. This allows simply walking
2191	the table in sequence and stopping at the first matching
2192	entry. Since the longer sequences are coming first in the
2193	list they have the possibility to match first, just as it
2194	has to be. In the worst case we are walking to the end of
2195	the list where we put, if no singlebyte sequence is defined
2196	in the locale definition, the weights for UNDEFINED.
2197
2198	To reduce the length of the search list we compress them a bit.
2199	This happens by collecting sequences of consecutive byte
2200	sequences in one entry (having and begin and end byte sequence)
2201	and add only one index into the weight table. We can find the
2202	consecutive entries since they are also consecutive in the list. /*
2203	struct element_t *runp = collate->mbheads[ch];
2204	struct element_t *lastp;
2205
2206	assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2207
2208	tablemb[ch] = -obstack_object_size (&extrapool);
2209
2210	do
2211	{
2212	/ Store the current index in the weight table. We know that*
2213	the current position in the `extrapool' is aligned on a
2214	32-bit address. /*
2215	int32_t weightidx;
2216	int added;
2217
2218	/ Find out wether this is a single entry or we have more than*
2219	one consecutive entry. /*
2220	if (runp->mbnext != NULL
2221	&& runp->nmbs == runp->mbnext->nmbs
2222	&& memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - `1`) == `0`
2223	&& (runp->mbs[runp->nmbs - `1`]
2224	== runp->mbnext->mbs[runp->nmbs - `1`] + `1`))
2225	{
2226	int i;
2227	struct element_t *series_startp = runp;
2228	struct element_t *curp;
2229
2230	/ Compute how much space we will need. /
2231	added = LOCFILE_ALIGN_UP (sizeof (int32_t) + `1`
2232	+ `2` * (runp->nmbs - `1`));
2233	assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2234	obstack_make_room (&extrapool, added);
2235
2236	/ More than one consecutive entry. We mark this by having*
2237	a negative index into the indirect table. /*
2238	obstack_int32_grow_fast (&extrapool,
2239	-(obstack_object_size (&indirectpool)
2240	/ sizeof (int32_t)));
2241
2242	/ Now search first the end of the series. /
2243	do
2244	runp = runp->mbnext;
2245	while (runp->mbnext != NULL
2246	&& runp->nmbs == runp->mbnext->nmbs
2247	&& memcmp (runp->mbs, runp->mbnext->mbs,
2248	runp->nmbs - `1`) == `0`
2249	&& (runp->mbs[runp->nmbs - `1`]
2250	== runp->mbnext->mbs[runp->nmbs - `1`] + `1`));
2251
2252	/ Now walk backward from here to the beginning. /
2253	curp = runp;
2254
2255	assert (runp->nmbs <= `256`);
2256	obstack_1grow_fast (&extrapool, curp->nmbs - `1`);
2257	for (i = `1`; i < curp->nmbs; ++i)
2258	obstack_1grow_fast (&extrapool, curp->mbs[i]);
2259
2260	/ Now find the end of the consecutive sequence and*
2261	add all the indices in the indirect pool. /*
2262	do
2263	{
2264	weightidx = output_weight (&weightpool, collate, curp);
2265	obstack_int32_grow (&indirectpool, weightidx);
2266
2267	curp = curp->mblast;
2268	}
2269	while (curp != series_startp);
2270
2271	/ Add the final weight. /
2272	weightidx = output_weight (&weightpool, collate, curp);
2273	obstack_int32_grow (&indirectpool, weightidx);
2274
2275	/ And add the end byte sequence. Without length this*
2276	time. /*
2277	for (i = `1`; i < curp->nmbs; ++i)
2278	obstack_1grow_fast (&extrapool, curp->mbs[i]);
2279	}
2280	else
2281	{
2282	/ A single entry. Simply add the index and the length and*
2283	string (except for the first character which is already
2284	tested for). /*
2285	int i;
2286
2287	/ Output the weight info. /
2288	weightidx = output_weight (&weightpool, collate, runp);
2289
2290	added = LOCFILE_ALIGN_UP (sizeof (int32_t) + `1`
2291	+ runp->nmbs - `1`);
2292	assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2293	obstack_make_room (&extrapool, added);
2294
2295	obstack_int32_grow_fast (&extrapool, weightidx);
2296	assert (runp->nmbs <= `256`);
2297	obstack_1grow_fast (&extrapool, runp->nmbs - `1`);
2298
2299	for (i = `1`; i < runp->nmbs; ++i)
2300	obstack_1grow_fast (&extrapool, runp->mbs[i]);
2301	}
2302
2303	/ Add alignment bytes if necessary. /
2304	while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)))
2305	obstack_1grow_fast (&extrapool, `'\0'`);
2306
2307	/ Next entry. /
2308	lastp = runp;
2309	runp = runp->mbnext;
2310	}
2311	while (runp != NULL);
2312
2313	assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2314
2315	/ If the final entry in the list is not a single character we*
2316	add an UNDEFINED entry here. /*
2317	if (lastp->nmbs != `1`)
2318	{
2319	int added = LOCFILE_ALIGN_UP (sizeof (int32_t) + `1` + `1`);
2320	obstack_make_room (&extrapool, added);
2321
2322	obstack_int32_grow_fast (&extrapool, `0`);
2323	/ XXX What rule? We just pick the first. /
2324	obstack_1grow_fast (&extrapool, `0`);
2325	/ Length is zero. /
2326	obstack_1grow_fast (&extrapool, `0`);
2327
2328	/ Add alignment bytes if necessary. /
2329	while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)))
2330	obstack_1grow_fast (&extrapool, `'\0'`);
2331	}
2332	}
2333
2334	/ Add padding to the tables if necessary. /
2335	while (!LOCFILE_ALIGNED_P (obstack_object_size (&weightpool)))
2336	obstack_1grow (&weightpool, `0`);
2337
2338	/ Now add the four tables. /
2339	add_locale_uint32_array (&file, (const uint32_t *) tablemb, `256`);
2340	add_locale_raw_obstack (&file, &weightpool);
2341	add_locale_raw_obstack (&file, &extrapool);
2342	add_locale_raw_obstack (&file, &indirectpool);
2343
2344	/ Now the same for the wide character table. We need to store some*
2345	more information here. /*
2346	add_locale_empty (&file);
2347	add_locale_empty (&file);
2348	add_locale_empty (&file);
2349
2350	/ Since we are using the sign of an integer to mark indirection the*
2351	offsets in the arrays we are indirectly referring to must not be
2352	zero since -0 == 0. Therefore we add a bit of dummy content. /*
2353	obstack_int32_grow (&extrapool, `0`);
2354	obstack_int32_grow (&indirectpool, `0`);
2355
2356	/ Now insert the `UNDEFINED' value if it is used. Since this value*
2357	will probably be used more than once it is good to store the
2358	weights only once. /*
2359	if (output_weightwc (&weightpool, collate, &collate->undefined) != `0`)
2360	abort ();
2361
2362	/ Generate the table. Walk through the lists of sequences starting*
2363	with the same wide character and add them one after the other to
2364	the table. In case we have more than one sequence starting with
2365	the same byte we have to use extra indirection. /*
2366	tablewc.p = `6`;
2367	tablewc.q = `10`;
2368	collidx_table_init (&tablewc);
2369
2370	atwc.weightpool = &weightpool;
2371	atwc.extrapool = &extrapool;
2372	atwc.indpool = &indirectpool;
2373	atwc.collate = collate;
2374	atwc.tablewc = &tablewc;
2375
2376	wchead_table_iterate (&collate->wcheads, add_to_tablewc);
2377
2378	memset (&atwc, `0`, sizeof (atwc));
2379
2380	/ Now add the four tables. /
2381	add_locale_collidx_table (&file, &tablewc);
2382	add_locale_raw_obstack (&file, &weightpool);
2383	add_locale_raw_obstack (&file, &extrapool);
2384	add_locale_raw_obstack (&file, &indirectpool);
2385
2386	/ Finally write the table with collation element names out. It is*
2387	a hash table with a simple function which gets the name of the
2388	character as the input. One character might have many names. The
2389	value associated with the name is an index into the weight table
2390	where we are then interested in the first-level weight value.
2391
2392	To determine how large the table should be we are counting the
2393	elements we have to put in. Since we are using internal chaining
2394	using a secondary hash function we have to make the table a bit
2395	larger to avoid extremely long search times. We can achieve
2396	good results with a 40% larger table than there are entries. /*
2397	elem_size = `0`;
2398	runp = collate->start;
2399	while (runp != NULL)
2400	{
2401	if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2402	/ Yep, the element really counts. /
2403	++elem_size;
2404
2405	runp = runp->next;
2406	}
2407	/ Add 50% and find the next prime number. /
2408	elem_size = next_prime (elem_size + (elem_size >> `1`));
2409
2410	/ Allocate the table. Each entry consists of two words: the hash*
2411	value and an index in a secondary table which provides the index
2412	into the weight table and the string itself (so that a match can
2413	be determined). /*
2414	elem_table = (uint32_t *) obstack_alloc (&extrapool,
2415	elem_size * `2` * sizeof (uint32_t));
2416	memset (elem_table, `'\0'`, elem_size * `2` * sizeof (uint32_t));
2417
2418	/ Now add the elements. /
2419	runp = collate->start;
2420	while (runp != NULL)
2421	{
2422	if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2423	{
2424	/ Compute the hash value of the name. /
2425	uint32_t namelen = strlen (runp->name);
2426	uint32_t hash = elem_hash (runp->name, namelen);
2427	size_t idx = hash % elem_size;
2428	#ifndef NDEBUG
2429	size_t start_idx = idx;
2430	#endif
2431
2432	if (elem_table[idx * `2`] != `0`)
2433	{
2434	/ The spot is already taken. Try iterating using the value*
2435	from the secondary hashing function. /*
2436	size_t iter = hash % (elem_size - `2`) + `1`;
2437
2438	do
2439	{
2440	idx += iter;
2441	if (idx >= elem_size)
2442	idx -= elem_size;
2443	assert (idx != start_idx);
2444	}
2445	while (elem_table[idx * `2`] != `0`);
2446	}
2447	/ This is the spot where we will insert the value. /
2448	elem_table[idx * `2`] = hash;
2449	elem_table[idx * `2` + `1`] = obstack_object_size (&extrapool);
2450
2451	/ The string itself including length. /
2452	obstack_1grow (&extrapool, namelen);
2453	obstack_grow (&extrapool, runp->name, namelen);
2454
2455	/ And the multibyte representation. /
2456	obstack_1grow (&extrapool, runp->nmbs);
2457	obstack_grow (&extrapool, runp->mbs, runp->nmbs);
2458
2459	/ And align again to 32 bits. /
2460	if ((`1` + namelen + `1` + runp->nmbs) % sizeof (int32_t) != `0`)
2461	obstack_grow (&extrapool, "\0\0",
2462	(sizeof (int32_t)
2463	- ((`1` + namelen + `1` + runp->nmbs)
2464	% sizeof (int32_t))));
2465
2466	/ Now some 32-bit values: multibyte collation sequence,*
2467	wide char string (including length), and wide char
2468	collation sequence. /*
2469	obstack_int32_grow (&extrapool, runp->mbseqorder);
2470
2471	obstack_int32_grow (&extrapool, runp->nwcs);
2472	obstack_grow (&extrapool, runp->wcs,
2473	runp->nwcs * sizeof (uint32_t));
2474	maybe_swap_uint32_obstack (&extrapool, runp->nwcs);
2475
2476	obstack_int32_grow (&extrapool, runp->wcseqorder);
2477	}
2478
2479	runp = runp->next;
2480	}
2481
2482	/ Prepare to write out this data. /
2483	add_locale_uint32 (&file, elem_size);
2484	add_locale_uint32_array (&file, elem_table, `2` * elem_size);
2485	add_locale_raw_obstack (&file, &extrapool);
2486	add_locale_raw_data (&file, collate->mbseqorder, `256`);
2487	add_locale_collseq_table (&file, &collate->wcseqorder);
2488	add_locale_string (&file, charmap->code_set_name);
2489	write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file);
2490
2491	obstack_free (&weightpool, NULL);
2492	obstack_free (&extrapool, NULL);
2493	obstack_free (&indirectpool, NULL);
2494	}
2495
2496
2497	static enum token_t
2498	skip_to (struct linereader ldfile, struct* locale_collate_t *collate,
2499	const struct charmap_t charmap, int* to_endif)
2500	{
2501	while (`1`)
2502	{
2503	struct token *now = lr_token (ldfile, charmap, NULL, NULL, `0`);
2504	enum token_t nowtok = now->tok;
2505
2506	if (nowtok == tok_eof \|\| nowtok == tok_end)
2507	return nowtok;
2508
2509	if (nowtok == tok_ifdef \|\| nowtok == tok_ifndef)
2510	{
2511	lr_error (ldfile, _("%s: nested conditionals not supported"),
2512	"LC_COLLATE");
2513	nowtok = skip_to (ldfile, collate, charmap, tok_endif);
2514	if (nowtok == tok_eof \|\| nowtok == tok_end)
2515	return nowtok;
2516	}
2517	else if (nowtok == tok_endif \|\| (!to_endif && nowtok == tok_else))
2518	{
2519	lr_ignore_rest (ldfile, `1`);
2520	return nowtok;
2521	}
2522	else if (!to_endif && (nowtok == tok_elifdef \|\| nowtok == tok_elifndef))
2523	{
2524	/ Do not read the rest of the line. /
2525	return nowtok;
2526	}
2527	else if (nowtok == tok_else)
2528	{
2529	lr_error (ldfile, _("%s: more than one 'else'"), "LC_COLLATE");
2530	}
2531
2532	lr_ignore_rest (ldfile, `0`);
2533	}
2534	}
2535
2536
2537	void
2538	collate_read (struct linereader ldfile, struct* localedef_t *result,
2539	const struct charmap_t charmap, const* char *repertoire_name,
2540	int ignore_content)
2541	{
2542	struct repertoire_t *repertoire = NULL;
2543	struct locale_collate_t *collate;
2544	struct token *now;
2545	struct token *arg = NULL;
2546	enum token_t nowtok;
2547	enum token_t was_ellipsis = tok_none;
2548	struct localedef_t *copy_locale = NULL;
2549	/ Parsing state:*
2550	0 - start
2551	1 - between `order-start' and `order-end'
2552	2 - after `order-end'
2553	3 - after `reorder-after', waiting for `reorder-end'
2554	4 - after `reorder-end'
2555	5 - after `reorder-sections-after', waiting for `reorder-sections-end'
2556	6 - after `reorder-sections-end'
2557	*/
2558	int state = `0`;
2559
2560	/ Get the repertoire we have to use. /
2561	if (repertoire_name != NULL)
2562	repertoire = repertoire_read (repertoire_name);
2563
2564	/ The rest of the line containing `LC_COLLATE' must be free. /
2565	lr_ignore_rest (ldfile, `1`);
2566
2567	while (`1`)
2568	{
2569	do
2570	{
2571	now = lr_token (ldfile, charmap, result, NULL, verbose);
2572	nowtok = now->tok;
2573	}
2574	while (nowtok == tok_eol);
2575
2576	if (nowtok != tok_define)
2577	break;
2578
2579	if (ignore_content)
2580	lr_ignore_rest (ldfile, `0`);
2581	else
2582	{
2583	arg = lr_token (ldfile, charmap, result, NULL, verbose);
2584	if (arg->tok != tok_ident)
2585	SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2586	else
2587	{
2588	/ Simply add the new symbol. /
2589	struct name_list newsym = xmalloc (sizeof* (*newsym)
2590	+ arg->val.str.lenmb + `1`);
2591	memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
2592	newsym->str[arg->val.str.lenmb] = `'\0'`;
2593	newsym->next = defined;
2594	defined = newsym;
2595
2596	lr_ignore_rest (ldfile, `1`);
2597	}
2598	}
2599	}
2600
2601	if (nowtok == tok_copy)
2602	{
2603	now = lr_token (ldfile, charmap, result, NULL, verbose);
2604	if (now->tok != tok_string)
2605	{
2606	SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2607
2608	skip_category:
2609	do
2610	now = lr_token (ldfile, charmap, result, NULL, verbose);
2611	while (now->tok != tok_eof && now->tok != tok_end);
2612
2613	if (now->tok != tok_eof
2614	\|\| (now = lr_token (ldfile, charmap, result, NULL, verbose),
2615	now->tok == tok_eof))
2616	lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
2617	else if (now->tok != tok_lc_collate)
2618	{
2619	lr_error (ldfile, _("\
2620	%1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2621	lr_ignore_rest (ldfile, `0`);
2622	}
2623	else
2624	lr_ignore_rest (ldfile, `1`);
2625
2626	return;
2627	}
2628
2629	if (! ignore_content)
2630	{
2631	/ Get the locale definition. /
2632	copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
2633	repertoire_name, charmap, NULL);
2634	if ((copy_locale->avail & COLLATE_LOCALE) == `0`)
2635	{
2636	/ Not yet loaded. So do it now. /
2637	if (locfile_read (copy_locale, charmap) != `0`)
2638	goto skip_category;
2639	}
2640
2641	if (copy_locale->categories[LC_COLLATE].collate == NULL)
2642	return;
2643	}
2644
2645	lr_ignore_rest (ldfile, `1`);
2646
2647	now = lr_token (ldfile, charmap, result, NULL, verbose);
2648	nowtok = now->tok;
2649	}
2650
2651	/ Prepare the data structures. /
2652	collate_startup (ldfile, result, copy_locale, ignore_content);
2653	collate = result->categories[LC_COLLATE].collate;
2654
2655	while (`1`)
2656	{
2657	char ucs4buf[`10`];
2658	char *symstr;
2659	size_t symlen;
2660
2661	/ Of course we don't proceed beyond the end of file. /
2662	if (nowtok == tok_eof)
2663	break;
2664
2665	/* Ignore empty lines.  */
2666	if (nowtok == tok_eol)
2667	{
2668	now = lr_token (ldfile, charmap, result, NULL, verbose);
2669	nowtok = now->tok;
2670	continue;
2671	}
2672
2673	switch (nowtok)
2674	{
2675	case tok_copy:
2676	/ Allow copying other locales. /
2677	now = lr_token (ldfile, charmap, result, NULL, verbose);
2678	if (now->tok != tok_string)
2679	goto err_label;
2680
2681	if (! ignore_content)
2682	load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name,
2683	charmap, result);
2684
2685	lr_ignore_rest (ldfile, `1`);
2686	break;
2687
2688	case tok_coll_weight_max:
2689	/ Ignore the rest of the line if we don't need the input of*
2690	this line. /*
2691	if (ignore_content)
2692	{
2693	lr_ignore_rest (ldfile, `0`);
2694	break;
2695	}
2696
2697	if (state != `0`)
2698	goto err_label;
2699
2700	arg = lr_token (ldfile, charmap, result, NULL, verbose);
2701	if (arg->tok != tok_number)
2702	goto err_label;
2703	if (collate->col_weight_max != -`1`)
2704	lr_error (ldfile, _("%s: duplicate definition of `%s'"),
2705	"LC_COLLATE", "col_weight_max");
2706	else
2707	collate->col_weight_max = arg->val.num;
2708	lr_ignore_rest (ldfile, `1`);
2709	break;
2710
2711	case tok_section_symbol:
2712	/ Ignore the rest of the line if we don't need the input of*
2713	this line. /*
2714	if (ignore_content)
2715	{
2716	lr_ignore_rest (ldfile, `0`);
2717	break;
2718	}
2719
2720	if (state != `0`)
2721	goto err_label;
2722
2723	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2724	if (arg->tok != tok_bsymbol)
2725	goto err_label;
2726	else if (!ignore_content)
2727	{
2728	/ Check whether this section is already known. /
2729	struct section_list *known = collate->sections;
2730	while (known != NULL)
2731	{
2732	if (strcmp (known->name, arg->val.str.startmb) == `0`)
2733	break;
2734	known = known->next;
2735	}
2736
2737	if (known != NULL)
2738	{
2739	lr_error (ldfile,
2740	_("%s: duplicate declaration of section `%s'"),
2741	"LC_COLLATE", arg->val.str.startmb);
2742	free (arg->val.str.startmb);
2743	}
2744	else
2745	collate->sections = make_seclist_elem (collate,
2746	arg->val.str.startmb,
2747	collate->sections);
2748
2749	lr_ignore_rest (ldfile, known == NULL);
2750	}
2751	else
2752	{
2753	free (arg->val.str.startmb);
2754	lr_ignore_rest (ldfile, `0`);
2755	}
2756	break;
2757
2758	case tok_collating_element:
2759	/ Ignore the rest of the line if we don't need the input of*
2760	this line. /*
2761	if (ignore_content)
2762	{
2763	lr_ignore_rest (ldfile, `0`);
2764	break;
2765	}
2766
2767	if (state != `0` && state != `2`)
2768	goto err_label;
2769
2770	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2771	if (arg->tok != tok_bsymbol)
2772	goto err_label;
2773	else
2774	{
2775	const char *symbol = arg->val.str.startmb;
2776	size_t symbol_len = arg->val.str.lenmb;
2777
2778	/ Next the `from' keyword. /
2779	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2780	if (arg->tok != tok_from)
2781	{
2782	free ((char *) symbol);
2783	goto err_label;
2784	}
2785
2786	ldfile->return_widestr = `1`;
2787	ldfile->translate_strings = `1`;
2788
2789	/ Finally the string with the replacement. /
2790	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2791
2792	ldfile->return_widestr = `0`;
2793	ldfile->translate_strings = `0`;
2794
2795	if (arg->tok != tok_string)
2796	goto err_label;
2797
2798	if (!ignore_content && symbol != NULL)
2799	{
2800	/ The name is already defined. /
2801	if (check_duplicate (ldfile, collate, charmap,
2802	repertoire, symbol, symbol_len))
2803	goto col_elem_free;
2804
2805	if (arg->val.str.startmb != NULL)
2806	insert_entry (&collate->elem_table, symbol, symbol_len,
2807	new_element (collate,
2808	arg->val.str.startmb,
2809	arg->val.str.lenmb - `1`,
2810	arg->val.str.startwc,
2811	symbol, symbol_len, `0`));
2812	}
2813	else
2814	{
2815	col_elem_free:
2816	free ((char *) symbol);
2817	free (arg->val.str.startmb);
2818	free (arg->val.str.startwc);
2819	}
2820	lr_ignore_rest (ldfile, `1`);
2821	}
2822	break;
2823
2824	case tok_collating_symbol:
2825	/ Ignore the rest of the line if we don't need the input of*
2826	this line. /*
2827	if (ignore_content)
2828	{
2829	lr_ignore_rest (ldfile, `0`);
2830	break;
2831	}
2832
2833	if (state != `0` && state != `2`)
2834	goto err_label;
2835
2836	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2837	if (arg->tok != tok_bsymbol)
2838	goto err_label;
2839	else
2840	{
2841	char *symbol = arg->val.str.startmb;
2842	size_t symbol_len = arg->val.str.lenmb;
2843	char *endsymbol = NULL;
2844	size_t endsymbol_len = `0`;
2845	enum token_t ellipsis = tok_none;
2846
2847	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2848	if (arg->tok == tok_ellipsis2 \|\| arg->tok == tok_ellipsis4)
2849	{
2850	ellipsis = arg->tok;
2851
2852	arg = lr_token (ldfile, charmap, result, repertoire,
2853	verbose);
2854	if (arg->tok != tok_bsymbol)
2855	{
2856	free (symbol);
2857	goto err_label;
2858	}
2859
2860	endsymbol = arg->val.str.startmb;
2861	endsymbol_len = arg->val.str.lenmb;
2862
2863	lr_ignore_rest (ldfile, `1`);
2864	}
2865	else if (arg->tok != tok_eol)
2866	{
2867	free (symbol);
2868	goto err_label;
2869	}
2870
2871	if (!ignore_content)
2872	{
2873	if (symbol == NULL
2874	\|\| (ellipsis != tok_none && endsymbol == NULL))
2875	{
2876	lr_error (ldfile, _("\
2877	%s: unknown character in collating symbol name"),
2878	"LC_COLLATE");
2879	goto col_sym_free;
2880	}
2881	else if (ellipsis == tok_none)
2882	{
2883	/ A single symbol, no ellipsis. /
2884	if (check_duplicate (ldfile, collate, charmap,
2885	repertoire, symbol, symbol_len))
2886	/ The name is already defined. /
2887	goto col_sym_free;
2888
2889	insert_entry (&collate->sym_table, symbol, symbol_len,
2890	new_symbol (collate, symbol, symbol_len));
2891	}
2892	else if (symbol_len != endsymbol_len)
2893	{
2894	col_sym_inv_range:
2895	lr_error (ldfile,
2896	_("invalid names for character range"));
2897	goto col_sym_free;
2898	}
2899	else
2900	{
2901	/ Oh my, we have to handle an ellipsis. First, as*
2902	usual, determine the common prefix and then
2903	convert the rest into a range. /*
2904	size_t prefixlen;
2905	unsigned long int from;
2906	unsigned long int to;
2907	char *endp;
2908
2909	for (prefixlen = `0`; prefixlen < symbol_len; ++prefixlen)
2910	if (symbol[prefixlen] != endsymbol[prefixlen])
2911	break;
2912
2913	/ Convert the rest into numbers. /
2914	symbol[symbol_len] = `'\0'`;
2915	from = strtoul (&symbol[prefixlen], &endp,
2916	ellipsis == tok_ellipsis2 ? `16` : `10`);
2917	if (*endp != `'\0'`)
2918	goto col_sym_inv_range;
2919
2920	endsymbol[symbol_len] = `'\0'`;
2921	to = strtoul (&endsymbol[prefixlen], &endp,
2922	ellipsis == tok_ellipsis2 ? `16` : `10`);
2923	if (*endp != `'\0'`)
2924	goto col_sym_inv_range;
2925
2926	if (from > to)
2927	goto col_sym_inv_range;
2928
2929	/ Now loop over all entries. /
2930	while (from <= to)
2931	{
2932	char *symbuf;
2933
2934	symbuf = (char *) obstack_alloc (&collate->mempool,
2935	symbol_len + `1`);
2936
2937	/ Create the name. /
2938	sprintf (symbuf,
2939	ellipsis == tok_ellipsis2
2940	? "%.s%.lX" : "%.s%.lu",
2941	(int) prefixlen, symbol,
2942	(int) (symbol_len - prefixlen), from);
2943
2944	if (check_duplicate (ldfile, collate, charmap,
2945	repertoire, symbuf, symbol_len))
2946	/ The name is already defined. /
2947	goto col_sym_free;
2948
2949	insert_entry (&collate->sym_table, symbuf,
2950	symbol_len,
2951	new_symbol (collate, symbuf,
2952	symbol_len));
2953
2954	/ Increment the counter. /
2955	++from;
2956	}
2957
2958	goto col_sym_free;
2959	}
2960	}
2961	else
2962	{
2963	col_sym_free:
2964	free (symbol);
2965	free (endsymbol);
2966	}
2967	}
2968	break;
2969
2970	case tok_symbol_equivalence:
2971	/ Ignore the rest of the line if we don't need the input of*
2972	this line. /*
2973	if (ignore_content)
2974	{
2975	lr_ignore_rest (ldfile, `0`);
2976	break;
2977	}
2978
2979	if (state != `0`)
2980	goto err_label;
2981
2982	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2983	if (arg->tok != tok_bsymbol)
2984	goto err_label;
2985	else
2986	{
2987	const char *newname = arg->val.str.startmb;
2988	size_t newname_len = arg->val.str.lenmb;
2989	const char *symname;
2990	size_t symname_len;
2991	void symval; /* Actually struct symbol_t* /
2992
2993	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2994	if (arg->tok != tok_bsymbol)
2995	{
2996	free ((char *) newname);
2997	goto err_label;
2998	}
2999
3000	symname = arg->val.str.startmb;
3001	symname_len = arg->val.str.lenmb;
3002
3003	if (newname == NULL)
3004	{
3005	lr_error (ldfile, _("\
3006	%s: unknown character in equivalent definition name"),
3007	"LC_COLLATE");
3008
3009	sym_equiv_free:
3010	free ((char *) newname);
3011	free ((char *) symname);
3012	break;
3013	}
3014	if (symname == NULL)
3015	{
3016	lr_error (ldfile, _("\
3017	%s: unknown character in equivalent definition value"),
3018	"LC_COLLATE");
3019	goto sym_equiv_free;
3020	}
3021
3022	/ See whether the symbol name is already defined. /
3023	if (find_entry (&collate->sym_table, symname, symname_len,
3024	&symval) != `0`)
3025	{
3026	lr_error (ldfile, _("\
3027	%s: unknown symbol `%s' in equivalent definition"),
3028	"LC_COLLATE", symname);
3029	goto sym_equiv_free;
3030	}
3031
3032	if (insert_entry (&collate->sym_table,
3033	newname, newname_len, symval) < `0`)
3034	{
3035	lr_error (ldfile, _("\
3036	error while adding equivalent collating symbol"));
3037	goto sym_equiv_free;
3038	}
3039
3040	free ((char *) symname);
3041	}
3042	lr_ignore_rest (ldfile, `1`);
3043	break;
3044
3045	case tok_script:
3046	/ Ignore the rest of the line if we don't need the input of*
3047	this line. /*
3048	if (ignore_content)
3049	{
3050	lr_ignore_rest (ldfile, `0`);
3051	break;
3052	}
3053
3054	/ We get told about the scripts we know. /
3055	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3056	if (arg->tok != tok_bsymbol)
3057	goto err_label;
3058	else
3059	{
3060	struct section_list *runp = collate->known_sections;
3061	char *name;
3062
3063	while (runp != NULL)
3064	if (strncmp (runp->name, arg->val.str.startmb,
3065	arg->val.str.lenmb) == `0`
3066	&& runp->name[arg->val.str.lenmb] == `'\0'`)
3067	break;
3068	else
3069	runp = runp->def_next;
3070
3071	if (runp != NULL)
3072	{
3073	lr_error (ldfile, _("duplicate definition of script `%s'"),
3074	runp->name);
3075	lr_ignore_rest (ldfile, `0`);
3076	break;
3077	}
3078
3079	runp = (struct section_list ) xcalloc (`1`, sizeof* (*runp));
3080	name = (char *) xmalloc (arg->val.str.lenmb + `1`);
3081	memcpy (name, arg->val.str.startmb, arg->val.str.lenmb);
3082	name[arg->val.str.lenmb] = `'\0'`;
3083	runp->name = name;
3084
3085	runp->def_next = collate->known_sections;
3086	collate->known_sections = runp;
3087	}
3088	lr_ignore_rest (ldfile, `1`);
3089	break;
3090
3091	case tok_order_start:
3092	/ Ignore the rest of the line if we don't need the input of*
3093	this line. /*
3094	if (ignore_content)
3095	{
3096	lr_ignore_rest (ldfile, `0`);
3097	break;
3098	}
3099
3100	if (state != `0` && state != `1` && state != `2`)
3101	goto err_label;
3102	state = `1`;
3103
3104	/ The 14652 draft does not specify whether all `order_start' lines*
3105	must contain the same number of sort-rules, but 14651 does. So
3106	we require this here as well. /*
3107	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3108	if (arg->tok == tok_bsymbol)
3109	{
3110	/ This better should be a section name. /
3111	struct section_list *sp = collate->known_sections;
3112	while (sp != NULL
3113	&& (sp->name == NULL
3114	\|\| strncmp (sp->name, arg->val.str.startmb,
3115	arg->val.str.lenmb) != `0`
3116	\|\| sp->name[arg->val.str.lenmb] != `'\0'`))
3117	sp = sp->def_next;
3118
3119	if (sp == NULL)
3120	{
3121	lr_error (ldfile, _("\
3122	%s: unknown section name `%.*s'"),
3123	"LC_COLLATE", (int) arg->val.str.lenmb,
3124	arg->val.str.startmb);
3125	/ We use the error section. /
3126	collate->current_section = &collate->error_section;
3127
3128	if (collate->error_section.first == NULL)
3129	{
3130	/ Insert &collate->error_section at the end of*
3131	the collate->sections list. /*
3132	if (collate->sections == NULL)
3133	collate->sections = &collate->error_section;
3134	else
3135	{
3136	sp = collate->sections;
3137	while (sp->next != NULL)
3138	sp = sp->next;
3139
3140	sp->next = &collate->error_section;
3141	}
3142	collate->error_section.next = NULL;
3143	}
3144	}
3145	else
3146	{
3147	/ One should not be allowed to open the same*
3148	section twice. /*
3149	if (sp->first != NULL)
3150	lr_error (ldfile, _("\
3151	%s: multiple order definitions for section `%s'"),
3152	"LC_COLLATE", sp->name);
3153	else
3154	{
3155	/ Insert sp in the collate->sections list,*
3156	right after collate->current_section. /*
3157	if (collate->current_section != NULL)
3158	{
3159	sp->next = collate->current_section->next;
3160	collate->current_section->next = sp;
3161	}
3162	else if (collate->sections == NULL)
3163	/ This is the first section to be defined. /
3164	collate->sections = sp;
3165
3166	collate->current_section = sp;
3167	}
3168
3169	/ Next should come the end of the line or a semicolon. /
3170	arg = lr_token (ldfile, charmap, result, repertoire,
3171	verbose);
3172	if (arg->tok == tok_eol)
3173	{
3174	uint32_t cnt;
3175
3176	/ This means we have exactly one rule: `forward'. /
3177	if (nrules > `1`)
3178	lr_error (ldfile, _("\
3179	%s: invalid number of sorting rules"),
3180	"LC_COLLATE");
3181	else
3182	nrules = `1`;
3183	sp->rules = obstack_alloc (&collate->mempool,
3184	(sizeof (enum coll_sort_rule)
3185	* nrules));
3186	for (cnt = `0`; cnt < nrules; ++cnt)
3187	sp->rules[cnt] = sort_forward;
3188
3189	/ Next line. /
3190	break;
3191	}
3192
3193	/ Get the next token. /
3194	arg = lr_token (ldfile, charmap, result, repertoire,
3195	verbose);
3196	}
3197	}
3198	else
3199	{
3200	/ There is no section symbol. Therefore we use the unnamed*
3201	section. /*
3202	collate->current_section = &collate->unnamed_section;
3203
3204	if (collate->unnamed_section_defined)
3205	lr_error (ldfile, _("\
3206	%s: multiple order definitions for unnamed section"),
3207	"LC_COLLATE");
3208	else
3209	{
3210	/ Insert &collate->unnamed_section at the beginning of*
3211	the collate->sections list. /*
3212	collate->unnamed_section.next = collate->sections;
3213	collate->sections = &collate->unnamed_section;
3214	collate->unnamed_section_defined = true;
3215	}
3216	}
3217
3218	/ Now read the direction names. /
3219	read_directions (ldfile, arg, charmap, repertoire, result);
3220
3221	/ From now we need the strings untranslated. /
3222	ldfile->translate_strings = `0`;
3223	break;
3224
3225	case tok_order_end:
3226	/ Ignore the rest of the line if we don't need the input of*
3227	this line. /*
3228	if (ignore_content)
3229	{
3230	lr_ignore_rest (ldfile, `0`);
3231	break;
3232	}
3233
3234	if (state != `1`)
3235	goto err_label;
3236
3237	/ Handle ellipsis at end of list. /
3238	if (was_ellipsis != tok_none)
3239	{
3240	handle_ellipsis (ldfile, NULL, `0`, was_ellipsis, charmap,
3241	repertoire, result);
3242	was_ellipsis = tok_none;
3243	}
3244
3245	state = `2`;
3246	lr_ignore_rest (ldfile, `1`);
3247	break;
3248
3249	case tok_reorder_after:
3250	/ Ignore the rest of the line if we don't need the input of*
3251	this line. /*
3252	if (ignore_content)
3253	{
3254	lr_ignore_rest (ldfile, `0`);
3255	break;
3256	}
3257
3258	if (state == `1`)
3259	{
3260	lr_error (ldfile, _("%s: missing `order_end' keyword"),
3261	"LC_COLLATE");
3262	state = `2`;
3263
3264	/ Handle ellipsis at end of list. /
3265	if (was_ellipsis != tok_none)
3266	{
3267	handle_ellipsis (ldfile, arg->val.str.startmb,
3268	arg->val.str.lenmb, was_ellipsis, charmap,
3269	repertoire, result);
3270	was_ellipsis = tok_none;
3271	}
3272	}
3273	else if (state == `0` && copy_locale == NULL)
3274	goto err_label;
3275	else if (state != `0` && state != `2` && state != `3`)
3276	goto err_label;
3277	state = `3`;
3278
3279	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3280	if (arg->tok == tok_bsymbol \|\| arg->tok == tok_ucs4)
3281	{
3282	/ Find this symbol in the sequence table. /
3283	char ucsbuf[`10`];
3284	char *startmb;
3285	size_t lenmb;
3286	struct element_t *insp;
3287	int no_error = `1`;
3288	void *ptr;
3289
3290	if (arg->tok == tok_bsymbol)
3291	{
3292	startmb = arg->val.str.startmb;
3293	lenmb = arg->val.str.lenmb;
3294	}
3295	else
3296	{
3297	sprintf (ucsbuf, "U%08X", arg->val.ucs4);
3298	startmb = ucsbuf;
3299	lenmb = `9`;
3300	}
3301
3302	if (find_entry (&collate->seq_table, startmb, lenmb, &ptr) == `0`)
3303	/ Yes, the symbol exists. Simply point the cursor*
3304	to it. /*
3305	collate->cursor = (struct element_t *) ptr;
3306	else
3307	{
3308	struct symbol_t *symbp;
3309	void *ptr;
3310
3311	if (find_entry (&collate->sym_table, startmb, lenmb,
3312	&ptr) == `0`)
3313	{
3314	symbp = ptr;
3315
3316	if (symbp->order->last != NULL
3317	\|\| symbp->order->next != NULL)
3318	collate->cursor = symbp->order;
3319	else
3320	{
3321	/ This is a collating symbol but its position*
3322	is not yet defined. /*
3323	lr_error (ldfile, _("\
3324	%s: order for collating symbol %.*s not yet defined"),
3325	"LC_COLLATE", (int) lenmb, startmb);
3326	collate->cursor = NULL;
3327	no_error = `0`;
3328	}
3329	}
3330	else if (find_entry (&collate->elem_table, startmb, lenmb,
3331	&ptr) == `0`)
3332	{
3333	insp = (struct element_t *) ptr;
3334
3335	if (insp->last != NULL \|\| insp->next != NULL)
3336	collate->cursor = insp;
3337	else
3338	{
3339	/ This is a collating element but its position*
3340	is not yet defined. /*
3341	lr_error (ldfile, _("\
3342	%s: order for collating element %.*s not yet defined"),
3343	"LC_COLLATE", (int) lenmb, startmb);
3344	collate->cursor = NULL;
3345	no_error = `0`;
3346	}
3347	}
3348	else
3349	{
3350	/ This is bad. The symbol after which we have to*
3351	insert does not exist. /*
3352	lr_error (ldfile, _("\
3353	%s: cannot reorder after %.*s: symbol not known"),
3354	"LC_COLLATE", (int) lenmb, startmb);
3355	collate->cursor = NULL;
3356	no_error = `0`;
3357	}
3358	}
3359
3360	lr_ignore_rest (ldfile, no_error);
3361	}
3362	else
3363	/ This must not happen. /
3364	goto err_label;
3365	break;
3366
3367	case tok_reorder_end:
3368	/ Ignore the rest of the line if we don't need the input of*
3369	this line. /*
3370	if (ignore_content)
3371	break;
3372
3373	if (state != `3`)
3374	goto err_label;
3375	state = `4`;
3376	lr_ignore_rest (ldfile, `1`);
3377	break;
3378
3379	case tok_reorder_sections_after:
3380	/ Ignore the rest of the line if we don't need the input of*
3381	this line. /*
3382	if (ignore_content)
3383	{
3384	lr_ignore_rest (ldfile, `0`);
3385	break;
3386	}
3387
3388	if (state == `1`)
3389	{
3390	lr_error (ldfile, _("%s: missing `order_end' keyword"),
3391	"LC_COLLATE");
3392	state = `2`;
3393
3394	/ Handle ellipsis at end of list. /
3395	if (was_ellipsis != tok_none)
3396	{
3397	handle_ellipsis (ldfile, NULL, `0`, was_ellipsis, charmap,
3398	repertoire, result);
3399	was_ellipsis = tok_none;
3400	}
3401	}
3402	else if (state == `3`)
3403	{
3404	record_error (`0`, `0`, _("\
3405	%s: missing `reorder-end' keyword"), "LC_COLLATE");
3406	state = `4`;
3407	}
3408	else if (state != `2` && state != `4`)
3409	goto err_label;
3410	state = `5`;
3411
3412	/ Get the name of the sections we are adding after. /
3413	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3414	if (arg->tok == tok_bsymbol)
3415	{
3416	/ Now find a section with this name. /
3417	struct section_list *runp = collate->sections;
3418
3419	while (runp != NULL)
3420	{
3421	if (runp->name != NULL
3422	&& strlen (runp->name) == arg->val.str.lenmb
3423	&& memcmp (runp->name, arg->val.str.startmb,
3424	arg->val.str.lenmb) == `0`)
3425	break;
3426
3427	runp = runp->next;
3428	}
3429
3430	if (runp != NULL)
3431	collate->current_section = runp;
3432	else
3433	{
3434	/ This is bad. The section after which we have to*
3435	reorder does not exist. Therefore we cannot
3436	process the whole rest of this reorder
3437	specification. /*
3438	lr_error (ldfile, _("%s: section `%.*s' not known"),
3439	"LC_COLLATE", (int) arg->val.str.lenmb,
3440	arg->val.str.startmb);
3441
3442	do
3443	{
3444	lr_ignore_rest (ldfile, `0`);
3445
3446	now = lr_token (ldfile, charmap, result, NULL, verbose);
3447	}
3448	while (now->tok == tok_reorder_sections_after
3449	\|\| now->tok == tok_reorder_sections_end
3450	\|\| now->tok == tok_end);
3451
3452	/ Process the token we just saw. /
3453	nowtok = now->tok;
3454	continue;
3455	}
3456	}
3457	else
3458	/ This must not happen. /
3459	goto err_label;
3460	break;
3461
3462	case tok_reorder_sections_end:
3463	/ Ignore the rest of the line if we don't need the input of*
3464	this line. /*
3465	if (ignore_content)
3466	break;
3467
3468	if (state != `5`)
3469	goto err_label;
3470	state = `6`;
3471	lr_ignore_rest (ldfile, `1`);
3472	break;
3473
3474	case tok_bsymbol:
3475	case tok_ucs4:
3476	/ Ignore the rest of the line if we don't need the input of*
3477	this line. /*
3478	if (ignore_content)
3479	{
3480	lr_ignore_rest (ldfile, `0`);
3481	break;
3482	}
3483
3484	if (state != `0` && state != `1` && state != `3` && state != `5`)
3485	goto err_label;
3486
3487	if ((state == `0` \|\| state == `5`) && nowtok == tok_ucs4)
3488	goto err_label;
3489
3490	if (nowtok == tok_ucs4)
3491	{
3492	snprintf (ucs4buf, sizeof (ucs4buf), "U%08X", now->val.ucs4);
3493	symstr = ucs4buf;
3494	symlen = `9`;
3495	}
3496	else if (arg != NULL)
3497	{
3498	symstr = arg->val.str.startmb;
3499	symlen = arg->val.str.lenmb;
3500	}
3501	else
3502	{
3503	lr_error (ldfile, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
3504	(int) ldfile->token.val.str.lenmb,
3505	ldfile->token.val.str.startmb);
3506	break;
3507	}
3508
3509	struct element_t *seqp;
3510	if (state == `0`)
3511	{
3512	/ We are outside an `order_start' region. This means*
3513	we must only accept definitions of values for
3514	collation symbols since these are purely abstract
3515	values and don't need directions associated. /*
3516	void *ptr;
3517
3518	if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == `0`)
3519	{
3520	seqp = ptr;
3521
3522	/ It's already defined. First check whether this*
3523	is really a collating symbol. /*
3524	if (seqp->is_character)
3525	goto err_label;
3526
3527	goto move_entry;
3528	}
3529	else
3530	{
3531	void *result;
3532
3533	if (find_entry (&collate->sym_table, symstr, symlen,
3534	&result) != `0`)
3535	/ No collating symbol, it's an error. /
3536	goto err_label;
3537
3538	/ Maybe this is the first time we define a symbol*
3539	value and it is before the first actual section. /*
3540	if (collate->sections == NULL)
3541	collate->sections = collate->current_section =
3542	&collate->symbol_section;
3543	}
3544
3545	if (was_ellipsis != tok_none)
3546	{
3547	handle_ellipsis (ldfile, symstr, symlen, was_ellipsis,
3548	charmap, repertoire, result);
3549
3550	/ Remember that we processed the ellipsis. /
3551	was_ellipsis = tok_none;
3552
3553	/ And don't add the value a second time. /
3554	break;
3555	}
3556	}
3557	else if (state == `3`)
3558	{
3559	/ It is possible that we already have this collation sequence.*
3560	In this case we move the entry. /*
3561	void *sym;
3562	void *ptr;
3563
3564	/ If the symbol after which we have to insert was not found*
3565	ignore all entries. /*
3566	if (collate->cursor == NULL)
3567	{
3568	lr_ignore_rest (ldfile, `0`);
3569	break;
3570	}
3571
3572	if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == `0`)
3573	{
3574	seqp = (struct element_t *) ptr;
3575	goto move_entry;
3576	}
3577
3578	if (find_entry (&collate->sym_table, symstr, symlen, &sym) == `0`
3579	&& (seqp = ((struct symbol_t *) sym)->order) != NULL)
3580	goto move_entry;
3581
3582	if (find_entry (&collate->elem_table, symstr, symlen, &ptr) == `0`
3583	&& (seqp = (struct element_t *) ptr,
3584	seqp->last != NULL \|\| seqp->next != NULL
3585	\|\| (collate->start != NULL && seqp == collate->start)))
3586	{
3587	move_entry:
3588	/ Remove the entry from the old position. /
3589	if (seqp->last == NULL)
3590	collate->start = seqp->next;
3591	else
3592	seqp->last->next = seqp->next;
3593	if (seqp->next != NULL)
3594	seqp->next->last = seqp->last;
3595
3596	/ We also have to check whether this entry is the*
3597	first or last of a section. /*
3598	if (seqp->section->first == seqp)
3599	{
3600	if (seqp->section->first == seqp->section->last)
3601	/ This section has no content anymore. /
3602	seqp->section->first = seqp->section->last = NULL;
3603	else
3604	seqp->section->first = seqp->next;
3605	}
3606	else if (seqp->section->last == seqp)
3607	seqp->section->last = seqp->last;
3608
3609	/ Now insert it in the new place. /
3610	insert_weights (ldfile, seqp, charmap, repertoire, result,
3611	tok_none);
3612	break;
3613	}
3614
3615	/ Otherwise we just add a new entry. /
3616	}
3617	else if (state == `5`)
3618	{
3619	/ We are reordering sections. Find the named section. /
3620	struct section_list *runp = collate->sections;
3621	struct section_list *prevp = NULL;
3622
3623	while (runp != NULL)
3624	{
3625	if (runp->name != NULL
3626	&& strlen (runp->name) == symlen
3627	&& memcmp (runp->name, symstr, symlen) == `0`)
3628	break;
3629
3630	prevp = runp;
3631	runp = runp->next;
3632	}
3633
3634	if (runp == NULL)
3635	{
3636	lr_error (ldfile, _("%s: section `%.*s' not known"),
3637	"LC_COLLATE", (int) symlen, symstr);
3638	lr_ignore_rest (ldfile, `0`);
3639	}
3640	else
3641	{
3642	if (runp != collate->current_section)
3643	{
3644	/ Remove the named section from the old place and*
3645	insert it in the new one. /*
3646	prevp->next = runp->next;
3647
3648	runp->next = collate->current_section->next;
3649	collate->current_section->next = runp;
3650	collate->current_section = runp;
3651	}
3652
3653	/ Process the rest of the line which might change*
3654	the collation rules. /*
3655	arg = lr_token (ldfile, charmap, result, repertoire,
3656	verbose);
3657	if (arg->tok != tok_eof && arg->tok != tok_eol)
3658	read_directions (ldfile, arg, charmap, repertoire,
3659	result);
3660	}
3661	break;
3662	}
3663	else if (was_ellipsis != tok_none)
3664	{
3665	/ Using the information in the `ellipsis_weight'*
3666	element and this and the last value we have to handle
3667	the ellipsis now. /*
3668	assert (state == `1`);
3669
3670	handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, charmap,
3671	repertoire, result);
3672
3673	/ Remember that we processed the ellipsis. /
3674	was_ellipsis = tok_none;
3675
3676	/ And don't add the value a second time. /
3677	break;
3678	}
3679
3680	/ Now insert in the new place. /
3681	insert_value (ldfile, symstr, symlen, charmap, repertoire, result);
3682	break;
3683
3684	case tok_undefined:
3685	/ Ignore the rest of the line if we don't need the input of*
3686	this line. /*
3687	if (ignore_content)
3688	{
3689	lr_ignore_rest (ldfile, `0`);
3690	break;
3691	}
3692
3693	if (state != `1`)
3694	goto err_label;
3695
3696	if (was_ellipsis != tok_none)
3697	{
3698	lr_error (ldfile,
3699	_("%s: cannot have `%s' as end of ellipsis range"),
3700	"LC_COLLATE", "UNDEFINED");
3701
3702	unlink_element (collate);
3703	was_ellipsis = tok_none;
3704	}
3705
3706	/ See whether UNDEFINED already appeared somewhere. /
3707	if (collate->undefined.next != NULL
3708	\|\| &collate->undefined == collate->cursor)
3709	{
3710	lr_error (ldfile,
3711	_("%s: order for `%.*s' already defined at %s:%Zu"),
3712	"LC_COLLATE", `9`, "UNDEFINED",
3713	collate->undefined.file,
3714	collate->undefined.line);
3715	lr_ignore_rest (ldfile, `0`);
3716	}
3717	else
3718	/ Parse the weights. /
3719	insert_weights (ldfile, &collate->undefined, charmap,
3720	repertoire, result, tok_none);
3721	break;
3722
3723	case tok_ellipsis2: / symbolic hexadecimal ellipsis /
3724	case tok_ellipsis3: / absolute ellipsis /
3725	case tok_ellipsis4: / symbolic decimal ellipsis /
3726	/ This is the symbolic (decimal or hexadecimal) or absolute*
3727	ellipsis. /*
3728	if (was_ellipsis != tok_none)
3729	goto err_label;
3730
3731	if (state != `0` && state != `1` && state != `3`)
3732	goto err_label;
3733
3734	was_ellipsis = nowtok;
3735
3736	insert_weights (ldfile, &collate->ellipsis_weight, charmap,
3737	repertoire, result, nowtok);
3738	break;
3739
3740	case tok_end:
3741	seen_end:
3742	/ Next we assume `LC_COLLATE'. /
3743	if (!ignore_content)
3744	{
3745	if (state == `0` && copy_locale == NULL)
3746	/ We must either see a copy statement or have*
3747	ordering values. /*
3748	lr_error (ldfile,
3749	_("%s: empty category description not allowed"),
3750	"LC_COLLATE");
3751	else if (state == `1`)
3752	{
3753	lr_error (ldfile, _("%s: missing `order_end' keyword"),
3754	"LC_COLLATE");
3755
3756	/ Handle ellipsis at end of list. /
3757	if (was_ellipsis != tok_none)
3758	{
3759	handle_ellipsis (ldfile, NULL, `0`, was_ellipsis, charmap,
3760	repertoire, result);
3761	was_ellipsis = tok_none;
3762	}
3763	}
3764	else if (state == `3`)
3765	record_error (`0`, `0`, _("\
3766	%s: missing `reorder-end' keyword"), "LC_COLLATE");
3767	else if (state == `5`)
3768	record_error (`0`, `0`, _("\
3769	%s: missing `reorder-sections-end' keyword"), "LC_COLLATE");
3770	}
3771	arg = lr_token (ldfile, charmap, result, NULL, verbose);
3772	if (arg->tok == tok_eof)
3773	break;
3774	if (arg->tok == tok_eol)
3775	lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
3776	else if (arg->tok != tok_lc_collate)
3777	lr_error (ldfile, _("\
3778	%1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3779	lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
3780	return;
3781
3782	case tok_define:
3783	if (ignore_content)
3784	{
3785	lr_ignore_rest (ldfile, `0`);
3786	break;
3787	}
3788
3789	arg = lr_token (ldfile, charmap, result, NULL, verbose);
3790	if (arg->tok != tok_ident)
3791	goto err_label;
3792
3793	/ Simply add the new symbol. /
3794	struct name_list newsym = xmalloc (sizeof* (*newsym)
3795	+ arg->val.str.lenmb + `1`);
3796	memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
3797	newsym->str[arg->val.str.lenmb] = `'\0'`;
3798	newsym->next = defined;
3799	defined = newsym;
3800
3801	lr_ignore_rest (ldfile, `1`);
3802	break;
3803
3804	case tok_undef:
3805	if (ignore_content)
3806	{
3807	lr_ignore_rest (ldfile, `0`);
3808	break;
3809	}
3810
3811	arg = lr_token (ldfile, charmap, result, NULL, verbose);
3812	if (arg->tok != tok_ident)
3813	goto err_label;
3814
3815	/ Remove _all_ occurrences of the symbol from the list. /
3816	struct name_list *prevdef = NULL;
3817	struct name_list *curdef = defined;
3818	while (curdef != NULL)
3819	if (strncmp (arg->val.str.startmb, curdef->str,
3820	arg->val.str.lenmb) == `0`
3821	&& curdef->str[arg->val.str.lenmb] == `'\0'`)
3822	{
3823	if (prevdef == NULL)
3824	defined = curdef->next;
3825	else
3826	prevdef->next = curdef->next;
3827
3828	struct name_list *olddef = curdef;
3829	curdef = curdef->next;
3830
3831	free (olddef);
3832	}
3833	else
3834	{
3835	prevdef = curdef;
3836	curdef = curdef->next;
3837	}
3838
3839	lr_ignore_rest (ldfile, `1`);
3840	break;
3841
3842	case tok_ifdef:
3843	case tok_ifndef:
3844	if (ignore_content)
3845	{
3846	lr_ignore_rest (ldfile, `0`);
3847	break;
3848	}
3849
3850	found_ifdef:
3851	arg = lr_token (ldfile, charmap, result, NULL, verbose);
3852	if (arg->tok != tok_ident)
3853	goto err_label;
3854	lr_ignore_rest (ldfile, `1`);
3855
3856	if (collate->else_action == else_none)
3857	{
3858	curdef = defined;
3859	while (curdef != NULL)
3860	if (strncmp (arg->val.str.startmb, curdef->str,
3861	arg->val.str.lenmb) == `0`
3862	&& curdef->str[arg->val.str.lenmb] == `'\0'`)
3863	break;
3864	else
3865	curdef = curdef->next;
3866
3867	if ((nowtok == tok_ifdef && curdef != NULL)
3868	\|\| (nowtok == tok_ifndef && curdef == NULL))
3869	{
3870	/ We have to use the if-branch. /
3871	collate->else_action = else_ignore;
3872	}
3873	else
3874	{
3875	/ We have to use the else-branch, if there is one. /
3876	nowtok = skip_to (ldfile, collate, charmap, `0`);
3877	if (nowtok == tok_else)
3878	collate->else_action = else_seen;
3879	else if (nowtok == tok_elifdef)
3880	{
3881	nowtok = tok_ifdef;
3882	goto found_ifdef;
3883	}
3884	else if (nowtok == tok_elifndef)
3885	{
3886	nowtok = tok_ifndef;
3887	goto found_ifdef;
3888	}
3889	else if (nowtok == tok_eof)
3890	goto seen_eof;
3891	else if (nowtok == tok_end)
3892	goto seen_end;
3893	}
3894	}
3895	else
3896	{
3897	/ XXX Should it really become necessary to support nested*
3898	preprocessor handling we will push the state here. /*
3899	lr_error (ldfile, _("%s: nested conditionals not supported"),
3900	"LC_COLLATE");
3901	nowtok = skip_to (ldfile, collate, charmap, `1`);
3902	if (nowtok == tok_eof)
3903	goto seen_eof;
3904	else if (nowtok == tok_end)
3905	goto seen_end;
3906	}
3907	break;
3908
3909	case tok_elifdef:
3910	case tok_elifndef:
3911	case tok_else:
3912	if (ignore_content)
3913	{
3914	lr_ignore_rest (ldfile, `0`);
3915	break;
3916	}
3917
3918	lr_ignore_rest (ldfile, `1`);
3919
3920	if (collate->else_action == else_ignore)
3921	{
3922	/ Ignore everything until the endif. /
3923	nowtok = skip_to (ldfile, collate, charmap, `1`);
3924	if (nowtok == tok_eof)
3925	goto seen_eof;
3926	else if (nowtok == tok_end)
3927	goto seen_end;
3928	}
3929	else
3930	{
3931	assert (collate->else_action == else_none);
3932	lr_error (ldfile, _("\
3933	%s: '%s' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE",
3934	nowtok == tok_else ? "else"
3935	: nowtok == tok_elifdef ? "elifdef" : "elifndef");
3936	}
3937	break;
3938
3939	case tok_endif:
3940	if (ignore_content)
3941	{
3942	lr_ignore_rest (ldfile, `0`);
3943	break;
3944	}
3945
3946	lr_ignore_rest (ldfile, `1`);
3947
3948	if (collate->else_action != else_ignore
3949	&& collate->else_action != else_seen)
3950	lr_error (ldfile, _("\
3951	%s: 'endif' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE");
3952
3953	/ XXX If we support nested preprocessor directives we pop*
3954	the state here. /*
3955	collate->else_action = else_none;
3956	break;
3957
3958	default:
3959	err_label:
3960	SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
3961	}
3962
3963	/ Prepare for the next round. /
3964	now = lr_token (ldfile, charmap, result, NULL, verbose);
3965	nowtok = now->tok;
3966	}
3967
3968	seen_eof:
3969	/ When we come here we reached the end of the file. /
3970	lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
3971	}
3972

Browse the source code of glibc/locale/programs/ld-collate.c