ld-collate.c source code [glibc/locale/programs/ld-collate.c]

/* Copyright (C) 1995-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <https://www.gnu.org/licenses/>.  */
16
17	#ifdef HAVE_CONFIG_H
18	# include <config.h>
19	#endif
20
21	#include <errno.h>
22	#include <stdlib.h>
23	#include <wchar.h>
24	#include <stdint.h>
25	#include <sys/param.h>
26	#include <array_length.h>
27
28	#include "localedef.h"
29	#include "charmap.h"
30	#include "localeinfo.h"
31	#include "linereader.h"
32	#include "locfile.h"
33	#include "elem-hash.h"
34
/* Uncomment the following line in the production version.  */
/* #define NDEBUG 1 */
37	#include <assert.h>
38
/* Map the chunk allocation hooks named obstack_chunk_alloc and
   obstack_chunk_free onto plain malloc/free.  NOTE(review): these are
   the names the GNU obstack macros look up when initialising an
   obstack -- confirm against <obstack.h>.  */
#define obstack_chunk_alloc malloc
#define obstack_chunk_free free
41
/* Append the 32-bit value DATA to the object currently growing in
   OBSTACK, after passing it through maybe_swap_uint32.  The object's
   current size must satisfy LOCFILE_ALIGNED_P.  */
static inline void
__attribute ((always_inline))
obstack_int32_grow (struct obstack *obstack, int32_t data)
{
  assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));

  const int32_t swapped = maybe_swap_uint32 (data);

  /* The dedicated int-sized primitive is only usable when int is
     exactly 32 bits wide; otherwise copy the raw bytes.  */
  if (sizeof (int32_t) != sizeof (int))
    obstack_grow (obstack, &swapped, sizeof (int32_t));
  else
    obstack_int_grow (obstack, swapped);
}
53
/* Like obstack_int32_grow, but uses obstack_int_grow_fast when int is
   32 bits wide.  The fallback for other int sizes still goes through
   the ordinary obstack_grow.  */
static inline void
__attribute ((always_inline))
obstack_int32_grow_fast (struct obstack *obstack, int32_t data)
{
  assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));

  const int32_t swapped = maybe_swap_uint32 (data);

  if (sizeof (int32_t) != sizeof (int))
    obstack_grow (obstack, &swapped, sizeof (int32_t));
  else
    obstack_int_grow_fast (obstack, swapped);
}
65
/* Forward declaration.  */
struct element_t;

/* Data type for list of strings.  Each node describes one section and
   is linked into two separate lists (see def_next and next).  */
struct section_list
{
  /* Successor in the known_sections list.  */
  struct section_list *def_next;
  /* Successor in the sections list.  */
  struct section_list *next;
  /* Name of the section.  */
  const char *name;
  /* First element of this section.  */
  struct element_t *first;
  /* Last element of this section.  */
  struct element_t *last;
  /* These are the rules for this section.  */
  enum coll_sort_rule *rules;
  /* Index of the rule set in the appropriate section of the output file.  */
  int ruleidx;
};
87
struct element_t;

/* A counted array of element pointers; used for the weights of an
   element at one level.  */
struct element_list_t
{
  /* Number of elements.  */
  int cnt;

  /* The CNT element pointers.  An entry may be NULL or one of the
     ELEMENT_ELLIPSIS* placeholder values rather than a real element.  */
  struct element_t **w;
};
97
/* Data type for collating element.  */
struct element_t
{
  /* Symbolic name, or NULL for unnamed elements.  */
  const char *name;

  /* Multibyte sequence and its length; NULL/0 if there is none.  */
  const char *mbs;
  size_t nmbs;
  /* Zero-terminated wide character sequence and its length; NULL/0 if
     there is none.  */
  const uint32_t *wcs;
  size_t nwcs;
  int *mborder;
  int wcorder;

  /* The following is a bit mask which bits are set if this element is
     used in the appropriate level.  Interesting for the singlebyte
     weight computation.

     XXX The type here restricts the number of levels to 32.  It could
     be changed if necessary but I doubt this is necessary.  */
  unsigned int used_in_level;

  /* Array of weight lists, one per level; NULL until assigned.  */
  struct element_list_t *weights;

  /* Nonzero if this is a real character definition.  */
  int is_character;

  /* Order of the character in the sequence.  This information will
     be used in range expressions.  */
  int mbseqorder;
  int wcseqorder;

  /* Where does the definition come from.  */
  const char *file;
  size_t line;

  /* Which section does this belong to.  */
  struct section_list *section;

  /* Predecessor and successor in the order list.  */
  struct element_t *last;
  struct element_t *next;

  /* Next element in multibyte output list.  */
  struct element_t *mbnext;
  struct element_t *mblast;

  /* Next element in wide character output list.  */
  struct element_t *wcnext;
  struct element_t *wclast;
};
147
/* Special element value.  These small integer constants cast to
   element pointers serve as placeholders in weight lists and must
   never be dereferenced.  NOTE(review): the 2/3/4 suffixes presumably
   correspond to tok_ellipsis2/3/4 -- confirm.  */
#define ELEMENT_ELLIPSIS2 ((struct element_t *) 1)
#define ELEMENT_ELLIPSIS3 ((struct element_t *) 2)
#define ELEMENT_ELLIPSIS4 ((struct element_t *) 3)
152
/* Data type for collating symbol.  */
struct symbol_t
{
  /* Name of the symbol.  */
  const char *name;

  /* Point to place in the order list.  NULL until an element has been
     associated with the symbol.  */
  struct element_t *order;

  /* Where does the definition come from.  */
  const char *file;
  size_t line;
};
165
/* Sparse table of struct element_t *.  Each inclusion of "3level.h"
   below is parameterised by the TABLE, ELEMENT and DEFAULT macros
   defined just before it.  NOTE(review): the exact meaning of ITERATE
   and NO_ADD_LOCALE is defined by 3level.h -- confirm there.  */
#define TABLE wchead_table
#define ELEMENT struct element_t *
#define DEFAULT NULL
#define ITERATE
#define NO_ADD_LOCALE
#include "3level.h"

/* Sparse table of int32_t.  */
#define TABLE collidx_table
#define ELEMENT int32_t
#define DEFAULT 0
#include "3level.h"

/* Sparse table of uint32_t.  */
#define TABLE collseq_table
#define ELEMENT uint32_t
#define DEFAULT ~((uint32_t) 0)
#include "3level.h"
185
186
/* Simple name list for the preprocessor.  Each node is allocated with
   enough extra room after the header to hold the string in STR.  */
struct name_list
{
  /* Next node in the list, or NULL.  */
  struct name_list *next;
  /* The name itself; a C99 flexible array member, so it adds nothing
     to sizeof (struct name_list).  */
  char str[];
};
193
194
195	/ The real definition of the struct for the LC_COLLATE locale. /
196	struct locale_collate_t
197	{
198	/ Does the locale use code points to compare the encoding? /
199	bool codepoint_collation;
200
201	int col_weight_max;
202	int cur_weight_max;
203
204	/ List of known scripts. /
205	struct section_list *known_sections;
206	/ List of used sections. /
207	struct section_list *sections;
208	/ Current section using definition. /
209	struct section_list *current_section;
210	/ There always can be an unnamed section. /
211	struct section_list unnamed_section;
212	/ Flag whether the unnamed section has been defined. /
213	bool unnamed_section_defined;
214	/ To make handling of errors easier we have another section. /
215	struct section_list error_section;
216	/ Sometimes we are defining the values for collating symbols before*
217	the first actual section. /*
218	struct section_list symbol_section;
219
220	/ Start of the order list. /
221	struct element_t *start;
222
223	/ The undefined element. /
224	struct element_t undefined;
225
226	/ This is the cursor for `reorder_after' insertions. /
227	struct element_t *cursor;
228
229	/ This value is used when handling ellipsis. /
230	struct element_t ellipsis_weight;
231
232	/ Known collating elements. /
233	hash_table elem_table;
234
235	/ Known collating symbols. /
236	hash_table sym_table;
237
238	/ Known collation sequences. /
239	hash_table seq_table;
240
241	struct obstack mempool;
242
243	/ The LC_COLLATE category is a bit special as it is sometimes possible*
244	that the definitions from more than one input file contains information.
245	Therefore we keep all relevant input in a list. /*
246	struct locale_collate_t *next;
247
248	/ Arrays with heads of the list for each of the leading bytes in*
249	the multibyte sequences. /*
250	struct element_t *mbheads[`256`];
251
252	/ Arrays with heads of the list for each of the leading bytes in*
253	the multibyte sequences. /*
254	struct wchead_table wcheads;
255
256	/ The arrays with the collation sequence order. /
257	unsigned char mbseqorder[`256`];
258	struct collseq_table wcseqorder;
259
260	/ State of the preprocessor. /
261	enum
262	{
263	else_none = `0`,
264	else_ignore,
265	else_seen
266	}
267	else_action;
268	};
269
270
/* We have a few global variables which are used for reading all
   LC_COLLATE category descriptions in all files.  NRULES is the number
   of sorting rules (levels); it stays zero until the first direction
   specification has been read.  */
static uint32_t nrules;

/* List of defined preprocessor symbols.  */
static struct name_list *defined;
277
278
/* We need UTF-8 encoding of numbers.

   Store the UTF-8 encoding of VAL in BUF and return the number of
   bytes written.  The classic 31-bit scheme is used, so the result is
   between 1 and 6 bytes long; BUF must be large enough and is not
   NUL-terminated.  Every value below 0x80, including a negative one,
   is stored as a single byte.  */
static inline int
__attribute ((always_inline))
utf8_encode (char *buf, int val)
{
  int retval;

  if (val < 0x80)
    {
      *buf = (char) val;
      retval = 1;
    }
  else
    {
      int step;

      /* A sequence of STEP bytes carries 5 * STEP + 1 payload bits.  */
      for (step = 2; step < 6; ++step)
        if ((val & (~(uint32_t) 0 << (5 * step + 1))) == 0)
          break;
      retval = step;

      /* Lead byte marker: STEP one bits followed by a zero bit.  The
         shift is done on an unsigned value so that the result does not
         depend on how the compiler shifts negative numbers.  */
      *buf = (unsigned char) (~(uint32_t) 0xff >> step);
      --step;
      do
        {
          /* Fill the continuation bytes from the end, six bits each.  */
          buf[step] = 0x80 | (val & 0x3f);
          val >>= 6;
        }
      while (--step > 0);
      /* The remaining high bits go into the lead byte.  */
      *buf |= val;
    }

  return retval;
}
313
314
315	static struct section_list *
316	make_seclist_elem (struct locale_collate_t collate, const* char *string,
317	struct section_list *next)
318	{
319	struct section_list *newp;
320
321	newp = (struct section_list *) obstack_alloc (&collate->mempool,
322	sizeof (*newp));
323	newp->next = next;
324	newp->name = string;
325	newp->first = NULL;
326	newp->last = NULL;
327
328	return newp;
329	}
330
331
332	static struct element_t *
333	new_element (struct locale_collate_t collate, const* char *mbs, size_t mbslen,
334	const uint32_t wcs, const* char *name, size_t namelen,
335	int is_character)
336	{
337	struct element_t *newp;
338
339	newp = (struct element_t *) obstack_alloc (&collate->mempool,
340	sizeof (*newp));
341	newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
342	name, namelen);
343	if (mbs != NULL)
344	{
345	newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
346	newp->nmbs = mbslen;
347	}
348	else
349	{
350	newp->mbs = NULL;
351	newp->nmbs = `0`;
352	}
353	if (wcs != NULL)
354	{
355	size_t nwcs = wcslen ((wchar_t *) wcs);
356	uint32_t zero = `0`;
357	/ Handle <U0000> as a single character. /
358	if (nwcs == `0`)
359	nwcs = `1`;
360	obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
361	obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
362	newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
363	newp->nwcs = nwcs;
364	}
365	else
366	{
367	newp->wcs = NULL;
368	newp->nwcs = `0`;
369	}
370	newp->mborder = NULL;
371	newp->wcorder = `0`;
372	newp->used_in_level = `0`;
373	newp->is_character = is_character;
374
375	/ Will be assigned later. XXX /
376	newp->mbseqorder = `0`;
377	newp->wcseqorder = `0`;
378
379	/ Will be allocated later. /
380	newp->weights = NULL;
381
382	newp->file = NULL;
383	newp->line = `0`;
384
385	newp->section = collate->current_section;
386
387	newp->last = NULL;
388	newp->next = NULL;
389
390	newp->mbnext = NULL;
391	newp->mblast = NULL;
392
393	newp->wcnext = NULL;
394	newp->wclast = NULL;
395
396	return newp;
397	}
398
399
400	static struct symbol_t *
401	new_symbol (struct locale_collate_t collate, const* char *name, size_t len)
402	{
403	struct symbol_t *newp;
404
405	newp = (struct symbol_t ) obstack_alloc (&collate->mempool, sizeof* (*newp));
406
407	newp->name = obstack_copy0 (&collate->mempool, name, len);
408	newp->order = NULL;
409
410	newp->file = NULL;
411	newp->line = `0`;
412
413	return newp;
414	}
415
416
417	/ Test whether this name is already defined somewhere. /
418	static int
419	check_duplicate (struct linereader ldfile, struct* locale_collate_t *collate,
420	const struct charmap_t *charmap,
421	struct repertoire_t repertoire, const* char *symbol,
422	size_t symbol_len)
423	{
424	void *ignore = NULL;
425
426	if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == `0`)
427	{
428	lr_error (ldfile, _("`%.*s' already defined in charmap"),
429	(int) symbol_len, symbol);
430	return `1`;
431	}
432
433	if (repertoire != NULL
434	&& (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore)
435	== `0`))
436	{
437	lr_error (ldfile, _("`%.*s' already defined in repertoire"),
438	(int) symbol_len, symbol);
439	return `1`;
440	}
441
442	if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == `0`)
443	{
444	lr_error (ldfile, _("`%.*s' already defined as collating symbol"),
445	(int) symbol_len, symbol);
446	return `1`;
447	}
448
449	if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == `0`)
450	{
451	lr_error (ldfile, _("`%.*s' already defined as collating element"),
452	(int) symbol_len, symbol);
453	return `1`;
454	}
455
456	return `0`;
457	}
458
459
460	/ Read the direction specification. /
461	static void
462	read_directions (struct linereader ldfile, struct* token *arg,
463	const struct charmap_t *charmap,
464	struct repertoire_t repertoire, struct* localedef_t *result)
465	{
466	int cnt = `0`;
467	int max = nrules ?: `10`;
468	enum coll_sort_rule rules = calloc (max, sizeof* (*rules));
469	int warned = `0`;
470	struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
471
472	while (`1`)
473	{
474	int valid = `0`;
475
476	if (arg->tok == tok_forward)
477	{
478	if (rules[cnt] & sort_backward)
479	{
480	if (! warned)
481	{
482	lr_error (ldfile, _("\
483	%s: `forward' and `backward' are mutually excluding each other"),
484	"LC_COLLATE");
485	warned = `1`;
486	}
487	}
488	else if (rules[cnt] & sort_forward)
489	{
490	if (! warned)
491	{
492	lr_error (ldfile, _("\
493	%s: `%s' mentioned more than once in definition of weight %d"),
494	"LC_COLLATE", "forward", cnt + `1`);
495	}
496	}
497	else
498	rules[cnt] \|= sort_forward;
499
500	valid = `1`;
501	}
502	else if (arg->tok == tok_backward)
503	{
504	if (rules[cnt] & sort_forward)
505	{
506	if (! warned)
507	{
508	lr_error (ldfile, _("\
509	%s: `forward' and `backward' are mutually excluding each other"),
510	"LC_COLLATE");
511	warned = `1`;
512	}
513	}
514	else if (rules[cnt] & sort_backward)
515	{
516	if (! warned)
517	{
518	lr_error (ldfile, _("\
519	%s: `%s' mentioned more than once in definition of weight %d"),
520	"LC_COLLATE", "backward", cnt + `1`);
521	}
522	}
523	else
524	rules[cnt] \|= sort_backward;
525
526	valid = `1`;
527	}
528	else if (arg->tok == tok_position)
529	{
530	if (rules[cnt] & sort_position)
531	{
532	if (! warned)
533	{
534	lr_error (ldfile, _("\
535	%s: `%s' mentioned more than once in definition of weight %d"),
536	"LC_COLLATE", "position", cnt + `1`);
537	}
538	}
539	else
540	rules[cnt] \|= sort_position;
541
542	valid = `1`;
543	}
544
545	if (valid)
546	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
547
548	if (arg->tok == tok_eof \|\| arg->tok == tok_eol \|\| arg->tok == tok_comma
549	\|\| arg->tok == tok_semicolon)
550	{
551	if (! valid && ! warned)
552	{
553	lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
554	warned = `1`;
555	}
556
557	/ See whether we have to increment the counter. /
558	if (arg->tok != tok_comma && rules[cnt] != `0`)
559	{
560	/ Add the default `forward' if we have seen only `position'. /
561	if (rules[cnt] == sort_position)
562	rules[cnt] = sort_position \| sort_forward;
563
564	++cnt;
565	}
566
567	if (arg->tok == tok_eof \|\| arg->tok == tok_eol)
568	/ End of line or file, so we exit the loop. /
569	break;
570
571	if (nrules == `0`)
572	{
573	/ See whether we have enough room in the array. /
574	if (cnt == max)
575	{
576	max += `10`;
577	rules = (enum coll_sort_rule *) xrealloc (rules,
578	max
579	* sizeof (*rules));
580	memset (&rules[cnt], `'\0'`, (max - cnt) * sizeof (*rules));
581	}
582	}
583	else
584	{
585	if (cnt == nrules)
586	{
587	/ There must not be any more rule. /
588	if (! warned)
589	{
590	lr_error (ldfile, _("\
591	%s: too many rules; first entry only had %d"),
592	"LC_COLLATE", nrules);
593	warned = `1`;
594	}
595
596	lr_ignore_rest (ldfile, `0`);
597	break;
598	}
599	}
600	}
601	else
602	{
603	if (! warned)
604	{
605	lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
606	warned = `1`;
607	}
608	}
609
610	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
611	}
612
613	if (nrules == `0`)
614	{
615	/ Now we know how many rules we have. /
616	nrules = cnt;
617	rules = (enum coll_sort_rule *) xrealloc (rules,
618	nrules * sizeof (*rules));
619	}
620	else
621	{
622	if (cnt < nrules)
623	{
624	/ Not enough rules in this specification. /
625	if (! warned)
626	lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
627
628	do
629	rules[cnt] = sort_forward;
630	while (++cnt < nrules);
631	}
632	}
633
634	collate->current_section->rules = rules;
635	}
636
637
638	static struct element_t *
639	find_element (struct linereader ldfile, struct* locale_collate_t *collate,
640	const char *str, size_t len)
641	{
642	void *result = NULL;
643
644	/ Search for the entries among the collation sequences already define. /
645	if (find_entry (&collate->seq_table, str, len, &result) != `0`)
646	{
647	/ Nope, not define yet. So we see whether it is a*
648	collation symbol. /*
649	void *ptr;
650
651	if (find_entry (&collate->sym_table, str, len, &ptr) == `0`)
652	{
653	/ It's a collation symbol. /
654	struct symbol_t sym = (struct* symbol_t *) ptr;
655	result = sym->order;
656
657	if (result == NULL)
658	result = sym->order = new_element (collate, NULL, `0`, NULL,
659	NULL, `0`, `0`);
660	}
661	else if (find_entry (&collate->elem_table, str, len, &result) != `0`)
662	{
663	/ It's also no collation element. So it is a character*
664	element defined later. /*
665	result = new_element (collate, NULL, `0`, NULL, str, len, `1`);
666	/ Insert it into the sequence table. /
667	insert_entry (&collate->seq_table, str, len, result);
668	}
669	}
670
671	return (struct element_t *) result;
672	}
673
674
675	static void
676	unlink_element (struct locale_collate_t *collate)
677	{
678	if (collate->cursor == collate->start)
679	{
680	assert (collate->cursor->next == NULL);
681	assert (collate->cursor->last == NULL);
682	collate->cursor = NULL;
683	}
684	else
685	{
686	if (collate->cursor->next != NULL)
687	collate->cursor->next->last = collate->cursor->last;
688	if (collate->cursor->last != NULL)
689	collate->cursor->last->next = collate->cursor->next;
690	collate->cursor = collate->cursor->last;
691	}
692	}
693
694
695	static void
696	insert_weights (struct linereader ldfile, struct* element_t *elem,
697	const struct charmap_t *charmap,
698	struct repertoire_t repertoire, struct* localedef_t *result,
699	enum token_t ellipsis)
700	{
701	int weight_cnt;
702	struct token *arg;
703	struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
704
705	/ Initialize all the fields. /
706	elem->file = ldfile->fname;
707	elem->line = ldfile->lineno;
708
709	elem->last = collate->cursor;
710	elem->next = collate->cursor ? collate->cursor->next : NULL;
711	if (collate->cursor != NULL && collate->cursor->next != NULL)
712	collate->cursor->next->last = elem;
713	if (collate->cursor != NULL)
714	collate->cursor->next = elem;
715	if (collate->start == NULL)
716	{
717	assert (collate->cursor == NULL);
718	collate->start = elem;
719	}
720
721	elem->section = collate->current_section;
722
723	if (collate->current_section->first == NULL)
724	collate->current_section->first = elem;
725	if (collate->current_section->last == collate->cursor)
726	collate->current_section->last = elem;
727
728	collate->cursor = elem;
729
730	elem->weights = (struct element_list_t *)
731	obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
732	memset (elem->weights, `'\0'`, nrules * sizeof (struct element_list_t));
733
734	weight_cnt = `0`;
735
736	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
737	do
738	{
739	if (arg->tok == tok_eof \|\| arg->tok == tok_eol)
740	break;
741
742	if (arg->tok == tok_ignore)
743	{
744	/ The weight for this level has to be ignored. We use the*
745	null pointer to indicate this. /*
746	elem->weights[weight_cnt].w = (struct element_t **)
747	obstack_alloc (&collate->mempool, sizeof (struct element_t *));
748	elem->weights[weight_cnt].w[`0`] = NULL;
749	elem->weights[weight_cnt].cnt = `1`;
750	}
751	else if (arg->tok == tok_bsymbol \|\| arg->tok == tok_ucs4)
752	{
753	char ucs4str[`10`];
754	struct element_t *val;
755	char *symstr;
756	size_t symlen;
757
758	if (arg->tok == tok_bsymbol)
759	{
760	symstr = arg->val.str.startmb;
761	symlen = arg->val.str.lenmb;
762	}
763	else
764	{
765	snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4);
766	symstr = ucs4str;
767	symlen = `9`;
768	}
769
770	val = find_element (ldfile, collate, symstr, symlen);
771	if (val == NULL)
772	break;
773
774	elem->weights[weight_cnt].w = (struct element_t **)
775	obstack_alloc (&collate->mempool, sizeof (struct element_t *));
776	elem->weights[weight_cnt].w[`0`] = val;
777	elem->weights[weight_cnt].cnt = `1`;
778	}
779	else if (arg->tok == tok_string)
780	{
781	/ Split the string up in the individual characters and put*
782	the element definitions in the list. /*
783	const char *cp = arg->val.str.startmb;
784	int cnt = `0`;
785	struct element_t *charelem;
786	struct element_t **weights = NULL;
787	int max = `0`;
788
789	if (*cp == `'\0'`)
790	{
791	lr_error (ldfile, _("%s: empty weight string not allowed"),
792	"LC_COLLATE");
793	lr_ignore_rest (ldfile, `0`);
794	break;
795	}
796
797	do
798	{
799	if (*cp == `'<'`)
800	{
801	/ Ahh, it's a bsymbol or an UCS4 value. If it's*
802	the latter we have to unify the name. /*
803	const char *startp = ++cp;
804	size_t len;
805
806	while (*cp != `'>'`)
807	{
808	if (*cp == ldfile->escape_char)
809	++cp;
810	if (*cp == `'\0'`)
811	/ It's a syntax error. /
812	goto syntax;
813
814	++cp;
815	}
816
817	if (cp - startp == `5` && startp[`0`] == `'U'`
818	&& isxdigit (startp[`1`]) && isxdigit (startp[`2`])
819	&& isxdigit (startp[`3`]) && isxdigit (startp[`4`]))
820	{
821	unsigned int ucs4 = strtoul (startp + `1`, NULL, `16`);
822	char *newstr;
823
824	newstr = (char *) xmalloc (`10`);
825	snprintf (newstr, `10`, "U%08X", ucs4);
826	startp = newstr;
827
828	len = `9`;
829	}
830	else
831	len = cp - startp;
832
833	charelem = find_element (ldfile, collate, startp, len);
834	++cp;
835	}
836	else
837	{
838	/ People really shouldn't use characters directly in*
839	the string. Especially since it's not really clear
840	what this means. We interpret all characters in the
841	string as if that would be bsymbols. Otherwise we
842	would have to match back to bsymbols somehow and this
843	is normally not what people normally expect. /*
844	charelem = find_element (ldfile, collate, cp++, `1`);
845	}
846
847	if (charelem == NULL)
848	{
849	/ We ignore the rest of the line. /
850	lr_ignore_rest (ldfile, `0`);
851	break;
852	}
853
854	/ Add the pointer. /
855	if (cnt >= max)
856	{
857	struct element_t **newp;
858	max += `10`;
859	newp = (struct element_t **)
860	alloca (max * sizeof (struct element_t *));
861	memcpy (newp, weights, cnt * sizeof (struct element_t *));
862	weights = newp;
863	}
864	weights[cnt++] = charelem;
865	}
866	while (*cp != `'\0'`);
867
868	/ Now store the information. /
869	elem->weights[weight_cnt].w = (struct element_t **)
870	obstack_alloc (&collate->mempool,
871	cnt * sizeof (struct element_t *));
872	memcpy (elem->weights[weight_cnt].w, weights,
873	cnt * sizeof (struct element_t *));
874	elem->weights[weight_cnt].cnt = cnt;
875
876	/ We don't need the string anymore. /
877	free (arg->val.str.startmb);
878	}
879	else if (ellipsis != tok_none
880	&& (arg->tok == tok_ellipsis2
881	\|\| arg->tok == tok_ellipsis3
882	\|\| arg->tok == tok_ellipsis4))
883	{
884	/ It must be the same ellipsis as used in the initial column. /
885	if (arg->tok != ellipsis)
886	lr_error (ldfile, _("\
887	%s: weights must use the same ellipsis symbol as the name"),
888	"LC_COLLATE");
889
890	/ The weight for this level will depend on the element*
891	iterating over the range. Put a placeholder. /*
892	elem->weights[weight_cnt].w = (struct element_t **)
893	obstack_alloc (&collate->mempool, sizeof (struct element_t *));
894	elem->weights[weight_cnt].w[`0`] = ELEMENT_ELLIPSIS2;
895	elem->weights[weight_cnt].cnt = `1`;
896	}
897	else
898	{
899	syntax:
900	/ It's a syntax error. /
901	lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
902	lr_ignore_rest (ldfile, `0`);
903	break;
904	}
905
906	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
907	/ This better should be the end of the line or a semicolon. /
908	if (arg->tok == tok_semicolon)
909	/ OK, ignore this and read the next token. /
910	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
911	else if (arg->tok != tok_eof && arg->tok != tok_eol)
912	{
913	/ It's a syntax error. /
914	lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
915	lr_ignore_rest (ldfile, `0`);
916	break;
917	}
918	}
919	while (++weight_cnt < nrules);
920
921	if (weight_cnt < nrules)
922	{
923	/ This means the rest of the line uses the current element as*
924	the weight. /*
925	do
926	{
927	elem->weights[weight_cnt].w = (struct element_t **)
928	obstack_alloc (&collate->mempool, sizeof (struct element_t *));
929	if (ellipsis == tok_none)
930	elem->weights[weight_cnt].w[`0`] = elem;
931	else
932	elem->weights[weight_cnt].w[`0`] = ELEMENT_ELLIPSIS2;
933	elem->weights[weight_cnt].cnt = `1`;
934	}
935	while (++weight_cnt < nrules);
936	}
937	else
938	{
939	if (arg->tok == tok_ignore \|\| arg->tok == tok_bsymbol)
940	{
941	/ Too many rule values. /
942	lr_error (ldfile, _("%s: too many values"), "LC_COLLATE");
943	lr_ignore_rest (ldfile, `0`);
944	}
945	else
946	lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof);
947	}
948	}
949
950
951	static int
952	insert_value (struct linereader ldfile, const* char *symstr, size_t symlen,
953	const struct charmap_t charmap, struct* repertoire_t *repertoire,
954	struct localedef_t *result)
955	{
956	/ First find out what kind of symbol this is. /
957	struct charseq *seq;
958	uint32_t wc;
959	struct element_t *elem = NULL;
960	struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
961
962	/ Try to find the character in the charmap. /
963	seq = charmap_find_value (charmap, symstr, symlen);
964
965	/ Determine the wide character. /
966	if (seq == NULL \|\| seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
967	{
968	wc = repertoire_find_value (repertoire, symstr, symlen);
969	if (seq != NULL)
970	seq->ucs4 = wc;
971	}
972	else
973	wc = seq->ucs4;
974
975	if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
976	{
977	/ It's no character, so look through the collation elements and*
978	symbol list. /*
979	void *ptr = elem;
980	if (find_entry (&collate->elem_table, symstr, symlen, &ptr) != `0`)
981	{
982	void *result;
983	struct symbol_t *sym = NULL;
984
985	/ It's also collation element. Therefore it's either a*
986	collating symbol or it's a character which is not
987	supported by the character set. In the later case we
988	simply create a dummy entry. /*
989	if (find_entry (&collate->sym_table, symstr, symlen, &result) == `0`)
990	{
991	/ It's a collation symbol. /
992	sym = (struct symbol_t *) result;
993
994	elem = sym->order;
995	}
996
997	if (elem == NULL)
998	{
999	elem = new_element (collate, NULL, `0`, NULL, symstr, symlen, `0`);
1000
1001	if (sym != NULL)
1002	sym->order = elem;
1003	else
1004	/ Enter a fake element in the sequence table. This*
1005	won't cause anything in the output since there is
1006	no multibyte or wide character associated with
1007	it. /*
1008	insert_entry (&collate->seq_table, symstr, symlen, elem);
1009	}
1010	}
1011	else
1012	/ Copy the result back. /
1013	elem = ptr;
1014	}
1015	else
1016	{
1017	/ Otherwise the symbols stands for a character. /
1018	void *ptr = elem;
1019	if (find_entry (&collate->seq_table, symstr, symlen, &ptr) != `0`)
1020	{
1021	uint32_t wcs[`2`] = { wc, `0` };
1022
1023	/ We have to allocate an entry. /
1024	elem = new_element (collate,
1025	seq != NULL ? (char *) seq->bytes : NULL,
1026	seq != NULL ? seq->nbytes : `0`,
1027	wc == ILLEGAL_CHAR_VALUE ? NULL : wcs,
1028	symstr, symlen, `1`);
1029
1030	/ And add it to the table. /
1031	if (insert_entry (&collate->seq_table, symstr, symlen, elem) != `0`)
1032	/ This cannot happen. /
1033	assert (! "Internal error");
1034	}
1035	else
1036	{
1037	/ Copy the result back. /
1038	elem = ptr;
1039
1040	/ Maybe the character was used before the definition. In this case*
1041	we have to insert the byte sequences now. /*
1042	if (elem->mbs == NULL && seq != NULL)
1043	{
1044	elem->mbs = obstack_copy0 (&collate->mempool,
1045	seq->bytes, seq->nbytes);
1046	elem->nmbs = seq->nbytes;
1047	}
1048
1049	if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE)
1050	{
1051	uint32_t wcs[`2`] = { wc, `0` };
1052
1053	elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs));
1054	elem->nwcs = `1`;
1055	}
1056	}
1057	}
1058
1059	/ Test whether this element is not already in the list. /
1060	if (elem->next != NULL \|\| elem == collate->cursor)
1061	{
1062	lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"),
1063	(int) symlen, symstr, elem->file, elem->line);
1064	lr_ignore_rest (ldfile, `0`);
1065	return `1`;
1066	}
1067
1068	insert_weights (ldfile, elem, charmap, repertoire, result, tok_none);
1069
1070	return `0`;
1071	}
1072
1073
1074	static void
1075	handle_ellipsis (struct linereader ldfile, const* char *symstr, size_t symlen,
1076	enum token_t ellipsis, const struct charmap_t *charmap,
1077	struct repertoire_t *repertoire,
1078	struct localedef_t *result)
1079	{
1080	struct element_t *startp;
1081	struct element_t *endp;
1082	struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
1083
1084	/ Unlink the entry added for the ellipsis. /
1085	unlink_element (collate);
1086	startp = collate->cursor;
1087
1088	/ Process and add the end-entry. /
1089	if (symstr != NULL
1090	&& insert_value (ldfile, symstr, symlen, charmap, repertoire, result))
1091	/ Something went wrong with inserting the to-value. This means*
1092	we cannot process the ellipsis. /*
1093	return;
1094
1095	/ Reset the cursor. /
1096	collate->cursor = startp;
1097
1098	/ Now we have to handle many different situations:*
1099	- we have to distinguish between the three different ellipsis forms
1100	- the is the ellipsis at the beginning, in the middle, or at the end.
1101	*/
1102	endp = collate->cursor->next;
1103	assert (symstr == NULL \|\| endp != NULL);
1104
1105	/ XXX The following is probably very wrong since also collating symbols*
1106	can appear in ranges. But do we want/can refine the test for that? /*
1107	#if 0
1108	/ Both, the start and the end symbol, must stand for characters. /
1109	if ((startp != NULL && (startp->name == NULL \|\| ! startp->is_character))
1110	\|\| (endp != NULL && (endp->name == NULL\|\| ! endp->is_character)))
1111	{
1112	lr_error (ldfile, _("\
1113	%s: the start and the end symbol of a range must stand for characters"),
1114	"LC_COLLATE");
1115	return;
1116	}
1117	#endif
1118
1119	if (ellipsis == tok_ellipsis3)
1120	{
1121	/ One requirement we make here: the length of the byte*
1122	sequences for the first and end character must be the same.
1123	This is mainly to prevent unwanted effects and this is often
1124	not what is wanted. /*
1125	size_t len = (startp->mbs != NULL ? startp->nmbs
1126	: (endp->mbs != NULL ? endp->nmbs : `0`));
1127	char mbcnt[len + `1`];
1128	char mbend[len + `1`];
1129
1130	/ Well, this should be caught somewhere else already. Just to*
1131	make sure. /*
1132	assert (startp == NULL \|\| startp->wcs == NULL \|\| startp->wcs[`1`] == `0`);
1133	assert (endp == NULL \|\| endp->wcs == NULL \|\| endp->wcs[`1`] == `0`);
1134
1135	if (startp != NULL && endp != NULL
1136	&& startp->mbs != NULL && endp->mbs != NULL
1137	&& startp->nmbs != endp->nmbs)
1138	{
1139	lr_error (ldfile, _("\
1140	%s: byte sequences of first and last character must have the same length"),
1141	"LC_COLLATE");
1142	return;
1143	}
1144
1145	/ Determine whether we have to generate multibyte sequences. /
1146	if ((startp == NULL \|\| startp->mbs != NULL)
1147	&& (endp == NULL \|\| endp->mbs != NULL))
1148	{
1149	int cnt;
1150	int ret;
1151
1152	/ Prepare the beginning byte sequence. This is either from the*
1153	beginning byte sequence or it is all nulls if it was an
1154	initial ellipsis. /*
1155	if (startp == NULL \|\| startp->mbs == NULL)
1156	memset (mbcnt, `'\0'`, len);
1157	else
1158	{
1159	memcpy (mbcnt, startp->mbs, len);
1160
1161	/ And increment it so that the value is the first one we will*
1162	try to insert. /*
1163	for (cnt = len - `1`; cnt >= `0`; --cnt)
1164	if (++mbcnt[cnt] != `'\0'`)
1165	break;
1166	}
1167	mbcnt[len] = `'\0'`;
1168
1169	/ And the end sequence. /
1170	if (endp == NULL \|\| endp->mbs == NULL)
1171	memset (mbend, `'\0'`, len);
1172	else
1173	memcpy (mbend, endp->mbs, len);
1174	mbend[len] = `'\0'`;
1175
1176	/ Test whether we have a correct range. /
1177	ret = memcmp (mbcnt, mbend, len);
1178	if (ret >= `0`)
1179	{
1180	if (ret > `0`)
1181	lr_error (ldfile, _("%s: byte sequence of first character of \
1182	range is not lower than that of the last character"), "LC_COLLATE");
1183	return;
1184	}
1185
1186	/ Generate the byte sequences data. /
1187	while (`1`)
1188	{
1189	struct charseq *seq;
1190
1191	/ Quite a bit of work ahead. We have to find the character*
1192	definition for the byte sequence and then determine the
1193	wide character belonging to it. /*
1194	seq = charmap_find_symbol (charmap, mbcnt, len);
1195	if (seq != NULL)
1196	{
1197	struct element_t *elem;
1198	size_t namelen;
1199
1200	/ I don't think this can ever happen. /
1201	assert (seq->name != NULL);
1202	namelen = strlen (seq->name);
1203
1204	if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1205	seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1206	namelen);
1207
1208	/ Now we are ready to insert the new value in the*
1209	sequence. Find out whether the element is
1210	already known. /*
1211	void *ptr;
1212	if (find_entry (&collate->seq_table, seq->name, namelen,
1213	&ptr) != `0`)
1214	{
1215	uint32_t wcs[`2`] = { seq->ucs4, `0` };
1216
1217	/ We have to allocate an entry. /
1218	elem = new_element (collate, mbcnt, len,
1219	seq->ucs4 == ILLEGAL_CHAR_VALUE
1220	? NULL : wcs, seq->name,
1221	namelen, `1`);
1222
1223	/ And add it to the table. /
1224	if (insert_entry (&collate->seq_table, seq->name,
1225	namelen, elem) != `0`)
1226	/ This cannot happen. /
1227	assert (! "Internal error");
1228	}
1229	else
1230	/ Copy the result. /
1231	elem = ptr;
1232
1233	/ Test whether this element is not already in the list. /
1234	if (elem->next != NULL \|\| (collate->cursor != NULL
1235	&& elem->next == collate->cursor))
1236	{
1237	lr_error (ldfile, _("\
1238	order for `%.*s' already defined at %s:%Zu"),
1239	(int) namelen, seq->name,
1240	elem->file, elem->line);
1241	goto increment;
1242	}
1243
1244	/ Enqueue the new element. /
1245	elem->last = collate->cursor;
1246	if (collate->cursor == NULL)
1247	elem->next = NULL;
1248	else
1249	{
1250	elem->next = collate->cursor->next;
1251	elem->last->next = elem;
1252	if (elem->next != NULL)
1253	elem->next->last = elem;
1254	}
1255	if (collate->start == NULL)
1256	{
1257	assert (collate->cursor == NULL);
1258	collate->start = elem;
1259	}
1260	collate->cursor = elem;
1261
1262	/ Add the weight value. We take them from the*
1263	`ellipsis_weights' member of `collate'. /*
1264	elem->weights = (struct element_list_t *)
1265	obstack_alloc (&collate->mempool,
1266	nrules * sizeof (struct element_list_t));
1267	for (cnt = `0`; cnt < nrules; ++cnt)
1268	if (collate->ellipsis_weight.weights[cnt].cnt == `1`
1269	&& (collate->ellipsis_weight.weights[cnt].w[`0`]
1270	== ELEMENT_ELLIPSIS2))
1271	{
1272	elem->weights[cnt].w = (struct element_t **)
1273	obstack_alloc (&collate->mempool,
1274	sizeof (struct element_t *));
1275	elem->weights[cnt].w[`0`] = elem;
1276	elem->weights[cnt].cnt = `1`;
1277	}
1278	else
1279	{
1280	/ Simply use the weight from `ellipsis_weight'. /
1281	elem->weights[cnt].w =
1282	collate->ellipsis_weight.weights[cnt].w;
1283	elem->weights[cnt].cnt =
1284	collate->ellipsis_weight.weights[cnt].cnt;
1285	}
1286	}
1287
1288	/ Increment for the next round. /
1289	increment:
1290	for (cnt = len - `1`; cnt >= `0`; --cnt)
1291	if (++mbcnt[cnt] != `'\0'`)
1292	break;
1293
1294	/ Find out whether this was all. /
1295	if (cnt < `0` \|\| memcmp (mbcnt, mbend, len) >= `0`)
1296	/ Yep, that's all. /
1297	break;
1298	}
1299	}
1300	}
1301	else
1302	{
1303	/ For symbolic range we naturally must have a beginning and an*
1304	end specified by the user. /*
1305	if (startp == NULL)
1306	lr_error (ldfile, _("\
1307	%s: symbolic range ellipsis must not directly follow `order_start'"),
1308	"LC_COLLATE");
1309	else if (endp == NULL)
1310	lr_error (ldfile, _("\
1311	%s: symbolic range ellipsis must not be directly followed by `order_end'"),
1312	"LC_COLLATE");
1313	else
1314	{
1315	/ Determine the range. To do so we have to determine the*
1316	common prefix of the both names and then the numeric
1317	values of both ends. /*
1318	size_t lenfrom = strlen (startp->name);
1319	size_t lento = strlen (endp->name);
1320	char buf[lento + `1`];
1321	int preflen = `0`;
1322	long int from;
1323	long int to;
1324	char *cp;
1325	int base = ellipsis == tok_ellipsis2 ? `16` : `10`;
1326
1327	if (lenfrom != lento)
1328	{
1329	invalid_range:
1330	lr_error (ldfile, _("\
1331	`%s' and `%.*s' are not valid names for symbolic range"),
1332	startp->name, (int) lento, endp->name);
1333	return;
1334	}
1335
1336	while (startp->name[preflen] == endp->name[preflen])
1337	if (startp->name[preflen] == `'\0'`)
1338	/ Nothing to be done. The start and end point are identical*
1339	and while inserting the end point we have already given
1340	the user an error message. /*
1341	return;
1342	else
1343	++preflen;
1344
1345	errno = `0`;
1346	from = strtol (startp->name + preflen, &cp, base);
1347	if ((from == UINT_MAX && errno == ERANGE) \|\| *cp != `'\0'`)
1348	goto invalid_range;
1349
1350	errno = `0`;
1351	to = strtol (endp->name + preflen, &cp, base);
1352	if ((to == UINT_MAX && errno == ERANGE) \|\| *cp != `'\0'`)
1353	goto invalid_range;
1354
1355	/ Copy the prefix. /
1356	memcpy (buf, startp->name, preflen);
1357
1358	/ Loop over all values. /
1359	for (++from; from < to; ++from)
1360	{
1361	struct element_t *elem = NULL;
1362	struct charseq *seq;
1363	uint32_t wc;
1364	int cnt;
1365
1366	/ Generate the name. /
1367	sprintf (buf + preflen, base == `10` ? "%0ld" : "%0lX",
1368	(int) (lenfrom - preflen), from);
1369
1370	/ Look whether this name is already defined. /
1371	void *ptr;
1372	if (find_entry (&collate->seq_table, buf, symlen, &ptr) == `0`)
1373	{
1374	/ Copy back the result. /
1375	elem = ptr;
1376
1377	if (elem->next != NULL \|\| (collate->cursor != NULL
1378	&& elem->next == collate->cursor))
1379	{
1380	lr_error (ldfile, _("\
1381	%s: order for `%.*s' already defined at %s:%Zu"),
1382	"LC_COLLATE", (int) lenfrom, buf,
1383	elem->file, elem->line);
1384	continue;
1385	}
1386
1387	if (elem->name == NULL)
1388	{
1389	lr_error (ldfile, _("%s: `%s' must be a character"),
1390	"LC_COLLATE", buf);
1391	continue;
1392	}
1393	}
1394
1395	if (elem == NULL \|\| (elem->mbs == NULL && elem->wcs == NULL))
1396	{
1397	/ Search for a character of this name. /
1398	seq = charmap_find_value (charmap, buf, lenfrom);
1399	if (seq == NULL \|\| seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1400	{
1401	wc = repertoire_find_value (repertoire, buf, lenfrom);
1402
1403	if (seq != NULL)
1404	seq->ucs4 = wc;
1405	}
1406	else
1407	wc = seq->ucs4;
1408
1409	if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
1410	/ We don't know anything about a character with this*
1411	name. XXX Should we warn? /*
1412	continue;
1413
1414	if (elem == NULL)
1415	{
1416	uint32_t wcs[`2`] = { wc, `0` };
1417
1418	/ We have to allocate an entry. /
1419	elem = new_element (collate,
1420	seq != NULL
1421	? (char *) seq->bytes : NULL,
1422	seq != NULL ? seq->nbytes : `0`,
1423	wc == ILLEGAL_CHAR_VALUE
1424	? NULL : wcs, buf, lenfrom, `1`);
1425	}
1426	else
1427	{
1428	/ Update the element. /
1429	if (seq != NULL)
1430	{
1431	elem->mbs = obstack_copy0 (&collate->mempool,
1432	seq->bytes, seq->nbytes);
1433	elem->nmbs = seq->nbytes;
1434	}
1435
1436	if (wc != ILLEGAL_CHAR_VALUE)
1437	{
1438	uint32_t zero = `0`;
1439
1440	obstack_grow (&collate->mempool,
1441	&wc, sizeof (uint32_t));
1442	obstack_grow (&collate->mempool,
1443	&zero, sizeof (uint32_t));
1444	elem->wcs = obstack_finish (&collate->mempool);
1445	elem->nwcs = `1`;
1446	}
1447	}
1448
1449	elem->file = ldfile->fname;
1450	elem->line = ldfile->lineno;
1451	elem->section = collate->current_section;
1452	}
1453
1454	/ Enqueue the new element. /
1455	elem->last = collate->cursor;
1456	elem->next = collate->cursor->next;
1457	elem->last->next = elem;
1458	if (elem->next != NULL)
1459	elem->next->last = elem;
1460	collate->cursor = elem;
1461
1462	/ Now add the weights. They come from the `ellipsis_weights'*
1463	member of `collate'. /*
1464	elem->weights = (struct element_list_t *)
1465	obstack_alloc (&collate->mempool,
1466	nrules * sizeof (struct element_list_t));
1467	for (cnt = `0`; cnt < nrules; ++cnt)
1468	if (collate->ellipsis_weight.weights[cnt].cnt == `1`
1469	&& (collate->ellipsis_weight.weights[cnt].w[`0`]
1470	== ELEMENT_ELLIPSIS2))
1471	{
1472	elem->weights[cnt].w = (struct element_t **)
1473	obstack_alloc (&collate->mempool,
1474	sizeof (struct element_t *));
1475	elem->weights[cnt].w[`0`] = elem;
1476	elem->weights[cnt].cnt = `1`;
1477	}
1478	else
1479	{
1480	/ Simly use the weight from `ellipsis_weight'. /
1481	elem->weights[cnt].w =
1482	collate->ellipsis_weight.weights[cnt].w;
1483	elem->weights[cnt].cnt =
1484	collate->ellipsis_weight.weights[cnt].cnt;
1485	}
1486	}
1487	}
1488	}
1489	/ Move the cursor to the last entry in the ellipsis.*
1490	Subsequent operations need to start from the last entry. /*
1491	collate->cursor = endp;
1492	}
1493
1494
1495	static void
1496	collate_startup (struct linereader ldfile, struct* localedef_t *locale,
1497	struct localedef_t copy_locale, int* ignore_content)
1498	{
1499	if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL)
1500	{
1501	struct locale_collate_t *collate;
1502
1503	if (copy_locale == NULL)
1504	{
1505	collate = locale->categories[LC_COLLATE].collate =
1506	(struct locale_collate_t *)
1507	xcalloc (`1`, sizeof (struct locale_collate_t));
1508
1509	/ Init the various data structures. /
1510	init_hash (&collate->elem_table, `100`);
1511	init_hash (&collate->sym_table, `100`);
1512	init_hash (&collate->seq_table, `500`);
1513	obstack_init (&collate->mempool);
1514
1515	collate->col_weight_max = -`1`;
1516	collate->codepoint_collation = false;
1517	}
1518	else
1519	/ Reuse the copy_locale's data structures. /
1520	collate = locale->categories[LC_COLLATE].collate =
1521	copy_locale->categories[LC_COLLATE].collate;
1522	}
1523
1524	ldfile->translate_strings = `0`;
1525	ldfile->return_widestr = `0`;
1526	}
1527
1528
1529	void
1530	collate_finish (struct localedef_t locale, const* struct charmap_t *charmap)
1531	{
1532	/ Now is the time when we can assign the individual collation*
1533	values for all the symbols. We have possibly different values
1534	for the wide- and the multibyte-character symbols. This is done
1535	since it might make a difference in the encoding if there is in
1536	some cases no multibyte-character but there are wide-characters.
1537	(The other way around it is not important since theencoded
1538	collation value in the wide-character case is 32 bits wide and
1539	therefore requires no encoding).
1540
1541	The lowest collation value assigned is 2. Zero is reserved for
1542	the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1543	functions and 1 is used to separate the individual passes for the
1544	different rules.
1545
1546	We also have to construct is list with all the bytes/words which
1547	can come first in a sequence, followed by all the elements which
1548	also start with this byte/word. The order is reverse which has
1549	among others the important effect that longer strings are located
1550	first in the list. This is required for the output data since
1551	the algorithm used in `strcoll' etc depends on this.
1552
1553	The multibyte case is easy. We simply sort into an array with
1554	256 elements. /*
1555	struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1556	int mbact[nrules];
1557	int wcact;
1558	int mbseqact;
1559	int wcseqact;
1560	struct element_t *runp;
1561	int i;
1562	int need_undefined = `0`;
1563	struct section_list *sect;
1564	int ruleidx;
1565	int nr_wide_elems = `0`;
1566
1567	if (collate == NULL)
1568	{
1569	/ No data, no check. Issue a warning. /
1570	record_warning (_("No definition for %s category found"),
1571	"LC_COLLATE");
1572	return;
1573	}
1574
1575	/ No data required. /
1576	if (collate->codepoint_collation)
1577	return;
1578
1579	/ If this assertion is hit change the type in `element_t'. /
1580	assert (nrules <= sizeof (runp->used_in_level) * `8`);
1581
1582	/ Make sure that the `position' rule is used either in all sections*
1583	or in none. /*
1584	for (i = `0`; i < nrules; ++i)
1585	for (sect = collate->sections; sect != NULL; sect = sect->next)
1586	if (sect != collate->current_section
1587	&& sect->rules != NULL
1588	&& ((sect->rules[i] & sort_position)
1589	!= (collate->current_section->rules[i] & sort_position)))
1590	{
1591	record_error (`0`, `0`, _("\
1592	%s: `position' must be used for a specific level in all sections or none"),
1593	"LC_COLLATE");
1594	break;
1595	}
1596
1597	/ Find out which elements are used at which level. At the same*
1598	time we find out whether we have any undefined symbols. /*
1599	runp = collate->start;
1600	while (runp != NULL)
1601	{
1602	if (runp->mbs != NULL)
1603	{
1604	for (i = `0`; i < nrules; ++i)
1605	{
1606	int j;
1607
1608	for (j = `0`; j < runp->weights[i].cnt; ++j)
1609	/ A NULL pointer as the weight means IGNORE. /
1610	if (runp->weights[i].w[j] != NULL)
1611	{
1612	if (runp->weights[i].w[j]->weights == NULL)
1613	{
1614	record_error_at_line (`0`, `0`, runp->file, runp->line,
1615	_("symbol `%s' not defined"),
1616	runp->weights[i].w[j]->name);
1617
1618	need_undefined = `1`;
1619	runp->weights[i].w[j] = &collate->undefined;
1620	}
1621	else
1622	/ Set the bit for the level. /
1623	runp->weights[i].w[j]->used_in_level \|= `1` << i;
1624	}
1625	}
1626	}
1627
1628	/ Up to the next entry. /
1629	runp = runp->next;
1630	}
1631
1632	/ Walk through the list of defined sequences and assign weights. Also*
1633	create the data structure which will allow generating the single byte
1634	character based tables.
1635
1636	Since at each time only the weights for each of the rules are
1637	only compared to other weights for this rule it is possible to
1638	assign more compact weight values than simply counting all
1639	weights in sequence. We can assign weights from 3, one for each
1640	rule individually and only for those elements, which are actually
1641	used for this rule.
1642
1643	Why is this important? It is not for the wide char table. But
1644	it is for the singlebyte output since here larger numbers have to
1645	be encoded to make it possible to emit the value as a byte
1646	string. /*
1647	for (i = `0`; i < nrules; ++i)
1648	mbact[i] = `2`;
1649	wcact = `2`;
1650	mbseqact = `0`;
1651	wcseqact = `0`;
1652	runp = collate->start;
1653	while (runp != NULL)
1654	{
1655	/ Determine the order. /
1656	if (runp->used_in_level != `0`)
1657	{
1658	runp->mborder = (int *) obstack_alloc (&collate->mempool,
1659	nrules * sizeof (int));
1660
1661	for (i = `0`; i < nrules; ++i)
1662	if ((runp->used_in_level & (`1` << i)) != `0`)
1663	runp->mborder[i] = mbact[i]++;
1664	else
1665	runp->mborder[i] = `0`;
1666	}
1667
1668	if (runp->mbs != NULL)
1669	{
1670	struct element_t **eptr;
1671	struct element_t *lastp = NULL;
1672
1673	/ Find the point where to insert in the list. /
1674	eptr = &collate->mbheads[((unsigned char *) runp->mbs)[`0`]];
1675	while (*eptr != NULL)
1676	{
1677	if ((*eptr)->nmbs < runp->nmbs)
1678	break;
1679
1680	if ((*eptr)->nmbs == runp->nmbs)
1681	{
1682	int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs);
1683
1684	if (c == `0`)
1685	{
1686	/ This should not happen. It means that we have*
1687	to symbols with the same byte sequence. It is
1688	of course an error. /*
1689	record_error_at_line (`0`, `0`, (*eptr)->file,
1690	(*eptr)->line,
1691	_("\
1692	symbol `%s' has the same encoding as"), (*eptr)->name);
1693
1694	record_error_at_line (`0`, `0`, runp->file, runp->line,
1695	_("symbol `%s'"), runp->name);
1696	goto dont_insert;
1697	}
1698	else if (c < `0`)
1699	/ Insert it here. /
1700	break;
1701	}
1702
1703	/ To the next entry. /
1704	lastp = *eptr;
1705	eptr = &(*eptr)->mbnext;
1706	}
1707
1708	/ Set the pointers. /
1709	runp->mbnext = *eptr;
1710	runp->mblast = lastp;
1711	if (*eptr != NULL)
1712	(*eptr)->mblast = runp;
1713	*eptr = runp;
1714	dont_insert:
1715	;
1716	}
1717
1718	if (runp->used_in_level)
1719	{
1720	runp->wcorder = wcact++;
1721
1722	/ We take the opportunity to count the elements which have*
1723	wide characters. /*
1724	++nr_wide_elems;
1725	}
1726
1727	if (runp->is_character)
1728	{
1729	if (runp->nmbs == `1`)
1730	collate->mbseqorder[((unsigned char *) runp->mbs)[`0`]] = mbseqact++;
1731
1732	runp->wcseqorder = wcseqact++;
1733	}
1734	else if (runp->mbs != NULL && runp->weights != NULL)
1735	/ This is for collation elements. /
1736	runp->wcseqorder = wcseqact++;
1737
1738	/ Up to the next entry. /
1739	runp = runp->next;
1740	}
1741
1742	/ Find out whether any of the `mbheads' entries is unset. In this*
1743	case we use the UNDEFINED entry. /*
1744	for (i = `1`; i < `256`; ++i)
1745	if (collate->mbheads[i] == NULL)
1746	{
1747	need_undefined = `1`;
1748	collate->mbheads[i] = &collate->undefined;
1749	}
1750
1751	/ Now to the wide character case. /
1752	collate->wcheads.p = `6`;
1753	collate->wcheads.q = `10`;
1754	wchead_table_init (&collate->wcheads);
1755
1756	collate->wcseqorder.p = `6`;
1757	collate->wcseqorder.q = `10`;
1758	collseq_table_init (&collate->wcseqorder);
1759
1760	/ Start adding. /
1761	runp = collate->start;
1762	while (runp != NULL)
1763	{
1764	if (runp->wcs != NULL)
1765	{
1766	struct element_t *e;
1767	struct element_t **eptr;
1768	struct element_t *lastp;
1769
1770	/ Insert the collation sequence value. /
1771	if (runp->is_character)
1772	collseq_table_add (&collate->wcseqorder, runp->wcs[`0`],
1773	runp->wcseqorder);
1774
1775	/ Find the point where to insert in the list. /
1776	e = wchead_table_get (&collate->wcheads, runp->wcs[`0`]);
1777	eptr = &e;
1778	lastp = NULL;
1779	while (*eptr != NULL)
1780	{
1781	if ((*eptr)->nwcs < runp->nwcs)
1782	break;
1783
1784	if ((*eptr)->nwcs == runp->nwcs)
1785	{
1786	int c = wmemcmp ((wchar_t ) (eptr)->wcs,
1787	(wchar_t *) runp->wcs, runp->nwcs);
1788
1789	if (c == `0`)
1790	{
1791	/ This should not happen. It means that we have*
1792	two symbols with the same byte sequence. It is
1793	of course an error. /*
1794	record_error_at_line (`0`, `0`, (*eptr)->file,
1795	(*eptr)->line,
1796	_("\
1797	symbol `%s' has the same encoding as"), (*eptr)->name);
1798
1799	record_error_at_line (`0`, `0`, runp->file, runp->line,
1800	_("symbol `%s'"), runp->name);
1801	goto dont_insertwc;
1802	}
1803	else if (c < `0`)
1804	/ Insert it here. /
1805	break;
1806	}
1807
1808	/ To the next entry. /
1809	lastp = *eptr;
1810	eptr = &(*eptr)->wcnext;
1811	}
1812
1813	/ Set the pointers. /
1814	runp->wcnext = *eptr;
1815	runp->wclast = lastp;
1816	if (*eptr != NULL)
1817	(*eptr)->wclast = runp;
1818	*eptr = runp;
1819	if (eptr == &e)
1820	wchead_table_add (&collate->wcheads, runp->wcs[`0`], e);
1821	dont_insertwc:
1822	;
1823	}
1824
1825	/ Up to the next entry. /
1826	runp = runp->next;
1827	}
1828
1829	/ Now determine whether the UNDEFINED entry is needed and if yes,*
1830	whether it was defined. /*
1831	collate->undefined.used_in_level = need_undefined ? ~`0ul` : `0`;
1832	if (collate->undefined.file == NULL)
1833	{
1834	if (need_undefined)
1835	{
1836	/ This seems not to be enforced by recent standards. Don't*
1837	emit an error, simply append UNDEFINED at the end. /*
1838	collate->undefined.mborder =
1839	(int ) obstack_alloc (&collate->mempool, nrules sizeof (int));
1840
1841	for (i = `0`; i < nrules; ++i)
1842	collate->undefined.mborder[i] = mbact[i]++;
1843	}
1844
1845	/ In any case we will need the definition for the wide character*
1846	case. But we will not complain that it is missing since the
1847	specification strangely enough does not seem to account for
1848	this. /*
1849	collate->undefined.wcorder = wcact++;
1850	}
1851
1852	/ Finally, try to unify the rules for the sections. Whenever the rules*
1853	for a section are the same as those for another section give the
1854	ruleset the same index. Since there are never many section we can
1855	use an O(n^2) algorithm here. /*
1856	sect = collate->sections;
1857	while (sect != NULL && sect->rules == NULL)
1858	sect = sect->next;
1859
1860	/ Bail out if we have no sections because of earlier errors. /
1861	if (sect == NULL)
1862	{
1863	record_error (EXIT_FAILURE, `0`, _("too many errors; giving up"));
1864	return;
1865	}
1866
1867	ruleidx = `0`;
1868	do
1869	{
1870	struct section_list *osect = collate->sections;
1871
1872	while (osect != sect)
1873	if (osect->rules != NULL
1874	&& memcmp (osect->rules, sect->rules,
1875	nrules * sizeof (osect->rules[`0`])) == `0`)
1876	break;
1877	else
1878	osect = osect->next;
1879
1880	if (osect == sect)
1881	sect->ruleidx = ruleidx++;
1882	else
1883	sect->ruleidx = osect->ruleidx;
1884
1885	/ Next section. /
1886	do
1887	sect = sect->next;
1888	while (sect != NULL && sect->rules == NULL);
1889	}
1890	while (sect != NULL);
1891	/ We are currently not prepared for more than 128 rulesets. But this*
1892	should never really be a problem. /*
1893	assert (ruleidx <= `128`);
1894	}
1895
1896
1897	static int32_t
1898	output_weight (struct obstack pool, struct* locale_collate_t *collate,
1899	struct element_t *elem)
1900	{
1901	size_t cnt;
1902	int32_t retval;
1903
1904	/ Optimize the use of UNDEFINED. /
1905	if (elem == &collate->undefined)
1906	/ The weights are already inserted. /
1907	return `0`;
1908
1909	/ This byte can start exactly one collation element and this is*
1910	a single byte. We can directly give the index to the weights. /*
1911	retval = obstack_object_size (pool);
1912
1913	/ Construct the weight. /
1914	for (cnt = `0`; cnt < nrules; ++cnt)
1915	{
1916	char buf[elem->weights[cnt].cnt * `7`];
1917	int len = `0`;
1918	int i;
1919
1920	for (i = `0`; i < elem->weights[cnt].cnt; ++i)
1921	/ Encode the weight value. We do nothing for IGNORE entries. /
1922	if (elem->weights[cnt].w[i] != NULL)
1923	len += utf8_encode (&buf[len],
1924	elem->weights[cnt].w[i]->mborder[cnt]);
1925
1926	/ And add the buffer content. /
1927	obstack_1grow (pool, len);
1928	obstack_grow (pool, buf, len);
1929	}
1930
1931	return retval \| ((elem->section->ruleidx & `0x7f`) << `24`);
1932	}
1933
1934
1935	static int32_t
1936	output_weightwc (struct obstack pool, struct* locale_collate_t *collate,
1937	struct element_t *elem)
1938	{
1939	size_t cnt;
1940	int32_t retval;
1941
1942	/ Optimize the use of UNDEFINED. /
1943	if (elem == &collate->undefined)
1944	/ The weights are already inserted. /
1945	return `0`;
1946
1947	/ This byte can start exactly one collation element and this is*
1948	a single byte. We can directly give the index to the weights. /*
1949	retval = obstack_object_size (pool) / sizeof (int32_t);
1950
1951	/ Construct the weight. /
1952	for (cnt = `0`; cnt < nrules; ++cnt)
1953	{
1954	int32_t buf[elem->weights[cnt].cnt];
1955	int i;
1956	int32_t j;
1957
1958	for (i = `0`, j = `0`; i < elem->weights[cnt].cnt; ++i)
1959	if (elem->weights[cnt].w[i] != NULL)
1960	buf[j++] = elem->weights[cnt].w[i]->wcorder;
1961
1962	/ And add the buffer content. /
1963	obstack_int32_grow (pool, j);
1964
1965	obstack_grow (pool, buf, j * sizeof (int32_t));
1966	maybe_swap_uint32_obstack (pool, j);
1967	}
1968
1969	return retval \| ((elem->section->ruleidx & `0x7f`) << `24`);
1970	}
1971
/* Context shared with add_to_tablewc, which itself only receives a wide
   character and an element.  If localedef is ever threaded, this would
   need to be a __thread var.  */
static struct
{
  /* Pool handed to output_weightwc to receive the weight records.  */
  struct obstack *weightpool;
  /* Pool receiving the per-sequence records for multi-entry lists.  */
  struct obstack *extrapool;
  /* Pool receiving the weight indices of runs of consecutive entries.  */
  struct obstack *indpool;
  /* Collation data handed to output_weightwc.  */
  struct locale_collate_t *collate;
  /* Table mapping a wide character to its weight or extra-pool index.  */
  struct collidx_table *tablewc;
} atwc;
1981
1982	static void add_to_tablewc (uint32_t ch, struct element_t *runp);
1983
1984	static void
1985	add_to_tablewc (uint32_t ch, struct element_t *runp)
1986	{
1987	if (runp->wcnext == NULL && runp->nwcs == `1`)
1988	{
1989	int32_t weigthidx = output_weightwc (atwc.weightpool, atwc.collate,
1990	runp);
1991	collidx_table_add (atwc.tablewc, ch, weigthidx);
1992	}
1993	else
1994	{
1995	/ As for the singlebyte table, we recognize sequences and*
1996	compress them. /*
1997
1998	collidx_table_add (atwc.tablewc, ch,
1999	-(obstack_object_size (atwc.extrapool)
2000	/ sizeof (uint32_t)));
2001
2002	do
2003	{
2004	/ Store the current index in the weight table. We know that*
2005	the current position in the `extrapool' is aligned on a
2006	32-bit address. /*
2007	int32_t weightidx;
2008	int added;
2009
2010	/ Find out wether this is a single entry or we have more than*
2011	one consecutive entry. /*
2012	if (runp->wcnext != NULL
2013	&& runp->nwcs == runp->wcnext->nwcs
2014	&& wmemcmp ((wchar_t *) runp->wcs,
2015	(wchar_t *)runp->wcnext->wcs,
2016	runp->nwcs - `1`) == `0`
2017	&& (runp->wcs[runp->nwcs - `1`]
2018	== runp->wcnext->wcs[runp->nwcs - `1`] + `1`))
2019	{
2020	int i;
2021	struct element_t *series_startp = runp;
2022	struct element_t *curp;
2023
2024	/ Now add first the initial byte sequence. /
2025	added = (`1` + `1` + `2` * (runp->nwcs - `1`)) * sizeof (int32_t);
2026	if (sizeof (int32_t) == sizeof (int))
2027	obstack_make_room (atwc.extrapool, added);
2028
2029	/ More than one consecutive entry. We mark this by having*
2030	a negative index into the indirect table. /*
2031	obstack_int32_grow_fast (atwc.extrapool,
2032	-(obstack_object_size (atwc.indpool)
2033	/ sizeof (int32_t)));
2034	obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - `1`);
2035
2036	do
2037	runp = runp->wcnext;
2038	while (runp->wcnext != NULL
2039	&& runp->nwcs == runp->wcnext->nwcs
2040	&& wmemcmp ((wchar_t *) runp->wcs,
2041	(wchar_t *)runp->wcnext->wcs,
2042	runp->nwcs - `1`) == `0`
2043	&& (runp->wcs[runp->nwcs - `1`]
2044	== runp->wcnext->wcs[runp->nwcs - `1`] + `1`));
2045
2046	/ Now walk backward from here to the beginning. /
2047	curp = runp;
2048
2049	for (i = `1`; i < runp->nwcs; ++i)
2050	obstack_int32_grow_fast (atwc.extrapool, curp->wcs[i]);
2051
2052	/ Now find the end of the consecutive sequence and*
2053	add all the indices in the indirect pool. /*
2054	do
2055	{
2056	weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2057	curp);
2058	obstack_int32_grow (atwc.indpool, weightidx);
2059
2060	curp = curp->wclast;
2061	}
2062	while (curp != series_startp);
2063
2064	/ Add the final weight. /
2065	weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2066	curp);
2067	obstack_int32_grow (atwc.indpool, weightidx);
2068
2069	/ And add the end byte sequence. Without length this*
2070	time. /*
2071	for (i = `1`; i < curp->nwcs; ++i)
2072	obstack_int32_grow (atwc.extrapool, curp->wcs[i]);
2073	}
2074	else
2075	{
2076	/ A single entry. Simply add the index and the length and*
2077	string (except for the first character which is already
2078	tested for). /*
2079	int i;
2080
2081	/ Output the weight info. /
2082	weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2083	runp);
2084
2085	assert (runp->nwcs > `0`);
2086	added = (`1` + `1` + runp->nwcs - `1`) * sizeof (int32_t);
2087	if (sizeof (int) == sizeof (int32_t))
2088	obstack_make_room (atwc.extrapool, added);
2089
2090	obstack_int32_grow_fast (atwc.extrapool, weightidx);
2091	obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - `1`);
2092	for (i = `1`; i < runp->nwcs; ++i)
2093	obstack_int32_grow_fast (atwc.extrapool, runp->wcs[i]);
2094	}
2095
2096	/ Next entry. /
2097	runp = runp->wcnext;
2098	}
2099	while (runp != NULL);
2100	}
2101	}
2102
/* Include the C locale identity tables for _NL_COLLATE_COLLSEQMB and
   _NL_COLLATE_COLLSEQWC.  */
2105	#include "C-collate-seq.c"
2106
2107	void
2108	collate_output (struct localedef_t locale, const* struct charmap_t *charmap,
2109	const char *output_path)
2110	{
2111	struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
2112	const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
2113	struct locale_file file;
2114	size_t ch;
2115	int32_t tablemb[`256`];
2116	struct obstack weightpool;
2117	struct obstack extrapool;
2118	struct obstack indirectpool;
2119	struct section_list *sect;
2120	struct collidx_table tablewc;
2121	uint32_t elem_size;
2122	uint32_t *elem_table;
2123	int i;
2124	struct element_t *runp;
2125
2126	init_locale_data (&file, nelems);
2127	add_locale_uint32 (&file, nrules);
2128
2129	/ If we have no LC_COLLATE data emit only the number of rules as zero. /
2130	if (collate == NULL \|\| collate->codepoint_collation)
2131	{
2132	size_t idx;
2133	for (idx = `1`; idx < nelems; idx++)
2134	{
2135	/ The words have to be handled specially. /
2136	if (idx == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
2137	add_locale_uint32 (&file, `0`);
2138	else if (idx == _NL_ITEM_INDEX (_NL_COLLATE_CODESET)
2139	&& collate != NULL)
2140	/ A valid LC_COLLATE must have a code set name. /
2141	add_locale_string (&file, charmap->code_set_name);
2142	else if (idx == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQMB)
2143	&& collate != NULL)
2144	add_locale_raw_data (&file, collseqmb, sizeof (collseqmb));
2145	else if (idx == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQWC)
2146	&& collate != NULL)
2147	add_locale_uint32_array (&file, collseqwc,
2148	array_length (collseqwc));
2149	else
2150	add_locale_empty (&file);
2151	}
2152	write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file);
2153	return;
2154	}
2155
2156	obstack_init (&weightpool);
2157	obstack_init (&extrapool);
2158	obstack_init (&indirectpool);
2159
2160	/ Since we are using the sign of an integer to mark indirection the*
2161	offsets in the arrays we are indirectly referring to must not be
2162	zero since -0 == 0. Therefore we add a bit of dummy content. /*
2163	obstack_int32_grow (&extrapool, `0`);
2164	obstack_int32_grow (&indirectpool, `0`);
2165
2166	/ Prepare the ruleset table. /
2167	for (sect = collate->sections, i = `0`; sect != NULL; sect = sect->next)
2168	if (sect->rules != NULL && sect->ruleidx == i)
2169	{
2170	int j;
2171
2172	obstack_make_room (&weightpool, nrules);
2173
2174	for (j = `0`; j < nrules; ++j)
2175	obstack_1grow_fast (&weightpool, sect->rules[j]);
2176	++i;
2177	}
2178	/ And align the output. /
2179	i = (nrules * i) % LOCFILE_ALIGN;
2180	if (i > `0`)
2181	do
2182	obstack_1grow (&weightpool, `'\0'`);
2183	while (++i < LOCFILE_ALIGN);
2184
2185	add_locale_raw_obstack (&file, &weightpool);
2186
2187	/ Generate the 8-bit table. Walk through the lists of sequences*
2188	starting with the same byte and add them one after the other to
2189	the table. In case we have more than one sequence starting with
2190	the same byte we have to use extra indirection.
2191
2192	First add a record for the NUL byte. This entry will never be used
2193	so it does not matter. /*
2194	tablemb[`0`] = `0`;
2195
2196	/ Now insert the `UNDEFINED' value if it is used. Since this value*
2197	will probably be used more than once it is good to store the
2198	weights only once. /*
2199	if (collate->undefined.used_in_level != `0`)
2200	output_weight (&weightpool, collate, &collate->undefined);
2201
2202	for (ch = `1`; ch < `256`; ++ch)
2203	if (collate->mbheads[ch]->mbnext == NULL
2204	&& collate->mbheads[ch]->nmbs <= `1`)
2205	{
2206	tablemb[ch] = output_weight (&weightpool, collate,
2207	collate->mbheads[ch]);
2208	}
2209	else
2210	{
2211	/* The entries in the list are sorted by length and then
2212	alphabetically. This is the order in which we will add the
2213	elements to the collation table. This allows simply walking
2214	the table in sequence and stopping at the first matching
2215	entry. Since the longer sequences are coming first in the
2216	list they have the possibility to match first, just as it
2217	has to be. In the worst case we are walking to the end of
2218	the list where we put, if no singlebyte sequence is defined
2219	in the locale definition, the weights for UNDEFINED.
2220
2221	To reduce the length of the search list we compress them a bit.
2222	This happens by collecting sequences of consecutive byte
2223	sequences in one entry (having a begin and end byte sequence)
2224	and add only one index into the weight table. We can find the
2225	consecutive entries since they are also consecutive in the list. */
2226	struct element_t *runp = collate->mbheads[ch];
2227	struct element_t *lastp;
2228
2229	assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2230
2231	tablemb[ch] = -obstack_object_size (&extrapool);
2232
2233	do
2234	{
2235	/ Store the current index in the weight table. We know that*
2236	the current position in the `extrapool' is aligned on a
2237	32-bit address. /*
2238	int32_t weightidx;
2239	int added;
2240
2241	/* Find out whether this is a single entry or we have more than
2242	one consecutive entry. */
2243	if (runp->mbnext != NULL
2244	&& runp->nmbs == runp->mbnext->nmbs
2245	&& memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - `1`) == `0`
2246	&& (runp->mbs[runp->nmbs - `1`]
2247	== runp->mbnext->mbs[runp->nmbs - `1`] + `1`))
2248	{
2249	int i;
2250	struct element_t *series_startp = runp;
2251	struct element_t *curp;
2252
2253	/ Compute how much space we will need. /
2254	added = LOCFILE_ALIGN_UP (sizeof (int32_t) + `1`
2255	+ `2` * (runp->nmbs - `1`));
2256	assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2257	obstack_make_room (&extrapool, added);
2258
2259	/ More than one consecutive entry. We mark this by having*
2260	a negative index into the indirect table. /*
2261	obstack_int32_grow_fast (&extrapool,
2262	-(obstack_object_size (&indirectpool)
2263	/ sizeof (int32_t)));
2264
2265	/ Now search first the end of the series. /
2266	do
2267	runp = runp->mbnext;
2268	while (runp->mbnext != NULL
2269	&& runp->nmbs == runp->mbnext->nmbs
2270	&& memcmp (runp->mbs, runp->mbnext->mbs,
2271	runp->nmbs - `1`) == `0`
2272	&& (runp->mbs[runp->nmbs - `1`]
2273	== runp->mbnext->mbs[runp->nmbs - `1`] + `1`));
2274
2275	/ Now walk backward from here to the beginning. /
2276	curp = runp;
2277
2278	assert (runp->nmbs <= `256`);
2279	obstack_1grow_fast (&extrapool, curp->nmbs - `1`);
2280	for (i = `1`; i < curp->nmbs; ++i)
2281	obstack_1grow_fast (&extrapool, curp->mbs[i]);
2282
2283	/ Now find the end of the consecutive sequence and*
2284	add all the indices in the indirect pool. /*
2285	do
2286	{
2287	weightidx = output_weight (&weightpool, collate, curp);
2288	obstack_int32_grow (&indirectpool, weightidx);
2289
2290	curp = curp->mblast;
2291	}
2292	while (curp != series_startp);
2293
2294	/ Add the final weight. /
2295	weightidx = output_weight (&weightpool, collate, curp);
2296	obstack_int32_grow (&indirectpool, weightidx);
2297
2298	/ And add the end byte sequence. Without length this*
2299	time. /*
2300	for (i = `1`; i < curp->nmbs; ++i)
2301	obstack_1grow_fast (&extrapool, curp->mbs[i]);
2302	}
2303	else
2304	{
2305	/ A single entry. Simply add the index and the length and*
2306	string (except for the first character which is already
2307	tested for). /*
2308	int i;
2309
2310	/ Output the weight info. /
2311	weightidx = output_weight (&weightpool, collate, runp);
2312
2313	added = LOCFILE_ALIGN_UP (sizeof (int32_t) + `1`
2314	+ runp->nmbs - `1`);
2315	assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2316	obstack_make_room (&extrapool, added);
2317
2318	obstack_int32_grow_fast (&extrapool, weightidx);
2319	assert (runp->nmbs <= `256`);
2320	obstack_1grow_fast (&extrapool, runp->nmbs - `1`);
2321
2322	for (i = `1`; i < runp->nmbs; ++i)
2323	obstack_1grow_fast (&extrapool, runp->mbs[i]);
2324	}
2325
2326	/ Add alignment bytes if necessary. /
2327	while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)))
2328	obstack_1grow_fast (&extrapool, `'\0'`);
2329
2330	/ Next entry. /
2331	lastp = runp;
2332	runp = runp->mbnext;
2333	}
2334	while (runp != NULL);
2335
2336	assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2337
2338	/ If the final entry in the list is not a single character we*
2339	add an UNDEFINED entry here. /*
2340	if (lastp->nmbs != `1`)
2341	{
2342	int added = LOCFILE_ALIGN_UP (sizeof (int32_t) + `1` + `1`);
2343	obstack_make_room (&extrapool, added);
2344
2345	obstack_int32_grow_fast (&extrapool, `0`);
2346	/ XXX What rule? We just pick the first. /
2347	obstack_1grow_fast (&extrapool, `0`);
2348	/ Length is zero. /
2349	obstack_1grow_fast (&extrapool, `0`);
2350
2351	/ Add alignment bytes if necessary. /
2352	while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)))
2353	obstack_1grow_fast (&extrapool, `'\0'`);
2354	}
2355	}
2356
2357	/ Add padding to the tables if necessary. /
2358	while (!LOCFILE_ALIGNED_P (obstack_object_size (&weightpool)))
2359	obstack_1grow (&weightpool, `0`);
2360
2361	/ Now add the four tables. /
2362	add_locale_uint32_array (&file, (const uint32_t *) tablemb, `256`);
2363	add_locale_raw_obstack (&file, &weightpool);
2364	add_locale_raw_obstack (&file, &extrapool);
2365	add_locale_raw_obstack (&file, &indirectpool);
2366
2367	/ Now the same for the wide character table. We need to store some*
2368	more information here. /*
2369	add_locale_empty (&file);
2370	add_locale_empty (&file);
2371	add_locale_empty (&file);
2372
2373	/ Since we are using the sign of an integer to mark indirection the*
2374	offsets in the arrays we are indirectly referring to must not be
2375	zero since -0 == 0. Therefore we add a bit of dummy content. /*
2376	obstack_int32_grow (&extrapool, `0`);
2377	obstack_int32_grow (&indirectpool, `0`);
2378
2379	/ Now insert the `UNDEFINED' value if it is used. Since this value*
2380	will probably be used more than once it is good to store the
2381	weights only once. /*
2382	if (output_weightwc (&weightpool, collate, &collate->undefined) != `0`)
2383	abort ();
2384
2385	/ Generate the table. Walk through the lists of sequences starting*
2386	with the same wide character and add them one after the other to
2387	the table. In case we have more than one sequence starting with
2388	the same byte we have to use extra indirection. /*
2389	tablewc.p = `6`;
2390	tablewc.q = `10`;
2391	collidx_table_init (&tablewc);
2392
2393	atwc.weightpool = &weightpool;
2394	atwc.extrapool = &extrapool;
2395	atwc.indpool = &indirectpool;
2396	atwc.collate = collate;
2397	atwc.tablewc = &tablewc;
2398
2399	wchead_table_iterate (&collate->wcheads, add_to_tablewc);
2400
2401	memset (&atwc, `0`, sizeof (atwc));
2402
2403	/ Now add the four tables. /
2404	add_locale_collidx_table (&file, &tablewc);
2405	add_locale_raw_obstack (&file, &weightpool);
2406	add_locale_raw_obstack (&file, &extrapool);
2407	add_locale_raw_obstack (&file, &indirectpool);
2408
2409	/* Finally write the table with collation element names out. It is
2410	a hash table with a simple function which gets the name of the
2411	character as the input. One character might have many names. The
2412	value associated with the name is an index into the weight table
2413	where we are then interested in the first-level weight value.
2414
2415	To determine how large the table should be we are counting the
2416	elements we have to put in. Since we are using internal chaining
2417	using a secondary hash function we have to make the table a bit
2418	larger to avoid extremely long search times. We can achieve
2419	good results with a 40% larger table than there are entries. */
2420	elem_size = `0`;
2421	runp = collate->start;
2422	while (runp != NULL)
2423	{
2424	if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2425	/ Yep, the element really counts. /
2426	++elem_size;
2427
2428	runp = runp->next;
2429	}
2430	/ Add 50% and find the next prime number. /
2431	elem_size = next_prime (elem_size + (elem_size >> `1`));
2432
2433	/ Allocate the table. Each entry consists of two words: the hash*
2434	value and an index in a secondary table which provides the index
2435	into the weight table and the string itself (so that a match can
2436	be determined). /*
2437	elem_table = (uint32_t *) obstack_alloc (&extrapool,
2438	elem_size * `2` * sizeof (uint32_t));
2439	memset (elem_table, `'\0'`, elem_size * `2` * sizeof (uint32_t));
2440
2441	/ Now add the elements. /
2442	runp = collate->start;
2443	while (runp != NULL)
2444	{
2445	if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2446	{
2447	/ Compute the hash value of the name. /
2448	uint32_t namelen = strlen (runp->name);
2449	uint32_t hash = elem_hash (runp->name, namelen);
2450	size_t idx = hash % elem_size;
2451	#ifndef NDEBUG
2452	size_t start_idx = idx;
2453	#endif
2454
2455	if (elem_table[idx * `2`] != `0`)
2456	{
2457	/ The spot is already taken. Try iterating using the value*
2458	from the secondary hashing function. /*
2459	size_t iter = hash % (elem_size - `2`) + `1`;
2460
2461	do
2462	{
2463	idx += iter;
2464	if (idx >= elem_size)
2465	idx -= elem_size;
2466	assert (idx != start_idx);
2467	}
2468	while (elem_table[idx * `2`] != `0`);
2469	}
2470	/ This is the spot where we will insert the value. /
2471	elem_table[idx * `2`] = hash;
2472	elem_table[idx * `2` + `1`] = obstack_object_size (&extrapool);
2473
2474	/ The string itself including length. /
2475	obstack_1grow (&extrapool, namelen);
2476	obstack_grow (&extrapool, runp->name, namelen);
2477
2478	/ And the multibyte representation. /
2479	obstack_1grow (&extrapool, runp->nmbs);
2480	obstack_grow (&extrapool, runp->mbs, runp->nmbs);
2481
2482	/ And align again to 32 bits. /
2483	if ((`1` + namelen + `1` + runp->nmbs) % sizeof (int32_t) != `0`)
2484	obstack_grow (&extrapool, "\0\0",
2485	(sizeof (int32_t)
2486	- ((`1` + namelen + `1` + runp->nmbs)
2487	% sizeof (int32_t))));
2488
2489	/ Now some 32-bit values: multibyte collation sequence,*
2490	wide char string (including length), and wide char
2491	collation sequence. /*
2492	obstack_int32_grow (&extrapool, runp->mbseqorder);
2493
2494	obstack_int32_grow (&extrapool, runp->nwcs);
2495	obstack_grow (&extrapool, runp->wcs,
2496	runp->nwcs * sizeof (uint32_t));
2497	maybe_swap_uint32_obstack (&extrapool, runp->nwcs);
2498
2499	obstack_int32_grow (&extrapool, runp->wcseqorder);
2500	}
2501
2502	runp = runp->next;
2503	}
2504
2505	/ Prepare to write out this data. /
2506	add_locale_uint32 (&file, elem_size);
2507	add_locale_uint32_array (&file, elem_table, `2` * elem_size);
2508	add_locale_raw_obstack (&file, &extrapool);
2509	add_locale_raw_data (&file, collate->mbseqorder, `256`);
2510	add_locale_collseq_table (&file, &collate->wcseqorder);
2511	add_locale_string (&file, charmap->code_set_name);
2512	write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file);
2513
2514	obstack_free (&weightpool, NULL);
2515	obstack_free (&extrapool, NULL);
2516	obstack_free (&indirectpool, NULL);
2517	}
2518
2519
2520	static enum token_t
2521	skip_to (struct linereader ldfile, struct* locale_collate_t *collate,
2522	const struct charmap_t charmap, int* to_endif)
2523	{
2524	while (`1`)
2525	{
2526	struct token *now = lr_token (ldfile, charmap, NULL, NULL, `0`);
2527	enum token_t nowtok = now->tok;
2528
2529	if (nowtok == tok_eof \|\| nowtok == tok_end)
2530	return nowtok;
2531
2532	if (nowtok == tok_ifdef \|\| nowtok == tok_ifndef)
2533	{
2534	lr_error (ldfile, _("%s: nested conditionals not supported"),
2535	"LC_COLLATE");
2536	nowtok = skip_to (ldfile, collate, charmap, tok_endif);
2537	if (nowtok == tok_eof \|\| nowtok == tok_end)
2538	return nowtok;
2539	}
2540	else if (nowtok == tok_endif \|\| (!to_endif && nowtok == tok_else))
2541	{
2542	lr_ignore_rest (ldfile, `1`);
2543	return nowtok;
2544	}
2545	else if (!to_endif && (nowtok == tok_elifdef \|\| nowtok == tok_elifndef))
2546	{
2547	/ Do not read the rest of the line. /
2548	return nowtok;
2549	}
2550	else if (nowtok == tok_else)
2551	{
2552	lr_error (ldfile, _("%s: more than one 'else'"), "LC_COLLATE");
2553	}
2554
2555	lr_ignore_rest (ldfile, `0`);
2556	}
2557	}
2558
2559
2560	void
2561	collate_read (struct linereader ldfile, struct* localedef_t *result,
2562	const struct charmap_t charmap, const* char *repertoire_name,
2563	int ignore_content)
2564	{
2565	struct repertoire_t *repertoire = NULL;
2566	struct locale_collate_t *collate;
2567	struct token *now;
2568	struct token *arg = NULL;
2569	enum token_t nowtok;
2570	enum token_t was_ellipsis = tok_none;
2571	struct localedef_t *copy_locale = NULL;
2572	/ Parsing state:*
2573	0 - start
2574	1 - between `order-start' and `order-end'
2575	2 - after `order-end'
2576	3 - after `reorder-after', waiting for `reorder-end'
2577	4 - after `reorder-end'
2578	5 - after `reorder-sections-after', waiting for `reorder-sections-end'
2579	6 - after `reorder-sections-end'
2580	*/
2581	int state = `0`;
2582
2583	/ Get the repertoire we have to use. /
2584	if (repertoire_name != NULL)
2585	repertoire = repertoire_read (repertoire_name);
2586
2587	/ The rest of the line containing `LC_COLLATE' must be free. /
2588	lr_ignore_rest (ldfile, `1`);
2589
2590	while (`1`)
2591	{
2592	do
2593	{
2594	now = lr_token (ldfile, charmap, result, NULL, verbose);
2595	nowtok = now->tok;
2596	}
2597	while (nowtok == tok_eol);
2598
2599	if (nowtok != tok_define)
2600	break;
2601
2602	if (ignore_content)
2603	lr_ignore_rest (ldfile, `0`);
2604	else
2605	{
2606	arg = lr_token (ldfile, charmap, result, NULL, verbose);
2607	if (arg->tok != tok_ident)
2608	SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2609	else
2610	{
2611	/ Simply add the new symbol. /
2612	struct name_list newsym = xmalloc (sizeof* (*newsym)
2613	+ arg->val.str.lenmb + `1`);
2614	memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
2615	newsym->str[arg->val.str.lenmb] = `'\0'`;
2616	newsym->next = defined;
2617	defined = newsym;
2618
2619	lr_ignore_rest (ldfile, `1`);
2620	}
2621	}
2622	}
2623
2624	if (nowtok == tok_copy)
2625	{
2626	now = lr_token (ldfile, charmap, result, NULL, verbose);
2627	if (now->tok != tok_string)
2628	{
2629	SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2630
2631	skip_category:
2632	do
2633	now = lr_token (ldfile, charmap, result, NULL, verbose);
2634	while (now->tok != tok_eof && now->tok != tok_end);
2635
2636	if (now->tok != tok_eof
2637	\|\| (now = lr_token (ldfile, charmap, result, NULL, verbose),
2638	now->tok == tok_eof))
2639	lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
2640	else if (now->tok != tok_lc_collate)
2641	{
2642	lr_error (ldfile, _("\
2643	%1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2644	lr_ignore_rest (ldfile, `0`);
2645	}
2646	else
2647	lr_ignore_rest (ldfile, `1`);
2648
2649	return;
2650	}
2651
2652	if (! ignore_content)
2653	{
2654	/ Get the locale definition. /
2655	copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
2656	repertoire_name, charmap, NULL);
2657	if ((copy_locale->avail & COLLATE_LOCALE) == `0`)
2658	{
2659	/ Not yet loaded. So do it now. /
2660	if (locfile_read (copy_locale, charmap) != `0`)
2661	goto skip_category;
2662	}
2663
2664	if (copy_locale->categories[LC_COLLATE].collate == NULL)
2665	return;
2666	}
2667
2668	lr_ignore_rest (ldfile, `1`);
2669
2670	now = lr_token (ldfile, charmap, result, NULL, verbose);
2671	nowtok = now->tok;
2672	}
2673
2674	/ Prepare the data structures. /
2675	collate_startup (ldfile, result, copy_locale, ignore_content);
2676	collate = result->categories[LC_COLLATE].collate;
2677
2678	while (`1`)
2679	{
2680	char ucs4buf[`10`];
2681	char *symstr;
2682	size_t symlen;
2683
2684	/ Of course we don't proceed beyond the end of file. /
2685	if (nowtok == tok_eof)
2686	break;
2687
2688	/* Ignore empty lines. */
2689	if (nowtok == tok_eol)
2690	{
2691	now = lr_token (ldfile, charmap, result, NULL, verbose);
2692	nowtok = now->tok;
2693	continue;
2694	}
2695
2696	switch (nowtok)
2697	{
2698	case tok_codepoint_collation:
2699	collate->codepoint_collation = true;
2700	break;
2701
2702	case tok_copy:
2703	/ Allow copying other locales. /
2704	now = lr_token (ldfile, charmap, result, NULL, verbose);
2705	if (now->tok != tok_string)
2706	goto err_label;
2707
2708	if (! ignore_content)
2709	load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name,
2710	charmap, result);
2711
2712	lr_ignore_rest (ldfile, `1`);
2713	break;
2714
2715	case tok_coll_weight_max:
2716	/ Ignore the rest of the line if we don't need the input of*
2717	this line. /*
2718	if (ignore_content)
2719	{
2720	lr_ignore_rest (ldfile, `0`);
2721	break;
2722	}
2723
2724	if (state != `0`)
2725	goto err_label;
2726
2727	arg = lr_token (ldfile, charmap, result, NULL, verbose);
2728	if (arg->tok != tok_number)
2729	goto err_label;
2730	if (collate->col_weight_max != -`1`)
2731	lr_error (ldfile, _("%s: duplicate definition of `%s'"),
2732	"LC_COLLATE", "col_weight_max");
2733	else
2734	collate->col_weight_max = arg->val.num;
2735	lr_ignore_rest (ldfile, `1`);
2736	break;
2737
2738	case tok_section_symbol:
2739	/ Ignore the rest of the line if we don't need the input of*
2740	this line. /*
2741	if (ignore_content)
2742	{
2743	lr_ignore_rest (ldfile, `0`);
2744	break;
2745	}
2746
2747	if (state != `0`)
2748	goto err_label;
2749
2750	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2751	if (arg->tok != tok_bsymbol)
2752	goto err_label;
2753	else if (!ignore_content)
2754	{
2755	/ Check whether this section is already known. /
2756	struct section_list *known = collate->sections;
2757	while (known != NULL)
2758	{
2759	if (strcmp (known->name, arg->val.str.startmb) == `0`)
2760	break;
2761	known = known->next;
2762	}
2763
2764	if (known != NULL)
2765	{
2766	lr_error (ldfile,
2767	_("%s: duplicate declaration of section `%s'"),
2768	"LC_COLLATE", arg->val.str.startmb);
2769	free (arg->val.str.startmb);
2770	}
2771	else
2772	collate->sections = make_seclist_elem (collate,
2773	arg->val.str.startmb,
2774	collate->sections);
2775
2776	lr_ignore_rest (ldfile, known == NULL);
2777	}
2778	else
2779	{
2780	free (arg->val.str.startmb);
2781	lr_ignore_rest (ldfile, `0`);
2782	}
2783	break;
2784
2785	case tok_collating_element:
2786	/ Ignore the rest of the line if we don't need the input of*
2787	this line. /*
2788	if (ignore_content)
2789	{
2790	lr_ignore_rest (ldfile, `0`);
2791	break;
2792	}
2793
2794	if (state != `0` && state != `2`)
2795	goto err_label;
2796
2797	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2798	if (arg->tok != tok_bsymbol)
2799	goto err_label;
2800	else
2801	{
2802	const char *symbol = arg->val.str.startmb;
2803	size_t symbol_len = arg->val.str.lenmb;
2804
2805	/ Next the `from' keyword. /
2806	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2807	if (arg->tok != tok_from)
2808	{
2809	free ((char *) symbol);
2810	goto err_label;
2811	}
2812
2813	ldfile->return_widestr = `1`;
2814	ldfile->translate_strings = `1`;
2815
2816	/ Finally the string with the replacement. /
2817	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2818
2819	ldfile->return_widestr = `0`;
2820	ldfile->translate_strings = `0`;
2821
2822	if (arg->tok != tok_string)
2823	goto err_label;
2824
2825	if (!ignore_content && symbol != NULL)
2826	{
2827	/ The name is already defined. /
2828	if (check_duplicate (ldfile, collate, charmap,
2829	repertoire, symbol, symbol_len))
2830	goto col_elem_free;
2831
2832	if (arg->val.str.startmb != NULL)
2833	insert_entry (&collate->elem_table, symbol, symbol_len,
2834	new_element (collate,
2835	arg->val.str.startmb,
2836	arg->val.str.lenmb - `1`,
2837	arg->val.str.startwc,
2838	symbol, symbol_len, `0`));
2839	}
2840	else
2841	{
2842	col_elem_free:
2843	free ((char *) symbol);
2844	free (arg->val.str.startmb);
2845	free (arg->val.str.startwc);
2846	}
2847	lr_ignore_rest (ldfile, `1`);
2848	}
2849	break;
2850
2851	case tok_collating_symbol:
2852	/ Ignore the rest of the line if we don't need the input of*
2853	this line. /*
2854	if (ignore_content)
2855	{
2856	lr_ignore_rest (ldfile, `0`);
2857	break;
2858	}
2859
2860	if (state != `0` && state != `2`)
2861	goto err_label;
2862
2863	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2864	if (arg->tok != tok_bsymbol)
2865	goto err_label;
2866	else
2867	{
2868	char *symbol = arg->val.str.startmb;
2869	size_t symbol_len = arg->val.str.lenmb;
2870	char *endsymbol = NULL;
2871	size_t endsymbol_len = `0`;
2872	enum token_t ellipsis = tok_none;
2873
2874	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2875	if (arg->tok == tok_ellipsis2 \|\| arg->tok == tok_ellipsis4)
2876	{
2877	ellipsis = arg->tok;
2878
2879	arg = lr_token (ldfile, charmap, result, repertoire,
2880	verbose);
2881	if (arg->tok != tok_bsymbol)
2882	{
2883	free (symbol);
2884	goto err_label;
2885	}
2886
2887	endsymbol = arg->val.str.startmb;
2888	endsymbol_len = arg->val.str.lenmb;
2889
2890	lr_ignore_rest (ldfile, `1`);
2891	}
2892	else if (arg->tok != tok_eol)
2893	{
2894	free (symbol);
2895	goto err_label;
2896	}
2897
2898	if (!ignore_content)
2899	{
2900	if (symbol == NULL
2901	\|\| (ellipsis != tok_none && endsymbol == NULL))
2902	{
2903	lr_error (ldfile, _("\
2904	%s: unknown character in collating symbol name"),
2905	"LC_COLLATE");
2906	goto col_sym_free;
2907	}
2908	else if (ellipsis == tok_none)
2909	{
2910	/ A single symbol, no ellipsis. /
2911	if (check_duplicate (ldfile, collate, charmap,
2912	repertoire, symbol, symbol_len))
2913	/ The name is already defined. /
2914	goto col_sym_free;
2915
2916	insert_entry (&collate->sym_table, symbol, symbol_len,
2917	new_symbol (collate, symbol, symbol_len));
2918	}
2919	else if (symbol_len != endsymbol_len)
2920	{
2921	col_sym_inv_range:
2922	lr_error (ldfile,
2923	_("invalid names for character range"));
2924	goto col_sym_free;
2925	}
2926	else
2927	{
2928	/ Oh my, we have to handle an ellipsis. First, as*
2929	usual, determine the common prefix and then
2930	convert the rest into a range. /*
2931	size_t prefixlen;
2932	unsigned long int from;
2933	unsigned long int to;
2934	char *endp;
2935
2936	for (prefixlen = `0`; prefixlen < symbol_len; ++prefixlen)
2937	if (symbol[prefixlen] != endsymbol[prefixlen])
2938	break;
2939
2940	/ Convert the rest into numbers. /
2941	symbol[symbol_len] = `'\0'`;
2942	from = strtoul (&symbol[prefixlen], &endp,
2943	ellipsis == tok_ellipsis2 ? `16` : `10`);
2944	if (*endp != `'\0'`)
2945	goto col_sym_inv_range;
2946
2947	endsymbol[symbol_len] = `'\0'`;
2948	to = strtoul (&endsymbol[prefixlen], &endp,
2949	ellipsis == tok_ellipsis2 ? `16` : `10`);
2950	if (*endp != `'\0'`)
2951	goto col_sym_inv_range;
2952
2953	if (from > to)
2954	goto col_sym_inv_range;
2955
2956	/ Now loop over all entries. /
2957	while (from <= to)
2958	{
2959	char *symbuf;
2960
2961	symbuf = (char *) obstack_alloc (&collate->mempool,
2962	symbol_len + `1`);
2963
2964	/ Create the name. /
2965	sprintf (symbuf,
2966	ellipsis == tok_ellipsis2
2967	? "%.s%.lX" : "%.s%.lu",
2968	(int) prefixlen, symbol,
2969	(int) (symbol_len - prefixlen), from);
2970
2971	if (check_duplicate (ldfile, collate, charmap,
2972	repertoire, symbuf, symbol_len))
2973	/ The name is already defined. /
2974	goto col_sym_free;
2975
2976	insert_entry (&collate->sym_table, symbuf,
2977	symbol_len,
2978	new_symbol (collate, symbuf,
2979	symbol_len));
2980
2981	/ Increment the counter. /
2982	++from;
2983	}
2984
2985	goto col_sym_free;
2986	}
2987	}
2988	else
2989	{
2990	col_sym_free:
2991	free (symbol);
2992	free (endsymbol);
2993	}
2994	}
2995	break;
2996
2997	case tok_symbol_equivalence:
2998	/ Ignore the rest of the line if we don't need the input of*
2999	this line. /*
3000	if (ignore_content)
3001	{
3002	lr_ignore_rest (ldfile, `0`);
3003	break;
3004	}
3005
3006	if (state != `0`)
3007	goto err_label;
3008
3009	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3010	if (arg->tok != tok_bsymbol)
3011	goto err_label;
3012	else
3013	{
3014	const char *newname = arg->val.str.startmb;
3015	size_t newname_len = arg->val.str.lenmb;
3016	const char *symname;
3017	size_t symname_len;
3018	void symval; /* Actually struct symbol_t* /
3019
3020	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3021	if (arg->tok != tok_bsymbol)
3022	{
3023	free ((char *) newname);
3024	goto err_label;
3025	}
3026
3027	symname = arg->val.str.startmb;
3028	symname_len = arg->val.str.lenmb;
3029
3030	if (newname == NULL)
3031	{
3032	lr_error (ldfile, _("\
3033	%s: unknown character in equivalent definition name"),
3034	"LC_COLLATE");
3035
3036	sym_equiv_free:
3037	free ((char *) newname);
3038	free ((char *) symname);
3039	break;
3040	}
3041	if (symname == NULL)
3042	{
3043	lr_error (ldfile, _("\
3044	%s: unknown character in equivalent definition value"),
3045	"LC_COLLATE");
3046	goto sym_equiv_free;
3047	}
3048
3049	/ See whether the symbol name is already defined. /
3050	if (find_entry (&collate->sym_table, symname, symname_len,
3051	&symval) != `0`)
3052	{
3053	lr_error (ldfile, _("\
3054	%s: unknown symbol `%s' in equivalent definition"),
3055	"LC_COLLATE", symname);
3056	goto sym_equiv_free;
3057	}
3058
3059	if (insert_entry (&collate->sym_table,
3060	newname, newname_len, symval) < `0`)
3061	{
3062	lr_error (ldfile, _("\
3063	error while adding equivalent collating symbol"));
3064	goto sym_equiv_free;
3065	}
3066
3067	free ((char *) symname);
3068	}
3069	lr_ignore_rest (ldfile, `1`);
3070	break;
3071
3072	case tok_script:
3073	/ Ignore the rest of the line if we don't need the input of*
3074	this line. /*
3075	if (ignore_content)
3076	{
3077	lr_ignore_rest (ldfile, `0`);
3078	break;
3079	}
3080
3081	/ We get told about the scripts we know. /
3082	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3083	if (arg->tok != tok_bsymbol)
3084	goto err_label;
3085	else
3086	{
3087	struct section_list *runp = collate->known_sections;
3088	char *name;
3089
3090	while (runp != NULL)
3091	if (strncmp (runp->name, arg->val.str.startmb,
3092	arg->val.str.lenmb) == `0`
3093	&& runp->name[arg->val.str.lenmb] == `'\0'`)
3094	break;
3095	else
3096	runp = runp->def_next;
3097
3098	if (runp != NULL)
3099	{
3100	lr_error (ldfile, _("duplicate definition of script `%s'"),
3101	runp->name);
3102	lr_ignore_rest (ldfile, `0`);
3103	break;
3104	}
3105
3106	runp = (struct section_list ) xcalloc (`1`, sizeof* (*runp));
3107	name = (char *) xmalloc (arg->val.str.lenmb + `1`);
3108	memcpy (name, arg->val.str.startmb, arg->val.str.lenmb);
3109	name[arg->val.str.lenmb] = `'\0'`;
3110	runp->name = name;
3111
3112	runp->def_next = collate->known_sections;
3113	collate->known_sections = runp;
3114	}
3115	lr_ignore_rest (ldfile, `1`);
3116	break;
3117
3118	case tok_order_start:
3119	/ Ignore the rest of the line if we don't need the input of*
3120	this line. /*
3121	if (ignore_content)
3122	{
3123	lr_ignore_rest (ldfile, `0`);
3124	break;
3125	}
3126
3127	if (state != `0` && state != `1` && state != `2`)
3128	goto err_label;
3129	state = `1`;
3130
3131	/ The 14652 draft does not specify whether all `order_start' lines*
3132	must contain the same number of sort-rules, but 14651 does. So
3133	we require this here as well. /*
3134	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3135	if (arg->tok == tok_bsymbol)
3136	{
3137	/ This better should be a section name. /
3138	struct section_list *sp = collate->known_sections;
3139	while (sp != NULL
3140	&& (sp->name == NULL
3141	\|\| strncmp (sp->name, arg->val.str.startmb,
3142	arg->val.str.lenmb) != `0`
3143	\|\| sp->name[arg->val.str.lenmb] != `'\0'`))
3144	sp = sp->def_next;
3145
3146	if (sp == NULL)
3147	{
3148	lr_error (ldfile, _("\
3149	%s: unknown section name `%.*s'"),
3150	"LC_COLLATE", (int) arg->val.str.lenmb,
3151	arg->val.str.startmb);
3152	/ We use the error section. /
3153	collate->current_section = &collate->error_section;
3154
3155	if (collate->error_section.first == NULL)
3156	{
3157	/ Insert &collate->error_section at the end of*
3158	the collate->sections list. /*
3159	if (collate->sections == NULL)
3160	collate->sections = &collate->error_section;
3161	else
3162	{
3163	sp = collate->sections;
3164	while (sp->next != NULL)
3165	sp = sp->next;
3166
3167	sp->next = &collate->error_section;
3168	}
3169	collate->error_section.next = NULL;
3170	}
3171	}
3172	else
3173	{
3174	/ One should not be allowed to open the same*
3175	section twice. /*
3176	if (sp->first != NULL)
3177	lr_error (ldfile, _("\
3178	%s: multiple order definitions for section `%s'"),
3179	"LC_COLLATE", sp->name);
3180	else
3181	{
3182	/ Insert sp in the collate->sections list,*
3183	right after collate->current_section. /*
3184	if (collate->current_section != NULL)
3185	{
3186	sp->next = collate->current_section->next;
3187	collate->current_section->next = sp;
3188	}
3189	else if (collate->sections == NULL)
3190	/ This is the first section to be defined. /
3191	collate->sections = sp;
3192
3193	collate->current_section = sp;
3194	}
3195
3196	/ Next should come the end of the line or a semicolon. /
3197	arg = lr_token (ldfile, charmap, result, repertoire,
3198	verbose);
3199	if (arg->tok == tok_eol)
3200	{
3201	uint32_t cnt;
3202
3203	/ This means we have exactly one rule: `forward'. /
3204	if (nrules > `1`)
3205	lr_error (ldfile, _("\
3206	%s: invalid number of sorting rules"),
3207	"LC_COLLATE");
3208	else
3209	nrules = `1`;
3210	sp->rules = obstack_alloc (&collate->mempool,
3211	(sizeof (enum coll_sort_rule)
3212	* nrules));
3213	for (cnt = `0`; cnt < nrules; ++cnt)
3214	sp->rules[cnt] = sort_forward;
3215
3216	/ Next line. /
3217	break;
3218	}
3219
3220	/ Get the next token. /
3221	arg = lr_token (ldfile, charmap, result, repertoire,
3222	verbose);
3223	}
3224	}
3225	else
3226	{
3227	/ There is no section symbol. Therefore we use the unnamed*
3228	section. /*
3229	collate->current_section = &collate->unnamed_section;
3230
3231	if (collate->unnamed_section_defined)
3232	lr_error (ldfile, _("\
3233	%s: multiple order definitions for unnamed section"),
3234	"LC_COLLATE");
3235	else
3236	{
3237	/ Insert &collate->unnamed_section at the beginning of*
3238	the collate->sections list. /*
3239	collate->unnamed_section.next = collate->sections;
3240	collate->sections = &collate->unnamed_section;
3241	collate->unnamed_section_defined = true;
3242	}
3243	}
3244
3245	/ Now read the direction names. /
3246	read_directions (ldfile, arg, charmap, repertoire, result);
3247
3248	/ From now we need the strings untranslated. /
3249	ldfile->translate_strings = `0`;
3250	break;
3251
3252	case tok_order_end:
3253	/ Ignore the rest of the line if we don't need the input of*
3254	this line. /*
3255	if (ignore_content)
3256	{
3257	lr_ignore_rest (ldfile, `0`);
3258	break;
3259	}
3260
3261	if (state != `1`)
3262	goto err_label;
3263
3264	/ Handle ellipsis at end of list. /
3265	if (was_ellipsis != tok_none)
3266	{
3267	handle_ellipsis (ldfile, NULL, `0`, was_ellipsis, charmap,
3268	repertoire, result);
3269	was_ellipsis = tok_none;
3270	}
3271
3272	state = `2`;
3273	lr_ignore_rest (ldfile, `1`);
3274	break;
3275
3276	case tok_reorder_after:
3277	/ Ignore the rest of the line if we don't need the input of*
3278	this line. /*
3279	if (ignore_content)
3280	{
3281	lr_ignore_rest (ldfile, `0`);
3282	break;
3283	}
3284
3285	if (state == `1`)
3286	{
3287	lr_error (ldfile, _("%s: missing `order_end' keyword"),
3288	"LC_COLLATE");
3289	state = `2`;
3290
3291	/ Handle ellipsis at end of list. /
3292	if (was_ellipsis != tok_none)
3293	{
3294	handle_ellipsis (ldfile, arg->val.str.startmb,
3295	arg->val.str.lenmb, was_ellipsis, charmap,
3296	repertoire, result);
3297	was_ellipsis = tok_none;
3298	}
3299	}
3300	else if (state == `0` && copy_locale == NULL)
3301	goto err_label;
3302	else if (state != `0` && state != `2` && state != `3`)
3303	goto err_label;
3304	state = `3`;
3305
3306	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3307	if (arg->tok == tok_bsymbol \|\| arg->tok == tok_ucs4)
3308	{
3309	/ Find this symbol in the sequence table. /
3310	char ucsbuf[`10`];
3311	char *startmb;
3312	size_t lenmb;
3313	struct element_t *insp;
3314	int no_error = `1`;
3315	void *ptr;
3316
3317	if (arg->tok == tok_bsymbol)
3318	{
3319	startmb = arg->val.str.startmb;
3320	lenmb = arg->val.str.lenmb;
3321	}
3322	else
3323	{
3324	sprintf (ucsbuf, "U%08X", arg->val.ucs4);
3325	startmb = ucsbuf;
3326	lenmb = `9`;
3327	}
3328
3329	if (find_entry (&collate->seq_table, startmb, lenmb, &ptr) == `0`)
3330	/ Yes, the symbol exists. Simply point the cursor*
3331	to it. /*
3332	collate->cursor = (struct element_t *) ptr;
3333	else
3334	{
3335	struct symbol_t *symbp;
3336	void *ptr;
3337
3338	if (find_entry (&collate->sym_table, startmb, lenmb,
3339	&ptr) == `0`)
3340	{
3341	symbp = ptr;
3342
3343	if (symbp->order->last != NULL
3344	\|\| symbp->order->next != NULL)
3345	collate->cursor = symbp->order;
3346	else
3347	{
3348	/ This is a collating symbol but its position*
3349	is not yet defined. /*
3350	lr_error (ldfile, _("\
3351	%s: order for collating symbol %.*s not yet defined"),
3352	"LC_COLLATE", (int) lenmb, startmb);
3353	collate->cursor = NULL;
3354	no_error = `0`;
3355	}
3356	}
3357	else if (find_entry (&collate->elem_table, startmb, lenmb,
3358	&ptr) == `0`)
3359	{
3360	insp = (struct element_t *) ptr;
3361
3362	if (insp->last != NULL \|\| insp->next != NULL)
3363	collate->cursor = insp;
3364	else
3365	{
3366	/ This is a collating element but its position*
3367	is not yet defined. /*
3368	lr_error (ldfile, _("\
3369	%s: order for collating element %.*s not yet defined"),
3370	"LC_COLLATE", (int) lenmb, startmb);
3371	collate->cursor = NULL;
3372	no_error = `0`;
3373	}
3374	}
3375	else
3376	{
3377	/ This is bad. The symbol after which we have to*
3378	insert does not exist. /*
3379	lr_error (ldfile, _("\
3380	%s: cannot reorder after %.*s: symbol not known"),
3381	"LC_COLLATE", (int) lenmb, startmb);
3382	collate->cursor = NULL;
3383	no_error = `0`;
3384	}
3385	}
3386
3387	lr_ignore_rest (ldfile, no_error);
3388	}
3389	else
3390	/ This must not happen. /
3391	goto err_label;
3392	break;
3393
3394	case tok_reorder_end:
3395	/ Ignore the rest of the line if we don't need the input of*
3396	this line. /*
3397	if (ignore_content)
3398	break;
3399
3400	if (state != `3`)
3401	goto err_label;
3402	state = `4`;
3403	lr_ignore_rest (ldfile, `1`);
3404	break;
3405
3406	case tok_reorder_sections_after:
3407	/ Ignore the rest of the line if we don't need the input of*
3408	this line. /*
3409	if (ignore_content)
3410	{
3411	lr_ignore_rest (ldfile, `0`);
3412	break;
3413	}
3414
3415	if (state == `1`)
3416	{
3417	lr_error (ldfile, _("%s: missing `order_end' keyword"),
3418	"LC_COLLATE");
3419	state = `2`;
3420
3421	/ Handle ellipsis at end of list. /
3422	if (was_ellipsis != tok_none)
3423	{
3424	handle_ellipsis (ldfile, NULL, `0`, was_ellipsis, charmap,
3425	repertoire, result);
3426	was_ellipsis = tok_none;
3427	}
3428	}
3429	else if (state == `3`)
3430	{
3431	record_error (`0`, `0`, _("\
3432	%s: missing `reorder-end' keyword"), "LC_COLLATE");
3433	state = `4`;
3434	}
3435	else if (state != `2` && state != `4`)
3436	goto err_label;
3437	state = `5`;
3438
3439	/ Get the name of the sections we are adding after. /
3440	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3441	if (arg->tok == tok_bsymbol)
3442	{
3443	/ Now find a section with this name. /
3444	struct section_list *runp = collate->sections;
3445
3446	while (runp != NULL)
3447	{
3448	if (runp->name != NULL
3449	&& strlen (runp->name) == arg->val.str.lenmb
3450	&& memcmp (runp->name, arg->val.str.startmb,
3451	arg->val.str.lenmb) == `0`)
3452	break;
3453
3454	runp = runp->next;
3455	}
3456
3457	if (runp != NULL)
3458	collate->current_section = runp;
3459	else
3460	{
3461	/ This is bad. The section after which we have to*
3462	reorder does not exist. Therefore we cannot
3463	process the whole rest of this reorder
3464	specification. /*
3465	lr_error (ldfile, _("%s: section `%.*s' not known"),
3466	"LC_COLLATE", (int) arg->val.str.lenmb,
3467	arg->val.str.startmb);
3468
3469	do
3470	{
3471	lr_ignore_rest (ldfile, `0`);
3472
3473	now = lr_token (ldfile, charmap, result, NULL, verbose);
3474	}
3475	while (now->tok == tok_reorder_sections_after
3476	\|\| now->tok == tok_reorder_sections_end
3477	\|\| now->tok == tok_end);
3478
3479	/ Process the token we just saw. /
3480	nowtok = now->tok;
3481	continue;
3482	}
3483	}
3484	else
3485	/ This must not happen. /
3486	goto err_label;
3487	break;
3488
3489	case tok_reorder_sections_end:
3490	/ Ignore the rest of the line if we don't need the input of*
3491	this line. /*
3492	if (ignore_content)
3493	break;
3494
3495	if (state != `5`)
3496	goto err_label;
3497	state = `6`;
3498	lr_ignore_rest (ldfile, `1`);
3499	break;
3500
3501	case tok_bsymbol:
3502	case tok_ucs4:
3503	/ Ignore the rest of the line if we don't need the input of*
3504	this line. /*
3505	if (ignore_content)
3506	{
3507	lr_ignore_rest (ldfile, `0`);
3508	break;
3509	}
3510
3511	if (state != `0` && state != `1` && state != `3` && state != `5`)
3512	goto err_label;
3513
3514	if ((state == `0` \|\| state == `5`) && nowtok == tok_ucs4)
3515	goto err_label;
3516
3517	if (nowtok == tok_ucs4)
3518	{
3519	snprintf (ucs4buf, sizeof (ucs4buf), "U%08X", now->val.ucs4);
3520	symstr = ucs4buf;
3521	symlen = `9`;
3522	}
3523	else if (arg != NULL)
3524	{
3525	symstr = arg->val.str.startmb;
3526	symlen = arg->val.str.lenmb;
3527	}
3528	else
3529	{
3530	lr_error (ldfile, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
3531	(int) ldfile->token.val.str.lenmb,
3532	ldfile->token.val.str.startmb);
3533	break;
3534	}
3535
3536	struct element_t *seqp;
3537	if (state == `0`)
3538	{
3539	/ We are outside an `order_start' region. This means*
3540	we must only accept definitions of values for
3541	collation symbols since these are purely abstract
3542	values and don't need directions associated. /*
3543	void *ptr;
3544
3545	if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == `0`)
3546	{
3547	seqp = ptr;
3548
3549	/ It's already defined. First check whether this*
3550	is really a collating symbol. /*
3551	if (seqp->is_character)
3552	goto err_label;
3553
3554	goto move_entry;
3555	}
3556	else
3557	{
3558	void *result;
3559
3560	if (find_entry (&collate->sym_table, symstr, symlen,
3561	&result) != `0`)
3562	/ No collating symbol, it's an error. /
3563	goto err_label;
3564
3565	/ Maybe this is the first time we define a symbol*
3566	value and it is before the first actual section. /*
3567	if (collate->sections == NULL)
3568	collate->sections = collate->current_section =
3569	&collate->symbol_section;
3570	}
3571
3572	if (was_ellipsis != tok_none)
3573	{
3574	handle_ellipsis (ldfile, symstr, symlen, was_ellipsis,
3575	charmap, repertoire, result);
3576
3577	/ Remember that we processed the ellipsis. /
3578	was_ellipsis = tok_none;
3579
3580	/ And don't add the value a second time. /
3581	break;
3582	}
3583	}
3584	else if (state == `3`)
3585	{
3586	/ It is possible that we already have this collation sequence.*
3587	In this case we move the entry. /*
3588	void *sym;
3589	void *ptr;
3590
3591	/ If the symbol after which we have to insert was not found*
3592	ignore all entries. /*
3593	if (collate->cursor == NULL)
3594	{
3595	lr_ignore_rest (ldfile, `0`);
3596	break;
3597	}
3598
3599	if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == `0`)
3600	{
3601	seqp = (struct element_t *) ptr;
3602	goto move_entry;
3603	}
3604
3605	if (find_entry (&collate->sym_table, symstr, symlen, &sym) == `0`
3606	&& (seqp = ((struct symbol_t *) sym)->order) != NULL)
3607	goto move_entry;
3608
3609	if (find_entry (&collate->elem_table, symstr, symlen, &ptr) == `0`
3610	&& (seqp = (struct element_t *) ptr,
3611	seqp->last != NULL \|\| seqp->next != NULL
3612	\|\| (collate->start != NULL && seqp == collate->start)))
3613	{
3614	move_entry:
3615	/ Remove the entry from the old position. /
3616	if (seqp->last == NULL)
3617	collate->start = seqp->next;
3618	else
3619	seqp->last->next = seqp->next;
3620	if (seqp->next != NULL)
3621	seqp->next->last = seqp->last;
3622
3623	/ We also have to check whether this entry is the*
3624	first or last of a section. /*
3625	if (seqp->section->first == seqp)
3626	{
3627	if (seqp->section->first == seqp->section->last)
3628	/ This section has no content anymore. /
3629	seqp->section->first = seqp->section->last = NULL;
3630	else
3631	seqp->section->first = seqp->next;
3632	}
3633	else if (seqp->section->last == seqp)
3634	seqp->section->last = seqp->last;
3635
3636	/ Now insert it in the new place. /
3637	insert_weights (ldfile, seqp, charmap, repertoire, result,
3638	tok_none);
3639	break;
3640	}
3641
3642	/ Otherwise we just add a new entry. /
3643	}
3644	else if (state == `5`)
3645	{
3646	/ We are reordering sections. Find the named section. /
3647	struct section_list *runp = collate->sections;
3648	struct section_list *prevp = NULL;
3649
3650	while (runp != NULL)
3651	{
3652	if (runp->name != NULL
3653	&& strlen (runp->name) == symlen
3654	&& memcmp (runp->name, symstr, symlen) == `0`)
3655	break;
3656
3657	prevp = runp;
3658	runp = runp->next;
3659	}
3660
3661	if (runp == NULL)
3662	{
3663	lr_error (ldfile, _("%s: section `%.*s' not known"),
3664	"LC_COLLATE", (int) symlen, symstr);
3665	lr_ignore_rest (ldfile, `0`);
3666	}
3667	else
3668	{
3669	if (runp != collate->current_section)
3670	{
3671	/ Remove the named section from the old place and*
3672	insert it in the new one. /*
3673	prevp->next = runp->next;
3674
3675	runp->next = collate->current_section->next;
3676	collate->current_section->next = runp;
3677	collate->current_section = runp;
3678	}
3679
3680	/ Process the rest of the line which might change*
3681	the collation rules. /*
3682	arg = lr_token (ldfile, charmap, result, repertoire,
3683	verbose);
3684	if (arg->tok != tok_eof && arg->tok != tok_eol)
3685	read_directions (ldfile, arg, charmap, repertoire,
3686	result);
3687	}
3688	break;
3689	}
3690	else if (was_ellipsis != tok_none)
3691	{
3692	/ Using the information in the `ellipsis_weight'*
3693	element and this and the last value we have to handle
3694	the ellipsis now. /*
3695	assert (state == `1`);
3696
3697	handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, charmap,
3698	repertoire, result);
3699
3700	/ Remember that we processed the ellipsis. /
3701	was_ellipsis = tok_none;
3702
3703	/ And don't add the value a second time. /
3704	break;
3705	}
3706
3707	/ Now insert in the new place. /
3708	insert_value (ldfile, symstr, symlen, charmap, repertoire, result);
3709	break;
3710
3711	case tok_undefined:
3712	/ Ignore the rest of the line if we don't need the input of*
3713	this line. /*
3714	if (ignore_content)
3715	{
3716	lr_ignore_rest (ldfile, `0`);
3717	break;
3718	}
3719
3720	if (state != `1`)
3721	goto err_label;
3722
3723	if (was_ellipsis != tok_none)
3724	{
3725	lr_error (ldfile,
3726	_("%s: cannot have `%s' as end of ellipsis range"),
3727	"LC_COLLATE", "UNDEFINED");
3728
3729	unlink_element (collate);
3730	was_ellipsis = tok_none;
3731	}
3732
3733	/ See whether UNDEFINED already appeared somewhere. /
3734	if (collate->undefined.next != NULL
3735	\|\| &collate->undefined == collate->cursor)
3736	{
3737	lr_error (ldfile,
3738	_("%s: order for `%.*s' already defined at %s:%Zu"),
3739	"LC_COLLATE", `9`, "UNDEFINED",
3740	collate->undefined.file,
3741	collate->undefined.line);
3742	lr_ignore_rest (ldfile, `0`);
3743	}
3744	else
3745	/ Parse the weights. /
3746	insert_weights (ldfile, &collate->undefined, charmap,
3747	repertoire, result, tok_none);
3748	break;
3749
3750	case tok_ellipsis2: / symbolic hexadecimal ellipsis /
3751	case tok_ellipsis3: / absolute ellipsis /
3752	case tok_ellipsis4: / symbolic decimal ellipsis /
3753	/ This is the symbolic (decimal or hexadecimal) or absolute*
3754	ellipsis. /*
3755	if (was_ellipsis != tok_none)
3756	goto err_label;
3757
3758	if (state != `0` && state != `1` && state != `3`)
3759	goto err_label;
3760
3761	was_ellipsis = nowtok;
3762
3763	insert_weights (ldfile, &collate->ellipsis_weight, charmap,
3764	repertoire, result, nowtok);
3765	break;
3766
3767	case tok_end:
3768	seen_end:
3769	/ Next we assume `LC_COLLATE'. /
3770	if (!ignore_content)
3771	{
3772	if (state == `0`
3773	&& copy_locale == NULL
3774	&& !collate->codepoint_collation)
3775	/ We must either see a copy statement or have*
3776	ordering values, or codepoint_collation. /*
3777	lr_error (ldfile,
3778	_("%s: empty category description not allowed"),
3779	"LC_COLLATE");
3780	else if (state == `1`)
3781	{
3782	lr_error (ldfile, _("%s: missing `order_end' keyword"),
3783	"LC_COLLATE");
3784
3785	/ Handle ellipsis at end of list. /
3786	if (was_ellipsis != tok_none)
3787	{
3788	handle_ellipsis (ldfile, NULL, `0`, was_ellipsis, charmap,
3789	repertoire, result);
3790	was_ellipsis = tok_none;
3791	}
3792	}
3793	else if (state == `3`)
3794	record_error (`0`, `0`, _("\
3795	%s: missing `reorder-end' keyword"), "LC_COLLATE");
3796	else if (state == `5`)
3797	record_error (`0`, `0`, _("\
3798	%s: missing `reorder-sections-end' keyword"), "LC_COLLATE");
3799	}
3800	arg = lr_token (ldfile, charmap, result, NULL, verbose);
3801	if (arg->tok == tok_eof)
3802	break;
3803	if (arg->tok == tok_eol)
3804	lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
3805	else if (arg->tok != tok_lc_collate)
3806	lr_error (ldfile, _("\
3807	%1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3808	lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
3809	return;
3810
3811	case tok_define:
3812	if (ignore_content)
3813	{
3814	lr_ignore_rest (ldfile, `0`);
3815	break;
3816	}
3817
3818	arg = lr_token (ldfile, charmap, result, NULL, verbose);
3819	if (arg->tok != tok_ident)
3820	goto err_label;
3821
3822	/ Simply add the new symbol. /
3823	struct name_list newsym = xmalloc (sizeof* (*newsym)
3824	+ arg->val.str.lenmb + `1`);
3825	memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
3826	newsym->str[arg->val.str.lenmb] = `'\0'`;
3827	newsym->next = defined;
3828	defined = newsym;
3829
3830	lr_ignore_rest (ldfile, `1`);
3831	break;
3832
3833	case tok_undef:
3834	if (ignore_content)
3835	{
3836	lr_ignore_rest (ldfile, `0`);
3837	break;
3838	}
3839
3840	arg = lr_token (ldfile, charmap, result, NULL, verbose);
3841	if (arg->tok != tok_ident)
3842	goto err_label;
3843
3844	/ Remove _all_ occurrences of the symbol from the list. /
3845	struct name_list *prevdef = NULL;
3846	struct name_list *curdef = defined;
3847	while (curdef != NULL)
3848	if (strncmp (arg->val.str.startmb, curdef->str,
3849	arg->val.str.lenmb) == `0`
3850	&& curdef->str[arg->val.str.lenmb] == `'\0'`)
3851	{
3852	if (prevdef == NULL)
3853	defined = curdef->next;
3854	else
3855	prevdef->next = curdef->next;
3856
3857	struct name_list *olddef = curdef;
3858	curdef = curdef->next;
3859
3860	free (olddef);
3861	}
3862	else
3863	{
3864	prevdef = curdef;
3865	curdef = curdef->next;
3866	}
3867
3868	lr_ignore_rest (ldfile, `1`);
3869	break;
3870
3871	case tok_ifdef:
3872	case tok_ifndef:
3873	if (ignore_content)
3874	{
3875	lr_ignore_rest (ldfile, `0`);
3876	break;
3877	}
3878
3879	found_ifdef:
3880	arg = lr_token (ldfile, charmap, result, NULL, verbose);
3881	if (arg->tok != tok_ident)
3882	goto err_label;
3883	lr_ignore_rest (ldfile, `1`);
3884
3885	if (collate->else_action == else_none)
3886	{
3887	curdef = defined;
3888	while (curdef != NULL)
3889	if (strncmp (arg->val.str.startmb, curdef->str,
3890	arg->val.str.lenmb) == `0`
3891	&& curdef->str[arg->val.str.lenmb] == `'\0'`)
3892	break;
3893	else
3894	curdef = curdef->next;
3895
3896	if ((nowtok == tok_ifdef && curdef != NULL)
3897	\|\| (nowtok == tok_ifndef && curdef == NULL))
3898	{
3899	/ We have to use the if-branch. /
3900	collate->else_action = else_ignore;
3901	}
3902	else
3903	{
3904	/ We have to use the else-branch, if there is one. /
3905	nowtok = skip_to (ldfile, collate, charmap, `0`);
3906	if (nowtok == tok_else)
3907	collate->else_action = else_seen;
3908	else if (nowtok == tok_elifdef)
3909	{
3910	nowtok = tok_ifdef;
3911	goto found_ifdef;
3912	}
3913	else if (nowtok == tok_elifndef)
3914	{
3915	nowtok = tok_ifndef;
3916	goto found_ifdef;
3917	}
3918	else if (nowtok == tok_eof)
3919	goto seen_eof;
3920	else if (nowtok == tok_end)
3921	goto seen_end;
3922	}
3923	}
3924	else
3925	{
3926	/ XXX Should it really become necessary to support nested*
3927	preprocessor handling we will push the state here. /*
3928	lr_error (ldfile, _("%s: nested conditionals not supported"),
3929	"LC_COLLATE");
3930	nowtok = skip_to (ldfile, collate, charmap, `1`);
3931	if (nowtok == tok_eof)
3932	goto seen_eof;
3933	else if (nowtok == tok_end)
3934	goto seen_end;
3935	}
3936	break;
3937
3938	case tok_elifdef:
3939	case tok_elifndef:
3940	case tok_else:
3941	if (ignore_content)
3942	{
3943	lr_ignore_rest (ldfile, `0`);
3944	break;
3945	}
3946
3947	lr_ignore_rest (ldfile, `1`);
3948
3949	if (collate->else_action == else_ignore)
3950	{
3951	/ Ignore everything until the endif. /
3952	nowtok = skip_to (ldfile, collate, charmap, `1`);
3953	if (nowtok == tok_eof)
3954	goto seen_eof;
3955	else if (nowtok == tok_end)
3956	goto seen_end;
3957	}
3958	else
3959	{
3960	assert (collate->else_action == else_none);
3961	lr_error (ldfile, _("\
3962	%s: '%s' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE",
3963	nowtok == tok_else ? "else"
3964	: nowtok == tok_elifdef ? "elifdef" : "elifndef");
3965	}
3966	break;
3967
3968	case tok_endif:
3969	if (ignore_content)
3970	{
3971	lr_ignore_rest (ldfile, `0`);
3972	break;
3973	}
3974
3975	lr_ignore_rest (ldfile, `1`);
3976
3977	if (collate->else_action != else_ignore
3978	&& collate->else_action != else_seen)
3979	lr_error (ldfile, _("\
3980	%s: 'endif' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE");
3981
3982	/ XXX If we support nested preprocessor directives we pop*
3983	the state here. /*
3984	collate->else_action = else_none;
3985	break;
3986
3987	default:
3988	err_label:
3989	SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
3990	}
3991
3992	/ Prepare for the next round. /
3993	now = lr_token (ldfile, charmap, result, NULL, verbose);
3994	nowtok = now->tok;
3995	}
3996
3997	seen_eof:
3998	/ When we come here we reached the end of the file. /
3999	lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
4000	}
4001

Browse the source code of glibc/locale/programs/ld-collate.c