ld-collate.c source code [glibc/locale/programs/ld-collate.c]

/* Copyright (C) 1995-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <https://www.gnu.org/licenses/>.  */
16
17	#ifdef HAVE_CONFIG_H
18	# include <config.h>
19	#endif
20
21	#include <errno.h>
22	#include <stdlib.h>
23	#include <wchar.h>
24	#include <stdint.h>
25	#include <sys/param.h>
26	#include <array_length.h>
27
28	#include "localedef.h"
29	#include "charmap.h"
30	#include "localeinfo.h"
31	#include "linereader.h"
32	#include "locfile.h"
33	#include "elem-hash.h"
34
/* Uncomment the following line in the production version.  */
/* #define NDEBUG 1 */
37	#include <assert.h>
38
39	#define obstack_chunk_alloc malloc
40	#define obstack_chunk_free free
41
/* Append DATA, after passing it through maybe_swap_uint32, as one
   32-bit value to the object currently being built in OBSTACK.  The
   current object size must satisfy LOCFILE_ALIGNED_P.  */
static inline void
__attribute ((always_inline))
obstack_int32_grow (struct obstack *obstack, int32_t data)
{
  assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));

  data = maybe_swap_uint32 (data);

  if (sizeof (int32_t) != sizeof (int))
    {
      /* `int' is not 32 bits wide here, so append the raw bytes.  */
      obstack_grow (obstack, &data, sizeof (int32_t));
      return;
    }

  obstack_int_grow (obstack, data);
}
53
/* Like obstack_int32_grow, but uses obstack_int_grow_fast when `int'
   is 32 bits wide.  When it is not, the raw bytes are appended with
   the ordinary obstack_grow.  */
static inline void
__attribute ((always_inline))
obstack_int32_grow_fast (struct obstack *obstack, int32_t data)
{
  assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));

  data = maybe_swap_uint32 (data);

  if (sizeof (int32_t) != sizeof (int))
    {
      /* No int-sized fast path applies; append the raw bytes.  */
      obstack_grow (obstack, &data, sizeof (int32_t));
      return;
    }

  obstack_int_grow_fast (obstack, data);
}
65
/* Forward declaration.  */
struct element_t;

/* Data type for list of strings.  Each node describes one section of
   the collation order together with its sorting rules.  */
struct section_list
{
  /* Successor in the known_sections list.  */
  struct section_list *def_next;
  /* Successor in the sections list.  */
  struct section_list *next;
  /* Name of the section.  */
  const char *name;
  /* First element of this section.  */
  struct element_t *first;
  /* Last element of this section.  */
  struct element_t *last;
  /* These are the rules for this section.  */
  enum coll_sort_rule *rules;
  /* Index of the rule set in the appropriate section of the output file.  */
  int ruleidx;
};
87
struct element_t;

/* Counted array of element pointers, used for the weights of one
   level of a collating element.  */
struct element_list_t
{
  /* Number of elements.  */
  int cnt;

  /* Array of CNT element pointers.  insert_weights stores a null
     pointer here for an ignored weight and ELEMENT_ELLIPSIS2 as a
     placeholder for weights defined through an ellipsis.  */
  struct element_t **w;
};
97
/* Data type for collating element.  */
struct element_t
{
  /* Name of the element; may be NULL (see new_element).  */
  const char *name;

  /* Multibyte sequence and its length in bytes; NULL/0 if unknown.  */
  const char *mbs;
  size_t nmbs;
  /* Zero-terminated wide character sequence and its length in
     characters; NULL/0 if unknown.  */
  const uint32_t *wcs;
  size_t nwcs;
  int *mborder;
  int wcorder;

  /* The following is a bit mask which bits are set if this element is
     used in the appropriate level.  Interesting for the singlebyte
     weight computation.

     XXX The type here restricts the number of levels to 32.  It could
     be changed if necessary but I doubt this is necessary.  */
  unsigned int used_in_level;

  /* One weight list per level (nrules entries, see insert_weights).  */
  struct element_list_t *weights;

  /* Nonzero if this is a real character definition.  */
  int is_character;

  /* Order of the character in the sequence.  This information will
     be used in range expressions.  */
  int mbseqorder;
  int wcseqorder;

  /* Where does the definition come from.  */
  const char *file;
  size_t line;

  /* Which section does this belong to.  */
  struct section_list *section;

  /* Predecessor and successor in the order list.  */
  struct element_t *last;
  struct element_t *next;

  /* Next element in multibyte output list.  */
  struct element_t *mbnext;
  struct element_t *mblast;

  /* Next element in wide character output list.  */
  struct element_t *wcnext;
  struct element_t *wclast;
};
147
/* Special element value.  These small non-null pointer constants are
   stored in weight lists as placeholders instead of real elements and
   must never be dereferenced.  */
#define ELEMENT_ELLIPSIS2 ((struct element_t *) 1)
#define ELEMENT_ELLIPSIS3 ((struct element_t *) 2)
#define ELEMENT_ELLIPSIS4 ((struct element_t *) 3)
152
/* Data type for collating symbol.  */
struct symbol_t
{
  /* Name of the symbol.  */
  const char *name;

  /* Point to place in the order list.  NULL until an element has been
     created for the symbol.  */
  struct element_t *order;

  /* Where does the definition come from.  */
  const char *file;
  size_t line;
};
165
166	/ Sparse table of struct element_t . /*
167	#define TABLE wchead_table
168	#define ELEMENT struct element_t *
169	#define DEFAULT NULL
170	#define ITERATE
171	#define NO_ADD_LOCALE
172	#include "3level.h"
173
174	/ Sparse table of int32_t. /
175	#define TABLE collidx_table
176	#define ELEMENT int32_t
177	#define DEFAULT 0
178	#include "3level.h"
179
180	/ Sparse table of uint32_t. /
181	#define TABLE collseq_table
182	#define ELEMENT uint32_t
183	#define DEFAULT ~((uint32_t) 0)
184	#include "3level.h"
185
186
/* Simple name list for the preprocessor.  */
struct name_list
{
  /* Next entry in the list.  */
  struct name_list *next;
  /* The name itself, stored directly behind the header (GNU
     zero-length array idiom).  */
  char str[0];
};
193
194
195	/ The real definition of the struct for the LC_COLLATE locale. /
196	struct locale_collate_t
197	{
198	/ Does the locale use code points to compare the encoding? /
199	bool codepoint_collation;
200
201	int col_weight_max;
202	int cur_weight_max;
203
204	/ List of known scripts. /
205	struct section_list *known_sections;
206	/ List of used sections. /
207	struct section_list *sections;
208	/ Current section using definition. /
209	struct section_list *current_section;
210	/ There always can be an unnamed section. /
211	struct section_list unnamed_section;
212	/ Flag whether the unnamed section has been defined. /
213	bool unnamed_section_defined;
214	/ To make handling of errors easier we have another section. /
215	struct section_list error_section;
216	/ Sometimes we are defining the values for collating symbols before*
217	the first actual section. /*
218	struct section_list symbol_section;
219
220	/ Start of the order list. /
221	struct element_t *start;
222
223	/ The undefined element. /
224	struct element_t undefined;
225
226	/ This is the cursor for `reorder_after' insertions. /
227	struct element_t *cursor;
228
229	/ This value is used when handling ellipsis. /
230	struct element_t ellipsis_weight;
231
232	/ Known collating elements. /
233	hash_table elem_table;
234
235	/ Known collating symbols. /
236	hash_table sym_table;
237
238	/ Known collation sequences. /
239	hash_table seq_table;
240
241	struct obstack mempool;
242
243	/ The LC_COLLATE category is a bit special as it is sometimes possible*
244	that the definitions from more than one input file contains information.
245	Therefore we keep all relevant input in a list. /*
246	struct locale_collate_t *next;
247
248	/ Arrays with heads of the list for each of the leading bytes in*
249	the multibyte sequences. /*
250	struct element_t *mbheads[`256`];
251
252	/ Arrays with heads of the list for each of the leading bytes in*
253	the multibyte sequences. /*
254	struct wchead_table wcheads;
255
256	/ The arrays with the collation sequence order. /
257	unsigned char mbseqorder[`256`];
258	struct collseq_table wcseqorder;
259
260	/ State of the preprocessor. /
261	enum
262	{
263	else_none = `0`,
264	else_ignore,
265	else_seen
266	}
267	else_action;
268	};
269
270
/* We have a few global variables which are used for reading all
   LC_COLLATE category descriptions in all files.  */

/* Number of sorting rules (levels).  Zero until the first direction
   specification has been read (see read_directions).  */
static uint32_t nrules;

/* List of defined preprocessor symbols.  */
static struct name_list *defined;
277
278
/* We need UTF-8 encoding of numbers.

   Store the UTF-8 style encoding of VAL in BUF and return the number
   of bytes written.  Values below 0x80 are stored as a single byte;
   larger values use between two and six bytes.  No terminating NUL
   byte is written, and BUF must have room for the bytes produced.  */
static inline int
__attribute ((always_inline))
utf8_encode (char *buf, int val)
{
  int retval;

  if (val < 0x80)
    {
      *buf = (char) val;
      retval = 1;
    }
  else
    {
      int step;

      /* Find the shortest encoding: STEP bytes can represent values
         with at most 5 * STEP + 1 significant bits.  */
      for (step = 2; step < 6; ++step)
        if ((val & (~(uint32_t) 0 << (5 * step + 1))) == 0)
          break;
      retval = step;

      /* The lead byte starts with STEP one bits followed by a zero
         bit.  Shifting the non-negative constant 0xff00 gives the same
         low byte as `~0xff >> step' without relying on the
         implementation-defined right shift of a negative value.  */
      *buf = (char) (unsigned char) (0xff00 >> step);
      --step;
      do
        {
          /* Continuation bytes carry six payload bits each, filled in
             from the last byte backwards.  */
          buf[step] = (char) (0x80 | (val & 0x3f));
          val >>= 6;
        }
      while (--step > 0);
      /* The remaining high bits go into the lead byte.  */
      *buf |= val;
    }

  return retval;
}
313
314
315	static struct section_list *
316	make_seclist_elem (struct locale_collate_t collate, const* char *string,
317	struct section_list *next)
318	{
319	struct section_list *newp;
320
321	newp = (struct section_list *) obstack_alloc (&collate->mempool,
322	sizeof (*newp));
323	newp->next = next;
324	newp->name = string;
325	newp->first = NULL;
326	newp->last = NULL;
327
328	return newp;
329	}
330
331
332	static struct element_t *
333	new_element (struct locale_collate_t collate, const* char *mbs, size_t mbslen,
334	const uint32_t wcs, const* char *name, size_t namelen,
335	int is_character)
336	{
337	struct element_t *newp;
338
339	newp = (struct element_t *) obstack_alloc (&collate->mempool,
340	sizeof (*newp));
341	newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
342	name, namelen);
343	if (mbs != NULL)
344	{
345	newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
346	newp->nmbs = mbslen;
347	}
348	else
349	{
350	newp->mbs = NULL;
351	newp->nmbs = `0`;
352	}
353	if (wcs != NULL)
354	{
355	size_t nwcs = wcslen ((wchar_t *) wcs);
356	uint32_t zero = `0`;
357	/ Handle <U0000> as a single character. /
358	if (nwcs == `0`)
359	nwcs = `1`;
360	obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
361	obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
362	newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
363	newp->nwcs = nwcs;
364	}
365	else
366	{
367	newp->wcs = NULL;
368	newp->nwcs = `0`;
369	}
370	newp->mborder = NULL;
371	newp->wcorder = `0`;
372	newp->used_in_level = `0`;
373	newp->is_character = is_character;
374
375	/ Will be assigned later. XXX /
376	newp->mbseqorder = `0`;
377	newp->wcseqorder = `0`;
378
379	/ Will be allocated later. /
380	newp->weights = NULL;
381
382	newp->file = NULL;
383	newp->line = `0`;
384
385	newp->section = collate->current_section;
386
387	newp->last = NULL;
388	newp->next = NULL;
389
390	newp->mbnext = NULL;
391	newp->mblast = NULL;
392
393	newp->wcnext = NULL;
394	newp->wclast = NULL;
395
396	return newp;
397	}
398
399
400	static struct symbol_t *
401	new_symbol (struct locale_collate_t collate, const* char *name, size_t len)
402	{
403	struct symbol_t *newp;
404
405	newp = (struct symbol_t ) obstack_alloc (&collate->mempool, sizeof* (*newp));
406
407	newp->name = obstack_copy0 (&collate->mempool, name, len);
408	newp->order = NULL;
409
410	newp->file = NULL;
411	newp->line = `0`;
412
413	return newp;
414	}
415
416
417	/ Test whether this name is already defined somewhere. /
418	static int
419	check_duplicate (struct linereader ldfile, struct* locale_collate_t *collate,
420	const struct charmap_t *charmap,
421	struct repertoire_t repertoire, const* char *symbol,
422	size_t symbol_len)
423	{
424	void *ignore = NULL;
425
426	if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == `0`)
427	{
428	lr_error (ldfile, _("`%.*s' already defined in charmap"),
429	(int) symbol_len, symbol);
430	return `1`;
431	}
432
433	if (repertoire != NULL
434	&& (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore)
435	== `0`))
436	{
437	lr_error (ldfile, _("`%.*s' already defined in repertoire"),
438	(int) symbol_len, symbol);
439	return `1`;
440	}
441
442	if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == `0`)
443	{
444	lr_error (ldfile, _("`%.*s' already defined as collating symbol"),
445	(int) symbol_len, symbol);
446	return `1`;
447	}
448
449	if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == `0`)
450	{
451	lr_error (ldfile, _("`%.*s' already defined as collating element"),
452	(int) symbol_len, symbol);
453	return `1`;
454	}
455
456	return `0`;
457	}
458
459
460	/ Read the direction specification. /
461	static void
462	read_directions (struct linereader ldfile, struct* token *arg,
463	const struct charmap_t *charmap,
464	struct repertoire_t repertoire, struct* localedef_t *result)
465	{
466	int cnt = `0`;
467	int max = nrules ?: `10`;
468	enum coll_sort_rule rules = calloc (max, sizeof* (*rules));
469	int warned = `0`;
470	struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
471
472	while (`1`)
473	{
474	int valid = `0`;
475
476	if (arg->tok == tok_forward)
477	{
478	if (rules[cnt] & sort_backward)
479	{
480	if (! warned)
481	{
482	lr_error (ldfile, _("\
483	%s: `forward' and `backward' are mutually excluding each other"),
484	"LC_COLLATE");
485	warned = `1`;
486	}
487	}
488	else if (rules[cnt] & sort_forward)
489	{
490	if (! warned)
491	{
492	lr_error (ldfile, _("\
493	%s: `%s' mentioned more than once in definition of weight %d"),
494	"LC_COLLATE", "forward", cnt + `1`);
495	}
496	}
497	else
498	rules[cnt] \|= sort_forward;
499
500	valid = `1`;
501	}
502	else if (arg->tok == tok_backward)
503	{
504	if (rules[cnt] & sort_forward)
505	{
506	if (! warned)
507	{
508	lr_error (ldfile, _("\
509	%s: `forward' and `backward' are mutually excluding each other"),
510	"LC_COLLATE");
511	warned = `1`;
512	}
513	}
514	else if (rules[cnt] & sort_backward)
515	{
516	if (! warned)
517	{
518	lr_error (ldfile, _("\
519	%s: `%s' mentioned more than once in definition of weight %d"),
520	"LC_COLLATE", "backward", cnt + `1`);
521	}
522	}
523	else
524	rules[cnt] \|= sort_backward;
525
526	valid = `1`;
527	}
528	else if (arg->tok == tok_position)
529	{
530	if (rules[cnt] & sort_position)
531	{
532	if (! warned)
533	{
534	lr_error (ldfile, _("\
535	%s: `%s' mentioned more than once in definition of weight %d"),
536	"LC_COLLATE", "position", cnt + `1`);
537	}
538	}
539	else
540	rules[cnt] \|= sort_position;
541
542	valid = `1`;
543	}
544
545	if (valid)
546	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
547
548	if (arg->tok == tok_eof \|\| arg->tok == tok_eol \|\| arg->tok == tok_comma
549	\|\| arg->tok == tok_semicolon)
550	{
551	if (! valid && ! warned)
552	{
553	lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
554	warned = `1`;
555	}
556
557	/ See whether we have to increment the counter. /
558	if (arg->tok != tok_comma && rules[cnt] != `0`)
559	{
560	/ Add the default `forward' if we have seen only `position'. /
561	if (rules[cnt] == sort_position)
562	rules[cnt] = sort_position \| sort_forward;
563
564	++cnt;
565	}
566
567	if (arg->tok == tok_eof \|\| arg->tok == tok_eol)
568	/ End of line or file, so we exit the loop. /
569	break;
570
571	if (nrules == `0`)
572	{
573	/ See whether we have enough room in the array. /
574	if (cnt == max)
575	{
576	max += `10`;
577	rules = (enum coll_sort_rule *) xrealloc (rules,
578	max
579	* sizeof (*rules));
580	memset (&rules[cnt], `'\0'`, (max - cnt) * sizeof (*rules));
581	}
582	}
583	else
584	{
585	if (cnt == nrules)
586	{
587	/ There must not be any more rule. /
588	if (! warned)
589	{
590	lr_error (ldfile, _("\
591	%s: too many rules; first entry only had %d"),
592	"LC_COLLATE", nrules);
593	warned = `1`;
594	}
595
596	lr_ignore_rest (ldfile, `0`);
597	break;
598	}
599	}
600	}
601	else
602	{
603	if (! warned)
604	{
605	lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
606	warned = `1`;
607	}
608	}
609
610	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
611	}
612
613	if (nrules == `0`)
614	{
615	/ Now we know how many rules we have. /
616	nrules = cnt;
617	rules = (enum coll_sort_rule *) xrealloc (rules,
618	nrules * sizeof (*rules));
619	}
620	else
621	{
622	if (cnt < nrules)
623	{
624	/ Not enough rules in this specification. /
625	if (! warned)
626	lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
627
628	do
629	rules[cnt] = sort_forward;
630	while (++cnt < nrules);
631	}
632	}
633
634	collate->current_section->rules = rules;
635	}
636
637
638	static struct element_t *
639	find_element (struct linereader ldfile, struct* locale_collate_t *collate,
640	const char *str, size_t len)
641	{
642	void *result = NULL;
643
644	/ Search for the entries among the collation sequences already define. /
645	if (find_entry (&collate->seq_table, str, len, &result) != `0`)
646	{
647	/ Nope, not define yet. So we see whether it is a*
648	collation symbol. /*
649	void *ptr;
650
651	if (find_entry (&collate->sym_table, str, len, &ptr) == `0`)
652	{
653	/ It's a collation symbol. /
654	struct symbol_t sym = (struct* symbol_t *) ptr;
655	result = sym->order;
656
657	if (result == NULL)
658	result = sym->order = new_element (collate, NULL, `0`, NULL,
659	NULL, `0`, `0`);
660	}
661	else if (find_entry (&collate->elem_table, str, len, &result) != `0`)
662	{
663	/ It's also no collation element. So it is a character*
664	element defined later. /*
665	result = new_element (collate, NULL, `0`, NULL, str, len, `1`);
666	/ Insert it into the sequence table. /
667	insert_entry (&collate->seq_table, str, len, result);
668	}
669	}
670
671	return (struct element_t *) result;
672	}
673
674
675	static void
676	unlink_element (struct locale_collate_t *collate)
677	{
678	if (collate->cursor == collate->start)
679	{
680	assert (collate->cursor->next == NULL);
681	assert (collate->cursor->last == NULL);
682	collate->cursor = NULL;
683	}
684	else
685	{
686	if (collate->cursor->next != NULL)
687	collate->cursor->next->last = collate->cursor->last;
688	if (collate->cursor->last != NULL)
689	collate->cursor->last->next = collate->cursor->next;
690	collate->cursor = collate->cursor->last;
691	}
692	}
693
694
695	static void
696	insert_weights (struct linereader ldfile, struct* element_t *elem,
697	const struct charmap_t *charmap,
698	struct repertoire_t repertoire, struct* localedef_t *result,
699	enum token_t ellipsis)
700	{
701	int weight_cnt;
702	struct token *arg;
703	struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
704
705	/ Initialize all the fields. /
706	elem->file = ldfile->fname;
707	elem->line = ldfile->lineno;
708
709	elem->last = collate->cursor;
710	elem->next = collate->cursor ? collate->cursor->next : NULL;
711	if (collate->cursor != NULL && collate->cursor->next != NULL)
712	collate->cursor->next->last = elem;
713	if (collate->cursor != NULL)
714	collate->cursor->next = elem;
715	if (collate->start == NULL)
716	{
717	assert (collate->cursor == NULL);
718	collate->start = elem;
719	}
720
721	elem->section = collate->current_section;
722
723	if (collate->current_section->first == NULL)
724	collate->current_section->first = elem;
725	if (collate->current_section->last == collate->cursor)
726	collate->current_section->last = elem;
727
728	collate->cursor = elem;
729
730	elem->weights = (struct element_list_t *)
731	obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
732	memset (elem->weights, `'\0'`, nrules * sizeof (struct element_list_t));
733
734	weight_cnt = `0`;
735
736	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
737	do
738	{
739	if (arg->tok == tok_eof \|\| arg->tok == tok_eol)
740	break;
741
742	if (arg->tok == tok_ignore)
743	{
744	/ The weight for this level has to be ignored. We use the*
745	null pointer to indicate this. /*
746	elem->weights[weight_cnt].w = (struct element_t **)
747	obstack_alloc (&collate->mempool, sizeof (struct element_t *));
748	elem->weights[weight_cnt].w[`0`] = NULL;
749	elem->weights[weight_cnt].cnt = `1`;
750	}
751	else if (arg->tok == tok_bsymbol \|\| arg->tok == tok_ucs4)
752	{
753	char ucs4str[`10`];
754	struct element_t *val;
755	char *symstr;
756	size_t symlen;
757
758	if (arg->tok == tok_bsymbol)
759	{
760	symstr = arg->val.str.startmb;
761	symlen = arg->val.str.lenmb;
762	}
763	else
764	{
765	snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4);
766	symstr = ucs4str;
767	symlen = `9`;
768	}
769
770	val = find_element (ldfile, collate, symstr, symlen);
771	if (val == NULL)
772	break;
773
774	elem->weights[weight_cnt].w = (struct element_t **)
775	obstack_alloc (&collate->mempool, sizeof (struct element_t *));
776	elem->weights[weight_cnt].w[`0`] = val;
777	elem->weights[weight_cnt].cnt = `1`;
778	}
779	else if (arg->tok == tok_string)
780	{
781	/ Split the string up in the individual characters and put*
782	the element definitions in the list. /*
783	const char *cp = arg->val.str.startmb;
784	int cnt = `0`;
785	struct element_t *charelem;
786	struct element_t **weights = NULL;
787	int max = `0`;
788
789	if (*cp == `'\0'`)
790	{
791	lr_error (ldfile, _("%s: empty weight string not allowed"),
792	"LC_COLLATE");
793	lr_ignore_rest (ldfile, `0`);
794	break;
795	}
796
797	do
798	{
799	if (*cp == `'<'`)
800	{
801	/ Ahh, it's a bsymbol or an UCS4 value. If it's*
802	the latter we have to unify the name. /*
803	const char *startp = ++cp;
804	size_t len;
805
806	while (*cp != `'>'`)
807	{
808	if (*cp == ldfile->escape_char)
809	++cp;
810	if (*cp == `'\0'`)
811	/ It's a syntax error. /
812	goto syntax;
813
814	++cp;
815	}
816
817	if (cp - startp == `5` && startp[`0`] == `'U'`
818	&& isxdigit (startp[`1`]) && isxdigit (startp[`2`])
819	&& isxdigit (startp[`3`]) && isxdigit (startp[`4`]))
820	{
821	unsigned int ucs4 = strtoul (startp + `1`, NULL, `16`);
822	char *newstr;
823
824	newstr = (char *) xmalloc (`10`);
825	snprintf (newstr, `10`, "U%08X", ucs4);
826	startp = newstr;
827
828	len = `9`;
829	}
830	else
831	len = cp - startp;
832
833	charelem = find_element (ldfile, collate, startp, len);
834	++cp;
835	}
836	else
837	{
838	/ People really shouldn't use characters directly in*
839	the string. Especially since it's not really clear
840	what this means. We interpret all characters in the
841	string as if that would be bsymbols. Otherwise we
842	would have to match back to bsymbols somehow and this
843	is normally not what people normally expect. /*
844	charelem = find_element (ldfile, collate, cp++, `1`);
845	}
846
847	if (charelem == NULL)
848	{
849	/ We ignore the rest of the line. /
850	lr_ignore_rest (ldfile, `0`);
851	break;
852	}
853
854	/ Add the pointer. /
855	if (cnt >= max)
856	{
857	struct element_t **newp;
858	max += `10`;
859	newp = (struct element_t **)
860	alloca (max * sizeof (struct element_t *));
861	memcpy (newp, weights, cnt * sizeof (struct element_t *));
862	weights = newp;
863	}
864	weights[cnt++] = charelem;
865	}
866	while (*cp != `'\0'`);
867
868	/ Now store the information. /
869	elem->weights[weight_cnt].w = (struct element_t **)
870	obstack_alloc (&collate->mempool,
871	cnt * sizeof (struct element_t *));
872	memcpy (elem->weights[weight_cnt].w, weights,
873	cnt * sizeof (struct element_t *));
874	elem->weights[weight_cnt].cnt = cnt;
875
876	/ We don't need the string anymore. /
877	free (arg->val.str.startmb);
878	}
879	else if (ellipsis != tok_none
880	&& (arg->tok == tok_ellipsis2
881	\|\| arg->tok == tok_ellipsis3
882	\|\| arg->tok == tok_ellipsis4))
883	{
884	/ It must be the same ellipsis as used in the initial column. /
885	if (arg->tok != ellipsis)
886	lr_error (ldfile, _("\
887	%s: weights must use the same ellipsis symbol as the name"),
888	"LC_COLLATE");
889
890	/ The weight for this level will depend on the element*
891	iterating over the range. Put a placeholder. /*
892	elem->weights[weight_cnt].w = (struct element_t **)
893	obstack_alloc (&collate->mempool, sizeof (struct element_t *));
894	elem->weights[weight_cnt].w[`0`] = ELEMENT_ELLIPSIS2;
895	elem->weights[weight_cnt].cnt = `1`;
896	}
897	else
898	{
899	syntax:
900	/ It's a syntax error. /
901	lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
902	lr_ignore_rest (ldfile, `0`);
903	break;
904	}
905
906	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
907	/ This better should be the end of the line or a semicolon. /
908	if (arg->tok == tok_semicolon)
909	/ OK, ignore this and read the next token. /
910	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
911	else if (arg->tok != tok_eof && arg->tok != tok_eol)
912	{
913	/ It's a syntax error. /
914	lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
915	lr_ignore_rest (ldfile, `0`);
916	break;
917	}
918	}
919	while (++weight_cnt < nrules);
920
921	if (weight_cnt < nrules)
922	{
923	/ This means the rest of the line uses the current element as*
924	the weight. /*
925	do
926	{
927	elem->weights[weight_cnt].w = (struct element_t **)
928	obstack_alloc (&collate->mempool, sizeof (struct element_t *));
929	if (ellipsis == tok_none)
930	elem->weights[weight_cnt].w[`0`] = elem;
931	else
932	elem->weights[weight_cnt].w[`0`] = ELEMENT_ELLIPSIS2;
933	elem->weights[weight_cnt].cnt = `1`;
934	}
935	while (++weight_cnt < nrules);
936	}
937	else
938	{
939	if (arg->tok == tok_ignore \|\| arg->tok == tok_bsymbol)
940	{
941	/ Too many rule values. /
942	lr_error (ldfile, _("%s: too many values"), "LC_COLLATE");
943	lr_ignore_rest (ldfile, `0`);
944	}
945	else
946	lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof);
947	}
948	}
949
950
951	static int
952	insert_value (struct linereader ldfile, const* char *symstr, size_t symlen,
953	const struct charmap_t charmap, struct* repertoire_t *repertoire,
954	struct localedef_t *result)
955	{
956	/ First find out what kind of symbol this is. /
957	struct charseq *seq;
958	uint32_t wc;
959	struct element_t *elem = NULL;
960	struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
961
962	/ Try to find the character in the charmap. /
963	seq = charmap_find_value (charmap, symstr, symlen);
964
965	/ Determine the wide character. /
966	if (seq == NULL \|\| seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
967	{
968	wc = repertoire_find_value (repertoire, symstr, symlen);
969	if (seq != NULL)
970	seq->ucs4 = wc;
971	}
972	else
973	wc = seq->ucs4;
974
975	if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
976	{
977	/ It's no character, so look through the collation elements and*
978	symbol list. /*
979	void *ptr = elem;
980	if (find_entry (&collate->elem_table, symstr, symlen, &ptr) != `0`)
981	{
982	void *result;
983	struct symbol_t *sym = NULL;
984
985	/ It's also collation element. Therefore it's either a*
986	collating symbol or it's a character which is not
987	supported by the character set. In the later case we
988	simply create a dummy entry. /*
989	if (find_entry (&collate->sym_table, symstr, symlen, &result) == `0`)
990	{
991	/ It's a collation symbol. /
992	sym = (struct symbol_t *) result;
993
994	elem = sym->order;
995	}
996
997	if (elem == NULL)
998	{
999	elem = new_element (collate, NULL, `0`, NULL, symstr, symlen, `0`);
1000
1001	if (sym != NULL)
1002	sym->order = elem;
1003	else
1004	/ Enter a fake element in the sequence table. This*
1005	won't cause anything in the output since there is
1006	no multibyte or wide character associated with
1007	it. /*
1008	insert_entry (&collate->seq_table, symstr, symlen, elem);
1009	}
1010	}
1011	else
1012	/ Copy the result back. /
1013	elem = ptr;
1014	}
1015	else
1016	{
1017	/ Otherwise the symbols stands for a character. /
1018	void *ptr = elem;
1019	if (find_entry (&collate->seq_table, symstr, symlen, &ptr) != `0`)
1020	{
1021	uint32_t wcs[`2`] = { wc, `0` };
1022
1023	/ We have to allocate an entry. /
1024	elem = new_element (collate,
1025	seq != NULL ? (char *) seq->bytes : NULL,
1026	seq != NULL ? seq->nbytes : `0`,
1027	wc == ILLEGAL_CHAR_VALUE ? NULL : wcs,
1028	symstr, symlen, `1`);
1029
1030	/ And add it to the table. /
1031	if (insert_entry (&collate->seq_table, symstr, symlen, elem) != `0`)
1032	/ This cannot happen. /
1033	assert (! "Internal error");
1034	}
1035	else
1036	{
1037	/ Copy the result back. /
1038	elem = ptr;
1039
1040	/ Maybe the character was used before the definition. In this case*
1041	we have to insert the byte sequences now. /*
1042	if (elem->mbs == NULL && seq != NULL)
1043	{
1044	elem->mbs = obstack_copy0 (&collate->mempool,
1045	seq->bytes, seq->nbytes);
1046	elem->nmbs = seq->nbytes;
1047	}
1048
1049	if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE)
1050	{
1051	uint32_t wcs[`2`] = { wc, `0` };
1052
1053	elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs));
1054	elem->nwcs = `1`;
1055	}
1056	}
1057	}
1058
1059	/ Test whether this element is not already in the list. /
1060	if (elem->next != NULL \|\| elem == collate->cursor)
1061	{
1062	lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"),
1063	(int) symlen, symstr, elem->file, elem->line);
1064	lr_ignore_rest (ldfile, `0`);
1065	return `1`;
1066	}
1067
1068	insert_weights (ldfile, elem, charmap, repertoire, result, tok_none);
1069
1070	return `0`;
1071	}
1072
1073
1074	static void
1075	handle_ellipsis (struct linereader ldfile, const* char *symstr, size_t symlen,
1076	enum token_t ellipsis, const struct charmap_t *charmap,
1077	struct repertoire_t *repertoire,
1078	struct localedef_t *result)
1079	{
1080	struct element_t *startp;
1081	struct element_t *endp;
1082	struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
1083
1084	/ Unlink the entry added for the ellipsis. /
1085	unlink_element (collate);
1086	startp = collate->cursor;
1087
1088	/ Process and add the end-entry. /
1089	if (symstr != NULL
1090	&& insert_value (ldfile, symstr, symlen, charmap, repertoire, result))
1091	/ Something went wrong with inserting the to-value. This means*
1092	we cannot process the ellipsis. /*
1093	return;
1094
1095	/ Reset the cursor. /
1096	collate->cursor = startp;
1097
1098	/ Now we have to handle many different situations:*
1099	- we have to distinguish between the three different ellipsis forms
1100	- the is the ellipsis at the beginning, in the middle, or at the end.
1101	*/
1102	endp = collate->cursor->next;
1103	assert (symstr == NULL \|\| endp != NULL);
1104
1105	/ XXX The following is probably very wrong since also collating symbols*
1106	can appear in ranges. But do we want/can refine the test for that? /*
1107	#if 0
1108	/ Both, the start and the end symbol, must stand for characters. /
1109	if ((startp != NULL && (startp->name == NULL \|\| ! startp->is_character))
1110	\|\| (endp != NULL && (endp->name == NULL\|\| ! endp->is_character)))
1111	{
1112	lr_error (ldfile, _("\
1113	%s: the start and the end symbol of a range must stand for characters"),
1114	"LC_COLLATE");
1115	return;
1116	}
1117	#endif
1118
1119	if (ellipsis == tok_ellipsis3)
1120	{
1121	/ One requirement we make here: the length of the byte*
1122	sequences for the first and end character must be the same.
1123	This is mainly to prevent unwanted effects and this is often
1124	not what is wanted. /*
1125	size_t len = (startp->mbs != NULL ? startp->nmbs
1126	: (endp->mbs != NULL ? endp->nmbs : `0`));
1127	char mbcnt[len + `1`];
1128	char mbend[len + `1`];
1129
1130	/ Well, this should be caught somewhere else already. Just to*
1131	make sure. /*
1132	assert (startp == NULL \|\| startp->wcs == NULL \|\| startp->wcs[`1`] == `0`);
1133	assert (endp == NULL \|\| endp->wcs == NULL \|\| endp->wcs[`1`] == `0`);
1134
1135	if (startp != NULL && endp != NULL
1136	&& startp->mbs != NULL && endp->mbs != NULL
1137	&& startp->nmbs != endp->nmbs)
1138	{
1139	lr_error (ldfile, _("\
1140	%s: byte sequences of first and last character must have the same length"),
1141	"LC_COLLATE");
1142	return;
1143	}
1144
1145	/ Determine whether we have to generate multibyte sequences. /
1146	if ((startp == NULL \|\| startp->mbs != NULL)
1147	&& (endp == NULL \|\| endp->mbs != NULL))
1148	{
1149	int cnt;
1150	int ret;
1151
1152	/ Prepare the beginning byte sequence. This is either from the*
1153	beginning byte sequence or it is all nulls if it was an
1154	initial ellipsis. /*
1155	if (startp == NULL \|\| startp->mbs == NULL)
1156	memset (mbcnt, `'\0'`, len);
1157	else
1158	{
1159	memcpy (mbcnt, startp->mbs, len);
1160
1161	/ And increment it so that the value is the first one we will*
1162	try to insert. /*
1163	for (cnt = len - `1`; cnt >= `0`; --cnt)
1164	if (++mbcnt[cnt] != `'\0'`)
1165	break;
1166	}
1167	mbcnt[len] = `'\0'`;
1168
1169	/ And the end sequence. /
1170	if (endp == NULL \|\| endp->mbs == NULL)
1171	memset (mbend, `'\0'`, len);
1172	else
1173	memcpy (mbend, endp->mbs, len);
1174	mbend[len] = `'\0'`;
1175
1176	/ Test whether we have a correct range. /
1177	ret = memcmp (mbcnt, mbend, len);
1178	if (ret >= `0`)
1179	{
1180	if (ret > `0`)
1181	lr_error (ldfile, _("%s: byte sequence of first character of \
1182	range is not lower than that of the last character"), "LC_COLLATE");
1183	return;
1184	}
1185
1186	/ Generate the byte sequences data. /
1187	while (`1`)
1188	{
1189	struct charseq *seq;
1190
1191	/ Quite a bit of work ahead. We have to find the character*
1192	definition for the byte sequence and then determine the
1193	wide character belonging to it. /*
1194	seq = charmap_find_symbol (charmap, mbcnt, len);
1195	if (seq != NULL)
1196	{
1197	struct element_t *elem;
1198	size_t namelen;
1199
1200	/ I don't think this can ever happen. /
1201	assert (seq->name != NULL);
1202	namelen = strlen (seq->name);
1203
1204	if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1205	seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1206	namelen);
1207
1208	/ Now we are ready to insert the new value in the*
1209	sequence. Find out whether the element is
1210	already known. /*
1211	void *ptr;
1212	if (find_entry (&collate->seq_table, seq->name, namelen,
1213	&ptr) != `0`)
1214	{
1215	uint32_t wcs[`2`] = { seq->ucs4, `0` };
1216
1217	/ We have to allocate an entry. /
1218	elem = new_element (collate, mbcnt, len,
1219	seq->ucs4 == ILLEGAL_CHAR_VALUE
1220	? NULL : wcs, seq->name,
1221	namelen, `1`);
1222
1223	/ And add it to the table. /
1224	if (insert_entry (&collate->seq_table, seq->name,
1225	namelen, elem) != `0`)
1226	/ This cannot happen. /
1227	assert (! "Internal error");
1228	}
1229	else
1230	/ Copy the result. /
1231	elem = ptr;
1232
1233	/ Test whether this element is not already in the list. /
1234	if (elem->next != NULL \|\| (collate->cursor != NULL
1235	&& elem->next == collate->cursor))
1236	{
1237	lr_error (ldfile, _("\
1238	order for `%.*s' already defined at %s:%Zu"),
1239	(int) namelen, seq->name,
1240	elem->file, elem->line);
1241	goto increment;
1242	}
1243
1244	/ Enqueue the new element. /
1245	elem->last = collate->cursor;
1246	if (collate->cursor == NULL)
1247	elem->next = NULL;
1248	else
1249	{
1250	elem->next = collate->cursor->next;
1251	elem->last->next = elem;
1252	if (elem->next != NULL)
1253	elem->next->last = elem;
1254	}
1255	if (collate->start == NULL)
1256	{
1257	assert (collate->cursor == NULL);
1258	collate->start = elem;
1259	}
1260	collate->cursor = elem;
1261
1262	/ Add the weight value. We take them from the*
1263	`ellipsis_weights' member of `collate'. /*
1264	elem->weights = (struct element_list_t *)
1265	obstack_alloc (&collate->mempool,
1266	nrules * sizeof (struct element_list_t));
1267	for (cnt = `0`; cnt < nrules; ++cnt)
1268	if (collate->ellipsis_weight.weights[cnt].cnt == `1`
1269	&& (collate->ellipsis_weight.weights[cnt].w[`0`]
1270	== ELEMENT_ELLIPSIS2))
1271	{
1272	elem->weights[cnt].w = (struct element_t **)
1273	obstack_alloc (&collate->mempool,
1274	sizeof (struct element_t *));
1275	elem->weights[cnt].w[`0`] = elem;
1276	elem->weights[cnt].cnt = `1`;
1277	}
1278	else
1279	{
1280	/ Simply use the weight from `ellipsis_weight'. /
1281	elem->weights[cnt].w =
1282	collate->ellipsis_weight.weights[cnt].w;
1283	elem->weights[cnt].cnt =
1284	collate->ellipsis_weight.weights[cnt].cnt;
1285	}
1286	}
1287
1288	/ Increment for the next round. /
1289	increment:
1290	for (cnt = len - `1`; cnt >= `0`; --cnt)
1291	if (++mbcnt[cnt] != `'\0'`)
1292	break;
1293
1294	/ Find out whether this was all. /
1295	if (cnt < `0` \|\| memcmp (mbcnt, mbend, len) >= `0`)
1296	/ Yep, that's all. /
1297	break;
1298	}
1299	}
1300	}
1301	else
1302	{
1303	/ For symbolic range we naturally must have a beginning and an*
1304	end specified by the user. /*
1305	if (startp == NULL)
1306	lr_error (ldfile, _("\
1307	%s: symbolic range ellipsis must not directly follow `order_start'"),
1308	"LC_COLLATE");
1309	else if (endp == NULL)
1310	lr_error (ldfile, _("\
1311	%s: symbolic range ellipsis must not be directly followed by `order_end'"),
1312	"LC_COLLATE");
1313	else
1314	{
1315	/ Determine the range. To do so we have to determine the*
1316	common prefix of the both names and then the numeric
1317	values of both ends. /*
1318	size_t lenfrom = strlen (startp->name);
1319	size_t lento = strlen (endp->name);
1320	char buf[lento + `1`];
1321	int preflen = `0`;
1322	long int from;
1323	long int to;
1324	char *cp;
1325	int base = ellipsis == tok_ellipsis2 ? `16` : `10`;
1326
1327	if (lenfrom != lento)
1328	{
1329	invalid_range:
1330	lr_error (ldfile, _("\
1331	`%s' and `%.*s' are not valid names for symbolic range"),
1332	startp->name, (int) lento, endp->name);
1333	return;
1334	}
1335
1336	while (startp->name[preflen] == endp->name[preflen])
1337	if (startp->name[preflen] == `'\0'`)
1338	/ Nothing to be done. The start and end point are identical*
1339	and while inserting the end point we have already given
1340	the user an error message. /*
1341	return;
1342	else
1343	++preflen;
1344
1345	errno = `0`;
1346	from = strtol (startp->name + preflen, &cp, base);
1347	if ((from == UINT_MAX && errno == ERANGE) \|\| *cp != `'\0'`)
1348	goto invalid_range;
1349
1350	errno = `0`;
1351	to = strtol (endp->name + preflen, &cp, base);
1352	if ((to == UINT_MAX && errno == ERANGE) \|\| *cp != `'\0'`)
1353	goto invalid_range;
1354
1355	/ Copy the prefix. /
1356	memcpy (buf, startp->name, preflen);
1357
1358	/ Loop over all values. /
1359	for (++from; from < to; ++from)
1360	{
1361	struct element_t *elem = NULL;
1362	struct charseq *seq;
1363	uint32_t wc;
1364	int cnt;
1365
1366	/ Generate the name. /
1367	sprintf (buf + preflen, base == `10` ? "%0ld" : "%0lX",
1368	(int) (lenfrom - preflen), from);
1369
1370	/ Look whether this name is already defined. /
1371	void *ptr;
1372	if (find_entry (&collate->seq_table, buf, symlen, &ptr) == `0`)
1373	{
1374	/ Copy back the result. /
1375	elem = ptr;
1376
1377	if (elem->next != NULL \|\| (collate->cursor != NULL
1378	&& elem->next == collate->cursor))
1379	{
1380	lr_error (ldfile, _("\
1381	%s: order for `%.*s' already defined at %s:%Zu"),
1382	"LC_COLLATE", (int) lenfrom, buf,
1383	elem->file, elem->line);
1384	continue;
1385	}
1386
1387	if (elem->name == NULL)
1388	{
1389	lr_error (ldfile, _("%s: `%s' must be a character"),
1390	"LC_COLLATE", buf);
1391	continue;
1392	}
1393	}
1394
1395	if (elem == NULL \|\| (elem->mbs == NULL && elem->wcs == NULL))
1396	{
1397	/ Search for a character of this name. /
1398	seq = charmap_find_value (charmap, buf, lenfrom);
1399	if (seq == NULL \|\| seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1400	{
1401	wc = repertoire_find_value (repertoire, buf, lenfrom);
1402
1403	if (seq != NULL)
1404	seq->ucs4 = wc;
1405	}
1406	else
1407	wc = seq->ucs4;
1408
1409	if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
1410	/ We don't know anything about a character with this*
1411	name. XXX Should we warn? /*
1412	continue;
1413
1414	if (elem == NULL)
1415	{
1416	uint32_t wcs[`2`] = { wc, `0` };
1417
1418	/ We have to allocate an entry. /
1419	elem = new_element (collate,
1420	seq != NULL
1421	? (char *) seq->bytes : NULL,
1422	seq != NULL ? seq->nbytes : `0`,
1423	wc == ILLEGAL_CHAR_VALUE
1424	? NULL : wcs, buf, lenfrom, `1`);
1425	}
1426	else
1427	{
1428	/ Update the element. /
1429	if (seq != NULL)
1430	{
1431	elem->mbs = obstack_copy0 (&collate->mempool,
1432	seq->bytes, seq->nbytes);
1433	elem->nmbs = seq->nbytes;
1434	}
1435
1436	if (wc != ILLEGAL_CHAR_VALUE)
1437	{
1438	uint32_t zero = `0`;
1439
1440	obstack_grow (&collate->mempool,
1441	&wc, sizeof (uint32_t));
1442	obstack_grow (&collate->mempool,
1443	&zero, sizeof (uint32_t));
1444	elem->wcs = obstack_finish (&collate->mempool);
1445	elem->nwcs = `1`;
1446	}
1447	}
1448
1449	elem->file = ldfile->fname;
1450	elem->line = ldfile->lineno;
1451	elem->section = collate->current_section;
1452	}
1453
1454	/ Enqueue the new element. /
1455	elem->last = collate->cursor;
1456	elem->next = collate->cursor->next;
1457	elem->last->next = elem;
1458	if (elem->next != NULL)
1459	elem->next->last = elem;
1460	collate->cursor = elem;
1461
1462	/ Now add the weights. They come from the `ellipsis_weights'*
1463	member of `collate'. /*
1464	elem->weights = (struct element_list_t *)
1465	obstack_alloc (&collate->mempool,
1466	nrules * sizeof (struct element_list_t));
1467	for (cnt = `0`; cnt < nrules; ++cnt)
1468	if (collate->ellipsis_weight.weights[cnt].cnt == `1`
1469	&& (collate->ellipsis_weight.weights[cnt].w[`0`]
1470	== ELEMENT_ELLIPSIS2))
1471	{
1472	elem->weights[cnt].w = (struct element_t **)
1473	obstack_alloc (&collate->mempool,
1474	sizeof (struct element_t *));
1475	elem->weights[cnt].w[`0`] = elem;
1476	elem->weights[cnt].cnt = `1`;
1477	}
1478	else
1479	{
1480	/ Simly use the weight from `ellipsis_weight'. /
1481	elem->weights[cnt].w =
1482	collate->ellipsis_weight.weights[cnt].w;
1483	elem->weights[cnt].cnt =
1484	collate->ellipsis_weight.weights[cnt].cnt;
1485	}
1486	}
1487	}
1488	}
1489	/ Move the cursor to the last entry in the ellipsis.*
1490	Subsequent operations need to start from the last entry. /*
1491	collate->cursor = endp;
1492	}
1493
1494
1495	static void
1496	collate_startup (struct linereader ldfile, struct* localedef_t *locale,
1497	struct localedef_t copy_locale, int* ignore_content)
1498	{
1499	if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL)
1500	{
1501	struct locale_collate_t *collate;
1502
1503	if (copy_locale == NULL)
1504	{
1505	collate = locale->categories[LC_COLLATE].collate =
1506	(struct locale_collate_t *)
1507	xcalloc (`1`, sizeof (struct locale_collate_t));
1508
1509	/ Init the various data structures. /
1510	init_hash (&collate->elem_table, `100`);
1511	init_hash (&collate->sym_table, `100`);
1512	init_hash (&collate->seq_table, `500`);
1513	obstack_init (&collate->mempool);
1514
1515	collate->col_weight_max = -`1`;
1516	collate->codepoint_collation = false;
1517	}
1518	else
1519	/ Reuse the copy_locale's data structures. /
1520	collate = locale->categories[LC_COLLATE].collate =
1521	copy_locale->categories[LC_COLLATE].collate;
1522	}
1523
1524	ldfile->translate_strings = `0`;
1525	ldfile->return_widestr = `0`;
1526	}
1527
1528
1529	void
1530	collate_finish (struct localedef_t locale, const* struct charmap_t *charmap)
1531	{
1532	/ Now is the time when we can assign the individual collation*
1533	values for all the symbols. We have possibly different values
1534	for the wide- and the multibyte-character symbols. This is done
1535	since it might make a difference in the encoding if there is in
1536	some cases no multibyte-character but there are wide-characters.
1537	(The other way around it is not important since theencoded
1538	collation value in the wide-character case is 32 bits wide and
1539	therefore requires no encoding).
1540
1541	The lowest collation value assigned is 2. Zero is reserved for
1542	the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1543	functions and 1 is used to separate the individual passes for the
1544	different rules.
1545
1546	We also have to construct is list with all the bytes/words which
1547	can come first in a sequence, followed by all the elements which
1548	also start with this byte/word. The order is reverse which has
1549	among others the important effect that longer strings are located
1550	first in the list. This is required for the output data since
1551	the algorithm used in `strcoll' etc depends on this.
1552
1553	The multibyte case is easy. We simply sort into an array with
1554	256 elements. /*
1555	struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1556	int mbact[nrules];
1557	int wcact;
1558	int mbseqact;
1559	int wcseqact;
1560	struct element_t *runp;
1561	int i;
1562	int need_undefined = `0`;
1563	struct section_list *sect;
1564	int ruleidx;
1565
1566	if (collate == NULL)
1567	{
1568	/ No data, no check. Issue a warning. /
1569	record_warning (_("No definition for %s category found"),
1570	"LC_COLLATE");
1571	return;
1572	}
1573
1574	/ No data required. /
1575	if (collate->codepoint_collation)
1576	return;
1577
1578	/ If this assertion is hit change the type in `element_t'. /
1579	assert (nrules <= sizeof (runp->used_in_level) * `8`);
1580
1581	/ Make sure that the `position' rule is used either in all sections*
1582	or in none. /*
1583	for (i = `0`; i < nrules; ++i)
1584	for (sect = collate->sections; sect != NULL; sect = sect->next)
1585	if (sect != collate->current_section
1586	&& sect->rules != NULL
1587	&& ((sect->rules[i] & sort_position)
1588	!= (collate->current_section->rules[i] & sort_position)))
1589	{
1590	record_error (`0`, `0`, _("\
1591	%s: `position' must be used for a specific level in all sections or none"),
1592	"LC_COLLATE");
1593	break;
1594	}
1595
1596	/ Find out which elements are used at which level. At the same*
1597	time we find out whether we have any undefined symbols. /*
1598	runp = collate->start;
1599	while (runp != NULL)
1600	{
1601	if (runp->mbs != NULL)
1602	{
1603	for (i = `0`; i < nrules; ++i)
1604	{
1605	int j;
1606
1607	for (j = `0`; j < runp->weights[i].cnt; ++j)
1608	/ A NULL pointer as the weight means IGNORE. /
1609	if (runp->weights[i].w[j] != NULL)
1610	{
1611	if (runp->weights[i].w[j]->weights == NULL)
1612	{
1613	record_error_at_line (`0`, `0`, runp->file, runp->line,
1614	_("symbol `%s' not defined"),
1615	runp->weights[i].w[j]->name);
1616
1617	need_undefined = `1`;
1618	runp->weights[i].w[j] = &collate->undefined;
1619	}
1620	else
1621	/ Set the bit for the level. /
1622	runp->weights[i].w[j]->used_in_level \|= `1` << i;
1623	}
1624	}
1625	}
1626
1627	/ Up to the next entry. /
1628	runp = runp->next;
1629	}
1630
1631	/ Walk through the list of defined sequences and assign weights. Also*
1632	create the data structure which will allow generating the single byte
1633	character based tables.
1634
1635	Since at each time only the weights for each of the rules are
1636	only compared to other weights for this rule it is possible to
1637	assign more compact weight values than simply counting all
1638	weights in sequence. We can assign weights from 3, one for each
1639	rule individually and only for those elements, which are actually
1640	used for this rule.
1641
1642	Why is this important? It is not for the wide char table. But
1643	it is for the singlebyte output since here larger numbers have to
1644	be encoded to make it possible to emit the value as a byte
1645	string. /*
1646	for (i = `0`; i < nrules; ++i)
1647	mbact[i] = `2`;
1648	wcact = `2`;
1649	mbseqact = `0`;
1650	wcseqact = `0`;
1651	runp = collate->start;
1652	while (runp != NULL)
1653	{
1654	/ Determine the order. /
1655	if (runp->used_in_level != `0`)
1656	{
1657	runp->mborder = (int *) obstack_alloc (&collate->mempool,
1658	nrules * sizeof (int));
1659
1660	for (i = `0`; i < nrules; ++i)
1661	if ((runp->used_in_level & (`1` << i)) != `0`)
1662	runp->mborder[i] = mbact[i]++;
1663	else
1664	runp->mborder[i] = `0`;
1665	}
1666
1667	if (runp->mbs != NULL)
1668	{
1669	struct element_t **eptr;
1670	struct element_t *lastp = NULL;
1671
1672	/ Find the point where to insert in the list. /
1673	eptr = &collate->mbheads[((unsigned char *) runp->mbs)[`0`]];
1674	while (*eptr != NULL)
1675	{
1676	if ((*eptr)->nmbs < runp->nmbs)
1677	break;
1678
1679	if ((*eptr)->nmbs == runp->nmbs)
1680	{
1681	int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs);
1682
1683	if (c == `0`)
1684	{
1685	/ This should not happen. It means that we have*
1686	to symbols with the same byte sequence. It is
1687	of course an error. /*
1688	record_error_at_line (`0`, `0`, (*eptr)->file,
1689	(*eptr)->line,
1690	_("\
1691	symbol `%s' has the same encoding as"), (*eptr)->name);
1692
1693	record_error_at_line (`0`, `0`, runp->file, runp->line,
1694	_("symbol `%s'"), runp->name);
1695	goto dont_insert;
1696	}
1697	else if (c < `0`)
1698	/ Insert it here. /
1699	break;
1700	}
1701
1702	/ To the next entry. /
1703	lastp = *eptr;
1704	eptr = &(*eptr)->mbnext;
1705	}
1706
1707	/ Set the pointers. /
1708	runp->mbnext = *eptr;
1709	runp->mblast = lastp;
1710	if (*eptr != NULL)
1711	(*eptr)->mblast = runp;
1712	*eptr = runp;
1713	dont_insert:
1714	;
1715	}
1716
1717	if (runp->used_in_level)
1718	runp->wcorder = wcact++;
1719
1720	if (runp->is_character)
1721	{
1722	if (runp->nmbs == `1`)
1723	collate->mbseqorder[((unsigned char *) runp->mbs)[`0`]] = mbseqact++;
1724
1725	runp->wcseqorder = wcseqact++;
1726	}
1727	else if (runp->mbs != NULL && runp->weights != NULL)
1728	/ This is for collation elements. /
1729	runp->wcseqorder = wcseqact++;
1730
1731	/ Up to the next entry. /
1732	runp = runp->next;
1733	}
1734
1735	/ Find out whether any of the `mbheads' entries is unset. In this*
1736	case we use the UNDEFINED entry. /*
1737	for (i = `1`; i < `256`; ++i)
1738	if (collate->mbheads[i] == NULL)
1739	{
1740	need_undefined = `1`;
1741	collate->mbheads[i] = &collate->undefined;
1742	}
1743
1744	/ Now to the wide character case. /
1745	collate->wcheads.p = `6`;
1746	collate->wcheads.q = `10`;
1747	wchead_table_init (&collate->wcheads);
1748
1749	collate->wcseqorder.p = `6`;
1750	collate->wcseqorder.q = `10`;
1751	collseq_table_init (&collate->wcseqorder);
1752
1753	/ Start adding. /
1754	runp = collate->start;
1755	while (runp != NULL)
1756	{
1757	if (runp->wcs != NULL)
1758	{
1759	struct element_t *e;
1760	struct element_t **eptr;
1761	struct element_t *lastp;
1762
1763	/ Insert the collation sequence value. /
1764	if (runp->is_character)
1765	collseq_table_add (&collate->wcseqorder, runp->wcs[`0`],
1766	runp->wcseqorder);
1767
1768	/ Find the point where to insert in the list. /
1769	e = wchead_table_get (&collate->wcheads, runp->wcs[`0`]);
1770	eptr = &e;
1771	lastp = NULL;
1772	while (*eptr != NULL)
1773	{
1774	if ((*eptr)->nwcs < runp->nwcs)
1775	break;
1776
1777	if ((*eptr)->nwcs == runp->nwcs)
1778	{
1779	int c = wmemcmp ((wchar_t ) (eptr)->wcs,
1780	(wchar_t *) runp->wcs, runp->nwcs);
1781
1782	if (c == `0`)
1783	{
1784	/ This should not happen. It means that we have*
1785	two symbols with the same byte sequence. It is
1786	of course an error. /*
1787	record_error_at_line (`0`, `0`, (*eptr)->file,
1788	(*eptr)->line,
1789	_("\
1790	symbol `%s' has the same encoding as"), (*eptr)->name);
1791
1792	record_error_at_line (`0`, `0`, runp->file, runp->line,
1793	_("symbol `%s'"), runp->name);
1794	goto dont_insertwc;
1795	}
1796	else if (c < `0`)
1797	/ Insert it here. /
1798	break;
1799	}
1800
1801	/ To the next entry. /
1802	lastp = *eptr;
1803	eptr = &(*eptr)->wcnext;
1804	}
1805
1806	/ Set the pointers. /
1807	runp->wcnext = *eptr;
1808	runp->wclast = lastp;
1809	if (*eptr != NULL)
1810	(*eptr)->wclast = runp;
1811	*eptr = runp;
1812	if (eptr == &e)
1813	wchead_table_add (&collate->wcheads, runp->wcs[`0`], e);
1814	dont_insertwc:
1815	;
1816	}
1817
1818	/ Up to the next entry. /
1819	runp = runp->next;
1820	}
1821
1822	/ Now determine whether the UNDEFINED entry is needed and if yes,*
1823	whether it was defined. /*
1824	collate->undefined.used_in_level = need_undefined ? ~`0ul` : `0`;
1825	if (collate->undefined.file == NULL)
1826	{
1827	if (need_undefined)
1828	{
1829	/ This seems not to be enforced by recent standards. Don't*
1830	emit an error, simply append UNDEFINED at the end. /*
1831	collate->undefined.mborder =
1832	(int ) obstack_alloc (&collate->mempool, nrules sizeof (int));
1833
1834	for (i = `0`; i < nrules; ++i)
1835	collate->undefined.mborder[i] = mbact[i]++;
1836	}
1837
1838	/ In any case we will need the definition for the wide character*
1839	case. But we will not complain that it is missing since the
1840	specification strangely enough does not seem to account for
1841	this. /*
1842	collate->undefined.wcorder = wcact++;
1843	}
1844
1845	/ Finally, try to unify the rules for the sections. Whenever the rules*
1846	for a section are the same as those for another section give the
1847	ruleset the same index. Since there are never many section we can
1848	use an O(n^2) algorithm here. /*
1849	sect = collate->sections;
1850	while (sect != NULL && sect->rules == NULL)
1851	sect = sect->next;
1852
1853	/ Bail out if we have no sections because of earlier errors. /
1854	if (sect == NULL)
1855	{
1856	record_error (EXIT_FAILURE, `0`, _("too many errors; giving up"));
1857	return;
1858	}
1859
1860	ruleidx = `0`;
1861	do
1862	{
1863	struct section_list *osect = collate->sections;
1864
1865	while (osect != sect)
1866	if (osect->rules != NULL
1867	&& memcmp (osect->rules, sect->rules,
1868	nrules * sizeof (osect->rules[`0`])) == `0`)
1869	break;
1870	else
1871	osect = osect->next;
1872
1873	if (osect == sect)
1874	sect->ruleidx = ruleidx++;
1875	else
1876	sect->ruleidx = osect->ruleidx;
1877
1878	/ Next section. /
1879	do
1880	sect = sect->next;
1881	while (sect != NULL && sect->rules == NULL);
1882	}
1883	while (sect != NULL);
1884	/ We are currently not prepared for more than 128 rulesets. But this*
1885	should never really be a problem. /*
1886	assert (ruleidx <= `128`);
1887	}
1888
1889
1890	static int32_t
1891	output_weight (struct obstack pool, struct* locale_collate_t *collate,
1892	struct element_t *elem)
1893	{
1894	size_t cnt;
1895	int32_t retval;
1896
1897	/ Optimize the use of UNDEFINED. /
1898	if (elem == &collate->undefined)
1899	/ The weights are already inserted. /
1900	return `0`;
1901
1902	/ This byte can start exactly one collation element and this is*
1903	a single byte. We can directly give the index to the weights. /*
1904	retval = obstack_object_size (pool);
1905
1906	/ Construct the weight. /
1907	for (cnt = `0`; cnt < nrules; ++cnt)
1908	{
1909	char buf[elem->weights[cnt].cnt * `7`];
1910	int len = `0`;
1911	int i;
1912
1913	for (i = `0`; i < elem->weights[cnt].cnt; ++i)
1914	/ Encode the weight value. We do nothing for IGNORE entries. /
1915	if (elem->weights[cnt].w[i] != NULL)
1916	len += utf8_encode (&buf[len],
1917	elem->weights[cnt].w[i]->mborder[cnt]);
1918
1919	/ And add the buffer content. /
1920	obstack_1grow (pool, len);
1921	obstack_grow (pool, buf, len);
1922	}
1923
1924	return retval \| ((elem->section->ruleidx & `0x7f`) << `24`);
1925	}
1926
1927
1928	static int32_t
1929	output_weightwc (struct obstack pool, struct* locale_collate_t *collate,
1930	struct element_t *elem)
1931	{
1932	size_t cnt;
1933	int32_t retval;
1934
1935	/ Optimize the use of UNDEFINED. /
1936	if (elem == &collate->undefined)
1937	/ The weights are already inserted. /
1938	return `0`;
1939
1940	/ This byte can start exactly one collation element and this is*
1941	a single byte. We can directly give the index to the weights. /*
1942	retval = obstack_object_size (pool) / sizeof (int32_t);
1943
1944	/ Construct the weight. /
1945	for (cnt = `0`; cnt < nrules; ++cnt)
1946	{
1947	int32_t buf[elem->weights[cnt].cnt];
1948	int i;
1949	int32_t j;
1950
1951	for (i = `0`, j = `0`; i < elem->weights[cnt].cnt; ++i)
1952	if (elem->weights[cnt].w[i] != NULL)
1953	buf[j++] = elem->weights[cnt].w[i]->wcorder;
1954
1955	/ And add the buffer content. /
1956	obstack_int32_grow (pool, j);
1957
1958	obstack_grow (pool, buf, j * sizeof (int32_t));
1959	maybe_swap_uint32_obstack (pool, j);
1960	}
1961
1962	return retval \| ((elem->section->ruleidx & `0x7f`) << `24`);
1963	}
1964
/* State used by add_to_tablewc, which itself only receives a character
   and an element list and takes everything else from here.
   If localedef is ever threaded, this would need to be a __thread
   variable.  */
static struct
{
  /* Pool handed to output_weightwc to receive the weight records.  */
  struct obstack *weightpool;
  /* Pool receiving the entries for characters which start more than one
     element or an element longer than one wide character.  */
  struct obstack *extrapool;
  /* Pool receiving the weight indices of runs of consecutive elements.  */
  struct obstack *indpool;
  /* Collation data handed to output_weightwc.  */
  struct locale_collate_t *collate;
  /* Table which maps a wide character to its index.  */
  struct collidx_table *tablewc;
} atwc;
1974
1975	static void add_to_tablewc (uint32_t ch, struct element_t *runp);
1976
1977	static void
1978	add_to_tablewc (uint32_t ch, struct element_t *runp)
1979	{
1980	if (runp->wcnext == NULL && runp->nwcs == `1`)
1981	{
1982	int32_t weigthidx = output_weightwc (atwc.weightpool, atwc.collate,
1983	runp);
1984	collidx_table_add (atwc.tablewc, ch, weigthidx);
1985	}
1986	else
1987	{
1988	/ As for the singlebyte table, we recognize sequences and*
1989	compress them. /*
1990
1991	collidx_table_add (atwc.tablewc, ch,
1992	-(obstack_object_size (atwc.extrapool)
1993	/ sizeof (uint32_t)));
1994
1995	do
1996	{
1997	/ Store the current index in the weight table. We know that*
1998	the current position in the `extrapool' is aligned on a
1999	32-bit address. /*
2000	int32_t weightidx;
2001	int added;
2002
2003	/ Find out wether this is a single entry or we have more than*
2004	one consecutive entry. /*
2005	if (runp->wcnext != NULL
2006	&& runp->nwcs == runp->wcnext->nwcs
2007	&& wmemcmp ((wchar_t *) runp->wcs,
2008	(wchar_t *)runp->wcnext->wcs,
2009	runp->nwcs - `1`) == `0`
2010	&& (runp->wcs[runp->nwcs - `1`]
2011	== runp->wcnext->wcs[runp->nwcs - `1`] + `1`))
2012	{
2013	int i;
2014	struct element_t *series_startp = runp;
2015	struct element_t *curp;
2016
2017	/ Now add first the initial byte sequence. /
2018	added = (`1` + `1` + `2` * (runp->nwcs - `1`)) * sizeof (int32_t);
2019	if (sizeof (int32_t) == sizeof (int))
2020	obstack_make_room (atwc.extrapool, added);
2021
2022	/ More than one consecutive entry. We mark this by having*
2023	a negative index into the indirect table. /*
2024	obstack_int32_grow_fast (atwc.extrapool,
2025	-(obstack_object_size (atwc.indpool)
2026	/ sizeof (int32_t)));
2027	obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - `1`);
2028
2029	do
2030	runp = runp->wcnext;
2031	while (runp->wcnext != NULL
2032	&& runp->nwcs == runp->wcnext->nwcs
2033	&& wmemcmp ((wchar_t *) runp->wcs,
2034	(wchar_t *)runp->wcnext->wcs,
2035	runp->nwcs - `1`) == `0`
2036	&& (runp->wcs[runp->nwcs - `1`]
2037	== runp->wcnext->wcs[runp->nwcs - `1`] + `1`));
2038
2039	/ Now walk backward from here to the beginning. /
2040	curp = runp;
2041
2042	for (i = `1`; i < runp->nwcs; ++i)
2043	obstack_int32_grow_fast (atwc.extrapool, curp->wcs[i]);
2044
2045	/ Now find the end of the consecutive sequence and*
2046	add all the indices in the indirect pool. /*
2047	do
2048	{
2049	weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2050	curp);
2051	obstack_int32_grow (atwc.indpool, weightidx);
2052
2053	curp = curp->wclast;
2054	}
2055	while (curp != series_startp);
2056
2057	/ Add the final weight. /
2058	weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2059	curp);
2060	obstack_int32_grow (atwc.indpool, weightidx);
2061
2062	/ And add the end byte sequence. Without length this*
2063	time. /*
2064	for (i = `1`; i < curp->nwcs; ++i)
2065	obstack_int32_grow (atwc.extrapool, curp->wcs[i]);
2066	}
2067	else
2068	{
2069	/ A single entry. Simply add the index and the length and*
2070	string (except for the first character which is already
2071	tested for). /*
2072	int i;
2073
2074	/ Output the weight info. /
2075	weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2076	runp);
2077
2078	assert (runp->nwcs > `0`);
2079	added = (`1` + `1` + runp->nwcs - `1`) * sizeof (int32_t);
2080	if (sizeof (int) == sizeof (int32_t))
2081	obstack_make_room (atwc.extrapool, added);
2082
2083	obstack_int32_grow_fast (atwc.extrapool, weightidx);
2084	obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - `1`);
2085	for (i = `1`; i < runp->nwcs; ++i)
2086	obstack_int32_grow_fast (atwc.extrapool, runp->wcs[i]);
2087	}
2088
2089	/ Next entry. /
2090	runp = runp->wcnext;
2091	}
2092	while (runp != NULL);
2093	}
2094	}
2095
/* Include the C locale identity tables for _NL_COLLATE_COLLSEQMB and
   _NL_COLLATE_COLLSEQWC.  */
2098	#include "C-collate-seq.c"
2099
2100	void
2101	collate_output (struct localedef_t locale, const* struct charmap_t *charmap,
2102	const char *output_path)
2103	{
2104	struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
2105	const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
2106	struct locale_file file;
2107	size_t ch;
2108	int32_t tablemb[`256`];
2109	struct obstack weightpool;
2110	struct obstack extrapool;
2111	struct obstack indirectpool;
2112	struct section_list *sect;
2113	struct collidx_table tablewc;
2114	uint32_t elem_size;
2115	uint32_t *elem_table;
2116	int i;
2117	struct element_t *runp;
2118
2119	init_locale_data (&file, nelems);
2120	add_locale_uint32 (&file, nrules);
2121
2122	/ If we have no LC_COLLATE data emit only the number of rules as zero. /
2123	if (collate == NULL \|\| collate->codepoint_collation)
2124	{
2125	size_t idx;
2126	for (idx = `1`; idx < nelems; idx++)
2127	{
2128	/ The words have to be handled specially. /
2129	if (idx == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
2130	add_locale_uint32 (&file, `0`);
2131	else if (idx == _NL_ITEM_INDEX (_NL_COLLATE_CODESET)
2132	&& collate != NULL)
2133	/ A valid LC_COLLATE must have a code set name. /
2134	add_locale_string (&file, charmap->code_set_name);
2135	else if (idx == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQMB)
2136	&& collate != NULL)
2137	add_locale_raw_data (&file, collseqmb, sizeof (collseqmb));
2138	else if (idx == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQWC)
2139	&& collate != NULL)
2140	add_locale_uint32_array (&file, collseqwc,
2141	array_length (collseqwc));
2142	else
2143	add_locale_empty (&file);
2144	}
2145	write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file);
2146	return;
2147	}
2148
2149	obstack_init (&weightpool);
2150	obstack_init (&extrapool);
2151	obstack_init (&indirectpool);
2152
2153	/ Since we are using the sign of an integer to mark indirection the*
2154	offsets in the arrays we are indirectly referring to must not be
2155	zero since -0 == 0. Therefore we add a bit of dummy content. /*
2156	obstack_int32_grow (&extrapool, `0`);
2157	obstack_int32_grow (&indirectpool, `0`);
2158
2159	/ Prepare the ruleset table. /
2160	for (sect = collate->sections, i = `0`; sect != NULL; sect = sect->next)
2161	if (sect->rules != NULL && sect->ruleidx == i)
2162	{
2163	int j;
2164
2165	obstack_make_room (&weightpool, nrules);
2166
2167	for (j = `0`; j < nrules; ++j)
2168	obstack_1grow_fast (&weightpool, sect->rules[j]);
2169	++i;
2170	}
2171	/ And align the output. /
2172	i = (nrules * i) % LOCFILE_ALIGN;
2173	if (i > `0`)
2174	do
2175	obstack_1grow (&weightpool, `'\0'`);
2176	while (++i < LOCFILE_ALIGN);
2177
2178	add_locale_raw_obstack (&file, &weightpool);
2179
2180	/ Generate the 8-bit table. Walk through the lists of sequences*
2181	starting with the same byte and add them one after the other to
2182	the table. In case we have more than one sequence starting with
2183	the same byte we have to use extra indirection.
2184
2185	First add a record for the NUL byte. This entry will never be used
2186	so it does not matter. /*
2187	tablemb[`0`] = `0`;
2188
2189	/ Now insert the `UNDEFINED' value if it is used. Since this value*
2190	will probably be used more than once it is good to store the
2191	weights only once. /*
2192	if (collate->undefined.used_in_level != `0`)
2193	output_weight (&weightpool, collate, &collate->undefined);
2194
2195	for (ch = `1`; ch < `256`; ++ch)
2196	if (collate->mbheads[ch]->mbnext == NULL
2197	&& collate->mbheads[ch]->nmbs <= `1`)
2198	{
2199	tablemb[ch] = output_weight (&weightpool, collate,
2200	collate->mbheads[ch]);
2201	}
2202	else
2203	{
2204	/ The entries in the list are sorted by length and then*
2205	alphabetically. This is the order in which we will add the
2206	elements to the collation table. This allows simply walking
2207	the table in sequence and stopping at the first matching
2208	entry. Since the longer sequences are coming first in the
2209	list they have the possibility to match first, just as it
2210	has to be. In the worst case we are walking to the end of
2211	the list where we put, if no singlebyte sequence is defined
2212	in the locale definition, the weights for UNDEFINED.
2213
2214	To reduce the length of the search list we compress them a bit.
2215	This happens by collecting sequences of consecutive byte
2216	sequences in one entry (having and begin and end byte sequence)
2217	and add only one index into the weight table. We can find the
2218	consecutive entries since they are also consecutive in the list. /*
2219	struct element_t *runp = collate->mbheads[ch];
2220	struct element_t *lastp;
2221
2222	assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2223
2224	tablemb[ch] = -obstack_object_size (&extrapool);
2225
2226	do
2227	{
2228	/ Store the current index in the weight table. We know that*
2229	the current position in the `extrapool' is aligned on a
2230	32-bit address. /*
2231	int32_t weightidx;
2232	int added;
2233
2234	/* Find out whether this is a single entry or we have more than
2235	   one consecutive entry.  */
2236	if (runp->mbnext != NULL
2237	&& runp->nmbs == runp->mbnext->nmbs
2238	&& memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - `1`) == `0`
2239	&& (runp->mbs[runp->nmbs - `1`]
2240	== runp->mbnext->mbs[runp->nmbs - `1`] + `1`))
2241	{
2242	int i;
2243	struct element_t *series_startp = runp;
2244	struct element_t *curp;
2245
2246	/ Compute how much space we will need. /
2247	added = LOCFILE_ALIGN_UP (sizeof (int32_t) + `1`
2248	+ `2` * (runp->nmbs - `1`));
2249	assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2250	obstack_make_room (&extrapool, added);
2251
2252	/ More than one consecutive entry. We mark this by having*
2253	a negative index into the indirect table. /*
2254	obstack_int32_grow_fast (&extrapool,
2255	-(obstack_object_size (&indirectpool)
2256	/ sizeof (int32_t)));
2257
2258	/ Now search first the end of the series. /
2259	do
2260	runp = runp->mbnext;
2261	while (runp->mbnext != NULL
2262	&& runp->nmbs == runp->mbnext->nmbs
2263	&& memcmp (runp->mbs, runp->mbnext->mbs,
2264	runp->nmbs - `1`) == `0`
2265	&& (runp->mbs[runp->nmbs - `1`]
2266	== runp->mbnext->mbs[runp->nmbs - `1`] + `1`));
2267
2268	/ Now walk backward from here to the beginning. /
2269	curp = runp;
2270
2271	assert (runp->nmbs <= `256`);
2272	obstack_1grow_fast (&extrapool, curp->nmbs - `1`);
2273	for (i = `1`; i < curp->nmbs; ++i)
2274	obstack_1grow_fast (&extrapool, curp->mbs[i]);
2275
2276	/ Now find the end of the consecutive sequence and*
2277	add all the indices in the indirect pool. /*
2278	do
2279	{
2280	weightidx = output_weight (&weightpool, collate, curp);
2281	obstack_int32_grow (&indirectpool, weightidx);
2282
2283	curp = curp->mblast;
2284	}
2285	while (curp != series_startp);
2286
2287	/ Add the final weight. /
2288	weightidx = output_weight (&weightpool, collate, curp);
2289	obstack_int32_grow (&indirectpool, weightidx);
2290
2291	/ And add the end byte sequence. Without length this*
2292	time. /*
2293	for (i = `1`; i < curp->nmbs; ++i)
2294	obstack_1grow_fast (&extrapool, curp->mbs[i]);
2295	}
2296	else
2297	{
2298	/ A single entry. Simply add the index and the length and*
2299	string (except for the first character which is already
2300	tested for). /*
2301	int i;
2302
2303	/ Output the weight info. /
2304	weightidx = output_weight (&weightpool, collate, runp);
2305
2306	added = LOCFILE_ALIGN_UP (sizeof (int32_t) + `1`
2307	+ runp->nmbs - `1`);
2308	assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2309	obstack_make_room (&extrapool, added);
2310
2311	obstack_int32_grow_fast (&extrapool, weightidx);
2312	assert (runp->nmbs <= `256`);
2313	obstack_1grow_fast (&extrapool, runp->nmbs - `1`);
2314
2315	for (i = `1`; i < runp->nmbs; ++i)
2316	obstack_1grow_fast (&extrapool, runp->mbs[i]);
2317	}
2318
2319	/ Add alignment bytes if necessary. /
2320	while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)))
2321	obstack_1grow_fast (&extrapool, `'\0'`);
2322
2323	/ Next entry. /
2324	lastp = runp;
2325	runp = runp->mbnext;
2326	}
2327	while (runp != NULL);
2328
2329	assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2330
2331	/ If the final entry in the list is not a single character we*
2332	add an UNDEFINED entry here. /*
2333	if (lastp->nmbs != `1`)
2334	{
2335	int added = LOCFILE_ALIGN_UP (sizeof (int32_t) + `1` + `1`);
2336	obstack_make_room (&extrapool, added);
2337
2338	obstack_int32_grow_fast (&extrapool, `0`);
2339	/ XXX What rule? We just pick the first. /
2340	obstack_1grow_fast (&extrapool, `0`);
2341	/ Length is zero. /
2342	obstack_1grow_fast (&extrapool, `0`);
2343
2344	/ Add alignment bytes if necessary. /
2345	while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)))
2346	obstack_1grow_fast (&extrapool, `'\0'`);
2347	}
2348	}
2349
2350	/ Add padding to the tables if necessary. /
2351	while (!LOCFILE_ALIGNED_P (obstack_object_size (&weightpool)))
2352	obstack_1grow (&weightpool, `0`);
2353
2354	/ Now add the four tables. /
2355	add_locale_uint32_array (&file, (const uint32_t *) tablemb, `256`);
2356	add_locale_raw_obstack (&file, &weightpool);
2357	add_locale_raw_obstack (&file, &extrapool);
2358	add_locale_raw_obstack (&file, &indirectpool);
2359
2360	/ Now the same for the wide character table. We need to store some*
2361	more information here. /*
2362	add_locale_empty (&file);
2363	add_locale_empty (&file);
2364	add_locale_empty (&file);
2365
2366	/ Since we are using the sign of an integer to mark indirection the*
2367	offsets in the arrays we are indirectly referring to must not be
2368	zero since -0 == 0. Therefore we add a bit of dummy content. /*
2369	obstack_int32_grow (&extrapool, `0`);
2370	obstack_int32_grow (&indirectpool, `0`);
2371
2372	/ Now insert the `UNDEFINED' value if it is used. Since this value*
2373	will probably be used more than once it is good to store the
2374	weights only once. /*
2375	if (output_weightwc (&weightpool, collate, &collate->undefined) != `0`)
2376	abort ();
2377
2378	/ Generate the table. Walk through the lists of sequences starting*
2379	with the same wide character and add them one after the other to
2380	the table. In case we have more than one sequence starting with
2381	the same byte we have to use extra indirection. /*
2382	tablewc.p = `6`;
2383	tablewc.q = `10`;
2384	collidx_table_init (&tablewc);
2385
2386	atwc.weightpool = &weightpool;
2387	atwc.extrapool = &extrapool;
2388	atwc.indpool = &indirectpool;
2389	atwc.collate = collate;
2390	atwc.tablewc = &tablewc;
2391
2392	wchead_table_iterate (&collate->wcheads, add_to_tablewc);
2393
2394	memset (&atwc, `0`, sizeof (atwc));
2395
2396	/ Now add the four tables. /
2397	add_locale_collidx_table (&file, &tablewc);
2398	add_locale_raw_obstack (&file, &weightpool);
2399	add_locale_raw_obstack (&file, &extrapool);
2400	add_locale_raw_obstack (&file, &indirectpool);
2401
2402	/ Finally write the table with collation element names out. It is*
2403	a hash table with a simple function which gets the name of the
2404	character as the input. One character might have many names. The
2405	value associated with the name is an index into the weight table
2406	where we are then interested in the first-level weight value.
2407
2408	To determine how large the table should be we are counting the
2409	elements we have to put in. Since we are using internal chaining
2410	using a secondary hash function we have to make the table a bit
2411	larger to avoid extremely long search times. We can achieve
2412	good results with a 40% larger table than there are entries. /*
2413	elem_size = `0`;
2414	runp = collate->start;
2415	while (runp != NULL)
2416	{
2417	if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2418	/ Yep, the element really counts. /
2419	++elem_size;
2420
2421	runp = runp->next;
2422	}
2423	/ Add 50% and find the next prime number. /
2424	elem_size = next_prime (elem_size + (elem_size >> `1`));
2425
2426	/ Allocate the table. Each entry consists of two words: the hash*
2427	value and an index in a secondary table which provides the index
2428	into the weight table and the string itself (so that a match can
2429	be determined). /*
2430	elem_table = (uint32_t *) obstack_alloc (&extrapool,
2431	elem_size * `2` * sizeof (uint32_t));
2432	memset (elem_table, `'\0'`, elem_size * `2` * sizeof (uint32_t));
2433
2434	/ Now add the elements. /
2435	runp = collate->start;
2436	while (runp != NULL)
2437	{
2438	if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2439	{
2440	/ Compute the hash value of the name. /
2441	uint32_t namelen = strlen (runp->name);
2442	uint32_t hash = elem_hash (runp->name, namelen);
2443	size_t idx = hash % elem_size;
2444	#ifndef NDEBUG
2445	size_t start_idx = idx;
2446	#endif
2447
2448	if (elem_table[idx * `2`] != `0`)
2449	{
2450	/ The spot is already taken. Try iterating using the value*
2451	from the secondary hashing function. /*
2452	size_t iter = hash % (elem_size - `2`) + `1`;
2453
2454	do
2455	{
2456	idx += iter;
2457	if (idx >= elem_size)
2458	idx -= elem_size;
2459	assert (idx != start_idx);
2460	}
2461	while (elem_table[idx * `2`] != `0`);
2462	}
2463	/ This is the spot where we will insert the value. /
2464	elem_table[idx * `2`] = hash;
2465	elem_table[idx * `2` + `1`] = obstack_object_size (&extrapool);
2466
2467	/ The string itself including length. /
2468	obstack_1grow (&extrapool, namelen);
2469	obstack_grow (&extrapool, runp->name, namelen);
2470
2471	/ And the multibyte representation. /
2472	obstack_1grow (&extrapool, runp->nmbs);
2473	obstack_grow (&extrapool, runp->mbs, runp->nmbs);
2474
2475	/ And align again to 32 bits. /
2476	if ((`1` + namelen + `1` + runp->nmbs) % sizeof (int32_t) != `0`)
2477	obstack_grow (&extrapool, "\0\0",
2478	(sizeof (int32_t)
2479	- ((`1` + namelen + `1` + runp->nmbs)
2480	% sizeof (int32_t))));
2481
2482	/ Now some 32-bit values: multibyte collation sequence,*
2483	wide char string (including length), and wide char
2484	collation sequence. /*
2485	obstack_int32_grow (&extrapool, runp->mbseqorder);
2486
2487	obstack_int32_grow (&extrapool, runp->nwcs);
2488	obstack_grow (&extrapool, runp->wcs,
2489	runp->nwcs * sizeof (uint32_t));
2490	maybe_swap_uint32_obstack (&extrapool, runp->nwcs);
2491
2492	obstack_int32_grow (&extrapool, runp->wcseqorder);
2493	}
2494
2495	runp = runp->next;
2496	}
2497
2498	/ Prepare to write out this data. /
2499	add_locale_uint32 (&file, elem_size);
2500	add_locale_uint32_array (&file, elem_table, `2` * elem_size);
2501	add_locale_raw_obstack (&file, &extrapool);
2502	add_locale_raw_data (&file, collate->mbseqorder, `256`);
2503	add_locale_collseq_table (&file, &collate->wcseqorder);
2504	add_locale_string (&file, charmap->code_set_name);
2505	write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file);
2506
2507	obstack_free (&weightpool, NULL);
2508	obstack_free (&extrapool, NULL);
2509	obstack_free (&indirectpool, NULL);
2510	}
2511
2512
2513	static enum token_t
2514	skip_to (struct linereader ldfile, struct* locale_collate_t *collate,
2515	const struct charmap_t charmap, int* to_endif)
2516	{
2517	while (`1`)
2518	{
2519	struct token *now = lr_token (ldfile, charmap, NULL, NULL, `0`);
2520	enum token_t nowtok = now->tok;
2521
2522	if (nowtok == tok_eof \|\| nowtok == tok_end)
2523	return nowtok;
2524
2525	if (nowtok == tok_ifdef \|\| nowtok == tok_ifndef)
2526	{
2527	lr_error (ldfile, _("%s: nested conditionals not supported"),
2528	"LC_COLLATE");
2529	nowtok = skip_to (ldfile, collate, charmap, tok_endif);
2530	if (nowtok == tok_eof \|\| nowtok == tok_end)
2531	return nowtok;
2532	}
2533	else if (nowtok == tok_endif \|\| (!to_endif && nowtok == tok_else))
2534	{
2535	lr_ignore_rest (ldfile, `1`);
2536	return nowtok;
2537	}
2538	else if (!to_endif && (nowtok == tok_elifdef \|\| nowtok == tok_elifndef))
2539	{
2540	/ Do not read the rest of the line. /
2541	return nowtok;
2542	}
2543	else if (nowtok == tok_else)
2544	{
2545	lr_error (ldfile, _("%s: more than one 'else'"), "LC_COLLATE");
2546	}
2547
2548	lr_ignore_rest (ldfile, `0`);
2549	}
2550	}
2551
2552
2553	void
2554	collate_read (struct linereader ldfile, struct* localedef_t *result,
2555	const struct charmap_t charmap, const* char *repertoire_name,
2556	int ignore_content)
2557	{
2558	struct repertoire_t *repertoire = NULL;
2559	struct locale_collate_t *collate;
2560	struct token *now;
2561	struct token *arg = NULL;
2562	enum token_t nowtok;
2563	enum token_t was_ellipsis = tok_none;
2564	struct localedef_t *copy_locale = NULL;
2565	/ Parsing state:*
2566	0 - start
2567	1 - between `order-start' and `order-end'
2568	2 - after `order-end'
2569	3 - after `reorder-after', waiting for `reorder-end'
2570	4 - after `reorder-end'
2571	5 - after `reorder-sections-after', waiting for `reorder-sections-end'
2572	6 - after `reorder-sections-end'
2573	*/
2574	int state = `0`;
2575
2576	/ Get the repertoire we have to use. /
2577	if (repertoire_name != NULL)
2578	repertoire = repertoire_read (repertoire_name);
2579
2580	/ The rest of the line containing `LC_COLLATE' must be free. /
2581	lr_ignore_rest (ldfile, `1`);
2582
2583	while (`1`)
2584	{
2585	do
2586	{
2587	now = lr_token (ldfile, charmap, result, NULL, verbose);
2588	nowtok = now->tok;
2589	}
2590	while (nowtok == tok_eol);
2591
2592	if (nowtok != tok_define)
2593	break;
2594
2595	if (ignore_content)
2596	lr_ignore_rest (ldfile, `0`);
2597	else
2598	{
2599	arg = lr_token (ldfile, charmap, result, NULL, verbose);
2600	if (arg->tok != tok_ident)
2601	SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2602	else
2603	{
2604	/ Simply add the new symbol. /
2605	struct name_list newsym = xmalloc (sizeof* (*newsym)
2606	+ arg->val.str.lenmb + `1`);
2607	memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
2608	newsym->str[arg->val.str.lenmb] = `'\0'`;
2609	newsym->next = defined;
2610	defined = newsym;
2611
2612	lr_ignore_rest (ldfile, `1`);
2613	}
2614	}
2615	}
2616
2617	if (nowtok == tok_copy)
2618	{
2619	now = lr_token (ldfile, charmap, result, NULL, verbose);
2620	if (now->tok != tok_string)
2621	{
2622	SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2623
2624	skip_category:
2625	do
2626	now = lr_token (ldfile, charmap, result, NULL, verbose);
2627	while (now->tok != tok_eof && now->tok != tok_end);
2628
2629	if (now->tok != tok_eof
2630	\|\| (now = lr_token (ldfile, charmap, result, NULL, verbose),
2631	now->tok == tok_eof))
2632	lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
2633	else if (now->tok != tok_lc_collate)
2634	{
2635	lr_error (ldfile, _("\
2636	%1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2637	lr_ignore_rest (ldfile, `0`);
2638	}
2639	else
2640	lr_ignore_rest (ldfile, `1`);
2641
2642	return;
2643	}
2644
2645	if (! ignore_content)
2646	{
2647	/ Get the locale definition. /
2648	copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
2649	repertoire_name, charmap, NULL);
2650	if ((copy_locale->avail & COLLATE_LOCALE) == `0`)
2651	{
2652	/ Not yet loaded. So do it now. /
2653	if (locfile_read (copy_locale, charmap) != `0`)
2654	goto skip_category;
2655	}
2656
2657	if (copy_locale->categories[LC_COLLATE].collate == NULL)
2658	return;
2659	}
2660
2661	lr_ignore_rest (ldfile, `1`);
2662
2663	now = lr_token (ldfile, charmap, result, NULL, verbose);
2664	nowtok = now->tok;
2665	}
2666
2667	/ Prepare the data structures. /
2668	collate_startup (ldfile, result, copy_locale, ignore_content);
2669	collate = result->categories[LC_COLLATE].collate;
2670
2671	while (`1`)
2672	{
2673	char ucs4buf[`10`];
2674	char *symstr;
2675	size_t symlen;
2676
2677	/ Of course we don't proceed beyond the end of file. /
2678	if (nowtok == tok_eof)
2679	break;
2680
2681	/* Ignore empty lines.  */
2682	if (nowtok == tok_eol)
2683	{
2684	now = lr_token (ldfile, charmap, result, NULL, verbose);
2685	nowtok = now->tok;
2686	continue;
2687	}
2688
2689	switch (nowtok)
2690	{
2691	case tok_codepoint_collation:
2692	collate->codepoint_collation = true;
2693	break;
2694
2695	case tok_copy:
2696	/ Allow copying other locales. /
2697	now = lr_token (ldfile, charmap, result, NULL, verbose);
2698	if (now->tok != tok_string)
2699	goto err_label;
2700
2701	if (! ignore_content)
2702	load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name,
2703	charmap, result);
2704
2705	lr_ignore_rest (ldfile, `1`);
2706	break;
2707
2708	case tok_coll_weight_max:
2709	/ Ignore the rest of the line if we don't need the input of*
2710	this line. /*
2711	if (ignore_content)
2712	{
2713	lr_ignore_rest (ldfile, `0`);
2714	break;
2715	}
2716
2717	if (state != `0`)
2718	goto err_label;
2719
2720	arg = lr_token (ldfile, charmap, result, NULL, verbose);
2721	if (arg->tok != tok_number)
2722	goto err_label;
2723	if (collate->col_weight_max != -`1`)
2724	lr_error (ldfile, _("%s: duplicate definition of `%s'"),
2725	"LC_COLLATE", "col_weight_max");
2726	else
2727	collate->col_weight_max = arg->val.num;
2728	lr_ignore_rest (ldfile, `1`);
2729	break;
2730
2731	case tok_section_symbol:
2732	/ Ignore the rest of the line if we don't need the input of*
2733	this line. /*
2734	if (ignore_content)
2735	{
2736	lr_ignore_rest (ldfile, `0`);
2737	break;
2738	}
2739
2740	if (state != `0`)
2741	goto err_label;
2742
2743	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2744	if (arg->tok != tok_bsymbol)
2745	goto err_label;
2746	else if (!ignore_content)
2747	{
2748	/ Check whether this section is already known. /
2749	struct section_list *known = collate->sections;
2750	while (known != NULL)
2751	{
2752	if (strcmp (known->name, arg->val.str.startmb) == `0`)
2753	break;
2754	known = known->next;
2755	}
2756
2757	if (known != NULL)
2758	{
2759	lr_error (ldfile,
2760	_("%s: duplicate declaration of section `%s'"),
2761	"LC_COLLATE", arg->val.str.startmb);
2762	free (arg->val.str.startmb);
2763	}
2764	else
2765	collate->sections = make_seclist_elem (collate,
2766	arg->val.str.startmb,
2767	collate->sections);
2768
2769	lr_ignore_rest (ldfile, known == NULL);
2770	}
2771	else
2772	{
2773	free (arg->val.str.startmb);
2774	lr_ignore_rest (ldfile, `0`);
2775	}
2776	break;
2777
2778	case tok_collating_element:
2779	/ Ignore the rest of the line if we don't need the input of*
2780	this line. /*
2781	if (ignore_content)
2782	{
2783	lr_ignore_rest (ldfile, `0`);
2784	break;
2785	}
2786
2787	if (state != `0` && state != `2`)
2788	goto err_label;
2789
2790	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2791	if (arg->tok != tok_bsymbol)
2792	goto err_label;
2793	else
2794	{
2795	const char *symbol = arg->val.str.startmb;
2796	size_t symbol_len = arg->val.str.lenmb;
2797
2798	/ Next the `from' keyword. /
2799	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2800	if (arg->tok != tok_from)
2801	{
2802	free ((char *) symbol);
2803	goto err_label;
2804	}
2805
2806	ldfile->return_widestr = `1`;
2807	ldfile->translate_strings = `1`;
2808
2809	/ Finally the string with the replacement. /
2810	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2811
2812	ldfile->return_widestr = `0`;
2813	ldfile->translate_strings = `0`;
2814
2815	if (arg->tok != tok_string)
2816	goto err_label;
2817
2818	if (!ignore_content && symbol != NULL)
2819	{
2820	/ The name is already defined. /
2821	if (check_duplicate (ldfile, collate, charmap,
2822	repertoire, symbol, symbol_len))
2823	goto col_elem_free;
2824
2825	if (arg->val.str.startmb != NULL)
2826	insert_entry (&collate->elem_table, symbol, symbol_len,
2827	new_element (collate,
2828	arg->val.str.startmb,
2829	arg->val.str.lenmb - `1`,
2830	arg->val.str.startwc,
2831	symbol, symbol_len, `0`));
2832	}
2833	else
2834	{
2835	col_elem_free:
2836	free ((char *) symbol);
2837	free (arg->val.str.startmb);
2838	free (arg->val.str.startwc);
2839	}
2840	lr_ignore_rest (ldfile, `1`);
2841	}
2842	break;
2843
2844	case tok_collating_symbol:
2845	/ Ignore the rest of the line if we don't need the input of*
2846	this line. /*
2847	if (ignore_content)
2848	{
2849	lr_ignore_rest (ldfile, `0`);
2850	break;
2851	}
2852
2853	if (state != `0` && state != `2`)
2854	goto err_label;
2855
2856	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2857	if (arg->tok != tok_bsymbol)
2858	goto err_label;
2859	else
2860	{
2861	char *symbol = arg->val.str.startmb;
2862	size_t symbol_len = arg->val.str.lenmb;
2863	char *endsymbol = NULL;
2864	size_t endsymbol_len = `0`;
2865	enum token_t ellipsis = tok_none;
2866
2867	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2868	if (arg->tok == tok_ellipsis2 \|\| arg->tok == tok_ellipsis4)
2869	{
2870	ellipsis = arg->tok;
2871
2872	arg = lr_token (ldfile, charmap, result, repertoire,
2873	verbose);
2874	if (arg->tok != tok_bsymbol)
2875	{
2876	free (symbol);
2877	goto err_label;
2878	}
2879
2880	endsymbol = arg->val.str.startmb;
2881	endsymbol_len = arg->val.str.lenmb;
2882
2883	lr_ignore_rest (ldfile, `1`);
2884	}
2885	else if (arg->tok != tok_eol)
2886	{
2887	free (symbol);
2888	goto err_label;
2889	}
2890
2891	if (!ignore_content)
2892	{
2893	if (symbol == NULL
2894	\|\| (ellipsis != tok_none && endsymbol == NULL))
2895	{
2896	lr_error (ldfile, _("\
2897	%s: unknown character in collating symbol name"),
2898	"LC_COLLATE");
2899	goto col_sym_free;
2900	}
2901	else if (ellipsis == tok_none)
2902	{
2903	/ A single symbol, no ellipsis. /
2904	if (check_duplicate (ldfile, collate, charmap,
2905	repertoire, symbol, symbol_len))
2906	/ The name is already defined. /
2907	goto col_sym_free;
2908
2909	insert_entry (&collate->sym_table, symbol, symbol_len,
2910	new_symbol (collate, symbol, symbol_len));
2911	}
2912	else if (symbol_len != endsymbol_len)
2913	{
2914	col_sym_inv_range:
2915	lr_error (ldfile,
2916	_("invalid names for character range"));
2917	goto col_sym_free;
2918	}
2919	else
2920	{
2921	/ Oh my, we have to handle an ellipsis. First, as*
2922	usual, determine the common prefix and then
2923	convert the rest into a range. /*
2924	size_t prefixlen;
2925	unsigned long int from;
2926	unsigned long int to;
2927	char *endp;
2928
2929	for (prefixlen = `0`; prefixlen < symbol_len; ++prefixlen)
2930	if (symbol[prefixlen] != endsymbol[prefixlen])
2931	break;
2932
2933	/ Convert the rest into numbers. /
2934	symbol[symbol_len] = `'\0'`;
2935	from = strtoul (&symbol[prefixlen], &endp,
2936	ellipsis == tok_ellipsis2 ? `16` : `10`);
2937	if (*endp != `'\0'`)
2938	goto col_sym_inv_range;
2939
2940	endsymbol[symbol_len] = `'\0'`;
2941	to = strtoul (&endsymbol[prefixlen], &endp,
2942	ellipsis == tok_ellipsis2 ? `16` : `10`);
2943	if (*endp != `'\0'`)
2944	goto col_sym_inv_range;
2945
2946	if (from > to)
2947	goto col_sym_inv_range;
2948
2949	/ Now loop over all entries. /
2950	while (from <= to)
2951	{
2952	char *symbuf;
2953
2954	symbuf = (char *) obstack_alloc (&collate->mempool,
2955	symbol_len + `1`);
2956
2957	/ Create the name. /
2958	sprintf (symbuf,
2959	ellipsis == tok_ellipsis2
2960	? "%.s%.lX" : "%.s%.lu",
2961	(int) prefixlen, symbol,
2962	(int) (symbol_len - prefixlen), from);
2963
2964	if (check_duplicate (ldfile, collate, charmap,
2965	repertoire, symbuf, symbol_len))
2966	/ The name is already defined. /
2967	goto col_sym_free;
2968
2969	insert_entry (&collate->sym_table, symbuf,
2970	symbol_len,
2971	new_symbol (collate, symbuf,
2972	symbol_len));
2973
2974	/ Increment the counter. /
2975	++from;
2976	}
2977
2978	goto col_sym_free;
2979	}
2980	}
2981	else
2982	{
2983	col_sym_free:
2984	free (symbol);
2985	free (endsymbol);
2986	}
2987	}
2988	break;
2989
2990	case tok_symbol_equivalence:
2991	/ Ignore the rest of the line if we don't need the input of*
2992	this line. /*
2993	if (ignore_content)
2994	{
2995	lr_ignore_rest (ldfile, `0`);
2996	break;
2997	}
2998
2999	if (state != `0`)
3000	goto err_label;
3001
3002	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3003	if (arg->tok != tok_bsymbol)
3004	goto err_label;
3005	else
3006	{
3007	const char *newname = arg->val.str.startmb;
3008	size_t newname_len = arg->val.str.lenmb;
3009	const char *symname;
3010	size_t symname_len;
3011	void symval; /* Actually struct symbol_t* /
3012
3013	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3014	if (arg->tok != tok_bsymbol)
3015	{
3016	free ((char *) newname);
3017	goto err_label;
3018	}
3019
3020	symname = arg->val.str.startmb;
3021	symname_len = arg->val.str.lenmb;
3022
3023	if (newname == NULL)
3024	{
3025	lr_error (ldfile, _("\
3026	%s: unknown character in equivalent definition name"),
3027	"LC_COLLATE");
3028
3029	sym_equiv_free:
3030	free ((char *) newname);
3031	free ((char *) symname);
3032	break;
3033	}
3034	if (symname == NULL)
3035	{
3036	lr_error (ldfile, _("\
3037	%s: unknown character in equivalent definition value"),
3038	"LC_COLLATE");
3039	goto sym_equiv_free;
3040	}
3041
3042	/ See whether the symbol name is already defined. /
3043	if (find_entry (&collate->sym_table, symname, symname_len,
3044	&symval) != `0`)
3045	{
3046	lr_error (ldfile, _("\
3047	%s: unknown symbol `%s' in equivalent definition"),
3048	"LC_COLLATE", symname);
3049	goto sym_equiv_free;
3050	}
3051
3052	if (insert_entry (&collate->sym_table,
3053	newname, newname_len, symval) < `0`)
3054	{
3055	lr_error (ldfile, _("\
3056	error while adding equivalent collating symbol"));
3057	goto sym_equiv_free;
3058	}
3059
3060	free ((char *) symname);
3061	}
3062	lr_ignore_rest (ldfile, `1`);
3063	break;
3064
3065	case tok_script:
3066	/ Ignore the rest of the line if we don't need the input of*
3067	this line. /*
3068	if (ignore_content)
3069	{
3070	lr_ignore_rest (ldfile, `0`);
3071	break;
3072	}
3073
3074	/ We get told about the scripts we know. /
3075	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3076	if (arg->tok != tok_bsymbol)
3077	goto err_label;
3078	else
3079	{
3080	struct section_list *runp = collate->known_sections;
3081	char *name;
3082
3083	while (runp != NULL)
3084	if (strncmp (runp->name, arg->val.str.startmb,
3085	arg->val.str.lenmb) == `0`
3086	&& runp->name[arg->val.str.lenmb] == `'\0'`)
3087	break;
3088	else
3089	runp = runp->def_next;
3090
3091	if (runp != NULL)
3092	{
3093	lr_error (ldfile, _("duplicate definition of script `%s'"),
3094	runp->name);
3095	lr_ignore_rest (ldfile, `0`);
3096	break;
3097	}
3098
3099	runp = (struct section_list ) xcalloc (`1`, sizeof* (*runp));
3100	name = (char *) xmalloc (arg->val.str.lenmb + `1`);
3101	memcpy (name, arg->val.str.startmb, arg->val.str.lenmb);
3102	name[arg->val.str.lenmb] = `'\0'`;
3103	runp->name = name;
3104
3105	runp->def_next = collate->known_sections;
3106	collate->known_sections = runp;
3107	}
3108	lr_ignore_rest (ldfile, `1`);
3109	break;
3110
3111	case tok_order_start:
3112	/ Ignore the rest of the line if we don't need the input of*
3113	this line. /*
3114	if (ignore_content)
3115	{
3116	lr_ignore_rest (ldfile, `0`);
3117	break;
3118	}
3119
3120	if (state != `0` && state != `1` && state != `2`)
3121	goto err_label;
3122	state = `1`;
3123
3124	/ The 14652 draft does not specify whether all `order_start' lines*
3125	must contain the same number of sort-rules, but 14651 does. So
3126	we require this here as well. /*
3127	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3128	if (arg->tok == tok_bsymbol)
3129	{
3130	/ This better should be a section name. /
3131	struct section_list *sp = collate->known_sections;
3132	while (sp != NULL
3133	&& (sp->name == NULL
3134	\|\| strncmp (sp->name, arg->val.str.startmb,
3135	arg->val.str.lenmb) != `0`
3136	\|\| sp->name[arg->val.str.lenmb] != `'\0'`))
3137	sp = sp->def_next;
3138
3139	if (sp == NULL)
3140	{
3141	lr_error (ldfile, _("\
3142	%s: unknown section name `%.*s'"),
3143	"LC_COLLATE", (int) arg->val.str.lenmb,
3144	arg->val.str.startmb);
3145	/ We use the error section. /
3146	collate->current_section = &collate->error_section;
3147
3148	if (collate->error_section.first == NULL)
3149	{
3150	/ Insert &collate->error_section at the end of*
3151	the collate->sections list. /*
3152	if (collate->sections == NULL)
3153	collate->sections = &collate->error_section;
3154	else
3155	{
3156	sp = collate->sections;
3157	while (sp->next != NULL)
3158	sp = sp->next;
3159
3160	sp->next = &collate->error_section;
3161	}
3162	collate->error_section.next = NULL;
3163	}
3164	}
3165	else
3166	{
3167	/ One should not be allowed to open the same*
3168	section twice. /*
3169	if (sp->first != NULL)
3170	lr_error (ldfile, _("\
3171	%s: multiple order definitions for section `%s'"),
3172	"LC_COLLATE", sp->name);
3173	else
3174	{
3175	/ Insert sp in the collate->sections list,*
3176	right after collate->current_section. /*
3177	if (collate->current_section != NULL)
3178	{
3179	sp->next = collate->current_section->next;
3180	collate->current_section->next = sp;
3181	}
3182	else if (collate->sections == NULL)
3183	/ This is the first section to be defined. /
3184	collate->sections = sp;
3185
3186	collate->current_section = sp;
3187	}
3188
3189	/ Next should come the end of the line or a semicolon. /
3190	arg = lr_token (ldfile, charmap, result, repertoire,
3191	verbose);
3192	if (arg->tok == tok_eol)
3193	{
3194	uint32_t cnt;
3195
3196	/ This means we have exactly one rule: `forward'. /
3197	if (nrules > `1`)
3198	lr_error (ldfile, _("\
3199	%s: invalid number of sorting rules"),
3200	"LC_COLLATE");
3201	else
3202	nrules = `1`;
3203	sp->rules = obstack_alloc (&collate->mempool,
3204	(sizeof (enum coll_sort_rule)
3205	* nrules));
3206	for (cnt = `0`; cnt < nrules; ++cnt)
3207	sp->rules[cnt] = sort_forward;
3208
3209	/ Next line. /
3210	break;
3211	}
3212
3213	/ Get the next token. /
3214	arg = lr_token (ldfile, charmap, result, repertoire,
3215	verbose);
3216	}
3217	}
3218	else
3219	{
3220	/ There is no section symbol. Therefore we use the unnamed*
3221	section. /*
3222	collate->current_section = &collate->unnamed_section;
3223
3224	if (collate->unnamed_section_defined)
3225	lr_error (ldfile, _("\
3226	%s: multiple order definitions for unnamed section"),
3227	"LC_COLLATE");
3228	else
3229	{
3230	/ Insert &collate->unnamed_section at the beginning of*
3231	the collate->sections list. /*
3232	collate->unnamed_section.next = collate->sections;
3233	collate->sections = &collate->unnamed_section;
3234	collate->unnamed_section_defined = true;
3235	}
3236	}
3237
3238	/ Now read the direction names. /
3239	read_directions (ldfile, arg, charmap, repertoire, result);
3240
3241	/ From now we need the strings untranslated. /
3242	ldfile->translate_strings = `0`;
3243	break;
3244
3245	case tok_order_end:
3246	/ Ignore the rest of the line if we don't need the input of*
3247	this line. /*
3248	if (ignore_content)
3249	{
3250	lr_ignore_rest (ldfile, `0`);
3251	break;
3252	}
3253
3254	if (state != `1`)
3255	goto err_label;
3256
3257	/ Handle ellipsis at end of list. /
3258	if (was_ellipsis != tok_none)
3259	{
3260	handle_ellipsis (ldfile, NULL, `0`, was_ellipsis, charmap,
3261	repertoire, result);
3262	was_ellipsis = tok_none;
3263	}
3264
3265	state = `2`;
3266	lr_ignore_rest (ldfile, `1`);
3267	break;
3268
3269	case tok_reorder_after:
3270	/ Ignore the rest of the line if we don't need the input of*
3271	this line. /*
3272	if (ignore_content)
3273	{
3274	lr_ignore_rest (ldfile, `0`);
3275	break;
3276	}
3277
3278	if (state == `1`)
3279	{
3280	lr_error (ldfile, _("%s: missing `order_end' keyword"),
3281	"LC_COLLATE");
3282	state = `2`;
3283
3284	/ Handle ellipsis at end of list. /
3285	if (was_ellipsis != tok_none)
3286	{
3287	handle_ellipsis (ldfile, arg->val.str.startmb,
3288	arg->val.str.lenmb, was_ellipsis, charmap,
3289	repertoire, result);
3290	was_ellipsis = tok_none;
3291	}
3292	}
3293	else if (state == `0` && copy_locale == NULL)
3294	goto err_label;
3295	else if (state != `0` && state != `2` && state != `3`)
3296	goto err_label;
3297	state = `3`;
3298
3299	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3300	if (arg->tok == tok_bsymbol \|\| arg->tok == tok_ucs4)
3301	{
3302	/ Find this symbol in the sequence table. /
3303	char ucsbuf[`10`];
3304	char *startmb;
3305	size_t lenmb;
3306	struct element_t *insp;
3307	int no_error = `1`;
3308	void *ptr;
3309
3310	if (arg->tok == tok_bsymbol)
3311	{
3312	startmb = arg->val.str.startmb;
3313	lenmb = arg->val.str.lenmb;
3314	}
3315	else
3316	{
3317	sprintf (ucsbuf, "U%08X", arg->val.ucs4);
3318	startmb = ucsbuf;
3319	lenmb = `9`;
3320	}
3321
3322	if (find_entry (&collate->seq_table, startmb, lenmb, &ptr) == `0`)
3323	/ Yes, the symbol exists. Simply point the cursor*
3324	to it. /*
3325	collate->cursor = (struct element_t *) ptr;
3326	else
3327	{
3328	struct symbol_t *symbp;
3329	void *ptr;
3330
3331	if (find_entry (&collate->sym_table, startmb, lenmb,
3332	&ptr) == `0`)
3333	{
3334	symbp = ptr;
3335
3336	if (symbp->order->last != NULL
3337	\|\| symbp->order->next != NULL)
3338	collate->cursor = symbp->order;
3339	else
3340	{
3341	/ This is a collating symbol but its position*
3342	is not yet defined. /*
3343	lr_error (ldfile, _("\
3344	%s: order for collating symbol %.*s not yet defined"),
3345	"LC_COLLATE", (int) lenmb, startmb);
3346	collate->cursor = NULL;
3347	no_error = `0`;
3348	}
3349	}
3350	else if (find_entry (&collate->elem_table, startmb, lenmb,
3351	&ptr) == `0`)
3352	{
3353	insp = (struct element_t *) ptr;
3354
3355	if (insp->last != NULL \|\| insp->next != NULL)
3356	collate->cursor = insp;
3357	else
3358	{
3359	/ This is a collating element but its position*
3360	is not yet defined. /*
3361	lr_error (ldfile, _("\
3362	%s: order for collating element %.*s not yet defined"),
3363	"LC_COLLATE", (int) lenmb, startmb);
3364	collate->cursor = NULL;
3365	no_error = `0`;
3366	}
3367	}
3368	else
3369	{
3370	/ This is bad. The symbol after which we have to*
3371	insert does not exist. /*
3372	lr_error (ldfile, _("\
3373	%s: cannot reorder after %.*s: symbol not known"),
3374	"LC_COLLATE", (int) lenmb, startmb);
3375	collate->cursor = NULL;
3376	no_error = `0`;
3377	}
3378	}
3379
3380	lr_ignore_rest (ldfile, no_error);
3381	}
3382	else
3383	/ This must not happen. /
3384	goto err_label;
3385	break;
3386
3387	case tok_reorder_end:
3388	/ Ignore the rest of the line if we don't need the input of*
3389	this line. /*
3390	if (ignore_content)
3391	break;
3392
3393	if (state != `3`)
3394	goto err_label;
3395	state = `4`;
3396	lr_ignore_rest (ldfile, `1`);
3397	break;
3398
3399	case tok_reorder_sections_after:
3400	/ Ignore the rest of the line if we don't need the input of*
3401	this line. /*
3402	if (ignore_content)
3403	{
3404	lr_ignore_rest (ldfile, `0`);
3405	break;
3406	}
3407
3408	if (state == `1`)
3409	{
3410	lr_error (ldfile, _("%s: missing `order_end' keyword"),
3411	"LC_COLLATE");
3412	state = `2`;
3413
3414	/ Handle ellipsis at end of list. /
3415	if (was_ellipsis != tok_none)
3416	{
3417	handle_ellipsis (ldfile, NULL, `0`, was_ellipsis, charmap,
3418	repertoire, result);
3419	was_ellipsis = tok_none;
3420	}
3421	}
3422	else if (state == `3`)
3423	{
3424	record_error (`0`, `0`, _("\
3425	%s: missing `reorder-end' keyword"), "LC_COLLATE");
3426	state = `4`;
3427	}
3428	else if (state != `2` && state != `4`)
3429	goto err_label;
3430	state = `5`;
3431
3432	/ Get the name of the sections we are adding after. /
3433	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3434	if (arg->tok == tok_bsymbol)
3435	{
3436	/ Now find a section with this name. /
3437	struct section_list *runp = collate->sections;
3438
3439	while (runp != NULL)
3440	{
3441	if (runp->name != NULL
3442	&& strlen (runp->name) == arg->val.str.lenmb
3443	&& memcmp (runp->name, arg->val.str.startmb,
3444	arg->val.str.lenmb) == `0`)
3445	break;
3446
3447	runp = runp->next;
3448	}
3449
3450	if (runp != NULL)
3451	collate->current_section = runp;
3452	else
3453	{
3454	/ This is bad. The section after which we have to*
3455	reorder does not exist. Therefore we cannot
3456	process the whole rest of this reorder
3457	specification. /*
3458	lr_error (ldfile, _("%s: section `%.*s' not known"),
3459	"LC_COLLATE", (int) arg->val.str.lenmb,
3460	arg->val.str.startmb);
3461
3462	do
3463	{
3464	lr_ignore_rest (ldfile, `0`);
3465
3466	now = lr_token (ldfile, charmap, result, NULL, verbose);
3467	}
3468	while (now->tok == tok_reorder_sections_after
3469	\|\| now->tok == tok_reorder_sections_end
3470	\|\| now->tok == tok_end);
3471
3472	/ Process the token we just saw. /
3473	nowtok = now->tok;
3474	continue;
3475	}
3476	}
3477	else
3478	/ This must not happen. /
3479	goto err_label;
3480	break;
3481
3482	case tok_reorder_sections_end:
3483	/ Ignore the rest of the line if we don't need the input of*
3484	this line. /*
3485	if (ignore_content)
3486	break;
3487
3488	if (state != `5`)
3489	goto err_label;
3490	state = `6`;
3491	lr_ignore_rest (ldfile, `1`);
3492	break;
3493
3494	case tok_bsymbol:
3495	case tok_ucs4:
3496	/ Ignore the rest of the line if we don't need the input of*
3497	this line. /*
3498	if (ignore_content)
3499	{
3500	lr_ignore_rest (ldfile, `0`);
3501	break;
3502	}
3503
3504	if (state != `0` && state != `1` && state != `3` && state != `5`)
3505	goto err_label;
3506
3507	if ((state == `0` \|\| state == `5`) && nowtok == tok_ucs4)
3508	goto err_label;
3509
3510	if (nowtok == tok_ucs4)
3511	{
3512	snprintf (ucs4buf, sizeof (ucs4buf), "U%08X", now->val.ucs4);
3513	symstr = ucs4buf;
3514	symlen = `9`;
3515	}
3516	else if (arg != NULL)
3517	{
3518	symstr = arg->val.str.startmb;
3519	symlen = arg->val.str.lenmb;
3520	}
3521	else
3522	{
3523	lr_error (ldfile, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
3524	(int) ldfile->token.val.str.lenmb,
3525	ldfile->token.val.str.startmb);
3526	break;
3527	}
3528
3529	struct element_t *seqp;
3530	if (state == `0`)
3531	{
3532	/ We are outside an `order_start' region. This means*
3533	we must only accept definitions of values for
3534	collation symbols since these are purely abstract
3535	values and don't need directions associated. /*
3536	void *ptr;
3537
3538	if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == `0`)
3539	{
3540	seqp = ptr;
3541
3542	/ It's already defined. First check whether this*
3543	is really a collating symbol. /*
3544	if (seqp->is_character)
3545	goto err_label;
3546
3547	goto move_entry;
3548	}
3549	else
3550	{
3551	void *result;
3552
3553	if (find_entry (&collate->sym_table, symstr, symlen,
3554	&result) != `0`)
3555	/ No collating symbol, it's an error. /
3556	goto err_label;
3557
3558	/ Maybe this is the first time we define a symbol*
3559	value and it is before the first actual section. /*
3560	if (collate->sections == NULL)
3561	collate->sections = collate->current_section =
3562	&collate->symbol_section;
3563	}
3564
3565	if (was_ellipsis != tok_none)
3566	{
3567	handle_ellipsis (ldfile, symstr, symlen, was_ellipsis,
3568	charmap, repertoire, result);
3569
3570	/ Remember that we processed the ellipsis. /
3571	was_ellipsis = tok_none;
3572
3573	/ And don't add the value a second time. /
3574	break;
3575	}
3576	}
3577	else if (state == `3`)
3578	{
3579	/ It is possible that we already have this collation sequence.*
3580	In this case we move the entry. /*
3581	void *sym;
3582	void *ptr;
3583
3584	/ If the symbol after which we have to insert was not found*
3585	ignore all entries. /*
3586	if (collate->cursor == NULL)
3587	{
3588	lr_ignore_rest (ldfile, `0`);
3589	break;
3590	}
3591
3592	if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == `0`)
3593	{
3594	seqp = (struct element_t *) ptr;
3595	goto move_entry;
3596	}
3597
3598	if (find_entry (&collate->sym_table, symstr, symlen, &sym) == `0`
3599	&& (seqp = ((struct symbol_t *) sym)->order) != NULL)
3600	goto move_entry;
3601
3602	if (find_entry (&collate->elem_table, symstr, symlen, &ptr) == `0`
3603	&& (seqp = (struct element_t *) ptr,
3604	seqp->last != NULL \|\| seqp->next != NULL
3605	\|\| (collate->start != NULL && seqp == collate->start)))
3606	{
3607	move_entry:
3608	/ Remove the entry from the old position. /
3609	if (seqp->last == NULL)
3610	collate->start = seqp->next;
3611	else
3612	seqp->last->next = seqp->next;
3613	if (seqp->next != NULL)
3614	seqp->next->last = seqp->last;
3615
3616	/ We also have to check whether this entry is the*
3617	first or last of a section. /*
3618	if (seqp->section->first == seqp)
3619	{
3620	if (seqp->section->first == seqp->section->last)
3621	/ This section has no content anymore. /
3622	seqp->section->first = seqp->section->last = NULL;
3623	else
3624	seqp->section->first = seqp->next;
3625	}
3626	else if (seqp->section->last == seqp)
3627	seqp->section->last = seqp->last;
3628
3629	/ Now insert it in the new place. /
3630	insert_weights (ldfile, seqp, charmap, repertoire, result,
3631	tok_none);
3632	break;
3633	}
3634
3635	/ Otherwise we just add a new entry. /
3636	}
3637	else if (state == `5`)
3638	{
3639	/ We are reordering sections. Find the named section. /
3640	struct section_list *runp = collate->sections;
3641	struct section_list *prevp = NULL;
3642
3643	while (runp != NULL)
3644	{
3645	if (runp->name != NULL
3646	&& strlen (runp->name) == symlen
3647	&& memcmp (runp->name, symstr, symlen) == `0`)
3648	break;
3649
3650	prevp = runp;
3651	runp = runp->next;
3652	}
3653
3654	if (runp == NULL)
3655	{
3656	lr_error (ldfile, _("%s: section `%.*s' not known"),
3657	"LC_COLLATE", (int) symlen, symstr);
3658	lr_ignore_rest (ldfile, `0`);
3659	}
3660	else
3661	{
3662	if (runp != collate->current_section)
3663	{
3664	/ Remove the named section from the old place and*
3665	insert it in the new one. /*
3666	prevp->next = runp->next;
3667
3668	runp->next = collate->current_section->next;
3669	collate->current_section->next = runp;
3670	collate->current_section = runp;
3671	}
3672
3673	/ Process the rest of the line which might change*
3674	the collation rules. /*
3675	arg = lr_token (ldfile, charmap, result, repertoire,
3676	verbose);
3677	if (arg->tok != tok_eof && arg->tok != tok_eol)
3678	read_directions (ldfile, arg, charmap, repertoire,
3679	result);
3680	}
3681	break;
3682	}
3683	else if (was_ellipsis != tok_none)
3684	{
3685	/ Using the information in the `ellipsis_weight'*
3686	element and this and the last value we have to handle
3687	the ellipsis now. /*
3688	assert (state == `1`);
3689
3690	handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, charmap,
3691	repertoire, result);
3692
3693	/ Remember that we processed the ellipsis. /
3694	was_ellipsis = tok_none;
3695
3696	/ And don't add the value a second time. /
3697	break;
3698	}
3699
3700	/ Now insert in the new place. /
3701	insert_value (ldfile, symstr, symlen, charmap, repertoire, result);
3702	break;
3703
3704	case tok_undefined:
3705	/ Ignore the rest of the line if we don't need the input of*
3706	this line. /*
3707	if (ignore_content)
3708	{
3709	lr_ignore_rest (ldfile, `0`);
3710	break;
3711	}
3712
3713	if (state != `1`)
3714	goto err_label;
3715
3716	if (was_ellipsis != tok_none)
3717	{
3718	lr_error (ldfile,
3719	_("%s: cannot have `%s' as end of ellipsis range"),
3720	"LC_COLLATE", "UNDEFINED");
3721
3722	unlink_element (collate);
3723	was_ellipsis = tok_none;
3724	}
3725
3726	/ See whether UNDEFINED already appeared somewhere. /
3727	if (collate->undefined.next != NULL
3728	\|\| &collate->undefined == collate->cursor)
3729	{
3730	lr_error (ldfile,
3731	_("%s: order for `%.*s' already defined at %s:%Zu"),
3732	"LC_COLLATE", `9`, "UNDEFINED",
3733	collate->undefined.file,
3734	collate->undefined.line);
3735	lr_ignore_rest (ldfile, `0`);
3736	}
3737	else
3738	/ Parse the weights. /
3739	insert_weights (ldfile, &collate->undefined, charmap,
3740	repertoire, result, tok_none);
3741	break;
3742
3743	case tok_ellipsis2: / symbolic hexadecimal ellipsis /
3744	case tok_ellipsis3: / absolute ellipsis /
3745	case tok_ellipsis4: / symbolic decimal ellipsis /
3746	/ This is the symbolic (decimal or hexadecimal) or absolute*
3747	ellipsis. /*
3748	if (was_ellipsis != tok_none)
3749	goto err_label;
3750
3751	if (state != `0` && state != `1` && state != `3`)
3752	goto err_label;
3753
3754	was_ellipsis = nowtok;
3755
3756	insert_weights (ldfile, &collate->ellipsis_weight, charmap,
3757	repertoire, result, nowtok);
3758	break;
3759
3760	case tok_end:
3761	seen_end:
3762	/ Next we assume `LC_COLLATE'. /
3763	if (!ignore_content)
3764	{
3765	if (state == `0`
3766	&& copy_locale == NULL
3767	&& !collate->codepoint_collation)
3768	/ We must either see a copy statement or have*
3769	ordering values, or codepoint_collation. /*
3770	lr_error (ldfile,
3771	_("%s: empty category description not allowed"),
3772	"LC_COLLATE");
3773	else if (state == `1`)
3774	{
3775	lr_error (ldfile, _("%s: missing `order_end' keyword"),
3776	"LC_COLLATE");
3777
3778	/ Handle ellipsis at end of list. /
3779	if (was_ellipsis != tok_none)
3780	{
3781	handle_ellipsis (ldfile, NULL, `0`, was_ellipsis, charmap,
3782	repertoire, result);
3783	was_ellipsis = tok_none;
3784	}
3785	}
3786	else if (state == `3`)
3787	record_error (`0`, `0`, _("\
3788	%s: missing `reorder-end' keyword"), "LC_COLLATE");
3789	else if (state == `5`)
3790	record_error (`0`, `0`, _("\
3791	%s: missing `reorder-sections-end' keyword"), "LC_COLLATE");
3792	}
3793	arg = lr_token (ldfile, charmap, result, NULL, verbose);
3794	if (arg->tok == tok_eof)
3795	break;
3796	if (arg->tok == tok_eol)
3797	lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
3798	else if (arg->tok != tok_lc_collate)
3799	lr_error (ldfile, _("\
3800	%1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3801	lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
3802	return;
3803
3804	case tok_define:
3805	if (ignore_content)
3806	{
3807	lr_ignore_rest (ldfile, `0`);
3808	break;
3809	}
3810
3811	arg = lr_token (ldfile, charmap, result, NULL, verbose);
3812	if (arg->tok != tok_ident)
3813	goto err_label;
3814
3815	/ Simply add the new symbol. /
3816	struct name_list newsym = xmalloc (sizeof* (*newsym)
3817	+ arg->val.str.lenmb + `1`);
3818	memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
3819	newsym->str[arg->val.str.lenmb] = `'\0'`;
3820	newsym->next = defined;
3821	defined = newsym;
3822
3823	lr_ignore_rest (ldfile, `1`);
3824	break;
3825
3826	case tok_undef:
3827	if (ignore_content)
3828	{
3829	lr_ignore_rest (ldfile, `0`);
3830	break;
3831	}
3832
3833	arg = lr_token (ldfile, charmap, result, NULL, verbose);
3834	if (arg->tok != tok_ident)
3835	goto err_label;
3836
3837	/ Remove _all_ occurrences of the symbol from the list. /
3838	struct name_list *prevdef = NULL;
3839	struct name_list *curdef = defined;
3840	while (curdef != NULL)
3841	if (strncmp (arg->val.str.startmb, curdef->str,
3842	arg->val.str.lenmb) == `0`
3843	&& curdef->str[arg->val.str.lenmb] == `'\0'`)
3844	{
3845	if (prevdef == NULL)
3846	defined = curdef->next;
3847	else
3848	prevdef->next = curdef->next;
3849
3850	struct name_list *olddef = curdef;
3851	curdef = curdef->next;
3852
3853	free (olddef);
3854	}
3855	else
3856	{
3857	prevdef = curdef;
3858	curdef = curdef->next;
3859	}
3860
3861	lr_ignore_rest (ldfile, `1`);
3862	break;
3863
3864	case tok_ifdef:
3865	case tok_ifndef:
3866	if (ignore_content)
3867	{
3868	lr_ignore_rest (ldfile, `0`);
3869	break;
3870	}
3871
3872	found_ifdef:
3873	arg = lr_token (ldfile, charmap, result, NULL, verbose);
3874	if (arg->tok != tok_ident)
3875	goto err_label;
3876	lr_ignore_rest (ldfile, `1`);
3877
3878	if (collate->else_action == else_none)
3879	{
3880	curdef = defined;
3881	while (curdef != NULL)
3882	if (strncmp (arg->val.str.startmb, curdef->str,
3883	arg->val.str.lenmb) == `0`
3884	&& curdef->str[arg->val.str.lenmb] == `'\0'`)
3885	break;
3886	else
3887	curdef = curdef->next;
3888
3889	if ((nowtok == tok_ifdef && curdef != NULL)
3890	\|\| (nowtok == tok_ifndef && curdef == NULL))
3891	{
3892	/ We have to use the if-branch. /
3893	collate->else_action = else_ignore;
3894	}
3895	else
3896	{
3897	/ We have to use the else-branch, if there is one. /
3898	nowtok = skip_to (ldfile, collate, charmap, `0`);
3899	if (nowtok == tok_else)
3900	collate->else_action = else_seen;
3901	else if (nowtok == tok_elifdef)
3902	{
3903	nowtok = tok_ifdef;
3904	goto found_ifdef;
3905	}
3906	else if (nowtok == tok_elifndef)
3907	{
3908	nowtok = tok_ifndef;
3909	goto found_ifdef;
3910	}
3911	else if (nowtok == tok_eof)
3912	goto seen_eof;
3913	else if (nowtok == tok_end)
3914	goto seen_end;
3915	}
3916	}
3917	else
3918	{
3919	/ XXX Should it really become necessary to support nested*
3920	preprocessor handling we will push the state here. /*
3921	lr_error (ldfile, _("%s: nested conditionals not supported"),
3922	"LC_COLLATE");
3923	nowtok = skip_to (ldfile, collate, charmap, `1`);
3924	if (nowtok == tok_eof)
3925	goto seen_eof;
3926	else if (nowtok == tok_end)
3927	goto seen_end;
3928	}
3929	break;
3930
3931	case tok_elifdef:
3932	case tok_elifndef:
3933	case tok_else:
3934	if (ignore_content)
3935	{
3936	lr_ignore_rest (ldfile, `0`);
3937	break;
3938	}
3939
3940	lr_ignore_rest (ldfile, `1`);
3941
3942	if (collate->else_action == else_ignore)
3943	{
3944	/ Ignore everything until the endif. /
3945	nowtok = skip_to (ldfile, collate, charmap, `1`);
3946	if (nowtok == tok_eof)
3947	goto seen_eof;
3948	else if (nowtok == tok_end)
3949	goto seen_end;
3950	}
3951	else
3952	{
3953	assert (collate->else_action == else_none);
3954	lr_error (ldfile, _("\
3955	%s: '%s' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE",
3956	nowtok == tok_else ? "else"
3957	: nowtok == tok_elifdef ? "elifdef" : "elifndef");
3958	}
3959	break;
3960
3961	case tok_endif:
3962	if (ignore_content)
3963	{
3964	lr_ignore_rest (ldfile, `0`);
3965	break;
3966	}
3967
3968	lr_ignore_rest (ldfile, `1`);
3969
3970	if (collate->else_action != else_ignore
3971	&& collate->else_action != else_seen)
3972	lr_error (ldfile, _("\
3973	%s: 'endif' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE");
3974
3975	/ XXX If we support nested preprocessor directives we pop*
3976	the state here. /*
3977	collate->else_action = else_none;
3978	break;
3979
3980	default:
3981	err_label:
3982	SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
3983	}
3984
3985	/ Prepare for the next round. /
3986	now = lr_token (ldfile, charmap, result, NULL, verbose);
3987	nowtok = now->tok;
3988	}
3989
3990	seen_eof:
3991	/ When we come here we reached the end of the file. /
3992	lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
3993	}
3994

Browse the source code of glibc/locale/programs/ld-collate.c