ld-ctype.c source code [glibc/locale/programs/ld-ctype.c]

/* Copyright (C) 1995-2019 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.  */
17
18	#ifdef HAVE_CONFIG_H
19	# include <config.h>
20	#endif
21
22	#include <alloca.h>
23	#include <byteswap.h>
24	#include <endian.h>
25	#include <errno.h>
26	#include <limits.h>
27	#include <obstack.h>
28	#include <stdlib.h>
29	#include <string.h>
30	#include <wchar.h>
31	#include <wctype.h>
32	#include <stdint.h>
33	#include <sys/uio.h>
34
35	#include "localedef.h"
36	#include "charmap.h"
37	#include "localeinfo.h"
38	#include "langinfo.h"
39	#include "linereader.h"
40	#include "locfile-token.h"
41	#include "locfile.h"
42
43	#include <assert.h>
44
45
/* The bit used for representing a special class.  BITPOS maps a class
   token to its zero-based position relative to tok_upper; BIT and BITw
   turn that position into the mask used for the narrow (_ISbit) and the
   wide (_ISwbit) class tables respectively.  */
#define BITPOS(class) ((class) - tok_upper)
#define BIT(class) (_ISbit (BITPOS (class)))
#define BITw(class) (_ISwbit (BITPOS (class)))

/* Yield the slot (as an lvalue) for VALUE in CTYPE's COLLECTION table.
   IDX is either empty or an array subscript such as `[0]' that selects
   one table out of an array of tables; the matching `_max' and `_act'
   counters are selected with the same subscript.  The lookup is done by
   find_idx.  */
#define ELEM(ctype, collection, idx, value)                                   \
  (*find_idx ((ctype), &(ctype)->collection idx,                              \
              &(ctype)->collection##_max idx,                                 \
              &(ctype)->collection##_act idx, (value)))
54
55
/* To be compatible with former implementations we for now restrict
   the number of bits for character classes to 16.  When compatibility
   is not necessary anymore increase the number to 32.  */
#define char_class_t uint16_t
#define char_class32_t uint32_t
61
62
/* Type to describe a transliteration action.  We have a possibly
   multiple character from-string and a set of multiple character
   to-strings.  All are 32bit values since this is what is used in
   the gconv functions.  */

/* One replacement string of a transliteration entry; the alternatives
   for the same from-string are chained through NEXT.  */
struct translit_to_t
{
  uint32_t *str;

  struct translit_to_t *next;
};
73
/* One transliteration entry: the from-string, the source position
   (file name and line number) recorded with it, the list of
   replacement strings, and the link to the next entry.  */
struct translit_t
{
  uint32_t *from;

  const char *fname;
  size_t lineno;

  struct translit_to_t *to;

  struct translit_t *next;
};
85
/* One element of the translit_ignore list: the values FROM, TO and
   STEP together with the source position (file name and line number)
   recorded with it.  */
struct translit_ignore_t
{
  uint32_t from;
  uint32_t to;
  uint32_t step;

  const char *fname;
  size_t lineno;

  struct translit_ignore_t *next;
};
97
98
/* Type to describe a transliteration include statement.  */
struct translit_include_t
{
  const char *copy_locale;
  const char *copy_repertoire;

  struct translit_include_t *next;
};
107
/* Provide some dummy pointer for empty string.  */
static uint32_t no_str[] = { 0 };
110
111
112	/ Sparse table of uint32_t. /
113	#define TABLE idx_table
114	#define ELEMENT uint32_t
115	#define DEFAULT ((uint32_t) ~0)
116	#define NO_ADD_LOCALE
117	#include "3level.h"
118
119	#define TABLE wcwidth_table
120	#define ELEMENT uint8_t
121	#define DEFAULT 0xff
122	#include "3level.h"
123
124	#define TABLE wctrans_table
125	#define ELEMENT int32_t
126	#define DEFAULT 0
127	#define wctrans_table_add wctrans_table_add_internal
128	#include "3level.h"
129	#undef wctrans_table_add
/* The wctrans_table must actually store the difference between the
   desired result and the argument.  Record in T that WC maps to
   MAPPED_WC by storing MAPPED_WC - WC.  */
static inline void
wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
{
  wctrans_table_add_internal (t, wc, mapped_wc - wc);
}
137
/* Construction of sparse 3-level tables.
   See wchar-lookup.h for their structure and the meaning of p and q.  */

struct wctype_table
{
  /* Parameters.  */
  unsigned int p;
  unsigned int q;
  /* Working representation.  */
  size_t level1_alloc;
  size_t level1_size;
  uint32_t *level1;
  size_t level2_alloc;
  size_t level2_size;
  uint32_t *level2;
  size_t level3_alloc;
  size_t level3_size;
  uint32_t *level3;
  size_t result_size;
};
158
/* Forward declaration; the definition is not part of this section.  */
static void add_locale_wctype_table (struct locale_file *file,
                                     struct wctype_table *t);
161
162	/ The real definition of the struct for the LC_CTYPE locale. /
163	struct locale_ctype_t
164	{
165	uint32_t *charnames;
166	size_t charnames_max;
167	size_t charnames_act;
168	/ An index lookup table, to speedup find_idx. /
169	struct idx_table charnames_idx;
170
171	struct repertoire_t *repertoire;
172
173	/ We will allow up to 8 * sizeof (uint32_t) character classes. /
174	#define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
175	size_t nr_charclass;
176	const char *classnames[MAX_NR_CHARCLASS];
177	uint32_t last_class_char;
178	uint32_t class256_collection[`256`];
179	uint32_t *class_collection;
180	size_t class_collection_max;
181	size_t class_collection_act;
182	uint32_t class_done;
183	uint32_t class_offset;
184
185	struct charseq **mbdigits;
186	size_t mbdigits_act;
187	size_t mbdigits_max;
188	uint32_t *wcdigits;
189	size_t wcdigits_act;
190	size_t wcdigits_max;
191
192	struct charseq *mboutdigits[`10`];
193	uint32_t wcoutdigits[`10`];
194	size_t outdigits_act;
195
196	/ If the following number ever turns out to be too small simply*
197	increase it. But I doubt it will. --drepper@gnu /*
198	#define MAX_NR_CHARMAP 16
199	const char *mapnames[MAX_NR_CHARMAP];
200	uint32_t *map_collection[MAX_NR_CHARMAP];
201	uint32_t map256_collection[`2`][`256`];
202	size_t map_collection_max[MAX_NR_CHARMAP];
203	size_t map_collection_act[MAX_NR_CHARMAP];
204	size_t map_collection_nr;
205	size_t last_map_idx;
206	int tomap_done[MAX_NR_CHARMAP];
207	uint32_t map_offset;
208
209	/ Transliteration information. /
210	struct translit_include_t *translit_include;
211	struct translit_t *translit;
212	struct translit_ignore_t *translit_ignore;
213	uint32_t ntranslit_ignore;
214
215	uint32_t *default_missing;
216	const char *default_missing_file;
217	size_t default_missing_lineno;
218
219	uint32_t to_nonascii;
220	uint32_t nonascii_case;
221
222	/ The arrays for the binary representation. /
223	char_class_t *ctype_b;
224	char_class32_t *ctype32_b;
225	uint32_t **map_b;
226	uint32_t **map32_b;
227	uint32_t **class_b;
228	struct wctype_table *class_3level;
229	struct wctrans_table *map_3level;
230	uint32_t *class_name_ptr;
231	uint32_t *map_name_ptr;
232	struct wcwidth_table width;
233	uint32_t mb_cur_max;
234	const char *codeset_name;
235	uint32_t *translit_from_idx;
236	uint32_t *translit_from_tbl;
237	uint32_t *translit_to_idx;
238	uint32_t *translit_to_tbl;
239	uint32_t translit_idx_size;
240	size_t translit_from_tbl_size;
241	size_t translit_to_tbl_size;
242
243	struct obstack mempool;
244	};
245
246
/* Marker for an empty slot.  This has the value 0xFFFFFFFF, regardless
   whether 'int' is 16 bit, 32 bit, or 64 bit.  */
#define EMPTY ((uint32_t) ~0)


/* Allocation hooks used by the obstack macros.  */
#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free free
254
255
/* Prototypes for local functions.  */
static void ctype_startup (struct linereader *lr, struct localedef_t *locale,
                           const struct charmap_t *charmap,
                           struct localedef_t *copy_locale,
                           int ignore_content);
static void ctype_class_new (struct linereader *lr,
                             struct locale_ctype_t *ctype, const char *name);
static void ctype_map_new (struct linereader *lr,
                           struct locale_ctype_t *ctype,
                           const char *name, const struct charmap_t *charmap);
/* find_idx returns a pointer to a table slot (the ELEM macro
   dereferences it) and receives the table and its two counters by
   address.  */
static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table,
                           size_t *max, size_t *act, uint32_t idx);
static void set_class_defaults (struct locale_ctype_t *ctype,
                                const struct charmap_t *charmap,
                                struct repertoire_t *repertoire);
static void allocate_arrays (struct locale_ctype_t *ctype,
                             const struct charmap_t *charmap,
                             struct repertoire_t *repertoire);
274
275
/* Symbolic names tried for the ten decimal digits: the spelled-out
   names, the <Uxxxxxxxx> style names, and the plain digit characters
   themselves.  */
static const char *longnames[] =
{
  "zero", "one", "two", "three", "four",
  "five", "six", "seven", "eight", "nine"
};
static const char *uninames[] =
{
  "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
  "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
};
static const unsigned char digits[] = "0123456789";
287
288
289	static void
290	ctype_startup (struct linereader lr, struct* localedef_t *locale,
291	const struct charmap_t *charmap,
292	struct localedef_t copy_locale, int* ignore_content)
293	{
294	unsigned int cnt;
295	struct locale_ctype_t *ctype;
296
297	if (!ignore_content && locale->categories[LC_CTYPE].ctype == NULL)
298	{
299	if (copy_locale == NULL)
300	{
301	/ Allocate the needed room. /
302	locale->categories[LC_CTYPE].ctype = ctype =
303	(struct locale_ctype_t *) xcalloc (`1`,
304	sizeof (struct locale_ctype_t));
305
306	/ We have seen no names yet. /
307	ctype->charnames_max = charmap->mb_cur_max == `1` ? `256` : `512`;
308	ctype->charnames = (uint32_t *) xmalloc (ctype->charnames_max
309	* sizeof (uint32_t));
310	for (cnt = `0`; cnt < `256`; ++cnt)
311	ctype->charnames[cnt] = cnt;
312	ctype->charnames_act = `256`;
313	idx_table_init (&ctype->charnames_idx);
314
315	/ Fill character class information. /
316	ctype->last_class_char = ILLEGAL_CHAR_VALUE;
317	/ The order of the following instructions determines the bit*
318	positions! /*
319	ctype_class_new (lr, ctype, "upper");
320	ctype_class_new (lr, ctype, "lower");
321	ctype_class_new (lr, ctype, "alpha");
322	ctype_class_new (lr, ctype, "digit");
323	ctype_class_new (lr, ctype, "xdigit");
324	ctype_class_new (lr, ctype, "space");
325	ctype_class_new (lr, ctype, "print");
326	ctype_class_new (lr, ctype, "graph");
327	ctype_class_new (lr, ctype, "blank");
328	ctype_class_new (lr, ctype, "cntrl");
329	ctype_class_new (lr, ctype, "punct");
330	ctype_class_new (lr, ctype, "alnum");
331
332	ctype->class_collection_max = charmap->mb_cur_max == `1` ? `256` : `512`;
333	ctype->class_collection
334	= (uint32_t ) xcalloc (sizeof* (unsigned long int),
335	ctype->class_collection_max);
336	ctype->class_collection_act = `256`;
337
338	/ Fill character map information. /
339	ctype->last_map_idx = MAX_NR_CHARMAP;
340	ctype_map_new (lr, ctype, "toupper", charmap);
341	ctype_map_new (lr, ctype, "tolower", charmap);
342
343	/ Fill first 256 entries in `toXXX' arrays. /
344	for (cnt = `0`; cnt < `256`; ++cnt)
345	{
346	ctype->map_collection[`0`][cnt] = cnt;
347	ctype->map_collection[`1`][cnt] = cnt;
348
349	ctype->map256_collection[`0`][cnt] = cnt;
350	ctype->map256_collection[`1`][cnt] = cnt;
351	}
352
353	if (enc_not_ascii_compatible)
354	ctype->to_nonascii = `1`;
355
356	obstack_init (&ctype->mempool);
357	}
358	else
359	ctype = locale->categories[LC_CTYPE].ctype =
360	copy_locale->categories[LC_CTYPE].ctype;
361	}
362	}
363
364
365	void
366	ctype_finish (struct localedef_t locale, const* struct charmap_t *charmap)
367	{
368	/ See POSIX.2, table 2-6 for the meaning of the following table. /
369	#define NCLASS 12
370	static const struct
371	{
372	const char *name;
373	const char allow[NCLASS];
374	}
375	valid_table[NCLASS] =
376	{
377	/ The order is important. See token.h for more information.*
378	M = Always, D = Default, - = Permitted, X = Mutually exclusive /*
379	{ "upper", "--MX-XDDXXX-" },
380	{ "lower", "--MX-XDDXXX-" },
381	{ "alpha", "---X-XDDXXX-" },
382	{ "digit", "XXX--XDDXXX-" },
383	{ "xdigit", "-----XDDXXX-" },
384	{ "space", "XXXXX------X" },
385	{ "print", "---------X--" },
386	{ "graph", "---------X--" },
387	{ "blank", "XXXXXM-----X" },
388	{ "cntrl", "XXXXX-XX--XX" },
389	{ "punct", "XXXXX-DD-X-X" },
390	{ "alnum", "-----XDDXXX-" }
391	};
392	size_t cnt;
393	int cls1, cls2;
394	uint32_t space_value;
395	struct charseq *space_seq;
396	struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
397	int warned;
398	const void *key;
399	size_t len;
400	void *vdata;
401	void *curs;
402
403	/ Now resolve copying and also handle completely missing definitions. /
404	if (ctype == NULL)
405	{
406	const char *repertoire_name;
407
408	/ First see whether we were supposed to copy. If yes, find the*
409	actual definition. /*
410	if (locale->copy_name[LC_CTYPE] != NULL)
411	{
412	/ Find the copying locale. This has to happen transitively since*
413	the locale we are copying from might also copying another one. /*
414	struct localedef_t *from = locale;
415
416	do
417	from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
418	from->repertoire_name, charmap);
419	while (from->categories[LC_CTYPE].ctype == NULL
420	&& from->copy_name[LC_CTYPE] != NULL);
421
422	ctype = locale->categories[LC_CTYPE].ctype
423	= from->categories[LC_CTYPE].ctype;
424	}
425
426	/ If there is still no definition issue an warning and create an*
427	empty one. /*
428	if (ctype == NULL)
429	{
430	record_warning (_("\
431	No definition for %s category found"), "LC_CTYPE");
432	ctype_startup (NULL, locale, charmap, NULL, `0`);
433	ctype = locale->categories[LC_CTYPE].ctype;
434	}
435
436	/ Get the repertoire we have to use. /
437	repertoire_name = locale->repertoire_name ?: repertoire_global;
438	if (repertoire_name != NULL)
439	ctype->repertoire = repertoire_read (repertoire_name);
440	}
441
442	/ We need the name of the currently used 8-bit character set to*
443	make correct conversion between this 8-bit representation and the
444	ISO 10646 character set used internally for wide characters. /*
445	ctype->codeset_name = charmap->code_set_name;
446	if (ctype->codeset_name == NULL)
447	{
448	record_error (`0`, `0`, _("\
449	No character set name specified in charmap"));
450	ctype->codeset_name = "//UNKNOWN//";
451	}
452
453	/ Set default value for classes not specified. /
454	set_class_defaults (ctype, charmap, ctype->repertoire);
455
456	/ Check according to table. /
457	for (cnt = `0`; cnt < ctype->class_collection_act; ++cnt)
458	{
459	uint32_t tmp = ctype->class_collection[cnt];
460
461	if (tmp != `0`)
462	{
463	for (cls1 = `0`; cls1 < NCLASS; ++cls1)
464	if ((tmp & _ISwbit (cls1)) != `0`)
465	for (cls2 = `0`; cls2 < NCLASS; ++cls2)
466	if (valid_table[cls1].allow[cls2] != `'-'`)
467	{
468	int eq = (tmp & _ISwbit (cls2)) != `0`;
469	switch (valid_table[cls1].allow[cls2])
470	{
471	case `'M'`:
472	if (!eq)
473	{
474	uint32_t value = ctype->charnames[cnt];
475
476	record_error (`0`, `0`, _("\
477	character L'\\u%0*x' in class `%s' must be in class `%s'"),
478	value > `0xffff` ? `8` : `4`,
479	value,
480	valid_table[cls1].name,
481	valid_table[cls2].name);
482	}
483	break;
484
485	case `'X'`:
486	if (eq)
487	{
488	uint32_t value = ctype->charnames[cnt];
489
490	record_error (`0`, `0`, _("\
491	character L'\\u%0*x' in class `%s' must not be in class `%s'"),
492	value > `0xffff` ? `8` : `4`,
493	value,
494	valid_table[cls1].name,
495	valid_table[cls2].name);
496	}
497	break;
498
499	case `'D'`:
500	ctype->class_collection[cnt] \|= _ISwbit (cls2);
501	break;
502
503	default:
504	record_error (`5`, `0`, _("\
505	internal error in %s, line %u"), __FUNCTION__, __LINE__);
506	}
507	}
508	}
509	}
510
511	for (cnt = `0`; cnt < `256`; ++cnt)
512	{
513	uint32_t tmp = ctype->class256_collection[cnt];
514
515	if (tmp != `0`)
516	{
517	for (cls1 = `0`; cls1 < NCLASS; ++cls1)
518	if ((tmp & _ISbit (cls1)) != `0`)
519	for (cls2 = `0`; cls2 < NCLASS; ++cls2)
520	if (valid_table[cls1].allow[cls2] != `'-'`)
521	{
522	int eq = (tmp & _ISbit (cls2)) != `0`;
523	switch (valid_table[cls1].allow[cls2])
524	{
525	case `'M'`:
526	if (!eq)
527	{
528	char buf[`17`];
529
530	snprintf (buf, sizeof buf, "\\%Zo", cnt);
531
532	record_error (`0`, `0`, _("\
533	character '%s' in class `%s' must be in class `%s'"),
534	buf,
535	valid_table[cls1].name,
536	valid_table[cls2].name);
537	}
538	break;
539
540	case `'X'`:
541	if (eq)
542	{
543	char buf[`17`];
544
545	snprintf (buf, sizeof buf, "\\%Zo", cnt);
546
547	record_error (`0`, `0`, _("\
548	character '%s' in class `%s' must not be in class `%s'"),
549	buf,
550	valid_table[cls1].name,
551	valid_table[cls2].name);
552	}
553	break;
554
555	case `'D'`:
556	ctype->class256_collection[cnt] \|= _ISbit (cls2);
557	break;
558
559	default:
560	record_error (`5`, `0`, _("\
561	internal error in %s, line %u"), __FUNCTION__, __LINE__);
562	}
563	}
564	}
565	}
566
567	/ ... and now test <SP> as a special case. /
568	space_value = `32`;
569	if (((cnt = BITPOS (tok_space),
570	(ELEM (ctype, class_collection, , space_value)
571	& BITw (tok_space)) == `0`)
572	\|\| (cnt = BITPOS (tok_blank),
573	(ELEM (ctype, class_collection, , space_value)
574	& BITw (tok_blank)) == `0`)))
575	{
576	record_error (`0`, `0`, _("<SP> character not in class `%s'"),
577	valid_table[cnt].name);
578	}
579	else if (((cnt = BITPOS (tok_punct),
580	(ELEM (ctype, class_collection, , space_value)
581	& BITw (tok_punct)) != `0`)
582	\|\| (cnt = BITPOS (tok_graph),
583	(ELEM (ctype, class_collection, , space_value)
584	& BITw (tok_graph))
585	!= `0`)))
586	{
587	record_error (`0`, `0`, _("\
588	<SP> character must not be in class `%s'"),
589	valid_table[cnt].name);
590	}
591	else
592	ELEM (ctype, class_collection, , space_value) \|= BITw (tok_print);
593
594	space_seq = charmap_find_value (charmap, "SP", `2`);
595	if (space_seq == NULL)
596	space_seq = charmap_find_value (charmap, "space", `5`);
597	if (space_seq == NULL)
598	space_seq = charmap_find_value (charmap, "U00000020", `9`);
599	if (space_seq == NULL \|\| space_seq->nbytes != `1`)
600	{
601	record_error (`0`, `0`, _("\
602	character <SP> not defined in character map"));
603	}
604	else if (((cnt = BITPOS (tok_space),
605	(ctype->class256_collection[space_seq->bytes[`0`]]
606	& BIT (tok_space)) == `0`)
607	\|\| (cnt = BITPOS (tok_blank),
608	(ctype->class256_collection[space_seq->bytes[`0`]]
609	& BIT (tok_blank)) == `0`)))
610	{
611	record_error (`0`, `0`, _("<SP> character not in class `%s'"),
612	valid_table[cnt].name);
613	}
614	else if (((cnt = BITPOS (tok_punct),
615	(ctype->class256_collection[space_seq->bytes[`0`]]
616	& BIT (tok_punct)) != `0`)
617	\|\| (cnt = BITPOS (tok_graph),
618	(ctype->class256_collection[space_seq->bytes[`0`]]
619	& BIT (tok_graph)) != `0`)))
620	{
621	record_error (`0`, `0`, _("\
622	<SP> character must not be in class `%s'"),
623	valid_table[cnt].name);
624	}
625	else
626	ctype->class256_collection[space_seq->bytes[`0`]] \|= BIT (tok_print);
627
628	/ Check whether all single-byte characters make to their upper/lowercase*
629	equivalent according to the ASCII rules. /*
630	for (cnt = `'A'`; cnt <= `'Z'`; ++cnt)
631	{
632	uint32_t uppval = ctype->map256_collection[`0`][cnt];
633	uint32_t lowval = ctype->map256_collection[`1`][cnt];
634	uint32_t lowuppval = ctype->map256_collection[`0`][lowval];
635	uint32_t lowlowval = ctype->map256_collection[`1`][lowval];
636
637	if (uppval != cnt
638	\|\| lowval != cnt + `0x20`
639	\|\| lowuppval != cnt
640	\|\| lowlowval != cnt + `0x20`)
641	ctype->nonascii_case = `1`;
642	}
643	for (cnt = `0`; cnt < `256`; ++cnt)
644	if (cnt < `'A'` \|\| (cnt > `'Z'` && cnt < `'a'`) \|\| cnt > `'z'`)
645	if (ctype->map256_collection[`0`][cnt] != cnt
646	\|\| ctype->map256_collection[`1`][cnt] != cnt)
647	ctype->nonascii_case = `1`;
648
649	/ Now that the tests are done make sure the name array contains all*
650	characters which are handled in the WIDTH section of the
651	character set definition file. /*
652	if (charmap->width_rules != NULL)
653	for (cnt = `0`; cnt < charmap->nwidth_rules; ++cnt)
654	{
655	unsigned char bytes[charmap->mb_cur_max];
656	int nbytes = charmap->width_rules[cnt].from->nbytes;
657
658	/ We have the range of character for which the width is*
659	specified described using byte sequences of the multibyte
660	charset. We have to convert this to UCS4 now. And we
661	cannot simply convert the beginning and the end of the
662	sequence, we have to iterate over the byte sequence and
663	convert it for every single character. /*
664	memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
665
666	while (nbytes < charmap->width_rules[cnt].to->nbytes
667	\|\| memcmp (bytes, charmap->width_rules[cnt].to->bytes,
668	nbytes) <= `0`)
669	{
670	/ Find the UCS value for `bytes'. /
671	int inner;
672	uint32_t wch;
673	struct charseq *seq
674	= charmap_find_symbol (charmap, (char *) bytes, nbytes);
675
676	if (seq == NULL)
677	wch = ILLEGAL_CHAR_VALUE;
678	else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
679	wch = seq->ucs4;
680	else
681	wch = repertoire_find_value (ctype->repertoire, seq->name,
682	strlen (seq->name));
683
684	if (wch != ILLEGAL_CHAR_VALUE)
685	/ We are only interested in the side-effects of the*
686	`find_idx' call. It will add appropriate entries in
687	the name array if this is necessary. /*
688	(void) find_idx (ctype, NULL, NULL, NULL, wch);
689
690	/ "Increment" the bytes sequence. /
691	inner = nbytes - `1`;
692	while (inner >= `0` && bytes[inner] == `0xff`)
693	--inner;
694
695	if (inner < `0`)
696	{
697	/ We have to extend the byte sequence. /
698	if (nbytes >= charmap->width_rules[cnt].to->nbytes)
699	break;
700
701	bytes[`0`] = `1`;
702	memset (&bytes[`1`], `0`, nbytes);
703	++nbytes;
704	}
705	else
706	{
707	++bytes[inner];
708	while (++inner < nbytes)
709	bytes[inner] = `0`;
710	}
711	}
712	}
713
714	/ Now set all the other characters of the character set to the*
715	default width. /*
716	curs = NULL;
717	while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == `0`)
718	{
719	struct charseq data = (struct* charseq *) vdata;
720
721	if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
722	data->ucs4 = repertoire_find_value (ctype->repertoire,
723	data->name, len);
724
725	if (data->ucs4 != ILLEGAL_CHAR_VALUE)
726	(void) find_idx (ctype, NULL, NULL, NULL, data->ucs4);
727	}
728
729	/ There must be a multiple of 10 digits. /
730	if (ctype->mbdigits_act % `10` != `0`)
731	{
732	assert (ctype->mbdigits_act == ctype->wcdigits_act);
733	ctype->wcdigits_act -= ctype->mbdigits_act % `10`;
734	ctype->mbdigits_act -= ctype->mbdigits_act % `10`;
735	record_error (`0`, `0`, _("\
736	`digit' category has not entries in groups of ten"));
737	}
738
739	/ Check the input digits. There must be a multiple of ten available.*
740	In each group it could be that one or the other character is missing.
741	In this case the whole group must be removed. /*
742	cnt = `0`;
743	while (cnt < ctype->mbdigits_act)
744	{
745	size_t inner;
746	for (inner = `0`; inner < `10`; ++inner)
747	if (ctype->mbdigits[cnt + inner] == NULL)
748	break;
749
750	if (inner == `10`)
751	cnt += `10`;
752	else
753	{
754	/ Remove the group. /
755	memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + `10`],
756	((ctype->wcdigits_act - cnt - `10`)
757	* sizeof (ctype->mbdigits[`0`])));
758	ctype->mbdigits_act -= `10`;
759	}
760	}
761
762	/ If no input digits are given use the default. /
763	if (ctype->mbdigits_act == `0`)
764	{
765	if (ctype->mbdigits_max == `0`)
766	{
767	ctype->mbdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
768	`10` * sizeof (struct charseq *));
769	ctype->mbdigits_max = `10`;
770	}
771
772	for (cnt = `0`; cnt < `10`; ++cnt)
773	{
774	ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
775	(char *) digits + cnt, `1`);
776	if (ctype->mbdigits[cnt] == NULL)
777	{
778	ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
779	longnames[cnt],
780	strlen (longnames[cnt]));
781	if (ctype->mbdigits[cnt] == NULL)
782	{
783	/ Hum, this ain't good. /
784	record_error (`0`, `0`, _("\
785	no input digits defined and none of the standard names in the charmap"));
786
787	ctype->mbdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
788	sizeof (struct charseq) + `1`);
789
790	/ This is better than nothing. /
791	ctype->mbdigits[cnt]->bytes[`0`] = digits[cnt];
792	ctype->mbdigits[cnt]->nbytes = `1`;
793	}
794	}
795	}
796
797	ctype->mbdigits_act = `10`;
798	}
799
800	/ Check the wide character input digits. There must be a multiple*
801	of ten available. In each group it could be that one or the other
802	character is missing. In this case the whole group must be
803	removed. /*
804	cnt = `0`;
805	while (cnt < ctype->wcdigits_act)
806	{
807	size_t inner;
808	for (inner = `0`; inner < `10`; ++inner)
809	if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
810	break;
811
812	if (inner == `10`)
813	cnt += `10`;
814	else
815	{
816	/ Remove the group. /
817	memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + `10`],
818	((ctype->wcdigits_act - cnt - `10`)
819	* sizeof (ctype->wcdigits[`0`])));
820	ctype->wcdigits_act -= `10`;
821	}
822	}
823
824	/ If no input digits are given use the default. /
825	if (ctype->wcdigits_act == `0`)
826	{
827	if (ctype->wcdigits_max == `0`)
828	{
829	ctype->wcdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
830	`10` * sizeof (uint32_t));
831	ctype->wcdigits_max = `10`;
832	}
833
834	for (cnt = `0`; cnt < `10`; ++cnt)
835	ctype->wcdigits[cnt] = L`'0'` + cnt;
836
837	ctype->mbdigits_act = `10`;
838	}
839
840	/ Check the outdigits. /
841	warned = `0`;
842	for (cnt = `0`; cnt < `10`; ++cnt)
843	if (ctype->mboutdigits[cnt] == NULL)
844	{
845	static struct charseq replace[`2`];
846
847	if (!warned)
848	{
849	record_error (`0`, `0`, _("\
850	not all characters used in `outdigit' are available in the charmap"));
851	warned = `1`;
852	}
853
854	replace[`0`].nbytes = `1`;
855	replace[`0`].bytes[`0`] = `'?'`;
856	replace[`0`].bytes[`1`] = `'\0'`;
857	ctype->mboutdigits[cnt] = &replace[`0`];
858	}
859
860	warned = `0`;
861	for (cnt = `0`; cnt < `10`; ++cnt)
862	if (ctype->wcoutdigits[cnt] == `0`)
863	{
864	if (!warned)
865	{
866	record_error (`0`, `0`, _("\
867	not all characters used in `outdigit' are available in the repertoire"));
868	warned = `1`;
869	}
870
871	ctype->wcoutdigits[cnt] = L`'?'`;
872	}
873
874	/ Sort the entries in the translit_ignore list. /
875	if (ctype->translit_ignore != NULL)
876	{
877	struct translit_ignore_t *firstp = ctype->translit_ignore;
878	struct translit_ignore_t *runp;
879
880	ctype->ntranslit_ignore = `1`;
881
882	for (runp = firstp->next; runp != NULL; runp = runp->next)
883	{
884	struct translit_ignore_t *lastp = NULL;
885	struct translit_ignore_t *cmpp;
886
887	++ctype->ntranslit_ignore;
888
889	for (cmpp = firstp; cmpp != NULL; lastp = cmpp, cmpp = cmpp->next)
890	if (runp->from < cmpp->from)
891	break;
892
893	runp->next = lastp;
894	if (lastp == NULL)
895	firstp = runp;
896	}
897
898	ctype->translit_ignore = firstp;
899	}
900	}
901
902
903	void
904	ctype_output (struct localedef_t locale, const* struct charmap_t *charmap,
905	const char *output_path)
906	{
907	struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
908	const size_t nelems = (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1)
909	+ ctype->nr_charclass + ctype->map_collection_nr);
910	struct locale_file file;
911	uint32_t default_missing_len;
912	size_t elem, cnt;
913
914	/ Now prepare the output: Find the sizes of the table we can use. /
915	allocate_arrays (ctype, charmap, ctype->repertoire);
916
917	default_missing_len = (ctype->default_missing
918	? wcslen ((wchar_t *) ctype->default_missing)
919	: `0`);
920
921	init_locale_data (&file, nelems);
922	for (elem = `0`; elem < nelems; ++elem)
923	{
924	if (elem < _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1))
925	switch (elem)
926	{
927	#define CTYPE_EMPTY(name) \
928	case name: \
929	add_locale_empty (&file); \
930	break
931
932	CTYPE_EMPTY(_NL_CTYPE_GAP1);
933	CTYPE_EMPTY(_NL_CTYPE_GAP2);
934	CTYPE_EMPTY(_NL_CTYPE_GAP3);
935	CTYPE_EMPTY(_NL_CTYPE_GAP4);
936	CTYPE_EMPTY(_NL_CTYPE_GAP5);
937	CTYPE_EMPTY(_NL_CTYPE_GAP6);
938
939	#define CTYPE_RAW_DATA(name, base, size) \
940	case _NL_ITEM_INDEX (name): \
941	add_locale_raw_data (&file, base, size); \
942	break
943
944	CTYPE_RAW_DATA (_NL_CTYPE_CLASS,
945	ctype->ctype_b,
946	(`256` + `128`) * sizeof (char_class_t));
947
948	#define CTYPE_UINT32_ARRAY(name, base, n_elems) \
949	case _NL_ITEM_INDEX (name): \
950	add_locale_uint32_array (&file, base, n_elems); \
951	break
952
953	CTYPE_UINT32_ARRAY (_NL_CTYPE_TOUPPER, ctype->map_b[`0`], `256` + `128`);
954	CTYPE_UINT32_ARRAY (_NL_CTYPE_TOLOWER, ctype->map_b[`1`], `256` + `128`);
955	CTYPE_UINT32_ARRAY (_NL_CTYPE_TOUPPER32, ctype->map32_b[`0`], `256`);
956	CTYPE_UINT32_ARRAY (_NL_CTYPE_TOLOWER32, ctype->map32_b[`1`], `256`);
957	CTYPE_RAW_DATA (_NL_CTYPE_CLASS32,
958	ctype->ctype32_b,
959	`256` * sizeof (char_class32_t));
960
961	#define CTYPE_UINT32(name, value) \
962	case _NL_ITEM_INDEX (name): \
963	add_locale_uint32 (&file, value); \
964	break
965
966	CTYPE_UINT32 (_NL_CTYPE_CLASS_OFFSET, ctype->class_offset);
967	CTYPE_UINT32 (_NL_CTYPE_MAP_OFFSET, ctype->map_offset);
968	CTYPE_UINT32 (_NL_CTYPE_TRANSLIT_TAB_SIZE, ctype->translit_idx_size);
969
970	CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_FROM_IDX,
971	ctype->translit_from_idx,
972	ctype->translit_idx_size);
973
974	CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_FROM_TBL,
975	ctype->translit_from_tbl,
976	ctype->translit_from_tbl_size
977	/ sizeof (uint32_t));
978
979	CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_TO_IDX,
980	ctype->translit_to_idx,
981	ctype->translit_idx_size);
982
983	CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_TO_TBL,
984	ctype->translit_to_tbl,
985	ctype->translit_to_tbl_size / sizeof (uint32_t));
986
987	case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
988	/ The class name array. /
989	start_locale_structure (&file);
990	for (cnt = `0`; cnt < ctype->nr_charclass; ++cnt)
991	add_locale_string (&file, ctype->classnames[cnt]);
992	add_locale_char (&file, `0`);
993	align_locale_data (&file, LOCFILE_ALIGN);
994	end_locale_structure (&file);
995	break;
996
997	case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
998	/ The class name array. /
999	start_locale_structure (&file);
1000	for (cnt = `0`; cnt < ctype->map_collection_nr; ++cnt)
1001	add_locale_string (&file, ctype->mapnames[cnt]);
1002	add_locale_char (&file, `0`);
1003	align_locale_data (&file, LOCFILE_ALIGN);
1004	end_locale_structure (&file);
1005	break;
1006
1007	case _NL_ITEM_INDEX (_NL_CTYPE_WIDTH):
1008	add_locale_wcwidth_table (&file, &ctype->width);
1009	break;
1010
1011	CTYPE_UINT32 (_NL_CTYPE_MB_CUR_MAX, ctype->mb_cur_max);
1012
1013	case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
1014	add_locale_string (&file, ctype->codeset_name);
1015	break;
1016
1017	CTYPE_UINT32 (_NL_CTYPE_MAP_TO_NONASCII, ctype->to_nonascii);
1018
1019	CTYPE_UINT32 (_NL_CTYPE_NONASCII_CASE, ctype->nonascii_case);
1020
1021	case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
1022	add_locale_uint32 (&file, ctype->mbdigits_act / `10`);
1023	break;
1024
1025	case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
1026	add_locale_uint32 (&file, ctype->wcdigits_act / `10`);
1027	break;
1028
1029	case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
1030	start_locale_structure (&file);
1031	for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1032	cnt < ctype->mbdigits_act; cnt += `10`)
1033	{
1034	add_locale_raw_data (&file, ctype->mbdigits[cnt]->bytes,
1035	ctype->mbdigits[cnt]->nbytes);
1036	add_locale_char (&file, `0`);
1037	}
1038	end_locale_structure (&file);
1039	break;
1040
1041	case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
1042	start_locale_structure (&file);
1043	cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB);
1044	add_locale_raw_data (&file, ctype->mboutdigits[cnt]->bytes,
1045	ctype->mboutdigits[cnt]->nbytes);
1046	add_locale_char (&file, `0`);
1047	end_locale_structure (&file);
1048	break;
1049
1050	case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
1051	start_locale_structure (&file);
1052	for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC);
1053	cnt < ctype->wcdigits_act; cnt += `10`)
1054	add_locale_uint32 (&file, ctype->wcdigits[cnt]);
1055	end_locale_structure (&file);
1056	break;
1057
1058	case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
1059	cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC);
1060	add_locale_uint32 (&file, ctype->wcoutdigits[cnt]);
1061	break;
1062
1063	case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN):
1064	add_locale_uint32 (&file, default_missing_len);
1065	break;
1066
1067	case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING):
1068	add_locale_uint32_array (&file, ctype->default_missing,
1069	default_missing_len);
1070	break;
1071
1072	case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN):
1073	add_locale_uint32 (&file, ctype->ntranslit_ignore);
1074	break;
1075
1076	case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE):
1077	start_locale_structure (&file);
1078	{
1079	struct translit_ignore_t *runp;
1080	for (runp = ctype->translit_ignore; runp != NULL;
1081	runp = runp->next)
1082	{
1083	add_locale_uint32 (&file, runp->from);
1084	add_locale_uint32 (&file, runp->to);
1085	add_locale_uint32 (&file, runp->step);
1086	}
1087	}
1088	end_locale_structure (&file);
1089	break;
1090
1091	default:
1092	assert (! "unknown CTYPE element");
1093	}
1094	else
1095	{
1096	/ Handle extra maps. /
1097	size_t nr = elem - _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
1098	if (nr < ctype->nr_charclass)
1099	{
1100	start_locale_prelude (&file);
1101	add_locale_uint32_array (&file, ctype->class_b[nr], `256` / `32`);
1102	end_locale_prelude (&file);
1103	add_locale_wctype_table (&file, &ctype->class_3level[nr]);
1104	}
1105	else
1106	{
1107	nr -= ctype->nr_charclass;
1108	assert (nr < ctype->map_collection_nr);
1109	add_locale_wctrans_table (&file, &ctype->map_3level[nr]);
1110	}
1111	}
1112	}
1113
1114	write_locale_data (output_path, LC_CTYPE, "LC_CTYPE", &file);
1115	}
1116
1117
1118	/ Local functions. /
1119	static void
1120	ctype_class_new (struct linereader lr, struct* locale_ctype_t *ctype,
1121	const char *name)
1122	{
1123	size_t cnt;
1124
1125	for (cnt = `0`; cnt < ctype->nr_charclass; ++cnt)
1126	if (strcmp (ctype->classnames[cnt], name) == `0`)
1127	break;
1128
1129	if (cnt < ctype->nr_charclass)
1130	{
1131	lr_error (lr, _("character class `%s' already defined"), name);
1132	return;
1133	}
1134
1135	if (ctype->nr_charclass == MAX_NR_CHARCLASS)
1136	/ Exit code 2 is prescribed in P1003.2b. /
1137	record_error (`2`, `0`, _("\
1138	implementation limit: no more than %Zd character classes allowed"),
1139	MAX_NR_CHARCLASS);
1140
1141	ctype->classnames[ctype->nr_charclass++] = name;
1142	}
1143
1144
1145	static void
1146	ctype_map_new (struct linereader lr, struct* locale_ctype_t *ctype,
1147	const char name, const* struct charmap_t *charmap)
1148	{
1149	size_t max_chars = `0`;
1150	size_t cnt;
1151
1152	for (cnt = `0`; cnt < ctype->map_collection_nr; ++cnt)
1153	{
1154	if (strcmp (ctype->mapnames[cnt], name) == `0`)
1155	break;
1156
1157	if (max_chars < ctype->map_collection_max[cnt])
1158	max_chars = ctype->map_collection_max[cnt];
1159	}
1160
1161	if (cnt < ctype->map_collection_nr)
1162	{
1163	lr_error (lr, _("character map `%s' already defined"), name);
1164	return;
1165	}
1166
1167	if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1168	/ Exit code 2 is prescribed in P1003.2b. /
1169	record_error (`2`, `0`, _("\
1170	implementation limit: no more than %d character maps allowed"),
1171	MAX_NR_CHARMAP);
1172
1173	ctype->mapnames[cnt] = name;
1174
1175	if (max_chars == `0`)
1176	ctype->map_collection_max[cnt] = charmap->mb_cur_max == `1` ? `256` : `512`;
1177	else
1178	ctype->map_collection_max[cnt] = max_chars;
1179
1180	ctype->map_collection[cnt] = (uint32_t *)
1181	xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]);
1182	ctype->map_collection_act[cnt] = `256`;
1183
1184	++ctype->map_collection_nr;
1185	}
1186
1187
1188	/ We have to be prepared that TABLE, MAX, and ACT can be NULL. This*
1189	is possible if we only want to extend the name array. /*
1190	static uint32_t *
1191	find_idx (struct locale_ctype_t ctype, uint32_t table, size_t max,
1192	size_t *act, uint32_t idx)
1193	{
1194	size_t cnt;
1195
1196	if (idx < `256`)
1197	return table == NULL ? NULL : &(*table)[idx];
1198
1199	/ Use the charnames_idx lookup table instead of the slow search loop. /
1200	#if 1
1201	cnt = idx_table_get (&ctype->charnames_idx, idx);
1202	if (cnt == EMPTY)
1203	/ Not found. /
1204	cnt = ctype->charnames_act;
1205	#else
1206	for (cnt = `256`; cnt < ctype->charnames_act; ++cnt)
1207	if (ctype->charnames[cnt] == idx)
1208	break;
1209	#endif
1210
1211	/ We have to distinguish two cases: the name is found or not. /
1212	if (cnt == ctype->charnames_act)
1213	{
1214	/ Extend the name array. /
1215	if (ctype->charnames_act == ctype->charnames_max)
1216	{
1217	ctype->charnames_max *= `2`;
1218	ctype->charnames = (uint32_t *)
1219	xrealloc (ctype->charnames,
1220	sizeof (uint32_t) * ctype->charnames_max);
1221	}
1222	ctype->charnames[ctype->charnames_act++] = idx;
1223	idx_table_add (&ctype->charnames_idx, idx, cnt);
1224	}
1225
1226	if (table == NULL)
1227	/ We have done everything we are asked to do. /
1228	return NULL;
1229
1230	if (max == NULL)
1231	/ The caller does not want to extend the table. /
1232	return (cnt >= act ? NULL : &(table)[cnt]);
1233
1234	if (cnt >= *act)
1235	{
1236	if (cnt >= *max)
1237	{
1238	size_t old_max = *max;
1239	do
1240	max = `2`;
1241	while (*max <= cnt);
1242
1243	*table =
1244	(uint32_t ) xrealloc (table, max sizeof (uint32_t));
1245	memset (&(*table)[old_max], `'\0'`,
1246	(max - old_max) sizeof (uint32_t));
1247	}
1248
1249	*act = cnt + `1`;
1250	}
1251
1252	return &(*table)[cnt];
1253	}
1254
1255
1256	static int
1257	get_character (struct token now, const* struct charmap_t *charmap,
1258	struct repertoire_t *repertoire,
1259	struct charseq *seqp, uint32_t wchp)
1260	{
1261	if (now->tok == tok_bsymbol)
1262	{
1263	/ This will hopefully be the normal case. /
1264	*wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1265	now->val.str.lenmb);
1266	*seqp = charmap_find_value (charmap, now->val.str.startmb,
1267	now->val.str.lenmb);
1268	}
1269	else if (now->tok == tok_ucs4)
1270	{
1271	char utmp[`10`];
1272
1273	snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4);
1274	*seqp = charmap_find_value (charmap, utmp, `9`);
1275
1276	if (*seqp == NULL)
1277	*seqp = repertoire_find_seq (repertoire, now->val.ucs4);
1278
1279	if (*seqp == NULL)
1280	{
1281	/ Compute the value in the charmap from the UCS value. /
1282	const char *symbol = repertoire_find_symbol (repertoire,
1283	now->val.ucs4);
1284
1285	if (symbol == NULL)
1286	*seqp = NULL;
1287	else
1288	*seqp = charmap_find_value (charmap, symbol, strlen (symbol));
1289
1290	if (*seqp == NULL)
1291	{
1292	if (repertoire != NULL)
1293	{
1294	/ Insert a negative entry. /
1295	static const struct charseq negative
1296	= { .ucs4 = ILLEGAL_CHAR_VALUE };
1297	uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1298	sizeof (uint32_t));
1299	*newp = now->val.ucs4;
1300
1301	insert_entry (&repertoire->seq_table, newp,
1302	sizeof (uint32_t), (void *) &negative);
1303	}
1304	}
1305	else
1306	(*seqp)->ucs4 = now->val.ucs4;
1307	}
1308	else if ((*seqp)->ucs4 != now->val.ucs4)
1309	*seqp = NULL;
1310
1311	*wchp = now->val.ucs4;
1312	}
1313	else if (now->tok == tok_charcode)
1314	{
1315	/ We must map from the byte code to UCS4. /
1316	*seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1317	now->val.str.lenmb);
1318
1319	if (*seqp == NULL)
1320	*wchp = ILLEGAL_CHAR_VALUE;
1321	else
1322	{
1323	if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1324	(seqp)->ucs4 = repertoire_find_value (repertoire, (seqp)->name,
1325	strlen ((*seqp)->name));
1326	wchp = (seqp)->ucs4;
1327	}
1328	}
1329	else
1330	return `1`;
1331
1332	return `0`;
1333	}
1334
1335
1336	/ Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and*
1337	the .(2). counterparts. /*
1338	static void
1339	charclass_symbolic_ellipsis (struct linereader *ldfile,
1340	struct locale_ctype_t *ctype,
1341	const struct charmap_t *charmap,
1342	struct repertoire_t *repertoire,
1343	struct token *now,
1344	const char *last_str,
1345	unsigned long int class256_bit,
1346	unsigned long int class_bit, int base,
1347	int ignore_content, int handle_digits, int step)
1348	{
1349	const char *nowstr = now->val.str.startmb;
1350	char tmp[now->val.str.lenmb + `1`];
1351	const char *cp;
1352	char *endp;
1353	unsigned long int from;
1354	unsigned long int to;
1355
1356	/ We have to compute the ellipsis values using the symbolic names. /
1357	assert (last_str != NULL);
1358
1359	if (strlen (last_str) != now->val.str.lenmb)
1360	{
1361	invalid_range:
1362	lr_error (ldfile,
1363	_("`%s' and `%.*s' are not valid names for symbolic range"),
1364	last_str, (int) now->val.str.lenmb, nowstr);
1365	return;
1366	}
1367
1368	if (memcmp (last_str, nowstr, now->val.str.lenmb) == `0`)
1369	/ Nothing to do, the names are the same. /
1370	return;
1371
1372	for (cp = last_str; cp == (nowstr + (cp - last_str)); ++cp)
1373	;
1374
1375	errno = `0`;
1376	from = strtoul (cp, &endp, base);
1377	if ((from == UINT_MAX && errno == ERANGE) \|\| *endp != `'\0'`)
1378	goto invalid_range;
1379
1380	to = strtoul (nowstr + (cp - last_str), &endp, base);
1381	if ((to == UINT_MAX && errno == ERANGE)
1382	\|\| (endp - nowstr) != now->val.str.lenmb \|\| from >= to)
1383	goto invalid_range;
1384
1385	/ OK, we have a range FROM - TO. Now we can create the symbolic names. /
1386	if (!ignore_content)
1387	{
1388	now->val.str.startmb = tmp;
1389	while ((from += step) <= to)
1390	{
1391	struct charseq *seq;
1392	uint32_t wch;
1393
1394	sprintf (tmp, (base == `10` ? "%.s%0ld" : "%.s%0lX"),
1395	(int) (cp - last_str), last_str,
1396	(int) (now->val.str.lenmb - (cp - last_str)),
1397	from);
1398
1399	get_character (now, charmap, repertoire, &seq, &wch);
1400
1401	if (seq != NULL && seq->nbytes == `1`)
1402	/ Yep, we can store information about this byte sequence. /
1403	ctype->class256_collection[seq->bytes[`0`]] \|= class256_bit;
1404
1405	if (wch != ILLEGAL_CHAR_VALUE && class_bit != `0`)
1406	/ We have the UCS4 position. /
1407	*find_idx (ctype, &ctype->class_collection,
1408	&ctype->class_collection_max,
1409	&ctype->class_collection_act, wch) \|= class_bit;
1410
1411	if (handle_digits == `1`)
1412	{
1413	/ We must store the digit values. /
1414	if (ctype->mbdigits_act == ctype->mbdigits_max)
1415	{
1416	ctype->mbdigits_max *= `2`;
1417	ctype->mbdigits = xrealloc (ctype->mbdigits,
1418	(ctype->mbdigits_max
1419	* sizeof (char *)));
1420	ctype->wcdigits_max *= `2`;
1421	ctype->wcdigits = xrealloc (ctype->wcdigits,
1422	(ctype->wcdigits_max
1423	* sizeof (uint32_t)));
1424	}
1425
1426	ctype->mbdigits[ctype->mbdigits_act++] = seq;
1427	ctype->wcdigits[ctype->wcdigits_act++] = wch;
1428	}
1429	else if (handle_digits == `2`)
1430	{
1431	/ We must store the digit values. /
1432	if (ctype->outdigits_act >= `10`)
1433	{
1434	lr_error (ldfile, _("\
1435	%s: field `%s' does not contain exactly ten entries"),
1436	"LC_CTYPE", "outdigit");
1437	return;
1438	}
1439
1440	ctype->mboutdigits[ctype->outdigits_act] = seq;
1441	ctype->wcoutdigits[ctype->outdigits_act] = wch;
1442	++ctype->outdigits_act;
1443	}
1444	}
1445	}
1446	}
1447
1448
1449	/ Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'. /
1450	static void
1451	charclass_ucs4_ellipsis (struct linereader *ldfile,
1452	struct locale_ctype_t *ctype,
1453	const struct charmap_t *charmap,
1454	struct repertoire_t *repertoire,
1455	struct token *now, uint32_t last_wch,
1456	unsigned long int class256_bit,
1457	unsigned long int class_bit, int ignore_content,
1458	int handle_digits, int step)
1459	{
1460	if (last_wch > now->val.ucs4)
1461	{
1462	lr_error (ldfile, _("\
1463	to-value <U%0X> of range is smaller than from-value <U%0X>"),
1464	(now->val.ucs4 \| last_wch) < `65536` ? `4` : `8`, now->val.ucs4,
1465	(now->val.ucs4 \| last_wch) < `65536` ? `4` : `8`, last_wch);
1466	return;
1467	}
1468
1469	if (!ignore_content)
1470	while ((last_wch += step) <= now->val.ucs4)
1471	{
1472	/ We have to find out whether there is a byte sequence corresponding*
1473	to this UCS4 value. /*
1474	struct charseq *seq;
1475	char utmp[`10`];
1476
1477	snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
1478	seq = charmap_find_value (charmap, utmp, `9`);
1479	if (seq == NULL)
1480	{
1481	snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
1482	seq = charmap_find_value (charmap, utmp, `5`);
1483	}
1484
1485	if (seq == NULL)
1486	/ Try looking in the repertoire map. /
1487	seq = repertoire_find_seq (repertoire, last_wch);
1488
1489	/ If this is the first time we look for this sequence create a new*
1490	entry. /*
1491	if (seq == NULL)
1492	{
1493	static const struct charseq negative
1494	= { .ucs4 = ILLEGAL_CHAR_VALUE };
1495
1496	/ Find the symbolic name for this UCS4 value. /
1497	if (repertoire != NULL)
1498	{
1499	const char *symbol = repertoire_find_symbol (repertoire,
1500	last_wch);
1501	uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1502	sizeof (uint32_t));
1503	*newp = last_wch;
1504
1505	if (symbol != NULL)
1506	/ We have a name, now search the multibyte value. /
1507	seq = charmap_find_value (charmap, symbol, strlen (symbol));
1508
1509	if (seq == NULL)
1510	/ We have to create a fake entry. /
1511	seq = (struct charseq *) &negative;
1512	else
1513	seq->ucs4 = last_wch;
1514
1515	insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
1516	seq);
1517	}
1518	else
1519	/ We have to create a fake entry. /
1520	seq = (struct charseq *) &negative;
1521	}
1522
1523	/ We have a name, now search the multibyte value. /
1524	if (seq->ucs4 == last_wch && seq->nbytes == `1`)
1525	/ Yep, we can store information about this byte sequence. /
1526	ctype->class256_collection[(size_t) seq->bytes[`0`]]
1527	\|= class256_bit;
1528
1529	/ And of course we have the UCS4 position. /
1530	if (class_bit != `0`)
1531	*find_idx (ctype, &ctype->class_collection,
1532	&ctype->class_collection_max,
1533	&ctype->class_collection_act, last_wch) \|= class_bit;
1534
1535	if (handle_digits == `1`)
1536	{
1537	/ We must store the digit values. /
1538	if (ctype->mbdigits_act == ctype->mbdigits_max)
1539	{
1540	ctype->mbdigits_max *= `2`;
1541	ctype->mbdigits = xrealloc (ctype->mbdigits,
1542	(ctype->mbdigits_max
1543	* sizeof (char *)));
1544	ctype->wcdigits_max *= `2`;
1545	ctype->wcdigits = xrealloc (ctype->wcdigits,
1546	(ctype->wcdigits_max
1547	* sizeof (uint32_t)));
1548	}
1549
1550	ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1551	? seq : NULL);
1552	ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1553	}
1554	else if (handle_digits == `2`)
1555	{
1556	/ We must store the digit values. /
1557	if (ctype->outdigits_act >= `10`)
1558	{
1559	lr_error (ldfile, _("\
1560	%s: field `%s' does not contain exactly ten entries"),
1561	"LC_CTYPE", "outdigit");
1562	return;
1563	}
1564
1565	ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1566	? seq : NULL);
1567	ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1568	++ctype->outdigits_act;
1569	}
1570	}
1571	}
1572
1573
1574	/ Ellipsis as in `/xea/x12.../xea/x34'. /
1575	static void
1576	charclass_charcode_ellipsis (struct linereader *ldfile,
1577	struct locale_ctype_t *ctype,
1578	const struct charmap_t *charmap,
1579	struct repertoire_t *repertoire,
1580	struct token now, char* *last_charcode,
1581	uint32_t last_charcode_len,
1582	unsigned long int class256_bit,
1583	unsigned long int class_bit, int ignore_content,
1584	int handle_digits)
1585	{
1586	/ First check whether the to-value is larger. /
1587	if (now->val.charcode.nbytes != last_charcode_len)
1588	{
1589	lr_error (ldfile, _("\
1590	start and end character sequence of range must have the same length"));
1591	return;
1592	}
1593
1594	if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > `0`)
1595	{
1596	lr_error (ldfile, _("\
1597	to-value character sequence is smaller than from-value sequence"));
1598	return;
1599	}
1600
1601	if (!ignore_content)
1602	{
1603	do
1604	{
1605	/ Increment the byte sequence value. /
1606	struct charseq *seq;
1607	uint32_t wch;
1608	int i;
1609
1610	for (i = last_charcode_len - `1`; i >= `0`; --i)
1611	if (++last_charcode[i] != `0`)
1612	break;
1613
1614	if (last_charcode_len == `1`)
1615	/ Of course we have the charcode value. /
1616	ctype->class256_collection[(size_t) last_charcode[`0`]]
1617	\|= class256_bit;
1618
1619	/ Find the symbolic name. /
1620	seq = charmap_find_symbol (charmap, last_charcode,
1621	last_charcode_len);
1622	if (seq != NULL)
1623	{
1624	if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1625	seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1626	strlen (seq->name));
1627	wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4;
1628
1629	if (wch != ILLEGAL_CHAR_VALUE && class_bit != `0`)
1630	*find_idx (ctype, &ctype->class_collection,
1631	&ctype->class_collection_max,
1632	&ctype->class_collection_act, wch) \|= class_bit;
1633	}
1634	else
1635	wch = ILLEGAL_CHAR_VALUE;
1636
1637	if (handle_digits == `1`)
1638	{
1639	/ We must store the digit values. /
1640	if (ctype->mbdigits_act == ctype->mbdigits_max)
1641	{
1642	ctype->mbdigits_max *= `2`;
1643	ctype->mbdigits = xrealloc (ctype->mbdigits,
1644	(ctype->mbdigits_max
1645	* sizeof (char *)));
1646	ctype->wcdigits_max *= `2`;
1647	ctype->wcdigits = xrealloc (ctype->wcdigits,
1648	(ctype->wcdigits_max
1649	* sizeof (uint32_t)));
1650	}
1651
1652	seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1653	memcpy ((char *) (seq + `1`), last_charcode, last_charcode_len);
1654	seq->nbytes = last_charcode_len;
1655
1656	ctype->mbdigits[ctype->mbdigits_act++] = seq;
1657	ctype->wcdigits[ctype->wcdigits_act++] = wch;
1658	}
1659	else if (handle_digits == `2`)
1660	{
1661	struct charseq *seq;
1662	/ We must store the digit values. /
1663	if (ctype->outdigits_act >= `10`)
1664	{
1665	lr_error (ldfile, _("\
1666	%s: field `%s' does not contain exactly ten entries"),
1667	"LC_CTYPE", "outdigit");
1668	return;
1669	}
1670
1671	seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1672	memcpy ((char *) (seq + `1`), last_charcode, last_charcode_len);
1673	seq->nbytes = last_charcode_len;
1674
1675	ctype->mboutdigits[ctype->outdigits_act] = seq;
1676	ctype->wcoutdigits[ctype->outdigits_act] = wch;
1677	++ctype->outdigits_act;
1678	}
1679	}
1680	while (memcmp (last_charcode, now->val.charcode.bytes,
1681	last_charcode_len) != `0`);
1682	}
1683	}
1684
1685
1686	static uint32_t *
1687	find_translit2 (struct locale_ctype_t ctype, const* struct charmap_t *charmap,
1688	uint32_t wch)
1689	{
1690	struct translit_t *trunp = ctype->translit;
1691	struct translit_ignore_t *tirunp = ctype->translit_ignore;
1692
1693	while (trunp != NULL)
1694	{
1695	/ XXX We simplify things here. The transliterations we look*
1696	for are only allowed to have one character. /*
1697	if (trunp->from[`0`] == wch && trunp->from[`1`] == `0`)
1698	{
1699	/ Found it. Now look for a transliteration which can be*
1700	represented with the character set. /*
1701	struct translit_to_t *torunp = trunp->to;
1702
1703	while (torunp != NULL)
1704	{
1705	int i;
1706
1707	for (i = `0`; torunp->str[i] != `0`; ++i)
1708	{
1709	char utmp[`10`];
1710
1711	snprintf (utmp, sizeof (utmp), "U%08X", torunp->str[i]);
1712	if (charmap_find_value (charmap, utmp, `9`) == NULL)
1713	/ This character cannot be represented. /
1714	break;
1715	}
1716
1717	if (torunp->str[i] == `0`)
1718	return torunp->str;
1719
1720	torunp = torunp->next;
1721	}
1722
1723	break;
1724	}
1725
1726	trunp = trunp->next;
1727	}
1728
1729	/ Check for ignored chars. /
1730	while (tirunp != NULL)
1731	{
1732	if (tirunp->from <= wch && tirunp->to >= wch)
1733	{
1734	uint32_t wi;
1735
1736	for (wi = tirunp->from; wi <= wch; wi += tirunp->step)
1737	if (wi == wch)
1738	return no_str;
1739	}
1740	}
1741
1742	/ Nothing found. /
1743	return NULL;
1744	}
1745
1746
1747	uint32_t *
1748	find_translit (struct localedef_t locale, const* struct charmap_t *charmap,
1749	uint32_t wch)
1750	{
1751	struct locale_ctype_t *ctype;
1752	uint32_t *result = NULL;
1753
1754	assert (locale != NULL);
1755	ctype = locale->categories[LC_CTYPE].ctype;
1756
1757	if (ctype == NULL)
1758	return NULL;
1759
1760	if (ctype->translit != NULL)
1761	result = find_translit2 (ctype, charmap, wch);
1762
1763	if (result == NULL)
1764	{
1765	struct translit_include_t *irunp = ctype->translit_include;
1766
1767	while (irunp != NULL && result == NULL)
1768	{
1769	result = find_translit (find_locale (CTYPE_LOCALE,
1770	irunp->copy_locale,
1771	irunp->copy_repertoire,
1772	charmap),
1773	charmap, wch);
1774	irunp = irunp->next;
1775	}
1776	}
1777
1778	return result;
1779	}
1780
1781
1782	/ Read one transliteration entry. /
1783	static uint32_t *
1784	read_widestring (struct linereader ldfile, struct* token *now,
1785	const struct charmap_t *charmap,
1786	struct repertoire_t *repertoire)
1787	{
1788	uint32_t *wstr;
1789
1790	if (now->tok == tok_default_missing)
1791	/ The special name "" will denote this case. /
1792	wstr = no_str;
1793	else if (now->tok == tok_bsymbol)
1794	{
1795	/ Get the value from the repertoire. /
1796	wstr = (uint32_t ) xmalloc (`2` sizeof (uint32_t));
1797	wstr[`0`] = repertoire_find_value (repertoire, now->val.str.startmb,
1798	now->val.str.lenmb);
1799	if (wstr[`0`] == ILLEGAL_CHAR_VALUE)
1800	{
1801	/ We cannot proceed, we don't know the UCS4 value. /
1802	free (wstr);
1803	return NULL;
1804	}
1805
1806	wstr[`1`] = `0`;
1807	}
1808	else if (now->tok == tok_ucs4)
1809	{
1810	wstr = (uint32_t ) xmalloc (`2` sizeof (uint32_t));
1811	wstr[`0`] = now->val.ucs4;
1812	wstr[`1`] = `0`;
1813	}
1814	else if (now->tok == tok_charcode)
1815	{
1816	/ Argh, we have to convert to the symbol name first and then to the*
1817	UCS4 value. /*
1818	struct charseq *seq = charmap_find_symbol (charmap,
1819	now->val.str.startmb,
1820	now->val.str.lenmb);
1821	if (seq == NULL)
1822	/ Cannot find the UCS4 value. /
1823	return NULL;
1824
1825	if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1826	seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1827	strlen (seq->name));
1828	if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1829	/ We cannot proceed, we don't know the UCS4 value. /
1830	return NULL;
1831
1832	wstr = (uint32_t ) xmalloc (`2` sizeof (uint32_t));
1833	wstr[`0`] = seq->ucs4;
1834	wstr[`1`] = `0`;
1835	}
1836	else if (now->tok == tok_string)
1837	{
1838	wstr = now->val.str.startwc;
1839	if (wstr == NULL \|\| wstr[`0`] == `0`)
1840	return NULL;
1841	}
1842	else
1843	{
1844	if (now->tok != tok_eol && now->tok != tok_eof)
1845	lr_ignore_rest (ldfile, `0`);
1846	SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1847	return (uint32_t *) -`1l`;
1848	}
1849
1850	return wstr;
1851	}
1852
1853
1854	static void
1855	read_translit_entry (struct linereader ldfile, struct* locale_ctype_t *ctype,
1856	struct token now, const* struct charmap_t *charmap,
1857	struct repertoire_t *repertoire)
1858	{
1859	uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1860	struct translit_t *result;
1861	struct translit_to_t **top;
1862	struct obstack *ob = &ctype->mempool;
1863	int first;
1864	int ignore;
1865
1866	if (from_wstr == NULL)
1867	/ There is no valid from string. /
1868	return;
1869
1870	result = (struct translit_t *) obstack_alloc (ob,
1871	sizeof (struct translit_t));
1872	result->from = from_wstr;
1873	result->fname = ldfile->fname;
1874	result->lineno = ldfile->lineno;
1875	result->next = NULL;
1876	result->to = NULL;
1877	top = &result->to;
1878	first = `1`;
1879	ignore = `0`;
1880
1881	while (`1`)
1882	{
1883	uint32_t *to_wstr;
1884
1885	/ Next we have one or more transliterations. They are*
1886	separated by semicolons. /*
1887	now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
1888
1889	if (!first && (now->tok == tok_semicolon \|\| now->tok == tok_eol))
1890	{
1891	/ One string read. /
1892	const uint32_t zero = `0`;
1893
1894	if (!ignore)
1895	{
1896	obstack_grow (ob, &zero, `4`);
1897	to_wstr = obstack_finish (ob);
1898
1899	top = obstack_alloc (ob, sizeof* (struct translit_to_t));
1900	(*top)->str = to_wstr;
1901	(*top)->next = NULL;
1902	}
1903
1904	if (now->tok == tok_eol)
1905	{
1906	result->next = ctype->translit;
1907	ctype->translit = result;
1908	return;
1909	}
1910
1911	if (!ignore)
1912	top = &(*top)->next;
1913	ignore = `0`;
1914	}
1915	else
1916	{
1917	to_wstr = read_widestring (ldfile, now, charmap, repertoire);
1918	if (to_wstr == (uint32_t *) -`1l`)
1919	{
1920	/ An error occurred. /
1921	obstack_free (ob, result);
1922	return;
1923	}
1924
1925	if (to_wstr == NULL)
1926	ignore = `1`;
1927	else
1928	/ This value is usable. /
1929	obstack_grow (ob, to_wstr, wcslen ((wchar_t ) to_wstr) `4`);
1930
1931	first = `0`;
1932	}
1933	}
1934	}
1935
1936
1937	static void
1938	read_translit_ignore_entry (struct linereader *ldfile,
1939	struct locale_ctype_t *ctype,
1940	const struct charmap_t *charmap,
1941	struct repertoire_t *repertoire)
1942	{
1943	/ We expect a semicolon-separated list of characters we ignore. We are*
1944	only interested in the wide character definitions. These must be
1945	single characters, possibly defining a range when an ellipsis is used. /*
1946	while (`1`)
1947	{
1948	struct token *now = lr_token (ldfile, charmap, NULL, repertoire,
1949	verbose);
1950	struct translit_ignore_t *newp;
1951	uint32_t from;
1952
1953	if (now->tok == tok_eol \|\| now->tok == tok_eof)
1954	{
1955	lr_error (ldfile,
1956	_("premature end of `translit_ignore' definition"));
1957	return;
1958	}
1959
1960	if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
1961	{
1962	lr_error (ldfile, _("syntax error"));
1963	lr_ignore_rest (ldfile, `0`);
1964	return;
1965	}
1966
1967	if (now->tok == tok_ucs4)
1968	from = now->val.ucs4;
1969	else
1970	/ Try to get the value. /
1971	from = repertoire_find_value (repertoire, now->val.str.startmb,
1972	now->val.str.lenmb);
1973
1974	if (from == ILLEGAL_CHAR_VALUE)
1975	{
1976	lr_error (ldfile, "invalid character name");
1977	newp = NULL;
1978	}
1979	else
1980	{
1981	newp = (struct translit_ignore_t *)
1982	obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
1983	newp->from = from;
1984	newp->to = from;
1985	newp->step = `1`;
1986
1987	newp->next = ctype->translit_ignore;
1988	ctype->translit_ignore = newp;
1989	}
1990
1991	/ Now we expect either a semicolon, an ellipsis, or the end of the*
1992	line. /*
1993	now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
1994
1995	if (now->tok == tok_ellipsis2 \|\| now->tok == tok_ellipsis2_2)
1996	{
1997	/ XXX Should we bother implementing `....'? `...' certainly*
1998	will not be implemented. /*
1999	uint32_t to;
2000	int step = now->tok == tok_ellipsis2_2 ? `2` : `1`;
2001
2002	now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2003
2004	if (now->tok == tok_eol \|\| now->tok == tok_eof)
2005	{
2006	lr_error (ldfile,
2007	_("premature end of `translit_ignore' definition"));
2008	return;
2009	}
2010
2011	if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2012	{
2013	lr_error (ldfile, _("syntax error"));
2014	lr_ignore_rest (ldfile, `0`);
2015	return;
2016	}
2017
2018	if (now->tok == tok_ucs4)
2019	to = now->val.ucs4;
2020	else
2021	/ Try to get the value. /
2022	to = repertoire_find_value (repertoire, now->val.str.startmb,
2023	now->val.str.lenmb);
2024
2025	if (to == ILLEGAL_CHAR_VALUE)
2026	lr_error (ldfile, "invalid character name");
2027	else
2028	{
2029	/ Make sure the `to'-value is larger. /
2030	if (to >= from)
2031	{
2032	newp->to = to;
2033	newp->step = step;
2034	}
2035	else
2036	lr_error (ldfile, _("\
2037	to-value <U%0X> of range is smaller than from-value <U%0X>"),
2038	(to \| from) < `65536` ? `4` : `8`, to,
2039	(to \| from) < `65536` ? `4` : `8`, from);
2040	}
2041
2042	/ And the next token. /
2043	now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2044	}
2045
2046	if (now->tok == tok_eol \|\| now->tok == tok_eof)
2047	/ We are done. /
2048	return;
2049
2050	if (now->tok == tok_semicolon)
2051	/ Next round. /
2052	continue;
2053
2054	/ If we come here something is wrong. /
2055	lr_error (ldfile, _("syntax error"));
2056	lr_ignore_rest (ldfile, `0`);
2057	return;
2058	}
2059	}
2060
2061
2062	/ The parser for the LC_CTYPE section of the locale definition. /
2063	void
2064	ctype_read (struct linereader ldfile, struct* localedef_t *result,
2065	const struct charmap_t charmap, const* char *repertoire_name,
2066	int ignore_content)
2067	{
2068	struct repertoire_t *repertoire = NULL;
2069	struct locale_ctype_t *ctype;
2070	struct token *now;
2071	enum token_t nowtok;
2072	size_t cnt;
2073	uint32_t last_wch = `0`;
2074	enum token_t last_token;
2075	enum token_t ellipsis_token;
2076	int step;
2077	char last_charcode[`16`];
2078	size_t last_charcode_len = `0`;
2079	const char *last_str = NULL;
2080	int mapidx;
2081	struct localedef_t *copy_locale = NULL;
2082
2083	/ Get the repertoire we have to use. /
2084	if (repertoire_name != NULL)
2085	repertoire = repertoire_read (repertoire_name);
2086
2087	/ The rest of the line containing `LC_CTYPE' must be free. /
2088	lr_ignore_rest (ldfile, `1`);
2089
2090
2091	do
2092	{
2093	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2094	nowtok = now->tok;
2095	}
2096	while (nowtok == tok_eol);
2097
2098	/ If we see `copy' now we are almost done. /
2099	if (nowtok == tok_copy)
2100	{
2101	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2102	if (now->tok != tok_string)
2103	{
2104	SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2105
2106	skip_category:
2107	do
2108	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2109	while (now->tok != tok_eof && now->tok != tok_end);
2110
2111	if (now->tok != tok_eof
2112	\|\| (now = lr_token (ldfile, charmap, NULL, NULL, verbose),
2113	now->tok == tok_eof))
2114	lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2115	else if (now->tok != tok_lc_ctype)
2116	{
2117	lr_error (ldfile, _("\
2118	%1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2119	lr_ignore_rest (ldfile, `0`);
2120	}
2121	else
2122	lr_ignore_rest (ldfile, `1`);
2123
2124	return;
2125	}
2126
2127	if (! ignore_content)
2128	{
2129	/ Get the locale definition. /
2130	copy_locale = load_locale (LC_CTYPE, now->val.str.startmb,
2131	repertoire_name, charmap, NULL);
2132	if ((copy_locale->avail & CTYPE_LOCALE) == `0`)
2133	{
2134	/ Not yet loaded. So do it now. /
2135	if (locfile_read (copy_locale, charmap) != `0`)
2136	goto skip_category;
2137	}
2138
2139	if (copy_locale->categories[LC_CTYPE].ctype == NULL)
2140	return;
2141	}
2142
2143	lr_ignore_rest (ldfile, `1`);
2144
2145	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2146	nowtok = now->tok;
2147	}
2148
2149	/ Prepare the data structures. /
2150	ctype_startup (ldfile, result, charmap, copy_locale, ignore_content);
2151	ctype = result->categories[LC_CTYPE].ctype;
2152
2153	/ Remember the repertoire we use. /
2154	if (!ignore_content)
2155	ctype->repertoire = repertoire;
2156
2157	while (`1`)
2158	{
2159	unsigned long int class_bit = `0`;
2160	unsigned long int class256_bit = `0`;
2161	int handle_digits = `0`;
2162
2163	/ Of course we don't proceed beyond the end of file. /
2164	if (nowtok == tok_eof)
2165	break;
2166
2167	/ Ingore empty lines. /
2168	if (nowtok == tok_eol)
2169	{
2170	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2171	nowtok = now->tok;
2172	continue;
2173	}
2174
2175	switch (nowtok)
2176	{
2177	case tok_charclass:
2178	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2179	while (now->tok == tok_ident \|\| now->tok == tok_string)
2180	{
2181	ctype_class_new (ldfile, ctype, now->val.str.startmb);
2182	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2183	if (now->tok != tok_semicolon)
2184	break;
2185	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2186	}
2187	if (now->tok != tok_eol)
2188	SYNTAX_ERROR (_("\
2189	%s: syntax error in definition of new character class"), "LC_CTYPE");
2190	break;
2191
2192	case tok_charconv:
2193	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2194	while (now->tok == tok_ident \|\| now->tok == tok_string)
2195	{
2196	ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2197	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2198	if (now->tok != tok_semicolon)
2199	break;
2200	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2201	}
2202	if (now->tok != tok_eol)
2203	SYNTAX_ERROR (_("\
2204	%s: syntax error in definition of new character map"), "LC_CTYPE");
2205	break;
2206
2207	case tok_class:
2208	/ Ignore the rest of the line if we don't need the input of*
2209	this line. /*
2210	if (ignore_content)
2211	{
2212	lr_ignore_rest (ldfile, `0`);
2213	break;
2214	}
2215
2216	/ We simply forget the `class' keyword and use the following*
2217	operand to determine the bit. /*
2218	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2219	if (now->tok == tok_ident \|\| now->tok == tok_string)
2220	{
2221	/ Must can be one of the predefined class names. /
2222	for (cnt = `0`; cnt < ctype->nr_charclass; ++cnt)
2223	if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == `0`)
2224	break;
2225	if (cnt >= ctype->nr_charclass)
2226	{
2227	/ OK, it's a new class. /
2228	ctype_class_new (ldfile, ctype, now->val.str.startmb);
2229
2230	class_bit = _ISwbit (ctype->nr_charclass - `1`);
2231	}
2232	else
2233	{
2234	class_bit = _ISwbit (cnt);
2235
2236	free (now->val.str.startmb);
2237	}
2238	}
2239	else if (now->tok == tok_digit)
2240	goto handle_tok_digit;
2241	else if (now->tok < tok_upper \|\| now->tok > tok_blank)
2242	goto err_label;
2243	else
2244	{
2245	class_bit = BITw (now->tok);
2246	class256_bit = BIT (now->tok);
2247	}
2248
2249	/ The next character must be a semicolon. /
2250	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2251	if (now->tok != tok_semicolon)
2252	goto err_label;
2253	goto read_charclass;
2254
2255	case tok_upper:
2256	case tok_lower:
2257	case tok_alpha:
2258	case tok_alnum:
2259	case tok_space:
2260	case tok_cntrl:
2261	case tok_punct:
2262	case tok_graph:
2263	case tok_print:
2264	case tok_xdigit:
2265	case tok_blank:
2266	/ Ignore the rest of the line if we don't need the input of*
2267	this line. /*
2268	if (ignore_content)
2269	{
2270	lr_ignore_rest (ldfile, `0`);
2271	break;
2272	}
2273
2274	class_bit = BITw (now->tok);
2275	class256_bit = BIT (now->tok);
2276	handle_digits = `0`;
2277	read_charclass:
2278	ctype->class_done \|= class_bit;
2279	last_token = tok_none;
2280	ellipsis_token = tok_none;
2281	step = `1`;
2282	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2283	while (now->tok != tok_eol && now->tok != tok_eof)
2284	{
2285	uint32_t wch;
2286	struct charseq *seq;
2287
2288	if (ellipsis_token == tok_none)
2289	{
2290	if (get_character (now, charmap, repertoire, &seq, &wch))
2291	goto err_label;
2292
2293	if (!ignore_content && seq != NULL && seq->nbytes == `1`)
2294	/ Yep, we can store information about this byte*
2295	sequence. /*
2296	ctype->class256_collection[seq->bytes[`0`]] \|= class256_bit;
2297
2298	if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
2299	&& class_bit != `0`)
2300	/ We have the UCS4 position. /
2301	*find_idx (ctype, &ctype->class_collection,
2302	&ctype->class_collection_max,
2303	&ctype->class_collection_act, wch) \|= class_bit;
2304
2305	last_token = now->tok;
2306	/ Terminate the string. /
2307	if (last_token == tok_bsymbol)
2308	{
2309	now->val.str.startmb[now->val.str.lenmb] = `'\0'`;
2310	last_str = now->val.str.startmb;
2311	}
2312	else
2313	last_str = NULL;
2314	last_wch = wch;
2315	memcpy (last_charcode, now->val.charcode.bytes, `16`);
2316	last_charcode_len = now->val.charcode.nbytes;
2317
2318	if (!ignore_content && handle_digits == `1`)
2319	{
2320	/ We must store the digit values. /
2321	if (ctype->mbdigits_act == ctype->mbdigits_max)
2322	{
2323	ctype->mbdigits_max += `10`;
2324	ctype->mbdigits = xrealloc (ctype->mbdigits,
2325	(ctype->mbdigits_max
2326	* sizeof (char *)));
2327	ctype->wcdigits_max += `10`;
2328	ctype->wcdigits = xrealloc (ctype->wcdigits,
2329	(ctype->wcdigits_max
2330	* sizeof (uint32_t)));
2331	}
2332
2333	ctype->mbdigits[ctype->mbdigits_act++] = seq;
2334	ctype->wcdigits[ctype->wcdigits_act++] = wch;
2335	}
2336	else if (!ignore_content && handle_digits == `2`)
2337	{
2338	/ We must store the digit values. /
2339	if (ctype->outdigits_act >= `10`)
2340	{
2341	lr_error (ldfile, _("\
2342	%s: field `%s' does not contain exactly ten entries"),
2343	"LC_CTYPE", "outdigit");
2344	lr_ignore_rest (ldfile, `0`);
2345	break;
2346	}
2347
2348	ctype->mboutdigits[ctype->outdigits_act] = seq;
2349	ctype->wcoutdigits[ctype->outdigits_act] = wch;
2350	++ctype->outdigits_act;
2351	}
2352	}
2353	else
2354	{
2355	/ Now it gets complicated. We have to resolve the*
2356	ellipsis problem. First we must distinguish between
2357	the different kind of ellipsis and this must match the
2358	tokens we have seen. /*
2359	assert (last_token != tok_none);
2360
2361	if (last_token != now->tok)
2362	{
2363	lr_error (ldfile, _("\
2364	ellipsis range must be marked by two operands of same type"));
2365	lr_ignore_rest (ldfile, `0`);
2366	break;
2367	}
2368
2369	if (last_token == tok_bsymbol)
2370	{
2371	if (ellipsis_token == tok_ellipsis3)
2372	lr_error (ldfile, _("with symbolic name range values \
2373	the absolute ellipsis `...' must not be used"));
2374
2375	charclass_symbolic_ellipsis (ldfile, ctype, charmap,
2376	repertoire, now, last_str,
2377	class256_bit, class_bit,
2378	(ellipsis_token
2379	== tok_ellipsis4
2380	? `10` : `16`),
2381	ignore_content,
2382	handle_digits, step);
2383	}
2384	else if (last_token == tok_ucs4)
2385	{
2386	if (ellipsis_token != tok_ellipsis2)
2387	lr_error (ldfile, _("\
2388	with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2389
2390	charclass_ucs4_ellipsis (ldfile, ctype, charmap,
2391	repertoire, now, last_wch,
2392	class256_bit, class_bit,
2393	ignore_content, handle_digits,
2394	step);
2395	}
2396	else
2397	{
2398	assert (last_token == tok_charcode);
2399
2400	if (ellipsis_token != tok_ellipsis3)
2401	lr_error (ldfile, _("\
2402	with character code range values one must use the absolute ellipsis `...'"));
2403
2404	charclass_charcode_ellipsis (ldfile, ctype, charmap,
2405	repertoire, now,
2406	last_charcode,
2407	last_charcode_len,
2408	class256_bit, class_bit,
2409	ignore_content,
2410	handle_digits);
2411	}
2412
2413	/ Now we have used the last value. /
2414	last_token = tok_none;
2415	}
2416
2417	/ Next we expect a semicolon or the end of the line. /
2418	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2419	if (now->tok == tok_eol \|\| now->tok == tok_eof)
2420	break;
2421
2422	if (last_token != tok_none
2423	&& now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
2424	{
2425	if (now->tok == tok_ellipsis2_2)
2426	{
2427	now->tok = tok_ellipsis2;
2428	step = `2`;
2429	}
2430	else if (now->tok == tok_ellipsis4_2)
2431	{
2432	now->tok = tok_ellipsis4;
2433	step = `2`;
2434	}
2435
2436	ellipsis_token = now->tok;
2437
2438	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2439	continue;
2440	}
2441
2442	if (now->tok != tok_semicolon)
2443	goto err_label;
2444
2445	/ And get the next character. /
2446	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2447
2448	ellipsis_token = tok_none;
2449	step = `1`;
2450	}
2451	break;
2452
2453	case tok_digit:
2454	/ Ignore the rest of the line if we don't need the input of*
2455	this line. /*
2456	if (ignore_content)
2457	{
2458	lr_ignore_rest (ldfile, `0`);
2459	break;
2460	}
2461
2462	handle_tok_digit:
2463	class_bit = _ISwdigit;
2464	class256_bit = _ISdigit;
2465	handle_digits = `1`;
2466	goto read_charclass;
2467
2468	case tok_outdigit:
2469	/ Ignore the rest of the line if we don't need the input of*
2470	this line. /*
2471	if (ignore_content)
2472	{
2473	lr_ignore_rest (ldfile, `0`);
2474	break;
2475	}
2476
2477	if (ctype->outdigits_act != `0`)
2478	lr_error (ldfile, _("\
2479	%s: field `%s' declared more than once"),
2480	"LC_CTYPE", "outdigit");
2481	class_bit = `0`;
2482	class256_bit = `0`;
2483	handle_digits = `2`;
2484	goto read_charclass;
2485
2486	case tok_toupper:
2487	/ Ignore the rest of the line if we don't need the input of*
2488	this line. /*
2489	if (ignore_content)
2490	{
2491	lr_ignore_rest (ldfile, `0`);
2492	break;
2493	}
2494
2495	mapidx = `0`;
2496	goto read_mapping;
2497
2498	case tok_tolower:
2499	/ Ignore the rest of the line if we don't need the input of*
2500	this line. /*
2501	if (ignore_content)
2502	{
2503	lr_ignore_rest (ldfile, `0`);
2504	break;
2505	}
2506
2507	mapidx = `1`;
2508	goto read_mapping;
2509
2510	case tok_map:
2511	/ Ignore the rest of the line if we don't need the input of*
2512	this line. /*
2513	if (ignore_content)
2514	{
2515	lr_ignore_rest (ldfile, `0`);
2516	break;
2517	}
2518
2519	/ We simply forget the `map' keyword and use the following*
2520	operand to determine the mapping. /*
2521	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2522	if (now->tok == tok_ident \|\| now->tok == tok_string)
2523	{
2524	size_t cnt;
2525
2526	for (cnt = `2`; cnt < ctype->map_collection_nr; ++cnt)
2527	if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == `0`)
2528	break;
2529
2530	if (cnt < ctype->map_collection_nr)
2531	free (now->val.str.startmb);
2532	else
2533	/ OK, it's a new map. /
2534	ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2535
2536	mapidx = cnt;
2537	}
2538	else if (now->tok < tok_toupper \|\| now->tok > tok_tolower)
2539	goto err_label;
2540	else
2541	mapidx = now->tok - tok_toupper;
2542
2543	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2544	/ This better should be a semicolon. /
2545	if (now->tok != tok_semicolon)
2546	goto err_label;
2547
2548	read_mapping:
2549	/ Test whether this mapping was already defined. /
2550	if (ctype->tomap_done[mapidx])
2551	{
2552	lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2553	ctype->mapnames[mapidx]);
2554	lr_ignore_rest (ldfile, `0`);
2555	break;
2556	}
2557	ctype->tomap_done[mapidx] = `1`;
2558
2559	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2560	while (now->tok != tok_eol && now->tok != tok_eof)
2561	{
2562	struct charseq *from_seq;
2563	uint32_t from_wch;
2564	struct charseq *to_seq;
2565	uint32_t to_wch;
2566
2567	/ Every pair starts with an opening brace. /
2568	if (now->tok != tok_open_brace)
2569	goto err_label;
2570
2571	/ Next comes the from-value. /
2572	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2573	if (get_character (now, charmap, repertoire, &from_seq,
2574	&from_wch) != `0`)
2575	goto err_label;
2576
2577	/ The next is a comma. /
2578	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2579	if (now->tok != tok_comma)
2580	goto err_label;
2581
2582	/ And the other value. /
2583	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2584	if (get_character (now, charmap, repertoire, &to_seq,
2585	&to_wch) != `0`)
2586	goto err_label;
2587
2588	/ And the last thing is the closing brace. /
2589	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2590	if (now->tok != tok_close_brace)
2591	goto err_label;
2592
2593	if (!ignore_content)
2594	{
2595	/ Check whether the mapping converts from an ASCII value*
2596	to a non-ASCII value. /*
2597	if (from_seq != NULL && from_seq->nbytes == `1`
2598	&& isascii (from_seq->bytes[`0`])
2599	&& to_seq != NULL && (to_seq->nbytes != `1`
2600	\|\| !isascii (to_seq->bytes[`0`])))
2601	ctype->to_nonascii = `1`;
2602
2603	if (mapidx < `2` && from_seq != NULL && to_seq != NULL
2604	&& from_seq->nbytes == `1` && to_seq->nbytes == `1`)
2605	/ We can use this value. /
2606	ctype->map256_collection[mapidx][from_seq->bytes[`0`]]
2607	= to_seq->bytes[`0`];
2608
2609	if (from_wch != ILLEGAL_CHAR_VALUE
2610	&& to_wch != ILLEGAL_CHAR_VALUE)
2611	/ Both correct values. /
2612	*find_idx (ctype, &ctype->map_collection[mapidx],
2613	&ctype->map_collection_max[mapidx],
2614	&ctype->map_collection_act[mapidx],
2615	from_wch) = to_wch;
2616	}
2617
2618	/ Now comes a semicolon or the end of the line/file. /
2619	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2620	if (now->tok == tok_semicolon)
2621	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2622	}
2623	break;
2624
2625	case tok_translit_start:
2626	/ Ignore the entire translit section with its peculiar syntax*
2627	if we don't need the input. /*
2628	if (ignore_content)
2629	{
2630	do
2631	{
2632	lr_ignore_rest (ldfile, `0`);
2633	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2634	}
2635	while (now->tok != tok_translit_end && now->tok != tok_eof);
2636
2637	if (now->tok == tok_eof)
2638	lr_error (ldfile, _(\
2639	"%s: `translit_start' section does not end with `translit_end'"),
2640	"LC_CTYPE");
2641
2642	break;
2643	}
2644
2645	/ The rest of the line better should be empty. /
2646	lr_ignore_rest (ldfile, `1`);
2647
2648	/ We count here the number of allocated entries in the `translit'*
2649	array. /*
2650	cnt = `0`;
2651
2652	ldfile->translate_strings = `1`;
2653	ldfile->return_widestr = `1`;
2654
2655	/ We proceed until we see the `translit_end' token. /
2656	while (now = lr_token (ldfile, charmap, NULL, repertoire, verbose),
2657	now->tok != tok_translit_end && now->tok != tok_eof)
2658	{
2659	if (now->tok == tok_eol)
2660	/ Ignore empty lines. /
2661	continue;
2662
2663	if (now->tok == tok_include)
2664	{
2665	/ We have to include locale. /
2666	const char *locale_name;
2667	const char *repertoire_name;
2668	struct translit_include_t include_stmt, *include_ptr;
2669
2670	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2671	/ This should be a string or an identifier. In any*
2672	case something to name a locale. /*
2673	if (now->tok != tok_string && now->tok != tok_ident)
2674	{
2675	translit_syntax:
2676	lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2677	lr_ignore_rest (ldfile, `0`);
2678	continue;
2679	}
2680	locale_name = now->val.str.startmb;
2681
2682	/ Next should be a semicolon. /
2683	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2684	if (now->tok != tok_semicolon)
2685	goto translit_syntax;
2686
2687	/ Now the repertoire name. /
2688	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2689	if ((now->tok != tok_string && now->tok != tok_ident)
2690	\|\| now->val.str.startmb == NULL)
2691	goto translit_syntax;
2692	repertoire_name = now->val.str.startmb;
2693	if (repertoire_name[`0`] == `'\0'`)
2694	/ Ignore the empty string. /
2695	repertoire_name = NULL;
2696
2697	/ Save the include statement for later processing. /
2698	include_stmt = (struct translit_include_t *)
2699	xmalloc (sizeof (struct translit_include_t));
2700	include_stmt->copy_locale = locale_name;
2701	include_stmt->copy_repertoire = repertoire_name;
2702	include_stmt->next = NULL;
2703
2704	include_ptr = &ctype->translit_include;
2705	while (*include_ptr != NULL)
2706	include_ptr = &(*include_ptr)->next;
2707	*include_ptr = include_stmt;
2708
2709	/ The rest of the line must be empty. /
2710	lr_ignore_rest (ldfile, `1`);
2711
2712	/ Make sure the locale is read. /
2713	add_to_readlist (LC_CTYPE, locale_name, repertoire_name,
2714	`1`, NULL);
2715	continue;
2716	}
2717	else if (now->tok == tok_default_missing)
2718	{
2719	uint32_t *wstr;
2720
2721	while (`1`)
2722	{
2723	/ We expect a single character or string as the*
2724	argument. /*
2725	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2726	wstr = read_widestring (ldfile, now, charmap,
2727	repertoire);
2728
2729	if (wstr != NULL)
2730	{
2731	if (ctype->default_missing != NULL)
2732	{
2733	lr_error (ldfile, _("\
2734	%s: duplicate `default_missing' definition"), "LC_CTYPE");
2735	record_error_at_line (`0`, `0`,
2736	ctype->default_missing_file,
2737	ctype->default_missing_lineno,
2738	_("\
2739	previous definition was here"));
2740	}
2741	else
2742	{
2743	ctype->default_missing = wstr;
2744	ctype->default_missing_file = ldfile->fname;
2745	ctype->default_missing_lineno = ldfile->lineno;
2746	}
2747	/ We can have more entries, ignore them. /
2748	lr_ignore_rest (ldfile, `0`);
2749	break;
2750	}
2751	else if (wstr == (uint32_t *) -`1l`)
2752	/* This was a syntax error.  */
2753	break;
2754
2755	/ Maybe there is another replacement we can use. /
2756	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2757	if (now->tok == tok_eol \|\| now->tok == tok_eof)
2758	{
2759	/ Nothing found. We tell the user. /
2760	lr_error (ldfile, _("\
2761	%s: no representable `default_missing' definition found"), "LC_CTYPE");
2762	break;
2763	}
2764	if (now->tok != tok_semicolon)
2765	goto translit_syntax;
2766	}
2767
2768	continue;
2769	}
2770	else if (now->tok == tok_translit_ignore)
2771	{
2772	read_translit_ignore_entry (ldfile, ctype, charmap,
2773	repertoire);
2774	continue;
2775	}
2776
2777	read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2778	}
2779	ldfile->return_widestr = `0`;
2780
2781	if (now->tok == tok_eof)
2782	lr_error (ldfile, _(\
2783	"%s: `translit_start' section does not end with `translit_end'"),
2784	"LC_CTYPE");
2785
2786	break;
2787
2788	case tok_ident:
2789	/ Ignore the rest of the line if we don't need the input of*
2790	this line. /*
2791	if (ignore_content)
2792	{
2793	lr_ignore_rest (ldfile, `0`);
2794	break;
2795	}
2796
2797	/ This could mean one of several things. First test whether*
2798	it's a character class name. /*
2799	for (cnt = `0`; cnt < ctype->nr_charclass; ++cnt)
2800	if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == `0`)
2801	break;
2802	if (cnt < ctype->nr_charclass)
2803	{
2804	class_bit = _ISwbit (cnt);
2805	class256_bit = cnt <= `11` ? _ISbit (cnt) : `0`;
2806	free (now->val.str.startmb);
2807	goto read_charclass;
2808	}
2809	for (cnt = `0`; cnt < ctype->map_collection_nr; ++cnt)
2810	if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == `0`)
2811	break;
2812	if (cnt < ctype->map_collection_nr)
2813	{
2814	mapidx = cnt;
2815	free (now->val.str.startmb);
2816	goto read_mapping;
2817	}
2818	break;
2819
2820	case tok_end:
2821	/ Next we assume `LC_CTYPE'. /
2822	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2823	if (now->tok == tok_eof)
2824	break;
2825	if (now->tok == tok_eol)
2826	lr_error (ldfile, _("%s: incomplete `END' line"),
2827	"LC_CTYPE");
2828	else if (now->tok != tok_lc_ctype)
2829	lr_error (ldfile, _("\
2830	%1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2831	lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
2832	return;
2833
2834	default:
2835	err_label:
2836	if (now->tok != tok_eof)
2837	SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2838	}
2839
2840	/ Prepare for the next round. /
2841	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2842	nowtok = now->tok;
2843	}
2844
2845	/ When we come here we reached the end of the file. /
2846	lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2847	}
2848
2849
2850	/ Subroutine of set_class_defaults, below. /
2851	static void
2852	set_one_default (struct locale_ctype_t *ctype,
2853	const struct charmap_t *charmap,
2854	int bitpos, int from, int to)
2855	{
2856	char tmp[`2`];
2857	int ch;
2858	int bit = _ISbit (bitpos);
2859	int bitw = _ISwbit (bitpos);
2860	/ Define string. /
2861	strcpy (tmp, "?");
2862
2863	for (ch = from; ch <= to; ++ch)
2864	{
2865	struct charseq *seq;
2866	tmp[`0`] = ch;
2867
2868	seq = charmap_find_value (charmap, tmp, `1`);
2869	if (seq == NULL)
2870	{
2871	char buf[`10`];
2872	sprintf (buf, "U%08X", ch);
2873	seq = charmap_find_value (charmap, buf, `9`);
2874	}
2875	if (seq == NULL)
2876	{
2877	record_error (`0`, `0`, _("\
2878	%s: character `%s' not defined while needed as default value"),
2879	"LC_CTYPE", tmp);
2880	}
2881	else if (seq->nbytes != `1`)
2882	record_error (`0`, `0`, _("\
2883	%s: character `%s' in charmap not representable with one byte"),
2884	"LC_CTYPE", tmp);
2885	else
2886	ctype->class256_collection[seq->bytes[`0`]] \|= bit;
2887
2888	/ No need to search here, the ASCII value is also the Unicode*
2889	value. /*
2890	ELEM (ctype, class_collection, , ch) \|= bitw;
2891	}
2892	}
2893
2894	static void
2895	set_class_defaults (struct locale_ctype_t *ctype,
2896	const struct charmap_t *charmap,
2897	struct repertoire_t *repertoire)
2898	{
2899	#define set_default(bitpos, from, to) \
2900	set_one_default (ctype, charmap, bitpos, from, to)
2901
2902	/* This function defines the default values for the classes and conversions
2903	according to POSIX.2 2.5.2.1.
2904	It may seem that the order of these if-blocks is arbitrary but it is NOT.
2905	Don't move them unless you know what you are doing!  */
2906
2907	/ Set default values if keyword was not present. /
2908	if ((ctype->class_done & BITw (tok_upper)) == `0`)
2909	/* "If this keyword [upper] is not specified, the uppercase letters
2910	`A' through `Z', ..., shall automatically belong to this class,
2911	with implementation defined character values." [P1003.2, 2.5.2.1]  */
2912	set_default (BITPOS (tok_upper), `'A'`, `'Z'`);
2913
2914	if ((ctype->class_done & BITw (tok_lower)) == `0`)
2915	/ "If this keyword [lower] is not specified, the lowercase letters*
2916	`a' through `z', ..., shall automatically belong to this class,
2917	with implementation defined character values." [P1003.2, 2.5.2.1] /*
2918	set_default (BITPOS (tok_lower), `'a'`, `'z'`);
2919
2920	if ((ctype->class_done & BITw (tok_alpha)) == `0`)
2921	{
2922	/ Table 2-6 in P1003.2 says that characters in class `upper' or*
2923	class `lower' must* be in class `alpha'. /
2924	unsigned long int mask = BIT (tok_upper) \| BIT (tok_lower);
2925	unsigned long int maskw = BITw (tok_upper) \| BITw (tok_lower);
2926
2927	for (size_t cnt = `0`; cnt < `256`; ++cnt)
2928	if ((ctype->class256_collection[cnt] & mask) != `0`)
2929	ctype->class256_collection[cnt] \|= BIT (tok_alpha);
2930
2931	for (size_t cnt = `0`; cnt < ctype->class_collection_act; ++cnt)
2932	if ((ctype->class_collection[cnt] & maskw) != `0`)
2933	ctype->class_collection[cnt] \|= BITw (tok_alpha);
2934	}
2935
2936	if ((ctype->class_done & BITw (tok_digit)) == `0`)
2937	/ "If this keyword [digit] is not specified, the digits `0' through*
2938	`9', ..., shall automatically belong to this class, with
2939	implementation-defined character values." [P1003.2, 2.5.2.1] /*
2940	set_default (BITPOS (tok_digit), `'0'`, `'9'`);
2941
2942	/ "Only characters specified for the `alpha' and `digit' keyword*
2943	shall be specified. Characters specified for the keyword `alpha'
2944	and `digit' are automatically included in this class. /*
2945	{
2946	unsigned long int mask = BIT (tok_alpha) \| BIT (tok_digit);
2947	unsigned long int maskw = BITw (tok_alpha) \| BITw (tok_digit);
2948
2949	for (size_t cnt = `0`; cnt < `256`; ++cnt)
2950	if ((ctype->class256_collection[cnt] & mask) != `0`)
2951	ctype->class256_collection[cnt] \|= BIT (tok_alnum);
2952
2953	for (size_t cnt = `0`; cnt < ctype->class_collection_act; ++cnt)
2954	if ((ctype->class_collection[cnt] & maskw) != `0`)
2955	ctype->class_collection[cnt] \|= BITw (tok_alnum);
2956	}
2957
2958	if ((ctype->class_done & BITw (tok_space)) == `0`)
2959	/ "If this keyword [space] is not specified, the characters <space>,*
2960	<form-feed>, <newline>, <carriage-return>, <tab>, and
2961	<vertical-tab>, ..., shall automatically belong to this class,
2962	with implementation-defined character values." [P1003.2, 2.5.2.1] /*
2963	{
2964	struct charseq *seq;
2965
2966	seq = charmap_find_value (charmap, "space", `5`);
2967	if (seq == NULL)
2968	seq = charmap_find_value (charmap, "SP", `2`);
2969	if (seq == NULL)
2970	seq = charmap_find_value (charmap, "U00000020", `9`);
2971	if (seq == NULL)
2972	{
2973	record_error (`0`, `0`, _("\
2974	%s: character `%s' not defined while needed as default value"),
2975	"LC_CTYPE", "<space>");
2976	}
2977	else if (seq->nbytes != `1`)
2978	record_error (`0`, `0`, _("\
2979	%s: character `%s' in charmap not representable with one byte"),
2980	"LC_CTYPE", "<space>");
2981	else
2982	ctype->class256_collection[seq->bytes[`0`]] \|= BIT (tok_space);
2983
2984	/ No need to search. /
2985	ELEM (ctype, class_collection, , L`' '`) \|= BITw (tok_space);
2986
2987	seq = charmap_find_value (charmap, "form-feed", `9`);
2988	if (seq == NULL)
2989	seq = charmap_find_value (charmap, "U0000000C", `9`);
2990	if (seq == NULL)
2991	{
2992	record_error (`0`, `0`, _("\
2993	%s: character `%s' not defined while needed as default value"),
2994	"LC_CTYPE", "<form-feed>");
2995	}
2996	else if (seq->nbytes != `1`)
2997	record_error (`0`, `0`, _("\
2998	%s: character `%s' in charmap not representable with one byte"),
2999	"LC_CTYPE", "<form-feed>");
3000	else
3001	ctype->class256_collection[seq->bytes[`0`]] \|= BIT (tok_space);
3002
3003	/ No need to search. /
3004	ELEM (ctype, class_collection, , L`'\f'`) \|= BITw (tok_space);
3005
3006
3007	seq = charmap_find_value (charmap, "newline", `7`);
3008	if (seq == NULL)
3009	seq = charmap_find_value (charmap, "U0000000A", `9`);
3010	if (seq == NULL)
3011	{
3012	record_error (`0`, `0`, _("\
3013	%s: character `%s' not defined while needed as default value"),
3014	"LC_CTYPE", "<newline>");
3015	}
3016	else if (seq->nbytes != `1`)
3017	record_error (`0`, `0`, _("\
3018	%s: character `%s' in charmap not representable with one byte"),
3019	"LC_CTYPE", "<newline>");
3020	else
3021	ctype->class256_collection[seq->bytes[`0`]] \|= BIT (tok_space);
3022
3023	/ No need to search. /
3024	ELEM (ctype, class_collection, , L`'\n'`) \|= BITw (tok_space);
3025
3026
3027	seq = charmap_find_value (charmap, "carriage-return", `15`);
3028	if (seq == NULL)
3029	seq = charmap_find_value (charmap, "U0000000D", `9`);
3030	if (seq == NULL)
3031	{
3032	record_error (`0`, `0`, _("\
3033	%s: character `%s' not defined while needed as default value"),
3034	"LC_CTYPE", "<carriage-return>");
3035	}
3036	else if (seq->nbytes != `1`)
3037	record_error (`0`, `0`, _("\
3038	%s: character `%s' in charmap not representable with one byte"),
3039	"LC_CTYPE", "<carriage-return>");
3040	else
3041	ctype->class256_collection[seq->bytes[`0`]] \|= BIT (tok_space);
3042
3043	/ No need to search. /
3044	ELEM (ctype, class_collection, , L`'\r'`) \|= BITw (tok_space);
3045
3046
3047	seq = charmap_find_value (charmap, "tab", `3`);
3048	if (seq == NULL)
3049	seq = charmap_find_value (charmap, "U00000009", `9`);
3050	if (seq == NULL)
3051	{
3052	record_error (`0`, `0`, _("\
3053	%s: character `%s' not defined while needed as default value"),
3054	"LC_CTYPE", "<tab>");
3055	}
3056	else if (seq->nbytes != `1`)
3057	record_error (`0`, `0`, _("\
3058	%s: character `%s' in charmap not representable with one byte"),
3059	"LC_CTYPE", "<tab>");
3060	else
3061	ctype->class256_collection[seq->bytes[`0`]] \|= BIT (tok_space);
3062
3063	/ No need to search. /
3064	ELEM (ctype, class_collection, , L`'\t'`) \|= BITw (tok_space);
3065
3066
3067	seq = charmap_find_value (charmap, "vertical-tab", `12`);
3068	if (seq == NULL)
3069	seq = charmap_find_value (charmap, "U0000000B", `9`);
3070	if (seq == NULL)
3071	{
3072	record_error (`0`, `0`, _("\
3073	%s: character `%s' not defined while needed as default value"),
3074	"LC_CTYPE", "<vertical-tab>");
3075	}
3076	else if (seq->nbytes != `1`)
3077	record_error (`0`, `0`, _("\
3078	%s: character `%s' in charmap not representable with one byte"),
3079	"LC_CTYPE", "<vertical-tab>");
3080	else
3081	ctype->class256_collection[seq->bytes[`0`]] \|= BIT (tok_space);
3082
3083	/ No need to search. /
3084	ELEM (ctype, class_collection, , L`'\v'`) \|= BITw (tok_space);
3085	}
3086
3087	if ((ctype->class_done & BITw (tok_xdigit)) == `0`)
3088	/* "If this keyword is not specified, the digits `0' to `9', the
3089	uppercase letters `A' through `F', and the lowercase letters `a'
3090	through `f', ..., shall automatically belong to this class, with
3091	implementation defined character values." [P1003.2, 2.5.2.1]  */
3092	{
3093	set_default (BITPOS (tok_xdigit), `'0'`, `'9'`);
3094	set_default (BITPOS (tok_xdigit), `'A'`, `'F'`);
3095	set_default (BITPOS (tok_xdigit), `'a'`, `'f'`);
3096	}
3097
3098	if ((ctype->class_done & BITw (tok_blank)) == `0`)
3099	/ "If this keyword [blank] is unspecified, the characters <space> and*
3100	<tab> shall belong to this character class." [P1003.2, 2.5.2.1] /*
3101	{
3102	struct charseq *seq;
3103
3104	seq = charmap_find_value (charmap, "space", `5`);
3105	if (seq == NULL)
3106	seq = charmap_find_value (charmap, "SP", `2`);
3107	if (seq == NULL)
3108	seq = charmap_find_value (charmap, "U00000020", `9`);
3109	if (seq == NULL)
3110	{
3111	record_error (`0`, `0`, _("\
3112	%s: character `%s' not defined while needed as default value"),
3113	"LC_CTYPE", "<space>");
3114	}
3115	else if (seq->nbytes != `1`)
3116	record_error (`0`, `0`, _("\
3117	%s: character `%s' in charmap not representable with one byte"),
3118	"LC_CTYPE", "<space>");
3119	else
3120	ctype->class256_collection[seq->bytes[`0`]] \|= BIT (tok_blank);
3121
3122	/ No need to search. /
3123	ELEM (ctype, class_collection, , L`' '`) \|= BITw (tok_blank);
3124
3125
3126	seq = charmap_find_value (charmap, "tab", `3`);
3127	if (seq == NULL)
3128	seq = charmap_find_value (charmap, "U00000009", `9`);
3129	if (seq == NULL)
3130	{
3131	record_error (`0`, `0`, _("\
3132	%s: character `%s' not defined while needed as default value"),
3133	"LC_CTYPE", "<tab>");
3134	}
3135	else if (seq->nbytes != `1`)
3136	record_error (`0`, `0`, _("\
3137	%s: character `%s' in charmap not representable with one byte"),
3138	"LC_CTYPE", "<tab>");
3139	else
3140	ctype->class256_collection[seq->bytes[`0`]] \|= BIT (tok_blank);
3141
3142	/ No need to search. /
3143	ELEM (ctype, class_collection, , L`'\t'`) \|= BITw (tok_blank);
3144	}
3145
3146	if ((ctype->class_done & BITw (tok_graph)) == `0`)
3147	/ "If this keyword [graph] is not specified, characters specified for*
3148	the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3149	shall belong to this character class." [P1003.2, 2.5.2.1] /*
3150	{
3151	unsigned long int mask = BIT (tok_upper) \| BIT (tok_lower) \|
3152	BIT (tok_alpha) \| BIT (tok_digit) \| BIT (tok_xdigit) \| BIT (tok_punct);
3153	unsigned long int maskw = BITw (tok_upper) \| BITw (tok_lower) \|
3154	BITw (tok_alpha) \| BITw (tok_digit) \| BITw (tok_xdigit) \|
3155	BITw (tok_punct);
3156
3157	for (size_t cnt = `0`; cnt < ctype->class_collection_act; ++cnt)
3158	if ((ctype->class_collection[cnt] & maskw) != `0`)
3159	ctype->class_collection[cnt] \|= BITw (tok_graph);
3160
3161	for (size_t cnt = `0`; cnt < `256`; ++cnt)
3162	if ((ctype->class256_collection[cnt] & mask) != `0`)
3163	ctype->class256_collection[cnt] \|= BIT (tok_graph);
3164	}
3165
3166	if ((ctype->class_done & BITw (tok_print)) == `0`)
3167	/ "If this keyword [print] is not provided, characters specified for*
3168	the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3169	and the <space> character shall belong to this character class."
3170	[P1003.2, 2.5.2.1] /*
3171	{
3172	unsigned long int mask = BIT (tok_upper) \| BIT (tok_lower) \|
3173	BIT (tok_alpha) \| BIT (tok_digit) \| BIT (tok_xdigit) \| BIT (tok_punct);
3174	unsigned long int maskw = BITw (tok_upper) \| BITw (tok_lower) \|
3175	BITw (tok_alpha) \| BITw (tok_digit) \| BITw (tok_xdigit) \|
3176	BITw (tok_punct);
3177	struct charseq *seq;
3178
3179	for (size_t cnt = `0`; cnt < ctype->class_collection_act; ++cnt)
3180	if ((ctype->class_collection[cnt] & maskw) != `0`)
3181	ctype->class_collection[cnt] \|= BITw (tok_print);
3182
3183	for (size_t cnt = `0`; cnt < `256`; ++cnt)
3184	if ((ctype->class256_collection[cnt] & mask) != `0`)
3185	ctype->class256_collection[cnt] \|= BIT (tok_print);
3186
3187
3188	seq = charmap_find_value (charmap, "space", `5`);
3189	if (seq == NULL)
3190	seq = charmap_find_value (charmap, "SP", `2`);
3191	if (seq == NULL)
3192	seq = charmap_find_value (charmap, "U00000020", `9`);
3193	if (seq == NULL)
3194	{
3195	record_error (`0`, `0`, _("\
3196	%s: character `%s' not defined while needed as default value"),
3197	"LC_CTYPE", "<space>");
3198	}
3199	else if (seq->nbytes != `1`)
3200	record_error (`0`, `0`, _("\
3201	%s: character `%s' in charmap not representable with one byte"),
3202	"LC_CTYPE", "<space>");
3203	else
3204	ctype->class256_collection[seq->bytes[`0`]] \|= BIT (tok_print);
3205
3206	/ No need to search. /
3207	ELEM (ctype, class_collection, , L`' '`) \|= BITw (tok_print);
3208	}
3209
3210	if (ctype->tomap_done[`0`] == `0`)
3211	/ "If this keyword [toupper] is not specified, the lowercase letters*
3212	`a' through `z', and their corresponding uppercase letters `A' to
3213	`Z', ..., shall automatically be included, with implementation-
3214	defined character values." [P1003.2, 2.5.2.1] /*
3215	{
3216	char tmp[`4`];
3217	int ch;
3218
3219	strcpy (tmp, "<?>");
3220
3221	for (ch = `'a'`; ch <= `'z'`; ++ch)
3222	{
3223	struct charseq seq_from, seq_to;
3224
3225	tmp[`1`] = (char) ch;
3226
3227	seq_from = charmap_find_value (charmap, &tmp[`1`], `1`);
3228	if (seq_from == NULL)
3229	{
3230	char buf[`10`];
3231	sprintf (buf, "U%08X", ch);
3232	seq_from = charmap_find_value (charmap, buf, `9`);
3233	}
3234	if (seq_from == NULL)
3235	{
3236	record_error (`0`, `0`, _("\
3237	%s: character `%s' not defined while needed as default value"),
3238	"LC_CTYPE", tmp);
3239	}
3240	else if (seq_from->nbytes != `1`)
3241	{
3242	record_error (`0`, `0`, _("\
3243	%s: character `%s' needed as default value not representable with one byte"),
3244	"LC_CTYPE", tmp);
3245	}
3246	else
3247	{
3248	/ This conversion is implementation defined. /
3249	tmp[`1`] = (char) (ch + (`'A'` - `'a'`));
3250	seq_to = charmap_find_value (charmap, &tmp[`1`], `1`);
3251	if (seq_to == NULL)
3252	{
3253	char buf[`10`];
3254	sprintf (buf, "U%08X", ch + (`'A'` - `'a'`));
3255	seq_to = charmap_find_value (charmap, buf, `9`);
3256	}
3257	if (seq_to == NULL)
3258	{
3259	record_error (`0`, `0`, _("\
3260	%s: character `%s' not defined while needed as default value"),
3261	"LC_CTYPE", tmp);
3262	}
3263	else if (seq_to->nbytes != `1`)
3264	{
3265	record_error (`0`, `0`, _("\
3266	%s: character `%s' needed as default value not representable with one byte"),
3267	"LC_CTYPE", tmp);
3268	}
3269	else
3270	/ The index [0] is determined by the order of the*
3271	`ctype_map_newP' calls in `ctype_startup'. /*
3272	ctype->map256_collection[`0`][seq_from->bytes[`0`]]
3273	= seq_to->bytes[`0`];
3274	}
3275
3276	/ No need to search. /
3277	ELEM (ctype, map_collection, [`0`], ch) = ch + (`'A'` - `'a'`);
3278	}
3279	}
3280
3281	if (ctype->tomap_done[`1`] == `0`)
3282	/ "If this keyword [tolower] is not specified, the mapping shall be*
3283	the reverse mapping of the one specified to `toupper'." [P1003.2] /*
3284	{
3285	for (size_t cnt = `0`; cnt < ctype->map_collection_act[`0`]; ++cnt)
3286	if (ctype->map_collection[`0`][cnt] != `0`)
3287	ELEM (ctype, map_collection, [`1`],
3288	ctype->map_collection[`0`][cnt])
3289	= ctype->charnames[cnt];
3290
3291	for (size_t cnt = `0`; cnt < `256`; ++cnt)
3292	if (ctype->map256_collection[`0`][cnt] != `0`)
3293	ctype->map256_collection[`1`][ctype->map256_collection[`0`][cnt]] = cnt;
3294	}
3295
3296	if (ctype->outdigits_act != `10`)
3297	{
3298	if (ctype->outdigits_act != `0`)
3299	record_error (`0`, `0`, _("\
3300	%s: field `%s' does not contain exactly ten entries"),
3301	"LC_CTYPE", "outdigit");
3302
3303	for (size_t cnt = ctype->outdigits_act; cnt < `10`; ++cnt)
3304	{
3305	ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3306	(char *) digits + cnt,
3307	`1`);
3308
3309	if (ctype->mboutdigits[cnt] == NULL)
3310	ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3311	longnames[cnt],
3312	strlen (longnames[cnt]));
3313
3314	if (ctype->mboutdigits[cnt] == NULL)
3315	ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3316	uninames[cnt], `9`);
3317
3318	if (ctype->mboutdigits[cnt] == NULL)
3319	{
3320	/ Provide a replacement. /
3321	record_error (`0`, `0`, _("\
3322	no output digits defined and none of the standard names in the charmap"));
3323
3324	ctype->mboutdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
3325	sizeof (struct charseq)
3326	+ `1`);
3327
3328	/ This is better than nothing. /
3329	ctype->mboutdigits[cnt]->bytes[`0`] = digits[cnt];
3330	ctype->mboutdigits[cnt]->nbytes = `1`;
3331	}
3332
3333	ctype->wcoutdigits[cnt] = L`'0'` + cnt;
3334	}
3335
3336	ctype->outdigits_act = `10`;
3337	}
3338
3339	#undef set_default
3340	}
3341
3342
3343	/ Initialize. Assumes t->p and t->q have already been set. /
3344	static inline void
3345	wctype_table_init (struct wctype_table *t)
3346	{
3347	t->level1 = NULL;
3348	t->level1_alloc = t->level1_size = `0`;
3349	t->level2 = NULL;
3350	t->level2_alloc = t->level2_size = `0`;
3351	t->level3 = NULL;
3352	t->level3_alloc = t->level3_size = `0`;
3353	}
3354
3355	/ Retrieve an entry. /
3356	static inline int
3357	wctype_table_get (struct wctype_table *t, uint32_t wc)
3358	{
3359	uint32_t index1 = wc >> (t->q + t->p + `5`);
3360	if (index1 < t->level1_size)
3361	{
3362	uint32_t lookup1 = t->level1[index1];
3363	if (lookup1 != EMPTY)
3364	{
3365	uint32_t index2 = ((wc >> (t->p + `5`)) & ((`1` << t->q) - `1`))
3366	+ (lookup1 << t->q);
3367	uint32_t lookup2 = t->level2[index2];
3368	if (lookup2 != EMPTY)
3369	{
3370	uint32_t index3 = ((wc >> `5`) & ((`1` << t->p) - `1`))
3371	+ (lookup2 << t->p);
3372	uint32_t lookup3 = t->level3[index3];
3373	uint32_t index4 = wc & `0x1f`;
3374
3375	return (lookup3 >> index4) & `1`;
3376	}
3377	}
3378	}
3379	return `0`;
3380	}
3381
3382	/ Add one entry. /
3383	static void
3384	wctype_table_add (struct wctype_table *t, uint32_t wc)
3385	{
3386	uint32_t index1 = wc >> (t->q + t->p + `5`);
3387	uint32_t index2 = (wc >> (t->p + `5`)) & ((`1` << t->q) - `1`);
3388	uint32_t index3 = (wc >> `5`) & ((`1` << t->p) - `1`);
3389	uint32_t index4 = wc & `0x1f`;
3390	size_t i, i1, i2;
3391
3392	if (index1 >= t->level1_size)
3393	{
3394	if (index1 >= t->level1_alloc)
3395	{
3396	size_t alloc = `2` * t->level1_alloc;
3397	if (alloc <= index1)
3398	alloc = index1 + `1`;
3399	t->level1 = (uint32_t ) xrealloc ((char* *) t->level1,
3400	alloc * sizeof (uint32_t));
3401	t->level1_alloc = alloc;
3402	}
3403	while (index1 >= t->level1_size)
3404	t->level1[t->level1_size++] = EMPTY;
3405	}
3406
3407	if (t->level1[index1] == EMPTY)
3408	{
3409	if (t->level2_size == t->level2_alloc)
3410	{
3411	size_t alloc = `2` * t->level2_alloc + `1`;
3412	t->level2 = (uint32_t ) xrealloc ((char* *) t->level2,
3413	(alloc << t->q) * sizeof (uint32_t));
3414	t->level2_alloc = alloc;
3415	}
3416	i1 = t->level2_size << t->q;
3417	i2 = (t->level2_size + `1`) << t->q;
3418	for (i = i1; i < i2; i++)
3419	t->level2[i] = EMPTY;
3420	t->level1[index1] = t->level2_size++;
3421	}
3422
3423	index2 += t->level1[index1] << t->q;
3424
3425	if (t->level2[index2] == EMPTY)
3426	{
3427	if (t->level3_size == t->level3_alloc)
3428	{
3429	size_t alloc = `2` * t->level3_alloc + `1`;
3430	t->level3 = (uint32_t ) xrealloc ((char* *) t->level3,
3431	(alloc << t->p) * sizeof (uint32_t));
3432	t->level3_alloc = alloc;
3433	}
3434	i1 = t->level3_size << t->p;
3435	i2 = (t->level3_size + `1`) << t->p;
3436	for (i = i1; i < i2; i++)
3437	t->level3[i] = `0`;
3438	t->level2[index2] = t->level3_size++;
3439	}
3440
3441	index3 += t->level2[index2] << t->p;
3442
3443	t->level3[index3] \|= (uint32_t)`1` << index4;
3444	}
3445
3446	/ Finalize and shrink. /
3447	static void
3448	add_locale_wctype_table (struct locale_file file, struct* wctype_table *t)
3449	{
3450	size_t i, j, k;
3451	uint32_t reorder3[t->level3_size];
3452	uint32_t reorder2[t->level2_size];
3453	uint32_t level2_offset, level3_offset;
3454
3455	/ Uniquify level3 blocks. /
3456	k = `0`;
3457	for (j = `0`; j < t->level3_size; j++)
3458	{
3459	for (i = `0`; i < k; i++)
3460	if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
3461	(`1` << t->p) * sizeof (uint32_t)) == `0`)
3462	break;
3463	/ Relocate block j to block i. /
3464	reorder3[j] = i;
3465	if (i == k)
3466	{
3467	if (i != j)
3468	memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
3469	(`1` << t->p) * sizeof (uint32_t));
3470	k++;
3471	}
3472	}
3473	t->level3_size = k;
3474
3475	for (i = `0`; i < (t->level2_size << t->q); i++)
3476	if (t->level2[i] != EMPTY)
3477	t->level2[i] = reorder3[t->level2[i]];
3478
3479	/ Uniquify level2 blocks. /
3480	k = `0`;
3481	for (j = `0`; j < t->level2_size; j++)
3482	{
3483	for (i = `0`; i < k; i++)
3484	if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
3485	(`1` << t->q) * sizeof (uint32_t)) == `0`)
3486	break;
3487	/ Relocate block j to block i. /
3488	reorder2[j] = i;
3489	if (i == k)
3490	{
3491	if (i != j)
3492	memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
3493	(`1` << t->q) * sizeof (uint32_t));
3494	k++;
3495	}
3496	}
3497	t->level2_size = k;
3498
3499	for (i = `0`; i < t->level1_size; i++)
3500	if (t->level1[i] != EMPTY)
3501	t->level1[i] = reorder2[t->level1[i]];
3502
3503	t->result_size =
3504	`5` * sizeof (uint32_t)
3505	+ t->level1_size * sizeof (uint32_t)
3506	+ (t->level2_size << t->q) * sizeof (uint32_t)
3507	+ (t->level3_size << t->p) * sizeof (uint32_t);
3508
3509	level2_offset =
3510	`5` * sizeof (uint32_t)
3511	+ t->level1_size * sizeof (uint32_t);
3512	level3_offset =
3513	`5` * sizeof (uint32_t)
3514	+ t->level1_size * sizeof (uint32_t)
3515	+ (t->level2_size << t->q) * sizeof (uint32_t);
3516
3517	start_locale_structure (file);
3518	add_locale_uint32 (file, t->q + t->p + `5`);
3519	add_locale_uint32 (file, t->level1_size);
3520	add_locale_uint32 (file, t->p + `5`);
3521	add_locale_uint32 (file, (`1` << t->q) - `1`);
3522	add_locale_uint32 (file, (`1` << t->p) - `1`);
3523
3524	for (i = `0`; i < t->level1_size; i++)
3525	add_locale_uint32
3526	(file,
3527	t->level1[i] == EMPTY
3528	? `0`
3529	: (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
3530
3531	for (i = `0`; i < (t->level2_size << t->q); i++)
3532	add_locale_uint32
3533	(file,
3534	t->level2[i] == EMPTY
3535	? `0`
3536	: (t->level2[i] << t->p) * sizeof (uint32_t) + level3_offset);
3537
3538	add_locale_uint32_array (file, t->level3, t->level3_size << t->p);
3539	end_locale_structure (file);
3540
3541	if (t->level1_alloc > `0`)
3542	free (t->level1);
3543	if (t->level2_alloc > `0`)
3544	free (t->level2);
3545	if (t->level3_alloc > `0`)
3546	free (t->level3);
3547	}
3548
3549	/ Flattens the included transliterations into a translit list.*
3550	Inserts them in the list at `cursor', and returns the new cursor. /*
3551	static struct translit_t **
3552	translit_flatten (struct locale_ctype_t *ctype,
3553	const struct charmap_t *charmap,
3554	struct translit_t **cursor)
3555	{
3556	while (ctype->translit_include != NULL)
3557	{
3558	const char *copy_locale = ctype->translit_include->copy_locale;
3559	const char *copy_repertoire = ctype->translit_include->copy_repertoire;
3560	struct localedef_t *other;
3561
3562	/ Unchain the include statement. During the depth-first traversal*
3563	we don't want to visit any locale more than once. /*
3564	ctype->translit_include = ctype->translit_include->next;
3565
3566	other = find_locale (LC_CTYPE, copy_locale, copy_repertoire, charmap);
3567
3568	if (other == NULL \|\| other->categories[LC_CTYPE].ctype == NULL)
3569	{
3570	record_error (`0`, `0`, _("\
3571	%s: transliteration data from locale `%s' not available"),
3572	"LC_CTYPE", copy_locale);
3573	}
3574	else
3575	{
3576	struct locale_ctype_t *other_ctype =
3577	other->categories[LC_CTYPE].ctype;
3578
3579	cursor = translit_flatten (other_ctype, charmap, cursor);
3580	assert (other_ctype->translit_include == NULL);
3581
3582	if (other_ctype->translit != NULL)
3583	{
3584	/ Insert the other_ctype->translit list at cursor. /*
3585	struct translit_t *endp = other_ctype->translit;
3586	while (endp->next != NULL)
3587	endp = endp->next;
3588
3589	endp->next = *cursor;
3590	*cursor = other_ctype->translit;
3591
3592	/ Avoid any risk of circular lists. /
3593	other_ctype->translit = NULL;
3594
3595	cursor = &endp->next;
3596	}
3597
3598	if (ctype->default_missing == NULL)
3599	ctype->default_missing = other_ctype->default_missing;
3600	}
3601	}
3602
3603	return cursor;
3604	}
3605
3606	static void
3607	allocate_arrays (struct locale_ctype_t ctype, const* struct charmap_t *charmap,
3608	struct repertoire_t *repertoire)
3609	{
3610	size_t idx, nr;
3611	const void *key;
3612	size_t len;
3613	void *vdata;
3614	void *curs;
3615
3616	/ You wonder about this amount of memory? This is only because some*
3617	users do not manage to address the array with unsigned values or
3618	data types with range >= 256. '\200' would result in the array
3619	index -128. To help these poor people we duplicate the entries for
3620	128 up to 255 below the entry for \0. /*
3621	ctype->ctype_b = (char_class_t ) xcalloc (`256` + `128`, sizeof* (char_class_t));
3622	ctype->ctype32_b = (char_class32_t ) xcalloc (`256`, sizeof* (char_class32_t));
3623	ctype->class_b = (uint32_t **)
3624	xmalloc (ctype->nr_charclass * sizeof (uint32_t *));
3625	ctype->class_3level = (struct wctype_table *)
3626	xmalloc (ctype->nr_charclass * sizeof (struct wctype_table));
3627
3628	/ This is the array accessed using the multibyte string elements. /
3629	for (idx = `0`; idx < `256`; ++idx)
3630	ctype->ctype_b[`128` + idx] = ctype->class256_collection[idx];
3631
3632	/ Mirror first 127 entries. We must take care that entry -1 is not*
3633	mirrored because EOF == -1. /*
3634	for (idx = `0`; idx < `127`; ++idx)
3635	ctype->ctype_b[idx] = ctype->ctype_b[`256` + idx];
3636
3637	/ The 32 bit array contains all characters < 0x100. /
3638	for (idx = `0`; idx < ctype->class_collection_act; ++idx)
3639	if (ctype->charnames[idx] < `0x100`)
3640	ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
3641
3642	for (nr = `0`; nr < ctype->nr_charclass; nr++)
3643	{
3644	ctype->class_b[nr] = (uint32_t ) xcalloc (`256` / `32`, sizeof* (uint32_t));
3645
3646	/ We only set CLASS_B for the bits in the ISO C classes, not*
3647	the user defined classes. The number should not change but
3648	who knows. /*
3649	#define LAST_ISO_C_BIT 11
3650	if (nr <= LAST_ISO_C_BIT)
3651	for (idx = `0`; idx < `256`; ++idx)
3652	if (ctype->class256_collection[idx] & _ISbit (nr))
3653	ctype->class_b[nr][idx >> `5`] \|= (uint32_t) `1` << (idx & `0x1f`);
3654	}
3655
3656	for (nr = `0`; nr < ctype->nr_charclass; nr++)
3657	{
3658	struct wctype_table *t;
3659
3660	t = &ctype->class_3level[nr];
3661	t->p = `4`; / or: 5 /
3662	t->q = `7`; / or: 6 /
3663	wctype_table_init (t);
3664
3665	for (idx = `0`; idx < ctype->class_collection_act; ++idx)
3666	if (ctype->class_collection[idx] & _ISwbit (nr))
3667	wctype_table_add (t, ctype->charnames[idx]);
3668
3669	record_verbose (stderr, _("\
3670	%s: table for class \"%s\": %lu bytes"),
3671	"LC_CTYPE", ctype->classnames[nr],
3672	(unsigned long int) t->result_size);
3673	}
3674
3675	/ Room for table of mappings. /
3676	ctype->map_b = (uint32_t *) xmalloc (`2` sizeof (uint32_t *));
3677	ctype->map32_b = (uint32_t **) xmalloc (ctype->map_collection_nr
3678	* sizeof (uint32_t *));
3679	ctype->map_3level = (struct wctrans_table *)
3680	xmalloc (ctype->map_collection_nr * sizeof (struct wctrans_table));
3681
3682	/ Fill in all mappings. /
3683	for (idx = `0`; idx < `2`; ++idx)
3684	{
3685	unsigned int idx2;
3686
3687	/ Allocate table. /
3688	ctype->map_b[idx] = (uint32_t *)
3689	xmalloc ((`256` + `128`) * sizeof (uint32_t));
3690
3691	/ Copy values from collection. /
3692	for (idx2 = `0`; idx2 < `256`; ++idx2)
3693	ctype->map_b[idx][`128` + idx2] = ctype->map256_collection[idx][idx2];
3694
3695	/ Mirror first 127 entries. We must take care not to map entry*
3696	-1 because EOF == -1. /*
3697	for (idx2 = `0`; idx2 < `127`; ++idx2)
3698	ctype->map_b[idx][idx2] = ctype->map_b[idx][`256` + idx2];
3699
3700	/ EOF must map to EOF. /
3701	ctype->map_b[idx][`127`] = EOF;
3702	}
3703
3704	for (idx = `0`; idx < ctype->map_collection_nr; ++idx)
3705	{
3706	unsigned int idx2;
3707
3708	/ Allocate table. /
3709	ctype->map32_b[idx] = (uint32_t ) xmalloc (`256` sizeof (uint32_t));
3710
3711	/ Copy values from collection. Default is identity mapping. /
3712	for (idx2 = `0`; idx2 < `256`; ++idx2)
3713	ctype->map32_b[idx][idx2] =
3714	(ctype->map_collection[idx][idx2] != `0`
3715	? ctype->map_collection[idx][idx2]
3716	: idx2);
3717	}
3718
3719	for (nr = `0`; nr < ctype->map_collection_nr; nr++)
3720	{
3721	struct wctrans_table *t;
3722
3723	t = &ctype->map_3level[nr];
3724	t->p = `7`;
3725	t->q = `9`;
3726	wctrans_table_init (t);
3727
3728	for (idx = `0`; idx < ctype->map_collection_act[nr]; ++idx)
3729	if (ctype->map_collection[nr][idx] != `0`)
3730	wctrans_table_add (t, ctype->charnames[idx],
3731	ctype->map_collection[nr][idx]);
3732
3733	record_verbose (stderr, _("\
3734	%s: table for map \"%s\": %lu bytes"),
3735	"LC_CTYPE", ctype->mapnames[nr],
3736	(unsigned long int) t->result_size);
3737	}
3738
3739	/ Extra array for class and map names. /
3740	ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
3741	* sizeof (uint32_t));
3742	ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
3743	* sizeof (uint32_t));
3744
3745	ctype->class_offset = _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
3746	ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
3747
3748	/ Array for width information. Because the expected widths are very*
3749	small (never larger than 2) we use only one single byte. This
3750	saves space.
3751	We put only printable characters in the table. wcwidth is specified
3752	to return -1 for non-printable characters. Doing the check here
3753	saves a run-time check.
3754	But we put L'\0' in the table. This again saves a run-time check. /*
3755	{
3756	struct wcwidth_table *t;
3757
3758	t = &ctype->width;
3759	t->p = `7`;
3760	t->q = `9`;
3761	wcwidth_table_init (t);
3762
3763	/ First set all the printable characters of the character set to*
3764	the default width. /*
3765	curs = NULL;
3766	while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == `0`)
3767	{
3768	struct charseq data = (struct* charseq *) vdata;
3769
3770	if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
3771	data->ucs4 = repertoire_find_value (ctype->repertoire,
3772	data->name, len);
3773
3774	if (data->ucs4 != ILLEGAL_CHAR_VALUE)
3775	{
3776	uint32_t *class_bits =
3777	find_idx (ctype, &ctype->class_collection, NULL,
3778	&ctype->class_collection_act, data->ucs4);
3779
3780	if (class_bits != NULL && (*class_bits & BITw (tok_print)))
3781	wcwidth_table_add (t, data->ucs4, charmap->width_default);
3782	}
3783	}
3784
3785	/ Now add the explicitly specified widths. /
3786	if (charmap->width_rules != NULL)
3787	for (size_t cnt = `0`; cnt < charmap->nwidth_rules; ++cnt)
3788	{
3789	unsigned char bytes[charmap->mb_cur_max];
3790	int nbytes = charmap->width_rules[cnt].from->nbytes;
3791
3792	/ We have the range of character for which the width is*
3793	specified described using byte sequences of the multibyte
3794	charset. We have to convert this to UCS4 now. And we
3795	cannot simply convert the beginning and the end of the
3796	sequence, we have to iterate over the byte sequence and
3797	convert it for every single character. /*
3798	memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
3799
3800	while (nbytes < charmap->width_rules[cnt].to->nbytes
3801	\|\| memcmp (bytes, charmap->width_rules[cnt].to->bytes,
3802	nbytes) <= `0`)
3803	{
3804	/ Find the UCS value for `bytes'. /
3805	int inner;
3806	uint32_t wch;
3807	struct charseq *seq =
3808	charmap_find_symbol (charmap, (char *) bytes, nbytes);
3809
3810	if (seq == NULL)
3811	wch = ILLEGAL_CHAR_VALUE;
3812	else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
3813	wch = seq->ucs4;
3814	else
3815	wch = repertoire_find_value (ctype->repertoire, seq->name,
3816	strlen (seq->name));
3817
3818	if (wch != ILLEGAL_CHAR_VALUE)
3819	{
3820	/ Store the value. /
3821	uint32_t *class_bits =
3822	find_idx (ctype, &ctype->class_collection, NULL,
3823	&ctype->class_collection_act, wch);
3824
3825	if (class_bits != NULL && (*class_bits & BITw (tok_print)))
3826	wcwidth_table_add (t, wch,
3827	charmap->width_rules[cnt].width);
3828	}
3829
3830	/ "Increment" the bytes sequence. /
3831	inner = nbytes - `1`;
3832	while (inner >= `0` && bytes[inner] == `0xff`)
3833	--inner;
3834
3835	if (inner < `0`)
3836	{
3837	/ We have to extend the byte sequence. /
3838	if (nbytes >= charmap->width_rules[cnt].to->nbytes)
3839	break;
3840
3841	bytes[`0`] = `1`;
3842	memset (&bytes[`1`], `0`, nbytes);
3843	++nbytes;
3844	}
3845	else
3846	{
3847	++bytes[inner];
3848	while (++inner < nbytes)
3849	bytes[inner] = `0`;
3850	}
3851	}
3852	}
3853
3854	/ Set the width of L'\0' to 0. /
3855	wcwidth_table_add (t, `0`, `0`);
3856
3857	record_verbose (stderr, _("%s: table for width: %lu bytes"),
3858	"LC_CTYPE", (unsigned long int) t->result_size);
3859	}
3860
3861	/ Set MB_CUR_MAX. /
3862	ctype->mb_cur_max = charmap->mb_cur_max;
3863
3864	/ Now determine the table for the transliteration information.*
3865
3866	XXX It is not yet clear to me whether it is worth implementing a
3867	complicated algorithm which uses a hash table to locate the entries.
3868	For now I'll use a simple array which can be searching using binary
3869	search. /*
3870	if (ctype->translit_include != NULL)
3871	/ Traverse the locales mentioned in the `include' statements in a*
3872	depth-first way and fold in their transliteration information. /*
3873	translit_flatten (ctype, charmap, &ctype->translit);
3874
3875	if (ctype->translit != NULL)
3876	{
3877	/ First count how many entries we have. This is the upper limit*
3878	since some entries from the included files might be overwritten. /*
3879	size_t number = `0`;
3880	struct translit_t *runp = ctype->translit;
3881	struct translit_t **sorted;
3882	size_t from_len, to_len;
3883
3884	while (runp != NULL)
3885	{
3886	++number;
3887	runp = runp->next;
3888	}
3889
3890	/ Next we allocate an array large enough and fill in the values. /
3891	sorted = (struct translit_t **) alloca (number
3892	* sizeof (struct translit_t **));
3893	runp = ctype->translit;
3894	number = `0`;
3895	do
3896	{
3897	/ Search for the place where to insert this string.*
3898	XXX Better use a real sorting algorithm later. /*
3899	size_t idx = `0`;
3900	int replace = `0`;
3901
3902	while (idx < number)
3903	{
3904	int res = wcscmp ((const wchar_t *) sorted[idx]->from,
3905	(const wchar_t *) runp->from);
3906	if (res == `0`)
3907	{
3908	replace = `1`;
3909	break;
3910	}
3911	if (res > `0`)
3912	break;
3913	++idx;
3914	}
3915
3916	if (replace)
3917	sorted[idx] = runp;
3918	else
3919	{
3920	memmove (&sorted[idx + `1`], &sorted[idx],
3921	(number - idx) * sizeof (struct translit_t *));
3922	sorted[idx] = runp;
3923	++number;
3924	}
3925
3926	runp = runp->next;
3927	}
3928	while (runp != NULL);
3929
3930	/ The next step is putting all the possible transliteration*
3931	strings in one memory block so that we can write it out.
3932	We need several different blocks:
3933	- index to the from-string array
3934	- from-string array
3935	- index to the to-string array
3936	- to-string array.
3937	*/
3938	from_len = to_len = `0`;
3939	for (size_t cnt = `0`; cnt < number; ++cnt)
3940	{
3941	struct translit_to_t *srunp;
3942	from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + `1`;
3943	srunp = sorted[cnt]->to;
3944	while (srunp != NULL)
3945	{
3946	to_len += wcslen ((const wchar_t *) srunp->str) + `1`;
3947	srunp = srunp->next;
3948	}
3949	/ Plus one for the extra NUL character marking the end of*
3950	the list for the current entry. /*
3951	++to_len;
3952	}
3953
3954	/ We can allocate the arrays for the results. /
3955	ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
3956	ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
3957	ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
3958	ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
3959
3960	from_len = `0`;
3961	to_len = `0`;
3962	for (size_t cnt = `0`; cnt < number; ++cnt)
3963	{
3964	size_t len;
3965	struct translit_to_t *srunp;
3966
3967	ctype->translit_from_idx[cnt] = from_len;
3968	ctype->translit_to_idx[cnt] = to_len;
3969
3970	len = wcslen ((const wchar_t *) sorted[cnt]->from) + `1`;
3971	wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
3972	(const wchar_t *) sorted[cnt]->from, len);
3973	from_len += len;
3974
3975	ctype->translit_to_idx[cnt] = to_len;
3976	srunp = sorted[cnt]->to;
3977	while (srunp != NULL)
3978	{
3979	len = wcslen ((const wchar_t *) srunp->str) + `1`;
3980	wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
3981	(const wchar_t *) srunp->str, len);
3982	to_len += len;
3983	srunp = srunp->next;
3984	}
3985	ctype->translit_to_tbl[to_len++] = L`'\0'`;
3986	}
3987
3988	/ Store the information about the length. /
3989	ctype->translit_idx_size = number;
3990	ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
3991	ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
3992	}
3993	else
3994	{
3995	ctype->translit_from_idx = no_str;
3996	ctype->translit_from_tbl = no_str;
3997	ctype->translit_to_tbl = no_str;
3998	ctype->translit_idx_size = `0`;
3999	ctype->translit_from_tbl_size = `0`;
4000	ctype->translit_to_tbl_size = `0`;
4001	}
4002	}
4003

Browse the source code of glibc/locale/programs/ld-ctype.c