ld-ctype.c source code [glibc/locale/programs/ld-ctype.c]

/* Copyright (C) 1995-2023 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <https://www.gnu.org/licenses/>.  */
16
17	#ifdef HAVE_CONFIG_H
18	# include <config.h>
19	#endif
20
21	#include <alloca.h>
22	#include <byteswap.h>
23	#include <endian.h>
24	#include <errno.h>
25	#include <limits.h>
26	#include <obstack.h>
27	#include <stdlib.h>
28	#include <string.h>
29	#include <wchar.h>
30	#include <wctype.h>
31	#include <stdint.h>
32	#include <sys/uio.h>
33
34	#include "localedef.h"
35	#include "charmap.h"
36	#include "localeinfo.h"
37	#include "langinfo.h"
38	#include "linereader.h"
39	#include "locfile-token.h"
40	#include "locfile.h"
41
42	#include <assert.h>
43
44
/* The bit used for representing a special class.  BITPOS maps a class
   token to its zero-based position relative to tok_upper; BIT and BITw
   turn that position into the mask used in the 8-bit and in the wide
   character class tables respectively.  */
#define BITPOS(class) ((class) - tok_upper)
#define BIT(class) (_ISbit (BITPOS (class)))
#define BITw(class) (_ISwbit (BITPOS (class)))

/* Expand to an lvalue: the element the pointer returned by find_idx
   refers to.  COLLECTION names a member of *CTYPE which has companion
   members COLLECTION_max and COLLECTION_act; IDX is an optional
   subscript (may be empty) applied to all three; VALUE is the character
   to look up.  */
#define ELEM(ctype, collection, idx, value)				      \
  *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx,     \
	     &ctype->collection##_act idx, value)
53
54
/* To be compatible with former implementations we for now restrict
   the number of bits for character classes to 16.  When compatibility
   is not necessary anymore increase the number to 32.  */
#define char_class_t uint16_t
#define char_class32_t uint32_t
60
61
/* Type to describe a transliteration action.  We have a possibly
   multiple character from-string and a set of multiple character
   to-strings.  All are 32bit values since this is what is used in
   the gconv functions.  */
struct translit_to_t
{
  /* One to-string.  */
  uint32_t *str;

  /* Next to-string for the same from-string.  */
  struct translit_to_t *next;
};
72
/* One transliteration rule: a from-string together with the list of
   its to-strings and the place where the rule was defined.  */
struct translit_t
{
  uint32_t *from;

  /* File name and line number recorded for this rule.  */
  const char *fname;
  size_t lineno;

  /* List of replacement strings.  */
  struct translit_to_t *to;

  struct translit_t *next;
};
84
/* One entry of the transliteration ignore list.  The three values are
   written to the locale file in the order FROM, TO, STEP.  */
struct translit_ignore_t
{
  uint32_t from;
  uint32_t to;
  uint32_t step;

  /* File name and line number recorded for this entry.  */
  const char *fname;
  size_t lineno;

  struct translit_ignore_t *next;
};
96
97
/* Type to describe a transliteration include statement.  */
struct translit_include_t
{
  const char *copy_locale;
  const char *copy_repertoire;

  struct translit_include_t *next;
};
106
/* Provide some dummy pointer for empty string.  */
static uint32_t no_str[] = { 0 };
109
110
111	/ Sparse table of uint32_t. /
112	#define TABLE idx_table
113	#define ELEMENT uint32_t
114	#define DEFAULT ((uint32_t) ~0)
115	#define NO_ADD_LOCALE
116	#include "3level.h"
117
118	#define TABLE wcwidth_table
119	#define ELEMENT uint8_t
120	#define DEFAULT 0xff
121	#include "3level.h"
122
123	#define TABLE wctrans_table
124	#define ELEMENT int32_t
125	#define DEFAULT 0
126	#define wctrans_table_add wctrans_table_add_internal
127	#include "3level.h"
128	#undef wctrans_table_add
129	/ The wctrans_table must actually store the difference between the*
130	desired result and the argument. /*
131	static inline void
132	wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
133	{
134	wctrans_table_add_internal (t, wc, mapped_wc - wc);
135	}
136
/* Construction of sparse 3-level tables.
   See wchar-lookup.h for their structure and the meaning of p and q.  */

struct wctype_table
{
  /* Parameters.  */
  unsigned int p;
  unsigned int q;
  /* Working representation.  */
  size_t level1_alloc;
  size_t level1_size;
  uint32_t *level1;
  size_t level2_alloc;
  size_t level2_size;
  uint32_t *level2;
  size_t level3_alloc;
  size_t level3_size;
  uint32_t *level3;
  size_t result_size;
};

static void add_locale_wctype_table (struct locale_file *file,
				     struct wctype_table *t);
160
161	/ The real definition of the struct for the LC_CTYPE locale. /
162	struct locale_ctype_t
163	{
164	uint32_t *charnames;
165	size_t charnames_max;
166	size_t charnames_act;
167	/ An index lookup table, to speedup find_idx. /
168	struct idx_table charnames_idx;
169
170	struct repertoire_t *repertoire;
171
172	/ We will allow up to 8 * sizeof (uint32_t) character classes. /
173	#define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
174	size_t nr_charclass;
175	const char *classnames[MAX_NR_CHARCLASS];
176	uint32_t last_class_char;
177	uint32_t class256_collection[`256`];
178	uint32_t *class_collection;
179	size_t class_collection_max;
180	size_t class_collection_act;
181	uint32_t class_done;
182	uint32_t class_offset;
183
184	struct charseq **mbdigits;
185	size_t mbdigits_act;
186	size_t mbdigits_max;
187	uint32_t *wcdigits;
188	size_t wcdigits_act;
189	size_t wcdigits_max;
190
191	struct charseq *mboutdigits[`10`];
192	uint32_t wcoutdigits[`10`];
193	size_t outdigits_act;
194
195	/ If the following number ever turns out to be too small simply*
196	increase it. But I doubt it will. --drepper@gnu /*
197	#define MAX_NR_CHARMAP 16
198	const char *mapnames[MAX_NR_CHARMAP];
199	uint32_t *map_collection[MAX_NR_CHARMAP];
200	uint32_t map256_collection[`2`][`256`];
201	size_t map_collection_max[MAX_NR_CHARMAP];
202	size_t map_collection_act[MAX_NR_CHARMAP];
203	size_t map_collection_nr;
204	size_t last_map_idx;
205	int tomap_done[MAX_NR_CHARMAP];
206	uint32_t map_offset;
207
208	/ Transliteration information. /
209	struct translit_include_t *translit_include;
210	struct translit_t *translit;
211	struct translit_ignore_t *translit_ignore;
212	uint32_t ntranslit_ignore;
213
214	uint32_t *default_missing;
215	const char *default_missing_file;
216	size_t default_missing_lineno;
217
218	uint32_t to_nonascii;
219	uint32_t nonascii_case;
220
221	/ The arrays for the binary representation. /
222	char_class_t *ctype_b;
223	char_class32_t *ctype32_b;
224	uint32_t **map_b;
225	uint32_t **map32_b;
226	uint32_t **class_b;
227	struct wctype_table *class_3level;
228	struct wctrans_table *map_3level;
229	uint32_t *class_name_ptr;
230	uint32_t *map_name_ptr;
231	struct wcwidth_table width;
232	uint32_t mb_cur_max;
233	const char *codeset_name;
234	uint32_t *translit_from_idx;
235	uint32_t *translit_from_tbl;
236	uint32_t *translit_to_idx;
237	uint32_t *translit_to_tbl;
238	uint32_t translit_idx_size;
239	size_t translit_from_tbl_size;
240	size_t translit_to_tbl_size;
241
242	struct obstack mempool;
243	};
244
245
/* Marker for an empty slot.  This has the value 0xFFFFFFFF, regardless
   whether 'int' is 16 bit, 32 bit, or 64 bit.  */
#define EMPTY ((uint32_t) ~0)


/* Allocation functions used by the obstack macros.  */
#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free free
253
254
/* Prototypes for local functions.  */
static void ctype_startup (struct linereader *lr, struct localedef_t *locale,
			   const struct charmap_t *charmap,
			   struct localedef_t *copy_locale,
			   int ignore_content);
static void ctype_class_new (struct linereader *lr,
			     struct locale_ctype_t *ctype, const char *name);
static void ctype_map_new (struct linereader *lr,
			   struct locale_ctype_t *ctype,
			   const char *name, const struct charmap_t *charmap);
/* Returns a pointer to the slot for IDX; TABLE, MAX and ACT are passed
   by address (or as NULL when only the side effect on the name array is
   wanted).  */
static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table,
			   size_t *max, size_t *act, uint32_t idx);
static void set_class_defaults (struct locale_ctype_t *ctype,
				const struct charmap_t *charmap,
				struct repertoire_t *repertoire);
static void allocate_arrays (struct locale_ctype_t *ctype,
			     const struct charmap_t *charmap,
			     struct repertoire_t *repertoire);
273
274
/* Names under which the ten decimal digits may be known in a charmap:
   spelled-out names, Unicode-style names, and the plain characters.  */
static const char *longnames[] =
{
  "zero", "one", "two", "three", "four",
  "five", "six", "seven", "eight", "nine"
};
static const char *uninames[] =
{
  "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
  "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
};
static const unsigned char digits[] = "0123456789";
286
287
288	static void
289	ctype_startup (struct linereader lr, struct* localedef_t *locale,
290	const struct charmap_t *charmap,
291	struct localedef_t copy_locale, int* ignore_content)
292	{
293	unsigned int cnt;
294	struct locale_ctype_t *ctype;
295
296	if (!ignore_content && locale->categories[LC_CTYPE].ctype == NULL)
297	{
298	if (copy_locale == NULL)
299	{
300	/ Allocate the needed room. /
301	locale->categories[LC_CTYPE].ctype = ctype =
302	(struct locale_ctype_t *) xcalloc (`1`,
303	sizeof (struct locale_ctype_t));
304
305	/ We have seen no names yet. /
306	ctype->charnames_max = charmap->mb_cur_max == `1` ? `256` : `512`;
307	ctype->charnames = (uint32_t *) xmalloc (ctype->charnames_max
308	* sizeof (uint32_t));
309	for (cnt = `0`; cnt < `256`; ++cnt)
310	ctype->charnames[cnt] = cnt;
311	ctype->charnames_act = `256`;
312	idx_table_init (&ctype->charnames_idx);
313
314	/ Fill character class information. /
315	ctype->last_class_char = ILLEGAL_CHAR_VALUE;
316	/ The order of the following instructions determines the bit*
317	positions! /*
318	ctype_class_new (lr, ctype, "upper");
319	ctype_class_new (lr, ctype, "lower");
320	ctype_class_new (lr, ctype, "alpha");
321	ctype_class_new (lr, ctype, "digit");
322	ctype_class_new (lr, ctype, "xdigit");
323	ctype_class_new (lr, ctype, "space");
324	ctype_class_new (lr, ctype, "print");
325	ctype_class_new (lr, ctype, "graph");
326	ctype_class_new (lr, ctype, "blank");
327	ctype_class_new (lr, ctype, "cntrl");
328	ctype_class_new (lr, ctype, "punct");
329	ctype_class_new (lr, ctype, "alnum");
330
331	ctype->class_collection_max = charmap->mb_cur_max == `1` ? `256` : `512`;
332	ctype->class_collection
333	= (uint32_t ) xcalloc (sizeof* (unsigned long int),
334	ctype->class_collection_max);
335	ctype->class_collection_act = `256`;
336
337	/ Fill character map information. /
338	ctype->last_map_idx = MAX_NR_CHARMAP;
339	ctype_map_new (lr, ctype, "toupper", charmap);
340	ctype_map_new (lr, ctype, "tolower", charmap);
341
342	/ Fill first 256 entries in `toXXX' arrays. /
343	for (cnt = `0`; cnt < `256`; ++cnt)
344	{
345	ctype->map_collection[`0`][cnt] = cnt;
346	ctype->map_collection[`1`][cnt] = cnt;
347
348	ctype->map256_collection[`0`][cnt] = cnt;
349	ctype->map256_collection[`1`][cnt] = cnt;
350	}
351
352	if (enc_not_ascii_compatible)
353	ctype->to_nonascii = `1`;
354
355	obstack_init (&ctype->mempool);
356	}
357	else
358	ctype = locale->categories[LC_CTYPE].ctype =
359	copy_locale->categories[LC_CTYPE].ctype;
360	}
361	}
362
363
364	void
365	ctype_finish (struct localedef_t locale, const* struct charmap_t *charmap)
366	{
367	/ See POSIX.2, table 2-6 for the meaning of the following table. /
368	#define NCLASS 12
369	static const struct
370	{
371	const char *name;
372	const char allow[NCLASS];
373	}
374	valid_table[NCLASS] =
375	{
376	/ The order is important. See token.h for more information.*
377	M = Always, D = Default, - = Permitted, X = Mutually exclusive /*
378	{ "upper", "--MX-XDDXXX-" },
379	{ "lower", "--MX-XDDXXX-" },
380	{ "alpha", "---X-XDDXXX-" },
381	{ "digit", "XXX--XDDXXX-" },
382	{ "xdigit", "-----XDDXXX-" },
383	{ "space", "XXXXX------X" },
384	{ "print", "---------X--" },
385	{ "graph", "---------X--" },
386	{ "blank", "XXXXXM-----X" },
387	{ "cntrl", "XXXXX-XX--XX" },
388	{ "punct", "XXXXX-DD-X-X" },
389	{ "alnum", "-----XDDXXX-" }
390	};
391	size_t cnt;
392	int cls1, cls2;
393	uint32_t space_value;
394	struct charseq *space_seq;
395	struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
396	int warned;
397	const void *key;
398	size_t len;
399	void *vdata;
400	void *curs;
401
402	/ Now resolve copying and also handle completely missing definitions. /
403	if (ctype == NULL)
404	{
405	const char *repertoire_name;
406
407	/ First see whether we were supposed to copy. If yes, find the*
408	actual definition. /*
409	if (locale->copy_name[LC_CTYPE] != NULL)
410	{
411	/ Find the copying locale. This has to happen transitively since*
412	the locale we are copying from might also copying another one. /*
413	struct localedef_t *from = locale;
414
415	do
416	from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
417	from->repertoire_name, charmap);
418	while (from->categories[LC_CTYPE].ctype == NULL
419	&& from->copy_name[LC_CTYPE] != NULL);
420
421	ctype = locale->categories[LC_CTYPE].ctype
422	= from->categories[LC_CTYPE].ctype;
423	}
424
425	/ If there is still no definition issue an warning and create an*
426	empty one. /*
427	if (ctype == NULL)
428	{
429	record_warning (_("\
430	No definition for %s category found"), "LC_CTYPE");
431	ctype_startup (NULL, locale, charmap, NULL, `0`);
432	ctype = locale->categories[LC_CTYPE].ctype;
433	}
434
435	/ Get the repertoire we have to use. /
436	repertoire_name = locale->repertoire_name ?: repertoire_global;
437	if (repertoire_name != NULL)
438	ctype->repertoire = repertoire_read (repertoire_name);
439	}
440
441	/ We need the name of the currently used 8-bit character set to*
442	make correct conversion between this 8-bit representation and the
443	ISO 10646 character set used internally for wide characters. /*
444	ctype->codeset_name = charmap->code_set_name;
445	if (ctype->codeset_name == NULL)
446	{
447	record_error (`0`, `0`, _("\
448	No character set name specified in charmap"));
449	ctype->codeset_name = "//UNKNOWN//";
450	}
451
452	/ Set default value for classes not specified. /
453	set_class_defaults (ctype, charmap, ctype->repertoire);
454
455	/ Check according to table. /
456	for (cnt = `0`; cnt < ctype->class_collection_act; ++cnt)
457	{
458	uint32_t tmp = ctype->class_collection[cnt];
459
460	if (tmp != `0`)
461	{
462	for (cls1 = `0`; cls1 < NCLASS; ++cls1)
463	if ((tmp & _ISwbit (cls1)) != `0`)
464	for (cls2 = `0`; cls2 < NCLASS; ++cls2)
465	if (valid_table[cls1].allow[cls2] != `'-'`)
466	{
467	int eq = (tmp & _ISwbit (cls2)) != `0`;
468	switch (valid_table[cls1].allow[cls2])
469	{
470	case `'M'`:
471	if (!eq)
472	{
473	uint32_t value = ctype->charnames[cnt];
474
475	record_error (`0`, `0`, _("\
476	character L'\\u%0*x' in class `%s' must be in class `%s'"),
477	value > `0xffff` ? `8` : `4`,
478	value,
479	valid_table[cls1].name,
480	valid_table[cls2].name);
481	}
482	break;
483
484	case `'X'`:
485	if (eq)
486	{
487	uint32_t value = ctype->charnames[cnt];
488
489	record_error (`0`, `0`, _("\
490	character L'\\u%0*x' in class `%s' must not be in class `%s'"),
491	value > `0xffff` ? `8` : `4`,
492	value,
493	valid_table[cls1].name,
494	valid_table[cls2].name);
495	}
496	break;
497
498	case `'D'`:
499	ctype->class_collection[cnt] \|= _ISwbit (cls2);
500	break;
501
502	default:
503	record_error (`5`, `0`, _("\
504	internal error in %s, line %u"), __FUNCTION__, __LINE__);
505	}
506	}
507	}
508	}
509
510	for (cnt = `0`; cnt < `256`; ++cnt)
511	{
512	uint32_t tmp = ctype->class256_collection[cnt];
513
514	if (tmp != `0`)
515	{
516	for (cls1 = `0`; cls1 < NCLASS; ++cls1)
517	if ((tmp & _ISbit (cls1)) != `0`)
518	for (cls2 = `0`; cls2 < NCLASS; ++cls2)
519	if (valid_table[cls1].allow[cls2] != `'-'`)
520	{
521	int eq = (tmp & _ISbit (cls2)) != `0`;
522	switch (valid_table[cls1].allow[cls2])
523	{
524	case `'M'`:
525	if (!eq)
526	{
527	char buf[`17`];
528
529	snprintf (buf, sizeof buf, "\\%zo", cnt);
530
531	record_error (`0`, `0`, _("\
532	character '%s' in class `%s' must be in class `%s'"),
533	buf,
534	valid_table[cls1].name,
535	valid_table[cls2].name);
536	}
537	break;
538
539	case `'X'`:
540	if (eq)
541	{
542	char buf[`17`];
543
544	snprintf (buf, sizeof buf, "\\%zo", cnt);
545
546	record_error (`0`, `0`, _("\
547	character '%s' in class `%s' must not be in class `%s'"),
548	buf,
549	valid_table[cls1].name,
550	valid_table[cls2].name);
551	}
552	break;
553
554	case `'D'`:
555	ctype->class256_collection[cnt] \|= _ISbit (cls2);
556	break;
557
558	default:
559	record_error (`5`, `0`, _("\
560	internal error in %s, line %u"), __FUNCTION__, __LINE__);
561	}
562	}
563	}
564	}
565
566	/ ... and now test <SP> as a special case. /
567	space_value = `32`;
568	if (((cnt = BITPOS (tok_space),
569	(ELEM (ctype, class_collection, , space_value)
570	& BITw (tok_space)) == `0`)
571	\|\| (cnt = BITPOS (tok_blank),
572	(ELEM (ctype, class_collection, , space_value)
573	& BITw (tok_blank)) == `0`)))
574	{
575	record_error (`0`, `0`, _("<SP> character not in class `%s'"),
576	valid_table[cnt].name);
577	}
578	else if (((cnt = BITPOS (tok_punct),
579	(ELEM (ctype, class_collection, , space_value)
580	& BITw (tok_punct)) != `0`)
581	\|\| (cnt = BITPOS (tok_graph),
582	(ELEM (ctype, class_collection, , space_value)
583	& BITw (tok_graph))
584	!= `0`)))
585	{
586	record_error (`0`, `0`, _("\
587	<SP> character must not be in class `%s'"),
588	valid_table[cnt].name);
589	}
590	else
591	ELEM (ctype, class_collection, , space_value) \|= BITw (tok_print);
592
593	space_seq = charmap_find_value (charmap, "SP", `2`);
594	if (space_seq == NULL)
595	space_seq = charmap_find_value (charmap, "space", `5`);
596	if (space_seq == NULL)
597	space_seq = charmap_find_value (charmap, "U00000020", `9`);
598	if (space_seq == NULL \|\| space_seq->nbytes != `1`)
599	{
600	record_error (`0`, `0`, _("\
601	character <SP> not defined in character map"));
602	}
603	else if (((cnt = BITPOS (tok_space),
604	(ctype->class256_collection[space_seq->bytes[`0`]]
605	& BIT (tok_space)) == `0`)
606	\|\| (cnt = BITPOS (tok_blank),
607	(ctype->class256_collection[space_seq->bytes[`0`]]
608	& BIT (tok_blank)) == `0`)))
609	{
610	record_error (`0`, `0`, _("<SP> character not in class `%s'"),
611	valid_table[cnt].name);
612	}
613	else if (((cnt = BITPOS (tok_punct),
614	(ctype->class256_collection[space_seq->bytes[`0`]]
615	& BIT (tok_punct)) != `0`)
616	\|\| (cnt = BITPOS (tok_graph),
617	(ctype->class256_collection[space_seq->bytes[`0`]]
618	& BIT (tok_graph)) != `0`)))
619	{
620	record_error (`0`, `0`, _("\
621	<SP> character must not be in class `%s'"),
622	valid_table[cnt].name);
623	}
624	else
625	ctype->class256_collection[space_seq->bytes[`0`]] \|= BIT (tok_print);
626
627	/ Check whether all single-byte characters make to their upper/lowercase*
628	equivalent according to the ASCII rules. /*
629	for (cnt = `'A'`; cnt <= `'Z'`; ++cnt)
630	{
631	uint32_t uppval = ctype->map256_collection[`0`][cnt];
632	uint32_t lowval = ctype->map256_collection[`1`][cnt];
633	uint32_t lowuppval = ctype->map256_collection[`0`][lowval];
634	uint32_t lowlowval = ctype->map256_collection[`1`][lowval];
635
636	if (uppval != cnt
637	\|\| lowval != cnt + `0x20`
638	\|\| lowuppval != cnt
639	\|\| lowlowval != cnt + `0x20`)
640	ctype->nonascii_case = `1`;
641	}
642	for (cnt = `0`; cnt < `256`; ++cnt)
643	if (cnt < `'A'` \|\| (cnt > `'Z'` && cnt < `'a'`) \|\| cnt > `'z'`)
644	if (ctype->map256_collection[`0`][cnt] != cnt
645	\|\| ctype->map256_collection[`1`][cnt] != cnt)
646	ctype->nonascii_case = `1`;
647
648	/ Now that the tests are done make sure the name array contains all*
649	characters which are handled in the WIDTH section of the
650	character set definition file. /*
651	if (charmap->width_rules != NULL)
652	for (cnt = `0`; cnt < charmap->nwidth_rules; ++cnt)
653	{
654	unsigned char bytes[charmap->mb_cur_max];
655	int nbytes = charmap->width_rules[cnt].from->nbytes;
656
657	/ We have the range of character for which the width is*
658	specified described using byte sequences of the multibyte
659	charset. We have to convert this to UCS4 now. And we
660	cannot simply convert the beginning and the end of the
661	sequence, we have to iterate over the byte sequence and
662	convert it for every single character. /*
663	memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
664
665	while (nbytes < charmap->width_rules[cnt].to->nbytes
666	\|\| memcmp (bytes, charmap->width_rules[cnt].to->bytes,
667	nbytes) <= `0`)
668	{
669	/ Find the UCS value for `bytes'. /
670	int inner;
671	uint32_t wch;
672	struct charseq *seq
673	= charmap_find_symbol (charmap, (char *) bytes, nbytes);
674
675	if (seq == NULL)
676	wch = ILLEGAL_CHAR_VALUE;
677	else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
678	wch = seq->ucs4;
679	else
680	wch = repertoire_find_value (ctype->repertoire, seq->name,
681	strlen (seq->name));
682
683	if (wch != ILLEGAL_CHAR_VALUE)
684	/ We are only interested in the side-effects of the*
685	`find_idx' call. It will add appropriate entries in
686	the name array if this is necessary. /*
687	(void) find_idx (ctype, NULL, NULL, NULL, wch);
688
689	/ "Increment" the bytes sequence. /
690	inner = nbytes - `1`;
691	while (inner >= `0` && bytes[inner] == `0xff`)
692	--inner;
693
694	if (inner < `0`)
695	{
696	/ We have to extend the byte sequence. /
697	if (nbytes >= charmap->width_rules[cnt].to->nbytes)
698	break;
699
700	bytes[`0`] = `1`;
701	memset (&bytes[`1`], `0`, nbytes);
702	++nbytes;
703	}
704	else
705	{
706	++bytes[inner];
707	while (++inner < nbytes)
708	bytes[inner] = `0`;
709	}
710	}
711	}
712
713	/ Now set all the other characters of the character set to the*
714	default width. /*
715	curs = NULL;
716	while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == `0`)
717	{
718	struct charseq data = (struct* charseq *) vdata;
719
720	if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
721	data->ucs4 = repertoire_find_value (ctype->repertoire,
722	data->name, len);
723
724	if (data->ucs4 != ILLEGAL_CHAR_VALUE)
725	(void) find_idx (ctype, NULL, NULL, NULL, data->ucs4);
726	}
727
728	/ There must be a multiple of 10 digits. /
729	if (ctype->mbdigits_act % `10` != `0`)
730	{
731	assert (ctype->mbdigits_act == ctype->wcdigits_act);
732	ctype->wcdigits_act -= ctype->mbdigits_act % `10`;
733	ctype->mbdigits_act -= ctype->mbdigits_act % `10`;
734	record_error (`0`, `0`, _("\
735	`digit' category has not entries in groups of ten"));
736	}
737
738	/ Check the input digits. There must be a multiple of ten available.*
739	In each group it could be that one or the other character is missing.
740	In this case the whole group must be removed. /*
741	cnt = `0`;
742	while (cnt < ctype->mbdigits_act)
743	{
744	size_t inner;
745	for (inner = `0`; inner < `10`; ++inner)
746	if (ctype->mbdigits[cnt + inner] == NULL)
747	break;
748
749	if (inner == `10`)
750	cnt += `10`;
751	else
752	{
753	/ Remove the group. /
754	memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + `10`],
755	((ctype->wcdigits_act - cnt - `10`)
756	* sizeof (ctype->mbdigits[`0`])));
757	ctype->mbdigits_act -= `10`;
758	}
759	}
760
761	/ If no input digits are given use the default. /
762	if (ctype->mbdigits_act == `0`)
763	{
764	if (ctype->mbdigits_max == `0`)
765	{
766	ctype->mbdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
767	`10` * sizeof (struct charseq *));
768	ctype->mbdigits_max = `10`;
769	}
770
771	for (cnt = `0`; cnt < `10`; ++cnt)
772	{
773	ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
774	(char *) digits + cnt, `1`);
775	if (ctype->mbdigits[cnt] == NULL)
776	{
777	ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
778	longnames[cnt],
779	strlen (longnames[cnt]));
780	if (ctype->mbdigits[cnt] == NULL)
781	{
782	/ Hum, this ain't good. /
783	record_error (`0`, `0`, _("\
784	no input digits defined and none of the standard names in the charmap"));
785
786	ctype->mbdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
787	sizeof (struct charseq) + `1`);
788
789	/ This is better than nothing. /
790	ctype->mbdigits[cnt]->bytes[`0`] = digits[cnt];
791	ctype->mbdigits[cnt]->nbytes = `1`;
792	}
793	}
794	}
795
796	ctype->mbdigits_act = `10`;
797	}
798
799	/ Check the wide character input digits. There must be a multiple*
800	of ten available. In each group it could be that one or the other
801	character is missing. In this case the whole group must be
802	removed. /*
803	cnt = `0`;
804	while (cnt < ctype->wcdigits_act)
805	{
806	size_t inner;
807	for (inner = `0`; inner < `10`; ++inner)
808	if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
809	break;
810
811	if (inner == `10`)
812	cnt += `10`;
813	else
814	{
815	/ Remove the group. /
816	memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + `10`],
817	((ctype->wcdigits_act - cnt - `10`)
818	* sizeof (ctype->wcdigits[`0`])));
819	ctype->wcdigits_act -= `10`;
820	}
821	}
822
823	/ If no input digits are given use the default. /
824	if (ctype->wcdigits_act == `0`)
825	{
826	if (ctype->wcdigits_max == `0`)
827	{
828	ctype->wcdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
829	`10` * sizeof (uint32_t));
830	ctype->wcdigits_max = `10`;
831	}
832
833	for (cnt = `0`; cnt < `10`; ++cnt)
834	ctype->wcdigits[cnt] = L`'0'` + cnt;
835
836	ctype->mbdigits_act = `10`;
837	}
838
839	/ Check the outdigits. /
840	warned = `0`;
841	for (cnt = `0`; cnt < `10`; ++cnt)
842	if (ctype->mboutdigits[cnt] == NULL)
843	{
844	if (!warned)
845	{
846	record_error (`0`, `0`, _("\
847	not all characters used in `outdigit' are available in the charmap"));
848	warned = `1`;
849	}
850
851	static const struct charseq replace =
852	{
853	.nbytes = `1`,
854	.bytes = "?",
855	};
856	ctype->mboutdigits[cnt] = (struct charseq *) &replace;
857	}
858
859	warned = `0`;
860	for (cnt = `0`; cnt < `10`; ++cnt)
861	if (ctype->wcoutdigits[cnt] == `0`)
862	{
863	if (!warned)
864	{
865	record_error (`0`, `0`, _("\
866	not all characters used in `outdigit' are available in the repertoire"));
867	warned = `1`;
868	}
869
870	ctype->wcoutdigits[cnt] = L`'?'`;
871	}
872
873	/ Sort the entries in the translit_ignore list. /
874	if (ctype->translit_ignore != NULL)
875	{
876	struct translit_ignore_t *firstp = ctype->translit_ignore;
877	struct translit_ignore_t *runp;
878
879	ctype->ntranslit_ignore = `1`;
880
881	for (runp = firstp->next; runp != NULL; runp = runp->next)
882	{
883	struct translit_ignore_t *lastp = NULL;
884	struct translit_ignore_t *cmpp;
885
886	++ctype->ntranslit_ignore;
887
888	for (cmpp = firstp; cmpp != NULL; lastp = cmpp, cmpp = cmpp->next)
889	if (runp->from < cmpp->from)
890	break;
891
892	runp->next = lastp;
893	if (lastp == NULL)
894	firstp = runp;
895	}
896
897	ctype->translit_ignore = firstp;
898	}
899	}
900
901
902	void
903	ctype_output (struct localedef_t locale, const* struct charmap_t *charmap,
904	const char *output_path)
905	{
906	struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
907	const size_t nelems = (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1)
908	+ ctype->nr_charclass + ctype->map_collection_nr);
909	struct locale_file file;
910	uint32_t default_missing_len;
911	size_t elem, cnt;
912
913	/ Now prepare the output: Find the sizes of the table we can use. /
914	allocate_arrays (ctype, charmap, ctype->repertoire);
915
916	default_missing_len = (ctype->default_missing
917	? wcslen ((wchar_t *) ctype->default_missing)
918	: `0`);
919
920	init_locale_data (&file, nelems);
921	for (elem = `0`; elem < nelems; ++elem)
922	{
923	if (elem < _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1))
924	switch (elem)
925	{
926	#define CTYPE_EMPTY(name) \
927	case name: \
928	add_locale_empty (&file); \
929	break
930
931	CTYPE_EMPTY(_NL_CTYPE_GAP1);
932	CTYPE_EMPTY(_NL_CTYPE_GAP2);
933	CTYPE_EMPTY(_NL_CTYPE_GAP3);
934	CTYPE_EMPTY(_NL_CTYPE_GAP4);
935	CTYPE_EMPTY(_NL_CTYPE_GAP5);
936	CTYPE_EMPTY(_NL_CTYPE_GAP6);
937
938	#define CTYPE_RAW_DATA(name, base, size) \
939	case _NL_ITEM_INDEX (name): \
940	add_locale_raw_data (&file, base, size); \
941	break
942
943	CTYPE_RAW_DATA (_NL_CTYPE_CLASS,
944	ctype->ctype_b,
945	(`256` + `128`) * sizeof (char_class_t));
946
947	#define CTYPE_UINT32_ARRAY(name, base, n_elems) \
948	case _NL_ITEM_INDEX (name): \
949	add_locale_uint32_array (&file, base, n_elems); \
950	break
951
952	CTYPE_UINT32_ARRAY (_NL_CTYPE_TOUPPER, ctype->map_b[`0`], `256` + `128`);
953	CTYPE_UINT32_ARRAY (_NL_CTYPE_TOLOWER, ctype->map_b[`1`], `256` + `128`);
954	CTYPE_UINT32_ARRAY (_NL_CTYPE_TOUPPER32, ctype->map32_b[`0`], `256`);
955	CTYPE_UINT32_ARRAY (_NL_CTYPE_TOLOWER32, ctype->map32_b[`1`], `256`);
956	CTYPE_RAW_DATA (_NL_CTYPE_CLASS32,
957	ctype->ctype32_b,
958	`256` * sizeof (char_class32_t));
959
960	#define CTYPE_UINT32(name, value) \
961	case _NL_ITEM_INDEX (name): \
962	add_locale_uint32 (&file, value); \
963	break
964
965	CTYPE_UINT32 (_NL_CTYPE_CLASS_OFFSET, ctype->class_offset);
966	CTYPE_UINT32 (_NL_CTYPE_MAP_OFFSET, ctype->map_offset);
967	CTYPE_UINT32 (_NL_CTYPE_TRANSLIT_TAB_SIZE, ctype->translit_idx_size);
968
969	CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_FROM_IDX,
970	ctype->translit_from_idx,
971	ctype->translit_idx_size);
972
973	CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_FROM_TBL,
974	ctype->translit_from_tbl,
975	ctype->translit_from_tbl_size
976	/ sizeof (uint32_t));
977
978	CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_TO_IDX,
979	ctype->translit_to_idx,
980	ctype->translit_idx_size);
981
982	CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_TO_TBL,
983	ctype->translit_to_tbl,
984	ctype->translit_to_tbl_size / sizeof (uint32_t));
985
986	case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
987	/ The class name array. /
988	start_locale_structure (&file);
989	for (cnt = `0`; cnt < ctype->nr_charclass; ++cnt)
990	add_locale_string (&file, ctype->classnames[cnt]);
991	add_locale_char (&file, `0`);
992	align_locale_data (&file, LOCFILE_ALIGN);
993	end_locale_structure (&file);
994	break;
995
996	case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
997	/ The class name array. /
998	start_locale_structure (&file);
999	for (cnt = `0`; cnt < ctype->map_collection_nr; ++cnt)
1000	add_locale_string (&file, ctype->mapnames[cnt]);
1001	add_locale_char (&file, `0`);
1002	align_locale_data (&file, LOCFILE_ALIGN);
1003	end_locale_structure (&file);
1004	break;
1005
1006	case _NL_ITEM_INDEX (_NL_CTYPE_WIDTH):
1007	add_locale_wcwidth_table (&file, &ctype->width);
1008	break;
1009
1010	CTYPE_UINT32 (_NL_CTYPE_MB_CUR_MAX, ctype->mb_cur_max);
1011
1012	case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
1013	add_locale_string (&file, ctype->codeset_name);
1014	break;
1015
1016	CTYPE_UINT32 (_NL_CTYPE_MAP_TO_NONASCII, ctype->to_nonascii);
1017
1018	CTYPE_UINT32 (_NL_CTYPE_NONASCII_CASE, ctype->nonascii_case);
1019
1020	case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
1021	add_locale_uint32 (&file, ctype->mbdigits_act / `10`);
1022	break;
1023
1024	case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
1025	add_locale_uint32 (&file, ctype->wcdigits_act / `10`);
1026	break;
1027
1028	case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
1029	start_locale_structure (&file);
1030	for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1031	cnt < ctype->mbdigits_act; cnt += `10`)
1032	{
1033	add_locale_raw_data (&file, ctype->mbdigits[cnt]->bytes,
1034	ctype->mbdigits[cnt]->nbytes);
1035	add_locale_char (&file, `0`);
1036	}
1037	end_locale_structure (&file);
1038	break;
1039
1040	case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
1041	start_locale_structure (&file);
1042	cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB);
1043	add_locale_raw_data (&file, ctype->mboutdigits[cnt]->bytes,
1044	ctype->mboutdigits[cnt]->nbytes);
1045	add_locale_char (&file, `0`);
1046	end_locale_structure (&file);
1047	break;
1048
1049	case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
1050	start_locale_structure (&file);
1051	for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC);
1052	cnt < ctype->wcdigits_act; cnt += `10`)
1053	add_locale_uint32 (&file, ctype->wcdigits[cnt]);
1054	end_locale_structure (&file);
1055	break;
1056
1057	case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
1058	cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC);
1059	add_locale_uint32 (&file, ctype->wcoutdigits[cnt]);
1060	break;
1061
1062	case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN):
1063	add_locale_uint32 (&file, default_missing_len);
1064	break;
1065
1066	case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING):
1067	add_locale_uint32_array (&file, ctype->default_missing,
1068	default_missing_len);
1069	break;
1070
1071	case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN):
1072	add_locale_uint32 (&file, ctype->ntranslit_ignore);
1073	break;
1074
1075	case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE):
1076	start_locale_structure (&file);
1077	{
1078	struct translit_ignore_t *runp;
1079	for (runp = ctype->translit_ignore; runp != NULL;
1080	runp = runp->next)
1081	{
1082	add_locale_uint32 (&file, runp->from);
1083	add_locale_uint32 (&file, runp->to);
1084	add_locale_uint32 (&file, runp->step);
1085	}
1086	}
1087	end_locale_structure (&file);
1088	break;
1089
1090	default:
1091	assert (! "unknown CTYPE element");
1092	}
1093	else
1094	{
1095	/ Handle extra maps. /
1096	size_t nr = elem - _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
1097	if (nr < ctype->nr_charclass)
1098	{
1099	start_locale_prelude (&file);
1100	add_locale_uint32_array (&file, ctype->class_b[nr], `256` / `32`);
1101	end_locale_prelude (&file);
1102	add_locale_wctype_table (&file, &ctype->class_3level[nr]);
1103	}
1104	else
1105	{
1106	nr -= ctype->nr_charclass;
1107	assert (nr < ctype->map_collection_nr);
1108	add_locale_wctrans_table (&file, &ctype->map_3level[nr]);
1109	}
1110	}
1111	}
1112
1113	write_locale_data (output_path, LC_CTYPE, "LC_CTYPE", &file);
1114	}
1115
1116
1117	/ Local functions. /
1118	static void
1119	ctype_class_new (struct linereader lr, struct* locale_ctype_t *ctype,
1120	const char *name)
1121	{
1122	size_t cnt;
1123
1124	for (cnt = `0`; cnt < ctype->nr_charclass; ++cnt)
1125	if (strcmp (ctype->classnames[cnt], name) == `0`)
1126	break;
1127
1128	if (cnt < ctype->nr_charclass)
1129	{
1130	lr_error (lr, _("character class `%s' already defined"), name);
1131	return;
1132	}
1133
1134	if (ctype->nr_charclass == MAX_NR_CHARCLASS)
1135	/ Exit code 2 is prescribed in P1003.2b. /
1136	record_error (`2`, `0`, _("\
1137	implementation limit: no more than %Zd character classes allowed"),
1138	MAX_NR_CHARCLASS);
1139
1140	ctype->classnames[ctype->nr_charclass++] = name;
1141	}
1142
1143
1144	static void
1145	ctype_map_new (struct linereader lr, struct* locale_ctype_t *ctype,
1146	const char name, const* struct charmap_t *charmap)
1147	{
1148	size_t max_chars = `0`;
1149	size_t cnt;
1150
1151	for (cnt = `0`; cnt < ctype->map_collection_nr; ++cnt)
1152	{
1153	if (strcmp (ctype->mapnames[cnt], name) == `0`)
1154	break;
1155
1156	if (max_chars < ctype->map_collection_max[cnt])
1157	max_chars = ctype->map_collection_max[cnt];
1158	}
1159
1160	if (cnt < ctype->map_collection_nr)
1161	{
1162	lr_error (lr, _("character map `%s' already defined"), name);
1163	return;
1164	}
1165
1166	if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1167	/ Exit code 2 is prescribed in P1003.2b. /
1168	record_error (`2`, `0`, _("\
1169	implementation limit: no more than %d character maps allowed"),
1170	MAX_NR_CHARMAP);
1171
1172	ctype->mapnames[cnt] = name;
1173
1174	if (max_chars == `0`)
1175	ctype->map_collection_max[cnt] = charmap->mb_cur_max == `1` ? `256` : `512`;
1176	else
1177	ctype->map_collection_max[cnt] = max_chars;
1178
1179	ctype->map_collection[cnt] = (uint32_t *)
1180	xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]);
1181	ctype->map_collection_act[cnt] = `256`;
1182
1183	++ctype->map_collection_nr;
1184	}
1185
1186
1187	/ We have to be prepared that TABLE, MAX, and ACT can be NULL. This*
1188	is possible if we only want to extend the name array. /*
1189	static uint32_t *
1190	find_idx (struct locale_ctype_t ctype, uint32_t table, size_t max,
1191	size_t *act, uint32_t idx)
1192	{
1193	size_t cnt;
1194
1195	if (idx < `256`)
1196	return table == NULL ? NULL : &(*table)[idx];
1197
1198	/ Use the charnames_idx lookup table instead of the slow search loop. /
1199	#if 1
1200	cnt = idx_table_get (&ctype->charnames_idx, idx);
1201	if (cnt == EMPTY)
1202	/ Not found. /
1203	cnt = ctype->charnames_act;
1204	#else
1205	for (cnt = `256`; cnt < ctype->charnames_act; ++cnt)
1206	if (ctype->charnames[cnt] == idx)
1207	break;
1208	#endif
1209
1210	/ We have to distinguish two cases: the name is found or not. /
1211	if (cnt == ctype->charnames_act)
1212	{
1213	/ Extend the name array. /
1214	if (ctype->charnames_act == ctype->charnames_max)
1215	{
1216	ctype->charnames_max *= `2`;
1217	ctype->charnames = (uint32_t *)
1218	xrealloc (ctype->charnames,
1219	sizeof (uint32_t) * ctype->charnames_max);
1220	}
1221	ctype->charnames[ctype->charnames_act++] = idx;
1222	idx_table_add (&ctype->charnames_idx, idx, cnt);
1223	}
1224
1225	if (table == NULL)
1226	/ We have done everything we are asked to do. /
1227	return NULL;
1228
1229	if (max == NULL)
1230	/ The caller does not want to extend the table. /
1231	return (cnt >= act ? NULL : &(table)[cnt]);
1232
1233	if (cnt >= *act)
1234	{
1235	if (cnt >= *max)
1236	{
1237	size_t old_max = *max;
1238	do
1239	max = `2`;
1240	while (*max <= cnt);
1241
1242	*table =
1243	(uint32_t ) xrealloc (table, max sizeof (uint32_t));
1244	memset (&(*table)[old_max], `'\0'`,
1245	(max - old_max) sizeof (uint32_t));
1246	}
1247
1248	*act = cnt + `1`;
1249	}
1250
1251	return &(*table)[cnt];
1252	}
1253
1254
1255	static int
1256	get_character (struct token now, const* struct charmap_t *charmap,
1257	struct repertoire_t *repertoire,
1258	struct charseq *seqp, uint32_t wchp)
1259	{
1260	if (now->tok == tok_bsymbol)
1261	{
1262	/ This will hopefully be the normal case. /
1263	*wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1264	now->val.str.lenmb);
1265	*seqp = charmap_find_value (charmap, now->val.str.startmb,
1266	now->val.str.lenmb);
1267	}
1268	else if (now->tok == tok_ucs4)
1269	{
1270	char utmp[`10`];
1271
1272	snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4);
1273	*seqp = charmap_find_value (charmap, utmp, `9`);
1274
1275	if (*seqp == NULL)
1276	*seqp = repertoire_find_seq (repertoire, now->val.ucs4);
1277
1278	if (*seqp == NULL)
1279	{
1280	/ Compute the value in the charmap from the UCS value. /
1281	const char *symbol = repertoire_find_symbol (repertoire,
1282	now->val.ucs4);
1283
1284	if (symbol == NULL)
1285	*seqp = NULL;
1286	else
1287	*seqp = charmap_find_value (charmap, symbol, strlen (symbol));
1288
1289	if (*seqp == NULL)
1290	{
1291	if (repertoire != NULL)
1292	{
1293	/ Insert a negative entry. /
1294	static const struct charseq negative
1295	= { .ucs4 = ILLEGAL_CHAR_VALUE };
1296	uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1297	sizeof (uint32_t));
1298	*newp = now->val.ucs4;
1299
1300	insert_entry (&repertoire->seq_table, newp,
1301	sizeof (uint32_t), (void *) &negative);
1302	}
1303	}
1304	else
1305	(*seqp)->ucs4 = now->val.ucs4;
1306	}
1307	else if ((*seqp)->ucs4 != now->val.ucs4)
1308	*seqp = NULL;
1309
1310	*wchp = now->val.ucs4;
1311	}
1312	else if (now->tok == tok_charcode)
1313	{
1314	/ We must map from the byte code to UCS4. /
1315	*seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1316	now->val.str.lenmb);
1317
1318	if (*seqp == NULL)
1319	*wchp = ILLEGAL_CHAR_VALUE;
1320	else
1321	{
1322	if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1323	(seqp)->ucs4 = repertoire_find_value (repertoire, (seqp)->name,
1324	strlen ((*seqp)->name));
1325	wchp = (seqp)->ucs4;
1326	}
1327	}
1328	else
1329	return `1`;
1330
1331	return `0`;
1332	}
1333
1334
1335	/ Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and*
1336	the .(2). counterparts. /*
1337	static void
1338	charclass_symbolic_ellipsis (struct linereader *ldfile,
1339	struct locale_ctype_t *ctype,
1340	const struct charmap_t *charmap,
1341	struct repertoire_t *repertoire,
1342	struct token *now,
1343	const char *last_str,
1344	unsigned long int class256_bit,
1345	unsigned long int class_bit, int base,
1346	int ignore_content, int handle_digits, int step)
1347	{
1348	const char *nowstr = now->val.str.startmb;
1349	char tmp[now->val.str.lenmb + `1`];
1350	const char *cp;
1351	char *endp;
1352	unsigned long int from;
1353	unsigned long int to;
1354
1355	/ We have to compute the ellipsis values using the symbolic names. /
1356	assert (last_str != NULL);
1357
1358	if (strlen (last_str) != now->val.str.lenmb)
1359	{
1360	invalid_range:
1361	lr_error (ldfile,
1362	_("`%s' and `%.*s' are not valid names for symbolic range"),
1363	last_str, (int) now->val.str.lenmb, nowstr);
1364	return;
1365	}
1366
1367	if (memcmp (last_str, nowstr, now->val.str.lenmb) == `0`)
1368	/ Nothing to do, the names are the same. /
1369	return;
1370
1371	for (cp = last_str; cp == (nowstr + (cp - last_str)); ++cp)
1372	;
1373
1374	errno = `0`;
1375	from = strtoul (cp, &endp, base);
1376	if ((from == UINT_MAX && errno == ERANGE) \|\| *endp != `'\0'`)
1377	goto invalid_range;
1378
1379	to = strtoul (nowstr + (cp - last_str), &endp, base);
1380	if ((to == UINT_MAX && errno == ERANGE)
1381	\|\| (endp - nowstr) != now->val.str.lenmb \|\| from >= to)
1382	goto invalid_range;
1383
1384	/ OK, we have a range FROM - TO. Now we can create the symbolic names. /
1385	if (!ignore_content)
1386	{
1387	now->val.str.startmb = tmp;
1388	while ((from += step) <= to)
1389	{
1390	struct charseq *seq;
1391	uint32_t wch;
1392
1393	sprintf (tmp, (base == `10` ? "%.s%0ld" : "%.s%0lX"),
1394	(int) (cp - last_str), last_str,
1395	(int) (now->val.str.lenmb - (cp - last_str)),
1396	from);
1397
1398	if (get_character (now, charmap, repertoire, &seq, &wch))
1399	goto invalid_range;
1400
1401	if (seq != NULL && seq->nbytes == `1`)
1402	/ Yep, we can store information about this byte sequence. /
1403	ctype->class256_collection[seq->bytes[`0`]] \|= class256_bit;
1404
1405	if (wch != ILLEGAL_CHAR_VALUE && class_bit != `0`)
1406	/ We have the UCS4 position. /
1407	*find_idx (ctype, &ctype->class_collection,
1408	&ctype->class_collection_max,
1409	&ctype->class_collection_act, wch) \|= class_bit;
1410
1411	if (handle_digits == `1`)
1412	{
1413	/ We must store the digit values. /
1414	if (ctype->mbdigits_act == ctype->mbdigits_max)
1415	{
1416	ctype->mbdigits_max *= `2`;
1417	ctype->mbdigits = xrealloc (ctype->mbdigits,
1418	(ctype->mbdigits_max
1419	* sizeof (char *)));
1420	ctype->wcdigits_max *= `2`;
1421	ctype->wcdigits = xrealloc (ctype->wcdigits,
1422	(ctype->wcdigits_max
1423	* sizeof (uint32_t)));
1424	}
1425
1426	ctype->mbdigits[ctype->mbdigits_act++] = seq;
1427	ctype->wcdigits[ctype->wcdigits_act++] = wch;
1428	}
1429	else if (handle_digits == `2`)
1430	{
1431	/ We must store the digit values. /
1432	if (ctype->outdigits_act >= `10`)
1433	{
1434	lr_error (ldfile, _("\
1435	%s: field `%s' does not contain exactly ten entries"),
1436	"LC_CTYPE", "outdigit");
1437	return;
1438	}
1439
1440	ctype->mboutdigits[ctype->outdigits_act] = seq;
1441	ctype->wcoutdigits[ctype->outdigits_act] = wch;
1442	++ctype->outdigits_act;
1443	}
1444	}
1445	}
1446	}
1447
1448
1449	/ Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'. /
1450	static void
1451	charclass_ucs4_ellipsis (struct linereader *ldfile,
1452	struct locale_ctype_t *ctype,
1453	const struct charmap_t *charmap,
1454	struct repertoire_t *repertoire,
1455	struct token *now, uint32_t last_wch,
1456	unsigned long int class256_bit,
1457	unsigned long int class_bit, int ignore_content,
1458	int handle_digits, int step)
1459	{
1460	if (last_wch > now->val.ucs4)
1461	{
1462	lr_error (ldfile, _("\
1463	to-value <U%0X> of range is smaller than from-value <U%0X>"),
1464	(now->val.ucs4 \| last_wch) < `65536` ? `4` : `8`, now->val.ucs4,
1465	(now->val.ucs4 \| last_wch) < `65536` ? `4` : `8`, last_wch);
1466	return;
1467	}
1468
1469	if (!ignore_content)
1470	while ((last_wch += step) <= now->val.ucs4)
1471	{
1472	/ We have to find out whether there is a byte sequence corresponding*
1473	to this UCS4 value. /*
1474	struct charseq *seq;
1475	char utmp[`10`];
1476
1477	snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
1478	seq = charmap_find_value (charmap, utmp, `9`);
1479	if (seq == NULL)
1480	{
1481	snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
1482	seq = charmap_find_value (charmap, utmp, `5`);
1483	}
1484
1485	if (seq == NULL)
1486	/ Try looking in the repertoire map. /
1487	seq = repertoire_find_seq (repertoire, last_wch);
1488
1489	/ If this is the first time we look for this sequence create a new*
1490	entry. /*
1491	if (seq == NULL)
1492	{
1493	static const struct charseq negative
1494	= { .ucs4 = ILLEGAL_CHAR_VALUE };
1495
1496	/ Find the symbolic name for this UCS4 value. /
1497	if (repertoire != NULL)
1498	{
1499	const char *symbol = repertoire_find_symbol (repertoire,
1500	last_wch);
1501	uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1502	sizeof (uint32_t));
1503	*newp = last_wch;
1504
1505	if (symbol != NULL)
1506	/ We have a name, now search the multibyte value. /
1507	seq = charmap_find_value (charmap, symbol, strlen (symbol));
1508
1509	if (seq == NULL)
1510	/ We have to create a fake entry. /
1511	seq = (struct charseq *) &negative;
1512	else
1513	seq->ucs4 = last_wch;
1514
1515	insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
1516	seq);
1517	}
1518	else
1519	/ We have to create a fake entry. /
1520	seq = (struct charseq *) &negative;
1521	}
1522
1523	/ We have a name, now search the multibyte value. /
1524	if (seq->ucs4 == last_wch && seq->nbytes == `1`)
1525	/ Yep, we can store information about this byte sequence. /
1526	ctype->class256_collection[(size_t) seq->bytes[`0`]]
1527	\|= class256_bit;
1528
1529	/ And of course we have the UCS4 position. /
1530	if (class_bit != `0`)
1531	*find_idx (ctype, &ctype->class_collection,
1532	&ctype->class_collection_max,
1533	&ctype->class_collection_act, last_wch) \|= class_bit;
1534
1535	if (handle_digits == `1`)
1536	{
1537	/ We must store the digit values. /
1538	if (ctype->mbdigits_act == ctype->mbdigits_max)
1539	{
1540	ctype->mbdigits_max *= `2`;
1541	ctype->mbdigits = xrealloc (ctype->mbdigits,
1542	(ctype->mbdigits_max
1543	* sizeof (char *)));
1544	ctype->wcdigits_max *= `2`;
1545	ctype->wcdigits = xrealloc (ctype->wcdigits,
1546	(ctype->wcdigits_max
1547	* sizeof (uint32_t)));
1548	}
1549
1550	ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1551	? seq : NULL);
1552	ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1553	}
1554	else if (handle_digits == `2`)
1555	{
1556	/ We must store the digit values. /
1557	if (ctype->outdigits_act >= `10`)
1558	{
1559	lr_error (ldfile, _("\
1560	%s: field `%s' does not contain exactly ten entries"),
1561	"LC_CTYPE", "outdigit");
1562	return;
1563	}
1564
1565	ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1566	? seq : NULL);
1567	ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1568	++ctype->outdigits_act;
1569	}
1570	}
1571	}
1572
1573
1574	/ Ellipsis as in `/xea/x12.../xea/x34'. /
1575	static void
1576	charclass_charcode_ellipsis (struct linereader *ldfile,
1577	struct locale_ctype_t *ctype,
1578	const struct charmap_t *charmap,
1579	struct repertoire_t *repertoire,
1580	struct token now, char* *last_charcode,
1581	uint32_t last_charcode_len,
1582	unsigned long int class256_bit,
1583	unsigned long int class_bit, int ignore_content,
1584	int handle_digits)
1585	{
1586	/ First check whether the to-value is larger. /
1587	if (now->val.charcode.nbytes != last_charcode_len)
1588	{
1589	lr_error (ldfile, _("\
1590	start and end character sequence of range must have the same length"));
1591	return;
1592	}
1593
1594	if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > `0`)
1595	{
1596	lr_error (ldfile, _("\
1597	to-value character sequence is smaller than from-value sequence"));
1598	return;
1599	}
1600
1601	if (!ignore_content)
1602	{
1603	do
1604	{
1605	/ Increment the byte sequence value. /
1606	struct charseq *seq;
1607	uint32_t wch;
1608	int i;
1609
1610	for (i = last_charcode_len - `1`; i >= `0`; --i)
1611	if (++last_charcode[i] != `0`)
1612	break;
1613
1614	if (last_charcode_len == `1`)
1615	/ Of course we have the charcode value. /
1616	ctype->class256_collection[(size_t) last_charcode[`0`]]
1617	\|= class256_bit;
1618
1619	/ Find the symbolic name. /
1620	seq = charmap_find_symbol (charmap, last_charcode,
1621	last_charcode_len);
1622	if (seq != NULL)
1623	{
1624	if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1625	seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1626	strlen (seq->name));
1627	wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4;
1628
1629	if (wch != ILLEGAL_CHAR_VALUE && class_bit != `0`)
1630	*find_idx (ctype, &ctype->class_collection,
1631	&ctype->class_collection_max,
1632	&ctype->class_collection_act, wch) \|= class_bit;
1633	}
1634	else
1635	wch = ILLEGAL_CHAR_VALUE;
1636
1637	if (handle_digits == `1`)
1638	{
1639	/ We must store the digit values. /
1640	if (ctype->mbdigits_act == ctype->mbdigits_max)
1641	{
1642	ctype->mbdigits_max *= `2`;
1643	ctype->mbdigits = xrealloc (ctype->mbdigits,
1644	(ctype->mbdigits_max
1645	* sizeof (char *)));
1646	ctype->wcdigits_max *= `2`;
1647	ctype->wcdigits = xrealloc (ctype->wcdigits,
1648	(ctype->wcdigits_max
1649	* sizeof (uint32_t)));
1650	}
1651
1652	seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1653	memcpy ((char *) (seq + `1`), last_charcode, last_charcode_len);
1654	seq->nbytes = last_charcode_len;
1655
1656	ctype->mbdigits[ctype->mbdigits_act++] = seq;
1657	ctype->wcdigits[ctype->wcdigits_act++] = wch;
1658	}
1659	else if (handle_digits == `2`)
1660	{
1661	struct charseq *seq;
1662	/ We must store the digit values. /
1663	if (ctype->outdigits_act >= `10`)
1664	{
1665	lr_error (ldfile, _("\
1666	%s: field `%s' does not contain exactly ten entries"),
1667	"LC_CTYPE", "outdigit");
1668	return;
1669	}
1670
1671	seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1672	memcpy ((char *) (seq + `1`), last_charcode, last_charcode_len);
1673	seq->nbytes = last_charcode_len;
1674
1675	ctype->mboutdigits[ctype->outdigits_act] = seq;
1676	ctype->wcoutdigits[ctype->outdigits_act] = wch;
1677	++ctype->outdigits_act;
1678	}
1679	}
1680	while (memcmp (last_charcode, now->val.charcode.bytes,
1681	last_charcode_len) != `0`);
1682	}
1683	}
1684
1685
1686	static uint32_t *
1687	find_translit2 (struct locale_ctype_t ctype, const* struct charmap_t *charmap,
1688	uint32_t wch)
1689	{
1690	struct translit_t *trunp = ctype->translit;
1691	struct translit_ignore_t *tirunp = ctype->translit_ignore;
1692
1693	while (trunp != NULL)
1694	{
1695	/ XXX We simplify things here. The transliterations we look*
1696	for are only allowed to have one character. /*
1697	if (trunp->from[`0`] == wch && trunp->from[`1`] == `0`)
1698	{
1699	/ Found it. Now look for a transliteration which can be*
1700	represented with the character set. /*
1701	struct translit_to_t *torunp = trunp->to;
1702
1703	while (torunp != NULL)
1704	{
1705	int i;
1706
1707	for (i = `0`; torunp->str[i] != `0`; ++i)
1708	{
1709	char utmp[`10`];
1710
1711	snprintf (utmp, sizeof (utmp), "U%08X", torunp->str[i]);
1712	if (charmap_find_value (charmap, utmp, `9`) == NULL)
1713	/ This character cannot be represented. /
1714	break;
1715	}
1716
1717	if (torunp->str[i] == `0`)
1718	return torunp->str;
1719
1720	torunp = torunp->next;
1721	}
1722
1723	break;
1724	}
1725
1726	trunp = trunp->next;
1727	}
1728
1729	/ Check for ignored chars. /
1730	while (tirunp != NULL)
1731	{
1732	if (tirunp->from <= wch && tirunp->to >= wch)
1733	{
1734	uint32_t wi;
1735
1736	for (wi = tirunp->from; wi <= wch; wi += tirunp->step)
1737	if (wi == wch)
1738	return no_str;
1739	}
1740	}
1741
1742	/ Nothing found. /
1743	return NULL;
1744	}
1745
1746
1747	uint32_t *
1748	find_translit (struct localedef_t locale, const* struct charmap_t *charmap,
1749	uint32_t wch)
1750	{
1751	struct locale_ctype_t *ctype;
1752	uint32_t *result = NULL;
1753
1754	assert (locale != NULL);
1755	ctype = locale->categories[LC_CTYPE].ctype;
1756
1757	if (ctype == NULL)
1758	return NULL;
1759
1760	if (ctype->translit != NULL)
1761	result = find_translit2 (ctype, charmap, wch);
1762
1763	if (result == NULL)
1764	{
1765	struct translit_include_t *irunp = ctype->translit_include;
1766
1767	while (irunp != NULL && result == NULL)
1768	{
1769	result = find_translit (find_locale (CTYPE_LOCALE,
1770	irunp->copy_locale,
1771	irunp->copy_repertoire,
1772	charmap),
1773	charmap, wch);
1774	irunp = irunp->next;
1775	}
1776	}
1777
1778	return result;
1779	}
1780
1781
1782	/ Read one transliteration entry. /
1783	static uint32_t *
1784	read_widestring (struct linereader ldfile, struct* token *now,
1785	const struct charmap_t *charmap,
1786	struct repertoire_t *repertoire)
1787	{
1788	uint32_t *wstr;
1789
1790	if (now->tok == tok_default_missing)
1791	/ The special name "" will denote this case. /
1792	wstr = no_str;
1793	else if (now->tok == tok_bsymbol)
1794	{
1795	/ Get the value from the repertoire. /
1796	wstr = (uint32_t ) xmalloc (`2` sizeof (uint32_t));
1797	wstr[`0`] = repertoire_find_value (repertoire, now->val.str.startmb,
1798	now->val.str.lenmb);
1799	if (wstr[`0`] == ILLEGAL_CHAR_VALUE)
1800	{
1801	/ We cannot proceed, we don't know the UCS4 value. /
1802	free (wstr);
1803	return NULL;
1804	}
1805
1806	wstr[`1`] = `0`;
1807	}
1808	else if (now->tok == tok_ucs4)
1809	{
1810	wstr = (uint32_t ) xmalloc (`2` sizeof (uint32_t));
1811	wstr[`0`] = now->val.ucs4;
1812	wstr[`1`] = `0`;
1813	}
1814	else if (now->tok == tok_charcode)
1815	{
1816	/ Argh, we have to convert to the symbol name first and then to the*
1817	UCS4 value. /*
1818	struct charseq *seq = charmap_find_symbol (charmap,
1819	now->val.str.startmb,
1820	now->val.str.lenmb);
1821	if (seq == NULL)
1822	/ Cannot find the UCS4 value. /
1823	return NULL;
1824
1825	if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1826	seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1827	strlen (seq->name));
1828	if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1829	/ We cannot proceed, we don't know the UCS4 value. /
1830	return NULL;
1831
1832	wstr = (uint32_t ) xmalloc (`2` sizeof (uint32_t));
1833	wstr[`0`] = seq->ucs4;
1834	wstr[`1`] = `0`;
1835	}
1836	else if (now->tok == tok_string)
1837	{
1838	wstr = now->val.str.startwc;
1839	if (wstr == NULL \|\| wstr[`0`] == `0`)
1840	return NULL;
1841	}
1842	else
1843	{
1844	if (now->tok != tok_eol && now->tok != tok_eof)
1845	lr_ignore_rest (ldfile, `0`);
1846	SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1847	return (uint32_t *) -`1l`;
1848	}
1849
1850	return wstr;
1851	}
1852
1853
1854	static void
1855	read_translit_entry (struct linereader ldfile, struct* locale_ctype_t *ctype,
1856	struct token now, const* struct charmap_t *charmap,
1857	struct repertoire_t *repertoire)
1858	{
1859	uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1860	struct translit_t *result;
1861	struct translit_to_t **top;
1862	struct obstack *ob = &ctype->mempool;
1863	int first;
1864	int ignore;
1865
1866	if (from_wstr == NULL)
1867	/ There is no valid from string. /
1868	return;
1869
1870	result = (struct translit_t *) obstack_alloc (ob,
1871	sizeof (struct translit_t));
1872	result->from = from_wstr;
1873	result->fname = ldfile->fname;
1874	result->lineno = ldfile->lineno;
1875	result->next = NULL;
1876	result->to = NULL;
1877	top = &result->to;
1878	first = `1`;
1879	ignore = `0`;
1880
1881	while (`1`)
1882	{
1883	uint32_t *to_wstr;
1884
1885	/ Next we have one or more transliterations. They are*
1886	separated by semicolons. /*
1887	now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
1888
1889	if (!first && (now->tok == tok_semicolon \|\| now->tok == tok_eol))
1890	{
1891	/ One string read. /
1892	const uint32_t zero = `0`;
1893
1894	if (!ignore)
1895	{
1896	obstack_grow (ob, &zero, `4`);
1897	to_wstr = obstack_finish (ob);
1898
1899	top = obstack_alloc (ob, sizeof* (struct translit_to_t));
1900	(*top)->str = to_wstr;
1901	(*top)->next = NULL;
1902	}
1903
1904	if (now->tok == tok_eol)
1905	{
1906	result->next = ctype->translit;
1907	ctype->translit = result;
1908	return;
1909	}
1910
1911	if (!ignore)
1912	top = &(*top)->next;
1913	ignore = `0`;
1914	}
1915	else
1916	{
1917	to_wstr = read_widestring (ldfile, now, charmap, repertoire);
1918	if (to_wstr == (uint32_t *) -`1l`)
1919	{
1920	/ An error occurred. /
1921	obstack_free (ob, result);
1922	return;
1923	}
1924
1925	if (to_wstr == NULL)
1926	ignore = `1`;
1927	else
1928	/ This value is usable. /
1929	obstack_grow (ob, to_wstr, wcslen ((wchar_t ) to_wstr) `4`);
1930
1931	first = `0`;
1932	}
1933	}
1934	}
1935
1936
1937	static void
1938	read_translit_ignore_entry (struct linereader *ldfile,
1939	struct locale_ctype_t *ctype,
1940	const struct charmap_t *charmap,
1941	struct repertoire_t *repertoire)
1942	{
1943	/ We expect a semicolon-separated list of characters we ignore. We are*
1944	only interested in the wide character definitions. These must be
1945	single characters, possibly defining a range when an ellipsis is used. /*
1946	while (`1`)
1947	{
1948	struct token *now = lr_token (ldfile, charmap, NULL, repertoire,
1949	verbose);
1950	struct translit_ignore_t *newp;
1951	uint32_t from;
1952
1953	if (now->tok == tok_eol \|\| now->tok == tok_eof)
1954	{
1955	lr_error (ldfile,
1956	_("premature end of `translit_ignore' definition"));
1957	return;
1958	}
1959
1960	if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
1961	{
1962	lr_error (ldfile, _("syntax error"));
1963	lr_ignore_rest (ldfile, `0`);
1964	return;
1965	}
1966
1967	if (now->tok == tok_ucs4)
1968	from = now->val.ucs4;
1969	else
1970	/ Try to get the value. /
1971	from = repertoire_find_value (repertoire, now->val.str.startmb,
1972	now->val.str.lenmb);
1973
1974	if (from == ILLEGAL_CHAR_VALUE)
1975	{
1976	lr_error (ldfile, "invalid character name");
1977	newp = NULL;
1978	}
1979	else
1980	{
1981	newp = (struct translit_ignore_t *)
1982	obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
1983	newp->from = from;
1984	newp->to = from;
1985	newp->step = `1`;
1986
1987	newp->next = ctype->translit_ignore;
1988	ctype->translit_ignore = newp;
1989	}
1990
1991	/ Now we expect either a semicolon, an ellipsis, or the end of the*
1992	line. /*
1993	now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
1994
1995	if (now->tok == tok_ellipsis2 \|\| now->tok == tok_ellipsis2_2)
1996	{
1997	/ XXX Should we bother implementing `....'? `...' certainly*
1998	will not be implemented. /*
1999	uint32_t to;
2000	int step = now->tok == tok_ellipsis2_2 ? `2` : `1`;
2001
2002	now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2003
2004	if (now->tok == tok_eol \|\| now->tok == tok_eof)
2005	{
2006	lr_error (ldfile,
2007	_("premature end of `translit_ignore' definition"));
2008	return;
2009	}
2010
2011	if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2012	{
2013	lr_error (ldfile, _("syntax error"));
2014	lr_ignore_rest (ldfile, `0`);
2015	return;
2016	}
2017
2018	if (now->tok == tok_ucs4)
2019	to = now->val.ucs4;
2020	else
2021	/ Try to get the value. /
2022	to = repertoire_find_value (repertoire, now->val.str.startmb,
2023	now->val.str.lenmb);
2024
2025	if (to == ILLEGAL_CHAR_VALUE)
2026	lr_error (ldfile, "invalid character name");
2027	else
2028	{
2029	/ Make sure the `to'-value is larger. /
2030	if (to >= from)
2031	{
2032	newp->to = to;
2033	newp->step = step;
2034	}
2035	else
2036	lr_error (ldfile, _("\
2037	to-value <U%0X> of range is smaller than from-value <U%0X>"),
2038	(to \| from) < `65536` ? `4` : `8`, to,
2039	(to \| from) < `65536` ? `4` : `8`, from);
2040	}
2041
2042	/ And the next token. /
2043	now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2044	}
2045
2046	if (now->tok == tok_eol \|\| now->tok == tok_eof)
2047	/ We are done. /
2048	return;
2049
2050	if (now->tok == tok_semicolon)
2051	/ Next round. /
2052	continue;
2053
2054	/ If we come here something is wrong. /
2055	lr_error (ldfile, _("syntax error"));
2056	lr_ignore_rest (ldfile, `0`);
2057	return;
2058	}
2059	}
2060
2061
2062	/ The parser for the LC_CTYPE section of the locale definition. /
2063	void
2064	ctype_read (struct linereader ldfile, struct* localedef_t *result,
2065	const struct charmap_t charmap, const* char *repertoire_name,
2066	int ignore_content)
2067	{
2068	struct repertoire_t *repertoire = NULL;
2069	struct locale_ctype_t *ctype;
2070	struct token *now;
2071	enum token_t nowtok;
2072	size_t cnt;
2073	uint32_t last_wch = `0`;
2074	enum token_t last_token;
2075	enum token_t ellipsis_token;
2076	int step;
2077	char last_charcode[`16`];
2078	size_t last_charcode_len = `0`;
2079	const char *last_str = NULL;
2080	int mapidx;
2081	struct localedef_t *copy_locale = NULL;
2082
2083	/ Get the repertoire we have to use. /
2084	if (repertoire_name != NULL)
2085	repertoire = repertoire_read (repertoire_name);
2086
2087	/ The rest of the line containing `LC_CTYPE' must be free. /
2088	lr_ignore_rest (ldfile, `1`);
2089
2090
2091	do
2092	{
2093	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2094	nowtok = now->tok;
2095	}
2096	while (nowtok == tok_eol);
2097
2098	/ If we see `copy' now we are almost done. /
2099	if (nowtok == tok_copy)
2100	{
2101	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2102	if (now->tok != tok_string)
2103	{
2104	SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2105
2106	skip_category:
2107	do
2108	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2109	while (now->tok != tok_eof && now->tok != tok_end);
2110
2111	if (now->tok != tok_eof
2112	\|\| (now = lr_token (ldfile, charmap, NULL, NULL, verbose),
2113	now->tok == tok_eof))
2114	lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2115	else if (now->tok != tok_lc_ctype)
2116	{
2117	lr_error (ldfile, _("\
2118	%1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2119	lr_ignore_rest (ldfile, `0`);
2120	}
2121	else
2122	lr_ignore_rest (ldfile, `1`);
2123
2124	return;
2125	}
2126
2127	if (! ignore_content)
2128	{
2129	/ Get the locale definition. /
2130	copy_locale = load_locale (LC_CTYPE, now->val.str.startmb,
2131	repertoire_name, charmap, NULL);
2132	if ((copy_locale->avail & CTYPE_LOCALE) == `0`)
2133	{
2134	/ Not yet loaded. So do it now. /
2135	if (locfile_read (copy_locale, charmap) != `0`)
2136	goto skip_category;
2137	}
2138
2139	if (copy_locale->categories[LC_CTYPE].ctype == NULL)
2140	return;
2141	}
2142
2143	lr_ignore_rest (ldfile, `1`);
2144
2145	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2146	nowtok = now->tok;
2147	}
2148
2149	/ Prepare the data structures. /
2150	ctype_startup (ldfile, result, charmap, copy_locale, ignore_content);
2151	ctype = result->categories[LC_CTYPE].ctype;
2152
2153	/ Remember the repertoire we use. /
2154	if (!ignore_content)
2155	ctype->repertoire = repertoire;
2156
2157	while (`1`)
2158	{
2159	unsigned long int class_bit = `0`;
2160	unsigned long int class256_bit = `0`;
2161	int handle_digits = `0`;
2162
2163	/ Of course we don't proceed beyond the end of file. /
2164	if (nowtok == tok_eof)
2165	break;
2166
2167	/ Ingore empty lines. /
2168	if (nowtok == tok_eol)
2169	{
2170	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2171	nowtok = now->tok;
2172	continue;
2173	}
2174
2175	switch (nowtok)
2176	{
2177	case tok_charclass:
2178	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2179	while (now->tok == tok_ident \|\| now->tok == tok_string)
2180	{
2181	ctype_class_new (ldfile, ctype, now->val.str.startmb);
2182	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2183	if (now->tok != tok_semicolon)
2184	break;
2185	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2186	}
2187	if (now->tok != tok_eol)
2188	SYNTAX_ERROR (_("\
2189	%s: syntax error in definition of new character class"), "LC_CTYPE");
2190	break;
2191
2192	case tok_charconv:
2193	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2194	while (now->tok == tok_ident \|\| now->tok == tok_string)
2195	{
2196	ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2197	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2198	if (now->tok != tok_semicolon)
2199	break;
2200	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2201	}
2202	if (now->tok != tok_eol)
2203	SYNTAX_ERROR (_("\
2204	%s: syntax error in definition of new character map"), "LC_CTYPE");
2205	break;
2206
2207	case tok_class:
2208	/ Ignore the rest of the line if we don't need the input of*
2209	this line. /*
2210	if (ignore_content)
2211	{
2212	lr_ignore_rest (ldfile, `0`);
2213	break;
2214	}
2215
2216	/ We simply forget the `class' keyword and use the following*
2217	operand to determine the bit. /*
2218	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2219	if (now->tok == tok_ident \|\| now->tok == tok_string)
2220	{
2221	/* Check whether it is one of the already known class names.  */
2222	for (cnt = `0`; cnt < ctype->nr_charclass; ++cnt)
2223	if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == `0`)
2224	break;
2225	if (cnt >= ctype->nr_charclass)
2226	{
2227	/ OK, it's a new class. /
2228	ctype_class_new (ldfile, ctype, now->val.str.startmb);
2229
2230	class_bit = _ISwbit (ctype->nr_charclass - `1`);
2231	}
2232	else
2233	{
2234	class_bit = _ISwbit (cnt);
2235
2236	free (now->val.str.startmb);
2237	}
2238	}
2239	else if (now->tok == tok_digit)
2240	goto handle_tok_digit;
2241	else if (now->tok < tok_upper \|\| now->tok > tok_blank)
2242	goto err_label;
2243	else
2244	{
2245	class_bit = BITw (now->tok);
2246	class256_bit = BIT (now->tok);
2247	}
2248
2249	/ The next character must be a semicolon. /
2250	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2251	if (now->tok != tok_semicolon)
2252	goto err_label;
2253	goto read_charclass;
2254
2255	case tok_upper:
2256	case tok_lower:
2257	case tok_alpha:
2258	case tok_alnum:
2259	case tok_space:
2260	case tok_cntrl:
2261	case tok_punct:
2262	case tok_graph:
2263	case tok_print:
2264	case tok_xdigit:
2265	case tok_blank:
2266	/ Ignore the rest of the line if we don't need the input of*
2267	this line. /*
2268	if (ignore_content)
2269	{
2270	lr_ignore_rest (ldfile, `0`);
2271	break;
2272	}
2273
2274	class_bit = BITw (now->tok);
2275	class256_bit = BIT (now->tok);
2276	handle_digits = `0`;
2277	read_charclass:
2278	ctype->class_done \|= class_bit;
2279	last_token = tok_none;
2280	ellipsis_token = tok_none;
2281	step = `1`;
2282	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2283	while (now->tok != tok_eol && now->tok != tok_eof)
2284	{
2285	uint32_t wch;
2286	struct charseq *seq;
2287
2288	if (ellipsis_token == tok_none)
2289	{
2290	if (get_character (now, charmap, repertoire, &seq, &wch))
2291	goto err_label;
2292
2293	if (!ignore_content && seq != NULL && seq->nbytes == `1`)
2294	/ Yep, we can store information about this byte*
2295	sequence. /*
2296	ctype->class256_collection[seq->bytes[`0`]] \|= class256_bit;
2297
2298	if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
2299	&& class_bit != `0`)
2300	/ We have the UCS4 position. /
2301	*find_idx (ctype, &ctype->class_collection,
2302	&ctype->class_collection_max,
2303	&ctype->class_collection_act, wch) \|= class_bit;
2304
2305	last_token = now->tok;
2306	/ Terminate the string. /
2307	if (last_token == tok_bsymbol)
2308	{
2309	now->val.str.startmb[now->val.str.lenmb] = `'\0'`;
2310	last_str = now->val.str.startmb;
2311	}
2312	else
2313	last_str = NULL;
2314	last_wch = wch;
2315	memcpy (last_charcode, now->val.charcode.bytes, `16`);
2316	last_charcode_len = now->val.charcode.nbytes;
2317
2318	if (!ignore_content && handle_digits == `1`)
2319	{
2320	/ We must store the digit values. /
2321	if (ctype->mbdigits_act == ctype->mbdigits_max)
2322	{
2323	ctype->mbdigits_max += `10`;
2324	ctype->mbdigits = xrealloc (ctype->mbdigits,
2325	(ctype->mbdigits_max
2326	* sizeof (char *)));
2327	ctype->wcdigits_max += `10`;
2328	ctype->wcdigits = xrealloc (ctype->wcdigits,
2329	(ctype->wcdigits_max
2330	* sizeof (uint32_t)));
2331	}
2332
2333	ctype->mbdigits[ctype->mbdigits_act++] = seq;
2334	ctype->wcdigits[ctype->wcdigits_act++] = wch;
2335	}
2336	else if (!ignore_content && handle_digits == `2`)
2337	{
2338	/ We must store the digit values. /
2339	if (ctype->outdigits_act >= `10`)
2340	{
2341	lr_error (ldfile, _("\
2342	%s: field `%s' does not contain exactly ten entries"),
2343	"LC_CTYPE", "outdigit");
2344	lr_ignore_rest (ldfile, `0`);
2345	break;
2346	}
2347
2348	ctype->mboutdigits[ctype->outdigits_act] = seq;
2349	ctype->wcoutdigits[ctype->outdigits_act] = wch;
2350	++ctype->outdigits_act;
2351	}
2352	}
2353	else
2354	{
2355	/ Now it gets complicated. We have to resolve the*
2356	ellipsis problem. First we must distinguish between
2357	the different kind of ellipsis and this must match the
2358	tokens we have seen. /*
2359	assert (last_token != tok_none);
2360
2361	if (last_token != now->tok)
2362	{
2363	lr_error (ldfile, _("\
2364	ellipsis range must be marked by two operands of same type"));
2365	lr_ignore_rest (ldfile, `0`);
2366	break;
2367	}
2368
2369	if (last_token == tok_bsymbol)
2370	{
2371	if (ellipsis_token == tok_ellipsis3)
2372	lr_error (ldfile, _("with symbolic name range values \
2373	the absolute ellipsis `...' must not be used"));
2374
2375	charclass_symbolic_ellipsis (ldfile, ctype, charmap,
2376	repertoire, now, last_str,
2377	class256_bit, class_bit,
2378	(ellipsis_token
2379	== tok_ellipsis4
2380	? `10` : `16`),
2381	ignore_content,
2382	handle_digits, step);
2383	}
2384	else if (last_token == tok_ucs4)
2385	{
2386	if (ellipsis_token != tok_ellipsis2)
2387	lr_error (ldfile, _("\
2388	with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2389
2390	charclass_ucs4_ellipsis (ldfile, ctype, charmap,
2391	repertoire, now, last_wch,
2392	class256_bit, class_bit,
2393	ignore_content, handle_digits,
2394	step);
2395	}
2396	else
2397	{
2398	assert (last_token == tok_charcode);
2399
2400	if (ellipsis_token != tok_ellipsis3)
2401	lr_error (ldfile, _("\
2402	with character code range values one must use the absolute ellipsis `...'"));
2403
2404	charclass_charcode_ellipsis (ldfile, ctype, charmap,
2405	repertoire, now,
2406	last_charcode,
2407	last_charcode_len,
2408	class256_bit, class_bit,
2409	ignore_content,
2410	handle_digits);
2411	}
2412
2413	/ Now we have used the last value. /
2414	last_token = tok_none;
2415	}
2416
2417	/ Next we expect a semicolon or the end of the line. /
2418	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2419	if (now->tok == tok_eol \|\| now->tok == tok_eof)
2420	break;
2421
2422	if (last_token != tok_none
2423	&& now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
2424	{
2425	if (now->tok == tok_ellipsis2_2)
2426	{
2427	now->tok = tok_ellipsis2;
2428	step = `2`;
2429	}
2430	else if (now->tok == tok_ellipsis4_2)
2431	{
2432	now->tok = tok_ellipsis4;
2433	step = `2`;
2434	}
2435
2436	ellipsis_token = now->tok;
2437
2438	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2439	continue;
2440	}
2441
2442	if (now->tok != tok_semicolon)
2443	goto err_label;
2444
2445	/ And get the next character. /
2446	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2447
2448	ellipsis_token = tok_none;
2449	step = `1`;
2450	}
2451	break;
2452
2453	case tok_digit:
2454	/ Ignore the rest of the line if we don't need the input of*
2455	this line. /*
2456	if (ignore_content)
2457	{
2458	lr_ignore_rest (ldfile, `0`);
2459	break;
2460	}
2461
2462	handle_tok_digit:
2463	class_bit = _ISwdigit;
2464	class256_bit = _ISdigit;
2465	handle_digits = `1`;
2466	goto read_charclass;
2467
2468	case tok_outdigit:
2469	/ Ignore the rest of the line if we don't need the input of*
2470	this line. /*
2471	if (ignore_content)
2472	{
2473	lr_ignore_rest (ldfile, `0`);
2474	break;
2475	}
2476
2477	if (ctype->outdigits_act != `0`)
2478	lr_error (ldfile, _("\
2479	%s: field `%s' declared more than once"),
2480	"LC_CTYPE", "outdigit");
2481	class_bit = `0`;
2482	class256_bit = `0`;
2483	handle_digits = `2`;
2484	goto read_charclass;
2485
2486	case tok_toupper:
2487	/ Ignore the rest of the line if we don't need the input of*
2488	this line. /*
2489	if (ignore_content)
2490	{
2491	lr_ignore_rest (ldfile, `0`);
2492	break;
2493	}
2494
2495	mapidx = `0`;
2496	goto read_mapping;
2497
2498	case tok_tolower:
2499	/ Ignore the rest of the line if we don't need the input of*
2500	this line. /*
2501	if (ignore_content)
2502	{
2503	lr_ignore_rest (ldfile, `0`);
2504	break;
2505	}
2506
2507	mapidx = `1`;
2508	goto read_mapping;
2509
2510	case tok_map:
2511	/ Ignore the rest of the line if we don't need the input of*
2512	this line. /*
2513	if (ignore_content)
2514	{
2515	lr_ignore_rest (ldfile, `0`);
2516	break;
2517	}
2518
2519	/ We simply forget the `map' keyword and use the following*
2520	operand to determine the mapping. /*
2521	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2522	if (now->tok == tok_ident \|\| now->tok == tok_string)
2523	{
2524	size_t cnt;
2525
2526	for (cnt = `2`; cnt < ctype->map_collection_nr; ++cnt)
2527	if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == `0`)
2528	break;
2529
2530	if (cnt < ctype->map_collection_nr)
2531	free (now->val.str.startmb);
2532	else
2533	/ OK, it's a new map. /
2534	ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2535
2536	mapidx = cnt;
2537	}
2538	else if (now->tok < tok_toupper \|\| now->tok > tok_tolower)
2539	goto err_label;
2540	else
2541	mapidx = now->tok - tok_toupper;
2542
2543	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2544	/ This better should be a semicolon. /
2545	if (now->tok != tok_semicolon)
2546	goto err_label;
2547
2548	read_mapping:
2549	/ Test whether this mapping was already defined. /
2550	if (ctype->tomap_done[mapidx])
2551	{
2552	lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2553	ctype->mapnames[mapidx]);
2554	lr_ignore_rest (ldfile, `0`);
2555	break;
2556	}
2557	ctype->tomap_done[mapidx] = `1`;
2558
2559	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2560	while (now->tok != tok_eol && now->tok != tok_eof)
2561	{
2562	struct charseq *from_seq;
2563	uint32_t from_wch;
2564	struct charseq *to_seq;
2565	uint32_t to_wch;
2566
2567	/ Every pair starts with an opening brace. /
2568	if (now->tok != tok_open_brace)
2569	goto err_label;
2570
2571	/ Next comes the from-value. /
2572	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2573	if (get_character (now, charmap, repertoire, &from_seq,
2574	&from_wch) != `0`)
2575	goto err_label;
2576
2577	/ The next is a comma. /
2578	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2579	if (now->tok != tok_comma)
2580	goto err_label;
2581
2582	/ And the other value. /
2583	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2584	if (get_character (now, charmap, repertoire, &to_seq,
2585	&to_wch) != `0`)
2586	goto err_label;
2587
2588	/ And the last thing is the closing brace. /
2589	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2590	if (now->tok != tok_close_brace)
2591	goto err_label;
2592
2593	if (!ignore_content)
2594	{
2595	/ Check whether the mapping converts from an ASCII value*
2596	to a non-ASCII value. /*
2597	if (from_seq != NULL && from_seq->nbytes == `1`
2598	&& isascii (from_seq->bytes[`0`])
2599	&& to_seq != NULL && (to_seq->nbytes != `1`
2600	\|\| !isascii (to_seq->bytes[`0`])))
2601	ctype->to_nonascii = `1`;
2602
2603	if (mapidx < `2` && from_seq != NULL && to_seq != NULL
2604	&& from_seq->nbytes == `1` && to_seq->nbytes == `1`)
2605	/ We can use this value. /
2606	ctype->map256_collection[mapidx][from_seq->bytes[`0`]]
2607	= to_seq->bytes[`0`];
2608
2609	if (from_wch != ILLEGAL_CHAR_VALUE
2610	&& to_wch != ILLEGAL_CHAR_VALUE)
2611	/ Both correct values. /
2612	*find_idx (ctype, &ctype->map_collection[mapidx],
2613	&ctype->map_collection_max[mapidx],
2614	&ctype->map_collection_act[mapidx],
2615	from_wch) = to_wch;
2616	}
2617
2618	/ Now comes a semicolon or the end of the line/file. /
2619	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2620	if (now->tok == tok_semicolon)
2621	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2622	}
2623	break;
2624
2625	case tok_translit_start:
2626	/ Ignore the entire translit section with its peculiar syntax*
2627	if we don't need the input. /*
2628	if (ignore_content)
2629	{
2630	do
2631	{
2632	lr_ignore_rest (ldfile, `0`);
2633	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2634	}
2635	while (now->tok != tok_translit_end && now->tok != tok_eof);
2636
2637	if (now->tok == tok_eof)
2638	lr_error (ldfile, _(\
2639	"%s: `translit_start' section does not end with `translit_end'"),
2640	"LC_CTYPE");
2641
2642	break;
2643	}
2644
2645	/ The rest of the line better should be empty. /
2646	lr_ignore_rest (ldfile, `1`);
2647
2648	/ We count here the number of allocated entries in the `translit'*
2649	array. /*
2650	cnt = `0`;
2651
2652	ldfile->translate_strings = `1`;
2653	ldfile->return_widestr = `1`;
2654
2655	/ We proceed until we see the `translit_end' token. /
2656	while (now = lr_token (ldfile, charmap, NULL, repertoire, verbose),
2657	now->tok != tok_translit_end && now->tok != tok_eof)
2658	{
2659	if (now->tok == tok_eol)
2660	/ Ignore empty lines. /
2661	continue;
2662
2663	if (now->tok == tok_include)
2664	{
2665	/ We have to include locale. /
2666	const char *locale_name;
2667	const char *repertoire_name;
2668	struct translit_include_t include_stmt, *include_ptr;
2669
2670	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2671	/ This should be a string or an identifier. In any*
2672	case something to name a locale. /*
2673	if (now->tok != tok_string && now->tok != tok_ident)
2674	{
2675	translit_syntax:
2676	lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2677	lr_ignore_rest (ldfile, `0`);
2678	continue;
2679	}
2680	locale_name = now->val.str.startmb;
2681
2682	/ Next should be a semicolon. /
2683	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2684	if (now->tok != tok_semicolon)
2685	goto translit_syntax;
2686
2687	/ Now the repertoire name. /
2688	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2689	if ((now->tok != tok_string && now->tok != tok_ident)
2690	\|\| now->val.str.startmb == NULL)
2691	goto translit_syntax;
2692	repertoire_name = now->val.str.startmb;
2693	if (repertoire_name[`0`] == `'\0'`)
2694	/ Ignore the empty string. /
2695	repertoire_name = NULL;
2696
2697	/ Save the include statement for later processing. /
2698	include_stmt = (struct translit_include_t *)
2699	xmalloc (sizeof (struct translit_include_t));
2700	include_stmt->copy_locale = locale_name;
2701	include_stmt->copy_repertoire = repertoire_name;
2702	include_stmt->next = NULL;
2703
2704	include_ptr = &ctype->translit_include;
2705	while (*include_ptr != NULL)
2706	include_ptr = &(*include_ptr)->next;
2707	*include_ptr = include_stmt;
2708
2709	/ The rest of the line must be empty. /
2710	lr_ignore_rest (ldfile, `1`);
2711
2712	/ Make sure the locale is read. /
2713	add_to_readlist (LC_CTYPE, locale_name, repertoire_name,
2714	`1`, NULL);
2715	continue;
2716	}
2717	else if (now->tok == tok_default_missing)
2718	{
2719	uint32_t *wstr;
2720
2721	while (`1`)
2722	{
2723	/ We expect a single character or string as the*
2724	argument. /*
2725	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2726	wstr = read_widestring (ldfile, now, charmap,
2727	repertoire);
2728
2729	if (wstr != NULL)
2730	{
2731	if (ctype->default_missing != NULL)
2732	{
2733	lr_error (ldfile, _("\
2734	%s: duplicate `default_missing' definition"), "LC_CTYPE");
2735	record_error_at_line (`0`, `0`,
2736	ctype->default_missing_file,
2737	ctype->default_missing_lineno,
2738	_("\
2739	previous definition was here"));
2740	}
2741	else
2742	{
2743	ctype->default_missing = wstr;
2744	ctype->default_missing_file = ldfile->fname;
2745	ctype->default_missing_lineno = ldfile->lineno;
2746	}
2747	/ We can have more entries, ignore them. /
2748	lr_ignore_rest (ldfile, `0`);
2749	break;
2750	}
2751	else if (wstr == (uint32_t *) -`1l`)
2752	/* This was a syntax error.  */
2753	break;
2754
2755	/ Maybe there is another replacement we can use. /
2756	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2757	if (now->tok == tok_eol \|\| now->tok == tok_eof)
2758	{
2759	/ Nothing found. We tell the user. /
2760	lr_error (ldfile, _("\
2761	%s: no representable `default_missing' definition found"), "LC_CTYPE");
2762	break;
2763	}
2764	if (now->tok != tok_semicolon)
2765	goto translit_syntax;
2766	}
2767
2768	continue;
2769	}
2770	else if (now->tok == tok_translit_ignore)
2771	{
2772	read_translit_ignore_entry (ldfile, ctype, charmap,
2773	repertoire);
2774	continue;
2775	}
2776
2777	read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2778	}
2779	ldfile->return_widestr = `0`;
2780
2781	if (now->tok == tok_eof)
2782	lr_error (ldfile, _(\
2783	"%s: `translit_start' section does not end with `translit_end'"),
2784	"LC_CTYPE");
2785
2786	break;
2787
2788	case tok_ident:
2789	/ Ignore the rest of the line if we don't need the input of*
2790	this line. /*
2791	if (ignore_content)
2792	{
2793	lr_ignore_rest (ldfile, `0`);
2794	break;
2795	}
2796
2797	/ This could mean one of several things. First test whether*
2798	it's a character class name. /*
2799	for (cnt = `0`; cnt < ctype->nr_charclass; ++cnt)
2800	if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == `0`)
2801	break;
2802	if (cnt < ctype->nr_charclass)
2803	{
2804	class_bit = _ISwbit (cnt);
2805	class256_bit = cnt <= `11` ? _ISbit (cnt) : `0`;
2806	free (now->val.str.startmb);
2807	goto read_charclass;
2808	}
2809	for (cnt = `0`; cnt < ctype->map_collection_nr; ++cnt)
2810	if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == `0`)
2811	break;
2812	if (cnt < ctype->map_collection_nr)
2813	{
2814	mapidx = cnt;
2815	free (now->val.str.startmb);
2816	goto read_mapping;
2817	}
2818	break;
2819
2820	case tok_end:
2821	/ Next we assume `LC_CTYPE'. /
2822	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2823	if (now->tok == tok_eof)
2824	break;
2825	if (now->tok == tok_eol)
2826	lr_error (ldfile, _("%s: incomplete `END' line"),
2827	"LC_CTYPE");
2828	else if (now->tok != tok_lc_ctype)
2829	lr_error (ldfile, _("\
2830	%1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2831	lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
2832	return;
2833
2834	default:
2835	err_label:
2836	if (now->tok != tok_eof)
2837	SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2838	}
2839
2840	/ Prepare for the next round. /
2841	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2842	nowtok = now->tok;
2843	}
2844
2845	/ When we come here we reached the end of the file. /
2846	lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2847	}
2848
2849
2850	/ Subroutine of set_class_defaults, below. /
2851	static void
2852	set_one_default (struct locale_ctype_t *ctype,
2853	const struct charmap_t *charmap,
2854	int bitpos, int from, int to)
2855	{
2856	char tmp[`2`];
2857	int ch;
2858	int bit = _ISbit (bitpos);
2859	int bitw = _ISwbit (bitpos);
2860	/ Define string. /
2861	strcpy (tmp, "?");
2862
2863	for (ch = from; ch <= to; ++ch)
2864	{
2865	struct charseq *seq;
2866	tmp[`0`] = ch;
2867
2868	seq = charmap_find_value (charmap, tmp, `1`);
2869	if (seq == NULL)
2870	{
2871	char buf[`10`];
2872	sprintf (buf, "U%08X", ch);
2873	seq = charmap_find_value (charmap, buf, `9`);
2874	}
2875	if (seq == NULL)
2876	{
2877	record_error (`0`, `0`, _("\
2878	%s: character `%s' not defined while needed as default value"),
2879	"LC_CTYPE", tmp);
2880	}
2881	else if (seq->nbytes != `1`)
2882	record_error (`0`, `0`, _("\
2883	%s: character `%s' in charmap not representable with one byte"),
2884	"LC_CTYPE", tmp);
2885	else
2886	ctype->class256_collection[seq->bytes[`0`]] \|= bit;
2887
2888	/ No need to search here, the ASCII value is also the Unicode*
2889	value. /*
2890	ELEM (ctype, class_collection, , ch) \|= bitw;
2891	}
2892	}
2893
2894	static void
2895	set_class_defaults (struct locale_ctype_t *ctype,
2896	const struct charmap_t *charmap,
2897	struct repertoire_t *repertoire)
2898	{
2899	#define set_default(bitpos, from, to) \
2900	set_one_default (ctype, charmap, bitpos, from, to)
2901
2902	/* This function defines the default values for the classes and conversions
2903	according to POSIX.2 2.5.2.1.
2904	It may seem that the order of these if-blocks is arbitrary but it is NOT.
2905	Don't move them unless you know what you do!  */
2906
2907	/ Set default values if keyword was not present. /
2908	if ((ctype->class_done & BITw (tok_upper)) == `0`)
2909	/* "If this keyword [upper] is not specified, the uppercase letters
2910	`A' through `Z', ..., shall automatically belong to this class,
2911	with implementation defined character values." [P1003.2, 2.5.2.1]  */
2912	set_default (BITPOS (tok_upper), `'A'`, `'Z'`);
2913
2914	if ((ctype->class_done & BITw (tok_lower)) == `0`)
2915	/ "If this keyword [lower] is not specified, the lowercase letters*
2916	`a' through `z', ..., shall automatically belong to this class,
2917	with implementation defined character values." [P1003.2, 2.5.2.1] /*
2918	set_default (BITPOS (tok_lower), `'a'`, `'z'`);
2919
2920	if ((ctype->class_done & BITw (tok_alpha)) == `0`)
2921	{
2922	/ Table 2-6 in P1003.2 says that characters in class `upper' or*
2923	class `lower' must* be in class `alpha'. /
2924	unsigned long int mask = BIT (tok_upper) \| BIT (tok_lower);
2925	unsigned long int maskw = BITw (tok_upper) \| BITw (tok_lower);
2926
2927	for (size_t cnt = `0`; cnt < `256`; ++cnt)
2928	if ((ctype->class256_collection[cnt] & mask) != `0`)
2929	ctype->class256_collection[cnt] \|= BIT (tok_alpha);
2930
2931	for (size_t cnt = `0`; cnt < ctype->class_collection_act; ++cnt)
2932	if ((ctype->class_collection[cnt] & maskw) != `0`)
2933	ctype->class_collection[cnt] \|= BITw (tok_alpha);
2934	}
2935
2936	if ((ctype->class_done & BITw (tok_digit)) == `0`)
2937	/ "If this keyword [digit] is not specified, the digits `0' through*
2938	`9', ..., shall automatically belong to this class, with
2939	implementation-defined character values." [P1003.2, 2.5.2.1] /*
2940	set_default (BITPOS (tok_digit), `'0'`, `'9'`);
2941
2942	/ "Only characters specified for the `alpha' and `digit' keyword*
2943	shall be specified. Characters specified for the keyword `alpha'
2944	and `digit' are automatically included in this class. /*
2945	{
2946	unsigned long int mask = BIT (tok_alpha) \| BIT (tok_digit);
2947	unsigned long int maskw = BITw (tok_alpha) \| BITw (tok_digit);
2948
2949	for (size_t cnt = `0`; cnt < `256`; ++cnt)
2950	if ((ctype->class256_collection[cnt] & mask) != `0`)
2951	ctype->class256_collection[cnt] \|= BIT (tok_alnum);
2952
2953	for (size_t cnt = `0`; cnt < ctype->class_collection_act; ++cnt)
2954	if ((ctype->class_collection[cnt] & maskw) != `0`)
2955	ctype->class_collection[cnt] \|= BITw (tok_alnum);
2956	}
2957
2958	if ((ctype->class_done & BITw (tok_space)) == `0`)
2959	/ "If this keyword [space] is not specified, the characters <space>,*
2960	<form-feed>, <newline>, <carriage-return>, <tab>, and
2961	<vertical-tab>, ..., shall automatically belong to this class,
2962	with implementation-defined character values." [P1003.2, 2.5.2.1] /*
2963	{
2964	struct charseq *seq;
2965
2966	seq = charmap_find_value (charmap, "space", `5`);
2967	if (seq == NULL)
2968	seq = charmap_find_value (charmap, "SP", `2`);
2969	if (seq == NULL)
2970	seq = charmap_find_value (charmap, "U00000020", `9`);
2971	if (seq == NULL)
2972	{
2973	record_error (`0`, `0`, _("\
2974	%s: character `%s' not defined while needed as default value"),
2975	"LC_CTYPE", "<space>");
2976	}
2977	else if (seq->nbytes != `1`)
2978	record_error (`0`, `0`, _("\
2979	%s: character `%s' in charmap not representable with one byte"),
2980	"LC_CTYPE", "<space>");
2981	else
2982	ctype->class256_collection[seq->bytes[`0`]] \|= BIT (tok_space);
2983
2984	/ No need to search. /
2985	ELEM (ctype, class_collection, , L`' '`) \|= BITw (tok_space);
2986
2987	seq = charmap_find_value (charmap, "form-feed", `9`);
2988	if (seq == NULL)
2989	seq = charmap_find_value (charmap, "U0000000C", `9`);
2990	if (seq == NULL)
2991	{
2992	record_error (`0`, `0`, _("\
2993	%s: character `%s' not defined while needed as default value"),
2994	"LC_CTYPE", "<form-feed>");
2995	}
2996	else if (seq->nbytes != `1`)
2997	record_error (`0`, `0`, _("\
2998	%s: character `%s' in charmap not representable with one byte"),
2999	"LC_CTYPE", "<form-feed>");
3000	else
3001	ctype->class256_collection[seq->bytes[`0`]] \|= BIT (tok_space);
3002
3003	/ No need to search. /
3004	ELEM (ctype, class_collection, , L`'\f'`) \|= BITw (tok_space);
3005
3006
3007	seq = charmap_find_value (charmap, "newline", `7`);
3008	if (seq == NULL)
3009	seq = charmap_find_value (charmap, "U0000000A", `9`);
3010	if (seq == NULL)
3011	{
3012	record_error (`0`, `0`, _("\
3013	%s: character `%s' not defined while needed as default value"),
3014	"LC_CTYPE", "<newline>");
3015	}
3016	else if (seq->nbytes != `1`)
3017	record_error (`0`, `0`, _("\
3018	%s: character `%s' in charmap not representable with one byte"),
3019	"LC_CTYPE", "<newline>");
3020	else
3021	ctype->class256_collection[seq->bytes[`0`]] \|= BIT (tok_space);
3022
3023	/ No need to search. /
3024	ELEM (ctype, class_collection, , L`'\n'`) \|= BITw (tok_space);
3025
3026
3027	seq = charmap_find_value (charmap, "carriage-return", `15`);
3028	if (seq == NULL)
3029	seq = charmap_find_value (charmap, "U0000000D", `9`);
3030	if (seq == NULL)
3031	{
3032	record_error (`0`, `0`, _("\
3033	%s: character `%s' not defined while needed as default value"),
3034	"LC_CTYPE", "<carriage-return>");
3035	}
3036	else if (seq->nbytes != `1`)
3037	record_error (`0`, `0`, _("\
3038	%s: character `%s' in charmap not representable with one byte"),
3039	"LC_CTYPE", "<carriage-return>");
3040	else
3041	ctype->class256_collection[seq->bytes[`0`]] \|= BIT (tok_space);
3042
3043	/ No need to search. /
3044	ELEM (ctype, class_collection, , L`'\r'`) \|= BITw (tok_space);
3045
3046
3047	seq = charmap_find_value (charmap, "tab", `3`);
3048	if (seq == NULL)
3049	seq = charmap_find_value (charmap, "U00000009", `9`);
3050	if (seq == NULL)
3051	{
3052	record_error (`0`, `0`, _("\
3053	%s: character `%s' not defined while needed as default value"),
3054	"LC_CTYPE", "<tab>");
3055	}
3056	else if (seq->nbytes != `1`)
3057	record_error (`0`, `0`, _("\
3058	%s: character `%s' in charmap not representable with one byte"),
3059	"LC_CTYPE", "<tab>");
3060	else
3061	ctype->class256_collection[seq->bytes[`0`]] \|= BIT (tok_space);
3062
3063	/ No need to search. /
3064	ELEM (ctype, class_collection, , L`'\t'`) \|= BITw (tok_space);
3065
3066
3067	seq = charmap_find_value (charmap, "vertical-tab", `12`);
3068	if (seq == NULL)
3069	seq = charmap_find_value (charmap, "U0000000B", `9`);
3070	if (seq == NULL)
3071	{
3072	record_error (`0`, `0`, _("\
3073	%s: character `%s' not defined while needed as default value"),
3074	"LC_CTYPE", "<vertical-tab>");
3075	}
3076	else if (seq->nbytes != `1`)
3077	record_error (`0`, `0`, _("\
3078	%s: character `%s' in charmap not representable with one byte"),
3079	"LC_CTYPE", "<vertical-tab>");
3080	else
3081	ctype->class256_collection[seq->bytes[`0`]] \|= BIT (tok_space);
3082
3083	/ No need to search. /
3084	ELEM (ctype, class_collection, , L`'\v'`) \|= BITw (tok_space);
3085	}
3086
3087	if ((ctype->class_done & BITw (tok_xdigit)) == `0`)
3088	/* "If this keyword is not specified, the digits `0' to `9', the
3089	uppercase letters `A' through `F', and the lowercase letters `a'
3090	through `f', ..., shall automatically belong to this class, with
3091	implementation defined character values." [P1003.2, 2.5.2.1]  */
3092	{
3093	set_default (BITPOS (tok_xdigit), `'0'`, `'9'`);
3094	set_default (BITPOS (tok_xdigit), `'A'`, `'F'`);
3095	set_default (BITPOS (tok_xdigit), `'a'`, `'f'`);
3096	}
3097
3098	if ((ctype->class_done & BITw (tok_blank)) == `0`)
3099	/ "If this keyword [blank] is unspecified, the characters <space> and*
3100	<tab> shall belong to this character class." [P1003.2, 2.5.2.1] /*
3101	{
3102	struct charseq *seq;
3103
3104	seq = charmap_find_value (charmap, "space", `5`);
3105	if (seq == NULL)
3106	seq = charmap_find_value (charmap, "SP", `2`);
3107	if (seq == NULL)
3108	seq = charmap_find_value (charmap, "U00000020", `9`);
3109	if (seq == NULL)
3110	{
3111	record_error (`0`, `0`, _("\
3112	%s: character `%s' not defined while needed as default value"),
3113	"LC_CTYPE", "<space>");
3114	}
3115	else if (seq->nbytes != `1`)
3116	record_error (`0`, `0`, _("\
3117	%s: character `%s' in charmap not representable with one byte"),
3118	"LC_CTYPE", "<space>");
3119	else
3120	ctype->class256_collection[seq->bytes[`0`]] \|= BIT (tok_blank);
3121
3122	/ No need to search. /
3123	ELEM (ctype, class_collection, , L`' '`) \|= BITw (tok_blank);
3124
3125
3126	seq = charmap_find_value (charmap, "tab", `3`);
3127	if (seq == NULL)
3128	seq = charmap_find_value (charmap, "U00000009", `9`);
3129	if (seq == NULL)
3130	{
3131	record_error (`0`, `0`, _("\
3132	%s: character `%s' not defined while needed as default value"),
3133	"LC_CTYPE", "<tab>");
3134	}
3135	else if (seq->nbytes != `1`)
3136	record_error (`0`, `0`, _("\
3137	%s: character `%s' in charmap not representable with one byte"),
3138	"LC_CTYPE", "<tab>");
3139	else
3140	ctype->class256_collection[seq->bytes[`0`]] \|= BIT (tok_blank);
3141
3142	/ No need to search. /
3143	ELEM (ctype, class_collection, , L`'\t'`) \|= BITw (tok_blank);
3144	}
3145
3146	if ((ctype->class_done & BITw (tok_graph)) == `0`)
3147	/ "If this keyword [graph] is not specified, characters specified for*
3148	the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3149	shall belong to this character class." [P1003.2, 2.5.2.1] /*
3150	{
3151	unsigned long int mask = BIT (tok_upper) \| BIT (tok_lower)
3152	\| BIT (tok_alpha) \| BIT (tok_digit) \| BIT (tok_xdigit)
3153	\| BIT (tok_punct);
3154	unsigned long int maskw = BITw (tok_upper) \| BITw (tok_lower)
3155	\| BITw (tok_alpha) \| BITw (tok_digit) \| BITw (tok_xdigit)
3156	\| BITw (tok_punct);
3157
3158	for (size_t cnt = `0`; cnt < ctype->class_collection_act; ++cnt)
3159	if ((ctype->class_collection[cnt] & maskw) != `0`)
3160	ctype->class_collection[cnt] \|= BITw (tok_graph);
3161
3162	for (size_t cnt = `0`; cnt < `256`; ++cnt)
3163	if ((ctype->class256_collection[cnt] & mask) != `0`)
3164	ctype->class256_collection[cnt] \|= BIT (tok_graph);
3165	}
3166
3167	if ((ctype->class_done & BITw (tok_print)) == `0`)
3168	/ "If this keyword [print] is not provided, characters specified for*
3169	the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3170	and the <space> character shall belong to this character class."
3171	[P1003.2, 2.5.2.1] /*
3172	{
3173	unsigned long int mask = BIT (tok_upper) \| BIT (tok_lower)
3174	\| BIT (tok_alpha) \| BIT (tok_digit) \| BIT (tok_xdigit)
3175	\| BIT (tok_punct);
3176	unsigned long int maskw = BITw (tok_upper) \| BITw (tok_lower)
3177	\| BITw (tok_alpha) \| BITw (tok_digit) \| BITw (tok_xdigit)
3178	\| BITw (tok_punct);
3179	struct charseq *seq;
3180
3181	for (size_t cnt = `0`; cnt < ctype->class_collection_act; ++cnt)
3182	if ((ctype->class_collection[cnt] & maskw) != `0`)
3183	ctype->class_collection[cnt] \|= BITw (tok_print);
3184
3185	for (size_t cnt = `0`; cnt < `256`; ++cnt)
3186	if ((ctype->class256_collection[cnt] & mask) != `0`)
3187	ctype->class256_collection[cnt] \|= BIT (tok_print);
3188
3189
3190	seq = charmap_find_value (charmap, "space", `5`);
3191	if (seq == NULL)
3192	seq = charmap_find_value (charmap, "SP", `2`);
3193	if (seq == NULL)
3194	seq = charmap_find_value (charmap, "U00000020", `9`);
3195	if (seq == NULL)
3196	{
3197	record_error (`0`, `0`, _("\
3198	%s: character `%s' not defined while needed as default value"),
3199	"LC_CTYPE", "<space>");
3200	}
3201	else if (seq->nbytes != `1`)
3202	record_error (`0`, `0`, _("\
3203	%s: character `%s' in charmap not representable with one byte"),
3204	"LC_CTYPE", "<space>");
3205	else
3206	ctype->class256_collection[seq->bytes[`0`]] \|= BIT (tok_print);
3207
3208	/ No need to search. /
3209	ELEM (ctype, class_collection, , L`' '`) \|= BITw (tok_print);
3210	}
3211
3212	if (ctype->tomap_done[`0`] == `0`)
3213	/ "If this keyword [toupper] is not specified, the lowercase letters*
3214	`a' through `z', and their corresponding uppercase letters `A' to
3215	`Z', ..., shall automatically be included, with implementation-
3216	defined character values." [P1003.2, 2.5.2.1] /*
3217	{
3218	char tmp[`4`];
3219	int ch;
3220
3221	strcpy (tmp, "<?>");
3222
3223	for (ch = `'a'`; ch <= `'z'`; ++ch)
3224	{
3225	struct charseq seq_from, seq_to;
3226
3227	tmp[`1`] = (char) ch;
3228
3229	seq_from = charmap_find_value (charmap, &tmp[`1`], `1`);
3230	if (seq_from == NULL)
3231	{
3232	char buf[`10`];
3233	sprintf (buf, "U%08X", ch);
3234	seq_from = charmap_find_value (charmap, buf, `9`);
3235	}
3236	if (seq_from == NULL)
3237	{
3238	record_error (`0`, `0`, _("\
3239	%s: character `%s' not defined while needed as default value"),
3240	"LC_CTYPE", tmp);
3241	}
3242	else if (seq_from->nbytes != `1`)
3243	{
3244	record_error (`0`, `0`, _("\
3245	%s: character `%s' needed as default value not representable with one byte"),
3246	"LC_CTYPE", tmp);
3247	}
3248	else
3249	{
3250	/ This conversion is implementation defined. /
3251	tmp[`1`] = (char) (ch + (`'A'` - `'a'`));
3252	seq_to = charmap_find_value (charmap, &tmp[`1`], `1`);
3253	if (seq_to == NULL)
3254	{
3255	char buf[`10`];
3256	sprintf (buf, "U%08X", ch + (`'A'` - `'a'`));
3257	seq_to = charmap_find_value (charmap, buf, `9`);
3258	}
3259	if (seq_to == NULL)
3260	{
3261	record_error (`0`, `0`, _("\
3262	%s: character `%s' not defined while needed as default value"),
3263	"LC_CTYPE", tmp);
3264	}
3265	else if (seq_to->nbytes != `1`)
3266	{
3267	record_error (`0`, `0`, _("\
3268	%s: character `%s' needed as default value not representable with one byte"),
3269	"LC_CTYPE", tmp);
3270	}
3271	else
3272	/ The index [0] is determined by the order of the*
3273	`ctype_map_newP' calls in `ctype_startup'. /*
3274	ctype->map256_collection[`0`][seq_from->bytes[`0`]]
3275	= seq_to->bytes[`0`];
3276	}
3277
3278	/ No need to search. /
3279	ELEM (ctype, map_collection, [`0`], ch) = ch + (`'A'` - `'a'`);
3280	}
3281	}
3282
3283	if (ctype->tomap_done[`1`] == `0`)
3284	/ "If this keyword [tolower] is not specified, the mapping shall be*
3285	the reverse mapping of the one specified to `toupper'." [P1003.2] /*
3286	{
3287	for (size_t cnt = `0`; cnt < ctype->map_collection_act[`0`]; ++cnt)
3288	if (ctype->map_collection[`0`][cnt] != `0`)
3289	ELEM (ctype, map_collection, [`1`],
3290	ctype->map_collection[`0`][cnt])
3291	= ctype->charnames[cnt];
3292
3293	for (size_t cnt = `0`; cnt < `256`; ++cnt)
3294	if (ctype->map256_collection[`0`][cnt] != `0`)
3295	ctype->map256_collection[`1`][ctype->map256_collection[`0`][cnt]] = cnt;
3296	}
3297
3298	if (ctype->outdigits_act != `10`)
3299	{
3300	if (ctype->outdigits_act != `0`)
3301	record_error (`0`, `0`, _("\
3302	%s: field `%s' does not contain exactly ten entries"),
3303	"LC_CTYPE", "outdigit");
3304
3305	for (size_t cnt = ctype->outdigits_act; cnt < `10`; ++cnt)
3306	{
3307	ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3308	(char *) digits + cnt,
3309	`1`);
3310
3311	if (ctype->mboutdigits[cnt] == NULL)
3312	ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3313	longnames[cnt],
3314	strlen (longnames[cnt]));
3315
3316	if (ctype->mboutdigits[cnt] == NULL)
3317	ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3318	uninames[cnt], `9`);
3319
3320	if (ctype->mboutdigits[cnt] == NULL)
3321	{
3322	/ Provide a replacement. /
3323	record_error (`0`, `0`, _("\
3324	no output digits defined and none of the standard names in the charmap"));
3325
3326	ctype->mboutdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
3327	sizeof (struct charseq)
3328	+ `1`);
3329
3330	/ This is better than nothing. /
3331	ctype->mboutdigits[cnt]->bytes[`0`] = digits[cnt];
3332	ctype->mboutdigits[cnt]->nbytes = `1`;
3333	}
3334
3335	ctype->wcoutdigits[cnt] = L`'0'` + cnt;
3336	}
3337
3338	ctype->outdigits_act = `10`;
3339	}
3340
3341	#undef set_default
3342	}
3343
3344
3345	/ Initialize. Assumes t->p and t->q have already been set. /
3346	static inline void
3347	wctype_table_init (struct wctype_table *t)
3348	{
3349	t->level1 = NULL;
3350	t->level1_alloc = t->level1_size = `0`;
3351	t->level2 = NULL;
3352	t->level2_alloc = t->level2_size = `0`;
3353	t->level3 = NULL;
3354	t->level3_alloc = t->level3_size = `0`;
3355	}
3356
3357	/ Add one entry. /
3358	static void
3359	wctype_table_add (struct wctype_table *t, uint32_t wc)
3360	{
3361	uint32_t index1 = wc >> (t->q + t->p + `5`);
3362	uint32_t index2 = (wc >> (t->p + `5`)) & ((`1` << t->q) - `1`);
3363	uint32_t index3 = (wc >> `5`) & ((`1` << t->p) - `1`);
3364	uint32_t index4 = wc & `0x1f`;
3365	size_t i, i1, i2;
3366
3367	if (index1 >= t->level1_size)
3368	{
3369	if (index1 >= t->level1_alloc)
3370	{
3371	size_t alloc = `2` * t->level1_alloc;
3372	if (alloc <= index1)
3373	alloc = index1 + `1`;
3374	t->level1 = (uint32_t ) xrealloc ((char* *) t->level1,
3375	alloc * sizeof (uint32_t));
3376	t->level1_alloc = alloc;
3377	}
3378	while (index1 >= t->level1_size)
3379	t->level1[t->level1_size++] = EMPTY;
3380	}
3381
3382	if (t->level1[index1] == EMPTY)
3383	{
3384	if (t->level2_size == t->level2_alloc)
3385	{
3386	size_t alloc = `2` * t->level2_alloc + `1`;
3387	t->level2 = (uint32_t ) xrealloc ((char* *) t->level2,
3388	(alloc << t->q) * sizeof (uint32_t));
3389	t->level2_alloc = alloc;
3390	}
3391	i1 = t->level2_size << t->q;
3392	i2 = (t->level2_size + `1`) << t->q;
3393	for (i = i1; i < i2; i++)
3394	t->level2[i] = EMPTY;
3395	t->level1[index1] = t->level2_size++;
3396	}
3397
3398	index2 += t->level1[index1] << t->q;
3399
3400	if (t->level2[index2] == EMPTY)
3401	{
3402	if (t->level3_size == t->level3_alloc)
3403	{
3404	size_t alloc = `2` * t->level3_alloc + `1`;
3405	t->level3 = (uint32_t ) xrealloc ((char* *) t->level3,
3406	(alloc << t->p) * sizeof (uint32_t));
3407	t->level3_alloc = alloc;
3408	}
3409	i1 = t->level3_size << t->p;
3410	i2 = (t->level3_size + `1`) << t->p;
3411	for (i = i1; i < i2; i++)
3412	t->level3[i] = `0`;
3413	t->level2[index2] = t->level3_size++;
3414	}
3415
3416	index3 += t->level2[index2] << t->p;
3417
3418	t->level3[index3] \|= (uint32_t)`1` << index4;
3419	}
3420
3421	/ Finalize and shrink. /
3422	static void
3423	add_locale_wctype_table (struct locale_file file, struct* wctype_table *t)
3424	{
3425	size_t i, j, k;
3426	uint32_t reorder3[t->level3_size];
3427	uint32_t reorder2[t->level2_size];
3428	uint32_t level2_offset, level3_offset;
3429
3430	/ Uniquify level3 blocks. /
3431	k = `0`;
3432	for (j = `0`; j < t->level3_size; j++)
3433	{
3434	for (i = `0`; i < k; i++)
3435	if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
3436	(`1` << t->p) * sizeof (uint32_t)) == `0`)
3437	break;
3438	/ Relocate block j to block i. /
3439	reorder3[j] = i;
3440	if (i == k)
3441	{
3442	if (i != j)
3443	memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
3444	(`1` << t->p) * sizeof (uint32_t));
3445	k++;
3446	}
3447	}
3448	t->level3_size = k;
3449
3450	for (i = `0`; i < (t->level2_size << t->q); i++)
3451	if (t->level2[i] != EMPTY)
3452	t->level2[i] = reorder3[t->level2[i]];
3453
3454	/ Uniquify level2 blocks. /
3455	k = `0`;
3456	for (j = `0`; j < t->level2_size; j++)
3457	{
3458	for (i = `0`; i < k; i++)
3459	if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
3460	(`1` << t->q) * sizeof (uint32_t)) == `0`)
3461	break;
3462	/ Relocate block j to block i. /
3463	reorder2[j] = i;
3464	if (i == k)
3465	{
3466	if (i != j)
3467	memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
3468	(`1` << t->q) * sizeof (uint32_t));
3469	k++;
3470	}
3471	}
3472	t->level2_size = k;
3473
3474	for (i = `0`; i < t->level1_size; i++)
3475	if (t->level1[i] != EMPTY)
3476	t->level1[i] = reorder2[t->level1[i]];
3477
3478	t->result_size =
3479	`5` * sizeof (uint32_t)
3480	+ t->level1_size * sizeof (uint32_t)
3481	+ (t->level2_size << t->q) * sizeof (uint32_t)
3482	+ (t->level3_size << t->p) * sizeof (uint32_t);
3483
3484	level2_offset =
3485	`5` * sizeof (uint32_t)
3486	+ t->level1_size * sizeof (uint32_t);
3487	level3_offset =
3488	`5` * sizeof (uint32_t)
3489	+ t->level1_size * sizeof (uint32_t)
3490	+ (t->level2_size << t->q) * sizeof (uint32_t);
3491
3492	start_locale_structure (file);
3493	add_locale_uint32 (file, t->q + t->p + `5`);
3494	add_locale_uint32 (file, t->level1_size);
3495	add_locale_uint32 (file, t->p + `5`);
3496	add_locale_uint32 (file, (`1` << t->q) - `1`);
3497	add_locale_uint32 (file, (`1` << t->p) - `1`);
3498
3499	for (i = `0`; i < t->level1_size; i++)
3500	add_locale_uint32
3501	(file,
3502	t->level1[i] == EMPTY
3503	? `0`
3504	: (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
3505
3506	for (i = `0`; i < (t->level2_size << t->q); i++)
3507	add_locale_uint32
3508	(file,
3509	t->level2[i] == EMPTY
3510	? `0`
3511	: (t->level2[i] << t->p) * sizeof (uint32_t) + level3_offset);
3512
3513	add_locale_uint32_array (file, t->level3, t->level3_size << t->p);
3514	end_locale_structure (file);
3515
3516	if (t->level1_alloc > `0`)
3517	free (t->level1);
3518	if (t->level2_alloc > `0`)
3519	free (t->level2);
3520	if (t->level3_alloc > `0`)
3521	free (t->level3);
3522	}
3523
3524	/ Flattens the included transliterations into a translit list.*
3525	Inserts them in the list at `cursor', and returns the new cursor. /*
3526	static struct translit_t **
3527	translit_flatten (struct locale_ctype_t *ctype,
3528	const struct charmap_t *charmap,
3529	struct translit_t **cursor)
3530	{
3531	while (ctype->translit_include != NULL)
3532	{
3533	const char *copy_locale = ctype->translit_include->copy_locale;
3534	const char *copy_repertoire = ctype->translit_include->copy_repertoire;
3535	struct localedef_t *other;
3536
3537	/ Unchain the include statement. During the depth-first traversal*
3538	we don't want to visit any locale more than once. /*
3539	ctype->translit_include = ctype->translit_include->next;
3540
3541	other = find_locale (LC_CTYPE, copy_locale, copy_repertoire, charmap);
3542
3543	if (other == NULL \|\| other->categories[LC_CTYPE].ctype == NULL)
3544	{
3545	record_error (`0`, `0`, _("\
3546	%s: transliteration data from locale `%s' not available"),
3547	"LC_CTYPE", copy_locale);
3548	}
3549	else
3550	{
3551	struct locale_ctype_t *other_ctype =
3552	other->categories[LC_CTYPE].ctype;
3553
3554	cursor = translit_flatten (other_ctype, charmap, cursor);
3555	assert (other_ctype->translit_include == NULL);
3556
3557	if (other_ctype->translit != NULL)
3558	{
3559	/ Insert the other_ctype->translit list at cursor. /*
3560	struct translit_t *endp = other_ctype->translit;
3561	while (endp->next != NULL)
3562	endp = endp->next;
3563
3564	endp->next = *cursor;
3565	*cursor = other_ctype->translit;
3566
3567	/ Avoid any risk of circular lists. /
3568	other_ctype->translit = NULL;
3569
3570	cursor = &endp->next;
3571	}
3572
3573	if (ctype->default_missing == NULL)
3574	ctype->default_missing = other_ctype->default_missing;
3575	}
3576	}
3577
3578	return cursor;
3579	}
3580
3581	static void
3582	allocate_arrays (struct locale_ctype_t ctype, const* struct charmap_t *charmap,
3583	struct repertoire_t *repertoire)
3584	{
3585	size_t idx, nr;
3586	const void *key;
3587	size_t len;
3588	void *vdata;
3589	void *curs;
3590
3591	/ You wonder about this amount of memory? This is only because some*
3592	users do not manage to address the array with unsigned values or
3593	data types with range >= 256. '\200' would result in the array
3594	index -128. To help these poor people we duplicate the entries for
3595	128 up to 255 below the entry for \0. /*
3596	ctype->ctype_b = (char_class_t ) xcalloc (`256` + `128`, sizeof* (char_class_t));
3597	ctype->ctype32_b = (char_class32_t ) xcalloc (`256`, sizeof* (char_class32_t));
3598	ctype->class_b = (uint32_t **)
3599	xmalloc (ctype->nr_charclass * sizeof (uint32_t *));
3600	ctype->class_3level = (struct wctype_table *)
3601	xmalloc (ctype->nr_charclass * sizeof (struct wctype_table));
3602
3603	/ This is the array accessed using the multibyte string elements. /
3604	for (idx = `0`; idx < `256`; ++idx)
3605	ctype->ctype_b[`128` + idx] = ctype->class256_collection[idx];
3606
3607	/ Mirror first 127 entries. We must take care that entry -1 is not*
3608	mirrored because EOF == -1. /*
3609	for (idx = `0`; idx < `127`; ++idx)
3610	ctype->ctype_b[idx] = ctype->ctype_b[`256` + idx];
3611
3612	/ The 32 bit array contains all characters < 0x100. /
3613	for (idx = `0`; idx < ctype->class_collection_act; ++idx)
3614	if (ctype->charnames[idx] < `0x100`)
3615	ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
3616
3617	for (nr = `0`; nr < ctype->nr_charclass; nr++)
3618	{
3619	ctype->class_b[nr] = (uint32_t ) xcalloc (`256` / `32`, sizeof* (uint32_t));
3620
3621	/ We only set CLASS_B for the bits in the ISO C classes, not*
3622	the user defined classes. The number should not change but
3623	who knows. /*
3624	#define LAST_ISO_C_BIT 11
3625	if (nr <= LAST_ISO_C_BIT)
3626	for (idx = `0`; idx < `256`; ++idx)
3627	if (ctype->class256_collection[idx] & _ISbit (nr))
3628	ctype->class_b[nr][idx >> `5`] \|= (uint32_t) `1` << (idx & `0x1f`);
3629	}
3630
3631	for (nr = `0`; nr < ctype->nr_charclass; nr++)
3632	{
3633	struct wctype_table *t;
3634
3635	t = &ctype->class_3level[nr];
3636	t->p = `4`; / or: 5 /
3637	t->q = `7`; / or: 6 /
3638	wctype_table_init (t);
3639
3640	for (idx = `0`; idx < ctype->class_collection_act; ++idx)
3641	if (ctype->class_collection[idx] & _ISwbit (nr))
3642	wctype_table_add (t, ctype->charnames[idx]);
3643
3644	record_verbose (stderr, _("\
3645	%s: table for class \"%s\": %lu bytes"),
3646	"LC_CTYPE", ctype->classnames[nr],
3647	(unsigned long int) t->result_size);
3648	}
3649
3650	/ Room for table of mappings. /
3651	ctype->map_b = (uint32_t *) xmalloc (`2` sizeof (uint32_t *));
3652	ctype->map32_b = (uint32_t **) xmalloc (ctype->map_collection_nr
3653	* sizeof (uint32_t *));
3654	ctype->map_3level = (struct wctrans_table *)
3655	xmalloc (ctype->map_collection_nr * sizeof (struct wctrans_table));
3656
3657	/ Fill in all mappings. /
3658	for (idx = `0`; idx < `2`; ++idx)
3659	{
3660	unsigned int idx2;
3661
3662	/ Allocate table. /
3663	ctype->map_b[idx] = (uint32_t *)
3664	xmalloc ((`256` + `128`) * sizeof (uint32_t));
3665
3666	/ Copy values from collection. /
3667	for (idx2 = `0`; idx2 < `256`; ++idx2)
3668	ctype->map_b[idx][`128` + idx2] = ctype->map256_collection[idx][idx2];
3669
3670	/ Mirror first 127 entries. We must take care not to map entry*
3671	-1 because EOF == -1. /*
3672	for (idx2 = `0`; idx2 < `127`; ++idx2)
3673	ctype->map_b[idx][idx2] = ctype->map_b[idx][`256` + idx2];
3674
3675	/ EOF must map to EOF. /
3676	ctype->map_b[idx][`127`] = EOF;
3677	}
3678
3679	for (idx = `0`; idx < ctype->map_collection_nr; ++idx)
3680	{
3681	unsigned int idx2;
3682
3683	/ Allocate table. /
3684	ctype->map32_b[idx] = (uint32_t ) xmalloc (`256` sizeof (uint32_t));
3685
3686	/ Copy values from collection. Default is identity mapping. /
3687	for (idx2 = `0`; idx2 < `256`; ++idx2)
3688	ctype->map32_b[idx][idx2] =
3689	(ctype->map_collection[idx][idx2] != `0`
3690	? ctype->map_collection[idx][idx2]
3691	: idx2);
3692	}
3693
3694	for (nr = `0`; nr < ctype->map_collection_nr; nr++)
3695	{
3696	struct wctrans_table *t;
3697
3698	t = &ctype->map_3level[nr];
3699	t->p = `7`;
3700	t->q = `9`;
3701	wctrans_table_init (t);
3702
3703	for (idx = `0`; idx < ctype->map_collection_act[nr]; ++idx)
3704	if (ctype->map_collection[nr][idx] != `0`)
3705	wctrans_table_add (t, ctype->charnames[idx],
3706	ctype->map_collection[nr][idx]);
3707
3708	record_verbose (stderr, _("\
3709	%s: table for map \"%s\": %lu bytes"),
3710	"LC_CTYPE", ctype->mapnames[nr],
3711	(unsigned long int) t->result_size);
3712	}
3713
3714	/ Extra array for class and map names. /
3715	ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
3716	* sizeof (uint32_t));
3717	ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
3718	* sizeof (uint32_t));
3719
3720	ctype->class_offset = _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
3721	ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
3722
3723	/ Array for width information. Because the expected widths are very*
3724	small (never larger than 2) we use only one single byte. This
3725	saves space.
3726	We put only printable characters in the table. wcwidth is specified
3727	to return -1 for non-printable characters. Doing the check here
3728	saves a run-time check.
3729	But we put L'\0' in the table. This again saves a run-time check. /*
3730	{
3731	struct wcwidth_table *t;
3732
3733	t = &ctype->width;
3734	t->p = `7`;
3735	t->q = `9`;
3736	wcwidth_table_init (t);
3737
3738	/ First set all the printable characters of the character set to*
3739	the default width. /*
3740	curs = NULL;
3741	while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == `0`)
3742	{
3743	struct charseq data = (struct* charseq *) vdata;
3744
3745	if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
3746	data->ucs4 = repertoire_find_value (ctype->repertoire,
3747	data->name, len);
3748
3749	if (data->ucs4 != ILLEGAL_CHAR_VALUE)
3750	{
3751	uint32_t *class_bits =
3752	find_idx (ctype, &ctype->class_collection, NULL,
3753	&ctype->class_collection_act, data->ucs4);
3754
3755	if (class_bits != NULL && (*class_bits & BITw (tok_print)))
3756	wcwidth_table_add (t, data->ucs4, charmap->width_default);
3757	}
3758	}
3759
3760	/ Now add the explicitly specified widths. /
3761	if (charmap->width_rules != NULL)
3762	for (size_t cnt = `0`; cnt < charmap->nwidth_rules; ++cnt)
3763	{
3764	unsigned char bytes[charmap->mb_cur_max];
3765	int nbytes = charmap->width_rules[cnt].from->nbytes;
3766
3767	/ We have the range of character for which the width is*
3768	specified described using byte sequences of the multibyte
3769	charset. We have to convert this to UCS4 now. And we
3770	cannot simply convert the beginning and the end of the
3771	sequence, we have to iterate over the byte sequence and
3772	convert it for every single character. /*
3773	memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
3774
3775	while (nbytes < charmap->width_rules[cnt].to->nbytes
3776	\|\| memcmp (bytes, charmap->width_rules[cnt].to->bytes,
3777	nbytes) <= `0`)
3778	{
3779	/ Find the UCS value for `bytes'. /
3780	int inner;
3781	uint32_t wch;
3782	struct charseq *seq =
3783	charmap_find_symbol (charmap, (char *) bytes, nbytes);
3784
3785	if (seq == NULL)
3786	wch = ILLEGAL_CHAR_VALUE;
3787	else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
3788	wch = seq->ucs4;
3789	else
3790	wch = repertoire_find_value (ctype->repertoire, seq->name,
3791	strlen (seq->name));
3792
3793	if (wch != ILLEGAL_CHAR_VALUE)
3794	{
3795	/ Store the value. /
3796	uint32_t *class_bits =
3797	find_idx (ctype, &ctype->class_collection, NULL,
3798	&ctype->class_collection_act, wch);
3799
3800	if (class_bits != NULL && (*class_bits & BITw (tok_print)))
3801	wcwidth_table_add (t, wch,
3802	charmap->width_rules[cnt].width);
3803	}
3804
3805	/ "Increment" the bytes sequence. /
3806	inner = nbytes - `1`;
3807	while (inner >= `0` && bytes[inner] == `0xff`)
3808	--inner;
3809
3810	if (inner < `0`)
3811	{
3812	/ We have to extend the byte sequence. /
3813	if (nbytes >= charmap->width_rules[cnt].to->nbytes)
3814	break;
3815
3816	bytes[`0`] = `1`;
3817	memset (&bytes[`1`], `0`, nbytes);
3818	++nbytes;
3819	}
3820	else
3821	{
3822	++bytes[inner];
3823	while (++inner < nbytes)
3824	bytes[inner] = `0`;
3825	}
3826	}
3827	}
3828
3829	/ Set the width of L'\0' to 0. /
3830	wcwidth_table_add (t, `0`, `0`);
3831
3832	record_verbose (stderr, _("%s: table for width: %lu bytes"),
3833	"LC_CTYPE", (unsigned long int) t->result_size);
3834	}
3835
3836	/ Set MB_CUR_MAX. /
3837	ctype->mb_cur_max = charmap->mb_cur_max;
3838
3839	/ Now determine the table for the transliteration information.*
3840
3841	XXX It is not yet clear to me whether it is worth implementing a
3842	complicated algorithm which uses a hash table to locate the entries.
3843	For now I'll use a simple array which can be searching using binary
3844	search. /*
3845	if (ctype->translit_include != NULL)
3846	/ Traverse the locales mentioned in the `include' statements in a*
3847	depth-first way and fold in their transliteration information. /*
3848	translit_flatten (ctype, charmap, &ctype->translit);
3849
3850	if (ctype->translit != NULL)
3851	{
3852	/ First count how many entries we have. This is the upper limit*
3853	since some entries from the included files might be overwritten. /*
3854	size_t number = `0`;
3855	struct translit_t *runp = ctype->translit;
3856	struct translit_t **sorted;
3857	size_t from_len, to_len;
3858
3859	while (runp != NULL)
3860	{
3861	++number;
3862	runp = runp->next;
3863	}
3864
3865	/ Next we allocate an array large enough and fill in the values. /
3866	sorted = (struct translit_t **) alloca (number
3867	* sizeof (struct translit_t **));
3868	runp = ctype->translit;
3869	number = `0`;
3870	do
3871	{
3872	/ Search for the place where to insert this string.*
3873	XXX Better use a real sorting algorithm later. /*
3874	size_t idx = `0`;
3875	int replace = `0`;
3876
3877	while (idx < number)
3878	{
3879	int res = wcscmp ((const wchar_t *) sorted[idx]->from,
3880	(const wchar_t *) runp->from);
3881	if (res == `0`)
3882	{
3883	replace = `1`;
3884	break;
3885	}
3886	if (res > `0`)
3887	break;
3888	++idx;
3889	}
3890
3891	if (replace)
3892	sorted[idx] = runp;
3893	else
3894	{
3895	memmove (&sorted[idx + `1`], &sorted[idx],
3896	(number - idx) * sizeof (struct translit_t *));
3897	sorted[idx] = runp;
3898	++number;
3899	}
3900
3901	runp = runp->next;
3902	}
3903	while (runp != NULL);
3904
3905	/ The next step is putting all the possible transliteration*
3906	strings in one memory block so that we can write it out.
3907	We need several different blocks:
3908	- index to the from-string array
3909	- from-string array
3910	- index to the to-string array
3911	- to-string array.
3912	*/
3913	from_len = to_len = `0`;
3914	for (size_t cnt = `0`; cnt < number; ++cnt)
3915	{
3916	struct translit_to_t *srunp;
3917	from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + `1`;
3918	srunp = sorted[cnt]->to;
3919	while (srunp != NULL)
3920	{
3921	to_len += wcslen ((const wchar_t *) srunp->str) + `1`;
3922	srunp = srunp->next;
3923	}
3924	/ Plus one for the extra NUL character marking the end of*
3925	the list for the current entry. /*
3926	++to_len;
3927	}
3928
3929	/ We can allocate the arrays for the results. /
3930	ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
3931	ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
3932	ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
3933	ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
3934
3935	from_len = `0`;
3936	to_len = `0`;
3937	for (size_t cnt = `0`; cnt < number; ++cnt)
3938	{
3939	size_t len;
3940	struct translit_to_t *srunp;
3941
3942	ctype->translit_from_idx[cnt] = from_len;
3943	ctype->translit_to_idx[cnt] = to_len;
3944
3945	len = wcslen ((const wchar_t *) sorted[cnt]->from) + `1`;
3946	wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
3947	(const wchar_t *) sorted[cnt]->from, len);
3948	from_len += len;
3949
3950	ctype->translit_to_idx[cnt] = to_len;
3951	srunp = sorted[cnt]->to;
3952	while (srunp != NULL)
3953	{
3954	len = wcslen ((const wchar_t *) srunp->str) + `1`;
3955	wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
3956	(const wchar_t *) srunp->str, len);
3957	to_len += len;
3958	srunp = srunp->next;
3959	}
3960	ctype->translit_to_tbl[to_len++] = L`'\0'`;
3961	}
3962
3963	/ Store the information about the length. /
3964	ctype->translit_idx_size = number;
3965	ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
3966	ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
3967	}
3968	else
3969	{
3970	ctype->translit_from_idx = no_str;
3971	ctype->translit_from_tbl = no_str;
3972	ctype->translit_to_tbl = no_str;
3973	ctype->translit_idx_size = `0`;
3974	ctype->translit_from_tbl_size = `0`;
3975	ctype->translit_to_tbl_size = `0`;
3976	}
3977	}
3978

Browse the source code of glibc/locale/programs/ld-ctype.c