ld-ctype.c source code [glibc/locale/programs/ld-ctype.c]

/* Copyright (C) 1995-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <https://www.gnu.org/licenses/>.  */
17
18	#ifdef HAVE_CONFIG_H
19	# include <config.h>
20	#endif
21
22	#include <alloca.h>
23	#include <byteswap.h>
24	#include <endian.h>
25	#include <errno.h>
26	#include <limits.h>
27	#include <obstack.h>
28	#include <stdlib.h>
29	#include <string.h>
30	#include <wchar.h>
31	#include <wctype.h>
32	#include <stdint.h>
33	#include <sys/uio.h>
34
35	#include "localedef.h"
36	#include "charmap.h"
37	#include "localeinfo.h"
38	#include "langinfo.h"
39	#include "linereader.h"
40	#include "locfile-token.h"
41	#include "locfile.h"
42
43	#include <assert.h>
44
45
/* The bit used for representing a special class.  BITPOS turns a class
   token into a bit number counted from tok_upper; BIT and BITw feed
   that number to _ISbit and _ISwbit to obtain the mask used in the
   8-bit and the wide character class tables respectively.  */
#define BITPOS(class) ((class) - tok_upper)
#define BIT(class) (_ISbit (BITPOS (class)))
#define BITw(class) (_ISwbit (BITPOS (class)))
50
/* Expand to an lvalue for the entry of CTYPE->COLLECTION that belongs
   to the character VALUE.  IDX is pasted textually after the member
   names, so it is either empty or an array subscript selecting one of
   several parallel collections.  The lookup itself is done by find_idx,
   which receives the addresses of the table pointer and of its `_max'
   and `_act' counters.  */
#define ELEM(ctype, collection, idx, value)				      \
  *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx,    \
	     &ctype->collection##_act idx, value)
54
55
/* To be compatible with former implementations we for now restrict
   the number of bits for character classes to 16.  When compatibility
   is not necessary anymore increase the number to 32.  */
#define char_class_t uint16_t
#define char_class32_t uint32_t
61
62
/* Type to describe a transliteration action.  We have a possibly
   multiple character from-string and a set of multiple character
   to-strings.  All are 32bit values since this is what is used in
   the gconv functions.  */

/* One replacement string of a transliteration entry; the alternatives
   for the same from-string are chained through NEXT.  */
struct translit_to_t
{
  uint32_t *str;

  struct translit_to_t *next;
};
73
/* One transliteration entry: the from-string, the place in the locale
   source where it was defined (file name and line number), and the
   list of to-strings.  Entries are chained through NEXT.  */
struct translit_t
{
  uint32_t *from;

  const char *fname;
  size_t lineno;

  struct translit_to_t *to;

  struct translit_t *next;
};
85
/* One range of a translit_ignore definition, given as FROM, TO and
   STEP, together with the source position where it was defined.
   Ranges are chained through NEXT.  */
struct translit_ignore_t
{
  uint32_t from;
  uint32_t to;
  uint32_t step;

  const char *fname;
  size_t lineno;

  struct translit_ignore_t *next;
};
97
98
/* Type to describe a transliteration include statement: the locale and
   the repertoire named by the statement.  Statements are chained
   through NEXT.  */
struct translit_include_t
{
  const char *copy_locale;
  const char *copy_repertoire;

  struct translit_include_t *next;
};
107
/* Provide some dummy pointer for empty string: a one-element array
   holding only the terminating zero.  */
static uint32_t no_str[] = { 0 };
110
111
112	/ Sparse table of uint32_t. /
113	#define TABLE idx_table
114	#define ELEMENT uint32_t
115	#define DEFAULT ((uint32_t) ~0)
116	#define NO_ADD_LOCALE
117	#include "3level.h"
118
119	#define TABLE wcwidth_table
120	#define ELEMENT uint8_t
121	#define DEFAULT 0xff
122	#include "3level.h"
123
124	#define TABLE wctrans_table
125	#define ELEMENT int32_t
126	#define DEFAULT 0
127	#define wctrans_table_add wctrans_table_add_internal
128	#include "3level.h"
129	#undef wctrans_table_add
/* The wctrans_table must actually store the difference between the
   desired result and the argument.  Record in T that WC maps to
   MAPPED_WC by storing MAPPED_WC - WC (computed in uint32_t
   arithmetic, so it wraps rather than overflows).  */
static inline void
wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
{
  wctrans_table_add_internal (t, wc, mapped_wc - wc);
}
137
/* Construction of sparse 3-level tables.
   See wchar-lookup.h for their structure and the meaning of p and q.  */

struct wctype_table
{
  /* Parameters.  */
  unsigned int p;
  unsigned int q;
  /* Working representation: for each of the three levels the
     allocated capacity, the used size and the data itself.  */
  size_t level1_alloc;
  size_t level1_size;
  uint32_t *level1;
  size_t level2_alloc;
  size_t level2_size;
  uint32_t *level2;
  size_t level3_alloc;
  size_t level3_size;
  uint32_t *level3;
  size_t result_size;
};
158
/* Forward declaration: append the wctype table T to the locale data
   being collected in FILE.  Used by ctype_output for every character
   class.  */
static void add_locale_wctype_table (struct locale_file *file,
				     struct wctype_table *t);
161
162	/ The real definition of the struct for the LC_CTYPE locale. /
163	struct locale_ctype_t
164	{
165	uint32_t *charnames;
166	size_t charnames_max;
167	size_t charnames_act;
168	/ An index lookup table, to speedup find_idx. /
169	struct idx_table charnames_idx;
170
171	struct repertoire_t *repertoire;
172
173	/ We will allow up to 8 * sizeof (uint32_t) character classes. /
174	#define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
175	size_t nr_charclass;
176	const char *classnames[MAX_NR_CHARCLASS];
177	uint32_t last_class_char;
178	uint32_t class256_collection[`256`];
179	uint32_t *class_collection;
180	size_t class_collection_max;
181	size_t class_collection_act;
182	uint32_t class_done;
183	uint32_t class_offset;
184
185	struct charseq **mbdigits;
186	size_t mbdigits_act;
187	size_t mbdigits_max;
188	uint32_t *wcdigits;
189	size_t wcdigits_act;
190	size_t wcdigits_max;
191
192	struct charseq *mboutdigits[`10`];
193	uint32_t wcoutdigits[`10`];
194	size_t outdigits_act;
195
196	/ If the following number ever turns out to be too small simply*
197	increase it. But I doubt it will. --drepper@gnu /*
198	#define MAX_NR_CHARMAP 16
199	const char *mapnames[MAX_NR_CHARMAP];
200	uint32_t *map_collection[MAX_NR_CHARMAP];
201	uint32_t map256_collection[`2`][`256`];
202	size_t map_collection_max[MAX_NR_CHARMAP];
203	size_t map_collection_act[MAX_NR_CHARMAP];
204	size_t map_collection_nr;
205	size_t last_map_idx;
206	int tomap_done[MAX_NR_CHARMAP];
207	uint32_t map_offset;
208
209	/ Transliteration information. /
210	struct translit_include_t *translit_include;
211	struct translit_t *translit;
212	struct translit_ignore_t *translit_ignore;
213	uint32_t ntranslit_ignore;
214
215	uint32_t *default_missing;
216	const char *default_missing_file;
217	size_t default_missing_lineno;
218
219	uint32_t to_nonascii;
220	uint32_t nonascii_case;
221
222	/ The arrays for the binary representation. /
223	char_class_t *ctype_b;
224	char_class32_t *ctype32_b;
225	uint32_t **map_b;
226	uint32_t **map32_b;
227	uint32_t **class_b;
228	struct wctype_table *class_3level;
229	struct wctrans_table *map_3level;
230	uint32_t *class_name_ptr;
231	uint32_t *map_name_ptr;
232	struct wcwidth_table width;
233	uint32_t mb_cur_max;
234	const char *codeset_name;
235	uint32_t *translit_from_idx;
236	uint32_t *translit_from_tbl;
237	uint32_t *translit_to_idx;
238	uint32_t *translit_to_tbl;
239	uint32_t translit_idx_size;
240	size_t translit_from_tbl_size;
241	size_t translit_to_tbl_size;
242
243	struct obstack mempool;
244	};
245
246
/* Marker for an empty slot.  This has the value 0xFFFFFFFF, regardless
   whether 'int' is 16 bit, 32 bit, or 64 bit.  */
#define EMPTY ((uint32_t) ~0)


/* Allocation hooks used by the obstack macros in this file.  */
#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free free
254
255
/* Prototypes for local functions.  */

/* Create the LC_CTYPE data of LOCALE, or share that of COPY_LOCALE.  */
static void ctype_startup (struct linereader *lr, struct localedef_t *locale,
			   const struct charmap_t *charmap,
			   struct localedef_t *copy_locale,
			   int ignore_content);
/* Register a new character class called NAME.  */
static void ctype_class_new (struct linereader *lr,
			     struct locale_ctype_t *ctype, const char *name);
/* Register a new character mapping called NAME.  */
static void ctype_map_new (struct linereader *lr,
			   struct locale_ctype_t *ctype,
			   const char *name, const struct charmap_t *charmap);
/* Return a pointer to the entry of *TABLE that belongs to character
   IDX.  TABLE, MAX and ACT may all be NULL when only the side effect
   on the name array is wanted.  */
static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table,
			   size_t *max, size_t *act, uint32_t idx);
/* Set default value for classes not specified.  */
static void set_class_defaults (struct locale_ctype_t *ctype,
				const struct charmap_t *charmap,
				struct repertoire_t *repertoire);
/* Prepare the arrays for the binary representation.  */
static void allocate_arrays (struct locale_ctype_t *ctype,
			     const struct charmap_t *charmap,
			     struct repertoire_t *repertoire);
274
275
/* Names under which the ten decimal digits are looked up in the
   charmap: the spelled-out form and the <Uxxxxxxxx> form.  Both
   tables are indexed by the digit's value.  */
static const char *longnames[] =
{
  [0] = "zero",
  [1] = "one",
  [2] = "two",
  [3] = "three",
  [4] = "four",
  [5] = "five",
  [6] = "six",
  [7] = "seven",
  [8] = "eight",
  [9] = "nine"
};
static const char *uninames[] =
{
  [0] = "U00000030",
  [1] = "U00000031",
  [2] = "U00000032",
  [3] = "U00000033",
  [4] = "U00000034",
  [5] = "U00000035",
  [6] = "U00000036",
  [7] = "U00000037",
  [8] = "U00000038",
  [9] = "U00000039"
};
/* The ASCII digits themselves, indexed by value.  */
static const unsigned char digits[] = "0123456789";
287
288
289	static void
290	ctype_startup (struct linereader lr, struct* localedef_t *locale,
291	const struct charmap_t *charmap,
292	struct localedef_t copy_locale, int* ignore_content)
293	{
294	unsigned int cnt;
295	struct locale_ctype_t *ctype;
296
297	if (!ignore_content && locale->categories[LC_CTYPE].ctype == NULL)
298	{
299	if (copy_locale == NULL)
300	{
301	/ Allocate the needed room. /
302	locale->categories[LC_CTYPE].ctype = ctype =
303	(struct locale_ctype_t *) xcalloc (`1`,
304	sizeof (struct locale_ctype_t));
305
306	/ We have seen no names yet. /
307	ctype->charnames_max = charmap->mb_cur_max == `1` ? `256` : `512`;
308	ctype->charnames = (uint32_t *) xmalloc (ctype->charnames_max
309	* sizeof (uint32_t));
310	for (cnt = `0`; cnt < `256`; ++cnt)
311	ctype->charnames[cnt] = cnt;
312	ctype->charnames_act = `256`;
313	idx_table_init (&ctype->charnames_idx);
314
315	/ Fill character class information. /
316	ctype->last_class_char = ILLEGAL_CHAR_VALUE;
317	/ The order of the following instructions determines the bit*
318	positions! /*
319	ctype_class_new (lr, ctype, "upper");
320	ctype_class_new (lr, ctype, "lower");
321	ctype_class_new (lr, ctype, "alpha");
322	ctype_class_new (lr, ctype, "digit");
323	ctype_class_new (lr, ctype, "xdigit");
324	ctype_class_new (lr, ctype, "space");
325	ctype_class_new (lr, ctype, "print");
326	ctype_class_new (lr, ctype, "graph");
327	ctype_class_new (lr, ctype, "blank");
328	ctype_class_new (lr, ctype, "cntrl");
329	ctype_class_new (lr, ctype, "punct");
330	ctype_class_new (lr, ctype, "alnum");
331
332	ctype->class_collection_max = charmap->mb_cur_max == `1` ? `256` : `512`;
333	ctype->class_collection
334	= (uint32_t ) xcalloc (sizeof* (unsigned long int),
335	ctype->class_collection_max);
336	ctype->class_collection_act = `256`;
337
338	/ Fill character map information. /
339	ctype->last_map_idx = MAX_NR_CHARMAP;
340	ctype_map_new (lr, ctype, "toupper", charmap);
341	ctype_map_new (lr, ctype, "tolower", charmap);
342
343	/ Fill first 256 entries in `toXXX' arrays. /
344	for (cnt = `0`; cnt < `256`; ++cnt)
345	{
346	ctype->map_collection[`0`][cnt] = cnt;
347	ctype->map_collection[`1`][cnt] = cnt;
348
349	ctype->map256_collection[`0`][cnt] = cnt;
350	ctype->map256_collection[`1`][cnt] = cnt;
351	}
352
353	if (enc_not_ascii_compatible)
354	ctype->to_nonascii = `1`;
355
356	obstack_init (&ctype->mempool);
357	}
358	else
359	ctype = locale->categories[LC_CTYPE].ctype =
360	copy_locale->categories[LC_CTYPE].ctype;
361	}
362	}
363
364
365	void
366	ctype_finish (struct localedef_t locale, const* struct charmap_t *charmap)
367	{
368	/ See POSIX.2, table 2-6 for the meaning of the following table. /
369	#define NCLASS 12
370	static const struct
371	{
372	const char *name;
373	const char allow[NCLASS];
374	}
375	valid_table[NCLASS] =
376	{
377	/ The order is important. See token.h for more information.*
378	M = Always, D = Default, - = Permitted, X = Mutually exclusive /*
379	{ "upper", "--MX-XDDXXX-" },
380	{ "lower", "--MX-XDDXXX-" },
381	{ "alpha", "---X-XDDXXX-" },
382	{ "digit", "XXX--XDDXXX-" },
383	{ "xdigit", "-----XDDXXX-" },
384	{ "space", "XXXXX------X" },
385	{ "print", "---------X--" },
386	{ "graph", "---------X--" },
387	{ "blank", "XXXXXM-----X" },
388	{ "cntrl", "XXXXX-XX--XX" },
389	{ "punct", "XXXXX-DD-X-X" },
390	{ "alnum", "-----XDDXXX-" }
391	};
392	size_t cnt;
393	int cls1, cls2;
394	uint32_t space_value;
395	struct charseq *space_seq;
396	struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
397	int warned;
398	const void *key;
399	size_t len;
400	void *vdata;
401	void *curs;
402
403	/ Now resolve copying and also handle completely missing definitions. /
404	if (ctype == NULL)
405	{
406	const char *repertoire_name;
407
408	/ First see whether we were supposed to copy. If yes, find the*
409	actual definition. /*
410	if (locale->copy_name[LC_CTYPE] != NULL)
411	{
412	/ Find the copying locale. This has to happen transitively since*
413	the locale we are copying from might also copying another one. /*
414	struct localedef_t *from = locale;
415
416	do
417	from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
418	from->repertoire_name, charmap);
419	while (from->categories[LC_CTYPE].ctype == NULL
420	&& from->copy_name[LC_CTYPE] != NULL);
421
422	ctype = locale->categories[LC_CTYPE].ctype
423	= from->categories[LC_CTYPE].ctype;
424	}
425
426	/ If there is still no definition issue an warning and create an*
427	empty one. /*
428	if (ctype == NULL)
429	{
430	record_warning (_("\
431	No definition for %s category found"), "LC_CTYPE");
432	ctype_startup (NULL, locale, charmap, NULL, `0`);
433	ctype = locale->categories[LC_CTYPE].ctype;
434	}
435
436	/ Get the repertoire we have to use. /
437	repertoire_name = locale->repertoire_name ?: repertoire_global;
438	if (repertoire_name != NULL)
439	ctype->repertoire = repertoire_read (repertoire_name);
440	}
441
442	/ We need the name of the currently used 8-bit character set to*
443	make correct conversion between this 8-bit representation and the
444	ISO 10646 character set used internally for wide characters. /*
445	ctype->codeset_name = charmap->code_set_name;
446	if (ctype->codeset_name == NULL)
447	{
448	record_error (`0`, `0`, _("\
449	No character set name specified in charmap"));
450	ctype->codeset_name = "//UNKNOWN//";
451	}
452
453	/ Set default value for classes not specified. /
454	set_class_defaults (ctype, charmap, ctype->repertoire);
455
456	/ Check according to table. /
457	for (cnt = `0`; cnt < ctype->class_collection_act; ++cnt)
458	{
459	uint32_t tmp = ctype->class_collection[cnt];
460
461	if (tmp != `0`)
462	{
463	for (cls1 = `0`; cls1 < NCLASS; ++cls1)
464	if ((tmp & _ISwbit (cls1)) != `0`)
465	for (cls2 = `0`; cls2 < NCLASS; ++cls2)
466	if (valid_table[cls1].allow[cls2] != `'-'`)
467	{
468	int eq = (tmp & _ISwbit (cls2)) != `0`;
469	switch (valid_table[cls1].allow[cls2])
470	{
471	case `'M'`:
472	if (!eq)
473	{
474	uint32_t value = ctype->charnames[cnt];
475
476	record_error (`0`, `0`, _("\
477	character L'\\u%0*x' in class `%s' must be in class `%s'"),
478	value > `0xffff` ? `8` : `4`,
479	value,
480	valid_table[cls1].name,
481	valid_table[cls2].name);
482	}
483	break;
484
485	case `'X'`:
486	if (eq)
487	{
488	uint32_t value = ctype->charnames[cnt];
489
490	record_error (`0`, `0`, _("\
491	character L'\\u%0*x' in class `%s' must not be in class `%s'"),
492	value > `0xffff` ? `8` : `4`,
493	value,
494	valid_table[cls1].name,
495	valid_table[cls2].name);
496	}
497	break;
498
499	case `'D'`:
500	ctype->class_collection[cnt] \|= _ISwbit (cls2);
501	break;
502
503	default:
504	record_error (`5`, `0`, _("\
505	internal error in %s, line %u"), __FUNCTION__, __LINE__);
506	}
507	}
508	}
509	}
510
511	for (cnt = `0`; cnt < `256`; ++cnt)
512	{
513	uint32_t tmp = ctype->class256_collection[cnt];
514
515	if (tmp != `0`)
516	{
517	for (cls1 = `0`; cls1 < NCLASS; ++cls1)
518	if ((tmp & _ISbit (cls1)) != `0`)
519	for (cls2 = `0`; cls2 < NCLASS; ++cls2)
520	if (valid_table[cls1].allow[cls2] != `'-'`)
521	{
522	int eq = (tmp & _ISbit (cls2)) != `0`;
523	switch (valid_table[cls1].allow[cls2])
524	{
525	case `'M'`:
526	if (!eq)
527	{
528	char buf[`17`];
529
530	snprintf (buf, sizeof buf, "\\%Zo", cnt);
531
532	record_error (`0`, `0`, _("\
533	character '%s' in class `%s' must be in class `%s'"),
534	buf,
535	valid_table[cls1].name,
536	valid_table[cls2].name);
537	}
538	break;
539
540	case `'X'`:
541	if (eq)
542	{
543	char buf[`17`];
544
545	snprintf (buf, sizeof buf, "\\%Zo", cnt);
546
547	record_error (`0`, `0`, _("\
548	character '%s' in class `%s' must not be in class `%s'"),
549	buf,
550	valid_table[cls1].name,
551	valid_table[cls2].name);
552	}
553	break;
554
555	case `'D'`:
556	ctype->class256_collection[cnt] \|= _ISbit (cls2);
557	break;
558
559	default:
560	record_error (`5`, `0`, _("\
561	internal error in %s, line %u"), __FUNCTION__, __LINE__);
562	}
563	}
564	}
565	}
566
567	/ ... and now test <SP> as a special case. /
568	space_value = `32`;
569	if (((cnt = BITPOS (tok_space),
570	(ELEM (ctype, class_collection, , space_value)
571	& BITw (tok_space)) == `0`)
572	\|\| (cnt = BITPOS (tok_blank),
573	(ELEM (ctype, class_collection, , space_value)
574	& BITw (tok_blank)) == `0`)))
575	{
576	record_error (`0`, `0`, _("<SP> character not in class `%s'"),
577	valid_table[cnt].name);
578	}
579	else if (((cnt = BITPOS (tok_punct),
580	(ELEM (ctype, class_collection, , space_value)
581	& BITw (tok_punct)) != `0`)
582	\|\| (cnt = BITPOS (tok_graph),
583	(ELEM (ctype, class_collection, , space_value)
584	& BITw (tok_graph))
585	!= `0`)))
586	{
587	record_error (`0`, `0`, _("\
588	<SP> character must not be in class `%s'"),
589	valid_table[cnt].name);
590	}
591	else
592	ELEM (ctype, class_collection, , space_value) \|= BITw (tok_print);
593
594	space_seq = charmap_find_value (charmap, "SP", `2`);
595	if (space_seq == NULL)
596	space_seq = charmap_find_value (charmap, "space", `5`);
597	if (space_seq == NULL)
598	space_seq = charmap_find_value (charmap, "U00000020", `9`);
599	if (space_seq == NULL \|\| space_seq->nbytes != `1`)
600	{
601	record_error (`0`, `0`, _("\
602	character <SP> not defined in character map"));
603	}
604	else if (((cnt = BITPOS (tok_space),
605	(ctype->class256_collection[space_seq->bytes[`0`]]
606	& BIT (tok_space)) == `0`)
607	\|\| (cnt = BITPOS (tok_blank),
608	(ctype->class256_collection[space_seq->bytes[`0`]]
609	& BIT (tok_blank)) == `0`)))
610	{
611	record_error (`0`, `0`, _("<SP> character not in class `%s'"),
612	valid_table[cnt].name);
613	}
614	else if (((cnt = BITPOS (tok_punct),
615	(ctype->class256_collection[space_seq->bytes[`0`]]
616	& BIT (tok_punct)) != `0`)
617	\|\| (cnt = BITPOS (tok_graph),
618	(ctype->class256_collection[space_seq->bytes[`0`]]
619	& BIT (tok_graph)) != `0`)))
620	{
621	record_error (`0`, `0`, _("\
622	<SP> character must not be in class `%s'"),
623	valid_table[cnt].name);
624	}
625	else
626	ctype->class256_collection[space_seq->bytes[`0`]] \|= BIT (tok_print);
627
628	/ Check whether all single-byte characters make to their upper/lowercase*
629	equivalent according to the ASCII rules. /*
630	for (cnt = `'A'`; cnt <= `'Z'`; ++cnt)
631	{
632	uint32_t uppval = ctype->map256_collection[`0`][cnt];
633	uint32_t lowval = ctype->map256_collection[`1`][cnt];
634	uint32_t lowuppval = ctype->map256_collection[`0`][lowval];
635	uint32_t lowlowval = ctype->map256_collection[`1`][lowval];
636
637	if (uppval != cnt
638	\|\| lowval != cnt + `0x20`
639	\|\| lowuppval != cnt
640	\|\| lowlowval != cnt + `0x20`)
641	ctype->nonascii_case = `1`;
642	}
643	for (cnt = `0`; cnt < `256`; ++cnt)
644	if (cnt < `'A'` \|\| (cnt > `'Z'` && cnt < `'a'`) \|\| cnt > `'z'`)
645	if (ctype->map256_collection[`0`][cnt] != cnt
646	\|\| ctype->map256_collection[`1`][cnt] != cnt)
647	ctype->nonascii_case = `1`;
648
649	/ Now that the tests are done make sure the name array contains all*
650	characters which are handled in the WIDTH section of the
651	character set definition file. /*
652	if (charmap->width_rules != NULL)
653	for (cnt = `0`; cnt < charmap->nwidth_rules; ++cnt)
654	{
655	unsigned char bytes[charmap->mb_cur_max];
656	int nbytes = charmap->width_rules[cnt].from->nbytes;
657
658	/ We have the range of character for which the width is*
659	specified described using byte sequences of the multibyte
660	charset. We have to convert this to UCS4 now. And we
661	cannot simply convert the beginning and the end of the
662	sequence, we have to iterate over the byte sequence and
663	convert it for every single character. /*
664	memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
665
666	while (nbytes < charmap->width_rules[cnt].to->nbytes
667	\|\| memcmp (bytes, charmap->width_rules[cnt].to->bytes,
668	nbytes) <= `0`)
669	{
670	/ Find the UCS value for `bytes'. /
671	int inner;
672	uint32_t wch;
673	struct charseq *seq
674	= charmap_find_symbol (charmap, (char *) bytes, nbytes);
675
676	if (seq == NULL)
677	wch = ILLEGAL_CHAR_VALUE;
678	else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
679	wch = seq->ucs4;
680	else
681	wch = repertoire_find_value (ctype->repertoire, seq->name,
682	strlen (seq->name));
683
684	if (wch != ILLEGAL_CHAR_VALUE)
685	/ We are only interested in the side-effects of the*
686	`find_idx' call. It will add appropriate entries in
687	the name array if this is necessary. /*
688	(void) find_idx (ctype, NULL, NULL, NULL, wch);
689
690	/ "Increment" the bytes sequence. /
691	inner = nbytes - `1`;
692	while (inner >= `0` && bytes[inner] == `0xff`)
693	--inner;
694
695	if (inner < `0`)
696	{
697	/ We have to extend the byte sequence. /
698	if (nbytes >= charmap->width_rules[cnt].to->nbytes)
699	break;
700
701	bytes[`0`] = `1`;
702	memset (&bytes[`1`], `0`, nbytes);
703	++nbytes;
704	}
705	else
706	{
707	++bytes[inner];
708	while (++inner < nbytes)
709	bytes[inner] = `0`;
710	}
711	}
712	}
713
714	/ Now set all the other characters of the character set to the*
715	default width. /*
716	curs = NULL;
717	while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == `0`)
718	{
719	struct charseq data = (struct* charseq *) vdata;
720
721	if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
722	data->ucs4 = repertoire_find_value (ctype->repertoire,
723	data->name, len);
724
725	if (data->ucs4 != ILLEGAL_CHAR_VALUE)
726	(void) find_idx (ctype, NULL, NULL, NULL, data->ucs4);
727	}
728
729	/ There must be a multiple of 10 digits. /
730	if (ctype->mbdigits_act % `10` != `0`)
731	{
732	assert (ctype->mbdigits_act == ctype->wcdigits_act);
733	ctype->wcdigits_act -= ctype->mbdigits_act % `10`;
734	ctype->mbdigits_act -= ctype->mbdigits_act % `10`;
735	record_error (`0`, `0`, _("\
736	`digit' category has not entries in groups of ten"));
737	}
738
739	/ Check the input digits. There must be a multiple of ten available.*
740	In each group it could be that one or the other character is missing.
741	In this case the whole group must be removed. /*
742	cnt = `0`;
743	while (cnt < ctype->mbdigits_act)
744	{
745	size_t inner;
746	for (inner = `0`; inner < `10`; ++inner)
747	if (ctype->mbdigits[cnt + inner] == NULL)
748	break;
749
750	if (inner == `10`)
751	cnt += `10`;
752	else
753	{
754	/ Remove the group. /
755	memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + `10`],
756	((ctype->wcdigits_act - cnt - `10`)
757	* sizeof (ctype->mbdigits[`0`])));
758	ctype->mbdigits_act -= `10`;
759	}
760	}
761
762	/ If no input digits are given use the default. /
763	if (ctype->mbdigits_act == `0`)
764	{
765	if (ctype->mbdigits_max == `0`)
766	{
767	ctype->mbdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
768	`10` * sizeof (struct charseq *));
769	ctype->mbdigits_max = `10`;
770	}
771
772	for (cnt = `0`; cnt < `10`; ++cnt)
773	{
774	ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
775	(char *) digits + cnt, `1`);
776	if (ctype->mbdigits[cnt] == NULL)
777	{
778	ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
779	longnames[cnt],
780	strlen (longnames[cnt]));
781	if (ctype->mbdigits[cnt] == NULL)
782	{
783	/ Hum, this ain't good. /
784	record_error (`0`, `0`, _("\
785	no input digits defined and none of the standard names in the charmap"));
786
787	ctype->mbdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
788	sizeof (struct charseq) + `1`);
789
790	/ This is better than nothing. /
791	ctype->mbdigits[cnt]->bytes[`0`] = digits[cnt];
792	ctype->mbdigits[cnt]->nbytes = `1`;
793	}
794	}
795	}
796
797	ctype->mbdigits_act = `10`;
798	}
799
800	/ Check the wide character input digits. There must be a multiple*
801	of ten available. In each group it could be that one or the other
802	character is missing. In this case the whole group must be
803	removed. /*
804	cnt = `0`;
805	while (cnt < ctype->wcdigits_act)
806	{
807	size_t inner;
808	for (inner = `0`; inner < `10`; ++inner)
809	if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
810	break;
811
812	if (inner == `10`)
813	cnt += `10`;
814	else
815	{
816	/ Remove the group. /
817	memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + `10`],
818	((ctype->wcdigits_act - cnt - `10`)
819	* sizeof (ctype->wcdigits[`0`])));
820	ctype->wcdigits_act -= `10`;
821	}
822	}
823
824	/ If no input digits are given use the default. /
825	if (ctype->wcdigits_act == `0`)
826	{
827	if (ctype->wcdigits_max == `0`)
828	{
829	ctype->wcdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
830	`10` * sizeof (uint32_t));
831	ctype->wcdigits_max = `10`;
832	}
833
834	for (cnt = `0`; cnt < `10`; ++cnt)
835	ctype->wcdigits[cnt] = L`'0'` + cnt;
836
837	ctype->mbdigits_act = `10`;
838	}
839
840	/ Check the outdigits. /
841	warned = `0`;
842	for (cnt = `0`; cnt < `10`; ++cnt)
843	if (ctype->mboutdigits[cnt] == NULL)
844	{
845	if (!warned)
846	{
847	record_error (`0`, `0`, _("\
848	not all characters used in `outdigit' are available in the charmap"));
849	warned = `1`;
850	}
851
852	static const struct charseq replace =
853	{
854	.nbytes = `1`,
855	.bytes = "?",
856	};
857	ctype->mboutdigits[cnt] = (struct charseq *) &replace;
858	}
859
860	warned = `0`;
861	for (cnt = `0`; cnt < `10`; ++cnt)
862	if (ctype->wcoutdigits[cnt] == `0`)
863	{
864	if (!warned)
865	{
866	record_error (`0`, `0`, _("\
867	not all characters used in `outdigit' are available in the repertoire"));
868	warned = `1`;
869	}
870
871	ctype->wcoutdigits[cnt] = L`'?'`;
872	}
873
874	/ Sort the entries in the translit_ignore list. /
875	if (ctype->translit_ignore != NULL)
876	{
877	struct translit_ignore_t *firstp = ctype->translit_ignore;
878	struct translit_ignore_t *runp;
879
880	ctype->ntranslit_ignore = `1`;
881
882	for (runp = firstp->next; runp != NULL; runp = runp->next)
883	{
884	struct translit_ignore_t *lastp = NULL;
885	struct translit_ignore_t *cmpp;
886
887	++ctype->ntranslit_ignore;
888
889	for (cmpp = firstp; cmpp != NULL; lastp = cmpp, cmpp = cmpp->next)
890	if (runp->from < cmpp->from)
891	break;
892
893	runp->next = lastp;
894	if (lastp == NULL)
895	firstp = runp;
896	}
897
898	ctype->translit_ignore = firstp;
899	}
900	}
901
902
903	void
904	ctype_output (struct localedef_t locale, const* struct charmap_t *charmap,
905	const char *output_path)
906	{
907	struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
908	const size_t nelems = (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1)
909	+ ctype->nr_charclass + ctype->map_collection_nr);
910	struct locale_file file;
911	uint32_t default_missing_len;
912	size_t elem, cnt;
913
914	/ Now prepare the output: Find the sizes of the table we can use. /
915	allocate_arrays (ctype, charmap, ctype->repertoire);
916
917	default_missing_len = (ctype->default_missing
918	? wcslen ((wchar_t *) ctype->default_missing)
919	: `0`);
920
921	init_locale_data (&file, nelems);
922	for (elem = `0`; elem < nelems; ++elem)
923	{
924	if (elem < _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1))
925	switch (elem)
926	{
927	#define CTYPE_EMPTY(name) \
928	case name: \
929	add_locale_empty (&file); \
930	break
931
932	CTYPE_EMPTY(_NL_CTYPE_GAP1);
933	CTYPE_EMPTY(_NL_CTYPE_GAP2);
934	CTYPE_EMPTY(_NL_CTYPE_GAP3);
935	CTYPE_EMPTY(_NL_CTYPE_GAP4);
936	CTYPE_EMPTY(_NL_CTYPE_GAP5);
937	CTYPE_EMPTY(_NL_CTYPE_GAP6);
938
939	#define CTYPE_RAW_DATA(name, base, size) \
940	case _NL_ITEM_INDEX (name): \
941	add_locale_raw_data (&file, base, size); \
942	break
943
944	CTYPE_RAW_DATA (_NL_CTYPE_CLASS,
945	ctype->ctype_b,
946	(`256` + `128`) * sizeof (char_class_t));
947
948	#define CTYPE_UINT32_ARRAY(name, base, n_elems) \
949	case _NL_ITEM_INDEX (name): \
950	add_locale_uint32_array (&file, base, n_elems); \
951	break
952
953	CTYPE_UINT32_ARRAY (_NL_CTYPE_TOUPPER, ctype->map_b[`0`], `256` + `128`);
954	CTYPE_UINT32_ARRAY (_NL_CTYPE_TOLOWER, ctype->map_b[`1`], `256` + `128`);
955	CTYPE_UINT32_ARRAY (_NL_CTYPE_TOUPPER32, ctype->map32_b[`0`], `256`);
956	CTYPE_UINT32_ARRAY (_NL_CTYPE_TOLOWER32, ctype->map32_b[`1`], `256`);
957	CTYPE_RAW_DATA (_NL_CTYPE_CLASS32,
958	ctype->ctype32_b,
959	`256` * sizeof (char_class32_t));
960
961	#define CTYPE_UINT32(name, value) \
962	case _NL_ITEM_INDEX (name): \
963	add_locale_uint32 (&file, value); \
964	break
965
966	CTYPE_UINT32 (_NL_CTYPE_CLASS_OFFSET, ctype->class_offset);
967	CTYPE_UINT32 (_NL_CTYPE_MAP_OFFSET, ctype->map_offset);
968	CTYPE_UINT32 (_NL_CTYPE_TRANSLIT_TAB_SIZE, ctype->translit_idx_size);
969
970	CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_FROM_IDX,
971	ctype->translit_from_idx,
972	ctype->translit_idx_size);
973
974	CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_FROM_TBL,
975	ctype->translit_from_tbl,
976	ctype->translit_from_tbl_size
977	/ sizeof (uint32_t));
978
979	CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_TO_IDX,
980	ctype->translit_to_idx,
981	ctype->translit_idx_size);
982
983	CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_TO_TBL,
984	ctype->translit_to_tbl,
985	ctype->translit_to_tbl_size / sizeof (uint32_t));
986
987	case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
988	/ The class name array. /
989	start_locale_structure (&file);
990	for (cnt = `0`; cnt < ctype->nr_charclass; ++cnt)
991	add_locale_string (&file, ctype->classnames[cnt]);
992	add_locale_char (&file, `0`);
993	align_locale_data (&file, LOCFILE_ALIGN);
994	end_locale_structure (&file);
995	break;
996
997	case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
998	/ The class name array. /
999	start_locale_structure (&file);
1000	for (cnt = `0`; cnt < ctype->map_collection_nr; ++cnt)
1001	add_locale_string (&file, ctype->mapnames[cnt]);
1002	add_locale_char (&file, `0`);
1003	align_locale_data (&file, LOCFILE_ALIGN);
1004	end_locale_structure (&file);
1005	break;
1006
1007	case _NL_ITEM_INDEX (_NL_CTYPE_WIDTH):
1008	add_locale_wcwidth_table (&file, &ctype->width);
1009	break;
1010
1011	CTYPE_UINT32 (_NL_CTYPE_MB_CUR_MAX, ctype->mb_cur_max);
1012
1013	case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
1014	add_locale_string (&file, ctype->codeset_name);
1015	break;
1016
1017	CTYPE_UINT32 (_NL_CTYPE_MAP_TO_NONASCII, ctype->to_nonascii);
1018
1019	CTYPE_UINT32 (_NL_CTYPE_NONASCII_CASE, ctype->nonascii_case);
1020
1021	case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
1022	add_locale_uint32 (&file, ctype->mbdigits_act / `10`);
1023	break;
1024
1025	case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
1026	add_locale_uint32 (&file, ctype->wcdigits_act / `10`);
1027	break;
1028
1029	case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
1030	start_locale_structure (&file);
1031	for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1032	cnt < ctype->mbdigits_act; cnt += `10`)
1033	{
1034	add_locale_raw_data (&file, ctype->mbdigits[cnt]->bytes,
1035	ctype->mbdigits[cnt]->nbytes);
1036	add_locale_char (&file, `0`);
1037	}
1038	end_locale_structure (&file);
1039	break;
1040
1041	case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
1042	start_locale_structure (&file);
1043	cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB);
1044	add_locale_raw_data (&file, ctype->mboutdigits[cnt]->bytes,
1045	ctype->mboutdigits[cnt]->nbytes);
1046	add_locale_char (&file, `0`);
1047	end_locale_structure (&file);
1048	break;
1049
1050	case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
1051	start_locale_structure (&file);
1052	for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC);
1053	cnt < ctype->wcdigits_act; cnt += `10`)
1054	add_locale_uint32 (&file, ctype->wcdigits[cnt]);
1055	end_locale_structure (&file);
1056	break;
1057
1058	case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
1059	cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC);
1060	add_locale_uint32 (&file, ctype->wcoutdigits[cnt]);
1061	break;
1062
1063	case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN):
1064	add_locale_uint32 (&file, default_missing_len);
1065	break;
1066
1067	case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING):
1068	add_locale_uint32_array (&file, ctype->default_missing,
1069	default_missing_len);
1070	break;
1071
1072	case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN):
1073	add_locale_uint32 (&file, ctype->ntranslit_ignore);
1074	break;
1075
1076	case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE):
1077	start_locale_structure (&file);
1078	{
1079	struct translit_ignore_t *runp;
1080	for (runp = ctype->translit_ignore; runp != NULL;
1081	runp = runp->next)
1082	{
1083	add_locale_uint32 (&file, runp->from);
1084	add_locale_uint32 (&file, runp->to);
1085	add_locale_uint32 (&file, runp->step);
1086	}
1087	}
1088	end_locale_structure (&file);
1089	break;
1090
1091	default:
1092	assert (! "unknown CTYPE element");
1093	}
1094	else
1095	{
1096	/ Handle extra maps. /
1097	size_t nr = elem - _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
1098	if (nr < ctype->nr_charclass)
1099	{
1100	start_locale_prelude (&file);
1101	add_locale_uint32_array (&file, ctype->class_b[nr], `256` / `32`);
1102	end_locale_prelude (&file);
1103	add_locale_wctype_table (&file, &ctype->class_3level[nr]);
1104	}
1105	else
1106	{
1107	nr -= ctype->nr_charclass;
1108	assert (nr < ctype->map_collection_nr);
1109	add_locale_wctrans_table (&file, &ctype->map_3level[nr]);
1110	}
1111	}
1112	}
1113
1114	write_locale_data (output_path, LC_CTYPE, "LC_CTYPE", &file);
1115	}
1116
1117
/* Local functions.  */
1119	static void
1120	ctype_class_new (struct linereader lr, struct* locale_ctype_t *ctype,
1121	const char *name)
1122	{
1123	size_t cnt;
1124
1125	for (cnt = `0`; cnt < ctype->nr_charclass; ++cnt)
1126	if (strcmp (ctype->classnames[cnt], name) == `0`)
1127	break;
1128
1129	if (cnt < ctype->nr_charclass)
1130	{
1131	lr_error (lr, _("character class `%s' already defined"), name);
1132	return;
1133	}
1134
1135	if (ctype->nr_charclass == MAX_NR_CHARCLASS)
1136	/ Exit code 2 is prescribed in P1003.2b. /
1137	record_error (`2`, `0`, _("\
1138	implementation limit: no more than %Zd character classes allowed"),
1139	MAX_NR_CHARCLASS);
1140
1141	ctype->classnames[ctype->nr_charclass++] = name;
1142	}
1143
1144
1145	static void
1146	ctype_map_new (struct linereader lr, struct* locale_ctype_t *ctype,
1147	const char name, const* struct charmap_t *charmap)
1148	{
1149	size_t max_chars = `0`;
1150	size_t cnt;
1151
1152	for (cnt = `0`; cnt < ctype->map_collection_nr; ++cnt)
1153	{
1154	if (strcmp (ctype->mapnames[cnt], name) == `0`)
1155	break;
1156
1157	if (max_chars < ctype->map_collection_max[cnt])
1158	max_chars = ctype->map_collection_max[cnt];
1159	}
1160
1161	if (cnt < ctype->map_collection_nr)
1162	{
1163	lr_error (lr, _("character map `%s' already defined"), name);
1164	return;
1165	}
1166
1167	if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1168	/ Exit code 2 is prescribed in P1003.2b. /
1169	record_error (`2`, `0`, _("\
1170	implementation limit: no more than %d character maps allowed"),
1171	MAX_NR_CHARMAP);
1172
1173	ctype->mapnames[cnt] = name;
1174
1175	if (max_chars == `0`)
1176	ctype->map_collection_max[cnt] = charmap->mb_cur_max == `1` ? `256` : `512`;
1177	else
1178	ctype->map_collection_max[cnt] = max_chars;
1179
1180	ctype->map_collection[cnt] = (uint32_t *)
1181	xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]);
1182	ctype->map_collection_act[cnt] = `256`;
1183
1184	++ctype->map_collection_nr;
1185	}
1186
1187
1188	/ We have to be prepared that TABLE, MAX, and ACT can be NULL. This*
1189	is possible if we only want to extend the name array. /*
1190	static uint32_t *
1191	find_idx (struct locale_ctype_t ctype, uint32_t table, size_t max,
1192	size_t *act, uint32_t idx)
1193	{
1194	size_t cnt;
1195
1196	if (idx < `256`)
1197	return table == NULL ? NULL : &(*table)[idx];
1198
1199	/ Use the charnames_idx lookup table instead of the slow search loop. /
1200	#if 1
1201	cnt = idx_table_get (&ctype->charnames_idx, idx);
1202	if (cnt == EMPTY)
1203	/ Not found. /
1204	cnt = ctype->charnames_act;
1205	#else
1206	for (cnt = `256`; cnt < ctype->charnames_act; ++cnt)
1207	if (ctype->charnames[cnt] == idx)
1208	break;
1209	#endif
1210
1211	/ We have to distinguish two cases: the name is found or not. /
1212	if (cnt == ctype->charnames_act)
1213	{
1214	/ Extend the name array. /
1215	if (ctype->charnames_act == ctype->charnames_max)
1216	{
1217	ctype->charnames_max *= `2`;
1218	ctype->charnames = (uint32_t *)
1219	xrealloc (ctype->charnames,
1220	sizeof (uint32_t) * ctype->charnames_max);
1221	}
1222	ctype->charnames[ctype->charnames_act++] = idx;
1223	idx_table_add (&ctype->charnames_idx, idx, cnt);
1224	}
1225
1226	if (table == NULL)
1227	/ We have done everything we are asked to do. /
1228	return NULL;
1229
1230	if (max == NULL)
1231	/ The caller does not want to extend the table. /
1232	return (cnt >= act ? NULL : &(table)[cnt]);
1233
1234	if (cnt >= *act)
1235	{
1236	if (cnt >= *max)
1237	{
1238	size_t old_max = *max;
1239	do
1240	max = `2`;
1241	while (*max <= cnt);
1242
1243	*table =
1244	(uint32_t ) xrealloc (table, max sizeof (uint32_t));
1245	memset (&(*table)[old_max], `'\0'`,
1246	(max - old_max) sizeof (uint32_t));
1247	}
1248
1249	*act = cnt + `1`;
1250	}
1251
1252	return &(*table)[cnt];
1253	}
1254
1255
1256	static int
1257	get_character (struct token now, const* struct charmap_t *charmap,
1258	struct repertoire_t *repertoire,
1259	struct charseq *seqp, uint32_t wchp)
1260	{
1261	if (now->tok == tok_bsymbol)
1262	{
1263	/ This will hopefully be the normal case. /
1264	*wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1265	now->val.str.lenmb);
1266	*seqp = charmap_find_value (charmap, now->val.str.startmb,
1267	now->val.str.lenmb);
1268	}
1269	else if (now->tok == tok_ucs4)
1270	{
1271	char utmp[`10`];
1272
1273	snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4);
1274	*seqp = charmap_find_value (charmap, utmp, `9`);
1275
1276	if (*seqp == NULL)
1277	*seqp = repertoire_find_seq (repertoire, now->val.ucs4);
1278
1279	if (*seqp == NULL)
1280	{
1281	/ Compute the value in the charmap from the UCS value. /
1282	const char *symbol = repertoire_find_symbol (repertoire,
1283	now->val.ucs4);
1284
1285	if (symbol == NULL)
1286	*seqp = NULL;
1287	else
1288	*seqp = charmap_find_value (charmap, symbol, strlen (symbol));
1289
1290	if (*seqp == NULL)
1291	{
1292	if (repertoire != NULL)
1293	{
1294	/ Insert a negative entry. /
1295	static const struct charseq negative
1296	= { .ucs4 = ILLEGAL_CHAR_VALUE };
1297	uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1298	sizeof (uint32_t));
1299	*newp = now->val.ucs4;
1300
1301	insert_entry (&repertoire->seq_table, newp,
1302	sizeof (uint32_t), (void *) &negative);
1303	}
1304	}
1305	else
1306	(*seqp)->ucs4 = now->val.ucs4;
1307	}
1308	else if ((*seqp)->ucs4 != now->val.ucs4)
1309	*seqp = NULL;
1310
1311	*wchp = now->val.ucs4;
1312	}
1313	else if (now->tok == tok_charcode)
1314	{
1315	/ We must map from the byte code to UCS4. /
1316	*seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1317	now->val.str.lenmb);
1318
1319	if (*seqp == NULL)
1320	*wchp = ILLEGAL_CHAR_VALUE;
1321	else
1322	{
1323	if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1324	(seqp)->ucs4 = repertoire_find_value (repertoire, (seqp)->name,
1325	strlen ((*seqp)->name));
1326	wchp = (seqp)->ucs4;
1327	}
1328	}
1329	else
1330	return `1`;
1331
1332	return `0`;
1333	}
1334
1335
1336	/ Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and*
1337	the .(2). counterparts. /*
1338	static void
1339	charclass_symbolic_ellipsis (struct linereader *ldfile,
1340	struct locale_ctype_t *ctype,
1341	const struct charmap_t *charmap,
1342	struct repertoire_t *repertoire,
1343	struct token *now,
1344	const char *last_str,
1345	unsigned long int class256_bit,
1346	unsigned long int class_bit, int base,
1347	int ignore_content, int handle_digits, int step)
1348	{
1349	const char *nowstr = now->val.str.startmb;
1350	char tmp[now->val.str.lenmb + `1`];
1351	const char *cp;
1352	char *endp;
1353	unsigned long int from;
1354	unsigned long int to;
1355
1356	/ We have to compute the ellipsis values using the symbolic names. /
1357	assert (last_str != NULL);
1358
1359	if (strlen (last_str) != now->val.str.lenmb)
1360	{
1361	invalid_range:
1362	lr_error (ldfile,
1363	_("`%s' and `%.*s' are not valid names for symbolic range"),
1364	last_str, (int) now->val.str.lenmb, nowstr);
1365	return;
1366	}
1367
1368	if (memcmp (last_str, nowstr, now->val.str.lenmb) == `0`)
1369	/ Nothing to do, the names are the same. /
1370	return;
1371
1372	for (cp = last_str; cp == (nowstr + (cp - last_str)); ++cp)
1373	;
1374
1375	errno = `0`;
1376	from = strtoul (cp, &endp, base);
1377	if ((from == UINT_MAX && errno == ERANGE) \|\| *endp != `'\0'`)
1378	goto invalid_range;
1379
1380	to = strtoul (nowstr + (cp - last_str), &endp, base);
1381	if ((to == UINT_MAX && errno == ERANGE)
1382	\|\| (endp - nowstr) != now->val.str.lenmb \|\| from >= to)
1383	goto invalid_range;
1384
1385	/ OK, we have a range FROM - TO. Now we can create the symbolic names. /
1386	if (!ignore_content)
1387	{
1388	now->val.str.startmb = tmp;
1389	while ((from += step) <= to)
1390	{
1391	struct charseq *seq;
1392	uint32_t wch;
1393
1394	sprintf (tmp, (base == `10` ? "%.s%0ld" : "%.s%0lX"),
1395	(int) (cp - last_str), last_str,
1396	(int) (now->val.str.lenmb - (cp - last_str)),
1397	from);
1398
1399	if (get_character (now, charmap, repertoire, &seq, &wch))
1400	goto invalid_range;
1401
1402	if (seq != NULL && seq->nbytes == `1`)
1403	/ Yep, we can store information about this byte sequence. /
1404	ctype->class256_collection[seq->bytes[`0`]] \|= class256_bit;
1405
1406	if (wch != ILLEGAL_CHAR_VALUE && class_bit != `0`)
1407	/ We have the UCS4 position. /
1408	*find_idx (ctype, &ctype->class_collection,
1409	&ctype->class_collection_max,
1410	&ctype->class_collection_act, wch) \|= class_bit;
1411
1412	if (handle_digits == `1`)
1413	{
1414	/ We must store the digit values. /
1415	if (ctype->mbdigits_act == ctype->mbdigits_max)
1416	{
1417	ctype->mbdigits_max *= `2`;
1418	ctype->mbdigits = xrealloc (ctype->mbdigits,
1419	(ctype->mbdigits_max
1420	* sizeof (char *)));
1421	ctype->wcdigits_max *= `2`;
1422	ctype->wcdigits = xrealloc (ctype->wcdigits,
1423	(ctype->wcdigits_max
1424	* sizeof (uint32_t)));
1425	}
1426
1427	ctype->mbdigits[ctype->mbdigits_act++] = seq;
1428	ctype->wcdigits[ctype->wcdigits_act++] = wch;
1429	}
1430	else if (handle_digits == `2`)
1431	{
1432	/ We must store the digit values. /
1433	if (ctype->outdigits_act >= `10`)
1434	{
1435	lr_error (ldfile, _("\
1436	%s: field `%s' does not contain exactly ten entries"),
1437	"LC_CTYPE", "outdigit");
1438	return;
1439	}
1440
1441	ctype->mboutdigits[ctype->outdigits_act] = seq;
1442	ctype->wcoutdigits[ctype->outdigits_act] = wch;
1443	++ctype->outdigits_act;
1444	}
1445	}
1446	}
1447	}
1448
1449
1450	/ Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'. /
1451	static void
1452	charclass_ucs4_ellipsis (struct linereader *ldfile,
1453	struct locale_ctype_t *ctype,
1454	const struct charmap_t *charmap,
1455	struct repertoire_t *repertoire,
1456	struct token *now, uint32_t last_wch,
1457	unsigned long int class256_bit,
1458	unsigned long int class_bit, int ignore_content,
1459	int handle_digits, int step)
1460	{
1461	if (last_wch > now->val.ucs4)
1462	{
1463	lr_error (ldfile, _("\
1464	to-value <U%0X> of range is smaller than from-value <U%0X>"),
1465	(now->val.ucs4 \| last_wch) < `65536` ? `4` : `8`, now->val.ucs4,
1466	(now->val.ucs4 \| last_wch) < `65536` ? `4` : `8`, last_wch);
1467	return;
1468	}
1469
1470	if (!ignore_content)
1471	while ((last_wch += step) <= now->val.ucs4)
1472	{
1473	/ We have to find out whether there is a byte sequence corresponding*
1474	to this UCS4 value. /*
1475	struct charseq *seq;
1476	char utmp[`10`];
1477
1478	snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
1479	seq = charmap_find_value (charmap, utmp, `9`);
1480	if (seq == NULL)
1481	{
1482	snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
1483	seq = charmap_find_value (charmap, utmp, `5`);
1484	}
1485
1486	if (seq == NULL)
1487	/ Try looking in the repertoire map. /
1488	seq = repertoire_find_seq (repertoire, last_wch);
1489
1490	/ If this is the first time we look for this sequence create a new*
1491	entry. /*
1492	if (seq == NULL)
1493	{
1494	static const struct charseq negative
1495	= { .ucs4 = ILLEGAL_CHAR_VALUE };
1496
1497	/ Find the symbolic name for this UCS4 value. /
1498	if (repertoire != NULL)
1499	{
1500	const char *symbol = repertoire_find_symbol (repertoire,
1501	last_wch);
1502	uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1503	sizeof (uint32_t));
1504	*newp = last_wch;
1505
1506	if (symbol != NULL)
1507	/ We have a name, now search the multibyte value. /
1508	seq = charmap_find_value (charmap, symbol, strlen (symbol));
1509
1510	if (seq == NULL)
1511	/ We have to create a fake entry. /
1512	seq = (struct charseq *) &negative;
1513	else
1514	seq->ucs4 = last_wch;
1515
1516	insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
1517	seq);
1518	}
1519	else
1520	/ We have to create a fake entry. /
1521	seq = (struct charseq *) &negative;
1522	}
1523
1524	/ We have a name, now search the multibyte value. /
1525	if (seq->ucs4 == last_wch && seq->nbytes == `1`)
1526	/ Yep, we can store information about this byte sequence. /
1527	ctype->class256_collection[(size_t) seq->bytes[`0`]]
1528	\|= class256_bit;
1529
1530	/ And of course we have the UCS4 position. /
1531	if (class_bit != `0`)
1532	*find_idx (ctype, &ctype->class_collection,
1533	&ctype->class_collection_max,
1534	&ctype->class_collection_act, last_wch) \|= class_bit;
1535
1536	if (handle_digits == `1`)
1537	{
1538	/ We must store the digit values. /
1539	if (ctype->mbdigits_act == ctype->mbdigits_max)
1540	{
1541	ctype->mbdigits_max *= `2`;
1542	ctype->mbdigits = xrealloc (ctype->mbdigits,
1543	(ctype->mbdigits_max
1544	* sizeof (char *)));
1545	ctype->wcdigits_max *= `2`;
1546	ctype->wcdigits = xrealloc (ctype->wcdigits,
1547	(ctype->wcdigits_max
1548	* sizeof (uint32_t)));
1549	}
1550
1551	ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1552	? seq : NULL);
1553	ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1554	}
1555	else if (handle_digits == `2`)
1556	{
1557	/ We must store the digit values. /
1558	if (ctype->outdigits_act >= `10`)
1559	{
1560	lr_error (ldfile, _("\
1561	%s: field `%s' does not contain exactly ten entries"),
1562	"LC_CTYPE", "outdigit");
1563	return;
1564	}
1565
1566	ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1567	? seq : NULL);
1568	ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1569	++ctype->outdigits_act;
1570	}
1571	}
1572	}
1573
1574
1575	/ Ellipsis as in `/xea/x12.../xea/x34'. /
1576	static void
1577	charclass_charcode_ellipsis (struct linereader *ldfile,
1578	struct locale_ctype_t *ctype,
1579	const struct charmap_t *charmap,
1580	struct repertoire_t *repertoire,
1581	struct token now, char* *last_charcode,
1582	uint32_t last_charcode_len,
1583	unsigned long int class256_bit,
1584	unsigned long int class_bit, int ignore_content,
1585	int handle_digits)
1586	{
1587	/ First check whether the to-value is larger. /
1588	if (now->val.charcode.nbytes != last_charcode_len)
1589	{
1590	lr_error (ldfile, _("\
1591	start and end character sequence of range must have the same length"));
1592	return;
1593	}
1594
1595	if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > `0`)
1596	{
1597	lr_error (ldfile, _("\
1598	to-value character sequence is smaller than from-value sequence"));
1599	return;
1600	}
1601
1602	if (!ignore_content)
1603	{
1604	do
1605	{
1606	/ Increment the byte sequence value. /
1607	struct charseq *seq;
1608	uint32_t wch;
1609	int i;
1610
1611	for (i = last_charcode_len - `1`; i >= `0`; --i)
1612	if (++last_charcode[i] != `0`)
1613	break;
1614
1615	if (last_charcode_len == `1`)
1616	/ Of course we have the charcode value. /
1617	ctype->class256_collection[(size_t) last_charcode[`0`]]
1618	\|= class256_bit;
1619
1620	/ Find the symbolic name. /
1621	seq = charmap_find_symbol (charmap, last_charcode,
1622	last_charcode_len);
1623	if (seq != NULL)
1624	{
1625	if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1626	seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1627	strlen (seq->name));
1628	wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4;
1629
1630	if (wch != ILLEGAL_CHAR_VALUE && class_bit != `0`)
1631	*find_idx (ctype, &ctype->class_collection,
1632	&ctype->class_collection_max,
1633	&ctype->class_collection_act, wch) \|= class_bit;
1634	}
1635	else
1636	wch = ILLEGAL_CHAR_VALUE;
1637
1638	if (handle_digits == `1`)
1639	{
1640	/ We must store the digit values. /
1641	if (ctype->mbdigits_act == ctype->mbdigits_max)
1642	{
1643	ctype->mbdigits_max *= `2`;
1644	ctype->mbdigits = xrealloc (ctype->mbdigits,
1645	(ctype->mbdigits_max
1646	* sizeof (char *)));
1647	ctype->wcdigits_max *= `2`;
1648	ctype->wcdigits = xrealloc (ctype->wcdigits,
1649	(ctype->wcdigits_max
1650	* sizeof (uint32_t)));
1651	}
1652
1653	seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1654	memcpy ((char *) (seq + `1`), last_charcode, last_charcode_len);
1655	seq->nbytes = last_charcode_len;
1656
1657	ctype->mbdigits[ctype->mbdigits_act++] = seq;
1658	ctype->wcdigits[ctype->wcdigits_act++] = wch;
1659	}
1660	else if (handle_digits == `2`)
1661	{
1662	struct charseq *seq;
1663	/ We must store the digit values. /
1664	if (ctype->outdigits_act >= `10`)
1665	{
1666	lr_error (ldfile, _("\
1667	%s: field `%s' does not contain exactly ten entries"),
1668	"LC_CTYPE", "outdigit");
1669	return;
1670	}
1671
1672	seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1673	memcpy ((char *) (seq + `1`), last_charcode, last_charcode_len);
1674	seq->nbytes = last_charcode_len;
1675
1676	ctype->mboutdigits[ctype->outdigits_act] = seq;
1677	ctype->wcoutdigits[ctype->outdigits_act] = wch;
1678	++ctype->outdigits_act;
1679	}
1680	}
1681	while (memcmp (last_charcode, now->val.charcode.bytes,
1682	last_charcode_len) != `0`);
1683	}
1684	}
1685
1686
1687	static uint32_t *
1688	find_translit2 (struct locale_ctype_t ctype, const* struct charmap_t *charmap,
1689	uint32_t wch)
1690	{
1691	struct translit_t *trunp = ctype->translit;
1692	struct translit_ignore_t *tirunp = ctype->translit_ignore;
1693
1694	while (trunp != NULL)
1695	{
1696	/ XXX We simplify things here. The transliterations we look*
1697	for are only allowed to have one character. /*
1698	if (trunp->from[`0`] == wch && trunp->from[`1`] == `0`)
1699	{
1700	/ Found it. Now look for a transliteration which can be*
1701	represented with the character set. /*
1702	struct translit_to_t *torunp = trunp->to;
1703
1704	while (torunp != NULL)
1705	{
1706	int i;
1707
1708	for (i = `0`; torunp->str[i] != `0`; ++i)
1709	{
1710	char utmp[`10`];
1711
1712	snprintf (utmp, sizeof (utmp), "U%08X", torunp->str[i]);
1713	if (charmap_find_value (charmap, utmp, `9`) == NULL)
1714	/ This character cannot be represented. /
1715	break;
1716	}
1717
1718	if (torunp->str[i] == `0`)
1719	return torunp->str;
1720
1721	torunp = torunp->next;
1722	}
1723
1724	break;
1725	}
1726
1727	trunp = trunp->next;
1728	}
1729
1730	/ Check for ignored chars. /
1731	while (tirunp != NULL)
1732	{
1733	if (tirunp->from <= wch && tirunp->to >= wch)
1734	{
1735	uint32_t wi;
1736
1737	for (wi = tirunp->from; wi <= wch; wi += tirunp->step)
1738	if (wi == wch)
1739	return no_str;
1740	}
1741	}
1742
1743	/ Nothing found. /
1744	return NULL;
1745	}
1746
1747
1748	uint32_t *
1749	find_translit (struct localedef_t locale, const* struct charmap_t *charmap,
1750	uint32_t wch)
1751	{
1752	struct locale_ctype_t *ctype;
1753	uint32_t *result = NULL;
1754
1755	assert (locale != NULL);
1756	ctype = locale->categories[LC_CTYPE].ctype;
1757
1758	if (ctype == NULL)
1759	return NULL;
1760
1761	if (ctype->translit != NULL)
1762	result = find_translit2 (ctype, charmap, wch);
1763
1764	if (result == NULL)
1765	{
1766	struct translit_include_t *irunp = ctype->translit_include;
1767
1768	while (irunp != NULL && result == NULL)
1769	{
1770	result = find_translit (find_locale (CTYPE_LOCALE,
1771	irunp->copy_locale,
1772	irunp->copy_repertoire,
1773	charmap),
1774	charmap, wch);
1775	irunp = irunp->next;
1776	}
1777	}
1778
1779	return result;
1780	}
1781
1782
1783	/ Read one transliteration entry. /
1784	static uint32_t *
1785	read_widestring (struct linereader ldfile, struct* token *now,
1786	const struct charmap_t *charmap,
1787	struct repertoire_t *repertoire)
1788	{
1789	uint32_t *wstr;
1790
1791	if (now->tok == tok_default_missing)
1792	/ The special name "" will denote this case. /
1793	wstr = no_str;
1794	else if (now->tok == tok_bsymbol)
1795	{
1796	/ Get the value from the repertoire. /
1797	wstr = (uint32_t ) xmalloc (`2` sizeof (uint32_t));
1798	wstr[`0`] = repertoire_find_value (repertoire, now->val.str.startmb,
1799	now->val.str.lenmb);
1800	if (wstr[`0`] == ILLEGAL_CHAR_VALUE)
1801	{
1802	/ We cannot proceed, we don't know the UCS4 value. /
1803	free (wstr);
1804	return NULL;
1805	}
1806
1807	wstr[`1`] = `0`;
1808	}
1809	else if (now->tok == tok_ucs4)
1810	{
1811	wstr = (uint32_t ) xmalloc (`2` sizeof (uint32_t));
1812	wstr[`0`] = now->val.ucs4;
1813	wstr[`1`] = `0`;
1814	}
1815	else if (now->tok == tok_charcode)
1816	{
1817	/ Argh, we have to convert to the symbol name first and then to the*
1818	UCS4 value. /*
1819	struct charseq *seq = charmap_find_symbol (charmap,
1820	now->val.str.startmb,
1821	now->val.str.lenmb);
1822	if (seq == NULL)
1823	/ Cannot find the UCS4 value. /
1824	return NULL;
1825
1826	if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1827	seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1828	strlen (seq->name));
1829	if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1830	/ We cannot proceed, we don't know the UCS4 value. /
1831	return NULL;
1832
1833	wstr = (uint32_t ) xmalloc (`2` sizeof (uint32_t));
1834	wstr[`0`] = seq->ucs4;
1835	wstr[`1`] = `0`;
1836	}
1837	else if (now->tok == tok_string)
1838	{
1839	wstr = now->val.str.startwc;
1840	if (wstr == NULL \|\| wstr[`0`] == `0`)
1841	return NULL;
1842	}
1843	else
1844	{
1845	if (now->tok != tok_eol && now->tok != tok_eof)
1846	lr_ignore_rest (ldfile, `0`);
1847	SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1848	return (uint32_t *) -`1l`;
1849	}
1850
1851	return wstr;
1852	}
1853
1854
1855	static void
1856	read_translit_entry (struct linereader ldfile, struct* locale_ctype_t *ctype,
1857	struct token now, const* struct charmap_t *charmap,
1858	struct repertoire_t *repertoire)
1859	{
1860	uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1861	struct translit_t *result;
1862	struct translit_to_t **top;
1863	struct obstack *ob = &ctype->mempool;
1864	int first;
1865	int ignore;
1866
1867	if (from_wstr == NULL)
1868	/ There is no valid from string. /
1869	return;
1870
1871	result = (struct translit_t *) obstack_alloc (ob,
1872	sizeof (struct translit_t));
1873	result->from = from_wstr;
1874	result->fname = ldfile->fname;
1875	result->lineno = ldfile->lineno;
1876	result->next = NULL;
1877	result->to = NULL;
1878	top = &result->to;
1879	first = `1`;
1880	ignore = `0`;
1881
1882	while (`1`)
1883	{
1884	uint32_t *to_wstr;
1885
1886	/ Next we have one or more transliterations. They are*
1887	separated by semicolons. /*
1888	now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
1889
1890	if (!first && (now->tok == tok_semicolon \|\| now->tok == tok_eol))
1891	{
1892	/ One string read. /
1893	const uint32_t zero = `0`;
1894
1895	if (!ignore)
1896	{
1897	obstack_grow (ob, &zero, `4`);
1898	to_wstr = obstack_finish (ob);
1899
1900	top = obstack_alloc (ob, sizeof* (struct translit_to_t));
1901	(*top)->str = to_wstr;
1902	(*top)->next = NULL;
1903	}
1904
1905	if (now->tok == tok_eol)
1906	{
1907	result->next = ctype->translit;
1908	ctype->translit = result;
1909	return;
1910	}
1911
1912	if (!ignore)
1913	top = &(*top)->next;
1914	ignore = `0`;
1915	}
1916	else
1917	{
1918	to_wstr = read_widestring (ldfile, now, charmap, repertoire);
1919	if (to_wstr == (uint32_t *) -`1l`)
1920	{
1921	/ An error occurred. /
1922	obstack_free (ob, result);
1923	return;
1924	}
1925
1926	if (to_wstr == NULL)
1927	ignore = `1`;
1928	else
1929	/ This value is usable. /
1930	obstack_grow (ob, to_wstr, wcslen ((wchar_t ) to_wstr) `4`);
1931
1932	first = `0`;
1933	}
1934	}
1935	}
1936
1937
1938	static void
1939	read_translit_ignore_entry (struct linereader *ldfile,
1940	struct locale_ctype_t *ctype,
1941	const struct charmap_t *charmap,
1942	struct repertoire_t *repertoire)
1943	{
1944	/ We expect a semicolon-separated list of characters we ignore. We are*
1945	only interested in the wide character definitions. These must be
1946	single characters, possibly defining a range when an ellipsis is used. /*
1947	while (`1`)
1948	{
1949	struct token *now = lr_token (ldfile, charmap, NULL, repertoire,
1950	verbose);
1951	struct translit_ignore_t *newp;
1952	uint32_t from;
1953
1954	if (now->tok == tok_eol \|\| now->tok == tok_eof)
1955	{
1956	lr_error (ldfile,
1957	_("premature end of `translit_ignore' definition"));
1958	return;
1959	}
1960
1961	if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
1962	{
1963	lr_error (ldfile, _("syntax error"));
1964	lr_ignore_rest (ldfile, `0`);
1965	return;
1966	}
1967
1968	if (now->tok == tok_ucs4)
1969	from = now->val.ucs4;
1970	else
1971	/ Try to get the value. /
1972	from = repertoire_find_value (repertoire, now->val.str.startmb,
1973	now->val.str.lenmb);
1974
1975	if (from == ILLEGAL_CHAR_VALUE)
1976	{
1977	lr_error (ldfile, "invalid character name");
1978	newp = NULL;
1979	}
1980	else
1981	{
1982	newp = (struct translit_ignore_t *)
1983	obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
1984	newp->from = from;
1985	newp->to = from;
1986	newp->step = `1`;
1987
1988	newp->next = ctype->translit_ignore;
1989	ctype->translit_ignore = newp;
1990	}
1991
1992	/ Now we expect either a semicolon, an ellipsis, or the end of the*
1993	line. /*
1994	now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
1995
1996	if (now->tok == tok_ellipsis2 \|\| now->tok == tok_ellipsis2_2)
1997	{
1998	/ XXX Should we bother implementing `....'? `...' certainly*
1999	will not be implemented. /*
2000	uint32_t to;
2001	int step = now->tok == tok_ellipsis2_2 ? `2` : `1`;
2002
2003	now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2004
2005	if (now->tok == tok_eol \|\| now->tok == tok_eof)
2006	{
2007	lr_error (ldfile,
2008	_("premature end of `translit_ignore' definition"));
2009	return;
2010	}
2011
2012	if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2013	{
2014	lr_error (ldfile, _("syntax error"));
2015	lr_ignore_rest (ldfile, `0`);
2016	return;
2017	}
2018
2019	if (now->tok == tok_ucs4)
2020	to = now->val.ucs4;
2021	else
2022	/ Try to get the value. /
2023	to = repertoire_find_value (repertoire, now->val.str.startmb,
2024	now->val.str.lenmb);
2025
2026	if (to == ILLEGAL_CHAR_VALUE)
2027	lr_error (ldfile, "invalid character name");
2028	else
2029	{
2030	/ Make sure the `to'-value is larger. /
2031	if (to >= from)
2032	{
2033	newp->to = to;
2034	newp->step = step;
2035	}
2036	else
2037	lr_error (ldfile, _("\
2038	to-value <U%0X> of range is smaller than from-value <U%0X>"),
2039	(to \| from) < `65536` ? `4` : `8`, to,
2040	(to \| from) < `65536` ? `4` : `8`, from);
2041	}
2042
2043	/ And the next token. /
2044	now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2045	}
2046
2047	if (now->tok == tok_eol \|\| now->tok == tok_eof)
2048	/ We are done. /
2049	return;
2050
2051	if (now->tok == tok_semicolon)
2052	/ Next round. /
2053	continue;
2054
2055	/ If we come here something is wrong. /
2056	lr_error (ldfile, _("syntax error"));
2057	lr_ignore_rest (ldfile, `0`);
2058	return;
2059	}
2060	}
2061
2062
/* The parser for the LC_CTYPE section of the locale definition.  */
2064	void
2065	ctype_read (struct linereader ldfile, struct* localedef_t *result,
2066	const struct charmap_t charmap, const* char *repertoire_name,
2067	int ignore_content)
2068	{
2069	struct repertoire_t *repertoire = NULL;
2070	struct locale_ctype_t *ctype;
2071	struct token *now;
2072	enum token_t nowtok;
2073	size_t cnt;
2074	uint32_t last_wch = `0`;
2075	enum token_t last_token;
2076	enum token_t ellipsis_token;
2077	int step;
2078	char last_charcode[`16`];
2079	size_t last_charcode_len = `0`;
2080	const char *last_str = NULL;
2081	int mapidx;
2082	struct localedef_t *copy_locale = NULL;
2083
2084	/ Get the repertoire we have to use. /
2085	if (repertoire_name != NULL)
2086	repertoire = repertoire_read (repertoire_name);
2087
2088	/ The rest of the line containing `LC_CTYPE' must be free. /
2089	lr_ignore_rest (ldfile, `1`);
2090
2091
2092	do
2093	{
2094	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2095	nowtok = now->tok;
2096	}
2097	while (nowtok == tok_eol);
2098
2099	/ If we see `copy' now we are almost done. /
2100	if (nowtok == tok_copy)
2101	{
2102	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2103	if (now->tok != tok_string)
2104	{
2105	SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2106
2107	skip_category:
2108	do
2109	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2110	while (now->tok != tok_eof && now->tok != tok_end);
2111
2112	if (now->tok != tok_eof
2113	\|\| (now = lr_token (ldfile, charmap, NULL, NULL, verbose),
2114	now->tok == tok_eof))
2115	lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2116	else if (now->tok != tok_lc_ctype)
2117	{
2118	lr_error (ldfile, _("\
2119	%1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2120	lr_ignore_rest (ldfile, `0`);
2121	}
2122	else
2123	lr_ignore_rest (ldfile, `1`);
2124
2125	return;
2126	}
2127
2128	if (! ignore_content)
2129	{
2130	/ Get the locale definition. /
2131	copy_locale = load_locale (LC_CTYPE, now->val.str.startmb,
2132	repertoire_name, charmap, NULL);
2133	if ((copy_locale->avail & CTYPE_LOCALE) == `0`)
2134	{
2135	/ Not yet loaded. So do it now. /
2136	if (locfile_read (copy_locale, charmap) != `0`)
2137	goto skip_category;
2138	}
2139
2140	if (copy_locale->categories[LC_CTYPE].ctype == NULL)
2141	return;
2142	}
2143
2144	lr_ignore_rest (ldfile, `1`);
2145
2146	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2147	nowtok = now->tok;
2148	}
2149
2150	/ Prepare the data structures. /
2151	ctype_startup (ldfile, result, charmap, copy_locale, ignore_content);
2152	ctype = result->categories[LC_CTYPE].ctype;
2153
2154	/ Remember the repertoire we use. /
2155	if (!ignore_content)
2156	ctype->repertoire = repertoire;
2157
2158	while (`1`)
2159	{
2160	unsigned long int class_bit = `0`;
2161	unsigned long int class256_bit = `0`;
2162	int handle_digits = `0`;
2163
2164	/ Of course we don't proceed beyond the end of file. /
2165	if (nowtok == tok_eof)
2166	break;
2167
      /* Ignore empty lines.  */
2169	if (nowtok == tok_eol)
2170	{
2171	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2172	nowtok = now->tok;
2173	continue;
2174	}
2175
2176	switch (nowtok)
2177	{
2178	case tok_charclass:
2179	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2180	while (now->tok == tok_ident \|\| now->tok == tok_string)
2181	{
2182	ctype_class_new (ldfile, ctype, now->val.str.startmb);
2183	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2184	if (now->tok != tok_semicolon)
2185	break;
2186	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2187	}
2188	if (now->tok != tok_eol)
2189	SYNTAX_ERROR (_("\
2190	%s: syntax error in definition of new character class"), "LC_CTYPE");
2191	break;
2192
2193	case tok_charconv:
2194	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2195	while (now->tok == tok_ident \|\| now->tok == tok_string)
2196	{
2197	ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2198	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2199	if (now->tok != tok_semicolon)
2200	break;
2201	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2202	}
2203	if (now->tok != tok_eol)
2204	SYNTAX_ERROR (_("\
2205	%s: syntax error in definition of new character map"), "LC_CTYPE");
2206	break;
2207
2208	case tok_class:
2209	/ Ignore the rest of the line if we don't need the input of*
2210	this line. /*
2211	if (ignore_content)
2212	{
2213	lr_ignore_rest (ldfile, `0`);
2214	break;
2215	}
2216
2217	/ We simply forget the `class' keyword and use the following*
2218	operand to determine the bit. /*
2219	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2220	if (now->tok == tok_ident \|\| now->tok == tok_string)
2221	{
2222	/* See whether this is one of the already known class names.  */
2223	for (cnt = `0`; cnt < ctype->nr_charclass; ++cnt)
2224	if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == `0`)
2225	break;
2226	if (cnt >= ctype->nr_charclass)
2227	{
2228	/ OK, it's a new class. /
2229	ctype_class_new (ldfile, ctype, now->val.str.startmb);
2230
2231	class_bit = _ISwbit (ctype->nr_charclass - `1`);
2232	}
2233	else
2234	{
2235	class_bit = _ISwbit (cnt);
2236
2237	free (now->val.str.startmb);
2238	}
2239	}
2240	else if (now->tok == tok_digit)
2241	goto handle_tok_digit;
2242	else if (now->tok < tok_upper \|\| now->tok > tok_blank)
2243	goto err_label;
2244	else
2245	{
2246	class_bit = BITw (now->tok);
2247	class256_bit = BIT (now->tok);
2248	}
2249
2250	/ The next character must be a semicolon. /
2251	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2252	if (now->tok != tok_semicolon)
2253	goto err_label;
2254	goto read_charclass;
2255
2256	case tok_upper:
2257	case tok_lower:
2258	case tok_alpha:
2259	case tok_alnum:
2260	case tok_space:
2261	case tok_cntrl:
2262	case tok_punct:
2263	case tok_graph:
2264	case tok_print:
2265	case tok_xdigit:
2266	case tok_blank:
2267	/ Ignore the rest of the line if we don't need the input of*
2268	this line. /*
2269	if (ignore_content)
2270	{
2271	lr_ignore_rest (ldfile, `0`);
2272	break;
2273	}
2274
2275	class_bit = BITw (now->tok);
2276	class256_bit = BIT (now->tok);
2277	handle_digits = `0`;
2278	read_charclass:
2279	ctype->class_done \|= class_bit;
2280	last_token = tok_none;
2281	ellipsis_token = tok_none;
2282	step = `1`;
2283	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2284	while (now->tok != tok_eol && now->tok != tok_eof)
2285	{
2286	uint32_t wch;
2287	struct charseq *seq;
2288
2289	if (ellipsis_token == tok_none)
2290	{
2291	if (get_character (now, charmap, repertoire, &seq, &wch))
2292	goto err_label;
2293
2294	if (!ignore_content && seq != NULL && seq->nbytes == `1`)
2295	/ Yep, we can store information about this byte*
2296	sequence. /*
2297	ctype->class256_collection[seq->bytes[`0`]] \|= class256_bit;
2298
2299	if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
2300	&& class_bit != `0`)
2301	/ We have the UCS4 position. /
2302	*find_idx (ctype, &ctype->class_collection,
2303	&ctype->class_collection_max,
2304	&ctype->class_collection_act, wch) \|= class_bit;
2305
2306	last_token = now->tok;
2307	/ Terminate the string. /
2308	if (last_token == tok_bsymbol)
2309	{
2310	now->val.str.startmb[now->val.str.lenmb] = `'\0'`;
2311	last_str = now->val.str.startmb;
2312	}
2313	else
2314	last_str = NULL;
2315	last_wch = wch;
2316	memcpy (last_charcode, now->val.charcode.bytes, `16`);
2317	last_charcode_len = now->val.charcode.nbytes;
2318
2319	if (!ignore_content && handle_digits == `1`)
2320	{
2321	/ We must store the digit values. /
2322	if (ctype->mbdigits_act == ctype->mbdigits_max)
2323	{
2324	ctype->mbdigits_max += `10`;
2325	ctype->mbdigits = xrealloc (ctype->mbdigits,
2326	(ctype->mbdigits_max
2327	* sizeof (char *)));
2328	ctype->wcdigits_max += `10`;
2329	ctype->wcdigits = xrealloc (ctype->wcdigits,
2330	(ctype->wcdigits_max
2331	* sizeof (uint32_t)));
2332	}
2333
2334	ctype->mbdigits[ctype->mbdigits_act++] = seq;
2335	ctype->wcdigits[ctype->wcdigits_act++] = wch;
2336	}
2337	else if (!ignore_content && handle_digits == `2`)
2338	{
2339	/ We must store the digit values. /
2340	if (ctype->outdigits_act >= `10`)
2341	{
2342	lr_error (ldfile, _("\
2343	%s: field `%s' does not contain exactly ten entries"),
2344	"LC_CTYPE", "outdigit");
2345	lr_ignore_rest (ldfile, `0`);
2346	break;
2347	}
2348
2349	ctype->mboutdigits[ctype->outdigits_act] = seq;
2350	ctype->wcoutdigits[ctype->outdigits_act] = wch;
2351	++ctype->outdigits_act;
2352	}
2353	}
2354	else
2355	{
2356	/ Now it gets complicated. We have to resolve the*
2357	ellipsis problem. First we must distinguish between
2358	the different kind of ellipsis and this must match the
2359	tokens we have seen. /*
2360	assert (last_token != tok_none);
2361
2362	if (last_token != now->tok)
2363	{
2364	lr_error (ldfile, _("\
2365	ellipsis range must be marked by two operands of same type"));
2366	lr_ignore_rest (ldfile, `0`);
2367	break;
2368	}
2369
2370	if (last_token == tok_bsymbol)
2371	{
2372	if (ellipsis_token == tok_ellipsis3)
2373	lr_error (ldfile, _("with symbolic name range values \
2374	the absolute ellipsis `...' must not be used"));
2375
2376	charclass_symbolic_ellipsis (ldfile, ctype, charmap,
2377	repertoire, now, last_str,
2378	class256_bit, class_bit,
2379	(ellipsis_token
2380	== tok_ellipsis4
2381	? `10` : `16`),
2382	ignore_content,
2383	handle_digits, step);
2384	}
2385	else if (last_token == tok_ucs4)
2386	{
2387	if (ellipsis_token != tok_ellipsis2)
2388	lr_error (ldfile, _("\
2389	with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2390
2391	charclass_ucs4_ellipsis (ldfile, ctype, charmap,
2392	repertoire, now, last_wch,
2393	class256_bit, class_bit,
2394	ignore_content, handle_digits,
2395	step);
2396	}
2397	else
2398	{
2399	assert (last_token == tok_charcode);
2400
2401	if (ellipsis_token != tok_ellipsis3)
2402	lr_error (ldfile, _("\
2403	with character code range values one must use the absolute ellipsis `...'"));
2404
2405	charclass_charcode_ellipsis (ldfile, ctype, charmap,
2406	repertoire, now,
2407	last_charcode,
2408	last_charcode_len,
2409	class256_bit, class_bit,
2410	ignore_content,
2411	handle_digits);
2412	}
2413
2414	/ Now we have used the last value. /
2415	last_token = tok_none;
2416	}
2417
2418	/ Next we expect a semicolon or the end of the line. /
2419	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2420	if (now->tok == tok_eol \|\| now->tok == tok_eof)
2421	break;
2422
2423	if (last_token != tok_none
2424	&& now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
2425	{
2426	if (now->tok == tok_ellipsis2_2)
2427	{
2428	now->tok = tok_ellipsis2;
2429	step = `2`;
2430	}
2431	else if (now->tok == tok_ellipsis4_2)
2432	{
2433	now->tok = tok_ellipsis4;
2434	step = `2`;
2435	}
2436
2437	ellipsis_token = now->tok;
2438
2439	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2440	continue;
2441	}
2442
2443	if (now->tok != tok_semicolon)
2444	goto err_label;
2445
2446	/ And get the next character. /
2447	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2448
2449	ellipsis_token = tok_none;
2450	step = `1`;
2451	}
2452	break;
2453
2454	case tok_digit:
2455	/ Ignore the rest of the line if we don't need the input of*
2456	this line. /*
2457	if (ignore_content)
2458	{
2459	lr_ignore_rest (ldfile, `0`);
2460	break;
2461	}
2462
2463	handle_tok_digit:
2464	class_bit = _ISwdigit;
2465	class256_bit = _ISdigit;
2466	handle_digits = `1`;
2467	goto read_charclass;
2468
2469	case tok_outdigit:
2470	/ Ignore the rest of the line if we don't need the input of*
2471	this line. /*
2472	if (ignore_content)
2473	{
2474	lr_ignore_rest (ldfile, `0`);
2475	break;
2476	}
2477
2478	if (ctype->outdigits_act != `0`)
2479	lr_error (ldfile, _("\
2480	%s: field `%s' declared more than once"),
2481	"LC_CTYPE", "outdigit");
2482	class_bit = `0`;
2483	class256_bit = `0`;
2484	handle_digits = `2`;
2485	goto read_charclass;
2486
2487	case tok_toupper:
2488	/ Ignore the rest of the line if we don't need the input of*
2489	this line. /*
2490	if (ignore_content)
2491	{
2492	lr_ignore_rest (ldfile, `0`);
2493	break;
2494	}
2495
2496	mapidx = `0`;
2497	goto read_mapping;
2498
2499	case tok_tolower:
2500	/ Ignore the rest of the line if we don't need the input of*
2501	this line. /*
2502	if (ignore_content)
2503	{
2504	lr_ignore_rest (ldfile, `0`);
2505	break;
2506	}
2507
2508	mapidx = `1`;
2509	goto read_mapping;
2510
2511	case tok_map:
2512	/ Ignore the rest of the line if we don't need the input of*
2513	this line. /*
2514	if (ignore_content)
2515	{
2516	lr_ignore_rest (ldfile, `0`);
2517	break;
2518	}
2519
2520	/ We simply forget the `map' keyword and use the following*
2521	operand to determine the mapping. /*
2522	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2523	if (now->tok == tok_ident \|\| now->tok == tok_string)
2524	{
2525	size_t cnt;
2526
2527	for (cnt = `2`; cnt < ctype->map_collection_nr; ++cnt)
2528	if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == `0`)
2529	break;
2530
2531	if (cnt < ctype->map_collection_nr)
2532	free (now->val.str.startmb);
2533	else
2534	/ OK, it's a new map. /
2535	ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2536
2537	mapidx = cnt;
2538	}
2539	else if (now->tok < tok_toupper \|\| now->tok > tok_tolower)
2540	goto err_label;
2541	else
2542	mapidx = now->tok - tok_toupper;
2543
2544	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2545	/ This better should be a semicolon. /
2546	if (now->tok != tok_semicolon)
2547	goto err_label;
2548
2549	read_mapping:
2550	/ Test whether this mapping was already defined. /
2551	if (ctype->tomap_done[mapidx])
2552	{
2553	lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2554	ctype->mapnames[mapidx]);
2555	lr_ignore_rest (ldfile, `0`);
2556	break;
2557	}
2558	ctype->tomap_done[mapidx] = `1`;
2559
2560	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2561	while (now->tok != tok_eol && now->tok != tok_eof)
2562	{
2563	struct charseq *from_seq;
2564	uint32_t from_wch;
2565	struct charseq *to_seq;
2566	uint32_t to_wch;
2567
2568	/ Every pair starts with an opening brace. /
2569	if (now->tok != tok_open_brace)
2570	goto err_label;
2571
2572	/ Next comes the from-value. /
2573	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2574	if (get_character (now, charmap, repertoire, &from_seq,
2575	&from_wch) != `0`)
2576	goto err_label;
2577
2578	/ The next is a comma. /
2579	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2580	if (now->tok != tok_comma)
2581	goto err_label;
2582
2583	/ And the other value. /
2584	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2585	if (get_character (now, charmap, repertoire, &to_seq,
2586	&to_wch) != `0`)
2587	goto err_label;
2588
2589	/ And the last thing is the closing brace. /
2590	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2591	if (now->tok != tok_close_brace)
2592	goto err_label;
2593
2594	if (!ignore_content)
2595	{
2596	/ Check whether the mapping converts from an ASCII value*
2597	to a non-ASCII value. /*
2598	if (from_seq != NULL && from_seq->nbytes == `1`
2599	&& isascii (from_seq->bytes[`0`])
2600	&& to_seq != NULL && (to_seq->nbytes != `1`
2601	\|\| !isascii (to_seq->bytes[`0`])))
2602	ctype->to_nonascii = `1`;
2603
2604	if (mapidx < `2` && from_seq != NULL && to_seq != NULL
2605	&& from_seq->nbytes == `1` && to_seq->nbytes == `1`)
2606	/ We can use this value. /
2607	ctype->map256_collection[mapidx][from_seq->bytes[`0`]]
2608	= to_seq->bytes[`0`];
2609
2610	if (from_wch != ILLEGAL_CHAR_VALUE
2611	&& to_wch != ILLEGAL_CHAR_VALUE)
2612	/ Both correct values. /
2613	*find_idx (ctype, &ctype->map_collection[mapidx],
2614	&ctype->map_collection_max[mapidx],
2615	&ctype->map_collection_act[mapidx],
2616	from_wch) = to_wch;
2617	}
2618
2619	/ Now comes a semicolon or the end of the line/file. /
2620	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2621	if (now->tok == tok_semicolon)
2622	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2623	}
2624	break;
2625
2626	case tok_translit_start:
2627	/ Ignore the entire translit section with its peculiar syntax*
2628	if we don't need the input. /*
2629	if (ignore_content)
2630	{
2631	do
2632	{
2633	lr_ignore_rest (ldfile, `0`);
2634	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2635	}
2636	while (now->tok != tok_translit_end && now->tok != tok_eof);
2637
2638	if (now->tok == tok_eof)
2639	lr_error (ldfile, _(\
2640	"%s: `translit_start' section does not end with `translit_end'"),
2641	"LC_CTYPE");
2642
2643	break;
2644	}
2645
2646	/ The rest of the line better should be empty. /
2647	lr_ignore_rest (ldfile, `1`);
2648
2649	/ We count here the number of allocated entries in the `translit'*
2650	array. /*
2651	cnt = `0`;
2652
2653	ldfile->translate_strings = `1`;
2654	ldfile->return_widestr = `1`;
2655
2656	/ We proceed until we see the `translit_end' token. /
2657	while (now = lr_token (ldfile, charmap, NULL, repertoire, verbose),
2658	now->tok != tok_translit_end && now->tok != tok_eof)
2659	{
2660	if (now->tok == tok_eol)
2661	/ Ignore empty lines. /
2662	continue;
2663
2664	if (now->tok == tok_include)
2665	{
2666	/ We have to include locale. /
2667	const char *locale_name;
2668	const char *repertoire_name;
2669	struct translit_include_t include_stmt, *include_ptr;
2670
2671	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2672	/ This should be a string or an identifier. In any*
2673	case something to name a locale. /*
2674	if (now->tok != tok_string && now->tok != tok_ident)
2675	{
2676	translit_syntax:
2677	lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2678	lr_ignore_rest (ldfile, `0`);
2679	continue;
2680	}
2681	locale_name = now->val.str.startmb;
2682
2683	/ Next should be a semicolon. /
2684	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2685	if (now->tok != tok_semicolon)
2686	goto translit_syntax;
2687
2688	/ Now the repertoire name. /
2689	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2690	if ((now->tok != tok_string && now->tok != tok_ident)
2691	\|\| now->val.str.startmb == NULL)
2692	goto translit_syntax;
2693	repertoire_name = now->val.str.startmb;
2694	if (repertoire_name[`0`] == `'\0'`)
2695	/ Ignore the empty string. /
2696	repertoire_name = NULL;
2697
2698	/ Save the include statement for later processing. /
2699	include_stmt = (struct translit_include_t *)
2700	xmalloc (sizeof (struct translit_include_t));
2701	include_stmt->copy_locale = locale_name;
2702	include_stmt->copy_repertoire = repertoire_name;
2703	include_stmt->next = NULL;
2704
2705	include_ptr = &ctype->translit_include;
2706	while (*include_ptr != NULL)
2707	include_ptr = &(*include_ptr)->next;
2708	*include_ptr = include_stmt;
2709
2710	/ The rest of the line must be empty. /
2711	lr_ignore_rest (ldfile, `1`);
2712
2713	/ Make sure the locale is read. /
2714	add_to_readlist (LC_CTYPE, locale_name, repertoire_name,
2715	`1`, NULL);
2716	continue;
2717	}
2718	else if (now->tok == tok_default_missing)
2719	{
2720	uint32_t *wstr;
2721
2722	while (`1`)
2723	{
2724	/ We expect a single character or string as the*
2725	argument. /*
2726	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2727	wstr = read_widestring (ldfile, now, charmap,
2728	repertoire);
2729
2730	if (wstr != NULL)
2731	{
2732	if (ctype->default_missing != NULL)
2733	{
2734	lr_error (ldfile, _("\
2735	%s: duplicate `default_missing' definition"), "LC_CTYPE");
2736	record_error_at_line (`0`, `0`,
2737	ctype->default_missing_file,
2738	ctype->default_missing_lineno,
2739	_("\
2740	previous definition was here"));
2741	}
2742	else
2743	{
2744	ctype->default_missing = wstr;
2745	ctype->default_missing_file = ldfile->fname;
2746	ctype->default_missing_lineno = ldfile->lineno;
2747	}
2748	/ We can have more entries, ignore them. /
2749	lr_ignore_rest (ldfile, `0`);
2750	break;
2751	}
2752	else if (wstr == (uint32_t *) -`1l`)
2753	/* This was a syntax error.  */
2754	break;
2755
2756	/ Maybe there is another replacement we can use. /
2757	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2758	if (now->tok == tok_eol \|\| now->tok == tok_eof)
2759	{
2760	/ Nothing found. We tell the user. /
2761	lr_error (ldfile, _("\
2762	%s: no representable `default_missing' definition found"), "LC_CTYPE");
2763	break;
2764	}
2765	if (now->tok != tok_semicolon)
2766	goto translit_syntax;
2767	}
2768
2769	continue;
2770	}
2771	else if (now->tok == tok_translit_ignore)
2772	{
2773	read_translit_ignore_entry (ldfile, ctype, charmap,
2774	repertoire);
2775	continue;
2776	}
2777
2778	read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2779	}
2780	ldfile->return_widestr = `0`;
2781
2782	if (now->tok == tok_eof)
2783	lr_error (ldfile, _(\
2784	"%s: `translit_start' section does not end with `translit_end'"),
2785	"LC_CTYPE");
2786
2787	break;
2788
2789	case tok_ident:
2790	/ Ignore the rest of the line if we don't need the input of*
2791	this line. /*
2792	if (ignore_content)
2793	{
2794	lr_ignore_rest (ldfile, `0`);
2795	break;
2796	}
2797
2798	/ This could mean one of several things. First test whether*
2799	it's a character class name. /*
2800	for (cnt = `0`; cnt < ctype->nr_charclass; ++cnt)
2801	if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == `0`)
2802	break;
2803	if (cnt < ctype->nr_charclass)
2804	{
2805	class_bit = _ISwbit (cnt);
2806	class256_bit = cnt <= `11` ? _ISbit (cnt) : `0`;
2807	free (now->val.str.startmb);
2808	goto read_charclass;
2809	}
2810	for (cnt = `0`; cnt < ctype->map_collection_nr; ++cnt)
2811	if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == `0`)
2812	break;
2813	if (cnt < ctype->map_collection_nr)
2814	{
2815	mapidx = cnt;
2816	free (now->val.str.startmb);
2817	goto read_mapping;
2818	}
2819	break;
2820
2821	case tok_end:
2822	/ Next we assume `LC_CTYPE'. /
2823	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2824	if (now->tok == tok_eof)
2825	break;
2826	if (now->tok == tok_eol)
2827	lr_error (ldfile, _("%s: incomplete `END' line"),
2828	"LC_CTYPE");
2829	else if (now->tok != tok_lc_ctype)
2830	lr_error (ldfile, _("\
2831	%1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2832	lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
2833	return;
2834
2835	default:
2836	err_label:
2837	if (now->tok != tok_eof)
2838	SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2839	}
2840
2841	/ Prepare for the next round. /
2842	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2843	nowtok = now->tok;
2844	}
2845
2846	/ When we come here we reached the end of the file. /
2847	lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2848	}
2849
2850
2851	/ Subroutine of set_class_defaults, below. /
2852	static void
2853	set_one_default (struct locale_ctype_t *ctype,
2854	const struct charmap_t *charmap,
2855	int bitpos, int from, int to)
2856	{
2857	char tmp[`2`];
2858	int ch;
2859	int bit = _ISbit (bitpos);
2860	int bitw = _ISwbit (bitpos);
2861	/ Define string. /
2862	strcpy (tmp, "?");
2863
2864	for (ch = from; ch <= to; ++ch)
2865	{
2866	struct charseq *seq;
2867	tmp[`0`] = ch;
2868
2869	seq = charmap_find_value (charmap, tmp, `1`);
2870	if (seq == NULL)
2871	{
2872	char buf[`10`];
2873	sprintf (buf, "U%08X", ch);
2874	seq = charmap_find_value (charmap, buf, `9`);
2875	}
2876	if (seq == NULL)
2877	{
2878	record_error (`0`, `0`, _("\
2879	%s: character `%s' not defined while needed as default value"),
2880	"LC_CTYPE", tmp);
2881	}
2882	else if (seq->nbytes != `1`)
2883	record_error (`0`, `0`, _("\
2884	%s: character `%s' in charmap not representable with one byte"),
2885	"LC_CTYPE", tmp);
2886	else
2887	ctype->class256_collection[seq->bytes[`0`]] \|= bit;
2888
2889	/ No need to search here, the ASCII value is also the Unicode*
2890	value. /*
2891	ELEM (ctype, class_collection, , ch) \|= bitw;
2892	}
2893	}
2894
2895	static void
2896	set_class_defaults (struct locale_ctype_t *ctype,
2897	const struct charmap_t *charmap,
2898	struct repertoire_t *repertoire)
2899	{
2900	#define set_default(bitpos, from, to) \
2901	set_one_default (ctype, charmap, bitpos, from, to)
2902
2903	/* This function defines the default values for the classes and conversions
2904	   according to POSIX.2 2.5.2.1.
2905	   It may seem that the order of these if-blocks is arbitrary but it is NOT.
2906	   Don't move them unless you know what you are doing!  */
2907
2908	/ Set default values if keyword was not present. /
2909	if ((ctype->class_done & BITw (tok_upper)) == `0`)
2910	/* "If this keyword [upper] is not specified, the uppercase letters
2911	   `A' through `Z', ..., shall automatically belong to this class,
2912	   with implementation defined character values."  [P1003.2, 2.5.2.1]  */
2913	set_default (BITPOS (tok_upper), `'A'`, `'Z'`);
2914
2915	if ((ctype->class_done & BITw (tok_lower)) == `0`)
2916	/ "If this keyword [lower] is not specified, the lowercase letters*
2917	`a' through `z', ..., shall automatically belong to this class,
2918	with implementation defined character values." [P1003.2, 2.5.2.1] /*
2919	set_default (BITPOS (tok_lower), `'a'`, `'z'`);
2920
2921	if ((ctype->class_done & BITw (tok_alpha)) == `0`)
2922	{
2923	/ Table 2-6 in P1003.2 says that characters in class `upper' or*
2924	class `lower' must* be in class `alpha'. /
2925	unsigned long int mask = BIT (tok_upper) \| BIT (tok_lower);
2926	unsigned long int maskw = BITw (tok_upper) \| BITw (tok_lower);
2927
2928	for (size_t cnt = `0`; cnt < `256`; ++cnt)
2929	if ((ctype->class256_collection[cnt] & mask) != `0`)
2930	ctype->class256_collection[cnt] \|= BIT (tok_alpha);
2931
2932	for (size_t cnt = `0`; cnt < ctype->class_collection_act; ++cnt)
2933	if ((ctype->class_collection[cnt] & maskw) != `0`)
2934	ctype->class_collection[cnt] \|= BITw (tok_alpha);
2935	}
2936
2937	if ((ctype->class_done & BITw (tok_digit)) == `0`)
2938	/ "If this keyword [digit] is not specified, the digits `0' through*
2939	`9', ..., shall automatically belong to this class, with
2940	implementation-defined character values." [P1003.2, 2.5.2.1] /*
2941	set_default (BITPOS (tok_digit), `'0'`, `'9'`);
2942
2943	/* [alnum] "Only characters specified for the `alpha' and `digit' keyword
2944	   shall be specified.  Characters specified for the keyword `alpha'
2945	   and `digit' are automatically included in this class."  */
2946	{
2947	unsigned long int mask = BIT (tok_alpha) \| BIT (tok_digit);
2948	unsigned long int maskw = BITw (tok_alpha) \| BITw (tok_digit);
2949
2950	for (size_t cnt = `0`; cnt < `256`; ++cnt)
2951	if ((ctype->class256_collection[cnt] & mask) != `0`)
2952	ctype->class256_collection[cnt] \|= BIT (tok_alnum);
2953
2954	for (size_t cnt = `0`; cnt < ctype->class_collection_act; ++cnt)
2955	if ((ctype->class_collection[cnt] & maskw) != `0`)
2956	ctype->class_collection[cnt] \|= BITw (tok_alnum);
2957	}
2958
2959	if ((ctype->class_done & BITw (tok_space)) == `0`)
2960	/ "If this keyword [space] is not specified, the characters <space>,*
2961	<form-feed>, <newline>, <carriage-return>, <tab>, and
2962	<vertical-tab>, ..., shall automatically belong to this class,
2963	with implementation-defined character values." [P1003.2, 2.5.2.1] /*
2964	{
2965	struct charseq *seq;
2966
2967	seq = charmap_find_value (charmap, "space", `5`);
2968	if (seq == NULL)
2969	seq = charmap_find_value (charmap, "SP", `2`);
2970	if (seq == NULL)
2971	seq = charmap_find_value (charmap, "U00000020", `9`);
2972	if (seq == NULL)
2973	{
2974	record_error (`0`, `0`, _("\
2975	%s: character `%s' not defined while needed as default value"),
2976	"LC_CTYPE", "<space>");
2977	}
2978	else if (seq->nbytes != `1`)
2979	record_error (`0`, `0`, _("\
2980	%s: character `%s' in charmap not representable with one byte"),
2981	"LC_CTYPE", "<space>");
2982	else
2983	ctype->class256_collection[seq->bytes[`0`]] \|= BIT (tok_space);
2984
2985	/ No need to search. /
2986	ELEM (ctype, class_collection, , L`' '`) \|= BITw (tok_space);
2987
2988	seq = charmap_find_value (charmap, "form-feed", `9`);
2989	if (seq == NULL)
2990	seq = charmap_find_value (charmap, "U0000000C", `9`);
2991	if (seq == NULL)
2992	{
2993	record_error (`0`, `0`, _("\
2994	%s: character `%s' not defined while needed as default value"),
2995	"LC_CTYPE", "<form-feed>");
2996	}
2997	else if (seq->nbytes != `1`)
2998	record_error (`0`, `0`, _("\
2999	%s: character `%s' in charmap not representable with one byte"),
3000	"LC_CTYPE", "<form-feed>");
3001	else
3002	ctype->class256_collection[seq->bytes[`0`]] \|= BIT (tok_space);
3003
3004	/ No need to search. /
3005	ELEM (ctype, class_collection, , L`'\f'`) \|= BITw (tok_space);
3006
3007
3008	seq = charmap_find_value (charmap, "newline", `7`);
3009	if (seq == NULL)
3010	seq = charmap_find_value (charmap, "U0000000A", `9`);
3011	if (seq == NULL)
3012	{
3013	record_error (`0`, `0`, _("\
3014	%s: character `%s' not defined while needed as default value"),
3015	"LC_CTYPE", "<newline>");
3016	}
3017	else if (seq->nbytes != `1`)
3018	record_error (`0`, `0`, _("\
3019	%s: character `%s' in charmap not representable with one byte"),
3020	"LC_CTYPE", "<newline>");
3021	else
3022	ctype->class256_collection[seq->bytes[`0`]] \|= BIT (tok_space);
3023
3024	/ No need to search. /
3025	ELEM (ctype, class_collection, , L`'\n'`) \|= BITw (tok_space);
3026
3027
3028	seq = charmap_find_value (charmap, "carriage-return", `15`);
3029	if (seq == NULL)
3030	seq = charmap_find_value (charmap, "U0000000D", `9`);
3031	if (seq == NULL)
3032	{
3033	record_error (`0`, `0`, _("\
3034	%s: character `%s' not defined while needed as default value"),
3035	"LC_CTYPE", "<carriage-return>");
3036	}
3037	else if (seq->nbytes != `1`)
3038	record_error (`0`, `0`, _("\
3039	%s: character `%s' in charmap not representable with one byte"),
3040	"LC_CTYPE", "<carriage-return>");
3041	else
3042	ctype->class256_collection[seq->bytes[`0`]] \|= BIT (tok_space);
3043
3044	/ No need to search. /
3045	ELEM (ctype, class_collection, , L`'\r'`) \|= BITw (tok_space);
3046
3047
3048	seq = charmap_find_value (charmap, "tab", `3`);
3049	if (seq == NULL)
3050	seq = charmap_find_value (charmap, "U00000009", `9`);
3051	if (seq == NULL)
3052	{
3053	record_error (`0`, `0`, _("\
3054	%s: character `%s' not defined while needed as default value"),
3055	"LC_CTYPE", "<tab>");
3056	}
3057	else if (seq->nbytes != `1`)
3058	record_error (`0`, `0`, _("\
3059	%s: character `%s' in charmap not representable with one byte"),
3060	"LC_CTYPE", "<tab>");
3061	else
3062	ctype->class256_collection[seq->bytes[`0`]] \|= BIT (tok_space);
3063
3064	/ No need to search. /
3065	ELEM (ctype, class_collection, , L`'\t'`) \|= BITw (tok_space);
3066
3067
3068	seq = charmap_find_value (charmap, "vertical-tab", `12`);
3069	if (seq == NULL)
3070	seq = charmap_find_value (charmap, "U0000000B", `9`);
3071	if (seq == NULL)
3072	{
3073	record_error (`0`, `0`, _("\
3074	%s: character `%s' not defined while needed as default value"),
3075	"LC_CTYPE", "<vertical-tab>");
3076	}
3077	else if (seq->nbytes != `1`)
3078	record_error (`0`, `0`, _("\
3079	%s: character `%s' in charmap not representable with one byte"),
3080	"LC_CTYPE", "<vertical-tab>");
3081	else
3082	ctype->class256_collection[seq->bytes[`0`]] \|= BIT (tok_space);
3083
3084	/ No need to search. /
3085	ELEM (ctype, class_collection, , L`'\v'`) \|= BITw (tok_space);
3086	}
3087
3088	if ((ctype->class_done & BITw (tok_xdigit)) == `0`)
3089	/* "If this keyword [xdigit] is not specified, the digits `0' to `9', the
3090	   uppercase letters `A' through `F', and the lowercase letters `a'
3091	   through `f', ..., shall automatically belong to this class, with
3092	   implementation defined character values."  [P1003.2, 2.5.2.1]  */
3093	{
3094	set_default (BITPOS (tok_xdigit), `'0'`, `'9'`);
3095	set_default (BITPOS (tok_xdigit), `'A'`, `'F'`);
3096	set_default (BITPOS (tok_xdigit), `'a'`, `'f'`);
3097	}
3098
3099	if ((ctype->class_done & BITw (tok_blank)) == `0`)
3100	/ "If this keyword [blank] is unspecified, the characters <space> and*
3101	<tab> shall belong to this character class." [P1003.2, 2.5.2.1] /*
3102	{
3103	struct charseq *seq;
3104
3105	seq = charmap_find_value (charmap, "space", `5`);
3106	if (seq == NULL)
3107	seq = charmap_find_value (charmap, "SP", `2`);
3108	if (seq == NULL)
3109	seq = charmap_find_value (charmap, "U00000020", `9`);
3110	if (seq == NULL)
3111	{
3112	record_error (`0`, `0`, _("\
3113	%s: character `%s' not defined while needed as default value"),
3114	"LC_CTYPE", "<space>");
3115	}
3116	else if (seq->nbytes != `1`)
3117	record_error (`0`, `0`, _("\
3118	%s: character `%s' in charmap not representable with one byte"),
3119	"LC_CTYPE", "<space>");
3120	else
3121	ctype->class256_collection[seq->bytes[`0`]] \|= BIT (tok_blank);
3122
3123	/ No need to search. /
3124	ELEM (ctype, class_collection, , L`' '`) \|= BITw (tok_blank);
3125
3126
3127	seq = charmap_find_value (charmap, "tab", `3`);
3128	if (seq == NULL)
3129	seq = charmap_find_value (charmap, "U00000009", `9`);
3130	if (seq == NULL)
3131	{
3132	record_error (`0`, `0`, _("\
3133	%s: character `%s' not defined while needed as default value"),
3134	"LC_CTYPE", "<tab>");
3135	}
3136	else if (seq->nbytes != `1`)
3137	record_error (`0`, `0`, _("\
3138	%s: character `%s' in charmap not representable with one byte"),
3139	"LC_CTYPE", "<tab>");
3140	else
3141	ctype->class256_collection[seq->bytes[`0`]] \|= BIT (tok_blank);
3142
3143	/ No need to search. /
3144	ELEM (ctype, class_collection, , L`'\t'`) \|= BITw (tok_blank);
3145	}
3146
3147	if ((ctype->class_done & BITw (tok_graph)) == `0`)
3148	/ "If this keyword [graph] is not specified, characters specified for*
3149	the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3150	shall belong to this character class." [P1003.2, 2.5.2.1] /*
3151	{
3152	unsigned long int mask = BIT (tok_upper) \| BIT (tok_lower)
3153	\| BIT (tok_alpha) \| BIT (tok_digit) \| BIT (tok_xdigit)
3154	\| BIT (tok_punct);
3155	unsigned long int maskw = BITw (tok_upper) \| BITw (tok_lower)
3156	\| BITw (tok_alpha) \| BITw (tok_digit) \| BITw (tok_xdigit)
3157	\| BITw (tok_punct);
3158
3159	for (size_t cnt = `0`; cnt < ctype->class_collection_act; ++cnt)
3160	if ((ctype->class_collection[cnt] & maskw) != `0`)
3161	ctype->class_collection[cnt] \|= BITw (tok_graph);
3162
3163	for (size_t cnt = `0`; cnt < `256`; ++cnt)
3164	if ((ctype->class256_collection[cnt] & mask) != `0`)
3165	ctype->class256_collection[cnt] \|= BIT (tok_graph);
3166	}
3167
3168	if ((ctype->class_done & BITw (tok_print)) == `0`)
3169	/ "If this keyword [print] is not provided, characters specified for*
3170	the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3171	and the <space> character shall belong to this character class."
3172	[P1003.2, 2.5.2.1] /*
3173	{
3174	unsigned long int mask = BIT (tok_upper) \| BIT (tok_lower)
3175	\| BIT (tok_alpha) \| BIT (tok_digit) \| BIT (tok_xdigit)
3176	\| BIT (tok_punct);
3177	unsigned long int maskw = BITw (tok_upper) \| BITw (tok_lower)
3178	\| BITw (tok_alpha) \| BITw (tok_digit) \| BITw (tok_xdigit)
3179	\| BITw (tok_punct);
3180	struct charseq *seq;
3181
3182	for (size_t cnt = `0`; cnt < ctype->class_collection_act; ++cnt)
3183	if ((ctype->class_collection[cnt] & maskw) != `0`)
3184	ctype->class_collection[cnt] \|= BITw (tok_print);
3185
3186	for (size_t cnt = `0`; cnt < `256`; ++cnt)
3187	if ((ctype->class256_collection[cnt] & mask) != `0`)
3188	ctype->class256_collection[cnt] \|= BIT (tok_print);
3189
3190
3191	seq = charmap_find_value (charmap, "space", `5`);
3192	if (seq == NULL)
3193	seq = charmap_find_value (charmap, "SP", `2`);
3194	if (seq == NULL)
3195	seq = charmap_find_value (charmap, "U00000020", `9`);
3196	if (seq == NULL)
3197	{
3198	record_error (`0`, `0`, _("\
3199	%s: character `%s' not defined while needed as default value"),
3200	"LC_CTYPE", "<space>");
3201	}
3202	else if (seq->nbytes != `1`)
3203	record_error (`0`, `0`, _("\
3204	%s: character `%s' in charmap not representable with one byte"),
3205	"LC_CTYPE", "<space>");
3206	else
3207	ctype->class256_collection[seq->bytes[`0`]] \|= BIT (tok_print);
3208
3209	/ No need to search. /
3210	ELEM (ctype, class_collection, , L`' '`) \|= BITw (tok_print);
3211	}
3212
3213	if (ctype->tomap_done[`0`] == `0`)
3214	/ "If this keyword [toupper] is not specified, the lowercase letters*
3215	`a' through `z', and their corresponding uppercase letters `A' to
3216	`Z', ..., shall automatically be included, with implementation-
3217	defined character values." [P1003.2, 2.5.2.1] /*
3218	{
3219	char tmp[`4`];
3220	int ch;
3221
3222	strcpy (tmp, "<?>");
3223
3224	for (ch = `'a'`; ch <= `'z'`; ++ch)
3225	{
3226	struct charseq seq_from, seq_to;
3227
3228	tmp[`1`] = (char) ch;
3229
3230	seq_from = charmap_find_value (charmap, &tmp[`1`], `1`);
3231	if (seq_from == NULL)
3232	{
3233	char buf[`10`];
3234	sprintf (buf, "U%08X", ch);
3235	seq_from = charmap_find_value (charmap, buf, `9`);
3236	}
3237	if (seq_from == NULL)
3238	{
3239	record_error (`0`, `0`, _("\
3240	%s: character `%s' not defined while needed as default value"),
3241	"LC_CTYPE", tmp);
3242	}
3243	else if (seq_from->nbytes != `1`)
3244	{
3245	record_error (`0`, `0`, _("\
3246	%s: character `%s' needed as default value not representable with one byte"),
3247	"LC_CTYPE", tmp);
3248	}
3249	else
3250	{
3251	/ This conversion is implementation defined. /
3252	tmp[`1`] = (char) (ch + (`'A'` - `'a'`));
3253	seq_to = charmap_find_value (charmap, &tmp[`1`], `1`);
3254	if (seq_to == NULL)
3255	{
3256	char buf[`10`];
3257	sprintf (buf, "U%08X", ch + (`'A'` - `'a'`));
3258	seq_to = charmap_find_value (charmap, buf, `9`);
3259	}
3260	if (seq_to == NULL)
3261	{
3262	record_error (`0`, `0`, _("\
3263	%s: character `%s' not defined while needed as default value"),
3264	"LC_CTYPE", tmp);
3265	}
3266	else if (seq_to->nbytes != `1`)
3267	{
3268	record_error (`0`, `0`, _("\
3269	%s: character `%s' needed as default value not representable with one byte"),
3270	"LC_CTYPE", tmp);
3271	}
3272	else
3273	/ The index [0] is determined by the order of the*
3274	`ctype_map_newP' calls in `ctype_startup'. /*
3275	ctype->map256_collection[`0`][seq_from->bytes[`0`]]
3276	= seq_to->bytes[`0`];
3277	}
3278
3279	/ No need to search. /
3280	ELEM (ctype, map_collection, [`0`], ch) = ch + (`'A'` - `'a'`);
3281	}
3282	}
3283
3284	if (ctype->tomap_done[`1`] == `0`)
3285	/ "If this keyword [tolower] is not specified, the mapping shall be*
3286	the reverse mapping of the one specified to `toupper'." [P1003.2] /*
3287	{
3288	for (size_t cnt = `0`; cnt < ctype->map_collection_act[`0`]; ++cnt)
3289	if (ctype->map_collection[`0`][cnt] != `0`)
3290	ELEM (ctype, map_collection, [`1`],
3291	ctype->map_collection[`0`][cnt])
3292	= ctype->charnames[cnt];
3293
3294	for (size_t cnt = `0`; cnt < `256`; ++cnt)
3295	if (ctype->map256_collection[`0`][cnt] != `0`)
3296	ctype->map256_collection[`1`][ctype->map256_collection[`0`][cnt]] = cnt;
3297	}
3298
3299	if (ctype->outdigits_act != `10`)
3300	{
3301	if (ctype->outdigits_act != `0`)
3302	record_error (`0`, `0`, _("\
3303	%s: field `%s' does not contain exactly ten entries"),
3304	"LC_CTYPE", "outdigit");
3305
3306	for (size_t cnt = ctype->outdigits_act; cnt < `10`; ++cnt)
3307	{
3308	ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3309	(char *) digits + cnt,
3310	`1`);
3311
3312	if (ctype->mboutdigits[cnt] == NULL)
3313	ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3314	longnames[cnt],
3315	strlen (longnames[cnt]));
3316
3317	if (ctype->mboutdigits[cnt] == NULL)
3318	ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3319	uninames[cnt], `9`);
3320
3321	if (ctype->mboutdigits[cnt] == NULL)
3322	{
3323	/ Provide a replacement. /
3324	record_error (`0`, `0`, _("\
3325	no output digits defined and none of the standard names in the charmap"));
3326
3327	ctype->mboutdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
3328	sizeof (struct charseq)
3329	+ `1`);
3330
3331	/ This is better than nothing. /
3332	ctype->mboutdigits[cnt]->bytes[`0`] = digits[cnt];
3333	ctype->mboutdigits[cnt]->nbytes = `1`;
3334	}
3335
3336	ctype->wcoutdigits[cnt] = L`'0'` + cnt;
3337	}
3338
3339	ctype->outdigits_act = `10`;
3340	}
3341
3342	#undef set_default
3343	}
3344
3345
3346	/ Initialize. Assumes t->p and t->q have already been set. /
3347	static inline void
3348	wctype_table_init (struct wctype_table *t)
3349	{
3350	t->level1 = NULL;
3351	t->level1_alloc = t->level1_size = `0`;
3352	t->level2 = NULL;
3353	t->level2_alloc = t->level2_size = `0`;
3354	t->level3 = NULL;
3355	t->level3_alloc = t->level3_size = `0`;
3356	}
3357
3358	/ Retrieve an entry. /
3359	static inline int
3360	wctype_table_get (struct wctype_table *t, uint32_t wc)
3361	{
3362	uint32_t index1 = wc >> (t->q + t->p + `5`);
3363	if (index1 < t->level1_size)
3364	{
3365	uint32_t lookup1 = t->level1[index1];
3366	if (lookup1 != EMPTY)
3367	{
3368	uint32_t index2 = ((wc >> (t->p + `5`)) & ((`1` << t->q) - `1`))
3369	+ (lookup1 << t->q);
3370	uint32_t lookup2 = t->level2[index2];
3371	if (lookup2 != EMPTY)
3372	{
3373	uint32_t index3 = ((wc >> `5`) & ((`1` << t->p) - `1`))
3374	+ (lookup2 << t->p);
3375	uint32_t lookup3 = t->level3[index3];
3376	uint32_t index4 = wc & `0x1f`;
3377
3378	return (lookup3 >> index4) & `1`;
3379	}
3380	}
3381	}
3382	return `0`;
3383	}
3384
3385	/ Add one entry. /
3386	static void
3387	wctype_table_add (struct wctype_table *t, uint32_t wc)
3388	{
3389	uint32_t index1 = wc >> (t->q + t->p + `5`);
3390	uint32_t index2 = (wc >> (t->p + `5`)) & ((`1` << t->q) - `1`);
3391	uint32_t index3 = (wc >> `5`) & ((`1` << t->p) - `1`);
3392	uint32_t index4 = wc & `0x1f`;
3393	size_t i, i1, i2;
3394
3395	if (index1 >= t->level1_size)
3396	{
3397	if (index1 >= t->level1_alloc)
3398	{
3399	size_t alloc = `2` * t->level1_alloc;
3400	if (alloc <= index1)
3401	alloc = index1 + `1`;
3402	t->level1 = (uint32_t ) xrealloc ((char* *) t->level1,
3403	alloc * sizeof (uint32_t));
3404	t->level1_alloc = alloc;
3405	}
3406	while (index1 >= t->level1_size)
3407	t->level1[t->level1_size++] = EMPTY;
3408	}
3409
3410	if (t->level1[index1] == EMPTY)
3411	{
3412	if (t->level2_size == t->level2_alloc)
3413	{
3414	size_t alloc = `2` * t->level2_alloc + `1`;
3415	t->level2 = (uint32_t ) xrealloc ((char* *) t->level2,
3416	(alloc << t->q) * sizeof (uint32_t));
3417	t->level2_alloc = alloc;
3418	}
3419	i1 = t->level2_size << t->q;
3420	i2 = (t->level2_size + `1`) << t->q;
3421	for (i = i1; i < i2; i++)
3422	t->level2[i] = EMPTY;
3423	t->level1[index1] = t->level2_size++;
3424	}
3425
3426	index2 += t->level1[index1] << t->q;
3427
3428	if (t->level2[index2] == EMPTY)
3429	{
3430	if (t->level3_size == t->level3_alloc)
3431	{
3432	size_t alloc = `2` * t->level3_alloc + `1`;
3433	t->level3 = (uint32_t ) xrealloc ((char* *) t->level3,
3434	(alloc << t->p) * sizeof (uint32_t));
3435	t->level3_alloc = alloc;
3436	}
3437	i1 = t->level3_size << t->p;
3438	i2 = (t->level3_size + `1`) << t->p;
3439	for (i = i1; i < i2; i++)
3440	t->level3[i] = `0`;
3441	t->level2[index2] = t->level3_size++;
3442	}
3443
3444	index3 += t->level2[index2] << t->p;
3445
3446	t->level3[index3] \|= (uint32_t)`1` << index4;
3447	}
3448
3449	/ Finalize and shrink. /
3450	static void
3451	add_locale_wctype_table (struct locale_file file, struct* wctype_table *t)
3452	{
3453	size_t i, j, k;
3454	uint32_t reorder3[t->level3_size];
3455	uint32_t reorder2[t->level2_size];
3456	uint32_t level2_offset, level3_offset;
3457
3458	/ Uniquify level3 blocks. /
3459	k = `0`;
3460	for (j = `0`; j < t->level3_size; j++)
3461	{
3462	for (i = `0`; i < k; i++)
3463	if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
3464	(`1` << t->p) * sizeof (uint32_t)) == `0`)
3465	break;
3466	/ Relocate block j to block i. /
3467	reorder3[j] = i;
3468	if (i == k)
3469	{
3470	if (i != j)
3471	memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
3472	(`1` << t->p) * sizeof (uint32_t));
3473	k++;
3474	}
3475	}
3476	t->level3_size = k;
3477
3478	for (i = `0`; i < (t->level2_size << t->q); i++)
3479	if (t->level2[i] != EMPTY)
3480	t->level2[i] = reorder3[t->level2[i]];
3481
3482	/ Uniquify level2 blocks. /
3483	k = `0`;
3484	for (j = `0`; j < t->level2_size; j++)
3485	{
3486	for (i = `0`; i < k; i++)
3487	if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
3488	(`1` << t->q) * sizeof (uint32_t)) == `0`)
3489	break;
3490	/ Relocate block j to block i. /
3491	reorder2[j] = i;
3492	if (i == k)
3493	{
3494	if (i != j)
3495	memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
3496	(`1` << t->q) * sizeof (uint32_t));
3497	k++;
3498	}
3499	}
3500	t->level2_size = k;
3501
3502	for (i = `0`; i < t->level1_size; i++)
3503	if (t->level1[i] != EMPTY)
3504	t->level1[i] = reorder2[t->level1[i]];
3505
3506	t->result_size =
3507	`5` * sizeof (uint32_t)
3508	+ t->level1_size * sizeof (uint32_t)
3509	+ (t->level2_size << t->q) * sizeof (uint32_t)
3510	+ (t->level3_size << t->p) * sizeof (uint32_t);
3511
3512	level2_offset =
3513	`5` * sizeof (uint32_t)
3514	+ t->level1_size * sizeof (uint32_t);
3515	level3_offset =
3516	`5` * sizeof (uint32_t)
3517	+ t->level1_size * sizeof (uint32_t)
3518	+ (t->level2_size << t->q) * sizeof (uint32_t);
3519
3520	start_locale_structure (file);
3521	add_locale_uint32 (file, t->q + t->p + `5`);
3522	add_locale_uint32 (file, t->level1_size);
3523	add_locale_uint32 (file, t->p + `5`);
3524	add_locale_uint32 (file, (`1` << t->q) - `1`);
3525	add_locale_uint32 (file, (`1` << t->p) - `1`);
3526
3527	for (i = `0`; i < t->level1_size; i++)
3528	add_locale_uint32
3529	(file,
3530	t->level1[i] == EMPTY
3531	? `0`
3532	: (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
3533
3534	for (i = `0`; i < (t->level2_size << t->q); i++)
3535	add_locale_uint32
3536	(file,
3537	t->level2[i] == EMPTY
3538	? `0`
3539	: (t->level2[i] << t->p) * sizeof (uint32_t) + level3_offset);
3540
3541	add_locale_uint32_array (file, t->level3, t->level3_size << t->p);
3542	end_locale_structure (file);
3543
3544	if (t->level1_alloc > `0`)
3545	free (t->level1);
3546	if (t->level2_alloc > `0`)
3547	free (t->level2);
3548	if (t->level3_alloc > `0`)
3549	free (t->level3);
3550	}
3551
3552	/ Flattens the included transliterations into a translit list.*
3553	Inserts them in the list at `cursor', and returns the new cursor. /*
3554	static struct translit_t **
3555	translit_flatten (struct locale_ctype_t *ctype,
3556	const struct charmap_t *charmap,
3557	struct translit_t **cursor)
3558	{
3559	while (ctype->translit_include != NULL)
3560	{
3561	const char *copy_locale = ctype->translit_include->copy_locale;
3562	const char *copy_repertoire = ctype->translit_include->copy_repertoire;
3563	struct localedef_t *other;
3564
3565	/ Unchain the include statement. During the depth-first traversal*
3566	we don't want to visit any locale more than once. /*
3567	ctype->translit_include = ctype->translit_include->next;
3568
3569	other = find_locale (LC_CTYPE, copy_locale, copy_repertoire, charmap);
3570
3571	if (other == NULL \|\| other->categories[LC_CTYPE].ctype == NULL)
3572	{
3573	record_error (`0`, `0`, _("\
3574	%s: transliteration data from locale `%s' not available"),
3575	"LC_CTYPE", copy_locale);
3576	}
3577	else
3578	{
3579	struct locale_ctype_t *other_ctype =
3580	other->categories[LC_CTYPE].ctype;
3581
3582	cursor = translit_flatten (other_ctype, charmap, cursor);
3583	assert (other_ctype->translit_include == NULL);
3584
3585	if (other_ctype->translit != NULL)
3586	{
3587	/ Insert the other_ctype->translit list at cursor. /*
3588	struct translit_t *endp = other_ctype->translit;
3589	while (endp->next != NULL)
3590	endp = endp->next;
3591
3592	endp->next = *cursor;
3593	*cursor = other_ctype->translit;
3594
3595	/ Avoid any risk of circular lists. /
3596	other_ctype->translit = NULL;
3597
3598	cursor = &endp->next;
3599	}
3600
3601	if (ctype->default_missing == NULL)
3602	ctype->default_missing = other_ctype->default_missing;
3603	}
3604	}
3605
3606	return cursor;
3607	}
3608
3609	static void
3610	allocate_arrays (struct locale_ctype_t ctype, const* struct charmap_t *charmap,
3611	struct repertoire_t *repertoire)
3612	{
3613	size_t idx, nr;
3614	const void *key;
3615	size_t len;
3616	void *vdata;
3617	void *curs;
3618
3619	/ You wonder about this amount of memory? This is only because some*
3620	users do not manage to address the array with unsigned values or
3621	data types with range >= 256. '\200' would result in the array
3622	index -128. To help these poor people we duplicate the entries for
3623	128 up to 255 below the entry for \0. /*
3624	ctype->ctype_b = (char_class_t ) xcalloc (`256` + `128`, sizeof* (char_class_t));
3625	ctype->ctype32_b = (char_class32_t ) xcalloc (`256`, sizeof* (char_class32_t));
3626	ctype->class_b = (uint32_t **)
3627	xmalloc (ctype->nr_charclass * sizeof (uint32_t *));
3628	ctype->class_3level = (struct wctype_table *)
3629	xmalloc (ctype->nr_charclass * sizeof (struct wctype_table));
3630
3631	/ This is the array accessed using the multibyte string elements. /
3632	for (idx = `0`; idx < `256`; ++idx)
3633	ctype->ctype_b[`128` + idx] = ctype->class256_collection[idx];
3634
3635	/ Mirror first 127 entries. We must take care that entry -1 is not*
3636	mirrored because EOF == -1. /*
3637	for (idx = `0`; idx < `127`; ++idx)
3638	ctype->ctype_b[idx] = ctype->ctype_b[`256` + idx];
3639
3640	/ The 32 bit array contains all characters < 0x100. /
3641	for (idx = `0`; idx < ctype->class_collection_act; ++idx)
3642	if (ctype->charnames[idx] < `0x100`)
3643	ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
3644
3645	for (nr = `0`; nr < ctype->nr_charclass; nr++)
3646	{
3647	ctype->class_b[nr] = (uint32_t ) xcalloc (`256` / `32`, sizeof* (uint32_t));
3648
3649	/ We only set CLASS_B for the bits in the ISO C classes, not*
3650	the user defined classes. The number should not change but
3651	who knows. /*
3652	#define LAST_ISO_C_BIT 11
3653	if (nr <= LAST_ISO_C_BIT)
3654	for (idx = `0`; idx < `256`; ++idx)
3655	if (ctype->class256_collection[idx] & _ISbit (nr))
3656	ctype->class_b[nr][idx >> `5`] \|= (uint32_t) `1` << (idx & `0x1f`);
3657	}
3658
3659	for (nr = `0`; nr < ctype->nr_charclass; nr++)
3660	{
3661	struct wctype_table *t;
3662
3663	t = &ctype->class_3level[nr];
3664	t->p = `4`; / or: 5 /
3665	t->q = `7`; / or: 6 /
3666	wctype_table_init (t);
3667
3668	for (idx = `0`; idx < ctype->class_collection_act; ++idx)
3669	if (ctype->class_collection[idx] & _ISwbit (nr))
3670	wctype_table_add (t, ctype->charnames[idx]);
3671
3672	record_verbose (stderr, _("\
3673	%s: table for class \"%s\": %lu bytes"),
3674	"LC_CTYPE", ctype->classnames[nr],
3675	(unsigned long int) t->result_size);
3676	}
3677
3678	/ Room for table of mappings. /
3679	ctype->map_b = (uint32_t *) xmalloc (`2` sizeof (uint32_t *));
3680	ctype->map32_b = (uint32_t **) xmalloc (ctype->map_collection_nr
3681	* sizeof (uint32_t *));
3682	ctype->map_3level = (struct wctrans_table *)
3683	xmalloc (ctype->map_collection_nr * sizeof (struct wctrans_table));
3684
3685	/ Fill in all mappings. /
3686	for (idx = `0`; idx < `2`; ++idx)
3687	{
3688	unsigned int idx2;
3689
3690	/ Allocate table. /
3691	ctype->map_b[idx] = (uint32_t *)
3692	xmalloc ((`256` + `128`) * sizeof (uint32_t));
3693
3694	/ Copy values from collection. /
3695	for (idx2 = `0`; idx2 < `256`; ++idx2)
3696	ctype->map_b[idx][`128` + idx2] = ctype->map256_collection[idx][idx2];
3697
3698	/ Mirror first 127 entries. We must take care not to map entry*
3699	-1 because EOF == -1. /*
3700	for (idx2 = `0`; idx2 < `127`; ++idx2)
3701	ctype->map_b[idx][idx2] = ctype->map_b[idx][`256` + idx2];
3702
3703	/ EOF must map to EOF. /
3704	ctype->map_b[idx][`127`] = EOF;
3705	}
3706
3707	for (idx = `0`; idx < ctype->map_collection_nr; ++idx)
3708	{
3709	unsigned int idx2;
3710
3711	/ Allocate table. /
3712	ctype->map32_b[idx] = (uint32_t ) xmalloc (`256` sizeof (uint32_t));
3713
3714	/ Copy values from collection. Default is identity mapping. /
3715	for (idx2 = `0`; idx2 < `256`; ++idx2)
3716	ctype->map32_b[idx][idx2] =
3717	(ctype->map_collection[idx][idx2] != `0`
3718	? ctype->map_collection[idx][idx2]
3719	: idx2);
3720	}
3721
3722	for (nr = `0`; nr < ctype->map_collection_nr; nr++)
3723	{
3724	struct wctrans_table *t;
3725
3726	t = &ctype->map_3level[nr];
3727	t->p = `7`;
3728	t->q = `9`;
3729	wctrans_table_init (t);
3730
3731	for (idx = `0`; idx < ctype->map_collection_act[nr]; ++idx)
3732	if (ctype->map_collection[nr][idx] != `0`)
3733	wctrans_table_add (t, ctype->charnames[idx],
3734	ctype->map_collection[nr][idx]);
3735
3736	record_verbose (stderr, _("\
3737	%s: table for map \"%s\": %lu bytes"),
3738	"LC_CTYPE", ctype->mapnames[nr],
3739	(unsigned long int) t->result_size);
3740	}
3741
3742	/ Extra array for class and map names. /
3743	ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
3744	* sizeof (uint32_t));
3745	ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
3746	* sizeof (uint32_t));
3747
3748	ctype->class_offset = _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
3749	ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
3750
3751	/ Array for width information. Because the expected widths are very*
3752	small (never larger than 2) we use only one single byte. This
3753	saves space.
3754	We put only printable characters in the table. wcwidth is specified
3755	to return -1 for non-printable characters. Doing the check here
3756	saves a run-time check.
3757	But we put L'\0' in the table. This again saves a run-time check. /*
3758	{
3759	struct wcwidth_table *t;
3760
3761	t = &ctype->width;
3762	t->p = `7`;
3763	t->q = `9`;
3764	wcwidth_table_init (t);
3765
3766	/ First set all the printable characters of the character set to*
3767	the default width. /*
3768	curs = NULL;
3769	while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == `0`)
3770	{
3771	struct charseq data = (struct* charseq *) vdata;
3772
3773	if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
3774	data->ucs4 = repertoire_find_value (ctype->repertoire,
3775	data->name, len);
3776
3777	if (data->ucs4 != ILLEGAL_CHAR_VALUE)
3778	{
3779	uint32_t *class_bits =
3780	find_idx (ctype, &ctype->class_collection, NULL,
3781	&ctype->class_collection_act, data->ucs4);
3782
3783	if (class_bits != NULL && (*class_bits & BITw (tok_print)))
3784	wcwidth_table_add (t, data->ucs4, charmap->width_default);
3785	}
3786	}
3787
3788	/ Now add the explicitly specified widths. /
3789	if (charmap->width_rules != NULL)
3790	for (size_t cnt = `0`; cnt < charmap->nwidth_rules; ++cnt)
3791	{
3792	unsigned char bytes[charmap->mb_cur_max];
3793	int nbytes = charmap->width_rules[cnt].from->nbytes;
3794
3795	/ We have the range of character for which the width is*
3796	specified described using byte sequences of the multibyte
3797	charset. We have to convert this to UCS4 now. And we
3798	cannot simply convert the beginning and the end of the
3799	sequence, we have to iterate over the byte sequence and
3800	convert it for every single character. /*
3801	memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
3802
3803	while (nbytes < charmap->width_rules[cnt].to->nbytes
3804	\|\| memcmp (bytes, charmap->width_rules[cnt].to->bytes,
3805	nbytes) <= `0`)
3806	{
3807	/ Find the UCS value for `bytes'. /
3808	int inner;
3809	uint32_t wch;
3810	struct charseq *seq =
3811	charmap_find_symbol (charmap, (char *) bytes, nbytes);
3812
3813	if (seq == NULL)
3814	wch = ILLEGAL_CHAR_VALUE;
3815	else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
3816	wch = seq->ucs4;
3817	else
3818	wch = repertoire_find_value (ctype->repertoire, seq->name,
3819	strlen (seq->name));
3820
3821	if (wch != ILLEGAL_CHAR_VALUE)
3822	{
3823	/ Store the value. /
3824	uint32_t *class_bits =
3825	find_idx (ctype, &ctype->class_collection, NULL,
3826	&ctype->class_collection_act, wch);
3827
3828	if (class_bits != NULL && (*class_bits & BITw (tok_print)))
3829	wcwidth_table_add (t, wch,
3830	charmap->width_rules[cnt].width);
3831	}
3832
3833	/ "Increment" the bytes sequence. /
3834	inner = nbytes - `1`;
3835	while (inner >= `0` && bytes[inner] == `0xff`)
3836	--inner;
3837
3838	if (inner < `0`)
3839	{
3840	/ We have to extend the byte sequence. /
3841	if (nbytes >= charmap->width_rules[cnt].to->nbytes)
3842	break;
3843
3844	bytes[`0`] = `1`;
3845	memset (&bytes[`1`], `0`, nbytes);
3846	++nbytes;
3847	}
3848	else
3849	{
3850	++bytes[inner];
3851	while (++inner < nbytes)
3852	bytes[inner] = `0`;
3853	}
3854	}
3855	}
3856
3857	/ Set the width of L'\0' to 0. /
3858	wcwidth_table_add (t, `0`, `0`);
3859
3860	record_verbose (stderr, _("%s: table for width: %lu bytes"),
3861	"LC_CTYPE", (unsigned long int) t->result_size);
3862	}
3863
3864	/ Set MB_CUR_MAX. /
3865	ctype->mb_cur_max = charmap->mb_cur_max;
3866
3867	/ Now determine the table for the transliteration information.*
3868
3869	XXX It is not yet clear to me whether it is worth implementing a
3870	complicated algorithm which uses a hash table to locate the entries.
3871	For now I'll use a simple array which can be searching using binary
3872	search. /*
3873	if (ctype->translit_include != NULL)
3874	/ Traverse the locales mentioned in the `include' statements in a*
3875	depth-first way and fold in their transliteration information. /*
3876	translit_flatten (ctype, charmap, &ctype->translit);
3877
3878	if (ctype->translit != NULL)
3879	{
3880	/ First count how many entries we have. This is the upper limit*
3881	since some entries from the included files might be overwritten. /*
3882	size_t number = `0`;
3883	struct translit_t *runp = ctype->translit;
3884	struct translit_t **sorted;
3885	size_t from_len, to_len;
3886
3887	while (runp != NULL)
3888	{
3889	++number;
3890	runp = runp->next;
3891	}
3892
3893	/ Next we allocate an array large enough and fill in the values. /
3894	sorted = (struct translit_t **) alloca (number
3895	* sizeof (struct translit_t **));
3896	runp = ctype->translit;
3897	number = `0`;
3898	do
3899	{
3900	/ Search for the place where to insert this string.*
3901	XXX Better use a real sorting algorithm later. /*
3902	size_t idx = `0`;
3903	int replace = `0`;
3904
3905	while (idx < number)
3906	{
3907	int res = wcscmp ((const wchar_t *) sorted[idx]->from,
3908	(const wchar_t *) runp->from);
3909	if (res == `0`)
3910	{
3911	replace = `1`;
3912	break;
3913	}
3914	if (res > `0`)
3915	break;
3916	++idx;
3917	}
3918
3919	if (replace)
3920	sorted[idx] = runp;
3921	else
3922	{
3923	memmove (&sorted[idx + `1`], &sorted[idx],
3924	(number - idx) * sizeof (struct translit_t *));
3925	sorted[idx] = runp;
3926	++number;
3927	}
3928
3929	runp = runp->next;
3930	}
3931	while (runp != NULL);
3932
3933	/ The next step is putting all the possible transliteration*
3934	strings in one memory block so that we can write it out.
3935	We need several different blocks:
3936	- index to the from-string array
3937	- from-string array
3938	- index to the to-string array
3939	- to-string array.
3940	*/
3941	from_len = to_len = `0`;
3942	for (size_t cnt = `0`; cnt < number; ++cnt)
3943	{
3944	struct translit_to_t *srunp;
3945	from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + `1`;
3946	srunp = sorted[cnt]->to;
3947	while (srunp != NULL)
3948	{
3949	to_len += wcslen ((const wchar_t *) srunp->str) + `1`;
3950	srunp = srunp->next;
3951	}
3952	/ Plus one for the extra NUL character marking the end of*
3953	the list for the current entry. /*
3954	++to_len;
3955	}
3956
3957	/ We can allocate the arrays for the results. /
3958	ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
3959	ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
3960	ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
3961	ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
3962
3963	from_len = `0`;
3964	to_len = `0`;
3965	for (size_t cnt = `0`; cnt < number; ++cnt)
3966	{
3967	size_t len;
3968	struct translit_to_t *srunp;
3969
3970	ctype->translit_from_idx[cnt] = from_len;
3971	ctype->translit_to_idx[cnt] = to_len;
3972
3973	len = wcslen ((const wchar_t *) sorted[cnt]->from) + `1`;
3974	wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
3975	(const wchar_t *) sorted[cnt]->from, len);
3976	from_len += len;
3977
3978	ctype->translit_to_idx[cnt] = to_len;
3979	srunp = sorted[cnt]->to;
3980	while (srunp != NULL)
3981	{
3982	len = wcslen ((const wchar_t *) srunp->str) + `1`;
3983	wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
3984	(const wchar_t *) srunp->str, len);
3985	to_len += len;
3986	srunp = srunp->next;
3987	}
3988	ctype->translit_to_tbl[to_len++] = L`'\0'`;
3989	}
3990
3991	/ Store the information about the length. /
3992	ctype->translit_idx_size = number;
3993	ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
3994	ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
3995	}
3996	else
3997	{
3998	ctype->translit_from_idx = no_str;
3999	ctype->translit_from_tbl = no_str;
4000	ctype->translit_to_tbl = no_str;
4001	ctype->translit_idx_size = `0`;
4002	ctype->translit_from_tbl_size = `0`;
4003	ctype->translit_to_tbl_size = `0`;
4004	}
4005	}
4006

Browse the source code of glibc/locale/programs/ld-ctype.c