iso-2022-jp.c source code [glibc/iconvdata/iso-2022-jp.c]

/* Conversion module for ISO-2022-JP and ISO-2022-JP-2.
   Copyright (C) 1998-2023 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
18
19	#include <assert.h>
20	#include <dlfcn.h>
21	#include <gconv.h>
22	#include <stdint.h>
23	#include <stdlib.h>
24	#include <string.h>
25	#include "jis0201.h"
26	#include "jis0208.h"
27	#include "jis0212.h"
28	#include "gb2312.h"
29	#include "ksc5601.h"
30
/* One entry of the range table used for the UCS4 -> ISO 8859-7 lookup
   further down in this file: the table is scanned until an entry with
   ch <= END is found, the character is accepted if ch >= START, and
   `ch - 0xa0 + IDX' is then used as index into iso88597_from_ucs4.  */
struct gap
{
  uint16_t start;	/* First UCS4 value covered by this range.  */
  uint16_t end;		/* Last UCS4 value covered by this range.  */
  int32_t idx;		/* Offset added when indexing the byte table.  */
};
37
38	#include "iso8859-7jp.h"
39
/* This makes obvious what everybody knows: 0x1b is the Esc character.  */
#define ESC 0x1b

/* gconv_init and gconv_end are written by hand further down in this
   file, so both DEFINE_* switches are set to zero.  */
#define DEFINE_INIT		0
#define DEFINE_FINI		0

/* Definitions used in the body of the `gconv' function.

   "FROM" is the ISO-2022-JP -> UCS4 direction: one to four input bytes
   are consumed per step (the longest escape sequence handled below is
   four bytes long) and exactly four output bytes are produced.

   "TO" is the UCS4 -> ISO-2022-JP direction: four input bytes are
   consumed per step and up to six output bytes are produced (a four
   byte designation sequence followed by a two byte character).  */
#define FROM_LOOP		from_iso2022jp_loop
#define TO_LOOP			to_iso2022jp_loop
#define ONE_DIRECTION			0
#define FROM_LOOP_MIN_NEEDED_FROM	1
#define FROM_LOOP_MAX_NEEDED_FROM	4
#define FROM_LOOP_MIN_NEEDED_TO		4
#define FROM_LOOP_MAX_NEEDED_TO		4
#define TO_LOOP_MIN_NEEDED_FROM		4
#define TO_LOOP_MAX_NEEDED_FROM		4
#define TO_LOOP_MIN_NEEDED_TO		1
#define TO_LOOP_MAX_NEEDED_TO		6
#define FROM_DIRECTION		(dir == from_iso2022jp)
/* Locals made available to the conversion code: the direction and the
   variant stored in the step's private data, a slot used by
   SAVE_RESET_STATE, and a pointer to the state's COUNT word.  */
#define PREPARE_LOOP \
  enum direction dir = ((struct iso2022jp_data *) step->__data)->dir; \
  enum variant var = ((struct iso2022jp_data *) step->__data)->var; \
  int save_set; \
  int *setp = &data->__statep->__count;
/* Extra arguments handed to both loop functions; they match
   EXTRA_LOOP_DECLS at each loop definition.  */
#define EXTRA_LOOP_ARGS		, var, setp
66
67
/* Direction of the transformation.  `illegal_dir' is the value used
   while no supported charset name has been matched.  */
enum direction
{
  illegal_dir = 0,
  to_iso2022jp = 1,
  from_iso2022jp = 2
};
75
/* We handle ISO-2022-JP and ISO-2022-JP-2 here.  `illegal_var' is the
   value used while no supported charset name has been matched.  */
enum variant
{
  illegal_var = 0,
  iso2022jp = 1,
  iso2022jp2 = 2
};
83
84
85	struct iso2022jp_data
86	{
87	enum direction dir;
88	enum variant var;
89	};
90
91
/* The COUNT element of the state keeps track of the currently selected
   character set.  The selection occupies bits 3..5 of COUNT (see
   CURRENT_SEL_MASK).  The possible values are:  */
enum
{
  ASCII_set = 0,
  JISX0208_1978_set = 1 << 3,
  JISX0208_1983_set = 2 << 3,
  JISX0201_Roman_set = 3 << 3,
  JISX0201_Kana_set = 4 << 3,
  GB2312_set = 5 << 3,
  KSC5601_set = 6 << 3,
  JISX0212_set = 7 << 3,
  CURRENT_SEL_MASK = 7 << 3
};
106
/* The second value stored is the designation of the G2 set.  It
   occupies bits 6..7 of COUNT (see CURRENT_ASSIGN_MASK).  The following
   values are possible:  */
enum
{
  UNSPECIFIED_set = 0,
  ISO88591_set = 1 << 6,
  ISO88597_set = 2 << 6,
  CURRENT_ASSIGN_MASK = 3 << 6
};
116
/* The third value, only used during conversion from Unicode to
   ISO-2022-JP-2, describes the language tag parsing status.  It
   occupies bits 8..10 of COUNT (see CURRENT_TAG_MASK).  The possible
   values are as follows.  Values >= TAG_language are temporary tag
   parsing states.

   The stable states (TAG_none and the three complete language tags)
   have the values 0..3 << 8, so that `tag >> 8' can be used directly
   as an index into the conversion_lists table.  */
enum
{
  TAG_none = 0,
  TAG_language = 4 << 8,
  TAG_language_j = 5 << 8,
  TAG_language_ja = 1 << 8,
  TAG_language_k = 6 << 8,
  TAG_language_ko = 2 << 8,
  TAG_language_z = 7 << 8,
  TAG_language_zh = 3 << 8,
  CURRENT_TAG_MASK = 7 << 8
};
132
133
134	extern int gconv_init (struct __gconv_step *step);
135	int
136	gconv_init (struct __gconv_step *step)
137	{
138	/ Determine which direction. /
139	struct iso2022jp_data *new_data;
140	enum direction dir = illegal_dir;
141	enum variant var = illegal_var;
142	int result;
143
144	if (__strcasecmp (step->__from_name, "ISO-2022-JP//") == `0`)
145	{
146	dir = from_iso2022jp;
147	var = iso2022jp;
148	}
149	else if (__strcasecmp (step->__to_name, "ISO-2022-JP//") == `0`)
150	{
151	dir = to_iso2022jp;
152	var = iso2022jp;
153	}
154	else if (__strcasecmp (step->__from_name, "ISO-2022-JP-2//") == `0`)
155	{
156	dir = from_iso2022jp;
157	var = iso2022jp2;
158	}
159	else if (__strcasecmp (step->__to_name, "ISO-2022-JP-2//") == `0`)
160	{
161	dir = to_iso2022jp;
162	var = iso2022jp2;
163	}
164
165	result = __GCONV_NOCONV;
166	if (__builtin_expect (dir, from_iso2022jp) != illegal_dir)
167	{
168	new_data
169	= (struct iso2022jp_data ) malloc (sizeof* (struct iso2022jp_data));
170
171	result = __GCONV_NOMEM;
172	if (new_data != NULL)
173	{
174	new_data->dir = dir;
175	new_data->var = var;
176	step->__data = new_data;
177
178	if (dir == from_iso2022jp)
179	{
180	step->__min_needed_from = FROM_LOOP_MIN_NEEDED_FROM;
181	step->__max_needed_from = FROM_LOOP_MAX_NEEDED_FROM;
182	step->__min_needed_to = FROM_LOOP_MIN_NEEDED_TO;
183	step->__max_needed_to = FROM_LOOP_MAX_NEEDED_TO;
184	}
185	else
186	{
187	step->__min_needed_from = TO_LOOP_MIN_NEEDED_FROM;
188	step->__max_needed_from = TO_LOOP_MAX_NEEDED_FROM;
189	step->__min_needed_to = TO_LOOP_MIN_NEEDED_TO;
190	step->__max_needed_to = TO_LOOP_MAX_NEEDED_TO;
191	}
192
193	/ Yes, this is a stateful encoding. /
194	step->__stateful = `1`;
195
196	result = __GCONV_OK;
197	}
198	}
199
200	return result;
201	}
202
203
204	extern void gconv_end (struct __gconv_step *data);
205	void
206	gconv_end (struct __gconv_step *data)
207	{
208	free (data->__data);
209	}
210
211
/* Since this is a stateful encoding we have to provide code which resets
   the output state to the initial state.  This has to be done during the
   flushing.

   The macro expects `data', `dir', `var', `outbuf', `outend' and
   `status' to be in scope where it is expanded.  Everything in COUNT
   above the low three bits is cleared (selected set, G2 designation and
   tag state); the low three bits themselves are left untouched.  When
   converting to ISO-2022-JP with a non-ASCII set selected, the three
   byte sequence `Esc ( B' is written first; if fewer than three bytes
   are free, STATUS is set to __GCONV_FULL_OUTPUT and the state is left
   unchanged.  */
#define EMIT_SHIFT_TO_INIT \
  /* Avoid warning about unused variable 'var'.  */ \
  (void) var; \
  \
  if ((data->__statep->__count & ~7) != ASCII_set) \
    { \
      if (dir == from_iso2022jp \
          || (data->__statep->__count & CURRENT_SEL_MASK) == ASCII_set) \
        { \
          /* It's easy, we don't have to emit anything, we just reset the \
             state for the input.  Note that this also clears the G2 \
             designation.  */ \
          data->__statep->__count &= 7; \
          data->__statep->__count |= ASCII_set; \
        } \
      else \
        { \
          /* We are not in the initial state.  To switch back we have \
             to emit the sequence `Esc ( B'.  */ \
          if (__glibc_unlikely (outbuf + 3 > outend)) \
            /* We don't have enough room in the output buffer.  */ \
            status = __GCONV_FULL_OUTPUT; \
          else \
            { \
              /* Write out the shift sequence.  */ \
              *outbuf++ = ESC; \
              *outbuf++ = '('; \
              *outbuf++ = 'B'; \
              /* Note that this also clears the G2 designation.  */ \
              data->__statep->__count &= 7; \
              data->__statep->__count |= ASCII_set; \
            } \
        } \
    }
249
250
/* Since we might have to reset the input pointer we must be able to
   save and restore the state.  With a nonzero argument the current
   state word is copied into `save_set'; with a zero argument the state
   word is rewritten from `save_set'.  Both `save_set' and `setp' are
   the locals declared by PREPARE_LOOP.  */
#define SAVE_RESET_STATE(Save) \
  ((Save) \
   ? (void) (save_set = *setp) \
   : (void) (*setp = save_set))
258
259
260	/ First define the conversion function from ISO-2022-JP to UCS4. /
261	#define MIN_NEEDED_INPUT FROM_LOOP_MIN_NEEDED_FROM
262	#define MAX_NEEDED_INPUT FROM_LOOP_MAX_NEEDED_FROM
263	#define MIN_NEEDED_OUTPUT FROM_LOOP_MIN_NEEDED_TO
264	#define MAX_NEEDED_OUTPUT FROM_LOOP_MAX_NEEDED_TO
265	#define LOOPFCT FROM_LOOP
266	#define BODY \
267	{ \
268	uint32_t ch = *inptr; \
269	\
270	/* Recognize escape sequences. */ \
271	if (__builtin_expect (ch, 0) == ESC) \
272	{ \
273	/* We now must be prepared to read two to three more \
274	characters. If we have a match in the first character but \
275	then the input buffer ends we terminate with an error since \
276	we must not risk missing an escape sequence just because it \
277	is not entirely in the current input buffer. */ \
278	if (__builtin_expect (inptr + 2 >= inend, 0) \
279	\|\| (var == iso2022jp2 && inptr[1] == '$' && inptr[2] == '(' \
280	&& __builtin_expect (inptr + 3 >= inend, 0))) \
281	{ \
282	/* Not enough input available. */ \
283	result = __GCONV_INCOMPLETE_INPUT; \
284	break; \
285	} \
286	\
287	if (inptr[1] == '(') \
288	{ \
289	if (inptr[2] == 'B') \
290	{ \
291	/* ASCII selected. */ \
292	set = ASCII_set; \
293	inptr += 3; \
294	continue; \
295	} \
296	else if (inptr[2] == 'J') \
297	{ \
298	/* JIS X 0201 selected. */ \
299	set = JISX0201_Roman_set; \
300	inptr += 3; \
301	continue; \
302	} \
303	else if (var == iso2022jp2 && inptr[2] == 'I') \
304	{ \
305	/* JIS X 0201 selected. */ \
306	set = JISX0201_Kana_set; \
307	inptr += 3; \
308	continue; \
309	} \
310	} \
311	else if (inptr[1] == '$') \
312	{ \
313	if (inptr[2] == '@') \
314	{ \
315	/* JIS X 0208-1978 selected. */ \
316	set = JISX0208_1978_set; \
317	inptr += 3; \
318	continue; \
319	} \
320	else if (inptr[2] == 'B') \
321	{ \
322	/* JIS X 0208-1983 selected. */ \
323	set = JISX0208_1983_set; \
324	inptr += 3; \
325	continue; \
326	} \
327	else if (var == iso2022jp2) \
328	{ \
329	if (inptr[2] == 'A') \
330	{ \
331	/* GB 2312-1980 selected. */ \
332	set = GB2312_set; \
333	inptr += 3; \
334	continue; \
335	} \
336	else if (inptr[2] == '(') \
337	{ \
338	if (inptr[3] == 'C') \
339	{ \
340	/* KSC 5601-1987 selected. */ \
341	set = KSC5601_set; \
342	inptr += 4; \
343	continue; \
344	} \
345	else if (inptr[3] == 'D') \
346	{ \
347	/* JIS X 0212-1990 selected. */ \
348	set = JISX0212_set; \
349	inptr += 4; \
350	continue; \
351	} \
352	} \
353	} \
354	} \
355	else if (var == iso2022jp2 && inptr[1] == '.') \
356	{ \
357	if (inptr[2] == 'A') \
358	{ \
359	/* ISO 8859-1-GR selected. */ \
360	set2 = ISO88591_set; \
361	inptr += 3; \
362	continue; \
363	} \
364	else if (inptr[2] == 'F') \
365	{ \
366	/* ISO 8859-7-GR selected. */ \
367	set2 = ISO88597_set; \
368	inptr += 3; \
369	continue; \
370	} \
371	} \
372	} \
373	\
374	if (ch == ESC && var == iso2022jp2 && inptr[1] == 'N') \
375	{ \
376	if (set2 == ISO88591_set) \
377	{ \
378	ch = inptr[2] \| 0x80; \
379	inptr += 3; \
380	} \
381	else if (__builtin_expect (set2, ISO88597_set) == ISO88597_set) \
382	{ \
383	/* We use the table from the ISO 8859-7 module. */ \
384	if (inptr[2] < 0x20 \|\| inptr[2] >= 0x80) \
385	STANDARD_FROM_LOOP_ERR_HANDLER (1); \
386	ch = iso88597_to_ucs4[inptr[2] - 0x20]; \
387	if (ch == 0) \
388	STANDARD_FROM_LOOP_ERR_HANDLER (3); \
389	inptr += 3; \
390	} \
391	else \
392	{ \
393	STANDARD_FROM_LOOP_ERR_HANDLER (1); \
394	} \
395	} \
396	else if (ch >= 0x80) \
397	{ \
398	STANDARD_FROM_LOOP_ERR_HANDLER (1); \
399	} \
400	else if (set == ASCII_set \|\| (ch < 0x21 \|\| ch == 0x7f)) \
401	/* Almost done, just advance the input pointer. */ \
402	++inptr; \
403	else if (set == JISX0201_Roman_set) \
404	{ \
405	/* Use the JIS X 0201 table. */ \
406	ch = jisx0201_to_ucs4 (ch); \
407	if (__glibc_unlikely (ch == __UNKNOWN_10646_CHAR)) \
408	STANDARD_FROM_LOOP_ERR_HANDLER (1); \
409	++inptr; \
410	} \
411	else if (set == JISX0201_Kana_set) \
412	{ \
413	/* Use the JIS X 0201 table. */ \
414	ch = jisx0201_to_ucs4 (ch + 0x80); \
415	if (__glibc_unlikely (ch == __UNKNOWN_10646_CHAR)) \
416	STANDARD_FROM_LOOP_ERR_HANDLER (1); \
417	++inptr; \
418	} \
419	else \
420	{ \
421	if (set == JISX0208_1978_set \|\| set == JISX0208_1983_set) \
422	/* XXX I don't have the tables for these two old variants of \
423	JIS X 0208. Therefore I'm using the tables for JIS X \
424	0208-1990. If somebody has problems with this please \
425	provide the appropriate tables. */ \
426	ch = jisx0208_to_ucs4 (&inptr, inend - inptr, 0); \
427	else if (set == JISX0212_set) \
428	/* Use the JIS X 0212 table. */ \
429	ch = jisx0212_to_ucs4 (&inptr, inend - inptr, 0); \
430	else if (set == GB2312_set) \
431	/* Use the GB 2312 table. */ \
432	ch = gb2312_to_ucs4 (&inptr, inend - inptr, 0); \
433	else \
434	{ \
435	assert (set == KSC5601_set); \
436	\
437	/* Use the KSC 5601 table. */ \
438	ch = ksc5601_to_ucs4 (&inptr, inend - inptr, 0); \
439	} \
440	\
441	if (__glibc_unlikely (ch == 0)) \
442	{ \
443	result = __GCONV_INCOMPLETE_INPUT; \
444	break; \
445	} \
446	else if (__glibc_unlikely (ch == __UNKNOWN_10646_CHAR)) \
447	{ \
448	STANDARD_FROM_LOOP_ERR_HANDLER (1); \
449	} \
450	} \
451	\
452	put32 (outptr, ch); \
453	outptr += 4; \
454	}
455	#define LOOP_NEED_FLAGS
456	#define EXTRA_LOOP_DECLS , enum variant var, int *setp
457	#define INIT_PARAMS int set = *setp & CURRENT_SEL_MASK; \
458	int set2 = *setp & CURRENT_ASSIGN_MASK
459	#define UPDATE_PARAMS *setp = set \| set2
460	#include <iconv/loop.c>
461
462
/* Next, define the other direction.  */

/* The character set families that are tried when a character cannot be
   written with the currently selected set.  `none' is zero so that an
   exhausted conversion list compares equal to 0.  */
enum conversion { none = 0, european, japanese, chinese, korean, other };

/* A datatype for conversion lists.  A list packs up to five `enum
   conversion' values into one integer, three bits each, with the first
   element in the lowest bits.  */
typedef unsigned int cvlist_t;
#define CVLIST(cv1, cv2, cv3, cv4, cv5) \
  ((cv1) + ((cv2) << 3) + ((cv3) << 6) + ((cv4) << 9) + ((cv5) << 12))
/* First element of the list CVL.  */
#define CVLIST_FIRST(cvl) ((cvl) & ((1 << 3) - 1))
/* The list CVL without its first element.  */
#define CVLIST_REST(cvl) ((cvl) >> 3)
/* Preference order of the families, indexed by the stable language tag
   state shifted right by 8 bits.  */
static const cvlist_t conversion_lists[4] =
  {
    /* TAG_none */        CVLIST (japanese, european, chinese, korean, other),
    /* TAG_language_ja */ CVLIST (japanese, european, chinese, korean, other),
    /* TAG_language_ko */ CVLIST (korean, european, japanese, chinese, other),
    /* TAG_language_zh */ CVLIST (chinese, european, japanese, korean, other)
  };
480
481	#define MIN_NEEDED_INPUT TO_LOOP_MIN_NEEDED_FROM
482	#define MAX_NEEDED_INPUT TO_LOOP_MAX_NEEDED_FROM
483	#define MIN_NEEDED_OUTPUT TO_LOOP_MIN_NEEDED_TO
484	#define MAX_NEEDED_OUTPUT TO_LOOP_MAX_NEEDED_TO
485	#define LOOPFCT TO_LOOP
486	#define BODY \
487	{ \
488	uint32_t ch; \
489	size_t written; \
490	\
491	ch = get32 (inptr); \
492	\
493	if (var == iso2022jp2) \
494	{ \
495	/* Handle Unicode tag characters (range U+E0000..U+E007F). */ \
496	if (__glibc_unlikely ((ch >> 7) == (0xe0000 >> 7))) \
497	{ \
498	ch &= 0x7f; \
499	if (ch >= 'A' && ch <= 'Z') \
500	ch += 'a' - 'A'; \
501	if (ch == 0x01) \
502	tag = TAG_language; \
503	else if (ch == 'j' && tag == TAG_language) \
504	tag = TAG_language_j; \
505	else if (ch == 'a' && tag == TAG_language_j) \
506	tag = TAG_language_ja; \
507	else if (ch == 'k' && tag == TAG_language) \
508	tag = TAG_language_k; \
509	else if (ch == 'o' && tag == TAG_language_k) \
510	tag = TAG_language_ko; \
511	else if (ch == 'z' && tag == TAG_language) \
512	tag = TAG_language_z; \
513	else if (ch == 'h' && tag == TAG_language_z) \
514	tag = TAG_language_zh; \
515	else if (ch == 0x7f) \
516	tag = TAG_none; \
517	else \
518	{ \
519	/* Other tag characters reset the tag parsing state (if the \
520	current state is a temporary state) or are ignored (if \
521	the current state is a stable one). */ \
522	if (tag >= TAG_language) \
523	tag = TAG_none; \
524	} \
525	\
526	inptr += 4; \
527	continue; \
528	} \
529	\
530	/* Non-tag characters reset the tag parsing state, if the current \
531	state is a temporary state. */ \
532	if (__glibc_unlikely (tag >= TAG_language)) \
533	tag = TAG_none; \
534	} \
535	\
536	/* First see whether we can write the character using the currently \
537	selected character set. But ignore the selected character set if \
538	the current language tag shows different preferences. */ \
539	if (set == ASCII_set) \
540	{ \
541	/* Please note that the NUL byte is not matched if we are not \
542	currently using the ASCII charset. This is because we must \
543	switch to the initial state whenever a NUL byte is written. */ \
544	if (ch <= 0x7f) \
545	{ \
546	*outptr++ = ch; \
547	written = 1; \
548	\
549	/* At the beginning of a line, G2 designation is cleared. */ \
550	if (var == iso2022jp2 && ch == 0x0a) \
551	set2 = UNSPECIFIED_set; \
552	} \
553	else \
554	written = __UNKNOWN_10646_CHAR; \
555	} \
556	/* ISO-2022-JP recommends to encode the newline character always in \
557	ASCII since this allows a context-free interpretation of the \
558	characters at the beginning of the next line. Otherwise it would \
559	have to be known whether the last line ended using ASCII or \
560	JIS X 0201. */ \
561	else if (set == JISX0201_Roman_set \
562	&& (__builtin_expect (tag == TAG_none, 1) \
563	\|\| tag == TAG_language_ja)) \
564	{ \
565	unsigned char buf[1]; \
566	written = ucs4_to_jisx0201 (ch, buf); \
567	if (written != __UNKNOWN_10646_CHAR) \
568	{ \
569	if (buf[0] > 0x20 && buf[0] < 0x80) \
570	{ \
571	*outptr++ = buf[0]; \
572	written = 1; \
573	} \
574	else \
575	written = __UNKNOWN_10646_CHAR; \
576	} \
577	} \
578	else if (set == JISX0201_Kana_set \
579	&& (__builtin_expect (tag == TAG_none, 1) \
580	\|\| tag == TAG_language_ja)) \
581	{ \
582	unsigned char buf[1]; \
583	written = ucs4_to_jisx0201 (ch, buf); \
584	if (written != __UNKNOWN_10646_CHAR) \
585	{ \
586	if (buf[0] > 0xa0 && buf[0] < 0xe0) \
587	{ \
588	*outptr++ = buf[0] - 0x80; \
589	written = 1; \
590	} \
591	else \
592	written = __UNKNOWN_10646_CHAR; \
593	} \
594	} \
595	else \
596	{ \
597	if ((set == JISX0208_1978_set \|\| set == JISX0208_1983_set) \
598	&& (__builtin_expect (tag == TAG_none, 1) \
599	\|\| tag == TAG_language_ja)) \
600	written = ucs4_to_jisx0208 (ch, outptr, outend - outptr); \
601	else if (set == JISX0212_set \
602	&& (__builtin_expect (tag == TAG_none, 1) \
603	\|\| tag == TAG_language_ja)) \
604	written = ucs4_to_jisx0212 (ch, outptr, outend - outptr); \
605	else if (set == GB2312_set \
606	&& (__builtin_expect (tag == TAG_none, 1) \
607	\|\| tag == TAG_language_zh)) \
608	written = ucs4_to_gb2312 (ch, outptr, outend - outptr); \
609	else if (set == KSC5601_set \
610	&& (__builtin_expect (tag == TAG_none, 1) \
611	\|\| tag == TAG_language_ko)) \
612	written = ucs4_to_ksc5601 (ch, outptr, outend - outptr); \
613	else \
614	written = __UNKNOWN_10646_CHAR; \
615	\
616	if (__glibc_unlikely (written == 0)) \
617	{ \
618	result = __GCONV_FULL_OUTPUT; \
619	break; \
620	} \
621	else if (written != __UNKNOWN_10646_CHAR) \
622	outptr += written; \
623	} \
624	\
625	if (written == __UNKNOWN_10646_CHAR \
626	&& __builtin_expect (tag == TAG_none, 1)) \
627	{ \
628	if (set2 == ISO88591_set) \
629	{ \
630	if (ch >= 0x80 && ch <= 0xff) \
631	{ \
632	if (__glibc_unlikely (outptr + 3 > outend)) \
633	{ \
634	result = __GCONV_FULL_OUTPUT; \
635	break; \
636	} \
637	\
638	*outptr++ = ESC; \
639	*outptr++ = 'N'; \
640	*outptr++ = ch & 0x7f; \
641	written = 3; \
642	} \
643	} \
644	else if (set2 == ISO88597_set) \
645	{ \
646	if (__glibc_likely (ch < 0xffff)) \
647	{ \
648	const struct gap *rp = from_idx; \
649	\
650	while (ch > rp->end) \
651	++rp; \
652	if (ch >= rp->start) \
653	{ \
654	unsigned char res = \
655	iso88597_from_ucs4[ch - 0xa0 + rp->idx]; \
656	if (res != '\0') \
657	{ \
658	if (__glibc_unlikely (outptr + 3 > outend)) \
659	{ \
660	result = __GCONV_FULL_OUTPUT; \
661	break; \
662	} \
663	\
664	*outptr++ = ESC; \
665	*outptr++ = 'N'; \
666	*outptr++ = res & 0x7f; \
667	written = 3; \
668	} \
669	} \
670	} \
671	} \
672	} \
673	\
674	if (written == __UNKNOWN_10646_CHAR) \
675	{ \
676	/* The attempts to use the currently selected character set \
677	failed, either because the language tag changed, or because \
678	the character requires a different character set, or because \
679	the character is unknown. \
680	The CJK character sets partially overlap when seen as subsets \
681	of ISO 10646; therefore there is no single correct result. \
682	We use a preferrence order which depends on the language tag. */ \
683	\
684	if (ch <= 0x7f) \
685	{ \
686	/* We must encode using ASCII. First write out the \
687	escape sequence. */ \
688	if (__glibc_unlikely (outptr + 3 > outend)) \
689	{ \
690	result = __GCONV_FULL_OUTPUT; \
691	break; \
692	} \
693	\
694	*outptr++ = ESC; \
695	*outptr++ = '('; \
696	*outptr++ = 'B'; \
697	set = ASCII_set; \
698	\
699	if (__glibc_unlikely (outptr + 1 > outend)) \
700	{ \
701	result = __GCONV_FULL_OUTPUT; \
702	break; \
703	} \
704	*outptr++ = ch; \
705	\
706	/* At the beginning of a line, G2 designation is cleared. */ \
707	if (var == iso2022jp2 && ch == 0x0a) \
708	set2 = UNSPECIFIED_set; \
709	} \
710	else \
711	{ \
712	/* Now it becomes difficult. We must search the other \
713	character sets one by one. Use an ordered conversion \
714	list that depends on the current language tag. */ \
715	cvlist_t conversion_list; \
716	unsigned char buf[2]; \
717	int res = __GCONV_ILLEGAL_INPUT; \
718	\
719	if (var == iso2022jp2) \
720	conversion_list = conversion_lists[tag >> 8]; \
721	else \
722	conversion_list = CVLIST (japanese, 0, 0, 0, 0); \
723	\
724	do \
725	switch (CVLIST_FIRST (conversion_list)) \
726	{ \
727	case european: \
728	\
729	/* Try ISO 8859-1 upper half. */ \
730	if (ch >= 0x80 && ch <= 0xff) \
731	{ \
732	if (set2 != ISO88591_set) \
733	{ \
734	if (__builtin_expect (outptr + 3 > outend, 0)) \
735	{ \
736	res = __GCONV_FULL_OUTPUT; \
737	break; \
738	} \
739	*outptr++ = ESC; \
740	*outptr++ = '.'; \
741	*outptr++ = 'A'; \
742	set2 = ISO88591_set; \
743	} \
744	\
745	if (__glibc_unlikely (outptr + 3 > outend)) \
746	{ \
747	res = __GCONV_FULL_OUTPUT; \
748	break; \
749	} \
750	*outptr++ = ESC; \
751	*outptr++ = 'N'; \
752	*outptr++ = ch - 0x80; \
753	res = __GCONV_OK; \
754	break; \
755	} \
756	\
757	/* Try ISO 8859-7 upper half. */ \
758	if (__glibc_likely (ch < 0xffff)) \
759	{ \
760	const struct gap *rp = from_idx; \
761	\
762	while (ch > rp->end) \
763	++rp; \
764	if (ch >= rp->start) \
765	{ \
766	unsigned char ch2 = \
767	iso88597_from_ucs4[ch - 0xa0 + rp->idx]; \
768	if (ch2 != '\0') \
769	{ \
770	if (set2 != ISO88597_set) \
771	{ \
772	if (__builtin_expect (outptr + 3 > outend, \
773	0)) \
774	{ \
775	res = __GCONV_FULL_OUTPUT; \
776	break; \
777	} \
778	*outptr++ = ESC; \
779	*outptr++ = '.'; \
780	*outptr++ = 'F'; \
781	set2 = ISO88597_set; \
782	} \
783	\
784	if (__builtin_expect (outptr + 3 > outend, 0)) \
785	{ \
786	res = __GCONV_FULL_OUTPUT; \
787	break; \
788	} \
789	*outptr++ = ESC; \
790	*outptr++ = 'N'; \
791	*outptr++ = ch2 - 0x80; \
792	res = __GCONV_OK; \
793	break; \
794	} \
795	} \
796	} \
797	\
798	break; \
799	\
800	case japanese: \
801	\
802	/* Try JIS X 0201 Roman. */ \
803	written = ucs4_to_jisx0201 (ch, buf); \
804	if (written != __UNKNOWN_10646_CHAR \
805	&& buf[0] > 0x20 && buf[0] < 0x80) \
806	{ \
807	if (set != JISX0201_Roman_set) \
808	{ \
809	if (__builtin_expect (outptr + 3 > outend, 0)) \
810	{ \
811	res = __GCONV_FULL_OUTPUT; \
812	break; \
813	} \
814	*outptr++ = ESC; \
815	*outptr++ = '('; \
816	*outptr++ = 'J'; \
817	set = JISX0201_Roman_set; \
818	} \
819	\
820	if (__glibc_unlikely (outptr + 1 > outend)) \
821	{ \
822	res = __GCONV_FULL_OUTPUT; \
823	break; \
824	} \
825	*outptr++ = buf[0]; \
826	res = __GCONV_OK; \
827	break; \
828	} \
829	\
830	/* Try JIS X 0208. */ \
831	written = ucs4_to_jisx0208 (ch, buf, 2); \
832	if (written != __UNKNOWN_10646_CHAR) \
833	{ \
834	if (set != JISX0208_1983_set) \
835	{ \
836	if (__builtin_expect (outptr + 3 > outend, 0)) \
837	{ \
838	res = __GCONV_FULL_OUTPUT; \
839	break; \
840	} \
841	*outptr++ = ESC; \
842	*outptr++ = '$'; \
843	*outptr++ = 'B'; \
844	set = JISX0208_1983_set; \
845	} \
846	\
847	if (__glibc_unlikely (outptr + 2 > outend)) \
848	{ \
849	res = __GCONV_FULL_OUTPUT; \
850	break; \
851	} \
852	*outptr++ = buf[0]; \
853	*outptr++ = buf[1]; \
854	res = __GCONV_OK; \
855	break; \
856	} \
857	\
858	if (__glibc_unlikely (var == iso2022jp)) \
859	/* Don't use the other Japanese character sets. */ \
860	break; \
861	\
862	/* Try JIS X 0212. */ \
863	written = ucs4_to_jisx0212 (ch, buf, 2); \
864	if (written != __UNKNOWN_10646_CHAR) \
865	{ \
866	if (set != JISX0212_set) \
867	{ \
868	if (__builtin_expect (outptr + 4 > outend, 0)) \
869	{ \
870	res = __GCONV_FULL_OUTPUT; \
871	break; \
872	} \
873	*outptr++ = ESC; \
874	*outptr++ = '$'; \
875	*outptr++ = '('; \
876	*outptr++ = 'D'; \
877	set = JISX0212_set; \
878	} \
879	\
880	if (__glibc_unlikely (outptr + 2 > outend)) \
881	{ \
882	res = __GCONV_FULL_OUTPUT; \
883	break; \
884	} \
885	*outptr++ = buf[0]; \
886	*outptr++ = buf[1]; \
887	res = __GCONV_OK; \
888	break; \
889	} \
890	\
891	break; \
892	\
893	case chinese: \
894	assert (var == iso2022jp2); \
895	\
896	/* Try GB 2312. */ \
897	written = ucs4_to_gb2312 (ch, buf, 2); \
898	if (written != __UNKNOWN_10646_CHAR) \
899	{ \
900	if (set != GB2312_set) \
901	{ \
902	if (__builtin_expect (outptr + 3 > outend, 0)) \
903	{ \
904	res = __GCONV_FULL_OUTPUT; \
905	break; \
906	} \
907	*outptr++ = ESC; \
908	*outptr++ = '$'; \
909	*outptr++ = 'A'; \
910	set = GB2312_set; \
911	} \
912	\
913	if (__glibc_unlikely (outptr + 2 > outend)) \
914	{ \
915	res = __GCONV_FULL_OUTPUT; \
916	break; \
917	} \
918	*outptr++ = buf[0]; \
919	*outptr++ = buf[1]; \
920	res = __GCONV_OK; \
921	break; \
922	} \
923	\
924	break; \
925	\
926	case korean: \
927	assert (var == iso2022jp2); \
928	\
929	/* Try KSC 5601. */ \
930	written = ucs4_to_ksc5601 (ch, buf, 2); \
931	if (written != __UNKNOWN_10646_CHAR) \
932	{ \
933	if (set != KSC5601_set) \
934	{ \
935	if (__builtin_expect (outptr + 4 > outend, 0)) \
936	{ \
937	res = __GCONV_FULL_OUTPUT; \
938	break; \
939	} \
940	*outptr++ = ESC; \
941	*outptr++ = '$'; \
942	*outptr++ = '('; \
943	*outptr++ = 'C'; \
944	set = KSC5601_set; \
945	} \
946	\
947	if (__glibc_unlikely (outptr + 2 > outend)) \
948	{ \
949	res = __GCONV_FULL_OUTPUT; \
950	break; \
951	} \
952	*outptr++ = buf[0]; \
953	*outptr++ = buf[1]; \
954	res = __GCONV_OK; \
955	break; \
956	} \
957	\
958	break; \
959	\
960	case other: \
961	assert (var == iso2022jp2); \
962	\
963	/* Try JIS X 0201 Kana. This is not officially part \
964	of ISO-2022-JP-2, according to RFC 1554. Therefore \
965	we try this only after all other attempts. */ \
966	written = ucs4_to_jisx0201 (ch, buf); \
967	if (written != __UNKNOWN_10646_CHAR && buf[0] >= 0x80) \
968	{ \
969	if (set != JISX0201_Kana_set) \
970	{ \
971	if (__builtin_expect (outptr + 3 > outend, 0)) \
972	{ \
973	res = __GCONV_FULL_OUTPUT; \
974	break; \
975	} \
976	*outptr++ = ESC; \
977	*outptr++ = '('; \
978	*outptr++ = 'I'; \
979	set = JISX0201_Kana_set; \
980	} \
981	\
982	if (__glibc_unlikely (outptr + 1 > outend)) \
983	{ \
984	res = __GCONV_FULL_OUTPUT; \
985	break; \
986	} \
987	*outptr++ = buf[0] - 0x80; \
988	res = __GCONV_OK; \
989	break; \
990	} \
991	\
992	break; \
993	\
994	default: \
995	abort (); \
996	} \
997	while (res == __GCONV_ILLEGAL_INPUT \
998	&& (conversion_list = CVLIST_REST (conversion_list)) != 0);\
999	\
1000	if (res == __GCONV_FULL_OUTPUT) \
1001	{ \
1002	result = res; \
1003	break; \
1004	} \
1005	\
1006	if (res == __GCONV_ILLEGAL_INPUT) \
1007	{ \
1008	STANDARD_TO_LOOP_ERR_HANDLER (4); \
1009	} \
1010	} \
1011	} \
1012	\
1013	/* Now that we wrote the output increment the input pointer. */ \
1014	inptr += 4; \
1015	}
1016	#define LOOP_NEED_FLAGS
1017	#define EXTRA_LOOP_DECLS , enum variant var, int *setp
1018	#define INIT_PARAMS int set = *setp & CURRENT_SEL_MASK; \
1019	int set2 = *setp & CURRENT_ASSIGN_MASK; \
1020	int tag = *setp & CURRENT_TAG_MASK;
1021	#define REINIT_PARAMS do \
1022	{ \
1023	set = *setp & CURRENT_SEL_MASK; \
1024	set2 = *setp & CURRENT_ASSIGN_MASK; \
1025	tag = *setp & CURRENT_TAG_MASK; \
1026	} \
1027	while (0)
1028	#define UPDATE_PARAMS *setp = set \| set2 \| tag
1029	#include <iconv/loop.c>
1030
1031
/* Now define the toplevel functions.  */
1033	#include <iconv/skeleton.c>
1034

Browse the source code of glibc/iconvdata/iso-2022-jp.c