iso-2022-jp.c source code [glibc/iconvdata/iso-2022-jp.c]

/* Conversion module for ISO-2022-JP and ISO-2022-JP-2.
   Copyright (C) 1998-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
19
20	#include <assert.h>
21	#include <dlfcn.h>
22	#include <gconv.h>
23	#include <stdint.h>
24	#include <stdlib.h>
25	#include <string.h>
26	#include "jis0201.h"
27	#include "jis0208.h"
28	#include "jis0212.h"
29	#include "gb2312.h"
30	#include "ksc5601.h"
31
/* One entry of the range index used for the UCS4 -> ISO 8859-7 lookup
   in this file.  The lookup code walks an array of these entries until
   it finds the first one whose END is not below the character, accepts
   the character if it is not below START, and then reads the table
   slot `ch - 0xa0 + idx'.  */
struct gap
{
  uint16_t start;	/* First UCS4 value covered by this range.  */
  uint16_t end;		/* Last UCS4 value covered by this range.  */
  int32_t idx;		/* Offset added when indexing the byte table.  */
};
38
39	#include "iso8859-7jp.h"
40
/* This makes obvious what everybody knows: 0x1b is the Esc character.  */
#define ESC 0x1b
43
/* We provide our own initialization and destructor function (gconv_init
   and gconv_end in this file), so both switches are set to 0.  */
#define DEFINE_INIT 0
#define DEFINE_FINI 0
47
/* Definitions used in the body of the `gconv' function.

   FROM_LOOP and TO_LOOP name the two conversion loops defined later in
   this file.  The *_NEEDED_* values are the per-character byte counts
   that gconv_init copies into the step description: the ISO-2022-JP
   side uses 1 to 4 input bytes when decoding and 1 to 6 output bytes
   when encoding, the UCS4 side always uses 4 bytes.  */
#define FROM_LOOP from_iso2022jp_loop
#define TO_LOOP to_iso2022jp_loop
#define ONE_DIRECTION 0
#define FROM_LOOP_MIN_NEEDED_FROM 1
#define FROM_LOOP_MAX_NEEDED_FROM 4
#define FROM_LOOP_MIN_NEEDED_TO 4
#define FROM_LOOP_MAX_NEEDED_TO 4
#define TO_LOOP_MIN_NEEDED_FROM 4
#define TO_LOOP_MAX_NEEDED_FROM 4
#define TO_LOOP_MIN_NEEDED_TO 1
#define TO_LOOP_MAX_NEEDED_TO 6
#define FROM_DIRECTION (dir == from_iso2022jp)
/* Locals set up before the loops run: the direction and variant stored
   by gconv_init, a scratch copy of the state for SAVE_RESET_STATE, and
   a pointer to the state word itself.  */
#define PREPARE_LOOP \
  enum direction dir = ((struct iso2022jp_data *) step->__data)->dir; \
  enum variant var = ((struct iso2022jp_data *) step->__data)->var; \
  int save_set; \
  int *setp = &data->__statep->__count;
#define EXTRA_LOOP_ARGS , var, setp
67
68
/* Direction of the transformation.  illegal_dir is the initial value
   used by gconv_init before a conversion name has been matched.  */
enum direction
{
  illegal_dir,
  to_iso2022jp,
  from_iso2022jp
};
76
/* We handle ISO-2022-JP and ISO-2022-JP-2 here.  illegal_var is the
   initial value used by gconv_init before a name has been matched.  */
enum variant
{
  illegal_var,
  iso2022jp,
  iso2022jp2
};
84
85
86	struct iso2022jp_data
87	{
88	enum direction dir;
89	enum variant var;
90	};
91
92
/* The COUNT element of the state keeps track of the currently selected
   character set.  The selection occupies bits 3..5 of the state word
   (CURRENT_SEL_MASK).  The possible values are:  */
enum
{
  ASCII_set = 0,
  JISX0208_1978_set = 1 << 3,
  JISX0208_1983_set = 2 << 3,
  JISX0201_Roman_set = 3 << 3,
  JISX0201_Kana_set = 4 << 3,
  GB2312_set = 5 << 3,
  KSC5601_set = 6 << 3,
  JISX0212_set = 7 << 3,
  CURRENT_SEL_MASK = 7 << 3
};
107
/* The second value stored is the designation of the G2 set.  It
   occupies bits 6..7 of the state word (CURRENT_ASSIGN_MASK).  The
   following values are possible:  */
enum
{
  UNSPECIFIED_set = 0,
  ISO88591_set = 1 << 6,
  ISO88597_set = 2 << 6,
  CURRENT_ASSIGN_MASK = 3 << 6
};
117
/* The third value, only used during conversion from Unicode to
   ISO-2022-JP-2, describes the language tag parsing status.  It
   occupies bits 8..10 of the state word (CURRENT_TAG_MASK).  The
   possible values are as follows.  Values >= TAG_language are temporary
   tag parsing states; the stable states (TAG_none and the three
   complete language tags) are numbered 0..3 so that `tag >> 8' can
   index the conversion_lists table.  */
enum
{
  TAG_none = 0,
  TAG_language = 4 << 8,
  TAG_language_j = 5 << 8,
  TAG_language_ja = 1 << 8,
  TAG_language_k = 6 << 8,
  TAG_language_ko = 2 << 8,
  TAG_language_z = 7 << 8,
  TAG_language_zh = 3 << 8,
  CURRENT_TAG_MASK = 7 << 8
};
133
134
135	extern int gconv_init (struct __gconv_step *step);
136	int
137	gconv_init (struct __gconv_step *step)
138	{
139	/ Determine which direction. /
140	struct iso2022jp_data *new_data;
141	enum direction dir = illegal_dir;
142	enum variant var = illegal_var;
143	int result;
144
145	if (__strcasecmp (step->__from_name, "ISO-2022-JP//") == `0`)
146	{
147	dir = from_iso2022jp;
148	var = iso2022jp;
149	}
150	else if (__strcasecmp (step->__to_name, "ISO-2022-JP//") == `0`)
151	{
152	dir = to_iso2022jp;
153	var = iso2022jp;
154	}
155	else if (__strcasecmp (step->__from_name, "ISO-2022-JP-2//") == `0`)
156	{
157	dir = from_iso2022jp;
158	var = iso2022jp2;
159	}
160	else if (__strcasecmp (step->__to_name, "ISO-2022-JP-2//") == `0`)
161	{
162	dir = to_iso2022jp;
163	var = iso2022jp2;
164	}
165
166	result = __GCONV_NOCONV;
167	if (__builtin_expect (dir, from_iso2022jp) != illegal_dir)
168	{
169	new_data
170	= (struct iso2022jp_data ) malloc (sizeof* (struct iso2022jp_data));
171
172	result = __GCONV_NOMEM;
173	if (new_data != NULL)
174	{
175	new_data->dir = dir;
176	new_data->var = var;
177	step->__data = new_data;
178
179	if (dir == from_iso2022jp)
180	{
181	step->__min_needed_from = FROM_LOOP_MIN_NEEDED_FROM;
182	step->__max_needed_from = FROM_LOOP_MAX_NEEDED_FROM;
183	step->__min_needed_to = FROM_LOOP_MIN_NEEDED_TO;
184	step->__max_needed_to = FROM_LOOP_MAX_NEEDED_TO;
185	}
186	else
187	{
188	step->__min_needed_from = TO_LOOP_MIN_NEEDED_FROM;
189	step->__max_needed_from = TO_LOOP_MAX_NEEDED_FROM;
190	step->__min_needed_to = TO_LOOP_MIN_NEEDED_TO;
191	step->__max_needed_to = TO_LOOP_MAX_NEEDED_TO;
192	}
193
194	/ Yes, this is a stateful encoding. /
195	step->__stateful = `1`;
196
197	result = __GCONV_OK;
198	}
199	}
200
201	return result;
202	}
203
204
205	extern void gconv_end (struct __gconv_step *data);
206	void
207	gconv_end (struct __gconv_step *data)
208	{
209	free (data->__data);
210	}
211
212
/* Since this is a stateful encoding we have to provide code which resets
   the output state to the initial state.  This has to be done during the
   flushing.

   The low three bits of the state word are preserved in every branch;
   everything above them (selected set, G2 designation, tag state) is
   cleared.  Only when encoding with a non-ASCII set selected does any
   output have to be produced: the three bytes `Esc ( B'.  If there is
   no room for them, STATUS is set to __GCONV_FULL_OUTPUT and the state
   is left untouched.  */
#define EMIT_SHIFT_TO_INIT \
  /* Avoid warning about unused variable 'var'.  */ \
  (void) var; \
  \
  if ((data->__statep->__count & ~7) != ASCII_set) \
    { \
      if (dir == from_iso2022jp \
          || (data->__statep->__count & CURRENT_SEL_MASK) == ASCII_set) \
        { \
          /* It's easy, we don't have to emit anything, we just reset the \
             state for the input.  Note that this also clears the G2 \
             designation.  */ \
          data->__statep->__count &= 7; \
          data->__statep->__count |= ASCII_set; \
        } \
      else \
        { \
          /* We are not in the initial state.  To switch back we have \
             to emit the sequence `Esc ( B'.  */ \
          if (__glibc_unlikely (outbuf + 3 > outend)) \
            /* We don't have enough room in the output buffer.  */ \
            status = __GCONV_FULL_OUTPUT; \
          else \
            { \
              /* Write out the shift sequence.  */ \
              *outbuf++ = ESC; \
              *outbuf++ = '('; \
              *outbuf++ = 'B'; \
              /* Note that this also clears the G2 designation.  */ \
              data->__statep->__count &= 7; \
              data->__statep->__count |= ASCII_set; \
            } \
        } \
    }
250
251
/* Since we might have to reset the input pointer we must be able to
   save and restore the state.  A non-zero SAVE copies the state word
   into save_set; zero writes the saved value back.  */
#define SAVE_RESET_STATE(Save) \
  if (Save) \
    save_set = *setp; \
  else \
    *setp = save_set
259
260
/* First define the conversion function from ISO-2022-JP to UCS4.

   Each pass through BODY looks at the byte at INPTR.  Escape sequences
   that designate a character set only update SET (the active one- or
   two-byte set) or SET2 (the G2 designation) and restart the loop
   without producing output.  Everything else is turned into exactly
   one UCS4 value which is stored with put32.  */
#define MIN_NEEDED_INPUT FROM_LOOP_MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT FROM_LOOP_MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT FROM_LOOP_MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT FROM_LOOP_MAX_NEEDED_TO
#define LOOPFCT FROM_LOOP
#define BODY \
  { \
    uint32_t ch = *inptr; \
    \
    /* Recognize escape sequences.  */ \
    if (__builtin_expect (ch, 0) == ESC) \
      { \
        /* We now must be prepared to read two to three more \
           characters.  If we have a match in the first character but \
           then the input buffer ends we terminate with an error since \
           we must not risk missing an escape sequence just because it \
           is not entirely in the current input buffer.  */ \
        if (__builtin_expect (inptr + 2 >= inend, 0) \
            || (var == iso2022jp2 && inptr[1] == '$' && inptr[2] == '(' \
                && __builtin_expect (inptr + 3 >= inend, 0))) \
          { \
            /* Not enough input available.  */ \
            result = __GCONV_INCOMPLETE_INPUT; \
            break; \
          } \
        \
        if (inptr[1] == '(') \
          { \
            if (inptr[2] == 'B') \
              { \
                /* ASCII selected.  */ \
                set = ASCII_set; \
                inptr += 3; \
                continue; \
              } \
            else if (inptr[2] == 'J') \
              { \
                /* JIS X 0201 Roman selected.  */ \
                set = JISX0201_Roman_set; \
                inptr += 3; \
                continue; \
              } \
            else if (var == iso2022jp2 && inptr[2] == 'I') \
              { \
                /* JIS X 0201 Kana selected.  */ \
                set = JISX0201_Kana_set; \
                inptr += 3; \
                continue; \
              } \
          } \
        else if (inptr[1] == '$') \
          { \
            if (inptr[2] == '@') \
              { \
                /* JIS X 0208-1978 selected.  */ \
                set = JISX0208_1978_set; \
                inptr += 3; \
                continue; \
              } \
            else if (inptr[2] == 'B') \
              { \
                /* JIS X 0208-1983 selected.  */ \
                set = JISX0208_1983_set; \
                inptr += 3; \
                continue; \
              } \
            else if (var == iso2022jp2) \
              { \
                if (inptr[2] == 'A') \
                  { \
                    /* GB 2312-1980 selected.  */ \
                    set = GB2312_set; \
                    inptr += 3; \
                    continue; \
                  } \
                else if (inptr[2] == '(') \
                  { \
                    if (inptr[3] == 'C') \
                      { \
                        /* KSC 5601-1987 selected.  */ \
                        set = KSC5601_set; \
                        inptr += 4; \
                        continue; \
                      } \
                    else if (inptr[3] == 'D') \
                      { \
                        /* JIS X 0212-1990 selected.  */ \
                        set = JISX0212_set; \
                        inptr += 4; \
                        continue; \
                      } \
                  } \
              } \
          } \
        else if (var == iso2022jp2 && inptr[1] == '.') \
          { \
            if (inptr[2] == 'A') \
              { \
                /* ISO 8859-1-GR selected.  */ \
                set2 = ISO88591_set; \
                inptr += 3; \
                continue; \
              } \
            else if (inptr[2] == 'F') \
              { \
                /* ISO 8859-7-GR selected.  */ \
                set2 = ISO88597_set; \
                inptr += 3; \
                continue; \
              } \
          } \
      } \
    \
    /* `Esc N' is a single shift: the next byte is taken from the set \
       currently designated as G2.  */ \
    if (ch == ESC && var == iso2022jp2 && inptr[1] == 'N') \
      { \
        if (set2 == ISO88591_set) \
          { \
            ch = inptr[2] | 0x80; \
            inptr += 3; \
          } \
        else if (__builtin_expect (set2, ISO88597_set) == ISO88597_set) \
          { \
            /* We use the table from the ISO 8859-7 module.  */ \
            if (inptr[2] < 0x20 || inptr[2] >= 0x80) \
              STANDARD_FROM_LOOP_ERR_HANDLER (1); \
            ch = iso88597_to_ucs4[inptr[2] - 0x20]; \
            if (ch == 0) \
              STANDARD_FROM_LOOP_ERR_HANDLER (3); \
            inptr += 3; \
          } \
        else \
          { \
            STANDARD_FROM_LOOP_ERR_HANDLER (1); \
          } \
      } \
    else if (ch >= 0x80) \
      { \
        STANDARD_FROM_LOOP_ERR_HANDLER (1); \
      } \
    else if (set == ASCII_set || (ch < 0x21 || ch == 0x7f)) \
      /* Almost done, just advance the input pointer.  */ \
      ++inptr; \
    else if (set == JISX0201_Roman_set) \
      { \
        /* Use the JIS X 0201 table.  */ \
        ch = jisx0201_to_ucs4 (ch); \
        if (__glibc_unlikely (ch == __UNKNOWN_10646_CHAR)) \
          STANDARD_FROM_LOOP_ERR_HANDLER (1); \
        ++inptr; \
      } \
    else if (set == JISX0201_Kana_set) \
      { \
        /* Use the JIS X 0201 table.  */ \
        ch = jisx0201_to_ucs4 (ch + 0x80); \
        if (__glibc_unlikely (ch == __UNKNOWN_10646_CHAR)) \
          STANDARD_FROM_LOOP_ERR_HANDLER (1); \
        ++inptr; \
      } \
    else \
      { \
        if (set == JISX0208_1978_set || set == JISX0208_1983_set) \
          /* XXX I don't have the tables for these two old variants of \
             JIS X 0208.  Therefore I'm using the tables for JIS X \
             0208-1990.  If somebody has problems with this please \
             provide the appropriate tables.  */ \
          ch = jisx0208_to_ucs4 (&inptr, inend - inptr, 0); \
        else if (set == JISX0212_set) \
          /* Use the JIS X 0212 table.  */ \
          ch = jisx0212_to_ucs4 (&inptr, inend - inptr, 0); \
        else if (set == GB2312_set) \
          /* Use the GB 2312 table.  */ \
          ch = gb2312_to_ucs4 (&inptr, inend - inptr, 0); \
        else \
          { \
            assert (set == KSC5601_set); \
            \
            /* Use the KSC 5601 table.  */ \
            ch = ksc5601_to_ucs4 (&inptr, inend - inptr, 0); \
          } \
        \
        if (__glibc_unlikely (ch == 0)) \
          { \
            result = __GCONV_INCOMPLETE_INPUT; \
            break; \
          } \
        else if (__glibc_unlikely (ch == __UNKNOWN_10646_CHAR)) \
          { \
            STANDARD_FROM_LOOP_ERR_HANDLER (1); \
          } \
      } \
    \
    put32 (outptr, ch); \
    outptr += 4; \
  }
#define LOOP_NEED_FLAGS
#define EXTRA_LOOP_DECLS , enum variant var, int *setp
#define INIT_PARAMS int set = *setp & CURRENT_SEL_MASK; \
                    int set2 = *setp & CURRENT_ASSIGN_MASK
#define UPDATE_PARAMS *setp = set | set2
461	#include <iconv/loop.c>
462
463
/* Next, define the other direction.  */

/* Character set families tried when encoding a character that the
   currently selected set cannot represent.  `none' is 0 so that an
   exhausted conversion list compares equal to zero.  */
enum conversion { none = 0, european, japanese, chinese, korean, other };

/* A datatype for conversion lists.  A list packs up to five `enum
   conversion' values into one integer, three bits each, with the first
   entry in the lowest bits.  CVLIST_FIRST extracts the head of the
   list and CVLIST_REST drops it.  */
typedef unsigned int cvlist_t;
#define CVLIST(cv1, cv2, cv3, cv4, cv5) \
  ((cv1) + ((cv2) << 3) + ((cv3) << 6) + ((cv4) << 9) + ((cv5) << 12))
#define CVLIST_FIRST(cvl) ((cvl) & ((1 << 3) - 1))
#define CVLIST_REST(cvl) ((cvl) >> 3)
/* Preference order per stable language tag, indexed by `tag >> 8'.  */
static const cvlist_t conversion_lists[4] =
  {
    /* TAG_none */        CVLIST (japanese, european, chinese, korean, other),
    /* TAG_language_ja */ CVLIST (japanese, european, chinese, korean, other),
    /* TAG_language_ko */ CVLIST (korean, european, japanese, chinese, other),
    /* TAG_language_zh */ CVLIST (chinese, european, japanese, korean, other)
  };
481
/* The conversion function from UCS4 to ISO-2022-JP / ISO-2022-JP-2.

   Each pass through BODY reads one UCS4 value.  For ISO-2022-JP-2,
   Unicode tag characters only update the language tag state and
   produce no output.  Any other character is first tried in the
   currently selected set, then via a single shift into the current G2
   set, and finally by walking a conversion list, emitting whatever
   designation escape sequence the chosen set requires.  */
#define MIN_NEEDED_INPUT TO_LOOP_MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT TO_LOOP_MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT TO_LOOP_MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT TO_LOOP_MAX_NEEDED_TO
#define LOOPFCT TO_LOOP
#define BODY \
  { \
    uint32_t ch; \
    size_t written; \
    \
    ch = get32 (inptr); \
    \
    if (var == iso2022jp2) \
      { \
        /* Handle Unicode tag characters (range U+E0000..U+E007F).  */ \
        if (__glibc_unlikely ((ch >> 7) == (0xe0000 >> 7))) \
          { \
            ch &= 0x7f; \
            if (ch >= 'A' && ch <= 'Z') \
              ch += 'a' - 'A'; \
            if (ch == 0x01) \
              tag = TAG_language; \
            else if (ch == 'j' && tag == TAG_language) \
              tag = TAG_language_j; \
            else if (ch == 'a' && tag == TAG_language_j) \
              tag = TAG_language_ja; \
            else if (ch == 'k' && tag == TAG_language) \
              tag = TAG_language_k; \
            else if (ch == 'o' && tag == TAG_language_k) \
              tag = TAG_language_ko; \
            else if (ch == 'z' && tag == TAG_language) \
              tag = TAG_language_z; \
            else if (ch == 'h' && tag == TAG_language_z) \
              tag = TAG_language_zh; \
            else if (ch == 0x7f) \
              tag = TAG_none; \
            else \
              { \
                /* Other tag characters reset the tag parsing state (if the \
                   current state is a temporary state) or are ignored (if \
                   the current state is a stable one).  */ \
                if (tag >= TAG_language) \
                  tag = TAG_none; \
              } \
            \
            inptr += 4; \
            continue; \
          } \
        \
        /* Non-tag characters reset the tag parsing state, if the current \
           state is a temporary state.  */ \
        if (__glibc_unlikely (tag >= TAG_language)) \
          tag = TAG_none; \
      } \
    \
    /* First see whether we can write the character using the currently \
       selected character set.  But ignore the selected character set if \
       the current language tag shows different preferences.  */ \
    if (set == ASCII_set) \
      { \
        /* Please note that the NUL byte is not matched if we are not \
           currently using the ASCII charset.  This is because we must \
           switch to the initial state whenever a NUL byte is written.  */ \
        if (ch <= 0x7f) \
          { \
            *outptr++ = ch; \
            written = 1; \
            \
            /* At the beginning of a line, G2 designation is cleared.  */ \
            if (var == iso2022jp2 && ch == 0x0a) \
              set2 = UNSPECIFIED_set; \
          } \
        else \
          written = __UNKNOWN_10646_CHAR; \
      } \
    /* ISO-2022-JP recommends to encode the newline character always in \
       ASCII since this allows a context-free interpretation of the \
       characters at the beginning of the next line.  Otherwise it would \
       have to be known whether the last line ended using ASCII or \
       JIS X 0201.  */ \
    else if (set == JISX0201_Roman_set \
             && (__builtin_expect (tag == TAG_none, 1) \
                 || tag == TAG_language_ja)) \
      { \
        unsigned char buf[1]; \
        written = ucs4_to_jisx0201 (ch, buf); \
        if (written != __UNKNOWN_10646_CHAR) \
          { \
            if (buf[0] > 0x20 && buf[0] < 0x80) \
              { \
                *outptr++ = buf[0]; \
                written = 1; \
              } \
            else \
              written = __UNKNOWN_10646_CHAR; \
          } \
      } \
    else if (set == JISX0201_Kana_set \
             && (__builtin_expect (tag == TAG_none, 1) \
                 || tag == TAG_language_ja)) \
      { \
        unsigned char buf[1]; \
        written = ucs4_to_jisx0201 (ch, buf); \
        if (written != __UNKNOWN_10646_CHAR) \
          { \
            if (buf[0] > 0xa0 && buf[0] < 0xe0) \
              { \
                *outptr++ = buf[0] - 0x80; \
                written = 1; \
              } \
            else \
              written = __UNKNOWN_10646_CHAR; \
          } \
      } \
    else \
      { \
        if ((set == JISX0208_1978_set || set == JISX0208_1983_set) \
            && (__builtin_expect (tag == TAG_none, 1) \
                || tag == TAG_language_ja)) \
          written = ucs4_to_jisx0208 (ch, outptr, outend - outptr); \
        else if (set == JISX0212_set \
                 && (__builtin_expect (tag == TAG_none, 1) \
                     || tag == TAG_language_ja)) \
          written = ucs4_to_jisx0212 (ch, outptr, outend - outptr); \
        else if (set == GB2312_set \
                 && (__builtin_expect (tag == TAG_none, 1) \
                     || tag == TAG_language_zh)) \
          written = ucs4_to_gb2312 (ch, outptr, outend - outptr); \
        else if (set == KSC5601_set \
                 && (__builtin_expect (tag == TAG_none, 1) \
                     || tag == TAG_language_ko)) \
          written = ucs4_to_ksc5601 (ch, outptr, outend - outptr); \
        else \
          written = __UNKNOWN_10646_CHAR; \
        \
        if (__glibc_unlikely (written == 0)) \
          { \
            result = __GCONV_FULL_OUTPUT; \
            break; \
          } \
        else if (written != __UNKNOWN_10646_CHAR) \
          outptr += written; \
      } \
    \
    /* Next try a single shift (`Esc N') into the set that is already \
       designated as G2.  */ \
    if (written == __UNKNOWN_10646_CHAR \
        && __builtin_expect (tag == TAG_none, 1)) \
      { \
        if (set2 == ISO88591_set) \
          { \
            if (ch >= 0x80 && ch <= 0xff) \
              { \
                if (__glibc_unlikely (outptr + 3 > outend)) \
                  { \
                    result = __GCONV_FULL_OUTPUT; \
                    break; \
                  } \
                \
                *outptr++ = ESC; \
                *outptr++ = 'N'; \
                *outptr++ = ch & 0x7f; \
                written = 3; \
              } \
          } \
        else if (set2 == ISO88597_set) \
          { \
            if (__glibc_likely (ch < 0xffff)) \
              { \
                const struct gap *rp = from_idx; \
                \
                while (ch > rp->end) \
                  ++rp; \
                if (ch >= rp->start) \
                  { \
                    unsigned char res = \
                      iso88597_from_ucs4[ch - 0xa0 + rp->idx]; \
                    if (res != '\0') \
                      { \
                        if (__glibc_unlikely (outptr + 3 > outend)) \
                          { \
                            result = __GCONV_FULL_OUTPUT; \
                            break; \
                          } \
                        \
                        *outptr++ = ESC; \
                        *outptr++ = 'N'; \
                        *outptr++ = res & 0x7f; \
                        written = 3; \
                      } \
                  } \
              } \
          } \
      } \
    \
    if (written == __UNKNOWN_10646_CHAR) \
      { \
        /* The attempts to use the currently selected character set \
           failed, either because the language tag changed, or because \
           the character requires a different character set, or because \
           the character is unknown. \
           The CJK character sets partially overlap when seen as subsets \
           of ISO 10646; therefore there is no single correct result. \
           We use a preference order which depends on the language tag.  */ \
        \
        if (ch <= 0x7f) \
          { \
            /* We must encode using ASCII.  First write out the \
               escape sequence.  */ \
            if (__glibc_unlikely (outptr + 3 > outend)) \
              { \
                result = __GCONV_FULL_OUTPUT; \
                break; \
              } \
            \
            *outptr++ = ESC; \
            *outptr++ = '('; \
            *outptr++ = 'B'; \
            set = ASCII_set; \
            \
            if (__glibc_unlikely (outptr + 1 > outend)) \
              { \
                result = __GCONV_FULL_OUTPUT; \
                break; \
              } \
            *outptr++ = ch; \
            \
            /* At the beginning of a line, G2 designation is cleared.  */ \
            if (var == iso2022jp2 && ch == 0x0a) \
              set2 = UNSPECIFIED_set; \
          } \
        else \
          { \
            /* Now it becomes difficult.  We must search the other \
               character sets one by one.  Use an ordered conversion \
               list that depends on the current language tag.  */ \
            cvlist_t conversion_list; \
            unsigned char buf[2]; \
            int res = __GCONV_ILLEGAL_INPUT; \
            \
            if (var == iso2022jp2) \
              conversion_list = conversion_lists[tag >> 8]; \
            else \
              conversion_list = CVLIST (japanese, 0, 0, 0, 0); \
            \
            /* Inside the switch a `break' only leaves the current case; \
               RES tells the surrounding do-while whether to go on.  */ \
            do \
              switch (CVLIST_FIRST (conversion_list)) \
                { \
                case european: \
                  \
                  /* Try ISO 8859-1 upper half.  */ \
                  if (ch >= 0x80 && ch <= 0xff) \
                    { \
                      if (set2 != ISO88591_set) \
                        { \
                          if (__builtin_expect (outptr + 3 > outend, 0)) \
                            { \
                              res = __GCONV_FULL_OUTPUT; \
                              break; \
                            } \
                          *outptr++ = ESC; \
                          *outptr++ = '.'; \
                          *outptr++ = 'A'; \
                          set2 = ISO88591_set; \
                        } \
                      \
                      if (__glibc_unlikely (outptr + 3 > outend)) \
                        { \
                          res = __GCONV_FULL_OUTPUT; \
                          break; \
                        } \
                      *outptr++ = ESC; \
                      *outptr++ = 'N'; \
                      *outptr++ = ch - 0x80; \
                      res = __GCONV_OK; \
                      break; \
                    } \
                  \
                  /* Try ISO 8859-7 upper half.  */ \
                  if (__glibc_likely (ch < 0xffff)) \
                    { \
                      const struct gap *rp = from_idx; \
                      \
                      while (ch > rp->end) \
                        ++rp; \
                      if (ch >= rp->start) \
                        { \
                          unsigned char ch2 = \
                            iso88597_from_ucs4[ch - 0xa0 + rp->idx]; \
                          if (ch2 != '\0') \
                            { \
                              if (set2 != ISO88597_set) \
                                { \
                                  if (__builtin_expect (outptr + 3 > outend, \
                                                        0)) \
                                    { \
                                      res = __GCONV_FULL_OUTPUT; \
                                      break; \
                                    } \
                                  *outptr++ = ESC; \
                                  *outptr++ = '.'; \
                                  *outptr++ = 'F'; \
                                  set2 = ISO88597_set; \
                                } \
                              \
                              if (__builtin_expect (outptr + 3 > outend, 0)) \
                                { \
                                  res = __GCONV_FULL_OUTPUT; \
                                  break; \
                                } \
                              *outptr++ = ESC; \
                              *outptr++ = 'N'; \
                              *outptr++ = ch2 - 0x80; \
                              res = __GCONV_OK; \
                              break; \
                            } \
                        } \
                    } \
                  \
                  break; \
                  \
                case japanese: \
                  \
                  /* Try JIS X 0201 Roman.  */ \
                  written = ucs4_to_jisx0201 (ch, buf); \
                  if (written != __UNKNOWN_10646_CHAR \
                      && buf[0] > 0x20 && buf[0] < 0x80) \
                    { \
                      if (set != JISX0201_Roman_set) \
                        { \
                          if (__builtin_expect (outptr + 3 > outend, 0)) \
                            { \
                              res = __GCONV_FULL_OUTPUT; \
                              break; \
                            } \
                          *outptr++ = ESC; \
                          *outptr++ = '('; \
                          *outptr++ = 'J'; \
                          set = JISX0201_Roman_set; \
                        } \
                      \
                      if (__glibc_unlikely (outptr + 1 > outend)) \
                        { \
                          res = __GCONV_FULL_OUTPUT; \
                          break; \
                        } \
                      *outptr++ = buf[0]; \
                      res = __GCONV_OK; \
                      break; \
                    } \
                  \
                  /* Try JIS X 0208.  */ \
                  written = ucs4_to_jisx0208 (ch, buf, 2); \
                  if (written != __UNKNOWN_10646_CHAR) \
                    { \
                      if (set != JISX0208_1983_set) \
                        { \
                          if (__builtin_expect (outptr + 3 > outend, 0)) \
                            { \
                              res = __GCONV_FULL_OUTPUT; \
                              break; \
                            } \
                          *outptr++ = ESC; \
                          *outptr++ = '$'; \
                          *outptr++ = 'B'; \
                          set = JISX0208_1983_set; \
                        } \
                      \
                      if (__glibc_unlikely (outptr + 2 > outend)) \
                        { \
                          res = __GCONV_FULL_OUTPUT; \
                          break; \
                        } \
                      *outptr++ = buf[0]; \
                      *outptr++ = buf[1]; \
                      res = __GCONV_OK; \
                      break; \
                    } \
                  \
                  if (__glibc_unlikely (var == iso2022jp)) \
                    /* Don't use the other Japanese character sets.  */ \
                    break; \
                  \
                  /* Try JIS X 0212.  */ \
                  written = ucs4_to_jisx0212 (ch, buf, 2); \
                  if (written != __UNKNOWN_10646_CHAR) \
                    { \
                      if (set != JISX0212_set) \
                        { \
                          if (__builtin_expect (outptr + 4 > outend, 0)) \
                            { \
                              res = __GCONV_FULL_OUTPUT; \
                              break; \
                            } \
                          *outptr++ = ESC; \
                          *outptr++ = '$'; \
                          *outptr++ = '('; \
                          *outptr++ = 'D'; \
                          set = JISX0212_set; \
                        } \
                      \
                      if (__glibc_unlikely (outptr + 2 > outend)) \
                        { \
                          res = __GCONV_FULL_OUTPUT; \
                          break; \
                        } \
                      *outptr++ = buf[0]; \
                      *outptr++ = buf[1]; \
                      res = __GCONV_OK; \
                      break; \
                    } \
                  \
                  break; \
                  \
                case chinese: \
                  assert (var == iso2022jp2); \
                  \
                  /* Try GB 2312.  */ \
                  written = ucs4_to_gb2312 (ch, buf, 2); \
                  if (written != __UNKNOWN_10646_CHAR) \
                    { \
                      if (set != GB2312_set) \
                        { \
                          if (__builtin_expect (outptr + 3 > outend, 0)) \
                            { \
                              res = __GCONV_FULL_OUTPUT; \
                              break; \
                            } \
                          *outptr++ = ESC; \
                          *outptr++ = '$'; \
                          *outptr++ = 'A'; \
                          set = GB2312_set; \
                        } \
                      \
                      if (__glibc_unlikely (outptr + 2 > outend)) \
                        { \
                          res = __GCONV_FULL_OUTPUT; \
                          break; \
                        } \
                      *outptr++ = buf[0]; \
                      *outptr++ = buf[1]; \
                      res = __GCONV_OK; \
                      break; \
                    } \
                  \
                  break; \
                  \
                case korean: \
                  assert (var == iso2022jp2); \
                  \
                  /* Try KSC 5601.  */ \
                  written = ucs4_to_ksc5601 (ch, buf, 2); \
                  if (written != __UNKNOWN_10646_CHAR) \
                    { \
                      if (set != KSC5601_set) \
                        { \
                          if (__builtin_expect (outptr + 4 > outend, 0)) \
                            { \
                              res = __GCONV_FULL_OUTPUT; \
                              break; \
                            } \
                          *outptr++ = ESC; \
                          *outptr++ = '$'; \
                          *outptr++ = '('; \
                          *outptr++ = 'C'; \
                          set = KSC5601_set; \
                        } \
                      \
                      if (__glibc_unlikely (outptr + 2 > outend)) \
                        { \
                          res = __GCONV_FULL_OUTPUT; \
                          break; \
                        } \
                      *outptr++ = buf[0]; \
                      *outptr++ = buf[1]; \
                      res = __GCONV_OK; \
                      break; \
                    } \
                  \
                  break; \
                  \
                case other: \
                  assert (var == iso2022jp2); \
                  \
                  /* Try JIS X 0201 Kana.  This is not officially part \
                     of ISO-2022-JP-2, according to RFC 1554.  Therefore \
                     we try this only after all other attempts.  */ \
                  written = ucs4_to_jisx0201 (ch, buf); \
                  if (written != __UNKNOWN_10646_CHAR && buf[0] >= 0x80) \
                    { \
                      if (set != JISX0201_Kana_set) \
                        { \
                          if (__builtin_expect (outptr + 3 > outend, 0)) \
                            { \
                              res = __GCONV_FULL_OUTPUT; \
                              break; \
                            } \
                          *outptr++ = ESC; \
                          *outptr++ = '('; \
                          *outptr++ = 'I'; \
                          set = JISX0201_Kana_set; \
                        } \
                      \
                      if (__glibc_unlikely (outptr + 1 > outend)) \
                        { \
                          res = __GCONV_FULL_OUTPUT; \
                          break; \
                        } \
                      *outptr++ = buf[0] - 0x80; \
                      res = __GCONV_OK; \
                      break; \
                    } \
                  \
                  break; \
                  \
                default: \
                  abort (); \
                } \
            while (res == __GCONV_ILLEGAL_INPUT \
                   && (conversion_list = CVLIST_REST (conversion_list)) != 0); \
            \
            if (res == __GCONV_FULL_OUTPUT) \
              { \
                result = res; \
                break; \
              } \
            \
            if (res == __GCONV_ILLEGAL_INPUT) \
              { \
                STANDARD_TO_LOOP_ERR_HANDLER (4); \
              } \
          } \
      } \
    \
    /* Now that we wrote the output increment the input pointer.  */ \
    inptr += 4; \
  }
#define LOOP_NEED_FLAGS
#define EXTRA_LOOP_DECLS , enum variant var, int *setp
#define INIT_PARAMS int set = *setp & CURRENT_SEL_MASK; \
                    int set2 = *setp & CURRENT_ASSIGN_MASK; \
                    int tag = *setp & CURRENT_TAG_MASK;
#define REINIT_PARAMS do \
                        { \
                          set = *setp & CURRENT_SEL_MASK; \
                          set2 = *setp & CURRENT_ASSIGN_MASK; \
                          tag = *setp & CURRENT_TAG_MASK; \
                        } \
                      while (0)
#define UPDATE_PARAMS *setp = set | set2 | tag
1030	#include <iconv/loop.c>
1031
1032
/* Now define the toplevel functions.  */
1034	#include <iconv/skeleton.c>
1035

Browse the source code of glibc/iconvdata/iso-2022-jp.c