ibm933.c source code [glibc/iconvdata/ibm933.c]

/* Conversion from and to IBM933.
   Copyright (C) 2000-2023 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
18
/* IBM933 is designed for the representation of Korean using a stateful
   EBCDIC encoding scheme.  It is also known as CCSID 933 or CP933.  See:
   https://www-01.ibm.com/software/globalization/ccsid/ccsid933.html  */
22
23	#include <dlfcn.h>
24	#include <stdint.h>
25	#include <wchar.h>
26	#include <byteswap.h>
27	#include "ibm933.h"
28
/* The shift sequences for this charset (it does not use ESC).  Each one
   is a single byte in the IBM933 byte stream.  */
#define SI 0x0F  /* Shift In, host code to turn DBCS off.  */
#define SO 0x0E  /* Shift Out, host code to turn DBCS on.  */
32
/* Definitions used in the body of the `gconv' function.  */
#define CHARSET_NAME "IBM933//"
#define FROM_LOOP from_ibm933
#define TO_LOOP to_ibm933
#define ONE_DIRECTION 0

/* IBM933 -> UCS4: one step consumes one byte (single-byte code or a
   shift byte) or two bytes (double-byte code) and produces one 4-byte
   UCS4 value.  */
#define FROM_LOOP_MIN_NEEDED_FROM 1
#define FROM_LOOP_MAX_NEEDED_FROM 2
#define FROM_LOOP_MIN_NEEDED_TO 4
#define FROM_LOOP_MAX_NEEDED_TO 4

/* UCS4 -> IBM933: one step consumes one 4-byte UCS4 value and produces
   between one byte (single-byte code, no shift) and three bytes (a
   shift byte followed by a double-byte code).  */
#define TO_LOOP_MIN_NEEDED_FROM 4
#define TO_LOOP_MAX_NEEDED_FROM 4
#define TO_LOOP_MIN_NEEDED_TO 1
#define TO_LOOP_MAX_NEEDED_TO 3

/* Extra locals for the conversion function: `curcsp' points at the
   counter word of the conversion state, which holds the current shift
   state; `save_curcs' is the slot used by SAVE_RESET_STATE.  */
#define PREPARE_LOOP \
  int save_curcs; \
  int *curcsp = &data->__statep->__count;

/* Pass the shift-state pointer on to the conversion loops.  */
#define EXTRA_LOOP_ARGS , curcsp
50
/* Definitions of initialization and destructor function.  Both macros
   are consumed by <iconv/skeleton.c>, which this file includes last
   (presumably to generate the default init/end functions -- confirm in
   skeleton.c).  */
#define DEFINE_INIT 1
#define DEFINE_FINI 1
54
55
/* Since this is a stateful encoding we have to provide code which resets
   the output state to the initial state.  This has to be done during the
   flushing.

   The shift state is kept in the bits of `__count' above the low three,
   hence the `~7' and `7' masks.  Nothing happens when the state is
   already `sb'.  When converting from IBM933 the reset only clears the
   state bits.  When converting to IBM933 an `SI' byte has to be written
   first; if there is no room for it the state is left unchanged and
   `status' is set to __GCONV_FULL_OUTPUT.  */
#define EMIT_SHIFT_TO_INIT \
  if ((data->__statep->__count & ~7) != sb) \
    { \
      if (FROM_DIRECTION) \
        data->__statep->__count &= 7; \
      else \
        { \
          /* We are not in the initial state.  To switch back we have \
             to emit `SI'.  */ \
          if (__glibc_unlikely (outbuf >= outend)) \
            /* We don't have enough room in the output buffer.  */ \
            status = __GCONV_FULL_OUTPUT; \
          else \
            { \
              /* Write out the shift sequence.  */ \
              *outbuf++ = SI; \
              data->__statep->__count &= 7; \
            } \
        } \
    }
79
80
/* Since we might have to reset input pointer we must be able to save
   and restore the state.  A non-zero SAVE copies the current shift
   state (*curcsp) into `save_curcs'; a zero SAVE writes the saved value
   back.  The expansion deliberately has no trailing semicolon; the
   user of the macro supplies it.  */
#define SAVE_RESET_STATE(Save) \
  if (Save) \
    save_curcs = *curcsp; \
  else \
    *curcsp = save_curcs
88
89
/* Current codeset type.  The value is stored in the conversion state
   word with the low three bits masked off (`& ~7'), so neither constant
   may use bits 0-2.  */
enum
{
  sb = 0,   /* Single-byte (SBCS) mode; this is the initial state.  */
  db = 64   /* Double-byte (DBCS) mode, entered by SO.  */
};
96
/* First, define the conversion function from IBM-933 to UCS4.  The
   generic names below are bound to the FROM-direction limits before
   <iconv/loop.c> is included for this direction.  */
#define MIN_NEEDED_INPUT FROM_LOOP_MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT FROM_LOOP_MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT FROM_LOOP_MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT FROM_LOOP_MAX_NEEDED_TO
#define LOOPFCT FROM_LOOP
/* One conversion step from IBM933 to UCS4.

   An SO or SI byte only switches `curcs' and is consumed without
   producing output.  In single-byte mode the byte indexes
   __ibm933sb_to_ucs4.  In double-byte mode two bytes are combined into
   a 16-bit code; the gap table __ibm933db_to_ucs4_idx is walked until
   the code is not above the entry's `end', and the entry's `idx' offset
   is then applied to index __ibm933db_to_ucs4.  If only one byte of a
   double-byte code is available the step stops with
   __GCONV_INCOMPLETE_INPUT.

   A table value of zero for a non-zero input code means the input is
   not a valid character.  A gap entry whose `start' is 0xffff is also
   rejected (presumably the table's terminating entry -- confirm in
   ibm933.h).  */
#define BODY \
  { \
    uint32_t ch = *inptr; \
    uint32_t res; \
    \
    if (__builtin_expect (ch, 0) == SO) \
      { \
        /* Shift OUT, change to DBCS converter (redundant escape okay).  */ \
        curcs = db; \
        ++inptr; \
        continue; \
      } \
    else if (__builtin_expect (ch, 0) == SI) \
      { \
        /* Shift IN, change to SBCS converter (redundant escape okay).  */ \
        curcs = sb; \
        ++inptr; \
        continue; \
      } \
    \
    if (curcs == sb) \
      { \
        /* Use the IBM933 table for single byte.  */ \
        res = __ibm933sb_to_ucs4[ch]; \
        if (__builtin_expect (res, L'\1') == L'\0' && ch != '\0') \
          { \
            /* This is an illegal character.  */ \
            STANDARD_FROM_LOOP_ERR_HANDLER (1); \
          } \
        else \
          { \
            put32 (outptr, res); \
            outptr += 4; \
          } \
        ++inptr; \
      } \
    else \
      { \
        const struct gap *rp2 = __ibm933db_to_ucs4_idx; \
        \
        assert (curcs == db); \
        \
        /* Use the IBM933 table for double byte.  */ \
        if (__glibc_unlikely (inptr + 1 >= inend)) \
          { \
            /* The second character is not available.  Store the \
               intermediate result.  */ \
            result = __GCONV_INCOMPLETE_INPUT; \
            break; \
          } \
        \
        ch = (ch * 0x100) + inptr[1]; \
        while (ch > rp2->end) \
          ++rp2; \
        \
        if (__builtin_expect (rp2->start == 0xffff, 0) \
            || __builtin_expect (ch < rp2->start, 0) \
            || (res = __ibm933db_to_ucs4[ch + rp2->idx], \
                __builtin_expect (res, L'\1') == L'\0' && ch != '\0')) \
          { \
            /* This is an illegal character.  */ \
            STANDARD_FROM_LOOP_ERR_HANDLER (2); \
          } \
        else \
          { \
            put32 (outptr, res); \
            outptr += 4; \
            inptr += 2; \
          } \
      } \
  }
/* Loop configuration for the IBM933 -> UCS4 direction.  The loop
   function receives the shift-state pointer as an extra parameter,
   works on a local copy `curcs' with the low three bits masked off,
   and stores that copy back through UPDATE_PARAMS.  */
#define LOOP_NEED_FLAGS
#define EXTRA_LOOP_DECLS , int *curcsp
#define INIT_PARAMS int curcs = *curcsp & ~7
#define UPDATE_PARAMS *curcsp = curcs
178	#include <iconv/loop.c>
179
/* Next, define the other direction.  The generic names are bound to
   the TO-direction (UCS4 -> IBM933) limits before <iconv/loop.c> is
   included again.  */
#define MIN_NEEDED_INPUT TO_LOOP_MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT TO_LOOP_MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT TO_LOOP_MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT TO_LOOP_MAX_NEEDED_TO
#define LOOPFCT TO_LOOP
/* One conversion step from UCS4 to IBM933.

   Values of 0xffff and above are handed to UNICODE_TAG_HANDLER and
   then to the standard error handler.  Otherwise the single-byte table
   is tried first; if the character has no single-byte code the
   double-byte table is tried.  A table value of zero for a non-zero
   character means there is no mapping.

   Before a code is written, an SO or SI byte is emitted when the
   output is not already in the required mode.  Each write is preceded
   by its own room check, so the step can stop with __GCONV_FULL_OUTPUT
   after the shift byte has been written and `curcs' updated; the input
   pointer is advanced only after the character's code itself has been
   written.  */
#define BODY \
  { \
    uint32_t ch = get32 (inptr); \
    const struct gap *rp1 = __ucs4_to_ibm933sb_idx; \
    const struct gap *rp2 = __ucs4_to_ibm933db_idx; \
    \
    if (__glibc_unlikely (ch >= 0xffff)) \
      { \
        UNICODE_TAG_HANDLER (ch, 4); \
        \
        STANDARD_TO_LOOP_ERR_HANDLER (4); \
      } \
    \
    while (ch > rp1->end) \
      ++rp1; \
    \
    /* Use the UCS4 table for single byte.  */ \
    unsigned char sbconv; \
    if (__builtin_expect (ch < rp1->start, 0) \
        || (sbconv = __ucs4_to_ibm933sb[ch + rp1->idx], \
            __builtin_expect (sbconv, L'\1') == L'\0' && ch != '\0')) \
      { \
        /* Use the UCS4 table for double byte.  */ \
        while (ch > rp2->end) \
          ++rp2; \
        \
        const char *cp; \
        if (__builtin_expect (ch < rp2->start, 0) \
            || (cp = __ucs4_to_ibm933db[ch + rp2->idx], \
                __builtin_expect (cp[0], L'\1')==L'\0' && ch != '\0')) \
          { \
            /* This is an illegal character.  */ \
            STANDARD_TO_LOOP_ERR_HANDLER (4); \
          } \
        else \
          { \
            if (curcs == sb) \
              { \
                /* Switch the output to double-byte mode first.  */ \
                if (__glibc_unlikely (outptr + 1 > outend)) \
                  { \
                    result = __GCONV_FULL_OUTPUT; \
                    break; \
                  } \
                *outptr++ = SO; \
                curcs = db; \
              } \
            \
            if (__glibc_unlikely (outptr + 2 > outend)) \
              { \
                result = __GCONV_FULL_OUTPUT; \
                break; \
              } \
            *outptr++ = cp[0]; \
            *outptr++ = cp[1]; \
          } \
      } \
    else \
      { \
        if (curcs == db) \
          { \
            /* Switch the output back to single-byte mode first.  */ \
            if (__glibc_unlikely (outptr + 1 > outend)) \
              { \
                result = __GCONV_FULL_OUTPUT; \
                break; \
              } \
            *outptr++ = SI; \
            curcs = sb; \
          } \
        \
        if (__glibc_unlikely (outptr + 1 > outend)) \
          { \
            result = __GCONV_FULL_OUTPUT; \
            break; \
          } \
        *outptr++ = sbconv; \
      } \
    \
    /* Now that we wrote the output increment the input pointer.  */ \
    inptr += 4; \
  }
/* Loop configuration for the UCS4 -> IBM933 direction.  As in the other
   direction the loop works on a local copy `curcs' of the shift state
   with the low three bits masked off.  REINIT_PARAMS reloads that copy
   from *curcsp, and UPDATE_PARAMS stores it back.  */
#define LOOP_NEED_FLAGS
#define EXTRA_LOOP_DECLS , int *curcsp
#define INIT_PARAMS int curcs = *curcsp & ~7
#define REINIT_PARAMS curcs = *curcsp & ~7
#define UPDATE_PARAMS *curcsp = curcs
271	#include <iconv/loop.c>
272
/* Now define the toplevel functions.  */
274	#include <iconv/skeleton.c>
275

Browse the source code of glibc/iconvdata/ibm933.c