ibm1364.c source code [glibc/iconvdata/ibm1364.c]

/* Conversion from and to IBM1364.
   Copyright (C) 2005-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Masahide Washizawa <washi@jp.ibm.com>, 2005.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
19
20	#include <dlfcn.h>
21	#include <stdint.h>
22	#include <wchar.h>
23	#include <byteswap.h>
24
#ifndef CHARSET_NAME
/* This is really the IBM1364 converter, not another module sharing
   the code.  When CHARSET_NAME is already defined at this point, the
   defaults below are skipped and the definer is expected to supply its
   own values for all of these macros.  */
# define DATA_HEADER	"ibm1364.h"
# define CHARSET_NAME	"IBM1364//"
# define FROM_LOOP	from_ibm1364
# define TO_LOOP	to_ibm1364
/* Names of the conversion tables used by the loop bodies below.  */
# define SB_TO_UCS4	__ibm1364sb_to_ucs4
# define DB_TO_UCS4_IDX	__ibm1364db_to_ucs4_idx
# define DB_TO_UCS4	__ibm1364db_to_ucs4
# define UCS4_TO_SB_IDX	__ucs4_to_ibm1364sb_idx
# define UCS4_TO_SB	__ucs4_to_ibm1364sb
# define UCS4_TO_DB_IDX	__ucs4_to_ibm1364db_idx
# define UCS4_TO_DB	__ucs4_to_ibm1364db
/* UCS4 values greater than or equal to this limit are rejected when
   converting to the charset.  */
# define UCS_LIMIT	0xffff
#endif
41
42
43	#include DATA_HEADER
44
/* The shift sequences for this charset (it does not use ESC).  */
#define SI 0x0F  /* Shift In, host code to turn DBCS off.  */
#define SO 0x0E  /* Shift Out, host code to turn DBCS on.  */
48
/* Definitions used in the body of the `gconv' function.  */
#define MIN_NEEDED_FROM	1
#define MAX_NEEDED_FROM	2
#define MIN_NEEDED_TO	4
#ifdef HAS_COMBINED
/* A combined character is written as two 4-byte UCS4 values.  */
# define MAX_NEEDED_TO	8
#else
# define MAX_NEEDED_TO	4
#endif
#define ONE_DIRECTION	0
/* SAVE_CURCS holds a copy of the state for SAVE_RESET_STATE; CURCSP
   points at the __count field of the conversion state and is handed to
   the loop functions through EXTRA_LOOP_ARGS.  */
#define PREPARE_LOOP \
  int save_curcs; \
  int *curcsp = &data->__statep->__count;
#define EXTRA_LOOP_ARGS		, curcsp
63
/* Definitions of initialization and destructor function.  */
#define DEFINE_INIT	1
#define DEFINE_FINI	1
67
68
/* Since this is a stateful encoding we have to provide code which resets
   the output state to the initial state.  This has to be done during the
   flushing.

   The codeset lives in the bits of __count above the low three; those
   low three bits are left untouched by the reset.  Expects DATA, OUTBUF,
   OUTEND and STATUS to be in scope at the expansion site.  */
#define EMIT_SHIFT_TO_INIT \
  if ((data->__statep->__count & ~7) != sb) \
    { \
      if (FROM_DIRECTION) \
	/* Decoding: nothing to emit, just forget the codeset.  */ \
	data->__statep->__count &= 7; \
      else \
	{ \
	  /* We are not in the initial state.  To switch back we have \
	     to emit `SI'.  */ \
	  if (__glibc_unlikely (outbuf >= outend)) \
	    /* We don't have enough room in the output buffer.  */ \
	    status = __GCONV_FULL_OUTPUT; \
	  else \
	    { \
	      /* Write out the shift sequence.  */ \
	      *outbuf++ = SI; \
	      data->__statep->__count &= 7; \
	    } \
	} \
    }
92
93
/* Since we might have to reset input pointer we must be able to save
   and restore the state.  A non-zero SAVE copies the current state into
   SAVE_CURCS; a zero SAVE writes the saved copy back.  */
#define SAVE_RESET_STATE(Save) \
  if (Save) \
    save_curcs = *curcsp; \
  else \
    *curcsp = save_curcs
101
102
/* Current codeset type.  Both values leave the low three bits clear,
   matching the `& ~7' mask used when the state is read.  */
enum
{
  sb = 0,	/* Single-byte mode (initial state).  */
  db = 64	/* Double-byte mode, entered with SO and left with SI.  */
};
109
110
/* Subroutine to write out converted UCS4 from IBM-13XX.

   Expects RES (the table lookup result) and OUTPTR to be in scope at
   the expansion site; the HAS_COMBINED variant additionally uses CH
   (the double-byte input code), OUTEND and RESULT, and may `break' out
   of the enclosing loop.  */
#ifdef HAS_COMBINED
# define SUB_COMBINED_UCS_FROM_IBM13XX \
  { \
    if (res != UCS_LIMIT || ch < __TO_UCS4_COMBINED_MIN \
	|| ch > __TO_UCS4_COMBINED_MAX) \
      { \
	/* Ordinary character: a single UCS4 value.  */ \
	put32 (outptr, res); \
	outptr += 4; \
      } \
    else \
      { \
	/* This is a combined character.  Make sure we have room.  */ \
	if (__glibc_unlikely (outptr + 8 > outend)) \
	  { \
	    result = __GCONV_FULL_OUTPUT; \
	    break; \
	  } \
	\
	const struct divide *cmbp \
	  = &DB_TO_UCS4_COMB[ch - __TO_UCS4_COMBINED_MIN]; \
	assert (cmbp->res1 != 0 && cmbp->res2 != 0); \
	\
	/* Emit both halves of the combined character.  */ \
	put32 (outptr, cmbp->res1); \
	outptr += 4; \
	put32 (outptr, cmbp->res2); \
	outptr += 4; \
      } \
  }
#else
# define SUB_COMBINED_UCS_FROM_IBM13XX \
  { \
    put32 (outptr, res); \
    outptr += 4; \
  }
#endif /* HAS_COMBINED */
147
148
/* First, define the conversion function from IBM-13XX to UCS4.

   Each pass of BODY consumes one shift byte (SO or SI), one single-byte
   character, or one double-byte character, depending on the current
   codeset CURCS.  */
#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT	MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT	MAX_NEEDED_TO
#define LOOPFCT			FROM_LOOP
#define BODY \
  { \
    uint32_t ch = *inptr; \
    \
    if (__builtin_expect (ch, 0) == SO) \
      { \
	/* Shift OUT, change to DBCS converter (redundant escape okay).  */ \
	curcs = db; \
	++inptr; \
	continue; \
      } \
    if (__builtin_expect (ch, 0) == SI) \
      { \
	/* Shift IN, change to SBCS converter (redundant escape okay).  */ \
	curcs = sb; \
	++inptr; \
	continue; \
      } \
    \
    if (curcs == sb) \
      { \
	/* Use the IBM13XX table for single byte.  */ \
	uint32_t res = SB_TO_UCS4[ch]; \
	/* A zero table entry for a non-zero byte means "unmapped".  */ \
	if (__builtin_expect (res, L'\1') == L'\0' && ch != '\0') \
	  { \
	    /* This is an illegal character.  */ \
	    if (! ignore_errors_p ()) \
	      { \
		result = __GCONV_ILLEGAL_INPUT; \
		break; \
	      } \
	    ++*irreversible; \
	  } \
	else \
	  { \
	    put32 (outptr, res); \
	    outptr += 4; \
	  } \
	++inptr; \
      } \
    else \
      { \
	assert (curcs == db); \
	\
	if (__glibc_unlikely (inptr + 1 >= inend)) \
	  { \
	    /* The second character is not available.  Store the \
	       intermediate result.  */ \
	    result = __GCONV_INCOMPLETE_INPUT; \
	    break; \
	  } \
	\
	/* Combine both input bytes into one 16-bit code.  */ \
	ch = (ch * 0x100) + inptr[1]; \
	\
	/* Use the IBM1364 table for double byte.  Find the first range \
	   whose end is not below CH.  */ \
	const struct gap *rp2 = DB_TO_UCS4_IDX; \
	while (ch > rp2->end) \
	  ++rp2; \
	\
	uint32_t res; \
	if (__builtin_expect (rp2->start == 0xffff, 0) \
	    || __builtin_expect (ch < rp2->start, 0) \
	    || (res = DB_TO_UCS4[ch + rp2->idx], \
		__builtin_expect (res, L'\1') == L'\0' && ch != '\0')) \
	  { \
	    /* This is an illegal character.  */ \
	    if (! ignore_errors_p ()) \
	      { \
		result = __GCONV_ILLEGAL_INPUT; \
		break; \
	      } \
	    ++*irreversible; \
	  } \
	else \
	  { \
	    SUB_COMBINED_UCS_FROM_IBM13XX; \
	  } \
	inptr += 2; \
      } \
  }
#define LOOP_NEED_FLAGS
#define EXTRA_LOOP_DECLS	, int *curcsp
/* The codeset is kept in the bits of *CURCSP above the low three.  */
#define INIT_PARAMS		int curcs = *curcsp & ~7
#define UPDATE_PARAMS		*curcsp = curcs
239	#include <iconv/loop.c>
240
241
/* Subroutine to convert two UCS4 codes to IBM-13XX.

   With HAS_COMBINED, looks up the pair (CH, next input character) in
   UCS4_COMB_TO_DB.  On a match it writes the double-byte code (preceded
   by SO when in single-byte mode), consumes both input characters and
   `continue's the enclosing loop; otherwise it falls through.  Without
   HAS_COMBINED it expands to nothing.  */
#ifdef HAS_COMBINED
# define SUB_COMBINED_UCS_TO_IBM13XX \
  { \
    const struct combine *cmbp = UCS4_COMB_TO_DB; \
    while (cmbp->res1 < ch) \
      ++cmbp; \
    /* XXX if last char is beginning of combining store in state */ \
    if (cmbp->res1 == ch && inptr + 4 < inend) \
      { \
	/* See if input is part of a combined character.  */ \
	uint32_t ch_next = get32 (inptr + 4); \
	while (cmbp->res2 != ch_next) \
	  { \
	    ++cmbp; \
	    if (cmbp->res1 != ch) \
	      goto not_combined; \
	  } \
	\
	/* It is a combined character.  First make sure we are in \
	   double byte mode.  */ \
	if (curcs == sb) \
	  { \
	    /* We know there is room for at least one byte.  */ \
	    *outptr++ = SO; \
	    curcs = db; \
	  } \
	\
	if (__glibc_unlikely (outptr + 2 > outend)) \
	  { \
	    result = __GCONV_FULL_OUTPUT; \
	    break; \
	  } \
	*outptr++ = cmbp->ch[0]; \
	*outptr++ = cmbp->ch[1]; \
	inptr += 8; \
	continue; \
	\
      not_combined:; \
      } \
  }
#else
# define SUB_COMBINED_UCS_TO_IBM13XX
#endif /* HAS_COMBINED */
286
287
/* Next, define the other direction.

   Each pass of BODY reads one UCS4 character (or two, for a combined
   character when HAS_COMBINED is defined), looks it up first in the
   single-byte and then in the double-byte table, and emits SO or SI
   whenever the output codeset has to change.  */
#define MIN_NEEDED_INPUT	MIN_NEEDED_TO
#define MAX_NEEDED_INPUT	MAX_NEEDED_TO
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_FROM
#define MAX_NEEDED_OUTPUT	MAX_NEEDED_FROM
#define LOOPFCT			TO_LOOP
#define BODY \
  { \
    uint32_t ch = get32 (inptr); \
    \
    if (__glibc_unlikely (ch >= UCS_LIMIT)) \
      { \
	UNICODE_TAG_HANDLER (ch, 4); \
	\
	if (! ignore_errors_p ()) \
	  { \
	    result = __GCONV_ILLEGAL_INPUT; \
	    break; \
	  } \
	++*irreversible; \
	inptr += 4; \
	continue; \
      } \
    \
    SUB_COMBINED_UCS_TO_IBM13XX; \
    \
    /* Find the first single-byte range whose end is not below CH.  */ \
    const struct gap *rp1 = UCS4_TO_SB_IDX; \
    while (ch > rp1->end) \
      ++rp1; \
    \
    /* Use the UCS4 table for single byte.  */ \
    const char *cp; \
    if (__builtin_expect (ch < rp1->start, 0) \
	|| (cp = UCS4_TO_SB[ch + rp1->idx], \
	    __builtin_expect (cp[0], L'\1') == L'\0' && ch != '\0')) \
      { \
	/* Use the UCS4 table for double byte.  */ \
	const struct gap *rp2 = UCS4_TO_DB_IDX; \
	while (ch > rp2->end) \
	  ++rp2; \
	\
	if (__builtin_expect (ch < rp2->start, 0) \
	    || (cp = UCS4_TO_DB[ch + rp2->idx], \
		__builtin_expect (cp[0], L'\1') == L'\0' && ch != '\0')) \
	  { \
	    /* This is an illegal character.  */ \
	    if (! ignore_errors_p ()) \
	      { \
		result = __GCONV_ILLEGAL_INPUT; \
		break; \
	      } \
	    ++*irreversible; \
	  } \
	else \
	  { \
	    if (curcs == sb) \
	      { \
		/* We know there is room for at least one byte.  */ \
		*outptr++ = SO; \
		curcs = db; \
	      } \
	    \
	    if (__glibc_unlikely (outptr + 2 > outend)) \
	      { \
		result = __GCONV_FULL_OUTPUT; \
		break; \
	      } \
	    *outptr++ = cp[0]; \
	    *outptr++ = cp[1]; \
	  } \
      } \
    else \
      { \
	if (__glibc_unlikely (curcs == db)) \
	  { \
	    /* We know there is room for at least one byte.  */ \
	    *outptr++ = SI; \
	    curcs = sb; \
	    \
	    if (__glibc_unlikely (outptr >= outend)) \
	      { \
		result = __GCONV_FULL_OUTPUT; \
		break; \
	      } \
	  } \
	\
	*outptr++ = cp[0]; \
      } \
    \
    /* Now that we wrote the output increment the input pointer.  */ \
    inptr += 4; \
  }
#define LOOP_NEED_FLAGS
#define EXTRA_LOOP_DECLS	, int *curcsp
/* The codeset is kept in the bits of *CURCSP above the low three.  */
#define INIT_PARAMS		int curcs = *curcsp & ~7
#define REINIT_PARAMS		curcs = *curcsp & ~7
#define UPDATE_PARAMS		*curcsp = curcs
385	#include <iconv/loop.c>
386
/* Now define the toplevel functions.  */
388	#include <iconv/skeleton.c>
389

Browse the source code of glibc/iconvdata/ibm1364.c