1 | /* Access functions for GB2312 conversion. |
2 | Copyright (C) 1998-2021 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. |
5 | |
6 | The GNU C Library is free software; you can redistribute it and/or |
7 | modify it under the terms of the GNU Lesser General Public |
8 | License as published by the Free Software Foundation; either |
9 | version 2.1 of the License, or (at your option) any later version. |
10 | |
11 | The GNU C Library is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | Lesser General Public License for more details. |
15 | |
16 | You should have received a copy of the GNU Lesser General Public |
17 | License along with the GNU C Library; if not, see |
18 | <https://www.gnu.org/licenses/>. */ |
19 | |
20 | #ifndef _GB2312_H |
21 | #define _GB2312_H 1 |
22 | |
23 | #include <gconv.h> |
24 | #include <stdint.h> |
25 | #include <assert.h> |
26 | |
27 | /* Conversion table. */ |
28 | extern const uint16_t __gb2312_to_ucs[]; |
29 | |
30 | |
31 | static inline uint32_t |
32 | __attribute ((always_inline)) |
33 | gb2312_to_ucs4 (const unsigned char **s, size_t avail, unsigned char offset) |
34 | { |
35 | unsigned char ch = *(*s); |
36 | unsigned char ch2; |
37 | int idx; |
38 | |
39 | if (ch < offset || (ch - offset) <= 0x20 || (ch - offset) > 0x77) |
40 | return __UNKNOWN_10646_CHAR; |
41 | |
42 | if (avail < 2) |
43 | return 0; |
44 | |
45 | ch2 = (*s)[1]; |
46 | if ((ch2 - offset) <= 0x20 || (ch2 - offset) >= 0x7f) |
47 | return __UNKNOWN_10646_CHAR; |
48 | |
49 | idx = (ch - 0x21 - offset) * 94 + (ch2 - 0x21 - offset); |
50 | if (idx > 0x1ff1) |
51 | return __UNKNOWN_10646_CHAR; |
52 | |
53 | (*s) += 2; |
54 | |
55 | return __gb2312_to_ucs[idx] ?: ((*s) -= 2, __UNKNOWN_10646_CHAR); |
56 | } |
57 | |
58 | |
59 | extern const char __gb2312_from_ucs4_tab1[][2]; |
60 | extern const char __gb2312_from_ucs4_tab2[][2]; |
61 | extern const char __gb2312_from_ucs4_tab3[][2]; |
62 | extern const char __gb2312_from_ucs4_tab4[][2]; |
63 | extern const char __gb2312_from_ucs4_tab5[][2]; |
64 | extern const char __gb2312_from_ucs4_tab6[][2]; |
65 | extern const char __gb2312_from_ucs4_tab7[][2]; |
66 | extern const char __gb2312_from_ucs4_tab8[][2]; |
67 | extern const char __gb2312_from_ucs4_tab9[][2]; |
68 | |
69 | static inline size_t |
70 | __attribute ((always_inline)) |
71 | ucs4_to_gb2312 (uint32_t wch, unsigned char *s, size_t avail) |
72 | { |
73 | unsigned int ch = (unsigned int) wch; |
74 | char buf[2]; |
75 | const char *cp = buf; |
76 | |
77 | switch (ch) |
78 | { |
79 | case 0xa4 ... 0x101: |
80 | cp = __gb2312_from_ucs4_tab1[ch - 0xa4]; |
81 | break; |
82 | case 0x113: |
83 | cp = "\x28\x25" ; |
84 | break; |
85 | case 0x11b: |
86 | cp = "\x28\x27" ; |
87 | break; |
88 | case 0x12b: |
89 | cp = "\x28\x29" ; |
90 | break; |
91 | case 0x14d: |
92 | cp = "\x28\x2d" ; |
93 | break; |
94 | case 0x16b: |
95 | cp = "\x28\x31" ; |
96 | break; |
97 | case 0x1ce: |
98 | cp = "\x28\x23" ; |
99 | break; |
100 | case 0x1d0: |
101 | cp = "\x28\x2b" ; |
102 | break; |
103 | case 0x1d2: |
104 | cp = "\x28\x2f" ; |
105 | break; |
106 | case 0x1d4: |
107 | cp = "\x28\x33" ; |
108 | break; |
109 | case 0x1d6: |
110 | cp = "\x28\x35" ; |
111 | break; |
112 | case 0x1d8: |
113 | cp = "\x28\x36" ; |
114 | break; |
115 | case 0x1da: |
116 | cp = "\x28\x37" ; |
117 | break; |
118 | case 0x1dc: |
119 | cp = "\x28\x38" ; |
120 | break; |
121 | case 0x2c7: |
122 | cp = "\x21\x26" ; |
123 | break; |
124 | case 0x2c9: |
125 | cp = "\x21\x25" ; |
126 | break; |
127 | case 0x391 ... 0x3c9: |
128 | cp = __gb2312_from_ucs4_tab2[ch - 0x391]; |
129 | break; |
130 | case 0x401 ... 0x451: |
131 | cp = __gb2312_from_ucs4_tab3[ch - 0x401]; |
132 | break; |
133 | case 0x2015 ... 0x203b: |
134 | cp = __gb2312_from_ucs4_tab4[ch - 0x2015]; |
135 | break; |
136 | case 0x2103 ... 0x22a5: |
137 | cp = __gb2312_from_ucs4_tab5[ch - 0x2103]; |
138 | break; |
139 | case 0x2312: |
140 | cp = "\x21\x50" ; |
141 | break; |
142 | case 0x2460 ... 0x249b: |
143 | cp = __gb2312_from_ucs4_tab6[ch - 0x2460]; |
144 | break; |
145 | case 0x2500 ... 0x254b: |
146 | buf[0] = '\x29'; |
147 | buf[1] = '\x24' + (ch % 256); |
148 | break; |
149 | case 0x25a0: |
150 | cp = "\x21\x76" ; |
151 | break; |
152 | case 0x25a1: |
153 | cp = "\x21\x75" ; |
154 | break; |
155 | case 0x25b2: |
156 | cp = "\x21\x78" ; |
157 | break; |
158 | case 0x25b3: |
159 | cp = "\x21\x77" ; |
160 | break; |
161 | case 0x25c6: |
162 | cp = "\x21\x74" ; |
163 | break; |
164 | case 0x25c7: |
165 | cp = "\x21\x73" ; |
166 | break; |
167 | case 0x25cb: |
168 | cp = "\x21\x70" ; |
169 | break; |
170 | case 0x25ce: |
171 | cp = "\x21\x72" ; |
172 | break; |
173 | case 0x25cf: |
174 | cp = "\x21\x71" ; |
175 | break; |
176 | case 0x2605: |
177 | cp = "\x21\x6f" ; |
178 | break; |
179 | case 0x2606: |
180 | cp = "\x21\x6e" ; |
181 | break; |
182 | case 0x2640: |
183 | cp = "\x21\x62" ; |
184 | break; |
185 | case 0x2642: |
186 | cp = "\x21\x61" ; |
187 | break; |
188 | case 0x3000 ... 0x3129: |
189 | cp = __gb2312_from_ucs4_tab7[ch - 0x3000]; |
190 | break; |
191 | case 0x3220 ... 0x3229: |
192 | buf[0] = '\x22'; |
193 | buf[1] = '\x65' + (ch - 0x3220); |
194 | break; |
195 | case 0x4e00 ... 0x9fa0: |
196 | cp = __gb2312_from_ucs4_tab8[ch - 0x4e00]; |
197 | break; |
198 | case 0xff01 ... 0xff5e: |
199 | cp = __gb2312_from_ucs4_tab9[ch - 0xff01]; |
200 | break; |
201 | case 0xffe0: |
202 | cp = "\x21\x69" ; |
203 | break; |
204 | case 0xffe1: |
205 | cp = "\x21\x6a" ; |
206 | break; |
207 | case 0xffe3: |
208 | cp = "\x23\x7e" ; |
209 | break; |
210 | case 0xffe5: |
211 | cp = "\x23\x24" ; |
212 | break; |
213 | default: |
214 | return __UNKNOWN_10646_CHAR; |
215 | } |
216 | |
217 | if (cp[0] == '\0') |
218 | return __UNKNOWN_10646_CHAR; |
219 | |
220 | assert (cp[1] != '\0'); |
221 | |
222 | if (avail < 2) |
223 | return 0; |
224 | |
225 | s[0] = cp[0]; |
226 | s[1] = cp[1]; |
227 | |
228 | return 2; |
229 | } |
230 | |
231 | #endif /* gb2312.h */ |
232 | |