1 | /* Access functions for GB2312 conversion. |
2 | Copyright (C) 1998-2023 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #ifndef _GB2312_H |
20 | #define _GB2312_H 1 |
21 | |
22 | #include <gconv.h> |
23 | #include <stdint.h> |
24 | #include <assert.h> |
25 | |
26 | /* Conversion table. */ |
27 | extern const uint16_t __gb2312_to_ucs[]; |
28 | |
29 | |
30 | static inline uint32_t |
31 | __attribute ((always_inline)) |
32 | gb2312_to_ucs4 (const unsigned char **s, size_t avail, unsigned char offset) |
33 | { |
34 | unsigned char ch = *(*s); |
35 | unsigned char ch2; |
36 | int idx; |
37 | |
38 | if (ch < offset || (ch - offset) <= 0x20 || (ch - offset) > 0x77) |
39 | return __UNKNOWN_10646_CHAR; |
40 | |
41 | if (avail < 2) |
42 | return 0; |
43 | |
44 | ch2 = (*s)[1]; |
45 | if ((ch2 - offset) <= 0x20 || (ch2 - offset) >= 0x7f) |
46 | return __UNKNOWN_10646_CHAR; |
47 | |
48 | idx = (ch - 0x21 - offset) * 94 + (ch2 - 0x21 - offset); |
49 | if (idx > 0x1ff1) |
50 | return __UNKNOWN_10646_CHAR; |
51 | |
52 | (*s) += 2; |
53 | |
54 | return __gb2312_to_ucs[idx] ?: ((*s) -= 2, __UNKNOWN_10646_CHAR); |
55 | } |
56 | |
57 | |
58 | extern const char __gb2312_from_ucs4_tab1[][2]; |
59 | extern const char __gb2312_from_ucs4_tab2[][2]; |
60 | extern const char __gb2312_from_ucs4_tab3[][2]; |
61 | extern const char __gb2312_from_ucs4_tab4[][2]; |
62 | extern const char __gb2312_from_ucs4_tab5[][2]; |
63 | extern const char __gb2312_from_ucs4_tab6[][2]; |
64 | extern const char __gb2312_from_ucs4_tab7[][2]; |
65 | extern const char __gb2312_from_ucs4_tab8[][2]; |
66 | extern const char __gb2312_from_ucs4_tab9[][2]; |
67 | |
68 | static inline size_t |
69 | __attribute ((always_inline)) |
70 | ucs4_to_gb2312 (uint32_t wch, unsigned char *s, size_t avail) |
71 | { |
72 | unsigned int ch = (unsigned int) wch; |
73 | char buf[2]; |
74 | const char *cp = buf; |
75 | |
76 | switch (ch) |
77 | { |
78 | case 0xa4 ... 0x101: |
79 | cp = __gb2312_from_ucs4_tab1[ch - 0xa4]; |
80 | break; |
81 | case 0x113: |
82 | cp = "\x28\x25" ; |
83 | break; |
84 | case 0x11b: |
85 | cp = "\x28\x27" ; |
86 | break; |
87 | case 0x12b: |
88 | cp = "\x28\x29" ; |
89 | break; |
90 | case 0x14d: |
91 | cp = "\x28\x2d" ; |
92 | break; |
93 | case 0x16b: |
94 | cp = "\x28\x31" ; |
95 | break; |
96 | case 0x1ce: |
97 | cp = "\x28\x23" ; |
98 | break; |
99 | case 0x1d0: |
100 | cp = "\x28\x2b" ; |
101 | break; |
102 | case 0x1d2: |
103 | cp = "\x28\x2f" ; |
104 | break; |
105 | case 0x1d4: |
106 | cp = "\x28\x33" ; |
107 | break; |
108 | case 0x1d6: |
109 | cp = "\x28\x35" ; |
110 | break; |
111 | case 0x1d8: |
112 | cp = "\x28\x36" ; |
113 | break; |
114 | case 0x1da: |
115 | cp = "\x28\x37" ; |
116 | break; |
117 | case 0x1dc: |
118 | cp = "\x28\x38" ; |
119 | break; |
120 | case 0x2c7: |
121 | cp = "\x21\x26" ; |
122 | break; |
123 | case 0x2c9: |
124 | cp = "\x21\x25" ; |
125 | break; |
126 | case 0x391 ... 0x3c9: |
127 | cp = __gb2312_from_ucs4_tab2[ch - 0x391]; |
128 | break; |
129 | case 0x401 ... 0x451: |
130 | cp = __gb2312_from_ucs4_tab3[ch - 0x401]; |
131 | break; |
132 | case 0x2015 ... 0x203b: |
133 | cp = __gb2312_from_ucs4_tab4[ch - 0x2015]; |
134 | break; |
135 | case 0x2103 ... 0x22a5: |
136 | cp = __gb2312_from_ucs4_tab5[ch - 0x2103]; |
137 | break; |
138 | case 0x2312: |
139 | cp = "\x21\x50" ; |
140 | break; |
141 | case 0x2460 ... 0x249b: |
142 | cp = __gb2312_from_ucs4_tab6[ch - 0x2460]; |
143 | break; |
144 | case 0x2500 ... 0x254b: |
145 | buf[0] = '\x29'; |
146 | buf[1] = '\x24' + (ch % 256); |
147 | break; |
148 | case 0x25a0: |
149 | cp = "\x21\x76" ; |
150 | break; |
151 | case 0x25a1: |
152 | cp = "\x21\x75" ; |
153 | break; |
154 | case 0x25b2: |
155 | cp = "\x21\x78" ; |
156 | break; |
157 | case 0x25b3: |
158 | cp = "\x21\x77" ; |
159 | break; |
160 | case 0x25c6: |
161 | cp = "\x21\x74" ; |
162 | break; |
163 | case 0x25c7: |
164 | cp = "\x21\x73" ; |
165 | break; |
166 | case 0x25cb: |
167 | cp = "\x21\x70" ; |
168 | break; |
169 | case 0x25ce: |
170 | cp = "\x21\x72" ; |
171 | break; |
172 | case 0x25cf: |
173 | cp = "\x21\x71" ; |
174 | break; |
175 | case 0x2605: |
176 | cp = "\x21\x6f" ; |
177 | break; |
178 | case 0x2606: |
179 | cp = "\x21\x6e" ; |
180 | break; |
181 | case 0x2640: |
182 | cp = "\x21\x62" ; |
183 | break; |
184 | case 0x2642: |
185 | cp = "\x21\x61" ; |
186 | break; |
187 | case 0x3000 ... 0x3129: |
188 | cp = __gb2312_from_ucs4_tab7[ch - 0x3000]; |
189 | break; |
190 | case 0x3220 ... 0x3229: |
191 | buf[0] = '\x22'; |
192 | buf[1] = '\x65' + (ch - 0x3220); |
193 | break; |
194 | case 0x4e00 ... 0x9fa0: |
195 | cp = __gb2312_from_ucs4_tab8[ch - 0x4e00]; |
196 | break; |
197 | case 0xff01 ... 0xff5e: |
198 | cp = __gb2312_from_ucs4_tab9[ch - 0xff01]; |
199 | break; |
200 | case 0xffe0: |
201 | cp = "\x21\x69" ; |
202 | break; |
203 | case 0xffe1: |
204 | cp = "\x21\x6a" ; |
205 | break; |
206 | case 0xffe3: |
207 | cp = "\x23\x7e" ; |
208 | break; |
209 | case 0xffe5: |
210 | cp = "\x23\x24" ; |
211 | break; |
212 | default: |
213 | return __UNKNOWN_10646_CHAR; |
214 | } |
215 | |
216 | if (cp[0] == '\0') |
217 | return __UNKNOWN_10646_CHAR; |
218 | |
219 | assert (cp[1] != '\0'); |
220 | |
221 | if (avail < 2) |
222 | return 0; |
223 | |
224 | s[0] = cp[0]; |
225 | s[1] = cp[1]; |
226 | |
227 | return 2; |
228 | } |
229 | |
230 | #endif /* gb2312.h */ |
231 | |