1 | /* Copyright (C) 1999-2019 Free Software Foundation, Inc. |
2 | This file is part of the GNU C Library. |
3 | |
4 | The GNU C Library is free software; you can redistribute it and/or |
5 | modify it under the terms of the GNU Lesser General Public |
6 | License as published by the Free Software Foundation; either |
7 | version 2.1 of the License, or (at your option) any later version. |
8 | |
9 | The GNU C Library is distributed in the hope that it will be useful, |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | Lesser General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU Lesser General Public |
15 | License along with the GNU C Library; if not, see |
16 | <http://www.gnu.org/licenses/>. |
17 | |
18 | As a special exception, if you link the code in this file with |
19 | files compiled with a GNU compiler to produce an executable, |
20 | that does not cause the resulting executable to be covered by |
21 | the GNU Lesser General Public License. This exception does not |
22 | however invalidate any other reasons why the executable file |
23 | might be covered by the GNU Lesser General Public License. |
24 | This exception applies to code released by its copyright holders |
25 | in files containing the exception. */ |
26 | |
27 | #include <libioP.h> |
28 | #include <dlfcn.h> |
29 | #include <wchar.h> |
30 | #include <assert.h> |
31 | #include <stdlib.h> |
32 | #include <string.h> |
33 | |
34 | #include <langinfo.h> |
35 | #include <locale/localeinfo.h> |
36 | #include <wcsmbs/wcsmbsload.h> |
37 | #include <iconv/gconv_int.h> |
38 | #include <shlib-compat.h> |
39 | #include <sysdep.h> |
40 | |
41 | |
/* Forward declarations of the file-local conversion callbacks.  They
   are listed in the order in which the members of `__libio_codecvt'
   that refer to them are initialized.  */

/* Wide character -> multibyte conversion.  */
static enum __codecvt_result do_out (struct _IO_codecvt *codecvt,
                                     __mbstate_t *statep,
                                     const wchar_t *from_start,
                                     const wchar_t *from_end,
                                     const wchar_t **from_stop,
                                     char *to_start, char *to_end,
                                     char **to_stop);
static enum __codecvt_result do_unshift (struct _IO_codecvt *codecvt,
                                         __mbstate_t *statep,
                                         char *to_start, char *to_end,
                                         char **to_stop);

/* Multibyte -> wide character conversion.  */
static enum __codecvt_result do_in (struct _IO_codecvt *codecvt,
                                    __mbstate_t *statep,
                                    const char *from_start,
                                    const char *from_end,
                                    const char **from_stop,
                                    wchar_t *to_start, wchar_t *to_end,
                                    wchar_t **to_stop);

/* Queries about the encoding and about pending input.  */
static int do_encoding (struct _IO_codecvt *codecvt);
static int do_always_noconv (struct _IO_codecvt *codecvt);
static int do_length (struct _IO_codecvt *codecvt, __mbstate_t *statep,
                      const char *from_start, const char *from_end,
                      size_t max);
static int do_max_length (struct _IO_codecvt *codecvt);
64 | |
65 | |
66 | /* The functions used in `codecvt' for libio are always the same. */ |
67 | const struct _IO_codecvt __libio_codecvt = |
68 | { |
69 | .__codecvt_destr = NULL, /* Destructor, never used. */ |
70 | .__codecvt_do_out = do_out, |
71 | .__codecvt_do_unshift = do_unshift, |
72 | .__codecvt_do_in = do_in, |
73 | .__codecvt_do_encoding = do_encoding, |
74 | .__codecvt_do_always_noconv = do_always_noconv, |
75 | .__codecvt_do_length = do_length, |
76 | .__codecvt_do_max_length = do_max_length |
77 | }; |
78 | |
79 | |
80 | /* Return orientation of stream. If mode is nonzero try to change |
81 | the orientation first. */ |
82 | #undef _IO_fwide |
83 | int |
84 | _IO_fwide (FILE *fp, int mode) |
85 | { |
86 | /* Normalize the value. */ |
87 | mode = mode < 0 ? -1 : (mode == 0 ? 0 : 1); |
88 | |
89 | #if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_1) |
90 | if (__builtin_expect (&_IO_stdin_used == NULL, 0) |
91 | && (fp == _IO_stdin || fp == _IO_stdout || fp == _IO_stderr)) |
92 | /* This is for a stream in the glibc 2.0 format. */ |
93 | return -1; |
94 | #endif |
95 | |
96 | /* The orientation already has been determined. */ |
97 | if (fp->_mode != 0 |
98 | /* Or the caller simply wants to know about the current orientation. */ |
99 | || mode == 0) |
100 | return fp->_mode; |
101 | |
102 | /* Set the orientation appropriately. */ |
103 | if (mode > 0) |
104 | { |
105 | struct _IO_codecvt *cc = fp->_codecvt = &fp->_wide_data->_codecvt; |
106 | |
107 | fp->_wide_data->_IO_read_ptr = fp->_wide_data->_IO_read_end; |
108 | fp->_wide_data->_IO_write_ptr = fp->_wide_data->_IO_write_base; |
109 | |
110 | /* Get the character conversion functions based on the currently |
111 | selected locale for LC_CTYPE. */ |
112 | { |
113 | /* Clear the state. We start all over again. */ |
114 | memset (&fp->_wide_data->_IO_state, '\0', sizeof (__mbstate_t)); |
115 | memset (&fp->_wide_data->_IO_last_state, '\0', sizeof (__mbstate_t)); |
116 | |
117 | struct gconv_fcts fcts; |
118 | __wcsmbs_clone_conv (&fcts); |
119 | assert (fcts.towc_nsteps == 1); |
120 | assert (fcts.tomb_nsteps == 1); |
121 | |
122 | /* The functions are always the same. */ |
123 | *cc = __libio_codecvt; |
124 | |
125 | cc->__cd_in.__cd.__nsteps = fcts.towc_nsteps; |
126 | cc->__cd_in.__cd.__steps = fcts.towc; |
127 | |
128 | cc->__cd_in.__cd.__data[0].__invocation_counter = 0; |
129 | cc->__cd_in.__cd.__data[0].__internal_use = 1; |
130 | cc->__cd_in.__cd.__data[0].__flags = __GCONV_IS_LAST; |
131 | cc->__cd_in.__cd.__data[0].__statep = &fp->_wide_data->_IO_state; |
132 | |
133 | cc->__cd_out.__cd.__nsteps = fcts.tomb_nsteps; |
134 | cc->__cd_out.__cd.__steps = fcts.tomb; |
135 | |
136 | cc->__cd_out.__cd.__data[0].__invocation_counter = 0; |
137 | cc->__cd_out.__cd.__data[0].__internal_use = 1; |
138 | cc->__cd_out.__cd.__data[0].__flags |
139 | = __GCONV_IS_LAST | __GCONV_TRANSLIT; |
140 | cc->__cd_out.__cd.__data[0].__statep = &fp->_wide_data->_IO_state; |
141 | } |
142 | |
143 | /* From now on use the wide character callback functions. */ |
144 | _IO_JUMPS_FILE_plus (fp) = fp->_wide_data->_wide_vtable; |
145 | } |
146 | |
147 | /* Set the mode now. */ |
148 | fp->_mode = mode; |
149 | |
150 | return mode; |
151 | } |
152 | |
153 | |
154 | static enum __codecvt_result |
155 | do_out (struct _IO_codecvt *codecvt, __mbstate_t *statep, |
156 | const wchar_t *from_start, const wchar_t *from_end, |
157 | const wchar_t **from_stop, char *to_start, char *to_end, |
158 | char **to_stop) |
159 | { |
160 | enum __codecvt_result result; |
161 | |
162 | struct __gconv_step *gs = codecvt->__cd_out.__cd.__steps; |
163 | int status; |
164 | size_t dummy; |
165 | const unsigned char *from_start_copy = (unsigned char *) from_start; |
166 | |
167 | codecvt->__cd_out.__cd.__data[0].__outbuf = (unsigned char *) to_start; |
168 | codecvt->__cd_out.__cd.__data[0].__outbufend = (unsigned char *) to_end; |
169 | codecvt->__cd_out.__cd.__data[0].__statep = statep; |
170 | |
171 | __gconv_fct fct = gs->__fct; |
172 | #ifdef PTR_DEMANGLE |
173 | if (gs->__shlib_handle != NULL) |
174 | PTR_DEMANGLE (fct); |
175 | #endif |
176 | |
177 | status = DL_CALL_FCT (fct, |
178 | (gs, codecvt->__cd_out.__cd.__data, &from_start_copy, |
179 | (const unsigned char *) from_end, NULL, |
180 | &dummy, 0, 0)); |
181 | |
182 | *from_stop = (wchar_t *) from_start_copy; |
183 | *to_stop = (char *) codecvt->__cd_out.__cd.__data[0].__outbuf; |
184 | |
185 | switch (status) |
186 | { |
187 | case __GCONV_OK: |
188 | case __GCONV_EMPTY_INPUT: |
189 | result = __codecvt_ok; |
190 | break; |
191 | |
192 | case __GCONV_FULL_OUTPUT: |
193 | case __GCONV_INCOMPLETE_INPUT: |
194 | result = __codecvt_partial; |
195 | break; |
196 | |
197 | default: |
198 | result = __codecvt_error; |
199 | break; |
200 | } |
201 | |
202 | return result; |
203 | } |
204 | |
205 | |
206 | static enum __codecvt_result |
207 | do_unshift (struct _IO_codecvt *codecvt, __mbstate_t *statep, |
208 | char *to_start, char *to_end, char **to_stop) |
209 | { |
210 | enum __codecvt_result result; |
211 | |
212 | struct __gconv_step *gs = codecvt->__cd_out.__cd.__steps; |
213 | int status; |
214 | size_t dummy; |
215 | |
216 | codecvt->__cd_out.__cd.__data[0].__outbuf = (unsigned char *) to_start; |
217 | codecvt->__cd_out.__cd.__data[0].__outbufend = (unsigned char *) to_end; |
218 | codecvt->__cd_out.__cd.__data[0].__statep = statep; |
219 | |
220 | __gconv_fct fct = gs->__fct; |
221 | #ifdef PTR_DEMANGLE |
222 | if (gs->__shlib_handle != NULL) |
223 | PTR_DEMANGLE (fct); |
224 | #endif |
225 | |
226 | status = DL_CALL_FCT (fct, |
227 | (gs, codecvt->__cd_out.__cd.__data, NULL, NULL, |
228 | NULL, &dummy, 1, 0)); |
229 | |
230 | *to_stop = (char *) codecvt->__cd_out.__cd.__data[0].__outbuf; |
231 | |
232 | switch (status) |
233 | { |
234 | case __GCONV_OK: |
235 | case __GCONV_EMPTY_INPUT: |
236 | result = __codecvt_ok; |
237 | break; |
238 | |
239 | case __GCONV_FULL_OUTPUT: |
240 | case __GCONV_INCOMPLETE_INPUT: |
241 | result = __codecvt_partial; |
242 | break; |
243 | |
244 | default: |
245 | result = __codecvt_error; |
246 | break; |
247 | } |
248 | |
249 | return result; |
250 | } |
251 | |
252 | |
253 | static enum __codecvt_result |
254 | do_in (struct _IO_codecvt *codecvt, __mbstate_t *statep, |
255 | const char *from_start, const char *from_end, const char **from_stop, |
256 | wchar_t *to_start, wchar_t *to_end, wchar_t **to_stop) |
257 | { |
258 | enum __codecvt_result result; |
259 | |
260 | struct __gconv_step *gs = codecvt->__cd_in.__cd.__steps; |
261 | int status; |
262 | size_t dummy; |
263 | const unsigned char *from_start_copy = (unsigned char *) from_start; |
264 | |
265 | codecvt->__cd_in.__cd.__data[0].__outbuf = (unsigned char *) to_start; |
266 | codecvt->__cd_in.__cd.__data[0].__outbufend = (unsigned char *) to_end; |
267 | codecvt->__cd_in.__cd.__data[0].__statep = statep; |
268 | |
269 | __gconv_fct fct = gs->__fct; |
270 | #ifdef PTR_DEMANGLE |
271 | if (gs->__shlib_handle != NULL) |
272 | PTR_DEMANGLE (fct); |
273 | #endif |
274 | |
275 | status = DL_CALL_FCT (fct, |
276 | (gs, codecvt->__cd_in.__cd.__data, &from_start_copy, |
277 | (const unsigned char *) from_end, NULL, |
278 | &dummy, 0, 0)); |
279 | |
280 | *from_stop = (const char *) from_start_copy; |
281 | *to_stop = (wchar_t *) codecvt->__cd_in.__cd.__data[0].__outbuf; |
282 | |
283 | switch (status) |
284 | { |
285 | case __GCONV_OK: |
286 | case __GCONV_EMPTY_INPUT: |
287 | result = __codecvt_ok; |
288 | break; |
289 | |
290 | case __GCONV_FULL_OUTPUT: |
291 | case __GCONV_INCOMPLETE_INPUT: |
292 | result = __codecvt_partial; |
293 | break; |
294 | |
295 | default: |
296 | result = __codecvt_error; |
297 | break; |
298 | } |
299 | |
300 | return result; |
301 | } |
302 | |
303 | |
304 | static int |
305 | do_encoding (struct _IO_codecvt *codecvt) |
306 | { |
307 | /* See whether the encoding is stateful. */ |
308 | if (codecvt->__cd_in.__cd.__steps[0].__stateful) |
309 | return -1; |
310 | /* Fortunately not. Now determine the input bytes for the conversion |
311 | necessary for each wide character. */ |
312 | if (codecvt->__cd_in.__cd.__steps[0].__min_needed_from |
313 | != codecvt->__cd_in.__cd.__steps[0].__max_needed_from) |
314 | /* Not a constant value. */ |
315 | return 0; |
316 | |
317 | return codecvt->__cd_in.__cd.__steps[0].__min_needed_from; |
318 | } |
319 | |
320 | |
/* Report whether conversion is never needed.  This implementation
   unconditionally answers 0 and does not look at CODECVT.  */
static int
do_always_noconv (struct _IO_codecvt *codecvt)
{
  (void) codecvt;

  return 0;
}
326 | |
327 | |
328 | static int |
329 | do_length (struct _IO_codecvt *codecvt, __mbstate_t *statep, |
330 | const char *from_start, const char *from_end, size_t max) |
331 | { |
332 | int result; |
333 | const unsigned char *cp = (const unsigned char *) from_start; |
334 | wchar_t to_buf[max]; |
335 | struct __gconv_step *gs = codecvt->__cd_in.__cd.__steps; |
336 | size_t dummy; |
337 | |
338 | codecvt->__cd_in.__cd.__data[0].__outbuf = (unsigned char *) to_buf; |
339 | codecvt->__cd_in.__cd.__data[0].__outbufend = (unsigned char *) &to_buf[max]; |
340 | codecvt->__cd_in.__cd.__data[0].__statep = statep; |
341 | |
342 | __gconv_fct fct = gs->__fct; |
343 | #ifdef PTR_DEMANGLE |
344 | if (gs->__shlib_handle != NULL) |
345 | PTR_DEMANGLE (fct); |
346 | #endif |
347 | |
348 | DL_CALL_FCT (fct, |
349 | (gs, codecvt->__cd_in.__cd.__data, &cp, |
350 | (const unsigned char *) from_end, NULL, |
351 | &dummy, 0, 0)); |
352 | |
353 | result = cp - (const unsigned char *) from_start; |
354 | |
355 | return result; |
356 | } |
357 | |
358 | |
359 | static int |
360 | do_max_length (struct _IO_codecvt *codecvt) |
361 | { |
362 | return codecvt->__cd_in.__cd.__steps[0].__max_needed_from; |
363 | } |
364 | |