1 | /* Helper functions for parsing printf format strings. |
2 | Copyright (C) 1995-2023 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #include <ctype.h> |
20 | #include <limits.h> |
21 | #include <stdlib.h> |
22 | #include <string.h> |
23 | #include <sys/param.h> |
24 | #include <wchar.h> |
25 | #include <wctype.h> |
26 | |
/* Select the character type, its unsigned counterpart, the literal
   prefix and the digit classifier depending on whether COMPILE_WPRINTF
   is defined (wide-character variant) or not (narrow variant).  */
#ifdef COMPILE_WPRINTF
/* Wide-character variant.  */
# define CHAR_T wchar_t
# define UCHAR_T unsigned int
# define INT_T wint_t
# define L_(Str) L##Str
# define ISDIGIT(Ch) iswdigit (Ch)
# define HANDLE_REGISTERED_MODIFIER __handle_registered_modifier_wc
#else
/* Narrow-character variant.  */
# define CHAR_T char
# define UCHAR_T unsigned char
# define INT_T int
# define L_(Str) Str
# define ISDIGIT(Ch) isdigit (Ch)
# define HANDLE_REGISTERED_MODIFIER __handle_registered_modifier_mb
#endif
42 | |
43 | #include "printf-parse.h" |
44 | |
45 | #define NDEBUG 1 |
46 | #include <assert.h> |
47 | |
48 | |
49 | |
50 | /* FORMAT must point to a '%' at the beginning of a spec. Fills in *SPEC |
51 | with the parsed details. POSN is the number of arguments already |
52 | consumed. At most MAXTYPES - POSN types are filled in TYPES. Return |
53 | the number of args consumed by this spec; *MAX_REF_ARG is updated so it |
54 | remains the highest argument index used. */ |
55 | size_t |
56 | attribute_hidden |
57 | #ifdef COMPILE_WPRINTF |
58 | __parse_one_specwc (const UCHAR_T *format, size_t posn, |
59 | struct printf_spec *spec, size_t *max_ref_arg, |
60 | bool *failed) |
61 | #else |
62 | __parse_one_specmb (const UCHAR_T *format, size_t posn, |
63 | struct printf_spec *spec, size_t *max_ref_arg, |
64 | bool *failed) |
65 | #endif |
66 | { |
67 | unsigned int n; |
68 | size_t nargs = 0; |
69 | bool is_fast; |
70 | |
71 | /* Skip the '%'. */ |
72 | ++format; |
73 | |
74 | /* Clear information structure. */ |
75 | spec->data_arg = -1; |
76 | spec->info.alt = 0; |
77 | spec->info.space = 0; |
78 | spec->info.left = 0; |
79 | spec->info.showsign = 0; |
80 | spec->info.group = 0; |
81 | spec->info.i18n = 0; |
82 | spec->info.extra = 0; |
83 | spec->info.pad = ' '; |
84 | spec->info.wide = sizeof (UCHAR_T) > 1; |
85 | spec->info.is_binary128 = 0; |
86 | |
87 | *failed = false; |
88 | |
89 | /* Test for positional argument. */ |
90 | if (ISDIGIT (*format)) |
91 | { |
92 | const UCHAR_T *begin = format; |
93 | |
94 | n = read_int (&format); |
95 | |
96 | if (n != 0 && *format == L_('$')) |
97 | /* Is positional parameter. */ |
98 | { |
99 | ++format; /* Skip the '$'. */ |
100 | if (n != -1) |
101 | { |
102 | spec->data_arg = n - 1; |
103 | *max_ref_arg = MAX (*max_ref_arg, n); |
104 | } |
105 | } |
106 | else |
107 | /* Oops; that was actually the width and/or 0 padding flag. |
108 | Step back and read it again. */ |
109 | format = begin; |
110 | } |
111 | |
112 | /* Check for spec modifiers. */ |
113 | do |
114 | { |
115 | switch (*format) |
116 | { |
117 | case L_(' '): |
118 | /* Output a space in place of a sign, when there is no sign. */ |
119 | spec->info.space = 1; |
120 | continue; |
121 | case L_('+'): |
122 | /* Always output + or - for numbers. */ |
123 | spec->info.showsign = 1; |
124 | continue; |
125 | case L_('-'): |
126 | /* Left-justify things. */ |
127 | spec->info.left = 1; |
128 | continue; |
129 | case L_('#'): |
130 | /* Use the "alternate form": |
131 | Hex has 0x or 0X, FP always has a decimal point. */ |
132 | spec->info.alt = 1; |
133 | continue; |
134 | case L_('0'): |
135 | /* Pad with 0s. */ |
136 | spec->info.pad = '0'; |
137 | continue; |
138 | case L_('\''): |
139 | /* Show grouping in numbers if the locale information |
140 | indicates any. */ |
141 | spec->info.group = 1; |
142 | continue; |
143 | case L_('I'): |
144 | /* Use the internationalized form of the output. Currently |
145 | means to use the `outdigits' of the current locale. */ |
146 | spec->info.i18n = 1; |
147 | continue; |
148 | default: |
149 | break; |
150 | } |
151 | break; |
152 | } |
153 | while (*++format); |
154 | |
155 | if (spec->info.left) |
156 | spec->info.pad = ' '; |
157 | |
158 | /* Get the field width. */ |
159 | spec->width_arg = -1; |
160 | spec->info.width = 0; |
161 | if (*format == L_('*')) |
162 | { |
163 | /* The field width is given in an argument. |
164 | A negative field width indicates left justification. */ |
165 | const UCHAR_T *begin = ++format; |
166 | |
167 | if (ISDIGIT (*format)) |
168 | { |
169 | /* The width argument might be found in a positional parameter. */ |
170 | n = read_int (&format); |
171 | |
172 | if (n != 0 && *format == L_('$')) |
173 | { |
174 | if (n != -1) |
175 | { |
176 | spec->width_arg = n - 1; |
177 | *max_ref_arg = MAX (*max_ref_arg, n); |
178 | } |
179 | ++format; /* Skip '$'. */ |
180 | } |
181 | } |
182 | |
183 | if (spec->width_arg < 0) |
184 | { |
185 | /* Not in a positional parameter. Consume one argument. */ |
186 | spec->width_arg = posn++; |
187 | ++nargs; |
188 | format = begin; /* Step back and reread. */ |
189 | } |
190 | } |
191 | else if (ISDIGIT (*format)) |
192 | { |
193 | int n = read_int (&format); |
194 | |
195 | /* Constant width specification. */ |
196 | if (n != -1) |
197 | spec->info.width = n; |
198 | } |
199 | /* Get the precision. */ |
200 | spec->prec_arg = -1; |
201 | /* -1 means none given; 0 means explicit 0. */ |
202 | spec->info.prec = -1; |
203 | if (*format == L_('.')) |
204 | { |
205 | ++format; |
206 | if (*format == L_('*')) |
207 | { |
208 | /* The precision is given in an argument. */ |
209 | const UCHAR_T *begin = ++format; |
210 | |
211 | if (ISDIGIT (*format)) |
212 | { |
213 | n = read_int (&format); |
214 | |
215 | if (n != 0 && *format == L_('$')) |
216 | { |
217 | if (n != -1) |
218 | { |
219 | spec->prec_arg = n - 1; |
220 | *max_ref_arg = MAX (*max_ref_arg, n); |
221 | } |
222 | ++format; |
223 | } |
224 | } |
225 | |
226 | if (spec->prec_arg < 0) |
227 | { |
228 | /* Not in a positional parameter. */ |
229 | spec->prec_arg = posn++; |
230 | ++nargs; |
231 | format = begin; |
232 | } |
233 | } |
234 | else if (ISDIGIT (*format)) |
235 | { |
236 | int n = read_int (&format); |
237 | |
238 | if (n != -1) |
239 | spec->info.prec = n; |
240 | } |
241 | else |
242 | /* "%.?" is treated like "%.0?". */ |
243 | spec->info.prec = 0; |
244 | } |
245 | |
246 | /* Check for type modifiers. */ |
247 | spec->info.is_long_double = 0; |
248 | spec->info.is_short = 0; |
249 | spec->info.is_long = 0; |
250 | spec->info.is_char = 0; |
251 | spec->info.user = 0; |
252 | |
253 | if (__builtin_expect (__printf_modifier_table == NULL, 1) |
254 | || __printf_modifier_table[*format] == NULL |
255 | || HANDLE_REGISTERED_MODIFIER (&format, &spec->info) != 0) |
256 | switch (*format++) |
257 | { |
258 | case L_('h'): |
259 | /* ints are short ints or chars. */ |
260 | if (*format != L_('h')) |
261 | spec->info.is_short = 1; |
262 | else |
263 | { |
264 | ++format; |
265 | spec->info.is_char = 1; |
266 | } |
267 | break; |
268 | case L_('l'): |
269 | /* ints are long ints. */ |
270 | spec->info.is_long = 1; |
271 | if (*format != L_('l')) |
272 | break; |
273 | ++format; |
274 | /* FALLTHROUGH */ |
275 | case L_('L'): |
276 | /* doubles are long doubles, and ints are long long ints. */ |
277 | case L_('q'): |
278 | /* 4.4 uses this for long long. */ |
279 | spec->info.is_long_double = 1; |
280 | break; |
281 | case L_('z'): |
282 | case L_('Z'): |
283 | /* ints are size_ts. */ |
284 | assert (sizeof (size_t) <= sizeof (unsigned long long int)); |
285 | #if LONG_MAX != LONG_LONG_MAX |
286 | spec->info.is_long_double = (sizeof (size_t) |
287 | > sizeof (unsigned long int)); |
288 | #endif |
289 | spec->info.is_long = sizeof (size_t) > sizeof (unsigned int); |
290 | break; |
291 | case L_('t'): |
292 | assert (sizeof (ptrdiff_t) <= sizeof (long long int)); |
293 | #if LONG_MAX != LONG_LONG_MAX |
294 | spec->info.is_long_double = (sizeof (ptrdiff_t) > sizeof (long int)); |
295 | #endif |
296 | spec->info.is_long = sizeof (ptrdiff_t) > sizeof (int); |
297 | break; |
298 | case L_('j'): |
299 | assert (sizeof (uintmax_t) <= sizeof (unsigned long long int)); |
300 | #if LONG_MAX != LONG_LONG_MAX |
301 | spec->info.is_long_double = (sizeof (uintmax_t) |
302 | > sizeof (unsigned long int)); |
303 | #endif |
304 | spec->info.is_long = sizeof (uintmax_t) > sizeof (unsigned int); |
305 | break; |
306 | case L_('w'): |
307 | is_fast = false; |
308 | if (*format == L_('f')) |
309 | { |
310 | ++format; |
311 | is_fast = true; |
312 | } |
313 | int bitwidth = 0; |
314 | if (ISDIGIT (*format)) |
315 | bitwidth = read_int (&format); |
316 | if (is_fast) |
317 | switch (bitwidth) |
318 | { |
319 | case 8: |
320 | bitwidth = INT_FAST8_WIDTH; |
321 | break; |
322 | case 16: |
323 | bitwidth = INT_FAST16_WIDTH; |
324 | break; |
325 | case 32: |
326 | bitwidth = INT_FAST32_WIDTH; |
327 | break; |
328 | case 64: |
329 | bitwidth = INT_FAST64_WIDTH; |
330 | break; |
331 | } |
332 | switch (bitwidth) |
333 | { |
334 | case 8: |
335 | spec->info.is_char = 1; |
336 | break; |
337 | case 16: |
338 | spec->info.is_short = 1; |
339 | break; |
340 | case 32: |
341 | break; |
342 | case 64: |
343 | spec->info.is_long_double = 1; |
344 | spec->info.is_long = 1; |
345 | break; |
346 | default: |
347 | /* ISO C requires this error to be detected. */ |
348 | __set_errno (EINVAL); |
349 | *failed = true; |
350 | break; |
351 | } |
352 | break; |
353 | default: |
354 | /* Not a recognized modifier. Backup. */ |
355 | --format; |
356 | break; |
357 | } |
358 | |
359 | /* Get the format specification. */ |
360 | spec->info.spec = (wchar_t) *format++; |
361 | spec->size = -1; |
362 | if (__builtin_expect (__printf_function_table == NULL, 1) |
363 | || spec->info.spec > UCHAR_MAX |
364 | || __printf_arginfo_table[spec->info.spec] == NULL |
365 | /* We don't try to get the types for all arguments if the format |
366 | uses more than one. The normal case is covered though. If |
367 | the call returns -1 we continue with the normal specifiers. */ |
368 | || (int) (spec->ndata_args = (*__printf_arginfo_table[spec->info.spec]) |
369 | (&spec->info, 1, &spec->data_arg_type, |
370 | &spec->size)) < 0) |
371 | { |
372 | /* Find the data argument types of a built-in spec. */ |
373 | spec->ndata_args = 1; |
374 | |
375 | switch (spec->info.spec) |
376 | { |
377 | case L'i': |
378 | case L'd': |
379 | case L'u': |
380 | case L'o': |
381 | case L'X': |
382 | case L'x': |
383 | case L'B': |
384 | case L'b': |
385 | #if LONG_MAX != LONG_LONG_MAX |
386 | if (spec->info.is_long_double) |
387 | spec->data_arg_type = PA_INT|PA_FLAG_LONG_LONG; |
388 | else |
389 | #endif |
390 | if (spec->info.is_long) |
391 | spec->data_arg_type = PA_INT|PA_FLAG_LONG; |
392 | else if (spec->info.is_short) |
393 | spec->data_arg_type = PA_INT|PA_FLAG_SHORT; |
394 | else if (spec->info.is_char) |
395 | spec->data_arg_type = PA_CHAR; |
396 | else |
397 | spec->data_arg_type = PA_INT; |
398 | break; |
399 | case L'e': |
400 | case L'E': |
401 | case L'f': |
402 | case L'F': |
403 | case L'g': |
404 | case L'G': |
405 | case L'a': |
406 | case L'A': |
407 | if (spec->info.is_long_double) |
408 | spec->data_arg_type = PA_DOUBLE|PA_FLAG_LONG_DOUBLE; |
409 | else |
410 | spec->data_arg_type = PA_DOUBLE; |
411 | break; |
412 | case L'c': |
413 | spec->data_arg_type = PA_CHAR; |
414 | break; |
415 | case L'C': |
416 | spec->data_arg_type = PA_WCHAR; |
417 | break; |
418 | case L's': |
419 | spec->data_arg_type = PA_STRING; |
420 | break; |
421 | case L'S': |
422 | spec->data_arg_type = PA_WSTRING; |
423 | break; |
424 | case L'p': |
425 | spec->data_arg_type = PA_POINTER; |
426 | break; |
427 | case L'n': |
428 | spec->data_arg_type = PA_INT|PA_FLAG_PTR; |
429 | break; |
430 | |
431 | case L'm': |
432 | default: |
433 | /* An unknown spec will consume no args. */ |
434 | spec->ndata_args = 0; |
435 | break; |
436 | } |
437 | } |
438 | |
439 | if (spec->data_arg == -1 && spec->ndata_args > 0) |
440 | { |
441 | /* There are args consumed, but no positional spec. Use the |
442 | next sequential arg position. */ |
443 | spec->data_arg = posn; |
444 | nargs += spec->ndata_args; |
445 | } |
446 | |
447 | if (spec->info.spec == L'\0') |
448 | /* Format ended before this spec was complete. */ |
449 | spec->end_of_fmt = spec->next_fmt = format - 1; |
450 | else |
451 | { |
452 | /* Find the next format spec. */ |
453 | spec->end_of_fmt = format; |
454 | #ifdef COMPILE_WPRINTF |
455 | spec->next_fmt = __find_specwc (format); |
456 | #else |
457 | spec->next_fmt = __find_specmb (format); |
458 | #endif |
459 | } |
460 | |
461 | return nargs; |
462 | } |
463 | |