1 | /* Helper functions for parsing printf format strings. |
2 | Copyright (C) 1995-2023 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #include <ctype.h> |
20 | #include <limits.h> |
21 | #include <stdlib.h> |
22 | #include <string.h> |
23 | #include <sys/param.h> |
24 | #include <wchar.h> |
25 | #include <wctype.h> |
26 | |
/* Character-type abstraction.  This source is written once in terms of
   the macros below and is compiled either for wide characters (when
   COMPILE_WPRINTF is defined) or for plain `char' strings (otherwise).

     CHAR_T / UCHAR_T   character type and its unsigned counterpart
     INT_T              integer type paired with the character type
     L_(Str)            spell a character or string literal for CHAR_T
     ISDIGIT(Ch)        decimal-digit classification for CHAR_T
     HANDLE_REGISTERED_MODIFIER
                        handler invoked for registered modifiers  */
#ifdef COMPILE_WPRINTF
/* Wide-character build.  */
# define CHAR_T                       wchar_t
# define UCHAR_T                      unsigned int
# define INT_T                        wint_t
# define L_(Str)                      L##Str
# define ISDIGIT(Ch)                  iswdigit (Ch)
# define HANDLE_REGISTERED_MODIFIER   __handle_registered_modifier_wc
#else
/* Narrow (`char') build.  */
# define CHAR_T                       char
# define UCHAR_T                      unsigned char
# define INT_T                        int
# define L_(Str)                      Str
# define ISDIGIT(Ch)                  isdigit (Ch)
# define HANDLE_REGISTERED_MODIFIER   __handle_registered_modifier_mb
#endif
42 | |
43 | #include "printf-parse.h" |
44 | |
45 | #define NDEBUG 1 |
46 | #include <assert.h> |
47 | |
48 | |
49 | |
50 | /* FORMAT must point to a '%' at the beginning of a spec. Fills in *SPEC |
51 | with the parsed details. POSN is the number of arguments already |
52 | consumed. At most MAXTYPES - POSN types are filled in TYPES. Return |
53 | the number of args consumed by this spec; *MAX_REF_ARG is updated so it |
54 | remains the highest argument index used. */ |
55 | size_t |
56 | attribute_hidden |
57 | #ifdef COMPILE_WPRINTF |
58 | __parse_one_specwc (const UCHAR_T *format, size_t posn, |
59 | struct printf_spec *spec, size_t *max_ref_arg) |
60 | #else |
61 | __parse_one_specmb (const UCHAR_T *format, size_t posn, |
62 | struct printf_spec *spec, size_t *max_ref_arg) |
63 | #endif |
64 | { |
65 | unsigned int n; |
66 | size_t nargs = 0; |
67 | |
68 | /* Skip the '%'. */ |
69 | ++format; |
70 | |
71 | /* Clear information structure. */ |
72 | spec->data_arg = -1; |
73 | spec->info.alt = 0; |
74 | spec->info.space = 0; |
75 | spec->info.left = 0; |
76 | spec->info.showsign = 0; |
77 | spec->info.group = 0; |
78 | spec->info.i18n = 0; |
79 | spec->info.extra = 0; |
80 | spec->info.pad = ' '; |
81 | spec->info.wide = sizeof (UCHAR_T) > 1; |
82 | spec->info.is_binary128 = 0; |
83 | |
84 | /* Test for positional argument. */ |
85 | if (ISDIGIT (*format)) |
86 | { |
87 | const UCHAR_T *begin = format; |
88 | |
89 | n = read_int (&format); |
90 | |
91 | if (n != 0 && *format == L_('$')) |
92 | /* Is positional parameter. */ |
93 | { |
94 | ++format; /* Skip the '$'. */ |
95 | if (n != -1) |
96 | { |
97 | spec->data_arg = n - 1; |
98 | *max_ref_arg = MAX (*max_ref_arg, n); |
99 | } |
100 | } |
101 | else |
102 | /* Oops; that was actually the width and/or 0 padding flag. |
103 | Step back and read it again. */ |
104 | format = begin; |
105 | } |
106 | |
107 | /* Check for spec modifiers. */ |
108 | do |
109 | { |
110 | switch (*format) |
111 | { |
112 | case L_(' '): |
113 | /* Output a space in place of a sign, when there is no sign. */ |
114 | spec->info.space = 1; |
115 | continue; |
116 | case L_('+'): |
117 | /* Always output + or - for numbers. */ |
118 | spec->info.showsign = 1; |
119 | continue; |
120 | case L_('-'): |
121 | /* Left-justify things. */ |
122 | spec->info.left = 1; |
123 | continue; |
124 | case L_('#'): |
125 | /* Use the "alternate form": |
126 | Hex has 0x or 0X, FP always has a decimal point. */ |
127 | spec->info.alt = 1; |
128 | continue; |
129 | case L_('0'): |
130 | /* Pad with 0s. */ |
131 | spec->info.pad = '0'; |
132 | continue; |
133 | case L_('\''): |
134 | /* Show grouping in numbers if the locale information |
135 | indicates any. */ |
136 | spec->info.group = 1; |
137 | continue; |
138 | case L_('I'): |
139 | /* Use the internationalized form of the output. Currently |
140 | means to use the `outdigits' of the current locale. */ |
141 | spec->info.i18n = 1; |
142 | continue; |
143 | default: |
144 | break; |
145 | } |
146 | break; |
147 | } |
148 | while (*++format); |
149 | |
150 | if (spec->info.left) |
151 | spec->info.pad = ' '; |
152 | |
153 | /* Get the field width. */ |
154 | spec->width_arg = -1; |
155 | spec->info.width = 0; |
156 | if (*format == L_('*')) |
157 | { |
158 | /* The field width is given in an argument. |
159 | A negative field width indicates left justification. */ |
160 | const UCHAR_T *begin = ++format; |
161 | |
162 | if (ISDIGIT (*format)) |
163 | { |
164 | /* The width argument might be found in a positional parameter. */ |
165 | n = read_int (&format); |
166 | |
167 | if (n != 0 && *format == L_('$')) |
168 | { |
169 | if (n != -1) |
170 | { |
171 | spec->width_arg = n - 1; |
172 | *max_ref_arg = MAX (*max_ref_arg, n); |
173 | } |
174 | ++format; /* Skip '$'. */ |
175 | } |
176 | } |
177 | |
178 | if (spec->width_arg < 0) |
179 | { |
180 | /* Not in a positional parameter. Consume one argument. */ |
181 | spec->width_arg = posn++; |
182 | ++nargs; |
183 | format = begin; /* Step back and reread. */ |
184 | } |
185 | } |
186 | else if (ISDIGIT (*format)) |
187 | { |
188 | int n = read_int (&format); |
189 | |
190 | /* Constant width specification. */ |
191 | if (n != -1) |
192 | spec->info.width = n; |
193 | } |
194 | /* Get the precision. */ |
195 | spec->prec_arg = -1; |
196 | /* -1 means none given; 0 means explicit 0. */ |
197 | spec->info.prec = -1; |
198 | if (*format == L_('.')) |
199 | { |
200 | ++format; |
201 | if (*format == L_('*')) |
202 | { |
203 | /* The precision is given in an argument. */ |
204 | const UCHAR_T *begin = ++format; |
205 | |
206 | if (ISDIGIT (*format)) |
207 | { |
208 | n = read_int (&format); |
209 | |
210 | if (n != 0 && *format == L_('$')) |
211 | { |
212 | if (n != -1) |
213 | { |
214 | spec->prec_arg = n - 1; |
215 | *max_ref_arg = MAX (*max_ref_arg, n); |
216 | } |
217 | ++format; |
218 | } |
219 | } |
220 | |
221 | if (spec->prec_arg < 0) |
222 | { |
223 | /* Not in a positional parameter. */ |
224 | spec->prec_arg = posn++; |
225 | ++nargs; |
226 | format = begin; |
227 | } |
228 | } |
229 | else if (ISDIGIT (*format)) |
230 | { |
231 | int n = read_int (&format); |
232 | |
233 | if (n != -1) |
234 | spec->info.prec = n; |
235 | } |
236 | else |
237 | /* "%.?" is treated like "%.0?". */ |
238 | spec->info.prec = 0; |
239 | } |
240 | |
241 | /* Check for type modifiers. */ |
242 | spec->info.is_long_double = 0; |
243 | spec->info.is_short = 0; |
244 | spec->info.is_long = 0; |
245 | spec->info.is_char = 0; |
246 | spec->info.user = 0; |
247 | |
248 | if (__builtin_expect (__printf_modifier_table == NULL, 1) |
249 | || __printf_modifier_table[*format] == NULL |
250 | || HANDLE_REGISTERED_MODIFIER (&format, &spec->info) != 0) |
251 | switch (*format++) |
252 | { |
253 | case L_('h'): |
254 | /* ints are short ints or chars. */ |
255 | if (*format != L_('h')) |
256 | spec->info.is_short = 1; |
257 | else |
258 | { |
259 | ++format; |
260 | spec->info.is_char = 1; |
261 | } |
262 | break; |
263 | case L_('l'): |
264 | /* ints are long ints. */ |
265 | spec->info.is_long = 1; |
266 | if (*format != L_('l')) |
267 | break; |
268 | ++format; |
269 | /* FALLTHROUGH */ |
270 | case L_('L'): |
271 | /* doubles are long doubles, and ints are long long ints. */ |
272 | case L_('q'): |
273 | /* 4.4 uses this for long long. */ |
274 | spec->info.is_long_double = 1; |
275 | break; |
276 | case L_('z'): |
277 | case L_('Z'): |
278 | /* ints are size_ts. */ |
279 | assert (sizeof (size_t) <= sizeof (unsigned long long int)); |
280 | #if LONG_MAX != LONG_LONG_MAX |
281 | spec->info.is_long_double = (sizeof (size_t) |
282 | > sizeof (unsigned long int)); |
283 | #endif |
284 | spec->info.is_long = sizeof (size_t) > sizeof (unsigned int); |
285 | break; |
286 | case L_('t'): |
287 | assert (sizeof (ptrdiff_t) <= sizeof (long long int)); |
288 | #if LONG_MAX != LONG_LONG_MAX |
289 | spec->info.is_long_double = (sizeof (ptrdiff_t) > sizeof (long int)); |
290 | #endif |
291 | spec->info.is_long = sizeof (ptrdiff_t) > sizeof (int); |
292 | break; |
293 | case L_('j'): |
294 | assert (sizeof (uintmax_t) <= sizeof (unsigned long long int)); |
295 | #if LONG_MAX != LONG_LONG_MAX |
296 | spec->info.is_long_double = (sizeof (uintmax_t) |
297 | > sizeof (unsigned long int)); |
298 | #endif |
299 | spec->info.is_long = sizeof (uintmax_t) > sizeof (unsigned int); |
300 | break; |
301 | default: |
302 | /* Not a recognized modifier. Backup. */ |
303 | --format; |
304 | break; |
305 | } |
306 | |
307 | /* Get the format specification. */ |
308 | spec->info.spec = (wchar_t) *format++; |
309 | spec->size = -1; |
310 | if (__builtin_expect (__printf_function_table == NULL, 1) |
311 | || spec->info.spec > UCHAR_MAX |
312 | || __printf_arginfo_table[spec->info.spec] == NULL |
313 | /* We don't try to get the types for all arguments if the format |
314 | uses more than one. The normal case is covered though. If |
315 | the call returns -1 we continue with the normal specifiers. */ |
316 | || (int) (spec->ndata_args = (*__printf_arginfo_table[spec->info.spec]) |
317 | (&spec->info, 1, &spec->data_arg_type, |
318 | &spec->size)) < 0) |
319 | { |
320 | /* Find the data argument types of a built-in spec. */ |
321 | spec->ndata_args = 1; |
322 | |
323 | switch (spec->info.spec) |
324 | { |
325 | case L'i': |
326 | case L'd': |
327 | case L'u': |
328 | case L'o': |
329 | case L'X': |
330 | case L'x': |
331 | case L'B': |
332 | case L'b': |
333 | #if LONG_MAX != LONG_LONG_MAX |
334 | if (spec->info.is_long_double) |
335 | spec->data_arg_type = PA_INT|PA_FLAG_LONG_LONG; |
336 | else |
337 | #endif |
338 | if (spec->info.is_long) |
339 | spec->data_arg_type = PA_INT|PA_FLAG_LONG; |
340 | else if (spec->info.is_short) |
341 | spec->data_arg_type = PA_INT|PA_FLAG_SHORT; |
342 | else if (spec->info.is_char) |
343 | spec->data_arg_type = PA_CHAR; |
344 | else |
345 | spec->data_arg_type = PA_INT; |
346 | break; |
347 | case L'e': |
348 | case L'E': |
349 | case L'f': |
350 | case L'F': |
351 | case L'g': |
352 | case L'G': |
353 | case L'a': |
354 | case L'A': |
355 | if (spec->info.is_long_double) |
356 | spec->data_arg_type = PA_DOUBLE|PA_FLAG_LONG_DOUBLE; |
357 | else |
358 | spec->data_arg_type = PA_DOUBLE; |
359 | break; |
360 | case L'c': |
361 | spec->data_arg_type = PA_CHAR; |
362 | break; |
363 | case L'C': |
364 | spec->data_arg_type = PA_WCHAR; |
365 | break; |
366 | case L's': |
367 | spec->data_arg_type = PA_STRING; |
368 | break; |
369 | case L'S': |
370 | spec->data_arg_type = PA_WSTRING; |
371 | break; |
372 | case L'p': |
373 | spec->data_arg_type = PA_POINTER; |
374 | break; |
375 | case L'n': |
376 | spec->data_arg_type = PA_INT|PA_FLAG_PTR; |
377 | break; |
378 | |
379 | case L'm': |
380 | default: |
381 | /* An unknown spec will consume no args. */ |
382 | spec->ndata_args = 0; |
383 | break; |
384 | } |
385 | } |
386 | |
387 | if (spec->data_arg == -1 && spec->ndata_args > 0) |
388 | { |
389 | /* There are args consumed, but no positional spec. Use the |
390 | next sequential arg position. */ |
391 | spec->data_arg = posn; |
392 | nargs += spec->ndata_args; |
393 | } |
394 | |
395 | if (spec->info.spec == L'\0') |
396 | /* Format ended before this spec was complete. */ |
397 | spec->end_of_fmt = spec->next_fmt = format - 1; |
398 | else |
399 | { |
400 | /* Find the next format spec. */ |
401 | spec->end_of_fmt = format; |
402 | #ifdef COMPILE_WPRINTF |
403 | spec->next_fmt = __find_specwc (format); |
404 | #else |
405 | spec->next_fmt = __find_specmb (format); |
406 | #endif |
407 | } |
408 | |
409 | return nargs; |
410 | } |
411 | |