1/* Internal functions for the *scanf* implementation.
2 Copyright (C) 1991-2023 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19#include <assert.h>
20#include <errno.h>
21#include <limits.h>
22#include <ctype.h>
23#include <stdarg.h>
24#include <stdbool.h>
25#include <stdio.h>
26#include <stdint.h>
27#include <stdlib.h>
28#include <string.h>
29#include <wchar.h>
30#include <wctype.h>
31#include <libc-diag.h>
32#include <libc-lock.h>
33#include <locale/localeinfo.h>
34#include <scratch_buffer.h>
35
/* Choose the type that LONGLONG expands to: `long long' when __GNUC__
   is defined (HAVE_LONGLONG is defined alongside it in that case),
   plain `long' otherwise.  */
#ifdef __GNUC__
# define HAVE_LONGLONG
# define LONGLONG	long long
#else
# define LONGLONG	long
#endif
42
/* Determine whether we have to handle `long long' at all.
   need_longlong is 0 when `long' and `long long' share the same
   maximum value (so the `long' code paths already cover everything),
   and 1 otherwise.  */
#if LONG_MAX == LONG_LONG_MAX
# define need_longlong	0
#else
# define need_longlong	1
#endif
49
/* Determine whether we have to handle `long'.  need_long is 0 when
   `int' and `long' share the same maximum value, and 1 otherwise.  */
#if INT_MAX == LONG_MAX
# define need_long	0
#else
# define need_long	1
#endif
56
/* These are the flag bits of a conversion specification.  Each
   constant is a distinct bit so that several can be OR-ed together in
   one integer.  */
#define LONG		0x0001	/* l: long or double */
#define LONGDBL		0x0002	/* L: long long or long double */
#define SHORT		0x0004	/* h: short */
#define SUPPRESS	0x0008	/* *: suppress assignment */
#define POINTER		0x0010	/* weird %p pointer (`fake hex') */
#define NOSKIP		0x0020	/* do not skip blanks */
#define NUMBER_SIGNED	0x0040	/* signed integer */
#define GROUP		0x0080	/* ': group numbers */
#define GNU_MALLOC	0x0100	/* a: malloc strings */
#define CHAR		0x0200	/* hh: char */
#define I18N		0x0400	/* I: use locale's digits */
#define HEXA_FLOAT	0x0800	/* hexadecimal float */
#define READ_POINTER	0x1000	/* this is a pointer value */
#define POSIX_MALLOC	0x2000	/* m: malloc strings */
/* Mask matching either of the two malloc-string flags.  */
#define MALLOC		(GNU_MALLOC | POSIX_MALLOC)
73
74#include <locale/localeinfo.h>
75#include <libioP.h>
76
/* Character-type abstraction.  The same source is compiled for wide
   streams when COMPILE_WSCANF is defined and for byte streams
   otherwise; the macros below hide the difference.

   ungetc, ungetc_not_eof and inchar are not self-contained: they
   reference the identifiers `c', `s', `read_in' and `inchar_errno',
   which must exist at the point of expansion.  */
#ifdef COMPILE_WSCANF
/* Push C back onto S and decrement read_in, unless C is WEOF.  */
# define ungetc(c, s)	((void) (c == WEOF				      \
				 || (--read_in,				      \
				     _IO_sputbackwc (s, c))))
/* Same as ungetc, but without the WEOF test.  */
# define ungetc_not_eof(c, s)	((void) (--read_in,			      \
					 _IO_sputbackwc (s, c)))
/* Read the next wide character into `c' and yield it.  If `c' is
   already WEOF, nothing is read: errno is restored from inchar_errno
   and WEOF is yielded.  A successful read increments read_in; a read
   that returns WEOF saves errno in inchar_errno instead.  */
# define inchar()	(c == WEOF ? ((errno = inchar_errno), WEOF)	      \
			 : ((c = _IO_getwc_unlocked (s)),		      \
			    (void) (c != WEOF				      \
				    ? ++read_in				      \
				    : (size_t) (inchar_errno = errno)), c))

/* Character classification and case mapping for wide characters.  */
# define ISSPACE(Ch)	  iswspace (Ch)
# define ISDIGIT(Ch)	  iswdigit (Ch)
# define ISXDIGIT(Ch)	  iswxdigit (Ch)
# define TOLOWER(Ch)	  towlower (Ch)
/* Return WEOF from the expanding function unless _IO_fwide (s, 1)
   yields 1 (presumably: the stream is, or could be made,
   wide-oriented -- confirm against _IO_fwide).  */
# define ORIENT	  if (_IO_fwide (s, 1) != 1) return WEOF
/* Redirect the string-to-number helpers to their wide counterparts.  */
# define __strtoll_internal	__wcstoll_internal
# define __strtoull_internal	__wcstoull_internal
# define __strtol_internal	__wcstol_internal
# define __strtoul_internal	__wcstoul_internal
# define __strtold_internal	__wcstold_internal
# define __strtod_internal	__wcstod_internal
# define __strtof_internal	__wcstof_internal
# if __HAVE_FLOAT128_UNLIKE_LDBL
#  define __strtof128_internal	__wcstof128_internal
# endif

/* L_ turns a literal into a wide literal; CHAR_T, UCHAR_T and WINT_T
   name the character types used throughout.  EOF is redefined as
   WEOF so shared code can compare against EOF in both builds.  */
# define L_(Str)	L##Str
# define CHAR_T		wchar_t
# define UCHAR_T	unsigned int
# define WINT_T		wint_t
# undef EOF
# define EOF		WEOF
#else
/* Push C back onto S (as an unsigned char) and decrement read_in,
   unless C compares equal to EOF.  */
# define ungetc(c, s)	((void) ((int) c == EOF				      \
				 || (--read_in,				      \
				     _IO_sputbackc (s, (unsigned char) c))))
/* Same as ungetc, but without the EOF test.  */
# define ungetc_not_eof(c, s)	((void) (--read_in,			      \
					 _IO_sputbackc (s, (unsigned char) c)))
/* Read the next byte into `c' and yield it.  If `c' is already EOF,
   nothing is read: errno is restored from inchar_errno and EOF is
   yielded.  A successful read increments read_in; a read that returns
   EOF saves errno in inchar_errno instead.  */
# define inchar()	(c == EOF ? ((errno = inchar_errno), EOF)	      \
			 : ((c = _IO_getc_unlocked (s)),		      \
			    (void) (c != EOF				      \
				    ? ++read_in				      \
				    : (size_t) (inchar_errno = errno)), c))
/* Character classification and case mapping; these reference a
   `loc' identifier that must exist at the point of expansion.  */
# define ISSPACE(Ch)	  __isspace_l (Ch, loc)
# define ISDIGIT(Ch)	  __isdigit_l (Ch, loc)
# define ISXDIGIT(Ch)	  __isxdigit_l (Ch, loc)
# define TOLOWER(Ch)	  __tolower_l ((unsigned char) (Ch), loc)
/* Return EOF from the expanding function when _IO_vtable_offset (s)
   is 0 and _IO_fwide (s, -1) does not yield -1 (presumably: the
   stream cannot be byte-oriented -- confirm against _IO_fwide).  */
# define ORIENT	  if (_IO_vtable_offset (s) == 0			      \
		      && _IO_fwide (s, -1) != -1)			      \
		    return EOF

/* Byte-stream versions of the literal and character-type helpers.  */
# define L_(Str)	Str
# define CHAR_T		char
# define UCHAR_T	unsigned char
# define WINT_T		int
#endif
135
136#include "printf-parse.h" /* Use read_int. */
137
/* Error exits.  Every macro below ends with `goto errout', so the
   expanding function must provide a label of that name.  */

/* Encoding failure: set errno to EILSEQ, then leave via errout.  */
#define encode_error() do {						      \
			  __set_errno (EILSEQ);				      \
			  goto errout;					      \
			} while (0)
/* Conversion (matching) failure: leave via errout without touching
   errno or the assignment count.  */
#define conv_error()	do {						      \
			  goto errout;					      \
			} while (0)
/* Input failure: if the `done' variable of the expanding function is
   still 0, set it to EOF; then leave via errout.  */
#define input_error()	do {						      \
			  if (done == 0) done = EOF;			      \
			  goto errout;					      \
			} while (0)
/* Record PTR in the list headed by the `ptrs_to_free' variable of the
   expanding function.  When there is no list node yet, or the head
   node's ptrs array is full, a new node is obtained with alloca (so it
   lives in the stack frame of the expanding function), linked in front
   of the old head, and becomes the new head.  PTR is then stored in
   the next free slot of the head node.  */
#define add_ptr_to_free(ptr)						      \
  do									      \
    {									      \
      if (ptrs_to_free == NULL						      \
	  || ptrs_to_free->count == (sizeof (ptrs_to_free->ptrs)	      \
				     / sizeof (ptrs_to_free->ptrs[0])))	      \
	{								      \
	  struct ptrs_to_free *new_ptrs = alloca (sizeof (*ptrs_to_free));    \
	  new_ptrs->count = 0;						      \
	  new_ptrs->next = ptrs_to_free;				      \
	  ptrs_to_free = new_ptrs;					      \
	}								      \
      ptrs_to_free->ptrs[ptrs_to_free->count++] = (ptr);		      \
    }									      \
  while (0)
/* Validate the arguments of the expanding function.  Returns EOF from
   that function with errno set to EBADF when the stream S has
   _IO_NO_READS set in its flags, or with errno set to EINVAL when
   FORMAT is a null pointer.  CHECK_FILE is applied to S first; its
   exact behavior is defined outside this file (presumably it returns
   EOF for an invalid stream -- confirm against its definition).  */
#define ARGCHECK(s, format)						      \
  do									      \
    {									      \
      /* Check file argument for consistency.  */			      \
      CHECK_FILE (s, EOF);						      \
      if (s->_flags & _IO_NO_READS)					      \
	{								      \
	  __set_errno (EBADF);						      \
	  return EOF;							      \
	}								      \
      else if (format == NULL)						      \
	{								      \
	  __set_errno (EINVAL);						      \
	  return EOF;							      \
	}								      \
    } while (0)
/* Acquire the lock of stream S, registering _IO_funlockfile as the
   handler of a cleanup region first.  NOTE: this expands to two
   statements that are not wrapped in do { } while (0), so it must not
   be used as the unbraced body of an if/else/loop, and it is meant to
   be paired with UNLOCK_STREAM in the same function.  */
#define LOCK_STREAM(S)							      \
  __libc_cleanup_region_start (1, (void (*) (void *)) &_IO_funlockfile, (S)); \
  _IO_flockfile (S)
/* Release the lock of stream S and close the cleanup region opened by
   LOCK_STREAM.  Also expands to two unwrapped statements.  */
#define UNLOCK_STREAM(S)						      \
  _IO_funlockfile (S);							      \
  __libc_cleanup_region_end (0)
186
/* One node of a singly linked list of recorded `char **' locations,
   filled in by add_ptr_to_free.  NOTE(review): judging by the name,
   the recorded locations are released on some failure path, but the
   consuming code is not visible in this part of the file.  */
struct ptrs_to_free
{
  /* Number of slots of PTRS in use.  */
  size_t count;
  /* Older node, or NULL for the last node of the list.  */
  struct ptrs_to_free *next;
  /* The recorded locations; add_ptr_to_free starts a new node once
     all 32 slots are taken.  */
  char **ptrs[32];
};
193
/* Growable buffer of CHAR_T objects backed by a scratch buffer.  */
struct char_buffer {
  /* Position at which the next character is stored.  A null pointer
     marks the buffer as failed (see char_buffer_error).  */
  CHAR_T *current;
  /* One past the last usable CHAR_T slot of the scratch buffer.  */
  CHAR_T *end;
  /* Backing storage; its data and length members define the usable
     area.  */
  struct scratch_buffer scratch;
};
199
200/* Returns a pointer to the first CHAR_T object in the buffer. Only
201 valid if char_buffer_add (BUFFER, CH) has been called and
202 char_buffer_error (BUFFER) is false. */
203static inline CHAR_T *
204char_buffer_start (const struct char_buffer *buffer)
205{
206 return (CHAR_T *) buffer->scratch.data;
207}
208
209/* Returns the number of CHAR_T objects in the buffer. Only valid if
210 char_buffer_error (BUFFER) is false. */
211static inline size_t
212char_buffer_size (const struct char_buffer *buffer)
213{
214 return buffer->current - char_buffer_start (buffer);
215}
216
217/* Reinitializes BUFFER->current and BUFFER->end to cover the entire
218 scratch buffer. */
219static inline void
220char_buffer_rewind (struct char_buffer *buffer)
221{
222 buffer->current = char_buffer_start (buffer);
223 buffer->end = buffer->current + buffer->scratch.length / sizeof (CHAR_T);
224}
225
226/* Returns true if a previous call to char_buffer_add (BUFFER, CH)
227 failed. */
228static inline bool
229char_buffer_error (const struct char_buffer *buffer)
230{
231 return __glibc_unlikely (buffer->current == NULL);
232}
233
234/* Slow path for char_buffer_add. */
235static void
236char_buffer_add_slow (struct char_buffer *buffer, CHAR_T ch)
237{
238 if (char_buffer_error (buffer))
239 return;
240 size_t offset = buffer->end - (CHAR_T *) buffer->scratch.data;
241 if (!scratch_buffer_grow_preserve (&buffer->scratch))
242 {
243 buffer->current = NULL;
244 buffer->end = NULL;
245 return;
246 }
247 char_buffer_rewind (buffer);
248 buffer->current += offset;
249 *buffer->current++ = ch;
250}
251
252/* Adds CH to BUFFER. This function does not report any errors, check
253 for them with char_buffer_error. */
254static inline void
255char_buffer_add (struct char_buffer *buffer, CHAR_T ch)
256 __attribute__ ((always_inline));
257static inline void
258char_buffer_add (struct char_buffer *buffer, CHAR_T ch)
259{
260 if (__glibc_unlikely (buffer->current == buffer->end))
261 char_buffer_add_slow (buffer, ch);
262 else
263 *buffer->current++ = ch;
264}
265
266/* Read formatted input from S according to the format string
267 FORMAT, using the argument list in ARG.
268 Return the number of assignments made, or -1 for an input error. */
269#ifdef COMPILE_WSCANF
270int
271__vfwscanf_internal (FILE *s, const wchar_t *format, va_list argptr,
272 unsigned int mode_flags)
273#else
274int
275__vfscanf_internal (FILE *s, const char *format, va_list argptr,
276 unsigned int mode_flags)
277#endif
278{
279 va_list arg;
280 const UCHAR_T *f = (const UCHAR_T *) format;
281 UCHAR_T fc; /* Current character of the format. */
282 WINT_T done = 0; /* Assignments done. */
283 size_t read_in = 0; /* Chars read in. */
284 WINT_T c = 0; /* Last char read. */
285 int width; /* Maximum field width. */
286 int flags; /* Modifiers for current format element. */
287#ifndef COMPILE_WSCANF
288 locale_t loc = _NL_CURRENT_LOCALE;
289 struct __locale_data *const curctype = loc->__locales[LC_CTYPE];
290#endif
291
292 /* Errno of last failed inchar call. */
293 int inchar_errno = 0;
294 /* Status for reading F-P nums. */
295 char got_digit, got_dot, got_e, got_sign;
296 /* If a [...] is a [^...]. */
297 CHAR_T not_in;
298#define exp_char not_in
299 /* Base for integral numbers. */
300 int base;
301 /* Decimal point character. */
302#ifdef COMPILE_WSCANF
303 wint_t decimal;
304#else
305 const char *decimal;
306#endif
307 /* The thousands character of the current locale. */
308#ifdef COMPILE_WSCANF
309 wint_t thousands;
310#else
311 const char *thousands;
312#endif
313 struct ptrs_to_free *ptrs_to_free = NULL;
314 /* State for the conversions. */
315 mbstate_t state;
316 /* Integral holding variables. */
317 union
318 {
319 long long int q;
320 unsigned long long int uq;
321 long int l;
322 unsigned long int ul;
323 } num;
324 /* Character-buffer pointer. */
325 char *str = NULL;
326 wchar_t *wstr = NULL;
327 char **strptr = NULL;
328 ssize_t strsize = 0;
329 /* We must not react on white spaces immediately because they can
330 possibly be matched even if in the input stream no character is
331 available anymore. */
332 int skip_space = 0;
333 /* Workspace. */
334 CHAR_T *tw; /* Temporary pointer. */
335 struct char_buffer charbuf;
336 scratch_buffer_init (&charbuf.scratch);
337
338#ifdef __va_copy
339 __va_copy (arg, argptr);
340#else
341 arg = (va_list) argptr;
342#endif
343
344#ifdef ORIENT
345 ORIENT;
346#endif
347
348 ARGCHECK (s, format);
349
350 {
351#ifndef COMPILE_WSCANF
352 struct __locale_data *const curnumeric = loc->__locales[LC_NUMERIC];
353#endif
354
355 /* Figure out the decimal point character. */
356#ifdef COMPILE_WSCANF
357 decimal = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_DECIMAL_POINT_WC);
358#else
359 decimal = curnumeric->values[_NL_ITEM_INDEX (DECIMAL_POINT)].string;
360#endif
361 /* Figure out the thousands separator character. */
362#ifdef COMPILE_WSCANF
363 thousands = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_THOUSANDS_SEP_WC);
364#else
365 thousands = curnumeric->values[_NL_ITEM_INDEX (THOUSANDS_SEP)].string;
366 if (*thousands == '\0')
367 thousands = NULL;
368#endif
369 }
370
371 /* Lock the stream. */
372 LOCK_STREAM (s);
373
374
375#ifndef COMPILE_WSCANF
376 /* From now on we use `state' to convert the format string. */
377 memset (&state, '\0', sizeof (state));
378#endif
379
380 /* Run through the format string. */
381 while (*f != '\0')
382 {
383 unsigned int argpos;
384 /* Extract the next argument, which is of type TYPE.
385 For a %N$... spec, this is the Nth argument from the beginning;
386 otherwise it is the next argument after the state now in ARG. */
387#ifdef __va_copy
388# define ARG(type) (argpos == 0 ? va_arg (arg, type) \
389 : ({ unsigned int pos = argpos; \
390 va_list arg; \
391 __va_copy (arg, argptr); \
392 while (--pos > 0) \
393 (void) va_arg (arg, void *); \
394 va_arg (arg, type); \
395 }))
396#else
397# if 0
398 /* XXX Possible optimization. */
399# define ARG(type) (argpos == 0 ? va_arg (arg, type) \
400 : ({ va_list arg = (va_list) argptr; \
401 arg = (va_list) ((char *) arg \
402 + (argpos - 1) \
403 * __va_rounded_size (void *)); \
404 va_arg (arg, type); \
405 }))
406# else
407# define ARG(type) (argpos == 0 ? va_arg (arg, type) \
408 : ({ unsigned int pos = argpos; \
409 va_list arg = (va_list) argptr; \
410 while (--pos > 0) \
411 (void) va_arg (arg, void *); \
412 va_arg (arg, type); \
413 }))
414# endif
415#endif
416
417#ifndef COMPILE_WSCANF
418 if (!isascii (*f))
419 {
420 /* Non-ASCII, may be a multibyte. */
421 int len = __mbrlen ((const char *) f, strlen ((const char *) f),
422 &state);
423 if (len > 0)
424 {
425 do
426 {
427 c = inchar ();
428 if (__glibc_unlikely (c == EOF))
429 input_error ();
430 else if (c != *f++)
431 {
432 ungetc_not_eof (c, s);
433 conv_error ();
434 }
435 }
436 while (--len > 0);
437 continue;
438 }
439 }
440#endif
441
442 fc = *f++;
443 if (fc != '%')
444 {
445 /* Remember to skip spaces. */
446 if (ISSPACE (fc))
447 {
448 skip_space = 1;
449 continue;
450 }
451
452 /* Read a character. */
453 c = inchar ();
454
455 /* Characters other than format specs must just match. */
456 if (__glibc_unlikely (c == EOF))
457 input_error ();
458
459 /* We saw white space char as the last character in the format
460 string. Now it's time to skip all leading white space. */
461 if (skip_space)
462 {
463 while (ISSPACE (c))
464 if (__glibc_unlikely (inchar () == EOF))
465 input_error ();
466 skip_space = 0;
467 }
468
469 if (__glibc_unlikely (c != fc))
470 {
471 ungetc (c, s);
472 conv_error ();
473 }
474
475 continue;
476 }
477
478 /* This is the start of the conversion string. */
479 flags = 0;
480
481 /* Initialize state of modifiers. */
482 argpos = 0;
483
484 /* Prepare temporary buffer. */
485 char_buffer_rewind (&charbuf);
486
487 /* Check for a positional parameter specification. */
488 if (ISDIGIT (*f))
489 {
490 argpos = read_int (&f);
491 if (*f == L_('$'))
492 ++f;
493 else
494 {
495 /* Oops; that was actually the field width. */
496 width = argpos;
497 argpos = 0;
498 goto got_width;
499 }
500 }
501
502 /* Check for the assignment-suppressing, the number grouping flag,
503 and the signal to use the locale's digit representation. */
504 while (*f == L_('*') || *f == L_('\'') || *f == L_('I'))
505 switch (*f++)
506 {
507 case L_('*'):
508 flags |= SUPPRESS;
509 break;
510 case L_('\''):
511#ifdef COMPILE_WSCANF
512 if (thousands != L'\0')
513#else
514 if (thousands != NULL)
515#endif
516 flags |= GROUP;
517 break;
518 case L_('I'):
519 flags |= I18N;
520 break;
521 }
522
523 /* Find the maximum field width. */
524 width = 0;
525 if (ISDIGIT (*f))
526 width = read_int (&f);
527 got_width:
528 if (width == 0)
529 width = -1;
530
531 /* Check for type modifiers. */
532 switch (*f++)
533 {
534 case L_('h'):
535 /* ints are short ints or chars. */
536 if (*f == L_('h'))
537 {
538 ++f;
539 flags |= CHAR;
540 }
541 else
542 flags |= SHORT;
543 break;
544 case L_('l'):
545 if (*f == L_('l'))
546 {
547 /* A double `l' is equivalent to an `L'. */
548 ++f;
549 flags |= LONGDBL | LONG;
550 }
551 else
552 /* ints are long ints. */
553 flags |= LONG;
554 break;
555 case L_('q'):
556 case L_('L'):
557 /* doubles are long doubles, and ints are long long ints. */
558 flags |= LONGDBL | LONG;
559 break;
560 case L_('a'):
561 /* The `a' is used as a flag only if followed by `s', `S' or
562 `['. */
563 if (*f != L_('s') && *f != L_('S') && *f != L_('['))
564 {
565 --f;
566 break;
567 }
568 /* In __isoc99_*scanf %as, %aS and %a[ extension is not
569 supported at all. */
570 if (__glibc_likely ((mode_flags & SCANF_ISOC99_A) != 0))
571 {
572 --f;
573 break;
574 }
575 /* String conversions (%s, %[) take a `char **'
576 arg and fill it in with a malloc'd pointer. */
577 flags |= GNU_MALLOC;
578 break;
579 case L_('m'):
580 flags |= POSIX_MALLOC;
581 if (*f == L_('l'))
582 {
583 ++f;
584 flags |= LONG;
585 }
586 break;
587 case L_('z'):
588 if (need_longlong && sizeof (size_t) > sizeof (unsigned long int))
589 flags |= LONGDBL;
590 else if (sizeof (size_t) > sizeof (unsigned int))
591 flags |= LONG;
592 break;
593 case L_('j'):
594 if (need_longlong && sizeof (uintmax_t) > sizeof (unsigned long int))
595 flags |= LONGDBL;
596 else if (sizeof (uintmax_t) > sizeof (unsigned int))
597 flags |= LONG;
598 break;
599 case L_('t'):
600 if (need_longlong && sizeof (ptrdiff_t) > sizeof (long int))
601 flags |= LONGDBL;
602 else if (sizeof (ptrdiff_t) > sizeof (int))
603 flags |= LONG;
604 break;
605 default:
606 /* Not a recognized modifier. Backup. */
607 --f;
608 break;
609 }
610
611 /* End of the format string? */
612 if (__glibc_unlikely (*f == L_('\0')))
613 conv_error ();
614
615 /* Find the conversion specifier. */
616 fc = *f++;
617 if (skip_space || (fc != L_('[') && fc != L_('c')
618 && fc != L_('C') && fc != L_('n')))
619 {
620 /* Eat whitespace. */
621 int save_errno = errno;
622 __set_errno (0);
623 do
624 /* We add the additional test for EOF here since otherwise
625 inchar will restore the old errno value which might be
626 EINTR but does not indicate an interrupt since nothing
627 was read at this time. */
628 if (__builtin_expect ((c == EOF || inchar () == EOF)
629 && errno == EINTR, 0))
630 input_error ();
631 while (ISSPACE (c));
632 __set_errno (save_errno);
633 ungetc (c, s);
634 skip_space = 0;
635 }
636
637 switch (fc)
638 {
639 case L_('%'): /* Must match a literal '%'. */
640 c = inchar ();
641 if (__glibc_unlikely (c == EOF))
642 input_error ();
643 if (__glibc_unlikely (c != fc))
644 {
645 ungetc_not_eof (c, s);
646 conv_error ();
647 }
648 break;
649
650 case L_('n'): /* Answer number of assignments done. */
651 /* Corrigendum 1 to ISO C 1990 describes the allowed flags
652 with the 'n' conversion specifier. */
653 if (!(flags & SUPPRESS))
654 {
655 /* Don't count the read-ahead. */
656 if (need_longlong && (flags & LONGDBL))
657 *ARG (long long int *) = read_in;
658 else if (need_long && (flags & LONG))
659 *ARG (long int *) = read_in;
660 else if (flags & SHORT)
661 *ARG (short int *) = read_in;
662 else if (!(flags & CHAR))
663 *ARG (int *) = read_in;
664 else
665 *ARG (char *) = read_in;
666
667#ifdef NO_BUG_IN_ISO_C_CORRIGENDUM_1
668 /* We have a severe problem here. The ISO C standard
669 contradicts itself in explaining the effect of the %n
670 format in `scanf'. While in ISO C:1990 and the ISO C
671 Amendment 1:1995 the result is described as
672
673 Execution of a %n directive does not effect the
674 assignment count returned at the completion of
675 execution of the f(w)scanf function.
676
677 in ISO C Corrigendum 1:1994 the following was added:
678
679 Subclause 7.9.6.2
680 Add the following fourth example:
681 In:
682 #include <stdio.h>
683 int d1, d2, n1, n2, i;
684 i = sscanf("123", "%d%n%n%d", &d1, &n1, &n2, &d2);
685 the value 123 is assigned to d1 and the value3 to n1.
686 Because %n can never get an input failure the value
687 of 3 is also assigned to n2. The value of d2 is not
688 affected. The value 3 is assigned to i.
689
	     For now we go with the historically correct code from ISO C,
	     i.e., we don't count the %n assignments.  Should this ever
	     prove to be wrong, just remove the #ifdef above.  */
693 ++done;
694#endif
695 }
696 break;
697
698 case L_('c'): /* Match characters. */
699 if ((flags & LONG) == 0)
700 {
701 if (width == -1)
702 width = 1;
703
704#define STRING_ARG(Str, Type, Width) \
705 do if (!(flags & SUPPRESS)) \
706 { \
707 if (flags & MALLOC) \
708 { \
709 /* The string is to be stored in a malloc'd buffer. */ \
710 /* For %mS using char ** is actually wrong, but \
711 shouldn't make a difference on any arch glibc \
712 supports and would unnecessarily complicate \
713 things. */ \
714 strptr = ARG (char **); \
715 if (strptr == NULL) \
716 conv_error (); \
717 /* Allocate an initial buffer. */ \
718 strsize = Width; \
719 *strptr = (char *) malloc (strsize * sizeof (Type)); \
720 Str = (Type *) *strptr; \
721 if (Str != NULL) \
722 add_ptr_to_free (strptr); \
723 else if (flags & POSIX_MALLOC) \
724 { \
725 done = EOF; \
726 goto errout; \
727 } \
728 } \
729 else \
730 Str = ARG (Type *); \
731 if (Str == NULL) \
732 conv_error (); \
733 } while (0)
734#ifdef COMPILE_WSCANF
735 STRING_ARG (str, char, 100);
736#else
737 STRING_ARG (str, char, (width > 1024 ? 1024 : width));
738#endif
739
740 c = inchar ();
741 if (__glibc_unlikely (c == EOF))
742 input_error ();
743
744#ifdef COMPILE_WSCANF
745 /* We have to convert the wide character(s) into multibyte
746 characters and store the result. */
747 memset (&state, '\0', sizeof (state));
748
749 do
750 {
751 size_t n;
752
753 if (!(flags & SUPPRESS) && (flags & POSIX_MALLOC)
754 && *strptr + strsize - str <= MB_LEN_MAX)
755 {
756 /* We have to enlarge the buffer if the `m' flag
757 was given. */
758 size_t strleng = str - *strptr;
759 char *newstr;
760
761 newstr = (char *) realloc (*strptr, strsize * 2);
762 if (newstr == NULL)
763 {
764 /* Can't allocate that much. Last-ditch effort. */
765 newstr = (char *) realloc (*strptr,
766 strleng + MB_LEN_MAX);
767 if (newstr == NULL)
768 {
769 /* c can't have `a' flag, only `m'. */
770 done = EOF;
771 goto errout;
772 }
773 else
774 {
775 *strptr = newstr;
776 str = newstr + strleng;
777 strsize = strleng + MB_LEN_MAX;
778 }
779 }
780 else
781 {
782 *strptr = newstr;
783 str = newstr + strleng;
784 strsize *= 2;
785 }
786 }
787
788 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c, &state);
789 if (__glibc_unlikely (n == (size_t) -1))
790 /* No valid wide character. */
791 input_error ();
792
793 /* Increment the output pointer. Even if we don't
794 write anything. */
795 str += n;
796 }
797 while (--width > 0 && inchar () != EOF);
798#else
799 if (!(flags & SUPPRESS))
800 {
801 do
802 {
803 if ((flags & MALLOC)
804 && (char *) str == *strptr + strsize)
805 {
806 /* Enlarge the buffer. */
807 size_t newsize
808 = strsize
809 + (strsize >= width ? width - 1 : strsize);
810
811 str = (char *) realloc (*strptr, newsize);
812 if (str == NULL)
813 {
814 /* Can't allocate that much. Last-ditch
815 effort. */
816 str = (char *) realloc (*strptr, strsize + 1);
817 if (str == NULL)
818 {
819 /* c can't have `a' flag, only `m'. */
820 done = EOF;
821 goto errout;
822 }
823 else
824 {
825 *strptr = (char *) str;
826 str += strsize;
827 ++strsize;
828 }
829 }
830 else
831 {
832 *strptr = (char *) str;
833 str += strsize;
834 strsize = newsize;
835 }
836 }
837 *str++ = c;
838 }
839 while (--width > 0 && inchar () != EOF);
840 }
841 else
842 while (--width > 0 && inchar () != EOF);
843#endif
844
845 if (!(flags & SUPPRESS))
846 {
847 if ((flags & MALLOC) && str - *strptr != strsize)
848 {
849 char *cp = (char *) realloc (*strptr, str - *strptr);
850 if (cp != NULL)
851 *strptr = cp;
852 }
853 strptr = NULL;
854 ++done;
855 }
856
857 break;
858 }
859 /* FALLTHROUGH */
860 case L_('C'):
861 if (width == -1)
862 width = 1;
863
864 STRING_ARG (wstr, wchar_t, (width > 1024 ? 1024 : width));
865
866 c = inchar ();
867 if (__glibc_unlikely (c == EOF))
868 input_error ();
869
870#ifdef COMPILE_WSCANF
871 /* Just store the incoming wide characters. */
872 if (!(flags & SUPPRESS))
873 {
874 do
875 {
876 if ((flags & MALLOC)
877 && wstr == (wchar_t *) *strptr + strsize)
878 {
879 size_t newsize
880 = strsize + (strsize > width ? width - 1 : strsize);
881 /* Enlarge the buffer. */
882 wstr = (wchar_t *) realloc (*strptr,
883 newsize * sizeof (wchar_t));
884 if (wstr == NULL)
885 {
886 /* Can't allocate that much. Last-ditch effort. */
887 wstr = (wchar_t *) realloc (*strptr,
888 (strsize + 1)
889 * sizeof (wchar_t));
890 if (wstr == NULL)
891 {
892 /* C or lc can't have `a' flag, only `m'
893 flag. */
894 done = EOF;
895 goto errout;
896 }
897 else
898 {
899 *strptr = (char *) wstr;
900 wstr += strsize;
901 ++strsize;
902 }
903 }
904 else
905 {
906 *strptr = (char *) wstr;
907 wstr += strsize;
908 strsize = newsize;
909 }
910 }
911 *wstr++ = c;
912 }
913 while (--width > 0 && inchar () != EOF);
914 }
915 else
916 while (--width > 0 && inchar () != EOF);
917#else
918 {
919 /* We have to convert the multibyte input sequence to wide
920 characters. */
921 char buf[1];
922 mbstate_t cstate;
923
924 memset (&cstate, '\0', sizeof (cstate));
925
926 do
927 {
928 /* This is what we present the mbrtowc function first. */
929 buf[0] = c;
930
931 if (!(flags & SUPPRESS) && (flags & MALLOC)
932 && wstr == (wchar_t *) *strptr + strsize)
933 {
934 size_t newsize
935 = strsize + (strsize > width ? width - 1 : strsize);
936 /* Enlarge the buffer. */
937 wstr = (wchar_t *) realloc (*strptr,
938 newsize * sizeof (wchar_t));
939 if (wstr == NULL)
940 {
941 /* Can't allocate that much. Last-ditch effort. */
942 wstr = (wchar_t *) realloc (*strptr,
943 ((strsize + 1)
944 * sizeof (wchar_t)));
945 if (wstr == NULL)
946 {
947 /* C or lc can't have `a' flag, only `m' flag. */
948 done = EOF;
949 goto errout;
950 }
951 else
952 {
953 *strptr = (char *) wstr;
954 wstr += strsize;
955 ++strsize;
956 }
957 }
958 else
959 {
960 *strptr = (char *) wstr;
961 wstr += strsize;
962 strsize = newsize;
963 }
964 }
965
966 while (1)
967 {
968 size_t n;
969
970 n = __mbrtowc (!(flags & SUPPRESS) ? wstr : NULL,
971 buf, 1, &cstate);
972
973 if (n == (size_t) -2)
974 {
975 /* Possibly correct character, just not enough
976 input. */
977 if (__glibc_unlikely (inchar () == EOF))
978 encode_error ();
979
980 buf[0] = c;
981 continue;
982 }
983
984 if (__glibc_unlikely (n != 1))
985 encode_error ();
986
987 /* We have a match. */
988 break;
989 }
990
991 /* Advance the result pointer. */
992 ++wstr;
993 }
994 while (--width > 0 && inchar () != EOF);
995 }
996#endif
997
998 if (!(flags & SUPPRESS))
999 {
1000 if ((flags & MALLOC) && wstr - (wchar_t *) *strptr != strsize)
1001 {
1002 wchar_t *cp = (wchar_t *) realloc (*strptr,
1003 ((wstr
1004 - (wchar_t *) *strptr)
1005 * sizeof (wchar_t)));
1006 if (cp != NULL)
1007 *strptr = (char *) cp;
1008 }
1009 strptr = NULL;
1010
1011 ++done;
1012 }
1013
1014 break;
1015
1016 case L_('s'): /* Read a string. */
1017 if (!(flags & LONG))
1018 {
1019 STRING_ARG (str, char, 100);
1020
1021 c = inchar ();
1022 if (__glibc_unlikely (c == EOF))
1023 input_error ();
1024
1025#ifdef COMPILE_WSCANF
1026 memset (&state, '\0', sizeof (state));
1027#endif
1028
1029 do
1030 {
1031 if (ISSPACE (c))
1032 {
1033 ungetc_not_eof (c, s);
1034 break;
1035 }
1036
1037#ifdef COMPILE_WSCANF
1038 /* This is quite complicated. We have to convert the
1039 wide characters into multibyte characters and then
1040 store them. */
1041 {
1042 size_t n;
1043
1044 if (!(flags & SUPPRESS) && (flags & MALLOC)
1045 && *strptr + strsize - str <= MB_LEN_MAX)
1046 {
1047 /* We have to enlarge the buffer if the `a' or `m'
1048 flag was given. */
1049 size_t strleng = str - *strptr;
1050 char *newstr;
1051
1052 newstr = (char *) realloc (*strptr, strsize * 2);
1053 if (newstr == NULL)
1054 {
1055 /* Can't allocate that much. Last-ditch
1056 effort. */
1057 newstr = (char *) realloc (*strptr,
1058 strleng + MB_LEN_MAX);
1059 if (newstr == NULL)
1060 {
1061 if (flags & POSIX_MALLOC)
1062 {
1063 done = EOF;
1064 goto errout;
1065 }
1066 /* We lose. Oh well. Terminate the
1067 string and stop converting,
1068 so at least we don't skip any input. */
1069 ((char *) (*strptr))[strleng] = '\0';
1070 strptr = NULL;
1071 ++done;
1072 conv_error ();
1073 }
1074 else
1075 {
1076 *strptr = newstr;
1077 str = newstr + strleng;
1078 strsize = strleng + MB_LEN_MAX;
1079 }
1080 }
1081 else
1082 {
1083 *strptr = newstr;
1084 str = newstr + strleng;
1085 strsize *= 2;
1086 }
1087 }
1088
1089 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c,
1090 &state);
1091 if (__glibc_unlikely (n == (size_t) -1))
1092 encode_error ();
1093
1094 assert (n <= MB_LEN_MAX);
1095 str += n;
1096 }
1097#else
1098 /* This is easy. */
1099 if (!(flags & SUPPRESS))
1100 {
1101 *str++ = c;
1102 if ((flags & MALLOC)
1103 && (char *) str == *strptr + strsize)
1104 {
1105 /* Enlarge the buffer. */
1106 str = (char *) realloc (*strptr, 2 * strsize);
1107 if (str == NULL)
1108 {
1109 /* Can't allocate that much. Last-ditch
1110 effort. */
1111 str = (char *) realloc (*strptr, strsize + 1);
1112 if (str == NULL)
1113 {
1114 if (flags & POSIX_MALLOC)
1115 {
1116 done = EOF;
1117 goto errout;
1118 }
1119 /* We lose. Oh well. Terminate the
1120 string and stop converting,
1121 so at least we don't skip any input. */
1122 ((char *) (*strptr))[strsize - 1] = '\0';
1123 strptr = NULL;
1124 ++done;
1125 conv_error ();
1126 }
1127 else
1128 {
1129 *strptr = (char *) str;
1130 str += strsize;
1131 ++strsize;
1132 }
1133 }
1134 else
1135 {
1136 *strptr = (char *) str;
1137 str += strsize;
1138 strsize *= 2;
1139 }
1140 }
1141 }
1142#endif
1143 }
1144 while ((width <= 0 || --width > 0) && inchar () != EOF);
1145
1146 if (!(flags & SUPPRESS))
1147 {
1148#ifdef COMPILE_WSCANF
1149 /* We have to emit the code to get into the initial
1150 state. */
1151 char buf[MB_LEN_MAX];
1152 size_t n = __wcrtomb (buf, L'\0', &state);
1153 if (n > 0 && (flags & MALLOC)
1154 && str + n >= *strptr + strsize)
1155 {
1156 /* Enlarge the buffer. */
1157 size_t strleng = str - *strptr;
1158 char *newstr;
1159
1160 newstr = (char *) realloc (*strptr, strleng + n + 1);
1161 if (newstr == NULL)
1162 {
1163 if (flags & POSIX_MALLOC)
1164 {
1165 done = EOF;
1166 goto errout;
1167 }
1168 /* We lose. Oh well. Terminate the string
1169 and stop converting, so at least we don't
1170 skip any input. */
1171 ((char *) (*strptr))[strleng] = '\0';
1172 strptr = NULL;
1173 ++done;
1174 conv_error ();
1175 }
1176 else
1177 {
1178 *strptr = newstr;
1179 str = newstr + strleng;
1180 strsize = strleng + n + 1;
1181 }
1182 }
1183
1184 str = __mempcpy (str, buf, n);
1185#endif
1186 *str++ = '\0';
1187
1188 if ((flags & MALLOC) && str - *strptr != strsize)
1189 {
1190 char *cp = (char *) realloc (*strptr, str - *strptr);
1191 if (cp != NULL)
1192 *strptr = cp;
1193 }
1194 strptr = NULL;
1195
1196 ++done;
1197 }
1198 break;
1199 }
1200 /* FALLTHROUGH */
1201
1202 case L_('S'):
1203 {
1204#ifndef COMPILE_WSCANF
1205 mbstate_t cstate;
1206#endif
1207
1208 /* Wide character string. */
1209 STRING_ARG (wstr, wchar_t, 100);
1210
1211 c = inchar ();
1212 if (__builtin_expect (c == EOF, 0))
1213 input_error ();
1214
1215#ifndef COMPILE_WSCANF
1216 memset (&cstate, '\0', sizeof (cstate));
1217#endif
1218
1219 do
1220 {
1221 if (ISSPACE (c))
1222 {
1223 ungetc_not_eof (c, s);
1224 break;
1225 }
1226
1227#ifdef COMPILE_WSCANF
1228 /* This is easy. */
1229 if (!(flags & SUPPRESS))
1230 {
1231 *wstr++ = c;
1232 if ((flags & MALLOC)
1233 && wstr == (wchar_t *) *strptr + strsize)
1234 {
1235 /* Enlarge the buffer. */
1236 wstr = (wchar_t *) realloc (*strptr,
1237 (2 * strsize)
1238 * sizeof (wchar_t));
1239 if (wstr == NULL)
1240 {
1241 /* Can't allocate that much. Last-ditch
1242 effort. */
1243 wstr = (wchar_t *) realloc (*strptr,
1244 (strsize + 1)
1245 * sizeof (wchar_t));
1246 if (wstr == NULL)
1247 {
1248 if (flags & POSIX_MALLOC)
1249 {
1250 done = EOF;
1251 goto errout;
1252 }
1253 /* We lose. Oh well. Terminate the string
1254 and stop converting, so at least we don't
1255 skip any input. */
1256 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
1257 strptr = NULL;
1258 ++done;
1259 conv_error ();
1260 }
1261 else
1262 {
1263 *strptr = (char *) wstr;
1264 wstr += strsize;
1265 ++strsize;
1266 }
1267 }
1268 else
1269 {
1270 *strptr = (char *) wstr;
1271 wstr += strsize;
1272 strsize *= 2;
1273 }
1274 }
1275 }
1276#else
1277 {
1278 char buf[1];
1279
1280 buf[0] = c;
1281
1282 while (1)
1283 {
1284 size_t n;
1285
1286 n = __mbrtowc (!(flags & SUPPRESS) ? wstr : NULL,
1287 buf, 1, &cstate);
1288
1289 if (n == (size_t) -2)
1290 {
1291 /* Possibly correct character, just not enough
1292 input. */
1293 if (__glibc_unlikely (inchar () == EOF))
1294 encode_error ();
1295
1296 buf[0] = c;
1297 continue;
1298 }
1299
1300 if (__glibc_unlikely (n != 1))
1301 encode_error ();
1302
1303 /* We have a match. */
1304 ++wstr;
1305 break;
1306 }
1307
1308 if (!(flags & SUPPRESS) && (flags & MALLOC)
1309 && wstr == (wchar_t *) *strptr + strsize)
1310 {
1311 /* Enlarge the buffer. */
1312 wstr = (wchar_t *) realloc (*strptr,
1313 (2 * strsize
1314 * sizeof (wchar_t)));
1315 if (wstr == NULL)
1316 {
1317 /* Can't allocate that much. Last-ditch effort. */
1318 wstr = (wchar_t *) realloc (*strptr,
1319 ((strsize + 1)
1320 * sizeof (wchar_t)));
1321 if (wstr == NULL)
1322 {
1323 if (flags & POSIX_MALLOC)
1324 {
1325 done = EOF;
1326 goto errout;
1327 }
1328 /* We lose. Oh well. Terminate the
1329 string and stop converting, so at
1330 least we don't skip any input. */
1331 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
1332 strptr = NULL;
1333 ++done;
1334 conv_error ();
1335 }
1336 else
1337 {
1338 *strptr = (char *) wstr;
1339 wstr += strsize;
1340 ++strsize;
1341 }
1342 }
1343 else
1344 {
1345 *strptr = (char *) wstr;
1346 wstr += strsize;
1347 strsize *= 2;
1348 }
1349 }
1350 }
1351#endif
1352 }
1353 while ((width <= 0 || --width > 0) && inchar () != EOF);
1354
1355 if (!(flags & SUPPRESS))
1356 {
1357 *wstr++ = L'\0';
1358
1359 if ((flags & MALLOC) && wstr - (wchar_t *) *strptr != strsize)
1360 {
1361 wchar_t *cp = (wchar_t *) realloc (*strptr,
1362 ((wstr
1363 - (wchar_t *) *strptr)
1364 * sizeof (wchar_t)));
1365 if (cp != NULL)
1366 *strptr = (char *) cp;
1367 }
1368 strptr = NULL;
1369
1370 ++done;
1371 }
1372 }
1373 break;
1374
1375 case L_('x'): /* Hexadecimal integer. */
1376 case L_('X'): /* Ditto. */
1377 base = 16;
1378 goto number;
1379
1380 case L_('o'): /* Octal integer. */
1381 base = 8;
1382 goto number;
1383
1384 case L_('b'): /* Binary integer. */
1385 base = 2;
1386 goto number;
1387
1388 case L_('u'): /* Unsigned decimal integer. */
1389 base = 10;
1390 goto number;
1391
1392 case L_('d'): /* Signed decimal integer. */
1393 base = 10;
1394 flags |= NUMBER_SIGNED;
1395 goto number;
1396
1397 case L_('i'): /* Generic number. */
1398 base = 0;
1399 flags |= NUMBER_SIGNED;
1400
1401 number:
1402 c = inchar ();
1403 if (__glibc_unlikely (c == EOF))
1404 input_error ();
1405
1406 /* Check for a sign. */
1407 if (c == L_('-') || c == L_('+'))
1408 {
1409 char_buffer_add (&charbuf, c);
1410 if (width > 0)
1411 --width;
1412 c = inchar ();
1413 }
1414
1415 /* Look for a leading indication of base. */
1416 if (width != 0 && c == L_('0'))
1417 {
1418 if (width > 0)
1419 --width;
1420
1421 char_buffer_add (&charbuf, c);
1422 c = inchar ();
1423
1424 if (width != 0 && TOLOWER (c) == L_('x'))
1425 {
1426 if (base == 0)
1427 base = 16;
1428 if (base == 16)
1429 {
1430 if (width > 0)
1431 --width;
1432 c = inchar ();
1433 }
1434 }
1435 else if (width != 0
1436 && TOLOWER (c) == L_('b')
1437 && (base == 2
1438 || ((mode_flags & SCANF_ISOC23_BIN_CST) != 0
1439 && base == 0)))
1440 {
1441 base = 2;
1442 if (width > 0)
1443 --width;
1444 c = inchar ();
1445 }
1446 else if (base == 0)
1447 base = 8;
1448 }
1449
1450 if (base == 0)
1451 base = 10;
1452
1453 if (base == 10 && __builtin_expect ((flags & I18N) != 0, 0))
1454 {
1455 int from_level;
1456 int to_level;
1457 int level;
1458 enum { num_digits_len = 10 };
1459#ifdef COMPILE_WSCANF
1460 const wchar_t *wcdigits[num_digits_len];
1461#else
1462 const char *mbdigits[num_digits_len];
1463#endif
1464 CHAR_T *digits_extended[num_digits_len] = { NULL };
1465
1466 /* "to_inpunct" is a map from ASCII digits to their
1467 equivalent in locale. This is defined for locales
1468 which use an extra digits set. */
1469 wctrans_t map = __wctrans ("to_inpunct");
1470 int n;
1471
1472 from_level = 0;
1473#ifdef COMPILE_WSCANF
1474 to_level = _NL_CURRENT_WORD (LC_CTYPE,
1475 _NL_CTYPE_INDIGITS_WC_LEN) - 1;
1476#else
1477 to_level = (uint32_t) curctype->values[_NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN)].word - 1;
1478#endif
1479
1480 /* Get the alternative digit forms if there are any. */
1481 if (__glibc_unlikely (map != NULL))
1482 {
1483 /* Adding new level for extra digits set in locale file. */
1484 ++to_level;
1485
1486 for (n = 0; n < num_digits_len; ++n)
1487 {
1488#ifdef COMPILE_WSCANF
1489 wcdigits[n] = (const wchar_t *)
1490 _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n);
1491
1492 wchar_t *wc_extended = (wchar_t *)
1493 malloc ((to_level + 2) * sizeof (wchar_t));
1494 if (wc_extended == NULL)
1495 {
1496 done = EOF;
1497 goto digits_extended_fail;
1498 }
1499 __wmemcpy (wc_extended, wcdigits[n], to_level);
1500 wc_extended[to_level] = __towctrans (L'0' + n, map);
1501 wc_extended[to_level + 1] = '\0';
1502 digits_extended[n] = wc_extended;
1503#else
1504 mbdigits[n]
1505 = curctype->values[_NL_CTYPE_INDIGITS0_MB + n].string;
1506
1507 /* Get the equivalent wide char in map. */
1508 wint_t extra_wcdigit = __towctrans (L'0' + n, map);
1509
1510 /* Convert it to multibyte representation. */
1511 mbstate_t state;
1512 memset (&state, '\0', sizeof (state));
1513
1514 char extra_mbdigit[MB_LEN_MAX];
1515 size_t mblen
1516 = __wcrtomb (extra_mbdigit, extra_wcdigit, &state);
1517
1518 if (mblen == (size_t) -1)
1519 {
1520 /* Ignore this new level. */
1521 map = NULL;
1522 break;
1523 }
1524
1525 /* Calculate the length of mbdigits[n]. */
1526 const char *last_char = mbdigits[n];
1527 for (level = 0; level < to_level; ++level)
1528 last_char = strchr (last_char, '\0') + 1;
1529
1530 size_t mbdigits_len = last_char - mbdigits[n];
1531
1532 /* Allocate memory for extended multibyte digit. */
1533 char *mb_extended = malloc (mbdigits_len + mblen + 1);
1534 if (mb_extended == NULL)
1535 {
1536 done = EOF;
1537 goto digits_extended_fail;
1538 }
1539
1540 /* And get the mbdigits + extra_digit string. */
1541 *(char *) __mempcpy (__mempcpy (mb_extended, mbdigits[n],
1542 mbdigits_len),
1543 extra_mbdigit, mblen) = '\0';
1544 digits_extended[n] = mb_extended;
1545#endif
1546 }
1547 }
1548
1549 /* Read the number into workspace. */
1550 while (c != EOF && width != 0)
1551 {
1552 /* In this round we get the pointer to the digit strings
1553 and also perform the first round of comparisons. */
1554 for (n = 0; n < num_digits_len; ++n)
1555 {
1556 /* Get the string for the digits with value N. */
1557#ifdef COMPILE_WSCANF
1558
1559 /* digits_extended[] is fully set in the loop
1560 above, but the test for "map != NULL" is done
1561 inside the loop here and outside the loop there. */
1562 DIAG_PUSH_NEEDS_COMMENT;
1563 DIAG_IGNORE_NEEDS_COMMENT (4.7, "-Wmaybe-uninitialized");
1564
1565 if (__glibc_unlikely (map != NULL))
1566 wcdigits[n] = digits_extended[n];
1567 else
1568 wcdigits[n] = (const wchar_t *)
1569 _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n);
1570 wcdigits[n] += from_level;
1571
1572 DIAG_POP_NEEDS_COMMENT;
1573
1574 if (c == (wint_t) *wcdigits[n])
1575 {
1576 to_level = from_level;
1577 break;
1578 }
1579
1580 /* Advance the pointer to the next string. */
1581 ++wcdigits[n];
1582#else
1583 const char *cmpp;
1584 int avail = width > 0 ? width : INT_MAX;
1585
1586 if (__glibc_unlikely (map != NULL))
1587 mbdigits[n] = digits_extended[n];
1588 else
1589 mbdigits[n]
1590 = curctype->values[_NL_CTYPE_INDIGITS0_MB + n].string;
1591
1592 for (level = 0; level < from_level; level++)
1593 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1594
1595 cmpp = mbdigits[n];
1596 while ((unsigned char) *cmpp == c && avail >= 0)
1597 {
1598 if (*++cmpp == '\0')
1599 break;
1600 else
1601 {
1602 if (avail == 0 || inchar () == EOF)
1603 break;
1604 --avail;
1605 }
1606 }
1607
1608 if (*cmpp == '\0')
1609 {
1610 if (width > 0)
1611 width = avail;
1612 to_level = from_level;
1613 break;
1614 }
1615
1616 /* We are pushing all read characters back. */
1617 if (cmpp > mbdigits[n])
1618 {
1619 ungetc (c, s);
1620 while (--cmpp > mbdigits[n])
1621 ungetc_not_eof ((unsigned char) *cmpp, s);
1622 c = (unsigned char) *cmpp;
1623 }
1624
1625 /* Advance the pointer to the next string. */
1626 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1627#endif
1628 }
1629
1630 if (n == num_digits_len)
1631 {
1632 /* Have not yet found the digit. */
1633 for (level = from_level + 1; level <= to_level; ++level)
1634 {
1635 /* Search all ten digits of this level. */
1636 for (n = 0; n < num_digits_len; ++n)
1637 {
1638#ifdef COMPILE_WSCANF
1639 if (c == (wint_t) *wcdigits[n])
1640 break;
1641
1642 /* Advance the pointer to the next string. */
1643 ++wcdigits[n];
1644#else
1645 const char *cmpp;
1646 int avail = width > 0 ? width : INT_MAX;
1647
1648 cmpp = mbdigits[n];
1649 while ((unsigned char) *cmpp == c && avail >= 0)
1650 {
1651 if (*++cmpp == '\0')
1652 break;
1653 else
1654 {
1655 if (avail == 0 || inchar () == EOF)
1656 break;
1657 --avail;
1658 }
1659 }
1660
1661 if (*cmpp == '\0')
1662 {
1663 if (width > 0)
1664 width = avail;
1665 break;
1666 }
1667
1668 /* We are pushing all read characters back. */
1669 if (cmpp > mbdigits[n])
1670 {
1671 ungetc (c, s);
1672 while (--cmpp > mbdigits[n])
1673 ungetc_not_eof ((unsigned char) *cmpp, s);
1674 c = (unsigned char) *cmpp;
1675 }
1676
1677 /* Advance the pointer to the next string. */
1678 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1679#endif
1680 }
1681
1682 if (n < 10)
1683 {
1684 /* Found it. */
1685 from_level = level;
1686 to_level = level;
1687 break;
1688 }
1689 }
1690 }
1691
1692 if (n < num_digits_len)
1693 c = L_('0') + n;
1694 else if (flags & GROUP)
1695 {
1696 /* Try matching against the thousands separator. */
1697#ifdef COMPILE_WSCANF
1698 if (c != thousands)
1699 break;
1700#else
1701 const char *cmpp = thousands;
1702 int avail = width > 0 ? width : INT_MAX;
1703
1704 while ((unsigned char) *cmpp == c && avail >= 0)
1705 {
1706 char_buffer_add (&charbuf, c);
1707 if (*++cmpp == '\0')
1708 break;
1709 else
1710 {
1711 if (avail == 0 || inchar () == EOF)
1712 break;
1713 --avail;
1714 }
1715 }
1716
1717 if (char_buffer_error (&charbuf))
1718 {
1719 __set_errno (ENOMEM);
1720 done = EOF;
1721 break;
1722 }
1723
1724 if (*cmpp != '\0')
1725 {
1726 /* We are pushing all read characters back. */
1727 if (cmpp > thousands)
1728 {
1729 charbuf.current -= cmpp - thousands;
1730 ungetc (c, s);
1731 while (--cmpp > thousands)
1732 ungetc_not_eof ((unsigned char) *cmpp, s);
1733 c = (unsigned char) *cmpp;
1734 }
1735 break;
1736 }
1737
1738 if (width > 0)
1739 width = avail;
1740
1741 /* The last thousands character will be added back by
1742 the char_buffer_add below. */
1743 --charbuf.current;
1744#endif
1745 }
1746 else
1747 break;
1748
1749 char_buffer_add (&charbuf, c);
1750 if (width > 0)
1751 --width;
1752
1753 c = inchar ();
1754 }
1755
1756digits_extended_fail:
1757 for (n = 0; n < num_digits_len; n++)
1758 free (digits_extended[n]);
1759
1760 if (done == EOF)
1761 goto errout;
1762 }
1763 else
1764 /* Read the number into workspace. */
1765 while (c != EOF && width != 0)
1766 {
1767 if (base == 16)
1768 {
1769 if (!ISXDIGIT (c))
1770 break;
1771 }
1772 else if (!ISDIGIT (c) || (int) (c - L_('0')) >= base)
1773 {
1774 if (base == 10 && (flags & GROUP))
1775 {
1776 /* Try matching against the thousands separator. */
1777#ifdef COMPILE_WSCANF
1778 if (c != thousands)
1779 break;
1780#else
1781 const char *cmpp = thousands;
1782 int avail = width > 0 ? width : INT_MAX;
1783
1784 while ((unsigned char) *cmpp == c && avail >= 0)
1785 {
1786 char_buffer_add (&charbuf, c);
1787 if (*++cmpp == '\0')
1788 break;
1789 else
1790 {
1791 if (avail == 0 || inchar () == EOF)
1792 break;
1793 --avail;
1794 }
1795 }
1796
1797 if (char_buffer_error (&charbuf))
1798 {
1799 __set_errno (ENOMEM);
1800 done = EOF;
1801 goto errout;
1802 }
1803
1804 if (*cmpp != '\0')
1805 {
1806 /* We are pushing all read characters back. */
1807 if (cmpp > thousands)
1808 {
1809 charbuf.current -= cmpp - thousands;
1810 ungetc (c, s);
1811 while (--cmpp > thousands)
1812 ungetc_not_eof ((unsigned char) *cmpp, s);
1813 c = (unsigned char) *cmpp;
1814 }
1815 break;
1816 }
1817
1818 if (width > 0)
1819 width = avail;
1820
1821 /* The last thousands character will be added back by
1822 the char_buffer_add below. */
1823 --charbuf.current;
1824#endif
1825 }
1826 else
1827 break;
1828 }
1829 char_buffer_add (&charbuf, c);
1830 if (width > 0)
1831 --width;
1832
1833 c = inchar ();
1834 }
1835
1836 if (char_buffer_error (&charbuf))
1837 {
1838 __set_errno (ENOMEM);
1839 done = EOF;
1840 goto errout;
1841 }
1842
1843 if (char_buffer_size (&charbuf) == 0
1844 || (char_buffer_size (&charbuf) == 1
1845 && (char_buffer_start (&charbuf)[0] == L_('+')
1846 || char_buffer_start (&charbuf)[0] == L_('-'))))
1847 {
1848 /* There was no number. If we are supposed to read a pointer
1849 we must recognize "(nil)" as well. */
1850 if (__builtin_expect (char_buffer_size (&charbuf) == 0
1851 && (flags & READ_POINTER)
1852 && (width < 0 || width >= 5)
1853 && c == '('
1854 && TOLOWER (inchar ()) == L_('n')
1855 && TOLOWER (inchar ()) == L_('i')
1856 && TOLOWER (inchar ()) == L_('l')
1857 && inchar () == L_(')'), 1))
1858 /* We must produce the value of a NULL pointer. A single
1859 '0' digit is enough. */
1860 char_buffer_add (&charbuf, L_('0'));
1861 else
1862 {
1863 /* The last read character is not part of the number
1864 anymore. */
1865 ungetc (c, s);
1866
1867 conv_error ();
1868 }
1869 }
1870 else
1871 /* The just read character is not part of the number anymore. */
1872 ungetc (c, s);
1873
1874 /* Convert the number. */
1875 char_buffer_add (&charbuf, L_('\0'));
1876 if (char_buffer_error (&charbuf))
1877 {
1878 __set_errno (ENOMEM);
1879 done = EOF;
1880 goto errout;
1881 }
1882 if (need_longlong && (flags & LONGDBL))
1883 {
1884 if (flags & NUMBER_SIGNED)
1885 num.q = __strtoll_internal
1886 (char_buffer_start (&charbuf), &tw, base, flags & GROUP);
1887 else
1888 num.uq = __strtoull_internal
1889 (char_buffer_start (&charbuf), &tw, base, flags & GROUP);
1890 }
1891 else
1892 {
1893 if (flags & NUMBER_SIGNED)
1894 num.l = __strtol_internal
1895 (char_buffer_start (&charbuf), &tw, base, flags & GROUP);
1896 else
1897 num.ul = __strtoul_internal
1898 (char_buffer_start (&charbuf), &tw, base, flags & GROUP);
1899 }
1900 if (__glibc_unlikely (char_buffer_start (&charbuf) == tw))
1901 conv_error ();
1902
1903 if (!(flags & SUPPRESS))
1904 {
1905 if (flags & NUMBER_SIGNED)
1906 {
1907 if (need_longlong && (flags & LONGDBL))
1908 *ARG (LONGLONG int *) = num.q;
1909 else if (need_long && (flags & LONG))
1910 *ARG (long int *) = num.l;
1911 else if (flags & SHORT)
1912 *ARG (short int *) = (short int) num.l;
1913 else if (!(flags & CHAR))
1914 *ARG (int *) = (int) num.l;
1915 else
1916 *ARG (signed char *) = (signed char) num.ul;
1917 }
1918 else
1919 {
1920 if (need_longlong && (flags & LONGDBL))
1921 *ARG (unsigned LONGLONG int *) = num.uq;
1922 else if (need_long && (flags & LONG))
1923 *ARG (unsigned long int *) = num.ul;
1924 else if (flags & SHORT)
1925 *ARG (unsigned short int *)
1926 = (unsigned short int) num.ul;
1927 else if (!(flags & CHAR))
1928 *ARG (unsigned int *) = (unsigned int) num.ul;
1929 else
1930 *ARG (unsigned char *) = (unsigned char) num.ul;
1931 }
1932 ++done;
1933 }
1934 break;
1935
1936 case L_('e'): /* Floating-point numbers. */
1937 case L_('E'):
1938 case L_('f'):
1939 case L_('F'):
1940 case L_('g'):
1941 case L_('G'):
1942 case L_('a'):
1943 case L_('A'):
1944 c = inchar ();
1945 if (width > 0)
1946 --width;
1947 if (__glibc_unlikely (c == EOF))
1948 input_error ();
1949
1950 got_digit = got_dot = got_e = got_sign = 0;
1951
1952 /* Check for a sign. */
1953 if (c == L_('-') || c == L_('+'))
1954 {
1955 got_sign = 1;
1956 char_buffer_add (&charbuf, c);
1957 if (__glibc_unlikely (width == 0 || inchar () == EOF))
1958 /* EOF is only an input error before we read any chars. */
1959 conv_error ();
1960 if (width > 0)
1961 --width;
1962 }
1963
1964 /* Take care of the special arguments "nan" and "inf". */
1965 if (TOLOWER (c) == L_('n'))
1966 {
1967 /* Maybe "nan". */
1968 char_buffer_add (&charbuf, c);
1969 if (__builtin_expect (width == 0
1970 || inchar () == EOF
1971 || TOLOWER (c) != L_('a'), 0))
1972 conv_error ();
1973 if (width > 0)
1974 --width;
1975 char_buffer_add (&charbuf, c);
1976 if (__builtin_expect (width == 0
1977 || inchar () == EOF
1978 || TOLOWER (c) != L_('n'), 0))
1979 conv_error ();
1980 if (width > 0)
1981 --width;
1982 char_buffer_add (&charbuf, c);
1983 /* It is "nan". */
1984 goto scan_float;
1985 }
1986 else if (TOLOWER (c) == L_('i'))
1987 {
1988 /* Maybe "inf" or "infinity". */
1989 char_buffer_add (&charbuf, c);
1990 if (__builtin_expect (width == 0
1991 || inchar () == EOF
1992 || TOLOWER (c) != L_('n'), 0))
1993 conv_error ();
1994 if (width > 0)
1995 --width;
1996 char_buffer_add (&charbuf, c);
1997 if (__builtin_expect (width == 0
1998 || inchar () == EOF
1999 || TOLOWER (c) != L_('f'), 0))
2000 conv_error ();
2001 if (width > 0)
2002 --width;
2003 char_buffer_add (&charbuf, c);
2004 /* It is at least "inf". */
2005 if (width != 0 && inchar () != EOF)
2006 {
2007 if (TOLOWER (c) == L_('i'))
2008 {
2009 if (width > 0)
2010 --width;
2011 /* Now we have to read the rest as well. */
2012 char_buffer_add (&charbuf, c);
2013 if (__builtin_expect (width == 0
2014 || inchar () == EOF
2015 || TOLOWER (c) != L_('n'), 0))
2016 conv_error ();
2017 if (width > 0)
2018 --width;
2019 char_buffer_add (&charbuf, c);
2020 if (__builtin_expect (width == 0
2021 || inchar () == EOF
2022 || TOLOWER (c) != L_('i'), 0))
2023 conv_error ();
2024 if (width > 0)
2025 --width;
2026 char_buffer_add (&charbuf, c);
2027 if (__builtin_expect (width == 0
2028 || inchar () == EOF
2029 || TOLOWER (c) != L_('t'), 0))
2030 conv_error ();
2031 if (width > 0)
2032 --width;
2033 char_buffer_add (&charbuf, c);
2034 if (__builtin_expect (width == 0
2035 || inchar () == EOF
2036 || TOLOWER (c) != L_('y'), 0))
2037 conv_error ();
2038 if (width > 0)
2039 --width;
2040 char_buffer_add (&charbuf, c);
2041 }
2042 else
2043 /* Never mind. */
2044 ungetc (c, s);
2045 }
2046 goto scan_float;
2047 }
2048
2049 exp_char = L_('e');
2050 if (width != 0 && c == L_('0'))
2051 {
2052 char_buffer_add (&charbuf, c);
2053 c = inchar ();
2054 if (width > 0)
2055 --width;
2056 if (width != 0 && TOLOWER (c) == L_('x'))
2057 {
2058 /* It is a number in hexadecimal format. */
2059 char_buffer_add (&charbuf, c);
2060
2061 flags |= HEXA_FLOAT;
2062 exp_char = L_('p');
2063
2064 /* Grouping is not allowed. */
2065 flags &= ~GROUP;
2066 c = inchar ();
2067 if (width > 0)
2068 --width;
2069 }
2070 else
2071 got_digit = 1;
2072 }
2073
2074 while (1)
2075 {
2076 if (char_buffer_error (&charbuf))
2077 {
2078 __set_errno (ENOMEM);
2079 done = EOF;
2080 goto errout;
2081 }
2082 if (ISDIGIT (c))
2083 {
2084 char_buffer_add (&charbuf, c);
2085 got_digit = 1;
2086 }
2087 else if (!got_e && (flags & HEXA_FLOAT) && ISXDIGIT (c))
2088 {
2089 char_buffer_add (&charbuf, c);
2090 got_digit = 1;
2091 }
2092 else if (got_e && charbuf.current[-1] == exp_char
2093 && (c == L_('-') || c == L_('+')))
2094 char_buffer_add (&charbuf, c);
2095 else if (got_digit && !got_e
2096 && (CHAR_T) TOLOWER (c) == exp_char)
2097 {
2098 char_buffer_add (&charbuf, exp_char);
2099 got_e = got_dot = 1;
2100 }
2101 else
2102 {
2103#ifdef COMPILE_WSCANF
2104 if (! got_dot && c == decimal)
2105 {
2106 char_buffer_add (&charbuf, c);
2107 got_dot = 1;
2108 }
2109 else if ((flags & GROUP) != 0 && ! got_dot && c == thousands)
2110 char_buffer_add (&charbuf, c);
2111 else
2112 {
2113 /* The last read character is not part of the number
2114 anymore. */
2115 ungetc (c, s);
2116 break;
2117 }
2118#else
2119 const char *cmpp = decimal;
2120 int avail = width > 0 ? width : INT_MAX;
2121
2122 if (! got_dot)
2123 {
2124 while ((unsigned char) *cmpp == c && avail >= 0)
2125 if (*++cmpp == '\0')
2126 break;
2127 else
2128 {
2129 if (avail == 0 || inchar () == EOF)
2130 break;
2131 --avail;
2132 }
2133 }
2134
2135 if (*cmpp == '\0')
2136 {
2137 /* Add all the characters. */
2138 for (cmpp = decimal; *cmpp != '\0'; ++cmpp)
2139 char_buffer_add (&charbuf, (unsigned char) *cmpp);
2140 if (width > 0)
2141 width = avail;
2142 got_dot = 1;
2143 }
2144 else
2145 {
2146 /* Figure out whether it is a thousands separator.
2147 There is one problem: we possibly read more than
2148 one character. We cannot push them back but since
2149 we know that parts of the `decimal' string matched,
2150 we can compare against it. */
2151 const char *cmp2p = thousands;
2152
2153 if ((flags & GROUP) != 0 && ! got_dot)
2154 {
2155 while (cmp2p - thousands < cmpp - decimal
2156 && *cmp2p == decimal[cmp2p - thousands])
2157 ++cmp2p;
2158 if (cmp2p - thousands == cmpp - decimal)
2159 {
2160 while ((unsigned char) *cmp2p == c && avail >= 0)
2161 if (*++cmp2p == '\0')
2162 break;
2163 else
2164 {
2165 if (avail == 0 || inchar () == EOF)
2166 break;
2167 --avail;
2168 }
2169 }
2170 }
2171
2172 if (cmp2p != NULL && *cmp2p == '\0')
2173 {
2174 /* Add all the characters. */
2175 for (cmpp = thousands; *cmpp != '\0'; ++cmpp)
2176 char_buffer_add (&charbuf, (unsigned char) *cmpp);
2177 if (width > 0)
2178 width = avail;
2179 }
2180 else
2181 {
2182 /* The last read character is not part of the number
2183 anymore. */
2184 ungetc (c, s);
2185 break;
2186 }
2187 }
2188#endif
2189 }
2190
2191 if (width == 0 || inchar () == EOF)
2192 break;
2193
2194 if (width > 0)
2195 --width;
2196 }
2197
2198 if (char_buffer_error (&charbuf))
2199 {
2200 __set_errno (ENOMEM);
2201 done = EOF;
2202 goto errout;
2203 }
2204
2205 wctrans_t map;
2206 if (__builtin_expect ((flags & I18N) != 0, 0)
2207 /* Hexadecimal floats make no sense, fixing localized
2208 digits with ASCII letters. */
2209 && !(flags & HEXA_FLOAT)
2210 /* Minimum requirement. */
2211 && (char_buffer_size (&charbuf) == got_sign || got_dot)
2212 && (map = __wctrans ("to_inpunct")) != NULL)
2213 {
2214 /* Reget the first character. */
2215 inchar ();
2216
2217 /* Localized digits, decimal points, and thousands
2218 separator. */
2219 wint_t wcdigits[12];
2220
2221 /* First get decimal equivalent to check if we read it
2222 or not. */
2223 wcdigits[11] = __towctrans (L'.', map);
2224
2225 /* If we have not read any character or have just read
2226 locale decimal point which matches the decimal point
2227 for localized FP numbers, then we may have localized
2228 digits. Note, we test GOT_DOT above. */
2229#ifdef COMPILE_WSCANF
2230 if (char_buffer_size (&charbuf) == got_sign
2231 || (char_buffer_size (&charbuf) == got_sign + 1
2232 && wcdigits[11] == decimal))
2233#else
2234 char mbdigits[12][MB_LEN_MAX + 1];
2235
2236 mbstate_t state;
2237 memset (&state, '\0', sizeof (state));
2238
2239 bool match_so_far = char_buffer_size (&charbuf) == got_sign;
2240 size_t mblen = __wcrtomb (mbdigits[11], wcdigits[11], &state);
2241 if (mblen != (size_t) -1)
2242 {
2243 mbdigits[11][mblen] = '\0';
2244 match_so_far |=
2245 (char_buffer_size (&charbuf) == strlen (decimal) + got_sign
2246 && strcmp (decimal, mbdigits[11]) == 0);
2247 }
2248 else
2249 {
2250 size_t decimal_len = strlen (decimal);
2251 /* This should always be the case but the data comes
2252 from a file. */
2253 if (decimal_len <= MB_LEN_MAX)
2254 {
2255 match_so_far |= (char_buffer_size (&charbuf)
2256 == decimal_len + got_sign);
2257 memcpy (mbdigits[11], decimal, decimal_len + 1);
2258 }
2259 else
2260 match_so_far = false;
2261 }
2262
2263 if (match_so_far)
2264#endif
2265 {
2266 bool have_locthousands = (flags & GROUP) != 0;
2267
2268 /* Now get the digits and the thousands-sep equivalents. */
2269 for (int n = 0; n < 11; ++n)
2270 {
2271 if (n < 10)
2272 wcdigits[n] = __towctrans (L'0' + n, map);
2273 else if (n == 10)
2274 {
2275 wcdigits[10] = __towctrans (L',', map);
2276 have_locthousands &= wcdigits[10] != L'\0';
2277 }
2278
2279#ifndef COMPILE_WSCANF
2280 memset (&state, '\0', sizeof (state));
2281
2282 size_t mblen = __wcrtomb (mbdigits[n], wcdigits[n],
2283 &state);
2284 if (mblen == (size_t) -1)
2285 {
2286 if (n == 10)
2287 {
2288 if (have_locthousands)
2289 {
2290 size_t thousands_len = strlen (thousands);
2291 if (thousands_len <= MB_LEN_MAX)
2292 memcpy (mbdigits[10], thousands,
2293 thousands_len + 1);
2294 else
2295 have_locthousands = false;
2296 }
2297 }
2298 else
2299 /* Ignore checking against localized digits. */
2300 goto no_i18nflt;
2301 }
2302 else
2303 mbdigits[n][mblen] = '\0';
2304#endif
2305 }
2306
2307 /* Start checking against localized digits, if
2308 conversion is done correctly. */
2309 while (1)
2310 {
2311 if (char_buffer_error (&charbuf))
2312 {
2313 __set_errno (ENOMEM);
2314 done = EOF;
2315 goto errout;
2316 }
2317 if (got_e && charbuf.current[-1] == exp_char
2318 && (c == L_('-') || c == L_('+')))
2319 char_buffer_add (&charbuf, c);
2320 else if (char_buffer_size (&charbuf) > got_sign && !got_e
2321 && (CHAR_T) TOLOWER (c) == exp_char)
2322 {
2323 char_buffer_add (&charbuf, exp_char);
2324 got_e = got_dot = 1;
2325 }
2326 else
2327 {
2328 /* Check against localized digits, decimal point,
2329 and thousands separator. */
2330 int n;
2331 for (n = 0; n < 12; ++n)
2332 {
2333#ifdef COMPILE_WSCANF
2334 if (c == wcdigits[n])
2335 {
2336 if (n < 10)
2337 char_buffer_add (&charbuf, L_('0') + n);
2338 else if (n == 11 && !got_dot)
2339 {
2340 char_buffer_add (&charbuf, decimal);
2341 got_dot = 1;
2342 }
2343 else if (n == 10 && have_locthousands
2344 && ! got_dot)
2345 char_buffer_add (&charbuf, thousands);
2346 else
2347 /* The last read character is not part
2348 of the number anymore. */
2349 n = 12;
2350
2351 break;
2352 }
2353#else
2354 const char *cmpp = mbdigits[n];
2355 int avail = width > 0 ? width : INT_MAX;
2356
2357 while ((unsigned char) *cmpp == c && avail >= 0)
2358 if (*++cmpp == '\0')
2359 break;
2360 else
2361 {
2362 if (avail == 0 || inchar () == EOF)
2363 break;
2364 --avail;
2365 }
2366 if (*cmpp == '\0')
2367 {
2368 if (width > 0)
2369 width = avail;
2370
2371 if (n < 10)
2372 char_buffer_add (&charbuf, L_('0') + n);
2373 else if (n == 11 && !got_dot)
2374 {
2375 /* Add all the characters. */
2376 for (cmpp = decimal; *cmpp != '\0';
2377 ++cmpp)
2378 char_buffer_add (&charbuf,
2379 (unsigned char) *cmpp);
2380
2381 got_dot = 1;
2382 }
2383 else if (n == 10 && (flags & GROUP) != 0
2384 && ! got_dot)
2385 {
2386 /* Add all the characters. */
2387 for (cmpp = thousands; *cmpp != '\0';
2388 ++cmpp)
2389 char_buffer_add (&charbuf,
2390 (unsigned char) *cmpp);
2391 }
2392 else
2393 /* The last read character is not part
2394 of the number anymore. */
2395 n = 12;
2396
2397 break;
2398 }
2399
2400 /* We are pushing all read characters back. */
2401 if (cmpp > mbdigits[n])
2402 {
2403 ungetc (c, s);
2404 while (--cmpp > mbdigits[n])
2405 ungetc_not_eof ((unsigned char) *cmpp, s);
2406 c = (unsigned char) *cmpp;
2407 }
2408#endif
2409 }
2410
2411 if (n >= 12)
2412 {
2413 /* The last read character is not part
2414 of the number anymore. */
2415 ungetc (c, s);
2416 break;
2417 }
2418 }
2419
2420 if (width == 0 || inchar () == EOF)
2421 break;
2422
2423 if (width > 0)
2424 --width;
2425 }
2426 }
2427
2428#ifndef COMPILE_WSCANF
2429 no_i18nflt:
2430 ;
2431#endif
2432 }
2433
2434 if (char_buffer_error (&charbuf))
2435 {
2436 __set_errno (ENOMEM);
2437 done = EOF;
2438 goto errout;
2439 }
2440
2441 /* Have we read any character? If we try to read a number
2442 in hexadecimal notation and we have read only the `0x'
2443 prefix this is an error. */
2444 if (__glibc_unlikely (char_buffer_size (&charbuf) == got_sign
2445 || ((flags & HEXA_FLOAT)
2446 && (char_buffer_size (&charbuf)
2447 == 2 + got_sign))))
2448 conv_error ();
2449
2450 scan_float:
2451 /* Convert the number. */
2452 char_buffer_add (&charbuf, L_('\0'));
2453 if (char_buffer_error (&charbuf))
2454 {
2455 __set_errno (ENOMEM);
2456 done = EOF;
2457 goto errout;
2458 }
2459#if __HAVE_FLOAT128_UNLIKE_LDBL
2460 if ((flags & LONGDBL) \
2461 && (mode_flags & SCANF_LDBL_USES_FLOAT128) != 0)
2462 {
2463 _Float128 d = __strtof128_internal
2464 (char_buffer_start (&charbuf), &tw, flags & GROUP);
2465 if (!(flags & SUPPRESS) && tw != char_buffer_start (&charbuf))
2466 *ARG (_Float128 *) = d;
2467 }
2468 else
2469#endif
2470 if ((flags & LONGDBL) \
2471 && __glibc_likely ((mode_flags & SCANF_LDBL_IS_DBL) == 0))
2472 {
2473 long double d = __strtold_internal
2474 (char_buffer_start (&charbuf), &tw, flags & GROUP);
2475 if (!(flags & SUPPRESS) && tw != char_buffer_start (&charbuf))
2476 *ARG (long double *) = d;
2477 }
2478 else if (flags & (LONG | LONGDBL))
2479 {
2480 double d = __strtod_internal
2481 (char_buffer_start (&charbuf), &tw, flags & GROUP);
2482 if (!(flags & SUPPRESS) && tw != char_buffer_start (&charbuf))
2483 *ARG (double *) = d;
2484 }
2485 else
2486 {
2487 float d = __strtof_internal
2488 (char_buffer_start (&charbuf), &tw, flags & GROUP);
2489 if (!(flags & SUPPRESS) && tw != char_buffer_start (&charbuf))
2490 *ARG (float *) = d;
2491 }
2492
2493 if (__glibc_unlikely (tw == char_buffer_start (&charbuf)))
2494 conv_error ();
2495
2496 if (!(flags & SUPPRESS))
2497 ++done;
2498 break;
2499
2500 case L_('['): /* Character class. */
2501 if (flags & LONG)
2502 STRING_ARG (wstr, wchar_t, 100);
2503 else
2504 STRING_ARG (str, char, 100);
2505
2506 if (*f == L_('^'))
2507 {
2508 ++f;
2509 not_in = 1;
2510 }
2511 else
2512 not_in = 0;
2513
2514
2515#ifdef COMPILE_WSCANF
2516 /* Find the beginning and the end of the scanlist. We are not
2517 creating a lookup table since it would have to be too large.
2518 Instead we search each time through the string. This is not
2519 a constant lookup time but who uses this feature deserves to
2520 be punished. */
2521 tw = (wchar_t *) f; /* Marks the beginning. */
2522
2523 if (*f == L']')
2524 ++f;
2525
2526 while ((fc = *f++) != L'\0' && fc != L']');
2527
2528 if (__glibc_unlikely (fc == L'\0'))
2529 conv_error ();
2530 wchar_t *twend = (wchar_t *) f - 1;
2531#else
2532 /* Fill the scratch buffer with byte flags indexed by character.
2533 We will use this flag map for matching input characters. */
2534 if (!scratch_buffer_set_array_size
2535 (&charbuf.scratch, UCHAR_MAX + 1, 1))
2536 {
2537 done = EOF;
2538 goto errout;
2539 }
2540 memset (charbuf.scratch.data, '\0', UCHAR_MAX + 1);
2541
2542 fc = *f;
2543 if (fc == ']' || fc == '-')
2544 {
2545 /* If ] or - appears before any char in the set, it is not
2546 the terminator or separator, but the first char in the
2547 set. */
2548 ((char *)charbuf.scratch.data)[fc] = 1;
2549 ++f;
2550 }
2551
2552 while ((fc = *f++) != '\0' && fc != ']')
2553 if (fc == '-' && *f != '\0' && *f != ']' && f[-2] <= *f)
2554 {
2555 /* Add all characters from the one before the '-'
2556 up to (but not including) the next format char. */
2557 for (fc = f[-2]; fc < *f; ++fc)
2558 ((char *)charbuf.scratch.data)[fc] = 1;
2559 }
2560 else
2561 /* Add the character to the flag map. */
2562 ((char *)charbuf.scratch.data)[fc] = 1;
2563
2564 if (__glibc_unlikely (fc == '\0'))
2565 conv_error();
2566#endif
2567
2568 if (flags & LONG)
2569 {
2570 size_t now = read_in;
2571#ifdef COMPILE_WSCANF
2572 if (__glibc_unlikely (inchar () == WEOF))
2573 input_error ();
2574
2575 do
2576 {
2577 wchar_t *runp;
2578
2579 /* Test whether it's in the scanlist. */
2580 runp = tw;
2581 while (runp < twend)
2582 {
2583 if (runp[0] == L'-' && runp[1] != '\0'
2584 && runp + 1 != twend
2585 && runp != tw
2586 && (unsigned int) runp[-1] <= (unsigned int) runp[1])
2587 {
2588 /* Match against all characters in between the
2589 first and last character of the sequence. */
2590 wchar_t wc;
2591
2592 for (wc = runp[-1] + 1; wc <= runp[1]; ++wc)
2593 if ((wint_t) wc == c)
2594 break;
2595
2596 if (wc <= runp[1] && !not_in)
2597 break;
2598 if (wc <= runp[1] && not_in)
2599 {
2600 /* The current character is not in the
2601 scanset. */
2602 ungetc (c, s);
2603 goto out;
2604 }
2605
2606 runp += 2;
2607 }
2608 else
2609 {
2610 if ((wint_t) *runp == c && !not_in)
2611 break;
2612 if ((wint_t) *runp == c && not_in)
2613 {
2614 ungetc (c, s);
2615 goto out;
2616 }
2617
2618 ++runp;
2619 }
2620 }
2621
2622 if (runp == twend && !not_in)
2623 {
2624 ungetc (c, s);
2625 goto out;
2626 }
2627
2628 if (!(flags & SUPPRESS))
2629 {
2630 *wstr++ = c;
2631
2632 if ((flags & MALLOC)
2633 && wstr == (wchar_t *) *strptr + strsize)
2634 {
2635 /* Enlarge the buffer. */
2636 wstr = (wchar_t *) realloc (*strptr,
2637 (2 * strsize)
2638 * sizeof (wchar_t));
2639 if (wstr == NULL)
2640 {
2641 /* Can't allocate that much. Last-ditch
2642 effort. */
2643 wstr = (wchar_t *)
2644 realloc (*strptr, (strsize + 1)
2645 * sizeof (wchar_t));
2646 if (wstr == NULL)
2647 {
2648 if (flags & POSIX_MALLOC)
2649 {
2650 done = EOF;
2651 goto errout;
2652 }
2653 /* We lose. Oh well. Terminate the string
2654 and stop converting, so at least we don't
2655 skip any input. */
2656 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
2657 strptr = NULL;
2658 ++done;
2659 conv_error ();
2660 }
2661 else
2662 {
2663 *strptr = (char *) wstr;
2664 wstr += strsize;
2665 ++strsize;
2666 }
2667 }
2668 else
2669 {
2670 *strptr = (char *) wstr;
2671 wstr += strsize;
2672 strsize *= 2;
2673 }
2674 }
2675 }
2676 }
2677 while ((width < 0 || --width > 0) && inchar () != WEOF);
2678 out:
2679#else
2680 char buf[MB_LEN_MAX];
2681 size_t cnt = 0;
2682 mbstate_t cstate;
2683
2684 if (__glibc_unlikely (inchar () == EOF))
2685 input_error ();
2686
2687 memset (&cstate, '\0', sizeof (cstate));
2688
2689 do
2690 {
2691 if (((char *) charbuf.scratch.data)[c] == not_in)
2692 {
2693 ungetc_not_eof (c, s);
2694 break;
2695 }
2696
2697 /* This is easy. */
2698 if (!(flags & SUPPRESS))
2699 {
2700 size_t n;
2701
2702 /* Convert it into a wide character. */
2703 buf[0] = c;
2704 n = __mbrtowc (wstr, buf, 1, &cstate);
2705
2706 if (n == (size_t) -2)
2707 {
2708 /* Possibly correct character, just not enough
2709 input. */
2710 ++cnt;
2711 assert (cnt < MB_LEN_MAX);
2712 continue;
2713 }
2714 cnt = 0;
2715
2716 ++wstr;
2717 if ((flags & MALLOC)
2718 && wstr == (wchar_t *) *strptr + strsize)
2719 {
2720 /* Enlarge the buffer. */
2721 wstr = (wchar_t *) realloc (*strptr,
2722 (2 * strsize
2723 * sizeof (wchar_t)));
2724 if (wstr == NULL)
2725 {
2726 /* Can't allocate that much. Last-ditch
2727 effort. */
2728 wstr = (wchar_t *)
2729 realloc (*strptr, ((strsize + 1)
2730 * sizeof (wchar_t)));
2731 if (wstr == NULL)
2732 {
2733 if (flags & POSIX_MALLOC)
2734 {
2735 done = EOF;
2736 goto errout;
2737 }
2738 /* We lose. Oh well. Terminate the
2739 string and stop converting,
2740 so at least we don't skip any input. */
2741 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
2742 strptr = NULL;
2743 ++done;
2744 conv_error ();
2745 }
2746 else
2747 {
2748 *strptr = (char *) wstr;
2749 wstr += strsize;
2750 ++strsize;
2751 }
2752 }
2753 else
2754 {
2755 *strptr = (char *) wstr;
2756 wstr += strsize;
2757 strsize *= 2;
2758 }
2759 }
2760 }
2761
2762 if (width >= 0 && --width <= 0)
2763 break;
2764 }
2765 while (inchar () != EOF);
2766
2767 if (__glibc_unlikely (cnt != 0))
2768 /* We stopped in the middle of recognizing another
2769 character. That's a problem. */
2770 encode_error ();
2771#endif
2772
2773 if (__glibc_unlikely (now == read_in))
2774 /* We haven't successfully read any character. */
2775 conv_error ();
2776
2777 if (!(flags & SUPPRESS))
2778 {
2779 *wstr++ = L'\0';
2780
2781 if ((flags & MALLOC)
2782 && wstr - (wchar_t *) *strptr != strsize)
2783 {
2784 wchar_t *cp = (wchar_t *)
2785 realloc (*strptr, ((wstr - (wchar_t *) *strptr)
2786 * sizeof (wchar_t)));
2787 if (cp != NULL)
2788 *strptr = (char *) cp;
2789 }
2790 strptr = NULL;
2791
2792 ++done;
2793 }
2794 }
2795 else
2796 {
2797 size_t now = read_in;
2798
2799 if (__glibc_unlikely (inchar () == EOF))
2800 input_error ();
2801
2802#ifdef COMPILE_WSCANF
2803
2804 memset (&state, '\0', sizeof (state));
2805
2806 do
2807 {
2808 wchar_t *runp;
2809 size_t n;
2810
2811 /* Test whether it's in the scanlist. */
2812 runp = tw;
2813 while (runp < twend)
2814 {
2815 if (runp[0] == L'-' && runp[1] != '\0'
2816 && runp + 1 != twend
2817 && runp != tw
2818 && (unsigned int) runp[-1] <= (unsigned int) runp[1])
2819 {
2820 /* Match against all characters in between the
2821 first and last character of the sequence. */
2822 wchar_t wc;
2823
2824 for (wc = runp[-1] + 1; wc <= runp[1]; ++wc)
2825 if ((wint_t) wc == c)
2826 break;
2827
2828 if (wc <= runp[1] && !not_in)
2829 break;
2830 if (wc <= runp[1] && not_in)
2831 {
2832 /* The current character is not in the
2833 scanset. */
2834 ungetc (c, s);
2835 goto out2;
2836 }
2837
2838 runp += 2;
2839 }
2840 else
2841 {
2842 if ((wint_t) *runp == c && !not_in)
2843 break;
2844 if ((wint_t) *runp == c && not_in)
2845 {
2846 ungetc (c, s);
2847 goto out2;
2848 }
2849
2850 ++runp;
2851 }
2852 }
2853
2854 if (runp == twend && !not_in)
2855 {
2856 ungetc (c, s);
2857 goto out2;
2858 }
2859
2860 if (!(flags & SUPPRESS))
2861 {
2862 if ((flags & MALLOC)
2863 && *strptr + strsize - str <= MB_LEN_MAX)
2864 {
2865 /* Enlarge the buffer. */
2866 size_t strleng = str - *strptr;
2867 char *newstr;
2868
2869 newstr = (char *) realloc (*strptr, 2 * strsize);
2870 if (newstr == NULL)
2871 {
2872 /* Can't allocate that much. Last-ditch
2873 effort. */
2874 newstr = (char *) realloc (*strptr,
2875 strleng + MB_LEN_MAX);
2876 if (newstr == NULL)
2877 {
2878 if (flags & POSIX_MALLOC)
2879 {
2880 done = EOF;
2881 goto errout;
2882 }
2883 /* We lose. Oh well. Terminate the string
2884 and stop converting, so at least we don't
2885 skip any input. */
2886 ((char *) (*strptr))[strleng] = '\0';
2887 strptr = NULL;
2888 ++done;
2889 conv_error ();
2890 }
2891 else
2892 {
2893 *strptr = newstr;
2894 str = newstr + strleng;
2895 strsize = strleng + MB_LEN_MAX;
2896 }
2897 }
2898 else
2899 {
2900 *strptr = newstr;
2901 str = newstr + strleng;
2902 strsize *= 2;
2903 }
2904 }
2905 }
2906
2907 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c, &state);
2908 if (__glibc_unlikely (n == (size_t) -1))
2909 encode_error ();
2910
2911 assert (n <= MB_LEN_MAX);
2912 str += n;
2913 }
2914 while ((width < 0 || --width > 0) && inchar () != WEOF);
2915 out2:
2916#else
2917 do
2918 {
2919 if (((char *) charbuf.scratch.data)[c] == not_in)
2920 {
2921 ungetc_not_eof (c, s);
2922 break;
2923 }
2924
2925 /* This is easy. */
2926 if (!(flags & SUPPRESS))
2927 {
2928 *str++ = c;
2929 if ((flags & MALLOC)
2930 && (char *) str == *strptr + strsize)
2931 {
2932 /* Enlarge the buffer. */
2933 size_t newsize = 2 * strsize;
2934
2935 allocagain:
2936 str = (char *) realloc (*strptr, newsize);
2937 if (str == NULL)
2938 {
2939 /* Can't allocate that much. Last-ditch
2940 effort. */
2941 if (newsize > strsize + 1)
2942 {
2943 newsize = strsize + 1;
2944 goto allocagain;
2945 }
2946 if (flags & POSIX_MALLOC)
2947 {
2948 done = EOF;
2949 goto errout;
2950 }
2951 /* We lose. Oh well. Terminate the
2952 string and stop converting,
2953 so at least we don't skip any input. */
2954 ((char *) (*strptr))[strsize - 1] = '\0';
2955 strptr = NULL;
2956 ++done;
2957 conv_error ();
2958 }
2959 else
2960 {
2961 *strptr = (char *) str;
2962 str += strsize;
2963 strsize = newsize;
2964 }
2965 }
2966 }
2967 }
2968 while ((width < 0 || --width > 0) && inchar () != EOF);
2969#endif
2970
2971 if (__glibc_unlikely (now == read_in))
2972 /* We haven't successfully read any character. */
2973 conv_error ();
2974
2975 if (!(flags & SUPPRESS))
2976 {
2977#ifdef COMPILE_WSCANF
2978 /* We have to emit the code to get into the initial
2979 state. */
2980 char buf[MB_LEN_MAX];
2981 size_t n = __wcrtomb (buf, L'\0', &state);
2982 if (n > 0 && (flags & MALLOC)
2983 && str + n >= *strptr + strsize)
2984 {
2985 /* Enlarge the buffer. */
2986 size_t strleng = str - *strptr;
2987 char *newstr;
2988
2989 newstr = (char *) realloc (*strptr, strleng + n + 1);
2990 if (newstr == NULL)
2991 {
2992 if (flags & POSIX_MALLOC)
2993 {
2994 done = EOF;
2995 goto errout;
2996 }
2997 /* We lose. Oh well. Terminate the string
2998 and stop converting, so at least we don't
2999 skip any input. */
3000 ((char *) (*strptr))[strleng] = '\0';
3001 strptr = NULL;
3002 ++done;
3003 conv_error ();
3004 }
3005 else
3006 {
3007 *strptr = newstr;
3008 str = newstr + strleng;
3009 strsize = strleng + n + 1;
3010 }
3011 }
3012
3013 str = __mempcpy (str, buf, n);
3014#endif
3015 *str++ = '\0';
3016
3017 if ((flags & MALLOC) && str - *strptr != strsize)
3018 {
3019 char *cp = (char *) realloc (*strptr, str - *strptr);
3020 if (cp != NULL)
3021 *strptr = cp;
3022 }
3023 strptr = NULL;
3024
3025 ++done;
3026 }
3027 }
3028 break;
3029
3030 case L_('p'): /* Generic pointer. */
3031 base = 16;
3032 /* A PTR must be the same size as a `long int'. */
3033 flags &= ~(SHORT|LONGDBL);
3034 if (need_long)
3035 flags |= LONG;
3036 flags |= READ_POINTER;
3037 goto number;
3038
3039 default:
3040 /* If this is an unknown format character punt. */
3041 conv_error ();
3042 }
3043 }
3044
3045 /* The last thing we saw in the format string was a white space.
3046 Consume the last white spaces. */
3047 if (skip_space)
3048 {
3049 do
3050 c = inchar ();
3051 while (ISSPACE (c));
3052 ungetc (c, s);
3053 }
3054
3055 errout:
3056 /* Unlock stream. */
3057 UNLOCK_STREAM (s);
3058
3059 scratch_buffer_free (&charbuf.scratch);
3060
3061 if (__glibc_unlikely (done == EOF))
3062 {
3063 if (__glibc_unlikely (ptrs_to_free != NULL))
3064 {
3065 struct ptrs_to_free *p = ptrs_to_free;
3066 while (p != NULL)
3067 {
3068 for (size_t cnt = 0; cnt < p->count; ++cnt)
3069 {
3070 free (*p->ptrs[cnt]);
3071 *p->ptrs[cnt] = NULL;
3072 }
3073 p = p->next;
3074 ptrs_to_free = p;
3075 }
3076 }
3077 }
3078 else if (__glibc_unlikely (strptr != NULL))
3079 {
3080 free (*strptr);
3081 *strptr = NULL;
3082 }
3083 return done;
3084}
3085