1 | /* Convert string representing a number to integer value, using given locale. |
2 | Copyright (C) 1997-2023 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #include <features.h> |
20 | #undef __GLIBC_USE_C2X_STRTOL |
21 | #define __GLIBC_USE_C2X_STRTOL 0 |
22 | |
23 | #if HAVE_CONFIG_H |
24 | # include <config.h> |
25 | #endif |
26 | |
27 | #ifdef _LIBC |
28 | # define USE_NUMBER_GROUPING |
29 | # define HAVE_LIMITS_H |
30 | #endif |
31 | |
32 | #include <ctype.h> |
33 | #include <errno.h> |
34 | #ifndef __set_errno |
35 | # define __set_errno(Val) errno = (Val) |
36 | #endif |
37 | |
38 | #ifdef HAVE_LIMITS_H |
39 | # include <limits.h> |
40 | #endif |
41 | |
42 | #include <stddef.h> |
43 | #include <stdlib.h> |
44 | #include <string.h> |
45 | #include <locale.h> |
46 | #include <stdint.h> |
47 | #include <bits/wordsize.h> |
48 | |
49 | #ifdef USE_NUMBER_GROUPING |
50 | # include "../locale/localeinfo.h" |
51 | #endif |
52 | |
/* Nonzero if we are defining `strtoul' or `strtoull', operating on
   unsigned integers.  The file that includes this template may define
   UNSIGNED beforehand; when it does not, the signed variant is built.
   INT is the return type of the functions defined below.  It is spelled
   in terms of LONG, which is only defined further down; that works
   because macro bodies are expanded at the point of use.
   NOTE(review): any prior definition of UNSIGNED (even to 0) selects the
   unsigned INT here, so it is presumably only ever defined nonzero.  */
#ifndef UNSIGNED
# define UNSIGNED 0
# define INT LONG int
#else
# define INT unsigned LONG int
#endif
61 | |
/* Determine the name.  `strtol_l' and `__isoc23_strtol_l' are remapped
   to the names of the variant being built, selected by three switches:
     UNSIGNED       -> the `u' in strtoul / strtoull,
     USE_WIDE_CHAR  -> the `wcs' prefix instead of `str',
     QUAD           -> the `ll' (long long) suffix instead of `l'.
   The last branch maps both names onto themselves, which is harmless
   because a macro is not re-expanded inside its own expansion.  */
#if UNSIGNED
# ifdef USE_WIDE_CHAR
#  ifdef QUAD
#   define strtol_l wcstoull_l
#   define __isoc23_strtol_l __isoc23_wcstoull_l
#  else
#   define strtol_l wcstoul_l
#   define __isoc23_strtol_l __isoc23_wcstoul_l
#  endif
# else
#  ifdef QUAD
#   define strtol_l strtoull_l
#   define __isoc23_strtol_l __isoc23_strtoull_l
#  else
#   define strtol_l strtoul_l
#   define __isoc23_strtol_l __isoc23_strtoul_l
#  endif
# endif
#else
# ifdef USE_WIDE_CHAR
#  ifdef QUAD
#   define strtol_l wcstoll_l
#   define __isoc23_strtol_l __isoc23_wcstoll_l
#  else
#   define strtol_l wcstol_l
#   define __isoc23_strtol_l __isoc23_wcstol_l
#  endif
# else
#  ifdef QUAD
#   define strtol_l strtoll_l
#   define __isoc23_strtol_l __isoc23_strtoll_l
#  else
#   define strtol_l strtol_l
#   define __isoc23_strtol_l __isoc23_strtol_l
#  endif
# endif
#endif
100 | |
/* Build the double-underscore name of the selected variant (for example
   `__strtoul_l' when strtol_l is mapped to strtoul_l).  The intermediate
   __strtol_l2 level forces `strtol_l' to be macro-expanded before
   __strtol_l3 pastes the `__' prefix onto it with `##'.  */
#define __strtol_l __strtol_l2(strtol_l)
#define __strtol_l2(name) __strtol_l3(name)
#define __strtol_l3(name) __##name
104 | |
105 | |
/* If QUAD is defined, we are defining `strtoll' or `strtoull',
   operating on `long long int's.  LONG is the modifier spliced into
   `LONG int' and `unsigned LONG int'; the STRTOL_* macros are the range
   limits of those two types.  */
#ifdef QUAD
# define LONG long long
# define STRTOL_LONG_MIN LONG_LONG_MIN
# define STRTOL_LONG_MAX LONG_LONG_MAX
# define STRTOL_ULONG_MAX ULONG_LONG_MAX
#else
# define LONG long

/* Fallback limits for builds where <limits.h> was not included.  All
   three limits that the STRTOL_* macros below rely on get a fallback.  */
# ifndef ULONG_MAX
#  define ULONG_MAX ((unsigned long int) ~(unsigned long int) 0)
# endif
# ifndef LONG_MAX
#  define LONG_MAX ((long int) (ULONG_MAX >> 1))
# endif
# ifndef LONG_MIN
/* Assumes a two's complement `long int'.  */
#  define LONG_MIN (-LONG_MAX - 1)
# endif
# define STRTOL_LONG_MIN LONG_MIN
# define STRTOL_LONG_MAX LONG_MAX
# define STRTOL_ULONG_MAX ULONG_MAX
#endif
126 | |
127 | |
/* We use this code for the extended locale handling where the
   function gets as an additional argument the locale which has to be
   used.  To access the values we have to redefine the _NL_CURRENT and
   _NL_CURRENT_WORD macros.
   Both replacements ignore CATEGORY and read from a variable named
   `current' that must be in scope where they are used; the conversion
   function below declares it as the LC_NUMERIC data of its locale
   argument.  _NL_CURRENT yields the item's string value,
   _NL_CURRENT_WORD its word value converted to uint32_t.  */
#undef _NL_CURRENT
#define _NL_CURRENT(category, item) \
  (current->values[_NL_ITEM_INDEX (item)].string)
#undef _NL_CURRENT_WORD
#define _NL_CURRENT_WORD(category, item) \
  ((uint32_t) current->values[_NL_ITEM_INDEX (item)].word)
138 | |
139 | #if defined _LIBC || defined HAVE_WCHAR_H |
140 | # include <wchar.h> |
141 | #endif |
142 | |
/* Character-type abstraction so that one function body serves both the
   narrow (char) and the wide (wchar_t) variants:
     L_(Ch)       character literal of the right width,
     STRING_TYPE  element type of the input string,
     UCHAR_TYPE   type used to hold one character / digit value,
     ISSPACE      whitespace test; uses a variable named `loc' that must
                  be in scope at the point of use,
     ISALPHA, TOUPPER
                  letter test and upper-casing; these always use
                  _nl_C_locobj_ptr rather than `loc'.
   IN_CTYPE_DOMAIN is defined for the narrow variant only and is not
   referenced in the visible part of this file.  */
#ifdef USE_WIDE_CHAR
# include <wctype.h>
# define L_(Ch) L##Ch
# define UCHAR_TYPE wint_t
# define STRING_TYPE wchar_t
# define ISSPACE(Ch) __iswspace_l ((Ch), loc)
# define ISALPHA(Ch) __iswalpha_l ((Ch), _nl_C_locobj_ptr)
# define TOUPPER(Ch) __towupper_l ((Ch), _nl_C_locobj_ptr)
#else
# if defined _LIBC \
   || defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
#  define IN_CTYPE_DOMAIN(c) 1
# else
#  define IN_CTYPE_DOMAIN(c) isascii(c)
# endif
# define L_(Ch) Ch
# define UCHAR_TYPE unsigned char
# define STRING_TYPE char
# define ISSPACE(Ch) __isspace_l ((Ch), loc)
# define ISALPHA(Ch) __isalpha_l ((Ch), _nl_C_locobj_ptr)
# define TOUPPER(Ch) __toupper_l ((Ch), _nl_C_locobj_ptr)
#endif
165 | |
/* INTERNAL (X) expands X first and then wraps it as `__X_internal';
   the two-level definition is what lets X itself be a macro.
   NOTE(review): WEAKNAME forwards to WEAKNAME1, which is not defined in
   the visible part of this file, and WEAKNAME is not used here either --
   it looks like a leftover; confirm before relying on it.  */
#define INTERNAL(X) INTERNAL1(X)
#define INTERNAL1(X) __##X##_internal
#define WEAKNAME(X) WEAKNAME1(X)
169 | |
170 | #ifdef USE_NUMBER_GROUPING |
171 | /* This file defines a function to check for correct grouping. */ |
172 | # include "grouping.h" |
173 | #endif |
174 | |
175 | |
/* Define tables of maximum values and remainders in order to detect
   overflow.  Do this at compile-time in order to avoid the runtime
   overhead of the division.
   Each table has one entry per base 2..36, so the entry for BASE is at
   index BASE - 2.  The `max' tables hold <type maximum> / BASE and the
   `rem' tables hold <type maximum> % BASE.  */
extern const unsigned long __strtol_ul_max_tab[] attribute_hidden;
extern const unsigned char __strtol_ul_rem_tab[] attribute_hidden;
#if defined(QUAD) && __WORDSIZE == 32
extern const unsigned long long __strtol_ull_max_tab[] attribute_hidden;
extern const unsigned char __strtol_ull_rem_tab[] attribute_hidden;
#endif

/* DEF emits the definition of one table; F must be defined by the user
   of DEF to compute the entry for a given base.  */
#define DEF(TYPE, NAME) \
  const TYPE NAME[] attribute_hidden = \
  { \
    F(2), F(3), F(4), F(5), F(6), F(7), F(8), F(9), F(10), \
    F(11), F(12), F(13), F(14), F(15), F(16), F(17), F(18), F(19), F(20), \
    F(21), F(22), F(23), F(24), F(25), F(26), F(27), F(28), F(29), F(30), \
    F(31), F(32), F(33), F(34), F(35), F(36) \
  }

/* The `unsigned long' tables are defined only when this template is
   built as the signed, narrow-character, non-QUAD variant; every other
   variant just uses the extern declarations above.  */
#if !UNSIGNED && !defined (USE_WIDE_CHAR) && !defined (QUAD)
# define F(X) ULONG_MAX / X
  DEF (unsigned long, __strtol_ul_max_tab);
# undef F
# define F(X) ULONG_MAX % X
  DEF (unsigned char, __strtol_ul_rem_tab);
# undef F
#endif
/* Likewise the `unsigned long long' tables are defined only in the
   signed, narrow-character, QUAD variant, and only when __WORDSIZE is
   32.  */
#if !UNSIGNED && !defined (USE_WIDE_CHAR) && defined (QUAD) \
    && __WORDSIZE == 32
# define F(X) ULONG_LONG_MAX / X
  DEF (unsigned long long, __strtol_ull_max_tab);
# undef F
# define F(X) ULONG_LONG_MAX % X
  DEF (unsigned char, __strtol_ull_rem_tab);
# undef F
#endif
#undef DEF
213 | |
/* Define some more readable aliases for these arrays which correspond
   to how they'll be used in the function below.
   jmax_tab is always the `unsigned long' maximum table.  cutoff_tab and
   cutlim_tab are the maximum and remainder tables used for the
   `unsigned LONG int' accumulator: the `unsigned long long' tables when
   QUAD is built with a 32-bit __WORDSIZE, the `unsigned long' tables
   otherwise.  */
#define jmax_tab __strtol_ul_max_tab
#if defined(QUAD) && __WORDSIZE == 32
# define cutoff_tab __strtol_ull_max_tab
# define cutlim_tab __strtol_ull_rem_tab
#else
# define cutoff_tab __strtol_ul_max_tab
# define cutlim_tab __strtol_ul_rem_tab
#endif
224 | |
225 | |
/* Convert NPTR to an `unsigned long int' or `long int' in base BASE.
   If BASE is 0 the base is determined by the presence of a leading
   zero, indicating octal, or a leading "0x" or "0X", indicating
   hexadecimal; otherwise it is decimal.
   If BASE is negative, 1, or greater than 36, errno is set to EINVAL
   and 0 is returned.
   If BIN_CST is true, binary constants starting "0b" or "0B" are accepted
   in base 0 and 2.
   If ENDPTR is not NULL, a pointer to the character after the last
   one converted is stored in *ENDPTR.  */
234 | |
/* Worker shared by all public entry points of this template.
   NPTR     text to convert.
   ENDPTR   if not NULL, receives the position where conversion stopped
            (not written when BASE is rejected).
   BASE     0, or 2..36.
   GROUP    nonzero to honor the locale's thousands grouping; only has an
            effect when USE_NUMBER_GROUPING is defined and the base ends
            up being 10.
   BIN_CST  true to accept a "0b"/"0B" prefix for base 0 and base 2.
   LOC      locale used for whitespace skipping and for the LC_NUMERIC
            grouping data.
   Returns the converted value.  On overflow errno is set to ERANGE and
   the nearest limit of the result type is returned; for a rejected BASE
   errno is set to EINVAL and 0 is returned; if no digits were found 0 is
   returned.  */
INT
INTERNAL (__strtol_l) (const STRING_TYPE *nptr, STRING_TYPE **endptr,
                       int base, int group, bool bin_cst, locale_t loc)
{
  int negative;
  unsigned LONG int cutoff;
  unsigned int cutlim;
  unsigned LONG int i;
  const STRING_TYPE *s;
  UCHAR_TYPE c;
  const STRING_TYPE *save, *end;
  int overflow;
#ifndef USE_WIDE_CHAR
  size_t cnt;
#endif

#ifdef USE_NUMBER_GROUPING
  /* `current' is the variable the redefined _NL_CURRENT and
     _NL_CURRENT_WORD macros read from.  */
  struct __locale_data *current = loc->__locales[LC_NUMERIC];
  /* The thousands character of the current locale.  */
# ifdef USE_WIDE_CHAR
  wchar_t thousands = L'\0';
# else
  const char *thousands = NULL;
  size_t thousands_len = 0;
# endif
  /* The numeric grouping specification of the current locale,
     in the format described in <locale.h>.  */
  const char *grouping;

  if (__glibc_unlikely (group))
    {
      grouping = _NL_CURRENT (LC_NUMERIC, GROUPING);
      /* A first grouping byte that is not positive, or is CHAR_MAX,
         disables grouping.  */
      if (*grouping <= 0 || *grouping == CHAR_MAX)
        grouping = NULL;
      else
        {
          /* Figure out the thousands separator character.  Grouping is
             also disabled when the locale has no separator.  */
# ifdef USE_WIDE_CHAR
#  ifdef _LIBC
          thousands = _NL_CURRENT_WORD (LC_NUMERIC,
                                        _NL_NUMERIC_THOUSANDS_SEP_WC);
#  endif
          if (thousands == L'\0')
            grouping = NULL;
# else
#  ifdef _LIBC
          thousands = _NL_CURRENT (LC_NUMERIC, THOUSANDS_SEP);
#  endif
          if (*thousands == '\0')
            {
              thousands = NULL;
              grouping = NULL;
            }
# endif
        }
    }
  else
    grouping = NULL;
#endif

  /* Reject unsupported bases.  Note that *ENDPTR is left untouched on
     this path.  */
  if (base < 0 || base == 1 || base > 36)
    {
      __set_errno (EINVAL);
      return 0;
    }

  save = s = nptr;

  /* Skip white space.  */
  while (ISSPACE (*s))
    ++s;
  /* Nothing but white space: SAVE still equals NPTR, so the noconv code
     stores NPTR in *ENDPTR.  */
  if (__glibc_unlikely (*s == L_('\0')))
    goto noconv;

  /* Check for a sign.  */
  negative = 0;
  if (*s == L_('-'))
    {
      negative = 1;
      ++s;
    }
  else if (*s == L_('+'))
    ++s;

  /* Recognize number prefix and if BASE is zero, figure it out ourselves.
     The prefix is skipped here without checking that a digit follows;
     the noconv code below copes with a prefix that has no digits.  */
  if (*s == L_('0'))
    {
      if ((base == 0 || base == 16) && TOUPPER (s[1]) == L_('X'))
        {
          s += 2;
          base = 16;
        }
      else if (bin_cst && (base == 0 || base == 2) && TOUPPER (s[1]) == L_('B'))
        {
          s += 2;
          base = 2;
        }
      else if (base == 0)
        base = 8;
    }
  else if (base == 0)
    base = 10;

  /* Save the pointer so we can check later if anything happened.  */
  save = s;

#ifdef USE_NUMBER_GROUPING
  /* Grouping is only applied to decimal input.  */
  if (base != 10)
    grouping = NULL;

  if (__glibc_unlikely (grouping != NULL))
    {
# ifndef USE_WIDE_CHAR
      thousands_len = strlen (thousands);
# endif

      /* Find the end of the digit string and check its grouping.
         END starts out equal to S and stays there if the text begins
         with the thousands separator, which makes the conversion loops
         below stop at once.  In the narrow variant the GNU statement
         expressions are true when the bytes at END do not spell the
         separator.  */
      end = s;
      if (
# ifdef USE_WIDE_CHAR
          *s != thousands
# else
          ({ for (cnt = 0; cnt < thousands_len; ++cnt)
               if (thousands[cnt] != end[cnt])
                 break;
             cnt < thousands_len; })
# endif
          )
        {
          /* Advance END past everything that is a decimal digit, the
             start of a separator, or a letter that is a valid digit in
             BASE.  */
          for (c = *end; c != L_('\0'); c = *++end)
            if (((STRING_TYPE) c < L_('0') || (STRING_TYPE) c > L_('9'))
# ifdef USE_WIDE_CHAR
                && (wchar_t) c != thousands
# else
                && ({ for (cnt = 0; cnt < thousands_len; ++cnt)
                        if (thousands[cnt] != end[cnt])
                          break;
                      cnt < thousands_len; })
# endif
                && (!ISALPHA (c)
                    || (int) (TOUPPER (c) - L_('A') + 10) >= base))
              break;

          /* NOTE(review): the helper comes from grouping.h and is not
             visible here; presumably it returns the end of the correctly
             grouped prefix of [S, END) -- confirm there.  */
# ifdef USE_WIDE_CHAR
          end = __correctly_grouped_prefixwc (s, end, thousands, grouping);
# else
          end = __correctly_grouped_prefixmb (s, end, thousands, grouping);
# endif
        }
    }
  else
#endif
    /* No grouping: the `s == end' tests below are then never meant to
       trigger.  */
    end = NULL;

  /* Avoid runtime division; lookup cutoff and limit.  */
  cutoff = cutoff_tab[base - 2];
  cutlim = cutlim_tab[base - 2];

  overflow = 0;
  i = 0;
  c = *s;
  /* When `LONG int' differs in size from `long int', accumulate in an
     `unsigned long int' (J) for as long as the value fits and switch to
     the `unsigned LONG int' accumulator (I) only when it no longer
     does.  */
  if (sizeof (long int) != sizeof (LONG int))
    {
      unsigned long int j = 0;
      unsigned long int jmax = jmax_tab[base - 2];

      for (;c != L_('\0'); c = *++s)
        {
          /* Stop at the end determined by the grouping check.  */
          if (s == end)
            break;
          if (c >= L_('0') && c <= L_('9'))
            c -= L_('0');
#ifdef USE_NUMBER_GROUPING
# ifdef USE_WIDE_CHAR
          else if (grouping && (wchar_t) c == thousands)
            continue;
# else
          /* THOUSANDS_LEN is nonzero only after it was set for an active
             grouping above.  */
          else if (thousands_len)
            {
              for (cnt = 0; cnt < thousands_len; ++cnt)
                if (thousands[cnt] != s[cnt])
                  break;
              if (cnt == thousands_len)
                {
                  /* Skip the separator; the loop increment steps over
                     its last byte.  */
                  s += thousands_len - 1;
                  continue;
                }
              if (ISALPHA (c))
                c = TOUPPER (c) - L_('A') + 10;
              else
                break;
            }
# endif
#endif
          else if (ISALPHA (c))
            c = TOUPPER (c) - L_('A') + 10;
          else
            break;
          /* Not a valid digit in this base.  */
          if ((int) c >= base)
            break;
          /* J has reached the `unsigned long int' table limit for this
             base, so another digit might not fit into J.  */
          else if (j >= jmax)
            {
              /* Hand the value over to I and continue in the loop below.
                 The jump lands after that loop's overflow check, so this
                 digit is added to I unconditionally.  */
              i = (unsigned LONG int) j;
              goto use_long;
            }
          else
            j = j * (unsigned long int) base + c;
        }

      i = (unsigned LONG int) j;
    }
  else
    for (;c != L_('\0'); c = *++s)
      {
        /* Stop at the end determined by the grouping check.  */
        if (s == end)
          break;
        if (c >= L_('0') && c <= L_('9'))
          c -= L_('0');
#ifdef USE_NUMBER_GROUPING
# ifdef USE_WIDE_CHAR
        else if (grouping && (wchar_t) c == thousands)
          continue;
# else
        /* THOUSANDS_LEN is nonzero only after it was set for an active
           grouping above.  */
        else if (thousands_len)
          {
            for (cnt = 0; cnt < thousands_len; ++cnt)
              if (thousands[cnt] != s[cnt])
                break;
            if (cnt == thousands_len)
              {
                /* Skip the separator; the loop increment steps over its
                   last byte.  */
                s += thousands_len - 1;
                continue;
              }
            if (ISALPHA (c))
              c = TOUPPER (c) - L_('A') + 10;
            else
              break;
          }
# endif
#endif
        else if (ISALPHA (c))
          c = TOUPPER (c) - L_('A') + 10;
        else
          break;
        /* Not a valid digit in this base.  */
        if ((int) c >= base)
          break;
        /* Check for overflow.  After an overflow the loop keeps running
           so that S still ends up past all the digits, but I is no
           longer updated.  */
        if (i > cutoff || (i == cutoff && c > cutlim))
          overflow = 1;
        else
          {
          /* Also entered by `goto' from the `unsigned long int' loop
             above.  */
          use_long:
            i *= (unsigned LONG int) base;
            i += c;
          }
      }

  /* Check if anything actually happened.  */
  if (s == save)
    goto noconv;

  /* Store in ENDPTR the address of one character
     past the last character we converted.  */
  if (endptr != NULL)
    *endptr = (STRING_TYPE *) s;

#if !UNSIGNED
  /* Check for a value that is within the range of
     `unsigned LONG int', but outside the range of `LONG int'.
     The bound for negative input, the magnitude of the minimum, is
     computed as -(MIN + 1) + 1 in unsigned arithmetic so that the signed
     type is never asked to hold it.  */
  if (overflow == 0
      && i > (negative
              ? -((unsigned LONG int) (STRTOL_LONG_MIN + 1)) + 1
              : (unsigned LONG int) STRTOL_LONG_MAX))
    overflow = 1;
#endif

  if (__glibc_unlikely (overflow))
    {
      __set_errno (ERANGE);
#if UNSIGNED
      return STRTOL_ULONG_MAX;
#else
      return negative ? STRTOL_LONG_MIN : STRTOL_LONG_MAX;
#endif
    }

  /* Return the result of the appropriate sign.  I is unsigned, so the
     negation is done in unsigned arithmetic before the conversion to the
     return type.  */
  return negative ? -i : i;

noconv:
  /* We must handle a special case here: the base is 0 or 16 and the
     first two characters are '0' and 'x', but no hexadecimal digits
     follow.  Likewise when the base is 0 or 2 and the first two
     characters are '0' and 'b', but no binary digits follow.  This is
     not an error: the leading '0' is the number.  We return 0 and
     ENDPTR points to the 'x' or 'b'.  */
  if (endptr != NULL)
    {
      if (save - nptr >= 2
          && (TOUPPER (save[-1]) == L_('X')
              || (bin_cst && TOUPPER (save[-1]) == L_('B')))
          && save[-2] == L_('0'))
        *endptr = (STRING_TYPE *) &save[-1];
      else
        /* There was no number to convert.  */
        *endptr = (STRING_TYPE *) nptr;
    }

  return 0L;
}
#if defined _LIBC && !defined USE_WIDE_CHAR
libc_hidden_def (INTERNAL (__strtol_l))
#endif
550 | |
551 | /* External user entry point. */ |
552 | |
553 | #if _LIBC - 0 == 0 |
554 | |
555 | /* Prototype. */ |
556 | extern INT __strtol_l (const STRING_TYPE *nptr, STRING_TYPE **endptr, |
557 | int base); |
558 | #endif |
559 | |
560 | |
561 | INT |
562 | #ifdef weak_function |
563 | weak_function |
564 | #endif |
565 | __strtol_l (const STRING_TYPE *nptr, STRING_TYPE **endptr, |
566 | int base, locale_t loc) |
567 | { |
568 | return INTERNAL (__strtol_l) (nptr, endptr, base, 0, false, loc); |
569 | } |
570 | libc_hidden_def (__strtol_l) |
571 | weak_alias (__strtol_l, strtol_l) |
572 | |
573 | INT |
574 | __isoc23_strtol_l (const STRING_TYPE *nptr, STRING_TYPE **endptr, |
575 | int base, locale_t loc) |
576 | { |
577 | return INTERNAL (__strtol_l) (nptr, endptr, base, 0, true, loc); |
578 | } |
579 | libc_hidden_def (__isoc23_strtol_l) |
580 | |