1 | /* Classify a domain name for IDNA purposes. |
2 | Copyright (C) 2018-2023 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #include <errno.h> |
20 | #include <inet/net-internal.h> |
21 | #include <stdbool.h> |
22 | #include <string.h> |
23 | #include <wchar.h> |
24 | |
25 | enum idna_name_classification |
26 | __idna_name_classify (const char *name) |
27 | { |
28 | mbstate_t mbs; |
29 | memset (&mbs, 0, sizeof (mbs)); |
30 | const char *p = name; |
31 | const char *end = p + strlen (p) + 1; |
32 | bool nonascii = false; |
33 | bool backslash = false; |
34 | while (true) |
35 | { |
36 | wchar_t wc; |
37 | size_t result = mbrtowc (&wc, p, end - p, &mbs); |
38 | if (result == 0) |
39 | /* NUL terminator was reached. */ |
40 | break; |
41 | else if (result == (size_t) -2) |
42 | /* Incomplete trailing multi-byte character. This is an |
43 | encoding error because we received the full name. */ |
44 | return idna_name_encoding_error; |
45 | else if (result == (size_t) -1) |
46 | { |
47 | /* Other error, including EILSEQ. */ |
48 | if (errno == EILSEQ) |
49 | return idna_name_encoding_error; |
50 | else if (errno == ENOMEM) |
51 | return idna_name_memory_error; |
52 | else |
53 | return idna_name_error; |
54 | } |
55 | else |
56 | { |
57 | /* A wide character was decoded. */ |
58 | p += result; |
59 | if (wc == L'\\') |
60 | backslash = true; |
61 | else if (wc > 127) |
62 | nonascii = true; |
63 | } |
64 | } |
65 | |
66 | if (nonascii) |
67 | { |
68 | if (backslash) |
69 | return idna_name_nonascii_backslash; |
70 | else |
71 | return idna_name_nonascii; |
72 | } |
73 | else |
74 | return idna_name_ascii; |
75 | } |
76 | |