1 | /* Copyright (C) 1991-2021 Free Software Foundation, Inc. |
2 | This file is part of the GNU C Library. |
3 | |
4 | The GNU C Library is free software; you can redistribute it and/or |
5 | modify it under the terms of the GNU Lesser General Public |
6 | License as published by the Free Software Foundation; either |
7 | version 2.1 of the License, or (at your option) any later version. |
8 | |
9 | The GNU C Library is distributed in the hope that it will be useful, |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | Lesser General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU Lesser General Public |
15 | License along with the GNU C Library; if not, see |
16 | <https://www.gnu.org/licenses/>. */ |
17 | |
18 | #ifdef _LIBC |
19 | # include <stdint.h> |
20 | #endif |
21 | |
22 | struct STRUCT |
23 | { |
24 | const CHAR *pattern; |
25 | const CHAR *string; |
26 | bool no_leading_period; |
27 | }; |
28 | |
29 | /* Match STRING against the file name pattern PATTERN, returning zero if |
30 | it matches, nonzero if not. */ |
31 | static int FCT (const CHAR *pattern, const CHAR *string, |
32 | const CHAR *string_end, bool no_leading_period, int flags, |
33 | struct STRUCT *ends, size_t alloca_used); |
34 | static int EXT (INT opt, const CHAR *pattern, const CHAR *string, |
35 | const CHAR *string_end, bool no_leading_period, int flags, |
36 | size_t alloca_used); |
37 | static const CHAR *END (const CHAR *patternp); |
38 | |
39 | static int |
40 | FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, |
41 | bool no_leading_period, int flags, struct STRUCT *ends, size_t alloca_used) |
42 | { |
43 | const CHAR *p = pattern, *n = string; |
44 | UCHAR c; |
45 | #ifdef _LIBC |
46 | # if WIDE_CHAR_VERSION |
47 | const char *collseq = (const char *) |
48 | _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC); |
49 | # else |
50 | const UCHAR *collseq = (const UCHAR *) |
51 | _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB); |
52 | # endif |
53 | #endif |
54 | |
55 | while ((c = *p++) != L_('\0')) |
56 | { |
57 | bool new_no_leading_period = false; |
58 | c = FOLD (c); |
59 | |
60 | switch (c) |
61 | { |
62 | case L_('?'): |
63 | if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(') |
64 | { |
65 | int res = EXT (c, p, n, string_end, no_leading_period, |
66 | flags, alloca_used); |
67 | if (res != -1) |
68 | return res; |
69 | } |
70 | |
71 | if (n == string_end) |
72 | return FNM_NOMATCH; |
73 | else if (*n == L_('/') && (flags & FNM_FILE_NAME)) |
74 | return FNM_NOMATCH; |
75 | else if (*n == L_('.') && no_leading_period) |
76 | return FNM_NOMATCH; |
77 | break; |
78 | |
79 | case L_('\\'): |
80 | if (!(flags & FNM_NOESCAPE)) |
81 | { |
82 | c = *p++; |
83 | if (c == L_('\0')) |
84 | /* Trailing \ loses. */ |
85 | return FNM_NOMATCH; |
86 | c = FOLD (c); |
87 | } |
88 | if (n == string_end || FOLD ((UCHAR) *n) != c) |
89 | return FNM_NOMATCH; |
90 | break; |
91 | |
92 | case L_('*'): |
93 | if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(') |
94 | { |
95 | int res = EXT (c, p, n, string_end, no_leading_period, |
96 | flags, alloca_used); |
97 | if (res != -1) |
98 | return res; |
99 | } |
100 | else if (ends != NULL) |
101 | { |
102 | ends->pattern = p - 1; |
103 | ends->string = n; |
104 | ends->no_leading_period = no_leading_period; |
105 | return 0; |
106 | } |
107 | |
108 | if (n != string_end && *n == L_('.') && no_leading_period) |
109 | return FNM_NOMATCH; |
110 | |
111 | for (c = *p++; c == L_('?') || c == L_('*'); c = *p++) |
112 | { |
113 | if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0) |
114 | { |
115 | const CHAR *endp = END (p); |
116 | if (endp != p) |
117 | { |
118 | /* This is a pattern. Skip over it. */ |
119 | p = endp; |
120 | continue; |
121 | } |
122 | } |
123 | |
124 | if (c == L_('?')) |
125 | { |
126 | /* A ? needs to match one character. */ |
127 | if (n == string_end) |
128 | /* There isn't another character; no match. */ |
129 | return FNM_NOMATCH; |
130 | else if (*n == L_('/') |
131 | && __glibc_unlikely (flags & FNM_FILE_NAME)) |
132 | /* A slash does not match a wildcard under |
133 | FNM_FILE_NAME. */ |
134 | return FNM_NOMATCH; |
135 | else |
136 | /* One character of the string is consumed in matching |
137 | this ? wildcard, so *??? won't match if there are |
138 | less than three characters. */ |
139 | ++n; |
140 | } |
141 | } |
142 | |
143 | if (c == L_('\0')) |
144 | /* The wildcard(s) is/are the last element of the pattern. |
145 | If the name is a file name and contains another slash |
146 | this means it cannot match, unless the FNM_LEADING_DIR |
147 | flag is set. */ |
148 | { |
149 | int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH; |
150 | |
151 | if (flags & FNM_FILE_NAME) |
152 | { |
153 | if (flags & FNM_LEADING_DIR) |
154 | result = 0; |
155 | else |
156 | { |
157 | if (MEMCHR (n, L_('/'), string_end - n) == NULL) |
158 | result = 0; |
159 | } |
160 | } |
161 | |
162 | return result; |
163 | } |
164 | else |
165 | { |
166 | const CHAR *endp; |
167 | struct STRUCT end; |
168 | |
169 | end.pattern = NULL; |
170 | endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'), |
171 | string_end - n); |
172 | if (endp == NULL) |
173 | endp = string_end; |
174 | |
175 | if (c == L_('[') |
176 | || (__glibc_unlikely (flags & FNM_EXTMATCH) |
177 | && (c == L_('@') || c == L_('+') || c == L_('!')) |
178 | && *p == L_('('))) |
179 | { |
180 | int flags2 = ((flags & FNM_FILE_NAME) |
181 | ? flags : (flags & ~FNM_PERIOD)); |
182 | |
183 | for (--p; n < endp; ++n, no_leading_period = false) |
184 | if (FCT (p, n, string_end, no_leading_period, flags2, |
185 | &end, alloca_used) == 0) |
186 | goto found; |
187 | } |
188 | else if (c == L_('/') && (flags & FNM_FILE_NAME)) |
189 | { |
190 | while (n < string_end && *n != L_('/')) |
191 | ++n; |
192 | if (n < string_end && *n == L_('/') |
193 | && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags, |
194 | NULL, alloca_used) == 0)) |
195 | return 0; |
196 | } |
197 | else |
198 | { |
199 | int flags2 = ((flags & FNM_FILE_NAME) |
200 | ? flags : (flags & ~FNM_PERIOD)); |
201 | |
202 | if (c == L_('\\') && !(flags & FNM_NOESCAPE)) |
203 | c = *p; |
204 | c = FOLD (c); |
205 | for (--p; n < endp; ++n, no_leading_period = false) |
206 | if (FOLD ((UCHAR) *n) == c |
207 | && (FCT (p, n, string_end, no_leading_period, flags2, |
208 | &end, alloca_used) == 0)) |
209 | { |
210 | found: |
211 | if (end.pattern == NULL) |
212 | return 0; |
213 | break; |
214 | } |
215 | if (end.pattern != NULL) |
216 | { |
217 | p = end.pattern; |
218 | n = end.string; |
219 | no_leading_period = end.no_leading_period; |
220 | continue; |
221 | } |
222 | } |
223 | } |
224 | |
225 | /* If we come here no match is possible with the wildcard. */ |
226 | return FNM_NOMATCH; |
227 | |
228 | case L_('['): |
229 | { |
230 | /* Nonzero if the sense of the character class is inverted. */ |
231 | const CHAR *p_init = p; |
232 | const CHAR *n_init = n; |
233 | bool not; |
234 | CHAR cold; |
235 | UCHAR fn; |
236 | |
237 | if (posixly_correct == 0) |
238 | posixly_correct = getenv ("POSIXLY_CORRECT" ) != NULL ? 1 : -1; |
239 | |
240 | if (n == string_end) |
241 | return FNM_NOMATCH; |
242 | |
243 | if (*n == L_('.') && no_leading_period) |
244 | return FNM_NOMATCH; |
245 | |
246 | if (*n == L_('/') && (flags & FNM_FILE_NAME)) |
247 | /* '/' cannot be matched. */ |
248 | return FNM_NOMATCH; |
249 | |
250 | not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^'))); |
251 | if (not) |
252 | ++p; |
253 | |
254 | fn = FOLD ((UCHAR) *n); |
255 | |
256 | c = *p++; |
257 | for (;;) |
258 | { |
259 | if (!(flags & FNM_NOESCAPE) && c == L_('\\')) |
260 | { |
261 | if (*p == L_('\0')) |
262 | return FNM_NOMATCH; |
263 | c = FOLD ((UCHAR) *p); |
264 | ++p; |
265 | |
266 | goto normal_bracket; |
267 | } |
268 | else if (c == L_('[') && *p == L_(':')) |
269 | { |
270 | /* Leave room for the null. */ |
271 | CHAR str[CHAR_CLASS_MAX_LENGTH + 1]; |
272 | size_t c1 = 0; |
273 | wctype_t wt; |
274 | const CHAR *startp = p; |
275 | |
276 | for (;;) |
277 | { |
278 | if (c1 == CHAR_CLASS_MAX_LENGTH) |
279 | /* The name is too long and therefore the pattern |
280 | is ill-formed. */ |
281 | return FNM_NOMATCH; |
282 | |
283 | c = *++p; |
284 | if (c == L_(':') && p[1] == L_(']')) |
285 | { |
286 | p += 2; |
287 | break; |
288 | } |
289 | if (c < L_('a') || c >= L_('z')) |
290 | { |
291 | /* This cannot possibly be a character class name. |
292 | Match it as a normal range. */ |
293 | p = startp; |
294 | c = L_('['); |
295 | goto normal_bracket; |
296 | } |
297 | str[c1++] = c; |
298 | } |
299 | str[c1] = L_('\0'); |
300 | |
301 | wt = IS_CHAR_CLASS (str); |
302 | if (wt == 0) |
303 | /* Invalid character class name. */ |
304 | return FNM_NOMATCH; |
305 | |
306 | #if defined _LIBC && ! WIDE_CHAR_VERSION |
307 | /* The following code is glibc specific but does |
308 | there a good job in speeding up the code since |
309 | we can avoid the btowc() call. */ |
310 | if (_ISCTYPE ((UCHAR) *n, wt)) |
311 | goto matched; |
312 | #else |
313 | if (iswctype (BTOWC ((UCHAR) *n), wt)) |
314 | goto matched; |
315 | #endif |
316 | c = *p++; |
317 | } |
318 | #ifdef _LIBC |
319 | else if (c == L_('[') && *p == L_('=')) |
320 | { |
321 | /* It's important that STR be a scalar variable rather |
322 | than a one-element array, because GCC (at least 4.9.2 |
323 | -O2 on x86-64) can be confused by the array and |
324 | diagnose a "used initialized" in a dead branch in the |
325 | findidx function. */ |
326 | UCHAR str; |
327 | uint32_t nrules = |
328 | _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); |
329 | const CHAR *startp = p; |
330 | |
331 | c = *++p; |
332 | if (c == L_('\0')) |
333 | { |
334 | p = startp; |
335 | c = L_('['); |
336 | goto normal_bracket; |
337 | } |
338 | str = c; |
339 | |
340 | c = *++p; |
341 | if (c != L_('=') || p[1] != L_(']')) |
342 | { |
343 | p = startp; |
344 | c = L_('['); |
345 | goto normal_bracket; |
346 | } |
347 | p += 2; |
348 | |
349 | if (nrules == 0) |
350 | { |
351 | if ((UCHAR) *n == str) |
352 | goto matched; |
353 | } |
354 | else |
355 | { |
356 | const int32_t *table; |
357 | # if WIDE_CHAR_VERSION |
358 | const int32_t *weights; |
359 | const wint_t *extra; |
360 | # else |
361 | const unsigned char *weights; |
362 | const unsigned char *; |
363 | # endif |
364 | const int32_t *indirect; |
365 | int32_t idx; |
366 | const UCHAR *cp = (const UCHAR *) &str; |
367 | |
368 | # if WIDE_CHAR_VERSION |
369 | table = (const int32_t *) |
370 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC); |
371 | weights = (const int32_t *) |
372 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC); |
373 | extra = (const wint_t *) |
374 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC); |
375 | indirect = (const int32_t *) |
376 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC); |
377 | # else |
378 | table = (const int32_t *) |
379 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); |
380 | weights = (const unsigned char *) |
381 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB); |
382 | extra = (const unsigned char *) |
383 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); |
384 | indirect = (const int32_t *) |
385 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); |
386 | # endif |
387 | |
388 | idx = FINDIDX (table, indirect, extra, &cp, 1); |
389 | if (idx != 0) |
390 | { |
391 | /* We found a table entry. Now see whether the |
392 | character we are currently at has the same |
393 | equivalence class value. */ |
394 | int len = weights[idx & 0xffffff]; |
395 | int32_t idx2; |
396 | const UCHAR *np = (const UCHAR *) n; |
397 | |
398 | idx2 = FINDIDX (table, indirect, extra, |
399 | &np, string_end - n); |
400 | if (idx2 != 0 |
401 | && (idx >> 24) == (idx2 >> 24) |
402 | && len == weights[idx2 & 0xffffff]) |
403 | { |
404 | int cnt = 0; |
405 | |
406 | idx &= 0xffffff; |
407 | idx2 &= 0xffffff; |
408 | |
409 | while (cnt < len |
410 | && (weights[idx + 1 + cnt] |
411 | == weights[idx2 + 1 + cnt])) |
412 | ++cnt; |
413 | |
414 | if (cnt == len) |
415 | goto matched; |
416 | } |
417 | } |
418 | } |
419 | |
420 | c = *p++; |
421 | } |
422 | #endif |
423 | else if (c == L_('\0')) |
424 | { |
425 | /* [ unterminated, treat as normal character. */ |
426 | p = p_init; |
427 | n = n_init; |
428 | c = L_('['); |
429 | goto normal_match; |
430 | } |
431 | else |
432 | { |
433 | bool is_range = false; |
434 | |
435 | #ifdef _LIBC |
436 | bool is_seqval = false; |
437 | |
438 | if (c == L_('[') && *p == L_('.')) |
439 | { |
440 | uint32_t nrules = |
441 | _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); |
442 | const CHAR *startp = p; |
443 | size_t c1 = 0; |
444 | |
445 | while (1) |
446 | { |
447 | c = *++p; |
448 | if (c == L_('.') && p[1] == L_(']')) |
449 | { |
450 | p += 2; |
451 | break; |
452 | } |
453 | if (c == '\0') |
454 | return FNM_NOMATCH; |
455 | ++c1; |
456 | } |
457 | |
458 | /* We have to handling the symbols differently in |
459 | ranges since then the collation sequence is |
460 | important. */ |
461 | is_range = *p == L_('-') && p[1] != L_('\0'); |
462 | |
463 | if (nrules == 0) |
464 | { |
465 | /* There are no names defined in the collation |
466 | data. Therefore we only accept the trivial |
467 | names consisting of the character itself. */ |
468 | if (c1 != 1) |
469 | return FNM_NOMATCH; |
470 | |
471 | if (!is_range && *n == startp[1]) |
472 | goto matched; |
473 | |
474 | cold = startp[1]; |
475 | c = *p++; |
476 | } |
477 | else |
478 | { |
479 | int32_t table_size; |
480 | const int32_t *symb_table; |
481 | const unsigned char *; |
482 | int32_t idx; |
483 | int32_t elem; |
484 | # if WIDE_CHAR_VERSION |
485 | CHAR *wextra; |
486 | # endif |
487 | |
488 | table_size = |
489 | _NL_CURRENT_WORD (LC_COLLATE, |
490 | _NL_COLLATE_SYMB_HASH_SIZEMB); |
491 | symb_table = (const int32_t *) |
492 | _NL_CURRENT (LC_COLLATE, |
493 | _NL_COLLATE_SYMB_TABLEMB); |
494 | extra = (const unsigned char *) |
495 | _NL_CURRENT (LC_COLLATE, |
496 | _NL_COLLATE_SYMB_EXTRAMB); |
497 | |
498 | for (elem = 0; elem < table_size; elem++) |
499 | if (symb_table[2 * elem] != 0) |
500 | { |
501 | idx = symb_table[2 * elem + 1]; |
502 | /* Skip the name of collating element. */ |
503 | idx += 1 + extra[idx]; |
504 | # if WIDE_CHAR_VERSION |
505 | /* Skip the byte sequence of the |
506 | collating element. */ |
507 | idx += 1 + extra[idx]; |
508 | /* Adjust for the alignment. */ |
509 | idx = (idx + 3) & ~3; |
510 | |
511 | wextra = (CHAR *) &extra[idx + 4]; |
512 | |
513 | if (/* Compare the length of the sequence. */ |
514 | c1 == wextra[0] |
515 | /* Compare the wide char sequence. */ |
516 | && (__wmemcmp (startp + 1, &wextra[1], |
517 | c1) |
518 | == 0)) |
519 | /* Yep, this is the entry. */ |
520 | break; |
521 | # else |
522 | if (/* Compare the length of the sequence. */ |
523 | c1 == extra[idx] |
524 | /* Compare the byte sequence. */ |
525 | && memcmp (startp + 1, |
526 | &extra[idx + 1], c1) == 0) |
527 | /* Yep, this is the entry. */ |
528 | break; |
529 | # endif |
530 | } |
531 | |
532 | if (elem < table_size) |
533 | { |
534 | /* Compare the byte sequence but only if |
535 | this is not part of a range. */ |
536 | if (! is_range |
537 | |
538 | # if WIDE_CHAR_VERSION |
539 | && __wmemcmp (n, &wextra[1], c1) == 0 |
540 | # else |
541 | && memcmp (n, &extra[idx + 1], c1) == 0 |
542 | # endif |
543 | ) |
544 | { |
545 | n += c1 - 1; |
546 | goto matched; |
547 | } |
548 | |
549 | /* Get the collation sequence value. */ |
550 | is_seqval = true; |
551 | # if WIDE_CHAR_VERSION |
552 | cold = wextra[1 + wextra[0]]; |
553 | # else |
554 | idx += 1 + extra[idx]; |
555 | /* Adjust for the alignment. */ |
556 | idx = (idx + 3) & ~3; |
557 | cold = *((int32_t *) &extra[idx]); |
558 | # endif |
559 | |
560 | c = *p++; |
561 | } |
562 | else if (c1 == 1) |
563 | { |
564 | /* No valid character. Match it as a |
565 | single byte. */ |
566 | if (!is_range && *n == startp[1]) |
567 | goto matched; |
568 | |
569 | cold = startp[1]; |
570 | c = *p++; |
571 | } |
572 | else |
573 | return FNM_NOMATCH; |
574 | } |
575 | } |
576 | else |
577 | #endif |
578 | { |
579 | c = FOLD (c); |
580 | normal_bracket: |
581 | |
582 | /* We have to handling the symbols differently in |
583 | ranges since then the collation sequence is |
584 | important. */ |
585 | is_range = (*p == L_('-') && p[1] != L_('\0') |
586 | && p[1] != L_(']')); |
587 | |
588 | if (!is_range && c == fn) |
589 | goto matched; |
590 | |
591 | #if _LIBC |
592 | /* This is needed if we goto normal_bracket; from |
593 | outside of is_seqval's scope. */ |
594 | is_seqval = false; |
595 | #endif |
596 | cold = c; |
597 | c = *p++; |
598 | } |
599 | |
600 | if (c == L_('-') && *p != L_(']')) |
601 | { |
602 | #if _LIBC |
603 | /* We have to find the collation sequence |
604 | value for C. Collation sequence is nothing |
605 | we can regularly access. The sequence |
606 | value is defined by the order in which the |
607 | definitions of the collation values for the |
608 | various characters appear in the source |
609 | file. A strange concept, nowhere |
610 | documented. */ |
611 | uint32_t fcollseq; |
612 | uint32_t lcollseq; |
613 | UCHAR cend = *p++; |
614 | |
615 | # if WIDE_CHAR_VERSION |
616 | /* Search in the 'names' array for the characters. */ |
617 | fcollseq = __collseq_table_lookup (collseq, fn); |
618 | if (fcollseq == ~((uint32_t) 0)) |
619 | /* XXX We don't know anything about the character |
620 | we are supposed to match. This means we are |
621 | failing. */ |
622 | goto range_not_matched; |
623 | |
624 | if (is_seqval) |
625 | lcollseq = cold; |
626 | else |
627 | lcollseq = __collseq_table_lookup (collseq, cold); |
628 | # else |
629 | fcollseq = collseq[fn]; |
630 | lcollseq = is_seqval ? cold : collseq[(UCHAR) cold]; |
631 | # endif |
632 | |
633 | is_seqval = false; |
634 | if (cend == L_('[') && *p == L_('.')) |
635 | { |
636 | uint32_t nrules = |
637 | _NL_CURRENT_WORD (LC_COLLATE, |
638 | _NL_COLLATE_NRULES); |
639 | const CHAR *startp = p; |
640 | size_t c1 = 0; |
641 | |
642 | while (1) |
643 | { |
644 | c = *++p; |
645 | if (c == L_('.') && p[1] == L_(']')) |
646 | { |
647 | p += 2; |
648 | break; |
649 | } |
650 | if (c == '\0') |
651 | return FNM_NOMATCH; |
652 | ++c1; |
653 | } |
654 | |
655 | if (nrules == 0) |
656 | { |
657 | /* There are no names defined in the |
658 | collation data. Therefore we only |
659 | accept the trivial names consisting |
660 | of the character itself. */ |
661 | if (c1 != 1) |
662 | return FNM_NOMATCH; |
663 | |
664 | cend = startp[1]; |
665 | } |
666 | else |
667 | { |
668 | int32_t table_size; |
669 | const int32_t *symb_table; |
670 | const unsigned char *; |
671 | int32_t idx; |
672 | int32_t elem; |
673 | # if WIDE_CHAR_VERSION |
674 | CHAR *wextra; |
675 | # endif |
676 | |
677 | table_size = |
678 | _NL_CURRENT_WORD (LC_COLLATE, |
679 | _NL_COLLATE_SYMB_HASH_SIZEMB); |
680 | symb_table = (const int32_t *) |
681 | _NL_CURRENT (LC_COLLATE, |
682 | _NL_COLLATE_SYMB_TABLEMB); |
683 | extra = (const unsigned char *) |
684 | _NL_CURRENT (LC_COLLATE, |
685 | _NL_COLLATE_SYMB_EXTRAMB); |
686 | |
687 | for (elem = 0; elem < table_size; elem++) |
688 | if (symb_table[2 * elem] != 0) |
689 | { |
690 | idx = symb_table[2 * elem + 1]; |
691 | /* Skip the name of collating |
692 | element. */ |
693 | idx += 1 + extra[idx]; |
694 | # if WIDE_CHAR_VERSION |
695 | /* Skip the byte sequence of the |
696 | collating element. */ |
697 | idx += 1 + extra[idx]; |
698 | /* Adjust for the alignment. */ |
699 | idx = (idx + 3) & ~3; |
700 | |
701 | wextra = (CHAR *) &extra[idx + 4]; |
702 | |
703 | if (/* Compare the length of the |
704 | sequence. */ |
705 | c1 == wextra[0] |
706 | /* Compare the wide char sequence. */ |
707 | && (__wmemcmp (startp + 1, |
708 | &wextra[1], c1) |
709 | == 0)) |
710 | /* Yep, this is the entry. */ |
711 | break; |
712 | # else |
713 | if (/* Compare the length of the |
714 | sequence. */ |
715 | c1 == extra[idx] |
716 | /* Compare the byte sequence. */ |
717 | && memcmp (startp + 1, |
718 | &extra[idx + 1], c1) == 0) |
719 | /* Yep, this is the entry. */ |
720 | break; |
721 | # endif |
722 | } |
723 | |
724 | if (elem < table_size) |
725 | { |
726 | /* Get the collation sequence value. */ |
727 | is_seqval = true; |
728 | # if WIDE_CHAR_VERSION |
729 | cend = wextra[1 + wextra[0]]; |
730 | # else |
731 | idx += 1 + extra[idx]; |
732 | /* Adjust for the alignment. */ |
733 | idx = (idx + 3) & ~3; |
734 | cend = *((int32_t *) &extra[idx]); |
735 | # endif |
736 | } |
737 | else if (c1 == 1) |
738 | { |
739 | cend = startp[1]; |
740 | c = *p++; |
741 | } |
742 | else |
743 | return FNM_NOMATCH; |
744 | } |
745 | } |
746 | else |
747 | { |
748 | if (!(flags & FNM_NOESCAPE) && cend == L_('\\')) |
749 | cend = *p++; |
750 | if (cend == L_('\0')) |
751 | return FNM_NOMATCH; |
752 | cend = FOLD (cend); |
753 | } |
754 | |
755 | /* XXX It is not entirely clear to me how to handle |
756 | characters which are not mentioned in the |
757 | collation specification. */ |
758 | if ( |
759 | # if WIDE_CHAR_VERSION |
760 | lcollseq == 0xffffffff || |
761 | # endif |
762 | lcollseq <= fcollseq) |
763 | { |
764 | /* We have to look at the upper bound. */ |
765 | uint32_t hcollseq; |
766 | |
767 | if (is_seqval) |
768 | hcollseq = cend; |
769 | else |
770 | { |
771 | # if WIDE_CHAR_VERSION |
772 | hcollseq = |
773 | __collseq_table_lookup (collseq, cend); |
774 | if (hcollseq == ~((uint32_t) 0)) |
775 | { |
776 | /* Hum, no information about the upper |
777 | bound. The matching succeeds if the |
778 | lower bound is matched exactly. */ |
779 | if (lcollseq != fcollseq) |
780 | goto range_not_matched; |
781 | |
782 | goto matched; |
783 | } |
784 | # else |
785 | hcollseq = collseq[cend]; |
786 | # endif |
787 | } |
788 | |
789 | if (lcollseq <= hcollseq && fcollseq <= hcollseq) |
790 | goto matched; |
791 | } |
792 | # if WIDE_CHAR_VERSION |
793 | range_not_matched: |
794 | # endif |
795 | #else |
796 | /* We use a boring value comparison of the character |
797 | values. This is better than comparing using |
798 | 'strcoll' since the latter would have surprising |
799 | and sometimes fatal consequences. */ |
800 | UCHAR cend = *p++; |
801 | |
802 | if (!(flags & FNM_NOESCAPE) && cend == L_('\\')) |
803 | cend = *p++; |
804 | if (cend == L_('\0')) |
805 | return FNM_NOMATCH; |
806 | |
807 | /* It is a range. */ |
808 | if ((UCHAR) cold <= fn && fn <= cend) |
809 | goto matched; |
810 | #endif |
811 | |
812 | c = *p++; |
813 | } |
814 | } |
815 | |
816 | if (c == L_(']')) |
817 | break; |
818 | } |
819 | |
820 | if (!not) |
821 | return FNM_NOMATCH; |
822 | break; |
823 | |
824 | matched: |
825 | /* Skip the rest of the [...] that already matched. */ |
826 | while ((c = *p++) != L_(']')) |
827 | { |
828 | if (c == L_('\0')) |
829 | /* [... (unterminated) loses. */ |
830 | return FNM_NOMATCH; |
831 | |
832 | if (!(flags & FNM_NOESCAPE) && c == L_('\\')) |
833 | { |
834 | if (*p == L_('\0')) |
835 | return FNM_NOMATCH; |
836 | /* XXX 1003.2d11 is unclear if this is right. */ |
837 | ++p; |
838 | } |
839 | else if (c == L_('[') && *p == L_(':')) |
840 | { |
841 | int c1 = 0; |
842 | const CHAR *startp = p; |
843 | |
844 | while (1) |
845 | { |
846 | c = *++p; |
847 | if (++c1 == CHAR_CLASS_MAX_LENGTH) |
848 | return FNM_NOMATCH; |
849 | |
850 | if (*p == L_(':') && p[1] == L_(']')) |
851 | break; |
852 | |
853 | if (c < L_('a') || c >= L_('z')) |
854 | { |
855 | p = startp - 2; |
856 | break; |
857 | } |
858 | } |
859 | p += 2; |
860 | } |
861 | else if (c == L_('[') && *p == L_('=')) |
862 | { |
863 | c = *++p; |
864 | if (c == L_('\0')) |
865 | return FNM_NOMATCH; |
866 | c = *++p; |
867 | if (c != L_('=') || p[1] != L_(']')) |
868 | return FNM_NOMATCH; |
869 | p += 2; |
870 | } |
871 | else if (c == L_('[') && *p == L_('.')) |
872 | { |
873 | while (1) |
874 | { |
875 | c = *++p; |
876 | if (c == L_('\0')) |
877 | return FNM_NOMATCH; |
878 | |
879 | if (c == L_('.') && p[1] == L_(']')) |
880 | break; |
881 | } |
882 | p += 2; |
883 | } |
884 | } |
885 | if (not) |
886 | return FNM_NOMATCH; |
887 | } |
888 | break; |
889 | |
890 | case L_('+'): |
891 | case L_('@'): |
892 | case L_('!'): |
893 | if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(') |
894 | { |
895 | int res = EXT (c, p, n, string_end, no_leading_period, flags, |
896 | alloca_used); |
897 | if (res != -1) |
898 | return res; |
899 | } |
900 | goto normal_match; |
901 | |
902 | case L_('/'): |
903 | if (NO_LEADING_PERIOD (flags)) |
904 | { |
905 | if (n == string_end || c != (UCHAR) *n) |
906 | return FNM_NOMATCH; |
907 | |
908 | new_no_leading_period = true; |
909 | break; |
910 | } |
911 | FALLTHROUGH; |
912 | default: |
913 | normal_match: |
914 | if (n == string_end || c != FOLD ((UCHAR) *n)) |
915 | return FNM_NOMATCH; |
916 | } |
917 | |
918 | no_leading_period = new_no_leading_period; |
919 | ++n; |
920 | } |
921 | |
922 | if (n == string_end) |
923 | return 0; |
924 | |
925 | if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/')) |
926 | /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */ |
927 | return 0; |
928 | |
929 | return FNM_NOMATCH; |
930 | } |
931 | |
932 | |
933 | static const CHAR * |
934 | END (const CHAR *pattern) |
935 | { |
936 | const CHAR *p = pattern; |
937 | |
938 | while (1) |
939 | if (*++p == L_('\0')) |
940 | /* This is an invalid pattern. */ |
941 | return pattern; |
942 | else if (*p == L_('[')) |
943 | { |
944 | /* Handle brackets special. */ |
945 | if (posixly_correct == 0) |
946 | posixly_correct = getenv ("POSIXLY_CORRECT" ) != NULL ? 1 : -1; |
947 | |
948 | /* Skip the not sign. We have to recognize it because of a possibly |
949 | following ']'. */ |
950 | if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^'))) |
951 | ++p; |
952 | /* A leading ']' is recognized as such. */ |
953 | if (*p == L_(']')) |
954 | ++p; |
955 | /* Skip over all characters of the list. */ |
956 | while (*p != L_(']')) |
957 | if (*p++ == L_('\0')) |
958 | /* This is no valid pattern. */ |
959 | return pattern; |
960 | } |
961 | else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@') |
962 | || *p == L_('!')) && p[1] == L_('(')) |
963 | { |
964 | p = END (p + 1); |
965 | if (*p == L_('\0')) |
966 | /* This is an invalid pattern. */ |
967 | return pattern; |
968 | } |
969 | else if (*p == L_(')')) |
970 | break; |
971 | |
972 | return p + 1; |
973 | } |
974 | |
975 | |
976 | static int |
977 | EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end, |
978 | bool no_leading_period, int flags, size_t alloca_used) |
979 | { |
980 | const CHAR *startp; |
981 | ptrdiff_t level; |
982 | struct patternlist |
983 | { |
984 | struct patternlist *next; |
985 | CHAR malloced; |
986 | CHAR str __flexarr; |
987 | } *list = NULL; |
988 | struct patternlist **lastp = &list; |
989 | size_t pattern_len = STRLEN (pattern); |
990 | bool any_malloced = false; |
991 | const CHAR *p; |
992 | const CHAR *rs; |
993 | int retval = 0; |
994 | |
995 | /* Parse the pattern. Store the individual parts in the list. */ |
996 | level = 0; |
997 | for (startp = p = pattern + 1; level >= 0; ++p) |
998 | if (*p == L_('\0')) |
999 | { |
1000 | /* This is an invalid pattern. */ |
1001 | retval = -1; |
1002 | goto out; |
1003 | } |
1004 | else if (*p == L_('[')) |
1005 | { |
1006 | /* Handle brackets special. */ |
1007 | if (posixly_correct == 0) |
1008 | posixly_correct = getenv ("POSIXLY_CORRECT" ) != NULL ? 1 : -1; |
1009 | |
1010 | /* Skip the not sign. We have to recognize it because of a possibly |
1011 | following ']'. */ |
1012 | if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^'))) |
1013 | ++p; |
1014 | /* A leading ']' is recognized as such. */ |
1015 | if (*p == L_(']')) |
1016 | ++p; |
1017 | /* Skip over all characters of the list. */ |
1018 | while (*p != L_(']')) |
1019 | if (*p++ == L_('\0')) |
1020 | { |
1021 | /* This is no valid pattern. */ |
1022 | retval = -1; |
1023 | goto out; |
1024 | } |
1025 | } |
1026 | else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@') |
1027 | || *p == L_('!')) && p[1] == L_('(')) |
1028 | /* Remember the nesting level. */ |
1029 | ++level; |
1030 | else if (*p == L_(')')) |
1031 | { |
1032 | if (level-- == 0) |
1033 | { |
1034 | /* This means we found the end of the pattern. */ |
1035 | #define NEW_PATTERN \ |
1036 | struct patternlist *newp; \ |
1037 | size_t plen = (opt == L_('?') || opt == L_('@') \ |
1038 | ? pattern_len : (p - startp + 1UL)); \ |
1039 | idx_t slen = FLEXSIZEOF (struct patternlist, str, 0); \ |
1040 | idx_t new_used = alloca_used + slen; \ |
1041 | idx_t plensize; \ |
1042 | if (INT_MULTIPLY_WRAPV (plen, sizeof (CHAR), &plensize) \ |
1043 | || INT_ADD_WRAPV (new_used, plensize, &new_used)) \ |
1044 | { \ |
1045 | retval = -2; \ |
1046 | goto out; \ |
1047 | } \ |
1048 | slen += plensize; \ |
1049 | bool malloced = ! __libc_use_alloca (new_used); \ |
1050 | if (__glibc_unlikely (malloced)) \ |
1051 | { \ |
1052 | newp = malloc (slen); \ |
1053 | if (newp == NULL) \ |
1054 | { \ |
1055 | retval = -2; \ |
1056 | goto out; \ |
1057 | } \ |
1058 | any_malloced = true; \ |
1059 | } \ |
1060 | else \ |
1061 | newp = alloca_account (slen, alloca_used); \ |
1062 | newp->next = NULL; \ |
1063 | newp->malloced = malloced; \ |
1064 | *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L_('\0'); \ |
1065 | *lastp = newp; \ |
1066 | lastp = &newp->next |
1067 | NEW_PATTERN; |
1068 | } |
1069 | } |
1070 | else if (*p == L_('|')) |
1071 | { |
1072 | if (level == 0) |
1073 | { |
1074 | NEW_PATTERN; |
1075 | startp = p + 1; |
1076 | } |
1077 | } |
1078 | assert (list != NULL); |
1079 | assert (p[-1] == L_(')')); |
1080 | #undef NEW_PATTERN |
1081 | |
1082 | switch (opt) |
1083 | { |
1084 | case L_('*'): |
1085 | if (FCT (p, string, string_end, no_leading_period, flags, NULL, |
1086 | alloca_used) == 0) |
1087 | goto success; |
1088 | FALLTHROUGH; |
1089 | case L_('+'): |
1090 | do |
1091 | { |
1092 | for (rs = string; rs <= string_end; ++rs) |
1093 | /* First match the prefix with the current pattern with the |
1094 | current pattern. */ |
1095 | if (FCT (list->str, string, rs, no_leading_period, |
1096 | flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, |
1097 | NULL, alloca_used) == 0 |
1098 | /* This was successful. Now match the rest with the rest |
1099 | of the pattern. */ |
1100 | && (FCT (p, rs, string_end, |
1101 | rs == string |
1102 | ? no_leading_period |
1103 | : rs[-1] == '/' && NO_LEADING_PERIOD (flags), |
1104 | flags & FNM_FILE_NAME |
1105 | ? flags : flags & ~FNM_PERIOD, NULL, alloca_used) == 0 |
1106 | /* This didn't work. Try the whole pattern. */ |
1107 | || (rs != string |
1108 | && FCT (pattern - 1, rs, string_end, |
1109 | rs == string |
1110 | ? no_leading_period |
1111 | : rs[-1] == '/' && NO_LEADING_PERIOD (flags), |
1112 | flags & FNM_FILE_NAME |
1113 | ? flags : flags & ~FNM_PERIOD, NULL, |
1114 | alloca_used) == 0))) |
1115 | /* It worked. Signal success. */ |
1116 | goto success; |
1117 | } |
1118 | while ((list = list->next) != NULL); |
1119 | |
1120 | /* None of the patterns lead to a match. */ |
1121 | retval = FNM_NOMATCH; |
1122 | break; |
1123 | |
1124 | case L_('?'): |
1125 | if (FCT (p, string, string_end, no_leading_period, flags, NULL, |
1126 | alloca_used) == 0) |
1127 | goto success; |
1128 | FALLTHROUGH; |
1129 | case L_('@'): |
1130 | do |
1131 | /* I cannot believe it but 'strcat' is actually acceptable |
1132 | here. Match the entire string with the prefix from the |
1133 | pattern list and the rest of the pattern following the |
1134 | pattern list. */ |
1135 | if (FCT (STRCAT (list->str, p), string, string_end, |
1136 | no_leading_period, |
1137 | flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, |
1138 | NULL, alloca_used) == 0) |
1139 | /* It worked. Signal success. */ |
1140 | goto success; |
1141 | while ((list = list->next) != NULL); |
1142 | |
1143 | /* None of the patterns lead to a match. */ |
1144 | retval = FNM_NOMATCH; |
1145 | break; |
1146 | |
1147 | case L_('!'): |
1148 | for (rs = string; rs <= string_end; ++rs) |
1149 | { |
1150 | struct patternlist *runp; |
1151 | |
1152 | for (runp = list; runp != NULL; runp = runp->next) |
1153 | if (FCT (runp->str, string, rs, no_leading_period, |
1154 | flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, |
1155 | NULL, alloca_used) == 0) |
1156 | break; |
1157 | |
1158 | /* If none of the patterns matched see whether the rest does. */ |
1159 | if (runp == NULL |
1160 | && (FCT (p, rs, string_end, |
1161 | rs == string |
1162 | ? no_leading_period |
1163 | : rs[-1] == '/' && NO_LEADING_PERIOD (flags), |
1164 | flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, |
1165 | NULL, alloca_used) == 0)) |
1166 | /* This is successful. */ |
1167 | goto success; |
1168 | } |
1169 | |
1170 | /* None of the patterns together with the rest of the pattern |
1171 | lead to a match. */ |
1172 | retval = FNM_NOMATCH; |
1173 | break; |
1174 | |
1175 | default: |
1176 | assert (! "Invalid extended matching operator" ); |
1177 | retval = -1; |
1178 | break; |
1179 | } |
1180 | |
1181 | success: |
1182 | out: |
1183 | if (any_malloced) |
1184 | while (list != NULL) |
1185 | { |
1186 | struct patternlist *old = list; |
1187 | list = list->next; |
1188 | if (old->malloced) |
1189 | free (old); |
1190 | } |
1191 | |
1192 | return retval; |
1193 | } |
1194 | |
1195 | |
/* Remove the macro definitions used by the code above.  */

/* Character types and literal helpers.  */
#undef CHAR
#undef UCHAR
#undef INT
#undef L_
#undef FOLD
#undef BTOWC
#undef WIDE_CHAR_VERSION

/* Names of the functions and the struct defined above.  */
#undef FCT
#undef EXT
#undef END
#undef STRUCT

/* String, memory and collation helpers.  */
#undef MEMPCPY
#undef MEMCHR
#undef STRLEN
#undef STRCAT
#undef FINDIDX
1212 | |