1 | /* Copyright (C) 1991-2022 Free Software Foundation, Inc. |
2 | This file is part of the GNU C Library. |
3 | |
4 | The GNU C Library is free software; you can redistribute it and/or |
5 | modify it under the terms of the GNU Lesser General Public |
6 | License as published by the Free Software Foundation; either |
7 | version 2.1 of the License, or (at your option) any later version. |
8 | |
9 | The GNU C Library is distributed in the hope that it will be useful, |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | Lesser General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU Lesser General Public |
15 | License along with the GNU C Library; if not, see |
16 | <https://www.gnu.org/licenses/>. */ |
17 | |
18 | #ifdef _LIBC |
19 | # include <stdint.h> |
20 | #endif |
21 | |
22 | struct STRUCT |
23 | { |
24 | const CHAR *pattern; |
25 | const CHAR *string; |
26 | bool no_leading_period; |
27 | }; |
28 | |
29 | /* Match STRING against the file name pattern PATTERN, returning zero if |
30 | it matches, nonzero if not. */ |
31 | static int FCT (const CHAR *pattern, const CHAR *string, |
32 | const CHAR *string_end, bool no_leading_period, int flags, |
33 | struct STRUCT *ends); |
34 | static int EXT (INT opt, const CHAR *pattern, const CHAR *string, |
35 | const CHAR *string_end, bool no_leading_period, int flags); |
36 | static const CHAR *END (const CHAR *patternp); |
37 | |
38 | static int |
39 | FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, |
40 | bool no_leading_period, int flags, struct STRUCT *ends) |
41 | { |
42 | const CHAR *p = pattern, *n = string; |
43 | UCHAR c; |
44 | #ifdef _LIBC |
45 | # if WIDE_CHAR_VERSION |
46 | const char *collseq = (const char *) |
47 | _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC); |
48 | # else |
49 | const UCHAR *collseq = (const UCHAR *) |
50 | _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB); |
51 | # endif |
52 | #endif |
53 | |
54 | while ((c = *p++) != L_('\0')) |
55 | { |
56 | bool new_no_leading_period = false; |
57 | c = FOLD (c); |
58 | |
59 | switch (c) |
60 | { |
61 | case L_('?'): |
62 | if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(') |
63 | { |
64 | int res = EXT (c, p, n, string_end, no_leading_period, flags); |
65 | if (res != -1) |
66 | return res; |
67 | } |
68 | |
69 | if (n == string_end) |
70 | return FNM_NOMATCH; |
71 | else if (*n == L_('/') && (flags & FNM_FILE_NAME)) |
72 | return FNM_NOMATCH; |
73 | else if (*n == L_('.') && no_leading_period) |
74 | return FNM_NOMATCH; |
75 | break; |
76 | |
77 | case L_('\\'): |
78 | if (!(flags & FNM_NOESCAPE)) |
79 | { |
80 | c = *p++; |
81 | if (c == L_('\0')) |
82 | /* Trailing \ loses. */ |
83 | return FNM_NOMATCH; |
84 | c = FOLD (c); |
85 | } |
86 | if (n == string_end || FOLD ((UCHAR) *n) != c) |
87 | return FNM_NOMATCH; |
88 | break; |
89 | |
90 | case L_('*'): |
91 | if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(') |
92 | { |
93 | int res = EXT (c, p, n, string_end, no_leading_period, flags); |
94 | if (res != -1) |
95 | return res; |
96 | } |
97 | else if (ends != NULL) |
98 | { |
99 | ends->pattern = p - 1; |
100 | ends->string = n; |
101 | ends->no_leading_period = no_leading_period; |
102 | return 0; |
103 | } |
104 | |
105 | if (n != string_end && *n == L_('.') && no_leading_period) |
106 | return FNM_NOMATCH; |
107 | |
108 | for (c = *p++; c == L_('?') || c == L_('*'); c = *p++) |
109 | { |
110 | if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0) |
111 | { |
112 | const CHAR *endp = END (p); |
113 | if (endp != p) |
114 | { |
115 | /* This is a pattern. Skip over it. */ |
116 | p = endp; |
117 | continue; |
118 | } |
119 | } |
120 | |
121 | if (c == L_('?')) |
122 | { |
123 | /* A ? needs to match one character. */ |
124 | if (n == string_end) |
125 | /* There isn't another character; no match. */ |
126 | return FNM_NOMATCH; |
127 | else if (*n == L_('/') |
128 | && __glibc_unlikely (flags & FNM_FILE_NAME)) |
129 | /* A slash does not match a wildcard under |
130 | FNM_FILE_NAME. */ |
131 | return FNM_NOMATCH; |
132 | else |
133 | /* One character of the string is consumed in matching |
134 | this ? wildcard, so *??? won't match if there are |
135 | less than three characters. */ |
136 | ++n; |
137 | } |
138 | } |
139 | |
140 | if (c == L_('\0')) |
141 | /* The wildcard(s) is/are the last element of the pattern. |
142 | If the name is a file name and contains another slash |
143 | this means it cannot match, unless the FNM_LEADING_DIR |
144 | flag is set. */ |
145 | { |
146 | int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH; |
147 | |
148 | if (flags & FNM_FILE_NAME) |
149 | { |
150 | if (flags & FNM_LEADING_DIR) |
151 | result = 0; |
152 | else |
153 | { |
154 | if (MEMCHR (n, L_('/'), string_end - n) == NULL) |
155 | result = 0; |
156 | } |
157 | } |
158 | |
159 | return result; |
160 | } |
161 | else |
162 | { |
163 | const CHAR *endp; |
164 | struct STRUCT end; |
165 | |
166 | end.pattern = NULL; |
167 | endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'), |
168 | string_end - n); |
169 | if (endp == NULL) |
170 | endp = string_end; |
171 | |
172 | if (c == L_('[') |
173 | || (__glibc_unlikely (flags & FNM_EXTMATCH) |
174 | && (c == L_('@') || c == L_('+') || c == L_('!')) |
175 | && *p == L_('('))) |
176 | { |
177 | int flags2 = ((flags & FNM_FILE_NAME) |
178 | ? flags : (flags & ~FNM_PERIOD)); |
179 | |
180 | for (--p; n < endp; ++n, no_leading_period = false) |
181 | if (FCT (p, n, string_end, no_leading_period, flags2, |
182 | &end) == 0) |
183 | goto found; |
184 | } |
185 | else if (c == L_('/') && (flags & FNM_FILE_NAME)) |
186 | { |
187 | while (n < string_end && *n != L_('/')) |
188 | ++n; |
189 | if (n < string_end && *n == L_('/') |
190 | && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags, |
191 | NULL) == 0)) |
192 | return 0; |
193 | } |
194 | else |
195 | { |
196 | int flags2 = ((flags & FNM_FILE_NAME) |
197 | ? flags : (flags & ~FNM_PERIOD)); |
198 | |
199 | if (c == L_('\\') && !(flags & FNM_NOESCAPE)) |
200 | c = *p; |
201 | c = FOLD (c); |
202 | for (--p; n < endp; ++n, no_leading_period = false) |
203 | if (FOLD ((UCHAR) *n) == c |
204 | && (FCT (p, n, string_end, no_leading_period, flags2, |
205 | &end) == 0)) |
206 | { |
207 | found: |
208 | if (end.pattern == NULL) |
209 | return 0; |
210 | break; |
211 | } |
212 | if (end.pattern != NULL) |
213 | { |
214 | p = end.pattern; |
215 | n = end.string; |
216 | no_leading_period = end.no_leading_period; |
217 | continue; |
218 | } |
219 | } |
220 | } |
221 | |
222 | /* If we come here no match is possible with the wildcard. */ |
223 | return FNM_NOMATCH; |
224 | |
225 | case L_('['): |
226 | { |
227 | /* Nonzero if the sense of the character class is inverted. */ |
228 | const CHAR *p_init = p; |
229 | const CHAR *n_init = n; |
230 | bool not; |
231 | CHAR cold; |
232 | UCHAR fn; |
233 | |
234 | if (posixly_correct == 0) |
235 | posixly_correct = getenv ("POSIXLY_CORRECT" ) != NULL ? 1 : -1; |
236 | |
237 | if (n == string_end) |
238 | return FNM_NOMATCH; |
239 | |
240 | if (*n == L_('.') && no_leading_period) |
241 | return FNM_NOMATCH; |
242 | |
243 | if (*n == L_('/') && (flags & FNM_FILE_NAME)) |
244 | /* '/' cannot be matched. */ |
245 | return FNM_NOMATCH; |
246 | |
247 | not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^'))); |
248 | if (not) |
249 | ++p; |
250 | |
251 | fn = FOLD ((UCHAR) *n); |
252 | |
253 | c = *p++; |
254 | for (;;) |
255 | { |
256 | if (!(flags & FNM_NOESCAPE) && c == L_('\\')) |
257 | { |
258 | if (*p == L_('\0')) |
259 | return FNM_NOMATCH; |
260 | c = FOLD ((UCHAR) *p); |
261 | ++p; |
262 | |
263 | goto normal_bracket; |
264 | } |
265 | else if (c == L_('[') && *p == L_(':')) |
266 | { |
267 | /* Leave room for the null. */ |
268 | CHAR str[CHAR_CLASS_MAX_LENGTH + 1]; |
269 | size_t c1 = 0; |
270 | wctype_t wt; |
271 | const CHAR *startp = p; |
272 | |
273 | for (;;) |
274 | { |
275 | if (c1 == CHAR_CLASS_MAX_LENGTH) |
276 | /* The name is too long and therefore the pattern |
277 | is ill-formed. */ |
278 | return FNM_NOMATCH; |
279 | |
280 | c = *++p; |
281 | if (c == L_(':') && p[1] == L_(']')) |
282 | { |
283 | p += 2; |
284 | break; |
285 | } |
286 | if (c < L_('a') || c >= L_('z')) |
287 | { |
288 | /* This cannot possibly be a character class name. |
289 | Match it as a normal range. */ |
290 | p = startp; |
291 | c = L_('['); |
292 | goto normal_bracket; |
293 | } |
294 | str[c1++] = c; |
295 | } |
296 | str[c1] = L_('\0'); |
297 | |
298 | wt = IS_CHAR_CLASS (str); |
299 | if (wt == 0) |
300 | /* Invalid character class name. */ |
301 | return FNM_NOMATCH; |
302 | |
303 | #if defined _LIBC && ! WIDE_CHAR_VERSION |
304 | /* The following code is glibc specific but does |
305 | there a good job in speeding up the code since |
306 | we can avoid the btowc() call. */ |
307 | if (_ISCTYPE ((UCHAR) *n, wt)) |
308 | goto matched; |
309 | #else |
310 | if (iswctype (BTOWC ((UCHAR) *n), wt)) |
311 | goto matched; |
312 | #endif |
313 | c = *p++; |
314 | } |
315 | #ifdef _LIBC |
316 | else if (c == L_('[') && *p == L_('=')) |
317 | { |
318 | /* It's important that STR be a scalar variable rather |
319 | than a one-element array, because GCC (at least 4.9.2 |
320 | -O2 on x86-64) can be confused by the array and |
321 | diagnose a "used initialized" in a dead branch in the |
322 | findidx function. */ |
323 | UCHAR str; |
324 | uint32_t nrules = |
325 | _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); |
326 | const CHAR *startp = p; |
327 | |
328 | c = *++p; |
329 | if (c == L_('\0')) |
330 | { |
331 | p = startp; |
332 | c = L_('['); |
333 | goto normal_bracket; |
334 | } |
335 | str = c; |
336 | |
337 | c = *++p; |
338 | if (c != L_('=') || p[1] != L_(']')) |
339 | { |
340 | p = startp; |
341 | c = L_('['); |
342 | goto normal_bracket; |
343 | } |
344 | p += 2; |
345 | |
346 | if (nrules == 0) |
347 | { |
348 | if ((UCHAR) *n == str) |
349 | goto matched; |
350 | } |
351 | else |
352 | { |
353 | const int32_t *table; |
354 | # if WIDE_CHAR_VERSION |
355 | const int32_t *weights; |
356 | const wint_t *extra; |
357 | # else |
358 | const unsigned char *weights; |
359 | const unsigned char *; |
360 | # endif |
361 | const int32_t *indirect; |
362 | int32_t idx; |
363 | const UCHAR *cp = (const UCHAR *) &str; |
364 | |
365 | # if WIDE_CHAR_VERSION |
366 | table = (const int32_t *) |
367 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC); |
368 | weights = (const int32_t *) |
369 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC); |
370 | extra = (const wint_t *) |
371 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC); |
372 | indirect = (const int32_t *) |
373 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC); |
374 | # else |
375 | table = (const int32_t *) |
376 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); |
377 | weights = (const unsigned char *) |
378 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB); |
379 | extra = (const unsigned char *) |
380 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); |
381 | indirect = (const int32_t *) |
382 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); |
383 | # endif |
384 | |
385 | idx = FINDIDX (table, indirect, extra, &cp, 1); |
386 | if (idx != 0) |
387 | { |
388 | /* We found a table entry. Now see whether the |
389 | character we are currently at has the same |
390 | equivalence class value. */ |
391 | int len = weights[idx & 0xffffff]; |
392 | int32_t idx2; |
393 | const UCHAR *np = (const UCHAR *) n; |
394 | |
395 | idx2 = FINDIDX (table, indirect, extra, |
396 | &np, string_end - n); |
397 | if (idx2 != 0 |
398 | && (idx >> 24) == (idx2 >> 24) |
399 | && len == weights[idx2 & 0xffffff]) |
400 | { |
401 | int cnt = 0; |
402 | |
403 | idx &= 0xffffff; |
404 | idx2 &= 0xffffff; |
405 | |
406 | while (cnt < len |
407 | && (weights[idx + 1 + cnt] |
408 | == weights[idx2 + 1 + cnt])) |
409 | ++cnt; |
410 | |
411 | if (cnt == len) |
412 | goto matched; |
413 | } |
414 | } |
415 | } |
416 | |
417 | c = *p++; |
418 | } |
419 | #endif |
420 | else if (c == L_('\0')) |
421 | { |
422 | /* [ unterminated, treat as normal character. */ |
423 | p = p_init; |
424 | n = n_init; |
425 | c = L_('['); |
426 | goto normal_match; |
427 | } |
428 | else |
429 | { |
430 | bool is_range = false; |
431 | |
432 | #ifdef _LIBC |
433 | bool is_seqval = false; |
434 | |
435 | if (c == L_('[') && *p == L_('.')) |
436 | { |
437 | uint32_t nrules = |
438 | _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); |
439 | const CHAR *startp = p; |
440 | size_t c1 = 0; |
441 | |
442 | while (1) |
443 | { |
444 | c = *++p; |
445 | if (c == L_('.') && p[1] == L_(']')) |
446 | { |
447 | p += 2; |
448 | break; |
449 | } |
450 | if (c == '\0') |
451 | return FNM_NOMATCH; |
452 | ++c1; |
453 | } |
454 | |
455 | /* We have to handling the symbols differently in |
456 | ranges since then the collation sequence is |
457 | important. */ |
458 | is_range = *p == L_('-') && p[1] != L_('\0'); |
459 | |
460 | if (nrules == 0) |
461 | { |
462 | /* There are no names defined in the collation |
463 | data. Therefore we only accept the trivial |
464 | names consisting of the character itself. */ |
465 | if (c1 != 1) |
466 | return FNM_NOMATCH; |
467 | |
468 | if (!is_range && *n == startp[1]) |
469 | goto matched; |
470 | |
471 | cold = startp[1]; |
472 | c = *p++; |
473 | } |
474 | else |
475 | { |
476 | int32_t table_size; |
477 | const int32_t *symb_table; |
478 | const unsigned char *; |
479 | int32_t idx; |
480 | int32_t elem; |
481 | # if WIDE_CHAR_VERSION |
482 | CHAR *wextra; |
483 | # endif |
484 | |
485 | table_size = |
486 | _NL_CURRENT_WORD (LC_COLLATE, |
487 | _NL_COLLATE_SYMB_HASH_SIZEMB); |
488 | symb_table = (const int32_t *) |
489 | _NL_CURRENT (LC_COLLATE, |
490 | _NL_COLLATE_SYMB_TABLEMB); |
491 | extra = (const unsigned char *) |
492 | _NL_CURRENT (LC_COLLATE, |
493 | _NL_COLLATE_SYMB_EXTRAMB); |
494 | |
495 | for (elem = 0; elem < table_size; elem++) |
496 | if (symb_table[2 * elem] != 0) |
497 | { |
498 | idx = symb_table[2 * elem + 1]; |
499 | /* Skip the name of collating element. */ |
500 | idx += 1 + extra[idx]; |
501 | # if WIDE_CHAR_VERSION |
502 | /* Skip the byte sequence of the |
503 | collating element. */ |
504 | idx += 1 + extra[idx]; |
505 | /* Adjust for the alignment. */ |
506 | idx = (idx + 3) & ~3; |
507 | |
508 | wextra = (CHAR *) &extra[idx + 4]; |
509 | |
510 | if (/* Compare the length of the sequence. */ |
511 | c1 == wextra[0] |
512 | /* Compare the wide char sequence. */ |
513 | && (__wmemcmp (startp + 1, &wextra[1], |
514 | c1) |
515 | == 0)) |
516 | /* Yep, this is the entry. */ |
517 | break; |
518 | # else |
519 | if (/* Compare the length of the sequence. */ |
520 | c1 == extra[idx] |
521 | /* Compare the byte sequence. */ |
522 | && memcmp (startp + 1, |
523 | &extra[idx + 1], c1) == 0) |
524 | /* Yep, this is the entry. */ |
525 | break; |
526 | # endif |
527 | } |
528 | |
529 | if (elem < table_size) |
530 | { |
531 | /* Compare the byte sequence but only if |
532 | this is not part of a range. */ |
533 | if (! is_range |
534 | |
535 | # if WIDE_CHAR_VERSION |
536 | && __wmemcmp (n, &wextra[1], c1) == 0 |
537 | # else |
538 | && memcmp (n, &extra[idx + 1], c1) == 0 |
539 | # endif |
540 | ) |
541 | { |
542 | n += c1 - 1; |
543 | goto matched; |
544 | } |
545 | |
546 | /* Get the collation sequence value. */ |
547 | is_seqval = true; |
548 | # if WIDE_CHAR_VERSION |
549 | cold = wextra[1 + wextra[0]]; |
550 | # else |
551 | idx += 1 + extra[idx]; |
552 | /* Adjust for the alignment. */ |
553 | idx = (idx + 3) & ~3; |
554 | cold = *((int32_t *) &extra[idx]); |
555 | # endif |
556 | |
557 | c = *p++; |
558 | } |
559 | else if (c1 == 1) |
560 | { |
561 | /* No valid character. Match it as a |
562 | single byte. */ |
563 | if (!is_range && *n == startp[1]) |
564 | goto matched; |
565 | |
566 | cold = startp[1]; |
567 | c = *p++; |
568 | } |
569 | else |
570 | return FNM_NOMATCH; |
571 | } |
572 | } |
573 | else |
574 | #endif |
575 | { |
576 | c = FOLD (c); |
577 | normal_bracket: |
578 | |
579 | /* We have to handling the symbols differently in |
580 | ranges since then the collation sequence is |
581 | important. */ |
582 | is_range = (*p == L_('-') && p[1] != L_('\0') |
583 | && p[1] != L_(']')); |
584 | |
585 | if (!is_range && c == fn) |
586 | goto matched; |
587 | |
588 | #if _LIBC |
589 | /* This is needed if we goto normal_bracket; from |
590 | outside of is_seqval's scope. */ |
591 | is_seqval = false; |
592 | #endif |
593 | cold = c; |
594 | c = *p++; |
595 | } |
596 | |
597 | if (c == L_('-') && *p != L_(']')) |
598 | { |
599 | #if _LIBC |
600 | /* We have to find the collation sequence |
601 | value for C. Collation sequence is nothing |
602 | we can regularly access. The sequence |
603 | value is defined by the order in which the |
604 | definitions of the collation values for the |
605 | various characters appear in the source |
606 | file. A strange concept, nowhere |
607 | documented. */ |
608 | uint32_t fcollseq; |
609 | uint32_t lcollseq; |
610 | UCHAR cend = *p++; |
611 | |
612 | # if WIDE_CHAR_VERSION |
613 | /* Search in the 'names' array for the characters. */ |
614 | fcollseq = __collseq_table_lookup (collseq, fn); |
615 | if (fcollseq == ~((uint32_t) 0)) |
616 | /* XXX We don't know anything about the character |
617 | we are supposed to match. This means we are |
618 | failing. */ |
619 | goto range_not_matched; |
620 | |
621 | if (is_seqval) |
622 | lcollseq = cold; |
623 | else |
624 | lcollseq = __collseq_table_lookup (collseq, cold); |
625 | # else |
626 | fcollseq = collseq[fn]; |
627 | lcollseq = is_seqval ? cold : collseq[(UCHAR) cold]; |
628 | # endif |
629 | |
630 | is_seqval = false; |
631 | if (cend == L_('[') && *p == L_('.')) |
632 | { |
633 | uint32_t nrules = |
634 | _NL_CURRENT_WORD (LC_COLLATE, |
635 | _NL_COLLATE_NRULES); |
636 | const CHAR *startp = p; |
637 | size_t c1 = 0; |
638 | |
639 | while (1) |
640 | { |
641 | c = *++p; |
642 | if (c == L_('.') && p[1] == L_(']')) |
643 | { |
644 | p += 2; |
645 | break; |
646 | } |
647 | if (c == '\0') |
648 | return FNM_NOMATCH; |
649 | ++c1; |
650 | } |
651 | |
652 | if (nrules == 0) |
653 | { |
654 | /* There are no names defined in the |
655 | collation data. Therefore we only |
656 | accept the trivial names consisting |
657 | of the character itself. */ |
658 | if (c1 != 1) |
659 | return FNM_NOMATCH; |
660 | |
661 | cend = startp[1]; |
662 | } |
663 | else |
664 | { |
665 | int32_t table_size; |
666 | const int32_t *symb_table; |
667 | const unsigned char *; |
668 | int32_t idx; |
669 | int32_t elem; |
670 | # if WIDE_CHAR_VERSION |
671 | CHAR *wextra; |
672 | # endif |
673 | |
674 | table_size = |
675 | _NL_CURRENT_WORD (LC_COLLATE, |
676 | _NL_COLLATE_SYMB_HASH_SIZEMB); |
677 | symb_table = (const int32_t *) |
678 | _NL_CURRENT (LC_COLLATE, |
679 | _NL_COLLATE_SYMB_TABLEMB); |
680 | extra = (const unsigned char *) |
681 | _NL_CURRENT (LC_COLLATE, |
682 | _NL_COLLATE_SYMB_EXTRAMB); |
683 | |
684 | for (elem = 0; elem < table_size; elem++) |
685 | if (symb_table[2 * elem] != 0) |
686 | { |
687 | idx = symb_table[2 * elem + 1]; |
688 | /* Skip the name of collating |
689 | element. */ |
690 | idx += 1 + extra[idx]; |
691 | # if WIDE_CHAR_VERSION |
692 | /* Skip the byte sequence of the |
693 | collating element. */ |
694 | idx += 1 + extra[idx]; |
695 | /* Adjust for the alignment. */ |
696 | idx = (idx + 3) & ~3; |
697 | |
698 | wextra = (CHAR *) &extra[idx + 4]; |
699 | |
700 | if (/* Compare the length of the |
701 | sequence. */ |
702 | c1 == wextra[0] |
703 | /* Compare the wide char sequence. */ |
704 | && (__wmemcmp (startp + 1, |
705 | &wextra[1], c1) |
706 | == 0)) |
707 | /* Yep, this is the entry. */ |
708 | break; |
709 | # else |
710 | if (/* Compare the length of the |
711 | sequence. */ |
712 | c1 == extra[idx] |
713 | /* Compare the byte sequence. */ |
714 | && memcmp (startp + 1, |
715 | &extra[idx + 1], c1) == 0) |
716 | /* Yep, this is the entry. */ |
717 | break; |
718 | # endif |
719 | } |
720 | |
721 | if (elem < table_size) |
722 | { |
723 | /* Get the collation sequence value. */ |
724 | is_seqval = true; |
725 | # if WIDE_CHAR_VERSION |
726 | cend = wextra[1 + wextra[0]]; |
727 | # else |
728 | idx += 1 + extra[idx]; |
729 | /* Adjust for the alignment. */ |
730 | idx = (idx + 3) & ~3; |
731 | cend = *((int32_t *) &extra[idx]); |
732 | # endif |
733 | } |
734 | else if (c1 == 1) |
735 | { |
736 | cend = startp[1]; |
737 | c = *p++; |
738 | } |
739 | else |
740 | return FNM_NOMATCH; |
741 | } |
742 | } |
743 | else |
744 | { |
745 | if (!(flags & FNM_NOESCAPE) && cend == L_('\\')) |
746 | cend = *p++; |
747 | if (cend == L_('\0')) |
748 | return FNM_NOMATCH; |
749 | cend = FOLD (cend); |
750 | } |
751 | |
752 | /* XXX It is not entirely clear to me how to handle |
753 | characters which are not mentioned in the |
754 | collation specification. */ |
755 | if ( |
756 | # if WIDE_CHAR_VERSION |
757 | lcollseq == 0xffffffff || |
758 | # endif |
759 | lcollseq <= fcollseq) |
760 | { |
761 | /* We have to look at the upper bound. */ |
762 | uint32_t hcollseq; |
763 | |
764 | if (is_seqval) |
765 | hcollseq = cend; |
766 | else |
767 | { |
768 | # if WIDE_CHAR_VERSION |
769 | hcollseq = |
770 | __collseq_table_lookup (collseq, cend); |
771 | if (hcollseq == ~((uint32_t) 0)) |
772 | { |
773 | /* Hum, no information about the upper |
774 | bound. The matching succeeds if the |
775 | lower bound is matched exactly. */ |
776 | if (lcollseq != fcollseq) |
777 | goto range_not_matched; |
778 | |
779 | goto matched; |
780 | } |
781 | # else |
782 | hcollseq = collseq[cend]; |
783 | # endif |
784 | } |
785 | |
786 | if (lcollseq <= hcollseq && fcollseq <= hcollseq) |
787 | goto matched; |
788 | } |
789 | # if WIDE_CHAR_VERSION |
790 | range_not_matched: |
791 | # endif |
792 | #else |
793 | /* We use a boring value comparison of the character |
794 | values. This is better than comparing using |
795 | 'strcoll' since the latter would have surprising |
796 | and sometimes fatal consequences. */ |
797 | UCHAR cend = *p++; |
798 | |
799 | if (!(flags & FNM_NOESCAPE) && cend == L_('\\')) |
800 | cend = *p++; |
801 | if (cend == L_('\0')) |
802 | return FNM_NOMATCH; |
803 | |
804 | /* It is a range. */ |
805 | if ((UCHAR) cold <= fn && fn <= cend) |
806 | goto matched; |
807 | #endif |
808 | |
809 | c = *p++; |
810 | } |
811 | } |
812 | |
813 | if (c == L_(']')) |
814 | break; |
815 | } |
816 | |
817 | if (!not) |
818 | return FNM_NOMATCH; |
819 | break; |
820 | |
821 | matched: |
822 | /* Skip the rest of the [...] that already matched. */ |
823 | while ((c = *p++) != L_(']')) |
824 | { |
825 | if (c == L_('\0')) |
826 | { |
827 | /* [ unterminated, treat as normal character. */ |
828 | p = p_init; |
829 | n = n_init; |
830 | c = L_('['); |
831 | goto normal_match; |
832 | } |
833 | |
834 | if (!(flags & FNM_NOESCAPE) && c == L_('\\')) |
835 | { |
836 | if (*p == L_('\0')) |
837 | return FNM_NOMATCH; |
838 | /* XXX 1003.2d11 is unclear if this is right. */ |
839 | ++p; |
840 | } |
841 | else if (c == L_('[') && *p == L_(':')) |
842 | { |
843 | int c1 = 0; |
844 | const CHAR *startp = p; |
845 | |
846 | while (1) |
847 | { |
848 | c = *++p; |
849 | if (++c1 == CHAR_CLASS_MAX_LENGTH) |
850 | return FNM_NOMATCH; |
851 | |
852 | if (*p == L_(':') && p[1] == L_(']')) |
853 | break; |
854 | |
855 | if (c < L_('a') || c >= L_('z')) |
856 | { |
857 | p = startp - 2; |
858 | break; |
859 | } |
860 | } |
861 | p += 2; |
862 | } |
863 | else if (c == L_('[') && *p == L_('=')) |
864 | { |
865 | c = *++p; |
866 | if (c == L_('\0')) |
867 | return FNM_NOMATCH; |
868 | c = *++p; |
869 | if (c != L_('=') || p[1] != L_(']')) |
870 | return FNM_NOMATCH; |
871 | p += 2; |
872 | } |
873 | else if (c == L_('[') && *p == L_('.')) |
874 | { |
875 | while (1) |
876 | { |
877 | c = *++p; |
878 | if (c == L_('\0')) |
879 | return FNM_NOMATCH; |
880 | |
881 | if (c == L_('.') && p[1] == L_(']')) |
882 | break; |
883 | } |
884 | p += 2; |
885 | } |
886 | } |
887 | if (not) |
888 | return FNM_NOMATCH; |
889 | } |
890 | break; |
891 | |
892 | case L_('+'): |
893 | case L_('@'): |
894 | case L_('!'): |
895 | if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(') |
896 | { |
897 | int res = EXT (c, p, n, string_end, no_leading_period, flags); |
898 | if (res != -1) |
899 | return res; |
900 | } |
901 | goto normal_match; |
902 | |
903 | case L_('/'): |
904 | if (NO_LEADING_PERIOD (flags)) |
905 | { |
906 | if (n == string_end || c != (UCHAR) *n) |
907 | return FNM_NOMATCH; |
908 | |
909 | new_no_leading_period = true; |
910 | break; |
911 | } |
912 | FALLTHROUGH; |
913 | default: |
914 | normal_match: |
915 | if (n == string_end || c != FOLD ((UCHAR) *n)) |
916 | return FNM_NOMATCH; |
917 | } |
918 | |
919 | no_leading_period = new_no_leading_period; |
920 | ++n; |
921 | } |
922 | |
923 | if (n == string_end) |
924 | return 0; |
925 | |
926 | if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/')) |
927 | /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */ |
928 | return 0; |
929 | |
930 | return FNM_NOMATCH; |
931 | } |
932 | |
933 | |
934 | static const CHAR * |
935 | END (const CHAR *pattern) |
936 | { |
937 | const CHAR *p = pattern; |
938 | |
939 | while (1) |
940 | if (*++p == L_('\0')) |
941 | /* This is an invalid pattern. */ |
942 | return pattern; |
943 | else if (*p == L_('[')) |
944 | { |
945 | /* Handle brackets special. */ |
946 | if (posixly_correct == 0) |
947 | posixly_correct = getenv ("POSIXLY_CORRECT" ) != NULL ? 1 : -1; |
948 | |
949 | /* Skip the not sign. We have to recognize it because of a possibly |
950 | following ']'. */ |
951 | if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^'))) |
952 | ++p; |
953 | /* A leading ']' is recognized as such. */ |
954 | if (*p == L_(']')) |
955 | ++p; |
956 | /* Skip over all characters of the list. */ |
957 | while (*p != L_(']')) |
958 | if (*p++ == L_('\0')) |
959 | /* This is no valid pattern. */ |
960 | return pattern; |
961 | } |
962 | else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@') |
963 | || *p == L_('!')) && p[1] == L_('(')) |
964 | { |
965 | p = END (p + 1); |
966 | if (*p == L_('\0')) |
967 | /* This is an invalid pattern. */ |
968 | return pattern; |
969 | } |
970 | else if (*p == L_(')')) |
971 | break; |
972 | |
973 | return p + 1; |
974 | } |
975 | |
976 | #if WIDE_CHAR_VERSION |
977 | # define PATTERN_PREFIX pattern_list |
978 | #else |
979 | # define PATTERN_PREFIX wpattern_list |
980 | #endif |
981 | |
982 | #define PASTE(a,b) PASTE1(a,b) |
983 | #define PASTE1(a,b) a##b |
984 | |
985 | #define DYNARRAY_STRUCT PATTERN_PREFIX |
986 | #define DYNARRAY_ELEMENT_FREE(ptr) free (*ptr) |
987 | #define DYNARRAY_ELEMENT CHAR * |
988 | #define DYNARRAY_PREFIX PASTE(PATTERN_PREFIX,_) |
989 | #define DYNARRAY_INITIAL_SIZE 8 |
990 | #include <malloc/dynarray-skeleton.c> |
991 | |
992 | static int |
993 | EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end, |
994 | bool no_leading_period, int flags) |
995 | { |
996 | const CHAR *startp; |
997 | ptrdiff_t level; |
998 | struct PATTERN_PREFIX list; |
999 | size_t pattern_len = STRLEN (pattern); |
1000 | size_t pattern_i = 0; |
1001 | const CHAR *p; |
1002 | const CHAR *rs; |
1003 | int retval = 0; |
1004 | |
1005 | PASTE (PATTERN_PREFIX, _init) (&list); |
1006 | |
1007 | /* Parse the pattern. Store the individual parts in the list. */ |
1008 | level = 0; |
1009 | for (startp = p = pattern + 1; level >= 0; ++p) |
1010 | if (*p == L_('\0')) |
1011 | { |
1012 | /* This is an invalid pattern. */ |
1013 | retval = -1; |
1014 | goto out; |
1015 | } |
1016 | else if (*p == L_('[')) |
1017 | { |
1018 | /* Handle brackets special. */ |
1019 | if (posixly_correct == 0) |
1020 | posixly_correct = getenv ("POSIXLY_CORRECT" ) != NULL ? 1 : -1; |
1021 | |
1022 | /* Skip the not sign. We have to recognize it because of a possibly |
1023 | following ']'. */ |
1024 | if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^'))) |
1025 | ++p; |
1026 | /* A leading ']' is recognized as such. */ |
1027 | if (*p == L_(']')) |
1028 | ++p; |
1029 | /* Skip over all characters of the list. */ |
1030 | while (*p != L_(']')) |
1031 | if (*p++ == L_('\0')) |
1032 | { |
1033 | /* This is no valid pattern. */ |
1034 | retval = -1; |
1035 | goto out; |
1036 | } |
1037 | } |
1038 | else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@') |
1039 | || *p == L_('!')) && p[1] == L_('(')) |
1040 | /* Remember the nesting level. */ |
1041 | ++level; |
1042 | else if (*p == L_(')') || *p == L_('|')) |
1043 | { |
1044 | if (level == 0) |
1045 | { |
1046 | size_t slen = opt == L_('?') || opt == L_('@') |
1047 | ? pattern_len : p - startp + 1; |
1048 | CHAR *newp = malloc (slen * sizeof (CHAR)); |
1049 | if (newp != NULL) |
1050 | { |
1051 | *((CHAR *) MEMPCPY (newp, startp, p - startp)) = L_('\0'); |
1052 | PASTE (PATTERN_PREFIX,_add) (&list, newp); |
1053 | } |
1054 | if (newp == NULL || PASTE (PATTERN_PREFIX, _has_failed) (&list)) |
1055 | { |
1056 | retval = -2; |
1057 | goto out; |
1058 | } |
1059 | |
1060 | if (*p == L_('|')) |
1061 | startp = p + 1; |
1062 | } |
1063 | if (*p == L_(')')) |
1064 | level--; |
1065 | } |
1066 | assert (p[-1] == L_(')')); |
1067 | |
1068 | switch (opt) |
1069 | { |
1070 | case L_('*'): |
1071 | if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0) |
1072 | goto success; |
1073 | FALLTHROUGH; |
1074 | case L_('+'): |
1075 | for (; pattern_i < PASTE (PATTERN_PREFIX, _size)(&list); pattern_i++) |
1076 | { |
1077 | for (rs = string; rs <= string_end; ++rs) |
1078 | /* First match the prefix with the current pattern with the |
1079 | current pattern. */ |
1080 | if (FCT (*PASTE (PATTERN_PREFIX, _at) (&list, pattern_i), string, |
1081 | rs, no_leading_period, |
1082 | flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, |
1083 | NULL) == 0 |
1084 | /* This was successful. Now match the rest with the rest |
1085 | of the pattern. */ |
1086 | && (FCT (p, rs, string_end, |
1087 | rs == string |
1088 | ? no_leading_period |
1089 | : rs[-1] == '/' && NO_LEADING_PERIOD (flags), |
1090 | flags & FNM_FILE_NAME |
1091 | ? flags : flags & ~FNM_PERIOD, NULL) == 0 |
1092 | /* This didn't work. Try the whole pattern. */ |
1093 | || (rs != string |
1094 | && FCT (pattern - 1, rs, string_end, |
1095 | rs == string |
1096 | ? no_leading_period |
1097 | : rs[-1] == '/' && NO_LEADING_PERIOD (flags), |
1098 | flags & FNM_FILE_NAME |
1099 | ? flags : flags & ~FNM_PERIOD, NULL) == 0))) |
1100 | /* It worked. Signal success. */ |
1101 | goto success; |
1102 | } |
1103 | |
1104 | /* None of the patterns lead to a match. */ |
1105 | retval = FNM_NOMATCH; |
1106 | break; |
1107 | |
1108 | case L_('?'): |
1109 | if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0) |
1110 | goto success; |
1111 | FALLTHROUGH; |
1112 | case L_('@'): |
1113 | for (; pattern_i < PASTE (PATTERN_PREFIX, _size) (&list); pattern_i++) |
1114 | { |
1115 | /* I cannot believe it but `strcat' is actually acceptable |
1116 | here. Match the entire string with the prefix from the |
1117 | pattern list and the rest of the pattern following the |
1118 | pattern list. */ |
1119 | if (FCT (STRCAT (*PASTE (PATTERN_PREFIX, _at) (&list, pattern_i), p), |
1120 | string, string_end, no_leading_period, |
1121 | flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, |
1122 | NULL) == 0) |
1123 | /* It worked. Signal success. */ |
1124 | goto success; |
1125 | } |
1126 | |
1127 | /* None of the patterns lead to a match. */ |
1128 | retval = FNM_NOMATCH; |
1129 | break; |
1130 | |
1131 | case L_('!'): |
1132 | for (rs = string; rs <= string_end; ++rs) |
1133 | { |
1134 | size_t runp_i; |
1135 | |
1136 | for (runp_i = pattern_i; |
1137 | runp_i != PASTE (PATTERN_PREFIX, _size) (&list); |
1138 | runp_i++) |
1139 | { |
1140 | if (FCT (*PASTE (PATTERN_PREFIX, _at) (&list, runp_i), string, rs, |
1141 | no_leading_period, |
1142 | flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, |
1143 | NULL) == 0) |
1144 | break; |
1145 | } |
1146 | |
1147 | /* If none of the patterns matched see whether the rest does. */ |
1148 | if (runp_i == PASTE (PATTERN_PREFIX, _size) (&list) |
1149 | && (FCT (p, rs, string_end, |
1150 | rs == string |
1151 | ? no_leading_period |
1152 | : rs[-1] == '/' && NO_LEADING_PERIOD (flags), |
1153 | flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, |
1154 | NULL) == 0)) |
1155 | /* This is successful. */ |
1156 | goto success; |
1157 | } |
1158 | |
1159 | /* None of the patterns together with the rest of the pattern |
1160 | lead to a match. */ |
1161 | retval = FNM_NOMATCH; |
1162 | break; |
1163 | |
1164 | default: |
1165 | assert (! "Invalid extended matching operator" ); |
1166 | retval = -1; |
1167 | break; |
1168 | } |
1169 | |
1170 | success: |
1171 | out: |
1172 | PASTE (PATTERN_PREFIX, _free) (&list); |
1173 | |
1174 | return retval; |
1175 | } |
1176 | |
/* Drop the helper macros defined above for the dynarray
   instantiation.  */
#undef PATTERN_PREFIX
#undef PASTE
#undef PASTE1

/* Drop the template parameters so that this file can be included again
   with a different set of definitions.  */
#undef FOLD
#undef CHAR
#undef UCHAR
#undef INT
#undef FCT
#undef EXT
#undef END
#undef STRUCT
#undef MEMPCPY
#undef MEMCHR
#undef STRLEN
#undef STRCAT
#undef L_
#undef BTOWC
#undef WIDE_CHAR_VERSION
#undef FINDIDX
1197 | |