1 | /* Copyright (C) 1991-2023 Free Software Foundation, Inc. |
2 | This file is part of the GNU C Library. |
3 | |
4 | The GNU C Library is free software; you can redistribute it and/or |
5 | modify it under the terms of the GNU Lesser General Public |
6 | License as published by the Free Software Foundation; either |
7 | version 2.1 of the License, or (at your option) any later version. |
8 | |
9 | The GNU C Library is distributed in the hope that it will be useful, |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | Lesser General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU Lesser General Public |
15 | License along with the GNU C Library; if not, see |
16 | <https://www.gnu.org/licenses/>. */ |
17 | |
18 | #ifdef _LIBC |
19 | # include <stdint.h> |
20 | #endif |
21 | |
22 | struct STRUCT |
23 | { |
24 | const CHAR *pattern; |
25 | const CHAR *string; |
26 | bool no_leading_period; |
27 | }; |
28 | |
29 | /* Match STRING against the file name pattern PATTERN, returning zero if |
30 | it matches, nonzero if not. */ |
31 | static int FCT (const CHAR *pattern, const CHAR *string, |
32 | const CHAR *string_end, bool no_leading_period, int flags, |
33 | struct STRUCT *ends); |
34 | static int EXT (INT opt, const CHAR *pattern, const CHAR *string, |
35 | const CHAR *string_end, bool no_leading_period, int flags); |
36 | static const CHAR *END (const CHAR *patternp); |
37 | |
38 | static int |
39 | FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, |
40 | bool no_leading_period, int flags, struct STRUCT *ends) |
41 | { |
42 | const CHAR *p = pattern, *n = string; |
43 | UCHAR c; |
44 | #ifdef _LIBC |
45 | # if WIDE_CHAR_VERSION |
46 | const char *collseq = (const char *) |
47 | _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC); |
48 | # else |
49 | const UCHAR *collseq = (const UCHAR *) |
50 | _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB); |
51 | # endif |
52 | #endif |
53 | |
54 | while ((c = *p++) != L_('\0')) |
55 | { |
56 | bool new_no_leading_period = false; |
57 | c = FOLD (c); |
58 | |
59 | switch (c) |
60 | { |
61 | case L_('?'): |
62 | if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(') |
63 | { |
64 | int res = EXT (c, p, n, string_end, no_leading_period, flags); |
65 | if (res != -1) |
66 | return res; |
67 | } |
68 | |
69 | if (n == string_end) |
70 | return FNM_NOMATCH; |
71 | else if (*n == L_('/') && (flags & FNM_FILE_NAME)) |
72 | return FNM_NOMATCH; |
73 | else if (*n == L_('.') && no_leading_period) |
74 | return FNM_NOMATCH; |
75 | break; |
76 | |
77 | case L_('\\'): |
78 | if (!(flags & FNM_NOESCAPE)) |
79 | { |
80 | c = *p++; |
81 | if (c == L_('\0')) |
82 | /* Trailing \ loses. */ |
83 | return FNM_NOMATCH; |
84 | c = FOLD (c); |
85 | } |
86 | if (n == string_end || FOLD ((UCHAR) *n) != c) |
87 | return FNM_NOMATCH; |
88 | break; |
89 | |
90 | case L_('*'): |
91 | if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(') |
92 | { |
93 | int res = EXT (c, p, n, string_end, no_leading_period, flags); |
94 | if (res != -1) |
95 | return res; |
96 | } |
97 | else if (ends != NULL) |
98 | { |
99 | ends->pattern = p - 1; |
100 | ends->string = n; |
101 | ends->no_leading_period = no_leading_period; |
102 | return 0; |
103 | } |
104 | |
105 | if (n != string_end && *n == L_('.') && no_leading_period) |
106 | return FNM_NOMATCH; |
107 | |
108 | for (c = *p++; c == L_('?') || c == L_('*'); c = *p++) |
109 | { |
110 | if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0) |
111 | { |
112 | const CHAR *endp = END (p); |
113 | if (endp != p) |
114 | { |
115 | /* This is a pattern. Skip over it. */ |
116 | p = endp; |
117 | continue; |
118 | } |
119 | } |
120 | |
121 | if (c == L_('?')) |
122 | { |
123 | /* A ? needs to match one character. */ |
124 | if (n == string_end) |
125 | /* There isn't another character; no match. */ |
126 | return FNM_NOMATCH; |
127 | else if (*n == L_('/') |
128 | && __glibc_unlikely (flags & FNM_FILE_NAME)) |
129 | /* A slash does not match a wildcard under |
130 | FNM_FILE_NAME. */ |
131 | return FNM_NOMATCH; |
132 | else |
133 | /* One character of the string is consumed in matching |
134 | this ? wildcard, so *??? won't match if there are |
135 | less than three characters. */ |
136 | ++n; |
137 | } |
138 | } |
139 | |
140 | if (c == L_('\0')) |
141 | /* The wildcard(s) is/are the last element of the pattern. |
142 | If the name is a file name and contains another slash |
143 | this means it cannot match, unless the FNM_LEADING_DIR |
144 | flag is set. */ |
145 | { |
146 | int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH; |
147 | |
148 | if (flags & FNM_FILE_NAME) |
149 | { |
150 | if (flags & FNM_LEADING_DIR) |
151 | result = 0; |
152 | else |
153 | { |
154 | if (MEMCHR (n, L_('/'), string_end - n) == NULL) |
155 | result = 0; |
156 | } |
157 | } |
158 | |
159 | return result; |
160 | } |
161 | else |
162 | { |
163 | const CHAR *endp; |
164 | struct STRUCT end; |
165 | |
166 | end.pattern = NULL; |
167 | endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'), |
168 | string_end - n); |
169 | if (endp == NULL) |
170 | endp = string_end; |
171 | |
172 | if (c == L_('[') |
173 | || (__glibc_unlikely (flags & FNM_EXTMATCH) |
174 | && (c == L_('@') || c == L_('+') || c == L_('!')) |
175 | && *p == L_('('))) |
176 | { |
177 | int flags2 = ((flags & FNM_FILE_NAME) |
178 | ? flags : (flags & ~FNM_PERIOD)); |
179 | |
180 | for (--p; n < endp; ++n, no_leading_period = false) |
181 | if (FCT (p, n, string_end, no_leading_period, flags2, |
182 | &end) == 0) |
183 | goto found; |
184 | } |
185 | else if (c == L_('/') && (flags & FNM_FILE_NAME)) |
186 | { |
187 | while (n < string_end && *n != L_('/')) |
188 | ++n; |
189 | if (n < string_end && *n == L_('/') |
190 | && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags, |
191 | NULL) == 0)) |
192 | return 0; |
193 | } |
194 | else |
195 | { |
196 | int flags2 = ((flags & FNM_FILE_NAME) |
197 | ? flags : (flags & ~FNM_PERIOD)); |
198 | |
199 | if (c == L_('\\') && !(flags & FNM_NOESCAPE)) |
200 | c = *p; |
201 | c = FOLD (c); |
202 | for (--p; n < endp; ++n, no_leading_period = false) |
203 | if (FOLD ((UCHAR) *n) == c |
204 | && (FCT (p, n, string_end, no_leading_period, flags2, |
205 | &end) == 0)) |
206 | { |
207 | found: |
208 | if (end.pattern == NULL) |
209 | return 0; |
210 | break; |
211 | } |
212 | if (end.pattern != NULL) |
213 | { |
214 | p = end.pattern; |
215 | n = end.string; |
216 | no_leading_period = end.no_leading_period; |
217 | continue; |
218 | } |
219 | } |
220 | } |
221 | |
222 | /* If we come here no match is possible with the wildcard. */ |
223 | return FNM_NOMATCH; |
224 | |
225 | case L_('['): |
226 | { |
227 | /* Nonzero if the sense of the character class is inverted. */ |
228 | const CHAR *p_init = p; |
229 | const CHAR *n_init = n; |
230 | bool not; |
231 | CHAR cold; |
232 | UCHAR fn; |
233 | |
234 | if (posixly_correct == 0) |
235 | posixly_correct = getenv ("POSIXLY_CORRECT" ) != NULL ? 1 : -1; |
236 | |
237 | if (n == string_end) |
238 | return FNM_NOMATCH; |
239 | |
240 | if (*n == L_('.') && no_leading_period) |
241 | return FNM_NOMATCH; |
242 | |
243 | if (*n == L_('/') && (flags & FNM_FILE_NAME)) |
244 | /* '/' cannot be matched. */ |
245 | return FNM_NOMATCH; |
246 | |
247 | not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^'))); |
248 | if (not) |
249 | ++p; |
250 | |
251 | fn = FOLD ((UCHAR) *n); |
252 | |
253 | c = *p++; |
254 | for (;;) |
255 | { |
256 | if (!(flags & FNM_NOESCAPE) && c == L_('\\')) |
257 | { |
258 | if (*p == L_('\0')) |
259 | return FNM_NOMATCH; |
260 | c = FOLD ((UCHAR) *p); |
261 | ++p; |
262 | |
263 | goto normal_bracket; |
264 | } |
265 | else if (c == L_('[') && *p == L_(':')) |
266 | { |
267 | /* Leave room for the null. */ |
268 | CHAR str[CHAR_CLASS_MAX_LENGTH + 1]; |
269 | size_t c1 = 0; |
270 | wctype_t wt; |
271 | const CHAR *startp = p; |
272 | |
273 | for (;;) |
274 | { |
275 | if (c1 == CHAR_CLASS_MAX_LENGTH) |
276 | /* The name is too long and therefore the pattern |
277 | is ill-formed. */ |
278 | return FNM_NOMATCH; |
279 | |
280 | c = *++p; |
281 | if (c == L_(':') && p[1] == L_(']')) |
282 | { |
283 | p += 2; |
284 | break; |
285 | } |
286 | if (c < L_('a') || c >= L_('z')) |
287 | { |
288 | /* This cannot possibly be a character class name. |
289 | Match it as a normal range. */ |
290 | p = startp; |
291 | c = L_('['); |
292 | goto normal_bracket; |
293 | } |
294 | str[c1++] = c; |
295 | } |
296 | str[c1] = L_('\0'); |
297 | |
298 | wt = IS_CHAR_CLASS (str); |
299 | if (wt == 0) |
300 | /* Invalid character class name. */ |
301 | return FNM_NOMATCH; |
302 | |
303 | #if defined _LIBC && ! WIDE_CHAR_VERSION |
304 | /* The following code is glibc specific but does |
305 | there a good job in speeding up the code since |
306 | we can avoid the btowc() call. */ |
307 | if (_ISCTYPE ((UCHAR) *n, wt)) |
308 | goto matched; |
309 | #else |
310 | if (iswctype (BTOWC ((UCHAR) *n), wt)) |
311 | goto matched; |
312 | #endif |
313 | c = *p++; |
314 | } |
315 | #ifdef _LIBC |
316 | else if (c == L_('[') && *p == L_('=')) |
317 | { |
318 | /* It's important that STR be a scalar variable rather |
319 | than a one-element array, because GCC (at least 4.9.2 |
320 | -O2 on x86-64) can be confused by the array and |
321 | diagnose a "used initialized" in a dead branch in the |
322 | findidx function. */ |
323 | UCHAR str; |
324 | uint32_t nrules = |
325 | _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); |
326 | const CHAR *startp = p; |
327 | |
328 | c = *++p; |
329 | if (c == L_('\0')) |
330 | { |
331 | p = startp; |
332 | c = L_('['); |
333 | goto normal_bracket; |
334 | } |
335 | str = c; |
336 | |
337 | c = *++p; |
338 | if (c != L_('=') || p[1] != L_(']')) |
339 | { |
340 | p = startp; |
341 | c = L_('['); |
342 | goto normal_bracket; |
343 | } |
344 | p += 2; |
345 | |
346 | if (nrules == 0) |
347 | { |
348 | if ((UCHAR) *n == str) |
349 | goto matched; |
350 | } |
351 | else |
352 | { |
353 | const int32_t *table; |
354 | # if WIDE_CHAR_VERSION |
355 | const int32_t *weights; |
356 | const wint_t *extra; |
357 | # else |
358 | const unsigned char *weights; |
359 | const unsigned char *; |
360 | # endif |
361 | const int32_t *indirect; |
362 | int32_t idx; |
363 | const UCHAR *cp = (const UCHAR *) &str; |
364 | |
365 | # if WIDE_CHAR_VERSION |
366 | table = (const int32_t *) |
367 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC); |
368 | weights = (const int32_t *) |
369 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC); |
370 | extra = (const wint_t *) |
371 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC); |
372 | indirect = (const int32_t *) |
373 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC); |
374 | # else |
375 | table = (const int32_t *) |
376 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); |
377 | weights = (const unsigned char *) |
378 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB); |
379 | extra = (const unsigned char *) |
380 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); |
381 | indirect = (const int32_t *) |
382 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); |
383 | # endif |
384 | |
385 | idx = FINDIDX (table, indirect, extra, &cp, 1); |
386 | if (idx != 0) |
387 | { |
388 | /* We found a table entry. Now see whether the |
389 | character we are currently at has the same |
390 | equivalence class value. */ |
391 | int len = weights[idx & 0xffffff]; |
392 | int32_t idx2; |
393 | const UCHAR *np = (const UCHAR *) n; |
394 | |
395 | idx2 = FINDIDX (table, indirect, extra, |
396 | &np, string_end - n); |
397 | if (idx2 != 0 |
398 | && (idx >> 24) == (idx2 >> 24) |
399 | && len == weights[idx2 & 0xffffff]) |
400 | { |
401 | int cnt = 0; |
402 | |
403 | idx &= 0xffffff; |
404 | idx2 &= 0xffffff; |
405 | |
406 | while (cnt < len |
407 | && (weights[idx + 1 + cnt] |
408 | == weights[idx2 + 1 + cnt])) |
409 | ++cnt; |
410 | |
411 | if (cnt == len) |
412 | goto matched; |
413 | } |
414 | } |
415 | } |
416 | |
417 | c = *p++; |
418 | } |
419 | #endif |
420 | else if (c == L_('\0')) |
421 | { |
422 | /* [ unterminated, treat as normal character. */ |
423 | p = p_init; |
424 | n = n_init; |
425 | c = L_('['); |
426 | goto normal_match; |
427 | } |
428 | else |
429 | { |
430 | bool is_range = false; |
431 | |
432 | #ifdef _LIBC |
433 | bool is_seqval = false; |
434 | |
435 | if (c == L_('[') && *p == L_('.')) |
436 | { |
437 | uint32_t nrules = |
438 | _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); |
439 | const CHAR *startp = p; |
440 | size_t c1 = 0; |
441 | |
442 | while (1) |
443 | { |
444 | c = *++p; |
445 | if (c == L_('.') && p[1] == L_(']')) |
446 | { |
447 | p += 2; |
448 | break; |
449 | } |
450 | if (c == '\0') |
451 | return FNM_NOMATCH; |
452 | ++c1; |
453 | } |
454 | |
455 | /* We have to handling the symbols differently in |
456 | ranges since then the collation sequence is |
457 | important. */ |
458 | is_range = *p == L_('-') && p[1] != L_('\0'); |
459 | |
460 | if (nrules == 0) |
461 | { |
462 | /* There are no names defined in the collation |
463 | data. Therefore we only accept the trivial |
464 | names consisting of the character itself. */ |
465 | if (c1 != 1) |
466 | return FNM_NOMATCH; |
467 | |
468 | if (!is_range && *n == startp[1]) |
469 | goto matched; |
470 | |
471 | cold = startp[1]; |
472 | c = *p++; |
473 | } |
474 | else |
475 | { |
476 | int32_t table_size; |
477 | const int32_t *symb_table; |
478 | const unsigned char *; |
479 | int32_t idx; |
480 | int32_t elem; |
481 | # if WIDE_CHAR_VERSION |
482 | CHAR *wextra; |
483 | # endif |
484 | |
485 | table_size = |
486 | _NL_CURRENT_WORD (LC_COLLATE, |
487 | _NL_COLLATE_SYMB_HASH_SIZEMB); |
488 | symb_table = (const int32_t *) |
489 | _NL_CURRENT (LC_COLLATE, |
490 | _NL_COLLATE_SYMB_TABLEMB); |
491 | extra = (const unsigned char *) |
492 | _NL_CURRENT (LC_COLLATE, |
493 | _NL_COLLATE_SYMB_EXTRAMB); |
494 | |
495 | for (elem = 0; elem < table_size; elem++) |
496 | if (symb_table[2 * elem] != 0) |
497 | { |
498 | idx = symb_table[2 * elem + 1]; |
499 | /* Skip the name of collating element. */ |
500 | idx += 1 + extra[idx]; |
501 | # if WIDE_CHAR_VERSION |
502 | /* Skip the byte sequence of the |
503 | collating element. */ |
504 | idx += 1 + extra[idx]; |
505 | /* Adjust for the alignment. */ |
506 | idx = (idx + 3) & ~3; |
507 | |
508 | wextra = (CHAR *) &extra[idx + 4]; |
509 | |
510 | if (/* Compare the length of the sequence. */ |
511 | c1 == wextra[0] |
512 | /* Compare the wide char sequence. */ |
513 | && (__wmemcmp (startp + 1, &wextra[1], |
514 | c1) |
515 | == 0)) |
516 | /* Yep, this is the entry. */ |
517 | break; |
518 | # else |
519 | if (/* Compare the length of the sequence. */ |
520 | c1 == extra[idx] |
521 | /* Compare the byte sequence. */ |
522 | && memcmp (startp + 1, |
523 | &extra[idx + 1], c1) == 0) |
524 | /* Yep, this is the entry. */ |
525 | break; |
526 | # endif |
527 | } |
528 | |
529 | if (elem < table_size) |
530 | { |
531 | /* Compare the byte sequence but only if |
532 | this is not part of a range. */ |
533 | |
534 | /* The compiler might warn that idx may be |
535 | used uninitialized, however it will be |
536 | reached iff elem < table_size which means |
537 | that it was properly set in the loop |
538 | above. */ |
539 | DIAG_PUSH_NEEDS_COMMENT; |
540 | DIAG_IGNORE_Os_NEEDS_COMMENT (8, "-Wmaybe-uninitialized" ); |
541 | if (! is_range |
542 | |
543 | # if WIDE_CHAR_VERSION |
544 | && __wmemcmp (n, &wextra[1], c1) == 0 |
545 | # else |
546 | && memcmp (n, &extra[idx + 1], c1) == 0 |
547 | # endif |
548 | ) |
549 | { |
550 | n += c1 - 1; |
551 | goto matched; |
552 | } |
553 | DIAG_POP_NEEDS_COMMENT; |
554 | |
555 | /* Get the collation sequence value. */ |
556 | is_seqval = true; |
557 | # if WIDE_CHAR_VERSION |
558 | /* The compile might warn that wextra may be |
559 | used uninitialized and similar to 'idx' |
560 | above it will be properly set by the loop. |
561 | */ |
562 | DIAG_PUSH_NEEDS_COMMENT; |
563 | DIAG_IGNORE_Os_NEEDS_COMMENT (8, "-Wmaybe-uninitialized" ); |
564 | cold = wextra[1 + wextra[0]]; |
565 | DIAG_POP_NEEDS_COMMENT; |
566 | # else |
567 | idx += 1 + extra[idx]; |
568 | /* Adjust for the alignment. */ |
569 | idx = (idx + 3) & ~3; |
570 | cold = *((int32_t *) &extra[idx]); |
571 | # endif |
572 | |
573 | c = *p++; |
574 | } |
575 | else if (c1 == 1) |
576 | { |
577 | /* No valid character. Match it as a |
578 | single byte. */ |
579 | if (!is_range && *n == startp[1]) |
580 | goto matched; |
581 | |
582 | cold = startp[1]; |
583 | c = *p++; |
584 | } |
585 | else |
586 | return FNM_NOMATCH; |
587 | } |
588 | } |
589 | else |
590 | #endif |
591 | { |
592 | c = FOLD (c); |
593 | normal_bracket: |
594 | |
595 | /* We have to handling the symbols differently in |
596 | ranges since then the collation sequence is |
597 | important. */ |
598 | is_range = (*p == L_('-') && p[1] != L_('\0') |
599 | && p[1] != L_(']')); |
600 | |
601 | if (!is_range && c == fn) |
602 | goto matched; |
603 | |
604 | #if _LIBC |
605 | /* This is needed if we goto normal_bracket; from |
606 | outside of is_seqval's scope. */ |
607 | is_seqval = false; |
608 | #endif |
609 | cold = c; |
610 | c = *p++; |
611 | } |
612 | |
613 | if (c == L_('-') && *p != L_(']')) |
614 | { |
615 | #if _LIBC |
616 | /* We have to find the collation sequence |
617 | value for C. Collation sequence is nothing |
618 | we can regularly access. The sequence |
619 | value is defined by the order in which the |
620 | definitions of the collation values for the |
621 | various characters appear in the source |
622 | file. A strange concept, nowhere |
623 | documented. */ |
624 | uint32_t fcollseq; |
625 | uint32_t lcollseq; |
626 | UCHAR cend = *p++; |
627 | |
628 | # if WIDE_CHAR_VERSION |
629 | /* Search in the 'names' array for the characters. */ |
630 | fcollseq = __collseq_table_lookup (collseq, fn); |
631 | if (fcollseq == ~((uint32_t) 0)) |
632 | /* XXX We don't know anything about the character |
633 | we are supposed to match. This means we are |
634 | failing. */ |
635 | goto range_not_matched; |
636 | |
637 | if (is_seqval) |
638 | lcollseq = cold; |
639 | else |
640 | lcollseq = __collseq_table_lookup (collseq, cold); |
641 | # else |
642 | fcollseq = collseq[fn]; |
643 | lcollseq = is_seqval ? cold : collseq[(UCHAR) cold]; |
644 | # endif |
645 | |
646 | is_seqval = false; |
647 | if (cend == L_('[') && *p == L_('.')) |
648 | { |
649 | uint32_t nrules = |
650 | _NL_CURRENT_WORD (LC_COLLATE, |
651 | _NL_COLLATE_NRULES); |
652 | const CHAR *startp = p; |
653 | size_t c1 = 0; |
654 | |
655 | while (1) |
656 | { |
657 | c = *++p; |
658 | if (c == L_('.') && p[1] == L_(']')) |
659 | { |
660 | p += 2; |
661 | break; |
662 | } |
663 | if (c == '\0') |
664 | return FNM_NOMATCH; |
665 | ++c1; |
666 | } |
667 | |
668 | if (nrules == 0) |
669 | { |
670 | /* There are no names defined in the |
671 | collation data. Therefore we only |
672 | accept the trivial names consisting |
673 | of the character itself. */ |
674 | if (c1 != 1) |
675 | return FNM_NOMATCH; |
676 | |
677 | cend = startp[1]; |
678 | } |
679 | else |
680 | { |
681 | int32_t table_size; |
682 | const int32_t *symb_table; |
683 | const unsigned char *; |
684 | int32_t idx; |
685 | int32_t elem; |
686 | # if WIDE_CHAR_VERSION |
687 | CHAR *wextra; |
688 | # endif |
689 | |
690 | table_size = |
691 | _NL_CURRENT_WORD (LC_COLLATE, |
692 | _NL_COLLATE_SYMB_HASH_SIZEMB); |
693 | symb_table = (const int32_t *) |
694 | _NL_CURRENT (LC_COLLATE, |
695 | _NL_COLLATE_SYMB_TABLEMB); |
696 | extra = (const unsigned char *) |
697 | _NL_CURRENT (LC_COLLATE, |
698 | _NL_COLLATE_SYMB_EXTRAMB); |
699 | |
700 | for (elem = 0; elem < table_size; elem++) |
701 | if (symb_table[2 * elem] != 0) |
702 | { |
703 | idx = symb_table[2 * elem + 1]; |
704 | /* Skip the name of collating |
705 | element. */ |
706 | idx += 1 + extra[idx]; |
707 | # if WIDE_CHAR_VERSION |
708 | /* Skip the byte sequence of the |
709 | collating element. */ |
710 | idx += 1 + extra[idx]; |
711 | /* Adjust for the alignment. */ |
712 | idx = (idx + 3) & ~3; |
713 | |
714 | wextra = (CHAR *) &extra[idx + 4]; |
715 | |
716 | if (/* Compare the length of the |
717 | sequence. */ |
718 | c1 == wextra[0] |
719 | /* Compare the wide char sequence. */ |
720 | && (__wmemcmp (startp + 1, |
721 | &wextra[1], c1) |
722 | == 0)) |
723 | /* Yep, this is the entry. */ |
724 | break; |
725 | # else |
726 | if (/* Compare the length of the |
727 | sequence. */ |
728 | c1 == extra[idx] |
729 | /* Compare the byte sequence. */ |
730 | && memcmp (startp + 1, |
731 | &extra[idx + 1], c1) == 0) |
732 | /* Yep, this is the entry. */ |
733 | break; |
734 | # endif |
735 | } |
736 | |
737 | if (elem < table_size) |
738 | { |
739 | /* Get the collation sequence value. */ |
740 | is_seqval = true; |
741 | # if WIDE_CHAR_VERSION |
742 | /* The compiler might warn that wextra may |
743 | be used uninitialized, however it will |
744 | be reached iff elem < table_size which |
745 | means that it was properly set in the |
746 | loop above. */ |
747 | DIAG_PUSH_NEEDS_COMMENT; |
748 | DIAG_IGNORE_Os_NEEDS_COMMENT (8, "-Wmaybe-uninitialized" ); |
749 | cend = wextra[1 + wextra[0]]; |
750 | DIAG_POP_NEEDS_COMMENT; |
751 | # else |
752 | /* The compile might warn that idx may |
753 | be used uninitialized and similar to |
754 | wextra above it will be properly set by |
755 | the loop. */ |
756 | DIAG_PUSH_NEEDS_COMMENT; |
757 | DIAG_IGNORE_Os_NEEDS_COMMENT (8, "-Wmaybe-uninitialized" ); |
758 | idx += 1 + extra[idx]; |
759 | DIAG_POP_NEEDS_COMMENT; |
760 | /* Adjust for the alignment. */ |
761 | idx = (idx + 3) & ~3; |
762 | cend = *((int32_t *) &extra[idx]); |
763 | # endif |
764 | } |
765 | else if (c1 == 1) |
766 | { |
767 | cend = startp[1]; |
768 | c = *p++; |
769 | } |
770 | else |
771 | return FNM_NOMATCH; |
772 | } |
773 | } |
774 | else |
775 | { |
776 | if (!(flags & FNM_NOESCAPE) && cend == L_('\\')) |
777 | cend = *p++; |
778 | if (cend == L_('\0')) |
779 | return FNM_NOMATCH; |
780 | cend = FOLD (cend); |
781 | } |
782 | |
783 | /* XXX It is not entirely clear to me how to handle |
784 | characters which are not mentioned in the |
785 | collation specification. */ |
786 | if ( |
787 | # if WIDE_CHAR_VERSION |
788 | lcollseq == 0xffffffff || |
789 | # endif |
790 | lcollseq <= fcollseq) |
791 | { |
792 | /* We have to look at the upper bound. */ |
793 | uint32_t hcollseq; |
794 | |
795 | if (is_seqval) |
796 | hcollseq = cend; |
797 | else |
798 | { |
799 | # if WIDE_CHAR_VERSION |
800 | hcollseq = |
801 | __collseq_table_lookup (collseq, cend); |
802 | if (hcollseq == ~((uint32_t) 0)) |
803 | { |
804 | /* Hum, no information about the upper |
805 | bound. The matching succeeds if the |
806 | lower bound is matched exactly. */ |
807 | if (lcollseq != fcollseq) |
808 | goto range_not_matched; |
809 | |
810 | goto matched; |
811 | } |
812 | # else |
813 | hcollseq = collseq[cend]; |
814 | # endif |
815 | } |
816 | |
817 | if (lcollseq <= hcollseq && fcollseq <= hcollseq) |
818 | goto matched; |
819 | } |
820 | # if WIDE_CHAR_VERSION |
821 | range_not_matched: |
822 | # endif |
823 | #else |
824 | /* We use a boring value comparison of the character |
825 | values. This is better than comparing using |
826 | 'strcoll' since the latter would have surprising |
827 | and sometimes fatal consequences. */ |
828 | UCHAR cend = *p++; |
829 | |
830 | if (!(flags & FNM_NOESCAPE) && cend == L_('\\')) |
831 | cend = *p++; |
832 | if (cend == L_('\0')) |
833 | return FNM_NOMATCH; |
834 | |
835 | /* It is a range. */ |
836 | if ((UCHAR) cold <= fn && fn <= cend) |
837 | goto matched; |
838 | #endif |
839 | |
840 | c = *p++; |
841 | } |
842 | } |
843 | |
844 | if (c == L_(']')) |
845 | break; |
846 | } |
847 | |
848 | if (!not) |
849 | return FNM_NOMATCH; |
850 | break; |
851 | |
852 | matched: |
853 | /* Skip the rest of the [...] that already matched. */ |
854 | while ((c = *p++) != L_(']')) |
855 | { |
856 | if (c == L_('\0')) |
857 | { |
858 | /* [ unterminated, treat as normal character. */ |
859 | p = p_init; |
860 | n = n_init; |
861 | c = L_('['); |
862 | goto normal_match; |
863 | } |
864 | |
865 | if (!(flags & FNM_NOESCAPE) && c == L_('\\')) |
866 | { |
867 | if (*p == L_('\0')) |
868 | return FNM_NOMATCH; |
869 | /* XXX 1003.2d11 is unclear if this is right. */ |
870 | ++p; |
871 | } |
872 | else if (c == L_('[') && *p == L_(':')) |
873 | { |
874 | int c1 = 0; |
875 | const CHAR *startp = p; |
876 | |
877 | while (1) |
878 | { |
879 | c = *++p; |
880 | if (++c1 == CHAR_CLASS_MAX_LENGTH) |
881 | return FNM_NOMATCH; |
882 | |
883 | if (*p == L_(':') && p[1] == L_(']')) |
884 | break; |
885 | |
886 | if (c < L_('a') || c >= L_('z')) |
887 | { |
888 | p = startp - 2; |
889 | break; |
890 | } |
891 | } |
892 | p += 2; |
893 | } |
894 | else if (c == L_('[') && *p == L_('=')) |
895 | { |
896 | c = *++p; |
897 | if (c == L_('\0')) |
898 | return FNM_NOMATCH; |
899 | c = *++p; |
900 | if (c != L_('=') || p[1] != L_(']')) |
901 | return FNM_NOMATCH; |
902 | p += 2; |
903 | } |
904 | else if (c == L_('[') && *p == L_('.')) |
905 | { |
906 | while (1) |
907 | { |
908 | c = *++p; |
909 | if (c == L_('\0')) |
910 | return FNM_NOMATCH; |
911 | |
912 | if (c == L_('.') && p[1] == L_(']')) |
913 | break; |
914 | } |
915 | p += 2; |
916 | } |
917 | } |
918 | if (not) |
919 | return FNM_NOMATCH; |
920 | } |
921 | break; |
922 | |
923 | case L_('+'): |
924 | case L_('@'): |
925 | case L_('!'): |
926 | if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(') |
927 | { |
928 | int res = EXT (c, p, n, string_end, no_leading_period, flags); |
929 | if (res != -1) |
930 | return res; |
931 | } |
932 | goto normal_match; |
933 | |
934 | case L_('/'): |
935 | if (NO_LEADING_PERIOD (flags)) |
936 | { |
937 | if (n == string_end || c != (UCHAR) *n) |
938 | return FNM_NOMATCH; |
939 | |
940 | new_no_leading_period = true; |
941 | break; |
942 | } |
943 | FALLTHROUGH; |
944 | default: |
945 | normal_match: |
946 | if (n == string_end || c != FOLD ((UCHAR) *n)) |
947 | return FNM_NOMATCH; |
948 | } |
949 | |
950 | no_leading_period = new_no_leading_period; |
951 | ++n; |
952 | } |
953 | |
954 | if (n == string_end) |
955 | return 0; |
956 | |
957 | if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/')) |
958 | /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */ |
959 | return 0; |
960 | |
961 | return FNM_NOMATCH; |
962 | } |
963 | |
964 | |
965 | static const CHAR * |
966 | END (const CHAR *pattern) |
967 | { |
968 | const CHAR *p = pattern; |
969 | |
970 | while (1) |
971 | if (*++p == L_('\0')) |
972 | /* This is an invalid pattern. */ |
973 | return pattern; |
974 | else if (*p == L_('[')) |
975 | { |
976 | /* Handle brackets special. */ |
977 | if (posixly_correct == 0) |
978 | posixly_correct = getenv ("POSIXLY_CORRECT" ) != NULL ? 1 : -1; |
979 | |
980 | /* Skip the not sign. We have to recognize it because of a possibly |
981 | following ']'. */ |
982 | if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^'))) |
983 | ++p; |
984 | /* A leading ']' is recognized as such. */ |
985 | if (*p == L_(']')) |
986 | ++p; |
987 | /* Skip over all characters of the list. */ |
988 | while (*p != L_(']')) |
989 | if (*p++ == L_('\0')) |
990 | /* This is no valid pattern. */ |
991 | return pattern; |
992 | } |
993 | else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@') |
994 | || *p == L_('!')) && p[1] == L_('(')) |
995 | { |
996 | p = END (p + 1); |
997 | if (*p == L_('\0')) |
998 | /* This is an invalid pattern. */ |
999 | return pattern; |
1000 | } |
1001 | else if (*p == L_(')')) |
1002 | break; |
1003 | |
1004 | return p + 1; |
1005 | } |
1006 | |
1007 | #if WIDE_CHAR_VERSION |
1008 | # define PATTERN_PREFIX pattern_list |
1009 | #else |
1010 | # define PATTERN_PREFIX wpattern_list |
1011 | #endif |
1012 | |
1013 | #define PASTE(a,b) PASTE1(a,b) |
1014 | #define PASTE1(a,b) a##b |
1015 | |
1016 | #define DYNARRAY_STRUCT PATTERN_PREFIX |
1017 | #define DYNARRAY_ELEMENT_FREE(ptr) free (*ptr) |
1018 | #define DYNARRAY_ELEMENT CHAR * |
1019 | #define DYNARRAY_PREFIX PASTE(PATTERN_PREFIX,_) |
1020 | #define DYNARRAY_INITIAL_SIZE 8 |
1021 | #include <malloc/dynarray-skeleton.c> |
1022 | |
1023 | static int |
1024 | EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end, |
1025 | bool no_leading_period, int flags) |
1026 | { |
1027 | const CHAR *startp; |
1028 | ptrdiff_t level; |
1029 | struct PATTERN_PREFIX list; |
1030 | size_t pattern_len = STRLEN (pattern); |
1031 | size_t pattern_i = 0; |
1032 | const CHAR *p; |
1033 | const CHAR *rs; |
1034 | int retval = 0; |
1035 | |
1036 | PASTE (PATTERN_PREFIX, _init) (&list); |
1037 | |
1038 | /* Parse the pattern. Store the individual parts in the list. */ |
1039 | level = 0; |
1040 | for (startp = p = pattern + 1; level >= 0; ++p) |
1041 | if (*p == L_('\0')) |
1042 | { |
1043 | /* This is an invalid pattern. */ |
1044 | retval = -1; |
1045 | goto out; |
1046 | } |
1047 | else if (*p == L_('[')) |
1048 | { |
1049 | /* Handle brackets special. */ |
1050 | if (posixly_correct == 0) |
1051 | posixly_correct = getenv ("POSIXLY_CORRECT" ) != NULL ? 1 : -1; |
1052 | |
1053 | /* Skip the not sign. We have to recognize it because of a possibly |
1054 | following ']'. */ |
1055 | if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^'))) |
1056 | ++p; |
1057 | /* A leading ']' is recognized as such. */ |
1058 | if (*p == L_(']')) |
1059 | ++p; |
1060 | /* Skip over all characters of the list. */ |
1061 | while (*p != L_(']')) |
1062 | if (*p++ == L_('\0')) |
1063 | { |
1064 | /* This is no valid pattern. */ |
1065 | retval = -1; |
1066 | goto out; |
1067 | } |
1068 | } |
1069 | else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@') |
1070 | || *p == L_('!')) && p[1] == L_('(')) |
1071 | /* Remember the nesting level. */ |
1072 | ++level; |
1073 | else if (*p == L_(')') || *p == L_('|')) |
1074 | { |
1075 | if (level == 0) |
1076 | { |
1077 | size_t slen = opt == L_('?') || opt == L_('@') |
1078 | ? pattern_len : p - startp + 1; |
1079 | CHAR *newp = malloc (slen * sizeof (CHAR)); |
1080 | if (newp != NULL) |
1081 | { |
1082 | *((CHAR *) MEMPCPY (newp, startp, p - startp)) = L_('\0'); |
1083 | PASTE (PATTERN_PREFIX,_add) (&list, newp); |
1084 | } |
1085 | if (newp == NULL || PASTE (PATTERN_PREFIX, _has_failed) (&list)) |
1086 | { |
1087 | retval = -2; |
1088 | goto out; |
1089 | } |
1090 | |
1091 | if (*p == L_('|')) |
1092 | startp = p + 1; |
1093 | } |
1094 | if (*p == L_(')')) |
1095 | level--; |
1096 | } |
1097 | assert (p[-1] == L_(')')); |
1098 | |
1099 | switch (opt) |
1100 | { |
1101 | case L_('*'): |
1102 | if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0) |
1103 | goto success; |
1104 | FALLTHROUGH; |
1105 | case L_('+'): |
1106 | for (; pattern_i < PASTE (PATTERN_PREFIX, _size)(&list); pattern_i++) |
1107 | { |
1108 | for (rs = string; rs <= string_end; ++rs) |
1109 | /* First match the prefix with the current pattern with the |
1110 | current pattern. */ |
1111 | if (FCT (*PASTE (PATTERN_PREFIX, _at) (&list, pattern_i), string, |
1112 | rs, no_leading_period, |
1113 | flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, |
1114 | NULL) == 0 |
1115 | /* This was successful. Now match the rest with the rest |
1116 | of the pattern. */ |
1117 | && (FCT (p, rs, string_end, |
1118 | rs == string |
1119 | ? no_leading_period |
1120 | : rs[-1] == '/' && NO_LEADING_PERIOD (flags), |
1121 | flags & FNM_FILE_NAME |
1122 | ? flags : flags & ~FNM_PERIOD, NULL) == 0 |
1123 | /* This didn't work. Try the whole pattern. */ |
1124 | || (rs != string |
1125 | && FCT (pattern - 1, rs, string_end, |
1126 | rs == string |
1127 | ? no_leading_period |
1128 | : rs[-1] == '/' && NO_LEADING_PERIOD (flags), |
1129 | flags & FNM_FILE_NAME |
1130 | ? flags : flags & ~FNM_PERIOD, NULL) == 0))) |
1131 | /* It worked. Signal success. */ |
1132 | goto success; |
1133 | } |
1134 | |
1135 | /* None of the patterns lead to a match. */ |
1136 | retval = FNM_NOMATCH; |
1137 | break; |
1138 | |
1139 | case L_('?'): |
1140 | if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0) |
1141 | goto success; |
1142 | FALLTHROUGH; |
1143 | case L_('@'): |
1144 | for (; pattern_i < PASTE (PATTERN_PREFIX, _size) (&list); pattern_i++) |
1145 | { |
1146 | /* I cannot believe it but `strcat' is actually acceptable |
1147 | here. Match the entire string with the prefix from the |
1148 | pattern list and the rest of the pattern following the |
1149 | pattern list. */ |
1150 | if (FCT (STRCAT (*PASTE (PATTERN_PREFIX, _at) (&list, pattern_i), p), |
1151 | string, string_end, no_leading_period, |
1152 | flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, |
1153 | NULL) == 0) |
1154 | /* It worked. Signal success. */ |
1155 | goto success; |
1156 | } |
1157 | |
1158 | /* None of the patterns lead to a match. */ |
1159 | retval = FNM_NOMATCH; |
1160 | break; |
1161 | |
1162 | case L_('!'): |
1163 | for (rs = string; rs <= string_end; ++rs) |
1164 | { |
1165 | size_t runp_i; |
1166 | |
1167 | for (runp_i = pattern_i; |
1168 | runp_i != PASTE (PATTERN_PREFIX, _size) (&list); |
1169 | runp_i++) |
1170 | { |
1171 | if (FCT (*PASTE (PATTERN_PREFIX, _at) (&list, runp_i), string, rs, |
1172 | no_leading_period, |
1173 | flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, |
1174 | NULL) == 0) |
1175 | break; |
1176 | } |
1177 | |
1178 | /* If none of the patterns matched see whether the rest does. */ |
1179 | if (runp_i == PASTE (PATTERN_PREFIX, _size) (&list) |
1180 | && (FCT (p, rs, string_end, |
1181 | rs == string |
1182 | ? no_leading_period |
1183 | : rs[-1] == '/' && NO_LEADING_PERIOD (flags), |
1184 | flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, |
1185 | NULL) == 0)) |
1186 | /* This is successful. */ |
1187 | goto success; |
1188 | } |
1189 | |
1190 | /* None of the patterns together with the rest of the pattern |
1191 | lead to a match. */ |
1192 | retval = FNM_NOMATCH; |
1193 | break; |
1194 | |
1195 | default: |
1196 | assert (! "Invalid extended matching operator" ); |
1197 | retval = -1; |
1198 | break; |
1199 | } |
1200 | |
1201 | success: |
1202 | out: |
1203 | PASTE (PATTERN_PREFIX, _free) (&list); |
1204 | |
1205 | return retval; |
1206 | } |
1207 | |
/* Remove every macro that parameterizes this file, so that it can be
   included again with a different set of definitions.  */

/* Helpers defined in this file.  */
#undef PATTERN_PREFIX
#undef PASTE
#undef PASTE1

/* Parameters expected from the including file.  */
#undef FOLD
#undef CHAR
#undef UCHAR
#undef INT
#undef FCT
#undef EXT
#undef END
#undef STRUCT
#undef MEMPCPY
#undef MEMCHR
#undef STRLEN
#undef STRCAT
#undef L_
#undef BTOWC
#undef WIDE_CHAR_VERSION
#undef FINDIDX
1228 | |