1 | /* Copyright (C) 1991-2019 Free Software Foundation, Inc. |
2 | This file is part of the GNU C Library. |
3 | |
4 | The GNU C Library is free software; you can redistribute it and/or |
5 | modify it under the terms of the GNU Lesser General Public |
6 | License as published by the Free Software Foundation; either |
7 | version 2.1 of the License, or (at your option) any later version. |
8 | |
9 | The GNU C Library is distributed in the hope that it will be useful, |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | Lesser General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU Lesser General Public |
15 | License along with the GNU C Library; if not, see |
16 | <http://www.gnu.org/licenses/>. */ |
17 | |
18 | #include <stdint.h> |
19 | |
20 | struct STRUCT |
21 | { |
22 | const CHAR *pattern; |
23 | const CHAR *string; |
24 | int no_leading_period; |
25 | }; |
26 | |
27 | /* Match STRING against the filename pattern PATTERN, returning zero if |
28 | it matches, nonzero if not. */ |
29 | static int FCT (const CHAR *pattern, const CHAR *string, |
30 | const CHAR *string_end, int no_leading_period, int flags, |
31 | struct STRUCT *ends, size_t alloca_used); |
32 | static int EXT (INT opt, const CHAR *pattern, const CHAR *string, |
33 | const CHAR *string_end, int no_leading_period, int flags, |
34 | size_t alloca_used); |
35 | static const CHAR *END (const CHAR *patternp); |
36 | |
37 | static int |
38 | FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, |
39 | int no_leading_period, int flags, struct STRUCT *ends, size_t alloca_used) |
40 | { |
41 | const CHAR *p = pattern, *n = string; |
42 | UCHAR c; |
43 | #ifdef _LIBC |
44 | # if WIDE_CHAR_VERSION |
45 | const char *collseq = (const char *) |
46 | _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC); |
47 | # else |
48 | const UCHAR *collseq = (const UCHAR *) |
49 | _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB); |
50 | # endif |
51 | #endif |
52 | |
53 | while ((c = *p++) != L('\0')) |
54 | { |
55 | int new_no_leading_period = 0; |
56 | c = FOLD (c); |
57 | |
58 | switch (c) |
59 | { |
60 | case L('?'): |
61 | if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(') |
62 | { |
63 | int res = EXT (c, p, n, string_end, no_leading_period, |
64 | flags, alloca_used); |
65 | if (res != -1) |
66 | return res; |
67 | } |
68 | |
69 | if (n == string_end) |
70 | return FNM_NOMATCH; |
71 | else if (*n == L('/') && (flags & FNM_FILE_NAME)) |
72 | return FNM_NOMATCH; |
73 | else if (*n == L('.') && no_leading_period) |
74 | return FNM_NOMATCH; |
75 | break; |
76 | |
77 | case L('\\'): |
78 | if (!(flags & FNM_NOESCAPE)) |
79 | { |
80 | c = *p++; |
81 | if (c == L('\0')) |
82 | /* Trailing \ loses. */ |
83 | return FNM_NOMATCH; |
84 | c = FOLD (c); |
85 | } |
86 | if (n == string_end || FOLD ((UCHAR) *n) != c) |
87 | return FNM_NOMATCH; |
88 | break; |
89 | |
90 | case L('*'): |
91 | if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(') |
92 | { |
93 | int res = EXT (c, p, n, string_end, no_leading_period, |
94 | flags, alloca_used); |
95 | if (res != -1) |
96 | return res; |
97 | } |
98 | else if (ends != NULL) |
99 | { |
100 | ends->pattern = p - 1; |
101 | ends->string = n; |
102 | ends->no_leading_period = no_leading_period; |
103 | return 0; |
104 | } |
105 | |
106 | if (n != string_end && *n == L('.') && no_leading_period) |
107 | return FNM_NOMATCH; |
108 | |
109 | for (c = *p++; c == L('?') || c == L('*'); c = *p++) |
110 | { |
111 | if (*p == L('(') && (flags & FNM_EXTMATCH) != 0) |
112 | { |
113 | const CHAR *endp = END (p); |
114 | if (endp != p) |
115 | { |
116 | /* This is a pattern. Skip over it. */ |
117 | p = endp; |
118 | continue; |
119 | } |
120 | } |
121 | |
122 | if (c == L('?')) |
123 | { |
124 | /* A ? needs to match one character. */ |
125 | if (n == string_end) |
126 | /* There isn't another character; no match. */ |
127 | return FNM_NOMATCH; |
128 | else if (*n == L('/') |
129 | && __builtin_expect (flags & FNM_FILE_NAME, 0)) |
130 | /* A slash does not match a wildcard under |
131 | FNM_FILE_NAME. */ |
132 | return FNM_NOMATCH; |
133 | else |
134 | /* One character of the string is consumed in matching |
135 | this ? wildcard, so *??? won't match if there are |
136 | less than three characters. */ |
137 | ++n; |
138 | } |
139 | } |
140 | |
141 | if (c == L('\0')) |
142 | /* The wildcard(s) is/are the last element of the pattern. |
143 | If the name is a file name and contains another slash |
144 | this means it cannot match, unless the FNM_LEADING_DIR |
145 | flag is set. */ |
146 | { |
147 | int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH; |
148 | |
149 | if (flags & FNM_FILE_NAME) |
150 | { |
151 | if (flags & FNM_LEADING_DIR) |
152 | result = 0; |
153 | else |
154 | { |
155 | if (MEMCHR (n, L('/'), string_end - n) == NULL) |
156 | result = 0; |
157 | } |
158 | } |
159 | |
160 | return result; |
161 | } |
162 | else |
163 | { |
164 | const CHAR *endp; |
165 | struct STRUCT end; |
166 | |
167 | end.pattern = NULL; |
168 | endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'), |
169 | string_end - n); |
170 | if (endp == NULL) |
171 | endp = string_end; |
172 | |
173 | if (c == L('[') |
174 | || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0 |
175 | && (c == L('@') || c == L('+') || c == L('!')) |
176 | && *p == L('('))) |
177 | { |
178 | int flags2 = ((flags & FNM_FILE_NAME) |
179 | ? flags : (flags & ~FNM_PERIOD)); |
180 | |
181 | for (--p; n < endp; ++n, no_leading_period = 0) |
182 | if (FCT (p, n, string_end, no_leading_period, flags2, |
183 | &end, alloca_used) == 0) |
184 | goto found; |
185 | } |
186 | else if (c == L('/') && (flags & FNM_FILE_NAME)) |
187 | { |
188 | while (n < string_end && *n != L('/')) |
189 | ++n; |
190 | if (n < string_end && *n == L('/') |
191 | && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags, |
192 | NULL, alloca_used) == 0)) |
193 | return 0; |
194 | } |
195 | else |
196 | { |
197 | int flags2 = ((flags & FNM_FILE_NAME) |
198 | ? flags : (flags & ~FNM_PERIOD)); |
199 | |
200 | if (c == L('\\') && !(flags & FNM_NOESCAPE)) |
201 | c = *p; |
202 | c = FOLD (c); |
203 | for (--p; n < endp; ++n, no_leading_period = 0) |
204 | if (FOLD ((UCHAR) *n) == c |
205 | && (FCT (p, n, string_end, no_leading_period, flags2, |
206 | &end, alloca_used) == 0)) |
207 | { |
208 | found: |
209 | if (end.pattern == NULL) |
210 | return 0; |
211 | break; |
212 | } |
213 | if (end.pattern != NULL) |
214 | { |
215 | p = end.pattern; |
216 | n = end.string; |
217 | no_leading_period = end.no_leading_period; |
218 | continue; |
219 | } |
220 | } |
221 | } |
222 | |
223 | /* If we come here no match is possible with the wildcard. */ |
224 | return FNM_NOMATCH; |
225 | |
226 | case L('['): |
227 | { |
228 | /* Nonzero if the sense of the character class is inverted. */ |
229 | const CHAR *p_init = p; |
230 | const CHAR *n_init = n; |
231 | int not; |
232 | CHAR cold; |
233 | UCHAR fn; |
234 | |
235 | if (posixly_correct == 0) |
236 | posixly_correct = getenv ("POSIXLY_CORRECT" ) != NULL ? 1 : -1; |
237 | |
238 | if (n == string_end) |
239 | return FNM_NOMATCH; |
240 | |
241 | if (*n == L('.') && no_leading_period) |
242 | return FNM_NOMATCH; |
243 | |
244 | if (*n == L('/') && (flags & FNM_FILE_NAME)) |
245 | /* `/' cannot be matched. */ |
246 | return FNM_NOMATCH; |
247 | |
248 | not = (*p == L('!') || (posixly_correct < 0 && *p == L('^'))); |
249 | if (not) |
250 | ++p; |
251 | |
252 | fn = FOLD ((UCHAR) *n); |
253 | |
254 | c = *p++; |
255 | for (;;) |
256 | { |
257 | if (!(flags & FNM_NOESCAPE) && c == L('\\')) |
258 | { |
259 | if (*p == L('\0')) |
260 | return FNM_NOMATCH; |
261 | c = FOLD ((UCHAR) *p); |
262 | ++p; |
263 | |
264 | goto normal_bracket; |
265 | } |
266 | else if (c == L('[') && *p == L(':')) |
267 | { |
268 | /* Leave room for the null. */ |
269 | CHAR str[CHAR_CLASS_MAX_LENGTH + 1]; |
270 | size_t c1 = 0; |
271 | #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) |
272 | wctype_t wt; |
273 | #endif |
274 | const CHAR *startp = p; |
275 | |
276 | for (;;) |
277 | { |
278 | if (c1 == CHAR_CLASS_MAX_LENGTH) |
279 | /* The name is too long and therefore the pattern |
280 | is ill-formed. */ |
281 | return FNM_NOMATCH; |
282 | |
283 | c = *++p; |
284 | if (c == L(':') && p[1] == L(']')) |
285 | { |
286 | p += 2; |
287 | break; |
288 | } |
289 | if (c < L('a') || c >= L('z')) |
290 | { |
291 | /* This cannot possibly be a character class name. |
292 | Match it as a normal range. */ |
293 | p = startp; |
294 | c = L('['); |
295 | goto normal_bracket; |
296 | } |
297 | str[c1++] = c; |
298 | } |
299 | str[c1] = L('\0'); |
300 | |
301 | #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) |
302 | wt = IS_CHAR_CLASS (str); |
303 | if (wt == 0) |
304 | /* Invalid character class name. */ |
305 | return FNM_NOMATCH; |
306 | |
307 | # if defined _LIBC && ! WIDE_CHAR_VERSION |
308 | /* The following code is glibc specific but does |
309 | there a good job in speeding up the code since |
310 | we can avoid the btowc() call. */ |
311 | if (_ISCTYPE ((UCHAR) *n, wt)) |
312 | goto matched; |
313 | # else |
314 | if (ISWCTYPE (BTOWC ((UCHAR) *n), wt)) |
315 | goto matched; |
316 | # endif |
317 | #else |
318 | if ((STREQ (str, L("alnum" )) && ISALNUM ((UCHAR) *n)) |
319 | || (STREQ (str, L("alpha" )) && ISALPHA ((UCHAR) *n)) |
320 | || (STREQ (str, L("blank" )) && ISBLANK ((UCHAR) *n)) |
321 | || (STREQ (str, L("cntrl" )) && ISCNTRL ((UCHAR) *n)) |
322 | || (STREQ (str, L("digit" )) && ISDIGIT ((UCHAR) *n)) |
323 | || (STREQ (str, L("graph" )) && ISGRAPH ((UCHAR) *n)) |
324 | || (STREQ (str, L("lower" )) && ISLOWER ((UCHAR) *n)) |
325 | || (STREQ (str, L("print" )) && ISPRINT ((UCHAR) *n)) |
326 | || (STREQ (str, L("punct" )) && ISPUNCT ((UCHAR) *n)) |
327 | || (STREQ (str, L("space" )) && ISSPACE ((UCHAR) *n)) |
328 | || (STREQ (str, L("upper" )) && ISUPPER ((UCHAR) *n)) |
329 | || (STREQ (str, L("xdigit" )) && ISXDIGIT ((UCHAR) *n))) |
330 | goto matched; |
331 | #endif |
332 | c = *p++; |
333 | } |
334 | #ifdef _LIBC |
335 | else if (c == L('[') && *p == L('=')) |
336 | { |
337 | /* It's important that STR be a scalar variable rather |
338 | than a one-element array, because GCC (at least 4.9.2 |
339 | -O2 on x86-64) can be confused by the array and |
340 | diagnose a "used initialized" in a dead branch in the |
341 | findidx function. */ |
342 | UCHAR str; |
343 | uint32_t nrules = |
344 | _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); |
345 | const CHAR *startp = p; |
346 | |
347 | c = *++p; |
348 | if (c == L('\0')) |
349 | { |
350 | p = startp; |
351 | c = L('['); |
352 | goto normal_bracket; |
353 | } |
354 | str = c; |
355 | |
356 | c = *++p; |
357 | if (c != L('=') || p[1] != L(']')) |
358 | { |
359 | p = startp; |
360 | c = L('['); |
361 | goto normal_bracket; |
362 | } |
363 | p += 2; |
364 | |
365 | if (nrules == 0) |
366 | { |
367 | if ((UCHAR) *n == str) |
368 | goto matched; |
369 | } |
370 | else |
371 | { |
372 | const int32_t *table; |
373 | # if WIDE_CHAR_VERSION |
374 | const int32_t *weights; |
375 | const wint_t *extra; |
376 | # else |
377 | const unsigned char *weights; |
378 | const unsigned char *; |
379 | # endif |
380 | const int32_t *indirect; |
381 | int32_t idx; |
382 | const UCHAR *cp = (const UCHAR *) &str; |
383 | |
384 | # if WIDE_CHAR_VERSION |
385 | table = (const int32_t *) |
386 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC); |
387 | weights = (const int32_t *) |
388 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC); |
389 | extra = (const wint_t *) |
390 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC); |
391 | indirect = (const int32_t *) |
392 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC); |
393 | # else |
394 | table = (const int32_t *) |
395 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); |
396 | weights = (const unsigned char *) |
397 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB); |
398 | extra = (const unsigned char *) |
399 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); |
400 | indirect = (const int32_t *) |
401 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); |
402 | # endif |
403 | |
404 | idx = FINDIDX (table, indirect, extra, &cp, 1); |
405 | if (idx != 0) |
406 | { |
407 | /* We found a table entry. Now see whether the |
408 | character we are currently at has the same |
409 | equivalance class value. */ |
410 | int len = weights[idx & 0xffffff]; |
411 | int32_t idx2; |
412 | const UCHAR *np = (const UCHAR *) n; |
413 | |
414 | idx2 = FINDIDX (table, indirect, extra, |
415 | &np, string_end - n); |
416 | if (idx2 != 0 |
417 | && (idx >> 24) == (idx2 >> 24) |
418 | && len == weights[idx2 & 0xffffff]) |
419 | { |
420 | int cnt = 0; |
421 | |
422 | idx &= 0xffffff; |
423 | idx2 &= 0xffffff; |
424 | |
425 | while (cnt < len |
426 | && (weights[idx + 1 + cnt] |
427 | == weights[idx2 + 1 + cnt])) |
428 | ++cnt; |
429 | |
430 | if (cnt == len) |
431 | goto matched; |
432 | } |
433 | } |
434 | } |
435 | |
436 | c = *p++; |
437 | } |
438 | #endif |
439 | else if (c == L('\0')) |
440 | { |
441 | /* [ unterminated, treat as normal character. */ |
442 | p = p_init; |
443 | n = n_init; |
444 | c = L('['); |
445 | goto normal_match; |
446 | } |
447 | else |
448 | { |
449 | int is_range = 0; |
450 | |
451 | #ifdef _LIBC |
452 | int is_seqval = 0; |
453 | |
454 | if (c == L('[') && *p == L('.')) |
455 | { |
456 | uint32_t nrules = |
457 | _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); |
458 | const CHAR *startp = p; |
459 | size_t c1 = 0; |
460 | |
461 | while (1) |
462 | { |
463 | c = *++p; |
464 | if (c == L('.') && p[1] == L(']')) |
465 | { |
466 | p += 2; |
467 | break; |
468 | } |
469 | if (c == '\0') |
470 | return FNM_NOMATCH; |
471 | ++c1; |
472 | } |
473 | |
474 | /* We have to handling the symbols differently in |
475 | ranges since then the collation sequence is |
476 | important. */ |
477 | is_range = *p == L('-') && p[1] != L('\0'); |
478 | |
479 | if (nrules == 0) |
480 | { |
481 | /* There are no names defined in the collation |
482 | data. Therefore we only accept the trivial |
483 | names consisting of the character itself. */ |
484 | if (c1 != 1) |
485 | return FNM_NOMATCH; |
486 | |
487 | if (!is_range && *n == startp[1]) |
488 | goto matched; |
489 | |
490 | cold = startp[1]; |
491 | c = *p++; |
492 | } |
493 | else |
494 | { |
495 | int32_t table_size; |
496 | const int32_t *symb_table; |
497 | # if WIDE_CHAR_VERSION |
498 | char str[c1]; |
499 | unsigned int strcnt; |
500 | # else |
501 | # define str (startp + 1) |
502 | # endif |
503 | const unsigned char *; |
504 | int32_t idx; |
505 | int32_t elem; |
506 | int32_t second; |
507 | int32_t hash; |
508 | |
509 | # if WIDE_CHAR_VERSION |
510 | /* We have to convert the name to a single-byte |
511 | string. This is possible since the names |
512 | consist of ASCII characters and the internal |
513 | representation is UCS4. */ |
514 | for (strcnt = 0; strcnt < c1; ++strcnt) |
515 | str[strcnt] = startp[1 + strcnt]; |
516 | #endif |
517 | |
518 | table_size = |
519 | _NL_CURRENT_WORD (LC_COLLATE, |
520 | _NL_COLLATE_SYMB_HASH_SIZEMB); |
521 | symb_table = (const int32_t *) |
522 | _NL_CURRENT (LC_COLLATE, |
523 | _NL_COLLATE_SYMB_TABLEMB); |
524 | extra = (const unsigned char *) |
525 | _NL_CURRENT (LC_COLLATE, |
526 | _NL_COLLATE_SYMB_EXTRAMB); |
527 | |
528 | /* Locate the character in the hashing table. */ |
529 | hash = elem_hash (str, c1); |
530 | |
531 | idx = 0; |
532 | elem = hash % table_size; |
533 | if (symb_table[2 * elem] != 0) |
534 | { |
535 | second = hash % (table_size - 2) + 1; |
536 | |
537 | do |
538 | { |
539 | /* First compare the hashing value. */ |
540 | if (symb_table[2 * elem] == hash |
541 | && (c1 |
542 | == extra[symb_table[2 * elem + 1]]) |
543 | && memcmp (str, |
544 | &extra[symb_table[2 * elem |
545 | + 1] |
546 | + 1], c1) == 0) |
547 | { |
548 | /* Yep, this is the entry. */ |
549 | idx = symb_table[2 * elem + 1]; |
550 | idx += 1 + extra[idx]; |
551 | break; |
552 | } |
553 | |
554 | /* Next entry. */ |
555 | elem += second; |
556 | } |
557 | while (symb_table[2 * elem] != 0); |
558 | } |
559 | |
560 | if (symb_table[2 * elem] != 0) |
561 | { |
562 | /* Compare the byte sequence but only if |
563 | this is not part of a range. */ |
564 | # if WIDE_CHAR_VERSION |
565 | int32_t *wextra; |
566 | |
567 | idx += 1 + extra[idx]; |
568 | /* Adjust for the alignment. */ |
569 | idx = (idx + 3) & ~3; |
570 | |
571 | wextra = (int32_t *) &extra[idx + 4]; |
572 | # endif |
573 | |
574 | if (! is_range) |
575 | { |
576 | # if WIDE_CHAR_VERSION |
577 | for (c1 = 0; |
578 | (int32_t) c1 < wextra[idx]; |
579 | ++c1) |
580 | if (n[c1] != wextra[1 + c1]) |
581 | break; |
582 | |
583 | if ((int32_t) c1 == wextra[idx]) |
584 | goto matched; |
585 | # else |
586 | for (c1 = 0; c1 < extra[idx]; ++c1) |
587 | if (n[c1] != extra[1 + c1]) |
588 | break; |
589 | |
590 | if (c1 == extra[idx]) |
591 | goto matched; |
592 | # endif |
593 | } |
594 | |
595 | /* Get the collation sequence value. */ |
596 | is_seqval = 1; |
597 | # if WIDE_CHAR_VERSION |
598 | cold = wextra[1 + wextra[idx]]; |
599 | # else |
600 | /* Adjust for the alignment. */ |
601 | idx += 1 + extra[idx]; |
602 | idx = (idx + 3) & ~4; |
603 | cold = *((int32_t *) &extra[idx]); |
604 | # endif |
605 | |
606 | c = *p++; |
607 | } |
608 | else if (c1 == 1) |
609 | { |
610 | /* No valid character. Match it as a |
611 | single byte. */ |
612 | if (!is_range && *n == str[0]) |
613 | goto matched; |
614 | |
615 | cold = str[0]; |
616 | c = *p++; |
617 | } |
618 | else |
619 | return FNM_NOMATCH; |
620 | } |
621 | } |
622 | else |
623 | # undef str |
624 | #endif |
625 | { |
626 | c = FOLD (c); |
627 | normal_bracket: |
628 | |
629 | /* We have to handling the symbols differently in |
630 | ranges since then the collation sequence is |
631 | important. */ |
632 | is_range = (*p == L('-') && p[1] != L('\0') |
633 | && p[1] != L(']')); |
634 | |
635 | if (!is_range && c == fn) |
636 | goto matched; |
637 | |
638 | /* This is needed if we goto normal_bracket; from |
639 | outside of is_seqval's scope. */ |
640 | is_seqval = 0; |
641 | cold = c; |
642 | c = *p++; |
643 | } |
644 | |
645 | if (c == L('-') && *p != L(']')) |
646 | { |
647 | #if _LIBC |
648 | /* We have to find the collation sequence |
649 | value for C. Collation sequence is nothing |
650 | we can regularly access. The sequence |
651 | value is defined by the order in which the |
652 | definitions of the collation values for the |
653 | various characters appear in the source |
654 | file. A strange concept, nowhere |
655 | documented. */ |
656 | uint32_t fcollseq; |
657 | uint32_t lcollseq; |
658 | UCHAR cend = *p++; |
659 | |
660 | # if WIDE_CHAR_VERSION |
661 | /* Search in the `names' array for the characters. */ |
662 | fcollseq = __collseq_table_lookup (collseq, fn); |
663 | if (fcollseq == ~((uint32_t) 0)) |
664 | /* XXX We don't know anything about the character |
665 | we are supposed to match. This means we are |
666 | failing. */ |
667 | goto range_not_matched; |
668 | |
669 | if (is_seqval) |
670 | lcollseq = cold; |
671 | else |
672 | lcollseq = __collseq_table_lookup (collseq, cold); |
673 | # else |
674 | fcollseq = collseq[fn]; |
675 | lcollseq = is_seqval ? cold : collseq[(UCHAR) cold]; |
676 | # endif |
677 | |
678 | is_seqval = 0; |
679 | if (cend == L('[') && *p == L('.')) |
680 | { |
681 | uint32_t nrules = |
682 | _NL_CURRENT_WORD (LC_COLLATE, |
683 | _NL_COLLATE_NRULES); |
684 | const CHAR *startp = p; |
685 | size_t c1 = 0; |
686 | |
687 | while (1) |
688 | { |
689 | c = *++p; |
690 | if (c == L('.') && p[1] == L(']')) |
691 | { |
692 | p += 2; |
693 | break; |
694 | } |
695 | if (c == '\0') |
696 | return FNM_NOMATCH; |
697 | ++c1; |
698 | } |
699 | |
700 | if (nrules == 0) |
701 | { |
702 | /* There are no names defined in the |
703 | collation data. Therefore we only |
704 | accept the trivial names consisting |
705 | of the character itself. */ |
706 | if (c1 != 1) |
707 | return FNM_NOMATCH; |
708 | |
709 | cend = startp[1]; |
710 | } |
711 | else |
712 | { |
713 | int32_t table_size; |
714 | const int32_t *symb_table; |
715 | # if WIDE_CHAR_VERSION |
716 | char str[c1]; |
717 | unsigned int strcnt; |
718 | # else |
719 | # define str (startp + 1) |
720 | # endif |
721 | const unsigned char *; |
722 | int32_t idx; |
723 | int32_t elem; |
724 | int32_t second; |
725 | int32_t hash; |
726 | |
727 | # if WIDE_CHAR_VERSION |
728 | /* We have to convert the name to a single-byte |
729 | string. This is possible since the names |
730 | consist of ASCII characters and the internal |
731 | representation is UCS4. */ |
732 | for (strcnt = 0; strcnt < c1; ++strcnt) |
733 | str[strcnt] = startp[1 + strcnt]; |
734 | # endif |
735 | |
736 | table_size = |
737 | _NL_CURRENT_WORD (LC_COLLATE, |
738 | _NL_COLLATE_SYMB_HASH_SIZEMB); |
739 | symb_table = (const int32_t *) |
740 | _NL_CURRENT (LC_COLLATE, |
741 | _NL_COLLATE_SYMB_TABLEMB); |
742 | extra = (const unsigned char *) |
743 | _NL_CURRENT (LC_COLLATE, |
744 | _NL_COLLATE_SYMB_EXTRAMB); |
745 | |
746 | /* Locate the character in the hashing |
747 | table. */ |
748 | hash = elem_hash (str, c1); |
749 | |
750 | idx = 0; |
751 | elem = hash % table_size; |
752 | if (symb_table[2 * elem] != 0) |
753 | { |
754 | second = hash % (table_size - 2) + 1; |
755 | |
756 | do |
757 | { |
758 | /* First compare the hashing value. */ |
759 | if (symb_table[2 * elem] == hash |
760 | && (c1 |
761 | == extra[symb_table[2 * elem + 1]]) |
762 | && memcmp (str, |
763 | &extra[symb_table[2 * elem + 1] |
764 | + 1], c1) == 0) |
765 | { |
766 | /* Yep, this is the entry. */ |
767 | idx = symb_table[2 * elem + 1]; |
768 | idx += 1 + extra[idx]; |
769 | break; |
770 | } |
771 | |
772 | /* Next entry. */ |
773 | elem += second; |
774 | } |
775 | while (symb_table[2 * elem] != 0); |
776 | } |
777 | |
778 | if (symb_table[2 * elem] != 0) |
779 | { |
780 | /* Compare the byte sequence but only if |
781 | this is not part of a range. */ |
782 | # if WIDE_CHAR_VERSION |
783 | int32_t *wextra; |
784 | |
785 | idx += 1 + extra[idx]; |
786 | /* Adjust for the alignment. */ |
787 | idx = (idx + 3) & ~4; |
788 | |
789 | wextra = (int32_t *) &extra[idx + 4]; |
790 | # endif |
791 | /* Get the collation sequence value. */ |
792 | is_seqval = 1; |
793 | # if WIDE_CHAR_VERSION |
794 | cend = wextra[1 + wextra[idx]]; |
795 | # else |
796 | /* Adjust for the alignment. */ |
797 | idx += 1 + extra[idx]; |
798 | idx = (idx + 3) & ~4; |
799 | cend = *((int32_t *) &extra[idx]); |
800 | # endif |
801 | } |
802 | else if (symb_table[2 * elem] != 0 && c1 == 1) |
803 | { |
804 | cend = str[0]; |
805 | c = *p++; |
806 | } |
807 | else |
808 | return FNM_NOMATCH; |
809 | } |
810 | # undef str |
811 | } |
812 | else |
813 | { |
814 | if (!(flags & FNM_NOESCAPE) && cend == L('\\')) |
815 | cend = *p++; |
816 | if (cend == L('\0')) |
817 | return FNM_NOMATCH; |
818 | cend = FOLD (cend); |
819 | } |
820 | |
821 | /* XXX It is not entirely clear to me how to handle |
822 | characters which are not mentioned in the |
823 | collation specification. */ |
824 | if ( |
825 | # if WIDE_CHAR_VERSION |
826 | lcollseq == 0xffffffff || |
827 | # endif |
828 | lcollseq <= fcollseq) |
829 | { |
830 | /* We have to look at the upper bound. */ |
831 | uint32_t hcollseq; |
832 | |
833 | if (is_seqval) |
834 | hcollseq = cend; |
835 | else |
836 | { |
837 | # if WIDE_CHAR_VERSION |
838 | hcollseq = |
839 | __collseq_table_lookup (collseq, cend); |
840 | if (hcollseq == ~((uint32_t) 0)) |
841 | { |
842 | /* Hum, no information about the upper |
843 | bound. The matching succeeds if the |
844 | lower bound is matched exactly. */ |
845 | if (lcollseq != fcollseq) |
846 | goto range_not_matched; |
847 | |
848 | goto matched; |
849 | } |
850 | # else |
851 | hcollseq = collseq[cend]; |
852 | # endif |
853 | } |
854 | |
855 | if (lcollseq <= hcollseq && fcollseq <= hcollseq) |
856 | goto matched; |
857 | } |
858 | # if WIDE_CHAR_VERSION |
859 | range_not_matched: |
860 | # endif |
861 | #else |
862 | /* We use a boring value comparison of the character |
863 | values. This is better than comparing using |
864 | `strcoll' since the latter would have surprising |
865 | and sometimes fatal consequences. */ |
866 | UCHAR cend = *p++; |
867 | |
868 | if (!(flags & FNM_NOESCAPE) && cend == L('\\')) |
869 | cend = *p++; |
870 | if (cend == L('\0')) |
871 | return FNM_NOMATCH; |
872 | |
873 | /* It is a range. */ |
874 | if (cold <= fn && fn <= cend) |
875 | goto matched; |
876 | #endif |
877 | |
878 | c = *p++; |
879 | } |
880 | } |
881 | |
882 | if (c == L(']')) |
883 | break; |
884 | } |
885 | |
886 | if (!not) |
887 | return FNM_NOMATCH; |
888 | break; |
889 | |
890 | matched: |
891 | /* Skip the rest of the [...] that already matched. */ |
892 | while ((c = *p++) != L (']')) |
893 | { |
894 | if (c == L('\0')) |
895 | /* [... (unterminated) loses. */ |
896 | return FNM_NOMATCH; |
897 | |
898 | if (!(flags & FNM_NOESCAPE) && c == L('\\')) |
899 | { |
900 | if (*p == L('\0')) |
901 | return FNM_NOMATCH; |
902 | /* XXX 1003.2d11 is unclear if this is right. */ |
903 | ++p; |
904 | } |
905 | else if (c == L('[') && *p == L(':')) |
906 | { |
907 | int c1 = 0; |
908 | const CHAR *startp = p; |
909 | |
910 | while (1) |
911 | { |
912 | c = *++p; |
913 | if (++c1 == CHAR_CLASS_MAX_LENGTH) |
914 | return FNM_NOMATCH; |
915 | |
916 | if (*p == L(':') && p[1] == L(']')) |
917 | break; |
918 | |
919 | if (c < L('a') || c >= L('z')) |
920 | { |
921 | p = startp - 2; |
922 | break; |
923 | } |
924 | } |
925 | p += 2; |
926 | } |
927 | else if (c == L('[') && *p == L('=')) |
928 | { |
929 | c = *++p; |
930 | if (c == L('\0')) |
931 | return FNM_NOMATCH; |
932 | c = *++p; |
933 | if (c != L('=') || p[1] != L(']')) |
934 | return FNM_NOMATCH; |
935 | p += 2; |
936 | } |
937 | else if (c == L('[') && *p == L('.')) |
938 | { |
939 | while (1) |
940 | { |
941 | c = *++p; |
942 | if (c == L('\0')) |
943 | return FNM_NOMATCH; |
944 | |
945 | if (c == L('.') && p[1] == L(']')) |
946 | break; |
947 | } |
948 | p += 2; |
949 | } |
950 | } |
951 | if (not) |
952 | return FNM_NOMATCH; |
953 | } |
954 | break; |
955 | |
956 | case L('+'): |
957 | case L('@'): |
958 | case L('!'): |
959 | if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(') |
960 | { |
961 | int res = EXT (c, p, n, string_end, no_leading_period, flags, |
962 | alloca_used); |
963 | if (res != -1) |
964 | return res; |
965 | } |
966 | goto normal_match; |
967 | |
968 | case L('/'): |
969 | if (NO_LEADING_PERIOD (flags)) |
970 | { |
971 | if (n == string_end || c != (UCHAR) *n) |
972 | return FNM_NOMATCH; |
973 | |
974 | new_no_leading_period = 1; |
975 | break; |
976 | } |
977 | /* FALLTHROUGH */ |
978 | default: |
979 | normal_match: |
980 | if (n == string_end || c != FOLD ((UCHAR) *n)) |
981 | return FNM_NOMATCH; |
982 | } |
983 | |
984 | no_leading_period = new_no_leading_period; |
985 | ++n; |
986 | } |
987 | |
988 | if (n == string_end) |
989 | return 0; |
990 | |
991 | if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/')) |
992 | /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */ |
993 | return 0; |
994 | |
995 | return FNM_NOMATCH; |
996 | } |
997 | |
998 | |
999 | static const CHAR * |
1000 | END (const CHAR *pattern) |
1001 | { |
1002 | const CHAR *p = pattern; |
1003 | |
1004 | while (1) |
1005 | if (*++p == L('\0')) |
1006 | /* This is an invalid pattern. */ |
1007 | return pattern; |
1008 | else if (*p == L('[')) |
1009 | { |
1010 | /* Handle brackets special. */ |
1011 | if (posixly_correct == 0) |
1012 | posixly_correct = getenv ("POSIXLY_CORRECT" ) != NULL ? 1 : -1; |
1013 | |
1014 | /* Skip the not sign. We have to recognize it because of a possibly |
1015 | following ']'. */ |
1016 | if (*++p == L('!') || (posixly_correct < 0 && *p == L('^'))) |
1017 | ++p; |
1018 | /* A leading ']' is recognized as such. */ |
1019 | if (*p == L(']')) |
1020 | ++p; |
1021 | /* Skip over all characters of the list. */ |
1022 | while (*p != L(']')) |
1023 | if (*p++ == L('\0')) |
1024 | /* This is no valid pattern. */ |
1025 | return pattern; |
1026 | } |
1027 | else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@') |
1028 | || *p == L('!')) && p[1] == L('(')) |
1029 | { |
1030 | p = END (p + 1); |
1031 | if (*p == L('\0')) |
1032 | /* This is an invalid pattern. */ |
1033 | return pattern; |
1034 | } |
1035 | else if (*p == L(')')) |
1036 | break; |
1037 | |
1038 | return p + 1; |
1039 | } |
1040 | |
1041 | |
1042 | static int |
1043 | EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end, |
1044 | int no_leading_period, int flags, size_t alloca_used) |
1045 | { |
1046 | const CHAR *startp; |
1047 | int level; |
1048 | struct patternlist |
1049 | { |
1050 | struct patternlist *next; |
1051 | CHAR malloced; |
1052 | CHAR str[0]; |
1053 | } *list = NULL; |
1054 | struct patternlist **lastp = &list; |
1055 | size_t pattern_len = STRLEN (pattern); |
1056 | int any_malloced = 0; |
1057 | const CHAR *p; |
1058 | const CHAR *rs; |
1059 | int retval = 0; |
1060 | |
1061 | /* Parse the pattern. Store the individual parts in the list. */ |
1062 | level = 0; |
1063 | for (startp = p = pattern + 1; level >= 0; ++p) |
1064 | if (*p == L('\0')) |
1065 | { |
1066 | /* This is an invalid pattern. */ |
1067 | retval = -1; |
1068 | goto out; |
1069 | } |
1070 | else if (*p == L('[')) |
1071 | { |
1072 | /* Handle brackets special. */ |
1073 | if (posixly_correct == 0) |
1074 | posixly_correct = getenv ("POSIXLY_CORRECT" ) != NULL ? 1 : -1; |
1075 | |
1076 | /* Skip the not sign. We have to recognize it because of a possibly |
1077 | following ']'. */ |
1078 | if (*++p == L('!') || (posixly_correct < 0 && *p == L('^'))) |
1079 | ++p; |
1080 | /* A leading ']' is recognized as such. */ |
1081 | if (*p == L(']')) |
1082 | ++p; |
1083 | /* Skip over all characters of the list. */ |
1084 | while (*p != L(']')) |
1085 | if (*p++ == L('\0')) |
1086 | { |
1087 | /* This is no valid pattern. */ |
1088 | retval = -1; |
1089 | goto out; |
1090 | } |
1091 | } |
1092 | else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@') |
1093 | || *p == L('!')) && p[1] == L('(')) |
1094 | /* Remember the nesting level. */ |
1095 | ++level; |
1096 | else if (*p == L(')')) |
1097 | { |
1098 | if (level-- == 0) |
1099 | { |
1100 | /* This means we found the end of the pattern. */ |
1101 | #define NEW_PATTERN \ |
1102 | struct patternlist *newp; \ |
1103 | size_t slen = (opt == L('?') || opt == L('@') \ |
1104 | ? pattern_len : (p - startp + 1)); \ |
1105 | slen = sizeof (struct patternlist) + (slen * sizeof (CHAR)); \ |
1106 | int malloced = ! __libc_use_alloca (alloca_used + slen); \ |
1107 | if (__builtin_expect (malloced, 0)) \ |
1108 | { \ |
1109 | newp = malloc (slen); \ |
1110 | if (newp == NULL) \ |
1111 | { \ |
1112 | retval = -2; \ |
1113 | goto out; \ |
1114 | } \ |
1115 | any_malloced = 1; \ |
1116 | } \ |
1117 | else \ |
1118 | newp = alloca_account (slen, alloca_used); \ |
1119 | newp->next = NULL; \ |
1120 | newp->malloced = malloced; \ |
1121 | *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0'); \ |
1122 | *lastp = newp; \ |
1123 | lastp = &newp->next |
1124 | NEW_PATTERN; |
1125 | } |
1126 | } |
1127 | else if (*p == L('|')) |
1128 | { |
1129 | if (level == 0) |
1130 | { |
1131 | NEW_PATTERN; |
1132 | startp = p + 1; |
1133 | } |
1134 | } |
1135 | assert (list != NULL); |
1136 | assert (p[-1] == L(')')); |
1137 | #undef NEW_PATTERN |
1138 | |
1139 | switch (opt) |
1140 | { |
1141 | case L('*'): |
1142 | if (FCT (p, string, string_end, no_leading_period, flags, NULL, |
1143 | alloca_used) == 0) |
1144 | goto success; |
1145 | /* FALLTHROUGH */ |
1146 | |
1147 | case L('+'): |
1148 | do |
1149 | { |
1150 | for (rs = string; rs <= string_end; ++rs) |
1151 | /* First match the prefix with the current pattern with the |
1152 | current pattern. */ |
1153 | if (FCT (list->str, string, rs, no_leading_period, |
1154 | flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, |
1155 | NULL, alloca_used) == 0 |
1156 | /* This was successful. Now match the rest with the rest |
1157 | of the pattern. */ |
1158 | && (FCT (p, rs, string_end, |
1159 | rs == string |
1160 | ? no_leading_period |
1161 | : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0, |
1162 | flags & FNM_FILE_NAME |
1163 | ? flags : flags & ~FNM_PERIOD, NULL, alloca_used) == 0 |
1164 | /* This didn't work. Try the whole pattern. */ |
1165 | || (rs != string |
1166 | && FCT (pattern - 1, rs, string_end, |
1167 | rs == string |
1168 | ? no_leading_period |
1169 | : (rs[-1] == '/' && NO_LEADING_PERIOD (flags) |
1170 | ? 1 : 0), |
1171 | flags & FNM_FILE_NAME |
1172 | ? flags : flags & ~FNM_PERIOD, NULL, |
1173 | alloca_used) == 0))) |
1174 | /* It worked. Signal success. */ |
1175 | goto success; |
1176 | } |
1177 | while ((list = list->next) != NULL); |
1178 | |
1179 | /* None of the patterns lead to a match. */ |
1180 | retval = FNM_NOMATCH; |
1181 | break; |
1182 | |
1183 | case L('?'): |
1184 | if (FCT (p, string, string_end, no_leading_period, flags, NULL, |
1185 | alloca_used) == 0) |
1186 | goto success; |
1187 | /* FALLTHROUGH */ |
1188 | |
1189 | case L('@'): |
1190 | do |
1191 | /* I cannot believe it but `strcat' is actually acceptable |
1192 | here. Match the entire string with the prefix from the |
1193 | pattern list and the rest of the pattern following the |
1194 | pattern list. */ |
1195 | if (FCT (STRCAT (list->str, p), string, string_end, |
1196 | no_leading_period, |
1197 | flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, |
1198 | NULL, alloca_used) == 0) |
1199 | /* It worked. Signal success. */ |
1200 | goto success; |
1201 | while ((list = list->next) != NULL); |
1202 | |
1203 | /* None of the patterns lead to a match. */ |
1204 | retval = FNM_NOMATCH; |
1205 | break; |
1206 | |
1207 | case L('!'): |
1208 | for (rs = string; rs <= string_end; ++rs) |
1209 | { |
1210 | struct patternlist *runp; |
1211 | |
1212 | for (runp = list; runp != NULL; runp = runp->next) |
1213 | if (FCT (runp->str, string, rs, no_leading_period, |
1214 | flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, |
1215 | NULL, alloca_used) == 0) |
1216 | break; |
1217 | |
1218 | /* If none of the patterns matched see whether the rest does. */ |
1219 | if (runp == NULL |
1220 | && (FCT (p, rs, string_end, |
1221 | rs == string |
1222 | ? no_leading_period |
1223 | : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0, |
1224 | flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, |
1225 | NULL, alloca_used) == 0)) |
1226 | /* This is successful. */ |
1227 | goto success; |
1228 | } |
1229 | |
1230 | /* None of the patterns together with the rest of the pattern |
1231 | lead to a match. */ |
1232 | retval = FNM_NOMATCH; |
1233 | break; |
1234 | |
1235 | default: |
1236 | assert (! "Invalid extended matching operator" ); |
1237 | retval = -1; |
1238 | break; |
1239 | } |
1240 | |
1241 | success: |
1242 | out: |
1243 | if (any_malloced) |
1244 | while (list != NULL) |
1245 | { |
1246 | struct patternlist *old = list; |
1247 | list = list->next; |
1248 | if (old->malloced) |
1249 | free (old); |
1250 | } |
1251 | |
1252 | return retval; |
1253 | } |
1254 | |
1255 | |
/* Remove the template parameter macros again, so that this file can be
   included once more with a different set of definitions.  */

/* Character types and literal helpers.  */
#undef CHAR
#undef UCHAR
#undef INT
#undef L
#undef FOLD
#undef BTOWC
#undef WIDE_CHAR_VERSION

/* Names of the generated functions and of the state structure.  */
#undef FCT
#undef EXT
#undef END
#undef STRUCT

/* String, memory and collation helpers.  */
#undef MEMPCPY
#undef MEMCHR
#undef STRCOLL
#undef STRLEN
#undef STRCAT
#undef FINDIDX
1273 | |