/* fnmatch_loop.c source code [glibc/posix/fnmatch_loop.c] */

/* Copyright (C) 1991-2019 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
17
18	#include <stdint.h>
19
20	struct STRUCT
21	{
22	const CHAR *pattern;
23	const CHAR *string;
24	int no_leading_period;
25	};
26
27	/ Match STRING against the filename pattern PATTERN, returning zero if*
28	it matches, nonzero if not. /*
29	static int FCT (const CHAR pattern, const* CHAR *string,
30	const CHAR string_end, int* no_leading_period, int flags,
31	struct STRUCT *ends, size_t alloca_used);
32	static int EXT (INT opt, const CHAR pattern, const* CHAR *string,
33	const CHAR string_end, int* no_leading_period, int flags,
34	size_t alloca_used);
35	static const CHAR END (const* CHAR *patternp);
36
37	static int
38	FCT (const CHAR pattern, const* CHAR string, const* CHAR *string_end,
39	int no_leading_period, int flags, struct STRUCT *ends, size_t alloca_used)
40	{
41	const CHAR p = pattern, n = string;
42	UCHAR c;
43	#ifdef _LIBC
44	# if WIDE_CHAR_VERSION
45	const char collseq = (const* char *)
46	_NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
47	# else
48	const UCHAR collseq = (const* UCHAR *)
49	_NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
50	# endif
51	#endif
52
53	while ((c = *p++) != L(`'\0'`))
54	{
55	int new_no_leading_period = `0`;
56	c = FOLD (c);
57
58	switch (c)
59	{
60	case L(`'?'`):
61	if (__builtin_expect (flags & FNM_EXTMATCH, `0`) && *p == `'('`)
62	{
63	int res = EXT (c, p, n, string_end, no_leading_period,
64	flags, alloca_used);
65	if (res != -`1`)
66	return res;
67	}
68
69	if (n == string_end)
70	return FNM_NOMATCH;
71	else if (*n == L(`'/'`) && (flags & FNM_FILE_NAME))
72	return FNM_NOMATCH;
73	else if (*n == L(`'.'`) && no_leading_period)
74	return FNM_NOMATCH;
75	break;
76
77	case L(`'\\'`):
78	if (!(flags & FNM_NOESCAPE))
79	{
80	c = *p++;
81	if (c == L(`'\0'`))
82	/ Trailing \ loses. /
83	return FNM_NOMATCH;
84	c = FOLD (c);
85	}
86	if (n == string_end \|\| FOLD ((UCHAR) *n) != c)
87	return FNM_NOMATCH;
88	break;
89
90	case L(`'*'`):
91	if (__builtin_expect (flags & FNM_EXTMATCH, `0`) && *p == `'('`)
92	{
93	int res = EXT (c, p, n, string_end, no_leading_period,
94	flags, alloca_used);
95	if (res != -`1`)
96	return res;
97	}
98	else if (ends != NULL)
99	{
100	ends->pattern = p - `1`;
101	ends->string = n;
102	ends->no_leading_period = no_leading_period;
103	return `0`;
104	}
105
106	if (n != string_end && *n == L(`'.'`) && no_leading_period)
107	return FNM_NOMATCH;
108
109	for (c = p++; c == L(`'?'`) \|\| c == L(`''`); c = *p++)
110	{
111	if (*p == L(`'('`) && (flags & FNM_EXTMATCH) != `0`)
112	{
113	const CHAR *endp = END (p);
114	if (endp != p)
115	{
116	/ This is a pattern. Skip over it. /
117	p = endp;
118	continue;
119	}
120	}
121
122	if (c == L(`'?'`))
123	{
124	/ A ? needs to match one character. /
125	if (n == string_end)
126	/ There isn't another character; no match. /
127	return FNM_NOMATCH;
128	else if (*n == L(`'/'`)
129	&& __builtin_expect (flags & FNM_FILE_NAME, `0`))
130	/ A slash does not match a wildcard under*
131	FNM_FILE_NAME. /*
132	return FNM_NOMATCH;
133	else
134	/ One character of the string is consumed in matching*
135	this ? wildcard, so ??? won't match if there are*
136	less than three characters. /*
137	++n;
138	}
139	}
140
141	if (c == L(`'\0'`))
142	/ The wildcard(s) is/are the last element of the pattern.*
143	If the name is a file name and contains another slash
144	this means it cannot match, unless the FNM_LEADING_DIR
145	flag is set. /*
146	{
147	int result = (flags & FNM_FILE_NAME) == `0` ? `0` : FNM_NOMATCH;
148
149	if (flags & FNM_FILE_NAME)
150	{
151	if (flags & FNM_LEADING_DIR)
152	result = `0`;
153	else
154	{
155	if (MEMCHR (n, L(`'/'`), string_end - n) == NULL)
156	result = `0`;
157	}
158	}
159
160	return result;
161	}
162	else
163	{
164	const CHAR *endp;
165	struct STRUCT end;
166
167	end.pattern = NULL;
168	endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L(`'/'`) : L(`'\0'`),
169	string_end - n);
170	if (endp == NULL)
171	endp = string_end;
172
173	if (c == L(`'['`)
174	\|\| (__builtin_expect (flags & FNM_EXTMATCH, `0`) != `0`
175	&& (c == L(`'@'`) \|\| c == L(`'+'`) \|\| c == L(`'!'`))
176	&& *p == L(`'('`)))
177	{
178	int flags2 = ((flags & FNM_FILE_NAME)
179	? flags : (flags & ~FNM_PERIOD));
180
181	for (--p; n < endp; ++n, no_leading_period = `0`)
182	if (FCT (p, n, string_end, no_leading_period, flags2,
183	&end, alloca_used) == `0`)
184	goto found;
185	}
186	else if (c == L(`'/'`) && (flags & FNM_FILE_NAME))
187	{
188	while (n < string_end && *n != L(`'/'`))
189	++n;
190	if (n < string_end && *n == L(`'/'`)
191	&& (FCT (p, n + `1`, string_end, flags & FNM_PERIOD, flags,
192	NULL, alloca_used) == `0`))
193	return `0`;
194	}
195	else
196	{
197	int flags2 = ((flags & FNM_FILE_NAME)
198	? flags : (flags & ~FNM_PERIOD));
199
200	if (c == L(`'\\'`) && !(flags & FNM_NOESCAPE))
201	c = *p;
202	c = FOLD (c);
203	for (--p; n < endp; ++n, no_leading_period = `0`)
204	if (FOLD ((UCHAR) *n) == c
205	&& (FCT (p, n, string_end, no_leading_period, flags2,
206	&end, alloca_used) == `0`))
207	{
208	found:
209	if (end.pattern == NULL)
210	return `0`;
211	break;
212	}
213	if (end.pattern != NULL)
214	{
215	p = end.pattern;
216	n = end.string;
217	no_leading_period = end.no_leading_period;
218	continue;
219	}
220	}
221	}
222
223	/ If we come here no match is possible with the wildcard. /
224	return FNM_NOMATCH;
225
226	case L(`'['`):
227	{
228	/ Nonzero if the sense of the character class is inverted. /
229	const CHAR *p_init = p;
230	const CHAR *n_init = n;
231	int not;
232	CHAR cold;
233	UCHAR fn;
234
235	if (posixly_correct == `0`)
236	posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? `1` : -`1`;
237
238	if (n == string_end)
239	return FNM_NOMATCH;
240
241	if (*n == L(`'.'`) && no_leading_period)
242	return FNM_NOMATCH;
243
244	if (*n == L(`'/'`) && (flags & FNM_FILE_NAME))
245	/ `/' cannot be matched. /
246	return FNM_NOMATCH;
247
248	not = (p == L(`'!'`) \|\| (posixly_correct < `0` && p == L(`'^'`)));
249	if (not)
250	++p;
251
252	fn = FOLD ((UCHAR) *n);
253
254	c = *p++;
255	for (;;)
256	{
257	if (!(flags & FNM_NOESCAPE) && c == L(`'\\'`))
258	{
259	if (*p == L(`'\0'`))
260	return FNM_NOMATCH;
261	c = FOLD ((UCHAR) *p);
262	++p;
263
264	goto normal_bracket;
265	}
266	else if (c == L(`'['`) && *p == L(`':'`))
267	{
268	/ Leave room for the null. /
269	CHAR str[CHAR_CLASS_MAX_LENGTH + `1`];
270	size_t c1 = `0`;
271	#if defined _LIBC \|\| (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
272	wctype_t wt;
273	#endif
274	const CHAR *startp = p;
275
276	for (;;)
277	{
278	if (c1 == CHAR_CLASS_MAX_LENGTH)
279	/ The name is too long and therefore the pattern*
280	is ill-formed. /*
281	return FNM_NOMATCH;
282
283	c = *++p;
284	if (c == L(`':'`) && p[`1`] == L(`']'`))
285	{
286	p += `2`;
287	break;
288	}
289	if (c < L(`'a'`) \|\| c >= L(`'z'`))
290	{
291	/ This cannot possibly be a character class name.*
292	Match it as a normal range. /*
293	p = startp;
294	c = L(`'['`);
295	goto normal_bracket;
296	}
297	str[c1++] = c;
298	}
299	str[c1] = L(`'\0'`);
300
301	#if defined _LIBC \|\| (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
302	wt = IS_CHAR_CLASS (str);
303	if (wt == `0`)
304	/ Invalid character class name. /
305	return FNM_NOMATCH;
306
307	# if defined _LIBC && ! WIDE_CHAR_VERSION
308	/ The following code is glibc specific but does*
309	there a good job in speeding up the code since
310	we can avoid the btowc() call. /*
311	if (_ISCTYPE ((UCHAR) *n, wt))
312	goto matched;
313	# else
314	if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
315	goto matched;
316	# endif
317	#else
318	if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
319	\|\| (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
320	\|\| (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
321	\|\| (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
322	\|\| (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
323	\|\| (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
324	\|\| (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
325	\|\| (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
326	\|\| (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
327	\|\| (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
328	\|\| (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
329	\|\| (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
330	goto matched;
331	#endif
332	c = *p++;
333	}
334	#ifdef _LIBC
335	else if (c == L(`'['`) && *p == L(`'='`))
336	{
337	/ It's important that STR be a scalar variable rather*
338	than a one-element array, because GCC (at least 4.9.2
339	-O2 on x86-64) can be confused by the array and
340	diagnose a "used initialized" in a dead branch in the
341	findidx function. /*
342	UCHAR str;
343	uint32_t nrules =
344	_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
345	const CHAR *startp = p;
346
347	c = *++p;
348	if (c == L(`'\0'`))
349	{
350	p = startp;
351	c = L(`'['`);
352	goto normal_bracket;
353	}
354	str = c;
355
356	c = *++p;
357	if (c != L(`'='`) \|\| p[`1`] != L(`']'`))
358	{
359	p = startp;
360	c = L(`'['`);
361	goto normal_bracket;
362	}
363	p += `2`;
364
365	if (nrules == `0`)
366	{
367	if ((UCHAR) *n == str)
368	goto matched;
369	}
370	else
371	{
372	const int32_t *table;
373	# if WIDE_CHAR_VERSION
374	const int32_t *weights;
375	const wint_t *extra;
376	# else
377	const unsigned char *weights;
378	const unsigned char *extra;
379	# endif
380	const int32_t *indirect;
381	int32_t idx;
382	const UCHAR cp = (const* UCHAR *) &str;
383
384	# if WIDE_CHAR_VERSION
385	table = (const int32_t *)
386	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
387	weights = (const int32_t *)
388	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
389	extra = (const wint_t *)
390	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
391	indirect = (const int32_t *)
392	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
393	# else
394	table = (const int32_t *)
395	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
396	weights = (const unsigned char *)
397	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
398	extra = (const unsigned char *)
399	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
400	indirect = (const int32_t *)
401	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
402	# endif
403
404	idx = FINDIDX (table, indirect, extra, &cp, `1`);
405	if (idx != `0`)
406	{
407	/ We found a table entry. Now see whether the*
408	character we are currently at has the same
409	equivalance class value. /*
410	int len = weights[idx & `0xffffff`];
411	int32_t idx2;
412	const UCHAR np = (const* UCHAR *) n;
413
414	idx2 = FINDIDX (table, indirect, extra,
415	&np, string_end - n);
416	if (idx2 != `0`
417	&& (idx >> `24`) == (idx2 >> `24`)
418	&& len == weights[idx2 & `0xffffff`])
419	{
420	int cnt = `0`;
421
422	idx &= `0xffffff`;
423	idx2 &= `0xffffff`;
424
425	while (cnt < len
426	&& (weights[idx + `1` + cnt]
427	== weights[idx2 + `1` + cnt]))
428	++cnt;
429
430	if (cnt == len)
431	goto matched;
432	}
433	}
434	}
435
436	c = *p++;
437	}
438	#endif
439	else if (c == L(`'\0'`))
440	{
441	/ [ unterminated, treat as normal character. /
442	p = p_init;
443	n = n_init;
444	c = L(`'['`);
445	goto normal_match;
446	}
447	else
448	{
449	int is_range = `0`;
450
451	#ifdef _LIBC
452	int is_seqval = `0`;
453
454	if (c == L(`'['`) && *p == L(`'.'`))
455	{
456	uint32_t nrules =
457	_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
458	const CHAR *startp = p;
459	size_t c1 = `0`;
460
461	while (`1`)
462	{
463	c = *++p;
464	if (c == L(`'.'`) && p[`1`] == L(`']'`))
465	{
466	p += `2`;
467	break;
468	}
469	if (c == `'\0'`)
470	return FNM_NOMATCH;
471	++c1;
472	}
473
474	/ We have to handling the symbols differently in*
475	ranges since then the collation sequence is
476	important. /*
477	is_range = *p == L(`'-'`) && p[`1`] != L(`'\0'`);
478
479	if (nrules == `0`)
480	{
481	/ There are no names defined in the collation*
482	data. Therefore we only accept the trivial
483	names consisting of the character itself. /*
484	if (c1 != `1`)
485	return FNM_NOMATCH;
486
487	if (!is_range && *n == startp[`1`])
488	goto matched;
489
490	cold = startp[`1`];
491	c = *p++;
492	}
493	else
494	{
495	int32_t table_size;
496	const int32_t *symb_table;
497	# if WIDE_CHAR_VERSION
498	char str[c1];
499	unsigned int strcnt;
500	# else
501	# define str (startp + 1)
502	# endif
503	const unsigned char *extra;
504	int32_t idx;
505	int32_t elem;
506	int32_t second;
507	int32_t hash;
508
509	# if WIDE_CHAR_VERSION
510	/ We have to convert the name to a single-byte*
511	string. This is possible since the names
512	consist of ASCII characters and the internal
513	representation is UCS4. /*
514	for (strcnt = `0`; strcnt < c1; ++strcnt)
515	str[strcnt] = startp[`1` + strcnt];
516	#endif
517
518	table_size =
519	_NL_CURRENT_WORD (LC_COLLATE,
520	_NL_COLLATE_SYMB_HASH_SIZEMB);
521	symb_table = (const int32_t *)
522	_NL_CURRENT (LC_COLLATE,
523	_NL_COLLATE_SYMB_TABLEMB);
524	extra = (const unsigned char *)
525	_NL_CURRENT (LC_COLLATE,
526	_NL_COLLATE_SYMB_EXTRAMB);
527
528	/ Locate the character in the hashing table. /
529	hash = elem_hash (str, c1);
530
531	idx = `0`;
532	elem = hash % table_size;
533	if (symb_table[`2` * elem] != `0`)
534	{
535	second = hash % (table_size - `2`) + `1`;
536
537	do
538	{
539	/ First compare the hashing value. /
540	if (symb_table[`2` * elem] == hash
541	&& (c1
542	== extra[symb_table[`2` * elem + `1`]])
543	&& memcmp (str,
544	&extra[symb_table[`2` * elem
545	+ `1`]
546	+ `1`], c1) == `0`)
547	{
548	/ Yep, this is the entry. /
549	idx = symb_table[`2` * elem + `1`];
550	idx += `1` + extra[idx];
551	break;
552	}
553
554	/ Next entry. /
555	elem += second;
556	}
557	while (symb_table[`2` * elem] != `0`);
558	}
559
560	if (symb_table[`2` * elem] != `0`)
561	{
562	/ Compare the byte sequence but only if*
563	this is not part of a range. /*
564	# if WIDE_CHAR_VERSION
565	int32_t *wextra;
566
567	idx += `1` + extra[idx];
568	/ Adjust for the alignment. /
569	idx = (idx + `3`) & ~`3`;
570
571	wextra = (int32_t *) &extra[idx + `4`];
572	# endif
573
574	if (! is_range)
575	{
576	# if WIDE_CHAR_VERSION
577	for (c1 = `0`;
578	(int32_t) c1 < wextra[idx];
579	++c1)
580	if (n[c1] != wextra[`1` + c1])
581	break;
582
583	if ((int32_t) c1 == wextra[idx])
584	goto matched;
585	# else
586	for (c1 = `0`; c1 < extra[idx]; ++c1)
587	if (n[c1] != extra[`1` + c1])
588	break;
589
590	if (c1 == extra[idx])
591	goto matched;
592	# endif
593	}
594
595	/ Get the collation sequence value. /
596	is_seqval = `1`;
597	# if WIDE_CHAR_VERSION
598	cold = wextra[`1` + wextra[idx]];
599	# else
600	/ Adjust for the alignment. /
601	idx += `1` + extra[idx];
602	idx = (idx + `3`) & ~`4`;
603	cold = ((int32_t ) &extra[idx]);
604	# endif
605
606	c = *p++;
607	}
608	else if (c1 == `1`)
609	{
610	/ No valid character. Match it as a*
611	single byte. /*
612	if (!is_range && *n == str[`0`])
613	goto matched;
614
615	cold = str[`0`];
616	c = *p++;
617	}
618	else
619	return FNM_NOMATCH;
620	}
621	}
622	else
623	# undef str
624	#endif
625	{
626	c = FOLD (c);
627	normal_bracket:
628
629	/ We have to handling the symbols differently in*
630	ranges since then the collation sequence is
631	important. /*
632	is_range = (*p == L(`'-'`) && p[`1`] != L(`'\0'`)
633	&& p[`1`] != L(`']'`));
634
635	if (!is_range && c == fn)
636	goto matched;
637
638	/ This is needed if we goto normal_bracket; from*
639	outside of is_seqval's scope. /*
640	is_seqval = `0`;
641	cold = c;
642	c = *p++;
643	}
644
645	if (c == L(`'-'`) && *p != L(`']'`))
646	{
647	#if _LIBC
648	/ We have to find the collation sequence*
649	value for C. Collation sequence is nothing
650	we can regularly access. The sequence
651	value is defined by the order in which the
652	definitions of the collation values for the
653	various characters appear in the source
654	file. A strange concept, nowhere
655	documented. /*
656	uint32_t fcollseq;
657	uint32_t lcollseq;
658	UCHAR cend = *p++;
659
660	# if WIDE_CHAR_VERSION
661	/ Search in the `names' array for the characters. /
662	fcollseq = __collseq_table_lookup (collseq, fn);
663	if (fcollseq == ~((uint32_t) `0`))
664	/ XXX We don't know anything about the character*
665	we are supposed to match. This means we are
666	failing. /*
667	goto range_not_matched;
668
669	if (is_seqval)
670	lcollseq = cold;
671	else
672	lcollseq = __collseq_table_lookup (collseq, cold);
673	# else
674	fcollseq = collseq[fn];
675	lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
676	# endif
677
678	is_seqval = `0`;
679	if (cend == L(`'['`) && *p == L(`'.'`))
680	{
681	uint32_t nrules =
682	_NL_CURRENT_WORD (LC_COLLATE,
683	_NL_COLLATE_NRULES);
684	const CHAR *startp = p;
685	size_t c1 = `0`;
686
687	while (`1`)
688	{
689	c = *++p;
690	if (c == L(`'.'`) && p[`1`] == L(`']'`))
691	{
692	p += `2`;
693	break;
694	}
695	if (c == `'\0'`)
696	return FNM_NOMATCH;
697	++c1;
698	}
699
700	if (nrules == `0`)
701	{
702	/ There are no names defined in the*
703	collation data. Therefore we only
704	accept the trivial names consisting
705	of the character itself. /*
706	if (c1 != `1`)
707	return FNM_NOMATCH;
708
709	cend = startp[`1`];
710	}
711	else
712	{
713	int32_t table_size;
714	const int32_t *symb_table;
715	# if WIDE_CHAR_VERSION
716	char str[c1];
717	unsigned int strcnt;
718	# else
719	# define str (startp + 1)
720	# endif
721	const unsigned char *extra;
722	int32_t idx;
723	int32_t elem;
724	int32_t second;
725	int32_t hash;
726
727	# if WIDE_CHAR_VERSION
728	/ We have to convert the name to a single-byte*
729	string. This is possible since the names
730	consist of ASCII characters and the internal
731	representation is UCS4. /*
732	for (strcnt = `0`; strcnt < c1; ++strcnt)
733	str[strcnt] = startp[`1` + strcnt];
734	# endif
735
736	table_size =
737	_NL_CURRENT_WORD (LC_COLLATE,
738	_NL_COLLATE_SYMB_HASH_SIZEMB);
739	symb_table = (const int32_t *)
740	_NL_CURRENT (LC_COLLATE,
741	_NL_COLLATE_SYMB_TABLEMB);
742	extra = (const unsigned char *)
743	_NL_CURRENT (LC_COLLATE,
744	_NL_COLLATE_SYMB_EXTRAMB);
745
746	/ Locate the character in the hashing*
747	table. /*
748	hash = elem_hash (str, c1);
749
750	idx = `0`;
751	elem = hash % table_size;
752	if (symb_table[`2` * elem] != `0`)
753	{
754	second = hash % (table_size - `2`) + `1`;
755
756	do
757	{
758	/ First compare the hashing value. /
759	if (symb_table[`2` * elem] == hash
760	&& (c1
761	== extra[symb_table[`2` * elem + `1`]])
762	&& memcmp (str,
763	&extra[symb_table[`2` * elem + `1`]
764	+ `1`], c1) == `0`)
765	{
766	/ Yep, this is the entry. /
767	idx = symb_table[`2` * elem + `1`];
768	idx += `1` + extra[idx];
769	break;
770	}
771
772	/ Next entry. /
773	elem += second;
774	}
775	while (symb_table[`2` * elem] != `0`);
776	}
777
778	if (symb_table[`2` * elem] != `0`)
779	{
780	/ Compare the byte sequence but only if*
781	this is not part of a range. /*
782	# if WIDE_CHAR_VERSION
783	int32_t *wextra;
784
785	idx += `1` + extra[idx];
786	/ Adjust for the alignment. /
787	idx = (idx + `3`) & ~`4`;
788
789	wextra = (int32_t *) &extra[idx + `4`];
790	# endif
791	/ Get the collation sequence value. /
792	is_seqval = `1`;
793	# if WIDE_CHAR_VERSION
794	cend = wextra[`1` + wextra[idx]];
795	# else
796	/ Adjust for the alignment. /
797	idx += `1` + extra[idx];
798	idx = (idx + `3`) & ~`4`;
799	cend = ((int32_t ) &extra[idx]);
800	# endif
801	}
802	else if (symb_table[`2` * elem] != `0` && c1 == `1`)
803	{
804	cend = str[`0`];
805	c = *p++;
806	}
807	else
808	return FNM_NOMATCH;
809	}
810	# undef str
811	}
812	else
813	{
814	if (!(flags & FNM_NOESCAPE) && cend == L(`'\\'`))
815	cend = *p++;
816	if (cend == L(`'\0'`))
817	return FNM_NOMATCH;
818	cend = FOLD (cend);
819	}
820
821	/ XXX It is not entirely clear to me how to handle*
822	characters which are not mentioned in the
823	collation specification. /*
824	if (
825	# if WIDE_CHAR_VERSION
826	lcollseq == `0xffffffff` \|\|
827	# endif
828	lcollseq <= fcollseq)
829	{
830	/ We have to look at the upper bound. /
831	uint32_t hcollseq;
832
833	if (is_seqval)
834	hcollseq = cend;
835	else
836	{
837	# if WIDE_CHAR_VERSION
838	hcollseq =
839	__collseq_table_lookup (collseq, cend);
840	if (hcollseq == ~((uint32_t) `0`))
841	{
842	/ Hum, no information about the upper*
843	bound. The matching succeeds if the
844	lower bound is matched exactly. /*
845	if (lcollseq != fcollseq)
846	goto range_not_matched;
847
848	goto matched;
849	}
850	# else
851	hcollseq = collseq[cend];
852	# endif
853	}
854
855	if (lcollseq <= hcollseq && fcollseq <= hcollseq)
856	goto matched;
857	}
858	# if WIDE_CHAR_VERSION
859	range_not_matched:
860	# endif
861	#else
862	/ We use a boring value comparison of the character*
863	values. This is better than comparing using
864	`strcoll' since the latter would have surprising
865	and sometimes fatal consequences. /*
866	UCHAR cend = *p++;
867
868	if (!(flags & FNM_NOESCAPE) && cend == L(`'\\'`))
869	cend = *p++;
870	if (cend == L(`'\0'`))
871	return FNM_NOMATCH;
872
873	/ It is a range. /
874	if (cold <= fn && fn <= cend)
875	goto matched;
876	#endif
877
878	c = *p++;
879	}
880	}
881
882	if (c == L(`']'`))
883	break;
884	}
885
886	if (!not)
887	return FNM_NOMATCH;
888	break;
889
890	matched:
891	/ Skip the rest of the [...] that already matched. /
892	while ((c = *p++) != L (`']'`))
893	{
894	if (c == L(`'\0'`))
895	/ [... (unterminated) loses. /
896	return FNM_NOMATCH;
897
898	if (!(flags & FNM_NOESCAPE) && c == L(`'\\'`))
899	{
900	if (*p == L(`'\0'`))
901	return FNM_NOMATCH;
902	/ XXX 1003.2d11 is unclear if this is right. /
903	++p;
904	}
905	else if (c == L(`'['`) && *p == L(`':'`))
906	{
907	int c1 = `0`;
908	const CHAR *startp = p;
909
910	while (`1`)
911	{
912	c = *++p;
913	if (++c1 == CHAR_CLASS_MAX_LENGTH)
914	return FNM_NOMATCH;
915
916	if (*p == L(`':'`) && p[`1`] == L(`']'`))
917	break;
918
919	if (c < L(`'a'`) \|\| c >= L(`'z'`))
920	{
921	p = startp - `2`;
922	break;
923	}
924	}
925	p += `2`;
926	}
927	else if (c == L(`'['`) && *p == L(`'='`))
928	{
929	c = *++p;
930	if (c == L(`'\0'`))
931	return FNM_NOMATCH;
932	c = *++p;
933	if (c != L(`'='`) \|\| p[`1`] != L(`']'`))
934	return FNM_NOMATCH;
935	p += `2`;
936	}
937	else if (c == L(`'['`) && *p == L(`'.'`))
938	{
939	while (`1`)
940	{
941	c = *++p;
942	if (c == L(`'\0'`))
943	return FNM_NOMATCH;
944
945	if (c == L(`'.'`) && p[`1`] == L(`']'`))
946	break;
947	}
948	p += `2`;
949	}
950	}
951	if (not)
952	return FNM_NOMATCH;
953	}
954	break;
955
956	case L(`'+'`):
957	case L(`'@'`):
958	case L(`'!'`):
959	if (__builtin_expect (flags & FNM_EXTMATCH, `0`) && *p == `'('`)
960	{
961	int res = EXT (c, p, n, string_end, no_leading_period, flags,
962	alloca_used);
963	if (res != -`1`)
964	return res;
965	}
966	goto normal_match;
967
968	case L(`'/'`):
969	if (NO_LEADING_PERIOD (flags))
970	{
971	if (n == string_end \|\| c != (UCHAR) *n)
972	return FNM_NOMATCH;
973
974	new_no_leading_period = `1`;
975	break;
976	}
977	/ FALLTHROUGH /
978	default:
979	normal_match:
980	if (n == string_end \|\| c != FOLD ((UCHAR) *n))
981	return FNM_NOMATCH;
982	}
983
984	no_leading_period = new_no_leading_period;
985	++n;
986	}
987
988	if (n == string_end)
989	return `0`;
990
991	if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L(`'/'`))
992	/ The FNM_LEADING_DIR flag says that "foo" matches "foobar/frobozz". /*
993	return `0`;
994
995	return FNM_NOMATCH;
996	}
997
998
999	static const CHAR *
1000	END (const CHAR *pattern)
1001	{
1002	const CHAR *p = pattern;
1003
1004	while (`1`)
1005	if (*++p == L(`'\0'`))
1006	/ This is an invalid pattern. /
1007	return pattern;
1008	else if (*p == L(`'['`))
1009	{
1010	/ Handle brackets special. /
1011	if (posixly_correct == `0`)
1012	posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? `1` : -`1`;
1013
1014	/ Skip the not sign. We have to recognize it because of a possibly*
1015	following ']'. /*
1016	if (++p == L(`'!'`) \|\| (posixly_correct < `0` && p == L(`'^'`)))
1017	++p;
1018	/ A leading ']' is recognized as such. /
1019	if (*p == L(`']'`))
1020	++p;
1021	/ Skip over all characters of the list. /
1022	while (*p != L(`']'`))
1023	if (*p++ == L(`'\0'`))
1024	/ This is no valid pattern. /
1025	return pattern;
1026	}
1027	else if ((p == L(`'?'`) \|\| p == L(`''`) \|\| p == L(`'+'`) \|\| *p == L(`'@'`)
1028	\|\| *p == L(`'!'`)) && p[`1`] == L(`'('`))
1029	{
1030	p = END (p + `1`);
1031	if (*p == L(`'\0'`))
1032	/ This is an invalid pattern. /
1033	return pattern;
1034	}
1035	else if (*p == L(`')'`))
1036	break;
1037
1038	return p + `1`;
1039	}
1040
1041
1042	static int
1043	EXT (INT opt, const CHAR pattern, const* CHAR string, const* CHAR *string_end,
1044	int no_leading_period, int flags, size_t alloca_used)
1045	{
1046	const CHAR *startp;
1047	int level;
1048	struct patternlist
1049	{
1050	struct patternlist *next;
1051	CHAR malloced;
1052	CHAR str[`0`];
1053	} *list = NULL;
1054	struct patternlist **lastp = &list;
1055	size_t pattern_len = STRLEN (pattern);
1056	int any_malloced = `0`;
1057	const CHAR *p;
1058	const CHAR *rs;
1059	int retval = `0`;
1060
1061	/ Parse the pattern. Store the individual parts in the list. /
1062	level = `0`;
1063	for (startp = p = pattern + `1`; level >= `0`; ++p)
1064	if (*p == L(`'\0'`))
1065	{
1066	/ This is an invalid pattern. /
1067	retval = -`1`;
1068	goto out;
1069	}
1070	else if (*p == L(`'['`))
1071	{
1072	/ Handle brackets special. /
1073	if (posixly_correct == `0`)
1074	posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? `1` : -`1`;
1075
1076	/ Skip the not sign. We have to recognize it because of a possibly*
1077	following ']'. /*
1078	if (++p == L(`'!'`) \|\| (posixly_correct < `0` && p == L(`'^'`)))
1079	++p;
1080	/ A leading ']' is recognized as such. /
1081	if (*p == L(`']'`))
1082	++p;
1083	/ Skip over all characters of the list. /
1084	while (*p != L(`']'`))
1085	if (*p++ == L(`'\0'`))
1086	{
1087	/ This is no valid pattern. /
1088	retval = -`1`;
1089	goto out;
1090	}
1091	}
1092	else if ((p == L(`'?'`) \|\| p == L(`''`) \|\| p == L(`'+'`) \|\| *p == L(`'@'`)
1093	\|\| *p == L(`'!'`)) && p[`1`] == L(`'('`))
1094	/ Remember the nesting level. /
1095	++level;
1096	else if (*p == L(`')'`))
1097	{
1098	if (level-- == `0`)
1099	{
1100	/ This means we found the end of the pattern. /
1101	#define NEW_PATTERN \
1102	struct patternlist *newp; \
1103	size_t slen = (opt == L('?') \|\| opt == L('@') \
1104	? pattern_len : (p - startp + 1)); \
1105	slen = sizeof (struct patternlist) + (slen * sizeof (CHAR)); \
1106	int malloced = ! __libc_use_alloca (alloca_used + slen); \
1107	if (__builtin_expect (malloced, 0)) \
1108	{ \
1109	newp = malloc (slen); \
1110	if (newp == NULL) \
1111	{ \
1112	retval = -2; \
1113	goto out; \
1114	} \
1115	any_malloced = 1; \
1116	} \
1117	else \
1118	newp = alloca_account (slen, alloca_used); \
1119	newp->next = NULL; \
1120	newp->malloced = malloced; \
1121	((CHAR ) MEMPCPY (newp->str, startp, p - startp)) = L('\0'); \
1122	*lastp = newp; \
1123	lastp = &newp->next
1124	NEW_PATTERN;
1125	}
1126	}
1127	else if (*p == L(`'\|'`))
1128	{
1129	if (level == `0`)
1130	{
1131	NEW_PATTERN;
1132	startp = p + `1`;
1133	}
1134	}
1135	assert (list != NULL);
1136	assert (p[-`1`] == L(`')'`));
1137	#undef NEW_PATTERN
1138
1139	switch (opt)
1140	{
1141	case L(`'*'`):
1142	if (FCT (p, string, string_end, no_leading_period, flags, NULL,
1143	alloca_used) == `0`)
1144	goto success;
1145	/ FALLTHROUGH /
1146
1147	case L(`'+'`):
1148	do
1149	{
1150	for (rs = string; rs <= string_end; ++rs)
1151	/ First match the prefix with the current pattern with the*
1152	current pattern. /*
1153	if (FCT (list->str, string, rs, no_leading_period,
1154	flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1155	NULL, alloca_used) == `0`
1156	/ This was successful. Now match the rest with the rest*
1157	of the pattern. /*
1158	&& (FCT (p, rs, string_end,
1159	rs == string
1160	? no_leading_period
1161	: rs[-`1`] == `'/'` && NO_LEADING_PERIOD (flags) ? `1` : `0`,
1162	flags & FNM_FILE_NAME
1163	? flags : flags & ~FNM_PERIOD, NULL, alloca_used) == `0`
1164	/ This didn't work. Try the whole pattern. /
1165	\|\| (rs != string
1166	&& FCT (pattern - `1`, rs, string_end,
1167	rs == string
1168	? no_leading_period
1169	: (rs[-`1`] == `'/'` && NO_LEADING_PERIOD (flags)
1170	? `1` : `0`),
1171	flags & FNM_FILE_NAME
1172	? flags : flags & ~FNM_PERIOD, NULL,
1173	alloca_used) == `0`)))
1174	/ It worked. Signal success. /
1175	goto success;
1176	}
1177	while ((list = list->next) != NULL);
1178
1179	/ None of the patterns lead to a match. /
1180	retval = FNM_NOMATCH;
1181	break;
1182
1183	case L(`'?'`):
1184	if (FCT (p, string, string_end, no_leading_period, flags, NULL,
1185	alloca_used) == `0`)
1186	goto success;
1187	/ FALLTHROUGH /
1188
1189	case L(`'@'`):
1190	do
1191	/ I cannot believe it but `strcat' is actually acceptable*
1192	here. Match the entire string with the prefix from the
1193	pattern list and the rest of the pattern following the
1194	pattern list. /*
1195	if (FCT (STRCAT (list->str, p), string, string_end,
1196	no_leading_period,
1197	flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1198	NULL, alloca_used) == `0`)
1199	/ It worked. Signal success. /
1200	goto success;
1201	while ((list = list->next) != NULL);
1202
1203	/ None of the patterns lead to a match. /
1204	retval = FNM_NOMATCH;
1205	break;
1206
1207	case L(`'!'`):
1208	for (rs = string; rs <= string_end; ++rs)
1209	{
1210	struct patternlist *runp;
1211
1212	for (runp = list; runp != NULL; runp = runp->next)
1213	if (FCT (runp->str, string, rs, no_leading_period,
1214	flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1215	NULL, alloca_used) == `0`)
1216	break;
1217
1218	/ If none of the patterns matched see whether the rest does. /
1219	if (runp == NULL
1220	&& (FCT (p, rs, string_end,
1221	rs == string
1222	? no_leading_period
1223	: rs[-`1`] == `'/'` && NO_LEADING_PERIOD (flags) ? `1` : `0`,
1224	flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1225	NULL, alloca_used) == `0`))
1226	/ This is successful. /
1227	goto success;
1228	}
1229
1230	/ None of the patterns together with the rest of the pattern*
1231	lead to a match. /*
1232	retval = FNM_NOMATCH;
1233	break;
1234
1235	default:
1236	assert (! "Invalid extended matching operator");
1237	retval = -`1`;
1238	break;
1239	}
1240
1241	success:
1242	out:
1243	if (any_malloced)
1244	while (list != NULL)
1245	{
1246	struct patternlist *old = list;
1247	list = list->next;
1248	if (old->malloced)
1249	free (old);
1250	}
1251
1252	return retval;
1253	}
1254
1255
/* Remove the template parameters so that this file can be included
   again with a different set of definitions.  */
#undef FOLD
#undef CHAR
#undef UCHAR
#undef INT
#undef FCT
#undef EXT
#undef END
#undef STRUCT
#undef MEMPCPY
#undef MEMCHR
#undef STRCOLL
#undef STRLEN
#undef STRCAT
#undef L
#undef BTOWC
#undef WIDE_CHAR_VERSION
#undef FINDIDX
1273

/* Browse the source code of glibc/posix/fnmatch_loop.c */