fnmatch_loop.c source code [glibc/posix/fnmatch_loop.c]

/* Copyright (C) 1991-2023 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
17
18	#ifdef _LIBC
19	# include <stdint.h>
20	#endif
21
22	struct STRUCT
23	{
24	const CHAR *pattern;
25	const CHAR *string;
26	bool no_leading_period;
27	};
28
29	/ Match STRING against the file name pattern PATTERN, returning zero if*
30	it matches, nonzero if not. /*
31	static int FCT (const CHAR pattern, const* CHAR *string,
32	const CHAR string_end, bool no_leading_period, int* flags,
33	struct STRUCT *ends);
34	static int EXT (INT opt, const CHAR pattern, const* CHAR *string,
35	const CHAR string_end, bool no_leading_period, int* flags);
36	static const CHAR END (const* CHAR *patternp);
37
38	static int
39	FCT (const CHAR pattern, const* CHAR string, const* CHAR *string_end,
40	bool no_leading_period, int flags, struct STRUCT *ends)
41	{
42	const CHAR p = pattern, n = string;
43	UCHAR c;
44	#ifdef _LIBC
45	# if WIDE_CHAR_VERSION
46	const char collseq = (const* char *)
47	_NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
48	# else
49	const UCHAR collseq = (const* UCHAR *)
50	_NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
51	# endif
52	#endif
53
54	while ((c = *p++) != L_(`'\0'`))
55	{
56	bool new_no_leading_period = false;
57	c = FOLD (c);
58
59	switch (c)
60	{
61	case L_(`'?'`):
62	if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == `'('`)
63	{
64	int res = EXT (c, p, n, string_end, no_leading_period, flags);
65	if (res != -`1`)
66	return res;
67	}
68
69	if (n == string_end)
70	return FNM_NOMATCH;
71	else if (*n == L_(`'/'`) && (flags & FNM_FILE_NAME))
72	return FNM_NOMATCH;
73	else if (*n == L_(`'.'`) && no_leading_period)
74	return FNM_NOMATCH;
75	break;
76
77	case L_(`'\\'`):
78	if (!(flags & FNM_NOESCAPE))
79	{
80	c = *p++;
81	if (c == L_(`'\0'`))
82	/ Trailing \ loses. /
83	return FNM_NOMATCH;
84	c = FOLD (c);
85	}
86	if (n == string_end \|\| FOLD ((UCHAR) *n) != c)
87	return FNM_NOMATCH;
88	break;
89
90	case L_(`'*'`):
91	if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == `'('`)
92	{
93	int res = EXT (c, p, n, string_end, no_leading_period, flags);
94	if (res != -`1`)
95	return res;
96	}
97	else if (ends != NULL)
98	{
99	ends->pattern = p - `1`;
100	ends->string = n;
101	ends->no_leading_period = no_leading_period;
102	return `0`;
103	}
104
105	if (n != string_end && *n == L_(`'.'`) && no_leading_period)
106	return FNM_NOMATCH;
107
108	for (c = p++; c == L_(`'?'`) \|\| c == L_(`''`); c = *p++)
109	{
110	if (*p == L_(`'('`) && (flags & FNM_EXTMATCH) != `0`)
111	{
112	const CHAR *endp = END (p);
113	if (endp != p)
114	{
115	/ This is a pattern. Skip over it. /
116	p = endp;
117	continue;
118	}
119	}
120
121	if (c == L_(`'?'`))
122	{
123	/ A ? needs to match one character. /
124	if (n == string_end)
125	/ There isn't another character; no match. /
126	return FNM_NOMATCH;
127	else if (*n == L_(`'/'`)
128	&& __glibc_unlikely (flags & FNM_FILE_NAME))
129	/ A slash does not match a wildcard under*
130	FNM_FILE_NAME. /*
131	return FNM_NOMATCH;
132	else
133	/ One character of the string is consumed in matching*
134	this ? wildcard, so ??? won't match if there are*
135	less than three characters. /*
136	++n;
137	}
138	}
139
140	if (c == L_(`'\0'`))
141	/ The wildcard(s) is/are the last element of the pattern.*
142	If the name is a file name and contains another slash
143	this means it cannot match, unless the FNM_LEADING_DIR
144	flag is set. /*
145	{
146	int result = (flags & FNM_FILE_NAME) == `0` ? `0` : FNM_NOMATCH;
147
148	if (flags & FNM_FILE_NAME)
149	{
150	if (flags & FNM_LEADING_DIR)
151	result = `0`;
152	else
153	{
154	if (MEMCHR (n, L_(`'/'`), string_end - n) == NULL)
155	result = `0`;
156	}
157	}
158
159	return result;
160	}
161	else
162	{
163	const CHAR *endp;
164	struct STRUCT end;
165
166	end.pattern = NULL;
167	endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_(`'/'`) : L_(`'\0'`),
168	string_end - n);
169	if (endp == NULL)
170	endp = string_end;
171
172	if (c == L_(`'['`)
173	\|\| (__glibc_unlikely (flags & FNM_EXTMATCH)
174	&& (c == L_(`'@'`) \|\| c == L_(`'+'`) \|\| c == L_(`'!'`))
175	&& *p == L_(`'('`)))
176	{
177	int flags2 = ((flags & FNM_FILE_NAME)
178	? flags : (flags & ~FNM_PERIOD));
179
180	for (--p; n < endp; ++n, no_leading_period = false)
181	if (FCT (p, n, string_end, no_leading_period, flags2,
182	&end) == `0`)
183	goto found;
184	}
185	else if (c == L_(`'/'`) && (flags & FNM_FILE_NAME))
186	{
187	while (n < string_end && *n != L_(`'/'`))
188	++n;
189	if (n < string_end && *n == L_(`'/'`)
190	&& (FCT (p, n + `1`, string_end, flags & FNM_PERIOD, flags,
191	NULL) == `0`))
192	return `0`;
193	}
194	else
195	{
196	int flags2 = ((flags & FNM_FILE_NAME)
197	? flags : (flags & ~FNM_PERIOD));
198
199	if (c == L_(`'\\'`) && !(flags & FNM_NOESCAPE))
200	c = *p;
201	c = FOLD (c);
202	for (--p; n < endp; ++n, no_leading_period = false)
203	if (FOLD ((UCHAR) *n) == c
204	&& (FCT (p, n, string_end, no_leading_period, flags2,
205	&end) == `0`))
206	{
207	found:
208	if (end.pattern == NULL)
209	return `0`;
210	break;
211	}
212	if (end.pattern != NULL)
213	{
214	p = end.pattern;
215	n = end.string;
216	no_leading_period = end.no_leading_period;
217	continue;
218	}
219	}
220	}
221
222	/ If we come here no match is possible with the wildcard. /
223	return FNM_NOMATCH;
224
225	case L_(`'['`):
226	{
227	/ Nonzero if the sense of the character class is inverted. /
228	const CHAR *p_init = p;
229	const CHAR *n_init = n;
230	bool not;
231	CHAR cold;
232	UCHAR fn;
233
234	if (posixly_correct == `0`)
235	posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? `1` : -`1`;
236
237	if (n == string_end)
238	return FNM_NOMATCH;
239
240	if (*n == L_(`'.'`) && no_leading_period)
241	return FNM_NOMATCH;
242
243	if (*n == L_(`'/'`) && (flags & FNM_FILE_NAME))
244	/ '/' cannot be matched. /
245	return FNM_NOMATCH;
246
247	not = (p == L_(`'!'`) \|\| (posixly_correct < `0` && p == L_(`'^'`)));
248	if (not)
249	++p;
250
251	fn = FOLD ((UCHAR) *n);
252
253	c = *p++;
254	for (;;)
255	{
256	if (!(flags & FNM_NOESCAPE) && c == L_(`'\\'`))
257	{
258	if (*p == L_(`'\0'`))
259	return FNM_NOMATCH;
260	c = FOLD ((UCHAR) *p);
261	++p;
262
263	goto normal_bracket;
264	}
265	else if (c == L_(`'['`) && *p == L_(`':'`))
266	{
267	/ Leave room for the null. /
268	CHAR str[CHAR_CLASS_MAX_LENGTH + `1`];
269	size_t c1 = `0`;
270	wctype_t wt;
271	const CHAR *startp = p;
272
273	for (;;)
274	{
275	if (c1 == CHAR_CLASS_MAX_LENGTH)
276	/ The name is too long and therefore the pattern*
277	is ill-formed. /*
278	return FNM_NOMATCH;
279
280	c = *++p;
281	if (c == L_(`':'`) && p[`1`] == L_(`']'`))
282	{
283	p += `2`;
284	break;
285	}
286	if (c < L_(`'a'`) \|\| c >= L_(`'z'`))
287	{
288	/ This cannot possibly be a character class name.*
289	Match it as a normal range. /*
290	p = startp;
291	c = L_(`'['`);
292	goto normal_bracket;
293	}
294	str[c1++] = c;
295	}
296	str[c1] = L_(`'\0'`);
297
298	wt = IS_CHAR_CLASS (str);
299	if (wt == `0`)
300	/ Invalid character class name. /
301	return FNM_NOMATCH;
302
303	#if defined _LIBC && ! WIDE_CHAR_VERSION
304	/ The following code is glibc specific but does*
305	there a good job in speeding up the code since
306	we can avoid the btowc() call. /*
307	if (_ISCTYPE ((UCHAR) *n, wt))
308	goto matched;
309	#else
310	if (iswctype (BTOWC ((UCHAR) *n), wt))
311	goto matched;
312	#endif
313	c = *p++;
314	}
315	#ifdef _LIBC
316	else if (c == L_(`'['`) && *p == L_(`'='`))
317	{
318	/ It's important that STR be a scalar variable rather*
319	than a one-element array, because GCC (at least 4.9.2
320	-O2 on x86-64) can be confused by the array and
321	diagnose a "used initialized" in a dead branch in the
322	findidx function. /*
323	UCHAR str;
324	uint32_t nrules =
325	_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
326	const CHAR *startp = p;
327
328	c = *++p;
329	if (c == L_(`'\0'`))
330	{
331	p = startp;
332	c = L_(`'['`);
333	goto normal_bracket;
334	}
335	str = c;
336
337	c = *++p;
338	if (c != L_(`'='`) \|\| p[`1`] != L_(`']'`))
339	{
340	p = startp;
341	c = L_(`'['`);
342	goto normal_bracket;
343	}
344	p += `2`;
345
346	if (nrules == `0`)
347	{
348	if ((UCHAR) *n == str)
349	goto matched;
350	}
351	else
352	{
353	const int32_t *table;
354	# if WIDE_CHAR_VERSION
355	const int32_t *weights;
356	const wint_t *extra;
357	# else
358	const unsigned char *weights;
359	const unsigned char *extra;
360	# endif
361	const int32_t *indirect;
362	int32_t idx;
363	const UCHAR cp = (const* UCHAR *) &str;
364
365	# if WIDE_CHAR_VERSION
366	table = (const int32_t *)
367	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
368	weights = (const int32_t *)
369	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
370	extra = (const wint_t *)
371	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
372	indirect = (const int32_t *)
373	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
374	# else
375	table = (const int32_t *)
376	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
377	weights = (const unsigned char *)
378	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
379	extra = (const unsigned char *)
380	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
381	indirect = (const int32_t *)
382	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
383	# endif
384
385	idx = FINDIDX (table, indirect, extra, &cp, `1`);
386	if (idx != `0`)
387	{
388	/ We found a table entry. Now see whether the*
389	character we are currently at has the same
390	equivalence class value. /*
391	int len = weights[idx & `0xffffff`];
392	int32_t idx2;
393	const UCHAR np = (const* UCHAR *) n;
394
395	idx2 = FINDIDX (table, indirect, extra,
396	&np, string_end - n);
397	if (idx2 != `0`
398	&& (idx >> `24`) == (idx2 >> `24`)
399	&& len == weights[idx2 & `0xffffff`])
400	{
401	int cnt = `0`;
402
403	idx &= `0xffffff`;
404	idx2 &= `0xffffff`;
405
406	while (cnt < len
407	&& (weights[idx + `1` + cnt]
408	== weights[idx2 + `1` + cnt]))
409	++cnt;
410
411	if (cnt == len)
412	goto matched;
413	}
414	}
415	}
416
417	c = *p++;
418	}
419	#endif
420	else if (c == L_(`'\0'`))
421	{
422	/ [ unterminated, treat as normal character. /
423	p = p_init;
424	n = n_init;
425	c = L_(`'['`);
426	goto normal_match;
427	}
428	else
429	{
430	bool is_range = false;
431
432	#ifdef _LIBC
433	bool is_seqval = false;
434
435	if (c == L_(`'['`) && *p == L_(`'.'`))
436	{
437	uint32_t nrules =
438	_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
439	const CHAR *startp = p;
440	size_t c1 = `0`;
441
442	while (`1`)
443	{
444	c = *++p;
445	if (c == L_(`'.'`) && p[`1`] == L_(`']'`))
446	{
447	p += `2`;
448	break;
449	}
450	if (c == `'\0'`)
451	return FNM_NOMATCH;
452	++c1;
453	}
454
455	/ We have to handling the symbols differently in*
456	ranges since then the collation sequence is
457	important. /*
458	is_range = *p == L_(`'-'`) && p[`1`] != L_(`'\0'`);
459
460	if (nrules == `0`)
461	{
462	/ There are no names defined in the collation*
463	data. Therefore we only accept the trivial
464	names consisting of the character itself. /*
465	if (c1 != `1`)
466	return FNM_NOMATCH;
467
468	if (!is_range && *n == startp[`1`])
469	goto matched;
470
471	cold = startp[`1`];
472	c = *p++;
473	}
474	else
475	{
476	int32_t table_size;
477	const int32_t *symb_table;
478	const unsigned char *extra;
479	int32_t idx;
480	int32_t elem;
481	# if WIDE_CHAR_VERSION
482	CHAR *wextra;
483	# endif
484
485	table_size =
486	_NL_CURRENT_WORD (LC_COLLATE,
487	_NL_COLLATE_SYMB_HASH_SIZEMB);
488	symb_table = (const int32_t *)
489	_NL_CURRENT (LC_COLLATE,
490	_NL_COLLATE_SYMB_TABLEMB);
491	extra = (const unsigned char *)
492	_NL_CURRENT (LC_COLLATE,
493	_NL_COLLATE_SYMB_EXTRAMB);
494
495	for (elem = `0`; elem < table_size; elem++)
496	if (symb_table[`2` * elem] != `0`)
497	{
498	idx = symb_table[`2` * elem + `1`];
499	/ Skip the name of collating element. /
500	idx += `1` + extra[idx];
501	# if WIDE_CHAR_VERSION
502	/ Skip the byte sequence of the*
503	collating element. /*
504	idx += `1` + extra[idx];
505	/ Adjust for the alignment. /
506	idx = (idx + `3`) & ~`3`;
507
508	wextra = (CHAR *) &extra[idx + `4`];
509
510	if (/ Compare the length of the sequence. /
511	c1 == wextra[`0`]
512	/ Compare the wide char sequence. /
513	&& (__wmemcmp (startp + `1`, &wextra[`1`],
514	c1)
515	== `0`))
516	/ Yep, this is the entry. /
517	break;
518	# else
519	if (/ Compare the length of the sequence. /
520	c1 == extra[idx]
521	/ Compare the byte sequence. /
522	&& memcmp (startp + `1`,
523	&extra[idx + `1`], c1) == `0`)
524	/ Yep, this is the entry. /
525	break;
526	# endif
527	}
528
529	if (elem < table_size)
530	{
531	/ Compare the byte sequence but only if*
532	this is not part of a range. /*
533
534	/ The compiler might warn that idx may be*
535	used uninitialized, however it will be
536	reached iff elem < table_size which means
537	that it was properly set in the loop
538	above. /*
539	DIAG_PUSH_NEEDS_COMMENT;
540	DIAG_IGNORE_Os_NEEDS_COMMENT (`8`, "-Wmaybe-uninitialized");
541	if (! is_range
542
543	# if WIDE_CHAR_VERSION
544	&& __wmemcmp (n, &wextra[`1`], c1) == `0`
545	# else
546	&& memcmp (n, &extra[idx + `1`], c1) == `0`
547	# endif
548	)
549	{
550	n += c1 - `1`;
551	goto matched;
552	}
553	DIAG_POP_NEEDS_COMMENT;
554
555	/ Get the collation sequence value. /
556	is_seqval = true;
557	# if WIDE_CHAR_VERSION
558	/ The compile might warn that wextra may be*
559	used uninitialized and similar to 'idx'
560	above it will be properly set by the loop.
561	*/
562	DIAG_PUSH_NEEDS_COMMENT;
563	DIAG_IGNORE_Os_NEEDS_COMMENT (`8`, "-Wmaybe-uninitialized");
564	cold = wextra[`1` + wextra[`0`]];
565	DIAG_POP_NEEDS_COMMENT;
566	# else
567	idx += `1` + extra[idx];
568	/ Adjust for the alignment. /
569	idx = (idx + `3`) & ~`3`;
570	cold = ((int32_t ) &extra[idx]);
571	# endif
572
573	c = *p++;
574	}
575	else if (c1 == `1`)
576	{
577	/ No valid character. Match it as a*
578	single byte. /*
579	if (!is_range && *n == startp[`1`])
580	goto matched;
581
582	cold = startp[`1`];
583	c = *p++;
584	}
585	else
586	return FNM_NOMATCH;
587	}
588	}
589	else
590	#endif
591	{
592	c = FOLD (c);
593	normal_bracket:
594
595	/ We have to handling the symbols differently in*
596	ranges since then the collation sequence is
597	important. /*
598	is_range = (*p == L_(`'-'`) && p[`1`] != L_(`'\0'`)
599	&& p[`1`] != L_(`']'`));
600
601	if (!is_range && c == fn)
602	goto matched;
603
604	#if _LIBC
605	/ This is needed if we goto normal_bracket; from*
606	outside of is_seqval's scope. /*
607	is_seqval = false;
608	#endif
609	cold = c;
610	c = *p++;
611	}
612
613	if (c == L_(`'-'`) && *p != L_(`']'`))
614	{
615	#if _LIBC
616	/ We have to find the collation sequence*
617	value for C. Collation sequence is nothing
618	we can regularly access. The sequence
619	value is defined by the order in which the
620	definitions of the collation values for the
621	various characters appear in the source
622	file. A strange concept, nowhere
623	documented. /*
624	uint32_t fcollseq;
625	uint32_t lcollseq;
626	UCHAR cend = *p++;
627
628	# if WIDE_CHAR_VERSION
629	/ Search in the 'names' array for the characters. /
630	fcollseq = __collseq_table_lookup (collseq, fn);
631	if (fcollseq == ~((uint32_t) `0`))
632	/ XXX We don't know anything about the character*
633	we are supposed to match. This means we are
634	failing. /*
635	goto range_not_matched;
636
637	if (is_seqval)
638	lcollseq = cold;
639	else
640	lcollseq = __collseq_table_lookup (collseq, cold);
641	# else
642	fcollseq = collseq[fn];
643	lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
644	# endif
645
646	is_seqval = false;
647	if (cend == L_(`'['`) && *p == L_(`'.'`))
648	{
649	uint32_t nrules =
650	_NL_CURRENT_WORD (LC_COLLATE,
651	_NL_COLLATE_NRULES);
652	const CHAR *startp = p;
653	size_t c1 = `0`;
654
655	while (`1`)
656	{
657	c = *++p;
658	if (c == L_(`'.'`) && p[`1`] == L_(`']'`))
659	{
660	p += `2`;
661	break;
662	}
663	if (c == `'\0'`)
664	return FNM_NOMATCH;
665	++c1;
666	}
667
668	if (nrules == `0`)
669	{
670	/ There are no names defined in the*
671	collation data. Therefore we only
672	accept the trivial names consisting
673	of the character itself. /*
674	if (c1 != `1`)
675	return FNM_NOMATCH;
676
677	cend = startp[`1`];
678	}
679	else
680	{
681	int32_t table_size;
682	const int32_t *symb_table;
683	const unsigned char *extra;
684	int32_t idx;
685	int32_t elem;
686	# if WIDE_CHAR_VERSION
687	CHAR *wextra;
688	# endif
689
690	table_size =
691	_NL_CURRENT_WORD (LC_COLLATE,
692	_NL_COLLATE_SYMB_HASH_SIZEMB);
693	symb_table = (const int32_t *)
694	_NL_CURRENT (LC_COLLATE,
695	_NL_COLLATE_SYMB_TABLEMB);
696	extra = (const unsigned char *)
697	_NL_CURRENT (LC_COLLATE,
698	_NL_COLLATE_SYMB_EXTRAMB);
699
700	for (elem = `0`; elem < table_size; elem++)
701	if (symb_table[`2` * elem] != `0`)
702	{
703	idx = symb_table[`2` * elem + `1`];
704	/ Skip the name of collating*
705	element. /*
706	idx += `1` + extra[idx];
707	# if WIDE_CHAR_VERSION
708	/ Skip the byte sequence of the*
709	collating element. /*
710	idx += `1` + extra[idx];
711	/ Adjust for the alignment. /
712	idx = (idx + `3`) & ~`3`;
713
714	wextra = (CHAR *) &extra[idx + `4`];
715
716	if (/ Compare the length of the*
717	sequence. /*
718	c1 == wextra[`0`]
719	/ Compare the wide char sequence. /
720	&& (__wmemcmp (startp + `1`,
721	&wextra[`1`], c1)
722	== `0`))
723	/ Yep, this is the entry. /
724	break;
725	# else
726	if (/ Compare the length of the*
727	sequence. /*
728	c1 == extra[idx]
729	/ Compare the byte sequence. /
730	&& memcmp (startp + `1`,
731	&extra[idx + `1`], c1) == `0`)
732	/ Yep, this is the entry. /
733	break;
734	# endif
735	}
736
737	if (elem < table_size)
738	{
739	/ Get the collation sequence value. /
740	is_seqval = true;
741	# if WIDE_CHAR_VERSION
742	/ The compiler might warn that wextra may*
743	be used uninitialized, however it will
744	be reached iff elem < table_size which
745	means that it was properly set in the
746	loop above. /*
747	DIAG_PUSH_NEEDS_COMMENT;
748	DIAG_IGNORE_Os_NEEDS_COMMENT (`8`, "-Wmaybe-uninitialized");
749	cend = wextra[`1` + wextra[`0`]];
750	DIAG_POP_NEEDS_COMMENT;
751	# else
752	/ The compile might warn that idx may*
753	be used uninitialized and similar to
754	wextra above it will be properly set by
755	the loop. /*
756	DIAG_PUSH_NEEDS_COMMENT;
757	DIAG_IGNORE_Os_NEEDS_COMMENT (`8`, "-Wmaybe-uninitialized");
758	idx += `1` + extra[idx];
759	DIAG_POP_NEEDS_COMMENT;
760	/ Adjust for the alignment. /
761	idx = (idx + `3`) & ~`3`;
762	cend = ((int32_t ) &extra[idx]);
763	# endif
764	}
765	else if (c1 == `1`)
766	{
767	cend = startp[`1`];
768	c = *p++;
769	}
770	else
771	return FNM_NOMATCH;
772	}
773	}
774	else
775	{
776	if (!(flags & FNM_NOESCAPE) && cend == L_(`'\\'`))
777	cend = *p++;
778	if (cend == L_(`'\0'`))
779	return FNM_NOMATCH;
780	cend = FOLD (cend);
781	}
782
783	/ XXX It is not entirely clear to me how to handle*
784	characters which are not mentioned in the
785	collation specification. /*
786	if (
787	# if WIDE_CHAR_VERSION
788	lcollseq == `0xffffffff` \|\|
789	# endif
790	lcollseq <= fcollseq)
791	{
792	/ We have to look at the upper bound. /
793	uint32_t hcollseq;
794
795	if (is_seqval)
796	hcollseq = cend;
797	else
798	{
799	# if WIDE_CHAR_VERSION
800	hcollseq =
801	__collseq_table_lookup (collseq, cend);
802	if (hcollseq == ~((uint32_t) `0`))
803	{
804	/ Hum, no information about the upper*
805	bound. The matching succeeds if the
806	lower bound is matched exactly. /*
807	if (lcollseq != fcollseq)
808	goto range_not_matched;
809
810	goto matched;
811	}
812	# else
813	hcollseq = collseq[cend];
814	# endif
815	}
816
817	if (lcollseq <= hcollseq && fcollseq <= hcollseq)
818	goto matched;
819	}
820	# if WIDE_CHAR_VERSION
821	range_not_matched:
822	# endif
823	#else
824	/ We use a boring value comparison of the character*
825	values. This is better than comparing using
826	'strcoll' since the latter would have surprising
827	and sometimes fatal consequences. /*
828	UCHAR cend = *p++;
829
830	if (!(flags & FNM_NOESCAPE) && cend == L_(`'\\'`))
831	cend = *p++;
832	if (cend == L_(`'\0'`))
833	return FNM_NOMATCH;
834
835	/ It is a range. /
836	if ((UCHAR) cold <= fn && fn <= cend)
837	goto matched;
838	#endif
839
840	c = *p++;
841	}
842	}
843
844	if (c == L_(`']'`))
845	break;
846	}
847
848	if (!not)
849	return FNM_NOMATCH;
850	break;
851
852	matched:
853	/ Skip the rest of the [...] that already matched. /
854	while ((c = *p++) != L_(`']'`))
855	{
856	if (c == L_(`'\0'`))
857	{
858	/ [ unterminated, treat as normal character. /
859	p = p_init;
860	n = n_init;
861	c = L_(`'['`);
862	goto normal_match;
863	}
864
865	if (!(flags & FNM_NOESCAPE) && c == L_(`'\\'`))
866	{
867	if (*p == L_(`'\0'`))
868	return FNM_NOMATCH;
869	/ XXX 1003.2d11 is unclear if this is right. /
870	++p;
871	}
872	else if (c == L_(`'['`) && *p == L_(`':'`))
873	{
874	int c1 = `0`;
875	const CHAR *startp = p;
876
877	while (`1`)
878	{
879	c = *++p;
880	if (++c1 == CHAR_CLASS_MAX_LENGTH)
881	return FNM_NOMATCH;
882
883	if (*p == L_(`':'`) && p[`1`] == L_(`']'`))
884	break;
885
886	if (c < L_(`'a'`) \|\| c >= L_(`'z'`))
887	{
888	p = startp - `2`;
889	break;
890	}
891	}
892	p += `2`;
893	}
894	else if (c == L_(`'['`) && *p == L_(`'='`))
895	{
896	c = *++p;
897	if (c == L_(`'\0'`))
898	return FNM_NOMATCH;
899	c = *++p;
900	if (c != L_(`'='`) \|\| p[`1`] != L_(`']'`))
901	return FNM_NOMATCH;
902	p += `2`;
903	}
904	else if (c == L_(`'['`) && *p == L_(`'.'`))
905	{
906	while (`1`)
907	{
908	c = *++p;
909	if (c == L_(`'\0'`))
910	return FNM_NOMATCH;
911
912	if (c == L_(`'.'`) && p[`1`] == L_(`']'`))
913	break;
914	}
915	p += `2`;
916	}
917	}
918	if (not)
919	return FNM_NOMATCH;
920	}
921	break;
922
923	case L_(`'+'`):
924	case L_(`'@'`):
925	case L_(`'!'`):
926	if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == `'('`)
927	{
928	int res = EXT (c, p, n, string_end, no_leading_period, flags);
929	if (res != -`1`)
930	return res;
931	}
932	goto normal_match;
933
934	case L_(`'/'`):
935	if (NO_LEADING_PERIOD (flags))
936	{
937	if (n == string_end \|\| c != (UCHAR) *n)
938	return FNM_NOMATCH;
939
940	new_no_leading_period = true;
941	break;
942	}
943	FALLTHROUGH;
944	default:
945	normal_match:
946	if (n == string_end \|\| c != FOLD ((UCHAR) *n))
947	return FNM_NOMATCH;
948	}
949
950	no_leading_period = new_no_leading_period;
951	++n;
952	}
953
954	if (n == string_end)
955	return `0`;
956
957	if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_(`'/'`))
958	/ The FNM_LEADING_DIR flag says that "foo" matches "foobar/frobozz". /*
959	return `0`;
960
961	return FNM_NOMATCH;
962	}
963
964
965	static const CHAR *
966	END (const CHAR *pattern)
967	{
968	const CHAR *p = pattern;
969
970	while (`1`)
971	if (*++p == L_(`'\0'`))
972	/ This is an invalid pattern. /
973	return pattern;
974	else if (*p == L_(`'['`))
975	{
976	/ Handle brackets special. /
977	if (posixly_correct == `0`)
978	posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? `1` : -`1`;
979
980	/ Skip the not sign. We have to recognize it because of a possibly*
981	following ']'. /*
982	if (++p == L_(`'!'`) \|\| (posixly_correct < `0` && p == L_(`'^'`)))
983	++p;
984	/ A leading ']' is recognized as such. /
985	if (*p == L_(`']'`))
986	++p;
987	/ Skip over all characters of the list. /
988	while (*p != L_(`']'`))
989	if (*p++ == L_(`'\0'`))
990	/ This is no valid pattern. /
991	return pattern;
992	}
993	else if ((p == L_(`'?'`) \|\| p == L_(`''`) \|\| p == L_(`'+'`) \|\| *p == L_(`'@'`)
994	\|\| *p == L_(`'!'`)) && p[`1`] == L_(`'('`))
995	{
996	p = END (p + `1`);
997	if (*p == L_(`'\0'`))
998	/ This is an invalid pattern. /
999	return pattern;
1000	}
1001	else if (*p == L_(`')'`))
1002	break;
1003
1004	return p + `1`;
1005	}
1006
1007	#if WIDE_CHAR_VERSION
1008	# define PATTERN_PREFIX pattern_list
1009	#else
1010	# define PATTERN_PREFIX wpattern_list
1011	#endif
1012
1013	#define PASTE(a,b) PASTE1(a,b)
1014	#define PASTE1(a,b) a##b
1015
1016	#define DYNARRAY_STRUCT PATTERN_PREFIX
1017	#define DYNARRAY_ELEMENT_FREE(ptr) free (*ptr)
1018	#define DYNARRAY_ELEMENT CHAR *
1019	#define DYNARRAY_PREFIX PASTE(PATTERN_PREFIX,_)
1020	#define DYNARRAY_INITIAL_SIZE 8
1021	#include <malloc/dynarray-skeleton.c>
1022
1023	static int
1024	EXT (INT opt, const CHAR pattern, const* CHAR string, const* CHAR *string_end,
1025	bool no_leading_period, int flags)
1026	{
1027	const CHAR *startp;
1028	ptrdiff_t level;
1029	struct PATTERN_PREFIX list;
1030	size_t pattern_len = STRLEN (pattern);
1031	size_t pattern_i = `0`;
1032	const CHAR *p;
1033	const CHAR *rs;
1034	int retval = `0`;
1035
1036	PASTE (PATTERN_PREFIX, _init) (&list);
1037
1038	/ Parse the pattern. Store the individual parts in the list. /
1039	level = `0`;
1040	for (startp = p = pattern + `1`; level >= `0`; ++p)
1041	if (*p == L_(`'\0'`))
1042	{
1043	/ This is an invalid pattern. /
1044	retval = -`1`;
1045	goto out;
1046	}
1047	else if (*p == L_(`'['`))
1048	{
1049	/ Handle brackets special. /
1050	if (posixly_correct == `0`)
1051	posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? `1` : -`1`;
1052
1053	/ Skip the not sign. We have to recognize it because of a possibly*
1054	following ']'. /*
1055	if (++p == L_(`'!'`) \|\| (posixly_correct < `0` && p == L_(`'^'`)))
1056	++p;
1057	/ A leading ']' is recognized as such. /
1058	if (*p == L_(`']'`))
1059	++p;
1060	/ Skip over all characters of the list. /
1061	while (*p != L_(`']'`))
1062	if (*p++ == L_(`'\0'`))
1063	{
1064	/ This is no valid pattern. /
1065	retval = -`1`;
1066	goto out;
1067	}
1068	}
1069	else if ((p == L_(`'?'`) \|\| p == L_(`''`) \|\| p == L_(`'+'`) \|\| *p == L_(`'@'`)
1070	\|\| *p == L_(`'!'`)) && p[`1`] == L_(`'('`))
1071	/ Remember the nesting level. /
1072	++level;
1073	else if (p == L_(`')'`) \|\| p == L_(`'\|'`))
1074	{
1075	if (level == `0`)
1076	{
1077	size_t slen = opt == L_(`'?'`) \|\| opt == L_(`'@'`)
1078	? pattern_len : p - startp + `1`;
1079	CHAR newp = malloc (slen sizeof (CHAR));
1080	if (newp != NULL)
1081	{
1082	((CHAR ) MEMPCPY (newp, startp, p - startp)) = L_(`'\0'`);
1083	PASTE (PATTERN_PREFIX,_add) (&list, newp);
1084	}
1085	if (newp == NULL \|\| PASTE (PATTERN_PREFIX, _has_failed) (&list))
1086	{
1087	retval = -`2`;
1088	goto out;
1089	}
1090
1091	if (*p == L_(`'\|'`))
1092	startp = p + `1`;
1093	}
1094	if (*p == L_(`')'`))
1095	level--;
1096	}
1097	assert (p[-`1`] == L_(`')'`));
1098
1099	switch (opt)
1100	{
1101	case L_(`'*'`):
1102	if (FCT (p, string, string_end, no_leading_period, flags, NULL) == `0`)
1103	goto success;
1104	FALLTHROUGH;
1105	case L_(`'+'`):
1106	for (; pattern_i < PASTE (PATTERN_PREFIX, _size)(&list); pattern_i++)
1107	{
1108	for (rs = string; rs <= string_end; ++rs)
1109	/ First match the prefix with the current pattern with the*
1110	current pattern. /*
1111	if (FCT (*PASTE (PATTERN_PREFIX, _at) (&list, pattern_i), string,
1112	rs, no_leading_period,
1113	flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1114	NULL) == `0`
1115	/ This was successful. Now match the rest with the rest*
1116	of the pattern. /*
1117	&& (FCT (p, rs, string_end,
1118	rs == string
1119	? no_leading_period
1120	: rs[-`1`] == `'/'` && NO_LEADING_PERIOD (flags),
1121	flags & FNM_FILE_NAME
1122	? flags : flags & ~FNM_PERIOD, NULL) == `0`
1123	/ This didn't work. Try the whole pattern. /
1124	\|\| (rs != string
1125	&& FCT (pattern - `1`, rs, string_end,
1126	rs == string
1127	? no_leading_period
1128	: rs[-`1`] == `'/'` && NO_LEADING_PERIOD (flags),
1129	flags & FNM_FILE_NAME
1130	? flags : flags & ~FNM_PERIOD, NULL) == `0`)))
1131	/ It worked. Signal success. /
1132	goto success;
1133	}
1134
1135	/ None of the patterns lead to a match. /
1136	retval = FNM_NOMATCH;
1137	break;
1138
1139	case L_(`'?'`):
1140	if (FCT (p, string, string_end, no_leading_period, flags, NULL) == `0`)
1141	goto success;
1142	FALLTHROUGH;
1143	case L_(`'@'`):
1144	for (; pattern_i < PASTE (PATTERN_PREFIX, _size) (&list); pattern_i++)
1145	{
1146	/ I cannot believe it but `strcat' is actually acceptable*
1147	here. Match the entire string with the prefix from the
1148	pattern list and the rest of the pattern following the
1149	pattern list. /*
1150	if (FCT (STRCAT (*PASTE (PATTERN_PREFIX, _at) (&list, pattern_i), p),
1151	string, string_end, no_leading_period,
1152	flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1153	NULL) == `0`)
1154	/ It worked. Signal success. /
1155	goto success;
1156	}
1157
1158	/ None of the patterns lead to a match. /
1159	retval = FNM_NOMATCH;
1160	break;
1161
1162	case L_(`'!'`):
1163	for (rs = string; rs <= string_end; ++rs)
1164	{
1165	size_t runp_i;
1166
1167	for (runp_i = pattern_i;
1168	runp_i != PASTE (PATTERN_PREFIX, _size) (&list);
1169	runp_i++)
1170	{
1171	if (FCT (*PASTE (PATTERN_PREFIX, _at) (&list, runp_i), string, rs,
1172	no_leading_period,
1173	flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1174	NULL) == `0`)
1175	break;
1176	}
1177
1178	/ If none of the patterns matched see whether the rest does. /
1179	if (runp_i == PASTE (PATTERN_PREFIX, _size) (&list)
1180	&& (FCT (p, rs, string_end,
1181	rs == string
1182	? no_leading_period
1183	: rs[-`1`] == `'/'` && NO_LEADING_PERIOD (flags),
1184	flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1185	NULL) == `0`))
1186	/ This is successful. /
1187	goto success;
1188	}
1189
1190	/ None of the patterns together with the rest of the pattern*
1191	lead to a match. /*
1192	retval = FNM_NOMATCH;
1193	break;
1194
1195	default:
1196	assert (! "Invalid extended matching operator");
1197	retval = -`1`;
1198	break;
1199	}
1200
1201	success:
1202	out:
1203	PASTE (PATTERN_PREFIX, _free) (&list);
1204
1205	return retval;
1206	}
1207
/* Drop the helper macros defined above and the template parameters
   supplied by the including file, so they can be defined again.  */
#undef PATTERN_PREFIX
#undef PASTE
#undef PASTE1

#undef FOLD
#undef CHAR
#undef UCHAR
#undef INT
#undef FCT
#undef EXT
#undef END
#undef STRUCT
#undef MEMPCPY
#undef MEMCHR
#undef STRLEN
#undef STRCAT
#undef L_
#undef BTOWC
#undef WIDE_CHAR_VERSION
#undef FINDIDX
1228

Browse the source code of glibc/posix/fnmatch_loop.c