fnmatch_loop.c source code [glibc/posix/fnmatch_loop.c]

/* Copyright (C) 1991-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
17
18	#ifdef _LIBC
19	# include <stdint.h>
20	#endif
21
22	struct STRUCT
23	{
24	const CHAR *pattern;
25	const CHAR *string;
26	bool no_leading_period;
27	};
28
29	/ Match STRING against the file name pattern PATTERN, returning zero if*
30	it matches, nonzero if not. /*
31	static int FCT (const CHAR pattern, const* CHAR *string,
32	const CHAR string_end, bool no_leading_period, int* flags,
33	struct STRUCT *ends, size_t alloca_used);
34	static int EXT (INT opt, const CHAR pattern, const* CHAR *string,
35	const CHAR string_end, bool no_leading_period, int* flags,
36	size_t alloca_used);
37	static const CHAR END (const* CHAR *patternp);
38
39	static int
40	FCT (const CHAR pattern, const* CHAR string, const* CHAR *string_end,
41	bool no_leading_period, int flags, struct STRUCT *ends, size_t alloca_used)
42	{
43	const CHAR p = pattern, n = string;
44	UCHAR c;
45	#ifdef _LIBC
46	# if WIDE_CHAR_VERSION
47	const char collseq = (const* char *)
48	_NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
49	# else
50	const UCHAR collseq = (const* UCHAR *)
51	_NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
52	# endif
53	#endif
54
55	while ((c = *p++) != L_(`'\0'`))
56	{
57	bool new_no_leading_period = false;
58	c = FOLD (c);
59
60	switch (c)
61	{
62	case L_(`'?'`):
63	if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == `'('`)
64	{
65	int res = EXT (c, p, n, string_end, no_leading_period,
66	flags, alloca_used);
67	if (res != -`1`)
68	return res;
69	}
70
71	if (n == string_end)
72	return FNM_NOMATCH;
73	else if (*n == L_(`'/'`) && (flags & FNM_FILE_NAME))
74	return FNM_NOMATCH;
75	else if (*n == L_(`'.'`) && no_leading_period)
76	return FNM_NOMATCH;
77	break;
78
79	case L_(`'\\'`):
80	if (!(flags & FNM_NOESCAPE))
81	{
82	c = *p++;
83	if (c == L_(`'\0'`))
84	/ Trailing \ loses. /
85	return FNM_NOMATCH;
86	c = FOLD (c);
87	}
88	if (n == string_end \|\| FOLD ((UCHAR) *n) != c)
89	return FNM_NOMATCH;
90	break;
91
92	case L_(`'*'`):
93	if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == `'('`)
94	{
95	int res = EXT (c, p, n, string_end, no_leading_period,
96	flags, alloca_used);
97	if (res != -`1`)
98	return res;
99	}
100	else if (ends != NULL)
101	{
102	ends->pattern = p - `1`;
103	ends->string = n;
104	ends->no_leading_period = no_leading_period;
105	return `0`;
106	}
107
108	if (n != string_end && *n == L_(`'.'`) && no_leading_period)
109	return FNM_NOMATCH;
110
111	for (c = p++; c == L_(`'?'`) \|\| c == L_(`''`); c = *p++)
112	{
113	if (*p == L_(`'('`) && (flags & FNM_EXTMATCH) != `0`)
114	{
115	const CHAR *endp = END (p);
116	if (endp != p)
117	{
118	/ This is a pattern. Skip over it. /
119	p = endp;
120	continue;
121	}
122	}
123
124	if (c == L_(`'?'`))
125	{
126	/ A ? needs to match one character. /
127	if (n == string_end)
128	/ There isn't another character; no match. /
129	return FNM_NOMATCH;
130	else if (*n == L_(`'/'`)
131	&& __glibc_unlikely (flags & FNM_FILE_NAME))
132	/ A slash does not match a wildcard under*
133	FNM_FILE_NAME. /*
134	return FNM_NOMATCH;
135	else
136	/ One character of the string is consumed in matching*
137	this ? wildcard, so ??? won't match if there are*
138	less than three characters. /*
139	++n;
140	}
141	}
142
143	if (c == L_(`'\0'`))
144	/ The wildcard(s) is/are the last element of the pattern.*
145	If the name is a file name and contains another slash
146	this means it cannot match, unless the FNM_LEADING_DIR
147	flag is set. /*
148	{
149	int result = (flags & FNM_FILE_NAME) == `0` ? `0` : FNM_NOMATCH;
150
151	if (flags & FNM_FILE_NAME)
152	{
153	if (flags & FNM_LEADING_DIR)
154	result = `0`;
155	else
156	{
157	if (MEMCHR (n, L_(`'/'`), string_end - n) == NULL)
158	result = `0`;
159	}
160	}
161
162	return result;
163	}
164	else
165	{
166	const CHAR *endp;
167	struct STRUCT end;
168
169	end.pattern = NULL;
170	endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_(`'/'`) : L_(`'\0'`),
171	string_end - n);
172	if (endp == NULL)
173	endp = string_end;
174
175	if (c == L_(`'['`)
176	\|\| (__glibc_unlikely (flags & FNM_EXTMATCH)
177	&& (c == L_(`'@'`) \|\| c == L_(`'+'`) \|\| c == L_(`'!'`))
178	&& *p == L_(`'('`)))
179	{
180	int flags2 = ((flags & FNM_FILE_NAME)
181	? flags : (flags & ~FNM_PERIOD));
182
183	for (--p; n < endp; ++n, no_leading_period = false)
184	if (FCT (p, n, string_end, no_leading_period, flags2,
185	&end, alloca_used) == `0`)
186	goto found;
187	}
188	else if (c == L_(`'/'`) && (flags & FNM_FILE_NAME))
189	{
190	while (n < string_end && *n != L_(`'/'`))
191	++n;
192	if (n < string_end && *n == L_(`'/'`)
193	&& (FCT (p, n + `1`, string_end, flags & FNM_PERIOD, flags,
194	NULL, alloca_used) == `0`))
195	return `0`;
196	}
197	else
198	{
199	int flags2 = ((flags & FNM_FILE_NAME)
200	? flags : (flags & ~FNM_PERIOD));
201
202	if (c == L_(`'\\'`) && !(flags & FNM_NOESCAPE))
203	c = *p;
204	c = FOLD (c);
205	for (--p; n < endp; ++n, no_leading_period = false)
206	if (FOLD ((UCHAR) *n) == c
207	&& (FCT (p, n, string_end, no_leading_period, flags2,
208	&end, alloca_used) == `0`))
209	{
210	found:
211	if (end.pattern == NULL)
212	return `0`;
213	break;
214	}
215	if (end.pattern != NULL)
216	{
217	p = end.pattern;
218	n = end.string;
219	no_leading_period = end.no_leading_period;
220	continue;
221	}
222	}
223	}
224
225	/ If we come here no match is possible with the wildcard. /
226	return FNM_NOMATCH;
227
228	case L_(`'['`):
229	{
230	/ Nonzero if the sense of the character class is inverted. /
231	const CHAR *p_init = p;
232	const CHAR *n_init = n;
233	bool not;
234	CHAR cold;
235	UCHAR fn;
236
237	if (posixly_correct == `0`)
238	posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? `1` : -`1`;
239
240	if (n == string_end)
241	return FNM_NOMATCH;
242
243	if (*n == L_(`'.'`) && no_leading_period)
244	return FNM_NOMATCH;
245
246	if (*n == L_(`'/'`) && (flags & FNM_FILE_NAME))
247	/ '/' cannot be matched. /
248	return FNM_NOMATCH;
249
250	not = (p == L_(`'!'`) \|\| (posixly_correct < `0` && p == L_(`'^'`)));
251	if (not)
252	++p;
253
254	fn = FOLD ((UCHAR) *n);
255
256	c = *p++;
257	for (;;)
258	{
259	if (!(flags & FNM_NOESCAPE) && c == L_(`'\\'`))
260	{
261	if (*p == L_(`'\0'`))
262	return FNM_NOMATCH;
263	c = FOLD ((UCHAR) *p);
264	++p;
265
266	goto normal_bracket;
267	}
268	else if (c == L_(`'['`) && *p == L_(`':'`))
269	{
270	/ Leave room for the null. /
271	CHAR str[CHAR_CLASS_MAX_LENGTH + `1`];
272	size_t c1 = `0`;
273	wctype_t wt;
274	const CHAR *startp = p;
275
276	for (;;)
277	{
278	if (c1 == CHAR_CLASS_MAX_LENGTH)
279	/ The name is too long and therefore the pattern*
280	is ill-formed. /*
281	return FNM_NOMATCH;
282
283	c = *++p;
284	if (c == L_(`':'`) && p[`1`] == L_(`']'`))
285	{
286	p += `2`;
287	break;
288	}
289	if (c < L_(`'a'`) \|\| c >= L_(`'z'`))
290	{
291	/ This cannot possibly be a character class name.*
292	Match it as a normal range. /*
293	p = startp;
294	c = L_(`'['`);
295	goto normal_bracket;
296	}
297	str[c1++] = c;
298	}
299	str[c1] = L_(`'\0'`);
300
301	wt = IS_CHAR_CLASS (str);
302	if (wt == `0`)
303	/ Invalid character class name. /
304	return FNM_NOMATCH;
305
306	#if defined _LIBC && ! WIDE_CHAR_VERSION
307	/ The following code is glibc specific but does*
308	there a good job in speeding up the code since
309	we can avoid the btowc() call. /*
310	if (_ISCTYPE ((UCHAR) *n, wt))
311	goto matched;
312	#else
313	if (iswctype (BTOWC ((UCHAR) *n), wt))
314	goto matched;
315	#endif
316	c = *p++;
317	}
318	#ifdef _LIBC
319	else if (c == L_(`'['`) && *p == L_(`'='`))
320	{
321	/ It's important that STR be a scalar variable rather*
322	than a one-element array, because GCC (at least 4.9.2
323	-O2 on x86-64) can be confused by the array and
324	diagnose a "used initialized" in a dead branch in the
325	findidx function. /*
326	UCHAR str;
327	uint32_t nrules =
328	_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
329	const CHAR *startp = p;
330
331	c = *++p;
332	if (c == L_(`'\0'`))
333	{
334	p = startp;
335	c = L_(`'['`);
336	goto normal_bracket;
337	}
338	str = c;
339
340	c = *++p;
341	if (c != L_(`'='`) \|\| p[`1`] != L_(`']'`))
342	{
343	p = startp;
344	c = L_(`'['`);
345	goto normal_bracket;
346	}
347	p += `2`;
348
349	if (nrules == `0`)
350	{
351	if ((UCHAR) *n == str)
352	goto matched;
353	}
354	else
355	{
356	const int32_t *table;
357	# if WIDE_CHAR_VERSION
358	const int32_t *weights;
359	const wint_t *extra;
360	# else
361	const unsigned char *weights;
362	const unsigned char *extra;
363	# endif
364	const int32_t *indirect;
365	int32_t idx;
366	const UCHAR cp = (const* UCHAR *) &str;
367
368	# if WIDE_CHAR_VERSION
369	table = (const int32_t *)
370	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
371	weights = (const int32_t *)
372	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
373	extra = (const wint_t *)
374	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
375	indirect = (const int32_t *)
376	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
377	# else
378	table = (const int32_t *)
379	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
380	weights = (const unsigned char *)
381	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
382	extra = (const unsigned char *)
383	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
384	indirect = (const int32_t *)
385	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
386	# endif
387
388	idx = FINDIDX (table, indirect, extra, &cp, `1`);
389	if (idx != `0`)
390	{
391	/ We found a table entry. Now see whether the*
392	character we are currently at has the same
393	equivalence class value. /*
394	int len = weights[idx & `0xffffff`];
395	int32_t idx2;
396	const UCHAR np = (const* UCHAR *) n;
397
398	idx2 = FINDIDX (table, indirect, extra,
399	&np, string_end - n);
400	if (idx2 != `0`
401	&& (idx >> `24`) == (idx2 >> `24`)
402	&& len == weights[idx2 & `0xffffff`])
403	{
404	int cnt = `0`;
405
406	idx &= `0xffffff`;
407	idx2 &= `0xffffff`;
408
409	while (cnt < len
410	&& (weights[idx + `1` + cnt]
411	== weights[idx2 + `1` + cnt]))
412	++cnt;
413
414	if (cnt == len)
415	goto matched;
416	}
417	}
418	}
419
420	c = *p++;
421	}
422	#endif
423	else if (c == L_(`'\0'`))
424	{
425	/ [ unterminated, treat as normal character. /
426	p = p_init;
427	n = n_init;
428	c = L_(`'['`);
429	goto normal_match;
430	}
431	else
432	{
433	bool is_range = false;
434
435	#ifdef _LIBC
436	bool is_seqval = false;
437
438	if (c == L_(`'['`) && *p == L_(`'.'`))
439	{
440	uint32_t nrules =
441	_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
442	const CHAR *startp = p;
443	size_t c1 = `0`;
444
445	while (`1`)
446	{
447	c = *++p;
448	if (c == L_(`'.'`) && p[`1`] == L_(`']'`))
449	{
450	p += `2`;
451	break;
452	}
453	if (c == `'\0'`)
454	return FNM_NOMATCH;
455	++c1;
456	}
457
458	/ We have to handling the symbols differently in*
459	ranges since then the collation sequence is
460	important. /*
461	is_range = *p == L_(`'-'`) && p[`1`] != L_(`'\0'`);
462
463	if (nrules == `0`)
464	{
465	/ There are no names defined in the collation*
466	data. Therefore we only accept the trivial
467	names consisting of the character itself. /*
468	if (c1 != `1`)
469	return FNM_NOMATCH;
470
471	if (!is_range && *n == startp[`1`])
472	goto matched;
473
474	cold = startp[`1`];
475	c = *p++;
476	}
477	else
478	{
479	int32_t table_size;
480	const int32_t *symb_table;
481	const unsigned char *extra;
482	int32_t idx;
483	int32_t elem;
484	# if WIDE_CHAR_VERSION
485	CHAR *wextra;
486	# endif
487
488	table_size =
489	_NL_CURRENT_WORD (LC_COLLATE,
490	_NL_COLLATE_SYMB_HASH_SIZEMB);
491	symb_table = (const int32_t *)
492	_NL_CURRENT (LC_COLLATE,
493	_NL_COLLATE_SYMB_TABLEMB);
494	extra = (const unsigned char *)
495	_NL_CURRENT (LC_COLLATE,
496	_NL_COLLATE_SYMB_EXTRAMB);
497
498	for (elem = `0`; elem < table_size; elem++)
499	if (symb_table[`2` * elem] != `0`)
500	{
501	idx = symb_table[`2` * elem + `1`];
502	/ Skip the name of collating element. /
503	idx += `1` + extra[idx];
504	# if WIDE_CHAR_VERSION
505	/ Skip the byte sequence of the*
506	collating element. /*
507	idx += `1` + extra[idx];
508	/ Adjust for the alignment. /
509	idx = (idx + `3`) & ~`3`;
510
511	wextra = (CHAR *) &extra[idx + `4`];
512
513	if (/ Compare the length of the sequence. /
514	c1 == wextra[`0`]
515	/ Compare the wide char sequence. /
516	&& (__wmemcmp (startp + `1`, &wextra[`1`],
517	c1)
518	== `0`))
519	/ Yep, this is the entry. /
520	break;
521	# else
522	if (/ Compare the length of the sequence. /
523	c1 == extra[idx]
524	/ Compare the byte sequence. /
525	&& memcmp (startp + `1`,
526	&extra[idx + `1`], c1) == `0`)
527	/ Yep, this is the entry. /
528	break;
529	# endif
530	}
531
532	if (elem < table_size)
533	{
534	/ Compare the byte sequence but only if*
535	this is not part of a range. /*
536	if (! is_range
537
538	# if WIDE_CHAR_VERSION
539	&& __wmemcmp (n, &wextra[`1`], c1) == `0`
540	# else
541	&& memcmp (n, &extra[idx + `1`], c1) == `0`
542	# endif
543	)
544	{
545	n += c1 - `1`;
546	goto matched;
547	}
548
549	/ Get the collation sequence value. /
550	is_seqval = true;
551	# if WIDE_CHAR_VERSION
552	cold = wextra[`1` + wextra[`0`]];
553	# else
554	idx += `1` + extra[idx];
555	/ Adjust for the alignment. /
556	idx = (idx + `3`) & ~`3`;
557	cold = ((int32_t ) &extra[idx]);
558	# endif
559
560	c = *p++;
561	}
562	else if (c1 == `1`)
563	{
564	/ No valid character. Match it as a*
565	single byte. /*
566	if (!is_range && *n == startp[`1`])
567	goto matched;
568
569	cold = startp[`1`];
570	c = *p++;
571	}
572	else
573	return FNM_NOMATCH;
574	}
575	}
576	else
577	#endif
578	{
579	c = FOLD (c);
580	normal_bracket:
581
582	/ We have to handling the symbols differently in*
583	ranges since then the collation sequence is
584	important. /*
585	is_range = (*p == L_(`'-'`) && p[`1`] != L_(`'\0'`)
586	&& p[`1`] != L_(`']'`));
587
588	if (!is_range && c == fn)
589	goto matched;
590
591	#if _LIBC
592	/ This is needed if we goto normal_bracket; from*
593	outside of is_seqval's scope. /*
594	is_seqval = false;
595	#endif
596	cold = c;
597	c = *p++;
598	}
599
600	if (c == L_(`'-'`) && *p != L_(`']'`))
601	{
602	#if _LIBC
603	/ We have to find the collation sequence*
604	value for C. Collation sequence is nothing
605	we can regularly access. The sequence
606	value is defined by the order in which the
607	definitions of the collation values for the
608	various characters appear in the source
609	file. A strange concept, nowhere
610	documented. /*
611	uint32_t fcollseq;
612	uint32_t lcollseq;
613	UCHAR cend = *p++;
614
615	# if WIDE_CHAR_VERSION
616	/ Search in the 'names' array for the characters. /
617	fcollseq = __collseq_table_lookup (collseq, fn);
618	if (fcollseq == ~((uint32_t) `0`))
619	/ XXX We don't know anything about the character*
620	we are supposed to match. This means we are
621	failing. /*
622	goto range_not_matched;
623
624	if (is_seqval)
625	lcollseq = cold;
626	else
627	lcollseq = __collseq_table_lookup (collseq, cold);
628	# else
629	fcollseq = collseq[fn];
630	lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
631	# endif
632
633	is_seqval = false;
634	if (cend == L_(`'['`) && *p == L_(`'.'`))
635	{
636	uint32_t nrules =
637	_NL_CURRENT_WORD (LC_COLLATE,
638	_NL_COLLATE_NRULES);
639	const CHAR *startp = p;
640	size_t c1 = `0`;
641
642	while (`1`)
643	{
644	c = *++p;
645	if (c == L_(`'.'`) && p[`1`] == L_(`']'`))
646	{
647	p += `2`;
648	break;
649	}
650	if (c == `'\0'`)
651	return FNM_NOMATCH;
652	++c1;
653	}
654
655	if (nrules == `0`)
656	{
657	/ There are no names defined in the*
658	collation data. Therefore we only
659	accept the trivial names consisting
660	of the character itself. /*
661	if (c1 != `1`)
662	return FNM_NOMATCH;
663
664	cend = startp[`1`];
665	}
666	else
667	{
668	int32_t table_size;
669	const int32_t *symb_table;
670	const unsigned char *extra;
671	int32_t idx;
672	int32_t elem;
673	# if WIDE_CHAR_VERSION
674	CHAR *wextra;
675	# endif
676
677	table_size =
678	_NL_CURRENT_WORD (LC_COLLATE,
679	_NL_COLLATE_SYMB_HASH_SIZEMB);
680	symb_table = (const int32_t *)
681	_NL_CURRENT (LC_COLLATE,
682	_NL_COLLATE_SYMB_TABLEMB);
683	extra = (const unsigned char *)
684	_NL_CURRENT (LC_COLLATE,
685	_NL_COLLATE_SYMB_EXTRAMB);
686
687	for (elem = `0`; elem < table_size; elem++)
688	if (symb_table[`2` * elem] != `0`)
689	{
690	idx = symb_table[`2` * elem + `1`];
691	/ Skip the name of collating*
692	element. /*
693	idx += `1` + extra[idx];
694	# if WIDE_CHAR_VERSION
695	/ Skip the byte sequence of the*
696	collating element. /*
697	idx += `1` + extra[idx];
698	/ Adjust for the alignment. /
699	idx = (idx + `3`) & ~`3`;
700
701	wextra = (CHAR *) &extra[idx + `4`];
702
703	if (/ Compare the length of the*
704	sequence. /*
705	c1 == wextra[`0`]
706	/ Compare the wide char sequence. /
707	&& (__wmemcmp (startp + `1`,
708	&wextra[`1`], c1)
709	== `0`))
710	/ Yep, this is the entry. /
711	break;
712	# else
713	if (/ Compare the length of the*
714	sequence. /*
715	c1 == extra[idx]
716	/ Compare the byte sequence. /
717	&& memcmp (startp + `1`,
718	&extra[idx + `1`], c1) == `0`)
719	/ Yep, this is the entry. /
720	break;
721	# endif
722	}
723
724	if (elem < table_size)
725	{
726	/ Get the collation sequence value. /
727	is_seqval = true;
728	# if WIDE_CHAR_VERSION
729	cend = wextra[`1` + wextra[`0`]];
730	# else
731	idx += `1` + extra[idx];
732	/ Adjust for the alignment. /
733	idx = (idx + `3`) & ~`3`;
734	cend = ((int32_t ) &extra[idx]);
735	# endif
736	}
737	else if (c1 == `1`)
738	{
739	cend = startp[`1`];
740	c = *p++;
741	}
742	else
743	return FNM_NOMATCH;
744	}
745	}
746	else
747	{
748	if (!(flags & FNM_NOESCAPE) && cend == L_(`'\\'`))
749	cend = *p++;
750	if (cend == L_(`'\0'`))
751	return FNM_NOMATCH;
752	cend = FOLD (cend);
753	}
754
755	/ XXX It is not entirely clear to me how to handle*
756	characters which are not mentioned in the
757	collation specification. /*
758	if (
759	# if WIDE_CHAR_VERSION
760	lcollseq == `0xffffffff` \|\|
761	# endif
762	lcollseq <= fcollseq)
763	{
764	/ We have to look at the upper bound. /
765	uint32_t hcollseq;
766
767	if (is_seqval)
768	hcollseq = cend;
769	else
770	{
771	# if WIDE_CHAR_VERSION
772	hcollseq =
773	__collseq_table_lookup (collseq, cend);
774	if (hcollseq == ~((uint32_t) `0`))
775	{
776	/ Hum, no information about the upper*
777	bound. The matching succeeds if the
778	lower bound is matched exactly. /*
779	if (lcollseq != fcollseq)
780	goto range_not_matched;
781
782	goto matched;
783	}
784	# else
785	hcollseq = collseq[cend];
786	# endif
787	}
788
789	if (lcollseq <= hcollseq && fcollseq <= hcollseq)
790	goto matched;
791	}
792	# if WIDE_CHAR_VERSION
793	range_not_matched:
794	# endif
795	#else
796	/ We use a boring value comparison of the character*
797	values. This is better than comparing using
798	'strcoll' since the latter would have surprising
799	and sometimes fatal consequences. /*
800	UCHAR cend = *p++;
801
802	if (!(flags & FNM_NOESCAPE) && cend == L_(`'\\'`))
803	cend = *p++;
804	if (cend == L_(`'\0'`))
805	return FNM_NOMATCH;
806
807	/ It is a range. /
808	if ((UCHAR) cold <= fn && fn <= cend)
809	goto matched;
810	#endif
811
812	c = *p++;
813	}
814	}
815
816	if (c == L_(`']'`))
817	break;
818	}
819
820	if (!not)
821	return FNM_NOMATCH;
822	break;
823
824	matched:
825	/ Skip the rest of the [...] that already matched. /
826	while ((c = *p++) != L_(`']'`))
827	{
828	if (c == L_(`'\0'`))
829	/ [... (unterminated) loses. /
830	return FNM_NOMATCH;
831
832	if (!(flags & FNM_NOESCAPE) && c == L_(`'\\'`))
833	{
834	if (*p == L_(`'\0'`))
835	return FNM_NOMATCH;
836	/ XXX 1003.2d11 is unclear if this is right. /
837	++p;
838	}
839	else if (c == L_(`'['`) && *p == L_(`':'`))
840	{
841	int c1 = `0`;
842	const CHAR *startp = p;
843
844	while (`1`)
845	{
846	c = *++p;
847	if (++c1 == CHAR_CLASS_MAX_LENGTH)
848	return FNM_NOMATCH;
849
850	if (*p == L_(`':'`) && p[`1`] == L_(`']'`))
851	break;
852
853	if (c < L_(`'a'`) \|\| c >= L_(`'z'`))
854	{
855	p = startp - `2`;
856	break;
857	}
858	}
859	p += `2`;
860	}
861	else if (c == L_(`'['`) && *p == L_(`'='`))
862	{
863	c = *++p;
864	if (c == L_(`'\0'`))
865	return FNM_NOMATCH;
866	c = *++p;
867	if (c != L_(`'='`) \|\| p[`1`] != L_(`']'`))
868	return FNM_NOMATCH;
869	p += `2`;
870	}
871	else if (c == L_(`'['`) && *p == L_(`'.'`))
872	{
873	while (`1`)
874	{
875	c = *++p;
876	if (c == L_(`'\0'`))
877	return FNM_NOMATCH;
878
879	if (c == L_(`'.'`) && p[`1`] == L_(`']'`))
880	break;
881	}
882	p += `2`;
883	}
884	}
885	if (not)
886	return FNM_NOMATCH;
887	}
888	break;
889
890	case L_(`'+'`):
891	case L_(`'@'`):
892	case L_(`'!'`):
893	if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == `'('`)
894	{
895	int res = EXT (c, p, n, string_end, no_leading_period, flags,
896	alloca_used);
897	if (res != -`1`)
898	return res;
899	}
900	goto normal_match;
901
902	case L_(`'/'`):
903	if (NO_LEADING_PERIOD (flags))
904	{
905	if (n == string_end \|\| c != (UCHAR) *n)
906	return FNM_NOMATCH;
907
908	new_no_leading_period = true;
909	break;
910	}
911	FALLTHROUGH;
912	default:
913	normal_match:
914	if (n == string_end \|\| c != FOLD ((UCHAR) *n))
915	return FNM_NOMATCH;
916	}
917
918	no_leading_period = new_no_leading_period;
919	++n;
920	}
921
922	if (n == string_end)
923	return `0`;
924
925	if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_(`'/'`))
926	/ The FNM_LEADING_DIR flag says that "foo" matches "foobar/frobozz". /*
927	return `0`;
928
929	return FNM_NOMATCH;
930	}
931
932
933	static const CHAR *
934	END (const CHAR *pattern)
935	{
936	const CHAR *p = pattern;
937
938	while (`1`)
939	if (*++p == L_(`'\0'`))
940	/ This is an invalid pattern. /
941	return pattern;
942	else if (*p == L_(`'['`))
943	{
944	/ Handle brackets special. /
945	if (posixly_correct == `0`)
946	posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? `1` : -`1`;
947
948	/ Skip the not sign. We have to recognize it because of a possibly*
949	following ']'. /*
950	if (++p == L_(`'!'`) \|\| (posixly_correct < `0` && p == L_(`'^'`)))
951	++p;
952	/ A leading ']' is recognized as such. /
953	if (*p == L_(`']'`))
954	++p;
955	/ Skip over all characters of the list. /
956	while (*p != L_(`']'`))
957	if (*p++ == L_(`'\0'`))
958	/ This is no valid pattern. /
959	return pattern;
960	}
961	else if ((p == L_(`'?'`) \|\| p == L_(`''`) \|\| p == L_(`'+'`) \|\| *p == L_(`'@'`)
962	\|\| *p == L_(`'!'`)) && p[`1`] == L_(`'('`))
963	{
964	p = END (p + `1`);
965	if (*p == L_(`'\0'`))
966	/ This is an invalid pattern. /
967	return pattern;
968	}
969	else if (*p == L_(`')'`))
970	break;
971
972	return p + `1`;
973	}
974
975
976	static int
977	EXT (INT opt, const CHAR pattern, const* CHAR string, const* CHAR *string_end,
978	bool no_leading_period, int flags, size_t alloca_used)
979	{
980	const CHAR *startp;
981	ptrdiff_t level;
982	struct patternlist
983	{
984	struct patternlist *next;
985	CHAR malloced;
986	CHAR str __flexarr;
987	} *list = NULL;
988	struct patternlist **lastp = &list;
989	size_t pattern_len = STRLEN (pattern);
990	bool any_malloced = false;
991	const CHAR *p;
992	const CHAR *rs;
993	int retval = `0`;
994
995	/ Parse the pattern. Store the individual parts in the list. /
996	level = `0`;
997	for (startp = p = pattern + `1`; level >= `0`; ++p)
998	if (*p == L_(`'\0'`))
999	{
1000	/ This is an invalid pattern. /
1001	retval = -`1`;
1002	goto out;
1003	}
1004	else if (*p == L_(`'['`))
1005	{
1006	/ Handle brackets special. /
1007	if (posixly_correct == `0`)
1008	posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? `1` : -`1`;
1009
1010	/ Skip the not sign. We have to recognize it because of a possibly*
1011	following ']'. /*
1012	if (++p == L_(`'!'`) \|\| (posixly_correct < `0` && p == L_(`'^'`)))
1013	++p;
1014	/ A leading ']' is recognized as such. /
1015	if (*p == L_(`']'`))
1016	++p;
1017	/ Skip over all characters of the list. /
1018	while (*p != L_(`']'`))
1019	if (*p++ == L_(`'\0'`))
1020	{
1021	/ This is no valid pattern. /
1022	retval = -`1`;
1023	goto out;
1024	}
1025	}
1026	else if ((p == L_(`'?'`) \|\| p == L_(`''`) \|\| p == L_(`'+'`) \|\| *p == L_(`'@'`)
1027	\|\| *p == L_(`'!'`)) && p[`1`] == L_(`'('`))
1028	/ Remember the nesting level. /
1029	++level;
1030	else if (*p == L_(`')'`))
1031	{
1032	if (level-- == `0`)
1033	{
1034	/ This means we found the end of the pattern. /
1035	#define NEW_PATTERN \
1036	struct patternlist *newp; \
1037	size_t plen = (opt == L_('?') \|\| opt == L_('@') \
1038	? pattern_len : (p - startp + 1UL)); \
1039	idx_t slen = FLEXSIZEOF (struct patternlist, str, 0); \
1040	idx_t new_used = alloca_used + slen; \
1041	idx_t plensize; \
1042	if (INT_MULTIPLY_WRAPV (plen, sizeof (CHAR), &plensize) \
1043	\|\| INT_ADD_WRAPV (new_used, plensize, &new_used)) \
1044	{ \
1045	retval = -2; \
1046	goto out; \
1047	} \
1048	slen += plensize; \
1049	bool malloced = ! __libc_use_alloca (new_used); \
1050	if (__glibc_unlikely (malloced)) \
1051	{ \
1052	newp = malloc (slen); \
1053	if (newp == NULL) \
1054	{ \
1055	retval = -2; \
1056	goto out; \
1057	} \
1058	any_malloced = true; \
1059	} \
1060	else \
1061	newp = alloca_account (slen, alloca_used); \
1062	newp->next = NULL; \
1063	newp->malloced = malloced; \
1064	((CHAR ) MEMPCPY (newp->str, startp, p - startp)) = L_('\0'); \
1065	*lastp = newp; \
1066	lastp = &newp->next
1067	NEW_PATTERN;
1068	}
1069	}
1070	else if (*p == L_(`'\|'`))
1071	{
1072	if (level == `0`)
1073	{
1074	NEW_PATTERN;
1075	startp = p + `1`;
1076	}
1077	}
1078	assert (list != NULL);
1079	assert (p[-`1`] == L_(`')'`));
1080	#undef NEW_PATTERN
1081
1082	switch (opt)
1083	{
1084	case L_(`'*'`):
1085	if (FCT (p, string, string_end, no_leading_period, flags, NULL,
1086	alloca_used) == `0`)
1087	goto success;
1088	FALLTHROUGH;
1089	case L_(`'+'`):
1090	do
1091	{
1092	for (rs = string; rs <= string_end; ++rs)
1093	/ First match the prefix with the current pattern with the*
1094	current pattern. /*
1095	if (FCT (list->str, string, rs, no_leading_period,
1096	flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1097	NULL, alloca_used) == `0`
1098	/ This was successful. Now match the rest with the rest*
1099	of the pattern. /*
1100	&& (FCT (p, rs, string_end,
1101	rs == string
1102	? no_leading_period
1103	: rs[-`1`] == `'/'` && NO_LEADING_PERIOD (flags),
1104	flags & FNM_FILE_NAME
1105	? flags : flags & ~FNM_PERIOD, NULL, alloca_used) == `0`
1106	/ This didn't work. Try the whole pattern. /
1107	\|\| (rs != string
1108	&& FCT (pattern - `1`, rs, string_end,
1109	rs == string
1110	? no_leading_period
1111	: rs[-`1`] == `'/'` && NO_LEADING_PERIOD (flags),
1112	flags & FNM_FILE_NAME
1113	? flags : flags & ~FNM_PERIOD, NULL,
1114	alloca_used) == `0`)))
1115	/ It worked. Signal success. /
1116	goto success;
1117	}
1118	while ((list = list->next) != NULL);
1119
1120	/ None of the patterns lead to a match. /
1121	retval = FNM_NOMATCH;
1122	break;
1123
1124	case L_(`'?'`):
1125	if (FCT (p, string, string_end, no_leading_period, flags, NULL,
1126	alloca_used) == `0`)
1127	goto success;
1128	FALLTHROUGH;
1129	case L_(`'@'`):
1130	do
1131	/ I cannot believe it but 'strcat' is actually acceptable*
1132	here. Match the entire string with the prefix from the
1133	pattern list and the rest of the pattern following the
1134	pattern list. /*
1135	if (FCT (STRCAT (list->str, p), string, string_end,
1136	no_leading_period,
1137	flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1138	NULL, alloca_used) == `0`)
1139	/ It worked. Signal success. /
1140	goto success;
1141	while ((list = list->next) != NULL);
1142
1143	/ None of the patterns lead to a match. /
1144	retval = FNM_NOMATCH;
1145	break;
1146
1147	case L_(`'!'`):
1148	for (rs = string; rs <= string_end; ++rs)
1149	{
1150	struct patternlist *runp;
1151
1152	for (runp = list; runp != NULL; runp = runp->next)
1153	if (FCT (runp->str, string, rs, no_leading_period,
1154	flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1155	NULL, alloca_used) == `0`)
1156	break;
1157
1158	/ If none of the patterns matched see whether the rest does. /
1159	if (runp == NULL
1160	&& (FCT (p, rs, string_end,
1161	rs == string
1162	? no_leading_period
1163	: rs[-`1`] == `'/'` && NO_LEADING_PERIOD (flags),
1164	flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1165	NULL, alloca_used) == `0`))
1166	/ This is successful. /
1167	goto success;
1168	}
1169
1170	/ None of the patterns together with the rest of the pattern*
1171	lead to a match. /*
1172	retval = FNM_NOMATCH;
1173	break;
1174
1175	default:
1176	assert (! "Invalid extended matching operator");
1177	retval = -`1`;
1178	break;
1179	}
1180
1181	success:
1182	out:
1183	if (any_malloced)
1184	while (list != NULL)
1185	{
1186	struct patternlist *old = list;
1187	list = list->next;
1188	if (old->malloced)
1189	free (old);
1190	}
1191
1192	return retval;
1193	}
1194
1195
/* Drop the macros that parameterise this template -- presumably so the
   file can be included again with a different set of definitions
   (e.g. a wide-character instantiation; confirm against the includer).  */
#undef FOLD
#undef CHAR
#undef UCHAR
#undef INT
#undef FCT
#undef EXT
#undef END
#undef STRUCT
#undef MEMPCPY
#undef MEMCHR
#undef STRLEN
#undef STRCAT
#undef L_
#undef BTOWC
#undef WIDE_CHAR_VERSION
#undef FINDIDX
1212

Browse the source code of glibc/posix/fnmatch_loop.c