regexec.c source code [glibc/posix/regexec.c]

1	/ Extended regular expression matching and search library.*
2	Copyright (C) 2002-2019 Free Software Foundation, Inc.
3	This file is part of the GNU C Library.
4	Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6	The GNU C Library is free software; you can redistribute it and/or
7	modify it under the terms of the GNU Lesser General Public
8	License as published by the Free Software Foundation; either
9	version 2.1 of the License, or (at your option) any later version.
10
11	The GNU C Library is distributed in the hope that it will be useful,
12	but WITHOUT ANY WARRANTY; without even the implied warranty of
13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14	Lesser General Public License for more details.
15
16	You should have received a copy of the GNU Lesser General Public
17	License along with the GNU C Library; if not, see
18	<https://www.gnu.org/licenses/>. /*
19
20	static reg_errcode_t match_ctx_init (re_match_context_t cache, int* eflags,
21	Idx n);
22	static void match_ctx_clean (re_match_context_t *mctx);
23	static void match_ctx_free (re_match_context_t *cache);
24	static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
25	Idx str_idx, Idx from, Idx to);
26	static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx);
27	static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
28	Idx str_idx);
29	static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
30	Idx node, Idx str_idx);
31	static void sift_ctx_init (re_sift_context_t sctx, re_dfastate_t *sifted_sts,
32	re_dfastate_t **limited_sts, Idx last_node,
33	Idx last_str_idx);
34	static reg_errcode_t re_search_internal (const regex_t *preg,
35	const char *string, Idx length,
36	Idx start, Idx last_start, Idx stop,
37	size_t nmatch, regmatch_t pmatch[],
38	int eflags);
39	static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
40	const char *string1, Idx length1,
41	const char *string2, Idx length2,
42	Idx start, regoff_t range,
43	struct re_registers *regs,
44	Idx stop, bool ret_len);
45	static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
46	const char *string, Idx length, Idx start,
47	regoff_t range, Idx stop,
48	struct re_registers *regs,
49	bool ret_len);
50	static unsigned re_copy_regs (struct re_registers regs, regmatch_t pmatch,
51	Idx nregs, int regs_allocated);
52	static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx);
53	static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
54	Idx *p_match_first);
55	static Idx check_halt_state_context (const re_match_context_t *mctx,
56	const re_dfastate_t *state, Idx idx);
57	static void update_regs (const re_dfa_t dfa, regmatch_t pmatch,
58	regmatch_t *prev_idx_match, Idx cur_node,
59	Idx cur_idx, Idx nmatch);
60	static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
61	Idx str_idx, Idx dest_node, Idx nregs,
62	regmatch_t *regs,
63	re_node_set *eps_via_nodes);
64	static reg_errcode_t set_regs (const regex_t *preg,
65	const re_match_context_t *mctx,
66	size_t nmatch, regmatch_t *pmatch,
67	bool fl_backtrack);
68	static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
69
70	#ifdef RE_ENABLE_I18N
71	static int sift_states_iter_mb (const re_match_context_t *mctx,
72	re_sift_context_t *sctx,
73	Idx node_idx, Idx str_idx, Idx max_str_idx);
74	#endif /* RE_ENABLE_I18N */
75	static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
76	re_sift_context_t *sctx);
77	static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
78	re_sift_context_t *sctx, Idx str_idx,
79	re_node_set *cur_dest);
80	static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
81	re_sift_context_t *sctx,
82	Idx str_idx,
83	re_node_set *dest_nodes);
84	static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
85	re_node_set *dest_nodes,
86	const re_node_set *candidates);
87	static bool check_dst_limits (const re_match_context_t *mctx,
88	const re_node_set *limits,
89	Idx dst_node, Idx dst_idx, Idx src_node,
90	Idx src_idx);
91	static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
92	int boundaries, Idx subexp_idx,
93	Idx from_node, Idx bkref_idx);
94	static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
95	Idx limit, Idx subexp_idx,
96	Idx node, Idx str_idx,
97	Idx bkref_idx);
98	static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
99	re_node_set *dest_nodes,
100	const re_node_set *candidates,
101	re_node_set *limits,
102	struct re_backref_cache_entry *bkref_ents,
103	Idx str_idx);
104	static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
105	re_sift_context_t *sctx,
106	Idx str_idx, const re_node_set *candidates);
107	static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
108	re_dfastate_t **dst,
109	re_dfastate_t **src, Idx num);
110	static re_dfastate_t find_recover_state (reg_errcode_t err,
111	re_match_context_t *mctx);
112	static re_dfastate_t transit_state (reg_errcode_t err,
113	re_match_context_t *mctx,
114	re_dfastate_t *state);
115	static re_dfastate_t merge_state_with_log (reg_errcode_t err,
116	re_match_context_t *mctx,
117	re_dfastate_t *next_state);
118	static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
119	re_node_set *cur_nodes,
120	Idx str_idx);
121	#if 0
122	static re_dfastate_t transit_state_sb (reg_errcode_t err,
123	re_match_context_t *mctx,
124	re_dfastate_t *pstate);
125	#endif
126	#ifdef RE_ENABLE_I18N
127	static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
128	re_dfastate_t *pstate);
129	#endif /* RE_ENABLE_I18N */
130	static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
131	const re_node_set *nodes);
132	static reg_errcode_t get_subexp (re_match_context_t *mctx,
133	Idx bkref_node, Idx bkref_str_idx);
134	static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
135	const re_sub_match_top_t *sub_top,
136	re_sub_match_last_t *sub_last,
137	Idx bkref_node, Idx bkref_str);
138	static Idx find_subexp_node (const re_dfa_t dfa, const* re_node_set *nodes,
139	Idx subexp_idx, int type);
140	static reg_errcode_t check_arrival (re_match_context_t *mctx,
141	state_array_t *path, Idx top_node,
142	Idx top_str, Idx last_node, Idx last_str,
143	int type);
144	static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
145	Idx str_idx,
146	re_node_set *cur_nodes,
147	re_node_set *next_nodes);
148	static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
149	re_node_set *cur_nodes,
150	Idx ex_subexp, int type);
151	static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
152	re_node_set *dst_nodes,
153	Idx target, Idx ex_subexp,
154	int type);
155	static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
156	re_node_set *cur_nodes, Idx cur_str,
157	Idx subexp_num, int type);
158	static bool build_trtable (const re_dfa_t dfa, re_dfastate_t state);
159	#ifdef RE_ENABLE_I18N
160	static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
161	const re_string_t *input, Idx idx);
162	# ifdef _LIBC
163	static unsigned int find_collation_sequence_value (const unsigned char *mbs,
164	size_t name_len);
165	# endif /* _LIBC */
166	#endif /* RE_ENABLE_I18N */
167	static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
168	const re_dfastate_t *state,
169	re_node_set *states_node,
170	bitset_t *states_ch);
171	static bool check_node_accept (const re_match_context_t *mctx,
172	const re_token_t *node, Idx idx);
173	static reg_errcode_t extend_buffers (re_match_context_t mctx, int* min_len);
174
175	/ Entry point for POSIX code. /
176
177	/ regexec searches for a given pattern, specified by PREG, in the*
178	string STRING.
179
180	If NMATCH is zero or REG_NOSUB was set in the cflags argument to
181	'regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
182	least NMATCH elements, and we set them to the offsets of the
183	corresponding matched substrings.
184
185	EFLAGS specifies "execution flags" which affect matching: if
186	REG_NOTBOL is set, then ^ does not match at the beginning of the
187	string; if REG_NOTEOL is set, then $ does not match at the end.
188
189	We return 0 if we find a match and REG_NOMATCH if not. /*
190
191	int
192	regexec (const regex_t *__restrict preg, const char *__restrict string,
193	size_t nmatch, regmatch_t pmatch[], int eflags)
194	{
195	reg_errcode_t err;
196	Idx start, length;
197	re_dfa_t *dfa = preg->buffer;
198
199	if (eflags & ~(REG_NOTBOL \| REG_NOTEOL \| REG_STARTEND))
200	return REG_BADPAT;
201
202	if (eflags & REG_STARTEND)
203	{
204	start = pmatch[`0`].rm_so;
205	length = pmatch[`0`].rm_eo;
206	}
207	else
208	{
209	start = `0`;
210	length = strlen (string);
211	}
212
213	lock_lock (dfa->lock);
214	if (preg->no_sub)
215	err = re_search_internal (preg, string, length, start, length,
216	length, `0`, NULL, eflags);
217	else
218	err = re_search_internal (preg, string, length, start, length,
219	length, nmatch, pmatch, eflags);
220	lock_unlock (dfa->lock);
221	return err != REG_NOERROR;
222	}
223
224	#ifdef _LIBC
225	libc_hidden_def (__regexec)
226
227	# include <shlib-compat.h>
228	versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
229
230	# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
231	__typeof__ (__regexec) __compat_regexec;
232
233	int
234	attribute_compat_text_section
235	__compat_regexec (const regex_t *__restrict preg,
236	const char *__restrict string, size_t nmatch,
237	regmatch_t pmatch[], int eflags)
238	{
239	return regexec (preg, string, nmatch, pmatch,
240	eflags & (REG_NOTBOL \| REG_NOTEOL));
241	}
242	compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
243	# endif
244	#endif
245
246	/ Entry points for GNU code. /
247
248	/ re_match, re_search, re_match_2, re_search_2*
249
250	The former two functions operate on STRING with length LENGTH,
251	while the later two operate on concatenation of STRING1 and STRING2
252	with lengths LENGTH1 and LENGTH2, respectively.
253
254	re_match() matches the compiled pattern in BUFP against the string,
255	starting at index START.
256
257	re_search() first tries matching at index START, then it tries to match
258	starting from index START + 1, and so on. The last start position tried
259	is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same
260	way as re_match().)
261
262	The parameter STOP of re_{match,search}_2 specifies that no match exceeding
263	the first STOP characters of the concatenation of the strings should be
264	concerned.
265
266	If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
267	and all groups is stored in REGS. (For the "_2" variants, the offsets are
268	computed relative to the concatenation, not relative to the individual
269	strings.)
270
271	On success, re_match* functions return the length of the match, re_search*
272	return the position of the start of the match. Return value -1 means no
273	match was found and -2 indicates an internal error. /*
274
275	regoff_t
276	re_match (struct re_pattern_buffer bufp, const* char *string, Idx length,
277	Idx start, struct re_registers *regs)
278	{
279	return re_search_stub (bufp, string, length, start, `0`, length, regs, true);
280	}
281	#ifdef _LIBC
282	weak_alias (__re_match, re_match)
283	#endif
284
285	regoff_t
286	re_search (struct re_pattern_buffer bufp, const* char *string, Idx length,
287	Idx start, regoff_t range, struct re_registers *regs)
288	{
289	return re_search_stub (bufp, string, length, start, range, length, regs,
290	false);
291	}
292	#ifdef _LIBC
293	weak_alias (__re_search, re_search)
294	#endif
295
296	regoff_t
297	re_match_2 (struct re_pattern_buffer bufp, const* char *string1, Idx length1,
298	const char *string2, Idx length2, Idx start,
299	struct re_registers *regs, Idx stop)
300	{
301	return re_search_2_stub (bufp, string1, length1, string2, length2,
302	start, `0`, regs, stop, true);
303	}
304	#ifdef _LIBC
305	weak_alias (__re_match_2, re_match_2)
306	#endif
307
308	regoff_t
309	re_search_2 (struct re_pattern_buffer bufp, const* char *string1, Idx length1,
310	const char *string2, Idx length2, Idx start, regoff_t range,
311	struct re_registers *regs, Idx stop)
312	{
313	return re_search_2_stub (bufp, string1, length1, string2, length2,
314	start, range, regs, stop, false);
315	}
316	#ifdef _LIBC
317	weak_alias (__re_search_2, re_search_2)
318	#endif
319
320	static regoff_t
321	re_search_2_stub (struct re_pattern_buffer bufp, const* char *string1,
322	Idx length1, const char *string2, Idx length2, Idx start,
323	regoff_t range, struct re_registers *regs,
324	Idx stop, bool ret_len)
325	{
326	const char *str;
327	regoff_t rval;
328	Idx len;
329	char *s = NULL;
330
331	if (__glibc_unlikely ((length1 < `0` \|\| length2 < `0` \|\| stop < `0`
332	\|\| INT_ADD_WRAPV (length1, length2, &len))))
333	return -`2`;
334
335	/ Concatenate the strings. /
336	if (length2 > `0`)
337	if (length1 > `0`)
338	{
339	s = re_malloc (char, len);
340
341	if (__glibc_unlikely (s == NULL))
342	return -`2`;
343	#ifdef _LIBC
344	memcpy (__mempcpy (s, string1, length1), string2, length2);
345	#else
346	memcpy (s, string1, length1);
347	memcpy (s + length1, string2, length2);
348	#endif
349	str = s;
350	}
351	else
352	str = string2;
353	else
354	str = string1;
355
356	rval = re_search_stub (bufp, str, len, start, range, stop, regs,
357	ret_len);
358	re_free (s);
359	return rval;
360	}
361
362	/ The parameters have the same meaning as those of re_search.*
363	Additional parameters:
364	If RET_LEN is true the length of the match is returned (re_match style);
365	otherwise the position of the match is returned. /*
366
367	static regoff_t
368	re_search_stub (struct re_pattern_buffer bufp, const* char *string, Idx length,
369	Idx start, regoff_t range, Idx stop, struct re_registers *regs,
370	bool ret_len)
371	{
372	reg_errcode_t result;
373	regmatch_t *pmatch;
374	Idx nregs;
375	regoff_t rval;
376	int eflags = `0`;
377	re_dfa_t *dfa = bufp->buffer;
378	Idx last_start = start + range;
379
380	/ Check for out-of-range. /
381	if (__glibc_unlikely (start < `0` \|\| start > length))
382	return -`1`;
383	if (__glibc_unlikely (length < last_start
384	\|\| (`0` <= range && last_start < start)))
385	last_start = length;
386	else if (__glibc_unlikely (last_start < `0`
387	\|\| (range < `0` && start <= last_start)))
388	last_start = `0`;
389
390	lock_lock (dfa->lock);
391
392	eflags \|= (bufp->not_bol) ? REG_NOTBOL : `0`;
393	eflags \|= (bufp->not_eol) ? REG_NOTEOL : `0`;
394
395	/ Compile fastmap if we haven't yet. /
396	if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate)
397	re_compile_fastmap (bufp);
398
399	if (__glibc_unlikely (bufp->no_sub))
400	regs = NULL;
401
402	/ We need at least 1 register. /
403	if (regs == NULL)
404	nregs = `1`;
405	else if (__glibc_unlikely (bufp->regs_allocated == REGS_FIXED
406	&& regs->num_regs <= bufp->re_nsub))
407	{
408	nregs = regs->num_regs;
409	if (__glibc_unlikely (nregs < `1`))
410	{
411	/ Nothing can be copied to regs. /
412	regs = NULL;
413	nregs = `1`;
414	}
415	}
416	else
417	nregs = bufp->re_nsub + `1`;
418	pmatch = re_malloc (regmatch_t, nregs);
419	if (__glibc_unlikely (pmatch == NULL))
420	{
421	rval = -`2`;
422	goto out;
423	}
424
425	result = re_search_internal (bufp, string, length, start, last_start, stop,
426	nregs, pmatch, eflags);
427
428	rval = `0`;
429
430	/ I hope we needn't fill their regs with -1's when no match was found. /
431	if (result != REG_NOERROR)
432	rval = result == REG_NOMATCH ? -`1` : -`2`;
433	else if (regs != NULL)
434	{
435	/ If caller wants register contents data back, copy them. /
436	bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
437	bufp->regs_allocated);
438	if (__glibc_unlikely (bufp->regs_allocated == REGS_UNALLOCATED))
439	rval = -`2`;
440	}
441
442	if (__glibc_likely (rval == `0`))
443	{
444	if (ret_len)
445	{
446	assert (pmatch[`0`].rm_so == start);
447	rval = pmatch[`0`].rm_eo - start;
448	}
449	else
450	rval = pmatch[`0`].rm_so;
451	}
452	re_free (pmatch);
453	out:
454	lock_unlock (dfa->lock);
455	return rval;
456	}
457
458	static unsigned
459	re_copy_regs (struct re_registers regs, regmatch_t pmatch, Idx nregs,
460	int regs_allocated)
461	{
462	int rval = REGS_REALLOCATE;
463	Idx i;
464	Idx need_regs = nregs + `1`;
465	/ We need one extra element beyond 'num_regs' for the '-1' marker GNU code*
466	uses. /*
467
468	/ Have the register data arrays been allocated? /
469	if (regs_allocated == REGS_UNALLOCATED)
470	{ / No. So allocate them with malloc. /
471	regs->start = re_malloc (regoff_t, need_regs);
472	if (__glibc_unlikely (regs->start == NULL))
473	return REGS_UNALLOCATED;
474	regs->end = re_malloc (regoff_t, need_regs);
475	if (__glibc_unlikely (regs->end == NULL))
476	{
477	re_free (regs->start);
478	return REGS_UNALLOCATED;
479	}
480	regs->num_regs = need_regs;
481	}
482	else if (regs_allocated == REGS_REALLOCATE)
483	{ / Yes. If we need more elements than were already*
484	allocated, reallocate them. If we need fewer, just
485	leave it alone. /*
486	if (__glibc_unlikely (need_regs > regs->num_regs))
487	{
488	regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
489	regoff_t *new_end;
490	if (__glibc_unlikely (new_start == NULL))
491	return REGS_UNALLOCATED;
492	new_end = re_realloc (regs->end, regoff_t, need_regs);
493	if (__glibc_unlikely (new_end == NULL))
494	{
495	re_free (new_start);
496	return REGS_UNALLOCATED;
497	}
498	regs->start = new_start;
499	regs->end = new_end;
500	regs->num_regs = need_regs;
501	}
502	}
503	else
504	{
505	assert (regs_allocated == REGS_FIXED);
506	/ This function may not be called with REGS_FIXED and nregs too big. /
507	assert (regs->num_regs >= nregs);
508	rval = REGS_FIXED;
509	}
510
511	/ Copy the regs. /
512	for (i = `0`; i < nregs; ++i)
513	{
514	regs->start[i] = pmatch[i].rm_so;
515	regs->end[i] = pmatch[i].rm_eo;
516	}
517	for ( ; i < regs->num_regs; ++i)
518	regs->start[i] = regs->end[i] = -`1`;
519
520	return rval;
521	}
522
523	/ Set REGS to hold NUM_REGS registers, storing them in STARTS and*
524	ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
525	this memory for recording register information. STARTS and ENDS
526	must be allocated using the malloc library routine, and must each
527	be at least NUM_REGS sizeof (regoff_t) bytes long.*
528
529	If NUM_REGS == 0, then subsequent matches should allocate their own
530	register data.
531
532	Unless this function is called, the first search or match using
533	PATTERN_BUFFER will allocate its own register data, without
534	freeing the old data. /*
535
536	void
537	re_set_registers (struct re_pattern_buffer bufp, struct* re_registers *regs,
538	__re_size_t num_regs, regoff_t starts, regoff_t ends)
539	{
540	if (num_regs)
541	{
542	bufp->regs_allocated = REGS_REALLOCATE;
543	regs->num_regs = num_regs;
544	regs->start = starts;
545	regs->end = ends;
546	}
547	else
548	{
549	bufp->regs_allocated = REGS_UNALLOCATED;
550	regs->num_regs = `0`;
551	regs->start = regs->end = NULL;
552	}
553	}
554	#ifdef _LIBC
555	weak_alias (__re_set_registers, re_set_registers)
556	#endif
557
558	/ Entry points compatible with 4.2 BSD regex library. We don't define*
559	them unless specifically requested. /*
560
561	#if defined _REGEX_RE_COMP \|\| defined _LIBC
562	int
563	# ifdef _LIBC
564	weak_function
565	# endif
566	re_exec (const char *s)
567	{
568	return `0` == regexec (&re_comp_buf, s, `0`, NULL, `0`);
569	}
570	#endif /* _REGEX_RE_COMP */
571
572	/ Internal entry point. /
573
574	/ Searches for a compiled pattern PREG in the string STRING, whose*
575	length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
576	meaning as with regexec. LAST_START is START + RANGE, where
577	START and RANGE have the same meaning as with re_search.
578	Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
579	otherwise return the error code.
580	Note: We assume front end functions already check ranges.
581	(0 <= LAST_START && LAST_START <= LENGTH) /*
582
583	static reg_errcode_t
584	__attribute_warn_unused_result__
585	re_search_internal (const regex_t preg, const* char *string, Idx length,
586	Idx start, Idx last_start, Idx stop, size_t nmatch,
587	regmatch_t pmatch[], int eflags)
588	{
589	reg_errcode_t err;
590	const re_dfa_t *dfa = preg->buffer;
591	Idx left_lim, right_lim;
592	int incr;
593	bool fl_longest_match;
594	int match_kind;
595	Idx match_first;
596	Idx match_last = -`1`;
597	Idx extra_nmatch;
598	bool sb;
599	int ch;
600	#if defined _LIBC \|\| (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
601	re_match_context_t mctx = { .dfa = dfa };
602	#else
603	re_match_context_t mctx;
604	#endif
605	char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
606	&& start != last_start && !preg->can_be_null)
607	? preg->fastmap : NULL);
608	RE_TRANSLATE_TYPE t = preg->translate;
609
610	#if !(defined _LIBC \|\| (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
611	memset (&mctx, `'\0'`, sizeof (re_match_context_t));
612	mctx.dfa = dfa;
613	#endif
614
615	extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + `1`) : `0`;
616	nmatch -= extra_nmatch;
617
618	/ Check if the DFA haven't been compiled. /
619	if (__glibc_unlikely (preg->used == `0` \|\| dfa->init_state == NULL
620	\|\| dfa->init_state_word == NULL
621	\|\| dfa->init_state_nl == NULL
622	\|\| dfa->init_state_begbuf == NULL))
623	return REG_NOMATCH;
624
625	#ifdef DEBUG
626	/ We assume front-end functions already check them. /
627	assert (`0` <= last_start && last_start <= length);
628	#endif
629
630	/ If initial states with non-begbuf contexts have no elements,*
631	the regex must be anchored. If preg->newline_anchor is set,
632	we'll never use init_state_nl, so do not check it. /*
633	if (dfa->init_state->nodes.nelem == `0`
634	&& dfa->init_state_word->nodes.nelem == `0`
635	&& (dfa->init_state_nl->nodes.nelem == `0`
636	\|\| !preg->newline_anchor))
637	{
638	if (start != `0` && last_start != `0`)
639	return REG_NOMATCH;
640	start = last_start = `0`;
641	}
642
643	/ We must check the longest matching, if nmatch > 0. /
644	fl_longest_match = (nmatch != `0` \|\| dfa->nbackref);
645
646	err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + `1`,
647	preg->translate, (preg->syntax & RE_ICASE) != `0`,
648	dfa);
649	if (__glibc_unlikely (err != REG_NOERROR))
650	goto free_return;
651	mctx.input.stop = stop;
652	mctx.input.raw_stop = stop;
653	mctx.input.newline_anchor = preg->newline_anchor;
654
655	err = match_ctx_init (&mctx, eflags, dfa->nbackref * `2`);
656	if (__glibc_unlikely (err != REG_NOERROR))
657	goto free_return;
658
659	/ We will log all the DFA states through which the dfa pass,*
660	if nmatch > 1, or this dfa has "multibyte node", which is a
661	back-reference or a node which can accept multibyte character or
662	multi character collating element. /*
663	if (nmatch > `1` \|\| dfa->has_mb_node)
664	{
665	/ Avoid overflow. /
666	if (__glibc_unlikely ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
667	<= mctx.input.bufs_len)))
668	{
669	err = REG_ESPACE;
670	goto free_return;
671	}
672
673	mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + `1`);
674	if (__glibc_unlikely (mctx.state_log == NULL))
675	{
676	err = REG_ESPACE;
677	goto free_return;
678	}
679	}
680	else
681	mctx.state_log = NULL;
682
683	match_first = start;
684	mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
685	: CONTEXT_NEWLINE \| CONTEXT_BEGBUF;
686
687	/ Check incrementally whether the input string matches. /
688	incr = (last_start < start) ? -`1` : `1`;
689	left_lim = (last_start < start) ? last_start : start;
690	right_lim = (last_start < start) ? start : last_start;
691	sb = dfa->mb_cur_max == `1`;
692	match_kind =
693	(fastmap
694	? ((sb \|\| !(preg->syntax & RE_ICASE \|\| t) ? `4` : `0`)
695	\| (start <= last_start ? `2` : `0`)
696	\| (t != NULL ? `1` : `0`))
697	: `8`);
698
699	for (;; match_first += incr)
700	{
701	err = REG_NOMATCH;
702	if (match_first < left_lim \|\| right_lim < match_first)
703	goto free_return;
704
705	/ Advance as rapidly as possible through the string, until we*
706	find a plausible place to start matching. This may be done
707	with varying efficiency, so there are various possibilities:
708	only the most common of them are specialized, in order to
709	save on code size. We use a switch statement for speed. /*
710	switch (match_kind)
711	{
712	case `8`:
713	/ No fastmap. /
714	break;
715
716	case `7`:
717	/ Fastmap with single-byte translation, match forward. /
718	while (__glibc_likely (match_first < right_lim)
719	&& !fastmap[t[(unsigned char) string[match_first]]])
720	++match_first;
721	goto forward_match_found_start_or_reached_end;
722
723	case `6`:
724	/ Fastmap without translation, match forward. /
725	while (__glibc_likely (match_first < right_lim)
726	&& !fastmap[(unsigned char) string[match_first]])
727	++match_first;
728
729	forward_match_found_start_or_reached_end:
730	if (__glibc_unlikely (match_first == right_lim))
731	{
732	ch = match_first >= length
733	? `0` : (unsigned char) string[match_first];
734	if (!fastmap[t ? t[ch] : ch])
735	goto free_return;
736	}
737	break;
738
739	case `4`:
740	case `5`:
741	/ Fastmap without multi-byte translation, match backwards. /
742	while (match_first >= left_lim)
743	{
744	ch = match_first >= length
745	? `0` : (unsigned char) string[match_first];
746	if (fastmap[t ? t[ch] : ch])
747	break;
748	--match_first;
749	}
750	if (match_first < left_lim)
751	goto free_return;
752	break;
753
754	default:
755	/ In this case, we can't determine easily the current byte,*
756	since it might be a component byte of a multibyte
757	character. Then we use the constructed buffer instead. /*
758	for (;;)
759	{
760	/ If MATCH_FIRST is out of the valid range, reconstruct the*
761	buffers. /*
762	__re_size_t offset = match_first - mctx.input.raw_mbs_idx;
763	if (__glibc_unlikely (offset
764	>= (__re_size_t) mctx.input.valid_raw_len))
765	{
766	err = re_string_reconstruct (&mctx.input, match_first,
767	eflags);
768	if (__glibc_unlikely (err != REG_NOERROR))
769	goto free_return;
770
771	offset = match_first - mctx.input.raw_mbs_idx;
772	}
773	/ If MATCH_FIRST is out of the buffer, leave it as '\0'.*
774	Note that MATCH_FIRST must not be smaller than 0. /*
775	ch = (match_first >= length
776	? `0` : re_string_byte_at (&mctx.input, offset));
777	if (fastmap[ch])
778	break;
779	match_first += incr;
780	if (match_first < left_lim \|\| match_first > right_lim)
781	{
782	err = REG_NOMATCH;
783	goto free_return;
784	}
785	}
786	break;
787	}
788
789	/ Reconstruct the buffers so that the matcher can assume that*
790	the matching starts from the beginning of the buffer. /*
791	err = re_string_reconstruct (&mctx.input, match_first, eflags);
792	if (__glibc_unlikely (err != REG_NOERROR))
793	goto free_return;
794
795	#ifdef RE_ENABLE_I18N
796	/ Don't consider this char as a possible match start if it part,*
797	yet isn't the head, of a multibyte character. /*
798	if (!sb && !re_string_first_byte (&mctx.input, `0`))
799	continue;
800	#endif
801
802	/ It seems to be appropriate one, then use the matcher. /
803	/ We assume that the matching starts from 0. /
804	mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = `0`;
805	match_last = check_matching (&mctx, fl_longest_match,
806	start <= last_start ? &match_first : NULL);
807	if (match_last != -`1`)
808	{
809	if (__glibc_unlikely (match_last == -`2`))
810	{
811	err = REG_ESPACE;
812	goto free_return;
813	}
814	else
815	{
816	mctx.match_last = match_last;
817	if ((!preg->no_sub && nmatch > `1`) \|\| dfa->nbackref)
818	{
819	re_dfastate_t *pstate = mctx.state_log[match_last];
820	mctx.last_node = check_halt_state_context (&mctx, pstate,
821	match_last);
822	}
823	if ((!preg->no_sub && nmatch > `1` && dfa->has_plural_match)
824	\|\| dfa->nbackref)
825	{
826	err = prune_impossible_nodes (&mctx);
827	if (err == REG_NOERROR)
828	break;
829	if (__glibc_unlikely (err != REG_NOMATCH))
830	goto free_return;
831	match_last = -`1`;
832	}
833	else
834	break; / We found a match. /
835	}
836	}
837
838	match_ctx_clean (&mctx);
839	}
840
841	#ifdef DEBUG
842	assert (match_last != -`1`);
843	assert (err == REG_NOERROR);
844	#endif
845
846	/ Set pmatch[] if we need. /
847	if (nmatch > `0`)
848	{
849	Idx reg_idx;
850
851	/ Initialize registers. /
852	for (reg_idx = `1`; reg_idx < nmatch; ++reg_idx)
853	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -`1`;
854
855	/ Set the points where matching start/end. /
856	pmatch[`0`].rm_so = `0`;
857	pmatch[`0`].rm_eo = mctx.match_last;
858	/ FIXME: This function should fail if mctx.match_last exceeds*
859	the maximum possible regoff_t value. We need a new error
860	code REG_OVERFLOW. /*
861
862	if (!preg->no_sub && nmatch > `1`)
863	{
864	err = set_regs (preg, &mctx, nmatch, pmatch,
865	dfa->has_plural_match && dfa->nbackref > `0`);
866	if (__glibc_unlikely (err != REG_NOERROR))
867	goto free_return;
868	}
869
870	/ At last, add the offset to each register, since we slid*
871	the buffers so that we could assume that the matching starts
872	from 0. /*
873	for (reg_idx = `0`; reg_idx < nmatch; ++reg_idx)
874	if (pmatch[reg_idx].rm_so != -`1`)
875	{
876	#ifdef RE_ENABLE_I18N
877	if (__glibc_unlikely (mctx.input.offsets_needed != `0`))
878	{
879	pmatch[reg_idx].rm_so =
880	(pmatch[reg_idx].rm_so == mctx.input.valid_len
881	? mctx.input.valid_raw_len
882	: mctx.input.offsets[pmatch[reg_idx].rm_so]);
883	pmatch[reg_idx].rm_eo =
884	(pmatch[reg_idx].rm_eo == mctx.input.valid_len
885	? mctx.input.valid_raw_len
886	: mctx.input.offsets[pmatch[reg_idx].rm_eo]);
887	}
888	#else
889	assert (mctx.input.offsets_needed == `0`);
890	#endif
891	pmatch[reg_idx].rm_so += match_first;
892	pmatch[reg_idx].rm_eo += match_first;
893	}
894	for (reg_idx = `0`; reg_idx < extra_nmatch; ++reg_idx)
895	{
896	pmatch[nmatch + reg_idx].rm_so = -`1`;
897	pmatch[nmatch + reg_idx].rm_eo = -`1`;
898	}
899
900	if (dfa->subexp_map)
901	for (reg_idx = `0`; reg_idx + `1` < nmatch; reg_idx++)
902	if (dfa->subexp_map[reg_idx] != reg_idx)
903	{
904	pmatch[reg_idx + `1`].rm_so
905	= pmatch[dfa->subexp_map[reg_idx] + `1`].rm_so;
906	pmatch[reg_idx + `1`].rm_eo
907	= pmatch[dfa->subexp_map[reg_idx] + `1`].rm_eo;
908	}
909	}
910
911	free_return:
912	re_free (mctx.state_log);
913	if (dfa->nbackref)
914	match_ctx_free (&mctx);
915	re_string_destruct (&mctx.input);
916	return err;
917	}
918
919	static reg_errcode_t
920	__attribute_warn_unused_result__
921	prune_impossible_nodes (re_match_context_t *mctx)
922	{
923	const re_dfa_t *const dfa = mctx->dfa;
924	Idx halt_node, match_last;
925	reg_errcode_t ret;
926	re_dfastate_t **sifted_states;
927	re_dfastate_t **lim_states = NULL;
928	re_sift_context_t sctx;
929	#ifdef DEBUG
930	assert (mctx->state_log != NULL);
931	#endif
932	match_last = mctx->match_last;
933	halt_node = mctx->last_node;
934
935	/ Avoid overflow. /
936	if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
937	<= match_last))
938	return REG_ESPACE;
939
940	sifted_states = re_malloc (re_dfastate_t *, match_last + `1`);
941	if (__glibc_unlikely (sifted_states == NULL))
942	{
943	ret = REG_ESPACE;
944	goto free_return;
945	}
946	if (dfa->nbackref)
947	{
948	lim_states = re_malloc (re_dfastate_t *, match_last + `1`);
949	if (__glibc_unlikely (lim_states == NULL))
950	{
951	ret = REG_ESPACE;
952	goto free_return;
953	}
954	while (`1`)
955	{
956	memset (lim_states, `'\0'`,
957	sizeof (re_dfastate_t ) (match_last + `1`));
958	sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
959	match_last);
960	ret = sift_states_backward (mctx, &sctx);
961	re_node_set_free (&sctx.limits);
962	if (__glibc_unlikely (ret != REG_NOERROR))
963	goto free_return;
964	if (sifted_states[`0`] != NULL \|\| lim_states[`0`] != NULL)
965	break;
966	do
967	{
968	--match_last;
969	if (match_last < `0`)
970	{
971	ret = REG_NOMATCH;
972	goto free_return;
973	}
974	} while (mctx->state_log[match_last] == NULL
975	\|\| !mctx->state_log[match_last]->halt);
976	halt_node = check_halt_state_context (mctx,
977	mctx->state_log[match_last],
978	match_last);
979	}
980	ret = merge_state_array (dfa, sifted_states, lim_states,
981	match_last + `1`);
982	re_free (lim_states);
983	lim_states = NULL;
984	if (__glibc_unlikely (ret != REG_NOERROR))
985	goto free_return;
986	}
987	else
988	{
989	sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
990	ret = sift_states_backward (mctx, &sctx);
991	re_node_set_free (&sctx.limits);
992	if (__glibc_unlikely (ret != REG_NOERROR))
993	goto free_return;
994	if (sifted_states[`0`] == NULL)
995	{
996	ret = REG_NOMATCH;
997	goto free_return;
998	}
999	}
1000	re_free (mctx->state_log);
1001	mctx->state_log = sifted_states;
1002	sifted_states = NULL;
1003	mctx->last_node = halt_node;
1004	mctx->match_last = match_last;
1005	ret = REG_NOERROR;
1006	free_return:
1007	re_free (sifted_states);
1008	re_free (lim_states);
1009	return ret;
1010	}
1011
1012	/ Acquire an initial state and return it.*
1013	We must select appropriate initial state depending on the context,
1014	since initial states may have constraints like "\<", "^", etc.. /*
1015
1016	static inline re_dfastate_t *
1017	__attribute__ ((always_inline))
1018	acquire_init_state_context (reg_errcode_t err, const* re_match_context_t *mctx,
1019	Idx idx)
1020	{
1021	const re_dfa_t *const dfa = mctx->dfa;
1022	if (dfa->init_state->has_constraint)
1023	{
1024	unsigned int context;
1025	context = re_string_context_at (&mctx->input, idx - `1`, mctx->eflags);
1026	if (IS_WORD_CONTEXT (context))
1027	return dfa->init_state_word;
1028	else if (IS_ORDINARY_CONTEXT (context))
1029	return dfa->init_state;
1030	else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1031	return dfa->init_state_begbuf;
1032	else if (IS_NEWLINE_CONTEXT (context))
1033	return dfa->init_state_nl;
1034	else if (IS_BEGBUF_CONTEXT (context))
1035	{
1036	/ It is relatively rare case, then calculate on demand. /
1037	return re_acquire_state_context (err, dfa,
1038	dfa->init_state->entrance_nodes,
1039	context);
1040	}
1041	else
1042	/ Must not happen? /
1043	return dfa->init_state;
1044	}
1045	else
1046	return dfa->init_state;
1047	}
1048
1049	/ Check whether the regular expression match input string INPUT or not,*
1050	and return the index where the matching end. Return -1 if
1051	there is no match, and return -2 in case of an error.
1052	FL_LONGEST_MATCH means we want the POSIX longest matching.
1053	If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1054	next place where we may want to try matching.
1055	Note that the matcher assumes that the matching starts from the current
1056	index of the buffer. /*
1057
1058	static Idx
1059	__attribute_warn_unused_result__
1060	check_matching (re_match_context_t *mctx, bool fl_longest_match,
1061	Idx *p_match_first)
1062	{
1063	const re_dfa_t *const dfa = mctx->dfa;
1064	reg_errcode_t err;
1065	Idx match = `0`;
1066	Idx match_last = -`1`;
1067	Idx cur_str_idx = re_string_cur_idx (&mctx->input);
1068	re_dfastate_t *cur_state;
1069	bool at_init_state = p_match_first != NULL;
1070	Idx next_start_idx = cur_str_idx;
1071
1072	err = REG_NOERROR;
1073	cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1074	/ An initial state must not be NULL (invalid). /
1075	if (__glibc_unlikely (cur_state == NULL))
1076	{
1077	assert (err == REG_ESPACE);
1078	return -`2`;
1079	}
1080
1081	if (mctx->state_log != NULL)
1082	{
1083	mctx->state_log[cur_str_idx] = cur_state;
1084
1085	/ Check OP_OPEN_SUBEXP in the initial state in case that we use them*
1086	later. E.g. Processing back references. /*
1087	if (__glibc_unlikely (dfa->nbackref))
1088	{
1089	at_init_state = false;
1090	err = check_subexp_matching_top (mctx, &cur_state->nodes, `0`);
1091	if (__glibc_unlikely (err != REG_NOERROR))
1092	return err;
1093
1094	if (cur_state->has_backref)
1095	{
1096	err = transit_state_bkref (mctx, &cur_state->nodes);
1097	if (__glibc_unlikely (err != REG_NOERROR))
1098	return err;
1099	}
1100	}
1101	}
1102
1103	/ If the RE accepts NULL string. /
1104	if (__glibc_unlikely (cur_state->halt))
1105	{
1106	if (!cur_state->has_constraint
1107	\|\| check_halt_state_context (mctx, cur_state, cur_str_idx))
1108	{
1109	if (!fl_longest_match)
1110	return cur_str_idx;
1111	else
1112	{
1113	match_last = cur_str_idx;
1114	match = `1`;
1115	}
1116	}
1117	}
1118
1119	while (!re_string_eoi (&mctx->input))
1120	{
1121	re_dfastate_t *old_state = cur_state;
1122	Idx next_char_idx = re_string_cur_idx (&mctx->input) + `1`;
1123
1124	if ((__glibc_unlikely (next_char_idx >= mctx->input.bufs_len)
1125	&& mctx->input.bufs_len < mctx->input.len)
1126	\|\| (__glibc_unlikely (next_char_idx >= mctx->input.valid_len)
1127	&& mctx->input.valid_len < mctx->input.len))
1128	{
1129	err = extend_buffers (mctx, next_char_idx + `1`);
1130	if (__glibc_unlikely (err != REG_NOERROR))
1131	{
1132	assert (err == REG_ESPACE);
1133	return -`2`;
1134	}
1135	}
1136
1137	cur_state = transit_state (&err, mctx, cur_state);
1138	if (mctx->state_log != NULL)
1139	cur_state = merge_state_with_log (&err, mctx, cur_state);
1140
1141	if (cur_state == NULL)
1142	{
1143	/ Reached the invalid state or an error. Try to recover a valid*
1144	state using the state log, if available and if we have not
1145	already found a valid (even if not the longest) match. /*
1146	if (__glibc_unlikely (err != REG_NOERROR))
1147	return -`2`;
1148
1149	if (mctx->state_log == NULL
1150	\|\| (match && !fl_longest_match)
1151	\|\| (cur_state = find_recover_state (&err, mctx)) == NULL)
1152	break;
1153	}
1154
1155	if (__glibc_unlikely (at_init_state))
1156	{
1157	if (old_state == cur_state)
1158	next_start_idx = next_char_idx;
1159	else
1160	at_init_state = false;
1161	}
1162
1163	if (cur_state->halt)
1164	{
1165	/ Reached a halt state.*
1166	Check the halt state can satisfy the current context. /*
1167	if (!cur_state->has_constraint
1168	\|\| check_halt_state_context (mctx, cur_state,
1169	re_string_cur_idx (&mctx->input)))
1170	{
1171	/ We found an appropriate halt state. /
1172	match_last = re_string_cur_idx (&mctx->input);
1173	match = `1`;
1174
1175	/ We found a match, do not modify match_first below. /
1176	p_match_first = NULL;
1177	if (!fl_longest_match)
1178	break;
1179	}
1180	}
1181	}
1182
1183	if (p_match_first)
1184	*p_match_first += next_start_idx;
1185
1186	return match_last;
1187	}
1188
1189	/ Check NODE match the current context. /
1190
1191	static bool
1192	check_halt_node_context (const re_dfa_t dfa, Idx node, unsigned* int context)
1193	{
1194	re_token_type_t type = dfa->nodes[node].type;
1195	unsigned int constraint = dfa->nodes[node].constraint;
1196	if (type != END_OF_RE)
1197	return false;
1198	if (!constraint)
1199	return true;
1200	if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1201	return false;
1202	return true;
1203	}
1204
1205	/ Check the halt state STATE match the current context.*
1206	Return 0 if not match, if the node, STATE has, is a halt node and
1207	match the context, return the node. /*
1208
1209	static Idx
1210	check_halt_state_context (const re_match_context_t *mctx,
1211	const re_dfastate_t *state, Idx idx)
1212	{
1213	Idx i;
1214	unsigned int context;
1215	#ifdef DEBUG
1216	assert (state->halt);
1217	#endif
1218	context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1219	for (i = `0`; i < state->nodes.nelem; ++i)
1220	if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1221	return state->nodes.elems[i];
1222	return `0`;
1223	}
1224
1225	/ Compute the next node to which "NFA" transit from NODE("NFA" is a NFA*
1226	corresponding to the DFA).
1227	Return the destination node, and update EPS_VIA_NODES;
1228	return -1 in case of errors. /*
1229
1230	static Idx
1231	proceed_next_node (const re_match_context_t mctx, Idx nregs, regmatch_t regs,
1232	Idx pidx, Idx node, re_node_set eps_via_nodes,
1233	struct re_fail_stack_t *fs)
1234	{
1235	const re_dfa_t *const dfa = mctx->dfa;
1236	Idx i;
1237	bool ok;
1238	if (IS_EPSILON_NODE (dfa->nodes[node].type))
1239	{
1240	re_node_set cur_nodes = &mctx->state_log[pidx]->nodes;
1241	re_node_set *edests = &dfa->edests[node];
1242	Idx dest_node;
1243	ok = re_node_set_insert (eps_via_nodes, node);
1244	if (__glibc_unlikely (! ok))
1245	return -`2`;
1246	/ Pick up a valid destination, or return -1 if none*
1247	is found. /*
1248	for (dest_node = -`1`, i = `0`; i < edests->nelem; ++i)
1249	{
1250	Idx candidate = edests->elems[i];
1251	if (!re_node_set_contains (cur_nodes, candidate))
1252	continue;
1253	if (dest_node == -`1`)
1254	dest_node = candidate;
1255
1256	else
1257	{
1258	/ In order to avoid infinite loop like "(a)", return the second*
1259	epsilon-transition if the first was already considered. /*
1260	if (re_node_set_contains (eps_via_nodes, dest_node))
1261	return candidate;
1262
1263	/ Otherwise, push the second epsilon-transition on the fail stack. /
1264	else if (fs != NULL
1265	&& push_fail_stack (fs, *pidx, candidate, nregs, regs,
1266	eps_via_nodes))
1267	return -`2`;
1268
1269	/ We know we are going to exit. /
1270	break;
1271	}
1272	}
1273	return dest_node;
1274	}
1275	else
1276	{
1277	Idx naccepted = `0`;
1278	re_token_type_t type = dfa->nodes[node].type;
1279
1280	#ifdef RE_ENABLE_I18N
1281	if (dfa->nodes[node].accept_mb)
1282	naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1283	else
1284	#endif /* RE_ENABLE_I18N */
1285	if (type == OP_BACK_REF)
1286	{
1287	Idx subexp_idx = dfa->nodes[node].opr.idx + `1`;
1288	naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1289	if (fs != NULL)
1290	{
1291	if (regs[subexp_idx].rm_so == -`1` \|\| regs[subexp_idx].rm_eo == -`1`)
1292	return -`1`;
1293	else if (naccepted)
1294	{
1295	char buf = (char* *) re_string_get_buffer (&mctx->input);
1296	if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1297	naccepted) != `0`)
1298	return -`1`;
1299	}
1300	}
1301
1302	if (naccepted == `0`)
1303	{
1304	Idx dest_node;
1305	ok = re_node_set_insert (eps_via_nodes, node);
1306	if (__glibc_unlikely (! ok))
1307	return -`2`;
1308	dest_node = dfa->edests[node].elems[`0`];
1309	if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1310	dest_node))
1311	return dest_node;
1312	}
1313	}
1314
1315	if (naccepted != `0`
1316	\|\| check_node_accept (mctx, dfa->nodes + node, *pidx))
1317	{
1318	Idx dest_node = dfa->nexts[node];
1319	pidx = (naccepted == `0`) ? pidx + `1` : *pidx + naccepted;
1320	if (fs && (pidx > mctx->match_last \|\| mctx->state_log[pidx] == NULL
1321	\|\| !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1322	dest_node)))
1323	return -`1`;
1324	re_node_set_empty (eps_via_nodes);
1325	return dest_node;
1326	}
1327	}
1328	return -`1`;
1329	}
1330
1331	static reg_errcode_t
1332	__attribute_warn_unused_result__
1333	push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
1334	Idx nregs, regmatch_t regs, re_node_set eps_via_nodes)
1335	{
1336	reg_errcode_t err;
1337	Idx num = fs->num++;
1338	if (fs->num == fs->alloc)
1339	{
1340	struct re_fail_stack_ent_t *new_array;
1341	new_array = re_realloc (fs->stack, struct re_fail_stack_ent_t,
1342	fs->alloc * `2`);
1343	if (new_array == NULL)
1344	return REG_ESPACE;
1345	fs->alloc *= `2`;
1346	fs->stack = new_array;
1347	}
1348	fs->stack[num].idx = str_idx;
1349	fs->stack[num].node = dest_node;
1350	fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1351	if (fs->stack[num].regs == NULL)
1352	return REG_ESPACE;
1353	memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1354	err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1355	return err;
1356	}
1357
1358	static Idx
1359	pop_fail_stack (struct re_fail_stack_t fs, Idx pidx, Idx nregs,
1360	regmatch_t regs, re_node_set eps_via_nodes)
1361	{
1362	Idx num = --fs->num;
1363	assert (num >= `0`);
1364	*pidx = fs->stack[num].idx;
1365	memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1366	re_node_set_free (eps_via_nodes);
1367	re_free (fs->stack[num].regs);
1368	*eps_via_nodes = fs->stack[num].eps_via_nodes;
1369	return fs->stack[num].node;
1370	}
1371
1372	/ Set the positions where the subexpressions are starts/ends to registers*
1373	PMATCH.
1374	Note: We assume that pmatch[0] is already set, and
1375	pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. /*
1376
1377	static reg_errcode_t
1378	__attribute_warn_unused_result__
1379	set_regs (const regex_t preg, const* re_match_context_t *mctx, size_t nmatch,
1380	regmatch_t *pmatch, bool fl_backtrack)
1381	{
1382	const re_dfa_t *dfa = preg->buffer;
1383	Idx idx, cur_node;
1384	re_node_set eps_via_nodes;
1385	struct re_fail_stack_t *fs;
1386	struct re_fail_stack_t fs_body = { `0`, `2`, NULL };
1387	regmatch_t *prev_idx_match;
1388	bool prev_idx_match_malloced = false;
1389
1390	#ifdef DEBUG
1391	assert (nmatch > `1`);
1392	assert (mctx->state_log != NULL);
1393	#endif
1394	if (fl_backtrack)
1395	{
1396	fs = &fs_body;
1397	fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1398	if (fs->stack == NULL)
1399	return REG_ESPACE;
1400	}
1401	else
1402	fs = NULL;
1403
1404	cur_node = dfa->init_node;
1405	re_node_set_init_empty (&eps_via_nodes);
1406
1407	if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
1408	prev_idx_match = (regmatch_t ) alloca (nmatch sizeof (regmatch_t));
1409	else
1410	{
1411	prev_idx_match = re_malloc (regmatch_t, nmatch);
1412	if (prev_idx_match == NULL)
1413	{
1414	free_fail_stack_return (fs);
1415	return REG_ESPACE;
1416	}
1417	prev_idx_match_malloced = true;
1418	}
1419	memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1420
1421	for (idx = pmatch[`0`].rm_so; idx <= pmatch[`0`].rm_eo ;)
1422	{
1423	update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1424
1425	if (idx == pmatch[`0`].rm_eo && cur_node == mctx->last_node)
1426	{
1427	Idx reg_idx;
1428	if (fs)
1429	{
1430	for (reg_idx = `0`; reg_idx < nmatch; ++reg_idx)
1431	if (pmatch[reg_idx].rm_so > -`1` && pmatch[reg_idx].rm_eo == -`1`)
1432	break;
1433	if (reg_idx == nmatch)
1434	{
1435	re_node_set_free (&eps_via_nodes);
1436	if (prev_idx_match_malloced)
1437	re_free (prev_idx_match);
1438	return free_fail_stack_return (fs);
1439	}
1440	cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1441	&eps_via_nodes);
1442	}
1443	else
1444	{
1445	re_node_set_free (&eps_via_nodes);
1446	if (prev_idx_match_malloced)
1447	re_free (prev_idx_match);
1448	return REG_NOERROR;
1449	}
1450	}
1451
1452	/ Proceed to next node. /
1453	cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1454	&eps_via_nodes, fs);
1455
1456	if (__glibc_unlikely (cur_node < `0`))
1457	{
1458	if (__glibc_unlikely (cur_node == -`2`))
1459	{
1460	re_node_set_free (&eps_via_nodes);
1461	if (prev_idx_match_malloced)
1462	re_free (prev_idx_match);
1463	free_fail_stack_return (fs);
1464	return REG_ESPACE;
1465	}
1466	if (fs)
1467	cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1468	&eps_via_nodes);
1469	else
1470	{
1471	re_node_set_free (&eps_via_nodes);
1472	if (prev_idx_match_malloced)
1473	re_free (prev_idx_match);
1474	return REG_NOMATCH;
1475	}
1476	}
1477	}
1478	re_node_set_free (&eps_via_nodes);
1479	if (prev_idx_match_malloced)
1480	re_free (prev_idx_match);
1481	return free_fail_stack_return (fs);
1482	}
1483
1484	static reg_errcode_t
1485	free_fail_stack_return (struct re_fail_stack_t *fs)
1486	{
1487	if (fs)
1488	{
1489	Idx fs_idx;
1490	for (fs_idx = `0`; fs_idx < fs->num; ++fs_idx)
1491	{
1492	re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1493	re_free (fs->stack[fs_idx].regs);
1494	}
1495	re_free (fs->stack);
1496	}
1497	return REG_NOERROR;
1498	}
1499
1500	static void
1501	update_regs (const re_dfa_t dfa, regmatch_t pmatch,
1502	regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
1503	{
1504	int type = dfa->nodes[cur_node].type;
1505	if (type == OP_OPEN_SUBEXP)
1506	{
1507	Idx reg_num = dfa->nodes[cur_node].opr.idx + `1`;
1508
1509	/ We are at the first node of this sub expression. /
1510	if (reg_num < nmatch)
1511	{
1512	pmatch[reg_num].rm_so = cur_idx;
1513	pmatch[reg_num].rm_eo = -`1`;
1514	}
1515	}
1516	else if (type == OP_CLOSE_SUBEXP)
1517	{
1518	Idx reg_num = dfa->nodes[cur_node].opr.idx + `1`;
1519	if (reg_num < nmatch)
1520	{
1521	/ We are at the last node of this sub expression. /
1522	if (pmatch[reg_num].rm_so < cur_idx)
1523	{
1524	pmatch[reg_num].rm_eo = cur_idx;
1525	/ This is a non-empty match or we are not inside an optional*
1526	subexpression. Accept this right away. /*
1527	memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1528	}
1529	else
1530	{
1531	if (dfa->nodes[cur_node].opt_subexp
1532	&& prev_idx_match[reg_num].rm_so != -`1`)
1533	/ We transited through an empty match for an optional*
1534	subexpression, like (a?), and this is not the subexp's*
1535	first match. Copy back the old content of the registers
1536	so that matches of an inner subexpression are undone as
1537	well, like in ((a?)). /
1538	memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1539	else
1540	/ We completed a subexpression, but it may be part of*
1541	an optional one, so do not update PREV_IDX_MATCH. /*
1542	pmatch[reg_num].rm_eo = cur_idx;
1543	}
1544	}
1545	}
1546	}
1547
1548	/ This function checks the STATE_LOG from the SCTX->last_str_idx to 0*
1549	and sift the nodes in each states according to the following rules.
1550	Updated state_log will be wrote to STATE_LOG.
1551
1552	Rules: We throw away the Node 'a' in the STATE_LOG[STR_IDX] if...
1553	1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1554	If 'a' isn't the LAST_NODE and 'a' can't epsilon transit to
1555	the LAST_NODE, we throw away the node 'a'.
1556	2. When 0 <= STR_IDX < MATCH_LAST and 'a' accepts
1557	string 's' and transit to 'b':
1558	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1559	away the node 'a'.
1560	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1561	thrown away, we throw away the node 'a'.
1562	3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1563	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1564	node 'a'.
1565	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1566	we throw away the node 'a'. /*
1567
1568	#define STATE_NODE_CONTAINS(state,node) \
1569	((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1570
1571	static reg_errcode_t
1572	sift_states_backward (const re_match_context_t mctx, re_sift_context_t sctx)
1573	{
1574	reg_errcode_t err;
1575	int null_cnt = `0`;
1576	Idx str_idx = sctx->last_str_idx;
1577	re_node_set cur_dest;
1578
1579	#ifdef DEBUG
1580	assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1581	#endif
1582
1583	/ Build sifted state_log[str_idx]. It has the nodes which can epsilon*
1584	transit to the last_node and the last_node itself. /*
1585	err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1586	if (__glibc_unlikely (err != REG_NOERROR))
1587	return err;
1588	err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1589	if (__glibc_unlikely (err != REG_NOERROR))
1590	goto free_return;
1591
1592	/ Then check each states in the state_log. /
1593	while (str_idx > `0`)
1594	{
1595	/ Update counters. /
1596	null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + `1` : `0`;
1597	if (null_cnt > mctx->max_mb_elem_len)
1598	{
1599	memset (sctx->sifted_states, `'\0'`,
1600	sizeof (re_dfastate_t ) str_idx);
1601	re_node_set_free (&cur_dest);
1602	return REG_NOERROR;
1603	}
1604	re_node_set_empty (&cur_dest);
1605	--str_idx;
1606
1607	if (mctx->state_log[str_idx])
1608	{
1609	err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1610	if (__glibc_unlikely (err != REG_NOERROR))
1611	goto free_return;
1612	}
1613
1614	/ Add all the nodes which satisfy the following conditions:*
1615	- It can epsilon transit to a node in CUR_DEST.
1616	- It is in CUR_SRC.
1617	And update state_log. /*
1618	err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1619	if (__glibc_unlikely (err != REG_NOERROR))
1620	goto free_return;
1621	}
1622	err = REG_NOERROR;
1623	free_return:
1624	re_node_set_free (&cur_dest);
1625	return err;
1626	}
1627
1628	static reg_errcode_t
1629	__attribute_warn_unused_result__
1630	build_sifted_states (const re_match_context_t mctx, re_sift_context_t sctx,
1631	Idx str_idx, re_node_set *cur_dest)
1632	{
1633	const re_dfa_t *const dfa = mctx->dfa;
1634	const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1635	Idx i;
1636
1637	/ Then build the next sifted state.*
1638	We build the next sifted state on 'cur_dest', and update
1639	'sifted_states[str_idx]' with 'cur_dest'.
1640	Note:
1641	'cur_dest' is the sifted state from 'state_log[str_idx + 1]'.
1642	'cur_src' points the node_set of the old 'state_log[str_idx]'
1643	(with the epsilon nodes pre-filtered out). /*
1644	for (i = `0`; i < cur_src->nelem; i++)
1645	{
1646	Idx prev_node = cur_src->elems[i];
1647	int naccepted = `0`;
1648	bool ok;
1649
1650	#ifdef DEBUG
1651	re_token_type_t type = dfa->nodes[prev_node].type;
1652	assert (!IS_EPSILON_NODE (type));
1653	#endif
1654	#ifdef RE_ENABLE_I18N
1655	/ If the node may accept "multi byte". /
1656	if (dfa->nodes[prev_node].accept_mb)
1657	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1658	str_idx, sctx->last_str_idx);
1659	#endif /* RE_ENABLE_I18N */
1660
1661	/ We don't check backreferences here.*
1662	See update_cur_sifted_state(). /*
1663	if (!naccepted
1664	&& check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1665	&& STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + `1`],
1666	dfa->nexts[prev_node]))
1667	naccepted = `1`;
1668
1669	if (naccepted == `0`)
1670	continue;
1671
1672	if (sctx->limits.nelem)
1673	{
1674	Idx to_idx = str_idx + naccepted;
1675	if (check_dst_limits (mctx, &sctx->limits,
1676	dfa->nexts[prev_node], to_idx,
1677	prev_node, str_idx))
1678	continue;
1679	}
1680	ok = re_node_set_insert (cur_dest, prev_node);
1681	if (__glibc_unlikely (! ok))
1682	return REG_ESPACE;
1683	}
1684
1685	return REG_NOERROR;
1686	}
1687
1688	/ Helper functions. /
1689
1690	static reg_errcode_t
1691	clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
1692	{
1693	Idx top = mctx->state_log_top;
1694
1695	if ((next_state_log_idx >= mctx->input.bufs_len
1696	&& mctx->input.bufs_len < mctx->input.len)
1697	\|\| (next_state_log_idx >= mctx->input.valid_len
1698	&& mctx->input.valid_len < mctx->input.len))
1699	{
1700	reg_errcode_t err;
1701	err = extend_buffers (mctx, next_state_log_idx + `1`);
1702	if (__glibc_unlikely (err != REG_NOERROR))
1703	return err;
1704	}
1705
1706	if (top < next_state_log_idx)
1707	{
1708	memset (mctx->state_log + top + `1`, `'\0'`,
1709	sizeof (re_dfastate_t ) (next_state_log_idx - top));
1710	mctx->state_log_top = next_state_log_idx;
1711	}
1712	return REG_NOERROR;
1713	}
1714
1715	static reg_errcode_t
1716	merge_state_array (const re_dfa_t dfa, re_dfastate_t *dst,
1717	re_dfastate_t **src, Idx num)
1718	{
1719	Idx st_idx;
1720	reg_errcode_t err;
1721	for (st_idx = `0`; st_idx < num; ++st_idx)
1722	{
1723	if (dst[st_idx] == NULL)
1724	dst[st_idx] = src[st_idx];
1725	else if (src[st_idx] != NULL)
1726	{
1727	re_node_set merged_set;
1728	err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1729	&src[st_idx]->nodes);
1730	if (__glibc_unlikely (err != REG_NOERROR))
1731	return err;
1732	dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1733	re_node_set_free (&merged_set);
1734	if (__glibc_unlikely (err != REG_NOERROR))
1735	return err;
1736	}
1737	}
1738	return REG_NOERROR;
1739	}
1740
1741	static reg_errcode_t
1742	update_cur_sifted_state (const re_match_context_t *mctx,
1743	re_sift_context_t *sctx, Idx str_idx,
1744	re_node_set *dest_nodes)
1745	{
1746	const re_dfa_t *const dfa = mctx->dfa;
1747	reg_errcode_t err = REG_NOERROR;
1748	const re_node_set *candidates;
1749	candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1750	: &mctx->state_log[str_idx]->nodes);
1751
1752	if (dest_nodes->nelem == `0`)
1753	sctx->sifted_states[str_idx] = NULL;
1754	else
1755	{
1756	if (candidates)
1757	{
1758	/ At first, add the nodes which can epsilon transit to a node in*
1759	DEST_NODE. /*
1760	err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1761	if (__glibc_unlikely (err != REG_NOERROR))
1762	return err;
1763
1764	/ Then, check the limitations in the current sift_context. /
1765	if (sctx->limits.nelem)
1766	{
1767	err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1768	mctx->bkref_ents, str_idx);
1769	if (__glibc_unlikely (err != REG_NOERROR))
1770	return err;
1771	}
1772	}
1773
1774	sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1775	if (__glibc_unlikely (err != REG_NOERROR))
1776	return err;
1777	}
1778
1779	if (candidates && mctx->state_log[str_idx]->has_backref)
1780	{
1781	err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1782	if (__glibc_unlikely (err != REG_NOERROR))
1783	return err;
1784	}
1785	return REG_NOERROR;
1786	}
1787
1788	static reg_errcode_t
1789	__attribute_warn_unused_result__
1790	add_epsilon_src_nodes (const re_dfa_t dfa, re_node_set dest_nodes,
1791	const re_node_set *candidates)
1792	{
1793	reg_errcode_t err = REG_NOERROR;
1794	Idx i;
1795
1796	re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1797	if (__glibc_unlikely (err != REG_NOERROR))
1798	return err;
1799
1800	if (!state->inveclosure.alloc)
1801	{
1802	err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1803	if (__glibc_unlikely (err != REG_NOERROR))
1804	return REG_ESPACE;
1805	for (i = `0`; i < dest_nodes->nelem; i++)
1806	{
1807	err = re_node_set_merge (&state->inveclosure,
1808	dfa->inveclosures + dest_nodes->elems[i]);
1809	if (__glibc_unlikely (err != REG_NOERROR))
1810	return REG_ESPACE;
1811	}
1812	}
1813	return re_node_set_add_intersect (dest_nodes, candidates,
1814	&state->inveclosure);
1815	}
1816
1817	static reg_errcode_t
1818	sub_epsilon_src_nodes (const re_dfa_t dfa, Idx node, re_node_set dest_nodes,
1819	const re_node_set *candidates)
1820	{
1821	Idx ecl_idx;
1822	reg_errcode_t err;
1823	re_node_set *inv_eclosure = dfa->inveclosures + node;
1824	re_node_set except_nodes;
1825	re_node_set_init_empty (&except_nodes);
1826	for (ecl_idx = `0`; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1827	{
1828	Idx cur_node = inv_eclosure->elems[ecl_idx];
1829	if (cur_node == node)
1830	continue;
1831	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1832	{
1833	Idx edst1 = dfa->edests[cur_node].elems[`0`];
1834	Idx edst2 = ((dfa->edests[cur_node].nelem > `1`)
1835	? dfa->edests[cur_node].elems[`1`] : -`1`);
1836	if ((!re_node_set_contains (inv_eclosure, edst1)
1837	&& re_node_set_contains (dest_nodes, edst1))
1838	\|\| (edst2 > `0`
1839	&& !re_node_set_contains (inv_eclosure, edst2)
1840	&& re_node_set_contains (dest_nodes, edst2)))
1841	{
1842	err = re_node_set_add_intersect (&except_nodes, candidates,
1843	dfa->inveclosures + cur_node);
1844	if (__glibc_unlikely (err != REG_NOERROR))
1845	{
1846	re_node_set_free (&except_nodes);
1847	return err;
1848	}
1849	}
1850	}
1851	}
1852	for (ecl_idx = `0`; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1853	{
1854	Idx cur_node = inv_eclosure->elems[ecl_idx];
1855	if (!re_node_set_contains (&except_nodes, cur_node))
1856	{
1857	Idx idx = re_node_set_contains (dest_nodes, cur_node) - `1`;
1858	re_node_set_remove_at (dest_nodes, idx);
1859	}
1860	}
1861	re_node_set_free (&except_nodes);
1862	return REG_NOERROR;
1863	}
1864
1865	static bool
1866	check_dst_limits (const re_match_context_t mctx, const* re_node_set *limits,
1867	Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
1868	{
1869	const re_dfa_t *const dfa = mctx->dfa;
1870	Idx lim_idx, src_pos, dst_pos;
1871
1872	Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1873	Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1874	for (lim_idx = `0`; lim_idx < limits->nelem; ++lim_idx)
1875	{
1876	Idx subexp_idx;
1877	struct re_backref_cache_entry *ent;
1878	ent = mctx->bkref_ents + limits->elems[lim_idx];
1879	subexp_idx = dfa->nodes[ent->node].opr.idx;
1880
1881	dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1882	subexp_idx, dst_node, dst_idx,
1883	dst_bkref_idx);
1884	src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1885	subexp_idx, src_node, src_idx,
1886	src_bkref_idx);
1887
1888	/ In case of:*
1889	<src> <dst> ( <subexp> )
1890	( <subexp> ) <src> <dst>
1891	( <subexp1> <src> <subexp2> <dst> <subexp3> ) /*
1892	if (src_pos == dst_pos)
1893	continue; / This is unrelated limitation. /
1894	else
1895	return true;
1896	}
1897	return false;
1898	}
1899
1900	static int
1901	check_dst_limits_calc_pos_1 (const re_match_context_t mctx, int* boundaries,
1902	Idx subexp_idx, Idx from_node, Idx bkref_idx)
1903	{
1904	const re_dfa_t *const dfa = mctx->dfa;
1905	const re_node_set *eclosures = dfa->eclosures + from_node;
1906	Idx node_idx;
1907
1908	/ Else, we are on the boundary: examine the nodes on the epsilon*
1909	closure. /*
1910	for (node_idx = `0`; node_idx < eclosures->nelem; ++node_idx)
1911	{
1912	Idx node = eclosures->elems[node_idx];
1913	switch (dfa->nodes[node].type)
1914	{
1915	case OP_BACK_REF:
1916	if (bkref_idx != -`1`)
1917	{
1918	struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1919	do
1920	{
1921	Idx dst;
1922	int cpos;
1923
1924	if (ent->node != node)
1925	continue;
1926
1927	if (subexp_idx < BITSET_WORD_BITS
1928	&& !(ent->eps_reachable_subexps_map
1929	& ((bitset_word_t) `1` << subexp_idx)))
1930	continue;
1931
1932	/ Recurse trying to reach the OP_OPEN_SUBEXP and*
1933	OP_CLOSE_SUBEXP cases below. But, if the
1934	destination node is the same node as the source
1935	node, don't recurse because it would cause an
1936	infinite loop: a regex that exhibits this behavior
1937	is ()\1\1* /
1938	dst = dfa->edests[node].elems[`0`];
1939	if (dst == from_node)
1940	{
1941	if (boundaries & `1`)
1942	return -`1`;
1943	else / if (boundaries & 2) /
1944	return `0`;
1945	}
1946
1947	cpos =
1948	check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1949	dst, bkref_idx);
1950	if (cpos == -`1` / && (boundaries & 1) /)
1951	return -`1`;
1952	if (cpos == `0` && (boundaries & `2`))
1953	return `0`;
1954
1955	if (subexp_idx < BITSET_WORD_BITS)
1956	ent->eps_reachable_subexps_map
1957	&= ~((bitset_word_t) `1` << subexp_idx);
1958	}
1959	while (ent++->more);
1960	}
1961	break;
1962
1963	case OP_OPEN_SUBEXP:
1964	if ((boundaries & `1`) && subexp_idx == dfa->nodes[node].opr.idx)
1965	return -`1`;
1966	break;
1967
1968	case OP_CLOSE_SUBEXP:
1969	if ((boundaries & `2`) && subexp_idx == dfa->nodes[node].opr.idx)
1970	return `0`;
1971	break;
1972
1973	default:
1974	break;
1975	}
1976	}
1977
1978	return (boundaries & `2`) ? `1` : `0`;
1979	}
1980
1981	static int
1982	check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,
1983	Idx subexp_idx, Idx from_node, Idx str_idx,
1984	Idx bkref_idx)
1985	{
1986	struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
1987	int boundaries;
1988
1989	/ If we are outside the range of the subexpression, return -1 or 1. /
1990	if (str_idx < lim->subexp_from)
1991	return -`1`;
1992
1993	if (lim->subexp_to < str_idx)
1994	return `1`;
1995
1996	/ If we are within the subexpression, return 0. /
1997	boundaries = (str_idx == lim->subexp_from);
1998	boundaries \|= (str_idx == lim->subexp_to) << `1`;
1999	if (boundaries == `0`)
2000	return `0`;
2001
2002	/ Else, examine epsilon closure. /
2003	return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2004	from_node, bkref_idx);
2005	}
2006
2007	/ Check the limitations of sub expressions LIMITS, and remove the nodes*
2008	which are against limitations from DEST_NODES. /*
2009
2010	static reg_errcode_t
2011	check_subexp_limits (const re_dfa_t dfa, re_node_set dest_nodes,
2012	const re_node_set candidates, re_node_set limits,
2013	struct re_backref_cache_entry *bkref_ents, Idx str_idx)
2014	{
2015	reg_errcode_t err;
2016	Idx node_idx, lim_idx;
2017
2018	for (lim_idx = `0`; lim_idx < limits->nelem; ++lim_idx)
2019	{
2020	Idx subexp_idx;
2021	struct re_backref_cache_entry *ent;
2022	ent = bkref_ents + limits->elems[lim_idx];
2023
2024	if (str_idx <= ent->subexp_from \|\| ent->str_idx < str_idx)
2025	continue; / This is unrelated limitation. /
2026
2027	subexp_idx = dfa->nodes[ent->node].opr.idx;
2028	if (ent->subexp_to == str_idx)
2029	{
2030	Idx ops_node = -`1`;
2031	Idx cls_node = -`1`;
2032	for (node_idx = `0`; node_idx < dest_nodes->nelem; ++node_idx)
2033	{
2034	Idx node = dest_nodes->elems[node_idx];
2035	re_token_type_t type = dfa->nodes[node].type;
2036	if (type == OP_OPEN_SUBEXP
2037	&& subexp_idx == dfa->nodes[node].opr.idx)
2038	ops_node = node;
2039	else if (type == OP_CLOSE_SUBEXP
2040	&& subexp_idx == dfa->nodes[node].opr.idx)
2041	cls_node = node;
2042	}
2043
2044	/ Check the limitation of the open subexpression. /
2045	/ Note that (ent->subexp_to = str_idx != ent->subexp_from). /
2046	if (ops_node >= `0`)
2047	{
2048	err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2049	candidates);
2050	if (__glibc_unlikely (err != REG_NOERROR))
2051	return err;
2052	}
2053
2054	/ Check the limitation of the close subexpression. /
2055	if (cls_node >= `0`)
2056	for (node_idx = `0`; node_idx < dest_nodes->nelem; ++node_idx)
2057	{
2058	Idx node = dest_nodes->elems[node_idx];
2059	if (!re_node_set_contains (dfa->inveclosures + node,
2060	cls_node)
2061	&& !re_node_set_contains (dfa->eclosures + node,
2062	cls_node))
2063	{
2064	/ It is against this limitation.*
2065	Remove it form the current sifted state. /*
2066	err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2067	candidates);
2068	if (__glibc_unlikely (err != REG_NOERROR))
2069	return err;
2070	--node_idx;
2071	}
2072	}
2073	}
2074	else / (ent->subexp_to != str_idx) /
2075	{
2076	for (node_idx = `0`; node_idx < dest_nodes->nelem; ++node_idx)
2077	{
2078	Idx node = dest_nodes->elems[node_idx];
2079	re_token_type_t type = dfa->nodes[node].type;
2080	if (type == OP_CLOSE_SUBEXP \|\| type == OP_OPEN_SUBEXP)
2081	{
2082	if (subexp_idx != dfa->nodes[node].opr.idx)
2083	continue;
2084	/ It is against this limitation.*
2085	Remove it form the current sifted state. /*
2086	err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2087	candidates);
2088	if (__glibc_unlikely (err != REG_NOERROR))
2089	return err;
2090	}
2091	}
2092	}
2093	}
2094	return REG_NOERROR;
2095	}
2096
2097	static reg_errcode_t
2098	__attribute_warn_unused_result__
2099	sift_states_bkref (const re_match_context_t mctx, re_sift_context_t sctx,
2100	Idx str_idx, const re_node_set *candidates)
2101	{
2102	const re_dfa_t *const dfa = mctx->dfa;
2103	reg_errcode_t err;
2104	Idx node_idx, node;
2105	re_sift_context_t local_sctx;
2106	Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
2107
2108	if (first_idx == -`1`)
2109	return REG_NOERROR;
2110
2111	local_sctx.sifted_states = NULL; / Mark that it hasn't been initialized. /
2112
2113	for (node_idx = `0`; node_idx < candidates->nelem; ++node_idx)
2114	{
2115	Idx enabled_idx;
2116	re_token_type_t type;
2117	struct re_backref_cache_entry *entry;
2118	node = candidates->elems[node_idx];
2119	type = dfa->nodes[node].type;
2120	/ Avoid infinite loop for the REs like "()\1+". /
2121	if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2122	continue;
2123	if (type != OP_BACK_REF)
2124	continue;
2125
2126	entry = mctx->bkref_ents + first_idx;
2127	enabled_idx = first_idx;
2128	do
2129	{
2130	Idx subexp_len;
2131	Idx to_idx;
2132	Idx dst_node;
2133	bool ok;
2134	re_dfastate_t *cur_state;
2135
2136	if (entry->node != node)
2137	continue;
2138	subexp_len = entry->subexp_to - entry->subexp_from;
2139	to_idx = str_idx + subexp_len;
2140	dst_node = (subexp_len ? dfa->nexts[node]
2141	: dfa->edests[node].elems[`0`]);
2142
2143	if (to_idx > sctx->last_str_idx
2144	\|\| sctx->sifted_states[to_idx] == NULL
2145	\|\| !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2146	\|\| check_dst_limits (mctx, &sctx->limits, node,
2147	str_idx, dst_node, to_idx))
2148	continue;
2149
2150	if (local_sctx.sifted_states == NULL)
2151	{
2152	local_sctx = *sctx;
2153	err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2154	if (__glibc_unlikely (err != REG_NOERROR))
2155	goto free_return;
2156	}
2157	local_sctx.last_node = node;
2158	local_sctx.last_str_idx = str_idx;
2159	ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
2160	if (__glibc_unlikely (! ok))
2161	{
2162	err = REG_ESPACE;
2163	goto free_return;
2164	}
2165	cur_state = local_sctx.sifted_states[str_idx];
2166	err = sift_states_backward (mctx, &local_sctx);
2167	if (__glibc_unlikely (err != REG_NOERROR))
2168	goto free_return;
2169	if (sctx->limited_states != NULL)
2170	{
2171	err = merge_state_array (dfa, sctx->limited_states,
2172	local_sctx.sifted_states,
2173	str_idx + `1`);
2174	if (__glibc_unlikely (err != REG_NOERROR))
2175	goto free_return;
2176	}
2177	local_sctx.sifted_states[str_idx] = cur_state;
2178	re_node_set_remove (&local_sctx.limits, enabled_idx);
2179
2180	/ mctx->bkref_ents may have changed, reload the pointer. /
2181	entry = mctx->bkref_ents + enabled_idx;
2182	}
2183	while (enabled_idx++, entry++->more);
2184	}
2185	err = REG_NOERROR;
2186	free_return:
2187	if (local_sctx.sifted_states != NULL)
2188	{
2189	re_node_set_free (&local_sctx.limits);
2190	}
2191
2192	return err;
2193	}
2194
2195
2196	#ifdef RE_ENABLE_I18N
2197	static int
2198	sift_states_iter_mb (const re_match_context_t mctx, re_sift_context_t sctx,
2199	Idx node_idx, Idx str_idx, Idx max_str_idx)
2200	{
2201	const re_dfa_t *const dfa = mctx->dfa;
2202	int naccepted;
2203	/ Check the node can accept "multi byte". /
2204	naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2205	if (naccepted > `0` && str_idx + naccepted <= max_str_idx &&
2206	!STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2207	dfa->nexts[node_idx]))
2208	/ The node can't accept the "multi byte", or the*
2209	destination was already thrown away, then the node
2210	couldn't accept the current input "multi byte". /*
2211	naccepted = `0`;
2212	/ Otherwise, it is sure that the node could accept*
2213	'naccepted' bytes input. /*
2214	return naccepted;
2215	}
2216	#endif /* RE_ENABLE_I18N */
2217
2218
2219	/ Functions for state transition. /
2220
2221	/ Return the next state to which the current state STATE will transit by*
2222	accepting the current input byte, and update STATE_LOG if necessary.
2223	If STATE can accept a multibyte char/collating element/back reference
2224	update the destination of STATE_LOG. /*
2225
2226	static re_dfastate_t *
2227	__attribute_warn_unused_result__
2228	transit_state (reg_errcode_t err, re_match_context_t mctx,
2229	re_dfastate_t *state)
2230	{
2231	re_dfastate_t **trtable;
2232	unsigned char ch;
2233
2234	#ifdef RE_ENABLE_I18N
2235	/ If the current state can accept multibyte. /
2236	if (__glibc_unlikely (state->accept_mb))
2237	{
2238	*err = transit_state_mb (mctx, state);
2239	if (__glibc_unlikely (*err != REG_NOERROR))
2240	return NULL;
2241	}
2242	#endif /* RE_ENABLE_I18N */
2243
2244	/ Then decide the next state with the single byte. /
2245	#if 0
2246	if (`0`)
2247	/ don't use transition table /
2248	return transit_state_sb (err, mctx, state);
2249	#endif
2250
2251	/ Use transition table /
2252	ch = re_string_fetch_byte (&mctx->input);
2253	for (;;)
2254	{
2255	trtable = state->trtable;
2256	if (__glibc_likely (trtable != NULL))
2257	return trtable[ch];
2258
2259	trtable = state->word_trtable;
2260	if (__glibc_likely (trtable != NULL))
2261	{
2262	unsigned int context;
2263	context
2264	= re_string_context_at (&mctx->input,
2265	re_string_cur_idx (&mctx->input) - `1`,
2266	mctx->eflags);
2267	if (IS_WORD_CONTEXT (context))
2268	return trtable[ch + SBC_MAX];
2269	else
2270	return trtable[ch];
2271	}
2272
2273	if (!build_trtable (mctx->dfa, state))
2274	{
2275	*err = REG_ESPACE;
2276	return NULL;
2277	}
2278
2279	/ Retry, we now have a transition table. /
2280	}
2281	}
2282
2283	/ Update the state_log if we need /
2284	static re_dfastate_t *
2285	merge_state_with_log (reg_errcode_t err, re_match_context_t mctx,
2286	re_dfastate_t *next_state)
2287	{
2288	const re_dfa_t *const dfa = mctx->dfa;
2289	Idx cur_idx = re_string_cur_idx (&mctx->input);
2290
2291	if (cur_idx > mctx->state_log_top)
2292	{
2293	mctx->state_log[cur_idx] = next_state;
2294	mctx->state_log_top = cur_idx;
2295	}
2296	else if (mctx->state_log[cur_idx] == `0`)
2297	{
2298	mctx->state_log[cur_idx] = next_state;
2299	}
2300	else
2301	{
2302	re_dfastate_t *pstate;
2303	unsigned int context;
2304	re_node_set next_nodes, log_nodes, table_nodes = NULL;
2305	/ If (state_log[cur_idx] != 0), it implies that cur_idx is*
2306	the destination of a multibyte char/collating element/
2307	back reference. Then the next state is the union set of
2308	these destinations and the results of the transition table. /*
2309	pstate = mctx->state_log[cur_idx];
2310	log_nodes = pstate->entrance_nodes;
2311	if (next_state != NULL)
2312	{
2313	table_nodes = next_state->entrance_nodes;
2314	*err = re_node_set_init_union (&next_nodes, table_nodes,
2315	log_nodes);
2316	if (__glibc_unlikely (*err != REG_NOERROR))
2317	return NULL;
2318	}
2319	else
2320	next_nodes = *log_nodes;
2321	/ Note: We already add the nodes of the initial state,*
2322	then we don't need to add them here. /*
2323
2324	context = re_string_context_at (&mctx->input,
2325	re_string_cur_idx (&mctx->input) - `1`,
2326	mctx->eflags);
2327	next_state = mctx->state_log[cur_idx]
2328	= re_acquire_state_context (err, dfa, &next_nodes, context);
2329	/ We don't need to check errors here, since the return value of*
2330	this function is next_state and ERR is already set. /*
2331
2332	if (table_nodes != NULL)
2333	re_node_set_free (&next_nodes);
2334	}
2335
2336	if (__glibc_unlikely (dfa->nbackref) && next_state != NULL)
2337	{
2338	/ Check OP_OPEN_SUBEXP in the current state in case that we use them*
2339	later. We must check them here, since the back references in the
2340	next state might use them. /*
2341	*err = check_subexp_matching_top (mctx, &next_state->nodes,
2342	cur_idx);
2343	if (__glibc_unlikely (*err != REG_NOERROR))
2344	return NULL;
2345
2346	/ If the next state has back references. /
2347	if (next_state->has_backref)
2348	{
2349	*err = transit_state_bkref (mctx, &next_state->nodes);
2350	if (__glibc_unlikely (*err != REG_NOERROR))
2351	return NULL;
2352	next_state = mctx->state_log[cur_idx];
2353	}
2354	}
2355
2356	return next_state;
2357	}
2358
2359	/ Skip bytes in the input that correspond to part of a*
2360	multi-byte match, then look in the log for a state
2361	from which to restart matching. /*
2362	static re_dfastate_t *
2363	find_recover_state (reg_errcode_t err, re_match_context_t mctx)
2364	{
2365	re_dfastate_t *cur_state;
2366	do
2367	{
2368	Idx max = mctx->state_log_top;
2369	Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2370
2371	do
2372	{
2373	if (++cur_str_idx > max)
2374	return NULL;
2375	re_string_skip_bytes (&mctx->input, `1`);
2376	}
2377	while (mctx->state_log[cur_str_idx] == NULL);
2378
2379	cur_state = merge_state_with_log (err, mctx, NULL);
2380	}
2381	while (*err == REG_NOERROR && cur_state == NULL);
2382	return cur_state;
2383	}
2384
2385	/ Helper functions for transit_state. /
2386
2387	/ From the node set CUR_NODES, pick up the nodes whose types are*
2388	OP_OPEN_SUBEXP and which have corresponding back references in the regular
2389	expression. And register them to use them later for evaluating the
2390	corresponding back references. /*
2391
2392	static reg_errcode_t
2393	check_subexp_matching_top (re_match_context_t mctx, re_node_set cur_nodes,
2394	Idx str_idx)
2395	{
2396	const re_dfa_t *const dfa = mctx->dfa;
2397	Idx node_idx;
2398	reg_errcode_t err;
2399
2400	/ TODO: This isn't efficient.*
2401	Because there might be more than one nodes whose types are
2402	OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2403	nodes.
2404	E.g. RE: (a){2} /*
2405	for (node_idx = `0`; node_idx < cur_nodes->nelem; ++node_idx)
2406	{
2407	Idx node = cur_nodes->elems[node_idx];
2408	if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2409	&& dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2410	&& (dfa->used_bkref_map
2411	& ((bitset_word_t) `1` << dfa->nodes[node].opr.idx)))
2412	{
2413	err = match_ctx_add_subtop (mctx, node, str_idx);
2414	if (__glibc_unlikely (err != REG_NOERROR))
2415	return err;
2416	}
2417	}
2418	return REG_NOERROR;
2419	}
2420
2421	#if 0
2422	/ Return the next state to which the current state STATE will transit by*
2423	accepting the current input byte. /*
2424
2425	static re_dfastate_t *
2426	transit_state_sb (reg_errcode_t err, re_match_context_t mctx,
2427	re_dfastate_t *state)
2428	{
2429	const re_dfa_t *const dfa = mctx->dfa;
2430	re_node_set next_nodes;
2431	re_dfastate_t *next_state;
2432	Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2433	unsigned int context;
2434
2435	*err = re_node_set_alloc (&next_nodes, state->nodes.nelem + `1`);
2436	if (__glibc_unlikely (*err != REG_NOERROR))
2437	return NULL;
2438	for (node_cnt = `0`; node_cnt < state->nodes.nelem; ++node_cnt)
2439	{
2440	Idx cur_node = state->nodes.elems[node_cnt];
2441	if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2442	{
2443	*err = re_node_set_merge (&next_nodes,
2444	dfa->eclosures + dfa->nexts[cur_node]);
2445	if (__glibc_unlikely (*err != REG_NOERROR))
2446	{
2447	re_node_set_free (&next_nodes);
2448	return NULL;
2449	}
2450	}
2451	}
2452	context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2453	next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2454	/ We don't need to check errors here, since the return value of*
2455	this function is next_state and ERR is already set. /*
2456
2457	re_node_set_free (&next_nodes);
2458	re_string_skip_bytes (&mctx->input, `1`);
2459	return next_state;
2460	}
2461	#endif
2462
2463	#ifdef RE_ENABLE_I18N
2464	static reg_errcode_t
2465	transit_state_mb (re_match_context_t mctx, re_dfastate_t pstate)
2466	{
2467	const re_dfa_t *const dfa = mctx->dfa;
2468	reg_errcode_t err;
2469	Idx i;
2470
2471	for (i = `0`; i < pstate->nodes.nelem; ++i)
2472	{
2473	re_node_set dest_nodes, *new_nodes;
2474	Idx cur_node_idx = pstate->nodes.elems[i];
2475	int naccepted;
2476	Idx dest_idx;
2477	unsigned int context;
2478	re_dfastate_t *dest_state;
2479
2480	if (!dfa->nodes[cur_node_idx].accept_mb)
2481	continue;
2482
2483	if (dfa->nodes[cur_node_idx].constraint)
2484	{
2485	context = re_string_context_at (&mctx->input,
2486	re_string_cur_idx (&mctx->input),
2487	mctx->eflags);
2488	if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2489	context))
2490	continue;
2491	}
2492
2493	/ How many bytes the node can accept? /
2494	naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2495	re_string_cur_idx (&mctx->input));
2496	if (naccepted == `0`)
2497	continue;
2498
2499	/ The node can accepts 'naccepted' bytes. /
2500	dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2501	mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2502	: mctx->max_mb_elem_len);
2503	err = clean_state_log_if_needed (mctx, dest_idx);
2504	if (__glibc_unlikely (err != REG_NOERROR))
2505	return err;
2506	#ifdef DEBUG
2507	assert (dfa->nexts[cur_node_idx] != -`1`);
2508	#endif
2509	new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2510
2511	dest_state = mctx->state_log[dest_idx];
2512	if (dest_state == NULL)
2513	dest_nodes = *new_nodes;
2514	else
2515	{
2516	err = re_node_set_init_union (&dest_nodes,
2517	dest_state->entrance_nodes, new_nodes);
2518	if (__glibc_unlikely (err != REG_NOERROR))
2519	return err;
2520	}
2521	context = re_string_context_at (&mctx->input, dest_idx - `1`,
2522	mctx->eflags);
2523	mctx->state_log[dest_idx]
2524	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2525	if (dest_state != NULL)
2526	re_node_set_free (&dest_nodes);
2527	if (__glibc_unlikely (mctx->state_log[dest_idx] == NULL
2528	&& err != REG_NOERROR))
2529	return err;
2530	}
2531	return REG_NOERROR;
2532	}
2533	#endif /* RE_ENABLE_I18N */
2534
2535	static reg_errcode_t
2536	transit_state_bkref (re_match_context_t mctx, const* re_node_set *nodes)
2537	{
2538	const re_dfa_t *const dfa = mctx->dfa;
2539	reg_errcode_t err;
2540	Idx i;
2541	Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2542
2543	for (i = `0`; i < nodes->nelem; ++i)
2544	{
2545	Idx dest_str_idx, prev_nelem, bkc_idx;
2546	Idx node_idx = nodes->elems[i];
2547	unsigned int context;
2548	const re_token_t *node = dfa->nodes + node_idx;
2549	re_node_set *new_dest_nodes;
2550
2551	/ Check whether 'node' is a backreference or not. /
2552	if (node->type != OP_BACK_REF)
2553	continue;
2554
2555	if (node->constraint)
2556	{
2557	context = re_string_context_at (&mctx->input, cur_str_idx,
2558	mctx->eflags);
2559	if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2560	continue;
2561	}
2562
2563	/ 'node' is a backreference.*
2564	Check the substring which the substring matched. /*
2565	bkc_idx = mctx->nbkref_ents;
2566	err = get_subexp (mctx, node_idx, cur_str_idx);
2567	if (__glibc_unlikely (err != REG_NOERROR))
2568	goto free_return;
2569
2570	/ And add the epsilon closures (which is 'new_dest_nodes') of*
2571	the backreference to appropriate state_log. /*
2572	#ifdef DEBUG
2573	assert (dfa->nexts[node_idx] != -`1`);
2574	#endif
2575	for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2576	{
2577	Idx subexp_len;
2578	re_dfastate_t *dest_state;
2579	struct re_backref_cache_entry *bkref_ent;
2580	bkref_ent = mctx->bkref_ents + bkc_idx;
2581	if (bkref_ent->node != node_idx \|\| bkref_ent->str_idx != cur_str_idx)
2582	continue;
2583	subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2584	new_dest_nodes = (subexp_len == `0`
2585	? dfa->eclosures + dfa->edests[node_idx].elems[`0`]
2586	: dfa->eclosures + dfa->nexts[node_idx]);
2587	dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2588	- bkref_ent->subexp_from);
2589	context = re_string_context_at (&mctx->input, dest_str_idx - `1`,
2590	mctx->eflags);
2591	dest_state = mctx->state_log[dest_str_idx];
2592	prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? `0`
2593	: mctx->state_log[cur_str_idx]->nodes.nelem);
2594	/ Add 'new_dest_node' to state_log. /
2595	if (dest_state == NULL)
2596	{
2597	mctx->state_log[dest_str_idx]
2598	= re_acquire_state_context (&err, dfa, new_dest_nodes,
2599	context);
2600	if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL
2601	&& err != REG_NOERROR))
2602	goto free_return;
2603	}
2604	else
2605	{
2606	re_node_set dest_nodes;
2607	err = re_node_set_init_union (&dest_nodes,
2608	dest_state->entrance_nodes,
2609	new_dest_nodes);
2610	if (__glibc_unlikely (err != REG_NOERROR))
2611	{
2612	re_node_set_free (&dest_nodes);
2613	goto free_return;
2614	}
2615	mctx->state_log[dest_str_idx]
2616	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2617	re_node_set_free (&dest_nodes);
2618	if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL
2619	&& err != REG_NOERROR))
2620	goto free_return;
2621	}
2622	/ We need to check recursively if the backreference can epsilon*
2623	transit. /*
2624	if (subexp_len == `0`
2625	&& mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2626	{
2627	err = check_subexp_matching_top (mctx, new_dest_nodes,
2628	cur_str_idx);
2629	if (__glibc_unlikely (err != REG_NOERROR))
2630	goto free_return;
2631	err = transit_state_bkref (mctx, new_dest_nodes);
2632	if (__glibc_unlikely (err != REG_NOERROR))
2633	goto free_return;
2634	}
2635	}
2636	}
2637	err = REG_NOERROR;
2638	free_return:
2639	return err;
2640	}
2641
2642	/ Enumerate all the candidates which the backreference BKREF_NODE can match*
2643	at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2644	Note that we might collect inappropriate candidates here.
2645	However, the cost of checking them strictly here is too high, then we
2646	delay these checking for prune_impossible_nodes(). /*
2647
2648	static reg_errcode_t
2649	__attribute_warn_unused_result__
2650	get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
2651	{
2652	const re_dfa_t *const dfa = mctx->dfa;
2653	Idx subexp_num, sub_top_idx;
2654	const char buf = (const* char *) re_string_get_buffer (&mctx->input);
2655	/ Return if we have already checked BKREF_NODE at BKREF_STR_IDX. /
2656	Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2657	if (cache_idx != -`1`)
2658	{
2659	const struct re_backref_cache_entry *entry
2660	= mctx->bkref_ents + cache_idx;
2661	do
2662	if (entry->node == bkref_node)
2663	return REG_NOERROR; / We already checked it. /
2664	while (entry++->more);
2665	}
2666
2667	subexp_num = dfa->nodes[bkref_node].opr.idx;
2668
2669	/ For each sub expression /
2670	for (sub_top_idx = `0`; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2671	{
2672	reg_errcode_t err;
2673	re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2674	re_sub_match_last_t *sub_last;
2675	Idx sub_last_idx, sl_str, bkref_str_off;
2676
2677	if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2678	continue; / It isn't related. /
2679
2680	sl_str = sub_top->str_idx;
2681	bkref_str_off = bkref_str_idx;
2682	/ At first, check the last node of sub expressions we already*
2683	evaluated. /*
2684	for (sub_last_idx = `0`; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2685	{
2686	regoff_t sl_str_diff;
2687	sub_last = sub_top->lasts[sub_last_idx];
2688	sl_str_diff = sub_last->str_idx - sl_str;
2689	/ The matched string by the sub expression match with the substring*
2690	at the back reference? /*
2691	if (sl_str_diff > `0`)
2692	{
2693	if (__glibc_unlikely (bkref_str_off + sl_str_diff
2694	> mctx->input.valid_len))
2695	{
2696	/ Not enough chars for a successful match. /
2697	if (bkref_str_off + sl_str_diff > mctx->input.len)
2698	break;
2699
2700	err = clean_state_log_if_needed (mctx,
2701	bkref_str_off
2702	+ sl_str_diff);
2703	if (__glibc_unlikely (err != REG_NOERROR))
2704	return err;
2705	buf = (const char *) re_string_get_buffer (&mctx->input);
2706	}
2707	if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != `0`)
2708	/ We don't need to search this sub expression any more. /
2709	break;
2710	}
2711	bkref_str_off += sl_str_diff;
2712	sl_str += sl_str_diff;
2713	err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2714	bkref_str_idx);
2715
2716	/ Reload buf, since the preceding call might have reallocated*
2717	the buffer. /*
2718	buf = (const char *) re_string_get_buffer (&mctx->input);
2719
2720	if (err == REG_NOMATCH)
2721	continue;
2722	if (__glibc_unlikely (err != REG_NOERROR))
2723	return err;
2724	}
2725
2726	if (sub_last_idx < sub_top->nlasts)
2727	continue;
2728	if (sub_last_idx > `0`)
2729	++sl_str;
2730	/ Then, search for the other last nodes of the sub expression. /
2731	for (; sl_str <= bkref_str_idx; ++sl_str)
2732	{
2733	Idx cls_node;
2734	regoff_t sl_str_off;
2735	const re_node_set *nodes;
2736	sl_str_off = sl_str - sub_top->str_idx;
2737	/ The matched string by the sub expression match with the substring*
2738	at the back reference? /*
2739	if (sl_str_off > `0`)
2740	{
2741	if (__glibc_unlikely (bkref_str_off >= mctx->input.valid_len))
2742	{
2743	/ If we are at the end of the input, we cannot match. /
2744	if (bkref_str_off >= mctx->input.len)
2745	break;
2746
2747	err = extend_buffers (mctx, bkref_str_off + `1`);
2748	if (__glibc_unlikely (err != REG_NOERROR))
2749	return err;
2750
2751	buf = (const char *) re_string_get_buffer (&mctx->input);
2752	}
2753	if (buf [bkref_str_off++] != buf[sl_str - `1`])
2754	break; / We don't need to search this sub expression*
2755	any more. /*
2756	}
2757	if (mctx->state_log[sl_str] == NULL)
2758	continue;
2759	/ Does this state have a ')' of the sub expression? /
2760	nodes = &mctx->state_log[sl_str]->nodes;
2761	cls_node = find_subexp_node (dfa, nodes, subexp_num,
2762	OP_CLOSE_SUBEXP);
2763	if (cls_node == -`1`)
2764	continue; / No. /
2765	if (sub_top->path == NULL)
2766	{
2767	sub_top->path = calloc (sizeof (state_array_t),
2768	sl_str - sub_top->str_idx + `1`);
2769	if (sub_top->path == NULL)
2770	return REG_ESPACE;
2771	}
2772	/ Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node*
2773	in the current context? /*
2774	err = check_arrival (mctx, sub_top->path, sub_top->node,
2775	sub_top->str_idx, cls_node, sl_str,
2776	OP_CLOSE_SUBEXP);
2777	if (err == REG_NOMATCH)
2778	continue;
2779	if (__glibc_unlikely (err != REG_NOERROR))
2780	return err;
2781	sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2782	if (__glibc_unlikely (sub_last == NULL))
2783	return REG_ESPACE;
2784	err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2785	bkref_str_idx);
2786	buf = (const char *) re_string_get_buffer (&mctx->input);
2787	if (err == REG_NOMATCH)
2788	continue;
2789	if (__glibc_unlikely (err != REG_NOERROR))
2790	return err;
2791	}
2792	}
2793	return REG_NOERROR;
2794	}
2795
2796	/ Helper functions for get_subexp(). /
2797
2798	/ Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.*
2799	If it can arrive, register the sub expression expressed with SUB_TOP
2800	and SUB_LAST. /*
2801
2802	static reg_errcode_t
2803	get_subexp_sub (re_match_context_t mctx, const* re_sub_match_top_t *sub_top,
2804	re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
2805	{
2806	reg_errcode_t err;
2807	Idx to_idx;
2808	/ Can the subexpression arrive the back reference? /
2809	err = check_arrival (mctx, &sub_last->path, sub_last->node,
2810	sub_last->str_idx, bkref_node, bkref_str,
2811	OP_OPEN_SUBEXP);
2812	if (err != REG_NOERROR)
2813	return err;
2814	err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2815	sub_last->str_idx);
2816	if (__glibc_unlikely (err != REG_NOERROR))
2817	return err;
2818	to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2819	return clean_state_log_if_needed (mctx, to_idx);
2820	}
2821
2822	/ Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.*
2823	Search '(' if FL_OPEN, or search ')' otherwise.
2824	TODO: This function isn't efficient...
2825	Because there might be more than one nodes whose types are
2826	OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2827	nodes.
2828	E.g. RE: (a){2} /*
2829
2830	static Idx
2831	find_subexp_node (const re_dfa_t dfa, const* re_node_set *nodes,
2832	Idx subexp_idx, int type)
2833	{
2834	Idx cls_idx;
2835	for (cls_idx = `0`; cls_idx < nodes->nelem; ++cls_idx)
2836	{
2837	Idx cls_node = nodes->elems[cls_idx];
2838	const re_token_t *node = dfa->nodes + cls_node;
2839	if (node->type == type
2840	&& node->opr.idx == subexp_idx)
2841	return cls_node;
2842	}
2843	return -`1`;
2844	}
2845
2846	/ Check whether the node TOP_NODE at TOP_STR can arrive to the node*
2847	LAST_NODE at LAST_STR. We record the path onto PATH since it will be
2848	heavily reused.
2849	Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. /*
2850
2851	static reg_errcode_t
2852	__attribute_warn_unused_result__
2853	check_arrival (re_match_context_t mctx, state_array_t path, Idx top_node,
2854	Idx top_str, Idx last_node, Idx last_str, int type)
2855	{
2856	const re_dfa_t *const dfa = mctx->dfa;
2857	reg_errcode_t err = REG_NOERROR;
2858	Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
2859	re_dfastate_t *cur_state = NULL;
2860	re_node_set *cur_nodes, next_nodes;
2861	re_dfastate_t **backup_state_log;
2862	unsigned int context;
2863
2864	subexp_num = dfa->nodes[top_node].opr.idx;
2865	/ Extend the buffer if we need. /
2866	if (__glibc_unlikely (path->alloc < last_str + mctx->max_mb_elem_len + `1`))
2867	{
2868	re_dfastate_t **new_array;
2869	Idx old_alloc = path->alloc;
2870	Idx incr_alloc = last_str + mctx->max_mb_elem_len + `1`;
2871	Idx new_alloc;
2872	if (__glibc_unlikely (IDX_MAX - old_alloc < incr_alloc))
2873	return REG_ESPACE;
2874	new_alloc = old_alloc + incr_alloc;
2875	if (__glibc_unlikely (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc))
2876	return REG_ESPACE;
2877	new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
2878	if (__glibc_unlikely (new_array == NULL))
2879	return REG_ESPACE;
2880	path->array = new_array;
2881	path->alloc = new_alloc;
2882	memset (new_array + old_alloc, `'\0'`,
2883	sizeof (re_dfastate_t ) (path->alloc - old_alloc));
2884	}
2885
2886	str_idx = path->next_idx ? path->next_idx : top_str;
2887
2888	/ Temporary modify MCTX. /
2889	backup_state_log = mctx->state_log;
2890	backup_cur_idx = mctx->input.cur_idx;
2891	mctx->state_log = path->array;
2892	mctx->input.cur_idx = str_idx;
2893
2894	/ Setup initial node set. /
2895	context = re_string_context_at (&mctx->input, str_idx - `1`, mctx->eflags);
2896	if (str_idx == top_str)
2897	{
2898	err = re_node_set_init_1 (&next_nodes, top_node);
2899	if (__glibc_unlikely (err != REG_NOERROR))
2900	return err;
2901	err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2902	if (__glibc_unlikely (err != REG_NOERROR))
2903	{
2904	re_node_set_free (&next_nodes);
2905	return err;
2906	}
2907	}
2908	else
2909	{
2910	cur_state = mctx->state_log[str_idx];
2911	if (cur_state && cur_state->has_backref)
2912	{
2913	err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2914	if (__glibc_unlikely (err != REG_NOERROR))
2915	return err;
2916	}
2917	else
2918	re_node_set_init_empty (&next_nodes);
2919	}
2920	if (str_idx == top_str \|\| (cur_state && cur_state->has_backref))
2921	{
2922	if (next_nodes.nelem)
2923	{
2924	err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2925	subexp_num, type);
2926	if (__glibc_unlikely (err != REG_NOERROR))
2927	{
2928	re_node_set_free (&next_nodes);
2929	return err;
2930	}
2931	}
2932	cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2933	if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR))
2934	{
2935	re_node_set_free (&next_nodes);
2936	return err;
2937	}
2938	mctx->state_log[str_idx] = cur_state;
2939	}
2940
2941	for (null_cnt = `0`; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2942	{
2943	re_node_set_empty (&next_nodes);
2944	if (mctx->state_log[str_idx + `1`])
2945	{
2946	err = re_node_set_merge (&next_nodes,
2947	&mctx->state_log[str_idx + `1`]->nodes);
2948	if (__glibc_unlikely (err != REG_NOERROR))
2949	{
2950	re_node_set_free (&next_nodes);
2951	return err;
2952	}
2953	}
2954	if (cur_state)
2955	{
2956	err = check_arrival_add_next_nodes (mctx, str_idx,
2957	&cur_state->non_eps_nodes,
2958	&next_nodes);
2959	if (__glibc_unlikely (err != REG_NOERROR))
2960	{
2961	re_node_set_free (&next_nodes);
2962	return err;
2963	}
2964	}
2965	++str_idx;
2966	if (next_nodes.nelem)
2967	{
2968	err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2969	if (__glibc_unlikely (err != REG_NOERROR))
2970	{
2971	re_node_set_free (&next_nodes);
2972	return err;
2973	}
2974	err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2975	subexp_num, type);
2976	if (__glibc_unlikely (err != REG_NOERROR))
2977	{
2978	re_node_set_free (&next_nodes);
2979	return err;
2980	}
2981	}
2982	context = re_string_context_at (&mctx->input, str_idx - `1`, mctx->eflags);
2983	cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2984	if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR))
2985	{
2986	re_node_set_free (&next_nodes);
2987	return err;
2988	}
2989	mctx->state_log[str_idx] = cur_state;
2990	null_cnt = cur_state == NULL ? null_cnt + `1` : `0`;
2991	}
2992	re_node_set_free (&next_nodes);
2993	cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
2994	: &mctx->state_log[last_str]->nodes);
2995	path->next_idx = str_idx;
2996
2997	/ Fix MCTX. /
2998	mctx->state_log = backup_state_log;
2999	mctx->input.cur_idx = backup_cur_idx;
3000
3001	/ Then check the current node set has the node LAST_NODE. /
3002	if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
3003	return REG_NOERROR;
3004
3005	return REG_NOMATCH;
3006	}
3007
3008	/ Helper functions for check_arrival. /
3009
3010	/ Calculate the destination nodes of CUR_NODES at STR_IDX, and append them*
3011	to NEXT_NODES.
3012	TODO: This function is similar to the functions transit_state(),*
3013	however this function has many additional works.
3014	Can't we unify them? /*
3015
3016	static reg_errcode_t
3017	__attribute_warn_unused_result__
3018	check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
3019	re_node_set cur_nodes, re_node_set next_nodes)
3020	{
3021	const re_dfa_t *const dfa = mctx->dfa;
3022	bool ok;
3023	Idx cur_idx;
3024	#ifdef RE_ENABLE_I18N
3025	reg_errcode_t err = REG_NOERROR;
3026	#endif
3027	re_node_set union_set;
3028	re_node_set_init_empty (&union_set);
3029	for (cur_idx = `0`; cur_idx < cur_nodes->nelem; ++cur_idx)
3030	{
3031	int naccepted = `0`;
3032	Idx cur_node = cur_nodes->elems[cur_idx];
3033	#ifdef DEBUG
3034	re_token_type_t type = dfa->nodes[cur_node].type;
3035	assert (!IS_EPSILON_NODE (type));
3036	#endif
3037	#ifdef RE_ENABLE_I18N
3038	/ If the node may accept "multi byte". /
3039	if (dfa->nodes[cur_node].accept_mb)
3040	{
3041	naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
3042	str_idx);
3043	if (naccepted > `1`)
3044	{
3045	re_dfastate_t *dest_state;
3046	Idx next_node = dfa->nexts[cur_node];
3047	Idx next_idx = str_idx + naccepted;
3048	dest_state = mctx->state_log[next_idx];
3049	re_node_set_empty (&union_set);
3050	if (dest_state)
3051	{
3052	err = re_node_set_merge (&union_set, &dest_state->nodes);
3053	if (__glibc_unlikely (err != REG_NOERROR))
3054	{
3055	re_node_set_free (&union_set);
3056	return err;
3057	}
3058	}
3059	ok = re_node_set_insert (&union_set, next_node);
3060	if (__glibc_unlikely (! ok))
3061	{
3062	re_node_set_free (&union_set);
3063	return REG_ESPACE;
3064	}
3065	mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3066	&union_set);
3067	if (__glibc_unlikely (mctx->state_log[next_idx] == NULL
3068	&& err != REG_NOERROR))
3069	{
3070	re_node_set_free (&union_set);
3071	return err;
3072	}
3073	}
3074	}
3075	#endif /* RE_ENABLE_I18N */
3076	if (naccepted
3077	\|\| check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3078	{
3079	ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3080	if (__glibc_unlikely (! ok))
3081	{
3082	re_node_set_free (&union_set);
3083	return REG_ESPACE;
3084	}
3085	}
3086	}
3087	re_node_set_free (&union_set);
3088	return REG_NOERROR;
3089	}
3090
3091	/ For all the nodes in CUR_NODES, add the epsilon closures of them to*
3092	CUR_NODES, however exclude the nodes which are:
3093	- inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3094	- out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3095	*/
3096
3097	static reg_errcode_t
3098	check_arrival_expand_ecl (const re_dfa_t dfa, re_node_set cur_nodes,
3099	Idx ex_subexp, int type)
3100	{
3101	reg_errcode_t err;
3102	Idx idx, outside_node;
3103	re_node_set new_nodes;
3104	#ifdef DEBUG
3105	assert (cur_nodes->nelem);
3106	#endif
3107	err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3108	if (__glibc_unlikely (err != REG_NOERROR))
3109	return err;
3110	/ Create a new node set NEW_NODES with the nodes which are epsilon*
3111	closures of the node in CUR_NODES. /*
3112
3113	for (idx = `0`; idx < cur_nodes->nelem; ++idx)
3114	{
3115	Idx cur_node = cur_nodes->elems[idx];
3116	const re_node_set *eclosure = dfa->eclosures + cur_node;
3117	outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3118	if (outside_node == -`1`)
3119	{
3120	/ There are no problematic nodes, just merge them. /
3121	err = re_node_set_merge (&new_nodes, eclosure);
3122	if (__glibc_unlikely (err != REG_NOERROR))
3123	{
3124	re_node_set_free (&new_nodes);
3125	return err;
3126	}
3127	}
3128	else
3129	{
3130	/ There are problematic nodes, re-calculate incrementally. /
3131	err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3132	ex_subexp, type);
3133	if (__glibc_unlikely (err != REG_NOERROR))
3134	{
3135	re_node_set_free (&new_nodes);
3136	return err;
3137	}
3138	}
3139	}
3140	re_node_set_free (cur_nodes);
3141	*cur_nodes = new_nodes;
3142	return REG_NOERROR;
3143	}
3144
3145	/ Helper function for check_arrival_expand_ecl.*
3146	Check incrementally the epsilon closure of TARGET, and if it isn't
3147	problematic append it to DST_NODES. /*
3148
3149	static reg_errcode_t
3150	__attribute_warn_unused_result__
3151	check_arrival_expand_ecl_sub (const re_dfa_t dfa, re_node_set dst_nodes,
3152	Idx target, Idx ex_subexp, int type)
3153	{
3154	Idx cur_node;
3155	for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3156	{
3157	bool ok;
3158
3159	if (dfa->nodes[cur_node].type == type
3160	&& dfa->nodes[cur_node].opr.idx == ex_subexp)
3161	{
3162	if (type == OP_CLOSE_SUBEXP)
3163	{
3164	ok = re_node_set_insert (dst_nodes, cur_node);
3165	if (__glibc_unlikely (! ok))
3166	return REG_ESPACE;
3167	}
3168	break;
3169	}
3170	ok = re_node_set_insert (dst_nodes, cur_node);
3171	if (__glibc_unlikely (! ok))
3172	return REG_ESPACE;
3173	if (dfa->edests[cur_node].nelem == `0`)
3174	break;
3175	if (dfa->edests[cur_node].nelem == `2`)
3176	{
3177	reg_errcode_t err;
3178	err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3179	dfa->edests[cur_node].elems[`1`],
3180	ex_subexp, type);
3181	if (__glibc_unlikely (err != REG_NOERROR))
3182	return err;
3183	}
3184	cur_node = dfa->edests[cur_node].elems[`0`];
3185	}
3186	return REG_NOERROR;
3187	}
3188
3189
3190	/ For all the back references in the current state, calculate the*
3191	destination of the back references by the appropriate entry
3192	in MCTX->BKREF_ENTS. /*
3193
3194	static reg_errcode_t
3195	__attribute_warn_unused_result__
3196	expand_bkref_cache (re_match_context_t mctx, re_node_set cur_nodes,
3197	Idx cur_str, Idx subexp_num, int type)
3198	{
3199	const re_dfa_t *const dfa = mctx->dfa;
3200	reg_errcode_t err;
3201	Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3202	struct re_backref_cache_entry *ent;
3203
3204	if (cache_idx_start == -`1`)
3205	return REG_NOERROR;
3206
3207	restart:
3208	ent = mctx->bkref_ents + cache_idx_start;
3209	do
3210	{
3211	Idx to_idx, next_node;
3212
3213	/ Is this entry ENT is appropriate? /
3214	if (!re_node_set_contains (cur_nodes, ent->node))
3215	continue; / No. /
3216
3217	to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3218	/ Calculate the destination of the back reference, and append it*
3219	to MCTX->STATE_LOG. /*
3220	if (to_idx == cur_str)
3221	{
3222	/ The backreference did epsilon transit, we must re-check all the*
3223	node in the current state. /*
3224	re_node_set new_dests;
3225	reg_errcode_t err2, err3;
3226	next_node = dfa->edests[ent->node].elems[`0`];
3227	if (re_node_set_contains (cur_nodes, next_node))
3228	continue;
3229	err = re_node_set_init_1 (&new_dests, next_node);
3230	err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3231	err3 = re_node_set_merge (cur_nodes, &new_dests);
3232	re_node_set_free (&new_dests);
3233	if (__glibc_unlikely (err != REG_NOERROR \|\| err2 != REG_NOERROR
3234	\|\| err3 != REG_NOERROR))
3235	{
3236	err = (err != REG_NOERROR ? err
3237	: (err2 != REG_NOERROR ? err2 : err3));
3238	return err;
3239	}
3240	/ TODO: It is still inefficient... /
3241	goto restart;
3242	}
3243	else
3244	{
3245	re_node_set union_set;
3246	next_node = dfa->nexts[ent->node];
3247	if (mctx->state_log[to_idx])
3248	{
3249	bool ok;
3250	if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3251	next_node))
3252	continue;
3253	err = re_node_set_init_copy (&union_set,
3254	&mctx->state_log[to_idx]->nodes);
3255	ok = re_node_set_insert (&union_set, next_node);
3256	if (__glibc_unlikely (err != REG_NOERROR \|\| ! ok))
3257	{
3258	re_node_set_free (&union_set);
3259	err = err != REG_NOERROR ? err : REG_ESPACE;
3260	return err;
3261	}
3262	}
3263	else
3264	{
3265	err = re_node_set_init_1 (&union_set, next_node);
3266	if (__glibc_unlikely (err != REG_NOERROR))
3267	return err;
3268	}
3269	mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3270	re_node_set_free (&union_set);
3271	if (__glibc_unlikely (mctx->state_log[to_idx] == NULL
3272	&& err != REG_NOERROR))
3273	return err;
3274	}
3275	}
3276	while (ent++->more);
3277	return REG_NOERROR;
3278	}
3279
3280	/ Build transition table for the state.*
3281	Return true if successful. /*
3282
3283	static bool
3284	build_trtable (const re_dfa_t dfa, re_dfastate_t state)
3285	{
3286	reg_errcode_t err;
3287	Idx i, j;
3288	int ch;
3289	bool need_word_trtable = false;
3290	bitset_word_t elem, mask;
3291	bool dests_node_malloced = false;
3292	bool dest_states_malloced = false;
3293	Idx ndests; / Number of the destination states from 'state'. /
3294	re_dfastate_t **trtable;
3295	re_dfastate_t dest_states = NULL, dest_states_word, **dest_states_nl;
3296	re_node_set follows, *dests_node;
3297	bitset_t *dests_ch;
3298	bitset_t acceptable;
3299
3300	struct dests_alloc
3301	{
3302	re_node_set dests_node[SBC_MAX];
3303	bitset_t dests_ch[SBC_MAX];
3304	} *dests_alloc;
3305
3306	/ We build DFA states which corresponds to the destination nodes*
3307	from 'state'. 'dests_node[i]' represents the nodes which i-th
3308	destination state contains, and 'dests_ch[i]' represents the
3309	characters which i-th destination state accepts. /*
3310	if (__libc_use_alloca (sizeof (struct dests_alloc)))
3311	dests_alloc = (struct dests_alloc ) alloca (sizeof* (struct dests_alloc));
3312	else
3313	{
3314	dests_alloc = re_malloc (struct dests_alloc, `1`);
3315	if (__glibc_unlikely (dests_alloc == NULL))
3316	return false;
3317	dests_node_malloced = true;
3318	}
3319	dests_node = dests_alloc->dests_node;
3320	dests_ch = dests_alloc->dests_ch;
3321
3322	/ Initialize transition table. /
3323	state->word_trtable = state->trtable = NULL;
3324
3325	/ At first, group all nodes belonging to 'state' into several*
3326	destinations. /*
3327	ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3328	if (__glibc_unlikely (ndests <= `0`))
3329	{
3330	if (dests_node_malloced)
3331	re_free (dests_alloc);
3332	/ Return false in case of an error, true otherwise. /
3333	if (ndests == `0`)
3334	{
3335	state->trtable = (re_dfastate_t **)
3336	calloc (sizeof (re_dfastate_t *), SBC_MAX);
3337	if (__glibc_unlikely (state->trtable == NULL))
3338	return false;
3339	return true;
3340	}
3341	return false;
3342	}
3343
3344	err = re_node_set_alloc (&follows, ndests + `1`);
3345	if (__glibc_unlikely (err != REG_NOERROR))
3346	goto out_free;
3347
3348	/ Avoid arithmetic overflow in size calculation. /
3349	size_t ndests_max
3350	= ((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
3351	/ (`3` * sizeof (re_dfastate_t *)));
3352	if (__glibc_unlikely (ndests_max < ndests))
3353	goto out_free;
3354
3355	if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
3356	+ ndests * `3` * sizeof (re_dfastate_t *)))
3357	dest_states = (re_dfastate_t **)
3358	alloca (ndests * `3` * sizeof (re_dfastate_t *));
3359	else
3360	{
3361	dest_states = re_malloc (re_dfastate_t , ndests `3`);
3362	if (__glibc_unlikely (dest_states == NULL))
3363	{
3364	out_free:
3365	if (dest_states_malloced)
3366	re_free (dest_states);
3367	re_node_set_free (&follows);
3368	for (i = `0`; i < ndests; ++i)
3369	re_node_set_free (dests_node + i);
3370	if (dests_node_malloced)
3371	re_free (dests_alloc);
3372	return false;
3373	}
3374	dest_states_malloced = true;
3375	}
3376	dest_states_word = dest_states + ndests;
3377	dest_states_nl = dest_states_word + ndests;
3378	bitset_empty (acceptable);
3379
3380	/ Then build the states for all destinations. /
3381	for (i = `0`; i < ndests; ++i)
3382	{
3383	Idx next_node;
3384	re_node_set_empty (&follows);
3385	/ Merge the follows of this destination states. /
3386	for (j = `0`; j < dests_node[i].nelem; ++j)
3387	{
3388	next_node = dfa->nexts[dests_node[i].elems[j]];
3389	if (next_node != -`1`)
3390	{
3391	err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3392	if (__glibc_unlikely (err != REG_NOERROR))
3393	goto out_free;
3394	}
3395	}
3396	dest_states[i] = re_acquire_state_context (&err, dfa, &follows, `0`);
3397	if (__glibc_unlikely (dest_states[i] == NULL && err != REG_NOERROR))
3398	goto out_free;
3399	/ If the new state has context constraint,*
3400	build appropriate states for these contexts. /*
3401	if (dest_states[i]->has_constraint)
3402	{
3403	dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3404	CONTEXT_WORD);
3405	if (__glibc_unlikely (dest_states_word[i] == NULL
3406	&& err != REG_NOERROR))
3407	goto out_free;
3408
3409	if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > `1`)
3410	need_word_trtable = true;
3411
3412	dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3413	CONTEXT_NEWLINE);
3414	if (__glibc_unlikely (dest_states_nl[i] == NULL && err != REG_NOERROR))
3415	goto out_free;
3416	}
3417	else
3418	{
3419	dest_states_word[i] = dest_states[i];
3420	dest_states_nl[i] = dest_states[i];
3421	}
3422	bitset_merge (acceptable, dests_ch[i]);
3423	}
3424
3425	if (!__glibc_unlikely (need_word_trtable))
3426	{
3427	/ We don't care about whether the following character is a word*
3428	character, or we are in a single-byte character set so we can
3429	discern by looking at the character code: allocate a
3430	256-entry transition table. /*
3431	trtable = state->trtable =
3432	(re_dfastate_t ) calloc (sizeof** (re_dfastate_t *), SBC_MAX);
3433	if (__glibc_unlikely (trtable == NULL))
3434	goto out_free;
3435
3436	/ For all characters ch...: /
3437	for (i = `0`; i < BITSET_WORDS; ++i)
3438	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = `1`;
3439	elem;
3440	mask <<= `1`, elem >>= `1`, ++ch)
3441	if (__glibc_unlikely (elem & `1`))
3442	{
3443	/ There must be exactly one destination which accepts*
3444	character ch. See group_nodes_into_DFAstates. /*
3445	for (j = `0`; (dests_ch[j][i] & mask) == `0`; ++j)
3446	;
3447
3448	/ j-th destination accepts the word character ch. /
3449	if (dfa->word_char[i] & mask)
3450	trtable[ch] = dest_states_word[j];
3451	else
3452	trtable[ch] = dest_states[j];
3453	}
3454	}
3455	else
3456	{
3457	/ We care about whether the following character is a word*
3458	character, and we are in a multi-byte character set: discern
3459	by looking at the character code: build two 256-entry
3460	transition tables, one starting at trtable[0] and one
3461	starting at trtable[SBC_MAX]. /*
3462	trtable = state->word_trtable =
3463	(re_dfastate_t ) calloc (sizeof** (re_dfastate_t ), `2` SBC_MAX);
3464	if (__glibc_unlikely (trtable == NULL))
3465	goto out_free;
3466
3467	/ For all characters ch...: /
3468	for (i = `0`; i < BITSET_WORDS; ++i)
3469	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = `1`;
3470	elem;
3471	mask <<= `1`, elem >>= `1`, ++ch)
3472	if (__glibc_unlikely (elem & `1`))
3473	{
3474	/ There must be exactly one destination which accepts*
3475	character ch. See group_nodes_into_DFAstates. /*
3476	for (j = `0`; (dests_ch[j][i] & mask) == `0`; ++j)
3477	;
3478
3479	/ j-th destination accepts the word character ch. /
3480	trtable[ch] = dest_states[j];
3481	trtable[ch + SBC_MAX] = dest_states_word[j];
3482	}
3483	}
3484
3485	/ new line /
3486	if (bitset_contain (acceptable, NEWLINE_CHAR))
3487	{
3488	/ The current state accepts newline character. /
3489	for (j = `0`; j < ndests; ++j)
3490	if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3491	{
3492	/ k-th destination accepts newline character. /
3493	trtable[NEWLINE_CHAR] = dest_states_nl[j];
3494	if (need_word_trtable)
3495	trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3496	/ There must be only one destination which accepts*
3497	newline. See group_nodes_into_DFAstates. /*
3498	break;
3499	}
3500	}
3501
3502	if (dest_states_malloced)
3503	re_free (dest_states);
3504
3505	re_node_set_free (&follows);
3506	for (i = `0`; i < ndests; ++i)
3507	re_node_set_free (dests_node + i);
3508
3509	if (dests_node_malloced)
3510	re_free (dests_alloc);
3511
3512	return true;
3513	}
3514
3515	/ Group all nodes belonging to STATE into several destinations.*
3516	Then for all destinations, set the nodes belonging to the destination
3517	to DESTS_NODE[i] and set the characters accepted by the destination
3518	to DEST_CH[i]. This function return the number of destinations. /*
3519
3520	static Idx
3521	group_nodes_into_DFAstates (const re_dfa_t dfa, const* re_dfastate_t *state,
3522	re_node_set dests_node, bitset_t dests_ch)
3523	{
3524	reg_errcode_t err;
3525	bool ok;
3526	Idx i, j, k;
3527	Idx ndests; / Number of the destinations from 'state'. /
3528	bitset_t accepts; / Characters a node can accept. /
3529	const re_node_set *cur_nodes = &state->nodes;
3530	bitset_empty (accepts);
3531	ndests = `0`;
3532
3533	/ For all the nodes belonging to 'state', /
3534	for (i = `0`; i < cur_nodes->nelem; ++i)
3535	{
3536	re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3537	re_token_type_t type = node->type;
3538	unsigned int constraint = node->constraint;
3539
3540	/ Enumerate all single byte character this node can accept. /
3541	if (type == CHARACTER)
3542	bitset_set (accepts, node->opr.c);
3543	else if (type == SIMPLE_BRACKET)
3544	{
3545	bitset_merge (accepts, node->opr.sbcset);
3546	}
3547	else if (type == OP_PERIOD)
3548	{
3549	#ifdef RE_ENABLE_I18N
3550	if (dfa->mb_cur_max > `1`)
3551	bitset_merge (accepts, dfa->sb_char);
3552	else
3553	#endif
3554	bitset_set_all (accepts);
3555	if (!(dfa->syntax & RE_DOT_NEWLINE))
3556	bitset_clear (accepts, `'\n'`);
3557	if (dfa->syntax & RE_DOT_NOT_NULL)
3558	bitset_clear (accepts, `'\0'`);
3559	}
3560	#ifdef RE_ENABLE_I18N
3561	else if (type == OP_UTF8_PERIOD)
3562	{
3563	if (ASCII_CHARS % BITSET_WORD_BITS == `0`)
3564	memset (accepts, -`1`, ASCII_CHARS / CHAR_BIT);
3565	else
3566	bitset_merge (accepts, utf8_sb_map);
3567	if (!(dfa->syntax & RE_DOT_NEWLINE))
3568	bitset_clear (accepts, `'\n'`);
3569	if (dfa->syntax & RE_DOT_NOT_NULL)
3570	bitset_clear (accepts, `'\0'`);
3571	}
3572	#endif
3573	else
3574	continue;
3575
3576	/ Check the 'accepts' and sift the characters which are not*
3577	match it the context. /*
3578	if (constraint)
3579	{
3580	if (constraint & NEXT_NEWLINE_CONSTRAINT)
3581	{
3582	bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3583	bitset_empty (accepts);
3584	if (accepts_newline)
3585	bitset_set (accepts, NEWLINE_CHAR);
3586	else
3587	continue;
3588	}
3589	if (constraint & NEXT_ENDBUF_CONSTRAINT)
3590	{
3591	bitset_empty (accepts);
3592	continue;
3593	}
3594
3595	if (constraint & NEXT_WORD_CONSTRAINT)
3596	{
3597	bitset_word_t any_set = `0`;
3598	if (type == CHARACTER && !node->word_char)
3599	{
3600	bitset_empty (accepts);
3601	continue;
3602	}
3603	#ifdef RE_ENABLE_I18N
3604	if (dfa->mb_cur_max > `1`)
3605	for (j = `0`; j < BITSET_WORDS; ++j)
3606	any_set \|= (accepts[j] &= (dfa->word_char[j] \| ~dfa->sb_char[j]));
3607	else
3608	#endif
3609	for (j = `0`; j < BITSET_WORDS; ++j)
3610	any_set \|= (accepts[j] &= dfa->word_char[j]);
3611	if (!any_set)
3612	continue;
3613	}
3614	if (constraint & NEXT_NOTWORD_CONSTRAINT)
3615	{
3616	bitset_word_t any_set = `0`;
3617	if (type == CHARACTER && node->word_char)
3618	{
3619	bitset_empty (accepts);
3620	continue;
3621	}
3622	#ifdef RE_ENABLE_I18N
3623	if (dfa->mb_cur_max > `1`)
3624	for (j = `0`; j < BITSET_WORDS; ++j)
3625	any_set \|= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3626	else
3627	#endif
3628	for (j = `0`; j < BITSET_WORDS; ++j)
3629	any_set \|= (accepts[j] &= ~dfa->word_char[j]);
3630	if (!any_set)
3631	continue;
3632	}
3633	}
3634
3635	/ Then divide 'accepts' into DFA states, or create a new*
3636	state. Above, we make sure that accepts is not empty. /*
3637	for (j = `0`; j < ndests; ++j)
3638	{
3639	bitset_t intersec; / Intersection sets, see below. /
3640	bitset_t remains;
3641	/ Flags, see below. /
3642	bitset_word_t has_intersec, not_subset, not_consumed;
3643
3644	/ Optimization, skip if this state doesn't accept the character. /
3645	if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3646	continue;
3647
3648	/ Enumerate the intersection set of this state and 'accepts'. /
3649	has_intersec = `0`;
3650	for (k = `0`; k < BITSET_WORDS; ++k)
3651	has_intersec \|= intersec[k] = accepts[k] & dests_ch[j][k];
3652	/ And skip if the intersection set is empty. /
3653	if (!has_intersec)
3654	continue;
3655
3656	/ Then check if this state is a subset of 'accepts'. /
3657	not_subset = not_consumed = `0`;
3658	for (k = `0`; k < BITSET_WORDS; ++k)
3659	{
3660	not_subset \|= remains[k] = ~accepts[k] & dests_ch[j][k];
3661	not_consumed \|= accepts[k] = accepts[k] & ~dests_ch[j][k];
3662	}
3663
3664	/ If this state isn't a subset of 'accepts', create a*
3665	new group state, which has the 'remains'. /*
3666	if (not_subset)
3667	{
3668	bitset_copy (dests_ch[ndests], remains);
3669	bitset_copy (dests_ch[j], intersec);
3670	err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3671	if (__glibc_unlikely (err != REG_NOERROR))
3672	goto error_return;
3673	++ndests;
3674	}
3675
3676	/ Put the position in the current group. /
3677	ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3678	if (__glibc_unlikely (! ok))
3679	goto error_return;
3680
3681	/ If all characters are consumed, go to next node. /
3682	if (!not_consumed)
3683	break;
3684	}
3685	/ Some characters remain, create a new group. /
3686	if (j == ndests)
3687	{
3688	bitset_copy (dests_ch[ndests], accepts);
3689	err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3690	if (__glibc_unlikely (err != REG_NOERROR))
3691	goto error_return;
3692	++ndests;
3693	bitset_empty (accepts);
3694	}
3695	}
3696	return ndests;
3697	error_return:
3698	for (j = `0`; j < ndests; ++j)
3699	re_node_set_free (dests_node + j);
3700	return -`1`;
3701	}
3702
3703	#ifdef RE_ENABLE_I18N
3704	/ Check how many bytes the node 'dfa->nodes[node_idx]' accepts.*
3705	Return the number of the bytes the node accepts.
3706	STR_IDX is the current index of the input string.
3707
3708	This function handles the nodes which can accept one character, or
3709	one collating element like '.', '[a-z]', opposite to the other nodes
3710	can only accept one byte. /*
3711
3712	# ifdef _LIBC
3713	# include <locale/weight.h>
3714	# endif
3715
3716	static int
3717	check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
3718	const re_string_t *input, Idx str_idx)
3719	{
3720	const re_token_t *node = dfa->nodes + node_idx;
3721	int char_len, elem_len;
3722	Idx i;
3723
3724	if (__glibc_unlikely (node->type == OP_UTF8_PERIOD))
3725	{
3726	unsigned char c = re_string_byte_at (input, str_idx), d;
3727	if (__glibc_likely (c < `0xc2`))
3728	return `0`;
3729
3730	if (str_idx + `2` > input->len)
3731	return `0`;
3732
3733	d = re_string_byte_at (input, str_idx + `1`);
3734	if (c < `0xe0`)
3735	return (d < `0x80` \|\| d > `0xbf`) ? `0` : `2`;
3736	else if (c < `0xf0`)
3737	{
3738	char_len = `3`;
3739	if (c == `0xe0` && d < `0xa0`)
3740	return `0`;
3741	}
3742	else if (c < `0xf8`)
3743	{
3744	char_len = `4`;
3745	if (c == `0xf0` && d < `0x90`)
3746	return `0`;
3747	}
3748	else if (c < `0xfc`)
3749	{
3750	char_len = `5`;
3751	if (c == `0xf8` && d < `0x88`)
3752	return `0`;
3753	}
3754	else if (c < `0xfe`)
3755	{
3756	char_len = `6`;
3757	if (c == `0xfc` && d < `0x84`)
3758	return `0`;
3759	}
3760	else
3761	return `0`;
3762
3763	if (str_idx + char_len > input->len)
3764	return `0`;
3765
3766	for (i = `1`; i < char_len; ++i)
3767	{
3768	d = re_string_byte_at (input, str_idx + i);
3769	if (d < `0x80` \|\| d > `0xbf`)
3770	return `0`;
3771	}
3772	return char_len;
3773	}
3774
3775	char_len = re_string_char_size_at (input, str_idx);
3776	if (node->type == OP_PERIOD)
3777	{
3778	if (char_len <= `1`)
3779	return `0`;
3780	/ FIXME: I don't think this if is needed, as both '\n'*
3781	and '\0' are char_len == 1. /*
3782	/ '.' accepts any one character except the following two cases. /
3783	if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
3784	re_string_byte_at (input, str_idx) == `'\n'`) \|\|
3785	((dfa->syntax & RE_DOT_NOT_NULL) &&
3786	re_string_byte_at (input, str_idx) == `'\0'`))
3787	return `0`;
3788	return char_len;
3789	}
3790
3791	elem_len = re_string_elem_size_at (input, str_idx);
3792	if ((elem_len <= `1` && char_len <= `1`) \|\| char_len == `0`)
3793	return `0`;
3794
3795	if (node->type == COMPLEX_BRACKET)
3796	{
3797	const re_charset_t *cset = node->opr.mbcset;
3798	# ifdef _LIBC
3799	const unsigned char *pin
3800	= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3801	Idx j;
3802	uint32_t nrules;
3803	# endif /* _LIBC */
3804	int match_len = `0`;
3805	wchar_t wc = ((cset->nranges \|\| cset->nchar_classes \|\| cset->nmbchars)
3806	? re_string_wchar_at (input, str_idx) : `0`);
3807
3808	/ match with multibyte character? /
3809	for (i = `0`; i < cset->nmbchars; ++i)
3810	if (wc == cset->mbchars[i])
3811	{
3812	match_len = char_len;
3813	goto check_node_accept_bytes_match;
3814	}
3815	/ match with character_class? /
3816	for (i = `0`; i < cset->nchar_classes; ++i)
3817	{
3818	wctype_t wt = cset->char_classes[i];
3819	if (__iswctype (wc, wt))
3820	{
3821	match_len = char_len;
3822	goto check_node_accept_bytes_match;
3823	}
3824	}
3825
3826	# ifdef _LIBC
3827	nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3828	if (nrules != `0`)
3829	{
3830	unsigned int in_collseq = `0`;
3831	const int32_t table, indirect;
3832	const unsigned char weights, extra;
3833	const char *collseqwc;
3834
3835	/ match with collating_symbol? /
3836	if (cset->ncoll_syms)
3837	extra = (const unsigned char *)
3838	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3839	for (i = `0`; i < cset->ncoll_syms; ++i)
3840	{
3841	const unsigned char *coll_sym = extra + cset->coll_syms[i];
3842	/ Compare the length of input collating element and*
3843	the length of current collating element. /*
3844	if (*coll_sym != elem_len)
3845	continue;
3846	/ Compare each bytes. /
3847	for (j = `0`; j < *coll_sym; j++)
3848	if (pin[j] != coll_sym[`1` + j])
3849	break;
3850	if (j == *coll_sym)
3851	{
3852	/ Match if every bytes is equal. /
3853	match_len = j;
3854	goto check_node_accept_bytes_match;
3855	}
3856	}
3857
3858	if (cset->nranges)
3859	{
3860	if (elem_len <= char_len)
3861	{
3862	collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3863	in_collseq = __collseq_table_lookup (collseqwc, wc);
3864	}
3865	else
3866	in_collseq = find_collation_sequence_value (pin, elem_len);
3867	}
3868	/ match with range expression? /
3869	/ FIXME: Implement rational ranges here, too. /
3870	for (i = `0`; i < cset->nranges; ++i)
3871	if (cset->range_starts[i] <= in_collseq
3872	&& in_collseq <= cset->range_ends[i])
3873	{
3874	match_len = elem_len;
3875	goto check_node_accept_bytes_match;
3876	}
3877
3878	/ match with equivalence_class? /
3879	if (cset->nequiv_classes)
3880	{
3881	const unsigned char *cp = pin;
3882	table = (const int32_t *)
3883	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3884	weights = (const unsigned char *)
3885	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3886	extra = (const unsigned char *)
3887	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3888	indirect = (const int32_t *)
3889	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3890	int32_t idx = findidx (table, indirect, extra, &cp, elem_len);
3891	int32_t rule = idx >> `24`;
3892	idx &= `0xffffff`;
3893	if (idx > `0`)
3894	{
3895	size_t weight_len = weights[idx];
3896	for (i = `0`; i < cset->nequiv_classes; ++i)
3897	{
3898	int32_t equiv_class_idx = cset->equiv_classes[i];
3899	int32_t equiv_class_rule = equiv_class_idx >> `24`;
3900	equiv_class_idx &= `0xffffff`;
3901	if (weights[equiv_class_idx] == weight_len
3902	&& equiv_class_rule == rule
3903	&& memcmp (weights + idx + `1`,
3904	weights + equiv_class_idx + `1`,
3905	weight_len) == `0`)
3906	{
3907	match_len = elem_len;
3908	goto check_node_accept_bytes_match;
3909	}
3910	}
3911	}
3912	}
3913	}
3914	else
3915	# endif /* _LIBC */
3916	{
3917	/ match with range expression? /
3918	for (i = `0`; i < cset->nranges; ++i)
3919	{
3920	if (cset->range_starts[i] <= wc && wc <= cset->range_ends[i])
3921	{
3922	match_len = char_len;
3923	goto check_node_accept_bytes_match;
3924	}
3925	}
3926	}
3927	check_node_accept_bytes_match:
3928	if (!cset->non_match)
3929	return match_len;
3930	else
3931	{
3932	if (match_len > `0`)
3933	return `0`;
3934	else
3935	return (elem_len > char_len) ? elem_len : char_len;
3936	}
3937	}
3938	return `0`;
3939	}
3940
3941	# ifdef _LIBC
3942	static unsigned int
3943	find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
3944	{
3945	uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3946	if (nrules == `0`)
3947	{
3948	if (mbs_len == `1`)
3949	{
3950	/ No valid character. Match it as a single byte character. /
3951	const unsigned char collseq = (const* unsigned char *)
3952	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3953	return collseq[mbs[`0`]];
3954	}
3955	return UINT_MAX;
3956	}
3957	else
3958	{
3959	int32_t idx;
3960	const unsigned char extra = (const* unsigned char *)
3961	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3962	int32_t extrasize = (const unsigned char *)
3963	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + `1`) - extra;
3964
3965	for (idx = `0`; idx < extrasize;)
3966	{
3967	int mbs_cnt;
3968	bool found = false;
3969	int32_t elem_mbs_len;
3970	/ Skip the name of collating element name. /
3971	idx = idx + extra[idx] + `1`;
3972	elem_mbs_len = extra[idx++];
3973	if (mbs_len == elem_mbs_len)
3974	{
3975	for (mbs_cnt = `0`; mbs_cnt < elem_mbs_len; ++mbs_cnt)
3976	if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
3977	break;
3978	if (mbs_cnt == elem_mbs_len)
3979	/ Found the entry. /
3980	found = true;
3981	}
3982	/ Skip the byte sequence of the collating element. /
3983	idx += elem_mbs_len;
3984	/ Adjust for the alignment. /
3985	idx = (idx + `3`) & ~`3`;
3986	/ Skip the collation sequence value. /
3987	idx += sizeof (uint32_t);
3988	/ Skip the wide char sequence of the collating element. /
3989	idx = idx + sizeof (uint32_t) * ((int32_t ) (extra + idx) + `1`);
3990	/ If we found the entry, return the sequence value. /
3991	if (found)
3992	return (uint32_t ) (extra + idx);
3993	/ Skip the collation sequence value. /
3994	idx += sizeof (uint32_t);
3995	}
3996	return UINT_MAX;
3997	}
3998	}
3999	# endif /* _LIBC */
4000	#endif /* RE_ENABLE_I18N */
4001
4002	/ Check whether the node accepts the byte which is IDX-th*
4003	byte of the INPUT. /*
4004
4005	static bool
4006	check_node_accept (const re_match_context_t mctx, const* re_token_t *node,
4007	Idx idx)
4008	{
4009	unsigned char ch;
4010	ch = re_string_byte_at (&mctx->input, idx);
4011	switch (node->type)
4012	{
4013	case CHARACTER:
4014	if (node->opr.c != ch)
4015	return false;
4016	break;
4017
4018	case SIMPLE_BRACKET:
4019	if (!bitset_contain (node->opr.sbcset, ch))
4020	return false;
4021	break;
4022
4023	#ifdef RE_ENABLE_I18N
4024	case OP_UTF8_PERIOD:
4025	if (ch >= ASCII_CHARS)
4026	return false;
4027	FALLTHROUGH;
4028	#endif
4029	case OP_PERIOD:
4030	if ((ch == `'\n'` && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
4031	\|\| (ch == `'\0'` && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
4032	return false;
4033	break;
4034
4035	default:
4036	return false;
4037	}
4038
4039	if (node->constraint)
4040	{
4041	/ The node has constraints. Check whether the current context*
4042	satisfies the constraints. /*
4043	unsigned int context = re_string_context_at (&mctx->input, idx,
4044	mctx->eflags);
4045	if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
4046	return false;
4047	}
4048
4049	return true;
4050	}
4051
4052	/ Extend the buffers, if the buffers have run out. /
4053
4054	static reg_errcode_t
4055	__attribute_warn_unused_result__
4056	extend_buffers (re_match_context_t mctx, int* min_len)
4057	{
4058	reg_errcode_t ret;
4059	re_string_t *pstr = &mctx->input;
4060
4061	/ Avoid overflow. /
4062	if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / `2`
4063	<= pstr->bufs_len))
4064	return REG_ESPACE;
4065
4066	/ Double the lengths of the buffers, but allocate at least MIN_LEN. /
4067	ret = re_string_realloc_buffers (pstr,
4068	MAX (min_len,
4069	MIN (pstr->len, pstr->bufs_len * `2`)));
4070	if (__glibc_unlikely (ret != REG_NOERROR))
4071	return ret;
4072
4073	if (mctx->state_log != NULL)
4074	{
4075	/ And double the length of state_log. /
4076	/ XXX We have no indication of the size of this buffer. If this*
4077	allocation fail we have no indication that the state_log array
4078	does not have the right size. /*
4079	re_dfastate_t *new_array = re_realloc (mctx->state_log, re_dfastate_t ,
4080	pstr->bufs_len + `1`);
4081	if (__glibc_unlikely (new_array == NULL))
4082	return REG_ESPACE;
4083	mctx->state_log = new_array;
4084	}
4085
4086	/ Then reconstruct the buffers. /
4087	if (pstr->icase)
4088	{
4089	#ifdef RE_ENABLE_I18N
4090	if (pstr->mb_cur_max > `1`)
4091	{
4092	ret = build_wcs_upper_buffer (pstr);
4093	if (__glibc_unlikely (ret != REG_NOERROR))
4094	return ret;
4095	}
4096	else
4097	#endif /* RE_ENABLE_I18N */
4098	build_upper_buffer (pstr);
4099	}
4100	else
4101	{
4102	#ifdef RE_ENABLE_I18N
4103	if (pstr->mb_cur_max > `1`)
4104	build_wcs_buffer (pstr);
4105	else
4106	#endif /* RE_ENABLE_I18N */
4107	{
4108	if (pstr->trans != NULL)
4109	re_string_translate_buffer (pstr);
4110	}
4111	}
4112	return REG_NOERROR;
4113	}
4114
4115
4116	/ Functions for matching context. /
4117
4118	/ Initialize MCTX. /
4119
4120	static reg_errcode_t
4121	__attribute_warn_unused_result__
4122	match_ctx_init (re_match_context_t mctx, int* eflags, Idx n)
4123	{
4124	mctx->eflags = eflags;
4125	mctx->match_last = -`1`;
4126	if (n > `0`)
4127	{
4128	/ Avoid overflow. /
4129	size_t max_object_size =
4130	MAX (sizeof (struct re_backref_cache_entry),
4131	sizeof (re_sub_match_top_t *));
4132	if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n))
4133	return REG_ESPACE;
4134
4135	mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4136	mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4137	if (__glibc_unlikely (mctx->bkref_ents == NULL \|\| mctx->sub_tops == NULL))
4138	return REG_ESPACE;
4139	}
4140	/ Already zero-ed by the caller.*
4141	else
4142	mctx->bkref_ents = NULL;
4143	mctx->nbkref_ents = 0;
4144	mctx->nsub_tops = 0; /*
4145	mctx->abkref_ents = n;
4146	mctx->max_mb_elem_len = `1`;
4147	mctx->asub_tops = n;
4148	return REG_NOERROR;
4149	}
4150
4151	/ Clean the entries which depend on the current input in MCTX.*
4152	This function must be invoked when the matcher changes the start index
4153	of the input, or changes the input string. /*
4154
4155	static void
4156	match_ctx_clean (re_match_context_t *mctx)
4157	{
4158	Idx st_idx;
4159	for (st_idx = `0`; st_idx < mctx->nsub_tops; ++st_idx)
4160	{
4161	Idx sl_idx;
4162	re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4163	for (sl_idx = `0`; sl_idx < top->nlasts; ++sl_idx)
4164	{
4165	re_sub_match_last_t *last = top->lasts[sl_idx];
4166	re_free (last->path.array);
4167	re_free (last);
4168	}
4169	re_free (top->lasts);
4170	if (top->path)
4171	{
4172	re_free (top->path->array);
4173	re_free (top->path);
4174	}
4175	re_free (top);
4176	}
4177
4178	mctx->nsub_tops = `0`;
4179	mctx->nbkref_ents = `0`;
4180	}
4181
4182	/ Free all the memory associated with MCTX. /
4183
4184	static void
4185	match_ctx_free (re_match_context_t *mctx)
4186	{
4187	/ First, free all the memory associated with MCTX->SUB_TOPS. /
4188	match_ctx_clean (mctx);
4189	re_free (mctx->sub_tops);
4190	re_free (mctx->bkref_ents);
4191	}
4192
4193	/ Add a new backreference entry to MCTX.*
4194	Note that we assume that caller never call this function with duplicate
4195	entry, and call with STR_IDX which isn't smaller than any existing entry.
4196	*/
4197
4198	static reg_errcode_t
4199	__attribute_warn_unused_result__
4200	match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
4201	Idx to)
4202	{
4203	if (mctx->nbkref_ents >= mctx->abkref_ents)
4204	{
4205	struct re_backref_cache_entry* new_entry;
4206	new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4207	mctx->abkref_ents * `2`);
4208	if (__glibc_unlikely (new_entry == NULL))
4209	{
4210	re_free (mctx->bkref_ents);
4211	return REG_ESPACE;
4212	}
4213	mctx->bkref_ents = new_entry;
4214	memset (mctx->bkref_ents + mctx->nbkref_ents, `'\0'`,
4215	sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4216	mctx->abkref_ents *= `2`;
4217	}
4218	if (mctx->nbkref_ents > `0`
4219	&& mctx->bkref_ents[mctx->nbkref_ents - `1`].str_idx == str_idx)
4220	mctx->bkref_ents[mctx->nbkref_ents - `1`].more = `1`;
4221
4222	mctx->bkref_ents[mctx->nbkref_ents].node = node;
4223	mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4224	mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4225	mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4226
4227	/ This is a cache that saves negative results of check_dst_limits_calc_pos.*
4228	If bit N is clear, means that this entry won't epsilon-transition to
4229	an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If
4230	it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4231	such node.
4232
4233	A backreference does not epsilon-transition unless it is empty, so set
4234	to all zeros if FROM != TO. /*
4235	mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4236	= (from == to ? -`1` : `0`);
4237
4238	mctx->bkref_ents[mctx->nbkref_ents++].more = `0`;
4239	if (mctx->max_mb_elem_len < to - from)
4240	mctx->max_mb_elem_len = to - from;
4241	return REG_NOERROR;
4242	}
4243
4244	/ Return the first entry with the same str_idx, or -1 if none is*
4245	found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. /*
4246
4247	static Idx
4248	search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
4249	{
4250	Idx left, right, mid, last;
4251	last = right = mctx->nbkref_ents;
4252	for (left = `0`; left < right;)
4253	{
4254	mid = (left + right) / `2`;
4255	if (mctx->bkref_ents[mid].str_idx < str_idx)
4256	left = mid + `1`;
4257	else
4258	right = mid;
4259	}
4260	if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4261	return left;
4262	else
4263	return -`1`;
4264	}
4265
4266	/ Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches*
4267	at STR_IDX. /*
4268
4269	static reg_errcode_t
4270	__attribute_warn_unused_result__
4271	match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
4272	{
4273	#ifdef DEBUG
4274	assert (mctx->sub_tops != NULL);
4275	assert (mctx->asub_tops > `0`);
4276	#endif
4277	if (__glibc_unlikely (mctx->nsub_tops == mctx->asub_tops))
4278	{
4279	Idx new_asub_tops = mctx->asub_tops * `2`;
4280	re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4281	re_sub_match_top_t *,
4282	new_asub_tops);
4283	if (__glibc_unlikely (new_array == NULL))
4284	return REG_ESPACE;
4285	mctx->sub_tops = new_array;
4286	mctx->asub_tops = new_asub_tops;
4287	}
4288	mctx->sub_tops[mctx->nsub_tops] = calloc (`1`, sizeof (re_sub_match_top_t));
4289	if (__glibc_unlikely (mctx->sub_tops[mctx->nsub_tops] == NULL))
4290	return REG_ESPACE;
4291	mctx->sub_tops[mctx->nsub_tops]->node = node;
4292	mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4293	return REG_NOERROR;
4294	}
4295
4296	/ Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches*
4297	at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. /*
4298
4299	static re_sub_match_last_t *
4300	match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
4301	{
4302	re_sub_match_last_t *new_entry;
4303	if (__glibc_unlikely (subtop->nlasts == subtop->alasts))
4304	{
4305	Idx new_alasts = `2` * subtop->alasts + `1`;
4306	re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4307	re_sub_match_last_t *,
4308	new_alasts);
4309	if (__glibc_unlikely (new_array == NULL))
4310	return NULL;
4311	subtop->lasts = new_array;
4312	subtop->alasts = new_alasts;
4313	}
4314	new_entry = calloc (`1`, sizeof (re_sub_match_last_t));
4315	if (__glibc_likely (new_entry != NULL))
4316	{
4317	subtop->lasts[subtop->nlasts] = new_entry;
4318	new_entry->node = node;
4319	new_entry->str_idx = str_idx;
4320	++subtop->nlasts;
4321	}
4322	return new_entry;
4323	}
4324
4325	static void
4326	sift_ctx_init (re_sift_context_t sctx, re_dfastate_t *sifted_sts,
4327	re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx)
4328	{
4329	sctx->sifted_states = sifted_sts;
4330	sctx->limited_states = limited_sts;
4331	sctx->last_node = last_node;
4332	sctx->last_str_idx = last_str_idx;
4333	re_node_set_init_empty (&sctx->limits);
4334	}
4335

Browse the source code of glibc/posix/regexec.c