1 | /* Extended regular expression matching and search library. |
2 | Copyright (C) 2002-2023 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. |
5 | |
6 | The GNU C Library is free software; you can redistribute it and/or |
7 | modify it under the terms of the GNU Lesser General Public |
8 | License as published by the Free Software Foundation; either |
9 | version 2.1 of the License, or (at your option) any later version. |
10 | |
11 | The GNU C Library is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | Lesser General Public License for more details. |
15 | |
16 | You should have received a copy of the GNU Lesser General Public |
17 | License along with the GNU C Library; if not, see |
18 | <https://www.gnu.org/licenses/>. */ |
19 | |
20 | static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags, |
21 | Idx n); |
22 | static void match_ctx_clean (re_match_context_t *mctx); |
23 | static void match_ctx_free (re_match_context_t *cache); |
24 | static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node, |
25 | Idx str_idx, Idx from, Idx to); |
26 | static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx); |
27 | static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node, |
28 | Idx str_idx); |
29 | static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop, |
30 | Idx node, Idx str_idx); |
31 | static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, |
32 | re_dfastate_t **limited_sts, Idx last_node, |
33 | Idx last_str_idx); |
34 | static reg_errcode_t re_search_internal (const regex_t *preg, |
35 | const char *string, Idx length, |
36 | Idx start, Idx last_start, Idx stop, |
37 | size_t nmatch, regmatch_t pmatch[], |
38 | int eflags); |
39 | static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp, |
40 | const char *string1, Idx length1, |
41 | const char *string2, Idx length2, |
42 | Idx start, regoff_t range, |
43 | struct re_registers *regs, |
44 | Idx stop, bool ret_len); |
45 | static regoff_t re_search_stub (struct re_pattern_buffer *bufp, |
46 | const char *string, Idx length, Idx start, |
47 | regoff_t range, Idx stop, |
48 | struct re_registers *regs, |
49 | bool ret_len); |
50 | static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, |
51 | Idx nregs, int regs_allocated); |
52 | static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx); |
53 | static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match, |
54 | Idx *p_match_first); |
55 | static Idx check_halt_state_context (const re_match_context_t *mctx, |
56 | const re_dfastate_t *state, Idx idx); |
57 | static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, |
58 | regmatch_t *prev_idx_match, Idx cur_node, |
59 | Idx cur_idx, Idx nmatch); |
60 | static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs, |
61 | Idx str_idx, Idx dest_node, Idx nregs, |
62 | regmatch_t *regs, regmatch_t *prevregs, |
63 | re_node_set *eps_via_nodes); |
64 | static reg_errcode_t set_regs (const regex_t *preg, |
65 | const re_match_context_t *mctx, |
66 | size_t nmatch, regmatch_t *pmatch, |
67 | bool fl_backtrack); |
68 | static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs); |
69 | |
70 | #ifdef RE_ENABLE_I18N |
71 | static int sift_states_iter_mb (const re_match_context_t *mctx, |
72 | re_sift_context_t *sctx, |
73 | Idx node_idx, Idx str_idx, Idx max_str_idx); |
74 | #endif /* RE_ENABLE_I18N */ |
75 | static reg_errcode_t sift_states_backward (const re_match_context_t *mctx, |
76 | re_sift_context_t *sctx); |
77 | static reg_errcode_t build_sifted_states (const re_match_context_t *mctx, |
78 | re_sift_context_t *sctx, Idx str_idx, |
79 | re_node_set *cur_dest); |
80 | static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx, |
81 | re_sift_context_t *sctx, |
82 | Idx str_idx, |
83 | re_node_set *dest_nodes); |
84 | static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa, |
85 | re_node_set *dest_nodes, |
86 | const re_node_set *candidates); |
87 | static bool check_dst_limits (const re_match_context_t *mctx, |
88 | const re_node_set *limits, |
89 | Idx dst_node, Idx dst_idx, Idx src_node, |
90 | Idx src_idx); |
91 | static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, |
92 | int boundaries, Idx subexp_idx, |
93 | Idx from_node, Idx bkref_idx); |
94 | static int check_dst_limits_calc_pos (const re_match_context_t *mctx, |
95 | Idx limit, Idx subexp_idx, |
96 | Idx node, Idx str_idx, |
97 | Idx bkref_idx); |
98 | static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa, |
99 | re_node_set *dest_nodes, |
100 | const re_node_set *candidates, |
101 | re_node_set *limits, |
102 | struct re_backref_cache_entry *bkref_ents, |
103 | Idx str_idx); |
104 | static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx, |
105 | re_sift_context_t *sctx, |
106 | Idx str_idx, const re_node_set *candidates); |
107 | static reg_errcode_t merge_state_array (const re_dfa_t *dfa, |
108 | re_dfastate_t **dst, |
109 | re_dfastate_t **src, Idx num); |
110 | static re_dfastate_t *find_recover_state (reg_errcode_t *err, |
111 | re_match_context_t *mctx); |
112 | static re_dfastate_t *transit_state (reg_errcode_t *err, |
113 | re_match_context_t *mctx, |
114 | re_dfastate_t *state); |
115 | static re_dfastate_t *merge_state_with_log (reg_errcode_t *err, |
116 | re_match_context_t *mctx, |
117 | re_dfastate_t *next_state); |
118 | static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx, |
119 | re_node_set *cur_nodes, |
120 | Idx str_idx); |
121 | #if 0 |
122 | static re_dfastate_t *transit_state_sb (reg_errcode_t *err, |
123 | re_match_context_t *mctx, |
124 | re_dfastate_t *pstate); |
125 | #endif |
126 | #ifdef RE_ENABLE_I18N |
127 | static reg_errcode_t transit_state_mb (re_match_context_t *mctx, |
128 | re_dfastate_t *pstate); |
129 | #endif /* RE_ENABLE_I18N */ |
130 | static reg_errcode_t transit_state_bkref (re_match_context_t *mctx, |
131 | const re_node_set *nodes); |
132 | static reg_errcode_t get_subexp (re_match_context_t *mctx, |
133 | Idx bkref_node, Idx bkref_str_idx); |
134 | static reg_errcode_t get_subexp_sub (re_match_context_t *mctx, |
135 | const re_sub_match_top_t *sub_top, |
136 | re_sub_match_last_t *sub_last, |
137 | Idx bkref_node, Idx bkref_str); |
138 | static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, |
139 | Idx subexp_idx, int type); |
140 | static reg_errcode_t check_arrival (re_match_context_t *mctx, |
141 | state_array_t *path, Idx top_node, |
142 | Idx top_str, Idx last_node, Idx last_str, |
143 | int type); |
144 | static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx, |
145 | Idx str_idx, |
146 | re_node_set *cur_nodes, |
147 | re_node_set *next_nodes); |
148 | static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa, |
149 | re_node_set *cur_nodes, |
150 | Idx ex_subexp, int type); |
151 | static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa, |
152 | re_node_set *dst_nodes, |
153 | Idx target, Idx ex_subexp, |
154 | int type); |
155 | static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx, |
156 | re_node_set *cur_nodes, Idx cur_str, |
157 | Idx subexp_num, int type); |
158 | static bool build_trtable (const re_dfa_t *dfa, re_dfastate_t *state); |
159 | #ifdef RE_ENABLE_I18N |
160 | static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, |
161 | const re_string_t *input, Idx idx); |
162 | # ifdef _LIBC |
163 | static unsigned int find_collation_sequence_value (const unsigned char *mbs, |
164 | size_t name_len); |
165 | # endif /* _LIBC */ |
166 | #endif /* RE_ENABLE_I18N */ |
167 | static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa, |
168 | const re_dfastate_t *state, |
169 | re_node_set *states_node, |
170 | bitset_t *states_ch); |
171 | static bool check_node_accept (const re_match_context_t *mctx, |
172 | const re_token_t *node, Idx idx); |
173 | static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len); |
174 | |
175 | /* Entry point for POSIX code. */ |
176 | |
177 | /* regexec searches for a given pattern, specified by PREG, in the |
178 | string STRING. |
179 | |
180 | If NMATCH is zero or REG_NOSUB was set in the cflags argument to |
181 | 'regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at |
182 | least NMATCH elements, and we set them to the offsets of the |
183 | corresponding matched substrings. |
184 | |
185 | EFLAGS specifies "execution flags" which affect matching: if |
186 | REG_NOTBOL is set, then ^ does not match at the beginning of the |
187 | string; if REG_NOTEOL is set, then $ does not match at the end. |
188 | |
189 | Return 0 if a match is found, REG_NOMATCH if not, REG_BADPAT if |
190 | EFLAGS is invalid. */ |
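
/* Illustrative usage sketch (not part of the library): a plausible caller
   of the POSIX interface documented above; the pattern and subject string
   are invented for the example.

     regex_t re;
     regmatch_t m[2];
     int rc = regcomp (&re, "a(b+)c", REG_EXTENDED);

     if (rc == 0)
       {
         rc = regexec (&re, "xxabbbcyy", 2, m, 0);
         regfree (&re);
       }

   When both calls succeed (rc == 0), m[0] holds the offsets of the whole
   match ("abbbc", 2..7) and m[1] those of the first group ("bbb", 3..6).  */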
191 | |
192 | int |
193 | regexec (const regex_t *__restrict preg, const char *__restrict string, |
194 | size_t nmatch, regmatch_t pmatch[_REGEX_NELTS (nmatch)], int eflags) |
195 | { |
196 | reg_errcode_t err; |
197 | Idx start, length; |
198 | re_dfa_t *dfa = preg->buffer; |
199 | |
200 | if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND)) |
201 | return REG_BADPAT; |
202 | |
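  /* With REG_STARTEND the caller selects the search region through
     pmatch[0]: rm_so gives the starting offset and rm_eo the ending
     offset within STRING.  */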
203 | if (eflags & REG_STARTEND) |
204 | { |
205 | start = pmatch[0].rm_so; |
206 | length = pmatch[0].rm_eo; |
207 | } |
208 | else |
209 | { |
210 | start = 0; |
211 | length = strlen (string); |
212 | } |
213 | |
214 | lock_lock (dfa->lock); |
215 | if (preg->no_sub) |
216 | err = re_search_internal (preg, string, length, start, length, |
217 | length, 0, NULL, eflags); |
218 | else |
219 | err = re_search_internal (preg, string, length, start, length, |
220 | length, nmatch, pmatch, eflags); |
221 | lock_unlock (dfa->lock); |
222 | return err != REG_NOERROR; |
223 | } |
224 | |
225 | #ifdef _LIBC |
226 | libc_hidden_def (__regexec) |
227 | |
228 | # include <shlib-compat.h> |
229 | versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4); |
230 | |
231 | # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) |
232 | __typeof__ (__regexec) __compat_regexec; |
233 | |
234 | int |
235 | attribute_compat_text_section |
236 | __compat_regexec (const regex_t *__restrict preg, |
237 | const char *__restrict string, size_t nmatch, |
238 | regmatch_t pmatch[_REGEX_NELTS (nmatch)], int eflags) |
239 | { |
240 | return regexec (preg, string, nmatch, pmatch, |
241 | eflags & (REG_NOTBOL | REG_NOTEOL)); |
242 | } |
243 | compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0); |
244 | # endif |
245 | #endif |
246 | |
247 | /* Entry points for GNU code. */ |
248 | |
249 | /* re_match, re_search, re_match_2, re_search_2 |
250 | |
251 | The former two functions operate on STRING with length LENGTH, |
   while the latter two operate on the concatenation of STRING1 and STRING2
253 | with lengths LENGTH1 and LENGTH2, respectively. |
254 | |
255 | re_match() matches the compiled pattern in BUFP against the string, |
256 | starting at index START. |
257 | |
258 | re_search() first tries matching at index START, then it tries to match |
259 | starting from index START + 1, and so on. The last start position tried |
260 | is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same |
261 | way as re_match().) |
262 | |
   The parameter STOP of re_{match,search}_2 specifies that no match may
   extend past the first STOP characters of the concatenation of the
   strings.
266 | |
   If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
   and of all groups are stored in REGS.  (For the "_2" variants, the offsets are
269 | computed relative to the concatenation, not relative to the individual |
270 | strings.) |
271 | |
   On success, the re_match* functions return the length of the match and
   the re_search* functions return the position of the start of the match.
   Both return -1 on match failure, -2 on error.  */
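
/* Illustrative sketch (not part of the library) of the GNU interface
   described above.  It assumes the usual re_compile_pattern entry point
   from the compilation half of the library; the pattern and subject are
   invented for the example.

     struct re_pattern_buffer buf;
     struct re_registers regs;
     const char *s = "foo bar";

     memset (&buf, 0, sizeof buf);
     memset (&regs, 0, sizeof regs);
     if (re_compile_pattern ("b.r", 3, &buf) == NULL)
       {
         regoff_t pos = re_search (&buf, s, 7, 0, 7, &regs);
         regoff_t len = re_match (&buf, s, 7, 4, NULL);
       }

   Here POS would be 4 (the offset where "bar" starts) and LEN would be 3
   (the length of the match beginning at index 4), with the group offsets
   copied into REGS.  */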
275 | |
276 | regoff_t |
277 | re_match (struct re_pattern_buffer *bufp, const char *string, Idx length, |
278 | Idx start, struct re_registers *regs) |
279 | { |
280 | return re_search_stub (bufp, string, length, start, 0, length, regs, true); |
281 | } |
282 | #ifdef _LIBC |
283 | weak_alias (__re_match, re_match) |
284 | #endif |
285 | |
286 | regoff_t |
287 | re_search (struct re_pattern_buffer *bufp, const char *string, Idx length, |
288 | Idx start, regoff_t range, struct re_registers *regs) |
289 | { |
290 | return re_search_stub (bufp, string, length, start, range, length, regs, |
291 | false); |
292 | } |
293 | #ifdef _LIBC |
294 | weak_alias (__re_search, re_search) |
295 | #endif |
296 | |
297 | regoff_t |
298 | re_match_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1, |
299 | const char *string2, Idx length2, Idx start, |
300 | struct re_registers *regs, Idx stop) |
301 | { |
302 | return re_search_2_stub (bufp, string1, length1, string2, length2, |
303 | start, 0, regs, stop, true); |
304 | } |
305 | #ifdef _LIBC |
306 | weak_alias (__re_match_2, re_match_2) |
307 | #endif |
308 | |
309 | regoff_t |
310 | re_search_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1, |
311 | const char *string2, Idx length2, Idx start, regoff_t range, |
312 | struct re_registers *regs, Idx stop) |
313 | { |
314 | return re_search_2_stub (bufp, string1, length1, string2, length2, |
315 | start, range, regs, stop, false); |
316 | } |
317 | #ifdef _LIBC |
318 | weak_alias (__re_search_2, re_search_2) |
319 | #endif |
320 | |
321 | static regoff_t |
322 | re_search_2_stub (struct re_pattern_buffer *bufp, const char *string1, |
323 | Idx length1, const char *string2, Idx length2, Idx start, |
324 | regoff_t range, struct re_registers *regs, |
325 | Idx stop, bool ret_len) |
326 | { |
327 | const char *str; |
328 | regoff_t rval; |
329 | Idx len; |
330 | char *s = NULL; |
331 | |
332 | if (__glibc_unlikely ((length1 < 0 || length2 < 0 || stop < 0 |
333 | || INT_ADD_WRAPV (length1, length2, &len)))) |
334 | return -2; |
335 | |
336 | /* Concatenate the strings. */ |
337 | if (length2 > 0) |
338 | if (length1 > 0) |
339 | { |
340 | s = re_malloc (char, len); |
341 | |
342 | if (__glibc_unlikely (s == NULL)) |
343 | return -2; |
344 | #ifdef _LIBC |
345 | memcpy (__mempcpy (s, string1, length1), string2, length2); |
346 | #else |
347 | memcpy (s, string1, length1); |
348 | memcpy (s + length1, string2, length2); |
349 | #endif |
350 | str = s; |
351 | } |
352 | else |
353 | str = string2; |
354 | else |
355 | str = string1; |
356 | |
357 | rval = re_search_stub (bufp, str, len, start, range, stop, regs, |
358 | ret_len); |
359 | re_free (s); |
360 | return rval; |
361 | } |
362 | |
363 | /* The parameters have the same meaning as those of re_search. |
364 | Additional parameters: |
365 | If RET_LEN is true the length of the match is returned (re_match style); |
366 | otherwise the position of the match is returned. */ |
367 | |
368 | static regoff_t |
369 | re_search_stub (struct re_pattern_buffer *bufp, const char *string, Idx length, |
370 | Idx start, regoff_t range, Idx stop, struct re_registers *regs, |
371 | bool ret_len) |
372 | { |
373 | reg_errcode_t result; |
374 | regmatch_t *pmatch; |
375 | Idx nregs; |
376 | regoff_t rval; |
377 | int eflags = 0; |
378 | re_dfa_t *dfa = bufp->buffer; |
379 | Idx last_start = start + range; |
380 | |
381 | /* Check for out-of-range. */ |
382 | if (__glibc_unlikely (start < 0 || start > length)) |
383 | return -1; |
384 | if (__glibc_unlikely (length < last_start |
385 | || (0 <= range && last_start < start))) |
386 | last_start = length; |
387 | else if (__glibc_unlikely (last_start < 0 |
388 | || (range < 0 && start <= last_start))) |
389 | last_start = 0; |
390 | |
391 | lock_lock (dfa->lock); |
392 | |
393 | eflags |= (bufp->not_bol) ? REG_NOTBOL : 0; |
394 | eflags |= (bufp->not_eol) ? REG_NOTEOL : 0; |
395 | |
396 | /* Compile fastmap if we haven't yet. */ |
397 | if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate) |
398 | re_compile_fastmap (bufp); |
399 | |
400 | if (__glibc_unlikely (bufp->no_sub)) |
401 | regs = NULL; |
402 | |
403 | /* We need at least 1 register. */ |
404 | if (regs == NULL) |
405 | nregs = 1; |
406 | else if (__glibc_unlikely (bufp->regs_allocated == REGS_FIXED |
407 | && regs->num_regs <= bufp->re_nsub)) |
408 | { |
409 | nregs = regs->num_regs; |
410 | if (__glibc_unlikely (nregs < 1)) |
411 | { |
412 | /* Nothing can be copied to regs. */ |
413 | regs = NULL; |
414 | nregs = 1; |
415 | } |
416 | } |
417 | else |
418 | nregs = bufp->re_nsub + 1; |
419 | pmatch = re_malloc (regmatch_t, nregs); |
420 | if (__glibc_unlikely (pmatch == NULL)) |
421 | { |
422 | rval = -2; |
423 | goto out; |
424 | } |
425 | |
426 | result = re_search_internal (bufp, string, length, start, last_start, stop, |
427 | nregs, pmatch, eflags); |
428 | |
429 | rval = 0; |
430 | |
431 | /* I hope we needn't fill their regs with -1's when no match was found. */ |
432 | if (result != REG_NOERROR) |
433 | rval = result == REG_NOMATCH ? -1 : -2; |
434 | else if (regs != NULL) |
435 | { |
436 | /* If caller wants register contents data back, copy them. */ |
437 | bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs, |
438 | bufp->regs_allocated); |
439 | if (__glibc_unlikely (bufp->regs_allocated == REGS_UNALLOCATED)) |
440 | rval = -2; |
441 | } |
442 | |
443 | if (__glibc_likely (rval == 0)) |
444 | { |
445 | if (ret_len) |
446 | { |
447 | DEBUG_ASSERT (pmatch[0].rm_so == start); |
448 | rval = pmatch[0].rm_eo - start; |
449 | } |
450 | else |
451 | rval = pmatch[0].rm_so; |
452 | } |
453 | re_free (pmatch); |
454 | out: |
455 | lock_unlock (dfa->lock); |
456 | return rval; |
457 | } |
458 | |
459 | static unsigned |
460 | re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs, |
461 | int regs_allocated) |
462 | { |
463 | int rval = REGS_REALLOCATE; |
464 | Idx i; |
465 | Idx need_regs = nregs + 1; |
466 | /* We need one extra element beyond 'num_regs' for the '-1' marker GNU code |
467 | uses. */ |
468 | |
469 | /* Have the register data arrays been allocated? */ |
470 | if (regs_allocated == REGS_UNALLOCATED) |
471 | { /* No. So allocate them with malloc. */ |
472 | regs->start = re_malloc (regoff_t, need_regs); |
473 | if (__glibc_unlikely (regs->start == NULL)) |
474 | return REGS_UNALLOCATED; |
475 | regs->end = re_malloc (regoff_t, need_regs); |
476 | if (__glibc_unlikely (regs->end == NULL)) |
477 | { |
478 | re_free (regs->start); |
479 | return REGS_UNALLOCATED; |
480 | } |
481 | regs->num_regs = need_regs; |
482 | } |
483 | else if (regs_allocated == REGS_REALLOCATE) |
484 | { /* Yes. If we need more elements than were already |
485 | allocated, reallocate them. If we need fewer, just |
486 | leave it alone. */ |
487 | if (__glibc_unlikely (need_regs > regs->num_regs)) |
488 | { |
489 | regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs); |
490 | regoff_t *new_end; |
491 | if (__glibc_unlikely (new_start == NULL)) |
492 | return REGS_UNALLOCATED; |
493 | new_end = re_realloc (regs->end, regoff_t, need_regs); |
494 | if (__glibc_unlikely (new_end == NULL)) |
495 | { |
496 | re_free (new_start); |
497 | return REGS_UNALLOCATED; |
498 | } |
499 | regs->start = new_start; |
500 | regs->end = new_end; |
501 | regs->num_regs = need_regs; |
502 | } |
503 | } |
504 | else |
505 | { |
506 | DEBUG_ASSERT (regs_allocated == REGS_FIXED); |
507 | /* This function may not be called with REGS_FIXED and nregs too big. */ |
508 | DEBUG_ASSERT (nregs <= regs->num_regs); |
509 | rval = REGS_FIXED; |
510 | } |
511 | |
512 | /* Copy the regs. */ |
513 | for (i = 0; i < nregs; ++i) |
514 | { |
515 | regs->start[i] = pmatch[i].rm_so; |
516 | regs->end[i] = pmatch[i].rm_eo; |
517 | } |
518 | for ( ; i < regs->num_regs; ++i) |
519 | regs->start[i] = regs->end[i] = -1; |
520 | |
521 | return rval; |
522 | } |
523 | |
524 | /* Set REGS to hold NUM_REGS registers, storing them in STARTS and |
525 | ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use |
526 | this memory for recording register information. STARTS and ENDS |
527 | must be allocated using the malloc library routine, and must each |
528 | be at least NUM_REGS * sizeof (regoff_t) bytes long. |
529 | |
530 | If NUM_REGS == 0, then subsequent matches should allocate their own |
531 | register data. |
532 | |
533 | Unless this function is called, the first search or match using |
534 | PATTERN_BUFFER will allocate its own register data, without |
535 | freeing the old data. */ |
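
/* Illustrative sketch (not part of the library): hand REGS a caller-owned,
   malloc'd pair of arrays that later searches will fill in (and enlarge if
   necessary); BUFP is assumed to be an already-compiled pattern buffer.

     struct re_registers regs;
     __re_size_t n = bufp->re_nsub + 1;
     regoff_t *starts = malloc (n * sizeof *starts);
     regoff_t *ends = malloc (n * sizeof *ends);

     if (starts != NULL && ends != NULL)
       re_set_registers (bufp, &regs, n, starts, ends);

   Passing NUM_REGS == 0 (with NULL STARTS and ENDS) instead makes the next
   search allocate its own register data, as described above.  */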
536 | |
537 | void |
538 | re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, |
539 | __re_size_t num_regs, regoff_t *starts, regoff_t *ends) |
540 | { |
541 | if (num_regs) |
542 | { |
543 | bufp->regs_allocated = REGS_REALLOCATE; |
544 | regs->num_regs = num_regs; |
545 | regs->start = starts; |
546 | regs->end = ends; |
547 | } |
548 | else |
549 | { |
550 | bufp->regs_allocated = REGS_UNALLOCATED; |
551 | regs->num_regs = 0; |
552 | regs->start = regs->end = NULL; |
553 | } |
554 | } |
555 | #ifdef _LIBC |
556 | weak_alias (__re_set_registers, re_set_registers) |
557 | #endif |
558 | |
559 | /* Entry points compatible with 4.2 BSD regex library. We don't define |
560 | them unless specifically requested. */ |
561 | |
562 | #if defined _REGEX_RE_COMP || defined _LIBC |
563 | int |
564 | # ifdef _LIBC |
565 | weak_function |
566 | # endif |
567 | re_exec (const char *s) |
568 | { |
569 | return 0 == regexec (&re_comp_buf, s, 0, NULL, 0); |
570 | } |
571 | #endif /* _REGEX_RE_COMP */ |
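
/* Illustrative sketch (not part of the library): with the 4.2 BSD
   interface, re_comp (defined with the compilation code) returns NULL on
   success, after which re_exec reports whether a string matches.

     if (re_comp ("hello.*world") == NULL
         && re_exec ("hello, world"))
       puts ("matched");
*/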
572 | |
573 | /* Internal entry point. */ |
574 | |
575 | /* Searches for a compiled pattern PREG in the string STRING, whose |
576 | length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same |
577 | meaning as with regexec. LAST_START is START + RANGE, where |
578 | START and RANGE have the same meaning as with re_search. |
579 | Return REG_NOERROR if we find a match, and REG_NOMATCH if not, |
580 | otherwise return the error code. |
581 | Note: We assume front end functions already check ranges. |
582 | (0 <= LAST_START && LAST_START <= LENGTH) */ |
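
/* For example, a backward re_search call with START == 10 and RANGE == -4
   reaches this function with LAST_START == 6, so the loop below tries to
   match at indexes 10, 9, 8, 7 and 6, in that order.  */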
583 | |
584 | static reg_errcode_t |
585 | __attribute_warn_unused_result__ |
586 | re_search_internal (const regex_t *preg, const char *string, Idx length, |
587 | Idx start, Idx last_start, Idx stop, size_t nmatch, |
588 | regmatch_t pmatch[], int eflags) |
589 | { |
590 | reg_errcode_t err; |
591 | const re_dfa_t *dfa = preg->buffer; |
592 | Idx left_lim, right_lim; |
593 | int incr; |
594 | bool fl_longest_match; |
595 | int match_kind; |
596 | Idx match_first; |
597 | Idx match_last = -1; |
  Idx extra_nmatch;
599 | bool sb; |
600 | int ch; |
601 | re_match_context_t mctx = { .dfa = dfa }; |
602 | char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate |
603 | && start != last_start && !preg->can_be_null) |
604 | ? preg->fastmap : NULL); |
605 | RE_TRANSLATE_TYPE t = preg->translate; |
606 | |
607 | extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0; |
608 | nmatch -= extra_nmatch; |
609 | |
  /* Check whether the DFA has not been compiled.  */
611 | if (__glibc_unlikely (preg->used == 0 || dfa->init_state == NULL |
612 | || dfa->init_state_word == NULL |
613 | || dfa->init_state_nl == NULL |
614 | || dfa->init_state_begbuf == NULL)) |
615 | return REG_NOMATCH; |
616 | |
617 | /* We assume front-end functions already check them. */ |
618 | DEBUG_ASSERT (0 <= last_start && last_start <= length); |
619 | |
620 | /* If initial states with non-begbuf contexts have no elements, |
621 | the regex must be anchored. If preg->newline_anchor is set, |
622 | we'll never use init_state_nl, so do not check it. */ |
623 | if (dfa->init_state->nodes.nelem == 0 |
624 | && dfa->init_state_word->nodes.nelem == 0 |
625 | && (dfa->init_state_nl->nodes.nelem == 0 |
626 | || !preg->newline_anchor)) |
627 | { |
628 | if (start != 0 && last_start != 0) |
629 | return REG_NOMATCH; |
630 | start = last_start = 0; |
631 | } |
632 | |
  /* We must look for the longest match if nmatch > 0 or there are
     back-references.  */
634 | fl_longest_match = (nmatch != 0 || dfa->nbackref); |
635 | |
636 | err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1, |
637 | preg->translate, (preg->syntax & RE_ICASE) != 0, |
638 | dfa); |
639 | if (__glibc_unlikely (err != REG_NOERROR)) |
640 | goto free_return; |
641 | mctx.input.stop = stop; |
642 | mctx.input.raw_stop = stop; |
643 | mctx.input.newline_anchor = preg->newline_anchor; |
644 | |
645 | err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2); |
646 | if (__glibc_unlikely (err != REG_NOERROR)) |
647 | goto free_return; |
648 | |
  /* Log all the DFA states through which the DFA passes if nmatch > 1,
     or if this DFA has a "multibyte node", i.e. a back-reference or a
     node which can accept a multibyte character or a multi-character
     collating element.  */
653 | if (nmatch > 1 || dfa->has_mb_node) |
654 | { |
655 | /* Avoid overflow. */ |
656 | if (__glibc_unlikely ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) |
657 | <= mctx.input.bufs_len))) |
658 | { |
659 | err = REG_ESPACE; |
660 | goto free_return; |
661 | } |
662 | |
663 | mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1); |
664 | if (__glibc_unlikely (mctx.state_log == NULL)) |
665 | { |
666 | err = REG_ESPACE; |
667 | goto free_return; |
668 | } |
669 | } |
670 | |
671 | match_first = start; |
672 | mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF |
673 | : CONTEXT_NEWLINE | CONTEXT_BEGBUF; |
674 | |
675 | /* Check incrementally whether the input string matches. */ |
676 | incr = (last_start < start) ? -1 : 1; |
677 | left_lim = (last_start < start) ? last_start : start; |
678 | right_lim = (last_start < start) ? start : last_start; |
679 | sb = dfa->mb_cur_max == 1; |
680 | match_kind = |
681 | (fastmap |
682 | ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0) |
683 | | (start <= last_start ? 2 : 0) |
684 | | (t != NULL ? 1 : 0)) |
685 | : 8); |
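  /* MATCH_KIND decodes as follows: 8 means there is no usable fastmap;
     otherwise bit 2 (value 4) is set when the current byte can be tested
     directly (single-byte locale, or neither case folding nor a
     translation table), bit 1 (value 2) when searching forward, and
     bit 0 (value 1) when a translation table is present.  Only the common
     combinations get specialized cases in the switch below.  */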
686 | |
687 | for (;; match_first += incr) |
688 | { |
689 | err = REG_NOMATCH; |
690 | if (match_first < left_lim || right_lim < match_first) |
691 | goto free_return; |
692 | |
693 | /* Advance as rapidly as possible through the string, until we |
694 | find a plausible place to start matching. This may be done |
695 | with varying efficiency, so there are various possibilities: |
696 | only the most common of them are specialized, in order to |
697 | save on code size. We use a switch statement for speed. */ |
698 | switch (match_kind) |
699 | { |
700 | case 8: |
701 | /* No fastmap. */ |
702 | break; |
703 | |
704 | case 7: |
705 | /* Fastmap with single-byte translation, match forward. */ |
706 | while (__glibc_likely (match_first < right_lim) |
707 | && !fastmap[t[(unsigned char) string[match_first]]]) |
708 | ++match_first; |
709 | goto forward_match_found_start_or_reached_end; |
710 | |
711 | case 6: |
712 | /* Fastmap without translation, match forward. */ |
713 | while (__glibc_likely (match_first < right_lim) |
714 | && !fastmap[(unsigned char) string[match_first]]) |
715 | ++match_first; |
716 | |
717 | forward_match_found_start_or_reached_end: |
718 | if (__glibc_unlikely (match_first == right_lim)) |
719 | { |
720 | ch = match_first >= length |
721 | ? 0 : (unsigned char) string[match_first]; |
722 | if (!fastmap[t ? t[ch] : ch]) |
723 | goto free_return; |
724 | } |
725 | break; |
726 | |
727 | case 4: |
728 | case 5: |
729 | /* Fastmap without multi-byte translation, match backwards. */ |
730 | while (match_first >= left_lim) |
731 | { |
732 | ch = match_first >= length |
733 | ? 0 : (unsigned char) string[match_first]; |
734 | if (fastmap[t ? t[ch] : ch]) |
735 | break; |
736 | --match_first; |
737 | } |
738 | if (match_first < left_lim) |
739 | goto free_return; |
740 | break; |
741 | |
742 | default: |
	  /* In this case we can't easily determine the current byte, since
	     it might be a component byte of a multibyte character, so use
	     the constructed buffer instead.  */
746 | for (;;) |
747 | { |
748 | /* If MATCH_FIRST is out of the valid range, reconstruct the |
749 | buffers. */ |
750 | __re_size_t offset = match_first - mctx.input.raw_mbs_idx; |
751 | if (__glibc_unlikely (offset |
752 | >= (__re_size_t) mctx.input.valid_raw_len)) |
753 | { |
754 | err = re_string_reconstruct (&mctx.input, match_first, |
755 | eflags); |
756 | if (__glibc_unlikely (err != REG_NOERROR)) |
757 | goto free_return; |
758 | |
759 | offset = match_first - mctx.input.raw_mbs_idx; |
760 | } |
761 | /* Use buffer byte if OFFSET is in buffer, otherwise '\0'. */ |
762 | ch = (offset < mctx.input.valid_len |
763 | ? re_string_byte_at (&mctx.input, offset) : 0); |
764 | if (fastmap[ch]) |
765 | break; |
766 | match_first += incr; |
767 | if (match_first < left_lim || match_first > right_lim) |
768 | { |
769 | err = REG_NOMATCH; |
770 | goto free_return; |
771 | } |
772 | } |
773 | break; |
774 | } |
775 | |
776 | /* Reconstruct the buffers so that the matcher can assume that |
777 | the matching starts from the beginning of the buffer. */ |
778 | err = re_string_reconstruct (&mctx.input, match_first, eflags); |
779 | if (__glibc_unlikely (err != REG_NOERROR)) |
780 | goto free_return; |
781 | |
782 | #ifdef RE_ENABLE_I18N |
      /* Don't consider this char as a possible match start if it is part
	 of, but isn't the head of, a multibyte character.  */
785 | if (!sb && !re_string_first_byte (&mctx.input, 0)) |
786 | continue; |
787 | #endif |
788 | |
      /* This looks like a suitable start position, so run the matcher.  */
790 | /* We assume that the matching starts from 0. */ |
791 | mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0; |
792 | match_last = check_matching (&mctx, fl_longest_match, |
793 | start <= last_start ? &match_first : NULL); |
794 | if (match_last != -1) |
795 | { |
796 | if (__glibc_unlikely (match_last == -2)) |
797 | { |
798 | err = REG_ESPACE; |
799 | goto free_return; |
800 | } |
801 | else |
802 | { |
803 | mctx.match_last = match_last; |
804 | if ((!preg->no_sub && nmatch > 1) || dfa->nbackref) |
805 | { |
806 | re_dfastate_t *pstate = mctx.state_log[match_last]; |
807 | mctx.last_node = check_halt_state_context (&mctx, pstate, |
808 | match_last); |
809 | } |
810 | if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match) |
811 | || dfa->nbackref) |
812 | { |
813 | err = prune_impossible_nodes (&mctx); |
814 | if (err == REG_NOERROR) |
815 | break; |
816 | if (__glibc_unlikely (err != REG_NOMATCH)) |
817 | goto free_return; |
818 | match_last = -1; |
819 | } |
820 | else |
821 | break; /* We found a match. */ |
822 | } |
823 | } |
824 | |
825 | match_ctx_clean (&mctx); |
826 | } |
827 | |
828 | DEBUG_ASSERT (match_last != -1); |
829 | DEBUG_ASSERT (err == REG_NOERROR); |
830 | |
  /* Set pmatch[] if needed.  */
832 | if (nmatch > 0) |
833 | { |
834 | Idx reg_idx; |
835 | |
836 | /* Initialize registers. */ |
837 | for (reg_idx = 1; reg_idx < nmatch; ++reg_idx) |
838 | pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1; |
839 | |
      /* Set the points where the match starts/ends.  */
841 | pmatch[0].rm_so = 0; |
842 | pmatch[0].rm_eo = mctx.match_last; |
843 | /* FIXME: This function should fail if mctx.match_last exceeds |
844 | the maximum possible regoff_t value. We need a new error |
845 | code REG_OVERFLOW. */ |
846 | |
847 | if (!preg->no_sub && nmatch > 1) |
848 | { |
849 | err = set_regs (preg, &mctx, nmatch, pmatch, |
850 | dfa->has_plural_match && dfa->nbackref > 0); |
851 | if (__glibc_unlikely (err != REG_NOERROR)) |
852 | goto free_return; |
853 | } |
854 | |
      /* Finally, add the offset to each register, since we slid the
	 buffers so that we could assume that the match starts from 0.  */
858 | for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) |
859 | if (pmatch[reg_idx].rm_so != -1) |
860 | { |
861 | #ifdef RE_ENABLE_I18N |
862 | if (__glibc_unlikely (mctx.input.offsets_needed != 0)) |
863 | { |
864 | pmatch[reg_idx].rm_so = |
865 | (pmatch[reg_idx].rm_so == mctx.input.valid_len |
866 | ? mctx.input.valid_raw_len |
867 | : mctx.input.offsets[pmatch[reg_idx].rm_so]); |
868 | pmatch[reg_idx].rm_eo = |
869 | (pmatch[reg_idx].rm_eo == mctx.input.valid_len |
870 | ? mctx.input.valid_raw_len |
871 | : mctx.input.offsets[pmatch[reg_idx].rm_eo]); |
872 | } |
873 | #else |
874 | DEBUG_ASSERT (mctx.input.offsets_needed == 0); |
875 | #endif |
876 | pmatch[reg_idx].rm_so += match_first; |
877 | pmatch[reg_idx].rm_eo += match_first; |
878 | } |
879 | for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx) |
880 | { |
881 | pmatch[nmatch + reg_idx].rm_so = -1; |
882 | pmatch[nmatch + reg_idx].rm_eo = -1; |
883 | } |
884 | |
885 | if (dfa->subexp_map) |
886 | for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++) |
887 | if (dfa->subexp_map[reg_idx] != reg_idx) |
888 | { |
889 | pmatch[reg_idx + 1].rm_so |
890 | = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so; |
891 | pmatch[reg_idx + 1].rm_eo |
892 | = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo; |
893 | } |
894 | } |
895 | |
896 | free_return: |
897 | re_free (mctx.state_log); |
898 | if (dfa->nbackref) |
899 | match_ctx_free (&mctx); |
900 | re_string_destruct (&mctx.input); |
901 | return err; |
902 | } |
903 | |
904 | static reg_errcode_t |
905 | __attribute_warn_unused_result__ |
906 | prune_impossible_nodes (re_match_context_t *mctx) |
907 | { |
908 | const re_dfa_t *const dfa = mctx->dfa; |
909 | Idx halt_node, match_last; |
910 | reg_errcode_t ret; |
911 | re_dfastate_t **sifted_states; |
912 | re_dfastate_t **lim_states = NULL; |
913 | re_sift_context_t sctx; |
914 | DEBUG_ASSERT (mctx->state_log != NULL); |
915 | match_last = mctx->match_last; |
916 | halt_node = mctx->last_node; |
917 | |
918 | /* Avoid overflow. */ |
919 | if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) |
920 | <= match_last)) |
921 | return REG_ESPACE; |
922 | |
923 | sifted_states = re_malloc (re_dfastate_t *, match_last + 1); |
924 | if (__glibc_unlikely (sifted_states == NULL)) |
925 | { |
926 | ret = REG_ESPACE; |
927 | goto free_return; |
928 | } |
929 | if (dfa->nbackref) |
930 | { |
931 | lim_states = re_malloc (re_dfastate_t *, match_last + 1); |
932 | if (__glibc_unlikely (lim_states == NULL)) |
933 | { |
934 | ret = REG_ESPACE; |
935 | goto free_return; |
936 | } |
937 | while (1) |
938 | { |
939 | memset (lim_states, '\0', |
940 | sizeof (re_dfastate_t *) * (match_last + 1)); |
941 | sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, |
942 | match_last); |
943 | ret = sift_states_backward (mctx, &sctx); |
944 | re_node_set_free (&sctx.limits); |
945 | if (__glibc_unlikely (ret != REG_NOERROR)) |
946 | goto free_return; |
947 | if (sifted_states[0] != NULL || lim_states[0] != NULL) |
948 | break; |
949 | do |
950 | { |
951 | --match_last; |
952 | if (match_last < 0) |
953 | { |
954 | ret = REG_NOMATCH; |
955 | goto free_return; |
956 | } |
957 | } while (mctx->state_log[match_last] == NULL |
958 | || !mctx->state_log[match_last]->halt); |
959 | halt_node = check_halt_state_context (mctx, |
960 | mctx->state_log[match_last], |
961 | match_last); |
962 | } |
963 | ret = merge_state_array (dfa, sifted_states, lim_states, |
964 | match_last + 1); |
965 | re_free (lim_states); |
966 | lim_states = NULL; |
967 | if (__glibc_unlikely (ret != REG_NOERROR)) |
968 | goto free_return; |
969 | } |
970 | else |
971 | { |
972 | sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last); |
973 | ret = sift_states_backward (mctx, &sctx); |
974 | re_node_set_free (&sctx.limits); |
975 | if (__glibc_unlikely (ret != REG_NOERROR)) |
976 | goto free_return; |
977 | if (sifted_states[0] == NULL) |
978 | { |
979 | ret = REG_NOMATCH; |
980 | goto free_return; |
981 | } |
982 | } |
983 | re_free (mctx->state_log); |
984 | mctx->state_log = sifted_states; |
985 | sifted_states = NULL; |
986 | mctx->last_node = halt_node; |
987 | mctx->match_last = match_last; |
988 | ret = REG_NOERROR; |
989 | free_return: |
990 | re_free (sifted_states); |
991 | re_free (lim_states); |
992 | return ret; |
993 | } |
994 | |
995 | /* Acquire an initial state and return it. |
   We must select the appropriate initial state depending on the context,
   since initial states may have constraints like "\<", "^", etc.  */
998 | |
999 | static inline re_dfastate_t * |
1000 | __attribute__ ((always_inline)) |
1001 | acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx, |
1002 | Idx idx) |
1003 | { |
1004 | const re_dfa_t *const dfa = mctx->dfa; |
1005 | if (dfa->init_state->has_constraint) |
1006 | { |
1007 | unsigned int context; |
1008 | context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags); |
1009 | if (IS_WORD_CONTEXT (context)) |
1010 | return dfa->init_state_word; |
1011 | else if (IS_ORDINARY_CONTEXT (context)) |
1012 | return dfa->init_state; |
1013 | else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context)) |
1014 | return dfa->init_state_begbuf; |
1015 | else if (IS_NEWLINE_CONTEXT (context)) |
1016 | return dfa->init_state_nl; |
1017 | else if (IS_BEGBUF_CONTEXT (context)) |
1018 | { |
	  /* This is a relatively rare case, so calculate it on demand.  */
1020 | return re_acquire_state_context (err, dfa, |
1021 | dfa->init_state->entrance_nodes, |
1022 | context); |
1023 | } |
1024 | else |
1025 | /* Must not happen? */ |
1026 | return dfa->init_state; |
1027 | } |
1028 | else |
1029 | return dfa->init_state; |
1030 | } |
1031 | |
/* Check whether the regular expression matches the input string INPUT
   or not, and return the index where the match ends.  Return -1 if
1034 | there is no match, and return -2 in case of an error. |
1035 | FL_LONGEST_MATCH means we want the POSIX longest matching. |
1036 | If P_MATCH_FIRST is not NULL, and the match fails, it is set to the |
1037 | next place where we may want to try matching. |
1038 | Note that the matcher assumes that the matching starts from the current |
1039 | index of the buffer. */ |
1040 | |
1041 | static Idx |
1042 | __attribute_warn_unused_result__ |
1043 | check_matching (re_match_context_t *mctx, bool fl_longest_match, |
1044 | Idx *p_match_first) |
1045 | { |
1046 | const re_dfa_t *const dfa = mctx->dfa; |
1047 | reg_errcode_t err; |
1048 | Idx match = 0; |
1049 | Idx match_last = -1; |
1050 | Idx cur_str_idx = re_string_cur_idx (&mctx->input); |
1051 | re_dfastate_t *cur_state; |
1052 | bool at_init_state = p_match_first != NULL; |
1053 | Idx next_start_idx = cur_str_idx; |
1054 | |
1055 | err = REG_NOERROR; |
1056 | cur_state = acquire_init_state_context (&err, mctx, cur_str_idx); |
1057 | /* An initial state must not be NULL (invalid). */ |
1058 | if (__glibc_unlikely (cur_state == NULL)) |
1059 | { |
1060 | DEBUG_ASSERT (err == REG_ESPACE); |
1061 | return -2; |
1062 | } |
1063 | |
1064 | if (mctx->state_log != NULL) |
1065 | { |
1066 | mctx->state_log[cur_str_idx] = cur_state; |
1067 | |
      /* Check OP_OPEN_SUBEXP in the initial state in case we need them
	 later, e.g. when processing back-references.  */
1070 | if (__glibc_unlikely (dfa->nbackref)) |
1071 | { |
1072 | at_init_state = false; |
1073 | err = check_subexp_matching_top (mctx, &cur_state->nodes, 0); |
1074 | if (__glibc_unlikely (err != REG_NOERROR)) |
1075 | return err; |
1076 | |
1077 | if (cur_state->has_backref) |
1078 | { |
1079 | err = transit_state_bkref (mctx, &cur_state->nodes); |
1080 | if (__glibc_unlikely (err != REG_NOERROR)) |
1081 | return err; |
1082 | } |
1083 | } |
1084 | } |
1085 | |
  /* If the RE accepts the empty string.  */
1087 | if (__glibc_unlikely (cur_state->halt)) |
1088 | { |
1089 | if (!cur_state->has_constraint |
1090 | || check_halt_state_context (mctx, cur_state, cur_str_idx)) |
1091 | { |
1092 | if (!fl_longest_match) |
1093 | return cur_str_idx; |
1094 | else |
1095 | { |
1096 | match_last = cur_str_idx; |
1097 | match = 1; |
1098 | } |
1099 | } |
1100 | } |
1101 | |
1102 | while (!re_string_eoi (&mctx->input)) |
1103 | { |
1104 | re_dfastate_t *old_state = cur_state; |
1105 | Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1; |
1106 | |
1107 | if ((__glibc_unlikely (next_char_idx >= mctx->input.bufs_len) |
1108 | && mctx->input.bufs_len < mctx->input.len) |
1109 | || (__glibc_unlikely (next_char_idx >= mctx->input.valid_len) |
1110 | && mctx->input.valid_len < mctx->input.len)) |
1111 | { |
1112 | err = extend_buffers (mctx, next_char_idx + 1); |
1113 | if (__glibc_unlikely (err != REG_NOERROR)) |
1114 | { |
1115 | DEBUG_ASSERT (err == REG_ESPACE); |
1116 | return -2; |
1117 | } |
1118 | } |
1119 | |
1120 | cur_state = transit_state (&err, mctx, cur_state); |
1121 | if (mctx->state_log != NULL) |
1122 | cur_state = merge_state_with_log (&err, mctx, cur_state); |
1123 | |
1124 | if (cur_state == NULL) |
1125 | { |
1126 | /* Reached the invalid state or an error. Try to recover a valid |
1127 | state using the state log, if available and if we have not |
1128 | already found a valid (even if not the longest) match. */ |
1129 | if (__glibc_unlikely (err != REG_NOERROR)) |
1130 | return -2; |
1131 | |
1132 | if (mctx->state_log == NULL |
1133 | || (match && !fl_longest_match) |
1134 | || (cur_state = find_recover_state (&err, mctx)) == NULL) |
1135 | break; |
1136 | } |
1137 | |
1138 | if (__glibc_unlikely (at_init_state)) |
1139 | { |
1140 | if (old_state == cur_state) |
1141 | next_start_idx = next_char_idx; |
1142 | else |
1143 | at_init_state = false; |
1144 | } |
1145 | |
1146 | if (cur_state->halt) |
1147 | { |
	  /* Reached a halt state.
	     Check whether the halt state satisfies the current context.  */
1150 | if (!cur_state->has_constraint |
1151 | || check_halt_state_context (mctx, cur_state, |
1152 | re_string_cur_idx (&mctx->input))) |
1153 | { |
1154 | /* We found an appropriate halt state. */ |
1155 | match_last = re_string_cur_idx (&mctx->input); |
1156 | match = 1; |
1157 | |
1158 | /* We found a match, do not modify match_first below. */ |
1159 | p_match_first = NULL; |
1160 | if (!fl_longest_match) |
1161 | break; |
1162 | } |
1163 | } |
1164 | } |
1165 | |
1166 | if (p_match_first) |
1167 | *p_match_first += next_start_idx; |
1168 | |
1169 | return match_last; |
1170 | } |
1171 | |
/* Check whether NODE matches the current context.  */
1173 | |
1174 | static bool |
1175 | check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context) |
1176 | { |
1177 | re_token_type_t type = dfa->nodes[node].type; |
1178 | unsigned int constraint = dfa->nodes[node].constraint; |
1179 | if (type != END_OF_RE) |
1180 | return false; |
1181 | if (!constraint) |
1182 | return true; |
1183 | if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context)) |
1184 | return false; |
1185 | return true; |
1186 | } |
1187 | |
/* Check whether the halt state STATE matches the current context.
   Return 0 if it does not match; if STATE contains a halt node that
   matches the context, return that node.  */
1191 | |
1192 | static Idx |
1193 | check_halt_state_context (const re_match_context_t *mctx, |
1194 | const re_dfastate_t *state, Idx idx) |
1195 | { |
1196 | Idx i; |
1197 | unsigned int context; |
1198 | DEBUG_ASSERT (state->halt); |
1199 | context = re_string_context_at (&mctx->input, idx, mctx->eflags); |
1200 | for (i = 0; i < state->nodes.nelem; ++i) |
1201 | if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context)) |
1202 | return state->nodes.elems[i]; |
1203 | return 0; |
1204 | } |
1205 | |
/* Compute the next node to which the NFA transits from NODE (the NFA
   being the one corresponding to the DFA).
1208 | Return the destination node, and update EPS_VIA_NODES; |
1209 | return -1 on match failure, -2 on error. */ |
1210 | |
1211 | static Idx |
1212 | proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs, |
1213 | regmatch_t *prevregs, |
1214 | Idx *pidx, Idx node, re_node_set *eps_via_nodes, |
1215 | struct re_fail_stack_t *fs) |
1216 | { |
1217 | const re_dfa_t *const dfa = mctx->dfa; |
1218 | if (IS_EPSILON_NODE (dfa->nodes[node].type)) |
1219 | { |
1220 | re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes; |
1221 | re_node_set *edests = &dfa->edests[node]; |
1222 | |
1223 | if (! re_node_set_contains (eps_via_nodes, node)) |
1224 | { |
1225 | bool ok = re_node_set_insert (eps_via_nodes, node); |
1226 | if (__glibc_unlikely (! ok)) |
1227 | return -2; |
1228 | } |
1229 | |
1230 | /* Pick a valid destination, or return -1 if none is found. */ |
1231 | Idx dest_node = -1; |
1232 | for (Idx i = 0; i < edests->nelem; i++) |
1233 | { |
1234 | Idx candidate = edests->elems[i]; |
1235 | if (!re_node_set_contains (cur_nodes, candidate)) |
1236 | continue; |
1237 | if (dest_node == -1) |
1238 | dest_node = candidate; |
1239 | |
1240 | else |
1241 | { |
	    /* In order to avoid an infinite loop like "(a*)*", return the
	       second epsilon-transition if the first was already considered.  */
1244 | if (re_node_set_contains (eps_via_nodes, dest_node)) |
1245 | return candidate; |
1246 | |
1247 | /* Otherwise, push the second epsilon-transition on the fail stack. */ |
1248 | else if (fs != NULL |
1249 | && push_fail_stack (fs, *pidx, candidate, nregs, regs, |
1250 | prevregs, eps_via_nodes)) |
1251 | return -2; |
1252 | |
1253 | /* We know we are going to exit. */ |
1254 | break; |
1255 | } |
1256 | } |
1257 | return dest_node; |
1258 | } |
1259 | else |
1260 | { |
1261 | Idx naccepted = 0; |
1262 | re_token_type_t type = dfa->nodes[node].type; |
1263 | |
1264 | #ifdef RE_ENABLE_I18N |
1265 | if (dfa->nodes[node].accept_mb) |
1266 | naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx); |
1267 | else |
1268 | #endif /* RE_ENABLE_I18N */ |
1269 | if (type == OP_BACK_REF) |
1270 | { |
1271 | Idx subexp_idx = dfa->nodes[node].opr.idx + 1; |
1272 | if (subexp_idx < nregs) |
1273 | naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so; |
1274 | if (fs != NULL) |
1275 | { |
1276 | if (subexp_idx >= nregs |
1277 | || regs[subexp_idx].rm_so == -1 |
1278 | || regs[subexp_idx].rm_eo == -1) |
1279 | return -1; |
1280 | else if (naccepted) |
1281 | { |
1282 | char *buf = (char *) re_string_get_buffer (&mctx->input); |
1283 | if (mctx->input.valid_len - *pidx < naccepted |
1284 | || (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx, |
1285 | naccepted) |
1286 | != 0)) |
1287 | return -1; |
1288 | } |
1289 | } |
1290 | |
1291 | if (naccepted == 0) |
1292 | { |
1293 | Idx dest_node; |
1294 | bool ok = re_node_set_insert (eps_via_nodes, node); |
1295 | if (__glibc_unlikely (! ok)) |
1296 | return -2; |
1297 | dest_node = dfa->edests[node].elems[0]; |
1298 | if (re_node_set_contains (&mctx->state_log[*pidx]->nodes, |
1299 | dest_node)) |
1300 | return dest_node; |
1301 | } |
1302 | } |
1303 | |
1304 | if (naccepted != 0 |
1305 | || check_node_accept (mctx, dfa->nodes + node, *pidx)) |
1306 | { |
1307 | Idx dest_node = dfa->nexts[node]; |
1308 | *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted; |
1309 | if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL |
1310 | || !re_node_set_contains (&mctx->state_log[*pidx]->nodes, |
1311 | dest_node))) |
1312 | return -1; |
1313 | re_node_set_empty (eps_via_nodes); |
1314 | return dest_node; |
1315 | } |
1316 | } |
1317 | return -1; |
1318 | } |
1319 | |
1320 | static reg_errcode_t |
1321 | __attribute_warn_unused_result__ |
1322 | push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node, |
1323 | Idx nregs, regmatch_t *regs, regmatch_t *prevregs, |
1324 | re_node_set *eps_via_nodes) |
1325 | { |
1326 | reg_errcode_t err; |
1327 | Idx num = fs->num++; |
1328 | if (fs->num == fs->alloc) |
1329 | { |
1330 | struct re_fail_stack_ent_t *new_array; |
1331 | new_array = re_realloc (fs->stack, struct re_fail_stack_ent_t, |
1332 | fs->alloc * 2); |
1333 | if (new_array == NULL) |
1334 | return REG_ESPACE; |
1335 | fs->alloc *= 2; |
1336 | fs->stack = new_array; |
1337 | } |
1338 | fs->stack[num].idx = str_idx; |
1339 | fs->stack[num].node = dest_node; |
1340 | fs->stack[num].regs = re_malloc (regmatch_t, 2 * nregs); |
1341 | if (fs->stack[num].regs == NULL) |
1342 | return REG_ESPACE; |
1343 | memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs); |
1344 | memcpy (fs->stack[num].regs + nregs, prevregs, sizeof (regmatch_t) * nregs); |
1345 | err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes); |
1346 | return err; |
1347 | } |
1348 | |
1349 | static Idx |
1350 | pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs, |
1351 | regmatch_t *regs, regmatch_t *prevregs, |
1352 | re_node_set *eps_via_nodes) |
1353 | { |
1354 | if (fs == NULL || fs->num == 0) |
1355 | return -1; |
1356 | Idx num = --fs->num; |
1357 | *pidx = fs->stack[num].idx; |
1358 | memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs); |
1359 | memcpy (prevregs, fs->stack[num].regs + nregs, sizeof (regmatch_t) * nregs); |
1360 | re_node_set_free (eps_via_nodes); |
1361 | re_free (fs->stack[num].regs); |
1362 | *eps_via_nodes = fs->stack[num].eps_via_nodes; |
1363 | DEBUG_ASSERT (0 <= fs->stack[num].node); |
1364 | return fs->stack[num].node; |
1365 | } |
1366 | |
1367 | |
1368 | #define DYNARRAY_STRUCT regmatch_list |
1369 | #define DYNARRAY_ELEMENT regmatch_t |
1370 | #define DYNARRAY_PREFIX regmatch_list_ |
1371 | #include <malloc/dynarray-skeleton.c> |
1372 | |
/* Store in the registers PMATCH the positions where the subexpressions
   start and end.
1375 | Note: We assume that pmatch[0] is already set, and |
1376 | pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */ |
1377 | |
1378 | static reg_errcode_t |
1379 | __attribute_warn_unused_result__ |
1380 | set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, |
1381 | regmatch_t *pmatch, bool fl_backtrack) |
1382 | { |
1383 | const re_dfa_t *dfa = preg->buffer; |
1384 | Idx idx, cur_node; |
1385 | re_node_set eps_via_nodes; |
1386 | struct re_fail_stack_t *fs; |
1387 | struct re_fail_stack_t fs_body = { 0, 2, NULL }; |
1388 | struct regmatch_list prev_match; |
1389 | regmatch_list_init (&prev_match); |
1390 | |
1391 | DEBUG_ASSERT (nmatch > 1); |
1392 | DEBUG_ASSERT (mctx->state_log != NULL); |
1393 | if (fl_backtrack) |
1394 | { |
1395 | fs = &fs_body; |
1396 | fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc); |
1397 | if (fs->stack == NULL) |
1398 | return REG_ESPACE; |
1399 | } |
1400 | else |
1401 | fs = NULL; |
1402 | |
1403 | cur_node = dfa->init_node; |
1404 | re_node_set_init_empty (&eps_via_nodes); |
1405 | |
1406 | if (!regmatch_list_resize (&prev_match, nmatch)) |
1407 | { |
1408 | regmatch_list_free (&prev_match); |
1409 | free_fail_stack_return (fs); |
1410 | return REG_ESPACE; |
1411 | } |
1412 | regmatch_t *prev_idx_match = regmatch_list_begin (&prev_match); |
1413 | memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch); |
1414 | |
1415 | for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;) |
1416 | { |
1417 | update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch); |
1418 | |
1419 | if ((idx == pmatch[0].rm_eo && cur_node == mctx->last_node) |
1420 | || (fs && re_node_set_contains (&eps_via_nodes, cur_node))) |
1421 | { |
1422 | Idx reg_idx; |
1423 | cur_node = -1; |
1424 | if (fs) |
1425 | { |
1426 | for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) |
1427 | if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1) |
1428 | { |
1429 | cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, |
1430 | prev_idx_match, &eps_via_nodes); |
1431 | break; |
1432 | } |
1433 | } |
1434 | if (cur_node < 0) |
1435 | { |
1436 | re_node_set_free (&eps_via_nodes); |
1437 | regmatch_list_free (&prev_match); |
1438 | return free_fail_stack_return (fs); |
1439 | } |
1440 | } |
1441 | |
1442 | /* Proceed to next node. */ |
1443 | cur_node = proceed_next_node (mctx, nmatch, pmatch, prev_idx_match, |
1444 | &idx, cur_node, |
1445 | &eps_via_nodes, fs); |
1446 | |
1447 | if (__glibc_unlikely (cur_node < 0)) |
1448 | { |
1449 | if (__glibc_unlikely (cur_node == -2)) |
1450 | { |
1451 | re_node_set_free (&eps_via_nodes); |
1452 | regmatch_list_free (&prev_match); |
1453 | free_fail_stack_return (fs); |
1454 | return REG_ESPACE; |
1455 | } |
1456 | cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, |
1457 | prev_idx_match, &eps_via_nodes); |
1458 | if (cur_node < 0) |
1459 | { |
1460 | re_node_set_free (&eps_via_nodes); |
1461 | regmatch_list_free (&prev_match); |
1462 | free_fail_stack_return (fs); |
1463 | return REG_NOMATCH; |
1464 | } |
1465 | } |
1466 | } |
1467 | re_node_set_free (&eps_via_nodes); |
1468 | regmatch_list_free (&prev_match); |
1469 | return free_fail_stack_return (fs); |
1470 | } |
1471 | |
1472 | static reg_errcode_t |
1473 | free_fail_stack_return (struct re_fail_stack_t *fs) |
1474 | { |
1475 | if (fs) |
1476 | { |
1477 | Idx fs_idx; |
1478 | for (fs_idx = 0; fs_idx < fs->num; ++fs_idx) |
1479 | { |
1480 | re_node_set_free (&fs->stack[fs_idx].eps_via_nodes); |
1481 | re_free (fs->stack[fs_idx].regs); |
1482 | } |
1483 | re_free (fs->stack); |
1484 | } |
1485 | return REG_NOERROR; |
1486 | } |
1487 | |
1488 | static void |
1489 | update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, |
1490 | regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch) |
1491 | { |
1492 | int type = dfa->nodes[cur_node].type; |
1493 | if (type == OP_OPEN_SUBEXP) |
1494 | { |
1495 | Idx reg_num = dfa->nodes[cur_node].opr.idx + 1; |
1496 | |
      /* We are at the first node of this subexpression.  */
1498 | if (reg_num < nmatch) |
1499 | { |
1500 | pmatch[reg_num].rm_so = cur_idx; |
1501 | pmatch[reg_num].rm_eo = -1; |
1502 | } |
1503 | } |
1504 | else if (type == OP_CLOSE_SUBEXP) |
1505 | { |
      /* We are at the last node of this subexpression.  */
1507 | Idx reg_num = dfa->nodes[cur_node].opr.idx + 1; |
1508 | if (reg_num < nmatch) |
1509 | { |
1510 | if (pmatch[reg_num].rm_so < cur_idx) |
1511 | { |
1512 | pmatch[reg_num].rm_eo = cur_idx; |
1513 | /* This is a non-empty match or we are not inside an optional |
1514 | subexpression. Accept this right away. */ |
1515 | memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch); |
1516 | } |
1517 | else |
1518 | { |
1519 | if (dfa->nodes[cur_node].opt_subexp |
1520 | && prev_idx_match[reg_num].rm_so != -1) |
1521 | /* We transited through an empty match for an optional |
1522 | subexpression, like (a?)*, and this is not the subexp's |
1523 | first match. Copy back the old content of the registers |
1524 | so that matches of an inner subexpression are undone as |
1525 | well, like in ((a?))*. */ |
1526 | memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch); |
1527 | else |
1528 | /* We completed a subexpression, but it may be part of |
1529 | an optional one, so do not update PREV_IDX_MATCH. */ |
1530 | pmatch[reg_num].rm_eo = cur_idx; |
1531 | } |
1532 | } |
1533 | } |
1534 | } |
1535 | |
/* This function checks the STATE_LOG from SCTX->last_str_idx down to 0
   and sifts the nodes in each state according to the following rules.
   The updated state_log is written back to STATE_LOG.
1539 | |
1540 | Rules: We throw away the Node 'a' in the STATE_LOG[STR_IDX] if... |
     1. When STR_IDX == MATCH_LAST (the last index in the state_log):
1542 | If 'a' isn't the LAST_NODE and 'a' can't epsilon transit to |
1543 | the LAST_NODE, we throw away the node 'a'. |
1544 | 2. When 0 <= STR_IDX < MATCH_LAST and 'a' accepts |
1545 | string 's' and transit to 'b': |
1546 | i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw |
1547 | away the node 'a'. |
1548 | ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is |
1549 | thrown away, we throw away the node 'a'. |
1550 | 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b': |
1551 | i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the |
1552 | node 'a'. |
1553 | ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away, |
1554 | we throw away the node 'a'. */ |
1555 | |
1556 | #define STATE_NODE_CONTAINS(state,node) \ |
1557 | ((state) != NULL && re_node_set_contains (&(state)->nodes, node)) |
1558 | |
1559 | static reg_errcode_t |
1560 | sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx) |
1561 | { |
1562 | reg_errcode_t err; |
1563 | int null_cnt = 0; |
1564 | Idx str_idx = sctx->last_str_idx; |
1565 | re_node_set cur_dest; |
1566 | |
1567 | DEBUG_ASSERT (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL); |
1568 | |
1569 | /* Build sifted state_log[str_idx]. It has the nodes which can epsilon |
1570 | transit to the last_node and the last_node itself. */ |
1571 | err = re_node_set_init_1 (&cur_dest, sctx->last_node); |
1572 | if (__glibc_unlikely (err != REG_NOERROR)) |
1573 | return err; |
1574 | err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest); |
1575 | if (__glibc_unlikely (err != REG_NOERROR)) |
1576 | goto free_return; |
1577 | |
  /* Then check each state in the state_log.  */
1579 | while (str_idx > 0) |
1580 | { |
1581 | /* Update counters. */ |
1582 | null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0; |
1583 | if (null_cnt > mctx->max_mb_elem_len) |
1584 | { |
1585 | memset (sctx->sifted_states, '\0', |
1586 | sizeof (re_dfastate_t *) * str_idx); |
1587 | re_node_set_free (&cur_dest); |
1588 | return REG_NOERROR; |
1589 | } |
1590 | re_node_set_empty (&cur_dest); |
1591 | --str_idx; |
1592 | |
1593 | if (mctx->state_log[str_idx]) |
1594 | { |
1595 | err = build_sifted_states (mctx, sctx, str_idx, &cur_dest); |
1596 | if (__glibc_unlikely (err != REG_NOERROR)) |
1597 | goto free_return; |
1598 | } |
1599 | |
      /* Add all the nodes which satisfy the following conditions:
	 - they can epsilon transit to a node in CUR_DEST.
	 - they are in CUR_SRC.
	 Then update the state_log.  */
1604 | err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest); |
1605 | if (__glibc_unlikely (err != REG_NOERROR)) |
1606 | goto free_return; |
1607 | } |
1608 | err = REG_NOERROR; |
1609 | free_return: |
1610 | re_node_set_free (&cur_dest); |
1611 | return err; |
1612 | } |
1613 | |
1614 | static reg_errcode_t |
1615 | __attribute_warn_unused_result__ |
1616 | build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx, |
1617 | Idx str_idx, re_node_set *cur_dest) |
1618 | { |
1619 | const re_dfa_t *const dfa = mctx->dfa; |
1620 | const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes; |
1621 | Idx i; |
1622 | |
1623 | /* Then build the next sifted state. |
1624 | We build the next sifted state on 'cur_dest', and update |
1625 | 'sifted_states[str_idx]' with 'cur_dest'. |
1626 | Note: |
1627 | 'cur_dest' is the sifted state from 'state_log[str_idx + 1]'. |
     'cur_src' points to the node_set of the old 'state_log[str_idx]'
1629 | (with the epsilon nodes pre-filtered out). */ |
1630 | for (i = 0; i < cur_src->nelem; i++) |
1631 | { |
1632 | Idx prev_node = cur_src->elems[i]; |
1633 | int naccepted = 0; |
1634 | bool ok; |
1635 | DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[prev_node].type)); |
1636 | |
1637 | #ifdef RE_ENABLE_I18N |
1638 | /* If the node may accept "multi byte". */ |
1639 | if (dfa->nodes[prev_node].accept_mb) |
1640 | naccepted = sift_states_iter_mb (mctx, sctx, prev_node, |
1641 | str_idx, sctx->last_str_idx); |
1642 | #endif /* RE_ENABLE_I18N */ |
1643 | |
1644 | /* We don't check backreferences here. |
1645 | See update_cur_sifted_state(). */ |
1646 | if (!naccepted |
1647 | && check_node_accept (mctx, dfa->nodes + prev_node, str_idx) |
1648 | && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1], |
1649 | dfa->nexts[prev_node])) |
1650 | naccepted = 1; |
1651 | |
1652 | if (naccepted == 0) |
1653 | continue; |
1654 | |
1655 | if (sctx->limits.nelem) |
1656 | { |
1657 | Idx to_idx = str_idx + naccepted; |
1658 | if (check_dst_limits (mctx, &sctx->limits, |
1659 | dfa->nexts[prev_node], to_idx, |
1660 | prev_node, str_idx)) |
1661 | continue; |
1662 | } |
1663 | ok = re_node_set_insert (cur_dest, prev_node); |
1664 | if (__glibc_unlikely (! ok)) |
1665 | return REG_ESPACE; |
1666 | } |
1667 | |
1668 | return REG_NOERROR; |
1669 | } |
1670 | |
1671 | /* Helper functions. */ |
1672 | |
1673 | static reg_errcode_t |
1674 | clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx) |
1675 | { |
1676 | Idx top = mctx->state_log_top; |
1677 | |
1678 | if ((next_state_log_idx >= mctx->input.bufs_len |
1679 | && mctx->input.bufs_len < mctx->input.len) |
1680 | || (next_state_log_idx >= mctx->input.valid_len |
1681 | && mctx->input.valid_len < mctx->input.len)) |
1682 | { |
1683 | reg_errcode_t err; |
1684 | err = extend_buffers (mctx, next_state_log_idx + 1); |
1685 | if (__glibc_unlikely (err != REG_NOERROR)) |
1686 | return err; |
1687 | } |
1688 | |
1689 | if (top < next_state_log_idx) |
1690 | { |
1691 | memset (mctx->state_log + top + 1, '\0', |
1692 | sizeof (re_dfastate_t *) * (next_state_log_idx - top)); |
1693 | mctx->state_log_top = next_state_log_idx; |
1694 | } |
1695 | return REG_NOERROR; |
1696 | } |
1697 | |
1698 | static reg_errcode_t |
1699 | merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst, |
1700 | re_dfastate_t **src, Idx num) |
1701 | { |
1702 | Idx st_idx; |
1703 | reg_errcode_t err; |
1704 | for (st_idx = 0; st_idx < num; ++st_idx) |
1705 | { |
1706 | if (dst[st_idx] == NULL) |
1707 | dst[st_idx] = src[st_idx]; |
1708 | else if (src[st_idx] != NULL) |
1709 | { |
1710 | re_node_set merged_set; |
1711 | err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes, |
1712 | &src[st_idx]->nodes); |
1713 | if (__glibc_unlikely (err != REG_NOERROR)) |
1714 | return err; |
1715 | dst[st_idx] = re_acquire_state (&err, dfa, &merged_set); |
1716 | re_node_set_free (&merged_set); |
1717 | if (__glibc_unlikely (err != REG_NOERROR)) |
1718 | return err; |
1719 | } |
1720 | } |
1721 | return REG_NOERROR; |
1722 | } |
1723 | |
1724 | static reg_errcode_t |
1725 | update_cur_sifted_state (const re_match_context_t *mctx, |
1726 | re_sift_context_t *sctx, Idx str_idx, |
1727 | re_node_set *dest_nodes) |
1728 | { |
1729 | const re_dfa_t *const dfa = mctx->dfa; |
1730 | reg_errcode_t err = REG_NOERROR; |
1731 | const re_node_set *candidates; |
1732 | candidates = ((mctx->state_log[str_idx] == NULL) ? NULL |
1733 | : &mctx->state_log[str_idx]->nodes); |
1734 | |
1735 | if (dest_nodes->nelem == 0) |
1736 | sctx->sifted_states[str_idx] = NULL; |
1737 | else |
1738 | { |
1739 | if (candidates) |
1740 | { |
	  /* First, add the nodes which can epsilon transit to a node in
	     DEST_NODES.  */
1743 | err = add_epsilon_src_nodes (dfa, dest_nodes, candidates); |
1744 | if (__glibc_unlikely (err != REG_NOERROR)) |
1745 | return err; |
1746 | |
1747 | /* Then, check the limitations in the current sift_context. */ |
1748 | if (sctx->limits.nelem) |
1749 | { |
1750 | err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits, |
1751 | mctx->bkref_ents, str_idx); |
1752 | if (__glibc_unlikely (err != REG_NOERROR)) |
1753 | return err; |
1754 | } |
1755 | } |
1756 | |
1757 | sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes); |
1758 | if (__glibc_unlikely (err != REG_NOERROR)) |
1759 | return err; |
1760 | } |
1761 | |
1762 | if (candidates && mctx->state_log[str_idx]->has_backref) |
1763 | { |
1764 | err = sift_states_bkref (mctx, sctx, str_idx, candidates); |
1765 | if (__glibc_unlikely (err != REG_NOERROR)) |
1766 | return err; |
1767 | } |
1768 | return REG_NOERROR; |
1769 | } |
1770 | |
1771 | static reg_errcode_t |
1772 | __attribute_warn_unused_result__ |
1773 | add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes, |
1774 | const re_node_set *candidates) |
1775 | { |
1776 | reg_errcode_t err = REG_NOERROR; |
1777 | Idx i; |
1778 | |
1779 | re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes); |
1780 | if (__glibc_unlikely (err != REG_NOERROR)) |
1781 | return err; |
1782 | |
1783 | if (!state->inveclosure.alloc) |
1784 | { |
1785 | err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem); |
1786 | if (__glibc_unlikely (err != REG_NOERROR)) |
1787 | return REG_ESPACE; |
1788 | for (i = 0; i < dest_nodes->nelem; i++) |
1789 | { |
1790 | err = re_node_set_merge (&state->inveclosure, |
1791 | dfa->inveclosures + dest_nodes->elems[i]); |
1792 | if (__glibc_unlikely (err != REG_NOERROR)) |
1793 | return REG_ESPACE; |
1794 | } |
1795 | } |
1796 | return re_node_set_add_intersect (dest_nodes, candidates, |
1797 | &state->inveclosure); |
1798 | } |
1799 | |
1800 | static reg_errcode_t |
1801 | sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes, |
1802 | const re_node_set *candidates) |
1803 | { |
1804 | Idx ecl_idx; |
1805 | reg_errcode_t err; |
1806 | re_node_set *inv_eclosure = dfa->inveclosures + node; |
1807 | re_node_set except_nodes; |
1808 | re_node_set_init_empty (&except_nodes); |
1809 | for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) |
1810 | { |
1811 | Idx cur_node = inv_eclosure->elems[ecl_idx]; |
1812 | if (cur_node == node) |
1813 | continue; |
1814 | if (IS_EPSILON_NODE (dfa->nodes[cur_node].type)) |
1815 | { |
1816 | Idx edst1 = dfa->edests[cur_node].elems[0]; |
1817 | Idx edst2 = ((dfa->edests[cur_node].nelem > 1) |
1818 | ? dfa->edests[cur_node].elems[1] : -1); |
1819 | if ((!re_node_set_contains (inv_eclosure, edst1) |
1820 | && re_node_set_contains (dest_nodes, edst1)) |
1821 | || (edst2 > 0 |
1822 | && !re_node_set_contains (inv_eclosure, edst2) |
1823 | && re_node_set_contains (dest_nodes, edst2))) |
1824 | { |
1825 | err = re_node_set_add_intersect (&except_nodes, candidates, |
1826 | dfa->inveclosures + cur_node); |
1827 | if (__glibc_unlikely (err != REG_NOERROR)) |
1828 | { |
1829 | re_node_set_free (&except_nodes); |
1830 | return err; |
1831 | } |
1832 | } |
1833 | } |
1834 | } |
1835 | for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) |
1836 | { |
1837 | Idx cur_node = inv_eclosure->elems[ecl_idx]; |
1838 | if (!re_node_set_contains (&except_nodes, cur_node)) |
1839 | { |
1840 | Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1; |
1841 | re_node_set_remove_at (dest_nodes, idx); |
1842 | } |
1843 | } |
1844 | re_node_set_free (&except_nodes); |
1845 | return REG_NOERROR; |
1846 | } |
1847 | |
1848 | static bool |
1849 | check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits, |
1850 | Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx) |
1851 | { |
1852 | const re_dfa_t *const dfa = mctx->dfa; |
1853 | Idx lim_idx, src_pos, dst_pos; |
1854 | |
1855 | Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx); |
1856 | Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx); |
1857 | for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) |
1858 | { |
1859 | Idx subexp_idx; |
1860 | struct re_backref_cache_entry *ent; |
1861 | ent = mctx->bkref_ents + limits->elems[lim_idx]; |
1862 | subexp_idx = dfa->nodes[ent->node].opr.idx; |
1863 | |
1864 | dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx], |
1865 | subexp_idx, dst_node, dst_idx, |
1866 | dst_bkref_idx); |
1867 | src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx], |
1868 | subexp_idx, src_node, src_idx, |
1869 | src_bkref_idx); |
1870 | |
1871 | /* In case of: |
1872 | <src> <dst> ( <subexp> ) |
1873 | ( <subexp> ) <src> <dst> |
1874 | ( <subexp1> <src> <subexp2> <dst> <subexp3> ) */ |
1875 | if (src_pos == dst_pos) |
1876 | continue; /* This is unrelated limitation. */ |
1877 | else |
1878 | return true; |
1879 | } |
1880 | return false; |
1881 | } |
1882 | |
1883 | static int |
1884 | check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries, |
1885 | Idx subexp_idx, Idx from_node, Idx bkref_idx) |
1886 | { |
1887 | const re_dfa_t *const dfa = mctx->dfa; |
1888 | const re_node_set *eclosures = dfa->eclosures + from_node; |
1889 | Idx node_idx; |
1890 | |
  /* We are on a boundary of the subexpression: examine the nodes
     in the epsilon closure of FROM_NODE.  */
1893 | for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx) |
1894 | { |
1895 | Idx node = eclosures->elems[node_idx]; |
1896 | switch (dfa->nodes[node].type) |
1897 | { |
1898 | case OP_BACK_REF: |
1899 | if (bkref_idx != -1) |
1900 | { |
1901 | struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx; |
1902 | do |
1903 | { |
1904 | Idx dst; |
1905 | int cpos; |
1906 | |
1907 | if (ent->node != node) |
1908 | continue; |
1909 | |
1910 | if (subexp_idx < BITSET_WORD_BITS |
1911 | && !(ent->eps_reachable_subexps_map |
1912 | & ((bitset_word_t) 1 << subexp_idx))) |
1913 | continue; |
1914 | |
1915 | /* Recurse trying to reach the OP_OPEN_SUBEXP and |
1916 | OP_CLOSE_SUBEXP cases below. But, if the |
1917 | destination node is the same node as the source |
1918 | node, don't recurse because it would cause an |
1919 | infinite loop: a regex that exhibits this behavior |
1920 | is ()\1*\1* */ |
1921 | dst = dfa->edests[node].elems[0]; |
1922 | if (dst == from_node) |
1923 | { |
1924 | if (boundaries & 1) |
1925 | return -1; |
1926 | else /* if (boundaries & 2) */ |
1927 | return 0; |
1928 | } |
1929 | |
1930 | cpos = |
1931 | check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, |
1932 | dst, bkref_idx); |
1933 | if (cpos == -1 /* && (boundaries & 1) */) |
1934 | return -1; |
1935 | if (cpos == 0 && (boundaries & 2)) |
1936 | return 0; |
1937 | |
1938 | if (subexp_idx < BITSET_WORD_BITS) |
1939 | ent->eps_reachable_subexps_map |
1940 | &= ~((bitset_word_t) 1 << subexp_idx); |
1941 | } |
1942 | while (ent++->more); |
1943 | } |
1944 | break; |
1945 | |
1946 | case OP_OPEN_SUBEXP: |
1947 | if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx) |
1948 | return -1; |
1949 | break; |
1950 | |
1951 | case OP_CLOSE_SUBEXP: |
1952 | if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx) |
1953 | return 0; |
1954 | break; |
1955 | |
1956 | default: |
1957 | break; |
1958 | } |
1959 | } |
1960 | |
1961 | return (boundaries & 2) ? 1 : 0; |
1962 | } |
1963 | |
1964 | static int |
1965 | check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit, |
1966 | Idx subexp_idx, Idx from_node, Idx str_idx, |
1967 | Idx bkref_idx) |
1968 | { |
1969 | struct re_backref_cache_entry *lim = mctx->bkref_ents + limit; |
1970 | int boundaries; |
1971 | |
1972 | /* If we are outside the range of the subexpression, return -1 or 1. */ |
1973 | if (str_idx < lim->subexp_from) |
1974 | return -1; |
1975 | |
1976 | if (lim->subexp_to < str_idx) |
1977 | return 1; |
1978 | |
  /* If we are strictly inside the subexpression, return 0.  */
1980 | boundaries = (str_idx == lim->subexp_from); |
1981 | boundaries |= (str_idx == lim->subexp_to) << 1; |
1982 | if (boundaries == 0) |
1983 | return 0; |
1984 | |
1985 | /* Else, examine epsilon closure. */ |
1986 | return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, |
1987 | from_node, bkref_idx); |
1988 | } |
1989 | |
1990 | /* Check the limitations of sub expressions LIMITS, and remove the nodes |
1991 | which are against limitations from DEST_NODES. */ |
1992 | |
1993 | static reg_errcode_t |
1994 | check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes, |
1995 | const re_node_set *candidates, re_node_set *limits, |
1996 | struct re_backref_cache_entry *bkref_ents, Idx str_idx) |
1997 | { |
1998 | reg_errcode_t err; |
1999 | Idx node_idx, lim_idx; |
2000 | |
2001 | for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) |
2002 | { |
2003 | Idx subexp_idx; |
2004 | struct re_backref_cache_entry *ent; |
2005 | ent = bkref_ents + limits->elems[lim_idx]; |
2006 | |
2007 | if (str_idx <= ent->subexp_from || ent->str_idx < str_idx) |
2008 | continue; /* This is unrelated limitation. */ |
2009 | |
2010 | subexp_idx = dfa->nodes[ent->node].opr.idx; |
2011 | if (ent->subexp_to == str_idx) |
2012 | { |
2013 | Idx ops_node = -1; |
2014 | Idx cls_node = -1; |
2015 | for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) |
2016 | { |
2017 | Idx node = dest_nodes->elems[node_idx]; |
2018 | re_token_type_t type = dfa->nodes[node].type; |
2019 | if (type == OP_OPEN_SUBEXP |
2020 | && subexp_idx == dfa->nodes[node].opr.idx) |
2021 | ops_node = node; |
2022 | else if (type == OP_CLOSE_SUBEXP |
2023 | && subexp_idx == dfa->nodes[node].opr.idx) |
2024 | cls_node = node; |
2025 | } |
2026 | |
2027 | /* Check the limitation of the open subexpression. */ |
	  /* Note that ent->subexp_to == str_idx, and
	     str_idx != ent->subexp_from.  */
2029 | if (ops_node >= 0) |
2030 | { |
2031 | err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes, |
2032 | candidates); |
2033 | if (__glibc_unlikely (err != REG_NOERROR)) |
2034 | return err; |
2035 | } |
2036 | |
2037 | /* Check the limitation of the close subexpression. */ |
2038 | if (cls_node >= 0) |
2039 | for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) |
2040 | { |
2041 | Idx node = dest_nodes->elems[node_idx]; |
2042 | if (!re_node_set_contains (dfa->inveclosures + node, |
2043 | cls_node) |
2044 | && !re_node_set_contains (dfa->eclosures + node, |
2045 | cls_node)) |
2046 | { |
2047 | /* It is against this limitation. |
		     Remove it from the current sifted state.  */
2049 | err = sub_epsilon_src_nodes (dfa, node, dest_nodes, |
2050 | candidates); |
2051 | if (__glibc_unlikely (err != REG_NOERROR)) |
2052 | return err; |
2053 | --node_idx; |
2054 | } |
2055 | } |
2056 | } |
2057 | else /* (ent->subexp_to != str_idx) */ |
2058 | { |
2059 | for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) |
2060 | { |
2061 | Idx node = dest_nodes->elems[node_idx]; |
2062 | re_token_type_t type = dfa->nodes[node].type; |
2063 | if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP) |
2064 | { |
2065 | if (subexp_idx != dfa->nodes[node].opr.idx) |
2066 | continue; |
2067 | /* It is against this limitation. |
		     Remove it from the current sifted state.  */
2069 | err = sub_epsilon_src_nodes (dfa, node, dest_nodes, |
2070 | candidates); |
2071 | if (__glibc_unlikely (err != REG_NOERROR)) |
2072 | return err; |
2073 | } |
2074 | } |
2075 | } |
2076 | } |
2077 | return REG_NOERROR; |
2078 | } |
2079 | |
2080 | static reg_errcode_t |
2081 | __attribute_warn_unused_result__ |
2082 | sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx, |
2083 | Idx str_idx, const re_node_set *candidates) |
2084 | { |
2085 | const re_dfa_t *const dfa = mctx->dfa; |
2086 | reg_errcode_t err; |
2087 | Idx node_idx, node; |
2088 | re_sift_context_t local_sctx; |
2089 | Idx first_idx = search_cur_bkref_entry (mctx, str_idx); |
2090 | |
2091 | if (first_idx == -1) |
2092 | return REG_NOERROR; |
2093 | |
2094 | local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */ |
2095 | |
2096 | for (node_idx = 0; node_idx < candidates->nelem; ++node_idx) |
2097 | { |
2098 | Idx enabled_idx; |
2099 | re_token_type_t type; |
2100 | struct re_backref_cache_entry *entry; |
2101 | node = candidates->elems[node_idx]; |
2102 | type = dfa->nodes[node].type; |
2103 | /* Avoid infinite loop for the REs like "()\1+". */ |
2104 | if (node == sctx->last_node && str_idx == sctx->last_str_idx) |
2105 | continue; |
2106 | if (type != OP_BACK_REF) |
2107 | continue; |
2108 | |
2109 | entry = mctx->bkref_ents + first_idx; |
2110 | enabled_idx = first_idx; |
2111 | do |
2112 | { |
2113 | Idx subexp_len; |
2114 | Idx to_idx; |
2115 | Idx dst_node; |
2116 | bool ok; |
2117 | re_dfastate_t *cur_state; |
2118 | |
2119 | if (entry->node != node) |
2120 | continue; |
2121 | subexp_len = entry->subexp_to - entry->subexp_from; |
2122 | to_idx = str_idx + subexp_len; |
2123 | dst_node = (subexp_len ? dfa->nexts[node] |
2124 | : dfa->edests[node].elems[0]); |
2125 | |
2126 | if (to_idx > sctx->last_str_idx |
2127 | || sctx->sifted_states[to_idx] == NULL |
2128 | || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node) |
2129 | || check_dst_limits (mctx, &sctx->limits, node, |
2130 | str_idx, dst_node, to_idx)) |
2131 | continue; |
2132 | |
2133 | if (local_sctx.sifted_states == NULL) |
2134 | { |
2135 | local_sctx = *sctx; |
2136 | err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits); |
2137 | if (__glibc_unlikely (err != REG_NOERROR)) |
2138 | goto free_return; |
2139 | } |
2140 | local_sctx.last_node = node; |
2141 | local_sctx.last_str_idx = str_idx; |
2142 | ok = re_node_set_insert (&local_sctx.limits, enabled_idx); |
2143 | if (__glibc_unlikely (! ok)) |
2144 | { |
2145 | err = REG_ESPACE; |
2146 | goto free_return; |
2147 | } |
2148 | cur_state = local_sctx.sifted_states[str_idx]; |
2149 | err = sift_states_backward (mctx, &local_sctx); |
2150 | if (__glibc_unlikely (err != REG_NOERROR)) |
2151 | goto free_return; |
2152 | if (sctx->limited_states != NULL) |
2153 | { |
2154 | err = merge_state_array (dfa, sctx->limited_states, |
2155 | local_sctx.sifted_states, |
2156 | str_idx + 1); |
2157 | if (__glibc_unlikely (err != REG_NOERROR)) |
2158 | goto free_return; |
2159 | } |
2160 | local_sctx.sifted_states[str_idx] = cur_state; |
2161 | re_node_set_remove (&local_sctx.limits, enabled_idx); |
2162 | |
2163 | /* mctx->bkref_ents may have changed, reload the pointer. */ |
2164 | entry = mctx->bkref_ents + enabled_idx; |
2165 | } |
2166 | while (enabled_idx++, entry++->more); |
2167 | } |
2168 | err = REG_NOERROR; |
2169 | free_return: |
2170 | if (local_sctx.sifted_states != NULL) |
2171 | { |
2172 | re_node_set_free (&local_sctx.limits); |
2173 | } |
2174 | |
2175 | return err; |
2176 | } |
2177 | |
2178 | |
2179 | #ifdef RE_ENABLE_I18N |
2180 | static int |
2181 | sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx, |
2182 | Idx node_idx, Idx str_idx, Idx max_str_idx) |
2183 | { |
2184 | const re_dfa_t *const dfa = mctx->dfa; |
2185 | int naccepted; |
  /* Check whether the node can accept a "multi byte".  */
2187 | naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx); |
2188 | if (naccepted > 0 && str_idx + naccepted <= max_str_idx |
2189 | && !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted], |
2190 | dfa->nexts[node_idx])) |
    /* The node can't accept the "multi byte", or the
       destination was already thrown away, so the node
       cannot accept the current input "multi byte".  */
2194 | naccepted = 0; |
  /* Otherwise, the node is certain to accept
     'naccepted' bytes of input.  */
2197 | return naccepted; |
2198 | } |
2199 | #endif /* RE_ENABLE_I18N */ |
2200 | |
2201 | |
2202 | /* Functions for state transition. */ |
2203 | |
2204 | /* Return the next state to which the current state STATE will transit by |
2205 | accepting the current input byte, and update STATE_LOG if necessary. |
2206 | Return NULL on failure. |
   If STATE can accept a multibyte char/collating element/back reference,
   update the destination in STATE_LOG.  */
2209 | |
2210 | static re_dfastate_t * |
2211 | __attribute_warn_unused_result__ |
2212 | transit_state (reg_errcode_t *err, re_match_context_t *mctx, |
2213 | re_dfastate_t *state) |
2214 | { |
2215 | re_dfastate_t **trtable; |
2216 | unsigned char ch; |
2217 | |
2218 | #ifdef RE_ENABLE_I18N |
2219 | /* If the current state can accept multibyte. */ |
2220 | if (__glibc_unlikely (state->accept_mb)) |
2221 | { |
2222 | *err = transit_state_mb (mctx, state); |
2223 | if (__glibc_unlikely (*err != REG_NOERROR)) |
2224 | return NULL; |
2225 | } |
2226 | #endif /* RE_ENABLE_I18N */ |
2227 | |
2228 | /* Then decide the next state with the single byte. */ |
2229 | #if 0 |
2230 | if (0) |
2231 | /* don't use transition table */ |
2232 | return transit_state_sb (err, mctx, state); |
2233 | #endif |
2234 | |
2235 | /* Use transition table */ |
2236 | ch = re_string_fetch_byte (&mctx->input); |
2237 | for (;;) |
2238 | { |
2239 | trtable = state->trtable; |
2240 | if (__glibc_likely (trtable != NULL)) |
2241 | return trtable[ch]; |
2242 | |
2243 | trtable = state->word_trtable; |
2244 | if (__glibc_likely (trtable != NULL)) |
2245 | { |
2246 | unsigned int context; |
2247 | context |
2248 | = re_string_context_at (&mctx->input, |
2249 | re_string_cur_idx (&mctx->input) - 1, |
2250 | mctx->eflags); |
2251 | if (IS_WORD_CONTEXT (context)) |
2252 | return trtable[ch + SBC_MAX]; |
2253 | else |
2254 | return trtable[ch]; |
2255 | } |
2256 | |
2257 | if (!build_trtable (mctx->dfa, state)) |
2258 | { |
2259 | *err = REG_ESPACE; |
2260 | return NULL; |
2261 | } |
2262 | |
2263 | /* Retry, we now have a transition table. */ |
2264 | } |
2265 | } |
2266 | |
/* Update the state_log if needed.  */
2268 | static re_dfastate_t * |
2269 | merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx, |
2270 | re_dfastate_t *next_state) |
2271 | { |
2272 | const re_dfa_t *const dfa = mctx->dfa; |
2273 | Idx cur_idx = re_string_cur_idx (&mctx->input); |
2274 | |
2275 | if (cur_idx > mctx->state_log_top) |
2276 | { |
2277 | mctx->state_log[cur_idx] = next_state; |
2278 | mctx->state_log_top = cur_idx; |
2279 | } |
2280 | else if (mctx->state_log[cur_idx] == 0) |
2281 | { |
2282 | mctx->state_log[cur_idx] = next_state; |
2283 | } |
2284 | else |
2285 | { |
2286 | re_dfastate_t *pstate; |
2287 | unsigned int context; |
2288 | re_node_set next_nodes, *log_nodes, *table_nodes = NULL; |
2289 | /* If (state_log[cur_idx] != 0), it implies that cur_idx is |
2290 | the destination of a multibyte char/collating element/ |
2291 | back reference. Then the next state is the union set of |
2292 | these destinations and the results of the transition table. */ |
2293 | pstate = mctx->state_log[cur_idx]; |
2294 | log_nodes = pstate->entrance_nodes; |
2295 | if (next_state != NULL) |
2296 | { |
2297 | table_nodes = next_state->entrance_nodes; |
2298 | *err = re_node_set_init_union (&next_nodes, table_nodes, |
2299 | log_nodes); |
2300 | if (__glibc_unlikely (*err != REG_NOERROR)) |
2301 | return NULL; |
2302 | } |
2303 | else |
2304 | next_nodes = *log_nodes; |
      /* Note: We have already added the nodes of the initial state,
	 so we don't need to add them here.  */
2307 | |
2308 | context = re_string_context_at (&mctx->input, |
2309 | re_string_cur_idx (&mctx->input) - 1, |
2310 | mctx->eflags); |
2311 | next_state = mctx->state_log[cur_idx] |
2312 | = re_acquire_state_context (err, dfa, &next_nodes, context); |
2313 | /* We don't need to check errors here, since the return value of |
2314 | this function is next_state and ERR is already set. */ |
2315 | |
2316 | if (table_nodes != NULL) |
2317 | re_node_set_free (&next_nodes); |
2318 | } |
2319 | |
2320 | if (__glibc_unlikely (dfa->nbackref) && next_state != NULL) |
2321 | { |
      /* Check OP_OPEN_SUBEXP in the current state in case we need them
	 later.  We must check them here, since the back references in the
	 next state might use them.  */
2325 | *err = check_subexp_matching_top (mctx, &next_state->nodes, |
2326 | cur_idx); |
2327 | if (__glibc_unlikely (*err != REG_NOERROR)) |
2328 | return NULL; |
2329 | |
2330 | /* If the next state has back references. */ |
2331 | if (next_state->has_backref) |
2332 | { |
2333 | *err = transit_state_bkref (mctx, &next_state->nodes); |
2334 | if (__glibc_unlikely (*err != REG_NOERROR)) |
2335 | return NULL; |
2336 | next_state = mctx->state_log[cur_idx]; |
2337 | } |
2338 | } |
2339 | |
2340 | return next_state; |
2341 | } |
2342 | |
2343 | /* Skip bytes in the input that correspond to part of a |
2344 | multi-byte match, then look in the log for a state |
2345 | from which to restart matching. */ |
2346 | static re_dfastate_t * |
2347 | find_recover_state (reg_errcode_t *err, re_match_context_t *mctx) |
2348 | { |
2349 | re_dfastate_t *cur_state; |
2350 | do |
2351 | { |
2352 | Idx max = mctx->state_log_top; |
2353 | Idx cur_str_idx = re_string_cur_idx (&mctx->input); |
2354 | |
2355 | do |
2356 | { |
2357 | if (++cur_str_idx > max) |
2358 | return NULL; |
2359 | re_string_skip_bytes (&mctx->input, 1); |
2360 | } |
2361 | while (mctx->state_log[cur_str_idx] == NULL); |
2362 | |
2363 | cur_state = merge_state_with_log (err, mctx, NULL); |
2364 | } |
2365 | while (*err == REG_NOERROR && cur_state == NULL); |
2366 | return cur_state; |
2367 | } |
2368 | |
2369 | /* Helper functions for transit_state. */ |
2370 | |
2371 | /* From the node set CUR_NODES, pick up the nodes whose types are |
2372 | OP_OPEN_SUBEXP and which have corresponding back references in the regular |
   expression, and register them for later use when evaluating the
   corresponding back references.  */
2375 | |
2376 | static reg_errcode_t |
2377 | check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes, |
2378 | Idx str_idx) |
2379 | { |
2380 | const re_dfa_t *const dfa = mctx->dfa; |
2381 | Idx node_idx; |
2382 | reg_errcode_t err; |
2383 | |
2384 | /* TODO: This isn't efficient. |
     Because there might be more than one node whose type is
2386 | OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all |
2387 | nodes. |
2388 | E.g. RE: (a){2} */ |
2389 | for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx) |
2390 | { |
2391 | Idx node = cur_nodes->elems[node_idx]; |
2392 | if (dfa->nodes[node].type == OP_OPEN_SUBEXP |
2393 | && dfa->nodes[node].opr.idx < BITSET_WORD_BITS |
2394 | && (dfa->used_bkref_map |
2395 | & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx))) |
2396 | { |
2397 | err = match_ctx_add_subtop (mctx, node, str_idx); |
2398 | if (__glibc_unlikely (err != REG_NOERROR)) |
2399 | return err; |
2400 | } |
2401 | } |
2402 | return REG_NOERROR; |
2403 | } |
2404 | |
2405 | #if 0 |
2406 | /* Return the next state to which the current state STATE will transit by |
2407 | accepting the current input byte. Return NULL on failure. */ |
2408 | |
2409 | static re_dfastate_t * |
2410 | transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx, |
2411 | re_dfastate_t *state) |
2412 | { |
2413 | const re_dfa_t *const dfa = mctx->dfa; |
2414 | re_node_set next_nodes; |
2415 | re_dfastate_t *next_state; |
2416 | Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input); |
2417 | unsigned int context; |
2418 | |
2419 | *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1); |
2420 | if (__glibc_unlikely (*err != REG_NOERROR)) |
2421 | return NULL; |
2422 | for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt) |
2423 | { |
2424 | Idx cur_node = state->nodes.elems[node_cnt]; |
2425 | if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx)) |
2426 | { |
2427 | *err = re_node_set_merge (&next_nodes, |
2428 | dfa->eclosures + dfa->nexts[cur_node]); |
2429 | if (__glibc_unlikely (*err != REG_NOERROR)) |
2430 | { |
2431 | re_node_set_free (&next_nodes); |
2432 | return NULL; |
2433 | } |
2434 | } |
2435 | } |
2436 | context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags); |
2437 | next_state = re_acquire_state_context (err, dfa, &next_nodes, context); |
2438 | /* We don't need to check errors here, since the return value of |
2439 | this function is next_state and ERR is already set. */ |
2440 | |
2441 | re_node_set_free (&next_nodes); |
2442 | re_string_skip_bytes (&mctx->input, 1); |
2443 | return next_state; |
2444 | } |
2445 | #endif |
2446 | |
2447 | #ifdef RE_ENABLE_I18N |
2448 | static reg_errcode_t |
2449 | transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate) |
2450 | { |
2451 | const re_dfa_t *const dfa = mctx->dfa; |
2452 | reg_errcode_t err; |
2453 | Idx i; |
2454 | |
2455 | for (i = 0; i < pstate->nodes.nelem; ++i) |
2456 | { |
2457 | re_node_set dest_nodes, *new_nodes; |
2458 | Idx cur_node_idx = pstate->nodes.elems[i]; |
2459 | int naccepted; |
2460 | Idx dest_idx; |
2461 | unsigned int context; |
2462 | re_dfastate_t *dest_state; |
2463 | |
2464 | if (!dfa->nodes[cur_node_idx].accept_mb) |
2465 | continue; |
2466 | |
2467 | if (dfa->nodes[cur_node_idx].constraint) |
2468 | { |
2469 | context = re_string_context_at (&mctx->input, |
2470 | re_string_cur_idx (&mctx->input), |
2471 | mctx->eflags); |
2472 | if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint, |
2473 | context)) |
2474 | continue; |
2475 | } |
2476 | |
      /* How many bytes can the node accept?  */
2478 | naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input, |
2479 | re_string_cur_idx (&mctx->input)); |
2480 | if (naccepted == 0) |
2481 | continue; |
2482 | |
      /* The node can accept 'naccepted' bytes.  */
2484 | dest_idx = re_string_cur_idx (&mctx->input) + naccepted; |
2485 | mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted |
2486 | : mctx->max_mb_elem_len); |
2487 | err = clean_state_log_if_needed (mctx, dest_idx); |
2488 | if (__glibc_unlikely (err != REG_NOERROR)) |
2489 | return err; |
2490 | DEBUG_ASSERT (dfa->nexts[cur_node_idx] != -1); |
2491 | new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx]; |
2492 | |
2493 | dest_state = mctx->state_log[dest_idx]; |
2494 | if (dest_state == NULL) |
2495 | dest_nodes = *new_nodes; |
2496 | else |
2497 | { |
2498 | err = re_node_set_init_union (&dest_nodes, |
2499 | dest_state->entrance_nodes, new_nodes); |
2500 | if (__glibc_unlikely (err != REG_NOERROR)) |
2501 | return err; |
2502 | } |
2503 | context = re_string_context_at (&mctx->input, dest_idx - 1, |
2504 | mctx->eflags); |
2505 | mctx->state_log[dest_idx] |
2506 | = re_acquire_state_context (&err, dfa, &dest_nodes, context); |
2507 | if (dest_state != NULL) |
2508 | re_node_set_free (&dest_nodes); |
2509 | if (__glibc_unlikely (mctx->state_log[dest_idx] == NULL |
2510 | && err != REG_NOERROR)) |
2511 | return err; |
2512 | } |
2513 | return REG_NOERROR; |
2514 | } |
2515 | #endif /* RE_ENABLE_I18N */ |
2516 | |
2517 | static reg_errcode_t |
2518 | transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) |
2519 | { |
2520 | const re_dfa_t *const dfa = mctx->dfa; |
2521 | reg_errcode_t err; |
2522 | Idx i; |
2523 | Idx cur_str_idx = re_string_cur_idx (&mctx->input); |
2524 | |
2525 | for (i = 0; i < nodes->nelem; ++i) |
2526 | { |
2527 | Idx dest_str_idx, prev_nelem, bkc_idx; |
2528 | Idx node_idx = nodes->elems[i]; |
2529 | unsigned int context; |
2530 | const re_token_t *node = dfa->nodes + node_idx; |
2531 | re_node_set *new_dest_nodes; |
2532 | |
2533 | /* Check whether 'node' is a backreference or not. */ |
2534 | if (node->type != OP_BACK_REF) |
2535 | continue; |
2536 | |
2537 | if (node->constraint) |
2538 | { |
2539 | context = re_string_context_at (&mctx->input, cur_str_idx, |
2540 | mctx->eflags); |
2541 | if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)) |
2542 | continue; |
2543 | } |
2544 | |
      /* 'node' is a backreference.
	 Check the substring which the referenced subexpression matched.  */
2547 | bkc_idx = mctx->nbkref_ents; |
2548 | err = get_subexp (mctx, node_idx, cur_str_idx); |
2549 | if (__glibc_unlikely (err != REG_NOERROR)) |
2550 | goto free_return; |
2551 | |
      /* And add the epsilon closures (which are 'new_dest_nodes') of
	 the back reference to the appropriate state_log entries.  */
2554 | DEBUG_ASSERT (dfa->nexts[node_idx] != -1); |
2555 | for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx) |
2556 | { |
2557 | Idx subexp_len; |
2558 | re_dfastate_t *dest_state; |
2559 | struct re_backref_cache_entry *bkref_ent; |
2560 | bkref_ent = mctx->bkref_ents + bkc_idx; |
2561 | if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx) |
2562 | continue; |
2563 | subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from; |
2564 | new_dest_nodes = (subexp_len == 0 |
2565 | ? dfa->eclosures + dfa->edests[node_idx].elems[0] |
2566 | : dfa->eclosures + dfa->nexts[node_idx]); |
2567 | dest_str_idx = (cur_str_idx + bkref_ent->subexp_to |
2568 | - bkref_ent->subexp_from); |
2569 | context = re_string_context_at (&mctx->input, dest_str_idx - 1, |
2570 | mctx->eflags); |
2571 | dest_state = mctx->state_log[dest_str_idx]; |
2572 | prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0 |
2573 | : mctx->state_log[cur_str_idx]->nodes.nelem); |
	  /* Add 'new_dest_nodes' to the state_log.  */
2575 | if (dest_state == NULL) |
2576 | { |
2577 | mctx->state_log[dest_str_idx] |
2578 | = re_acquire_state_context (&err, dfa, new_dest_nodes, |
2579 | context); |
2580 | if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL |
2581 | && err != REG_NOERROR)) |
2582 | goto free_return; |
2583 | } |
2584 | else |
2585 | { |
2586 | re_node_set dest_nodes; |
2587 | err = re_node_set_init_union (&dest_nodes, |
2588 | dest_state->entrance_nodes, |
2589 | new_dest_nodes); |
2590 | if (__glibc_unlikely (err != REG_NOERROR)) |
2591 | { |
2592 | re_node_set_free (&dest_nodes); |
2593 | goto free_return; |
2594 | } |
2595 | mctx->state_log[dest_str_idx] |
2596 | = re_acquire_state_context (&err, dfa, &dest_nodes, context); |
2597 | re_node_set_free (&dest_nodes); |
2598 | if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL |
2599 | && err != REG_NOERROR)) |
2600 | goto free_return; |
2601 | } |
2602 | /* We need to check recursively if the backreference can epsilon |
2603 | transit. */ |
2604 | if (subexp_len == 0 |
2605 | && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem) |
2606 | { |
2607 | err = check_subexp_matching_top (mctx, new_dest_nodes, |
2608 | cur_str_idx); |
2609 | if (__glibc_unlikely (err != REG_NOERROR)) |
2610 | goto free_return; |
2611 | err = transit_state_bkref (mctx, new_dest_nodes); |
2612 | if (__glibc_unlikely (err != REG_NOERROR)) |
2613 | goto free_return; |
2614 | } |
2615 | } |
2616 | } |
2617 | err = REG_NOERROR; |
2618 | free_return: |
2619 | return err; |
2620 | } |
2621 | |
2622 | /* Enumerate all the candidates which the backreference BKREF_NODE can match |
2623 | at BKREF_STR_IDX, and register them by match_ctx_add_entry(). |
2624 | Note that we might collect inappropriate candidates here. |
   However, the cost of checking them strictly here is too high, so we
   defer this checking to prune_impossible_nodes().  */
2627 | |
2628 | static reg_errcode_t |
2629 | __attribute_warn_unused_result__ |
2630 | get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx) |
2631 | { |
2632 | const re_dfa_t *const dfa = mctx->dfa; |
2633 | Idx subexp_num, sub_top_idx; |
2634 | const char *buf = (const char *) re_string_get_buffer (&mctx->input); |
2635 | /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */ |
2636 | Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx); |
2637 | if (cache_idx != -1) |
2638 | { |
2639 | const struct re_backref_cache_entry *entry |
2640 | = mctx->bkref_ents + cache_idx; |
2641 | do |
2642 | if (entry->node == bkref_node) |
2643 | return REG_NOERROR; /* We already checked it. */ |
2644 | while (entry++->more); |
2645 | } |
2646 | |
2647 | subexp_num = dfa->nodes[bkref_node].opr.idx; |
2648 | |
2649 | /* For each sub expression */ |
2650 | for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx) |
2651 | { |
2652 | reg_errcode_t err; |
2653 | re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx]; |
2654 | re_sub_match_last_t *sub_last; |
2655 | Idx sub_last_idx, sl_str, bkref_str_off; |
2656 | |
2657 | if (dfa->nodes[sub_top->node].opr.idx != subexp_num) |
2658 | continue; /* It isn't related. */ |
2659 | |
2660 | sl_str = sub_top->str_idx; |
2661 | bkref_str_off = bkref_str_idx; |
      /* First, check the last nodes of the sub expressions we have already
	 evaluated.  */
2664 | for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx) |
2665 | { |
2666 | regoff_t sl_str_diff; |
2667 | sub_last = sub_top->lasts[sub_last_idx]; |
2668 | sl_str_diff = sub_last->str_idx - sl_str; |
	  /* Does the string matched by the sub expression match the
	     substring at the back reference?  */
2671 | if (sl_str_diff > 0) |
2672 | { |
2673 | if (__glibc_unlikely (bkref_str_off + sl_str_diff |
2674 | > mctx->input.valid_len)) |
2675 | { |
2676 | /* Not enough chars for a successful match. */ |
2677 | if (bkref_str_off + sl_str_diff > mctx->input.len) |
2678 | break; |
2679 | |
2680 | err = clean_state_log_if_needed (mctx, |
2681 | bkref_str_off |
2682 | + sl_str_diff); |
2683 | if (__glibc_unlikely (err != REG_NOERROR)) |
2684 | return err; |
2685 | buf = (const char *) re_string_get_buffer (&mctx->input); |
2686 | } |
2687 | if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0) |
2688 | /* We don't need to search this sub expression any more. */ |
2689 | break; |
2690 | } |
2691 | bkref_str_off += sl_str_diff; |
2692 | sl_str += sl_str_diff; |
2693 | err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node, |
2694 | bkref_str_idx); |
2695 | |
2696 | /* Reload buf, since the preceding call might have reallocated |
2697 | the buffer. */ |
2698 | buf = (const char *) re_string_get_buffer (&mctx->input); |
2699 | |
2700 | if (err == REG_NOMATCH) |
2701 | continue; |
2702 | if (__glibc_unlikely (err != REG_NOERROR)) |
2703 | return err; |
2704 | } |
2705 | |
2706 | if (sub_last_idx < sub_top->nlasts) |
2707 | continue; |
2708 | if (sub_last_idx > 0) |
2709 | ++sl_str; |
2710 | /* Then, search for the other last nodes of the sub expression. */ |
2711 | for (; sl_str <= bkref_str_idx; ++sl_str) |
2712 | { |
2713 | Idx cls_node; |
2714 | regoff_t sl_str_off; |
2715 | const re_node_set *nodes; |
2716 | sl_str_off = sl_str - sub_top->str_idx; |
	  /* Does the string matched by the sub expression match the
	     substring at the back reference?  */
2719 | if (sl_str_off > 0) |
2720 | { |
2721 | if (__glibc_unlikely (bkref_str_off >= mctx->input.valid_len)) |
2722 | { |
2723 | /* If we are at the end of the input, we cannot match. */ |
2724 | if (bkref_str_off >= mctx->input.len) |
2725 | break; |
2726 | |
2727 | err = extend_buffers (mctx, bkref_str_off + 1); |
2728 | if (__glibc_unlikely (err != REG_NOERROR)) |
2729 | return err; |
2730 | |
2731 | buf = (const char *) re_string_get_buffer (&mctx->input); |
2732 | } |
2733 | if (buf [bkref_str_off++] != buf[sl_str - 1]) |
2734 | break; /* We don't need to search this sub expression |
2735 | any more. */ |
2736 | } |
2737 | if (mctx->state_log[sl_str] == NULL) |
2738 | continue; |
2739 | /* Does this state have a ')' of the sub expression? */ |
2740 | nodes = &mctx->state_log[sl_str]->nodes; |
2741 | cls_node = find_subexp_node (dfa, nodes, subexp_num, |
2742 | OP_CLOSE_SUBEXP); |
2743 | if (cls_node == -1) |
2744 | continue; /* No. */ |
2745 | if (sub_top->path == NULL) |
2746 | { |
2747 | sub_top->path = calloc (sizeof (state_array_t), |
2748 | sl_str - sub_top->str_idx + 1); |
2749 | if (sub_top->path == NULL) |
2750 | return REG_ESPACE; |
2751 | } |
	  /* Can the OP_OPEN_SUBEXP node arrive at the OP_CLOSE_SUBEXP node
	     in the current context?  */
2754 | err = check_arrival (mctx, sub_top->path, sub_top->node, |
2755 | sub_top->str_idx, cls_node, sl_str, |
2756 | OP_CLOSE_SUBEXP); |
2757 | if (err == REG_NOMATCH) |
2758 | continue; |
2759 | if (__glibc_unlikely (err != REG_NOERROR)) |
2760 | return err; |
2761 | sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str); |
2762 | if (__glibc_unlikely (sub_last == NULL)) |
2763 | return REG_ESPACE; |
2764 | err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node, |
2765 | bkref_str_idx); |
2766 | buf = (const char *) re_string_get_buffer (&mctx->input); |
2767 | if (err == REG_NOMATCH) |
2768 | continue; |
2769 | if (__glibc_unlikely (err != REG_NOERROR)) |
2770 | return err; |
2771 | } |
2772 | } |
2773 | return REG_NOERROR; |
2774 | } |
2775 | |
2776 | /* Helper functions for get_subexp(). */ |
2777 | |
/* Check whether SUB_LAST can arrive at the back reference BKREF_NODE at
   BKREF_STR.  If it can, register the sub expression expressed by SUB_TOP
   and SUB_LAST.  */
2781 | |
2782 | static reg_errcode_t |
2783 | get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top, |
2784 | re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str) |
2785 | { |
2786 | reg_errcode_t err; |
2787 | Idx to_idx; |
  /* Can the subexpression arrive at the back reference?  */
2789 | err = check_arrival (mctx, &sub_last->path, sub_last->node, |
2790 | sub_last->str_idx, bkref_node, bkref_str, |
2791 | OP_OPEN_SUBEXP); |
2792 | if (err != REG_NOERROR) |
2793 | return err; |
2794 | err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx, |
2795 | sub_last->str_idx); |
2796 | if (__glibc_unlikely (err != REG_NOERROR)) |
2797 | return err; |
2798 | to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx; |
2799 | return clean_state_log_if_needed (mctx, to_idx); |
2800 | } |
2801 | |
/* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
   Search for '(' if TYPE is OP_OPEN_SUBEXP, or for ')' otherwise.
   TODO: This function isn't efficient...
	 Because there might be more than one node whose type is
	 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
	 nodes.
	 E.g. RE: (a){2}  */
2809 | |
2810 | static Idx |
2811 | find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, |
2812 | Idx subexp_idx, int type) |
2813 | { |
2814 | Idx cls_idx; |
2815 | for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx) |
2816 | { |
2817 | Idx cls_node = nodes->elems[cls_idx]; |
2818 | const re_token_t *node = dfa->nodes + cls_node; |
2819 | if (node->type == type |
2820 | && node->opr.idx == subexp_idx) |
2821 | return cls_node; |
2822 | } |
2823 | return -1; |
2824 | } |
2825 | |
/* Check whether the node TOP_NODE at TOP_STR can arrive at the node
   LAST_NODE at LAST_STR.  We record the path in PATH since it will be
2828 | heavily reused. |
2829 | Return REG_NOERROR if it can arrive, REG_NOMATCH if it cannot, |
2830 | REG_ESPACE if memory is exhausted. */ |
2831 | |
2832 | static reg_errcode_t |
2833 | __attribute_warn_unused_result__ |
2834 | check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node, |
2835 | Idx top_str, Idx last_node, Idx last_str, int type) |
2836 | { |
2837 | const re_dfa_t *const dfa = mctx->dfa; |
2838 | reg_errcode_t err = REG_NOERROR; |
2839 | Idx subexp_num, backup_cur_idx, str_idx, null_cnt; |
2840 | re_dfastate_t *cur_state = NULL; |
2841 | re_node_set *cur_nodes, next_nodes; |
2842 | re_dfastate_t **backup_state_log; |
2843 | unsigned int context; |
2844 | |
2845 | subexp_num = dfa->nodes[top_node].opr.idx; |
  /* Extend the buffer if needed.  */
2847 | if (__glibc_unlikely (path->alloc < last_str + mctx->max_mb_elem_len + 1)) |
2848 | { |
2849 | re_dfastate_t **new_array; |
2850 | Idx old_alloc = path->alloc; |
2851 | Idx incr_alloc = last_str + mctx->max_mb_elem_len + 1; |
2852 | Idx new_alloc; |
2853 | if (__glibc_unlikely (IDX_MAX - old_alloc < incr_alloc)) |
2854 | return REG_ESPACE; |
2855 | new_alloc = old_alloc + incr_alloc; |
2856 | if (__glibc_unlikely (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc)) |
2857 | return REG_ESPACE; |
2858 | new_array = re_realloc (path->array, re_dfastate_t *, new_alloc); |
2859 | if (__glibc_unlikely (new_array == NULL)) |
2860 | return REG_ESPACE; |
2861 | path->array = new_array; |
2862 | path->alloc = new_alloc; |
2863 | memset (new_array + old_alloc, '\0', |
2864 | sizeof (re_dfastate_t *) * (path->alloc - old_alloc)); |
2865 | } |
2866 | |
2867 | str_idx = path->next_idx ? path->next_idx : top_str; |
2868 | |
  /* Temporarily modify MCTX.  */
2870 | backup_state_log = mctx->state_log; |
2871 | backup_cur_idx = mctx->input.cur_idx; |
2872 | mctx->state_log = path->array; |
2873 | mctx->input.cur_idx = str_idx; |
2874 | |
  /* Set up the initial node set.  */
2876 | context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags); |
2877 | if (str_idx == top_str) |
2878 | { |
2879 | err = re_node_set_init_1 (&next_nodes, top_node); |
2880 | if (__glibc_unlikely (err != REG_NOERROR)) |
2881 | return err; |
2882 | err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type); |
2883 | if (__glibc_unlikely (err != REG_NOERROR)) |
2884 | { |
2885 | re_node_set_free (&next_nodes); |
2886 | return err; |
2887 | } |
2888 | } |
2889 | else |
2890 | { |
2891 | cur_state = mctx->state_log[str_idx]; |
2892 | if (cur_state && cur_state->has_backref) |
2893 | { |
2894 | err = re_node_set_init_copy (&next_nodes, &cur_state->nodes); |
2895 | if (__glibc_unlikely (err != REG_NOERROR)) |
2896 | return err; |
2897 | } |
2898 | else |
2899 | re_node_set_init_empty (&next_nodes); |
2900 | } |
2901 | if (str_idx == top_str || (cur_state && cur_state->has_backref)) |
2902 | { |
2903 | if (next_nodes.nelem) |
2904 | { |
2905 | err = expand_bkref_cache (mctx, &next_nodes, str_idx, |
2906 | subexp_num, type); |
2907 | if (__glibc_unlikely (err != REG_NOERROR)) |
2908 | { |
2909 | re_node_set_free (&next_nodes); |
2910 | return err; |
2911 | } |
2912 | } |
2913 | cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context); |
2914 | if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR)) |
2915 | { |
2916 | re_node_set_free (&next_nodes); |
2917 | return err; |
2918 | } |
2919 | mctx->state_log[str_idx] = cur_state; |
2920 | } |
2921 | |
2922 | for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;) |
2923 | { |
2924 | re_node_set_empty (&next_nodes); |
2925 | if (mctx->state_log[str_idx + 1]) |
2926 | { |
2927 | err = re_node_set_merge (&next_nodes, |
2928 | &mctx->state_log[str_idx + 1]->nodes); |
2929 | if (__glibc_unlikely (err != REG_NOERROR)) |
2930 | { |
2931 | re_node_set_free (&next_nodes); |
2932 | return err; |
2933 | } |
2934 | } |
2935 | if (cur_state) |
2936 | { |
2937 | err = check_arrival_add_next_nodes (mctx, str_idx, |
2938 | &cur_state->non_eps_nodes, |
2939 | &next_nodes); |
2940 | if (__glibc_unlikely (err != REG_NOERROR)) |
2941 | { |
2942 | re_node_set_free (&next_nodes); |
2943 | return err; |
2944 | } |
2945 | } |
2946 | ++str_idx; |
2947 | if (next_nodes.nelem) |
2948 | { |
2949 | err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type); |
2950 | if (__glibc_unlikely (err != REG_NOERROR)) |
2951 | { |
2952 | re_node_set_free (&next_nodes); |
2953 | return err; |
2954 | } |
2955 | err = expand_bkref_cache (mctx, &next_nodes, str_idx, |
2956 | subexp_num, type); |
2957 | if (__glibc_unlikely (err != REG_NOERROR)) |
2958 | { |
2959 | re_node_set_free (&next_nodes); |
2960 | return err; |
2961 | } |
2962 | } |
2963 | context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags); |
2964 | cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context); |
2965 | if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR)) |
2966 | { |
2967 | re_node_set_free (&next_nodes); |
2968 | return err; |
2969 | } |
2970 | mctx->state_log[str_idx] = cur_state; |
2971 | null_cnt = cur_state == NULL ? null_cnt + 1 : 0; |
2972 | } |
2973 | re_node_set_free (&next_nodes); |
2974 | cur_nodes = (mctx->state_log[last_str] == NULL ? NULL |
2975 | : &mctx->state_log[last_str]->nodes); |
2976 | path->next_idx = str_idx; |
2977 | |
  /* Restore MCTX.  */
2979 | mctx->state_log = backup_state_log; |
2980 | mctx->input.cur_idx = backup_cur_idx; |
2981 | |
  /* Then check whether the current node set contains the node LAST_NODE.  */
2983 | if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node)) |
2984 | return REG_NOERROR; |
2985 | |
2986 | return REG_NOMATCH; |
2987 | } |
2988 | |
2989 | /* Helper functions for check_arrival. */ |
2990 | |
2991 | /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them |
2992 | to NEXT_NODES. |
2993 | TODO: This function is similar to the functions transit_state*(), |
   however this function does a lot of additional work.
2995 | Can't we unify them? */ |
2996 | |
2997 | static reg_errcode_t |
2998 | __attribute_warn_unused_result__ |
2999 | check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx, |
3000 | re_node_set *cur_nodes, re_node_set *next_nodes) |
3001 | { |
3002 | const re_dfa_t *const dfa = mctx->dfa; |
3003 | bool ok; |
3004 | Idx cur_idx; |
3005 | #ifdef RE_ENABLE_I18N |
3006 | reg_errcode_t err = REG_NOERROR; |
3007 | #endif |
3008 | re_node_set union_set; |
3009 | re_node_set_init_empty (&union_set); |
3010 | for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx) |
3011 | { |
3012 | int naccepted = 0; |
3013 | Idx cur_node = cur_nodes->elems[cur_idx]; |
3014 | DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[cur_node].type)); |
3015 | |
3016 | #ifdef RE_ENABLE_I18N |
3017 | /* If the node may accept "multi byte". */ |
3018 | if (dfa->nodes[cur_node].accept_mb) |
3019 | { |
3020 | naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input, |
3021 | str_idx); |
3022 | if (naccepted > 1) |
3023 | { |
3024 | re_dfastate_t *dest_state; |
3025 | Idx next_node = dfa->nexts[cur_node]; |
3026 | Idx next_idx = str_idx + naccepted; |
3027 | dest_state = mctx->state_log[next_idx]; |
3028 | re_node_set_empty (&union_set); |
3029 | if (dest_state) |
3030 | { |
3031 | err = re_node_set_merge (&union_set, &dest_state->nodes); |
3032 | if (__glibc_unlikely (err != REG_NOERROR)) |
3033 | { |
3034 | re_node_set_free (&union_set); |
3035 | return err; |
3036 | } |
3037 | } |
3038 | ok = re_node_set_insert (&union_set, next_node); |
3039 | if (__glibc_unlikely (! ok)) |
3040 | { |
3041 | re_node_set_free (&union_set); |
3042 | return REG_ESPACE; |
3043 | } |
3044 | mctx->state_log[next_idx] = re_acquire_state (&err, dfa, |
3045 | &union_set); |
3046 | if (__glibc_unlikely (mctx->state_log[next_idx] == NULL |
3047 | && err != REG_NOERROR)) |
3048 | { |
3049 | re_node_set_free (&union_set); |
3050 | return err; |
3051 | } |
3052 | } |
3053 | } |
3054 | #endif /* RE_ENABLE_I18N */ |
3055 | if (naccepted |
3056 | || check_node_accept (mctx, dfa->nodes + cur_node, str_idx)) |
3057 | { |
3058 | ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]); |
3059 | if (__glibc_unlikely (! ok)) |
3060 | { |
3061 | re_node_set_free (&union_set); |
3062 | return REG_ESPACE; |
3063 | } |
3064 | } |
3065 | } |
3066 | re_node_set_free (&union_set); |
3067 | return REG_NOERROR; |
3068 | } |
3069 | |
/* For all the nodes in CUR_NODES, add their epsilon closures to
   CUR_NODES, but exclude the nodes which are:
   - inside the sub expression whose number is EX_SUBEXP, if TYPE is
     OP_OPEN_SUBEXP.
   - outside the sub expression whose number is EX_SUBEXP, otherwise.  */
3075 | |
3076 | static reg_errcode_t |
3077 | check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes, |
3078 | Idx ex_subexp, int type) |
3079 | { |
3080 | reg_errcode_t err; |
3081 | Idx idx, outside_node; |
3082 | re_node_set new_nodes; |
3083 | DEBUG_ASSERT (cur_nodes->nelem); |
3084 | err = re_node_set_alloc (&new_nodes, cur_nodes->nelem); |
3085 | if (__glibc_unlikely (err != REG_NOERROR)) |
3086 | return err; |
3087 | /* Create a new node set NEW_NODES with the nodes which are epsilon |
     closures of the nodes in CUR_NODES.  */
3089 | |
3090 | for (idx = 0; idx < cur_nodes->nelem; ++idx) |
3091 | { |
3092 | Idx cur_node = cur_nodes->elems[idx]; |
3093 | const re_node_set *eclosure = dfa->eclosures + cur_node; |
3094 | outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type); |
3095 | if (outside_node == -1) |
3096 | { |
3097 | /* There are no problematic nodes, just merge them. */ |
3098 | err = re_node_set_merge (&new_nodes, eclosure); |
3099 | if (__glibc_unlikely (err != REG_NOERROR)) |
3100 | { |
3101 | re_node_set_free (&new_nodes); |
3102 | return err; |
3103 | } |
3104 | } |
3105 | else |
3106 | { |
3107 | /* There are problematic nodes, re-calculate incrementally. */ |
3108 | err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node, |
3109 | ex_subexp, type); |
3110 | if (__glibc_unlikely (err != REG_NOERROR)) |
3111 | { |
3112 | re_node_set_free (&new_nodes); |
3113 | return err; |
3114 | } |
3115 | } |
3116 | } |
3117 | re_node_set_free (cur_nodes); |
3118 | *cur_nodes = new_nodes; |
3119 | return REG_NOERROR; |
3120 | } |
3121 | |
3122 | /* Helper function for check_arrival_expand_ecl. |
3123 | Check incrementally the epsilon closure of TARGET, and if it isn't |
3124 | problematic append it to DST_NODES. */ |
3125 | |
3126 | static reg_errcode_t |
3127 | __attribute_warn_unused_result__ |
3128 | check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes, |
3129 | Idx target, Idx ex_subexp, int type) |
3130 | { |
3131 | Idx cur_node; |
3132 | for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);) |
3133 | { |
3134 | bool ok; |
3135 | |
3136 | if (dfa->nodes[cur_node].type == type |
3137 | && dfa->nodes[cur_node].opr.idx == ex_subexp) |
3138 | { |
3139 | if (type == OP_CLOSE_SUBEXP) |
3140 | { |
3141 | ok = re_node_set_insert (dst_nodes, cur_node); |
3142 | if (__glibc_unlikely (! ok)) |
3143 | return REG_ESPACE; |
3144 | } |
3145 | break; |
3146 | } |
3147 | ok = re_node_set_insert (dst_nodes, cur_node); |
3148 | if (__glibc_unlikely (! ok)) |
3149 | return REG_ESPACE; |
3150 | if (dfa->edests[cur_node].nelem == 0) |
3151 | break; |
3152 | if (dfa->edests[cur_node].nelem == 2) |
3153 | { |
3154 | reg_errcode_t err; |
3155 | err = check_arrival_expand_ecl_sub (dfa, dst_nodes, |
3156 | dfa->edests[cur_node].elems[1], |
3157 | ex_subexp, type); |
3158 | if (__glibc_unlikely (err != REG_NOERROR)) |
3159 | return err; |
3160 | } |
3161 | cur_node = dfa->edests[cur_node].elems[0]; |
3162 | } |
3163 | return REG_NOERROR; |
3164 | } |
3165 | |
3166 | |
3167 | /* For all the back references in the current state, calculate the |
3168 | destination of the back references by the appropriate entry |
3169 | in MCTX->BKREF_ENTS. */ |
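/* A cache entry records that the referenced subexpression matched the
   range [SUBEXP_FROM, SUBEXP_TO), so the back reference itself consumes
   SUBEXP_TO - SUBEXP_FROM bytes: for example, an entry with
   subexp_from == 2 and subexp_to == 5 reached at CUR_STR == 7 leads to
   state_log[10].  An empty match (subexp_from == subexp_to) is instead
   treated as an epsilon transition within the current state.  */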
3170 | |
3171 | static reg_errcode_t |
3172 | __attribute_warn_unused_result__ |
3173 | expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes, |
3174 | Idx cur_str, Idx subexp_num, int type) |
3175 | { |
3176 | const re_dfa_t *const dfa = mctx->dfa; |
3177 | reg_errcode_t err; |
3178 | Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str); |
3179 | struct re_backref_cache_entry *ent; |
3180 | |
3181 | if (cache_idx_start == -1) |
3182 | return REG_NOERROR; |
3183 | |
3184 | restart: |
3185 | ent = mctx->bkref_ents + cache_idx_start; |
3186 | do |
3187 | { |
3188 | Idx to_idx, next_node; |
3189 | |
/* Is this entry ENT appropriate? */
3191 | if (!re_node_set_contains (cur_nodes, ent->node)) |
3192 | continue; /* No. */ |
3193 | |
3194 | to_idx = cur_str + ent->subexp_to - ent->subexp_from; |
3195 | /* Calculate the destination of the back reference, and append it |
3196 | to MCTX->STATE_LOG. */ |
3197 | if (to_idx == cur_str) |
3198 | { |
/* The back reference matched the empty string (an epsilon
   transition), so we must re-check all the nodes in the current
   state. */
3201 | re_node_set new_dests; |
3202 | reg_errcode_t err2, err3; |
3203 | next_node = dfa->edests[ent->node].elems[0]; |
3204 | if (re_node_set_contains (cur_nodes, next_node)) |
3205 | continue; |
3206 | err = re_node_set_init_1 (&new_dests, next_node); |
3207 | err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type); |
3208 | err3 = re_node_set_merge (cur_nodes, &new_dests); |
3209 | re_node_set_free (&new_dests); |
3210 | if (__glibc_unlikely (err != REG_NOERROR || err2 != REG_NOERROR |
3211 | || err3 != REG_NOERROR)) |
3212 | { |
3213 | err = (err != REG_NOERROR ? err |
3214 | : (err2 != REG_NOERROR ? err2 : err3)); |
3215 | return err; |
3216 | } |
3217 | /* TODO: It is still inefficient... */ |
3218 | goto restart; |
3219 | } |
3220 | else |
3221 | { |
3222 | re_node_set union_set; |
3223 | next_node = dfa->nexts[ent->node]; |
3224 | if (mctx->state_log[to_idx]) |
3225 | { |
3226 | bool ok; |
3227 | if (re_node_set_contains (&mctx->state_log[to_idx]->nodes, |
3228 | next_node)) |
3229 | continue; |
3230 | err = re_node_set_init_copy (&union_set, |
3231 | &mctx->state_log[to_idx]->nodes); |
3232 | ok = re_node_set_insert (&union_set, next_node); |
3233 | if (__glibc_unlikely (err != REG_NOERROR || ! ok)) |
3234 | { |
3235 | re_node_set_free (&union_set); |
3236 | err = err != REG_NOERROR ? err : REG_ESPACE; |
3237 | return err; |
3238 | } |
3239 | } |
3240 | else |
3241 | { |
3242 | err = re_node_set_init_1 (&union_set, next_node); |
3243 | if (__glibc_unlikely (err != REG_NOERROR)) |
3244 | return err; |
3245 | } |
3246 | mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set); |
3247 | re_node_set_free (&union_set); |
3248 | if (__glibc_unlikely (mctx->state_log[to_idx] == NULL |
3249 | && err != REG_NOERROR)) |
3250 | return err; |
3251 | } |
3252 | } |
3253 | while (ent++->more); |
3254 | return REG_NOERROR; |
3255 | } |
3256 | |
3257 | /* Build transition table for the state. |
3258 | Return true if successful. */ |
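/* A rough sketch of how the finished tables are consulted at match time
   (transit_state performs the actual lookup): the next input byte CH
   indexes the table directly, and when the word table is built its second
   half holds the destinations used in a word context:

     next = state->trtable != NULL
	    ? state->trtable[ch]
	    : state->word_trtable[ch + (IS_WORD_CONTEXT (context)
					? SBC_MAX : 0)];
*/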
3259 | |
3260 | static bool __attribute_noinline__ |
3261 | build_trtable (const re_dfa_t *dfa, re_dfastate_t *state) |
3262 | { |
3263 | reg_errcode_t err; |
3264 | Idx i, j; |
3265 | int ch; |
3266 | bool need_word_trtable = false; |
3267 | bitset_word_t elem, mask; |
3268 | Idx ndests; /* Number of the destination states from 'state'. */ |
3269 | re_dfastate_t **trtable; |
3270 | re_dfastate_t *dest_states[SBC_MAX]; |
3271 | re_dfastate_t *dest_states_word[SBC_MAX]; |
3272 | re_dfastate_t *dest_states_nl[SBC_MAX]; |
3273 | re_node_set follows; |
3274 | bitset_t acceptable; |
3275 | |
/* We build DFA states which correspond to the destination nodes
   from 'state'.  'dests_node[i]' represents the nodes which the i-th
   destination state contains, and 'dests_ch[i]' represents the
   characters which the i-th destination state accepts.  */
3280 | re_node_set dests_node[SBC_MAX]; |
3281 | bitset_t dests_ch[SBC_MAX]; |
3282 | |
3283 | /* Initialize transition table. */ |
3284 | state->word_trtable = state->trtable = NULL; |
3285 | |
3286 | /* At first, group all nodes belonging to 'state' into several |
3287 | destinations. */ |
3288 | ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch); |
3289 | if (__glibc_unlikely (ndests <= 0)) |
3290 | { |
3291 | /* Return false in case of an error, true otherwise. */ |
3292 | if (ndests == 0) |
3293 | { |
3294 | state->trtable = (re_dfastate_t **) |
3295 | calloc (sizeof (re_dfastate_t *), SBC_MAX); |
3296 | if (__glibc_unlikely (state->trtable == NULL)) |
3297 | return false; |
3298 | return true; |
3299 | } |
3300 | return false; |
3301 | } |
3302 | |
3303 | err = re_node_set_alloc (&follows, ndests + 1); |
3304 | if (__glibc_unlikely (err != REG_NOERROR)) |
3305 | { |
3306 | out_free: |
3307 | re_node_set_free (&follows); |
3308 | for (i = 0; i < ndests; ++i) |
3309 | re_node_set_free (dests_node + i); |
3310 | return false; |
3311 | } |
3312 | |
3313 | bitset_empty (acceptable); |
3314 | |
3315 | /* Then build the states for all destinations. */ |
3316 | for (i = 0; i < ndests; ++i) |
3317 | { |
3318 | Idx next_node; |
3319 | re_node_set_empty (&follows); |
/* Merge the follows of the nodes in this destination state. */
3321 | for (j = 0; j < dests_node[i].nelem; ++j) |
3322 | { |
3323 | next_node = dfa->nexts[dests_node[i].elems[j]]; |
3324 | if (next_node != -1) |
3325 | { |
3326 | err = re_node_set_merge (&follows, dfa->eclosures + next_node); |
3327 | if (__glibc_unlikely (err != REG_NOERROR)) |
3328 | goto out_free; |
3329 | } |
3330 | } |
3331 | dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0); |
3332 | if (__glibc_unlikely (dest_states[i] == NULL && err != REG_NOERROR)) |
3333 | goto out_free; |
/* If the new state has a context constraint,
   build the appropriate states for those contexts. */
3336 | if (dest_states[i]->has_constraint) |
3337 | { |
3338 | dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows, |
3339 | CONTEXT_WORD); |
3340 | if (__glibc_unlikely (dest_states_word[i] == NULL |
3341 | && err != REG_NOERROR)) |
3342 | goto out_free; |
3343 | |
3344 | if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1) |
3345 | need_word_trtable = true; |
3346 | |
3347 | dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows, |
3348 | CONTEXT_NEWLINE); |
3349 | if (__glibc_unlikely (dest_states_nl[i] == NULL && err != REG_NOERROR)) |
3350 | goto out_free; |
3351 | } |
3352 | else |
3353 | { |
3354 | dest_states_word[i] = dest_states[i]; |
3355 | dest_states_nl[i] = dest_states[i]; |
3356 | } |
3357 | bitset_merge (acceptable, dests_ch[i]); |
3358 | } |
3359 | |
3360 | if (!__glibc_unlikely (need_word_trtable)) |
3361 | { |
/* Either we don't care whether the following character is a word
   character, or we are in a single-byte character set and can tell
   just from the character code: allocate a single 256-entry
   transition table. */
3366 | trtable = state->trtable = |
3367 | (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX); |
3368 | if (__glibc_unlikely (trtable == NULL)) |
3369 | goto out_free; |
3370 | |
3371 | /* For all characters ch...: */ |
3372 | for (i = 0; i < BITSET_WORDS; ++i) |
3373 | for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1; |
3374 | elem; |
3375 | mask <<= 1, elem >>= 1, ++ch) |
3376 | if (__glibc_unlikely (elem & 1)) |
3377 | { |
3378 | /* There must be exactly one destination which accepts |
3379 | character ch. See group_nodes_into_DFAstates. */ |
3380 | for (j = 0; (dests_ch[j][i] & mask) == 0; ++j) |
3381 | ; |
3382 | |
/* The j-th destination accepts CH; use its word-context state
   if CH is a word character. */
3384 | if (dfa->word_char[i] & mask) |
3385 | trtable[ch] = dest_states_word[j]; |
3386 | else |
3387 | trtable[ch] = dest_states[j]; |
3388 | } |
3389 | } |
3390 | else |
3391 | { |
/* We care whether the following character is a word character and
   we are in a multi-byte character set, so the byte value alone does
   not determine the context: build two 256-entry transition tables,
   one starting at trtable[0] and one starting at trtable[SBC_MAX]. */
3397 | trtable = state->word_trtable = |
3398 | (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX); |
3399 | if (__glibc_unlikely (trtable == NULL)) |
3400 | goto out_free; |
3401 | |
3402 | /* For all characters ch...: */ |
3403 | for (i = 0; i < BITSET_WORDS; ++i) |
3404 | for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1; |
3405 | elem; |
3406 | mask <<= 1, elem >>= 1, ++ch) |
3407 | if (__glibc_unlikely (elem & 1)) |
3408 | { |
3409 | /* There must be exactly one destination which accepts |
3410 | character ch. See group_nodes_into_DFAstates. */ |
3411 | for (j = 0; (dests_ch[j][i] & mask) == 0; ++j) |
3412 | ; |
3413 | |
/* The j-th destination accepts CH; fill both the normal half and
   the word-context half of the table. */
3415 | trtable[ch] = dest_states[j]; |
3416 | trtable[ch + SBC_MAX] = dest_states_word[j]; |
3417 | } |
3418 | } |
3419 | |
/* Handle the newline character. */
3421 | if (bitset_contain (acceptable, NEWLINE_CHAR)) |
3422 | { |
3423 | /* The current state accepts newline character. */ |
3424 | for (j = 0; j < ndests; ++j) |
3425 | if (bitset_contain (dests_ch[j], NEWLINE_CHAR)) |
3426 | { |
/* The j-th destination accepts the newline character. */
3428 | trtable[NEWLINE_CHAR] = dest_states_nl[j]; |
3429 | if (need_word_trtable) |
3430 | trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j]; |
3431 | /* There must be only one destination which accepts |
3432 | newline. See group_nodes_into_DFAstates. */ |
3433 | break; |
3434 | } |
3435 | } |
3436 | |
3437 | re_node_set_free (&follows); |
3438 | for (i = 0; i < ndests; ++i) |
3439 | re_node_set_free (dests_node + i); |
3440 | return true; |
3441 | } |
3442 | |
/* Group all nodes belonging to STATE into several destinations.
   Then for each destination, set the nodes belonging to it in
   DESTS_NODE[i] and the characters it accepts in DESTS_CH[i].
   Return the number of destinations if successful,
   -1 on internal error. */
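/* Illustrative example: for a state holding the CHARACTER node 'a' and a
   SIMPLE_BRACKET node for [a-c], the character node first forms a group
   accepting {'a'}; the bracket node then joins that group (both nodes
   accept 'a') and its leftover characters {'b', 'c'} form a second group
   containing only the bracket node, so 2 is returned.  */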
3448 | |
3449 | static Idx |
3450 | group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, |
3451 | re_node_set *dests_node, bitset_t *dests_ch) |
3452 | { |
3453 | reg_errcode_t err; |
3454 | bool ok; |
3455 | Idx i, j, k; |
3456 | Idx ndests; /* Number of the destinations from 'state'. */ |
3457 | bitset_t accepts; /* Characters a node can accept. */ |
3458 | const re_node_set *cur_nodes = &state->nodes; |
3459 | bitset_empty (accepts); |
3460 | ndests = 0; |
3461 | |
3462 | /* For all the nodes belonging to 'state', */ |
3463 | for (i = 0; i < cur_nodes->nelem; ++i) |
3464 | { |
3465 | re_token_t *node = &dfa->nodes[cur_nodes->elems[i]]; |
3466 | re_token_type_t type = node->type; |
3467 | unsigned int constraint = node->constraint; |
3468 | |
/* Enumerate all the single-byte characters this node can accept. */
3470 | if (type == CHARACTER) |
3471 | bitset_set (accepts, node->opr.c); |
3472 | else if (type == SIMPLE_BRACKET) |
3473 | { |
3474 | bitset_merge (accepts, node->opr.sbcset); |
3475 | } |
3476 | else if (type == OP_PERIOD) |
3477 | { |
3478 | #ifdef RE_ENABLE_I18N |
3479 | if (dfa->mb_cur_max > 1) |
3480 | bitset_merge (accepts, dfa->sb_char); |
3481 | else |
3482 | #endif |
3483 | bitset_set_all (accepts); |
3484 | if (!(dfa->syntax & RE_DOT_NEWLINE)) |
3485 | bitset_clear (accepts, '\n'); |
3486 | if (dfa->syntax & RE_DOT_NOT_NULL) |
3487 | bitset_clear (accepts, '\0'); |
3488 | } |
3489 | #ifdef RE_ENABLE_I18N |
3490 | else if (type == OP_UTF8_PERIOD) |
3491 | { |
3492 | if (ASCII_CHARS % BITSET_WORD_BITS == 0) |
3493 | memset (accepts, -1, ASCII_CHARS / CHAR_BIT); |
3494 | else |
3495 | bitset_merge (accepts, utf8_sb_map); |
3496 | if (!(dfa->syntax & RE_DOT_NEWLINE)) |
3497 | bitset_clear (accepts, '\n'); |
3498 | if (dfa->syntax & RE_DOT_NOT_NULL) |
3499 | bitset_clear (accepts, '\0'); |
3500 | } |
3501 | #endif |
3502 | else |
3503 | continue; |
3504 | |
/* Check 'accepts', and sift out the characters which do not
   match the context constraint. */
3507 | if (constraint) |
3508 | { |
3509 | if (constraint & NEXT_NEWLINE_CONSTRAINT) |
3510 | { |
3511 | bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR); |
3512 | bitset_empty (accepts); |
3513 | if (accepts_newline) |
3514 | bitset_set (accepts, NEWLINE_CHAR); |
3515 | else |
3516 | continue; |
3517 | } |
3518 | if (constraint & NEXT_ENDBUF_CONSTRAINT) |
3519 | { |
3520 | bitset_empty (accepts); |
3521 | continue; |
3522 | } |
3523 | |
3524 | if (constraint & NEXT_WORD_CONSTRAINT) |
3525 | { |
3526 | bitset_word_t any_set = 0; |
3527 | if (type == CHARACTER && !node->word_char) |
3528 | { |
3529 | bitset_empty (accepts); |
3530 | continue; |
3531 | } |
3532 | #ifdef RE_ENABLE_I18N |
3533 | if (dfa->mb_cur_max > 1) |
3534 | for (j = 0; j < BITSET_WORDS; ++j) |
3535 | any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j])); |
3536 | else |
3537 | #endif |
3538 | for (j = 0; j < BITSET_WORDS; ++j) |
3539 | any_set |= (accepts[j] &= dfa->word_char[j]); |
3540 | if (!any_set) |
3541 | continue; |
3542 | } |
3543 | if (constraint & NEXT_NOTWORD_CONSTRAINT) |
3544 | { |
3545 | bitset_word_t any_set = 0; |
3546 | if (type == CHARACTER && node->word_char) |
3547 | { |
3548 | bitset_empty (accepts); |
3549 | continue; |
3550 | } |
3551 | #ifdef RE_ENABLE_I18N |
3552 | if (dfa->mb_cur_max > 1) |
3553 | for (j = 0; j < BITSET_WORDS; ++j) |
3554 | any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j])); |
3555 | else |
3556 | #endif |
3557 | for (j = 0; j < BITSET_WORDS; ++j) |
3558 | any_set |= (accepts[j] &= ~dfa->word_char[j]); |
3559 | if (!any_set) |
3560 | continue; |
3561 | } |
3562 | } |
3563 | |
3564 | /* Then divide 'accepts' into DFA states, or create a new |
3565 | state. Above, we make sure that accepts is not empty. */ |
3566 | for (j = 0; j < ndests; ++j) |
3567 | { |
3568 | bitset_t intersec; /* Intersection sets, see below. */ |
3569 | bitset_t remains; |
3570 | /* Flags, see below. */ |
3571 | bitset_word_t has_intersec, not_subset, not_consumed; |
3572 | |
3573 | /* Optimization, skip if this state doesn't accept the character. */ |
3574 | if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c)) |
3575 | continue; |
3576 | |
3577 | /* Enumerate the intersection set of this state and 'accepts'. */ |
3578 | has_intersec = 0; |
3579 | for (k = 0; k < BITSET_WORDS; ++k) |
3580 | has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k]; |
3581 | /* And skip if the intersection set is empty. */ |
3582 | if (!has_intersec) |
3583 | continue; |
3584 | |
3585 | /* Then check if this state is a subset of 'accepts'. */ |
3586 | not_subset = not_consumed = 0; |
3587 | for (k = 0; k < BITSET_WORDS; ++k) |
3588 | { |
3589 | not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k]; |
3590 | not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k]; |
3591 | } |
3592 | |
/* If group j isn't a subset of 'accepts', split it: the new group
   gets the 'remains' characters and a copy of the old nodes, while
   group j itself is narrowed to the intersection. */
3595 | if (not_subset) |
3596 | { |
3597 | bitset_copy (dests_ch[ndests], remains); |
3598 | bitset_copy (dests_ch[j], intersec); |
3599 | err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]); |
3600 | if (__glibc_unlikely (err != REG_NOERROR)) |
3601 | goto error_return; |
3602 | ++ndests; |
3603 | } |
3604 | |
3605 | /* Put the position in the current group. */ |
3606 | ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]); |
3607 | if (__glibc_unlikely (! ok)) |
3608 | goto error_return; |
3609 | |
3610 | /* If all characters are consumed, go to next node. */ |
3611 | if (!not_consumed) |
3612 | break; |
3613 | } |
3614 | /* Some characters remain, create a new group. */ |
3615 | if (j == ndests) |
3616 | { |
3617 | bitset_copy (dests_ch[ndests], accepts); |
3618 | err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]); |
3619 | if (__glibc_unlikely (err != REG_NOERROR)) |
3620 | goto error_return; |
3621 | ++ndests; |
3622 | bitset_empty (accepts); |
3623 | } |
3624 | } |
3625 | assume (ndests <= SBC_MAX); |
3626 | return ndests; |
3627 | error_return: |
3628 | for (j = 0; j < ndests; ++j) |
3629 | re_node_set_free (dests_node + j); |
3630 | return -1; |
3631 | } |
3632 | |
3633 | #ifdef RE_ENABLE_I18N |
3634 | /* Check how many bytes the node 'dfa->nodes[node_idx]' accepts. |
3635 | Return the number of the bytes the node accepts. |
3636 | STR_IDX is the current index of the input string. |
3637 | |
This function handles the nodes which can accept one character, or
one collating element, like '.' or '[a-z]', as opposed to the other
nodes, which can only accept one byte. */
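/* For example, with an OP_UTF8_PERIOD node and the input bytes 0xc3 0xa9
   (U+00E9) at STR_IDX, the lead byte selects a two-byte sequence and the
   continuation byte lies in [0x80, 0xbf], so 2 is returned; a plain ASCII
   byte yields 0 here, because single bytes are handled by
   check_node_accept instead.  */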
3641 | |
3642 | # ifdef _LIBC |
3643 | # include <locale/weight.h> |
3644 | # endif |
3645 | |
3646 | static int |
3647 | check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, |
3648 | const re_string_t *input, Idx str_idx) |
3649 | { |
3650 | const re_token_t *node = dfa->nodes + node_idx; |
3651 | int char_len, elem_len; |
3652 | Idx i; |
3653 | |
3654 | if (__glibc_unlikely (node->type == OP_UTF8_PERIOD)) |
3655 | { |
3656 | unsigned char c = re_string_byte_at (input, str_idx), d; |
3657 | if (__glibc_likely (c < 0xc2)) |
3658 | return 0; |
3659 | |
3660 | if (str_idx + 2 > input->len) |
3661 | return 0; |
3662 | |
3663 | d = re_string_byte_at (input, str_idx + 1); |
3664 | if (c < 0xe0) |
3665 | return (d < 0x80 || d > 0xbf) ? 0 : 2; |
3666 | else if (c < 0xf0) |
3667 | { |
3668 | char_len = 3; |
3669 | if (c == 0xe0 && d < 0xa0) |
3670 | return 0; |
3671 | } |
3672 | else if (c < 0xf8) |
3673 | { |
3674 | char_len = 4; |
3675 | if (c == 0xf0 && d < 0x90) |
3676 | return 0; |
3677 | } |
3678 | else if (c < 0xfc) |
3679 | { |
3680 | char_len = 5; |
3681 | if (c == 0xf8 && d < 0x88) |
3682 | return 0; |
3683 | } |
3684 | else if (c < 0xfe) |
3685 | { |
3686 | char_len = 6; |
3687 | if (c == 0xfc && d < 0x84) |
3688 | return 0; |
3689 | } |
3690 | else |
3691 | return 0; |
3692 | |
3693 | if (str_idx + char_len > input->len) |
3694 | return 0; |
3695 | |
3696 | for (i = 1; i < char_len; ++i) |
3697 | { |
3698 | d = re_string_byte_at (input, str_idx + i); |
3699 | if (d < 0x80 || d > 0xbf) |
3700 | return 0; |
3701 | } |
3702 | return char_len; |
3703 | } |
3704 | |
3705 | char_len = re_string_char_size_at (input, str_idx); |
3706 | if (node->type == OP_PERIOD) |
3707 | { |
3708 | if (char_len <= 1) |
3709 | return 0; |
3710 | /* FIXME: I don't think this if is needed, as both '\n' |
3711 | and '\0' are char_len == 1. */ |
3712 | /* '.' accepts any one character except the following two cases. */ |
3713 | if ((!(dfa->syntax & RE_DOT_NEWLINE) |
3714 | && re_string_byte_at (input, str_idx) == '\n') |
3715 | || ((dfa->syntax & RE_DOT_NOT_NULL) |
3716 | && re_string_byte_at (input, str_idx) == '\0')) |
3717 | return 0; |
3718 | return char_len; |
3719 | } |
3720 | |
3721 | elem_len = re_string_elem_size_at (input, str_idx); |
3722 | if ((elem_len <= 1 && char_len <= 1) || char_len == 0) |
3723 | return 0; |
3724 | |
3725 | if (node->type == COMPLEX_BRACKET) |
3726 | { |
3727 | const re_charset_t *cset = node->opr.mbcset; |
3728 | # ifdef _LIBC |
3729 | const unsigned char *pin |
3730 | = ((const unsigned char *) re_string_get_buffer (input) + str_idx); |
3731 | Idx j; |
3732 | uint32_t nrules; |
3733 | # endif /* _LIBC */ |
3734 | int match_len = 0; |
3735 | wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars) |
3736 | ? re_string_wchar_at (input, str_idx) : 0); |
3737 | |
3738 | /* match with multibyte character? */ |
3739 | for (i = 0; i < cset->nmbchars; ++i) |
3740 | if (wc == cset->mbchars[i]) |
3741 | { |
3742 | match_len = char_len; |
3743 | goto check_node_accept_bytes_match; |
3744 | } |
3745 | /* match with character_class? */ |
3746 | for (i = 0; i < cset->nchar_classes; ++i) |
3747 | { |
3748 | wctype_t wt = cset->char_classes[i]; |
3749 | if (__iswctype (wc, wt)) |
3750 | { |
3751 | match_len = char_len; |
3752 | goto check_node_accept_bytes_match; |
3753 | } |
3754 | } |
3755 | |
3756 | # ifdef _LIBC |
3757 | nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); |
3758 | if (nrules != 0) |
3759 | { |
3760 | unsigned int in_collseq = 0; |
3761 | const int32_t *table, *indirect; |
const unsigned char *weights, *extra;
3763 | const char *collseqwc; |
3764 | |
3765 | /* match with collating_symbol? */ |
3766 | if (cset->ncoll_syms) |
3767 | extra = (const unsigned char *) |
3768 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); |
3769 | for (i = 0; i < cset->ncoll_syms; ++i) |
3770 | { |
/* The compiler might warn that extra may be used uninitialized;
   however, the loop is executed only if ncoll_syms is larger
   than 0, which means extra has already been initialized. */
DIAG_PUSH_NEEDS_COMMENT;
DIAG_IGNORE_Os_NEEDS_COMMENT (8, "-Wmaybe-uninitialized");
3776 | const unsigned char *coll_sym = extra + cset->coll_syms[i]; |
3777 | DIAG_POP_NEEDS_COMMENT; |
3778 | /* Compare the length of input collating element and |
3779 | the length of current collating element. */ |
3780 | if (*coll_sym != elem_len) |
3781 | continue; |
/* Compare each byte. */
3783 | for (j = 0; j < *coll_sym; j++) |
3784 | if (pin[j] != coll_sym[1 + j]) |
3785 | break; |
3786 | if (j == *coll_sym) |
3787 | { |
/* Match if every byte is equal. */
3789 | match_len = j; |
3790 | goto check_node_accept_bytes_match; |
3791 | } |
3792 | } |
3793 | |
3794 | if (cset->nranges) |
3795 | { |
3796 | if (elem_len <= char_len) |
3797 | { |
3798 | collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC); |
3799 | in_collseq = __collseq_table_lookup (collseqwc, wc); |
3800 | } |
3801 | else |
3802 | in_collseq = find_collation_sequence_value (pin, elem_len); |
3803 | } |
3804 | /* match with range expression? */ |
3805 | /* FIXME: Implement rational ranges here, too. */ |
3806 | for (i = 0; i < cset->nranges; ++i) |
3807 | if (cset->range_starts[i] <= in_collseq |
3808 | && in_collseq <= cset->range_ends[i]) |
3809 | { |
3810 | match_len = elem_len; |
3811 | goto check_node_accept_bytes_match; |
3812 | } |
3813 | |
3814 | /* match with equivalence_class? */ |
3815 | if (cset->nequiv_classes) |
3816 | { |
3817 | const unsigned char *cp = pin; |
3818 | table = (const int32_t *) |
3819 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); |
3820 | weights = (const unsigned char *) |
3821 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB); |
3822 | extra = (const unsigned char *) |
3823 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); |
3824 | indirect = (const int32_t *) |
3825 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); |
3826 | int32_t idx = findidx (table, indirect, extra, &cp, elem_len); |
3827 | int32_t rule = idx >> 24; |
3828 | idx &= 0xffffff; |
3829 | if (idx > 0) |
3830 | { |
3831 | size_t weight_len = weights[idx]; |
3832 | for (i = 0; i < cset->nequiv_classes; ++i) |
3833 | { |
3834 | int32_t equiv_class_idx = cset->equiv_classes[i]; |
3835 | int32_t equiv_class_rule = equiv_class_idx >> 24; |
3836 | equiv_class_idx &= 0xffffff; |
3837 | if (weights[equiv_class_idx] == weight_len |
3838 | && equiv_class_rule == rule |
3839 | && memcmp (weights + idx + 1, |
3840 | weights + equiv_class_idx + 1, |
3841 | weight_len) == 0) |
3842 | { |
3843 | match_len = elem_len; |
3844 | goto check_node_accept_bytes_match; |
3845 | } |
3846 | } |
3847 | } |
3848 | } |
3849 | } |
3850 | else |
3851 | # endif /* _LIBC */ |
3852 | { |
3853 | /* match with range expression? */ |
3854 | for (i = 0; i < cset->nranges; ++i) |
3855 | { |
3856 | if (cset->range_starts[i] <= wc && wc <= cset->range_ends[i]) |
3857 | { |
3858 | match_len = char_len; |
3859 | goto check_node_accept_bytes_match; |
3860 | } |
3861 | } |
3862 | } |
3863 | check_node_accept_bytes_match: |
3864 | if (!cset->non_match) |
3865 | return match_len; |
3866 | else |
3867 | { |
3868 | if (match_len > 0) |
3869 | return 0; |
3870 | else |
3871 | return (elem_len > char_len) ? elem_len : char_len; |
3872 | } |
3873 | } |
3874 | return 0; |
3875 | } |
3876 | |
3877 | # ifdef _LIBC |
3878 | static unsigned int |
3879 | find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len) |
3880 | { |
3881 | uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); |
3882 | if (nrules == 0) |
3883 | { |
3884 | if (mbs_len == 1) |
3885 | { |
3886 | /* No valid character. Match it as a single byte character. */ |
3887 | const unsigned char *collseq = (const unsigned char *) |
3888 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB); |
3889 | return collseq[mbs[0]]; |
3890 | } |
3891 | return UINT_MAX; |
3892 | } |
3893 | else |
3894 | { |
3895 | int32_t idx; |
const unsigned char *extra = (const unsigned char *)
  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
int32_t extrasize = (const unsigned char *)
  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
3900 | |
3901 | for (idx = 0; idx < extrasize;) |
3902 | { |
3903 | int mbs_cnt; |
3904 | bool found = false; |
3905 | int32_t elem_mbs_len; |
/* Skip the name of the collating element. */
3907 | idx = idx + extra[idx] + 1; |
3908 | elem_mbs_len = extra[idx++]; |
3909 | if (mbs_len == elem_mbs_len) |
3910 | { |
3911 | for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt) |
3912 | if (extra[idx + mbs_cnt] != mbs[mbs_cnt]) |
3913 | break; |
3914 | if (mbs_cnt == elem_mbs_len) |
3915 | /* Found the entry. */ |
3916 | found = true; |
3917 | } |
3918 | /* Skip the byte sequence of the collating element. */ |
3919 | idx += elem_mbs_len; |
3920 | /* Adjust for the alignment. */ |
3921 | idx = (idx + 3) & ~3; |
3922 | /* Skip the collation sequence value. */ |
3923 | idx += sizeof (uint32_t); |
3924 | /* Skip the wide char sequence of the collating element. */ |
3925 | idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1); |
3926 | /* If we found the entry, return the sequence value. */ |
3927 | if (found) |
3928 | return *(uint32_t *) (extra + idx); |
3929 | /* Skip the collation sequence value. */ |
3930 | idx += sizeof (uint32_t); |
3931 | } |
3932 | return UINT_MAX; |
3933 | } |
3934 | } |
3935 | # endif /* _LIBC */ |
3936 | #endif /* RE_ENABLE_I18N */ |
3937 | |
/* Check whether the node accepts the byte which is the IDX-th
   byte of the INPUT. */
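/* This covers the single-byte cases only; whether a node accepts a whole
   multi-byte character or collating element is decided by
   check_node_accept_bytes above.  For instance, a CHARACTER node for 'a'
   accepts the byte only if it equals 'a', and, if the node also carries a
   context constraint, only if the context at IDX satisfies it.  */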
3940 | |
3941 | static bool |
3942 | check_node_accept (const re_match_context_t *mctx, const re_token_t *node, |
3943 | Idx idx) |
3944 | { |
3945 | unsigned char ch; |
3946 | ch = re_string_byte_at (&mctx->input, idx); |
3947 | switch (node->type) |
3948 | { |
3949 | case CHARACTER: |
3950 | if (node->opr.c != ch) |
3951 | return false; |
3952 | break; |
3953 | |
3954 | case SIMPLE_BRACKET: |
3955 | if (!bitset_contain (node->opr.sbcset, ch)) |
3956 | return false; |
3957 | break; |
3958 | |
3959 | #ifdef RE_ENABLE_I18N |
3960 | case OP_UTF8_PERIOD: |
3961 | if (ch >= ASCII_CHARS) |
3962 | return false; |
3963 | FALLTHROUGH; |
3964 | #endif |
3965 | case OP_PERIOD: |
3966 | if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE)) |
3967 | || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL))) |
3968 | return false; |
3969 | break; |
3970 | |
3971 | default: |
3972 | return false; |
3973 | } |
3974 | |
3975 | if (node->constraint) |
3976 | { |
3977 | /* The node has constraints. Check whether the current context |
3978 | satisfies the constraints. */ |
3979 | unsigned int context = re_string_context_at (&mctx->input, idx, |
3980 | mctx->eflags); |
3981 | if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)) |
3982 | return false; |
3983 | } |
3984 | |
3985 | return true; |
3986 | } |
3987 | |
3988 | /* Extend the buffers, if the buffers have run out. */ |
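/* The new size is MAX (MIN_LEN, MIN (PSTR->LEN, PSTR->BUFS_LEN * 2)); for
   example, with bufs_len == 128, len == 1000 and min_len == 1 the buffers
   grow to MIN (1000, 256) = 256 bytes, while a string shorter than the
   doubled size caps the growth at its own length.  */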
3989 | |
3990 | static reg_errcode_t |
3991 | __attribute_warn_unused_result__ |
3992 | extend_buffers (re_match_context_t *mctx, int min_len) |
3993 | { |
3994 | reg_errcode_t ret; |
3995 | re_string_t *pstr = &mctx->input; |
3996 | |
3997 | /* Avoid overflow. */ |
3998 | if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / 2 |
3999 | <= pstr->bufs_len)) |
4000 | return REG_ESPACE; |
4001 | |
4002 | /* Double the lengths of the buffers, but allocate at least MIN_LEN. */ |
4003 | ret = re_string_realloc_buffers (pstr, |
4004 | MAX (min_len, |
4005 | MIN (pstr->len, pstr->bufs_len * 2))); |
4006 | if (__glibc_unlikely (ret != REG_NOERROR)) |
4007 | return ret; |
4008 | |
4009 | if (mctx->state_log != NULL) |
4010 | { |
4011 | /* And double the length of state_log. */ |
/* XXX We have no indication of the size of this buffer. If this
   allocation fails, we have no indication that the state_log array
   does not have the right size. */
4015 | re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *, |
4016 | pstr->bufs_len + 1); |
4017 | if (__glibc_unlikely (new_array == NULL)) |
4018 | return REG_ESPACE; |
4019 | mctx->state_log = new_array; |
4020 | } |
4021 | |
4022 | /* Then reconstruct the buffers. */ |
4023 | if (pstr->icase) |
4024 | { |
4025 | #ifdef RE_ENABLE_I18N |
4026 | if (pstr->mb_cur_max > 1) |
4027 | { |
4028 | ret = build_wcs_upper_buffer (pstr); |
4029 | if (__glibc_unlikely (ret != REG_NOERROR)) |
4030 | return ret; |
4031 | } |
4032 | else |
4033 | #endif /* RE_ENABLE_I18N */ |
4034 | build_upper_buffer (pstr); |
4035 | } |
4036 | else |
4037 | { |
4038 | #ifdef RE_ENABLE_I18N |
4039 | if (pstr->mb_cur_max > 1) |
4040 | build_wcs_buffer (pstr); |
4041 | else |
4042 | #endif /* RE_ENABLE_I18N */ |
4043 | { |
4044 | if (pstr->trans != NULL) |
4045 | re_string_translate_buffer (pstr); |
4046 | } |
4047 | } |
4048 | return REG_NOERROR; |
4049 | } |
4050 | |
4051 | |
4052 | /* Functions for matching context. */ |
4053 | |
4054 | /* Initialize MCTX. */ |
4055 | |
4056 | static reg_errcode_t |
4057 | __attribute_warn_unused_result__ |
4058 | match_ctx_init (re_match_context_t *mctx, int eflags, Idx n) |
4059 | { |
4060 | mctx->eflags = eflags; |
4061 | mctx->match_last = -1; |
4062 | if (n > 0) |
4063 | { |
4064 | /* Avoid overflow. */ |
4065 | size_t max_object_size = |
4066 | MAX (sizeof (struct re_backref_cache_entry), |
4067 | sizeof (re_sub_match_top_t *)); |
4068 | if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n)) |
4069 | return REG_ESPACE; |
4070 | |
4071 | mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n); |
4072 | mctx->sub_tops = re_malloc (re_sub_match_top_t *, n); |
4073 | if (__glibc_unlikely (mctx->bkref_ents == NULL || mctx->sub_tops == NULL)) |
4074 | return REG_ESPACE; |
4075 | } |
4076 | /* Already zero-ed by the caller. |
4077 | else |
4078 | mctx->bkref_ents = NULL; |
4079 | mctx->nbkref_ents = 0; |
4080 | mctx->nsub_tops = 0; */ |
4081 | mctx->abkref_ents = n; |
4082 | mctx->max_mb_elem_len = 1; |
4083 | mctx->asub_tops = n; |
4084 | return REG_NOERROR; |
4085 | } |
4086 | |
4087 | /* Clean the entries which depend on the current input in MCTX. |
4088 | This function must be invoked when the matcher changes the start index |
4089 | of the input, or changes the input string. */ |
4090 | |
4091 | static void |
4092 | match_ctx_clean (re_match_context_t *mctx) |
4093 | { |
4094 | Idx st_idx; |
4095 | for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx) |
4096 | { |
4097 | Idx sl_idx; |
4098 | re_sub_match_top_t *top = mctx->sub_tops[st_idx]; |
4099 | for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx) |
4100 | { |
4101 | re_sub_match_last_t *last = top->lasts[sl_idx]; |
4102 | re_free (last->path.array); |
4103 | re_free (last); |
4104 | } |
4105 | re_free (top->lasts); |
4106 | if (top->path) |
4107 | { |
4108 | re_free (top->path->array); |
4109 | re_free (top->path); |
4110 | } |
4111 | re_free (top); |
4112 | } |
4113 | |
4114 | mctx->nsub_tops = 0; |
4115 | mctx->nbkref_ents = 0; |
4116 | } |
4117 | |
4118 | /* Free all the memory associated with MCTX. */ |
4119 | |
4120 | static void |
4121 | match_ctx_free (re_match_context_t *mctx) |
4122 | { |
4123 | /* First, free all the memory associated with MCTX->SUB_TOPS. */ |
4124 | match_ctx_clean (mctx); |
4125 | re_free (mctx->sub_tops); |
4126 | re_free (mctx->bkref_ents); |
4127 | } |
4128 | |
/* Add a new backreference entry to MCTX.
   Note that we assume the caller never calls this function with a
   duplicate entry, and always calls it with a STR_IDX that is not
   smaller than any existing entry.  */
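/* Entries sharing the same STR_IDX are chained through the 'more' flag:
   adding a second entry for an index sets 'more' on the previous one,
   which is what lets expand_bkref_cache iterate with
   "do ... while (ent++->more)".  */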
4133 | |
4134 | static reg_errcode_t |
4135 | __attribute_warn_unused_result__ |
4136 | match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from, |
4137 | Idx to) |
4138 | { |
4139 | if (mctx->nbkref_ents >= mctx->abkref_ents) |
4140 | { |
4141 | struct re_backref_cache_entry* new_entry; |
4142 | new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry, |
4143 | mctx->abkref_ents * 2); |
4144 | if (__glibc_unlikely (new_entry == NULL)) |
4145 | { |
4146 | re_free (mctx->bkref_ents); |
4147 | return REG_ESPACE; |
4148 | } |
4149 | mctx->bkref_ents = new_entry; |
4150 | memset (mctx->bkref_ents + mctx->nbkref_ents, '\0', |
4151 | sizeof (struct re_backref_cache_entry) * mctx->abkref_ents); |
4152 | mctx->abkref_ents *= 2; |
4153 | } |
4154 | if (mctx->nbkref_ents > 0 |
4155 | && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx) |
4156 | mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1; |
4157 | |
4158 | mctx->bkref_ents[mctx->nbkref_ents].node = node; |
4159 | mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx; |
4160 | mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from; |
4161 | mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to; |
4162 | |
4163 | /* This is a cache that saves negative results of check_dst_limits_calc_pos. |
If bit N is clear, it means that this entry won't epsilon-transition to
4165 | an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If |
4166 | it is set, check_dst_limits_calc_pos_1 will recurse and try to find one |
4167 | such node. |
4168 | |
4169 | A backreference does not epsilon-transition unless it is empty, so set |
4170 | to all zeros if FROM != TO. */ |
4171 | mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map |
4172 | = (from == to ? -1 : 0); |
4173 | |
4174 | mctx->bkref_ents[mctx->nbkref_ents++].more = 0; |
4175 | if (mctx->max_mb_elem_len < to - from) |
4176 | mctx->max_mb_elem_len = to - from; |
4177 | return REG_NOERROR; |
4178 | } |
4179 | |
4180 | /* Return the first entry with the same str_idx, or -1 if none is |
4181 | found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */ |
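/* This is a lower-bound binary search; e.g. for cached str_idx values
   {2, 2, 5, 7}, looking up 5 returns index 2 (the first matching entry),
   while looking up 3 returns -1 because no entry matches exactly.  */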
4182 | |
4183 | static Idx |
4184 | search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx) |
4185 | { |
4186 | Idx left, right, mid, last; |
4187 | last = right = mctx->nbkref_ents; |
4188 | for (left = 0; left < right;) |
4189 | { |
4190 | mid = (left + right) / 2; |
4191 | if (mctx->bkref_ents[mid].str_idx < str_idx) |
4192 | left = mid + 1; |
4193 | else |
4194 | right = mid; |
4195 | } |
4196 | if (left < last && mctx->bkref_ents[left].str_idx == str_idx) |
4197 | return left; |
4198 | else |
4199 | return -1; |
4200 | } |
4201 | |
4202 | /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches |
4203 | at STR_IDX. */ |
4204 | |
4205 | static reg_errcode_t |
4206 | __attribute_warn_unused_result__ |
4207 | match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx) |
4208 | { |
4209 | DEBUG_ASSERT (mctx->sub_tops != NULL); |
4210 | DEBUG_ASSERT (mctx->asub_tops > 0); |
4211 | if (__glibc_unlikely (mctx->nsub_tops == mctx->asub_tops)) |
4212 | { |
4213 | Idx new_asub_tops = mctx->asub_tops * 2; |
4214 | re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops, |
4215 | re_sub_match_top_t *, |
4216 | new_asub_tops); |
4217 | if (__glibc_unlikely (new_array == NULL)) |
4218 | return REG_ESPACE; |
4219 | mctx->sub_tops = new_array; |
4220 | mctx->asub_tops = new_asub_tops; |
4221 | } |
4222 | mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t)); |
4223 | if (__glibc_unlikely (mctx->sub_tops[mctx->nsub_tops] == NULL)) |
4224 | return REG_ESPACE; |
4225 | mctx->sub_tops[mctx->nsub_tops]->node = node; |
4226 | mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx; |
4227 | return REG_NOERROR; |
4228 | } |
4229 | |
4230 | /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches |
4231 | at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. |
4232 | Return the new entry if successful, NULL if memory is exhausted. */ |
4233 | |
4234 | static re_sub_match_last_t * |
4235 | match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx) |
4236 | { |
4237 | re_sub_match_last_t *new_entry; |
4238 | if (__glibc_unlikely (subtop->nlasts == subtop->alasts)) |
4239 | { |
4240 | Idx new_alasts = 2 * subtop->alasts + 1; |
4241 | re_sub_match_last_t **new_array = re_realloc (subtop->lasts, |
4242 | re_sub_match_last_t *, |
4243 | new_alasts); |
4244 | if (__glibc_unlikely (new_array == NULL)) |
4245 | return NULL; |
4246 | subtop->lasts = new_array; |
4247 | subtop->alasts = new_alasts; |
4248 | } |
4249 | new_entry = calloc (1, sizeof (re_sub_match_last_t)); |
4250 | if (__glibc_likely (new_entry != NULL)) |
4251 | { |
4252 | subtop->lasts[subtop->nlasts] = new_entry; |
4253 | new_entry->node = node; |
4254 | new_entry->str_idx = str_idx; |
4255 | ++subtop->nlasts; |
4256 | } |
4257 | return new_entry; |
4258 | } |
4259 | |
4260 | static void |
4261 | sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, |
4262 | re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx) |
4263 | { |
4264 | sctx->sifted_states = sifted_sts; |
4265 | sctx->limited_states = limited_sts; |
4266 | sctx->last_node = last_node; |
4267 | sctx->last_str_idx = last_str_idx; |
4268 | re_node_set_init_empty (&sctx->limits); |
4269 | } |
4270 | |