regexec.c source code [glibc/posix/regexec.c]

/* Extended regular expression matching and search library.
   Copyright (C) 2002-2018 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <limits.h>
#include <stdint.h>

22	static reg_errcode_t match_ctx_init (re_match_context_t cache, int* eflags,
23	int n);
24	static void match_ctx_clean (re_match_context_t *mctx);
25	static void match_ctx_free (re_match_context_t *cache);
26	static reg_errcode_t match_ctx_add_entry (re_match_context_t cache, int* node,
27	int str_idx, int from, int to);
28	static int search_cur_bkref_entry (const re_match_context_t *mctx,
29	int str_idx);
30	static reg_errcode_t match_ctx_add_subtop (re_match_context_t mctx, int* node,
31	int str_idx);
32	static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
33	int node, int str_idx);
34	static void sift_ctx_init (re_sift_context_t sctx, re_dfastate_t *sifted_sts,
35	re_dfastate_t *limited_sts, int* last_node,
36	int last_str_idx);
37	static reg_errcode_t re_search_internal (const regex_t *preg,
38	const char string, int* length,
39	int start, int range, int stop,
40	size_t nmatch, regmatch_t pmatch[],
41	int eflags);
42	static int re_search_2_stub (struct re_pattern_buffer *bufp,
43	const char string1, int* length1,
44	const char string2, int* length2,
45	int start, int range, struct re_registers *regs,
46	int stop, int ret_len);
47	static int re_search_stub (struct re_pattern_buffer *bufp,
48	const char string, int* length, int start,
49	int range, int stop, struct re_registers *regs,
50	int ret_len);
51	static unsigned re_copy_regs (struct re_registers regs, regmatch_t pmatch,
52	int nregs, int regs_allocated);
53	static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx);
54	static int check_matching (re_match_context_t mctx, int* fl_longest_match,
55	int *p_match_first);
56	static int check_halt_state_context (const re_match_context_t *mctx,
57	const re_dfastate_t state, int* idx);
58	static void update_regs (const re_dfa_t dfa, regmatch_t pmatch,
59	regmatch_t prev_idx_match, int* cur_node,
60	int cur_idx, int nmatch);
61	static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
62	int str_idx, int dest_node, int nregs,
63	regmatch_t *regs,
64	re_node_set *eps_via_nodes);
65	static reg_errcode_t set_regs (const regex_t *preg,
66	const re_match_context_t *mctx,
67	size_t nmatch, regmatch_t *pmatch,
68	int fl_backtrack);
69	static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
70
71	#ifdef RE_ENABLE_I18N
72	static int sift_states_iter_mb (const re_match_context_t *mctx,
73	re_sift_context_t *sctx,
74	int node_idx, int str_idx, int max_str_idx);
75	#endif /* RE_ENABLE_I18N */
76	static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
77	re_sift_context_t *sctx);
78	static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
79	re_sift_context_t sctx, int* str_idx,
80	re_node_set *cur_dest);
81	static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
82	re_sift_context_t *sctx,
83	int str_idx,
84	re_node_set *dest_nodes);
85	static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
86	re_node_set *dest_nodes,
87	const re_node_set *candidates);
88	static int check_dst_limits (const re_match_context_t *mctx,
89	re_node_set *limits,
90	int dst_node, int dst_idx, int src_node,
91	int src_idx);
92	static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
93	int boundaries, int subexp_idx,
94	int from_node, int bkref_idx);
95	static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
96	int limit, int subexp_idx,
97	int node, int str_idx,
98	int bkref_idx);
99	static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
100	re_node_set *dest_nodes,
101	const re_node_set *candidates,
102	re_node_set *limits,
103	struct re_backref_cache_entry *bkref_ents,
104	int str_idx);
105	static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
106	re_sift_context_t *sctx,
107	int str_idx,
108	const re_node_set *candidates);
109	static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
110	re_dfastate_t **dst,
111	re_dfastate_t *src, int* num);
112	static re_dfastate_t find_recover_state (reg_errcode_t err,
113	re_match_context_t *mctx);
114	static re_dfastate_t transit_state (reg_errcode_t err,
115	re_match_context_t *mctx,
116	re_dfastate_t *state);
117	static re_dfastate_t merge_state_with_log (reg_errcode_t err,
118	re_match_context_t *mctx,
119	re_dfastate_t *next_state);
120	static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
121	re_node_set *cur_nodes,
122	int str_idx);
123	#if 0
124	static re_dfastate_t transit_state_sb (reg_errcode_t err,
125	re_match_context_t *mctx,
126	re_dfastate_t *pstate);
127	#endif
128	#ifdef RE_ENABLE_I18N
129	static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
130	re_dfastate_t *pstate);
131	#endif /* RE_ENABLE_I18N */
132	static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
133	const re_node_set *nodes);
134	static reg_errcode_t get_subexp (re_match_context_t *mctx,
135	int bkref_node, int bkref_str_idx);
136	static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
137	const re_sub_match_top_t *sub_top,
138	re_sub_match_last_t *sub_last,
139	int bkref_node, int bkref_str);
140	static int find_subexp_node (const re_dfa_t dfa, const* re_node_set *nodes,
141	int subexp_idx, int type);
142	static reg_errcode_t check_arrival (re_match_context_t *mctx,
143	state_array_t path, int* top_node,
144	int top_str, int last_node, int last_str,
145	int type);
146	static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
147	int str_idx,
148	re_node_set *cur_nodes,
149	re_node_set *next_nodes);
150	static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
151	re_node_set *cur_nodes,
152	int ex_subexp, int type);
153	static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
154	re_node_set *dst_nodes,
155	int target, int ex_subexp,
156	int type);
157	static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
158	re_node_set cur_nodes, int* cur_str,
159	int subexp_num, int type);
160	static int build_trtable (const re_dfa_t dfa, re_dfastate_t state);
161	#ifdef RE_ENABLE_I18N
162	static int check_node_accept_bytes (const re_dfa_t dfa, int* node_idx,
163	const re_string_t input, int* idx);
164	# ifdef _LIBC
165	static unsigned int find_collation_sequence_value (const unsigned char *mbs,
166	size_t name_len);
167	# endif /* _LIBC */
168	#endif /* RE_ENABLE_I18N */
169	static int group_nodes_into_DFAstates (const re_dfa_t *dfa,
170	const re_dfastate_t *state,
171	re_node_set *states_node,
172	bitset_t *states_ch);
173	static int check_node_accept (const re_match_context_t *mctx,
174	const re_token_t node, int* idx);
175	static reg_errcode_t extend_buffers (re_match_context_t mctx, int* min_len);
176
177	/ Entry point for POSIX code. /
178
179	/ regexec searches for a given pattern, specified by PREG, in the*
180	string STRING.
181
182	If NMATCH is zero or REG_NOSUB was set in the cflags argument to
183	`regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
184	least NMATCH elements, and we set them to the offsets of the
185	corresponding matched substrings.
186
187	EFLAGS specifies `execution flags' which affect matching: if
188	REG_NOTBOL is set, then ^ does not match at the beginning of the
189	string; if REG_NOTEOL is set, then $ does not match at the end.
190
191	We return 0 if we find a match and REG_NOMATCH if not. /*
192
193	int
194	regexec (const regex_t *__restrict preg, const char *__restrict string,
195	size_t nmatch, regmatch_t pmatch[], int eflags)
196	{
197	reg_errcode_t err;
198	int start, length;
199	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
200
201	if (eflags & ~(REG_NOTBOL \| REG_NOTEOL \| REG_STARTEND))
202	return REG_BADPAT;
203
204	if (eflags & REG_STARTEND)
205	{
206	start = pmatch[`0`].rm_so;
207	length = pmatch[`0`].rm_eo;
208	}
209	else
210	{
211	start = `0`;
212	length = strlen (string);
213	}
214
215	__libc_lock_lock (dfa->lock);
216	if (preg->no_sub)
217	err = re_search_internal (preg, string, length, start, length - start,
218	length, `0`, NULL, eflags);
219	else
220	err = re_search_internal (preg, string, length, start, length - start,
221	length, nmatch, pmatch, eflags);
222	__libc_lock_unlock (dfa->lock);
223	return err != REG_NOERROR;
224	}
225
#ifdef _LIBC
libc_hidden_def (__regexec)

# include <shlib-compat.h>
versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);

# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
__typeof__ (__regexec) __compat_regexec;

/* Compatibility entry point bound to the GLIBC_2_0 symbol version.
   It forwards to regexec after masking EFLAGS down to REG_NOTBOL and
   REG_NOTEOL, so any other flag bit (including REG_STARTEND) is
   dropped before the search.  */
int
attribute_compat_text_section
__compat_regexec (const regex_t *__restrict preg,
                  const char *__restrict string, size_t nmatch,
                  regmatch_t pmatch[], int eflags)
{
  return regexec (preg, string, nmatch, pmatch,
                  eflags & (REG_NOTBOL | REG_NOTEOL));
}
compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
# endif
#endif

/* Entry points for GNU code.  */

/* re_match, re_search, re_match_2, re_search_2

   The former two functions operate on STRING with length LENGTH,
   while the latter two operate on the concatenation of STRING1 and
   STRING2 with lengths LENGTH1 and LENGTH2, respectively.

   re_match() matches the compiled pattern in BUFP against the string,
   starting at index START.

   re_search() first tries matching at index START, then it tries to match
   starting from index START + 1, and so on.  The last start position tried
   is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
   way as re_match().)

   The parameter STOP of re_{match,search}_2 specifies that no match exceeding
   the first STOP characters of the concatenation of the strings should be
   considered.

   If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
   and all groups are stored in REGS.  (For the "_2" variants, the offsets are
   computed relative to the concatenation, not relative to the individual
   strings.)

   On success, re_match* functions return the length of the match, re_search*
   return the position of the start of the match.  Return value -1 means no
   match was found and -2 indicates an internal error.  */

int
re_match (struct re_pattern_buffer *bufp, const char *string, int length,
          int start, struct re_registers *regs)
{
  /* RANGE is 0 (try only at START) and RET_LEN is 1 (return the match
     length rather than its position).  */
  return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
}
#ifdef _LIBC
weak_alias (__re_match, re_match)
#endif

/* Search STRING (of LENGTH bytes) for the pattern compiled in BUFP,
   trying start positions from START through START + RANGE.  Forwards
   to re_search_stub with STOP equal to LENGTH and RET_LEN equal to 0,
   so the value returned is the one re_search_stub produces in
   "return the position" mode.  */
int
re_search (struct re_pattern_buffer *bufp, const char *string, int length,
           int start, int range, struct re_registers *regs)
{
  return re_search_stub (bufp, string, length, start, range, length, regs, 0);
}
#ifdef _LIBC
weak_alias (__re_search, re_search)
#endif

/* Like re_match, but the subject is the concatenation of STRING1
   (LENGTH1 bytes) and STRING2 (LENGTH2 bytes), and STOP is passed
   through to re_search_2_stub.  RANGE is fixed to 0 and RET_LEN to 1.  */
int
re_match_2 (struct re_pattern_buffer *bufp, const char *string1, int length1,
            const char *string2, int length2, int start,
            struct re_registers *regs, int stop)
{
  return re_search_2_stub (bufp, string1, length1, string2, length2,
                           start, 0, regs, stop, 1);
}
#ifdef _LIBC
weak_alias (__re_match_2, re_match_2)
#endif

/* Like re_search, but the subject is the concatenation of STRING1
   (LENGTH1 bytes) and STRING2 (LENGTH2 bytes), and STOP is passed
   through to re_search_2_stub.  RET_LEN is fixed to 0.  */
int
re_search_2 (struct re_pattern_buffer *bufp, const char *string1, int length1,
             const char *string2, int length2, int start, int range,
             struct re_registers *regs, int stop)
{
  return re_search_2_stub (bufp, string1, length1, string2, length2,
                           start, range, regs, stop, 0);
}
#ifdef _LIBC
weak_alias (__re_search_2, re_search_2)
#endif

321	static int
322	re_search_2_stub (struct re_pattern_buffer bufp, const* char *string1,
323	int length1, const char string2, int* length2, int start,
324	int range, struct re_registers *regs,
325	int stop, int ret_len)
326	{
327	const char *str;
328	int rval;
329	int len = length1 + length2;
330	char *s = NULL;
331
332	if (BE (length1 < `0` \|\| length2 < `0` \|\| stop < `0` \|\| len < length1, `0`))
333	return -`2`;
334
335	/ Concatenate the strings. /
336	if (length2 > `0`)
337	if (length1 > `0`)
338	{
339	s = re_malloc (char, len);
340
341	if (BE (s == NULL, `0`))
342	return -`2`;
343	#ifdef _LIBC
344	memcpy (__mempcpy (s, string1, length1), string2, length2);
345	#else
346	memcpy (s, string1, length1);
347	memcpy (s + length1, string2, length2);
348	#endif
349	str = s;
350	}
351	else
352	str = string2;
353	else
354	str = string1;
355
356	rval = re_search_stub (bufp, str, len, start, range, stop, regs, ret_len);
357	re_free (s);
358	return rval;
359	}
360
361	/ The parameters have the same meaning as those of re_search.*
362	Additional parameters:
363	If RET_LEN is nonzero the length of the match is returned (re_match style);
364	otherwise the position of the match is returned. /*
365
366	static int
367	re_search_stub (struct re_pattern_buffer bufp, const* char string, int* length,
368	int start, int range, int stop, struct re_registers *regs,
369	int ret_len)
370	{
371	reg_errcode_t result;
372	regmatch_t *pmatch;
373	int nregs, rval;
374	int eflags = `0`;
375	re_dfa_t dfa = (re_dfa_t ) bufp->buffer;
376
377	/ Check for out-of-range. /
378	if (BE (start < `0` \|\| start > length, `0`))
379	return -`1`;
380	if (BE (start + range > length, `0`))
381	range = length - start;
382	else if (BE (start + range < `0`, `0`))
383	range = -start;
384
385	__libc_lock_lock (dfa->lock);
386
387	eflags \|= (bufp->not_bol) ? REG_NOTBOL : `0`;
388	eflags \|= (bufp->not_eol) ? REG_NOTEOL : `0`;
389
390	/ Compile fastmap if we haven't yet. /
391	if (range > `0` && bufp->fastmap != NULL && !bufp->fastmap_accurate)
392	re_compile_fastmap (bufp);
393
394	if (BE (bufp->no_sub, `0`))
395	regs = NULL;
396
397	/ We need at least 1 register. /
398	if (regs == NULL)
399	nregs = `1`;
400	else if (BE (bufp->regs_allocated == REGS_FIXED &&
401	regs->num_regs < bufp->re_nsub + `1`, `0`))
402	{
403	nregs = regs->num_regs;
404	if (BE (nregs < `1`, `0`))
405	{
406	/ Nothing can be copied to regs. /
407	regs = NULL;
408	nregs = `1`;
409	}
410	}
411	else
412	nregs = bufp->re_nsub + `1`;
413	pmatch = re_malloc (regmatch_t, nregs);
414	if (BE (pmatch == NULL, `0`))
415	{
416	rval = -`2`;
417	goto out;
418	}
419
420	result = re_search_internal (bufp, string, length, start, range, stop,
421	nregs, pmatch, eflags);
422
423	rval = `0`;
424
425	/ I hope we needn't fill ther regs with -1's when no match was found. /
426	if (result != REG_NOERROR)
427	rval = -`1`;
428	else if (regs != NULL)
429	{
430	/ If caller wants register contents data back, copy them. /
431	bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
432	bufp->regs_allocated);
433	if (BE (bufp->regs_allocated == REGS_UNALLOCATED, `0`))
434	rval = -`2`;
435	}
436
437	if (BE (rval == `0`, `1`))
438	{
439	if (ret_len)
440	{
441	assert (pmatch[`0`].rm_so == start);
442	rval = pmatch[`0`].rm_eo - start;
443	}
444	else
445	rval = pmatch[`0`].rm_so;
446	}
447	re_free (pmatch);
448	out:
449	__libc_lock_unlock (dfa->lock);
450	return rval;
451	}
452
453	static unsigned
454	re_copy_regs (struct re_registers regs, regmatch_t pmatch, int nregs,
455	int regs_allocated)
456	{
457	int rval = REGS_REALLOCATE;
458	int i;
459	int need_regs = nregs + `1`;
460	/ We need one extra element beyond `num_regs' for the `-1' marker GNU code*
461	uses. /*
462
463	/ Have the register data arrays been allocated? /
464	if (regs_allocated == REGS_UNALLOCATED)
465	{ / No. So allocate them with malloc. /
466	regs->start = re_malloc (regoff_t, need_regs);
467	if (BE (regs->start == NULL, `0`))
468	return REGS_UNALLOCATED;
469	regs->end = re_malloc (regoff_t, need_regs);
470	if (BE (regs->end == NULL, `0`))
471	{
472	re_free (regs->start);
473	return REGS_UNALLOCATED;
474	}
475	regs->num_regs = need_regs;
476	}
477	else if (regs_allocated == REGS_REALLOCATE)
478	{ / Yes. If we need more elements than were already*
479	allocated, reallocate them. If we need fewer, just
480	leave it alone. /*
481	if (BE (need_regs > regs->num_regs, `0`))
482	{
483	regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
484	regoff_t *new_end;
485	if (BE (new_start == NULL, `0`))
486	return REGS_UNALLOCATED;
487	new_end = re_realloc (regs->end, regoff_t, need_regs);
488	if (BE (new_end == NULL, `0`))
489	{
490	re_free (new_start);
491	return REGS_UNALLOCATED;
492	}
493	regs->start = new_start;
494	regs->end = new_end;
495	regs->num_regs = need_regs;
496	}
497	}
498	else
499	{
500	assert (regs_allocated == REGS_FIXED);
501	/ This function may not be called with REGS_FIXED and nregs too big. /
502	assert (regs->num_regs >= nregs);
503	rval = REGS_FIXED;
504	}
505
506	/ Copy the regs. /
507	for (i = `0`; i < nregs; ++i)
508	{
509	regs->start[i] = pmatch[i].rm_so;
510	regs->end[i] = pmatch[i].rm_eo;
511	}
512	for ( ; i < regs->num_regs; ++i)
513	regs->start[i] = regs->end[i] = -`1`;
514
515	return rval;
516	}
517
518	/ Set REGS to hold NUM_REGS registers, storing them in STARTS and*
519	ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
520	this memory for recording register information. STARTS and ENDS
521	must be allocated using the malloc library routine, and must each
522	be at least NUM_REGS sizeof (regoff_t) bytes long.*
523
524	If NUM_REGS == 0, then subsequent matches should allocate their own
525	register data.
526
527	Unless this function is called, the first search or match using
528	PATTERN_BUFFER will allocate its own register data, without
529	freeing the old data. /*
530
531	void
532	re_set_registers (struct re_pattern_buffer bufp, struct* re_registers *regs,
533	unsigned num_regs, regoff_t starts, regoff_t ends)
534	{
535	if (num_regs)
536	{
537	bufp->regs_allocated = REGS_REALLOCATE;
538	regs->num_regs = num_regs;
539	regs->start = starts;
540	regs->end = ends;
541	}
542	else
543	{
544	bufp->regs_allocated = REGS_UNALLOCATED;
545	regs->num_regs = `0`;
546	regs->start = regs->end = (regoff_t *) `0`;
547	}
548	}
549	#ifdef _LIBC
550	weak_alias (__re_set_registers, re_set_registers)
551	#endif
552
/* Entry points compatible with 4.2 BSD regex library.  We don't define
   them unless specifically requested.  */

#if defined _REGEX_RE_COMP || defined _LIBC
/* Match S against the pattern held in re_comp_buf.  Returns 1 when
   regexec reports a match (returns 0) and 0 otherwise.  */
int
# ifdef _LIBC
weak_function
# endif
re_exec (const char *s)
{
  return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
}
#endif /* _REGEX_RE_COMP */

567	/ Internal entry point. /
568
569	/ Searches for a compiled pattern PREG in the string STRING, whose*
570	length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
571	meaning as with regexec. START, and RANGE have the same meanings
572	with re_search.
573	Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
574	otherwise return the error code.
575	Note: We assume front end functions already check ranges.
576	(START + RANGE >= 0 && START + RANGE <= LENGTH) /*
577
578	static reg_errcode_t
579	__attribute_warn_unused_result__
580	re_search_internal (const regex_t preg, const* char string, int* length,
581	int start, int range, int stop, size_t nmatch,
582	regmatch_t pmatch[], int eflags)
583	{
584	reg_errcode_t err;
585	const re_dfa_t dfa = (const* re_dfa_t *) preg->buffer;
586	int left_lim, right_lim, incr;
587	int fl_longest_match, match_first, match_kind, match_last = -`1`;
588	int extra_nmatch;
589	int sb, ch;
590	#if defined _LIBC \|\| (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
591	re_match_context_t mctx = { .dfa = dfa };
592	#else
593	re_match_context_t mctx;
594	#endif
595	char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate
596	&& range && !preg->can_be_null) ? preg->fastmap : NULL;
597	RE_TRANSLATE_TYPE t = preg->translate;
598
599	#if !(defined _LIBC \|\| (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
600	memset (&mctx, `'\0'`, sizeof (re_match_context_t));
601	mctx.dfa = dfa;
602	#endif
603
604	extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + `1`) : `0`;
605	nmatch -= extra_nmatch;
606
607	/ Check if the DFA haven't been compiled. /
608	if (BE (preg->used == `0` \|\| dfa->init_state == NULL
609	\|\| dfa->init_state_word == NULL \|\| dfa->init_state_nl == NULL
610	\|\| dfa->init_state_begbuf == NULL, `0`))
611	return REG_NOMATCH;
612
613	#ifdef DEBUG
614	/ We assume front-end functions already check them. /
615	assert (start + range >= `0` && start + range <= length);
616	#endif
617
618	/ If initial states with non-begbuf contexts have no elements,*
619	the regex must be anchored. If preg->newline_anchor is set,
620	we'll never use init_state_nl, so do not check it. /*
621	if (dfa->init_state->nodes.nelem == `0`
622	&& dfa->init_state_word->nodes.nelem == `0`
623	&& (dfa->init_state_nl->nodes.nelem == `0`
624	\|\| !preg->newline_anchor))
625	{
626	if (start != `0` && start + range != `0`)
627	return REG_NOMATCH;
628	start = range = `0`;
629	}
630
631	/ We must check the longest matching, if nmatch > 0. /
632	fl_longest_match = (nmatch != `0` \|\| dfa->nbackref);
633
634	err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + `1`,
635	preg->translate, preg->syntax & RE_ICASE, dfa);
636	if (BE (err != REG_NOERROR, `0`))
637	goto free_return;
638	mctx.input.stop = stop;
639	mctx.input.raw_stop = stop;
640	mctx.input.newline_anchor = preg->newline_anchor;
641
642	err = match_ctx_init (&mctx, eflags, dfa->nbackref * `2`);
643	if (BE (err != REG_NOERROR, `0`))
644	goto free_return;
645
646	/ We will log all the DFA states through which the dfa pass,*
647	if nmatch > 1, or this dfa has "multibyte node", which is a
648	back-reference or a node which can accept multibyte character or
649	multi character collating element. /*
650	if (nmatch > `1` \|\| dfa->has_mb_node)
651	{
652	/ Avoid overflow. /
653	if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, `0`))
654	{
655	err = REG_ESPACE;
656	goto free_return;
657	}
658
659	mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + `1`);
660	if (BE (mctx.state_log == NULL, `0`))
661	{
662	err = REG_ESPACE;
663	goto free_return;
664	}
665	}
666	else
667	mctx.state_log = NULL;
668
669	match_first = start;
670	mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
671	: CONTEXT_NEWLINE \| CONTEXT_BEGBUF;
672
673	/ Check incrementally whether of not the input string match. /
674	incr = (range < `0`) ? -`1` : `1`;
675	left_lim = (range < `0`) ? start + range : start;
676	right_lim = (range < `0`) ? start : start + range;
677	sb = dfa->mb_cur_max == `1`;
678	match_kind =
679	(fastmap
680	? ((sb \|\| !(preg->syntax & RE_ICASE \|\| t) ? `4` : `0`)
681	\| (range >= `0` ? `2` : `0`)
682	\| (t != NULL ? `1` : `0`))
683	: `8`);
684
685	for (;; match_first += incr)
686	{
687	err = REG_NOMATCH;
688	if (match_first < left_lim \|\| right_lim < match_first)
689	goto free_return;
690
691	/ Advance as rapidly as possible through the string, until we*
692	find a plausible place to start matching. This may be done
693	with varying efficiency, so there are various possibilities:
694	only the most common of them are specialized, in order to
695	save on code size. We use a switch statement for speed. /*
696	switch (match_kind)
697	{
698	case `8`:
699	/ No fastmap. /
700	break;
701
702	case `7`:
703	/ Fastmap with single-byte translation, match forward. /
704	while (BE (match_first < right_lim, `1`)
705	&& !fastmap[t[(unsigned char) string[match_first]]])
706	++match_first;
707	goto forward_match_found_start_or_reached_end;
708
709	case `6`:
710	/ Fastmap without translation, match forward. /
711	while (BE (match_first < right_lim, `1`)
712	&& !fastmap[(unsigned char) string[match_first]])
713	++match_first;
714
715	forward_match_found_start_or_reached_end:
716	if (BE (match_first == right_lim, `0`))
717	{
718	ch = match_first >= length
719	? `0` : (unsigned char) string[match_first];
720	if (!fastmap[t ? t[ch] : ch])
721	goto free_return;
722	}
723	break;
724
725	case `4`:
726	case `5`:
727	/ Fastmap without multi-byte translation, match backwards. /
728	while (match_first >= left_lim)
729	{
730	ch = match_first >= length
731	? `0` : (unsigned char) string[match_first];
732	if (fastmap[t ? t[ch] : ch])
733	break;
734	--match_first;
735	}
736	if (match_first < left_lim)
737	goto free_return;
738	break;
739
740	default:
741	/ In this case, we can't determine easily the current byte,*
742	since it might be a component byte of a multibyte
743	character. Then we use the constructed buffer instead. /*
744	for (;;)
745	{
746	/ If MATCH_FIRST is out of the valid range, reconstruct the*
747	buffers. /*
748	unsigned int offset = match_first - mctx.input.raw_mbs_idx;
749	if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, `0`))
750	{
751	err = re_string_reconstruct (&mctx.input, match_first,
752	eflags);
753	if (BE (err != REG_NOERROR, `0`))
754	goto free_return;
755
756	offset = match_first - mctx.input.raw_mbs_idx;
757	}
758	/ If MATCH_FIRST is out of the buffer, leave it as '\0'.*
759	Note that MATCH_FIRST must not be smaller than 0. /*
760	ch = (match_first >= length
761	? `0` : re_string_byte_at (&mctx.input, offset));
762	if (fastmap[ch])
763	break;
764	match_first += incr;
765	if (match_first < left_lim \|\| match_first > right_lim)
766	{
767	err = REG_NOMATCH;
768	goto free_return;
769	}
770	}
771	break;
772	}
773
774	/ Reconstruct the buffers so that the matcher can assume that*
775	the matching starts from the beginning of the buffer. /*
776	err = re_string_reconstruct (&mctx.input, match_first, eflags);
777	if (BE (err != REG_NOERROR, `0`))
778	goto free_return;
779
780	#ifdef RE_ENABLE_I18N
781	/ Don't consider this char as a possible match start if it part,*
782	yet isn't the head, of a multibyte character. /*
783	if (!sb && !re_string_first_byte (&mctx.input, `0`))
784	continue;
785	#endif
786
787	/ It seems to be appropriate one, then use the matcher. /
788	/ We assume that the matching starts from 0. /
789	mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = `0`;
790	match_last = check_matching (&mctx, fl_longest_match,
791	range >= `0` ? &match_first : NULL);
792	if (match_last != -`1`)
793	{
794	if (BE (match_last == -`2`, `0`))
795	{
796	err = REG_ESPACE;
797	goto free_return;
798	}
799	else
800	{
801	mctx.match_last = match_last;
802	if ((!preg->no_sub && nmatch > `1`) \|\| dfa->nbackref)
803	{
804	re_dfastate_t *pstate = mctx.state_log[match_last];
805	mctx.last_node = check_halt_state_context (&mctx, pstate,
806	match_last);
807	}
808	if ((!preg->no_sub && nmatch > `1` && dfa->has_plural_match)
809	\|\| dfa->nbackref)
810	{
811	err = prune_impossible_nodes (&mctx);
812	if (err == REG_NOERROR)
813	break;
814	if (BE (err != REG_NOMATCH, `0`))
815	goto free_return;
816	match_last = -`1`;
817	}
818	else
819	break; / We found a match. /
820	}
821	}
822
823	match_ctx_clean (&mctx);
824	}
825
826	#ifdef DEBUG
827	assert (match_last != -`1`);
828	assert (err == REG_NOERROR);
829	#endif
830
831	/ Set pmatch[] if we need. /
832	if (nmatch > `0`)
833	{
834	int reg_idx;
835
836	/ Initialize registers. /
837	for (reg_idx = `1`; reg_idx < nmatch; ++reg_idx)
838	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -`1`;
839
840	/ Set the points where matching start/end. /
841	pmatch[`0`].rm_so = `0`;
842	pmatch[`0`].rm_eo = mctx.match_last;
843
844	if (!preg->no_sub && nmatch > `1`)
845	{
846	err = set_regs (preg, &mctx, nmatch, pmatch,
847	dfa->has_plural_match && dfa->nbackref > `0`);
848	if (BE (err != REG_NOERROR, `0`))
849	goto free_return;
850	}
851
852	/ At last, add the offset to each register, since we slid*
853	the buffers so that we could assume that the matching starts
854	from 0. /*
855	for (reg_idx = `0`; reg_idx < nmatch; ++reg_idx)
856	if (pmatch[reg_idx].rm_so != -`1`)
857	{
858	#ifdef RE_ENABLE_I18N
859	if (BE (mctx.input.offsets_needed != `0`, `0`))
860	{
861	pmatch[reg_idx].rm_so =
862	(pmatch[reg_idx].rm_so == mctx.input.valid_len
863	? mctx.input.valid_raw_len
864	: mctx.input.offsets[pmatch[reg_idx].rm_so]);
865	pmatch[reg_idx].rm_eo =
866	(pmatch[reg_idx].rm_eo == mctx.input.valid_len
867	? mctx.input.valid_raw_len
868	: mctx.input.offsets[pmatch[reg_idx].rm_eo]);
869	}
870	#else
871	assert (mctx.input.offsets_needed == `0`);
872	#endif
873	pmatch[reg_idx].rm_so += match_first;
874	pmatch[reg_idx].rm_eo += match_first;
875	}
876	for (reg_idx = `0`; reg_idx < extra_nmatch; ++reg_idx)
877	{
878	pmatch[nmatch + reg_idx].rm_so = -`1`;
879	pmatch[nmatch + reg_idx].rm_eo = -`1`;
880	}
881
882	if (dfa->subexp_map)
883	for (reg_idx = `0`; reg_idx + `1` < nmatch; reg_idx++)
884	if (dfa->subexp_map[reg_idx] != reg_idx)
885	{
886	pmatch[reg_idx + `1`].rm_so
887	= pmatch[dfa->subexp_map[reg_idx] + `1`].rm_so;
888	pmatch[reg_idx + `1`].rm_eo
889	= pmatch[dfa->subexp_map[reg_idx] + `1`].rm_eo;
890	}
891	}
892
893	free_return:
894	re_free (mctx.state_log);
895	if (dfa->nbackref)
896	match_ctx_free (&mctx);
897	re_string_destruct (&mctx.input);
898	return err;
899	}
900
901	static reg_errcode_t
902	__attribute_warn_unused_result__
903	prune_impossible_nodes (re_match_context_t *mctx)
904	{
905	const re_dfa_t *const dfa = mctx->dfa;
906	int halt_node, match_last;
907	reg_errcode_t ret;
908	re_dfastate_t **sifted_states;
909	re_dfastate_t **lim_states = NULL;
910	re_sift_context_t sctx;
911	#ifdef DEBUG
912	assert (mctx->state_log != NULL);
913	#endif
914	match_last = mctx->match_last;
915	halt_node = mctx->last_node;
916
917	/ Avoid overflow. /
918	if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= match_last, `0`))
919	return REG_ESPACE;
920
921	sifted_states = re_malloc (re_dfastate_t *, match_last + `1`);
922	if (BE (sifted_states == NULL, `0`))
923	{
924	ret = REG_ESPACE;
925	goto free_return;
926	}
927	if (dfa->nbackref)
928	{
929	lim_states = re_malloc (re_dfastate_t *, match_last + `1`);
930	if (BE (lim_states == NULL, `0`))
931	{
932	ret = REG_ESPACE;
933	goto free_return;
934	}
935	while (`1`)
936	{
937	memset (lim_states, `'\0'`,
938	sizeof (re_dfastate_t ) (match_last + `1`));
939	sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
940	match_last);
941	ret = sift_states_backward (mctx, &sctx);
942	re_node_set_free (&sctx.limits);
943	if (BE (ret != REG_NOERROR, `0`))
944	goto free_return;
945	if (sifted_states[`0`] != NULL \|\| lim_states[`0`] != NULL)
946	break;
947	do
948	{
949	--match_last;
950	if (match_last < `0`)
951	{
952	ret = REG_NOMATCH;
953	goto free_return;
954	}
955	} while (mctx->state_log[match_last] == NULL
956	\|\| !mctx->state_log[match_last]->halt);
957	halt_node = check_halt_state_context (mctx,
958	mctx->state_log[match_last],
959	match_last);
960	}
961	ret = merge_state_array (dfa, sifted_states, lim_states,
962	match_last + `1`);
963	re_free (lim_states);
964	lim_states = NULL;
965	if (BE (ret != REG_NOERROR, `0`))
966	goto free_return;
967	}
968	else
969	{
970	sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
971	ret = sift_states_backward (mctx, &sctx);
972	re_node_set_free (&sctx.limits);
973	if (BE (ret != REG_NOERROR, `0`))
974	goto free_return;
975	if (sifted_states[`0`] == NULL)
976	{
977	ret = REG_NOMATCH;
978	goto free_return;
979	}
980	}
981	re_free (mctx->state_log);
982	mctx->state_log = sifted_states;
983	sifted_states = NULL;
984	mctx->last_node = halt_node;
985	mctx->match_last = match_last;
986	ret = REG_NOERROR;
987	free_return:
988	re_free (sifted_states);
989	re_free (lim_states);
990	return ret;
991	}
992
993	/ Acquire an initial state and return it.*
994	We must select appropriate initial state depending on the context,
995	since initial states may have constraints like "\<", "^", etc.. /*
996
997	static inline re_dfastate_t *
998	__attribute ((always_inline))
999	acquire_init_state_context (reg_errcode_t err, const* re_match_context_t *mctx,
1000	int idx)
1001	{
1002	const re_dfa_t *const dfa = mctx->dfa;
1003	if (dfa->init_state->has_constraint)
1004	{
1005	unsigned int context;
1006	context = re_string_context_at (&mctx->input, idx - `1`, mctx->eflags);
1007	if (IS_WORD_CONTEXT (context))
1008	return dfa->init_state_word;
1009	else if (IS_ORDINARY_CONTEXT (context))
1010	return dfa->init_state;
1011	else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1012	return dfa->init_state_begbuf;
1013	else if (IS_NEWLINE_CONTEXT (context))
1014	return dfa->init_state_nl;
1015	else if (IS_BEGBUF_CONTEXT (context))
1016	{
1017	/ It is relatively rare case, then calculate on demand. /
1018	return re_acquire_state_context (err, dfa,
1019	dfa->init_state->entrance_nodes,
1020	context);
1021	}
1022	else
1023	/ Must not happen? /
1024	return dfa->init_state;
1025	}
1026	else
1027	return dfa->init_state;
1028	}
1029
1030	/ Check whether the regular expression match input string INPUT or not,*
1031	and return the index where the matching end, return -1 if not match,
1032	or return -2 in case of an error.
1033	FL_LONGEST_MATCH means we want the POSIX longest matching.
1034	If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1035	next place where we may want to try matching.
1036	Note that the matcher assume that the maching starts from the current
1037	index of the buffer. /*
1038
1039	static int
1040	__attribute_warn_unused_result__
1041	check_matching (re_match_context_t mctx, int* fl_longest_match,
1042	int *p_match_first)
1043	{
1044	const re_dfa_t *const dfa = mctx->dfa;
1045	reg_errcode_t err;
1046	int match = `0`;
1047	int match_last = -`1`;
1048	int cur_str_idx = re_string_cur_idx (&mctx->input);
1049	re_dfastate_t *cur_state;
1050	int at_init_state = p_match_first != NULL;
1051	int next_start_idx = cur_str_idx;
1052
1053	err = REG_NOERROR;
1054	cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1055	/ An initial state must not be NULL (invalid). /
1056	if (BE (cur_state == NULL, `0`))
1057	{
1058	assert (err == REG_ESPACE);
1059	return -`2`;
1060	}
1061
1062	if (mctx->state_log != NULL)
1063	{
1064	mctx->state_log[cur_str_idx] = cur_state;
1065
1066	/ Check OP_OPEN_SUBEXP in the initial state in case that we use them*
1067	later. E.g. Processing back references. /*
1068	if (BE (dfa->nbackref, `0`))
1069	{
1070	at_init_state = `0`;
1071	err = check_subexp_matching_top (mctx, &cur_state->nodes, `0`);
1072	if (BE (err != REG_NOERROR, `0`))
1073	return err;
1074
1075	if (cur_state->has_backref)
1076	{
1077	err = transit_state_bkref (mctx, &cur_state->nodes);
1078	if (BE (err != REG_NOERROR, `0`))
1079	return err;
1080	}
1081	}
1082	}
1083
1084	/ If the RE accepts NULL string. /
1085	if (BE (cur_state->halt, `0`))
1086	{
1087	if (!cur_state->has_constraint
1088	\|\| check_halt_state_context (mctx, cur_state, cur_str_idx))
1089	{
1090	if (!fl_longest_match)
1091	return cur_str_idx;
1092	else
1093	{
1094	match_last = cur_str_idx;
1095	match = `1`;
1096	}
1097	}
1098	}
1099
1100	while (!re_string_eoi (&mctx->input))
1101	{
1102	re_dfastate_t *old_state = cur_state;
1103	int next_char_idx = re_string_cur_idx (&mctx->input) + `1`;
1104
1105	if ((BE (next_char_idx >= mctx->input.bufs_len, `0`)
1106	&& mctx->input.bufs_len < mctx->input.len)
1107	\|\| (BE (next_char_idx >= mctx->input.valid_len, `0`)
1108	&& mctx->input.valid_len < mctx->input.len))
1109	{
1110	err = extend_buffers (mctx, next_char_idx + `1`);
1111	if (BE (err != REG_NOERROR, `0`))
1112	{
1113	assert (err == REG_ESPACE);
1114	return -`2`;
1115	}
1116	}
1117
1118	cur_state = transit_state (&err, mctx, cur_state);
1119	if (mctx->state_log != NULL)
1120	cur_state = merge_state_with_log (&err, mctx, cur_state);
1121
1122	if (cur_state == NULL)
1123	{
1124	/ Reached the invalid state or an error. Try to recover a valid*
1125	state using the state log, if available and if we have not
1126	already found a valid (even if not the longest) match. /*
1127	if (BE (err != REG_NOERROR, `0`))
1128	return -`2`;
1129
1130	if (mctx->state_log == NULL
1131	\|\| (match && !fl_longest_match)
1132	\|\| (cur_state = find_recover_state (&err, mctx)) == NULL)
1133	break;
1134	}
1135
1136	if (BE (at_init_state, `0`))
1137	{
1138	if (old_state == cur_state)
1139	next_start_idx = next_char_idx;
1140	else
1141	at_init_state = `0`;
1142	}
1143
1144	if (cur_state->halt)
1145	{
1146	/ Reached a halt state.*
1147	Check the halt state can satisfy the current context. /*
1148	if (!cur_state->has_constraint
1149	\|\| check_halt_state_context (mctx, cur_state,
1150	re_string_cur_idx (&mctx->input)))
1151	{
1152	/ We found an appropriate halt state. /
1153	match_last = re_string_cur_idx (&mctx->input);
1154	match = `1`;
1155
1156	/ We found a match, do not modify match_first below. /
1157	p_match_first = NULL;
1158	if (!fl_longest_match)
1159	break;
1160	}
1161	}
1162	}
1163
1164	if (p_match_first)
1165	*p_match_first += next_start_idx;
1166
1167	return match_last;
1168	}
1169
1170	/ Check NODE match the current context. /
1171
1172	static int
1173	check_halt_node_context (const re_dfa_t dfa, int* node, unsigned int context)
1174	{
1175	re_token_type_t type = dfa->nodes[node].type;
1176	unsigned int constraint = dfa->nodes[node].constraint;
1177	if (type != END_OF_RE)
1178	return `0`;
1179	if (!constraint)
1180	return `1`;
1181	if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1182	return `0`;
1183	return `1`;
1184	}
1185
1186	/ Check the halt state STATE match the current context.*
1187	Return 0 if not match, if the node, STATE has, is a halt node and
1188	match the context, return the node. /*
1189
1190	static int
1191	check_halt_state_context (const re_match_context_t *mctx,
1192	const re_dfastate_t state, int* idx)
1193	{
1194	int i;
1195	unsigned int context;
1196	#ifdef DEBUG
1197	assert (state->halt);
1198	#endif
1199	context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1200	for (i = `0`; i < state->nodes.nelem; ++i)
1201	if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1202	return state->nodes.elems[i];
1203	return `0`;
1204	}
1205
1206	/ Compute the next node to which "NFA" transit from NODE("NFA" is a NFA*
1207	corresponding to the DFA).
1208	Return the destination node, and update EPS_VIA_NODES, return -1 in case
1209	of errors. /*
1210
1211	static int
1212	proceed_next_node (const re_match_context_t mctx, int* nregs, regmatch_t *regs,
1213	int pidx, int* node, re_node_set *eps_via_nodes,
1214	struct re_fail_stack_t *fs)
1215	{
1216	const re_dfa_t *const dfa = mctx->dfa;
1217	int i, err;
1218	if (IS_EPSILON_NODE (dfa->nodes[node].type))
1219	{
1220	re_node_set cur_nodes = &mctx->state_log[pidx]->nodes;
1221	re_node_set *edests = &dfa->edests[node];
1222	int dest_node;
1223	err = re_node_set_insert (eps_via_nodes, node);
1224	if (BE (err < `0`, `0`))
1225	return -`2`;
1226	/ Pick up a valid destination, or return -1 if none is found. /
1227	for (dest_node = -`1`, i = `0`; i < edests->nelem; ++i)
1228	{
1229	int candidate = edests->elems[i];
1230	if (!re_node_set_contains (cur_nodes, candidate))
1231	continue;
1232	if (dest_node == -`1`)
1233	dest_node = candidate;
1234
1235	else
1236	{
1237	/ In order to avoid infinite loop like "(a)", return the second*
1238	epsilon-transition if the first was already considered. /*
1239	if (re_node_set_contains (eps_via_nodes, dest_node))
1240	return candidate;
1241
1242	/ Otherwise, push the second epsilon-transition on the fail stack. /
1243	else if (fs != NULL
1244	&& push_fail_stack (fs, *pidx, candidate, nregs, regs,
1245	eps_via_nodes))
1246	return -`2`;
1247
1248	/ We know we are going to exit. /
1249	break;
1250	}
1251	}
1252	return dest_node;
1253	}
1254	else
1255	{
1256	int naccepted = `0`;
1257	re_token_type_t type = dfa->nodes[node].type;
1258
1259	#ifdef RE_ENABLE_I18N
1260	if (dfa->nodes[node].accept_mb)
1261	naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1262	else
1263	#endif /* RE_ENABLE_I18N */
1264	if (type == OP_BACK_REF)
1265	{
1266	int subexp_idx = dfa->nodes[node].opr.idx + `1`;
1267	naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1268	if (fs != NULL)
1269	{
1270	if (regs[subexp_idx].rm_so == -`1` \|\| regs[subexp_idx].rm_eo == -`1`)
1271	return -`1`;
1272	else if (naccepted)
1273	{
1274	char buf = (char* *) re_string_get_buffer (&mctx->input);
1275	if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1276	naccepted) != `0`)
1277	return -`1`;
1278	}
1279	}
1280
1281	if (naccepted == `0`)
1282	{
1283	int dest_node;
1284	err = re_node_set_insert (eps_via_nodes, node);
1285	if (BE (err < `0`, `0`))
1286	return -`2`;
1287	dest_node = dfa->edests[node].elems[`0`];
1288	if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1289	dest_node))
1290	return dest_node;
1291	}
1292	}
1293
1294	if (naccepted != `0`
1295	\|\| check_node_accept (mctx, dfa->nodes + node, *pidx))
1296	{
1297	int dest_node = dfa->nexts[node];
1298	pidx = (naccepted == `0`) ? pidx + `1` : *pidx + naccepted;
1299	if (fs && (pidx > mctx->match_last \|\| mctx->state_log[pidx] == NULL
1300	\|\| !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1301	dest_node)))
1302	return -`1`;
1303	re_node_set_empty (eps_via_nodes);
1304	return dest_node;
1305	}
1306	}
1307	return -`1`;
1308	}
1309
1310	static reg_errcode_t
1311	__attribute_warn_unused_result__
1312	push_fail_stack (struct re_fail_stack_t fs, int* str_idx, int dest_node,
1313	int nregs, regmatch_t regs, re_node_set eps_via_nodes)
1314	{
1315	reg_errcode_t err;
1316	int num = fs->num++;
1317	if (fs->num == fs->alloc)
1318	{
1319	struct re_fail_stack_ent_t *new_array;
1320	new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
1321	* fs->alloc * `2`));
1322	if (new_array == NULL)
1323	return REG_ESPACE;
1324	fs->alloc *= `2`;
1325	fs->stack = new_array;
1326	}
1327	fs->stack[num].idx = str_idx;
1328	fs->stack[num].node = dest_node;
1329	fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1330	if (fs->stack[num].regs == NULL)
1331	return REG_ESPACE;
1332	memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1333	err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1334	return err;
1335	}
1336
1337	static int
1338	pop_fail_stack (struct re_fail_stack_t fs, int* pidx, int* nregs,
1339	regmatch_t regs, re_node_set eps_via_nodes)
1340	{
1341	int num = --fs->num;
1342	assert (num >= `0`);
1343	*pidx = fs->stack[num].idx;
1344	memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1345	re_node_set_free (eps_via_nodes);
1346	re_free (fs->stack[num].regs);
1347	*eps_via_nodes = fs->stack[num].eps_via_nodes;
1348	return fs->stack[num].node;
1349	}
1350
1351	/ Set the positions where the subexpressions are starts/ends to registers*
1352	PMATCH.
1353	Note: We assume that pmatch[0] is already set, and
1354	pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. /*
1355
1356	static reg_errcode_t
1357	__attribute_warn_unused_result__
1358	set_regs (const regex_t preg, const* re_match_context_t *mctx, size_t nmatch,
1359	regmatch_t pmatch, int* fl_backtrack)
1360	{
1361	const re_dfa_t dfa = (const* re_dfa_t *) preg->buffer;
1362	int idx, cur_node;
1363	re_node_set eps_via_nodes;
1364	struct re_fail_stack_t *fs;
1365	struct re_fail_stack_t fs_body = { `0`, `2`, NULL };
1366	regmatch_t *prev_idx_match;
1367	int prev_idx_match_malloced = `0`;
1368
1369	#ifdef DEBUG
1370	assert (nmatch > `1`);
1371	assert (mctx->state_log != NULL);
1372	#endif
1373	if (fl_backtrack)
1374	{
1375	fs = &fs_body;
1376	fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1377	if (fs->stack == NULL)
1378	return REG_ESPACE;
1379	}
1380	else
1381	fs = NULL;
1382
1383	cur_node = dfa->init_node;
1384	re_node_set_init_empty (&eps_via_nodes);
1385
1386	if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
1387	prev_idx_match = (regmatch_t ) alloca (nmatch sizeof (regmatch_t));
1388	else
1389	{
1390	prev_idx_match = re_malloc (regmatch_t, nmatch);
1391	if (prev_idx_match == NULL)
1392	{
1393	free_fail_stack_return (fs);
1394	return REG_ESPACE;
1395	}
1396	prev_idx_match_malloced = `1`;
1397	}
1398	memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1399
1400	for (idx = pmatch[`0`].rm_so; idx <= pmatch[`0`].rm_eo ;)
1401	{
1402	update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1403
1404	if (idx == pmatch[`0`].rm_eo && cur_node == mctx->last_node)
1405	{
1406	int reg_idx;
1407	if (fs)
1408	{
1409	for (reg_idx = `0`; reg_idx < nmatch; ++reg_idx)
1410	if (pmatch[reg_idx].rm_so > -`1` && pmatch[reg_idx].rm_eo == -`1`)
1411	break;
1412	if (reg_idx == nmatch)
1413	{
1414	re_node_set_free (&eps_via_nodes);
1415	if (prev_idx_match_malloced)
1416	re_free (prev_idx_match);
1417	return free_fail_stack_return (fs);
1418	}
1419	cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1420	&eps_via_nodes);
1421	}
1422	else
1423	{
1424	re_node_set_free (&eps_via_nodes);
1425	if (prev_idx_match_malloced)
1426	re_free (prev_idx_match);
1427	return REG_NOERROR;
1428	}
1429	}
1430
1431	/ Proceed to next node. /
1432	cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1433	&eps_via_nodes, fs);
1434
1435	if (BE (cur_node < `0`, `0`))
1436	{
1437	if (BE (cur_node == -`2`, `0`))
1438	{
1439	re_node_set_free (&eps_via_nodes);
1440	if (prev_idx_match_malloced)
1441	re_free (prev_idx_match);
1442	free_fail_stack_return (fs);
1443	return REG_ESPACE;
1444	}
1445	if (fs)
1446	cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1447	&eps_via_nodes);
1448	else
1449	{
1450	re_node_set_free (&eps_via_nodes);
1451	if (prev_idx_match_malloced)
1452	re_free (prev_idx_match);
1453	return REG_NOMATCH;
1454	}
1455	}
1456	}
1457	re_node_set_free (&eps_via_nodes);
1458	if (prev_idx_match_malloced)
1459	re_free (prev_idx_match);
1460	return free_fail_stack_return (fs);
1461	}
1462
1463	static reg_errcode_t
1464	free_fail_stack_return (struct re_fail_stack_t *fs)
1465	{
1466	if (fs)
1467	{
1468	int fs_idx;
1469	for (fs_idx = `0`; fs_idx < fs->num; ++fs_idx)
1470	{
1471	re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1472	re_free (fs->stack[fs_idx].regs);
1473	}
1474	re_free (fs->stack);
1475	}
1476	return REG_NOERROR;
1477	}
1478
1479	static void
1480	update_regs (const re_dfa_t dfa, regmatch_t pmatch,
1481	regmatch_t prev_idx_match, int* cur_node, int cur_idx, int nmatch)
1482	{
1483	int type = dfa->nodes[cur_node].type;
1484	if (type == OP_OPEN_SUBEXP)
1485	{
1486	int reg_num = dfa->nodes[cur_node].opr.idx + `1`;
1487
1488	/ We are at the first node of this sub expression. /
1489	if (reg_num < nmatch)
1490	{
1491	pmatch[reg_num].rm_so = cur_idx;
1492	pmatch[reg_num].rm_eo = -`1`;
1493	}
1494	}
1495	else if (type == OP_CLOSE_SUBEXP)
1496	{
1497	int reg_num = dfa->nodes[cur_node].opr.idx + `1`;
1498	if (reg_num < nmatch)
1499	{
1500	/ We are at the last node of this sub expression. /
1501	if (pmatch[reg_num].rm_so < cur_idx)
1502	{
1503	pmatch[reg_num].rm_eo = cur_idx;
1504	/ This is a non-empty match or we are not inside an optional*
1505	subexpression. Accept this right away. /*
1506	memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1507	}
1508	else
1509	{
1510	if (dfa->nodes[cur_node].opt_subexp
1511	&& prev_idx_match[reg_num].rm_so != -`1`)
1512	/ We transited through an empty match for an optional*
1513	subexpression, like (a?), and this is not the subexp's*
1514	first match. Copy back the old content of the registers
1515	so that matches of an inner subexpression are undone as
1516	well, like in ((a?)). /
1517	memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1518	else
1519	/ We completed a subexpression, but it may be part of*
1520	an optional one, so do not update PREV_IDX_MATCH. /*
1521	pmatch[reg_num].rm_eo = cur_idx;
1522	}
1523	}
1524	}
1525	}
1526
1527	/ This function checks the STATE_LOG from the SCTX->last_str_idx to 0*
1528	and sift the nodes in each states according to the following rules.
1529	Updated state_log will be wrote to STATE_LOG.
1530
1531	Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
1532	1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1533	If `a' isn't the LAST_NODE and `a' can't epsilon transit to
1534	the LAST_NODE, we throw away the node `a'.
1535	2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
1536	string `s' and transit to `b':
1537	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1538	away the node `a'.
1539	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1540	thrown away, we throw away the node `a'.
1541	3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1542	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1543	node `a'.
1544	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1545	we throw away the node `a'. /*
1546
1547	#define STATE_NODE_CONTAINS(state,node) \
1548	((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1549
1550	static reg_errcode_t
1551	sift_states_backward (const re_match_context_t mctx, re_sift_context_t sctx)
1552	{
1553	reg_errcode_t err;
1554	int null_cnt = `0`;
1555	int str_idx = sctx->last_str_idx;
1556	re_node_set cur_dest;
1557
1558	#ifdef DEBUG
1559	assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1560	#endif
1561
1562	/ Build sifted state_log[str_idx]. It has the nodes which can epsilon*
1563	transit to the last_node and the last_node itself. /*
1564	err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1565	if (BE (err != REG_NOERROR, `0`))
1566	return err;
1567	err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1568	if (BE (err != REG_NOERROR, `0`))
1569	goto free_return;
1570
1571	/ Then check each states in the state_log. /
1572	while (str_idx > `0`)
1573	{
1574	/ Update counters. /
1575	null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + `1` : `0`;
1576	if (null_cnt > mctx->max_mb_elem_len)
1577	{
1578	memset (sctx->sifted_states, `'\0'`,
1579	sizeof (re_dfastate_t ) str_idx);
1580	re_node_set_free (&cur_dest);
1581	return REG_NOERROR;
1582	}
1583	re_node_set_empty (&cur_dest);
1584	--str_idx;
1585
1586	if (mctx->state_log[str_idx])
1587	{
1588	err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1589	if (BE (err != REG_NOERROR, `0`))
1590	goto free_return;
1591	}
1592
1593	/ Add all the nodes which satisfy the following conditions:*
1594	- It can epsilon transit to a node in CUR_DEST.
1595	- It is in CUR_SRC.
1596	And update state_log. /*
1597	err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1598	if (BE (err != REG_NOERROR, `0`))
1599	goto free_return;
1600	}
1601	err = REG_NOERROR;
1602	free_return:
1603	re_node_set_free (&cur_dest);
1604	return err;
1605	}
1606
1607	static reg_errcode_t
1608	__attribute_warn_unused_result__
1609	build_sifted_states (const re_match_context_t mctx, re_sift_context_t sctx,
1610	int str_idx, re_node_set *cur_dest)
1611	{
1612	const re_dfa_t *const dfa = mctx->dfa;
1613	const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1614	int i;
1615
1616	/ Then build the next sifted state.*
1617	We build the next sifted state on `cur_dest', and update
1618	`sifted_states[str_idx]' with `cur_dest'.
1619	Note:
1620	`cur_dest' is the sifted state from `state_log[str_idx + 1]'.
1621	`cur_src' points the node_set of the old `state_log[str_idx]'
1622	(with the epsilon nodes pre-filtered out). /*
1623	for (i = `0`; i < cur_src->nelem; i++)
1624	{
1625	int prev_node = cur_src->elems[i];
1626	int naccepted = `0`;
1627	int ret;
1628
1629	#ifdef DEBUG
1630	re_token_type_t type = dfa->nodes[prev_node].type;
1631	assert (!IS_EPSILON_NODE (type));
1632	#endif
1633	#ifdef RE_ENABLE_I18N
1634	/ If the node may accept `multi byte'. /
1635	if (dfa->nodes[prev_node].accept_mb)
1636	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1637	str_idx, sctx->last_str_idx);
1638	#endif /* RE_ENABLE_I18N */
1639
1640	/ We don't check backreferences here.*
1641	See update_cur_sifted_state(). /*
1642	if (!naccepted
1643	&& check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1644	&& STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + `1`],
1645	dfa->nexts[prev_node]))
1646	naccepted = `1`;
1647
1648	if (naccepted == `0`)
1649	continue;
1650
1651	if (sctx->limits.nelem)
1652	{
1653	int to_idx = str_idx + naccepted;
1654	if (check_dst_limits (mctx, &sctx->limits,
1655	dfa->nexts[prev_node], to_idx,
1656	prev_node, str_idx))
1657	continue;
1658	}
1659	ret = re_node_set_insert (cur_dest, prev_node);
1660	if (BE (ret == -`1`, `0`))
1661	return REG_ESPACE;
1662	}
1663
1664	return REG_NOERROR;
1665	}
1666
1667	/ Helper functions. /
1668
1669	static reg_errcode_t
1670	clean_state_log_if_needed (re_match_context_t mctx, int* next_state_log_idx)
1671	{
1672	int top = mctx->state_log_top;
1673
1674	if ((next_state_log_idx >= mctx->input.bufs_len
1675	&& mctx->input.bufs_len < mctx->input.len)
1676	\|\| (next_state_log_idx >= mctx->input.valid_len
1677	&& mctx->input.valid_len < mctx->input.len))
1678	{
1679	reg_errcode_t err;
1680	err = extend_buffers (mctx, next_state_log_idx + `1`);
1681	if (BE (err != REG_NOERROR, `0`))
1682	return err;
1683	}
1684
1685	if (top < next_state_log_idx)
1686	{
1687	memset (mctx->state_log + top + `1`, `'\0'`,
1688	sizeof (re_dfastate_t ) (next_state_log_idx - top));
1689	mctx->state_log_top = next_state_log_idx;
1690	}
1691	return REG_NOERROR;
1692	}
1693
1694	static reg_errcode_t
1695	merge_state_array (const re_dfa_t dfa, re_dfastate_t *dst,
1696	re_dfastate_t *src, int* num)
1697	{
1698	int st_idx;
1699	reg_errcode_t err;
1700	for (st_idx = `0`; st_idx < num; ++st_idx)
1701	{
1702	if (dst[st_idx] == NULL)
1703	dst[st_idx] = src[st_idx];
1704	else if (src[st_idx] != NULL)
1705	{
1706	re_node_set merged_set;
1707	err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1708	&src[st_idx]->nodes);
1709	if (BE (err != REG_NOERROR, `0`))
1710	return err;
1711	dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1712	re_node_set_free (&merged_set);
1713	if (BE (err != REG_NOERROR, `0`))
1714	return err;
1715	}
1716	}
1717	return REG_NOERROR;
1718	}
1719
1720	static reg_errcode_t
1721	update_cur_sifted_state (const re_match_context_t *mctx,
1722	re_sift_context_t sctx, int* str_idx,
1723	re_node_set *dest_nodes)
1724	{
1725	const re_dfa_t *const dfa = mctx->dfa;
1726	reg_errcode_t err = REG_NOERROR;
1727	const re_node_set *candidates;
1728	candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1729	: &mctx->state_log[str_idx]->nodes);
1730
1731	if (dest_nodes->nelem == `0`)
1732	sctx->sifted_states[str_idx] = NULL;
1733	else
1734	{
1735	if (candidates)
1736	{
1737	/ At first, add the nodes which can epsilon transit to a node in*
1738	DEST_NODE. /*
1739	err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1740	if (BE (err != REG_NOERROR, `0`))
1741	return err;
1742
1743	/ Then, check the limitations in the current sift_context. /
1744	if (sctx->limits.nelem)
1745	{
1746	err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1747	mctx->bkref_ents, str_idx);
1748	if (BE (err != REG_NOERROR, `0`))
1749	return err;
1750	}
1751	}
1752
1753	sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1754	if (BE (err != REG_NOERROR, `0`))
1755	return err;
1756	}
1757
1758	if (candidates && mctx->state_log[str_idx]->has_backref)
1759	{
1760	err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1761	if (BE (err != REG_NOERROR, `0`))
1762	return err;
1763	}
1764	return REG_NOERROR;
1765	}
1766
1767	static reg_errcode_t
1768	__attribute_warn_unused_result__
1769	add_epsilon_src_nodes (const re_dfa_t dfa, re_node_set dest_nodes,
1770	const re_node_set *candidates)
1771	{
1772	reg_errcode_t err = REG_NOERROR;
1773	int i;
1774
1775	re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1776	if (BE (err != REG_NOERROR, `0`))
1777	return err;
1778
1779	if (!state->inveclosure.alloc)
1780	{
1781	err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1782	if (BE (err != REG_NOERROR, `0`))
1783	return REG_ESPACE;
1784	for (i = `0`; i < dest_nodes->nelem; i++)
1785	{
1786	err = re_node_set_merge (&state->inveclosure,
1787	dfa->inveclosures + dest_nodes->elems[i]);
1788	if (BE (err != REG_NOERROR, `0`))
1789	return REG_ESPACE;
1790	}
1791	}
1792	return re_node_set_add_intersect (dest_nodes, candidates,
1793	&state->inveclosure);
1794	}
1795
1796	static reg_errcode_t
1797	sub_epsilon_src_nodes (const re_dfa_t dfa, int* node, re_node_set *dest_nodes,
1798	const re_node_set *candidates)
1799	{
1800	int ecl_idx;
1801	reg_errcode_t err;
1802	re_node_set *inv_eclosure = dfa->inveclosures + node;
1803	re_node_set except_nodes;
1804	re_node_set_init_empty (&except_nodes);
1805	for (ecl_idx = `0`; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1806	{
1807	int cur_node = inv_eclosure->elems[ecl_idx];
1808	if (cur_node == node)
1809	continue;
1810	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1811	{
1812	int edst1 = dfa->edests[cur_node].elems[`0`];
1813	int edst2 = ((dfa->edests[cur_node].nelem > `1`)
1814	? dfa->edests[cur_node].elems[`1`] : -`1`);
1815	if ((!re_node_set_contains (inv_eclosure, edst1)
1816	&& re_node_set_contains (dest_nodes, edst1))
1817	\|\| (edst2 > `0`
1818	&& !re_node_set_contains (inv_eclosure, edst2)
1819	&& re_node_set_contains (dest_nodes, edst2)))
1820	{
1821	err = re_node_set_add_intersect (&except_nodes, candidates,
1822	dfa->inveclosures + cur_node);
1823	if (BE (err != REG_NOERROR, `0`))
1824	{
1825	re_node_set_free (&except_nodes);
1826	return err;
1827	}
1828	}
1829	}
1830	}
1831	for (ecl_idx = `0`; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1832	{
1833	int cur_node = inv_eclosure->elems[ecl_idx];
1834	if (!re_node_set_contains (&except_nodes, cur_node))
1835	{
1836	int idx = re_node_set_contains (dest_nodes, cur_node) - `1`;
1837	re_node_set_remove_at (dest_nodes, idx);
1838	}
1839	}
1840	re_node_set_free (&except_nodes);
1841	return REG_NOERROR;
1842	}
1843
1844	static int
1845	check_dst_limits (const re_match_context_t mctx, re_node_set limits,
1846	int dst_node, int dst_idx, int src_node, int src_idx)
1847	{
1848	const re_dfa_t *const dfa = mctx->dfa;
1849	int lim_idx, src_pos, dst_pos;
1850
1851	int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1852	int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1853	for (lim_idx = `0`; lim_idx < limits->nelem; ++lim_idx)
1854	{
1855	int subexp_idx;
1856	struct re_backref_cache_entry *ent;
1857	ent = mctx->bkref_ents + limits->elems[lim_idx];
1858	subexp_idx = dfa->nodes[ent->node].opr.idx;
1859
1860	dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1861	subexp_idx, dst_node, dst_idx,
1862	dst_bkref_idx);
1863	src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1864	subexp_idx, src_node, src_idx,
1865	src_bkref_idx);
1866
1867	/ In case of:*
1868	<src> <dst> ( <subexp> )
1869	( <subexp> ) <src> <dst>
1870	( <subexp1> <src> <subexp2> <dst> <subexp3> ) /*
1871	if (src_pos == dst_pos)
1872	continue; / This is unrelated limitation. /
1873	else
1874	return `1`;
1875	}
1876	return `0`;
1877	}
1878
1879	static int
1880	check_dst_limits_calc_pos_1 (const re_match_context_t mctx, int* boundaries,
1881	int subexp_idx, int from_node, int bkref_idx)
1882	{
1883	const re_dfa_t *const dfa = mctx->dfa;
1884	const re_node_set *eclosures = dfa->eclosures + from_node;
1885	int node_idx;
1886
1887	/ Else, we are on the boundary: examine the nodes on the epsilon*
1888	closure. /*
1889	for (node_idx = `0`; node_idx < eclosures->nelem; ++node_idx)
1890	{
1891	int node = eclosures->elems[node_idx];
1892	switch (dfa->nodes[node].type)
1893	{
1894	case OP_BACK_REF:
1895	if (bkref_idx != -`1`)
1896	{
1897	struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1898	do
1899	{
1900	int dst, cpos;
1901
1902	if (ent->node != node)
1903	continue;
1904
1905	if (subexp_idx < BITSET_WORD_BITS
1906	&& !(ent->eps_reachable_subexps_map
1907	& ((bitset_word_t) `1` << subexp_idx)))
1908	continue;
1909
1910	/ Recurse trying to reach the OP_OPEN_SUBEXP and*
1911	OP_CLOSE_SUBEXP cases below. But, if the
1912	destination node is the same node as the source
1913	node, don't recurse because it would cause an
1914	infinite loop: a regex that exhibits this behavior
1915	is ()\1\1* /
1916	dst = dfa->edests[node].elems[`0`];
1917	if (dst == from_node)
1918	{
1919	if (boundaries & `1`)
1920	return -`1`;
1921	else / if (boundaries & 2) /
1922	return `0`;
1923	}
1924
1925	cpos =
1926	check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1927	dst, bkref_idx);
1928	if (cpos == -`1` / && (boundaries & 1) /)
1929	return -`1`;
1930	if (cpos == `0` && (boundaries & `2`))
1931	return `0`;
1932
1933	if (subexp_idx < BITSET_WORD_BITS)
1934	ent->eps_reachable_subexps_map
1935	&= ~((bitset_word_t) `1` << subexp_idx);
1936	}
1937	while (ent++->more);
1938	}
1939	break;
1940
1941	case OP_OPEN_SUBEXP:
1942	if ((boundaries & `1`) && subexp_idx == dfa->nodes[node].opr.idx)
1943	return -`1`;
1944	break;
1945
1946	case OP_CLOSE_SUBEXP:
1947	if ((boundaries & `2`) && subexp_idx == dfa->nodes[node].opr.idx)
1948	return `0`;
1949	break;
1950
1951	default:
1952	break;
1953	}
1954	}
1955
1956	return (boundaries & `2`) ? `1` : `0`;
1957	}
1958
1959	static int
1960	check_dst_limits_calc_pos (const re_match_context_t mctx, int* limit,
1961	int subexp_idx, int from_node, int str_idx,
1962	int bkref_idx)
1963	{
1964	struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
1965	int boundaries;
1966
1967	/ If we are outside the range of the subexpression, return -1 or 1. /
1968	if (str_idx < lim->subexp_from)
1969	return -`1`;
1970
1971	if (lim->subexp_to < str_idx)
1972	return `1`;
1973
1974	/ If we are within the subexpression, return 0. /
1975	boundaries = (str_idx == lim->subexp_from);
1976	boundaries \|= (str_idx == lim->subexp_to) << `1`;
1977	if (boundaries == `0`)
1978	return `0`;
1979
1980	/ Else, examine epsilon closure. /
1981	return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1982	from_node, bkref_idx);
1983	}
1984
1985	/ Check the limitations of sub expressions LIMITS, and remove the nodes*
1986	which are against limitations from DEST_NODES. /*
1987
1988	static reg_errcode_t
1989	check_subexp_limits (const re_dfa_t dfa, re_node_set dest_nodes,
1990	const re_node_set candidates, re_node_set limits,
1991	struct re_backref_cache_entry bkref_ents, int* str_idx)
1992	{
1993	reg_errcode_t err;
1994	int node_idx, lim_idx;
1995
1996	for (lim_idx = `0`; lim_idx < limits->nelem; ++lim_idx)
1997	{
1998	int subexp_idx;
1999	struct re_backref_cache_entry *ent;
2000	ent = bkref_ents + limits->elems[lim_idx];
2001
2002	if (str_idx <= ent->subexp_from \|\| ent->str_idx < str_idx)
2003	continue; / This is unrelated limitation. /
2004
2005	subexp_idx = dfa->nodes[ent->node].opr.idx;
2006	if (ent->subexp_to == str_idx)
2007	{
2008	int ops_node = -`1`;
2009	int cls_node = -`1`;
2010	for (node_idx = `0`; node_idx < dest_nodes->nelem; ++node_idx)
2011	{
2012	int node = dest_nodes->elems[node_idx];
2013	re_token_type_t type = dfa->nodes[node].type;
2014	if (type == OP_OPEN_SUBEXP
2015	&& subexp_idx == dfa->nodes[node].opr.idx)
2016	ops_node = node;
2017	else if (type == OP_CLOSE_SUBEXP
2018	&& subexp_idx == dfa->nodes[node].opr.idx)
2019	cls_node = node;
2020	}
2021
2022	/ Check the limitation of the open subexpression. /
2023	/ Note that (ent->subexp_to = str_idx != ent->subexp_from). /
2024	if (ops_node >= `0`)
2025	{
2026	err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2027	candidates);
2028	if (BE (err != REG_NOERROR, `0`))
2029	return err;
2030	}
2031
2032	/ Check the limitation of the close subexpression. /
2033	if (cls_node >= `0`)
2034	for (node_idx = `0`; node_idx < dest_nodes->nelem; ++node_idx)
2035	{
2036	int node = dest_nodes->elems[node_idx];
2037	if (!re_node_set_contains (dfa->inveclosures + node,
2038	cls_node)
2039	&& !re_node_set_contains (dfa->eclosures + node,
2040	cls_node))
2041	{
2042	/ It is against this limitation.*
2043	Remove it form the current sifted state. /*
2044	err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2045	candidates);
2046	if (BE (err != REG_NOERROR, `0`))
2047	return err;
2048	--node_idx;
2049	}
2050	}
2051	}
2052	else / (ent->subexp_to != str_idx) /
2053	{
2054	for (node_idx = `0`; node_idx < dest_nodes->nelem; ++node_idx)
2055	{
2056	int node = dest_nodes->elems[node_idx];
2057	re_token_type_t type = dfa->nodes[node].type;
2058	if (type == OP_CLOSE_SUBEXP \|\| type == OP_OPEN_SUBEXP)
2059	{
2060	if (subexp_idx != dfa->nodes[node].opr.idx)
2061	continue;
2062	/ It is against this limitation.*
2063	Remove it form the current sifted state. /*
2064	err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2065	candidates);
2066	if (BE (err != REG_NOERROR, `0`))
2067	return err;
2068	}
2069	}
2070	}
2071	}
2072	return REG_NOERROR;
2073	}
2074
2075	static reg_errcode_t
2076	__attribute_warn_unused_result__
2077	sift_states_bkref (const re_match_context_t mctx, re_sift_context_t sctx,
2078	int str_idx, const re_node_set *candidates)
2079	{
2080	const re_dfa_t *const dfa = mctx->dfa;
2081	reg_errcode_t err;
2082	int node_idx, node;
2083	re_sift_context_t local_sctx;
2084	int first_idx = search_cur_bkref_entry (mctx, str_idx);
2085
2086	if (first_idx == -`1`)
2087	return REG_NOERROR;
2088
2089	local_sctx.sifted_states = NULL; / Mark that it hasn't been initialized. /
2090
2091	for (node_idx = `0`; node_idx < candidates->nelem; ++node_idx)
2092	{
2093	int enabled_idx;
2094	re_token_type_t type;
2095	struct re_backref_cache_entry *entry;
2096	node = candidates->elems[node_idx];
2097	type = dfa->nodes[node].type;
2098	/ Avoid infinite loop for the REs like "()\1+". /
2099	if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2100	continue;
2101	if (type != OP_BACK_REF)
2102	continue;
2103
2104	entry = mctx->bkref_ents + first_idx;
2105	enabled_idx = first_idx;
2106	do
2107	{
2108	int subexp_len;
2109	int to_idx;
2110	int dst_node;
2111	int ret;
2112	re_dfastate_t *cur_state;
2113
2114	if (entry->node != node)
2115	continue;
2116	subexp_len = entry->subexp_to - entry->subexp_from;
2117	to_idx = str_idx + subexp_len;
2118	dst_node = (subexp_len ? dfa->nexts[node]
2119	: dfa->edests[node].elems[`0`]);
2120
2121	if (to_idx > sctx->last_str_idx
2122	\|\| sctx->sifted_states[to_idx] == NULL
2123	\|\| !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2124	\|\| check_dst_limits (mctx, &sctx->limits, node,
2125	str_idx, dst_node, to_idx))
2126	continue;
2127
2128	if (local_sctx.sifted_states == NULL)
2129	{
2130	local_sctx = *sctx;
2131	err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2132	if (BE (err != REG_NOERROR, `0`))
2133	goto free_return;
2134	}
2135	local_sctx.last_node = node;
2136	local_sctx.last_str_idx = str_idx;
2137	ret = re_node_set_insert (&local_sctx.limits, enabled_idx);
2138	if (BE (ret < `0`, `0`))
2139	{
2140	err = REG_ESPACE;
2141	goto free_return;
2142	}
2143	cur_state = local_sctx.sifted_states[str_idx];
2144	err = sift_states_backward (mctx, &local_sctx);
2145	if (BE (err != REG_NOERROR, `0`))
2146	goto free_return;
2147	if (sctx->limited_states != NULL)
2148	{
2149	err = merge_state_array (dfa, sctx->limited_states,
2150	local_sctx.sifted_states,
2151	str_idx + `1`);
2152	if (BE (err != REG_NOERROR, `0`))
2153	goto free_return;
2154	}
2155	local_sctx.sifted_states[str_idx] = cur_state;
2156	re_node_set_remove (&local_sctx.limits, enabled_idx);
2157
2158	/ mctx->bkref_ents may have changed, reload the pointer. /
2159	entry = mctx->bkref_ents + enabled_idx;
2160	}
2161	while (enabled_idx++, entry++->more);
2162	}
2163	err = REG_NOERROR;
2164	free_return:
2165	if (local_sctx.sifted_states != NULL)
2166	{
2167	re_node_set_free (&local_sctx.limits);
2168	}
2169
2170	return err;
2171	}
2172
2173
#ifdef RE_ENABLE_I18N
/* Return the number of bytes the node NODE_IDX accepts at STR_IDX, as
   reported by check_node_accept_bytes.

   The result is forced to 0 when the match would end at or before
   MAX_STR_IDX but the sifted state at that end position does not contain
   the successor of NODE_IDX (that is, the destination was already thrown
   away).  */
static int
sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
                     int node_idx, int str_idx, int max_str_idx)
{
  const re_dfa_t *const dfa = mctx->dfa;
  int naccepted;
  /* Check whether the node can accept a multi-byte sequence.  */
  naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
  if (naccepted > 0 && str_idx + naccepted <= max_str_idx
      && !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
                               dfa->nexts[node_idx]))
    /* The destination was already thrown away, so the node cannot accept
       the current multi-byte input.  */
    naccepted = 0;
  /* Otherwise NACCEPTED is whatever check_node_accept_bytes reported.  */
  return naccepted;
}
#endif /* RE_ENABLE_I18N */
2195
2196
/* Functions for state transition.  */
2198
2199	/ Return the next state to which the current state STATE will transit by*
2200	accepting the current input byte, and update STATE_LOG if necessary.
2201	If STATE can accept a multibyte char/collating element/back reference
2202	update the destination of STATE_LOG. /*
2203
2204	static re_dfastate_t *
2205	__attribute_warn_unused_result__
2206	transit_state (reg_errcode_t err, re_match_context_t mctx,
2207	re_dfastate_t *state)
2208	{
2209	re_dfastate_t **trtable;
2210	unsigned char ch;
2211
2212	#ifdef RE_ENABLE_I18N
2213	/ If the current state can accept multibyte. /
2214	if (BE (state->accept_mb, `0`))
2215	{
2216	*err = transit_state_mb (mctx, state);
2217	if (BE (*err != REG_NOERROR, `0`))
2218	return NULL;
2219	}
2220	#endif /* RE_ENABLE_I18N */
2221
2222	/ Then decide the next state with the single byte. /
2223	#if 0
2224	if (`0`)
2225	/ don't use transition table /
2226	return transit_state_sb (err, mctx, state);
2227	#endif
2228
2229	/ Use transition table /
2230	ch = re_string_fetch_byte (&mctx->input);
2231	for (;;)
2232	{
2233	trtable = state->trtable;
2234	if (BE (trtable != NULL, `1`))
2235	return trtable[ch];
2236
2237	trtable = state->word_trtable;
2238	if (BE (trtable != NULL, `1`))
2239	{
2240	unsigned int context;
2241	context
2242	= re_string_context_at (&mctx->input,
2243	re_string_cur_idx (&mctx->input) - `1`,
2244	mctx->eflags);
2245	if (IS_WORD_CONTEXT (context))
2246	return trtable[ch + SBC_MAX];
2247	else
2248	return trtable[ch];
2249	}
2250
2251	if (!build_trtable (mctx->dfa, state))
2252	{
2253	*err = REG_ESPACE;
2254	return NULL;
2255	}
2256
2257	/ Retry, we now have a transition table. /
2258	}
2259	}
2260
2261	/ Update the state_log if we need /
2262	re_dfastate_t *
2263	merge_state_with_log (reg_errcode_t err, re_match_context_t mctx,
2264	re_dfastate_t *next_state)
2265	{
2266	const re_dfa_t *const dfa = mctx->dfa;
2267	int cur_idx = re_string_cur_idx (&mctx->input);
2268
2269	if (cur_idx > mctx->state_log_top)
2270	{
2271	mctx->state_log[cur_idx] = next_state;
2272	mctx->state_log_top = cur_idx;
2273	}
2274	else if (mctx->state_log[cur_idx] == `0`)
2275	{
2276	mctx->state_log[cur_idx] = next_state;
2277	}
2278	else
2279	{
2280	re_dfastate_t *pstate;
2281	unsigned int context;
2282	re_node_set next_nodes, log_nodes, table_nodes = NULL;
2283	/ If (state_log[cur_idx] != 0), it implies that cur_idx is*
2284	the destination of a multibyte char/collating element/
2285	back reference. Then the next state is the union set of
2286	these destinations and the results of the transition table. /*
2287	pstate = mctx->state_log[cur_idx];
2288	log_nodes = pstate->entrance_nodes;
2289	if (next_state != NULL)
2290	{
2291	table_nodes = next_state->entrance_nodes;
2292	*err = re_node_set_init_union (&next_nodes, table_nodes,
2293	log_nodes);
2294	if (BE (*err != REG_NOERROR, `0`))
2295	return NULL;
2296	}
2297	else
2298	next_nodes = *log_nodes;
2299	/ Note: We already add the nodes of the initial state,*
2300	then we don't need to add them here. /*
2301
2302	context = re_string_context_at (&mctx->input,
2303	re_string_cur_idx (&mctx->input) - `1`,
2304	mctx->eflags);
2305	next_state = mctx->state_log[cur_idx]
2306	= re_acquire_state_context (err, dfa, &next_nodes, context);
2307	/ We don't need to check errors here, since the return value of*
2308	this function is next_state and ERR is already set. /*
2309
2310	if (table_nodes != NULL)
2311	re_node_set_free (&next_nodes);
2312	}
2313
2314	if (BE (dfa->nbackref, `0`) && next_state != NULL)
2315	{
2316	/ Check OP_OPEN_SUBEXP in the current state in case that we use them*
2317	later. We must check them here, since the back references in the
2318	next state might use them. /*
2319	*err = check_subexp_matching_top (mctx, &next_state->nodes,
2320	cur_idx);
2321	if (BE (*err != REG_NOERROR, `0`))
2322	return NULL;
2323
2324	/ If the next state has back references. /
2325	if (next_state->has_backref)
2326	{
2327	*err = transit_state_bkref (mctx, &next_state->nodes);
2328	if (BE (*err != REG_NOERROR, `0`))
2329	return NULL;
2330	next_state = mctx->state_log[cur_idx];
2331	}
2332	}
2333
2334	return next_state;
2335	}
2336
2337	/ Skip bytes in the input that correspond to part of a*
2338	multi-byte match, then look in the log for a state
2339	from which to restart matching. /*
2340	re_dfastate_t *
2341	find_recover_state (reg_errcode_t err, re_match_context_t mctx)
2342	{
2343	re_dfastate_t *cur_state;
2344	do
2345	{
2346	int max = mctx->state_log_top;
2347	int cur_str_idx = re_string_cur_idx (&mctx->input);
2348
2349	do
2350	{
2351	if (++cur_str_idx > max)
2352	return NULL;
2353	re_string_skip_bytes (&mctx->input, `1`);
2354	}
2355	while (mctx->state_log[cur_str_idx] == NULL);
2356
2357	cur_state = merge_state_with_log (err, mctx, NULL);
2358	}
2359	while (*err == REG_NOERROR && cur_state == NULL);
2360	return cur_state;
2361	}
2362
/* Helper functions for transit_state.  */
2364
2365	/ From the node set CUR_NODES, pick up the nodes whose types are*
2366	OP_OPEN_SUBEXP and which have corresponding back references in the regular
2367	expression. And register them to use them later for evaluating the
2368	corresponding back references. /*
2369
2370	static reg_errcode_t
2371	check_subexp_matching_top (re_match_context_t mctx, re_node_set cur_nodes,
2372	int str_idx)
2373	{
2374	const re_dfa_t *const dfa = mctx->dfa;
2375	int node_idx;
2376	reg_errcode_t err;
2377
2378	/ TODO: This isn't efficient.*
2379	Because there might be more than one nodes whose types are
2380	OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2381	nodes.
2382	E.g. RE: (a){2} /*
2383	for (node_idx = `0`; node_idx < cur_nodes->nelem; ++node_idx)
2384	{
2385	int node = cur_nodes->elems[node_idx];
2386	if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2387	&& dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2388	&& (dfa->used_bkref_map
2389	& ((bitset_word_t) `1` << dfa->nodes[node].opr.idx)))
2390	{
2391	err = match_ctx_add_subtop (mctx, node, str_idx);
2392	if (BE (err != REG_NOERROR, `0`))
2393	return err;
2394	}
2395	}
2396	return REG_NOERROR;
2397	}
2398
#if 0
/* Return the next state to which the current state STATE will transit by
   accepting the current input byte.

   This variant does not use the transition table.  It is compiled out
   (#if 0) and kept for reference only.  On failure NULL is returned and
   *ERR holds the error code.  */

static re_dfastate_t *
transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
                  re_dfastate_t *state)
{
  const re_dfa_t *const dfa = mctx->dfa;
  re_node_set next_nodes;
  re_dfastate_t *next_state;
  int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
  unsigned int context;

  *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
  if (BE (*err != REG_NOERROR, 0))
    return NULL;
  /* Collect the epsilon closures of the successors of every node that
     accepts the current input.  */
  for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
    {
      int cur_node = state->nodes.elems[node_cnt];
      if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
        {
          *err = re_node_set_merge (&next_nodes,
                                    dfa->eclosures + dfa->nexts[cur_node]);
          if (BE (*err != REG_NOERROR, 0))
            {
              re_node_set_free (&next_nodes);
              return NULL;
            }
        }
    }
  context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
  next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
  /* We don't need to check errors here, since the return value of
     this function is next_state and ERR is already set.  */

  re_node_set_free (&next_nodes);
  re_string_skip_bytes (&mctx->input, 1);
  return next_state;
}
#endif
2440
#ifdef RE_ENABLE_I18N
/* For every node of PSTATE that can accept a multi-byte sequence at the
   current input position, record the epsilon closure of its successor in
   MCTX->state_log at the position just past the accepted bytes.  If a
   state is already logged there, its entrance nodes are merged in.

   MCTX->max_mb_elem_len is raised to the longest accepted length seen.
   Return REG_NOERROR on success, or the error from a callee.  */
static reg_errcode_t
transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
{
  const re_dfa_t *const dfa = mctx->dfa;
  reg_errcode_t err;
  int i;

  for (i = 0; i < pstate->nodes.nelem; ++i)
    {
      re_node_set dest_nodes, *new_nodes;
      int cur_node_idx = pstate->nodes.elems[i];
      int naccepted, dest_idx;
      unsigned int context;
      re_dfastate_t *dest_state;

      if (!dfa->nodes[cur_node_idx].accept_mb)
        continue;

      if (dfa->nodes[cur_node_idx].constraint)
        {
          context = re_string_context_at (&mctx->input,
                                          re_string_cur_idx (&mctx->input),
                                          mctx->eflags);
          if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
                                           context))
            continue;
        }

      /* How many bytes can the node accept?  */
      naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
                                           re_string_cur_idx (&mctx->input));
      if (naccepted == 0)
        continue;

      /* The node can accept `naccepted' bytes.  */
      dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
      mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
                               : mctx->max_mb_elem_len);
      err = clean_state_log_if_needed (mctx, dest_idx);
      if (BE (err != REG_NOERROR, 0))
        return err;
#ifdef DEBUG
      assert (dfa->nexts[cur_node_idx] != -1);
#endif
      new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];

      dest_state = mctx->state_log[dest_idx];
      if (dest_state == NULL)
        /* Shallow copy: DEST_NODES borrows the storage of NEW_NODES and
           is not freed below.  */
        dest_nodes = *new_nodes;
      else
        {
          err = re_node_set_init_union (&dest_nodes,
                                        dest_state->entrance_nodes, new_nodes);
          if (BE (err != REG_NOERROR, 0))
            return err;
        }
      context = re_string_context_at (&mctx->input, dest_idx - 1,
                                      mctx->eflags);
      mctx->state_log[dest_idx]
        = re_acquire_state_context (&err, dfa, &dest_nodes, context);
      /* DEST_NODES was allocated only when it was built as a union.  */
      if (dest_state != NULL)
        re_node_set_free (&dest_nodes);
      if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
        return err;
    }
  return REG_NOERROR;
}
#endif /* RE_ENABLE_I18N */
2510
2511	static reg_errcode_t
2512	transit_state_bkref (re_match_context_t mctx, const* re_node_set *nodes)
2513	{
2514	const re_dfa_t *const dfa = mctx->dfa;
2515	reg_errcode_t err;
2516	int i;
2517	int cur_str_idx = re_string_cur_idx (&mctx->input);
2518
2519	for (i = `0`; i < nodes->nelem; ++i)
2520	{
2521	int dest_str_idx, prev_nelem, bkc_idx;
2522	int node_idx = nodes->elems[i];
2523	unsigned int context;
2524	const re_token_t *node = dfa->nodes + node_idx;
2525	re_node_set *new_dest_nodes;
2526
2527	/ Check whether `node' is a backreference or not. /
2528	if (node->type != OP_BACK_REF)
2529	continue;
2530
2531	if (node->constraint)
2532	{
2533	context = re_string_context_at (&mctx->input, cur_str_idx,
2534	mctx->eflags);
2535	if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2536	continue;
2537	}
2538
2539	/ `node' is a backreference.*
2540	Check the substring which the substring matched. /*
2541	bkc_idx = mctx->nbkref_ents;
2542	err = get_subexp (mctx, node_idx, cur_str_idx);
2543	if (BE (err != REG_NOERROR, `0`))
2544	goto free_return;
2545
2546	/ And add the epsilon closures (which is `new_dest_nodes') of*
2547	the backreference to appropriate state_log. /*
2548	#ifdef DEBUG
2549	assert (dfa->nexts[node_idx] != -`1`);
2550	#endif
2551	for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2552	{
2553	int subexp_len;
2554	re_dfastate_t *dest_state;
2555	struct re_backref_cache_entry *bkref_ent;
2556	bkref_ent = mctx->bkref_ents + bkc_idx;
2557	if (bkref_ent->node != node_idx \|\| bkref_ent->str_idx != cur_str_idx)
2558	continue;
2559	subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2560	new_dest_nodes = (subexp_len == `0`
2561	? dfa->eclosures + dfa->edests[node_idx].elems[`0`]
2562	: dfa->eclosures + dfa->nexts[node_idx]);
2563	dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2564	- bkref_ent->subexp_from);
2565	context = re_string_context_at (&mctx->input, dest_str_idx - `1`,
2566	mctx->eflags);
2567	dest_state = mctx->state_log[dest_str_idx];
2568	prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? `0`
2569	: mctx->state_log[cur_str_idx]->nodes.nelem);
2570	/ Add `new_dest_node' to state_log. /
2571	if (dest_state == NULL)
2572	{
2573	mctx->state_log[dest_str_idx]
2574	= re_acquire_state_context (&err, dfa, new_dest_nodes,
2575	context);
2576	if (BE (mctx->state_log[dest_str_idx] == NULL
2577	&& err != REG_NOERROR, `0`))
2578	goto free_return;
2579	}
2580	else
2581	{
2582	re_node_set dest_nodes;
2583	err = re_node_set_init_union (&dest_nodes,
2584	dest_state->entrance_nodes,
2585	new_dest_nodes);
2586	if (BE (err != REG_NOERROR, `0`))
2587	{
2588	re_node_set_free (&dest_nodes);
2589	goto free_return;
2590	}
2591	mctx->state_log[dest_str_idx]
2592	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2593	re_node_set_free (&dest_nodes);
2594	if (BE (mctx->state_log[dest_str_idx] == NULL
2595	&& err != REG_NOERROR, `0`))
2596	goto free_return;
2597	}
2598	/ We need to check recursively if the backreference can epsilon*
2599	transit. /*
2600	if (subexp_len == `0`
2601	&& mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2602	{
2603	err = check_subexp_matching_top (mctx, new_dest_nodes,
2604	cur_str_idx);
2605	if (BE (err != REG_NOERROR, `0`))
2606	goto free_return;
2607	err = transit_state_bkref (mctx, new_dest_nodes);
2608	if (BE (err != REG_NOERROR, `0`))
2609	goto free_return;
2610	}
2611	}
2612	}
2613	err = REG_NOERROR;
2614	free_return:
2615	return err;
2616	}
2617
2618	/ Enumerate all the candidates which the backreference BKREF_NODE can match*
2619	at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2620	Note that we might collect inappropriate candidates here.
2621	However, the cost of checking them strictly here is too high, then we
2622	delay these checking for prune_impossible_nodes(). /*
2623
2624	static reg_errcode_t
2625	__attribute_warn_unused_result__
2626	get_subexp (re_match_context_t mctx, int* bkref_node, int bkref_str_idx)
2627	{
2628	const re_dfa_t *const dfa = mctx->dfa;
2629	int subexp_num, sub_top_idx;
2630	const char buf = (const* char *) re_string_get_buffer (&mctx->input);
2631	/ Return if we have already checked BKREF_NODE at BKREF_STR_IDX. /
2632	int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2633	if (cache_idx != -`1`)
2634	{
2635	const struct re_backref_cache_entry *entry
2636	= mctx->bkref_ents + cache_idx;
2637	do
2638	if (entry->node == bkref_node)
2639	return REG_NOERROR; / We already checked it. /
2640	while (entry++->more);
2641	}
2642
2643	subexp_num = dfa->nodes[bkref_node].opr.idx;
2644
2645	/ For each sub expression /
2646	for (sub_top_idx = `0`; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2647	{
2648	reg_errcode_t err;
2649	re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2650	re_sub_match_last_t *sub_last;
2651	int sub_last_idx, sl_str, bkref_str_off;
2652
2653	if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2654	continue; / It isn't related. /
2655
2656	sl_str = sub_top->str_idx;
2657	bkref_str_off = bkref_str_idx;
2658	/ At first, check the last node of sub expressions we already*
2659	evaluated. /*
2660	for (sub_last_idx = `0`; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2661	{
2662	int sl_str_diff;
2663	sub_last = sub_top->lasts[sub_last_idx];
2664	sl_str_diff = sub_last->str_idx - sl_str;
2665	/ The matched string by the sub expression match with the substring*
2666	at the back reference? /*
2667	if (sl_str_diff > `0`)
2668	{
2669	if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, `0`))
2670	{
2671	/ Not enough chars for a successful match. /
2672	if (bkref_str_off + sl_str_diff > mctx->input.len)
2673	break;
2674
2675	err = clean_state_log_if_needed (mctx,
2676	bkref_str_off
2677	+ sl_str_diff);
2678	if (BE (err != REG_NOERROR, `0`))
2679	return err;
2680	buf = (const char *) re_string_get_buffer (&mctx->input);
2681	}
2682	if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != `0`)
2683	/ We don't need to search this sub expression any more. /
2684	break;
2685	}
2686	bkref_str_off += sl_str_diff;
2687	sl_str += sl_str_diff;
2688	err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2689	bkref_str_idx);
2690
2691	/ Reload buf, since the preceding call might have reallocated*
2692	the buffer. /*
2693	buf = (const char *) re_string_get_buffer (&mctx->input);
2694
2695	if (err == REG_NOMATCH)
2696	continue;
2697	if (BE (err != REG_NOERROR, `0`))
2698	return err;
2699	}
2700
2701	if (sub_last_idx < sub_top->nlasts)
2702	continue;
2703	if (sub_last_idx > `0`)
2704	++sl_str;
2705	/ Then, search for the other last nodes of the sub expression. /
2706	for (; sl_str <= bkref_str_idx; ++sl_str)
2707	{
2708	int cls_node, sl_str_off;
2709	const re_node_set *nodes;
2710	sl_str_off = sl_str - sub_top->str_idx;
2711	/ The matched string by the sub expression match with the substring*
2712	at the back reference? /*
2713	if (sl_str_off > `0`)
2714	{
2715	if (BE (bkref_str_off >= mctx->input.valid_len, `0`))
2716	{
2717	/ If we are at the end of the input, we cannot match. /
2718	if (bkref_str_off >= mctx->input.len)
2719	break;
2720
2721	err = extend_buffers (mctx, bkref_str_off + `1`);
2722	if (BE (err != REG_NOERROR, `0`))
2723	return err;
2724
2725	buf = (const char *) re_string_get_buffer (&mctx->input);
2726	}
2727	if (buf [bkref_str_off++] != buf[sl_str - `1`])
2728	break; / We don't need to search this sub expression*
2729	any more. /*
2730	}
2731	if (mctx->state_log[sl_str] == NULL)
2732	continue;
2733	/ Does this state have a ')' of the sub expression? /
2734	nodes = &mctx->state_log[sl_str]->nodes;
2735	cls_node = find_subexp_node (dfa, nodes, subexp_num,
2736	OP_CLOSE_SUBEXP);
2737	if (cls_node == -`1`)
2738	continue; / No. /
2739	if (sub_top->path == NULL)
2740	{
2741	sub_top->path = calloc (sizeof (state_array_t),
2742	sl_str - sub_top->str_idx + `1`);
2743	if (sub_top->path == NULL)
2744	return REG_ESPACE;
2745	}
2746	/ Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node*
2747	in the current context? /*
2748	err = check_arrival (mctx, sub_top->path, sub_top->node,
2749	sub_top->str_idx, cls_node, sl_str,
2750	OP_CLOSE_SUBEXP);
2751	if (err == REG_NOMATCH)
2752	continue;
2753	if (BE (err != REG_NOERROR, `0`))
2754	return err;
2755	sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2756	if (BE (sub_last == NULL, `0`))
2757	return REG_ESPACE;
2758	err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2759	bkref_str_idx);
2760	if (err == REG_NOMATCH)
2761	continue;
2762	}
2763	}
2764	return REG_NOERROR;
2765	}
2766
/* Helper functions for get_subexp().  */
2768
2769	/ Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.*
2770	If it can arrive, register the sub expression expressed with SUB_TOP
2771	and SUB_LAST. /*
2772
2773	static reg_errcode_t
2774	get_subexp_sub (re_match_context_t mctx, const* re_sub_match_top_t *sub_top,
2775	re_sub_match_last_t sub_last, int* bkref_node, int bkref_str)
2776	{
2777	reg_errcode_t err;
2778	int to_idx;
2779	/ Can the subexpression arrive the back reference? /
2780	err = check_arrival (mctx, &sub_last->path, sub_last->node,
2781	sub_last->str_idx, bkref_node, bkref_str,
2782	OP_OPEN_SUBEXP);
2783	if (err != REG_NOERROR)
2784	return err;
2785	err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2786	sub_last->str_idx);
2787	if (BE (err != REG_NOERROR, `0`))
2788	return err;
2789	to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2790	return clean_state_log_if_needed (mctx, to_idx);
2791	}
2792
2793	/ Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.*
2794	Search '(' if FL_OPEN, or search ')' otherwise.
2795	TODO: This function isn't efficient...
2796	Because there might be more than one nodes whose types are
2797	OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2798	nodes.
2799	E.g. RE: (a){2} /*
2800
2801	static int
2802	find_subexp_node (const re_dfa_t dfa, const* re_node_set *nodes,
2803	int subexp_idx, int type)
2804	{
2805	int cls_idx;
2806	for (cls_idx = `0`; cls_idx < nodes->nelem; ++cls_idx)
2807	{
2808	int cls_node = nodes->elems[cls_idx];
2809	const re_token_t *node = dfa->nodes + cls_node;
2810	if (node->type == type
2811	&& node->opr.idx == subexp_idx)
2812	return cls_node;
2813	}
2814	return -`1`;
2815	}
2816
2817	/ Check whether the node TOP_NODE at TOP_STR can arrive to the node*
2818	LAST_NODE at LAST_STR. We record the path onto PATH since it will be
2819	heavily reused.
2820	Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. /*
2821
2822	static reg_errcode_t
2823	__attribute_warn_unused_result__
2824	check_arrival (re_match_context_t mctx, state_array_t path, int top_node,
2825	int top_str, int last_node, int last_str, int type)
2826	{
2827	const re_dfa_t *const dfa = mctx->dfa;
2828	reg_errcode_t err = REG_NOERROR;
2829	int subexp_num, backup_cur_idx, str_idx, null_cnt;
2830	re_dfastate_t *cur_state = NULL;
2831	re_node_set *cur_nodes, next_nodes;
2832	re_dfastate_t **backup_state_log;
2833	unsigned int context;
2834
2835	subexp_num = dfa->nodes[top_node].opr.idx;
2836	/ Extend the buffer if we need. /
2837	if (BE (path->alloc < last_str + mctx->max_mb_elem_len + `1`, `0`))
2838	{
2839	re_dfastate_t **new_array;
2840	int old_alloc = path->alloc;
2841	path->alloc += last_str + mctx->max_mb_elem_len + `1`;
2842	new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
2843	if (BE (new_array == NULL, `0`))
2844	{
2845	path->alloc = old_alloc;
2846	return REG_ESPACE;
2847	}
2848	path->array = new_array;
2849	memset (new_array + old_alloc, `'\0'`,
2850	sizeof (re_dfastate_t ) (path->alloc - old_alloc));
2851	}
2852
2853	str_idx = path->next_idx ?: top_str;
2854
2855	/ Temporary modify MCTX. /
2856	backup_state_log = mctx->state_log;
2857	backup_cur_idx = mctx->input.cur_idx;
2858	mctx->state_log = path->array;
2859	mctx->input.cur_idx = str_idx;
2860
2861	/ Setup initial node set. /
2862	context = re_string_context_at (&mctx->input, str_idx - `1`, mctx->eflags);
2863	if (str_idx == top_str)
2864	{
2865	err = re_node_set_init_1 (&next_nodes, top_node);
2866	if (BE (err != REG_NOERROR, `0`))
2867	return err;
2868	err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2869	if (BE (err != REG_NOERROR, `0`))
2870	{
2871	re_node_set_free (&next_nodes);
2872	return err;
2873	}
2874	}
2875	else
2876	{
2877	cur_state = mctx->state_log[str_idx];
2878	if (cur_state && cur_state->has_backref)
2879	{
2880	err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2881	if (BE (err != REG_NOERROR, `0`))
2882	return err;
2883	}
2884	else
2885	re_node_set_init_empty (&next_nodes);
2886	}
2887	if (str_idx == top_str \|\| (cur_state && cur_state->has_backref))
2888	{
2889	if (next_nodes.nelem)
2890	{
2891	err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2892	subexp_num, type);
2893	if (BE (err != REG_NOERROR, `0`))
2894	{
2895	re_node_set_free (&next_nodes);
2896	return err;
2897	}
2898	}
2899	cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2900	if (BE (cur_state == NULL && err != REG_NOERROR, `0`))
2901	{
2902	re_node_set_free (&next_nodes);
2903	return err;
2904	}
2905	mctx->state_log[str_idx] = cur_state;
2906	}
2907
2908	for (null_cnt = `0`; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2909	{
2910	re_node_set_empty (&next_nodes);
2911	if (mctx->state_log[str_idx + `1`])
2912	{
2913	err = re_node_set_merge (&next_nodes,
2914	&mctx->state_log[str_idx + `1`]->nodes);
2915	if (BE (err != REG_NOERROR, `0`))
2916	{
2917	re_node_set_free (&next_nodes);
2918	return err;
2919	}
2920	}
2921	if (cur_state)
2922	{
2923	err = check_arrival_add_next_nodes (mctx, str_idx,
2924	&cur_state->non_eps_nodes,
2925	&next_nodes);
2926	if (BE (err != REG_NOERROR, `0`))
2927	{
2928	re_node_set_free (&next_nodes);
2929	return err;
2930	}
2931	}
2932	++str_idx;
2933	if (next_nodes.nelem)
2934	{
2935	err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2936	if (BE (err != REG_NOERROR, `0`))
2937	{
2938	re_node_set_free (&next_nodes);
2939	return err;
2940	}
2941	err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2942	subexp_num, type);
2943	if (BE (err != REG_NOERROR, `0`))
2944	{
2945	re_node_set_free (&next_nodes);
2946	return err;
2947	}
2948	}
2949	context = re_string_context_at (&mctx->input, str_idx - `1`, mctx->eflags);
2950	cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2951	if (BE (cur_state == NULL && err != REG_NOERROR, `0`))
2952	{
2953	re_node_set_free (&next_nodes);
2954	return err;
2955	}
2956	mctx->state_log[str_idx] = cur_state;
2957	null_cnt = cur_state == NULL ? null_cnt + `1` : `0`;
2958	}
2959	re_node_set_free (&next_nodes);
2960	cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
2961	: &mctx->state_log[last_str]->nodes);
2962	path->next_idx = str_idx;
2963
2964	/ Fix MCTX. /
2965	mctx->state_log = backup_state_log;
2966	mctx->input.cur_idx = backup_cur_idx;
2967
2968	/ Then check the current node set has the node LAST_NODE. /
2969	if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
2970	return REG_NOERROR;
2971
2972	return REG_NOMATCH;
2973	}
2974
/* Helper functions for check_arrival.  */
2976
2977	/ Calculate the destination nodes of CUR_NODES at STR_IDX, and append them*
2978	to NEXT_NODES.
2979	TODO: This function is similar to the functions transit_state(),*
2980	however this function has many additional works.
2981	Can't we unify them? /*
2982
2983	static reg_errcode_t
2984	__attribute_warn_unused_result__
2985	check_arrival_add_next_nodes (re_match_context_t mctx, int* str_idx,
2986	re_node_set cur_nodes, re_node_set next_nodes)
2987	{
2988	const re_dfa_t *const dfa = mctx->dfa;
2989	int result;
2990	int cur_idx;
2991	#ifdef RE_ENABLE_I18N
2992	reg_errcode_t err = REG_NOERROR;
2993	#endif
2994	re_node_set union_set;
2995	re_node_set_init_empty (&union_set);
2996	for (cur_idx = `0`; cur_idx < cur_nodes->nelem; ++cur_idx)
2997	{
2998	int naccepted = `0`;
2999	int cur_node = cur_nodes->elems[cur_idx];
3000	#ifdef DEBUG
3001	re_token_type_t type = dfa->nodes[cur_node].type;
3002	assert (!IS_EPSILON_NODE (type));
3003	#endif
3004	#ifdef RE_ENABLE_I18N
3005	/ If the node may accept `multi byte'. /
3006	if (dfa->nodes[cur_node].accept_mb)
3007	{
3008	naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
3009	str_idx);
3010	if (naccepted > `1`)
3011	{
3012	re_dfastate_t *dest_state;
3013	int next_node = dfa->nexts[cur_node];
3014	int next_idx = str_idx + naccepted;
3015	dest_state = mctx->state_log[next_idx];
3016	re_node_set_empty (&union_set);
3017	if (dest_state)
3018	{
3019	err = re_node_set_merge (&union_set, &dest_state->nodes);
3020	if (BE (err != REG_NOERROR, `0`))
3021	{
3022	re_node_set_free (&union_set);
3023	return err;
3024	}
3025	}
3026	result = re_node_set_insert (&union_set, next_node);
3027	if (BE (result < `0`, `0`))
3028	{
3029	re_node_set_free (&union_set);
3030	return REG_ESPACE;
3031	}
3032	mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3033	&union_set);
3034	if (BE (mctx->state_log[next_idx] == NULL
3035	&& err != REG_NOERROR, `0`))
3036	{
3037	re_node_set_free (&union_set);
3038	return err;
3039	}
3040	}
3041	}
3042	#endif /* RE_ENABLE_I18N */
3043	if (naccepted
3044	\|\| check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3045	{
3046	result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3047	if (BE (result < `0`, `0`))
3048	{
3049	re_node_set_free (&union_set);
3050	return REG_ESPACE;
3051	}
3052	}
3053	}
3054	re_node_set_free (&union_set);
3055	return REG_NOERROR;
3056	}
3057
3058	/ For all the nodes in CUR_NODES, add the epsilon closures of them to*
3059	CUR_NODES, however exclude the nodes which are:
3060	- inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3061	- out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3062	*/
3063
3064	static reg_errcode_t
3065	check_arrival_expand_ecl (const re_dfa_t dfa, re_node_set cur_nodes,
3066	int ex_subexp, int type)
3067	{
3068	reg_errcode_t err;
3069	int idx, outside_node;
3070	re_node_set new_nodes;
3071	#ifdef DEBUG
3072	assert (cur_nodes->nelem);
3073	#endif
3074	err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3075	if (BE (err != REG_NOERROR, `0`))
3076	return err;
3077	/ Create a new node set NEW_NODES with the nodes which are epsilon*
3078	closures of the node in CUR_NODES. /*
3079
3080	for (idx = `0`; idx < cur_nodes->nelem; ++idx)
3081	{
3082	int cur_node = cur_nodes->elems[idx];
3083	const re_node_set *eclosure = dfa->eclosures + cur_node;
3084	outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3085	if (outside_node == -`1`)
3086	{
3087	/ There are no problematic nodes, just merge them. /
3088	err = re_node_set_merge (&new_nodes, eclosure);
3089	if (BE (err != REG_NOERROR, `0`))
3090	{
3091	re_node_set_free (&new_nodes);
3092	return err;
3093	}
3094	}
3095	else
3096	{
3097	/ There are problematic nodes, re-calculate incrementally. /
3098	err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3099	ex_subexp, type);
3100	if (BE (err != REG_NOERROR, `0`))
3101	{
3102	re_node_set_free (&new_nodes);
3103	return err;
3104	}
3105	}
3106	}
3107	re_node_set_free (cur_nodes);
3108	*cur_nodes = new_nodes;
3109	return REG_NOERROR;
3110	}
3111
3112	/ Helper function for check_arrival_expand_ecl.*
3113	Check incrementally the epsilon closure of TARGET, and if it isn't
3114	problematic append it to DST_NODES. /*
3115
3116	static reg_errcode_t
3117	__attribute_warn_unused_result__
3118	check_arrival_expand_ecl_sub (const re_dfa_t dfa, re_node_set dst_nodes,
3119	int target, int ex_subexp, int type)
3120	{
3121	int cur_node;
3122	for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3123	{
3124	int err;
3125
3126	if (dfa->nodes[cur_node].type == type
3127	&& dfa->nodes[cur_node].opr.idx == ex_subexp)
3128	{
3129	if (type == OP_CLOSE_SUBEXP)
3130	{
3131	err = re_node_set_insert (dst_nodes, cur_node);
3132	if (BE (err == -`1`, `0`))
3133	return REG_ESPACE;
3134	}
3135	break;
3136	}
3137	err = re_node_set_insert (dst_nodes, cur_node);
3138	if (BE (err == -`1`, `0`))
3139	return REG_ESPACE;
3140	if (dfa->edests[cur_node].nelem == `0`)
3141	break;
3142	if (dfa->edests[cur_node].nelem == `2`)
3143	{
3144	err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3145	dfa->edests[cur_node].elems[`1`],
3146	ex_subexp, type);
3147	if (BE (err != REG_NOERROR, `0`))
3148	return err;
3149	}
3150	cur_node = dfa->edests[cur_node].elems[`0`];
3151	}
3152	return REG_NOERROR;
3153	}
3154
3155
3156	/ For all the back references in the current state, calculate the*
3157	destination of the back references by the appropriate entry
3158	in MCTX->BKREF_ENTS. /*
3159
3160	static reg_errcode_t
3161	__attribute_warn_unused_result__
3162	expand_bkref_cache (re_match_context_t mctx, re_node_set cur_nodes,
3163	int cur_str, int subexp_num, int type)
3164	{
3165	const re_dfa_t *const dfa = mctx->dfa;
3166	reg_errcode_t err;
3167	int cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3168	struct re_backref_cache_entry *ent;
3169
3170	if (cache_idx_start == -`1`)
3171	return REG_NOERROR;
3172
3173	restart:
3174	ent = mctx->bkref_ents + cache_idx_start;
3175	do
3176	{
3177	int to_idx, next_node;
3178
3179	/ Is this entry ENT is appropriate? /
3180	if (!re_node_set_contains (cur_nodes, ent->node))
3181	continue; / No. /
3182
3183	to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3184	/ Calculate the destination of the back reference, and append it*
3185	to MCTX->STATE_LOG. /*
3186	if (to_idx == cur_str)
3187	{
3188	/ The backreference did epsilon transit, we must re-check all the*
3189	node in the current state. /*
3190	re_node_set new_dests;
3191	reg_errcode_t err2, err3;
3192	next_node = dfa->edests[ent->node].elems[`0`];
3193	if (re_node_set_contains (cur_nodes, next_node))
3194	continue;
3195	err = re_node_set_init_1 (&new_dests, next_node);
3196	err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3197	err3 = re_node_set_merge (cur_nodes, &new_dests);
3198	re_node_set_free (&new_dests);
3199	if (BE (err != REG_NOERROR \|\| err2 != REG_NOERROR
3200	\|\| err3 != REG_NOERROR, `0`))
3201	{
3202	err = (err != REG_NOERROR ? err
3203	: (err2 != REG_NOERROR ? err2 : err3));
3204	return err;
3205	}
3206	/ TODO: It is still inefficient... /
3207	goto restart;
3208	}
3209	else
3210	{
3211	re_node_set union_set;
3212	next_node = dfa->nexts[ent->node];
3213	if (mctx->state_log[to_idx])
3214	{
3215	int ret;
3216	if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3217	next_node))
3218	continue;
3219	err = re_node_set_init_copy (&union_set,
3220	&mctx->state_log[to_idx]->nodes);
3221	ret = re_node_set_insert (&union_set, next_node);
3222	if (BE (err != REG_NOERROR \|\| ret < `0`, `0`))
3223	{
3224	re_node_set_free (&union_set);
3225	err = err != REG_NOERROR ? err : REG_ESPACE;
3226	return err;
3227	}
3228	}
3229	else
3230	{
3231	err = re_node_set_init_1 (&union_set, next_node);
3232	if (BE (err != REG_NOERROR, `0`))
3233	return err;
3234	}
3235	mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3236	re_node_set_free (&union_set);
3237	if (BE (mctx->state_log[to_idx] == NULL
3238	&& err != REG_NOERROR, `0`))
3239	return err;
3240	}
3241	}
3242	while (ent++->more);
3243	return REG_NOERROR;
3244	}
3245
3246	/ Build transition table for the state.*
3247	Return 1 if succeeded, otherwise return NULL. /*
3248
3249	static int
3250	build_trtable (const re_dfa_t dfa, re_dfastate_t state)
3251	{
3252	reg_errcode_t err;
3253	int i, j, ch, need_word_trtable = `0`;
3254	bitset_word_t elem, mask;
3255	bool dests_node_malloced = false;
3256	bool dest_states_malloced = false;
3257	int ndests; / Number of the destination states from `state'. /
3258	re_dfastate_t **trtable;
3259	re_dfastate_t dest_states = NULL, dest_states_word, **dest_states_nl;
3260	re_node_set follows, *dests_node;
3261	bitset_t *dests_ch;
3262	bitset_t acceptable;
3263
3264	struct dests_alloc
3265	{
3266	re_node_set dests_node[SBC_MAX];
3267	bitset_t dests_ch[SBC_MAX];
3268	} *dests_alloc;
3269
3270	/ We build DFA states which corresponds to the destination nodes*
3271	from `state'. `dests_node[i]' represents the nodes which i-th
3272	destination state contains, and `dests_ch[i]' represents the
3273	characters which i-th destination state accepts. /*
3274	if (__libc_use_alloca (sizeof (struct dests_alloc)))
3275	dests_alloc = (struct dests_alloc ) alloca (sizeof* (struct dests_alloc));
3276	else
3277	{
3278	dests_alloc = re_malloc (struct dests_alloc, `1`);
3279	if (BE (dests_alloc == NULL, `0`))
3280	return `0`;
3281	dests_node_malloced = true;
3282	}
3283	dests_node = dests_alloc->dests_node;
3284	dests_ch = dests_alloc->dests_ch;
3285
3286	/ Initialize transiton table. /
3287	state->word_trtable = state->trtable = NULL;
3288
3289	/ At first, group all nodes belonging to `state' into several*
3290	destinations. /*
3291	ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3292	if (BE (ndests <= `0`, `0`))
3293	{
3294	if (dests_node_malloced)
3295	free (dests_alloc);
3296	/ Return 0 in case of an error, 1 otherwise. /
3297	if (ndests == `0`)
3298	{
3299	state->trtable = (re_dfastate_t **)
3300	calloc (sizeof (re_dfastate_t *), SBC_MAX);
3301	if (BE (state->trtable == NULL, `0`))
3302	return `0`;
3303	return `1`;
3304	}
3305	return `0`;
3306	}
3307
3308	err = re_node_set_alloc (&follows, ndests + `1`);
3309	if (BE (err != REG_NOERROR, `0`))
3310	goto out_free;
3311
3312	/ Avoid arithmetic overflow in size calculation. /
3313	if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
3314	/ (`3` * sizeof (re_dfastate_t *)))
3315	< ndests),
3316	`0`))
3317	goto out_free;
3318
3319	if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
3320	+ ndests * `3` * sizeof (re_dfastate_t *)))
3321	dest_states = (re_dfastate_t **)
3322	alloca (ndests * `3` * sizeof (re_dfastate_t *));
3323	else
3324	{
3325	dest_states = (re_dfastate_t **)
3326	malloc (ndests * `3` * sizeof (re_dfastate_t *));
3327	if (BE (dest_states == NULL, `0`))
3328	{
3329	out_free:
3330	if (dest_states_malloced)
3331	free (dest_states);
3332	re_node_set_free (&follows);
3333	for (i = `0`; i < ndests; ++i)
3334	re_node_set_free (dests_node + i);
3335	if (dests_node_malloced)
3336	free (dests_alloc);
3337	return `0`;
3338	}
3339	dest_states_malloced = true;
3340	}
3341	dest_states_word = dest_states + ndests;
3342	dest_states_nl = dest_states_word + ndests;
3343	bitset_empty (acceptable);
3344
3345	/ Then build the states for all destinations. /
3346	for (i = `0`; i < ndests; ++i)
3347	{
3348	int next_node;
3349	re_node_set_empty (&follows);
3350	/ Merge the follows of this destination states. /
3351	for (j = `0`; j < dests_node[i].nelem; ++j)
3352	{
3353	next_node = dfa->nexts[dests_node[i].elems[j]];
3354	if (next_node != -`1`)
3355	{
3356	err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3357	if (BE (err != REG_NOERROR, `0`))
3358	goto out_free;
3359	}
3360	}
3361	dest_states[i] = re_acquire_state_context (&err, dfa, &follows, `0`);
3362	if (BE (dest_states[i] == NULL && err != REG_NOERROR, `0`))
3363	goto out_free;
3364	/ If the new state has context constraint,*
3365	build appropriate states for these contexts. /*
3366	if (dest_states[i]->has_constraint)
3367	{
3368	dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3369	CONTEXT_WORD);
3370	if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, `0`))
3371	goto out_free;
3372
3373	if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > `1`)
3374	need_word_trtable = `1`;
3375
3376	dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3377	CONTEXT_NEWLINE);
3378	if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, `0`))
3379	goto out_free;
3380	}
3381	else
3382	{
3383	dest_states_word[i] = dest_states[i];
3384	dest_states_nl[i] = dest_states[i];
3385	}
3386	bitset_merge (acceptable, dests_ch[i]);
3387	}
3388
3389	if (!BE (need_word_trtable, `0`))
3390	{
3391	/ We don't care about whether the following character is a word*
3392	character, or we are in a single-byte character set so we can
3393	discern by looking at the character code: allocate a
3394	256-entry transition table. /*
3395	trtable = state->trtable =
3396	(re_dfastate_t ) calloc (sizeof** (re_dfastate_t *), SBC_MAX);
3397	if (BE (trtable == NULL, `0`))
3398	goto out_free;
3399
3400	/ For all characters ch...: /
3401	for (i = `0`; i < BITSET_WORDS; ++i)
3402	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = `1`;
3403	elem;
3404	mask <<= `1`, elem >>= `1`, ++ch)
3405	if (BE (elem & `1`, `0`))
3406	{
3407	/ There must be exactly one destination which accepts*
3408	character ch. See group_nodes_into_DFAstates. /*
3409	for (j = `0`; (dests_ch[j][i] & mask) == `0`; ++j)
3410	;
3411
3412	/ j-th destination accepts the word character ch. /
3413	if (dfa->word_char[i] & mask)
3414	trtable[ch] = dest_states_word[j];
3415	else
3416	trtable[ch] = dest_states[j];
3417	}
3418	}
3419	else
3420	{
3421	/ We care about whether the following character is a word*
3422	character, and we are in a multi-byte character set: discern
3423	by looking at the character code: build two 256-entry
3424	transition tables, one starting at trtable[0] and one
3425	starting at trtable[SBC_MAX]. /*
3426	trtable = state->word_trtable =
3427	(re_dfastate_t ) calloc (sizeof** (re_dfastate_t ), `2` SBC_MAX);
3428	if (BE (trtable == NULL, `0`))
3429	goto out_free;
3430
3431	/ For all characters ch...: /
3432	for (i = `0`; i < BITSET_WORDS; ++i)
3433	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = `1`;
3434	elem;
3435	mask <<= `1`, elem >>= `1`, ++ch)
3436	if (BE (elem & `1`, `0`))
3437	{
3438	/ There must be exactly one destination which accepts*
3439	character ch. See group_nodes_into_DFAstates. /*
3440	for (j = `0`; (dests_ch[j][i] & mask) == `0`; ++j)
3441	;
3442
3443	/ j-th destination accepts the word character ch. /
3444	trtable[ch] = dest_states[j];
3445	trtable[ch + SBC_MAX] = dest_states_word[j];
3446	}
3447	}
3448
3449	/ new line /
3450	if (bitset_contain (acceptable, NEWLINE_CHAR))
3451	{
3452	/ The current state accepts newline character. /
3453	for (j = `0`; j < ndests; ++j)
3454	if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3455	{
3456	/ k-th destination accepts newline character. /
3457	trtable[NEWLINE_CHAR] = dest_states_nl[j];
3458	if (need_word_trtable)
3459	trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3460	/ There must be only one destination which accepts*
3461	newline. See group_nodes_into_DFAstates. /*
3462	break;
3463	}
3464	}
3465
3466	if (dest_states_malloced)
3467	free (dest_states);
3468
3469	re_node_set_free (&follows);
3470	for (i = `0`; i < ndests; ++i)
3471	re_node_set_free (dests_node + i);
3472
3473	if (dests_node_malloced)
3474	free (dests_alloc);
3475
3476	return `1`;
3477	}
3478
3479	/ Group all nodes belonging to STATE into several destinations.*
3480	Then for all destinations, set the nodes belonging to the destination
3481	to DESTS_NODE[i] and set the characters accepted by the destination
3482	to DEST_CH[i]. This function return the number of destinations. /*
3483
3484	static int
3485	group_nodes_into_DFAstates (const re_dfa_t dfa, const* re_dfastate_t *state,
3486	re_node_set dests_node, bitset_t dests_ch)
3487	{
3488	reg_errcode_t err;
3489	int result;
3490	int i, j, k;
3491	int ndests; / Number of the destinations from `state'. /
3492	bitset_t accepts; / Characters a node can accept. /
3493	const re_node_set *cur_nodes = &state->nodes;
3494	bitset_empty (accepts);
3495	ndests = `0`;
3496
3497	/ For all the nodes belonging to `state', /
3498	for (i = `0`; i < cur_nodes->nelem; ++i)
3499	{
3500	re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3501	re_token_type_t type = node->type;
3502	unsigned int constraint = node->constraint;
3503
3504	/ Enumerate all single byte character this node can accept. /
3505	if (type == CHARACTER)
3506	bitset_set (accepts, node->opr.c);
3507	else if (type == SIMPLE_BRACKET)
3508	{
3509	bitset_merge (accepts, node->opr.sbcset);
3510	}
3511	else if (type == OP_PERIOD)
3512	{
3513	#ifdef RE_ENABLE_I18N
3514	if (dfa->mb_cur_max > `1`)
3515	bitset_merge (accepts, dfa->sb_char);
3516	else
3517	#endif
3518	bitset_set_all (accepts);
3519	if (!(dfa->syntax & RE_DOT_NEWLINE))
3520	bitset_clear (accepts, `'\n'`);
3521	if (dfa->syntax & RE_DOT_NOT_NULL)
3522	bitset_clear (accepts, `'\0'`);
3523	}
3524	#ifdef RE_ENABLE_I18N
3525	else if (type == OP_UTF8_PERIOD)
3526	{
3527	memset (accepts, `'\xff'`, sizeof (bitset_t) / `2`);
3528	if (!(dfa->syntax & RE_DOT_NEWLINE))
3529	bitset_clear (accepts, `'\n'`);
3530	if (dfa->syntax & RE_DOT_NOT_NULL)
3531	bitset_clear (accepts, `'\0'`);
3532	}
3533	#endif
3534	else
3535	continue;
3536
3537	/ Check the `accepts' and sift the characters which are not*
3538	match it the context. /*
3539	if (constraint)
3540	{
3541	if (constraint & NEXT_NEWLINE_CONSTRAINT)
3542	{
3543	bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3544	bitset_empty (accepts);
3545	if (accepts_newline)
3546	bitset_set (accepts, NEWLINE_CHAR);
3547	else
3548	continue;
3549	}
3550	if (constraint & NEXT_ENDBUF_CONSTRAINT)
3551	{
3552	bitset_empty (accepts);
3553	continue;
3554	}
3555
3556	if (constraint & NEXT_WORD_CONSTRAINT)
3557	{
3558	bitset_word_t any_set = `0`;
3559	if (type == CHARACTER && !node->word_char)
3560	{
3561	bitset_empty (accepts);
3562	continue;
3563	}
3564	#ifdef RE_ENABLE_I18N
3565	if (dfa->mb_cur_max > `1`)
3566	for (j = `0`; j < BITSET_WORDS; ++j)
3567	any_set \|= (accepts[j] &= (dfa->word_char[j] \| ~dfa->sb_char[j]));
3568	else
3569	#endif
3570	for (j = `0`; j < BITSET_WORDS; ++j)
3571	any_set \|= (accepts[j] &= dfa->word_char[j]);
3572	if (!any_set)
3573	continue;
3574	}
3575	if (constraint & NEXT_NOTWORD_CONSTRAINT)
3576	{
3577	bitset_word_t any_set = `0`;
3578	if (type == CHARACTER && node->word_char)
3579	{
3580	bitset_empty (accepts);
3581	continue;
3582	}
3583	#ifdef RE_ENABLE_I18N
3584	if (dfa->mb_cur_max > `1`)
3585	for (j = `0`; j < BITSET_WORDS; ++j)
3586	any_set \|= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3587	else
3588	#endif
3589	for (j = `0`; j < BITSET_WORDS; ++j)
3590	any_set \|= (accepts[j] &= ~dfa->word_char[j]);
3591	if (!any_set)
3592	continue;
3593	}
3594	}
3595
3596	/ Then divide `accepts' into DFA states, or create a new*
3597	state. Above, we make sure that accepts is not empty. /*
3598	for (j = `0`; j < ndests; ++j)
3599	{
3600	bitset_t intersec; / Intersection sets, see below. /
3601	bitset_t remains;
3602	/ Flags, see below. /
3603	bitset_word_t has_intersec, not_subset, not_consumed;
3604
3605	/ Optimization, skip if this state doesn't accept the character. /
3606	if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3607	continue;
3608
3609	/ Enumerate the intersection set of this state and `accepts'. /
3610	has_intersec = `0`;
3611	for (k = `0`; k < BITSET_WORDS; ++k)
3612	has_intersec \|= intersec[k] = accepts[k] & dests_ch[j][k];
3613	/ And skip if the intersection set is empty. /
3614	if (!has_intersec)
3615	continue;
3616
3617	/ Then check if this state is a subset of `accepts'. /
3618	not_subset = not_consumed = `0`;
3619	for (k = `0`; k < BITSET_WORDS; ++k)
3620	{
3621	not_subset \|= remains[k] = ~accepts[k] & dests_ch[j][k];
3622	not_consumed \|= accepts[k] = accepts[k] & ~dests_ch[j][k];
3623	}
3624
3625	/ If this state isn't a subset of `accepts', create a*
3626	new group state, which has the `remains'. /*
3627	if (not_subset)
3628	{
3629	bitset_copy (dests_ch[ndests], remains);
3630	bitset_copy (dests_ch[j], intersec);
3631	err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3632	if (BE (err != REG_NOERROR, `0`))
3633	goto error_return;
3634	++ndests;
3635	}
3636
3637	/ Put the position in the current group. /
3638	result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3639	if (BE (result < `0`, `0`))
3640	goto error_return;
3641
3642	/ If all characters are consumed, go to next node. /
3643	if (!not_consumed)
3644	break;
3645	}
3646	/ Some characters remain, create a new group. /
3647	if (j == ndests)
3648	{
3649	bitset_copy (dests_ch[ndests], accepts);
3650	err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3651	if (BE (err != REG_NOERROR, `0`))
3652	goto error_return;
3653	++ndests;
3654	bitset_empty (accepts);
3655	}
3656	}
3657	return ndests;
3658	error_return:
3659	for (j = `0`; j < ndests; ++j)
3660	re_node_set_free (dests_node + j);
3661	return -`1`;
3662	}
3663
3664	#ifdef RE_ENABLE_I18N
3665	/ Check how many bytes the node `dfa->nodes[node_idx]' accepts.*
3666	Return the number of the bytes the node accepts.
3667	STR_IDX is the current index of the input string.
3668
3669	This function handles the nodes which can accept one character, or
3670	one collating element like '.', '[a-z]', opposite to the other nodes
3671	can only accept one byte. /*
3672
3673	# ifdef _LIBC
3674	# include <locale/weight.h>
3675	# endif
3676
3677	static int
3678	check_node_accept_bytes (const re_dfa_t dfa, int* node_idx,
3679	const re_string_t input, int* str_idx)
3680	{
3681	const re_token_t *node = dfa->nodes + node_idx;
3682	int char_len, elem_len;
3683	int i;
3684
3685	if (BE (node->type == OP_UTF8_PERIOD, `0`))
3686	{
3687	unsigned char c = re_string_byte_at (input, str_idx), d;
3688	if (BE (c < `0xc2`, `1`))
3689	return `0`;
3690
3691	if (str_idx + `2` > input->len)
3692	return `0`;
3693
3694	d = re_string_byte_at (input, str_idx + `1`);
3695	if (c < `0xe0`)
3696	return (d < `0x80` \|\| d > `0xbf`) ? `0` : `2`;
3697	else if (c < `0xf0`)
3698	{
3699	char_len = `3`;
3700	if (c == `0xe0` && d < `0xa0`)
3701	return `0`;
3702	}
3703	else if (c < `0xf8`)
3704	{
3705	char_len = `4`;
3706	if (c == `0xf0` && d < `0x90`)
3707	return `0`;
3708	}
3709	else if (c < `0xfc`)
3710	{
3711	char_len = `5`;
3712	if (c == `0xf8` && d < `0x88`)
3713	return `0`;
3714	}
3715	else if (c < `0xfe`)
3716	{
3717	char_len = `6`;
3718	if (c == `0xfc` && d < `0x84`)
3719	return `0`;
3720	}
3721	else
3722	return `0`;
3723
3724	if (str_idx + char_len > input->len)
3725	return `0`;
3726
3727	for (i = `1`; i < char_len; ++i)
3728	{
3729	d = re_string_byte_at (input, str_idx + i);
3730	if (d < `0x80` \|\| d > `0xbf`)
3731	return `0`;
3732	}
3733	return char_len;
3734	}
3735
3736	char_len = re_string_char_size_at (input, str_idx);
3737	if (node->type == OP_PERIOD)
3738	{
3739	if (char_len <= `1`)
3740	return `0`;
3741	/ FIXME: I don't think this if is needed, as both '\n'*
3742	and '\0' are char_len == 1. /*
3743	/ '.' accepts any one character except the following two cases. /
3744	if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
3745	re_string_byte_at (input, str_idx) == `'\n'`) \|\|
3746	((dfa->syntax & RE_DOT_NOT_NULL) &&
3747	re_string_byte_at (input, str_idx) == `'\0'`))
3748	return `0`;
3749	return char_len;
3750	}
3751
3752	elem_len = re_string_elem_size_at (input, str_idx);
3753	if ((elem_len <= `1` && char_len <= `1`) \|\| char_len == `0`)
3754	return `0`;
3755
3756	if (node->type == COMPLEX_BRACKET)
3757	{
3758	const re_charset_t *cset = node->opr.mbcset;
3759	# ifdef _LIBC
3760	const unsigned char *pin
3761	= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3762	int j;
3763	uint32_t nrules;
3764	# endif /* _LIBC */
3765	int match_len = `0`;
3766	wchar_t wc = ((cset->nranges \|\| cset->nchar_classes \|\| cset->nmbchars)
3767	? re_string_wchar_at (input, str_idx) : `0`);
3768
3769	/ match with multibyte character? /
3770	for (i = `0`; i < cset->nmbchars; ++i)
3771	if (wc == cset->mbchars[i])
3772	{
3773	match_len = char_len;
3774	goto check_node_accept_bytes_match;
3775	}
3776	/ match with character_class? /
3777	for (i = `0`; i < cset->nchar_classes; ++i)
3778	{
3779	wctype_t wt = cset->char_classes[i];
3780	if (__iswctype (wc, wt))
3781	{
3782	match_len = char_len;
3783	goto check_node_accept_bytes_match;
3784	}
3785	}
3786
3787	# ifdef _LIBC
3788	nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3789	if (nrules != `0`)
3790	{
3791	unsigned int in_collseq = `0`;
3792	const int32_t table, indirect;
3793	const unsigned char weights, extra;
3794	const char *collseqwc;
3795
3796	/ match with collating_symbol? /
3797	if (cset->ncoll_syms)
3798	extra = (const unsigned char *)
3799	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3800	for (i = `0`; i < cset->ncoll_syms; ++i)
3801	{
3802	const unsigned char *coll_sym = extra + cset->coll_syms[i];
3803	/ Compare the length of input collating element and*
3804	the length of current collating element. /*
3805	if (*coll_sym != elem_len)
3806	continue;
3807	/ Compare each bytes. /
3808	for (j = `0`; j < *coll_sym; j++)
3809	if (pin[j] != coll_sym[`1` + j])
3810	break;
3811	if (j == *coll_sym)
3812	{
3813	/ Match if every bytes is equal. /
3814	match_len = j;
3815	goto check_node_accept_bytes_match;
3816	}
3817	}
3818
3819	if (cset->nranges)
3820	{
3821	if (elem_len <= char_len)
3822	{
3823	collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3824	in_collseq = __collseq_table_lookup (collseqwc, wc);
3825	}
3826	else
3827	in_collseq = find_collation_sequence_value (pin, elem_len);
3828	}
3829	/ match with range expression? /
3830	for (i = `0`; i < cset->nranges; ++i)
3831	if (cset->range_starts[i] <= in_collseq
3832	&& in_collseq <= cset->range_ends[i])
3833	{
3834	match_len = elem_len;
3835	goto check_node_accept_bytes_match;
3836	}
3837
3838	/ match with equivalence_class? /
3839	if (cset->nequiv_classes)
3840	{
3841	const unsigned char *cp = pin;
3842	table = (const int32_t *)
3843	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3844	weights = (const unsigned char *)
3845	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3846	extra = (const unsigned char *)
3847	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3848	indirect = (const int32_t *)
3849	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3850	int32_t idx = findidx (table, indirect, extra, &cp, elem_len);
3851	if (idx > `0`)
3852	for (i = `0`; i < cset->nequiv_classes; ++i)
3853	{
3854	int32_t equiv_class_idx = cset->equiv_classes[i];
3855	size_t weight_len = weights[idx & `0xffffff`];
3856	if (weight_len == weights[equiv_class_idx & `0xffffff`]
3857	&& (idx >> `24`) == (equiv_class_idx >> `24`))
3858	{
3859	int cnt = `0`;
3860
3861	idx &= `0xffffff`;
3862	equiv_class_idx &= `0xffffff`;
3863
3864	while (cnt <= weight_len
3865	&& (weights[equiv_class_idx + `1` + cnt]
3866	== weights[idx + `1` + cnt]))
3867	++cnt;
3868	if (cnt > weight_len)
3869	{
3870	match_len = elem_len;
3871	goto check_node_accept_bytes_match;
3872	}
3873	}
3874	}
3875	}
3876	}
3877	else
3878	# endif /* _LIBC */
3879	{
3880	/ match with range expression? /
3881	#if __GNUC__ >= 2
3882	wchar_t cmp_buf[] = {L`'\0'`, L`'\0'`, wc, L`'\0'`, L`'\0'`, L`'\0'`};
3883	#else
3884	wchar_t cmp_buf[] = {L`'\0'`, L`'\0'`, L`'\0'`, L`'\0'`, L`'\0'`, L`'\0'`};
3885	cmp_buf[`2`] = wc;
3886	#endif
3887	for (i = `0`; i < cset->nranges; ++i)
3888	{
3889	cmp_buf[`0`] = cset->range_starts[i];
3890	cmp_buf[`4`] = cset->range_ends[i];
3891	if (__wcscoll (cmp_buf, cmp_buf + `2`) <= `0`
3892	&& __wcscoll (cmp_buf + `2`, cmp_buf + `4`) <= `0`)
3893	{
3894	match_len = char_len;
3895	goto check_node_accept_bytes_match;
3896	}
3897	}
3898	}
3899	check_node_accept_bytes_match:
3900	if (!cset->non_match)
3901	return match_len;
3902	else
3903	{
3904	if (match_len > `0`)
3905	return `0`;
3906	else
3907	return (elem_len > char_len) ? elem_len : char_len;
3908	}
3909	}
3910	return `0`;
3911	}
3912
3913	# ifdef _LIBC
3914	static unsigned int
3915	find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
3916	{
3917	uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3918	if (nrules == `0`)
3919	{
3920	if (mbs_len == `1`)
3921	{
3922	/ No valid character. Match it as a single byte character. /
3923	const unsigned char collseq = (const* unsigned char *)
3924	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3925	return collseq[mbs[`0`]];
3926	}
3927	return UINT_MAX;
3928	}
3929	else
3930	{
3931	int32_t idx;
3932	const unsigned char extra = (const* unsigned char *)
3933	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3934	int32_t extrasize = (const unsigned char *)
3935	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + `1`) - extra;
3936
3937	for (idx = `0`; idx < extrasize;)
3938	{
3939	int mbs_cnt, found = `0`;
3940	int32_t elem_mbs_len;
3941	/ Skip the name of collating element name. /
3942	idx = idx + extra[idx] + `1`;
3943	elem_mbs_len = extra[idx++];
3944	if (mbs_len == elem_mbs_len)
3945	{
3946	for (mbs_cnt = `0`; mbs_cnt < elem_mbs_len; ++mbs_cnt)
3947	if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
3948	break;
3949	if (mbs_cnt == elem_mbs_len)
3950	/ Found the entry. /
3951	found = `1`;
3952	}
3953	/ Skip the byte sequence of the collating element. /
3954	idx += elem_mbs_len;
3955	/ Adjust for the alignment. /
3956	idx = (idx + `3`) & ~`3`;
3957	/ Skip the collation sequence value. /
3958	idx += sizeof (uint32_t);
3959	/ Skip the wide char sequence of the collating element. /
3960	idx = idx + sizeof (uint32_t) * ((int32_t ) (extra + idx) + `1`);
3961	/ If we found the entry, return the sequence value. /
3962	if (found)
3963	return (uint32_t ) (extra + idx);
3964	/ Skip the collation sequence value. /
3965	idx += sizeof (uint32_t);
3966	}
3967	return UINT_MAX;
3968	}
3969	}
3970	# endif /* _LIBC */
3971	#endif /* RE_ENABLE_I18N */
3972
3973	/ Check whether the node accepts the byte which is IDX-th*
3974	byte of the INPUT. /*
3975
3976	static int
3977	check_node_accept (const re_match_context_t mctx, const* re_token_t *node,
3978	int idx)
3979	{
3980	unsigned char ch;
3981	ch = re_string_byte_at (&mctx->input, idx);
3982	switch (node->type)
3983	{
3984	case CHARACTER:
3985	if (node->opr.c != ch)
3986	return `0`;
3987	break;
3988
3989	case SIMPLE_BRACKET:
3990	if (!bitset_contain (node->opr.sbcset, ch))
3991	return `0`;
3992	break;
3993
3994	#ifdef RE_ENABLE_I18N
3995	case OP_UTF8_PERIOD:
3996	if (ch >= `0x80`)
3997	return `0`;
3998	/ FALLTHROUGH /
3999	#endif
4000	case OP_PERIOD:
4001	if ((ch == `'\n'` && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
4002	\|\| (ch == `'\0'` && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
4003	return `0`;
4004	break;
4005
4006	default:
4007	return `0`;
4008	}
4009
4010	if (node->constraint)
4011	{
4012	/ The node has constraints. Check whether the current context*
4013	satisfies the constraints. /*
4014	unsigned int context = re_string_context_at (&mctx->input, idx,
4015	mctx->eflags);
4016	if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
4017	return `0`;
4018	}
4019
4020	return `1`;
4021	}
4022
4023	/ Extend the buffers, if the buffers have run out. /
4024
4025	static reg_errcode_t
4026	__attribute_warn_unused_result__
4027	extend_buffers (re_match_context_t mctx, int* min_len)
4028	{
4029	reg_errcode_t ret;
4030	re_string_t *pstr = &mctx->input;
4031
4032	/ Avoid overflow. /
4033	if (BE (INT_MAX / `2` / sizeof (re_dfastate_t *) <= pstr->bufs_len, `0`))
4034	return REG_ESPACE;
4035
4036	/ Double the lengthes of the buffers, but allocate at least MIN_LEN. /
4037	ret = re_string_realloc_buffers (pstr,
4038	MAX (min_len,
4039	MIN (pstr->len, pstr->bufs_len * `2`)));
4040	if (BE (ret != REG_NOERROR, `0`))
4041	return ret;
4042
4043	if (mctx->state_log != NULL)
4044	{
4045	/ And double the length of state_log. /
4046	/ XXX We have no indication of the size of this buffer. If this*
4047	allocation fail we have no indication that the state_log array
4048	does not have the right size. /*
4049	re_dfastate_t *new_array = re_realloc (mctx->state_log, re_dfastate_t ,
4050	pstr->bufs_len + `1`);
4051	if (BE (new_array == NULL, `0`))
4052	return REG_ESPACE;
4053	mctx->state_log = new_array;
4054	}
4055
4056	/ Then reconstruct the buffers. /
4057	if (pstr->icase)
4058	{
4059	#ifdef RE_ENABLE_I18N
4060	if (pstr->mb_cur_max > `1`)
4061	{
4062	ret = build_wcs_upper_buffer (pstr);
4063	if (BE (ret != REG_NOERROR, `0`))
4064	return ret;
4065	}
4066	else
4067	#endif /* RE_ENABLE_I18N */
4068	build_upper_buffer (pstr);
4069	}
4070	else
4071	{
4072	#ifdef RE_ENABLE_I18N
4073	if (pstr->mb_cur_max > `1`)
4074	build_wcs_buffer (pstr);
4075	else
4076	#endif /* RE_ENABLE_I18N */
4077	{
4078	if (pstr->trans != NULL)
4079	re_string_translate_buffer (pstr);
4080	}
4081	}
4082	return REG_NOERROR;
4083	}
4084
4085
/* Functions for matching context.  */
4087
4088	/ Initialize MCTX. /
4089
4090	static reg_errcode_t
4091	__attribute_warn_unused_result__
4092	match_ctx_init (re_match_context_t mctx, int* eflags, int n)
4093	{
4094	mctx->eflags = eflags;
4095	mctx->match_last = -`1`;
4096	if (n > `0`)
4097	{
4098	mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4099	mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4100	if (BE (mctx->bkref_ents == NULL \|\| mctx->sub_tops == NULL, `0`))
4101	return REG_ESPACE;
4102	}
4103	/ Already zero-ed by the caller.*
4104	else
4105	mctx->bkref_ents = NULL;
4106	mctx->nbkref_ents = 0;
4107	mctx->nsub_tops = 0; /*
4108	mctx->abkref_ents = n;
4109	mctx->max_mb_elem_len = `1`;
4110	mctx->asub_tops = n;
4111	return REG_NOERROR;
4112	}
4113
4114	/ Clean the entries which depend on the current input in MCTX.*
4115	This function must be invoked when the matcher changes the start index
4116	of the input, or changes the input string. /*
4117
4118	static void
4119	match_ctx_clean (re_match_context_t *mctx)
4120	{
4121	int st_idx;
4122	for (st_idx = `0`; st_idx < mctx->nsub_tops; ++st_idx)
4123	{
4124	int sl_idx;
4125	re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4126	for (sl_idx = `0`; sl_idx < top->nlasts; ++sl_idx)
4127	{
4128	re_sub_match_last_t *last = top->lasts[sl_idx];
4129	re_free (last->path.array);
4130	re_free (last);
4131	}
4132	re_free (top->lasts);
4133	if (top->path)
4134	{
4135	re_free (top->path->array);
4136	re_free (top->path);
4137	}
4138	free (top);
4139	}
4140
4141	mctx->nsub_tops = `0`;
4142	mctx->nbkref_ents = `0`;
4143	}
4144
4145	/ Free all the memory associated with MCTX. /
4146
4147	static void
4148	match_ctx_free (re_match_context_t *mctx)
4149	{
4150	/ First, free all the memory associated with MCTX->SUB_TOPS. /
4151	match_ctx_clean (mctx);
4152	re_free (mctx->sub_tops);
4153	re_free (mctx->bkref_ents);
4154	}
4155
4156	/ Add a new backreference entry to MCTX.*
4157	Note that we assume that caller never call this function with duplicate
4158	entry, and call with STR_IDX which isn't smaller than any existing entry.
4159	*/
4160
4161	static reg_errcode_t
4162	__attribute_warn_unused_result__
4163	match_ctx_add_entry (re_match_context_t mctx, int* node, int str_idx, int from,
4164	int to)
4165	{
4166	if (mctx->nbkref_ents >= mctx->abkref_ents)
4167	{
4168	struct re_backref_cache_entry* new_entry;
4169	new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4170	mctx->abkref_ents * `2`);
4171	if (BE (new_entry == NULL, `0`))
4172	{
4173	re_free (mctx->bkref_ents);
4174	return REG_ESPACE;
4175	}
4176	mctx->bkref_ents = new_entry;
4177	memset (mctx->bkref_ents + mctx->nbkref_ents, `'\0'`,
4178	sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4179	mctx->abkref_ents *= `2`;
4180	}
4181	if (mctx->nbkref_ents > `0`
4182	&& mctx->bkref_ents[mctx->nbkref_ents - `1`].str_idx == str_idx)
4183	mctx->bkref_ents[mctx->nbkref_ents - `1`].more = `1`;
4184
4185	mctx->bkref_ents[mctx->nbkref_ents].node = node;
4186	mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4187	mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4188	mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4189
4190	/ This is a cache that saves negative results of check_dst_limits_calc_pos.*
4191	If bit N is clear, means that this entry won't epsilon-transition to
4192	an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If
4193	it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4194	such node.
4195
4196	A backreference does not epsilon-transition unless it is empty, so set
4197	to all zeros if FROM != TO. /*
4198	mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4199	= (from == to ? ~`0` : `0`);
4200
4201	mctx->bkref_ents[mctx->nbkref_ents++].more = `0`;
4202	if (mctx->max_mb_elem_len < to - from)
4203	mctx->max_mb_elem_len = to - from;
4204	return REG_NOERROR;
4205	}
4206
4207	/ Search for the first entry which has the same str_idx, or -1 if none is*
4208	found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. /*
4209
4210	static int
4211	search_cur_bkref_entry (const re_match_context_t mctx, int* str_idx)
4212	{
4213	int left, right, mid, last;
4214	last = right = mctx->nbkref_ents;
4215	for (left = `0`; left < right;)
4216	{
4217	mid = (left + right) / `2`;
4218	if (mctx->bkref_ents[mid].str_idx < str_idx)
4219	left = mid + `1`;
4220	else
4221	right = mid;
4222	}
4223	if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4224	return left;
4225	else
4226	return -`1`;
4227	}
4228
4229	/ Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches*
4230	at STR_IDX. /*
4231
4232	static reg_errcode_t
4233	__attribute_warn_unused_result__
4234	match_ctx_add_subtop (re_match_context_t mctx, int* node, int str_idx)
4235	{
4236	#ifdef DEBUG
4237	assert (mctx->sub_tops != NULL);
4238	assert (mctx->asub_tops > `0`);
4239	#endif
4240	if (BE (mctx->nsub_tops == mctx->asub_tops, `0`))
4241	{
4242	int new_asub_tops = mctx->asub_tops * `2`;
4243	re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4244	re_sub_match_top_t *,
4245	new_asub_tops);
4246	if (BE (new_array == NULL, `0`))
4247	return REG_ESPACE;
4248	mctx->sub_tops = new_array;
4249	mctx->asub_tops = new_asub_tops;
4250	}
4251	mctx->sub_tops[mctx->nsub_tops] = calloc (`1`, sizeof (re_sub_match_top_t));
4252	if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, `0`))
4253	return REG_ESPACE;
4254	mctx->sub_tops[mctx->nsub_tops]->node = node;
4255	mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4256	return REG_NOERROR;
4257	}
4258
4259	/ Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches*
4260	at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. /*
4261
4262	static re_sub_match_last_t *
4263	match_ctx_add_sublast (re_sub_match_top_t subtop, int* node, int str_idx)
4264	{
4265	re_sub_match_last_t *new_entry;
4266	if (BE (subtop->nlasts == subtop->alasts, `0`))
4267	{
4268	int new_alasts = `2` * subtop->alasts + `1`;
4269	re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4270	re_sub_match_last_t *,
4271	new_alasts);
4272	if (BE (new_array == NULL, `0`))
4273	return NULL;
4274	subtop->lasts = new_array;
4275	subtop->alasts = new_alasts;
4276	}
4277	new_entry = calloc (`1`, sizeof (re_sub_match_last_t));
4278	if (BE (new_entry != NULL, `1`))
4279	{
4280	subtop->lasts[subtop->nlasts] = new_entry;
4281	new_entry->node = node;
4282	new_entry->str_idx = str_idx;
4283	++subtop->nlasts;
4284	}
4285	return new_entry;
4286	}
4287
4288	static void
4289	sift_ctx_init (re_sift_context_t sctx, re_dfastate_t *sifted_sts,
4290	re_dfastate_t *limited_sts, int* last_node, int last_str_idx)
4291	{
4292	sctx->sifted_states = sifted_sts;
4293	sctx->limited_states = limited_sts;
4294	sctx->last_node = last_node;
4295	sctx->last_str_idx = last_str_idx;
4296	re_node_set_init_empty (&sctx->limits);
4297	}
4298

Browse the source code of glibc/posix/regexec.c