regexec.c source code [glibc/posix/regexec.c]

1	/ Extended regular expression matching and search library.*
2	Copyright (C) 2002-2018 Free Software Foundation, Inc.
3	This file is part of the GNU C Library.
4	Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6	The GNU C Library is free software; you can redistribute it and/or
7	modify it under the terms of the GNU Lesser General Public
8	License as published by the Free Software Foundation; either
9	version 2.1 of the License, or (at your option) any later version.
10
11	The GNU C Library is distributed in the hope that it will be useful,
12	but WITHOUT ANY WARRANTY; without even the implied warranty of
13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14	Lesser General Public License for more details.
15
16	You should have received a copy of the GNU Lesser General Public
17	License along with the GNU C Library; if not, see
18	<https://www.gnu.org/licenses/>. /*
19
20	static reg_errcode_t match_ctx_init (re_match_context_t cache, int* eflags,
21	Idx n);
22	static void match_ctx_clean (re_match_context_t *mctx);
23	static void match_ctx_free (re_match_context_t *cache);
24	static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
25	Idx str_idx, Idx from, Idx to);
26	static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx);
27	static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
28	Idx str_idx);
29	static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
30	Idx node, Idx str_idx);
31	static void sift_ctx_init (re_sift_context_t sctx, re_dfastate_t *sifted_sts,
32	re_dfastate_t **limited_sts, Idx last_node,
33	Idx last_str_idx);
34	static reg_errcode_t re_search_internal (const regex_t *preg,
35	const char *string, Idx length,
36	Idx start, Idx last_start, Idx stop,
37	size_t nmatch, regmatch_t pmatch[],
38	int eflags);
39	static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
40	const char *string1, Idx length1,
41	const char *string2, Idx length2,
42	Idx start, regoff_t range,
43	struct re_registers *regs,
44	Idx stop, bool ret_len);
45	static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
46	const char *string, Idx length, Idx start,
47	regoff_t range, Idx stop,
48	struct re_registers *regs,
49	bool ret_len);
50	static unsigned re_copy_regs (struct re_registers regs, regmatch_t pmatch,
51	Idx nregs, int regs_allocated);
52	static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx);
53	static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
54	Idx *p_match_first);
55	static Idx check_halt_state_context (const re_match_context_t *mctx,
56	const re_dfastate_t *state, Idx idx);
57	static void update_regs (const re_dfa_t dfa, regmatch_t pmatch,
58	regmatch_t *prev_idx_match, Idx cur_node,
59	Idx cur_idx, Idx nmatch);
60	static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
61	Idx str_idx, Idx dest_node, Idx nregs,
62	regmatch_t *regs,
63	re_node_set *eps_via_nodes);
64	static reg_errcode_t set_regs (const regex_t *preg,
65	const re_match_context_t *mctx,
66	size_t nmatch, regmatch_t *pmatch,
67	bool fl_backtrack);
68	static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
69
70	#ifdef RE_ENABLE_I18N
71	static int sift_states_iter_mb (const re_match_context_t *mctx,
72	re_sift_context_t *sctx,
73	Idx node_idx, Idx str_idx, Idx max_str_idx);
74	#endif /* RE_ENABLE_I18N */
75	static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
76	re_sift_context_t *sctx);
77	static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
78	re_sift_context_t *sctx, Idx str_idx,
79	re_node_set *cur_dest);
80	static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
81	re_sift_context_t *sctx,
82	Idx str_idx,
83	re_node_set *dest_nodes);
84	static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
85	re_node_set *dest_nodes,
86	const re_node_set *candidates);
87	static bool check_dst_limits (const re_match_context_t *mctx,
88	const re_node_set *limits,
89	Idx dst_node, Idx dst_idx, Idx src_node,
90	Idx src_idx);
91	static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
92	int boundaries, Idx subexp_idx,
93	Idx from_node, Idx bkref_idx);
94	static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
95	Idx limit, Idx subexp_idx,
96	Idx node, Idx str_idx,
97	Idx bkref_idx);
98	static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
99	re_node_set *dest_nodes,
100	const re_node_set *candidates,
101	re_node_set *limits,
102	struct re_backref_cache_entry *bkref_ents,
103	Idx str_idx);
104	static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
105	re_sift_context_t *sctx,
106	Idx str_idx, const re_node_set *candidates);
107	static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
108	re_dfastate_t **dst,
109	re_dfastate_t **src, Idx num);
110	static re_dfastate_t find_recover_state (reg_errcode_t err,
111	re_match_context_t *mctx);
112	static re_dfastate_t transit_state (reg_errcode_t err,
113	re_match_context_t *mctx,
114	re_dfastate_t *state);
115	static re_dfastate_t merge_state_with_log (reg_errcode_t err,
116	re_match_context_t *mctx,
117	re_dfastate_t *next_state);
118	static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
119	re_node_set *cur_nodes,
120	Idx str_idx);
121	#if 0
122	static re_dfastate_t transit_state_sb (reg_errcode_t err,
123	re_match_context_t *mctx,
124	re_dfastate_t *pstate);
125	#endif
126	#ifdef RE_ENABLE_I18N
127	static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
128	re_dfastate_t *pstate);
129	#endif /* RE_ENABLE_I18N */
130	static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
131	const re_node_set *nodes);
132	static reg_errcode_t get_subexp (re_match_context_t *mctx,
133	Idx bkref_node, Idx bkref_str_idx);
134	static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
135	const re_sub_match_top_t *sub_top,
136	re_sub_match_last_t *sub_last,
137	Idx bkref_node, Idx bkref_str);
138	static Idx find_subexp_node (const re_dfa_t dfa, const* re_node_set *nodes,
139	Idx subexp_idx, int type);
140	static reg_errcode_t check_arrival (re_match_context_t *mctx,
141	state_array_t *path, Idx top_node,
142	Idx top_str, Idx last_node, Idx last_str,
143	int type);
144	static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
145	Idx str_idx,
146	re_node_set *cur_nodes,
147	re_node_set *next_nodes);
148	static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
149	re_node_set *cur_nodes,
150	Idx ex_subexp, int type);
151	static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
152	re_node_set *dst_nodes,
153	Idx target, Idx ex_subexp,
154	int type);
155	static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
156	re_node_set *cur_nodes, Idx cur_str,
157	Idx subexp_num, int type);
158	static bool build_trtable (const re_dfa_t dfa, re_dfastate_t state);
159	#ifdef RE_ENABLE_I18N
160	static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
161	const re_string_t *input, Idx idx);
162	# ifdef _LIBC
163	static unsigned int find_collation_sequence_value (const unsigned char *mbs,
164	size_t name_len);
165	# endif /* _LIBC */
166	#endif /* RE_ENABLE_I18N */
167	static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
168	const re_dfastate_t *state,
169	re_node_set *states_node,
170	bitset_t *states_ch);
171	static bool check_node_accept (const re_match_context_t *mctx,
172	const re_token_t *node, Idx idx);
173	static reg_errcode_t extend_buffers (re_match_context_t mctx, int* min_len);
174
175	/ Entry point for POSIX code. /
176
177	/ regexec searches for a given pattern, specified by PREG, in the*
178	string STRING.
179
180	If NMATCH is zero or REG_NOSUB was set in the cflags argument to
181	'regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
182	least NMATCH elements, and we set them to the offsets of the
183	corresponding matched substrings.
184
185	EFLAGS specifies "execution flags" which affect matching: if
186	REG_NOTBOL is set, then ^ does not match at the beginning of the
187	string; if REG_NOTEOL is set, then $ does not match at the end.
188
189	We return 0 if we find a match and REG_NOMATCH if not. /*
190
191	int
192	regexec (const regex_t _Restrict_ preg, const* char *_Restrict_ string,
193	size_t nmatch, regmatch_t pmatch[], int eflags)
194	{
195	reg_errcode_t err;
196	Idx start, length;
197	re_dfa_t *dfa = preg->buffer;
198
199	if (eflags & ~(REG_NOTBOL \| REG_NOTEOL \| REG_STARTEND))
200	return REG_BADPAT;
201
202	if (eflags & REG_STARTEND)
203	{
204	start = pmatch[`0`].rm_so;
205	length = pmatch[`0`].rm_eo;
206	}
207	else
208	{
209	start = `0`;
210	length = strlen (string);
211	}
212
213	lock_lock (dfa->lock);
214	if (preg->no_sub)
215	err = re_search_internal (preg, string, length, start, length,
216	length, `0`, NULL, eflags);
217	else
218	err = re_search_internal (preg, string, length, start, length,
219	length, nmatch, pmatch, eflags);
220	lock_unlock (dfa->lock);
221	return err != REG_NOERROR;
222	}
223
224	#ifdef _LIBC
225	libc_hidden_def (__regexec)
226
227	# include <shlib-compat.h>
228	versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
229
230	# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
231	__typeof__ (__regexec) __compat_regexec;
232
233	int
234	attribute_compat_text_section
235	__compat_regexec (const regex_t *_Restrict_ preg,
236	const char *_Restrict_ string, size_t nmatch,
237	regmatch_t pmatch[], int eflags)
238	{
239	return regexec (preg, string, nmatch, pmatch,
240	eflags & (REG_NOTBOL \| REG_NOTEOL));
241	}
242	compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
243	# endif
244	#endif
245
246	/ Entry points for GNU code. /
247
248	/ re_match, re_search, re_match_2, re_search_2*
249
250	The former two functions operate on STRING with length LENGTH,
251	while the later two operate on concatenation of STRING1 and STRING2
252	with lengths LENGTH1 and LENGTH2, respectively.
253
254	re_match() matches the compiled pattern in BUFP against the string,
255	starting at index START.
256
257	re_search() first tries matching at index START, then it tries to match
258	starting from index START + 1, and so on. The last start position tried
259	is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same
260	way as re_match().)
261
262	The parameter STOP of re_{match,search}_2 specifies that no match exceeding
263	the first STOP characters of the concatenation of the strings should be
264	concerned.
265
266	If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
267	and all groups is stored in REGS. (For the "_2" variants, the offsets are
268	computed relative to the concatenation, not relative to the individual
269	strings.)
270
271	On success, re_match* functions return the length of the match, re_search*
272	return the position of the start of the match. Return value -1 means no
273	match was found and -2 indicates an internal error. /*
274
275	regoff_t
276	re_match (struct re_pattern_buffer bufp, const* char *string, Idx length,
277	Idx start, struct re_registers *regs)
278	{
279	return re_search_stub (bufp, string, length, start, `0`, length, regs, true);
280	}
281	#ifdef _LIBC
282	weak_alias (__re_match, re_match)
283	#endif
284
285	regoff_t
286	re_search (struct re_pattern_buffer bufp, const* char *string, Idx length,
287	Idx start, regoff_t range, struct re_registers *regs)
288	{
289	return re_search_stub (bufp, string, length, start, range, length, regs,
290	false);
291	}
292	#ifdef _LIBC
293	weak_alias (__re_search, re_search)
294	#endif
295
296	regoff_t
297	re_match_2 (struct re_pattern_buffer bufp, const* char *string1, Idx length1,
298	const char *string2, Idx length2, Idx start,
299	struct re_registers *regs, Idx stop)
300	{
301	return re_search_2_stub (bufp, string1, length1, string2, length2,
302	start, `0`, regs, stop, true);
303	}
304	#ifdef _LIBC
305	weak_alias (__re_match_2, re_match_2)
306	#endif
307
308	regoff_t
309	re_search_2 (struct re_pattern_buffer bufp, const* char *string1, Idx length1,
310	const char *string2, Idx length2, Idx start, regoff_t range,
311	struct re_registers *regs, Idx stop)
312	{
313	return re_search_2_stub (bufp, string1, length1, string2, length2,
314	start, range, regs, stop, false);
315	}
316	#ifdef _LIBC
317	weak_alias (__re_search_2, re_search_2)
318	#endif
319
320	static regoff_t
321	re_search_2_stub (struct re_pattern_buffer bufp, const* char *string1,
322	Idx length1, const char *string2, Idx length2, Idx start,
323	regoff_t range, struct re_registers *regs,
324	Idx stop, bool ret_len)
325	{
326	const char *str;
327	regoff_t rval;
328	Idx len;
329	char *s = NULL;
330
331	if (BE ((length1 < `0` \|\| length2 < `0` \|\| stop < `0`
332	\|\| INT_ADD_WRAPV (length1, length2, &len)),
333	`0`))
334	return -`2`;
335
336	/ Concatenate the strings. /
337	if (length2 > `0`)
338	if (length1 > `0`)
339	{
340	s = re_malloc (char, len);
341
342	if (BE (s == NULL, `0`))
343	return -`2`;
344	#ifdef _LIBC
345	memcpy (__mempcpy (s, string1, length1), string2, length2);
346	#else
347	memcpy (s, string1, length1);
348	memcpy (s + length1, string2, length2);
349	#endif
350	str = s;
351	}
352	else
353	str = string2;
354	else
355	str = string1;
356
357	rval = re_search_stub (bufp, str, len, start, range, stop, regs,
358	ret_len);
359	re_free (s);
360	return rval;
361	}
362
363	/ The parameters have the same meaning as those of re_search.*
364	Additional parameters:
365	If RET_LEN is true the length of the match is returned (re_match style);
366	otherwise the position of the match is returned. /*
367
368	static regoff_t
369	re_search_stub (struct re_pattern_buffer bufp, const* char *string, Idx length,
370	Idx start, regoff_t range, Idx stop, struct re_registers *regs,
371	bool ret_len)
372	{
373	reg_errcode_t result;
374	regmatch_t *pmatch;
375	Idx nregs;
376	regoff_t rval;
377	int eflags = `0`;
378	re_dfa_t *dfa = bufp->buffer;
379	Idx last_start = start + range;
380
381	/ Check for out-of-range. /
382	if (BE (start < `0` \|\| start > length, `0`))
383	return -`1`;
384	if (BE (length < last_start \|\| (`0` <= range && last_start < start), `0`))
385	last_start = length;
386	else if (BE (last_start < `0` \|\| (range < `0` && start <= last_start), `0`))
387	last_start = `0`;
388
389	lock_lock (dfa->lock);
390
391	eflags \|= (bufp->not_bol) ? REG_NOTBOL : `0`;
392	eflags \|= (bufp->not_eol) ? REG_NOTEOL : `0`;
393
394	/ Compile fastmap if we haven't yet. /
395	if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate)
396	re_compile_fastmap (bufp);
397
398	if (BE (bufp->no_sub, `0`))
399	regs = NULL;
400
401	/ We need at least 1 register. /
402	if (regs == NULL)
403	nregs = `1`;
404	else if (BE (bufp->regs_allocated == REGS_FIXED
405	&& regs->num_regs <= bufp->re_nsub, `0`))
406	{
407	nregs = regs->num_regs;
408	if (BE (nregs < `1`, `0`))
409	{
410	/ Nothing can be copied to regs. /
411	regs = NULL;
412	nregs = `1`;
413	}
414	}
415	else
416	nregs = bufp->re_nsub + `1`;
417	pmatch = re_malloc (regmatch_t, nregs);
418	if (BE (pmatch == NULL, `0`))
419	{
420	rval = -`2`;
421	goto out;
422	}
423
424	result = re_search_internal (bufp, string, length, start, last_start, stop,
425	nregs, pmatch, eflags);
426
427	rval = `0`;
428
429	/ I hope we needn't fill their regs with -1's when no match was found. /
430	if (result != REG_NOERROR)
431	rval = result == REG_NOMATCH ? -`1` : -`2`;
432	else if (regs != NULL)
433	{
434	/ If caller wants register contents data back, copy them. /
435	bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
436	bufp->regs_allocated);
437	if (BE (bufp->regs_allocated == REGS_UNALLOCATED, `0`))
438	rval = -`2`;
439	}
440
441	if (BE (rval == `0`, `1`))
442	{
443	if (ret_len)
444	{
445	assert (pmatch[`0`].rm_so == start);
446	rval = pmatch[`0`].rm_eo - start;
447	}
448	else
449	rval = pmatch[`0`].rm_so;
450	}
451	re_free (pmatch);
452	out:
453	lock_unlock (dfa->lock);
454	return rval;
455	}
456
457	static unsigned
458	re_copy_regs (struct re_registers regs, regmatch_t pmatch, Idx nregs,
459	int regs_allocated)
460	{
461	int rval = REGS_REALLOCATE;
462	Idx i;
463	Idx need_regs = nregs + `1`;
464	/ We need one extra element beyond 'num_regs' for the '-1' marker GNU code*
465	uses. /*
466
467	/ Have the register data arrays been allocated? /
468	if (regs_allocated == REGS_UNALLOCATED)
469	{ / No. So allocate them with malloc. /
470	regs->start = re_malloc (regoff_t, need_regs);
471	if (BE (regs->start == NULL, `0`))
472	return REGS_UNALLOCATED;
473	regs->end = re_malloc (regoff_t, need_regs);
474	if (BE (regs->end == NULL, `0`))
475	{
476	re_free (regs->start);
477	return REGS_UNALLOCATED;
478	}
479	regs->num_regs = need_regs;
480	}
481	else if (regs_allocated == REGS_REALLOCATE)
482	{ / Yes. If we need more elements than were already*
483	allocated, reallocate them. If we need fewer, just
484	leave it alone. /*
485	if (BE (need_regs > regs->num_regs, `0`))
486	{
487	regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
488	regoff_t *new_end;
489	if (BE (new_start == NULL, `0`))
490	return REGS_UNALLOCATED;
491	new_end = re_realloc (regs->end, regoff_t, need_regs);
492	if (BE (new_end == NULL, `0`))
493	{
494	re_free (new_start);
495	return REGS_UNALLOCATED;
496	}
497	regs->start = new_start;
498	regs->end = new_end;
499	regs->num_regs = need_regs;
500	}
501	}
502	else
503	{
504	assert (regs_allocated == REGS_FIXED);
505	/ This function may not be called with REGS_FIXED and nregs too big. /
506	assert (regs->num_regs >= nregs);
507	rval = REGS_FIXED;
508	}
509
510	/ Copy the regs. /
511	for (i = `0`; i < nregs; ++i)
512	{
513	regs->start[i] = pmatch[i].rm_so;
514	regs->end[i] = pmatch[i].rm_eo;
515	}
516	for ( ; i < regs->num_regs; ++i)
517	regs->start[i] = regs->end[i] = -`1`;
518
519	return rval;
520	}
521
522	/ Set REGS to hold NUM_REGS registers, storing them in STARTS and*
523	ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
524	this memory for recording register information. STARTS and ENDS
525	must be allocated using the malloc library routine, and must each
526	be at least NUM_REGS sizeof (regoff_t) bytes long.*
527
528	If NUM_REGS == 0, then subsequent matches should allocate their own
529	register data.
530
531	Unless this function is called, the first search or match using
532	PATTERN_BUFFER will allocate its own register data, without
533	freeing the old data. /*
534
535	void
536	re_set_registers (struct re_pattern_buffer bufp, struct* re_registers *regs,
537	__re_size_t num_regs, regoff_t starts, regoff_t ends)
538	{
539	if (num_regs)
540	{
541	bufp->regs_allocated = REGS_REALLOCATE;
542	regs->num_regs = num_regs;
543	regs->start = starts;
544	regs->end = ends;
545	}
546	else
547	{
548	bufp->regs_allocated = REGS_UNALLOCATED;
549	regs->num_regs = `0`;
550	regs->start = regs->end = NULL;
551	}
552	}
553	#ifdef _LIBC
554	weak_alias (__re_set_registers, re_set_registers)
555	#endif
556
557	/ Entry points compatible with 4.2 BSD regex library. We don't define*
558	them unless specifically requested. /*
559
560	#if defined _REGEX_RE_COMP \|\| defined _LIBC
561	int
562	# ifdef _LIBC
563	weak_function
564	# endif
565	re_exec (const char *s)
566	{
567	return `0` == regexec (&re_comp_buf, s, `0`, NULL, `0`);
568	}
569	#endif /* _REGEX_RE_COMP */
570
571	/ Internal entry point. /
572
573	/ Searches for a compiled pattern PREG in the string STRING, whose*
574	length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
575	meaning as with regexec. LAST_START is START + RANGE, where
576	START and RANGE have the same meaning as with re_search.
577	Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
578	otherwise return the error code.
579	Note: We assume front end functions already check ranges.
580	(0 <= LAST_START && LAST_START <= LENGTH) /*
581
582	static reg_errcode_t
583	__attribute_warn_unused_result__
584	re_search_internal (const regex_t preg, const* char *string, Idx length,
585	Idx start, Idx last_start, Idx stop, size_t nmatch,
586	regmatch_t pmatch[], int eflags)
587	{
588	reg_errcode_t err;
589	const re_dfa_t *dfa = preg->buffer;
590	Idx left_lim, right_lim;
591	int incr;
592	bool fl_longest_match;
593	int match_kind;
594	Idx match_first;
595	Idx match_last = -`1`;
596	Idx extra_nmatch;
597	bool sb;
598	int ch;
599	#if defined _LIBC \|\| (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
600	re_match_context_t mctx = { .dfa = dfa };
601	#else
602	re_match_context_t mctx;
603	#endif
604	char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
605	&& start != last_start && !preg->can_be_null)
606	? preg->fastmap : NULL);
607	RE_TRANSLATE_TYPE t = preg->translate;
608
609	#if !(defined _LIBC \|\| (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
610	memset (&mctx, `'\0'`, sizeof (re_match_context_t));
611	mctx.dfa = dfa;
612	#endif
613
614	extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + `1`) : `0`;
615	nmatch -= extra_nmatch;
616
617	/ Check if the DFA haven't been compiled. /
618	if (BE (preg->used == `0` \|\| dfa->init_state == NULL
619	\|\| dfa->init_state_word == NULL \|\| dfa->init_state_nl == NULL
620	\|\| dfa->init_state_begbuf == NULL, `0`))
621	return REG_NOMATCH;
622
623	#ifdef DEBUG
624	/ We assume front-end functions already check them. /
625	assert (`0` <= last_start && last_start <= length);
626	#endif
627
628	/ If initial states with non-begbuf contexts have no elements,*
629	the regex must be anchored. If preg->newline_anchor is set,
630	we'll never use init_state_nl, so do not check it. /*
631	if (dfa->init_state->nodes.nelem == `0`
632	&& dfa->init_state_word->nodes.nelem == `0`
633	&& (dfa->init_state_nl->nodes.nelem == `0`
634	\|\| !preg->newline_anchor))
635	{
636	if (start != `0` && last_start != `0`)
637	return REG_NOMATCH;
638	start = last_start = `0`;
639	}
640
641	/ We must check the longest matching, if nmatch > 0. /
642	fl_longest_match = (nmatch != `0` \|\| dfa->nbackref);
643
644	err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + `1`,
645	preg->translate, (preg->syntax & RE_ICASE) != `0`,
646	dfa);
647	if (BE (err != REG_NOERROR, `0`))
648	goto free_return;
649	mctx.input.stop = stop;
650	mctx.input.raw_stop = stop;
651	mctx.input.newline_anchor = preg->newline_anchor;
652
653	err = match_ctx_init (&mctx, eflags, dfa->nbackref * `2`);
654	if (BE (err != REG_NOERROR, `0`))
655	goto free_return;
656
657	/ We will log all the DFA states through which the dfa pass,*
658	if nmatch > 1, or this dfa has "multibyte node", which is a
659	back-reference or a node which can accept multibyte character or
660	multi character collating element. /*
661	if (nmatch > `1` \|\| dfa->has_mb_node)
662	{
663	/ Avoid overflow. /
664	if (BE ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
665	<= mctx.input.bufs_len), `0`))
666	{
667	err = REG_ESPACE;
668	goto free_return;
669	}
670
671	mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + `1`);
672	if (BE (mctx.state_log == NULL, `0`))
673	{
674	err = REG_ESPACE;
675	goto free_return;
676	}
677	}
678	else
679	mctx.state_log = NULL;
680
681	match_first = start;
682	mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
683	: CONTEXT_NEWLINE \| CONTEXT_BEGBUF;
684
685	/ Check incrementally whether the input string matches. /
686	incr = (last_start < start) ? -`1` : `1`;
687	left_lim = (last_start < start) ? last_start : start;
688	right_lim = (last_start < start) ? start : last_start;
689	sb = dfa->mb_cur_max == `1`;
690	match_kind =
691	(fastmap
692	? ((sb \|\| !(preg->syntax & RE_ICASE \|\| t) ? `4` : `0`)
693	\| (start <= last_start ? `2` : `0`)
694	\| (t != NULL ? `1` : `0`))
695	: `8`);
696
697	for (;; match_first += incr)
698	{
699	err = REG_NOMATCH;
700	if (match_first < left_lim \|\| right_lim < match_first)
701	goto free_return;
702
703	/ Advance as rapidly as possible through the string, until we*
704	find a plausible place to start matching. This may be done
705	with varying efficiency, so there are various possibilities:
706	only the most common of them are specialized, in order to
707	save on code size. We use a switch statement for speed. /*
708	switch (match_kind)
709	{
710	case `8`:
711	/ No fastmap. /
712	break;
713
714	case `7`:
715	/ Fastmap with single-byte translation, match forward. /
716	while (BE (match_first < right_lim, `1`)
717	&& !fastmap[t[(unsigned char) string[match_first]]])
718	++match_first;
719	goto forward_match_found_start_or_reached_end;
720
721	case `6`:
722	/ Fastmap without translation, match forward. /
723	while (BE (match_first < right_lim, `1`)
724	&& !fastmap[(unsigned char) string[match_first]])
725	++match_first;
726
727	forward_match_found_start_or_reached_end:
728	if (BE (match_first == right_lim, `0`))
729	{
730	ch = match_first >= length
731	? `0` : (unsigned char) string[match_first];
732	if (!fastmap[t ? t[ch] : ch])
733	goto free_return;
734	}
735	break;
736
737	case `4`:
738	case `5`:
739	/ Fastmap without multi-byte translation, match backwards. /
740	while (match_first >= left_lim)
741	{
742	ch = match_first >= length
743	? `0` : (unsigned char) string[match_first];
744	if (fastmap[t ? t[ch] : ch])
745	break;
746	--match_first;
747	}
748	if (match_first < left_lim)
749	goto free_return;
750	break;
751
752	default:
753	/ In this case, we can't determine easily the current byte,*
754	since it might be a component byte of a multibyte
755	character. Then we use the constructed buffer instead. /*
756	for (;;)
757	{
758	/ If MATCH_FIRST is out of the valid range, reconstruct the*
759	buffers. /*
760	__re_size_t offset = match_first - mctx.input.raw_mbs_idx;
761	if (BE (offset >= (__re_size_t) mctx.input.valid_raw_len, `0`))
762	{
763	err = re_string_reconstruct (&mctx.input, match_first,
764	eflags);
765	if (BE (err != REG_NOERROR, `0`))
766	goto free_return;
767
768	offset = match_first - mctx.input.raw_mbs_idx;
769	}
770	/ If MATCH_FIRST is out of the buffer, leave it as '\0'.*
771	Note that MATCH_FIRST must not be smaller than 0. /*
772	ch = (match_first >= length
773	? `0` : re_string_byte_at (&mctx.input, offset));
774	if (fastmap[ch])
775	break;
776	match_first += incr;
777	if (match_first < left_lim \|\| match_first > right_lim)
778	{
779	err = REG_NOMATCH;
780	goto free_return;
781	}
782	}
783	break;
784	}
785
786	/ Reconstruct the buffers so that the matcher can assume that*
787	the matching starts from the beginning of the buffer. /*
788	err = re_string_reconstruct (&mctx.input, match_first, eflags);
789	if (BE (err != REG_NOERROR, `0`))
790	goto free_return;
791
792	#ifdef RE_ENABLE_I18N
793	/ Don't consider this char as a possible match start if it part,*
794	yet isn't the head, of a multibyte character. /*
795	if (!sb && !re_string_first_byte (&mctx.input, `0`))
796	continue;
797	#endif
798
799	/ It seems to be appropriate one, then use the matcher. /
800	/ We assume that the matching starts from 0. /
801	mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = `0`;
802	match_last = check_matching (&mctx, fl_longest_match,
803	start <= last_start ? &match_first : NULL);
804	if (match_last != -`1`)
805	{
806	if (BE (match_last == -`2`, `0`))
807	{
808	err = REG_ESPACE;
809	goto free_return;
810	}
811	else
812	{
813	mctx.match_last = match_last;
814	if ((!preg->no_sub && nmatch > `1`) \|\| dfa->nbackref)
815	{
816	re_dfastate_t *pstate = mctx.state_log[match_last];
817	mctx.last_node = check_halt_state_context (&mctx, pstate,
818	match_last);
819	}
820	if ((!preg->no_sub && nmatch > `1` && dfa->has_plural_match)
821	\|\| dfa->nbackref)
822	{
823	err = prune_impossible_nodes (&mctx);
824	if (err == REG_NOERROR)
825	break;
826	if (BE (err != REG_NOMATCH, `0`))
827	goto free_return;
828	match_last = -`1`;
829	}
830	else
831	break; / We found a match. /
832	}
833	}
834
835	match_ctx_clean (&mctx);
836	}
837
838	#ifdef DEBUG
839	assert (match_last != -`1`);
840	assert (err == REG_NOERROR);
841	#endif
842
843	/ Set pmatch[] if we need. /
844	if (nmatch > `0`)
845	{
846	Idx reg_idx;
847
848	/ Initialize registers. /
849	for (reg_idx = `1`; reg_idx < nmatch; ++reg_idx)
850	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -`1`;
851
852	/ Set the points where matching start/end. /
853	pmatch[`0`].rm_so = `0`;
854	pmatch[`0`].rm_eo = mctx.match_last;
855	/ FIXME: This function should fail if mctx.match_last exceeds*
856	the maximum possible regoff_t value. We need a new error
857	code REG_OVERFLOW. /*
858
859	if (!preg->no_sub && nmatch > `1`)
860	{
861	err = set_regs (preg, &mctx, nmatch, pmatch,
862	dfa->has_plural_match && dfa->nbackref > `0`);
863	if (BE (err != REG_NOERROR, `0`))
864	goto free_return;
865	}
866
867	/ At last, add the offset to each register, since we slid*
868	the buffers so that we could assume that the matching starts
869	from 0. /*
870	for (reg_idx = `0`; reg_idx < nmatch; ++reg_idx)
871	if (pmatch[reg_idx].rm_so != -`1`)
872	{
873	#ifdef RE_ENABLE_I18N
874	if (BE (mctx.input.offsets_needed != `0`, `0`))
875	{
876	pmatch[reg_idx].rm_so =
877	(pmatch[reg_idx].rm_so == mctx.input.valid_len
878	? mctx.input.valid_raw_len
879	: mctx.input.offsets[pmatch[reg_idx].rm_so]);
880	pmatch[reg_idx].rm_eo =
881	(pmatch[reg_idx].rm_eo == mctx.input.valid_len
882	? mctx.input.valid_raw_len
883	: mctx.input.offsets[pmatch[reg_idx].rm_eo]);
884	}
885	#else
886	assert (mctx.input.offsets_needed == `0`);
887	#endif
888	pmatch[reg_idx].rm_so += match_first;
889	pmatch[reg_idx].rm_eo += match_first;
890	}
891	for (reg_idx = `0`; reg_idx < extra_nmatch; ++reg_idx)
892	{
893	pmatch[nmatch + reg_idx].rm_so = -`1`;
894	pmatch[nmatch + reg_idx].rm_eo = -`1`;
895	}
896
897	if (dfa->subexp_map)
898	for (reg_idx = `0`; reg_idx + `1` < nmatch; reg_idx++)
899	if (dfa->subexp_map[reg_idx] != reg_idx)
900	{
901	pmatch[reg_idx + `1`].rm_so
902	= pmatch[dfa->subexp_map[reg_idx] + `1`].rm_so;
903	pmatch[reg_idx + `1`].rm_eo
904	= pmatch[dfa->subexp_map[reg_idx] + `1`].rm_eo;
905	}
906	}
907
908	free_return:
909	re_free (mctx.state_log);
910	if (dfa->nbackref)
911	match_ctx_free (&mctx);
912	re_string_destruct (&mctx.input);
913	return err;
914	}
915
916	static reg_errcode_t
917	__attribute_warn_unused_result__
918	prune_impossible_nodes (re_match_context_t *mctx)
919	{
920	const re_dfa_t *const dfa = mctx->dfa;
921	Idx halt_node, match_last;
922	reg_errcode_t ret;
923	re_dfastate_t **sifted_states;
924	re_dfastate_t **lim_states = NULL;
925	re_sift_context_t sctx;
926	#ifdef DEBUG
927	assert (mctx->state_log != NULL);
928	#endif
929	match_last = mctx->match_last;
930	halt_node = mctx->last_node;
931
932	/ Avoid overflow. /
933	if (BE (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) <= match_last, `0`))
934	return REG_ESPACE;
935
936	sifted_states = re_malloc (re_dfastate_t *, match_last + `1`);
937	if (BE (sifted_states == NULL, `0`))
938	{
939	ret = REG_ESPACE;
940	goto free_return;
941	}
942	if (dfa->nbackref)
943	{
944	lim_states = re_malloc (re_dfastate_t *, match_last + `1`);
945	if (BE (lim_states == NULL, `0`))
946	{
947	ret = REG_ESPACE;
948	goto free_return;
949	}
950	while (`1`)
951	{
952	memset (lim_states, `'\0'`,
953	sizeof (re_dfastate_t ) (match_last + `1`));
954	sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
955	match_last);
956	ret = sift_states_backward (mctx, &sctx);
957	re_node_set_free (&sctx.limits);
958	if (BE (ret != REG_NOERROR, `0`))
959	goto free_return;
960	if (sifted_states[`0`] != NULL \|\| lim_states[`0`] != NULL)
961	break;
962	do
963	{
964	--match_last;
965	if (match_last < `0`)
966	{
967	ret = REG_NOMATCH;
968	goto free_return;
969	}
970	} while (mctx->state_log[match_last] == NULL
971	\|\| !mctx->state_log[match_last]->halt);
972	halt_node = check_halt_state_context (mctx,
973	mctx->state_log[match_last],
974	match_last);
975	}
976	ret = merge_state_array (dfa, sifted_states, lim_states,
977	match_last + `1`);
978	re_free (lim_states);
979	lim_states = NULL;
980	if (BE (ret != REG_NOERROR, `0`))
981	goto free_return;
982	}
983	else
984	{
985	sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
986	ret = sift_states_backward (mctx, &sctx);
987	re_node_set_free (&sctx.limits);
988	if (BE (ret != REG_NOERROR, `0`))
989	goto free_return;
990	if (sifted_states[`0`] == NULL)
991	{
992	ret = REG_NOMATCH;
993	goto free_return;
994	}
995	}
996	re_free (mctx->state_log);
997	mctx->state_log = sifted_states;
998	sifted_states = NULL;
999	mctx->last_node = halt_node;
1000	mctx->match_last = match_last;
1001	ret = REG_NOERROR;
1002	free_return:
1003	re_free (sifted_states);
1004	re_free (lim_states);
1005	return ret;
1006	}
1007
1008	/ Acquire an initial state and return it.*
1009	We must select appropriate initial state depending on the context,
1010	since initial states may have constraints like "\<", "^", etc.. /*
1011
1012	static inline re_dfastate_t *
1013	__attribute__ ((always_inline))
1014	acquire_init_state_context (reg_errcode_t err, const* re_match_context_t *mctx,
1015	Idx idx)
1016	{
1017	const re_dfa_t *const dfa = mctx->dfa;
1018	if (dfa->init_state->has_constraint)
1019	{
1020	unsigned int context;
1021	context = re_string_context_at (&mctx->input, idx - `1`, mctx->eflags);
1022	if (IS_WORD_CONTEXT (context))
1023	return dfa->init_state_word;
1024	else if (IS_ORDINARY_CONTEXT (context))
1025	return dfa->init_state;
1026	else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1027	return dfa->init_state_begbuf;
1028	else if (IS_NEWLINE_CONTEXT (context))
1029	return dfa->init_state_nl;
1030	else if (IS_BEGBUF_CONTEXT (context))
1031	{
1032	/ It is relatively rare case, then calculate on demand. /
1033	return re_acquire_state_context (err, dfa,
1034	dfa->init_state->entrance_nodes,
1035	context);
1036	}
1037	else
1038	/ Must not happen? /
1039	return dfa->init_state;
1040	}
1041	else
1042	return dfa->init_state;
1043	}
1044
1045	/ Check whether the regular expression match input string INPUT or not,*
1046	and return the index where the matching end. Return -1 if
1047	there is no match, and return -2 in case of an error.
1048	FL_LONGEST_MATCH means we want the POSIX longest matching.
1049	If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1050	next place where we may want to try matching.
1051	Note that the matcher assumes that the matching starts from the current
1052	index of the buffer. /*
1053
1054	static Idx
1055	__attribute_warn_unused_result__
1056	check_matching (re_match_context_t *mctx, bool fl_longest_match,
1057	Idx *p_match_first)
1058	{
1059	const re_dfa_t *const dfa = mctx->dfa;
1060	reg_errcode_t err;
1061	Idx match = `0`;
1062	Idx match_last = -`1`;
1063	Idx cur_str_idx = re_string_cur_idx (&mctx->input);
1064	re_dfastate_t *cur_state;
1065	bool at_init_state = p_match_first != NULL;
1066	Idx next_start_idx = cur_str_idx;
1067
1068	err = REG_NOERROR;
1069	cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1070	/ An initial state must not be NULL (invalid). /
1071	if (BE (cur_state == NULL, `0`))
1072	{
1073	assert (err == REG_ESPACE);
1074	return -`2`;
1075	}
1076
1077	if (mctx->state_log != NULL)
1078	{
1079	mctx->state_log[cur_str_idx] = cur_state;
1080
1081	/ Check OP_OPEN_SUBEXP in the initial state in case that we use them*
1082	later. E.g. Processing back references. /*
1083	if (BE (dfa->nbackref, `0`))
1084	{
1085	at_init_state = false;
1086	err = check_subexp_matching_top (mctx, &cur_state->nodes, `0`);
1087	if (BE (err != REG_NOERROR, `0`))
1088	return err;
1089
1090	if (cur_state->has_backref)
1091	{
1092	err = transit_state_bkref (mctx, &cur_state->nodes);
1093	if (BE (err != REG_NOERROR, `0`))
1094	return err;
1095	}
1096	}
1097	}
1098
1099	/ If the RE accepts NULL string. /
1100	if (BE (cur_state->halt, `0`))
1101	{
1102	if (!cur_state->has_constraint
1103	\|\| check_halt_state_context (mctx, cur_state, cur_str_idx))
1104	{
1105	if (!fl_longest_match)
1106	return cur_str_idx;
1107	else
1108	{
1109	match_last = cur_str_idx;
1110	match = `1`;
1111	}
1112	}
1113	}
1114
1115	while (!re_string_eoi (&mctx->input))
1116	{
1117	re_dfastate_t *old_state = cur_state;
1118	Idx next_char_idx = re_string_cur_idx (&mctx->input) + `1`;
1119
1120	if ((BE (next_char_idx >= mctx->input.bufs_len, `0`)
1121	&& mctx->input.bufs_len < mctx->input.len)
1122	\|\| (BE (next_char_idx >= mctx->input.valid_len, `0`)
1123	&& mctx->input.valid_len < mctx->input.len))
1124	{
1125	err = extend_buffers (mctx, next_char_idx + `1`);
1126	if (BE (err != REG_NOERROR, `0`))
1127	{
1128	assert (err == REG_ESPACE);
1129	return -`2`;
1130	}
1131	}
1132
1133	cur_state = transit_state (&err, mctx, cur_state);
1134	if (mctx->state_log != NULL)
1135	cur_state = merge_state_with_log (&err, mctx, cur_state);
1136
1137	if (cur_state == NULL)
1138	{
1139	/ Reached the invalid state or an error. Try to recover a valid*
1140	state using the state log, if available and if we have not
1141	already found a valid (even if not the longest) match. /*
1142	if (BE (err != REG_NOERROR, `0`))
1143	return -`2`;
1144
1145	if (mctx->state_log == NULL
1146	\|\| (match && !fl_longest_match)
1147	\|\| (cur_state = find_recover_state (&err, mctx)) == NULL)
1148	break;
1149	}
1150
1151	if (BE (at_init_state, `0`))
1152	{
1153	if (old_state == cur_state)
1154	next_start_idx = next_char_idx;
1155	else
1156	at_init_state = false;
1157	}
1158
1159	if (cur_state->halt)
1160	{
1161	/ Reached a halt state.*
1162	Check the halt state can satisfy the current context. /*
1163	if (!cur_state->has_constraint
1164	\|\| check_halt_state_context (mctx, cur_state,
1165	re_string_cur_idx (&mctx->input)))
1166	{
1167	/ We found an appropriate halt state. /
1168	match_last = re_string_cur_idx (&mctx->input);
1169	match = `1`;
1170
1171	/ We found a match, do not modify match_first below. /
1172	p_match_first = NULL;
1173	if (!fl_longest_match)
1174	break;
1175	}
1176	}
1177	}
1178
1179	if (p_match_first)
1180	*p_match_first += next_start_idx;
1181
1182	return match_last;
1183	}
1184
1185	/ Check NODE match the current context. /
1186
1187	static bool
1188	check_halt_node_context (const re_dfa_t dfa, Idx node, unsigned* int context)
1189	{
1190	re_token_type_t type = dfa->nodes[node].type;
1191	unsigned int constraint = dfa->nodes[node].constraint;
1192	if (type != END_OF_RE)
1193	return false;
1194	if (!constraint)
1195	return true;
1196	if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1197	return false;
1198	return true;
1199	}
1200
1201	/ Check the halt state STATE match the current context.*
1202	Return 0 if not match, if the node, STATE has, is a halt node and
1203	match the context, return the node. /*
1204
1205	static Idx
1206	check_halt_state_context (const re_match_context_t *mctx,
1207	const re_dfastate_t *state, Idx idx)
1208	{
1209	Idx i;
1210	unsigned int context;
1211	#ifdef DEBUG
1212	assert (state->halt);
1213	#endif
1214	context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1215	for (i = `0`; i < state->nodes.nelem; ++i)
1216	if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1217	return state->nodes.elems[i];
1218	return `0`;
1219	}
1220
1221	/ Compute the next node to which "NFA" transit from NODE("NFA" is a NFA*
1222	corresponding to the DFA).
1223	Return the destination node, and update EPS_VIA_NODES;
1224	return -1 in case of errors. /*
1225
1226	static Idx
1227	proceed_next_node (const re_match_context_t mctx, Idx nregs, regmatch_t regs,
1228	Idx pidx, Idx node, re_node_set eps_via_nodes,
1229	struct re_fail_stack_t *fs)
1230	{
1231	const re_dfa_t *const dfa = mctx->dfa;
1232	Idx i;
1233	bool ok;
1234	if (IS_EPSILON_NODE (dfa->nodes[node].type))
1235	{
1236	re_node_set cur_nodes = &mctx->state_log[pidx]->nodes;
1237	re_node_set *edests = &dfa->edests[node];
1238	Idx dest_node;
1239	ok = re_node_set_insert (eps_via_nodes, node);
1240	if (BE (! ok, `0`))
1241	return -`2`;
1242	/ Pick up a valid destination, or return -1 if none*
1243	is found. /*
1244	for (dest_node = -`1`, i = `0`; i < edests->nelem; ++i)
1245	{
1246	Idx candidate = edests->elems[i];
1247	if (!re_node_set_contains (cur_nodes, candidate))
1248	continue;
1249	if (dest_node == -`1`)
1250	dest_node = candidate;
1251
1252	else
1253	{
1254	/ In order to avoid infinite loop like "(a)", return the second*
1255	epsilon-transition if the first was already considered. /*
1256	if (re_node_set_contains (eps_via_nodes, dest_node))
1257	return candidate;
1258
1259	/ Otherwise, push the second epsilon-transition on the fail stack. /
1260	else if (fs != NULL
1261	&& push_fail_stack (fs, *pidx, candidate, nregs, regs,
1262	eps_via_nodes))
1263	return -`2`;
1264
1265	/ We know we are going to exit. /
1266	break;
1267	}
1268	}
1269	return dest_node;
1270	}
1271	else
1272	{
1273	Idx naccepted = `0`;
1274	re_token_type_t type = dfa->nodes[node].type;
1275
1276	#ifdef RE_ENABLE_I18N
1277	if (dfa->nodes[node].accept_mb)
1278	naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1279	else
1280	#endif /* RE_ENABLE_I18N */
1281	if (type == OP_BACK_REF)
1282	{
1283	Idx subexp_idx = dfa->nodes[node].opr.idx + `1`;
1284	naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1285	if (fs != NULL)
1286	{
1287	if (regs[subexp_idx].rm_so == -`1` \|\| regs[subexp_idx].rm_eo == -`1`)
1288	return -`1`;
1289	else if (naccepted)
1290	{
1291	char buf = (char* *) re_string_get_buffer (&mctx->input);
1292	if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1293	naccepted) != `0`)
1294	return -`1`;
1295	}
1296	}
1297
1298	if (naccepted == `0`)
1299	{
1300	Idx dest_node;
1301	ok = re_node_set_insert (eps_via_nodes, node);
1302	if (BE (! ok, `0`))
1303	return -`2`;
1304	dest_node = dfa->edests[node].elems[`0`];
1305	if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1306	dest_node))
1307	return dest_node;
1308	}
1309	}
1310
1311	if (naccepted != `0`
1312	\|\| check_node_accept (mctx, dfa->nodes + node, *pidx))
1313	{
1314	Idx dest_node = dfa->nexts[node];
1315	pidx = (naccepted == `0`) ? pidx + `1` : *pidx + naccepted;
1316	if (fs && (pidx > mctx->match_last \|\| mctx->state_log[pidx] == NULL
1317	\|\| !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1318	dest_node)))
1319	return -`1`;
1320	re_node_set_empty (eps_via_nodes);
1321	return dest_node;
1322	}
1323	}
1324	return -`1`;
1325	}
1326
1327	static reg_errcode_t
1328	__attribute_warn_unused_result__
1329	push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
1330	Idx nregs, regmatch_t regs, re_node_set eps_via_nodes)
1331	{
1332	reg_errcode_t err;
1333	Idx num = fs->num++;
1334	if (fs->num == fs->alloc)
1335	{
1336	struct re_fail_stack_ent_t *new_array;
1337	new_array = re_realloc (fs->stack, struct re_fail_stack_ent_t,
1338	fs->alloc * `2`);
1339	if (new_array == NULL)
1340	return REG_ESPACE;
1341	fs->alloc *= `2`;
1342	fs->stack = new_array;
1343	}
1344	fs->stack[num].idx = str_idx;
1345	fs->stack[num].node = dest_node;
1346	fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1347	if (fs->stack[num].regs == NULL)
1348	return REG_ESPACE;
1349	memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1350	err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1351	return err;
1352	}
1353
1354	static Idx
1355	pop_fail_stack (struct re_fail_stack_t fs, Idx pidx, Idx nregs,
1356	regmatch_t regs, re_node_set eps_via_nodes)
1357	{
1358	Idx num = --fs->num;
1359	assert (num >= `0`);
1360	*pidx = fs->stack[num].idx;
1361	memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1362	re_node_set_free (eps_via_nodes);
1363	re_free (fs->stack[num].regs);
1364	*eps_via_nodes = fs->stack[num].eps_via_nodes;
1365	return fs->stack[num].node;
1366	}
1367
1368	/ Set the positions where the subexpressions are starts/ends to registers*
1369	PMATCH.
1370	Note: We assume that pmatch[0] is already set, and
1371	pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. /*
1372
1373	static reg_errcode_t
1374	__attribute_warn_unused_result__
1375	set_regs (const regex_t preg, const* re_match_context_t *mctx, size_t nmatch,
1376	regmatch_t *pmatch, bool fl_backtrack)
1377	{
1378	const re_dfa_t *dfa = preg->buffer;
1379	Idx idx, cur_node;
1380	re_node_set eps_via_nodes;
1381	struct re_fail_stack_t *fs;
1382	struct re_fail_stack_t fs_body = { `0`, `2`, NULL };
1383	regmatch_t *prev_idx_match;
1384	bool prev_idx_match_malloced = false;
1385
1386	#ifdef DEBUG
1387	assert (nmatch > `1`);
1388	assert (mctx->state_log != NULL);
1389	#endif
1390	if (fl_backtrack)
1391	{
1392	fs = &fs_body;
1393	fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1394	if (fs->stack == NULL)
1395	return REG_ESPACE;
1396	}
1397	else
1398	fs = NULL;
1399
1400	cur_node = dfa->init_node;
1401	re_node_set_init_empty (&eps_via_nodes);
1402
1403	if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
1404	prev_idx_match = (regmatch_t ) alloca (nmatch sizeof (regmatch_t));
1405	else
1406	{
1407	prev_idx_match = re_malloc (regmatch_t, nmatch);
1408	if (prev_idx_match == NULL)
1409	{
1410	free_fail_stack_return (fs);
1411	return REG_ESPACE;
1412	}
1413	prev_idx_match_malloced = true;
1414	}
1415	memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1416
1417	for (idx = pmatch[`0`].rm_so; idx <= pmatch[`0`].rm_eo ;)
1418	{
1419	update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1420
1421	if (idx == pmatch[`0`].rm_eo && cur_node == mctx->last_node)
1422	{
1423	Idx reg_idx;
1424	if (fs)
1425	{
1426	for (reg_idx = `0`; reg_idx < nmatch; ++reg_idx)
1427	if (pmatch[reg_idx].rm_so > -`1` && pmatch[reg_idx].rm_eo == -`1`)
1428	break;
1429	if (reg_idx == nmatch)
1430	{
1431	re_node_set_free (&eps_via_nodes);
1432	if (prev_idx_match_malloced)
1433	re_free (prev_idx_match);
1434	return free_fail_stack_return (fs);
1435	}
1436	cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1437	&eps_via_nodes);
1438	}
1439	else
1440	{
1441	re_node_set_free (&eps_via_nodes);
1442	if (prev_idx_match_malloced)
1443	re_free (prev_idx_match);
1444	return REG_NOERROR;
1445	}
1446	}
1447
1448	/ Proceed to next node. /
1449	cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1450	&eps_via_nodes, fs);
1451
1452	if (BE (cur_node < `0`, `0`))
1453	{
1454	if (BE (cur_node == -`2`, `0`))
1455	{
1456	re_node_set_free (&eps_via_nodes);
1457	if (prev_idx_match_malloced)
1458	re_free (prev_idx_match);
1459	free_fail_stack_return (fs);
1460	return REG_ESPACE;
1461	}
1462	if (fs)
1463	cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1464	&eps_via_nodes);
1465	else
1466	{
1467	re_node_set_free (&eps_via_nodes);
1468	if (prev_idx_match_malloced)
1469	re_free (prev_idx_match);
1470	return REG_NOMATCH;
1471	}
1472	}
1473	}
1474	re_node_set_free (&eps_via_nodes);
1475	if (prev_idx_match_malloced)
1476	re_free (prev_idx_match);
1477	return free_fail_stack_return (fs);
1478	}
1479
1480	static reg_errcode_t
1481	free_fail_stack_return (struct re_fail_stack_t *fs)
1482	{
1483	if (fs)
1484	{
1485	Idx fs_idx;
1486	for (fs_idx = `0`; fs_idx < fs->num; ++fs_idx)
1487	{
1488	re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1489	re_free (fs->stack[fs_idx].regs);
1490	}
1491	re_free (fs->stack);
1492	}
1493	return REG_NOERROR;
1494	}
1495
1496	static void
1497	update_regs (const re_dfa_t dfa, regmatch_t pmatch,
1498	regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
1499	{
1500	int type = dfa->nodes[cur_node].type;
1501	if (type == OP_OPEN_SUBEXP)
1502	{
1503	Idx reg_num = dfa->nodes[cur_node].opr.idx + `1`;
1504
1505	/ We are at the first node of this sub expression. /
1506	if (reg_num < nmatch)
1507	{
1508	pmatch[reg_num].rm_so = cur_idx;
1509	pmatch[reg_num].rm_eo = -`1`;
1510	}
1511	}
1512	else if (type == OP_CLOSE_SUBEXP)
1513	{
1514	Idx reg_num = dfa->nodes[cur_node].opr.idx + `1`;
1515	if (reg_num < nmatch)
1516	{
1517	/ We are at the last node of this sub expression. /
1518	if (pmatch[reg_num].rm_so < cur_idx)
1519	{
1520	pmatch[reg_num].rm_eo = cur_idx;
1521	/ This is a non-empty match or we are not inside an optional*
1522	subexpression. Accept this right away. /*
1523	memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1524	}
1525	else
1526	{
1527	if (dfa->nodes[cur_node].opt_subexp
1528	&& prev_idx_match[reg_num].rm_so != -`1`)
1529	/ We transited through an empty match for an optional*
1530	subexpression, like (a?), and this is not the subexp's*
1531	first match. Copy back the old content of the registers
1532	so that matches of an inner subexpression are undone as
1533	well, like in ((a?)). /
1534	memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1535	else
1536	/ We completed a subexpression, but it may be part of*
1537	an optional one, so do not update PREV_IDX_MATCH. /*
1538	pmatch[reg_num].rm_eo = cur_idx;
1539	}
1540	}
1541	}
1542	}
1543
1544	/ This function checks the STATE_LOG from the SCTX->last_str_idx to 0*
1545	and sift the nodes in each states according to the following rules.
1546	Updated state_log will be wrote to STATE_LOG.
1547
1548	Rules: We throw away the Node 'a' in the STATE_LOG[STR_IDX] if...
1549	1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1550	If 'a' isn't the LAST_NODE and 'a' can't epsilon transit to
1551	the LAST_NODE, we throw away the node 'a'.
1552	2. When 0 <= STR_IDX < MATCH_LAST and 'a' accepts
1553	string 's' and transit to 'b':
1554	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1555	away the node 'a'.
1556	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1557	thrown away, we throw away the node 'a'.
1558	3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1559	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1560	node 'a'.
1561	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1562	we throw away the node 'a'. /*
1563
1564	#define STATE_NODE_CONTAINS(state,node) \
1565	((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1566
1567	static reg_errcode_t
1568	sift_states_backward (const re_match_context_t mctx, re_sift_context_t sctx)
1569	{
1570	reg_errcode_t err;
1571	int null_cnt = `0`;
1572	Idx str_idx = sctx->last_str_idx;
1573	re_node_set cur_dest;
1574
1575	#ifdef DEBUG
1576	assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1577	#endif
1578
1579	/ Build sifted state_log[str_idx]. It has the nodes which can epsilon*
1580	transit to the last_node and the last_node itself. /*
1581	err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1582	if (BE (err != REG_NOERROR, `0`))
1583	return err;
1584	err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1585	if (BE (err != REG_NOERROR, `0`))
1586	goto free_return;
1587
1588	/ Then check each states in the state_log. /
1589	while (str_idx > `0`)
1590	{
1591	/ Update counters. /
1592	null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + `1` : `0`;
1593	if (null_cnt > mctx->max_mb_elem_len)
1594	{
1595	memset (sctx->sifted_states, `'\0'`,
1596	sizeof (re_dfastate_t ) str_idx);
1597	re_node_set_free (&cur_dest);
1598	return REG_NOERROR;
1599	}
1600	re_node_set_empty (&cur_dest);
1601	--str_idx;
1602
1603	if (mctx->state_log[str_idx])
1604	{
1605	err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1606	if (BE (err != REG_NOERROR, `0`))
1607	goto free_return;
1608	}
1609
1610	/ Add all the nodes which satisfy the following conditions:*
1611	- It can epsilon transit to a node in CUR_DEST.
1612	- It is in CUR_SRC.
1613	And update state_log. /*
1614	err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1615	if (BE (err != REG_NOERROR, `0`))
1616	goto free_return;
1617	}
1618	err = REG_NOERROR;
1619	free_return:
1620	re_node_set_free (&cur_dest);
1621	return err;
1622	}
1623
1624	static reg_errcode_t
1625	__attribute_warn_unused_result__
1626	build_sifted_states (const re_match_context_t mctx, re_sift_context_t sctx,
1627	Idx str_idx, re_node_set *cur_dest)
1628	{
1629	const re_dfa_t *const dfa = mctx->dfa;
1630	const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1631	Idx i;
1632
1633	/ Then build the next sifted state.*
1634	We build the next sifted state on 'cur_dest', and update
1635	'sifted_states[str_idx]' with 'cur_dest'.
1636	Note:
1637	'cur_dest' is the sifted state from 'state_log[str_idx + 1]'.
1638	'cur_src' points the node_set of the old 'state_log[str_idx]'
1639	(with the epsilon nodes pre-filtered out). /*
1640	for (i = `0`; i < cur_src->nelem; i++)
1641	{
1642	Idx prev_node = cur_src->elems[i];
1643	int naccepted = `0`;
1644	bool ok;
1645
1646	#ifdef DEBUG
1647	re_token_type_t type = dfa->nodes[prev_node].type;
1648	assert (!IS_EPSILON_NODE (type));
1649	#endif
1650	#ifdef RE_ENABLE_I18N
1651	/ If the node may accept "multi byte". /
1652	if (dfa->nodes[prev_node].accept_mb)
1653	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1654	str_idx, sctx->last_str_idx);
1655	#endif /* RE_ENABLE_I18N */
1656
1657	/ We don't check backreferences here.*
1658	See update_cur_sifted_state(). /*
1659	if (!naccepted
1660	&& check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1661	&& STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + `1`],
1662	dfa->nexts[prev_node]))
1663	naccepted = `1`;
1664
1665	if (naccepted == `0`)
1666	continue;
1667
1668	if (sctx->limits.nelem)
1669	{
1670	Idx to_idx = str_idx + naccepted;
1671	if (check_dst_limits (mctx, &sctx->limits,
1672	dfa->nexts[prev_node], to_idx,
1673	prev_node, str_idx))
1674	continue;
1675	}
1676	ok = re_node_set_insert (cur_dest, prev_node);
1677	if (BE (! ok, `0`))
1678	return REG_ESPACE;
1679	}
1680
1681	return REG_NOERROR;
1682	}
1683
1684	/ Helper functions. /
1685
1686	static reg_errcode_t
1687	clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
1688	{
1689	Idx top = mctx->state_log_top;
1690
1691	if ((next_state_log_idx >= mctx->input.bufs_len
1692	&& mctx->input.bufs_len < mctx->input.len)
1693	\|\| (next_state_log_idx >= mctx->input.valid_len
1694	&& mctx->input.valid_len < mctx->input.len))
1695	{
1696	reg_errcode_t err;
1697	err = extend_buffers (mctx, next_state_log_idx + `1`);
1698	if (BE (err != REG_NOERROR, `0`))
1699	return err;
1700	}
1701
1702	if (top < next_state_log_idx)
1703	{
1704	memset (mctx->state_log + top + `1`, `'\0'`,
1705	sizeof (re_dfastate_t ) (next_state_log_idx - top));
1706	mctx->state_log_top = next_state_log_idx;
1707	}
1708	return REG_NOERROR;
1709	}
1710
1711	static reg_errcode_t
1712	merge_state_array (const re_dfa_t dfa, re_dfastate_t *dst,
1713	re_dfastate_t **src, Idx num)
1714	{
1715	Idx st_idx;
1716	reg_errcode_t err;
1717	for (st_idx = `0`; st_idx < num; ++st_idx)
1718	{
1719	if (dst[st_idx] == NULL)
1720	dst[st_idx] = src[st_idx];
1721	else if (src[st_idx] != NULL)
1722	{
1723	re_node_set merged_set;
1724	err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1725	&src[st_idx]->nodes);
1726	if (BE (err != REG_NOERROR, `0`))
1727	return err;
1728	dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1729	re_node_set_free (&merged_set);
1730	if (BE (err != REG_NOERROR, `0`))
1731	return err;
1732	}
1733	}
1734	return REG_NOERROR;
1735	}
1736
1737	static reg_errcode_t
1738	update_cur_sifted_state (const re_match_context_t *mctx,
1739	re_sift_context_t *sctx, Idx str_idx,
1740	re_node_set *dest_nodes)
1741	{
1742	const re_dfa_t *const dfa = mctx->dfa;
1743	reg_errcode_t err = REG_NOERROR;
1744	const re_node_set *candidates;
1745	candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1746	: &mctx->state_log[str_idx]->nodes);
1747
1748	if (dest_nodes->nelem == `0`)
1749	sctx->sifted_states[str_idx] = NULL;
1750	else
1751	{
1752	if (candidates)
1753	{
1754	/ At first, add the nodes which can epsilon transit to a node in*
1755	DEST_NODE. /*
1756	err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1757	if (BE (err != REG_NOERROR, `0`))
1758	return err;
1759
1760	/ Then, check the limitations in the current sift_context. /
1761	if (sctx->limits.nelem)
1762	{
1763	err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1764	mctx->bkref_ents, str_idx);
1765	if (BE (err != REG_NOERROR, `0`))
1766	return err;
1767	}
1768	}
1769
1770	sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1771	if (BE (err != REG_NOERROR, `0`))
1772	return err;
1773	}
1774
1775	if (candidates && mctx->state_log[str_idx]->has_backref)
1776	{
1777	err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1778	if (BE (err != REG_NOERROR, `0`))
1779	return err;
1780	}
1781	return REG_NOERROR;
1782	}
1783
1784	static reg_errcode_t
1785	__attribute_warn_unused_result__
1786	add_epsilon_src_nodes (const re_dfa_t dfa, re_node_set dest_nodes,
1787	const re_node_set *candidates)
1788	{
1789	reg_errcode_t err = REG_NOERROR;
1790	Idx i;
1791
1792	re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1793	if (BE (err != REG_NOERROR, `0`))
1794	return err;
1795
1796	if (!state->inveclosure.alloc)
1797	{
1798	err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1799	if (BE (err != REG_NOERROR, `0`))
1800	return REG_ESPACE;
1801	for (i = `0`; i < dest_nodes->nelem; i++)
1802	{
1803	err = re_node_set_merge (&state->inveclosure,
1804	dfa->inveclosures + dest_nodes->elems[i]);
1805	if (BE (err != REG_NOERROR, `0`))
1806	return REG_ESPACE;
1807	}
1808	}
1809	return re_node_set_add_intersect (dest_nodes, candidates,
1810	&state->inveclosure);
1811	}
1812
1813	static reg_errcode_t
1814	sub_epsilon_src_nodes (const re_dfa_t dfa, Idx node, re_node_set dest_nodes,
1815	const re_node_set *candidates)
1816	{
1817	Idx ecl_idx;
1818	reg_errcode_t err;
1819	re_node_set *inv_eclosure = dfa->inveclosures + node;
1820	re_node_set except_nodes;
1821	re_node_set_init_empty (&except_nodes);
1822	for (ecl_idx = `0`; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1823	{
1824	Idx cur_node = inv_eclosure->elems[ecl_idx];
1825	if (cur_node == node)
1826	continue;
1827	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1828	{
1829	Idx edst1 = dfa->edests[cur_node].elems[`0`];
1830	Idx edst2 = ((dfa->edests[cur_node].nelem > `1`)
1831	? dfa->edests[cur_node].elems[`1`] : -`1`);
1832	if ((!re_node_set_contains (inv_eclosure, edst1)
1833	&& re_node_set_contains (dest_nodes, edst1))
1834	\|\| (edst2 > `0`
1835	&& !re_node_set_contains (inv_eclosure, edst2)
1836	&& re_node_set_contains (dest_nodes, edst2)))
1837	{
1838	err = re_node_set_add_intersect (&except_nodes, candidates,
1839	dfa->inveclosures + cur_node);
1840	if (BE (err != REG_NOERROR, `0`))
1841	{
1842	re_node_set_free (&except_nodes);
1843	return err;
1844	}
1845	}
1846	}
1847	}
1848	for (ecl_idx = `0`; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1849	{
1850	Idx cur_node = inv_eclosure->elems[ecl_idx];
1851	if (!re_node_set_contains (&except_nodes, cur_node))
1852	{
1853	Idx idx = re_node_set_contains (dest_nodes, cur_node) - `1`;
1854	re_node_set_remove_at (dest_nodes, idx);
1855	}
1856	}
1857	re_node_set_free (&except_nodes);
1858	return REG_NOERROR;
1859	}
1860
1861	static bool
1862	check_dst_limits (const re_match_context_t mctx, const* re_node_set *limits,
1863	Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
1864	{
1865	const re_dfa_t *const dfa = mctx->dfa;
1866	Idx lim_idx, src_pos, dst_pos;
1867
1868	Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1869	Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1870	for (lim_idx = `0`; lim_idx < limits->nelem; ++lim_idx)
1871	{
1872	Idx subexp_idx;
1873	struct re_backref_cache_entry *ent;
1874	ent = mctx->bkref_ents + limits->elems[lim_idx];
1875	subexp_idx = dfa->nodes[ent->node].opr.idx;
1876
1877	dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1878	subexp_idx, dst_node, dst_idx,
1879	dst_bkref_idx);
1880	src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1881	subexp_idx, src_node, src_idx,
1882	src_bkref_idx);
1883
1884	/ In case of:*
1885	<src> <dst> ( <subexp> )
1886	( <subexp> ) <src> <dst>
1887	( <subexp1> <src> <subexp2> <dst> <subexp3> ) /*
1888	if (src_pos == dst_pos)
1889	continue; / This is unrelated limitation. /
1890	else
1891	return true;
1892	}
1893	return false;
1894	}
1895
1896	static int
1897	check_dst_limits_calc_pos_1 (const re_match_context_t mctx, int* boundaries,
1898	Idx subexp_idx, Idx from_node, Idx bkref_idx)
1899	{
1900	const re_dfa_t *const dfa = mctx->dfa;
1901	const re_node_set *eclosures = dfa->eclosures + from_node;
1902	Idx node_idx;
1903
1904	/ Else, we are on the boundary: examine the nodes on the epsilon*
1905	closure. /*
1906	for (node_idx = `0`; node_idx < eclosures->nelem; ++node_idx)
1907	{
1908	Idx node = eclosures->elems[node_idx];
1909	switch (dfa->nodes[node].type)
1910	{
1911	case OP_BACK_REF:
1912	if (bkref_idx != -`1`)
1913	{
1914	struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1915	do
1916	{
1917	Idx dst;
1918	int cpos;
1919
1920	if (ent->node != node)
1921	continue;
1922
1923	if (subexp_idx < BITSET_WORD_BITS
1924	&& !(ent->eps_reachable_subexps_map
1925	& ((bitset_word_t) `1` << subexp_idx)))
1926	continue;
1927
1928	/ Recurse trying to reach the OP_OPEN_SUBEXP and*
1929	OP_CLOSE_SUBEXP cases below. But, if the
1930	destination node is the same node as the source
1931	node, don't recurse because it would cause an
1932	infinite loop: a regex that exhibits this behavior
1933	is ()\1\1* /
1934	dst = dfa->edests[node].elems[`0`];
1935	if (dst == from_node)
1936	{
1937	if (boundaries & `1`)
1938	return -`1`;
1939	else / if (boundaries & 2) /
1940	return `0`;
1941	}
1942
1943	cpos =
1944	check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1945	dst, bkref_idx);
1946	if (cpos == -`1` / && (boundaries & 1) /)
1947	return -`1`;
1948	if (cpos == `0` && (boundaries & `2`))
1949	return `0`;
1950
1951	if (subexp_idx < BITSET_WORD_BITS)
1952	ent->eps_reachable_subexps_map
1953	&= ~((bitset_word_t) `1` << subexp_idx);
1954	}
1955	while (ent++->more);
1956	}
1957	break;
1958
1959	case OP_OPEN_SUBEXP:
1960	if ((boundaries & `1`) && subexp_idx == dfa->nodes[node].opr.idx)
1961	return -`1`;
1962	break;
1963
1964	case OP_CLOSE_SUBEXP:
1965	if ((boundaries & `2`) && subexp_idx == dfa->nodes[node].opr.idx)
1966	return `0`;
1967	break;
1968
1969	default:
1970	break;
1971	}
1972	}
1973
1974	return (boundaries & `2`) ? `1` : `0`;
1975	}
1976
1977	static int
1978	check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,
1979	Idx subexp_idx, Idx from_node, Idx str_idx,
1980	Idx bkref_idx)
1981	{
1982	struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
1983	int boundaries;
1984
1985	/ If we are outside the range of the subexpression, return -1 or 1. /
1986	if (str_idx < lim->subexp_from)
1987	return -`1`;
1988
1989	if (lim->subexp_to < str_idx)
1990	return `1`;
1991
1992	/ If we are within the subexpression, return 0. /
1993	boundaries = (str_idx == lim->subexp_from);
1994	boundaries \|= (str_idx == lim->subexp_to) << `1`;
1995	if (boundaries == `0`)
1996	return `0`;
1997
1998	/ Else, examine epsilon closure. /
1999	return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2000	from_node, bkref_idx);
2001	}
2002
2003	/ Check the limitations of sub expressions LIMITS, and remove the nodes*
2004	which are against limitations from DEST_NODES. /*
2005
2006	static reg_errcode_t
2007	check_subexp_limits (const re_dfa_t dfa, re_node_set dest_nodes,
2008	const re_node_set candidates, re_node_set limits,
2009	struct re_backref_cache_entry *bkref_ents, Idx str_idx)
2010	{
2011	reg_errcode_t err;
2012	Idx node_idx, lim_idx;
2013
2014	for (lim_idx = `0`; lim_idx < limits->nelem; ++lim_idx)
2015	{
2016	Idx subexp_idx;
2017	struct re_backref_cache_entry *ent;
2018	ent = bkref_ents + limits->elems[lim_idx];
2019
2020	if (str_idx <= ent->subexp_from \|\| ent->str_idx < str_idx)
2021	continue; / This is unrelated limitation. /
2022
2023	subexp_idx = dfa->nodes[ent->node].opr.idx;
2024	if (ent->subexp_to == str_idx)
2025	{
2026	Idx ops_node = -`1`;
2027	Idx cls_node = -`1`;
2028	for (node_idx = `0`; node_idx < dest_nodes->nelem; ++node_idx)
2029	{
2030	Idx node = dest_nodes->elems[node_idx];
2031	re_token_type_t type = dfa->nodes[node].type;
2032	if (type == OP_OPEN_SUBEXP
2033	&& subexp_idx == dfa->nodes[node].opr.idx)
2034	ops_node = node;
2035	else if (type == OP_CLOSE_SUBEXP
2036	&& subexp_idx == dfa->nodes[node].opr.idx)
2037	cls_node = node;
2038	}
2039
2040	/ Check the limitation of the open subexpression. /
2041	/ Note that (ent->subexp_to = str_idx != ent->subexp_from). /
2042	if (ops_node >= `0`)
2043	{
2044	err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2045	candidates);
2046	if (BE (err != REG_NOERROR, `0`))
2047	return err;
2048	}
2049
2050	/ Check the limitation of the close subexpression. /
2051	if (cls_node >= `0`)
2052	for (node_idx = `0`; node_idx < dest_nodes->nelem; ++node_idx)
2053	{
2054	Idx node = dest_nodes->elems[node_idx];
2055	if (!re_node_set_contains (dfa->inveclosures + node,
2056	cls_node)
2057	&& !re_node_set_contains (dfa->eclosures + node,
2058	cls_node))
2059	{
2060	/ It is against this limitation.*
2061	Remove it form the current sifted state. /*
2062	err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2063	candidates);
2064	if (BE (err != REG_NOERROR, `0`))
2065	return err;
2066	--node_idx;
2067	}
2068	}
2069	}
2070	else / (ent->subexp_to != str_idx) /
2071	{
2072	for (node_idx = `0`; node_idx < dest_nodes->nelem; ++node_idx)
2073	{
2074	Idx node = dest_nodes->elems[node_idx];
2075	re_token_type_t type = dfa->nodes[node].type;
2076	if (type == OP_CLOSE_SUBEXP \|\| type == OP_OPEN_SUBEXP)
2077	{
2078	if (subexp_idx != dfa->nodes[node].opr.idx)
2079	continue;
2080	/ It is against this limitation.*
2081	Remove it form the current sifted state. /*
2082	err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2083	candidates);
2084	if (BE (err != REG_NOERROR, `0`))
2085	return err;
2086	}
2087	}
2088	}
2089	}
2090	return REG_NOERROR;
2091	}
2092
2093	static reg_errcode_t
2094	__attribute_warn_unused_result__
2095	sift_states_bkref (const re_match_context_t mctx, re_sift_context_t sctx,
2096	Idx str_idx, const re_node_set *candidates)
2097	{
2098	const re_dfa_t *const dfa = mctx->dfa;
2099	reg_errcode_t err;
2100	Idx node_idx, node;
2101	re_sift_context_t local_sctx;
2102	Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
2103
2104	if (first_idx == -`1`)
2105	return REG_NOERROR;
2106
2107	local_sctx.sifted_states = NULL; / Mark that it hasn't been initialized. /
2108
2109	for (node_idx = `0`; node_idx < candidates->nelem; ++node_idx)
2110	{
2111	Idx enabled_idx;
2112	re_token_type_t type;
2113	struct re_backref_cache_entry *entry;
2114	node = candidates->elems[node_idx];
2115	type = dfa->nodes[node].type;
2116	/ Avoid infinite loop for the REs like "()\1+". /
2117	if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2118	continue;
2119	if (type != OP_BACK_REF)
2120	continue;
2121
2122	entry = mctx->bkref_ents + first_idx;
2123	enabled_idx = first_idx;
2124	do
2125	{
2126	Idx subexp_len;
2127	Idx to_idx;
2128	Idx dst_node;
2129	bool ok;
2130	re_dfastate_t *cur_state;
2131
2132	if (entry->node != node)
2133	continue;
2134	subexp_len = entry->subexp_to - entry->subexp_from;
2135	to_idx = str_idx + subexp_len;
2136	dst_node = (subexp_len ? dfa->nexts[node]
2137	: dfa->edests[node].elems[`0`]);
2138
2139	if (to_idx > sctx->last_str_idx
2140	\|\| sctx->sifted_states[to_idx] == NULL
2141	\|\| !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2142	\|\| check_dst_limits (mctx, &sctx->limits, node,
2143	str_idx, dst_node, to_idx))
2144	continue;
2145
2146	if (local_sctx.sifted_states == NULL)
2147	{
2148	local_sctx = *sctx;
2149	err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2150	if (BE (err != REG_NOERROR, `0`))
2151	goto free_return;
2152	}
2153	local_sctx.last_node = node;
2154	local_sctx.last_str_idx = str_idx;
2155	ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
2156	if (BE (! ok, `0`))
2157	{
2158	err = REG_ESPACE;
2159	goto free_return;
2160	}
2161	cur_state = local_sctx.sifted_states[str_idx];
2162	err = sift_states_backward (mctx, &local_sctx);
2163	if (BE (err != REG_NOERROR, `0`))
2164	goto free_return;
2165	if (sctx->limited_states != NULL)
2166	{
2167	err = merge_state_array (dfa, sctx->limited_states,
2168	local_sctx.sifted_states,
2169	str_idx + `1`);
2170	if (BE (err != REG_NOERROR, `0`))
2171	goto free_return;
2172	}
2173	local_sctx.sifted_states[str_idx] = cur_state;
2174	re_node_set_remove (&local_sctx.limits, enabled_idx);
2175
2176	/ mctx->bkref_ents may have changed, reload the pointer. /
2177	entry = mctx->bkref_ents + enabled_idx;
2178	}
2179	while (enabled_idx++, entry++->more);
2180	}
2181	err = REG_NOERROR;
2182	free_return:
2183	if (local_sctx.sifted_states != NULL)
2184	{
2185	re_node_set_free (&local_sctx.limits);
2186	}
2187
2188	return err;
2189	}
2190
2191
2192	#ifdef RE_ENABLE_I18N
2193	static int
2194	sift_states_iter_mb (const re_match_context_t mctx, re_sift_context_t sctx,
2195	Idx node_idx, Idx str_idx, Idx max_str_idx)
2196	{
2197	const re_dfa_t *const dfa = mctx->dfa;
2198	int naccepted;
2199	/ Check the node can accept "multi byte". /
2200	naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2201	if (naccepted > `0` && str_idx + naccepted <= max_str_idx &&
2202	!STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2203	dfa->nexts[node_idx]))
2204	/ The node can't accept the "multi byte", or the*
2205	destination was already thrown away, then the node
2206	could't accept the current input "multi byte". /*
2207	naccepted = `0`;
2208	/ Otherwise, it is sure that the node could accept*
2209	'naccepted' bytes input. /*
2210	return naccepted;
2211	}
2212	#endif /* RE_ENABLE_I18N */
2213
2214
2215	/ Functions for state transition. /
2216
2217	/ Return the next state to which the current state STATE will transit by*
2218	accepting the current input byte, and update STATE_LOG if necessary.
2219	If STATE can accept a multibyte char/collating element/back reference
2220	update the destination of STATE_LOG. /*
2221
2222	static re_dfastate_t *
2223	__attribute_warn_unused_result__
2224	transit_state (reg_errcode_t err, re_match_context_t mctx,
2225	re_dfastate_t *state)
2226	{
2227	re_dfastate_t **trtable;
2228	unsigned char ch;
2229
2230	#ifdef RE_ENABLE_I18N
2231	/ If the current state can accept multibyte. /
2232	if (BE (state->accept_mb, `0`))
2233	{
2234	*err = transit_state_mb (mctx, state);
2235	if (BE (*err != REG_NOERROR, `0`))
2236	return NULL;
2237	}
2238	#endif /* RE_ENABLE_I18N */
2239
2240	/ Then decide the next state with the single byte. /
2241	#if 0
2242	if (`0`)
2243	/ don't use transition table /
2244	return transit_state_sb (err, mctx, state);
2245	#endif
2246
2247	/ Use transition table /
2248	ch = re_string_fetch_byte (&mctx->input);
2249	for (;;)
2250	{
2251	trtable = state->trtable;
2252	if (BE (trtable != NULL, `1`))
2253	return trtable[ch];
2254
2255	trtable = state->word_trtable;
2256	if (BE (trtable != NULL, `1`))
2257	{
2258	unsigned int context;
2259	context
2260	= re_string_context_at (&mctx->input,
2261	re_string_cur_idx (&mctx->input) - `1`,
2262	mctx->eflags);
2263	if (IS_WORD_CONTEXT (context))
2264	return trtable[ch + SBC_MAX];
2265	else
2266	return trtable[ch];
2267	}
2268
2269	if (!build_trtable (mctx->dfa, state))
2270	{
2271	*err = REG_ESPACE;
2272	return NULL;
2273	}
2274
2275	/ Retry, we now have a transition table. /
2276	}
2277	}
2278
2279	/ Update the state_log if we need /
2280	static re_dfastate_t *
2281	merge_state_with_log (reg_errcode_t err, re_match_context_t mctx,
2282	re_dfastate_t *next_state)
2283	{
2284	const re_dfa_t *const dfa = mctx->dfa;
2285	Idx cur_idx = re_string_cur_idx (&mctx->input);
2286
2287	if (cur_idx > mctx->state_log_top)
2288	{
2289	mctx->state_log[cur_idx] = next_state;
2290	mctx->state_log_top = cur_idx;
2291	}
2292	else if (mctx->state_log[cur_idx] == `0`)
2293	{
2294	mctx->state_log[cur_idx] = next_state;
2295	}
2296	else
2297	{
2298	re_dfastate_t *pstate;
2299	unsigned int context;
2300	re_node_set next_nodes, log_nodes, table_nodes = NULL;
2301	/ If (state_log[cur_idx] != 0), it implies that cur_idx is*
2302	the destination of a multibyte char/collating element/
2303	back reference. Then the next state is the union set of
2304	these destinations and the results of the transition table. /*
2305	pstate = mctx->state_log[cur_idx];
2306	log_nodes = pstate->entrance_nodes;
2307	if (next_state != NULL)
2308	{
2309	table_nodes = next_state->entrance_nodes;
2310	*err = re_node_set_init_union (&next_nodes, table_nodes,
2311	log_nodes);
2312	if (BE (*err != REG_NOERROR, `0`))
2313	return NULL;
2314	}
2315	else
2316	next_nodes = *log_nodes;
2317	/ Note: We already add the nodes of the initial state,*
2318	then we don't need to add them here. /*
2319
2320	context = re_string_context_at (&mctx->input,
2321	re_string_cur_idx (&mctx->input) - `1`,
2322	mctx->eflags);
2323	next_state = mctx->state_log[cur_idx]
2324	= re_acquire_state_context (err, dfa, &next_nodes, context);
2325	/ We don't need to check errors here, since the return value of*
2326	this function is next_state and ERR is already set. /*
2327
2328	if (table_nodes != NULL)
2329	re_node_set_free (&next_nodes);
2330	}
2331
2332	if (BE (dfa->nbackref, `0`) && next_state != NULL)
2333	{
2334	/ Check OP_OPEN_SUBEXP in the current state in case that we use them*
2335	later. We must check them here, since the back references in the
2336	next state might use them. /*
2337	*err = check_subexp_matching_top (mctx, &next_state->nodes,
2338	cur_idx);
2339	if (BE (*err != REG_NOERROR, `0`))
2340	return NULL;
2341
2342	/ If the next state has back references. /
2343	if (next_state->has_backref)
2344	{
2345	*err = transit_state_bkref (mctx, &next_state->nodes);
2346	if (BE (*err != REG_NOERROR, `0`))
2347	return NULL;
2348	next_state = mctx->state_log[cur_idx];
2349	}
2350	}
2351
2352	return next_state;
2353	}
2354
2355	/ Skip bytes in the input that correspond to part of a*
2356	multi-byte match, then look in the log for a state
2357	from which to restart matching. /*
2358	static re_dfastate_t *
2359	find_recover_state (reg_errcode_t err, re_match_context_t mctx)
2360	{
2361	re_dfastate_t *cur_state;
2362	do
2363	{
2364	Idx max = mctx->state_log_top;
2365	Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2366
2367	do
2368	{
2369	if (++cur_str_idx > max)
2370	return NULL;
2371	re_string_skip_bytes (&mctx->input, `1`);
2372	}
2373	while (mctx->state_log[cur_str_idx] == NULL);
2374
2375	cur_state = merge_state_with_log (err, mctx, NULL);
2376	}
2377	while (*err == REG_NOERROR && cur_state == NULL);
2378	return cur_state;
2379	}
2380
2381	/ Helper functions for transit_state. /
2382
2383	/ From the node set CUR_NODES, pick up the nodes whose types are*
2384	OP_OPEN_SUBEXP and which have corresponding back references in the regular
2385	expression. And register them to use them later for evaluating the
2386	corresponding back references. /*
2387
2388	static reg_errcode_t
2389	check_subexp_matching_top (re_match_context_t mctx, re_node_set cur_nodes,
2390	Idx str_idx)
2391	{
2392	const re_dfa_t *const dfa = mctx->dfa;
2393	Idx node_idx;
2394	reg_errcode_t err;
2395
2396	/ TODO: This isn't efficient.*
2397	Because there might be more than one nodes whose types are
2398	OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2399	nodes.
2400	E.g. RE: (a){2} /*
2401	for (node_idx = `0`; node_idx < cur_nodes->nelem; ++node_idx)
2402	{
2403	Idx node = cur_nodes->elems[node_idx];
2404	if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2405	&& dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2406	&& (dfa->used_bkref_map
2407	& ((bitset_word_t) `1` << dfa->nodes[node].opr.idx)))
2408	{
2409	err = match_ctx_add_subtop (mctx, node, str_idx);
2410	if (BE (err != REG_NOERROR, `0`))
2411	return err;
2412	}
2413	}
2414	return REG_NOERROR;
2415	}
2416
2417	#if 0
2418	/ Return the next state to which the current state STATE will transit by*
2419	accepting the current input byte. /*
2420
2421	static re_dfastate_t *
2422	transit_state_sb (reg_errcode_t err, re_match_context_t mctx,
2423	re_dfastate_t *state)
2424	{
2425	const re_dfa_t *const dfa = mctx->dfa;
2426	re_node_set next_nodes;
2427	re_dfastate_t *next_state;
2428	Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2429	unsigned int context;
2430
2431	*err = re_node_set_alloc (&next_nodes, state->nodes.nelem + `1`);
2432	if (BE (*err != REG_NOERROR, `0`))
2433	return NULL;
2434	for (node_cnt = `0`; node_cnt < state->nodes.nelem; ++node_cnt)
2435	{
2436	Idx cur_node = state->nodes.elems[node_cnt];
2437	if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2438	{
2439	*err = re_node_set_merge (&next_nodes,
2440	dfa->eclosures + dfa->nexts[cur_node]);
2441	if (BE (*err != REG_NOERROR, `0`))
2442	{
2443	re_node_set_free (&next_nodes);
2444	return NULL;
2445	}
2446	}
2447	}
2448	context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2449	next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2450	/ We don't need to check errors here, since the return value of*
2451	this function is next_state and ERR is already set. /*
2452
2453	re_node_set_free (&next_nodes);
2454	re_string_skip_bytes (&mctx->input, `1`);
2455	return next_state;
2456	}
2457	#endif
2458
2459	#ifdef RE_ENABLE_I18N
2460	static reg_errcode_t
2461	transit_state_mb (re_match_context_t mctx, re_dfastate_t pstate)
2462	{
2463	const re_dfa_t *const dfa = mctx->dfa;
2464	reg_errcode_t err;
2465	Idx i;
2466
2467	for (i = `0`; i < pstate->nodes.nelem; ++i)
2468	{
2469	re_node_set dest_nodes, *new_nodes;
2470	Idx cur_node_idx = pstate->nodes.elems[i];
2471	int naccepted;
2472	Idx dest_idx;
2473	unsigned int context;
2474	re_dfastate_t *dest_state;
2475
2476	if (!dfa->nodes[cur_node_idx].accept_mb)
2477	continue;
2478
2479	if (dfa->nodes[cur_node_idx].constraint)
2480	{
2481	context = re_string_context_at (&mctx->input,
2482	re_string_cur_idx (&mctx->input),
2483	mctx->eflags);
2484	if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2485	context))
2486	continue;
2487	}
2488
2489	/ How many bytes the node can accept? /
2490	naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2491	re_string_cur_idx (&mctx->input));
2492	if (naccepted == `0`)
2493	continue;
2494
2495	/ The node can accepts 'naccepted' bytes. /
2496	dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2497	mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2498	: mctx->max_mb_elem_len);
2499	err = clean_state_log_if_needed (mctx, dest_idx);
2500	if (BE (err != REG_NOERROR, `0`))
2501	return err;
2502	#ifdef DEBUG
2503	assert (dfa->nexts[cur_node_idx] != -`1`);
2504	#endif
2505	new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2506
2507	dest_state = mctx->state_log[dest_idx];
2508	if (dest_state == NULL)
2509	dest_nodes = *new_nodes;
2510	else
2511	{
2512	err = re_node_set_init_union (&dest_nodes,
2513	dest_state->entrance_nodes, new_nodes);
2514	if (BE (err != REG_NOERROR, `0`))
2515	return err;
2516	}
2517	context = re_string_context_at (&mctx->input, dest_idx - `1`,
2518	mctx->eflags);
2519	mctx->state_log[dest_idx]
2520	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2521	if (dest_state != NULL)
2522	re_node_set_free (&dest_nodes);
2523	if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, `0`))
2524	return err;
2525	}
2526	return REG_NOERROR;
2527	}
2528	#endif /* RE_ENABLE_I18N */
2529
2530	static reg_errcode_t
2531	transit_state_bkref (re_match_context_t mctx, const* re_node_set *nodes)
2532	{
2533	const re_dfa_t *const dfa = mctx->dfa;
2534	reg_errcode_t err;
2535	Idx i;
2536	Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2537
2538	for (i = `0`; i < nodes->nelem; ++i)
2539	{
2540	Idx dest_str_idx, prev_nelem, bkc_idx;
2541	Idx node_idx = nodes->elems[i];
2542	unsigned int context;
2543	const re_token_t *node = dfa->nodes + node_idx;
2544	re_node_set *new_dest_nodes;
2545
2546	/ Check whether 'node' is a backreference or not. /
2547	if (node->type != OP_BACK_REF)
2548	continue;
2549
2550	if (node->constraint)
2551	{
2552	context = re_string_context_at (&mctx->input, cur_str_idx,
2553	mctx->eflags);
2554	if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2555	continue;
2556	}
2557
2558	/ 'node' is a backreference.*
2559	Check the substring which the substring matched. /*
2560	bkc_idx = mctx->nbkref_ents;
2561	err = get_subexp (mctx, node_idx, cur_str_idx);
2562	if (BE (err != REG_NOERROR, `0`))
2563	goto free_return;
2564
2565	/ And add the epsilon closures (which is 'new_dest_nodes') of*
2566	the backreference to appropriate state_log. /*
2567	#ifdef DEBUG
2568	assert (dfa->nexts[node_idx] != -`1`);
2569	#endif
2570	for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2571	{
2572	Idx subexp_len;
2573	re_dfastate_t *dest_state;
2574	struct re_backref_cache_entry *bkref_ent;
2575	bkref_ent = mctx->bkref_ents + bkc_idx;
2576	if (bkref_ent->node != node_idx \|\| bkref_ent->str_idx != cur_str_idx)
2577	continue;
2578	subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2579	new_dest_nodes = (subexp_len == `0`
2580	? dfa->eclosures + dfa->edests[node_idx].elems[`0`]
2581	: dfa->eclosures + dfa->nexts[node_idx]);
2582	dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2583	- bkref_ent->subexp_from);
2584	context = re_string_context_at (&mctx->input, dest_str_idx - `1`,
2585	mctx->eflags);
2586	dest_state = mctx->state_log[dest_str_idx];
2587	prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? `0`
2588	: mctx->state_log[cur_str_idx]->nodes.nelem);
2589	/ Add 'new_dest_node' to state_log. /
2590	if (dest_state == NULL)
2591	{
2592	mctx->state_log[dest_str_idx]
2593	= re_acquire_state_context (&err, dfa, new_dest_nodes,
2594	context);
2595	if (BE (mctx->state_log[dest_str_idx] == NULL
2596	&& err != REG_NOERROR, `0`))
2597	goto free_return;
2598	}
2599	else
2600	{
2601	re_node_set dest_nodes;
2602	err = re_node_set_init_union (&dest_nodes,
2603	dest_state->entrance_nodes,
2604	new_dest_nodes);
2605	if (BE (err != REG_NOERROR, `0`))
2606	{
2607	re_node_set_free (&dest_nodes);
2608	goto free_return;
2609	}
2610	mctx->state_log[dest_str_idx]
2611	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2612	re_node_set_free (&dest_nodes);
2613	if (BE (mctx->state_log[dest_str_idx] == NULL
2614	&& err != REG_NOERROR, `0`))
2615	goto free_return;
2616	}
2617	/ We need to check recursively if the backreference can epsilon*
2618	transit. /*
2619	if (subexp_len == `0`
2620	&& mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2621	{
2622	err = check_subexp_matching_top (mctx, new_dest_nodes,
2623	cur_str_idx);
2624	if (BE (err != REG_NOERROR, `0`))
2625	goto free_return;
2626	err = transit_state_bkref (mctx, new_dest_nodes);
2627	if (BE (err != REG_NOERROR, `0`))
2628	goto free_return;
2629	}
2630	}
2631	}
2632	err = REG_NOERROR;
2633	free_return:
2634	return err;
2635	}
2636
2637	/ Enumerate all the candidates which the backreference BKREF_NODE can match*
2638	at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2639	Note that we might collect inappropriate candidates here.
2640	However, the cost of checking them strictly here is too high, then we
2641	delay these checking for prune_impossible_nodes(). /*
2642
2643	static reg_errcode_t
2644	__attribute_warn_unused_result__
2645	get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
2646	{
2647	const re_dfa_t *const dfa = mctx->dfa;
2648	Idx subexp_num, sub_top_idx;
2649	const char buf = (const* char *) re_string_get_buffer (&mctx->input);
2650	/ Return if we have already checked BKREF_NODE at BKREF_STR_IDX. /
2651	Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2652	if (cache_idx != -`1`)
2653	{
2654	const struct re_backref_cache_entry *entry
2655	= mctx->bkref_ents + cache_idx;
2656	do
2657	if (entry->node == bkref_node)
2658	return REG_NOERROR; / We already checked it. /
2659	while (entry++->more);
2660	}
2661
2662	subexp_num = dfa->nodes[bkref_node].opr.idx;
2663
2664	/ For each sub expression /
2665	for (sub_top_idx = `0`; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2666	{
2667	reg_errcode_t err;
2668	re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2669	re_sub_match_last_t *sub_last;
2670	Idx sub_last_idx, sl_str, bkref_str_off;
2671
2672	if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2673	continue; / It isn't related. /
2674
2675	sl_str = sub_top->str_idx;
2676	bkref_str_off = bkref_str_idx;
2677	/ At first, check the last node of sub expressions we already*
2678	evaluated. /*
2679	for (sub_last_idx = `0`; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2680	{
2681	regoff_t sl_str_diff;
2682	sub_last = sub_top->lasts[sub_last_idx];
2683	sl_str_diff = sub_last->str_idx - sl_str;
2684	/ The matched string by the sub expression match with the substring*
2685	at the back reference? /*
2686	if (sl_str_diff > `0`)
2687	{
2688	if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, `0`))
2689	{
2690	/ Not enough chars for a successful match. /
2691	if (bkref_str_off + sl_str_diff > mctx->input.len)
2692	break;
2693
2694	err = clean_state_log_if_needed (mctx,
2695	bkref_str_off
2696	+ sl_str_diff);
2697	if (BE (err != REG_NOERROR, `0`))
2698	return err;
2699	buf = (const char *) re_string_get_buffer (&mctx->input);
2700	}
2701	if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != `0`)
2702	/ We don't need to search this sub expression any more. /
2703	break;
2704	}
2705	bkref_str_off += sl_str_diff;
2706	sl_str += sl_str_diff;
2707	err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2708	bkref_str_idx);
2709
2710	/ Reload buf, since the preceding call might have reallocated*
2711	the buffer. /*
2712	buf = (const char *) re_string_get_buffer (&mctx->input);
2713
2714	if (err == REG_NOMATCH)
2715	continue;
2716	if (BE (err != REG_NOERROR, `0`))
2717	return err;
2718	}
2719
2720	if (sub_last_idx < sub_top->nlasts)
2721	continue;
2722	if (sub_last_idx > `0`)
2723	++sl_str;
2724	/ Then, search for the other last nodes of the sub expression. /
2725	for (; sl_str <= bkref_str_idx; ++sl_str)
2726	{
2727	Idx cls_node;
2728	regoff_t sl_str_off;
2729	const re_node_set *nodes;
2730	sl_str_off = sl_str - sub_top->str_idx;
2731	/ The matched string by the sub expression match with the substring*
2732	at the back reference? /*
2733	if (sl_str_off > `0`)
2734	{
2735	if (BE (bkref_str_off >= mctx->input.valid_len, `0`))
2736	{
2737	/ If we are at the end of the input, we cannot match. /
2738	if (bkref_str_off >= mctx->input.len)
2739	break;
2740
2741	err = extend_buffers (mctx, bkref_str_off + `1`);
2742	if (BE (err != REG_NOERROR, `0`))
2743	return err;
2744
2745	buf = (const char *) re_string_get_buffer (&mctx->input);
2746	}
2747	if (buf [bkref_str_off++] != buf[sl_str - `1`])
2748	break; / We don't need to search this sub expression*
2749	any more. /*
2750	}
2751	if (mctx->state_log[sl_str] == NULL)
2752	continue;
2753	/ Does this state have a ')' of the sub expression? /
2754	nodes = &mctx->state_log[sl_str]->nodes;
2755	cls_node = find_subexp_node (dfa, nodes, subexp_num,
2756	OP_CLOSE_SUBEXP);
2757	if (cls_node == -`1`)
2758	continue; / No. /
2759	if (sub_top->path == NULL)
2760	{
2761	sub_top->path = calloc (sizeof (state_array_t),
2762	sl_str - sub_top->str_idx + `1`);
2763	if (sub_top->path == NULL)
2764	return REG_ESPACE;
2765	}
2766	/ Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node*
2767	in the current context? /*
2768	err = check_arrival (mctx, sub_top->path, sub_top->node,
2769	sub_top->str_idx, cls_node, sl_str,
2770	OP_CLOSE_SUBEXP);
2771	if (err == REG_NOMATCH)
2772	continue;
2773	if (BE (err != REG_NOERROR, `0`))
2774	return err;
2775	sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2776	if (BE (sub_last == NULL, `0`))
2777	return REG_ESPACE;
2778	err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2779	bkref_str_idx);
2780	if (err == REG_NOMATCH)
2781	continue;
2782	}
2783	}
2784	return REG_NOERROR;
2785	}
2786
2787	/ Helper functions for get_subexp(). /
2788
2789	/ Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.*
2790	If it can arrive, register the sub expression expressed with SUB_TOP
2791	and SUB_LAST. /*
2792
2793	static reg_errcode_t
2794	get_subexp_sub (re_match_context_t mctx, const* re_sub_match_top_t *sub_top,
2795	re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
2796	{
2797	reg_errcode_t err;
2798	Idx to_idx;
2799	/ Can the subexpression arrive the back reference? /
2800	err = check_arrival (mctx, &sub_last->path, sub_last->node,
2801	sub_last->str_idx, bkref_node, bkref_str,
2802	OP_OPEN_SUBEXP);
2803	if (err != REG_NOERROR)
2804	return err;
2805	err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2806	sub_last->str_idx);
2807	if (BE (err != REG_NOERROR, `0`))
2808	return err;
2809	to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2810	return clean_state_log_if_needed (mctx, to_idx);
2811	}
2812
2813	/ Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.*
2814	Search '(' if FL_OPEN, or search ')' otherwise.
2815	TODO: This function isn't efficient...
2816	Because there might be more than one nodes whose types are
2817	OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2818	nodes.
2819	E.g. RE: (a){2} /*
2820
2821	static Idx
2822	find_subexp_node (const re_dfa_t dfa, const* re_node_set *nodes,
2823	Idx subexp_idx, int type)
2824	{
2825	Idx cls_idx;
2826	for (cls_idx = `0`; cls_idx < nodes->nelem; ++cls_idx)
2827	{
2828	Idx cls_node = nodes->elems[cls_idx];
2829	const re_token_t *node = dfa->nodes + cls_node;
2830	if (node->type == type
2831	&& node->opr.idx == subexp_idx)
2832	return cls_node;
2833	}
2834	return -`1`;
2835	}
2836
2837	/ Check whether the node TOP_NODE at TOP_STR can arrive to the node*
2838	LAST_NODE at LAST_STR. We record the path onto PATH since it will be
2839	heavily reused.
2840	Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. /*
2841
2842	static reg_errcode_t
2843	__attribute_warn_unused_result__
2844	check_arrival (re_match_context_t mctx, state_array_t path, Idx top_node,
2845	Idx top_str, Idx last_node, Idx last_str, int type)
2846	{
2847	const re_dfa_t *const dfa = mctx->dfa;
2848	reg_errcode_t err = REG_NOERROR;
2849	Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
2850	re_dfastate_t *cur_state = NULL;
2851	re_node_set *cur_nodes, next_nodes;
2852	re_dfastate_t **backup_state_log;
2853	unsigned int context;
2854
2855	subexp_num = dfa->nodes[top_node].opr.idx;
2856	/ Extend the buffer if we need. /
2857	if (BE (path->alloc < last_str + mctx->max_mb_elem_len + `1`, `0`))
2858	{
2859	re_dfastate_t **new_array;
2860	Idx old_alloc = path->alloc;
2861	Idx incr_alloc = last_str + mctx->max_mb_elem_len + `1`;
2862	Idx new_alloc;
2863	if (BE (IDX_MAX - old_alloc < incr_alloc, `0`))
2864	return REG_ESPACE;
2865	new_alloc = old_alloc + incr_alloc;
2866	if (BE (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc, `0`))
2867	return REG_ESPACE;
2868	new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
2869	if (BE (new_array == NULL, `0`))
2870	return REG_ESPACE;
2871	path->array = new_array;
2872	path->alloc = new_alloc;
2873	memset (new_array + old_alloc, `'\0'`,
2874	sizeof (re_dfastate_t ) (path->alloc - old_alloc));
2875	}
2876
2877	str_idx = path->next_idx ? path->next_idx : top_str;
2878
2879	/ Temporary modify MCTX. /
2880	backup_state_log = mctx->state_log;
2881	backup_cur_idx = mctx->input.cur_idx;
2882	mctx->state_log = path->array;
2883	mctx->input.cur_idx = str_idx;
2884
2885	/ Setup initial node set. /
2886	context = re_string_context_at (&mctx->input, str_idx - `1`, mctx->eflags);
2887	if (str_idx == top_str)
2888	{
2889	err = re_node_set_init_1 (&next_nodes, top_node);
2890	if (BE (err != REG_NOERROR, `0`))
2891	return err;
2892	err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2893	if (BE (err != REG_NOERROR, `0`))
2894	{
2895	re_node_set_free (&next_nodes);
2896	return err;
2897	}
2898	}
2899	else
2900	{
2901	cur_state = mctx->state_log[str_idx];
2902	if (cur_state && cur_state->has_backref)
2903	{
2904	err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2905	if (BE (err != REG_NOERROR, `0`))
2906	return err;
2907	}
2908	else
2909	re_node_set_init_empty (&next_nodes);
2910	}
2911	if (str_idx == top_str \|\| (cur_state && cur_state->has_backref))
2912	{
2913	if (next_nodes.nelem)
2914	{
2915	err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2916	subexp_num, type);
2917	if (BE (err != REG_NOERROR, `0`))
2918	{
2919	re_node_set_free (&next_nodes);
2920	return err;
2921	}
2922	}
2923	cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2924	if (BE (cur_state == NULL && err != REG_NOERROR, `0`))
2925	{
2926	re_node_set_free (&next_nodes);
2927	return err;
2928	}
2929	mctx->state_log[str_idx] = cur_state;
2930	}
2931
2932	for (null_cnt = `0`; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2933	{
2934	re_node_set_empty (&next_nodes);
2935	if (mctx->state_log[str_idx + `1`])
2936	{
2937	err = re_node_set_merge (&next_nodes,
2938	&mctx->state_log[str_idx + `1`]->nodes);
2939	if (BE (err != REG_NOERROR, `0`))
2940	{
2941	re_node_set_free (&next_nodes);
2942	return err;
2943	}
2944	}
2945	if (cur_state)
2946	{
2947	err = check_arrival_add_next_nodes (mctx, str_idx,
2948	&cur_state->non_eps_nodes,
2949	&next_nodes);
2950	if (BE (err != REG_NOERROR, `0`))
2951	{
2952	re_node_set_free (&next_nodes);
2953	return err;
2954	}
2955	}
2956	++str_idx;
2957	if (next_nodes.nelem)
2958	{
2959	err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2960	if (BE (err != REG_NOERROR, `0`))
2961	{
2962	re_node_set_free (&next_nodes);
2963	return err;
2964	}
2965	err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2966	subexp_num, type);
2967	if (BE (err != REG_NOERROR, `0`))
2968	{
2969	re_node_set_free (&next_nodes);
2970	return err;
2971	}
2972	}
2973	context = re_string_context_at (&mctx->input, str_idx - `1`, mctx->eflags);
2974	cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2975	if (BE (cur_state == NULL && err != REG_NOERROR, `0`))
2976	{
2977	re_node_set_free (&next_nodes);
2978	return err;
2979	}
2980	mctx->state_log[str_idx] = cur_state;
2981	null_cnt = cur_state == NULL ? null_cnt + `1` : `0`;
2982	}
2983	re_node_set_free (&next_nodes);
2984	cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
2985	: &mctx->state_log[last_str]->nodes);
2986	path->next_idx = str_idx;
2987
2988	/ Fix MCTX. /
2989	mctx->state_log = backup_state_log;
2990	mctx->input.cur_idx = backup_cur_idx;
2991
2992	/ Then check the current node set has the node LAST_NODE. /
2993	if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
2994	return REG_NOERROR;
2995
2996	return REG_NOMATCH;
2997	}
2998
2999	/ Helper functions for check_arrival. /
3000
3001	/ Calculate the destination nodes of CUR_NODES at STR_IDX, and append them*
3002	to NEXT_NODES.
3003	TODO: This function is similar to the functions transit_state(),*
3004	however this function has many additional works.
3005	Can't we unify them? /*
3006
3007	static reg_errcode_t
3008	__attribute_warn_unused_result__
3009	check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
3010	re_node_set cur_nodes, re_node_set next_nodes)
3011	{
3012	const re_dfa_t *const dfa = mctx->dfa;
3013	bool ok;
3014	Idx cur_idx;
3015	#ifdef RE_ENABLE_I18N
3016	reg_errcode_t err = REG_NOERROR;
3017	#endif
3018	re_node_set union_set;
3019	re_node_set_init_empty (&union_set);
3020	for (cur_idx = `0`; cur_idx < cur_nodes->nelem; ++cur_idx)
3021	{
3022	int naccepted = `0`;
3023	Idx cur_node = cur_nodes->elems[cur_idx];
3024	#ifdef DEBUG
3025	re_token_type_t type = dfa->nodes[cur_node].type;
3026	assert (!IS_EPSILON_NODE (type));
3027	#endif
3028	#ifdef RE_ENABLE_I18N
3029	/ If the node may accept "multi byte". /
3030	if (dfa->nodes[cur_node].accept_mb)
3031	{
3032	naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
3033	str_idx);
3034	if (naccepted > `1`)
3035	{
3036	re_dfastate_t *dest_state;
3037	Idx next_node = dfa->nexts[cur_node];
3038	Idx next_idx = str_idx + naccepted;
3039	dest_state = mctx->state_log[next_idx];
3040	re_node_set_empty (&union_set);
3041	if (dest_state)
3042	{
3043	err = re_node_set_merge (&union_set, &dest_state->nodes);
3044	if (BE (err != REG_NOERROR, `0`))
3045	{
3046	re_node_set_free (&union_set);
3047	return err;
3048	}
3049	}
3050	ok = re_node_set_insert (&union_set, next_node);
3051	if (BE (! ok, `0`))
3052	{
3053	re_node_set_free (&union_set);
3054	return REG_ESPACE;
3055	}
3056	mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3057	&union_set);
3058	if (BE (mctx->state_log[next_idx] == NULL
3059	&& err != REG_NOERROR, `0`))
3060	{
3061	re_node_set_free (&union_set);
3062	return err;
3063	}
3064	}
3065	}
3066	#endif /* RE_ENABLE_I18N */
3067	if (naccepted
3068	\|\| check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3069	{
3070	ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3071	if (BE (! ok, `0`))
3072	{
3073	re_node_set_free (&union_set);
3074	return REG_ESPACE;
3075	}
3076	}
3077	}
3078	re_node_set_free (&union_set);
3079	return REG_NOERROR;
3080	}
3081
3082	/ For all the nodes in CUR_NODES, add the epsilon closures of them to*
3083	CUR_NODES, however exclude the nodes which are:
3084	- inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3085	- out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3086	*/
3087
3088	static reg_errcode_t
3089	check_arrival_expand_ecl (const re_dfa_t dfa, re_node_set cur_nodes,
3090	Idx ex_subexp, int type)
3091	{
3092	reg_errcode_t err;
3093	Idx idx, outside_node;
3094	re_node_set new_nodes;
3095	#ifdef DEBUG
3096	assert (cur_nodes->nelem);
3097	#endif
3098	err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3099	if (BE (err != REG_NOERROR, `0`))
3100	return err;
3101	/ Create a new node set NEW_NODES with the nodes which are epsilon*
3102	closures of the node in CUR_NODES. /*
3103
3104	for (idx = `0`; idx < cur_nodes->nelem; ++idx)
3105	{
3106	Idx cur_node = cur_nodes->elems[idx];
3107	const re_node_set *eclosure = dfa->eclosures + cur_node;
3108	outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3109	if (outside_node == -`1`)
3110	{
3111	/ There are no problematic nodes, just merge them. /
3112	err = re_node_set_merge (&new_nodes, eclosure);
3113	if (BE (err != REG_NOERROR, `0`))
3114	{
3115	re_node_set_free (&new_nodes);
3116	return err;
3117	}
3118	}
3119	else
3120	{
3121	/ There are problematic nodes, re-calculate incrementally. /
3122	err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3123	ex_subexp, type);
3124	if (BE (err != REG_NOERROR, `0`))
3125	{
3126	re_node_set_free (&new_nodes);
3127	return err;
3128	}
3129	}
3130	}
3131	re_node_set_free (cur_nodes);
3132	*cur_nodes = new_nodes;
3133	return REG_NOERROR;
3134	}
3135
3136	/ Helper function for check_arrival_expand_ecl.*
3137	Check incrementally the epsilon closure of TARGET, and if it isn't
3138	problematic append it to DST_NODES. /*
3139
3140	static reg_errcode_t
3141	__attribute_warn_unused_result__
3142	check_arrival_expand_ecl_sub (const re_dfa_t dfa, re_node_set dst_nodes,
3143	Idx target, Idx ex_subexp, int type)
3144	{
3145	Idx cur_node;
3146	for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3147	{
3148	bool ok;
3149
3150	if (dfa->nodes[cur_node].type == type
3151	&& dfa->nodes[cur_node].opr.idx == ex_subexp)
3152	{
3153	if (type == OP_CLOSE_SUBEXP)
3154	{
3155	ok = re_node_set_insert (dst_nodes, cur_node);
3156	if (BE (! ok, `0`))
3157	return REG_ESPACE;
3158	}
3159	break;
3160	}
3161	ok = re_node_set_insert (dst_nodes, cur_node);
3162	if (BE (! ok, `0`))
3163	return REG_ESPACE;
3164	if (dfa->edests[cur_node].nelem == `0`)
3165	break;
3166	if (dfa->edests[cur_node].nelem == `2`)
3167	{
3168	reg_errcode_t err;
3169	err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3170	dfa->edests[cur_node].elems[`1`],
3171	ex_subexp, type);
3172	if (BE (err != REG_NOERROR, `0`))
3173	return err;
3174	}
3175	cur_node = dfa->edests[cur_node].elems[`0`];
3176	}
3177	return REG_NOERROR;
3178	}
3179
3180
3181	/ For all the back references in the current state, calculate the*
3182	destination of the back references by the appropriate entry
3183	in MCTX->BKREF_ENTS. /*
3184
3185	static reg_errcode_t
3186	__attribute_warn_unused_result__
3187	expand_bkref_cache (re_match_context_t mctx, re_node_set cur_nodes,
3188	Idx cur_str, Idx subexp_num, int type)
3189	{
3190	const re_dfa_t *const dfa = mctx->dfa;
3191	reg_errcode_t err;
3192	Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3193	struct re_backref_cache_entry *ent;
3194
3195	if (cache_idx_start == -`1`)
3196	return REG_NOERROR;
3197
3198	restart:
3199	ent = mctx->bkref_ents + cache_idx_start;
3200	do
3201	{
3202	Idx to_idx, next_node;
3203
3204	/ Is this entry ENT is appropriate? /
3205	if (!re_node_set_contains (cur_nodes, ent->node))
3206	continue; / No. /
3207
3208	to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3209	/ Calculate the destination of the back reference, and append it*
3210	to MCTX->STATE_LOG. /*
3211	if (to_idx == cur_str)
3212	{
3213	/ The backreference did epsilon transit, we must re-check all the*
3214	node in the current state. /*
3215	re_node_set new_dests;
3216	reg_errcode_t err2, err3;
3217	next_node = dfa->edests[ent->node].elems[`0`];
3218	if (re_node_set_contains (cur_nodes, next_node))
3219	continue;
3220	err = re_node_set_init_1 (&new_dests, next_node);
3221	err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3222	err3 = re_node_set_merge (cur_nodes, &new_dests);
3223	re_node_set_free (&new_dests);
3224	if (BE (err != REG_NOERROR \|\| err2 != REG_NOERROR
3225	\|\| err3 != REG_NOERROR, `0`))
3226	{
3227	err = (err != REG_NOERROR ? err
3228	: (err2 != REG_NOERROR ? err2 : err3));
3229	return err;
3230	}
3231	/ TODO: It is still inefficient... /
3232	goto restart;
3233	}
3234	else
3235	{
3236	re_node_set union_set;
3237	next_node = dfa->nexts[ent->node];
3238	if (mctx->state_log[to_idx])
3239	{
3240	bool ok;
3241	if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3242	next_node))
3243	continue;
3244	err = re_node_set_init_copy (&union_set,
3245	&mctx->state_log[to_idx]->nodes);
3246	ok = re_node_set_insert (&union_set, next_node);
3247	if (BE (err != REG_NOERROR \|\| ! ok, `0`))
3248	{
3249	re_node_set_free (&union_set);
3250	err = err != REG_NOERROR ? err : REG_ESPACE;
3251	return err;
3252	}
3253	}
3254	else
3255	{
3256	err = re_node_set_init_1 (&union_set, next_node);
3257	if (BE (err != REG_NOERROR, `0`))
3258	return err;
3259	}
3260	mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3261	re_node_set_free (&union_set);
3262	if (BE (mctx->state_log[to_idx] == NULL
3263	&& err != REG_NOERROR, `0`))
3264	return err;
3265	}
3266	}
3267	while (ent++->more);
3268	return REG_NOERROR;
3269	}
3270
3271	/ Build transition table for the state.*
3272	Return true if successful. /*
3273
3274	static bool
3275	build_trtable (const re_dfa_t dfa, re_dfastate_t state)
3276	{
3277	reg_errcode_t err;
3278	Idx i, j;
3279	int ch;
3280	bool need_word_trtable = false;
3281	bitset_word_t elem, mask;
3282	bool dests_node_malloced = false;
3283	bool dest_states_malloced = false;
3284	Idx ndests; / Number of the destination states from 'state'. /
3285	re_dfastate_t **trtable;
3286	re_dfastate_t dest_states = NULL, dest_states_word, **dest_states_nl;
3287	re_node_set follows, *dests_node;
3288	bitset_t *dests_ch;
3289	bitset_t acceptable;
3290
3291	struct dests_alloc
3292	{
3293	re_node_set dests_node[SBC_MAX];
3294	bitset_t dests_ch[SBC_MAX];
3295	} *dests_alloc;
3296
3297	/ We build DFA states which corresponds to the destination nodes*
3298	from 'state'. 'dests_node[i]' represents the nodes which i-th
3299	destination state contains, and 'dests_ch[i]' represents the
3300	characters which i-th destination state accepts. /*
3301	if (__libc_use_alloca (sizeof (struct dests_alloc)))
3302	dests_alloc = (struct dests_alloc ) alloca (sizeof* (struct dests_alloc));
3303	else
3304	{
3305	dests_alloc = re_malloc (struct dests_alloc, `1`);
3306	if (BE (dests_alloc == NULL, `0`))
3307	return false;
3308	dests_node_malloced = true;
3309	}
3310	dests_node = dests_alloc->dests_node;
3311	dests_ch = dests_alloc->dests_ch;
3312
3313	/ Initialize transition table. /
3314	state->word_trtable = state->trtable = NULL;
3315
3316	/ At first, group all nodes belonging to 'state' into several*
3317	destinations. /*
3318	ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3319	if (BE (ndests <= `0`, `0`))
3320	{
3321	if (dests_node_malloced)
3322	re_free (dests_alloc);
3323	/ Return false in case of an error, true otherwise. /
3324	if (ndests == `0`)
3325	{
3326	state->trtable = (re_dfastate_t **)
3327	calloc (sizeof (re_dfastate_t *), SBC_MAX);
3328	if (BE (state->trtable == NULL, `0`))
3329	return false;
3330	return true;
3331	}
3332	return false;
3333	}
3334
3335	err = re_node_set_alloc (&follows, ndests + `1`);
3336	if (BE (err != REG_NOERROR, `0`))
3337	goto out_free;
3338
3339	/ Avoid arithmetic overflow in size calculation. /
3340	if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
3341	/ (`3` * sizeof (re_dfastate_t *)))
3342	< ndests),
3343	`0`))
3344	goto out_free;
3345
3346	if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
3347	+ ndests * `3` * sizeof (re_dfastate_t *)))
3348	dest_states = (re_dfastate_t **)
3349	alloca (ndests * `3` * sizeof (re_dfastate_t *));
3350	else
3351	{
3352	dest_states = re_malloc (re_dfastate_t , ndests `3`);
3353	if (BE (dest_states == NULL, `0`))
3354	{
3355	out_free:
3356	if (dest_states_malloced)
3357	re_free (dest_states);
3358	re_node_set_free (&follows);
3359	for (i = `0`; i < ndests; ++i)
3360	re_node_set_free (dests_node + i);
3361	if (dests_node_malloced)
3362	re_free (dests_alloc);
3363	return false;
3364	}
3365	dest_states_malloced = true;
3366	}
3367	dest_states_word = dest_states + ndests;
3368	dest_states_nl = dest_states_word + ndests;
3369	bitset_empty (acceptable);
3370
3371	/ Then build the states for all destinations. /
3372	for (i = `0`; i < ndests; ++i)
3373	{
3374	Idx next_node;
3375	re_node_set_empty (&follows);
3376	/ Merge the follows of this destination states. /
3377	for (j = `0`; j < dests_node[i].nelem; ++j)
3378	{
3379	next_node = dfa->nexts[dests_node[i].elems[j]];
3380	if (next_node != -`1`)
3381	{
3382	err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3383	if (BE (err != REG_NOERROR, `0`))
3384	goto out_free;
3385	}
3386	}
3387	dest_states[i] = re_acquire_state_context (&err, dfa, &follows, `0`);
3388	if (BE (dest_states[i] == NULL && err != REG_NOERROR, `0`))
3389	goto out_free;
3390	/ If the new state has context constraint,*
3391	build appropriate states for these contexts. /*
3392	if (dest_states[i]->has_constraint)
3393	{
3394	dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3395	CONTEXT_WORD);
3396	if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, `0`))
3397	goto out_free;
3398
3399	if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > `1`)
3400	need_word_trtable = true;
3401
3402	dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3403	CONTEXT_NEWLINE);
3404	if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, `0`))
3405	goto out_free;
3406	}
3407	else
3408	{
3409	dest_states_word[i] = dest_states[i];
3410	dest_states_nl[i] = dest_states[i];
3411	}
3412	bitset_merge (acceptable, dests_ch[i]);
3413	}
3414
3415	if (!BE (need_word_trtable, `0`))
3416	{
3417	/ We don't care about whether the following character is a word*
3418	character, or we are in a single-byte character set so we can
3419	discern by looking at the character code: allocate a
3420	256-entry transition table. /*
3421	trtable = state->trtable =
3422	(re_dfastate_t ) calloc (sizeof** (re_dfastate_t *), SBC_MAX);
3423	if (BE (trtable == NULL, `0`))
3424	goto out_free;
3425
3426	/ For all characters ch...: /
3427	for (i = `0`; i < BITSET_WORDS; ++i)
3428	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = `1`;
3429	elem;
3430	mask <<= `1`, elem >>= `1`, ++ch)
3431	if (BE (elem & `1`, `0`))
3432	{
3433	/ There must be exactly one destination which accepts*
3434	character ch. See group_nodes_into_DFAstates. /*
3435	for (j = `0`; (dests_ch[j][i] & mask) == `0`; ++j)
3436	;
3437
3438	/ j-th destination accepts the word character ch. /
3439	if (dfa->word_char[i] & mask)
3440	trtable[ch] = dest_states_word[j];
3441	else
3442	trtable[ch] = dest_states[j];
3443	}
3444	}
3445	else
3446	{
3447	/ We care about whether the following character is a word*
3448	character, and we are in a multi-byte character set: discern
3449	by looking at the character code: build two 256-entry
3450	transition tables, one starting at trtable[0] and one
3451	starting at trtable[SBC_MAX]. /*
3452	trtable = state->word_trtable =
3453	(re_dfastate_t ) calloc (sizeof** (re_dfastate_t ), `2` SBC_MAX);
3454	if (BE (trtable == NULL, `0`))
3455	goto out_free;
3456
3457	/ For all characters ch...: /
3458	for (i = `0`; i < BITSET_WORDS; ++i)
3459	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = `1`;
3460	elem;
3461	mask <<= `1`, elem >>= `1`, ++ch)
3462	if (BE (elem & `1`, `0`))
3463	{
3464	/ There must be exactly one destination which accepts*
3465	character ch. See group_nodes_into_DFAstates. /*
3466	for (j = `0`; (dests_ch[j][i] & mask) == `0`; ++j)
3467	;
3468
3469	/ j-th destination accepts the word character ch. /
3470	trtable[ch] = dest_states[j];
3471	trtable[ch + SBC_MAX] = dest_states_word[j];
3472	}
3473	}
3474
3475	/ new line /
3476	if (bitset_contain (acceptable, NEWLINE_CHAR))
3477	{
3478	/ The current state accepts newline character. /
3479	for (j = `0`; j < ndests; ++j)
3480	if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3481	{
3482	/ k-th destination accepts newline character. /
3483	trtable[NEWLINE_CHAR] = dest_states_nl[j];
3484	if (need_word_trtable)
3485	trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3486	/ There must be only one destination which accepts*
3487	newline. See group_nodes_into_DFAstates. /*
3488	break;
3489	}
3490	}
3491
3492	if (dest_states_malloced)
3493	re_free (dest_states);
3494
3495	re_node_set_free (&follows);
3496	for (i = `0`; i < ndests; ++i)
3497	re_node_set_free (dests_node + i);
3498
3499	if (dests_node_malloced)
3500	re_free (dests_alloc);
3501
3502	return true;
3503	}
3504
3505	/ Group all nodes belonging to STATE into several destinations.*
3506	Then for all destinations, set the nodes belonging to the destination
3507	to DESTS_NODE[i] and set the characters accepted by the destination
3508	to DEST_CH[i]. This function return the number of destinations. /*
3509
3510	static Idx
3511	group_nodes_into_DFAstates (const re_dfa_t dfa, const* re_dfastate_t *state,
3512	re_node_set dests_node, bitset_t dests_ch)
3513	{
3514	reg_errcode_t err;
3515	bool ok;
3516	Idx i, j, k;
3517	Idx ndests; / Number of the destinations from 'state'. /
3518	bitset_t accepts; / Characters a node can accept. /
3519	const re_node_set *cur_nodes = &state->nodes;
3520	bitset_empty (accepts);
3521	ndests = `0`;
3522
3523	/ For all the nodes belonging to 'state', /
3524	for (i = `0`; i < cur_nodes->nelem; ++i)
3525	{
3526	re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3527	re_token_type_t type = node->type;
3528	unsigned int constraint = node->constraint;
3529
3530	/ Enumerate all single byte character this node can accept. /
3531	if (type == CHARACTER)
3532	bitset_set (accepts, node->opr.c);
3533	else if (type == SIMPLE_BRACKET)
3534	{
3535	bitset_merge (accepts, node->opr.sbcset);
3536	}
3537	else if (type == OP_PERIOD)
3538	{
3539	#ifdef RE_ENABLE_I18N
3540	if (dfa->mb_cur_max > `1`)
3541	bitset_merge (accepts, dfa->sb_char);
3542	else
3543	#endif
3544	bitset_set_all (accepts);
3545	if (!(dfa->syntax & RE_DOT_NEWLINE))
3546	bitset_clear (accepts, `'\n'`);
3547	if (dfa->syntax & RE_DOT_NOT_NULL)
3548	bitset_clear (accepts, `'\0'`);
3549	}
3550	#ifdef RE_ENABLE_I18N
3551	else if (type == OP_UTF8_PERIOD)
3552	{
3553	if (ASCII_CHARS % BITSET_WORD_BITS == `0`)
3554	memset (accepts, -`1`, ASCII_CHARS / CHAR_BIT);
3555	else
3556	bitset_merge (accepts, utf8_sb_map);
3557	if (!(dfa->syntax & RE_DOT_NEWLINE))
3558	bitset_clear (accepts, `'\n'`);
3559	if (dfa->syntax & RE_DOT_NOT_NULL)
3560	bitset_clear (accepts, `'\0'`);
3561	}
3562	#endif
3563	else
3564	continue;
3565
3566	/ Check the 'accepts' and sift the characters which are not*
3567	match it the context. /*
3568	if (constraint)
3569	{
3570	if (constraint & NEXT_NEWLINE_CONSTRAINT)
3571	{
3572	bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3573	bitset_empty (accepts);
3574	if (accepts_newline)
3575	bitset_set (accepts, NEWLINE_CHAR);
3576	else
3577	continue;
3578	}
3579	if (constraint & NEXT_ENDBUF_CONSTRAINT)
3580	{
3581	bitset_empty (accepts);
3582	continue;
3583	}
3584
3585	if (constraint & NEXT_WORD_CONSTRAINT)
3586	{
3587	bitset_word_t any_set = `0`;
3588	if (type == CHARACTER && !node->word_char)
3589	{
3590	bitset_empty (accepts);
3591	continue;
3592	}
3593	#ifdef RE_ENABLE_I18N
3594	if (dfa->mb_cur_max > `1`)
3595	for (j = `0`; j < BITSET_WORDS; ++j)
3596	any_set \|= (accepts[j] &= (dfa->word_char[j] \| ~dfa->sb_char[j]));
3597	else
3598	#endif
3599	for (j = `0`; j < BITSET_WORDS; ++j)
3600	any_set \|= (accepts[j] &= dfa->word_char[j]);
3601	if (!any_set)
3602	continue;
3603	}
3604	if (constraint & NEXT_NOTWORD_CONSTRAINT)
3605	{
3606	bitset_word_t any_set = `0`;
3607	if (type == CHARACTER && node->word_char)
3608	{
3609	bitset_empty (accepts);
3610	continue;
3611	}
3612	#ifdef RE_ENABLE_I18N
3613	if (dfa->mb_cur_max > `1`)
3614	for (j = `0`; j < BITSET_WORDS; ++j)
3615	any_set \|= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3616	else
3617	#endif
3618	for (j = `0`; j < BITSET_WORDS; ++j)
3619	any_set \|= (accepts[j] &= ~dfa->word_char[j]);
3620	if (!any_set)
3621	continue;
3622	}
3623	}
3624
3625	/ Then divide 'accepts' into DFA states, or create a new*
3626	state. Above, we make sure that accepts is not empty. /*
3627	for (j = `0`; j < ndests; ++j)
3628	{
3629	bitset_t intersec; / Intersection sets, see below. /
3630	bitset_t remains;
3631	/ Flags, see below. /
3632	bitset_word_t has_intersec, not_subset, not_consumed;
3633
3634	/ Optimization, skip if this state doesn't accept the character. /
3635	if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3636	continue;
3637
3638	/ Enumerate the intersection set of this state and 'accepts'. /
3639	has_intersec = `0`;
3640	for (k = `0`; k < BITSET_WORDS; ++k)
3641	has_intersec \|= intersec[k] = accepts[k] & dests_ch[j][k];
3642	/ And skip if the intersection set is empty. /
3643	if (!has_intersec)
3644	continue;
3645
3646	/ Then check if this state is a subset of 'accepts'. /
3647	not_subset = not_consumed = `0`;
3648	for (k = `0`; k < BITSET_WORDS; ++k)
3649	{
3650	not_subset \|= remains[k] = ~accepts[k] & dests_ch[j][k];
3651	not_consumed \|= accepts[k] = accepts[k] & ~dests_ch[j][k];
3652	}
3653
3654	/ If this state isn't a subset of 'accepts', create a*
3655	new group state, which has the 'remains'. /*
3656	if (not_subset)
3657	{
3658	bitset_copy (dests_ch[ndests], remains);
3659	bitset_copy (dests_ch[j], intersec);
3660	err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3661	if (BE (err != REG_NOERROR, `0`))
3662	goto error_return;
3663	++ndests;
3664	}
3665
3666	/ Put the position in the current group. /
3667	ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3668	if (BE (! ok, `0`))
3669	goto error_return;
3670
3671	/ If all characters are consumed, go to next node. /
3672	if (!not_consumed)
3673	break;
3674	}
3675	/ Some characters remain, create a new group. /
3676	if (j == ndests)
3677	{
3678	bitset_copy (dests_ch[ndests], accepts);
3679	err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3680	if (BE (err != REG_NOERROR, `0`))
3681	goto error_return;
3682	++ndests;
3683	bitset_empty (accepts);
3684	}
3685	}
3686	return ndests;
3687	error_return:
3688	for (j = `0`; j < ndests; ++j)
3689	re_node_set_free (dests_node + j);
3690	return -`1`;
3691	}
3692
3693	#ifdef RE_ENABLE_I18N
3694	/ Check how many bytes the node 'dfa->nodes[node_idx]' accepts.*
3695	Return the number of the bytes the node accepts.
3696	STR_IDX is the current index of the input string.
3697
3698	This function handles the nodes which can accept one character, or
3699	one collating element like '.', '[a-z]', opposite to the other nodes
3700	can only accept one byte. /*
3701
3702	# ifdef _LIBC
3703	# include <locale/weight.h>
3704	# endif
3705
3706	static int
3707	check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
3708	const re_string_t *input, Idx str_idx)
3709	{
3710	const re_token_t *node = dfa->nodes + node_idx;
3711	int char_len, elem_len;
3712	Idx i;
3713
3714	if (BE (node->type == OP_UTF8_PERIOD, `0`))
3715	{
3716	unsigned char c = re_string_byte_at (input, str_idx), d;
3717	if (BE (c < `0xc2`, `1`))
3718	return `0`;
3719
3720	if (str_idx + `2` > input->len)
3721	return `0`;
3722
3723	d = re_string_byte_at (input, str_idx + `1`);
3724	if (c < `0xe0`)
3725	return (d < `0x80` \|\| d > `0xbf`) ? `0` : `2`;
3726	else if (c < `0xf0`)
3727	{
3728	char_len = `3`;
3729	if (c == `0xe0` && d < `0xa0`)
3730	return `0`;
3731	}
3732	else if (c < `0xf8`)
3733	{
3734	char_len = `4`;
3735	if (c == `0xf0` && d < `0x90`)
3736	return `0`;
3737	}
3738	else if (c < `0xfc`)
3739	{
3740	char_len = `5`;
3741	if (c == `0xf8` && d < `0x88`)
3742	return `0`;
3743	}
3744	else if (c < `0xfe`)
3745	{
3746	char_len = `6`;
3747	if (c == `0xfc` && d < `0x84`)
3748	return `0`;
3749	}
3750	else
3751	return `0`;
3752
3753	if (str_idx + char_len > input->len)
3754	return `0`;
3755
3756	for (i = `1`; i < char_len; ++i)
3757	{
3758	d = re_string_byte_at (input, str_idx + i);
3759	if (d < `0x80` \|\| d > `0xbf`)
3760	return `0`;
3761	}
3762	return char_len;
3763	}
3764
3765	char_len = re_string_char_size_at (input, str_idx);
3766	if (node->type == OP_PERIOD)
3767	{
3768	if (char_len <= `1`)
3769	return `0`;
3770	/ FIXME: I don't think this if is needed, as both '\n'*
3771	and '\0' are char_len == 1. /*
3772	/ '.' accepts any one character except the following two cases. /
3773	if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
3774	re_string_byte_at (input, str_idx) == `'\n'`) \|\|
3775	((dfa->syntax & RE_DOT_NOT_NULL) &&
3776	re_string_byte_at (input, str_idx) == `'\0'`))
3777	return `0`;
3778	return char_len;
3779	}
3780
3781	elem_len = re_string_elem_size_at (input, str_idx);
3782	if ((elem_len <= `1` && char_len <= `1`) \|\| char_len == `0`)
3783	return `0`;
3784
3785	if (node->type == COMPLEX_BRACKET)
3786	{
3787	const re_charset_t *cset = node->opr.mbcset;
3788	# ifdef _LIBC
3789	const unsigned char *pin
3790	= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3791	Idx j;
3792	uint32_t nrules;
3793	# endif /* _LIBC */
3794	int match_len = `0`;
3795	wchar_t wc = ((cset->nranges \|\| cset->nchar_classes \|\| cset->nmbchars)
3796	? re_string_wchar_at (input, str_idx) : `0`);
3797
3798	/ match with multibyte character? /
3799	for (i = `0`; i < cset->nmbchars; ++i)
3800	if (wc == cset->mbchars[i])
3801	{
3802	match_len = char_len;
3803	goto check_node_accept_bytes_match;
3804	}
3805	/ match with character_class? /
3806	for (i = `0`; i < cset->nchar_classes; ++i)
3807	{
3808	wctype_t wt = cset->char_classes[i];
3809	if (__iswctype (wc, wt))
3810	{
3811	match_len = char_len;
3812	goto check_node_accept_bytes_match;
3813	}
3814	}
3815
3816	# ifdef _LIBC
3817	nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3818	if (nrules != `0`)
3819	{
3820	unsigned int in_collseq = `0`;
3821	const int32_t table, indirect;
3822	const unsigned char weights, extra;
3823	const char *collseqwc;
3824
3825	/ match with collating_symbol? /
3826	if (cset->ncoll_syms)
3827	extra = (const unsigned char *)
3828	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3829	for (i = `0`; i < cset->ncoll_syms; ++i)
3830	{
3831	const unsigned char *coll_sym = extra + cset->coll_syms[i];
3832	/ Compare the length of input collating element and*
3833	the length of current collating element. /*
3834	if (*coll_sym != elem_len)
3835	continue;
3836	/ Compare each bytes. /
3837	for (j = `0`; j < *coll_sym; j++)
3838	if (pin[j] != coll_sym[`1` + j])
3839	break;
3840	if (j == *coll_sym)
3841	{
3842	/ Match if every bytes is equal. /
3843	match_len = j;
3844	goto check_node_accept_bytes_match;
3845	}
3846	}
3847
3848	if (cset->nranges)
3849	{
3850	if (elem_len <= char_len)
3851	{
3852	collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3853	in_collseq = __collseq_table_lookup (collseqwc, wc);
3854	}
3855	else
3856	in_collseq = find_collation_sequence_value (pin, elem_len);
3857	}
3858	/ match with range expression? /
3859	/ FIXME: Implement rational ranges here, too. /
3860	for (i = `0`; i < cset->nranges; ++i)
3861	if (cset->range_starts[i] <= in_collseq
3862	&& in_collseq <= cset->range_ends[i])
3863	{
3864	match_len = elem_len;
3865	goto check_node_accept_bytes_match;
3866	}
3867
3868	/ match with equivalence_class? /
3869	if (cset->nequiv_classes)
3870	{
3871	const unsigned char *cp = pin;
3872	table = (const int32_t *)
3873	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3874	weights = (const unsigned char *)
3875	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3876	extra = (const unsigned char *)
3877	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3878	indirect = (const int32_t *)
3879	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3880	int32_t idx = findidx (table, indirect, extra, &cp, elem_len);
3881	int32_t rule = idx >> `24`;
3882	idx &= `0xffffff`;
3883	if (idx > `0`)
3884	{
3885	size_t weight_len = weights[idx];
3886	for (i = `0`; i < cset->nequiv_classes; ++i)
3887	{
3888	int32_t equiv_class_idx = cset->equiv_classes[i];
3889	int32_t equiv_class_rule = equiv_class_idx >> `24`;
3890	equiv_class_idx &= `0xffffff`;
3891	if (weights[equiv_class_idx] == weight_len
3892	&& equiv_class_rule == rule
3893	&& memcmp (weights + idx + `1`,
3894	weights + equiv_class_idx + `1`,
3895	weight_len) == `0`)
3896	{
3897	match_len = elem_len;
3898	goto check_node_accept_bytes_match;
3899	}
3900	}
3901	}
3902	}
3903	}
3904	else
3905	# endif /* _LIBC */
3906	{
3907	/ match with range expression? /
3908	for (i = `0`; i < cset->nranges; ++i)
3909	{
3910	if (cset->range_starts[i] <= wc && wc <= cset->range_ends[i])
3911	{
3912	match_len = char_len;
3913	goto check_node_accept_bytes_match;
3914	}
3915	}
3916	}
3917	check_node_accept_bytes_match:
3918	if (!cset->non_match)
3919	return match_len;
3920	else
3921	{
3922	if (match_len > `0`)
3923	return `0`;
3924	else
3925	return (elem_len > char_len) ? elem_len : char_len;
3926	}
3927	}
3928	return `0`;
3929	}
3930
3931	# ifdef _LIBC
3932	static unsigned int
3933	find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
3934	{
3935	uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3936	if (nrules == `0`)
3937	{
3938	if (mbs_len == `1`)
3939	{
3940	/ No valid character. Match it as a single byte character. /
3941	const unsigned char collseq = (const* unsigned char *)
3942	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3943	return collseq[mbs[`0`]];
3944	}
3945	return UINT_MAX;
3946	}
3947	else
3948	{
3949	int32_t idx;
3950	const unsigned char extra = (const* unsigned char *)
3951	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3952	int32_t extrasize = (const unsigned char *)
3953	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + `1`) - extra;
3954
3955	for (idx = `0`; idx < extrasize;)
3956	{
3957	int mbs_cnt;
3958	bool found = false;
3959	int32_t elem_mbs_len;
3960	/ Skip the name of collating element name. /
3961	idx = idx + extra[idx] + `1`;
3962	elem_mbs_len = extra[idx++];
3963	if (mbs_len == elem_mbs_len)
3964	{
3965	for (mbs_cnt = `0`; mbs_cnt < elem_mbs_len; ++mbs_cnt)
3966	if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
3967	break;
3968	if (mbs_cnt == elem_mbs_len)
3969	/ Found the entry. /
3970	found = true;
3971	}
3972	/ Skip the byte sequence of the collating element. /
3973	idx += elem_mbs_len;
3974	/ Adjust for the alignment. /
3975	idx = (idx + `3`) & ~`3`;
3976	/ Skip the collation sequence value. /
3977	idx += sizeof (uint32_t);
3978	/ Skip the wide char sequence of the collating element. /
3979	idx = idx + sizeof (uint32_t) * ((int32_t ) (extra + idx) + `1`);
3980	/ If we found the entry, return the sequence value. /
3981	if (found)
3982	return (uint32_t ) (extra + idx);
3983	/ Skip the collation sequence value. /
3984	idx += sizeof (uint32_t);
3985	}
3986	return UINT_MAX;
3987	}
3988	}
3989	# endif /* _LIBC */
3990	#endif /* RE_ENABLE_I18N */
3991
3992	/ Check whether the node accepts the byte which is IDX-th*
3993	byte of the INPUT. /*
3994
3995	static bool
3996	check_node_accept (const re_match_context_t mctx, const* re_token_t *node,
3997	Idx idx)
3998	{
3999	unsigned char ch;
4000	ch = re_string_byte_at (&mctx->input, idx);
4001	switch (node->type)
4002	{
4003	case CHARACTER:
4004	if (node->opr.c != ch)
4005	return false;
4006	break;
4007
4008	case SIMPLE_BRACKET:
4009	if (!bitset_contain (node->opr.sbcset, ch))
4010	return false;
4011	break;
4012
4013	#ifdef RE_ENABLE_I18N
4014	case OP_UTF8_PERIOD:
4015	if (ch >= ASCII_CHARS)
4016	return false;
4017	FALLTHROUGH;
4018	#endif
4019	case OP_PERIOD:
4020	if ((ch == `'\n'` && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
4021	\|\| (ch == `'\0'` && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
4022	return false;
4023	break;
4024
4025	default:
4026	return false;
4027	}
4028
4029	if (node->constraint)
4030	{
4031	/ The node has constraints. Check whether the current context*
4032	satisfies the constraints. /*
4033	unsigned int context = re_string_context_at (&mctx->input, idx,
4034	mctx->eflags);
4035	if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
4036	return false;
4037	}
4038
4039	return true;
4040	}
4041
4042	/ Extend the buffers, if the buffers have run out. /
4043
4044	static reg_errcode_t
4045	__attribute_warn_unused_result__
4046	extend_buffers (re_match_context_t mctx, int* min_len)
4047	{
4048	reg_errcode_t ret;
4049	re_string_t *pstr = &mctx->input;
4050
4051	/ Avoid overflow. /
4052	if (BE (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / `2`
4053	<= pstr->bufs_len, `0`))
4054	return REG_ESPACE;
4055
4056	/ Double the lengths of the buffers, but allocate at least MIN_LEN. /
4057	ret = re_string_realloc_buffers (pstr,
4058	MAX (min_len,
4059	MIN (pstr->len, pstr->bufs_len * `2`)));
4060	if (BE (ret != REG_NOERROR, `0`))
4061	return ret;
4062
4063	if (mctx->state_log != NULL)
4064	{
4065	/ And double the length of state_log. /
4066	/ XXX We have no indication of the size of this buffer. If this*
4067	allocation fail we have no indication that the state_log array
4068	does not have the right size. /*
4069	re_dfastate_t *new_array = re_realloc (mctx->state_log, re_dfastate_t ,
4070	pstr->bufs_len + `1`);
4071	if (BE (new_array == NULL, `0`))
4072	return REG_ESPACE;
4073	mctx->state_log = new_array;
4074	}
4075
4076	/ Then reconstruct the buffers. /
4077	if (pstr->icase)
4078	{
4079	#ifdef RE_ENABLE_I18N
4080	if (pstr->mb_cur_max > `1`)
4081	{
4082	ret = build_wcs_upper_buffer (pstr);
4083	if (BE (ret != REG_NOERROR, `0`))
4084	return ret;
4085	}
4086	else
4087	#endif /* RE_ENABLE_I18N */
4088	build_upper_buffer (pstr);
4089	}
4090	else
4091	{
4092	#ifdef RE_ENABLE_I18N
4093	if (pstr->mb_cur_max > `1`)
4094	build_wcs_buffer (pstr);
4095	else
4096	#endif /* RE_ENABLE_I18N */
4097	{
4098	if (pstr->trans != NULL)
4099	re_string_translate_buffer (pstr);
4100	}
4101	}
4102	return REG_NOERROR;
4103	}
4104
4105
4106	/ Functions for matching context. /
4107
4108	/ Initialize MCTX. /
4109
4110	static reg_errcode_t
4111	__attribute_warn_unused_result__
4112	match_ctx_init (re_match_context_t mctx, int* eflags, Idx n)
4113	{
4114	mctx->eflags = eflags;
4115	mctx->match_last = -`1`;
4116	if (n > `0`)
4117	{
4118	/ Avoid overflow. /
4119	size_t max_object_size =
4120	MAX (sizeof (struct re_backref_cache_entry),
4121	sizeof (re_sub_match_top_t *));
4122	if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n, `0`))
4123	return REG_ESPACE;
4124
4125	mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4126	mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4127	if (BE (mctx->bkref_ents == NULL \|\| mctx->sub_tops == NULL, `0`))
4128	return REG_ESPACE;
4129	}
4130	/ Already zero-ed by the caller.*
4131	else
4132	mctx->bkref_ents = NULL;
4133	mctx->nbkref_ents = 0;
4134	mctx->nsub_tops = 0; /*
4135	mctx->abkref_ents = n;
4136	mctx->max_mb_elem_len = `1`;
4137	mctx->asub_tops = n;
4138	return REG_NOERROR;
4139	}
4140
4141	/ Clean the entries which depend on the current input in MCTX.*
4142	This function must be invoked when the matcher changes the start index
4143	of the input, or changes the input string. /*
4144
4145	static void
4146	match_ctx_clean (re_match_context_t *mctx)
4147	{
4148	Idx st_idx;
4149	for (st_idx = `0`; st_idx < mctx->nsub_tops; ++st_idx)
4150	{
4151	Idx sl_idx;
4152	re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4153	for (sl_idx = `0`; sl_idx < top->nlasts; ++sl_idx)
4154	{
4155	re_sub_match_last_t *last = top->lasts[sl_idx];
4156	re_free (last->path.array);
4157	re_free (last);
4158	}
4159	re_free (top->lasts);
4160	if (top->path)
4161	{
4162	re_free (top->path->array);
4163	re_free (top->path);
4164	}
4165	re_free (top);
4166	}
4167
4168	mctx->nsub_tops = `0`;
4169	mctx->nbkref_ents = `0`;
4170	}
4171
4172	/ Free all the memory associated with MCTX. /
4173
4174	static void
4175	match_ctx_free (re_match_context_t *mctx)
4176	{
4177	/ First, free all the memory associated with MCTX->SUB_TOPS. /
4178	match_ctx_clean (mctx);
4179	re_free (mctx->sub_tops);
4180	re_free (mctx->bkref_ents);
4181	}
4182
4183	/ Add a new backreference entry to MCTX.*
4184	Note that we assume that caller never call this function with duplicate
4185	entry, and call with STR_IDX which isn't smaller than any existing entry.
4186	*/
4187
4188	static reg_errcode_t
4189	__attribute_warn_unused_result__
4190	match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
4191	Idx to)
4192	{
4193	if (mctx->nbkref_ents >= mctx->abkref_ents)
4194	{
4195	struct re_backref_cache_entry* new_entry;
4196	new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4197	mctx->abkref_ents * `2`);
4198	if (BE (new_entry == NULL, `0`))
4199	{
4200	re_free (mctx->bkref_ents);
4201	return REG_ESPACE;
4202	}
4203	mctx->bkref_ents = new_entry;
4204	memset (mctx->bkref_ents + mctx->nbkref_ents, `'\0'`,
4205	sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4206	mctx->abkref_ents *= `2`;
4207	}
4208	if (mctx->nbkref_ents > `0`
4209	&& mctx->bkref_ents[mctx->nbkref_ents - `1`].str_idx == str_idx)
4210	mctx->bkref_ents[mctx->nbkref_ents - `1`].more = `1`;
4211
4212	mctx->bkref_ents[mctx->nbkref_ents].node = node;
4213	mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4214	mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4215	mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4216
4217	/ This is a cache that saves negative results of check_dst_limits_calc_pos.*
4218	If bit N is clear, means that this entry won't epsilon-transition to
4219	an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If
4220	it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4221	such node.
4222
4223	A backreference does not epsilon-transition unless it is empty, so set
4224	to all zeros if FROM != TO. /*
4225	mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4226	= (from == to ? -`1` : `0`);
4227
4228	mctx->bkref_ents[mctx->nbkref_ents++].more = `0`;
4229	if (mctx->max_mb_elem_len < to - from)
4230	mctx->max_mb_elem_len = to - from;
4231	return REG_NOERROR;
4232	}
4233
4234	/ Return the first entry with the same str_idx, or -1 if none is*
4235	found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. /*
4236
4237	static Idx
4238	search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
4239	{
4240	Idx left, right, mid, last;
4241	last = right = mctx->nbkref_ents;
4242	for (left = `0`; left < right;)
4243	{
4244	mid = (left + right) / `2`;
4245	if (mctx->bkref_ents[mid].str_idx < str_idx)
4246	left = mid + `1`;
4247	else
4248	right = mid;
4249	}
4250	if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4251	return left;
4252	else
4253	return -`1`;
4254	}
4255
4256	/ Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches*
4257	at STR_IDX. /*
4258
4259	static reg_errcode_t
4260	__attribute_warn_unused_result__
4261	match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
4262	{
4263	#ifdef DEBUG
4264	assert (mctx->sub_tops != NULL);
4265	assert (mctx->asub_tops > `0`);
4266	#endif
4267	if (BE (mctx->nsub_tops == mctx->asub_tops, `0`))
4268	{
4269	Idx new_asub_tops = mctx->asub_tops * `2`;
4270	re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4271	re_sub_match_top_t *,
4272	new_asub_tops);
4273	if (BE (new_array == NULL, `0`))
4274	return REG_ESPACE;
4275	mctx->sub_tops = new_array;
4276	mctx->asub_tops = new_asub_tops;
4277	}
4278	mctx->sub_tops[mctx->nsub_tops] = calloc (`1`, sizeof (re_sub_match_top_t));
4279	if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, `0`))
4280	return REG_ESPACE;
4281	mctx->sub_tops[mctx->nsub_tops]->node = node;
4282	mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4283	return REG_NOERROR;
4284	}
4285
4286	/ Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches*
4287	at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. /*
4288
4289	static re_sub_match_last_t *
4290	match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
4291	{
4292	re_sub_match_last_t *new_entry;
4293	if (BE (subtop->nlasts == subtop->alasts, `0`))
4294	{
4295	Idx new_alasts = `2` * subtop->alasts + `1`;
4296	re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4297	re_sub_match_last_t *,
4298	new_alasts);
4299	if (BE (new_array == NULL, `0`))
4300	return NULL;
4301	subtop->lasts = new_array;
4302	subtop->alasts = new_alasts;
4303	}
4304	new_entry = calloc (`1`, sizeof (re_sub_match_last_t));
4305	if (BE (new_entry != NULL, `1`))
4306	{
4307	subtop->lasts[subtop->nlasts] = new_entry;
4308	new_entry->node = node;
4309	new_entry->str_idx = str_idx;
4310	++subtop->nlasts;
4311	}
4312	return new_entry;
4313	}
4314
4315	static void
4316	sift_ctx_init (re_sift_context_t sctx, re_dfastate_t *sifted_sts,
4317	re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx)
4318	{
4319	sctx->sifted_states = sifted_sts;
4320	sctx->limited_states = limited_sts;
4321	sctx->last_node = last_node;
4322	sctx->last_str_idx = last_str_idx;
4323	re_node_set_init_empty (&sctx->limits);
4324	}
4325

Browse the source code of glibc/posix/regexec.c