Ruby 3.0.5p211 (2022-11-24 revision ba5cf0f7c52d4d35cc6a173c89eda98ceffa2dcf)
regcomp.c
Go to the documentation of this file.
1/**********************************************************************
2 regcomp.c - Onigmo (Oniguruma-mod) (regular expression library)
3**********************************************************************/
4/*-
5 * Copyright (c) 2002-2013 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include "regparse.h"
32
34
37{
39}
40
41extern int
43{
44 OnigDefaultCaseFoldFlag = case_fold_flag;
45 return 0;
46}
47
48
/*
 * Filler bytes handed to add_bytes() when add_multi_byte_cclass() pads its
 * output. Only compiled when PLATFORM_UNALIGNED_WORD_ACCESS is not defined.
 * Never written to in this listing, so it keeps its static zero initialisation.
 */
49#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
50static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
51#endif
52
53#if 0
/*
 * Disabled (#if 0) helper: copy the bytes in [s, end) into a freshly
 * xmalloc'ed buffer and append a 0 terminator.
 * Returns the new buffer, or NULL when the range is empty or reversed.
 */
54static UChar*
55str_dup(UChar* s, UChar* end)
56{
57 ptrdiff_t len = end - s;
58
59 if (len > 0) {
60 UChar* r = (UChar* )xmalloc(len + 1);
/* NOTE(review): listing line 61 is absent from this extract; no check of r
   for NULL is visible before the copy below -- confirm against upstream. */
62 xmemcpy(r, s, len);
63 r[len] = (UChar )0;
64 return r;
65 }
66 else return NULL;
67}
68#endif
69
70static void
71swap_node(Node* a, Node* b)
72{
73 Node c;
74 c = *a; *a = *b; *b = c;
75
76 if (NTYPE(a) == NT_STR) {
77 StrNode* sn = NSTR(a);
78 if (sn->capa == 0) {
79 size_t len = sn->end - sn->s;
80 sn->s = sn->buf;
81 sn->end = sn->s + len;
82 }
83 }
84
85 if (NTYPE(b) == NT_STR) {
86 StrNode* sn = NSTR(b);
87 if (sn->capa == 0) {
88 size_t len = sn->end - sn->s;
89 sn->s = sn->buf;
90 sn->end = sn->s + len;
91 }
92 }
93}
94
/*
 * Add two distances, clamping the result to ONIG_INFINITE_DISTANCE when the
 * sum would exceed it.
 * NOTE(review): listing lines 98-99 are absent from this extract, so the `if`
 * arm that pairs with the `else` below is not visible here.
 */
95static OnigDistance
96distance_add(OnigDistance d1, OnigDistance d2)
97{
100 else {
101 if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
102 else return ONIG_INFINITE_DISTANCE;
103 }
104}
105
/*
 * Multiply a distance by m. Returns 0 when m is 0, and d * m when d is below
 * ONIG_INFINITE_DISTANCE / m.
 * NOTE(review): listing line 114 (the body of the final `else`) is absent
 * from this extract, so the result for larger d is not visible here.
 */
106static OnigDistance
107distance_multiply(OnigDistance d, int m)
108{
109 if (m == 0) return 0;
110
111 if (d < ONIG_INFINITE_DISTANCE / m)
112 return d * m;
113 else
115}
116
117static int
118bitset_is_empty(BitSetRef bs)
119{
120 int i;
121 for (i = 0; i < BITSET_SIZE; i++) {
122 if (bs[i] != 0) return 0;
123 }
124 return 1;
125}
126
127#ifdef ONIG_DEBUG
128static int
129bitset_on_num(BitSetRef bs)
130{
131 int i, n;
132
133 n = 0;
134 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
135 if (BITSET_AT(bs, i)) n++;
136 }
137 return n;
138}
139#endif
140
141// Attempt to right size allocated buffers for a regex post compile
142static void
143onig_reg_resize(regex_t *reg)
144{
145 do {
146 if (!reg->used) {
147 xfree(reg->p);
148 reg->alloc = 0;
149 reg->p = 0;
150 }
151 else if (reg->alloc > reg->used) {
152 unsigned char *new_ptr = xrealloc(reg->p, reg->used);
153 // Skip the right size optimization if memory allocation fails
154 if (new_ptr) {
155 reg->alloc = reg->used;
156 reg->p = new_ptr;
157 }
158 }
159 } while ((reg = reg->chain) != 0);
160}
161
162extern int
164{
165 if (size <= 0) {
166 size = 0;
167 buf->p = NULL;
168 }
169 else {
170 buf->p = (UChar* )xmalloc(size);
171 if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
172 }
173
174 buf->alloc = (unsigned int )size;
175 buf->used = 0;
176 return 0;
177}
178
179
180#ifdef USE_SUBEXP_CALL
181
/*
 * Initialise uslist with room for `size` UnsetAddr entries and zero entries
 * in use. Returns 0 when control reaches the end of the function.
 * NOTE(review): listing line 188 is absent from this extract; no check of the
 * xmalloc result is visible here -- confirm against upstream.
 */
182static int
183unset_addr_list_init(UnsetAddrList* uslist, int size)
184{
185 UnsetAddr* p;
186
187 p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
189 uslist->num = 0;
190 uslist->alloc = size;
191 uslist->us = p;
192 return 0;
193}
194
195static void
196unset_addr_list_end(UnsetAddrList* uslist)
197{
198 if (IS_NOT_NULL(uslist->us))
199 xfree(uslist->us);
200}
201
/*
 * Append an (offset, target node) pair to uslist, doubling the capacity of
 * the entry array first when it is full.
 * Returns 0 when control reaches the end of the function.
 * NOTE(review): listing line 211 is absent from this extract; no check of the
 * xrealloc result is visible before p is stored -- confirm against upstream.
 */
202static int
203unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
204{
205 UnsetAddr* p;
206 int size;
207
208 if (uslist->num >= uslist->alloc) {
209 size = uslist->alloc * 2;
210 p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
212 uslist->alloc = size;
213 uslist->us = p;
214 }
215
216 uslist->us[uslist->num].offset = offset;
217 uslist->us[uslist->num].target = node;
218 uslist->num++;
219 return 0;
220}
221#endif /* USE_SUBEXP_CALL */
222
223
/*
 * Append one opcode to reg through the BBUF_ADD1 macro.
 * Returns 0 when control reaches the end of the function.
 * NOTE(review): BBUF_ADD1 is defined outside this listing; callers test the
 * result for non-zero, so the macro presumably returns early on failure --
 * confirm.
 */
224static int
225add_opcode(regex_t* reg, int opcode)
226{
227 BBUF_ADD1(reg, opcode);
228 return 0;
229}
230
231#ifdef USE_COMBINATION_EXPLOSION_CHECK
/*
 * Emit a state-check number into reg (combination-explosion-check builds).
 * NOTE(review): listing lines 235 and 237 are absent from this extract, so
 * the statements that actually use `num` are not visible here.
 */
232static int
233add_state_check_num(regex_t* reg, int num)
234{
236
238 return 0;
239}
240#endif
241
242static int
243add_rel_addr(regex_t* reg, int addr)
244{
245 RelAddrType ra = (RelAddrType )addr;
246
247 BBUF_ADD(reg, &ra, SIZE_RELADDR);
248 return 0;
249}
250
251static int
252add_abs_addr(regex_t* reg, int addr)
253{
254 AbsAddrType ra = (AbsAddrType )addr;
255
256 BBUF_ADD(reg, &ra, SIZE_ABSADDR);
257 return 0;
258}
259
/*
 * Append a length operand to reg through BBUF_ADD with a size of SIZE_LENGTH.
 * NOTE(review): listing line 263, which declares `l`, is absent from this
 * extract; how `len` is converted into `l` is not visible here.
 */
260static int
261add_length(regex_t* reg, OnigDistance len)
262{
264
265 BBUF_ADD(reg, &l, SIZE_LENGTH);
266 return 0;
267}
268
/*
 * Append a memory-number operand to reg through BBUF_ADD with a size of
 * SIZE_MEMNUM.
 * NOTE(review): listing line 272, which declares `n`, is absent from this
 * extract; how `num` is converted into `n` is not visible here.
 */
269static int
270add_mem_num(regex_t* reg, int num)
271{
273
274 BBUF_ADD(reg, &n, SIZE_MEMNUM);
275 return 0;
276}
277
278#if 0
279static int
280add_pointer(regex_t* reg, void* addr)
281{
282 PointerType ptr = (PointerType )addr;
283
284 BBUF_ADD(reg, &ptr, SIZE_POINTER);
285 return 0;
286}
287#endif
288
/*
 * Append the option value to reg through BBUF_ADD with a size of SIZE_OPTION.
 * The bytes are taken from the by-value parameter itself.
 * Returns 0 when control reaches the end of the function.
 */
289static int
290add_option(regex_t* reg, OnigOptionType option)
291{
292 BBUF_ADD(reg, &option, SIZE_OPTION);
293 return 0;
294}
295
296static int
297add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
298{
299 int r;
300
301 r = add_opcode(reg, opcode);
302 if (r) return r;
303 r = add_rel_addr(reg, addr);
304 return r;
305}
306
/*
 * Append `len` bytes starting at `bytes` to reg through the BBUF_ADD macro.
 * Returns 0 when control reaches the end of the function.
 */
307static int
308add_bytes(regex_t* reg, UChar* bytes, OnigDistance len)
309{
310 BBUF_ADD(reg, bytes, len);
311 return 0;
312}
313
/*
 * Append the bit set `bs` to reg through BBUF_ADD with a size of SIZE_BITSET.
 * Returns 0 when control reaches the end of the function.
 */
314static int
315add_bitset(regex_t* reg, BitSetRef bs)
316{
317 BBUF_ADD(reg, bs, SIZE_BITSET);
318 return 0;
319}
320
321static int
322add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
323{
324 int r;
325
326 r = add_opcode(reg, opcode);
327 if (r) return r;
328 r = add_option(reg, option);
329 return r;
330}
331
332static int compile_length_tree(Node* node, regex_t* reg);
333static int compile_tree(Node* node, regex_t* reg);
334
335
/*
 * True for the exact-string opcodes after which add_compile_string() emits a
 * length operand (OP_EXACTN, OP_EXACTMB2N, OP_EXACTMB3N, OP_EXACTMBN and
 * OP_EXACTN_IC). `op` is evaluated several times.
 */
336#define IS_NEED_STR_LEN_OP_EXACT(op) \
337 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
338 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
339
340static int
341select_str_opcode(int mb_len, OnigDistance byte_len, int ignore_case)
342{
343 int op;
344 OnigDistance str_len = (byte_len + mb_len - 1) / mb_len;
345
346 if (ignore_case) {
347 switch (str_len) {
348 case 1: op = OP_EXACT1_IC; break;
349 default: op = OP_EXACTN_IC; break;
350 }
351 }
352 else {
353 switch (mb_len) {
354 case 1:
355 switch (str_len) {
356 case 1: op = OP_EXACT1; break;
357 case 2: op = OP_EXACT2; break;
358 case 3: op = OP_EXACT3; break;
359 case 4: op = OP_EXACT4; break;
360 case 5: op = OP_EXACT5; break;
361 default: op = OP_EXACTN; break;
362 }
363 break;
364
365 case 2:
366 switch (str_len) {
367 case 1: op = OP_EXACTMB2N1; break;
368 case 2: op = OP_EXACTMB2N2; break;
369 case 3: op = OP_EXACTMB2N3; break;
370 default: op = OP_EXACTMB2N; break;
371 }
372 break;
373
374 case 3:
375 op = OP_EXACTMB3N;
376 break;
377
378 default:
379 op = OP_EXACTMBN;
380 break;
381 }
382 }
383 return op;
384}
385
386static int
387compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
388{
389 int r;
390 int saved_num_null_check = reg->num_null_check;
391
392 if (empty_info != 0) {
393 r = add_opcode(reg, OP_NULL_CHECK_START);
394 if (r) return r;
395 r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
396 if (r) return r;
397 reg->num_null_check++;
398 }
399
400 r = compile_tree(node, reg);
401 if (r) return r;
402
403 if (empty_info != 0) {
404 if (empty_info == NQ_TARGET_IS_EMPTY)
405 r = add_opcode(reg, OP_NULL_CHECK_END);
406 else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
407 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
408 else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
409 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
410
411 if (r) return r;
412 r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
413 }
414 return r;
415}
416
417#ifdef USE_SUBEXP_CALL
418static int
419compile_call(CallNode* node, regex_t* reg)
420{
421 int r;
422
423 r = add_opcode(reg, OP_CALL);
424 if (r) return r;
425 r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
426 node->target);
427 if (r) return r;
428 r = add_abs_addr(reg, 0 /*dummy addr.*/);
429 return r;
430}
431#endif
432
433static int
434compile_tree_n_times(Node* node, int n, regex_t* reg)
435{
436 int i, r;
437
438 for (i = 0; i < n; i++) {
439 r = compile_tree(node, reg);
440 if (r) return r;
441 }
442 return 0;
443}
444
/*
 * Compute the size that add_compile_string() will emit for one run of
 * characters: the opcode selected by select_str_opcode(), any length
 * operands, and byte_len bytes of string data.
 * `s` and `reg` are unused (ARG_UNUSED).
 * NOTE(review): listing lines 452 and 455 are absent from this extract, so
 * the initial assignment to `len` and the condition guarding the second
 * SIZE_LENGTH addition are not visible here.
 */
445static int
446add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance byte_len,
447 regex_t* reg ARG_UNUSED, int ignore_case)
448{
449 int len;
450 int op = select_str_opcode(mb_len, byte_len, ignore_case);
451
453
454 if (op == OP_EXACTMBN) len += SIZE_LENGTH;
456 len += SIZE_LENGTH;
457
458 len += (int )byte_len;
459 return len;
460}
461
462static int
463add_compile_string(UChar* s, int mb_len, OnigDistance byte_len,
464 regex_t* reg, int ignore_case)
465{
466 int op = select_str_opcode(mb_len, byte_len, ignore_case);
467 add_opcode(reg, op);
468
469 if (op == OP_EXACTMBN)
470 add_length(reg, mb_len);
471
472 if (IS_NEED_STR_LEN_OP_EXACT(op)) {
473 if (op == OP_EXACTN_IC)
474 add_length(reg, byte_len);
475 else
476 add_length(reg, byte_len / mb_len);
477 }
478
479 add_bytes(reg, s, byte_len);
480 return 0;
481}
482
483
484static int
485compile_length_string_node(Node* node, regex_t* reg)
486{
487 int rlen, r, len, prev_len, blen, ambig;
488 OnigEncoding enc = reg->enc;
489 UChar *p, *prev;
490 StrNode* sn;
491
492 sn = NSTR(node);
493 if (sn->end <= sn->s)
494 return 0;
495
496 ambig = NSTRING_IS_AMBIG(node);
497
498 p = prev = sn->s;
499 prev_len = enclen(enc, p, sn->end);
500 p += prev_len;
501 blen = prev_len;
502 rlen = 0;
503
504 for (; p < sn->end; ) {
505 len = enclen(enc, p, sn->end);
506 if (len == prev_len || ambig) {
507 blen += len;
508 }
509 else {
510 r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
511 rlen += r;
512 prev = p;
513 blen = len;
514 prev_len = len;
515 }
516 p += len;
517 }
518 r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
519 rlen += r;
520 return rlen;
521}
522
523static int
524compile_length_string_raw_node(StrNode* sn, regex_t* reg)
525{
526 if (sn->end <= sn->s)
527 return 0;
528
529 return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
530}
531
532static int
533compile_string_node(Node* node, regex_t* reg)
534{
535 int r, len, prev_len, blen, ambig;
536 OnigEncoding enc = reg->enc;
537 UChar *p, *prev, *end;
538 StrNode* sn;
539
540 sn = NSTR(node);
541 if (sn->end <= sn->s)
542 return 0;
543
544 end = sn->end;
545 ambig = NSTRING_IS_AMBIG(node);
546
547 p = prev = sn->s;
548 prev_len = enclen(enc, p, end);
549 p += prev_len;
550 blen = prev_len;
551
552 for (; p < end; ) {
553 len = enclen(enc, p, end);
554 if (len == prev_len || ambig) {
555 blen += len;
556 }
557 else {
558 r = add_compile_string(prev, prev_len, blen, reg, ambig);
559 if (r) return r;
560
561 prev = p;
562 blen = len;
563 prev_len = len;
564 }
565
566 p += len;
567 }
568 return add_compile_string(prev, prev_len, blen, reg, ambig);
569}
570
571static int
572compile_string_raw_node(StrNode* sn, regex_t* reg)
573{
574 if (sn->end <= sn->s)
575 return 0;
576
577 return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
578}
579
/*
 * Emit the multi-byte part of a character class: a length operand followed by
 * the contents of mbuf.
 *
 * With PLATFORM_UNALIGNED_WORD_ACCESS the data is written directly. Otherwise
 * PadBuf bytes are written before and after the data so that the total is
 * always mbuf->used + (WORD_ALIGNMENT_SIZE - 1), matching the size reported
 * by compile_length_cclass_node().
 *
 * NOTE(review): listing line 588, which declares `p`, is absent from this
 * extract, so the address that GET_ALIGNMENT_PAD_SIZE aligns is not visible.
 * The status of add_length() and of the padding add_bytes() calls is not
 * checked; only the status of the data add_bytes() is returned.
 */
580static int
581add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
582{
583#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
584 add_length(reg, mbuf->used);
585 return add_bytes(reg, mbuf->p, mbuf->used);
586#else
587 int r, pad_size;
589
590 GET_ALIGNMENT_PAD_SIZE(p, pad_size);
591 add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
592 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
593
594 r = add_bytes(reg, mbuf->p, mbuf->used);
595
596 /* padding for return value from compile_length_cclass_node() to be fix. */
597 pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
598 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
599 return r;
600#endif
601}
602
/*
 * Compute the size compile_cclass_node() will emit for a character class.
 * When the class has a multi-byte buffer, SIZE_LENGTH plus the buffer size is
 * added, plus (WORD_ALIGNMENT_SIZE - 1) of padding on platforms without
 * unaligned word access.
 * NOTE(review): listing lines 609, 613 and 616 are absent from this extract,
 * so the base value assigned to `len` in each branch is not visible here.
 */
603static int
604compile_length_cclass_node(CClassNode* cc, regex_t* reg)
605{
606 int len;
607
608 if (IS_NULL(cc->mbuf)) {
610 }
611 else {
612 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
614 }
615 else {
617 }
618#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
619 len += SIZE_LENGTH + cc->mbuf->used;
620#else
621 len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
622#endif
623 }
624
625 return len;
626}
627
628static int
629compile_cclass_node(CClassNode* cc, regex_t* reg)
630{
631 int r;
632
633 if (IS_NULL(cc->mbuf)) {
634 if (IS_NCCLASS_NOT(cc))
635 add_opcode(reg, OP_CCLASS_NOT);
636 else
637 add_opcode(reg, OP_CCLASS);
638
639 r = add_bitset(reg, cc->bs);
640 }
641 else {
642 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
643 if (IS_NCCLASS_NOT(cc))
644 add_opcode(reg, OP_CCLASS_MB_NOT);
645 else
646 add_opcode(reg, OP_CCLASS_MB);
647
648 r = add_multi_byte_cclass(cc->mbuf, reg);
649 }
650 else {
651 if (IS_NCCLASS_NOT(cc))
652 add_opcode(reg, OP_CCLASS_MIX_NOT);
653 else
654 add_opcode(reg, OP_CCLASS_MIX);
655
656 r = add_bitset(reg, cc->bs);
657 if (r) return r;
658 r = add_multi_byte_cclass(cc->mbuf, reg);
659 }
660 }
661
662 return r;
663}
664
/*
 * Record the [lower, upper] bounds for repeat number `id` in
 * reg->repeat_range, allocating or growing that array when it is empty or too
 * small for `id`. An infinite upper bound is stored as 0x7fffffff.
 * Returns 0 when control reaches the end of the function.
 * NOTE(review): listing lines 670, 673-674, 676, 680-681 and 683 are absent
 * from this extract, so the declaration of `p`, both allocation calls, their
 * failure handling and the computation of `n` are not visible here.
 */
665static int
666entry_repeat_range(regex_t* reg, int id, int lower, int upper)
667{
668#define REPEAT_RANGE_ALLOC 4
669
671
672 if (reg->repeat_range_alloc == 0) {
675 reg->repeat_range = p;
677 }
678 else if (reg->repeat_range_alloc <= id) {
679 int n;
682 sizeof(OnigRepeatRange) * n);
684 reg->repeat_range = p;
685 reg->repeat_range_alloc = n;
686 }
687 else {
688 p = reg->repeat_range;
689 }
690
691 p[id].lower = lower;
692 p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
693 return 0;
694}
695
/*
 * Emit a counted repeat: OP_REPEAT (or OP_REPEAT_NG when not greedy) with a
 * fresh repeat ID and a relative address of target_len + SIZE_OP_REPEAT_INC,
 * then the target (with an empty check if requested), then one of the
 * OP_REPEAT_INC* opcodes carrying the same ID.
 * The bounds are registered under that ID through entry_repeat_range().
 * Returns the first non-zero status from any step, otherwise 0.
 * Note that reg->num_repeat is incremented even when emitting the ID fails.
 * NOTE(review): listing line 721, the remainder of the `if (` condition that
 * selects the _SG opcode variants, is absent from this extract.
 */
696static int
697compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
698 regex_t* reg)
699{
700 int r;
701 int num_repeat = reg->num_repeat;
702
703 r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
704 if (r) return r;
705 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
706 reg->num_repeat++;
707 if (r) return r;
708 r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
709 if (r) return r;
710
711 r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
712 if (r) return r;
713
714 r = compile_tree_empty_check(qn->target, reg, empty_info);
715 if (r) return r;
716
717 if (
718#ifdef USE_SUBEXP_CALL
719 reg->num_call > 0 ||
720#endif
722 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
723 }
724 else {
725 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
726 }
727 if (r) return r;
728 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
729 return r;
730}
731
732static int
733is_anychar_star_quantifier(QtfrNode* qn)
734{
735 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
736 NTYPE(qn->target) == NT_CANY)
737 return 1;
738 else
739 return 0;
740}
741
/* Size threshold the quantifier compilers compare tlen-based products
   against when deciding whether to expand a repeat inline. */
742#define QUANTIFIER_EXPAND_LIMIT_SIZE 50
/* True when the local variable `ckn` of the enclosing function is positive;
   only usable where such a variable is in scope. */
743#define CKN_ON (ckn > 0)
744
745#ifdef USE_COMBINATION_EXPLOSION_CHECK
746
/*
 * USE_COMBINATION_EXPLOSION_CHECK build: compute the size that
 * compile_quantifier_node() will emit for qn.
 *
 * Returns the computed length, or the negative value from
 * compile_length_tree() when measuring the target fails. `ckn` is the
 * quantifier's state-check number when the regex has any such checks (else
 * 0), and `cklen` is the extra operand size that implies.
 *
 * NOTE(review): listing lines 765, 773, 780, 788 and 818 are absent from this
 * extract. Missing are the condition choosing between the two ANYCHAR_STAR
 * sizes, the assignment to mod_tlen for a non-zero empty_info, two `len`
 * initialisers, and the start of the final `len =` expression.
 */
747static int
748compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
749{
750 int len, mod_tlen, cklen;
751 int ckn;
752 int infinite = IS_REPEAT_INFINITE(qn->upper);
753 int empty_info = qn->target_empty_info;
754 int tlen = compile_length_tree(qn->target, reg);
755
756 if (tlen < 0) return tlen;
757
758 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
759
760 cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
761
762 /* anychar repeat */
763 if (NTYPE(qn->target) == NT_CANY) {
764 if (qn->greedy && infinite) {
766 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
767 else
768 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
769 }
770 }
771
772 if (empty_info != 0)
774 else
775 mod_tlen = tlen;
776
777 if (infinite && qn->lower <= 1) {
778 if (qn->greedy) {
779 if (qn->lower == 1)
781 else
782 len = 0;
783
784 len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
785 }
786 else {
787 if (qn->lower == 0)
789 else
790 len = 0;
791
792 len += mod_tlen + SIZE_OP_PUSH + cklen;
793 }
794 }
795 else if (qn->upper == 0) {
796 if (qn->is_referred != 0) /* /(?<n>..){0}/ */
797 len = SIZE_OP_JUMP + tlen;
798 else
799 len = 0;
800 }
801 else if (qn->upper == 1 && qn->greedy) {
802 if (qn->lower == 0) {
803 if (CKN_ON) {
804 len = SIZE_OP_STATE_CHECK_PUSH + tlen;
805 }
806 else {
807 len = SIZE_OP_PUSH + tlen;
808 }
809 }
810 else {
811 len = tlen;
812 }
813 }
814 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
815 len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
816 }
817 else {
819 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
820 if (CKN_ON)
821 len += SIZE_OP_STATE_CHECK;
822 }
823
824 return len;
825}
826
/*
 * USE_COMBINATION_EXPLOSION_CHECK build: emit bytecode for a quantifier node.
 *
 * Cases, in the order tested:
 *   - greedy infinite repeat of any-char: `lower` copies of the target, then
 *     an ANYCHAR_STAR-family opcode
 *   - infinite with lower <= 1: PUSH/JUMP loop (state-check variants when
 *     CKN_ON)
 *   - upper == 0: nothing, unless the group is referred to, in which case the
 *     target is emitted behind a JUMP over it
 *   - greedy with upper == 1: optional PUSH, then the target
 *   - non-greedy '??': PUSH, JUMP, target
 *   - anything else: compile_range_repeat_node()
 *
 * Returns the first non-zero status from any step (or the negative value from
 * compile_length_tree()), otherwise 0.
 *
 * NOTE(review): listing lines 859-860, 864 and 876 are absent from this
 * extract. Missing are the opcode operands of two add_opcode() calls in the
 * any-char branch and the assignment to mod_tlen for a non-zero empty_info.
 */
827static int
828compile_quantifier_node(QtfrNode* qn, regex_t* reg)
829{
830 int r, mod_tlen;
831 int ckn;
832 int infinite = IS_REPEAT_INFINITE(qn->upper);
833 int empty_info = qn->target_empty_info;
834 int tlen = compile_length_tree(qn->target, reg);
835
836 if (tlen < 0) return tlen;
837
838 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
839
840 if (is_anychar_star_quantifier(qn)) {
841 r = compile_tree_n_times(qn->target, qn->lower, reg);
842 if (r) return r;
843 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
844 if (IS_MULTILINE(reg->options))
845 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
846 else
847 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
848 if (r) return r;
849 if (CKN_ON) {
850 r = add_state_check_num(reg, ckn);
851 if (r) return r;
852 }
853
854 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
855 }
856 else {
857 if (IS_MULTILINE(reg->options)) {
858 r = add_opcode(reg, (CKN_ON ?
861 }
862 else {
863 r = add_opcode(reg, (CKN_ON ?
865 : OP_ANYCHAR_STAR));
866 }
867 if (r) return r;
868 if (CKN_ON)
869 r = add_state_check_num(reg, ckn);
870
871 return r;
872 }
873 }
874
875 if (empty_info != 0)
877 else
878 mod_tlen = tlen;
879
880 if (infinite && qn->lower <= 1) {
881 if (qn->greedy) {
882 if (qn->lower == 1) {
883 r = add_opcode_rel_addr(reg, OP_JUMP,
884 (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
885 if (r) return r;
886 }
887
888 if (CKN_ON) {
889 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
890 if (r) return r;
891 r = add_state_check_num(reg, ckn);
892 if (r) return r;
893 r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
894 }
895 else {
896 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
897 }
898 if (r) return r;
899 r = compile_tree_empty_check(qn->target, reg, empty_info);
900 if (r) return r;
901 r = add_opcode_rel_addr(reg, OP_JUMP,
902 -(mod_tlen + (int )SIZE_OP_JUMP
903 + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
904 }
905 else {
906 if (qn->lower == 0) {
907 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
908 if (r) return r;
909 }
910 r = compile_tree_empty_check(qn->target, reg, empty_info);
911 if (r) return r;
912 if (CKN_ON) {
913 r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
914 if (r) return r;
915 r = add_state_check_num(reg, ckn);
916 if (r) return r;
917 r = add_rel_addr(reg,
918 -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
919 }
920 else
921 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
922 }
923 }
924 else if (qn->upper == 0) {
925 if (qn->is_referred != 0) { /* /(?<n>..){0}/ */
926 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
927 if (r) return r;
928 r = compile_tree(qn->target, reg);
929 }
930 else
931 r = 0;
932 }
933 else if (qn->upper == 1 && qn->greedy) {
934 if (qn->lower == 0) {
935 if (CKN_ON) {
936 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
937 if (r) return r;
938 r = add_state_check_num(reg, ckn);
939 if (r) return r;
940 r = add_rel_addr(reg, tlen);
941 }
942 else {
943 r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
944 }
945 if (r) return r;
946 }
947
948 r = compile_tree(qn->target, reg);
949 }
950 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
951 if (CKN_ON) {
952 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
953 if (r) return r;
954 r = add_state_check_num(reg, ckn);
955 if (r) return r;
956 r = add_rel_addr(reg, SIZE_OP_JUMP);
957 }
958 else {
959 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
960 }
961
962 if (r) return r;
963 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
964 if (r) return r;
965 r = compile_tree(qn->target, reg);
966 }
967 else {
968 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
969 if (CKN_ON) {
970 if (r) return r;
971 r = add_opcode(reg, OP_STATE_CHECK);
972 if (r) return r;
973 r = add_state_check_num(reg, ckn);
974 }
975 }
976 return r;
977}
978
979#else /* USE_COMBINATION_EXPLOSION_CHECK */
980
/*
 * Build without USE_COMBINATION_EXPLOSION_CHECK: compute the size that
 * compile_quantifier_node() will emit for qn.
 *
 * Returns the computed length, or the negative value from
 * compile_length_tree() when measuring the target fails.
 *
 * NOTE(review): listing lines 994, 1002, 1018, 1021-1022, 1034 and 1042 are
 * absent from this extract. Missing are several conditions and `len`
 * updates: the ANYCHAR_STAR choice, the mod_tlen assignment for a non-zero
 * empty_info, the head_exact / next_head_exact sizes, the end of the
 * bounded-greedy condition, and the start of the final `len =` expression.
 */
981static int
982compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
983{
984 int len, mod_tlen;
985 int infinite = IS_REPEAT_INFINITE(qn->upper);
986 int empty_info = qn->target_empty_info;
987 int tlen = compile_length_tree(qn->target, reg);
988
989 if (tlen < 0) return tlen;
990
991 /* anychar repeat */
992 if (NTYPE(qn->target) == NT_CANY) {
993 if (qn->greedy && infinite) {
995 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
996 else
997 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
998 }
999 }
1000
1001 if (empty_info != 0)
1003 else
1004 mod_tlen = tlen;
1005
1006 if (infinite &&
1007 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1008 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1009 len = SIZE_OP_JUMP;
1010 }
1011 else {
1012 len = tlen * qn->lower;
1013 }
1014
1015 if (qn->greedy) {
1016#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1017 if (IS_NOT_NULL(qn->head_exact))
1019 else
1020#endif
1023 else
1024 len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
1025 }
1026 else
1027 len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
1028 }
1029 else if (qn->upper == 0 && qn->is_referred != 0) { /* /(?<n>..){0}/ */
1030 len = SIZE_OP_JUMP + tlen;
1031 }
1032 else if (!infinite && qn->greedy &&
1033 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1035 len = tlen * qn->lower;
1036 len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
1037 }
1038 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1039 len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
1040 }
1041 else {
1043 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
1044 }
1045
1046 return len;
1047}
1048
/*
 * Build without USE_COMBINATION_EXPLOSION_CHECK: emit bytecode for a
 * quantifier node.
 *
 * Cases, in the order tested:
 *   - greedy infinite repeat of any-char: `lower` copies of the target, then
 *     an ANYCHAR_STAR-family opcode
 *   - infinite with a small enough expansion: `lower` copies of the target
 *     (or a JUMP into the loop body), then a PUSH/JUMP loop. The greedy loop
 *     uses a peek/exact push variant when a head character is known.
 *   - upper == 0 on a referred group: the target behind a JUMP over it
 *   - bounded greedy repeat: `lower` copies, then (upper - lower) optional
 *     copies, each guarded by an OP_PUSH that skips the remainder
 *   - non-greedy '??': PUSH, JUMP, target
 *   - anything else: compile_range_repeat_node()
 *
 * Returns the first non-zero status from any step (or the negative value from
 * compile_length_tree()), otherwise 0. The status of the single-byte
 * add_bytes() calls that follow the peek/exact push opcodes is not checked.
 *
 * NOTE(review): listing lines 1079, 1092 and 1155 are absent from this
 * extract. Missing are the mod_tlen assignment for a non-zero empty_info, one
 * `if` condition in the lower == 1 JUMP selection, and the end of the
 * bounded-greedy condition.
 */
1049static int
1050compile_quantifier_node(QtfrNode* qn, regex_t* reg)
1051{
1052 int i, r, mod_tlen;
1053 int infinite = IS_REPEAT_INFINITE(qn->upper);
1054 int empty_info = qn->target_empty_info;
1055 int tlen = compile_length_tree(qn->target, reg);
1056
1057 if (tlen < 0) return tlen;
1058
1059 if (is_anychar_star_quantifier(qn)) {
1060 r = compile_tree_n_times(qn->target, qn->lower, reg);
1061 if (r) return r;
1062 if (IS_NOT_NULL(qn->next_head_exact)) {
1063 if (IS_MULTILINE(reg->options))
1064 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1065 else
1066 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
1067 if (r) return r;
1068 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1069 }
1070 else {
1071 if (IS_MULTILINE(reg->options))
1072 return add_opcode(reg, OP_ANYCHAR_ML_STAR);
1073 else
1074 return add_opcode(reg, OP_ANYCHAR_STAR);
1075 }
1076 }
1077
1078 if (empty_info != 0)
1080 else
1081 mod_tlen = tlen;
1082
1083 if (infinite &&
1084 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1085 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1086 if (qn->greedy) {
1087#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1088 if (IS_NOT_NULL(qn->head_exact))
1089 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
1090 else
1091#endif
1093 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
1094 else
1095 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
1096 }
1097 else {
1098 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
1099 }
1100 if (r) return r;
1101 }
1102 else {
1103 r = compile_tree_n_times(qn->target, qn->lower, reg);
1104 if (r) return r;
1105 }
1106
1107 if (qn->greedy) {
1108#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1109 if (IS_NOT_NULL(qn->head_exact)) {
1110 r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
1111 mod_tlen + SIZE_OP_JUMP);
1112 if (r) return r;
1113 add_bytes(reg, NSTR(qn->head_exact)->s, 1);
1114 r = compile_tree_empty_check(qn->target, reg, empty_info);
1115 if (r) return r;
1116 r = add_opcode_rel_addr(reg, OP_JUMP,
1117 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
1118 }
1119 else
1120#endif
1121 if (IS_NOT_NULL(qn->next_head_exact)) {
1122 r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
1123 mod_tlen + SIZE_OP_JUMP);
1124 if (r) return r;
1125 add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1126 r = compile_tree_empty_check(qn->target, reg, empty_info);
1127 if (r) return r;
1128 r = add_opcode_rel_addr(reg, OP_JUMP,
1129 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
1130 }
1131 else {
1132 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
1133 if (r) return r;
1134 r = compile_tree_empty_check(qn->target, reg, empty_info);
1135 if (r) return r;
1136 r = add_opcode_rel_addr(reg, OP_JUMP,
1137 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
1138 }
1139 }
1140 else {
1141 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
1142 if (r) return r;
1143 r = compile_tree_empty_check(qn->target, reg, empty_info);
1144 if (r) return r;
1145 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
1146 }
1147 }
1148 else if (qn->upper == 0 && qn->is_referred != 0) { /* /(?<n>..){0}/ */
1149 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1150 if (r) return r;
1151 r = compile_tree(qn->target, reg);
1152 }
1153 else if (!infinite && qn->greedy &&
1154 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1156 int n = qn->upper - qn->lower;
1157
1158 r = compile_tree_n_times(qn->target, qn->lower, reg);
1159 if (r) return r;
1160
1161 for (i = 0; i < n; i++) {
1162 r = add_opcode_rel_addr(reg, OP_PUSH,
1163 (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
1164 if (r) return r;
1165 r = compile_tree(qn->target, reg);
1166 if (r) return r;
1167 }
1168 }
1169 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1170 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
1171 if (r) return r;
1172 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1173 if (r) return r;
1174 r = compile_tree(qn->target, reg);
1175 }
1176 else {
1177 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
1178 }
1179 return r;
1180}
1181#endif /* USE_COMBINATION_EXPLOSION_CHECK */
1182
/*
 * Compute the size compile_option_node() will emit for an option group.
 * The target is measured with reg->options temporarily set to the node's
 * options; the previous options are restored before returning.
 * Returns the target length alone when no dynamic option changes, or the
 * negative value from compile_length_tree() on failure.
 * NOTE(review): listing line 1196, the start of the `return` expression for
 * the dynamic-option case, is absent from this extract.
 */
1183static int
1184compile_length_option_node(EncloseNode* node, regex_t* reg)
1185{
1186 int tlen;
1187 OnigOptionType prev = reg->options;
1188
1189 reg->options = node->option;
1190 tlen = compile_length_tree(node->target, reg);
1191 reg->options = prev;
1192
1193 if (tlen < 0) return tlen;
1194
1195 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1197 + tlen + SIZE_OP_SET_OPTION;
1198 }
1199 else
1200 return tlen;
1201}
1202
1203static int
1204compile_option_node(EncloseNode* node, regex_t* reg)
1205{
1206 int r;
1207 OnigOptionType prev = reg->options;
1208
1209 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1210 r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
1211 if (r) return r;
1212 r = add_opcode_option(reg, OP_SET_OPTION, prev);
1213 if (r) return r;
1214 r = add_opcode(reg, OP_FAIL);
1215 if (r) return r;
1216 }
1217
1218 reg->options = node->option;
1219 r = compile_tree(node->target, reg);
1220 reg->options = prev;
1221
1222 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1223 if (r) return r;
1224 r = add_opcode_option(reg, OP_SET_OPTION, prev);
1225 }
1226 return r;
1227}
1228
/*
 * Compute the size compile_enclose_node() will emit for an enclose node.
 *
 * Option groups are delegated to compile_length_option_node(). For the other
 * types the target length is measured first (0 when there is no target) and
 * a type-specific overhead is added in the switch.
 *
 * Returns the computed length, a negative value from compile_length_tree(),
 * ONIGERR_PARSER_BUG for a malformed condition node, or ONIGERR_TYPE_BUG for
 * an unknown enclose type.
 *
 * NOTE(review): many listing lines are absent from this extract, so most of
 * the per-type overhead expressions are not visible here. The gaps are
 * 1249-1250, 1253, 1256, 1259, 1261, 1267, 1269, 1272, 1276-1277, 1283, 1286,
 * 1291, 1303 and 1311; they include the `case` label and condition that
 * introduce the block using `qn`.
 */
1229static int
1230compile_length_enclose_node(EncloseNode* node, regex_t* reg)
1231{
1232 int len;
1233 int tlen;
1234
1235 if (node->type == ENCLOSE_OPTION)
1236 return compile_length_option_node(node, reg);
1237
1238 if (node->target) {
1239 tlen = compile_length_tree(node->target, reg);
1240 if (tlen < 0) return tlen;
1241 }
1242 else
1243 tlen = 0;
1244
1245 switch (node->type) {
1246 case ENCLOSE_MEMORY:
1247#ifdef USE_SUBEXP_CALL
1248 if (IS_ENCLOSE_CALLED(node)) {
1251 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1252 len += (IS_ENCLOSE_RECURSION(node)
1254 else
1255 len += (IS_ENCLOSE_RECURSION(node)
1257 }
1258 else if (IS_ENCLOSE_RECURSION(node)) {
1260 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1262 }
1263 else
1264#endif
1265 {
1266 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1268 else
1270
1271 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1273 }
1274 break;
1275
1278 QtfrNode* qn = NQTFR(node->target);
1279 tlen = compile_length_tree(qn->target, reg);
1280 if (tlen < 0) return tlen;
1281
1282 len = tlen * qn->lower
1284 }
1285 else {
1287 }
1288 break;
1289
1290 case ENCLOSE_CONDITION:
1292 if (NTYPE(node->target) == NT_ALT) {
1293 Node* x = node->target;
1294
1295 tlen = compile_length_tree(NCAR(x), reg); /* yes-node */
1296 if (tlen < 0) return tlen;
1297 len += tlen + SIZE_OP_JUMP;
1298 if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1299 x = NCDR(x);
1300 tlen = compile_length_tree(NCAR(x), reg); /* no-node */
1301 if (tlen < 0) return tlen;
1302 len += tlen;
1304 }
1305 else {
1306 return ONIGERR_PARSER_BUG;
1307 }
1308 break;
1309
1310 case ENCLOSE_ABSENT:
1312 break;
1313
1314 default:
1315 return ONIGERR_TYPE_BUG;
1316 break;
1317 }
1318
1319 return len;
1320}
1321
1322static int get_char_length_tree(Node* node, regex_t* reg, int* len);
1323
/*
 * Emit the bytecode for an enclose node into reg.
 *
 * ENCLOSE_OPTION is delegated to compile_option_node(); the remaining
 * enclose types are dispatched by the switch below.
 *
 * Returns 0 on success.  On failure it returns the non-zero result of the
 * failing add_xxx()/compile_tree() call, a negative length reported by
 * compile_length_tree(), ONIGERR_PARSER_BUG for a malformed condition node,
 * or ONIGERR_TYPE_BUG for an unknown enclose type.
 *
 * NOTE(review): the embedded listing numbers skip 1338, 1343, 1346, 1349,
 * 1368, 1371, 1398-1399 and 1441 inside this function, so statements are
 * presumably missing from this copy at those points -- confirm against the
 * original file before relying on it.
 */
1324 static int
1325 compile_enclose_node(EncloseNode* node, regex_t* reg)
1326 {
1327 int r, len;
1328
1329 if (node->type == ENCLOSE_OPTION)
1330 return compile_option_node(node, reg);
1331
1332 switch (node->type) {
1333 case ENCLOSE_MEMORY:
1334#ifdef USE_SUBEXP_CALL
/* Called group: emit OP_CALL with the absolute address node->call_addr,
 * flag the address as fixed, then emit an OP_JUMP with relative offset len. */
1335 if (IS_ENCLOSE_CALLED(node)) {
1336 r = add_opcode(reg, OP_CALL);
1337 if (r) return r;
1339 node->state |= NST_ADDR_FIXED;
1340 r = add_abs_addr(reg, (int )node->call_addr);
1341 if (r) return r;
1342 len = compile_length_tree(node->target, reg);
1344 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1345 len += (IS_ENCLOSE_RECURSION(node)
1347 else
1348 len += (IS_ENCLOSE_RECURSION(node)
1350
1351 r = add_opcode_rel_addr(reg, OP_JUMP, len);
1352 if (r) return r;
1353 }
1354#endif
/* Group start marker: the PUSH variant is chosen when the group's bit is
 * set in reg->bt_mem_start.  The group number follows the opcode. */
1355 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1356 r = add_opcode(reg, OP_MEMORY_START_PUSH);
1357 else
1358 r = add_opcode(reg, OP_MEMORY_START);
1359 if (r) return r;
1360 r = add_mem_num(reg, node->regnum);
1361 if (r) return r;
1362 r = compile_tree(node->target, reg);
1363 if (r) return r;
1364#ifdef USE_SUBEXP_CALL
/* Group end marker.  A called group additionally ends with OP_RETURN. */
1365 if (IS_ENCLOSE_CALLED(node)) {
1366 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1367 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1369 else
1370 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1372
1373 if (r) return r;
1374 r = add_mem_num(reg, node->regnum);
1375 if (r) return r;
1376 r = add_opcode(reg, OP_RETURN);
1377 }
1378 else if (IS_ENCLOSE_RECURSION(node)) {
1379 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1380 r = add_opcode(reg, OP_MEMORY_END_PUSH_REC);
1381 else
1382 r = add_opcode(reg, OP_MEMORY_END_REC);
1383 if (r) return r;
1384 r = add_mem_num(reg, node->regnum);
1385 }
1386 else
1387#endif
1388 {
1389 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1390 r = add_opcode(reg, OP_MEMORY_END_PUSH);
1391 else
1392 r = add_opcode(reg, OP_MEMORY_END);
1393 if (r) return r;
1394 r = add_mem_num(reg, node->regnum);
1395 }
1396 break;
1397
/* NOTE(review): the case label and the opening "if" of the branch below are
 * not present in this copy (listing lines 1398-1399). */
/* qn is the quantifier that is this node's target.
 * presumably compile_tree_n_times() emits qn->target qn->lower times --
 * confirm in its definition. */
1400 QtfrNode* qn = NQTFR(node->target);
1401 r = compile_tree_n_times(qn->target, qn->lower, reg);
1402 if (r) return r;
1403
1404 len = compile_length_tree(qn->target, reg);
1405 if (len < 0) return len;
1406
/* Emitted sequence: PUSH (offset covers body + POP + JUMP), body, POP,
 * then a JUMP whose negative offset spans PUSH + body + POP + JUMP. */
1407 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
1408 if (r) return r;
1409 r = compile_tree(qn->target, reg);
1410 if (r) return r;
1411 r = add_opcode(reg, OP_POP);
1412 if (r) return r;
1413 r = add_opcode_rel_addr(reg, OP_JUMP,
1414 -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
1415 }
1416 else {
/* Otherwise the target is bracketed by OP_PUSH_STOP_BT / OP_POP_STOP_BT. */
1417 r = add_opcode(reg, OP_PUSH_STOP_BT);
1418 if (r) return r;
1419 r = compile_tree(node->target, reg);
1420 if (r) return r;
1421 r = add_opcode(reg, OP_POP_STOP_BT);
1422 }
1423 break;
1424
/* Layout: OP_CONDITION, group number, relative address (yes-branch length +
 * SIZE_OP_JUMP), yes-branch, OP_JUMP over len2, no-branch.  The target must
 * be an NT_ALT whose first two elements are the yes- and no-branches. */
1425 case ENCLOSE_CONDITION:
1426 r = add_opcode(reg, OP_CONDITION);
1427 if (r) return r;
1428 r = add_mem_num(reg, node->regnum);
1429 if (r) return r;
1430
1431 if (NTYPE(node->target) == NT_ALT) {
1432 Node* x = node->target;
1433 int len2;
1434
1435 len = compile_length_tree(NCAR(x), reg); /* yes-node */
1436 if (len < 0) return len;
1437 if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1438 x = NCDR(x);
1439 len2 = compile_length_tree(NCAR(x), reg); /* no-node */
1440 if (len2 < 0) return len2;
1442
/* Rewind x to the head of the alternation before emitting. */
1443 x = node->target;
1444 r = add_rel_addr(reg, len + SIZE_OP_JUMP);
1445 if (r) return r;
1446 r = compile_tree(NCAR(x), reg); /* yes-node */
1447 if (r) return r;
1448 r = add_opcode_rel_addr(reg, OP_JUMP, len2);
1449 if (r) return r;
1450 x = NCDR(x);
1451 r = compile_tree(NCAR(x), reg); /* no-node */
1452 }
1453 else {
1454 return ONIGERR_PARSER_BUG;
1455 }
1456 break;
1457
/* Layout: OP_PUSH_ABSENT_POS, OP_ABSENT with a relative address covering
 * the target plus OP_ABSENT_END, the target, OP_ABSENT_END. */
1458 case ENCLOSE_ABSENT:
1459 len = compile_length_tree(node->target, reg);
1460 if (len < 0) return len;
1461
1462 r = add_opcode(reg, OP_PUSH_ABSENT_POS);
1463 if (r) return r;
1464 r = add_opcode_rel_addr(reg, OP_ABSENT, len + SIZE_OP_ABSENT_END);
1465 if (r) return r;
1466 r = compile_tree(node->target, reg);
1467 if (r) return r;
1468 r = add_opcode(reg, OP_ABSENT_END);
1469 break;
1470
1471 default:
1472 return ONIGERR_TYPE_BUG;
1473 break;
1474 }
1475
1476 return r;
1477 }
1478
/*
 * Compute the compiled length of an anchor node.
 *
 * tlen is the compiled length of node->target (0 when there is no target).
 * Returns the length, or the negative error reported by
 * compile_length_tree() for the target.
 *
 * NOTE(review): the embedded listing numbers skip 1492, 1494-1495 and
 * 1500-1501, so the length assignment for ANCHOR_PREC_READ and at least two
 * further cases are presumably missing from this copy -- confirm against
 * the original file.
 */
1479 static int
1480 compile_length_anchor_node(AnchorNode* node, regex_t* reg)
1481 {
1482 int len;
1483 int tlen = 0;
1484
1485 if (node->target) {
1486 tlen = compile_length_tree(node->target, reg);
1487 if (tlen < 0) return tlen;
1488 }
1489
1490 switch (node->type) {
1491 case ANCHOR_PREC_READ:
1493 break;
1496 break;
1497 case ANCHOR_LOOK_BEHIND:
1498 len = SIZE_OP_LOOK_BEHIND + tlen;
1499 break;
1502 break;
1503
/* Every other anchor type compiles to a single opcode. */
1504 default:
1505 len = SIZE_OPCODE;
1506 break;
1507 }
1508
1509 return len;
1510 }
1511
/*
 * Emit the bytecode for an anchor node into reg.
 *
 * Simple anchors map to one opcode each; the word-boundary family selects
 * the ASCII variant of the opcode when node->ascii_range is set.  Anchors
 * with a target wrap the compiled target in the matching opcodes.
 *
 * Returns 0 on success, the non-zero result of the failing helper, a
 * negative length from compile_length_tree(), or ONIGERR_TYPE_BUG for an
 * unknown anchor type.
 *
 * NOTE(review): the embedded listing numbers skip 1529, 1553, 1570, 1580,
 * 1585 and 1589, so three case labels, the tail of one call and two error
 * checks are presumably missing from this copy -- confirm against the
 * original file.
 */
1512 static int
1513 compile_anchor_node(AnchorNode* node, regex_t* reg)
1514 {
1515 int r, len;
1516
1517 switch (node->type) {
1518 case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
1519 case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
1520 case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
1521 case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
1522 case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
1523 case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
1524
1525 case ANCHOR_WORD_BOUND:
1526 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BOUND);
1527 else r = add_opcode(reg, OP_WORD_BOUND);
1528 break;
1530 if (node->ascii_range) r = add_opcode(reg, OP_NOT_ASCII_WORD_BOUND);
1531 else r = add_opcode(reg, OP_NOT_WORD_BOUND);
1532 break;
1533#ifdef USE_WORD_BEGIN_END
1534 case ANCHOR_WORD_BEGIN:
1535 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BEGIN);
1536 else r = add_opcode(reg, OP_WORD_BEGIN);
1537 break;
1538 case ANCHOR_WORD_END:
1539 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_END);
1540 else r = add_opcode(reg, OP_WORD_END);
1541 break;
1542#endif
1543 case ANCHOR_KEEP: r = add_opcode(reg, OP_KEEP); break;
1544
/* Layout: OP_PUSH_POS, target, OP_POP_POS. */
1545 case ANCHOR_PREC_READ:
1546 r = add_opcode(reg, OP_PUSH_POS);
1547 if (r) return r;
1548 r = compile_tree(node->target, reg);
1549 if (r) return r;
1550 r = add_opcode(reg, OP_POP_POS);
1551 break;
1552
/* Layout: OP_PUSH_POS_NOT with a relative address covering the target plus
 * OP_FAIL_POS, the target, OP_FAIL_POS. */
1554 len = compile_length_tree(node->target, reg);
1555 if (len < 0) return len;
1556 r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
1557 if (r) return r;
1558 r = compile_tree(node->target, reg);
1559 if (r) return r;
1560 r = add_opcode(reg, OP_FAIL_POS);
1561 break;
1562
/* Layout: OP_LOOK_BEHIND, character length n, target.  n comes from
 * node->char_len when that is non-negative, otherwise it is computed with
 * get_char_length_tree(). */
1563 case ANCHOR_LOOK_BEHIND:
1564 {
1565 int n;
1566 r = add_opcode(reg, OP_LOOK_BEHIND);
1567 if (r) return r;
1568 if (node->char_len < 0) {
1569 r = get_char_length_tree(node->target, reg, &n);
1571 }
1572 else
1573 n = node->char_len;
1574 r = add_length(reg, n);
1575 if (r) return r;
1576 r = compile_tree(node->target, reg);
1577 }
1578 break;
1579
1581 {
1582 int n;
/* NOTE(review): in the visible lines the result of compile_length_tree() is
 * passed on without the "len < 0" check used in the cases above -- confirm
 * whether a negative error code can reach add_opcode_rel_addr() here. */
1583 len = compile_length_tree(node->target, reg);
1584 r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
1586 if (r) return r;
1587 if (node->char_len < 0) {
1588 r = get_char_length_tree(node->target, reg, &n);
1590 }
1591 else
1592 n = node->char_len;
1593 r = add_length(reg, n);
1594 if (r) return r;
1595 r = compile_tree(node->target, reg);
1596 if (r) return r;
1597 r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
1598 }
1599 break;
1600
1601 default:
1602 return ONIGERR_TYPE_BUG;
1603 break;
1604 }
1605
1606 return r;
1607 }
1608
/*
 * Compute the compiled length of the tree rooted at node without emitting
 * anything.  compile_tree() and the node-specific compilers use this value
 * to build relative jump addresses.
 *
 * Returns the length (>= 0), a negative error code propagated from a child,
 * or ONIGERR_TYPE_BUG for an unknown node type.
 *
 * NOTE(review): the embedded listing numbers skip 1663-1664, 1670 and 1673,
 * so the length expressions of the NT_BREF case are presumably incomplete in
 * this copy -- confirm against the original file.
 */
1609 static int
1610 compile_length_tree(Node* node, regex_t* reg)
1611 {
1612 int len, type, r;
1613
1614 type = NTYPE(node);
1615 switch (type) {
/* A list is the sum of its elements. */
1616 case NT_LIST:
1617 len = 0;
1618 do {
1619 r = compile_length_tree(NCAR(node), reg);
1620 if (r < 0) return r;
1621 len += r;
1622 } while (IS_NOT_NULL(node = NCDR(node)));
1623 r = len;
1624 break;
1625
/* An alternation of n branches is the sum of the branches plus one
 * PUSH + JUMP pair for each of the first n - 1 branches. */
1626 case NT_ALT:
1627 {
1628 int n = 0;
1629 len = 0;
1630 do {
1631 r = compile_length_tree(NCAR(node), reg);
1632 if (r < 0) return r;
1633 len += r;
1634 n++;
1635 } while (IS_NOT_NULL(node = NCDR(node)));
1636 r = len;
1637 r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1638 }
1639 break;
1640
1641 case NT_STR:
1642 if (NSTRING_IS_RAW(node))
1643 r = compile_length_string_raw_node(NSTR(node), reg);
1644 else
1645 r = compile_length_string_node(node, reg);
1646 break;
1647
1648 case NT_CCLASS:
1649 r = compile_length_cclass_node(NCCLASS(node), reg);
1650 break;
1651
1652 case NT_CTYPE:
1653 case NT_CANY:
1654 r = SIZE_OPCODE;
1655 break;
1656
1657 case NT_BREF:
1658 {
1659 BRefNode* br = NBREF(node);
1660
1661#ifdef USE_BACKREF_WITH_LEVEL
1662 if (IS_BACKREF_NEST_LEVEL(br)) {
1665 }
1666 else
1667#endif
1668 if (br->back_num == 1) {
1669 r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1671 }
1672 else {
1674 }
1675 }
1676 break;
1677
1678#ifdef USE_SUBEXP_CALL
1679 case NT_CALL:
1680 r = SIZE_OP_CALL;
1681 break;
1682#endif
1683
1684 case NT_QTFR:
1685 r = compile_length_quantifier_node(NQTFR(node), reg);
1686 break;
1687
1688 case NT_ENCLOSE:
1689 r = compile_length_enclose_node(NENCLOSE(node), reg);
1690 break;
1691
1692 case NT_ANCHOR:
1693 r = compile_length_anchor_node(NANCHOR(node), reg);
1694 break;
1695
1696 default:
1697 return ONIGERR_TYPE_BUG;
1698 break;
1699 }
1700
1701 return r;
1702 }
1703
1704static int
1705compile_tree(Node* node, regex_t* reg)
1706{
1707 int n, type, len, pos, r = 0;
1708
1709 type = NTYPE(node);
1710 switch (type) {
1711 case NT_LIST:
1712 do {
1713 r = compile_tree(NCAR(node), reg);
1714 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1715 break;
1716
1717 case NT_ALT:
1718 {
1719 Node* x = node;
1720 len = 0;
1721 do {
1722 len += compile_length_tree(NCAR(x), reg);
1723 if (NCDR(x) != NULL) {
1725 }
1726 } while (IS_NOT_NULL(x = NCDR(x)));
1727 pos = reg->used + len; /* goal position */
1728
1729 do {
1730 len = compile_length_tree(NCAR(node), reg);
1731 if (IS_NOT_NULL(NCDR(node))) {
1732 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
1733 if (r) break;
1734 }
1735 r = compile_tree(NCAR(node), reg);
1736 if (r) break;
1737 if (IS_NOT_NULL(NCDR(node))) {
1738 len = pos - (reg->used + SIZE_OP_JUMP);
1739 r = add_opcode_rel_addr(reg, OP_JUMP, len);
1740 if (r) break;
1741 }
1742 } while (IS_NOT_NULL(node = NCDR(node)));
1743 }
1744 break;
1745
1746 case NT_STR:
1747 if (NSTRING_IS_RAW(node))
1748 r = compile_string_raw_node(NSTR(node), reg);
1749 else
1750 r = compile_string_node(node, reg);
1751 break;
1752
1753 case NT_CCLASS:
1754 r = compile_cclass_node(NCCLASS(node), reg);
1755 break;
1756
1757 case NT_CTYPE:
1758 {
1759 int op;
1760
1761 switch (NCTYPE(node)->ctype) {
1762 case ONIGENC_CTYPE_WORD:
1763 if (NCTYPE(node)->ascii_range != 0) {
1764 if (NCTYPE(node)->not != 0) op = OP_NOT_ASCII_WORD;
1765 else op = OP_ASCII_WORD;
1766 }
1767 else {
1768 if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
1769 else op = OP_WORD;
1770 }
1771 break;
1772 default:
1773 return ONIGERR_TYPE_BUG;
1774 break;
1775 }
1776 r = add_opcode(reg, op);
1777 }
1778 break;
1779
1780 case NT_CANY:
1781 if (IS_MULTILINE(reg->options))
1782 r = add_opcode(reg, OP_ANYCHAR_ML);
1783 else
1784 r = add_opcode(reg, OP_ANYCHAR);
1785 break;
1786
1787 case NT_BREF:
1788 {
1789 BRefNode* br = NBREF(node);
1790
1791#ifdef USE_BACKREF_WITH_LEVEL
1792 if (IS_BACKREF_NEST_LEVEL(br)) {
1793 r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
1794 if (r) return r;
1795 r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1796 if (r) return r;
1797 r = add_length(reg, br->nest_level);
1798 if (r) return r;
1799
1800 goto add_bacref_mems;
1801 }
1802 else
1803#endif
1804 if (br->back_num == 1) {
1805 n = br->back_static[0];
1806 if (IS_IGNORECASE(reg->options)) {
1807 r = add_opcode(reg, OP_BACKREFN_IC);
1808 if (r) return r;
1809 r = add_mem_num(reg, n);
1810 }
1811 else {
1812 switch (n) {
1813 case 1: r = add_opcode(reg, OP_BACKREF1); break;
1814 case 2: r = add_opcode(reg, OP_BACKREF2); break;
1815 default:
1816 r = add_opcode(reg, OP_BACKREFN);
1817 if (r) return r;
1818 r = add_mem_num(reg, n);
1819 break;
1820 }
1821 }
1822 }
1823 else {
1824 int i;
1825 int* p;
1826
1827 if (IS_IGNORECASE(reg->options)) {
1828 r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1829 }
1830 else {
1831 r = add_opcode(reg, OP_BACKREF_MULTI);
1832 }
1833 if (r) return r;
1834
1835#ifdef USE_BACKREF_WITH_LEVEL
1836 add_bacref_mems:
1837#endif
1838 r = add_length(reg, br->back_num);
1839 if (r) return r;
1840 p = BACKREFS_P(br);
1841 for (i = br->back_num - 1; i >= 0; i--) {
1842 r = add_mem_num(reg, p[i]);
1843 if (r) return r;
1844 }
1845 }
1846 }
1847 break;
1848
1849#ifdef USE_SUBEXP_CALL
1850 case NT_CALL:
1851 r = compile_call(NCALL(node), reg);
1852 break;
1853#endif
1854
1855 case NT_QTFR:
1856 r = compile_quantifier_node(NQTFR(node), reg);
1857 break;
1858
1859 case NT_ENCLOSE:
1860 r = compile_enclose_node(NENCLOSE(node), reg);
1861 break;
1862
1863 case NT_ANCHOR:
1864 r = compile_anchor_node(NANCHOR(node), reg);
1865 break;
1866
1867 default:
1868#ifdef ONIG_DEBUG
1869 fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
1870#endif
1871 break;
1872 }
1873
1874 return r;
1875}
1876
1877#ifdef USE_NAMED_GROUP
1878
1879static int
1880noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
1881{
1882 int r = 0;
1883 Node* node = *plink;
1884
1885 switch (NTYPE(node)) {
1886 case NT_LIST:
1887 case NT_ALT:
1888 do {
1889 r = noname_disable_map(&(NCAR(node)), map, counter);
1890 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1891 break;
1892
1893 case NT_QTFR:
1894 {
1895 Node** ptarget = &(NQTFR(node)->target);
1896 Node* old = *ptarget;
1897 r = noname_disable_map(ptarget, map, counter);
1898 if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
1899 onig_reduce_nested_quantifier(node, *ptarget);
1900 }
1901 }
1902 break;
1903
1904 case NT_ENCLOSE:
1905 {
1906 EncloseNode* en = NENCLOSE(node);
1907 if (en->type == ENCLOSE_MEMORY) {
1908 if (IS_ENCLOSE_NAMED_GROUP(en)) {
1909 (*counter)++;
1910 map[en->regnum].new_val = *counter;
1911 en->regnum = *counter;
1912 }
1913 else if (en->regnum != 0) {
1914 *plink = en->target;
1915 en->target = NULL_NODE;
1916 onig_node_free(node);
1917 r = noname_disable_map(plink, map, counter);
1918 break;
1919 }
1920 }
1921 r = noname_disable_map(&(en->target), map, counter);
1922 }
1923 break;
1924
1925 case NT_ANCHOR:
1926 if (NANCHOR(node)->target)
1927 r = noname_disable_map(&(NANCHOR(node)->target), map, counter);
1928 break;
1929
1930 default:
1931 break;
1932 }
1933
1934 return r;
1935}
1936
/*
 * Rewrite the group numbers stored in a back-reference node according to
 * map.
 *
 * Each old number backs[i] is replaced by map[backs[i]].new_val; entries
 * whose new value is not positive are dropped and the array is compacted in
 * place.  bn->back_num is updated to the number of surviving entries.
 *
 * Returns 0 on success, or ONIGERR_INVALID_BACKREF when a stored number
 * exceeds num_mem.
 *
 * NOTE(review): the embedded listing numbers skip 1945, so the statement
 * controlled by the IS_BACKREF_NAME_REF() test is presumably missing from
 * this copy -- confirm against the original file.
 */
1937 static int
1938 renumber_node_backref(Node* node, GroupNumRemap* map, const int num_mem)
1939 {
1940 int i, pos, n, old_num;
1941 int *backs;
1942 BRefNode* bn = NBREF(node);
1943
1944 if (! IS_BACKREF_NAME_REF(bn))
1946
1947 old_num = bn->back_num;
/* The numbers live in back_static unless back_dynamic is set. */
1948 if (IS_NULL(bn->back_dynamic))
1949 backs = bn->back_static;
1950 else
1951 backs = bn->back_dynamic;
1952
/* i reads the old entries, pos writes the surviving ones. */
1953 for (i = 0, pos = 0; i < old_num; i++) {
1954 if (backs[i] > num_mem) return ONIGERR_INVALID_BACKREF;
1955 n = map[backs[i]].new_val;
1956 if (n > 0) {
1957 backs[pos] = n;
1958 pos++;
1959 }
1960 }
1961
1962 bn->back_num = pos;
1963 return 0;
1964 }
1965
1966static int
1967renumber_by_map(Node* node, GroupNumRemap* map, const int num_mem)
1968{
1969 int r = 0;
1970
1971 switch (NTYPE(node)) {
1972 case NT_LIST:
1973 case NT_ALT:
1974 do {
1975 r = renumber_by_map(NCAR(node), map, num_mem);
1976 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1977 break;
1978 case NT_QTFR:
1979 r = renumber_by_map(NQTFR(node)->target, map, num_mem);
1980 break;
1981 case NT_ENCLOSE:
1982 {
1983 EncloseNode* en = NENCLOSE(node);
1984 if (en->type == ENCLOSE_CONDITION) {
1985 if (en->regnum > num_mem) return ONIGERR_INVALID_BACKREF;
1986 en->regnum = map[en->regnum].new_val;
1987 }
1988 r = renumber_by_map(en->target, map, num_mem);
1989 }
1990 break;
1991
1992 case NT_BREF:
1993 r = renumber_node_backref(node, map, num_mem);
1994 break;
1995
1996 case NT_ANCHOR:
1997 if (NANCHOR(node)->target)
1998 r = renumber_by_map(NANCHOR(node)->target, map, num_mem);
1999 break;
2000
2001 default:
2002 break;
2003 }
2004
2005 return r;
2006}
2007
/*
 * Walk the tree below node looking at every back-reference node and test
 * whether it is a name reference (IS_BACKREF_NAME_REF).
 *
 * Returns 0 when nothing is reported, otherwise the first non-zero result
 * found during the walk.
 *
 * NOTE(review): the embedded listing numbers skip 2029, so the statement
 * executed for a back-reference that is not a name reference is presumably
 * missing from this copy -- confirm against the original file.
 */
2008 static int
2009 numbered_ref_check(Node* node)
2010 {
2011 int r = 0;
2012
2013 switch (NTYPE(node)) {
2014 case NT_LIST:
2015 case NT_ALT:
/* Stop at the first element that reports a non-zero result. */
2016 do {
2017 r = numbered_ref_check(NCAR(node));
2018 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2019 break;
2020 case NT_QTFR:
2021 r = numbered_ref_check(NQTFR(node)->target);
2022 break;
2023 case NT_ENCLOSE:
2024 r = numbered_ref_check(NENCLOSE(node)->target);
2025 break;
2026
2027 case NT_BREF:
2028 if (! IS_BACKREF_NAME_REF(NBREF(node)))
2030 break;
2031
2032 case NT_ANCHOR:
2033 if (NANCHOR(node)->target)
2034 r = numbered_ref_check(NANCHOR(node)->target);
2035 break;
2036
2037 default:
2038 break;
2039 }
2040
2041 return r;
2042 }
2043
/*
 * Remove unnamed capture groups from the tree at *root and renumber the
 * remaining groups.
 *
 * Steps visible here: allocate a remap table with env->num_mem + 1 entries,
 * clear entries 1..num_mem, run noname_disable_map() and renumber_by_map(),
 * rebuild env->capture_history through the remap table, set num_mem in both
 * env and reg to env->num_named, and finally renumber the name table.
 *
 * Returns the first non-zero result of the helpers, otherwise the result of
 * onig_renumber_name_table().
 *
 * NOTE(review): the embedded listing numbers skip 2049, 2052 and 2065, so
 * the declaration of map, whatever follows the allocation, and the body of
 * the compaction loop are presumably missing from this copy -- confirm
 * against the original file.
 */
2044 static int
2045 disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
2046 {
2047 int r, i, pos, counter;
2048 BitStatusType loc;
2050
/* Index 0 is unused: group numbers start at 1. */
2051 map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
2053 for (i = 1; i <= env->num_mem; i++) {
2054 map[i].new_val = 0;
2055 }
2056 counter = 0;
2057 r = noname_disable_map(root, map, &counter);
2058 if (r != 0) return r;
2059
2060 r = renumber_by_map(*root, map, env->num_mem);
2061 if (r != 0) return r;
2062
/* pos advances once for every group that kept a number. */
2063 for (i = 1, pos = 1; i <= env->num_mem; i++) {
2064 if (map[i].new_val > 0) {
2066 pos++;
2067 }
2068 }
2069
/* Rebuild the capture-history bit set using the new group numbers. */
2070 loc = env->capture_history;
2071 BIT_STATUS_CLEAR(env->capture_history);
2072 for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
2073 if (BIT_STATUS_AT(loc, i)) {
2074 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
2075 }
2076 }
2077
2078 env->num_mem = env->num_named;
2079 reg->num_mem = env->num_named;
2080
2081 return onig_renumber_name_table(reg, map);
2082 }
2083#endif /* USE_NAMED_GROUP */
2084
2085#ifdef USE_SUBEXP_CALL
2086static int
2087unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
2088{
2089 int i, offset;
2090 EncloseNode* en;
2091 AbsAddrType addr;
2092
2093 for (i = 0; i < uslist->num; i++) {
2094 en = NENCLOSE(uslist->us[i].target);
2095 if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
2096 addr = en->call_addr;
2097 offset = uslist->us[i].offset;
2098
2099 BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
2100 }
2101 return 0;
2102}
2103#endif
2104
2105#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
/*
 * Classify the tree below node for the quantifier empty-check logic.
 *
 * The result starts at 0.  For lists and alternations it is the largest
 * value reported by any element; a recursive call node yields
 * NQ_TARGET_IS_EMPTY_REC immediately.
 *
 * NOTE(review): the embedded listing numbers skip 2147 and 2151, so the
 * statement for ENCLOSE_MEMORY and one case label are presumably missing
 * from this copy -- confirm against the original file.
 */
2106 static int
2107 quantifiers_memory_node_info(Node* node)
2108 {
2109 int r = 0;
2110
2111 switch (NTYPE(node)) {
2112 case NT_LIST:
2113 case NT_ALT:
2114 {
2115 int v;
/* Keep the maximum; stop early if an element reports a negative value. */
2116 do {
2117 v = quantifiers_memory_node_info(NCAR(node));
2118 if (v > r) r = v;
2119 } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
2120 }
2121 break;
2122
2123# ifdef USE_SUBEXP_CALL
2124 case NT_CALL:
2125 if (IS_CALL_RECURSION(NCALL(node))) {
2126 return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
2127 }
2128 else
2129 r = quantifiers_memory_node_info(NCALL(node)->target);
2130 break;
2131# endif
2132
2133 case NT_QTFR:
2134 {
2135 QtfrNode* qn = NQTFR(node);
/* A quantifier with upper == 0 is not descended into. */
2136 if (qn->upper != 0) {
2137 r = quantifiers_memory_node_info(qn->target);
2138 }
2139 }
2140 break;
2141
2142 case NT_ENCLOSE:
2143 {
2144 EncloseNode* en = NENCLOSE(node);
2145 switch (en->type) {
2146 case ENCLOSE_MEMORY:
2148 break;
2149
2150 case ENCLOSE_OPTION:
2152 case ENCLOSE_CONDITION:
2153 case ENCLOSE_ABSENT:
2154 r = quantifiers_memory_node_info(en->target);
2155 break;
2156 default:
2157 break;
2158 }
2159 }
2160 break;
2161
/* Leaf nodes and anchors leave r at 0. */
2162 case NT_BREF:
2163 case NT_STR:
2164 case NT_CTYPE:
2165 case NT_CCLASS:
2166 case NT_CANY:
2167 case NT_ANCHOR:
2168 default:
2169 break;
2170 }
2171
2172 return r;
2173 }
2174#endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
2175
/*
 * Compute a lower bound on the length matched by node and store it in *min.
 * String lengths are measured as the pointer difference end - s of the
 * string node.
 *
 * *min is reset to 0 first, so node types that are not handled (anchors,
 * absent groups, unknown types) report 0.
 *
 * Returns 0 on success, ONIGERR_INVALID_BACKREF when a back-reference
 * number exceeds env->num_mem, or the first non-zero result of a recursive
 * call.
 *
 * NOTE(review): the embedded listing numbers skip 2277, 2279, 2282 and 2289,
 * so statements around the recursive ENCLOSE_MEMORY computation and one
 * case label are presumably missing from this copy -- confirm against the
 * original file.
 */
2176 static int
2177 get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
2178 {
2179 OnigDistance tmin;
2180 int r = 0;
2181
2182 *min = 0;
2183 switch (NTYPE(node)) {
/* Back-reference: the smallest minimum among the referenced groups. */
2184 case NT_BREF:
2185 {
2186 int i;
2187 int* backs;
2188 Node** nodes = SCANENV_MEM_NODES(env);
2189 BRefNode* br = NBREF(node);
2190 if (br->state & NST_RECURSION) break;
2191
2192 backs = BACKREFS_P(br);
2193 if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2194 r = get_min_match_length(nodes[backs[0]], min, env);
2195 if (r != 0) break;
2196 for (i = 1; i < br->back_num; i++) {
2197 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2198 r = get_min_match_length(nodes[backs[i]], &tmin, env);
2199 if (r != 0) break;
2200 if (*min > tmin) *min = tmin;
2201 }
2202 }
2203 break;
2204
2205#ifdef USE_SUBEXP_CALL
/* Recursive call: use the cached minimum of the called group if one has
 * been fixed, otherwise leave *min at 0. */
2206 case NT_CALL:
2207 if (IS_CALL_RECURSION(NCALL(node))) {
2208 EncloseNode* en = NENCLOSE(NCALL(node)->target);
2209 if (IS_ENCLOSE_MIN_FIXED(en))
2210 *min = en->min_len;
2211 }
2212 else
2213 r = get_min_match_length(NCALL(node)->target, min, env);
2214 break;
2215#endif
2216
/* List: sum of the element minima. */
2217 case NT_LIST:
2218 do {
2219 r = get_min_match_length(NCAR(node), &tmin, env);
2220 if (r == 0) *min += tmin;
2221 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2222 break;
2223
/* Alternation: smallest branch minimum (the first branch seeds *min). */
2224 case NT_ALT:
2225 {
2226 Node *x, *y;
2227 y = node;
2228 do {
2229 x = NCAR(y);
2230 r = get_min_match_length(x, &tmin, env);
2231 if (r != 0) break;
2232 if (y == node) *min = tmin;
2233 else if (*min > tmin) *min = tmin;
2234 } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
2235 }
2236 break;
2237
2238 case NT_STR:
2239 {
2240 StrNode* sn = NSTR(node);
2241 *min = sn->end - sn->s;
2242 }
2243 break;
2244
2245 case NT_CTYPE:
2246 *min = 1;
2247 break;
2248
2249 case NT_CCLASS:
2250 case NT_CANY:
2251 *min = 1;
2252 break;
2253
/* Quantifier: target minimum times the lower bound; 0 when lower is 0. */
2254 case NT_QTFR:
2255 {
2256 QtfrNode* qn = NQTFR(node);
2257
2258 if (qn->lower > 0) {
2259 r = get_min_match_length(qn->target, min, env);
2260 if (r == 0)
2261 *min = distance_multiply(*min, qn->lower);
2262 }
2263 }
2264 break;
2265
2266 case NT_ENCLOSE:
2267 {
2268 EncloseNode* en = NENCLOSE(node);
2269 switch (en->type) {
/* Memory group: reuse the cached en->min_len when it is marked as fixed;
 * a group carrying the MARK1 flag reports 0; otherwise compute the value
 * from the target and store it in en->min_len. */
2270 case ENCLOSE_MEMORY:
2271 if (IS_ENCLOSE_MIN_FIXED(en))
2272 *min = en->min_len;
2273 else {
2274 if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2275 *min = 0; /* recursive */
2276 else {
2278 r = get_min_match_length(en->target, min, env);
2280 if (r == 0) {
2281 en->min_len = *min;
2283 }
2284 }
2285 }
2286 break;
2287
2288 case ENCLOSE_OPTION:
2290 case ENCLOSE_CONDITION:
2291 r = get_min_match_length(en->target, min, env);
2292 break;
2293
2294 case ENCLOSE_ABSENT:
2295 break;
2296 }
2297 }
2298 break;
2299
2300 case NT_ANCHOR:
2301 default:
2302 break;
2303 }
2304
2305 return r;
2306 }
2307
/*
 * Compute an upper bound on the length matched by node and store it in
 * *max.  String lengths are measured as the pointer difference end - s of
 * the string node.
 *
 * *max is reset to 0 first, so node types that are not handled (anchors,
 * absent groups, unknown types) report 0.
 *
 * Returns 0 on success, ONIGERR_INVALID_BACKREF when a back-reference
 * number exceeds env->num_mem, or the first non-zero result of a recursive
 * call.
 *
 * NOTE(review): the embedded listing numbers skip 2339, 2344, 2354, 2372,
 * 2386, 2401, 2403, 2405, 2408 and 2415, so the assignments for several
 * cases (character types, character classes, recursion, unbounded
 * repetition) are presumably missing from this copy -- confirm against the
 * original file.
 */
2308 static int
2309 get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
2310 {
2311 OnigDistance tmax;
2312 int r = 0;
2313
2314 *max = 0;
2315 switch (NTYPE(node)) {
/* List: sum of the element maxima, combined with distance_add(). */
2316 case NT_LIST:
2317 do {
2318 r = get_max_match_length(NCAR(node), &tmax, env);
2319 if (r == 0)
2320 *max = distance_add(*max, tmax);
2321 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2322 break;
2323
/* Alternation: largest branch maximum. */
2324 case NT_ALT:
2325 do {
2326 r = get_max_match_length(NCAR(node), &tmax, env);
2327 if (r == 0 && *max < tmax) *max = tmax;
2328 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2329 break;
2330
2331 case NT_STR:
2332 {
2333 StrNode* sn = NSTR(node);
2334 *max = sn->end - sn->s;
2335 }
2336 break;
2337
2338 case NT_CTYPE:
2340 break;
2341
2342 case NT_CCLASS:
2343 case NT_CANY:
2345 break;
2346
/* Back-reference: largest maximum among the referenced groups. */
2347 case NT_BREF:
2348 {
2349 int i;
2350 int* backs;
2351 Node** nodes = SCANENV_MEM_NODES(env);
2352 BRefNode* br = NBREF(node);
2353 if (br->state & NST_RECURSION) {
2355 break;
2356 }
2357 backs = BACKREFS_P(br);
2358 for (i = 0; i < br->back_num; i++) {
2359 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2360 r = get_max_match_length(nodes[backs[i]], &tmax, env);
2361 if (r != 0) break;
2362 if (*max < tmax) *max = tmax;
2363 }
2364 }
2365 break;
2366
2367#ifdef USE_SUBEXP_CALL
2368 case NT_CALL:
2369 if (! IS_CALL_RECURSION(NCALL(node)))
2370 r = get_max_match_length(NCALL(node)->target, max, env);
2371 else
2373 break;
2374#endif
2375
/* Quantifier: a bounded repeat multiplies the target maximum by the upper
 * bound; a target maximum of 0 stays 0. */
2376 case NT_QTFR:
2377 {
2378 QtfrNode* qn = NQTFR(node);
2379
2380 if (qn->upper != 0) {
2381 r = get_max_match_length(qn->target, max, env);
2382 if (r == 0 && *max != 0) {
2383 if (! IS_REPEAT_INFINITE(qn->upper))
2384 *max = distance_multiply(*max, qn->upper);
2385 else
2387 }
2388 }
2389 }
2390 break;
2391
2392 case NT_ENCLOSE:
2393 {
2394 EncloseNode* en = NENCLOSE(node);
2395 switch (en->type) {
/* Memory group: reuse the cached en->max_len when it is marked as fixed,
 * otherwise compute it from the target and store it in en->max_len. */
2396 case ENCLOSE_MEMORY:
2397 if (IS_ENCLOSE_MAX_FIXED(en))
2398 *max = en->max_len;
2399 else {
2400 if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2402 else {
2404 r = get_max_match_length(en->target, max, env);
2406 if (r == 0) {
2407 en->max_len = *max;
2409 }
2410 }
2411 }
2412 break;
2413
2414 case ENCLOSE_OPTION:
2416 case ENCLOSE_CONDITION:
2417 r = get_max_match_length(en->target, max, env);
2418 break;
2419
2420 case ENCLOSE_ABSENT:
2421 break;
2422 }
2423 }
2424 break;
2425
2426 case NT_ANCHOR:
2427 default:
2428 break;
2429 }
2430
2431 return r;
2432 }
2433
/* Negative status values used by get_char_length_tree1().
 * presumably VARLEN means "not a fixed length" and TOP_ALT_VARLEN means
 * "top-level alternation with branches of different lengths" -- the lines
 * that return them are not visible in this copy; confirm against the
 * original file. */
2434#define GET_CHAR_LEN_VARLEN -1
2435#define GET_CHAR_LEN_TOP_ALT_VARLEN -2
2436
/*
 * Compute the length in characters of the pattern below node and store it
 * in *len.
 *
 * level is the nesting depth of the caller; it is incremented on entry, so
 * the node passed by get_char_length_tree() (level 0) is handled at level 1.
 * *len is reset to 0 first.
 *
 * Returns 0 on success or the non-zero status of a recursive call.
 *
 * NOTE(review): the embedded listing numbers skip 2471, 2473, 2501, 2510,
 * 2535, 2541 and 2556, so the statements that report a variable-length
 * pattern, among others, are presumably missing from this copy -- confirm
 * against the original file.
 */
2437/* fixed size pattern node only */
2438 static int
2439 get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
2440 {
2441 int tlen;
2442 int r = 0;
2443
2444 level++;
2445 *len = 0;
2446 switch (NTYPE(node)) {
/* List: sum of the element lengths. */
2447 case NT_LIST:
2448 do {
2449 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2450 if (r == 0)
2451 *len = (int )distance_add(*len, tlen);
2452 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2453 break;
2454
/* Alternation: every branch is compared with the first one; varlen is set
 * when any branch has a different length. */
2455 case NT_ALT:
2456 {
2457 int tlen2;
2458 int varlen = 0;
2459
2460 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2461 while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
2462 r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
2463 if (r == 0) {
2464 if (tlen != tlen2)
2465 varlen = 1;
2466 }
2467 }
2468 if (r == 0) {
2469 if (varlen != 0) {
2470 if (level == 1)
2472 else
2474 }
2475 else
2476 *len = tlen;
2477 }
2478 }
2479 break;
2480
/* String: count characters by stepping through the bytes with enclen(). */
2481 case NT_STR:
2482 {
2483 StrNode* sn = NSTR(node);
2484 UChar *s = sn->s;
2485 while (s < sn->end) {
2486 s += enclen(reg->enc, s, sn->end);
2487 (*len)++;
2488 }
2489 }
2490 break;
2491
/* Quantifier: only an exact repeat count (lower == upper) has a length
 * computed here. */
2492 case NT_QTFR:
2493 {
2494 QtfrNode* qn = NQTFR(node);
2495 if (qn->lower == qn->upper) {
2496 r = get_char_length_tree1(qn->target, reg, &tlen, level);
2497 if (r == 0)
2498 *len = (int )distance_multiply(tlen, qn->lower);
2499 }
2500 else
2502 }
2503 break;
2504
2505#ifdef USE_SUBEXP_CALL
2506 case NT_CALL:
2507 if (! IS_CALL_RECURSION(NCALL(node)))
2508 r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
2509 else
2511 break;
2512#endif
2513
2514 case NT_CTYPE:
2515 *len = 1;
2516 break;
2517
2518 case NT_CCLASS:
2519 case NT_CANY:
2520 *len = 1;
2521 break;
2522
2523 case NT_ENCLOSE:
2524 {
2525 EncloseNode* en = NENCLOSE(node);
2526 switch (en->type) {
2527 case ENCLOSE_MEMORY:
2528#ifdef USE_SUBEXP_CALL
/* Reuse the cached en->char_len when it is marked as fixed, otherwise
 * compute it from the target and store it in en->char_len. */
2529 if (IS_ENCLOSE_CLEN_FIXED(en))
2530 *len = en->char_len;
2531 else {
2532 r = get_char_length_tree1(en->target, reg, len, level);
2533 if (r == 0) {
2534 en->char_len = *len;
2536 }
2537 }
2538 break;
2539#endif
/* Without USE_SUBEXP_CALL the ENCLOSE_MEMORY label falls through to here. */
2540 case ENCLOSE_OPTION:
2542 case ENCLOSE_CONDITION:
2543 r = get_char_length_tree1(en->target, reg, len, level);
2544 break;
2545 case ENCLOSE_ABSENT:
2546 default:
2547 break;
2548 }
2549 }
2550 break;
2551
/* Anchors contribute no characters. */
2552 case NT_ANCHOR:
2553 break;
2554
2555 default:
2557 break;
2558 }
2559
2560 return r;
2561 }
2562
2563static int
2564get_char_length_tree(Node* node, regex_t* reg, int* len)
2565{
2566 return get_char_length_tree1(node, reg, len, 0);
2567}
2568
/*
 * Decide whether the single-character nodes x and y can never match the
 * same character.
 *
 * Returns 1 when the visible checks prove that x and y are disjoint and 0
 * otherwise (including every combination that is not analysed).
 *
 * Only the combinations CTYPE/CCLASS/STR are examined.  When the pair is in
 * an order that has no dedicated branch, x and y are swapped through the
 * "swap" label and the switch is retried.
 *
 * NOTE(review): the embedded listing numbers skip 2574-2575; the locals
 * "len" and "code" used below have no visible declaration, so those
 * declarations are presumably missing from this copy -- confirm against the
 * original file.
 */
2569/* x is not included y ==> 1 : 0 */
2570 static int
2571 is_not_included(Node* x, Node* y, regex_t* reg)
2572 {
2573 int i;
2576 UChar *p;
2577 int ytype;
2578
2579 retry:
2580 ytype = NTYPE(y);
2581 switch (NTYPE(x)) {
2582 case NT_CTYPE:
2583 {
2584 switch (ytype) {
/* Two character types are disjoint only when they name the same type with
 * the same ascii_range setting and opposite negation. */
2585 case NT_CTYPE:
2586 if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
2587 NCTYPE(y)->not != NCTYPE(x)->not &&
2588 NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range)
2589 return 1;
2590 else
2591 return 0;
2592 break;
2593
2594 case NT_CCLASS:
2595 swap:
2596 {
2597 Node* tmp;
2598 tmp = x; x = y; y = tmp;
2599 goto retry;
2600 }
2601 break;
2602
2603 case NT_STR:
2604 goto swap;
2605 break;
2606
2607 default:
2608 break;
2609 }
2610 }
2611 break;
2612
2613 case NT_CCLASS:
2614 {
2615 CClassNode* xc = NCCLASS(x);
2616 switch (ytype) {
2617 case NT_CTYPE:
2618 switch (NCTYPE(y)->ctype) {
2619 case ONIGENC_CTYPE_WORD:
/* y is a (non-negated) word type: x is disjoint when it is a plain,
 * non-negated bitset-only class that contains no word character. */
2620 if (NCTYPE(y)->not == 0) {
2621 if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
2622 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2623 if (BITSET_AT(xc->bs, i)) {
2624 if (NCTYPE(y)->ascii_range) {
2625 if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
2626 }
2627 else {
2628 if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
2629 }
2630 }
2631 }
2632 return 1;
2633 }
2634 return 0;
2635 }
2636 else {
/* y is a negated word type: x is disjoint when it has no multibyte part
 * and matches no single-byte non-word character. */
2637 if (IS_NOT_NULL(xc->mbuf)) return 0;
2638 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2639 int is_word;
2640 if (NCTYPE(y)->ascii_range)
2641 is_word = IS_CODE_SB_WORD(reg->enc, i);
2642 else
2643 is_word = ONIGENC_IS_CODE_WORD(reg->enc, i);
2644 if (! is_word) {
2645 if (!IS_NCCLASS_NOT(xc)) {
2646 if (BITSET_AT(xc->bs, i))
2647 return 0;
2648 }
2649 else {
2650 if (! BITSET_AT(xc->bs, i))
2651 return 0;
2652 }
2653 }
2654 }
2655 return 1;
2656 }
2657 break;
2658
2659 default:
2660 break;
2661 }
2662 break;
2663
2664 case NT_CCLASS:
2665 {
2666 int v;
2667 CClassNode* yc = NCCLASS(y);
2668
/* Any single-byte code accepted by both classes means they overlap. */
2669 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2670 v = BITSET_AT(xc->bs, i);
2671 if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
2672 (v == 0 && IS_NCCLASS_NOT(xc))) {
2673 v = BITSET_AT(yc->bs, i);
2674 if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
2675 (v == 0 && IS_NCCLASS_NOT(yc)))
2676 return 0;
2677 }
2678 }
/* Disjoint only if at least one side is a non-negated class without a
 * multibyte part. */
2679 if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
2680 (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
2681 return 1;
2682 return 0;
2683 }
2684 break;
2685
2686 case NT_STR:
2687 goto swap;
2688 break;
2689
2690 default:
2691 break;
2692 }
2693 }
2694 break;
2695
2696 case NT_STR:
2697 {
2698 StrNode* xs = NSTR(x);
/* An empty string proves nothing. */
2699 if (NSTRING_LEN(x) == 0)
2700 break;
2701
2702 switch (ytype) {
2703 case NT_CTYPE:
2704 switch (NCTYPE(y)->ctype) {
2705 case ONIGENC_CTYPE_WORD:
/* The string's first character is tested against the word type: if it is
 * a word character the result is y's negation flag, otherwise its inverse. */
2706 if (NCTYPE(y)->ascii_range) {
2707 if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end))
2708 return NCTYPE(y)->not;
2709 else
2710 return !(NCTYPE(y)->not);
2711 }
2712 else {
2713 if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
2714 return NCTYPE(y)->not;
2715 else
2716 return !(NCTYPE(y)->not);
2717 }
2718 break;
2719 default:
2720 break;
2721 }
2722 break;
2723
2724 case NT_CCLASS:
2725 {
2726 CClassNode* cc = NCCLASS(y);
2727
/* Decode the string's first character and test it against the class. */
2728 code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2729 xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2730 return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
2731 }
2732 break;
2733
2734 case NT_STR:
2735 {
2736 UChar *q;
2737 StrNode* ys = NSTR(y);
/* Compare the common prefix (length of the shorter string); any differing
 * position makes the strings disjoint.  Case-ambiguous strings are not
 * analysed. */
2738 len = NSTRING_LEN(x);
2739 if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
2740 if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
2741 /* tiny version */
2742 return 0;
2743 }
2744 else {
2745 for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i < len; i++, p++, q++) {
2746 if (*p != *q) return 1;
2747 }
2748 }
2749 }
2750 break;
2751
2752 default:
2753 break;
2754 }
2755 }
2756 break;
2757
2758 default:
2759 break;
2760 }
2761
2762 return 0;
2763 }
2764
/*
 * Find the node that determines the first character matched by the tree
 * below node.
 *
 * Returns that node, or NULL_NODE when none can be determined.  With a
 * non-zero exact, character types and character classes are not accepted,
 * and neither is a non-raw string while IS_IGNORECASE(reg->options) holds.
 *
 * NOTE(review): the embedded listing numbers skip 2835, so one case label
 * next to ENCLOSE_MEMORY is presumably missing from this copy -- confirm
 * against the original file.
 */
2765 static Node*
2766 get_head_value_node(Node* node, int exact, regex_t* reg)
2767 {
2768 Node* n = NULL_NODE;
2769
2770 switch (NTYPE(node)) {
/* These node types never yield a head value. */
2771 case NT_BREF:
2772 case NT_ALT:
2773 case NT_CANY:
2774#ifdef USE_SUBEXP_CALL
2775 case NT_CALL:
2776#endif
2777 break;
2778
2779 case NT_CTYPE:
2780 case NT_CCLASS:
2781 if (exact == 0) {
2782 n = node;
2783 }
2784 break;
2785
/* Only the first element of a list is considered. */
2786 case NT_LIST:
2787 n = get_head_value_node(NCAR(node), exact, reg);
2788 break;
2789
2790 case NT_STR:
2791 {
2792 StrNode* sn = NSTR(node);
2793
/* An empty string yields nothing. */
2794 if (sn->end <= sn->s)
2795 break;
2796
2797 if (exact != 0 &&
2798 !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
2799 }
2800 else {
2801 n = node;
2802 }
2803 }
2804 break;
2805
/* A quantifier that may repeat zero times yields nothing. */
2806 case NT_QTFR:
2807 {
2808 QtfrNode* qn = NQTFR(node);
2809 if (qn->lower > 0) {
2810#ifdef USE_OP_PUSH_OR_JUMP_EXACT
2811 if (IS_NOT_NULL(qn->head_exact))
2812 n = qn->head_exact;
2813 else
2814#endif
2815 n = get_head_value_node(qn->target, exact, reg);
2816 }
2817 }
2818 break;
2819
2820 case NT_ENCLOSE:
2821 {
2822 EncloseNode* en = NENCLOSE(node);
2823 switch (en->type) {
2824 case ENCLOSE_OPTION:
2825 {
/* Evaluate the target under the option group's own options, then restore
 * the previous reg->options. */
2826 OnigOptionType options = reg->options;
2827
2828 reg->options = NENCLOSE(node)->option;
2829 n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
2830 reg->options = options;
2831 }
2832 break;
2833
2834 case ENCLOSE_MEMORY:
2836 case ENCLOSE_CONDITION:
2837 n = get_head_value_node(en->target, exact, reg);
2838 break;
2839
2840 case ENCLOSE_ABSENT:
2841 break;
2842 }
2843 }
2844 break;
2845
/* Of the anchors, only ANCHOR_PREC_READ is descended into. */
2846 case NT_ANCHOR:
2847 if (NANCHOR(node)->type == ANCHOR_PREC_READ)
2848 n = get_head_value_node(NANCHOR(node)->target, exact, reg);
2849 break;
2850
2851 default:
2852 break;
2853 }
2854
2855 return n;
2856 }
2857
2858static int
2859check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
2860{
2861 int type, r = 0;
2862
2863 type = NTYPE(node);
2864 if ((NTYPE2BIT(type) & type_mask) == 0)
2865 return 1;
2866
2867 switch (type) {
2868 case NT_LIST:
2869 case NT_ALT:
2870 do {
2871 r = check_type_tree(NCAR(node), type_mask, enclose_mask,
2872 anchor_mask);
2873 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2874 break;
2875
2876 case NT_QTFR:
2877 r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
2878 anchor_mask);
2879 break;
2880
2881 case NT_ENCLOSE:
2882 {
2883 EncloseNode* en = NENCLOSE(node);
2884 if ((en->type & enclose_mask) == 0)
2885 return 1;
2886
2887 r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
2888 }
2889 break;
2890
2891 case NT_ANCHOR:
2892 type = NANCHOR(node)->type;
2893 if ((type & anchor_mask) == 0)
2894 return 1;
2895
2896 if (NANCHOR(node)->target)
2897 r = check_type_tree(NANCHOR(node)->target,
2898 type_mask, enclose_mask, anchor_mask);
2899 break;
2900
2901 default:
2902 break;
2903 }
2904 return r;
2905}
2906
2907#ifdef USE_SUBEXP_CALL
2908
2909# define RECURSION_EXIST 1
2910# define RECURSION_INFINITE 2
2911
2912static int
2913subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
2914{
2915 int type;
2916 int r = 0;
2917
2918 type = NTYPE(node);
2919 switch (type) {
2920 case NT_LIST:
2921 {
2922 Node *x;
2923 OnigDistance min;
2924 int ret;
2925
2926 x = node;
2927 do {
2928 ret = subexp_inf_recursive_check(NCAR(x), env, head);
2929 if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2930 r |= ret;
2931 if (head) {
2932 ret = get_min_match_length(NCAR(x), &min, env);
2933 if (ret != 0) return ret;
2934 if (min != 0) head = 0;
2935 }
2936 } while (IS_NOT_NULL(x = NCDR(x)));
2937 }
2938 break;
2939
2940 case NT_ALT:
2941 {
2942 int ret;
2943 r = RECURSION_EXIST;
2944 do {
2945 ret = subexp_inf_recursive_check(NCAR(node), env, head);
2946 if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2947 r &= ret;
2948 } while (IS_NOT_NULL(node = NCDR(node)));
2949 }
2950 break;
2951
2952 case NT_QTFR:
2953 r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
2954 if (r == RECURSION_EXIST) {
2955 if (NQTFR(node)->lower == 0) r = 0;
2956 }
2957 break;
2958
2959 case NT_ANCHOR:
2960 {
2961 AnchorNode* an = NANCHOR(node);
2962 switch (an->type) {
2963 case ANCHOR_PREC_READ:
2965 case ANCHOR_LOOK_BEHIND:
2967 r = subexp_inf_recursive_check(an->target, env, head);
2968 break;
2969 }
2970 }
2971 break;
2972
2973 case NT_CALL:
2974 r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
2975 break;
2976
2977 case NT_ENCLOSE:
2978 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2979 return 0;
2980 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2981 return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
2982 else {
2984 r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
2986 }
2987 break;
2988
2989 default:
2990 break;
2991 }
2992
2993 return r;
2994}
2995
2996static int
2997subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
2998{
2999 int type;
3000 int r = 0;
3001
3002 type = NTYPE(node);
3003 switch (type) {
3004 case NT_LIST:
3005 case NT_ALT:
3006 do {
3007 r = subexp_inf_recursive_check_trav(NCAR(node), env);
3008 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3009 break;
3010
3011 case NT_QTFR:
3012 r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
3013 break;
3014
3015 case NT_ANCHOR:
3016 {
3017 AnchorNode* an = NANCHOR(node);
3018 switch (an->type) {
3019 case ANCHOR_PREC_READ:
3021 case ANCHOR_LOOK_BEHIND:
3023 r = subexp_inf_recursive_check_trav(an->target, env);
3024 break;
3025 }
3026 }
3027 break;
3028
3029 case NT_ENCLOSE:
3030 {
3031 EncloseNode* en = NENCLOSE(node);
3032
3033 if (IS_ENCLOSE_RECURSION(en)) {
3035 r = subexp_inf_recursive_check(en->target, env, 1);
3036 if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
3038 }
3039 r = subexp_inf_recursive_check_trav(en->target, env);
3040 }
3041
3042 break;
3043
3044 default:
3045 break;
3046 }
3047
3048 return r;
3049}
3050
3051static int
3052subexp_recursive_check(Node* node)
3053{
3054 int r = 0;
3055
3056 switch (NTYPE(node)) {
3057 case NT_LIST:
3058 case NT_ALT:
3059 do {
3060 r |= subexp_recursive_check(NCAR(node));
3061 } while (IS_NOT_NULL(node = NCDR(node)));
3062 break;
3063
3064 case NT_QTFR:
3065 r = subexp_recursive_check(NQTFR(node)->target);
3066 break;
3067
3068 case NT_ANCHOR:
3069 {
3070 AnchorNode* an = NANCHOR(node);
3071 switch (an->type) {
3072 case ANCHOR_PREC_READ:
3074 case ANCHOR_LOOK_BEHIND:
3076 r = subexp_recursive_check(an->target);
3077 break;
3078 }
3079 }
3080 break;
3081
3082 case NT_CALL:
3083 r = subexp_recursive_check(NCALL(node)->target);
3084 if (r != 0) SET_CALL_RECURSION(node);
3085 break;
3086
3087 case NT_ENCLOSE:
3088 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
3089 return 0;
3090 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
3091 return 1; /* recursion */
3092 else {
3094 r = subexp_recursive_check(NENCLOSE(node)->target);
3096 }
3097 break;
3098
3099 default:
3100 break;
3101 }
3102
3103 return r;
3104}
3105
3106
3107static int
3108subexp_recursive_check_trav(Node* node, ScanEnv* env)
3109{
3110# define FOUND_CALLED_NODE 1
3111
3112 int type;
3113 int r = 0;
3114
3115 type = NTYPE(node);
3116 switch (type) {
3117 case NT_LIST:
3118 case NT_ALT:
3119 {
3120 int ret;
3121 do {
3122 ret = subexp_recursive_check_trav(NCAR(node), env);
3123 if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
3124 else if (ret < 0) return ret;
3125 } while (IS_NOT_NULL(node = NCDR(node)));
3126 }
3127 break;
3128
3129 case NT_QTFR:
3130 r = subexp_recursive_check_trav(NQTFR(node)->target, env);
3131 if (NQTFR(node)->upper == 0) {
3132 if (r == FOUND_CALLED_NODE)
3133 NQTFR(node)->is_referred = 1;
3134 }
3135 break;
3136
3137 case NT_ANCHOR:
3138 {
3139 AnchorNode* an = NANCHOR(node);
3140 switch (an->type) {
3141 case ANCHOR_PREC_READ:
3143 case ANCHOR_LOOK_BEHIND:
3145 r = subexp_recursive_check_trav(an->target, env);
3146 break;
3147 }
3148 }
3149 break;
3150
3151 case NT_ENCLOSE:
3152 {
3153 EncloseNode* en = NENCLOSE(node);
3154
3155 if (! IS_ENCLOSE_RECURSION(en)) {
3156 if (IS_ENCLOSE_CALLED(en)) {
3158 r = subexp_recursive_check(en->target);
3159 if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
3161 }
3162 }
3163 r = subexp_recursive_check_trav(en->target, env);
3164 if (IS_ENCLOSE_CALLED(en))
3165 r |= FOUND_CALLED_NODE;
3166 }
3167 break;
3168
3169 default:
3170 break;
3171 }
3172
3173 return r;
3174}
3175
3176static int
3177setup_subexp_call(Node* node, ScanEnv* env)
3178{
3179 int type;
3180 int r = 0;
3181
3182 type = NTYPE(node);
3183 switch (type) {
3184 case NT_LIST:
3185 do {
3186 r = setup_subexp_call(NCAR(node), env);
3187 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3188 break;
3189
3190 case NT_ALT:
3191 do {
3192 r = setup_subexp_call(NCAR(node), env);
3193 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3194 break;
3195
3196 case NT_QTFR:
3197 r = setup_subexp_call(NQTFR(node)->target, env);
3198 break;
3199 case NT_ENCLOSE:
3200 r = setup_subexp_call(NENCLOSE(node)->target, env);
3201 break;
3202
3203 case NT_CALL:
3204 {
3205 CallNode* cn = NCALL(node);
3206 Node** nodes = SCANENV_MEM_NODES(env);
3207
3208 if (cn->group_num != 0) {
3209 int gnum = cn->group_num;
3210
3211# ifdef USE_NAMED_GROUP
3212 if (env->num_named > 0 &&
3216 }
3217# endif
3218 if (gnum > env->num_mem) {
3222 }
3223
3224# ifdef USE_NAMED_GROUP
3225 set_call_attr:
3226# endif
3227 cn->target = nodes[cn->group_num];
3228 if (IS_NULL(cn->target)) {
3232 }
3234 BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
3235 cn->unset_addr_list = env->unset_addr_list;
3236 }
3237# ifdef USE_NAMED_GROUP
3238# ifdef USE_PERL_SUBEXP_CALL
3239 else if (cn->name == cn->name_end) {
3240 goto set_call_attr;
3241 }
3242# endif
3243 else {
3244 int *refs;
3245
3246 int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
3247 &refs);
3248 if (n <= 0) {
3252 }
3253 else if (n > 1 &&
3258 }
3259 else {
3260 cn->group_num = refs[0];
3261 goto set_call_attr;
3262 }
3263 }
3264# endif
3265 }
3266 break;
3267
3268 case NT_ANCHOR:
3269 {
3270 AnchorNode* an = NANCHOR(node);
3271
3272 switch (an->type) {
3273 case ANCHOR_PREC_READ:
3275 case ANCHOR_LOOK_BEHIND:
3277 r = setup_subexp_call(an->target, env);
3278 break;
3279 }
3280 }
3281 break;
3282
3283 default:
3284 break;
3285 }
3286
3287 return r;
3288}
3289#endif
3290
3291/* divide different length alternatives in look-behind.
3292 (?<=A|B) ==> (?<=A)|(?<=B)
3293 (?<!A|B) ==> (?<!A)(?<!B)
3294*/
3295static int
3296divide_look_behind_alternatives(Node* node)
3297{
3298 Node *head, *np, *insert_node;
3299 AnchorNode* an = NANCHOR(node);
3300 int anc_type = an->type;
3301
3302 head = an->target;
3303 np = NCAR(head);
3304 swap_node(node, head);
3305 NCAR(node) = head;
3306 NANCHOR(head)->target = np;
3307
3308 np = node;
3309 while ((np = NCDR(np)) != NULL_NODE) {
3310 insert_node = onig_node_new_anchor(anc_type);
3311 CHECK_NULL_RETURN_MEMERR(insert_node);
3312 NANCHOR(insert_node)->target = NCAR(np);
3313 NCAR(np) = insert_node;
3314 }
3315
3316 if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3317 np = node;
3318 do {
3319 SET_NTYPE(np, NT_LIST); /* alt -> list */
3320 } while ((np = NCDR(np)) != NULL_NODE);
3321 }
3322 return 0;
3323}
3324
3325static int
3326setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
3327{
3328 int r, len;
3329 AnchorNode* an = NANCHOR(node);
3330
3331 r = get_char_length_tree(an->target, reg, &len);
3332 if (r == 0)
3333 an->char_len = len;
3334 else if (r == GET_CHAR_LEN_VARLEN)
3336 else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3338 r = divide_look_behind_alternatives(node);
3339 else
3341 }
3342
3343 return r;
3344}
3345
3346static int
3347next_setup(Node* node, Node* next_node, regex_t* reg)
3348{
3349 int type;
3350
3351 retry:
3352 type = NTYPE(node);
3353 if (type == NT_QTFR) {
3354 QtfrNode* qn = NQTFR(node);
3355 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3356#ifdef USE_QTFR_PEEK_NEXT
3357 Node* n = get_head_value_node(next_node, 1, reg);
3358 /* '\0': for UTF-16BE etc... */
3359 if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
3360 qn->next_head_exact = n;
3361 }
3362#endif
3363 /* automatic possessification a*b ==> (?>a*)b */
3364 if (qn->lower <= 1) {
3365 int ttype = NTYPE(qn->target);
3366 if (IS_NODE_TYPE_SIMPLE(ttype)) {
3367 Node *x, *y;
3368 x = get_head_value_node(qn->target, 0, reg);
3369 if (IS_NOT_NULL(x)) {
3370 y = get_head_value_node(next_node, 0, reg);
3371 if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
3375 swap_node(node, en);
3376 NENCLOSE(node)->target = en;
3377 }
3378 }
3379 }
3380 }
3381 }
3382 }
3383 else if (type == NT_ENCLOSE) {
3384 EncloseNode* en = NENCLOSE(node);
3385 if (en->type == ENCLOSE_MEMORY) {
3386 node = en->target;
3387 goto retry;
3388 }
3389 }
3390 return 0;
3391}
3392
3393
3394static int
3395update_string_node_case_fold(regex_t* reg, Node *node)
3396{
3398 UChar *sbuf, *ebuf, *sp;
3399 int r, i, len;
3400 OnigDistance sbuf_size;
3401 StrNode* sn = NSTR(node);
3402
3403 end = sn->end;
3404 sbuf_size = (end - sn->s) * 2;
3405 sbuf = (UChar* )xmalloc(sbuf_size);
3407 ebuf = sbuf + sbuf_size;
3408
3409 sp = sbuf;
3410 p = sn->s;
3411 while (p < end) {
3412 len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3413 for (i = 0; i < len; i++) {
3414 if (sp >= ebuf) {
3415 UChar* p = (UChar* )xrealloc(sbuf, sbuf_size * 2);
3416 if (IS_NULL(p)) {
3417 xfree(sbuf);
3418 return ONIGERR_MEMORY;
3419 }
3420 sbuf = p;
3421 sp = sbuf + sbuf_size;
3422 sbuf_size *= 2;
3423 ebuf = sbuf + sbuf_size;
3424 }
3425
3426 *sp++ = buf[i];
3427 }
3428 }
3429
3430 r = onig_node_str_set(node, sbuf, sp);
3431
3432 xfree(sbuf);
3433 return r;
3434}
3435
3436static int
3437expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
3438 regex_t* reg)
3439{
3440 int r;
3441 Node *node;
3442
3443 node = onig_node_new_str(s, end);
3444 if (IS_NULL(node)) return ONIGERR_MEMORY;
3445
3446 r = update_string_node_case_fold(reg, node);
3447 if (r != 0) {
3448 onig_node_free(node);
3449 return r;
3450 }
3451
3452 NSTRING_SET_AMBIG(node);
3454 *rnode = node;
3455 return 0;
3456}
3457
3458static int
3459is_case_fold_variable_len(int item_num, OnigCaseFoldCodeItem items[],
3460 int slen)
3461{
3462 int i;
3463
3464 for (i = 0; i < item_num; i++) {
3465 if (items[i].byte_len != slen) {
3466 return 1;
3467 }
3468 if (items[i].code_len != 1) {
3469 return 1;
3470 }
3471 }
3472 return 0;
3473}
3474
3475static int
3476expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
3477 UChar *p, int slen, UChar *end,
3478 regex_t* reg, Node **rnode)
3479{
3480 int r, i, j, len, varlen;
3481 Node *anode, *var_anode, *snode, *xnode, *an;
3483
3484 *rnode = var_anode = NULL_NODE;
3485
3486 varlen = 0;
3487 for (i = 0; i < item_num; i++) {
3488 if (items[i].byte_len != slen) {
3489 varlen = 1;
3490 break;
3491 }
3492 }
3493
3494 if (varlen != 0) {
3495 *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3496 if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
3497
3498 xnode = onig_node_new_list(NULL, NULL);
3499 if (IS_NULL(xnode)) goto mem_err;
3500 NCAR(var_anode) = xnode;
3501
3503 if (IS_NULL(anode)) goto mem_err;
3504 NCAR(xnode) = anode;
3505 }
3506 else {
3507 *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3508 if (IS_NULL(anode)) return ONIGERR_MEMORY;
3509 }
3510
3511 snode = onig_node_new_str(p, p + slen);
3512 if (IS_NULL(snode)) goto mem_err;
3513
3514 NCAR(anode) = snode;
3515
3516 for (i = 0; i < item_num; i++) {
3517 snode = onig_node_new_str(NULL, NULL);
3518 if (IS_NULL(snode)) goto mem_err;
3519
3520 for (j = 0; j < items[i].code_len; j++) {
3521 len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
3522 if (len < 0) {
3523 r = len;
3524 goto mem_err2;
3525 }
3526
3527 r = onig_node_str_cat(snode, buf, buf + len);
3528 if (r != 0) goto mem_err2;
3529 }
3530
3532 if (IS_NULL(an)) {
3533 goto mem_err2;
3534 }
3535
3536 if (items[i].byte_len != slen) {
3537 Node *rem;
3538 UChar *q = p + items[i].byte_len;
3539
3540 if (q < end) {
3541 r = expand_case_fold_make_rem_string(&rem, q, end, reg);
3542 if (r != 0) {
3543 onig_node_free(an);
3544 goto mem_err2;
3545 }
3546
3547 xnode = onig_node_list_add(NULL_NODE, snode);
3548 if (IS_NULL(xnode)) {
3549 onig_node_free(an);
3550 onig_node_free(rem);
3551 goto mem_err2;
3552 }
3553 if (IS_NULL(onig_node_list_add(xnode, rem))) {
3554 onig_node_free(an);
3555 onig_node_free(xnode);
3556 onig_node_free(rem);
3557 goto mem_err;
3558 }
3559
3560 NCAR(an) = xnode;
3561 }
3562 else {
3563 NCAR(an) = snode;
3564 }
3565
3566 NCDR(var_anode) = an;
3567 var_anode = an;
3568 }
3569 else {
3570 NCAR(an) = snode;
3571 NCDR(anode) = an;
3572 anode = an;
3573 }
3574 }
3575
3576 return varlen;
3577
3578 mem_err2:
3579 onig_node_free(snode);
3580
3581 mem_err:
3582 onig_node_free(*rnode);
3583
3584 return ONIGERR_MEMORY;
3585}
3586
3587static int
3588expand_case_fold_string(Node* node, regex_t* reg)
3589{
3590#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
3591
3592 int r, n, len, alt_num;
3593 int varlen = 0;
3594 UChar *start, *end, *p;
3595 Node *top_root, *root, *snode, *prev_node;
3597 StrNode* sn = NSTR(node);
3598
3599 if (NSTRING_IS_AMBIG(node)) return 0;
3600
3601 start = sn->s;
3602 end = sn->end;
3603 if (start >= end) return 0;
3604
3605 r = 0;
3606 top_root = root = prev_node = snode = NULL_NODE;
3607 alt_num = 1;
3608 p = start;
3609 while (p < end) {
3611 p, end, items);
3612 if (n < 0) {
3613 r = n;
3614 goto err;
3615 }
3616
3617 len = enclen(reg->enc, p, end);
3618
3619 varlen = is_case_fold_variable_len(n, items, len);
3620 if (n == 0 || varlen == 0) {
3621 if (IS_NULL(snode)) {
3622 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3623 onig_node_free(top_root);
3624 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3625 if (IS_NULL(root)) {
3626 onig_node_free(prev_node);
3627 goto mem_err;
3628 }
3629 }
3630
3631 prev_node = snode = onig_node_new_str(NULL, NULL);
3632 if (IS_NULL(snode)) goto mem_err;
3633 if (IS_NOT_NULL(root)) {
3634 if (IS_NULL(onig_node_list_add(root, snode))) {
3635 onig_node_free(snode);
3636 goto mem_err;
3637 }
3638 }
3639 }
3640
3641 r = onig_node_str_cat(snode, p, p + len);
3642 if (r != 0) goto err;
3643 }
3644 else {
3645 alt_num *= (n + 1);
3646 if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
3647
3648 if (IS_NOT_NULL(snode)) {
3649 r = update_string_node_case_fold(reg, snode);
3650 if (r == 0) {
3651 NSTRING_SET_AMBIG(snode);
3652 }
3653 }
3654 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3655 onig_node_free(top_root);
3656 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3657 if (IS_NULL(root)) {
3658 onig_node_free(prev_node);
3659 goto mem_err;
3660 }
3661 }
3662
3663 r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
3664 if (r < 0) goto mem_err;
3665 if (r == 1) {
3666 if (IS_NULL(root)) {
3667 top_root = prev_node;
3668 }
3669 else {
3670 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3671 onig_node_free(prev_node);
3672 goto mem_err;
3673 }
3674 }
3675
3676 root = NCAR(prev_node);
3677 }
3678 else { /* r == 0 */
3679 if (IS_NOT_NULL(root)) {
3680 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3681 onig_node_free(prev_node);
3682 goto mem_err;
3683 }
3684 }
3685 }
3686
3687 snode = NULL_NODE;
3688 }
3689
3690 p += len;
3691 }
3692 if (IS_NOT_NULL(snode)) {
3693 r = update_string_node_case_fold(reg, snode);
3694 if (r == 0) {
3695 NSTRING_SET_AMBIG(snode);
3696 }
3697 }
3698
3699 if (p < end) {
3700 Node *srem;
3701
3702 r = expand_case_fold_make_rem_string(&srem, p, end, reg);
3703 if (r != 0) goto mem_err;
3704
3705 if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
3706 onig_node_free(top_root);
3707 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3708 if (IS_NULL(root)) {
3709 onig_node_free(srem);
3710 onig_node_free(prev_node);
3711 goto mem_err;
3712 }
3713 }
3714
3715 if (IS_NULL(root)) {
3716 prev_node = srem;
3717 }
3718 else {
3719 if (IS_NULL(onig_node_list_add(root, srem))) {
3720 onig_node_free(srem);
3721 goto mem_err;
3722 }
3723 }
3724 }
3725
3726 /* ending */
3727 top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
3728 swap_node(node, top_root);
3729 onig_node_free(top_root);
3730 return 0;
3731
3732 mem_err:
3733 r = ONIGERR_MEMORY;
3734
3735 err:
3736 onig_node_free(top_root);
3737 return r;
3738}
3739
3740
3741#ifdef USE_COMBINATION_EXPLOSION_CHECK
3742
3743# define CEC_THRES_NUM_BIG_REPEAT 512
3744# define CEC_INFINITE_NUM 0x7fffffff
3745
3746# define CEC_IN_INFINITE_REPEAT (1<<0)
3747# define CEC_IN_FINITE_REPEAT (1<<1)
3748# define CEC_CONT_BIG_REPEAT (1<<2)
3749
3750static int
3751setup_comb_exp_check(Node* node, int state, ScanEnv* env)
3752{
3753 int type;
3754 int r = state;
3755
3756 type = NTYPE(node);
3757 switch (type) {
3758 case NT_LIST:
3759 {
3760 Node* prev = NULL_NODE;
3761 do {
3762 r = setup_comb_exp_check(NCAR(node), r, env);
3763 prev = NCAR(node);
3764 } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
3765 }
3766 break;
3767
3768 case NT_ALT:
3769 {
3770 int ret;
3771 do {
3772 ret = setup_comb_exp_check(NCAR(node), state, env);
3773 r |= ret;
3774 } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
3775 }
3776 break;
3777
3778 case NT_QTFR:
3779 {
3780 int child_state = state;
3781 int add_state = 0;
3782 QtfrNode* qn = NQTFR(node);
3783 Node* target = qn->target;
3784 int var_num;
3785
3786 if (! IS_REPEAT_INFINITE(qn->upper)) {
3787 if (qn->upper > 1) {
3788 /* {0,1}, {1,1} are allowed */
3789 child_state |= CEC_IN_FINITE_REPEAT;
3790
3791 /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
3792 if (env->backrefed_mem == 0) {
3793 if (NTYPE(qn->target) == NT_ENCLOSE) {
3794 EncloseNode* en = NENCLOSE(qn->target);
3795 if (en->type == ENCLOSE_MEMORY) {
3796 if (NTYPE(en->target) == NT_QTFR) {
3797 QtfrNode* q = NQTFR(en->target);
3799 && q->greedy == qn->greedy) {
3800 qn->upper = (qn->lower == 0 ? 1 : qn->lower);
3801 if (qn->upper == 1)
3802 child_state = state;
3803 }
3804 }
3805 }
3806 }
3807 }
3808 }
3809 }
3810
3811 if (state & CEC_IN_FINITE_REPEAT) {
3812 qn->comb_exp_check_num = -1;
3813 }
3814 else {
3815 if (IS_REPEAT_INFINITE(qn->upper)) {
3816 var_num = CEC_INFINITE_NUM;
3817 child_state |= CEC_IN_INFINITE_REPEAT;
3818 }
3819 else {
3820 var_num = qn->upper - qn->lower;
3821 }
3822
3823 if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
3824 add_state |= CEC_CONT_BIG_REPEAT;
3825
3826 if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
3827 ((state & CEC_CONT_BIG_REPEAT) != 0 &&
3828 var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
3829 if (qn->comb_exp_check_num == 0) {
3830 env->num_comb_exp_check++;
3831 qn->comb_exp_check_num = env->num_comb_exp_check;
3832 if (env->curr_max_regnum > env->comb_exp_max_regnum)
3833 env->comb_exp_max_regnum = env->curr_max_regnum;
3834 }
3835 }
3836 }
3837
3838 r = setup_comb_exp_check(target, child_state, env);
3839 r |= add_state;
3840 }
3841 break;
3842
3843 case NT_ENCLOSE:
3844 {
3845 EncloseNode* en = NENCLOSE(node);
3846
3847 switch (en->type) {
3848 case ENCLOSE_MEMORY:
3849 {
3850 if (env->curr_max_regnum < en->regnum)
3851 env->curr_max_regnum = en->regnum;
3852
3853 r = setup_comb_exp_check(en->target, state, env);
3854 }
3855 break;
3856
3857 default:
3858 r = setup_comb_exp_check(en->target, state, env);
3859 break;
3860 }
3861 }
3862 break;
3863
3864# ifdef USE_SUBEXP_CALL
3865 case NT_CALL:
3866 if (IS_CALL_RECURSION(NCALL(node)))
3867 env->has_recursion = 1;
3868 else
3869 r = setup_comb_exp_check(NCALL(node)->target, state, env);
3870 break;
3871# endif
3872
3873 default:
3874 break;
3875 }
3876
3877 return r;
3878}
3879#endif
3880
3881#define IN_ALT (1<<0)
3882#define IN_NOT (1<<1)
3883#define IN_REPEAT (1<<2)
3884#define IN_VAR_REPEAT (1<<3)
3885#define IN_CALL (1<<4)
3886#define IN_RECCALL (1<<5)
3887
3888/* setup_tree does the following work.
3889 1. check empty loop. (set qn->target_empty_info)
3890 2. expand ignore-case in char class.
3891 3. set memory status bit flags. (reg->mem_stats)
3892 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
3893 5. find invalid patterns in look-behind.
3894 6. expand repeated string.
3895 */
3896static int
3897setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
3898{
3899 int type;
3900 int r = 0;
3901
3902restart:
3903 type = NTYPE(node);
3904 switch (type) {
3905 case NT_LIST:
3906 {
3907 Node* prev = NULL_NODE;
3908 do {
3909 r = setup_tree(NCAR(node), reg, state, env);
3910 if (IS_NOT_NULL(prev) && r == 0) {
3911 r = next_setup(prev, NCAR(node), reg);
3912 }
3913 prev = NCAR(node);
3914 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3915 }
3916 break;
3917
3918 case NT_ALT:
3919 do {
3920 r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
3921 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3922 break;
3923
3924 case NT_CCLASS:
3925 break;
3926
3927 case NT_STR:
3928 if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
3929 r = expand_case_fold_string(node, reg);
3930 }
3931 break;
3932
3933 case NT_CTYPE:
3934 case NT_CANY:
3935 break;
3936
3937#ifdef USE_SUBEXP_CALL
3938 case NT_CALL:
3939 break;
3940#endif
3941
3942 case NT_BREF:
3943 {
3944 int i;
3945 int* p;
3946 Node** nodes = SCANENV_MEM_NODES(env);
3947 BRefNode* br = NBREF(node);
3948 p = BACKREFS_P(br);
3949 for (i = 0; i < br->back_num; i++) {
3950 if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
3951 BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
3952 BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
3953#ifdef USE_BACKREF_WITH_LEVEL
3954 if (IS_BACKREF_NEST_LEVEL(br)) {
3955 BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
3956 }
3957#endif
3959 }
3960 }
3961 break;
3962
3963 case NT_QTFR:
3964 {
3965 OnigDistance d;
3966 QtfrNode* qn = NQTFR(node);
3967 Node* target = qn->target;
3968
3969 if ((state & IN_REPEAT) != 0) {
3970 qn->state |= NST_IN_REPEAT;
3971 }
3972
3973 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
3974 r = get_min_match_length(target, &d, env);
3975 if (r) break;
3976 if (d == 0) {
3978#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3979 r = quantifiers_memory_node_info(target);
3980 if (r < 0) break;
3981 if (r > 0) {
3982 qn->target_empty_info = r;
3983 }
3984#endif
3985#if 0
3986 r = get_max_match_length(target, &d, env);
3987 if (r == 0 && d == 0) {
3988 /* ()* ==> ()?, ()+ ==> () */
3989 qn->upper = 1;
3990 if (qn->lower > 1) qn->lower = 1;
3991 if (NTYPE(target) == NT_STR) {
3992 qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */
3993 }
3994 }
3995#endif
3996 }
3997 }
3998
3999 state |= IN_REPEAT;
4000 if (qn->lower != qn->upper)
4002 r = setup_tree(target, reg, state, env);
4003 if (r) break;
4004
4005 /* expand string */
4006#define EXPAND_STRING_MAX_LENGTH 100
4007 if (NTYPE(target) == NT_STR) {
4008 if (qn->lower > 1) {
4009 int i, n = qn->lower;
4010 OnigDistance len = NSTRING_LEN(target);
4011 StrNode* sn = NSTR(target);
4012 Node* np;
4013
4014 np = onig_node_new_str(sn->s, sn->end);
4015 if (IS_NULL(np)) return ONIGERR_MEMORY;
4016 NSTR(np)->flag = sn->flag;
4017
4018 for (i = 1; i < n && (i+1) * len <= EXPAND_STRING_MAX_LENGTH; i++) {
4019 r = onig_node_str_cat(np, sn->s, sn->end);
4020 if (r) {
4021 onig_node_free(np);
4022 return r;
4023 }
4024 }
4025 if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) {
4026 Node *np1, *np2;
4027
4028 qn->lower -= i;
4029 if (! IS_REPEAT_INFINITE(qn->upper))
4030 qn->upper -= i;
4031
4032 np1 = onig_node_new_list(np, NULL);
4033 if (IS_NULL(np1)) {
4034 onig_node_free(np);
4035 return ONIGERR_MEMORY;
4036 }
4037 swap_node(np1, node);
4038 np2 = onig_node_list_add(node, np1);
4039 if (IS_NULL(np2)) {
4040 onig_node_free(np1);
4041 return ONIGERR_MEMORY;
4042 }
4043 }
4044 else {
4045 swap_node(np, node);
4046 onig_node_free(np);
4047 }
4048 break; /* break case NT_QTFR: */
4049 }
4050 }
4051
4052#ifdef USE_OP_PUSH_OR_JUMP_EXACT
4053 if (qn->greedy && (qn->target_empty_info != 0)) {
4054 if (NTYPE(target) == NT_QTFR) {
4055 QtfrNode* tqn = NQTFR(target);
4056 if (IS_NOT_NULL(tqn->head_exact)) {
4057 qn->head_exact = tqn->head_exact;
4058 tqn->head_exact = NULL;
4059 }
4060 }
4061 else {
4062 qn->head_exact = get_head_value_node(qn->target, 1, reg);
4063 }
4064 }
4065#endif
4066 }
4067 break;
4068
4069 case NT_ENCLOSE:
4070 {
4071 EncloseNode* en = NENCLOSE(node);
4072
4073 switch (en->type) {
4074 case ENCLOSE_OPTION:
4075 {
4076 OnigOptionType options = reg->options;
4077 reg->options = NENCLOSE(node)->option;
4078 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4079 reg->options = options;
4080 }
4081 break;
4082
4083 case ENCLOSE_MEMORY:
4084 if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_CALL)) != 0) {
4085 BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
4086 /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
4087 }
4088 if (IS_ENCLOSE_CALLED(en))
4089 state |= IN_CALL;
4090 if (IS_ENCLOSE_RECURSION(en))
4091 state |= IN_RECCALL;
4092 else if ((state & IN_RECCALL) != 0)
4093 SET_CALL_RECURSION(node);
4094 r = setup_tree(en->target, reg, state, env);
4095 break;
4096
4098 {
4099 Node* target = en->target;
4100 r = setup_tree(target, reg, state, env);
4101 if (NTYPE(target) == NT_QTFR) {
4102 QtfrNode* tqn = NQTFR(target);
4103 if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
4104 tqn->greedy != 0) { /* (?>a*), a*+ etc... */
4105 int qtype = NTYPE(tqn->target);
4106 if (IS_NODE_TYPE_SIMPLE(qtype))
4108 }
4109 }
4110 }
4111 break;
4112
4113 case ENCLOSE_CONDITION:
4114#ifdef USE_NAMED_GROUP
4115 if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) &&
4116 env->num_named > 0 &&
4120 }
4121#endif
4122 if (NENCLOSE(node)->regnum > env->num_mem)
4124 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4125 break;
4126
4127 case ENCLOSE_ABSENT:
4128 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4129 break;
4130 }
4131 }
4132 break;
4133
4134 case NT_ANCHOR:
4135 {
4136 AnchorNode* an = NANCHOR(node);
4137
4138 switch (an->type) {
4139 case ANCHOR_PREC_READ:
4140 r = setup_tree(an->target, reg, state, env);
4141 break;
4143 r = setup_tree(an->target, reg, (state | IN_NOT), env);
4144 break;
4145
4146/* allowed node types in look-behind */
4147#define ALLOWED_TYPE_IN_LB \
4148 ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
4149 BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
4150
4151#define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY | ENCLOSE_OPTION )
4152#define ALLOWED_ENCLOSE_IN_LB_NOT ENCLOSE_OPTION
4153
4154#define ALLOWED_ANCHOR_IN_LB \
4155( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4156 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4157 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4158 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4159#define ALLOWED_ANCHOR_IN_LB_NOT \
4160( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4161 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4162 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4163 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4164
4165 case ANCHOR_LOOK_BEHIND:
4166 {
4167 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4169 if (r < 0) return r;
4170 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4171 if (NTYPE(node) != NT_ANCHOR) goto restart;
4172 r = setup_tree(an->target, reg, state, env);
4173 if (r != 0) return r;
4174 r = setup_look_behind(node, reg, env);
4175 }
4176 break;
4177
4179 {
4180 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4182 if (r < 0) return r;
4183 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4184 if (NTYPE(node) != NT_ANCHOR) goto restart;
4185 r = setup_tree(an->target, reg, (state | IN_NOT), env);
4186 if (r != 0) return r;
4187 r = setup_look_behind(node, reg, env);
4188 }
4189 break;
4190 }
4191 }
4192 break;
4193
4194 default:
4195 break;
4196 }
4197
4198 return r;
4199}
4200
4201#ifndef USE_SUNDAY_QUICK_SEARCH
4202/* set skip map for Boyer-Moore search */
4203static int
4204set_bm_skip(UChar* s, UChar* end, regex_t* reg,
4205 UChar skip[], int** int_skip, int ignore_case)
4206{
4207 OnigDistance i, len;
4208 int clen, flen, n, j, k;
4211 OnigEncoding enc = reg->enc;
4212
4213 len = end - s;
4214 if (len < ONIG_CHAR_TABLE_SIZE) {
4215 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )len;
4216
4217 n = 0;
4218 for (i = 0; i < len - 1; i += clen) {
4219 p = s + i;
4220 if (ignore_case)
4222 p, end, items);
4223 clen = enclen(enc, p, end);
4224 if (p + clen > end)
4225 clen = (int )(end - p);
4226
4227 for (j = 0; j < n; j++) {
4228 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4229 return 1; /* different length isn't supported. */
4230 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4231 if (flen != clen)
4232 return 1; /* different length isn't supported. */
4233 }
4234 for (j = 0; j < clen; j++) {
4235 skip[s[i + j]] = (UChar )(len - 1 - i - j);
4236 for (k = 0; k < n; k++) {
4237 skip[buf[k][j]] = (UChar )(len - 1 - i - j);
4238 }
4239 }
4240 }
4241 }
4242 else {
4243# if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
4244 /* This should not happen. */
4245 return ONIGERR_TYPE_BUG;
4246# else
4247 if (IS_NULL(*int_skip)) {
4248 *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4249 if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
4250 }
4251 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )len;
4252
4253 n = 0;
4254 for (i = 0; i < len - 1; i += clen) {
4255 p = s + i;
4256 if (ignore_case)
4258 p, end, items);
4259 clen = enclen(enc, p, end);
4260 if (p + clen > end)
4261 clen = (int )(end - p);
4262
4263 for (j = 0; j < n; j++) {
4264 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4265 return 1; /* different length isn't supported. */
4266 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4267 if (flen != clen)
4268 return 1; /* different length isn't supported. */
4269 }
4270 for (j = 0; j < clen; j++) {
4271 (*int_skip)[s[i + j]] = (int )(len - 1 - i - j);
4272 for (k = 0; k < n; k++) {
4273 (*int_skip)[buf[k][j]] = (int )(len - 1 - i - j);
4274 }
4275 }
4276 }
4277# endif
4278 }
4279 return 0;
4280}
4281
4282#else /* USE_SUNDAY_QUICK_SEARCH */
4283
4284 /* set skip map for Sunday's quick search */
/*
 * Builds the skip table for the literal [s, end).
 *
 * When the literal is shorter than ONIG_CHAR_TABLE_SIZE the byte-sized table
 * skip[] is filled; otherwise (only when that branch is compiled in) the int
 * table *int_skip is used, and it is allocated with xmalloc() if still NULL.
 * Every entry starts at len + 1.  Each byte at offset i + j of the literal,
 * and the byte at the same position j of every case-fold alternative held in
 * buf, is then given the value len - i - j.
 *
 * Returns 0 on success, 1 when a case-fold alternative does not have the same
 * byte length as the character it stands for, ONIGERR_MEMORY when the int
 * table cannot be allocated, and ONIGERR_TYPE_BUG from the branch marked
 * "should not happen".
 *
 * NOTE(review): this listing does not show the declarations of p, items and
 * buf, nor the first half of the call under `if (ignore_case)` (presumably
 * the statement that sets n); confirm against the full source.
 */
4285 static int
4286 set_bm_skip(UChar* s, UChar* end, regex_t* reg,
4287 UChar skip[], int** int_skip, int ignore_case)
4288 {
4289 OnigDistance i, len;
4290 int clen, flen, n, j, k;
4293 OnigEncoding enc = reg->enc;
4294
4295 len = end - s;
4296 if (len < ONIG_CHAR_TABLE_SIZE) {
4297 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )(len + 1);
4298
4299 n = 0;
4300 for (i = 0; i < len; i += clen) {
4301 p = s + i;
4302 if (ignore_case)
4304 p, end, items);
4305 clen = enclen(enc, p, end);
/* clamp a character length that would run past the end of the literal */
4306 if (p + clen > end)
4307 clen = (int )(end - p);
4308
4309 for (j = 0; j < n; j++) {
4310 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4311 return 1; /* different length isn't supported. */
4312 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4313 if (flen != clen)
4314 return 1; /* different length isn't supported. */
4315 }
4316 for (j = 0; j < clen; j++) {
4317 skip[s[i + j]] = (UChar )(len - i - j);
4318 for (k = 0; k < n; k++) {
4319 skip[buf[k][j]] = (UChar )(len - i - j);
4320 }
4321 }
4322 }
4323 }
4324 else {
4325 # if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
4326 /* This should not happen. */
4327 return ONIGERR_TYPE_BUG;
4328 # else
4329 if (IS_NULL(*int_skip)) {
4330 *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4331 if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
4332 }
4333 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )(len + 1);
4334
4335 n = 0;
4336 for (i = 0; i < len; i += clen) {
4337 p = s + i;
4338 if (ignore_case)
4340 p, end, items);
4341 clen = enclen(enc, p, end);
4342 if (p + clen > end)
4343 clen = (int )(end - p);
4344
4345 for (j = 0; j < n; j++) {
4346 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4347 return 1; /* different length isn't supported. */
4348 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4349 if (flen != clen)
4350 return 1; /* different length isn't supported. */
4351 }
4352 for (j = 0; j < clen; j++) {
4353 (*int_skip)[s[i + j]] = (int )(len - i - j);
4354 for (k = 0; k < n; k++) {
4355 (*int_skip)[buf[k][j]] = (int )(len - i - j);
4356 }
4357 }
4358 }
4359 # endif
4360 }
4361 return 0;
4362 }
4363#endif /* USE_SUNDAY_QUICK_SEARCH */
4364
/* A pair of byte distances: a lower bound (min) and an upper bound (max). */
4365 typedef struct {
4366 OnigDistance min; /* min byte length */
4367 OnigDistance max; /* max byte length */
4368 } MinMaxLen;
4369
/*
 * Environment handed down through optimize_node_left().
 * NOTE(review): the member declarations are not shown in this listing; code
 * in this file accesses env->mmd, env->enc, env->options,
 * env->case_fold_flag and env->scan_env.
 */
4370 typedef struct {
4376 } OptEnv;
4377
/*
 * Anchor flags gathered for a node.
 * NOTE(review): the member declarations are not shown in this listing; code
 * in this file uses the bit-flag members left_anchor and right_anchor.
 */
4378 typedef struct {
4381 } OptAncInfo;
4382
/*
 * Candidate exact (literal) string for the search optimizer.
 * NOTE(review): some member declarations are not shown in this listing; code
 * in this file additionally uses the members anc, reach_end and s.
 */
4383 typedef struct {
4384 MinMaxLen mmd; /* info position */
4386
4388 int ignore_case; /* -1: unset, 0: case sensitive, 1: ignore case */
/* number of bytes currently stored in s */
4389 int len;
4391 } OptExactInfo;
4392
/*
 * Candidate first-byte map for the search optimizer.
 * NOTE(review): some member declarations are not shown in this listing; code
 * in this file additionally uses the members anc and map (indexed by byte).
 */
4393 typedef struct {
4394 MinMaxLen mmd; /* info position */
4396
4397 int value; /* weighted value */
4399 } OptMapInfo;
4400
/*
 * All optimization hints collected for one parse-tree node.
 * NOTE(review): some member declarations are not shown in this listing; code
 * in this file additionally uses the members len and anc.
 */
4401 typedef struct {
4403
4405 OptExactInfo exb; /* boundary */
4406 OptExactInfo exm; /* middle */
4407 OptExactInfo expr; /* prec read (?=...) */
4408
4409 OptMapInfo map; /* boundary */
4410 } NodeOptInfo;
4411
4412
4413static int
4414map_position_value(OnigEncoding enc, int i)
4415{
4416 static const short int ByteValTable[] = {
4417 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
4418 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4419 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
4420 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
4421 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4422 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
4423 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4424 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
4425 };
4426
4427 if (i < numberof(ByteValTable)) {
4428 if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
4429 return 20;
4430 else
4431 return (int )ByteValTable[i];
4432 }
4433 else
4434 return 4; /* Take it easy. */
4435}
4436
4437static int
4438distance_value(MinMaxLen* mm)
4439{
4440 /* 1000 / (min-max-dist + 1) */
4441 static const short int dist_vals[] = {
4442 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
4443 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
4444 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
4445 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
4446 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
4447 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
4448 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
4449 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
4450 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
4451 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
4452 };
4453
4454 OnigDistance d;
4455
4456 if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
4457
4458 d = mm->max - mm->min;
4459 if (d < numberof(dist_vals))
4460 /* return dist_vals[d] * 16 / (mm->min + 12); */
4461 return (int )dist_vals[d];
4462 else
4463 return 1;
4464}
4465
4466static int
4467comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
4468{
4469 if (v2 <= 0) return -1;
4470 if (v1 <= 0) return 1;
4471
4472 v1 *= distance_value(d1);
4473 v2 *= distance_value(d2);
4474
4475 if (v2 > v1) return 1;
4476 if (v2 < v1) return -1;
4477
4478 if (d2->min < d1->min) return 1;
4479 if (d2->min > d1->min) return -1;
4480 return 0;
4481}
4482
4483static int
4484is_equal_mml(MinMaxLen* a, MinMaxLen* b)
4485{
4486 return (a->min == b->min && a->max == b->max) ? 1 : 0;
4487}
4488
4489
4490static void
4491set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
4492{
4493 mml->min = min;
4494 mml->max = max;
4495}
4496
4497static void
4498clear_mml(MinMaxLen* mml)
4499{
4500 mml->min = mml->max = 0;
4501}
4502
4503static void
4504copy_mml(MinMaxLen* to, MinMaxLen* from)
4505{
4506 to->min = from->min;
4507 to->max = from->max;
4508}
4509
4510static void
4511add_mml(MinMaxLen* to, MinMaxLen* from)
4512{
4513 to->min = distance_add(to->min, from->min);
4514 to->max = distance_add(to->max, from->max);
4515}
4516
/* Disabled helper: shifts both bounds of to by len using distance_add(). */
4517 #if 0
4518 static void
4519 add_len_mml(MinMaxLen* to, OnigDistance len)
4520 {
4521 to->min = distance_add(to->min, len);
4522 to->max = distance_add(to->max, len);
4523 }
4524 #endif
4525
4526static void
4527alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
4528{
4529 if (to->min > from->min) to->min = from->min;
4530 if (to->max < from->max) to->max = from->max;
4531}
4532
4533static void
4534copy_opt_env(OptEnv* to, OptEnv* from)
4535{
4536 *to = *from;
4537}
4538
4539static void
4540clear_opt_anc_info(OptAncInfo* anc)
4541{
4542 anc->left_anchor = 0;
4543 anc->right_anchor = 0;
4544}
4545
4546static void
4547copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
4548{
4549 *to = *from;
4550}
4551
/*
 * Computes into to the anchors of "left followed by right".
 * The left anchors of right are only carried over when left_len is 0, and
 * the right anchors of left are only carried over when right_len is 0.
 * NOTE(review): the statement inside the final else branch is not shown in
 * this listing.
 */
4552 static void
4553 concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
4554 OnigDistance left_len, OnigDistance right_len)
4555 {
4556 clear_opt_anc_info(to);
4557
4558 to->left_anchor = left->left_anchor;
4559 if (left_len == 0) {
4560 to->left_anchor |= right->left_anchor;
4561 }
4562
4563 to->right_anchor = right->right_anchor;
4564 if (right_len == 0) {
4565 to->right_anchor |= left->right_anchor;
4566 }
4567 else {
4569 }
4570 }
4571
4572static int
4573is_left_anchor(int anc)
4574{
4575 if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
4576 anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
4577 anc == ANCHOR_PREC_READ_NOT)
4578 return 0;
4579
4580 return 1;
4581}
4582
4583static int
4584is_set_opt_anc_info(OptAncInfo* to, int anc)
4585{
4586 if ((to->left_anchor & anc) != 0) return 1;
4587
4588 return ((to->right_anchor & anc) != 0 ? 1 : 0);
4589}
4590
4591static void
4592add_opt_anc_info(OptAncInfo* to, int anc)
4593{
4594 if (is_left_anchor(anc))
4595 to->left_anchor |= anc;
4596 else
4597 to->right_anchor |= anc;
4598}
4599
4600static void
4601remove_opt_anc_info(OptAncInfo* to, int anc)
4602{
4603 if (is_left_anchor(anc))
4604 to->left_anchor &= ~anc;
4605 else
4606 to->right_anchor &= ~anc;
4607}
4608
4609static void
4610alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
4611{
4612 to->left_anchor &= add->left_anchor;
4613 to->right_anchor &= add->right_anchor;
4614}
4615
4616static int
4617is_full_opt_exact_info(OptExactInfo* ex)
4618{
4619 return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
4620}
4621
4622static void
4623clear_opt_exact_info(OptExactInfo* ex)
4624{
4625 clear_mml(&ex->mmd);
4626 clear_opt_anc_info(&ex->anc);
4627 ex->reach_end = 0;
4628 ex->ignore_case = -1; /* unset */
4629 ex->len = 0;
4630 ex->s[0] = '\0';
4631}
4632
4633static void
4634copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
4635{
4636 *to = *from;
4637}
4638
4639static void
4640concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
4641{
4642 int i, j, len;
4643 UChar *p, *end;
4644 OptAncInfo tanc;
4645
4646 if (to->ignore_case < 0)
4647 to->ignore_case = add->ignore_case;
4648 else if (to->ignore_case != add->ignore_case)
4649 return ; /* avoid */
4650
4651 p = add->s;
4652 end = p + add->len;
4653 for (i = to->len; p < end; ) {
4654 len = enclen(enc, p, end);
4655 if (i + len > OPT_EXACT_MAXLEN) break;
4656 for (j = 0; j < len && p < end; j++)
4657 to->s[i++] = *p++;
4658 }
4659
4660 to->len = i;
4661 to->reach_end = (p == end ? add->reach_end : 0);
4662
4663 concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
4664 if (! to->reach_end) tanc.right_anchor = 0;
4665 copy_opt_anc_info(&to->anc, &tanc);
4666}
4667
4668static void
4669concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
4670 int raw ARG_UNUSED, OnigEncoding enc)
4671{
4672 int i, j, len;
4673 UChar *p;
4674
4675 for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
4676 len = enclen(enc, p, end);
4677 if (i + len > OPT_EXACT_MAXLEN) break;
4678 for (j = 0; j < len && p < end; j++)
4679 to->s[i++] = *p++;
4680 }
4681
4682 to->len = i;
4683}
4684
4685static void
4686alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
4687{
4688 int i, j, len;
4689
4690 if (add->len == 0 || to->len == 0) {
4691 clear_opt_exact_info(to);
4692 return ;
4693 }
4694
4695 if (! is_equal_mml(&to->mmd, &add->mmd)) {
4696 clear_opt_exact_info(to);
4697 return ;
4698 }
4699
4700 for (i = 0; i < to->len && i < add->len; ) {
4701 if (to->s[i] != add->s[i]) break;
4702 len = enclen(env->enc, to->s + i, to->s + to->len);
4703
4704 for (j = 1; j < len; j++) {
4705 if (to->s[i+j] != add->s[i+j]) break;
4706 }
4707 if (j < len) break;
4708 i += len;
4709 }
4710
4711 if (! add->reach_end || i < add->len || i < to->len) {
4712 to->reach_end = 0;
4713 }
4714 to->len = i;
4715 if (to->ignore_case < 0)
4716 to->ignore_case = add->ignore_case;
4717 else if (add->ignore_case >= 0)
4718 to->ignore_case |= add->ignore_case;
4719
4720 alt_merge_opt_anc_info(&to->anc, &add->anc);
4721 if (! to->reach_end) to->anc.right_anchor = 0;
4722}
4723
4724static void
4725select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
4726{
4727 int v1, v2;
4728
4729 v1 = now->len;
4730 v2 = alt->len;
4731
4732 if (v2 == 0) {
4733 return ;
4734 }
4735 else if (v1 == 0) {
4736 copy_opt_exact_info(now, alt);
4737 return ;
4738 }
4739 else if (v1 <= 2 && v2 <= 2) {
4740 /* ByteValTable[x] is big value --> low price */
4741 v2 = map_position_value(enc, now->s[0]);
4742 v1 = map_position_value(enc, alt->s[0]);
4743
4744 if (now->len > 1) v1 += 5;
4745 if (alt->len > 1) v2 += 5;
4746 }
4747
4748 if (now->ignore_case <= 0) v1 *= 2;
4749 if (alt->ignore_case <= 0) v2 *= 2;
4750
4751 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4752 copy_opt_exact_info(now, alt);
4753}
4754
4755static void
4756clear_opt_map_info(OptMapInfo* map)
4757{
4758 static const OptMapInfo clean_info = {
4759 {0, 0}, {0, 0}, 0,
4760 {
4761 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4762 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4763 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4764 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4765 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4766 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4767 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4768 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4769 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4770 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4771 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4772 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4773 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4774 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4775 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4776 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4777 }
4778 };
4779
4780 xmemcpy(map, &clean_info, sizeof(OptMapInfo));
4781}
4782
4783static void
4784copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
4785{
4786 *to = *from;
4787}
4788
4789static void
4790add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
4791{
4792 if (map->map[c] == 0) {
4793 map->map[c] = 1;
4794 map->value += map_position_value(enc, c);
4795 }
4796}
4797
/*
 * Adds to map the first byte of the character at p and the first byte of each
 * of its case-fold alternatives.  Multi-character folds are disabled in
 * case_fold_flag before the alternatives are requested.
 * Returns 0 on success, or the negative value reported by
 * ONIGENC_GET_CASE_FOLD_CODES_BY_STR().
 * NOTE(review): the declarations of items and buf are not shown in this
 * listing.
 */
4798 static int
4799 add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
4800 OnigEncoding enc, OnigCaseFoldType case_fold_flag)
4801 {
4804 int i, n;
4805
4806 add_char_opt_map_info(map, p[0], enc);
4807
4808 case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
4809 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
4810 if (n < 0) return n;
4811
4812 for (i = 0; i < n; i++) {
/* encode the alternative into buf and register only its first byte */
4813 ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
4814 add_char_opt_map_info(map, buf[0], enc);
4815 }
4816
4817 return 0;
4818 }
4819
4820static void
4821select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
4822{
4823 const int z = 1<<15; /* 32768: something big value */
4824
4825 int v1, v2;
4826
4827 if (alt->value == 0) return ;
4828 if (now->value == 0) {
4829 copy_opt_map_info(now, alt);
4830 return ;
4831 }
4832
4833 v1 = z / now->value;
4834 v2 = z / alt->value;
4835 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4836 copy_opt_map_info(now, alt);
4837}
4838
4839static int
4840comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
4841{
4842#define COMP_EM_BASE 20
4843 int ve, vm;
4844
4845 if (m->value <= 0) return -1;
4846
4847 ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2);
4848 vm = COMP_EM_BASE * 5 * 2 / m->value;
4849 return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
4850}
4851
4852static void
4853alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
4854{
4855 int i, val;
4856
4857 /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
4858 if (to->value == 0) return ;
4859 if (add->value == 0 || to->mmd.max < add->mmd.min) {
4860 clear_opt_map_info(to);
4861 return ;
4862 }
4863
4864 alt_merge_mml(&to->mmd, &add->mmd);
4865
4866 val = 0;
4867 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
4868 if (add->map[i])
4869 to->map[i] = 1;
4870
4871 if (to->map[i])
4872 val += map_position_value(enc, i);
4873 }
4874 to->value = val;
4875
4876 alt_merge_opt_anc_info(&to->anc, &add->anc);
4877}
4878
4879static void
4880set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
4881{
4882 copy_mml(&(opt->exb.mmd), mmd);
4883 copy_mml(&(opt->expr.mmd), mmd);
4884 copy_mml(&(opt->map.mmd), mmd);
4885}
4886
4887static void
4888clear_node_opt_info(NodeOptInfo* opt)
4889{
4890 clear_mml(&opt->len);
4891 clear_opt_anc_info(&opt->anc);
4892 clear_opt_exact_info(&opt->exb);
4893 clear_opt_exact_info(&opt->exm);
4894 clear_opt_exact_info(&opt->expr);
4895 clear_opt_map_info(&opt->map);
4896}
4897
4898static void
4899copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
4900{
4901 *to = *from;
4902}
4903
4904static void
4905concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
4906{
4907 int exb_reach, exm_reach;
4908 OptAncInfo tanc;
4909
4910 concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
4911 copy_opt_anc_info(&to->anc, &tanc);
4912
4913 if (add->exb.len > 0 && to->len.max == 0) {
4914 concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
4915 to->len.max, add->len.max);
4916 copy_opt_anc_info(&add->exb.anc, &tanc);
4917 }
4918
4919 if (add->map.value > 0 && to->len.max == 0) {
4920 if (add->map.mmd.max == 0)
4921 add->map.anc.left_anchor |= to->anc.left_anchor;
4922 }
4923
4924 exb_reach = to->exb.reach_end;
4925 exm_reach = to->exm.reach_end;
4926
4927 if (add->len.max != 0)
4928 to->exb.reach_end = to->exm.reach_end = 0;
4929
4930 if (add->exb.len > 0) {
4931 if (exb_reach) {
4932 concat_opt_exact_info(&to->exb, &add->exb, enc);
4933 clear_opt_exact_info(&add->exb);
4934 }
4935 else if (exm_reach) {
4936 concat_opt_exact_info(&to->exm, &add->exb, enc);
4937 clear_opt_exact_info(&add->exb);
4938 }
4939 }
4940 select_opt_exact_info(enc, &to->exm, &add->exb);
4941 select_opt_exact_info(enc, &to->exm, &add->exm);
4942
4943 if (to->expr.len > 0) {
4944 if (add->len.max > 0) {
4945 if (to->expr.len > (int )add->len.max)
4946 to->expr.len = (int )add->len.max;
4947
4948 if (to->expr.mmd.max == 0)
4949 select_opt_exact_info(enc, &to->exb, &to->expr);
4950 else
4951 select_opt_exact_info(enc, &to->exm, &to->expr);
4952 }
4953 }
4954 else if (add->expr.len > 0) {
4955 copy_opt_exact_info(&to->expr, &add->expr);
4956 }
4957
4958 select_opt_map_info(&to->map, &add->map);
4959
4960 add_mml(&to->len, &add->len);
4961}
4962
4963static void
4964alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
4965{
4966 alt_merge_opt_anc_info (&to->anc, &add->anc);
4967 alt_merge_opt_exact_info(&to->exb, &add->exb, env);
4968 alt_merge_opt_exact_info(&to->exm, &add->exm, env);
4969 alt_merge_opt_exact_info(&to->expr, &add->expr, env);
4970 alt_merge_opt_map_info(env->enc, &to->map, &add->map);
4971
4972 alt_merge_mml(&to->len, &add->len);
4973}
4974
4975
4976#define MAX_NODE_OPT_INFO_REF_COUNT 5
4977
/*
 * Walks the parse tree rooted at node and collects optimization hints into
 * opt: the length range, anchors, the exact-string candidates exb/exm/expr
 * and the first-byte map.  opt is cleared first and the positions of its
 * exb, expr and map records are seeded from env->mmd.
 *
 * Returns 0 on success.  A non-zero result of a nested call or helper is
 * passed on, and an unknown node type yields ONIGERR_TYPE_BUG.
 *
 * NOTE(review): several source lines are absent from this listing (for
 * example some declarations of min/max and a few case labels); confirm
 * details against the full source.
 */
4978 static int
4979 optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
4980 {
4981 int type;
4982 int r = 0;
4983
4984 clear_node_opt_info(opt);
4985 set_bound_node_opt_info(opt, &env->mmd);
4986
4987 type = NTYPE(node);
4988 switch (type) {
/* list: elements are processed in order; nenv.mmd accumulates the lengths of
   the elements already seen */
4989 case NT_LIST:
4990 {
4991 OptEnv nenv;
4992 NodeOptInfo nopt;
4993 Node* nd = node;
4994
4995 copy_opt_env(&nenv, env);
4996 do {
4997 r = optimize_node_left(NCAR(nd), &nopt, &nenv);
4998 if (r == 0) {
4999 add_mml(&nenv.mmd, &nopt.len);
5000 concat_left_node_opt_info(env->enc, opt, &nopt);
5001 }
5002 } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
5003 }
5004 break;
5005
/* alternation: the first branch is copied, every later branch is merged in */
5006 case NT_ALT:
5007 {
5008 NodeOptInfo nopt;
5009 Node* nd = node;
5010
5011 do {
5012 r = optimize_node_left(NCAR(nd), &nopt, env);
5013 if (r == 0) {
5014 if (nd == node) copy_node_opt_info(opt, &nopt);
5015 else alt_merge_node_opt_info(opt, &nopt, env);
5016 }
5017 } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
5018 }
5019 break;
5020
/* string: becomes the boundary exact string and contributes its first byte to
   the map */
5021 case NT_STR:
5022 {
5023 StrNode* sn = NSTR(node);
5024 OnigDistance slen = sn->end - sn->s;
5025 int is_raw = NSTRING_IS_RAW(node);
5026
5027 if (! NSTRING_IS_AMBIG(node)) {
5028 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
5029 is_raw, env->enc);
5030 opt->exb.ignore_case = 0;
5031 if (slen > 0) {
5032 add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
5033 }
5034 set_mml(&opt->len, slen, slen);
5035 }
5036 else {
5038
5039 if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
5040 int n = onigenc_strlen(env->enc, sn->s, sn->end);
5041 max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
5042 }
5043 else {
5044 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
5045 is_raw, env->enc);
5046 opt->exb.ignore_case = 1;
5047
5048 if (slen > 0) {
5049 r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
5050 env->enc, env->case_fold_flag);
5051 if (r != 0) break;
5052 }
5053
5054 max = slen;
5055 }
5056
5057 set_mml(&opt->len, slen, max);
5058 }
5059
/* the whole string fitted into the exact buffer */
5060 if ((OnigDistance )opt->exb.len == slen)
5061 opt->exb.reach_end = 1;
5062 }
5063 break;
5064
5065 case NT_CCLASS:
5066 {
5067 int i, z;
5068 CClassNode* cc = NCCLASS(node);
5069
5070 /* no need to check ignore case. (set in setup_tree()) */
5071
5072 if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
5075
5076 set_mml(&opt->len, min, max);
5077 }
5078 else {
/* single-byte class: every member byte goes into the map */
5079 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5080 z = BITSET_AT(cc->bs, i);
5081 if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
5082 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5083 }
5084 }
5085 set_mml(&opt->len, 1, 1);
5086 }
5087 }
5088 break;
5089
5090 case NT_CTYPE:
5091 {
5092 int i, min, max;
5093 int maxcode;
5094
5096
5097 if (max == 1) {
5098 min = 1;
5099
5100 maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
5101 switch (NCTYPE(node)->ctype) {
5102 case ONIGENC_CTYPE_WORD:
5103 if (NCTYPE(node)->not != 0) {
5104 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5105 if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) {
5106 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5107 }
5108 }
5109 }
5110 else {
5111 for (i = 0; i < maxcode; i++) {
5112 if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
5113 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5114 }
5115 }
5116 }
5117 break;
5118 }
5119 }
5120 else {
5121 min = ONIGENC_MBC_MINLEN(env->enc);
5122 }
5123 set_mml(&opt->len, min, max);
5124 }
5125 break;
5126
5127 case NT_CANY:
5128 {
5131 set_mml(&opt->len, min, max);
5132 }
5133 break;
5134
5135 case NT_ANCHOR:
5136 switch (NANCHOR(node)->type) {
5137 case ANCHOR_BEGIN_BUF:
5139 case ANCHOR_BEGIN_LINE:
5140 case ANCHOR_END_BUF:
5142 case ANCHOR_END_LINE:
5143 case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */
5144 case ANCHOR_PREC_READ_NOT: /* just for (?!x).* */
5145 add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
5146 break;
5147
/* look-ahead: its exact string is recorded in expr and its map is taken over */
5148 case ANCHOR_PREC_READ:
5149 {
5150 NodeOptInfo nopt;
5151
5152 r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
5153 if (r == 0) {
5154 if (nopt.exb.len > 0)
5155 copy_opt_exact_info(&opt->expr, &nopt.exb);
5156 else if (nopt.exm.len > 0)
5157 copy_opt_exact_info(&opt->expr, &nopt.exm);
5158
5159 opt->expr.reach_end = 0;
5160
5161 if (nopt.map.value > 0)
5162 copy_opt_map_info(&opt->map, &nopt.map);
5163 }
5164 }
5165 break;
5166
5168 break;
5169 }
5170 break;
5171
/* back reference: the length range is the union of the ranges of all groups
   that may be referenced */
5172 case NT_BREF:
5173 {
5174 int i;
5175 int* backs;
5176 OnigDistance min, max, tmin, tmax;
5177 Node** nodes = SCANENV_MEM_NODES(env->scan_env);
5178 BRefNode* br = NBREF(node);
5179
5180 if (br->state & NST_RECURSION) {
5181 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5182 break;
5183 }
5184 backs = BACKREFS_P(br);
5185 r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
5186 if (r != 0) break;
5187 r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
5188 if (r != 0) break;
5189 for (i = 1; i < br->back_num; i++) {
5190 r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
5191 if (r != 0) break;
5192 r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
5193 if (r != 0) break;
5194 if (min > tmin) min = tmin;
5195 if (max < tmax) max = tmax;
5196 }
5197 if (r == 0) set_mml(&opt->len, min, max);
5198 }
5199 break;
5200
5201 #ifdef USE_SUBEXP_CALL
5202 case NT_CALL:
5203 if (IS_CALL_RECURSION(NCALL(node)))
5204 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5205 else {
/* analyse the call target under the target's own options, then restore */
5206 OnigOptionType save = env->options;
5207 env->options = NENCLOSE(NCALL(node)->target)->option;
5208 r = optimize_node_left(NCALL(node)->target, opt, env);
5209 env->options = save;
5210 }
5211 break;
5212 #endif
5213
5214 case NT_QTFR:
5215 {
5216 int i;
5217 OnigDistance min, max;
5218 NodeOptInfo nopt;
5219 QtfrNode* qn = NQTFR(node);
5220
5221 r = optimize_node_left(qn->target, &nopt, env);
5222 if (r) break;
5223
5224 if (/*qn->lower == 0 &&*/ IS_REPEAT_INFINITE(qn->upper)) {
5225 if (env->mmd.max == 0 &&
5226 NTYPE(qn->target) == NT_CANY && qn->greedy) {
5227 if (IS_MULTILINE(env->options))
5228 /* implicit anchor: /.*a/ ==> /\A.*a/ */
5229 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
5230 else
5231 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
5232 }
5233 }
5234 else {
5235 if (qn->lower > 0) {
5236 copy_node_opt_info(opt, &nopt);
5237 if (nopt.exb.len > 0) {
5238 if (nopt.exb.reach_end) {
/* repeat the target's exact string up to lower times or until the buffer is
   full */
5239 for (i = 2; i <= qn->lower &&
5240 ! is_full_opt_exact_info(&opt->exb); i++) {
5241 concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
5242 }
5243 if (i < qn->lower) {
5244 opt->exb.reach_end = 0;
5245 }
5246 }
5247 }
5248
5249 if (qn->lower != qn->upper) {
5250 opt->exb.reach_end = 0;
5251 opt->exm.reach_end = 0;
5252 }
5253 if (qn->lower > 1)
5254 opt->exm.reach_end = 0;
5255 }
5256 }
5257
/* length range of the repetition: target min times lower; for an unbounded
   upper limit the max is infinite unless the target's max length is 0 */
5258 min = distance_multiply(nopt.len.min, qn->lower);
5259 if (IS_REPEAT_INFINITE(qn->upper))
5260 max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
5261 else
5262 max = distance_multiply(nopt.len.max, qn->upper);
5263
5264 set_mml(&opt->len, min, max);
5265 }
5266 break;
5267
5268 case NT_ENCLOSE:
5269 {
5270 EncloseNode* en = NENCLOSE(node);
5271
5272 switch (en->type) {
5273 case ENCLOSE_OPTION:
5274 {
5275 OnigOptionType save = env->options;
5276
5277 env->options = en->option;
5278 r = optimize_node_left(en->target, opt, env);
5279 env->options = save;
5280 }
5281 break;
5282
5283 case ENCLOSE_MEMORY:
5284 #ifdef USE_SUBEXP_CALL
/* NOTE(review): the condition that opens the block below is not shown in this
   listing */
5285 en->opt_count++;
5287 OnigDistance min, max;
5288
5289 min = 0;
5291 if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
5292 if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
5293 set_mml(&opt->len, min, max);
5294 }
5295 else
5296 #endif
5297 {
5298 r = optimize_node_left(en->target, opt, env);
5299
/* the implicit .* anchor is dropped when this group is back-referenced */
5300 if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
5301 if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
5302 remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
5303 }
5304 }
5305 break;
5306
5308 case ENCLOSE_CONDITION:
5309 r = optimize_node_left(en->target, opt, env);
5310 break;
5311
5312 case ENCLOSE_ABSENT:
5313 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5314 break;
5315 }
5316 }
5317 break;
5318
5319 default:
5320 #ifdef ONIG_DEBUG
5321 fprintf(stderr, "optimize_node_left: undefined node type %d\n",
5322 NTYPE(node));
5323 #endif
5324 r = ONIGERR_TYPE_BUG;
5325 break;
5326 }
5327
5328 return r;
5329 }
5330
/*
 * Installs the exact string e as the search optimization of reg.
 *
 * Does nothing and returns 0 when e is empty.  Otherwise a copy of e->s is
 * stored in reg->exact / reg->exact_end, and for strings of 3 or more bytes
 * (or 2 or more when allow_reverse is set) a skip table is built with
 * set_bm_skip(), with ignore_case 1 or 0 according to e->ignore_case.
 * reg->dmin / reg->dmax are taken from e->mmd, and reg->threshold_len becomes
 * dmin plus the string length unless dmin is ONIG_INFINITE_DISTANCE.
 * The visible code path returns 0.
 *
 * NOTE(review): this listing does not show the line following xmalloc(), the
 * expression assigned to allow_reverse, or the values assigned to
 * reg->optimize in the individual branches.
 */
5331 static int
5332 set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
5333 {
5334 int r;
5335 int allow_reverse;
5336
5337 if (e->len == 0) return 0;
5338
5339 reg->exact = (UChar* )xmalloc(e->len);
5341 xmemcpy(reg->exact, e->s, e->len);
5342 reg->exact_end = reg->exact + e->len;
5343
5344 allow_reverse =
5346
5347 if (e->ignore_case > 0) {
5348 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5349 r = set_bm_skip(reg->exact, reg->exact_end, reg,
5350 reg->map, &(reg->int_map), 1);
5351 if (r == 0) {
5352 reg->optimize = (allow_reverse != 0
5354 }
5355 else {
5357 }
5358 }
5359 else {
5361 }
5362 }
5363 else {
5364 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5365 r = set_bm_skip(reg->exact, reg->exact_end, reg,
5366 reg->map, &(reg->int_map), 0);
5367 if (r == 0) {
5368 reg->optimize = (allow_reverse != 0
5370 }
5371 else {
5373 }
5374 }
5375 else {
5377 }
5378 }
5379
5380 reg->dmin = e->mmd.min;
5381 reg->dmax = e->mmd.max;
5382
5383 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5384 reg->threshold_len = (int )(reg->dmin + (reg->exact_end - reg->exact));
5385 }
5386
5387 return 0;
5388 }
5389
/*
 * Installs the byte map m as the search optimization of reg: copies the map
 * entries into reg->map, takes reg->dmin / reg->dmax from m->mmd and sets
 * reg->threshold_len to dmin + 1 unless dmin is ONIG_INFINITE_DISTANCE.
 * NOTE(review): one statement between the copy loop and the dmin assignment
 * is not shown in this listing.
 */
5390 static void
5391 set_optimize_map_info(regex_t* reg, OptMapInfo* m)
5392 {
5393 int i;
5394
5395 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5396 reg->map[i] = m->map[i];
5397
5399 reg->dmin = m->mmd.min;
5400 reg->dmax = m->mmd.max;
5401
5402 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5403 reg->threshold_len = (int )(reg->dmin + 1);
5404 }
5405 }
5406
/*
 * NOTE(review): the body of this function is not shown in this listing.
 * Presumably it derives reg->sub_anchor from anc; confirm against the full
 * source.
 */
5407 static void
5408 set_sub_anchor(regex_t* reg, OptAncInfo* anc)
5409 {
5412 }
5413
5414#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5415static void print_optimize_info(FILE* f, regex_t* reg);
5416#endif
5417
/*
 * Analyses the parse tree with optimize_node_left() and stores the resulting
 * search optimization in reg.
 *
 * When an exact string exists (boundary or middle), the better of the two is
 * selected; it is installed with set_optimize_exact_info() unless the map
 * also has a value and comp_opt_exact_or_map_info() prefers the map, in which
 * case the map is installed with set_optimize_map_info().  Without an exact
 * string a map with a value is installed directly.
 *
 * Returns the non-zero result of optimize_node_left() on failure, otherwise
 * the value left in r (the result of set_optimize_exact_info() on that path).
 *
 * NOTE(review): parts of the reg->anchor computation and the statements of
 * the final else branch are not shown in this listing.
 */
5418 static int
5419 set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
5420 {
5421
5422 int r;
5423 NodeOptInfo opt;
5424 OptEnv env;
5425
5426 env.enc = reg->enc;
5427 env.options = reg->options;
5428 env.case_fold_flag = reg->case_fold_flag;
5429 env.scan_env = scan_env;
5430 clear_mml(&env.mmd);
5431
5432 r = optimize_node_left(node, &opt, &env);
5433 if (r) return r;
5434
5435 reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
5438
5441
5444
/* for end-anchored patterns remember the overall length range */
5445 if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
5446 reg->anchor_dmin = opt.len.min;
5447 reg->anchor_dmax = opt.len.max;
5448 }
5449
5450 if (opt.exb.len > 0 || opt.exm.len > 0) {
5451 select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
5452 if (opt.map.value > 0 &&
5453 comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
5454 goto set_map;
5455 }
5456 else {
5457 r = set_optimize_exact_info(reg, &opt.exb);
5458 set_sub_anchor(reg, &opt.exb.anc);
5459 }
5460 }
5461 else if (opt.map.value > 0) {
5462 set_map:
5463 set_optimize_map_info(reg, &opt.map);
5464 set_sub_anchor(reg, &opt.map.anc);
5465 }
5466 else {
5468 if (opt.len.max == 0)
5470 }
5471
5472 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5473 print_optimize_info(stderr, reg);
5474 #endif
5475 return r;
5476 }
5477
/*
 * Resets the optimization fields of reg: the anchor data, sub_anchor,
 * exact_end and threshold_len are zeroed, and reg->exact is freed and set to
 * NULL when present.
 * NOTE(review): the first statement of the body is not shown in this listing.
 */
5478 static void
5479 clear_optimize_info(regex_t* reg)
5480 {
5482 reg->anchor = 0;
5483 reg->anchor_dmin = 0;
5484 reg->anchor_dmax = 0;
5485 reg->sub_anchor = 0;
5486 reg->exact_end = (UChar* )NULL;
5487 reg->threshold_len = 0;
5488 if (IS_NOT_NULL(reg->exact)) {
5489 xfree(reg->exact);
5490 reg->exact = (UChar* )NULL;
5491 }
5492 }
5493
5494#ifdef ONIG_DEBUG
5495
/*
 * Debug helper: writes "PATTERN: /.../ (encoding-name)" to fp.
 * For encodings whose minimum character length is greater than 1 the text is
 * decoded character by character; code points of 0x80 and above are printed
 * as " 0x%04x " and the others as a single byte.  For other encodings the
 * bytes are written unchanged.
 * NOTE(review): the declaration of code is not shown in this listing.
 */
5496 static void print_enc_string(FILE* fp, OnigEncoding enc,
5497 const UChar *s, const UChar *end)
5498 {
5499 fprintf(fp, "\nPATTERN: /");
5500
5501 if (ONIGENC_MBC_MINLEN(enc) > 1) {
5502 const UChar *p;
5504
5505 p = s;
5506 while (p < end) {
5507 code = ONIGENC_MBC_TO_CODE(enc, p, end);
5508 if (code >= 0x80) {
5509 fprintf(fp, " 0x%04x ", (int )code);
5510 }
5511 else {
5512 fputc((int )code, fp);
5513 }
5514
5515 p += enclen(enc, p, end);
5516 }
5517 }
5518 else {
5519 while (s < end) {
5520 fputc((int )*s, fp);
5521 s++;
5522 }
5523 }
5524
5525 fprintf(fp, "/ (%s)\n", enc->name);
5526 }
5527#endif /* ONIG_DEBUG */
5528
5529#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5530static void
5531print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
5532{
5533 if (a == ONIG_INFINITE_DISTANCE)
5534 fputs("inf", f);
5535 else
5536 fprintf(f, "(%"PRIuPTR")", a);
5537
5538 fputs("-", f);
5539
5540 if (b == ONIG_INFINITE_DISTANCE)
5541 fputs("inf", f);
5542 else
5543 fprintf(f, "(%"PRIuPTR")", b);
5544}
5545
5546static void
5547print_anchor(FILE* f, int anchor)
5548{
5549 int q = 0;
5550
5551 fprintf(f, "[");
5552
5553 if (anchor & ANCHOR_BEGIN_BUF) {
5554 fprintf(f, "begin-buf");
5555 q = 1;
5556 }
5557 if (anchor & ANCHOR_BEGIN_LINE) {
5558 if (q) fprintf(f, ", ");
5559 q = 1;
5560 fprintf(f, "begin-line");
5561 }
5562 if (anchor & ANCHOR_BEGIN_POSITION) {
5563 if (q) fprintf(f, ", ");
5564 q = 1;
5565 fprintf(f, "begin-pos");
5566 }
5567 if (anchor & ANCHOR_END_BUF) {
5568 if (q) fprintf(f, ", ");
5569 q = 1;
5570 fprintf(f, "end-buf");
5571 }
5572 if (anchor & ANCHOR_SEMI_END_BUF) {
5573 if (q) fprintf(f, ", ");
5574 q = 1;
5575 fprintf(f, "semi-end-buf");
5576 }
5577 if (anchor & ANCHOR_END_LINE) {
5578 if (q) fprintf(f, ", ");
5579 q = 1;
5580 fprintf(f, "end-line");
5581 }
5582 if (anchor & ANCHOR_ANYCHAR_STAR) {
5583 if (q) fprintf(f, ", ");
5584 q = 1;
5585 fprintf(f, "anychar-star");
5586 }
5587 if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
5588 if (q) fprintf(f, ", ");
5589 fprintf(f, "anychar-star-ml");
5590 }
5591
5592 fprintf(f, "]");
5593}
5594
/*
 * Debug helper: writes a description of reg's search optimization to f: the
 * optimize kind by name, the anchors (with the anchor distance range when an
 * end-of-buffer anchor is set), the sub anchors when an optimization is
 * active, and then either the exact string with its length or the set of
 * bytes in the map.
 * NOTE(review): the second half of the condition that chooses between
 * printing a map byte as a character or as a number is not shown in this
 * listing.
 */
5595 static void
5596 print_optimize_info(FILE* f, regex_t* reg)
5597 {
/* names indexed by reg->optimize */
5598 static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
5599 "EXACT_IC", "MAP",
5600 "EXACT_BM_IC", "EXACT_BM_NOT_REV_IC" };
5601
5602 fprintf(f, "optimize: %s\n", on[reg->optimize]);
5603 fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
5604 if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
5605 print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
5606 fprintf(f, "\n");
5607
5608 if (reg->optimize) {
5609 fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);
5610 fprintf(f, "\n");
5611 }
5612 fprintf(f, "\n");
5613
5614 if (reg->exact) {
5615 UChar *p;
5616 fprintf(f, "exact: [");
5617 for (p = reg->exact; p < reg->exact_end; p++) {
5618 fputc(*p, f);
5619 }
5620 fprintf(f, "]: length: %"PRIdPTR"\n", (reg->exact_end - reg->exact));
5621 }
5622 else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
5623 int c, i, n = 0;
5624
/* count the bytes present in the map */
5625 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5626 if (reg->map[i]) n++;
5627
5628 fprintf(f, "map: n=%d\n", n);
5629 if (n > 0) {
5630 c = 0;
5631 fputc('[', f);
5632 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5633 if (reg->map[i] != 0) {
5634 if (c > 0) fputs(", ", f);
5635 c++;
5636 if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
5638 fputc(i, f);
5639 else
5640 fprintf(f, "%d", i);
5641 }
5642 }
5643 fprintf(f, "]\n");
5644 }
5645 }
5646 }
5647#endif /* ONIG_DEBUG_COMPILE || ONIG_DEBUG_MATCH */
5648
5649
/*
 * Release the heap buffers owned by a regex object without freeing the
 * regex_t structure itself. A NULL reg is accepted and ignored.
 * NOTE(review): the signature line (original line 5651) and one member
 * release (original line 5657) are missing from this listing; the callers
 * below refer to this function as onig_free_body(reg).
 */
5650extern void
5652{
5653 if (IS_NOT_NULL(reg)) {
5654 if (IS_NOT_NULL(reg->p)) xfree(reg->p);
5655 if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);
5656 if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
5658 if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
/* A chained regex is released completely, structure included. */
5659 if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
5660
5661#ifdef USE_NAMED_GROUP
5662 onig_names_free(reg);
5663#endif
5664 }
5665}
5666
/*
 * Destroy a regex object: release its owned buffers via onig_free_body()
 * and then free the structure itself. A NULL reg is a no-op.
 * NOTE(review): the signature line (original line 5668) is missing from
 * this listing.
 */
5667extern void
5669{
5670 if (IS_NOT_NULL(reg)) {
5671 onig_free_body(reg);
5672 xfree(reg);
5673 }
5674}
5675
5676#ifdef RUBY
/*
 * Return the number of bytes attributed to a regex object: the structure
 * itself plus every buffer it currently holds, including (recursively)
 * a chained regex. Returns 0 when reg is NULL.
 * NOTE(review): the signature line (original line 5678) is missing from
 * this listing.
 */
5677size_t
5679{
5680 size_t size = sizeof(regex_t);
5681 if (IS_NULL(reg)) return 0;
/* Compiled code buffer is counted by its allocated size, not the used size. */
5682 if (IS_NOT_NULL(reg->p)) size += reg->alloc;
5683 if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact;
5684 if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5685 if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5686 if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
5687 if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain);
5688
5689 return size;
5690}
5691
/*
 * Return the number of bytes attributed to a region object: the structure
 * itself plus one beg and one end element per allocated slot.
 * Returns 0 when regs is NULL.
 * NOTE(review): the signature line (original line 5693) is missing from
 * this listing.
 */
5692size_t
5694{
5695 size_t size = sizeof(*regs);
5696 if (IS_NULL(regs)) return 0;
5697 size += regs->allocated * (sizeof(*regs->beg) + sizeof(*regs->end));
5698 return size;
5699}
5700#endif
5701
/*
 * Move the contents of regex `from` into regex `to`:
 * the buffers owned by `to` are released, the whole regex_t is copied
 * bitwise from `from`, and the `from` structure (only the structure, not
 * the buffers now referenced by `to`) is freed.
 * Each argument is expanded more than once, so pass side-effect-free
 * expressions.
 */
5702#define REGEX_TRANSFER(to,from) do {\
5703 onig_free_body(to);\
5704 xmemcpy(to, from, sizeof(regex_t));\
5705 xfree(from);\
5706} while (0)
5707
/* Function form of REGEX_TRANSFER; currently compiled out. */
5708#if 0
5709extern void
5710onig_transfer(regex_t* to, regex_t* from)
5711{
5712 REGEX_TRANSFER(to, from);
5713}
5714#endif
5715
5716#ifdef ONIG_DEBUG_COMPILE
5717static void print_compiled_byte_code_list(FILE* f, regex_t* reg);
5718#endif
5719#ifdef ONIG_DEBUG_PARSE_TREE
5720static void print_tree(FILE* f, Node* node);
5721#endif
5722
5723#ifdef RUBY
5724extern int
5725onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5726 OnigErrorInfo* einfo)
5727{
5728 return onig_compile_ruby(reg, pattern, pattern_end, einfo, NULL, 0);
5729}
5730#endif
5731
/*
 * Compile the pattern [pattern, pattern_end) into reg.
 *
 * Under RUBY this is onig_compile_ruby() and additionally records the
 * source file/line in the scan environment; otherwise the same body is
 * built as onig_compile().
 *
 * Stages, in order: prepare the code buffer, reset the per-compile
 * counters, parse the pattern into a tree, handle named/unnamed group
 * mixing, set up subexpression calls, run setup_tree(), copy the
 * backtrack/capture bookkeeping into reg, compute optimization info,
 * emit byte code with compile_tree() and terminate it with OP_END.
 *
 * Returns 0 on success or a non-zero error code. On the error path,
 * when the scan environment recorded an error position and einfo is not
 * NULL, einfo->enc / par / par_end are filled in.
 *
 * NOTE(review): several lines of this function are missing from this
 * listing (gaps in the embedded line numbers); they are flagged below.
 */
5732#ifdef RUBY
5733extern int
5734onig_compile_ruby(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5735 OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
5736#else
5737extern int
5738onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5739 OnigErrorInfo* einfo)
5740#endif
5741{
5742#define COMPILE_INIT_SIZE 20
5743
5744 int r;
5745 OnigDistance init_size;
5746 Node* root;
5747 ScanEnv scan_env = {0};
5748#ifdef USE_SUBEXP_CALL
5749 UnsetAddrList uslist;
5750#endif
5751
/* Clear the error parameter up front so callers never see a stale pointer. */
5752 if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
5753
5754#ifdef RUBY
5755 scan_env.sourcefile = sourcefile;
5756 scan_env.sourceline = sourceline;
5757#endif
5758
5759#ifdef ONIG_DEBUG
5760 print_enc_string(stderr, reg->enc, pattern, pattern_end);
5761#endif
5762
/* First compile for this reg: size the code buffer at twice the pattern
   length, falling back to COMPILE_INIT_SIZE for an empty pattern.
   A reg that already has a buffer just has its used length reset.
   NOTE(review): if OnigDistance is an unsigned type, "<= 0" can only
   mean "== 0" -- confirm. */
5763 if (reg->alloc == 0) {
5764 init_size = (pattern_end - pattern) * 2;
5765 if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
5766 r = BBUF_INIT(reg, init_size);
5767 if (r != 0) goto end;
5768 }
5769 else
5770 reg->used = 0;
5771
/* NOTE(review): original line 5776 is missing from this listing. */
5772 reg->num_mem = 0;
5773 reg->num_repeat = 0;
5774 reg->num_null_check = 0;
5775 reg->repeat_range_alloc = 0;
5777#ifdef USE_COMBINATION_EXPLOSION_CHECK
5778 reg->num_comb_exp_check = 0;
5779#endif
5780
5781 r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
5782 if (r != 0) goto err;
5783
5784#ifdef ONIG_DEBUG_PARSE_TREE
5785# if 0
5786 fprintf(stderr, "ORIGINAL PARSE TREE:\n");
5787 print_tree(stderr, root);
5788# endif
5789#endif
5790
5791#ifdef USE_NAMED_GROUP
5792 /* mixed use named group and no-named group */
/* NOTE(review): the rest of this condition (original lines 5794-5795) is
   missing from this listing. */
5793 if (scan_env.num_named > 0 &&
5796 if (scan_env.num_named != scan_env.num_mem)
5797 r = disable_noname_group_capture(&root, reg, &scan_env);
5798 else
5799 r = numbered_ref_check(root);
5800
5801 if (r != 0) goto err;
5802 }
5803#endif
5804
5805#ifdef USE_SUBEXP_CALL
/* From here on uslist may be live, so failures must leave through
   err_unset to release it. */
5806 if (scan_env.num_call > 0) {
5807 r = unset_addr_list_init(&uslist, scan_env.num_call);
5808 if (r != 0) goto err;
5809 scan_env.unset_addr_list = &uslist;
5810 r = setup_subexp_call(root, &scan_env);
5811 if (r != 0) goto err_unset;
/* Only negative results are treated as errors for this check. */
5812 r = subexp_recursive_check_trav(root, &scan_env);
5813 if (r < 0) goto err_unset;
5814 r = subexp_inf_recursive_check_trav(root, &scan_env);
5815 if (r != 0) goto err_unset;
5816
5817 reg->num_call = scan_env.num_call;
5818 }
5819 else
5820 reg->num_call = 0;
5821#endif
5822
5823 r = setup_tree(root, reg, 0, &scan_env);
5824 if (r != 0) goto err_unset;
5825
5826#ifdef ONIG_DEBUG_PARSE_TREE
5827 print_tree(stderr, root);
5828#endif
5829
/* Capture-history groups are forced into both backtrack-memory sets. */
5830 reg->capture_history = scan_env.capture_history;
5831 reg->bt_mem_start = scan_env.bt_mem_start;
5832 reg->bt_mem_start |= reg->capture_history;
/* NOTE(review): the statement taken when IS_FIND_CONDITION holds
   (original line 5834) is missing from this listing. */
5833 if (IS_FIND_CONDITION(reg->options))
5835 else {
5836 reg->bt_mem_end = scan_env.bt_mem_end;
5837 reg->bt_mem_end |= reg->capture_history;
5838 }
5839
5840#ifdef USE_COMBINATION_EXPLOSION_CHECK
5841 if (scan_env.backrefed_mem == 0
5842# ifdef USE_SUBEXP_CALL
5843 || scan_env.num_call == 0
5844# endif
5845 ) {
5846 setup_comb_exp_check(root, 0, &scan_env);
5847# ifdef USE_SUBEXP_CALL
/* The check count is zeroed when the pattern has recursion ... */
5848 if (scan_env.has_recursion != 0) {
5849 scan_env.num_comb_exp_check = 0;
5850 }
5851 else
5852# endif
/* ... or when any group up to comb_exp_max_regnum is back-referenced. */
5853 if (scan_env.comb_exp_max_regnum > 0) {
5854 int i;
5855 for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
5856 if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
5857 scan_env.num_comb_exp_check = 0;
5858 break;
5859 }
5860 }
5861 }
5862 }
5863
5864 reg->num_comb_exp_check = scan_env.num_comb_exp_check;
5865#endif
5866
5867 clear_optimize_info(reg);
5868#ifndef ONIG_DONT_OPTIMIZE
5869 r = set_optimize_info_from_tree(root, reg, &scan_env);
5870 if (r != 0) goto err_unset;
5871#endif
5872
/* Freed and nulled here so the error path below does not free it again. */
5873 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
5874 xfree(scan_env.mem_nodes_dynamic);
5875 scan_env.mem_nodes_dynamic = (Node** )NULL;
5876 }
5877
5878 r = compile_tree(root, reg);
5879 if (r == 0) {
/* NOTE(review): when num_call > 0 the result of add_opcode() is
   overwritten by unset_addr_list_fix() before it is checked. */
5880 r = add_opcode(reg, OP_END);
5881#ifdef USE_SUBEXP_CALL
5882 if (scan_env.num_call > 0) {
5883 r = unset_addr_list_fix(&uslist, reg);
5884 unset_addr_list_end(&uslist);
5885 if (r) goto err;
5886 }
5887#endif
5888
/* NOTE(review): the statements selected by these branches (original
   lines 5890, 5893 and 5895) are missing from this listing. */
5889 if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
5891 else {
5892 if (reg->bt_mem_start != 0)
5894 else
5896 }
5897 }
5898#ifdef USE_SUBEXP_CALL
5899 else if (scan_env.num_call > 0) {
5900 unset_addr_list_end(&uslist);
5901 }
5902#endif
/* NOTE(review): original line 5903 is missing from this listing. */
5904
5905#ifdef ONIG_DEBUG_COMPILE
5906# ifdef USE_NAMED_GROUP
5907 onig_print_names(stderr, reg);
5908# endif
5909 print_compiled_byte_code_list(stderr, reg);
5910#endif
5911
/* Common exit: reached by falling through after compile_tree() and by the
   early BBUF_INIT failure. */
5912 end:
5913 onig_reg_resize(reg);
5914 return r;
5915
/* Error exit used once the unset-address list may have been initialised. */
5916 err_unset:
5917#ifdef USE_SUBEXP_CALL
5918 if (scan_env.num_call > 0) {
5919 unset_addr_list_end(&uslist);
5920 }
5921#endif
/* Error exit: report the error position (if any) and release parse state. */
5922 err:
5923 if (IS_NOT_NULL(scan_env.error)) {
5924 if (IS_NOT_NULL(einfo)) {
5925 einfo->enc = scan_env.enc;
5926 einfo->par = scan_env.error;
5927 einfo->par_end = scan_env.error_end;
5928 }
5929 }
5930
/* NOTE(review): original line 5931 is missing from this listing. */
5932 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
5933 xfree(scan_env.mem_nodes_dynamic);
5934 return r;
5935}
5936
/* Non-zero once the library has been initialised through onig_init(). */
5937static int onig_inited = 0;
5938
/*
 * Initialise a caller-provided regex_t before compilation: validate the
 * arguments, merge the syntax's default options into `option`, and reset
 * every buffer pointer/size member to an empty state.
 * Triggers library initialisation on first use.
 * Returns 0 on success.
 * NOTE(review): the first signature line and the statements of the
 * validation checks (original lines 5940, 5948, 5951, 5953-5955) are
 * missing from this listing; callers below invoke this function as
 * onig_reg_init(reg, option, case_fold_flag, enc, syntax). The visible
 * checks all precede the member assignments, so a failing check
 * presumably returns with *reg untouched -- confirm.
 */
5939extern int
5941 OnigCaseFoldType case_fold_flag,
5942 OnigEncoding enc, const OnigSyntaxType* syntax)
5943{
5944 if (! onig_inited)
5945 onig_init();
5946
5947 if (IS_NULL(reg))
5949
5950 if (ONIGENC_IS_UNDEF(enc))
5952
5956 }
5957
/* NEGATE_SINGLELINE: take the syntax defaults but force SINGLELINE off. */
5958 if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
5959 option |= syntax->options;
5960 option &= ~ONIG_OPTION_SINGLELINE;
5961 }
5962 else
5963 option |= syntax->options;
5964
5965 (reg)->enc = enc;
5966 (reg)->options = option;
5967 (reg)->syntax = syntax;
5968 (reg)->optimize = 0;
5969 (reg)->exact = (UChar* )NULL;
5970 (reg)->int_map = (int* )NULL;
5971 (reg)->int_map_backward = (int* )NULL;
5972 (reg)->chain = (regex_t* )NULL;
5973
/* Empty code buffer; the compile step allocates it when alloc is 0. */
5974 (reg)->p = (UChar* )NULL;
5975 (reg)->alloc = 0;
5976 (reg)->used = 0;
5977 (reg)->name_table = (void* )NULL;
5978
5979 (reg)->case_fold_flag = case_fold_flag;
5980 return 0;
5981}
5982
/*
 * Initialise and compile a regex into storage supplied by the caller
 * (no regex_t is allocated here). Uses ONIGENC_CASE_FOLD_DEFAULT as the
 * case-fold flag. Returns 0 on success, otherwise the error code from
 * onig_reg_init() or onig_compile(); nothing is released on failure.
 * NOTE(review): the first signature line (original line 5984) is missing
 * from this listing.
 */
5983extern int
5985 const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
5986 const OnigSyntaxType* syntax, OnigErrorInfo* einfo)
5987{
5988 int r;
5989
5990 r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5991 if (r) return r;
5992
5993 r = onig_compile(reg, pattern, pattern_end, einfo);
5994 return r;
5995}
5996
5997extern int
5998onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
5999 OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
6000 OnigErrorInfo* einfo)
6001{
6002 int r;
6003
6004 *reg = (regex_t* )xmalloc(sizeof(regex_t));
6005 if (IS_NULL(*reg)) return ONIGERR_MEMORY;
6006
6007 r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
6008 if (r) goto err;
6009
6010 r = onig_compile(*reg, pattern, pattern_end, einfo);
6011 if (r) {
6012 err:
6013 onig_free(*reg);
6014 *reg = NULL;
6015 }
6016 return r;
6017}
6018
/*
 * Thin wrapper that forwards to onig_init() and returns its result.
 * NOTE(review): the signature line (original line 6020) is missing from
 * this listing, so the parameters (if any) are not visible here.
 */
6019extern int
6021{
6022 return onig_init();
6023}
6024
/*
 * One-time library initialisation. The first call sets onig_inited,
 * initialises the encoding layer and, in debug builds, the statistics
 * and MSVC leak-check facilities; later calls return immediately.
 * Always returns 0.
 * NOTE(review): the signature line (original line 6026) is missing from
 * this listing. The flag is a plain static int with no locking visible
 * here.
 */
6025extern int
6027{
6028 if (onig_inited != 0)
6029 return 0;
6030
6031 onig_inited = 1;
6032
6033#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6034 _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
6035#endif
6036
6037 onigenc_init();
6038 /* onigenc_set_default_caseconv_table((UChar* )0); */
6039
6040#ifdef ONIG_DEBUG_STATISTICS
6041 onig_statistics_init();
6042#endif
6043
6044 return 0;
6045}
6046
6047
/* Head of the singly linked list of callbacks to run at library shutdown. */
6048static OnigEndCallListItemType* EndCallTop;
6049
/*
 * Register func to be called by exec_end_call_list(). The new entry is
 * pushed at the head of the list, so callbacks run in reverse order of
 * registration. If the list node cannot be allocated the request is
 * silently dropped.
 * NOTE(review): the declaration of `item` (original line 6052) is missing
 * from this listing.
 */
6050extern void onig_add_end_call(void (*func)(void))
6051{
6053
6054 item = (OnigEndCallListItemType* )xmalloc(sizeof(*item));
6055 if (item == 0) return ;
6056
6057 item->next = EndCallTop;
6058 item->func = func;
6059
6060 EndCallTop = item;
6061}
6062
/*
 * Run every registered end-call callback, head first, freeing each list
 * node after its callback has returned. Leaves EndCallTop empty.
 * NOTE(review): the declaration of `prev` (original line 6066) is missing
 * from this listing.
 */
6063static void
6064exec_end_call_list(void)
6065{
6067 void (*func)(void);
6068
6069 while (EndCallTop != 0) {
6070 func = EndCallTop->func;
6071 (*func)();
6072
/* Advance the head before freeing the node that was just executed. */
6073 prev = EndCallTop;
6074 EndCallTop = EndCallTop->next;
6075 xfree(prev);
6076 }
6077}
6078
/*
 * Library shutdown: run the registered end-call callbacks, emit debug
 * statistics / leak reports when those build options are enabled, and
 * clear onig_inited so the library can be initialised again.
 * Always returns 0.
 * NOTE(review): the signature line (original line 6080) is missing from
 * this listing.
 */
6079extern int
6081{
6082 exec_end_call_list();
6083
6084#ifdef ONIG_DEBUG_STATISTICS
6085 onig_print_statistics(stderr);
6086#endif
6087
6088#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6089 _CrtDumpMemoryLeaks();
6090#endif
6091
6092 onig_inited = 0;
6093
6094 return 0;
6095}
6096
/*
 * Test whether `code` lies inside one of the code point ranges stored at p.
 * The data is read as a count n followed by n (low, high) pairs of
 * OnigCodePoint; the search is a binary search over the pairs, so it
 * assumes they are sorted and non-overlapping -- TODO confirm against the
 * code that builds the table.
 * Returns 1 when code is within a range (bounds inclusive), else 0.
 * NOTE(review): the signature line (original line 6098) is missing from
 * this listing.
 */
6097extern int
6099{
6100 OnigCodePoint n, *data;
6101 OnigCodePoint low, high, x;
6102
6103 GET_CODE_POINT(n, p);
6104 data = (OnigCodePoint* )p;
/* Skip the leading count so data[2*i] / data[2*i+1] address pair i. */
6105 data++;
6106
/* Find the first pair whose upper bound is >= code. */
6107 for (low = 0, high = n; low < high; ) {
6108 x = (low + high) >> 1;
6109 if (code > data[x * 2 + 1])
6110 low = x + 1;
6111 else
6112 high = x;
6113 }
6114
/* Inside that pair only if code also reaches its lower bound. */
6115 return ((low < n && code >= data[low * 2]) ? 1 : 0);
6116}
6117
/*
 * Test whether `code` is matched by character class cc.
 * Codes with elen > 1 or at/above SINGLE_BYTE_SIZE are looked up in the
 * class's multibyte range buffer (no buffer means "not found"); all other
 * codes are looked up in the class bitset. The result is inverted when
 * the class is negated.
 * Returns non-zero when the code is in the class, 0 otherwise.
 * NOTE(review): the signature line (original line 6119) is missing from
 * this listing.
 */
6118extern int
6120{
6121 int found;
6122
6123 if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
6124 if (IS_NULL(cc->mbuf)) {
6125 found = 0;
6126 }
6127 else {
6128 found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
6129 }
6130 }
6131 else {
6132 found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
6133 }
6134
6135 if (IS_NCCLASS_NOT(cc))
6136 return !found;
6137 else
6138 return found;
6139}
6140
/*
 * Encoding-aware front end for onig_is_code_in_cc_len(): chooses the
 * length argument from the encoding and forwards code and cc unchanged.
 * For encodings whose minimum character length exceeds one byte the
 * length is fixed at 2, which makes the callee take its multibyte path.
 * NOTE(review): the signature line (original line 6142) and the
 * assignment in the else branch (original line 6150) are missing from
 * this listing.
 */
6141extern int
6143{
6144 int len;
6145
6146 if (ONIGENC_MBC_MINLEN(enc) > 1) {
6147 len = 2;
6148 }
6149 else {
6151 }
6152 return onig_is_code_in_cc_len(len, code, cc);
6153}
6154
6155
6156#ifdef ONIG_DEBUG
6157
6158/* arguments type */
/* ARG_SPECIAL marks opcodes whose operands are decoded by dedicated code
   in onig_print_compiled_byte_code(); the other values name a single
   operand of the given kind (ARG_NON: no operand). */
6159# define ARG_SPECIAL -1
6160# define ARG_NON 0
6161# define ARG_RELADDR 1
6162# define ARG_ABSADDR 2
6163# define ARG_LENGTH 3
6164# define ARG_MEMNUM 4
6165# define ARG_OPTION 5
6166# define ARG_STATE_CHECK 6
6167
/*
 * Debug table mapping each opcode to its printable name and operand kind.
 * Each entry is { opcode, name, arg_type }. The list is terminated by an
 * entry with opcode -1, which is what op2name() and op2arg_type() scan for.
 */
6168OnigOpInfoType OnigOpInfo[] = {
6169 { OP_FINISH, "finish", ARG_NON },
6170 { OP_END, "end", ARG_NON },
6171 { OP_EXACT1, "exact1", ARG_SPECIAL },
6172 { OP_EXACT2, "exact2", ARG_SPECIAL },
6173 { OP_EXACT3, "exact3", ARG_SPECIAL },
6174 { OP_EXACT4, "exact4", ARG_SPECIAL },
6175 { OP_EXACT5, "exact5", ARG_SPECIAL },
6176 { OP_EXACTN, "exactn", ARG_SPECIAL },
6177 { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL },
6178 { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL },
6179 { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL },
6180 { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL },
6181 { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL },
6182 { OP_EXACTMBN, "exactmbn", ARG_SPECIAL },
6183 { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL },
6184 { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL },
6185 { OP_CCLASS, "cclass", ARG_SPECIAL },
6186 { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL },
6187 { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL },
6188 { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
6189 { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
6190 { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
6191 { OP_ANYCHAR, "anychar", ARG_NON },
6192 { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
6193 { OP_ANYCHAR_STAR, "anychar*", ARG_NON },
6194 { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
6195 { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
6196 { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
6197 { OP_WORD, "word", ARG_NON },
6198 { OP_NOT_WORD, "not-word", ARG_NON },
6199 { OP_WORD_BOUND, "word-bound", ARG_NON },
6200 { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
6201 { OP_WORD_BEGIN, "word-begin", ARG_NON },
6202 { OP_WORD_END, "word-end", ARG_NON },
6203 { OP_ASCII_WORD, "ascii-word", ARG_NON },
6204 { OP_NOT_ASCII_WORD, "not-ascii-word", ARG_NON },
6205 { OP_ASCII_WORD_BOUND, "ascii-word-bound", ARG_NON },
6206 { OP_NOT_ASCII_WORD_BOUND,"not-ascii-word-bound", ARG_NON },
6207 { OP_ASCII_WORD_BEGIN, "ascii-word-begin", ARG_NON },
6208 { OP_ASCII_WORD_END, "ascii-word-end", ARG_NON },
6209 { OP_BEGIN_BUF, "begin-buf", ARG_NON },
6210 { OP_END_BUF, "end-buf", ARG_NON },
6211 { OP_BEGIN_LINE, "begin-line", ARG_NON },
6212 { OP_END_LINE, "end-line", ARG_NON },
6213 { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
6214 { OP_BEGIN_POSITION, "begin-position", ARG_NON },
6215 { OP_BACKREF1, "backref1", ARG_NON },
6216 { OP_BACKREF2, "backref2", ARG_NON },
6217 { OP_BACKREFN, "backrefn", ARG_MEMNUM },
6218 { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
6219 { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
6220 { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
6221 { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL },
6222 { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
6223 { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
6224 { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
6225 { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
6226 { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
6227 { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
6228 { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
6229 { OP_SET_OPTION, "set-option", ARG_OPTION },
6230 { OP_KEEP, "keep", ARG_NON },
6231 { OP_FAIL, "fail", ARG_NON },
6232 { OP_JUMP, "jump", ARG_RELADDR },
6233 { OP_PUSH, "push", ARG_RELADDR },
6234 { OP_POP, "pop", ARG_NON },
6235 { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
6236 { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
6237 { OP_REPEAT, "repeat", ARG_SPECIAL },
6238 { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
6239 { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
6240 { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
6241 { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
6242 { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
6243 { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
6244 { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
6245 { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
6246 { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
6247 { OP_PUSH_POS, "push-pos", ARG_NON },
6248 { OP_POP_POS, "pop-pos", ARG_NON },
6249 { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
6250 { OP_FAIL_POS, "fail-pos", ARG_NON },
6251 { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
6252 { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
6253 { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
6254 { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
6255 { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
6256 { OP_PUSH_ABSENT_POS, "push-absent-pos", ARG_NON },
6257 { OP_ABSENT, "absent", ARG_RELADDR },
6258 { OP_ABSENT_END, "absent-end", ARG_NON },
6259 { OP_CALL, "call", ARG_ABSADDR },
6260 { OP_RETURN, "return", ARG_NON },
6261 { OP_CONDITION, "condition", ARG_SPECIAL },
6262 { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
6263 { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
6264 { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
6265 { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
/* NOTE(review): the opening of this entry (original line 6266) is missing
   from this listing. */
6267 "state-check-anychar-ml*", ARG_STATE_CHECK },
/* Sentinel: terminates the table. */
6268 { -1, "", ARG_NON }
6269};
6270
6271static const char*
6272op2name(int opcode)
6273{
6274 int i;
6275
6276 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6277 if (opcode == OnigOpInfo[i].opcode)
6278 return OnigOpInfo[i].name;
6279 }
6280 return "";
6281}
6282
6283static int
6284op2arg_type(int opcode)
6285{
6286 int i;
6287
6288 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6289 if (opcode == OnigOpInfo[i].opcode)
6290 return OnigOpInfo[i].arg_type;
6291 }
6292 return ARG_SPECIAL;
6293}
6294
6295# ifdef ONIG_DEBUG_PARSE_TREE
/* Write `indent` space characters to f; a count of zero or less writes nothing. */
static void
Indent(FILE* f, int indent)
{
  int remaining = indent;

  while (remaining > 0) {
    putc(' ', f);
    remaining--;
  }
}
6302# endif /* ONIG_DEBUG_PARSE_TREE */
6303
6304static void
6305p_string(FILE* f, ptrdiff_t len, UChar* s)
6306{
6307 fputs(":", f);
6308 while (len-- > 0) { fputc(*s++, f); }
6309}
6310
6311static void
6312p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
6313{
6314 int x = len * mb_len;
6315
6316 fprintf(f, ":%d:", len);
6317 while (x-- > 0) { fputc(*s++, f); }
6318}
6319
/*
 * Debug helper: print one compiled instruction, starting at bp, to f in
 * the form "[name<operands>]".
 *
 * f      output stream
 * bp     first byte of the instruction (the opcode)
 * bpend  end of the code buffer; only used for the character length
 *        lookup of OP_EXACT1_IC
 * nextp  if non-NULL, receives the address just past the instruction
 * enc    encoding of the compiled pattern
 *
 * Opcodes whose table entry is not ARG_SPECIAL carry at most one operand
 * and are decoded generically by operand kind; ARG_SPECIAL opcodes are
 * decoded case by case. Unknown special opcodes are reported on stderr.
 *
 * NOTE(review): many lines of this function are missing from this listing
 * (gaps in the embedded line numbers), including the declarations of
 * len, scn and code, several operand reads and several case labels.
 * The notes below mark the gaps; confirm against the real source.
 */
6320extern void
6321onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
6322 OnigEncoding enc)
6323{
6324 int i, n, arg_type;
6325 RelAddrType addr;
6327 MemNumType mem;
6330 UChar *q;
6331
6332 fprintf(f, "[%s", op2name(*bp));
6333 arg_type = op2arg_type(*bp);
/* Generic path: skip the opcode byte, then decode the single operand. */
6334 if (arg_type != ARG_SPECIAL) {
6335 bp++;
6336 switch (arg_type) {
6337 case ARG_NON:
6338 break;
6339 case ARG_RELADDR:
/* Relative addresses are shown with an explicit '+' when non-negative. */
6340 GET_RELADDR_INC(addr, bp);
6341 fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
6342 break;
6343 case ARG_ABSADDR:
6344 GET_ABSADDR_INC(addr, bp);
6345 fprintf(f, ":(%d)", addr);
6346 break;
6347 case ARG_LENGTH:
/* NOTE(review): the read of len (original line 6348) is missing here. */
6349 fprintf(f, ":%d", len);
6350 break;
6351 case ARG_MEMNUM:
6352 mem = *((MemNumType* )bp);
6353 bp += SIZE_MEMNUM;
6354 fprintf(f, ":%d", mem);
6355 break;
6356 case ARG_OPTION:
6357 {
6358 OnigOptionType option = *((OnigOptionType* )bp);
6359 bp += SIZE_OPTION;
6360 fprintf(f, ":%d", option);
6361 }
6362 break;
6363
6364 case ARG_STATE_CHECK:
/* NOTE(review): the advance of bp (original line 6366) is missing here. */
6365 scn = *((StateCheckNumType* )bp);
6367 fprintf(f, ":%d", scn);
6368 break;
6369 }
6370 }
6371 else {
/* Special path: the switch consumes the opcode byte itself. */
6372 switch (*bp++) {
6373 case OP_EXACT1:
/* NOTE(review): original lines 6374-6375 (presumably further case labels
   sharing this body) are missing here. */
6376 p_string(f, 1, bp++); break;
6377 case OP_EXACT2:
6378 p_string(f, 2, bp); bp += 2; break;
6379 case OP_EXACT3:
6380 p_string(f, 3, bp); bp += 3; break;
6381 case OP_EXACT4:
6382 p_string(f, 4, bp); bp += 4; break;
6383 case OP_EXACT5:
6384 p_string(f, 5, bp); bp += 5; break;
6385 case OP_EXACTN:
/* NOTE(review): the read of len (original line 6386) is missing here. */
6387 p_len_string(f, len, 1, bp);
6388 bp += len;
6389 break;
6390
6391 case OP_EXACTMB2N1:
6392 p_string(f, 2, bp); bp += 2; break;
6393 case OP_EXACTMB2N2:
6394 p_string(f, 4, bp); bp += 4; break;
6395 case OP_EXACTMB2N3:
6396 p_string(f, 6, bp); bp += 6; break;
6397 case OP_EXACTMB2N:
/* NOTE(review): the read of len (original line 6398) is missing here. */
6399 p_len_string(f, len, 2, bp);
6400 bp += len * 2;
6401 break;
6402 case OP_EXACTMB3N:
/* NOTE(review): the read of len (original line 6403) is missing here. */
6404 p_len_string(f, len, 3, bp);
6405 bp += len * 3;
6406 break;
6407 case OP_EXACTMBN:
6408 {
6409 int mb_len;
6410
/* NOTE(review): the read of len (original line 6412) is missing here. */
6411 GET_LENGTH_INC(mb_len, bp);
6413 fprintf(f, ":%d:%d:", mb_len, len);
6414 n = len * mb_len;
6415 while (n-- > 0) { fputc(*bp++, f); }
6416 }
6417 break;
6418
6419 case OP_EXACT1_IC:
/* One character whose byte length is taken from the encoding. */
6420 len = enclen(enc, bp, bpend);
6421 p_string(f, len, bp);
6422 bp += len;
6423 break;
6424 case OP_EXACTN_IC:
/* NOTE(review): the read of len (original line 6425) is missing here. */
6426 p_len_string(f, len, 1, bp);
6427 bp += len;
6428 break;
6429
6430 case OP_CCLASS:
/* Bitset classes are summarised by the number of bits set. */
6431 n = bitset_on_num((BitSetRef )bp);
6432 bp += SIZE_BITSET;
6433 fprintf(f, ":%d", n);
6434 break;
6435
6436 case OP_CCLASS_NOT:
6437 n = bitset_on_num((BitSetRef )bp);
6438 bp += SIZE_BITSET;
6439 fprintf(f, ":%d", n);
6440 break;
6441
6442 case OP_CCLASS_MB:
6443 case OP_CCLASS_MB_NOT:
/* NOTE(review): the read of len (original line 6444) is missing here.
   The first code point of the range data is printed together with len. */
6445 q = bp;
6446# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6447 ALIGNMENT_RIGHT(q);
6448# endif
6449 GET_CODE_POINT(code, q);
6450 bp += len;
6451 fprintf(f, ":%d:%d", (int )code, len);
6452 break;
6453
6454 case OP_CCLASS_MIX:
6455 case OP_CCLASS_MIX_NOT:
/* Bitset part first, then the multibyte range data.
   NOTE(review): the read of len (original line 6458) is missing here. */
6456 n = bitset_on_num((BitSetRef )bp);
6457 bp += SIZE_BITSET;
6459 q = bp;
6460# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6461 ALIGNMENT_RIGHT(q);
6462# endif
6463 GET_CODE_POINT(code, q);
6464 bp += len;
6465 fprintf(f, ":%d:%d:%d", n, (int )code, len);
6466 break;
6467
6468 case OP_BACKREFN_IC:
6469 mem = *((MemNumType* )bp);
6470 bp += SIZE_MEMNUM;
6471 fprintf(f, ":%d", mem);
6472 break;
6473
/* NOTE(review): original lines 6474 and 6477 (presumably another case
   label and the read of len) are missing here. Prints len group numbers
   separated by ", ". */
6475 case OP_BACKREF_MULTI:
6476 fputs(" ", f);
6478 for (i = 0; i < len; i++) {
6479 GET_MEMNUM_INC(mem, bp);
6480 if (i > 0) fputs(", ", f);
6481 fprintf(f, "%d", mem);
6482 }
6483 break;
6484
/* NOTE(review): the case label for this block (original line 6485) and the
   read of len (original line 6496) are missing here. Prints an option
   value, a level, then len group numbers. */
6486 {
6487 OnigOptionType option;
6488 LengthType level;
6489
6490 GET_OPTION_INC(option, bp);
6491 fprintf(f, ":%d", option);
6492 GET_LENGTH_INC(level, bp);
6493 fprintf(f, ":%d", level);
6494
6495 fputs(" ", f);
6497 for (i = 0; i < len; i++) {
6498 GET_MEMNUM_INC(mem, bp);
6499 if (i > 0) fputs(", ", f);
6500 fprintf(f, "%d", mem);
6501 }
6502 }
6503 break;
6504
6505 case OP_REPEAT:
6506 case OP_REPEAT_NG:
6507 {
6508 mem = *((MemNumType* )bp);
6509 bp += SIZE_MEMNUM;
6510 addr = *((RelAddrType* )bp);
6511 bp += SIZE_RELADDR;
6512 fprintf(f, ":%d:%d", mem, addr);
6513 }
6514 break;
6515
/* NOTE(review): the case labels for this block (original lines 6516-6517)
   are missing here. Operands: a relative address followed by one byte. */
6518 addr = *((RelAddrType* )bp);
6519 bp += SIZE_RELADDR;
6520 fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
6521 p_string(f, 1, bp);
6522 bp += 1;
6523 break;
6524
6525 case OP_LOOK_BEHIND:
/* NOTE(review): the read of len (original line 6526) is missing here. */
6527 fprintf(f, ":%d", len);
6528 break;
6529
/* NOTE(review): the case label (original line 6530) and the read of len
   (original line 6532) are missing here. */
6531 GET_RELADDR_INC(addr, bp);
6533 fprintf(f, ":%d:(%s%d)", len, (addr >= 0) ? "+" : "", addr);
6534 break;
6535
/* NOTE(review): the case labels (original lines 6536-6537) and the advance
   of bp past scn (original line 6539) are missing here. */
6538 scn = *((StateCheckNumType* )bp);
6540 addr = *((RelAddrType* )bp);
6541 bp += SIZE_RELADDR;
6542 fprintf(f, ":%d:(%s%d)", scn, (addr >= 0) ? "+" : "", addr);
6543 break;
6544
6545 case OP_CONDITION:
6546 GET_MEMNUM_INC(mem, bp);
6547 GET_RELADDR_INC(addr, bp);
6548 fprintf(f, ":%d:(%s%d)", mem, (addr >= 0) ? "+" : "", addr);
6549 break;
6550
6551 default:
/* bp was already advanced by the switch, so bp[-1] is the opcode. */
6552 fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
6553 bp[-1]);
6554 }
6555 }
6556 fputs("]", f);
6557 if (nextp) *nextp = bp;
6558}
6559
6560# ifdef ONIG_DEBUG_COMPILE
6561static void
6562print_compiled_byte_code_list(FILE* f, regex_t* reg)
6563{
6564 int ncode;
6565 UChar* bp = reg->p;
6566 UChar* end = reg->p + reg->used;
6567
6568 fprintf(f, "code length: %d", reg->used);
6569
6570 ncode = -1;
6571 while (bp < end) {
6572 ncode++;
6573 if (ncode % 5 == 0)
6574 fprintf(f, "\n%ld:", bp - reg->p);
6575 else
6576 fprintf(f, " %ld:", bp - reg->p);
6577 onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
6578 }
6579
6580 fprintf(f, "\n");
6581}
6582# endif /* ONIG_DEBUG_COMPILE */
6583
6584# ifdef ONIG_DEBUG_PARSE_TREE
/*
 * Debug helper: print the parse tree rooted at node to f, one node per
 * line, indented by `indent` spaces; children are printed recursively
 * with three more spaces of indentation.
 * A NULL node, a malformed list/alt chain or an unknown ctype prints an
 * error message and terminates the process via exit(0).
 */
6585static void
6586print_indent_tree(FILE* f, Node* node, int indent)
6587{
6588 int i, type, container_p = 0;
6589 int add = 3;
6590 UChar* p;
6591
6592 Indent(f, indent);
6593 if (IS_NULL(node)) {
6594 fprintf(f, "ERROR: null node!!!\n");
6595 exit (0);
6596 }
6597
6598 type = NTYPE(node);
6599 switch (type) {
6600 case NT_LIST:
6601 case NT_ALT:
6602 if (NTYPE(node) == NT_LIST)
6603 fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t )node);
6604 else
6605 fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t )node);
6606
/* Walk the cons chain: every cdr must have the same type as the head. */
6607 print_indent_tree(f, NCAR(node), indent + add);
6608 while (IS_NOT_NULL(node = NCDR(node))) {
6609 if (NTYPE(node) != type) {
6610 fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
6611 exit(0);
6612 }
6613 print_indent_tree(f, NCAR(node), indent + add);
6614 }
6615 break;
6616
6617 case NT_STR:
6618 fprintf(f, "<string%s:%"PRIxPTR">",
6619 (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t )node);
/* Bytes in 0x20..0x7e are printed as-is, all others as hex. */
6620 for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
6621 if (*p >= 0x20 && *p < 0x7f)
6622 fputc(*p, f);
6623 else {
6624 fprintf(f, " 0x%02x", *p);
6625 }
6626 }
6627 break;
6628
6629 case NT_CCLASS:
6630 fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t )node);
6631 if (IS_NCCLASS_NOT(NCCLASS(node))) fputs("not ", f);
6632 if (NCCLASS(node)->mbuf) {
/* Range buffer: the first code point is printed on its own, then the rest
   as "from-to" pairs. */
6633 BBuf* bbuf = NCCLASS(node)->mbuf;
6634 OnigCodePoint* data = (OnigCodePoint* )bbuf->p;
6635 OnigCodePoint* end = (OnigCodePoint* )(bbuf->p + bbuf->used);
6636 fprintf(f, "%d", *data++);
6637 for (; data < end; data+=2) {
6638 fprintf(f, ",");
6639 fprintf(f, "%04x-%04x", data[0], data[1]);
6640 }
6641 }
6642 break;
6643
6644 case NT_CTYPE:
6645 fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t )node);
6646 switch (NCTYPE(node)->ctype) {
6647 case ONIGENC_CTYPE_WORD:
6648 if (NCTYPE(node)->not != 0)
6649 fputs("not word", f);
6650 else
6651 fputs("word", f);
6652 break;
6653
6654 default:
6655 fprintf(f, "ERROR: undefined ctype.\n");
6656 exit(0);
6657 }
6658 break;
6659
6660 case NT_CANY:
6661 fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t )node);
6662 break;
6663
6664 case NT_ANCHOR:
6665 fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t )node);
/* Look-ahead/look-behind anchors set container_p so that their target
   subtree is printed after the switch. */
6666 switch (NANCHOR(node)->type) {
6667 case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
6668 case ANCHOR_END_BUF: fputs("end buf", f); break;
6669 case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
6670 case ANCHOR_END_LINE: fputs("end line", f); break;
6671 case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
6672 case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
6673
6674 case ANCHOR_WORD_BOUND: fputs("word bound", f); break;
6675 case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;
6676# ifdef USE_WORD_BEGIN_END
6677 case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
6678 case ANCHOR_WORD_END: fputs("word end", f); break;
6679# endif
6680 case ANCHOR_PREC_READ: fputs("prec read", f); container_p = TRUE; break;
6681 case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); container_p = TRUE; break;
6682 case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); container_p = TRUE; break;
6683 case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break;
6684 case ANCHOR_KEEP: fputs("keep",f); break;
6685
6686 default:
6687 fprintf(f, "ERROR: undefined anchor type.\n");
6688 break;
6689 }
6690 break;
6691
6692 case NT_BREF:
6693 {
/* This inner p (int*) shadows the outer UChar* p within this block. */
6694 int* p;
6695 BRefNode* br = NBREF(node);
6696 p = BACKREFS_P(br);
6697 fprintf(f, "<backref:%"PRIxPTR">", (intptr_t )node);
6698 for (i = 0; i < br->back_num; i++) {
6699 if (i > 0) fputs(", ", f);
6700 fprintf(f, "%d", p[i]);
6701 }
6702 }
6703 break;
6704
6705# ifdef USE_SUBEXP_CALL
6706 case NT_CALL:
6707 {
6708 CallNode* cn = NCALL(node);
6709 fprintf(f, "<call:%"PRIxPTR">", (intptr_t )node);
6710 p_string(f, cn->name_end - cn->name, cn->name);
6711 }
6712 break;
6713# endif
6714
6715 case NT_QTFR:
/* Printed as {lower,upper}; a trailing '?' marks a non-greedy quantifier. */
6716 fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t )node,
6717 NQTFR(node)->lower, NQTFR(node)->upper,
6718 (NQTFR(node)->greedy ? "" : "?"));
6719 print_indent_tree(f, NQTFR(node)->target, indent + add);
6720 break;
6721
6722 case NT_ENCLOSE:
6723 fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t )node);
6724 switch (NENCLOSE(node)->type) {
6725 case ENCLOSE_OPTION:
6726 fprintf(f, "option:%d", NENCLOSE(node)->option);
6727 break;
6728 case ENCLOSE_MEMORY:
6729 fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
6730 break;
/* NOTE(review): the case label for this branch (original line 6731) is
   missing from this listing. */
6732 fprintf(f, "stop-bt");
6733 break;
6734 case ENCLOSE_CONDITION:
6735 fprintf(f, "condition:%d", NENCLOSE(node)->regnum);
6736 break;
6737 case ENCLOSE_ABSENT:
6738 fprintf(f, "absent");
6739 break;
6740
6741 default:
6742 break;
6743 }
6744 fprintf(f, "\n");
6745 print_indent_tree(f, NENCLOSE(node)->target, indent + add);
6746 break;
6747
6748 default:
6749 fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
6750 break;
6751 }
6752
/* The node types listed here already ended their own line above. */
6753 if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
6754 type != NT_ENCLOSE)
6755 fprintf(f, "\n");
6756
6757 if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
6758
6759 fflush(f);
6760}
6761
6762static void
6763print_tree(FILE* f, Node* node)
6764{
6765 print_indent_tree(f, node, 0);
6766}
6767# endif /* ONIG_DEBUG_PARSE_TREE */
6768#endif /* ONIG_DEBUG */
#define add(x, y)
Definition: date_strftime.c:23
struct RIMemo * ptr
Definition: debug.c:88
#define d1
int root
Definition: enough.c:226
big_t * num
Definition: enough.c:232
int max
Definition: enough.c:225
size_t map(int syms, int left, int len)
Definition: enough.c:237
uint8_t len
Definition: escape.c:17
#define numberof(array)
Definition: etc.c:649
#define PRIuPTR
Definition: ffitest.h:126
void skip(file *in, unsigned n)
Definition: gzappend.c:202
#define bp()
Definition: internal.h:105
#define PRIdPTR
Definition: inttypes.h:52
#define PRIxPTR
Definition: inttypes.h:56
voidpf void uLong size
Definition: ioapi.h:138
voidpf uLong offset
Definition: ioapi.h:144
typedef int(ZCALLBACK *close_file_func) OF((voidpf opaque
voidpf void * buf
Definition: ioapi.h:138
VALUE type(ANYARGS)
ANYARGS-ed function type.
Definition: cxxanyargs.hpp:56
const int id
Definition: nkf.c:209
#define ARG_UNUSED
Definition: nkf.h:179
#define TRUE
Definition: nkf.h:175
#define ONIG_INFINITE_DISTANCE
Definition: onigmo.h:85
#define ONIG_OPTION_DONT_CAPTURE_GROUP
Definition: onigmo.h:459
#define ONIGENC_IS_CODE_PRINT(enc, code)
Definition: onigmo.h:378
#define ONIGENC_MBC_MAXLEN_DIST(enc)
Definition: onigmo.h:363
#define ONIGENC_CODE_TO_MBC(enc, code, buf)
Definition: onigmo.h:368
OnigRegexType regex_t
Definition: onigmo.h:803
#define ONIG_IS_OPTION_ON(options, option)
Definition: onigmo.h:476
#define ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL
Definition: onigmo.h:685
unsigned int OnigCaseFoldType
Definition: onigmo.h:95
#define ONIGENC_MBC_TO_CODE(enc, p, end)
Definition: onigmo.h:366
#define ONIGERR_INVALID_ARGUMENT
Definition: onigmo.h:640
#define ONIGERR_NEVER_ENDING_RECURSION
Definition: onigmo.h:686
#define ONIGERR_PARSER_BUG
Definition: onigmo.h:631
#define ONIGENC_IS_MBC_ASCII_WORD(enc, s, end)
Definition: onigmo.h:324
#define ONIGERR_TYPE_BUG
Definition: onigmo.h:630
#define UChar
Definition: onigmo.h:76
#define ONIGENC_CODE_TO_MBC_MAXLEN
Definition: onigmo.h:289
#define ONIGENC_CTYPE_WORD
Definition: onigmo.h:306
#define ONIGERR_UNDEFINED_NAME_REFERENCE
Definition: onigmo.h:682
#define ONIGENC_IS_UNDEF(enc)
Definition: onigmo.h:317
#define ONIGENC_CASE_FOLD_DEFAULT
Definition: onigmo.h:131
#define ONIGERR_INVALID_LOOK_BEHIND_PATTERN
Definition: onigmo.h:662
#define ONIGENC_IS_CODE_WORD(enc, code)
Definition: onigmo.h:400
unsigned int OnigCodePoint
Definition: onigmo.h:80
#define ONIG_OPTION_IGNORECASE
Definition: onigmo.h:451
#define ONIGERR_MEMORY
Definition: onigmo.h:629
#define ONIG_OPTION_NEGATE_SINGLELINE
Definition: onigmo.h:458
ONIG_EXTERN int onig_name_to_group_numbers(OnigRegex reg, const OnigUChar *name, const OnigUChar *name_end, int **nums)
#define ONIG_OPTION_CAPTURE_GROUP
Definition: onigmo.h:460
#define ONIG_MAX_CAPTURE_HISTORY_GROUP
Definition: onigmo.h:700
#define ONIGERR_UNDEFINED_GROUP_REFERENCE
Definition: onigmo.h:683
#define ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED
Definition: onigmo.h:675
#define ONIGENC_MBC_MAXLEN(enc)
Definition: onigmo.h:362
#define ONIGENC_MBC_MINLEN(enc)
Definition: onigmo.h:364
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, s, end)
Definition: onigmo.h:334
#define ONIGENC_MBC_CASE_FOLD(enc, flag, pp, end, buf)
Definition: onigmo.h:332
#define ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, acs)
Definition: onigmo.h:340
#define ONIG_CHAR_TABLE_SIZE
Definition: onigmo.h:753
#define ONIGERR_INVALID_COMBINATION_OF_OPTIONS
Definition: onigmo.h:693
#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL
Definition: onigmo.h:598
#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
Definition: onigmo.h:595
ONIG_EXTERN int onigenc_init(void)
Definition: regenc.c:36
#define ONIGENC_MBC_CASE_FOLD_MAXLEN
Definition: onigmo.h:290
#define ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM
Definition: onigmo.h:135
#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND
Definition: onigmo.h:594
#define ONIGERR_INVALID_CONDITION_PATTERN
Definition: onigmo.h:664
unsigned int OnigOptionType
Definition: onigmo.h:445
#define ONIGERR_INVALID_BACKREF
Definition: onigmo.h:674
ONIG_EXTERN int onigenc_strlen(OnigEncoding enc, const OnigUChar *p, const OnigUChar *end)
#define ONIGENC_CODE_TO_MBCLEN(enc, code)
Definition: onigmo.h:367
size_t OnigDistance
Definition: onigmo.h:82
#define ONIGENC_CASE_FOLD_MIN
Definition: onigmo.h:130
#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SET
Definition: onigmo.h:637
#define ONIGENC_IS_MBC_WORD(enc, s, end)
Definition: onigmo.h:322
#define IN_VAR_REPEAT
Definition: regcomp.c:3884
int onig_is_in_code_range(const UChar *p, OnigCodePoint code)
Definition: regcomp.c:6098
int onig_new_without_alloc(regex_t *reg, const UChar *pattern, const UChar *pattern_end, OnigOptionType option, OnigEncoding enc, const OnigSyntaxType *syntax, OnigErrorInfo *einfo)
Definition: regcomp.c:5984
#define RECURSION_EXIST
Definition: regcomp.c:2909
OnigCaseFoldType onig_get_default_case_fold_flag(void)
Definition: regcomp.c:36
#define EXPAND_STRING_MAX_LENGTH
int onig_reg_init(regex_t *reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, const OnigSyntaxType *syntax)
Definition: regcomp.c:5940
#define QUANTIFIER_EXPAND_LIMIT_SIZE
Definition: regcomp.c:742
#define RECURSION_INFINITE
Definition: regcomp.c:2910
int onig_initialize(OnigEncoding encodings[] ARG_UNUSED, int n ARG_UNUSED)
Definition: regcomp.c:6020
int onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
Definition: regcomp.c:42
#define REPEAT_RANGE_ALLOC
#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION
#define GET_CHAR_LEN_TOP_ALT_VARLEN
Definition: regcomp.c:2435
int onig_new(regex_t **reg, const UChar *pattern, const UChar *pattern_end, OnigOptionType option, OnigEncoding enc, const OnigSyntaxType *syntax, OnigErrorInfo *einfo)
Definition: regcomp.c:5998
#define ALLOWED_ANCHOR_IN_LB_NOT
#define ALLOWED_TYPE_IN_LB
int onig_bbuf_init(BBuf *buf, OnigDistance size)
Definition: regcomp.c:163
int onig_end(void)
Definition: regcomp.c:6080
#define ALLOWED_ANCHOR_IN_LB
void onig_free_body(regex_t *reg)
Definition: regcomp.c:5651
#define COMP_EM_BASE
size_t onig_region_memsize(const OnigRegion *regs)
Definition: regcomp.c:5693
int onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode *cc)
Definition: regcomp.c:6142
OnigCaseFoldType OnigDefaultCaseFoldFlag
Definition: regcomp.c:33
#define IN_REPEAT
Definition: regcomp.c:3883
int onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode *cc)
Definition: regcomp.c:6119
#define GET_CHAR_LEN_VARLEN
Definition: regcomp.c:2434
#define IN_NOT
Definition: regcomp.c:3882
int onig_compile(regex_t *reg, const UChar *pattern, const UChar *pattern_end, OnigErrorInfo *einfo)
Definition: regcomp.c:5725
#define ALLOWED_ENCLOSE_IN_LB
#define IN_CALL
Definition: regcomp.c:3885
int onig_init(void)
Definition: regcomp.c:6026
#define IS_NEED_STR_LEN_OP_EXACT(op)
Definition: regcomp.c:336
size_t onig_memsize(const regex_t *reg)
Definition: regcomp.c:5678
void onig_add_end_call(void(*func)(void))
Definition: regcomp.c:6050
#define COMPILE_INIT_SIZE
#define FOUND_CALLED_NODE
void onig_free(regex_t *reg)
Definition: regcomp.c:5668
#define REGEX_TRANSFER(to, from)
Definition: regcomp.c:5702
#define MAX_NODE_OPT_INFO_REF_COUNT
Definition: regcomp.c:4976
#define IN_RECCALL
Definition: regcomp.c:3886
#define IN_ALT
Definition: regcomp.c:3881
#define ALLOWED_ENCLOSE_IN_LB_NOT
#define CKN_ON
Definition: regcomp.c:743
int onig_compile_ruby(regex_t *reg, const UChar *pattern, const UChar *pattern_end, OnigErrorInfo *einfo, const char *sourcefile, int sourceline)
Definition: regcomp.c:5734
#define NULL
Definition: regenc.h:69
#define enclen(enc, p, e)
Definition: regenc.h:93
int AbsAddrType
Definition: regint.h:668
#define GET_ALIGNMENT_PAD_SIZE(addr, pad_size)
Definition: regint.h:323
#define SIZE_OP_PUSH_IF_PEEK_NEXT
Definition: regint.h:711
#define ANCHOR_BEGIN_LINE
Definition: regint.h:528
#define SIZE_OP_PUSH_ABSENT_POS
Definition: regint.h:737
#define SIZE_OP_ABSENT_END
Definition: regint.h:739
#define CHECK_NULL_RETURN_MEMERR(p)
Definition: regint.h:301
#define SIZE_OP_POP_STOP_BT
Definition: regint.h:728
#define ANCHOR_PREC_READ_NOT
Definition: regint.h:539
#define USE_SUBEXP_CALL
Definition: regint.h:70
#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC
Definition: regint.h:349
#define IS_DYNAMIC_OPTION(option)
Definition: regint.h:403
#define ONIG_OPTIMIZE_MAP
Definition: regint.h:347
#define ONIG_OPTIMIZE_EXACT
Definition: regint.h:343
#define GET_OPTION_INC(option, p)
Definition: regint.h:692
#define BIT_STATUS_ON_ALL(stats)
Definition: regint.h:356
int LengthType
Definition: regint.h:669
short int MemNumType
Definition: regint.h:671
#define OPT_EXACT_MAXLEN
Definition: regint.h:90
#define ANCHOR_BEGIN_POSITION
Definition: regint.h:529
#define SIZE_OP_PUSH_OR_JUMP_EXACT1
Definition: regint.h:710
#define GET_MEMNUM_INC(num, p)
Definition: regint.h:690
#define STACK_POP_LEVEL_ALL
Definition: regint.h:339
#define SIZE_OP_MEMORY_END_PUSH
Definition: regint.h:723
#define ONIG_OPTIMIZE_EXACT_BM
Definition: regint.h:344
#define BITSET_AT(bs, pos)
Definition: regint.h:435
#define SIZE_OP_MEMORY_END
Definition: regint.h:725
#define CHECK_NULL_RETURN(p)
Definition: regint.h:300
#define SIZE_RELADDR
Definition: regint.h:676
#define ANCHOR_LOOK_BEHIND
Definition: regint.h:540
#define SIZE_BITSET
Definition: regint.h:425
#define IS_NOT_NULL(p)
Definition: regint.h:299
#define ANCHOR_END_LINE
Definition: regint.h:532
#define SIZE_OP_ABSENT
Definition: regint.h:738
#define SIZE_OP_PUSH_POS
Definition: regint.h:714
#define SIZE_OP_PUSH
Definition: regint.h:708
#define ANCHOR_BEGIN_BUF
Definition: regint.h:527
#define BBUF_GET_ADD_ADDRESS(buf)
Definition: regint.h:493
#define ANCHOR_WORD_BOUND
Definition: regint.h:534
#define SIZE_OP_RETURN
Definition: regint.h:735
#define SIZE_OP_PUSH_STOP_BT
Definition: regint.h:727
#define SIZE_OP_NULL_CHECK_END
Definition: regint.h:730
#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV
Definition: regint.h:345
#define SIZE_OP_MEMORY_START
Definition: regint.h:721
#define SIZE_OPCODE
Definition: regint.h:675
#define STACK_POP_LEVEL_MEM_START
Definition: regint.h:338
#define SIZE_POINTER
Definition: regint.h:684
#define BIT_STATUS_ON_AT(stats, n)
Definition: regint.h:360
#define ANCHOR_WORD_BEGIN
Definition: regint.h:536
#define SIZE_OP_FAIL
Definition: regint.h:720
#define SIZE_OP_MEMORY_END_PUSH_REC
Definition: regint.h:724
#define ANCHOR_ANYCHAR_STAR_ML
Definition: regint.h:544
#define STACK_POP_LEVEL_FREE
Definition: regint.h:337
#define SIZE_OP_POP_POS
Definition: regint.h:716
#define BBUF_INIT(buf, size)
Definition: regint.h:447
#define IS_REPEAT_INFINITE(n)
Definition: regint.h:409
#define SIZE_OP_PUSH_POS_NOT
Definition: regint.h:715
#define WORD_ALIGNMENT_SIZE
Definition: regint.h:321
#define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT
Definition: regint.h:706
#define GET_LENGTH_INC(len, p)
Definition: regint.h:689
#define ANCHOR_LOOK_BEHIND_NOT
Definition: regint.h:541
#define BITSET_SIZE
Definition: regint.h:415
#define SINGLE_BYTE_SIZE
Definition: regint.h:413
unsigned int BitStatusType
Definition: regint.h:352
#define GET_CODE_POINT(code, p)
Definition: regint.h:697
#define IS_NULL(p)
Definition: regint.h:298
#define ANCHOR_SEMI_END_BUF
Definition: regint.h:531
void onig_transfer(regex_t *to, regex_t *from)
#define SIZE_OP_POP
Definition: regint.h:709
#define IS_MULTILINE(option)
Definition: regint.h:382
#define BBUF_ADD1(buf, byte)
Definition: regint.h:492
int RelAddrType
Definition: regint.h:667
#define SIZE_OP_ANYCHAR_STAR
Definition: regint.h:705
#define SIZE_OP_JUMP
Definition: regint.h:707
#define ANCHOR_PREC_READ
Definition: regint.h:538
#define ALIGNMENT_RIGHT(addr)
Definition: regint.h:329
#define ONIG_OPTIMIZE_EXACT_IC
Definition: regint.h:346
#define SIZE_OP_MEMORY_START_PUSH
Definition: regint.h:722
#define IS_NCCLASS_NOT(nd)
Definition: regint.h:796
short int StateCheckNumType
Definition: regint.h:672
#define ONIG_OPTIMIZE_NONE
Definition: regint.h:342
#define ANCHOR_KEEP
Definition: regint.h:546
#define BBUF_ADD(buf, bytes, n)
Definition: regint.h:491
#define ANCHOR_NOT_WORD_BOUND
Definition: regint.h:535
#define SIZE_OP_PUSH_LOOK_BEHIND_NOT
Definition: regint.h:732
#define BBUF_GET_OFFSET_POS(buf)
Definition: regint.h:494
#define SIZE_OP_NULL_CHECK_START
Definition: regint.h:729
#define SIZE_MEMNUM
Definition: regint.h:679
#define SIZE_LENGTH
Definition: regint.h:678
@ OP_EXACTMB3N
Definition: regint.h:563
@ OP_END
Definition: regint.h:551
@ OP_LOOK_BEHIND
Definition: regint.h:644
@ OP_CALL
Definition: regint.h:651
@ OP_ASCII_WORD
Definition: regint.h:590
@ OP_STATE_CHECK_PUSH_OR_JUMP
Definition: regint.h:657
@ OP_PUSH_POS_NOT
Definition: regint.h:640
@ OP_STATE_CHECK_ANYCHAR_STAR
Definition: regint.h:659
@ OP_REPEAT_INC_NG
Definition: regint.h:630
@ OP_STATE_CHECK
Definition: regint.h:658
@ OP_MEMORY_END_REC
Definition: regint.h:617
@ OP_REPEAT_INC
Definition: regint.h:629
@ OP_ANYCHAR_ML
Definition: regint.h:577
@ OP_POP_POS
Definition: regint.h:639
@ OP_WORD_END
Definition: regint.h:588
@ OP_WORD_BEGIN
Definition: regint.h:587
@ OP_POP_STOP_BT
Definition: regint.h:643
@ OP_ANYCHAR_STAR_PEEK_NEXT
Definition: regint.h:580
@ OP_BACKREFN
Definition: regint.h:606
@ OP_PUSH_LOOK_BEHIND_NOT
Definition: regint.h:645
@ OP_PUSH_STOP_BT
Definition: regint.h:642
@ OP_EXACTMBN
Definition: regint.h:564
@ OP_EXACTMB2N
Definition: regint.h:562
@ OP_MEMORY_START
Definition: regint.h:612
@ OP_SET_OPTION
Definition: regint.h:664
@ OP_NULL_CHECK_START
Definition: regint.h:633
@ OP_BEGIN_LINE
Definition: regint.h:599
@ OP_WORD_BOUND
Definition: regint.h:585
@ OP_NOT_ASCII_WORD_BOUND
Definition: regint.h:593
@ OP_ANYCHAR
Definition: regint.h:576
@ OP_SET_OPTION_PUSH
Definition: regint.h:663
@ OP_EXACT4
Definition: regint.h:556
@ OP_EXACT5
Definition: regint.h:557
@ OP_REPEAT
Definition: regint.h:627
@ OP_EXACT2
Definition: regint.h:554
@ OP_MEMORY_END
Definition: regint.h:616
@ OP_ANYCHAR_ML_STAR
Definition: regint.h:579
@ OP_EXACTN
Definition: regint.h:558
@ OP_PUSH_OR_JUMP_EXACT1
Definition: regint.h:625
@ OP_ANYCHAR_STAR
Definition: regint.h:578
@ OP_JUMP
Definition: regint.h:622
@ OP_END_LINE
Definition: regint.h:600
@ OP_MEMORY_END_PUSH_REC
Definition: regint.h:615
@ OP_BEGIN_POSITION
Definition: regint.h:602
@ OP_PUSH_POS
Definition: regint.h:638
@ OP_NOT_ASCII_WORD
Definition: regint.h:591
@ OP_NULL_CHECK_END_MEMST_PUSH
Definition: regint.h:636
@ OP_END_BUF
Definition: regint.h:598
@ OP_EXACT1
Definition: regint.h:553
@ OP_CCLASS
Definition: regint.h:569
@ OP_WORD
Definition: regint.h:583
@ OP_FINISH
Definition: regint.h:550
@ OP_ASCII_WORD_END
Definition: regint.h:595
@ OP_BACKREF1
Definition: regint.h:604
@ OP_PUSH
Definition: regint.h:623
@ OP_BACKREFN_IC
Definition: regint.h:607
@ OP_PUSH_ABSENT_POS
Definition: regint.h:647
@ OP_MEMORY_END_PUSH
Definition: regint.h:614
@ OP_ASCII_WORD_BOUND
Definition: regint.h:592
@ OP_STATE_CHECK_PUSH
Definition: regint.h:656
@ OP_CCLASS_MIX_NOT
Definition: regint.h:574
@ OP_CCLASS_MB
Definition: regint.h:570
@ OP_SEMI_END_BUF
Definition: regint.h:601
@ OP_STATE_CHECK_ANYCHAR_ML_STAR
Definition: regint.h:660
@ OP_CONDITION
Definition: regint.h:654
@ OP_PUSH_IF_PEEK_NEXT
Definition: regint.h:626
@ OP_ABSENT
Definition: regint.h:648
@ OP_NOT_WORD
Definition: regint.h:584
@ OP_ASCII_WORD_BEGIN
Definition: regint.h:594
@ OP_REPEAT_NG
Definition: regint.h:628
@ OP_BACKREF_MULTI_IC
Definition: regint.h:609
@ OP_REPEAT_INC_SG
Definition: regint.h:631
@ OP_EXACT3
Definition: regint.h:555
@ OP_EXACTMB2N1
Definition: regint.h:559
@ OP_CCLASS_MB_NOT
Definition: regint.h:573
@ OP_FAIL_POS
Definition: regint.h:641
@ OP_EXACT1_IC
Definition: regint.h:566
@ OP_EXACTMB2N2
Definition: regint.h:560
@ OP_BEGIN_BUF
Definition: regint.h:597
@ OP_ANYCHAR_ML_STAR_PEEK_NEXT
Definition: regint.h:581
@ OP_RETURN
Definition: regint.h:652
@ OP_MEMORY_START_PUSH
Definition: regint.h:613
@ OP_KEEP
Definition: regint.h:619
@ OP_FAIL
Definition: regint.h:621
@ OP_NOT_WORD_BOUND
Definition: regint.h:586
@ OP_REPEAT_INC_NG_SG
Definition: regint.h:632
@ OP_FAIL_LOOK_BEHIND_NOT
Definition: regint.h:646
@ OP_BACKREF2
Definition: regint.h:605
@ OP_EXACTMB2N3
Definition: regint.h:561
@ OP_NULL_CHECK_END
Definition: regint.h:634
@ OP_BACKREF_MULTI
Definition: regint.h:608
@ OP_CCLASS_NOT
Definition: regint.h:572
@ OP_NULL_CHECK_END_MEMST
Definition: regint.h:635
@ OP_BACKREF_WITH_LEVEL
Definition: regint.h:610
@ OP_CCLASS_MIX
Definition: regint.h:571
@ OP_EXACTN_IC
Definition: regint.h:567
@ OP_ABSENT_END
Definition: regint.h:649
@ OP_POP
Definition: regint.h:624
#define IS_CODE_SB_WORD(enc, code)
Definition: regint.h:876
#define IS_FIND_CONDITION(option)
Definition: regint.h:387
#define SIZE_OP_SET_OPTION_PUSH
Definition: regint.h:719
#define xalloca
Definition: regint.h:213
#define SIZE_STATE_CHECK_NUM
Definition: regint.h:680
void * PointerType
Definition: regint.h:673
#define SIZE_OP_REPEAT_INC
Definition: regint.h:712
#define BIT_STATUS_CLEAR(stats)
Definition: regint.h:355
#define ANCHOR_ANYCHAR_STAR
Definition: regint.h:543
#define ONIG_OPTIMIZE_EXACT_BM_IC
Definition: regint.h:348
#define xmemcpy
Definition: regint.h:202
#define GET_ABSADDR_INC(addr, p)
Definition: regint.h:688
Bits * BitSetRef
Definition: regint.h:423
#define SIZE_ABSADDR
Definition: regint.h:677
#define IS_IGNORECASE(option)
Definition: regint.h:383
#define SIZE_OP_FAIL_POS
Definition: regint.h:717
#define SIZE_OP_CONDITION
Definition: regint.h:736
#define SIZE_OPTION
Definition: regint.h:682
#define BIT_STATUS_ON_AT_SIMPLE(stats, n)
Definition: regint.h:367
#define SIZE_OP_MEMORY_END_REC
Definition: regint.h:726
#define SIZE_OP_CALL
Definition: regint.h:734
#define DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag)
Definition: regint.h:405
#define BIT_STATUS_AT(stats, n)
Definition: regint.h:357
#define SIZE_OP_LOOK_BEHIND
Definition: regint.h:731
#define GET_RELADDR_INC(addr, p)
Definition: regint.h:687
#define ANCHOR_WORD_END
Definition: regint.h:537
#define ANCHOR_END_BUF
Definition: regint.h:530
#define BBUF_WRITE(buf, pos, bytes, n)
Definition: regint.h:477
#define SIZE_OP_SET_OPTION
Definition: regint.h:718
#define SIZE_OP_FAIL_LOOK_BEHIND_NOT
Definition: regint.h:733
Node * onig_node_new_list(Node *left, Node *right)
Definition: regparse.c:1186
Node * onig_node_new_anchor(int type)
Definition: regparse.c:1222
int onig_node_str_cat(Node *node, const UChar *s, const UChar *end)
Definition: regparse.c:1376
int onig_parse_make_tree(Node **root, const UChar *pattern, const UChar *end, regex_t *reg, ScanEnv *env)
Definition: regparse.c:6603
void onig_scan_env_set_error_string(ScanEnv *env, int ecode ARG_UNUSED, UChar *arg, UChar *arg_end)
Definition: regparse.c:6630
Node * onig_node_list_add(Node *list, Node *x)
Definition: regparse.c:1192
void onig_node_free(Node *node)
Definition: regparse.c:1062
Node * onig_node_new_enclose(int type)
Definition: regparse.c:1347
Node * onig_node_new_alt(Node *left, Node *right)
Definition: regparse.c:1210
Node * onig_node_new_str(const UChar *s, const UChar *end)
Definition: regparse.c:1481
int onig_node_str_set(Node *node, const UChar *s, const UChar *end)
Definition: regparse.c:1412
int onig_names_free(regex_t *reg)
Definition: regparse.c:525
void onig_reduce_nested_quantifier(Node *pnode, Node *cnode)
Definition: regparse.c:2203
int onig_renumber_name_table(regex_t *reg, GroupNumRemap *map)
Definition: regparse.c:611
#define NST_RECURSION
Definition: regparse.h:135
#define IS_ENCLOSE_MAX_FIXED(en)
Definition: regparse.h:153
#define NST_ADDR_FIXED
Definition: regparse.h:137
#define IS_BACKREF_NAME_REF(bn)
Definition: regparse.h:163
#define IS_SYNTAX_BV(syn, bvm)
Definition: regparse.h:332
#define IS_QUANTIFIER_IN_REPEAT(qn)
Definition: regparse.h:165
#define ENCLOSE_OPTION
Definition: regparse.h:95
#define NST_MEM_BACKREFED
Definition: regparse.h:133
#define NT_CANY
Definition: regparse.h:41
#define NSTR(node)
Definition: regparse.h:76
#define IS_CALL_RECURSION(cn)
Definition: regparse.h:161
#define NSTRING_IS_AMBIG(node)
Definition: regparse.h:115
#define NT_ENCLOSE
Definition: regparse.h:44
#define NENCLOSE(node)
Definition: regparse.h:81
#define NT_QTFR
Definition: regparse.h:43
#define ENCLOSE_MEMORY
Definition: regparse.h:94
#define NT_CALL
Definition: regparse.h:48
#define IS_ENCLOSE_ADDR_FIXED(en)
Definition: regparse.h:148
#define NBREF(node)
Definition: regparse.h:79
#define NST_MAX_FIXED
Definition: regparse.h:129
#define IS_ENCLOSE_CLEN_FIXED(en)
Definition: regparse.h:154
#define NT_ANCHOR
Definition: regparse.h:45
#define IS_ENCLOSE_NAME_REF(en)
Definition: regparse.h:158
#define ANCHOR_END_BUF_MASK
Definition: regparse.h:92
#define NST_IN_REPEAT
Definition: regparse.h:140
#define ENCLOSE_ABSENT
Definition: regparse.h:98
#define IS_ENCLOSE_CALLED(en)
Definition: regparse.h:147
#define IS_BACKREF_NEST_LEVEL(bn)
Definition: regparse.h:164
#define NT_CTYPE
Definition: regparse.h:40
#define NCTYPE(node)
Definition: regparse.h:78
#define NULL_NODE
Definition: regparse.h:283
#define IS_ENCLOSE_MARK2(en)
Definition: regparse.h:151
#define NQ_TARGET_IS_EMPTY_MEM
Definition: regparse.h:124
#define NST_CLEN_FIXED
Definition: regparse.h:130
#define SET_CALL_RECURSION(node)
Definition: regparse.h:160
#define IS_ENCLOSE_RECURSION(en)
Definition: regparse.h:149
#define ENCLOSE_CONDITION
Definition: regparse.h:97
#define SET_ENCLOSE_STATUS(node, f)
Definition: regparse.h:144
#define IS_ENCLOSE_MIN_FIXED(en)
Definition: regparse.h:152
#define NCCLASS(node)
Definition: regparse.h:77
#define SCANENV_MEM_NODES(senv)
Definition: regparse.h:286
#define NSTRING_LEN(node)
Definition: regparse.h:108
#define NST_MIN_FIXED
Definition: regparse.h:128
#define ENCLOSE_STOP_BACKTRACK
Definition: regparse.h:96
#define IS_ENCLOSE_NAMED_GROUP(en)
Definition: regparse.h:157
#define NQ_TARGET_IS_EMPTY
Definition: regparse.h:123
#define NT_CCLASS
Definition: regparse.h:39
#define NST_STOP_BT_SIMPLE_REPEAT
Definition: regparse.h:134
#define NSTRING_SET_AMBIG(node)
Definition: regparse.h:111
#define NTYPE2BIT(type)
Definition: regparse.h:51
#define NST_CALLED
Definition: regparse.h:136
#define SET_NTYPE(node, ntype)
Definition: regparse.h:70
#define NQ_TARGET_IS_EMPTY_REC
Definition: regparse.h:125
#define IS_NODE_TYPE_SIMPLE(type)
Definition: regparse.h:65
#define NT_LIST
Definition: regparse.h:46
#define IS_ENCLOSE_MARK1(en)
Definition: regparse.h:150
#define CLEAR_ENCLOSE_STATUS(node, f)
Definition: regparse.h:145
#define NT_BREF
Definition: regparse.h:42
#define NCDR(node)
Definition: regparse.h:87
#define NST_MARK1
Definition: regparse.h:131
#define ANCHOR_ANYCHAR_STAR_MASK
Definition: regparse.h:91
#define NCAR(node)
Definition: regparse.h:86
#define NSTRING_IS_DONT_GET_OPT_INFO(node)
Definition: regparse.h:116
#define NTYPE(node)
Definition: regparse.h:69
#define NT_STR
Definition: regparse.h:38
#define NQTFR(node)
Definition: regparse.h:80
#define NT_ALT
Definition: regparse.h:47
#define NSTRING_IS_RAW(node)
Definition: regparse.h:114
#define NST_MARK2
Definition: regparse.h:132
#define NCALL(node)
Definition: regparse.h:84
#define BACKREFS_P(br)
Definition: regparse.h:119
#define NSTRING_SET_DONT_GET_OPT_INFO(node)
Definition: regparse.h:112
#define NANCHOR(node)
Definition: regparse.h:82
#define IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(en)
Definition: regparse.h:155
#define f
Definition: regint.h:441
UChar * p
Definition: regint.h:442
unsigned int used
Definition: regint.h:443
int ascii_range
Definition: regparse.h:249
int type
Definition: regparse.h:246
int char_len
Definition: regparse.h:248
struct _Node * target
Definition: regparse.h:247
int back_static[NODE_BACKREFS_SIZE]
Definition: regparse.h:239
int state
Definition: regparse.h:237
int nest_level
Definition: regparse.h:241
int back_num
Definition: regparse.h:238
int * back_dynamic
Definition: regparse.h:240
BitSet bs
Definition: regint.h:807
BBuf * mbuf
Definition: regint.h:808
UChar * name
Definition: regparse.h:227
struct _Node * target
Definition: regparse.h:229
UChar * name_end
Definition: regparse.h:228
UnsetAddrList * unset_addr_list
Definition: regparse.h:230
int group_num
Definition: regparse.h:226
OnigDistance min_len
Definition: regparse.h:204
OnigOptionType option
Definition: regparse.h:200
int opt_count
Definition: regparse.h:207
int char_len
Definition: regparse.h:206
AbsAddrType call_addr
Definition: regparse.h:201
OnigDistance max_len
Definition: regparse.h:205
struct _Node * target
Definition: regparse.h:202
int regnum
Definition: regparse.h:199
OnigDistance min
Definition: regcomp.c:4366
OnigDistance max
Definition: regcomp.c:4367
MinMaxLen len
Definition: regcomp.c:4402
OptAncInfo anc
Definition: regcomp.c:4404
OptMapInfo map
Definition: regcomp.c:4409
OptExactInfo exm
Definition: regcomp.c:4406
OptExactInfo expr
Definition: regcomp.c:4407
OptExactInfo exb
Definition: regcomp.c:4405
const char * name
Definition: onigmo.h:162
struct OnigEndCallListItem * next
Definition: regint.h:880
void(* func)(void)
Definition: regint.h:881
OnigUChar * par
Definition: onigmo.h:740
OnigUChar * par_end
Definition: onigmo.h:741
OnigEncoding enc
Definition: onigmo.h:739
OnigOptionType options
Definition: onigmo.h:483
int right_anchor
Definition: regcomp.c:4380
int left_anchor
Definition: regcomp.c:4379
OnigEncoding enc
Definition: regcomp.c:4372
MinMaxLen mmd
Definition: regcomp.c:4371
ScanEnv * scan_env
Definition: regcomp.c:4375
OnigCaseFoldType case_fold_flag
Definition: regcomp.c:4374
OnigOptionType options
Definition: regcomp.c:4373
OptAncInfo anc
Definition: regcomp.c:4385
int ignore_case
Definition: regcomp.c:4388
MinMaxLen mmd
Definition: regcomp.c:4384
UChar s[OPT_EXACT_MAXLEN]
Definition: regcomp.c:4390
int reach_end
Definition: regcomp.c:4387
UChar map[ONIG_CHAR_TABLE_SIZE]
Definition: regcomp.c:4398
int value
Definition: regcomp.c:4397
OptAncInfo anc
Definition: regcomp.c:4395
MinMaxLen mmd
Definition: regcomp.c:4394
int lower
Definition: regparse.h:183
struct _Node * target
Definition: regparse.h:182
int target_empty_info
Definition: regparse.h:186
struct _Node * head_exact
Definition: regparse.h:187
int greedy
Definition: regparse.h:185
int is_referred
Definition: regparse.h:189
int upper
Definition: regparse.h:184
struct _Node * next_head_exact
Definition: regparse.h:188
int state
Definition: regparse.h:181
UChar * error
Definition: regparse.h:301
BitStatusType bt_mem_end
Definition: regparse.h:297
int num_call
Definition: regparse.h:307
Node ** mem_nodes_dynamic
Definition: regparse.h:314
BitStatusType bt_mem_start
Definition: regparse.h:296
int sourceline
Definition: regparse.h:325
int num_mem
Definition: regparse.h:308
UnsetAddrList * unset_addr_list
Definition: regparse.h:305
int num_named
Definition: regparse.h:310
UChar * error_end
Definition: regparse.h:302
BitStatusType backrefed_mem
Definition: regparse.h:298
OnigEncoding enc
Definition: regparse.h:293
BitStatusType capture_history
Definition: regparse.h:295
const char * sourcefile
Definition: regparse.h:324
const OnigSyntaxType * syntax
Definition: regparse.h:294
UChar * s
Definition: regparse.h:172
unsigned int flag
Definition: regparse.h:174
UChar * end
Definition: regparse.h:173
int capa
Definition: regparse.h:175
UChar buf[NODE_STR_BUF_SIZE]
Definition: regparse.h:176
struct _Node * target
Definition: regparse.h:214
int offset
Definition: regparse.h:213
UnsetAddr * us
Definition: regparse.h:220
Definition: inftree9.h:24
OnigDistance dmin
Definition: onigmo.h:793
unsigned char * exact_end
Definition: onigmo.h:789
OnigEncoding enc
Definition: onigmo.h:776
unsigned int capture_history
Definition: onigmo.h:766
unsigned int bt_mem_start
Definition: onigmo.h:767
OnigCaseFoldType case_fold_flag
Definition: onigmo.h:779
unsigned int used
Definition: onigmo.h:758
struct re_pattern_buffer * chain
Definition: onigmo.h:797
int num_null_check
Definition: onigmo.h:763
int repeat_range_alloc
Definition: onigmo.h:770
int num_comb_exp_check
Definition: onigmo.h:764
OnigRepeatRange * repeat_range
Definition: onigmo.h:774
int * int_map_backward
Definition: onigmo.h:792
unsigned int bt_mem_end
Definition: onigmo.h:768
int stack_pop_level
Definition: onigmo.h:769
OnigDistance anchor_dmax
Definition: onigmo.h:786
unsigned char map[ONIG_CHAR_TABLE_SIZE]
Definition: onigmo.h:790
unsigned int alloc
Definition: onigmo.h:759
OnigOptionType options
Definition: onigmo.h:772
unsigned char * p
Definition: onigmo.h:757
unsigned char * exact
Definition: onigmo.h:788
OnigDistance dmax
Definition: onigmo.h:794
OnigDistance anchor_dmin
Definition: onigmo.h:785
OnigPosition * beg
Definition: onigmo.h:719
int allocated
Definition: onigmo.h:717
OnigPosition * end
Definition: onigmo.h:720
Definition: blast.c:41
int err
Definition: win32.c:142
#define env
int intptr_t
Definition: win32.h:90
if((ID)(DISPID) nameid !=nameid)
Definition: win32ole.c:357
#define xfree
Definition: xmalloc.h:49
#define xrealloc
Definition: xmalloc.h:47
#define xmalloc
Definition: xmalloc.h:44