Ruby 3.0.5p211 (2022-11-24 revision ba5cf0f7c52d4d35cc6a173c89eda98ceffa2dcf)
regparse.c
Go to the documentation of this file.
1/**********************************************************************
2 regparse.c - Onigmo (Oniguruma-mod) (regular expression library)
3**********************************************************************/
4/*-
5 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include "regparse.h"
32#include <stdarg.h>
33
34#define WARN_BUFSIZE 256
35
36#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
37
38
55#ifndef RUBY
57#endif
74 ,
75 {
76 (OnigCodePoint )'\\' /* esc */
77 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
78 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
79 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
80 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
81 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
82 }
83};
84
86
87extern void onig_null_warn(const char* s ARG_UNUSED) { }
88
/* File-local warning hook. Starts as DEFAULT_WARN_FUNCTION when that macro
 * is defined at build time, otherwise as the no-op onig_null_warn. */
89#ifdef DEFAULT_WARN_FUNCTION
90static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
91#else
92static OnigWarnFunc onig_warn = onig_null_warn;
93#endif
94
/* Second hook ("verb" warnings), selected the same way from
 * DEFAULT_VERB_WARN_FUNCTION. */
95#ifdef DEFAULT_VERB_WARN_FUNCTION
96static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;
97#else
98static OnigWarnFunc onig_verb_warn = onig_null_warn;
99#endif
100
102{
103 onig_warn = f;
104}
105
107{
108 onig_verb_warn = f;
109}
110
111static void CC_DUP_WARN(ScanEnv *env, OnigCodePoint from, OnigCodePoint to);
112
113
114static unsigned int ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;
115
116extern unsigned int
118{
119 return ParseDepthLimit;
120}
121
122extern int
123onig_set_parse_depth_limit(unsigned int depth)
124{
125 if (depth == 0)
126 ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;
127 else
128 ParseDepthLimit = depth;
129 return 0;
130}
131
132
133static void
134bbuf_free(BBuf* bbuf)
135{
136 if (IS_NOT_NULL(bbuf)) {
137 if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p);
138 xfree(bbuf);
139 }
140}
141
142static int
143bbuf_clone(BBuf** rto, BBuf* from)
144{
145 int r;
146 BBuf *to;
147
148 *rto = to = (BBuf* )xmalloc(sizeof(BBuf));
150 r = BBUF_INIT(to, from->alloc);
151 if (r != 0) return r;
152 to->used = from->used;
153 xmemcpy(to->p, from->p, from->used);
154 return 0;
155}
156
157#define BACKREF_REL_TO_ABS(rel_no, env) \
158 ((env)->num_mem + 1 + (rel_no))
159
/* Clear bits f in v when `negative` is true, otherwise set them.
 * The whole expansion is parenthesized so it can be used inside a larger
 * expression without the ?: capturing neighbouring operands. */
#define ONOFF(v,f,negative)    ((negative) ? ((v) &= ~(f)) : ((v) |= (f)))
161
/* First code point of the multibyte range: 0 when the encoding's minimum
 * character length exceeds one byte, otherwise 0x80. */
162#define MBCODE_START_POS(enc) \
163 (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)
164
/* Adds [MBCODE_START_POS(enc), ONIG_LAST_CODE_POINT] to pbuf.
 * Note: expands to a use of a variable named `env` at the call site. */
165#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
166 add_code_range_to_buf(pbuf, env, MBCODE_START_POS(enc), ONIG_LAST_CODE_POINT)
167
/* For encodings that are not single-byte, adds the full multibyte range to
 * mbuf. Note: assigns to a variable named `r` at the call site and returns
 * from the enclosing function when r is non-zero. */
168#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
169 if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
170 r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\
171 if (r) return r;\
172 }\
173} while (0)
174
175
/* Sets bit `pos` in bs; if the bit was already set, CC_DUP_WARN is called
 * first. Note: uses a variable named `env` at the call site, and evaluates
 * `pos` more than once. */
176#define BITSET_SET_BIT_CHKDUP(bs, pos) do { \
177 if (BITSET_AT(bs, pos)) CC_DUP_WARN(env, pos, pos); \
178 BS_ROOM(bs, pos) |= BS_BIT(pos); \
179} while (0)
180
/* Stores 1 in `empty` when all BITSET_SIZE words of bs are zero, else 0.
 * Declares its own loop variable `i` inside the do-block. */
181#define BITSET_IS_EMPTY(bs,empty) do {\
182 int i;\
183 empty = 1;\
184 for (i = 0; i < BITSET_SIZE; i++) {\
185 if ((bs)[i] != 0) {\
186 empty = 0; break;\
187 }\
188 }\
189} while (0)
190
191static void
192bitset_set_range(ScanEnv *env, BitSetRef bs, int from, int to)
193{
194 int i;
195 for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) {
197 }
198}
199
/* Compiled out by #if 0: would store all-ones in each of the BITSET_SIZE
 * words of bs. */
200#if 0
201static void
202bitset_set_all(BitSetRef bs)
203{
204 int i;
205 for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~((Bits )0); }
206}
207#endif
208
209static void
210bitset_invert(BitSetRef bs)
211{
212 int i;
213 for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~(bs[i]); }
214}
215
216static void
217bitset_invert_to(BitSetRef from, BitSetRef to)
218{
219 int i;
220 for (i = 0; i < BITSET_SIZE; i++) { to[i] = ~(from[i]); }
221}
222
223static void
224bitset_and(BitSetRef dest, BitSetRef bs)
225{
226 int i;
227 for (i = 0; i < BITSET_SIZE; i++) { dest[i] &= bs[i]; }
228}
229
230static void
231bitset_or(BitSetRef dest, BitSetRef bs)
232{
233 int i;
234 for (i = 0; i < BITSET_SIZE; i++) { dest[i] |= bs[i]; }
235}
236
237static void
238bitset_copy(BitSetRef dest, BitSetRef bs)
239{
240 int i;
241 for (i = 0; i < BITSET_SIZE; i++) { dest[i] = bs[i]; }
242}
243
244#if defined(USE_NAMED_GROUP) && !defined(USE_ST_LIBRARY)
245extern int
246onig_strncmp(const UChar* s1, const UChar* s2, int n)
247{
248 int x;
249
250 while (n-- > 0) {
251 x = *s2++ - *s1++;
252 if (x) return x;
253 }
254 return 0;
255}
256#endif
257
258extern void
259onig_strcpy(UChar* dest, const UChar* src, const UChar* end)
260{
261 ptrdiff_t len = end - src;
262 if (len > 0) {
263 xmemcpy(dest, src, len);
264 dest[len] = (UChar )0;
265 }
266}
267
268#ifdef USE_NAMED_GROUP
269static UChar*
270strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
271{
272 ptrdiff_t slen;
273 int term_len, i;
274 UChar *r;
275
276 slen = end - s;
277 term_len = ONIGENC_MBC_MINLEN(enc);
278
279 r = (UChar* )xmalloc(slen + term_len);
281 xmemcpy(r, s, slen);
282
283 for (i = 0; i < term_len; i++)
284 r[slen + i] = (UChar )0;
285
286 return r;
287}
288#endif
289
290/* scan pattern methods */
291#define PEND_VALUE 0
292
293#ifdef __GNUC__
294/* get rid of Wunused-but-set-variable and Wuninitialized */
295# define PFETCH_READY UChar* pfetch_prev = NULL; (void)pfetch_prev
296#else
297# define PFETCH_READY UChar* pfetch_prev
298#endif
299#define PEND (p < end ? 0 : 1)
300#define PUNFETCH p = pfetch_prev
301#define PINC do { \
302 pfetch_prev = p; \
303 p += enclen(enc, p, end); \
304} while (0)
305#define PFETCH(c) do { \
306 c = ((enc->max_enc_len == 1) ? *p : ONIGENC_MBC_TO_CODE(enc, p, end)); \
307 pfetch_prev = p; \
308 p += enclen(enc, p, end); \
309} while (0)
310
311#define PINC_S do { \
312 p += enclen(enc, p, end); \
313} while (0)
314#define PFETCH_S(c) do { \
315 c = ((enc->max_enc_len == 1) ? *p : ONIGENC_MBC_TO_CODE(enc, p, end)); \
316 p += enclen(enc, p, end); \
317} while (0)
318
319#define PPEEK (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
320#define PPEEK_IS(c) (PPEEK == (OnigCodePoint )c)
321
322static UChar*
323strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,
324 size_t capa)
325{
326 UChar* r;
327
328 if (dest)
329 r = (UChar* )xrealloc(dest, capa + 1);
330 else
331 r = (UChar* )xmalloc(capa + 1);
332
334 onig_strcpy(r + (dest_end - dest), src, src_end);
335 return r;
336}
337
338/* dest on static area */
339static UChar*
340strcat_capa_from_static(UChar* dest, UChar* dest_end,
341 const UChar* src, const UChar* src_end, size_t capa)
342{
343 UChar* r;
344
345 r = (UChar* )xmalloc(capa + 1);
347 onig_strcpy(r, dest, dest_end);
348 onig_strcpy(r + (dest_end - dest), src, src_end);
349 return r;
350}
351
352
353#ifdef USE_ST_LIBRARY
354
355# ifdef RUBY
356# include "ruby/st.h"
357# else
358# include "st.h"
359# endif
360
361typedef struct {
362 const UChar* s;
363 const UChar* end;
365
366static int
367str_end_cmp(st_data_t xp, st_data_t yp)
368{
369 const st_str_end_key *x, *y;
370 const UChar *p, *q;
371 int c;
372
373 x = (const st_str_end_key *)xp;
374 y = (const st_str_end_key *)yp;
375 if ((x->end - x->s) != (y->end - y->s))
376 return 1;
377
378 p = x->s;
379 q = y->s;
380 while (p < x->end) {
381 c = (int )*p - (int )*q;
382 if (c != 0) return c;
383
384 p++; q++;
385 }
386
387 return 0;
388}
389
390static st_index_t
391str_end_hash(st_data_t xp)
392{
393 const st_str_end_key *x = (const st_str_end_key *)xp;
394 const UChar *p;
395 st_index_t val = 0;
396
397 p = x->s;
398 while (p < x->end) {
399 val = val * 997 + (int )*p++;
400 }
401
402 return val + (val >> 5);
403}
404
405extern hash_table_type*
407{
408 static const struct st_hash_type hashType = {
409 str_end_cmp,
410 str_end_hash,
411 };
412
413 return (hash_table_type* )
414 onig_st_init_table_with_size(&hashType, size);
415}
416
417extern int
419 const UChar* end_key, hash_data_type *value)
420{
422
423 key.s = (UChar* )str_key;
424 key.end = (UChar* )end_key;
425
426 return onig_st_lookup(table, (st_data_t )(&key), value);
427}
428
429extern int
431 const UChar* end_key, hash_data_type value)
432{
434 int result;
435
437 key->s = (UChar* )str_key;
438 key->end = (UChar* )end_key;
439 result = onig_st_insert(table, (st_data_t )key, value);
440 if (result) {
441 xfree(key);
442 }
443 return result;
444}
445
446#endif /* USE_ST_LIBRARY */
447
448
449#ifdef USE_NAMED_GROUP
450
451# define INIT_NAME_BACKREFS_ALLOC_NUM 8
452
453typedef struct {
455 size_t name_len; /* byte length */
456 int back_num; /* number of backrefs */
460} NameEntry;
461
462# ifdef USE_ST_LIBRARY
463
465typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */
466
467# ifdef ONIG_DEBUG
468static int
469i_print_name_entry(UChar* key, NameEntry* e, void* arg)
470{
471 int i;
472 FILE* fp = (FILE* )arg;
473
474 fprintf(fp, "%s: ", e->name);
475 if (e->back_num == 0)
476 fputs("-", fp);
477 else if (e->back_num == 1)
478 fprintf(fp, "%d", e->back_ref1);
479 else {
480 for (i = 0; i < e->back_num; i++) {
481 if (i > 0) fprintf(fp, ", ");
482 fprintf(fp, "%d", e->back_refs[i]);
483 }
484 }
485 fputs("\n", fp);
486 return ST_CONTINUE;
487}
488
489extern int
490onig_print_names(FILE* fp, regex_t* reg)
491{
492 NameTable* t = (NameTable* )reg->name_table;
493
494 if (IS_NOT_NULL(t)) {
495 fprintf(fp, "name table\n");
496 onig_st_foreach(t, (st_foreach_callback_func *)i_print_name_entry, (HashDataType )fp);
497 fputs("\n", fp);
498 }
499 return 0;
500}
501# endif /* ONIG_DEBUG */
502
503static int
504i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED)
505{
506 xfree(e->name);
508 xfree(key);
509 xfree(e);
510 return ST_DELETE;
511}
512
513static int
514names_clear(regex_t* reg)
515{
516 NameTable* t = (NameTable* )reg->name_table;
517
518 if (IS_NOT_NULL(t)) {
519 onig_st_foreach(t, (st_foreach_callback_func *)i_free_name_entry, 0);
520 }
521 return 0;
522}
523
524extern int
526{
527 int r;
528 NameTable* t;
529
530 r = names_clear(reg);
531 if (r) return r;
532
533 t = (NameTable* )reg->name_table;
534 if (IS_NOT_NULL(t)) onig_st_free_table(t);
535 reg->name_table = (void* )NULL;
536 return 0;
537}
538
539static NameEntry*
540name_find(regex_t* reg, const UChar* name, const UChar* name_end)
541{
542 NameEntry* e;
543 NameTable* t = (NameTable* )reg->name_table;
544
545 e = (NameEntry* )NULL;
546 if (IS_NOT_NULL(t)) {
547 onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
548 }
549 return e;
550}
551
552typedef struct {
553 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*);
555 void* arg;
556 int ret;
558} INamesArg;
559
560static int
561i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg)
562{
563 int r = (*(arg->func))(e->name,
564 e->name + e->name_len,
565 e->back_num,
566 (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
567 arg->reg, arg->arg);
568 if (r != 0) {
569 arg->ret = r;
570 return ST_STOP;
571 }
572 return ST_CONTINUE;
573}
574
575extern int
577 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
578{
579 INamesArg narg;
580 NameTable* t = (NameTable* )reg->name_table;
581
582 narg.ret = 0;
583 if (IS_NOT_NULL(t)) {
584 narg.func = func;
585 narg.reg = reg;
586 narg.arg = arg;
587 narg.enc = reg->enc; /* should be pattern encoding. */
588 onig_st_foreach(t, (st_foreach_callback_func *)i_names, (HashDataType )&narg);
589 }
590 return narg.ret;
591}
592
593static int
594i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map)
595{
596 int i;
597
598 if (e->back_num > 1) {
599 for (i = 0; i < e->back_num; i++) {
600 e->back_refs[i] = map[e->back_refs[i]].new_val;
601 }
602 }
603 else if (e->back_num == 1) {
604 e->back_ref1 = map[e->back_ref1].new_val;
605 }
606
607 return ST_CONTINUE;
608}
609
610extern int
612{
613 NameTable* t = (NameTable* )reg->name_table;
614
615 if (IS_NOT_NULL(t)) {
616 onig_st_foreach(t, (st_foreach_callback_func *)i_renumber_name, (HashDataType )map);
617 }
618 return 0;
619}
620
621
622extern int
624{
625 NameTable* t = (NameTable* )reg->name_table;
626
627 if (IS_NOT_NULL(t))
628 return (int )t->num_entries;
629 else
630 return 0;
631}
632
633# else /* USE_ST_LIBRARY */
634
635# define INIT_NAMES_ALLOC_NUM 8
636
/* Name table used when the st library is not available: a growable array
 * of entries (name_add doubles `alloc` once `num` reaches it). */
637typedef struct {
/* entry array */
638 NameEntry* e;
/* entries in use */
639 int num;
/* entries allocated */
640 int alloc;
641} NameTable;
642
643# ifdef ONIG_DEBUG
644extern int
645onig_print_names(FILE* fp, regex_t* reg)
646{
647 int i, j;
648 NameEntry* e;
649 NameTable* t = (NameTable* )reg->name_table;
650
651 if (IS_NOT_NULL(t) && t->num > 0) {
652 fprintf(fp, "name table\n");
653 for (i = 0; i < t->num; i++) {
654 e = &(t->e[i]);
655 fprintf(fp, "%s: ", e->name);
656 if (e->back_num == 0) {
657 fputs("-", fp);
658 }
659 else if (e->back_num == 1) {
660 fprintf(fp, "%d", e->back_ref1);
661 }
662 else {
663 for (j = 0; j < e->back_num; j++) {
664 if (j > 0) fprintf(fp, ", ");
665 fprintf(fp, "%d", e->back_refs[j]);
666 }
667 }
668 fputs("\n", fp);
669 }
670 fputs("\n", fp);
671 }
672 return 0;
673}
674# endif
675
676static int
677names_clear(regex_t* reg)
678{
679 int i;
680 NameEntry* e;
681 NameTable* t = (NameTable* )reg->name_table;
682
683 if (IS_NOT_NULL(t)) {
684 for (i = 0; i < t->num; i++) {
685 e = &(t->e[i]);
686 if (IS_NOT_NULL(e->name)) {
687 xfree(e->name);
688 e->name = NULL;
689 e->name_len = 0;
690 e->back_num = 0;
691 e->back_alloc = 0;
693 e->back_refs = (int* )NULL;
694 }
695 }
696 if (IS_NOT_NULL(t->e)) {
697 xfree(t->e);
698 t->e = NULL;
699 }
700 t->num = 0;
701 }
702 return 0;
703}
704
705extern int
707{
708 int r;
709 NameTable* t;
710
711 r = names_clear(reg);
712 if (r) return r;
713
714 t = (NameTable* )reg->name_table;
715 if (IS_NOT_NULL(t)) xfree(t);
716 reg->name_table = NULL;
717 return 0;
718}
719
720static NameEntry*
721name_find(regex_t* reg, const UChar* name, const UChar* name_end)
722{
723 int i, len;
724 NameEntry* e;
725 NameTable* t = (NameTable* )reg->name_table;
726
727 if (IS_NOT_NULL(t)) {
728 len = name_end - name;
729 for (i = 0; i < t->num; i++) {
730 e = &(t->e[i]);
731 if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
732 return e;
733 }
734 }
735 return (NameEntry* )NULL;
736}
737
738extern int
740 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
741{
742 int i, r;
743 NameEntry* e;
744 NameTable* t = (NameTable* )reg->name_table;
745
746 if (IS_NOT_NULL(t)) {
747 for (i = 0; i < t->num; i++) {
748 e = &(t->e[i]);
749 r = (*func)(e->name, e->name + e->name_len, e->back_num,
750 (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
751 reg, arg);
752 if (r != 0) return r;
753 }
754 }
755 return 0;
756}
757
758extern int
760{
761 NameTable* t = (NameTable* )reg->name_table;
762
763 if (IS_NOT_NULL(t))
764 return t->num;
765 else
766 return 0;
767}
768
769# endif /* else USE_ST_LIBRARY */
770
771static int
772name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
773{
774 int alloc;
775 NameEntry* e;
776 NameTable* t = (NameTable* )reg->name_table;
777
778 if (name_end - name <= 0)
780
781 e = name_find(reg, name, name_end);
782 if (IS_NULL(e)) {
783# ifdef USE_ST_LIBRARY
784 if (IS_NULL(t)) {
786 reg->name_table = (void* )t;
787 }
788 e = (NameEntry* )xmalloc(sizeof(NameEntry));
790
791 e->name = strdup_with_null(reg->enc, name, name_end);
792 if (IS_NULL(e->name)) {
793 xfree(e);
794 return ONIGERR_MEMORY;
795 }
796 onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),
797 (HashDataType )e);
798
799 e->name_len = name_end - name;
800 e->back_num = 0;
801 e->back_alloc = 0;
802 e->back_refs = (int* )NULL;
803
804# else
805
806 if (IS_NULL(t)) {
807 alloc = INIT_NAMES_ALLOC_NUM;
808 t = (NameTable* )xmalloc(sizeof(NameTable));
810 t->e = NULL;
811 t->alloc = 0;
812 t->num = 0;
813
814 t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);
815 if (IS_NULL(t->e)) {
816 xfree(t);
817 return ONIGERR_MEMORY;
818 }
819 t->alloc = alloc;
820 reg->name_table = t;
821 goto clear;
822 }
823 else if (t->num == t->alloc) {
824 int i;
825 NameEntry* p;
826
827 alloc = t->alloc * 2;
828 p = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc);
830 t->e = p;
831 t->alloc = alloc;
832
833 clear:
834 for (i = t->num; i < t->alloc; i++) {
835 t->e[i].name = NULL;
836 t->e[i].name_len = 0;
837 t->e[i].back_num = 0;
838 t->e[i].back_alloc = 0;
839 t->e[i].back_refs = (int* )NULL;
840 }
841 }
842 e = &(t->e[t->num]);
843 t->num++;
844 e->name = strdup_with_null(reg->enc, name, name_end);
845 if (IS_NULL(e->name)) return ONIGERR_MEMORY;
846 e->name_len = name_end - name;
847# endif
848 }
849
850 if (e->back_num >= 1 &&
853 name, name_end);
855 }
856
857 e->back_num++;
858 if (e->back_num == 1) {
859 e->back_ref1 = backref;
860 }
861 else {
862 if (e->back_num == 2) {
864 e->back_refs = (int* )xmalloc(sizeof(int) * alloc);
866 e->back_alloc = alloc;
867 e->back_refs[0] = e->back_ref1;
868 e->back_refs[1] = backref;
869 }
870 else {
871 if (e->back_num > e->back_alloc) {
872 int* p;
873 alloc = e->back_alloc * 2;
874 p = (int* )xrealloc(e->back_refs, sizeof(int) * alloc);
876 e->back_refs = p;
877 e->back_alloc = alloc;
878 }
879 e->back_refs[e->back_num - 1] = backref;
880 }
881 }
882
883 return 0;
884}
885
886extern int
888 const UChar* name_end, int** nums)
889{
890 NameEntry* e = name_find(reg, name, name_end);
891
893
894 switch (e->back_num) {
895 case 0:
896 *nums = 0;
897 break;
898 case 1:
899 *nums = &(e->back_ref1);
900 break;
901 default:
902 *nums = e->back_refs;
903 break;
904 }
905 return e->back_num;
906}
907
908extern int
910 const UChar* name_end, const OnigRegion *region)
911{
912 int i, n, *nums;
913
914 n = onig_name_to_group_numbers(reg, name, name_end, &nums);
915 if (n < 0)
916 return n;
917 else if (n == 0)
918 return ONIGERR_PARSER_BUG;
919 else if (n == 1)
920 return nums[0];
921 else {
922 if (IS_NOT_NULL(region)) {
923 for (i = n - 1; i >= 0; i--) {
924 if (region->beg[nums[i]] != ONIG_REGION_NOTPOS)
925 return nums[i];
926 }
927 }
928 return nums[n - 1];
929 }
930}
931
932#else /* USE_NAMED_GROUP */
933
934extern int
936 const UChar* name_end, int** nums)
937{
939}
940
941extern int
943 const UChar* name_end, const OnigRegion* region)
944{
946}
947
948extern int
950 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
951{
953}
954
955extern int
957{
958 return 0;
959}
960#endif /* else USE_NAMED_GROUP */
961
962extern int
964{
966 return 0;
967
968#ifdef USE_NAMED_GROUP
969 if (onig_number_of_names(reg) > 0 &&
972 return 0;
973 }
974#endif
975
976 return 1;
977}
978
979
980#define INIT_SCANENV_MEMNODES_ALLOC_SIZE 16
981
982static void
983scan_env_clear(ScanEnv* env)
984{
985 int i;
986
987 BIT_STATUS_CLEAR(env->capture_history);
988 BIT_STATUS_CLEAR(env->bt_mem_start);
989 BIT_STATUS_CLEAR(env->bt_mem_end);
990 BIT_STATUS_CLEAR(env->backrefed_mem);
991 env->error = (UChar* )NULL;
992 env->error_end = (UChar* )NULL;
993 env->num_call = 0;
994 env->num_mem = 0;
995#ifdef USE_NAMED_GROUP
996 env->num_named = 0;
997#endif
998 env->mem_alloc = 0;
999 env->mem_nodes_dynamic = (Node** )NULL;
1000
1001 for (i = 0; i < SCANENV_MEMNODES_SIZE; i++)
1002 env->mem_nodes_static[i] = NULL_NODE;
1003
1004#ifdef USE_COMBINATION_EXPLOSION_CHECK
1005 env->num_comb_exp_check = 0;
1006 env->comb_exp_max_regnum = 0;
1007 env->curr_max_regnum = 0;
1008 env->has_recursion = 0;
1009#endif
1010 env->parse_depth = 0;
1011 env->warnings_flag = 0;
1012}
1013
1014static int
1015scan_env_add_mem_entry(ScanEnv* env)
1016{
1017 int i, need, alloc;
1018 Node** p;
1019
1020 need = env->num_mem + 1;
1021 if (need > ONIG_MAX_CAPTURE_GROUP_NUM)
1023 if (need >= SCANENV_MEMNODES_SIZE) {
1024 if (env->mem_alloc <= need) {
1025 if (IS_NULL(env->mem_nodes_dynamic)) {
1027 p = (Node** )xmalloc(sizeof(Node*) * alloc);
1029 xmemcpy(p, env->mem_nodes_static,
1030 sizeof(Node*) * SCANENV_MEMNODES_SIZE);
1031 }
1032 else {
1033 alloc = env->mem_alloc * 2;
1034 p = (Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc);
1036 }
1037
1038 for (i = env->num_mem + 1; i < alloc; i++)
1039 p[i] = NULL_NODE;
1040
1041 env->mem_nodes_dynamic = p;
1042 env->mem_alloc = alloc;
1043 }
1044 }
1045
1046 env->num_mem++;
1047 return env->num_mem;
1048}
1049
1050static int
1051scan_env_set_mem_node(ScanEnv* env, int num, Node* node)
1052{
1053 if (env->num_mem >= num)
1054 SCANENV_MEM_NODES(env)[num] = node;
1055 else
1056 return ONIGERR_PARSER_BUG;
1057 return 0;
1058}
1059
1060
1061extern void
1063{
1064 start:
1065 if (IS_NULL(node)) return ;
1066
1067 switch (NTYPE(node)) {
1068 case NT_STR:
1069 if (NSTR(node)->capa != 0 &&
1070 IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {
1071 xfree(NSTR(node)->s);
1072 }
1073 break;
1074
1075 case NT_LIST:
1076 case NT_ALT:
1077 onig_node_free(NCAR(node));
1078 {
1079 Node* next_node = NCDR(node);
1080
1081 xfree(node);
1082 node = next_node;
1083 goto start;
1084 }
1085 break;
1086
1087 case NT_CCLASS:
1088 {
1089 CClassNode* cc = NCCLASS(node);
1090
1091 if (cc->mbuf)
1092 bbuf_free(cc->mbuf);
1093 }
1094 break;
1095
1096 case NT_QTFR:
1097 if (NQTFR(node)->target)
1098 onig_node_free(NQTFR(node)->target);
1099 break;
1100
1101 case NT_ENCLOSE:
1102 if (NENCLOSE(node)->target)
1103 onig_node_free(NENCLOSE(node)->target);
1104 break;
1105
1106 case NT_BREF:
1107 if (IS_NOT_NULL(NBREF(node)->back_dynamic))
1108 xfree(NBREF(node)->back_dynamic);
1109 break;
1110
1111 case NT_ANCHOR:
1112 if (NANCHOR(node)->target)
1113 onig_node_free(NANCHOR(node)->target);
1114 break;
1115 }
1116
1117 xfree(node);
1118}
1119
1120static Node*
1121node_new(void)
1122{
1123 Node* node;
1124
1125 node = (Node* )xmalloc(sizeof(Node));
1126 /* xmemset(node, 0, sizeof(Node)); */
1127 return node;
1128}
1129
1130static void
1131initialize_cclass(CClassNode* cc)
1132{
1133 BITSET_CLEAR(cc->bs);
1134 /* cc->base.flags = 0; */
1135 cc->flags = 0;
1136 cc->mbuf = NULL;
1137}
1138
1139static Node*
1140node_new_cclass(void)
1141{
1142 Node* node = node_new();
1143 CHECK_NULL_RETURN(node);
1144
1145 SET_NTYPE(node, NT_CCLASS);
1146 initialize_cclass(NCCLASS(node));
1147 return node;
1148}
1149
1150static Node*
1151node_new_ctype(int type, int not, int ascii_range)
1152{
1153 Node* node = node_new();
1154 CHECK_NULL_RETURN(node);
1155
1156 SET_NTYPE(node, NT_CTYPE);
1157 NCTYPE(node)->ctype = type;
1158 NCTYPE(node)->not = not;
1159 NCTYPE(node)->ascii_range = ascii_range;
1160 return node;
1161}
1162
1163static Node*
1164node_new_anychar(void)
1165{
1166 Node* node = node_new();
1167 CHECK_NULL_RETURN(node);
1168
1169 SET_NTYPE(node, NT_CANY);
1170 return node;
1171}
1172
1173static Node*
1174node_new_list(Node* left, Node* right)
1175{
1176 Node* node = node_new();
1177 CHECK_NULL_RETURN(node);
1178
1179 SET_NTYPE(node, NT_LIST);
1180 NCAR(node) = left;
1181 NCDR(node) = right;
1182 return node;
1183}
1184
1185extern Node*
1187{
1188 return node_new_list(left, right);
1189}
1190
1191extern Node*
1193{
1194 Node *n;
1195
1196 n = onig_node_new_list(x, NULL);
1197 if (IS_NULL(n)) return NULL_NODE;
1198
1199 if (IS_NOT_NULL(list)) {
1200 while (IS_NOT_NULL(NCDR(list)))
1201 list = NCDR(list);
1202
1203 NCDR(list) = n;
1204 }
1205
1206 return n;
1207}
1208
1209extern Node*
1211{
1212 Node* node = node_new();
1213 CHECK_NULL_RETURN(node);
1214
1215 SET_NTYPE(node, NT_ALT);
1216 NCAR(node) = left;
1217 NCDR(node) = right;
1218 return node;
1219}
1220
1221extern Node*
1223{
1224 Node* node = node_new();
1225 CHECK_NULL_RETURN(node);
1226
1227 SET_NTYPE(node, NT_ANCHOR);
1228 NANCHOR(node)->type = type;
1229 NANCHOR(node)->target = NULL;
1230 NANCHOR(node)->char_len = -1;
1231 NANCHOR(node)->ascii_range = 0;
1232 return node;
1233}
1234
1235static Node*
1236node_new_backref(int back_num, int* backrefs, int by_name,
1238 int exist_level, int nest_level,
1239#endif
1240 ScanEnv* env)
1241{
1242 int i;
1243 Node* node = node_new();
1244
1245 CHECK_NULL_RETURN(node);
1246
1247 SET_NTYPE(node, NT_BREF);
1248 NBREF(node)->state = 0;
1249 NBREF(node)->back_num = back_num;
1250 NBREF(node)->back_dynamic = (int* )NULL;
1251 if (by_name != 0)
1252 NBREF(node)->state |= NST_NAME_REF;
1253
1254#ifdef USE_BACKREF_WITH_LEVEL
1255 if (exist_level != 0) {
1256 NBREF(node)->state |= NST_NEST_LEVEL;
1257 NBREF(node)->nest_level = nest_level;
1258 }
1259#endif
1260
1261 for (i = 0; i < back_num; i++) {
1262 if (backrefs[i] <= env->num_mem &&
1263 IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) {
1264 NBREF(node)->state |= NST_RECURSION; /* /...(\1).../ */
1265 break;
1266 }
1267 }
1268
1269 if (back_num <= NODE_BACKREFS_SIZE) {
1270 for (i = 0; i < back_num; i++)
1271 NBREF(node)->back_static[i] = backrefs[i];
1272 }
1273 else {
1274 int* p = (int* )xmalloc(sizeof(int) * back_num);
1275 if (IS_NULL(p)) {
1276 onig_node_free(node);
1277 return NULL;
1278 }
1279 NBREF(node)->back_dynamic = p;
1280 for (i = 0; i < back_num; i++)
1281 p[i] = backrefs[i];
1282 }
1283 return node;
1284}
1285
1286#ifdef USE_SUBEXP_CALL
1287static Node*
1288node_new_call(UChar* name, UChar* name_end, int gnum)
1289{
1290 Node* node = node_new();
1291 CHECK_NULL_RETURN(node);
1292
1293 SET_NTYPE(node, NT_CALL);
1294 NCALL(node)->state = 0;
1295 NCALL(node)->target = NULL_NODE;
1296 NCALL(node)->name = name;
1297 NCALL(node)->name_end = name_end;
1298 NCALL(node)->group_num = gnum; /* call by number if gnum != 0 */
1299 return node;
1300}
1301#endif
1302
1303static Node*
1304node_new_quantifier(int lower, int upper, int by_number)
1305{
1306 Node* node = node_new();
1307 CHECK_NULL_RETURN(node);
1308
1309 SET_NTYPE(node, NT_QTFR);
1310 NQTFR(node)->state = 0;
1311 NQTFR(node)->target = NULL;
1312 NQTFR(node)->lower = lower;
1313 NQTFR(node)->upper = upper;
1314 NQTFR(node)->greedy = 1;
1315 NQTFR(node)->target_empty_info = NQ_TARGET_ISNOT_EMPTY;
1316 NQTFR(node)->head_exact = NULL_NODE;
1317 NQTFR(node)->next_head_exact = NULL_NODE;
1318 NQTFR(node)->is_referred = 0;
1319 if (by_number != 0)
1320 NQTFR(node)->state |= NST_BY_NUMBER;
1321
1322#ifdef USE_COMBINATION_EXPLOSION_CHECK
1323 NQTFR(node)->comb_exp_check_num = 0;
1324#endif
1325
1326 return node;
1327}
1328
1329static Node*
1330node_new_enclose(int type)
1331{
1332 Node* node = node_new();
1333 CHECK_NULL_RETURN(node);
1334
1335 SET_NTYPE(node, NT_ENCLOSE);
1336 NENCLOSE(node)->type = type;
1337 NENCLOSE(node)->state = 0;
1338 NENCLOSE(node)->regnum = 0;
1339 NENCLOSE(node)->option = 0;
1340 NENCLOSE(node)->target = NULL;
1341 NENCLOSE(node)->call_addr = -1;
1342 NENCLOSE(node)->opt_count = 0;
1343 return node;
1344}
1345
1346extern Node*
1348{
1349 return node_new_enclose(type);
1350}
1351
1352static Node*
1353node_new_enclose_memory(OnigOptionType option, int is_named)
1354{
1355 Node* node = node_new_enclose(ENCLOSE_MEMORY);
1356 CHECK_NULL_RETURN(node);
1357 if (is_named != 0)
1359
1360#ifdef USE_SUBEXP_CALL
1361 NENCLOSE(node)->option = option;
1362#endif
1363 return node;
1364}
1365
1366static Node*
1367node_new_option(OnigOptionType option)
1368{
1369 Node* node = node_new_enclose(ENCLOSE_OPTION);
1370 CHECK_NULL_RETURN(node);
1371 NENCLOSE(node)->option = option;
1372 return node;
1373}
1374
1375extern int
1376onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
1377{
1378 ptrdiff_t addlen = end - s;
1379
1380 if (addlen > 0) {
1381 ptrdiff_t len = NSTR(node)->end - NSTR(node)->s;
1382
1383 if (NSTR(node)->capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) {
1384 UChar* p;
1385 ptrdiff_t capa = len + addlen + NODE_STR_MARGIN;
1386
1387 if (capa <= NSTR(node)->capa) {
1388 onig_strcpy(NSTR(node)->s + len, s, end);
1389 }
1390 else {
1391 if (NSTR(node)->s == NSTR(node)->buf)
1392 p = strcat_capa_from_static(NSTR(node)->s, NSTR(node)->end,
1393 s, end, capa);
1394 else
1395 p = strcat_capa(NSTR(node)->s, NSTR(node)->end, s, end, capa);
1396
1398 NSTR(node)->s = p;
1399 NSTR(node)->capa = (int )capa;
1400 }
1401 }
1402 else {
1403 onig_strcpy(NSTR(node)->s + len, s, end);
1404 }
1405 NSTR(node)->end = NSTR(node)->s + len + addlen;
1406 }
1407
1408 return 0;
1409}
1410
1411extern int
1412onig_node_str_set(Node* node, const UChar* s, const UChar* end)
1413{
1414 onig_node_str_clear(node);
1415 return onig_node_str_cat(node, s, end);
1416}
1417
1418static int
1419node_str_cat_char(Node* node, UChar c)
1420{
1421 UChar s[1];
1422
1423 s[0] = c;
1424 return onig_node_str_cat(node, s, s + 1);
1425}
1426
1427static int
1428node_str_cat_codepoint(Node* node, OnigEncoding enc, OnigCodePoint c)
1429{
1431 int num = ONIGENC_CODE_TO_MBC(enc, c, buf);
1432 if (num < 0) return num;
1433 return onig_node_str_cat(node, buf, buf + num);
1434}
1435
/* Compiled out by #if 0: would retype `node` as NT_STR with the given
 * flag and an empty string in the inline buffer. */
1436#if 0
1437extern void
1438onig_node_conv_to_str_node(Node* node, int flag)
1439{
1440 SET_NTYPE(node, NT_STR);
1441 NSTR(node)->flag = flag;
1442 NSTR(node)->capa = 0;
1443 NSTR(node)->s = NSTR(node)->buf;
1444 NSTR(node)->end = NSTR(node)->buf;
1445}
1446#endif
1447
1448extern void
1450{
1451 if (NSTR(node)->capa != 0 &&
1452 IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {
1453 xfree(NSTR(node)->s);
1454 }
1455
1456 NSTR(node)->capa = 0;
1457 NSTR(node)->flag = 0;
1458 NSTR(node)->s = NSTR(node)->buf;
1459 NSTR(node)->end = NSTR(node)->buf;
1460}
1461
1462static Node*
1463node_new_str(const UChar* s, const UChar* end)
1464{
1465 Node* node = node_new();
1466 CHECK_NULL_RETURN(node);
1467
1468 SET_NTYPE(node, NT_STR);
1469 NSTR(node)->capa = 0;
1470 NSTR(node)->flag = 0;
1471 NSTR(node)->s = NSTR(node)->buf;
1472 NSTR(node)->end = NSTR(node)->buf;
1473 if (onig_node_str_cat(node, s, end)) {
1474 onig_node_free(node);
1475 return NULL;
1476 }
1477 return node;
1478}
1479
1480extern Node*
1481onig_node_new_str(const UChar* s, const UChar* end)
1482{
1483 return node_new_str(s, end);
1484}
1485
1486static Node*
1487node_new_str_raw(UChar* s, UChar* end)
1488{
1489 Node* node = node_new_str(s, end);
1490 if (IS_NOT_NULL(node))
1491 NSTRING_SET_RAW(node);
1492 return node;
1493}
1494
1495static Node*
1496node_new_empty(void)
1497{
1498 return node_new_str(NULL, NULL);
1499}
1500
1501static Node*
1502node_new_str_raw_char(UChar c)
1503{
1504 UChar p[1];
1505
1506 p[0] = c;
1507 return node_new_str_raw(p, p + 1);
1508}
1509
1510static Node*
1511str_node_split_last_char(StrNode* sn, OnigEncoding enc)
1512{
1513 const UChar *p;
1514 Node* n = NULL_NODE;
1515
1516 if (sn->end > sn->s) {
1517 p = onigenc_get_prev_char_head(enc, sn->s, sn->end, sn->end);
1518 if (p && p > sn->s) { /* can be split. */
1519 n = node_new_str(p, sn->end);
1520 if (IS_NOT_NULL(n) && (sn->flag & NSTR_RAW) != 0)
1521 NSTRING_SET_RAW(n);
1522 sn->end = (UChar* )p;
1523 }
1524 }
1525 return n;
1526}
1527
1528static int
1529str_node_can_be_split(StrNode* sn, OnigEncoding enc)
1530{
1531 if (sn->end > sn->s) {
1532 return ((enclen(enc, sn->s, sn->end) < sn->end - sn->s) ? 1 : 0);
1533 }
1534 return 0;
1535}
1536
1537#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
/*
 * Shift the node's bytes right by `num` positions and fill the first `num`
 * bytes with `val`.
 *
 * The current contents [s, end) are copied out to `buf`, copied back
 * starting at s[num], `end` is advanced by num, and s[0..num-1] are set to
 * val.
 *
 * NOTE(review): the declaration of `buf` (listing line 1541) is not visible
 * in this copy, and no bounds check on len + num is visible either --
 * confirm that callers guarantee enough room.
 * NOTE(review): the function is declared `int`, but the visible body ends
 * without a return statement.
 */
1538static int
1539node_str_head_pad(StrNode* sn, int num, UChar val)
1540{
1542 int i, len;
1543
1544 len = sn->end - sn->s;
/* save the current bytes, then write them back num positions further on */
1545 onig_strcpy(buf, sn->s, sn->end);
1546 onig_strcpy(&(sn->s[num]), buf, buf + len);
1547 sn->end += num;
1548
/* fill the freed prefix with the pad value */
1549 for (i = 0; i < num; i++) {
1550 sn->s[i] = val;
1551 }
1552}
1553#endif
1554
/*
 * Read a run of decimal digits starting at *src and return its value.
 *
 * Scanning stops at the first non-digit (which is pushed back) or at the end
 * of input.  On success *src is advanced past the digits and the accumulated
 * value is returned; zero digits yields 0 with *src unchanged in position.
 * Returns -1, without updating *src, when the value would exceed
 * INT_MAX_LIMIT.
 *
 * NOTE(review): the declarator line (listing line 1556) and listing line
 * 1561 are not present in this copy; `end` and `enc` are used below but
 * their declarations are therefore not visible here.
 */
1555extern int
1557{
1558 unsigned int num, val;
1559 OnigCodePoint c;
1560 UChar* p = *src;
1562
1563 num = 0;
1564 while (!PEND) {
1565 PFETCH(c);
1566 if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
1567 val = (unsigned int )DIGITVAL(c);
/* checked before the multiply so num * 10 + val cannot pass INT_MAX_LIMIT */
1568 if ((INT_MAX_LIMIT - val) / 10UL < num)
1569 return -1; /* overflow */
1570
1571 num = num * 10 + val;
1572 }
1573 else {
/* not a digit: give the character back and stop */
1574 PUNFETCH;
1575 break;
1576 }
1577 }
1578 *src = p;
1579 return num;
1580}
1581
/*
 * Read between `minlen` and `maxlen` hexadecimal digits starting at *src.
 *
 * Returns the accumulated value and advances *src on success.
 * Returns -1 when the value would exceed INT_MAX_LIMIT, and -2 when fewer
 * than `minlen` digits were available; *src is left untouched in both cases.
 *
 * NOTE(review): listing line 1590 is not present in this copy.
 */
1582static int
1583scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int minlen,
1584 int maxlen, OnigEncoding enc)
1585{
1586 OnigCodePoint c;
1587 unsigned int num, val;
1588 int restlen;
1589 UChar* p = *src;
1591
/* restlen = how many of the maxlen digits are optional */
1592 restlen = maxlen - minlen;
1593 num = 0;
/* maxlen counts down once per digit consumed */
1594 while (!PEND && maxlen-- != 0) {
1595 PFETCH(c);
1596 if (ONIGENC_IS_CODE_XDIGIT(enc, c)) {
1597 val = (unsigned int )XDIGITVAL(enc,c);
1598 if ((INT_MAX_LIMIT - val) / 16UL < num)
1599 return -1; /* overflow */
1600
1601 num = (num << 4) + XDIGITVAL(enc,c);
1602 }
1603 else {
/* not a hex digit: push it back and undo the count-down for it */
1604 PUNFETCH;
1605 maxlen++;
1606 break;
1607 }
1608 }
/* more digits left unread than are optional => fewer than minlen were read */
1609 if (maxlen > restlen)
1610 return -2; /* not enough digits */
1611 *src = p;
1612 return num;
1613}
1614
/*
 * Read up to `maxlen` octal digits ('0'..'7') starting at *src.
 *
 * Returns the accumulated value and advances *src past the digits read
 * (possibly none, giving 0).  Returns -1, leaving *src untouched, when the
 * value would exceed INT_MAX_LIMIT.
 *
 * NOTE(review): listing line 1622 is not present in this copy.
 */
1615static int
1616scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,
1617 OnigEncoding enc)
1618{
1619 OnigCodePoint c;
1620 unsigned int num, val;
1621 UChar* p = *src;
1623
1624 num = 0;
1625 while (!PEND && maxlen-- != 0) {
1626 PFETCH(c);
/* a decimal digit below '8' is an octal digit */
1627 if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') {
1628 val = ODIGITVAL(c);
1629 if ((INT_MAX_LIMIT - val) / 8UL < num)
1630 return -1; /* overflow */
1631
1632 num = (num << 3) + val;
1633 }
1634 else {
1635 PUNFETCH;
1636 break;
1637 }
1638 }
1639 *src = p;
1640 return num;
1641}
1642
1643
/* Write one code point (SIZE_CODE_POINT bytes) into `bbuf` at byte offset
 * `pos`.  The value is passed by address (&(code)), so `code` must be an
 * lvalue. */
1644#define BBUF_WRITE_CODE_POINT(bbuf,pos,code) \
1645 BBUF_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)
1646
1647/* data format:
1648 [n][from-1][to-1][from-2][to-2] ... [from-n][to-n]
1649 (all data size is OnigCodePoint)
1650 */
/*
 * Allocate a new, empty code-range buffer and store it in *pbuf.
 *
 * The BBuf is obtained with xmalloc() and a range count of 0 is written at
 * offset 0.  Returns 0 on success, or the non-zero value of `r`.
 *
 * NOTE(review): listing lines 1660-1661 are not present in this copy; they
 * sit between the xmalloc() call and the test of `r`, so the assignment to
 * `r` (and any NULL check of the allocation) is not visible here.
 */
1651static int
1652new_code_range(BBuf** pbuf)
1653{
1654#define INIT_MULTI_BYTE_RANGE_SIZE (SIZE_CODE_POINT * 5)
1655 int r;
1656 OnigCodePoint n;
1657 BBuf* bbuf;
1658
1659 bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));
1662 if (r) return r;
1663
/* an empty set: the leading count word is 0 */
1664 n = 0;
1665 BBUF_WRITE_CODE_POINT(bbuf, 0, n);
1666 return 0;
1667}
1668
/*
 * Insert the range [from, to] into the sorted range list held in *pbuf,
 * merging it with every existing range it overlaps or touches.
 *
 * pbuf     : in/out buffer; created through new_code_range() when *pbuf is
 *            NULL.  Layout: a count word followed by (from, to) pairs.
 * env      : passed to CC_DUP_WARN when a duplicate is detected.
 * from, to : range bounds; swapped first if given in the wrong order.
 * checkdup : non-zero to warn when the new range duplicates existing
 *            coverage.
 *
 * Returns 0 on success or a non-zero error code.
 *
 * NOTE(review): listing line 1718 (the statement controlled by the
 * ONIG_MAX_MULTI_BYTE_RANGES_NUM test) is not present in this copy.
 */
1669static int
1670add_code_range_to_buf0(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to,
1671 int checkdup)
1672{
1673 int r, inc_n, pos;
1674 OnigCodePoint low, high, bound, x;
1675 OnigCodePoint n, *data;
1676 BBuf* bbuf;
1677
/* normalise so that from <= to (n is used as scratch here) */
1678 if (from > to) {
1679 n = from; from = to; to = n;
1680 }
1681
1682 if (IS_NULL(*pbuf)) {
1683 r = new_code_range(pbuf);
1684 if (r) return r;
1685 bbuf = *pbuf;
1686 n = 0;
1687 }
1688 else {
1689 bbuf = *pbuf;
/* n = number of ranges currently stored */
1690 GET_CODE_POINT(n, bbuf->p);
1691 }
/* data points at the first (from, to) pair, just past the count word */
1692 data = (OnigCodePoint* )(bbuf->p);
1693 data++;
1694
/* binary search: low = first range whose end is >= from - 1.
 * The search is skipped when from == 0, where from - 1 would wrap. */
1695 bound = (from == 0) ? 0 : n;
1696 for (low = 0; low < bound; ) {
1697 x = (low + bound) >> 1;
1698 if (from - 1 > data[x*2 + 1])
1699 low = x + 1;
1700 else
1701 bound = x;
1702 }
1703
/* binary search: high = first range whose start is > to + 1.
 * The search is skipped when to is the last code point (to + 1 would wrap). */
1704 high = (to == ONIG_LAST_CODE_POINT) ? n : low;
1705 for (bound = n; high < bound; ) {
1706 x = (high + bound) >> 1;
1707 if (to + 1 >= data[x*2])
1708 high = x + 1;
1709 else
1710 bound = x;
1711 }
1712 /* data[(low-1)*2+1] << from <= data[low*2]
1713 * data[(high-1)*2+1] <= to << data[high*2]
1714 */
1715
/* ranges low..high-1 are absorbed by the new one; inc_n is the net change
 * in the number of stored ranges (1 = pure insertion, <= 0 = merge) */
1716 inc_n = low + 1 - high;
1717 if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)
1719
1720 if (inc_n != 1) {
/* at least one existing range is merged: warn on duplicates if asked,
 * then widen [from, to] to cover the absorbed ranges */
1721 if (checkdup && from <= data[low*2+1]
1722 && (data[low*2] <= from || data[low*2+1] <= to))
1723 CC_DUP_WARN(env, from, to);
1724 if (from > data[low*2])
1725 from = data[low*2];
1726 if (to < data[(high - 1)*2 + 1])
1727 to = data[(high - 1)*2 + 1];
1728 }
1729
1730 if (inc_n != 0) {
/* byte offsets of the tail (ranges from `high` on) before and after */
1731 int from_pos = SIZE_CODE_POINT * (1 + high * 2);
1732 int to_pos = SIZE_CODE_POINT * (1 + (low + 1) * 2);
1733
1734 if (inc_n > 0) {
/* insertion: open a slot by moving the tail right (if there is a tail) */
1735 if (high < n) {
1736 int size = (n - high) * 2 * SIZE_CODE_POINT;
1737 BBUF_MOVE_RIGHT(bbuf, from_pos, to_pos, size);
1738 }
1739 }
1740 else {
/* several ranges collapsed into one: close the gap */
1741 BBUF_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);
1742 }
1743 }
1744
/* store the (possibly widened) range at index low and update the count */
1745 pos = SIZE_CODE_POINT * (1 + low * 2);
1746 BBUF_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);
1747 BBUF_WRITE_CODE_POINT(bbuf, pos, from);
1748 BBUF_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);
1749 n += inc_n;
1750 BBUF_WRITE_CODE_POINT(bbuf, 0, n);
1751
1752 return 0;
1753}
1754
1755static int
1756add_code_range_to_buf(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
1757{
1758 return add_code_range_to_buf0(pbuf, env, from, to, 1);
1759}
1760
/*
 * Add [from, to] to *pbuf after handling an inverted (from > to) range.
 *
 * When from <= to the range is passed straight to add_code_range_to_buf0()
 * together with `checkdup`.  When from > to, one visible path returns 0
 * without adding anything.
 *
 * NOTE(review): listing lines 1765 and 1768 are not present in this copy, so
 * the condition selecting the `return 0` path and the body of the `else`
 * branch are not visible here.
 */
1761static int
1762add_code_range0(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to, int checkdup)
1763{
1764 if (from > to) {
1766 return 0;
1767 else
1769 }
1770
1771 return add_code_range_to_buf0(pbuf, env, from, to, checkdup);
1772}
1773
1774static int
1775add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
1776{
1777 return add_code_range0(pbuf, env, from, to, 1);
1778}
1779
1780static int
1781not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf, ScanEnv* env)
1782{
1783 int r, i, n;
1784 OnigCodePoint pre, from, *data, to = 0;
1785
1786 *pbuf = (BBuf* )NULL;
1787 if (IS_NULL(bbuf)) {
1788 set_all:
1789 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
1790 }
1791
1792 data = (OnigCodePoint* )(bbuf->p);
1793 GET_CODE_POINT(n, data);
1794 data++;
1795 if (n <= 0) goto set_all;
1796
1797 r = 0;
1798 pre = MBCODE_START_POS(enc);
1799 for (i = 0; i < n; i++) {
1800 from = data[i*2];
1801 to = data[i*2+1];
1802 if (pre <= from - 1) {
1803 r = add_code_range_to_buf(pbuf, env, pre, from - 1);
1804 if (r != 0) return r;
1805 }
1806 if (to == ONIG_LAST_CODE_POINT) break;
1807 pre = to + 1;
1808 }
1809 if (to < ONIG_LAST_CODE_POINT) {
1810 r = add_code_range_to_buf(pbuf, env, to + 1, ONIG_LAST_CODE_POINT);
1811 }
1812 return r;
1813}
1814
/* Exchange two (buffer, negation flag) operand pairs in place.  All four
 * arguments are assigned to, so each must be a modifiable lvalue. */
1815#define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2) do {\
1816 BBuf *tbuf; \
1817 int tnot; \
1818 tnot = not1; not1 = not2; not2 = tnot; \
1819 tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \
1820} while (0)
1821
1822static int
1823or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,
1824 BBuf* bbuf2, int not2, BBuf** pbuf, ScanEnv* env)
1825{
1826 int r;
1827 OnigCodePoint i, n1, *data1;
1828 OnigCodePoint from, to;
1829
1830 *pbuf = (BBuf* )NULL;
1831 if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {
1832 if (not1 != 0 || not2 != 0)
1833 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
1834 return 0;
1835 }
1836
1837 r = 0;
1838 if (IS_NULL(bbuf2))
1839 SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
1840
1841 if (IS_NULL(bbuf1)) {
1842 if (not1 != 0) {
1843 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
1844 }
1845 else {
1846 if (not2 == 0) {
1847 return bbuf_clone(pbuf, bbuf2);
1848 }
1849 else {
1850 return not_code_range_buf(enc, bbuf2, pbuf, env);
1851 }
1852 }
1853 }
1854
1855 if (not1 != 0)
1856 SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
1857
1858 data1 = (OnigCodePoint* )(bbuf1->p);
1859 GET_CODE_POINT(n1, data1);
1860 data1++;
1861
1862 if (not2 == 0 && not1 == 0) { /* 1 OR 2 */
1863 r = bbuf_clone(pbuf, bbuf2);
1864 }
1865 else if (not1 == 0) { /* 1 OR (not 2) */
1866 r = not_code_range_buf(enc, bbuf2, pbuf, env);
1867 }
1868 if (r != 0) return r;
1869
1870 for (i = 0; i < n1; i++) {
1871 from = data1[i*2];
1872 to = data1[i*2+1];
1873 r = add_code_range_to_buf(pbuf, env, from, to);
1874 if (r != 0) return r;
1875 }
1876 return 0;
1877}
1878
1879static int
1880and_code_range1(BBuf** pbuf, ScanEnv* env, OnigCodePoint from1, OnigCodePoint to1,
1881 OnigCodePoint* data, int n)
1882{
1883 int i, r;
1884 OnigCodePoint from2, to2;
1885
1886 for (i = 0; i < n; i++) {
1887 from2 = data[i*2];
1888 to2 = data[i*2+1];
1889 if (from2 < from1) {
1890 if (to2 < from1) continue;
1891 else {
1892 from1 = to2 + 1;
1893 }
1894 }
1895 else if (from2 <= to1) {
1896 if (to2 < to1) {
1897 if (from1 <= from2 - 1) {
1898 r = add_code_range_to_buf(pbuf, env, from1, from2-1);
1899 if (r != 0) return r;
1900 }
1901 from1 = to2 + 1;
1902 }
1903 else {
1904 to1 = from2 - 1;
1905 }
1906 }
1907 else {
1908 from1 = from2;
1909 }
1910 if (from1 > to1) break;
1911 }
1912 if (from1 <= to1) {
1913 r = add_code_range_to_buf(pbuf, env, from1, to1);
1914 if (r != 0) return r;
1915 }
1916 return 0;
1917}
1918
1919static int
1920and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf, ScanEnv* env)
1921{
1922 int r;
1923 OnigCodePoint i, j, n1, n2, *data1, *data2;
1924 OnigCodePoint from, to, from1, to1, from2, to2;
1925
1926 *pbuf = (BBuf* )NULL;
1927 if (IS_NULL(bbuf1)) {
1928 if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */
1929 return bbuf_clone(pbuf, bbuf2);
1930 return 0;
1931 }
1932 else if (IS_NULL(bbuf2)) {
1933 if (not2 != 0)
1934 return bbuf_clone(pbuf, bbuf1);
1935 return 0;
1936 }
1937
1938 if (not1 != 0)
1939 SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
1940
1941 data1 = (OnigCodePoint* )(bbuf1->p);
1942 data2 = (OnigCodePoint* )(bbuf2->p);
1943 GET_CODE_POINT(n1, data1);
1944 GET_CODE_POINT(n2, data2);
1945 data1++;
1946 data2++;
1947
1948 if (not2 == 0 && not1 == 0) { /* 1 AND 2 */
1949 for (i = 0; i < n1; i++) {
1950 from1 = data1[i*2];
1951 to1 = data1[i*2+1];
1952 for (j = 0; j < n2; j++) {
1953 from2 = data2[j*2];
1954 to2 = data2[j*2+1];
1955 if (from2 > to1) break;
1956 if (to2 < from1) continue;
1957 from = MAX(from1, from2);
1958 to = MIN(to1, to2);
1959 r = add_code_range_to_buf(pbuf, env, from, to);
1960 if (r != 0) return r;
1961 }
1962 }
1963 }
1964 else if (not1 == 0) { /* 1 AND (not 2) */
1965 for (i = 0; i < n1; i++) {
1966 from1 = data1[i*2];
1967 to1 = data1[i*2+1];
1968 r = and_code_range1(pbuf, env, from1, to1, data2, n2);
1969 if (r != 0) return r;
1970 }
1971 }
1972
1973 return 0;
1974}
1975
1976static int
1977and_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env)
1978{
1979 OnigEncoding enc = env->enc;
1980 int r, not1, not2;
1981 BBuf *buf1, *buf2, *pbuf = 0;
1982 BitSetRef bsr1, bsr2;
1983 BitSet bs1, bs2;
1984
1985 not1 = IS_NCCLASS_NOT(dest);
1986 bsr1 = dest->bs;
1987 buf1 = dest->mbuf;
1988 not2 = IS_NCCLASS_NOT(cc);
1989 bsr2 = cc->bs;
1990 buf2 = cc->mbuf;
1991
1992 if (not1 != 0) {
1993 bitset_invert_to(bsr1, bs1);
1994 bsr1 = bs1;
1995 }
1996 if (not2 != 0) {
1997 bitset_invert_to(bsr2, bs2);
1998 bsr2 = bs2;
1999 }
2000 bitset_and(bsr1, bsr2);
2001 if (bsr1 != dest->bs) {
2002 bitset_copy(dest->bs, bsr1);
2003 bsr1 = dest->bs;
2004 }
2005 if (not1 != 0) {
2006 bitset_invert(dest->bs);
2007 }
2008
2009 if (! ONIGENC_IS_SINGLEBYTE(enc)) {
2010 if (not1 != 0 && not2 != 0) {
2011 r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf, env);
2012 }
2013 else {
2014 r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf, env);
2015 if (r == 0 && not1 != 0) {
2016 BBuf *tbuf = 0;
2017 r = not_code_range_buf(enc, pbuf, &tbuf, env);
2018 bbuf_free(pbuf);
2019 pbuf = tbuf;
2020 }
2021 }
2022 if (r != 0) {
2023 bbuf_free(pbuf);
2024 return r;
2025 }
2026
2027 dest->mbuf = pbuf;
2028 bbuf_free(buf1);
2029 return r;
2030 }
2031 return 0;
2032}
2033
2034static int
2035or_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env)
2036{
2037 OnigEncoding enc = env->enc;
2038 int r, not1, not2;
2039 BBuf *buf1, *buf2, *pbuf = 0;
2040 BitSetRef bsr1, bsr2;
2041 BitSet bs1, bs2;
2042
2043 not1 = IS_NCCLASS_NOT(dest);
2044 bsr1 = dest->bs;
2045 buf1 = dest->mbuf;
2046 not2 = IS_NCCLASS_NOT(cc);
2047 bsr2 = cc->bs;
2048 buf2 = cc->mbuf;
2049
2050 if (not1 != 0) {
2051 bitset_invert_to(bsr1, bs1);
2052 bsr1 = bs1;
2053 }
2054 if (not2 != 0) {
2055 bitset_invert_to(bsr2, bs2);
2056 bsr2 = bs2;
2057 }
2058 bitset_or(bsr1, bsr2);
2059 if (bsr1 != dest->bs) {
2060 bitset_copy(dest->bs, bsr1);
2061 bsr1 = dest->bs;
2062 }
2063 if (not1 != 0) {
2064 bitset_invert(dest->bs);
2065 }
2066
2067 if (! ONIGENC_IS_SINGLEBYTE(enc)) {
2068 if (not1 != 0 && not2 != 0) {
2069 r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf, env);
2070 }
2071 else {
2072 r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf, env);
2073 if (r == 0 && not1 != 0) {
2074 BBuf *tbuf = 0;
2075 r = not_code_range_buf(enc, pbuf, &tbuf, env);
2076 bbuf_free(pbuf);
2077 pbuf = tbuf;
2078 }
2079 }
2080 if (r != 0) {
2081 bbuf_free(pbuf);
2082 return r;
2083 }
2084
2085 dest->mbuf = pbuf;
2086 bbuf_free(buf1);
2087 return r;
2088 }
2089 else
2090 return 0;
2091}
2092
2093static void UNKNOWN_ESC_WARN(ScanEnv *env, int c);
2094
/*
 * Map the character following a backslash to the control character it
 * denotes (n, t, r, f, a, b, e, v).  Any other character is returned
 * unchanged; an unrecognised ASCII letter additionally triggers
 * UNKNOWN_ESC_WARN.
 *
 * NOTE(review): listing lines 2098 and 2108 are not present in this copy.
 * The extra closing brace after the switch suggests the switch sits inside a
 * conditional opened on line 2098, and the 'v' case presumably has a guard
 * on line 2108 -- confirm against the full source.
 */
2095static OnigCodePoint
2096conv_backslash_value(OnigCodePoint c, ScanEnv* env)
2097{
2099 switch (c) {
2100 case 'n': return '\n';
2101 case 't': return '\t';
2102 case 'r': return '\r';
2103 case 'f': return '\f';
2104 case 'a': return '\007';
2105 case 'b': return '\010';
2106 case 'e': return '\033';
2107 case 'v':
2109 return '\v';
2110 break;
2111
2112 default:
/* warn only for ASCII letters; the character itself is returned below */
2113 if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'))
2114 UNKNOWN_ESC_WARN(env, c);
2115 break;
2116 }
2117 }
2118 return c;
2119}
2120
2121#ifdef USE_NO_INVALID_QUANTIFIER
2122# define is_invalid_quantifier_target(node) 0
2123#else
/*
 * Decide whether `node` may not be the target of a quantifier.
 * Returns 1 for an invalid target and 0 otherwise.
 *
 *  - NT_ANCHOR  : always invalid.
 *  - NT_ENCLOSE : always allowed (the recursive check is commented out).
 *  - NT_ALT     : invalid as soon as one alternative is invalid.
 *  - anything else: allowed.
 *
 * NOTE(review): in the NT_LIST case both the early exit and the statement
 * after the loop return 0, so as written a list is never reported invalid.
 * NOTE(review): the declarator line (listing line 2125) is not present in
 * this copy.
 */
2124static int
2126{
2127 switch (NTYPE(node)) {
2128 case NT_ANCHOR:
2129 return 1;
2130 break;
2131
2132 case NT_ENCLOSE:
2133 /* allow enclosed elements */
2134 /* return is_invalid_quantifier_target(NENCLOSE(node)->target); */
2135 break;
2136
2137 case NT_LIST:
2138 do {
2139 if (! is_invalid_quantifier_target(NCAR(node))) return 0;
2140 } while (IS_NOT_NULL(node = NCDR(node)));
2141 return 0;
2142 break;
2143
2144 case NT_ALT:
2145 do {
2146 if (is_invalid_quantifier_target(NCAR(node))) return 1;
2147 } while (IS_NOT_NULL(node = NCDR(node)));
2148 break;
2149
2150 default:
2151 break;
2152 }
2153 return 0;
2154}
2155#endif
2156
2157/* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */
2158static int
2159popular_quantifier_num(QtfrNode* q)
2160{
2161 if (q->greedy) {
2162 if (q->lower == 0) {
2163 if (q->upper == 1) return 0;
2164 else if (IS_REPEAT_INFINITE(q->upper)) return 1;
2165 }
2166 else if (q->lower == 1) {
2167 if (IS_REPEAT_INFINITE(q->upper)) return 2;
2168 }
2169 }
2170 else {
2171 if (q->lower == 0) {
2172 if (q->upper == 1) return 3;
2173 else if (IS_REPEAT_INFINITE(q->upper)) return 4;
2174 }
2175 else if (q->lower == 1) {
2176 if (IS_REPEAT_INFINITE(q->upper)) return 5;
2177 }
2178 }
2179 return -1;
2180}
2181
2182
/* Actions used when a quantifier is applied directly to another quantifier
 * (these are the values stored in ReduceTypeTable).
 * NOTE(review): the line opening this enum (listing line 2183) is not
 * present in this copy. */
2184 RQ_ASIS = 0, /* as is */
2185 RQ_DEL = 1, /* delete parent */
2186 RQ_A, /* to '*' */
2187 RQ_AQ, /* to '*?' */
2188 RQ_QQ, /* to '??' */
2189 RQ_P_QQ, /* to '+)??' */
2190};
2191
/* Reduction action for each pair of nested common quantifiers.  Both indices
 * use the numbering of popular_quantifier_num(); the table is looked up as
 * ReduceTypeTable[cnum][pnum], i.e. the row is the child's number and the
 * column is the parent's. */
2192static enum ReduceType const ReduceTypeTable[6][6] = {
2193/* '?', '*', '+', '??', '*?', '+?' p / c */
2194 {RQ_DEL, RQ_A, RQ_A, RQ_QQ, RQ_AQ, RQ_ASIS}, /* '?' */
2195 {RQ_DEL, RQ_DEL, RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL}, /* '*' */
2196 {RQ_A, RQ_A, RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL}, /* '+' */
2197 {RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL, RQ_AQ, RQ_AQ}, /* '??' */
2198 {RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL}, /* '*?' */
2199 {RQ_ASIS, RQ_ASIS, RQ_ASIS, RQ_AQ, RQ_AQ, RQ_DEL} /* '+?' */
2200};
2201
/*
 * Simplify a quantifier (`pnode`) whose target is another quantifier
 * (`cnode`).
 *
 * Both quantifiers are classified with popular_quantifier_num(); if either
 * is not one of the six common forms, nothing is changed.  Otherwise the
 * action from ReduceTypeTable is applied.  For every action except RQ_P_QQ
 * and RQ_ASIS the child node is freed afterwards (with its target detached
 * so that the target itself survives).
 *
 * NOTE(review): the declarator line (listing line 2203) is not present in
 * this copy; `pnode` and `cnode` are used as Node pointers below.
 */
2202extern void
2204{
2205 int pnum, cnum;
2206 QtfrNode *p, *c;
2207
2208 p = NQTFR(pnode);
2209 c = NQTFR(cnode);
2210 pnum = popular_quantifier_num(p);
2211 cnum = popular_quantifier_num(c);
2212 if (pnum < 0 || cnum < 0) return ;
2213
2214 switch (ReduceTypeTable[cnum][pnum]) {
2215 case RQ_DEL:
/* the parent is redundant: overwrite it with a copy of the child */
2216 *pnode = *cnode;
2217 break;
2218 case RQ_A:
/* collapse into a single greedy '*' on the child's target */
2219 p->target = c->target;
2220 p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1;
2221 break;
2222 case RQ_AQ:
/* collapse into a single reluctant '*?' on the child's target */
2223 p->target = c->target;
2224 p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0;
2225 break;
2226 case RQ_QQ:
/* collapse into a single reluctant '??' on the child's target */
2227 p->target = c->target;
2228 p->lower = 0; p->upper = 1; p->greedy = 0;
2229 break;
2230 case RQ_P_QQ:
/* keep both nodes: parent becomes '??', child becomes greedy '+' */
2231 p->target = cnode;
2232 p->lower = 0; p->upper = 1; p->greedy = 0;
2233 c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1;
2234 return ;
2235 break;
2236 case RQ_ASIS:
/* no simplification: just link the child under the parent */
2237 p->target = cnode;
2238 return ;
2239 break;
2240 }
2241
/* the child's target now belongs to the parent; detach it before freeing */
2242 c->target = NULL_NODE;
2243 onig_node_free(cnode);
2244}
2245
2246
/* Token kinds stored in OnigToken.type.
 * NOTE(review): only a few enumerators survive in this copy; the line
 * opening the enum, most of its members and its terminator are missing. */
2248 TK_EOT = 0, /* end of token */
2260 TK_ANYCHAR_ANYTIME, /* SQL '%' == .* */
2266 TK_CHAR_PROPERTY, /* \p{...}, \P{...} */
2270 /* in cc */
2274 TK_CC_AND, /* && */
2275 TK_CC_CC_OPEN /* [ */
2277
/* One lexical token as filled in by the tokenizer functions in this file;
 * the union member that is valid depends on the token kind.
 * NOTE(review): several member lines are missing from this copy.  Members
 * such as type, escaped, u.code, u.repeat.lower/upper and u.prop.ctype are
 * assigned elsewhere in this file but their declarations are not visible
 * below. */
2278typedef struct {
2281 int base; /* is number: 8, 16 (used in [....]) */
2283 union {
2285 int c;
2287 struct {
2290 } anchor;
2291 struct {
2296 } repeat;
2297 struct {
2298 int num;
2299 int ref1;
2300 int* refs;
2302#ifdef USE_BACKREF_WITH_LEVEL
2304 int level; /* \k<name+n> */
2305#endif
2306 } backref;
2307 struct {
2310 int gnum;
2311 int rel;
2312 } call;
2313 struct {
2315 int not;
2316 } prop;
2317 } u;
2318} OnigToken;
2319
2320
/*
 * Parse an interval quantifier body ("n}", "n,}", ",m}", "n,m}") starting
 * just after the opening brace.
 *
 * src : in/out cursor; advanced only when a valid interval was read.
 * tok : on success receives type TK_INTERVAL and the lower/upper bounds.
 *
 * Returns 0 for a normal {n,m} interval and 2 for a fixed {n} one.  When the
 * text is not a valid interval, returns 1 if the syntax has
 * ONIG_SYN_ALLOW_INVALID_INTERVAL (the caller then treats the brace
 * literally), otherwise an ONIGERR_* code.
 *
 * NOTE(review): listing lines 2329, 2343, 2350, 2353, 2369, 2388, 2396 and
 * 2409 are not present in this copy, so several error returns and two
 * opening conditions are not visible below.
 */
2321static int
2322fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
2323{
2324 int low, up, syn_allow, non_low = 0;
2325 int r = 0;
2326 OnigCodePoint c;
2327 OnigEncoding enc = env->enc;
2328 UChar* p = *src;
2330
2331 syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL);
2332
2333 if (PEND) {
2334 if (syn_allow)
2335 return 1; /* "....{" : OK! */
2336 else
2337 return ONIGERR_END_PATTERN_AT_LEFT_BRACE; /* "....{" syntax error */
2338 }
2339
2340 if (! syn_allow) {
2341 c = PPEEK;
2342 if (c == ')' || c == '(' || c == '|') {
2344 }
2345 }
2346
2347 low = onig_scan_unsigned_number(&p, end, env->enc);
2348 if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
2349 if (low > ONIG_MAX_REPEAT_NUM)
2351
2352 if (p == *src) { /* can't read low */
2354 /* allow {,n} as {0,n} */
2355 low = 0;
2356 non_low = 1;
2357 }
2358 else
2359 goto invalid;
2360 }
2361
2362 if (PEND) goto invalid;
2363 PFETCH(c);
2364 if (c == ',') {
2365 UChar* prev = p;
2366 up = onig_scan_unsigned_number(&p, end, env->enc);
2367 if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
2368 if (up > ONIG_MAX_REPEAT_NUM)
2370
2371 if (p == prev) {
/* nothing after the comma: "{,}" is invalid, "{n,}" is open-ended */
2372 if (non_low != 0)
2373 goto invalid;
2374 up = REPEAT_INFINITE; /* {n,} : {n,infinite} */
2375 }
2376 }
2377 else {
/* no comma: a lower bound must have been read */
2378 if (non_low != 0)
2379 goto invalid;
2380
2381 PUNFETCH;
2382 up = low; /* {n} : exact n times */
2383 r = 2; /* fixed */
2384 }
2385
2386 if (PEND) goto invalid;
2387 PFETCH(c);
2389 if (c != MC_ESC(env->syntax)) goto invalid;
2390 if (PEND) goto invalid;
2391 PFETCH(c);
2392 }
2393 if (c != '}') goto invalid;
2394
2395 if (!IS_REPEAT_INFINITE(up) && low > up) {
2397 }
2398
2399 tok->type = TK_INTERVAL;
2400 tok->u.repeat.lower = low;
2401 tok->u.repeat.upper = up;
2402 *src = p;
2403 return r; /* 0: normal {n,m}, 2: fixed {n} */
2404
2405 invalid:
2406 if (syn_allow)
2407 return 1; /* OK */
2408 else
2410}
2411
2412/* \M-, \C-, \c, or \... */
/*
 * Decode the character that follows an escape introducer, handling the
 * meta (\M-x), control (\C-x, \cx) and plain backslash forms.  Meta and
 * control forms may themselves be followed by another escape, which is
 * decoded recursively.
 *
 * src : in/out cursor; advanced past the consumed input on success only.
 * val : receives the decoded code point.
 *
 * Returns 0 on success, otherwise an ONIGERR_* code or the negative result
 * of the nested call.
 *
 * NOTE(review): listing lines 2421, 2426-2427, 2430, 2443-2444, 2453 and
 * 2455 are not present in this copy, so the conditions guarding the
 * `else goto backslash;` branches are not visible below.
 */
2413static int
2414fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val)
2415{
2416 int v;
2417 OnigCodePoint c;
2418 OnigEncoding enc = env->enc;
2419 UChar* p = *src;
2420
2422
2423 PFETCH_S(c);
2424 switch (c) {
2425 case 'M':
2428 PFETCH_S(c);
2429 if (c != '-') return ONIGERR_META_CODE_SYNTAX;
2431 PFETCH_S(c);
2432 if (c == MC_ESC(env->syntax)) {
2433 v = fetch_escaped_value(&p, end, env, &c);
2434 if (v < 0) return v;
2435 }
/* meta: keep the low byte and set its top bit */
2436 c = ((c & 0xff) | 0x80);
2437 }
2438 else
2439 goto backslash;
2440 break;
2441
2442 case 'C':
2445 PFETCH_S(c);
2446 if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;
2447 goto control;
2448 }
2449 else
2450 goto backslash;
2451
2452 case 'c':
2454 control:
2456 PFETCH_S(c);
2457 if (c == '?') {
/* control-'?' yields 0177 */
2458 c = 0177;
2459 }
2460 else {
2461 if (c == MC_ESC(env->syntax)) {
2462 v = fetch_escaped_value(&p, end, env, &c);
2463 if (v < 0) return v;
2464 }
/* control: mask the character with 0x9f */
2465 c &= 0x9f;
2466 }
2467 break;
2468 }
2469 /* fall through */
2470
2471 default:
2472 {
2473 backslash:
2474 c = conv_backslash_value(c, env);
2475 }
2476 break;
2477 }
2478
2479 *src = p;
2480 *val = c;
2481 return 0;
2482}
2483
2484static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);
2485
2486static OnigCodePoint
2487get_name_end_code_point(OnigCodePoint start)
2488{
2489 switch (start) {
2490 case '<': return (OnigCodePoint )'>'; break;
2491 case '\'': return (OnigCodePoint )'\''; break;
2492 case '(': return (OnigCodePoint )')'; break;
2493 case '{': return (OnigCodePoint )'}'; break;
2494 default:
2495 break;
2496 }
2497
2498 return (OnigCodePoint )0;
2499}
2500
2501#ifdef USE_NAMED_GROUP
2502# ifdef RUBY
2503# define ONIGENC_IS_CODE_NAME(enc, c) TRUE
2504# else
2505# define ONIGENC_IS_CODE_NAME(enc, c) ONIGENC_IS_CODE_WORD(enc, c)
2506# endif
2507
2508# ifdef USE_BACKREF_WITH_LEVEL
2509/*
2510 \k<name+n>, \k<name-n>
2511 \k<num+n>, \k<num-n>
2512 \k<-num+n>, \k<-num-n>
2513*/
/*
 * Parse a group reference of the form name, number or -number, optionally
 * followed by a nest level (+n / -n), up to the closing delimiter that
 * matches `start_code`.
 *
 * src       : in/out cursor; advanced past the closing delimiter on success.
 * rname_end : receives the end of the name/number part.
 * rback_num : receives the signed group number for numeric references and
 *             is left at 0 for names.
 * rlevel    : receives the signed level when one is present.
 *
 * Returns 1 when a level was given, 0 when it was not, or a non-zero error
 * code (after recording the offending text with
 * onig_scan_env_set_error_string).
 *
 * is_num tracks the kind of reference: 0 = name, 1 = digits seen,
 * 2 = only a leading '-' seen so far.
 *
 * NOTE(review): listing lines 2526, 2538, 2543, 2554, 2571, 2576, 2586 and
 * 2605 are not present in this copy; they appear to hold most of the
 * assignments to `r`, so the specific error codes are not visible below.
 */
2514static int
2515fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
2516 UChar** rname_end, ScanEnv* env,
2517 int* rback_num, int* rlevel)
2518{
2519 int r, sign, is_num, exist_level;
2520 OnigCodePoint end_code;
2521 OnigCodePoint c = 0;
2522 OnigEncoding enc = env->enc;
2523 UChar *name_end;
2524 UChar *pnum_head;
2525 UChar *p = *src;
2527
2528 *rback_num = 0;
2529 is_num = exist_level = 0;
2530 sign = 1;
2531 pnum_head = *src;
2532
2533 end_code = get_name_end_code_point(start_code);
2534
2535 name_end = end;
2536 r = 0;
/* classify the reference by its first character */
2537 if (PEND) {
2539 }
2540 else {
2541 PFETCH(c);
2542 if (c == end_code)
2544
2545 if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
2546 is_num = 1;
2547 }
2548 else if (c == '-') {
/* relative number: the digits start after the sign */
2549 is_num = 2;
2550 sign = -1;
2551 pnum_head = p;
2552 }
2553 else if (!ONIGENC_IS_CODE_NAME(enc, c)) {
2555 }
2556 }
2557
/* consume the rest of the name/number up to a delimiter or level sign */
2558 while (!PEND) {
2559 name_end = p;
2560 PFETCH(c);
2561 if (c == end_code || c == ')' || c == '+' || c == '-') {
/* a bare '-' with no digits is not a valid reference */
2562 if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME;
2563 break;
2564 }
2565
2566 if (is_num != 0) {
2567 if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
2568 is_num = 1;
2569 }
2570 else {
2572 is_num = 0;
2573 }
2574 }
2575 else if (!ONIGENC_IS_CODE_NAME(enc, c)) {
2577 }
2578 }
2579
2580 if (r == 0 && c != end_code) {
2581 if (c == '+' || c == '-') {
/* optional nest level: sign followed by a decimal number */
2582 int level;
2583 int flag = (c == '-' ? -1 : 1);
2584
2585 if (PEND) {
2587 goto end;
2588 }
2589 PFETCH(c);
2590 if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err;
2591 PUNFETCH;
2592 level = onig_scan_unsigned_number(&p, end, enc);
2593 if (level < 0) return ONIGERR_TOO_BIG_NUMBER;
2594 *rlevel = (level * flag);
2595 exist_level = 1;
2596
2597 if (!PEND) {
2598 PFETCH(c);
2599 if (c == end_code)
2600 goto end;
2601 }
2602 }
2603
2604 err:
2606 name_end = end;
2607 }
2608
2609 end:
2610 if (r == 0) {
2611 if (is_num != 0) {
/* convert the digit run; group number 0 is rejected */
2612 *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
2613 if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
2614 else if (*rback_num == 0) goto err;
2615
2616 *rback_num *= sign;
2617 }
2618
2619 *rname_end = name_end;
2620 *src = p;
2621 return (exist_level ? 1 : 0);
2622 }
2623 else {
2624 onig_scan_env_set_error_string(env, r, *src, name_end);
2625 return r;
2626 }
2627}
2628# endif /* USE_BACKREF_WITH_LEVEL */
2629
2630/*
2631 ref: 0 -> define name (don't allow number name)
2632 1 -> reference name (allow number name)
2633*/
/*
 * Parse a group name (or, when `ref` is 1, a group number / relative
 * number) up to the closing delimiter that matches `start_code`.
 *
 * src       : in/out cursor; advanced past the closing delimiter on success.
 * rname_end : receives the end of the name/number text on success.
 * rback_num : receives the signed group number for numeric references and
 *             is left at 0 for names.
 * ref       : 0 when defining a name (numbers are not accepted as names),
 *             1 when referencing (numbers are accepted).
 *
 * Returns 0 on success or a non-zero error code.  On the error paths that
 * reach `err`, the offending text is recorded with
 * onig_scan_env_set_error_string; the `teardown` path first skips ahead to
 * the closing delimiter so that the whole name is reported.
 *
 * NOTE(review): listing lines 2656, 2661, 2667, 2678, 2683, 2693, 2705,
 * 2707, 2713, 2720 and 2729 are not present in this copy; they appear to
 * hold the assignments to `r`, so the specific error codes are not visible
 * below.
 */
2634static int
2635fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
2636 UChar** rname_end, ScanEnv* env, int* rback_num, int ref)
2637{
2638 int r, is_num, sign;
2639 OnigCodePoint end_code;
2640 OnigCodePoint c = 0;
2641 OnigEncoding enc = env->enc;
2642 UChar *name_end;
2643 UChar *pnum_head;
2644 UChar *p = *src;
2645
2646 *rback_num = 0;
2647
2648 end_code = get_name_end_code_point(start_code);
2649
2650 name_end = end;
2651 pnum_head = *src;
2652 r = 0;
2653 is_num = 0;
2654 sign = 1;
/* classify by the first character: digit, '-' (relative number) or name */
2655 if (PEND) {
2657 }
2658 else {
2659 PFETCH_S(c);
2660 if (c == end_code)
2662
2663 if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
2664 if (ref == 1)
2665 is_num = 1;
2666 else {
2668 is_num = 0;
2669 }
2670 }
2671 else if (c == '-') {
2672 if (ref == 1) {
/* relative number: the digits start after the sign */
2673 is_num = 2;
2674 sign = -1;
2675 pnum_head = p;
2676 }
2677 else {
2679 is_num = 0;
2680 }
2681 }
2682 else if (!ONIGENC_IS_CODE_NAME(enc, c)) {
2684 }
2685 }
2686
2687 if (r == 0) {
/* consume the remaining characters up to the closing delimiter */
2688 while (!PEND) {
2689 name_end = p;
2690 PFETCH_S(c);
2691 if (c == end_code || c == ')') {
/* is_num == 2 here means only the '-' sign was seen */
2692 if (is_num == 2) {
2694 goto teardown;
2695 }
2696 break;
2697 }
2698
2699 if (is_num != 0) {
2700 if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
2701 is_num = 1;
2702 }
2703 else {
2704 if (!ONIGENC_IS_CODE_WORD(enc, c))
2706 else
2708 goto teardown;
2709 }
2710 }
2711 else {
2712 if (!ONIGENC_IS_CODE_NAME(enc, c)) {
2714 goto teardown;
2715 }
2716 }
2717 }
2718
2719 if (c != end_code) {
2721 name_end = end;
2722 goto err;
2723 }
2724
2725 if (is_num != 0) {
/* convert the digit run and apply the sign */
2726 *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
2727 if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
2728 else if (*rback_num == 0) {
2730 goto err;
2731 }
2732
2733 *rback_num *= sign;
2734 }
2735
2736 *rname_end = name_end;
2737 *src = p;
2738 return 0;
2739 }
2740 else {
2741teardown:
/* skip to the closing delimiter so the error message covers the name */
2742 while (!PEND) {
2743 name_end = p;
2744 PFETCH_S(c);
2745 if (c == end_code || c == ')')
2746 break;
2747 }
2748 if (PEND)
2749 name_end = end;
2750
2751 err:
2752 onig_scan_env_set_error_string(env, r, *src, name_end);
2753 return r;
2754 }
2755}
2756#else
/*
 * Variant of fetch_name() compiled when USE_NAMED_GROUP is not defined.
 * The visible code only converts a numeric reference: after the scan the
 * text is always handed to onig_scan_unsigned_number().
 *
 * src       : in/out cursor; advanced past the closing delimiter on success.
 * rname_end : set to `end` on entry and to the end of the number on success.
 * rback_num : receives the signed group number.
 * ref       : not referenced in the visible lines.
 *
 * Returns 0 on success or a non-zero error code (after recording the
 * offending text with onig_scan_env_set_error_string on the `err` path).
 *
 * NOTE(review): listing lines 2768, 2781, 2786, 2797, 2807, 2810 and 2818
 * are not present in this copy; they appear to hold the assignments to `r`,
 * so the specific error codes are not visible below.
 */
2757static int
2758fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
2759 UChar** rname_end, ScanEnv* env, int* rback_num, int ref)
2760{
2761 int r, is_num, sign;
2762 OnigCodePoint end_code;
2763 OnigCodePoint c = 0;
2764 UChar *name_end;
2765 OnigEncoding enc = env->enc;
2766 UChar *pnum_head;
2767 UChar *p = *src;
2769
2770 *rback_num = 0;
2771
2772 end_code = get_name_end_code_point(start_code);
2773
2774 *rname_end = name_end = end;
2775 r = 0;
2776 pnum_head = *src;
2777 is_num = 0;
2778 sign = 1;
2779
2780 if (PEND) {
2782 }
2783 else {
2784 PFETCH(c);
2785 if (c == end_code)
2787
2788 if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
2789 is_num = 1;
2790 }
2791 else if (c == '-') {
/* relative number: the digits start after the sign */
2792 is_num = 2;
2793 sign = -1;
2794 pnum_head = p;
2795 }
2796 else {
2798 }
2799 }
2800
/* scan to the closing delimiter */
2801 while (!PEND) {
2802 name_end = p;
2803
2804 PFETCH(c);
2805 if (c == end_code || c == ')') break;
2806 if (! ONIGENC_IS_CODE_DIGIT(enc, c))
2808 }
2809 if (r == 0 && c != end_code) {
2811 name_end = end;
2812 }
2813
2814 if (r == 0) {
2815 *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
2816 if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
2817 else if (*rback_num == 0) {
2819 goto err;
2820 }
2821 *rback_num *= sign;
2822
2823 *rname_end = name_end;
2824 *src = p;
2825 return 0;
2826 }
2827 else {
2828 err:
2829 onig_scan_env_set_error_string(env, r, *src, name_end);
2830 return r;
2831 }
2832}
2833#endif /* USE_NAMED_GROUP */
2834
2835
/*
 * Format a printf-style warning about the pattern and emit it.
 *
 * With RUBY defined the message goes to rb_warn(), or to rb_compile_warn()
 * with the source file and line when env->sourcefile is set; otherwise it is
 * passed to the onig_warn callback.
 *
 * NOTE(review): listing lines 2840 and 2842 are not present in this copy, so
 * the declaration of `buf` and the call that formats the message into it
 * (taking env->pattern, env->pattern_end, fmt and args) are only partly
 * visible below.
 */
2836static void
2837onig_syntax_warn(ScanEnv *env, const char *fmt, ...)
2838{
2839 va_list args;
2841 va_start(args, fmt);
2843 env->pattern, env->pattern_end,
2844 (const UChar *)fmt, args);
2845 va_end(args);
2846#ifdef RUBY
2847 if (env->sourcefile == NULL)
2848 rb_warn("%s", (char *)buf);
2849 else
2850 rb_compile_warn(env->sourcefile, env->sourceline, "%s", (char *)buf);
2851#else
2852 (*onig_warn)((char* )buf);
2853#endif
2854}
2855
/*
 * Warn that the string `c` appears unescaped inside a character class.
 * Does nothing when warnings are disabled (onig_warn == onig_null_warn).
 *
 * NOTE(review): listing lines 2861-2862 are not present in this copy; they
 * hold the condition that opens the block around the warning call.
 */
2856static void
2857CC_ESC_WARN(ScanEnv *env, UChar *c)
2858{
2859 if (onig_warn == onig_null_warn) return ;
2860
2863 onig_syntax_warn(env, "character class has '%s' without escape", c);
2864 }
2865}
2866
/*
 * Warn that the string `c` appears unescaped in the regular expression.
 * Does nothing when warnings are disabled (onig_warn == onig_null_warn).
 *
 * NOTE(review): listing line 2872 is not present in this copy; it holds the
 * condition that opens the block around the warning call.
 */
2867static void
2868CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c)
2869{
2870 if (onig_warn == onig_null_warn) return ;
2871
2873 onig_syntax_warn(env, "regular expression has '%s' without escape", c);
2874 }
2875}
2876
2877#ifndef RTEST
2878# define RTEST(v) 1
2879#endif
2880
/*
 * Warn about a duplicated range in a character class.
 *
 * Nothing happens when warnings are disabled or RTEST(ruby_verbose) is
 * false.  The warning is issued only if the syntax enables
 * ONIG_SYN_WARN_CC_DUP and that bit is not yet set in env->warnings_flag.
 * Unless WARN_ALL_CC_DUP is defined, the bit is then set so the warning is
 * given only once per environment.
 *
 * NOTE(review): the declarator line (listing line 2882) is not present in
 * this copy; `env`, `from` and `to` are used below.
 */
2881static void
2883{
2884 if (onig_warn == onig_null_warn || !RTEST(ruby_verbose)) return ;
2885
2886 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_DUP) &&
2887 !(env->warnings_flag & ONIG_SYN_WARN_CC_DUP)) {
2888#ifdef WARN_ALL_CC_DUP
2889 onig_syntax_warn(env, "character class has duplicated range: %04x-%04x", from, to);
2890#else
/* remember that the warning has been given */
2891 env->warnings_flag |= ONIG_SYN_WARN_CC_DUP;
2892 onig_syntax_warn(env, "character class has duplicated range");
2893#endif
2894 }
2895}
2896
2897static void
2898UNKNOWN_ESC_WARN(ScanEnv *env, int c)
2899{
2900 if (onig_warn == onig_null_warn || !RTEST(ruby_verbose)) return ;
2901 onig_syntax_warn(env, "Unknown escape \\%c is ignored", c);
2902}
2903
2904static UChar*
2905find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
2906 UChar **next, OnigEncoding enc)
2907{
2908 int i;
2909 OnigCodePoint x;
2910 UChar *q;
2911 UChar *p = from;
2912
2913 while (p < to) {
2914 x = ONIGENC_MBC_TO_CODE(enc, p, to);
2915 q = p + enclen(enc, p, to);
2916 if (x == s[0]) {
2917 for (i = 1; i < n && q < to; i++) {
2918 x = ONIGENC_MBC_TO_CODE(enc, q, to);
2919 if (x != s[i]) break;
2920 q += enclen(enc, q, to);
2921 }
2922 if (i >= n) {
2923 if (IS_NOT_NULL(next))
2924 *next = q;
2925 return p;
2926 }
2927 }
2928 p = q;
2929 }
2930 return NULL_UCHARP;
2931}
2932
2933static int
2934str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,
2935 OnigCodePoint bad, OnigEncoding enc, const OnigSyntaxType* syn)
2936{
2937 int i, in_esc;
2938 OnigCodePoint x;
2939 UChar *q;
2940 UChar *p = from;
2941
2942 in_esc = 0;
2943 while (p < to) {
2944 if (in_esc) {
2945 in_esc = 0;
2946 p += enclen(enc, p, to);
2947 }
2948 else {
2949 x = ONIGENC_MBC_TO_CODE(enc, p, to);
2950 q = p + enclen(enc, p, to);
2951 if (x == s[0]) {
2952 for (i = 1; i < n && q < to; i++) {
2953 x = ONIGENC_MBC_TO_CODE(enc, q, to);
2954 if (x != s[i]) break;
2955 q += enclen(enc, q, to);
2956 }
2957 if (i >= n) return 1;
2958 p += enclen(enc, p, to);
2959 }
2960 else {
2961 x = ONIGENC_MBC_TO_CODE(enc, p, to);
2962 if (x == bad) return 0;
2963 else if (x == MC_ESC(syn)) in_esc = 1;
2964 p = q;
2965 }
2966 }
2967 }
2968 return 0;
2969}
2970
/*
 * fetch_token_in_cc: read one token while scanning inside a character class.
 *
 *  tok : receives the token (type, base, escaped flag and payload in tok->u).
 *  src : in/out scan position; it is written back only on the normal exit
 *        through the `end:` label.
 *  end : end of the pattern text.
 *  env : scan environment; supplies the syntax table and the encoding.
 *
 * Returns tok->type on the normal path, or an ONIGERR_* code returned directly
 * from the escape handling (those early returns do not update *src).
 *
 * NOTE(review): this listing carries fused listing numbers and several
 * numbered lines are absent (2980, 3000, 3003, 3058, 3063, 3081, 3088, 3118,
 * 3135, 3142, 3160, 3201, 3210).  The statements on those lines are not
 * visible here; confirm against the real source before relying on the guards.
 */
2971static int
2972fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
2973{
2974 int num;
2975 OnigCodePoint c, c2;
2976 const OnigSyntaxType* syn = env->syntax;
2977 OnigEncoding enc = env->enc;
2978 UChar* prev;
2979 UChar* p = *src;
2981
2982 if (PEND) {
2983 tok->type = TK_EOT;
2984 return tok->type;
2985 }
2986
 /* Default assumption: the fetched character is a plain, unescaped TK_CHAR. */
2987 PFETCH(c);
2988 tok->type = TK_CHAR;
2989 tok->base = 0;
2990 tok->u.c = c;
2991 tok->escaped = 0;
2992
2993 if (c == ']') {
2994 tok->type = TK_CC_CLOSE;
2995 }
2996 else if (c == '-') {
2997 tok->type = TK_CC_RANGE;
2998 }
2999 else if (c == MC_ESC(syn)) {
 /* NOTE(review): lines 3000 and 3003 are missing from the listing; the
    condition guarding this goto and whatever precedes the second fetch
    are not visible. */
3001 goto end;
3002
3004
 /* Fetch the character that follows the escape and classify it. */
3005 PFETCH(c);
3006 tok->escaped = 1;
3007 tok->u.c = c;
3008 switch (c) {
3009 case 'w':
3010 tok->type = TK_CHAR_TYPE;
3011 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
3012 tok->u.prop.not = 0;
3013 break;
3014 case 'W':
3015 tok->type = TK_CHAR_TYPE;
3016 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
3017 tok->u.prop.not = 1;
3018 break;
3019 case 'd':
3020 tok->type = TK_CHAR_TYPE;
3021 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
3022 tok->u.prop.not = 0;
3023 break;
3024 case 'D':
3025 tok->type = TK_CHAR_TYPE;
3026 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
3027 tok->u.prop.not = 1;
3028 break;
3029 case 's':
3030 tok->type = TK_CHAR_TYPE;
3031 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
3032 tok->u.prop.not = 0;
3033 break;
3034 case 'S':
3035 tok->type = TK_CHAR_TYPE;
3036 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
3037 tok->u.prop.not = 1;
3038 break;
 /* \h and \H stay plain characters unless the syntax enables them. */
3039 case 'h':
3040 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
3041 tok->type = TK_CHAR_TYPE;
3042 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
3043 tok->u.prop.not = 0;
3044 break;
3045 case 'H':
3046 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
3047 tok->type = TK_CHAR_TYPE;
3048 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
3049 tok->u.prop.not = 1;
3050 break;
3051
3052 case 'p':
3053 case 'P':
3054 if (PEND) break;
3055
3056 c2 = PPEEK;
 /* NOTE(review): line 3058 (rest of this condition) is missing. */
3057 if (c2 == '{' &&
3059 PINC;
3060 tok->type = TK_CHAR_PROPERTY;
 /* \P starts out negated; a leading '^' below flips the flag again. */
3061 tok->u.prop.not = (c == 'P' ? 1 : 0);
3062
 /* NOTE(review): line 3063 (the guard opening this inner block) is missing. */
3064 PFETCH(c2);
3065 if (c2 == '^') {
3066 tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
3067 }
3068 else
3069 PUNFETCH;
3070 }
3071 }
3072 else {
3073 onig_syntax_warn(env, "invalid Unicode Property \\%c", c);
3074 }
3075 break;
3076
3077 case 'x':
3078 if (PEND) break;
3079
 /* prev remembers where to rewind to if the escape turns out malformed. */
3080 prev = p;
 /* NOTE(review): line 3081 (the condition opening the brace form) is missing. */
3082 PINC;
3083 num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);
3084 if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
3085 if (!PEND) {
3086 c2 = PPEEK;
 /* NOTE(review): line 3088 (body of this if) is missing. */
3087 if (ONIGENC_IS_CODE_XDIGIT(enc, c2))
3089 }
3090
 /* Accept only when the scan moved past the first character after prev
    and the next character is '}'; otherwise rewind to prev. */
3091 if (p > prev + enclen(enc, prev, end) && !PEND && (PPEEK_IS('}'))) {
3092 PINC;
3093 tok->type = TK_CODE_POINT;
3094 tok->base = 16;
3095 tok->u.code = (OnigCodePoint )num;
3096 }
3097 else {
3098 /* can't read nothing or invalid format */
3099 p = prev;
3100 }
3101 }
 /* Two-digit hex form: produces a raw byte rather than a code point. */
3102 else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
3103 num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);
3104 if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
3105 if (p == prev) { /* can't read nothing. */
3106 num = 0; /* but, it's not error */
3107 }
3108 tok->type = TK_RAW_BYTE;
3109 tok->base = 16;
3110 tok->u.c = num;
3111 }
3112 break;
3113
3114 case 'u':
3115 if (PEND) break;
3116
3117 prev = p;
 /* NOTE(review): line 3118 (the guard opening this block) is missing. */
3119 num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);
 /* The scanner's negative results are split here: below -1 is reported
    as too few digits, -1 as too big a number. */
3120 if (num < -1) return ONIGERR_TOO_SHORT_DIGITS;
3121 else if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
3122 if (p == prev) { /* can't read nothing. */
3123 num = 0; /* but, it's not error */
3124 }
3125 tok->type = TK_CODE_POINT;
3126 tok->base = 16;
3127 tok->u.code = (OnigCodePoint )num;
3128 }
3129 break;
3130
3131 case 'o':
3132 if (PEND) break;
3133
3134 prev = p;
 /* NOTE(review): line 3135 (the condition opening the brace form) is missing. */
3136 PINC;
3137 num = scan_unsigned_octal_number(&p, end, 11, enc);
3138 if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
3139 if (!PEND) {
3140 c2 = PPEEK;
 /* NOTE(review): line 3142 (body of this if) is missing. */
3141 if (ONIGENC_IS_CODE_DIGIT(enc, c2) && c2 < '8')
3143 }
3144
 /* Same acceptance rule as the hexadecimal brace form above. */
3145 if (p > prev + enclen(enc, prev, end) && !PEND && (PPEEK_IS('}'))) {
3146 PINC;
3147 tok->type = TK_CODE_POINT;
3148 tok->base = 8;
3149 tok->u.code = (OnigCodePoint )num;
3150 }
3151 else {
3152 /* can't read nothing or invalid format */
3153 p = prev;
3154 }
3155 }
3156 break;
3157
3158 case '0':
3159 case '1': case '2': case '3': case '4': case '5': case '6': case '7':
 /* NOTE(review): line 3160 (the guard opening this block) is missing. */
 /* Un-read the digit so the octal scanner sees the whole number. */
3161 PUNFETCH;
3162 prev = p;
3163 num = scan_unsigned_octal_number(&p, end, 3, enc);
3164 if (num < 0 || 0xff < num) return ONIGERR_TOO_BIG_NUMBER;
3165 if (p == prev) { /* can't read nothing. */
3166 num = 0; /* but, it's not error */
3167 }
3168 tok->type = TK_RAW_BYTE;
3169 tok->base = 8;
3170 tok->u.c = num;
3171 }
3172 break;
3173
3174 default:
 /* Hand any other escape to fetch_escaped_value; the token becomes a
    code point only when the resolved value differs from the literal. */
3175 PUNFETCH;
3176 num = fetch_escaped_value(&p, end, env, &c2);
3177 if (num < 0) return num;
3178 if ((OnigCodePoint )tok->u.c != c2) {
3179 tok->u.code = (OnigCodePoint )c2;
3180 tok->type = TK_CODE_POINT;
3181 }
3182 break;
3183 }
3184 }
3185 else if (c == '[') {
3186 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) {
 /* Look ahead for the ":]" terminator before treating "[:" as the start
    of a POSIX bracket; if absent, un-read and handle '[' below. */
3187 OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' };
3188 tok->backp = p; /* point at '[' is read */
3189 PINC;
3190 if (str_exist_check_with_esc(send, 2, p, end,
3191 (OnigCodePoint )']', enc, syn)) {
3192 tok->type = TK_POSIX_BRACKET_OPEN;
3193 }
3194 else {
3195 PUNFETCH;
3196 goto cc_in_cc;
3197 }
3198 }
3199 else {
3200 cc_in_cc:
 /* NOTE(review): line 3201 (the condition choosing between the nested
    class token and the warning) is missing. */
3202 tok->type = TK_CC_CC_OPEN;
3203 }
3204 else {
3205 CC_ESC_WARN(env, (UChar* )"[");
3206 }
3207 }
3208 }
3209 else if (c == '&') {
 /* NOTE(review): line 3210 (start of this condition) is missing. */
3211 !PEND && (PPEEK_IS('&'))) {
3212 PINC;
3213 tok->type = TK_CC_AND;
3214 }
3215 }
3216
 /* Normal exit: publish the new scan position. */
3217 end:
3218 *src = p;
3219 return tok->type;
3220}
3221
3222#ifdef USE_NAMED_GROUP
/*
 * fetch_named_backref_token: parse the name or number that follows a
 * back-reference introducer and fill *tok as a TK_BACKREF token.
 *
 *  c   : the opening delimiter character, passed on to the name fetcher.
 *  tok : receives type TK_BACKREF and the tok->u.backref fields.
 *  src : in/out scan position; written back only on success.
 *  end : end of the pattern text.
 *  env : scan environment (group table, regex being built).
 *
 * Returns 0 on success.  A negative result from the name fetcher is returned
 * as-is.
 *
 * NOTE(review): listing lines 3252, 3255, 3258, 3268, 3270, 3272, 3277 and
 * 3283 are absent; the bodies of several validity checks are not visible.
 */
3223static int
3224fetch_named_backref_token(OnigCodePoint c, OnigToken* tok, UChar** src,
3225 UChar* end, ScanEnv* env)
3226{
3227 int r, num;
3228 const OnigSyntaxType* syn = env->syntax;
3229 UChar* prev;
3230 UChar* p = *src;
3231 UChar* name_end;
3232 int* backs;
3233 int back_num;
3234
 /* prev marks the start of the name for the later lookup by name. */
3235 prev = p;
3236
3237# ifdef USE_BACKREF_WITH_LEVEL
3238 name_end = NULL_UCHARP; /* no need. escape gcc warning. */
3239 r = fetch_name_with_level(c, &p, end, &name_end,
3240 env, &back_num, &tok->u.backref.level);
 /* A result of 1 from the fetcher is recorded as "level present". */
3241 if (r == 1) tok->u.backref.exist_level = 1;
3242 else tok->u.backref.exist_level = 0;
3243# else
3244 r = fetch_name(&p, end, &name_end, env, &back_num, 1);
3245# endif
3246 if (r < 0) return r;
3247
 /* A non-zero back_num selects the numeric branch. */
3248 if (back_num != 0) {
3249 if (back_num < 0) {
 /* Negative numbers are relative; convert to an absolute group number. */
3250 back_num = BACKREF_REL_TO_ABS(back_num, env);
 /* NOTE(review): line 3252 (body of this if) is missing. */
3251 if (back_num <= 0)
3253 }
3254
 /* NOTE(review): lines 3255 and 3258 around this check are missing. */
3256 if (back_num > env->num_mem ||
3257 IS_NULL(SCANENV_MEM_NODES(env)[back_num]))
3259 }
3260 tok->type = TK_BACKREF;
3261 tok->u.backref.by_name = 0;
3262 tok->u.backref.num = 1;
3263 tok->u.backref.ref1 = back_num;
3264 }
3265 else {
 /* Named branch: resolve the name to its group numbers. */
3266 num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);
3267 if (num <= 0) {
 /* NOTE(review): lines 3268, 3270 and 3272 are missing here. */
3269 ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);
3271 }
3273 int i;
3274 for (i = 0; i < num; i++) {
 /* NOTE(review): line 3277 (body of this if) is missing. */
3275 if (backs[i] > env->num_mem ||
3276 IS_NULL(SCANENV_MEM_NODES(env)[backs[i]]))
3278 }
3279 }
3280
3281 tok->type = TK_BACKREF;
3282 tok->u.backref.by_name = 1;
 /* NOTE(review): line 3283 (the condition selecting the single-group
    form) is missing. */
3284 tok->u.backref.num = 1;
3285 tok->u.backref.ref1 = backs[0];
3286 }
3287 else {
 /* Several groups: keep the count and the array obtained from the lookup. */
3288 tok->u.backref.num = num;
3289 tok->u.backref.refs = backs;
3290 }
3291 }
3292 *src = p;
3293 return 0;
3294}
3295#endif
3296
/*
 * fetch_token: read the next token of the pattern outside a character class.
 *
 *  tok : receives the token type and payload.
 *  src : in/out scan position; written back on the normal exit only.
 *  end : end of the pattern text.
 *  env : scan environment (syntax, encoding, options, group table).
 *
 * Returns tok->type on the normal path, or an error code returned directly
 * from one of the sub-parsers or escape handlers (without updating *src).
 *
 * Structure: the first half handles an escaped character, the second half an
 * unescaped one.  The labels greedy_check and possessive_check live in the
 * escaped half and are also the targets of gotos from the unescaped half.
 *
 * NOTE(review): this listing carries fused listing numbers and many numbered
 * lines are absent (for example 3306, 3320, 3329, 3345, 3351, 3359, 3361,
 * 3375, 3380, 3394, 3399, 3614, 3616, 3618, 3641, 3659, 3676, 3687, 3729,
 * 3737, 3742, 3757, 3763-3764, 3769, 3799, 3825, 3863, 3878, 3883, 3898,
 * 3930, 3957, 3964, 4001, 4008).  The guards on those lines are not visible
 * here; confirm against the real source.
 */
3297static int
3298fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
3299{
3300 int r, num;
3301 OnigCodePoint c;
3302 OnigEncoding enc = env->enc;
3303 const OnigSyntaxType* syn = env->syntax;
3304 UChar* prev;
3305 UChar* p = *src;
3307
 /* Re-entry point after skipped input (comments, extended-mode whitespace). */
3308 start:
3309 if (PEND) {
3310 tok->type = TK_EOT;
3311 return tok->type;
3312 }
3313
3314 tok->type = TK_STRING;
3315 tok->base = 0;
3316 tok->backp = p;
3317
3318 PFETCH(c);
3319 if (IS_MC_ESC_CODE(c, syn)) {
 /* NOTE(review): line 3320 is missing before the second fetch. */
3321
 /* backp now points at the character after the escape. */
3322 tok->backp = p;
3323 PFETCH(c);
3324
3325 tok->u.c = c;
3326 tok->escaped = 1;
3327 switch (c) {
3328 case '*':
 /* NOTE(review): line 3329 (syntax guard) is missing. */
3330 tok->type = TK_OP_REPEAT;
3331 tok->u.repeat.lower = 0;
3332 tok->u.repeat.upper = REPEAT_INFINITE;
3333 goto greedy_check;
3334 break;
3335
3336 case '+':
3337 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;
3338 tok->type = TK_OP_REPEAT;
3339 tok->u.repeat.lower = 1;
3340 tok->u.repeat.upper = REPEAT_INFINITE;
3341 goto greedy_check;
3342 break;
3343
3344 case '?':
 /* NOTE(review): line 3345 (syntax guard) is missing. */
3346 tok->type = TK_OP_REPEAT;
3347 tok->u.repeat.lower = 0;
3348 tok->u.repeat.upper = 1;
 /* Shared tail for every repeat operator: a following '?' makes the
    repeat non-greedy, a following '+' makes it possessive. */
3349 greedy_check:
 /* NOTE(review): line 3351 (rest of this condition) is missing. */
3350 if (!PEND && PPEEK_IS('?') &&
3352 PFETCH(c);
3353 tok->u.repeat.greedy = 0;
3354 tok->u.repeat.possessive = 0;
3355 }
3356 else {
3357 possessive_check:
 /* NOTE(review): lines 3359 and 3361 of this condition are missing. */
3358 if (!PEND && PPEEK_IS('+') &&
3360 tok->type != TK_INTERVAL) ||
3362 tok->type == TK_INTERVAL))) {
3363 PFETCH(c);
3364 tok->u.repeat.greedy = 1;
3365 tok->u.repeat.possessive = 1;
3366 }
3367 else {
3368 tok->u.repeat.greedy = 1;
3369 tok->u.repeat.possessive = 0;
3370 }
3371 }
3372 break;
3373
3374 case '{':
 /* NOTE(review): line 3375 (syntax guard) is missing. */
3376 r = fetch_range_quantifier(&p, end, tok, env);
3377 if (r < 0) return r; /* error */
3378 if (r == 0) goto greedy_check;
3379 else if (r == 2) { /* {n} */
 /* NOTE(review): line 3380 (the condition selecting possessive_check
    over greedy_check) is missing. */
3381 goto possessive_check;
3382
3383 goto greedy_check;
3384 }
3385 /* r == 1 : normal char */
3386 break;
3387
3388 case '|':
3389 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break;
3390 tok->type = TK_ALT;
3391 break;
3392
3393 case '(':
 /* NOTE(review): line 3394 (syntax guard) is missing. */
3395 tok->type = TK_SUBEXP_OPEN;
3396 break;
3397
3398 case ')':
 /* NOTE(review): line 3399 (syntax guard) is missing. */
3400 tok->type = TK_SUBEXP_CLOSE;
3401 break;
3402
3403 case 'w':
3404 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
3405 tok->type = TK_CHAR_TYPE;
3406 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
3407 tok->u.prop.not = 0;
3408 break;
3409
3410 case 'W':
3411 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
3412 tok->type = TK_CHAR_TYPE;
3413 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
3414 tok->u.prop.not = 1;
3415 break;
3416
3417 case 'b':
3418 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
3419 tok->type = TK_ANCHOR;
3420 tok->u.anchor.subtype = ANCHOR_WORD_BOUND;
 /* ASCII-only boundary unless the option asks for the full range. */
3421 tok->u.anchor.ascii_range = IS_ASCII_RANGE(env->option)
3422 && ! IS_WORD_BOUND_ALL_RANGE(env->option);
3423 break;
3424
3425 case 'B':
3426 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
3427 tok->type = TK_ANCHOR;
3428 tok->u.anchor.subtype = ANCHOR_NOT_WORD_BOUND;
3429 tok->u.anchor.ascii_range = IS_ASCII_RANGE(env->option)
3430 && ! IS_WORD_BOUND_ALL_RANGE(env->option);
3431 break;
3432
3433#ifdef USE_WORD_BEGIN_END
3434 case '<':
 /* NOTE(review): line 3435 (syntax guard) is missing. */
3436 tok->type = TK_ANCHOR;
3437 tok->u.anchor.subtype = ANCHOR_WORD_BEGIN;
3438 tok->u.anchor.ascii_range = IS_ASCII_RANGE(env->option);
3439 break;
3440
3441 case '>':
 /* NOTE(review): line 3442 (syntax guard) is missing. */
3443 tok->type = TK_ANCHOR;
3444 tok->u.anchor.subtype = ANCHOR_WORD_END;
3445 tok->u.anchor.ascii_range = IS_ASCII_RANGE(env->option);
3446 break;
3447#endif
3448
3449 case 's':
 /* NOTE(review): line 3450 (syntax guard) is missing. */
3451 tok->type = TK_CHAR_TYPE;
3452 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
3453 tok->u.prop.not = 0;
3454 break;
3455
3456 case 'S':
 /* NOTE(review): line 3457 (syntax guard) is missing. */
3458 tok->type = TK_CHAR_TYPE;
3459 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
3460 tok->u.prop.not = 1;
3461 break;
3462
3463 case 'd':
3464 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
3465 tok->type = TK_CHAR_TYPE;
3466 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
3467 tok->u.prop.not = 0;
3468 break;
3469
3470 case 'D':
3471 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
3472 tok->type = TK_CHAR_TYPE;
3473 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
3474 tok->u.prop.not = 1;
3475 break;
3476
3477 case 'h':
3478 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
3479 tok->type = TK_CHAR_TYPE;
3480 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
3481 tok->u.prop.not = 0;
3482 break;
3483
3484 case 'H':
3485 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
3486 tok->type = TK_CHAR_TYPE;
3487 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
3488 tok->u.prop.not = 1;
3489 break;
3490
3491 case 'A':
 /* NOTE(review): line 3492 (syntax guard) is missing. */
 /* begin_buf is also reached from the '`' case below. */
3493 begin_buf:
3494 tok->type = TK_ANCHOR;
3495 tok->u.anchor.subtype = ANCHOR_BEGIN_BUF;
3496 break;
3497
3498 case 'Z':
 /* NOTE(review): line 3499 (syntax guard) is missing. */
3500 tok->type = TK_ANCHOR;
3501 tok->u.anchor.subtype = ANCHOR_SEMI_END_BUF;
3502 break;
3503
3504 case 'z':
 /* NOTE(review): line 3505 (syntax guard) is missing. */
 /* end_buf is also reached from the '\'' case below. */
3506 end_buf:
3507 tok->type = TK_ANCHOR;
3508 tok->u.anchor.subtype = ANCHOR_END_BUF;
3509 break;
3510
3511 case 'G':
 /* NOTE(review): line 3512 (syntax guard) is missing. */
3513 tok->type = TK_ANCHOR;
3514 tok->u.anchor.subtype = ANCHOR_BEGIN_POSITION;
3515 break;
3516
3517 case '`':
 /* NOTE(review): line 3518 (syntax guard) is missing. */
3519 goto begin_buf;
3520 break;
3521
3522 case '\'':
 /* NOTE(review): line 3523 (syntax guard) is missing. */
3524 goto end_buf;
3525 break;
3526
3527 case 'x':
3528 if (PEND) break;
3529
 /* prev is the rewind point if the escape turns out malformed. */
3530 prev = p;
 /* NOTE(review): line 3531 (the condition opening the brace form) is missing. */
3532 PINC;
3533 num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);
3534 if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
3535 if (!PEND) {
 /* NOTE(review): line 3537 (body of this if) is missing. */
3536 if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK))
3538 }
3539
 /* Accept only when the scan moved past the first character after prev
    and the next character is '}'; otherwise rewind. */
3540 if ((p > prev + enclen(enc, prev, end)) && !PEND && PPEEK_IS('}')) {
3541 PINC;
3542 tok->type = TK_CODE_POINT;
3543 tok->u.code = (OnigCodePoint )num;
3544 }
3545 else {
3546 /* can't read nothing or invalid format */
3547 p = prev;
3548 }
3549 }
3550 else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
3551 num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);
3552 if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
3553 if (p == prev) { /* can't read nothing. */
3554 num = 0; /* but, it's not error */
3555 }
3556 tok->type = TK_RAW_BYTE;
3557 tok->base = 16;
3558 tok->u.c = num;
3559 }
3560 break;
3561
3562 case 'u':
3563 if (PEND) break;
3564
3565 prev = p;
 /* NOTE(review): line 3566 (the guard opening this block) is missing. */
3567 num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);
3568 if (num < -1) return ONIGERR_TOO_SHORT_DIGITS;
3569 else if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
3570 if (p == prev) { /* can't read nothing. */
3571 num = 0; /* but, it's not error */
3572 }
3573 tok->type = TK_CODE_POINT;
3574 tok->base = 16;
3575 tok->u.code = (OnigCodePoint )num;
3576 }
3577 break;
3578
3579 case 'o':
3580 if (PEND) break;
3581
3582 prev = p;
 /* NOTE(review): line 3583 (the condition opening the brace form) is missing. */
3584 PINC;
3585 num = scan_unsigned_octal_number(&p, end, 11, enc);
3586 if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
3587 if (!PEND) {
 /* This local c shadows the function-level c inside this block. */
3588 OnigCodePoint c = PPEEK;
 /* NOTE(review): line 3590 (body of this if) is missing. */
3589 if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8')
3591 }
3592
3593 if ((p > prev + enclen(enc, prev, end)) && !PEND && PPEEK_IS('}')) {
3594 PINC;
3595 tok->type = TK_CODE_POINT;
3596 tok->u.code = (OnigCodePoint )num;
3597 }
3598 else {
3599 /* can't read nothing or invalid format */
3600 p = prev;
3601 }
3602 }
3603 break;
3604
3605 case '1': case '2': case '3': case '4':
3606 case '5': case '6': case '7': case '8': case '9':
 /* Un-read the digit and try to read a decimal back-reference number. */
3607 PUNFETCH;
3608 prev = p;
3609 num = onig_scan_unsigned_number(&p, end, enc);
3610 if (num < 0 || num > ONIG_MAX_BACKREF_NUM) {
3611 goto skip_backref;
3612 }
3613
 /* NOTE(review): lines 3614, 3616 and 3618 of this check are missing. */
3615 (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */
3617 if (num > env->num_mem || IS_NULL(SCANENV_MEM_NODES(env)[num]))
3619 }
3620
3621 tok->type = TK_BACKREF;
3622 tok->u.backref.num = 1;
3623 tok->u.backref.ref1 = num;
3624 tok->u.backref.by_name = 0;
3625#ifdef USE_BACKREF_WITH_LEVEL
3626 tok->u.backref.exist_level = 0;
3627#endif
3628 break;
3629 }
3630
 /* Not a back-reference: '8' and '9' become ordinary characters, any
    other digit is rewound and falls through to the octal handling. */
3631 skip_backref:
3632 if (c == '8' || c == '9') {
3633 /* normal char */
3634 p = prev; PINC;
3635 break;
3636 }
3637
3638 p = prev;
3639 /* fall through */
3640 case '0':
 /* NOTE(review): line 3641 (the guard opening this block) is missing. */
3642 prev = p;
 /* Up to 2 more digits after a leading '0', otherwise up to 3 digits. */
3643 num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc);
3644 if (num < 0 || 0xff < num) return ONIGERR_TOO_BIG_NUMBER;
3645 if (p == prev) { /* can't read nothing. */
3646 num = 0; /* but, it's not error */
3647 }
3648 tok->type = TK_RAW_BYTE;
3649 tok->base = 8;
3650 tok->u.c = num;
3651 }
3652 else if (c != '0') {
3653 PINC;
3654 }
3655 break;
3656
3657#ifdef USE_NAMED_GROUP
3658 case 'k':
 /* NOTE(review): line 3659 (the guard opening this block) is missing. */
3660 PFETCH(c);
3661 if (c == '<' || c == '\'') {
3662 r = fetch_named_backref_token(c, tok, &p, end, env);
3663 if (r < 0) return r;
3664 }
3665 else {
3666 PUNFETCH;
3667 onig_syntax_warn(env, "invalid back reference");
3668 }
3669 }
3670 break;
3671#endif
3672
3673#if defined(USE_SUBEXP_CALL) || defined(USE_NAMED_GROUP)
3674 case 'g':
3675# ifdef USE_NAMED_GROUP
 /* NOTE(review): line 3676 (the guard opening this block) is missing. */
3677 PFETCH(c);
3678 if (c == '{') {
3679 r = fetch_named_backref_token(c, tok, &p, end, env);
3680 if (r < 0) return r;
3681 }
3682 else
3683 PUNFETCH;
3684 }
3685# endif
3686# ifdef USE_SUBEXP_CALL
 /* NOTE(review): line 3687 (the guard opening this block) is missing. */
3688 PFETCH(c);
3689 if (c == '<' || c == '\'') {
 /* gnum stays -1 until a group number is known; rel marks a '+' prefix. */
3690 int gnum = -1, rel = 0;
3691 UChar* name_end;
3692 OnigCodePoint cnext;
3693
3694 cnext = PPEEK;
3695 if (cnext == '0') {
3696 PINC;
3697 if (PPEEK_IS(get_name_end_code_point(c))) { /* \g<0>, \g'0' */
3698 PINC;
3699 name_end = p;
3700 gnum = 0;
3701 }
3702 }
3703 else if (cnext == '+') {
3704 PINC;
3705 rel = 1;
3706 }
3707 prev = p;
 /* Anything other than the explicit zero form goes through fetch_name. */
3708 if (gnum < 0) {
3709 r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &gnum, 1);
3710 if (r < 0) return r;
3711 }
3712
3713 tok->type = TK_CALL;
3714 tok->u.call.name = prev;
3715 tok->u.call.name_end = name_end;
3716 tok->u.call.gnum = gnum;
3717 tok->u.call.rel = rel;
3718 }
3719 else {
3720 onig_syntax_warn(env, "invalid subexp call");
3721 PUNFETCH;
3722 }
3723 }
3724# endif
3725 break;
3726#endif
3727
3728 case 'Q':
 /* NOTE(review): line 3729 (the guard opening this block) is missing. */
3730 tok->type = TK_QUOTE_OPEN;
3731 }
3732 break;
3733
3734 case 'p':
3735 case 'P':
 /* NOTE(review): line 3737 (rest of this condition) is missing. */
3736 if (PPEEK_IS('{') &&
3738 PINC;
3739 tok->type = TK_CHAR_PROPERTY;
 /* \P starts out negated; a leading '^' below flips the flag again. */
3740 tok->u.prop.not = (c == 'P' ? 1 : 0);
3741
 /* NOTE(review): line 3742 (the guard opening this inner block) is missing. */
3743 PFETCH(c);
3744 if (c == '^') {
3745 tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
3746 }
3747 else
3748 PUNFETCH;
3749 }
3750 }
3751 else {
3752 onig_syntax_warn(env, "invalid Unicode Property \\%c", c);
3753 }
3754 break;
3755
3756 case 'R':
 /* NOTE(review): line 3757 (the guard opening this block) is missing. */
3758 tok->type = TK_LINEBREAK;
3759 }
3760 break;
3761
3762 case 'X':
 /* NOTE(review): lines 3763-3764 (guard and body) are missing. */
3765 }
3766 break;
3767
3768 case 'K':
 /* NOTE(review): line 3769 (the guard opening this block) is missing. */
3770 tok->type = TK_KEEP;
3771 }
3772 break;
3773
3774 default:
3775 {
3776 OnigCodePoint c2;
3777
 /* Resolve any other escape; if the value equals the literal character
    the token stays a string and p is placed one character past backp. */
3778 PUNFETCH;
3779 num = fetch_escaped_value(&p, end, env, &c2);
3780 if (num < 0) return num;
3781 /* set_raw: */
3782 if ((OnigCodePoint )tok->u.c != c2) {
3783 tok->type = TK_CODE_POINT;
3784 tok->u.code = (OnigCodePoint )c2;
3785 }
3786 else { /* string */
3787 p = tok->backp + enclen(enc, tok->backp, end);
3788 }
3789 }
3790 break;
3791 }
3792 }
3793 else {
 /* Unescaped character. */
3794 tok->u.c = c;
3795 tok->escaped = 0;
3796
3797#ifdef USE_VARIABLE_META_CHARS
 /* Syntax-defined meta characters are mapped onto the matching labels
    inside the switch below. */
 /* NOTE(review): line 3799 (rest of this condition) is missing. */
3798 if ((c != ONIG_INEFFECTIVE_META_CHAR) &&
3800 if (c == MC_ANYCHAR(syn))
3801 goto any_char;
3802 else if (c == MC_ANYTIME(syn))
3803 goto anytime;
3804 else if (c == MC_ZERO_OR_ONE_TIME(syn))
3805 goto zero_or_one_time;
3806 else if (c == MC_ONE_OR_MORE_TIME(syn))
3807 goto one_or_more_time;
3808 else if (c == MC_ANYCHAR_ANYTIME(syn)) {
3809 tok->type = TK_ANYCHAR_ANYTIME;
3810 goto out;
3811 }
3812 }
3813#endif
3814
3815 switch (c) {
3816 case '.':
3817 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break;
3818#ifdef USE_VARIABLE_META_CHARS
3819 any_char:
3820#endif
3821 tok->type = TK_ANYCHAR;
3822 break;
3823
3824 case '*':
 /* NOTE(review): line 3825 (syntax guard) is missing. */
3826#ifdef USE_VARIABLE_META_CHARS
3827 anytime:
3828#endif
3829 tok->type = TK_OP_REPEAT;
3830 tok->u.repeat.lower = 0;
3831 tok->u.repeat.upper = REPEAT_INFINITE;
 /* Jumps into the escaped half, where the shared tail lives. */
3832 goto greedy_check;
3833 break;
3834
3835 case '+':
3836 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break;
3837#ifdef USE_VARIABLE_META_CHARS
3838 one_or_more_time:
3839#endif
3840 tok->type = TK_OP_REPEAT;
3841 tok->u.repeat.lower = 1;
3842 tok->u.repeat.upper = REPEAT_INFINITE;
3843 goto greedy_check;
3844 break;
3845
3846 case '?':
3847 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break;
3848#ifdef USE_VARIABLE_META_CHARS
3849 zero_or_one_time:
3850#endif
3851 tok->type = TK_OP_REPEAT;
3852 tok->u.repeat.lower = 0;
3853 tok->u.repeat.upper = 1;
3854 goto greedy_check;
3855 break;
3856
3857 case '{':
3858 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;
3859 r = fetch_range_quantifier(&p, end, tok, env);
3860 if (r < 0) return r; /* error */
3861 if (r == 0) goto greedy_check;
3862 else if (r == 2) { /* {n} */
 /* NOTE(review): line 3863 (the condition selecting possessive_check
    over greedy_check) is missing. */
3864 goto possessive_check;
3865
3866 goto greedy_check;
3867 }
3868 /* r == 1 : normal char */
3869 break;
3870
3871 case '|':
3872 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break;
3873 tok->type = TK_ALT;
3874 break;
3875
3876 case '(':
 /* NOTE(review): line 3878 (rest of this condition) is missing. */
3877 if (PPEEK_IS('?') &&
3879 PINC;
3880 if (PPEEK_IS('#')) {
 /* Comment group: consume up to the first unescaped ')' and restart. */
3881 PFETCH(c);
3882 while (1) {
 /* NOTE(review): line 3883 (presumably the end-of-pattern check for
    this loop) is missing. */
3884 PFETCH(c);
3885 if (c == MC_ESC(syn)) {
3886 if (!PEND) PFETCH(c);
3887 }
3888 else {
3889 if (c == ')') break;
3890 }
3891 }
3892 goto start;
3893 }
3894#ifdef USE_PERL_SUBEXP_CALL
3895 /* (?&name), (?n), (?R), (?0), (?+n), (?-n) */
3896 c = PPEEK;
 /* NOTE(review): line 3898 (rest of this condition) is missing. */
3897 if ((c == '&' || c == 'R' || ONIGENC_IS_CODE_DIGIT(enc, c)) &&
3899 /* (?&name), (?n), (?R), (?0) */
3900 int gnum;
3901 UChar *name;
3902 UChar *name_end;
3903
3904 if (c == 'R' || c == '0') {
3905 PINC; /* skip 'R' / '0' */
3906 if (!PPEEK_IS(')')) return ONIGERR_INVALID_GROUP_NAME;
3907 PINC; /* skip ')' */
 /* Empty name with group number 0. */
3908 name_end = name = p;
3909 gnum = 0;
3910 }
3911 else {
3912 int numref = 1;
3913 if (c == '&') { /* (?&name) */
3914 PINC;
3915 numref = 0; /* don't allow number name */
3916 }
3917 name = p;
3918 r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum, numref);
3919 if (r < 0) return r;
3920 }
3921
3922 tok->type = TK_CALL;
3923 tok->u.call.name = name;
3924 tok->u.call.name_end = name_end;
3925 tok->u.call.gnum = gnum;
3926 tok->u.call.rel = 0;
3927 break;
3928 }
 /* NOTE(review): line 3930 (rest of this condition) is missing. */
3929 else if ((c == '-' || c == '+') &&
3931 /* (?+n), (?-n) */
3932 int gnum;
3933 UChar *name;
3934 UChar *name_end;
3935 OnigCodePoint cnext;
3937
3938 PINC; /* skip '-' / '+' */
3939 cnext = PPEEK;
3940 if (ONIGENC_IS_CODE_DIGIT(enc, cnext)) {
 /* For '-', step back so the sign is part of the text handed to fetch_name. */
3941 if (c == '-') PUNFETCH;
3942 name = p;
3943 r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum, 1);
3944 if (r < 0) return r;
3945
3946 tok->type = TK_CALL;
3947 tok->u.call.name = name;
3948 tok->u.call.name_end = name_end;
3949 tok->u.call.gnum = gnum;
3950 tok->u.call.rel = 1;
3951 break;
3952 }
3953 }
3954#endif /* USE_PERL_SUBEXP_CALL */
3955#ifdef USE_CAPITAL_P_NAMED_GROUP
 /* NOTE(review): line 3957 (rest of this condition) is missing. */
3956 if (PPEEK_IS('P') &&
3958 int gnum;
3959 UChar *name;
3960 UChar *name_end;
3962
3963 PINC; /* skip 'P' */
 /* NOTE(review): line 3964 is missing before this fetch. */
3965 PFETCH(c);
3966 if (c == '=') { /* (?P=name): backref */
3967 r = fetch_named_backref_token((OnigCodePoint )'(', tok, &p, end, env);
3968 if (r < 0) return r;
3969 break;
3970 }
3971 else if (c == '>') { /* (?P>name): subexp call */
3972 name = p;
3973 r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum, 0);
3974 if (r < 0) return r;
3975
3976 tok->type = TK_CALL;
3977 tok->u.call.name = name;
3978 tok->u.call.name_end = name_end;
3979 tok->u.call.gnum = gnum;
3980 tok->u.call.rel = 0;
3981 break;
3982 }
3983 }
3984#endif /* USE_CAPITAL_P_NAMED_GROUP */
 /* None of the special forms matched: step back one character. */
3985 PUNFETCH;
3986 }
3987
3988 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
3989 tok->type = TK_SUBEXP_OPEN;
3990 break;
3991
3992 case ')':
3993 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
3994 tok->type = TK_SUBEXP_CLOSE;
3995 break;
3996
3997 case '^':
3998 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
3999 tok->type = TK_ANCHOR;
 /* NOTE(review): line 4001 (the two alternatives of this conditional
    expression) is missing. */
4000 tok->u.anchor.subtype = (IS_SINGLELINE(env->option)
4002 break;
4003
4004 case '$':
4005 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
4006 tok->type = TK_ANCHOR;
 /* NOTE(review): line 4008 (the two alternatives of this conditional
    expression) is missing. */
4007 tok->u.anchor.subtype = (IS_SINGLELINE(env->option)
4009 break;
4010
4011 case '[':
4012 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break;
4013 tok->type = TK_CC_OPEN;
4014 break;
4015
4016 case ']':
4017 if (*src > env->pattern) /* /].../ is allowed. */
4018 CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");
4019 break;
4020
4021 case '#':
 /* In extended mode '#' starts a comment that runs to a newline
    character or the end of the pattern; then tokenizing restarts. */
4022 if (IS_EXTEND(env->option)) {
4023 while (!PEND) {
4024 PFETCH(c);
4025 if (ONIGENC_IS_CODE_NEWLINE(enc, c))
4026 break;
4027 }
4028 goto start;
4029 break;
4030 }
4031 break;
4032
4033 case ' ': case '\t': case '\n': case '\r': case '\f':
 /* In extended mode these characters are skipped. */
4034 if (IS_EXTEND(env->option))
4035 goto start;
4036 break;
4037
4038 default:
4039 /* string */
4040 break;
4041 }
4042 }
4043
4044#ifdef USE_VARIABLE_META_CHARS
4045 out:
4046#endif
 /* Normal exit: publish the new scan position. */
4047 *src = p;
4048 return tok->type;
4049}
4050
/*
 * add_ctype_to_cc_by_range: add the code points described by the range table
 * mbr (or, when `not` is non-zero, the code points outside it) to cc.
 *
 *  cc     : character class to extend (bitset cc->bs and buffer cc->mbuf).
 *  ctype  : unused here (marked ARG_UNUSED).
 *  not    : 0 adds the listed ranges, non-zero adds their complement.
 *  env    : scan environment, passed to the helpers.
 *  sb_out : threshold; code points below it go into the bitset, code points
 *           at or above it go into cc->mbuf as ranges.
 *  mbr    : range table read through the ONIGENC_CODE_RANGE_* macros.
 *
 * Returns 0 on success, or the non-zero result of add_code_range_to_buf.
 *
 * NOTE(review): listing line 4082 (one argument of the call after sb_end)
 * is absent from this extract.
 */
4051static int
4052add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,
4053 ScanEnv* env,
4054 OnigCodePoint sb_out, const OnigCodePoint mbr[])
4055{
4056 int i, r;
4057 OnigCodePoint j;
4058
4059 int n = ONIGENC_CODE_RANGE_NUM(mbr);
4060
4061 if (not == 0) {
 /* Walk the ranges code point by code point until sb_out is reached. */
4062 for (i = 0; i < n; i++) {
4063 for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);
4064 j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {
4065 if (j >= sb_out) {
 /* The current range straddles sb_out: add its upper part to the
    buffer and advance i so the loop after sb_end skips it. */
4066 if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {
4067 r = add_code_range_to_buf(&(cc->mbuf), env, j,
4068 ONIGENC_CODE_RANGE_TO(mbr, i));
4069 if (r != 0) return r;
4070 i++;
4071 }
4072
4073 goto sb_end;
4074 }
4075 BITSET_SET_BIT_CHKDUP(cc->bs, j);
4076 }
4077 }
4078
 /* Remaining ranges (from index i on) go to the buffer. */
4079 sb_end:
4080 for ( ; i < n; i++) {
4081 r = add_code_range_to_buf(&(cc->mbuf), env,
4083 ONIGENC_CODE_RANGE_TO(mbr, i));
4084 if (r != 0) return r;
4085 }
4086 }
4087 else {
 /* Complement: prev tracks the first code point after the last range. */
4088 OnigCodePoint prev = 0;
4089
 /* First pass: set bitset bits for the gaps between ranges below sb_out. */
4090 for (i = 0; i < n; i++) {
4091 for (j = prev;
4092 j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {
4093 if (j >= sb_out) {
4094 goto sb_end2;
4095 }
4096 BITSET_SET_BIT_CHKDUP(cc->bs, j);
4097 }
4098 prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
4099 }
4100 for (j = prev; j < sb_out; j++) {
4101 BITSET_SET_BIT_CHKDUP(cc->bs, j);
4102 }
4103
 /* Second pass: add the gaps from sb_out upward to the buffer. */
4104 sb_end2:
4105 prev = sb_out;
4106
4107 for (i = 0; i < n; i++) {
4108 if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {
4109 r = add_code_range_to_buf(&(cc->mbuf), env, prev,
4110 ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);
4111 if (r != 0) return r;
4112 }
4113 prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
4114 }
 /* Final gap, from after the last range up to 0x7fffffff. */
4115 if (prev < 0x7fffffff) {
4116 r = add_code_range_to_buf(&(cc->mbuf), env, prev, 0x7fffffff);
4117 if (r != 0) return r;
4118 }
4119 }
4120
4121 return 0;
4122}
4123
/*
 * add_ctype_to_cc: add the characters of type ctype (or of its complement
 * when `not` is non-zero) to the character class cc.
 *
 *  cc          : character class to extend.
 *  ctype       : character type constant (ONIGENC_CTYPE_*).
 *  not         : non-zero to add the complement.
 *  ascii_range : non-zero to restrict the type to code points below 0x80.
 *  env         : scan environment; supplies the encoding.
 *
 * When the encoding supplies a code-range table for ctype, the work is
 * delegated to add_ctype_to_cc_by_range.  When it answers
 * ONIG_NO_SUPPORT_CONFIG, the single-byte fallback in the switch is used.
 * Any other result from the table lookup is returned unchanged.
 *
 * Returns 0 on success, otherwise the error code of the failing helper
 * (ONIGERR_PARSER_BUG for a ctype the switch does not handle).
 *
 * NOTE(review): listing lines 4177-4187, 4193, 4203-4204, 4212, 4220, 4230
 * and 4239 are absent; the case labels of the first two switch arms and the
 * bodies of the trailing `if (ascii_range)` tests are not visible here.
 */
4124static int
4125add_ctype_to_cc(CClassNode* cc, int ctype, int not, int ascii_range, ScanEnv* env)
4126{
4127 int maxcode;
4128 int c, r;
4129 const OnigCodePoint *ranges;
4130 OnigCodePoint sb_out;
4131 OnigEncoding enc = env->enc;
4132
4133 r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
4134 if (r == 0) {
4135 if (ascii_range) {
 /* Build the type in a scratch class, restrict it, then merge into cc. */
4136 CClassNode ccwork;
4137 initialize_cclass(&ccwork);
4138 r = add_ctype_to_cc_by_range(&ccwork, ctype, not, env, sb_out,
4139 ranges);
4140 if (r == 0) {
4141 if (not) {
 /* Negated form: additionally add 0x80 .. ONIG_LAST_CODE_POINT. */
4142 r = add_code_range_to_buf0(&(ccwork.mbuf), env, 0x80, ONIG_LAST_CODE_POINT, FALSE);
4143 }
4144 else {
 /* Positive form: intersect with a class covering 0x00..0x7F. */
4145 CClassNode ccascii;
4146 initialize_cclass(&ccascii);
4147 if (ONIGENC_MBC_MINLEN(env->enc) > 1) {
4148 r = add_code_range(&(ccascii.mbuf), env, 0x00, 0x7F);
4149 }
4150 else {
4151 bitset_set_range(env, ccascii.bs, 0x00, 0x7F);
4152 r = 0;
4153 }
4154 if (r == 0) {
4155 r = and_cclass(&ccwork, &ccascii, env);
4156 }
4157 if (IS_NOT_NULL(ccascii.mbuf)) bbuf_free(ccascii.mbuf);
4158 }
4159 if (r == 0) {
4160 r = or_cclass(cc, &ccwork, env);
4161 }
4162 if (IS_NOT_NULL(ccwork.mbuf)) bbuf_free(ccwork.mbuf);
4163 }
4164 }
4165 else {
4166 r = add_ctype_to_cc_by_range(cc, ctype, not, env, sb_out, ranges);
4167 }
4168 return r;
4169 }
4170 else if (r != ONIG_NO_SUPPORT_CONFIG) {
4171 return r;
4172 }
4173
 /* Fallback: test each single-byte code with the encoding's predicates. */
4174 maxcode = ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
4175 r = 0;
4176 switch (ctype) {
 /* NOTE(review): the case labels for this arm (lines 4177-4187) are missing. */
4188 if (not != 0) {
4189 for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
4190 if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
4191 BITSET_SET_BIT_CHKDUP(cc->bs, c);
4192 }
4194 }
4195 else {
4196 for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
4197 if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
4198 BITSET_SET_BIT_CHKDUP(cc->bs, c);
4199 }
4200 }
4201 break;
4202
 /* NOTE(review): the case labels for this arm (lines 4203-4204) are missing. */
4205 if (not != 0) {
 /* Codes at or above maxcode count as "not of this type". */
4206 for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
4207 if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)
4208 || c >= maxcode)
4209 BITSET_SET_BIT_CHKDUP(cc->bs, c);
4210 }
4211 if (ascii_range)
4213 }
4214 else {
4215 for (c = 0; c < maxcode; c++) {
4216 if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
4217 BITSET_SET_BIT_CHKDUP(cc->bs, c);
4218 }
4219 if (! ascii_range)
4221 }
4222 break;
4223
4224 case ONIGENC_CTYPE_WORD:
4225 if (not == 0) {
4226 for (c = 0; c < maxcode; c++) {
4227 if (ONIGENC_IS_CODE_WORD(enc, c)) BITSET_SET_BIT_CHKDUP(cc->bs, c);
4228 }
4229 if (! ascii_range)
4231 }
4232 else {
4233 for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
4234 if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* check invalid code point */
4235 && (! ONIGENC_IS_CODE_WORD(enc, c) || c >= maxcode))
4236 BITSET_SET_BIT_CHKDUP(cc->bs, c);
4237 }
4238 if (ascii_range)
4240 }
4241 break;
4242
4243 default:
4244 return ONIGERR_PARSER_BUG;
4245 break;
4246 }
4247
4248 return r;
4249}
4250
/*
 * parse_posix_bracket: try to parse a POSIX bracket name at *src and add the
 * corresponding character type to cc (and, under the condition below, to
 * asc_cc).
 *
 *  cc     : character class receiving the type.
 *  asc_cc : optional second class; may be NULL.
 *  src    : in/out scan position; advanced past the closing ":]" on success.
 *  end    : end of the pattern text.
 *  env    : scan environment (encoding, options).
 *
 * Returns 0 when a bracket name was recognised and added, 1 when the text is
 * not a POSIX bracket (no error), or the non-zero result of add_ctype_to_cc.
 *
 * NOTE(review): listing lines 4259-4272 (the contents of the PBS table),
 * 4293, 4298 and 4329 are absent from this extract.
 */
4251static int
4252parse_posix_bracket(CClassNode* cc, CClassNode* asc_cc,
4253 UChar** src, UChar* end, ScanEnv* env)
4254{
4255#define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20
4256#define POSIX_BRACKET_NAME_MIN_LEN 4
4257
4258 static const PosixBracketEntryType PBS[] = {
4273 };
4274
4275 const PosixBracketEntryType *pb;
4276 int not, i, r;
4277 int ascii_range;
4278 OnigCodePoint c;
4279 OnigEncoding enc = env->enc;
4280 UChar *p = *src;
4281
 /* A leading '^' negates the bracket. */
4282 if (PPEEK_IS('^')) {
4283 PINC_S;
4284 not = 1;
4285 }
4286 else
4287 not = 0;
4288
 /* Too short to hold even the shortest name plus 3 more characters. */
4289 if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3)
4290 goto not_posix_bracket;
4291
 /* NOTE(review): line 4293 (rest of this expression) is missing. */
4292 ascii_range = IS_ASCII_RANGE(env->option) &&
4294 for (pb = PBS; pb < PBS + numberof(PBS); pb++) {
4295 if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {
4296 p = (UChar* )onigenc_step(enc, p, end, pb->len);
 /* NOTE(review): line 4298 (body of this if) is missing. */
4297 if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)
4299
4300 r = add_ctype_to_cc(cc, pb->ctype, not, ascii_range, env);
4301 if (r != 0) return r;
4302
4303 if (IS_NOT_NULL(asc_cc)) {
 /* asc_cc receives the type only when it is neither WORD nor ASCII
    and ascii_range is off. */
4304 if (pb->ctype != ONIGENC_CTYPE_WORD &&
4305 pb->ctype != ONIGENC_CTYPE_ASCII &&
4306 !ascii_range)
4307 r = add_ctype_to_cc(asc_cc, pb->ctype, not, ascii_range, env);
4308 if (r != 0) return r;
4309 }
4310
 /* Skip the two characters of ":]" and publish the new position. */
4311 PINC_S; PINC_S;
4312 *src = p;
4313 return 0;
4314 }
4315 }
4316
4317 not_posix_bracket:
 /* Scan ahead, up to the check limit, for ':' or ']'. */
4318 c = 0;
4319 i = 0;
4320 while (!PEND && ((c = PPEEK) != ':') && c != ']') {
4321 PINC_S;
4322 if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;
4323 }
4324 if (c == ':' && ! PEND) {
4325 PINC_S;
4326 if (! PEND) {
4327 PFETCH_S(c);
 /* NOTE(review): line 4329 (body of this if) is missing. */
4328 if (c == ']')
4330 }
4331 }
4332
4333 return 1; /* 1: is not POSIX bracket, but no error. */
4334}
4335
/*
 * fetch_char_property_to_ctype: read a property name up to the closing '}'
 * and convert it to a character type via ONIGENC_PROPERTY_NAME_TO_CTYPE.
 *
 *  src : in/out scan position; advanced past '}' only on success.
 *  end : end of the pattern text.
 *  env : scan environment; supplies the encoding and receives the error text.
 *
 * Returns the non-negative value from the name lookup on success.  On failure
 * the error string is recorded through onig_scan_env_set_error_string and r
 * is returned.
 *
 * NOTE(review): listing line 4344 is absent.  The visible code reads r after
 * the loop on paths where no visible statement assigns it, so the missing
 * line presumably initialises r; confirm against the real source.
 */
4336static int
4337fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)
4338{
4339 int r;
4340 OnigCodePoint c;
4341 OnigEncoding enc = env->enc;
4342 UChar *prev, *start, *p = *src;
4343
4345 start = prev = p;
4346
4347 while (!PEND) {
 /* prev always points at the character about to be fetched, so at '}'
    the name is the span [start, prev). */
4348 prev = p;
4349 PFETCH_S(c);
4350 if (c == '}') {
4351 r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev);
4352 if (r < 0) break;
4353
4354 *src = p;
4355 return r;
4356 }
 /* These characters end the scan without a match. */
4357 else if (c == '(' || c == ')' || c == '{' || c == '|') {
4358 break;
4359 }
4360 }
4361
4362 onig_scan_env_set_error_string(env, r, *src, prev);
4363 return r;
4364}
4365
/* Forward declaration; parse_char_property below calls this function. */
4366static int cclass_case_fold(Node** np, CClassNode* cc, CClassNode* asc_cc, ScanEnv* env);
4367
/*
 * parse_char_property: build a character-class node for a character property
 * token.
 *
 *  np  : receives the newly created class node.
 *  tok : the property token; tok->u.prop.not selects negation.
 *  src : in/out scan position, advanced by fetch_char_property_to_ctype.
 *  end : end of the pattern text.
 *  env : scan environment.
 *
 * Returns 0 on success, a negative ctype lookup result, or the non-zero
 * result of add_ctype_to_cc / cclass_case_fold.
 *
 * NOTE(review): listing line 4379 (between node creation and first use) is
 * absent; presumably it checks the allocation result - confirm.
 */
4368static int
4369parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end,
4370 ScanEnv* env)
4371{
4372 int r, ctype;
4373 CClassNode* cc;
4374
4375 ctype = fetch_char_property_to_ctype(src, end, env);
4376 if (ctype < 0) return ctype;
4377
4378 *np = node_new_cclass();
4380 cc = NCCLASS(*np);
 /* The type is added un-negated; negation is applied as a flag on the class. */
4381 r = add_ctype_to_cc(cc, ctype, 0, 0, env);
4382 if (r != 0) return r;
4383 if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
4384
 /* Under ignore-case, every type except ASCII is case-folded. */
4385 if (IS_IGNORECASE(env->option)) {
4386 if (ctype != ONIGENC_CTYPE_ASCII)
4387 r = cclass_case_fold(np, cc, cc, env);
4388 }
4389 return r;
4390}
4391
4392
4397 CCS_START
4399
4403 CCV_CLASS
4405
/*
 * next_state_class: advance the character-class parser state when a class
 * item (as opposed to a single value) has just been read.
 *
 * If a single value is still pending (*state == CCS_VALUE and *type is not
 * CCV_CLASS) it is flushed first: a CCV_SB value goes into the bitset, a
 * CCV_CODE_POINT value into the multi-byte buffer.  The same value is also
 * recorded in asc_cc when asc_cc is not NULL.
 *
 *  cc, asc_cc : target classes (asc_cc may be NULL).
 *  vs         : the pending value.
 *  type       : in/out kind of the pending value; set to CCV_CLASS on return.
 *  state      : in/out parser state; set to CCS_VALUE on return.
 *  env        : scan environment.
 *
 * Returns 0 on success or a negative result from the range helpers.
 *
 * NOTE(review): listing line 4414 (body of the CCS_RANGE test) is absent;
 * presumably it returns an error - confirm.
 */
4406static int
4407next_state_class(CClassNode* cc, CClassNode* asc_cc,
4408 OnigCodePoint* vs, enum CCVALTYPE* type,
4409 enum CCSTATE* state, ScanEnv* env)
4410{
4411 int r;
4412
4413 if (*state == CCS_RANGE)
4415
4416 if (*state == CCS_VALUE && *type != CCV_CLASS) {
4417 if (*type == CCV_SB) {
4418 BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs));
4419 if (IS_NOT_NULL(asc_cc))
4420 BITSET_SET_BIT(asc_cc->bs, (int )(*vs));
4421 }
4422 else if (*type == CCV_CODE_POINT) {
4423 r = add_code_range(&(cc->mbuf), env, *vs, *vs);
4424 if (r < 0) return r;
4425 if (IS_NOT_NULL(asc_cc)) {
4426 r = add_code_range0(&(asc_cc->mbuf), env, *vs, *vs, 0);
4427 if (r < 0) return r;
4428 }
4429 }
4430 }
4431
4432 *state = CCS_VALUE;
4433 *type = CCV_CLASS;
4434 return 0;
4435}
4436
/*
 * next_state_val: advance the character-class parser state when a single
 * value `to` has just been read.
 *
 *  cc, asc_cc  : target classes (asc_cc may be NULL).
 *  from        : in/out previously pending value; set to `to` on return.
 *  to          : the value just read.
 *  from_israw  : in/out raw flag of the pending value; set to to_israw.
 *  to_israw    : raw flag of the value just read.
 *  intype      : kind of the value just read.
 *  type        : in/out kind of the pending value; set to intype on return.
 *  state       : in/out parser state.
 *  env         : scan environment.
 *
 * CCS_VALUE    : flush the pending value *from into the class.
 * CCS_RANGE    : add the range *from..to to the class.
 * CCS_COMPLETE / CCS_START : only switch the state to CCS_VALUE.
 *
 * Returns 0 on success or a negative result from the range helpers.
 *
 * NOTE(review): listing lines 4467, 4470, 4473, 4490, 4493 and 4505 are
 * absent; the handling of out-of-range single-byte values, of reversed
 * ranges, and the statement after ccs_range_end are not visible here.
 */
4437static int
4438next_state_val(CClassNode* cc, CClassNode* asc_cc,
4439 OnigCodePoint *from, OnigCodePoint to,
4440 int* from_israw, int to_israw,
4441 enum CCVALTYPE intype, enum CCVALTYPE* type,
4442 enum CCSTATE* state, ScanEnv* env)
4443{
4444 int r;
4445
4446 switch (*state) {
4447 case CCS_VALUE:
4448 if (*type == CCV_SB) {
4449 BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*from));
4450 if (IS_NOT_NULL(asc_cc))
4451 BITSET_SET_BIT(asc_cc->bs, (int )(*from));
4452 }
4453 else if (*type == CCV_CODE_POINT) {
4454 r = add_code_range(&(cc->mbuf), env, *from, *from);
4455 if (r < 0) return r;
4456 if (IS_NOT_NULL(asc_cc)) {
4457 r = add_code_range0(&(asc_cc->mbuf), env, *from, *from, 0);
4458 if (r < 0) return r;
4459 }
4460 }
4461 break;
4462
4463 case CCS_RANGE:
 /* Both ends of the range have the same kind. */
4464 if (intype == *type) {
4465 if (intype == CCV_SB) {
 /* NOTE(review): line 4467 (body of this if) is missing. */
4466 if (*from > 0xff || to > 0xff)
4468
 /* NOTE(review): lines 4470 and 4473 of the reversed-range handling
    are missing. */
4469 if (*from > to) {
4471 goto ccs_range_end;
4472 else
4474 }
4475 bitset_set_range(env, cc->bs, (int )*from, (int )to);
4476 if (IS_NOT_NULL(asc_cc))
4477 bitset_set_range(env, asc_cc->bs, (int )*from, (int )to);
4478 }
4479 else {
4480 r = add_code_range(&(cc->mbuf), env, *from, to);
4481 if (r < 0) return r;
4482 if (IS_NOT_NULL(asc_cc)) {
4483 r = add_code_range0(&(asc_cc->mbuf), env, *from, to, 0);
4484 if (r < 0) return r;
4485 }
4486 }
4487 }
4488 else {
 /* Mixed kinds: record the range in both the bitset (clamped to 0xff)
    and the multi-byte buffer. */
 /* NOTE(review): lines 4490 and 4493 of the reversed-range handling
    are missing. */
4489 if (*from > to) {
4491 goto ccs_range_end;
4492 else
4494 }
4495 bitset_set_range(env, cc->bs, (int )*from, (int )(to < 0xff ? to : 0xff));
4496 r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*from, to);
4497 if (r < 0) return r;
4498 if (IS_NOT_NULL(asc_cc)) {
4499 bitset_set_range(env, asc_cc->bs, (int )*from, (int )(to < 0xff ? to : 0xff));
4500 r = add_code_range0(&(asc_cc->mbuf), env, (OnigCodePoint )*from, to, 0);
4501 if (r < 0) return r;
4502 }
4503 }
 /* NOTE(review): line 4505 (the statement after this label) is missing. */
4504 ccs_range_end:
4506 break;
4507
4508 case CCS_COMPLETE:
4509 case CCS_START:
4510 *state = CCS_VALUE;
4511 break;
4512
4513 default:
4514 break;
4515 }
4516
 /* The value just read becomes the pending value for the next call. */
4517 *from_israw = to_israw;
4518 *from = to;
4519 *type = intype;
4520 return 0;
4521}
4522
4523static int
4524code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
4525 ScanEnv* env)
4526{
4527 int in_esc;
4529 OnigEncoding enc = env->enc;
4530 UChar* p = from;
4531
4532 in_esc = 0;
4533 while (! PEND) {
4534 if (ignore_escaped && in_esc) {
4535 in_esc = 0;
4536 }
4537 else {
4538 PFETCH_S(code);
4539 if (code == c) return 1;
4540 if (code == MC_ESC(env->syntax)) in_esc = 1;
4541 }
4542 }
4543 return 0;
4544}
4545
4546static int
4547parse_char_class(Node** np, Node** asc_np, OnigToken* tok, UChar** src, UChar* end,
4548 ScanEnv* env)
4549{
4550 int r, neg, len, fetched, and_start;
4551 OnigCodePoint v, vs;
4552 UChar *p;
4553 Node* node;
4554 Node* asc_node;
4555 CClassNode *cc, *prev_cc;
4556 CClassNode *asc_cc, *asc_prev_cc;
4557 CClassNode work_cc, asc_work_cc;
4558
4559 enum CCSTATE state;
4560 enum CCVALTYPE val_type, in_type;
4561 int val_israw, in_israw;
4562
4563 *np = *asc_np = NULL_NODE;
4564 env->parse_depth++;
4565 if (env->parse_depth > ParseDepthLimit)
4567 prev_cc = asc_prev_cc = (CClassNode* )NULL;
4568 r = fetch_token_in_cc(tok, src, end, env);
4569 if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {
4570 neg = 1;
4571 r = fetch_token_in_cc(tok, src, end, env);
4572 }
4573 else {
4574 neg = 0;
4575 }
4576
4577 if (r < 0) return r;
4578 if (r == TK_CC_CLOSE) {
4579 if (! code_exist_check((OnigCodePoint )']',
4580 *src, env->pattern_end, 1, env))
4582
4583 CC_ESC_WARN(env, (UChar* )"]");
4584 r = tok->type = TK_CHAR; /* allow []...] */
4585 }
4586
4587 *np = node = node_new_cclass();
4589 cc = NCCLASS(node);
4590
4591 if (IS_IGNORECASE(env->option)) {
4592 *asc_np = asc_node = node_new_cclass();
4593 CHECK_NULL_RETURN_MEMERR(asc_node);
4594 asc_cc = NCCLASS(asc_node);
4595 }
4596 else {
4597 asc_node = NULL_NODE;
4598 asc_cc = NULL;
4599 }
4600
4601 and_start = 0;
4602 state = CCS_START;
4603 p = *src;
4604 while (r != TK_CC_CLOSE) {
4605 fetched = 0;
4606 switch (r) {
4607 case TK_CHAR:
4608 if ((tok->u.code >= SINGLE_BYTE_SIZE) ||
4609 (len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c)) > 1) {
4610 in_type = CCV_CODE_POINT;
4611 }
4612 else if (len < 0) {
4613 r = len;
4614 goto err;
4615 }
4616 else {
4617 sb_char:
4618 in_type = CCV_SB;
4619 }
4620 v = (OnigCodePoint )tok->u.c;
4621 in_israw = 0;
4622 goto val_entry2;
4623 break;
4624
4625 case TK_RAW_BYTE:
4626 /* tok->base != 0 : octal or hexadec. */
4627 if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) {
4630 UChar* psave = p;
4631 int i, base = tok->base;
4632
4633 buf[0] = (UChar )tok->u.c;
4634 for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {
4635 r = fetch_token_in_cc(tok, &p, end, env);
4636 if (r < 0) goto err;
4637 if (r != TK_RAW_BYTE || tok->base != base) {
4638 fetched = 1;
4639 break;
4640 }
4641 buf[i] = (UChar )tok->u.c;
4642 }
4643
4644 if (i < ONIGENC_MBC_MINLEN(env->enc)) {
4646 goto err;
4647 }
4648
4649 len = enclen(env->enc, buf, buf + i);
4650 if (i < len) {
4652 goto err;
4653 }
4654 else if (i > len) { /* fetch back */
4655 p = psave;
4656 for (i = 1; i < len; i++) {
4657 (void)fetch_token_in_cc(tok, &p, end, env);
4658 /* no need to check the return value (already checked above) */
4659 }
4660 fetched = 0;
4661 }
4662
4663 if (i == 1) {
4664 v = (OnigCodePoint )buf[0];
4665 goto raw_single;
4666 }
4667 else {
4668 v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);
4669 in_type = CCV_CODE_POINT;
4670 }
4671 }
4672 else {
4673 v = (OnigCodePoint )tok->u.c;
4674 raw_single:
4675 in_type = CCV_SB;
4676 }
4677 in_israw = 1;
4678 goto val_entry2;
4679 break;
4680
4681 case TK_CODE_POINT:
4682 v = tok->u.code;
4683 in_israw = 1;
4684 val_entry:
4685 len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);
4686 if (len < 0) {
4687 r = len;
4688 goto err;
4689 }
4690 in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);
4691 val_entry2:
4692 r = next_state_val(cc, asc_cc, &vs, v, &val_israw, in_israw, in_type, &val_type,
4693 &state, env);
4694 if (r != 0) goto err;
4695 break;
4696
4698 r = parse_posix_bracket(cc, asc_cc, &p, end, env);
4699 if (r < 0) goto err;
4700 if (r == 1) { /* is not POSIX bracket */
4701 CC_ESC_WARN(env, (UChar* )"[");
4702 p = tok->backp;
4703 v = (OnigCodePoint )tok->u.c;
4704 in_israw = 0;
4705 goto val_entry;
4706 }
4707 goto next_class;
4708 break;
4709
4710 case TK_CHAR_TYPE:
4711 r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not,
4712 IS_ASCII_RANGE(env->option), env);
4713 if (r != 0) return r;
4714 if (IS_NOT_NULL(asc_cc)) {
4715 if (tok->u.prop.ctype != ONIGENC_CTYPE_WORD)
4716 r = add_ctype_to_cc(asc_cc, tok->u.prop.ctype, tok->u.prop.not,
4717 IS_ASCII_RANGE(env->option), env);
4718 if (r != 0) return r;
4719 }
4720
4721 next_class:
4722 r = next_state_class(cc, asc_cc, &vs, &val_type, &state, env);
4723 if (r != 0) goto err;
4724 break;
4725
4726 case TK_CHAR_PROPERTY:
4727 {
4728 int ctype;
4729
4730 ctype = fetch_char_property_to_ctype(&p, end, env);
4731 if (ctype < 0) return ctype;
4732 r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, 0, env);
4733 if (r != 0) return r;
4734 if (IS_NOT_NULL(asc_cc)) {
4735 if (ctype != ONIGENC_CTYPE_ASCII)
4736 r = add_ctype_to_cc(asc_cc, ctype, tok->u.prop.not, 0, env);
4737 if (r != 0) return r;
4738 }
4739 goto next_class;
4740 }
4741 break;
4742
4743 case TK_CC_RANGE:
4744 if (state == CCS_VALUE) {
4745 r = fetch_token_in_cc(tok, &p, end, env);
4746 if (r < 0) goto err;
4747 fetched = 1;
4748 if (r == TK_CC_CLOSE) { /* allow [x-] */
4749 range_end_val:
4750 v = (OnigCodePoint )'-';
4751 in_israw = 0;
4752 goto val_entry;
4753 }
4754 else if (r == TK_CC_AND) {
4755 CC_ESC_WARN(env, (UChar* )"-");
4756 goto range_end_val;
4757 }
4758
4759 if (val_type == CCV_CLASS) {
4761 goto err;
4762 }
4763
4764 state = CCS_RANGE;
4765 }
4766 else if (state == CCS_START) {
4767 /* [-xa] is allowed */
4768 v = (OnigCodePoint )tok->u.c;
4769 in_israw = 0;
4770
4771 r = fetch_token_in_cc(tok, &p, end, env);
4772 if (r < 0) goto err;
4773 fetched = 1;
4774 /* [--x] or [a&&-x] is warned. */
4775 if (r == TK_CC_RANGE || and_start != 0)
4776 CC_ESC_WARN(env, (UChar* )"-");
4777
4778 goto val_entry;
4779 }
4780 else if (state == CCS_RANGE) {
4781 CC_ESC_WARN(env, (UChar* )"-");
4782 goto sb_char; /* [!--x] is allowed */
4783 }
4784 else { /* CCS_COMPLETE */
4785 r = fetch_token_in_cc(tok, &p, end, env);
4786 if (r < 0) goto err;
4787 fetched = 1;
4788 if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */
4789 else if (r == TK_CC_AND) {
4790 CC_ESC_WARN(env, (UChar* )"-");
4791 goto range_end_val;
4792 }
4793
4795 CC_ESC_WARN(env, (UChar* )"-");
4796 goto range_end_val; /* [0-9-a] is allowed as [0-9\-a] */
4797 }
4799 goto err;
4800 }
4801 break;
4802
4803 case TK_CC_CC_OPEN: /* [ */
4804 {
4805 Node *anode, *aasc_node;
4806 CClassNode* acc;
4807
4808 r = parse_char_class(&anode, &aasc_node, tok, &p, end, env);
4809 if (r == 0) {
4810 acc = NCCLASS(anode);
4811 r = or_cclass(cc, acc, env);
4812 }
4813 if (r == 0 && IS_NOT_NULL(aasc_node)) {
4814 acc = NCCLASS(aasc_node);
4815 r = or_cclass(asc_cc, acc, env);
4816 }
4817 onig_node_free(anode);
4818 onig_node_free(aasc_node);
4819 if (r != 0) goto err;
4820 }
4821 break;
4822
4823 case TK_CC_AND: /* && */
4824 {
4825 if (state == CCS_VALUE) {
4826 r = next_state_val(cc, asc_cc, &vs, 0, &val_israw, 0, val_type,
4827 &val_type, &state, env);
4828 if (r != 0) goto err;
4829 }
4830 /* initialize local variables */
4831 and_start = 1;
4832 state = CCS_START;
4833
4834 if (IS_NOT_NULL(prev_cc)) {
4835 r = and_cclass(prev_cc, cc, env);
4836 if (r != 0) goto err;
4837 bbuf_free(cc->mbuf);
4838 if (IS_NOT_NULL(asc_cc)) {
4839 r = and_cclass(asc_prev_cc, asc_cc, env);
4840 if (r != 0) goto err;
4841 bbuf_free(asc_cc->mbuf);
4842 }
4843 }
4844 else {
4845 prev_cc = cc;
4846 cc = &work_cc;
4847 if (IS_NOT_NULL(asc_cc)) {
4848 asc_prev_cc = asc_cc;
4849 asc_cc = &asc_work_cc;
4850 }
4851 }
4852 initialize_cclass(cc);
4853 if (IS_NOT_NULL(asc_cc))
4854 initialize_cclass(asc_cc);
4855 }
4856 break;
4857
4858 case TK_EOT:
4860 goto err;
4861 break;
4862 default:
4864 goto err;
4865 break;
4866 }
4867
4868 if (fetched)
4869 r = tok->type;
4870 else {
4871 r = fetch_token_in_cc(tok, &p, end, env);
4872 if (r < 0) goto err;
4873 }
4874 }
4875
4876 if (state == CCS_VALUE) {
4877 r = next_state_val(cc, asc_cc, &vs, 0, &val_israw, 0, val_type,
4878 &val_type, &state, env);
4879 if (r != 0) goto err;
4880 }
4881
4882 if (IS_NOT_NULL(prev_cc)) {
4883 r = and_cclass(prev_cc, cc, env);
4884 if (r != 0) goto err;
4885 bbuf_free(cc->mbuf);
4886 cc = prev_cc;
4887 if (IS_NOT_NULL(asc_cc)) {
4888 r = and_cclass(asc_prev_cc, asc_cc, env);
4889 if (r != 0) goto err;
4890 bbuf_free(asc_cc->mbuf);
4891 asc_cc = asc_prev_cc;
4892 }
4893 }
4894
4895 if (neg != 0) {
4896 NCCLASS_SET_NOT(cc);
4897 if (IS_NOT_NULL(asc_cc))
4898 NCCLASS_SET_NOT(asc_cc);
4899 }
4900 else {
4902 if (IS_NOT_NULL(asc_cc))
4903 NCCLASS_CLEAR_NOT(asc_cc);
4904 }
4905 if (IS_NCCLASS_NOT(cc) &&
4907 int is_empty;
4908
4909 is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);
4910 if (is_empty != 0)
4911 BITSET_IS_EMPTY(cc->bs, is_empty);
4912
4913 if (is_empty == 0) {
4914#define NEWLINE_CODE 0x0a
4915
4917 if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)
4919 else {
4920 r = add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);
4921 if (r < 0) goto err;
4922 }
4923 }
4924 }
4925 }
4926 *src = p;
4927 env->parse_depth--;
4928 return 0;
4929
4930 err:
4931 if (cc != NCCLASS(*np))
4932 bbuf_free(cc->mbuf);
4933 if (IS_NOT_NULL(asc_cc) && (asc_cc != NCCLASS(*asc_np)))
4934 bbuf_free(asc_cc->mbuf);
4935 return r;
4936}
4937
4938static int parse_subexp(Node** top, OnigToken* tok, int term,
4939 UChar** src, UChar* end, ScanEnv* env);
4940
4941static int
4942parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
4943 ScanEnv* env)
4944{
4945 int r = 0, num;
4946 Node *target, *work1 = NULL, *work2 = NULL;
4947 OnigOptionType option;
4948 OnigCodePoint c;
4949 OnigEncoding enc = env->enc;
4950
4951#ifdef USE_NAMED_GROUP
4952 int list_capture;
4953#endif
4954
4955 UChar* p = *src;
4957
4958 *np = NULL;
4960
4961 option = env->option;
4962 if (PPEEK_IS('?') &&
4964 PINC;
4966
4967 PFETCH(c);
4968 switch (c) {
4969 case ':': /* (?:...) grouping only */
4970 group:
4971 r = fetch_token(tok, &p, end, env);
4972 if (r < 0) return r;
4973 r = parse_subexp(np, tok, term, &p, end, env);
4974 if (r < 0) return r;
4975 *src = p;
4976 return 1; /* group */
4977 break;
4978
4979 case '=':
4981 break;
4982 case '!': /* preceding read */
4984 break;
4985 case '>': /* (?>...) stop backtrack */
4986 *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
4987 break;
4988 case '~': /* (?~...) absent operator */
4990 *np = node_new_enclose(ENCLOSE_ABSENT);
4991 }
4992 else {
4994 }
4995 break;
4996
4997#ifdef USE_NAMED_GROUP
4998 case '\'':
5000 goto named_group1;
5001 }
5002 else
5004 break;
5005
5006# ifdef USE_CAPITAL_P_NAMED_GROUP
5007 case 'P': /* (?P<name>...) */
5008 if (!PEND &&
5010 PFETCH(c);
5011 if (c == '<') goto named_group1;
5012 }
5014 break;
5015# endif
5016#endif
5017
5018 case '<': /* look behind (?<=...), (?<!...) */
5020 PFETCH(c);
5021 if (c == '=')
5023 else if (c == '!')
5025#ifdef USE_NAMED_GROUP
5026 else { /* (?<name>...) */
5028 UChar *name;
5029 UChar *name_end;
5030
5031 PUNFETCH;
5032 c = '<';
5033
5034 named_group1:
5035 list_capture = 0;
5036
5037# ifdef USE_CAPTURE_HISTORY
5038 named_group2:
5039# endif
5040 name = p;
5041 r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, 0);
5042 if (r < 0) return r;
5043
5044 num = scan_env_add_mem_entry(env);
5045 if (num < 0) return num;
5046 if (list_capture != 0 && num >= (int )BIT_STATUS_BITS_NUM)
5048
5049 r = name_add(env->reg, name, name_end, num, env);
5050 if (r != 0) return r;
5051 *np = node_new_enclose_memory(env->option, 1);
5053 NENCLOSE(*np)->regnum = num;
5054 if (list_capture != 0)
5055 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
5056 env->num_named++;
5057 }
5058 else {
5060 }
5061 }
5062#else
5063 else {
5065 }
5066#endif
5067 break;
5068
5069#ifdef USE_CAPTURE_HISTORY
5070 case '@':
5072# ifdef USE_NAMED_GROUP
5073 if (!PEND &&
5075 PFETCH(c);
5076 if (c == '<' || c == '\'') {
5077 list_capture = 1;
5078 goto named_group2; /* (?@<name>...) */
5079 }
5080 PUNFETCH;
5081 }
5082# endif
5083 *np = node_new_enclose_memory(env->option, 0);
5085 num = scan_env_add_mem_entry(env);
5086 if (num < 0) return num;
5087 if (num >= (int )BIT_STATUS_BITS_NUM)
5089
5090 NENCLOSE(*np)->regnum = num;
5091 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
5092 }
5093 else {
5095 }
5096 break;
5097#endif /* USE_CAPTURE_HISTORY */
5098
5099 case '(': /* conditional expression: (?(cond)yes), (?(cond)yes|no) */
5100 if (!PEND &&
5102 UChar *name = NULL;
5103 UChar *name_end;
5104 PFETCH(c);
5105 if (ONIGENC_IS_CODE_DIGIT(enc, c)) { /* (n) */
5106 PUNFETCH;
5107 r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &num, 1);
5108 if (r < 0) return r;
5109#if 0
5110 /* Relative number is not currently supported. (same as Perl) */
5111 if (num < 0) {
5113 if (num <= 0)
5115 }
5116#endif
5118 if (num > env->num_mem ||
5121 }
5122 }
5123#ifdef USE_NAMED_GROUP
5124 else if (c == '<' || c == '\'') { /* (<name>), ('name') */
5125 name = p;
5126 r = fetch_named_backref_token(c, tok, &p, end, env);
5127 if (r < 0) return r;
5128 if (!PPEEK_IS(')')) return ONIGERR_UNDEFINED_GROUP_OPTION;
5129 PINC;
5130
5132 num = tok->u.backref.ref1;
5133 }
5134 else {
5135 /* FIXME:
5136 * Use left most named group for now. This is the same as Perl.
5137 * However this should use the same strategy as normal back-
5138 * references on Ruby syntax; search right to left. */
5139 int len = tok->u.backref.num;
5140 num = len > 1 ? tok->u.backref.refs[0] : tok->u.backref.ref1;
5141 }
5142 }
5143#endif
5144 else
5146 *np = node_new_enclose(ENCLOSE_CONDITION);
5148 NENCLOSE(*np)->regnum = num;
5149 if (IS_NOT_NULL(name)) NENCLOSE(*np)->state |= NST_NAME_REF;
5150 }
5151 else
5153 break;
5154
5155#if 0
5156 case '|': /* branch reset: (?|...) */
5158 /* TODO */
5159 }
5160 else
5162 break;
5163#endif
5164
5165 case '^': /* loads default options */
5166 if (!PEND && IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
5167 /* d-imsx */
5168 ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1);
5169 ONOFF(option, ONIG_OPTION_IGNORECASE, 1);
5170 ONOFF(option, ONIG_OPTION_SINGLELINE, 0);
5171 ONOFF(option, ONIG_OPTION_MULTILINE, 1);
5172 ONOFF(option, ONIG_OPTION_EXTEND, 1);
5173 PFETCH(c);
5174 }
5175#if 0
5176 else if (!PEND && IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {
5177 /* d-imx */
5178 ONOFF(option, ONIG_OPTION_ASCII_RANGE, 0);
5181 ONOFF(option, ONIG_OPTION_IGNORECASE, 1);
5182 ONOFF(option, ONIG_OPTION_MULTILINE, 1);
5183 ONOFF(option, ONIG_OPTION_EXTEND, 1);
5184 PFETCH(c);
5185 }
5186#endif
5187 else {
5189 }
5190 /* fall through */
5191#ifdef USE_POSIXLINE_OPTION
5192 case 'p':
5193#endif
5194 case '-': case 'i': case 'm': case 's': case 'x':
5195 case 'a': case 'd': case 'l': case 'u':
5196 {
5197 int neg = 0;
5198
5199 while (1) {
5200 switch (c) {
5201 case ':':
5202 case ')':
5203 break;
5204
5205 case '-': neg = 1; break;
5206 case 'x': ONOFF(option, ONIG_OPTION_EXTEND, neg); break;
5207 case 'i': ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break;
5208 case 's':
5211 }
5212 else
5214 break;
5215
5216 case 'm':
5218 ONOFF(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));
5219 }
5220 else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {
5222 }
5223 else
5225 break;
5226#ifdef USE_POSIXLINE_OPTION
5227 case 'p':
5229 break;
5230#endif
5231
5232 case 'a': /* limits \d, \s, \w and POSIX brackets to ASCII range */
5235 (neg == 0)) {
5236 ONOFF(option, ONIG_OPTION_ASCII_RANGE, 0);
5239 }
5240 else
5242 break;
5243
5244 case 'u':
5247 (neg == 0)) {
5248 ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1);
5251 }
5252 else
5254 break;
5255
5256 case 'd':
5258 (neg == 0)) {
5259 ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1);
5260 }
5261 else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY) &&
5262 (neg == 0)) {
5263 ONOFF(option, ONIG_OPTION_ASCII_RANGE, 0);
5266 }
5267 else
5269 break;
5270
5271 case 'l':
5272 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL) && (neg == 0)) {
5273 ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1);
5274 }
5275 else
5277 break;
5278
5279 default:
5281 }
5282
5283 if (c == ')') {
5284 *np = node_new_option(option);
5286 *src = p;
5287 return 2; /* option only */
5288 }
5289 else if (c == ':') {
5290 OnigOptionType prev = env->option;
5291
5292 env->option = option;
5293 r = fetch_token(tok, &p, end, env);
5294 if (r < 0) {
5295 env->option = prev;
5296 return r;
5297 }
5298 r = parse_subexp(&target, tok, term, &p, end, env);
5299 env->option = prev;
5300 if (r < 0) return r;
5301 *np = node_new_option(option);
5303 NENCLOSE(*np)->target = target;
5304 *src = p;
5305 return 0;
5306 }
5307
5309 PFETCH(c);
5310 }
5311 }
5312 break;
5313
5314 default:
5316 }
5317 }
5318 else {
5320 goto group;
5321
5322 *np = node_new_enclose_memory(env->option, 0);
5324 num = scan_env_add_mem_entry(env);
5325 if (num < 0) return num;
5326 NENCLOSE(*np)->regnum = num;
5327 }
5328
5330 r = fetch_token(tok, &p, end, env);
5331 if (r < 0) return r;
5332 r = parse_subexp(&target, tok, term, &p, end, env);
5333 if (r < 0) {
5334 onig_node_free(target);
5335 return r;
5336 }
5337
5338 if (NTYPE(*np) == NT_ANCHOR)
5339 NANCHOR(*np)->target = target;
5340 else {
5341 NENCLOSE(*np)->target = target;
5342 if (NENCLOSE(*np)->type == ENCLOSE_MEMORY) {
5343 /* Don't move this to previous of parse_subexp() */
5344 r = scan_env_set_mem_node(env, NENCLOSE(*np)->regnum, *np);
5345 if (r != 0) return r;
5346 }
5347 else if (NENCLOSE(*np)->type == ENCLOSE_CONDITION) {
5348 if (NTYPE(target) != NT_ALT) {
5349 /* convert (?(cond)yes) to (?(cond)yes|empty) */
5350 work1 = node_new_empty();
5351 if (IS_NULL(work1)) goto err;
5352 work2 = onig_node_new_alt(work1, NULL_NODE);
5353 if (IS_NULL(work2)) goto err;
5354 work1 = onig_node_new_alt(target, work2);
5355 if (IS_NULL(work1)) goto err;
5356 NENCLOSE(*np)->target = work1;
5357 }
5358 }
5359 }
5360
5361 *src = p;
5362 return 0;
5363
5364 err:
5365 onig_node_free(work1);
5366 onig_node_free(work2);
5367 onig_node_free(*np);
5368 *np = NULL;
5369 return ONIGERR_MEMORY;
5370}
5371
5372static const char* const PopularQStr[] = {
5373 "?", "*", "+", "??", "*?", "+?"
5374};
5375
5376static const char* const ReduceQStr[] = {
5377 "", "", "*", "*?", "??", "+ and ??", "+? and ?"
5378};
5379
5380static int
5381set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
5382{
5383 QtfrNode* qn;
5384
5385 qn = NQTFR(qnode);
5386 if (qn->lower == 1 && qn->upper == 1) {
5387 return 1;
5388 }
5389
5390 switch (NTYPE(target)) {
5391 case NT_STR:
5392 if (! group) {
5393 StrNode* sn = NSTR(target);
5394 if (str_node_can_be_split(sn, env->enc)) {
5395 Node* n = str_node_split_last_char(sn, env->enc);
5396 if (IS_NOT_NULL(n)) {
5397 qn->target = n;
5398 return 2;
5399 }
5400 }
5401 }
5402 break;
5403
5404 case NT_QTFR:
5405 { /* check redundant double repeat. */
5406 /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */
5407 QtfrNode* qnt = NQTFR(target);
5408 int nestq_num = popular_quantifier_num(qn);
5409 int targetq_num = popular_quantifier_num(qnt);
5410
5411#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
5412 if (nestq_num >= 0 && targetq_num >= 0 &&
5414 switch (ReduceTypeTable[targetq_num][nestq_num]) {
5415 case RQ_ASIS:
5416 break;
5417
5418 case RQ_DEL:
5419 if (onig_warn != onig_null_warn) {
5420 onig_syntax_warn(env, "regular expression has redundant nested repeat operator '%s'",
5421 PopularQStr[targetq_num]);
5422 }
5423 goto warn_exit;
5424 break;
5425
5426 default:
5427 if (onig_warn != onig_null_warn) {
5428 onig_syntax_warn(env, "nested repeat operator '%s' and '%s' was replaced with '%s' in regular expression",
5429 PopularQStr[targetq_num], PopularQStr[nestq_num],
5430 ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
5431 }
5432 goto warn_exit;
5433 break;
5434 }
5435 }
5436
5437 warn_exit:
5438#endif
5439 if (targetq_num >= 0) {
5440 if (nestq_num >= 0) {
5441 onig_reduce_nested_quantifier(qnode, target);
5442 goto q_exit;
5443 }
5444 else if (targetq_num == 1 || targetq_num == 2) { /* * or + */
5445 /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */
5446 if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {
5447 qn->upper = (qn->lower == 0 ? 1 : qn->lower);
5448 }
5449 }
5450 }
5451 }
5452 break;
5453
5454 default:
5455 break;
5456 }
5457
5458 qn->target = target;
5459 q_exit:
5460 return 0;
5461}
5462
5463
5464#ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
5465static int
5466clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
5467{
5468 BBuf *tbuf;
5469 int r;
5470
5471 if (IS_NCCLASS_NOT(cc)) {
5472 bitset_invert(cc->bs);
5473
5474 if (! ONIGENC_IS_SINGLEBYTE(enc)) {
5475 r = not_code_range_buf(enc, cc->mbuf, &tbuf);
5476 if (r != 0) return r;
5477
5478 bbuf_free(cc->mbuf);
5479 cc->mbuf = tbuf;
5480 }
5481
5483 }
5484
5485 return 0;
5486}
5487#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
5488
5489typedef struct {
5496
5497static int
5498i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],
5499 int to_len, void* arg)
5500{
5501 IApplyCaseFoldArg* iarg;
5502 ScanEnv* env;
5503 CClassNode* cc;
5504 CClassNode* asc_cc;
5505 BitSetRef bs;
5506 int add_flag, r;
5507
5508 iarg = (IApplyCaseFoldArg* )arg;
5509 env = iarg->env;
5510 cc = iarg->cc;
5511 asc_cc = iarg->asc_cc;
5512 bs = cc->bs;
5513
5514 if (IS_NULL(asc_cc)) {
5515 add_flag = 0;
5516 }
5517 else if (ONIGENC_IS_ASCII_CODE(from) == ONIGENC_IS_ASCII_CODE(*to)) {
5518 add_flag = 1;
5519 }
5520 else {
5521 add_flag = onig_is_code_in_cc(env->enc, from, asc_cc);
5522 if (IS_NCCLASS_NOT(asc_cc))
5523 add_flag = !add_flag;
5524 }
5525
5526 if (to_len == 1) {
5527 int is_in = onig_is_code_in_cc(env->enc, from, cc);
5528#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
5529 if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||
5530 (is_in == 0 && IS_NCCLASS_NOT(cc))) {
5531 if (add_flag) {
5532 if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
5533 r = add_code_range0(&(cc->mbuf), env, *to, *to, 0);
5534 if (r < 0) return r;
5535 }
5536 else {
5537 BITSET_SET_BIT(bs, *to);
5538 }
5539 }
5540 }
5541#else
5542 if (is_in != 0) {
5543 if (add_flag) {
5544 if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
5545 if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);
5546 r = add_code_range0(&(cc->mbuf), env, *to, *to, 0);
5547 if (r < 0) return r;
5548 }
5549 else {
5550 if (IS_NCCLASS_NOT(cc)) {
5551 BITSET_CLEAR_BIT(bs, *to);
5552 }
5553 else {
5554 BITSET_SET_BIT(bs, *to);
5555 }
5556 }
5557 }
5558 }
5559#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
5560 }
5561 else {
5562 int r, i, len;
5564 Node *snode = NULL_NODE;
5565
5566 if (onig_is_code_in_cc(env->enc, from, cc)
5568 && !IS_NCCLASS_NOT(cc)
5569#endif
5570 ) {
5571 for (i = 0; i < to_len; i++) {
5572 len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf);
5573 if (i == 0) {
5574 snode = onig_node_new_str(buf, buf + len);
5576
5577 /* char-class expanded multi-char only
5578 compare with string folded at match time. */
5579 NSTRING_SET_AMBIG(snode);
5580 }
5581 else {
5582 r = onig_node_str_cat(snode, buf, buf + len);
5583 if (r < 0) {
5584 onig_node_free(snode);
5585 return r;
5586 }
5587 }
5588 }
5589
5590 *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE);
5592 iarg->ptail = &(NCDR((*(iarg->ptail))));
5593 }
5594 }
5595
5596 return 0;
5597}
5598
5599static int
5600cclass_case_fold(Node** np, CClassNode* cc, CClassNode* asc_cc, ScanEnv* env)
5601{
5602 int r;
5603 IApplyCaseFoldArg iarg;
5604
5605 iarg.env = env;
5606 iarg.cc = cc;
5607 iarg.asc_cc = asc_cc;
5608 iarg.alt_root = NULL_NODE;
5609 iarg.ptail = &(iarg.alt_root);
5610
5611 r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,
5612 i_apply_case_fold, &iarg);
5613 if (r != 0) {
5615 return r;
5616 }
5617 if (IS_NOT_NULL(iarg.alt_root)) {
5618 Node* work = onig_node_new_alt(*np, iarg.alt_root);
5619 if (IS_NULL(work)) {
5621 return ONIGERR_MEMORY;
5622 }
5623 *np = work;
5624 }
5625 return r;
5626}
5627
5628static int
5629node_linebreak(Node** np, ScanEnv* env)
5630{
5631 /* same as (?>\x0D\x0A|[\x0A-\x0D\x{85}\x{2028}\x{2029}]) */
5632 Node* left = NULL;
5633 Node* right = NULL;
5634 Node* target1 = NULL;
5635 Node* target2 = NULL;
5636 CClassNode* cc;
5637 int num1, num2, r;
5639
5640 /* \x0D\x0A */
5641 num1 = ONIGENC_CODE_TO_MBC(env->enc, 0x0D, buf);
5642 if (num1 < 0) return num1;
5643 num2 = ONIGENC_CODE_TO_MBC(env->enc, 0x0A, buf + num1);
5644 if (num2 < 0) return num2;
5645 left = node_new_str_raw(buf, buf + num1 + num2);
5646 if (IS_NULL(left)) goto err;
5647
5648 /* [\x0A-\x0D] or [\x0A-\x0D\x{85}\x{2028}\x{2029}] */
5649 right = node_new_cclass();
5650 if (IS_NULL(right)) goto err;
5651 cc = NCCLASS(right);
5652 if (ONIGENC_MBC_MINLEN(env->enc) > 1) {
5653 r = add_code_range(&(cc->mbuf), env, 0x0A, 0x0D);
5654 if (r != 0) goto err;
5655 }
5656 else {
5657 bitset_set_range(env, cc->bs, 0x0A, 0x0D);
5658 }
5659
5660 /* TODO: move this block to enc/unicode.c */
5661 if (ONIGENC_IS_UNICODE(env->enc)) {
5662 /* UTF-8, UTF-16BE/LE, UTF-32BE/LE */
5663 r = add_code_range(&(cc->mbuf), env, 0x85, 0x85);
5664 if (r != 0) goto err;
5665 r = add_code_range(&(cc->mbuf), env, 0x2028, 0x2029);
5666 if (r != 0) goto err;
5667 }
5668
5669 /* ...|... */
5670 target1 = onig_node_new_alt(right, NULL_NODE);
5671 if (IS_NULL(target1)) goto err;
5672 right = NULL;
5673 target2 = onig_node_new_alt(left, target1);
5674 if (IS_NULL(target2)) goto err;
5675 left = NULL;
5676 target1 = NULL;
5677
5678 /* (?>...) */
5679 *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
5680 if (IS_NULL(*np)) goto err;
5681 NENCLOSE(*np)->target = target2;
5682 return ONIG_NORMAL;
5683
5684 err:
5685 onig_node_free(left);
5686 onig_node_free(right);
5687 onig_node_free(target1);
5688 onig_node_free(target2);
5689 return ONIGERR_MEMORY;
5690}
5691
5692static int
5693propname2ctype(ScanEnv* env, const char* propname)
5694{
5695 UChar* name = (UChar* )propname;
5696 UChar* name_end = name + strlen(propname);
5697 int ctype = env->enc->property_name_to_ctype(ONIG_ENCODING_ASCII,
5698 name, name_end);
5699 if (ctype < 0) {
5700 onig_scan_env_set_error_string(env, ctype, name, name_end);
5701 }
5702 return ctype;
5703}
5704
5705static int
5706add_property_to_cc(CClassNode* cc, const char* propname, int not, ScanEnv* env)
5707{
5708 int ctype = propname2ctype(env, propname);
5709 if (ctype < 0) return ctype;
5710 return add_ctype_to_cc(cc, ctype, not, 0, env);
5711}
5712
5713/*
5714 * helper methods for node_extended_grapheme_cluster (/\X/)
5715 */
5716static int
5717create_property_node(Node **np, ScanEnv* env, const char* propname)
5718{
5719 int r;
5720 CClassNode* cc;
5721
5722 *np = node_new_cclass();
5723 if (IS_NULL(*np)) return ONIGERR_MEMORY;
5724 cc = NCCLASS(*np);
5725 r = add_property_to_cc(cc, propname, 0, env);
5726 if (r != 0)
5727 onig_node_free(*np);
5728 return r;
5729}
5730
5731static int
5732quantify_node(Node **np, int lower, int upper)
5733{
5734 Node* tmp = node_new_quantifier(lower, upper, 0);
5735 if (IS_NULL(tmp)) return ONIGERR_MEMORY;
5736 NQTFR(tmp)->target = *np;
5737 *np = tmp;
5738 return 0;
5739}
5740
5741static int
5742quantify_property_node(Node **np, ScanEnv* env, const char* propname, char repetitions)
5743{
5744 int r;
5745 int lower = 0;
5746 int upper = REPEAT_INFINITE;
5747
5748 r = create_property_node(np, env, propname);
5749 if (r != 0) return r;
5750 switch (repetitions) {
5751 case '?': upper = 1; break;
5752 case '+': lower = 1; break;
5753 case '*': break;
5754 case '2': lower = upper = 2; break;
5755 default : return ONIGERR_PARSER_BUG;
5756 }
5757 return quantify_node(np, lower, upper);
5758}
5759
5760#define LIST 0
5761#define ALT 1
5762
5763/* IMPORTANT: Make sure node_array ends with NULL_NODE */
5764static int
5765create_node_from_array(int kind, Node **np, Node **node_array)
5766{
5767 Node* tmp = NULL_NODE;
5768 int i = 0;
5769
5770 while (node_array[i] != NULL_NODE) i++;
5771 while (--i >= 0) {
5772 *np = kind==LIST ? node_new_list(node_array[i], tmp)
5773 : onig_node_new_alt(node_array[i], tmp);
5774 if (IS_NULL(*np)) {
5775 while (i >= 0) {
5776 onig_node_free(node_array[i]);
5777 node_array[i--] = NULL_NODE;
5778 }
5779 onig_node_free(tmp);
5780 return ONIGERR_MEMORY;
5781 }
5782 else
5783 node_array[i] = NULL_NODE;
5784 tmp = *np;
5785 }
5786 return 0;
5787}
5788
5789#define R_ERR(call) r=(call);if(r!=0)goto err
5790
5791/* Memory layout for common node array:
5792 * The main purpose is to be able to easily free all leftover nodes
5793 * after an error. As a side effect, we share some memory.
5794 *
5795 * The layout is as shown below (each line corresponds to one call of
5796 * create_node_from_array()). Because create_node_from_array sets all
5797 * nodes of the source to NULL_NODE, we can overlap the target array
5798 * as long as we do not override the actual target location.
5799 *
5800 * Target Array name Index
5801 *
5802 * node_array 0 1 2 3 4 5 6 7 8 9 A B C D E F
5803 * top_alts alts[5] 0 1 2 3 4*
5804 * alts+1 list[4] 0 1 2 3*
5805 * list+1 core_alts[7] 0 1 2 3 4 5 6*
5806 * core_alts+0 H_list[4] 0 1 2 3*
5807 * H_list+1 H_alt2[4] 0 1 2 3*
5808 * H_alt2+1 H_list2[3] 0 1 2*
5809 * core_alts+4 XP_list[4] 0 1 2 3*
5810 * XP_list+1 Ex_list[4] 0 1 2 3*
5811 */
5812#define NODE_COMMON_SIZE 16
5813
/* Builds the node tree for an extended grapheme cluster and stores it in *np.
 *
 * The alternatives are collected in alts[] (the first slots of node_common[]):
 *   alts[0]  the two-character sequence 0x0D 0x0A encoded in env->enc
 *   alts[1]  [Control CR LF]              (Unicode encodings only)
 *   alts[2]  precore* core postcore*      (Unicode encodings only)
 *   alts[any_target_position]  an any-character node wrapped in an option node
 * They are joined into one alternation, which is wrapped in an
 * ENCLOSE_STOP_BACKTRACK node; for Unicode encodings that node is wrapped in
 * one more option node.
 *
 * Returns ONIG_NORMAL on success.  On failure control reaches `err`, where np1
 * and every entry of node_common[] are passed to onig_node_free(); the result
 * is r, or ONIGERR_MEMORY when r is still 0.
 *
 * NOTE(review): `buf` is used below but its declaration is not visible in this
 * listing (the line numbered 5824 is absent); R_ERR is defined above this
 * excerpt.  Confirm both against the upstream file. */
5814static int
5815node_extended_grapheme_cluster(Node** np, ScanEnv* env)
5816{
5817 Node* tmp = NULL;
5818 Node* np1 = NULL;
5819 Node* top_alt = NULL;
5820 int r = 0;
5821 int num1;
5822 int i;
5823 int any_target_position;
5825 OnigOptionType option;
5826 /* node_common is function-global so that we can free all nodes
5827 * in case of error. Unused slots are set to NULL_NODE at all times. */
5828 Node *node_common[NODE_COMMON_SIZE];
5829 Node **alts = node_common+0; /* size: 5 */
5830
5831 for (i=0; i<NODE_COMMON_SIZE; i++)
5832 node_common[i] = NULL_NODE;
5833
5834 /* CRLF, common for both Unicode and non-Unicode */
5835 /* \x0D\x0A */
5836 r = ONIGENC_CODE_TO_MBC(env->enc, 0x0D, buf);
5837 if (r < 0) goto err;
5838 num1 = r;
5839 r = ONIGENC_CODE_TO_MBC(env->enc, 0x0A, buf + num1);
5840 if (r < 0) goto err;
5841 alts[0] = node_new_str_raw(buf, buf + num1 + r);
5842 if (IS_NULL(alts[0])) goto err;
5843
5844#ifdef USE_UNICODE_PROPERTIES
5845 if (ONIGENC_IS_UNICODE(env->enc)) { /* UTF-8, UTF-16BE/LE, UTF-32BE/LE */
5846 CClassNode* cc;
5847
5848 if (propname2ctype(env, "Grapheme_Cluster_Break=Extend") < 0) goto err;
5849 /* Unicode 11.0.0
5850 * CRLF (already done)
5851 * | [Control CR LF]
5852 * | precore* core postcore*
5853 * | . (to catch invalid stuff, because this seems to be spec for String#grapheme_clusters) */
5854
5855 /* [Control CR LF] (CR and LF are not in the spec, but this is a conformed fix) */
5856 alts[1] = node_new_cclass();
5857 if (IS_NULL(alts[1])) goto err;
5858 cc = NCCLASS(alts[1]);
5859 R_ERR(add_property_to_cc(cc, "Grapheme_Cluster_Break=Control", 0, env));
5860 if (ONIGENC_MBC_MINLEN(env->enc) > 1) { /* UTF-16/UTF-32 */
5861 R_ERR(add_code_range(&(cc->mbuf), env, 0x000A, 0x000A)); /* LF */
5862 R_ERR(add_code_range(&(cc->mbuf), env, 0x000D, 0x000D)); /* CR */
5863 }
5864 else {
5865 BITSET_SET_BIT(cc->bs, 0x0a);
5866 BITSET_SET_BIT(cc->bs, 0x0d);
5867 }
5868
5869 /* precore* core postcore* */
5870 {
5871 Node **list = alts + 3; /* size: 4 */
5872
5873 /* precore*; precore := Prepend */
5874 R_ERR(quantify_property_node(list+0, env, "Grapheme_Cluster_Break=Prepend", '*'));
5875
5876 /* core := hangul-syllable
5877 * | ri-sequence
5878 * | xpicto-sequence
5879 * | [^Control CR LF] */
5880 {
5881 Node **core_alts = list + 2; /* size: 7 */
5882
5883 /* hangul-syllable :=
5884 * L* (V+ | LV V* | LVT) T*
5885 * | L+
5886 * | T+ */
5887 /* hangul-syllable is an alternative (would be called H_alt)
5888 * inside an alternative, but we flatten it into core_alts */
5889
5890 /* L* (V+ | LV V* | LVT) T* */
5891 {
5892 Node **H_list = core_alts + 1; /* size: 4 */
5893 R_ERR(quantify_property_node(H_list+0, env, "Grapheme_Cluster_Break=L", '*'));
5894
5895 /* V+ | LV V* | LVT */
5896 {
5897 Node **H_alt2 = H_list + 2; /* size: 4 */
5898 R_ERR(quantify_property_node(H_alt2+0, env, "Grapheme_Cluster_Break=V", '+'));
5899
5900 /* LV V* */
5901 {
5902 Node **H_list2 = H_alt2 + 2; /* size: 3 */
5903
5904 R_ERR(create_property_node(H_list2+0, env, "Grapheme_Cluster_Break=LV"));
5905 R_ERR(quantify_property_node(H_list2+1, env, "Grapheme_Cluster_Break=V", '*'));
5906 R_ERR(create_node_from_array(LIST, H_alt2+1, H_list2));
5907 }
5908
5909 R_ERR(create_property_node(H_alt2+2, env, "Grapheme_Cluster_Break=LVT"));
5910 R_ERR(create_node_from_array(ALT, H_list+1, H_alt2));
5911 }
5912
5913 R_ERR(quantify_property_node(H_list+2, env, "Grapheme_Cluster_Break=T", '*'));
5914 R_ERR(create_node_from_array(LIST, core_alts+0, H_list));
5915 }
5916
5917 R_ERR(quantify_property_node(core_alts+1, env, "Grapheme_Cluster_Break=L", '+'));
5918 R_ERR(quantify_property_node(core_alts+2, env, "Grapheme_Cluster_Break=T", '+'));
5919 /* end of hangul-syllable */
5920
5921 /* ri-sequence := RI RI */
5922 R_ERR(quantify_property_node(core_alts+3, env, "Regional_Indicator", '2'));
5923
5924 /* xpicto-sequence := \p{Extended_Pictographic} (Extend* ZWJ \p{Extended_Pictographic})* */
5925 {
5926 Node **XP_list = core_alts + 5; /* size: 3 */
5927 R_ERR(create_property_node(XP_list+0, env, "Extended_Pictographic"));
5928
5929 /* (Extend* ZWJ \p{Extended_Pictographic})* */
5930 {
5931 Node **Ex_list = XP_list + 2; /* size: 4 */
5932 /* assert(Ex_list+4 == node_common+NODE_COMMON_SIZE); */
5933 R_ERR(quantify_property_node(Ex_list+0, env, "Grapheme_Cluster_Break=Extend", '*'));
5934
5935 /* ZWJ (ZERO WIDTH JOINER) */
5936 r = ONIGENC_CODE_TO_MBC(env->enc, 0x200D, buf);
5937 if (r < 0) goto err;
5938 Ex_list[1] = node_new_str_raw(buf, buf + r);
5939 if (IS_NULL(Ex_list[1])) goto err;
5940
5941 R_ERR(create_property_node(Ex_list+2, env, "Extended_Pictographic"));
5942 R_ERR(create_node_from_array(LIST, XP_list+1, Ex_list));
5943 }
5944 R_ERR(quantify_node(XP_list+1, 0, REPEAT_INFINITE)); /* TODO: Check about node freeing */
5945
5946 R_ERR(create_node_from_array(LIST, core_alts+4, XP_list));
5947 }
5948
5949 /* [^Control CR LF] */
5950 core_alts[5] = node_new_cclass();
5951 if (IS_NULL(core_alts[5])) goto err;
5952 cc = NCCLASS(core_alts[5]);
5953 if (ONIGENC_MBC_MINLEN(env->enc) > 1) { /* UTF-16/UTF-32 */
5954 BBuf *inverted_buf = NULL;
5955
5956 /* TODO: fix false warning */
 /* dup_not_warned has every bit set except possibly ONIG_SYN_WARN_CC_DUP,
  * which keeps its current state; AND-ing it back in below restores that
  * one bit after it is forced on here.
  * NOTE(review): if one of the R_ERR calls in between bails out, the
  * restore below looks like it is skipped -- confirm whether that matters. */
5957 const int dup_not_warned = env->warnings_flag | ~ONIG_SYN_WARN_CC_DUP;
5958 env->warnings_flag |= ONIG_SYN_WARN_CC_DUP;
5959
5960 /* Start with a positive buffer and invert at the end.
5961 * Otherwise, adding single-character ranges work the wrong way. */
5962 R_ERR(add_property_to_cc(cc, "Grapheme_Cluster_Break=Control", 0, env));
5963 R_ERR(add_code_range(&(cc->mbuf), env, 0x000A, 0x000A)); /* LF */
5964 R_ERR(add_code_range(&(cc->mbuf), env, 0x000D, 0x000D)); /* CR */
5965 R_ERR(not_code_range_buf(env->enc, cc->mbuf, &inverted_buf, env));
 /* NOTE(review): the previous cc->mbuf pointer is overwritten here without
  * being released in this function -- looks like a leak unless
  * not_code_range_buf() takes ownership of its input; confirm. */
5966 cc->mbuf = inverted_buf; /* TODO: check what to do with buffer before inversion */
5967
5968 env->warnings_flag &= dup_not_warned; /* TODO: fix false warning */
5969 }
5970 else {
5971 R_ERR(add_property_to_cc(cc, "Grapheme_Cluster_Break=Control", 1, env));
5972 BITSET_CLEAR_BIT(cc->bs, 0x0a);
5973 BITSET_CLEAR_BIT(cc->bs, 0x0d);
5974 }
5975
5976 R_ERR(create_node_from_array(ALT, list+1, core_alts));
5977 }
5978
5979 /* postcore*; postcore = [Extend ZWJ SpacingMark] */
5980 R_ERR(create_property_node(list+2, env, "Grapheme_Cluster_Break=Extend"));
5981 cc = NCCLASS(list[2]);
5982 R_ERR(add_property_to_cc(cc, "Grapheme_Cluster_Break=SpacingMark", 0, env));
5983 R_ERR(add_code_range(&(cc->mbuf), env, 0x200D, 0x200D));
5984 R_ERR(quantify_node(list+2, 0, REPEAT_INFINITE));
5985
5986 R_ERR(create_node_from_array(LIST, alts+2, list));
5987 }
5988
 /* alts[0..2] are filled, so the catch-all alternative goes into alts[3]. */
5989 any_target_position = 3;
5990 }
5991 else
5992#endif /* USE_UNICODE_PROPERTIES */
5993 {
 /* only alts[0] (CRLF) is filled, so the catch-all alternative goes into alts[1]. */
5994 any_target_position = 1;
5995 }
5996
5997 /* PerlSyntax: (?s:.), RubySyntax: (?m:.), common for both Unicode and non-Unicode */
5998 /* Not in Unicode spec (UAX #29), but added to catch invalid stuff,
5999 * because this is Ruby spec for String#grapheme_clusters. */
6000 np1 = node_new_anychar();
6001 if (IS_NULL(np1)) goto err;
6002
6003 option = env->option;
6004 ONOFF(option, ONIG_OPTION_MULTILINE, 0);
6005 tmp = node_new_option(option);
6006 if (IS_NULL(tmp)) goto err;
6007 NENCLOSE(tmp)->target = np1;
6008 alts[any_target_position] = tmp;
 /* np1 is now reachable through alts[]; clear it so the err path, which frees
  * both np1 and node_common[], does not see the same node twice. */
6009 np1 = NULL;
6010
6011 R_ERR(create_node_from_array(ALT, &top_alt, alts));
6012
6013 /* (?>): For efficiency, because there is no text piece
6014 * that is not in a grapheme cluster, and there is only one way
6015 * to split a string into grapheme clusters. */
6016 tmp = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
6017 if (IS_NULL(tmp)) goto err;
6018 NENCLOSE(tmp)->target = top_alt;
6019 np1 = tmp;
6020
6021#ifdef USE_UNICODE_PROPERTIES
6022 if (ONIGENC_IS_UNICODE(env->enc)) {
6023 /* Don't ignore case. */
6024 option = env->option;
6025 ONOFF(option, ONIG_OPTION_IGNORECASE, 1);
6026 *np = node_new_option(option);
6027 if (IS_NULL(*np)) goto err;
6028 NENCLOSE(*np)->target = np1;
6029 }
6030 else
6031#endif
6032 {
6033 *np = np1;
6034 }
6035 return ONIG_NORMAL;
6036
 /* Common failure exit: release everything still tracked locally. */
6037 err:
6038 onig_node_free(np1);
6039 for (i=0; i<NODE_COMMON_SIZE; i++)
6040 onig_node_free(node_common[i]);
6041 return (r == 0) ? ONIGERR_MEMORY : r;
6042}
6043#undef R_ERR
6044
/* Returns the number of 1 bits in the low 32 bits of `bits`.
 *
 * Classic parallel bit count: each pass adds neighbouring fields of
 * `width` bits together, doubling the field width (1, 2, 4, 8, 16), so
 * after the fifth pass the whole value is the total count. */
static int
countbits(unsigned int bits)
{
  static const unsigned int field_mask[5] = {
    0x55555555, 0x33333333, 0x0f0f0f0f, 0x00ff00ff, 0x0000ffff
  };
  int pass;
  int width = 1;

  for (pass = 0; pass < 5; pass++) {
    bits = (bits & field_mask[pass]) + ((bits >> width) & field_mask[pass]);
    width *= 2;
  }
  return bits;
}
6054
6055static int
6056is_onechar_cclass(CClassNode* cc, OnigCodePoint* code)
6057{
6058 const OnigCodePoint not_found = ONIG_LAST_CODE_POINT;
6059 OnigCodePoint c = not_found;
6060 int i;
6061 BBuf *bbuf = cc->mbuf;
6062
6063 if (IS_NCCLASS_NOT(cc)) return 0;
6064
6065 /* check bbuf */
6066 if (IS_NOT_NULL(bbuf)) {
6067 OnigCodePoint n, *data;
6068 GET_CODE_POINT(n, bbuf->p);
6069 data = (OnigCodePoint* )(bbuf->p) + 1;
6070 if ((n == 1) && (data[0] == data[1])) {
6071 /* only one char found in the bbuf, save the code point. */
6072 c = data[0];
6073 if (((c < SINGLE_BYTE_SIZE) && BITSET_AT(cc->bs, c))) {
6074 /* skip if c is included in the bitset */
6075 c = not_found;
6076 }
6077 }
6078 else {
6079 return 0; /* the bbuf contains multiple chars */
6080 }
6081 }
6082
6083 /* check bitset */
6084 for (i = 0; i < BITSET_SIZE; i++) {
6085 Bits b1 = cc->bs[i];
6086 if (b1 != 0) {
6087 if (((b1 & (b1 - 1)) == 0) && (c == not_found)) {
6088 c = BITS_IN_ROOM * i + countbits(b1 - 1);
6089 } else {
6090 return 0; /* the character class contains multiple chars */
6091 }
6092 }
6093 }
6094
6095 if (c != not_found) {
6096 *code = c;
6097 return 1;
6098 }
6099
6100 /* the character class contains no char. */
6101 return 0;
6102}
6103
6104
/* Parses one expression element selected by tok->type, stores the resulting
 * node in *np, and then wraps it in quantifier nodes for any TK_OP_REPEAT /
 * TK_INTERVAL tokens that follow.
 *
 * Returns a negative error code on failure.  Otherwise it returns the value of
 * the last fetch_token() call, or tok->type on the two early-return paths
 * (end of token, and the option-only group case).
 *
 * NOTE(review): this listing skips a number of source lines (gaps in the
 * embedded numbering, e.g. 6151-6152, 6163, 6169-6170, 6177, 6207, 6233,
 * 6285-6287, 6317, 6399-6401, 6425, 6429).  Several `case` labels, NULL checks
 * and return statements are therefore not visible; confirm against the
 * upstream file before relying on the control flow as shown here. */
6105static int
6106parse_exp(Node** np, OnigToken* tok, int term,
6107 UChar** src, UChar* end, ScanEnv* env)
6108{
6109 int r, len, group = 0;
6110 Node* qn;
6111 Node** targetp;
6112
6113 *np = NULL;
6114 if (tok->type == (enum TokenSyms )term)
6115 goto end_of_token;
6116
6117 switch (tok->type) {
6118 case TK_ALT:
6119 case TK_EOT:
6120 end_of_token:
 /* NOTE(review): the result of node_new_empty() is not checked in the lines
  * visible here, so *np may be NULL while a non-negative value is returned. */
6121 *np = node_new_empty();
6122 return tok->type;
6123 break;
6124
6125 case TK_SUBEXP_OPEN:
6126 r = parse_enclose(np, tok, TK_SUBEXP_CLOSE, src, end, env);
6127 if (r < 0) return r;
6128 if (r == 1) group = 1;
6129 else if (r == 2) { /* option only */
6130 Node* target;
6131 OnigOptionType prev = env->option;
6132
 /* Parse the rest of the enclosing sub-expression with the new option in
  * effect, then restore the previous option on every exit path. */
6133 env->option = NENCLOSE(*np)->option;
6134 r = fetch_token(tok, src, end, env);
6135 if (r < 0) {
6136 env->option = prev;
6137 return r;
6138 }
6139 r = parse_subexp(&target, tok, term, src, end, env);
6140 env->option = prev;
6141 if (r < 0) {
6142 onig_node_free(target);
6143 return r;
6144 }
6145 NENCLOSE(*np)->target = target;
6146 return tok->type;
6147 }
6148 break;
6149
6150 case TK_SUBEXP_CLOSE:
6153
6154 if (tok->escaped) goto tk_raw_byte;
6155 else goto tk_byte;
6156 break;
6157
6158 case TK_LINEBREAK:
6159 r = node_linebreak(np, env);
6160 if (r < 0) return r;
6161 break;
6162
6164 r = node_extended_grapheme_cluster(np, env);
6165 if (r < 0) return r;
6166 break;
6167
6168 case TK_KEEP:
6171 break;
6172
6173 case TK_STRING:
6174 tk_byte:
6175 {
6176 *np = node_new_str(tok->backp, *src);
6178
 /* Keep appending following string (and, unless
  * NUMBERED_CHAR_IS_NOT_CASE_AMBIG is defined, code point) tokens to the
  * same string node; any other token ends the loop with r holding it. */
6179 string_loop:
6180 while (1) {
6181 r = fetch_token(tok, src, end, env);
6182 if (r < 0) return r;
6183 if (r == TK_STRING) {
6184 r = onig_node_str_cat(*np, tok->backp, *src);
6185 }
6186#ifndef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
6187 else if (r == TK_CODE_POINT) {
6188 r = node_str_cat_codepoint(*np, env->enc, tok->u.code);
6189 }
6190#endif
6191 else {
6192 break;
6193 }
6194 if (r < 0) return r;
6195 }
6196
6197 string_end:
6198 targetp = np;
6199 goto repeat;
6200 }
6201 break;
6202
6203 case TK_RAW_BYTE:
6204 tk_raw_byte:
6205 {
6206 *np = node_new_str_raw_char((UChar )tok->u.c);
6208 len = 1;
 /* Collect raw bytes until they form one complete character in env->enc. */
6209 while (1) {
6210 if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
6211 if (len == enclen(env->enc, NSTR(*np)->s, NSTR(*np)->end)) {
6212 r = fetch_token(tok, src, end, env);
6213 NSTRING_CLEAR_RAW(*np);
6214 goto string_end;
6215 }
6216 }
6217
6218 r = fetch_token(tok, src, end, env);
6219 if (r < 0) return r;
6220 if (r != TK_RAW_BYTE) {
6221 /* Don't use this, it is wrong for little endian encodings. */
6222#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
6223 int rem;
6224 if (len < ONIGENC_MBC_MINLEN(env->enc)) {
6225 rem = ONIGENC_MBC_MINLEN(env->enc) - len;
6226 (void )node_str_head_pad(NSTR(*np), rem, (UChar )0);
6227 if (len + rem == enclen(env->enc, NSTR(*np)->s)) {
6228 NSTRING_CLEAR_RAW(*np);
6229 goto string_end;
6230 }
6231 }
6232#endif
6234 }
6235
6236 r = node_str_cat_char(*np, (UChar )tok->u.c);
6237 if (r < 0) return r;
6238
6239 len++;
6240 }
6241 }
6242 break;
6243
6244 case TK_CODE_POINT:
6245 {
6246 *np = node_new_empty();
6248 r = node_str_cat_codepoint(*np, env->enc, tok->u.code);
6249 if (r != 0) return r;
6250#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
6251 NSTRING_SET_RAW(*np);
6252#else
6253 goto string_loop;
6254#endif
6255 }
6256 break;
6257
6258 case TK_QUOTE_OPEN:
6259 {
6260 OnigCodePoint end_op[2];
6261 UChar *qstart, *qend, *nextp;
6262
 /* The quoted run ends at the escape character followed by 'E', or at the
  * end of the pattern when that terminator is not found. */
6263 end_op[0] = (OnigCodePoint )MC_ESC(env->syntax);
6264 end_op[1] = (OnigCodePoint )'E';
6265 qstart = *src;
6266 qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);
6267 if (IS_NULL(qend)) {
6268 nextp = qend = end;
6269 }
6270 *np = node_new_str(qstart, qend);
6272 *src = nextp;
6273 }
6274 break;
6275
6276 case TK_CHAR_TYPE:
6277 {
6278 switch (tok->u.prop.ctype) {
6279 case ONIGENC_CTYPE_WORD:
6280 *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not,
6281 IS_ASCII_RANGE(env->option));
6283 break;
6284
6288 {
6289 CClassNode* cc;
6290
6291 *np = node_new_cclass();
6293 cc = NCCLASS(*np);
6294 r = add_ctype_to_cc(cc, tok->u.prop.ctype, 0,
6295 IS_ASCII_RANGE(env->option), env);
6296 if (r != 0) return r;
6297 if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
6298 }
6299 break;
6300
6301 default:
6302 return ONIGERR_PARSER_BUG;
6303 break;
6304 }
6305 }
6306 break;
6307
6308 case TK_CHAR_PROPERTY:
6309 r = parse_char_property(np, tok, src, end, env);
6310 if (r != 0) return r;
6311 break;
6312
6313 case TK_CC_OPEN:
6314 {
6315 Node *asc_node;
6316 CClassNode* cc;
6318
6319 r = parse_char_class(np, &asc_node, tok, src, end, env);
6320 if (r != 0) {
6321 onig_node_free(asc_node);
6322 return r;
6323 }
6324
6325 cc = NCCLASS(*np);
 /* A class holding exactly one code point is replaced by a plain string
  * node and then handled like ordinary string tokens. */
6326 if (is_onechar_cclass(cc, &code)) {
6327 onig_node_free(*np);
6328 onig_node_free(asc_node);
6329 *np = node_new_empty();
6331 r = node_str_cat_codepoint(*np, env->enc, code);
6332 if (r != 0) return r;
6333 goto string_loop;
6334 }
6335 if (IS_IGNORECASE(env->option)) {
6336 r = cclass_case_fold(np, cc, NCCLASS(asc_node), env);
6337 if (r != 0) {
6338 onig_node_free(asc_node);
6339 return r;
6340 }
6341 }
6342 onig_node_free(asc_node);
6343 }
6344 break;
6345
6346 case TK_ANYCHAR:
6347 *np = node_new_anychar();
6349 break;
6350
6351 case TK_ANYCHAR_ANYTIME:
6352 *np = node_new_anychar();
6354 qn = node_new_quantifier(0, REPEAT_INFINITE, 0);
6356 NQTFR(qn)->target = *np;
6357 *np = qn;
6358 break;
6359
6360 case TK_BACKREF:
6361 len = tok->u.backref.num;
6362 *np = node_new_backref(len,
6363 (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),
6364 tok->u.backref.by_name,
6366 tok->u.backref.exist_level,
6367 tok->u.backref.level,
6368#endif
6369 env);
6371 break;
6372
6373#ifdef USE_SUBEXP_CALL
6374 case TK_CALL:
6375 {
6376 int gnum = tok->u.call.gnum;
6377
6378 if (gnum < 0 || tok->u.call.rel != 0) {
6379 if (gnum > 0) gnum--;
6380 gnum = BACKREF_REL_TO_ABS(gnum, env);
6381 if (gnum <= 0)
6383 }
6384 *np = node_new_call(tok->u.call.name, tok->u.call.name_end, gnum);
6386 env->num_call++;
6387 }
6388 break;
6389#endif
6390
6391 case TK_ANCHOR:
6392 *np = onig_node_new_anchor(tok->u.anchor.subtype);
6394 NANCHOR(*np)->ascii_range = tok->u.anchor.ascii_range;
6395 break;
6396
6397 case TK_OP_REPEAT:
6398 case TK_INTERVAL:
6402 else
6403 *np = node_new_empty();
6404 }
6405 else {
6406 goto tk_byte;
6407 }
6408 break;
6409
6410 default:
6411 return ONIGERR_PARSER_BUG;
6412 break;
6413 }
6414
6415 {
6416 targetp = np;
6417
6418 re_entry:
6419 r = fetch_token(tok, src, end, env);
6420 if (r < 0) return r;
6421
 /* Entered with r holding the token that follows the element just parsed. */
6422 repeat:
6423 if (r == TK_OP_REPEAT || r == TK_INTERVAL) {
6424 if (is_invalid_quantifier_target(*targetp))
6426
6427 qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,
6428 (r == TK_INTERVAL ? 1 : 0));
6430 NQTFR(qn)->greedy = tok->u.repeat.greedy;
6431 r = set_quantifier(qn, *targetp, group, env);
6432 if (r < 0) {
6433 onig_node_free(qn);
6434 return r;
6435 }
6436
 /* A possessive quantifier is wrapped in an ENCLOSE_STOP_BACKTRACK node. */
6437 if (tok->u.repeat.possessive != 0) {
6438 Node* en;
6439 en = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
6440 if (IS_NULL(en)) {
6441 onig_node_free(qn);
6442 return ONIGERR_MEMORY;
6443 }
6444 NENCLOSE(en)->target = qn;
6445 qn = en;
6446 }
6447
 /* r is the set_quantifier() result: 0 installs qn in place of the target,
  * 1 discards qn, 2 builds a two-element list (target, qn) and continues
  * quantifying the second element. */
6448 if (r == 0) {
6449 *targetp = qn;
6450 }
6451 else if (r == 1) {
6452 onig_node_free(qn);
6453 }
6454 else if (r == 2) { /* split case: /abc+/ */
6455 Node *tmp;
6456
6457 *targetp = node_new_list(*targetp, NULL);
6458 if (IS_NULL(*targetp)) {
6459 onig_node_free(qn);
6460 return ONIGERR_MEMORY;
6461 }
6462 tmp = NCDR(*targetp) = node_new_list(qn, NULL);
6463 if (IS_NULL(tmp)) {
6464 onig_node_free(qn);
6465 return ONIGERR_MEMORY;
6466 }
6467 targetp = &(NCAR(tmp));
6468 }
6469 goto re_entry;
6470 }
6471 }
6472
6473 return r;
6474}
6475
6476static int
6477parse_branch(Node** top, OnigToken* tok, int term,
6478 UChar** src, UChar* end, ScanEnv* env)
6479{
6480 int r;
6481 Node *node, **headp;
6482
6483 *top = NULL;
6484 r = parse_exp(&node, tok, term, src, end, env);
6485 if (r < 0) {
6486 onig_node_free(node);
6487 return r;
6488 }
6489
6490 if (r == TK_EOT || r == term || r == TK_ALT) {
6491 *top = node;
6492 }
6493 else {
6494 *top = node_new_list(node, NULL);
6495 headp = &(NCDR(*top));
6496 while (r != TK_EOT && r != term && r != TK_ALT) {
6497 r = parse_exp(&node, tok, term, src, end, env);
6498 if (r < 0) {
6499 onig_node_free(node);
6500 return r;
6501 }
6502
6503 if (NTYPE(node) == NT_LIST) {
6504 *headp = node;
6505 while (IS_NOT_NULL(NCDR(node))) node = NCDR(node);
6506 headp = &(NCDR(node));
6507 }
6508 else {
6509 *headp = node_new_list(node, NULL);
6510 headp = &(NCDR(*headp));
6511 }
6512 }
6513 }
6514
6515 return r;
6516}
6517
/* Parses a sub-expression: one or more branches separated by TK_ALT, ending
 * at the `term` token.
 *
 * On success *top is the single branch node, or a chain of alternative nodes
 * (one per branch), and the result of the last parse_branch() call is
 * returned.  env->parse_depth is incremented on entry and decremented only on
 * the success path.
 *
 * NOTE(review): this listing skips the source lines numbered 6529 and 6562,
 * so the statement guarded by the depth check and the first branch of the
 * final if/else are not visible -- confirm against the upstream file.
 * NOTE(review): the results of onig_node_new_alt() are passed to NCDR()
 * without a NULL check in the lines visible here. */
6518/* term_tok: TK_EOT or TK_SUBEXP_CLOSE */
6519static int
6520parse_subexp(Node** top, OnigToken* tok, int term,
6521 UChar** src, UChar* end, ScanEnv* env)
6522{
6523 int r;
6524 Node *node, **headp;
6525
6526 *top = NULL;
6527 env->parse_depth++;
6528 if (env->parse_depth > ParseDepthLimit)
6530 r = parse_branch(&node, tok, term, src, end, env);
6531 if (r < 0) {
6532 onig_node_free(node);
6533 return r;
6534 }
6535
6536 if (r == term) {
 /* A single branch: hand it back as is. */
6537 *top = node;
6538 }
6539 else if (r == TK_ALT) {
 /* Several branches: chain them as alternatives, appending at headp. */
6540 *top = onig_node_new_alt(node, NULL);
6541 headp = &(NCDR(*top));
6542 while (r == TK_ALT) {
6543 r = fetch_token(tok, src, end, env);
6544 if (r < 0) return r;
6545 r = parse_branch(&node, tok, term, src, end, env);
6546 if (r < 0) {
6547 onig_node_free(node);
6548 return r;
6549 }
6550
6551 *headp = onig_node_new_alt(node, NULL);
6552 headp = &(NCDR(*headp));
6553 }
6554
6555 if (tok->type != (enum TokenSyms )term)
6556 goto err;
6557 }
6558 else {
6559 onig_node_free(node);
6560 err:
6561 if (term == TK_SUBEXP_CLOSE)
6563 else
6564 return ONIGERR_PARSER_BUG;
6565 }
6566
6567 env->parse_depth--;
6568 return r;
6569}
6570
6571static int
6572parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
6573{
6574 int r;
6575 OnigToken tok;
6576
6577 r = fetch_token(&tok, src, end, env);
6578 if (r < 0) return r;
6579 r = parse_subexp(top, &tok, TK_EOT, src, end, env);
6580 if (r < 0) return r;
6581
6582#ifdef USE_SUBEXP_CALL
6583 if (env->num_call > 0) {
6584 /* Capture the pattern itself. It is used for (?R), (?0) and \g<0>. */
6585 const int num = 0;
6586 Node* np;
6587 np = node_new_enclose_memory(env->option, 0);
6589 NENCLOSE(np)->regnum = num;
6590 NENCLOSE(np)->target = *top;
6591 r = scan_env_set_mem_node(env, num, np);
6592 if (r != 0) {
6593 onig_node_free(np);
6594 return r;
6595 }
6596 *top = np;
6597 }
6598#endif
6599 return 0;
6600}
6601
6602extern int
6603onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end,
6604 regex_t* reg, ScanEnv* env)
6605{
6606 int r;
6607 UChar* p;
6608
6609#ifdef USE_NAMED_GROUP
6610 names_clear(reg);
6611#endif
6612
6613 scan_env_clear(env);
6614 env->option = reg->options;
6615 env->case_fold_flag = reg->case_fold_flag;
6616 env->enc = reg->enc;
6617 env->syntax = reg->syntax;
6618 env->pattern = (UChar* )pattern;
6619 env->pattern_end = (UChar* )end;
6620 env->reg = reg;
6621
6622 *root = NULL;
6623 p = (UChar* )pattern;
6624 r = parse_regexp(root, &p, (UChar* )end, env);
6625 reg->num_mem = env->num_mem;
6626 return r;
6627}
6628
/* Stores `arg` and `arg_end` in env->error and env->error_end.
 * NOTE(review): the line carrying the function name and its leading
 * parameters (numbered 6630) is missing from this listing -- confirm the
 * full signature against the upstream file. */
6629extern void
6631 UChar* arg, UChar* arg_end)
6632{
6633 env->error = arg;
6634 env->error_end = arg_end;
6635}
int bits(struct state *s, int need)
Definition: blast.c:72
int root
Definition: enough.c:226
big_t * num
Definition: enough.c:232
string_t out
Definition: enough.c:230
size_t map(int syms, int left, int len)
Definition: enough.c:237
uint8_t len
Definition: escape.c:17
#define numberof(array)
Definition: etc.c:649
void rb_compile_warn(const char *file, int line, const char *fmt,...)
Definition: error.c:351
void rb_warn(const char *fmt,...)
Definition: error.c:408
const char term
Definition: id.c:37
#define ruby_verbose
Definition: error.h:68
voidpf void uLong size
Definition: ioapi.h:138
typedef int(ZCALLBACK *close_file_func) OF((voidpf opaque
voidpf void * buf
Definition: ioapi.h:138
#define MIN(a, b)
Definition: ffi.c:30
VALUE type(ANYARGS)
ANYARGS-ed function type.
Definition: cxxanyargs.hpp:56
unsigned int top
Definition: nkf.c:4323
const char * name
Definition: nkf.c:208
#define ARG_UNUSED
Definition: nkf.h:179
#define FALSE
Definition: nkf.h:174
#define ONIG_SYN_OP_ESC_C_CONTROL
Definition: onigmo.h:546
#define ONIG_SYN_OP_LPAREN_SUBEXP
Definition: onigmo.h:531
#define ONIGERR_INVALID_CHAR_PROPERTY_NAME
Definition: onigmo.h:688
#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE
Definition: onigmo.h:552
#define ONIGENC_CTYPE_GRAPH
Definition: onigmo.h:299
#define ONIGERR_END_PATTERN_AT_META
Definition: onigmo.h:647
#define ONIG_SYN_OP_ASTERISK_ZERO_INF
Definition: onigmo.h:521
#define ONIGENC_APPLY_ALL_CASE_FOLD(enc, case_fold_flag, f, arg)
Definition: onigmo.h:338
#define ONIGENC_CTYPE_ASCII
Definition: onigmo.h:308
#define ONIG_SYN_USE_LEFT_MOST_NAMED_GROUP
Definition: onigmo.h:599
#define ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS
Definition: onigmo.h:669
#define ONIG_OPTION_DONT_CAPTURE_GROUP
Definition: onigmo.h:459
#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT
Definition: onigmo.h:553
#define ONIGENC_CTYPE_DIGIT
Definition: onigmo.h:298
#define ONIG_SYN_OP_QMARK_NON_GREEDY
Definition: onigmo.h:544
#define ONIG_NO_SUPPORT_CONFIG
Definition: onigmo.h:626
#define ONIGENC_CODE_TO_MBC(enc, code, buf)
Definition: onigmo.h:368
#define ONIG_SYN_OP_BRACKET_CC
Definition: onigmo.h:536
#define ONIG_SYN_OP_ESC_VBAR_ALT
Definition: onigmo.h:530
#define ONIGERR_PARSE_DEPTH_LIMIT_OVER
Definition: onigmo.h:636
#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR
Definition: onigmo.h:567
#define ONIG_SYN_OP2_QMARK_VBAR_BRANCH_RESET
Definition: onigmo.h:580
#define ONIG_SYN_OP2_OPTION_PERL
Definition: onigmo.h:554
#define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY
Definition: onigmo.h:597
#define ONIG_SYN_OP2_QMARK_CAPITAL_P_NAMED_GROUP
Definition: onigmo.h:582
#define ONIGENC_IS_CODE_CTYPE(enc, code, ctype)
Definition: onigmo.h:372
#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID
Definition: onigmo.h:655
#define ONIG_MAX_REPEAT_NUM
Definition: onigmo.h:440
#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY
Definition: onigmo.h:568
#define ONIGENC_CTYPE_XDIGIT
Definition: onigmo.h:305
#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END
Definition: onigmo.h:538
#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT
Definition: onigmo.h:569
#define ONIGENC_CODE_RANGE_FROM(range, i)
Definition: onigmo.h:140
#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP
Definition: onigmo.h:532
#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC
Definition: onigmo.h:603
#define ONIG_IS_OPTION_ON(options, option)
Definition: onigmo.h:476
#define ONIG_SYN_OP_ESC_BRACE_INTERVAL
Definition: onigmo.h:528
#define ONIG_INEFFECTIVE_META_CHAR
Definition: onigmo.h:619
#define ONIG_REGION_NOTPOS
Definition: onigmo.h:734
#define ONIG_SYNTAX_RUBY
Definition: onigmo.h:511
#define ONIGENC_MBC_TO_CODE(enc, p, end)
Definition: onigmo.h:366
#define ONIG_SYN_WARN_CC_DUP
Definition: onigmo.h:609
#define ONIGERR_META_CODE_SYNTAX
Definition: onigmo.h:649
#define ONIG_SYN_OP_BRACE_INTERVAL
Definition: onigmo.h:527
#define ONIG_SYN_OP_ESC_B_WORD_BOUND
Definition: onigmo.h:539
#define ONIGERR_PARSER_BUG
Definition: onigmo.h:631
#define ONIG_SYN_OP_DECIMAL_BACKREF
Definition: onigmo.h:535
#define ONIG_SYN_OP_ESC_W_WORD
Definition: onigmo.h:537
#define ONIGENC_CTYPE_ALNUM
Definition: onigmo.h:307
#define ONIG_SYN_OP2_QMARK_TILDE_ABSENT
Definition: onigmo.h:583
#define ONIGENC_CTYPE_ALPHA
Definition: onigmo.h:295
#define ONIGENC_IS_CODE_XDIGIT(enc, code)
Definition: onigmo.h:398
#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME
Definition: onigmo.h:596
#define ONIGENC_CTYPE_SPACE
Definition: onigmo.h:303
#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR
Definition: onigmo.h:533
#define ONIGENC_IS_UNICODE(enc)
Definition: onigmo.h:327
#define ONIGERR_END_PATTERN_AT_ESCAPE
Definition: onigmo.h:646
#define ONIG_OPTION_MULTILINE
Definition: onigmo.h:453
#define ONIGERR_TOO_LONG_WIDE_CHAR_VALUE
Definition: onigmo.h:678
#define ONIGENC_CTYPE_PUNCT
Definition: onigmo.h:302
#define ONIG_SYN_OP2_QMARK_LPAREN_CONDITION
Definition: onigmo.h:581
#define ONIGERR_INVALID_GROUP_NAME
Definition: onigmo.h:680
#define ONIG_SYN_OP_ESC_X_BRACE_HEX8
Definition: onigmo.h:549
#define ONIGENC_IS_CODE_DIGIT(enc, code)
Definition: onigmo.h:396
#define ONIGERR_EMPTY_CHAR_CLASS
Definition: onigmo.h:644
#define UChar
Definition: onigmo.h:76
#define ONIGENC_CODE_TO_MBC_MAXLEN
Definition: onigmo.h:289
#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV
Definition: onigmo.h:592
#define ONIGENC_CTYPE_WORD
Definition: onigmo.h:306
#define ONIGERR_UNDEFINED_GROUP_OPTION
Definition: onigmo.h:660
#define ONIG_NORMAL
Definition: onigmo.h:624
#define ONIGERR_END_PATTERN_AT_CONTROL
Definition: onigmo.h:648
#define ONIGERR_END_PATTERN_AT_LEFT_BRACE
Definition: onigmo.h:642
#define ONIG_OPTION_POSIX_BRACKET_ALL_RANGE
Definition: onigmo.h:468
#define ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, p, end)
Definition: onigmo.h:369
#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED
Definition: onigmo.h:607
#define ONIGERR_UNDEFINED_NAME_REFERENCE
Definition: onigmo.h:682
#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL
Definition: onigmo.h:557
#define ONIGENC_CTYPE_UPPER
Definition: onigmo.h:304
#define ONIG_OPTION_ASCII_RANGE
Definition: onigmo.h:467
#define ONIG_SYN_OP_LINE_ANCHOR
Definition: onigmo.h:542
#define ONIG_SYN_OP2_ESC_V_VTAB
Definition: onigmo.h:565
#define ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE
Definition: onigmo.h:668
void(* OnigWarnFunc)(const char *s)
Definition: onigmo.h:749
#define ONIGENC_IS_CODE_WORD(enc, code)
Definition: onigmo.h:400
#define ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS
Definition: onigmo.h:653
unsigned int OnigCodePoint
Definition: onigmo.h:80
#define ONIG_OPTION_IGNORECASE
Definition: onigmo.h:451
#define ONIG_SYN_OP2_OPTION_RUBY
Definition: onigmo.h:555
#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT
Definition: onigmo.h:608
#define ONIGERR_INVALID_POSIX_BRACKET_TYPE
Definition: onigmo.h:661
#define ONIG_SYN_OP_ESC_OCTAL3
Definition: onigmo.h:547
#define ONIGERR_MEMORY
Definition: onigmo.h:629
#define ONIG_SYN_OP_PLUS_ONE_INF
Definition: onigmo.h:523
#define ONIG_SYN_OP_DOT_ANYCHAR
Definition: onigmo.h:520
#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE
Definition: onigmo.h:691
#define ONIGERR_TOO_SHORT_DIGITS
Definition: onigmo.h:677
#define ONIG_ENCODING_ASCII
Definition: onigmo.h:225
#define ONIG_OPTION_CAPTURE_GROUP
Definition: onigmo.h:460
#define ONIGERR_UNMATCHED_CLOSE_PARENTHESIS
Definition: onigmo.h:657
#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE
Definition: onigmo.h:526
#define ONIG_SYN_OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER
Definition: onigmo.h:574
#define ONIGERR_PREMATURE_END_OF_CHAR_CLASS
Definition: onigmo.h:645
#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT
Definition: onigmo.h:556
#define ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP
Definition: onigmo.h:577
#define ONIGENC_MBC_MAXLEN(enc)
Definition: onigmo.h:362
#define ONIGENC_MBC_MINLEN(enc)
Definition: onigmo.h:364
#define ONIGERR_EMPTY_GROUP_NAME
Definition: onigmo.h:679
#define ONIG_SYN_OP2_ESC_H_XDIGIT
Definition: onigmo.h:571
#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY
Definition: onigmo.h:562
#define ONIG_SYN_OP_ESC_CONTROL_CHARS
Definition: onigmo.h:545
#define ONIGENC_CTYPE_CNTRL
Definition: onigmo.h:297
#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP
Definition: onigmo.h:559
#define ONIGENC_CTYPE_PRINT
Definition: onigmo.h:301
#define ONIG_SYN_OP_ESC_O_BRACE_OCTAL
Definition: onigmo.h:550
#define ONIG_SYN_OP2_CCLASS_SET_OP
Definition: onigmo.h:558
#define ONIGERR_TOO_MANY_MULTI_BYTE_RANGES
Definition: onigmo.h:671
#define ONIGERR_END_PATTERN_IN_GROUP
Definition: onigmo.h:659
#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
Definition: onigmo.h:595
#define ONIGENC_CTYPE_BLANK
Definition: onigmo.h:296
#define ONIG_SYN_ALLOW_INVALID_INTERVAL
Definition: onigmo.h:591
#define ONIGERR_MULTIPLEX_DEFINED_NAME
Definition: onigmo.h:684
#define ONIGENC_CTYPE_LOWER
Definition: onigmo.h:300
#define ONIG_SYN_OP_ESC_D_DIGIT
Definition: onigmo.h:541
#define ONIG_SYN_OP2_ESC_G_BRACE_BACKREF
Definition: onigmo.h:578
#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC
Definition: onigmo.h:605
#define ONIG_SYN_OP_POSIX_BRACKET
Definition: onigmo.h:543
#define ONIG_SYN_OP_ESC_S_WHITE_SPACE
Definition: onigmo.h:540
#define ONIGENC_CODE_RANGE_NUM(range)
Definition: onigmo.h:139
#define ONIG_MAX_BACKREF_NUM
Definition: onigmo.h:439
#define ONIG_SYN_STRICT_CHECK_BACKREF
Definition: onigmo.h:593
#define ONIGENC_CODE_RANGE_TO(range, i)
Definition: onigmo.h:141
#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF
Definition: onigmo.h:560
#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND
Definition: onigmo.h:594
#define ONIG_SYN_OP2_ESC_U_HEX4
Definition: onigmo.h:566
ONIG_EXTERN OnigUChar * onigenc_get_prev_char_head(OnigEncoding enc, const OnigUChar *start, const OnigUChar *s, const OnigUChar *end)
#define ONIGENC_IS_SINGLEBYTE(enc)
Definition: onigmo.h:318
#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR
Definition: onigmo.h:534
#define ONIG_SYN_OP_ESC_PLUS_ONE_INF
Definition: onigmo.h:524
#define ONIG_SYN_OP_VBAR_ALT
Definition: onigmo.h:529
#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED
Definition: onigmo.h:654
#define ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE
Definition: onigmo.h:667
#define ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE
Definition: onigmo.h:651
#define ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS
Definition: onigmo.h:658
#define ONIGERR_INVALID_CONDITION_PATTERN
Definition: onigmo.h:664
#define ONIG_SYN_OP2_ESC_CAPITAL_R_LINEBREAK
Definition: onigmo.h:573
unsigned int OnigOptionType
Definition: onigmo.h:445
#define ONIG_OPTION_WORD_BOUND_ALL_RANGE
Definition: onigmo.h:469
#define ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, sbout, ranges)
Definition: onigmo.h:403
#define ONIGERR_INVALID_BACKREF
Definition: onigmo.h:674
ONIG_EXTERN int onigenc_strlen(OnigEncoding enc, const OnigUChar *p, const OnigUChar *end)
#define ONIG_SYN_OP2_QMARK_SUBEXP_CALL
Definition: onigmo.h:579
#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF
Definition: onigmo.h:522
#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS
Definition: onigmo.h:589
#define ONIGERR_INVALID_REPEAT_RANGE_PATTERN
Definition: onigmo.h:663
#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP
Definition: onigmo.h:590
#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL
Definition: onigmo.h:561
#define ONIGENC_CODE_TO_MBCLEN(enc, code)
Definition: onigmo.h:367
#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC
Definition: onigmo.h:602
#define ONIG_MAX_CAPTURE_GROUP_NUM
Definition: onigmo.h:438
#define ONIG_SYN_OP_QMARK_ZERO_ONE
Definition: onigmo.h:525
#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY
Definition: onigmo.h:687
#define ONIGERR_INVALID_CODE_POINT_VALUE
Definition: onigmo.h:689
#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS
Definition: onigmo.h:519
#define ONIG_SYN_OP_ESC_X_HEX2
Definition: onigmo.h:548
#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL
Definition: onigmo.h:563
#define ONIG_OPTION_SINGLELINE
Definition: onigmo.h:455
#define ONIG_MAX_MULTI_BYTE_RANGES_NUM
Definition: onigmo.h:441
#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META
Definition: onigmo.h:564
#define ONIGERR_TOO_BIG_NUMBER
Definition: onigmo.h:666
#define ONIGERR_TOO_SHORT_MULTI_BYTE_STRING
Definition: onigmo.h:672
#define ONIGERR_CONTROL_CODE_SYNTAX
Definition: onigmo.h:650
#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
Definition: onigmo.h:604
#define ONIG_OPTION_EXTEND
Definition: onigmo.h:452
#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS
Definition: onigmo.h:588
#define ONIGERR_TOO_MANY_CAPTURE_GROUPS
Definition: onigmo.h:676
#define ONIGERR_INVALID_CHAR_IN_GROUP_NAME
Definition: onigmo.h:681
#define ONIGENC_IS_CODE_NEWLINE(enc, code)
Definition: onigmo.h:374
int onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode *cc)
Definition: regcomp.c:6142
UChar * onigenc_step(OnigEncoding enc, const UChar *p, const UChar *end, int n)
Definition: regenc.c:113
int onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar *p, const UChar *end, const UChar *sascii, int n)
Definition: regenc.c:860
#define NULL
Definition: regenc.h:69
#define POSIX_BRACKET_ENTRY_INIT(name, ctype)
Definition: regenc.h:124
#define enclen(enc, p, e)
Definition: regenc.h:93
#define ONIGENC_IS_ASCII_CODE(code)
Definition: regenc.h:216
void onig_vsnprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc, UChar *pat, UChar *pat_end, const UChar *fmt, va_list args)
Definition: regerror.c:314
#define INT_MAX_LIMIT
Definition: regint.h:373
st_data_t hash_data_type
Definition: regint.h:925
#define IS_MC_ESC_CODE(code, syn)
Definition: regint.h:755
#define MC_ANYTIME(syn)
Definition: regint.h:750
#define BBUF_MOVE_RIGHT(buf, from, to, n)
Definition: regint.h:497
#define ANCHOR_BEGIN_LINE
Definition: regint.h:528
#define BBUF_MOVE_LEFT_REDUCE(buf, from, to)
Definition: regint.h:509
#define IS_ASCII_RANGE(option)
Definition: regint.h:393
#define DIGITVAL(code)
Definition: regint.h:375
#define CHECK_NULL_RETURN_MEMERR(p)
Definition: regint.h:301
#define ANCHOR_PREC_READ_NOT
Definition: regint.h:539
#define BBUF_ENSURE_SIZE(buf, size)
Definition: regint.h:465
#define BIT_STATUS_BITS_NUM
Definition: regint.h:354
#define MC_ONE_OR_MORE_TIME(syn)
Definition: regint.h:752
#define BITSET_CLEAR_BIT(bs, pos)
Definition: regint.h:437
#define ANCHOR_BEGIN_POSITION
Definition: regint.h:529
#define ONIG_LAST_CODE_POINT
Definition: regint.h:304
#define BITSET_AT(bs, pos)
Definition: regint.h:435
unsigned char Bits
Definition: regint.h:420
#define CHECK_NULL_RETURN(p)
Definition: regint.h:300
#define ANCHOR_LOOK_BEHIND
Definition: regint.h:540
#define IS_NOT_NULL(p)
Definition: regint.h:299
#define ANCHOR_END_LINE
Definition: regint.h:532
#define ANCHOR_BEGIN_BUF
Definition: regint.h:527
#define ANCHOR_WORD_BOUND
Definition: regint.h:534
#define SYN_GNU_REGEX_OP
Definition: regint.h:767
#define DEFAULT_PARSE_DEPTH_LIMIT
Definition: regint.h:88
#define ANCHOR_WORD_BEGIN
Definition: regint.h:536
#define NULL_UCHARP
Definition: regint.h:302
#define BBUF_INIT(buf, size)
Definition: regint.h:447
#define IS_REPEAT_INFINITE(n)
Definition: regint.h:409
#define ANCHOR_LOOK_BEHIND_NOT
Definition: regint.h:541
#define BITSET_SIZE
Definition: regint.h:415
#define SINGLE_BYTE_SIZE
Definition: regint.h:413
#define MC_ANYCHAR(syn)
Definition: regint.h:749
#define GET_CODE_POINT(code, p)
Definition: regint.h:697
#define IS_NULL(p)
Definition: regint.h:298
#define ANCHOR_SEMI_END_BUF
Definition: regint.h:531
#define ODIGITVAL(code)
Definition: regint.h:376
#define IS_WORD_BOUND_ALL_RANGE(option)
Definition: regint.h:395
#define SIZE_CODE_POINT
Definition: regint.h:683
#define ANCHOR_PREC_READ
Definition: regint.h:538
#define NCCLASS_CLEAR_NOT(nd)
Definition: regint.h:795
#define BITS_IN_ROOM
Definition: regint.h:414
void hash_table_type
Definition: regint.h:919
#define IS_NCCLASS_NOT(nd)
Definition: regint.h:796
Bits BitSet[BITSET_SIZE]
Definition: regint.h:422
#define REPEAT_INFINITE
Definition: regint.h:408
#define ANCHOR_KEEP
Definition: regint.h:546
#define ANCHOR_NOT_WORD_BOUND
Definition: regint.h:535
#define BITSET_SET_BIT(bs, pos)
Definition: regint.h:436
#define IS_SINGLELINE(option)
Definition: regint.h:381
#define IS_POSIX_BRACKET_ALL_RANGE(option)
Definition: regint.h:394
#define IS_EXTEND(option)
Definition: regint.h:384
#define USE_BACKREF_WITH_LEVEL
Definition: regint.h:73
#define BIT_STATUS_CLEAR(stats)
Definition: regint.h:355
#define BITSET_CLEAR(bs)
Definition: regint.h:427
#define XDIGITVAL(enc, code)
Definition: regint.h:377
#define SYN_GNU_REGEX_BV
Definition: regint.h:780
#define xmemcpy
Definition: regint.h:202
Bits * BitSetRef
Definition: regint.h:423
#define IS_IGNORECASE(option)
Definition: regint.h:383
#define MC_ESC(syn)
Definition: regint.h:748
#define MC_ANYCHAR_ANYTIME(syn)
Definition: regint.h:753
#define BIT_STATUS_ON_AT_SIMPLE(stats, n)
Definition: regint.h:367
#define ANCHOR_WORD_END
Definition: regint.h:537
#define MAX(a, b)
Definition: regint.h:296
#define ANCHOR_END_BUF
Definition: regint.h:530
#define MC_ZERO_OR_ONE_TIME(syn)
Definition: regint.h:751
#define NCCLASS_SET_NOT(nd)
Definition: regint.h:794
#define is_invalid_quantifier_target(node)
Definition: regparse.c:2122
st_table NameTable
Definition: regparse.c:464
#define INIT_MULTI_BYTE_RANGE_SIZE
#define PINC
Definition: regparse.c:301
int onig_name_to_group_numbers(regex_t *reg, const UChar *name, const UChar *name_end, int **nums)
Definition: regparse.c:887
int onig_foreach_name(regex_t *reg, int(*func)(const UChar *, const UChar *, int, int *, regex_t *, void *), void *arg)
Definition: regparse.c:576
int onig_noname_group_capture_is_active(const regex_t *reg)
Definition: regparse.c:963
Node * onig_node_new_list(Node *left, Node *right)
Definition: regparse.c:1186
Node * onig_node_new_anchor(int type)
Definition: regparse.c:1222
ReduceType
Definition: regparse.c:2183
@ RQ_QQ
Definition: regparse.c:2188
@ RQ_ASIS
Definition: regparse.c:2184
@ RQ_AQ
Definition: regparse.c:2187
@ RQ_P_QQ
Definition: regparse.c:2189
@ RQ_DEL
Definition: regparse.c:2185
@ RQ_A
Definition: regparse.c:2186
#define INIT_SCANENV_MEMNODES_ALLOC_SIZE
Definition: regparse.c:980
#define POSIX_BRACKET_NAME_MIN_LEN
void onig_null_warn(const char *s ARG_UNUSED)
Definition: regparse.c:87
#define PPEEK
Definition: regparse.c:319
void onig_set_warn_func(OnigWarnFunc f)
Definition: regparse.c:101
#define R_ERR(call)
Definition: regparse.c:5789
unsigned int onig_get_parse_depth_limit(void)
Definition: regparse.c:117
int onig_st_insert_strend(hash_table_type *table, const UChar *str_key, const UChar *end_key, hash_data_type value)
Definition: regparse.c:430
#define PEND
Definition: regparse.c:299
#define PFETCH_READY
Definition: regparse.c:295
#define ONOFF(v, f, negative)
Definition: regparse.c:160
int onig_node_str_cat(Node *node, const UChar *s, const UChar *end)
Definition: regparse.c:1376
#define PPEEK_IS(c)
Definition: regparse.c:320
#define POSIX_BRACKET_CHECK_LIMIT_LENGTH
int onig_parse_make_tree(Node **root, const UChar *pattern, const UChar *end, regex_t *reg, ScanEnv *env)
Definition: regparse.c:6603
#define WARN_BUFSIZE
Definition: regparse.c:34
#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf)
Definition: regparse.c:168
#define INIT_NAME_BACKREFS_ALLOC_NUM
Definition: regparse.c:451
hash_table_type * onig_st_init_strend_table_with_size(st_index_t size)
Definition: regparse.c:406
int onig_number_of_names(const regex_t *reg)
Definition: regparse.c:623
#define MBCODE_START_POS(enc)
Definition: regparse.c:162
const OnigSyntaxType * OnigDefaultSyntax
Definition: regparse.c:85
#define PFETCH(c)
Definition: regparse.c:305
void onig_scan_env_set_error_string(ScanEnv *env, int ecode ARG_UNUSED, UChar *arg, UChar *arg_end)
Definition: regparse.c:6630
Node * onig_node_list_add(Node *list, Node *x)
Definition: regparse.c:1192
int onig_name_to_backref_number(regex_t *reg, const UChar *name, const UChar *name_end, const OnigRegion *region)
Definition: regparse.c:909
#define NODE_COMMON_SIZE
Definition: regparse.c:5812
#define ALT
Definition: regparse.c:5761
#define NEWLINE_CODE
void onig_node_free(Node *node)
Definition: regparse.c:1062
Node * onig_node_new_enclose(int type)
Definition: regparse.c:1347
#define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2)
Definition: regparse.c:1815
const OnigSyntaxType OnigSyntaxRuby
Definition: regparse.c:39
Node * onig_node_new_alt(Node *left, Node *right)
Definition: regparse.c:1210
Node * onig_node_new_str(const UChar *s, const UChar *end)
Definition: regparse.c:1481
#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
Definition: regparse.c:36
#define BACKREF_REL_TO_ABS(rel_no, env)
Definition: regparse.c:157
#define ONIGENC_IS_CODE_NAME(enc, c)
Definition: regparse.c:2503
int onig_set_parse_depth_limit(unsigned int depth)
Definition: regparse.c:123
#define BITSET_IS_EMPTY(bs, empty)
Definition: regparse.c:181
TokenSyms
Definition: regparse.c:2247
@ TK_CC_AND
Definition: regparse.c:2274
@ TK_ANYCHAR
Definition: regparse.c:2253
@ TK_ANYCHAR_ANYTIME
Definition: regparse.c:2260
@ TK_SUBEXP_CLOSE
Definition: regparse.c:2263
@ TK_CC_RANGE
Definition: regparse.c:2272
@ TK_STRING
Definition: regparse.c:2251
@ TK_ANCHOR
Definition: regparse.c:2257
@ TK_EOT
Definition: regparse.c:2248
@ TK_QUOTE_OPEN
Definition: regparse.c:2265
@ TK_CC_OPEN
Definition: regparse.c:2264
@ TK_LINEBREAK
Definition: regparse.c:2267
@ TK_BACKREF
Definition: regparse.c:2255
@ TK_CHAR_TYPE
Definition: regparse.c:2254
@ TK_SUBEXP_OPEN
Definition: regparse.c:2262
@ TK_INTERVAL
Definition: regparse.c:2259
@ TK_POSIX_BRACKET_OPEN
Definition: regparse.c:2273
@ TK_CODE_POINT
Definition: regparse.c:2252
@ TK_KEEP
Definition: regparse.c:2269
@ TK_CALL
Definition: regparse.c:2256
@ TK_CHAR_PROPERTY
Definition: regparse.c:2266
@ TK_CC_CC_OPEN
Definition: regparse.c:2275
@ TK_ALT
Definition: regparse.c:2261
@ TK_EXTENDED_GRAPHEME_CLUSTER
Definition: regparse.c:2268
@ TK_RAW_BYTE
Definition: regparse.c:2249
@ TK_CC_CLOSE
Definition: regparse.c:2271
@ TK_OP_REPEAT
Definition: regparse.c:2258
@ TK_CHAR
Definition: regparse.c:2250
int onig_node_str_set(Node *node, const UChar *s, const UChar *end)
Definition: regparse.c:1412
void onig_node_str_clear(Node *node)
Definition: regparse.c:1449
#define PFETCH_S(c)
Definition: regparse.c:314
#define BITSET_SET_BIT_CHKDUP(bs, pos)
Definition: regparse.c:176
int onig_names_free(regex_t *reg)
Definition: regparse.c:525
void onig_reduce_nested_quantifier(Node *pnode, Node *cnode)
Definition: regparse.c:2203
#define PUNFETCH
Definition: regparse.c:300
st_data_t HashDataType
Definition: regparse.c:465
#define PINC_S
Definition: regparse.c:311
#define BBUF_WRITE_CODE_POINT(bbuf, pos, code)
Definition: regparse.c:1644
int onig_st_lookup_strend(hash_table_type *table, const UChar *str_key, const UChar *end_key, hash_data_type *value)
Definition: regparse.c:418
void onig_strcpy(UChar *dest, const UChar *src, const UChar *end)
Definition: regparse.c:259
CCSTATE
Definition: regparse.c:4393
@ CCS_COMPLETE
Definition: regparse.c:4396
@ CCS_START
Definition: regparse.c:4397
@ CCS_RANGE
Definition: regparse.c:4395
@ CCS_VALUE
Definition: regparse.c:4394
void onig_set_verb_warn_func(OnigWarnFunc f)
Definition: regparse.c:106
#define LIST
Definition: regparse.c:5760
int onig_renumber_name_table(regex_t *reg, GroupNumRemap *map)
Definition: regparse.c:611
int onig_scan_unsigned_number(UChar **src, const UChar *end, OnigEncoding enc)
Definition: regparse.c:1556
CCVALTYPE
Definition: regparse.c:4400
@ CCV_CLASS
Definition: regparse.c:4403
@ CCV_SB
Definition: regparse.c:4401
@ CCV_CODE_POINT
Definition: regparse.c:4402
#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf)
Definition: regparse.c:165
#define NST_RECURSION
Definition: regparse.h:135
#define NSTRING_SET_RAW(node)
Definition: regparse.h:109
int onig_strncmp(const UChar *s1, const UChar *s2, int n)
#define IS_SYNTAX_BV(syn, bvm)
Definition: regparse.h:332
#define ENCLOSE_OPTION
Definition: regparse.h:95
#define NT_CANY
Definition: regparse.h:41
#define NSTR(node)
Definition: regparse.h:76
#define NT_ENCLOSE
Definition: regparse.h:44
#define NENCLOSE(node)
Definition: regparse.h:81
#define IS_SYNTAX_OP2(syn, opm)
Definition: regparse.h:331
#define NT_QTFR
Definition: regparse.h:43
#define ENCLOSE_MEMORY
Definition: regparse.h:94
#define NT_CALL
Definition: regparse.h:48
#define NBREF(node)
Definition: regparse.h:79
#define NST_NEST_LEVEL
Definition: regparse.h:141
#define NODE_STR_MARGIN
Definition: regparse.h:100
#define NT_ANCHOR
Definition: regparse.h:45
#define ENCLOSE_ABSENT
Definition: regparse.h:98
#define NT_CTYPE
Definition: regparse.h:40
#define NCTYPE(node)
Definition: regparse.h:78
#define NSTR_RAW
Definition: regparse.h:104
#define NULL_NODE
Definition: regparse.h:283
#define NSTRING_CLEAR_RAW(node)
Definition: regparse.h:110
#define NST_NAMED_GROUP
Definition: regparse.h:138
#define ENCLOSE_CONDITION
Definition: regparse.h:97
#define SET_ENCLOSE_STATUS(node, f)
Definition: regparse.h:144
#define NCCLASS(node)
Definition: regparse.h:77
#define SCANENV_MEM_NODES(senv)
Definition: regparse.h:286
#define ENCLOSE_STOP_BACKTRACK
Definition: regparse.h:96
#define NODE_STR_BUF_SIZE
Definition: regparse.h:101
#define NT_CCLASS
Definition: regparse.h:39
#define NSTRING_SET_AMBIG(node)
Definition: regparse.h:111
#define NQ_TARGET_ISNOT_EMPTY
Definition: regparse.h:122
#define SET_NTYPE(node, ntype)
Definition: regparse.h:70
#define NST_BY_NUMBER
Definition: regparse.h:142
#define NT_LIST
Definition: regparse.h:46
#define NST_NAME_REF
Definition: regparse.h:139
void onig_node_conv_to_str_node(Node *node, int raw)
#define NT_BREF
Definition: regparse.h:42
#define NCDR(node)
Definition: regparse.h:87
#define NCAR(node)
Definition: regparse.h:86
#define NTYPE(node)
Definition: regparse.h:69
#define NT_STR
Definition: regparse.h:38
#define NQTFR(node)
Definition: regparse.h:80
#define SCANENV_MEMNODES_SIZE
Definition: regparse.h:285
#define NT_ALT
Definition: regparse.h:47
#define NCALL(node)
Definition: regparse.h:84
#define IS_SYNTAX_OP(syn, opm)
Definition: regparse.h:330
#define NODE_BACKREFS_SIZE
Definition: regparse.h:102
#define NANCHOR(node)
Definition: regparse.h:82
#define tok(p)
Definition: ripper.c:14208
#define RTEST
#define f
@ ST_STOP
Definition: st.h:99
@ ST_DELETE
Definition: st.h:99
@ ST_CONTINUE
Definition: st.h:99
unsigned long st_data_t
Definition: st.h:22
st_data_t st_index_t
Definition: st.h:50
int st_foreach_callback_func(st_data_t, st_data_t, st_data_t)
Definition: st.h:137
Defines old _.
size_t strlen(const char *)
Definition: regint.h:441
unsigned int alloc
Definition: regint.h:444
UChar * p
Definition: regint.h:442
unsigned int used
Definition: regint.h:443
BitSet bs
Definition: regint.h:807
unsigned int flags
Definition: regint.h:806
BBuf * mbuf
Definition: regint.h:808
ScanEnv * env
Definition: regparse.c:5490
CClassNode * asc_cc
Definition: regparse.c:5492
CClassNode * cc
Definition: regparse.c:5491
int ret
Definition: regparse.c:556
regex_t * reg
Definition: regparse.c:554
void * arg
Definition: regparse.c:555
OnigEncoding enc
Definition: regparse.c:557
int(* func)(const UChar *, const UChar *, int, int *, regex_t *, void *)
Definition: regparse.c:553
Definition: regparse.c:453
int back_alloc
Definition: regparse.c:457
int * back_refs
Definition: regparse.c:459
size_t name_len
Definition: regparse.c:455
int back_ref1
Definition: regparse.c:458
int back_num
Definition: regparse.c:456
UChar * name
Definition: regparse.c:454
int ref1
Definition: regparse.c:2299
int ascii_range
Definition: regparse.c:2289
int subtype
Definition: regparse.c:2288
OnigCodePoint code
Definition: regparse.c:2286
int * refs
Definition: regparse.c:2300
enum TokenSyms type
Definition: regparse.c:2279
int ctype
Definition: regparse.c:2314
int exist_level
Definition: regparse.c:2303
int upper
Definition: regparse.c:2293
int escaped
Definition: regparse.c:2280
UChar * name
Definition: regparse.c:2308
UChar * s
Definition: regparse.c:2284
int base
Definition: regparse.c:2281
int by_name
Definition: regparse.c:2301
UChar * name_end
Definition: regparse.c:2309
int lower
Definition: regparse.c:2292
int possessive
Definition: regparse.c:2295
int gnum
Definition: regparse.c:2310
UChar * backp
Definition: regparse.c:2282
int greedy
Definition: regparse.c:2294
int level
Definition: regparse.c:2304
Definition: regenc.h:118
int ctype
Definition: regenc.h:121
short int len
Definition: regenc.h:119
const UChar name[6]
Definition: regenc.h:120
int lower
Definition: regparse.h:183
struct _Node * target
Definition: regparse.h:182
int greedy
Definition: regparse.h:185
int upper
Definition: regparse.h:184
UChar * s
Definition: regparse.h:172
unsigned int flag
Definition: regparse.h:174
UChar * end
Definition: regparse.h:173
Definition: inftree9.h:24
OnigEncoding enc
Definition: onigmo.h:776
void * name_table
Definition: onigmo.h:778
OnigCaseFoldType case_fold_flag
Definition: onigmo.h:779
const OnigSyntaxType * syntax
Definition: onigmo.h:777
OnigOptionType options
Definition: onigmo.h:772
OnigPosition * beg
Definition: onigmo.h:719
const UChar * s
Definition: regparse.c:362
const UChar * end
Definition: regparse.c:363
Definition: st.h:79
Definition: blast.c:41
#define t
Definition: symbol.c:253
#define neg(x)
Definition: time.c:151
int err
Definition: win32.c:142
#define env
if((ID)(DISPID) nameid !=nameid)
Definition: win32ole.c:357
#define xfree
Definition: xmalloc.h:49
#define xrealloc
Definition: xmalloc.h:47
#define xmalloc
Definition: xmalloc.h:44