34#define WARN_BUFSIZE 256
36#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
89#ifdef DEFAULT_WARN_FUNCTION
95#ifdef DEFAULT_VERB_WARN_FUNCTION
119 return ParseDepthLimit;
128 ParseDepthLimit = depth;
151 if (r != 0)
return r;
/* Convert a relative back-reference number to an absolute group number.
 * Evaluates to (env)->num_mem + 1 + rel_no; `env` must be a pointer to a
 * structure with a `num_mem` member.
 * NOTE(review): presumably rel_no is negative for "N groups back" style
 * references -- confirm against the callers. */
157#define BACKREF_REL_TO_ABS(rel_no, env) \
158 ((env)->num_mem + 1 + (rel_no))
/*
 * ONOFF(v, f, negative): when `negative` is non-zero, clear the bits of `f`
 * in `v`; otherwise set them. `v` is modified in place and the macro
 * evaluates to the updated value of `v`.
 *
 * The whole expansion is parenthesized so that operators surrounding the
 * macro at the call site cannot bind to the condition of the ?: expression.
 */
#define ONOFF(v,f,negative) ((negative) ? ((v) &= ~(f)) : ((v) |= (f)))
/* Evaluates to code point 0 when ONIGENC_MBC_MINLEN(enc) is greater than 1,
 * otherwise to 0x80; the result is cast to OnigCodePoint.
 * NOTE(review): this looks like the first code point that is not handled as
 * a single byte for the encoding -- confirm. */
162#define MBCODE_START_POS(enc) \
163 (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)
165#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
166 add_code_range_to_buf(pbuf, env, MBCODE_START_POS(enc), ONIG_LAST_CODE_POINT)
168#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
169 if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
170 r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\
176#define BITSET_SET_BIT_CHKDUP(bs, pos) do { \
177 if (BITSET_AT(bs, pos)) CC_DUP_WARN(env, pos, pos); \
178 BS_ROOM(bs, pos) |= BS_BIT(pos); \
181#define BITSET_IS_EMPTY(bs,empty) do {\
184 for (i = 0; i < BITSET_SIZE; i++) {\
213 for (i = 0; i <
BITSET_SIZE; i++) { bs[i] = ~(bs[i]); }
220 for (i = 0; i <
BITSET_SIZE; i++) { to[i] = ~(from[i]); }
227 for (i = 0; i <
BITSET_SIZE; i++) { dest[i] &= bs[i]; }
234 for (i = 0; i <
BITSET_SIZE; i++) { dest[i] |= bs[i]; }
241 for (i = 0; i <
BITSET_SIZE; i++) { dest[i] = bs[i]; }
244#if defined(USE_NAMED_GROUP) && !defined(USE_ST_LIBRARY)
261 ptrdiff_t
len = end - src;
268#ifdef USE_NAMED_GROUP
283 for (i = 0; i < term_len; i++)
284 r[slen + i] = (
UChar )0;
295# define PFETCH_READY UChar* pfetch_prev = NULL; (void)pfetch_prev
297# define PFETCH_READY UChar* pfetch_prev
299#define PEND (p < end ? 0 : 1)
300#define PUNFETCH p = pfetch_prev
303 p += enclen(enc, p, end); \
305#define PFETCH(c) do { \
306 c = ((enc->max_enc_len == 1) ? *p : ONIGENC_MBC_TO_CODE(enc, p, end)); \
308 p += enclen(enc, p, end); \
312 p += enclen(enc, p, end); \
314#define PFETCH_S(c) do { \
315 c = ((enc->max_enc_len == 1) ? *p : ONIGENC_MBC_TO_CODE(enc, p, end)); \
316 p += enclen(enc, p, end); \
/*
 * PPEEK: code point at the current position `p`, without advancing.
 * Evaluates to PEND_VALUE when `p` has reached `end`.
 * Relies on `p`, `end` and `enc` being in scope at the point of use.
 */
#define PPEEK        (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
/*
 * PPEEK_IS(c): non-zero when the peeked code point equals `c`.
 * The argument is parenthesized before the cast so that an expression
 * argument (e.g. a conditional) is converted and compared as a whole.
 */
#define PPEEK_IS(c)  (PPEEK == (OnigCodePoint )(c))
340strcat_capa_from_static(
UChar* dest,
UChar* dest_end,
341 const UChar* src,
const UChar* src_end,
size_t capa)
375 if ((x->
end - x->
s) != (y->
end - y->
s))
381 c = (
int )*p - (
int )*q;
382 if (c != 0)
return c;
399 val = val * 997 + (
int )*p++;
402 return val + (val >> 5);
414 onig_st_init_table_with_size(&hashType,
size);
426 return onig_st_lookup(table, (
st_data_t )(&
key), value);
449#ifdef USE_NAMED_GROUP
451# define INIT_NAME_BACKREFS_ALLOC_NUM 8
462# ifdef USE_ST_LIBRARY
474 fprintf(fp,
"%s: ", e->
name);
481 if (i > 0) fprintf(fp,
", ");
495 fprintf(fp,
"name table\n");
530 r = names_clear(reg);
628 return (
int )
t->num_entries;
635# define INIT_NAMES_ALLOC_NUM 8
652 fprintf(fp,
"name table\n");
653 for (i = 0; i <
t->num; i++) {
655 fprintf(fp,
"%s: ", e->
name);
664 if (j > 0) fprintf(fp,
", ");
684 for (i = 0; i <
t->num; i++) {
711 r = names_clear(reg);
729 for (i = 0; i <
t->num; i++) {
747 for (i = 0; i <
t->num; i++) {
752 if (r != 0)
return r;
781 e = name_find(reg,
name, name_end);
783# ifdef USE_ST_LIBRARY
791 e->
name = strdup_with_null(reg->
enc,
name, name_end);
807 alloc = INIT_NAMES_ALLOC_NUM;
823 else if (
t->num ==
t->alloc) {
827 alloc =
t->alloc * 2;
834 for (i =
t->num; i < t->alloc; i++) {
836 t->e[i].name_len = 0;
837 t->e[i].back_num = 0;
838 t->e[i].back_alloc = 0;
839 t->e[i].back_refs = (
int* )
NULL;
844 e->
name = strdup_with_null(reg->
enc,
name, name_end);
888 const UChar* name_end,
int** nums)
923 for (i = n - 1; i >= 0; i--) {
936 const UChar* name_end,
int** nums)
968#ifdef USE_NAMED_GROUP
980#define INIT_SCANENV_MEMNODES_ALLOC_SIZE 16
995#ifdef USE_NAMED_GROUP
1004#ifdef USE_COMBINATION_EXPLOSION_CHECK
1005 env->num_comb_exp_check = 0;
1006 env->comb_exp_max_regnum = 0;
1007 env->curr_max_regnum = 0;
1008 env->has_recursion = 0;
1010 env->parse_depth = 0;
1011 env->warnings_flag = 0;
1020 need =
env->num_mem + 1;
1024 if (
env->mem_alloc <= need) {
1033 alloc =
env->mem_alloc * 2;
1038 for (i =
env->num_mem + 1; i < alloc; i++)
1041 env->mem_nodes_dynamic = p;
1042 env->mem_alloc = alloc;
1047 return env->num_mem;
1067 switch (
NTYPE(node)) {
1069 if (
NSTR(node)->capa != 0 &&
1092 bbuf_free(cc->
mbuf);
1097 if (
NQTFR(node)->target)
1140node_new_cclass(
void)
1142 Node* node = node_new();
1146 initialize_cclass(
NCCLASS(node));
1151node_new_ctype(
int type,
int not,
int ascii_range)
1153 Node* node = node_new();
1159 NCTYPE(node)->ascii_range = ascii_range;
1164node_new_anychar(
void)
1166 Node* node = node_new();
1174node_new_list(
Node* left,
Node* right)
1176 Node* node = node_new();
1188 return node_new_list(left, right);
1212 Node* node = node_new();
1224 Node* node = node_new();
1231 NANCHOR(node)->ascii_range = 0;
1236node_new_backref(
int back_num,
int* backrefs,
int by_name,
1238 int exist_level,
int nest_level,
1243 Node* node = node_new();
1248 NBREF(node)->state = 0;
1249 NBREF(node)->back_num = back_num;
1250 NBREF(node)->back_dynamic = (
int* )
NULL;
1254#ifdef USE_BACKREF_WITH_LEVEL
1255 if (exist_level != 0) {
1257 NBREF(node)->nest_level = nest_level;
1261 for (i = 0; i < back_num; i++) {
1262 if (backrefs[i] <=
env->num_mem &&
1270 for (i = 0; i < back_num; i++)
1271 NBREF(node)->back_static[i] = backrefs[i];
1274 int* p = (
int* )
xmalloc(
sizeof(
int) * back_num);
1279 NBREF(node)->back_dynamic = p;
1280 for (i = 0; i < back_num; i++)
1286#ifdef USE_SUBEXP_CALL
1290 Node* node = node_new();
1294 NCALL(node)->state = 0;
1297 NCALL(node)->name_end = name_end;
1298 NCALL(node)->group_num = gnum;
1304node_new_quantifier(
int lower,
int upper,
int by_number)
1306 Node* node = node_new();
1310 NQTFR(node)->state = 0;
1312 NQTFR(node)->lower = lower;
1313 NQTFR(node)->upper = upper;
1314 NQTFR(node)->greedy = 1;
1318 NQTFR(node)->is_referred = 0;
1322#ifdef USE_COMBINATION_EXPLOSION_CHECK
1323 NQTFR(node)->comb_exp_check_num = 0;
1330node_new_enclose(
int type)
1332 Node* node = node_new();
1349 return node_new_enclose(type);
1360#ifdef USE_SUBEXP_CALL
1378 ptrdiff_t addlen = end - s;
1387 if (capa <=
NSTR(node)->capa) {
1392 p = strcat_capa_from_static(
NSTR(node)->s,
NSTR(node)->end,
1395 p = strcat_capa(
NSTR(node)->s,
NSTR(node)->end, s, end, capa);
1399 NSTR(node)->capa = (
int )capa;
1441 NSTR(node)->flag = flag;
1442 NSTR(node)->capa = 0;
1451 if (
NSTR(node)->capa != 0 &&
1456 NSTR(node)->capa = 0;
1457 NSTR(node)->flag = 0;
1463node_new_str(
const UChar* s,
const UChar* end)
1465 Node* node = node_new();
1469 NSTR(node)->capa = 0;
1470 NSTR(node)->flag = 0;
1483 return node_new_str(s, end);
1489 Node* node = node_new_str(s, end);
1502node_new_str_raw_char(
UChar c)
1507 return node_new_str_raw(p, p + 1);
1516 if (sn->
end > sn->
s) {
1518 if (p && p > sn->
s) {
1519 n = node_new_str(p, sn->
end);
1531 if (sn->
end > sn->
s) {
1532 return ((
enclen(enc, sn->
s, sn->
end) < sn->
end - sn->
s) ? 1 : 0);
1537#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
1549 for (i = 0; i <
num; i++) {
1558 unsigned int num, val;
1583scan_unsigned_hexadecimal_number(
UChar** src,
UChar* end,
int minlen,
1587 unsigned int num, val;
1592 restlen = maxlen - minlen;
1594 while (!
PEND && maxlen-- != 0) {
1609 if (maxlen > restlen)
1616scan_unsigned_octal_number(
UChar** src,
UChar* end,
int maxlen,
1620 unsigned int num, val;
1625 while (!
PEND && maxlen-- != 0) {
1644#define BBUF_WRITE_CODE_POINT(bbuf,pos,code) \
1645 BBUF_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)
1652new_code_range(
BBuf** pbuf)
1654#define INIT_MULTI_BYTE_RANGE_SIZE (SIZE_CODE_POINT * 5)
1679 n = from; from = to; to = n;
1683 r = new_code_range(pbuf);
1695 bound = (from == 0) ? 0 : n;
1696 for (low = 0; low < bound; ) {
1697 x = (low + bound) >> 1;
1698 if (from - 1 > data[x*2 + 1])
1705 for (bound = n; high < bound; ) {
1706 x = (high + bound) >> 1;
1707 if (to + 1 >= data[x*2])
1716 inc_n = low + 1 - high;
1721 if (checkdup && from <= data[low*2+1]
1722 && (data[low*2] <= from || data[low*2+1] <= to))
1723 CC_DUP_WARN(
env, from, to);
1724 if (from > data[low*2])
1726 if (to < data[(high - 1)*2 + 1])
1727 to = data[(high - 1)*2 + 1];
1758 return add_code_range_to_buf0(pbuf,
env, from, to, 1);
1771 return add_code_range_to_buf0(pbuf,
env, from, to, checkdup);
1777 return add_code_range0(pbuf,
env, from, to, 1);
1795 if (n <= 0)
goto set_all;
1799 for (i = 0; i < n; i++) {
1802 if (pre <= from - 1) {
1803 r = add_code_range_to_buf(pbuf,
env, pre, from - 1);
1804 if (r != 0)
return r;
1815#define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2) do {\
1818 tnot = not1; not1 = not2; not2 = tnot; \
1819 tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \
1832 if (not1 != 0 || not2 != 0)
1847 return bbuf_clone(pbuf, bbuf2);
1850 return not_code_range_buf(enc, bbuf2, pbuf,
env);
1862 if (not2 == 0 && not1 == 0) {
1863 r = bbuf_clone(pbuf, bbuf2);
1865 else if (not1 == 0) {
1866 r = not_code_range_buf(enc, bbuf2, pbuf,
env);
1868 if (r != 0)
return r;
1870 for (i = 0; i < n1; i++) {
1873 r = add_code_range_to_buf(pbuf,
env, from, to);
1874 if (r != 0)
return r;
1886 for (i = 0; i < n; i++) {
1889 if (from2 < from1) {
1890 if (to2 < from1)
continue;
1895 else if (from2 <= to1) {
1897 if (from1 <= from2 - 1) {
1898 r = add_code_range_to_buf(pbuf,
env, from1, from2-1);
1899 if (r != 0)
return r;
1910 if (from1 > to1)
break;
1913 r = add_code_range_to_buf(pbuf,
env, from1, to1);
1914 if (r != 0)
return r;
1929 return bbuf_clone(pbuf, bbuf2);
1934 return bbuf_clone(pbuf, bbuf1);
1948 if (not2 == 0 && not1 == 0) {
1949 for (i = 0; i < n1; i++) {
1952 for (j = 0; j < n2; j++) {
1955 if (from2 > to1)
break;
1956 if (to2 < from1)
continue;
1957 from =
MAX(from1, from2);
1959 r = add_code_range_to_buf(pbuf,
env, from, to);
1960 if (r != 0)
return r;
1964 else if (not1 == 0) {
1965 for (i = 0; i < n1; i++) {
1968 r = and_code_range1(pbuf,
env, from1, to1, data2, n2);
1969 if (r != 0)
return r;
1981 BBuf *buf1, *buf2, *pbuf = 0;
1993 bitset_invert_to(bsr1, bs1);
1997 bitset_invert_to(bsr2, bs2);
2000 bitset_and(bsr1, bsr2);
2001 if (bsr1 != dest->
bs) {
2002 bitset_copy(dest->
bs, bsr1);
2006 bitset_invert(dest->
bs);
2010 if (not1 != 0 && not2 != 0) {
2011 r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf,
env);
2014 r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf,
env);
2015 if (r == 0 && not1 != 0) {
2017 r = not_code_range_buf(enc, pbuf, &tbuf,
env);
2039 BBuf *buf1, *buf2, *pbuf = 0;
2051 bitset_invert_to(bsr1, bs1);
2055 bitset_invert_to(bsr2, bs2);
2058 bitset_or(bsr1, bsr2);
2059 if (bsr1 != dest->
bs) {
2060 bitset_copy(dest->
bs, bsr1);
2064 bitset_invert(dest->
bs);
2068 if (not1 != 0 && not2 != 0) {
2069 r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf,
env);
2072 r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf,
env);
2073 if (r == 0 && not1 != 0) {
2075 r = not_code_range_buf(enc, pbuf, &tbuf,
env);
2093static void UNKNOWN_ESC_WARN(
ScanEnv *
env,
int c);
2100 case 'n':
return '\n';
2101 case 't':
return '\t';
2102 case 'r':
return '\r';
2103 case 'f':
return '\f';
2104 case 'a':
return '\007';
2105 case 'b':
return '\010';
2106 case 'e':
return '\033';
2113 if ((
'a' <= c && c <=
'z') || (
'A' <= c && c <=
'Z'))
2114 UNKNOWN_ESC_WARN(
env, c);
2121#ifdef USE_NO_INVALID_QUANTIFIER
2122# define is_invalid_quantifier_target(node) 0
2127 switch (
NTYPE(node)) {
2162 if (q->
lower == 0) {
2163 if (q->
upper == 1)
return 0;
2166 else if (q->
lower == 1) {
2171 if (q->
lower == 0) {
2172 if (q->
upper == 1)
return 3;
2175 else if (q->
lower == 1) {
2192static enum ReduceType const ReduceTypeTable[6][6] = {
2210 pnum = popular_quantifier_num(p);
2211 cnum = popular_quantifier_num(c);
2212 if (pnum < 0 || cnum < 0) return ;
2214 switch (ReduceTypeTable[cnum][pnum]) {
2302#ifdef USE_BACKREF_WITH_LEVEL
2324 int low, up, syn_allow, non_low = 0;
2342 if (c ==
')' || c ==
'(' || c ==
'|') {
2362 if (
PEND)
goto invalid;
2386 if (
PEND)
goto invalid;
2389 if (c !=
MC_ESC(
env->syntax))
goto invalid;
2390 if (
PEND)
goto invalid;
2393 if (c !=
'}')
goto invalid;
2400 tok->u.repeat.lower = low;
2401 tok->u.repeat.upper = up;
2433 v = fetch_escaped_value(&p, end,
env, &c);
2434 if (v < 0)
return v;
2436 c = ((c & 0xff) | 0x80);
2462 v = fetch_escaped_value(&p, end,
env, &c);
2463 if (v < 0)
return v;
2474 c = conv_backslash_value(c,
env);
2501#ifdef USE_NAMED_GROUP
2503# define ONIGENC_IS_CODE_NAME(enc, c) TRUE
2505# define ONIGENC_IS_CODE_NAME(enc, c) ONIGENC_IS_CODE_WORD(enc, c)
2508# ifdef USE_BACKREF_WITH_LEVEL
2517 int* rback_num,
int* rlevel)
2519 int r, sign, is_num, exist_level;
2529 is_num = exist_level = 0;
2533 end_code = get_name_end_code_point(start_code);
2548 else if (c ==
'-') {
2561 if (c == end_code || c ==
')' || c ==
'+' || c ==
'-') {
2580 if (r == 0 && c != end_code) {
2581 if (c ==
'+' || c ==
'-') {
2583 int flag = (c ==
'-' ? -1 : 1);
2594 *rlevel = (level * flag);
2614 else if (*rback_num == 0)
goto err;
2619 *rname_end = name_end;
2621 return (exist_level ? 1 : 0);
2638 int r, is_num, sign;
2648 end_code = get_name_end_code_point(start_code);
2671 else if (c ==
'-') {
2691 if (c == end_code || c ==
')') {
2719 if (c != end_code) {
2728 else if (*rback_num == 0) {
2736 *rname_end = name_end;
2745 if (c == end_code || c ==
')')
2761 int r, is_num, sign;
2772 end_code = get_name_end_code_point(start_code);
2774 *rname_end = name_end = end;
2791 else if (c ==
'-') {
2805 if (c == end_code || c ==
')')
break;
2809 if (r == 0 && c != end_code) {
2817 else if (*rback_num == 0) {
2823 *rname_end = name_end;
2837onig_syntax_warn(
ScanEnv *
env,
const char *fmt, ...)
2841 va_start(args, fmt);
2843 env->pattern,
env->pattern_end,
2844 (
const UChar *)fmt, args);
2852 (*onig_warn)((
char* )
buf);
2863 onig_syntax_warn(
env,
"character class has '%s' without escape", c);
2873 onig_syntax_warn(
env,
"regular expression has '%s' without escape", c);
2888#ifdef WARN_ALL_CC_DUP
2889 onig_syntax_warn(
env,
"character class has duplicated range: %04x-%04x", from, to);
2892 onig_syntax_warn(
env,
"character class has duplicated range");
2901 onig_syntax_warn(
env,
"Unknown escape \\%c is ignored", c);
2915 q = p +
enclen(enc, p, to);
2917 for (i = 1; i < n && q < to; i++) {
2919 if (x != s[i])
break;
2950 q = p +
enclen(enc, p, to);
2952 for (i = 1; i < n && q < to; i++) {
2954 if (x != s[i])
break;
2957 if (i >= n)
return 1;
2962 if (x == bad)
return 0;
2963 else if (x ==
MC_ESC(syn)) in_esc = 1;
2996 else if (c ==
'-') {
2999 else if (c ==
MC_ESC(syn)) {
3012 tok->u.prop.not = 0;
3017 tok->u.prop.not = 1;
3022 tok->u.prop.not = 0;
3027 tok->u.prop.not = 1;
3032 tok->u.prop.not = 0;
3037 tok->u.prop.not = 1;
3043 tok->u.prop.not = 0;
3049 tok->u.prop.not = 1;
3061 tok->u.prop.not = (c ==
'P' ? 1 : 0);
3066 tok->u.prop.not = (
tok->u.prop.not == 0 ? 1 : 0);
3073 onig_syntax_warn(
env,
"invalid Unicode Property \\%c", c);
3083 num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);
3103 num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);
3119 num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);
3137 num = scan_unsigned_octal_number(&p, end, 11, enc);
3159 case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
3163 num = scan_unsigned_octal_number(&p, end, 3, enc);
3176 num = fetch_escaped_value(&p, end,
env, &c2);
3185 else if (c ==
'[') {
3190 if (str_exist_check_with_esc(send, 2, p, end,
3209 else if (c ==
'&') {
3222#ifdef USE_NAMED_GROUP
3237# ifdef USE_BACKREF_WITH_LEVEL
3239 r = fetch_name_with_level(c, &p, end, &name_end,
3240 env, &back_num, &
tok->u.backref.level);
3241 if (r == 1)
tok->u.backref.exist_level = 1;
3242 else tok->u.backref.exist_level = 0;
3244 r = fetch_name(&p, end, &name_end,
env, &back_num, 1);
3246 if (r < 0)
return r;
3248 if (back_num != 0) {
3256 if (back_num >
env->num_mem ||
3261 tok->u.backref.by_name = 0;
3262 tok->u.backref.num = 1;
3263 tok->u.backref.ref1 = back_num;
3274 for (i = 0; i <
num; i++) {
3275 if (backs[i] >
env->num_mem ||
3282 tok->u.backref.by_name = 1;
3284 tok->u.backref.num = 1;
3285 tok->u.backref.ref1 = backs[0];
3288 tok->u.backref.num =
num;
3289 tok->u.backref.refs = backs;
3331 tok->u.repeat.lower = 0;
3339 tok->u.repeat.lower = 1;
3347 tok->u.repeat.lower = 0;
3348 tok->u.repeat.upper = 1;
3353 tok->u.repeat.greedy = 0;
3354 tok->u.repeat.possessive = 0;
3364 tok->u.repeat.greedy = 1;
3365 tok->u.repeat.possessive = 1;
3368 tok->u.repeat.greedy = 1;
3369 tok->u.repeat.possessive = 0;
3376 r = fetch_range_quantifier(&p, end,
tok,
env);
3377 if (r < 0)
return r;
3378 if (r == 0)
goto greedy_check;
3381 goto possessive_check;
3407 tok->u.prop.not = 0;
3414 tok->u.prop.not = 1;
3433#ifdef USE_WORD_BEGIN_END
3453 tok->u.prop.not = 0;
3460 tok->u.prop.not = 1;
3467 tok->u.prop.not = 0;
3474 tok->u.prop.not = 1;
3481 tok->u.prop.not = 0;
3488 tok->u.prop.not = 1;
3533 num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);
3551 num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);
3567 num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);
3585 num = scan_unsigned_octal_number(&p, end, 11, enc);
3605 case '1':
case '2':
case '3':
case '4':
3606 case '5':
case '6':
case '7':
case '8':
case '9':
3615 (num <= env->num_mem ||
num <= 9)) {
3622 tok->u.backref.num = 1;
3623 tok->u.backref.ref1 =
num;
3624 tok->u.backref.by_name = 0;
3625#ifdef USE_BACKREF_WITH_LEVEL
3626 tok->u.backref.exist_level = 0;
3632 if (c ==
'8' || c ==
'9') {
3643 num = scan_unsigned_octal_number(&p, end, (c ==
'0' ? 2:3), enc);
3652 else if (c !=
'0') {
3657#ifdef USE_NAMED_GROUP
3661 if (c ==
'<' || c ==
'\'') {
3662 r = fetch_named_backref_token(c,
tok, &p, end,
env);
3663 if (r < 0)
return r;
3667 onig_syntax_warn(
env,
"invalid back reference");
3673#if defined(USE_SUBEXP_CALL) || defined(USE_NAMED_GROUP)
3675# ifdef USE_NAMED_GROUP
3679 r = fetch_named_backref_token(c,
tok, &p, end,
env);
3680 if (r < 0)
return r;
3686# ifdef USE_SUBEXP_CALL
3689 if (c ==
'<' || c ==
'\'') {
3690 int gnum = -1, rel = 0;
3697 if (
PPEEK_IS(get_name_end_code_point(c))) {
3703 else if (cnext ==
'+') {
3710 if (r < 0)
return r;
3714 tok->u.call.name = prev;
3715 tok->u.call.name_end = name_end;
3716 tok->u.call.gnum = gnum;
3717 tok->u.call.rel = rel;
3720 onig_syntax_warn(
env,
"invalid subexp call");
3740 tok->u.prop.not = (c ==
'P' ? 1 : 0);
3745 tok->u.prop.not = (
tok->u.prop.not == 0 ? 1 : 0);
3752 onig_syntax_warn(
env,
"invalid Unicode Property \\%c", c);
3779 num = fetch_escaped_value(&p, end,
env, &c2);
3797#ifdef USE_VARIABLE_META_CHARS
3805 goto zero_or_one_time;
3807 goto one_or_more_time;
3818#ifdef USE_VARIABLE_META_CHARS
3826#ifdef USE_VARIABLE_META_CHARS
3830 tok->u.repeat.lower = 0;
3837#ifdef USE_VARIABLE_META_CHARS
3841 tok->u.repeat.lower = 1;
3848#ifdef USE_VARIABLE_META_CHARS
3852 tok->u.repeat.lower = 0;
3853 tok->u.repeat.upper = 1;
3859 r = fetch_range_quantifier(&p, end,
tok,
env);
3860 if (r < 0)
return r;
3861 if (r == 0)
goto greedy_check;
3864 goto possessive_check;
3889 if (c ==
')')
break;
3894#ifdef USE_PERL_SUBEXP_CALL
3904 if (c ==
'R' || c ==
'0') {
3908 name_end =
name = p;
3918 r = fetch_name((
OnigCodePoint )
'(', &p, end, &name_end,
env, &gnum, numref);
3919 if (r < 0)
return r;
3924 tok->u.call.name_end = name_end;
3925 tok->u.call.gnum = gnum;
3926 tok->u.call.rel = 0;
3929 else if ((c ==
'-' || c ==
'+') &&
3944 if (r < 0)
return r;
3948 tok->u.call.name_end = name_end;
3949 tok->u.call.gnum = gnum;
3950 tok->u.call.rel = 1;
3955#ifdef USE_CAPITAL_P_NAMED_GROUP
3968 if (r < 0)
return r;
3971 else if (c ==
'>') {
3974 if (r < 0)
return r;
3978 tok->u.call.name_end = name_end;
3979 tok->u.call.gnum = gnum;
3980 tok->u.call.rel = 0;
4017 if (*src >
env->pattern)
4018 CLOSE_BRACKET_WITHOUT_ESC_WARN(
env, (
UChar* )
"]");
4033 case ' ':
case '\t':
case '\n':
case '\r':
case '\f':
4044#ifdef USE_VARIABLE_META_CHARS
4062 for (i = 0; i < n; i++) {
4067 r = add_code_range_to_buf(&(cc->
mbuf),
env, j,
4069 if (r != 0)
return r;
4080 for ( ; i < n; i++) {
4081 r = add_code_range_to_buf(&(cc->
mbuf),
env,
4084 if (r != 0)
return r;
4090 for (i = 0; i < n; i++) {
4100 for (j = prev; j < sb_out; j++) {
4107 for (i = 0; i < n; i++) {
4109 r = add_code_range_to_buf(&(cc->
mbuf),
env, prev,
4111 if (r != 0)
return r;
4115 if (prev < 0x7fffffff) {
4116 r = add_code_range_to_buf(&(cc->
mbuf),
env, prev, 0x7fffffff);
4117 if (r != 0)
return r;
4137 initialize_cclass(&ccwork);
4138 r = add_ctype_to_cc_by_range(&ccwork, ctype, not,
env, sb_out,
4146 initialize_cclass(&ccascii);
4148 r = add_code_range(&(ccascii.
mbuf),
env, 0x00, 0x7F);
4151 bitset_set_range(
env, ccascii.
bs, 0x00, 0x7F);
4155 r = and_cclass(&ccwork, &ccascii,
env);
4160 r = or_cclass(cc, &ccwork,
env);
4166 r = add_ctype_to_cc_by_range(cc, ctype, not,
env, sb_out, ranges);
4215 for (c = 0; c < maxcode; c++) {
4226 for (c = 0; c < maxcode; c++) {
4255#define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20
4256#define POSIX_BRACKET_NAME_MIN_LEN 4
4290 goto not_posix_bracket;
4294 for (pb = PBS; pb < PBS +
numberof(PBS); pb++) {
4300 r = add_ctype_to_cc(cc, pb->
ctype, not, ascii_range,
env);
4301 if (r != 0)
return r;
4307 r = add_ctype_to_cc(asc_cc, pb->
ctype, not, ascii_range,
env);
4308 if (r != 0)
return r;
4320 while (!
PEND && ((c =
PPEEK) !=
':') && c !=
']') {
4324 if (c ==
':' && !
PEND) {
4342 UChar *prev, *start, *p = *src;
4357 else if (c ==
'(' || c ==
')' || c ==
'{' || c ==
'|') {
4375 ctype = fetch_char_property_to_ctype(src, end,
env);
4376 if (ctype < 0)
return ctype;
4378 *np = node_new_cclass();
4381 r = add_ctype_to_cc(cc, ctype, 0, 0,
env);
4382 if (r != 0)
return r;
4387 r = cclass_case_fold(np, cc, cc,
env);
4423 r = add_code_range(&(cc->
mbuf),
env, *vs, *vs);
4424 if (r < 0)
return r;
4426 r = add_code_range0(&(asc_cc->
mbuf),
env, *vs, *vs, 0);
4427 if (r < 0)
return r;
4440 int* from_israw,
int to_israw,
4454 r = add_code_range(&(cc->
mbuf),
env, *from, *from);
4455 if (r < 0)
return r;
4457 r = add_code_range0(&(asc_cc->
mbuf),
env, *from, *from, 0);
4458 if (r < 0)
return r;
4464 if (intype == *type) {
4466 if (*from > 0xff || to > 0xff)
4475 bitset_set_range(
env, cc->
bs, (
int )*from, (
int )to);
4477 bitset_set_range(
env, asc_cc->
bs, (
int )*from, (
int )to);
4480 r = add_code_range(&(cc->
mbuf),
env, *from, to);
4481 if (r < 0)
return r;
4483 r = add_code_range0(&(asc_cc->
mbuf),
env, *from, to, 0);
4484 if (r < 0)
return r;
4495 bitset_set_range(
env, cc->
bs, (
int )*from, (
int )(to < 0xff ? to : 0xff));
4497 if (r < 0)
return r;
4499 bitset_set_range(
env, asc_cc->
bs, (
int )*from, (
int )(to < 0xff ? to : 0xff));
4501 if (r < 0)
return r;
4517 *from_israw = to_israw;
4534 if (ignore_escaped && in_esc) {
4539 if (
code == c)
return 1;
4550 int r,
neg,
len, fetched, and_start;
4561 int val_israw, in_israw;
4565 if (
env->parse_depth > ParseDepthLimit)
4568 r = fetch_token_in_cc(
tok, src, end,
env);
4571 r = fetch_token_in_cc(
tok, src, end,
env);
4577 if (r < 0)
return r;
4580 *src,
env->pattern_end, 1,
env))
4587 *np = node = node_new_cclass();
4592 *asc_np = asc_node = node_new_cclass();
4631 int i, base =
tok->base;
4635 r = fetch_token_in_cc(
tok, &p, end,
env);
4636 if (r < 0)
goto err;
4656 for (i = 1; i <
len; i++) {
4657 (void)fetch_token_in_cc(
tok, &p, end,
env);
4692 r = next_state_val(cc, asc_cc, &vs, v, &val_israw, in_israw, in_type, &val_type,
4694 if (r != 0)
goto err;
4698 r = parse_posix_bracket(cc, asc_cc, &p, end,
env);
4699 if (r < 0)
goto err;
4711 r = add_ctype_to_cc(cc,
tok->u.prop.ctype,
tok->u.prop.not,
4713 if (r != 0)
return r;
4716 r = add_ctype_to_cc(asc_cc,
tok->u.prop.ctype,
tok->u.prop.not,
4718 if (r != 0)
return r;
4722 r = next_state_class(cc, asc_cc, &vs, &val_type, &
state,
env);
4723 if (r != 0)
goto err;
4730 ctype = fetch_char_property_to_ctype(&p, end,
env);
4731 if (ctype < 0)
return ctype;
4732 r = add_ctype_to_cc(cc, ctype,
tok->u.prop.not, 0,
env);
4733 if (r != 0)
return r;
4736 r = add_ctype_to_cc(asc_cc, ctype,
tok->u.prop.not, 0,
env);
4737 if (r != 0)
return r;
4745 r = fetch_token_in_cc(
tok, &p, end,
env);
4746 if (r < 0)
goto err;
4771 r = fetch_token_in_cc(
tok, &p, end,
env);
4772 if (r < 0)
goto err;
4785 r = fetch_token_in_cc(
tok, &p, end,
env);
4786 if (r < 0)
goto err;
4805 Node *anode, *aasc_node;
4808 r = parse_char_class(&anode, &aasc_node,
tok, &p, end,
env);
4811 r = or_cclass(cc, acc,
env);
4815 r = or_cclass(asc_cc, acc,
env);
4819 if (r != 0)
goto err;
4826 r = next_state_val(cc, asc_cc, &vs, 0, &val_israw, 0, val_type,
4828 if (r != 0)
goto err;
4835 r = and_cclass(prev_cc, cc,
env);
4836 if (r != 0)
goto err;
4837 bbuf_free(cc->
mbuf);
4839 r = and_cclass(asc_prev_cc, asc_cc,
env);
4840 if (r != 0)
goto err;
4841 bbuf_free(asc_cc->
mbuf);
4848 asc_prev_cc = asc_cc;
4849 asc_cc = &asc_work_cc;
4852 initialize_cclass(cc);
4854 initialize_cclass(asc_cc);
4871 r = fetch_token_in_cc(
tok, &p, end,
env);
4872 if (r < 0)
goto err;
4877 r = next_state_val(cc, asc_cc, &vs, 0, &val_israw, 0, val_type,
4879 if (r != 0)
goto err;
4883 r = and_cclass(prev_cc, cc,
env);
4884 if (r != 0)
goto err;
4885 bbuf_free(cc->
mbuf);
4888 r = and_cclass(asc_prev_cc, asc_cc,
env);
4889 if (r != 0)
goto err;
4890 bbuf_free(asc_cc->
mbuf);
4891 asc_cc = asc_prev_cc;
4913 if (is_empty == 0) {
4914#define NEWLINE_CODE 0x0a
4921 if (r < 0)
goto err;
4932 bbuf_free(cc->
mbuf);
4934 bbuf_free(asc_cc->
mbuf);
4951#ifdef USE_NAMED_GROUP
4961 option =
env->option;
4971 r = fetch_token(
tok, &p, end,
env);
4972 if (r < 0)
return r;
4973 r = parse_subexp(np,
tok,
term, &p, end,
env);
4974 if (r < 0)
return r;
4997#ifdef USE_NAMED_GROUP
5006# ifdef USE_CAPITAL_P_NAMED_GROUP
5011 if (c ==
'<')
goto named_group1;
5025#ifdef USE_NAMED_GROUP
5037# ifdef USE_CAPTURE_HISTORY
5042 if (r < 0)
return r;
5044 num = scan_env_add_mem_entry(
env);
5050 if (r != 0)
return r;
5051 *np = node_new_enclose_memory(
env->option, 1);
5054 if (list_capture != 0)
5069#ifdef USE_CAPTURE_HISTORY
5072# ifdef USE_NAMED_GROUP
5076 if (c ==
'<' || c ==
'\'') {
5083 *np = node_new_enclose_memory(
env->option, 0);
5085 num = scan_env_add_mem_entry(
env);
5108 if (r < 0)
return r;
5118 if (
num >
env->num_mem ||
5123#ifdef USE_NAMED_GROUP
5124 else if (c ==
'<' || c ==
'\'') {
5126 r = fetch_named_backref_token(c,
tok, &p, end,
env);
5127 if (r < 0)
return r;
5132 num =
tok->u.backref.ref1;
5139 int len =
tok->u.backref.num;
5140 num =
len > 1 ?
tok->u.backref.refs[0] :
tok->u.backref.ref1;
5191#ifdef USE_POSIXLINE_OPTION
5194 case '-':
case 'i':
case 'm':
case 's':
case 'x':
5195 case 'a':
case 'd':
case 'l':
case 'u':
5205 case '-':
neg = 1;
break;
5226#ifdef USE_POSIXLINE_OPTION
5284 *np = node_new_option(option);
5289 else if (c ==
':') {
5292 env->option = option;
5293 r = fetch_token(
tok, &p, end,
env);
5298 r = parse_subexp(&target,
tok,
term, &p, end,
env);
5300 if (r < 0)
return r;
5301 *np = node_new_option(option);
5322 *np = node_new_enclose_memory(
env->option, 0);
5324 num = scan_env_add_mem_entry(
env);
5330 r = fetch_token(
tok, &p, end,
env);
5331 if (r < 0)
return r;
5332 r = parse_subexp(&target,
tok,
term, &p, end,
env);
5339 NANCHOR(*np)->target = target;
5344 r = scan_env_set_mem_node(
env,
NENCLOSE(*np)->regnum, *np);
5345 if (r != 0)
return r;
5350 work1 = node_new_empty();
5372static const char*
const PopularQStr[] = {
5373 "?",
"*",
"+",
"??",
"*?",
"+?"
5376static const char*
const ReduceQStr[] = {
5377 "",
"",
"*",
"*?",
"??",
"+ and ??",
"+? and ?"
5390 switch (
NTYPE(target)) {
5394 if (str_node_can_be_split(sn,
env->enc)) {
5395 Node* n = str_node_split_last_char(sn,
env->enc);
5408 int nestq_num = popular_quantifier_num(qn);
5409 int targetq_num = popular_quantifier_num(qnt);
5411#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
5412 if (nestq_num >= 0 && targetq_num >= 0 &&
5414 switch (ReduceTypeTable[targetq_num][nestq_num]) {
5420 onig_syntax_warn(
env,
"regular expression has redundant nested repeat operator '%s'",
5421 PopularQStr[targetq_num]);
5428 onig_syntax_warn(
env,
"nested repeat operator '%s' and '%s' was replaced with '%s' in regular expression",
5429 PopularQStr[targetq_num], PopularQStr[nestq_num],
5430 ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
5439 if (targetq_num >= 0) {
5440 if (nestq_num >= 0) {
5444 else if (targetq_num == 1 || targetq_num == 2) {
5464#ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
5472 bitset_invert(cc->
bs);
5475 r = not_code_range_buf(enc, cc->
mbuf, &tbuf);
5476 if (r != 0)
return r;
5478 bbuf_free(cc->
mbuf);
5499 int to_len,
void* arg)
5523 add_flag = !add_flag;
5528#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
5533 r = add_code_range0(&(cc->
mbuf),
env, *to, *to, 0);
5534 if (r < 0)
return r;
5546 r = add_code_range0(&(cc->
mbuf),
env, *to, *to, 0);
5547 if (r < 0)
return r;
5571 for (i = 0; i < to_len; i++) {
5612 i_apply_case_fold, &iarg);
5642 if (num1 < 0)
return num1;
5644 if (num2 < 0)
return num2;
5645 left = node_new_str_raw(
buf,
buf + num1 + num2);
5649 right = node_new_cclass();
5653 r = add_code_range(&(cc->
mbuf),
env, 0x0A, 0x0D);
5654 if (r != 0)
goto err;
5657 bitset_set_range(
env, cc->
bs, 0x0A, 0x0D);
5663 r = add_code_range(&(cc->
mbuf),
env, 0x85, 0x85);
5664 if (r != 0)
goto err;
5665 r = add_code_range(&(cc->
mbuf),
env, 0x2028, 0x2029);
5666 if (r != 0)
goto err;
5693propname2ctype(
ScanEnv*
env,
const char* propname)
5708 int ctype = propname2ctype(
env, propname);
5709 if (ctype < 0)
return ctype;
5710 return add_ctype_to_cc(cc, ctype, not, 0,
env);
5717create_property_node(
Node **np,
ScanEnv*
env,
const char* propname)
5722 *np = node_new_cclass();
5725 r = add_property_to_cc(cc, propname, 0,
env);
5732quantify_node(
Node **np,
int lower,
int upper)
5734 Node* tmp = node_new_quantifier(lower, upper, 0);
5736 NQTFR(tmp)->target = *np;
5742quantify_property_node(
Node **np,
ScanEnv*
env,
const char* propname,
char repetitions)
5748 r = create_property_node(np,
env, propname);
5749 if (r != 0)
return r;
5750 switch (repetitions) {
5751 case '?': upper = 1;
break;
5752 case '+': lower = 1;
break;
5754 case '2': lower = upper = 2;
break;
5757 return quantify_node(np, lower, upper);
5765create_node_from_array(
int kind,
Node **np,
Node **node_array)
5772 *np = kind==
LIST ? node_new_list(node_array[i], tmp)
/*
 * R_ERR(call): evaluate `call`, store its result in the local variable `r`,
 * and jump to the local label `err` when the result is non-zero.
 *
 * Wrapped in do { ... } while (0) so the expansion is exactly one statement:
 * it stays correct under an unbraced if/else and still requires the
 * trailing semicolon that every call site already supplies.
 */
#define R_ERR(call) do { r = (call); if (r != 0) goto err; } while (0)
5812#define NODE_COMMON_SIZE 16
5823 int any_target_position;
5829 Node **alts = node_common+0;
5837 if (r < 0)
goto err;
5840 if (r < 0)
goto err;
5841 alts[0] = node_new_str_raw(
buf,
buf + num1 + r);
5844#ifdef USE_UNICODE_PROPERTIES
5848 if (propname2ctype(
env,
"Grapheme_Cluster_Break=Extend") < 0)
goto err;
5856 alts[1] = node_new_cclass();
5859 R_ERR(add_property_to_cc(cc,
"Grapheme_Cluster_Break=Control", 0,
env));
5861 R_ERR(add_code_range(&(cc->
mbuf),
env, 0x000A, 0x000A));
5862 R_ERR(add_code_range(&(cc->
mbuf),
env, 0x000D, 0x000D));
5871 Node **list = alts + 3;
5874 R_ERR(quantify_property_node(list+0,
env,
"Grapheme_Cluster_Break=Prepend",
'*'));
5881 Node **core_alts = list + 2;
5892 Node **H_list = core_alts + 1;
5893 R_ERR(quantify_property_node(H_list+0,
env,
"Grapheme_Cluster_Break=L",
'*'));
5897 Node **H_alt2 = H_list + 2;
5898 R_ERR(quantify_property_node(H_alt2+0,
env,
"Grapheme_Cluster_Break=V",
'+'));
5902 Node **H_list2 = H_alt2 + 2;
5904 R_ERR(create_property_node(H_list2+0,
env,
"Grapheme_Cluster_Break=LV"));
5905 R_ERR(quantify_property_node(H_list2+1,
env,
"Grapheme_Cluster_Break=V",
'*'));
5906 R_ERR(create_node_from_array(
LIST, H_alt2+1, H_list2));
5909 R_ERR(create_property_node(H_alt2+2,
env,
"Grapheme_Cluster_Break=LVT"));
5910 R_ERR(create_node_from_array(
ALT, H_list+1, H_alt2));
5913 R_ERR(quantify_property_node(H_list+2,
env,
"Grapheme_Cluster_Break=T",
'*'));
5914 R_ERR(create_node_from_array(
LIST, core_alts+0, H_list));
5917 R_ERR(quantify_property_node(core_alts+1,
env,
"Grapheme_Cluster_Break=L",
'+'));
5918 R_ERR(quantify_property_node(core_alts+2,
env,
"Grapheme_Cluster_Break=T",
'+'));
5922 R_ERR(quantify_property_node(core_alts+3,
env,
"Regional_Indicator",
'2'));
5926 Node **XP_list = core_alts + 5;
5927 R_ERR(create_property_node(XP_list+0,
env,
"Extended_Pictographic"));
5931 Node **Ex_list = XP_list + 2;
5933 R_ERR(quantify_property_node(Ex_list+0,
env,
"Grapheme_Cluster_Break=Extend",
'*'));
5937 if (r < 0)
goto err;
5938 Ex_list[1] = node_new_str_raw(
buf,
buf + r);
5941 R_ERR(create_property_node(Ex_list+2,
env,
"Extended_Pictographic"));
5942 R_ERR(create_node_from_array(
LIST, XP_list+1, Ex_list));
5946 R_ERR(create_node_from_array(
LIST, core_alts+4, XP_list));
5950 core_alts[5] = node_new_cclass();
5957 const int dup_not_warned =
env->warnings_flag | ~ONIG_SYN_WARN_CC_DUP;
5962 R_ERR(add_property_to_cc(cc,
"Grapheme_Cluster_Break=Control", 0,
env));
5963 R_ERR(add_code_range(&(cc->
mbuf),
env, 0x000A, 0x000A));
5964 R_ERR(add_code_range(&(cc->
mbuf),
env, 0x000D, 0x000D));
5966 cc->
mbuf = inverted_buf;
5968 env->warnings_flag &= dup_not_warned;
5971 R_ERR(add_property_to_cc(cc,
"Grapheme_Cluster_Break=Control", 1,
env));
5976 R_ERR(create_node_from_array(
ALT, list+1, core_alts));
5980 R_ERR(create_property_node(list+2,
env,
"Grapheme_Cluster_Break=Extend"));
5982 R_ERR(add_property_to_cc(cc,
"Grapheme_Cluster_Break=SpacingMark", 0,
env));
5983 R_ERR(add_code_range(&(cc->
mbuf),
env, 0x200D, 0x200D));
5986 R_ERR(create_node_from_array(
LIST, alts+2, list));
5989 any_target_position = 3;
5994 any_target_position = 1;
6000 np1 = node_new_anychar();
6003 option =
env->option;
6005 tmp = node_new_option(option);
6008 alts[any_target_position] = tmp;
6011 R_ERR(create_node_from_array(
ALT, &top_alt, alts));
6021#ifdef USE_UNICODE_PROPERTIES
6024 option =
env->option;
6026 *np = node_new_option(option);
6046countbits(
unsigned int bits)
6048 bits = (
bits & 0x55555555) + ((
bits >> 1) & 0x55555555);
6049 bits = (
bits & 0x33333333) + ((
bits >> 2) & 0x33333333);
6050 bits = (
bits & 0x0f0f0f0f) + ((
bits >> 4) & 0x0f0f0f0f);
6051 bits = (
bits & 0x00ff00ff) + ((
bits >> 8) & 0x00ff00ff);
6052 return (
bits & 0x0000ffff) + ((
bits >>16) & 0x0000ffff);
6070 if ((n == 1) && (data[0] == data[1])) {
6087 if (((b1 & (b1 - 1)) == 0) && (c == not_found)) {
6095 if (c != not_found) {
6109 int r,
len, group = 0;
6117 switch (
tok->type) {
6121 *np = node_new_empty();
6127 if (r < 0)
return r;
6128 if (r == 1) group = 1;
6134 r = fetch_token(
tok, src, end,
env);
6139 r = parse_subexp(&target,
tok,
term, src, end,
env);
6154 if (
tok->escaped)
goto tk_raw_byte;
6159 r = node_linebreak(np,
env);
6160 if (r < 0)
return r;
6164 r = node_extended_grapheme_cluster(np,
env);
6165 if (r < 0)
return r;
6176 *np = node_new_str(
tok->backp, *src);
6181 r = fetch_token(
tok, src, end,
env);
6182 if (r < 0)
return r;
6186#ifndef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
6188 r = node_str_cat_codepoint(*np,
env->enc,
tok->u.code);
6194 if (r < 0)
return r;
6206 *np = node_new_str_raw_char((
UChar )
tok->u.c);
6212 r = fetch_token(
tok, src, end,
env);
6218 r = fetch_token(
tok, src, end,
env);
6219 if (r < 0)
return r;
6222#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
6226 (void )node_str_head_pad(
NSTR(*np), rem, (
UChar )0);
6236 r = node_str_cat_char(*np, (
UChar )
tok->u.c);
6237 if (r < 0)
return r;
6246 *np = node_new_empty();
6248 r = node_str_cat_codepoint(*np,
env->enc,
tok->u.code);
6249 if (r != 0)
return r;
6250#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
6261 UChar *qstart, *qend, *nextp;
6266 qend = find_str_position(end_op, 2, qstart, end, &nextp,
env->enc);
6270 *np = node_new_str(qstart, qend);
6278 switch (
tok->u.prop.ctype) {
6280 *np = node_new_ctype(
tok->u.prop.ctype,
tok->u.prop.not,
6291 *np = node_new_cclass();
6294 r = add_ctype_to_cc(cc,
tok->u.prop.ctype, 0,
6296 if (r != 0)
return r;
6309 r = parse_char_property(np,
tok, src, end,
env);
6310 if (r != 0)
return r;
6319 r = parse_char_class(np, &asc_node,
tok, src, end,
env);
6326 if (is_onechar_cclass(cc, &
code)) {
6329 *np = node_new_empty();
6331 r = node_str_cat_codepoint(*np,
env->enc,
code);
6332 if (r != 0)
return r;
6336 r = cclass_case_fold(np, cc,
NCCLASS(asc_node),
env);
6347 *np = node_new_anychar();
6352 *np = node_new_anychar();
6356 NQTFR(qn)->target = *np;
6361 len =
tok->u.backref.num;
6362 *np = node_new_backref(
len,
6363 (
len > 1 ?
tok->u.backref.refs : &(
tok->u.backref.ref1)),
6364 tok->u.backref.by_name,
6366 tok->u.backref.exist_level,
6367 tok->u.backref.level,
6373#ifdef USE_SUBEXP_CALL
6376 int gnum =
tok->u.call.gnum;
6378 if (gnum < 0 || tok->u.call.rel != 0) {
6379 if (gnum > 0) gnum--;
6384 *np = node_new_call(
tok->u.call.name,
tok->u.call.name_end, gnum);
6394 NANCHOR(*np)->ascii_range =
tok->u.anchor.ascii_range;
6403 *np = node_new_empty();
6419 r = fetch_token(
tok, src, end,
env);
6420 if (r < 0)
return r;
6427 qn = node_new_quantifier(
tok->u.repeat.lower,
tok->u.repeat.upper,
6430 NQTFR(qn)->greedy =
tok->u.repeat.greedy;
6431 r = set_quantifier(qn, *targetp, group,
env);
6437 if (
tok->u.repeat.possessive != 0) {
6457 *targetp = node_new_list(*targetp,
NULL);
6462 tmp =
NCDR(*targetp) = node_new_list(qn,
NULL);
6467 targetp = &(
NCAR(tmp));
6481 Node *node, **headp;
6484 r = parse_exp(&node,
tok,
term, src, end,
env);
6494 *
top = node_new_list(node,
NULL);
6497 r = parse_exp(&node,
tok,
term, src, end,
env);
6506 headp = &(
NCDR(node));
6509 *headp = node_new_list(node,
NULL);
6510 headp = &(
NCDR(*headp));
6524 Node *node, **headp;
6528 if (
env->parse_depth > ParseDepthLimit)
6530 r = parse_branch(&node,
tok,
term, src, end,
env);
6543 r = fetch_token(
tok, src, end,
env);
6544 if (r < 0)
return r;
6545 r = parse_branch(&node,
tok,
term, src, end,
env);
6552 headp = &(
NCDR(*headp));
6577 r = fetch_token(&
tok, src, end,
env);
6578 if (r < 0)
return r;
6580 if (r < 0)
return r;
6582#ifdef USE_SUBEXP_CALL
6583 if (
env->num_call > 0) {
6587 np = node_new_enclose_memory(
env->option, 0);
6591 r = scan_env_set_mem_node(
env,
num, np);
6609#ifdef USE_NAMED_GROUP
6613 scan_env_clear(
env);
6623 p = (
UChar* )pattern;
6634 env->error_end = arg_end;
int bits(struct state *s, int need)
size_t map(int syms, int left, int len)
void rb_compile_warn(const char *file, int line, const char *fmt,...)
void rb_warn(const char *fmt,...)
typedef int(ZCALLBACK *close_file_func) OF((voidpf opaque
VALUE type(ANYARGS)
ANYARGS-ed function type.
#define ONIG_SYN_OP_ESC_C_CONTROL
#define ONIG_SYN_OP_LPAREN_SUBEXP
#define ONIGERR_INVALID_CHAR_PROPERTY_NAME
#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE
#define ONIGENC_CTYPE_GRAPH
#define ONIGERR_END_PATTERN_AT_META
#define ONIG_SYN_OP_ASTERISK_ZERO_INF
#define ONIGENC_APPLY_ALL_CASE_FOLD(enc, case_fold_flag, f, arg)
#define ONIGENC_CTYPE_ASCII
#define ONIG_SYN_USE_LEFT_MOST_NAMED_GROUP
#define ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS
#define ONIG_OPTION_DONT_CAPTURE_GROUP
#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT
#define ONIGENC_CTYPE_DIGIT
#define ONIG_SYN_OP_QMARK_NON_GREEDY
#define ONIG_NO_SUPPORT_CONFIG
#define ONIGENC_CODE_TO_MBC(enc, code, buf)
#define ONIG_SYN_OP_BRACKET_CC
#define ONIG_SYN_OP_ESC_VBAR_ALT
#define ONIGERR_PARSE_DEPTH_LIMIT_OVER
#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR
#define ONIG_SYN_OP2_QMARK_VBAR_BRANCH_RESET
#define ONIG_SYN_OP2_OPTION_PERL
#define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY
#define ONIG_SYN_OP2_QMARK_CAPITAL_P_NAMED_GROUP
#define ONIGENC_IS_CODE_CTYPE(enc, code, ctype)
#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID
#define ONIG_MAX_REPEAT_NUM
#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY
#define ONIGENC_CTYPE_XDIGIT
#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END
#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT
#define ONIGENC_CODE_RANGE_FROM(range, i)
#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP
#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC
#define ONIG_IS_OPTION_ON(options, option)
#define ONIG_SYN_OP_ESC_BRACE_INTERVAL
#define ONIG_INEFFECTIVE_META_CHAR
#define ONIG_REGION_NOTPOS
#define ONIGENC_MBC_TO_CODE(enc, p, end)
#define ONIG_SYN_WARN_CC_DUP
#define ONIGERR_META_CODE_SYNTAX
#define ONIG_SYN_OP_BRACE_INTERVAL
#define ONIG_SYN_OP_ESC_B_WORD_BOUND
#define ONIGERR_PARSER_BUG
#define ONIG_SYN_OP_DECIMAL_BACKREF
#define ONIG_SYN_OP_ESC_W_WORD
#define ONIGENC_CTYPE_ALNUM
#define ONIG_SYN_OP2_QMARK_TILDE_ABSENT
#define ONIGENC_CTYPE_ALPHA
#define ONIGENC_IS_CODE_XDIGIT(enc, code)
#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME
#define ONIGENC_CTYPE_SPACE
#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR
#define ONIGENC_IS_UNICODE(enc)
#define ONIGERR_END_PATTERN_AT_ESCAPE
#define ONIG_OPTION_MULTILINE
#define ONIGERR_TOO_LONG_WIDE_CHAR_VALUE
#define ONIGENC_CTYPE_PUNCT
#define ONIG_SYN_OP2_QMARK_LPAREN_CONDITION
#define ONIGERR_INVALID_GROUP_NAME
#define ONIG_SYN_OP_ESC_X_BRACE_HEX8
#define ONIGENC_IS_CODE_DIGIT(enc, code)
#define ONIGERR_EMPTY_CHAR_CLASS
#define ONIGENC_CODE_TO_MBC_MAXLEN
#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV
#define ONIGENC_CTYPE_WORD
#define ONIGERR_UNDEFINED_GROUP_OPTION
#define ONIGERR_END_PATTERN_AT_CONTROL
#define ONIGERR_END_PATTERN_AT_LEFT_BRACE
#define ONIG_OPTION_POSIX_BRACKET_ALL_RANGE
#define ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, p, end)
#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED
#define ONIGERR_UNDEFINED_NAME_REFERENCE
#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL
#define ONIGENC_CTYPE_UPPER
#define ONIG_OPTION_ASCII_RANGE
#define ONIG_SYN_OP_LINE_ANCHOR
#define ONIG_SYN_OP2_ESC_V_VTAB
#define ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE
void(* OnigWarnFunc)(const char *s)
#define ONIGENC_IS_CODE_WORD(enc, code)
#define ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS
unsigned int OnigCodePoint
#define ONIG_OPTION_IGNORECASE
#define ONIG_SYN_OP2_OPTION_RUBY
#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT
#define ONIGERR_INVALID_POSIX_BRACKET_TYPE
#define ONIG_SYN_OP_ESC_OCTAL3
#define ONIG_SYN_OP_PLUS_ONE_INF
#define ONIG_SYN_OP_DOT_ANYCHAR
#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE
#define ONIGERR_TOO_SHORT_DIGITS
#define ONIG_ENCODING_ASCII
#define ONIG_OPTION_CAPTURE_GROUP
#define ONIGERR_UNMATCHED_CLOSE_PARENTHESIS
#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE
#define ONIG_SYN_OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER
#define ONIGERR_PREMATURE_END_OF_CHAR_CLASS
#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT
#define ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP
#define ONIGENC_MBC_MAXLEN(enc)
#define ONIGENC_MBC_MINLEN(enc)
#define ONIGERR_EMPTY_GROUP_NAME
#define ONIG_SYN_OP2_ESC_H_XDIGIT
#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY
#define ONIG_SYN_OP_ESC_CONTROL_CHARS
#define ONIGENC_CTYPE_CNTRL
#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP
#define ONIGENC_CTYPE_PRINT
#define ONIG_SYN_OP_ESC_O_BRACE_OCTAL
#define ONIG_SYN_OP2_CCLASS_SET_OP
#define ONIGERR_TOO_MANY_MULTI_BYTE_RANGES
#define ONIGERR_END_PATTERN_IN_GROUP
#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
#define ONIGENC_CTYPE_BLANK
#define ONIG_SYN_ALLOW_INVALID_INTERVAL
#define ONIGERR_MULTIPLEX_DEFINED_NAME
#define ONIGENC_CTYPE_LOWER
#define ONIG_SYN_OP_ESC_D_DIGIT
#define ONIG_SYN_OP2_ESC_G_BRACE_BACKREF
#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC
#define ONIG_SYN_OP_POSIX_BRACKET
#define ONIG_SYN_OP_ESC_S_WHITE_SPACE
#define ONIGENC_CODE_RANGE_NUM(range)
#define ONIG_MAX_BACKREF_NUM
#define ONIG_SYN_STRICT_CHECK_BACKREF
#define ONIGENC_CODE_RANGE_TO(range, i)
#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF
#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND
#define ONIG_SYN_OP2_ESC_U_HEX4
ONIG_EXTERN OnigUChar * onigenc_get_prev_char_head(OnigEncoding enc, const OnigUChar *start, const OnigUChar *s, const OnigUChar *end)
#define ONIGENC_IS_SINGLEBYTE(enc)
#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR
#define ONIG_SYN_OP_ESC_PLUS_ONE_INF
#define ONIG_SYN_OP_VBAR_ALT
#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED
#define ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE
#define ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE
#define ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS
#define ONIGERR_INVALID_CONDITION_PATTERN
#define ONIG_SYN_OP2_ESC_CAPITAL_R_LINEBREAK
unsigned int OnigOptionType
#define ONIG_OPTION_WORD_BOUND_ALL_RANGE
#define ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, sbout, ranges)
#define ONIGERR_INVALID_BACKREF
ONIG_EXTERN int onigenc_strlen(OnigEncoding enc, const OnigUChar *p, const OnigUChar *end)
#define ONIG_SYN_OP2_QMARK_SUBEXP_CALL
#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF
#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS
#define ONIGERR_INVALID_REPEAT_RANGE_PATTERN
#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP
#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL
#define ONIGENC_CODE_TO_MBCLEN(enc, code)
#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC
#define ONIG_MAX_CAPTURE_GROUP_NUM
#define ONIG_SYN_OP_QMARK_ZERO_ONE
#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY
#define ONIGERR_INVALID_CODE_POINT_VALUE
#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS
#define ONIG_SYN_OP_ESC_X_HEX2
#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL
#define ONIG_OPTION_SINGLELINE
#define ONIG_MAX_MULTI_BYTE_RANGES_NUM
#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META
#define ONIGERR_TOO_BIG_NUMBER
#define ONIGERR_TOO_SHORT_MULTI_BYTE_STRING
#define ONIGERR_CONTROL_CODE_SYNTAX
#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
#define ONIG_OPTION_EXTEND
#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS
#define ONIGERR_TOO_MANY_CAPTURE_GROUPS
#define ONIGERR_INVALID_CHAR_IN_GROUP_NAME
#define ONIGENC_IS_CODE_NEWLINE(enc, code)
int onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode *cc)
UChar * onigenc_step(OnigEncoding enc, const UChar *p, const UChar *end, int n)
int onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar *p, const UChar *end, const UChar *sascii, int n)
#define POSIX_BRACKET_ENTRY_INIT(name, ctype)
#define enclen(enc, p, e)
#define ONIGENC_IS_ASCII_CODE(code)
void onig_vsnprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc, UChar *pat, UChar *pat_end, const UChar *fmt, va_list args)
#define IS_MC_ESC_CODE(code, syn)
#define BBUF_MOVE_RIGHT(buf, from, to, n)
#define ANCHOR_BEGIN_LINE
#define BBUF_MOVE_LEFT_REDUCE(buf, from, to)
#define IS_ASCII_RANGE(option)
#define CHECK_NULL_RETURN_MEMERR(p)
#define ANCHOR_PREC_READ_NOT
#define BBUF_ENSURE_SIZE(buf, size)
#define BIT_STATUS_BITS_NUM
#define MC_ONE_OR_MORE_TIME(syn)
#define BITSET_CLEAR_BIT(bs, pos)
#define ANCHOR_BEGIN_POSITION
#define ONIG_LAST_CODE_POINT
#define BITSET_AT(bs, pos)
#define CHECK_NULL_RETURN(p)
#define ANCHOR_LOOK_BEHIND
#define ANCHOR_WORD_BOUND
#define DEFAULT_PARSE_DEPTH_LIMIT
#define ANCHOR_WORD_BEGIN
#define BBUF_INIT(buf, size)
#define IS_REPEAT_INFINITE(n)
#define ANCHOR_LOOK_BEHIND_NOT
#define GET_CODE_POINT(code, p)
#define ANCHOR_SEMI_END_BUF
#define IS_WORD_BOUND_ALL_RANGE(option)
#define NCCLASS_CLEAR_NOT(nd)
#define IS_NCCLASS_NOT(nd)
#define ANCHOR_NOT_WORD_BOUND
#define BITSET_SET_BIT(bs, pos)
#define IS_SINGLELINE(option)
#define IS_POSIX_BRACKET_ALL_RANGE(option)
#define IS_EXTEND(option)
#define USE_BACKREF_WITH_LEVEL
#define BIT_STATUS_CLEAR(stats)
#define XDIGITVAL(enc, code)
#define IS_IGNORECASE(option)
#define MC_ANYCHAR_ANYTIME(syn)
#define BIT_STATUS_ON_AT_SIMPLE(stats, n)
#define MC_ZERO_OR_ONE_TIME(syn)
#define NCCLASS_SET_NOT(nd)
#define is_invalid_quantifier_target(node)
#define INIT_MULTI_BYTE_RANGE_SIZE
int onig_name_to_group_numbers(regex_t *reg, const UChar *name, const UChar *name_end, int **nums)
int onig_foreach_name(regex_t *reg, int(*func)(const UChar *, const UChar *, int, int *, regex_t *, void *), void *arg)
int onig_noname_group_capture_is_active(const regex_t *reg)
Node * onig_node_new_list(Node *left, Node *right)
Node * onig_node_new_anchor(int type)
#define INIT_SCANENV_MEMNODES_ALLOC_SIZE
#define POSIX_BRACKET_NAME_MIN_LEN
void onig_null_warn(const char *s ARG_UNUSED)
void onig_set_warn_func(OnigWarnFunc f)
unsigned int onig_get_parse_depth_limit(void)
int onig_st_insert_strend(hash_table_type *table, const UChar *str_key, const UChar *end_key, hash_data_type value)
#define ONOFF(v, f, negative)
int onig_node_str_cat(Node *node, const UChar *s, const UChar *end)
#define POSIX_BRACKET_CHECK_LIMIT_LENGTH
int onig_parse_make_tree(Node **root, const UChar *pattern, const UChar *end, regex_t *reg, ScanEnv *env)
#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf)
#define INIT_NAME_BACKREFS_ALLOC_NUM
hash_table_type * onig_st_init_strend_table_with_size(st_index_t size)
int onig_number_of_names(const regex_t *reg)
#define MBCODE_START_POS(enc)
const OnigSyntaxType * OnigDefaultSyntax
void onig_scan_env_set_error_string(ScanEnv *env, int ecode ARG_UNUSED, UChar *arg, UChar *arg_end)
Node * onig_node_list_add(Node *list, Node *x)
int onig_name_to_backref_number(regex_t *reg, const UChar *name, const UChar *name_end, const OnigRegion *region)
void onig_node_free(Node *node)
Node * onig_node_new_enclose(int type)
#define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2)
const OnigSyntaxType OnigSyntaxRuby
Node * onig_node_new_alt(Node *left, Node *right)
Node * onig_node_new_str(const UChar *s, const UChar *end)
#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
#define BACKREF_REL_TO_ABS(rel_no, env)
#define ONIGENC_IS_CODE_NAME(enc, c)
int onig_set_parse_depth_limit(unsigned int depth)
#define BITSET_IS_EMPTY(bs, empty)
@ TK_EXTENDED_GRAPHEME_CLUSTER
int onig_node_str_set(Node *node, const UChar *s, const UChar *end)
void onig_node_str_clear(Node *node)
#define BITSET_SET_BIT_CHKDUP(bs, pos)
int onig_names_free(regex_t *reg)
void onig_reduce_nested_quantifier(Node *pnode, Node *cnode)
#define BBUF_WRITE_CODE_POINT(bbuf, pos, code)
int onig_st_lookup_strend(hash_table_type *table, const UChar *str_key, const UChar *end_key, hash_data_type *value)
void onig_strcpy(UChar *dest, const UChar *src, const UChar *end)
void onig_set_verb_warn_func(OnigWarnFunc f)
int onig_renumber_name_table(regex_t *reg, GroupNumRemap *map)
int onig_scan_unsigned_number(UChar **src, const UChar *end, OnigEncoding enc)
#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf)
#define NSTRING_SET_RAW(node)
int onig_strncmp(const UChar *s1, const UChar *s2, int n)
#define IS_SYNTAX_BV(syn, bvm)
#define IS_SYNTAX_OP2(syn, opm)
#define NSTRING_CLEAR_RAW(node)
#define ENCLOSE_CONDITION
#define SET_ENCLOSE_STATUS(node, f)
#define SCANENV_MEM_NODES(senv)
#define ENCLOSE_STOP_BACKTRACK
#define NODE_STR_BUF_SIZE
#define NSTRING_SET_AMBIG(node)
#define NQ_TARGET_ISNOT_EMPTY
#define SET_NTYPE(node, ntype)
void onig_node_conv_to_str_node(Node *node, int raw)
#define SCANENV_MEMNODES_SIZE
#define IS_SYNTAX_OP(syn, opm)
#define NODE_BACKREFS_SIZE
int st_foreach_callback_func(st_data_t, st_data_t, st_data_t)
size_t strlen(const char *)
int(* func)(const UChar *, const UChar *, int, int *, regex_t *, void *)
OnigCaseFoldType case_fold_flag
const OnigSyntaxType * syntax
if((ID)(DISPID) nameid !=nameid)