22#include "internal/variable.h"
31#define errcpy(err, msg) strlcpy((err), (msg), ONIG_MAX_ERROR_MESSAGE_LEN)
33#define BEG(no) (regs->beg[(no)])
34#define END(no) (regs->end[(no)])
37static const char casetable[] = {
38 '\000',
'\001',
'\002',
'\003',
'\004',
'\005',
'\006',
'\007',
39 '\010',
'\011',
'\012',
'\013',
'\014',
'\015',
'\016',
'\017',
40 '\020',
'\021',
'\022',
'\023',
'\024',
'\025',
'\026',
'\027',
41 '\030',
'\031',
'\032',
'\033',
'\034',
'\035',
'\036',
'\037',
43 '\040',
'\041',
'\042',
'\043',
'\044',
'\045',
'\046',
'\047',
45 '\050',
'\051',
'\052',
'\053',
'\054',
'\055',
'\056',
'\057',
47 '\060',
'\061',
'\062',
'\063',
'\064',
'\065',
'\066',
'\067',
49 '\070',
'\071',
'\072',
'\073',
'\074',
'\075',
'\076',
'\077',
51 '\100',
'\141',
'\142',
'\143',
'\144',
'\145',
'\146',
'\147',
53 '\150',
'\151',
'\152',
'\153',
'\154',
'\155',
'\156',
'\157',
55 '\160',
'\161',
'\162',
'\163',
'\164',
'\165',
'\166',
'\167',
57 '\170',
'\171',
'\172',
'\133',
'\134',
'\135',
'\136',
'\137',
59 '\140',
'\141',
'\142',
'\143',
'\144',
'\145',
'\146',
'\147',
61 '\150',
'\151',
'\152',
'\153',
'\154',
'\155',
'\156',
'\157',
63 '\160',
'\161',
'\162',
'\163',
'\164',
'\165',
'\166',
'\167',
65 '\170',
'\171',
'\172',
'\173',
'\174',
'\175',
'\176',
'\177',
66 '\200',
'\201',
'\202',
'\203',
'\204',
'\205',
'\206',
'\207',
67 '\210',
'\211',
'\212',
'\213',
'\214',
'\215',
'\216',
'\217',
68 '\220',
'\221',
'\222',
'\223',
'\224',
'\225',
'\226',
'\227',
69 '\230',
'\231',
'\232',
'\233',
'\234',
'\235',
'\236',
'\237',
70 '\240',
'\241',
'\242',
'\243',
'\244',
'\245',
'\246',
'\247',
71 '\250',
'\251',
'\252',
'\253',
'\254',
'\255',
'\256',
'\257',
72 '\260',
'\261',
'\262',
'\263',
'\264',
'\265',
'\266',
'\267',
73 '\270',
'\271',
'\272',
'\273',
'\274',
'\275',
'\276',
'\277',
74 '\300',
'\301',
'\302',
'\303',
'\304',
'\305',
'\306',
'\307',
75 '\310',
'\311',
'\312',
'\313',
'\314',
'\315',
'\316',
'\317',
76 '\320',
'\321',
'\322',
'\323',
'\324',
'\325',
'\326',
'\327',
77 '\330',
'\331',
'\332',
'\333',
'\334',
'\335',
'\336',
'\337',
78 '\340',
'\341',
'\342',
'\343',
'\344',
'\345',
'\346',
'\347',
79 '\350',
'\351',
'\352',
'\353',
'\354',
'\355',
'\356',
'\357',
80 '\360',
'\361',
'\362',
'\363',
'\364',
'\365',
'\366',
'\367',
81 '\370',
'\371',
'\372',
'\373',
'\374',
'\375',
'\376',
'\377',
84# error >>> "You lose. You will need a translation table for your character set." <<<
90 const unsigned char *p1 = x, *p2 = y;
94 if ((tmp = casetable[(
unsigned)*p1++] - casetable[(
unsigned)*p2++]))
102rb_memsearch_ss(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
104 const unsigned char *y;
106 if ((y = memmem(ys, n, xs, m)) !=
NULL)
113rb_memsearch_ss(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
115 const unsigned char *x = xs, *xe = xs + m;
116 const unsigned char *y = ys, *ye = ys + n;
117#define VALUE_MAX ((VALUE)~(VALUE)0)
121 rb_bug(
"!!too long pattern string!!");
123 if (!(y = memchr(y, *x, n - m + 1)))
127 for (hx = *x++, hy = *y++; x < xe; ++x, ++y) {
147rb_memsearch_qs(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
149 const unsigned char *x = xs, *xe = xs + m;
150 const unsigned char *y = ys;
151 VALUE i, qstable[256];
154 for (i = 0; i < 256; ++i)
157 qstable[*x] = xe - x;
159 for (; y + m <= ys + n; y += *(qstable + y[m])) {
160 if (*xs == *y &&
memcmp(xs, y, m) == 0)
166static inline unsigned int
167rb_memsearch_qs_utf8_hash(
const unsigned char *x)
169 register const unsigned int mix = 8353;
170 register unsigned int h = *x;
195 return (
unsigned char)h;
199rb_memsearch_qs_utf8(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
201 const unsigned char *x = xs, *xe = xs + m;
202 const unsigned char *y = ys;
203 VALUE i, qstable[512];
206 for (i = 0; i < 512; ++i) {
209 for (; x < xe; ++x) {
210 qstable[rb_memsearch_qs_utf8_hash(x)] = xe - x;
213 for (; y + m <= ys + n; y += qstable[rb_memsearch_qs_utf8_hash(y+m)]) {
214 if (*xs == *y &&
memcmp(xs, y, m) == 0)
221rb_memsearch_wchar(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
223 const unsigned char *x = xs, x0 = *xs, *y = ys;
227 if (x0 == *y &&
memcmp(x+1, y+1, m-1) == 0)
234rb_memsearch_qchar(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
236 const unsigned char *x = xs, x0 = *xs, *y = ys;
240 if (x0 == *y &&
memcmp(x+1, y+1, m-1) == 0)
249 const unsigned char *x = x0, *y = y0;
251 if (m > n)
return -1;
253 return memcmp(x0, y0, m) == 0 ? 0 : -1;
259 const unsigned char *ys = memchr(y, *x, n);
268 return rb_memsearch_ss(x0, m, y0, n);
271 return rb_memsearch_qs_utf8(x0, m, y0, n);
275 return rb_memsearch_wchar(x0, m, y0, n);
278 return rb_memsearch_qchar(x0, m, y0, n);
280 return rb_memsearch_qs(x0, m, y0, n);
283#define REG_LITERAL FL_USER5
284#define REG_ENCODING_NONE FL_USER6
286#define KCODE_FIXED FL_USER4
288#define ARG_REG_OPTION_MASK \
289 (ONIG_OPTION_IGNORECASE|ONIG_OPTION_MULTILINE|ONIG_OPTION_EXTEND)
290#define ARG_ENCODING_FIXED 16
291#define ARG_ENCODING_NONE 32
348 return (*option = char_to_option(c));
355rb_reg_check(
VALUE re)
357 if (!
RREGEXP_PTR(re) || !RREGEXP_SRC(re) || !RREGEXP_SRC_PTR(re)) {
366 const char *p, *pend;
371 p = s; pend = p +
len;
378 p +=
mbclen(p, pend, enc);
406 if (c ==
'\\' && p+clen < pend) {
407 int n = clen +
mbclen(p+clen, pend, enc);
415 c = (
unsigned char)*p;
428 else if (c ==
term) {
440 snprintf(b,
sizeof(b),
"\\x%02X", c);
452rb_reg_desc(
const char *s,
long len,
VALUE re)
465 rb_reg_expr_str(
str, s,
len, enc, resenc,
'/');
470 if (*option_to_str(opts,
RREGEXP_PTR(re)->options))
494rb_reg_source(
VALUE re)
516rb_reg_inspect(
VALUE re)
518 if (!
RREGEXP_PTR(re) || !RREGEXP_SRC(re) || !RREGEXP_SRC_PTR(re)) {
521 return rb_reg_desc(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), re);
549 return rb_reg_str_with_term(re,
'/');
568 len = RREGEXP_SRC_LEN(re);
570 if (
len >= 4 &&
ptr[0] ==
'(' &&
ptr[1] ==
'?') {
573 if ((
len -= 2) > 0) {
575 opt = char_to_option((
int )*
ptr);
585 if (
len > 1 && *
ptr ==
'-') {
589 opt = char_to_option((
int )*
ptr);
619 len = RREGEXP_SRC_LEN(re);
625 if ((options & embeddable) != embeddable) {
627 option_to_str(optbuf + 1, ~options);
646 e = RSTRING_END(
str);
664rb_reg_raise(
const char *s,
long len,
const char *
err,
VALUE re)
666 VALUE desc = rb_reg_desc(s,
len, re);
672rb_enc_reg_error_desc(
const char *s,
long len,
rb_encoding *enc,
int options,
const char *
err)
681 rb_reg_expr_str(desc, s,
len, enc, resenc,
'/');
683 option_to_str(opts + 1, options);
691rb_enc_reg_raise(
const char *s,
long len,
rb_encoding *enc,
int options,
const char *
err)
697rb_reg_error_desc(
VALUE str,
int options,
const char *
err)
706rb_reg_raise_str(
VALUE str,
int options,
const char *
err)
724rb_reg_casefold_p(
VALUE re)
756rb_reg_options_m(
VALUE re)
764 int back_num,
int *back_refs,
OnigRegex regex,
void *arg)
788rb_reg_names(
VALUE re)
799 int back_num,
int *back_refs,
OnigRegex regex,
void *arg)
805 for (i = 0; i < back_num; i++)
836rb_reg_named_captures(
VALUE re)
845onig_new_with_source(
regex_t** reg,
const UChar* pattern,
const UChar* pattern_end,
847 OnigErrorInfo* einfo,
const char *sourcefile,
int sourceline)
857 r =
onig_compile_ruby(*reg, pattern, pattern_end, einfo, sourcefile, sourceline);
868 const char *sourcefile,
int sourceline)
943match_alloc(
VALUE klass)
972pair_byte_cmp(
const void *pair1,
const void *pair2)
974 long diff = ((
pair_t*)pair1)->byte_pos - ((
pair_t*)pair2)->byte_pos;
975#if SIZEOF_LONG > SIZEOF_INT
976 return diff ? diff > 0 ? 1 : -1 : 0;
1021 qsort(pairs, num_pos,
sizeof(
pair_t), pair_byte_cmp);
1025 for (i = 0; i < num_pos; i++) {
1041 found = bsearch(&
key, pairs, num_pos,
sizeof(
pair_t), pair_byte_cmp);
1045 found = bsearch(&
key, pairs, num_pos,
sizeof(
pair_t), pair_byte_cmp);
1069 rm =
RMATCH(obj)->rmatch;
1103 if (
NIL_P(regexp)) {
1153static int name_to_backref_number(
struct re_registers *,
VALUE,
const char*,
const char*);
1176 else if (!RB_TYPE_P(backref,
T_STRING)) {
1184 name_to_backref_error(backref);
1193 return match_backref_number(
match, backref);
1217 int i = match_backref_number(
match, n);
1227 update_char_offset(
match);
1253 int i = match_backref_number(
match, n);
1263 update_char_offset(
match);
1288 int i = match_backref_number(
match, n);
1298 update_char_offset(
match);
1302#define MATCH_BUSY FL_USER2
1321 regs = RMATCH_REGS(
match);
1322 if (!regs)
return -1;
1331 regs = RMATCH_REGS(
match);
1332 if (!regs)
return FALSE;
1338 if (nth <= 0)
return FALSE;
1340 return (
BEG(nth) != -1);
1349 match->str = string;
1364 match_set_string(
match,
string, pos,
len);
1398rb_reg_fixed_encoding_p(
VALUE re)
1407rb_reg_preprocess(
const char *p,
const char *end,
rb_encoding *enc,
1416 "incompatible encoding regexp match (%s regexp with %s string)",
1435 int cr = str_coderange(
str);
1439 "invalid byte sequence in %s",
1452 reg_enc_error(re,
str);
1454 else if (rb_reg_fixed_encoding_p(re)) {
1457 reg_enc_error(re,
str);
1464 rb_warn(
"historical binary regexp match /.../n against %s string",
1476 const char *pattern;
1481 if (reg->
enc == enc)
return reg;
1485 pattern = RREGEXP_SRC_PTR(re);
1487 unescaped = rb_reg_preprocess(
1488 pattern, pattern + RREGEXP_SRC_LEN(re), enc,
1491 if (unescaped ==
Qnil) {
1503 rb_reg_raise(pattern, RREGEXP_SRC_LEN(re),
err, re);
1524 enc = rb_reg_prepare_enc(re,
str, 0);
1550rb_reg_search_set_match(
VALUE re,
VALUE str,
long pos,
int reverse,
int set_backref_str,
VALUE *set_match)
1555 char *start, *
range;
1563 if (pos >
len || pos < 0) {
1570 if (!tmpreg)
RREGEXP(re)->usecnt++;
1579 ((
UChar*)(start + pos)),
1582 if (!tmpreg)
RREGEXP(re)->usecnt--;
1601 rb_reg_raise(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re),
err, re);
1610 if (set_backref_str) {
1616 if (set_match) *set_match =
match;
1624 return rb_reg_search_set_match(re,
str, pos, reverse, set_backref_str,
NULL);
1645 if (!tmpreg)
RREGEXP(re)->usecnt++;
1653 regs = RMATCH_REGS(
match);
1667 if (!tmpreg)
RREGEXP(re)->usecnt--;
1686 rb_reg_raise(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re),
err, re);
1712 regs = RMATCH_REGS(
match);
1718 if (nth <= 0)
return Qnil;
1733 regs = RMATCH_REGS(
match);
1739 if (nth <= 0)
return Qnil;
1742 if (start == -1)
return Qnil;
1775 regs = RMATCH_REGS(
match);
1776 if (
BEG(0) == -1)
return Qnil;
1802 regs = RMATCH_REGS(
match);
1803 if (
BEG(0) == -1)
return Qnil;
1818 regs = RMATCH_REGS(
match);
1819 if (
BEG(0) == -1)
return Qnil;
1821 for (i=regs->
num_regs-1;
BEG(i) == -1 && i > 0; i--)
1823 if (i == 0)
return Qnil;
1828last_match_getter(
ID _x,
VALUE *_y)
1834prematch_getter(
ID _x,
VALUE *_y)
1840postmatch_getter(
ID _x,
VALUE *_y)
1846last_paren_match_getter(
ID _x,
VALUE *_y)
1860 regs = RMATCH_REGS(
match);
1864 for (i=start; i<regs->
num_regs; i++) {
1865 if (regs->
beg[i] == -1) {
1902 return match_array(
match, 0);
1921 return match_array(
match, 1);
1925name_to_backref_number(
struct re_registers *regs,
VALUE regexp,
const char*
name,
const char* name_end)
1927 if (
NIL_P(regexp))
return -1;
1929 (
const unsigned char *)
name, (
const unsigned char *)name_end, regs);
1932#define NAME_TO_NUMBER(regs, re, name, name_ptr, name_end) \
1934 !rb_enc_compatible(RREGEXP_SRC(re), (name)) ? 0 : \
1935 name_to_backref_number((regs), (re), (name_ptr), (name_end)))
1951 name_to_backref_error(
name);
1959 long olen = RMATCH_REGS(
match)->num_regs;
1962 if (
len == 0)
return result;
1964 for (j =
beg; j <
end; j++) {
2027 if (
NIL_P(length)) {
2037 return match_ary_aref(
match, idx,
Qnil);
2088 for (i=0; i<
argc; i++) {
2128 int back_num,
int *back_refs,
OnigRegex regex,
void *arg) {
2139 for (i = 0; i < back_num; i++) {
2219 int back_num,
int *back_refs,
OnigRegex regex,
void *arg0)
2224 for (i = 0; i < back_num; i++) {
2226 arg[back_refs[i]].
len = name_end -
name;
2265 else if (
NIL_P(regexp)) {
2274 match_inspect_name_iter, names);
2279 for (i = 0; i < num_regs; i++) {
2306 const char *p = *pp;
2308 int meta_prefix = 0, ctrl_prefix = 0;
2311 if (p == end || *p++ !=
'\\') {
2312 errcpy(
err,
"too short escaped multibyte character");
2318 errcpy(
err,
"too short escape sequence");
2322 case '\\':
code =
'\\';
break;
2323 case 'n':
code =
'\n';
break;
2324 case 't':
code =
'\t';
break;
2325 case 'r':
code =
'\r';
break;
2326 case 'f':
code =
'\f';
break;
2327 case 'v':
code =
'\013';
break;
2328 case 'a':
code =
'\007';
break;
2329 case 'e':
code =
'\033';
break;
2332 case '0':
case '1':
case '2':
case '3':
2333 case '4':
case '5':
case '6':
case '7':
2354 if (p+1 < end && *p++ ==
'-' && (*p & 0x80) == 0) {
2368 if (p == end || *p++ !=
'-') {
2369 errcpy(
err,
"too short control escape");
2374 errcpy(
err,
"duplicate control escape");
2378 if (p < end && (*p & 0x80) == 0) {
2388 errcpy(
err,
"too short control escape");
2392 errcpy(
err,
"unexpected escape sequence");
2410unescape_escaped_nonascii(
const char **pp,
const char *end,
rb_encoding *enc,
2413 const char *p = *pp;
2415 unsigned char *area =
ALLOCA_N(
unsigned char, chmaxlen);
2416 char *chbuf = (
char *)area;
2421 memset(chbuf, 0, chmaxlen);
2423 byte = read_escaped_byte(&p, end,
err);
2428 area[chlen++] = byte;
2429 while (chlen < chmaxlen &&
2431 byte = read_escaped_byte(&p, end,
err);
2435 area[chlen++] = byte;
2440 errcpy(
err,
"invalid multibyte escape");
2443 if (1 < chlen || (area[0] & 0x80)) {
2448 else if (*encp != enc) {
2449 errcpy(
err,
"escaped non ASCII character in UTF-8 regexp");
2455 snprintf(escbuf,
sizeof(escbuf),
"\\x%02X", area[0]&0xff);
2465 if ((0xd800 <=
code &&
code <= 0xdfff) ||
2474append_utf8(
unsigned long uv,
2477 if (check_unicode_range(uv,
err) != 0)
2481 snprintf(escbuf,
sizeof(escbuf),
"\\x%02X", (
int)uv);
2493 errcpy(
err,
"UTF-8 character in non UTF-8 regexp");
2501unescape_unicode_list(
const char **pp,
const char *end,
2504 const char *p = *pp;
2505 int has_unicode = 0;
2509 while (p < end &&
ISSPACE(*p)) p++;
2524 while (p < end &&
ISSPACE(*p)) p++;
2527 if (has_unicode == 0) {
2538unescape_unicode_bmp(
const char **pp,
const char *end,
2541 const char *p = *pp;
2561unescape_nonascii(
const char *p,
const char *end,
rb_encoding *enc,
2572 errcpy(
err,
"invalid multibyte character");
2576 if (1 < chlen || (*p & 0x80)) {
2582 else if (*encp != enc) {
2583 errcpy(
err,
"non ASCII character in UTF-8 regexp");
2592 errcpy(
err,
"too short escape sequence");
2597 goto invalid_multibyte;
2606 case '1':
case '2':
case '3':
2607 case '4':
case '5':
case '6':
case '7':
2609 size_t len = end-(p-1), octlen;
2627 const char *pbeg = p;
2628 int byte = read_escaped_byte(&p, end,
err);
2629 if (
byte == -1)
return -1;
2634 if (unescape_escaped_nonascii(&p, end, enc,
buf, encp,
err) != 0)
2641 errcpy(
err,
"too short escape sequence");
2647 if (unescape_unicode_list(&p, end,
buf, encp,
err) != 0)
2649 if (p == end || *p++ !=
'}') {
2657 if (unescape_unicode_bmp(&p, end,
buf, encp,
err) != 0)
2688rb_reg_preprocess(
const char *p,
const char *end,
rb_encoding *enc,
2692 int has_property = 0;
2703 if (unescape_nonascii(p, end, enc,
buf, fixed_enc, &has_property,
err) != 0)
2706 if (has_property && !*fixed_enc) {
2731 buf = rb_reg_preprocess(p, end, enc, &fixed_enc,
err);
2735 return rb_reg_error_desc(
str, 0,
err);
2741rb_reg_preprocess_dregexp(
VALUE ary,
int options)
2762 src_enc != ascii8bit) {
2766 src_enc = ascii8bit;
2773 buf = rb_reg_preprocess(p, end, src_enc, &fixed_enc,
err);
2778 if (fixed_enc != 0) {
2779 if (regexp_enc != 0 && regexp_enc != fixed_enc) {
2783 regexp_enc = fixed_enc;
2801 const char *sourcefile,
int sourceline)
2816 errcpy(
err,
"can't make regexp with dummy encoding");
2820 unescaped = rb_reg_preprocess(s, s+
len, enc, &fixed_enc,
err);
2821 if (unescaped ==
Qnil)
2827 errcpy(
err,
"incompatible character encoding");
2830 if (fixed_enc != a_enc) {
2849 sourcefile, sourceline);
2850 if (!re->
ptr)
return -1;
2859 if (regenc != enc) {
2867 const char *sourcefile,
int sourceline)
2873 if (enc != ascii8bit) {
2875 errcpy(
err,
"/.../n has a non escaped non ASCII character in non ASCII-8BIT script");
2882 options,
err, sourcefile, sourceline);
2883 if (ret == 0) reg_set_source(obj,
str, str_enc);
2888rb_reg_s_alloc(
VALUE klass)
2916 if (rb_reg_initialize_str(re, s, options,
err,
NULL, 0) != 0) {
2917 rb_reg_raise_str(s, options,
err);
2929 enc, options,
err,
NULL, 0) != 0) {
2930 rb_reg_raise_str(s, options,
err);
2932 reg_set_source(re, s, enc);
2951 if (rb_reg_initialize(re, s,
len, enc, options,
err,
NULL, 0) != 0) {
2952 rb_enc_reg_raise(s,
len, enc, options,
err);
2972 if (rb_reg_initialize_str(re,
str, options,
err, sourcefile, sourceline) != 0) {
2981static VALUE reg_cache;
3005rb_reg_hash(
VALUE re)
3041 if (re1 == re2)
return Qtrue;
3043 rb_reg_check(re1); rb_reg_check(re2);
3046 if (RREGEXP_SRC_LEN(re1) != RREGEXP_SRC_LEN(re2))
return Qfalse;
3048 if (
memcmp(RREGEXP_SRC_PTR(re1), RREGEXP_SRC_PTR(re2), RREGEXP_SRC_LEN(re1)) == 0) {
3073 regs = RMATCH_REGS(
match);
3095 if (match1 == match2)
return Qtrue;
3099 if (!rb_reg_equal(match_regexp(match1), match_regexp(match2)))
return Qfalse;
3100 regs1 = RMATCH_REGS(match1);
3101 regs2 = RMATCH_REGS(match2);
3109reg_operand(
VALUE s,
int check)
3142 return rb_reg_search_set_match(re,
str, pos, 0, 1, set_match);
3196 long pos = reg_match_pos(re, &
str, 0,
NULL);
3197 if (pos < 0)
return Qnil;
3317 pos = reg_match_pos(re, &
str, pos, &result);
3366 if (pos < 0)
return Qfalse;
3377 if (!tmpreg)
RREGEXP(re)->usecnt++;
3382 if (!tmpreg)
RREGEXP(re)->usecnt--;
3398 rb_reg_raise(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re),
err, re);
3448 str = RREGEXP_SRC(re);
3457 if (kcode[0] ==
'n' || kcode[0] ==
'N') {
3468 rb_reg_init_str_enc(self,
str, enc, flags);
3488 s +=
mbclen(s, send, enc);
3492 case '[':
case ']':
case '{':
case '}':
3493 case '(':
case ')':
case '|':
case '-':
3494 case '*':
case '.':
case '\\':
3495 case '?':
case '+':
case '^':
case '$':
3497 case '\t':
case '\f':
case '\v':
case '\n':
case '\r':
3525 int n =
mbclen(s, send, enc);
3533 case '[':
case ']':
case '{':
case '}':
3534 case '(':
case ')':
case '|':
case '-':
3535 case '*':
case '.':
case '\\':
3536 case '?':
case '+':
case '^':
case '$':
3643 else if (
argc == 1) {
3650 quoted = rb_reg_s_quote(
Qnil, arg);
3659 int has_asciionly = 0;
3663 for (i = 0; i <
argc; i++) {
3674 if (!has_ascii_incompat)
3675 has_ascii_incompat = enc;
3676 else if (has_ascii_incompat != enc)
3680 else if (rb_reg_fixed_encoding_p(v)) {
3681 if (!has_ascii_compat_fixed)
3682 has_ascii_compat_fixed = enc;
3683 else if (has_ascii_compat_fixed != enc)
3690 v = rb_reg_str_with_term(v, -1);
3697 if (!has_ascii_incompat)
3698 has_ascii_incompat = enc;
3699 else if (has_ascii_incompat != enc)
3707 if (!has_ascii_compat_fixed)
3708 has_ascii_compat_fixed = enc;
3709 else if (has_ascii_compat_fixed != enc)
3713 v = rb_reg_s_quote(
Qnil, e);
3715 if (has_ascii_incompat) {
3716 if (has_asciionly) {
3720 if (has_ascii_compat_fixed) {
3732 if (has_ascii_incompat) {
3733 result_enc = has_ascii_incompat;
3735 else if (has_ascii_compat_fixed) {
3736 result_enc = has_ascii_compat_fixed;
3775 return rb_reg_s_union(self, v);
3777 return rb_reg_s_union(self, args);
3799#define ASCGET(s,e,cl) (acompat ? (*(cl)=1,ISASCII((s)[0])?(s)[0]:-1) : rb_enc_ascget((s), (e), (cl), str_enc))
3806 int c =
ASCGET(s, e, &clen);
3810 s +=
mbclen(s, e, str_enc);
3816 if (c !=
'\\' || s == e)
continue;
3825 s +=
mbclen(s, e, str_enc);
3834 case '1':
case '2':
case '3':
case '4':
3835 case '5':
case '6':
case '7':
case '8':
case '9':
3845 if (s < e &&
ASCGET(s, e, &clen) ==
'<') {
3846 char *
name, *name_end;
3848 name_end =
name = s + clen;
3849 while (name_end < e) {
3850 c =
ASCGET(name_end, e, &clen);
3851 if (c ==
'>')
break;
3852 name_end += c == -1 ?
mbclen(name_end, e, str_enc) : clen;
3856 (
long)(name_end -
name));
3858 name_to_backref_error(n);
3860 p = s = name_end + clen;
3886 while (
BEG(no) == -1 && no > 0) no--;
3887 if (no == 0)
continue;
3900 if (no >= regs->
num_regs)
continue;
3901 if (
BEG(no) == -1)
continue;
3906 if (!val)
return str;
3915ignorecase_getter(
ID _x,
VALUE *_y)
3938get_LAST_MATCH_INFO(
ID _x,
VALUE *_y)
3940 return match_getter();
3987 n = match_backref_number(
match,
argv[0]);
3990 return match_getter();
3994re_warn(
const char *s)
void rb_ary_store(VALUE ary, long idx, VALUE val)
VALUE rb_ary_push(VALUE ary, VALUE item)
VALUE rb_ary_new_capa(long capa)
VALUE rb_check_array_type(VALUE ary)
VALUE rb_ary_resize(VALUE ary, long len)
expands or shrinks ary to len elements.
VALUE rb_ary_entry(VALUE ary, long offset)
VALUE rb_assoc_new(VALUE car, VALUE cdr)
#define rb_category_warn(category,...)
Our own, locale independent, character handling routines.
#define rb_define_method(klass, mid, func, arity)
Defines klass#mid.
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
#define range(low, item, hi)
#define MJIT_FUNC_EXPORTED
#define ENCINDEX_Windows_31J
#define rb_ascii8bit_encindex()
#define rb_utf8_encindex()
int rb_enc_dummy_p(rb_encoding *enc)
int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
VALUE rb_enc_associate(VALUE obj, rb_encoding *enc)
rb_encoding * rb_utf8_encoding(void)
rb_encoding * rb_ascii8bit_encoding(void)
rb_encoding * rb_default_internal_encoding(void)
rb_encoding * rb_enc_get(VALUE obj)
int rb_enc_unicode_p(rb_encoding *enc)
void rb_enc_copy(VALUE obj1, VALUE obj2)
rb_encoding * rb_default_external_encoding(void)
VALUE rb_obj_encoding(VALUE obj)
rb_encoding * rb_usascii_encoding(void)
VALUE rb_enc_from_encoding(rb_encoding *encoding)
int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
char str[HTML_ESCAPE_MAX_LEN+1]
#define RSTRING_LEN(string)
#define RSTRING_PTR(string)
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
void rb_undef_method(VALUE klass, const char *name)
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
int rb_block_given_p(void)
Determines if the current method is given a block.
void rb_raise(VALUE exc, const char *fmt,...)
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
void rb_bug(const char *fmt,...)
void rb_set_errinfo(VALUE err)
Sets the current exception ($!) to the given value.
void rb_warn(const char *fmt,...)
VALUE rb_check_convert_type(VALUE, int, const char *, const char *)
Tries to convert an object into another type.
VALUE rb_cObject
Object class.
VALUE rb_any_to_s(VALUE)
Default implementation of #to_s.
VALUE rb_class_new_instance(int, const VALUE *, VALUE)
Allocates and initializes an instance of klass.
VALUE rb_obj_class(VALUE)
VALUE rb_obj_freeze(VALUE)
Make the object unmodifiable.
unsigned char match[65280+2]
VALUE rb_hash_new_with_size(st_index_t size)
VALUE rb_hash_aset(VALUE hash, VALUE key, VALUE val)
IMEMO: Internal memo object.
#define MEMO_NEW(a, b, c)
#define ENC_CODERANGE_7BIT
#define rb_enc_left_char_head(s, p, e, enc)
#define rb_enc_mbcput(c, buf, enc)
#define ENC_CODERANGE_CLEAN_P(cr)
int rb_enc_str_coderange(VALUE)
#define ENC_CODERANGE(obj)
#define ENC_CODERANGE_UNKNOWN
VALUE rb_enc_str_new(const char *, long, rb_encoding *)
#define rb_enc_mbmaxlen(enc)
#define ENCODING_GET(obj)
#define rb_enc_mbc_to_codepoint(p, e, enc)
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc)
#define MBCLEN_CHARFOUND_LEN(ret)
#define rb_enc_asciicompat(enc)
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
int rb_enc_str_asciionly_p(VALUE)
#define MBCLEN_INVALID_P(ret)
#define rb_enc_isprint(c, enc)
#define MBCLEN_NEEDMORE_P(ret)
#define rb_enc_mbminlen(enc)
long rb_enc_strlen(const char *, const char *, rb_encoding *)
#define ENC_CODERANGE_BROKEN
long rb_str_coderange_scan_restartable(const char *, const char *, rb_encoding *, int *)
#define MBCLEN_CHARFOUND_P(ret)
#define rb_enc_isspace(c, enc)
Thin wrapper to ruby/config.h.
@ RB_WARN_CATEGORY_DEPRECATED
int rb_uv_to_utf8(char[6], unsigned long)
#define OBJ_INIT_COPY(obj, orig)
VALUE rb_backref_get(void)
VALUE rb_lastline_get(void)
void rb_backref_set(VALUE)
VALUE rb_range_beg_len(VALUE, long *, long *, long, int)
VALUE rb_str_resize(VALUE, long)
#define rb_hash_uint(h, i)
long rb_str_offset(VALUE, long)
char * rb_str_subpos(VALUE, long, long *)
st_index_t rb_memhash(const void *ptr, long len)
#define rb_str_new(str, len)
st_index_t rb_hash_start(st_index_t)
VALUE rb_str_buf_new(long)
VALUE rb_check_string_type(VALUE)
VALUE rb_str_inspect(VALUE)
VALUE rb_str_equal(VALUE str1, VALUE str2)
VALUE rb_str_subseq(VALUE, long, long)
VALUE rb_str_append(VALUE, VALUE)
VALUE rb_str_buf_cat_ascii(VALUE, const char *)
VALUE rb_str_buf_append(VALUE, VALUE)
long rb_str_sublen(VALUE, long)
st_index_t rb_str_hash(VALUE)
VALUE rb_str_length(VALUE)
VALUE rb_class_path(VALUE)
void rb_define_alloc_func(VALUE, rb_alloc_func_t)
void rb_define_const(VALUE, const char *, VALUE)
unsigned long ruby_scan_oct(const char *, size_t, size_t *)
unsigned long ruby_scan_hex(const char *, size_t, size_t *)
#define scan_hex(s, l, e)
#define scan_oct(s, l, e)
Internal header for Hash.
Internal header for Regexp.
Internal header for String.
int rb_str_buf_cat_escaped_char(VALUE result, unsigned int c, int unicode_p)
void rb_gvar_ractor_local(const char *name)
int memcmp(const void *s1, const void *s2, size_t len)
#define MEMCPY(p1, p2, type, n)
#define ALLOCA_N(type, n)
#define MEMZERO(p, type, n)
void rb_define_virtual_variable(const char *q, type *w, void_type *e)
Define a function-backended global variable.
#define char_size(c2, c1)
#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end)
ONIG_EXTERN int onig_reg_init(OnigRegex reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, const OnigSyntaxType *syntax)
ONIG_EXTERN int onig_error_code_to_str(OnigUChar *s, OnigPosition err_code,...)
ONIG_EXTERN OnigUChar * onigenc_get_right_adjust_char_head(OnigEncoding enc, const OnigUChar *start, const OnigUChar *s, const OnigUChar *end)
#define ONIG_MAX_ERROR_MESSAGE_LEN
ONIG_EXTERN int onig_new(OnigRegex *, const OnigUChar *pattern, const OnigUChar *pattern_end, OnigOptionType option, OnigEncoding enc, const OnigSyntaxType *syntax, OnigErrorInfo *einfo)
ONIG_EXTERN int onig_region_resize(OnigRegion *region, int n)
ONIG_EXTERN void onig_region_free(OnigRegion *region, int free_self)
#define ONIG_OPTION_MULTILINE
ONIG_EXTERN OnigPosition onig_search(OnigRegex, const OnigUChar *str, const OnigUChar *end, const OnigUChar *start, const OnigUChar *range, OnigRegion *region, OnigOptionType option)
#define ONIGENC_CASE_FOLD_DEFAULT
#define ONIG_OPTION_IGNORECASE
#define ONIG_ENCODING_ASCII
ONIG_EXTERN void onig_free(OnigRegex)
ONIG_EXTERN int onigenc_set_default_encoding(OnigEncoding enc)
ONIG_EXTERN const OnigSyntaxType * OnigDefaultSyntax
#define ONIGENC_MBC_MAXLEN(enc)
ONIG_EXTERN void onig_set_verb_warn_func(OnigWarnFunc f)
ONIG_EXTERN OnigPosition onig_match(OnigRegex, const OnigUChar *str, const OnigUChar *end, const OnigUChar *at, OnigRegion *region, OnigOptionType option)
unsigned int OnigOptionType
ONIG_EXTERN int onig_foreach_name(OnigRegex reg, int(*func)(const OnigUChar *, const OnigUChar *, int, int *, OnigRegex, void *), void *arg)
ONIG_EXTERN void onig_region_copy(OnigRegion *to, const OnigRegion *from)
ONIG_EXTERN int onig_name_to_backref_number(OnigRegex reg, const OnigUChar *name, const OnigUChar *name_end, const OnigRegion *region)
ONIG_EXTERN int onig_noname_group_capture_is_active(const OnigRegexType *reg)
#define ONIG_OPTION_EXTEND
ONIG_EXTERN int onig_number_of_names(const OnigRegexType *reg)
ONIG_EXTERN void onig_set_warn_func(OnigWarnFunc f)
#define RARRAY_AREF(a, i)
int rb_reg_backref_number(VALUE match, VALUE backref)
void rb_backref_set_string(VALUE string, long pos, long len)
VALUE rb_reg_nth_defined(int nth, VALUE match)
VALUE rb_reg_match_last(VALUE match)
int rb_reg_options(VALUE re)
VALUE rb_reg_match(VALUE re, VALUE str)
#define REG_ENCODING_NONE
VALUE rb_reg_new_ary(VALUE ary, int opt)
VALUE rb_reg_eqq(VALUE re, VALUE str)
#define NAME_TO_NUMBER(regs, re, name, name_ptr, name_end)
regex_t * rb_reg_prepare_re(VALUE re, VALUE str)
VALUE rb_reg_regsub(VALUE str, VALUE src, struct re_registers *regs, VALUE regexp)
#define ARG_ENCODING_NONE
VALUE rb_enc_reg_new(const char *s, long len, rb_encoding *enc, int options)
#define ARG_ENCODING_FIXED
VALUE rb_reg_match_post(VALUE match)
VALUE rb_reg_last_match(VALUE match)
VALUE rb_reg_match_p(VALUE re, VALUE str, long pos)
VALUE rb_reg_match_pre(VALUE match)
char onig_errmsg_buffer[ONIG_MAX_ERROR_MESSAGE_LEN]
VALUE rb_reg_new(const char *s, long len, int options)
bool rb_reg_start_with_p(VALUE re, VALUE str)
int rb_char_to_option_kcode(int c, int *option, int *kcode)
void rb_match_busy(VALUE match)
long rb_memsearch(const void *x0, long m, const void *y0, long n, rb_encoding *enc)
regex_t * rb_reg_prepare_re0(VALUE re, VALUE str, onig_errmsg_buffer err)
#define ARG_REG_OPTION_MASK
long rb_reg_search(VALUE re, VALUE str, long pos, int reverse)
long rb_reg_adjust_startpos(VALUE re, VALUE str, long pos, int reverse)
VALUE rb_reg_regcomp(VALUE str)
VALUE rb_reg_nth_match(int nth, VALUE match)
int rb_match_nth_defined(int nth, VALUE match)
long rb_reg_search0(VALUE re, VALUE str, long pos, int reverse, int set_backref_str)
int rb_memcicmp(const void *x, const void *y, long len)
VALUE rb_reg_init_str(VALUE re, VALUE s, int options)
VALUE rb_reg_compile(VALUE str, int options, const char *sourcefile, int sourceline)
void rb_match_unbusy(VALUE match)
int rb_match_count(VALUE match)
VALUE rb_reg_match2(VALUE re)
VALUE rb_reg_quote(VALUE str)
VALUE rb_check_regexp_type(VALUE re)
VALUE rb_reg_check_preprocess(VALUE str)
VALUE rb_reg_new_str(VALUE s, int options)
int rb_reg_region_copy(struct re_registers *to, const struct re_registers *from)
int onig_compile_ruby(regex_t *reg, const UChar *pattern, const UChar *pattern_end, OnigErrorInfo *einfo, const char *sourcefile, int sourceline)
#define mbclen(p, e, enc)
#define RB_OBJ_WRITE(a, slot, b)
WB for new reference from ‘a’ to ‘b’.
#define RGENGC_WB_PROTECTED_REGEXP
VALUE rb_str_to_str(VALUE)
#define StringValuePtr(v)
#define RSTRING_GETMEM(str, ptrvar, lenvar)
#define StringValueCStr(v)
VALUE rb_str_catf(VALUE, const char *,...)
VALUE rb_sprintf(const char *,...)
struct re_pattern_buffer * ptr
int char_offset_num_allocated
struct rmatch_offset * char_offset
#define ZALLOC(strm, items, size)