24#if defined HAVE_CRYPT_R
25# if defined HAVE_CRYPT_H
28#elif !defined HAVE_CRYPT
30# define HAVE_CRYPT_R 1
42#include "internal/error.h"
43#include "internal/gc.h"
/* Shorthand for entry `no` of the beg[] / end[] arrays reached through `regs`.
 * Both macros expand to an expression on a variable named `regs`, so they can
 * only be used where such a variable is in scope. */
58#define BEG(no) (regs->beg[(no)])
59#define END(no) (regs->end[(no)])
62#undef rb_usascii_str_new
66#undef rb_tainted_str_new_cstr
67#undef rb_usascii_str_new_cstr
68#undef rb_utf8_str_new_cstr
69#undef rb_enc_str_new_cstr
70#undef rb_external_str_new_cstr
71#undef rb_locale_str_new_cstr
72#undef rb_str_dup_frozen
73#undef rb_str_buf_new_cstr
/* NOTE(review): presumably the largest byte length of one character that the
 * string code has to handle -- confirm against its users. */
100#define RUBY_MAX_CHAR_LEN 16
/* String-specific flag bits, each mapped onto one of the generic FL_USERn
 * flags. */
/* Set by STR_SET_SHARED on the string that is being shared from. */
101#define STR_SHARED_ROOT FL_USER5
/* Set by STR_SET_SHARED on the shared string when its RBASIC_CLASS is 0. */
102#define STR_BORROWED FL_USER6
/* NOTE(review): presumably marks a temporarily locked string (see
 * rb_check_lockedtmp) -- confirm. */
103#define STR_TMPLOCK FL_USER7
/* Cleared together with STR_NOEMBED by STR_SET_EMBED.
 * NOTE(review): presumably marks a buffer that must not be freed -- confirm. */
104#define STR_NOFREE FL_USER18
/* Tested by STR_SET_SHARED before it records the shared string. */
105#define STR_FAKESTR FL_USER19
107#define STR_SET_NOEMBED(str) do {\
108 FL_SET((str), STR_NOEMBED);\
109 STR_SET_EMBED_LEN((str), 0);\
/* Clears both the STR_NOEMBED and the STR_NOFREE flag on `str`. */
111#define STR_SET_EMBED(str) FL_UNSET((str), (STR_NOEMBED|STR_NOFREE))
112#define STR_SET_EMBED_LEN(str, n) do { \
114 RBASIC(str)->flags &= ~RSTRING_EMBED_LEN_MASK;\
115 RBASIC(str)->flags |= (tmp_n) << RSTRING_EMBED_LEN_SHIFT;\
118#define STR_SET_LEN(str, n) do { \
119 if (STR_EMBED_P(str)) {\
120 STR_SET_EMBED_LEN((str), (n));\
123 RSTRING(str)->as.heap.len = (n);\
127#define STR_DEC_LEN(str) do {\
128 if (STR_EMBED_P(str)) {\
129 long n = RSTRING_LEN(str);\
131 STR_SET_EMBED_LEN((str), n);\
134 RSTRING(str)->as.heap.len--;\
/* Terminator length for `str`: rb_enc_mbminlen() of the string's encoding.
 * RESIZE_CAPA and STR_HEAP_SIZE add this value to the capacity when sizing
 * the character buffer. */
138#define TERM_LEN(str) rb_enc_mbminlen(rb_enc_get(str))
139#define TERM_FILL(ptr, termlen) do {\
140 char *const term_fill_ptr = (ptr);\
141 const int term_fill_len = (termlen);\
142 *term_fill_ptr = '\0';\
143 if (UNLIKELY(term_fill_len > 1))\
144 memset(term_fill_ptr, 0, term_fill_len);\
147#define RESIZE_CAPA(str,capacity) do {\
148 const int termlen = TERM_LEN(str);\
149 RESIZE_CAPA_TERM(str,capacity,termlen);\
151#define RESIZE_CAPA_TERM(str,capacity,termlen) do {\
152 if (STR_EMBED_P(str)) {\
153 if (!STR_EMBEDDABLE_P(capacity, termlen)) {\
154 char *const tmp = ALLOC_N(char, (size_t)(capacity) + (termlen));\
155 const long tlen = RSTRING_LEN(str);\
156 memcpy(tmp, RSTRING_PTR(str), tlen);\
157 RSTRING(str)->as.heap.ptr = tmp;\
158 RSTRING(str)->as.heap.len = tlen;\
159 STR_SET_NOEMBED(str);\
160 RSTRING(str)->as.heap.aux.capa = (capacity);\
164 assert(!FL_TEST((str), STR_SHARED)); \
165 SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char, \
166 (size_t)(capacity) + (termlen), STR_HEAP_SIZE(str)); \
167 RSTRING(str)->as.heap.aux.capa = (capacity);\
171#define STR_SET_SHARED(str, shared_str) do { \
172 if (!FL_TEST(str, STR_FAKESTR)) { \
173 RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \
174 FL_SET((str), STR_SHARED); \
175 FL_SET((shared_str), STR_SHARED_ROOT); \
176 if (RBASIC_CLASS((shared_str)) == 0) \
177 FL_SET_RAW((shared_str), STR_BORROWED); \
/* The as.heap.ptr field of `str`. */
181#define STR_HEAP_PTR(str) (RSTRING(str)->as.heap.ptr)
/* as.heap.aux.capa plus TERM_LEN(str), as a size_t. RESIZE_CAPA_TERM passes
 * this as the last argument of SIZED_REALLOC_N. */
182#define STR_HEAP_SIZE(str) ((size_t)RSTRING(str)->as.heap.aux.capa + TERM_LEN(str))
/* Thin alias for get_encoding(str). */
185#define STR_ENC_GET(str) get_encoding(str)
187#if !defined SHARABLE_MIDDLE_SUBSTRING
188# define SHARABLE_MIDDLE_SUBSTRING 0
190#if !SHARABLE_MIDDLE_SUBSTRING
191#define SHARABLE_SUBSTRING_P(beg, len, end) ((beg) + (len) == (end))
193#define SHARABLE_SUBSTRING_P(beg, len, end) 1
/* True when `len` does not exceed RSTRING_EMBED_LEN_MAX + 1 - termlen.
 * RESIZE_CAPA_TERM uses the negation of this test to decide when an embedded
 * string needs a separately allocated buffer. */
196#define STR_EMBEDDABLE_P(len, termlen) \
197 ((len) <= RSTRING_EMBED_LEN_MAX + 1 - (termlen))
/* Forward declarations of file-local (static) helpers so that they can be
 * called before their definitions; the definitions of the first three appear
 * further down in this file. */
202static VALUE str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding);
203static VALUE str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex);
205static void str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen);
206static inline void str_modifiable(
VALUE str);
214 str_make_independent_expand((
str),
len, 0
L, termlen);
217static inline int str_dependent_p(
VALUE str);
222 if (str_dependent_p(
str)) {
223 str_make_independent(
str);
/* File-local values that the case-mapping option parsing compares argv[]
 * entries against; sym_fold is also passed as the option to rb_str_downcase
 * by the casecmp? implementation. Where they are initialised is not visible
 * in this part of the file. */
228static VALUE sym_ascii, sym_turkic, sym_lithuanian, sym_fold;
231get_actual_encoding(
const int encidx,
VALUE str)
233 const unsigned char *q;
239 if (q[0] == 0xFE && q[1] == 0xFF) {
242 if (q[0] == 0xFF && q[1] == 0xFE) {
249 if (q[0] == 0 && q[1] == 0 && q[2] == 0xFE && q[3] == 0xFF) {
252 if (q[3] == 0 && q[2] == 0 && q[1] == 0xFE && q[0] == 0xFF) {
269 if (is_broken_string(
str)) {
/* True when `str` does not have the FL_EXIVAR flag set and its class is
 * exactly rb_cString. Note that `str` is expanded twice. */
292#define BARE_STRING_P(str) (!FL_ANY_RAW(str, FL_EXIVAR) && RBASIC_CLASS(str) == rb_cString)
335 if (STR_SHARED_P(
str)) {
337 str_make_independent(
str);
365 if (STR_EMBED_P(
str)) {
381 str_replace_shared_without_enc(
str,
fstr);
400 }
while (args.fstr ==
Qundef);
412setup_fake_str(
struct RString *fake_str,
const char *
name,
long len,
int encidx)
428 return (
VALUE)fake_str;
476 const char *aptr, *bptr;
479 return (alen != blen ||
481 memcmp(aptr, bptr, alen) != 0);
504static inline const char *
505search_nonascii(
const char *p,
const char *e)
509#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
510# if SIZEOF_UINTPTR_T == 8
511# define NONASCII_MASK UINT64_C(0x8080808080808080)
512# elif SIZEOF_UINTPTR_T == 4
513# define NONASCII_MASK UINT32_C(0x80808080)
515# error "don't know what to do."
518# if SIZEOF_UINTPTR_T == 8
519# define NONASCII_MASK ((uintptr_t)0x80808080UL << 32 | (uintptr_t)0x80808080UL)
520# elif SIZEOF_UINTPTR_T == 4
521# define NONASCII_MASK 0x80808080UL
523# error "don't know what to do."
528#if !UNALIGNED_WORD_ACCESS
530 int l = SIZEOF_VOIDP - (
uintptr_t)p % SIZEOF_VOIDP;
535 case 7:
if (p[-7]&0x80)
return p-7;
536 case 6:
if (p[-6]&0x80)
return p-6;
537 case 5:
if (p[-5]&0x80)
return p-5;
538 case 4:
if (p[-4]&0x80)
return p-4;
540 case 3:
if (p[-3]&0x80)
return p-3;
541 case 2:
if (p[-2]&0x80)
return p-2;
542 case 1:
if (p[-1]&0x80)
return p-1;
547#if defined(HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED) &&! UNALIGNED_WORD_ACCESS
548#define aligned_ptr(value) \
549 __builtin_assume_aligned((value), sizeof(uintptr_t))
551#define aligned_ptr(value) (uintptr_t *)(value)
557 if (*s & NONASCII_MASK) {
558#ifdef WORDS_BIGENDIAN
559 return (
const char *)s + (nlz_intptr(*s&NONASCII_MASK)>>3);
561 return (
const char *)s + (ntz_intptr(*s&NONASCII_MASK)>>3);
571 case 7:
if (e[-7]&0x80)
return e-7;
572 case 6:
if (e[-6]&0x80)
return e-6;
573 case 5:
if (e[-5]&0x80)
return e-5;
574 case 4:
if (e[-4]&0x80)
return e-4;
576 case 3:
if (e[-3]&0x80)
return e-3;
577 case 2:
if (e[-2]&0x80)
return e-2;
578 case 1:
if (e[-1]&0x80)
return e-1;
586 const char *e = p +
len;
590 p = search_nonascii(p, e);
595 p = search_nonascii(p, e);
602 p = search_nonascii(p, e);
627 p = search_nonascii(p, e);
632 p = search_nonascii(p, e);
645 p = search_nonascii(p, e);
670rb_enc_cr_str_copy_for_substr(
VALUE dest,
VALUE src)
675 str_enc_copy(dest, src);
689 search_nonascii(
RSTRING_PTR(dest), RSTRING_END(dest)))
700rb_enc_cr_str_exact_copy(
VALUE dest,
VALUE src)
702 str_enc_copy(dest, src);
732 cr = enc_coderange_scan(
str, enc, encidx);
751str_mod_check(
VALUE s,
const char *p,
long len)
759str_capacity(
VALUE str,
const int termlen)
761 if (STR_EMBED_P(
str)) {
779must_not_null(
const char *
ptr)
787str_alloc(
VALUE klass)
794empty_str_alloc(
VALUE klass)
797 return str_alloc(klass);
801str_new0(
VALUE klass,
const char *
ptr,
long len,
int termlen)
811 str = str_alloc(klass);
831 return str_new0(klass,
ptr,
len, 1);
876 __msan_unpoison_string(
ptr);
907str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex)
921 str = str_alloc(klass);
970static VALUE str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
972 int ecflags,
VALUE ecopts);
979 return is_ascii_string(
str);
992 if (from == to)
return str;
1004 from, to, ecflags, ecopts);
1005 if (
NIL_P(newstr)) {
1019 if (ofs < -olen || olen < ofs)
1021 if (ofs < 0) ofs += olen;
1028 return str_cat_conv_enc_opts(newstr, ofs,
ptr,
len, from,
1043str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1045 int ecflags,
VALUE ecopts)
1050 VALUE econv_wrapper;
1051 const unsigned char *start, *sp;
1052 unsigned char *dest, *
dp;
1053 size_t converted_output = (size_t)ofs;
1058 RBASIC_CLEAR_CLASS(econv_wrapper);
1060 if (!ec)
return Qnil;
1063 sp = (
unsigned char*)
ptr;
1065 while ((dest = (
unsigned char*)
RSTRING_PTR(newstr)),
1066 (
dp = dest + converted_output),
1070 size_t converted_input = sp - start;
1071 size_t rest =
len - converted_input;
1072 converted_output =
dp - dest;
1074 if (converted_input && converted_output &&
1075 rest < (
LONG_MAX / converted_output)) {
1076 rest = (rest * converted_output) / converted_input;
1081 olen += rest < 2 ? 2 : rest;
1123 if (!ienc || eenc == ienc) {
1219 char *ptr2 =
RSTRING(str2)->as.ary;
1227 if (STR_SHARED_P(
str)) {
1237 rb_fatal(
"about to free a possible shared root");
1255 str_replace_shared_without_enc(str2,
str);
1256 rb_enc_cr_str_exact_copy(str2,
str);
1263 return str_replace_shared(str_alloc(klass),
str);
1280rb_str_new_frozen_String(
VALUE orig)
1290 return str_new_frozen_buffer(0, orig,
FALSE);
1299 if (STR_EMBED_P(tmp)) {
1311 RSTRING(orig)->as.heap.aux.capa =
RSTRING(tmp)->as.heap.aux.capa;
1322 return str_new_frozen_buffer(klass, orig,
TRUE);
1326str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding)
1330 if (STR_EMBED_P(orig)) {
1341 if ((ofs > 0) || (rest > 0) ||
1355 str = str_alloc(klass);
1362 str = str_alloc(klass);
1368 RBASIC(orig)->flags &= ~STR_NOFREE;
1375 if (copy_encoding) rb_enc_cr_str_exact_copy(
str, orig);
1394#define STR_BUF_MIN_SIZE 63
1429 return str_new(0, 0,
len);
1446 if (STR_EMBED_P(
str)) {
1459RUBY_FUNC_EXPORTED
size_t
1476static inline void str_discard(
VALUE str);
1482 if (
str != str2) str_shared_replace(
str, str2);
1554 if (STR_SHARED_P(str2)) {
1561 rb_enc_cr_str_exact_copy(
str, str2);
1564 str_replace_shared(
str, str2);
1581 const VALUE flag_mask =
1595 str = str_new_frozen(klass,
str);
1609 flags &= ~ENCODING_MASK;
1619 VALUE dup = ec_str_alloc(ec, klass);
1620 return str_duplicate_setup(klass,
str, dup);
1626 VALUE dup = str_alloc(klass);
1627 return str_duplicate_setup(klass,
str, dup);
1697 static ID keyword_ids[2];
1698 VALUE orig, opt, venc, vcapa;
1703 if (!keyword_ids[0]) {
1705 CONST_ID(keyword_ids[1],
"capacity");
1730 if (orig ==
str) n = 0;
1732 str_modifiable(
str);
1733 if (STR_EMBED_P(
str)) {
1734 char *new_ptr =
ALLOC_N(
char, (
size_t)
capa + termlen);
1739 const size_t size = (size_t)
capa + termlen;
1742 char *new_ptr =
ALLOC_N(
char, (
size_t)
capa + termlen);
1755 rb_enc_cr_str_exact_copy(
str, orig);
/* True unless the two most significant bits of byte `c` are 10, i.e. unless
 * `c` has the 10xxxxxx form of a UTF-8 continuation byte. The length-counting
 * code increments its character count once per byte for which this is true. */
1775#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
1791count_utf8_lead_bytes_with_word(
const uintptr_t *s)
1796 d = (d>>6) | (~d>>7);
1797 d &= NONASCII_MASK >> 7;
1800#if defined(HAVE_BUILTIN___BUILTIN_POPCOUNT) && defined(__POPCNT__)
1802 return rb_popcount_intptr(d);
1806# if SIZEOF_VOIDP == 8
1815enc_strlen(
const char *p,
const char *e,
rb_encoding *enc,
int cr)
1821 long diff = (
long)(e - p);
1827 if ((
int)
sizeof(
uintptr_t) * 2 < e - p) {
1832 while (p < (
const char *)s) {
1833 if (is_utf8_lead_byte(*p))
len++;
1837 len += count_utf8_lead_bytes_with_word(s);
1840 p = (
const char *)s;
1843 if (is_utf8_lead_byte(*p))
len++;
1854 q = search_nonascii(p, e);
1867 q = search_nonascii(p, e);
1880 for (c=0; p<e; c++) {
1904 long diff = (
long)(e - p);
1911 q = search_nonascii(p, e);
1934 for (c=0; p<e; c++) {
1962 e = RSTRING_END(
str);
1971 return enc_strlen(p, e, enc, cr);
2048 char *ptr1, *ptr2, *ptr3;
2060 str3 = str_new0(
rb_cString, 0, len1+len2, termlen);
2062 memcpy(ptr3, ptr1, len1);
2063 memcpy(ptr3+len1, ptr2, len2);
2089 else if (enc2 < 0) {
2092 else if (enc1 != enc2) {
2155 while (n <=
len/2) {
2156 memcpy(ptr2 + n, ptr2, n);
2163 rb_enc_cr_str_copy_for_substr(str2,
str);
2205 rb_check_lockedtmp(
str);
2223 str_modifiable(
str);
2224 return !str_dependent_p(
str);
2228str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen)
2264 if (!str_independent(
str))
2265 str_make_independent(
str);
2282 if (!str_independent(
str)) {
2283 str_make_independent_expand(
str,
len, expand, termlen);
2285 else if (expand > 0) {
2295 if (!str_independent(
str))
2296 str_make_independent(
str);
2305 str_modifiable(
str);
2341zero_filled(
const char *s,
int n)
2343 for (; n > 0; --n) {
2350str_null_char(
const char *s,
long len,
const int minlen,
rb_encoding *enc)
2352 const char *e = s +
len;
2355 if (zero_filled(s, minlen))
return s;
2361str_fill_term(
VALUE str,
char *s,
long len,
int termlen)
2366 if (str_dependent_p(
str)) {
2367 if (!zero_filled(s +
len, termlen))
2368 str_make_independent_expand(
str,
len, 0
L, termlen);
2380 long capa = str_capacity(
str, oldtermlen) + oldtermlen;
2385 rb_check_lockedtmp(
str);
2386 str_make_independent_expand(
str,
len, 0
L, termlen);
2388 else if (str_dependent_p(
str)) {
2389 if (termlen > oldtermlen)
2390 str_make_independent_expand(
str,
len, 0
L, termlen);
2393 if (!STR_EMBED_P(
str)) {
2398 if (termlen > oldtermlen) {
2416 if (str_null_char(s,
len, minlen, enc)) {
2419 return str_fill_term(
str, s,
len, minlen);
2422 if (!s || memchr(s, 0,
len)) {
2426 s = str_fill_term(
str, s,
len, minlen);
2435 return str_null_check(
str, &w);
2443 char *s = str_null_check(
str, &w);
2458 return str_fill_term(
str, s,
len, newminlen);
2488str_nth_len(
const char *p,
const char *e,
long *nthp,
rb_encoding *enc)
2498 const char *p2, *e2;
2501 while (p < e && 0 < nth) {
2508 p2 = search_nonascii(p, e2);
2528 while (p < e && nth--) {
2540 return str_nth_len(p, e, &nth, enc);
2544str_nth(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
2549 p = str_nth_len(p, e, &nth, enc);
2558str_offset(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
2560 const char *pp = str_nth(p, e, nth, enc, singlebyte);
2561 if (!pp)
return e - p;
2574str_utf8_nth(
const char *p,
const char *e,
long *nthp)
2577 if ((
int)SIZEOF_VOIDP * 2 < e - p && (
int)SIZEOF_VOIDP * 2 < nth) {
2579 const uintptr_t lowbits = SIZEOF_VOIDP - 1;
2582 while (p < (
const char *)s) {
2583 if (is_utf8_lead_byte(*p)) nth--;
2587 nth -= count_utf8_lead_bytes_with_word(s);
2589 }
while (s <
t && (
int)SIZEOF_VOIDP <= nth);
2593 if (is_utf8_lead_byte(*p)) {
2594 if (nth == 0)
break;
2604str_utf8_offset(
const char *p,
const char *e,
long nth)
2606 const char *pp = str_utf8_nth(p, e, &nth);
2615 if (single_byte_optimizable(
str) || pos < 0)
2632 RSTRING(str2)->as.heap.ptr += beg;
2633 olen =
RSTRING(str2)->as.heap.len;
2641 rb_enc_cr_str_copy_for_substr(str2,
str);
2655 if (
len < 0)
return 0;
2659 if (single_byte_optimizable(
str)) {
2660 if (beg > blen)
return 0;
2663 if (beg < 0)
return 0;
2665 if (
len > blen - beg)
2667 if (
len < 0)
return 0;
2672 if (
len > -beg)
len = -beg;
2684 slen = str_strlen(
str, enc);
2686 if (beg < 0)
return 0;
2688 if (
len == 0)
goto end;
2695 if (beg > str_strlen(
str, enc))
return 0;
2701 p = str_utf8_nth(s, e, &beg);
2702 if (beg > 0)
return 0;
2703 len = str_utf8_offset(p, e,
len);
2709 p = s + beg * char_sz;
2713 else if (
len * char_sz > e - p)
2718 else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
2719 if (beg > 0)
return 0;
2723 len = str_offset(p, e,
len, enc, 0);
2740str_substr(
VALUE str,
long beg,
long len,
int empty)
2745 if (!p)
return Qnil;
2751 RSTRING(str2)->as.heap.ptr += ofs;
2756 if (!
len && !empty)
return Qnil;
2760 rb_enc_cr_str_copy_for_substr(str2,
str);
/* From this point on, rb_str_dup_frozen is an alias for rb_str_new_frozen
 * (the name was #undef'd near the top of the file). */
2812#define rb_str_dup_frozen rb_str_new_frozen
2834RUBY_FUNC_EXPORTED
VALUE
2847 str_modifiable(
str);
2848 if (STR_SHARED_P(
str)) {
2851 if (
len > (
capa = (
long)str_capacity(
str, termlen)) ||
len < 0) {
2868 independent = str_independent(
str);
2875 if (STR_EMBED_P(
str)) {
2876 if (
len == slen)
return str;
2882 str_make_independent_expand(
str, slen,
len - slen, termlen);
2887 if (slen >
len) slen =
len;
2894 else if (!independent) {
2895 if (
len == slen)
return str;
2896 str_make_independent_expand(
str, slen,
len - slen, termlen);
2904 else if (
len == slen)
return str;
2914 long capa, total, olen, off = -1;
2920 if (
ptr >= sptr &&
ptr <= sptr + olen) {
2924 if (
len == 0)
return 0;
2925 if (STR_EMBED_P(
str)) {
2928 olen = RSTRING_EMBED_LEN(
str);
2943 while (total >
capa) {
/* Calls str_buf_cat() with strlen(ptr) as the length, so `ptr` must be a
 * NUL-terminated C string. `ptr` is expanded twice; avoid arguments with side
 * effects. */
2959#define str_buf_cat2(str, ptr) str_buf_cat((str), (ptr), strlen(ptr))
2964 if (
len == 0)
return str;
2984 int ptr_encindex,
int ptr_cr,
int *ptr_cr_ret)
2993 if (str_encindex == ptr_encindex) {
3012 ptr_cr = coderange_scan(
ptr,
len, ptr_enc);
3021 *ptr_cr_ret = ptr_cr;
3023 if (str_encindex != ptr_encindex &&
3032 res_encindex = str_encindex;
3037 res_encindex = str_encindex;
3041 res_encindex = ptr_encindex;
3046 res_encindex = str_encindex;
3053 res_encindex = str_encindex;
3074 return rb_enc_cr_str_buf_cat(
str,
ptr,
len,
3091 unsigned int c = (
unsigned char)*
ptr;
3124#define MIN_PRE_ALLOC_SIZE 48
3147 for (i = s; i <
num; ++i) {
3148 const VALUE v = strary[i];
3181 str_modifiable(
str);
3186 else if (
argc > 1) {
3190 for (i = 0; i <
argc; i++) {
3295 str_modifiable(
str);
3300 else if (
argc > 1) {
3304 for (i = 0; i <
argc; i++) {
3327 const char *ptr1, *ptr2;
3330 return (len1 != len2 ||
3332 memcmp(ptr1, ptr2, len1) != 0);
/* Yields `b` when a > b and `a` otherwise, i.e. the smaller of the two.
 * Each argument can be evaluated twice; avoid arguments with side effects. */
3350#define lesser(a,b) (((a)>(b))?(b):(a))
3362 if (idx1 == idx2)
return TRUE;
3381 const char *ptr1, *ptr2;
3384 if (str1 == str2)
return 0;
3387 if (ptr1 == ptr2 || (retval =
memcmp(ptr1, ptr2,
lesser(len1, len2))) == 0) {
3396 if (len1 > len2)
return 1;
3399 if (retval > 0)
return 1;
3425 if (str1 == str2)
return Qtrue;
3432 return rb_str_eql_internal(str1, str2);
3453 if (str1 == str2)
return Qtrue;
3455 return rb_str_eql_internal(str1, str2);
3518 return str_casecmp(str1, s);
3526 char *p1, *p1end, *p2, *p2end;
3533 p1 =
RSTRING_PTR(str1); p1end = RSTRING_END(str1);
3534 p2 =
RSTRING_PTR(str2); p2end = RSTRING_END(str2);
3535 if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
3536 while (p1 < p1end && p2 < p2end) {
3538 unsigned int c1 =
TOLOWER(*p1 & 0xff);
3539 unsigned int c2 =
TOLOWER(*p2 & 0xff);
3541 return INT2FIX(c1 < c2 ? -1 : 1);
3548 while (p1 < p1end && p2 < p2end) {
3552 if (0 <= c1 && 0 <= c2) {
3556 return INT2FIX(c1 < c2 ? -1 : 1);
3562 len = l1 < l2 ? l1 : l2;
3565 return INT2FIX(r < 0 ? -1 : 1);
3567 return INT2FIX(l1 < l2 ? -1 : 1);
3601 return str_casecmp_p(str1, s);
3608 VALUE folded_str1, folded_str2;
3609 VALUE fold_opt = sym_fold;
3616 folded_str1 = rb_str_downcase(1, &fold_opt, str1);
3617 folded_str2 = rb_str_downcase(1, &fold_opt, str2);
3623strseq_core(
const char *str_ptr,
const char *str_ptr_end,
long str_len,
3626 const char *search_start = str_ptr;
3627 long pos, search_len = str_len -
offset;
3631 pos =
rb_memsearch(sub_ptr, sub_len, search_start, search_len, enc);
3632 if (pos < 0)
return pos;
3634 if (
t == search_start + pos)
break;
3635 search_len -=
t - search_start;
3636 if (search_len <= 0)
return -1;
/* rb_str_index(str, sub, offset) is rb_strseq_index() with 0 as its fourth
 * argument.
 * NOTE(review): the fourth argument looks like the in_byte flag, which would
 * make this the non-byte-offset search -- confirm against rb_strseq_index. */
3643#define rb_str_index(str, sub, offset) rb_strseq_index(str, sub, offset, 0)
3648 const char *str_ptr, *str_ptr_end, *sub_ptr;
3649 long str_len, sub_len;
3653 if (is_broken_string(
sub))
return -1;
3656 str_ptr_end = RSTRING_END(
str);
3661 if (str_len < sub_len)
return -1;
3664 long str_len_char, sub_len_char;
3665 int single_byte = single_byte_optimizable(
str);
3666 str_len_char = (in_byte || single_byte) ? str_len : str_strlen(
str, enc);
3667 sub_len_char = in_byte ? sub_len : str_strlen(
sub, enc);
3670 if (
offset < 0)
return -1;
3672 if (str_len_char -
offset < sub_len_char)
return -1;
3673 if (!in_byte)
offset = str_offset(str_ptr, str_ptr_end,
offset, enc, single_byte);
3676 if (sub_len == 0)
return offset;
3679 return strseq_core(str_ptr, str_ptr_end, str_len, sub_ptr, sub_len,
offset, enc);
3741 if (pos > str_strlen(
str,
NULL))
3761 if (pos == -1)
return Qnil;
3769 char *hit, *adjusted;
3771 long slen, searchlen;
3775 if (slen == 0)
return pos;
3777 e = RSTRING_END(
str);
3780 searchlen = s - sbeg + 1;
3783 hit = memrchr(sbeg, c, searchlen);
3786 if (hit != adjusted) {
3787 searchlen = adjusted - sbeg;
3790 if (
memcmp(hit,
t, slen) == 0)
3792 searchlen = adjusted - sbeg;
3793 }
while (searchlen > 0);
3805 e = RSTRING_END(
str);
3810 if (
memcmp(s,
t, slen) == 0) {
3813 if (pos == 0)
break;
3831 if (is_broken_string(
sub))
return -1;
3832 singlebyte = single_byte_optimizable(
str);
3834 slen = str_strlen(
sub, enc);
3837 if (
len < slen)
return -1;
3838 if (
len - pos < slen) pos =
len - slen;
3839 if (
len == 0)
return pos;
3850 s = str_nth(sbeg, RSTRING_END(
str), pos, enc, singlebyte);
3851 return str_rindex(
str,
sub, s, pos, enc);
3913 long pos,
len = str_strlen(
str, enc);
3926 if (pos >
len) pos =
len;
3935 enc, single_byte_optimizable(
str));
3946 pos = rb_str_rindex(
str,
sub, pos);
3947 if (pos >= 0)
return LONG2NUM(pos);
4072 re = get_pat(
argv[0]);
4106 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0xff; i--)
4110 ++((
unsigned char*)p)[i];
4118 memset(p+l, 0xff,
len-l);
4124 for (len2 =
len-1; 0 < len2; len2--) {
4129 memset(p+len2+1, 0xff,
len-(len2+1));
4159 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0; i--)
4163 --((
unsigned char*)p)[i];
4171 memset(p+l, 0,
len-l);
4177 for (len2 =
len-1; 0 < len2; len2--) {
4182 memset(p+len2+1, 0,
len-(len2+1));
4197enc_succ_alnum_char(
char *p,
long len,
rb_encoding *enc,
char *carry)
4207 const int max_gaps = 1;
4218 for (
try = 0;
try <= max_gaps; ++
try) {
4219 ret = enc_succ_char(p,
len, enc);
4230 ret = enc_pred_char(p,
len, enc);
4254 enc_succ_char(carry,
len, enc);
4319 rb_enc_cr_str_copy_for_substr(
str, orig);
4320 return str_succ(
str);
4327 char *sbeg, *s, *e, *last_alnum = 0;
4328 int found_alnum = 0;
4331 long carry_pos = 0, carry_len = 1;
4335 if (slen == 0)
return str;
4339 s = e = sbeg + slen;
4351 neighbor = enc_succ_alnum_char(s, l, enc, carry);
4362 carry_pos = s - sbeg;
4374 neighbor = enc_succ_char(tmp, l, enc);
4388 enc_succ_char(s, l, enc);
4391 MEMCPY(carry, s,
char, l);
4394 carry_pos = s - sbeg;
4400 s = sbeg + carry_pos;
4401 memmove(s + carry_len, s, slen - carry_pos);
4429all_digits_p(
const char *s,
long len)
4485 VALUE current, after_end;
4493 ascii = (is_ascii_string(
beg) && is_ascii_string(
end));
4499 if (c > e || (excl && c == e))
return beg;
4502 if (!excl && c == e)
break;
4504 if (excl && c == e)
break;
4515 width = RSTRING_LENINT(
beg);
4524 if (excl && bi == ei)
break;
4525 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
4530 ID op = excl ?
'<' :
idLE;
4544 if (n > 0 || (excl && n == 0))
return beg;
4552 if ((*each)(current, arg))
break;
4553 if (
NIL_P(next))
break;
4575 int width = RSTRING_LENINT(
beg);
4582 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
4598 if ((*each)(current, arg))
break;
4641 if (b <= v && v < e)
return Qtrue;
4642 if (!
RTEST(exclusive) && v == e)
return Qtrue;
4680 else if (RB_TYPE_P(indx,
T_REGEXP)) {
4683 else if (RB_TYPE_P(indx,
T_STRING)) {
4702 return str_substr(
str, idx, 1,
FALSE);
4791 return rb_str_aref(
str,
argv[0]);
4800 str_modifiable(
str);
4801 if (
len > olen)
len = olen;
4829 if (
beg == 0 && vlen == 0) {
4834 str_modify_keep_cr(
str);
4852 if (vlen <
beg &&
len < 0) {
4870 int singlebyte = single_byte_optimizable(
str);
4877 slen = str_strlen(
str, enc);
4879 if ((slen <
beg) || ((
beg < 0) && (
beg + slen < 0))) {
4890 str_modify_keep_cr(
str);
4892 if (!p) p = RSTRING_END(
str);
4893 e = str_nth(p, RSTRING_END(
str),
len, enc, singlebyte);
4894 if (!e) e = RSTRING_END(
str);
/* Alias: rb_str_splice forwards all four arguments unchanged to
 * rb_str_update. */
4905#define rb_str_splice(str, beg, len, val) rb_str_update(str, beg, len, val)
4921 regs = RMATCH_REGS(
match);
4937 rb_str_splice_0(
str, start,
len, val);
4946 switch (
TYPE(indx)) {
4948 rb_str_subpat_set(
str, indx,
INT2FIX(0), val);
5077 str_modify_keep_cr(
str);
5092 else if (
argc == 2) {
5104 else if (RB_TYPE_P(indx,
T_STRING)) {
5132 rb_enc_cr_str_copy_for_substr(result,
str);
5179get_pat_quoted(
VALUE pat,
int check)
5197 if (check && is_broken_string(pat)) {
5204rb_pat_search(
VALUE pat,
VALUE str,
long pos,
int set_backref_str)
5207 pos = rb_strseq_index(
str, pat, pos, 1);
5208 if (set_backref_str) {
5210 str = rb_str_new_frozen_String(
str);
5257 pat = get_pat_quoted(
argv[0], 1);
5259 str_modifiable(
str);
5260 beg = rb_pat_search(pat,
str, 0, 1);
5271 regs = RMATCH_REGS(
match);
5283 if (iter || !
NIL_P(hash)) {
5293 str_mod_check(
str, p,
len);
5330 memmove(p + beg0 + rlen, p + beg0 + plen,
len - beg0 - plen);
5414 long beg, beg0, end0;
5416 enum {STR, ITER, MAP}
mode = STR;
5418 int need_backref = -1;
5440 pat = get_pat_quoted(
argv[0], 1);
5441 beg = rb_pat_search(pat,
str, 0, need_backref);
5443 if (bang)
return Qnil;
5459 regs = RMATCH_REGS(
match);
5479 str_mod_check(
str, sp, slen);
5484 else if (need_backref) {
5486 if (need_backref < 0) {
5487 need_backref = val != repl;
5520 rb_pat_search(pat,
str,
last, 1);
5522 str_shared_replace(
str, dest);
5548 str_modify_keep_cr(
str);
5634 str_modifiable(
str);
5635 if (
str == str2)
return str;
5639 return str_replace(
str, str2);
5712 char *head, *left = 0;
5717 if (pos < -
len ||
len <= pos)
5724 unsigned char byte =
NUM2INT(w) & 0xFF;
5726 if (!str_independent(
str))
5727 str_make_independent(
str);
5730 ptr = (
unsigned char *)&head[pos];
5731 if (!STR_EMBED_P(
str)) {
5778 if (!empty)
return Qnil;
5795 str_enc_copy(str2,
str);
5839 return str_byte_substr(
str, idx, 1,
FALSE);
5874 return str_byte_aref(
str,
argv[0]);
5898 p = RSTRING_END(rev);
5902 if (single_byte_optimizable(
str)) {
5930 str_enc_copy(rev,
str);
5948 if (single_byte_optimizable(
str)) {
5951 str_modify_keep_cr(
str);
5953 e = RSTRING_END(
str) - 1;
5961 str_shared_replace(
str, rb_str_reverse(
str));
5965 str_modify_keep_cr(
str);
5991 if (i == -1)
return Qfalse;
6081#define CHAR_ESC_LEN 13
6096 else if (c < 0x10000) {
6120 case '\0':
return "\\0";
6121 case '\n':
return "\\n";
6122 case '\r':
return "\\r";
6123 case '\t':
return "\\t";
6124 case '\f':
return "\\f";
6125 case '\013':
return "\\v";
6126 case '\010':
return "\\b";
6127 case '\007':
return "\\a";
6128 case '\033':
return "\\e";
6129 case '\x7f':
return "\\c?";
6140 const char *pend = RSTRING_END(
str);
6141 const char *prev = p;
6152 if (p > prev) str_buf_cat(result, prev, p - prev);
6155 n = (
int)(pend - p);
6168 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6169 str_buf_cat(result, cc,
strlen(cc));
6175 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6180 if (p > prev) str_buf_cat(result, prev, p - prev);
6203 const char *p, *pend, *prev;
6217 actenc = get_actual_encoding(encidx,
str);
6218 if (actenc != enc) {
6228 if (p > prev) str_buf_cat(result, prev, p - prev);
6231 n = (
int)(pend - p);
6242 if ((asciicompat || unicode_p) &&
6243 (c ==
'"'|| c ==
'\\' ||
6248 (cc ==
'$' || cc ==
'@' || cc ==
'{'))))) {
6249 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6251 if (asciicompat || enc == resenc) {
6257 case '\n': cc =
'n';
break;
6258 case '\r': cc =
'r';
break;
6259 case '\t': cc =
't';
break;
6260 case '\f': cc =
'f';
break;
6261 case '\013': cc =
'v';
break;
6262 case '\010': cc =
'b';
break;
6263 case '\007': cc =
'a';
break;
6264 case 033: cc =
'e';
break;
6265 default: cc = 0;
break;
6268 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6271 str_buf_cat(result,
buf, 2);
6280 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6286 if (p > prev) str_buf_cat(result, prev, p - prev);
/* True when `p` is still before `e` and the character at `p` is '$', '@' or
 * '{'. The dump code applies it to the position right after a '#' to decide
 * whether a backslash has to be emitted. `p` is expanded several times. */
6292#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
6316 const char *p, *pend;
6320 static const char nonascii_suffix[] =
".dup.force_encoding(\"%s\")";
6331 unsigned char c = *p++;
6334 case '"':
case '\\':
6335 case '\n':
case '\r':
6336 case '\t':
case '\f':
6337 case '\013':
case '\010':
case '\007':
case '\033':
6350 if (
u8 && c > 0x7F) {
6356 else if (cc <= 0xFFFFF)
6381 unsigned char c = *p++;
6383 if (c ==
'"' || c ==
'\\') {
6387 else if (c ==
'#') {
6388 if (
IS_EVSTR(p, pend)) *q++ =
'\\';
6391 else if (c ==
'\n') {
6395 else if (c ==
'\r') {
6399 else if (c ==
'\t') {
6403 else if (c ==
'\f') {
6407 else if (c ==
'\013') {
6411 else if (c ==
'\010') {
6415 else if (c ==
'\007') {
6419 else if (c ==
'\033') {
6458unescape_ascii(
unsigned int c)
6482undump_after_backslash(
VALUE undumped,
const char **ss,
const char *s_end,
rb_encoding **penc,
bool *utf8,
bool *binary)
6484 const char *s = *ss;
6488 unsigned char buf[6];
6506 *
buf = unescape_ascii(*s);
6519 if (*penc != enc_utf8) {
6538 if (hexlen == 0 || hexlen > 6) {
6544 if (0xd800 <= c && c <= 0xdfff) {
6557 if (0xd800 <= c && c <= 0xdfff) {
6604 const char *s_end = RSTRING_END(
str);
6608 bool binary =
false;
6612 if (rb_str_is_ascii_only_p(
str) ==
Qfalse) {
6615 if (!str_null_check(
str, &w)) {
6619 if (*s !=
'"')
goto invalid_format;
6637 static const char force_encoding_suffix[] =
".force_encoding(\"";
6638 static const char dup_suffix[] =
".dup";
6639 const char *encname;
6644 size =
sizeof(dup_suffix) - 1;
6647 size =
sizeof(force_encoding_suffix) - 1;
6648 if (s_end - s <=
size)
goto invalid_format;
6649 if (
memcmp(s, force_encoding_suffix,
size) != 0)
goto invalid_format;
6657 s = memchr(s,
'"', s_end-s);
6659 if (!s)
goto invalid_format;
6660 if (s_end - s != 2)
goto invalid_format;
6661 if (s[0] !=
'"' || s[1] !=
')')
goto invalid_format;
6677 undump_after_backslash(undumped, &s, s_end, &enc, &utf8, &binary);
6686 rb_raise(
rb_eRuntimeError,
"invalid dumped string; not wrapped with '\"' nor '\"...\".force_encoding(\"...\")' form");
6702 rb_str_check_dummy_enc(enc);
6713 if (
argv[0]==sym_turkic) {
6716 if (
argv[1]==sym_lithuanian)
6722 else if (
argv[0]==sym_lithuanian) {
6725 if (
argv[1]==sym_turkic)
6733 else if (
argv[0]==sym_ascii)
6735 else if (
argv[0]==sym_fold) {
6755#define CASE_MAPPING_ADDITIONAL_LENGTH 20
6756#ifndef CASEMAP_DEBUG
6757# define CASEMAP_DEBUG 0
6769mapping_buffer_free(
void *p)
6773 while (current_buffer) {
6774 previous_buffer = current_buffer;
6775 current_buffer = current_buffer->
next;
6782 {0, mapping_buffer_free,}
6790 const OnigUChar *source_current, *source_end;
6791 int target_length = 0;
6792 VALUE buffer_anchor;
6795 size_t buffer_count = 0;
6796 int buffer_length_or_invalid;
6801 source_end = (
OnigUChar*)RSTRING_END(source);
6805 while (source_current < source_end) {
6809 fprintf(stderr,
"Buffer allocation, capa is %"PRIuSIZE"\n",
capa);
6812 *pre_buffer = current_buffer;
6813 pre_buffer = ¤t_buffer->
next;
6816 buffer_length_or_invalid = enc->
case_map(flags,
6817 (
const OnigUChar**)&source_current, source_end,
6818 current_buffer->
space,
6819 current_buffer->
space+current_buffer->
capa,
6821 if (buffer_length_or_invalid < 0) {
6822 current_buffer =
DATA_PTR(buffer_anchor);
6824 mapping_buffer_free(current_buffer);
6827 target_length += current_buffer->
used = buffer_length_or_invalid;
6830 fprintf(stderr,
"Buffer count is %"PRIuSIZE"\n", buffer_count);
6833 if (buffer_count==1) {
6834 target =
rb_str_new((
const char*)current_buffer->
space, target_length);
6837 char *target_current;
6841 current_buffer =
DATA_PTR(buffer_anchor);
6842 while (current_buffer) {
6844 target_current += current_buffer->
used;
6845 current_buffer = current_buffer->
next;
6848 current_buffer =
DATA_PTR(buffer_anchor);
6850 mapping_buffer_free(current_buffer);
6853 str_enc_copy(target, source);
6862 const OnigUChar *source_current, *source_end;
6865 int length_or_invalid;
6867 if (old_length == 0)
return Qnil;
6870 source_end = (
OnigUChar*)RSTRING_END(source);
6871 if (source == target) {
6872 target_current = (
OnigUChar*)source_current;
6877 target_end = (
OnigUChar*)RSTRING_END(target);
6881 &source_current, source_end,
6882 target_current, target_end, enc);
6883 if (length_or_invalid < 0)
6886 fprintf(stderr,
"problem with rb_str_ascii_casemap"
6887 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
6889 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
6892 str_enc_copy(target, source);
6901 bool modified =
false;
6904 unsigned int c = *(
unsigned char*)s;
6907 *s =
'A' + (c -
'a');
6932 flags = check_case_options(
argc,
argv, flags);
6933 str_modify_keep_cr(
str);
6934 enc = str_true_enc(
str);
6935 if (case_option_single_p(flags, enc,
str)) {
6936 if (upcase_single(
str))
6940 rb_str_ascii_casemap(
str,
str, &flags, enc);
6942 str_shared_replace(
str, rb_str_casemap(
str, &flags, enc));
6969 flags = check_case_options(
argc,
argv, flags);
6970 enc = str_true_enc(
str);
6971 if (case_option_single_p(flags, enc,
str)) {
6973 str_enc_copy(ret,
str);
6978 rb_str_ascii_casemap(
str, ret, &flags, enc);
6981 ret = rb_str_casemap(
str, &flags, enc);
6991 bool modified =
false;
6994 unsigned int c = *(
unsigned char*)s;
6997 *s =
'a' + (c -
'A');
7023 flags = check_case_options(
argc,
argv, flags);
7024 str_modify_keep_cr(
str);
7025 enc = str_true_enc(
str);
7026 if (case_option_single_p(flags, enc,
str)) {
7027 if (downcase_single(
str))
7031 rb_str_ascii_casemap(
str,
str, &flags, enc);
7033 str_shared_replace(
str, rb_str_casemap(
str, &flags, enc));
7097 flags = check_case_options(
argc,
argv, flags);
7098 enc = str_true_enc(
str);
7099 if (case_option_single_p(flags, enc,
str)) {
7101 str_enc_copy(ret,
str);
7102 downcase_single(ret);
7106 rb_str_ascii_casemap(
str, ret, &flags, enc);
7109 ret = rb_str_casemap(
str, &flags, enc);
7140 flags = check_case_options(
argc,
argv, flags);
7141 str_modify_keep_cr(
str);
7142 enc = str_true_enc(
str);
7145 rb_str_ascii_casemap(
str,
str, &flags, enc);
7147 str_shared_replace(
str, rb_str_casemap(
str, &flags, enc));
7176 flags = check_case_options(
argc,
argv, flags);
7177 enc = str_true_enc(
str);
7181 rb_str_ascii_casemap(
str, ret, &flags, enc);
7184 ret = rb_str_casemap(
str, &flags, enc);
7208 flags = check_case_options(
argc,
argv, flags);
7209 str_modify_keep_cr(
str);
7210 enc = str_true_enc(
str);
7212 rb_str_ascii_casemap(
str,
str, &flags, enc);
7214 str_shared_replace(
str, rb_str_casemap(
str, &flags, enc));
7242 flags = check_case_options(
argc,
argv, flags);
7243 enc = str_true_enc(
str);
7247 rb_str_ascii_casemap(
str, ret, &flags, enc);
7250 ret = rb_str_casemap(
str, &flags, enc);
7271 if (
t->p ==
t->pend)
return -1;
7279 if (
t->p <
t->pend) {
7283 if (
t->now < 0x80 && c < 0x80) {
7285 "invalid range \"%c-%c\" in string transliteration",
7301 if (
t->now ==
t->max) {
7306 if (
t->now <
t->max) {
7322 const unsigned int errc = -1;
7323 unsigned int trans[256];
7325 struct tr trsrc, trrepl;
7327 unsigned int c, c0,
last = 0;
7328 int modify = 0, i, l;
7329 unsigned char *s, *send;
7331 int singlebyte = single_byte_optimizable(
str);
7335#define CHECK_IF_ASCII(c) \
7336 (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
7337 (cr = ENC_CODERANGE_VALID) : 0)
7343 return rb_str_delete_bang(1, &src,
str);
7358 trsrc.p + l < trsrc.pend) {
7364 trsrc.gen = trrepl.gen = 0;
7365 trsrc.now = trrepl.now = 0;
7366 trsrc.max = trrepl.max = 0;
7369 for (i=0; i<256; i++) {
7372 while ((c = trnext(&trsrc, enc)) != errc) {
7381 while ((c = trnext(&trrepl, enc)) != errc)
7384 for (i=0; i<256; i++) {
7385 if (trans[i] != errc) {
7393 for (i=0; i<256; i++) {
7396 while ((c = trnext(&trsrc, enc)) != errc) {
7397 r = trnext(&trrepl, enc);
7398 if (r == errc) r = trrepl.now;
7412 str_modify_keep_cr(
str);
7413 s = (
unsigned char *)
RSTRING_PTR(
str); send = (
unsigned char *)RSTRING_END(
str);
7418 unsigned int save = -1;
7434 if (cflag) c =
last;
7437 else if (cflag) c = errc;
7443 if (c != (
unsigned int)-1) {
7455 if (enc != e1) may_modify = 1;
7464 if (may_modify &&
memcmp(s,
t, tlen) != 0) {
7470 if (!STR_EMBED_P(
str)) {
7481 c = (
unsigned char)*s;
7482 if (trans[c] != errc) {
7513 if (cflag) c =
last;
7516 else if (cflag) c = errc;
7520 c = cflag ?
last : errc;
7528 if (enc != e1) may_modify = 1;
7538 if (may_modify &&
memcmp(s,
t, tlen) != 0) {
7546 if (!STR_EMBED_P(
str)) {
7578 return tr_trans(
str, src, repl, 0);
7621 tr_trans(
str, src, repl, 0);
7625#define TR_TABLE_MAX (UCHAR_MAX+1)
7626#define TR_TABLE_SIZE (TR_TABLE_MAX+1)
7631 const unsigned int errc = -1;
7635 VALUE table = 0, ptable = 0;
7636 int i, l, cflag = 0;
7658 while ((c = trnext(&
tr, enc)) != errc) {
7660 buf[(
unsigned char)c] = !cflag;
7665 if (!table && (first || *tablep || stable[
TR_TABLE_MAX])) {
7683 stable[i] = stable[i] &&
buf[i];
7685 if (!table && !cflag) {
7695 return table[c] != 0;
7727 VALUE del = 0, nodel = 0;
7729 int i, ascompat, cr;
7733 for (i=0; i<
argc; i++) {
7738 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
7741 str_modify_keep_cr(
str);
7744 send = RSTRING_END(
str);
7750 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
7763 if (tr_find(c, squeez, del, nodel)) {
7778 if (modify)
return str;
7819 VALUE del = 0, nodel = 0;
7820 unsigned char *s, *send, *
t;
7822 int ascompat, singlebyte = single_byte_optimizable(
str);
7829 for (i=0; i<
argc; i++) {
7834 if (singlebyte && !single_byte_optimizable(s))
7836 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
7840 str_modify_keep_cr(
str);
7843 send = (
unsigned char *)RSTRING_END(
str);
7849 unsigned int c = *s++;
7850 if (c != save || (
argc > 0 && !squeez[c])) {
7860 if (ascompat && (c = *s) < 0x80) {
7861 if (c != save || (
argc > 0 && !squeez[c])) {
7869 if (c != save || (
argc > 0 && !tr_find(c, squeez, del, nodel))) {
7885 if (modify)
return str;
7925 return tr_trans(
str, src, repl, 1);
7946 tr_trans(
str, src, repl, 1);
7983 VALUE del = 0, nodel = 0, tstr;
7998 !is_broken_string(
str)) {
8005 send = RSTRING_END(
str);
8007 if (*(
unsigned char*)s++ == c) n++;
8013 tr_setup_table(tstr, table,
TRUE, &del, &nodel, enc);
8014 for (i=1; i<
argc; i++) {
8018 tr_setup_table(tstr, table,
FALSE, &del, &nodel, enc);
8023 send = RSTRING_END(
str);
8029 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
8038 if (tr_find(c, table, del, nodel)) {
8049rb_fs_check(
VALUE val)
8053 if (
NIL_P(val))
return 0;
8058static const char isspacetable[256] = {
8059 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
8060 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8061 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8062 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8063 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8064 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8065 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8066 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8067 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8068 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8069 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8070 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8071 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8072 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8073 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8074 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
8077#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
8082 if (empty_count >= 0 &&
len == 0) {
8083 return empty_count + 1;
8085 if (empty_count > 0) {
8090 }
while (--empty_count > 0);
8095 }
while (--empty_count > 0);
8124 if (
len == 1 &&
ptr[0] ==
' ') {
8134 return default_type;
8199 long beg, end, i = 0, empty_count = -1;
8206 if (lim <= 0) limit =
Qnil;
8207 else if (lim == 1) {
8219 if (
NIL_P(limit) && !lim) empty_count = 0;
8224 spat = get_pat_quoted(spat, 0);
8229 else if (!(spat = rb_fs_check(spat))) {
8239 tmp = RREGEXP_SRC(spat);
8248 mustnot_broken(spat);
8257#define SPLIT_STR(beg, len) (empty_count = split_string(result, str, beg, len, empty_count))
8262 char *eptr = RSTRING_END(
str);
8269 if (is_ascii_string(
str)) {
8270 while (
ptr < eptr) {
8271 c = (
unsigned char)*
ptr++;
8279 if (!
NIL_P(limit) && lim <= i)
break;
8286 if (!
NIL_P(limit)) ++i;
8294 while (
ptr < eptr) {
8300 if (rb_isspace(c)) {
8306 if (!
NIL_P(limit) && lim <= i)
break;
8309 else if (rb_isspace(c)) {
8313 if (!
NIL_P(limit)) ++i;
8322 char *str_start =
ptr;
8323 char *substr_start =
ptr;
8327 mustnot_broken(
str);
8329 while (
ptr < eptr &&
8333 if (
t !=
ptr + end) {
8337 SPLIT_STR(substr_start - str_start, (
ptr+end) - substr_start);
8340 if (!
NIL_P(limit) && lim <= ++i)
break;
8342 beg =
ptr - str_start;
8345 char *str_start =
ptr;
8348 mustnot_broken(
str);
8350 while (
ptr < eptr &&
8354 if (!
NIL_P(limit) && lim <= ++i)
break;
8356 beg =
ptr - str_start;
8370 regs = RMATCH_REGS(
match);
8377 else if (last_null == 1) {
8396 for (idx=1; idx < regs->
num_regs; idx++) {
8397 if (
BEG(idx) == -1)
continue;
8400 if (!
NIL_P(limit) && lim <= ++i)
break;
8408 return result ? result :
str;
8418 return rb_str_split_m(1, &sep,
str);
8421#define WANTARRAY(m, size) (!rb_block_given_p() ? rb_ary_new_capa(size) : 0)
8436#define ENUM_ELEM(ary, e) enumerator_element(ary, e)
8439chomp_newline(
const char *p,
const char *e,
rb_encoding *enc)
8464#define rb_rs get_rs()
8471 const char *
ptr, *pend, *subptr, *subend, *rsptr, *hit, *adjusted;
8472 long pos,
len, rslen;
8478 static ID keywords[1];
8480 keywords[0] = rb_intern_const(
"chomp");
8498 pend = RSTRING_END(
str);
8511 const char *eol =
NULL;
8513 while (subend < pend) {
8519 if (eol == subend)
break;
8521 if (subptr) eol = subend;
8524 if (!subptr) subptr = subend;
8528 }
while (subend < pend);
8531 subend - subptr + (chomp ? 0 : rslen));
8535 subptr = eol =
NULL;
8554 while (subptr < pend) {
8555 pos =
rb_memsearch(rsptr, rslen, subptr, pend - subptr, enc);
8559 if (hit != adjusted) {
8563 subend = hit += rslen;
8566 subend = chomp_newline(subptr, subend, enc);
8579 if (subptr != pend) {
8582 pend = chomp_newline(subptr, pend, enc);
8584 else if (pend - subptr >= rslen &&
8585 memcmp(pend - rslen, rsptr, rslen) == 0) {
8652 return rb_str_enumerate_lines(
argc,
argv,
str, 0);
8678 return rb_str_enumerate_lines(
argc,
argv,
str, ary);
8720 return rb_str_enumerate_bytes(
str, 0);
8738 return rb_str_enumerate_bytes(
str, ary);
8761 for (i = 0; i <
len; i += n) {
8767 for (i = 0; i <
len; i += n) {
8798 return rb_str_enumerate_chars(
str, 0);
8816 return rb_str_enumerate_chars(
str, ary);
8828 if (single_byte_optimizable(
str))
8829 return rb_str_enumerate_bytes(
str, ary);
8872 return rb_str_enumerate_codepoints(
str, 0);
8891 return rb_str_enumerate_codepoints(
str, ary);
8903 reg_grapheme_cluster = reg_grapheme_cluster_utf8;
8905 if (!reg_grapheme_cluster) {
8909 size_t source_len =
sizeof(source_ascii) - 1;
8911#define CHARS_16BE(x) (OnigUChar)((x)>>8), (OnigUChar)(x)
8912#define CHARS_16LE(x) (OnigUChar)(x), (OnigUChar)((x)>>8)
8913#define CHARS_32BE(x) CHARS_16BE((x)>>16), CHARS_16BE(x)
8914#define CHARS_32LE(x) CHARS_16LE(x), CHARS_16LE((x)>>16)
8915#define CASE_UTF(e) \
8916 case ENCINDEX_UTF_##e: { \
8917 static const OnigUChar source_UTF_##e[] = {CHARS_##e('\\'), CHARS_##e('X')}; \
8918 source = source_UTF_##e; \
8919 source_len = sizeof(source_UTF_##e); \
8929 int r =
onig_new(&reg_grapheme_cluster, source, source + source_len,
8934 rb_fatal(
"cannot compile grapheme cluster regexp: %s", (
char *)message);
8937 reg_grapheme_cluster_utf8 = reg_grapheme_cluster;
8940 return reg_grapheme_cluster;
8946 size_t grapheme_cluster_count = 0;
8955 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
8963 if (
len <= 0)
break;
8964 grapheme_cluster_count++;
8968 return SIZET2NUM(grapheme_cluster_count);
8977 const char *ptr0, *
ptr, *
end;
8980 return rb_str_enumerate_chars(
str, ary);
8984 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
8992 if (
len <= 0)
break;
9019rb_str_each_grapheme_cluster(
VALUE str)
9022 return rb_str_enumerate_grapheme_clusters(
str, 0);
9040 return rb_str_enumerate_grapheme_clusters(
str, ary);
9047 const char *p, *p2, *
beg, *
end;
9051 if (
beg >=
end)
return 0;
9073 str_modify_keep_cr(
str);
9076 len = chopped_length(
str);
9113smart_chomp(
VALUE str,
const char *e,
const char *p)
9132 if (--e > p && *(e-1) ==
'\r') {
9149 char *pp, *e, *rsptr;
9154 if (
len == 0)
return 0;
9157 return smart_chomp(
str, e, p);
9178 while (e > p && *(e-1) ==
'\n') {
9180 if (e > p && *(e-1) ==
'\r')
9186 if (rslen >
len)
return len;
9189 newline = rsptr[rslen-1];
9192 if (newline ==
'\n')
9193 return smart_chomp(
str, e, p);
9197 return smart_chomp(
str, e, p);
9202 if (is_broken_string(rs)) {
9206 if (p[
len-1] == newline &&
9208 memcmp(rsptr, pp, rslen) == 0)) {
9239 long len = chompped_length(
str, rs);
9241 str_modify_keep_cr(
str);
9263 str_modifiable(
str);
9304 const char *
const start = s;
9306 if (!s || s >= e)
return 0;
9309 if (single_byte_optimizable(
str)) {
9317 if (cc && !rb_isspace(cc))
break;
9346 str_modify_keep_cr(
str);
9349 loffset = lstrip_offset(
str, start, start+olen, enc);
9351 long len = olen-loffset;
9352 s = start + loffset;
9355#if !SHARABLE_MIDDLE_SUBSTRING
9393 rb_str_check_dummy_enc(enc);
9394 if (!s || s >= e)
return 0;
9398 if (single_byte_optimizable(
str)) {
9407 if (c && !rb_isspace(c))
break;
9436 str_modify_keep_cr(
str);
9439 roffset = rstrip_offset(
str, start, start+olen, enc);
9441 long len = olen - roffset;
9444#if !SHARABLE_MIDDLE_SUBSTRING
9475 roffset = rstrip_offset(
str, start, start+olen, enc);
9499 long olen, loffset, roffset;
9502 str_modify_keep_cr(
str);
9505 loffset = lstrip_offset(
str, start, start+olen, enc);
9506 roffset = rstrip_offset(
str, start+loffset, start+olen, enc);
9508 if (loffset > 0 || roffset > 0) {
9509 long len = olen-roffset;
9515#if !SHARABLE_MIDDLE_SUBSTRING
9543 long olen, loffset, roffset;
9547 loffset = lstrip_offset(
str, start, start+olen, enc);
9548 roffset = rstrip_offset(
str, start+loffset, start+olen, enc);
9550 if (loffset <= 0 && roffset <= 0)
return str_duplicate(
rb_cString,
str);
9555scan_once(
VALUE str,
VALUE pat,
long *start,
int set_backref_str)
9560 long end, pos = rb_pat_search(pat,
str, *start, set_backref_str);
9568 regs = RMATCH_REGS(
match);
9579 RSTRING_END(
str), enc);
9586 if (!regs || regs->
num_regs == 1) {
9591 for (i=1; i < regs->
num_regs; i++) {
9641 long last = -1, prev = 0;
9644 pat = get_pat_quoted(pat, 1);
9645 mustnot_broken(
str);
9649 while (!
NIL_P(result = scan_once(
str, pat, &start, 0))) {
9659 while (!
NIL_P(result = scan_once(
str, pat, &start, 1))) {
9663 str_mod_check(
str, p,
len);
9781# define CRYPT_END() ALLOCV_END(databuf)
9783 extern char *
crypt(
const char *,
const char *);
9784# define CRYPT_END() (void)0
9787 const char *s, *saltp;
9790 char salt_8bit_clean[3];
9795 mustnot_wchar(salt);
9802 if (!saltp[0] || !saltp[1])
goto short_salt;
9804 if (!
ISASCII((
unsigned char)saltp[0]) || !
ISASCII((
unsigned char)saltp[1])) {
9805 salt_8bit_clean[0] = saltp[0] & 0x7f;
9806 salt_8bit_clean[1] = saltp[1] & 0x7f;
9807 salt_8bit_clean[2] =
'\0';
9808 saltp = salt_8bit_clean;
9813# ifdef HAVE_STRUCT_CRYPT_DATA_INITIALIZED
9814 data->initialized = 0;
9816 res =
crypt_r(s, saltp, data);
9818 res =
crypt(s, saltp);
9867 char *
ptr, *p, *pend;
9870 unsigned long sum0 = 0;
9885 sum0 += (
unsigned char)*p;
9897 sum0 &= (((
unsigned long)1)<<
bits)-1;
9921 long width,
len, flen = 1, fclen = 1;
9924 const char *
f =
" ";
9925 long n,
size, llen, rlen, llen2 = 0, rlen2 = 0;
9927 int singlebyte = 1, cr;
9939 fclen = str_strlen(pad, enc);
9940 singlebyte = single_byte_optimizable(pad);
9941 if (flen == 0 || fclen == 0) {
9945 len = str_strlen(
str, enc);
9946 if (width < 0 || len >= width)
return str_duplicate(
rb_cString,
str);
9948 llen = (jflag ==
'l') ? 0 : ((jflag ==
'r') ? n : n/2);
9952 llen2 = str_offset(
f,
f + flen, llen % fclen, enc, singlebyte);
9953 rlen2 = str_offset(
f,
f + flen, rlen % fclen, enc, singlebyte);
9956 if ((
len = llen / fclen + rlen / fclen) >=
LONG_MAX / flen ||
9965 memset(p, *
f, llen);
9969 while (llen >= fclen) {
9982 memset(p, *
f, rlen);
9986 while (rlen >= fclen) {
10088 sep = get_pat_quoted(sep, 0);
10101 if (pos < 0)
goto failed;
10146 sep = get_pat_quoted(sep, 0);
10159 pos = rb_str_rindex(
str, sep, pos);
10194 for (i=0; i<
argc; i++) {
10231 for (i=0; i<
argc; i++) {
10259 char *strptr, *prefixptr;
10260 long olen, prefixlen;
10263 if (is_broken_string(
prefix))
return 0;
10268 if (prefixlen <= 0)
return 0;
10270 if (olen < prefixlen)
return 0;
10273 if (
memcmp(strptr, prefixptr, prefixlen) != 0)
return 0;
10293 str_modify_keep_cr(
str);
10295 prefixlen = deleted_prefix_length(
str,
prefix);
10296 if (prefixlen <= 0)
return Qnil;
10316 prefixlen = deleted_prefix_length(
str,
prefix);
10334 char *strptr, *suffixptr, *s;
10335 long olen, suffixlen;
10339 if (is_broken_string(
suffix))
return 0;
10344 if (suffixlen <= 0)
return 0;
10346 if (olen < suffixlen)
return 0;
10349 s = strptr + olen - suffixlen;
10350 if (
memcmp(s, suffixptr, suffixlen) != 0)
return 0;
10370 long olen, suffixlen,
len;
10371 str_modifiable(
str);
10373 suffixlen = deleted_suffix_length(
str,
suffix);
10374 if (suffixlen <= 0)
return Qnil;
10377 str_modify_keep_cr(
str);
10378 len = olen - suffixlen;
10402 suffixlen = deleted_suffix_length(
str,
suffix);
10420 val = rb_fs_check(val);
10423 "value of %"PRIsVALUE" must be String or Regexp",
10443 str_modifiable(
str);
10460 str_replace_shared_without_enc(str2,
str);
10519 static const char ellipsis[] =
"...";
10520 const long ellipsislen =
sizeof(ellipsis) - 1;
10524 VALUE estr, ret = 0;
10531 else if (
len <= ellipsislen ||
10595 return enc_str_scrub(enc,
str, repl, cr);
10603 const char *rep, *p, *e, *p1, *sp;
10616 if (!
NIL_P(repl)) {
10617 repl = str_compat_and_valid(repl, enc);
10625#define DEFAULT_REPLACE_CHAR(str) do { \
10626 static const char replace[sizeof(str)-1] = str; \
10627 rep = replace; replen = (int)sizeof(replace); \
10632 e = RSTRING_END(
str);
10642 else if (!
NIL_P(repl)) {
10657 p = search_nonascii(p, e);
10681 if (e - p < clen) clen = e - p;
10688 for (; clen > 1; clen--) {
10701 str_mod_check(
str, sp, slen);
10702 repl = str_compat_and_valid(repl, enc);
10709 p = search_nonascii(p, e);
10736 str_mod_check(
str, sp, slen);
10737 repl = str_compat_and_valid(repl, enc);
10750 else if (!
NIL_P(repl)) {
10784 if (e - p < clen) clen = e - p;
10785 if (clen <= mbminlen * 2) {
10790 for (; clen > mbminlen; clen-=mbminlen) {
10802 str_mod_check(
str, sp, slen);
10803 repl = str_compat_and_valid(repl, enc);
10829 str_mod_check(
str, sp, slen);
10830 repl = str_compat_and_valid(repl, enc);
10885static ID id_normalize;
10886static ID id_normalized_p;
10887static VALUE mUnicodeNormalize;
10892 static int UnicodeNormalizeRequired = 0;
10895 if (!UnicodeNormalizeRequired) {
10896 rb_require(
"unicode_normalize/normalize.rb");
10897 UnicodeNormalizeRequired = 1;
10929 return unicode_normalize_common(
argc,
argv,
str, id_normalize);
10965 return unicode_normalize_common(
argc,
argv,
str, id_normalized_p);
11008#define sym_equal rb_obj_equal
11011sym_printable(
const char *s,
const char *send,
rb_encoding *enc)
11038 if ((resenc != enc && !rb_str_is_ascii_only_p(
sym)) ||
len != (
long)
strlen(
ptr) ||
11059 if ((resenc != enc && !rb_str_is_ascii_only_p(
str)) ||
11649 sym_ascii =
ID2SYM(rb_intern_const(
"ascii"));
11650 sym_turkic =
ID2SYM(rb_intern_const(
"turkic"));
11651 sym_lithuanian =
ID2SYM(rb_intern_const(
"lithuanian"));
11652 sym_fold =
ID2SYM(rb_intern_const(
"fold"));
11745 id_normalize = rb_intern_const(
"normalize");
11746 id_normalized_p = rb_intern_const(
"normalized?");
#define offsetof(p_type, field)
VALUE rb_ary_push(VALUE ary, VALUE item)
VALUE rb_check_array_type(VALUE ary)
#define RUBY_ASSERT(expr)
Asserts that the given expression is truthy iff RUBY_DEBUG is truthy.
#define RUBY_ASSERT_ALWAYS(expr)
A variant of RUBY_ASSERT that does not interface with RUBY_DEBUG.
#define RUBY_ALIAS_FUNCTION(prot, name, args)
#define UNREACHABLE_RETURN
#define rb_category_warn(category,...)
VALUE rb_str_to_inum(VALUE str, int base, int badcheck)
int bits(struct state *s, int need)
VALUE rb_invcmp(VALUE x, VALUE y)
Internal header absorbing C compiler differences.
#define OBJ_BUILTIN_TYPE(obj)
char * crypt_r(const char *key, const char *setting, struct crypt_data *data)
Our own, locale independent, character handling routines.
#define rb_define_method(klass, mid, func, arity)
Defines klass#mid.
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
#define range(low, item, hi)
#define RB_DEBUG_COUNTER_INC_IF(type, cond)
#define RB_DEBUG_COUNTER_INC(type)
#define MJIT_FUNC_EXPORTED
#define ENCINDEX_UTF_32BE
#define ENCINDEX_UTF_32LE
#define ENCINDEX_UTF_16BE
#define ENCINDEX_UTF_16LE
#define rb_ascii8bit_encindex()
#define rb_usascii_encindex()
int rb_enc_find_index2(const char *name, long len)
#define rb_utf8_encindex()
#define ENCINDEX_US_ASCII
int rb_enc_dummy_p(rb_encoding *enc)
int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
int rb_enc_get_index(VALUE obj)
VALUE rb_enc_associate(VALUE obj, rb_encoding *enc)
rb_encoding * rb_utf8_encoding(void)
rb_encoding * rb_enc_check_str(VALUE str1, VALUE str2)
rb_encoding * rb_ascii8bit_encoding(void)
unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len_p, rb_encoding *enc)
rb_encoding * rb_enc_from_index(int index)
rb_encoding * rb_filesystem_encoding(void)
int rb_enc_autoload(rb_encoding *enc)
rb_encoding * rb_default_internal_encoding(void)
int rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc)
rb_encoding * rb_enc_get(VALUE obj)
rb_encoding * rb_enc_get_from_index(int index)
int rb_enc_unicode_p(rb_encoding *enc)
void rb_enc_copy(VALUE obj1, VALUE obj2)
int rb_enc_to_index(rb_encoding *enc)
void rb_enc_set_index(VALUE obj, int idx)
rb_encoding * rb_default_external_encoding(void)
rb_encoding * rb_enc_check(VALUE str1, VALUE str2)
int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
rb_encoding * rb_enc_compatible(VALUE str1, VALUE str2)
rb_encoding * rb_locale_encoding(void)
VALUE rb_obj_encoding(VALUE obj)
rb_encoding * rb_to_encoding(VALUE enc)
rb_encoding * rb_usascii_encoding(void)
VALUE rb_enc_from_encoding(rb_encoding *encoding)
VALUE rb_enc_associate_index(VALUE obj, int idx)
int rb_enc_codelen(int c, rb_encoding *enc)
int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
char str[HTML_ESCAPE_MAX_LEN+1]
#define RSTRING_LEN(string)
#define RSTRING_PTR(string)
void ruby_xfree(void *x)
Deallocates a storage instance.
int rb_objspace_garbage_object_p(VALUE obj)
void rb_gc_force_recycle(VALUE obj)
void rb_gc_register_address(VALUE *addr)
Inform the garbage collector that valptr points to a live Ruby object that should not be moved.
#define rb_intern_str(string)
void rb_include_module(VALUE klass, VALUE module)
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
VALUE rb_define_module(const char *name)
void rb_undef_method(VALUE klass, const char *name)
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
int rb_block_given_p(void)
Determines if the current method is given a block.
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
void rb_syserr_fail(int e, const char *mesg)
void rb_raise(VALUE exc, const char *fmt,...)
void rb_warn_deprecated_to_remove(const char *fmt, const char *removal,...)
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
void rb_bug(const char *fmt,...)
void rb_fatal(const char *fmt,...)
void rb_warn_deprecated(const char *fmt, const char *suggest,...)
VALUE rb_ensure(VALUE(*b_proc)(VALUE), VALUE data1, VALUE(*e_proc)(VALUE), VALUE data2)
An equivalent to ensure clause.
VALUE rb_cObject
Object class.
VALUE rb_any_to_s(VALUE)
Default implementation of #to_s.
VALUE rb_obj_alloc(VALUE)
Allocates an instance of klass.
VALUE rb_obj_frozen_p(VALUE)
double rb_str_to_dbl(VALUE, int)
Parses a string representation of a floating point number.
VALUE rb_obj_class(VALUE)
VALUE rb_check_convert_type_with_id(VALUE, int, const char *, ID)
VALUE rb_convert_type_with_id(VALUE v, int t, const char *nam, ID mid)
VALUE rb_equal(VALUE, VALUE)
This function is an optimized version of calling #==.
VALUE rb_obj_freeze(VALUE)
Make the object unmodifiable.
VALUE rb_str_escape(VALUE str)
VALUE rb_to_int(VALUE)
Converts val into Integer.
unsigned char suffix[65536]
unsigned char match[65280+2]
unsigned short prefix[65536]
void skip(file *in, unsigned n)
VALUE rb_check_hash_type(VALUE hash)
VALUE rb_hash_aref(VALUE hash, VALUE key)
VALUE rb_hash_aset(VALUE hash, VALUE key, VALUE val)
VALUE rb_hash_lookup(VALUE hash, VALUE key)
#define ENCODING_SET_INLINED(obj, i)
#define ENC_CODERANGE_7BIT
#define ENC_CODERANGE_VALID
#define rb_enc_left_char_head(s, p, e, enc)
#define rb_enc_mbcput(c, buf, enc)
#define ENC_CODERANGE_CLEAN_P(cr)
#define rb_enc_isctype(c, t, enc)
#define ENC_CODERANGE_AND(a, b)
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags)
@ econv_destination_buffer_full
#define rb_enc_step_back(s, p, e, n, enc)
long rb_memsearch(const void *, long, const void *, long, rb_encoding *)
#define rb_enc_prev_char(s, p, e, enc)
int rb_enc_symname2_p(const char *, long, rb_encoding *)
#define ENC_CODERANGE(obj)
#define ENC_CODERANGE_UNKNOWN
#define rb_enc_isascii(c, enc)
#define rb_enc_mbmaxlen(enc)
#define ENCODING_GET(obj)
#define ENC_CODERANGE_MASK
#define rb_enc_mbc_to_codepoint(p, e, enc)
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts)
#define MBCLEN_CHARFOUND_LEN(ret)
#define rb_enc_asciicompat(enc)
#define rb_enc_codepoint(p, e, enc)
VALUE rb_enc_sprintf(rb_encoding *, const char *,...)
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
#define ENCODING_INLINE_MAX
#define MBCLEN_INVALID_P(ret)
#define rb_enc_code_to_mbclen(c, enc)
#define rb_enc_isprint(c, enc)
#define MBCLEN_NEEDMORE_P(ret)
#define rb_enc_mbminlen(enc)
#define ENC_CODERANGE_BROKEN
#define MBCLEN_CHARFOUND_P(ret)
void rb_econv_close(rb_econv_t *ec)
#define rb_enc_right_char_head(s, p, e, enc)
#define ENCODING_GET_INLINED(obj)
#define ENC_CODERANGE_CLEAR(obj)
#define ENCODING_IS_ASCII8BIT(obj)
#define ENC_CODERANGE_SET(obj, cr)
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
#define rb_enc_is_newline(p, end, enc)
Thin wrapper to ruby/config.h.
#define UNALIGNED_WORD_ACCESS
@ RB_WARN_CATEGORY_DEPRECATED
VALUE rb_funcall(VALUE, ID, int,...)
Calls a method.
VALUE rb_funcall_with_block_kw(VALUE, ID, int, const VALUE *, VALUE, int)
#define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn)
#define RETURN_ENUMERATOR(obj, argc, argv)
#define UNLIMITED_ARGUMENTS
void rb_error_arity(int, int, int)
VALUE rb_backref_get(void)
VALUE rb_sym_all_symbols(void)
void rb_backref_set(VALUE)
VALUE rb_range_beg_len(VALUE, long *, long *, long, int)
int rb_reg_backref_number(VALUE match, VALUE backref)
void rb_match_busy(VALUE)
int rb_reg_options(VALUE)
VALUE rb_reg_match(VALUE, VALUE)
VALUE rb_reg_nth_match(int, VALUE)
#define rb_utf8_str_new_cstr(str)
st_index_t rb_memhash(const void *ptr, long len)
#define rb_str_new(str, len)
#define rb_usascii_str_new(str, len)
#define rb_str_buf_new_cstr(str)
#define rb_usascii_str_new_cstr(str)
rb_gvar_setter_t rb_str_setter
#define rb_external_str_new_cstr(str)
#define rb_strlen_lit(str)
VALUE rb_str_intern(VALUE)
#define rb_locale_str_new_cstr(str)
VALUE rb_str_locktmp(VALUE)
#define rb_str_cat_cstr(buf, str)
#define rb_utf8_str_new(str, len)
#define rb_tainted_str_new_cstr(str)
#define rb_str_new_cstr(str)
int rb_respond_to(VALUE, ID)
void rb_define_alloc_func(VALUE, rb_alloc_func_t)
void rb_undef_alloc_func(VALUE)
ID rb_intern(const char *)
void * memmove(void *, const void *, size_t)
char * crypt(const char *, const char *)
VALUE rb_reg_regcomp(VALUE)
VALUE rb_reg_regsub(VALUE, VALUE, struct re_registers *, VALUE)
long rb_reg_search(VALUE, VALUE, long, int)
#define scan_hex(s, l, e)
Internal header for Array.
Internal header for Comparable.
Internal header for Encoding.
#define rb_enc_autoload_p(enc)
#define SIZED_REALLOC_N(v, T, m, n)
#define RB_EC_NEWOBJ_OF(ec, var, T, c, f)
Internal header for Numeric.
VALUE rb_int_and(VALUE x, VALUE y)
int rb_num_to_uint(VALUE val, unsigned int *ret)
Internal header for Object.
Internal header for Proc.
VALUE rb_sym_to_proc(VALUE sym)
Internal header for Regexp.
void rb_backref_set_string(VALUE string, long pos, long len)
VALUE rb_reg_check_preprocess(VALUE)
long rb_reg_search0(VALUE, VALUE, long, int, int)
VALUE rb_reg_match_p(VALUE re, VALUE str, long pos)
bool rb_reg_start_with_p(VALUE re, VALUE str)
void rb_match_unbusy(VALUE)
Internal header for String.
#define rb_fstring_lit(str)
st_table * rb_vm_fstring_table(void)
#define RUBY_DTRACE_CREATE_HOOK(name, arg)
#define rb_fstring_cstr(...)
typedef long(ZCALLBACK *tell_file_func) OF((voidpf opaque
typedef int(ZCALLBACK *close_file_func) OF((voidpf opaque
Internal header for Math.
int memcmp(const void *s1, const void *s2, size_t len)
#define MEMCPY(p1, p2, type, n)
#define ALLOCA_N(type, n)
#define MEMZERO(p, type, n)
void rb_define_hooked_variable(const char *q, VALUE *w, type *e, void_type *r)
Define a function-backed global variable.
#define ONIGENC_CTYPE_DIGIT
ONIG_EXTERN int onig_error_code_to_str(OnigUChar *s, OnigPosition err_code,...)
#define ONIGENC_CASE_ASCII_ONLY
unsigned int OnigCaseFoldType
#define ONIG_MAX_ERROR_MESSAGE_LEN
ONIG_EXTERN int onig_new(OnigRegex *, const OnigUChar *pattern, const OnigUChar *pattern_end, OnigOptionType option, OnigEncoding enc, const OnigSyntaxType *syntax, OnigErrorInfo *einfo)
#define ONIGENC_CASE_MODIFIED
#define ONIGENC_MBCLEN_CHARFOUND_LEN(r)
#define ONIGENC_CTYPE_ALPHA
#define ONIGENC_CODE_TO_MBC_MAXLEN
#define ONIGENC_MBCLEN_CHARFOUND_P(r)
#define ONIGENC_CASE_UPCASE
#define ONIGENC_CASE_FOLD
#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE
ONIG_EXTERN const OnigSyntaxType * OnigDefaultSyntax
#define ONIGENC_CASE_DOWNCASE
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, s, end)
ONIG_EXTERN int onigenc_ascii_only_case_map(OnigCaseFoldType *flagP, const OnigUChar **pp, const OnigUChar *end, OnigUChar *to, OnigUChar *to_end, const struct OnigEncodingTypeST *enc)
ONIG_EXTERN OnigPosition onig_match(OnigRegex, const OnigUChar *str, const OnigUChar *end, const OnigUChar *at, OnigRegion *region, OnigOptionType option)
#define ONIGENC_CASE_FOLD_TURKISH_AZERI
#define ONIGENC_CASE_TITLECASE
#define ONIGENC_CASE_FOLD_LITHUANIAN
#define ONIGENC_CODE_TO_MBCLEN(enc, code)
#define ONIGERR_INVALID_CODE_POINT_VALUE
#define ONIG_OPTION_DEFAULT
#define RARRAY_CONST_PTR(s)
#define RGENGC_WB_PROTECTED_STRING
#define RB_OBJ_WRITE(a, slot, b)
WB for new reference from ‘a’ to ‘b’.
#define RSTRING_GETMEM(str, ptrvar, lenvar)
#define RSTRING_EMBED_LEN_MAX
#define StringValueCStr(v)
#define RSTRING_EMBED_LEN_MASK
#define TypedData_Wrap_Struct(klass, data_type, sval)
VALUE rb_require(const char *)
#define RB_INTEGER_TYPE_P(obj)
Internal header for ASAN / MSAN / etc.
VALUE rb_str_format(int, const VALUE *, VALUE)
size_t strlen(const char *)
VALUE rb_str_to_interned_str(VALUE str)
VALUE rb_setup_fake_str(struct RString *fake_str, const char *name, long len, rb_encoding *enc)
VALUE rb_str_initialize(VALUE str, const char *ptr, long len, rb_encoding *enc)
#define STR_SET_LEN(str, n)
#define STR_EMBEDDABLE_P(len, termlen)
int rb_str_symname_p(VALUE sym)
VALUE rb_str_locktmp_ensure(VALUE str, VALUE(*func)(VALUE), VALUE arg)
VALUE rb_str_include_range_p(VALUE beg, VALUE end, VALUE val, VALUE exclusive)
void rb_str_free(VALUE str)
VALUE rb_str_times(VALUE str, VALUE times)
#define SHARABLE_SUBSTRING_P(beg, len, end)
long rb_str_coderange_scan_restartable(const char *s, const char *e, rb_encoding *enc, int *cr)
#define STR_HEAP_PTR(str)
VALUE rb_str_new_shared(VALUE str)
const char * ruby_escaped_char(int c)
void rb_str_change_terminator_length(VALUE str, const int oldtermlen, const int termlen)
VALUE rb_str_new_frozen(VALUE orig)
VALUE rb_str_eql(VALUE str1, VALUE str2)
#define aligned_ptr(value)
VALUE rb_str_buf_cat_ascii(VALUE str, const char *ptr)
VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
#define STR_SET_NOEMBED(str)
#define STR_SET_SHARED(str, shared_str)
VALUE rb_str_buf_append(VALUE str, VALUE str2)
VALUE rb_str_cat(VALUE str, const char *ptr, long len)
VALUE rb_filesystem_str_new(const char *ptr, long len)
long rb_enc_strlen(const char *p, const char *e, rb_encoding *enc)
#define rb_str_splice(str, beg, len, val)
VALUE rb_str_export(VALUE str)
#define DEFAULT_REPLACE_CHAR(str)
char * rb_string_value_cstr(volatile VALUE *ptr)
VALUE rb_sym_to_s(VALUE sym)
VALUE rb_sym_proc_call(ID mid, int argc, const VALUE *argv, int kw_splat, VALUE passed_proc)
void rb_str_shared_replace(VALUE str, VALUE str2)
VALUE rb_external_str_new(const char *ptr, long len)
VALUE rb_str_tmp_new(long len)
char * rb_str_fill_terminator(VALUE str, const int newminlen)
long rb_str_offset(VALUE str, long pos)
char * rb_str_subpos(VALUE str, long beg, long *lenp)
VALUE rb_str_succ(VALUE orig)
#define CASE_MAPPING_ADDITIONAL_LENGTH
int rb_str_hash_cmp(VALUE str1, VALUE str2)
VALUE rb_str_subseq(VALUE str, long beg, long len)
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
#define RUBY_MAX_CHAR_LEN
VALUE rb_str_new_static(const char *ptr, long len)
*_str_new_static functions are intended for C string literals.
int rb_enc_str_coderange(VALUE str)
VALUE rb_str_chomp_string(VALUE str, VALUE rs)
#define ENUM_ELEM(ary, e)
VALUE rb_str_upto_each(VALUE beg, VALUE end, int excl, int(*each)(VALUE, VALUE), VALUE arg)
size_t rb_str_capacity(VALUE str)
VALUE rb_str_cat_conv_enc_opts(VALUE newstr, long ofs, const char *ptr, long len, rb_encoding *from, int ecflags, VALUE ecopts)
#define STR_SET_EMBED(str)
const struct st_hash_type rb_fstring_hash_type
#define BARE_STRING_P(str)
VALUE rb_str_dup(VALUE str)
void rb_str_modify(VALUE str)
VALUE rb_enc_str_new_static(const char *ptr, long len, rb_encoding *enc)
VALUE rb_str_to_str(VALUE str)
st_index_t rb_str_hash(VALUE str)
VALUE rb_fstring_enc_new(const char *ptr, long len, rb_encoding *enc)
long rb_str_strlen(VALUE str)
VALUE rb_str_resurrect(VALUE str)
VALUE rb_str_quote_unprintable(VALUE str)
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
VALUE rb_external_str_with_enc(VALUE str, rb_encoding *eenc)
VALUE rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc)
VALUE rb_str_opt_plus(VALUE str1, VALUE str2)
#define WANTARRAY(m, size)
VALUE rb_interned_str_cstr(const char *ptr)
VALUE rb_filesystem_str_new_cstr(const char *ptr)
#define rb_str_index(str, sub, offset)
size_t rb_str_memsize(VALUE str)
int rb_str_buf_cat_escaped_char(VALUE result, unsigned int c, int unicode_p)
VALUE rb_str_plus(VALUE str1, VALUE str2)
long rb_str_sublen(VALUE str, long pos)
VALUE rb_str_equal(VALUE str1, VALUE str2)
VALUE rb_str_tmp_frozen_acquire(VALUE orig)
VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *eenc)
VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc)
VALUE rb_str_concat_literals(size_t num, const VALUE *strary)
VALUE rb_str_replace(VALUE str, VALUE str2)
VALUE rb_check_string_type(VALUE str)
void rb_str_set_len(VALUE str, long len)
VALUE rb_str_export_locale(VALUE str)
VALUE rb_str_inspect(VALUE str)
VALUE rb_enc_str_new(const char *ptr, long len, rb_encoding *enc)
VALUE rb_interned_str(const char *ptr, long len)
void rb_str_make_independent(VALUE str)
#define CHECK_IF_ASCII(c)
VALUE rb_tainted_str_new(const char *ptr, long len)
VALUE rb_str_length(VALUE str)
int rb_str_comparable(VALUE str1, VALUE str2)
#define str_buf_cat2(str, ptr)
VALUE rb_obj_as_string_result(VALUE str, VALUE obj)
#define MIN_PRE_ALLOC_SIZE
VALUE rb_str_append(VALUE str, VALUE str2)
#define RESIZE_CAPA_TERM(str, capacity, termlen)
VALUE rb_fstring_new(const char *ptr, long len)
VALUE rb_str_freeze(VALUE str)
VALUE rb_string_value(volatile VALUE *ptr)
void rb_str_modify_expand(VALUE str, long expand)
#define RESIZE_CAPA(str, capacity)
VALUE rb_str_scrub(VALUE str, VALUE repl)
int rb_enc_str_asciionly_p(VALUE str)
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *ptr_enc)
#define STR_SET_EMBED_LEN(str, n)
VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len)
#define rb_str_dup_frozen
int rb_str_cmp(VALUE str1, VALUE str2)
VALUE rb_to_symbol(VALUE name)
char * rb_enc_nth(const char *p, const char *e, long nth, rb_encoding *enc)
void rb_str_update(VALUE str, long beg, long len, VALUE val)
char * rb_str_to_cstr(VALUE str)
VALUE rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc)
VALUE rb_str_substr(VALUE str, long beg, long len)
VALUE rb_str_unlocktmp(VALUE str)
VALUE rb_id_quote_unprintable(ID id)
VALUE rb_str_upto_endless_each(VALUE beg, int(*each)(VALUE, VALUE), VALUE arg)
VALUE rb_str_resize(VALUE str, long len)
void rb_must_asciicompat(VALUE str)
VALUE rb_utf8_str_new_static(const char *ptr, long len)
VALUE rb_str_split(VALUE str, const char *sep0)
char * rb_string_value_ptr(volatile VALUE *ptr)
VALUE rb_str_dump(VALUE str)
VALUE rb_str_concat(VALUE str1, VALUE str2)
void rb_str_tmp_frozen_release(VALUE orig, VALUE tmp)
VALUE rb_locale_str_new(const char *ptr, long len)
VALUE rb_str_buf_new(long capa)
#define SPLIT_STR(beg, len)
#define STR_HEAP_SIZE(str)
VALUE rb_str_drop_bytes(VALUE str, long len)
int rb_enc_str_coderange_scan(VALUE str, rb_encoding *enc)
long rb_enc_strlen_cr(const char *p, const char *e, rb_encoding *enc, int *cr)
#define TERM_FILL(ptr, termlen)
VALUE rb_ec_str_resurrect(struct rb_execution_context_struct *ec, VALUE str)
VALUE rb_str_export_to_enc(VALUE str, rb_encoding *enc)
VALUE rb_fstring(VALUE str)
VALUE rb_enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl)
VALUE rb_usascii_str_new_static(const char *ptr, long len)
VALUE rb_obj_as_string(VALUE obj)
int(* case_map)(OnigCaseFoldType *flagP, const OnigUChar **pp, const OnigUChar *end, OnigUChar *to, OnigUChar *to_end, const struct OnigEncodingTypeST *enc)
union RString::@100::@101::@102 aux
struct RString::@100::@101 heap
OnigUChar space[FLEX_ARY_LEN]
struct mapping_buffer * next
Internal header for Encoding::Converter.
VALUE rb_cEncodingConverter
#define RB_VM_LOCK_ENTER()
#define RB_VM_LOCK_LEAVE()