27#define ENABLE_ECONV_NEWLINE_OPTION 1
30static VALUE rb_eUndefinedConversionError;
31static VALUE rb_eInvalidByteSequenceError;
32static VALUE rb_eConverterNotFoundError;
36static VALUE sym_invalid, sym_undef, sym_replace, sym_fallback;
37static VALUE sym_xml, sym_text, sym_attr;
38static VALUE sym_universal_newline;
39static VALUE sym_crlf_newline;
40static VALUE sym_cr_newline;
41#ifdef ENABLE_ECONV_NEWLINE_OPTION
42static VALUE sym_newline, sym_universal, sym_crlf, sym_cr, sym_lf;
44static VALUE sym_partial_input;
46static VALUE sym_invalid_byte_sequence;
47static VALUE sym_undefined_conversion;
48static VALUE sym_destination_buffer_full;
49static VALUE sym_source_buffer_empty;
50static VALUE sym_finished;
51static VALUE sym_after_output;
52static VALUE sym_incomplete_input;
55allocate_converted_string(
const char *sname,
const char *dname,
56 const unsigned char *
str,
size_t len,
57 unsigned char *caller_dst_buf,
size_t caller_dst_bufsize,
89 char ary[
sizeof(double) >
sizeof(
void*) ?
sizeof(double) :
sizeof(
void*)];
93#define TRANSCODING_READBUF(tc) \
94 ((tc)->transcoder->max_input <= (int)sizeof((tc)->readbuf.ary) ? \
97#define TRANSCODING_WRITEBUF(tc) \
98 ((tc)->transcoder->max_output <= (int)sizeof((tc)->writebuf.ary) ? \
99 (tc)->writebuf.ary : \
101#define TRANSCODING_WRITEBUF_SIZE(tc) \
102 ((tc)->transcoder->max_output <= (int)sizeof((tc)->writebuf.ary) ? \
103 sizeof((tc)->writebuf.ary) : \
104 (size_t)(tc)->transcoder->max_output)
105#define TRANSCODING_STATE_EMBED_MAX ((int)sizeof(union rb_transcoding_state_t))
106#define TRANSCODING_STATE(tc) \
107 ((tc)->transcoder->state_size <= (int)sizeof((tc)->state) ? \
163#define DECORATOR_P(sname, dname) (*(sname) == '\0')
175make_transcoder_entry(
const char *sname,
const char *dname)
187 entry->
sname = sname;
188 entry->
dname = dname;
198get_transcoder_entry(
const char *sname,
const char *dname)
216 const char *
const sname =
tr->src_encoding;
217 const char *
const dname =
tr->dst_encoding;
221 entry = make_transcoder_entry(sname, dname);
231declare_transcoder(
const char *sname,
const char *dname,
const char *lib)
235 entry = make_transcoder_entry(sname, dname);
239static const char transcoder_lib_prefix[] =
"enc/trans/";
247 declare_transcoder(enc1, enc2, lib);
250#define encoding_equal(enc1, enc2) (STRCASECMP((enc1), (enc2)) == 0)
267 const char *dname = (
const char *)
key;
286transcode_search_path(
const char *sname,
const char *dname,
287 void (*callback)(
const char *sname,
const char *dname,
int depth,
void *arg),
344 const char *enc = dname;
352 enc = (
const char *)val;
360 callback((
const char *)val, enc, --depth, arg);
361 enc = (
const char *)val;
377 const char *
const lib = entry->
lib;
379 const size_t total_len =
sizeof(transcoder_lib_prefix) - 1 +
len;
383 memcpy(path, transcoder_lib_prefix,
sizeof(transcoder_lib_prefix) - 1);
384 memcpy(path +
sizeof(transcoder_lib_prefix) - 1, lib,
len);
397get_replacement_character(
const char *encname,
size_t *len_ret,
const char **repl_encname_ptr)
401 *repl_encname_ptr =
"UTF-8";
402 return "\xEF\xBF\xBD";
406 *repl_encname_ptr =
"US-ASCII";
415static const unsigned char *
417 const unsigned char *in_start,
418 const unsigned char *inchar_start,
419 const unsigned char *in_p,
420 size_t *char_len_ptr)
422 const unsigned char *
ptr;
423 if (inchar_start - in_start < tc->recognized_len) {
425 inchar_start,
unsigned char, in_p - inchar_start);
436transcode_restartable0(
const unsigned char **in_pos,
unsigned char **out_pos,
437 const unsigned char *in_stop,
unsigned char *out_stop,
442 int unitlen =
tr->input_unit_length;
443 ssize_t readagain_len = 0;
445 const unsigned char *inchar_start;
446 const unsigned char *in_p;
448 unsigned char *out_p;
450 in_p = inchar_start = *in_pos;
454#define SUSPEND(ret, num) \
456 tc->resume_position = (num); \
457 if (0 < in_p - inchar_start) \
458 MEMMOVE(TRANSCODING_READBUF(tc)+tc->recognized_len, \
459 inchar_start, unsigned char, in_p - inchar_start); \
462 tc->recognized_len += in_p - inchar_start; \
463 if (readagain_len) { \
464 tc->recognized_len -= readagain_len; \
465 tc->readagain_len = readagain_len; \
468 resume_label ## num:; \
470#define SUSPEND_OBUF(num) \
472 while (out_stop - out_p < 1) { SUSPEND(econv_destination_buffer_full, num); } \
475#define SUSPEND_AFTER_OUTPUT(num) \
476 if ((opt & ECONV_AFTER_OUTPUT) && *out_pos != out_p) { \
477 SUSPEND(econv_after_output, num); \
480#define next_table (tc->next_table)
481#define next_info (tc->next_info)
482#define next_byte (tc->next_byte)
483#define writebuf_len (tc->writebuf_len)
484#define writebuf_off (tc->writebuf_off)
488 case 1:
goto resume_label1;
489 case 2:
goto resume_label2;
490 case 3:
goto resume_label3;
491 case 4:
goto resume_label4;
492 case 5:
goto resume_label5;
493 case 6:
goto resume_label6;
494 case 7:
goto resume_label7;
495 case 8:
goto resume_label8;
496 case 9:
goto resume_label9;
497 case 10:
goto resume_label10;
498 case 11:
goto resume_label11;
499 case 12:
goto resume_label12;
500 case 13:
goto resume_label13;
501 case 14:
goto resume_label14;
502 case 15:
goto resume_label15;
503 case 16:
goto resume_label16;
504 case 17:
goto resume_label17;
505 case 18:
goto resume_label18;
506 case 19:
goto resume_label19;
507 case 20:
goto resume_label20;
508 case 21:
goto resume_label21;
509 case 22:
goto resume_label22;
510 case 23:
goto resume_label23;
511 case 24:
goto resume_label24;
512 case 25:
goto resume_label25;
513 case 26:
goto resume_label26;
514 case 27:
goto resume_label27;
515 case 28:
goto resume_label28;
516 case 29:
goto resume_label29;
517 case 30:
goto resume_label30;
518 case 31:
goto resume_label31;
519 case 32:
goto resume_label32;
520 case 33:
goto resume_label33;
521 case 34:
goto resume_label34;
531 if (in_stop <= in_p) {
538#define BYTE_ADDR(index) (tr->byte_array + (index))
539#define WORD_ADDR(index) (tr->word_array + INFO2WORDINDEX(index))
540#define BL_BASE BYTE_ADDR(BYTE_LOOKUP_BASE(WORD_ADDR(next_table)))
541#define BL_INFO WORD_ADDR(BYTE_LOOKUP_INFO(WORD_ADDR(next_table)))
542#define BL_MIN_BYTE (BL_BASE[0])
543#define BL_MAX_BYTE (BL_BASE[1])
544#define BL_OFFSET(byte) (BL_BASE[2+(byte)-BL_MIN_BYTE])
545#define BL_ACTION(byte) (BL_INFO[BL_OFFSET((byte))])
558 const unsigned char *p = inchar_start;
571 case 0x00:
case 0x04:
case 0x08:
case 0x0C:
572 case 0x10:
case 0x14:
case 0x18:
case 0x1C:
574 while (in_p >= in_stop) {
620 const unsigned char *char_start;
622 char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
628 if (
tr->max_output <= out_stop - out_p)
644 const unsigned char *char_start;
647 if (
tr->max_output <= out_stop - out_p) {
648 char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
650 char_start, (
size_t)char_len,
651 out_p, out_stop - out_p);
654 char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
656 char_start, (
size_t)char_len,
668 const unsigned char *char_start;
671 if (
tr->max_output <= out_stop - out_p) {
672 char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
675 out_p, out_stop - out_p);
678 char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
709 discard_len = ((invalid_len - 1) / unitlen) * unitlen;
710 readagain_len = invalid_len - discard_len;
734 if (
tr->finish_func) {
736 if (
tr->max_output <= out_stop - out_p) {
738 out_p, out_stop - out_p);
761transcode_restartable(
const unsigned char **in_pos,
unsigned char **out_pos,
762 const unsigned char *in_stop,
unsigned char *out_stop,
768 const unsigned char *readagain_pos = readagain_buf;
769 const unsigned char *readagain_stop = readagain_buf + tc->
readagain_len;
775 res = transcode_restartable0(&readagain_pos, out_pos, readagain_stop, out_stop, tc, opt|
ECONV_PARTIAL_INPUT);
778 readagain_pos,
unsigned char, readagain_stop - readagain_pos);
783 return transcode_restartable0(in_pos, out_pos, in_stop, out_stop, tc, opt);
794 if (TRANSCODING_STATE_EMBED_MAX < tr->state_size)
796 if (
tr->state_init_func) {
815 const unsigned char **input_ptr,
const unsigned char *input_stop,
816 unsigned char **output_ptr,
unsigned char *output_stop,
819 return transcode_restartable(
820 input_ptr, output_ptr,
821 input_stop, output_stop,
829 if (
tr->state_fini_func) {
832 if (TRANSCODING_STATE_EMBED_MAX < tr->state_size)
847 if (TRANSCODING_STATE_EMBED_MAX < tr->state_size) {
860rb_econv_alloc(
int n_hint)
914 ec->
elems[i].
tc = rb_transcoding_open_by_transcoder(
tr, 0);
942 for (i = 0; i < n; i++) {
944 tr = load_transcoder_entry(entries[i]);
949 ec = rb_econv_alloc(n);
951 for (i = 0; i < n; i++) {
953 ret = rb_econv_add_transcoder_at(ec,
tr, ec->
num_trans);
969trans_open_i(
const char *sname,
const char *dname,
int depth,
void *arg)
976 toarg->
entries[depth] = get_transcoder_entry(sname, dname);
980rb_econv_open0(
const char *sname,
const char *dname,
int ecflags)
991 if (*sname ==
'\0' && *dname ==
'\0') {
999 toarg.num_additional = 0;
1000 num_trans = transcode_search_path(sname, dname, trans_open_i, (
void *)&toarg);
1002 if (num_trans < 0) {
1008 ec = rb_econv_open_by_transcoder_entries(num_trans,
entries);
1013 ec->
flags = ecflags;
1020#define MAX_ECFLAGS_DECORATORS 32
1023decorator_names(
int ecflags,
const char **decorators_ret)
1044 decorators_ret[num_decorators++] =
"xml_text_escape";
1046 decorators_ret[num_decorators++] =
"xml_attr_content_escape";
1048 decorators_ret[num_decorators++] =
"xml_attr_quote";
1051 decorators_ret[num_decorators++] =
"crlf_newline";
1053 decorators_ret[num_decorators++] =
"cr_newline";
1055 decorators_ret[num_decorators++] =
"universal_newline";
1057 return num_decorators;
1068 num_decorators = decorator_names(ecflags, decorators);
1069 if (num_decorators == -1)
1076 for (i = 0; i < num_decorators; i++)
1082 ec->
flags |= ecflags & ~ECONV_ERROR_HANDLER_MASK;
1089 const unsigned char **input_ptr,
const unsigned char *input_stop,
1090 unsigned char **output_ptr,
unsigned char *output_stop,
1097 const unsigned char **ipp, *is, *iold;
1098 unsigned char **opp, *os, *oold;
1104 for (i = start; i < ec->
num_trans; i++) {
1138 flags &= ~ECONV_AFTER_OUTPUT;
1141 f &= ~ECONV_AFTER_OUTPUT;
1144 te->
last_result = res = rb_transcoding_convert(te->
tc, ipp, is, opp, os,
f);
1145 if (iold != *ipp || oold != *opp)
1170 const unsigned char **input_ptr,
const unsigned char *input_stop,
1171 unsigned char **output_ptr,
unsigned char *output_stop,
1173 int *result_position_ptr)
1176 int needreport_index;
1179 unsigned char empty_buf;
1180 unsigned char *empty_ptr = &empty_buf;
1183 input_ptr = (
const unsigned char **)&empty_ptr;
1184 input_stop = empty_ptr;
1188 output_ptr = &empty_ptr;
1189 output_stop = empty_ptr;
1195 for (i = ec->
num_trans-1; 0 <= i; i--) {
1203 goto found_needreport;
1210 rb_bug(
"unexpected transcode last result");
1220 res = rb_trans_conv(ec,
NULL,
NULL, output_ptr, output_stop,
1222 result_position_ptr);
1234 needreport_index = trans_sweep(ec, input_ptr, input_stop, output_ptr, output_stop, flags, sweep_start);
1235 sweep_start = needreport_index + 1;
1236 }
while (needreport_index != -1 && needreport_index != ec->
num_trans-1);
1238 for (i = ec->
num_trans-1; 0 <= i; i--) {
1247 if (result_position_ptr)
1248 *result_position_ptr = i;
1252 if (result_position_ptr)
1253 *result_position_ptr = -1;
1259 const unsigned char **input_ptr,
const unsigned char *input_stop,
1260 unsigned char **output_ptr,
unsigned char *output_stop,
1264 int result_position;
1272 if (output_stop - *output_ptr < ec->in_data_end - ec->
in_data_start) {
1273 len = output_stop - *output_ptr;
1275 *output_ptr = output_stop;
1289 if (output_stop - *output_ptr < input_stop - *input_ptr) {
1290 len = output_stop - *output_ptr;
1293 len = input_stop - *input_ptr;
1296 *(*output_ptr)++ = *(*input_ptr)++;
1303 if (*input_ptr != input_stop)
1315 if (data_start != data_end) {
1317 if (output_stop - *output_ptr < data_end - data_start) {
1318 len = output_stop - *output_ptr;
1320 *output_ptr = output_stop;
1325 len = data_end - data_start;
1345 *input_ptr != input_stop) {
1346 input_stop = *input_ptr;
1347 res = rb_trans_conv(ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
1353 res = rb_trans_conv(ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
1358 res = rb_trans_conv(ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
1379static int output_replacement_character(
rb_econv_t *ec);
1385 unsigned char utfbuf[1024];
1386 const unsigned char *utf;
1388 int utf_allocated = 0;
1389 char charef_buf[16];
1390 const unsigned char *p;
1399 utfbuf,
sizeof(utfbuf),
1407 if (utf_len % 4 != 0)
1411 while (4 <= utf_len) {
1417 snprintf(charef_buf,
sizeof(charef_buf),
"&#x%X;", u);
1439 const unsigned char **input_ptr,
const unsigned char *input_stop,
1440 unsigned char **output_ptr,
unsigned char *output_stop,
1445 unsigned char empty_buf;
1446 unsigned char *empty_ptr = &empty_buf;
1451 input_ptr = (
const unsigned char **)&empty_ptr;
1452 input_stop = empty_ptr;
1456 output_ptr = &empty_ptr;
1457 output_stop = empty_ptr;
1461 ret = rb_econv_convert0(ec, input_ptr, input_stop, output_ptr, output_stop, flags);
1469 if (output_replacement_character(ec) == 0)
1480 if (output_replacement_character(ec) == 0)
1485 if (output_hex_charref(ec) == 0)
1506 return tr->src_encoding;
1507 return tr->dst_encoding;
1510static unsigned char *
1511allocate_converted_string(
const char *sname,
const char *dname,
1512 const unsigned char *
str,
size_t len,
1513 unsigned char *caller_dst_buf,
size_t caller_dst_bufsize,
1514 size_t *dst_len_ptr)
1516 unsigned char *dst_str;
1523 const unsigned char *sp;
1527 dst_bufsize = caller_dst_bufsize;
1537 dst_str = caller_dst_buf;
1539 dst_str =
xmalloc(dst_bufsize);
1542 dp = dst_str+dst_len;
1544 dst_len =
dp - dst_str;
1550 if (dst_str == caller_dst_buf) {
1553 memcpy(tmp, dst_str, dst_bufsize/2);
1557 dst_str =
xrealloc(dst_str, dst_bufsize);
1559 dp = dst_str+dst_len;
1561 dst_len =
dp - dst_str;
1567 *dst_len_ptr = dst_len;
1571 if (dst_str != caller_dst_buf)
1580 const unsigned char *
str,
size_t len,
const char *str_encoding)
1583 unsigned char insert_buf[4096];
1584 const unsigned char *insert_str =
NULL;
1587 int last_trans_index;
1590 unsigned char **buf_start_p;
1591 unsigned char **data_start_p;
1592 unsigned char **data_end_p;
1593 unsigned char **buf_end_p;
1607 insert_str = allocate_converted_string(str_encoding, insert_encoding,
1608 str,
len, insert_buf,
sizeof(insert_buf), &insert_len);
1609 if (insert_str ==
NULL)
1624 tc = ec->
elems[last_trans_index].
tc;
1626 if (need < insert_len)
1628 if (last_trans_index == 0) {
1648 tc = ec->
elems[last_trans_index].
tc;
1651 if (*buf_start_p ==
NULL) {
1654 *data_start_p =
buf;
1656 *buf_end_p =
buf+need;
1658 else if ((
size_t)(*buf_end_p - *data_end_p) < need) {
1659 MEMMOVE(*buf_start_p, *data_start_p,
unsigned char, *data_end_p - *data_start_p);
1660 *data_end_p = *buf_start_p + (*data_end_p - *data_start_p);
1661 *data_start_p = *buf_start_p;
1662 if ((
size_t)(*buf_end_p - *data_end_p) < need) {
1664 size_t s = (*data_end_p - *buf_start_p) + need;
1668 *data_start_p =
buf;
1669 *data_end_p =
buf + (*data_end_p - *buf_start_p);
1671 *buf_end_p =
buf + s;
1675 memcpy(*data_end_p, insert_str, insert_len);
1676 *data_end_p += insert_len;
1683 if (insert_str !=
str && insert_str != insert_buf)
1684 xfree((
void*)insert_str);
1688 if (insert_str !=
str && insert_str != insert_buf)
1689 xfree((
void*)insert_str);
1702 rb_transcoding_close(ec->
elems[i].
tc);
1738#if SIZEOF_SIZE_T > SIZEOF_INT
1769 tr = load_transcoder_entry(entry);
1807 unsigned const char *sp, *se;
1808 unsigned char *ds, *
dp, *de;
1826 unsigned long new_capa = (
unsigned long)dlen +
len + max_output;
1832 sp = (
const unsigned char *)ss;
1838 len -= (
const char *)sp - ss;
1839 ss = (
const char *)sp;
1875rb_econv_add_converter(
rb_econv_t *ec,
const char *sname,
const char *dname,
int n)
1883 entry = get_transcoder_entry(sname, dname);
1887 tr = load_transcoder_entry(entry);
1890 return rb_econv_add_transcoder_at(ec,
tr, n);
1894rb_econv_decorate_at(
rb_econv_t *ec,
const char *decorator_name,
int n)
1896 return rb_econv_add_converter(ec,
"", decorator_name, n);
1905 return rb_econv_decorate_at(ec, decorator_name, 0);
1911 return rb_econv_decorate_at(ec, decorator_name, 1);
1913 return rb_econv_decorate_at(ec, decorator_name, 0);
1922 return rb_econv_decorate_at(ec, decorator_name, 0);
1928 return rb_econv_decorate_at(ec, decorator_name, ec->
num_trans-1);
1930 return rb_econv_decorate_at(ec, decorator_name, ec->
num_trans);
1936 const char *dname = 0;
1940 dname =
"universal_newline";
1943 dname =
"crlf_newline";
1946 dname =
"cr_newline";
1955 for (i=0; i < num_trans; i++) {
1957 rb_transcoding_close(ec->
elems[i].
tc);
1966 ec->
flags &= ~ECONV_NEWLINE_DECORATOR_MASK;
1970econv_description(
const char *sname,
const char *dname,
int ecflags,
VALUE mesg)
1972 int has_description = 0;
1977 if (*sname !=
'\0' || *dname !=
'\0') {
1980 else if (*dname ==
'\0')
1984 has_description = 1;
1991 const char *pre =
"";
1992 if (has_description)
2018 has_description = 1;
2020 if (!has_description) {
2032 econv_description(sname, dname, ecflags, mesg);
2034 exc =
rb_exc_new3(rb_eConverterNotFoundError, mesg);
2056 else if (readagain_len) {
2070 exc =
rb_exc_new3(rb_eInvalidByteSequenceError, mesg);
2083 const char *start, *end;
2107 mesg =
rb_sprintf(
"%s to %s in conversion from %s",
2118 exc =
rb_exc_new3(rb_eUndefinedConversionError, mesg);
2142 unsigned char *(*resize_destination)(
VALUE,
size_t,
size_t),
2144 unsigned char **out_start_ptr,
2145 unsigned char **out_pos,
2146 unsigned char **out_stop_ptr)
2148 size_t len = (*out_pos - *out_start_ptr);
2149 size_t new_len = (
len + max_output) * 2;
2150 *out_start_ptr = resize_destination(destination,
len, new_len);
2151 *out_pos = *out_start_ptr +
len;
2152 *out_stop_ptr = *out_start_ptr + new_len;
2160 const unsigned char *replacement;
2161 const char *repl_enc;
2162 const char *ins_enc;
2174 replacement = (
const unsigned char *)get_replacement_character(ins_enc, &
len, &repl_enc);
2177 replacement = (
unsigned char *)
"?";
2191 const unsigned char *
str,
size_t len,
const char *encname)
2193 unsigned char *str2;
2195 const char *encname2;
2206 str2 = allocate_converted_string(encname, encname2,
str,
len,
NULL, 0, &len2);
2226 if (make_replacement(ec) == -1)
2237#define hash_fallback rb_hash_aref
2258transcode_loop(
const unsigned char **in_pos,
unsigned char **out_pos,
2259 const unsigned char *in_stop,
unsigned char *out_stop,
2261 unsigned char *(*resize_destination)(
VALUE,
size_t,
size_t),
2262 const char *src_encoding,
2263 const char *dst_encoding,
2270 unsigned char *out_start = *out_pos;
2280 if (!
NIL_P(ecopts) && RB_TYPE_P(ecopts,
T_HASH)) {
2282 if (RB_TYPE_P(fallback,
T_HASH)) {
2306 rep = (*fallback_func)(fallback, rep);
2311 if ((
int)ret == -1) {
2321 exc = make_econv_exception(ec);
2327 more_output_buffer(destination, resize_destination, max_output, &out_start, out_pos, &out_stop);
2337transcode_loop(
const unsigned char **in_pos,
unsigned char **out_pos,
2338 const unsigned char *in_stop,
unsigned char *out_stop,
2340 unsigned char *(*resize_destination)(
VALUE,
size_t,
size_t),
2341 const char *src_encoding,
2342 const char *dst_encoding,
2349 unsigned char *out_start = *out_pos;
2350 const unsigned char *
ptr;
2364 unsigned char input_byte;
2365 const unsigned char *p = &input_byte;
2368 if (
ptr < in_stop) {
2379 if (&input_byte != p)
2380 ptr += p - &input_byte;
2385 exc = make_econv_exception(ec);
2391 more_output_buffer(destination, resize_destination, max_output, &out_start, out_pos, &out_stop);
2412static unsigned char *
2413str_transcoding_resize(
VALUE destination,
size_t len,
size_t new_len)
2420econv_opts(
VALUE opt,
int ecflags)
2423 int newlineflag = 0;
2428 else if (v==sym_replace) {
2438 else if (v==sym_replace) {
2455 else if (v==sym_attr) {
2466#ifdef ENABLE_ECONV_NEWLINE_OPTION
2470 ecflags &= ~ECONV_NEWLINE_DECORATOR_MASK;
2471 if (v == sym_universal) {
2474 else if (v == sym_crlf) {
2477 else if (v == sym_cr) {
2480 else if (v == sym_lf) {
2498 newlineflag |= !
NIL_P(v);
2503 newlineflag |= !
NIL_P(v);
2508 newlineflag |= !
NIL_P(v);
2510 switch (newlineflag) {
2512 ecflags &= ~ECONV_NEWLINE_DECORATOR_MASK;
2513 ecflags |= setflags;
2517 rb_warning(
":newline option preceds other newline options");
2531 if (
NIL_P(opthash)) {
2535 ecflags = econv_opts(opthash, ecflags);
2563 if (!
NIL_P(newhash))
2582 if (
NIL_P(opthash)) {
2587 rb_bug(
"rb_econv_open_opts called with invalid opthash");
2591 ec =
rb_econv_open(source_encoding, destination_encoding, ecflags);
2595 if (!
NIL_P(replacement)) {
2641 const char *sname, *dname;
2642 int sencidx, dencidx;
2644 dencidx = enc_arg(arg1, &dname, &denc);
2652 sencidx = enc_arg(arg2, &sname, &senc);
2669 unsigned char *
buf, *
bp, *sp;
2670 const unsigned char *fromp;
2672 const char *sname, *dname;
2674 int explicitly_invalid_replace =
TRUE;
2681 if (!ecflags)
return -1;
2685 explicitly_invalid_replace =
FALSE;
2693 dencidx = str_transcode_enc_args(
str, &arg1, &arg2, &sname, &senc, &dname, &denc);
2699 if (senc && senc == denc) {
2702 if (!
NIL_P(ecopts)) {
2710 return NIL_P(arg2) ? -1 : dencidx;
2718 return NIL_P(arg2) ? -1 : dencidx;
2740 transcode_loop(&fromp, &
bp, (sp+slen), (
bp+blen), dest, str_transcoding_resize, sname, dname, ecflags, ecopts);
2741 if (fromp != sp+slen) {
2770 return str_transcode0(
argc,
argv, self, ecflags, ecopts);
2774str_encode_associate(
VALUE str,
int encidx)
2814 encidx = str_transcode(
argc,
argv, &newstr);
2816 if (encidx < 0)
return str;
2817 if (newstr ==
str) {
2822 return str_encode_associate(
str, encidx);
2887 int encidx = str_transcode(
argc,
argv, &newstr);
2888 return encoded_dup(newstr,
str, encidx);
2897 int encidx = str_transcode0(
argc,
argv, &newstr, ecflags, ecopts);
2898 return encoded_dup(newstr,
str, encidx);
2905 if (newstr ==
str) {
2913 return str_encode_associate(newstr, encidx);
2922econv_free(
void *
ptr)
2929econv_memsize(
const void *
ptr)
2936 {0, econv_free, econv_memsize,},
2941econv_s_allocate(
VALUE klass)
2947make_dummy_encoding(
const char *
name)
2957make_encoding(
const char *
name)
2962 enc = make_dummy_encoding(
name);
2967make_encobj(
const char *
name)
2991econv_s_asciicompat_encoding(
VALUE klass,
VALUE arg)
2993 const char *arg_name, *result_name;
2996 enc_arg(&arg, &arg_name, &arg_enc);
3000 if (result_name ==
NULL)
3003 result_enc = make_encoding(result_name);
3011 const char **sname_p,
const char **dname_p,
3016 VALUE opt, flags_v, ecopts;
3018 const char *sname, *dname;
3024 if (!
NIL_P(flags_v)) {
3031 else if (!
NIL_P(opt)) {
3064 *ecflags_p = ecflags;
3069decorate_convpath(
VALUE convpath,
int ecflags)
3076 num_decorators = decorator_names(ecflags, decorators);
3077 if (num_decorators == -1)
3080 len = n = RARRAY_LENINT(convpath);
3083 if (RB_TYPE_P(pair,
T_ARRAY)) {
3101 for (i = 0; i < num_decorators; i++)
3108search_convpath_i(
const char *sname,
const char *dname,
int depth,
void *arg)
3113 if (*ary_p ==
Qnil) {
3121 v =
rb_assoc_new(make_encobj(sname), make_encobj(dname));
3154 VALUE snamev, dnamev;
3155 const char *sname, *dname;
3161 econv_args(
argc,
argv, &snamev, &dnamev, &sname, &dname, &senc, &denc, &ecflags, &ecopts);
3164 transcode_search_path(sname, dname, search_convpath_i, &convpath);
3166 if (
NIL_P(convpath)) {
3173 if (decorate_convpath(convpath, ecflags) == -1) {
3192 transcode_search_path(from_encoding, to_encoding, search_convpath_i,
3194 return RTEST(convpath);
3204rb_econv_init_by_convpath_i(
const char *sname,
const char *dname,
int depth,
void *arg)
3212 ret = rb_econv_add_converter(a->
ec, sname, dname, a->
index);
3219rb_econv_init_by_convpath(
VALUE self,
VALUE convpath,
3220 const char **sname_p,
const char **dname_p,
3228 const char *sname, *dname;
3230 ec = rb_econv_alloc(RARRAY_LENINT(convpath));
3234 VALUE snamev, dnamev;
3241 enc_arg(&snamev, &sname, &senc);
3243 enc_arg(&dnamev, &dname, &denc);
3264 ret = transcode_search_path(sname, dname, rb_econv_init_by_convpath_i, &arg);
3265 if (
ret == -1 || arg.ret == -1) {
3266 VALUE msg =
rb_sprintf(
"adding conversion failed: %s to %s", sname, dname);
3404 VALUE snamev, dnamev;
3405 const char *sname, *dname;
3416 ec = rb_econv_init_by_convpath(self, convpath, &sname, &dname, &senc, &denc);
3421 econv_args(
argc,
argv, &snamev, &dnamev, &sname, &dname, &senc, &denc, &ecflags, &ecopts);
3434 senc = make_dummy_encoding(sname);
3436 denc = make_dummy_encoding(dname);
3460econv_inspect(
VALUE self)
3467 return rb_sprintf(
"#<%s: uninitialized>", cname);
3473 econv_description(sname, dname,
ec->
flags,
str);
3480check_econv(
VALUE self)
3498econv_source_encoding(
VALUE self)
3513econv_destination_encoding(
VALUE self)
3544econv_convpath(
VALUE self)
3557 v =
rb_assoc_new(make_encobj(
tr->src_encoding), make_encobj(
tr->dst_encoding));
3714 VALUE input, output, output_byteoffset_v, output_bytesize_v, opt, flags_v;
3717 const unsigned char *ip, *is;
3718 unsigned char *op, *os;
3719 long output_byteoffset, output_bytesize;
3720 unsigned long output_byteend;
3725 if (
NIL_P(output_byteoffset_v))
3726 output_byteoffset = 0;
3728 output_byteoffset =
NUM2LONG(output_byteoffset_v);
3730 if (
NIL_P(output_bytesize_v))
3731 output_bytesize = 0;
3733 output_bytesize =
NUM2LONG(output_bytesize_v);
3735 if (!
NIL_P(flags_v)) {
3741 else if (!
NIL_P(opt)) {
3760 if (
NIL_P(output_bytesize_v)) {
3768 if (
NIL_P(output_byteoffset_v))
3771 if (output_byteoffset < 0)
3777 if (output_bytesize < 0)
3780 output_byteend = (
unsigned long)output_byteoffset +
3781 (
unsigned long)output_bytesize;
3783 if (output_byteend < (
unsigned long)output_byteoffset ||
3798 op = (
unsigned char *)
RSTRING_PTR(output) + output_byteoffset;
3799 os = op + output_bytesize;
3808 if (
LONG_MAX / 2 < output_bytesize)
3810 output_bytesize *= 2;
3811 output_byteoffset_v =
Qnil;
3819 return econv_result_to_symbol(res);
3857econv_convert(
VALUE self,
VALUE source_string)
3875 ret = econv_primitive_convert(ac, av, self);
3877 if (
ret == sym_invalid_byte_sequence ||
3878 ret == sym_undefined_conversion ||
3879 ret == sym_incomplete_input) {
3880 VALUE exc = make_econv_exception(
ec);
3884 if (
ret == sym_finished) {
3888 if (
ret != sym_source_buffer_empty) {
3889 rb_bug(
"unexpected result of econv_primitive_convert");
3907econv_finish(
VALUE self)
3923 ret = econv_primitive_convert(ac, av, self);
3925 if (
ret == sym_invalid_byte_sequence ||
3926 ret == sym_undefined_conversion ||
3927 ret == sym_incomplete_input) {
3928 VALUE exc = make_econv_exception(
ec);
3932 if (
ret != sym_finished) {
3933 rb_bug(
"unexpected result of econv_primitive_convert");
4015econv_primitive_errinfo(
VALUE self)
4075 const char *insert_enc;
4131 if (putbackable < n)
4166econv_last_error(
VALUE self)
4171 exc = make_econv_exception(
ec);
4190econv_get_replacement(
VALUE self)
4196 ret = make_replacement(
ec);
4198 rb_raise(rb_eUndefinedConversionError,
"replacement character setup failed");
4233 rb_raise(rb_eUndefinedConversionError,
"replacement character setup failed");
4242 return make_econv_exception(
ec);
4250 exc = make_econv_exception(
ec);
4263ecerr_source_encoding_name(
VALUE self)
4289ecerr_source_encoding(
VALUE self)
4301ecerr_destination_encoding_name(
VALUE self)
4313ecerr_destination_encoding(
VALUE self)
4334ecerr_error_char(
VALUE self)
4355ecerr_error_bytes(
VALUE self)
4367ecerr_readagain_bytes(
VALUE self)
4397ecerr_incomplete_input(
VALUE self)
4438 sym_invalid_byte_sequence =
ID2SYM(
rb_intern(
"invalid_byte_sequence"));
4440 sym_destination_buffer_full =
ID2SYM(
rb_intern(
"destination_buffer_full"));
4450#ifdef ENABLE_ECONV_NEWLINE_OPTION
4574 rb_define_method(rb_eUndefinedConversionError,
"source_encoding_name", ecerr_source_encoding_name, 0);
4575 rb_define_method(rb_eUndefinedConversionError,
"destination_encoding_name", ecerr_destination_encoding_name, 0);
4576 rb_define_method(rb_eUndefinedConversionError,
"source_encoding", ecerr_source_encoding, 0);
4577 rb_define_method(rb_eUndefinedConversionError,
"destination_encoding", ecerr_destination_encoding, 0);
4578 rb_define_method(rb_eUndefinedConversionError,
"error_char", ecerr_error_char, 0);
4580 rb_define_method(rb_eInvalidByteSequenceError,
"source_encoding_name", ecerr_source_encoding_name, 0);
4581 rb_define_method(rb_eInvalidByteSequenceError,
"destination_encoding_name", ecerr_destination_encoding_name, 0);
4582 rb_define_method(rb_eInvalidByteSequenceError,
"source_encoding", ecerr_source_encoding, 0);
4583 rb_define_method(rb_eInvalidByteSequenceError,
"destination_encoding", ecerr_destination_encoding, 0);
4584 rb_define_method(rb_eInvalidByteSequenceError,
"error_bytes", ecerr_error_bytes, 0);
4585 rb_define_method(rb_eInvalidByteSequenceError,
"readagain_bytes", ecerr_readagain_bytes, 0);
4586 rb_define_method(rb_eInvalidByteSequenceError,
"incomplete_input?", ecerr_incomplete_input, 0);
void rb_ary_store(VALUE ary, long idx, VALUE val)
VALUE rb_ary_push(VALUE ary, VALUE item)
VALUE rb_check_array_type(VALUE ary)
VALUE rb_ary_entry(VALUE ary, long offset)
VALUE rb_assoc_new(VALUE car, VALUE cdr)
Our own, locale independent, character handling routines.
#define rb_define_method(klass, mid, func, arity)
Defines klass#mid.
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
int rb_enc_get_index(VALUE obj)
int rb_to_encoding_index(VALUE enc)
VALUE rb_enc_associate(VALUE obj, rb_encoding *enc)
rb_encoding * rb_utf8_encoding(void)
rb_encoding * rb_enc_from_index(int index)
rb_encoding * rb_enc_get(VALUE obj)
rb_encoding * rb_enc_find(const char *name)
int rb_define_dummy_encoding(const char *name)
VALUE rb_enc_default_internal(void)
VALUE rb_obj_encoding(VALUE obj)
rb_encoding * rb_to_encoding(VALUE enc)
VALUE rb_enc_from_encoding(rb_encoding *encoding)
VALUE rb_enc_associate_index(VALUE obj, int idx)
int rb_enc_find_index(const char *name)
char str[HTML_ESCAPE_MAX_LEN+1]
#define RSTRING_LEN(string)
#define RSTRING_PTR(string)
VALUE rb_define_class_under(VALUE outer, const char *name, VALUE super)
Defines a class under the namespace of outer.
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
void rb_raise(VALUE exc, const char *fmt,...)
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
int rb_typeddata_is_kind_of(VALUE obj, const rb_data_type_t *data_type)
void rb_bug(const char *fmt,...)
void * rb_check_typeddata(VALUE obj, const rb_data_type_t *data_type)
VALUE rb_exc_new_str(VALUE etype, VALUE str)
void rb_warning(const char *fmt,...)
VALUE rb_cObject
Object class.
VALUE rb_obj_class(VALUE)
VALUE rb_to_int(VALUE)
Converts val into Integer.
VALUE rb_check_hash_type(VALUE hash)
VALUE rb_hash_aref(VALUE hash, VALUE key)
VALUE rb_hash_freeze(VALUE hash)
VALUE rb_hash_aset(VALUE hash, VALUE key, VALUE val)
#define ECONV_XML_ATTR_QUOTE_DECORATOR
#define ECONV_AFTER_OUTPUT
#define ENC_CODERANGE_7BIT
#define ENC_CODERANGE_VALID
#define ECONV_UNIVERSAL_NEWLINE_DECORATOR
VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
#define ECONV_XML_ATTR_CONTENT_DECORATOR
#define ECONV_INVALID_MASK
#define ECONV_CRLF_NEWLINE_DECORATOR
@ econv_undefined_conversion
@ econv_source_buffer_empty
@ econv_destination_buffer_full
@ econv_invalid_byte_sequence
#define ECONV_UNDEF_REPLACE
int rb_enc_str_coderange(VALUE)
#define ECONV_XML_TEXT_DECORATOR
#define ECONV_CR_NEWLINE_DECORATOR
VALUE rb_enc_str_new(const char *, long, rb_encoding *)
#define ECONV_INVALID_REPLACE
#define rb_enc_mbc_to_codepoint(p, e, enc)
#define MBCLEN_CHARFOUND_LEN(ret)
#define rb_enc_asciicompat(enc)
#define ECONV_PARTIAL_INPUT
#define ECONV_ERROR_HANDLER_MASK
#define ENC_CODERANGE_BROKEN
long rb_str_coderange_scan_restartable(const char *, const char *, rb_encoding *, int *)
#define MBCLEN_CHARFOUND_P(ret)
#define ECONV_UNDEF_HEX_CHARREF
#define ECONV_NEWLINE_DECORATOR_MASK
#define ENC_CODERANGE_SET(obj, cr)
Thin wrapper to ruby/config.h.
VALUE rb_funcallv_public(VALUE, ID, int, const VALUE *)
Calls a method.
void rb_error_arity(int, int, int)
VALUE rb_require_string(VALUE)
VALUE rb_obj_is_method(VALUE)
VALUE rb_obj_is_proc(VALUE)
VALUE rb_method_call(int, const VALUE *, VALUE)
VALUE rb_proc_call(VALUE, VALUE)
VALUE rb_str_resize(VALUE, long)
void rb_str_shared_replace(VALUE, VALUE)
#define rb_str_new(str, len)
void rb_str_set_len(VALUE, long)
void rb_str_modify(VALUE)
VALUE rb_str_buf_new(long)
VALUE rb_str_tmp_new(long)
VALUE rb_str_new_frozen(VALUE)
VALUE rb_str_drop_bytes(VALUE, long)
size_t rb_str_capacity(VALUE)
#define rb_str_new_cstr(str)
VALUE rb_attr_get(VALUE, ID)
VALUE rb_ivar_set(VALUE, ID, VALUE)
int rb_respond_to(VALUE, ID)
void rb_define_alloc_func(VALUE, rb_alloc_func_t)
ID rb_intern(const char *)
void rb_define_const(VALUE, const char *, VALUE)
Internal header aggregating init functions.
Internal header for Array.
Internal header for Object.
Internal header for String.
VALUE rb_enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl)
typedef long(ZCALLBACK *tell_file_func) OF((voidpf opaque
typedef int(ZCALLBACK *close_file_func) OF((voidpf opaque
int memcmp(const void *s1, const void *s2, size_t len)
#define MEMCPY(p1, p2, type, n)
#define ALLOCA_N(type, n)
#define MEMMOVE(p1, p2, type, n)
#define RARRAY_AREF(a, i)
#define RSTRING_EMBED_LEN_MAX
#define StringValueCStr(v)
#define TypedData_Get_Struct(obj, type, data_type, sval)
#define TypedData_Wrap_Struct(klass, data_type, sval)
@ RUBY_TYPED_FREE_IMMEDIATELY
const char * rb_obj_classname(VALUE)
VALUE rb_str_catf(VALUE, const char *,...)
VALUE rb_sprintf(const char *,...)
#define st_init_strcasetable
size_t strlen(const char *)
const char * ascii_compat_name
const char * ascii_incompat_name
unsigned char * out_data_start
struct rb_transcoding * tc
unsigned char * out_buf_start
rb_econv_result_t last_result
unsigned char * out_buf_end
unsigned char * out_data_end
rb_encoding * destination_encoding
unsigned char * in_buf_start
struct rb_econv_t::@177 last_error
const char * source_encoding_name
unsigned char * in_buf_end
struct rb_transcoding * error_tc
unsigned char * in_data_start
rb_encoding * source_encoding
const char * replacement_enc
const char * source_encoding
int replacement_allocated
const char * destination_encoding
const unsigned char * replacement_str
struct rb_transcoding * last_tc
unsigned char * in_data_end
const unsigned char * error_bytes_start
const char * destination_encoding_name
const char * dst_encoding
const char * src_encoding
rb_transcoder_asciicompat_type_t asciicompat_type
unsigned int output_index
union rb_transcoding::@176 writebuf
const rb_transcoder * transcoder
union rb_transcoding::@175 readbuf
union rb_transcoding::rb_transcoding_state_t state
search_path_queue_t * queue
search_path_queue_t ** queue_last_ptr
struct search_path_queue_tag * next
transcoder_entry_t ** entries
const rb_transcoder * transcoder
#define TRANSCODING_WRITEBUF(tc)
rb_econv_t * rb_econv_open(const char *sname, const char *dname, int ecflags)
VALUE rb_econv_open_exc(const char *sname, const char *dname, int ecflags)
const char * rb_econv_encoding_to_insert_output(rb_econv_t *ec)
#define TRANSCODING_STATE(tc)
int rb_econv_putbackable(rb_econv_t *ec)
int rb_econv_has_convpath_p(const char *from_encoding, const char *to_encoding)
#define SUSPEND_AFTER_OUTPUT(num)
#define SUSPEND_OBUF(num)
VALUE rb_cEncodingConverter
VALUE rb_econv_str_append(rb_econv_t *ec, VALUE src, VALUE dst, int flags)
void rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib)
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE opthash)
#define TRANSCODING_WRITEBUF_SIZE(tc)
size_t rb_econv_memsize(rb_econv_t *ec)
#define DECORATOR_P(sname, dname)
int rb_econv_insert_output(rb_econv_t *ec, const unsigned char *str, size_t len, const char *str_encoding)
VALUE rb_econv_str_convert(rb_econv_t *ec, VALUE src, int flags)
VALUE rb_econv_substr_append(rb_econv_t *ec, VALUE src, long off, long len, VALUE dst, int flags)
int rb_econv_decorate_at_last(rb_econv_t *ec, const char *decorator_name)
#define SUSPEND(ret, num)
void rb_econv_binmode(rb_econv_t *ec)
int rb_econv_decorate_at_first(rb_econv_t *ec, const char *decorator_name)
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
VALUE rb_econv_make_exception(rb_econv_t *ec)
void rb_econv_check_error(rb_econv_t *ec)
int rb_econv_prepare_opts(VALUE opthash, VALUE *opts)
VALUE rb_econv_substr_convert(rb_econv_t *ec, VALUE src, long byteoff, long bytesize, int flags)
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **input_ptr, const unsigned char *input_stop, unsigned char **output_ptr, unsigned char *output_stop, int flags)
#define TRANSCODING_READBUF(tc)
void Init_transcode(void)
#define MAX_ECFLAGS_DECORATORS
int rb_econv_prepare_options(VALUE opthash, VALUE *opts, int ecflags)
void InitVM_transcode(void)
void rb_econv_close(rb_econv_t *ec)
struct search_path_queue_tag search_path_queue_t
#define encoding_equal(enc1, enc2)
void rb_register_transcoder(const rb_transcoder *tr)
void rb_econv_putback(rb_econv_t *ec, unsigned char *p, int n)
VALUE rb_econv_append(rb_econv_t *ec, const char *ss, long len, VALUE dst, int flags)
int rb_econv_set_replacement(rb_econv_t *ec, const unsigned char *str, size_t len, const char *encname)
const char * rb_econv_asciicompat_encoding(const char *ascii_incompat_name)
Internal header for Encoding::Converter.
#define STR1_LENGTH(byte_addr)
#define STR1_BYTEINDEX(w)
double dummy_for_alignment
char ary[sizeof(double) > sizeof(void *) ? sizeof(double) :sizeof(void *)]
VALUE(* fallback_func)(VALUE obj, VALUE name)