29#include "internal/error.h"
/*
 * Helpers for handling values in "short" units of two chars.
 *   BITSPERSHORT - bit width of one such unit (2 * CHAR_BIT).
 *   SHORTMASK    - mask keeping the low BITSPERSHORT bits of a value; the
 *                  Bignum branch of w_object() passes `num & SHORTMASK`
 *                  to w_short().
 *   SHORTDN(x)   - x shifted right by one unit, via RSHIFT.
 */
40#define BITSPERSHORT (2*CHAR_BIT)
41#define SHORTMASK ((1<<BITSPERSHORT)-1)
42#define SHORTDN(x) RSHIFT((x),BITSPERSHORT)
44#if SIZEOF_SHORT == SIZEOF_BDIGIT
45#define SHORTLEN(x) (x)
60#define SHORTLEN(x) shortlen((x),d)
/*
 * Version of the marshal format.  rb_marshal_dump_limited() writes these
 * two values with w_byte() (major, then minor) before serializing the
 * object itself with w_object().
 */
63#define MARSHAL_MAJOR 4
64#define MARSHAL_MINOR 8
/*
 * Single-character type tags of the marshal format.  The dump side emits
 * a tag with w_byte() ahead of the corresponding payload.
 */
/* Integer that fits the compact encoding; followed by w_long(). */
69#define TYPE_FIXNUM 'i'
/* Emitted by w_extended() while walking an object's class chain. */
71#define TYPE_EXTENDED 'e'
/* Emitted by w_uclass() when the object's real class differs from the
 * expected base class (String, Regexp, Array, Hash); followed by the
 * class path. */
72#define TYPE_UCLASS 'C'
/* Plain object: class reference followed by its instance variables. */
73#define TYPE_OBJECT 'o'
/* Object that responds to _dump; followed by the string _dump returned. */
75#define TYPE_USERDEF 'u'
/* Object that responds to marshal_dump; followed by the dumped result. */
76#define TYPE_USRMARSHAL 'U'
/* Bignum, written as a sign, a length and a sequence of shorts. */
78#define TYPE_BIGNUM 'l'
/* String bytes. */
79#define TYPE_STRING '"'
/* Regexp source bytes followed by one options byte. */
80#define TYPE_REGEXP '/'
/* Hash with a non-nil default value; the default is written after the
 * entries.  (A hash with a default proc is rejected with TypeError.) */
83#define TYPE_HASH_DEF '}'
/* Struct: class reference followed by member-name/value pairs. */
84#define TYPE_STRUCT 'S'
/* NOTE(review): no writer for this tag is visible in the dump code here;
 * presumably kept for reading older dumps - confirm on the load side. */
85#define TYPE_MODULE_OLD 'M'
/* Module, written as its path. */
87#define TYPE_MODULE 'm'
/* Symbol written out in full (its bytes). */
89#define TYPE_SYMBOL ':'
/* Reference to an already-dumped symbol, by its index in the symbol
 * table. */
90#define TYPE_SYMLINK ';'
95static ID s_dump, s_load, s_mdump, s_mload;
96static ID s_dump_data, s_load_data, s_alloc, s_call;
97static ID s_getbyte, s_read, s_write, s_binmode;
98static ID s_encoding_short, s_ruby2_keywords_flag;
/*
 * C-string spellings matching the s_* IDs declared above.  The macro name
 * is always "name_" + the ID variable name so that dump_funcall(),
 * dump_check_funcall() and load_funcall() can build it with name_##sym
 * and pass it on for error messages.
 *
 * name_s_encoding_short ("E") and name_s_ruby2_keywords_flag ("K") are
 * additionally used as the names of pseudo instance variables: they are
 * compared against incoming symbol names in sym2encidx() and
 * ruby2_keywords_flag_check(), and pasted into the "is not dumped"
 * warnings in w_obj_each().
 */
100#define name_s_dump "_dump"
101#define name_s_load "_load"
102#define name_s_mdump "marshal_dump"
103#define name_s_mload "marshal_load"
104#define name_s_dump_data "_dump_data"
105#define name_s_load_data "_load_data"
106#define name_s_alloc "_alloc"
107#define name_s_call "call"
108#define name_s_getbyte "getbyte"
109#define name_s_read "read"
110#define name_s_write "write"
111#define name_s_binmode "binmode"
112#define name_s_encoding_short "E"
113#define name_s_ruby2_keywords_flag "K"
118 VALUE (*dumper)(VALUE);
119 VALUE (*loader)(VALUE, VALUE);
122static st_table *compat_allocator_tbl;
123static VALUE compat_allocator_tbl_wrapper;
124static VALUE rb_marshal_dump_limited(VALUE obj, VALUE port, int limit);
125static VALUE rb_marshal_load_with_proc(VALUE port, VALUE proc);
128mark_marshal_compat_i(st_data_t key, st_data_t value, st_data_t _)
130 marshal_compat_t *p = (marshal_compat_t *)value;
131 rb_gc_mark(p->newclass);
132 rb_gc_mark(p->oldclass);
137mark_marshal_compat_t(void *tbl)
140 st_foreach(tbl, mark_marshal_compat_i, 0);
143static st_table *compat_allocator_table(void);
146rb_marshal_define_compat(VALUE newclass, VALUE oldclass, VALUE (*dumper)(VALUE), VALUE (*loader)(VALUE, VALUE))
148 marshal_compat_t *compat;
149 rb_alloc_func_t allocator = rb_get_alloc_func(newclass);
152 rb_raise(rb_eTypeError, "no allocator");
155 compat = ALLOC(marshal_compat_t);
156 compat->newclass = Qnil;
157 compat->oldclass = Qnil;
158 compat->newclass = newclass;
159 compat->oldclass = oldclass;
160 compat->dumper = dumper;
161 compat->loader = loader;
163 st_insert(compat_allocator_table(), (st_data_t)allocator, (st_data_t)compat);
170 st_table *compat_tbl;
174struct dump_call_arg {
176 struct dump_arg *arg;
181check_dump_arg(VALUE ret, struct dump_arg *arg, const char *name)
184 rb_raise(rb_eRuntimeError, "Marshal.dump reentered at %s",
191check_userdump_arg(VALUE obj, ID sym, int argc, const VALUE *argv,
192 struct dump_arg *arg, const char *name)
194 VALUE ret = rb_funcallv(obj, sym, argc, argv);
195 VALUE klass = CLASS_OF(obj);
196 if (CLASS_OF(ret) == klass) {
197 rb_raise(rb_eRuntimeError, "%"PRIsVALUE"#%s returned same class instance",
200 return check_dump_arg(ret, arg, name);
/*
 * dump_funcall(arg, obj, sym, argc, argv)
 *   Call method `sym` on `obj` during a dump, via check_userdump_arg().
 *   That helper invokes the method with rb_funcallv(), raises RuntimeError
 *   if the result has the same class as `obj`, and then passes the result
 *   through check_dump_arg().  `sym` must be one of the s_* ID variables
 *   so that name_##sym resolves to its string name.
 */
203#define dump_funcall(arg, obj, sym, argc, argv) \
204 check_userdump_arg(obj, sym, argc, argv, arg, name_##sym)
/*
 * dump_check_funcall(arg, obj, sym, argc, argv)
 *   Same idea, but the call is made with rb_check_funcall() and the result
 *   only goes through check_dump_arg() (no same-class check).
 */
205#define dump_check_funcall(arg, obj, sym, argc, argv) \
206 check_dump_arg(rb_check_funcall(obj, sym, argc, argv), arg, name_##sym)
208static void clear_dump_arg(struct dump_arg *arg);
211mark_dump_arg(void *ptr)
213 struct dump_arg *p = ptr;
216 rb_mark_set(p->symbols);
217 rb_mark_set(p->data);
218 rb_mark_hash(p->compat_tbl);
223free_dump_arg(void *ptr)
230memsize_dump_arg(const void *ptr)
232 return sizeof(struct dump_arg);
235static const rb_data_type_t dump_arg_data = {
237 {mark_dump_arg, free_dump_arg, memsize_dump_arg,},
238 0, 0, RUBY_TYPED_FREE_IMMEDIATELY
242must_not_be_anonymous(const char *type, VALUE path)
244 char *n = RSTRING_PTR(path);
246 if (!rb_enc_asciicompat(rb_enc_get(path))) {
248 rb_raise(rb_eTypeError, "can't dump non-ascii %s name % "PRIsVALUE,
252 rb_raise(rb_eTypeError, "can't dump anonymous %s % "PRIsVALUE,
259class2path(VALUE klass)
261 VALUE path = rb_class_path(klass);
263 must_not_be_anonymous((RB_TYPE_P(klass, T_CLASS) ? "class" : "module"), path);
264 if (rb_path_to_class(path) != rb_class_real(klass)) {
265 rb_raise(rb_eTypeError, "% "PRIsVALUE" can't be referred to", path);
270int ruby_marshal_write_long(long x, char *buf);
271static void w_long(long, struct dump_arg*);
272static int w_encoding(VALUE encname, struct dump_call_arg *arg);
273static VALUE encoding_name(VALUE obj, struct dump_arg *arg);
276w_nbyte(const char *s, long n, struct dump_arg *arg)
278 VALUE buf = arg->str;
279 rb_str_buf_cat(buf, s, n);
280 if (arg->dest && RSTRING_LEN(buf) >= BUFSIZ) {
281 rb_io_write(arg->dest, buf);
282 rb_str_resize(buf, 0);
287w_byte(char c, struct dump_arg *arg)
293w_bytes(const char *s, long n, struct dump_arg *arg)
/*
 * Write the C string `s` through w_bytes(), using strlen(s) as the length.
 * `s` is evaluated twice, so pass an expression without side effects (the
 * callers in w_float() pass string literals).
 */
299#define w_cstr(s, arg) w_bytes((s), strlen(s), (arg))
302w_short(int x, struct dump_arg *arg)
304 w_byte((char)((x >> 0) & 0xff), arg);
305 w_byte((char)((x >> 8) & 0xff), arg);
309w_long(long x, struct dump_arg *arg)
311 char buf[sizeof(long)+1];
312 int i = ruby_marshal_write_long(x, buf);
314 rb_raise(rb_eTypeError, "long too big to dump");
316 w_nbyte(buf, i, arg);
320ruby_marshal_write_long(long x, char *buf)
325 if (!(RSHIFT(x, 31) == 0 || RSHIFT(x, 31) == -1)) {
326 /* big long does not fit in 4 bytes */
335 if (0 < x && x < 123) {
336 buf[0] = (char)(x + 5);
339 if (-124 < x && x < 0) {
340 buf[0] = (char)((x - 5)&0xff);
343 for (i=1;i<(int)sizeof(long)+1;i++) {
344 buf[i] = (char)(x & 0xff);
359#define DECIMAL_MANT (53-16) /* from IEEE754 double precision */
363#elif DBL_MANT_DIG > 24
365#elif DBL_MANT_DIG > 16
372load_mantissa(double d, const char *buf, long len)
375 if (--len > 0 && !*buf++) { /* binary mantissa mark */
376 int e, s = d < 0, dig = 0;
379 modf(ldexp(frexp(fabs(d), &e), DECIMAL_MANT), &d);
383 default: m = *buf++ & 0xff; /* fall through */
385 case 3: m = (m << 8) | (*buf++ & 0xff); /* fall through */
388 case 2: m = (m << 8) | (*buf++ & 0xff); /* fall through */
391 case 1: m = (m << 8) | (*buf++ & 0xff);
394 dig -= len < MANT_BITS / 8 ? 8 * (unsigned)len : MANT_BITS;
395 d += ldexp((double)m, dig);
396 } while ((len -= MANT_BITS / 8) > 0);
397 d = ldexp(d, e - DECIMAL_MANT);
403#define load_mantissa(d, buf, len) (d)
407#define FLOAT_DIG (DBL_DIG+2)
413w_float(double d, struct dump_arg *arg)
415 char buf[FLOAT_DIG + (DECIMAL_MANT + 7) / 8 + 10];
418 if (d < 0) w_cstr("-inf", arg);
419 else w_cstr("inf", arg);
425 if (signbit(d)) w_cstr("-0", arg);
426 else w_cstr("0", arg);
429 int decpt, sign, digs, len = 0;
430 char *e, *p = ruby_dtoa(d, 0, 0, &decpt, &sign, &e);
431 if (sign) buf[len++] = '-';
433 if (decpt < -3 || decpt > digs) {
435 if (--digs > 0) buf[len++] = '.';
436 memcpy(buf + len, p + 1, digs);
438 len += snprintf(buf + len, sizeof(buf) - len, "e%d", decpt - 1);
440 else if (decpt > 0) {
441 memcpy(buf + len, p, decpt);
443 if ((digs -= decpt) > 0) {
445 memcpy(buf + len, p + decpt, digs);
453 memset(buf + len, '0', -decpt);
456 memcpy(buf + len, p, digs);
460 w_bytes(buf, len, arg);
465w_symbol(VALUE sym, struct dump_arg *arg)
470 if (st_lookup(arg->symbols, sym, &num)) {
471 w_byte(TYPE_SYMLINK, arg);
472 w_long((long)num, arg);
475 const VALUE orig_sym = sym;
476 sym = rb_sym2str(sym);
478 rb_raise(rb_eTypeError, "can't dump anonymous ID %"PRIdVALUE, sym);
480 encname = encoding_name(sym, arg);
481 if (NIL_P(encname) ||
482 rb_enc_str_coderange(sym) == ENC_CODERANGE_7BIT) {
486 w_byte(TYPE_IVAR, arg);
488 w_byte(TYPE_SYMBOL, arg);
489 w_bytes(RSTRING_PTR(sym), RSTRING_LEN(sym), arg);
490 st_add_direct(arg->symbols, orig_sym, arg->symbols->num_entries);
491 if (!NIL_P(encname)) {
492 struct dump_call_arg c_arg;
496 w_encoding(encname, &c_arg);
502w_unique(VALUE s, struct dump_arg *arg)
504 must_not_be_anonymous("class", s);
505 w_symbol(rb_str_intern(s), arg);
508static void w_object(VALUE,struct dump_arg*,int);
511hash_each(VALUE key, VALUE value, VALUE v)
513 struct dump_call_arg *arg = (void *)v;
514 w_object(key, arg->arg, arg->limit);
515 w_object(value, arg->arg, arg->limit);
/*
 * True when `klass` carries state that makes it undumpable as a singleton:
 * its method table has at least one entry, or it has an instance-variable
 * table holding more than one entry.  w_extended() raises TypeError
 * ("singleton can't be dumped") when this holds for a singleton class or
 * its origin.  `klass` is evaluated several times.
 */
519#define SINGLETON_DUMP_UNABLE_P(klass) \
520 (rb_id_table_size(RCLASS_M_TBL(klass)) > 0 || \
521 (RCLASS_IV_TBL(klass) && RCLASS_IV_TBL(klass)->num_entries > 1))
524w_extended(VALUE klass, struct dump_arg *arg, int check)
526 if (check && FL_TEST(klass, FL_SINGLETON)) {
527 VALUE origin = RCLASS_ORIGIN(klass);
528 if (SINGLETON_DUMP_UNABLE_P(klass) ||
529 (origin != klass && SINGLETON_DUMP_UNABLE_P(origin))) {
530 rb_raise(rb_eTypeError, "singleton can't be dumped");
532 klass = RCLASS_SUPER(klass);
534 while (BUILTIN_TYPE(klass) == T_ICLASS) {
535 if (!FL_TEST(klass, RICLASS_IS_ORIGIN) ||
536 BUILTIN_TYPE(RBASIC(klass)->klass) != T_MODULE) {
537 VALUE path = rb_class_name(RBASIC(klass)->klass);
538 w_byte(TYPE_EXTENDED, arg);
541 klass = RCLASS_SUPER(klass);
546w_class(char type, VALUE obj, struct dump_arg *arg, int check)
552 if (arg->compat_tbl &&
553 st_lookup(arg->compat_tbl, (st_data_t)obj, &real_obj)) {
554 obj = (VALUE)real_obj;
556 klass = CLASS_OF(obj);
557 w_extended(klass, arg, check);
559 path = class2path(rb_class_real(klass));
564w_uclass(VALUE obj, VALUE super, struct dump_arg *arg)
566 VALUE klass = CLASS_OF(obj);
568 w_extended(klass, arg, TRUE);
569 klass = rb_class_real(klass);
570 if (klass != super) {
571 w_byte(TYPE_UCLASS, arg);
572 w_unique(class2path(klass), arg);
/*
 * True for instance-variable IDs that must not be dumped as ordinary
 * ivars: the encoding ID, the short encoding name ("E"), the
 * ruby2_keywords flag ("K"), and any ID for which rb_id2str() returns a
 * false value.  obj_count_ivars() leaves these out of the count and
 * w_obj_each() takes a separate path for them.  `id` is evaluated several
 * times.
 */
576#define to_be_skipped_id(id) (id == rb_id_encoding() || id == s_encoding_short || id == s_ruby2_keywords_flag || !rb_id2str(id))
579 struct dump_call_arg *dump;
584w_obj_each(st_data_t key, st_data_t val, st_data_t a)
587 VALUE value = (VALUE)val;
588 struct w_ivar_arg *ivarg = (struct w_ivar_arg *)a;
589 struct dump_call_arg *arg = ivarg->dump;
591 if (to_be_skipped_id(id)) {
592 if (id == s_encoding_short) {
593 rb_warn("instance variable `"name_s_encoding_short"' on class %"PRIsVALUE" is not dumped",
596 if (id == s_ruby2_keywords_flag) {
597 rb_warn("instance variable `"name_s_ruby2_keywords_flag"' on class %"PRIsVALUE" is not dumped",
602 if (!ivarg->num_ivar) {
603 rb_raise(rb_eRuntimeError, "instance variable added to %"PRIsVALUE" instance",
607 w_symbol(ID2SYM(id), arg->arg);
608 w_object(value, arg->arg, arg->limit);
613obj_count_ivars(st_data_t key, st_data_t val, st_data_t a)
616 if (!to_be_skipped_id(id)) ++*(st_index_t *)a;
621encoding_name(VALUE obj, struct dump_arg *arg)
623 if (rb_enc_capable(obj)) {
624 int encidx = rb_enc_get_index(obj);
625 rb_encoding *enc = 0;
628 if (encidx <= 0 || !(enc = rb_enc_from_index(encidx))) {
632 /* special treatment for US-ASCII and UTF-8 */
633 if (encidx == rb_usascii_encindex()) {
636 else if (encidx == rb_utf8_encindex()) {
641 !st_lookup(arg->encodings, (st_data_t)rb_enc_name(enc), &name) :
642 (arg->encodings = st_init_strcasetable(), 1)) {
643 name = (st_data_t)rb_str_new_cstr(rb_enc_name(enc));
644 st_insert(arg->encodings, (st_data_t)rb_enc_name(enc), name);
654w_encoding(VALUE encname, struct dump_call_arg *arg)
656 int limit = arg->limit;
657 if (limit >= 0) ++limit;
661 w_symbol(ID2SYM(s_encoding_short), arg->arg);
662 w_object(encname, arg->arg, limit);
667 w_symbol(ID2SYM(rb_id_encoding()), arg->arg);
668 w_object(encname, arg->arg, limit);
673has_ivars(VALUE obj, VALUE encname, VALUE *ivobj)
675 st_index_t enc = !NIL_P(encname);
677 st_index_t ruby2_keywords_flag = 0;
679 if (SPECIAL_CONST_P(obj)) goto generic;
680 switch (BUILTIN_TYPE(obj)) {
684 break; /* counted elsewhere */
686 ruby2_keywords_flag = RHASH(obj)->basic.flags & RHASH_PASS_AS_KEYWORDS ? 1 : 0;
690 rb_ivar_foreach(obj, obj_count_ivars, (st_data_t)&num);
691 if (ruby2_keywords_flag || num) *ivobj = obj;
694 return num + enc + ruby2_keywords_flag;
698w_ivar_each(VALUE obj, st_index_t num, struct dump_call_arg *arg)
700 struct w_ivar_arg ivarg = {arg, num};
702 rb_ivar_foreach(obj, w_obj_each, (st_data_t)&ivarg);
703 if (ivarg.num_ivar) {
704 rb_raise(rb_eRuntimeError, "instance variable removed from %"PRIsVALUE" instance",
710w_ivar(st_index_t num, VALUE ivobj, VALUE encname, struct dump_call_arg *arg)
712 w_long(num, arg->arg);
713 num -= w_encoding(encname, arg);
714 if (RB_TYPE_P(ivobj, T_HASH) && (RHASH(ivobj)->basic.flags & RHASH_PASS_AS_KEYWORDS)) {
715 int limit = arg->limit;
716 if (limit >= 0) ++limit;
717 w_symbol(ID2SYM(s_ruby2_keywords_flag), arg->arg);
718 w_object(Qtrue, arg->arg, limit);
721 if (ivobj != Qundef && num) {
722 w_ivar_each(ivobj, num, arg);
727w_objivar(VALUE obj, struct dump_call_arg *arg)
731 rb_ivar_foreach(obj, obj_count_ivars, (st_data_t)&num);
732 w_long(num, arg->arg);
733 w_ivar_each(obj, num, arg);
737w_object(VALUE obj, struct dump_arg *arg, int limit)
739 struct dump_call_arg c_arg;
740 VALUE ivobj = Qundef;
742 st_index_t hasiv = 0;
743 VALUE encname = Qnil;
746 rb_raise(rb_eArgError, "exceed depth limit");
749 if (limit > 0) limit--;
754 if (st_lookup(arg->data, obj, &num)) {
755 w_byte(TYPE_LINK, arg);
756 w_long((long)num, arg);
761 w_byte(TYPE_NIL, arg);
763 else if (obj == Qtrue) {
764 w_byte(TYPE_TRUE, arg);
766 else if (obj == Qfalse) {
767 w_byte(TYPE_FALSE, arg);
769 else if (FIXNUM_P(obj)) {
771 w_byte(TYPE_FIXNUM, arg);
772 w_long(FIX2INT(obj), arg);
774 if (RSHIFT((long)obj, 31) == 0 || RSHIFT((long)obj, 31) == -1) {
775 w_byte(TYPE_FIXNUM, arg);
776 w_long(FIX2LONG(obj), arg);
779 w_object(rb_int2big(FIX2LONG(obj)), arg, limit);
783 else if (SYMBOL_P(obj)) {
786 else if (FLONUM_P(obj)) {
787 st_add_direct(arg->data, obj, arg->data->num_entries);
788 w_byte(TYPE_FLOAT, arg);
789 w_float(RFLOAT_VALUE(obj), arg);
794 if (!RBASIC_CLASS(obj)) {
795 rb_raise(rb_eTypeError, "can't dump internal %s",
796 rb_builtin_type_name(BUILTIN_TYPE(obj)));
799 if (rb_obj_respond_to(obj, s_mdump, TRUE)) {
800 st_add_direct(arg->data, obj, arg->data->num_entries);
802 v = dump_funcall(arg, obj, s_mdump, 0, 0);
803 w_class(TYPE_USRMARSHAL, obj, arg, FALSE);
804 w_object(v, arg, limit);
807 if (rb_obj_respond_to(obj, s_dump, TRUE)) {
808 VALUE ivobj2 = Qundef;
813 v = dump_funcall(arg, obj, s_dump, 1, &v);
814 if (!RB_TYPE_P(v, T_STRING)) {
815 rb_raise(rb_eTypeError, "_dump() must return string");
817 hasiv = has_ivars(obj, (encname = encoding_name(obj, arg)), &ivobj);
818 hasiv2 = has_ivars(v, (encname2 = encoding_name(v, arg)), &ivobj2);
824 if (hasiv) w_byte(TYPE_IVAR, arg);
825 w_class(TYPE_USERDEF, obj, arg, FALSE);
826 w_bytes(RSTRING_PTR(v), RSTRING_LEN(v), arg);
828 w_ivar(hasiv, ivobj, encname, &c_arg);
830 st_add_direct(arg->data, obj, arg->data->num_entries);
834 st_add_direct(arg->data, obj, arg->data->num_entries);
836 hasiv = has_ivars(obj, (encname = encoding_name(obj, arg)), &ivobj);
838 st_data_t compat_data;
839 rb_alloc_func_t allocator = rb_get_alloc_func(RBASIC(obj)->klass);
840 if (st_lookup(compat_allocator_tbl,
841 (st_data_t)allocator,
843 marshal_compat_t *compat = (marshal_compat_t*)compat_data;
844 VALUE real_obj = obj;
845 obj = compat->dumper(real_obj);
846 if (!arg->compat_tbl) {
847 arg->compat_tbl = rb_init_identtable();
849 st_insert(arg->compat_tbl, (st_data_t)obj, (st_data_t)real_obj);
850 if (obj != real_obj && ivobj == Qundef) hasiv = 0;
853 if (hasiv) w_byte(TYPE_IVAR, arg);
855 switch (BUILTIN_TYPE(obj)) {
857 if (FL_TEST(obj, FL_SINGLETON)) {
858 rb_raise(rb_eTypeError, "singleton class can't be dumped");
860 w_byte(TYPE_CLASS, arg);
862 VALUE path = class2path(obj);
863 w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg);
869 w_byte(TYPE_MODULE, arg);
871 VALUE path = class2path(obj);
872 w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg);
878 w_byte(TYPE_FLOAT, arg);
879 w_float(RFLOAT_VALUE(obj), arg);
883 w_byte(TYPE_BIGNUM, arg);
885 char sign = BIGNUM_SIGN(obj) ? '+' : '-';
886 size_t len = BIGNUM_LEN(obj);
889 BDIGIT *d = BIGNUM_DIGITS(obj);
891 slen = SHORTLEN(len);
892 if (LONG_MAX < slen) {
893 rb_raise(rb_eTypeError, "too big Bignum can't be dumped");
897 w_long((long)slen, arg);
898 for (j = 0; j < len; j++) {
899#if SIZEOF_BDIGIT > SIZEOF_SHORT
903 for (i=0; i<SIZEOF_BDIGIT; i+=SIZEOF_SHORT) {
904 w_short(num & SHORTMASK, arg);
906 if (j == len - 1 && num == 0) break;
917 w_uclass(obj, rb_cString, arg);
918 w_byte(TYPE_STRING, arg);
919 w_bytes(RSTRING_PTR(obj), RSTRING_LEN(obj), arg);
923 w_uclass(obj, rb_cRegexp, arg);
924 w_byte(TYPE_REGEXP, arg);
926 int opts = rb_reg_options(obj);
927 w_bytes(RREGEXP_SRC_PTR(obj), RREGEXP_SRC_LEN(obj), arg);
928 w_byte((char)opts, arg);
933 w_uclass(obj, rb_cArray, arg);
934 w_byte(TYPE_ARRAY, arg);
936 long i, len = RARRAY_LEN(obj);
939 for (i=0; i<RARRAY_LEN(obj); i++) {
940 w_object(RARRAY_AREF(obj, i), arg, limit);
941 if (len != RARRAY_LEN(obj)) {
942 rb_raise(rb_eRuntimeError, "array modified during dump");
949 w_uclass(obj, rb_cHash, arg);
950 if (NIL_P(RHASH_IFNONE(obj))) {
951 w_byte(TYPE_HASH, arg);
953 else if (FL_TEST(obj, RHASH_PROC_DEFAULT)) {
954 rb_raise(rb_eTypeError, "can't dump hash with default proc");
957 w_byte(TYPE_HASH_DEF, arg);
959 w_long(rb_hash_size_num(obj), arg);
960 rb_hash_foreach(obj, hash_each, (st_data_t)&c_arg);
961 if (!NIL_P(RHASH_IFNONE(obj))) {
962 w_object(RHASH_IFNONE(obj), arg, limit);
967 w_class(TYPE_STRUCT, obj, arg, TRUE);
969 long len = RSTRUCT_LEN(obj);
974 mem = rb_struct_members(obj);
975 for (i=0; i<len; i++) {
976 w_symbol(RARRAY_AREF(mem, i), arg);
977 w_object(RSTRUCT_GET(obj, i), arg, limit);
983 w_class(TYPE_OBJECT, obj, arg, TRUE);
984 w_objivar(obj, &c_arg);
991 if (!rb_obj_respond_to(obj, s_dump_data, TRUE)) {
992 rb_raise(rb_eTypeError,
993 "no _dump_data is defined for class %"PRIsVALUE,
996 v = dump_funcall(arg, obj, s_dump_data, 0, 0);
997 w_class(TYPE_DATA, obj, arg, TRUE);
998 w_object(v, arg, limit);
1003 rb_raise(rb_eTypeError, "can't dump %"PRIsVALUE,
1010 w_ivar(hasiv, ivobj, encname, &c_arg);
1015clear_dump_arg(struct dump_arg *arg)
1017 if (!arg->symbols) return;
1018 st_free_table(arg->symbols);
1020 st_free_table(arg->data);
1022 if (arg->compat_tbl) {
1023 st_free_table(arg->compat_tbl);
1024 arg->compat_tbl = 0;
1026 if (arg->encodings) {
1027 st_free_table(arg->encodings);
1032NORETURN(static inline void io_needed(void));
1036 rb_raise(rb_eTypeError, "instance of IO needed");
1041 * dump( obj [, anIO] , limit=-1 ) -> anIO
1043 * Serializes obj and all descendant objects. If anIO is
1044 * specified, the serialized data will be written to it, otherwise the
1045 * data will be returned as a String. If limit is specified, the
1046 * traversal of subobjects will be limited to that depth. If limit is
1047 * negative, no checking of depth will be performed.
1050 * def initialize(str)
1058 * (produces no output)
1060 * o = Klass.new("hello\n")
1061 * data = Marshal.dump(o)
1062 * obj = Marshal.load(data)
1063 * obj.say_hello #=> "hello\n"
1065 * Marshal can't dump the following objects:
1066 * * anonymous Class/Module.
1067 * * objects which are related to the system (e.g. Dir, File::Stat, IO, File, Socket
1069 * * an instance of MatchData, Data, Method, UnboundMethod, Proc, Thread,
1070 * ThreadGroup, Continuation
1071 * * objects which define singleton methods
1074marshal_dump(int argc, VALUE *argv, VALUE _)
1076 VALUE obj, port, a1, a2;
1080 rb_scan_args(argc, argv, "12", &obj, &a1, &a2);
1082 if (!NIL_P(a2)) limit = NUM2INT(a2);
1083 if (NIL_P(a1)) io_needed();
1086 else if (argc == 2) {
1087 if (FIXNUM_P(a1)) limit = FIX2INT(a1);
1088 else if (NIL_P(a1)) io_needed();
1091 return rb_marshal_dump_limited(obj, port, limit);
1095rb_marshal_dump_limited(VALUE obj, VALUE port, int limit)
1097 struct dump_arg *arg;
1098 VALUE wrapper; /* used to avoid memory leak in case of exception */
1100 wrapper = TypedData_Make_Struct(0, struct dump_arg, &dump_arg_data, arg);
1102 arg->symbols = st_init_numtable();
1103 arg->data = rb_init_identtable();
1104 arg->compat_tbl = 0;
1106 arg->str = rb_str_buf_new(0);
1108 if (!rb_respond_to(port, s_write)) {
1112 dump_check_funcall(arg, port, s_binmode, 0, 0);
1118 w_byte(MARSHAL_MAJOR, arg);
1119 w_byte(MARSHAL_MINOR, arg);
1121 w_object(obj, arg, limit);
1123 rb_io_write(arg->dest, arg->str);
1124 rb_str_resize(arg->str, 0);
1126 clear_dump_arg(arg);
1127 RB_GC_GUARD(wrapper);
1140 st_table *partial_objects;
1142 st_table *compat_tbl;
1146check_load_arg(VALUE ret, struct load_arg *arg, const char *name)
1148 if (!arg->symbols) {
1149 rb_raise(rb_eRuntimeError, "Marshal.load reentered at %s",
/*
 * Call method `sym` on `obj` during a load with rb_funcallv() and pass the
 * result through check_load_arg(), which raises RuntimeError
 * ("Marshal.load reentered at ...") when arg->symbols is no longer set.
 * `sym` must be one of the s_* ID variables so that name_##sym resolves to
 * its string name.
 */
1154#define load_funcall(arg, obj, sym, argc, argv) \
1155 check_load_arg(rb_funcallv(obj, sym, argc, argv), arg, name_##sym)
1157static void clear_load_arg(struct load_arg *arg);
1160mark_load_arg(void *ptr)
1162 struct load_arg *p = ptr;
1165 rb_mark_tbl(p->symbols);
1166 rb_mark_tbl(p->data);
1167 rb_mark_tbl(p->partial_objects);
1168 rb_mark_hash(p->compat_tbl);
1172free_load_arg(void *ptr)
1174 clear_load_arg(ptr);
1179memsize_load_arg(const void *ptr)
1181 return sizeof(struct load_arg);
1184static const rb_data_type_t load_arg_data = {
1186 {mark_load_arg, free_load_arg, memsize_load_arg,},
1187 0, 0, RUBY_TYPED_FREE_IMMEDIATELY
/*
 * Register `v` in the load-side object table under the next index, i.e.
 * the current number of entries in arg->data.  Forwards to r_entry0().
 * `arg` is evaluated twice.
 */
1190#define r_entry(v, arg) r_entry0((v), (arg)->data->num_entries, (arg))
1191static VALUE r_entry0(VALUE v, st_index_t num, struct load_arg *arg);
1192static VALUE r_object(struct load_arg *arg);
1193static VALUE r_symbol(struct load_arg *arg);
1194static VALUE path2class(VALUE path);
1196NORETURN(static void too_short(void));
1200 rb_raise(rb_eArgError, "marshal data too short");
1204r_prepare(struct load_arg *arg)
1206 st_index_t idx = arg->data->num_entries;
1208 st_insert(arg->data, (st_data_t)idx, (st_data_t)Qundef);
1213r_byte1_buffered(struct load_arg *arg)
1215 if (arg->buflen == 0) {
1216 long readable = arg->readable < BUFSIZ ? arg->readable : BUFSIZ;
1217 VALUE str, n = LONG2NUM(readable);
1219 str = load_funcall(arg, arg->src, s_read, 1, &n);
1220 if (NIL_P(str)) too_short();
1222 memcpy(arg->buf, RSTRING_PTR(str), RSTRING_LEN(str));
1224 arg->buflen = RSTRING_LEN(str);
1227 return arg->buf[arg->offset++];
1231r_byte(struct load_arg *arg)
1235 if (RB_TYPE_P(arg->src, T_STRING)) {
1236 if (RSTRING_LEN(arg->src) > arg->offset) {
1237 c = (unsigned char)RSTRING_PTR(arg->src)[arg->offset++];
1244 if (arg->readable >0 || arg->buflen > 0) {
1245 c = r_byte1_buffered(arg);
1248 VALUE v = load_funcall(arg, arg->src, s_getbyte, 0, 0);
1249 if (NIL_P(v)) rb_eof_error();
1250 c = (unsigned char)NUM2CHR(v);
1256NORETURN(static void long_toobig(int size));
1259long_toobig(int size)
1261 rb_raise(rb_eTypeError, "long too big for this architecture (size "
1262 STRINGIZE(SIZEOF_LONG)", given %d)", size);
1266r_long(struct load_arg *arg)
1269 int c = (signed char)r_byte(arg);
1272 if (c == 0) return 0;
1274 if (4 < c && c < 128) {
1277 if (c > (int)sizeof(long)) long_toobig(c);
1280 x |= (long)r_byte(arg) << (8*i);
1284 if (-129 < c && c < -4) {
1288 if (c > (int)sizeof(long)) long_toobig(c);
1291 x &= ~((long)0xff << (8*i));
1292 x |= (long)r_byte(arg) << (8*i);
1299ruby_marshal_read_long(const char **buf, long len)
1303 struct load_arg arg;
1304 memset(&arg, 0, sizeof(arg));
1305 arg.src = rb_setup_fake_str(&src, *buf, len, 0);
1312r_bytes1(long len, struct load_arg *arg)
1314 VALUE str, n = LONG2NUM(len);
1316 str = load_funcall(arg, arg->src, s_read, 1, &n);
1317 if (NIL_P(str)) too_short();
1319 if (RSTRING_LEN(str) != len) too_short();
1325r_bytes1_buffered(long len, struct load_arg *arg)
1329 if (len <= arg->buflen) {
1330 str = rb_str_new(arg->buf+arg->offset, len);
1335 long buflen = arg->buflen;
1336 long readable = arg->readable + 1;
1337 long tmp_len, read_len, need_len = len - buflen;
1340 readable = readable < BUFSIZ ? readable : BUFSIZ;
1341 read_len = need_len > readable ? need_len : readable;
1342 n = LONG2NUM(read_len);
1343 tmp = load_funcall(arg, arg->src, s_read, 1, &n);
1344 if (NIL_P(tmp)) too_short();
1347 tmp_len = RSTRING_LEN(tmp);
1349 if (tmp_len < need_len) too_short();
1351 str = rb_str_new(arg->buf+arg->offset, buflen);
1352 rb_str_cat(str, RSTRING_PTR(tmp), need_len);
1354 if (tmp_len > need_len) {
1355 buflen = tmp_len - need_len;
1356 memcpy(arg->buf, RSTRING_PTR(tmp)+need_len, buflen);
1357 arg->buflen = buflen;
/*
 * Read a length-prefixed byte string: the length is read with r_long() and
 * the bytes with r_bytes0().  `arg` is evaluated twice.
 */
1368#define r_bytes(arg) r_bytes0(r_long(arg), (arg))
1371r_bytes0(long len, struct load_arg *arg)
1375 if (len == 0) return rb_str_new(0, 0);
1376 if (RB_TYPE_P(arg->src, T_STRING)) {
1377 if (RSTRING_LEN(arg->src) - arg->offset >= len) {
1378 str = rb_str_new(RSTRING_PTR(arg->src)+arg->offset, len);
1386 if (arg->readable > 0 || arg->buflen > 0) {
1387 str = r_bytes1_buffered(len, arg);
1390 str = r_bytes1(len, arg);
1397name_equal(const char *name, size_t nlen, const char *p, long l)
1399 if ((size_t)l != nlen || *p != *name) return 0;
1400 return nlen == 1 || memcmp(p+1, name+1, nlen-1) == 0;
1404sym2encidx(VALUE sym, VALUE val)
1406 static const char name_encoding[8] = "encoding";
1409 if (rb_enc_get_index(sym) != ENCINDEX_US_ASCII) return -1;
1410 RSTRING_GETMEM(sym, p, l);
1411 if (l <= 0) return -1;
1412 if (name_equal(name_encoding, sizeof(name_encoding), p, l)) {
1413 int idx = rb_enc_find_index(StringValueCStr(val));
1416 if (name_equal(name_s_encoding_short, rb_strlen_lit(name_s_encoding_short), p, l)) {
1417 if (val == Qfalse) return rb_usascii_encindex();
1418 else if (val == Qtrue) return rb_utf8_encindex();
1425ruby2_keywords_flag_check(VALUE sym)
1429 if (rb_enc_get_index(sym) != ENCINDEX_US_ASCII) return 0;
1430 RSTRING_GETMEM(sym, p, l);
1431 if (l <= 0) return 0;
1432 if (name_equal(name_s_ruby2_keywords_flag, rb_strlen_lit(name_s_ruby2_keywords_flag), p, l)) {
1439r_symlink(struct load_arg *arg)
1442 long num = r_long(arg);
1444 if (!st_lookup(arg->symbols, num, &sym)) {
1445 rb_raise(rb_eArgError, "bad symbol");
1451r_symreal(struct load_arg *arg, int ivar)
1453 VALUE s = r_bytes(arg);
1456 st_index_t n = arg->symbols->num_entries;
1458 if (rb_enc_str_asciionly_p(s)) rb_enc_associate_index(s, ENCINDEX_US_ASCII);
1459 st_insert(arg->symbols, (st_data_t)n, (st_data_t)s);
1461 long num = r_long(arg);
1463 sym = r_symbol(arg);
1464 idx = sym2encidx(sym, r_object(arg));
1468 rb_enc_associate_index(s, idx);
1469 if (rb_enc_str_coderange(s) == ENC_CODERANGE_BROKEN) {
1470 rb_raise(rb_eArgError, "invalid byte sequence in %s: %+"PRIsVALUE,
1471 rb_enc_name(rb_enc_from_index(idx)), s);
1479r_symbol(struct load_arg *arg)
1484 switch ((type = r_byte(arg))) {
1486 rb_raise(rb_eArgError, "dump format error for symbol(0x%x)", type);
1491 return r_symreal(arg, ivar);
1494 rb_raise(rb_eArgError, "dump format error (symlink with encoding)");
1496 return r_symlink(arg);
1501r_unique(struct load_arg *arg)
1503 return r_symbol(arg);
1507r_string(struct load_arg *arg)
1509 return r_bytes(arg);
1513r_entry0(VALUE v, st_index_t num, struct load_arg *arg)
1515 st_data_t real_obj = (st_data_t)v;
1516 if (arg->compat_tbl) {
1517 /* real_obj is kept if not found */
1518 st_lookup(arg->compat_tbl, v, &real_obj);
1520 st_insert(arg->data, num, real_obj);
1521 st_insert(arg->partial_objects, (st_data_t)real_obj, Qtrue);
1526r_fixup_compat(VALUE v, struct load_arg *arg)
1529 st_data_t key = (st_data_t)v;
1530 if (arg->compat_tbl && st_delete(arg->compat_tbl, &key, &data)) {
1531 VALUE real_obj = (VALUE)data;
1532 rb_alloc_func_t allocator = rb_get_alloc_func(CLASS_OF(real_obj));
1533 if (st_lookup(compat_allocator_tbl, (st_data_t)allocator, &data)) {
1534 marshal_compat_t *compat = (marshal_compat_t*)data;
1535 compat->loader(real_obj, v);
1543r_post_proc(VALUE v, struct load_arg *arg)
1546 v = load_funcall(arg, arg->proc, s_call, 1, &v);
1552r_leave(VALUE v, struct load_arg *arg, bool partial)
1554 v = r_fixup_compat(v, arg);
1557 st_data_t key = (st_data_t)v;
1558 st_delete(arg->partial_objects, &key, &data);
1559 v = r_post_proc(v, arg);
1565copy_ivar_i(st_data_t key, st_data_t val, st_data_t arg)
1567 VALUE obj = (VALUE)arg, value = (VALUE)val;
1570 if (!rb_ivar_defined(obj, vid))
1571 rb_ivar_set(obj, vid, value);
1576r_copy_ivar(VALUE v, VALUE data)
1578 rb_ivar_foreach(data, copy_ivar_i, (st_data_t)v);
1583r_ivar(VALUE obj, int *has_encoding, struct load_arg *arg)
1590 VALUE sym = r_symbol(arg);
1591 VALUE val = r_object(arg);
1592 int idx = sym2encidx(sym, val);
1594 if (rb_enc_capable(obj)) {
1595 rb_enc_associate_index(obj, idx);
1598 rb_raise(rb_eArgError, "%"PRIsVALUE" is not enc_capable", obj);
1600 if (has_encoding) *has_encoding = TRUE;
1602 else if (ruby2_keywords_flag_check(sym)) {
1603 if (RB_TYPE_P(obj, T_HASH)) {
1604 RHASH(obj)->basic.flags |= RHASH_PASS_AS_KEYWORDS;
1607 rb_raise(rb_eArgError, "ruby2_keywords flag is given but %"PRIsVALUE" is not a Hash", obj);
1611 rb_ivar_set(obj, rb_intern_str(sym), val);
1613 } while (--len > 0);
1618path2class(VALUE path)
1620 VALUE v = rb_path_to_class(path);
1622 if (!RB_TYPE_P(v, T_CLASS)) {
1623 rb_raise(rb_eArgError, "%"PRIsVALUE" does not refer to class", path);
/*
 * Resolve `path` with rb_path_to_class() and run the result through
 * must_be_module(), which raises ArgumentError ("... does not refer to
 * module") unless it is a T_MODULE.  `path` is evaluated twice.
 */
1628#define path2module(path) must_be_module(rb_path_to_class(path), path)
1631must_be_module(VALUE v, VALUE path)
1633 if (!RB_TYPE_P(v, T_MODULE)) {
1634 rb_raise(rb_eArgError, "%"PRIsVALUE" does not refer to module", path);
1640obj_alloc_by_klass(VALUE klass, struct load_arg *arg, VALUE *oldclass)
1643 rb_alloc_func_t allocator;
1645 allocator = rb_get_alloc_func(klass);
1646 if (st_lookup(compat_allocator_tbl, (st_data_t)allocator, &data)) {
1647 marshal_compat_t *compat = (marshal_compat_t*)data;
1648 VALUE real_obj = rb_obj_alloc(klass);
1649 VALUE obj = rb_obj_alloc(compat->oldclass);
1650 if (oldclass) *oldclass = compat->oldclass;
1652 if (!arg->compat_tbl) {
1653 arg->compat_tbl = rb_init_identtable();
1655 st_insert(arg->compat_tbl, (st_data_t)obj, (st_data_t)real_obj);
1659 return rb_obj_alloc(klass);
1663obj_alloc_by_path(VALUE path, struct load_arg *arg)
1665 return obj_alloc_by_klass(path2class(path), arg, 0);
1669append_extmod(VALUE obj, VALUE extmod)
1671 long i = RARRAY_LEN(extmod);
1673 VALUE m = RARRAY_AREF(extmod, --i);
1674 rb_extend_object(obj, m);
1679#define prohibit_ivar(type, str) do { \
1680 if (!ivp || !*ivp) break; \
1681 rb_raise(rb_eTypeError, \
1682 "can't override instance variable of "type" `%"PRIsVALUE"'", \
/*
 * Reads one marshaled value from arg and yields it as a VALUE.
 *
 * A type byte is read first with r_byte(); the remainder of the body is the
 * per-type handling.  An unrecognized type byte raises ArgumentError
 * ("dump format error(0x%x)").
 *
 *   arg     - load state (tables arg->data / arg->partial_objects, readable
 *             counter, ...).
 *   partial - forwarded to r_leave() by most branches; in the user-defined
 *             (_load) branch a false value triggers r_post_proc().
 *   ivp     - may be NULL; when non-NULL it points at a flag consulted by
 *             prohibit_ivar() and by the symbol branch.
 *   extmod  - nil, or an array collecting modules that are later applied to
 *             the loaded object (rb_extend_object / rb_prepend_module).
 *
 * NOTE(review): this listing omits many lines, including most case labels
 * and closing braces.  The per-branch notes below describe only the visible
 * statements; which TYPE_* constant selects each branch should be confirmed
 * against the full file.
 */
1687r_object0(struct load_arg *arg, bool partial, int *ivp, VALUE extmod)
1690 int type = r_byte(arg);
/*
 * Back-reference branch: the id must already be present in arg->data,
 * otherwise the dump is malformed.  If the referenced object is not listed
 * in arg->partial_objects, r_post_proc() is applied to it.
 */
1697 if (!st_lookup(arg->data, (st_data_t)id, &link)) {
1698 rb_raise(rb_eArgError, "dump format error (unlinked)");
1701 if (!st_lookup(arg->partial_objects, (st_data_t)v, &link)) {
1702 v = r_post_proc(v, arg);
/*
 * Wrapper branch: load the inner object as partial while passing &ivar;
 * if ivar is non-zero afterwards, read instance variables into the object
 * with r_ivar(), then finish with r_leave().
 */
1710 v = r_object0(arg, true, &ivar, extmod);
1711 if (ivar) r_ivar(v, NULL, arg);
1712 v = r_leave(v, arg, partial);
/*
 * Extended-object branch: a path is read with r_unique() and resolved with
 * rb_path_to_class().  extmod is created on demand as a temporary array.
 */
1718 VALUE path = r_unique(arg);
1719 VALUE m = rb_path_to_class(path);
1720 if (NIL_P(extmod)) extmod = rb_ary_tmp_new(0);
/*
 * When the path names a class, the object is loaded first and the class c
 * (assigned on a line not shown) must equal m and must not be a singleton;
 * the modules collected in extmod are then prepended to the object's
 * singleton class, popping from the end of the array.
 */
1722 if (RB_TYPE_P(m, T_CLASS)) { /* prepended */
1725 v = r_object0(arg, true, 0, Qnil);
1727 if (c != m || FL_TEST(c, FL_SINGLETON)) {
1728 rb_raise(rb_eArgError,
1729 "prepended class %"PRIsVALUE" differs from class %"PRIsVALUE,
1730 path, rb_class_name(c));
1732 c = rb_singleton_class(v);
1733 while (RARRAY_LEN(extmod) > 0) {
1734 m = rb_ary_pop(extmod);
1735 rb_prepend_module(c, m);
/*
 * Otherwise the path must name a module: it is pushed onto extmod, the
 * object is loaded with that array, and any modules still left in extmod
 * afterwards are applied with rb_extend_object().
 */
1739 must_be_module(m, path);
1740 rb_ary_push(extmod, m);
1742 v = r_object0(arg, true, 0, extmod);
1743 while (RARRAY_LEN(extmod) > 0) {
1744 m = rb_ary_pop(extmod);
1745 rb_extend_object(v, m);
/*
 * User-subclass branch: resolve the class c, reject singleton classes, and
 * load the underlying object.  The visible checks test for special
 * constants, T_OBJECT and T_CLASS values (their handling is not shown), and
 * for the case where v is a module or c does not inherit from v's current
 * class; in that case a fresh instance of c must have the same TYPE as v or
 * control jumps to format_error.  RBASIC_SET_CLASS() then rebinds v to c.
 */
1753 VALUE c = path2class(r_unique(arg));
1755 if (FL_TEST(c, FL_SINGLETON)) {
1756 rb_raise(rb_eTypeError, "singleton can't be loaded");
1758 v = r_object0(arg, partial, 0, extmod);
1759 if (rb_special_const_p(v) || RB_TYPE_P(v, T_OBJECT) || RB_TYPE_P(v, T_CLASS)) {
1762 if (RB_TYPE_P(v, T_MODULE) || !RTEST(rb_class_inherited_p(c, RBASIC(v)->klass))) {
1763 VALUE tmp = rb_obj_alloc(c);
1765 if (TYPE(v) != TYPE(tmp)) goto format_error;
1767 RBASIC_SET_CLASS(v, c);
1772 rb_raise(rb_eArgError, "dump format error (user class)");
/*
 * Four simple branches follow; the lines that assign v are not shown in
 * this listing.  Each ends with r_leave(v, arg, false), and the last one
 * first reads a long with r_long().
 */
1776 v = r_leave(v, arg, false);
1781 v = r_leave(v, arg, false);
1786 v = r_leave(v, arg, false);
1791 long i = r_long(arg);
1794 v = r_leave(v, arg, false);
/*
 * Floating-point branch: the textual payload is compared against the
 * special spellings "nan", "inf" and "-inf"; anything else is parsed with
 * strtod(), and the bytes remaining after the parsed prefix are handed to
 * load_mantissa() together with the parsed value.
 */
1800 VALUE str = r_bytes(arg);
1801 const char *ptr = RSTRING_PTR(str);
1803 if (strcmp(ptr, "nan") == 0) {
1806 else if (strcmp(ptr, "inf") == 0) {
1809 else if (strcmp(ptr, "-inf") == 0) {
1814 d = strtod(ptr, &e);
1815 d = load_mantissa(d, e, RSTRING_LEN(str) - (e - ptr));
1818 v = r_entry(v, arg);
1819 v = r_leave(v, arg, false);
/*
 * Big-integer branch: len * 2 bytes are read and unpacked as len 2-byte
 * little-endian words; the value is negative when sign is '-'.  The
 * temporary byte string is shrunk to length 0 once unpacked.
 */
1831 data = r_bytes0(len * 2, arg);
1832 v = rb_integer_unpack(RSTRING_PTR(data), len, 2, 0,
1833 INTEGER_PACK_LITTLE_ENDIAN | (sign == '-' ? INTEGER_PACK_NEGATIVE : 0));
1834 rb_str_resize(data, 0L);
1835 v = r_entry(v, arg);
1836 v = r_leave(v, arg, false);
/* String branch: read with r_string() and register via r_entry(). */
1841 v = r_entry(r_string(arg), arg);
1842 v = r_leave(v, arg, partial);
/*
 * Regexp branch: reads the source bytes and an options byte, and reserves
 * an entry index with r_prepare() before the Regexp exists.  r_ivar() is
 * called with &has_encoding (the guarding condition is not shown).
 */
1847 VALUE str = r_bytes(arg);
1848 int options = r_byte(arg);
1849 int has_encoding = FALSE;
1850 st_index_t idx = r_prepare(arg);
1853 r_ivar(str, &has_encoding, arg);
/*
 * Without encoding information the source is rewritten in place (src is
 * copied to dst within the same buffer) while bs counts backslashes; the
 * letters listed in the case labels are the escapes singled out for
 * removal (the action taken for them is on lines not shown).  The string
 * is then truncated to the rewritten length.
 */
1856 if (!has_encoding) {
1857 /* 1.8 compatibility; remove escapes undefined in 1.8 */
1858 char *ptr = RSTRING_PTR(str), *dst = ptr, *src = ptr;
1859 long len = RSTRING_LEN(str);
1861 for (; len-- > 0; *dst++ = *src++) {
1863 case '\\': bs++; break;
1864 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1865 case 'm': case 'o': case 'p': case 'q': case 'u': case 'y':
1866 case 'E': case 'F': case 'H': case 'I': case 'J': case 'K':
1867 case 'L': case 'N': case 'O': case 'P': case 'Q': case 'R':
1868 case 'S': case 'T': case 'U': case 'V': case 'X': case 'Y':
1871 default: bs = 0; break;
1874 rb_str_set_len(str, dst - ptr);
/* The Regexp is stored under the index reserved above. */
1876 v = r_entry0(rb_reg_new_str(str, options), idx, arg);
1877 v = r_leave(v, arg, partial);
/*
 * Array branch: the array is created with capacity len and registered with
 * r_entry() before its elements are read, then arg->readable is raised by
 * len - 1 and elements are appended one r_object() at a time.
 */
1883 long len = r_long(arg);
1885 v = rb_ary_new2(len);
1886 v = r_entry(v, arg);
1887 arg->readable += len - 1;
1889 rb_ary_push(v, r_object(arg));
1892 v = r_leave(v, arg, partial);
/*
 * Hash branch: same shape as the array branch, reading a key and then a
 * value per entry.  For TYPE_HASH_DEF one more object is read and installed
 * as the hash's default value.
 */
1900 long len = r_long(arg);
1902 v = rb_hash_new_with_size(len);
1903 v = r_entry(v, arg);
1904 arg->readable += (len - 1) * 2;
1906 VALUE key = r_object(arg);
1907 VALUE value = r_object(arg);
1908 rb_hash_aset(v, key, value);
1912 if (type == TYPE_HASH_DEF) {
1913 RHASH_SET_IFNONE(v, r_object(arg));
1915 v = r_leave(v, arg, partial);
/*
 * Struct branch: the entry index is reserved before the class is resolved.
 * The allocated object must be a T_STRUCT and the class must declare
 * exactly len members, otherwise TypeError is raised.
 */
1924 st_index_t idx = r_prepare(arg);
1925 VALUE klass = path2class(r_unique(arg));
1926 long len = r_long(arg);
1928 v = rb_obj_alloc(klass);
1929 if (!RB_TYPE_P(v, T_STRUCT)) {
1930 rb_raise(rb_eTypeError, "class %"PRIsVALUE" not a struct", rb_class_name(klass));
1932 mem = rb_struct_s_members(klass);
1933 if (RARRAY_LEN(mem) != len) {
1934 rb_raise(rb_eTypeError, "struct %"PRIsVALUE" not compatible (struct size differs)",
1935 rb_class_name(klass));
1938 arg->readable += (len - 1) * 2;
1939 v = r_entry0(v, idx, arg);
1940 values = rb_ary_new2(len);
/*
 * For keyword_init structs the values are gathered into a single hash that
 * becomes the only element of values; otherwise they are pushed
 * positionally.
 */
1942 VALUE keywords = Qfalse;
1943 if (RTEST(rb_struct_s_keyword_init(klass))) {
1944 keywords = rb_hash_new();
1945 rb_ary_push(values, keywords);
/*
 * Each dumped member name must equal the class's member name at the same
 * position; a mismatch raises TypeError.
 */
1948 for (i=0; i<len; i++) {
1949 VALUE n = rb_sym2str(RARRAY_AREF(mem, i));
1950 slot = r_symbol(arg);
1952 if (!rb_str_equal(n, slot)) {
1953 rb_raise(rb_eTypeError, "struct %"PRIsVALUE" not compatible (:%"PRIsVALUE" for :%"PRIsVALUE")",
1954 rb_class_name(klass),
1958 rb_hash_aset(keywords, RARRAY_AREF(mem, i), r_object(arg));
1961 rb_ary_push(values, r_object(arg));
1966 rb_struct_initialize(v, values);
1967 v = r_leave(v, arg, partial);
/*
 * User-defined (_load) branch: the class must respond to _load.  The
 * payload string is read (r_ivar() may attach instance variables to it; the
 * guarding condition is not shown) and passed to klass._load.  If the
 * class's allocator is registered in compat_allocator_tbl, the result is
 * passed through that entry's loader.  r_post_proc() runs unless partial.
 */
1974 VALUE name = r_unique(arg);
1975 VALUE klass = path2class(name);
1979 if (!rb_obj_respond_to(klass, s_load, TRUE)) {
1980 rb_raise(rb_eTypeError, "class %"PRIsVALUE" needs to have method `_load'",
1983 data = r_string(arg);
1985 r_ivar(data, NULL, arg);
1988 v = load_funcall(arg, klass, s_load, 1, &data);
1989 v = r_entry(v, arg);
1990 if (st_lookup(compat_allocator_tbl, (st_data_t)rb_get_alloc_func(klass), &d)) {
1991 marshal_compat_t *compat = (marshal_compat_t*)d;
1992 v = compat->loader(klass, v);
1994 if (!partial) v = r_post_proc(v, arg);
/*
 * marshal_load branch: allocate the object, apply pending extension
 * modules, require a marshal_load method, register the object, read the
 * dumped data and call marshal_load with it.  Afterwards the compat fixup,
 * ivar copy and post-proc run in that order, and extmod is emptied
 * (extensions are re-applied first when oldclass is set).
 */
1998 case TYPE_USRMARSHAL:
2000 VALUE name = r_unique(arg);
2001 VALUE klass = path2class(name);
2005 v = obj_alloc_by_klass(klass, arg, &oldclass);
2006 if (!NIL_P(extmod)) {
2007 /* for the case marshal_load is overridden */
2008 append_extmod(v, extmod);
2010 if (!rb_obj_respond_to(v, s_mload, TRUE)) {
2011 rb_raise(rb_eTypeError, "instance of %"PRIsVALUE" needs to have method `marshal_load'",
2014 v = r_entry(v, arg);
2015 data = r_object(arg);
2016 load_funcall(arg, v, s_mload, 1, &data);
2017 v = r_fixup_compat(v, arg);
2018 v = r_copy_ivar(v, data);
2019 v = r_post_proc(v, arg);
2020 if (!NIL_P(extmod)) {
2021 if (oldclass) append_extmod(v, extmod);
2022 rb_ary_clear(extmod);
/*
 * Plain-object branch: allocate by class path, insist on T_OBJECT, store
 * the object under the reserved index, then read its instance variables.
 */
2029 st_index_t idx = r_prepare(arg);
2030 v = obj_alloc_by_path(r_unique(arg), arg);
2031 if (!RB_TYPE_P(v, T_OBJECT)) {
2032 rb_raise(rb_eArgError, "dump format error");
2034 v = r_entry0(v, idx, arg);
2035 r_ivar(v, NULL, arg);
2036 v = r_leave(v, arg, partial);
/*
 * Data-object branch: the allocated object must be T_DATA and respond to
 * _load_data; the nested object is read and passed to _load_data.
 */
2042 VALUE name = r_unique(arg);
2043 VALUE klass = path2class(name);
2047 v = obj_alloc_by_klass(klass, arg, &oldclass);
2048 if (!RB_TYPE_P(v, T_DATA)) {
2049 rb_raise(rb_eArgError, "dump format error");
2051 v = r_entry(v, arg);
2052 if (!rb_obj_respond_to(v, s_load_data, TRUE)) {
2053 rb_raise(rb_eTypeError,
2054 "class %"PRIsVALUE" needs to have instance method `_load_data'",
2057 r = r_object0(arg, partial, 0, extmod);
2058 load_funcall(arg, v, s_load_data, 1, &r);
2059 v = r_leave(v, arg, partial);
/*
 * Old-style class-or-module reference: resolved with rb_path_to_class(),
 * so either kind is accepted; instance variables are prohibited.
 */
2063 case TYPE_MODULE_OLD:
2065 VALUE str = r_bytes(arg);
2067 v = rb_path_to_class(str);
2068 prohibit_ivar("class/module", str);
2069 v = r_entry(v, arg);
2070 v = r_leave(v, arg, partial);
/* Class reference: resolved with path2class(); ivars are prohibited. */
2076 VALUE str = r_bytes(arg);
2078 v = path2class(str);
2079 prohibit_ivar("class", str);
2080 v = r_entry(v, arg);
2081 v = r_leave(v, arg, partial);
/* Module reference: resolved with path2module(); ivars are prohibited. */
2087 VALUE str = r_bytes(arg);
2089 v = path2module(str);
2090 prohibit_ivar("module", str);
2091 v = r_entry(v, arg);
2092 v = r_leave(v, arg, partial);
/*
 * Symbol branch: r_symreal() is called with *ivp on one path and with 0 on
 * the other (the selecting condition is not shown); the result is interned
 * and passed through r_leave().
 */
2098 v = r_symreal(arg, *ivp);
2102 v = r_symreal(arg, 0);
2104 v = rb_str_intern(v);
2105 v = r_leave(v, arg, partial);
/* Symbol back-reference: resolved with r_symlink() and interned. */
2109 v = rb_str_intern(r_symlink(arg));
/* Unknown type byte. */
2113 rb_raise(rb_eArgError, "dump format error(0x%x)", type);
/* NOTE(review): the condition leading to this error is not visible here. */
2118 rb_raise(rb_eArgError, "dump format error (bad link)");
/*
 * Convenience entry point for reading one object from arg: delegates to
 * r_object0() with partial = false, no ivar flag pointer (0) and no pending
 * extension modules (Qnil).
 */
2125r_object(struct load_arg *arg)
2127 return r_object0(arg, false, 0, Qnil);
/*
 * Releases the st tables held by a load_arg.
 *
 * Returns early when arg->symbols is not set, so nothing below runs in that
 * case.  Otherwise the symbols, data and partial_objects tables are freed;
 * compat_tbl is freed only if it was created.  The visible lines reset
 * partial_objects and compat_tbl to 0 after freeing.
 * NOTE(review): lines before the early return and between the frees are
 * missing from this listing.
 */
2131clear_load_arg(struct load_arg *arg)
2140 if (!arg->symbols) return;
2141 st_free_table(arg->symbols);
2143 st_free_table(arg->data);
2145 st_free_table(arg->partial_objects);
2146 arg->partial_objects = 0;
2147 if (arg->compat_tbl) {
2148 st_free_table(arg->compat_tbl);
2149 arg->compat_tbl = 0;
2155 * load( source [, proc] ) -> obj
2156 * restore( source [, proc] ) -> obj
2158 * Returns the result of converting the serialized data in source into a
2159 * Ruby object (possibly with associated subordinate objects). source
2160 * may be either an instance of IO or an object that responds to
2161 * to_str. If proc is specified, each object will be passed to the proc, as the object
2162 * is being deserialized.
2164 * Never pass untrusted data (including user supplied input) to this method.
2165 * Please see the overview for further details.
/*
 * Method body registered as both Marshal.load and Marshal.restore.
 * Accepts one or two arguments (rb_check_arity raises otherwise); the
 * optional second argument is the proc, defaulting to nil.  The actual
 * loading is delegated to rb_marshal_load_with_proc().
 * NOTE(review): the assignment of port is on a line not shown here.
 */
2168marshal_load(int argc, VALUE *argv, VALUE _)
2172 rb_check_arity(argc, 1, 2);
2174 proc = argc > 1 ? argv[1] : Qnil;
2175 return rb_marshal_load_with_proc(port, proc);
/*
 * Loads marshaled data from port, optionally handing proc to the loader.
 *
 * port is first tried as a String (rb_check_string_type); otherwise it must
 * respond to both getbyte and read, in which case binmode is invoked on it
 * if available.  A load_arg is allocated inside a TypedData wrapper so that
 * it is reclaimed even if an exception unwinds this function.
 *
 * The first two bytes are the format's major and minor version.  A
 * different major version, or a minor version newer than MARSHAL_MINOR,
 * raises TypeError after clearing the load state.  An older minor version
 * only produces a warning, and only when ruby_verbose is set.
 *
 * NOTE(review): the line that performs the actual object read and the
 * return statement are not visible in this listing.
 */
2179rb_marshal_load_with_proc(VALUE port, VALUE proc)
2183 VALUE wrapper; /* used to avoid memory leak in case of exception */
2184 struct load_arg *arg;
2186 v = rb_check_string_type(port);
2190 else if (rb_respond_to(port, s_getbyte) && rb_respond_to(port, s_read)) {
2191 rb_check_funcall(port, s_binmode, 0, 0);
2196 wrapper = TypedData_Make_Struct(0, struct load_arg, &load_arg_data, arg);
/* Fresh lookup tables for this load; compat_tbl starts out unset. */
2199 arg->symbols = st_init_numtable();
2200 arg->data = rb_init_identtable();
2201 arg->partial_objects = rb_init_identtable();
2202 arg->compat_tbl = 0;
/* NOTE(review): any condition guarding this allocation is not shown. */
2207 arg->buf = xmalloc(BUFSIZ);
2211 major = r_byte(arg);
2212 minor = r_byte(arg);
2213 if (major != MARSHAL_MAJOR || minor > MARSHAL_MINOR) {
2214 clear_load_arg(arg);
2215 rb_raise(rb_eTypeError, "incompatible marshal file format (can't be read)\n\
2216\tformat version %d.%d required; %d.%d given",
2217 MARSHAL_MAJOR, MARSHAL_MINOR, major, minor);
2219 if (RTEST(ruby_verbose) && minor != MARSHAL_MINOR) {
2220 rb_warn("incompatible marshal file format (can be read)\n\
2221\tformat version %d.%d required; %d.%d given",
2222 MARSHAL_MAJOR, MARSHAL_MINOR, major, minor);
/* The proc is installed only when one was supplied. */
2225 if (!NIL_P(proc)) arg->proc = proc;
/* Release the tables, then keep wrapper alive up to this point. */
2227 clear_load_arg(arg);
2228 RB_GC_GUARD(wrapper);
2234 * The marshaling library converts collections of Ruby objects into a
2235 * byte stream, allowing them to be stored outside the currently
2236 * active script. This data may subsequently be read and the original
2237 * objects reconstituted.
2239 * Marshaled data has major and minor version numbers stored along
2240 * with the object information. In normal use, marshaling can only
2241 * load data written with the same major version number and an equal
2242 * or lower minor version number. If Ruby's ``verbose'' flag is set
2243 * (normally using -d, -v, -w, or --verbose), a warning is issued
2244 * unless the minor number also matches exactly. Marshal versioning is independent of
2245 * Ruby's version numbers. You can extract the version by reading the
2246 * first two bytes of marshaled data.
2248 * str = Marshal.dump("thing")
2249 * RUBY_VERSION #=> "1.9.0"
2253 * Some objects cannot be dumped: if the objects to be dumped include
2254 * bindings, procedure or method objects, instances of class IO, or
2255 * singleton objects, a TypeError will be raised.
2257 * If your class has special serialization needs (for example, if you
2258 * want to serialize in some specific format), or if it contains
2259 * objects that would otherwise not be serializable, you can implement
2260 * your own serialization strategy.
2262 * There are two methods of doing this: your object can define either
2263 * marshal_dump and marshal_load or _dump and _load. marshal_dump will take
2264 * precedence over _dump if both are defined. marshal_dump may result in
2265 * smaller Marshal strings.
2267 * == Security considerations
2269 * By design, Marshal.load can deserialize almost any class loaded into the
2270 * Ruby process. In many cases this can lead to remote code execution if the
2271 * Marshal data is loaded from an untrusted source.
2273 * As a result, Marshal.load is not suitable as a general purpose serialization
2274 * format and you should never unmarshal user supplied input or other untrusted
2277 * If you need to deserialize untrusted data, use JSON or another serialization
2278 * format that is only able to load simple, 'primitive' types such as String,
2279 * Array, Hash, etc. Never allow user input to specify arbitrary types to
2282 * == marshal_dump and marshal_load
2284 * When dumping an object the method marshal_dump will be called.
2285 * marshal_dump must return a result containing the information necessary for
2286 * marshal_load to reconstitute the object. The result can be any object.
2288 * When loading an object dumped using marshal_dump the object is first
2289 * allocated then marshal_load is called with the result from marshal_dump.
2290 * marshal_load must recreate the object from the information in the result.
2295 * def initialize name, version, data
2297 * @version = version
2305 * def marshal_load array
2306 * @name, @version = array
2310 * == _dump and _load
2312 * Use _dump and _load when you need to allocate the object you're restoring
2315 * When dumping an object the instance method _dump is called with an Integer
2316 * which indicates the maximum depth of objects to dump (a value of -1 implies
2317 * that you should disable depth checking). _dump must return a String
2318 * containing the information necessary to reconstitute the object.
2320 * The class method _load should take a String and use it to return an object
2321 * of the same class.
2326 * def initialize name, version, data
2328 * @version = version
2333 * [@name, @version].join ':'
2336 * def self._load args
2337 * new(*args.split(':'))
2341 * Since Marshal.dump outputs a string you can have _dump return a Marshal
2342 * string which is Marshal.loaded in _load for complex objects.
/*
 * Module set-up: defines the Marshal module, interns the method-name IDs
 * used by this file, and registers the module functions and version
 * constants.
 * NOTE(review): the enclosing function header and several set_id() lines
 * are not visible in this listing.
 */
2347 VALUE rb_mMarshal = rb_define_module("Marshal");
/* set_id(s_x) stores the interned form of the string name_s_x into s_x. */
2348#define set_id(sym) sym = rb_intern_const(name_##sym)
2353 set_id(s_dump_data);
2354 set_id(s_load_data);
2361 set_id(s_encoding_short);
2362 set_id(s_ruby2_keywords_flag);
/* "load" and "restore" share one implementation; -1 means variadic argc/argv. */
2364 rb_define_module_function(rb_mMarshal, "dump", marshal_dump, -1);
2365 rb_define_module_function(rb_mMarshal, "load", marshal_load, -1);
2366 rb_define_module_function(rb_mMarshal, "restore", marshal_load, -1);
/* Expose the format version implemented by this file. */
2369 rb_define_const(rb_mMarshal, "MAJOR_VERSION", INT2FIX(MARSHAL_MAJOR));
2371 rb_define_const(rb_mMarshal, "MINOR_VERSION", INT2FIX(MARSHAL_MINOR));
/*
 * Returns the file-level compat_allocator_tbl, creating it on first use.
 *
 * On creation the table is wrapped in a data object whose mark function is
 * mark_marshal_compat_t (which marks the newclass/oldclass of every entry),
 * and that wrapper is registered with rb_gc_register_mark_object() so the
 * classes referenced from the table are marked by the GC.
 * RUBY_UNTYPED_DATA_WARNING is redefined to 0 just before the untyped
 * Data_Wrap_Struct call.
 */
2375compat_allocator_table(void)
2377 if (compat_allocator_tbl) return compat_allocator_tbl;
2378 compat_allocator_tbl = st_init_numtable();
2379#undef RUBY_UNTYPED_DATA_WARNING
2380#define RUBY_UNTYPED_DATA_WARNING 0
2381 compat_allocator_tbl_wrapper =
2382 Data_Wrap_Struct(0, mark_marshal_compat_t, 0, compat_allocator_tbl);
2383 rb_gc_register_mark_object(compat_allocator_tbl_wrapper);
2384 return compat_allocator_tbl;
/*
 * Dumps obj to port by calling rb_marshal_dump_limited() with a limit of
 * -1 (presumably "no depth limit" -- confirm against
 * rb_marshal_dump_limited).
 */
2388rb_marshal_dump(VALUE obj, VALUE port)
2390 return rb_marshal_dump_limited(obj, port, -1);
/*
 * Loads marshaled data from port without a proc: equivalent to
 * rb_marshal_load_with_proc(port, Qnil).
 */
2394rb_marshal_load(VALUE port)
2396 return rb_marshal_load_with_proc(port, Qnil);
Thin wrapper to ruby/config.h.
Internal header for Array.
Internal header for Bignums.
Internal header for Class.
Internal header for Encoding.
Internal header for Hash.
Internal header for Object.
Internal header for Struct.
Internal header corresponding to util.c.
Internal header for RubyVM.
Internal header for Math.