Ruby 3.0.5p211 (2022-11-24 revision ba5cf0f7c52d4d35cc6a173c89eda98ceffa2dcf)
marshal.c
Go to the documentation of this file.
1/**********************************************************************
2
3 marshal.c -
4
5 $Author$
6 created at: Thu Apr 27 16:30:01 JST 1995
7
8 Copyright (C) 1993-2007 Yukihiro Matsumoto
9
10**********************************************************************/
11
13
14#include <math.h>
15#ifdef HAVE_FLOAT_H
16#include <float.h>
17#endif
18#ifdef HAVE_IEEEFP_H
19#include <ieeefp.h>
20#endif
21
22#include "encindex.h"
23#include "id_table.h"
24#include "internal.h"
25#include "internal/array.h"
26#include "internal/bignum.h"
27#include "internal/class.h"
28#include "internal/encoding.h"
29#include "internal/error.h"
30#include "internal/hash.h"
31#include "internal/object.h"
32#include "internal/struct.h"
33#include "internal/util.h"
34#include "internal/vm.h"
35#include "ruby/io.h"
36#include "ruby/ruby.h"
37#include "ruby/st.h"
38#include "ruby/util.h"
39
/* Bignum digits are dumped as 16-bit "shorts"; these helpers slice BDIGITs. */
#define BITSPERSHORT (2*CHAR_BIT)
#define SHORTMASK ((1<<BITSPERSHORT)-1)
#define SHORTDN(x) RSHIFT((x),BITSPERSHORT)

#if SIZEOF_SHORT == SIZEOF_BDIGIT
#define SHORTLEN(x) (x)
#else
/*
 * Returns how many shorts are needed to hold the `len` digits at `ds`,
 * not counting the all-zero high shorts of the most significant digit.
 * An empty digit array needs no shorts (guards the ds[len-1] access).
 */
static size_t
shortlen(size_t len, BDIGIT *ds)
{
    BDIGIT top;
    int used = 0;

    if (len == 0) return 0;

    /* count the shorts actually occupied in the most significant digit */
    for (top = ds[len-1]; top; top = SHORTDN(top)) {
        used++;
    }
    return (len - 1)*SIZEOF_BDIGIT/2 + used;
}
/* NOTE: relies on a local variable named `d` at the expansion site. */
#define SHORTLEN(x) shortlen((x),d)
#endif
62
/* Format version written as the first two bytes of every dump. */
#define MARSHAL_MAJOR   4
#define MARSHAL_MINOR   8

/* Immediate values. */
#define TYPE_NIL        '0'
#define TYPE_TRUE       'T'
#define TYPE_FALSE      'F'
#define TYPE_FIXNUM     'i'

/* Object type tags. */
#define TYPE_EXTENDED   'e'     /* one per module extending the object */
#define TYPE_UCLASS     'C'     /* user subclass of a built-in class */
#define TYPE_OBJECT     'o'
#define TYPE_DATA       'd'     /* dumped through _dump_data */
#define TYPE_USERDEF    'u'     /* dumped through _dump */
#define TYPE_USRMARSHAL 'U'     /* dumped through marshal_dump */
#define TYPE_FLOAT      'f'
#define TYPE_BIGNUM     'l'
#define TYPE_STRING     '"'
#define TYPE_REGEXP     '/'
#define TYPE_ARRAY      '['
#define TYPE_HASH       '{'
#define TYPE_HASH_DEF   '}'     /* hash followed by its default value */
#define TYPE_STRUCT     'S'
#define TYPE_MODULE_OLD 'M'     /* not emitted by the dump code in this part of the file */
#define TYPE_CLASS      'c'
#define TYPE_MODULE     'm'

/* Symbols and back-references to already dumped symbols. */
#define TYPE_SYMBOL     ':'
#define TYPE_SYMLINK    ';'

/* Instance-variable wrapper and back-references to already dumped objects. */
#define TYPE_IVAR       'I'
#define TYPE_LINK       '@'
94
95static ID s_dump, s_load, s_mdump, s_mload;
96static ID s_dump_data, s_load_data, s_alloc, s_call;
97static ID s_getbyte, s_read, s_write, s_binmode;
98static ID s_encoding_short, s_ruby2_keywords_flag;
99
100#define name_s_dump "_dump"
101#define name_s_load "_load"
102#define name_s_mdump "marshal_dump"
103#define name_s_mload "marshal_load"
104#define name_s_dump_data "_dump_data"
105#define name_s_load_data "_load_data"
106#define name_s_alloc "_alloc"
107#define name_s_call "call"
108#define name_s_getbyte "getbyte"
109#define name_s_read "read"
110#define name_s_write "write"
111#define name_s_binmode "binmode"
112#define name_s_encoding_short "E"
113#define name_s_ruby2_keywords_flag "K"
114
115typedef struct {
116 VALUE newclass;
117 VALUE oldclass;
118 VALUE (*dumper)(VALUE);
119 VALUE (*loader)(VALUE, VALUE);
120} marshal_compat_t;
121
122static st_table *compat_allocator_tbl;
123static VALUE compat_allocator_tbl_wrapper;
124static VALUE rb_marshal_dump_limited(VALUE obj, VALUE port, int limit);
125static VALUE rb_marshal_load_with_proc(VALUE port, VALUE proc);
126
127static int
128mark_marshal_compat_i(st_data_t key, st_data_t value, st_data_t _)
129{
130 marshal_compat_t *p = (marshal_compat_t *)value;
131 rb_gc_mark(p->newclass);
132 rb_gc_mark(p->oldclass);
133 return ST_CONTINUE;
134}
135
136static void
137mark_marshal_compat_t(void *tbl)
138{
139 if (!tbl) return;
140 st_foreach(tbl, mark_marshal_compat_i, 0);
141}
142
143static st_table *compat_allocator_table(void);
144
145void
146rb_marshal_define_compat(VALUE newclass, VALUE oldclass, VALUE (*dumper)(VALUE), VALUE (*loader)(VALUE, VALUE))
147{
148 marshal_compat_t *compat;
149 rb_alloc_func_t allocator = rb_get_alloc_func(newclass);
150
151 if (!allocator) {
152 rb_raise(rb_eTypeError, "no allocator");
153 }
154
155 compat = ALLOC(marshal_compat_t);
156 compat->newclass = Qnil;
157 compat->oldclass = Qnil;
158 compat->newclass = newclass;
159 compat->oldclass = oldclass;
160 compat->dumper = dumper;
161 compat->loader = loader;
162
163 st_insert(compat_allocator_table(), (st_data_t)allocator, (st_data_t)compat);
164}
165
166struct dump_arg {
167 VALUE str, dest;
168 st_table *symbols;
169 st_table *data;
170 st_table *compat_tbl;
171 st_table *encodings;
172};
173
174struct dump_call_arg {
175 VALUE obj;
176 struct dump_arg *arg;
177 int limit;
178};
179
180static VALUE
181check_dump_arg(VALUE ret, struct dump_arg *arg, const char *name)
182{
183 if (!arg->symbols) {
184 rb_raise(rb_eRuntimeError, "Marshal.dump reentered at %s",
185 name);
186 }
187 return ret;
188}
189
190static VALUE
191check_userdump_arg(VALUE obj, ID sym, int argc, const VALUE *argv,
192 struct dump_arg *arg, const char *name)
193{
194 VALUE ret = rb_funcallv(obj, sym, argc, argv);
195 VALUE klass = CLASS_OF(obj);
196 if (CLASS_OF(ret) == klass) {
197 rb_raise(rb_eRuntimeError, "%"PRIsVALUE"#%s returned same class instance",
198 klass, name);
199 }
200 return check_dump_arg(ret, arg, name);
201}
202
/*
 * Call helpers for user code during a dump.  `sym` must be one of the
 * s_* ID identifiers: its matching name_s_* string is pasted in for the
 * error message.  dump_funcall also rejects same-class results;
 * dump_check_funcall only calls the method if the receiver responds to it.
 */
#define dump_funcall(arg, obj, sym, argc, argv) \
    check_userdump_arg(obj, sym, argc, argv, arg, name_##sym)
#define dump_check_funcall(arg, obj, sym, argc, argv) \
    check_dump_arg(rb_check_funcall(obj, sym, argc, argv), arg, name_##sym)
207
208static void clear_dump_arg(struct dump_arg *arg);
209
210static void
211mark_dump_arg(void *ptr)
212{
213 struct dump_arg *p = ptr;
214 if (!p->symbols)
215 return;
216 rb_mark_set(p->symbols);
217 rb_mark_set(p->data);
218 rb_mark_hash(p->compat_tbl);
219 rb_gc_mark(p->str);
220}
221
/* GC free function: releases the tables, then the struct itself. */
static void
free_dump_arg(void *ptr)
{
    struct dump_arg *state = ptr;

    clear_dump_arg(state);
    xfree(state);
}
228
229static size_t
230memsize_dump_arg(const void *ptr)
231{
232 return sizeof(struct dump_arg);
233}
234
235static const rb_data_type_t dump_arg_data = {
236 "dump_arg",
237 {mark_dump_arg, free_dump_arg, memsize_dump_arg,},
238 0, 0, RUBY_TYPED_FREE_IMMEDIATELY
239};
240
241static VALUE
242must_not_be_anonymous(const char *type, VALUE path)
243{
244 char *n = RSTRING_PTR(path);
245
246 if (!rb_enc_asciicompat(rb_enc_get(path))) {
247 /* cannot occur? */
248 rb_raise(rb_eTypeError, "can't dump non-ascii %s name % "PRIsVALUE,
249 type, path);
250 }
251 if (n[0] == '#') {
252 rb_raise(rb_eTypeError, "can't dump anonymous %s % "PRIsVALUE,
253 type, path);
254 }
255 return path;
256}
257
258static VALUE
259class2path(VALUE klass)
260{
261 VALUE path = rb_class_path(klass);
262
263 must_not_be_anonymous((RB_TYPE_P(klass, T_CLASS) ? "class" : "module"), path);
264 if (rb_path_to_class(path) != rb_class_real(klass)) {
265 rb_raise(rb_eTypeError, "% "PRIsVALUE" can't be referred to", path);
266 }
267 return path;
268}
269
270int ruby_marshal_write_long(long x, char *buf);
271static void w_long(long, struct dump_arg*);
272static int w_encoding(VALUE encname, struct dump_call_arg *arg);
273static VALUE encoding_name(VALUE obj, struct dump_arg *arg);
274
275static void
276w_nbyte(const char *s, long n, struct dump_arg *arg)
277{
278 VALUE buf = arg->str;
279 rb_str_buf_cat(buf, s, n);
280 if (arg->dest && RSTRING_LEN(buf) >= BUFSIZ) {
281 rb_io_write(arg->dest, buf);
282 rb_str_resize(buf, 0);
283 }
284}
285
/* Writes a single byte. */
static void
w_byte(char c, struct dump_arg *arg)
{
    const char byte = c;

    w_nbyte(&byte, 1, arg);
}
291
/* Writes a length-prefixed byte sequence: the length as a long, then the bytes. */
static void
w_bytes(const char *s, long n, struct dump_arg *arg)
{
    w_long(n, arg);     /* length prefix */
    w_nbyte(s, n, arg); /* payload */
}

/* Length-prefixed write of a NUL-terminated C string (the NUL is not written). */
#define w_cstr(s, arg) w_bytes((s), strlen(s), (arg))
300
/* Writes the low 16 bits of `x`, least significant byte first. */
static void
w_short(int x, struct dump_arg *arg)
{
    const char low  = (char)(x & 0xff);
    const char high = (char)((x >> 8) & 0xff);

    w_byte(low, arg);
    w_byte(high, arg);
}
307
308static void
309w_long(long x, struct dump_arg *arg)
310{
311 char buf[sizeof(long)+1];
312 int i = ruby_marshal_write_long(x, buf);
313 if (i < 0) {
314 rb_raise(rb_eTypeError, "long too big to dump");
315 }
316 w_nbyte(buf, i, arg);
317}
318
/*
 * Encodes `x` into `buf` (which must hold at least sizeof(long)+1 bytes)
 * and returns the number of bytes written, or -1 when `x` does not fit in
 * 32 bits on platforms with a wider long.
 *
 *   0            -> one byte 0
 *   1..122       -> one byte x+5
 *   -123..-1     -> one byte (x-5)&0xff
 *   otherwise    -> a signed byte count followed by that many bytes of x,
 *                   least significant first (count negated for negatives)
 */
int
ruby_marshal_write_long(long x, char *buf)
{
    int i;

#if SIZEOF_LONG > 4
    {
        const long high = RSHIFT(x, 31);

        if (high != 0 && high != -1) {
            /* big long does not fit in 4 bytes */
            return -1;
        }
    }
#endif

    if (x == 0) {
        buf[0] = 0;
        return 1;
    }
    if (0 < x && x < 123) {
        buf[0] = (char)(x + 5);
        return 1;
    }
    if (-124 < x && x < 0) {
        buf[0] = (char)((x - 5)&0xff);
        return 1;
    }

    i = 1;
    while (i < (int)sizeof(long)+1) {
        buf[i] = (char)(x & 0xff);
        x = RSHIFT(x,8);
        if (x == 0) {           /* remaining bits are all sign: positive */
            buf[0] = i;
            break;
        }
        if (x == -1) {          /* remaining bits are all sign: negative */
            buf[0] = -i;
            break;
        }
        i++;
    }
    return i+1;
}
357
#ifdef DBL_MANT_DIG
#define DECIMAL_MANT (53-16) /* from IEEE754 double precision */

#if DBL_MANT_DIG > 32
#define MANT_BITS 32
#elif DBL_MANT_DIG > 24
#define MANT_BITS 24
#elif DBL_MANT_DIG > 16
#define MANT_BITS 16
#else
#define MANT_BITS 8
#endif

/*
 * Refines `d` with a trailing binary mantissa.  `buf`/`len` is the data
 * following the textual float; it is only used when it starts with a NUL
 * marker byte and at least one more byte follows.  Otherwise `d` is
 * returned unchanged.
 */
static double
load_mantissa(double d, const char *buf, long len)
{
    int exponent, negative, scale = 0;

    if (len == 0) return d;
    len--;
    if (len <= 0) return d;
    if (*buf++ != '\0') return d;   /* no binary mantissa mark */

    negative = d < 0;
    /* keep only the top DECIMAL_MANT bits of |d|, as an integer */
    modf(ldexp(frexp(fabs(d), &exponent), DECIMAL_MANT), &d);
    do {
        /* consume up to MANT_BITS/8 bytes, most significant first */
        const long chunk = len < MANT_BITS / 8 ? len : MANT_BITS / 8;
        unsigned long m = 0;
        long k;

        for (k = 0; k < chunk; k++) {
            m = (m << 8) | (*buf++ & 0xff);
        }
        scale -= len < MANT_BITS / 8 ? 8 * (unsigned)len : MANT_BITS;
        d += ldexp((double)m, scale);
    } while ((len -= MANT_BITS / 8) > 0);
    d = ldexp(d, exponent - DECIMAL_MANT);

    return negative ? -d : d;
}
#else
#define load_mantissa(d, buf, len) (d)
#endif
405
406#ifdef DBL_DIG
407#define FLOAT_DIG (DBL_DIG+2)
408#else
409#define FLOAT_DIG 17
410#endif
411
412static void
413w_float(double d, struct dump_arg *arg)
414{
415 char buf[FLOAT_DIG + (DECIMAL_MANT + 7) / 8 + 10];
416
417 if (isinf(d)) {
418 if (d < 0) w_cstr("-inf", arg);
419 else w_cstr("inf", arg);
420 }
421 else if (isnan(d)) {
422 w_cstr("nan", arg);
423 }
424 else if (d == 0.0) {
425 if (signbit(d)) w_cstr("-0", arg);
426 else w_cstr("0", arg);
427 }
428 else {
429 int decpt, sign, digs, len = 0;
430 char *e, *p = ruby_dtoa(d, 0, 0, &decpt, &sign, &e);
431 if (sign) buf[len++] = '-';
432 digs = (int)(e - p);
433 if (decpt < -3 || decpt > digs) {
434 buf[len++] = p[0];
435 if (--digs > 0) buf[len++] = '.';
436 memcpy(buf + len, p + 1, digs);
437 len += digs;
438 len += snprintf(buf + len, sizeof(buf) - len, "e%d", decpt - 1);
439 }
440 else if (decpt > 0) {
441 memcpy(buf + len, p, decpt);
442 len += decpt;
443 if ((digs -= decpt) > 0) {
444 buf[len++] = '.';
445 memcpy(buf + len, p + decpt, digs);
446 len += digs;
447 }
448 }
449 else {
450 buf[len++] = '0';
451 buf[len++] = '.';
452 if (decpt) {
453 memset(buf + len, '0', -decpt);
454 len -= decpt;
455 }
456 memcpy(buf + len, p, digs);
457 len += digs;
458 }
459 xfree(p);
460 w_bytes(buf, len, arg);
461 }
462}
463
464static void
465w_symbol(VALUE sym, struct dump_arg *arg)
466{
467 st_data_t num;
468 VALUE encname;
469
470 if (st_lookup(arg->symbols, sym, &num)) {
471 w_byte(TYPE_SYMLINK, arg);
472 w_long((long)num, arg);
473 }
474 else {
475 const VALUE orig_sym = sym;
476 sym = rb_sym2str(sym);
477 if (!sym) {
478 rb_raise(rb_eTypeError, "can't dump anonymous ID %"PRIdVALUE, sym);
479 }
480 encname = encoding_name(sym, arg);
481 if (NIL_P(encname) ||
482 rb_enc_str_coderange(sym) == ENC_CODERANGE_7BIT) {
483 encname = Qnil;
484 }
485 else {
486 w_byte(TYPE_IVAR, arg);
487 }
488 w_byte(TYPE_SYMBOL, arg);
489 w_bytes(RSTRING_PTR(sym), RSTRING_LEN(sym), arg);
490 st_add_direct(arg->symbols, orig_sym, arg->symbols->num_entries);
491 if (!NIL_P(encname)) {
492 struct dump_call_arg c_arg;
493 c_arg.limit = 1;
494 c_arg.arg = arg;
495 w_long(1L, arg);
496 w_encoding(encname, &c_arg);
497 }
498 }
499}
500
501static void
502w_unique(VALUE s, struct dump_arg *arg)
503{
504 must_not_be_anonymous("class", s);
505 w_symbol(rb_str_intern(s), arg);
506}
507
508static void w_object(VALUE,struct dump_arg*,int);
509
510static int
511hash_each(VALUE key, VALUE value, VALUE v)
512{
513 struct dump_call_arg *arg = (void *)v;
514 w_object(key, arg->arg, arg->limit);
515 w_object(value, arg->arg, arg->limit);
516 return ST_CONTINUE;
517}
518
519#define SINGLETON_DUMP_UNABLE_P(klass) \
520 (rb_id_table_size(RCLASS_M_TBL(klass)) > 0 || \
521 (RCLASS_IV_TBL(klass) && RCLASS_IV_TBL(klass)->num_entries > 1))
522
523static void
524w_extended(VALUE klass, struct dump_arg *arg, int check)
525{
526 if (check && FL_TEST(klass, FL_SINGLETON)) {
527 VALUE origin = RCLASS_ORIGIN(klass);
528 if (SINGLETON_DUMP_UNABLE_P(klass) ||
529 (origin != klass && SINGLETON_DUMP_UNABLE_P(origin))) {
530 rb_raise(rb_eTypeError, "singleton can't be dumped");
531 }
532 klass = RCLASS_SUPER(klass);
533 }
534 while (BUILTIN_TYPE(klass) == T_ICLASS) {
535 if (!FL_TEST(klass, RICLASS_IS_ORIGIN) ||
536 BUILTIN_TYPE(RBASIC(klass)->klass) != T_MODULE) {
537 VALUE path = rb_class_name(RBASIC(klass)->klass);
538 w_byte(TYPE_EXTENDED, arg);
539 w_unique(path, arg);
540 }
541 klass = RCLASS_SUPER(klass);
542 }
543}
544
545static void
546w_class(char type, VALUE obj, struct dump_arg *arg, int check)
547{
548 VALUE path;
549 st_data_t real_obj;
550 VALUE klass;
551
552 if (arg->compat_tbl &&
553 st_lookup(arg->compat_tbl, (st_data_t)obj, &real_obj)) {
554 obj = (VALUE)real_obj;
555 }
556 klass = CLASS_OF(obj);
557 w_extended(klass, arg, check);
558 w_byte(type, arg);
559 path = class2path(rb_class_real(klass));
560 w_unique(path, arg);
561}
562
563static void
564w_uclass(VALUE obj, VALUE super, struct dump_arg *arg)
565{
566 VALUE klass = CLASS_OF(obj);
567
568 w_extended(klass, arg, TRUE);
569 klass = rb_class_real(klass);
570 if (klass != super) {
571 w_byte(TYPE_UCLASS, arg);
572 w_unique(class2path(klass), arg);
573 }
574}
575
576#define to_be_skipped_id(id) (id == rb_id_encoding() || id == s_encoding_short || id == s_ruby2_keywords_flag || !rb_id2str(id))
577
578struct w_ivar_arg {
579 struct dump_call_arg *dump;
580 st_data_t num_ivar;
581};
582
583static int
584w_obj_each(st_data_t key, st_data_t val, st_data_t a)
585{
586 ID id = (ID)key;
587 VALUE value = (VALUE)val;
588 struct w_ivar_arg *ivarg = (struct w_ivar_arg *)a;
589 struct dump_call_arg *arg = ivarg->dump;
590
591 if (to_be_skipped_id(id)) {
592 if (id == s_encoding_short) {
593 rb_warn("instance variable `"name_s_encoding_short"' on class %"PRIsVALUE" is not dumped",
594 CLASS_OF(arg->obj));
595 }
596 if (id == s_ruby2_keywords_flag) {
597 rb_warn("instance variable `"name_s_ruby2_keywords_flag"' on class %"PRIsVALUE" is not dumped",
598 CLASS_OF(arg->obj));
599 }
600 return ST_CONTINUE;
601 }
602 if (!ivarg->num_ivar) {
603 rb_raise(rb_eRuntimeError, "instance variable added to %"PRIsVALUE" instance",
604 CLASS_OF(arg->obj));
605 }
606 --ivarg->num_ivar;
607 w_symbol(ID2SYM(id), arg->arg);
608 w_object(value, arg->arg, arg->limit);
609 return ST_CONTINUE;
610}
611
612static int
613obj_count_ivars(st_data_t key, st_data_t val, st_data_t a)
614{
615 ID id = (ID)key;
616 if (!to_be_skipped_id(id)) ++*(st_index_t *)a;
617 return ST_CONTINUE;
618}
619
620static VALUE
621encoding_name(VALUE obj, struct dump_arg *arg)
622{
623 if (rb_enc_capable(obj)) {
624 int encidx = rb_enc_get_index(obj);
625 rb_encoding *enc = 0;
626 st_data_t name;
627
628 if (encidx <= 0 || !(enc = rb_enc_from_index(encidx))) {
629 return Qnil;
630 }
631
632 /* special treatment for US-ASCII and UTF-8 */
633 if (encidx == rb_usascii_encindex()) {
634 return Qfalse;
635 }
636 else if (encidx == rb_utf8_encindex()) {
637 return Qtrue;
638 }
639
640 if (arg->encodings ?
641 !st_lookup(arg->encodings, (st_data_t)rb_enc_name(enc), &name) :
642 (arg->encodings = st_init_strcasetable(), 1)) {
643 name = (st_data_t)rb_str_new_cstr(rb_enc_name(enc));
644 st_insert(arg->encodings, (st_data_t)rb_enc_name(enc), name);
645 }
646 return (VALUE)name;
647 }
648 else {
649 return Qnil;
650 }
651}
652
653static int
654w_encoding(VALUE encname, struct dump_call_arg *arg)
655{
656 int limit = arg->limit;
657 if (limit >= 0) ++limit;
658 switch (encname) {
659 case Qfalse:
660 case Qtrue:
661 w_symbol(ID2SYM(s_encoding_short), arg->arg);
662 w_object(encname, arg->arg, limit);
663 return 1;
664 case Qnil:
665 return 0;
666 }
667 w_symbol(ID2SYM(rb_id_encoding()), arg->arg);
668 w_object(encname, arg->arg, limit);
669 return 1;
670}
671
672static st_index_t
673has_ivars(VALUE obj, VALUE encname, VALUE *ivobj)
674{
675 st_index_t enc = !NIL_P(encname);
676 st_index_t num = 0;
677 st_index_t ruby2_keywords_flag = 0;
678
679 if (SPECIAL_CONST_P(obj)) goto generic;
680 switch (BUILTIN_TYPE(obj)) {
681 case T_OBJECT:
682 case T_CLASS:
683 case T_MODULE:
684 break; /* counted elsewhere */
685 case T_HASH:
686 ruby2_keywords_flag = RHASH(obj)->basic.flags & RHASH_PASS_AS_KEYWORDS ? 1 : 0;
687 /* fall through */
688 default:
689 generic:
690 rb_ivar_foreach(obj, obj_count_ivars, (st_data_t)&num);
691 if (ruby2_keywords_flag || num) *ivobj = obj;
692 }
693
694 return num + enc + ruby2_keywords_flag;
695}
696
697static void
698w_ivar_each(VALUE obj, st_index_t num, struct dump_call_arg *arg)
699{
700 struct w_ivar_arg ivarg = {arg, num};
701 if (!num) return;
702 rb_ivar_foreach(obj, w_obj_each, (st_data_t)&ivarg);
703 if (ivarg.num_ivar) {
704 rb_raise(rb_eRuntimeError, "instance variable removed from %"PRIsVALUE" instance",
705 CLASS_OF(arg->obj));
706 }
707}
708
709static void
710w_ivar(st_index_t num, VALUE ivobj, VALUE encname, struct dump_call_arg *arg)
711{
712 w_long(num, arg->arg);
713 num -= w_encoding(encname, arg);
714 if (RB_TYPE_P(ivobj, T_HASH) && (RHASH(ivobj)->basic.flags & RHASH_PASS_AS_KEYWORDS)) {
715 int limit = arg->limit;
716 if (limit >= 0) ++limit;
717 w_symbol(ID2SYM(s_ruby2_keywords_flag), arg->arg);
718 w_object(Qtrue, arg->arg, limit);
719 num--;
720 }
721 if (ivobj != Qundef && num) {
722 w_ivar_each(ivobj, num, arg);
723 }
724}
725
726static void
727w_objivar(VALUE obj, struct dump_call_arg *arg)
728{
729 st_data_t num = 0;
730
731 rb_ivar_foreach(obj, obj_count_ivars, (st_data_t)&num);
732 w_long(num, arg->arg);
733 w_ivar_each(obj, num, arg);
734}
735
736static void
737w_object(VALUE obj, struct dump_arg *arg, int limit)
738{
739 struct dump_call_arg c_arg;
740 VALUE ivobj = Qundef;
741 st_data_t num;
742 st_index_t hasiv = 0;
743 VALUE encname = Qnil;
744
745 if (limit == 0) {
746 rb_raise(rb_eArgError, "exceed depth limit");
747 }
748
749 if (limit > 0) limit--;
750 c_arg.limit = limit;
751 c_arg.arg = arg;
752 c_arg.obj = obj;
753
754 if (st_lookup(arg->data, obj, &num)) {
755 w_byte(TYPE_LINK, arg);
756 w_long((long)num, arg);
757 return;
758 }
759
760 if (obj == Qnil) {
761 w_byte(TYPE_NIL, arg);
762 }
763 else if (obj == Qtrue) {
764 w_byte(TYPE_TRUE, arg);
765 }
766 else if (obj == Qfalse) {
767 w_byte(TYPE_FALSE, arg);
768 }
769 else if (FIXNUM_P(obj)) {
770#if SIZEOF_LONG <= 4
771 w_byte(TYPE_FIXNUM, arg);
772 w_long(FIX2INT(obj), arg);
773#else
774 if (RSHIFT((long)obj, 31) == 0 || RSHIFT((long)obj, 31) == -1) {
775 w_byte(TYPE_FIXNUM, arg);
776 w_long(FIX2LONG(obj), arg);
777 }
778 else {
779 w_object(rb_int2big(FIX2LONG(obj)), arg, limit);
780 }
781#endif
782 }
783 else if (SYMBOL_P(obj)) {
784 w_symbol(obj, arg);
785 }
786 else if (FLONUM_P(obj)) {
787 st_add_direct(arg->data, obj, arg->data->num_entries);
788 w_byte(TYPE_FLOAT, arg);
789 w_float(RFLOAT_VALUE(obj), arg);
790 }
791 else {
792 VALUE v;
793
794 if (!RBASIC_CLASS(obj)) {
795 rb_raise(rb_eTypeError, "can't dump internal %s",
796 rb_builtin_type_name(BUILTIN_TYPE(obj)));
797 }
798
799 if (rb_obj_respond_to(obj, s_mdump, TRUE)) {
800 st_add_direct(arg->data, obj, arg->data->num_entries);
801
802 v = dump_funcall(arg, obj, s_mdump, 0, 0);
803 w_class(TYPE_USRMARSHAL, obj, arg, FALSE);
804 w_object(v, arg, limit);
805 return;
806 }
807 if (rb_obj_respond_to(obj, s_dump, TRUE)) {
808 VALUE ivobj2 = Qundef;
809 st_index_t hasiv2;
810 VALUE encname2;
811
812 v = INT2NUM(limit);
813 v = dump_funcall(arg, obj, s_dump, 1, &v);
814 if (!RB_TYPE_P(v, T_STRING)) {
815 rb_raise(rb_eTypeError, "_dump() must return string");
816 }
817 hasiv = has_ivars(obj, (encname = encoding_name(obj, arg)), &ivobj);
818 hasiv2 = has_ivars(v, (encname2 = encoding_name(v, arg)), &ivobj2);
819 if (hasiv2) {
820 hasiv = hasiv2;
821 ivobj = ivobj2;
822 encname = encname2;
823 }
824 if (hasiv) w_byte(TYPE_IVAR, arg);
825 w_class(TYPE_USERDEF, obj, arg, FALSE);
826 w_bytes(RSTRING_PTR(v), RSTRING_LEN(v), arg);
827 if (hasiv) {
828 w_ivar(hasiv, ivobj, encname, &c_arg);
829 }
830 st_add_direct(arg->data, obj, arg->data->num_entries);
831 return;
832 }
833
834 st_add_direct(arg->data, obj, arg->data->num_entries);
835
836 hasiv = has_ivars(obj, (encname = encoding_name(obj, arg)), &ivobj);
837 {
838 st_data_t compat_data;
839 rb_alloc_func_t allocator = rb_get_alloc_func(RBASIC(obj)->klass);
840 if (st_lookup(compat_allocator_tbl,
841 (st_data_t)allocator,
842 &compat_data)) {
843 marshal_compat_t *compat = (marshal_compat_t*)compat_data;
844 VALUE real_obj = obj;
845 obj = compat->dumper(real_obj);
846 if (!arg->compat_tbl) {
847 arg->compat_tbl = rb_init_identtable();
848 }
849 st_insert(arg->compat_tbl, (st_data_t)obj, (st_data_t)real_obj);
850 if (obj != real_obj && ivobj == Qundef) hasiv = 0;
851 }
852 }
853 if (hasiv) w_byte(TYPE_IVAR, arg);
854
855 switch (BUILTIN_TYPE(obj)) {
856 case T_CLASS:
857 if (FL_TEST(obj, FL_SINGLETON)) {
858 rb_raise(rb_eTypeError, "singleton class can't be dumped");
859 }
860 w_byte(TYPE_CLASS, arg);
861 {
862 VALUE path = class2path(obj);
863 w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg);
864 RB_GC_GUARD(path);
865 }
866 break;
867
868 case T_MODULE:
869 w_byte(TYPE_MODULE, arg);
870 {
871 VALUE path = class2path(obj);
872 w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg);
873 RB_GC_GUARD(path);
874 }
875 break;
876
877 case T_FLOAT:
878 w_byte(TYPE_FLOAT, arg);
879 w_float(RFLOAT_VALUE(obj), arg);
880 break;
881
882 case T_BIGNUM:
883 w_byte(TYPE_BIGNUM, arg);
884 {
885 char sign = BIGNUM_SIGN(obj) ? '+' : '-';
886 size_t len = BIGNUM_LEN(obj);
887 size_t slen;
888 size_t j;
889 BDIGIT *d = BIGNUM_DIGITS(obj);
890
891 slen = SHORTLEN(len);
892 if (LONG_MAX < slen) {
893 rb_raise(rb_eTypeError, "too big Bignum can't be dumped");
894 }
895
896 w_byte(sign, arg);
897 w_long((long)slen, arg);
898 for (j = 0; j < len; j++) {
899#if SIZEOF_BDIGIT > SIZEOF_SHORT
900 BDIGIT num = *d;
901 int i;
902
903 for (i=0; i<SIZEOF_BDIGIT; i+=SIZEOF_SHORT) {
904 w_short(num & SHORTMASK, arg);
905 num = SHORTDN(num);
906 if (j == len - 1 && num == 0) break;
907 }
908#else
909 w_short(*d, arg);
910#endif
911 d++;
912 }
913 }
914 break;
915
916 case T_STRING:
917 w_uclass(obj, rb_cString, arg);
918 w_byte(TYPE_STRING, arg);
919 w_bytes(RSTRING_PTR(obj), RSTRING_LEN(obj), arg);
920 break;
921
922 case T_REGEXP:
923 w_uclass(obj, rb_cRegexp, arg);
924 w_byte(TYPE_REGEXP, arg);
925 {
926 int opts = rb_reg_options(obj);
927 w_bytes(RREGEXP_SRC_PTR(obj), RREGEXP_SRC_LEN(obj), arg);
928 w_byte((char)opts, arg);
929 }
930 break;
931
932 case T_ARRAY:
933 w_uclass(obj, rb_cArray, arg);
934 w_byte(TYPE_ARRAY, arg);
935 {
936 long i, len = RARRAY_LEN(obj);
937
938 w_long(len, arg);
939 for (i=0; i<RARRAY_LEN(obj); i++) {
940 w_object(RARRAY_AREF(obj, i), arg, limit);
941 if (len != RARRAY_LEN(obj)) {
942 rb_raise(rb_eRuntimeError, "array modified during dump");
943 }
944 }
945 }
946 break;
947
948 case T_HASH:
949 w_uclass(obj, rb_cHash, arg);
950 if (NIL_P(RHASH_IFNONE(obj))) {
951 w_byte(TYPE_HASH, arg);
952 }
953 else if (FL_TEST(obj, RHASH_PROC_DEFAULT)) {
954 rb_raise(rb_eTypeError, "can't dump hash with default proc");
955 }
956 else {
957 w_byte(TYPE_HASH_DEF, arg);
958 }
959 w_long(rb_hash_size_num(obj), arg);
960 rb_hash_foreach(obj, hash_each, (st_data_t)&c_arg);
961 if (!NIL_P(RHASH_IFNONE(obj))) {
962 w_object(RHASH_IFNONE(obj), arg, limit);
963 }
964 break;
965
966 case T_STRUCT:
967 w_class(TYPE_STRUCT, obj, arg, TRUE);
968 {
969 long len = RSTRUCT_LEN(obj);
970 VALUE mem;
971 long i;
972
973 w_long(len, arg);
974 mem = rb_struct_members(obj);
975 for (i=0; i<len; i++) {
976 w_symbol(RARRAY_AREF(mem, i), arg);
977 w_object(RSTRUCT_GET(obj, i), arg, limit);
978 }
979 }
980 break;
981
982 case T_OBJECT:
983 w_class(TYPE_OBJECT, obj, arg, TRUE);
984 w_objivar(obj, &c_arg);
985 break;
986
987 case T_DATA:
988 {
989 VALUE v;
990
991 if (!rb_obj_respond_to(obj, s_dump_data, TRUE)) {
992 rb_raise(rb_eTypeError,
993 "no _dump_data is defined for class %"PRIsVALUE,
994 rb_obj_class(obj));
995 }
996 v = dump_funcall(arg, obj, s_dump_data, 0, 0);
997 w_class(TYPE_DATA, obj, arg, TRUE);
998 w_object(v, arg, limit);
999 }
1000 break;
1001
1002 default:
1003 rb_raise(rb_eTypeError, "can't dump %"PRIsVALUE,
1004 rb_obj_class(obj));
1005 break;
1006 }
1007 RB_GC_GUARD(obj);
1008 }
1009 if (hasiv) {
1010 w_ivar(hasiv, ivobj, encname, &c_arg);
1011 }
1012}
1013
1014static void
1015clear_dump_arg(struct dump_arg *arg)
1016{
1017 if (!arg->symbols) return;
1018 st_free_table(arg->symbols);
1019 arg->symbols = 0;
1020 st_free_table(arg->data);
1021 arg->data = 0;
1022 if (arg->compat_tbl) {
1023 st_free_table(arg->compat_tbl);
1024 arg->compat_tbl = 0;
1025 }
1026 if (arg->encodings) {
1027 st_free_table(arg->encodings);
1028 arg->encodings = 0;
1029 }
1030}
1031
1032NORETURN(static inline void io_needed(void));
1033static inline void
1034io_needed(void)
1035{
1036 rb_raise(rb_eTypeError, "instance of IO needed");
1037}
1038
1039/*
1040 * call-seq:
1041 * dump( obj [, anIO] , limit=-1 ) -> anIO
1042 *
1043 * Serializes obj and all descendant objects. If anIO is
1044 * specified, the serialized data will be written to it, otherwise the
1045 * data will be returned as a String. If limit is specified, the
1046 * traversal of subobjects will be limited to that depth. If limit is
1047 * negative, no checking of depth will be performed.
1048 *
1049 * class Klass
1050 * def initialize(str)
1051 * @str = str
1052 * end
1053 * def say_hello
1054 * @str
1055 * end
1056 * end
1057 *
1058 * (produces no output)
1059 *
1060 * o = Klass.new("hello\n")
1061 * data = Marshal.dump(o)
1062 * obj = Marshal.load(data)
1063 * obj.say_hello #=> "hello\n"
1064 *
1065 * Marshal can't dump following objects:
1066 * * anonymous Class/Module.
1067 * * objects which are related to system (ex: Dir, File::Stat, IO, File, Socket
1068 * and so on)
1069 * * an instance of MatchData, Data, Method, UnboundMethod, Proc, Thread,
1070 * ThreadGroup, Continuation
1071 * * objects which define singleton methods
1072 */
1073static VALUE
1074marshal_dump(int argc, VALUE *argv, VALUE _)
1075{
1076 VALUE obj, port, a1, a2;
1077 int limit = -1;
1078
1079 port = Qnil;
1080 rb_scan_args(argc, argv, "12", &obj, &a1, &a2);
1081 if (argc == 3) {
1082 if (!NIL_P(a2)) limit = NUM2INT(a2);
1083 if (NIL_P(a1)) io_needed();
1084 port = a1;
1085 }
1086 else if (argc == 2) {
1087 if (FIXNUM_P(a1)) limit = FIX2INT(a1);
1088 else if (NIL_P(a1)) io_needed();
1089 else port = a1;
1090 }
1091 return rb_marshal_dump_limited(obj, port, limit);
1092}
1093
1094VALUE
1095rb_marshal_dump_limited(VALUE obj, VALUE port, int limit)
1096{
1097 struct dump_arg *arg;
1098 VALUE wrapper; /* used to avoid memory leak in case of exception */
1099
1100 wrapper = TypedData_Make_Struct(0, struct dump_arg, &dump_arg_data, arg);
1101 arg->dest = 0;
1102 arg->symbols = st_init_numtable();
1103 arg->data = rb_init_identtable();
1104 arg->compat_tbl = 0;
1105 arg->encodings = 0;
1106 arg->str = rb_str_buf_new(0);
1107 if (!NIL_P(port)) {
1108 if (!rb_respond_to(port, s_write)) {
1109 io_needed();
1110 }
1111 arg->dest = port;
1112 dump_check_funcall(arg, port, s_binmode, 0, 0);
1113 }
1114 else {
1115 port = arg->str;
1116 }
1117
1118 w_byte(MARSHAL_MAJOR, arg);
1119 w_byte(MARSHAL_MINOR, arg);
1120
1121 w_object(obj, arg, limit);
1122 if (arg->dest) {
1123 rb_io_write(arg->dest, arg->str);
1124 rb_str_resize(arg->str, 0);
1125 }
1126 clear_dump_arg(arg);
1127 RB_GC_GUARD(wrapper);
1128
1129 return port;
1130}
1131
1132struct load_arg {
1133 VALUE src;
1134 char *buf;
1135 long buflen;
1136 long readable;
1137 long offset;
1138 st_table *symbols;
1139 st_table *data;
1140 st_table *partial_objects;
1141 VALUE proc;
1142 st_table *compat_tbl;
1143};
1144
1145static VALUE
1146check_load_arg(VALUE ret, struct load_arg *arg, const char *name)
1147{
1148 if (!arg->symbols) {
1149 rb_raise(rb_eRuntimeError, "Marshal.load reentered at %s",
1150 name);
1151 }
1152 return ret;
1153}
1154#define load_funcall(arg, obj, sym, argc, argv) \
1155 check_load_arg(rb_funcallv(obj, sym, argc, argv), arg, name_##sym)
1156
1157static void clear_load_arg(struct load_arg *arg);
1158
1159static void
1160mark_load_arg(void *ptr)
1161{
1162 struct load_arg *p = ptr;
1163 if (!p->symbols)
1164 return;
1165 rb_mark_tbl(p->symbols);
1166 rb_mark_tbl(p->data);
1167 rb_mark_tbl(p->partial_objects);
1168 rb_mark_hash(p->compat_tbl);
1169}
1170
/* GC free function: clears the load state, then releases the struct. */
static void
free_load_arg(void *ptr)
{
    struct load_arg *state = ptr;

    clear_load_arg(state);
    xfree(state);
}
1177
1178static size_t
1179memsize_load_arg(const void *ptr)
1180{
1181 return sizeof(struct load_arg);
1182}
1183
1184static const rb_data_type_t load_arg_data = {
1185 "load_arg",
1186 {mark_load_arg, free_load_arg, memsize_load_arg,},
1187 0, 0, RUBY_TYPED_FREE_IMMEDIATELY
1188};
1189
1190#define r_entry(v, arg) r_entry0((v), (arg)->data->num_entries, (arg))
1191static VALUE r_entry0(VALUE v, st_index_t num, struct load_arg *arg);
1192static VALUE r_object(struct load_arg *arg);
1193static VALUE r_symbol(struct load_arg *arg);
1194static VALUE path2class(VALUE path);
1195
1196NORETURN(static void too_short(void));
1197static void
1198too_short(void)
1199{
1200 rb_raise(rb_eArgError, "marshal data too short");
1201}
1202
1203static st_index_t
1204r_prepare(struct load_arg *arg)
1205{
1206 st_index_t idx = arg->data->num_entries;
1207
1208 st_insert(arg->data, (st_data_t)idx, (st_data_t)Qundef);
1209 return idx;
1210}
1211
1212static unsigned char
1213r_byte1_buffered(struct load_arg *arg)
1214{
1215 if (arg->buflen == 0) {
1216 long readable = arg->readable < BUFSIZ ? arg->readable : BUFSIZ;
1217 VALUE str, n = LONG2NUM(readable);
1218
1219 str = load_funcall(arg, arg->src, s_read, 1, &n);
1220 if (NIL_P(str)) too_short();
1221 StringValue(str);
1222 memcpy(arg->buf, RSTRING_PTR(str), RSTRING_LEN(str));
1223 arg->offset = 0;
1224 arg->buflen = RSTRING_LEN(str);
1225 }
1226 arg->buflen--;
1227 return arg->buf[arg->offset++];
1228}
1229
1230static int
1231r_byte(struct load_arg *arg)
1232{
1233 int c;
1234
1235 if (RB_TYPE_P(arg->src, T_STRING)) {
1236 if (RSTRING_LEN(arg->src) > arg->offset) {
1237 c = (unsigned char)RSTRING_PTR(arg->src)[arg->offset++];
1238 }
1239 else {
1240 too_short();
1241 }
1242 }
1243 else {
1244 if (arg->readable >0 || arg->buflen > 0) {
1245 c = r_byte1_buffered(arg);
1246 }
1247 else {
1248 VALUE v = load_funcall(arg, arg->src, s_getbyte, 0, 0);
1249 if (NIL_P(v)) rb_eof_error();
1250 c = (unsigned char)NUM2CHR(v);
1251 }
1252 }
1253 return c;
1254}
1255
1256NORETURN(static void long_toobig(int size));
1257
1258static void
1259long_toobig(int size)
1260{
1261 rb_raise(rb_eTypeError, "long too big for this architecture (size "
1262 STRINGIZE(SIZEOF_LONG)", given %d)", size);
1263}
1264
/*
 * Reads a long in the marshal encoding (see ruby_marshal_write_long):
 * a first signed byte that is 0, a small value biased by +/-5, or a
 * signed count of little-endian payload bytes that follow.
 *
 * The value is assembled in an unsigned accumulator so that placing a
 * byte in the most significant position never left-shifts into the sign
 * bit of a signed long.
 *
 * Raises TypeError when the byte count exceeds sizeof(long).
 */
static long
r_long(struct load_arg *arg)
{
    unsigned long bits;
    int c = (signed char)r_byte(arg);
    int i;

    if (c == 0) return 0;
    if (c > 0) {
        if (4 < c && c < 128) {
            return c - 5;
        }
        if (c > (int)sizeof(long)) long_toobig(c);
        bits = 0;
        for (i=0;i<c;i++) {
            bits |= (unsigned long)r_byte(arg) << (8*i);
        }
    }
    else {
        if (-129 < c && c < -4) {
            return c + 5;
        }
        c = -c;
        if (c > (int)sizeof(long)) long_toobig(c);
        bits = ~0UL;    /* start sign-extended, then overwrite the low bytes */
        for (i=0;i<c;i++) {
            bits &= ~(0xffUL << (8*i));
            bits |= (unsigned long)r_byte(arg) << (8*i);
        }
    }
    return (long)bits;
}
1297
1298long
1299ruby_marshal_read_long(const char **buf, long len)
1300{
1301 long x;
1302 struct RString src;
1303 struct load_arg arg;
1304 memset(&arg, 0, sizeof(arg));
1305 arg.src = rb_setup_fake_str(&src, *buf, len, 0);
1306 x = r_long(&arg);
1307 *buf += arg.offset;
1308 return x;
1309}
1310
1311static VALUE
1312r_bytes1(long len, struct load_arg *arg)
1313{
1314 VALUE str, n = LONG2NUM(len);
1315
1316 str = load_funcall(arg, arg->src, s_read, 1, &n);
1317 if (NIL_P(str)) too_short();
1318 StringValue(str);
1319 if (RSTRING_LEN(str) != len) too_short();
1320
1321 return str;
1322}
1323
1324static VALUE
1325r_bytes1_buffered(long len, struct load_arg *arg)
1326{
1327 VALUE str;
1328
1329 if (len <= arg->buflen) {
1330 str = rb_str_new(arg->buf+arg->offset, len);
1331 arg->offset += len;
1332 arg->buflen -= len;
1333 }
1334 else {
1335 long buflen = arg->buflen;
1336 long readable = arg->readable + 1;
1337 long tmp_len, read_len, need_len = len - buflen;
1338 VALUE tmp, n;
1339
1340 readable = readable < BUFSIZ ? readable : BUFSIZ;
1341 read_len = need_len > readable ? need_len : readable;
1342 n = LONG2NUM(read_len);
1343 tmp = load_funcall(arg, arg->src, s_read, 1, &n);
1344 if (NIL_P(tmp)) too_short();
1345 StringValue(tmp);
1346
1347 tmp_len = RSTRING_LEN(tmp);
1348
1349 if (tmp_len < need_len) too_short();
1350
1351 str = rb_str_new(arg->buf+arg->offset, buflen);
1352 rb_str_cat(str, RSTRING_PTR(tmp), need_len);
1353
1354 if (tmp_len > need_len) {
1355 buflen = tmp_len - need_len;
1356 memcpy(arg->buf, RSTRING_PTR(tmp)+need_len, buflen);
1357 arg->buflen = buflen;
1358 }
1359 else {
1360 arg->buflen = 0;
1361 }
1362 arg->offset = 0;
1363 }
1364
1365 return str;
1366}
1367
1368#define r_bytes(arg) r_bytes0(r_long(arg), (arg))
1369
1370static VALUE
1371r_bytes0(long len, struct load_arg *arg)
1372{
1373 VALUE str;
1374
1375 if (len == 0) return rb_str_new(0, 0);
1376 if (RB_TYPE_P(arg->src, T_STRING)) {
1377 if (RSTRING_LEN(arg->src) - arg->offset >= len) {
1378 str = rb_str_new(RSTRING_PTR(arg->src)+arg->offset, len);
1379 arg->offset += len;
1380 }
1381 else {
1382 too_short();
1383 }
1384 }
1385 else {
1386 if (arg->readable > 0 || arg->buflen > 0) {
1387 str = r_bytes1_buffered(len, arg);
1388 }
1389 else {
1390 str = r_bytes1(len, arg);
1391 }
1392 }
1393 return str;
1394}
1395
/*
 * Returns 1 when the `l` bytes at `p` are exactly the `nlen` bytes at
 * `name`, 0 otherwise.  The first byte is compared before falling back to
 * memcmp for the remainder; both buffers must hold at least one byte.
 */
static inline int
name_equal(const char *name, size_t nlen, const char *p, long l)
{
    if ((size_t)l != nlen) return 0;
    if (*p != *name) return 0;
    if (nlen == 1) return 1;
    return memcmp(p + 1, name + 1, nlen - 1) == 0;
}
1402
1403static int
1404sym2encidx(VALUE sym, VALUE val)
1405{
1406 static const char name_encoding[8] = "encoding";
1407 const char *p;
1408 long l;
1409 if (rb_enc_get_index(sym) != ENCINDEX_US_ASCII) return -1;
1410 RSTRING_GETMEM(sym, p, l);
1411 if (l <= 0) return -1;
1412 if (name_equal(name_encoding, sizeof(name_encoding), p, l)) {
1413 int idx = rb_enc_find_index(StringValueCStr(val));
1414 return idx;
1415 }
1416 if (name_equal(name_s_encoding_short, rb_strlen_lit(name_s_encoding_short), p, l)) {
1417 if (val == Qfalse) return rb_usascii_encindex();
1418 else if (val == Qtrue) return rb_utf8_encindex();
1419 /* bogus ignore */
1420 }
1421 return -1;
1422}
1423
1424static int
1425ruby2_keywords_flag_check(VALUE sym)
1426{
1427 const char *p;
1428 long l;
1429 if (rb_enc_get_index(sym) != ENCINDEX_US_ASCII) return 0;
1430 RSTRING_GETMEM(sym, p, l);
1431 if (l <= 0) return 0;
1432 if (name_equal(name_s_ruby2_keywords_flag, rb_strlen_lit(name_s_ruby2_keywords_flag), p, l)) {
1433 return 1;
1434 }
1435 return 0;
1436}
1437
1438static VALUE
1439r_symlink(struct load_arg *arg)
1440{
1441 st_data_t sym;
1442 long num = r_long(arg);
1443
1444 if (!st_lookup(arg->symbols, num, &sym)) {
1445 rb_raise(rb_eArgError, "bad symbol");
1446 }
1447 return (VALUE)sym;
1448}
1449
1450static VALUE
1451r_symreal(struct load_arg *arg, int ivar)
1452{
1453 VALUE s = r_bytes(arg);
1454 VALUE sym;
1455 int idx = -1;
1456 st_index_t n = arg->symbols->num_entries;
1457
1458 if (rb_enc_str_asciionly_p(s)) rb_enc_associate_index(s, ENCINDEX_US_ASCII);
1459 st_insert(arg->symbols, (st_data_t)n, (st_data_t)s);
1460 if (ivar) {
1461 long num = r_long(arg);
1462 while (num-- > 0) {
1463 sym = r_symbol(arg);
1464 idx = sym2encidx(sym, r_object(arg));
1465 }
1466 }
1467 if (idx > 0) {
1468 rb_enc_associate_index(s, idx);
1469 if (rb_enc_str_coderange(s) == ENC_CODERANGE_BROKEN) {
1470 rb_raise(rb_eArgError, "invalid byte sequence in %s: %+"PRIsVALUE,
1471 rb_enc_name(rb_enc_from_index(idx)), s);
1472 }
1473 }
1474
1475 return s;
1476}
1477
1478static VALUE
1479r_symbol(struct load_arg *arg)
1480{
1481 int type, ivar = 0;
1482
1483 again:
1484 switch ((type = r_byte(arg))) {
1485 default:
1486 rb_raise(rb_eArgError, "dump format error for symbol(0x%x)", type);
1487 case TYPE_IVAR:
1488 ivar = 1;
1489 goto again;
1490 case TYPE_SYMBOL:
1491 return r_symreal(arg, ivar);
1492 case TYPE_SYMLINK:
1493 if (ivar) {
1494 rb_raise(rb_eArgError, "dump format error (symlink with encoding)");
1495 }
1496 return r_symlink(arg);
1497 }
1498}
1499
1500static VALUE
1501r_unique(struct load_arg *arg)
1502{
1503 return r_symbol(arg);
1504}
1505
1506static VALUE
1507r_string(struct load_arg *arg)
1508{
1509 return r_bytes(arg);
1510}
1511
1512static VALUE
1513r_entry0(VALUE v, st_index_t num, struct load_arg *arg)
1514{
1515 st_data_t real_obj = (st_data_t)v;
1516 if (arg->compat_tbl) {
1517 /* real_obj is kept if not found */
1518 st_lookup(arg->compat_tbl, v, &real_obj);
1519 }
1520 st_insert(arg->data, num, real_obj);
1521 st_insert(arg->partial_objects, (st_data_t)real_obj, Qtrue);
1522 return v;
1523}
1524
1525static VALUE
1526r_fixup_compat(VALUE v, struct load_arg *arg)
1527{
1528 st_data_t data;
1529 st_data_t key = (st_data_t)v;
1530 if (arg->compat_tbl && st_delete(arg->compat_tbl, &key, &data)) {
1531 VALUE real_obj = (VALUE)data;
1532 rb_alloc_func_t allocator = rb_get_alloc_func(CLASS_OF(real_obj));
1533 if (st_lookup(compat_allocator_tbl, (st_data_t)allocator, &data)) {
1534 marshal_compat_t *compat = (marshal_compat_t*)data;
1535 compat->loader(real_obj, v);
1536 }
1537 v = real_obj;
1538 }
1539 return v;
1540}
1541
1542static VALUE
1543r_post_proc(VALUE v, struct load_arg *arg)
1544{
1545 if (arg->proc) {
1546 v = load_funcall(arg, arg->proc, s_call, 1, &v);
1547 }
1548 return v;
1549}
1550
1551static VALUE
1552r_leave(VALUE v, struct load_arg *arg, bool partial)
1553{
1554 v = r_fixup_compat(v, arg);
1555 if (!partial) {
1556 st_data_t data;
1557 st_data_t key = (st_data_t)v;
1558 st_delete(arg->partial_objects, &key, &data);
1559 v = r_post_proc(v, arg);
1560 }
1561 return v;
1562}
1563
1564static int
1565copy_ivar_i(st_data_t key, st_data_t val, st_data_t arg)
1566{
1567 VALUE obj = (VALUE)arg, value = (VALUE)val;
1568 ID vid = (ID)key;
1569
1570 if (!rb_ivar_defined(obj, vid))
1571 rb_ivar_set(obj, vid, value);
1572 return ST_CONTINUE;
1573}
1574
1575static VALUE
1576r_copy_ivar(VALUE v, VALUE data)
1577{
1578 rb_ivar_foreach(data, copy_ivar_i, (st_data_t)v);
1579 return v;
1580}
1581
1582static void
1583r_ivar(VALUE obj, int *has_encoding, struct load_arg *arg)
1584{
1585 long len;
1586
1587 len = r_long(arg);
1588 if (len > 0) {
1589 do {
1590 VALUE sym = r_symbol(arg);
1591 VALUE val = r_object(arg);
1592 int idx = sym2encidx(sym, val);
1593 if (idx >= 0) {
1594 if (rb_enc_capable(obj)) {
1595 rb_enc_associate_index(obj, idx);
1596 }
1597 else {
1598 rb_raise(rb_eArgError, "%"PRIsVALUE" is not enc_capable", obj);
1599 }
1600 if (has_encoding) *has_encoding = TRUE;
1601 }
1602 else if (ruby2_keywords_flag_check(sym)) {
1603 if (RB_TYPE_P(obj, T_HASH)) {
1604 RHASH(obj)->basic.flags |= RHASH_PASS_AS_KEYWORDS;
1605 }
1606 else {
1607 rb_raise(rb_eArgError, "ruby2_keywords flag is given but %"PRIsVALUE" is not a Hash", obj);
1608 }
1609 }
1610 else {
1611 rb_ivar_set(obj, rb_intern_str(sym), val);
1612 }
1613 } while (--len > 0);
1614 }
1615}
1616
1617static VALUE
1618path2class(VALUE path)
1619{
1620 VALUE v = rb_path_to_class(path);
1621
1622 if (!RB_TYPE_P(v, T_CLASS)) {
1623 rb_raise(rb_eArgError, "%"PRIsVALUE" does not refer to class", path);
1624 }
1625 return v;
1626}
1627
1628#define path2module(path) must_be_module(rb_path_to_class(path), path)
1629
1630static VALUE
1631must_be_module(VALUE v, VALUE path)
1632{
1633 if (!RB_TYPE_P(v, T_MODULE)) {
1634 rb_raise(rb_eArgError, "%"PRIsVALUE" does not refer to module", path);
1635 }
1636 return v;
1637}
1638
1639static VALUE
1640obj_alloc_by_klass(VALUE klass, struct load_arg *arg, VALUE *oldclass)
1641{
1642 st_data_t data;
1643 rb_alloc_func_t allocator;
1644
1645 allocator = rb_get_alloc_func(klass);
1646 if (st_lookup(compat_allocator_tbl, (st_data_t)allocator, &data)) {
1647 marshal_compat_t *compat = (marshal_compat_t*)data;
1648 VALUE real_obj = rb_obj_alloc(klass);
1649 VALUE obj = rb_obj_alloc(compat->oldclass);
1650 if (oldclass) *oldclass = compat->oldclass;
1651
1652 if (!arg->compat_tbl) {
1653 arg->compat_tbl = rb_init_identtable();
1654 }
1655 st_insert(arg->compat_tbl, (st_data_t)obj, (st_data_t)real_obj);
1656 return obj;
1657 }
1658
1659 return rb_obj_alloc(klass);
1660}
1661
1662static VALUE
1663obj_alloc_by_path(VALUE path, struct load_arg *arg)
1664{
1665 return obj_alloc_by_klass(path2class(path), arg, 0);
1666}
1667
1668static VALUE
1669append_extmod(VALUE obj, VALUE extmod)
1670{
1671 long i = RARRAY_LEN(extmod);
1672 while (i > 0) {
1673 VALUE m = RARRAY_AREF(extmod, --i);
1674 rb_extend_object(obj, m);
1675 }
1676 return obj;
1677}
1678
/*
 * Raises TypeError when the enclosing function's `ivp` pointer is non-NULL
 * and the flag it points to is set; does nothing otherwise.
 * `type` must be a string literal (it is pasted into the format string);
 * `str` is the value printed as the offending name.
 * NOTE: relies on a variable named `ivp` being in scope at the use site.
 */
1679#define prohibit_ivar(type, str) do { \
1680 if (!ivp || !*ivp) break; \
1681 rb_raise(rb_eTypeError, \
1682 "can't override instance variable of "type" `%"PRIsVALUE"'", \
1683 (str)); \
1684 } while (0)
1685
/*
 * Reads one object from the load source: consumes a type byte with
 * r_byte() and dispatches on it.
 *
 * arg     - loader state (source, object/symbol tables, optional proc).
 * partial - forwarded to r_leave() by most cases; when true r_leave()
 *           neither removes the object from arg->partial_objects nor runs
 *           the post-load proc.
 * ivp     - NULL, or a pointer to a flag set by the TYPE_IVAR case to say
 *           that instance variables follow the object.  Cases that read
 *           those variables themselves (regexp, _load data, symbol) store
 *           FALSE through it; class/module cases raise if it is set.
 * extmod  - Qnil, or the array of modules collected by enclosing
 *           TYPE_EXTENDED records.
 *
 * Returns the loaded object.  Raises ArgumentError for an unknown type
 * byte and when the result is Qundef; individual cases raise ArgumentError
 * or TypeError for malformed or incompatible data.
 *
 * NOTE(review): r_entry(), load_funcall() and load_mantissa() are defined
 * outside this chunk; r_entry() presumably registers the object under the
 * next free index of arg->data -- confirm against its definition.
 */
1686static VALUE
1687r_object0(struct load_arg *arg, bool partial, int *ivp, VALUE extmod)
1688{
1689 VALUE v = Qnil;
1690 int type = r_byte(arg);
1691 long id;
1692 st_data_t link;
1693
1694 switch (type) {
 /* Back-reference: an index into the object table arg->data. */
1695 case TYPE_LINK:
1696 id = r_long(arg);
1697 if (!st_lookup(arg->data, (st_data_t)id, &link)) {
1698 rb_raise(rb_eArgError, "dump format error (unlinked)");
1699 }
1700 v = (VALUE)link;
 /* the proc is only run for objects no longer listed as partial */
1701 if (!st_lookup(arg->partial_objects, (st_data_t)v, &link)) {
1702 v = r_post_proc(v, arg);
1703 }
1704 break;
1705
 /* An object followed by instance variables. */
1706 case TYPE_IVAR:
1707 {
1708 int ivar = TRUE;
1709
 /* the nested call clears `ivar` if it consumed the variables itself */
1710 v = r_object0(arg, true, &ivar, extmod);
1711 if (ivar) r_ivar(v, NULL, arg);
1712 v = r_leave(v, arg, partial);
1713 }
1714 break;
1715
 /* A module (or, for the prepended form, a class) path followed by the
  * object it applies to. */
1716 case TYPE_EXTENDED:
1717 {
1718 VALUE path = r_unique(arg);
1719 VALUE m = rb_path_to_class(path);
1720 if (NIL_P(extmod)) extmod = rb_ary_tmp_new(0);
1721
1722 if (RB_TYPE_P(m, T_CLASS)) { /* prepended */
1723 VALUE c;
1724
1725 v = r_object0(arg, true, 0, Qnil);
1726 c = CLASS_OF(v);
1727 if (c != m || FL_TEST(c, FL_SINGLETON)) {
1728 rb_raise(rb_eArgError,
1729 "prepended class %"PRIsVALUE" differs from class %"PRIsVALUE,
1730 path, rb_class_name(c));
1731 }
1732 c = rb_singleton_class(v);
1733 while (RARRAY_LEN(extmod) > 0) {
1734 m = rb_ary_pop(extmod);
1735 rb_prepend_module(c, m);
1736 }
1737 }
1738 else {
1739 must_be_module(m, path);
1740 rb_ary_push(extmod, m);
1741
1742 v = r_object0(arg, true, 0, extmod);
 /* extend with whatever modules are still queued, last pushed first */
1743 while (RARRAY_LEN(extmod) > 0) {
1744 m = rb_ary_pop(extmod);
1745 rb_extend_object(v, m);
1746 }
1747 }
1748 }
1749 break;
1750
 /* A class path followed by an object whose class is then replaced by
  * that class. */
1751 case TYPE_UCLASS:
1752 {
1753 VALUE c = path2class(r_unique(arg));
1754
1755 if (FL_TEST(c, FL_SINGLETON)) {
1756 rb_raise(rb_eTypeError, "singleton can't be loaded");
1757 }
1758 v = r_object0(arg, partial, 0, extmod);
1759 if (rb_special_const_p(v) || RB_TYPE_P(v, T_OBJECT) || RB_TYPE_P(v, T_CLASS)) {
1760 goto format_error;
1761 }
1762 if (RB_TYPE_P(v, T_MODULE) || !RTEST(rb_class_inherited_p(c, RBASIC(v)->klass))) {
 /* allocate a throwaway instance of c to compare built-in types */
1763 VALUE tmp = rb_obj_alloc(c);
1764
1765 if (TYPE(v) != TYPE(tmp)) goto format_error;
1766 }
1767 RBASIC_SET_CLASS(v, c);
1768 }
1769 break;
1770
 /* reached only through the gotos in TYPE_UCLASS above */
1771 format_error:
1772 rb_raise(rb_eArgError, "dump format error (user class)");
1773
1774 case TYPE_NIL:
1775 v = Qnil;
1776 v = r_leave(v, arg, false);
1777 break;
1778
1779 case TYPE_TRUE:
1780 v = Qtrue;
1781 v = r_leave(v, arg, false);
1782 break;
1783
1784 case TYPE_FALSE:
1785 v = Qfalse;
1786 v = r_leave(v, arg, false);
1787 break;
1788
1789 case TYPE_FIXNUM:
1790 {
1791 long i = r_long(arg);
1792 v = LONG2FIX(i);
1793 }
1794 v = r_leave(v, arg, false);
1795 break;
1796
 /* Float stored as text: "nan", "inf" and "-inf" are matched literally,
  * anything else goes through strtod(); the bytes strtod() did not
  * consume are handed to load_mantissa(). */
1797 case TYPE_FLOAT:
1798 {
1799 double d;
1800 VALUE str = r_bytes(arg);
1801 const char *ptr = RSTRING_PTR(str);
1802
1803 if (strcmp(ptr, "nan") == 0) {
1804 d = nan("");
1805 }
1806 else if (strcmp(ptr, "inf") == 0) {
1807 d = HUGE_VAL;
1808 }
1809 else if (strcmp(ptr, "-inf") == 0) {
1810 d = -HUGE_VAL;
1811 }
1812 else {
1813 char *e;
1814 d = strtod(ptr, &e);
1815 d = load_mantissa(d, e, RSTRING_LEN(str) - (e - ptr));
1816 }
1817 v = DBL2NUM(d);
1818 v = r_entry(v, arg);
1819 v = r_leave(v, arg, false);
1820 }
1821 break;
1822
 /* Bignum: a sign byte, a count of 2-byte words, then count * 2 bytes of
  * little-endian magnitude. */
1823 case TYPE_BIGNUM:
1824 {
1825 long len;
1826 VALUE data;
1827 int sign;
1828
1829 sign = r_byte(arg);
1830 len = r_long(arg);
1831 data = r_bytes0(len * 2, arg);
1832 v = rb_integer_unpack(RSTRING_PTR(data), len, 2, 0,
1833 INTEGER_PACK_LITTLE_ENDIAN | (sign == '-' ? INTEGER_PACK_NEGATIVE : 0));
 /* shrink the temporary buffer string to zero length */
1834 rb_str_resize(data, 0L);
1835 v = r_entry(v, arg);
1836 v = r_leave(v, arg, false);
1837 }
1838 break;
1839
1840 case TYPE_STRING:
1841 v = r_entry(r_string(arg), arg);
1842 v = r_leave(v, arg, partial);
1843 break;
1844
 /* Regexp: source bytes, an options byte, then (when announced through
  * ivp) instance variables that are applied to the source string. */
1845 case TYPE_REGEXP:
1846 {
1847 VALUE str = r_bytes(arg);
1848 int options = r_byte(arg);
1849 int has_encoding = FALSE;
 /* the object-table slot is reserved before the variables are read */
1850 st_index_t idx = r_prepare(arg);
1851
1852 if (ivp) {
1853 r_ivar(str, &has_encoding, arg);
1854 *ivp = FALSE;
1855 }
1856 if (!has_encoding) {
1857 /* 1.8 compatibility; remove escapes undefined in 1.8 */
 /* copies the source over itself; for the letters listed below a
  * preceding backslash is dropped when the run of backslashes before
  * the letter has odd length */
1858 char *ptr = RSTRING_PTR(str), *dst = ptr, *src = ptr;
1859 long len = RSTRING_LEN(str);
1860 long bs = 0;
1861 for (; len-- > 0; *dst++ = *src++) {
1862 switch (*src) {
1863 case '\\': bs++; break;
1864 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1865 case 'm': case 'o': case 'p': case 'q': case 'u': case 'y':
1866 case 'E': case 'F': case 'H': case 'I': case 'J': case 'K':
1867 case 'L': case 'N': case 'O': case 'P': case 'Q': case 'R':
1868 case 'S': case 'T': case 'U': case 'V': case 'X': case 'Y':
1869 if (bs & 1) --dst;
1870 /* fall through */
1871 default: bs = 0; break;
1872 }
1873 }
1874 rb_str_set_len(str, dst - ptr);
1875 }
1876 v = r_entry0(rb_reg_new_str(str, options), idx, arg);
1877 v = r_leave(v, arg, partial);
1878 }
1879 break;
1880
 /* Array: an element count followed by that many objects.
  * arg->readable is raised while elements are outstanding; r_byte() and
  * r_bytes0() use buffered reads while it is positive.
  * NOTE(review): presumably a lower bound on the bytes still to come --
  * confirm. */
1881 case TYPE_ARRAY:
1882 {
1883 long len = r_long(arg);
1884
1885 v = rb_ary_new2(len);
1886 v = r_entry(v, arg);
1887 arg->readable += len - 1;
1888 while (len--) {
1889 rb_ary_push(v, r_object(arg));
1890 arg->readable--;
1891 }
1892 v = r_leave(v, arg, partial);
1893 arg->readable++;
1894 }
1895 break;
1896
 /* Hash: a pair count followed by key/value objects; TYPE_HASH_DEF has
  * one more object after the pairs, stored as the hash's default. */
1897 case TYPE_HASH:
1898 case TYPE_HASH_DEF:
1899 {
1900 long len = r_long(arg);
1901
1902 v = rb_hash_new_with_size(len);
1903 v = r_entry(v, arg);
1904 arg->readable += (len - 1) * 2;
1905 while (len--) {
1906 VALUE key = r_object(arg);
1907 VALUE value = r_object(arg);
1908 rb_hash_aset(v, key, value);
1909 arg->readable -= 2;
1910 }
1911 arg->readable += 2;
1912 if (type == TYPE_HASH_DEF) {
1913 RHASH_SET_IFNONE(v, r_object(arg));
1914 }
1915 v = r_leave(v, arg, partial);
1916 }
1917 break;
1918
 /* Struct: class path, member count, then (member name, value) pairs that
  * must match the class's members in number and order. */
1919 case TYPE_STRUCT:
1920 {
1921 VALUE mem, values;
1922 long i;
1923 VALUE slot;
1924 st_index_t idx = r_prepare(arg);
1925 VALUE klass = path2class(r_unique(arg));
1926 long len = r_long(arg);
1927
1928 v = rb_obj_alloc(klass);
1929 if (!RB_TYPE_P(v, T_STRUCT)) {
1930 rb_raise(rb_eTypeError, "class %"PRIsVALUE" not a struct", rb_class_name(klass));
1931 }
1932 mem = rb_struct_s_members(klass);
1933 if (RARRAY_LEN(mem) != len) {
1934 rb_raise(rb_eTypeError, "struct %"PRIsVALUE" not compatible (struct size differs)",
1935 rb_class_name(klass));
1936 }
1937
1938 arg->readable += (len - 1) * 2;
1939 v = r_entry0(v, idx, arg);
1940 values = rb_ary_new2(len);
1941 {
 /* keyword_init structs receive one hash argument instead of a
  * positional list */
1942 VALUE keywords = Qfalse;
1943 if (RTEST(rb_struct_s_keyword_init(klass))) {
1944 keywords = rb_hash_new();
1945 rb_ary_push(values, keywords);
1946 }
1947
1948 for (i=0; i<len; i++) {
1949 VALUE n = rb_sym2str(RARRAY_AREF(mem, i));
1950 slot = r_symbol(arg);
1951
1952 if (!rb_str_equal(n, slot)) {
1953 rb_raise(rb_eTypeError, "struct %"PRIsVALUE" not compatible (:%"PRIsVALUE" for :%"PRIsVALUE")",
1954 rb_class_name(klass),
1955 slot, n);
1956 }
1957 if (keywords) {
1958 rb_hash_aset(keywords, RARRAY_AREF(mem, i), r_object(arg));
1959 }
1960 else {
1961 rb_ary_push(values, r_object(arg));
1962 }
1963 arg->readable -= 2;
1964 }
1965 }
1966 rb_struct_initialize(v, values);
1967 v = r_leave(v, arg, partial);
1968 arg->readable += 2;
1969 }
1970 break;
1971
 /* Class path plus a string that is handed to the class method named by
  * s_load (`_load' in the error message below). */
1972 case TYPE_USERDEF:
1973 {
1974 VALUE name = r_unique(arg);
1975 VALUE klass = path2class(name);
1976 VALUE data;
1977 st_data_t d;
1978
1979 if (!rb_obj_respond_to(klass, s_load, TRUE)) {
1980 rb_raise(rb_eTypeError, "class %"PRIsVALUE" needs to have method `_load'",
1981 name);
1982 }
1983 data = r_string(arg);
 /* announced instance variables belong to the data string */
1984 if (ivp) {
1985 r_ivar(data, NULL, arg);
1986 *ivp = FALSE;
1987 }
1988 v = load_funcall(arg, klass, s_load, 1, &data);
1989 v = r_entry(v, arg);
1990 if (st_lookup(compat_allocator_tbl, (st_data_t)rb_get_alloc_func(klass), &d)) {
1991 marshal_compat_t *compat = (marshal_compat_t*)d;
1992 v = compat->loader(klass, v);
1993 }
1994 if (!partial) v = r_post_proc(v, arg);
1995 }
1996 break;
1997
 /* Class path plus an arbitrary object that is passed to the instance
  * method named by s_mload (`marshal_load' in the error message below)
  * of a freshly allocated object. */
1998 case TYPE_USRMARSHAL:
1999 {
2000 VALUE name = r_unique(arg);
2001 VALUE klass = path2class(name);
2002 VALUE oldclass = 0;
2003 VALUE data;
2004
2005 v = obj_alloc_by_klass(klass, arg, &oldclass);
2006 if (!NIL_P(extmod)) {
2007 /* for the case marshal_load is overridden */
2008 append_extmod(v, extmod);
2009 }
2010 if (!rb_obj_respond_to(v, s_mload, TRUE)) {
2011 rb_raise(rb_eTypeError, "instance of %"PRIsVALUE" needs to have method `marshal_load'",
2012 name);
2013 }
2014 v = r_entry(v, arg);
2015 data = r_object(arg);
2016 load_funcall(arg, v, s_mload, 1, &data);
2017 v = r_fixup_compat(v, arg);
2018 v = r_copy_ivar(v, data);
2019 v = r_post_proc(v, arg);
2020 if (!NIL_P(extmod)) {
 /* oldclass is only set when a compat stand-in was allocated; the
  * modules are then applied to the object returned by the fix-up */
2021 if (oldclass) append_extmod(v, extmod);
2022 rb_ary_clear(extmod);
2023 }
2024 }
2025 break;
2026
 /* Plain object: class path followed by its instance variables. */
2027 case TYPE_OBJECT:
2028 {
2029 st_index_t idx = r_prepare(arg);
2030 v = obj_alloc_by_path(r_unique(arg), arg);
2031 if (!RB_TYPE_P(v, T_OBJECT)) {
2032 rb_raise(rb_eArgError, "dump format error");
2033 }
2034 v = r_entry0(v, idx, arg);
2035 r_ivar(v, NULL, arg);
2036 v = r_leave(v, arg, partial);
2037 }
2038 break;
2039
 /* T_DATA object: class path plus an object that is passed to the
  * instance method named by s_load_data (`_load_data' in the error
  * message below). */
2040 case TYPE_DATA:
2041 {
2042 VALUE name = r_unique(arg);
2043 VALUE klass = path2class(name);
2044 VALUE oldclass = 0;
2045 VALUE r;
2046
2047 v = obj_alloc_by_klass(klass, arg, &oldclass);
2048 if (!RB_TYPE_P(v, T_DATA)) {
2049 rb_raise(rb_eArgError, "dump format error");
2050 }
2051 v = r_entry(v, arg);
2052 if (!rb_obj_respond_to(v, s_load_data, TRUE)) {
2053 rb_raise(rb_eTypeError,
2054 "class %"PRIsVALUE" needs to have instance method `_load_data'",
2055 name);
2056 }
2057 r = r_object0(arg, partial, 0, extmod);
2058 load_funcall(arg, v, s_load_data, 1, &r);
2059 v = r_leave(v, arg, partial);
2060 }
2061 break;
2062
 /* Class or module referenced by path, with no class/module check. */
2063 case TYPE_MODULE_OLD:
2064 {
2065 VALUE str = r_bytes(arg);
2066
2067 v = rb_path_to_class(str);
2068 prohibit_ivar("class/module", str);
2069 v = r_entry(v, arg);
2070 v = r_leave(v, arg, partial);
2071 }
2072 break;
2073
2074 case TYPE_CLASS:
2075 {
2076 VALUE str = r_bytes(arg);
2077
2078 v = path2class(str);
2079 prohibit_ivar("class", str);
2080 v = r_entry(v, arg);
2081 v = r_leave(v, arg, partial);
2082 }
2083 break;
2084
2085 case TYPE_MODULE:
2086 {
2087 VALUE str = r_bytes(arg);
2088
2089 v = path2module(str);
2090 prohibit_ivar("module", str);
2091 v = r_entry(v, arg);
2092 v = r_leave(v, arg, partial);
2093 }
2094 break;
2095
 /* Symbol as a value: the name string from r_symreal() is interned. */
2096 case TYPE_SYMBOL:
2097 if (ivp) {
2098 v = r_symreal(arg, *ivp);
2099 *ivp = FALSE;
2100 }
2101 else {
2102 v = r_symreal(arg, 0);
2103 }
2104 v = rb_str_intern(v);
2105 v = r_leave(v, arg, partial);
2106 break;
2107
2108 case TYPE_SYMLINK:
2109 v = rb_str_intern(r_symlink(arg));
2110 break;
2111
2112 default:
2113 rb_raise(rb_eArgError, "dump format error(0x%x)", type);
2114 break;
2115 }
2116
 /* Qundef is the placeholder r_prepare() stores for a reserved slot, so a
  * link that resolves to it refers to an object not yet registered. */
2117 if (v == Qundef) {
2118 rb_raise(rb_eArgError, "dump format error (bad link)");
2119 }
2120
2121 return v;
2122}
2123
2124static VALUE
2125r_object(struct load_arg *arg)
2126{
2127 return r_object0(arg, false, 0, Qnil);
2128}
2129
2130static void
2131clear_load_arg(struct load_arg *arg)
2132{
2133 if (arg->buf) {
2134 xfree(arg->buf);
2135 arg->buf = 0;
2136 }
2137 arg->buflen = 0;
2138 arg->offset = 0;
2139 arg->readable = 0;
2140 if (!arg->symbols) return;
2141 st_free_table(arg->symbols);
2142 arg->symbols = 0;
2143 st_free_table(arg->data);
2144 arg->data = 0;
2145 st_free_table(arg->partial_objects);
2146 arg->partial_objects = 0;
2147 if (arg->compat_tbl) {
2148 st_free_table(arg->compat_tbl);
2149 arg->compat_tbl = 0;
2150 }
2151}
2152
2153/*
2154 * call-seq:
2155 * load( source [, proc] ) -> obj
2156 * restore( source [, proc] ) -> obj
2157 *
2158 * Returns the result of converting the serialized data in source into a
2159 * Ruby object (possibly with associated subordinate objects). source
2160 * may be either an instance of IO or an object that responds to
2161 * to_str. If proc is specified, each object will be passed to the proc, as the object
2162 * is being deserialized.
2163 *
2164 * Never pass untrusted data (including user supplied input) to this method.
2165 * Please see the overview for further details.
2166 */
2167static VALUE
2168marshal_load(int argc, VALUE *argv, VALUE _)
2169{
2170 VALUE port, proc;
2171
2172 rb_check_arity(argc, 1, 2);
2173 port = argv[0];
2174 proc = argc > 1 ? argv[1] : Qnil;
2175 return rb_marshal_load_with_proc(port, proc);
2176}
2177
2178VALUE
2179rb_marshal_load_with_proc(VALUE port, VALUE proc)
2180{
2181 int major, minor;
2182 VALUE v;
2183 VALUE wrapper; /* used to avoid memory leak in case of exception */
2184 struct load_arg *arg;
2185
2186 v = rb_check_string_type(port);
2187 if (!NIL_P(v)) {
2188 port = v;
2189 }
2190 else if (rb_respond_to(port, s_getbyte) && rb_respond_to(port, s_read)) {
2191 rb_check_funcall(port, s_binmode, 0, 0);
2192 }
2193 else {
2194 io_needed();
2195 }
2196 wrapper = TypedData_Make_Struct(0, struct load_arg, &load_arg_data, arg);
2197 arg->src = port;
2198 arg->offset = 0;
2199 arg->symbols = st_init_numtable();
2200 arg->data = rb_init_identtable();
2201 arg->partial_objects = rb_init_identtable();
2202 arg->compat_tbl = 0;
2203 arg->proc = 0;
2204 arg->readable = 0;
2205
2206 if (NIL_P(v))
2207 arg->buf = xmalloc(BUFSIZ);
2208 else
2209 arg->buf = 0;
2210
2211 major = r_byte(arg);
2212 minor = r_byte(arg);
2213 if (major != MARSHAL_MAJOR || minor > MARSHAL_MINOR) {
2214 clear_load_arg(arg);
2215 rb_raise(rb_eTypeError, "incompatible marshal file format (can't be read)\n\
2216\tformat version %d.%d required; %d.%d given",
2217 MARSHAL_MAJOR, MARSHAL_MINOR, major, minor);
2218 }
2219 if (RTEST(ruby_verbose) && minor != MARSHAL_MINOR) {
2220 rb_warn("incompatible marshal file format (can be read)\n\
2221\tformat version %d.%d required; %d.%d given",
2222 MARSHAL_MAJOR, MARSHAL_MINOR, major, minor);
2223 }
2224
2225 if (!NIL_P(proc)) arg->proc = proc;
2226 v = r_object(arg);
2227 clear_load_arg(arg);
2228 RB_GC_GUARD(wrapper);
2229
2230 return v;
2231}
2232
2233/*
2234 * The marshaling library converts collections of Ruby objects into a
2235 * byte stream, allowing them to be stored outside the currently
2236 * active script. This data may subsequently be read and the original
2237 * objects reconstituted.
2238 *
2239 * Marshaled data has major and minor version numbers stored along
2240 * with the object information. In normal use, marshaling can only
2241 * load data written with the same major version number and an equal
2242 * or lower minor version number. If Ruby's ``verbose'' flag is set
2243 * (normally using -d, -v, -w, or --verbose) a warning is issued when the
2244 * minor number does not match exactly. Marshal versioning is independent of
2245 * Ruby's version numbers. You can extract the version by reading the
2246 * first two bytes of marshaled data.
2247 *
2248 * str = Marshal.dump("thing")
2249 * RUBY_VERSION #=> "1.9.0"
2250 * str[0].ord #=> 4
2251 * str[1].ord #=> 8
2252 *
2253 * Some objects cannot be dumped: if the objects to be dumped include
2254 * bindings, procedure or method objects, instances of class IO, or
2255 * singleton objects, a TypeError will be raised.
2256 *
2257 * If your class has special serialization needs (for example, if you
2258 * want to serialize in some specific format), or if it contains
2259 * objects that would otherwise not be serializable, you can implement
2260 * your own serialization strategy.
2261 *
2262 * There are two methods of doing this, your object can define either
2263 * marshal_dump and marshal_load or _dump and _load. marshal_dump will take
2264 * precedence over _dump if both are defined. marshal_dump may result in
2265 * smaller Marshal strings.
2266 *
2267 * == Security considerations
2268 *
2269 * By design, Marshal.load can deserialize almost any class loaded into the
2270 * Ruby process. In many cases this can lead to remote code execution if the
2271 * Marshal data is loaded from an untrusted source.
2272 *
2273 * As a result, Marshal.load is not suitable as a general purpose serialization
2274 * format and you should never unmarshal user supplied input or other untrusted
2275 * data.
2276 *
2277 * If you need to deserialize untrusted data, use JSON or another serialization
2278 * format that is only able to load simple, 'primitive' types such as String,
2279 * Array, Hash, etc. Never allow user input to specify arbitrary types to
2280 * deserialize into.
2281 *
2282 * == marshal_dump and marshal_load
2283 *
2284 * When dumping an object the method marshal_dump will be called.
2285 * marshal_dump must return a result containing the information necessary for
2286 * marshal_load to reconstitute the object. The result can be any object.
2287 *
2288 * When loading an object dumped using marshal_dump the object is first
2289 * allocated then marshal_load is called with the result from marshal_dump.
2290 * marshal_load must recreate the object from the information in the result.
2291 *
2292 * Example:
2293 *
2294 * class MyObj
2295 * def initialize name, version, data
2296 * @name = name
2297 * @version = version
2298 * @data = data
2299 * end
2300 *
2301 * def marshal_dump
2302 * [@name, @version]
2303 * end
2304 *
2305 * def marshal_load array
2306 * @name, @version = array
2307 * end
2308 * end
2309 *
2310 * == _dump and _load
2311 *
2312 * Use _dump and _load when you need to allocate the object you're restoring
2313 * yourself.
2314 *
2315 * When dumping an object the instance method _dump is called with an Integer
2316 * which indicates the maximum depth of objects to dump (a value of -1 implies
2317 * that you should disable depth checking). _dump must return a String
2318 * containing the information necessary to reconstitute the object.
2319 *
2320 * The class method _load should take a String and use it to return an object
2321 * of the same class.
2322 *
2323 * Example:
2324 *
2325 * class MyObj
2326 * def initialize name, version, data
2327 * @name = name
2328 * @version = version
2329 * @data = data
2330 * end
2331 *
2332 * def _dump level
2333 * [@name, @version].join ':'
2334 * end
2335 *
2336 * def self._load args
2337 * new(*args.split(':'))
2338 * end
2339 * end
2340 *
2341 * Since Marshal.dump outputs a string you can have _dump return a Marshal
2342 * string which is Marshal.loaded in _load for complex objects.
2343 */
2344void
2345Init_marshal(void)
2346{
2347 VALUE rb_mMarshal = rb_define_module("Marshal");
 /* set_id(s_xxx) assigns s_xxx the ID interned from the constant
  * name_s_xxx (both declared outside this function). */
2348#define set_id(sym) sym = rb_intern_const(name_##sym)
2349 set_id(s_dump);
2350 set_id(s_load);
2351 set_id(s_mdump);
2352 set_id(s_mload);
2353 set_id(s_dump_data);
2354 set_id(s_load_data);
2355 set_id(s_alloc);
2356 set_id(s_call);
2357 set_id(s_getbyte);
2358 set_id(s_read);
2359 set_id(s_write);
2360 set_id(s_binmode);
2361 set_id(s_encoding_short);
2362 set_id(s_ruby2_keywords_flag);
2363
 /* "load" and "restore" are bound to the same C function; -1 selects the
  * (argc, argv, self) calling convention used by marshal_load. */
2364 rb_define_module_function(rb_mMarshal, "dump", marshal_dump, -1);
2365 rb_define_module_function(rb_mMarshal, "load", marshal_load, -1);
2366 rb_define_module_function(rb_mMarshal, "restore", marshal_load, -1);
2367
2368 /* major version */
2369 rb_define_const(rb_mMarshal, "MAJOR_VERSION", INT2FIX(MARSHAL_MAJOR));
2370 /* minor version */
2371 rb_define_const(rb_mMarshal, "MINOR_VERSION", INT2FIX(MARSHAL_MINOR));
2372}
2373
2374static st_table *
2375compat_allocator_table(void)
2376{
2377 if (compat_allocator_tbl) return compat_allocator_tbl;
2378 compat_allocator_tbl = st_init_numtable();
2379#undef RUBY_UNTYPED_DATA_WARNING
2380#define RUBY_UNTYPED_DATA_WARNING 0
2381 compat_allocator_tbl_wrapper =
2382 Data_Wrap_Struct(0, mark_marshal_compat_t, 0, compat_allocator_tbl);
2383 rb_gc_register_mark_object(compat_allocator_tbl_wrapper);
2384 return compat_allocator_tbl;
2385}
2386
2387VALUE
2388rb_marshal_dump(VALUE obj, VALUE port)
2389{
2390 return rb_marshal_dump_limited(obj, port, -1);
2391}
2392
2393VALUE
2394rb_marshal_load(VALUE port)
2395{
2396 return rb_marshal_load_with_proc(port, Qnil);
2397}
#define BDIGIT
Definition: bigdecimal.h:48
big_t * num
Definition: enough.c:232
uint8_t len
Definition: escape.c:17
Thin wrapper to ruby/config.h.
Internal header for Array.
Internal header for Bignums.
#define SIZEOF_BDIGIT
Definition: bignum.h:25
Internal header for Class.
Internal header for Encoding.
Internal header for Hash.
Internal header for Object.
Internal header for Struct.
Internal header corresponding util.c.
Internal header for RubyVM.
voidpf uLong offset
Definition: ioapi.h:144
#define SHORTDN(x)
Definition: marshal.c:42
Internal header for Math.