00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "ruby/ruby.h"
00013 #include "ruby/io.h"
00014 #include "ruby/st.h"
00015 #include "ruby/util.h"
00016 #include "ruby/encoding.h"
00017 #include "internal.h"
00018
00019 #include <math.h>
00020 #ifdef HAVE_FLOAT_H
00021 #include <float.h>
00022 #endif
00023 #ifdef HAVE_IEEEFP_H
00024 #include <ieeefp.h>
00025 #endif
00026
/* Bit width of one marshalled bignum "short": two chars' worth of bits. */
00027 #define BITSPERSHORT (2*CHAR_BIT)
/* Mask that keeps only the low BITSPERSHORT bits of a value. */
00028 #define SHORTMASK ((1<<BITSPERSHORT)-1)
/* Drops the low short from x by shifting right with RSHIFT. */
00029 #define SHORTDN(x) RSHIFT((x),BITSPERSHORT)
00030
#if SIZEOF_SHORT == SIZEOF_BDIGITS
/* One BDIGIT is exactly one short, so the digit count is the short count. */
#define SHORTLEN(x) (x)
#else
/*
 * Returns the number of shorts written for a bignum of `len` BDIGITs
 * stored at `ds`: every digit below the top one contributes
 * sizeof(BDIGIT)/2 shorts, and the top digit contributes one short per
 * SHORTDN step needed to shift it down to zero.
 */
static long
shortlen(long len, BDIGIT *ds)
{
    BDIGIT top = ds[len-1];
    int used = 0;

    for (; top; top = SHORTDN(top)) {
        used++;
    }
    return (len - 1)*sizeof(BDIGIT)/2 + used;
}
/* NOTE: expands to a reference to a caller-local variable named `d`. */
#define SHORTLEN(x) shortlen((x),d)
#endif
00049
/* Format version; written as the first two bytes of every dump. */
00050 #define MARSHAL_MAJOR 4
00051 #define MARSHAL_MINOR 8
00052
/* One-byte type tags for immediate values. */
00053 #define TYPE_NIL '0'
00054 #define TYPE_TRUE 'T'
00055 #define TYPE_FALSE 'F'
00056 #define TYPE_FIXNUM 'i'
00057
/*
 * Tags for heap objects and their wrappers: TYPE_EXTENDED precedes a module
 * name, TYPE_UCLASS a subclass name of a builtin, TYPE_USERDEF the result of
 * _dump, TYPE_USRMARSHAL the result of marshal_dump, TYPE_DATA the result of
 * _dump_data. TYPE_HASH_DEF is a hash followed by its default value.
 * NOTE(review): TYPE_MODULE_OLD is not written anywhere in the visible code;
 * presumably kept for loading older dumps -- confirm against the loader.
 */
00058 #define TYPE_EXTENDED 'e'
00059 #define TYPE_UCLASS 'C'
00060 #define TYPE_OBJECT 'o'
00061 #define TYPE_DATA 'd'
00062 #define TYPE_USERDEF 'u'
00063 #define TYPE_USRMARSHAL 'U'
00064 #define TYPE_FLOAT 'f'
00065 #define TYPE_BIGNUM 'l'
00066 #define TYPE_STRING '"'
00067 #define TYPE_REGEXP '/'
00068 #define TYPE_ARRAY '['
00069 #define TYPE_HASH '{'
00070 #define TYPE_HASH_DEF '}'
00071 #define TYPE_STRUCT 'S'
00072 #define TYPE_MODULE_OLD 'M'
00073 #define TYPE_CLASS 'c'
00074 #define TYPE_MODULE 'm'
00075
/* A symbol written in full, and a back-reference to an earlier symbol by index. */
00076 #define TYPE_SYMBOL ':'
00077 #define TYPE_SYMLINK ';'
00078
/* TYPE_IVAR prefixes a value followed by instance variables / encoding;
 * TYPE_LINK is a back-reference to an earlier object by index. */
00079 #define TYPE_IVAR 'I'
00080 #define TYPE_LINK '@'
00081
/*
 * Cached method IDs used with rb_respond_to / rb_funcall in this file.
 * They are assigned outside the visible part of the file.
 */
00082 static ID s_dump, s_load, s_mdump, s_mload;
00083 static ID s_dump_data, s_load_data, s_alloc, s_call;
00084 static ID s_getbyte, s_read, s_write, s_binmode;
00085
/*
 * One compatibility registration (see rb_marshal_define_compat): objects of
 * `newclass` are converted by `dumper` before being written, and `loader`
 * is called with (real object, loaded stand-in) after reading.
 */
00086 typedef struct {
00087 VALUE newclass;
00088 VALUE oldclass;
00089 VALUE (*dumper)(VALUE);
00090 VALUE (*loader)(VALUE, VALUE);
00091 } marshal_compat_t;
00092
/* Maps an allocator function pointer to its marshal_compat_t entry. */
00093 static st_table *compat_allocator_tbl;
/* NOTE(review): presumably a Ruby object wrapping the table so the GC can
 * mark it; its initialisation is not in the visible code -- confirm. */
00094 static VALUE compat_allocator_tbl_wrapper;
00095
00096 static int
00097 mark_marshal_compat_i(st_data_t key, st_data_t value)
00098 {
00099 marshal_compat_t *p = (marshal_compat_t *)value;
00100 rb_gc_mark(p->newclass);
00101 rb_gc_mark(p->oldclass);
00102 return ST_CONTINUE;
00103 }
00104
00105 static void
00106 mark_marshal_compat_t(void *tbl)
00107 {
00108 if (!tbl) return;
00109 st_foreach(tbl, mark_marshal_compat_i, 0);
00110 }
00111
00112 void
00113 rb_marshal_define_compat(VALUE newclass, VALUE oldclass, VALUE (*dumper)(VALUE), VALUE (*loader)(VALUE, VALUE))
00114 {
00115 marshal_compat_t *compat;
00116 rb_alloc_func_t allocator = rb_get_alloc_func(newclass);
00117
00118 if (!allocator) {
00119 rb_raise(rb_eTypeError, "no allocator");
00120 }
00121
00122 compat = ALLOC(marshal_compat_t);
00123 compat->newclass = Qnil;
00124 compat->oldclass = Qnil;
00125 compat->newclass = newclass;
00126 compat->oldclass = oldclass;
00127 compat->dumper = dumper;
00128 compat->loader = loader;
00129
00130 st_insert(compat_allocator_tbl, (st_data_t)allocator, (st_data_t)compat);
00131 }
00132
/* Object flags that are carried over between dumped/loaded objects and the buffer. */
00133 #define MARSHAL_INFECTION (FL_TAINT|FL_UNTRUSTED)
/* Compile-time check: the array size becomes -1 (a compile error) if the
 * flag mask does not survive conversion to int, which `infection` relies on. */
00134 typedef char ruby_check_marshal_viral_flags[MARSHAL_INFECTION == (int)MARSHAL_INFECTION ? 1 : -1];
00135
/* State shared by all w_* routines during one dump. */
00136 struct dump_arg {
/* str: buffer that bytes are appended to; dest: output object written to
 * via rb_io_write, or 0 when the dump is returned as `str`. */
00137 VALUE str, dest;
/* ID -> index of symbols already written (used for TYPE_SYMLINK). */
00138 st_table *symbols;
/* object -> index of objects already written (used for TYPE_LINK). */
00139 st_table *data;
/* object returned by a compat dumper -> the real object it replaced. */
00140 st_table *compat_tbl;
/* encoding name -> String holding that name; created lazily by w_encoding. */
00141 st_table *encodings;
/* MARSHAL_INFECTION flags collected from dumped objects; OR-ed onto `str`. */
00142 int infection;
00143 };
00144
/* Argument bundle handed to iteration callbacks (hash_each, w_obj_each). */
00145 struct dump_call_arg {
/* NOTE(review): `obj` is never assigned or read in the visible code. */
00146 VALUE obj;
/* The dump state to write into. */
00147 struct dump_arg *arg;
/* Remaining nesting depth passed on to w_object. */
00148 int limit;
00149 };
00150
00151 static void
00152 check_dump_arg(struct dump_arg *arg, ID sym)
00153 {
00154 if (!arg->symbols) {
00155 rb_raise(rb_eRuntimeError, "Marshal.dump reentered at %s",
00156 rb_id2name(sym));
00157 }
00158 }
00159
00160 static void clear_dump_arg(struct dump_arg *arg);
00161
00162 static void
00163 mark_dump_arg(void *ptr)
00164 {
00165 struct dump_arg *p = ptr;
00166 if (!p->symbols)
00167 return;
00168 rb_mark_set(p->data);
00169 rb_mark_hash(p->compat_tbl);
00170 rb_gc_mark(p->str);
00171 }
00172
/* GC free function: releases the tables, then the dump state itself. */
static void
free_dump_arg(void *ptr)
{
    struct dump_arg *state = ptr;

    clear_dump_arg(state);
    xfree(state);
}
00179
00180 static size_t
00181 memsize_dump_arg(const void *ptr)
00182 {
00183 return ptr ? sizeof(struct dump_arg) : 0;
00184 }
00185
/* Typed-data descriptor for the dump state: type name, then the
 * mark / free / memsize callbacks. */
00186 static const rb_data_type_t dump_arg_data = {
00187 "dump_arg",
00188 {mark_dump_arg, free_dump_arg, memsize_dump_arg,},
00189 };
00190
00191 static const char *
00192 must_not_be_anonymous(const char *type, VALUE path)
00193 {
00194 char *n = RSTRING_PTR(path);
00195
00196 if (!rb_enc_asciicompat(rb_enc_get(path))) {
00197
00198 rb_raise(rb_eTypeError, "can't dump non-ascii %s name", type);
00199 }
00200 if (n[0] == '#') {
00201 rb_raise(rb_eTypeError, "can't dump anonymous %s %.*s", type,
00202 (int)RSTRING_LEN(path), n);
00203 }
00204 return n;
00205 }
00206
00207 static VALUE
00208 class2path(VALUE klass)
00209 {
00210 VALUE path = rb_class_path(klass);
00211 const char *n;
00212
00213 n = must_not_be_anonymous((TYPE(klass) == T_CLASS ? "class" : "module"), path);
00214 if (rb_path_to_class(path) != rb_class_real(klass)) {
00215 rb_raise(rb_eTypeError, "%s can't be referred to", n);
00216 }
00217 return path;
00218 }
00219
00220 static void w_long(long, struct dump_arg*);
00221 static void w_encoding(VALUE obj, long num, struct dump_call_arg *arg);
00222
00223 static void
00224 w_nbyte(const char *s, long n, struct dump_arg *arg)
00225 {
00226 VALUE buf = arg->str;
00227 rb_str_buf_cat(buf, s, n);
00228 RBASIC(buf)->flags |= arg->infection;
00229 if (arg->dest && RSTRING_LEN(buf) >= BUFSIZ) {
00230 rb_io_write(arg->dest, buf);
00231 rb_str_resize(buf, 0);
00232 }
00233 }
00234
/* Writes the single byte `c`. */
static void
w_byte(char c, struct dump_arg *arg)
{
    const char one[1] = { c };

    w_nbyte(one, 1, arg);
}
00240
/* Writes a length-prefixed byte string: w_long(n) followed by the n bytes. */
static void
w_bytes(const char *s, long n, struct dump_arg *arg)
{
    const long count = n;

    w_long(count, arg);
    w_nbyte(s, count, arg);
}

/* Length-prefixed write of a NUL-terminated C string (evaluates `s` twice). */
#define w_cstr(s, arg) w_bytes((s), strlen(s), (arg))
00249
/* Writes the low 16 bits of `x` as two bytes, least significant first. */
static void
w_short(int x, struct dump_arg *arg)
{
    const char low = (char)(x & 0xff);
    const char high = (char)((x >> 8) & 0xff);

    w_byte(low, arg);
    w_byte(high, arg);
}
00256
/*
 * Writes `x` in the compact marshal integer format:
 *   0            -> one byte 0
 *   1 .. 122     -> one byte x + 5
 *   -123 .. -1   -> one byte (x - 5) & 0xff
 *   otherwise    -> a count byte (+n for non-negative, -n for negative)
 *                   followed by n bytes of x, least significant first.
 *
 * When long is wider than 4 bytes, values that do not fit in 32 signed bits
 * raise TypeError.
 */
static void
w_long(long x, struct dump_arg *arg)
{
    char enc[sizeof(long)+1];
    int n, k;

#if SIZEOF_LONG > 4
    {
        const long upper = RSHIFT(x, 31);

        if (upper != 0 && upper != -1) {
            rb_raise(rb_eTypeError, "long too big to dump");
        }
    }
#endif

    if (x == 0) {
        w_byte(0, arg);
        return;
    }
    if (x > 0 && x < 123) {
        w_byte((char)(x + 5), arg);
        return;
    }
    if (x < 0 && x > -124) {
        w_byte((char)((x - 5)&0xff), arg);
        return;
    }

    /* Peel off bytes until only the sign extension (0 or -1) is left. */
    n = 1;
    while (n < (int)sizeof(long)+1) {
        enc[n] = (char)(x & 0xff);
        x = RSHIFT(x,8);
        if (x == 0) {
            enc[0] = n;
            break;
        }
        if (x == -1) {
            enc[0] = -n;
            break;
        }
        n++;
    }

    for (k = 0; k <= n; k++) {
        w_byte(enc[k], arg);
    }
}
00299
#ifdef DBL_MANT_DIG
#define DECIMAL_MANT (53-16)

/* Number of mantissa bits consumed per loop step in load_mantissa. */
#if DBL_MANT_DIG > 32
#define MANT_BITS 32
#elif DBL_MANT_DIG > 24
#define MANT_BITS 24
#elif DBL_MANT_DIG > 16
#define MANT_BITS 16
#else
#define MANT_BITS 8
#endif

/*
 * Refines `d` with extra binary mantissa bytes.
 *
 * buf/len: trailing bytes; they are used only when there are at least two
 * of them and the first one is 0.  Otherwise `d` is returned unchanged.
 *
 * The magnitude of `d` is truncated to DECIMAL_MANT mantissa bits, the
 * remaining bytes are added below those bits (most significant byte first,
 * up to MANT_BITS bits per step), and the original exponent and sign are
 * restored.
 */
static double
load_mantissa(double d, const char *buf, long len)
{
    int exp2, negative, shift;
    unsigned long chunk;

    if (len == 0) return d;
    len--;
    if (len <= 0 || *buf++ != 0) return d;

    negative = d < 0;
    shift = 0;
    modf(ldexp(frexp(fabs(d), &exp2), DECIMAL_MANT), &d);
    do {
        chunk = 0;
        /* Intentional fall-through: gathers up to MANT_BITS/8 bytes. */
        switch (len) {
          default: chunk = *buf++ & 0xff;
#if MANT_BITS > 24
          case 3: chunk = (chunk << 8) | (*buf++ & 0xff);
#endif
#if MANT_BITS > 16
          case 2: chunk = (chunk << 8) | (*buf++ & 0xff);
#endif
#if MANT_BITS > 8
          case 1: chunk = (chunk << 8) | (*buf++ & 0xff);
#endif
        }
        shift -= len < MANT_BITS / 8 ? 8 * (unsigned)len : MANT_BITS;
        d += ldexp((double)chunk, shift);
        len -= MANT_BITS / 8;
    } while (len > 0);
    d = ldexp(d, exp2 - DECIMAL_MANT);

    return negative ? -d : d;
}
#else
#define load_mantissa(d, buf, len) (d)
#endif
00347
00348 #ifdef DBL_DIG
00349 #define FLOAT_DIG (DBL_DIG+2)
00350 #else
00351 #define FLOAT_DIG 17
00352 #endif
00353
00354 static void
00355 w_float(double d, struct dump_arg *arg)
00356 {
00357 char *ruby_dtoa(double d_, int mode, int ndigits, int *decpt, int *sign, char **rve);
00358 char buf[FLOAT_DIG + (DECIMAL_MANT + 7) / 8 + 10];
00359
00360 if (isinf(d)) {
00361 if (d < 0) w_cstr("-inf", arg);
00362 else w_cstr("inf", arg);
00363 }
00364 else if (isnan(d)) {
00365 w_cstr("nan", arg);
00366 }
00367 else if (d == 0.0) {
00368 if (1.0/d < 0) w_cstr("-0", arg);
00369 else w_cstr("0", arg);
00370 }
00371 else {
00372 int decpt, sign, digs, len = 0;
00373 char *e, *p = ruby_dtoa(d, 0, 0, &decpt, &sign, &e);
00374 if (sign) buf[len++] = '-';
00375 digs = (int)(e - p);
00376 if (decpt < -3 || decpt > digs) {
00377 buf[len++] = p[0];
00378 if (--digs > 0) buf[len++] = '.';
00379 memcpy(buf + len, p + 1, digs);
00380 len += digs;
00381 len += snprintf(buf + len, sizeof(buf) - len, "e%d", decpt - 1);
00382 }
00383 else if (decpt > 0) {
00384 memcpy(buf + len, p, decpt);
00385 len += decpt;
00386 if ((digs -= decpt) > 0) {
00387 buf[len++] = '.';
00388 memcpy(buf + len, p + decpt, digs);
00389 len += digs;
00390 }
00391 }
00392 else {
00393 buf[len++] = '0';
00394 buf[len++] = '.';
00395 if (decpt) {
00396 memset(buf + len, '0', -decpt);
00397 len -= decpt;
00398 }
00399 memcpy(buf + len, p, digs);
00400 len += digs;
00401 }
00402 xfree(p);
00403 w_bytes(buf, len, arg);
00404 }
00405 }
00406
00407 static void
00408 w_symbol(ID id, struct dump_arg *arg)
00409 {
00410 VALUE sym;
00411 st_data_t num;
00412 int encidx = -1;
00413
00414 if (st_lookup(arg->symbols, id, &num)) {
00415 w_byte(TYPE_SYMLINK, arg);
00416 w_long((long)num, arg);
00417 }
00418 else {
00419 sym = rb_id2str(id);
00420 if (!sym) {
00421 rb_raise(rb_eTypeError, "can't dump anonymous ID %"PRIdVALUE, id);
00422 }
00423 encidx = rb_enc_get_index(sym);
00424 if (encidx == rb_usascii_encindex() ||
00425 rb_enc_str_coderange(sym) == ENC_CODERANGE_7BIT) {
00426 encidx = -1;
00427 }
00428 else {
00429 w_byte(TYPE_IVAR, arg);
00430 }
00431 w_byte(TYPE_SYMBOL, arg);
00432 w_bytes(RSTRING_PTR(sym), RSTRING_LEN(sym), arg);
00433 st_add_direct(arg->symbols, id, arg->symbols->num_entries);
00434 if (encidx != -1) {
00435 struct dump_call_arg c_arg;
00436 c_arg.limit = 1;
00437 c_arg.arg = arg;
00438 w_encoding(sym, 0, &c_arg);
00439 }
00440 }
00441 }
00442
00443 static void
00444 w_unique(VALUE s, struct dump_arg *arg)
00445 {
00446 must_not_be_anonymous("class", s);
00447 w_symbol(rb_intern_str(s), arg);
00448 }
00449
00450 static void w_object(VALUE,struct dump_arg*,int);
00451
00452 static int
00453 hash_each(VALUE key, VALUE value, struct dump_call_arg *arg)
00454 {
00455 w_object(key, arg->arg, arg->limit);
00456 w_object(value, arg->arg, arg->limit);
00457 return ST_CONTINUE;
00458 }
00459
00460 static void
00461 w_extended(VALUE klass, struct dump_arg *arg, int check)
00462 {
00463 if (check && FL_TEST(klass, FL_SINGLETON)) {
00464 if (RCLASS_M_TBL(klass)->num_entries ||
00465 (RCLASS_IV_TBL(klass) && RCLASS_IV_TBL(klass)->num_entries > 1)) {
00466 rb_raise(rb_eTypeError, "singleton can't be dumped");
00467 }
00468 klass = RCLASS_SUPER(klass);
00469 }
00470 while (BUILTIN_TYPE(klass) == T_ICLASS) {
00471 VALUE path = rb_class_name(RBASIC(klass)->klass);
00472 w_byte(TYPE_EXTENDED, arg);
00473 w_unique(path, arg);
00474 klass = RCLASS_SUPER(klass);
00475 }
00476 }
00477
00478 static void
00479 w_class(char type, VALUE obj, struct dump_arg *arg, int check)
00480 {
00481 VALUE path;
00482 st_data_t real_obj;
00483 VALUE klass;
00484
00485 if (st_lookup(arg->compat_tbl, (st_data_t)obj, &real_obj)) {
00486 obj = (VALUE)real_obj;
00487 }
00488 klass = CLASS_OF(obj);
00489 w_extended(klass, arg, check);
00490 w_byte(type, arg);
00491 path = class2path(rb_class_real(klass));
00492 w_unique(path, arg);
00493 }
00494
00495 static void
00496 w_uclass(VALUE obj, VALUE super, struct dump_arg *arg)
00497 {
00498 VALUE klass = CLASS_OF(obj);
00499
00500 w_extended(klass, arg, TRUE);
00501 klass = rb_class_real(klass);
00502 if (klass != super) {
00503 w_byte(TYPE_UCLASS, arg);
00504 w_unique(class2path(klass), arg);
00505 }
00506 }
00507
00508 static int
00509 w_obj_each(ID id, VALUE value, struct dump_call_arg *arg)
00510 {
00511 if (id == rb_id_encoding()) return ST_CONTINUE;
00512 if (id == rb_intern("E")) return ST_CONTINUE;
00513 w_symbol(id, arg->arg);
00514 w_object(value, arg->arg, arg->limit);
00515 return ST_CONTINUE;
00516 }
00517
00518 static void
00519 w_encoding(VALUE obj, long num, struct dump_call_arg *arg)
00520 {
00521 int encidx = rb_enc_get_index(obj);
00522 rb_encoding *enc = 0;
00523 st_data_t name;
00524
00525 if (encidx <= 0 || !(enc = rb_enc_from_index(encidx))) {
00526 w_long(num, arg->arg);
00527 return;
00528 }
00529 w_long(num + 1, arg->arg);
00530
00531
00532 if (encidx == rb_usascii_encindex()) {
00533 w_symbol(rb_intern("E"), arg->arg);
00534 w_object(Qfalse, arg->arg, arg->limit + 1);
00535 return;
00536 }
00537 else if (encidx == rb_utf8_encindex()) {
00538 w_symbol(rb_intern("E"), arg->arg);
00539 w_object(Qtrue, arg->arg, arg->limit + 1);
00540 return;
00541 }
00542
00543 w_symbol(rb_id_encoding(), arg->arg);
00544 do {
00545 if (!arg->arg->encodings)
00546 arg->arg->encodings = st_init_strcasetable();
00547 else if (st_lookup(arg->arg->encodings, (st_data_t)rb_enc_name(enc), &name))
00548 break;
00549 name = (st_data_t)rb_str_new2(rb_enc_name(enc));
00550 st_insert(arg->arg->encodings, (st_data_t)rb_enc_name(enc), name);
00551 } while (0);
00552 w_object(name, arg->arg, arg->limit + 1);
00553 }
00554
00555 static void
00556 w_ivar(VALUE obj, st_table *tbl, struct dump_call_arg *arg)
00557 {
00558 long num = tbl ? tbl->num_entries : 0;
00559
00560 w_encoding(obj, num, arg);
00561 if (tbl) {
00562 st_foreach_safe(tbl, w_obj_each, (st_data_t)arg);
00563 }
00564 }
00565
00566 static void
00567 w_objivar(VALUE obj, struct dump_call_arg *arg)
00568 {
00569 VALUE *ptr;
00570 long i, len, num;
00571
00572 len = ROBJECT_NUMIV(obj);
00573 ptr = ROBJECT_IVPTR(obj);
00574 num = 0;
00575 for (i = 0; i < len; i++)
00576 if (ptr[i] != Qundef)
00577 num += 1;
00578
00579 w_encoding(obj, num, arg);
00580 if (num != 0) {
00581 rb_ivar_foreach(obj, w_obj_each, (st_data_t)arg);
00582 }
00583 }
00584
00585 static void
00586 w_object(VALUE obj, struct dump_arg *arg, int limit)
00587 {
00588 struct dump_call_arg c_arg;
00589 st_table *ivtbl = 0;
00590 st_data_t num;
00591 int hasiv = 0;
00592 #define has_ivars(obj, ivtbl) (((ivtbl) = rb_generic_ivar_table(obj)) != 0 || \
00593 (!SPECIAL_CONST_P(obj) && !ENCODING_IS_ASCII8BIT(obj)))
00594
00595 if (limit == 0) {
00596 rb_raise(rb_eArgError, "exceed depth limit");
00597 }
00598
00599 limit--;
00600 c_arg.limit = limit;
00601 c_arg.arg = arg;
00602
00603 if (st_lookup(arg->data, obj, &num)) {
00604 w_byte(TYPE_LINK, arg);
00605 w_long((long)num, arg);
00606 return;
00607 }
00608
00609 if (obj == Qnil) {
00610 w_byte(TYPE_NIL, arg);
00611 }
00612 else if (obj == Qtrue) {
00613 w_byte(TYPE_TRUE, arg);
00614 }
00615 else if (obj == Qfalse) {
00616 w_byte(TYPE_FALSE, arg);
00617 }
00618 else if (FIXNUM_P(obj)) {
00619 #if SIZEOF_LONG <= 4
00620 w_byte(TYPE_FIXNUM, arg);
00621 w_long(FIX2INT(obj), arg);
00622 #else
00623 if (RSHIFT((long)obj, 31) == 0 || RSHIFT((long)obj, 31) == -1) {
00624 w_byte(TYPE_FIXNUM, arg);
00625 w_long(FIX2LONG(obj), arg);
00626 }
00627 else {
00628 w_object(rb_int2big(FIX2LONG(obj)), arg, limit);
00629 }
00630 #endif
00631 }
00632 else if (SYMBOL_P(obj)) {
00633 w_symbol(SYM2ID(obj), arg);
00634 }
00635 else {
00636 arg->infection |= (int)FL_TEST(obj, MARSHAL_INFECTION);
00637
00638 if (rb_respond_to(obj, s_mdump)) {
00639 volatile VALUE v;
00640
00641 st_add_direct(arg->data, obj, arg->data->num_entries);
00642
00643 v = rb_funcall(obj, s_mdump, 0, 0);
00644 check_dump_arg(arg, s_mdump);
00645 hasiv = has_ivars(obj, ivtbl);
00646 if (hasiv) w_byte(TYPE_IVAR, arg);
00647 w_class(TYPE_USRMARSHAL, obj, arg, FALSE);
00648 w_object(v, arg, limit);
00649 if (hasiv) w_ivar(obj, ivtbl, &c_arg);
00650 return;
00651 }
00652 if (rb_respond_to(obj, s_dump)) {
00653 VALUE v;
00654 st_table *ivtbl2 = 0;
00655 int hasiv2;
00656
00657 v = rb_funcall(obj, s_dump, 1, INT2NUM(limit));
00658 check_dump_arg(arg, s_dump);
00659 if (TYPE(v) != T_STRING) {
00660 rb_raise(rb_eTypeError, "_dump() must return string");
00661 }
00662 hasiv = has_ivars(obj, ivtbl);
00663 if (hasiv) w_byte(TYPE_IVAR, arg);
00664 if ((hasiv2 = has_ivars(v, ivtbl2)) != 0 && !hasiv) {
00665 w_byte(TYPE_IVAR, arg);
00666 }
00667 w_class(TYPE_USERDEF, obj, arg, FALSE);
00668 w_bytes(RSTRING_PTR(v), RSTRING_LEN(v), arg);
00669 if (hasiv2) {
00670 w_ivar(v, ivtbl2, &c_arg);
00671 }
00672 else if (hasiv) {
00673 w_ivar(obj, ivtbl, &c_arg);
00674 }
00675 st_add_direct(arg->data, obj, arg->data->num_entries);
00676 return;
00677 }
00678
00679 st_add_direct(arg->data, obj, arg->data->num_entries);
00680
00681 hasiv = has_ivars(obj, ivtbl);
00682 {
00683 st_data_t compat_data;
00684 rb_alloc_func_t allocator = rb_get_alloc_func(RBASIC(obj)->klass);
00685 if (st_lookup(compat_allocator_tbl,
00686 (st_data_t)allocator,
00687 &compat_data)) {
00688 marshal_compat_t *compat = (marshal_compat_t*)compat_data;
00689 VALUE real_obj = obj;
00690 obj = compat->dumper(real_obj);
00691 st_insert(arg->compat_tbl, (st_data_t)obj, (st_data_t)real_obj);
00692 if (obj != real_obj && !ivtbl) hasiv = 0;
00693 }
00694 }
00695 if (hasiv) w_byte(TYPE_IVAR, arg);
00696
00697 switch (BUILTIN_TYPE(obj)) {
00698 case T_CLASS:
00699 if (FL_TEST(obj, FL_SINGLETON)) {
00700 rb_raise(rb_eTypeError, "singleton class can't be dumped");
00701 }
00702 w_byte(TYPE_CLASS, arg);
00703 {
00704 volatile VALUE path = class2path(obj);
00705 w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg);
00706 }
00707 break;
00708
00709 case T_MODULE:
00710 w_byte(TYPE_MODULE, arg);
00711 {
00712 VALUE path = class2path(obj);
00713 w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg);
00714 }
00715 break;
00716
00717 case T_FLOAT:
00718 w_byte(TYPE_FLOAT, arg);
00719 w_float(RFLOAT_VALUE(obj), arg);
00720 break;
00721
00722 case T_BIGNUM:
00723 w_byte(TYPE_BIGNUM, arg);
00724 {
00725 char sign = RBIGNUM_SIGN(obj) ? '+' : '-';
00726 long len = RBIGNUM_LEN(obj);
00727 BDIGIT *d = RBIGNUM_DIGITS(obj);
00728
00729 w_byte(sign, arg);
00730 w_long(SHORTLEN(len), arg);
00731 while (len--) {
00732 #if SIZEOF_BDIGITS > SIZEOF_SHORT
00733 BDIGIT num = *d;
00734 int i;
00735
00736 for (i=0; i<SIZEOF_BDIGITS; i+=SIZEOF_SHORT) {
00737 w_short(num & SHORTMASK, arg);
00738 num = SHORTDN(num);
00739 if (len == 0 && num == 0) break;
00740 }
00741 #else
00742 w_short(*d, arg);
00743 #endif
00744 d++;
00745 }
00746 }
00747 break;
00748
00749 case T_STRING:
00750 w_uclass(obj, rb_cString, arg);
00751 w_byte(TYPE_STRING, arg);
00752 w_bytes(RSTRING_PTR(obj), RSTRING_LEN(obj), arg);
00753 break;
00754
00755 case T_REGEXP:
00756 w_uclass(obj, rb_cRegexp, arg);
00757 w_byte(TYPE_REGEXP, arg);
00758 {
00759 int opts = rb_reg_options(obj);
00760 w_bytes(RREGEXP_SRC_PTR(obj), RREGEXP_SRC_LEN(obj), arg);
00761 w_byte((char)opts, arg);
00762 }
00763 break;
00764
00765 case T_ARRAY:
00766 w_uclass(obj, rb_cArray, arg);
00767 w_byte(TYPE_ARRAY, arg);
00768 {
00769 long i, len = RARRAY_LEN(obj);
00770
00771 w_long(len, arg);
00772 for (i=0; i<RARRAY_LEN(obj); i++) {
00773 w_object(RARRAY_PTR(obj)[i], arg, limit);
00774 if (len != RARRAY_LEN(obj)) {
00775 rb_raise(rb_eRuntimeError, "array modified during dump");
00776 }
00777 }
00778 }
00779 break;
00780
00781 case T_HASH:
00782 w_uclass(obj, rb_cHash, arg);
00783 if (NIL_P(RHASH_IFNONE(obj))) {
00784 w_byte(TYPE_HASH, arg);
00785 }
00786 else if (FL_TEST(obj, FL_USER2)) {
00787
00788 rb_raise(rb_eTypeError, "can't dump hash with default proc");
00789 }
00790 else {
00791 w_byte(TYPE_HASH_DEF, arg);
00792 }
00793 w_long(RHASH_SIZE(obj), arg);
00794 rb_hash_foreach(obj, hash_each, (st_data_t)&c_arg);
00795 if (!NIL_P(RHASH_IFNONE(obj))) {
00796 w_object(RHASH_IFNONE(obj), arg, limit);
00797 }
00798 break;
00799
00800 case T_STRUCT:
00801 w_class(TYPE_STRUCT, obj, arg, TRUE);
00802 {
00803 long len = RSTRUCT_LEN(obj);
00804 VALUE mem;
00805 long i;
00806
00807 w_long(len, arg);
00808 mem = rb_struct_members(obj);
00809 for (i=0; i<len; i++) {
00810 w_symbol(SYM2ID(RARRAY_PTR(mem)[i]), arg);
00811 w_object(RSTRUCT_PTR(obj)[i], arg, limit);
00812 }
00813 }
00814 break;
00815
00816 case T_OBJECT:
00817 w_class(TYPE_OBJECT, obj, arg, TRUE);
00818 w_objivar(obj, &c_arg);
00819 break;
00820
00821 case T_DATA:
00822 {
00823 VALUE v;
00824
00825 if (!rb_respond_to(obj, s_dump_data)) {
00826 rb_raise(rb_eTypeError,
00827 "no _dump_data is defined for class %s",
00828 rb_obj_classname(obj));
00829 }
00830 v = rb_funcall(obj, s_dump_data, 0);
00831 check_dump_arg(arg, s_dump_data);
00832 w_class(TYPE_DATA, obj, arg, TRUE);
00833 w_object(v, arg, limit);
00834 }
00835 break;
00836
00837 default:
00838 rb_raise(rb_eTypeError, "can't dump %s",
00839 rb_obj_classname(obj));
00840 break;
00841 }
00842 }
00843 if (hasiv) {
00844 w_ivar(obj, ivtbl, &c_arg);
00845 }
00846 }
00847
00848 static void
00849 clear_dump_arg(struct dump_arg *arg)
00850 {
00851 if (!arg->symbols) return;
00852 st_free_table(arg->symbols);
00853 arg->symbols = 0;
00854 st_free_table(arg->data);
00855 arg->data = 0;
00856 st_free_table(arg->compat_tbl);
00857 arg->compat_tbl = 0;
00858 if (arg->encodings) {
00859 st_free_table(arg->encodings);
00860 arg->encodings = 0;
00861 }
00862 }
00863
00864
00865
00866
00867
00868
00869
00870
00871
00872
00873
00874
00875
00876
00877
00878
00879
00880
00881
00882
00883
00884
00885
00886
00887
00888
00889
00890
00891
00892
00893
00894
00895
00896
00897
00898 static VALUE
00899 marshal_dump(int argc, VALUE *argv)
00900 {
00901 VALUE obj, port, a1, a2;
00902 int limit = -1;
00903 struct dump_arg *arg;
00904 volatile VALUE wrapper;
00905
00906 port = Qnil;
00907 rb_scan_args(argc, argv, "12", &obj, &a1, &a2);
00908 if (argc == 3) {
00909 if (!NIL_P(a2)) limit = NUM2INT(a2);
00910 if (NIL_P(a1)) goto type_error;
00911 port = a1;
00912 }
00913 else if (argc == 2) {
00914 if (FIXNUM_P(a1)) limit = FIX2INT(a1);
00915 else if (NIL_P(a1)) goto type_error;
00916 else port = a1;
00917 }
00918 wrapper = TypedData_Make_Struct(rb_cData, struct dump_arg, &dump_arg_data, arg);
00919 arg->dest = 0;
00920 arg->symbols = st_init_numtable();
00921 arg->data = st_init_numtable();
00922 arg->infection = 0;
00923 arg->compat_tbl = st_init_numtable();
00924 arg->encodings = 0;
00925 arg->str = rb_str_buf_new(0);
00926 if (!NIL_P(port)) {
00927 if (!rb_respond_to(port, s_write)) {
00928 type_error:
00929 rb_raise(rb_eTypeError, "instance of IO needed");
00930 }
00931 arg->dest = port;
00932 if (rb_respond_to(port, s_binmode)) {
00933 rb_funcall2(port, s_binmode, 0, 0);
00934 check_dump_arg(arg, s_binmode);
00935 }
00936 }
00937 else {
00938 port = arg->str;
00939 }
00940
00941 w_byte(MARSHAL_MAJOR, arg);
00942 w_byte(MARSHAL_MINOR, arg);
00943
00944 w_object(obj, arg, limit);
00945 if (arg->dest) {
00946 rb_io_write(arg->dest, arg->str);
00947 rb_str_resize(arg->str, 0);
00948 }
00949 clear_dump_arg(arg);
00950 RB_GC_GUARD(wrapper);
00951
00952 return port;
00953 }
00954
/* State shared by all r_* routines during one load. */
00955 struct load_arg {
/* Input: a String, or an object read through getbyte / read. */
00956 VALUE src;
/* Read position inside `src` when it is a String. */
00957 long offset;
/* index -> ID of symbols read so far (target of TYPE_SYMLINK). */
00958 st_table *symbols;
/* index -> object read so far (target of TYPE_LINK). */
00959 st_table *data;
/* When non-zero, called with each loaded object; its result replaces the object. */
00960 VALUE proc;
/* stand-in object allocated for a compat class -> the real object. */
00961 st_table *compat_tbl;
/* MARSHAL_INFECTION flags collected from the input; set on loaded objects. */
00962 int infection;
00963 };
00964
00965 static void
00966 check_load_arg(struct load_arg *arg, ID sym)
00967 {
00968 if (!arg->symbols) {
00969 rb_raise(rb_eRuntimeError, "Marshal.load reentered at %s",
00970 rb_id2name(sym));
00971 }
00972 }
00973
00974 static void clear_load_arg(struct load_arg *arg);
00975
00976 static void
00977 mark_load_arg(void *ptr)
00978 {
00979 struct load_arg *p = ptr;
00980 if (!p->symbols)
00981 return;
00982 rb_mark_tbl(p->data);
00983 rb_mark_hash(p->compat_tbl);
00984 }
00985
/* GC free function: releases the tables, then the load state itself. */
static void
free_load_arg(void *ptr)
{
    struct load_arg *state = ptr;

    clear_load_arg(state);
    xfree(state);
}
00992
00993 static size_t
00994 memsize_load_arg(const void *ptr)
00995 {
00996 return ptr ? sizeof(struct load_arg) : 0;
00997 }
00998
/* Typed-data descriptor for the load state: type name, then the
 * mark / free / memsize callbacks. */
00999 static const rb_data_type_t load_arg_data = {
01000 "load_arg",
01001 {mark_load_arg, free_load_arg, memsize_load_arg,},
01002 };
01003
01004 #define r_entry(v, arg) r_entry0((v), (arg)->data->num_entries, (arg))
01005 static VALUE r_entry0(VALUE v, st_index_t num, struct load_arg *arg);
01006 static VALUE r_object(struct load_arg *arg);
01007 static ID r_symbol(struct load_arg *arg);
01008 static VALUE path2class(VALUE path);
01009
01010 static st_index_t
01011 r_prepare(struct load_arg *arg)
01012 {
01013 st_index_t idx = arg->data->num_entries;
01014
01015 st_insert(arg->data, (st_data_t)idx, (st_data_t)Qundef);
01016 return idx;
01017 }
01018
01019 static int
01020 r_byte(struct load_arg *arg)
01021 {
01022 int c;
01023
01024 if (TYPE(arg->src) == T_STRING) {
01025 if (RSTRING_LEN(arg->src) > arg->offset) {
01026 c = (unsigned char)RSTRING_PTR(arg->src)[arg->offset++];
01027 }
01028 else {
01029 rb_raise(rb_eArgError, "marshal data too short");
01030 }
01031 }
01032 else {
01033 VALUE src = arg->src;
01034 VALUE v = rb_funcall2(src, s_getbyte, 0, 0);
01035 check_load_arg(arg, s_getbyte);
01036 if (NIL_P(v)) rb_eof_error();
01037 c = (unsigned char)NUM2CHR(v);
01038 }
01039 return c;
01040 }
01041
/*
 * Raises TypeError reporting that an integer in the stream announces `size`
 * bytes, more than SIZEOF_LONG (embedded in the message at compile time).
 */
01042 static void
01043 long_toobig(int size)
01044 {
01045 rb_raise(rb_eTypeError, "long too big for this architecture (size "
01046 STRINGIZE(SIZEOF_LONG)", given %d)", size);
01047 }
01048
/* Interprets the low 8 bits of c as a signed value in -128..127. */
01049 #undef SIGN_EXTEND_CHAR
01050 #if __STDC__
01051 # define SIGN_EXTEND_CHAR(c) ((signed char)(c))
01052 #else
01053
/* Without `signed char`: flip the sign bit, then subtract 128. */
01054 # define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128)
01055 #endif
01056
/*
 * Reads an integer written by w_long.
 *
 * The first byte, taken as signed, selects the form:
 *   0            -> value 0
 *   5 .. 127     -> value byte - 5
 *   -128 .. -5   -> value byte + 5
 *   1 .. 4 or more (positive)  -> that many bytes follow, least
 *                                 significant first, zero-extended
 *   -1 .. -4 or less (negative) -> |byte| bytes follow, sign-extended
 *                                  with ones
 * A byte count larger than sizeof(long) raises TypeError via long_toobig.
 *
 * The value is assembled in an unsigned long so that placing a byte in the
 * topmost position never left-shifts a signed value into or past the sign
 * bit (undefined behaviour); the bit pattern is converted to long at the end.
 */
static long
r_long(struct load_arg *arg)
{
    unsigned long bits;
    int c = SIGN_EXTEND_CHAR(r_byte(arg));
    long i;

    if (c == 0) return 0;
    if (c > 0) {
        if (4 < c && c < 128) {
            return c - 5;
        }
        if (c > (int)sizeof(long)) long_toobig(c);
        bits = 0;
        for (i=0;i<c;i++) {
            bits |= (unsigned long)r_byte(arg) << (8*i);
        }
    }
    else {
        if (-129 < c && c < -4) {
            return c + 5;
        }
        c = -c;
        if (c > (int)sizeof(long)) long_toobig(c);
        bits = ~0UL;
        for (i=0;i<c;i++) {
            /* clear byte i of the all-ones pattern, then fill it in */
            bits &= ~(0xffUL << (8*i));
            bits |= (unsigned long)r_byte(arg) << (8*i);
        }
    }
    return (long)bits;
}
01089
01090 #define r_bytes(arg) r_bytes0(r_long(arg), (arg))
01091
01092 static VALUE
01093 r_bytes0(long len, struct load_arg *arg)
01094 {
01095 VALUE str;
01096
01097 if (len == 0) return rb_str_new(0, 0);
01098 if (TYPE(arg->src) == T_STRING) {
01099 if (RSTRING_LEN(arg->src) - arg->offset >= len) {
01100 str = rb_str_new(RSTRING_PTR(arg->src)+arg->offset, len);
01101 arg->offset += len;
01102 }
01103 else {
01104 too_short:
01105 rb_raise(rb_eArgError, "marshal data too short");
01106 }
01107 }
01108 else {
01109 VALUE src = arg->src;
01110 VALUE n = LONG2NUM(len);
01111 str = rb_funcall2(src, s_read, 1, &n);
01112 check_load_arg(arg, s_read);
01113 if (NIL_P(str)) goto too_short;
01114 StringValue(str);
01115 if (RSTRING_LEN(str) != len) goto too_short;
01116 arg->infection |= (int)FL_TEST(str, MARSHAL_INFECTION);
01117 }
01118 return str;
01119 }
01120
01121 static int
01122 id2encidx(ID id, VALUE val)
01123 {
01124 if (id == rb_id_encoding()) {
01125 int idx = rb_enc_find_index(StringValueCStr(val));
01126 return idx;
01127 }
01128 else if (id == rb_intern("E")) {
01129 if (val == Qfalse) return rb_usascii_encindex();
01130 else if (val == Qtrue) return rb_utf8_encindex();
01131
01132 }
01133 return -1;
01134 }
01135
01136 static ID
01137 r_symlink(struct load_arg *arg)
01138 {
01139 st_data_t id;
01140 long num = r_long(arg);
01141
01142 if (st_lookup(arg->symbols, num, &id)) {
01143 return (ID)id;
01144 }
01145 rb_raise(rb_eArgError, "bad symbol");
01146 }
01147
01148 static ID
01149 r_symreal(struct load_arg *arg, int ivar)
01150 {
01151 volatile VALUE s = r_bytes(arg);
01152 ID id;
01153 int idx = -1;
01154 st_index_t n = arg->symbols->num_entries;
01155
01156 st_insert(arg->symbols, (st_data_t)n, (st_data_t)0);
01157 if (ivar) {
01158 long num = r_long(arg);
01159 while (num-- > 0) {
01160 id = r_symbol(arg);
01161 idx = id2encidx(id, r_object(arg));
01162 }
01163 }
01164 if (idx < 0) idx = rb_usascii_encindex();
01165 rb_enc_associate_index(s, idx);
01166 id = rb_intern_str(s);
01167 st_insert(arg->symbols, (st_data_t)n, (st_data_t)id);
01168
01169 return id;
01170 }
01171
01172 static ID
01173 r_symbol(struct load_arg *arg)
01174 {
01175 int type, ivar = 0;
01176
01177 again:
01178 switch ((type = r_byte(arg))) {
01179 case TYPE_IVAR:
01180 ivar = 1;
01181 goto again;
01182 case TYPE_SYMBOL:
01183 return r_symreal(arg, ivar);
01184 case TYPE_SYMLINK:
01185 if (ivar) {
01186 rb_raise(rb_eArgError, "dump format error (symlink with encoding)");
01187 }
01188 return r_symlink(arg);
01189 default:
01190 rb_raise(rb_eArgError, "dump format error for symbol(0x%x)", type);
01191 break;
01192 }
01193 }
01194
01195 static VALUE
01196 r_unique(struct load_arg *arg)
01197 {
01198 return rb_id2str(r_symbol(arg));
01199 }
01200
01201 static VALUE
01202 r_string(struct load_arg *arg)
01203 {
01204 return r_bytes(arg);
01205 }
01206
01207 static VALUE
01208 r_entry0(VALUE v, st_index_t num, struct load_arg *arg)
01209 {
01210 st_data_t real_obj = (VALUE)Qundef;
01211 if (st_lookup(arg->compat_tbl, v, &real_obj)) {
01212 st_insert(arg->data, num, (st_data_t)real_obj);
01213 }
01214 else {
01215 st_insert(arg->data, num, (st_data_t)v);
01216 }
01217 if (arg->infection) {
01218 FL_SET(v, arg->infection);
01219 if ((VALUE)real_obj != Qundef)
01220 FL_SET((VALUE)real_obj, arg->infection);
01221 }
01222 return v;
01223 }
01224
01225 static VALUE
01226 r_leave(VALUE v, struct load_arg *arg)
01227 {
01228 st_data_t data;
01229 if (st_lookup(arg->compat_tbl, v, &data)) {
01230 VALUE real_obj = (VALUE)data;
01231 rb_alloc_func_t allocator = rb_get_alloc_func(CLASS_OF(real_obj));
01232 st_data_t key = v;
01233 if (st_lookup(compat_allocator_tbl, (st_data_t)allocator, &data)) {
01234 marshal_compat_t *compat = (marshal_compat_t*)data;
01235 compat->loader(real_obj, v);
01236 }
01237 st_delete(arg->compat_tbl, &key, 0);
01238 v = real_obj;
01239 }
01240 if (arg->proc) {
01241 v = rb_funcall(arg->proc, s_call, 1, v);
01242 check_load_arg(arg, s_call);
01243 }
01244 return v;
01245 }
01246
01247 static void
01248 r_ivar(VALUE obj, int *has_encoding, struct load_arg *arg)
01249 {
01250 long len;
01251
01252 len = r_long(arg);
01253 if (len > 0) {
01254 do {
01255 ID id = r_symbol(arg);
01256 VALUE val = r_object(arg);
01257 int idx = id2encidx(id, val);
01258 if (idx >= 0) {
01259 rb_enc_associate_index(obj, idx);
01260 if (has_encoding) *has_encoding = TRUE;
01261 }
01262 else {
01263 rb_ivar_set(obj, id, val);
01264 }
01265 } while (--len > 0);
01266 }
01267 }
01268
01269 static VALUE
01270 path2class(VALUE path)
01271 {
01272 VALUE v = rb_path_to_class(path);
01273
01274 if (TYPE(v) != T_CLASS) {
01275 rb_raise(rb_eArgError, "%.*s does not refer to class",
01276 (int)RSTRING_LEN(path), RSTRING_PTR(path));
01277 }
01278 return v;
01279 }
01280
01281 static VALUE
01282 path2module(VALUE path)
01283 {
01284 VALUE v = rb_path_to_class(path);
01285
01286 if (TYPE(v) != T_MODULE) {
01287 rb_raise(rb_eArgError, "%.*s does not refer to module",
01288 (int)RSTRING_LEN(path), RSTRING_PTR(path));
01289 }
01290 return v;
01291 }
01292
01293 static VALUE
01294 obj_alloc_by_path(VALUE path, struct load_arg *arg)
01295 {
01296 VALUE klass;
01297 st_data_t data;
01298 rb_alloc_func_t allocator;
01299
01300 klass = path2class(path);
01301
01302 allocator = rb_get_alloc_func(klass);
01303 if (st_lookup(compat_allocator_tbl, (st_data_t)allocator, &data)) {
01304 marshal_compat_t *compat = (marshal_compat_t*)data;
01305 VALUE real_obj = rb_obj_alloc(klass);
01306 VALUE obj = rb_obj_alloc(compat->oldclass);
01307 st_insert(arg->compat_tbl, (st_data_t)obj, (st_data_t)real_obj);
01308 return obj;
01309 }
01310
01311 return rb_obj_alloc(klass);
01312 }
01313
01314 static VALUE
01315 r_object0(struct load_arg *arg, int *ivp, VALUE extmod)
01316 {
01317 VALUE v = Qnil;
01318 int type = r_byte(arg);
01319 long id;
01320 st_data_t link;
01321
01322 switch (type) {
01323 case TYPE_LINK:
01324 id = r_long(arg);
01325 if (!st_lookup(arg->data, (st_data_t)id, &link)) {
01326 rb_raise(rb_eArgError, "dump format error (unlinked)");
01327 }
01328 v = (VALUE)link;
01329 if (arg->proc) {
01330 v = rb_funcall(arg->proc, s_call, 1, v);
01331 check_load_arg(arg, s_call);
01332 }
01333 break;
01334
01335 case TYPE_IVAR:
01336 {
01337 int ivar = TRUE;
01338
01339 v = r_object0(arg, &ivar, extmod);
01340 if (ivar) r_ivar(v, NULL, arg);
01341 }
01342 break;
01343
01344 case TYPE_EXTENDED:
01345 {
01346 VALUE m = path2module(r_unique(arg));
01347
01348 if (NIL_P(extmod)) extmod = rb_ary_new2(0);
01349 rb_ary_push(extmod, m);
01350
01351 v = r_object0(arg, 0, extmod);
01352 while (RARRAY_LEN(extmod) > 0) {
01353 m = rb_ary_pop(extmod);
01354 rb_extend_object(v, m);
01355 }
01356 }
01357 break;
01358
01359 case TYPE_UCLASS:
01360 {
01361 VALUE c = path2class(r_unique(arg));
01362
01363 if (FL_TEST(c, FL_SINGLETON)) {
01364 rb_raise(rb_eTypeError, "singleton can't be loaded");
01365 }
01366 v = r_object0(arg, 0, extmod);
01367 if (rb_special_const_p(v) || TYPE(v) == T_OBJECT || TYPE(v) == T_CLASS) {
01368 format_error:
01369 rb_raise(rb_eArgError, "dump format error (user class)");
01370 }
01371 if (TYPE(v) == T_MODULE || !RTEST(rb_class_inherited_p(c, RBASIC(v)->klass))) {
01372 VALUE tmp = rb_obj_alloc(c);
01373
01374 if (TYPE(v) != TYPE(tmp)) goto format_error;
01375 }
01376 RBASIC(v)->klass = c;
01377 }
01378 break;
01379
01380 case TYPE_NIL:
01381 v = Qnil;
01382 v = r_leave(v, arg);
01383 break;
01384
01385 case TYPE_TRUE:
01386 v = Qtrue;
01387 v = r_leave(v, arg);
01388 break;
01389
01390 case TYPE_FALSE:
01391 v = Qfalse;
01392 v = r_leave(v, arg);
01393 break;
01394
01395 case TYPE_FIXNUM:
01396 {
01397 long i = r_long(arg);
01398 v = LONG2FIX(i);
01399 }
01400 v = r_leave(v, arg);
01401 break;
01402
01403 case TYPE_FLOAT:
01404 {
01405 double d;
01406 VALUE str = r_bytes(arg);
01407 const char *ptr = RSTRING_PTR(str);
01408
01409 if (strcmp(ptr, "nan") == 0) {
01410 d = NAN;
01411 }
01412 else if (strcmp(ptr, "inf") == 0) {
01413 d = INFINITY;
01414 }
01415 else if (strcmp(ptr, "-inf") == 0) {
01416 d = -INFINITY;
01417 }
01418 else {
01419 char *e;
01420 d = strtod(ptr, &e);
01421 d = load_mantissa(d, e, RSTRING_LEN(str) - (e - ptr));
01422 }
01423 v = DBL2NUM(d);
01424 v = r_entry(v, arg);
01425 v = r_leave(v, arg);
01426 }
01427 break;
01428
01429 case TYPE_BIGNUM:
01430 {
01431 long len;
01432 BDIGIT *digits;
01433 volatile VALUE data;
01434
01435 NEWOBJ(big, struct RBignum);
01436 OBJSETUP(big, rb_cBignum, T_BIGNUM);
01437 RBIGNUM_SET_SIGN(big, (r_byte(arg) == '+'));
01438 len = r_long(arg);
01439 data = r_bytes0(len * 2, arg);
01440 #if SIZEOF_BDIGITS == SIZEOF_SHORT
01441 rb_big_resize((VALUE)big, len);
01442 #else
01443 rb_big_resize((VALUE)big, (len + 1) * 2 / sizeof(BDIGIT));
01444 #endif
01445 digits = RBIGNUM_DIGITS(big);
01446 MEMCPY(digits, RSTRING_PTR(data), char, len * 2);
01447 #if SIZEOF_BDIGITS > SIZEOF_SHORT
01448 MEMZERO((char *)digits + len * 2, char,
01449 RBIGNUM_LEN(big) * sizeof(BDIGIT) - len * 2);
01450 #endif
01451 len = RBIGNUM_LEN(big);
01452 while (len > 0) {
01453 unsigned char *p = (unsigned char *)digits;
01454 BDIGIT num = 0;
01455 #if SIZEOF_BDIGITS > SIZEOF_SHORT
01456 int shift = 0;
01457 int i;
01458
01459 for (i=0; i<SIZEOF_BDIGITS; i++) {
01460 num |= (int)p[i] << shift;
01461 shift += 8;
01462 }
01463 #else
01464 num = p[0] | (p[1] << 8);
01465 #endif
01466 *digits++ = num;
01467 len--;
01468 }
01469 v = rb_big_norm((VALUE)big);
01470 v = r_entry(v, arg);
01471 v = r_leave(v, arg);
01472 }
01473 break;
01474
01475 case TYPE_STRING:
01476 v = r_entry(r_string(arg), arg);
01477 v = r_leave(v, arg);
01478 break;
01479
01480 case TYPE_REGEXP:
01481 {
01482 volatile VALUE str = r_bytes(arg);
01483 int options = r_byte(arg);
01484 int has_encoding = FALSE;
01485 st_index_t idx = r_prepare(arg);
01486
01487 if (ivp) {
01488 r_ivar(str, &has_encoding, arg);
01489 *ivp = FALSE;
01490 }
01491 if (!has_encoding) {
01492
01493 char *ptr = RSTRING_PTR(str), *dst = ptr, *src = ptr;
01494 long len = RSTRING_LEN(str);
01495 long bs = 0;
01496 for (; len-- > 0; *dst++ = *src++) {
01497 switch (*src) {
01498 case '\\': bs++; break;
01499 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
01500 case 'm': case 'o': case 'p': case 'q': case 'u': case 'y':
01501 case 'E': case 'F': case 'H': case 'I': case 'J': case 'K':
01502 case 'L': case 'N': case 'O': case 'P': case 'Q': case 'R':
01503 case 'S': case 'T': case 'U': case 'V': case 'X': case 'Y':
01504 if (bs & 1) --dst;
01505 default: bs = 0; break;
01506 }
01507 }
01508 rb_str_set_len(str, dst - ptr);
01509 }
01510 v = r_entry0(rb_reg_new_str(str, options), idx, arg);
01511 v = r_leave(v, arg);
01512 }
01513 break;
01514
01515 case TYPE_ARRAY:
01516 {
01517 volatile long len = r_long(arg);
01518
01519 v = rb_ary_new2(len);
01520 v = r_entry(v, arg);
01521 while (len--) {
01522 rb_ary_push(v, r_object(arg));
01523 }
01524 v = r_leave(v, arg);
01525 }
01526 break;
01527
01528 case TYPE_HASH:
01529 case TYPE_HASH_DEF:
01530 {
01531 long len = r_long(arg);
01532
01533 v = rb_hash_new();
01534 v = r_entry(v, arg);
01535 while (len--) {
01536 VALUE key = r_object(arg);
01537 VALUE value = r_object(arg);
01538 rb_hash_aset(v, key, value);
01539 }
01540 if (type == TYPE_HASH_DEF) {
01541 RHASH_IFNONE(v) = r_object(arg);
01542 }
01543 v = r_leave(v, arg);
01544 }
01545 break;
01546
01547 case TYPE_STRUCT:
01548 {
01549 VALUE mem, values;
01550 volatile long i;
01551 ID slot;
01552 st_index_t idx = r_prepare(arg);
01553 VALUE klass = path2class(r_unique(arg));
01554 long len = r_long(arg);
01555
01556 v = rb_obj_alloc(klass);
01557 if (TYPE(v) != T_STRUCT) {
01558 rb_raise(rb_eTypeError, "class %s not a struct", rb_class2name(klass));
01559 }
01560 mem = rb_struct_s_members(klass);
01561 if (RARRAY_LEN(mem) != len) {
01562 rb_raise(rb_eTypeError, "struct %s not compatible (struct size differs)",
01563 rb_class2name(klass));
01564 }
01565
01566 v = r_entry0(v, idx, arg);
01567 values = rb_ary_new2(len);
01568 for (i=0; i<len; i++) {
01569 slot = r_symbol(arg);
01570
01571 if (RARRAY_PTR(mem)[i] != ID2SYM(slot)) {
01572 rb_raise(rb_eTypeError, "struct %s not compatible (:%s for :%s)",
01573 rb_class2name(klass),
01574 rb_id2name(slot),
01575 rb_id2name(SYM2ID(RARRAY_PTR(mem)[i])));
01576 }
01577 rb_ary_push(values, r_object(arg));
01578 }
01579 rb_struct_initialize(v, values);
01580 v = r_leave(v, arg);
01581 }
01582 break;
01583
01584 case TYPE_USERDEF:
01585 {
01586 VALUE klass = path2class(r_unique(arg));
01587 VALUE data;
01588
01589 if (!rb_respond_to(klass, s_load)) {
01590 rb_raise(rb_eTypeError, "class %s needs to have method `_load'",
01591 rb_class2name(klass));
01592 }
01593 data = r_string(arg);
01594 if (ivp) {
01595 r_ivar(data, NULL, arg);
01596 *ivp = FALSE;
01597 }
01598 v = rb_funcall(klass, s_load, 1, data);
01599 check_load_arg(arg, s_load);
01600 v = r_entry(v, arg);
01601 v = r_leave(v, arg);
01602 }
01603 break;
01604
01605 case TYPE_USRMARSHAL:
01606 {
01607 VALUE klass = path2class(r_unique(arg));
01608 VALUE data;
01609
01610 v = rb_obj_alloc(klass);
01611 if (!NIL_P(extmod)) {
01612 while (RARRAY_LEN(extmod) > 0) {
01613 VALUE m = rb_ary_pop(extmod);
01614 rb_extend_object(v, m);
01615 }
01616 }
01617 if (!rb_respond_to(v, s_mload)) {
01618 rb_raise(rb_eTypeError, "instance of %s needs to have method `marshal_load'",
01619 rb_class2name(klass));
01620 }
01621 v = r_entry(v, arg);
01622 data = r_object(arg);
01623 rb_funcall(v, s_mload, 1, data);
01624 check_load_arg(arg, s_mload);
01625 v = r_leave(v, arg);
01626 }
01627 break;
01628
01629 case TYPE_OBJECT:
01630 {
01631 st_index_t idx = r_prepare(arg);
01632 v = obj_alloc_by_path(r_unique(arg), arg);
01633 if (TYPE(v) != T_OBJECT) {
01634 rb_raise(rb_eArgError, "dump format error");
01635 }
01636 v = r_entry0(v, idx, arg);
01637 r_ivar(v, NULL, arg);
01638 v = r_leave(v, arg);
01639 }
01640 break;
01641
01642 case TYPE_DATA:
01643 {
01644 VALUE klass = path2class(r_unique(arg));
01645 if (rb_respond_to(klass, s_alloc)) {
01646 static int warn = TRUE;
01647 if (warn) {
01648 rb_warn("define `allocate' instead of `_alloc'");
01649 warn = FALSE;
01650 }
01651 v = rb_funcall(klass, s_alloc, 0);
01652 check_load_arg(arg, s_alloc);
01653 }
01654 else {
01655 v = rb_obj_alloc(klass);
01656 }
01657 if (TYPE(v) != T_DATA) {
01658 rb_raise(rb_eArgError, "dump format error");
01659 }
01660 v = r_entry(v, arg);
01661 if (!rb_respond_to(v, s_load_data)) {
01662 rb_raise(rb_eTypeError,
01663 "class %s needs to have instance method `_load_data'",
01664 rb_class2name(klass));
01665 }
01666 rb_funcall(v, s_load_data, 1, r_object0(arg, 0, extmod));
01667 check_load_arg(arg, s_load_data);
01668 v = r_leave(v, arg);
01669 }
01670 break;
01671
01672 case TYPE_MODULE_OLD:
01673 {
01674 volatile VALUE str = r_bytes(arg);
01675
01676 v = rb_path_to_class(str);
01677 v = r_entry(v, arg);
01678 v = r_leave(v, arg);
01679 }
01680 break;
01681
01682 case TYPE_CLASS:
01683 {
01684 volatile VALUE str = r_bytes(arg);
01685
01686 v = path2class(str);
01687 v = r_entry(v, arg);
01688 v = r_leave(v, arg);
01689 }
01690 break;
01691
01692 case TYPE_MODULE:
01693 {
01694 volatile VALUE str = r_bytes(arg);
01695
01696 v = path2module(str);
01697 v = r_entry(v, arg);
01698 v = r_leave(v, arg);
01699 }
01700 break;
01701
01702 case TYPE_SYMBOL:
01703 if (ivp) {
01704 v = ID2SYM(r_symreal(arg, *ivp));
01705 *ivp = FALSE;
01706 }
01707 else {
01708 v = ID2SYM(r_symreal(arg, 0));
01709 }
01710 v = r_leave(v, arg);
01711 break;
01712
01713 case TYPE_SYMLINK:
01714 v = ID2SYM(r_symlink(arg));
01715 break;
01716
01717 default:
01718 rb_raise(rb_eArgError, "dump format error(0x%x)", type);
01719 break;
01720 }
01721 return v;
01722 }
01723
01724 static VALUE
01725 r_object(struct load_arg *arg)
01726 {
01727 return r_object0(arg, 0, Qnil);
01728 }
01729
01730 static void
01731 clear_load_arg(struct load_arg *arg)
01732 {
01733 if (!arg->symbols) return;
01734 st_free_table(arg->symbols);
01735 arg->symbols = 0;
01736 st_free_table(arg->data);
01737 arg->data = 0;
01738 st_free_table(arg->compat_tbl);
01739 arg->compat_tbl = 0;
01740 }
01741
01742
01743
01744
01745
01746
01747
01748
01749
01750
01751
01752
01753 static VALUE
01754 marshal_load(int argc, VALUE *argv)
01755 {
01756 VALUE port, proc;
01757 int major, minor, infection = 0;
01758 VALUE v;
01759 volatile VALUE wrapper;
01760 struct load_arg *arg;
01761
01762 rb_scan_args(argc, argv, "11", &port, &proc);
01763 v = rb_check_string_type(port);
01764 if (!NIL_P(v)) {
01765 infection = (int)FL_TEST(port, MARSHAL_INFECTION);
01766 port = v;
01767 }
01768 else if (rb_respond_to(port, s_getbyte) && rb_respond_to(port, s_read)) {
01769 if (rb_respond_to(port, s_binmode)) {
01770 rb_funcall2(port, s_binmode, 0, 0);
01771 }
01772 infection = (int)(FL_TAINT | FL_TEST(port, FL_UNTRUSTED));
01773 }
01774 else {
01775 rb_raise(rb_eTypeError, "instance of IO needed");
01776 }
01777 wrapper = TypedData_Make_Struct(rb_cData, struct load_arg, &load_arg_data, arg);
01778 arg->infection = infection;
01779 arg->src = port;
01780 arg->offset = 0;
01781 arg->symbols = st_init_numtable();
01782 arg->data = st_init_numtable();
01783 arg->compat_tbl = st_init_numtable();
01784 arg->proc = 0;
01785
01786 major = r_byte(arg);
01787 minor = r_byte(arg);
01788 if (major != MARSHAL_MAJOR || minor > MARSHAL_MINOR) {
01789 clear_load_arg(arg);
01790 rb_raise(rb_eTypeError, "incompatible marshal file format (can't be read)\n\
01791 \tformat version %d.%d required; %d.%d given",
01792 MARSHAL_MAJOR, MARSHAL_MINOR, major, minor);
01793 }
01794 if (RTEST(ruby_verbose) && minor != MARSHAL_MINOR) {
01795 rb_warn("incompatible marshal file format (can be read)\n\
01796 \tformat version %d.%d required; %d.%d given",
01797 MARSHAL_MAJOR, MARSHAL_MINOR, major, minor);
01798 }
01799
01800 if (!NIL_P(proc)) arg->proc = proc;
01801 v = r_object(arg);
01802 clear_load_arg(arg);
01803 RB_GC_GUARD(wrapper);
01804
01805 return v;
01806 }
01807
01808
01809
01810
01811
01812
01813
01814
01815
01816
01817
01818
01819
01820
01821
01822
01823
01824
01825
01826
01827
01828
01829
01830
01831
01832
01833
01834
01835
01836
01837
01838
01839
01840
01841
01842
01843
01844
01845
01846
01847
01848
01849
01850
01851
01852
01853
01854
01855
01856
01857
01858
01859
01860
01861
01862
01863
01864
01865
01866
01867
01868
01869
01870
01871
01872
01873
01874
01875
01876
01877
01878
01879
01880
01881
01882
01883
01884
01885
01886
01887
01888
01889
01890
01891
01892
01893
01894
01895
01896
01897
01898
01899
01900
01901
01902
01903
01904 void
01905 Init_marshal(void)
01906 {
01907 #undef rb_intern
01908 #define rb_intern(str) rb_intern_const(str)
01909
01910 VALUE rb_mMarshal = rb_define_module("Marshal");
01911
01912 s_dump = rb_intern("_dump");
01913 s_load = rb_intern("_load");
01914 s_mdump = rb_intern("marshal_dump");
01915 s_mload = rb_intern("marshal_load");
01916 s_dump_data = rb_intern("_dump_data");
01917 s_load_data = rb_intern("_load_data");
01918 s_alloc = rb_intern("_alloc");
01919 s_call = rb_intern("call");
01920 s_getbyte = rb_intern("getbyte");
01921 s_read = rb_intern("read");
01922 s_write = rb_intern("write");
01923 s_binmode = rb_intern("binmode");
01924
01925 rb_define_module_function(rb_mMarshal, "dump", marshal_dump, -1);
01926 rb_define_module_function(rb_mMarshal, "load", marshal_load, -1);
01927 rb_define_module_function(rb_mMarshal, "restore", marshal_load, -1);
01928
01929 rb_define_const(rb_mMarshal, "MAJOR_VERSION", INT2FIX(MARSHAL_MAJOR));
01930 rb_define_const(rb_mMarshal, "MINOR_VERSION", INT2FIX(MARSHAL_MINOR));
01931
01932 compat_allocator_tbl = st_init_numtable();
01933 compat_allocator_tbl_wrapper =
01934 Data_Wrap_Struct(rb_cData, mark_marshal_compat_t, 0, compat_allocator_tbl);
01935 rb_gc_register_mark_object(compat_allocator_tbl_wrapper);
01936 }
01937
01938 VALUE
01939 rb_marshal_dump(VALUE obj, VALUE port)
01940 {
01941 int argc = 1;
01942 VALUE argv[2];
01943
01944 argv[0] = obj;
01945 argv[1] = port;
01946 if (!NIL_P(port)) argc = 2;
01947 return marshal_dump(argc, argv);
01948 }
01949
01950 VALUE
01951 rb_marshal_load(VALUE port)
01952 {
01953 return marshal_load(1, &port);
01954 }
01955