Default encoding for Marshal.load

前田です。

String#dumpe$B$NOC$r8+$F$U$H5$$K$J$C$?$N$G$9$,!"e(B1.8e$B$Ge(BMarshal.dumpe$B$7$?%G!<%?$re(B
1.9e$B$Ge(BMarshal.loade$B$9$k$He(Bencodinge$B$,e(BASCII-8BITe$B$K$J$k$N$O0U?^E*$G$7$g$&$+e(B?

$ ruby1.8 -e 'print Marshal.dump("foo")'
| ruby-trunk --encoding utf-8 -e 'p Marshal.load(ARGF.read).encoding'
#<Encoding:ASCII-8BIT>

Encoding.default_externale$B$K$J$C$F$/$l$k$+!“e(Bloade$B$N0z?t$Ge(Bencodinge$B$r;XDj$G$-$?e(B
e$BJ}$,$&$l$7$$$h$&$K;W$$$^$9$,!”$I$&$G$7$g$&$+e(B?

まつもと ゆきひろです

In message “Re: [ruby-dev:33164] default encoding for Marshal.load”
on Fri, 18 Jan 2008 12:26:03 +0900, “Shugo M.”
[email protected] writes:

|String#dumpe$B$NOC$r8+$F$U$H5$$K$J$C$?$N$G$9$,!"e(B1.8e$B$Ge(BMarshal.dumpe$B$7$?%G!<%?$re(B
|1.9e$B$Ge(BMarshal.loade$B$9$k$He(Bencodinge$B$,e(BASCII-8BITe$B$K$J$k$N$O0U?^E*$G$7$g$&$+e(B?

e$B$($(!"e(B1.8e$B$G$OJ8;zNs$O4pK\E*$Ke(BASCII-8BITe$B$G$9$+$i!#e(B

|$ ruby1.8 -e ‘print Marshal.dump(“foo”)’
| | ruby-trunk --encoding utf-8 -e ‘p Marshal.load(ARGF.read).encoding’
|#Encoding:ASCII-8BIT
|
|Encoding.default_externale$B$K$J$C$F$/$l$k$+!“e(Bloade$B$N0z?t$Ge(Bencodinge$B$r;XDj$G$-$?e(B
|e$BJ}$,$&$l$7$$$h$&$K;W$$$^$9$,!”$I$&$G$7$g$&$+e(B?

default_externale$B$H$$$&$N$Oe(Bloade$BB&$N4D6-$7$+0UL#$7$J$$$N$G!"e(B
dumpe$BB&$N4D6-$r?dB,$9$k$?$a$K$OITE,@Z$@$H;W$$$^$9!#4V0c$C$??de(B
e$BB,$O$^$C$?$/?dB,$7$J$$$h$j$b0-$$$G$7$g$&!#e(B

loade$B$N0z?t$H$$$&$N$Oe(Bdefault_externale$B$h$j$O$"$j$($k$H;W$$$^$9e(B
e$B$,!"e(Bloade$B$N0z?t%Q%?!<%s$O7k9=J#;($G$3$3$Ke(Bencodinge$B$rIU2C$9$k$Ne(B
e$B$O$A$g$C$HFq$7$=$&$G$9$M!#%-!<%o!<%I0z?t$K$9$Y$-$G$7$g$&$+!#e(B

                            e$B$^$D$b$He(B e$B$f$-$R$me(B /:|)

前田です。

On 01/18/08 14:24, Yukihiro M. wrote:

|$ ruby1.8 -e ‘print Marshal.dump(“foo”)’
| | ruby-trunk --encoding utf-8 -e ‘p Marshal.load(ARGF.read).encoding’
|#Encoding:ASCII-8BIT
|
|Encoding.default_externale$B$K$J$C$F$/$l$k$+!“e(Bloade$B$N0z?t$Ge(Bencodinge$B$r;XDj$G$-$?e(B
|e$BJ}$,$&$l$7$$$h$&$K;W$$$^$9$,!”$I$&$G$7$g$&$+e(B?

default_externale$B$H$$$&$N$Oe(Bloade$BB&$N4D6-$7$+0UL#$7$J$$$N$G!"e(B
dumpe$BB&$N4D6-$r?dB,$9$k$?$a$K$OITE,@Z$@$H;W$$$^$9!#4V0c$C$??de(B
e$BB,$O$^$C$?$/?dB,$7$J$$$h$j$b0-$$$G$7$g$&!#e(B

そうかもしれません。
ただ、書き出し側の環境を推測できないという意味ではファイルも
同じですよね。

e$B%U%!%$%k$N>l9g$Oe(Bencodinge$B$rL@<($7$Fe(Breade$B$G$-$k$N$G!“e(Bdefault_external
e$B$K$J$C$F$^$:$$>l9g$Oe(Bencodinge$B$rL@<($7$F$J$$$N$,0-$$$H$$$&$3$H$K$J$ke(B
e$B$+$H;W$&$N$G$9$,!”$H$9$k$H!"e(B

loade$B$N0z?t$H$$$&$N$Oe(Bdefault_externale$B$h$j$O$"$j$($k$H;W$$$^$9e(B
e$B$,!"e(Bloade$B$N0z?t%Q%?!<%s$O7k9=J#;($G$3$3$Ke(Bencodinge$B$rIU2C$9$k$Ne(B
e$B$O$A$g$C$HFq$7$=$&$G$9$M!#%-!<%o!<%I0z?t$K$9$Y$-$G$7$g$&$+!#e(B

という方向でしょうか。
でも1.9だけを考えると必要がないのでちょっとおおげさな気が…
やっぱり止めた方がいいと思います。

e$B%"%W%j%1!<%7%g%s$,e(Bloade$B$7$?%G!<%?Cf$NJ8;zNs$r$?$I$C$Fe(Bforce_encoding
e$B$9$k$/$i$$$,BEEv$G$9$+$M$(!#e(B

On Fri, 18 Jan 2008 16:28:58 +0900
In article [email protected]
[[ruby-dev:33171] Re: default encoding for Marshal.load]
Shugo M. [email protected] wrote:

loade$B$N0z?t$H$$$&$N$Oe(Bdefault_externale$B$h$j$O$"$j$($k$H;W$$$^$9e(B
e$B$,!"e(Bloade$B$N0z?t%Q%?!<%s$O7k9=J#;($G$3$3$Ke(Bencodinge$B$rIU2C$9$k$Ne(B
e$B$O$A$g$C$HFq$7$=$&$G$9$M!#%-!<%o!<%I0z?t$K$9$Y$-$G$7$g$&$+!#e(B

e$B$H$$$&J}8~$G$7$g$&$+!#e(B
e$B$G$be(B1.9e$B$@$1$r9M$($k$HI,MW$,$J$$$N$G$A$g$C$H$$$2$5$J5$$,!De(B
e$B$d$C$Q$j;_$a$?J}$,$$$$$H;W$$$^$9!#e(B

e$B%"%W%j%1!<%7%g%s$,e(Bloade$B$7$?%G!<%?Cf$NJ8;zNs$r$?$I$C$Fe(Bforce_encoding
e$B$9$k$/$i$$$,BEEv$G$9$+$M$(!#e(B

ある程度複雑なオブジェクトの中に文字列が保存されている場合、それをくまな
く辿って force_encoding するのは骨が折れるし間違いも起こりやすいのではな
いでしょうか。

そうでなくても、「文字列と nil が混じっている配列」のような単純な構造で
も、似て非なるコードを書き散らさなくてはならなくなりそうです。

成瀬です。

Tietew wrote:

e$B%"%W%j%1!<%7%g%s$,e(Bloade$B$7$?%G!<%?Cf$NJ8;zNs$r$?$I$C$Fe(Bforce_encoding
e$B$9$k$/$i$$$,BEEv$G$9$+$M$(!#e(B

e$B$"$kDxEYJ#;($J%*%V%8%’%/%H$NCf$KJ8;zNs$,J]B8$5$l$F$$$k>l9g!"$=$l$r$/$^$Je(B
e$B$/C)$C$Fe(B force_encoding e$B$9$k$N$O9|$,@^$l$k$74V0c$$$b5/$3$j$d$9$$$N$G$O$Je(B
e$B$$$G$7$g$&$+!#e(B

e$B$=$&$G$J$/$F$b!"!VJ8;zNs$He(B nil e$B$,:.$8$C$F$$$kG[Ns!W$N$h$&$JC1=c$J9=B$$Ge(B
e$B$b!";w$FHs$J$k%3!<%I$r=q$-;6$i$5$J$/$F$O$J$i$J$/$J$j$=$&$G$9!#e(B

Marlshal.recursive_force_encoding
e$B$H$+$$$&!V%G!<%?Cf$NJ8;zNs$r$?$I$C$Fe(B
force_encoding
e$B$9$k!W%a%=%C%I$,$"$C$?$i3Z$@$m$&$H;W$C$?$s$G$9$,!"$9$4$$e(B
e$BLLE]$=$&$@$C$?$N$Ge(B [ruby-dev:33205]
e$B$N!Ve(Bdumpe$B$NF~NO%G!<%?$N%(%s%3!<%G%#%se(B
e$B%0$K$9$k!W$r=q$$$F$_$^$7$?!#e(B

Index: marshal.c
===================================================================
--- marshal.c (revision 15163)
+++ marshal.c (working copy)
@@ -882,6 +882,7 @@ struct load_arg {
     int taint;
     st_table *compat_tbl;
     VALUE compat_tbl_wrapper;
+    rb_encoding *enc;
 };
 
 static VALUE r_entry(VALUE v, struct load_arg *arg);
@@ -968,7 +969,7 @@ r_bytes0(long len, struct load_arg *arg)
     if (len == 0) return rb_str_new(0, 0);
     if (TYPE(arg->src) == T_STRING) {
         if (RSTRING_LEN(arg->src) - arg->offset >= len) {
-            str = rb_str_new(RSTRING_PTR(arg->src)+arg->offset, len);
+            str = rb_enc_str_new(RSTRING_PTR(arg->src)+arg->offset, len, arg->enc);
             arg->offset += len;
         }
         else {
@@ -1569,6 +1570,7 @@ marshal_load(int argc, VALUE *argv)
     arg.offset = 0;
     arg.compat_tbl = st_init_numtable();
     arg.compat_tbl_wrapper = Data_Wrap_Struct(rb_cData, rb_mark_tbl, 0, arg.compat_tbl);
+    arg.enc = rb_enc_get(arg.src);
 
     major = r_byte(&arg);
     minor = r_byte(&arg);

この arg.enc に encoding をつっこむ方法が何かあれば明示的に指定できるの
ですが、まぁ、これで事実上問題ないのではないでしょうか。

まつもと ゆきひろです

In message “Re: [ruby-dev:33256] Re: default encoding for Marshal.load”
on Mon, 21 Jan 2008 21:29:58 +0900, Tietew
[email protected] writes:

|ある程度複雑なオブジェクトの中に文字列が保存されている場合、それをくまな
|く辿って force_encoding するのは骨が折れるし間違いも起こりやすいのではな
|いでしょうか。

Marshal.load()e$B$NBhFs0z?t$Ke(BProce$B$r;XDj$7$F!"J8;zNs$@$C$?$ie(B
force_encodinge$B$9$k$C$F$N$O$I$&$G$7$g$&$M!#$3$l$J$i:#$9$0$K$Ge(B
e$B$bF0$-$^$9$,!#e(B

成瀬です。

Yukihiro M. wrote:

force_encodinge$B$9$k$C$F$N$O$I$&$G$7$g$&$M!#$3$l$J$i:#$9$0$K$Ge(B
e$B$bF0$-$^$9$,!#e(B

e$B$=$NJ}K!$@$H!“e(BRuby 1.9 e$B$G:n$C$?!“e(Bencoding
e$B$N@_Dj$7$F$”$kJ8;zNs$K$^$Ge(B
force_encoding
e$B$7$F$7$^$&$H;W$$$^$9!#!Ve(Bdumpe$B$NF~NO%G!<%?$N%(%s%3!<%G%#%se(B
e$B%0$K$9$k!W$@$He(B String e$B$rFI$_9~$s$@;~$Ke(B encoding
e$B$r@_Dj$7$F$$$k$N$G!”%(%se(B
e$B%3!<%G%#%s%0>pJs$,$"$l$P$=$l$G>e=q$-$5$l$^$9!#e(B

なかだです。

At Tue, 22 Jan 2008 04:59:27 +0900,
NARUSE, Yui wrote in [ruby-dev:33268]:

Marlshal.recursive_force_encoding e$B$H$+$$$&!V%G!<%?Cf$NJ8;zNs$r$?$I$C$Fe(B
force_encoding e$B$9$k!W%a%=%C%I$,$"$C$?$i3Z$@$m$&$H;W$C$?$s$G$9$,!"$9$4$$e(B
e$BLLE]$=$&$@$C$?$N$Ge(B [ruby-dev:33205] e$B$N!Ve(Bdumpe$B$NF~NO%G!<%?$N%(%s%3!<%G%#%se(B
e$B%0$K$9$k!W$r=q$$$F$_$^$7$?!#e(B

@@ -968,7 +969,7 @@ r_bytes0(long len, struct load_arg *arg)
     if (len == 0) return rb_str_new(0, 0);
     if (TYPE(arg->src) == T_STRING) {
         if (RSTRING_LEN(arg->src) - arg->offset >= len) {
-            str = rb_str_new(RSTRING_PTR(arg->src)+arg->offset, len);
+            str = rb_enc_str_new(RSTRING_PTR(arg->src)+arg->offset, len, arg->enc);

Stringe$B8BDj$J$i!"$3$3$Ge(Brb_str_subseq()e$B$r;H$&$@$1$G$b$G$-$^$9!#e(B

e$B$=$l0J30$N$H$-$,LdBj$G!"e(BIO#read(length)e$B$O>o$Ke(BASCII-8BITe$B$rJV$9$Ne(B
e$B$GF~NO$+$iD4$Y$k$3$H$b$G$-$^$;$s!#e(BIOe$B$K$be(Brb_enc_get()e$B$G<h$l$k$h$&e(B
e$B$K%(%s%3!<%G%#%s%0$r%;%C%H$7$F$*$1$P4JC1$J$s$G$9$1$I!#e(B

この arg.enc に encoding をつっこむ方法が何かあれば明示的に指定できるの
ですが、まぁ、これで事実上問題ないのではないでしょうか。

e$B0z?t$J$7$Ne(BMarshal.loade$B$,e(BMarshal::Loadere$B%*%V%8%’%/%H$rJV$9!"$H$+e(B
e$B$G$7$g$&$+!#e(B

Index: marshal.c
===================================================================
--- marshal.c (revision 15163)
+++ marshal.c (working copy)
@@ -81,5 +81,5 @@ shortlen(long len, BDIGIT *ds)
 static ID s_dump, s_load, s_mdump, s_mload;
 static ID s_dump_data, s_load_data, s_alloc;
-static ID s_getc, s_read, s_write, s_binmode;
+static ID s_getc, s_read, s_write, s_binmode, s_external_encoding;
 
 ID rb_id_encoding(void);
@@ -883,4 +883,5 @@ struct load_arg {
     st_table *compat_tbl;
     VALUE compat_tbl_wrapper;
+    rb_encoding *enc;
 };
 
@@ -966,5 +967,5 @@ r_bytes0(long len, struct load_arg *arg)
     VALUE str;
 
-    if (len == 0) return rb_str_new(0, 0);
+    if (len == 0) return rb_enc_str_new(0, 0, arg->enc);
     if (TYPE(arg->src) == T_STRING) {
         if (RSTRING_LEN(arg->src) - arg->offset >= len) {
@@ -986,4 +987,5 @@ r_bytes0(long len, struct load_arg *arg)
         if (OBJ_TAINTED(str)) arg->taint = Qtrue;
     }
+    rb_enc_associate(str, arg->enc);
     return str;
 }
@@ -1553,7 +1555,9 @@ marshal_load(int argc, VALUE *argv)
 
     rb_scan_args(argc, argv, "11", &port, &proc);
+    arg.enc = 0;
     if (rb_respond_to(port, rb_intern("to_str"))) {
         arg.taint = OBJ_TAINTED(port); /* original taintedness */
         StringValue(port);             /* possible conversion */
+        arg.enc = rb_enc_get(port);
     }
     else if (rb_respond_to(port, s_getc) && rb_respond_to(port, s_read)) {
@@ -1561,4 +1565,9 @@ marshal_load(int argc, VALUE *argv)
             rb_funcall2(port, s_binmode, 0, 0);
         }
+        if (rb_respond_to(port, s_external_encoding)) {
+            VALUE enc = rb_funcall2(port, s_external_encoding, 0, 0);
+            rb_p(enc);
+            arg.enc = rb_to_encoding(enc);
+        }
         arg.taint = Qtrue;
     }
@@ -1642,4 +1651,5 @@ Init_marshal(void)
     s_write = rb_intern("write");
     s_binmode = rb_intern("binmode");
+    s_external_encoding = rb_intern("external_encoding");
 
     rb_define_module_function(rb_mMarshal, "dump", marshal_dump, -1);

まつもと ゆきひろです

In message “Re: [ruby-dev:33270] Re: default encoding for Marshal.load”
on Tue, 22 Jan 2008 05:37:31 +0900, “NARUSE, Yui”
[email protected] writes:

|> Marshal.load()e$B$NBhFs0z?t$Ke(BProce$B$r;XDj$7$F!“J8;zNs$@$C$?$ie(B
|> force_encodinge$B$9$k$C$F$N$O$I$&$G$7$g$&$M!#$3$l$J$i:#$9$0$K$Ge(B
|> e$B$bF0$-$^$9$,!#e(B
|
|e$B$=$NJ}K!$@$H!“e(BRuby 1.9 e$B$G:n$C$?!“e(Bencoding e$B$N@_Dj$7$F$”$kJ8;zNs$K$^$Ge(B
|force_encoding e$B$7$F$7$^$&$H;W$$$^$9!#!Ve(Bdumpe$B$NF~NO%G!<%?$N%(%s%3!<%G%#%se(B
|e$B%0$K$9$k!W$@$He(B String e$B$rFI$_9~$s$@;~$Ke(B encoding e$B$r@_Dj$7$F$$$k$N$G!”%(%se(B
|e$B%3!<%G%#%s%0>pJs$,$”$l$P$=$l$G>e=q$-$5$l$^$9!#e(B

いや、たぶん1.8のデータを1.9にアップグレードするスクリプトを
書くんですよ。