[Bug #981] String#encode() keeps invalid bytes when converting to US-ASCII


#1

Bug #981: String#encode() keeps invalid bytes when converting to
US-ASCII
http://redmine.ruby-lang.org/issues/show/981

起票者: Martin Dürst
ステータス: Open, 優先度: Normal
担当者: Akira T.

String#encode() (変換の指定なし、default_internal に変換)
は
default_internal が US-ASCII の場合だけ invalid
なバイトや
unknown な文字をそのままコピーします。他のエンコーディング
では '?' などにレプレースされます。

実行例:

ruby -E:ascii-8bit -e'puts "\uabcd".encode.inspect'
"?"

ruby -E:euc-jp -e'puts "\uabcd".encode.inspect'
"?"

ruby -E:us-ascii -e'puts "\uabcd".encode.inspect'
"\xEA\xAF\x8D"

意図的なものであるとはどうしても思いませんが、もしそう
でしたら rdoc で書いた方がいいかと思います。

([ruby-dev:37662] 参照)

よろしくお願いします。 Martin.


#2

チケット #981 が更新されました。 (by Yui NARUSE)

e$BC4Ev<Te(B Akira T.e$B$+$ie(BYukihiro M.e$B$KJQ99e(B

この挙動は String#encode
が直接の原因ではなく、Encoding.default_internal
側が原因です。
% ruby19 -E:us-ascii -e'p Encoding.default_internal'
#<Encoding:UTF-8>

enc_set_default_encoding を見ると、
if (def->index == ENCINDEX_US_ASCII)
def->index = defindex;
と、US-ASCII の場合にデフォルトのエンコーディング
(default_internal では UTF-8) がセットされています。
現在の仕様ではこの辺の処理は不要な気がするので、以下の通りとか。

Index: encoding.c
===================================================================
--- encoding.c (revision 21519)
+++ encoding.c (working copy)
@@ -1051,7 +1051,7 @@ struct default_encoding {
 
 static int
 enc_set_default_encoding(struct default_encoding *def, VALUE encoding,
-                         const char *name, int defindex)
+                         const char *name)
 {
     int overridden = Qfalse;
     if (def->index != -2)
@@ -1066,8 +1066,6 @@ enc_set_default_encoding(struct default_encoding *
     }
     else {
         def->index = rb_enc_to_index(rb_to_encoding(encoding));
-        if (def->index == ENCINDEX_US_ASCII)
-            def->index = defindex;
         def->enc = 0;
         enc_alias_internal(name, def->index);
     }
@@ -1118,7 +1116,7 @@ rb_enc_set_default_external(VALUE encoding)
         rb_raise(rb_eArgError, "default external can not be nil");
     }
     enc_set_default_encoding(&default_external, encoding,
-                            "external", ENCINDEX_US_ASCII);
+                            "external");
 }
 
 /*
@@ -1171,7 +1169,7 @@ void
 rb_enc_set_default_internal(VALUE encoding)
 {
     enc_set_default_encoding(&default_internal, encoding,
-                            "internal", ENCINDEX_UTF_8);
+                            "internal");
 }
 
 /*


http://redmine.ruby-lang.org/issues/show/981


#3

チケット #981 が更新されました。 (by Yukihiro M.)

e$B%9%F!<%?%9e(B Opene$B$+$ie(BClosede$B$KJQ99e(B
e$B?JD=e(B % 0e$B$+$ie(B100e$B$KJQ99e(B

Applied in changeset r21528.

http://redmine.ruby-lang.org/issues/show/981