String literal encoding

なかだです。

-- coding: euc-jp -- e$B$J$I$H$7$F$*$/$H!"$9$Y$F$NJ8;zNs%j%F%i%ke(B
e$B$Ne(Bencodinge$B$,e(B"EUC-JP"e$B$K$J$C$F$$$^$9$,!"e(BASCIIe$B$N$_$+$i$J$kJ8;zNs$Ne(B
e$B>l9g$Oe(BUS-ASCIIe$B$N$^$^$N$[$&$,JXMx$G$O$J$$$G$7$g$&$+!#e(B

Index: encoding.c

— encoding.c (revision 13339)
+++ encoding.c (working copy)
@@ -153,17 +153,13 @@ rb_enc_check(VALUE str1, VALUE str2)
if (idx1 == 0) {
enc = rb_enc_from_index(idx2);
-#if 0

  • if (m17n_asciicompat(enc)) {
  • if (rb_enc_asciicompat(enc)) {
    return enc;
    }
    -#endif
    }
    else if (idx2 == 0) {
    enc = rb_enc_from_index(idx1);
    -#if 0
  • if (m17n_asciicompat(enc)) {
  • if (rb_enc_asciicompat(enc)) {
    return enc;
    }
    -#endif
    }
    rb_raise(rb_eArgError, “character encodings differ”);
    Index: parse.y
    ===================================================================
    — parse.y (revision 13339)
    +++ parse.y (working copy)
    @@ -264,4 +264,6 @@ struct parser_params {
    #define STR_NEW(p,n) rb_enc_str_new(§,(n),parser->enc)
    #define STR_NEW2§ rb_enc_str_new(§,strlen§,parser->enc)
    +#define STR_NEW3(p,n,m) rb_enc_str_new(§,(n), STR_ENC(m))
    +#define STR_ENC(m) ((m)?parser->enc:rb_enc_from_index(0))

#ifdef YYMALLOC
@@ -3882,5 +3884,5 @@ dsym : tSYMBEG xstring_contents tSTRING
break;
}

  •    $$->nd_lit = ID2SYM(rb_intern2(RSTRING_PTR(lit), 
    

RSTRING_LEN(lit)));

  •    $$->nd_lit = ID2SYM(rb_intern_str(lit));
       nd_set_type($$, NODE_LIT);
       break;
    

@@ -4474,5 +4476,5 @@ none : /* none */

static int parser_regx_options(struct parser_params*);
-static int parser_tokadd_string(struct
parser_params*,int,int,int,long*);
+static int parser_tokadd_string(struct
parser_params*,int,int,int,long*,int*);
static int parser_parse_string(struct parser_params*,NODE*);
static int parser_here_document(struct parser_params*,NODE*);
@@ -4485,5 +4487,5 @@ static int parser_here_document(struct p

define tokadd_escape(t) parser_tokadd_escape(parser, t)

define regx_options() parser_regx_options(parser)

-# define tokadd_string(f,t,p,n) parser_tokadd_string(parser,f,t,p,n)
+# define tokadd_string(f,t,p,n,m)
parser_tokadd_string(parser,f,t,p,n,m)

define parse_string(n) parser_parse_string(parser,n)

define here_document(n) parser_here_document(parser,n)

@@ -5132,13 +5134,22 @@ dispose_string(VALUE str)
}

+static void
+parser_tokadd_mbchar(struct parser_params *parser, int c)
+{

  • int len = parser_mbclen();
  • do {
  • tokadd©;
  • } while (–len > 0 && (c = nextc()) != -1);
    +}

+#define tokadd_mbchar© parser_tokadd_mbchar(parser, c)
+
static int
parser_tokadd_string(struct parser_params *parser,

  • int func, int term, int paren, long *nest)
  •     int func, int term, int paren, long *nest, int *mb)
    

{
int c;

  • unsigned char uc;

    while ((c = nextc()) != -1) {

  •    uc = (unsigned char)c;
    

    if (paren && c == paren) {
    ++*nest;
    @@ -5192,10 +5203,7 @@ parser_tokadd_string(struct parser_param
    }
    else if (parser_ismbchar()) {

  •  int i, len = parser_mbclen()-1;
    
  •  for (i = 0; i < len; i++) {
    
  • tokadd©;

  • c = nextc();

  •  }
    
  •  tokadd_mbchar(c);
    
  •  if (mb) *mb = 1;
    
  •  continue;
    
    }
    else if ((func & STR_FUNC_QWORDS) && ISSPACE©) {
    @@ -5222,5 +5230,5 @@ parser_parse_string(struct parser_params
    int term = nd_term(quote);
    int paren = nd_paren(quote);
  • int c, space = 0;
  • int c, space = 0, mb = 0;

    if (func == -1) return tSTRING_END;
    @@ -5256,5 +5264,5 @@ parser_parse_string(struct parser_params
    }
    pushback©;

  • if (tokadd_string(func, term, paren, &quote->nd_nest) == -1) {
  • if (tokadd_string(func, term, paren, &quote->nd_nest, &mb) == -1) {
    if (func & STR_FUNC_REGEXP) {
    ruby_sourceline = nd_line(quote);
    @@ -5270,5 +5278,5 @@ parser_parse_string(struct parser_params

    tokfix();

  • set_yylval_str(STR_NEW(tok(), toklen()));
  • set_yylval_str(STR_NEW3(tok(), toklen(), mb));
    return tSTRING_CONTENT;
    }
    @@ -5433,4 +5441,5 @@ parser_here_document(struct parser_param
    }
    else {
  • int mb = 0;
    newtok();
    if (c == ‘#’) {
    @@ -5447,7 +5456,7 @@ parser_here_document(struct parser_param
    do {
    pushback©;
  •  if ((c = tokadd_string(func, '\n', 0, NULL)) == -1) goto error;
    
  •  if ((c = tokadd_string(func, '\n', 0, NULL, &mb)) == -1) goto 
    

error;
if (c != ‘\n’) {

  •            set_yylval_str(STR_NEW(tok(), toklen()));
    
  •            set_yylval_str(STR_NEW3(tok(), toklen(), mb));
    
    return tSTRING_CONTENT;
    }
    @@ -5455,5 +5464,5 @@ parser_here_document(struct parser_param
    if ((c = nextc()) == -1) goto error;
    } while (!whole_match_p(eos, len, indent));
  • str = STR_NEW(tok(), toklen());
  • str = STR_NEW3(tok(), toklen(), mb);
    }
    heredoc_restore(lex_strterm);
    @@ -5669,4 +5678,5 @@ parser_yylex(struct parser_params *parse
    int cmd_state;
    enum lex_state_e last_state;
  • int mb;
    #ifdef RIPPER
    int fallthru = Qfalse;
    @@ -5987,11 +5997,5 @@ parser_yylex(struct parser_params *parse
    newtok();
    if (parser_ismbchar()) {
  •  int i, len = parser_mbclen()-1;
    
  •  tokadd(c);
    
  •  for (i = 0; i < len; i++) {
    
  • c = nextc();
  • tokadd©;
  •  }
    
  •  tokadd_mbchar(c);
    
    }
    else if ((rb_enc_isalnum(c, parser->enc) || c == ‘_’) &&
    @@ -6678,5 +6682,5 @@ parser_yylex(struct parser_params *parse
    c = nextc();
    if (parser_is_identchar()) {
  • tokadd©;
  • tokadd_mbchar©;
    }
    else {
    @@ -6776,13 +6780,8 @@ parser_yylex(struct parser_params *parse
    }

  • mb = 0;
    do {

  •    int i, len;
    
  • tokadd©;
  • len = parser_mbclen()-1;
  •    for (i = 0; i < len; i++) {
    
  •  c = nextc();
    
  •  tokadd(c);
    
  • }
  • if (!ISASCII©) mb = 1;
  • tokadd_mbchar©;
    c = nextc();
    } while (parser_is_identchar());
    @@ -6836,5 +6835,5 @@ parser_yylex(struct parser_params *parse
    }
  •  if (lex_state != EXPR_DOT) {
    
  •  if (!mb && lex_state != EXPR_DOT) {
    
    const struct kwtable *kw;

@@ -6878,5 +6877,5 @@ parser_yylex(struct parser_params *parse
lex_state = EXPR_BEG;
nextc();

  •    set_yylval_id(rb_intern(tok()));
    
  •    set_yylval_id(rb_intern3(tok(), toklen(), STR_ENC(mb)));
       return tLABEL;
    
    }
    @@ -6897,5 +6896,5 @@ parser_yylex(struct parser_params *parse
    }
    {
  •        ID ident = rb_intern(tok());
    
  •        ID ident = rb_intern3(tok(), toklen(), STR_ENC(mb));
    
           set_yylval_id(ident);
    

@@ -8353,7 +8352,12 @@ int
rb_symname_p(const char *name)
{

  • return rb_enc_symname_p(name, rb_enc_from_index(0));
    +}

+int
+rb_enc_symname_p(const char *name, rb_encoding *enc)
+{
const char *m = name;
int localid = Qfalse;

  • rb_encoding *enc = rb_enc_from_index(0);

    if (!m) return Qfalse;
    @@ -8437,6 +8441,8 @@ rb_intern3(const char *name, long len, r
    fake_str.as.heap.ptr = (char *)name;
    fake_str.as.heap.aux.capa = len;

  • str = (VALUE)&fake_str;
  • rb_enc_associate(str, enc);
  • if (st_lookup(global_symbols.sym_id, (st_data_t)&fake_str,
    (st_data_t *)&id))
  • if (st_lookup(global_symbols.sym_id, str, (st_data_t *)&id))
    return id;

@@ -8499,5 +8505,5 @@ rb_intern3(const char *name, long len, r
id |= ++global_symbols.last_id << ID_SCOPE_SHIFT;
id_register:

  • str = rb_str_new(name, len);
  • str = rb_enc_str_new(name, len, enc);
    OBJ_FREEZE(str);
    st_add_direct(global_symbols.sym_id, (st_data_t)str, id);
    Index: string.c
    ===================================================================
    — string.c (revision 13339)
    +++ string.c (working copy)
    @@ -1130,5 +1130,6 @@ int
    rb_str_hash(VALUE str)
    {
  • return rb_memhash(RSTRING_PTR(str), RSTRING_LEN(str));
  • return hash((const void *)RSTRING_PTR(str), RSTRING_LEN(str),
  • rb_enc_get_index(str));
    }

@@ -1149,4 +1150,30 @@ rb_str_hash_m(VALUE str)
#define lesser(a,b) (((a)>(b))?(b):(a))

+static int
+is_ascii_string(VALUE str)
+{

  • long i;
  • for (i = 0; i < RSTRING_LEN(str); ++i) {
  • int c = (unsigned char)RSTRING_PTR(str)[i];
  • if (!ISASCII©) return Qfalse;
  • }
  • return Qtrue;
    +}

+int
+rb_str_comparable(VALUE str1, VALUE str2)
+{

  • int idx1 = rb_enc_get_index(str1);
  • int idx2 = rb_enc_get_index(str2);
  • if (idx1 == idx2) return Qtrue;
  • if (!rb_enc_asciicompat(idx1)) return Qfalse;
  • if (!rb_enc_asciicompat(idx2)) return Qfalse;
  • if (!is_ascii_string(str1)) return Qfalse;
  • if (!is_ascii_string(str2)) return Qfalse;
  • return Qtrue;
    +}

int
rb_str_cmp(VALUE str1, VALUE str2)
@@ -1177,5 +1204,5 @@ rb_str_cmp(VALUE str1, VALUE str2)
*/

-static VALUE
+VALUE
rb_str_equal(VALUE str1, VALUE str2)
{
@@ -1187,5 +1214,5 @@ rb_str_equal(VALUE str1, VALUE str2)
return rb_equal(str2, str1);
}

  • rb_enc_check(str1, str2); /* need weak check */
  • if (!rb_str_comparable(str1, str2)) return Qfalse;
    if (RSTRING_LEN(str1) == RSTRING_LEN(str2) &&
    rb_str_cmp(str1, str2) == 0) {
    @@ -1208,4 +1235,7 @@ rb_str_eql(VALUE str1, VALUE str2)
    return Qfalse;

  • if (rb_enc_get_index(str1) != rb_enc_get_index(str2))

  • return Qfalse;

  • if (memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2),
    lesser(RSTRING_LEN(str1), RSTRING_LEN(str2))) == 0)
    @@ -5031,11 +5061,13 @@ sym_inspect(VALUE sym)
    VALUE str, klass = Qundef;
    ID id = SYM2ID(sym);

  • rb_encoding *enc;

    sym = rb_id2str(id);

  • str = rb_str_new(0, RSTRING_LEN(sym)+1);
  • enc = rb_enc_get(sym);
  • str = rb_enc_str_new(0, RSTRING_LEN(sym)+1, enc);
    RSTRING_PTR(str)[0] = ‘:’;
    memcpy(RSTRING_PTR(str)+1, RSTRING_PTR(sym), RSTRING_LEN(sym));
    if (RSTRING_LEN(sym) != strlen(RSTRING_PTR(sym)) ||
  • !rb_symname_p(RSTRING_PTR(sym))) {
  • !rb_enc_symname_p(RSTRING_PTR(sym), enc)) {
    str = rb_str_dump(str);
    strncpy(RSTRING_PTR(str), “:”", 2);
    Index: vm.c
    ===================================================================
    — vm.c (revision 13339)
    +++ vm.c (working copy)
    @@ -13,4 +13,5 @@
    #include “ruby/node.h”
    #include “ruby/st.h”
    +#include “ruby/encoding.h”
    #include “gc.h”

Index: insns.def

— insns.def (revision 13339)
+++ insns.def (working copy)
@@ -1701,5 +1701,6 @@ opt_eq
val = Qtrue;
}

  •  else if (RSTRING_LEN(str1) == RSTRING_LEN(str2) &&
    
  •  else if (!ENCODING_GET(str1) && !ENCODING_GET(str2) &&
    
  •     RSTRING_LEN(str1) == RSTRING_LEN(str2) &&
        rb_memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2),
            RSTRING_LEN(str1)) == 0) {
    

@@ -1707,5 +1708,5 @@ opt_eq
}
else {

  • val = Qfalse;
  • val = rb_str_equal(str1, str2);
    }
    }
    Index: include/ruby/encoding.h
    ===================================================================
    — include/ruby/encoding.h (revision 13339)
    +++ include/ruby/encoding.h (working copy)
    @@ -28,4 +28,5 @@ typedef OnigEncodingType rb_encoding;

int rb_enc_to_index(rb_encoding*);
+int rb_enc_get_index(VALUE obj);
rb_encoding* rb_enc_get(VALUE);
rb_encoding* rb_enc_check(VALUE,VALUE);
@@ -74,7 +75,10 @@ int rb_enc_codelen(int, rb_encoding*);
#define rb_enc_isdigit(c,enc) ONIGENC_IS_CODE_DIGIT(enc,c)

+#define rb_enc_asciicompat(enc) ((enc)!=0)
+
int rb_enc_toupper(int c, rb_encoding enc);
int rb_enc_tolower(int c, rb_encoding enc);
ID rb_intern3(const char
, long, rb_encoding
);
+int rb_enc_symname_p(const char*, rb_encoding*);

#endif /* RUBY_ENCODING_H */
Index: include/ruby/intern.h

— include/ruby/intern.h (revision 13339)
+++ include/ruby/intern.h (working copy)
@@ -514,5 +514,7 @@ VALUE rb_str_concat(VALUE, VALUE);
int rb_memhash(const void *ptr, long len);
int rb_str_hash(VALUE);
+int rb_str_comparable(VALUE, VALUE);
int rb_str_cmp(VALUE, VALUE);
+VALUE rb_str_equal(VALUE str1, VALUE str2);
void rb_str_update(VALUE, long, long, VALUE);
VALUE rb_str_inspect(VALUE);

まつもと ゆきひろです

In message “Re: [ruby-dev:31715] string literal encoding”
on Mon, 3 Sep 2007 05:08:01 +0900, Nobuyoshi N.
[email protected] writes:

|-- coding: euc-jp -- e$B$J$I$H$7$F$*$/$H!"$9$Y$F$NJ8;zNs%j%F%i%ke(B
|e$B$Ne(Bencodinge$B$,e(B"EUC-JP"e$B$K$J$C$F$$$^$9$,!"e(BASCIIe$B$N$_$+$i$J$kJ8;zNs$Ne(B
|e$B>l9g$Oe(BUS-ASCIIe$B$N$^$^$N$[$&$,JXMx$G$O$J$$$G$7$g$&$+!#e(B

e$B3N$+$KJXMx$=$&$G$9$7!"8D?ME*$K$OM_$7$$5$$,$9$k$N$G$9$,!"e(B
JRuby(e$BFbIt%3!<%I$O4pK\E*$Ke(BUTF-16)e$B$GL@$i$+$K<BAu$,BgJQ$K$J$ke(B
e$B;EMM$r<h$j9~$`$3$H$Km4m0$7$^$9!#$I$&$7$?$b$s$G$7$g$&$M!#e(B

気にしなくてよい？

なかだです。

At Mon, 3 Sep 2007 08:28:36 +0900,
Yukihiro M. wrote in [ruby-dev:31716]:

|-- coding: euc-jp -- e$B$J$I$H$7$F$*$/$H!"$9$Y$F$NJ8;zNs%j%F%i%ke(B
|e$B$Ne(Bencodinge$B$,e(B"EUC-JP"e$B$K$J$C$F$$$^$9$,!"e(BASCIIe$B$N$_$+$i$J$kJ8;zNs$Ne(B
|e$B>l9g$Oe(BUS-ASCIIe$B$N$^$^$N$[$&$,JXMx$G$O$J$$$G$7$g$&$+!#e(B

e$B3N$+$KJXMx$=$&$G$9$7!"8D?ME*$K$OM_$7$$5$$,$9$k$N$G$9$,!"e(B
JRuby(e$BFbIt%3!<%I$O4pK\E*$Ke(BUTF-16)e$B$GL@$i$+$K<BAu$,BgJQ$K$J$ke(B
e$B;EMM$r<h$j9~$`$3$H$Km4m0$7$^$9!#$I$&$7$?$b$s$G$7$g$&$M!#e(B

JRubye$B$G$O!"e(BUnicodee$B7O0J30$NJ8;zNs$O$I$&$$$&J}?K$K$J$k$s$G$7$g$&$+!#e(B
e$B$J$s$H$J$/!"e(Bbinarye$B$K$D$$$F$Oe(Bbyte
arraye$B$H$$$&OC$r8+3]$1$?$h$&$J5$e(B
e$B$b$9$k$s$G$9$,!#e(B

e$B$b$7$9$Y$Fe(BUTF-16e$B$G<BAu$9$k$J$i!"e(BString#encodinge$B$O>o$Ke(B"UTF-16"e$B$re(B
e$BJV$9$s$G$7$g$&$+!#$=$l$J$i$=$l$Ge(Bcoding:e$B$G;XDj$7$?$b$N$H$O0lCW$9e(B
e$B$k$H$O8B$i$J$$$o$1$G!"5$$K$9$k$3$H$O$J$$$h$&$K$b;W$($^$9!#e(B

成瀬です。

Yukihiro M. wrote:

e$B$^$D$b$He(B e$B$f$-$R$m$G$9e(B
|e$B$b$7$9$Y$Fe(BUTF-16e$B$G<BAu$9$k$J$i!"e(BString#encodinge$B$O>o$Ke(B"UTF-16"e$B$re(B
|e$BJV$9$s$G$7$g$&$+!#e(B

そうだと思います。

e$B$=$l$@$H!“e(B
str1 = b”\x82\A0" # “e$B$“e(B” in Shift_JIS
str1.encode(“Shift_JIS”)
str2 = b”\xA4\A2" # “e$B$“e(B” in EUC-JP
str2.encode(“EUC-JP”)
p str1 == str2 # Ruby1.9=>false, JRuby=>true
e$B$C$F!”$J$j$^$;$s$+!#e(B

e$B7k2L$@$18+$l$Pe(BJRubye$B$N5sF0$NJ}$,K>$^$7$$$h$&$J5$$b$7$^$9$,!"e(BCSIe$B$G<B8=$7$he(B
e$B$&$H;W$&$He(BCitrus
iconve$B$N%T%%C%H$_$?$$$J$b$N$,I,MW$=$&$G$9$M!#e(B

e$B$^$D$b$He(B e$B$f$-$R$m$G$9e(B

In message “Re: [ruby-dev:31724] Re: string literal encoding”
on Mon, 3 Sep 2007 19:42:11 +0900, “NARUSE, Yui”
[email protected] writes:

|> |e$B$b$7$9$Y$Fe(BUTF-16e$B$G<BAu$9$k$J$i!"e(BString#encodinge$B$O>o$Ke(B"UTF-16"e$B$re(B
|> |e$BJV$9$s$G$7$g$&$+!#e(B
|>
|> e$B$=$&$@$H;W$$$^$9!#e(B

|str1 = b"\x82\A0" # “e$B$“e(B” in Shift_JIS
|str1.encode(“Shift_JIS”)
|str2 = b”\xA4\A2" # “e$B$“e(B” in EUC-JP
|str2.encode(“EUC-JP”)
|p str1 == str2 # Ruby1.9=>false, JRuby=>true
|e$B$C$F!”$J$j$^$;$s$+!#e(B

e$B$d$d$d!“e(Bencode()e$B$O!Ve(Bencodinge$B$re(BEUC-JPe$B$K$9$k$h$&$KJQ49$9$k!W$be(B
e$B$N$G$9$+$i!“e(BJRubye$B$G$be(Bencode(“EUC-JP”)e$B$N7k2L$Oe(B"EUC-JP"e$B$G$9$Me(B
(e$B>e5-$N!V>o$K!W$HL7=b$7$^$9$,e(B)e$B!#$”$k$$$O!”$=$b$=$be(BJRubye$B$G$O%Pe(B
e$B%$%J%j"*e(BEUC-JPe$B$K$OBP1~$7$J$$$H$7$F%(%i!<$K$9$k$+$b$7$l$^$;$s!#e(B

e$B>/$J$/$H$b>e5-$K<($5$l$k$h$&$J!Ve(BEUC-JP e$B$G$"$k$H$_$J$7$Fe(B
UTF-16e$B$KJQ49$9$k!W$H$$$&5sF0$O$7$J$$$H;W$$$^$9!#e(B

e$B$^$D$b$He(B e$B$f$-$R$m$G$9e(B

In message “Re: [ruby-dev:31717] Re: string literal encoding”
on Mon, 3 Sep 2007 09:55:23 +0900, Nobuyoshi N.
[email protected] writes:

|JRubye$B$G$O!"e(BUnicodee$B7O0J30$NJ8;zNs$O$I$&$$$&J}?K$K$J$k$s$G$7$g$&$+!#e(B
|e$B$J$s$H$J$/!"e(Bbinarye$B$K$D$$$F$Oe(Bbyte arraye$B$H$$$&OC$r8+3]$1$?$h$&$J5$e(B
|e$B$b$9$k$s$G$9$,!#e(B

JRubye$B$G$O4pK\E*$KFbIt%3!<%I$KJQ49$9$k%]%j%7!<$K$J$k$H;W$$$^e(B
e$B$9!#$G!“%P%$%J%j$,I,MW$J>l9g$K$OL@<(E*$K%P%$%J%j$H;XDj$9$k$Ne(B
e$B$@$H;W$$$^$9!#FbItE*$K<BAu$r@Z$jBX$($ke(B(e$BDL>oJ8;zNs$Oe(BJavae$B$Ne(B
Stringe$B$r;HMQ!”%P%$%J%jJ8;zNs$Oe(Bbyte arraye$B!#I=LLE*$K$OF1$8%/%ie(B
e$B%9$K8+$($ke(B)e$B$HJ9$$$F$$$^$9!#e(B

|e$B$b$7$9$Y$Fe(BUTF-16e$B$G<BAu$9$k$J$i!"e(BString#encodinge$B$O>o$Ke(B"UTF-16"e$B$re(B
|e$BJV$9$s$G$7$g$&$+!#e(B

e$B$=$&$@$H;W$$$^$9!#e(B

|e$B$=$l$J$i$=$l$Ge(Bcoding:e$B$G;XDj$7$?$b$N$H$O0lCW$9e(B
|e$B$k$H$O8B$i$J$$$o$1$G!"5$$K$9$k$3$H$O$J$$$h$&$K$b;W$($^$9!#e(B

codinge$B$G;XDj$7$?$b$N$H0lCW$7$J$$$N$O$=$l$O$=$l$G9=$o$J$$$N$Ge(B
e$B$9$,!"!Ve(BASCIIe$B$7$+4^$^$J$$$b$N$Oe(BASCII(e$B"b%P%$%J%je(B)e$B$H$$$&;EMM!We(B
e$B$Oe(BJRubye$BE*$K<h$j9~$_$K$/$$$s$8$c$J$$$G$7$g$&$+!#e(BUTF-8e$B$H0c$C$F!"e(B
e$BDL>oJ8;zNse(B(UTF-16)e$B$H%P%$%J%jJ8;zNse(B(ASCII)e$B$N8_49@-$r0];}$9$ke(B
e$B$N$,:$Fq$@$H;W$$$^$9$+$i!#e(B

                            e$B$^$D$b$He(B e$B$f$-$R$me(B /:|)

e$B$J$+$@$G$9!#e(B

At Mon, 3 Sep 2007 16:34:12 +0900,
Yukihiro M. wrote in [ruby-dev:31723]:

|JRubye$B$G$O!"e(BUnicodee$B7O0J30$NJ8;zNs$O$I$&$$$&J}?K$K$J$k$s$G$7$g$&$+!#e(B
|e$B$J$s$H$J$/!"e(Bbinarye$B$K$D$$$F$Oe(Bbyte arraye$B$H$$$&OC$r8+3]$1$?$h$&$J5$e(B
|e$B$b$9$k$s$G$9$,!#e(B

JRubye$B$G$O4pK\E*$KFbIt%3!<%I$KJQ49$9$k%]%j%7!<$K$J$k$H;W$$$^e(B
e$B$9!#$G!"%P%$%J%j$,I,MW$J>l9g$K$OL@<(E*$K%P%$%J%j$H;XDj$9$k$Ne(B
e$B$@$H;W$$$^$9!#FbItE*$K<BAu$r@Z$jBX$($ke(B(e$BDL>oJ8;zNs$Oe(BJavae$B$Ne(B
Stringe$B$r;HMQ!"%P%$%J%jJ8;zNs$Oe(Bbyte arraye$B!#I=LLE*$K$OF1$8%/%ie(B
e$B%9$K8+$($ke(B)e$B$HJ9$$$F$$$^$9!#e(B

e$B$D$^$j!";XDj$5$l$F$$$J$$8B$je(BASCIIe$B$N$_$NJ8;zNs$K$be(BUTF-16e$B$r;H$&$He(B
e$B$$$&$3$H$G$7$g$&$+!#e(B

e$B$Oe(BJRubye$BE*$K<h$j9~$_$K$/$$$s$8$c$J$$$G$7$g$&$+!#e(BUTF-8e$B$H0c$C$F!"e(B
e$BDL>oJ8;zNse(B(UTF-16)e$B$H%P%$%J%jJ8;zNse(B(ASCII)e$B$N8_49@-$r0];}$9$ke(B
e$B$N$,:$Fq$@$H;W$$$^$9$+$i!#e(B

JRubye$BE*$K$O!Ve(BASCIIe$B!b%P%$%J%j!W$H$$$&$3$H$G$$$$$s$8$c$J$$$G$7$g$&e(B
e$B$+!#$b$7$=$&$G$"$l$P!“e(BASCIIe$B$N$_$G$bHse(BASCIIe$B$r4^$s$G$$$F$bJ8;zNs%je(B
e$B%F%i%k$O>o$Ke(BJava
Stringe$B$G!“7k9g$dHf3S$OLdBj$J$/9T$($k$O$:$G$9!#e(B
e$B5U$K!Ve(BASCII(e$B"b%P%$%J%je(B)=byte
arraye$B!W$H$$$&$3$H$G$”$l$P!”$=$&$$$Ce(B
e$B$?e(Bbyte array/Java
Stringe$B4V$NJQ49$O!"%j%F%i%k$K8B$i$:$7$P$7$PI,MWe(B
e$B$K$J$k$O$:$G!"%j%F%i%k$K$D$$$F$@$15$$K$7$F$b$7$g$&$,$J$$$H;W$$$^e(B
e$B$9!#e(B

e$BBE6(E@$H$7$F!"!Ve(BASCIIe$B$7$+4^$^$J$$$b$N$Oe(Bencoding=“ASCII"e$B!W$^$G5,e(B
e$BDj$;$:$K!”!V%j%F%i%k$Ne(Bencodinge$B$O:GBg8xLs?tE*$J$b$N$r;H$&!W$/$i$$e(B
e$B$G$I$&$G$7$g$&$+!#e(BASCIIe$B%Y!<%9$N<BAu$J$ie(BASCII/binarye$B!"e(BUTF-16e$B%Y!<e(B
e$B%9$J$ie(BUTF-16e$B%Y!<%9!#$G$J$$$H<B:]$K$O$+$($C$FITJX$G$7$g$&$,$J$/$Je(B
e$B$k$H;W$&$N$G$9$,!#e(B

e$B@.@%$G$9!#e(B

Nobuyoshi N. wrote:

JRubye$BE*$K$O!Ve(BASCIIe$B!b%P%$%J%j!W$H$$$&$3$H$G$$$$$s$8$c$J$$$G$7$g$&e(B
e$B$+!#$b$7$=$&$G$"$l$P!“e(BASCIIe$B$N$_$G$bHse(BASCIIe$B$r4^$s$G$$$F$bJ8;zNs%je(B
e$B%F%i%k$O>o$Ke(BJava Stringe$B$G!“7k9g$dHf3S$OLdBj$J$/9T$($k$O$:$G$9!#e(B
e$B5U$K!Ve(BASCII(e$B"b%P%$%J%je(B)=byte arraye$B!W$H$$$&$3$H$G$”$l$P!”$=$&$$$Ce(B
e$B$?e(Bbyte array/Java Stringe$B4V$NJQ49$O!"%j%F%i%k$K8B$i$:$7$P$7$PI,MWe(B
e$B$K$J$k$O$:$G!"%j%F%i%k$K$D$$$F$@$15$$K$7$F$b$7$g$&$,$J$$$H;W$$$^e(B
e$B$9!#e(B

e$B$b$C$H$b$JOC$@$H$O;W$&$N$G$9$,!“e(BJRubye$B$,$”$ke(BStringe$B%$%s%9%?%s%9$rFbItE*$Ke(B
e$B%P%$%J%j$G;}$C$F$$$k$+e(BUTF-16e$B$G;}$C$F$$$k$+$C$F!"$=$s$J$K=EMW$JOC$J$N$G$9e(B
e$B$+!#%f!<%6$+$i!J30It$+$i!K$O$I$A$i$bF1$8e(BStringe$B%$%s%9%?%s%9$K8+$($k$N$G$9e(B
e$B$+$i!J$=$N$O$:$G$9$h$M!K!"$I$&$G$b$$$$$N$G$O!#e(B

e$BBE6(E@$H$7$F!"!Ve(BASCIIe$B$7$+4^$^$J$$$b$N$Oe(Bencoding=“ASCII"e$B!W$^$G5,e(B
e$BDj$;$:$K!”!V%j%F%i%k$Ne(Bencodinge$B$O:GBg8xLs?tE*$J$b$N$r;H$&!W$/$i$$e(B
e$B$G$I$&$G$7$g$&$+!#e(BASCIIe$B%Y!<%9$N<BAu$J$ie(BASCII/binarye$B!"e(BUTF-16e$B%Y!<e(B
e$B%9$J$ie(BUTF-16e$B%Y!<%9!#$G$J$$$H<B:]$K$O$+$($C$FITJX$G$7$g$&$,$J$/$Je(B
e$B$k$H;W$&$N$G$9$,!#e(B

e$B$`$7$m5,Dj$9$Y$-$Oe(Bencodinge$B$NLa$jCM$N$h$&$J%f!<%6$+$i8+$($kItJ,$G!"e(B
e$B!Ve(BASCIIe$B$7$+4^$^$J$$$b$N$Oe(Bencoding="ASCII"e$B!W$H$+$r5,Dj$9$k$Y$-$G$O$J$$$Ge(B
e$B$7$g$&$+!#$?$H$(FbItE*$Ke(BUTF-16e$B$G;}$C$F$$$F$b!“e(Bencodinge$B$Oe(B"ASCII"e$B$rJV$5$Me(B
e$B$P$J$i$J$$!”$H!#@5D>$KFbItJ8;zNs$NJ8;z%3!<%I$rJV$5$J$1$l$P$$$1$J$$I,MW$Oe(B
e$B$J$$$N$G$9$+$i!#e(B

e$B@.@%$G$9!#e(B

Yukihiro M. wrote:

e$B$d$d$d!“e(Bencode()e$B$O!Ve(Bencodinge$B$re(BEUC-JPe$B$K$9$k$h$&$KJQ49$9$k!W$be(B
e$B$N$G$9$+$i!“e(BJRubye$B$G$be(Bencode(“EUC-JP”)e$B$N7k2L$Oe(B"EUC-JP"e$B$G$9$Me(B
(e$B>e5-$N!V>o$K!W$HL7=b$7$^$9$,e(B)e$B!#$”$k$$$O!”$=$b$=$be(BJRubye$B$G$O%Pe(B
e$B%$%J%j"*e(BEUC-JPe$B$K$OBP1~$7$J$$$H$7$F%(%i!<$K$9$k$+$b$7$l$^$;$s!#e(B

e$B%P%$%J%jJ8;zNs$Ke(BString#encodee$B$+e(BString#encoding=e$B$GJ8;z%3!<%I$r@_Dj$9$ke(B
e$B$+!"$^$?JL$NJ}K!$rMQ$$$k$+$O$H$b$+$/!“e(BUnicodee$B0J30$NJ8;zNs$r%3!<%I$KKd$ae(B
e$B9~$`J}K!$OI,MW$K46$8$k$N$G$9$,!”$=$l$OJL$NOC$G$9$+$i$H$j$"$($:$$$$F$$$$F!"e(B

e$B>/$J$/$H$b>e5-$K<($5$l$k$h$&$J!Ve(BEUC-JP e$B$G$"$k$H$_$J$7$Fe(B
UTF-16e$B$KJQ49$9$k!W$H$$$&5sF0$O$7$J$$$H;W$$$^$9!#e(B

e$B$($C$H!"3NG’$7$?$$$N$G$9$,!"e(B

p utf16_str #=> “e$B$”$$$&4A;ze(B" # UTF-16 e$B$JJ8;zNse(B
p utf16_str.encoding #=> “UTF-16”
euc_str = utf16_str.encode(“EUC-JP”)
p euc_str.encoding #=> “EUC-JP”

UTF-16e$B$JJ8;zNs$,$"$C$?$H$7$F!"$=$l$Ne(Bencode(“EUC-JP”).encodinge$B$O>o$Ke(B"EUC-
JP"e$B$G$9$h$M!#e(BRubyM17Ne$B$r<BAu$7$?!Ve(BRubye$B!W$J$i$Pe(BRuby1.9e$B$G$be(BJRubye$B$G$b!#e(B

e$B$^$?!"e(BEUC-JPe$B$N%F%-%9%H$rFI$_9~$s$@>l9g$G$9$,!"e(B
open(“eucjp.txt”,“r:euc-jp”) do |f|
str = f.read
p str.encoding #=> “EUC-JP”
end
e$B$3$3$Ne(Bstr.encodinge$B$be(BEUC-JPe$B$G$9$h$M!#e(Br:utf-8<euc-jpe$B$J$i$5$F$*$-!#e(B

e$B$3$N$h$&$J%1!<%9$r9M$($k$H!"e(BJRubye$B$N$h$&$KFbItE*$Ke(BUTF-16e$B$rMQ$$$F$$$?$H$7e(B
e$B$F$b!“e(BString#encodinge$B$,e(BUTF-16e$B0J30$rJV$9$Y$-$H9M$($i$l$k%1!<%9$,7k9=$”$ke(B
e$B$h$&$K46$8$^$9!#e(B

e$B$J$+$@$G$9!#e(B

At Thu, 6 Sep 2007 16:40:46 +0900,
NARUSE, Yui wrote in [ruby-dev:31746]:

e$B$b$C$H$b$JOC$@$H$O;W$&$N$G$9$,!“e(BJRubye$B$,$”$ke(BStringe$B%$%s%9%?%s%9$rFbItE*$Ke(B
e$B%P%$%J%j$G;}$C$F$$$k$+e(BUTF-16e$B$G;}$C$F$$$k$+$C$F!"$=$s$J$K=EMW$JOC$J$N$G$9e(B
e$B$+!#%f!<%6$+$i!J30It$+$i!K$O$I$A$i$bF1$8e(BStringe$B%$%s%9%?%s%9$K8+$($k$N$G$9e(B
e$B$+$i!J$=$N$O$:$G$9$h$M!K!"$I$&$G$b$$$$$N$G$O!#e(B

e$B$=$&;W$$$^$9!#>:Y$Oe(BJRubye$B$N$[$&$G9M$($F$/$l$k$G$7$g$&!#e(B

e$B$`$7$m5,Dj$9$Y$-$Oe(Bencodinge$B$NLa$jCM$N$h$&$J%f!<%6$+$i8+$($kItJ,$G!"e(B
e$B!Ve(BASCIIe$B$7$+4^$^$J$$$b$N$Oe(Bencoding="ASCII"e$B!W$H$+$r5,Dj$9$k$Y$-$G$O$J$$$Ge(B
e$B$7$g$&$+!#$?$H$(FbItE*$Ke(BUTF-16e$B$G;}$C$F$$$F$b!“e(Bencodinge$B$Oe(B"ASCII"e$B$rJV$5$Me(B
e$B$P$J$i$J$$!”$H!#@5D>$KFbItJ8;zNs$NJ8;z%3!<%I$rJV$5$J$1$l$P$$$1$J$$I,MW$Oe(B
e$B$J$$$N$G$9$+$i!#e(B

e$B$=$l$G$b$$$$$H;W$$$^$9!#>o$Ke(B"UTF-16"e$B$rJV$9$N$G$J$1$l$P!"IiC4$O0le(B
e$B=o$G$7$g$&!#e(B

JRubye$B$NET9g$r5$$K$9$k$N$G$"$l$P!"e(BJRubye$B%A!<%`$KJ9$$$F$_$k$N$,0lHVe(B
e$B$+$J!#e(Bruby-coree$B$G$$$$$s$@$m$&$+!#e(B

e$B$^$D$b$He(B e$B$f$-$R$m$G$9e(B

In message “Re: [ruby-dev:31796] Re: string literal encoding”
on Tue, 18 Sep 2007 01:29:55 +0900, Nobuyoshi N.
[email protected] writes:

|(rb_enc_mbminlen(enc)==1) のほうがよさそうな感じです。

そうかもしれませんね。

|e$B$=$l$H$b!“e(BEBCDICe$B%Y!<%9$N%(%s%3!<%G%#%s%0$^$G%5%]!<%H$9$k$D$b$j$Ce(B
|e$B$F$”$j$^$9$+e(B?

e$B$$$d!"$J$$$G$9!#e(BPerle$B$H$+4hD%$C$F$k$=$&$G$9$,!#e(B

なかだです。

一点修正。

At Mon, 3 Sep 2007 05:08:01 +0900,
Nobuyoshi N. wrote in [ruby-dev:31715]:

+#define rb_enc_asciicompat(enc) ((enc)!=0)

(rb_enc_mbminlen(enc)==1) のほうがよさそうな感じです。

e$B$=$l$H$b!“e(BEBCDICe$B%Y!<%9$N%(%s%3!<%G%#%s%0$^$G%5%]!<%H$9$k$D$b$j$Ce(B
e$B$F$”$j$^$9$+e(B?