[m17n] String#encode

なかだです。

String#encodee$B$r$?$a$7$K<BAu$7$F$_$^$7$?!#e(B

e$B$J$<$+e(B"sjis"e$B$GEPO?$5$l$F$$$k$N$Ke(B"Shift_JIS"e$B$H=P$F$-$?$j$9$k$N$G!“e(B
e$BJLL>$rIU$1$i$l$k$h$&$K$7$Fe(Brb_enc_name()e$B$K$”$o$;$F$_$^$7$?!#e(B

Index: encoding.c

— encoding.c (revision 13347)
+++ encoding.c (working copy)
@@ -24,4 +24,5 @@ struct rb_encoding_entry {
static struct rb_encoding_entry *enc_table;
static int enc_table_size;
+static st_table *enc_table_alias;

void
@@ -44,10 +45,21 @@ rb_enc_register(const char *name, rb_enc

void
+rb_enc_alias(const char *alias, const char *orig)
+{

  • if (!enc_table_alias) {
  • enc_table_alias = st_init_strtable();
  • }
  • st_insert(enc_table_alias, (st_data_t)alias, (st_data_t)orig);
    +}

+void
rb_enc_init(void)
{
rb_enc_register(“ascii”, ONIG_ENCODING_ASCII);

  • rb_enc_register(“sjis”, ONIG_ENCODING_SJIS);
  • rb_enc_register(“shift_jis”, ONIG_ENCODING_SJIS);
    rb_enc_register(“euc-jp”, ONIG_ENCODING_EUC_JP);
    rb_enc_register(“utf-8”, ONIG_ENCODING_UTF8);
  • rb_enc_alias(“binary”, “ascii”);
  • rb_enc_alias(“sjis”, “shift_jis”);
    }

@@ -68,8 +80,10 @@ rb_enc_find(const char *name)
{
int i;

  • st_data_t alias = 0;

    if (!enc_table) {
    rb_enc_init();
    }

  • find:
    for (i=0; i<enc_table_size; i++) {
    if (strcmp(name, enc_table[i].name) == 0) {
    @@ -77,4 +91,10 @@ rb_enc_find(const char *name)
    }
    }

  • if (!alias && enc_table_alias) {

  • if (st_lookup(enc_table_alias, (st_data_t)name, &alias)) {

  •  name = (const char *)alias;
    
  •  goto find;
    
  • }

  • }
    return ONIG_ENCODING_ASCII;
    }
    Index: string.c
    ===================================================================
    — string.c (revision 13347)
    +++ string.c (working copy)
    @@ -194,5 +194,5 @@ rb_tainted_str_new2(const char *ptr)

static VALUE
-str_new3(VALUE klass, VALUE str)
+str_new_shared(VALUE klass, VALUE str)
{
VALUE str2 = str_alloc(klass);
@@ -210,9 +210,17 @@ str_new3(VALUE klass, VALUE str)
FL_SET(str2, ELTS_SHARED);
}

  • rb_enc_copy((VALUE)str2, str);

    return str2;
    }

+static VALUE
+str_new3(VALUE klass, VALUE str)
+{

  • VALUE str2 = str_new_shared(klass, str);
  • rb_enc_copy(str2, str);
  • return str2;
    +}

VALUE
rb_str_new3(VALUE str)
@@ -401,4 +409,21 @@ rb_str_init(int argc, VALUE *argv, VALUE
}

+static VALUE
+rb_str_encode(VALUE str, VALUE encode)
+{

  • int idx = rb_enc_get_index(str);
  • rb_encoding *enc = rb_enc_find(StringValueCStr(encode));
  • VALUE str2;
  • if (idx && rb_enc_from_index(idx) != enc) {
  • rb_raise(rb_eArgError, “cannot convert encoding from %s to %s”,
  • rb_enc_name(rb_enc_from_index(idx)), rb_enc_name(enc));
    
  • }
  • str2 = str_new_shared(rb_obj_class(str), str);
  • rb_enc_associate(str2, enc);
  • OBJ_INFECT(str2, str);
  • return str2;
    +}

static long
str_strlen(VALUE str, rb_encoding *enc)
@@ -5429,4 +5454,5 @@ Init_String(void)

 rb_define_method(rb_cString, "encoding", str_encoding, 0);
  • rb_define_method(rb_cString, “encode”, rb_str_encode, 1);

    id_to_s = rb_intern(“to_s”);
    Index: include/ruby/encoding.h
    ===================================================================
    — include/ruby/encoding.h (revision 13347)
    +++ include/ruby/encoding.h (working copy)
    @@ -28,4 +28,5 @@ typedef OnigEncodingType rb_encoding;

int rb_enc_to_index(rb_encoding*);
+int rb_enc_get_index(VALUE obj);
rb_encoding* rb_enc_get(VALUE);
rb_encoding* rb_enc_check(VALUE,VALUE);
@@ -65,4 +66,5 @@ int rb_enc_codelen(int, rb_encoding*);
#define rb_enc_prev_char(s,p,enc) (char
)onigenc_get_prev_char_head(enc,(UChar)s,(UChar*)p)

+#define rb_enc_isctype(c,t,enc) ONIGENC_IS_CODE_CTYPE(enc,c,t)
#define rb_enc_isascii(c,enc) ONIGENC_IS_CODE_ASCII©
#define rb_enc_isalpha(c,enc) ONIGENC_IS_CODE_ALPHA(enc,c)

まつもと ゆきひろです

In message “Re: [ruby-dev:31744] [m17n] String#encode”
on Thu, 6 Sep 2007 14:08:26 +0900, Nobuyoshi N.
[email protected] writes:

|String#encodee$B$r$?$a$7$K<BAu$7$F$$^$7$?!#e(B
|
|e$B$J$<$+e(B"sjis"e$B$GEPO?$5$l$F$$$k$N$Ke(B"Shift_JIS"e$B$H=P$F$-$?$j$9$k$N$G!“e(B
|e$BJLL>$rIU$1$i$l$k$h$&$K$7$Fe(Brb_enc_name()e$B$K$”$o$;$F$
$^$7$?!#e(B

encodee$B$H$$$&L>A0$OJQ49$r%$%a!<%8$5$;$k$N$G$h$/$J$$$J$“$H;W$&e(B
e$B$h$&$K$J$j$^$7$?!#$7$+$7!”$^!"L>A0$O8e$GJQ$($k$3$H$K$7$F%3%_%Ce(B
e$B%H$7$F$/$@$5$$$^$;$s$+!)e(B

なかだです。

At Thu, 6 Sep 2007 14:08:26 +0900,
Nobuyoshi N. wrote in [ruby-dev:31744]:

e$B$J$<$+e(B"sjis"e$B$GEPO?$5$l$F$$$k$N$Ke(B"Shift_JIS"e$B$H=P$F$-$?$j$9$k$N$G!“e(B
e$BJLL>$rIU$1$i$l$k$h$&$K$7$Fe(Brb_enc_name()e$B$K$”$o$;$F$_$^$7$?!#e(B

rb_enc_name()は大文字を含む名前を使っているのに、小文字でしか検
索できないというのは非常に不便なので、文字ケースは無視するように
してみました。

Index: encoding.c

— encoding.c (revision 13544)
+++ encoding.c (working copy)
@@ -24,4 +24,5 @@ struct rb_encoding_entry {
static struct rb_encoding_entry *enc_table;
static int enc_table_size;
+static st_table *enc_table_alias;

void
@@ -44,10 +45,22 @@ rb_enc_register(const char *name, rb_enc

void
+rb_enc_alias(const char *alias, const char *orig)
+{

  • if (!enc_table_alias) {
  • enc_table_alias = st_init_strcasetable();
  • }
  • st_insert(enc_table_alias, (st_data_t)alias, (st_data_t)orig);
    +}

+void
rb_enc_init(void)
{

  • rb_enc_register(“ascii”, ONIG_ENCODING_ASCII);
  • rb_enc_register(“sjis”, ONIG_ENCODING_SJIS);
  • rb_enc_register(“euc-jp”, ONIG_ENCODING_EUC_JP);
  • rb_enc_register(“utf-8”, ONIG_ENCODING_UTF8);
    +#define enc_register(enc) rb_enc_register(rb_enc_name(enc), enc)
  • ENC_REGISTER(ONIG_ENCODING_ASCII);
  • ENC_REGISTER(ONIG_ENCODING_SJIS);
  • ENC_REGISTER(ONIG_ENCODING_EUC_JP);
  • ENC_REGISTER(ONIG_ENCODING_UTF8);
  • rb_enc_alias(“binary”, “ascii”);
  • rb_enc_alias(“sjis”, “shift_jis”);
    }

@@ -64,18 +77,35 @@ rb_enc_from_index(int index)
}

-rb_encoding *
-rb_enc_find(const char *name)
+int
+rb_enc_find_index(const char *name)
{
int i;

  • st_data_t alias = 0;

  • if (!name) return -1;
    if (!enc_table) {
    rb_enc_init();
    }

  • find:
    for (i=0; i<enc_table_size; i++) {

  • if (strcmp(name, enc_table[i].name) == 0) {
  •  return enc_table[i].enc;
    
  • if (strcasecmp(name, enc_table[i].name) == 0) {
  •  return i;
    
    }
    }
  • return ONIG_ENCODING_ASCII;
  • if (!alias && enc_table_alias) {
  • if (st_lookup(enc_table_alias, (st_data_t)name, &alias)) {
  •  name = (const char *)alias;
    
  •  goto find;
    
  • }
  • }
  • return -1;
    +}

+rb_encoding *
+rb_enc_find(const char *name)
+{

  • rb_encoding *enc = rb_enc_from_index(rb_enc_find_index(name));
  • if (!enc) enc = ONIG_ENCODING_ASCII;
  • return enc;
    }

@@ -164,5 +194,5 @@ rb_enc_get_index(VALUE obj)
int i;

  • enc_check_capable(obj);
  • if (!enc_capable(obj)) return -1;
    i = ENCODING_GET(obj);
    if (i == ENCODING_INLINE_MAX) {
    Index: st.c
    ===================================================================
    — st.c (revision 13544)
    +++ st.c (working copy)
    @@ -53,4 +53,10 @@ static const struct st_hash_type type_st
    };

+static int strcasehash(const char *);
+static const struct st_hash_type type_strcasehash = {

  • strcasecmp,
  • strcasehash,
    +};

static void rehash(st_table *);

@@ -203,4 +209,16 @@ st_init_strtable_with_size(int size)
}

+st_table*
+st_init_strcasetable(void)
+{

  • return st_init_table(&type_strcasehash);
    +}

+st_table*
+st_init_strcasetable_with_size(int size)
+{

  • return st_init_table_with_size(&type_strcasehash, size);
    +}

void
st_clear(st_table *table)
@@ -815,4 +833,23 @@ strhash(register const char *string)
}

+static int
+strcasehash(register const char *string)
+{

  • register unsigned int hval = FNV1_32A_INIT;
  • /*
  • * FNV-1a hash each octet in the buffer
    
  • */
    
  • while (*string) {
  • unsigned int c = (unsigned char)*string++;
  • if ((unsigned int)(c - ‘A’) > (‘Z’ - ‘A’)) c += ‘a’ - ‘A’;
  • hval ^= c;
  • /* multiply by the 32 bit FNV magic prime mod 2^32 */
  • hval *= FNV_32_PRIME;
  • }
  • return hval;
    +}

int
st_numcmp(st_data_t x, st_data_t y)
Index: include/ruby/encoding.h

— include/ruby/encoding.h (revision 13544)
+++ include/ruby/encoding.h (working copy)
@@ -40,6 +40,8 @@ typedef OnigEncodingType rb_encoding;
int rb_enc_to_index(rb_encoding*);
int rb_enc_get_index(VALUE obj);
+int rb_enc_find_index(const char name);
rb_encoding
rb_enc_get(VALUE);
rb_encoding* rb_enc_check(VALUE,VALUE);
+void rb_enc_associate_index(VALUE, int);
void rb_enc_associate(VALUE, rb_encoding*);
void rb_enc_copy(VALUE, VALUE);
@@ -77,4 +79,5 @@ int rb_enc_codelen(int, rb_encoding*);
#define rb_enc_prev_char(s,p,enc) (char
)onigenc_get_prev_char_head(enc,(UChar)s,(UChar*)p)

+#define rb_enc_isctype(c,t,enc) ONIGENC_IS_CODE_CTYPE(enc,c,t)
#define rb_enc_isascii(c,enc) ONIGENC_IS_CODE_ASCII©
#define rb_enc_isalpha(c,enc) ONIGENC_IS_CODE_ALPHA(enc,c)
Index: include/ruby/st.h

— include/ruby/st.h (revision 13544)
+++ include/ruby/st.h (working copy)
@@ -72,4 +72,6 @@ st_table *st_init_numtable_with_size(int
st_table *st_init_strtable(void);
st_table *st_init_strtable_with_size(int);
+st_table *st_init_strcasetable(void);
+st_table *st_init_strcasetable_with_size(int);
int st_delete(st_table *, st_data_t *, st_data_t *);
int st_delete_safe(st_table *, st_data_t *, st_data_t *, st_data_t);

At 18:12 07/09/28, NARUSE, Yui wrote:

成瀬です。

Yukihiro M. wrote:

encodee$B$H$$$&L>A0$OJQ49$r%$%a!<%8$5$;$k$N$G$h$/$J$$$J$“$H;W$&e(B
e$B$h$&$K$J$j$^$7$?!#$7$+$7!”$^!"L>A0$O8e$GJQ$($k$3$H$K$7$F%3%_%Ce(B
e$B%H$7$F$/$@$5$$$^$;$s$+!)e(B

変換なしに文字コード名を差し替えるだけの場合は String#encoding= で、
変換を伴う場合は String#encode というのはどうなのでしょう。

言語学的には筋が通るところがありますが、一般のユーザーには混乱が
予想される。

Shift_JIS を
CP932 に差し替えるとか、

その二つの違いは具体的に何でしょうか。ただ後者で MS 特有の
外字が追加されたらバイト列→文字コード着きバイト列と同等なのは
すごく納得します。

しかし、iconv (一部?) みたいに、Shift_JIS の
0x5c を円記号、
CP932 の場合にはバックスラッシュ、という場合にはちょっと
違うのではないかと思います。

バイト列に文字コードを教える場合に便利そうだなぁと。

個人的には encode よりも convert の方がしっくりくるのですが

全く同意です。

個人的には三種類のものがあるかと思います。
1)
e$B2r<a$7$F$J$$$b$N$r2r<a$9$k!#$3$l$O>l$$$DNs$KJ8;z%3!<%I$r65$($k!"e(B
(e$B>e5-$N>r7o$Ge(B) Shift_JIS e$B$+$ie(B CP932
e$B$H$+$,9M$($i$l$F$$$k!#e(B
e$B$3$l$Oe(B e$B!Ve(Binterpret_ase$B!W$H$+e(B
e$B!Ve(Base$B!W$_$?$$$JL>A0$,$$$$$N$G$O$J$$e(B
e$B$+$H;W$$$^$9!#B?J,$3$3$O5U$N%1!<%9$bF~$k$N$G$O$J$$$+$H;W$$$^$9!#e(B
(String#interpret_as(‘UNKNOWN-8BIT’) e$B$H$+e(B; “UNKNOWN-8BIT”
e$B$K$D$$$F$Oe(B
Character Sets e$B;2>He(B)
2)
文字コードを無理矢理変えること。これは普通は余り使わない方がいいので、
長い名前がいいのではないかと思います。例えば「force_encoding」とか
のようなもの。
ここまでの問題は、相当の情報がないとなかなか 1) と 2)
をよく区別できない。

  3) 文字コード情報をベースにコード変換を行う。これは多分
    String#convert と String#convert!
    ぐらいでいいだろうと思います。

宜しくお願いします。 Martin.

#-#-# Martin J. Du"rst, Assoc. Professor, Aoyama Gakuin University
#-#-# http://www.sw.it.aoyama.ac.jp mailto:[email protected]

成瀬です。

Yukihiro M. wrote:

encodee$B$H$$$&L>A0$OJQ49$r%$%a!<%8$5$;$k$N$G$h$/$J$$$J$"$H;W$&e(B
e$B$h$&$K$J$j$^$7$?!#$7$+$7!"$^!"L>A0$O8e$GJQ$($k$3$H$K$7$F%3%_%Ce(B
e$B%H$7$F$/$@$5$$$^$;$s$+!)e(B

変換なしに文字コード名を差し替えるだけの場合は String#encoding=
で、
変換を伴う場合は String#encode
というのはどうなのでしょう。Shift_JIS を
CP932
に差し替えるとか、バイト列に文字コードを教える場合に便利そうだなぁと。

個人的には encode よりも convert

の方がしっくりくるのですが

成瀬です。

Martin D. wrote:

Shift_JIS を
CP932 に差し替えるとか、

その二つの違いは具体的に何でしょうか。ただ後者で MS 特有の
外字が追加されたらバイト列→文字コード着きバイト列と同等なのは
すごく納得します。

しかし、iconv (一部?) みたいに、Shift_JIS の 0x5c を円記号、
CP932 の場合にはバックスラッシュ、という場合にはちょっと
違うのではないかと思います。

「文字コード」という概念は多義的なもので、主に
1.valid なバイト列の定義
　(/[\x81-\x9F\xE0-\xFC][\x40-\x7E\x80-\xFC]/とか）
2.コードポイントへの文字の割り当ての有無
3.Unicode との変換表
4.文字列（コードポイント列・バイト列）の意味
といったものがあると思います。

e$B$G!"e(BShift_JIS e$B$He(B CP932 e$B$N0c$$$Oe(B 2, 3, 4
e$B$J$N$G$9$,!“e(B
e$B<B:]$KLdBj$H$J$C$F$/$k$N$Oe(B 3 e$B$G$7$g$&!#e(B
e$B$3$l$K$O!“0lIt%Y%s%@!<$NFH<+J8;z$r%^%C%W$9$k$+H]$+$b$”$k$N$G$9$,!“e(B
e$B1_5-9fLdBj$N$h$&$KJQ49I=$4$H$K%^%C%W$,0[$J$C$F$k>l9g$b$”$j$^$9!#e(B
e$B$J$*!”$3$l$Oe(BShift_JIS e$BEy$He(B Unicode
e$B$N4V$NJQ49$N$_$J$i$:!"e(B
"\u00A5"e$B$r$$$+$K%P%$%HNs$KJQ49$9$k$+Ey$K$bMm$s$G$-$^$9!#e(B
http://www.w3.org/Submission/2005/SUBM-japanese-xml-20050324/

ここまでの問題は、相当の情報がないとなかなか 1) と 2) をよく区別でき
ない。

  3) 文字コード情報をベースにコード変換を行う。これは多分
    String#convert と String#convert! ぐらいでいいだろうと思います。

他の言語ならばそのように3つのメソッドに分けると思うのですが、
Ruby
は似た機能は一つのメソッド/クラスにまとめるという傾向があるので、
String#encode に現在まとめてあるのだと思っています。

けれども、変換していないのに変換を意味する語を用いているのはアレなので

    1. を別にしようという話なのですよね。

なかだです。

At Fri, 28 Sep 2007 22:13:54 +0900,
NARUSE, Yui wrote in [ruby-dev:31897]:

Martin D. wrote:

e$B8D?ME*$K$O;0<oN`$N$b$N$,$"$k$+$H;W$$$^$9!#e(B

  1. e$B2r<a$7$F$J$$$b$N$r2r<a$9$k!#$3$l$O>l$$$DNs$KJ8;z%3!<%I$r65$($k!"e(B
    (e$B>e5-$N>r7o$Ge(B) Shift_JIS e$B$+$ie(B CP932 e$B$H$+$,9M$($i$l$F$$$k!#e(B
    e$B$3$l$Oe(B e$B!Ve(Binterpret_ase$B!W$H$+e(B e$B!Ve(Base$B!W$_$?$$$JL>A0$,$$$$$N$G$O$J$$e(B
    e$B$+$H;W$$$^$9!#B?J,$3$3$O5U$N%1!<%9$bF~$k$N$G$O$J$$$+$H;W$$$^$9!#e(B
    (String#interpret_as(‘UNKNOWN-8BIT’) e$B$H$+e(B; “UNKNOWN-8BIT” e$B$K$D$$$F$Oe(B
    Character Sets e$B;2>He(B)

e$B8=:_e(B"US-ASCII"e$B$He(B"UNKNOWN-8BIT"e$B$O$H$/$K6hJL$7$F$$$^$;$s$,!“e(B
“UNKNOWN"e$B$de(B"BINARY"e$B!”$”$k$$$Oe(B1.8e$B$Ne(BKCODEe$B$N$h$&$Je(B"NONE"e$B$rJ,$1$F!"e(B
“UNKNOWN”<->e$B$=$NB>$Ne(Bencodinge$B$@$1$r5v$7$F$=$l0J30$O%(%i!<$K$9$k!"e(B
e$B$H$$$&$h$&$Ke(B1)e$B$rFs$D$KJ,$1$k$H$$$&$3$H$b9M$($i$l$k$H;W$$$^$9!#e(B

binary_string.encode(encoding_name) #=> encoded_string
encoded_string.decode #=> binary_string

  2) 文字コードを無理矢理変えること。これは普通は余り使わない方がいいので、
    長い名前がいいのではないかと思います。例えば「force_encoding」とか
    のようなもの。
    ここまでの問題は、相当の情報がないとなかなか 1) と 2) をよく区別できない。

上記の方法であれば、これは str.decode.encode(new_encoding)
と書
くことになり、区別させることはできるのではないでしょうか。

  3) 文字コード情報をベースにコード変換を行う。これは多分
    String#convert と String#convert! ぐらいでいいだろうと思います。

他の言語ならばそのように3つのメソッドに分けると思うのですが、
Ruby は似た機能は一つのメソッド/クラスにまとめるという傾向があるので、
String#encode に現在まとめてあるのだと思っています。

けれども、変換していないのに変換を意味する語を用いているのはアレなので

    1. を別にしようという話なのですよね。

3)から別にする、という意味ですよね。変換についてはまた別途追加す
ることにしましょう。

e$BL>A0$K$D$$$F$O!"$R$H$^$:e(BMartine$B$5$s$Ne(Bforce_encodinge$B$r;H$o$;$F$b$ie(B
e$B$$$^$7$?!#e(B