Multibyte string/regex literal with escape sequence

こんにちは、なかむら(う)です。

trunke$B$G!"e(B

% ./miniruby -ve ‘# -- coding: EUC-JP --
p "e$B$“e(B\n”.encoding’
ruby 1.9.0 (2007-10-07 patchlevel 0) [i386-netbsdelf]
“US-ASCII”

e$B$H$J$k$3$H$K5$IU$-$^$7$?!#e(B
e$BJ8;zNs%j%F%i%k$"$k$$$O@55,I=8=%j%F%i%k$K%(%9%1!<%W%7!<%1%s%9e(B
e$B$r4^$a$k$HLdEzL5MQ$Ge(BUS-ASCII(e$B$H$$$&$+<B:]$O%P%$%J%j07$$e(B?)e$B$K$Je(B
e$B$k$h$&$G$9!#e(B

e$B$*$=$i$/!"0U?^$H$7$F$O!“e(B\nnne$B$H$+e(B\xnne$B$H$+e(B\M-xe$B$H$+$Ge(B8bite$BL$,N)e(B
e$B$C$F$kJ8;z$r;E9~$s$8$c$C$?$i$b$&@5BNITL@$@$H$$$&$3$H$K$7$?$$e(B
e$B$N$@$m$&$H;W$&$N$G!”??LLL$K%A%’%C%/$9$k$h$&$K$7$F$_$^$7$?!#e(B

  * parse.y (parser_read_escape, parser_tokadd_escape): check code
    range.

Index: parse.y
===================================================================
--- parse.y (revision 13660)
+++ parse.y (working copy)
@@ -4491,8 +4491,8 @@ static int parser_here_document(struct p
 # define pushback(c) parser_pushback(parser, c)
 # define newtok() parser_newtok(parser)
 # define tokadd(c) parser_tokadd(parser, c)
-# define read_escape() parser_read_escape(parser)
-# define tokadd_escape(t) parser_tokadd_escape(parser, t)
+# define read_escape(m) parser_read_escape(parser, m)
+# define tokadd_escape(t,m) parser_tokadd_escape(parser, t, m)
 # define regx_options() parser_regx_options(parser)
 # define tokadd_string(f,t,p,n,m) parser_tokadd_string(parser,f,t,p,n,m)
 # define parse_string(n) parser_parse_string(parser,n)

@@ -4920,7 +4920,7 @@ parser_tokadd(struct parser_params *pars
}

static int
-parser_read_escape(struct parser_params *parser)
+parser_read_escape(struct parser_params *parser, int *mb)
{
int c;

@@ -4958,6 +4958,7 @@ parser_read_escape(struct parser_params
c = scan_oct(lex_p, 3, &numlen);
lex_p += numlen;
}

  • if (mb && (c >= 0200)) *mb = ENC_CODERANGE_UNKNOWN;
    return c;

    case 'x':  /* hex constant */
    

@@ -4971,6 +4972,7 @@ parser_read_escape(struct parser_params
}
lex_p += numlen;
}

  • if (mb && (c >= 0x80)) *mb = ENC_CODERANGE_UNKNOWN;
    return c;

    case 'b':  /* backspace */
    

@@ -4986,10 +4988,12 @@ parser_read_escape(struct parser_params
return ‘\0’;
}
if ((c = nextc()) == ‘\’) {

  •  return read_escape() | 0x80;
    
  •  if (mb) *mb = ENC_CODERANGE_UNKNOWN;
    
  •  return read_escape(0) | 0x80;
    
    }
    else if (c == -1) goto eof;
    else {
  •  if (mb) *mb = ENC_CODERANGE_UNKNOWN;
     return ((c & 0xff) | 0x80);
    
    }

@@ -5001,7 +5005,7 @@ parser_read_escape(struct parser_params
}
case ‘c’:
if ((c = nextc())== ‘\’) {

  •  c = read_escape();
    
  •  c = read_escape(mb);
    
    }
    else if (c == ‘?’)
    return 0177;
    @@ -5019,7 +5023,7 @@ parser_read_escape(struct parser_params
    }

static int
-parser_tokadd_escape(struct parser_params *parser, int term)
+parser_tokadd_escape(struct parser_params *parser, int term, int *mb)
{
int c;

@@ -5030,35 +5034,37 @@ parser_tokadd_escape(struct parser_param
case ‘0’: case ‘1’: case ‘2’: case ‘3’: /* octal constant */
case ‘4’: case ‘5’: case ‘6’: case ‘7’:
{

  •  int i;
    
  •  int numlen;
    
  •  int oct;
    
     tokadd('\\');
    
  •  tokadd(c);
    
  •  for (i=0; i<2; i++) {
    
  • c = nextc();
  • if (c == -1) goto eof;
  • if (c < ‘0’ || ‘7’ < c) {
  •    pushback(c);
    
  •    break;
    
  • }
  • tokadd©;
  •  pushback(c);
    
  •  oct = scan_oct(lex_p, 3, &numlen);
    
  •  if (numlen == 0) {
    
  • yyerror(“Invalid escape character syntax”);

  • return -1;
    }

  •  while (numlen--)
    
  • tokadd(nextc());

  •  if (mb && (oct >= 0200)) *mb = ENC_CODERANGE_UNKNOWN;
    

    }
    return 0;

     case 'x':  /* hex constant */
    

    {
    int numlen;

  •  int hex;
    
     tokadd('\\');
     tokadd(c);
    
  •  scan_hex(lex_p, 2, &numlen);
    
  •  hex = scan_hex(lex_p, 2, &numlen);
     if (numlen == 0) {
    
    yyerror(“Invalid escape character syntax”);
    return -1;
    }
    while (numlen–)
    tokadd(nextc());
  •  if (mb && (hex >= 0x80)) *mb = ENC_CODERANGE_UNKNOWN;
    
    }
    return 0;

@@ -5069,6 +5075,7 @@ parser_tokadd_escape(struct parser_param
return 0;
}
tokadd(’\’); tokadd(‘M’); tokadd(’-’);

  • if (mb) *mb = ENC_CODERANGE_UNKNOWN;
    goto escaped;

    case 'C':
    

@@ -5084,7 +5091,7 @@ parser_tokadd_escape(struct parser_param
tokadd(’\’); tokadd(‘c’);
escaped:
if ((c = nextc()) == ‘\’) {

  •  return tokadd_escape(term);
    
  •  return tokadd_escape(term, mb);
    
    }
    else if (c == -1) goto eof;
    tokadd©;
    @@ -5212,22 +5219,14 @@ parser_tokadd_string(struct parser_param
    default:
    if (func & STR_FUNC_REGEXP) {
    pushback©;
  •    if (tokadd_escape(term) < 0)
    
  •    if (tokadd_escape(term, mb) < 0)
     return -1;
    
  •    if (mb) {
    
  •  *mb = ENC_CODERANGE_UNKNOWN;
    
  •  mb = 0;
    
  •    }
       continue;
    
    }
    else if (func & STR_FUNC_EXPAND) {
    pushback©;
    if (func & STR_FUNC_ESCAPE) tokadd(’\’);
  •    c = read_escape();
    
  •    if (mb) {
    
  •  *mb = ENC_CODERANGE_UNKNOWN;
    
  •  mb = 0;
    
  •    }
    
  •    c = read_escape(mb);
    
    }
    else if ((func & STR_FUNC_QWORDS) && ISSPACE©) {
    /* ignore backslashed spaces in %w */
    @@ -6043,7 +6042,7 @@ parser_yylex(struct parser_params *parse
    goto ternary;
    }
    else if (c == ‘\’) {
  •  c = read_escape();
    
  •  c = read_escape(0);
     tokadd(c);
    
    }
    else {

それでは。

まつもと ゆきひろです

In message “Re: [ruby-dev:31980] multibyte string/regex literal with
escape sequence”
on Tue, 9 Oct 2007 14:57:16 +0900, “U.Nakamura”
[email protected] writes:

|trunke$B$G!“e(B
|
| % ./miniruby -ve ‘# -- coding: EUC-JP --
| p "e$B$“e(B\n”.encoding’
| ruby 1.9.0 (2007-10-07 patchlevel 0) [i386-netbsdelf]
| “US-ASCII”
|
|e$B$H$J$k$3$H$K5$IU$-$^$7$?!#e(B
|e$BJ8;zNs%j%F%i%k$”$k$$$O@55,I=8=%j%F%i%k$K%(%9%1!<%W%7!<%1%s%9e(B
|e$B$r4^$a$k$HLdEzL5MQ$Ge(BUS-ASCII(e$B$H$$$&$+<B:]$O%P%$%J%j07$$e(B?)e$B$K$Je(B
|e$B$k$h$&$G$9!#e(B

うーむ。

|e$B$*$=$i$/!"0U?^$H$7$F$O!“e(B\nnne$B$H$+e(B\xnne$B$H$+e(B\M-xe$B$H$+$Ge(B8bite$BL$,N)e(B
|e$B$C$F$kJ8;z$r;E9~$s$8$c$C$?$i$b$&@5BNITL@$@$H$$$&$3$H$K$7$?$$e(B
|e$B$N$@$m$&$H;W$&$N$G!”??LLL$K%A%'%C%/$9$k$h$&$K$7$F$_$^$7$?!#e(B

e$B$9$/$J$/$H$b%^%k%A%P%$%HJ8;z$r4^$`$N$Ke(BUS-ASCIIe$B$O$*$+$7$$$N$G!"e(B
e$B%3%_%C%H$7$F$/$@$5$$!#e(B

こんにちは、なかむら(う)です。

In message “[ruby-dev:31981] Re: multibyte string/regex literal with
escape sequence”
on Oct.09,2007 15:09:56, [email protected] wrote:
| e$B$9$/$J$/$H$b%^%k%A%P%$%HJ8;z$r4^$`$N$Ke(BUS-ASCIIe$B$O$*$+$7$$$N$G!"e(B
| e$B%3%_%C%H$7$F$/$@$5$$!#e(B

入れました。

e$B$J$+$@$5$s$Ne(Bcommite$B$H9g$o$;$F!"e(B

  • 7bite$BJ8;ze(B(e$B%3%s%H%m!<%kJ8;z$r4^$`e(B)e$B$@$1$J$ie(BUS-ASCII
  • 8bite$BJ8;z$r4^$`$J$ie(Bparser->enc

e$B$K$J$k$h$&$K$J$j$^$7$?!#e(B

e$B$H$3$m$G!"7k6I!V%P%$%J%j!W$H$$$&e(Bencodinge$B$O$J$$$s$G$7$g$&$+e(B?

e$B$=$l$G$O!#e(B

e$B$^$D$b$He(B e$B$f$-$R$m$G$9e(B

In message “Re: [ruby-dev:31983] Re: multibyte string/regex literal with
escape sequence”
on Tue, 9 Oct 2007 16:11:43 +0900, “U.Nakamura”
[email protected] writes:

|e$B$H$3$m$G!"7k6I!V%P%$%J%j!W$H$$$&e(Bencodinge$B$O$J$$$s$G$7$g$&$+e(B?

unknown-8bite$B$H$$$&L>A0$GEPO?$7$h$&$+$J$H;W$C$F$^$9!#<BBV$Oe(B
ASCIIe$B$Ne(Baliase$B$J$s$G$9$,!#e(B

e$B$^$D$b$He(B e$B$f$-$R$m$G$9e(B

In message “Re: [ruby-dev:31986] Re: multibyte string/regex literal with
escape sequence”
on Tue, 9 Oct 2007 17:22:55 +0900, “U.Nakamura”
[email protected] writes:

|| unknown-8bite$B$H$$$&L>A0$GEPO?$7$h$&$+$J$H;W$C$F$^$9!#<BBV$Oe(B
|| ASCIIe$B$Ne(Baliase$B$J$s$G$9$,!#e(B
|
|e$B$3$l$C$F!“C1$Ke(Bforce_encodinge$B$G$=$&$$$&L>A0$b;XDj$G$-$k$h$&$Ke(B
|e$B$J$k$@$1!”$J$s$G$7$g$&$+e(B?

当面はそうです。

|e$B$=$l$H$b%$%s%?%W%j%?$,$=$&$$$&e(Bencodinge$B$r2?$i$+$N>r7o$G<+F0E*e(B
|e$B$K@_Dj$9$k$3$H$b$"$k$N$G$7$g$&$+e(B?

「自動的」は危険な香りがしますね。

|e$BK?e(BIRCe$B$G$O$5$C$-$N%Q%C%AMm$_$+$i$$$m$$$mOC$,=P$F$$$?$N$G$9$,!“e(B
|e$BC1$Ke(Baliase$B$,MQ0U$5$l$k$@$1$@$H$”$s$^$j4r$7$/$J$$$N$G!“e(B
|
| (1) e$B$”$N%Q%C%A$+$i$b$&$A$g$C$H3HD%$7$F!"Nc$($Pe(B8bite$B$J%(%9%1e(B
| e$B!<%W%7!<%1%s%9$,4^$^$l$k%j%F%i%k$O!V%P%$%J%j!W$H$9$k!#e(B

e$B$=$l$r!V%P%$%J%j!W$H8F$V$V$s$K$O9=$$$^$;$s$,!“$=$l$,!V$o$+e(B
e$B$k!W!”!V6hJL$G$-$k!W$3$H$K$I$s$J4r$7$5$,$"$j$^$9$+!#e(B

| (2) encodinge$B$,e(BUS-ASCIIe$B$G!"$+$DFbItE*$Ke(BENC_CODERANGE_UNKNOWN
| e$B$J>l9g$Oe(BString#encodinge$B$de(BRegexp#encodinge$B$O!V%P%$%J%j!We(B
| e$B$rJV$9!#e(B
| # e$B$b$7$3$N@he(BIO#encodinge$B$,$G$-$k$h$&$@$H$A$g$C$H:$$k$+$be(B

e$B$3$l$b$I$&$$$&%1!<%9$G4r$7$$$s$G$7$g$&$+!#e(B

| (3) e$B%P%$%J%j%j%F%i%kI=5-e(B(%b"…" e$B$H$+e(B "…"b e$B$H$+e(B)e$B$rMQ0U$7e(B
| e$B$F%f!<%6$,<+H/E*$K4JJX$K!V%P%$%J%j!W$JJ8;zNs$rI=5-$G$-e(B
| e$B$k$h$&$K$9$k!#e(B

e$B$3$l$O$“$j$($k$H;W$C$F$^$7$?!#$,!”$b$&$A$g$C$H%f!<%9%1!<%9$,e(B
e$B$?$^$C$F$+$i9M$($h$&$H;W$C$F$^$7$?!#$I$&$$$&%1!<%9$G!V%P%$%Je(B
e$B%j!WJ8;zNs$,4r$7$$$N$+$h$/$o$+$i$J$$$s$G$9$h$M!#3N$+$Ke(Bpacke$B$He(B
e$B$+$G@8@.$5$l$kJ8;zNs$O%P%$%J%j$G$9$,!“e(BCRubye$B$G$O$$$:$l$K$7$F$be(B
e$BFbItI=8=$KJQ49$,9T$o$l$J$$$N$G!”%P%$%J%j$rFCJL07$$$7$J$1$l$Pe(B
e$B$J$i$J$$6ILL$O$=$l$[$IB?$/$J$$$HM=A[$7$F$$$^$9!#e(B

                            e$B$^$D$b$He(B e$B$f$-$R$me(B /:|)

成瀬です。

Yukihiro M. wrote:

e$B$=$l$r!V%P%$%J%j!W$H8F$V$V$s$K$O9=$$$^$;$s$,!"$=$l$,!V$o$+e(B
e$B$k!W!"!V6hJL$G$-$k!W$3$H$K$I$s$J4r$7$5$,$"$j$^$9$+!#e(B

e$BG$0U$NJ8;zNs$H7k9g$G$-$ke(B US-ASCII (ASCII e$B$NHO0O$N$_e(B)
e$B$H!“7k9g$G$-$J$$e(B
US-ASCII (e$B%P%$%J%je(B) e$B$r6hJL$9$kJ}K!$C$F:#$”$j$^$7$?$C$1!#e(B

e$B$3$s$K$A$O!"$J$+$`$ie(B(e$B$&e(B)e$B$G$9!#e(B

In message “[ruby-dev:31984] Re: multibyte string/regex literal with
escape sequence”
on Oct.09,2007 16:57:01, [email protected] wrote:
| |e$B$H$3$m$G!"7k6I!V%P%$%J%j!W$H$$$&e(Bencodinge$B$O$J$$$s$G$7$g$&$+e(B?
|
| unknown-8bite$B$H$$$&L>A0$GEPO?$7$h$&$+$J$H;W$C$F$^$9!#<BBV$Oe(B
| ASCIIe$B$Ne(Baliase$B$J$s$G$9$,!#e(B

e$B$3$l$C$F!“C1$Ke(Bforce_encodinge$B$G$=$&$$$&L>A0$b;XDj$G$-$k$h$&$Ke(B
e$B$J$k$@$1!”$J$s$G$7$g$&$+e(B?
e$B$=$l$H$b%$%s%?%W%j%?$,$=$&$$$&e(Bencodinge$B$r2?$i$+$N>r7o$G<+F0E*e(B
e$B$K@_Dj$9$k$3$H$b$"$k$N$G$7$g$&$+e(B?

e$BK?e(BIRCe$B$G$O$5$C$-$N%Q%C%AMm$_$+$i$$$m$$$mOC$,=P$F$$$?$N$G$9$,!“e(B
e$BC1$Ke(Baliase$B$,MQ0U$5$l$k$@$1$@$H$”$s$^$j4r$7$/$J$$$N$G!"e(B

(1) e$B$“$N%Q%C%A$+$i$b$&$A$g$C$H3HD%$7$F!“Nc$($Pe(B8bite$B$J%(%9%1e(B
e$B!<%W%7!<%1%s%9$,4^$^$l$k%j%F%i%k$O!V%P%$%J%j!W$H$9$k!#e(B
(2)
encodinge$B$,e(BUS-ASCIIe$B$G!”$+$DFbItE*$Ke(BENC_CODERANGE_UNKNOWN
e$B$J>l9g$Oe(BString#encodinge$B$de(BRegexp#encodinge$B$O!V%P%$%J%j!We(B
e$B$rJV$9!#e(B
#
e$B$b$7$3$N@he(BIO#encodinge$B$,$G$-$k$h$&$@$H$A$g$C$H:$$k$+$be(B
(3) e$B%P%$%J%j%j%F%i%kI=5-e(B(%b”…" e$B$H$+e(B "…"b
e$B$H$+e(B)e$B$rMQ0U$7e(B
e$B$F%f!<%6$,<+H/E*$K4JJX$K!V%P%$%J%j!W$JJ8;zNs$rI=5-$G$-e(B
e$B$k$h$&$K$9$k!#e(B

e$B$H$+$$$&$h$&$J0F$,=P$^$7$?!#e(B

e$B$=$l$G$O!#e(B

On Tue, 9 Oct 2007 17:47:06 +0900
In article E1IfAjM-00072z-67@x31
[[ruby-dev:31987] Re: multibyte string/regex literal with escape
sequence]
Yukihiro M. [email protected] wrote:

| (1) e$B$"$N%Q%C%A$+$i$b$&$A$g$C$H3HD%$7$F!"Nc$($Pe(B8bite$B$J%(%9%1e(B
| e$B!<%W%7!<%1%s%9$,4^$^$l$k%j%F%i%k$O!V%P%$%J%j!W$H$9$k!#e(B

e$B$=$l$r!V%P%$%J%j!W$H8F$V$V$s$K$O9=$$$^$;$s$,!“$=$l$,!V$o$+e(B
e$B$k!W!”!V6hJL$G$-$k!W$3$H$K$I$s$J4r$7$5$,$"$j$^$9$+!#e(B

e$B%P%$%J%j$+$ie(B/e$B$X$N%(%s%3!<%G%#%s%0JQ49$,!JDL>o!K$G$-$J$$$h$&$K$9e(B
e$B$k$H!"%U!<%k%W%k!<%U$H$7$FM-8z$8$c$J$$$+$H;W$$$^$9!#e(B

e$BL}CG$9$k$He(BUCS-2e$B$GFI$_9~$s$@!J$H;W$C$F$$$?!KJ8;zNs$,<B$O%P%$%J%je(B
e$B!Je(BASCIIe$B!K$G!“$=$l$re(BShift_JISe$B$KJQ49$7$h$&$H$7$F$$m$$m$K$J$k!”$He(B
e$B$+IaDL$K$d$j$=$&$G$9!#e(B

e$B$=$s$J$H$-!"2u$l$?J8;zNs$de(B Iconv::IllegalSequence
e$B$rA0$KG:$`$h$je(B
Cannot convert encoding to/from binary stream e$B$H$+=P$F$/$l$?J}$,e(B
e$BM-$jFq$$!#e(B

MIMEe$B$G%+%W%;%k2=$9$k$H$-Ey$b!"e(BASCIIe$B$J$iCf?H$Oe(B7bite$B$J$N$Ge(B
quoted-printable e$B$rABr$9$k$,!"%P%$%J%j$J$ie(B base64
e$B$rA
Br$9$Y$7e(B
e$B$H$+!#e(B

e$B$^$D$b$He(B e$B$f$-$R$m$G$9e(B

In message “Re: [ruby-dev:31989] Re: multibyte string/regex literal with
escape sequence”
on Tue, 9 Oct 2007 19:35:16 +0900, “NARUSE, Yui”
[email protected] writes:

|e$BG$0U$NJ8;zNs$H7k9g$G$-$ke(B US-ASCII (ASCII e$B$NHO0O$N$_e(B) e$B$H!“7k9g$G$-$J$$e(B
|US-ASCII (e$B%P%$%J%je(B) e$B$r6hJL$9$kJ}K!$C$F:#$”$j$^$7$?$C$1!#e(B

e$B!V<B:]$K=hM}$7$F$$FNc30$,5/$-$k$+$I$&$+!W0J30$NJ}K!$O8=;~E@e(B
e$B$G$ODs6!$7$F$$$^$;$s!#$=$&$$$&0UL#$@$H!V0B?4$7$F:.:
$G$-$ke(B
US-ASCIIe$B!W$H!V$d$C$F$_$J$$$H$o$+$i$J$$e(BBINARYe$B!W$O6hJL$7$?J}$,e(B
e$B4r$7$$%1!<%9$,$"$k$+$b$7$l$^$;$s$M!#e(B

e$B$^$D$b$He(B e$B$f$-$R$m$G$9e(B

In message “Re: [ruby-dev:31991] Re: multibyte string/regex literal with
escape sequence”
on Tue, 9 Oct 2007 19:43:18 +0900, Tietew
[email protected] writes:

|e$B%P%$%J%j$+$ie(B/e$B$X$N%(%s%3!<%G%#%s%0JQ49$,!JDL>o!K$G$-$J$$$h$&$K$9e(B
|e$B$k$H!"%U!<%k%W%k!<%U$H$7$FM-8z$8$c$J$$$+$H;W$$$^$9!#e(B

e$B$b$&$A$g$C$H@bL@$7$F$$$?$@$1$^$;$s$+!#e(B7bit
ASCIIe$B$H!V%P%$%J%j!We(B
e$B$N6hJL$,M-1W$+$b$7$l$J$$$H$O;W$$;O$a$F$O$$$^$9$,!"%(%s%3!<%G%#e(B
e$B%s%0JQ49$,DL>o$G$-$J$$$H$$$&$N$O!"7k9=;H$$$K$/$=$&$J5$$,$7$^e(B
e$B$9!#e(B

|e$BL}CG$9$k$He(BUCS-2e$B$GFI$_9~$s$@!J$H;W$C$F$$$?!KJ8;zNs$,<B$O%P%$%J%je(B
|e$B!Je(BASCIIe$B!K$G!“$=$l$re(BShift_JISe$B$KJQ49$7$h$&$H$7$F$$m$$m$K$J$k!”$He(B
|e$B$+IaDL$K$d$j$=$&$G$9!#e(B

e$B!Ve(BUCS-2e$B$GFI$_9~$s$@$H;W$C$F$$$?J8;zNs$,<B$O%P%$%J%j!W$H$$$&>ue(B
e$B67$O%$%a!<%8$7$K$/$$$G$9!#e(BUCS-2e$B$G$“$ke(B(e$B$O$:$Ne(B)e$B%U%!%$%k$+$iFI$e(B
e$B9~$s$@$N$G$“$l$P!”$=$l$,2u$l$F$$$Fe(BUCS-2e$B$NHO0OFb$K$$5$^$C$Fe(B
e$B$$$J$/$F$b<+F0E
$K!V%P%$%J%j!W$K$J$k$h$&$J;EAH$
$rDs6!$9$kM=e(B
e$BDj$O$”$j$^$;$s!#$N$G!"e(B

|e$B$=$s$J$H$-!"2u$l$?J8;zNs$de(B Iconv::IllegalSequence e$B$rA0$KG:$`$h$je(B
|Cannot convert encoding to/from binary stream e$B$H$+=P$F$/$l$?J}$,e(B
|e$BM-$jFq$$!#e(B

e$B$3$&$$$&Nc30$,H/@8$9$k>u67$O$J$+$J$+9M$($K$/$$$h$&$K;W$$$^$9!#e(B

  • e$BJ8;zNs$Oe(BUCS-2e$B$N$O$:e(B(e$B%(%s%3!<%G%#%s%0$be(BUCS-2)

    • e$BK\Ev$Ke(BUCS-2 e$B"*e(B e$BLdBj$J$7e(B
    • e$B2u$l$F$$$ke(B e$B"*e(B e$B2u$l$?J8;zNse(B/IllegalSequence

e$BJ8;zNs$Oe(BUCS-2e$B$N$O$:e(B(e$B%(%s%3!<%G%#%s%0$O%P%$%J%je(B)e$B!#e(B
e$BJQ49$^$?$O%(%s%3!<%G%#%s%0@_Dj$,I,MWe(B

  • e$BK\Ev$Ke(BUCS-2 e$B"*e(B e$BLdBj$J$$e(B
  • e$B2u$l$F$$$ke(B e$B"*e(B e$B2u$l$?J8;zNse(B/IllegalSequence

e$B>e5-$N!VM-$jFq$$!W$,@.N)$9$k$?$a$K$Oe(B

  • e$B@5$7$/e(BUCS-2e$B$JJ8;zNs$Oe(BUCS-2e$B%(%s%3!<%G%#%s%0!#e(B
    e$B2u$l$?J8;zNs$O%P%$%J%j%(%s%3!<%G%#%s%0e(B

e$B$G$“$k>l9g$@$H;W$$$^$9$,!”%W%m%0%i%`Cf$G$I$&$$$&7P0^$G$=$N$he(B
e$B$&$K%(%s%3!<%G%#%s%0$,@_Dj$5$l$k$N$+!"$J$+$J$+%$%a!<%8$G$-$^e(B
e$B$;$s!#e(B

|MIMEe$B$G%+%W%;%k2=$9$k$H$-Ey$b!"e(BASCIIe$B$J$iCf?H$Oe(B7bite$B$J$N$Ge(B
|quoted-printable e$B$rABr$9$k$,!"%P%$%J%j$J$ie(B base64 e$B$rABr$9$Y$7e(B
|e$B$H$+!#e(B

quoted-printablee$B$be(Bbase64e$B$bN>J}$H$bG$0U$N%P%$%HNs$rI=8=$G$-$^e(B
e$B$9$+$ie(B(e$BI=8=8zN($O0c$$$^$9$,e(B)e$B!"e(BASCIIe$B$J$ie(Bquoted-printablee$B$H$$$&e(B
e$B4p=`$OE,@Z$G$O$J$$$+$b$7$l$^$;$s!#e(B

                            e$B$^$D$b$He(B e$B$f$-$R$me(B /:|)

永井＠知能．九工大です．

From: Hidetoshi NAGAI [email protected]
Subject: [ruby-dev:31999] Re: multibyte string/regex literal with escape
sequence
Date: Wed, 10 Oct 2007 06:53:17 +0900
Message-ID: [email protected]

e$BDL>o$O$3$l$G$&$^$/5!G=$9$k$N$G$9$,!$e(B
e$B%$%a!<%8%G!<%?$J$I$N%P%$%J%j%G!<%?$r07$&>l9g$,LdBj$G$9!%e(B
(snip)
e$B$=$3$G!$e(BTk.encoding_names e$B$K$O4^$^$l$J$$e(B ‘binary’ e$B$,;XDj$5$l$?>l9g$Oe(B
e$BJQ49$r9T$o$J$$$h$&$K$9$k$3$H$G!$e(B
(snip)
e$B$H$$$&$h$&$K!$LdBj$,=P$J$$$h$&$K$7$F$$$^$9!%e(B

e$B!%!%!%$J$I$H=q$$$?8e$G$b$&0lEY%A%'%C%/$7$F$?$i!$e(B
Tk.encoding_names e$B$Ke(B ‘identity’ e$B$J$s$F$N$,!%!%!%e(B
e$B$b$7$+$7$Fe(B ‘binary’ e$B$J$s$FMQ0U$7$J$/$F$be(B
‘identity’ e$B$r;H$($PNI$+$C$?$C$F$3$H!)e(B orz

e$B$^$!!$e(BUS-ASCII e$B$He(B BINARY (IDENTITY) e$B$N6hJL<+BN$Oe(B
e$BI,MW$=$&$G$O$"$k$N$G$9$,!%!%!%e(B

e$B1J0f!wCNG=!%6e9)Bg$G$9!%e(B

From: Yukihiro M. [email protected]
Subject: [ruby-dev:31994] Re: multibyte string/regex literal with escape
sequence
Date: Tue, 9 Oct 2007 22:57:16 +0900
Message-ID: E1IfFa6-00015M-FF@x31

e$B!V<B:]$K=hM}$7$F$$FNc30$,5/$-$k$+$I$&$+!W0J30$NJ}K!$O8=;~E@e(B
e$B$G$ODs6!$7$F$$$^$;$s!#$=$&$$$&0UL#$@$H!V0B?4$7$F:.:
$G$-$ke(B
US-ASCIIe$B!W$H!V$d$C$F$_$J$$$H$o$+$i$J$$e(BBINARYe$B!W$O6hJL$7$?J}$,e(B
e$B4r$7$$%1!<%9$,$"$k$+$b$7$l$^$;$s$M!#e(B

e$B;d$Oe(B 1.9 e$B$G$NJ8;zNs$Ne(B encoding e$B>pJs$N07$$$Ne(B
e$B6qBNE*$J$H$3$m$rM}2r$G$-$F$O$J$$$G$9$7!$e(B
encoding e$BJQ49$K$D$$$F$b87L)$K$OJ,$+$C$F$$$J$$$N$G$9$,!$e(B
Ruby/Tk e$B$G$N%1!<%9$K$D$$$F>/!9!%e(B

Tk e$B$G$O!$%^%k%A%P%$%HJ8;zNs$r07$&>l9g$K$Oe(B utf-8
e$B$GEO$9I,MW$,$"$j$^$9$7!$e(B
e$B5U$KEO$5$l$k>l9g$be(B utf-8 e$B$GJV$5$l$^$9!%e(B
e$B$G$9$,!$KhEY$NJQ49$rI,MW$H$9$k$N$O%f!<%6$K$H$C$FITJX$G$9!%e(B
e$B$=$N$?$a!$e(B$KCODE e$B$NCM$d%7%9%F%`$N%G%U%)%k%H$Ne(B encoding
e$B$NCM$K4p$E$$$Fe(B
e$B<+F0E*$Ke(B (e$B0-$/8@$($P>!<j$Ke(B)
e$BAj8_$NJQ49$r$+$1$k$h$&$K$7$F$$$^$9!%e(B
e$B$J$*JQ49$K:]$7$F$O!$e(BTk e$B$,4|BT$7$F$$$kJQ49$,9T$o$l$k$h$&$K!$e(B
Tcl/Tk e$B>e$Ne(B encoding e$BJQ494X?t$r;H$&$h$&$K$7$F$$$^$9!%e(B

e$B3F<oe(B encoding e$B$r:.:_$5$;$F07$&>l9g$K$Oe(B
e$BJ8;zNs$Ne(B encoding e$B$rL@<($9$kI,MW$,$"$j$^$9$,!$e(B
e$B$=$N>l9g$Oe(B @encoding e$B$H$$$&%$%s%9%?%s%9JQ?t$r;}$D$+$I$&$+$Ge(B
e$BH=Dj$9$ke(B (e$BCM$,$=$NJ8;zNs$Ne(B encoding e$BL>$H$9$ke(B)
e$B$h$&$K$7$F$$$^$9!%e(B

e$BDL>o$O$3$l$G$&$^$/5!G=$9$k$N$G$9$,!$e(B
e$B%$%a!<%8%G!<%?$J$I$N%P%$%J%j%G!<%?$r07$&>l9g$,LdBj$G$9!%e(B

‘ascii’ e$B$K$7$F$*$1$PLdBj$,$J$$$+$H;W$$$-$de(B

irb(main):003:0> $KCODE
=> “NONE”
irb(main):004:0> Tk.encoding
=> “euc-jp”
irb(main):005:0> ‘e$B4A;ze(Basdf’
=> “\264\301\273\372asdf”
irb(main):006:0> TkComm._toUTF8(‘e$B4A;ze(Basdf’)
=> “\346\274\242\345\255\227asdf”
irb(main):007:0> TkComm._toUTF8(‘e$B4A;ze(Basdf’,‘euc-jp’)
=> “\346\274\242\345\255\227asdf”
irb(main):008:0> TkComm._toUTF8(‘e$B4A;ze(Basdf’,‘ascii’)
=> “\302\264\303\201\302\273\303\272asdf”

e$B$H$$$&$h$&$K!$JQ49$5$l$F$7$^$$$^$9!%e(B

e$B$3$NNc$Oe(B Linux e$B>e$J$N$G!$%G%U%)%k%H$G$Ne(B Tk.encoding

e$B$Oe(B

‘euc-jp’ e$B$K$J$C$F$^$9$,!$F|K\8le(B WinXP e$B>e$G$Oe(B ‘cp932’

e$B$H$J$j$^$9!%e(B

e$BAj8_JQ49$G8+$F$_$k$H$3$s$J46$8$K$J$C$F$7$^$$$^$9!%e(B

irb(main):004:0> TkComm._fromUTF8(TkComm._toUTF8(‘e$B4A;ze(Basdf’))
=> “\264\301\273\372asdf”
irb(main):005:0> TkComm._fromUTF8(TkComm._toUTF8(‘e$B4A;ze(Basdf’,
‘euc-jp’), ‘euc-jp’)
=> “\264\301\273\372asdf”
irb(main):006:0> TkComm._fromUTF8(TkComm._toUTF8(‘e$B4A;ze(Basdf’,
‘euc-jp’), ‘ascii’)
=> “??asdf”
irb(main):007:0> TkComm._fromUTF8(TkComm._toUTF8(‘e$B4A;ze(Basdf’,
‘ascii’), ‘ascii’)
=> “???asdf”
irb(main):008:0> TkComm._fromUTF8(TkComm._toUTF8(‘e$B4A;ze(Basdf’,
‘ascii’), ‘euc-jp’)
=> “\241\255???asdf”

e$B$=$3$G!$e(BTk.encoding_names e$B$K$O4^$^$l$J$$e(B ‘binary’
e$B$,;XDj$5$l$?>l9g$Oe(B
e$BJQ49$r9T$o$J$$$h$&$K$9$k$3$H$G!$e(B

irb(main):002:0> TkComm._toUTF8(‘e$B4A;ze(Basdf’,‘binary’)
ArgumentError: unknown encoding name ‘binary’
from /usr/local/lib/ruby/1.8/tk.rb:638:in _toUTF8' from /usr/local/lib/ruby/1.8/tk.rb:638:in _toUTF8’
from (irb):2
irb(main):003:0>
TkComm._toUTF8(Tk::EncodedString.new(‘e$B4A;ze(Basdf’,‘binary’))
=> “\264\301\273\372asdf”
irb(main):004:0> TkComm._toUTF8(Tk::BinaryString.new(‘e$B4A;ze(Basdf’))
=> “\264\301\273\372asdf”
irb(main):005:0> s = Tk::BinaryString.new(‘e$B4A;ze(Basdf’)
=> “\264\301\273\372asdf”
irb(main):006:0> s.encoding
=> “binary”
irb(main):007:0> TkComm._toUTF8(s)
=> “\264\301\273\372asdf”

e$B$H$$$&$h$&$K!$LdBj$,=P$J$$$h$&$K$7$F$$$^$9!%e(B

1.9 e$B$GJ8;zNs$,e(B encoding e$B>pJs$r;}$D$J$i$P!$e(B
Ruby/Tk e$B$G$b$=$l$rMQ$$$k$3$H$K$J$k$O$:$G$9!%e(B
encoding e$B$rL@<($7$J$$%1!<%9$Ge(B encoding
e$B$,$I$N$h$&$K07$o$l$k$N$+e(B
e$BJ,$+$C$F$$$J$$$N$G$9$,!$$H$/$K;XDj$,$J$$>l9g$K$Oe(B US-ASCII
e$B$H$J$k$J$i$P!$e(B
Ruby/Tk e$B$G$Oe(B Tk e$B$N%7%9%F%`%G%U%)%k%HCM$rMQ$$$Fe(B
e$B=>Mh$HF1MM$K<+F0JQ49$r9T$&$h$&$K$9$k$H;W$$$^$9!%e(B

e$B8=<B$K!$:#8=:_$O$=$l$G%f!<%6$NMxJX@-$r3NJ]$G$-$F$$$k$H;W$&$N$G!%e(B

e$B$=$N:]!$e(BUS-ASCII e$B$He(B BINARY
e$B$H$N6hJL$,$G$-$J$$$N$G$"$l$P!$e(B
e$B$;$C$+$/e(B m17n e$B2=$5$l$k$K$b$+$+$o$i$:e(B
Ruby/Tk e$B$GFCJL$JBP1~$r9T$o$6$k$rF@$J$/$J$j!$e(B
e$BLLE]$J$3$H$K$J$j$=$&$K;W$$$^$9!%e(B

e$B$^$D$b$He(B e$B$f$-$R$m$G$9e(B

In message “Re: [ruby-dev:32003] Re: multibyte string/regex literal with
escape sequence”
on Wed, 10 Oct 2007 09:50:32 +0900, “U.Nakamura”
[email protected] writes:

|In message “[ruby-dev:31987] Re: multibyte string/regex literal with escape sequence”
| on Oct.09,2007 17:47:06, [email protected] wrote:
|| e$B$=$l$r!V%P%$%J%j!W$H8F$V$V$s$K$O9=$$$^$;$s$,!“$=$l$,!V$o$+e(B
|| e$B$k!W!”!V6hJL$G$-$k!W$3$H$K$I$s$J4r$7$5$,$“$j$^$9$+!#e(B
|
|e$B!V$o$+$k!W$3$H$O$=$l<+BN4r$7$$$3$H$@$H;W$&$N$G$9$,!”$=$l$8$ce(B
|e$B<e$$$H$$$&$3$H$G$9$+$M!#e(B

e$B$J$K$,!V$o$+$k!W$+$,LdBj$G$9!#e(BUS-ASCIIe$B$He(BASCII-8BITe$B$r6hJL$9$le(B
e$B$P$=$l$G$$$$$N$+$H$$$&$3$H$G$9$M!#8=;~E@$@$H1i;;$G$-$k$+$I$&e(B
e$B$+$NH=Dj$K$O%(%s%3!<%G%#%s%0$G$O$J$/!"e(B7bite$B$NHO0OFb$7$+e(B
e$B;H$C$F$J$$$+$I$&$+$H$$$&e(B(e$BI=$K=P$F$$$J$$e(B)e$B>pJs$r;H$C$F$$$k$N$Ge(B
e$B$9$h!#$N$G!"e(BASCIIe$B$He(BBINARYe$B$rJ,N%$9$k$3$H$GLdBj$O2r7h$7$J$$$Ne(B
e$B$G$O$J$$$+$H;W$$$^$9!#e(B

e$B$3$N!VI=$K=P$F$$$J$$>pJs!W$rI=$K=P$9$3$H$K$D$$$F$Oe(B(e$BE,@Z$JL>e(B
e$BA0$,7h$^$l$Pe(B)e$B9=$o$J$$$H;W$$$^$9!#e(B

|e$B0l$D$Oe(B[ruby-list:31994]e$B$G$^$D$b$H$5$s$,$$C$7$c$C$F$$i$l$kE@e(B
|e$B$,$"$j$^$9!#e(B

ruby-deve$B$G$9$M!#e(B

|e$B$^$?!“!Ve(BUS-ASCIIe$B!W$H$$$&!”$$$+$K$be(B7bite$B$J%G!<%?Ns$G$9!“$H$$$&e(B
|e$BJ70O5$$NL>A0$KOG$o$5$l$:$K:Q$`!”$H$$$&4r$7$5$,$"$k$h$&$J5$$,e(B
|e$B$7$^$9!#e(B

e$B$=$l$O3N$+$K!#e(BUS-ASCIIe$B$Oe(BASCII-8BITe$B$KL>>NJQ99$7$^$9!#e(B

e$B$3$s$K$A$O!"$J$+$`$ie(B(e$B$&e(B)e$B$G$9!#e(B

In message “[ruby-dev:31987] Re: multibyte string/regex literal with
escape sequence”
on Oct.09,2007 17:47:06, [email protected] wrote:
| e$B$=$l$r!V%P%$%J%j!W$H8F$V$V$s$K$O9=$$$^$;$s$,!“$=$l$,!V$o$+e(B
| e$B$k!W!”!V6hJL$G$-$k!W$3$H$K$I$s$J4r$7$5$,$"$j$^$9$+!#e(B

e$B!V$o$+$k!W$3$H$O$=$l<+BN4r$7$$$3$H$@$H;W$&$N$G$9$,!“$=$l$8$ce(B
e$B<e$$$H$$$&$3$H$G$9$+$M!#e(B
e$B0l$D$Oe(B[ruby-list:31994]e$B$G$^$D$b$H$5$s$,$$C$7$c$C$F$$i$l$kE@e(B
e$B$,$”$j$^$9!#e(B

e$B$^$?!“!Ve(BUS-ASCIIe$B!W$H$$$&!”$$$+$K$be(B7bite$B$J%G!<%?Ns$G$9!“$H$$$&e(B
e$BJ70O5$$NL>A0$KOG$o$5$l$:$K:Q$`!”$H$$$&4r$7$5$,$“$k$h$&$J5$$,e(B
e$B$7$^$9!#e(B
e$B$”$H!"e(BUS-ASCIIe$B$J$i$^$@$$$$$N$+$b$7$l$J$$$N$G$9$,!“Nc$($PL@$ie(B
e$B$+$Ke(BEUC-JPe$B$G$O$J$$J8;zNs%j%F%i%k$,e(BEUC-JPe$B$K$J$k!”$H$+$$$&$N$Oe(B
e$B$+$J$j7y$J5$$,$7$^$9!#e(B

e$BG0$N$?$a$K8@$&$H!":#$O$3$&$G$9!#e(B

e$B$G!“e(Bencodinge$B>e6hJL$,$”$k$H$7$?$i!“$H$$$&A0Ds$G!”$3$Ne(B(1)e$B!Ae(B(3)
e$B$NOC$,=P$F$/$k$o$1$G$9$,!"$3$Ne(B(1)e$B$de(B(2)e$B$H$$$&$N$O!“e(Brubye$B$,<+F0e(B
e$BE*$K$”$kJ8;zNs$r%P%$%J%j$H$_$J$9$H$7$?$i$I$&$d$C$FH=CG$9$k$+!"e(B
e$B$H$$$&0F$K$J$j$^$9!#e(B

(1)e$B$O%j%F%i%k$NOC$G$9$+$i!“%9%/%j%W%H$r=q$$$F$k?M$,0U?^E*$KJ8e(B
e$B;zNsCf$K%(%9%1!<%W%7!<%1%s%9$Ge(B8bite$B$J%G!<%?$rA^F~$7$F$$$k$3$He(B
e$B$,L@3N$J$o$1$G!”$G$"$l$P$3$l$r%P%$%J%j$H$_$J$9$N$O<+A3$J2r<ae(B
e$B$H9M$($i$l$k$N$G$O$J$$$+$H;W$$$^$9!#e(B

(2)e$B$O!“%j%F%i%k$K$H$I$^$i$:e(Brubye$B$,<+F0E*$KH=CG$9$k$H$7$?$i!”$=e(B
e$B$3$GH=CG$9$k$s$8$c$J$$$+$H$$$&OC!#e(B

(3)e$B$NJ}$O!“$=$s$J<+F0E*$J2r<a$h$j$O!”$h$jL@3N$K%9%/%j%W%H>e$Ge(B
e$B%P%$%J%j%j%F%i%k$G$“$k$3$H$rI=L@$7$?J}$,$$$$$H$$$&J}8~$G$9$M!#e(B
e$B$^$”!“e(B”…".force_encoding(e$B%P%$%J%je(B)
e$B$G$b$$$$$s$G$9$1$I!"D9$$e(B
e$B$7!#e(B

| e$B$3$l$O$“$j$($k$H;W$C$F$^$7$?!#$,!”$b$&$A$g$C$H%f!<%9%1!<%9$,e(B
| e$B$?$^$C$F$+$i9M$($h$&$H;W$C$F$^$7$?!#$I$&$$$&%1!<%9$G!V%P%$%Je(B
| e$B%j!WJ8;zNs$,4r$7$$$N$+$h$/$o$+$i$J$$$s$G$9$h$M!#3N$+$Ke(Bpacke$B$He(B
| e$B$+$G@8@.$5$l$kJ8;zNs$O%P%$%J%j$G$9$,!“e(BCRubye$B$G$O$$$:$l$K$7$F$be(B
| e$BFbItI=8=$KJQ49$,9T$o$l$J$$$N$G!”%P%$%J%j$rFCJL07$$$7$J$1$l$Pe(B
| e$B$J$i$J$$6ILL$O$=$l$[$IB?$/$J$$$HM=A[$7$F$$$^$9!#e(B

rubye$B%$%s%?%W%j%?$NFbIt5!9=$K%P%$%J%j$rFCJL07$$$7$F$b$i$*$&$He(B
e$B$$$&OC$O$[$H$s$I$J$$$@$m$&$H;W$$$^$9!#e(B
e$B%P%$%J%j$rFCJL07$$$7$?$$$N$O!"%9%/%j%W%H>e$Ge(Bencodinge$B$r85$K=he(B
e$BM}$rJQ$($?$$>l9g$8$c$J$$$G$7$g$&$+$M!#e(B

e$B$=$l$G$O!#e(B

e$B@.@%$G$9!#e(B

e$BF1$8A0Ds$+$i@5H?BP$N7kO@$,=P$kNc$H$7$F!"e(B

Tanaka A. wrote:

e$B$?$H$($Pe(B G e$B$H$$$&$b$N$r9M$($k$H!“e(BLATIN CAPITAL LETTER G e$B$H$$e(B
e$B$&J8;z$rI=8=$7$?$$$N$+e(B 0x47 e$B$H$$$&%P%$%H$rI=8=$7$?$$$N$+>o$Ke(B
e$B0U<1$9$kI,MW$,$”$j$^$9!#e(B

LATIN CAPITAL LETTER G e$B$OJ8;zNs$NMWAG!"e(B0x47
e$B$O%P%$%HNs$NMWAG$G$9$,!“e(BRuby
e$B$G$O$I$A$i$be(B String
e$B$NMWAG$G$”$k$H$$$&$N$O=EMW$JA0Ds$@$H;W$$$^$9!#e(B

e$B$?$H$($P!Ve(BGIF e$B%U%!%$%k$N@hF,e(B 3e$B%P%$%H$Oe(B GIF e$B$G$"$k!W$H$+!"e(B
e$B!Ve(BISO-2022-JP e$B$K$*$$$Fe(B JIS X 0208 1983 e$B$r;H$&$H$-$K$Oe(B
ESC $ B e$B$r;H$&!W$H$+!"e(BASCII e$B$K0MB8$7$?I=8=$O$h$/9T$o$l$^$9!#e(B

ASCII
e$BNs$O!"J8;zNsE*$K$b%P%$%HNsE*$K$bMQ$$$i$l$k$H$$$&$3$H$G$9$h$M!#e(B

ASCII e$B$H%P%$%J%j$r40A4$KJ,N%$9$k$H$$$&$3$H$O!"$3$N$h$&$JI=8=e(B
e$B$r%W%m%0%i%`>e$G9T$&$H$-$K$O$=$NI=8=$,e(B ASCII e$B$rA0Ds$H$7$?$be(B
e$B$N$G$"$k$3$H$rM[$K5-=R$7$J$1$l$P$J$i$J$$$3$H$r0UL#$7$^$9!#e(B

e$B%P%$%J%j$re(B ASCII e$B$+$iJ,N%$7$J$/$H$b!“e(BRuby e$B$Ne(B \x00-\x7F
e$B$O86B’e(B ASCII e$B$H$$e(B
e$B$&A0Ds$,4{$KB8:_$9$k$o$1$G!“M[$K5-=R$7$J$/$H$b!V$=$l!W$,e(B ASCII
e$B$G$”$k$3e(B
e$B$H$Oe(B Ruby
e$B$N%W%m%0%i%`$G$”$k;~E@$G0E$K5-=R$5$l$F$$$k$H9M$($^$9!#e(B

e$B$?$H$($P!"e(B/\AGIF/ =~ image e$B$H$+$OF0$+$J$$$o$1$G$9!#e(B
image e$B$,%P%$%J%j$H$9$l$P!"FbIt$Ke(B LATIN CAPITAL LETTER G e$B$H$$e(B
e$B$&J8;z$OB8:_$7F@$J$$$o$1$G$9$+$i!#e(B

e$B8=:$Ne(B Ruby e$B$N<BAu$K$D$$$F$J$i$P!“e(B image
e$B$,%P%$%J%j$H$9$l$P!”$H$$$&2>Dje(B
e$B$Oe(B image e$B$re(B UTF-8 e$B$J$je(B Shift_JIS
e$B$J$j$K$9$l$P%(%
%e%l!<%H$G$-$^$9$h$M!“e(B
e$BF0$-$^$9!#e(B
e$BJ8;zNs$H%P%$%HNs$rJLJ*$H$7$?@$3&$J$i$PF0$+$J$$$H$O;W$$$^$9$,!”!Ve(BASCII
e$B$He(B
BINARY
e$B$NJ,N%!W$O!VJ8;zNs$H%P%$%J%j$NJ,N%!W$H$OJLJ*$G$O$J$$$G$7$g$&$+!#e(B

e$BEDCf$5$s$N<gD%$O!“e(BASCII
e$B$O%P%$%J%j$NItJ,=89g$G$”$k$H$$$&$b$N$@$H;W$&$N$Ge(B
e$B$9$,!"e(BRuby M17N e$B$K$*$$$Fe(B ASCII e$B$OA4$F$Ne(B String
e$B2C;;!&%^%C%A%s%07O$NItJ,e(B
e$B=89g$H$J$C$F$$$^$9!#EDCf$5$s$,e(B ASCII
e$B$H%P%$%J%j$K$D$$$FE83+$7$?O@M}$O!"e(B
ASCII e$B$He(B UTF-8 e$B$G$b!"e(BASCII e$B$He(B Shift_JIS
e$B$H$G$bE83+$G$-$k$N$G$O$J$$$G$7$ge(B
e$B$&$+!#?^$K$9$k$H0J2<$N$h$&$J46$8$G$9$h$M!#e(B

        ┌────┐
        │BINARY  │
┌───┼────┼───┐
│UTF-8 │ASCII   │EUC-JP│
└───┴────┴───┘

In article [email protected],
“NARUSE, Yui” [email protected] writes:

e$B%P%$%J%j$re(B ASCII e$B$+$iJ,N%$7$J$/$H$b!“e(BRuby e$B$Ne(B \x00-\x7F e$B$O86B’e(B ASCII e$B$H$$e(B
e$B$&A0Ds$,4{$KB8:_$9$k$o$1$G!“M[$K5-=R$7$J$/$H$b!V$=$l!W$,e(B ASCII e$B$G$”$k$3e(B
e$B$H$Oe(B Ruby e$B$N%W%m%0%i%`$G$”$k;~E@$G0E$K5-=R$5$l$F$$$k$H9M$($^$9!#e(B

e$B$“$!!”$=$l$O$?$7$+$K!#e(B

e$B8=:$Ne(B Ruby e$B$N<BAu$K$D$$$F$J$i$P!“e(B image e$B$,%P%$%J%j$H$9$l$P!”$H$$$&2>Dje(B
e$B$Oe(B image e$B$re(B UTF-8 e$B$J$je(B Shift_JIS e$B$J$j$K$9$l$P%(%
%e%l!<%H$G$-$^$9$h$M!“e(B
e$BF0$-$^$9!#e(B
e$BJ8;zNs$H%P%$%HNs$rJLJ*$H$7$?@$3&$J$i$PF0$+$J$$$H$O;W$$$^$9$,!”!Ve(BASCII e$B$He(B
BINARY e$B$NJ,N%!W$O!VJ8;zNs$H%P%$%J%j$NJ,N%!W$H$OJLJ*$G$O$J$$$G$7$g$&$+!#e(B

e$B$($'$H!“:.Mp$7$?$N$G$9$,!”$*$=$i$/;d$,8e<T$K$D$$$F=R$Y$F$$$ke(B
e$B$H9M$($i$l$?$N$G$9$Me(B? e$B$=$&A[Dj$7$F=q$-$^$9$,;d$OJ8;zNs$H%P%$e(B
e$B%J%j$r0[$J$k%/%i%9$K$9$k$3$H$O9M$($F$$$^$;$s!#e(B

e$B;d$,=R$Y$F$$$k$N$O%(%s%3!<%G%#%s%0$H$7$Fe(B ASCII-8BIT e$B$H$OJL$Ke(B
BINARY e$B$rF3F~$7$?$H$7$F!"e(Bimage e$B$,e(B String
e$B$N%$%s%9%?%s%9$Ge(B
image.encoding e$B$,e(B “BINARY” e$B$rJV$9>u672<$G!"e(B/\AGIF/ =~ image
e$B$,F0$+$J$$$@$m$&$H$$$&OC$G$9!#e(B

image.encoding e$B$,e(B “UTF-8” e$B$G$"$k$H$-$KF0$/$N$Oe(B UTF-8
e$B$,e(B
ASCII e$B8_49$@$+$i$G$9!#;d$Oe(B BINARY e$B$Oe(B ASCII
e$B8_49$G$J$$$HA[Dje(B
e$B$7$F$$$k$N$GF0$+$J$$$3$H$K$J$j$^$9!#e(B

e$B$J$!"e(BASCII e$B8_49$H$$$&$N$OFbItE$K$Oe(B rb_enc_asciicompat
e$B$G8!e(B
e$B::$5$l$^$9!#$$$^$=$N<BAu$rD/$a$?$H$3$me(B… e$B$&$%$`!"e(B
rb_enc_mbminlen(enc)==1 e$B$H$$$&<BAu$O$J$s$+e(B UTF-16 e$B$7$+A[Dj$7e(B
e$B$F$J$/$F!"e(BEBCDIC e$B$O9M$($F$J$5$=$&$G$9$M$'!#$^$!!"e(BBINARY
e$B$NF3e(B
e$BF~$KH<$C$F<BAu$OJQ99$5$l!"e(BBINARY e$B$K$D$$$F$b56$rJV$9$h$&$K$Je(B
e$B$k$HA[Dj$7$F$/$@$5$$!#e(B

e$BEDCf$5$s$N<gD%$O!“e(BASCII e$B$O%P%$%J%j$NItJ,=89g$G$”$k$H$$$&$b$N$@$H;W$&$N$Ge(B
e$B$9$,!"e(BRuby M17N e$B$K$*$$$Fe(B ASCII e$B$OA4$F$Ne(B String e$B2C;;!&%^%C%A%s%07O$NItJ,e(B
e$B=89g$H$J$C$F$$$^$9!#EDCf$5$s$,e(B ASCII e$B$H%P%$%J%j$K$D$$$FE83+$7$?O@M}$O!"e(B
ASCII e$B$He(B UTF-8 e$B$G$b!"e(BASCII e$B$He(B Shift_JIS e$B$H$G$bE83+$G$-$k$N$G$O$J$$$G$7$ge(B
e$B$&$+!#?^$K$9$k$H0J2<$N$h$&$J46$8$G$9$h$M!#e(B

ASCII e$B8_49$G$J$$$b$N$K$D$$$F$O0[$J$j$^$9!#e(B

ASCII e$B8_49$J%P%$%J%j$H$$$&$N$O$^$5$Ke(B ASCII-8BIT e$B$G!"@.@%$5$se(B
e$B$,A[Dj$7$F$$$k$b$N$O$=$l$@$H;W$$$^$9!#;d$,9M$($F$$$k$N$Oe(B
ASCII-8BIT e$B$H$O0[$J$k=c?h$J%P%$%J%j$H$$$&$b$N$NMxE@!&7gE@$Ge(B
e$B$9!#e(B

e$B@.@%$G$9!#e(B

Tanaka A. wrote:

e$B$H9M$($i$l$?$N$G$9$Me(B? e$B$=$&A[Dj$7$F=q$-$^$9$,;d$OJ8;zNs$H%P%$e(B
e$B%J%j$r0[$J$k%/%i%9$K$9$k$3$H$O9M$($F$$$^$;$s!#e(B

e$B;d$,=R$Y$F$$$k$N$O%(%s%3!<%G%#%s%0$H$7$Fe(B ASCII-8BIT e$B$H$OJL$Ke(B
BINARY e$B$rF3F~$7$?$H$7$F!"e(Bimage e$B$,e(B String e$B$N%$%s%9%?%s%9$Ge(B
image.encoding e$B$,e(B “BINARY” e$B$rJV$9>u672<$G!"e(B/\AGIF/ =~ image
e$B$,F0$+$J$$$@$m$&$H$$$&OC$G$9!#e(B

image.encoding e$B$,e(B “UTF-8” e$B$G$"$k$H$-$KF0$/$N$Oe(B UTF-8 e$B$,e(B
ASCII e$B8_49$@$+$i$G$9!#;d$Oe(B BINARY e$B$Oe(B ASCII e$B8_49$G$J$$$HA[Dje(B
e$B$7$F$$$k$N$GF0$+$J$$$3$H$K$J$j$^$9!#e(B

e$B$J!<$k$[$I!"$?$7$+$K$=$&$J$j$^$9$M!#e(B

e$B$J$!"e(BASCII e$B8_49$H$$$&$N$OFbItE$K$Oe(B rb_enc_asciicompat e$B$G8!e(B
e$B::$5$l$^$9!#$$$^$=$N<BAu$rD/$a$?$H$3$me(B… e$B$&$%$`!"e(B
rb_enc_mbminlen(enc)==1 e$B$H$$$&<BAu$O$J$s$+e(B UTF-16 e$B$7$+A[Dj$7e(B
e$B$F$J$/$F!"e(BEBCDIC e$B$O9M$($F$J$5$=$&$G$9$M$’!#$^$!!"e(BBINARY e$B$NF3e(B
e$BF~$KH<$C$F<BAu$OJQ99$5$l!"e(BBINARY e$B$K$D$$$F$b56$rJV$9$h$&$K$Je(B
e$B$k$HA[Dj$7$F$/$@$5$$!#e(B

e$BM}2r$7$^$7$?!#e(B

ASCII-8BIT e$B$H$O0[$J$k=c?h$J%P%$%J%j$H$$$&$b$N$NMxE@!&7gE@$Ge(B
e$B$9!#e(B

e$B$J$k$[$I!":#!Ve(BASCII-8BITe$B!W$H8F$P$l$F$$$k$b$N$O!":#$^$G!Ve(BUS-ASCIIe$B!W$NL>A0e(B
e$B$K0z$-$:$i$l$F!“e(B0x00-0x7F e$B$,%a%$%s$Ge(B 8bit
e$BItJ,$O$*$^$1$H$$$&G’<1$@$C$?$Ne(B
e$B$G$9$,!”$`$7$me(B ASCII e$B8_49%P%$%J%j$H2r<a$9$k$Y$-$J$N$G$9$M!#e(B

e$B$=$&$9$k$H!"!Ve(BUS-ASCIIe$B!W$H$N2r<a$G<BAu$5$l$?$H;W$o$l$kItJ,!“e(B[ruby-dev:
31715] e$B$N!Ve(B7bit ASCII e$B$N$_$+$i$J$kJ8;zNs$Oe(B
US-ASCIIe$B!W$H$$$&5!G=$O!”:#$He(B
e$B$J$C$F$O<c430cOB46$,$"$k$N$G$9$,$I$&$J$N$G$7$g$&!#e(B

In article [email protected],
“U.Nakamura” [email protected] writes:

e$B!V$o$+$k!W$3$H$O$=$l<+BN4r$7$$$3$H$@$H;W$&$N$G$9$,!"$=$l$8$ce(B
e$B<e$$$H$$$&$3$H$G$9$+$M!#e(B

e$B$7$P$i$/9M$($F$$$?$N$G$9$,!“!V$o$+$k!W$H$$$&$3$H$O!V0U<1$7$Je(B
e$B$1$l$P$J$i$J$$!W$H$$$&$3$H$G$b$”$j$^$9!#e(B

e$B$?$H$($Pe(B G e$B$H$$$&$b$N$r9M$($k$H!“e(BLATIN CAPITAL LETTER G
e$B$H$$e(B
e$B$&J8;z$rI=8=$7$?$$$N$+e(B 0x47 e$B$H$$$&%P%$%H$rI=8=$7$?$$$N$+>o$Ke(B
e$B0U<1$9$kI,MW$,$”$j$^$9!#e(B

e$B$=$l$r0U<1$9$Y$-$@!“$H$$$&<gD%$J$N$@$H$O;W$&$N$G$9$,!”$“$s$^e(B
e$B$j0U<1$7$J$$47=,$,$”$k$N$b$?$7$+$@$H;W$$$^$9!#e(B

e$B$?$H$($P!Ve(BGIF e$B%U%!%$%k$N@hF,e(B 3e$B%P%$%H$Oe(B GIF
e$B$G$"$k!W$H$+!"e(B
e$B!Ve(BISO-2022-JP e$B$K$*$$$Fe(B JIS X 0208 1983 e$B$r;H$&$H$-$K$Oe(B
ESC $ B e$B$r;H$&!W$H$+!"e(BASCII e$B$K0MB8$7$?I=8=$O$h$/9T$o$l$^$9!#e(B

ASCII e$B$H%P%$%J%j$r40A4$KJ,N%$9$k$H$$$&$3$H$O!“$3$N$h$&$JI=8=e(B
e$B$r%W%m%0%i%`>e$G9T$&$H$-$K$O$=$NI=8=$,e(B ASCII e$B$rA0Ds$H$7$?$be(B
e$B$N$G$”$k$3$H$rM[$K5-=R$7$J$1$l$P$J$i$J$$$3$H$r0UL#$7$^$9!#e(B

e$B$?$H$($P!"e(B/\AGIF/ =~ image e$B$H$+$OF0$+$J$$$o$1$G$9!#e(B
image e$B$,%P%$%J%j$H$9$l$P!"FbIt$Ke(B LATIN CAPITAL LETTER G e$B$H$$e(B
e$B$&J8;z$OB8:_$7F@$J$$$o$1$G$9$+$i!#e(B

e$B$=$l$G$b$=$l$,@5$7$$$H$$$&<gD%$O$?$7$+$K$"$jF@$k$H;W$$$^$9$7!“e(B
e$BHse(B ASCII e$B4D6-e(B (EBCDIC e$B$H$+e(B)
e$B$KBP$7$F$b%]!<%?%V%k$K$J$kMxE@$Oe(B
e$B9M$($i$l$k$H$O;W$&$N$G$9$,!”$=$l$C$Fe(B ASCII e$B$rA0Ds$K$7$?I=8=e(B
e$B$N47=,$r<N$F$k$[$I4r$7$$$b$N$J$s$G$9$+$Me(B?

In article [email protected],
“NARUSE, Yui” [email protected] writes:

なるほど、今「ASCII-8BIT」と呼ばれているものは、今まで「US-ASCII」の名前
に引きずられて、0x00-0x7F がメインで 8bit 部分はおまけという認識だったの
ですが、むしろ ASCII 互換バイナリと解釈するべきなのですね。

US-ASCII はそのうち別個に定義するということで。

そうすると、「US-ASCII」との解釈で実装されたと思われる部分、[ruby-dev:
31715] の「7bit ASCII のみからなる文字列は US-ASCII」という機能は、今と
なっては若干違和感があるのですがどうなのでしょう。

これは文字列の意味が局所的に決まるので良いことだと思っています。

ある時点まで ASCII で書いていたソースで、あるとき UTF-8 の文
字列リテラルを書いたとき、そのファイル中の他のすべての文字列
リテラルも UTF-8 になる、というのはちょっとナニだな、と思い
ます。

そうなっても問題なく動くはずだ、という意見もあるのですが、陽
に encoding メソッドを呼び出した結果を使っているとそうとも限
らないので、ASCII-8BIT に決めておいたほうが困らないんじゃな
いかと見込んでいます。

US-ASCII が定義されれば、(ぜんぶ 7bit のケースについては)
ASCII-8BIT でもなくて US-ASCII にするという可能性も出てくる
かもしれません。まぁ、そうしたとしても "\x80" みたいなのは
ASCII-8BIT でしょうけど。

こんにちは、なかむら(う)です。

In message “[ruby-dev:32042] Re: multibyte string/regex literal with
escape sequence”
on Oct.13,2007 01:54:44, [email protected] wrote:
| > 「わかる」ことはそれ自体嬉しいことだと思うのですが、それじゃ
| > 弱いということですかね。
|
| しばらく考えていたのですが、「わかる」ということは「意識しな
| ければならない」ということでもあります。
|
| たとえば G というものを考えると、LATIN CAPITAL LETTER G とい
| う文字を表現したいのか 0x47 というバイトを表現したいのか常に
| 意識する必要があります。
|
| それを意識すべきだ、という主張なのだとは思うのですが、あんま
| り意識しない慣習があるのもたしかだと思います。

e$B$J$s$+4{$K@.@%$5$s$+$i0U8+$,=P$FOC$,?J$s$G$k$h$&$G$9$,!“;d$,e(B
e$B!V%P%$%J%j!W$H$$$&8@MU$G0U?^$7$F$$$?$N$O!”:#$Ne(BASCII-8BITe$B$H$$e(B
e$B$&L>A0$G<($5$l$F$$$k$b$N$G$7$?!#e(B
e$B$D$^$j!"2?$,!V$o$+$k!W$H4r$7$+$C$?$N$+$H$$$&$H!"e(B

  • e$B$"$kJ8;zNs$Ke(B8bite$BL$,N)$C$F$$$k%G!<%?$,4^$^$l$F$$$k$3$He(B
  • e$B$7$+$7$=$l$O8=:_%G%U%)%k%H$H$J$C$F$$$k%(%s%3!<%G%#%s%0$Ne(B
    e$BJ8;zNs$H$$$&$o$1$G$O$J$$$H$$$&$3$He(B
    e$B$,!V$o$+$k!W$H4r$7$$!"$H$$$&0UL#$G$7$?!#e(B

あの話を出した前後では、

  • US-ASCIIe$B$H$$$&%(%s%3!<%G%#%s%0L>$NJ8;zNs$,e(B8bite$B%G!<%?$r4^e(B
    e$B$`$3$H$,$"$j!"e(B
  • US-ASCIIe$B$rL>>h$kJ8;zNs$,Aj8_$KO"7k$G$-$J$$$3$H$,$"$k$,%(e(B
    e$B%s%3!<%G%#%s%0L>$+$i$O$=$l$r;vA0$KCN$kJ}K!$,$J$/!"e(B
  • e$B$5$i$KOC$NN.$l$K$h$C$F$Oe(B8bite$B%G!<%?$r4^$`J8;zNs$O%G%U%)%ke(B
    e$B%H$N%(%s%3!<%G%#%s%0e(B(e$BNc$($Pe(BEUC-JPe$B$H$+e(B)e$B$H$7$F07$o$l$kJ}8~e(B
    e$B$K$J$k$+$b$7$l$J$+$C$?e(B
    e$B$H$$$&>u67$G$7$?!#e(B

e$B$^$@$$$m$$$m$I$&$J$k$+$o$+$C$F$J$$$s$G$9$,!“$H$j$”$($:;d$,7|e(B
e$BG0$7$F$$$?E@$K4X$7$F$O!“e(BASCII-8BITe$B$H$$$&%(%s%3!<%G%#%s%0L>$,e(B
e$BF3F~$5$l$?$3$H$H!”:#8ee(BUS-ASCIIe$B$,JL$N7A$GDj5A$5$l$=$&$@$H$$$&e(B
e$B$3$H$+$i!“:#$O$”$s$^$jIT0B;k$O$7$F$$$^$;$s!#e(B

| ASCII とバイナリを完全に分離するということは、このような表現
| をプログラム上で行うときにはその表現が ASCII を前提としたも
| のであることを陽に記述しなければならないことを意味します。
|
| たとえば、/\AGIF/ =~ image とかは動かないわけです。
| image がバイナリとすれば、内部に LATIN CAPITAL LETTER G とい
| う文字は存在し得ないわけですから。
|
| それでもそれが正しいという主張はたしかにあり得ると思いますし、
| 非 ASCII 環境 (EBCDIC とか) に対してもポータブルになる利点は
| 考えられるとは思うのですが、それって ASCII を前提にした表現
| の慣習を捨てるほど嬉しいものなんですかね?

e$B$^$5$K$=$NEDCf$5$s$HF1$87|G0$+$i!"e(BASCII-8BITe$B$H$OJL$Ne(BBINARYe$B$He(B
e$B$$$&%(%s%3!<%G%#%s%0$rF3F~$9$kI,MW$O$J$$$N$G$O$J$$$+$H;W$$$^e(B
e$B$9!#e(B

どうも自分の主張に筋が通ってない気もする

それでは。