[Feature #2102] String#inspect as default_internal encoding

Feature #2102: String#inspect as default_internal encoding
http://redmine.ruby-lang.org/issues/show/2102

起票者: Yui NARUSE
ステータス: Open, 優先度: Normal
カテゴリ: M17N

String#inspect
の結果は特定のエンコーディングに揃えるようにしませんか。

現在の inspect
は異なるエンコーディングを持つ文字列でも何も考えず結合を試み、
結果 EncodingCompatibilityError
が上がったとしても気にしない、
というものになっています。

しかし、inspect は irb や p
など、とりあえずオブジェクトの中身を概観したい、
という時に使われるものなのに、異なるエンコーディングがあるくらいで、
例外になってしまうは正直不便です。

添付のパッチでは、

  • default_internal が設定されていればそれを、設定されていなければ
    default_external を用いる。
    ただし、そのエンコーディングが ASCII compatible
    でない場合は US-ASCII を用いる。
    (以下 inspect のエンコーディングと呼ぶ)
  • String#inspect の結果は、その String
    のエンコーディングが、
    inspect のエンコーディングと同じ場合はこれまでと同様。
  • 異なる場合、String 内の非 US-ASCII 文字は \xXX
    形式でエスケープする。
  • String 以外の inspect はこれまでと同様。
    としています。
    これにより、inspect
    結果のエンコーディングが一定になるので例外が上がることがなくなります。

動作の例を示すと、inspect のエンコーディングが UTF-8
の場合、
"あ".encode("UTF-16BE").inspect

before => "0B"

after => "\x30\x42"

"い".encode("UTF-8").inspect

before => "い"

after => "い"

"う".encode("EUC-JP").inspect

before => "" (注: EUC-JP で生の「う」)

after => "\xA4\xA6"

["あ".encode("UTF-16BE"), "い".encode("UTF-8"),
"う".encode("EUC-JP")].inspect

before=> EncodingCompatibilityError

after => ["\x30\x42", "い", "\xA4\xA6"]

どうでしょうか？

diff --git a/string.c b/string.c
index aa36c37..b8d862c 100644
--- a/string.c
+++ b/string.c
@@ -1739,6 +1739,12 @@ str_buf_cat(VALUE str, const char *ptr, long len)
return str;
}

+static VALUE
+str_buf_cat2(VALUE str, const char *ptr)
+{

  • return str_buf_cat(str, ptr, strlen(ptr));
    +}

VALUE
rb_str_buf_cat(VALUE str, const char *ptr, long len)
{
@@ -4237,13 +4243,6 @@ str_cat_char(VALUE str, unsigned int c,
rb_encoding *enc)
rb_enc_str_buf_cat(str, s, n, enc);
}

-static void
-prefix_escape(VALUE str, unsigned int c, rb_encoding *enc)
-{

  • str_cat_char(str, ‘\’, enc);
  • str_cat_char(str, c, enc);
    -}

/*

  • call-seq:
  • str.inspect => string
    @@ -4262,10 +4261,13 @@ rb_str_inspect(VALUE str)
    rb_encoding *enc = STR_ENC_GET(str);
    char *p, *pend;
    VALUE result = rb_str_buf_new(0);
  • rb_encoding *resenc = rb_default_internal_encoding();
  • if (resenc == NULL) resenc = rb_default_external_encoding();
  • if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding();
  • rb_enc_associate(result, resenc);
  • str_buf_cat2(result, “"”);
  • if (!rb_enc_asciicompat(enc)) enc = rb_usascii_encoding();
  • rb_enc_associate(result, enc);
  • str_cat_char(result, ‘"’, enc);
    p = RSTRING_PTR(str); pend = RSTRING_END(str);
    while (p < pend) {
    unsigned int c, cc;
    @@ -4278,8 +4280,7 @@ rb_str_inspect(VALUE str)
    goto escape_codepoint;
    }
    n = MBCLEN_CHARFOUND_LEN(n);
  • c = rb_enc_codepoint_len(p, pend, &n, enc);
  • c = rb_enc_mbc_to_codepoint(p, pend, enc);
    p += n;
    if (c == ‘"’|| c == ‘\’ ||
    (c == ‘#’ &&
    @@ -4287,51 +4288,49 @@ rb_str_inspect(VALUE str)
    MBCLEN_CHARFOUND_P(rb_enc_precise_mbclen(p,pend,enc)) &&
    (cc = rb_enc_codepoint(p,pend,enc),
    (cc == ‘$’ || cc == ‘@’ || cc == ‘{’)))) {
  •  prefix_escape(result, c, enc);
    
  •  str_buf_cat2(result, "\\");
    
  •  str_buf_cat(result, p - n, n);
    
    }
    else if (c == ‘\n’) {
  •  prefix_escape(result, 'n', enc);
    
  •  str_buf_cat2(result, "\\n");
    
    }
    else if (c == ‘\r’) {
  •  prefix_escape(result, 'r', enc);
    
  •  str_buf_cat2(result, "\\r");
    
    }
    else if (c == ‘\t’) {
  •  prefix_escape(result, 't', enc);
    
  •  str_buf_cat2(result, "\\t");
    
    }
    else if (c == ‘\f’) {
  •  prefix_escape(result, 'f', enc);
    
  •  str_buf_cat2(result, "\\f");
    
    }
    else if (c == ‘\013’) {
  •  prefix_escape(result, 'v', enc);
    
  •  str_buf_cat2(result, "\\v");
    
    }
    else if (c == ‘\010’) {
  •  prefix_escape(result, 'b', enc);
    
  •  str_buf_cat2(result, "\\b");
    
    }
    else if (c == ‘\007’) {
  •  prefix_escape(result, 'a', enc);
    
  •  str_buf_cat2(result, "\\a");
    
    }
    else if (c == 033) {
  •  prefix_escape(result, 'e', enc);
    
  •  str_buf_cat2(result, "\\e");
    
    }
  • else if (rb_enc_isprint(c, enc)) {
  •  rb_enc_str_buf_cat(result, p-n, n, enc);
    
  • else if ((enc == resenc && rb_enc_isprint(c, enc)) ||
    rb_enc_isascii(c, enc)) {
  •  str_buf_cat(result, p-n, n);
    
    }
    else {
  •  char buf[5];
    
  •  char *s;
           char *q;
    
  • escape_codepoint:
    for (q = p-n; q < p; q++) {
  •            s = buf;
    
  •            sprintf(buf, "\\x%02X", *q & 0377);
    
  •            while (*s) {
    
  •                str_cat_char(result, *s++, enc);
    
  •            }
    
  •        }
    

+#define BACKESC_BUFSIZE 5

  • char buf[BACKESC_BUFSIZE];
  • sprintf(buf, “\x%02X”, *q & 0377);
  • str_buf_cat(result, buf, BACKESC_BUFSIZE - 1);
    +#undef BACKESC_BUFSIZE
  •  }
    
    }
    }
  • str_cat_char(result, ‘"’, enc);
  • str_buf_cat2(result, “"”);

    OBJ_INFECT(result, str);
    return result;

まつもと ゆきひろです

In message "Re: [ruby-dev:39343] [Feature #2102] String#inspect as
default_internal encoding"
on Wed, 16 Sep 2009 01:11:39 +0900, Yui NARUSE
[email protected] writes:

|String#inspect の結果は特定のエンコーディングに揃えるようにしませんか。

<略>

その方が便利そうですね。コミットしてください。

e$B%A%1%C%He(B #2102 e$B$,99?7$5$l$^$7$?!#e(B (by Yui NARUSE)

e$B%9%F!<%?%9e(B Opene$B$+$ie(BClosede$B$KJQ99e(B

r25113e$B$GH?1G$7$^$7$?!#e(B

http://redmine.ruby-lang.org/issues/show/2102