--encoding affects script encoding

こんばんは sheepman です。

以下のように --encoding に utf-16be
を指定するとエラーも表示されずに
終了します。

$ cat e.rb
p __ENCODING__

$ ruby-1.9 -v --encoding=utf-16be e.rb
ruby 1.9.0 (2008-03-01 revision 15239) [i686-linux]
(何も表示されずに終了)

shebang を検査する前に一旦 IO エンコーディングを
セットしておくべき
なんだと思います。

$ svn diff ruby.c
Index: ruby.c

--- ruby.c (リビジョン 15664)
+++ ruby.c (作業コピー)
@@ -1155,6 +1155,9 @@ load_file(VALUE parser, const char *fnam
int no_src_enc = !opt->src.enc.name;
int no_ext_enc = !opt->ext.enc.name;

+    enc = rb_usascii_encoding();
+    rb_funcall(f, rb_intern("set_encoding"), 1, rb_enc_from_encoding(enc));
+
if (opt->xflag) {
forbid_setid("-x");
opt->xflag = Qfalse;

なかだです。

At Sun, 2 Mar 2008 00:31:01 +0900,
sheepman wrote in [ruby-dev:33955]:

shebang を検査する前に一旦 IO エンコーディングを セットしておくべき
なんだと思います。

e$B%3%_%C%H$7$^$7$?$,!"e(BIO#ungetce$B$G$N%(%s%3!<%G%#%s%0$N07$$$,H4$1$Fe(B
e$B$$$k$3$H$K5$$E$-$^$7$?!#e(B

Index: io.c

--- io.c (revision 15671)
+++ io.c (working copy)
@@ -2430,4 +2430,5 @@ VALUE
 rb_io_ungetc(VALUE io, VALUE c)
 {
+    rb_encoding *enc;
     rb_io_t *fptr;
 
@@ -2437,11 +2438,15 @@ rb_io_ungetc(VALUE io, VALUE c)
     if (FIXNUM_P(c)) {
         int cc = FIX2INT(c);
-        rb_encoding *enc = io_read_encoding(fptr);
         char buf[16];
 
-        c = rb_str_new(buf, rb_enc_mbcput(cc, buf, enc));
+        c = rb_str_new(buf, rb_enc_mbcput(cc, buf, io_read_encoding(fptr)));
+        enc = fptr->enc2;
     }
     else {
         SafeStringValue(c);
+        enc = io_input_encoding(fptr);
+    }
+    if (enc && enc != rb_enc_get(c)) {
+        c = rb_funcall(c, id_encode, 1, rb_enc_from_encoding(enc));
     }
     io_ungetc(c, fptr);

In article [email protected],
Nobuyoshi N. [email protected] writes:

e$B%3%_%C%H$7$^$7$?$,!"e(BIO#ungetce$B$G$N%(%s%3!<%G%#%s%0$N07$$$,H4$1$Fe(B
e$B$$$k$3$H$K5$$E$-$^$7$?!#e(B

     else {
         SafeStringValue(c);
+        enc = io_input_encoding(fptr);
+    }
+    if (enc && enc != rb_enc_get(c)) {
+        c = rb_funcall(c, id_encode, 1, rb_enc_from_encoding(enc));
     }
     io_ungetc(c, fptr);

encdetect を実装するには、IO からバイト列で読み出したものを
バイト列として ungetc することが必要なんですが、それはできる
でしょうか。

別メソッドかなぁ?

なかだです。

At Mon, 3 Mar 2008 00:54:33 +0900,
Tanaka A. wrote in [ruby-dev:33960]:

 io_ungetc(c, fptr);

encdetect を実装するには、IO からバイト列で読み出したものを
バイト列として ungetc することが必要なんですが、それはできる
でしょうか。

e$B$=$l$O8=>u$G$b$G$-$J$$$G$9$M!#e(B?xe$B$be(BStringe$B$K$J$C$?$3$H$G$b$"$k$7!“e(B
0…255e$B$N@0?t$NJB$S$O%P%$%HNs$H$7$FLa$9!”$H$$$&$N$G$b$$$$$+$b!#e(B

別メソッドかなぁ?

getce$B$He(Bgetbytee$B$bJ,$1$?$3$H$r9M$($l$P!"$3$C$A$G$9$+$M$’!#e(B
ungetbytee$B$H$$$&$N$O$$$^$$$A$J5$$,$7$^$9$,!#e(B

Index: io.c

--- io.c (revision 15671)
+++ io.c (working copy)
@@ -312,9 +312,7 @@ io_unread(rb_io_t *fptr)
}

-static void
-io_ungetc(VALUE str, rb_io_t *fptr)
+static char *
+io_unget_space(long len, rb_io_t *fptr)
{

  • int len = RSTRING_LEN(str);
  • if (fptr->rbuf == NULL) {
    fptr->rbuf_off = 0;
    @@ -337,5 +335,11 @@ io_ungetc(VALUE str, rb_io_t *fptr)
    fptr->rbuf_off-=len;
    fptr->rbuf_len+=len;
  • MEMMOVE(fptr->rbuf+fptr->rbuf_off, RSTRING_PTR(str), char, len);
  • return fptr->rbuf+fptr->rbuf_off;
    +}

+static void
+io_ungetc(const char *ptr, long len, rb_io_t *fptr)
+{

  • MEMMOVE(io_unget_space(len, fptr), ptr, char, len);
    }

@@ -680,10 +684,10 @@ io_fwrite(VALUE str, rb_io_t *fptr)
/* Can't use encode! because puts writes a frozen newline */
if (fptr->enc2) {

  •  str = rb_funcall(str, id_encode, 2,
    
  •  str = rb_funcall(str, id_encode, 2,
          rb_enc_from_encoding(fptr->enc2),
          rb_enc_from_encoding(fptr->enc));
    
    }
    else {
  •  str = rb_funcall(str, id_encode, 1,
    
  •  str = rb_funcall(str, id_encode, 1,
          rb_enc_from_encoding(fptr->enc));
    
    }
    @@ -1348,5 +1352,5 @@ io_enc_str(VALUE str, rb_io_t *fptr)
    /* two encodings, so transcode from enc2 to enc */
    /* the methods in transcode.c are static, so call indirectly */
  • str = rb_funcall(str, id_encode, 2,
  • str = rb_funcall(str, id_encode, 2,
    rb_enc_from_encoding(fptr->enc),
    rb_enc_from_encoding(fptr->enc2));
    @@ -1896,5 +1900,5 @@ prepare_getline_args(int argc, VALUE *ar
    if (fptr->enc2) {
    VALUE rs2;
  •  rs2 = rb_funcall(rs, id_encode, 2,
    
  •  rs2 = rb_funcall(rs, id_encode, 2,
         rb_enc_from_encoding(fptr->enc2),
         rb_enc_from_encoding(fptr->enc));
    

@@ -2414,10 +2418,11 @@ rb_io_readbyte(VALUE io)

  • call-seq:
  • ios.ungetc(string)   => nil
    
    • ios.ungetc(byte)     => nil
      
    • ios.ungetc(byte_ary) => nil
      
    • Pushes back one character (passed as a parameter) onto
      ios,
    • such that a subsequent buffered read will return it. Only one
      character
    • may be pushed back before a subsequent read operation (that is,
    • you will be able to read only the last of several characters that
      have been pushed
    • back). Has no effect with unbuffered reads (such as
      IO#sysread).
    • Pushes back character string or byte sequence (passed as a
    • parameter) onto ios, such that a subsequent buffered read
    • will return it. Has no effect with unbuffered reads (such as
    • IO#sysread).
    • f = File.new("testfile")   #=> #<File:testfile>
      

@@ -2425,25 +2430,89 @@ rb_io_readbyte(VALUE io)

  • f.ungetc(c)                #=> nil
    
  • f.getc                     #=> "8"
    
    • f.ungetc(0x39, 10)         #=> nil
      
    • f.gets                     #=> "9\n"
      
    */

VALUE
-rb_io_ungetc(VALUE io, VALUE c)
+rb_io_unget(int argc, VALUE *argv, VALUE io)
{
rb_io_t *fptr;

  • VALUE c;

    GetOpenFile(io, fptr);
    rb_io_check_readable(fptr);

  • if (argc < 1) return Qnil;

  • if (argc > 1) { /* expects byte array */

  • /* use temporary buffer to get rid of race condition */

  • volatile VALUE str = rb_str_tmp_new(argc);

  • char *ptr = RSTRING_PTR(str);

  • int i;

  • for (i = 0; i < argc; ++i) {

  •  c = argv[i];
    
  •  ptr[i] = (char)NUM2INT(c);
    
  • }

  • io_ungetc(ptr, argc, fptr);

  • rb_str_resize(str, 0);

  • return Qnil;

  • }

  • c = argv[0];
    if (NIL_P(c)) return Qnil;
    if (FIXNUM_P(c)) {

  • int cc = FIX2INT(c);
  • rb_encoding *enc = io_read_encoding(fptr);
  • char buf[16];
  • char cc = (char)FIX2INT(c);
  • c = rb_str_new(buf, rb_enc_mbcput(cc, buf, enc));
  • io_ungetc(&cc, 1, fptr);
    }
    else {
  • rb_encoding *enc = io_input_encoding(fptr);
    SafeStringValue(c);
  • if (rb_enc_get_index(c) && enc != rb_enc_get(c)) {
  •  c = rb_funcall(c, id_encode, 1, rb_enc_from_encoding(enc));
    
  • }
  • io_ungetc(RSTRING_PTR(c), RSTRING_LEN(c), fptr);
  • RB_GC_GUARD(c);
  • }
  • return Qnil;
    +}

+VALUE
+rb_io_ungetc(VALUE io, VALUE c)
+{

  • VALUE ary = rb_check_array_type(c);
  • if (!NIL_P(ary)) {
  • rb_io_unget(RARRAY_LEN(ary), RARRAY_PTR(ary), io);
  • RB_GC_GUARD(ary);
    }
  • io_ungetc(c, fptr);
  • else {
  • rb_io_unget(1, &c, io);
  • }
  • return Qnil;
    +}

+/*

    • call-seq:
    • ios.ungetbyte(fixnum)   => nil
      
    • Pushes back one character (passed as a parameter) onto
    • ios, such that a subsequent buffered read will return it.
    • See also IO#ungetc.
    • f = File.new("testfile")
      
    • c = f.getbyte    #=> 84
      
    • f.ungetbyte(c)   #=> nil
      
    • f.getbyte        #=> 84
      
  • */

+VALUE
+rb_io_ungetbyte(VALUE io, VALUE c)
+{

  • rb_io_t *fptr;
  • int cc;
  • GetOpenFile(io, fptr);
  • rb_io_check_readable(fptr);
  • if (NIL_P(c)) return Qnil;
  • cc = NUM2INT(c);
  • *io_unget_space(1, fptr) = (char)cc;
    return Qnil;
    }
    @@ -6789,5 +6858,6 @@ Init_IO(void)
    rb_define_method(rb_cIO, "readchar", rb_io_readchar, 0);
    rb_define_method(rb_cIO, "readbyte", rb_io_readbyte, 0);
  • rb_define_method(rb_cIO, "ungetc",rb_io_ungetc, 1);
  • rb_define_method(rb_cIO, "ungetc", rb_io_unget, -1);
  • rb_define_method(rb_cIO, "ungetbyte", rb_io_ungetbyte, 1);
    rb_define_method(rb_cIO, "<<", rb_io_addstr, 1);
    rb_define_method(rb_cIO, "flush", rb_io_flush, 0);

なかだです。

At Mon, 3 Mar 2008 17:41:25 +0900,
Tanaka A. wrote in [ruby-dev:33976]:

e$B$=$l$O8=>u$G$b$G$-$J$$$G$9$M!#e(B?xe$B$be(BStringe$B$K$J$C$?$3$H$G$b$"$k$7!“e(B
0…255e$B$N@0?t$NJB$S$O%P%$%HNs$H$7$FLa$9!”$H$$$&$N$G$b$$$$$+$b!#e(B

現状では、String オブジェクトは無変換でバッファに入ると思う
んですが、違います?

e$B$=$&$G$9$,!"$=$l$,LdBj$G$O$J$$$+$H;W$$$^$9!#e(B
e$B30It$HFbIt$N%(%s%3!<%G%#%s%0$,0[$J$k>l9g$O!“e(Bgetce$B$5$l$?$b$N$O$9$Ge(B
e$B$KJQ49$5$l$F$$$k$o$1$G!”$=$l$r$=$N$^$^La$9$H<!2s$NFI$9~$$G:FEYe(B
e$BJQ49$9$k$3$H$K$J$j$^$9!#$3$l$O$"$^$j4|BT$5$l$kF0:n$H$O;W$($^$;$s!#e(B

In article [email protected],
Nobuyoshi N. [email protected] writes:

e$B$=$l$O8=>u$G$b$G$-$J$$$G$9$M!#e(B?xe$B$be(BStringe$B$K$J$C$?$3$H$G$b$"$k$7!“e(B
0…255e$B$N@0?t$NJB$S$O%P%$%HNs$H$7$FLa$9!”$H$$$&$N$G$b$$$$$+$b!#e(B

現状では、String オブジェクトは無変換でバッファに入ると思う
んですが、違います?

getce$B$He(Bgetbytee$B$bJ,$1$?$3$H$r9M$($l$P!"$3$C$A$G$9$+$M$'!#e(B
ungetbytee$B$H$$$&$N$O$$$^$$$A$J5$$,$7$^$9$,!#e(B

いままでのやりかたから演繹すれば ungetbyte ですよねぇ。