/#{}/e.encoding

以下のように /#{}/e の encoding が US-ASCII
になります。

% ./ruby -ve 'p(/#{}/e.encoding)'
ruby 1.9.0 (2008-01-26 revision 15237) [i686-linux]
#<Encoding:US-ASCII>

正規表現のソースを構成する文字列の連結時に EUC-JP な
encoding が消えてしまうのが理由なようです。

もともとこの連結に単なる文字列連結を使うのは、エスケープされ
た中途半端な非 ASCII 文字が連結されて中途半端でないものにな
るのを検出できないという問題があります。なので、
Regexp.preprocess というメソッドを新設して、そこで連結を行い、
エンコーディングの伝播も以前同様ににされるようにしてみました。

Index: re.c

--- re.c (revision 15249)
+++ re.c (working copy)
@@ -1942,36 +1942,51 @@ rb_reg_check_preprocess(VALUE str)
return Qnil;
}

-#if 0
static VALUE
-rb_reg_preprocess_obj(VALUE str,

  •    rb_encoding **fixed_enc, onig_errmsg_buffer err)
    

+rb_reg_preprocess_m(int argc, VALUE *argv, VALUE klass)
{

  • VALUE buf;
  • char *p, *end;
  • rb_encoding *enc;
  • rb_encoding *fixed_enc = 0;
  • onig_errmsg_buffer err = “”;
  • int i;
  • VALUE result = 0;
  • StringValue(str);
  • p = RSTRING_PTR(str);
  • end = p + RSTRING_LEN(str);
  • enc = rb_enc_get(str);
  • if (argc == 0) {
  •    rb_raise(rb_eArgError, "no arguments given");
    
  • }
  • buf = rb_reg_preprocess(p, end, enc, fixed_enc, err);
  • RB_GC_GUARD(str);
  • return buf;
    -}
  • for (i = 0; i < argc; i++) {
  •    VALUE str = argv[i];
    
  •    VALUE buf;
    
  •    char *p, *end;
    
  •    rb_encoding *enc;
    
  •    StringValue(str);
    
  •    p = RSTRING_PTR(str);
    
  •    end = p + RSTRING_LEN(str);
    
  •    enc = rb_enc_get(str);
    
  •    buf = rb_reg_preprocess(p, end, enc, &fixed_enc, err);
    
  •    RB_GC_GUARD(str);
    
  •    if (buf == Qnil)
    
  •        rb_raise(rb_eArgError, "%s", err);
    
  •    if (i == 0) {
    
  •        /* The encoding of the first fragment is the encoding
    
  •         * given by the regexp option or script encoding. */
    
  •        if (fixed_enc == 0) {
    
  •            rb_enc_copy(buf, str);
    
  •        }
    
  •    }
    

-static VALUE
-rb_reg_preprocess_m(VALUE klass, VALUE obj)
-{

  • rb_encoding *fixed_enc = 0;
  • onig_errmsg_buffer err = “”;
  • VALUE str = rb_reg_preprocess_obj(obj, &fixed_enc, err);
  • if (str == Qnil)
  •    rb_raise(rb_eArgError, "%s", err);
    
  • return rb_assoc_new(str, fixed_enc ? Qtrue : Qfalse);
  •    if (!result)
    
  •        result = buf;
    
  •    else
    
  •        rb_str_buf_append(result, buf);
    
  • }
  • return result;
    }
    -#endif

static int
rb_reg_initialize(VALUE obj, const char *s, int len, rb_encoding *enc,
@@ -3035,9 +3050,7 @@ Init_Regexp(void)
rb_define_singleton_method(rb_cRegexp, “last_match”,
rb_reg_s_last_match, -1);
rb_define_singleton_method(rb_cRegexp, “try_convert”,
rb_reg_s_try_convert, 1);

-#if 0

  • rb_define_singleton_method(rb_cRegexp, “preprocess”,
    rb_reg_preprocess_m, 1);
    -#endif
  • rb_define_singleton_method(rb_cRegexp, “preprocess”,
    rb_reg_preprocess_m, -1);

    rb_define_method(rb_cRegexp, “initialize”, rb_reg_initialize_m,
    -1);
    rb_define_method(rb_cRegexp, “initialize_copy”, rb_reg_init_copy,
    1);
    Index: compile.c
    ===================================================================
    --- compile.c (revision 15249)
    +++ compile.c (working copy)
    @@ -1848,7 +1848,7 @@ iseq_set_sequence_stackcaching(rb_iseq_t

static int
-compile_dstr(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node)
+compile_dstr_fragments(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node,
int *cntp)
{
NODE *list = node->nd_next;
VALUE lit = node->nd_lit;
@@ -1862,12 +1862,31 @@ compile_dstr(rb_iseq_t *iseq, LINK_ANCHO
cnt++;
list = list->nd_next;
}

  • *cntp = cnt;
  • return COMPILE_OK;
    +}

+static int
+compile_dstr(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node)
+{

  • int cnt;
  • compile_dstr_fragments(iseq, ret, node, &cnt);
    ADD_INSN1(ret, nd_line(node), concatstrings, INT2FIX(cnt));
    return COMPILE_OK;
    }

static int
+compile_dregx(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node)
+{

  • int cnt;
  • ADD_INSN1(ret, nd_line(node), putobject, rb_cRegexp);
  • compile_dstr_fragments(iseq, ret, node, &cnt);
  • ADD_SEND(ret, nd_line(node), ID2SYM(rb_intern(“preprocess”)),
    INT2FIX(cnt));
  • return COMPILE_OK;
    +}

+static int
compile_branch_condition(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE *
cond,
LABEL *then_label, LABEL *else_label)
{
@@ -4077,7 +4096,7 @@ iseq_compile_each(rb_iseq_t *iseq, LINK_
break;
}
case NODE_DREGX:{

  • compile_dstr(iseq, ret, node);
  • compile_dregx(iseq, ret, node);
    ADD_INSN1(ret, nd_line(node), toregexp, INT2FIX(node->nd_cflag));

    if (poped) {
    @@ -4094,7 +4113,7 @@ iseq_compile_each(rb_iseq_t *iseq, LINK_
    ADD_INSN2(ret, nd_line(node), onceinlinecache, 0, lend);
    ADD_INSN(ret, nd_line(node), pop);

  • compile_dstr(iseq, ret, node);
  • compile_dregx(iseq, ret, node);
    ADD_INSN1(ret, nd_line(node), toregexp, INT2FIX(node->nd_cflag));

    ADD_INSN1(ret, nd_line(node), setinlinecache, lstart);
    Index: test/ruby/test_m17n.rb
    ===================================================================
    --- test/ruby/test_m17n.rb (revision 15249)
    +++ test/ruby/test_m17n.rb (working copy)
    @@ -428,14 +428,14 @@ class TestM17N < Test::Unit::TestCase
    assert_raise(ArgumentError) { eval(s("/#{r}\xc2\xa1/s")) }

    r = /\xc2\xa1/e

  • #assert_raise(ArgumentError) { eval(s("/\xc2\xa1#{r}/s")) }
  • #assert_raise(ArgumentError) { eval(s("/#{r}\xc2\xa1/s")) }
  • assert_raise(ArgumentError) { eval(s("/\xc2\xa1#{r}/s")) }

  • assert_raise(ArgumentError) { eval(s("/#{r}\xc2\xa1/s")) }

    r = eval(e("/\xc2\xa1/"))

  • #assert_raise(ArgumentError) { /\xc2\xa1#{r}/s }
  • assert_raise(ArgumentError) { /\xc2\xa1#{r}/s }

    r = /\xc2\xa1/e

  • #assert_raise(ArgumentError) { /\xc2\xa1#{r}/s }
  • assert_raise(ArgumentError) { /\xc2\xa1#{r}/s }
    end

def test_begin_end_offset
@@ -560,7 +560,7 @@ class TestM17N < Test::Unit::TestCase
}
assert_regexp_fixed_ascii8bit(/#{}\xc2\xa1/n)
assert_regexp_fixed_ascii8bit(/\xc2\xa1#{}/n)

  • #assert_raise(SyntaxError) { s1, s2 = s(’\xc2’), s(’\xa1’);
    /#{s1}#{s2}/ }
  • assert_nothing_raised { s1, s2 = a(’\xc2’), a(’\xa1’); /#{s1}#{s2}/
    }
    end

def test_dynamic_eucjp_regexp
@@ -570,7 +570,7 @@ class TestM17N < Test::Unit::TestCase
assert_raise(SyntaxError) { eval(’/\xc2#{}/e’) }
assert_raise(SyntaxError) { eval(’/#{}\xc2/e’) }
assert_raise(SyntaxError) { eval(’/\xc2#{}\xa1/e’) }

  • #assert_raise(SyntaxError) { s1, s2 = e(’\xc2’), e(’\xa1’);
    /#{s1}#{s2}/ }
  • assert_raise(ArgumentError) { s1, s2 = e(’\xc2’), e(’\xa1’);
    /#{s1}#{s2}/ }
    end

def test_dynamic_sjis_regexp
@@ -580,7 +580,7 @@ class TestM17N < Test::Unit::TestCase
assert_raise(SyntaxError) { eval(’/\x81#{}/s’) }
assert_raise(SyntaxError) { eval(’/#{}\x81/s’) }
assert_raise(SyntaxError) { eval(’/\x81#{}\xa1/s’) }

  • #assert_raise(SyntaxError) { s1, s2 = s(’\x81’), s(’\xa1’);
    /#{s1}#{s2}/ }
  • assert_raise(ArgumentError) { s1, s2 = s(’\x81’), s(’\xa1’);
    /#{s1}#{s2}/ }
    end

def test_dynamic_utf8_regexp
@@ -590,7 +590,7 @@ class TestM17N < Test::Unit::TestCase
assert_raise(SyntaxError) { eval(’/\xc2#{}/u’) }
assert_raise(SyntaxError) { eval(’/#{}\xc2/u’) }
assert_raise(SyntaxError) { eval(’/\xc2#{}\xa1/u’) }

  • #assert_raise(SyntaxError) { s1, s2 = u(’\xc2’), u(’\xa1’);
    /#{s1}#{s2}/ }
  • assert_raise(ArgumentError) { s1, s2 = u(’\xc2’), u(’\xa1’);
    /#{s1}#{s2}/ }
    end

def test_regexp_unicode

まつもと ゆきひろです

[ruby-dev:33484]で催促されたので。

In message “Re: [ruby-dev:33400] /#{}/e.encoding”
on Sat, 26 Jan 2008 12:14:00 +0900, Tanaka A. [email protected]
writes:
|
|以下のように /#{}/e の encoding が US-ASCII になります。
|
| % ./ruby -ve 'p(/#{}/e.encoding)'
| ruby 1.9.0 (2008-01-26 revision 15237) [i686-linux]
| #<Encoding:US-ASCII>
|
|正規表現のソースを構成する文字列の連結時に EUC-JP な
|encoding が消えてしまうのが理由なようです。

なるほど。

|もともとこの連結に単なる文字列連結を使うのは、エスケープされ
|た中途半端な非 ASCII 文字が連結されて中途半端でないものにな
|るのを検出できないという問題があります。なので、
|Regexp.preprocess というメソッドを新設して、そこで連結を行い、
|エンコーディングの伝播も以前同様ににされるようにしてみました。

e$B$d$O$j%3%s%Q%$%kFbIt$N;v>p$r%a%=%C%I$H$7$F8+$;$A$c$&$N$K$Oe(B
e$B$A$g$C$HDq93$,$"$j$^$9!#$?$H$($Pe(Bto_stre$B$N$h$&$J%i%s%?%$%`$J$ie(B
e$B5$$K$J$i$J$$$N$K$J!#e(B

e$B$G!“L?Na$rA}$d$9$N$OK>$^$7$/$J$$$H$$$&$3$H$J$s$G$9$,!”$J$i$Pe(B
toregexpe$BL?Na$,e(Bcnte$B%*%Z%i%s%I$r<u$1IU$1$k$h$&$K$7$F!"$=$NCf$Ge(B
preprocesse$B4X?t$r8F$V$h$&$K$7$F$O$I$&$G$7$g$&!)e(B

                            e$B$^$D$b$He(B e$B$f$-$R$me(B /:|)

In article [email protected],
Yukihiro M. [email protected] writes:

e$B$d$O$j%3%s%Q%$%kFbIt$N;v>p$r%a%=%C%I$H$7$F8+$;$A$c$&$N$K$Oe(B
e$B$A$g$C$HDq93$,$"$j$^$9!#$?$H$($Pe(Bto_stre$B$N$h$&$J%i%s%?%$%`$J$ie(B
e$B5$$K$J$i$J$$$N$K$J!#e(B

e$B$G!“L?Na$rA}$d$9$N$OK>$^$7$/$J$$$H$$$&$3$H$J$s$G$9$,!”$J$i$Pe(B
toregexpe$BL?Na$,e(Bcnte$B%*%Z%i%s%I$r<u$1IU$1$k$h$&$K$7$F!"$=$NCf$Ge(B
preprocesse$B4X?t$r8F$V$h$&$K$7$F$O$I$&$G$7$g$&!)e(B

あぁ、なるほど。そうすれば余計なものを表に出さずにできますね。

そういうふうにしてコミットしました。

　ささだです。

Yukihiro M. wrote:

e$B$G!“L?Na$rA}$d$9$N$OK>$^$7$/$J$$$H$$$&$3$H$J$s$G$9$,!”$J$i$Pe(B
toregexpe$BL?Na$,e(Bcnte$B%*%Z%i%s%I$r<u$1IU$1$k$h$&$K$7$F!"$=$NCf$Ge(B
preprocesse$B4X?t$r8F$V$h$&$K$7$F$O$I$&$G$7$g$&!)e(B

　個人的には、命令は可能ならメソッドに追い出したいなぁ、と思っていま
す。そういう特別な機能は、freeze したオブジェクトの特異メソッドにし
ておくとか。例えば、クラス定義、メソッド定義、alias, undef なんかは
全部外に追い出したいなぁと。

まつもと ゆきひろです

In message “Re: [ruby-dev:33496] Re: /#{}/e.encoding”
on Tue, 29 Jan 2008 17:31:32 +0900, SASADA Koichi [email protected]
writes:

|> e$B$G!“L?Na$rA}$d$9$N$OK>$^$7$/$J$$$H$$$&$3$H$J$s$G$9$,!”$J$i$Pe(B
|> toregexpe$BL?Na$,e(Bcnte$B%%Z%i%s%I$r<u$1IU$1$k$h$&$K$7$F!"$=$NCf$Ge(B
|> preprocesse$B4X?t$r8F$V$h$&$K$7$F$O$I$&$G$7$g$&!)e(B
|
|　個人的には、命令は可能ならメソッドに追い出したいなぁ、と思っていま
|す。

e$B$U!<$`!#e(BVMe$B<BAu<T$H$7$F$=$NMWK>$OM}2r$G$-$^$9!#e(B

|そういう特別な機能は、freeze したオブジェクトの特異メソッドにし
|ておくとか。例えば、クラス定義、メソッド定義、alias, undef なんかは
|全部外に追い出したいなぁと。

e$B%/%i%9Dj5A!“%a%=%C%IDj5A!“e(Baliase$B!“e(Bundefe$B$O$=$l$>$lAjEv$9$k%a%=%Ce(B
e$B%I$b$”$j$^$9$h$M!#$@$+$i!”$=$l$[$ILdBj$b$J$$$N$GFC$K<h$j>e$2e(B
e$B$F$3$J$+$C$?$N$G$9$,!”:#2s$N$b$N$_$?$$$K8=>u$G$OBP1~$9$k%a%=%Ce(B
e$B%I$b$J$$$N$K%a%=%C%I$KDI$$=P$7$?$$$H$J$k$H!"$^$?$A$g$C$HJL$Ne(B
e$BOC$G$9$M!#e(B

e$BMW$9$k$K!"$=$N%a%=%C%I$r:FDj5A$7$?$j!"JQ$J%3%s%F%-%9%H$G8F$Se(B
e$B=P$7$?$j$9$k$3$H$,7y$J$o$1$G$9!#$J$s$+%^%/%m$K;w$?!V7y$5!W!#e(B
e$B:FDj5A$O$$$A$*$&e(Bfreezee$B$G6X;_$G$-$^$9$1$I!"8e<T$O$M$(!#e(B

e$B$^$“!”;EAH$$H$7$F$O%a%=%C%I8F$S=P$7$H$7$F$$$$F!"$=$N%%V%8%'e(B
e$B%/%H!W$re(BRubye$B%l%Y%k$+$i4JC1$K$
$($J$/$9$k$H$+$GBP1~$G$-$J$$$3e(B
e$B$H$O$J$$$H;W$$$^$9$1$I!#$^$?!"AjCL$7$^$7$g$&!#e(B

                            e$B$^$D$b$He(B e$B$f$-$R$me(B /:|)