ENCODING_FIXED と ENCODING_NONE の廃止

成瀬です。

e$B35MW$O@hF|$^$D$b$H$5$s$KOC$7$^$7$?$,!"e(BRegexp e$B$K$*$1$kFC<l$Je(B
ENCODING_FIXED e$B$He(B ENCODING_NONE e$B$N07$$$O0lItITMW$K46$8$^$9!#e(B

Ruby 1.9.1 e$B$K$Oe(B 1.8 e$BF1MM!"e(B
e$B@55,I=8=%j%F%i%k$Ke(B //s e$B$de(B //n
e$B$H$$$C$?%*%W%7%g%s$r;XDj$G$-$^$9!#e(B

e$B$H$3$m$,!"$3$N8z2L$O!"e(B//s e$B$de(B //ee$B!“e(B//u e$B$N>l9g$O!“e(B
e$B@55,I=8=$N%(%s%3!<%G%#%s%0$r;XDj$9$k$@$1$G$J$/!“e(B
e$B%^%C%ABP>]$NJ8;zNs$r;XDj$7$?%(%s%3!<%G%#%s%0$K!V8GDj!W$7$^$9!#e(B
具体的には /aa/u =~ "aa".force_encoding("euc-jp")
がマッチしなくなります。
e$B$7$+$7!”$3$N;EMM$O:.Mp$r>7$/$@$1$GITMW$G$O$J$$$G$7$g$&$+!#e(B
e$BHse(B ASCII e$BJ8;z$N%A%’%C%/MQ$H$7$F!”@55,I=8=FbIt$Ne(B KCODE_FIXED
e$B$Oe(B
e$B;D$9I,MW$,$”$k$H;W$$$^$9$,!"e(B//s
e$BEy$+$i;XDj$G$-$k;EMM$N:o=|$rDs0F$7$^$9!#e(B

e$B$^$?!"e(B//n e$B$O!"e(B

  1. e$B%(%s%3!<%G%#%s%0$Ke(B ASCII-8BIT e$B$r;XDje(B
  2. e$B@55,I=8=%j%F%i%k$KHse(B ASCII e$BJ8;z$r4^$`$H%(%i!<e(B
  3. /./n =~ "e$B$“e(B” e$BEy$G7Y9p$rI=<(e(B
    e$B$r9T$$$^$9$,!"e(B3. e$B$OITMW$G$O$J$$$G$7$g$&$+!#e(B

Index: re.c

--- re.c (revision 22332)
+++ re.c (working copy)
@@ -238,7 +238,6 @@ rb_memsearch(const void *x0, long m, const void *y
}

#define REG_LITERAL FL_USER5
-#define REG_ENCODING_NONE FL_USER6

#define KCODE_FIXED FL_USER4

@@ -302,7 +301,6 @@ rb_char_to_option_kcode(int c, int *option, int *k
*kcode = -1;
return (*option = char_to_option(c));
}

  • *option = ARG_ENCODING_FIXED;
    return 1;
    }

@@ -1201,12 +1199,6 @@ rb_reg_prepare_enc(VALUE re, VALUE str, int warn)
}
enc = RREGEXP(re)->ptr->enc;
}

  • if (warn && (RBASIC(re)->flags & REG_ENCODING_NONE) &&
  • enc != rb_ascii8bit_encoding() &&
  • rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT) {
  • rb_warn(“regexp match /…/n against to %s string”,
  • rb_enc_name(enc));
  • }
    return enc;
    }

@@ -2329,8 +2321,7 @@ rb_reg_initialize(VALUE obj, const char *s, int le
return -1;

 if (fixed_enc) {
  • if ((fixed_enc != enc && (options & ARG_ENCODING_FIXED)) ||
  •        (fixed_enc != a_enc && (options & ARG_ENCODING_NONE))) {
    
  • if (fixed_enc != enc && (options & ARG_ENCODING_FIXED)) {
    errcpy(err, “incompatible character encoding”);
    return -1;
    }
    @@ -2347,9 +2338,6 @@ rb_reg_initialize(VALUE obj, const char *s, int le
    if ((options & ARG_ENCODING_FIXED) || fixed_enc) {
    re->basic.flags |= KCODE_FIXED;
    }
  • if (options & ARG_ENCODING_NONE) {

  •    re->basic.flags |= REG_ENCODING_NONE;
    
  • }

    re->ptr = make_regexp(RSTRING_PTR(unescaped),
    RSTRING_LEN(unescaped), enc,
    options & ARG_REG_OPTION_MASK, err);
    @@ -2946,7 +2934,6 @@ rb_reg_options(VALUE re)
    rb_reg_check(re);
    options = RREGEXP(re)->ptr->options & ARG_REG_OPTION_MASK;
    if (RBASIC(re)->flags & KCODE_FIXED) options |= ARG_ENCODING_FIXED;

  • if (RBASIC(re)->flags & REG_ENCODING_NONE) options |=
    ARG_ENCODING_NONE;
    return options;
    }

Index: test/ruby/test_m17n.rb

--- test/ruby/test_m17n.rb (revision 22332)
+++ test/ruby/test_m17n.rb (working copy)
@@ -349,24 +349,14 @@ class TestM17N < Test::Unit::TestCase
def test_regexp_ascii_none
r = /a/n

  • assert_warning(%r{regexp match /…/n against to}) {
  •  assert_regexp_generic_ascii(r)
    
  • }
  • assert_equal(0, r =~ a(“a”))
    assert_equal(0, r =~ e(“a”))
    assert_equal(0, r =~ s(“a”))
    assert_equal(0, r =~ u(“a”))
    assert_equal(nil, r =~ a("\xc2\xa1"))
  • assert_warning(%r{regexp match /…/n against to EUC-JP string}) {
  •  assert_equal(nil, r =~ e("\xc2\xa1"))
    
  • }
  • assert_warning(%r{regexp match /…/n against to Windows-31J
    string}) {
  •  assert_equal(nil, r =~ s("\xc2\xa1"))
    
  • }
  • assert_warning(%r{regexp match /…/n against to UTF-8 string}) {
  •  assert_equal(nil, r =~ u("\xc2\xa1"))
    
  • }
  • assert_equal(nil, r =~ e("\xc2\xa1"))

  • assert_equal(nil, r =~ s("\xc2\xa1"))

  • assert_equal(nil, r =~ u("\xc2\xa1"))

    assert_nothing_raised { eval(e("/\x80/n")) }
    end
    @@ -392,7 +382,7 @@ class TestM17N < Test::Unit::TestCase
    end

def test_regexp_euc

  • assert_regexp_fixed_eucjp(/a/e)
  • assert_regexp_generic_ascii(/a/e)
    assert_regexp_fixed_eucjp(/\xc2\xa1/e)
    assert_regexp_fixed_eucjp(eval(e(%{/\xc2\xa1/})))
    assert_regexp_fixed_eucjp(eval(e(%q{/\xc2\xa1/})))
    @@ -402,10 +392,10 @@ class TestM17N < Test::Unit::TestCase
    assert_equal(0, r =~ e(“a”))
    assert_equal(0, r =~ s(“a”))
    assert_equal(0, r =~ u(“a”))
  •  assert_raise(Encoding::CompatibilityError) { r =~ a("\xc2\xa1") }
    
  •  assert_nothing_raised{ r =~ a("\xc2\xa1") }
     assert_equal(nil, r =~ e("\xc2\xa1"))
    
  •  assert_raise(Encoding::CompatibilityError) { r =~ s("\xc2\xa1") }
    
  •  assert_raise(Encoding::CompatibilityError) { r =~ u("\xc2\xa1") }
    
  •  assert_nothing_raised{ r =~ s("\xc2\xa1") }
    
  •  assert_nothing_raised{ r =~ u("\xc2\xa1") }
    

    }

    [/\xc2\xa1/e, eval(e(%{/\xc2\xa1/})), eval(e(%q{/\xc2\xa1/}))].each
    {|r|
    @@ -421,7 +411,7 @@ class TestM17N < Test::Unit::TestCase
    end

    def test_regexp_sjis

  • assert_regexp_fixed_sjis(/a/s)
  • assert_regexp_generic_ascii(/a/s)
    assert_regexp_fixed_sjis(/\xc2\xa1/s)
    assert_regexp_fixed_sjis(eval(s(%{/\xc2\xa1/})))
    assert_regexp_fixed_sjis(eval(s(%q{/\xc2\xa1/})))
    @@ -598,12 +588,10 @@ class TestM17N < Test::Unit::TestCase

def test_union_1_regexp
assert_regexp_generic_ascii(Regexp.union(//))

  • assert_warning(%r{regexp match /…/n against to}) {
  •  assert_regexp_generic_ascii(Regexp.union(//n))
    
  • }
  • assert_regexp_fixed_eucjp(Regexp.union(//e))
  • assert_regexp_fixed_sjis(Regexp.union(//s))
  • assert_regexp_fixed_utf8(Regexp.union(//u))
  • assert_regexp_generic_ascii(Regexp.union(//n))
  • assert_regexp_generic_ascii(Regexp.union(//e))
  • assert_regexp_generic_ascii(Regexp.union(//s))
  • assert_regexp_generic_ascii(Regexp.union(//u))
    end

def test_union_2
@@ -641,16 +629,14 @@ class TestM17N < Test::Unit::TestCase
end

def test_dynamic_ascii_regexp

  • assert_warning(%r{regexp match /…/n against to}) {
  •  assert_regexp_generic_ascii(/#{}/n)
    
  • }
  • assert_regexp_generic_ascii(/#{}/n)
    assert_regexp_fixed_ascii8bit(/#{}\xc2\xa1/n)
    assert_regexp_fixed_ascii8bit(/\xc2\xa1#{}/n)
    assert_nothing_raised { s1, s2 = a(’\xc2’), a(’\xa1’); /#{s1}#{s2}/
    }
    end

def test_dynamic_eucjp_regexp

  • assert_regexp_fixed_eucjp(/#{}/e)
  • assert_regexp_generic_ascii(/#{}/e)
    assert_regexp_fixed_eucjp(/#{}\xc2\xa1/e)
    assert_regexp_fixed_eucjp(/\xc2\xa1#{}/e)
    assert_raise(SyntaxError) { eval(’/\xc2#{}/e’) }
    @@ -660,7 +646,7 @@ class TestM17N < Test::Unit::TestCase
    end

def test_dynamic_sjis_regexp

  • assert_regexp_fixed_sjis(/#{}/s)
  • assert_regexp_generic_ascii(/#{}/s)
    assert_regexp_fixed_sjis(/#{}\xc2\xa1/s)
    assert_regexp_fixed_sjis(/\xc2\xa1#{}/s)
    assert_raise(SyntaxError) { eval(’/\x81#{}/s’) }
    @@ -670,7 +656,7 @@ class TestM17N < Test::Unit::TestCase
    end

def test_dynamic_utf8_regexp

  • assert_regexp_fixed_utf8(/#{}/u)
  • assert_regexp_generic_ascii(/#{}/u)
    assert_regexp_fixed_utf8(/#{}\xc2\xa1/u)
    assert_regexp_fixed_utf8(/\xc2\xa1#{}/u)
    assert_raise(SyntaxError) { eval(’/\xc2#{}/u’) }

In article [email protected],
“NARUSE, Yui” [email protected] writes:

e$B$H$3$m$,!"$3$N8z2L$O!"e(B//s e$B$de(B //ee$B!"e(B//u e$B$N>l9g$O!"e(B
e$B@55,I=8=$N%(%s%3!<%G%#%s%0$r;XDj$9$k$@$1$G$J$/!“e(B
e$B%^%C%ABP>]$NJ8;zNs$r;XDj$7$?%(%s%3!<%G%#%s%0$K!V8GDj!W$7$^$9!#e(B
e$B6qBNE*$K$Oe(B /aa/u =~ “aa”.force_encoding(“euc-jp”) e$B$,%^%C%A$7$J$/$J$j$^$9!#e(B
e$B$7$+$7!”$3$N;EMM$O:.Mp$r>7$/$@$1$GITMW$G$O$J$$$G$7$g$&$+!#e(B

手元ではマッチします。

% ruby-1.9.1p0 -ve 'p(/aa/u =~ "aa".force_encoding("euc-jp"))'
ruby 1.9.1p0 (2009-01-30 revision 21907) [i686-linux]
0

ここでいう混乱とは実際にはなんでしょうか。

e$B@.@%$G$9!#e(B

Tanaka A. wrote:

% ruby-1.9.1p0 -ve ‘p(/aa/u =~ “aa”.force_encoding(“euc-jp”))’
ruby 1.9.1p0 (2009-01-30 revision 21907) [i686-linux]
0

おっと、失礼。
% ruby_1_9_1 -ve ‘p(/aa/u =~ "aae$B$“e(B”.encode(“euc-jp”))’
ruby 1.9.1p0 (2009-02-13 revision 22266) [x86_64-freebsd7.1]
-e:1:in `’: incompatible encoding regexp match (UTF-8 regexp with
EUC-JP string) (Encoding::CompatibilityError)
こっちです。

e$B$3$3$G$$$&:.Mp$H$O<B:]$K$O$J$s$G$7$g$&$+!#e(B

e$B0J>e$@$1$J$i0l8+!V;EMM!W$K$b8+$($k$N$G$9$,!"e(B
e$B$3$N$h$&$J!"e(BRegexp#source.ascii_only? e$B$,@.N)$9$k$N$K!"e(B
ASCII
e$B8_49$J%(%s%3!<%G%#%s%0$r;}$DJ8;zNs$K%^%C%A$5$;$k$3$H$,$G$-$J$$e(B
(Regexp#fixed_encoding? e$B$,e(B true e$B$Je(B) e$B@55,I=8=$Oe(B //u,
//s, //e e$B$rMQ$$$F$7$+e(B
e$B:n$k$3$H$,$G$-$J$$$H$$$&E@$G$9!#e(B

e$B$=$N$h$&$J;EMM$K$9$k$KB-$kM}M3$,$"$k$J$i$P!"$=$N$h$&$K$9$k$N$b$$$$$N$G$7$g$&$,!"e(B
e$B$o$?$7$K$O$J$$$h$&$K;W$($^$9!#e(B

In article [email protected],
“NARUSE, Yui” [email protected] writes:

e$B0J>e$@$1$J$i0l8+!V;EMM!W$K$b8+$($k$N$G$9$,!"e(B
e$B$3$N$h$&$J!"e(BRegexp#source.ascii_only? e$B$,@.N)$9$k$N$K!"e(B
ASCII e$B8_49$J%(%s%3!<%G%#%s%0$r;}$DJ8;zNs$K%^%C%A$5$;$k$3$H$,$G$-$J$$e(B
(Regexp#fixed_encoding? e$B$,e(B true e$B$Je(B) e$B@55,I=8=$Oe(B //u, //s, //e e$B$rMQ$$$F$7$+e(B
e$B:n$k$3$H$,$G$-$J$$$H$$$&E@$G$9!#e(B

Regexp#source.ascii_only? e$B$,@.N)$7!"e(B
Regexp#fixed_encoding? e$B$,e(B true e$B@55,I=8=$re(B
//u, //s, //e e$B$rMQ$$$:$K:n$kNc$O$?$H$($P0J2<$,B8:_$7$^$9!#e(B

% ruby -ve ’
r = /\u3042/
p r.source.ascii_only?
p r.fixed_encoding?

ruby 1.9.2dev (2009-02-15 trunk 22328) [i686-linux]
true
true

e$B$3$3$OHyL/$J$H$3$m$G!“@55,I=8=$,5-=R$KMQ$$$?%(%s%3!<%G%#%s%0e(B
e$B$H!”$=$l$,%^%C%A$9$kBP>]$N%(%s%3!<%G%#%s%0$K$O!“$A$g$C$H%.%c%Ce(B
e$B%W$,$”$j$^$9!#$3$3$r$$$8$k$K$O!“$=$N%.%c%C%W$K$D$$$F=<J,$K9Me(B
e$B$($kI,MW$,$”$j$^$9!#e(B

e$B$H$O$$$(!“@.@%$5$s$,46$8$?:.Mp<+BN$O!”$J$s$i$+$NLdBj$r<($7$Fe(B
e$B$$$k2DG=@-$O=<J,$K$"$j$^$9!#e(B

e$B$b$&0l2s!“2?$,LdBj$J$N$+$r@53N$KI=8=$7$F$$$?$@$1$k$H$”$j$,$?e(B
e$B$$$G$9!#e(B

e$B@.@%$G$9!#e(B

Tanaka A. wrote:

Regexp#fixed_encoding? e$B$,e(B true e$B@55,I=8=$re(B
//u, //s, //e e$B$rMQ$$$:$K:n$kNc$O$?$H$($P0J2<$,B8:_$7$^$9!#e(B

% ruby -ve ’
r = /\u3042/
p r.source.ascii_only?
p r.fixed_encoding?

ruby 1.9.2dev (2009-02-15 trunk 22328) [i686-linux]
true
true

e$B$`!"3N$+$K%(%9%1!<%W$7$FKd$a9~$s$@>l9g$b$=$&$G$9$M!#e(B
e$B$9$k$H!"e(BRegexp#source.ascii_only? e$B$r=P$7$?$N$OITE,@Z$G$7$?!#e(B

e$B$3$3$OHyL/$J$H$3$m$G!"@55,I=8=$,5-=R$KMQ$$$?%(%s%3!<%G%#%s%0e(B
e$B$H!"$=$l$,%^%C%A$9$kBP>]$N%(%s%3!<%G%#%s%0$K$O!"$A$g$C$H%.%c%Ce(B
e$B%W$,$"$j$^$9!#$3$3$r$$$8$k$K$O!"$=$N%.%c%C%W$K$D$$$F=<J,$K9Me(B
e$B$($kI,MW$,$"$j$^$9!#e(B

e$B$U$`!“0lHL$K8@$($P3N$+$K6D$k$H$*$j$G$9!#e(B
\p e$BEy$N$3$H$b;kLn$KF~$l$l$P!”==J,$J8!F$$,I,MW$G$7$g$&!#e(B

e$BC<E*$K8@$($P!"$=$N@55,I=8=$N5-=R$KMQ$$$?%(%s%3!<%G%#%s%0$HF1$8e(B
e$B%(%s%3!<%G%#%s%0$NJ8;zNs$N;~$K%^%C%A$G$-$kHO0O$H!“e(B
e$B0UL#E*$K35$MF1$8$K$J$k$h$&$K$7$?$$$G$9$+$M!#e(B
e$B!V35$M!W$HF~$l$?$N$Oe(B \s e$B$de(B \we$B!”@h$K$Oe(B \p{Hiragana}
e$BEy$r9M$($F$$$k$o$1$G$9$,!#e(B

e$B$H$O$$$(!"@.@%$5$s$,46$8$?:.Mp<+BN$O!"$J$s$i$+$NLdBj$r<($7$Fe(B
e$B$$$k2DG=@-$O=<J,$K$"$j$^$9!#e(B

e$B$b$&0l2s!“2?$,LdBj$J$N$+$r@53N$KI=8=$7$F$$$?$@$1$k$H$”$j$,$?e(B
e$B$$$G$9!#e(B

e$B$7$+$7!"e(B//u e$BEy$K8B$C$F8@$($P$=$N$h$&$JBg$-$JLdBj$G$O$J$/!"e(B
/a/u e$B$HF1$8$3$H$r$G$-$kJ}K!$,B8:_$;$:!“e(BUTF-8, EUC-JP, Windows-31J
e$B$Ne(B
3 e$B$D$N%(%s%3!<%G%#%s%0$G$7$+;H$($J$$FC<l$J5!G=$G$”$kE@$+$i!"e(B
e$B6ICOE*$JLdBj$@$H;W$C$F$$$^$9!#e(B

e$B$=$7$F$^$:!"D>@\E*$JLdBj$H$7$F$O!"Nc$($P!"e(B

Regexp.new(/a/u.source) == /a/u
=> false
e$B$H!“e(BRegexp#source e$B$He(B Regexp#new
e$B$GLa$i$J$$E@$,$”$j$^$9!#e(B

e$B$^$?!"0lHL$K@55,I=8=$O0lEY:n$C$F$7$^$&$H!"e(B
e$B$=$l$,$J$<e(B fixed_encoding e$B$J$N$+8e$+$iCN$k$3$H$O:$Fq$G$9!#e(B
e$B$?$$$F$$$N>l9g$Oe(B

  • ASCII e$B8_49%(%s%3!<%G%#%s%0$G$J$$$+$ie(B
  • e$B@55,I=8=$KHse(B ASCII
    e$B$r<($9%j%F%i%k$^$?$O%(%9%1!<%W$r4^$`$+$ie(B
  • \p e$BEy$r4^$`$+$ie(B
    e$B$G!"$3$l$i$O$=$l$J$j$KM}M3$,$"$j$^$9!#e(B(e$B%W%m%Q%F%#Ey$O>-MhE*$K8!F$$,I,MW$G$7$g$&e(B)
    e$B$7$+$7e(B //u e$B$O$=$&$7$?@55,I=8=$=$N$b$N$rL5;k$7$Fe(B KCODE_FIXED
    e$B$rIUM?$7$^$9!#e(B

e$B$o$?$7$Oe(B //u
e$B5-K!<+BN$NGQ;$O<gD%$7$F$$$J$$$N$G!"$3$l$,1F6A$9$k$N$O!"e(B
/a/u e$B$de(B /\w/u e$B$rHse(B ASCII
e$B$JJ8;z$r4^$`>l9g$J$N$G$3$l$i$K$D$$$F9M$($k$H!"e(B
/a/u e$B$Oe(B UTF-8
e$B$K$*$1$k!Ve(Bae$B!W$rB>$N%(%s%3!<%G%#%s%0$N!Ve(Bae$B!W$H6hJL$7!"e(B
UTF-8 e$B$N$b$N$N$
$K%^%C%A$5$;$k@55,I=8=$H9M$($i$l$^$9!#e(B
e$B$7$+$7!"$=$N$h$&$J5!G=$OI,MW$G$7$g$&$+!#e(B
e$B$o$?$7$OI,MW$J$$$H;W$$$^$9$7!"$b$7I,MW$J$i$PB>$N%(%s%3!<%G%#%s%0$K$bDs6!$9$k$Y$-$G$9!#e(B
/\w/u e$B$K$D$$$F$bF1MM$K46$8$^$9!#e(B

e$B$=$7$F!"$9$G$Ke(B 1.8 e$BMQ$K=q$+$l$?e(B /a/u e$BEy$b$o$6$o$6e(B
UTF-8 e$BEy$K8BDj$9$k0U?^$Oe(B
e$B$J$+$C$?$N$G$O$J$$$+$H;W$$$^$9!#e(B
e$B8=>u!"$G$-$4$3$m$Ge(B /u e$B$rIU$1$F$7$^$C$?@55,I=8=%j%F%i%k$+$i!"e(B
/u
e$B$r:o$k$H$$$&ITLS$J:n6H$,9T$o$l$F$$$^$9$,!"K\Ev$K$=$l$OI,MW$J$s$G$7$g$&$+!#e(B
e$B0UL#O@$+$iI,MW$J:n6H$J$i$P9T$C$FLc$&$Y$-$@$H;W$$$^$9$,!"e(B
e$B$o$?$7$K$O$=$&$O;W$($^$;$s!#e(B

e$B7k6I$N$H$3$m!"$3$l$OITI,MW$JHsBP>N$J5!G=$K8+$($^$9!#e(B

In article [email protected],
“NARUSE, Yui” [email protected] writes:

e$B$7$+$7!"e(B//u e$BEy$K8B$C$F8@$($P$=$N$h$&$JBg$-$JLdBj$G$O$J$/!"e(B
/a/u e$B$HF1$8$3$H$r$G$-$kJ}K!$,B8:_$;$:!“e(BUTF-8, EUC-JP, Windows-31J e$B$Ne(B
3 e$B$D$N%(%s%3!<%G%#%s%0$G$7$+;H$($J$$FC<l$J5!G=$G$”$kE@$+$i!"e(B
e$B6ICOE*$JLdBj$@$H;W$C$F$$$^$9!#e(B

e$B$=$7$F$^$:!"D>@\E*$JLdBj$H$7$F$O!"Nc$($P!"e(B

Regexp.new(/a/u.source) == /a/u
=> false
e$B$H!“e(BRegexp#source e$B$He(B Regexp#new e$B$GLa$i$J$$E@$,$”$j$^$9!#e(B

e$BLa$9$?$a$K$Oe(B options e$B$b;XDj$7$F$/$@$5$$!#e(B

r = /a/u
r2 = Regexp.new(r.source, r.options)
p r2.fixed_encoding? #=> true

options e$B$Oe(B fixed_encoding e$B$@$1$G$J$/!"e(B//i e$B$J$IB>$Ne(B
option e$B$re(B
e$BJ]B8$9$k$?$a$K$bI,MW$G$9!#e(B

e$B$=$7$F!"$3$l$GLa$k$3$H$+$i$o$+$k$h$&$K!"G$0U$N%(%s%3!<%G%#%se(B
e$B%0$Ge(B fixed_encoding e$B$K$9$k$3$H$b<B$O2DG=$G$9!#e(B

p Regexp.new(“a”, 16).fixed_encoding? #=> true

16 e$B$H$$$&$N$rD>@=q$/$H$$$&$N$OJQ$G$O$"$j$^$9$,!"e(B
Regexp::FIXEDENCODING e$B$H$$$&Dj?t$r:n$k$3$H$O4JC1$G$9!#e(B

/a/u e$B$de(B /\w/u e$B$rHse(B ASCII e$B$JJ8;z$r4^$`>l9g$J$N$G$3$l$i$K$D$$$F9M$($k$H!"e(B

すいません。文章がよくわかりません。

e$B$7$+$7!“$=$N$h$&$J5!G=$OI,MW$G$7$g$&$+!#e(B
e$B$o$?$7$OI,MW$J$$$H;W$$$^$9$7!”$b$7I,MW$J$i$PB>$N%(%s%3!<%G%#%s%0$K$bDs6!$9$k$Y$-$G$9!#e(B

e$B8=:_$bDs6!$7$F$$$J$$$o$1$G$O$"$j$^$;$s!#$^$H$b$KDs6!$9$k$N$be(B
e$B4JC1$G$9!#e(B

e$B$^$?!"e(B//i e$B$K$D$$$F$O$I$&$G$7$g$&$+!#e(B

/fi/ui e$B$Oe(B “\uFB01” e$B$H%^%C%A$7$^$9!#e(B
e$B$H$$$&$N$Oe(B U+FB01 e$B$Oe(B fi e$B$N9g;z$@$+$i$G$9$,!"e(B
/fi/ei e$B$He(B EUC-JP e$B$rL@<($7$?$H$-$K$be(B “\uFB01”
e$B$K%^%C%A$9$Y$-e(B
e$B$J$s$G$7$g$&$+!#e(B

e$B$=$7$F!"$9$G$Ke(B 1.8 e$BMQ$K=q$+$l$?e(B /a/u e$BEy$b$o$6$o$6e(B UTF-8 e$BEy$K8BDj$9$k0U?^$Oe(B
e$B$J$+$C$?$N$G$O$J$$$+$H;W$$$^$9!#e(B

そうは思いません。

e$B8=>u!"$G$-$4$3$m$Ge(B /u e$B$rIU$1$F$7$^$C$?@55,I=8=%j%F%i%k$+$i!"e(B
/u e$B$r:o$k$H$$$&ITLS$J:n6H$,9T$o$l$F$$$^$9$,!"K\Ev$K$=$l$OI,MW$J$s$G$7$g$&$+!#e(B

この作業ってどこで起きてるんですか?

//n e$B$K$D$$$F$O$?$7$+$K$"$$$^$$$J;H$$$+$?$,B?$+$C$?$H;W$$$^$9!#e(B
e$B$@$+$i!“7Y9p$K<e$a$?$N$G$9$,!”$=$l0J30$OL@3N$@$C$?$h$&$K46$8e(B
e$B$F$$$^$9!#e(B

e$B@.@%$G$9!#e(B

Tanaka A. wrote:

e$B$=$7$F!"$3$l$GLa$k$3$H$+$i$o$+$k$h$&$K!"G$0U$N%(%s%3!<%G%#%se(B
e$B%0$Ge(B fixed_encoding e$B$K$9$k$3$H$b<B$O2DG=$G$9!#e(B

16 e$B$H$$$&$N$rD>@=q$/$H$$$&$N$OJQ$G$O$"$j$^$9$,!"e(B
Regexp::FIXEDENCODING e$B$H$$$&Dj?t$r:n$k$3$H$O4JC1$G$9!#e(B

ふむ、なるほど。
e$BJQ$+$I$&$+$h$j$b$3$l$,e(B Ruby 1.9
e$B$N;EMM$N0lIt$J$N$+$,5$$K$J$j$^$9$,!"e(B
e$B$$$D$G$b<BAu2DG=$J$3$H$O$o$+$j$^$7$?!#e(B
e$BI,MW$J$i$PDj?t$r:n$C$?J}$,$$$$$N$G$7$g$&$M!#e(B

e$B$^$?!"e(B//i e$B$K$D$$$F$O$I$&$G$7$g$&$+!#e(B

/fi/ui e$B$Oe(B “\uFB01” e$B$H%^%C%A$7$^$9!#e(B
e$B$H$$$&$N$Oe(B U+FB01 e$B$Oe(B fi e$B$N9g;z$@$+$i$G$9$,!"e(B
/fi/ei e$B$He(B EUC-JP e$B$rL@<($7$?$H$-$K$be(B “\uFB01” e$B$K%^%C%A$9$Y$-e(B
e$B$J$s$G$7$g$&$+!#e(B

e$B$&!<$s!"$=$l$C$F%l%$%d!<$,0c$&$h$&$J5$$,$7$^$9!#e(B

e$BNc$($P!"e(B
/ss/ui =~ “\u00df”.encode(“iso-8859-1”)
e$B$O%^%C%A$7$F$b$$$$$H;W$$$^$9$,!"e(B

Unicode e$B$Ne(B U+0000 e$B$+$ie(B U+00FF e$B$^$G$Oe(B ISO-8859-1

e$B$H0lCW$9$k$O$:$@$7e(B
ignorecase e$B$N5sF0$K4X$7$F$O%(%s%3!<%G%#%s%0$H$Oe(B
e$BJL$N%l%$%d!<$G=hM}$9$k$Y$-$+$H46$8$^$9!#e(B

e$B$d$k$+$OJL$H$7$F!"e(BRegexp::IGNORECASE_COMBINE e$B$r:n$j$D$D!"e(B

e$B%(%s%3!<%G%#%s%0$4$H$K%G%U%)%k%H$rJQ$($k$H$+e(B

e$B$A$g$C$H$:$l$^$9$,!"e(B
/\s/ =~ “\u3000” #=> 0
/\s/e =~ “\u3000”.encode(“euc-jp”) #=> nil
e$B$H$+!#e(B

e$B$=$7$F!"$9$G$Ke(B 1.8 e$BMQ$K=q$+$l$?e(B /a/u e$BEy$b$o$6$o$6e(B UTF-8 e$BEy$K8BDj$9$k0U?^$Oe(B
e$B$J$+$C$?$N$G$O$J$$$+$H;W$$$^$9!#e(B

e$B$=$&$O;W$$$^$;$s!#e(B

e$BNc$($P0J2<$N$h$&$K$J$k$o$1$G!"$"$^$j6/$$0UL#$r;}$?$;$k$N$O$I$&$J$s$G$9$+$M$’!#e(B

% ruby_1_8 -Ku -e’p /a/s =~ “a\xE3\x81\x82”’
0
% ruby_1_9_1 -Ku -e’p /a/s =~ “a\xE3\x81\x82”’
-e:1:in `’: incompatible encoding regexp match (Windows-31J regexp
with UTF-8 string) (Encoding::CompatibilityError)

e$B8=>u!"$G$-$4$3$m$Ge(B /u e$B$rIU$1$F$7$^$C$?@55,I=8=%j%F%i%k$+$i!"e(B
/u e$B$r:o$k$H$$$&ITLS$J:n6H$,9T$o$l$F$$$^$9$,!"K\Ev$K$=$l$OI,MW$J$s$G$7$g$&$+!#e(B

e$B$3$N:n6H$C$F$I$3$G5/$-$F$k$s$G$9$+e(B?

//n e$B$K$D$$$F$O$?$7$+$K$"$$$^$$$J;H$$$+$?$,B?$+$C$?$H;W$$$^$9!#e(B
e$B$@$+$i!“7Y9p$K<e$a$?$N$G$9$,!”$=$l0J30$OL@3N$@$C$?$h$&$K46$8e(B
e$B$F$$$^$9!#e(B

e$B!VL@3N$@$C$?!W;H$$J}$H$O!"Nc$($Pe(B

/\xB9\xA5/ =~ “\xA5\xB9\xA5\xC8”
=> 1
/#{"\xB9\xA5"}/e =~ “\xA5\xB9\xA5\xC8”
=> nil
e$B$H$+e(B
/#{"\\"}/s =~ “\x95\x5C”
=> nil
/#{"\\"}/ =~ “\x95\x5C”
=> 1
e$B$G$7$g$&$+!#e(B
e$B$I$A$i$b%P%$%H9=B$$K5/0x$9$k8m%^%C%A$rKI$0$?$a$N$b$N$K8+$($^$9!#e(B
e$B$3$N<o$N%P%$%H9=B$$K5/0x$9$k8m%^%C%A$O!“e(BRuby 1.9
e$B$G$O$o$6$o$6e(B
fixed_encoding
e$B$rIU$1$J$/$F$b2sHr$G$-$k$N$G!”$3$l$NKI;_$G$OITMW$K;W$($^$9!#e(B

e$BB>$K2?$+%^%C%ABP>]$N%(%s%3!<%G%#%s%0$r9J$j$?$$$h$&$JMxMQNc$C$F$"$j$^$7$?$C$1!#e(B

In article [email protected],
“NARUSE, Yui” [email protected] writes:

e$BI,MW$J$i$PDj?t$r:n$C$?J}$,$$$$$N$G$7$g$&$M!#e(B

もう作ってしまいました。

e$B$&!<$s!"$=$l$C$F%l%$%d!<$,0c$&$h$&$J5$$,$7$^$9!#e(B

e$BNc$($P!"e(B
/ss/ui =~ “\u00df”.encode(“iso-8859-1”)
e$B$O%^%C%A$7$F$b$$$$$H;W$$$^$9$,!"e(B

Unicode e$B$Ne(B U+0000 e$B$+$ie(B U+00FF e$B$^$G$Oe(B ISO-8859-1 e$B$H0lCW$9$k$O$:$@$7e(B

ignorecase e$B$N5sF0$K4X$7$F$O%(%s%3!<%G%#%s%0$H$Oe(B
e$BJL$N%l%$%d!<$G=hM}$9$k$Y$-$+$H46$8$^$9!#e(B

e$B$d$k$+$OJL$H$7$F!"e(BRegexp::IGNORECASE_COMBINE e$B$r:n$j$D$D!"e(B

e$B%(%s%3!<%G%#%s%0$4$H$K%G%U%)%k%H$rJQ$($k$H$+e(B

e$B%l%$%d$,0c$&$H$$$o$l$F$be(B oniguruma e$B$O%(%s%3!<%G%#%s%0$N$H$3e(B
e$B$m$G$d$C$F$^$9$7!#e(B

e$B$A$g$C$H$:$l$^$9$,!"e(B
/\s/ =~ “\u3000” #=> 0
/\s/e =~ “\u3000”.encode(“euc-jp”) #=> nil
e$B$H$+!#e(B

e$B$3$l$b%(%s%3!<%G%#%s%0$r5$$K$7$J$$$H$$$1$J$$Nc$G$9$M!#e(B

e$B$3$&$$$&Nc$b4^$a$F!“@55,I=8=$N5!G=$K$O%(%s%3!<%G%#%s%0$r5$$Ke(B
e$B$9$kI,MW$,$”$k$b$N$,$"$j!"5$$K$9$k@55,I=8=$r=q$$$?$H$-$O=q$$e(B
e$B$?;~E@$G8GDj$7$F$7$^$&$N$,E,@Z$@$H;W$C$F$$$^$9!#e(B

e$B5$$K$7$J$$$H$-$K$Oe(B //e e$B$8$c$J$/$Fe(B //
e$B$H=q$1$P$$$$$s$8$c$J$$e(B
e$B$G$7$g$&$+!#e(B

e$BNc$($P0J2<$N$h$&$K$J$k$o$1$G!“$”$^$j6/$$0UL#$r;}$?$;$k$N$O$I$&$J$s$G$9$+$M$'!#e(B

% ruby_1_8 -Ku -e’p /a/s =~ “a\xE3\x81\x82”’
0
% ruby_1_9_1 -Ku -e’p /a/s =~ “a\xE3\x81\x82”’
-e:1:in `': incompatible encoding regexp match (Windows-31J regexp with UTF-8 string) (Encoding::CompatibilityError)

/a/ e$B$J$i$$$$$+$b$7$l$^$;$s$,!"e(B/fi/i e$B$H$+e(B /ss/i
e$B$O0U?^$7$F$$e(B
e$B$J$$F0:n$K$J$k$+$b$7$l$J$$$7$J$!!#e(B

e$B!VL@3N$@$C$?!W;H$$J}$H$O!"Nc$($Pe(B

e$B;d$,46$8$F$$$k$N$O!"e(B//e e$B$Oe(B EUC-JP
e$B$rA[Dj$7$F$$$k$HM}2r$7$FLde(B
e$BBj$J$5$=$&$@$H$$$&$3$H$G$9!#e(B

e$B$I$A$i$b%P%$%H9=B$$K5/0x$9$k8m%^%C%A$rKI$0$?$a$N$b$N$K8+$($^$9!#e(B
e$B$3$N<o$N%P%$%H9=B$$K5/0x$9$k8m%^%C%A$O!“e(BRuby 1.9 e$B$G$O$o$6$o$6e(B
fixed_encoding e$B$rIU$1$J$/$F$b2sHr$G$-$k$N$G!”$3$l$NKI;_$G$OITMW$K;W$($^$9!#e(B

1.9 e$B$G$OJ8;zNs$,J8;z6-3&$rCN$C$F$$$k$H$$$&$N$O$=$&$G$9$M!#e(B

1.8 e$B$Ne(B /\xB9\xA5/e e$B$O$I$&$J$s$G$9$+$M$'!#>e$NNc$G$Oe(B #{}
e$B$GHre(B
e$B$1$F$^$9$,!#e(B1.9 e$B$G$=$&$7$m$C$F$$$o$l$F$b$G$-$^$;$s$1$l$I!#e(B

e$BB>$K2?$+%^%C%ABP>]$N%(%s%3!<%G%#%s%0$r9J$j$?$$$h$&$JMxMQNc$C$F$"$j$^$7$?$C$1!#e(B

oniguruma e$B$K$O%(%s%3!<%G%#%s%0$r5$$K$9$kI,MW$,$“$k5!G=$,$”$je(B
e$B$^$9$+$i!#e(B