UTF_16LE.dummy? returns false

こんにちは sheepman です。

Encoding::UTF_16LE.dummy? が false
を返すのはこういうものなんでしょうか。

$ ruby-1.9 -ve 'p Encoding::UTF_16LE.dummy?'
ruby 1.9.0 (2008-02-24 revision 15239) [i686-linux]
false

ダミーじゃない割には文字列の結合・正規表現のマッチングでエラーになります。

$ ruby-1.9 -ve 'p "a" + "a".encode("UTF-16LE")'
ruby 1.9.0 (2008-02-24 revision 15239) [i686-linux]
-e:1:in `<main>': character encodings differ: US-ASCII and UTF-16LE
(ArgumentError)

$ ruby-1.9 -ve '/./ =~ "a".encode("UTF-16LE")'
ruby 1.9.0 (2008-02-24 revision 15239) [i686-linux]
-e:1:in `<main>': incompatible encoding regexp match (US-ASCII regexp
with UTF-16LE string) (ArgumentError)

成瀬です。

sheepman wrote:

こんにちは sheepman です。

Encoding::UTF_16LE.dummy? が false を返すのはこういうものなんでしょうか。

はい、そういうものです。UTF-16 では dummy encoding
ではありません。

$ ruby-1.9 -ve '/./ =~ "a".encode("UTF-16LE")'
ruby 1.9.0 (2008-02-24 revision 15239) [i686-linux]
-e:1:in `<main>': incompatible encoding regexp match (US-ASCII regexp with UTF-16LE string) (ArgumentError)

e$B%(%i!<$K=P$F$$$^$9DL$j!"J8;zNs$N7k9g!&@55,I=8=$N%^%C%A%s%0$G%(%i!<$K$J$Ce(B
e$B$F$$$k$N$G$O$J$/!“[email protected]$,e(B US-ASCII
e$B$G$”$k$?$a$K%(%i!<$,=P$F$$$^$9!#$3$le(B
e$B$O$J$<$+$H$$$&$H!“e(BUTF-8 e$BEyB>$N$[$H$s$I$Ne(B encoding e$B$Oe(B
ascii compatible e$B$Ge(B
e$B$”$k$N$KBP$7!"e(BUTF-16LE
e$B$O$=$&$G$O$J$$$+$i$G$9!#$=$N$?$a!"e(BUS-ASCII e$B$H$N7ke(B
e$B9g!&@55,I=8=%^%C%A%s%0$G%(%s%3!<%G%#%s%0$,0c$&$H%(%i!<$,=P$^$9!#e(B

s1 = "\x30\x42".force_encoding("UTF-16BE")
s2 = "\x30\x44".force_encoding("UTF-16BE")
p s1 + s2 #=> "\x30\x42\x30\x44"

[email protected],I=8=$be(B Regexp.new e$B$Ke(B UTF-16BE
e$BJ8;zNs$rEO$7$F:n$l$P!&!&!&!“e(B
r1 = Regexp.new("\x30\x42".force_encoding("UTF-16BE"))
ArgumentError: character encodings differ: US-ASCII and UTF-16BE
あれ？というわけで、これは直しますが、Regexp オブジェクトも
UTF-16LE に
すればマッチするはずです。

e$B$^$H$a$k$H!"!Ve(BUTF-16LE e$B$Oe(B dummy encoding
e$B$G$O$J$$$,!“e(Bascii compatible e$B$Ge(B
e$B$J$$$N$G!”?’!9ITJX$J$H$3$m$,$"$k!#e(B([email protected]$+$i!"e(BUTF-8
e$B$KJQ49$7$F07$$$^$7$ge(B
e$B$&e(B)e$B!W$H$$$&$3$H$K$J$j$^$9!#e(B

e$B$J$!"$3$l$i$r$b$C$H5$$rMx$+$;$F$/$l$k$h$&$K$9$k$D$b$j$O$J$$$N$+$H$$$&<Ae(B
e$BLd$,9M$([email protected]$KEz$($F$
$-$^$9$H!"!V$o$6$o$6JXMx$K$9$k$D$b$j$O$J$$e(B
e$B$,!"%Q%C%A$rAw$C$F$/$l$l$P<h$j9~$`!W$H$$$&9g0U$K$J$C$F$$$?$H5-21$7$F$$$^$9!#e(B

成瀬です。

NARUSE, Yui wrote:

e$B%(%i!<$K=P$F$$$^$9DL$j!"J8;zNs$N7k9g!&@55,I=8=$N%^%C%A%s%0$G%(%i!<$K$J$Ce(B
e$B$F$$$k$N$G$O$J$/!“[email protected]$,e(B US-ASCII e$B$G$”$k$?$a$K%(%i!<$,=P$F$$$^$9!#$3$le(B
e$B$O$J$<$+$H$$$&$H!“e(BUTF-8 e$BEyB>$N$[$H$s$I$Ne(B encoding e$B$Oe(B ascii compatible e$B$Ge(B
e$B$”$k$N$KBP$7!"e(BUTF-16LE e$B$O$=$&$G$O$J$$$+$i$G$9!#$=$N$?$a!"e(BUS-ASCII e$B$H$N7ke(B
e$B9g!&@55,I=8=%^%C%A%s%0$G%(%s%3!<%G%#%s%0$,0c$&$H%(%i!<$,=P$^$9!#e(B

そういえば、ある encoding が ASCII-compatible
かって情報は Ruby からは見
れないのですが、見れた方がいいんじゃないでしょうか。

--- encoding.c (revision 15591)
+++ encoding.c (working copy)
@@ -372,6 +372,24 @@ enc_dummy_p(VALUE enc)
return rb_enc_dummy_p(rb_to_encoding(enc)) ? Qtrue : Qfalse;
}

+/*
+ * call-seq:
+ *   enc.ascii_compat? => true or false
+ *
+ * Returns true for ASCII-compatible encoding.
+ * An ASCII-incompatible encoding is an encoding whose strings cannot be
+ * concatenated with US-ASCII strings or matched against US-ASCII regexps.
+ *
+ *   Encoding::UTF_16LE.ascii_compat?  #=> false
+ *   Encoding::UTF_8.ascii_compat?     #=> true
+ */
+static VALUE
+enc_asciicompat_p(VALUE enc)
+{
+    return rb_enc_asciicompat(rb_to_encoding(enc)) ? Qtrue : Qfalse;
+}

static int
enc_alias(const char *alias, int idx)
{
@@ -1212,6 +1230,7 @@ Init_Encoding(void)
     rb_define_method(rb_cEncoding, "name", enc_name, 0);
     rb_define_method(rb_cEncoding, "base_encoding", enc_base_encoding, 0);
     rb_define_method(rb_cEncoding, "dummy?", enc_dummy_p, 0);
+    rb_define_method(rb_cEncoding, "ascii_compat?", enc_asciicompat_p, 0);
     rb_define_singleton_method(rb_cEncoding, "list", enc_list, 0);
     rb_define_singleton_method(rb_cEncoding, "name_list", rb_enc_name_list, 0);
     rb_define_singleton_method(rb_cEncoding, "aliases", rb_enc_aliases, 0);

こんにちは、なかむら(う)です。

In message “[ruby-dev:33917] Re: UTF_16LE.dummy? returns false”
on Feb.24,2008 13:20:45, [email protected] wrote:
| e$B$^$H$a$k$H!"!Ve(BUTF-16LE e$B$Oe(B dummy encoding e$B$G$O$J$$$,!“e(Bascii compatible e$B$Ge(B
| e$B$J$$$N$G!”?’!9ITJX$J$H$3$m$,$"$k!#e(B([email protected]$+$i!"e(BUTF-8 e$B$KJQ49$7$F07$$$^$7$ge(B
| e$B$&e(B)e$B!W$H$$$&$3$H$K$J$j$^$9!#e(B

この話を見ていて気付いたんですが、

C:>ruby -ve "p Encoding.compatible?(Encoding::UTF_16LE, Encoding::US_ASCII)"
ruby 1.9.0 (2008-02-24 revision 15591) [i386-mswin32]
#<Encoding:ASCII-8BIT>

となります。
[email protected]$7$$$s$G$7$g$&$+e(B?

それでは。

こんにちは、なかむら(う)です。

In message “[ruby-dev:33920] Re: UTF_16LE.dummy? returns false”
on Feb.24,2008 13:50:49, [email protected] wrote:
| e$B0l1~:#$N$H$3$m;EMM$I$$j$J5$$,$7$^$9!#e(BEncoding.compatible? e$B$N0z?t$Oe(B
| string e$BEy$Ne(B encoding e$B$r;}$C$?%
%V%8%’%/%H$G$9$N$G!#$=$N$?$ae(B encoding e$B%*e(B
| e$B%V%8%’%/%H$=$l<+BN$Oe(B encoding e$B$r;}$A$^$;$s$+$i!"e(BASCII-8BIT e$B$K$J$j$^$9!#e(B

ああ、なるほど。誤解してました。

| > Encoding.compatible?(Encoding::US_ASCII, Encoding::US_ASCII)
| => #<Encoding:ASCII-8BIT>
|
| Encoding.compatible? e$B$,e(B encoding e$B%*%V%8%’%/%H$b0z?t$K<h$k$Y$-$H$$$&<gD%e(B
| e$B$O$"$j$+$b$7$l$^$;$s!#e(B

encodinge$B%%V%8%’%/%H$OFCNc$H$7$FDL$7$FM_$7$$5$$O$7$^$9$M!#e(B
e$B8=>[email protected]$H!"e(Bencodinge$B%
%V%8%’%/%H$,JL$Ne(Bencodinge$B%*%V%8%’%/%H$He(B
compatiblee$B$+$I$&$+$rCN$k$?$a$K$O$$$A$$$AJ8;zNs:n$i$J$$$H$$$1e(B
e$B$J$$$H$$$&$3$H$G$9$h$M!#e(B

それでは。

成瀬です。

U.Nakamura wrote:

C:>ruby -ve "p Encoding.compatible?(Encoding::UTF_16LE, Encoding::US_ASCII)"
ruby 1.9.0 (2008-02-24 revision 15591) [i386-mswin32]
#<Encoding:ASCII-8BIT>

となります。
[email protected]$7$$$s$G$7$g$&$+e(B?

e$B0l1~:#$N$H$3$m;EMM$I$$j$J5$$,$7$^$9!#e(BEncoding.compatible?
e$B$N0z?t$Oe(B
string e$BEy$Ne(B encoding e$B$r;}$C$?%
%V%8%’%/%H$G$9$N$G!#$=$N$?$ae(B
encoding e$B%*e(B
e$B%V%8%’%/%H$=$l<+BN$Oe(B encoding e$B$r;}$A$^$;$s$+$i!"e(BASCII-8BIT
e$B$K$J$j$^$9!#e(B

Encoding.compatible?(Encoding::US_ASCII, Encoding::US_ASCII)
=> #<Encoding:ASCII-8BIT>

Encoding.compatible? e$B$,e(B encoding
e$B%*%V%8%’%/%H$b0z?t$K<h$k$Y$-$H$$$&<gD%e(B
e$B$O$"$j$+$b$7$l$^$;$s!#e(B

On Sun, 24 Feb 2008 13:20:45 +0900
“NARUSE, Yui” [email protected] wrote:

e$B$^$H$a$k$H!"!Ve(BUTF-16LE e$B$Oe(B dummy encoding e$B$G$O$J$$$,!“e(Bascii compatible e$B$Ge(B
e$B$J$$$N$G!”?’!9ITJX$J$H$3$m$,$"$k!#e(B([email protected]$+$i!"e(BUTF-8 e$B$KJQ49$7$F07$$$^$7$ge(B
e$B$&e(B)e$B!W$H$$$&$3$H$K$J$j$^$9!#e(B

dummy encoding e$B$NDj5A$r65$($F$/[email protected]$5$$!#e(Bdummy encoding
e$B$H$$$&$N$Oe(B
ascii compatible
e$B$G$J$$%(%s%3!<%G%#%s%[email protected]$H;W$C$F$$$?$s$G$9$,!#e(B

成瀬です。

sheepman wrote:

On Sun, 24 Feb 2008 13:20:45 +0900
“NARUSE, Yui” [email protected] wrote:

e$B$^$H$a$k$H!"!Ve(BUTF-16LE e$B$Oe(B dummy encoding e$B$G$O$J$$$,!“e(Bascii compatible e$B$Ge(B
e$B$J$$$N$G!”?’!9ITJX$J$H$3$m$,$"$k!#e(B([email protected]$+$i!"e(BUTF-8 e$B$KJQ49$7$F07$$$^$7$ge(B
e$B$&e(B)e$B!W$H$$$&$3$H$K$J$j$^$9!#e(B

dummy encoding e$B$NDj5A$r65$($F$/[email protected]$5$$!#e(Bdummy encoding e$B$H$$$&$N$Oe(B
ascii compatible e$B$G$J$$%(%s%3!<%G%#%s%[email protected]$H;W$C$F$$$?$s$G$9$,!#e(B

Encoding#dummy? の rdoc には、

 * A dummy encoding is a encoding which character handling is not
 * properly implemented.

とあります。

e$B$D$^$j!“e(BRubyM17N
e$B$,BP1~$7$F$$$J$$$,L>[email protected]$1CN$C$F$$$k%(%s%3!<%G%#%s%0$Ke(B
e$BM?$($i$l$k%U%i%0$G$9!#8=:_$O!“e(BISO-2022-JP e$B$He(B UTF-7
e$B$,$3$Ne(B dummy encoding
e$B$K$”$?$j$^$9!#%9%F!<%H%l%9$N>l9g$O$H$j$”$($:e(B ASCII-8BIT e$B$Ne(B
replica e$B$K$7e(B
e$B$F$7$^$($P;vB-$j$k$3$H$,$[$H$s$I$J$N$G!">-Mh$K$o$?$C$F$b!“e(Bdummy
encoding
e$B$H$Oe(B stateful encoding
e$B$N$3$H$G$”$k$H9M$($FLdBj$J$$$G$7$g$&!#e(B

成瀬です。

U.Nakamura wrote:

| > Encoding.compatible?(Encoding::US_ASCII, Encoding::US_ASCII)
| => #<Encoding:ASCII-8BIT>
|
| Encoding.compatible? e$B$,e(B encoding e$B%*%V%8%’%/%H$b0z?t$K<h$k$Y$-$H$$$&<gD%e(B
| e$B$O$"$j$+$b$7$l$^$;$s!#e(B

encodinge$B%%V%8%’%/%H$OFCNc$H$7$FDL$7$FM_$7$$5$$O$7$^$9$M!#e(B
e$B8=>[email protected]$H!"e(Bencodinge$B%
%V%8%’%/%H$,JL$Ne(Bencodinge$B%*%V%8%’%/%H$He(B
compatiblee$B$+$I$&$+$rCN$k$?$a$K$O$$$A$$$AJ8;zNs:n$i$J$$$H$$$1e(B
e$B$J$$$H$$$&$3$H$G$9$h$M!#e(B

encoding
e$B$+$i0z$/>l9g$G$9$H!"$&$+$D$K$d$k$H6uJ8;z$NNc30$K$R$C$+$+$C$Fe(B

Encoding.compatible?("".force_encoding("iso-2022-jp"),
"".force_encoding("UTF-16BE"))
=> #<Encoding:ISO-2022-JP (dummy)>
e$B$J$s$F$3$H$K$J$k$N$G!"e(BEncoding.compatible?
e$B$,M_$7$$$+$b$7$l$^$;$s!#e(B

e$B$b$C$H$b!"7k6I$d$C$F$/[email protected]$($P!“e(B
if enc1 == enc2 then enc1
elsif enc1 == US-ASCII && enc2.ascii_compat? then enc2
elsif enc2 == US-ASCII && enc1.ascii_compat? then enc1
else nil
end
くらいではあるのですが。

成瀬です。

NARUSE, Yui wrote:

e$B$J$$$H$$$&$3$H$G$9$h$M!#e(B
elsif enc2 == US-ASCII && enc1.ascii_compat? then enc1
else nil
end
くらいではあるのですが。

以上のようなことをやるようにするパッチです。
--- encoding.c (revision 15591)
+++ encoding.c (working copy)
@@ -661,8 +661,30 @@ rb_enc_compatible(VALUE str1, VALUE str2
idx1 = rb_enc_get_index(str1);
idx2 = rb_enc_get_index(str2);

-    if (idx1 < 0 || idx2 < 0)
-        return 0;
+    if (idx1 < 0 || idx2 < 0) {
+        int idx1 = rb_to_encoding_index(str1);
+        int idx2 = rb_to_encoding_index(str2);
+        if (idx1 < 0 || idx2 < 0) {
+            return 0;
+        }
+        else {
+            /* str1 and str2 are encoding objects */
+            enc1 = rb_enc_from_index(idx1);
+            enc2 = rb_enc_from_index(idx2);
+            if (idx1 == idx2) {
+                return rb_enc_from_index(idx1);
+            }
+            else if (idx1 == rb_usascii_encindex() && rb_enc_asciicompat(enc2)) {
+                return enc2;
+            }
+            else if (idx2 == rb_usascii_encindex() && rb_enc_asciicompat(enc1)) {
+                return enc1;
+            }
+            else {
+                return 0;
+            }
+        }
+    }

    if (idx1 == idx2) {
    return rb_enc_from_index(idx1);