Tests for coderange information

成瀬です。

Ruby レイヤーから string の CODERANGE
情報が取得したいです。

UTF-8 文字カウントの高速化を入れて気付いたのですが、r15519 までは str1 +
str2 や str * 3 等で CODERANGE の情報が消失していました。これは修正した
のでいいのですが、現在は直してもテストがないのできちんと直っているか後々
確認できません。

一案としては、
--- string.c (revision 15519)
+++ string.c (working copy)
@@ -6009,6 +6009,33 @@ rb_str_is_ascii_only_p(VALUE str)
     return cr == ENC_CODERANGE_7BIT ? Qtrue : Qfalse;
 }

+/*
+ * vendor-specific method for testing CRuby coderange
+ */
+static VALUE
+rb_str_coderange(VALUE self)
+{
+    VALUE val;
+    switch (ENC_CODERANGE(self))
+    {
+    case ENC_CODERANGE_UNKNOWN:
+        val = LONG2FIX(0);
+        break;
+    case ENC_CODERANGE_7BIT:
+        val = LONG2FIX(1);
+        break;
+    case ENC_CODERANGE_VALID:
+        val = LONG2FIX(2);
+        break;
+    case ENC_CODERANGE_BROKEN:
+        val = LONG2FIX(3);
+        break;
+    default:
+        rb_raise(rb_eRuntimeError, "invalid coderange value");
+    }
+    return val;
+}
+
 /**********************************************************************
  *  Document-class: Symbol
  *
@@ -6424,6 +6451,7 @@ Init_String(void)
     rb_define_method(rb_cString, "force_encoding", rb_str_force_encoding, 1);
     rb_define_method(rb_cString, "valid_encoding?", rb_str_valid_encoding_p, 0);
     rb_define_method(rb_cString, "ascii_only?", rb_str_is_ascii_only_p, 0);
+    rb_define_method(rb_cString, "coderange", rb_str_coderange, 0);

     id_to_s = rb_intern("to_s");

こんな感じで、以下のようなテストを書くことができます。

#! ruby19
# coding: utf-8

require 'test/unit'

# Exercises the coderange flag recorded on String objects.
#
# Relies on String#coderange, an experimental method that is not part of stock
# Ruby; it must be provided by the interpreter under test and is expected to
# return one of the integer codes below.
class TestCoderange < Test::Unit::TestCase
  # Integer codes expected from String#coderange.
  CODERANGE_UNKNOWN = 0
  CODERANGE_7BIT = 1
  CODERANGE_VALID = 2
  CODERANGE_BROKEN = 3

  # Builds the fixtures. The "v" variants have had valid_encoding? called on
  # them; the assertions in test_literal expect that call to leave a known
  # coderange (VALID or 7BIT) on the string, while the plain variants are
  # expected to still report UNKNOWN.
  def setup
    @str1 = "いろは"
    @str1v = @str1.dup
    @str1v.valid_encoding?
    @str2 = "アイウ"
    @str2v = @str2.dup
    @str2v.valid_encoding?
    @str3 = "ABC"
    @str3v = "ABC"
    @str3v.valid_encoding?
  end

  # Coderange of strings taken straight from setup, before any concatenation.
  def test_literal
    assert_equal(CODERANGE_UNKNOWN, @str1.coderange)
    assert_equal(CODERANGE_VALID, @str1v.coderange)
    assert_equal(CODERANGE_UNKNOWN, @str3.coderange)
    assert_equal(CODERANGE_7BIT, @str3v.coderange)
  end

  # String#+ where the left operand is a multibyte string, covering every
  # combination of scanned ("v") and unscanned (dup) operands.
  def test_plus_valid
    assert_equal(CODERANGE_UNKNOWN, (@str1.dup + @str2.dup).coderange)
    assert_equal(CODERANGE_UNKNOWN, (@str1v + @str2.dup).coderange)
    assert_equal(CODERANGE_UNKNOWN, (@str1.dup + @str2v).coderange)
    assert_equal(CODERANGE_VALID, (@str1v + @str2v).coderange)
    assert_equal(CODERANGE_VALID, (@str1.dup + @str3.dup).coderange)
    assert_equal(CODERANGE_VALID, (@str1v + @str3.dup).coderange)
    assert_equal(CODERANGE_VALID, (@str1.dup + @str3v).coderange)
    assert_equal(CODERANGE_VALID, (@str1v + @str3v).coderange)
  end

  # String#+ where both operands are ASCII-only strings.
  def test_plus_7bit
    assert_equal(CODERANGE_UNKNOWN, (@str3.dup + @str3.dup).coderange)
    assert_equal(CODERANGE_UNKNOWN, (@str3v + @str3.dup).coderange)
    assert_equal(CODERANGE_UNKNOWN, (@str3.dup + @str3v).coderange)
    assert_equal(CODERANGE_7BIT, (@str3v + @str3v).coderange)
  end
end

In article [email protected],
“NARUSE, Yui” [email protected] writes:

> UTF-8 文字カウントの高速化を入れて気付いたのですが、r15519 までは str1 +
> str2 や str * 3 等で CODERANGE の情報が消失していました。これは修正した
> のでいいのですが、現在は直してもテストがないのできちんと直っているか後々

その修正は間違っているのではないでしょうか。

以下のように、"\xa1\xa1" という正しい EUC-JP 文字列が生成さ
れたときでも valid_encoding? が真にならないようになっていま
す。

% ./ruby -ve '
s = "\xa1".force_encoding("euc-jp")
p s.valid_encoding?
p((s + s).valid_encoding?)'
ruby 1.9.0 (2008-02-17 revision 15520) [i686-linux]
false
false

% ./ruby -ve '
s = "\xa1".force_encoding("euc-jp")
p s.valid_encoding?
p((s*2).valid_encoding?)'
ruby 1.9.0 (2008-02-17 revision 15520) [i686-linux]
false
false

成瀬です。

Tanaka A. wrote:

> れたときでも valid_encoding? が真にならないようになっていま
> す。

あー、BROKEN + ? は UNKNOWN
なのですね。直しておきます。