SEGV on "abcd\xf0".force_encoding("utf-8").reverse

以下のようにすると SEGV します。

% ./ruby -ve 'p "abcd\xf0".force_encoding("utf-8").reverse'
ruby 1.9.0 (2007-12-04 patchlevel 0) [i686-linux]
-e:1: -- control frame ----------
c:0004 p:---- s:0009 b:0009 l:000008 d:000008 CFUNC :p
c:0003 p:0023 s:0005 b:0005 l:000004 d:000004 TOP -e:1
c:0002 p:---- s:0003 b:0003 l:000002 d:000002 FINISH :inherited
c:0001 p:---- s:0001 b:-001 l:000000 d:000000 ------

DBG> : "-e:1:in `<main>'"
-- backtrace of native function call (Use addr2line) --
0x8102232
0x812141c
0x81214bb
0x80ca351
0xb7f23440
0x8058f8d
0x805c6e5
0x805c92b
0x805cb9d
0x807e8cb
0x806fb90
0x806fbec
0x80f8361
0x80ffc66
0x80ff7da
0x80fc1a1
0x80ffe3f
0x8100484
0x805aadd
0x805ab8c
0x8057d74
0xb7d84ea8
0x8057c91

なかだです。

At Tue, 4 Dec 2007 15:29:09 +0900,
Tanaka A. wrote in [ruby-dev:32448]:

以下のようにすると SEGV します。

% ./ruby -ve 'p "abcd\xf0".force_encoding("utf-8").reverse'
ruby 1.9.0 (2007-12-04 patchlevel 0) [i686-linux]
[BUG] Segmentation fault
ruby 1.9.0 (2007-12-04) [i686-linux]

どうなるのがいいんでしょうね。例外?

Index: string.c

--- string.c (revision 14096)
+++ string.c (working copy)
@@ -2709,4 +2709,5 @@ rb_str_reverse(VALUE str)
     VALUE obj;
     char *s, *e, *p;
+    int cr;
 
     if (RSTRING_LEN(str) <= 1) return rb_str_dup(str);
@@ -2716,6 +2717,7 @@ rb_str_reverse(VALUE str)
     p = RSTRING_END(obj);
 
+    cr = ENC_CODERANGE(str);
     if (RSTRING_LEN(str) > 1) {
-        if (rb_enc_mbmaxlen(enc) == 1) {
+        if (rb_enc_mbmaxlen(enc) == 1 || cr == ENC_CODERANGE_7BIT) {
             while (s < e) {
                 *--p = *s++;
@@ -2726,6 +2728,16 @@ rb_str_reverse(VALUE str)
                 int clen = rb_enc_mbclen(s, e, enc);
 
-                if (clen == 0) {
+                switch (clen) {
+                  default:
+                    if (s + clen <= e) break;
+#if !RAISE_ON_REVERSING_BROKEN_CHAR
+                    clen = e - s;
+                    break;
+#endif
+                  case 0:
                     rb_raise(rb_eArgError, "invalid mbstring sequence");
+                  case 1:
+                    *--p = *s++;
+                    continue;
                 }
                 p -= clen;
@@ -2738,4 +2750,5 @@ rb_str_reverse(VALUE str)
     OBJ_INFECT(obj, str);
     rb_enc_associate(obj, enc);
+    ENC_CODERANGE_SET(obj, cr);
 
     return obj;

In article [email protected],
Nobuyoshi N. [email protected] writes:

どうなるのがいいんでしょうね。例外?

思ったんですが、rb_enc_mbclen が文字列終端を越えるような値は
返さないようにするのはどうですかね。

rb_enc_mbclen はエンコーディングの問題をちゃんと検出するわけ
ではないので、どうせならエラーには決してならないものとして定
義しなおすというか。

At 21:10 07/12/04, Tanaka A. wrote:

義しなおすというか。
そうですね。Ruby
プログラマから見ると、「この関数はエンコーディング
をチェック (validate)
している」、「この関数はチェックしてない」
と分かるとありがたい気がします。エンコーディングによって、
具体的な値や場所によって振る舞いが違うとなかなか対応が難しいと思います。

宜しく、 Martin.

#-#-# Martin J. Du"rst, Assoc. Professor, Aoyama Gakuin University
#-#-# http://www.sw.it.aoyama.ac.jp mailto:[email protected]