Array#slice! may be too slow and allocate memory too much

e$B1sF#$H?=$7$^$9!#e(B

e$B0J2<$N%W%m%0%i%`$,$d$?$iCY$$$G$9!#$^$?!"%a%b%j$rBgNL$K>CHq$7$^$9!#e(B

$ time ./ruby -ve ’

a = [0] * 10000
(1 … 10000).map { a.slice!(0, 1) }

ruby 1.9.0 (2008-03-07 revision 15720) [i686-linux]

real 0m10.940s
user 0m4.200s
sys 0m6.710s

e$BG[Ns$N6&M-$,860x$N$h$&$G$9!#e(Ba.slice!(i, n) e$B$Oe(B Ruby
e$B$G=q$/$He(B

t = a[i, n]
a[i, n] = []
return t

e$B$H$$$&Iw$K<BAu$5$l$F$$$F!"e(B

  1. t = a[i, n] e$B$,e(B a e$B$He(B t e$B$r6&M->uBV$K$9$ke(B
  2. a[i, n] = [] e$B$,e(B a e$B$re(B rb_ary_modify e$B$9$ke(B
  3. rb_ary_modify e$B$NCf$Ge(B a e$BA4BN$re(B MEMCPY e$B$9$ke(B

e$B$H$$$&Iw$K<B9T$5$l$^$9!#$3$l$K$h$C$F!"e(B

  • 3 e$B$Ne(B MEMCPY e$B$,Kh2se(B O(a.size) e$B$^$k$^$k$+$+$C$FCY$$e(B
  • t e$B$,e(B a.size e$B%P%$%H$rL5BL$K3NJ]$7$?$^$^$K$J$je(B (n
    e$B$G==J,e(B) e$B!"%a%b%j$r05Gw$9$ke(B

e$B$H$$$&$h$&$K$J$C$F$$$^$9!#e(B

e$BK<AE*$J2r7h$G$O$J$$$G$9$,!"e(Bslice! e$B$KFC2=$7$?e(B 2
e$B$D$N:GE,2=$r$7$F$_$^$7$?!#e(B

  • rb_ary_splice e$B$Ge(B rb_ary_modify e$B$9$k$H$-!"I,[email protected]$1e(B
    MEMCPY e$B$9$ke(B
  • a.slice!(i, n) e$B$Ge(B n e$B$,e(B a.size / 2 e$B$h$j>.$5$$>l9g$Oe(B t
    e$B$r6&M-$;$:$K:n$ke(B

e$B%Y%s%A%^!<%/$G$9!#C10L$OIC!#e(B

±---------±------------±----------------------------------------------+
| e$B:GE,2=A0e(B | e$B:GE,2=8ee(B | |
| user sys | user sys | a = [0] * 10000; b = (1 … 10000).map { … } |
±---------±------------±----------------------------------------------+
| 4.2 6.7 | 0.14 0.03 | a.slice!(0, 1) |
| 0.26 6.6 | 0.050 0.030 | t, a = a, a.slice!(0, a.size - 1); t |
±---------±------------±----------------------------------------------+

e$B$J$*!"$3$N:GE,2=$rF~$l$F$b!"0J2<$N%3!<%I$G$O$d$C$Q$jCY$$$G$9!#e(B
e$B$3$l$r2?$H$+$9$k$N$O$$$m$$$m$HLLE]$=$&$G$9!#e(B

a = [0] * 10000
(1 … 10000).map { t = a[0, 1]; a[0, 1] = []; t }

Index: array.c

— array.c (revision 15726)
+++ array.c (working copy)
@@ -647,6 +647,26 @@
}

VALUE
+rb_ary_subseq_copy(VALUE ary, long beg, long len)
+{

  • VALUE klass, ary2;
  • if (beg > RARRAY_LEN(ary)) return Qnil;
  • if (beg < 0 || len < 0) return Qnil;
  • if (RARRAY_LEN(ary) < len || RARRAY_LEN(ary) < beg + len) {
  • len = RARRAY_LEN(ary) - beg;
  • }
  • klass = rb_obj_class(ary);
  • ary2 = ary_new(klass, len);
  • if (len > 0) {
  • MEMCPY(RARRAY_PTR(ary2), RARRAY_PTR(ary) + beg, VALUE, len);
  • RARRAY(ary2)->len = len;
  • }
  • return ary2;
    +}

+VALUE
rb_ary_subseq(VALUE ary, long beg, long len)
{
VALUE klass, ary2, shared;
@@ -973,8 +993,8 @@
rpl = rb_ary_to_ary(rpl);
rlen = RARRAY_LEN(rpl);
}

  • rb_ary_modify(ary);
    if (beg >= RARRAY_LEN(ary)) {
  • rb_ary_modify(ary);
    len = beg + rlen;
    if (len >= ARY_CAPA(ary)) {
    RESIZE_CAPA(ary, len);
    @@ -993,15 +1013,30 @@
    }

    alen = RARRAY_LEN(ary) + rlen - len;

  • if (alen >= ARY_CAPA(ary)) {
  •  RESIZE_CAPA(ary, alen);
    
  • rb_ary_modify_check(ary);
  • if (ARY_SHARED_P(ary)) {
  •  VALUE *ptr;
    
  •  ptr = ALLOC_N(VALUE, alen);
    
  •  FL_UNSET(ary, ELTS_SHARED);
    
  •  MEMCPY(ptr, RARRAY_PTR(ary), VALUE, beg);
    
  •  MEMCPY(ptr + beg + rlen, RARRAY_PTR(ary) + beg + len,
    
  •   VALUE, RARRAY_LEN(ary) - (beg + len));
    
  •  RARRAY(ary)->aux.capa = RARRAY(ary)->len = alen;
    
  •  RARRAY(ary)->ptr = ptr;
    
    }
  • else {
  •  if (alen >= ARY_CAPA(ary)) {
    
  • RESIZE_CAPA(ary, alen);
  •  }
    
  •  if (len != rlen) {
    
  • MEMMOVE(RARRAY_PTR(ary) + beg + rlen,
  •  RARRAY_PTR(ary) + beg + len,
    
  •  VALUE, RARRAY_LEN(ary) - (beg + len));
    
  • RARRAY(ary)->len = alen;
  •  }
    
  • }
  • if (len != rlen) {
  •  MEMMOVE(RARRAY_PTR(ary) + beg + rlen, RARRAY_PTR(ary) + beg + 
    

len,

  •    VALUE, RARRAY_LEN(ary) - (beg + len));
    
  •  RARRAY(ary)->len = alen;
    
  • }
    if (rlen > 0) {
    MEMMOVE(RARRAY_PTR(ary) + beg, RARRAY_PTR(rpl), VALUE, rlen);
    }
    @@ -1790,7 +1825,12 @@
    pos = RARRAY_LEN(ary) + pos;
    if (pos < 0) return Qnil;
    }
  • arg2 = rb_ary_subseq(ary, pos, len);
  • if (len * 2 < RARRAY_LEN(ary)) {
  •  arg2 = rb_ary_subseq_copy(ary, pos, len);
    
  • }
  • else {
  •  arg2 = rb_ary_subseq(ary, pos, len);
    
  • }
    rb_ary_splice(ary, pos, len, Qundef); /* Qnil/rb_ary_new2(0) */
    return arg2;
    }

e$B$^$D$b$He(B e$B$f$-$R$m$G$9e(B

e$BCY$/$J$j$^$7$?!#e(B

In message “Re: [ruby-dev:34005] Array#slice! may be too slow and
allocate memory too much”
on Fri, 7 Mar 2008 21:00:53 +0900, “Yusuke ENDOH” [email protected]
writes:

|e$B0J2<$N%W%m%0%i%`$,$d$?$iCY$$$G$9!#$^$?!"%a%b%j$rBgNL$K>CHq$7$^$9!#e(B
|
|$ time ./ruby -ve ’
|> a = [0] * 10000
|> (1 … 10000).map { a.slice!(0, 1) }
|> ’
|ruby 1.9.0 (2008-03-07 revision 15720) [i686-linux]
|
|real 0m10.940s
|user 0m4.200s
|sys 0m6.710s
|
|
|e$BG[Ns$N6&M-$,860x$N$h$&$G$9!#e(B

e$B9M$($F$_$k$He(Bsplicee$B$OI,$:D>8e$KG[Ns$r=q$-49$($k$N$G6&M-$7$F$Oe(B
e$B$$$1$J$$$N$G$7$g$&!#6&M-$r9T$o$J$$e(Bsubseqe$B$r;H$&$HB.EY$,7`E*$Ke(B
e$B2~A1$7$^$7$?!#$3$l$Oe(B1.8e$B$K$bE,MQ$5$l$k$Y$-$G$7$g$&!#e(B

|e$B$3$l$r2?$H$+$9$k$N$O$$$m$$$m$HLLE]$=$&$G$9!#e(B
|
|a = [0] * 10000
|(1 … 10000).map { t = a[0, 1]; a[0, 1] = []; t }

e$B$3$l$O$3$l$G9M$($J$$$H$$$1$^$;$s$+$M$(!#e(B