rb_str_substr is much slower than rb_str_subpat

なかだです。

1.9e$B$G$O!"D9$$J8;zNs$KBP$9$ke(BString#[]e$B$,!"e(BFixnume$B$r;H$C$?$H$-$N$[$&e(B
e$B$,e(BRegexpe$B$r;H$C$?$H$-$h$j$b!"3JCJ$KCY$/$J$C$F$$$^$9!#e(B

#! /usr/bin/ruby -Ke
# -*- coding: utf-8 -*-
# The multibyte literal below is written out as UTF-8 text, so the source
# encoding is declared explicitly rather than left to the -Ke default.
#
# Benchmark: fetch the first / last character of a string with String#[]
# using a Regexp argument versus using an integer index.
require "benchmark"

class String
  # First character, selected with a Regexp anchored at the start of the
  # string; the /m flag lets "." match a newline too.
  def first; self[/\A./m]; end

  # Last character, selected with a Regexp anchored at the very end of the
  # string.
  def last; self[/.\z/m]; end

  # First character, selected with an integer index.
  def first1; self[0]; end

  # Last character, selected with a negative integer index.
  def last1; self[-1]; end
end

# Each entry is [subject string, iteration count]: a long multibyte string
# and a short single-byte one.
[["日本語"*1000, 10000], ["x"*10, 100000]].each do |str, n|
  # Print what each accessor returns before timing them.
  p [str.first, str.last, str.first1, str.last1]
  Benchmark.bm(6) {|b|
    b.report("first") {n.times{str.first}}
    b.report("last ") {n.times{str.last}}
    b.report("first1") {n.times{str.first1}}
    b.report("last1 ") {n.times{str.last1}}
  }
end

$ ruby19 -v /tmp/nobu/str.rb
ruby 1.9.0 (2007-09-21 revision 13475) [i686-linux]
["日", "語", "日", "語"]
user system total real
first 0.020000 0.000000 0.020000 ( 0.014829)
last 0.160000 0.000000 0.160000 ( 0.164511)
first1 0.580000 0.000000 0.580000 ( 0.579861)
last1 1.160000 0.010000 1.170000 ( 1.152732)
["x", "x", "x", "x"]
user system total real
first 0.150000 0.000000 0.150000 ( 0.154660)
last 0.210000 0.000000 0.210000 ( 0.212844)
first1 0.060000 0.000000 0.060000 ( 0.059491)
last1 0.060000 0.000000 0.060000 ( 0.058233)

e$B$I$&$be(Bstr_strlen()e$B$,%%H%k%M%C%/$C$]$$$G$9!#e(B

$ ./ruby -v /tmp/nobu/str.rb
ruby 1.9.0 (2007-09-21 revision 13478) [i686-linux]
["日", "語", "日", "語"]
user system total real
first 0.030000 0.000000 0.030000 ( 0.023918)
last 0.200000 0.000000 0.200000 ( 0.199748)
first1 0.010000 0.000000 0.010000 ( 0.006900)
last1 0.080000 0.000000 0.080000 ( 0.079601)
["x", "x", "x", "x"]
user system total real
first 0.160000 0.000000 0.160000 ( 0.164098)
last 0.190000 0.000000 0.190000 ( 0.184262)
first1 0.050000 0.000000 0.050000 ( 0.055136)
last1 0.060000 0.000000 0.060000 ( 0.059857)

Index: string.c

--- string.c (revision 13478)
+++ string.c (working copy)
@@ -737,25 +737,39 @@ rb_str_substr(VALUE str, long beg, long
rb_encoding *enc = rb_enc_get(str);
VALUE str2;

  • int slen = str_strlen(str, enc);
  • char *p, *s = RSTRING_PTR(str), *e = s + RSTRING_LEN(str);

    if (len < 0) return Qnil;

  • if (beg > slen) return Qnil;
  • if (beg < 0) {
  • beg += slen;
  • if (beg < 0) return Qnil;
  • if (!RSTRING_LEN(str)) {
  • len = 0;
    }
  • if (beg + len > slen) {
  • len = slen - beg;
  • if (beg < 0) {
  • if (len > -beg) len = -beg;
  • if (-beg * rb_enc_mbmaxlen(enc) < RSTRING_LEN(str) / 8) {
  •  beg = -beg;
    
  •  while (len++ < beg && (e = rb_enc_prev_char(s, e, enc)) != 0);
    
  •  p = e;
    
  •  if (!p) return Qnil;
    
  •  while (beg-- > 0 && (p = rb_enc_prev_char(s, p, enc)) != 0);
    
  •  if (!p) return Qnil;
    
  •  len = e - p;
    
  •  goto sub;
    
  • }
  • else {
  •  beg += str_strlen(str, enc);
    
  •  if (beg < 0) return Qnil;
    
  • }
    }
  • if (len < 0) {
  • len = 0;
  • else if (beg > 0 && beg > str_strlen(str, enc)) {
  • return Qnil;
    }
    if (len == 0) {
  • str2 = rb_str_new5(str,0,0);
  • p = 0;
    }
    else {
  • char *p = str_nth(RSTRING_PTR(str), RSTRING_END(str), beg, enc);
  • str2 = rb_str_new5(str, p, str_offset(p, RSTRING_END(str), len,
    enc));
  • p = str_nth(s, e, beg, enc);
  • len = str_offset(p, e, len, enc);
    }
  • sub:
  • str2 = rb_str_new5(str, p, len);
    rb_enc_copy(str2, str);
    OBJ_INFECT(str2, str);

なかだです。

At Fri, 21 Sep 2007 05:37:59 +0900,
Nobuyoshi N. wrote in [ruby-dev:31806]:

1.9e$B$G$O!"D9$$J8;zNs$KBP$9$ke(BString#[]e$B$,!"e(BFixnume$B$r;H$C$?$H$-$N$[$&e(B
e$B$,e(BRegexpe$B$r;H$C$?$H$-$h$j$b!"3JCJ$KCY$/$J$C$F$$$^$9!#e(B

“-”[1,1]e$B$,e(BIndexErrore$B$K$J$C$F$7$^$C$F$$$?$N$G!">/!9=$@5!#e(B

Index: string.c

--- string.c (revision 13481)
+++ string.c (working copy)
@@ -737,25 +737,41 @@ rb_str_substr(VALUE str, long beg, long
rb_encoding *enc = rb_enc_get(str);
VALUE str2;

  • int slen = str_strlen(str, enc);
  • char *p, *s = RSTRING_PTR(str), *e = s + RSTRING_LEN(str);

    if (len < 0) return Qnil;

  • if (beg > slen) return Qnil;
  • if (beg < 0) {
  • beg += slen;
  • if (beg < 0) return Qnil;
  • if (!RSTRING_LEN(str)) {
  • len = 0;
    }
  • if (beg + len > slen) {
  • len = slen - beg;
  • if (beg < 0) {
  • if (len > -beg) len = -beg;
  • if (-beg * rb_enc_mbmaxlen(enc) < RSTRING_LEN(str) / 8) {
  •  beg = -beg;
    
  •  while (len++ < beg && (e = rb_enc_prev_char(s, e, enc)) != 0);
    
  •  p = e;
    
  •  if (!p) return Qnil;
    
  •  while (beg-- > 0 && (p = rb_enc_prev_char(s, p, enc)) != 0);
    
  •  if (!p) return Qnil;
    
  •  len = e - p;
    
  •  goto sub;
    
  • }
  • else {
  •  beg += str_strlen(str, enc);
    
  •  if (beg < 0) return Qnil;
    
  • }
    }
  • if (len < 0) {
  • len = 0;
  • else if (beg > 0 && beg > str_strlen(str, enc)) {
  • return Qnil;
    }
    if (len == 0) {
  • str2 = rb_str_new5(str,0,0);
  • p = 0;
  • }
  • else if ((p = str_nth(s, e, beg, enc)) == e) {
  • len = 0;
    }
    else {
  • char *p = str_nth(RSTRING_PTR(str), RSTRING_END(str), beg, enc);
  • str2 = rb_str_new5(str, p, str_offset(p, RSTRING_END(str), len,
    enc));
  • len = str_offset(p, e, len, enc);
    }
  • sub:
  • str2 = rb_str_new5(str, p, len);
    rb_enc_copy(str2, str);
    OBJ_INFECT(str2, str);