Forum: Ruby-core [ruby-trunk - Bug #8048][Open] require() features_index bloats size of ruby heap

Posted by tmm1 (Aman Gupta) (Guest)
on 2013-03-08 07:43
(Received via mailing list)
Issue #8048 has been reported by tmm1 (Aman Gupta).

----------------------------------------
Bug #8048: require() features_index bloats size of ruby heap
https://bugs.ruby-lang.org/issues/8048

Author: tmm1 (Aman Gupta)
Status: Open
Priority: Normal
Assignee:
Category:
Target version:
ruby -v: 2.0.0-p0


The new features_index for require() in 2.0.0 creates a lot of extra 
ARRAY and STRING objects. In a big rails application, there are ~70k 
extra strings and ~70k extra arrays on the heap after boot.

$ RAILS_ENV=production ruby -e' puts RUBY_VERSION; require 
"./config/environment"; p [:NUM_LOADED_FEATURES, $LOADED_FEATURES.size]; 
GC.start; p ObjectSpace.count_objects.select{ |k,v| [:T_STRING, 
:T_ARRAY, :T_HASH].include? k } '
1.9.3
[:NUM_LOADED_FEATURES, 2933]
{:T_STRING=>291147, :T_ARRAY=>97402, :T_HASH=>9439}

$ RAILS_ENV=production ruby -e' puts RUBY_VERSION; require 
"./config/environment"; p [:NUM_LOADED_FEATURES, $LOADED_FEATURES.size]; 
GC.start; p ObjectSpace.count_objects.select{ |k,v| [:T_STRING, 
:T_ARRAY, :T_HASH].include? k } '
2.0.0
[:NUM_LOADED_FEATURES, 2945]
{:T_STRING=>363183, :T_ARRAY=>167871, :T_HASH=>9640}


The following patch reduces the number of arrays used by the 
features_index:

diff --git a/load.c b/load.c
index ea22d5b..1868518 100644
--- a/load.c
+++ b/load.c
@@ -186,11 +186,16 @@ features_index_add_single(VALUE short_feature, 
VALUE offset)
 {
     VALUE features_index, this_feature_index;
     features_index = get_loaded_features_index_raw();
-    if ((this_feature_index = rb_hash_lookup(features_index, 
short_feature)) == Qnil) {
-  this_feature_index = rb_ary_new();
-  rb_hash_aset(features_index, short_feature, this_feature_index);
+    this_feature_index = rb_hash_lookup(features_index, short_feature);
+
+    if (this_feature_index == Qnil) {
+        rb_hash_aset(features_index, short_feature, offset);
+    } else if (RB_TYPE_P(this_feature_index, T_FIXNUM)) {
+        this_feature_index = rb_ary_new3(2, this_feature_index, 
offset);
+        rb_hash_aset(features_index, short_feature, 
this_feature_index);
+    } else if (RB_TYPE_P(this_feature_index, T_ARRAY)) {
+        rb_ary_push(this_feature_index, offset);
     }
-    rb_ary_push(this_feature_index, offset);
 }

 /* Add to the loaded-features index all the required entries for
@@ -389,8 +394,16 @@ rb_feature_p(const char *feature, const char *ext, 
int rb, int expanded, const c
        or ends in '/'.  This includes both match forms above, as well
        as any distractors, so we may ignore all other entries in 
`features`.
      */
-    for (i = 0; this_feature_index != Qnil && i < 
RARRAY_LEN(this_feature_index); i++) {
-  long index = FIX2LONG(rb_ary_entry(this_feature_index, i));
+    for (i = 0; this_feature_index != Qnil; i++) {
+  long index;
+  if (RB_TYPE_P(this_feature_index, T_ARRAY)) {
+    if (i >= RARRAY_LEN(this_feature_index)) break;
+    index = FIX2LONG(rb_ary_entry(this_feature_index, i));
+  } else {
+    if (i > 0) break;
+    index = FIX2LONG(this_feature_index);
+  }
+
   v = RARRAY_PTR(features)[index];
   f = StringValuePtr(v);
   if ((n = RSTRING_LEN(v)) < len) continue;
Posted by tmm1 (Aman Gupta) (Guest)
on 2013-03-09 03:12
(Received via mailing list)
Issue #8048 has been updated by tmm1 (Aman Gupta).


Thank you.

Is there some reason loaded_features_index cannot use 
st_init_strtable()? Will you accept an additional patch to this effect?
----------------------------------------
Bug #8048: require() features_index bloats size of ruby heap
https://bugs.ruby-lang.org/issues/8048#change-37408

Author: tmm1 (Aman Gupta)
Status: Closed
Priority: Normal
Assignee:
Category:
Target version:
ruby -v: 2.0.0-p0


The new features_index for require() in 2.0.0 creates a lot of extra 
ARRAY and STRING objects. In a big rails application, there are ~70k 
extra strings and ~70k extra arrays on the heap after boot.

$ RAILS_ENV=production ruby -e' puts RUBY_VERSION; require 
"./config/environment"; p [:NUM_LOADED_FEATURES, $LOADED_FEATURES.size]; 
GC.start; p ObjectSpace.count_objects.select{ |k,v| [:T_STRING, 
:T_ARRAY, :T_HASH].include? k } '
1.9.3
[:NUM_LOADED_FEATURES, 2933]
{:T_STRING=>291147, :T_ARRAY=>97402, :T_HASH=>9439}

$ RAILS_ENV=production ruby -e' puts RUBY_VERSION; require 
"./config/environment"; p [:NUM_LOADED_FEATURES, $LOADED_FEATURES.size]; 
GC.start; p ObjectSpace.count_objects.select{ |k,v| [:T_STRING, 
:T_ARRAY, :T_HASH].include? k } '
2.0.0
[:NUM_LOADED_FEATURES, 2945]
{:T_STRING=>363183, :T_ARRAY=>167871, :T_HASH=>9640}


The following patch reduces the number of arrays used by the 
features_index:

diff --git a/load.c b/load.c
index ea22d5b..1868518 100644
--- a/load.c
+++ b/load.c
@@ -186,11 +186,16 @@ features_index_add_single(VALUE short_feature, 
VALUE offset)
 {
     VALUE features_index, this_feature_index;
     features_index = get_loaded_features_index_raw();
-    if ((this_feature_index = rb_hash_lookup(features_index, 
short_feature)) == Qnil) {
-  this_feature_index = rb_ary_new();
-  rb_hash_aset(features_index, short_feature, this_feature_index);
+    this_feature_index = rb_hash_lookup(features_index, short_feature);
+
+    if (this_feature_index == Qnil) {
+        rb_hash_aset(features_index, short_feature, offset);
+    } else if (RB_TYPE_P(this_feature_index, T_FIXNUM)) {
+        this_feature_index = rb_ary_new3(2, this_feature_index, 
offset);
+        rb_hash_aset(features_index, short_feature, 
this_feature_index);
+    } else if (RB_TYPE_P(this_feature_index, T_ARRAY)) {
+        rb_ary_push(this_feature_index, offset);
     }
-    rb_ary_push(this_feature_index, offset);
 }

 /* Add to the loaded-features index all the required entries for
@@ -389,8 +394,16 @@ rb_feature_p(const char *feature, const char *ext, 
int rb, int expanded, const c
        or ends in '/'.  This includes both match forms above, as well
        as any distractors, so we may ignore all other entries in 
`features`.
      */
-    for (i = 0; this_feature_index != Qnil && i < 
RARRAY_LEN(this_feature_index); i++) {
-  long index = FIX2LONG(rb_ary_entry(this_feature_index, i));
+    for (i = 0; this_feature_index != Qnil; i++) {
+  long index;
+  if (RB_TYPE_P(this_feature_index, T_ARRAY)) {
+    if (i >= RARRAY_LEN(this_feature_index)) break;
+    index = FIX2LONG(rb_ary_entry(this_feature_index, i));
+  } else {
+    if (i > 0) break;
+    index = FIX2LONG(this_feature_index);
+  }
+
   v = RARRAY_PTR(features)[index];
   f = StringValuePtr(v);
   if ((n = RSTRING_LEN(v)) < len) continue;
Posted by tmm1 (Aman Gupta) (Guest)
on 2013-03-09 03:55
(Received via mailing list)
Issue #8048 has been updated by tmm1 (Aman Gupta).


diff --git a/load.c b/load.c
index 6330e50..3010b18 100644
--- a/load.c
+++ b/load.c
@@ -171,7 +171,7 @@ reset_loaded_features_snapshot(void)
     rb_ary_replace(vm->loaded_features_snapshot, vm->loaded_features);
 }

-static VALUE
+static struct st_table *
 get_loaded_features_index_raw(void)
 {
     return GET_VM()->loaded_features_index;
@@ -186,12 +186,19 @@ get_loading_table(void)
 static void
 features_index_add_single(VALUE short_feature, VALUE offset)
 {
-    VALUE features_index, this_feature_index;
+    struct st_table *features_index;
+    VALUE this_feature_index = Qnil;
+    char *short_feature_cstr;
+
+    Check_Type(offset, T_FIXNUM);
+    Check_Type(short_feature, T_STRING);
+    short_feature_cstr = RSTRING_PTR(short_feature);
+
     features_index = get_loaded_features_index_raw();
-    this_feature_index = rb_hash_lookup(features_index, short_feature);
+    st_lookup(features_index, (st_data_t)short_feature_cstr, (st_data_t 
*)&this_feature_index);

     if (NIL_P(this_feature_index)) {
-  rb_hash_aset(features_index, short_feature, offset);
+  st_insert(features_index, (st_data_t)ruby_strdup(short_feature_cstr), 
(st_data_t)offset);
     }
     else if (RB_TYPE_P(this_feature_index, T_FIXNUM)) {
   VALUE feature_indexes[2];
@@ -199,7 +206,7 @@ features_index_add_single(VALUE short_feature, VALUE 
offset)
   feature_indexes[1] = offset;
   this_feature_index = rb_ary_tmp_new(numberof(feature_indexes));
   rb_ary_cat(this_feature_index, feature_indexes, 
numberof(feature_indexes));
-  rb_hash_aset(features_index, short_feature, this_feature_index);
+  st_insert(features_index, (st_data_t)short_feature_cstr, 
(st_data_t)this_feature_index);
     }
     else {
   Check_Type(this_feature_index, T_ARRAY);
@@ -254,7 +261,14 @@ features_index_add(VALUE feature, VALUE offset)
     }
 }

-static VALUE
+static int
+loaded_features_index_clear_i(st_data_t key, st_data_t val, st_data_t 
arg)
+{
+    xfree((char*) key);
+    return ST_DELETE;
+}
+
+static st_table *
 get_loaded_features_index(void)
 {
     VALUE features;
@@ -264,7 +278,7 @@ get_loaded_features_index(void)
     if (!rb_ary_shared_with_p(vm->loaded_features_snapshot, 
vm->loaded_features)) {
   /* The sharing was broken; something (other than us in 
rb_provide_feature())
      modified loaded_features.  Rebuild the index. */
-  rb_hash_clear(vm->loaded_features_index);
+  st_foreach(vm->loaded_features_index, loaded_features_index_clear_i, 
0);
   features = vm->loaded_features;
   for (i = 0; i < RARRAY_LEN(features); i++) {
       VALUE entry, as_str;
@@ -357,10 +371,10 @@ loaded_feature_path_i(st_data_t v, st_data_t b, 
st_data_t f)
 static int
 rb_feature_p(const char *feature, const char *ext, int rb, int 
expanded, const char **fn)
 {
-    VALUE features, features_index, feature_val, this_feature_index, v, 
p, load_path = 0;
+    VALUE features, feature_val, this_feature_index = Qnil, v, p, 
load_path = 0;
     const char *f, *e;
     long i, len, elen, n;
-    st_table *loading_tbl;
+    st_table *loading_tbl, *features_index;
     st_data_t data;
     int type;

@@ -379,7 +393,7 @@ rb_feature_p(const char *feature, const char *ext, 
int rb, int expanded, const c
     features_index = get_loaded_features_index();

     feature_val = rb_str_new(feature, len);
-    this_feature_index = rb_hash_lookup(features_index, feature_val);
+    st_lookup(features_index, (st_data_t)feature, (st_data_t 
*)&this_feature_index);
     /* We search `features` for an entry such that either
          "#{features[i]}" == "#{load_path[j]}/#{feature}#{e}"
        for some j, or
@@ -1120,8 +1134,7 @@ Init_load()
     rb_define_virtual_variable("$LOADED_FEATURES", get_loaded_features, 
0);
     vm->loaded_features = rb_ary_new();
     vm->loaded_features_snapshot = rb_ary_tmp_new(0);
-    vm->loaded_features_index = rb_hash_new();
-    RBASIC(vm->loaded_features_index)->klass = 0;
+    vm->loaded_features_index = st_init_strtable();

     rb_define_global_function("load", rb_f_load, -1);
     rb_define_global_function("require", rb_f_require, 1);
diff --git a/vm.c b/vm.c
index 9126502..b4132cd 100644
--- a/vm.c
+++ b/vm.c
@@ -1565,7 +1565,6 @@ rb_vm_mark(void *ptr)
   RUBY_MARK_UNLESS_NULL(vm->expanded_load_path);
   RUBY_MARK_UNLESS_NULL(vm->loaded_features);
   RUBY_MARK_UNLESS_NULL(vm->loaded_features_snapshot);
-  RUBY_MARK_UNLESS_NULL(vm->loaded_features_index);
   RUBY_MARK_UNLESS_NULL(vm->top_self);
   RUBY_MARK_UNLESS_NULL(vm->coverages);
   rb_gc_mark_locations(vm->special_exceptions, vm->special_exceptions + 
ruby_special_error_count);
@@ -1573,6 +1572,9 @@ rb_vm_mark(void *ptr)
   if (vm->loading_table) {
       rb_mark_tbl(vm->loading_table);
   }
+  if (vm->loaded_features_index) {
+      rb_mark_tbl(vm->loaded_features_index);
+  }

   rb_vm_trace_mark_event_hooks(&vm->event_hooks);

diff --git a/vm_core.h b/vm_core.h
index 1838f2a..2ac57fc 100644
--- a/vm_core.h
+++ b/vm_core.h
@@ -364,7 +364,7 @@ typedef struct rb_vm_struct {
     VALUE expanded_load_path;
     VALUE loaded_features;
     VALUE loaded_features_snapshot;
-    VALUE loaded_features_index;
+    struct st_table *loaded_features_index;
     struct st_table *loading_table;

     /* signal */

----------------------------------------
Bug #8048: require() features_index bloats size of ruby heap
https://bugs.ruby-lang.org/issues/8048#change-37409

Author: tmm1 (Aman Gupta)
Status: Closed
Priority: Normal
Assignee:
Category:
Target version:
ruby -v: 2.0.0-p0


The new features_index for require() in 2.0.0 creates a lot of extra 
ARRAY and STRING objects. In a big rails application, there are ~70k 
extra strings and ~70k extra arrays on the heap after boot.

$ RAILS_ENV=production ruby -e' puts RUBY_VERSION; require 
"./config/environment"; p [:NUM_LOADED_FEATURES, $LOADED_FEATURES.size]; 
GC.start; p ObjectSpace.count_objects.select{ |k,v| [:T_STRING, 
:T_ARRAY, :T_HASH].include? k } '
1.9.3
[:NUM_LOADED_FEATURES, 2933]
{:T_STRING=>291147, :T_ARRAY=>97402, :T_HASH=>9439}

$ RAILS_ENV=production ruby -e' puts RUBY_VERSION; require 
"./config/environment"; p [:NUM_LOADED_FEATURES, $LOADED_FEATURES.size]; 
GC.start; p ObjectSpace.count_objects.select{ |k,v| [:T_STRING, 
:T_ARRAY, :T_HASH].include? k } '
2.0.0
[:NUM_LOADED_FEATURES, 2945]
{:T_STRING=>363183, :T_ARRAY=>167871, :T_HASH=>9640}


The following patch reduces the number of arrays used by the 
features_index:

diff --git a/load.c b/load.c
index ea22d5b..1868518 100644
--- a/load.c
+++ b/load.c
@@ -186,11 +186,16 @@ features_index_add_single(VALUE short_feature, 
VALUE offset)
 {
     VALUE features_index, this_feature_index;
     features_index = get_loaded_features_index_raw();
-    if ((this_feature_index = rb_hash_lookup(features_index, 
short_feature)) == Qnil) {
-  this_feature_index = rb_ary_new();
-  rb_hash_aset(features_index, short_feature, this_feature_index);
+    this_feature_index = rb_hash_lookup(features_index, short_feature);
+
+    if (this_feature_index == Qnil) {
+        rb_hash_aset(features_index, short_feature, offset);
+    } else if (RB_TYPE_P(this_feature_index, T_FIXNUM)) {
+        this_feature_index = rb_ary_new3(2, this_feature_index, 
offset);
+        rb_hash_aset(features_index, short_feature, 
this_feature_index);
+    } else if (RB_TYPE_P(this_feature_index, T_ARRAY)) {
+        rb_ary_push(this_feature_index, offset);
     }
-    rb_ary_push(this_feature_index, offset);
 }

 /* Add to the loaded-features index all the required entries for
@@ -389,8 +394,16 @@ rb_feature_p(const char *feature, const char *ext, 
int rb, int expanded, const c
        or ends in '/'.  This includes both match forms above, as well
        as any distractors, so we may ignore all other entries in 
`features`.
      */
-    for (i = 0; this_feature_index != Qnil && i < 
RARRAY_LEN(this_feature_index); i++) {
-  long index = FIX2LONG(rb_ary_entry(this_feature_index, i));
+    for (i = 0; this_feature_index != Qnil; i++) {
+  long index;
+  if (RB_TYPE_P(this_feature_index, T_ARRAY)) {
+    if (i >= RARRAY_LEN(this_feature_index)) break;
+    index = FIX2LONG(rb_ary_entry(this_feature_index, i));
+  } else {
+    if (i > 0) break;
+    index = FIX2LONG(this_feature_index);
+  }
+
   v = RARRAY_PTR(features)[index];
   f = StringValuePtr(v);
   if ((n = RSTRING_LEN(v)) < len) continue;
Posted by tmm1 (Aman Gupta) (Guest)
on 2013-03-10 07:59
(Received via mailing list)
Issue #8048 has been updated by tmm1 (Aman Gupta).


> +    Check_Type(short_feature, T_STRING);
> +    short_feature_cstr = RSTRING_PTR(short_feature);

This should be StringValueCStr(), because short_feature is created from 
rb_str_substr().

Is it possible for features to contain a NUL byte? Maybe it is 
preferable to add st_init_bintable() which can accept a string length.
----------------------------------------
Bug #8048: require() features_index bloats size of ruby heap
https://bugs.ruby-lang.org/issues/8048#change-37448

Author: tmm1 (Aman Gupta)
Status: Closed
Priority: Normal
Assignee:
Category:
Target version:
ruby -v: 2.0.0-p0


The new features_index for require() in 2.0.0 creates a lot of extra 
ARRAY and STRING objects. In a big rails application, there are ~70k 
extra strings and ~70k extra arrays on the heap after boot.

$ RAILS_ENV=production ruby -e' puts RUBY_VERSION; require 
"./config/environment"; p [:NUM_LOADED_FEATURES, $LOADED_FEATURES.size]; 
GC.start; p ObjectSpace.count_objects.select{ |k,v| [:T_STRING, 
:T_ARRAY, :T_HASH].include? k } '
1.9.3
[:NUM_LOADED_FEATURES, 2933]
{:T_STRING=>291147, :T_ARRAY=>97402, :T_HASH=>9439}

$ RAILS_ENV=production ruby -e' puts RUBY_VERSION; require 
"./config/environment"; p [:NUM_LOADED_FEATURES, $LOADED_FEATURES.size]; 
GC.start; p ObjectSpace.count_objects.select{ |k,v| [:T_STRING, 
:T_ARRAY, :T_HASH].include? k } '
2.0.0
[:NUM_LOADED_FEATURES, 2945]
{:T_STRING=>363183, :T_ARRAY=>167871, :T_HASH=>9640}


The following patch reduces the number of arrays used by the 
features_index:

diff --git a/load.c b/load.c
index ea22d5b..1868518 100644
--- a/load.c
+++ b/load.c
@@ -186,11 +186,16 @@ features_index_add_single(VALUE short_feature, 
VALUE offset)
 {
     VALUE features_index, this_feature_index;
     features_index = get_loaded_features_index_raw();
-    if ((this_feature_index = rb_hash_lookup(features_index, 
short_feature)) == Qnil) {
-  this_feature_index = rb_ary_new();
-  rb_hash_aset(features_index, short_feature, this_feature_index);
+    this_feature_index = rb_hash_lookup(features_index, short_feature);
+
+    if (this_feature_index == Qnil) {
+        rb_hash_aset(features_index, short_feature, offset);
+    } else if (RB_TYPE_P(this_feature_index, T_FIXNUM)) {
+        this_feature_index = rb_ary_new3(2, this_feature_index, 
offset);
+        rb_hash_aset(features_index, short_feature, 
this_feature_index);
+    } else if (RB_TYPE_P(this_feature_index, T_ARRAY)) {
+        rb_ary_push(this_feature_index, offset);
     }
-    rb_ary_push(this_feature_index, offset);
 }

 /* Add to the loaded-features index all the required entries for
@@ -389,8 +394,16 @@ rb_feature_p(const char *feature, const char *ext, 
int rb, int expanded, const c
        or ends in '/'.  This includes both match forms above, as well
        as any distractors, so we may ignore all other entries in 
`features`.
      */
-    for (i = 0; this_feature_index != Qnil && i < 
RARRAY_LEN(this_feature_index); i++) {
-  long index = FIX2LONG(rb_ary_entry(this_feature_index, i));
+    for (i = 0; this_feature_index != Qnil; i++) {
+  long index;
+  if (RB_TYPE_P(this_feature_index, T_ARRAY)) {
+    if (i >= RARRAY_LEN(this_feature_index)) break;
+    index = FIX2LONG(rb_ary_entry(this_feature_index, i));
+  } else {
+    if (i > 0) break;
+    index = FIX2LONG(this_feature_index);
+  }
+
   v = RARRAY_PTR(features)[index];
   f = StringValuePtr(v);
   if ((n = RSTRING_LEN(v)) < len) continue;
Posted by Nobuyoshi Nakada (nobu)
on 2013-03-10 13:11
(Received via mailing list)
Issue #8048 has been updated by nobu (Nobuyoshi Nakada).


tmm1 (Aman Gupta) wrote:
> Is it possible for features to contain a NUL byte?

No, feature names are checked with FilePathValue().

> Maybe it is preferable to add st_init_bintable() which can accept a string 
length.

Since st interfaces take only one parameter for the key, it will need a 
struct, but we already have such struct, RString.
Perhaps we could make a custom st_table which uses the functions in 
string.c.
----------------------------------------
Bug #8048: require() features_index bloats size of ruby heap
https://bugs.ruby-lang.org/issues/8048#change-37453

Author: tmm1 (Aman Gupta)
Status: Closed
Priority: Normal
Assignee:
Category:
Target version:
ruby -v: 2.0.0-p0


The new features_index for require() in 2.0.0 creates a lot of extra 
ARRAY and STRING objects. In a big rails application, there are ~70k 
extra strings and ~70k extra arrays on the heap after boot.

$ RAILS_ENV=production ruby -e' puts RUBY_VERSION; require 
"./config/environment"; p [:NUM_LOADED_FEATURES, $LOADED_FEATURES.size]; 
GC.start; p ObjectSpace.count_objects.select{ |k,v| [:T_STRING, 
:T_ARRAY, :T_HASH].include? k } '
1.9.3
[:NUM_LOADED_FEATURES, 2933]
{:T_STRING=>291147, :T_ARRAY=>97402, :T_HASH=>9439}

$ RAILS_ENV=production ruby -e' puts RUBY_VERSION; require 
"./config/environment"; p [:NUM_LOADED_FEATURES, $LOADED_FEATURES.size]; 
GC.start; p ObjectSpace.count_objects.select{ |k,v| [:T_STRING, 
:T_ARRAY, :T_HASH].include? k } '
2.0.0
[:NUM_LOADED_FEATURES, 2945]
{:T_STRING=>363183, :T_ARRAY=>167871, :T_HASH=>9640}


The following patch reduces the number of arrays used by the 
features_index:

diff --git a/load.c b/load.c
index ea22d5b..1868518 100644
--- a/load.c
+++ b/load.c
@@ -186,11 +186,16 @@ features_index_add_single(VALUE short_feature, 
VALUE offset)
 {
     VALUE features_index, this_feature_index;
     features_index = get_loaded_features_index_raw();
-    if ((this_feature_index = rb_hash_lookup(features_index, 
short_feature)) == Qnil) {
-  this_feature_index = rb_ary_new();
-  rb_hash_aset(features_index, short_feature, this_feature_index);
+    this_feature_index = rb_hash_lookup(features_index, short_feature);
+
+    if (this_feature_index == Qnil) {
+        rb_hash_aset(features_index, short_feature, offset);
+    } else if (RB_TYPE_P(this_feature_index, T_FIXNUM)) {
+        this_feature_index = rb_ary_new3(2, this_feature_index, 
offset);
+        rb_hash_aset(features_index, short_feature, 
this_feature_index);
+    } else if (RB_TYPE_P(this_feature_index, T_ARRAY)) {
+        rb_ary_push(this_feature_index, offset);
     }
-    rb_ary_push(this_feature_index, offset);
 }

 /* Add to the loaded-features index all the required entries for
@@ -389,8 +394,16 @@ rb_feature_p(const char *feature, const char *ext, 
int rb, int expanded, const c
        or ends in '/'.  This includes both match forms above, as well
        as any distractors, so we may ignore all other entries in 
`features`.
      */
-    for (i = 0; this_feature_index != Qnil && i < 
RARRAY_LEN(this_feature_index); i++) {
-  long index = FIX2LONG(rb_ary_entry(this_feature_index, i));
+    for (i = 0; this_feature_index != Qnil; i++) {
+  long index;
+  if (RB_TYPE_P(this_feature_index, T_ARRAY)) {
+    if (i >= RARRAY_LEN(this_feature_index)) break;
+    index = FIX2LONG(rb_ary_entry(this_feature_index, i));
+  } else {
+    if (i > 0) break;
+    index = FIX2LONG(this_feature_index);
+  }
+
   v = RARRAY_PTR(features)[index];
   f = StringValuePtr(v);
   if ((n = RSTRING_LEN(v)) < len) continue;
Posted by tmm1 (Aman Gupta) (Guest)
on 2013-03-19 00:08
(Received via mailing list)
Issue #8048 has been updated by tmm1 (Aman Gupta).


nobu-san,

What do you think of applying StringValueCStr() version of this patch? 
Since feature names cannot contain NULL bytes, it should be OK I think.
----------------------------------------
Bug #8048: require() features_index bloats size of ruby heap
https://bugs.ruby-lang.org/issues/8048#change-37706

Author: tmm1 (Aman Gupta)
Status: Closed
Priority: Normal
Assignee:
Category:
Target version:
ruby -v: 2.0.0-p0


The new features_index for require() in 2.0.0 creates a lot of extra 
ARRAY and STRING objects. In a big rails application, there are ~70k 
extra strings and ~70k extra arrays on the heap after boot.

$ RAILS_ENV=production ruby -e' puts RUBY_VERSION; require 
"./config/environment"; p [:NUM_LOADED_FEATURES, $LOADED_FEATURES.size]; 
GC.start; p ObjectSpace.count_objects.select{ |k,v| [:T_STRING, 
:T_ARRAY, :T_HASH].include? k } '
1.9.3
[:NUM_LOADED_FEATURES, 2933]
{:T_STRING=>291147, :T_ARRAY=>97402, :T_HASH=>9439}

$ RAILS_ENV=production ruby -e' puts RUBY_VERSION; require 
"./config/environment"; p [:NUM_LOADED_FEATURES, $LOADED_FEATURES.size]; 
GC.start; p ObjectSpace.count_objects.select{ |k,v| [:T_STRING, 
:T_ARRAY, :T_HASH].include? k } '
2.0.0
[:NUM_LOADED_FEATURES, 2945]
{:T_STRING=>363183, :T_ARRAY=>167871, :T_HASH=>9640}


The following patch reduces the number of arrays used by the 
features_index:

diff --git a/load.c b/load.c
index ea22d5b..1868518 100644
--- a/load.c
+++ b/load.c
@@ -186,11 +186,16 @@ features_index_add_single(VALUE short_feature, 
VALUE offset)
 {
     VALUE features_index, this_feature_index;
     features_index = get_loaded_features_index_raw();
-    if ((this_feature_index = rb_hash_lookup(features_index, 
short_feature)) == Qnil) {
-  this_feature_index = rb_ary_new();
-  rb_hash_aset(features_index, short_feature, this_feature_index);
+    this_feature_index = rb_hash_lookup(features_index, short_feature);
+
+    if (this_feature_index == Qnil) {
+        rb_hash_aset(features_index, short_feature, offset);
+    } else if (RB_TYPE_P(this_feature_index, T_FIXNUM)) {
+        this_feature_index = rb_ary_new3(2, this_feature_index, 
offset);
+        rb_hash_aset(features_index, short_feature, 
this_feature_index);
+    } else if (RB_TYPE_P(this_feature_index, T_ARRAY)) {
+        rb_ary_push(this_feature_index, offset);
     }
-    rb_ary_push(this_feature_index, offset);
 }

 /* Add to the loaded-features index all the required entries for
@@ -389,8 +394,16 @@ rb_feature_p(const char *feature, const char *ext, 
int rb, int expanded, const c
        or ends in '/'.  This includes both match forms above, as well
        as any distractors, so we may ignore all other entries in 
`features`.
      */
-    for (i = 0; this_feature_index != Qnil && i < 
RARRAY_LEN(this_feature_index); i++) {
-  long index = FIX2LONG(rb_ary_entry(this_feature_index, i));
+    for (i = 0; this_feature_index != Qnil; i++) {
+  long index;
+  if (RB_TYPE_P(this_feature_index, T_ARRAY)) {
+    if (i >= RARRAY_LEN(this_feature_index)) break;
+    index = FIX2LONG(rb_ary_entry(this_feature_index, i));
+  } else {
+    if (i > 0) break;
+    index = FIX2LONG(this_feature_index);
+  }
+
   v = RARRAY_PTR(features)[index];
   f = StringValuePtr(v);
   if ((n = RSTRING_LEN(v)) < len) continue;
Posted by nagachika (Tomoyuki Chikanaga) (Guest)
on 2013-04-20 16:55
(Received via mailing list)
Issue #8048 has been updated by nagachika (Tomoyuki Chikanaga).


... and add r39646, r39647 to reduce conflict.
----------------------------------------
Backport #8048: require() features_index bloats size of ruby heap
https://bugs.ruby-lang.org/issues/8048#change-38787

Author: tmm1 (Aman Gupta)
Status: Assigned
Priority: Normal
Assignee: nagachika (Tomoyuki Chikanaga)
Category:
Target version:


The new features_index for require() in 2.0.0 creates a lot of extra 
ARRAY and STRING objects. In a big rails application, there are ~70k 
extra strings and ~70k extra arrays on the heap after boot.

$ RAILS_ENV=production ruby -e' puts RUBY_VERSION; require 
"./config/environment"; p [:NUM_LOADED_FEATURES, $LOADED_FEATURES.size]; 
GC.start; p ObjectSpace.count_objects.select{ |k,v| [:T_STRING, 
:T_ARRAY, :T_HASH].include? k } '
1.9.3
[:NUM_LOADED_FEATURES, 2933]
{:T_STRING=>291147, :T_ARRAY=>97402, :T_HASH=>9439}

$ RAILS_ENV=production ruby -e' puts RUBY_VERSION; require 
"./config/environment"; p [:NUM_LOADED_FEATURES, $LOADED_FEATURES.size]; 
GC.start; p ObjectSpace.count_objects.select{ |k,v| [:T_STRING, 
:T_ARRAY, :T_HASH].include? k } '
2.0.0
[:NUM_LOADED_FEATURES, 2945]
{:T_STRING=>363183, :T_ARRAY=>167871, :T_HASH=>9640}


The following patch reduces the number of arrays used by the 
features_index:

diff --git a/load.c b/load.c
index ea22d5b..1868518 100644
--- a/load.c
+++ b/load.c
@@ -186,11 +186,16 @@ features_index_add_single(VALUE short_feature, 
VALUE offset)
 {
     VALUE features_index, this_feature_index;
     features_index = get_loaded_features_index_raw();
-    if ((this_feature_index = rb_hash_lookup(features_index, 
short_feature)) == Qnil) {
-  this_feature_index = rb_ary_new();
-  rb_hash_aset(features_index, short_feature, this_feature_index);
+    this_feature_index = rb_hash_lookup(features_index, short_feature);
+
+    if (this_feature_index == Qnil) {
+        rb_hash_aset(features_index, short_feature, offset);
+    } else if (RB_TYPE_P(this_feature_index, T_FIXNUM)) {
+        this_feature_index = rb_ary_new3(2, this_feature_index, 
offset);
+        rb_hash_aset(features_index, short_feature, 
this_feature_index);
+    } else if (RB_TYPE_P(this_feature_index, T_ARRAY)) {
+        rb_ary_push(this_feature_index, offset);
     }
-    rb_ary_push(this_feature_index, offset);
 }

 /* Add to the loaded-features index all the required entries for
@@ -389,8 +394,16 @@ rb_feature_p(const char *feature, const char *ext, 
int rb, int expanded, const c
        or ends in '/'.  This includes both match forms above, as well
        as any distractors, so we may ignore all other entries in 
`features`.
      */
-    for (i = 0; this_feature_index != Qnil && i < 
RARRAY_LEN(this_feature_index); i++) {
-  long index = FIX2LONG(rb_ary_entry(this_feature_index, i));
+    for (i = 0; this_feature_index != Qnil; i++) {
+  long index;
+  if (RB_TYPE_P(this_feature_index, T_ARRAY)) {
+    if (i >= RARRAY_LEN(this_feature_index)) break;
+    index = FIX2LONG(rb_ary_entry(this_feature_index, i));
+  } else {
+    if (i > 0) break;
+    index = FIX2LONG(this_feature_index);
+  }
+
   v = RARRAY_PTR(features)[index];
   f = StringValuePtr(v);
   if ((n = RSTRING_LEN(v)) < len) continue;
Please log in before posting. Registration is free and takes only a minute.
Existing account (Switch to SSL-encrypted connection)
NEW: Do you have a Google/GoogleMail or Yahoo account? No registration required!
Log in with Google account | Log in with Yahoo account
No account? Register here.