Forum: Ruby-core [ruby-trunk - Bug #8142][Open] [patch] iseq: reduce array allocations for simple sequences

Posted by tmm1 (Aman Gupta) (Guest)
on 2013-03-22 06:32
(Received via mailing list)
Issue #8142 has been reported by tmm1 (Aman Gupta).

----------------------------------------
Bug #8142: [patch] iseq: reduce array allocations for simple sequences
https://bugs.ruby-lang.org/issues/8142

Author: tmm1 (Aman Gupta)
Status: Open
Priority: Normal
Assignee: ko1 (Koichi Sasada)
Category:
Target version:
ruby -v: ruby 2.1.0dev (2013-03-20 trunk 39832) [x86_64-darwin12.2.1]


Allocate iseq->mark_ary on demand, only if needed.

In my application, this reduces long lived arrays on the heap 
significantly.

:T_ARRAY=>88166 # before
:T_ARRAY=>62932 # after

diff --git a/compile.c b/compile.c
index 9360f5b..aafae05 100644
--- a/compile.c
+++ b/compile.c
@@ -416,7 +416,7 @@ static int
 iseq_add_mark_object(rb_iseq_t *iseq, VALUE v)
 {
     if (!SPECIAL_CONST_P(v)) {
-  rb_ary_push(iseq->mark_ary, v);
+  rb_iseq_add_mark_object(iseq, v);
     }
     return COMPILE_OK;
 }
diff --git a/insns.def b/insns.def
index 979aa1c..bb9fc3f 100644
--- a/insns.def
+++ b/insns.def
@@ -1237,7 +1237,7 @@ setinlinecache
 (VALUE val)
 {
     if (ic->ic_value.value == Qundef) {
-  rb_ary_push(GET_ISEQ()->mark_ary, val);
+  rb_iseq_add_mark_object(GET_ISEQ(), val);
     }
     ic->ic_value.value = val;
     ic->ic_vmstat = GET_VM_STATE_VERSION() - 
ruby_vm_const_missing_count;
diff --git a/iseq.c b/iseq.c
index 288d3bf..eab237a 100644
--- a/iseq.c
+++ b/iseq.c
@@ -237,6 +237,17 @@ set_relation(rb_iseq_t *iseq, const VALUE parent)
     }
 }

+void
+rb_iseq_add_mark_object(rb_iseq_t *iseq, VALUE obj)
+{
+    if (!RTEST(iseq->mark_ary)) {
+        iseq->mark_ary = rb_ary_tmp_new(3);
+        OBJ_UNTRUST(iseq->mark_ary);
+        RBASIC(iseq->mark_ary)->klass = 0;
+    }
+    rb_ary_push(iseq->mark_ary, obj);
+}
+
 static VALUE
 prepare_iseq_build(rb_iseq_t *iseq,
        VALUE name, VALUE path, VALUE absolute_path, VALUE first_lineno,
@@ -259,9 +270,7 @@ prepare_iseq_build(rb_iseq_t *iseq,
     }

     iseq->defined_method_id = 0;
-    iseq->mark_ary = rb_ary_tmp_new(3);
-    OBJ_UNTRUST(iseq->mark_ary);
-    RBASIC(iseq->mark_ary)->klass = 0;
+    iseq->mark_ary = 0;


     /*
@@ -2060,8 +2069,7 @@ rb_iseq_build_for_ruby2cext(
     iseq->location.label = rb_str_new2(name);
     iseq->location.path = rb_str_new2(path);
     iseq->location.first_lineno = first_lineno;
-    iseq->mark_ary = rb_ary_tmp_new(3);
-    OBJ_UNTRUST(iseq->mark_ary);
+    iseq->mark_ary = 0;
     iseq->self = iseqval;

     iseq->iseq = ALLOC_N(VALUE, iseq->iseq_size);
diff --git a/iseq.h b/iseq.h
index 0790529..4de0816 100644
--- a/iseq.h
+++ b/iseq.h
@@ -23,6 +23,7 @@ VALUE rb_iseq_build_from_ary(rb_iseq_t *iseq, VALUE 
locals, VALUE args,
            VALUE exception, VALUE body);

 /* iseq.c */
+void rb_iseq_add_mark_object(rb_iseq_t *iseq, VALUE obj);
 VALUE rb_iseq_load(VALUE data, VALUE parent, VALUE opt);
 VALUE rb_iseq_parameters(const rb_iseq_t *iseq, int is_proc);
 struct st_table *ruby_insn_make_insn_table(void);
Posted by SASADA Koichi (Guest)
on 2013-03-22 07:29
(Received via mailing list)
(2013/03/22 14:30), tmm1 (Aman Gupta) wrote:
> Allocate iseq->mark_ary on demand, only if needed.
>
> In my application, this reduces long lived arrays on the heap significantly.

Ah, I got it. Nice!!  I'll introduce it.



##
BTW, in the future, I want avoid any live object relations from iseq.
For example, the program "str = 'hello'" (compiled iseq) has one
relation to a String object. However, this String object can be replaced
with non-VALUE memory object (not a VALUE, but a memory dump). If I can
replace with this technique, iseq->mark_ary is no longer needed.
Posted by tmm1 (Aman Gupta) (Guest)
on 2013-03-22 09:20
(Received via mailing list)
Issue #8142 has been updated by tmm1 (Aman Gupta).


>  For example, the program "str = 'hello'" (compiled iseq) has one
>  relation to a String object. However, this String object can be replaced
>  with non-VALUE memory object (not a VALUE, but a memory dump).

Do you have any existing patch for this technique? I would like to try, 
for example to convert putstring instruction.

In my application there are lots of long lived strings. Many of these 
strings come from string literals in code.

>> GC.start
>> ObjectSpace.count_objects[:T_STRING]
=> 311117

>> ObjectSpace.each_object(String).count
=> 305230

>> ObjectSpace.each_object(String).select{ |s| s.frozen? }.size
=> 233336

Also I see a high level of frozen string duplication. Some of this is 
due to duplication of common iseq->location.label (like "initialize")

>> ObjectSpace.each_object(String).select{ |s| s.frozen? }.uniq.size
=> 118043
----------------------------------------
Bug #8142: [patch] iseq: reduce array allocations for simple sequences
https://bugs.ruby-lang.org/issues/8142#change-37806

Author: tmm1 (Aman Gupta)
Status: Open
Priority: Normal
Assignee: ko1 (Koichi Sasada)
Category:
Target version:
ruby -v: ruby 2.1.0dev (2013-03-20 trunk 39832) [x86_64-darwin12.2.1]


Allocate iseq->mark_ary on demand, only if needed.

In my application, this reduces long lived arrays on the heap 
significantly.

:T_ARRAY=>88166 # before
:T_ARRAY=>62932 # after

diff --git a/compile.c b/compile.c
index 9360f5b..aafae05 100644
--- a/compile.c
+++ b/compile.c
@@ -416,7 +416,7 @@ static int
 iseq_add_mark_object(rb_iseq_t *iseq, VALUE v)
 {
     if (!SPECIAL_CONST_P(v)) {
-  rb_ary_push(iseq->mark_ary, v);
+  rb_iseq_add_mark_object(iseq, v);
     }
     return COMPILE_OK;
 }
diff --git a/insns.def b/insns.def
index 979aa1c..bb9fc3f 100644
--- a/insns.def
+++ b/insns.def
@@ -1237,7 +1237,7 @@ setinlinecache
 (VALUE val)
 {
     if (ic->ic_value.value == Qundef) {
-  rb_ary_push(GET_ISEQ()->mark_ary, val);
+  rb_iseq_add_mark_object(GET_ISEQ(), val);
     }
     ic->ic_value.value = val;
     ic->ic_vmstat = GET_VM_STATE_VERSION() - 
ruby_vm_const_missing_count;
diff --git a/iseq.c b/iseq.c
index 288d3bf..eab237a 100644
--- a/iseq.c
+++ b/iseq.c
@@ -237,6 +237,17 @@ set_relation(rb_iseq_t *iseq, const VALUE parent)
     }
 }

+void
+rb_iseq_add_mark_object(rb_iseq_t *iseq, VALUE obj)
+{
+    if (!RTEST(iseq->mark_ary)) {
+        iseq->mark_ary = rb_ary_tmp_new(3);
+        OBJ_UNTRUST(iseq->mark_ary);
+        RBASIC(iseq->mark_ary)->klass = 0;
+    }
+    rb_ary_push(iseq->mark_ary, obj);
+}
+
 static VALUE
 prepare_iseq_build(rb_iseq_t *iseq,
        VALUE name, VALUE path, VALUE absolute_path, VALUE first_lineno,
@@ -259,9 +270,7 @@ prepare_iseq_build(rb_iseq_t *iseq,
     }

     iseq->defined_method_id = 0;
-    iseq->mark_ary = rb_ary_tmp_new(3);
-    OBJ_UNTRUST(iseq->mark_ary);
-    RBASIC(iseq->mark_ary)->klass = 0;
+    iseq->mark_ary = 0;


     /*
@@ -2060,8 +2069,7 @@ rb_iseq_build_for_ruby2cext(
     iseq->location.label = rb_str_new2(name);
     iseq->location.path = rb_str_new2(path);
     iseq->location.first_lineno = first_lineno;
-    iseq->mark_ary = rb_ary_tmp_new(3);
-    OBJ_UNTRUST(iseq->mark_ary);
+    iseq->mark_ary = 0;
     iseq->self = iseqval;

     iseq->iseq = ALLOC_N(VALUE, iseq->iseq_size);
diff --git a/iseq.h b/iseq.h
index 0790529..4de0816 100644
--- a/iseq.h
+++ b/iseq.h
@@ -23,6 +23,7 @@ VALUE rb_iseq_build_from_ary(rb_iseq_t *iseq, VALUE 
locals, VALUE args,
            VALUE exception, VALUE body);

 /* iseq.c */
+void rb_iseq_add_mark_object(rb_iseq_t *iseq, VALUE obj);
 VALUE rb_iseq_load(VALUE data, VALUE parent, VALUE opt);
 VALUE rb_iseq_parameters(const rb_iseq_t *iseq, int is_proc);
 struct st_table *ruby_insn_make_insn_table(void);
Posted by SASADA Koichi (Guest)
on 2013-03-22 09:55
(Received via mailing list)
(2013/03/22 17:19), tmm1 (Aman Gupta) wrote:
> Do you have any existing patch for this technique? I would like to try, for 
example to convert putstring instruction.
>
> In my application there are lots of long lived strings. Many of these strings 
come from string literals in code.

Yes. This is why I want to try it.
And this is good to hear we have killer user :)


Already, we have rough design about it.
This design includes Symbol GC :)
The key idea is making static string (or binary) table with *reference
counter*.
We had (Heroku Matz team) discussed about it.


However, now I'm trying GC improvements (restricted gen gc).
So priority is not best.
I want to finish before RubyKaigi2013.
(Event Driven Development)
Posted by tmm1 (Aman Gupta) (Guest)
on 2013-03-25 06:29
(Received via mailing list)
Issue #8142 has been updated by tmm1 (Aman Gupta).


>  However, now I'm trying GC improvements (restricted gen gc).

This is great news. With an incremental mark, long lived objects are 
less of a problem.

What is your plan for restricted GC?
----------------------------------------
Bug #8142: [patch] iseq: reduce array allocations for simple sequences
https://bugs.ruby-lang.org/issues/8142#change-37911

Author: tmm1 (Aman Gupta)
Status: Open
Priority: Normal
Assignee: ko1 (Koichi Sasada)
Category:
Target version:
ruby -v: ruby 2.1.0dev (2013-03-20 trunk 39832) [x86_64-darwin12.2.1]


Allocate iseq->mark_ary on demand, only if needed.

In my application, this reduces long lived arrays on the heap 
significantly.

:T_ARRAY=>88166 # before
:T_ARRAY=>62932 # after

diff --git a/compile.c b/compile.c
index 9360f5b..aafae05 100644
--- a/compile.c
+++ b/compile.c
@@ -416,7 +416,7 @@ static int
 iseq_add_mark_object(rb_iseq_t *iseq, VALUE v)
 {
     if (!SPECIAL_CONST_P(v)) {
-  rb_ary_push(iseq->mark_ary, v);
+  rb_iseq_add_mark_object(iseq, v);
     }
     return COMPILE_OK;
 }
diff --git a/insns.def b/insns.def
index 979aa1c..bb9fc3f 100644
--- a/insns.def
+++ b/insns.def
@@ -1237,7 +1237,7 @@ setinlinecache
 (VALUE val)
 {
     if (ic->ic_value.value == Qundef) {
-  rb_ary_push(GET_ISEQ()->mark_ary, val);
+  rb_iseq_add_mark_object(GET_ISEQ(), val);
     }
     ic->ic_value.value = val;
     ic->ic_vmstat = GET_VM_STATE_VERSION() - 
ruby_vm_const_missing_count;
diff --git a/iseq.c b/iseq.c
index 288d3bf..eab237a 100644
--- a/iseq.c
+++ b/iseq.c
@@ -237,6 +237,17 @@ set_relation(rb_iseq_t *iseq, const VALUE parent)
     }
 }

+void
+rb_iseq_add_mark_object(rb_iseq_t *iseq, VALUE obj)
+{
+    if (!RTEST(iseq->mark_ary)) {
+        iseq->mark_ary = rb_ary_tmp_new(3);
+        OBJ_UNTRUST(iseq->mark_ary);
+        RBASIC(iseq->mark_ary)->klass = 0;
+    }
+    rb_ary_push(iseq->mark_ary, obj);
+}
+
 static VALUE
 prepare_iseq_build(rb_iseq_t *iseq,
        VALUE name, VALUE path, VALUE absolute_path, VALUE first_lineno,
@@ -259,9 +270,7 @@ prepare_iseq_build(rb_iseq_t *iseq,
     }

     iseq->defined_method_id = 0;
-    iseq->mark_ary = rb_ary_tmp_new(3);
-    OBJ_UNTRUST(iseq->mark_ary);
-    RBASIC(iseq->mark_ary)->klass = 0;
+    iseq->mark_ary = 0;


     /*
@@ -2060,8 +2069,7 @@ rb_iseq_build_for_ruby2cext(
     iseq->location.label = rb_str_new2(name);
     iseq->location.path = rb_str_new2(path);
     iseq->location.first_lineno = first_lineno;
-    iseq->mark_ary = rb_ary_tmp_new(3);
-    OBJ_UNTRUST(iseq->mark_ary);
+    iseq->mark_ary = 0;
     iseq->self = iseqval;

     iseq->iseq = ALLOC_N(VALUE, iseq->iseq_size);
diff --git a/iseq.h b/iseq.h
index 0790529..4de0816 100644
--- a/iseq.h
+++ b/iseq.h
@@ -23,6 +23,7 @@ VALUE rb_iseq_build_from_ary(rb_iseq_t *iseq, VALUE 
locals, VALUE args,
            VALUE exception, VALUE body);

 /* iseq.c */
+void rb_iseq_add_mark_object(rb_iseq_t *iseq, VALUE obj);
 VALUE rb_iseq_load(VALUE data, VALUE parent, VALUE opt);
 VALUE rb_iseq_parameters(const rb_iseq_t *iseq, int is_proc);
 struct st_table *ruby_insn_make_insn_table(void);
Posted by SASADA Koichi (Guest)
on 2013-03-25 16:33
(Received via mailing list)
(2013/03/25 14:29), tmm1 (Aman Gupta) wrote:
> What is your plan for restricted GC?

I'll make another feature request.

# On a preliminary evaluation, simple (ideal) micro benchmark was
# 20-30% faster.
Posted by tmm1 (Aman Gupta) (Guest)
on 2013-03-26 02:23
(Received via mailing list)
Issue #8142 has been updated by tmm1 (Aman Gupta).


>  I'll make another feature request.
>
>  # On a preliminary evaluation, simple (ideal) micro benchmark was
>  # 20-30% faster.

Great. I tried some GC experiments recently (additional bit per object 
to track longlife generation), but without write barrier it was very 
tricky to implement. I look forward to seeing your idea.

> However, this String object can be replaced
>  with non-VALUE memory object (not a VALUE, but a memory dump).

I am doing some experiments with this technique. For the putstring 
instruction, this is very simple- replace rb_str_resurrect() with 
rb_str_new2() using memory dumped value.

But in DSTR, putobject instruction is used instead of putstring 
(https://github.com/ruby/ruby/commit/49371b54). Replacing this with 
rb_str_new every time will increase GC pressure, so it is not ideal.

One solution is to make memory dump re-use struct RString, so it can 
emulate a string object (special API to allocate strings outside the 
ruby heap, using ALLOC(struct RString)). These objects can also contain 
an extra reference count field.

But if putobject instruction gives out reference to these unmanaged 
object, then reference count is not enough to free. An additional GC 
mark/sweep will be required after refcount==0, to make sure no one is 
still referencing the string. This is still possible (similar technique 
is used for unlinked method entries?). I have some patches for this 
approach, but I am curious what you think. Is this a bad idea?
----------------------------------------
Bug #8142: [patch] iseq: reduce array allocations for simple sequences
https://bugs.ruby-lang.org/issues/8142#change-37928

Author: tmm1 (Aman Gupta)
Status: Open
Priority: Normal
Assignee: ko1 (Koichi Sasada)
Category:
Target version:
ruby -v: ruby 2.1.0dev (2013-03-20 trunk 39832) [x86_64-darwin12.2.1]


Allocate iseq->mark_ary on demand, only if needed.

In my application, this reduces long lived arrays on the heap 
significantly.

:T_ARRAY=>88166 # before
:T_ARRAY=>62932 # after

diff --git a/compile.c b/compile.c
index 9360f5b..aafae05 100644
--- a/compile.c
+++ b/compile.c
@@ -416,7 +416,7 @@ static int
 iseq_add_mark_object(rb_iseq_t *iseq, VALUE v)
 {
     if (!SPECIAL_CONST_P(v)) {
-  rb_ary_push(iseq->mark_ary, v);
+  rb_iseq_add_mark_object(iseq, v);
     }
     return COMPILE_OK;
 }
diff --git a/insns.def b/insns.def
index 979aa1c..bb9fc3f 100644
--- a/insns.def
+++ b/insns.def
@@ -1237,7 +1237,7 @@ setinlinecache
 (VALUE val)
 {
     if (ic->ic_value.value == Qundef) {
-  rb_ary_push(GET_ISEQ()->mark_ary, val);
+  rb_iseq_add_mark_object(GET_ISEQ(), val);
     }
     ic->ic_value.value = val;
     ic->ic_vmstat = GET_VM_STATE_VERSION() - 
ruby_vm_const_missing_count;
diff --git a/iseq.c b/iseq.c
index 288d3bf..eab237a 100644
--- a/iseq.c
+++ b/iseq.c
@@ -237,6 +237,17 @@ set_relation(rb_iseq_t *iseq, const VALUE parent)
     }
 }

+void
+rb_iseq_add_mark_object(rb_iseq_t *iseq, VALUE obj)
+{
+    if (!RTEST(iseq->mark_ary)) {
+        iseq->mark_ary = rb_ary_tmp_new(3);
+        OBJ_UNTRUST(iseq->mark_ary);
+        RBASIC(iseq->mark_ary)->klass = 0;
+    }
+    rb_ary_push(iseq->mark_ary, obj);
+}
+
 static VALUE
 prepare_iseq_build(rb_iseq_t *iseq,
        VALUE name, VALUE path, VALUE absolute_path, VALUE first_lineno,
@@ -259,9 +270,7 @@ prepare_iseq_build(rb_iseq_t *iseq,
     }

     iseq->defined_method_id = 0;
-    iseq->mark_ary = rb_ary_tmp_new(3);
-    OBJ_UNTRUST(iseq->mark_ary);
-    RBASIC(iseq->mark_ary)->klass = 0;
+    iseq->mark_ary = 0;


     /*
@@ -2060,8 +2069,7 @@ rb_iseq_build_for_ruby2cext(
     iseq->location.label = rb_str_new2(name);
     iseq->location.path = rb_str_new2(path);
     iseq->location.first_lineno = first_lineno;
-    iseq->mark_ary = rb_ary_tmp_new(3);
-    OBJ_UNTRUST(iseq->mark_ary);
+    iseq->mark_ary = 0;
     iseq->self = iseqval;

     iseq->iseq = ALLOC_N(VALUE, iseq->iseq_size);
diff --git a/iseq.h b/iseq.h
index 0790529..4de0816 100644
--- a/iseq.h
+++ b/iseq.h
@@ -23,6 +23,7 @@ VALUE rb_iseq_build_from_ary(rb_iseq_t *iseq, VALUE 
locals, VALUE args,
            VALUE exception, VALUE body);

 /* iseq.c */
+void rb_iseq_add_mark_object(rb_iseq_t *iseq, VALUE obj);
 VALUE rb_iseq_load(VALUE data, VALUE parent, VALUE opt);
 VALUE rb_iseq_parameters(const rb_iseq_t *iseq, int is_proc);
 struct st_table *ruby_insn_make_insn_table(void);
Please log in before posting. Registration is free and takes only a minute.
Existing account (Switch to SSL-encrypted connection)
NEW: Do you have a Google/GoogleMail or Yahoo account? No registration required!
Log in with Google account | Log in with Yahoo account
No account? Register here.