Forum: Ruby-core [ruby-trunk - Bug #8122][Open] [patch] gc: GC.stat improvements and related cleanup

Posted by tmm1 (Aman Gupta) (Guest)
on 2013-03-19 03:30
(Received via mailing list)
Issue #8122 has been reported by tmm1 (Aman Gupta).

----------------------------------------
Bug #8122: [patch] gc: GC.stat improvements and related cleanup
https://bugs.ruby-lang.org/issues/8122

Author: tmm1 (Aman Gupta)
Status: Open
Priority: Normal
Assignee: authorNari (Narihiro Nakamura)
Category: core
Target version:
ruby -v: ruby 2.1.0dev (2013-03-19 trunk 39816) [x86_64-darwin12.2.1]


I propose 4 related patches below. The end result is more information 
from GC.stat about mark and sweep activity.

1) Rename heap.free_num to heap.swept_num
    This is a purely cosmetic change, to make the purpose of the 
variable more clear and avoid problems due to misuse later (like we saw 
in r39811)
    I considered some other names: heap.freelisted_num, heap.reused_num

2) Add heap.sweep_num
    We already calculate heap.marked_num during mark. This is inverse of 
marked_num calculated at the end of mark cycle.
    Using this you can monitor progress of sweep cycle: (100% * 
heap.swept_num / heap.sweep_num)
    I considered also: heap.sweeping_num

3) Remove rest_sweep() from GC.stat
    Currently sweep is a side-effect of GC.stat. It makes GC.stat 
difficult to use for statistics, because it is not constant time 
operation.
    Also, after rest_sweep(), heap.swept_num == heap.sweep_num, so 
GC.stat cannot be used to track progress of sweep cycle.

4) Expose new mark/sweep counters via GC.stat



Author: Aman Gupta <aman@tmm1.net>
Date:   Mon Mar 18 18:11:27 2013 -0700

    rename heap.free_num to heap.swept_num

diff --git a/gc.c b/gc.c
index 635e3ec..f654e19 100644
--- a/gc.c
+++ b/gc.c
@@ -229,7 +229,7 @@ typedef struct rb_objspace {
   RVALUE *range[2];
   struct heaps_header *freed;
   size_t marked_num;
-  size_t free_num;
+  size_t swept_num;
   size_t free_min;
   size_t final_num;
   size_t do_heap_free;
@@ -1434,7 +1434,7 @@ finalize_list(rb_objspace_t *objspace, RVALUE *p)
   objspace->total_freed_object_num++;
   if (!FL_TEST(p, FL_SINGLETON)) { /* not freeing page */
             add_slot_local_freelist(objspace, p);
-      objspace->heap.free_num++;
+      objspace->heap.swept_num++;
   }
   else {
       struct heaps_slot *slot = (struct heaps_slot 
*)(VALUE)RDATA(p)->dmark;
@@ -1921,7 +1921,7 @@ slot_sweep(rb_objspace_t *objspace, struct 
heaps_slot *sweep_slot)
     }
     gc_clear_slot_bits(sweep_slot);
     if (final_num + freed_num + empty_num == sweep_slot->header->limit 
&&
-        objspace->heap.free_num > objspace->heap.do_heap_free) {
+        objspace->heap.swept_num > objspace->heap.do_heap_free) {
         RVALUE *pp;

         for (pp = deferred_final_list; pp != final; pp = 
pp->as.free.next) {
@@ -1938,7 +1938,7 @@ slot_sweep(rb_objspace_t *objspace, struct 
heaps_slot *sweep_slot)
         else {
             sweep_slot->free_next = NULL;
         }
-  objspace->heap.free_num += freed_num + empty_num;
+  objspace->heap.swept_num += freed_num + empty_num;
     }
     objspace->total_freed_object_num += freed_num;
     objspace->heap.final_num += final_num;
@@ -1977,7 +1977,7 @@ before_gc_sweep(rb_objspace_t *objspace)
       objspace->heap.do_heap_free = initial_free_min;
     }
     objspace->heap.sweep_slots = heaps;
-    objspace->heap.free_num = 0;
+    objspace->heap.swept_num = 0;
     objspace->heap.free_slots = NULL;

     /* sweep unlinked method entries */
@@ -1992,7 +1992,7 @@ after_gc_sweep(rb_objspace_t *objspace)
     size_t inc;

     gc_prof_set_malloc_info(objspace);
-    if (objspace->heap.free_num < objspace->heap.free_min) {
+    if (objspace->heap.swept_num < objspace->heap.free_min) {
         set_heaps_increment(objspace);
         heaps_increment(objspace);
     }
@@ -2972,7 +2972,7 @@ rb_gc_force_recycle(VALUE p)
         add_slot_local_freelist(objspace, (RVALUE *)p);
     }
     else {
-  objspace->heap.free_num++;
+  objspace->heap.swept_num++;
         slot = add_slot_local_freelist(objspace, (RVALUE *)p);
         if (slot->free_next == NULL) {
             link_free_heap_slot(objspace, slot);
@@ -3205,7 +3205,7 @@ gc_stat(int argc, VALUE *argv, VALUE self)
     rb_hash_aset(hash, sym_heap_length, 
SIZET2NUM(objspace->heap.length));
     rb_hash_aset(hash, sym_heap_increment, 
SIZET2NUM(objspace->heap.increment));
     rb_hash_aset(hash, sym_heap_live_num, 
SIZET2NUM(objspace_live_num(objspace)));
-    rb_hash_aset(hash, sym_heap_free_num, 
SIZET2NUM(objspace->heap.free_num));
+    rb_hash_aset(hash, sym_heap_free_num, 
SIZET2NUM(objspace->heap.swept_num));
     rb_hash_aset(hash, sym_heap_final_num, 
SIZET2NUM(objspace->heap.final_num));
     rb_hash_aset(hash, sym_total_allocated_object, 
SIZET2NUM(objspace->total_allocated_object_num));
     rb_hash_aset(hash, sym_total_freed_object, 
SIZET2NUM(objspace->total_freed_object_num));



Author: Aman Gupta <aman@tmm1.net>
Date:   Mon Mar 18 18:32:58 2013 -0700

    add sweep_num

diff --git a/gc.c b/gc.c
index b911d04..c49f1dd 100644
--- a/gc.c
+++ b/gc.c
@@ -229,6 +229,7 @@ typedef struct rb_objspace {
   RVALUE *range[2];
   struct heaps_header *freed;
   size_t marked_num;
+  size_t sweep_num;
   size_t swept_num;
   size_t free_min;
   size_t final_num;
@@ -2957,6 +2958,7 @@ gc_marks(rb_objspace_t *objspace)
     gc_prof_mark_timer_stop(objspace);

     objspace->mark_func_data = prev_mark_func_data;
+    objspace->heap.sweep_num = (heaps_used * HEAP_OBJ_LIMIT) - 
objspace->heap.marked_num;
 }

 /* GC */



Author: Aman Gupta <aman@tmm1.net>
Date:   Mon Mar 18 18:14:10 2013 -0700

    remove rest_sweep side-effect from GC.stat

diff --git a/gc.c b/gc.c
index f654e19..b911d04 100644
--- a/gc.c
+++ b/gc.c
@@ -3197,8 +3197,6 @@ gc_stat(int argc, VALUE *argv, VALUE self)
         hash = rb_hash_new();
     }

-    rest_sweep(objspace);
-
     rb_hash_aset(hash, sym_count, SIZET2NUM(objspace->count));
     /* implementation dependent counters */
     rb_hash_aset(hash, sym_heap_used, SIZET2NUM(objspace->heap.used));



Author: Aman Gupta <aman@tmm1.net>
Date:   Mon Mar 18 18:59:50 2013 -0700

    add more stats to GC.stat

diff --git a/gc.c b/gc.c
index c49f1dd..8f458db 100644
--- a/gc.c
+++ b/gc.c
@@ -3174,18 +3174,28 @@ gc_stat(int argc, VALUE *argv, VALUE self)
 {
     rb_objspace_t *objspace = &rb_objspace;
     VALUE hash;
+    size_t total_num, live_num;
     static VALUE sym_count;
     static VALUE sym_heap_used, sym_heap_length, sym_heap_increment;
-    static VALUE sym_heap_live_num, sym_heap_free_num, 
sym_heap_final_num;
+    static VALUE sym_heap_obj_limit, sym_heap_total_num;
+    static VALUE sym_heap_live_num, sym_heap_empty_num, 
sym_heap_final_num;
+    static VALUE sym_heap_marked_num, sym_heap_sweep_num, 
sym_heap_swept_num;
+    static VALUE sym_is_lazy_sweeping;
     static VALUE sym_total_allocated_object, sym_total_freed_object;
     if (sym_count == 0) {
   sym_count = ID2SYM(rb_intern_const("count"));
   sym_heap_used = ID2SYM(rb_intern_const("heap_used"));
   sym_heap_length = ID2SYM(rb_intern_const("heap_length"));
   sym_heap_increment = ID2SYM(rb_intern_const("heap_increment"));
+  sym_heap_obj_limit = ID2SYM(rb_intern_const("heap_obj_limit"));
+  sym_heap_total_num = ID2SYM(rb_intern_const("heap_total_num"));
   sym_heap_live_num = ID2SYM(rb_intern_const("heap_live_num"));
-  sym_heap_free_num = ID2SYM(rb_intern_const("heap_free_num"));
+  sym_heap_empty_num = ID2SYM(rb_intern_const("heap_empty_num"));
   sym_heap_final_num = ID2SYM(rb_intern_const("heap_final_num"));
+  sym_heap_marked_num = ID2SYM(rb_intern_const("heap_marked_num"));
+  sym_heap_sweep_num = ID2SYM(rb_intern_const("heap_sweep_num"));
+  sym_heap_swept_num = ID2SYM(rb_intern_const("heap_swept_num"));
+  sym_is_lazy_sweeping = ID2SYM(rb_intern_const("is_lazy_sweeping"));
   sym_total_allocated_object = 
ID2SYM(rb_intern_const("total_allocated_object"));
   sym_total_freed_object = 
ID2SYM(rb_intern_const("total_freed_object"));
     }
@@ -3201,12 +3211,21 @@ gc_stat(int argc, VALUE *argv, VALUE self)

     rb_hash_aset(hash, sym_count, SIZET2NUM(objspace->count));
     /* implementation dependent counters */
+    total_num = heaps_used * HEAP_OBJ_LIMIT;
+    live_num = objspace_live_num(objspace);
+
     rb_hash_aset(hash, sym_heap_used, SIZET2NUM(objspace->heap.used));
     rb_hash_aset(hash, sym_heap_length, 
SIZET2NUM(objspace->heap.length));
     rb_hash_aset(hash, sym_heap_increment, 
SIZET2NUM(objspace->heap.increment));
-    rb_hash_aset(hash, sym_heap_live_num, 
SIZET2NUM(objspace_live_num(objspace)));
-    rb_hash_aset(hash, sym_heap_free_num, 
SIZET2NUM(objspace->heap.swept_num));
+    rb_hash_aset(hash, sym_heap_obj_limit, SIZET2NUM(HEAP_OBJ_LIMIT));
+    rb_hash_aset(hash, sym_heap_total_num, SIZET2NUM(total_num));
+    rb_hash_aset(hash, sym_heap_live_num, SIZET2NUM(live_num));
+    rb_hash_aset(hash, sym_heap_empty_num, SIZET2NUM(total_num - 
live_num));
     rb_hash_aset(hash, sym_heap_final_num, 
SIZET2NUM(objspace->heap.final_num));
+    rb_hash_aset(hash, sym_is_lazy_sweeping, is_lazy_sweeping(objspace) 
? Qtrue : Qfalse);
+    rb_hash_aset(hash, sym_heap_marked_num, 
SIZET2NUM(objspace->heap.marked_num));
+    rb_hash_aset(hash, sym_heap_swept_num, 
SIZET2NUM(objspace->heap.swept_num));
+    rb_hash_aset(hash, sym_heap_sweep_num, 
SIZET2NUM(objspace->heap.sweep_num));
     rb_hash_aset(hash, sym_total_allocated_object, 
SIZET2NUM(objspace->total_allocated_object_num));
     rb_hash_aset(hash, sym_total_freed_object, 
SIZET2NUM(objspace->total_freed_object_num));
Posted by authorNari (Narihiro Nakamura) (Guest)
on 2013-03-19 03:58
(Received via mailing list)
Issue #8122 has been updated by authorNari (Narihiro Nakamura).

Assignee changed from authorNari (Narihiro Nakamura) to ko1 (Koichi 
Sasada)

ko1-san, what do you think?
----------------------------------------
Bug #8122: [patch] gc: GC.stat improvements and related cleanup
https://bugs.ruby-lang.org/issues/8122#change-37714

Author: tmm1 (Aman Gupta)
Status: Open
Priority: Normal
Assignee: ko1 (Koichi Sasada)
Category: core
Target version:
ruby -v: ruby 2.1.0dev (2013-03-19 trunk 39816) [x86_64-darwin12.2.1]


I propose 4 related patches below. The end result is more information 
from GC.stat about mark and sweep activity.

1) Rename heap.free_num to heap.swept_num
    This is a purely cosmetic change, to make the purpose of the 
variable more clear and avoid problems due to misuse later (like we saw 
in r39811)
    I considered some other names: heap.freelisted_num, heap.reused_num

2) Add heap.sweep_num
    We already calculate heap.marked_num during mark. This is inverse of 
marked_num calculated at the end of mark cycle.
    Using this you can monitor progress of sweep cycle: (100% * 
heap.swept_num / heap.sweep_num)
    I considered also: heap.sweeping_num

3) Remove rest_sweep() from GC.stat
    Currently sweep is a side-effect of GC.stat. It makes GC.stat 
difficult to use for statistics, because it is not constant time 
operation.
    Also, after rest_sweep(), heap.swept_num == heap.sweep_num, so 
GC.stat cannot be used to track progress of sweep cycle.

4) Expose new mark/sweep counters via GC.stat



Author: Aman Gupta <aman@tmm1.net>
Date:   Mon Mar 18 18:11:27 2013 -0700

    rename heap.free_num to heap.swept_num

diff --git a/gc.c b/gc.c
index 635e3ec..f654e19 100644
--- a/gc.c
+++ b/gc.c
@@ -229,7 +229,7 @@ typedef struct rb_objspace {
   RVALUE *range[2];
   struct heaps_header *freed;
   size_t marked_num;
-  size_t free_num;
+  size_t swept_num;
   size_t free_min;
   size_t final_num;
   size_t do_heap_free;
@@ -1434,7 +1434,7 @@ finalize_list(rb_objspace_t *objspace, RVALUE *p)
   objspace->total_freed_object_num++;
   if (!FL_TEST(p, FL_SINGLETON)) { /* not freeing page */
             add_slot_local_freelist(objspace, p);
-      objspace->heap.free_num++;
+      objspace->heap.swept_num++;
   }
   else {
       struct heaps_slot *slot = (struct heaps_slot 
*)(VALUE)RDATA(p)->dmark;
@@ -1921,7 +1921,7 @@ slot_sweep(rb_objspace_t *objspace, struct 
heaps_slot *sweep_slot)
     }
     gc_clear_slot_bits(sweep_slot);
     if (final_num + freed_num + empty_num == sweep_slot->header->limit 
&&
-        objspace->heap.free_num > objspace->heap.do_heap_free) {
+        objspace->heap.swept_num > objspace->heap.do_heap_free) {
         RVALUE *pp;

         for (pp = deferred_final_list; pp != final; pp = 
pp->as.free.next) {
@@ -1938,7 +1938,7 @@ slot_sweep(rb_objspace_t *objspace, struct 
heaps_slot *sweep_slot)
         else {
             sweep_slot->free_next = NULL;
         }
-  objspace->heap.free_num += freed_num + empty_num;
+  objspace->heap.swept_num += freed_num + empty_num;
     }
     objspace->total_freed_object_num += freed_num;
     objspace->heap.final_num += final_num;
@@ -1977,7 +1977,7 @@ before_gc_sweep(rb_objspace_t *objspace)
       objspace->heap.do_heap_free = initial_free_min;
     }
     objspace->heap.sweep_slots = heaps;
-    objspace->heap.free_num = 0;
+    objspace->heap.swept_num = 0;
     objspace->heap.free_slots = NULL;

     /* sweep unlinked method entries */
@@ -1992,7 +1992,7 @@ after_gc_sweep(rb_objspace_t *objspace)
     size_t inc;

     gc_prof_set_malloc_info(objspace);
-    if (objspace->heap.free_num < objspace->heap.free_min) {
+    if (objspace->heap.swept_num < objspace->heap.free_min) {
         set_heaps_increment(objspace);
         heaps_increment(objspace);
     }
@@ -2972,7 +2972,7 @@ rb_gc_force_recycle(VALUE p)
         add_slot_local_freelist(objspace, (RVALUE *)p);
     }
     else {
-  objspace->heap.free_num++;
+  objspace->heap.swept_num++;
         slot = add_slot_local_freelist(objspace, (RVALUE *)p);
         if (slot->free_next == NULL) {
             link_free_heap_slot(objspace, slot);
@@ -3205,7 +3205,7 @@ gc_stat(int argc, VALUE *argv, VALUE self)
     rb_hash_aset(hash, sym_heap_length, 
SIZET2NUM(objspace->heap.length));
     rb_hash_aset(hash, sym_heap_increment, 
SIZET2NUM(objspace->heap.increment));
     rb_hash_aset(hash, sym_heap_live_num, 
SIZET2NUM(objspace_live_num(objspace)));
-    rb_hash_aset(hash, sym_heap_free_num, 
SIZET2NUM(objspace->heap.free_num));
+    rb_hash_aset(hash, sym_heap_free_num, 
SIZET2NUM(objspace->heap.swept_num));
     rb_hash_aset(hash, sym_heap_final_num, 
SIZET2NUM(objspace->heap.final_num));
     rb_hash_aset(hash, sym_total_allocated_object, 
SIZET2NUM(objspace->total_allocated_object_num));
     rb_hash_aset(hash, sym_total_freed_object, 
SIZET2NUM(objspace->total_freed_object_num));



Author: Aman Gupta <aman@tmm1.net>
Date:   Mon Mar 18 18:32:58 2013 -0700

    add sweep_num

diff --git a/gc.c b/gc.c
index b911d04..c49f1dd 100644
--- a/gc.c
+++ b/gc.c
@@ -229,6 +229,7 @@ typedef struct rb_objspace {
   RVALUE *range[2];
   struct heaps_header *freed;
   size_t marked_num;
+  size_t sweep_num;
   size_t swept_num;
   size_t free_min;
   size_t final_num;
@@ -2957,6 +2958,7 @@ gc_marks(rb_objspace_t *objspace)
     gc_prof_mark_timer_stop(objspace);

     objspace->mark_func_data = prev_mark_func_data;
+    objspace->heap.sweep_num = (heaps_used * HEAP_OBJ_LIMIT) - 
objspace->heap.marked_num;
 }

 /* GC */



Author: Aman Gupta <aman@tmm1.net>
Date:   Mon Mar 18 18:14:10 2013 -0700

    remove rest_sweep side-effect from GC.stat

diff --git a/gc.c b/gc.c
index f654e19..b911d04 100644
--- a/gc.c
+++ b/gc.c
@@ -3197,8 +3197,6 @@ gc_stat(int argc, VALUE *argv, VALUE self)
         hash = rb_hash_new();
     }

-    rest_sweep(objspace);
-
     rb_hash_aset(hash, sym_count, SIZET2NUM(objspace->count));
     /* implementation dependent counters */
     rb_hash_aset(hash, sym_heap_used, SIZET2NUM(objspace->heap.used));



Author: Aman Gupta <aman@tmm1.net>
Date:   Mon Mar 18 18:59:50 2013 -0700

    add more stats to GC.stat

diff --git a/gc.c b/gc.c
index c49f1dd..8f458db 100644
--- a/gc.c
+++ b/gc.c
@@ -3174,18 +3174,28 @@ gc_stat(int argc, VALUE *argv, VALUE self)
 {
     rb_objspace_t *objspace = &rb_objspace;
     VALUE hash;
+    size_t total_num, live_num;
     static VALUE sym_count;
     static VALUE sym_heap_used, sym_heap_length, sym_heap_increment;
-    static VALUE sym_heap_live_num, sym_heap_free_num, 
sym_heap_final_num;
+    static VALUE sym_heap_obj_limit, sym_heap_total_num;
+    static VALUE sym_heap_live_num, sym_heap_empty_num, 
sym_heap_final_num;
+    static VALUE sym_heap_marked_num, sym_heap_sweep_num, 
sym_heap_swept_num;
+    static VALUE sym_is_lazy_sweeping;
     static VALUE sym_total_allocated_object, sym_total_freed_object;
     if (sym_count == 0) {
   sym_count = ID2SYM(rb_intern_const("count"));
   sym_heap_used = ID2SYM(rb_intern_const("heap_used"));
   sym_heap_length = ID2SYM(rb_intern_const("heap_length"));
   sym_heap_increment = ID2SYM(rb_intern_const("heap_increment"));
+  sym_heap_obj_limit = ID2SYM(rb_intern_const("heap_obj_limit"));
+  sym_heap_total_num = ID2SYM(rb_intern_const("heap_total_num"));
   sym_heap_live_num = ID2SYM(rb_intern_const("heap_live_num"));
-  sym_heap_free_num = ID2SYM(rb_intern_const("heap_free_num"));
+  sym_heap_empty_num = ID2SYM(rb_intern_const("heap_empty_num"));
   sym_heap_final_num = ID2SYM(rb_intern_const("heap_final_num"));
+  sym_heap_marked_num = ID2SYM(rb_intern_const("heap_marked_num"));
+  sym_heap_sweep_num = ID2SYM(rb_intern_const("heap_sweep_num"));
+  sym_heap_swept_num = ID2SYM(rb_intern_const("heap_swept_num"));
+  sym_is_lazy_sweeping = ID2SYM(rb_intern_const("is_lazy_sweeping"));
   sym_total_allocated_object = 
ID2SYM(rb_intern_const("total_allocated_object"));
   sym_total_freed_object = 
ID2SYM(rb_intern_const("total_freed_object"));
     }
@@ -3201,12 +3211,21 @@ gc_stat(int argc, VALUE *argv, VALUE self)

     rb_hash_aset(hash, sym_count, SIZET2NUM(objspace->count));
     /* implementation dependent counters */
+    total_num = heaps_used * HEAP_OBJ_LIMIT;
+    live_num = objspace_live_num(objspace);
+
     rb_hash_aset(hash, sym_heap_used, SIZET2NUM(objspace->heap.used));
     rb_hash_aset(hash, sym_heap_length, 
SIZET2NUM(objspace->heap.length));
     rb_hash_aset(hash, sym_heap_increment, 
SIZET2NUM(objspace->heap.increment));
-    rb_hash_aset(hash, sym_heap_live_num, 
SIZET2NUM(objspace_live_num(objspace)));
-    rb_hash_aset(hash, sym_heap_free_num, 
SIZET2NUM(objspace->heap.swept_num));
+    rb_hash_aset(hash, sym_heap_obj_limit, SIZET2NUM(HEAP_OBJ_LIMIT));
+    rb_hash_aset(hash, sym_heap_total_num, SIZET2NUM(total_num));
+    rb_hash_aset(hash, sym_heap_live_num, SIZET2NUM(live_num));
+    rb_hash_aset(hash, sym_heap_empty_num, SIZET2NUM(total_num - 
live_num));
     rb_hash_aset(hash, sym_heap_final_num, 
SIZET2NUM(objspace->heap.final_num));
+    rb_hash_aset(hash, sym_is_lazy_sweeping, is_lazy_sweeping(objspace) 
? Qtrue : Qfalse);
+    rb_hash_aset(hash, sym_heap_marked_num, 
SIZET2NUM(objspace->heap.marked_num));
+    rb_hash_aset(hash, sym_heap_swept_num, 
SIZET2NUM(objspace->heap.swept_num));
+    rb_hash_aset(hash, sym_heap_sweep_num, 
SIZET2NUM(objspace->heap.sweep_num));
     rb_hash_aset(hash, sym_total_allocated_object, 
SIZET2NUM(objspace->total_allocated_object_num));
     rb_hash_aset(hash, sym_total_freed_object, 
SIZET2NUM(objspace->total_freed_object_num));
Posted by SASADA Koichi (Guest)
on 2013-03-19 06:34
(Received via mailing list)
(2013/03/19 11:58), authorNari (Narihiro Nakamura) wrote:
> ko1-san, what do you think?

(1) Performance

One concern is making it fat seems slow it down.

(2) Usage

How to use new statistics?
Please log in before posting. Registration is free and takes only a minute.
Existing account (Switch to SSL-encrypted connection)
NEW: Do you have a Google/GoogleMail or Yahoo account? No registration required!
Log in with Google account | Log in with Yahoo account
No account? Register here.