Forum: Ruby-core [ruby-trunk - Feature #8107][Open] [patch] runtime flag to track object allocation metadata

Posted by tmm1 (Aman Gupta) (Guest)
on 2013-03-16 09:48
(Received via mailing list)
Issue #8107 has been reported by tmm1 (Aman Gupta).

----------------------------------------
Feature #8107: [patch] runtime flag to track object allocation metadata
https://bugs.ruby-lang.org/issues/8107

Author: tmm1 (Aman Gupta)
Status: Open
Priority: Normal
Assignee:
Category: core
Target version:


When a ruby program contains a reference leak, debugging is a lot easier 
if you know where each object was allocated. Tools like bleakhouse and 
memprof have provided this functionality in the past, but were brittle 
and required source/runtime patches to ruby.

Ruby already provides basic callsite tracking if you recompile ruby with 
GC_DEBUG. This is impractical for daily use however, since it increases 
the size of the ruby heap by ~30%. There is also no API to access the 
debug information.

The following patch moves the GC_DEBUG file/line tracking outside of 
RVALUE, and adds a runtime flag (via environment variable) to enable it. 
This way normal usage is not affected by additional memory usage, but it 
is still simple to enable tracking for debugging purposes without having 
to recompile ruby.

I've exposed this data via BasicObject#__sourcefile__ and 
BasicObject#__sourceline__

$ ruby -e'
  GC.start
  ObjectSpace.each_object.first(1).each{ |o|
    p [o.class, o, o.__sourcefile__, o.__sourceline__]
  }
'
-e:4: warning: #__sourcefile__ requires RUBY_OBJECT_METADATA=1
-e:4: warning: #__sourceline__ requires RUBY_OBJECT_METADATA=1
[String, 
"/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems/exceptions", 
nil, nil]

$ RUBY_OBJECT_METADATA=1 ruby -e'
  GC.start
  ObjectSpace.each_object.first(1).each{ |o|
    p [o.class, o, o.__sourcefile__, o.__sourceline__]
  }
'
[String, "$(datarootdir)/doc/$(PACKAGE)", 
"/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems.rb", 8]

diff --git a/gc.c b/gc.c
index bd95073..2fc1d0c 100644
--- a/gc.c
+++ b/gc.c
@@ -81,6 +81,7 @@ typedef struct {
 #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
     int gc_stress;
 #endif
+    int track_metadata;
 } ruby_gc_params_t;

 static ruby_gc_params_t initial_params = {
@@ -91,6 +92,7 @@ static ruby_gc_params_t initial_params = {
 #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
     FALSE,
 #endif
+    FALSE
 };

 #define nomem_error GET_VM()->special_exceptions[ruby_error_nomemory]
@@ -162,6 +164,11 @@ typedef struct RVALUE {
 #pragma pack(pop)
 #endif

+typedef struct rb_obj_metadata {
+    VALUE file;
+    unsigned short line;
+} rb_obj_metadata_t;
+
 struct heaps_slot {
     struct heaps_header *header;
     uintptr_t *bits;
@@ -177,6 +184,7 @@ struct heaps_header {
     RVALUE *start;
     RVALUE *end;
     size_t limit;
+    rb_obj_metadata_t *metadata;
 };

 struct heaps_free_bitmap {
@@ -291,6 +299,7 @@ int *ruby_initial_gc_stress_ptr = 
&rb_objspace.gc_stress;
 #define initial_heap_min_slots  initial_params.initial_heap_min_slots
 #define initial_free_min  initial_params.initial_free_min
 #define initial_growth_factor  initial_params.initial_growth_factor
+#define track_metadata  initial_params.track_metadata

 #define is_lazy_sweeping(objspace) ((objspace)->heap.sweep_slots != 0)

@@ -413,6 +422,8 @@ rb_objspace_free(rb_objspace_t *objspace)
     if (objspace->heap.sorted) {
   size_t i;
   for (i = 0; i < heaps_used; ++i) {
+            if (objspace->heap.sorted[i]->metadata)
+                free(objspace->heap.sorted[i]->metadata);
             free(objspace->heap.sorted[i]->bits);
       aligned_free(objspace->heap.sorted[i]);
   }
@@ -538,6 +549,7 @@ assign_heap_slot(rb_objspace_t *objspace)
     objspace->heap.sorted[hi]->end = (p + objs);
     objspace->heap.sorted[hi]->base = heaps;
     objspace->heap.sorted[hi]->limit = objs;
+    objspace->heap.sorted[hi]->metadata = NULL;
     assert(objspace->heap.free_bitmap != NULL);
     heaps->bits = (uintptr_t *)objspace->heap.free_bitmap;
     objspace->heap.sorted[hi]->bits = (uintptr_t 
*)objspace->heap.free_bitmap;
@@ -667,12 +679,19 @@ newobj(VALUE klass, VALUE flags)
     }

     MEMZERO((void*)obj, RVALUE, 1);
-#ifdef GC_DEBUG
-    RANY(obj)->file = rb_sourcefile();
-    RANY(obj)->line = rb_sourceline();
-#endif
     objspace->total_allocated_object_num++;

+    if (UNLIKELY(track_metadata)) {
+        struct heaps_header *heap = GET_HEAP_HEADER(obj);
+        if (!heap->metadata)
+            heap->metadata = calloc(HEAP_OBJ_LIMIT, 
sizeof(rb_obj_metadata_t));
+        if (heap->metadata) {
+            rb_obj_metadata_t *meta = 
&heap->metadata[NUM_IN_SLOT(obj)];
+            meta->file = rb_sourcefilename();
+            meta->line = rb_sourceline();
+        }
+    }
+
     return obj;
 }

@@ -867,6 +886,8 @@ free_unused_heaps(rb_objspace_t *objspace)
                 last = objspace->heap.sorted[i];
       }
       else {
+                if (objspace->heap.sorted[i]->metadata)
+                    free(objspace->heap.sorted[i]->metadata);
     aligned_free(objspace->heap.sorted[i]);
       }
       heaps_used--;
@@ -1736,6 +1757,55 @@ rb_obj_id(VALUE obj)
     return nonspecial_obj_id(obj);
 }

+static inline rb_obj_metadata_t *
+rb_obj_get_metadata(VALUE obj)
+{
+    struct heaps_header *heap;
+
+    if (SPECIAL_CONST_P(obj))
+        return NULL;
+
+    heap = GET_HEAP_HEADER(obj);
+    if (!heap->metadata)
+        return NULL;
+
+    return &heap->metadata[NUM_IN_SLOT(obj)];
+}
+
+/*
+ *  Document-method: __sourcefile__
+ *
+ *  call-seq:
+ *     obj.__sourcefile__       -> string
+ *
+ *  Returns a string filename where +obj+ was allocated.
+ *
+ *  This method is only expected to work on C Ruby. An environment
+ *  variable (RUBY_OBJECT_METADATA=1) must be set to enable this
+ *  feature.
+ */
+static VALUE
+rb_obj_sourcefile(VALUE obj)
+{
+    rb_obj_metadata_t *meta = rb_obj_get_metadata(obj);
+
+    if (!track_metadata)
+        rb_warn("#__sourcefile__ requires RUBY_OBJECT_METADATA=1");
+
+    return meta ? meta->file : Qnil;
+}
+
+static VALUE
+rb_obj_sourceline(VALUE obj)
+{
+    rb_obj_metadata_t *meta = rb_obj_get_metadata(obj);
+
+    if (!track_metadata)
+        rb_warn("#__sourceline__ requires RUBY_OBJECT_METADATA=1");
+
+    return meta ? INT2FIX(meta->line) : Qnil;
+}
+
 static int
 set_zero(st_data_t key, st_data_t val, st_data_t arg)
 {
@@ -2606,6 +2676,7 @@ rb_gc_mark(VALUE ptr)
 static void
 gc_mark_children(rb_objspace_t *objspace, VALUE ptr)
 {
+    register rb_obj_metadata_t *meta;
     register RVALUE *obj = RANY(ptr);

     goto marking;    /* skip */
@@ -2626,6 +2697,9 @@ gc_mark_children(rb_objspace_t *objspace, VALUE 
ptr)
   rb_mark_generic_ivar(ptr);
     }

+    if ((meta = rb_obj_get_metadata(ptr)) && RTEST(meta->file))
+        gc_mark(objspace, meta->file);
+
     switch (BUILTIN_TYPE(obj)) {
       case T_NIL:
       case T_FIXNUM:
@@ -3294,10 +3368,17 @@ rb_gc_disable(void)
 void
 rb_gc_set_params(void)
 {
+    char *track_metadata_ptr;
     char *malloc_limit_ptr, *heap_min_slots_ptr, *free_min_ptr, 
*growth_factor_ptr;

     if (rb_safe_level() > 0) return;

+    track_metadata_ptr = getenv("RUBY_OBJECT_METADATA");
+    if (track_metadata_ptr != NULL) {
+  if (RTEST(ruby_verbose))
+      fprintf(stderr, "track_metadata=TRUE (FALSE)\n");
+  track_metadata = TRUE;
+    }
     malloc_limit_ptr = getenv("RUBY_GC_MALLOC_LIMIT");
     if (malloc_limit_ptr != NULL) {
   int malloc_limit_i = atoi(malloc_limit_ptr);
@@ -4535,6 +4616,9 @@ Init_GC(void)
     rb_define_method(rb_cBasicObject, "__id__", rb_obj_id, 0);
     rb_define_method(rb_mKernel, "object_id", rb_obj_id, 0);

+    rb_define_method(rb_cBasicObject, "__sourcefile__", 
rb_obj_sourcefile, 0);
+    rb_define_method(rb_cBasicObject, "__sourceline__", 
rb_obj_sourceline, 0);
+
     rb_define_module_function(rb_mObSpace, "count_objects", 
count_objects, -1);

     {
diff --git a/ruby.c b/ruby.c
index 6b61162..4c7e93f 100644
--- a/ruby.c
+++ b/ruby.c
@@ -1337,6 +1337,8 @@ process_options(int argc, char **argv, struct 
cmdline_options *opt)
   return Qtrue;
     }

+    rb_gc_set_params();
+
     if (!(opt->disable & DISABLE_BIT(rubyopt)) &&
   opt->safe_level == 0 && (s = getenv("RUBYOPT"))) {
   VALUE src_enc_name = opt->src.enc.name;
@@ -1570,7 +1572,6 @@ process_options(int argc, char **argv, struct 
cmdline_options *opt)
     rb_define_readonly_boolean("$-a", opt->do_split);

     rb_set_safe_level(opt->safe_level);
-    rb_gc_set_params();

     return iseq;
 }
Posted by tmm1 (Aman Gupta) (Guest)
on 2013-03-16 12:01
(Received via mailing list)
Issue #8107 has been updated by tmm1 (Aman Gupta).


> +    rb_gc_set_params();
> +
>      if (!(opt->disable & DISABLE_BIT(rubyopt)) &&
>    opt->safe_level == 0 && (s = getenv("RUBYOPT"))) {
>    VALUE src_enc_name = opt->src.enc.name;
> @@ -1570,7 +1572,6 @@ process_options(int argc, char **argv, struct 
cmdline_options *opt)
>      rb_define_readonly_boolean("$-a", opt->do_split);
>
>      rb_set_safe_level(opt->safe_level);
> -    rb_gc_set_params();

This is a hack. I wanted to set track_metadata=1 as early as possible, 
especially before require('rubygems').

Maybe it makes more sense to add a command-line option instead of an ENV 
flag, e.g. ruby --debug-objects?
----------------------------------------
Feature #8107: [patch] runtime flag to track object allocation metadata
https://bugs.ruby-lang.org/issues/8107#change-37665

Author: tmm1 (Aman Gupta)
Status: Open
Priority: Normal
Assignee:
Category: core
Target version:


When a ruby program contains a reference leak, debugging is a lot easier 
if you know where each object was allocated. Tools like bleakhouse and 
memprof have provided this functionality in the past, but were brittle 
and required source/runtime patches to ruby.

Ruby already provides basic callsite tracking if you recompile ruby with 
GC_DEBUG. This is impractical for daily use however, since it increases 
the size of the ruby heap by ~30%. There is also no API to access the 
debug information.

The following patch moves the GC_DEBUG file/line tracking outside of 
RVALUE, and adds a runtime flag (via environment variable) to enable it. 
This way normal usage is not affected by additional memory usage, but it 
is still simple to enable tracking for debugging purposes without having 
to recompile ruby.

I've exposed this data via BasicObject#__sourcefile__ and 
BasicObject#__sourceline__

$ ruby -e'
  GC.start
  ObjectSpace.each_object.first(1).each{ |o|
    p [o.class, o, o.__sourcefile__, o.__sourceline__]
  }
'
-e:4: warning: #__sourcefile__ requires RUBY_OBJECT_METADATA=1
-e:4: warning: #__sourceline__ requires RUBY_OBJECT_METADATA=1
[String, 
"/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems/exceptions", 
nil, nil]

$ RUBY_OBJECT_METADATA=1 ruby -e'
  GC.start
  ObjectSpace.each_object.first(1).each{ |o|
    p [o.class, o, o.__sourcefile__, o.__sourceline__]
  }
'
[String, "$(datarootdir)/doc/$(PACKAGE)", 
"/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems.rb", 8]

diff --git a/gc.c b/gc.c
index bd95073..2fc1d0c 100644
--- a/gc.c
+++ b/gc.c
@@ -81,6 +81,7 @@ typedef struct {
 #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
     int gc_stress;
 #endif
+    int track_metadata;
 } ruby_gc_params_t;

 static ruby_gc_params_t initial_params = {
@@ -91,6 +92,7 @@ static ruby_gc_params_t initial_params = {
 #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
     FALSE,
 #endif
+    FALSE
 };

 #define nomem_error GET_VM()->special_exceptions[ruby_error_nomemory]
@@ -162,6 +164,11 @@ typedef struct RVALUE {
 #pragma pack(pop)
 #endif

+typedef struct rb_obj_metadata {
+    VALUE file;
+    unsigned short line;
+} rb_obj_metadata_t;
+
 struct heaps_slot {
     struct heaps_header *header;
     uintptr_t *bits;
@@ -177,6 +184,7 @@ struct heaps_header {
     RVALUE *start;
     RVALUE *end;
     size_t limit;
+    rb_obj_metadata_t *metadata;
 };

 struct heaps_free_bitmap {
@@ -291,6 +299,7 @@ int *ruby_initial_gc_stress_ptr = 
&rb_objspace.gc_stress;
 #define initial_heap_min_slots  initial_params.initial_heap_min_slots
 #define initial_free_min  initial_params.initial_free_min
 #define initial_growth_factor  initial_params.initial_growth_factor
+#define track_metadata  initial_params.track_metadata

 #define is_lazy_sweeping(objspace) ((objspace)->heap.sweep_slots != 0)

@@ -413,6 +422,8 @@ rb_objspace_free(rb_objspace_t *objspace)
     if (objspace->heap.sorted) {
   size_t i;
   for (i = 0; i < heaps_used; ++i) {
+            if (objspace->heap.sorted[i]->metadata)
+                free(objspace->heap.sorted[i]->metadata);
             free(objspace->heap.sorted[i]->bits);
       aligned_free(objspace->heap.sorted[i]);
   }
@@ -538,6 +549,7 @@ assign_heap_slot(rb_objspace_t *objspace)
     objspace->heap.sorted[hi]->end = (p + objs);
     objspace->heap.sorted[hi]->base = heaps;
     objspace->heap.sorted[hi]->limit = objs;
+    objspace->heap.sorted[hi]->metadata = NULL;
     assert(objspace->heap.free_bitmap != NULL);
     heaps->bits = (uintptr_t *)objspace->heap.free_bitmap;
     objspace->heap.sorted[hi]->bits = (uintptr_t 
*)objspace->heap.free_bitmap;
@@ -667,12 +679,19 @@ newobj(VALUE klass, VALUE flags)
     }

     MEMZERO((void*)obj, RVALUE, 1);
-#ifdef GC_DEBUG
-    RANY(obj)->file = rb_sourcefile();
-    RANY(obj)->line = rb_sourceline();
-#endif
     objspace->total_allocated_object_num++;

+    if (UNLIKELY(track_metadata)) {
+        struct heaps_header *heap = GET_HEAP_HEADER(obj);
+        if (!heap->metadata)
+            heap->metadata = calloc(HEAP_OBJ_LIMIT, 
sizeof(rb_obj_metadata_t));
+        if (heap->metadata) {
+            rb_obj_metadata_t *meta = 
&heap->metadata[NUM_IN_SLOT(obj)];
+            meta->file = rb_sourcefilename();
+            meta->line = rb_sourceline();
+        }
+    }
+
     return obj;
 }

@@ -867,6 +886,8 @@ free_unused_heaps(rb_objspace_t *objspace)
                 last = objspace->heap.sorted[i];
       }
       else {
+                if (objspace->heap.sorted[i]->metadata)
+                    free(objspace->heap.sorted[i]->metadata);
     aligned_free(objspace->heap.sorted[i]);
       }
       heaps_used--;
@@ -1736,6 +1757,55 @@ rb_obj_id(VALUE obj)
     return nonspecial_obj_id(obj);
 }

+static inline rb_obj_metadata_t *
+rb_obj_get_metadata(VALUE obj)
+{
+    struct heaps_header *heap;
+
+    if (SPECIAL_CONST_P(obj))
+        return NULL;
+
+    heap = GET_HEAP_HEADER(obj);
+    if (!heap->metadata)
+        return NULL;
+
+    return &heap->metadata[NUM_IN_SLOT(obj)];
+}
+
+/*
+ *  Document-method: __sourcefile__
+ *
+ *  call-seq:
+ *     obj.__sourcefile__       -> string
+ *
+ *  Returns a string filename where +obj+ was allocated.
+ *
+ *  This method is only expected to work on C Ruby. An environment
+ *  variable (RUBY_OBJECT_METADATA=1) must be set to enable this
+ *  feature.
+ */
+static VALUE
+rb_obj_sourcefile(VALUE obj)
+{
+    rb_obj_metadata_t *meta = rb_obj_get_metadata(obj);
+
+    if (!track_metadata)
+        rb_warn("#__sourcefile__ requires RUBY_OBJECT_METADATA=1");
+
+    return meta ? meta->file : Qnil;
+}
+
+static VALUE
+rb_obj_sourceline(VALUE obj)
+{
+    rb_obj_metadata_t *meta = rb_obj_get_metadata(obj);
+
+    if (!track_metadata)
+        rb_warn("#__sourceline__ requires RUBY_OBJECT_METADATA=1");
+
+    return meta ? INT2FIX(meta->line) : Qnil;
+}
+
 static int
 set_zero(st_data_t key, st_data_t val, st_data_t arg)
 {
@@ -2606,6 +2676,7 @@ rb_gc_mark(VALUE ptr)
 static void
 gc_mark_children(rb_objspace_t *objspace, VALUE ptr)
 {
+    register rb_obj_metadata_t *meta;
     register RVALUE *obj = RANY(ptr);

     goto marking;    /* skip */
@@ -2626,6 +2697,9 @@ gc_mark_children(rb_objspace_t *objspace, VALUE 
ptr)
   rb_mark_generic_ivar(ptr);
     }

+    if ((meta = rb_obj_get_metadata(ptr)) && RTEST(meta->file))
+        gc_mark(objspace, meta->file);
+
     switch (BUILTIN_TYPE(obj)) {
       case T_NIL:
       case T_FIXNUM:
@@ -3294,10 +3368,17 @@ rb_gc_disable(void)
 void
 rb_gc_set_params(void)
 {
+    char *track_metadata_ptr;
     char *malloc_limit_ptr, *heap_min_slots_ptr, *free_min_ptr, 
*growth_factor_ptr;

     if (rb_safe_level() > 0) return;

+    track_metadata_ptr = getenv("RUBY_OBJECT_METADATA");
+    if (track_metadata_ptr != NULL) {
+  if (RTEST(ruby_verbose))
+      fprintf(stderr, "track_metadata=TRUE (FALSE)\n");
+  track_metadata = TRUE;
+    }
     malloc_limit_ptr = getenv("RUBY_GC_MALLOC_LIMIT");
     if (malloc_limit_ptr != NULL) {
   int malloc_limit_i = atoi(malloc_limit_ptr);
@@ -4535,6 +4616,9 @@ Init_GC(void)
     rb_define_method(rb_cBasicObject, "__id__", rb_obj_id, 0);
     rb_define_method(rb_mKernel, "object_id", rb_obj_id, 0);

+    rb_define_method(rb_cBasicObject, "__sourcefile__", 
rb_obj_sourcefile, 0);
+    rb_define_method(rb_cBasicObject, "__sourceline__", 
rb_obj_sourceline, 0);
+
     rb_define_module_function(rb_mObSpace, "count_objects", 
count_objects, -1);

     {
diff --git a/ruby.c b/ruby.c
index 6b61162..4c7e93f 100644
--- a/ruby.c
+++ b/ruby.c
@@ -1337,6 +1337,8 @@ process_options(int argc, char **argv, struct 
cmdline_options *opt)
   return Qtrue;
     }

+    rb_gc_set_params();
+
     if (!(opt->disable & DISABLE_BIT(rubyopt)) &&
   opt->safe_level == 0 && (s = getenv("RUBYOPT"))) {
   VALUE src_enc_name = opt->src.enc.name;
@@ -1570,7 +1572,6 @@ process_options(int argc, char **argv, struct 
cmdline_options *opt)
     rb_define_readonly_boolean("$-a", opt->do_split);

     rb_set_safe_level(opt->safe_level);
-    rb_gc_set_params();

     return iseq;
 }
Posted by Charles Nutter (headius)
on 2013-03-16 20:02
(Received via mailing list)
Issue #8107 has been updated by headius (Charles Nutter).


No objections to adding this feature to MRI, but anything that goes on 
the standard core classes needs to involve other implementers. If this 
is not intended to be a standard Ruby feature (as opposed to an MRI-only 
one), it would probably be best to expose the file/line via an 
MRI-specific class. Something like RubyVM.allocated_position(obj) => [file, line].

JRuby can do instance tracking, but it's via JVM tooling APIs turned on 
at the command line, and the data isn't directly accessible from the 
running program; basically, it gets streamed out to a 
debugging/data-collection client tool. I'm not sure it would be possible 
to provide allocation tracking as a runtime flag or as an environment 
variable (we don't process env vars until the JVM is already booted), but 
a command-line flag is possible (we process them both before and after 
the JVM starts).
----------------------------------------
Feature #8107: [patch] runtime flag to track object allocation metadata
https://bugs.ruby-lang.org/issues/8107#change-37668

Author: tmm1 (Aman Gupta)
Status: Open
Priority: Normal
Assignee:
Category: core
Target version:


When a ruby program contains a reference leak, debugging is a lot easier 
if you know where each object was allocated. Tools like bleakhouse and 
memprof have provided this functionality in the past, but were brittle 
and required source/runtime patches to ruby.

Ruby already provides basic callsite tracking if you recompile ruby with 
GC_DEBUG. This is impractical for daily use however, since it increases 
the size of the ruby heap by ~30%. There is also no API to access the 
debug information.

The following patch moves the GC_DEBUG file/line tracking outside of 
RVALUE, and adds a runtime flag (via environment variable) to enable it. 
This way normal usage is not affected by additional memory usage, but it 
is still simple to enable tracking for debugging purposes without having 
to recompile ruby.

I've exposed this data via BasicObject#__sourcefile__ and 
BasicObject#__sourceline__

$ ruby -e'
  GC.start
  ObjectSpace.each_object.first(1).each{ |o|
    p [o.class, o, o.__sourcefile__, o.__sourceline__]
  }
'
-e:4: warning: #__sourcefile__ requires RUBY_OBJECT_METADATA=1
-e:4: warning: #__sourceline__ requires RUBY_OBJECT_METADATA=1
[String, 
"/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems/exceptions", 
nil, nil]

$ RUBY_OBJECT_METADATA=1 ruby -e'
  GC.start
  ObjectSpace.each_object.first(1).each{ |o|
    p [o.class, o, o.__sourcefile__, o.__sourceline__]
  }
'
[String, "$(datarootdir)/doc/$(PACKAGE)", 
"/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems.rb", 8]

diff --git a/gc.c b/gc.c
index bd95073..2fc1d0c 100644
--- a/gc.c
+++ b/gc.c
@@ -81,6 +81,7 @@ typedef struct {
 #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
     int gc_stress;
 #endif
+    int track_metadata;
 } ruby_gc_params_t;

 static ruby_gc_params_t initial_params = {
@@ -91,6 +92,7 @@ static ruby_gc_params_t initial_params = {
 #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
     FALSE,
 #endif
+    FALSE
 };

 #define nomem_error GET_VM()->special_exceptions[ruby_error_nomemory]
@@ -162,6 +164,11 @@ typedef struct RVALUE {
 #pragma pack(pop)
 #endif

+typedef struct rb_obj_metadata {
+    VALUE file;
+    unsigned short line;
+} rb_obj_metadata_t;
+
 struct heaps_slot {
     struct heaps_header *header;
     uintptr_t *bits;
@@ -177,6 +184,7 @@ struct heaps_header {
     RVALUE *start;
     RVALUE *end;
     size_t limit;
+    rb_obj_metadata_t *metadata;
 };

 struct heaps_free_bitmap {
@@ -291,6 +299,7 @@ int *ruby_initial_gc_stress_ptr = 
&rb_objspace.gc_stress;
 #define initial_heap_min_slots  initial_params.initial_heap_min_slots
 #define initial_free_min  initial_params.initial_free_min
 #define initial_growth_factor  initial_params.initial_growth_factor
+#define track_metadata  initial_params.track_metadata

 #define is_lazy_sweeping(objspace) ((objspace)->heap.sweep_slots != 0)

@@ -413,6 +422,8 @@ rb_objspace_free(rb_objspace_t *objspace)
     if (objspace->heap.sorted) {
   size_t i;
   for (i = 0; i < heaps_used; ++i) {
+            if (objspace->heap.sorted[i]->metadata)
+                free(objspace->heap.sorted[i]->metadata);
             free(objspace->heap.sorted[i]->bits);
       aligned_free(objspace->heap.sorted[i]);
   }
@@ -538,6 +549,7 @@ assign_heap_slot(rb_objspace_t *objspace)
     objspace->heap.sorted[hi]->end = (p + objs);
     objspace->heap.sorted[hi]->base = heaps;
     objspace->heap.sorted[hi]->limit = objs;
+    objspace->heap.sorted[hi]->metadata = NULL;
     assert(objspace->heap.free_bitmap != NULL);
     heaps->bits = (uintptr_t *)objspace->heap.free_bitmap;
     objspace->heap.sorted[hi]->bits = (uintptr_t 
*)objspace->heap.free_bitmap;
@@ -667,12 +679,19 @@ newobj(VALUE klass, VALUE flags)
     }

     MEMZERO((void*)obj, RVALUE, 1);
-#ifdef GC_DEBUG
-    RANY(obj)->file = rb_sourcefile();
-    RANY(obj)->line = rb_sourceline();
-#endif
     objspace->total_allocated_object_num++;

+    if (UNLIKELY(track_metadata)) {
+        struct heaps_header *heap = GET_HEAP_HEADER(obj);
+        if (!heap->metadata)
+            heap->metadata = calloc(HEAP_OBJ_LIMIT, 
sizeof(rb_obj_metadata_t));
+        if (heap->metadata) {
+            rb_obj_metadata_t *meta = 
&heap->metadata[NUM_IN_SLOT(obj)];
+            meta->file = rb_sourcefilename();
+            meta->line = rb_sourceline();
+        }
+    }
+
     return obj;
 }

@@ -867,6 +886,8 @@ free_unused_heaps(rb_objspace_t *objspace)
                 last = objspace->heap.sorted[i];
       }
       else {
+                if (objspace->heap.sorted[i]->metadata)
+                    free(objspace->heap.sorted[i]->metadata);
     aligned_free(objspace->heap.sorted[i]);
       }
       heaps_used--;
@@ -1736,6 +1757,55 @@ rb_obj_id(VALUE obj)
     return nonspecial_obj_id(obj);
 }

+static inline rb_obj_metadata_t *
+rb_obj_get_metadata(VALUE obj)
+{
+    struct heaps_header *heap;
+
+    if (SPECIAL_CONST_P(obj))
+        return NULL;
+
+    heap = GET_HEAP_HEADER(obj);
+    if (!heap->metadata)
+        return NULL;
+
+    return &heap->metadata[NUM_IN_SLOT(obj)];
+}
+
+/*
+ *  Document-method: __sourcefile__
+ *
+ *  call-seq:
+ *     obj.__sourcefile__       -> string
+ *
+ *  Returns a string filename where +obj+ was allocated.
+ *
+ *  This method is only expected to work on C Ruby. An environment
+ *  variable (RUBY_OBJECT_METADATA=1) must be set to enable this
+ *  feature.
+ */
+static VALUE
+rb_obj_sourcefile(VALUE obj)
+{
+    rb_obj_metadata_t *meta = rb_obj_get_metadata(obj);
+
+    if (!track_metadata)
+        rb_warn("#__sourcefile__ requires RUBY_OBJECT_METADATA=1");
+
+    return meta ? meta->file : Qnil;
+}
+
+static VALUE
+rb_obj_sourceline(VALUE obj)
+{
+    rb_obj_metadata_t *meta = rb_obj_get_metadata(obj);
+
+    if (!track_metadata)
+        rb_warn("#__sourceline__ requires RUBY_OBJECT_METADATA=1");
+
+    return meta ? INT2FIX(meta->line) : Qnil;
+}
+
 static int
 set_zero(st_data_t key, st_data_t val, st_data_t arg)
 {
@@ -2606,6 +2676,7 @@ rb_gc_mark(VALUE ptr)
 static void
 gc_mark_children(rb_objspace_t *objspace, VALUE ptr)
 {
+    register rb_obj_metadata_t *meta;
     register RVALUE *obj = RANY(ptr);

     goto marking;    /* skip */
@@ -2626,6 +2697,9 @@ gc_mark_children(rb_objspace_t *objspace, VALUE 
ptr)
   rb_mark_generic_ivar(ptr);
     }

+    if ((meta = rb_obj_get_metadata(ptr)) && RTEST(meta->file))
+        gc_mark(objspace, meta->file);
+
     switch (BUILTIN_TYPE(obj)) {
       case T_NIL:
       case T_FIXNUM:
@@ -3294,10 +3368,17 @@ rb_gc_disable(void)
 void
 rb_gc_set_params(void)
 {
+    char *track_metadata_ptr;
     char *malloc_limit_ptr, *heap_min_slots_ptr, *free_min_ptr, 
*growth_factor_ptr;

     if (rb_safe_level() > 0) return;

+    track_metadata_ptr = getenv("RUBY_OBJECT_METADATA");
+    if (track_metadata_ptr != NULL) {
+  if (RTEST(ruby_verbose))
+      fprintf(stderr, "track_metadata=TRUE (FALSE)\n");
+  track_metadata = TRUE;
+    }
     malloc_limit_ptr = getenv("RUBY_GC_MALLOC_LIMIT");
     if (malloc_limit_ptr != NULL) {
   int malloc_limit_i = atoi(malloc_limit_ptr);
@@ -4535,6 +4616,9 @@ Init_GC(void)
     rb_define_method(rb_cBasicObject, "__id__", rb_obj_id, 0);
     rb_define_method(rb_mKernel, "object_id", rb_obj_id, 0);

+    rb_define_method(rb_cBasicObject, "__sourcefile__", 
rb_obj_sourcefile, 0);
+    rb_define_method(rb_cBasicObject, "__sourceline__", 
rb_obj_sourceline, 0);
+
     rb_define_module_function(rb_mObSpace, "count_objects", 
count_objects, -1);

     {
diff --git a/ruby.c b/ruby.c
index 6b61162..4c7e93f 100644
--- a/ruby.c
+++ b/ruby.c
@@ -1337,6 +1337,8 @@ process_options(int argc, char **argv, struct 
cmdline_options *opt)
   return Qtrue;
     }

+    rb_gc_set_params();
+
     if (!(opt->disable & DISABLE_BIT(rubyopt)) &&
   opt->safe_level == 0 && (s = getenv("RUBYOPT"))) {
   VALUE src_enc_name = opt->src.enc.name;
@@ -1570,7 +1572,6 @@ process_options(int argc, char **argv, struct 
cmdline_options *opt)
     rb_define_readonly_boolean("$-a", opt->do_split);

     rb_set_safe_level(opt->safe_level);
-    rb_gc_set_params();

     return iseq;
 }
Posted by tmm1 (Aman Gupta) (Guest)
on 2013-03-19 05:38
(Received via mailing list)
Issue #8107 has been updated by tmm1 (Aman Gupta).


> Something like RubyVM.allocated_position(obj) => [file, line].

I'll defer API decisions to core, but a method under RubyVM or in the 
new objspace.so would be fine. I slightly prefer two separate methods 
to avoid an array allocation when you're only interested in the filename.

Primarily, I am interested in feedback on the runtime flag in this 
patch. JRuby and Rubinius both provide allocation tracking, but MRI 
currently has no equivalent. This makes debugging object leaks very 
painful.

It sounds like a command line option would be simpler to standardize on 
across implementations. I prefer it over an environment variable as 
well.

diff --git a/gc.c b/gc.c
index 2fc1d0c..cafebf2 100644
--- a/gc.c
+++ b/gc.c
@@ -1780,9 +1780,8 @@ rb_obj_get_metadata(VALUE obj)
  *
  *  Returns a string filename where +obj+ was allocated.
  *
- *  This method is only expected to work on C Ruby. An environment
- *  variable (RUBY_OBJECT_METADATA=1) must be set to enable this
- *  feature.
+ *  This method is only expected to work on C Ruby. Ruby must be run
+ *  with --debug-objects to enable this feature.
  */
 static VALUE
 rb_obj_sourcefile(VALUE obj)
@@ -1790,7 +1789,7 @@ rb_obj_sourcefile(VALUE obj)
     rb_obj_metadata_t *meta = rb_obj_get_metadata(obj);

     if (!track_metadata)
-        rb_warn("#__sourcefile__ requires RUBY_OBJECT_METADATA=1");
+        rb_warn("__sourcefile__ requires --debug-objects");

     return meta ? meta->file : Qnil;
 }
@@ -1801,7 +1800,7 @@ rb_obj_sourceline(VALUE obj)
     rb_obj_metadata_t *meta = rb_obj_get_metadata(obj);

     if (!track_metadata)
-        rb_warn("#__sourceline__ requires RUBY_OBJECT_METADATA=1");
+        rb_warn("__sourceline__ requires --debug-objects");

     return meta ? INT2FIX(meta->line) : Qnil;
 }
@@ -3366,19 +3365,18 @@ rb_gc_disable(void)
 }

 void
+rb_obj_enable_metadata(void)
+{
+    track_metadata = TRUE;
+}
+
+void
 rb_gc_set_params(void)
 {
-    char *track_metadata_ptr;
     char *malloc_limit_ptr, *heap_min_slots_ptr, *free_min_ptr, 
*growth_factor_ptr;

     if (rb_safe_level() > 0) return;

-    track_metadata_ptr = getenv("RUBY_OBJECT_METADATA");
-    if (track_metadata_ptr != NULL) {
-  if (RTEST(ruby_verbose))
-      fprintf(stderr, "track_metadata=TRUE (FALSE)\n");
-  track_metadata = TRUE;
-    }
     malloc_limit_ptr = getenv("RUBY_GC_MALLOC_LIMIT");
     if (malloc_limit_ptr != NULL) {
   int malloc_limit_i = atoi(malloc_limit_ptr);
diff --git a/internal.h b/internal.h
index b099f24..5386f7d 100644
--- a/internal.h
+++ b/internal.h
@@ -143,6 +143,7 @@ void rb_w32_init_file(void);
 /* gc.c */
 void Init_heap(void);
 void *ruby_mimmalloc(size_t size);
+void rb_obj_enable_metadata(void);

 /* inits.c */
 void rb_call_inits(void);
diff --git a/ruby.c b/ruby.c
index a0b438d..095bf29 100644
--- a/ruby.c
+++ b/ruby.c
@@ -1117,6 +1117,9 @@ proc_options(long argc, char **argv, struct 
cmdline_options *opt, int envopt)
     set_source_encoding_once(opt, s, 0);
       }
 #endif
+      else if (strcmp("debug-objects", s) == 0) {
+    rb_obj_enable_metadata();
+      }
       else if (strcmp("version", s) == 0) {
     if (envopt) goto noenvopt_long;
     opt->dump |= DUMP_BIT(version);
@@ -1364,8 +1367,6 @@ process_options(int argc, char **argv, struct 
cmdline_options *opt)
   ruby_show_copyright();
     }

-    rb_gc_set_params();
-
     if (opt->safe_level >= 4) {
   OBJ_TAINT(rb_argv);
   OBJ_TAINT(GET_VM()->load_path);
@@ -1572,6 +1573,7 @@ process_options(int argc, char **argv, struct 
cmdline_options *opt)
     rb_define_readonly_boolean("$-a", opt->do_split);

     rb_set_safe_level(opt->safe_level);
+    rb_gc_set_params();

     return iseq;
 }

----------------------------------------
Feature #8107: [patch] runtime flag to track object allocation metadata
https://bugs.ruby-lang.org/issues/8107#change-37715

Author: tmm1 (Aman Gupta)
Status: Open
Priority: Normal
Assignee:
Category: core
Target version:


When a ruby program contains a reference leak, debugging is a lot easier 
if you know where each object was allocated. Tools like bleakhouse and 
memprof have provided this functionality in the past, but were brittle 
and required source/runtime patches to ruby.

Ruby already provides basic callsite tracking if you recompile ruby with 
GC_DEBUG. This is impractical for daily use however, since it increases 
the size of the ruby heap by ~30%. There is also no API to access the 
debug information.

The following patch moves the GC_DEBUG file/line tracking outside of 
RVALUE, and adds a runtime flag (via environment variable) to enable it. 
This way normal usage is not affected by additional memory usage, but it 
is still simple to enable tracking for debugging purposes without having 
to recompile ruby.

I've exposed this data via BasicObject#__sourcefile__ and 
BasicObject#__sourceline__

$ ruby -e'
  GC.start
  ObjectSpace.each_object.first(1).each{ |o|
    p [o.class, o, o.__sourcefile__, o.__sourceline__]
  }
'
-e:4: warning: #__sourcefile__ requires RUBY_OBJECT_METADATA=1
-e:4: warning: #__sourceline__ requires RUBY_OBJECT_METADATA=1
[String, 
"/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems/exceptions", 
nil, nil]

$ RUBY_OBJECT_METADATA=1 ruby -e'
  GC.start
  ObjectSpace.each_object.first(1).each{ |o|
    p [o.class, o, o.__sourcefile__, o.__sourceline__]
  }
'
[String, "$(datarootdir)/doc/$(PACKAGE)", 
"/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems.rb", 8]

diff --git a/gc.c b/gc.c
index bd95073..2fc1d0c 100644
--- a/gc.c
+++ b/gc.c
@@ -81,6 +81,7 @@ typedef struct {
 #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
     int gc_stress;
 #endif
+    int track_metadata;
 } ruby_gc_params_t;

 static ruby_gc_params_t initial_params = {
@@ -91,6 +92,7 @@ static ruby_gc_params_t initial_params = {
 #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
     FALSE,
 #endif
+    FALSE
 };

 #define nomem_error GET_VM()->special_exceptions[ruby_error_nomemory]
@@ -162,6 +164,11 @@ typedef struct RVALUE {
 #pragma pack(pop)
 #endif

+typedef struct rb_obj_metadata {
+    VALUE file;
+    unsigned short line;
+} rb_obj_metadata_t;
+
 struct heaps_slot {
     struct heaps_header *header;
     uintptr_t *bits;
@@ -177,6 +184,7 @@ struct heaps_header {
     RVALUE *start;
     RVALUE *end;
     size_t limit;
+    rb_obj_metadata_t *metadata;
 };

 struct heaps_free_bitmap {
@@ -291,6 +299,7 @@ int *ruby_initial_gc_stress_ptr = 
&rb_objspace.gc_stress;
 #define initial_heap_min_slots  initial_params.initial_heap_min_slots
 #define initial_free_min  initial_params.initial_free_min
 #define initial_growth_factor  initial_params.initial_growth_factor
+#define track_metadata  initial_params.track_metadata

 #define is_lazy_sweeping(objspace) ((objspace)->heap.sweep_slots != 0)

@@ -413,6 +422,8 @@ rb_objspace_free(rb_objspace_t *objspace)
     if (objspace->heap.sorted) {
   size_t i;
   for (i = 0; i < heaps_used; ++i) {
+            if (objspace->heap.sorted[i]->metadata)
+                free(objspace->heap.sorted[i]->metadata);
             free(objspace->heap.sorted[i]->bits);
       aligned_free(objspace->heap.sorted[i]);
   }
@@ -538,6 +549,7 @@ assign_heap_slot(rb_objspace_t *objspace)
     objspace->heap.sorted[hi]->end = (p + objs);
     objspace->heap.sorted[hi]->base = heaps;
     objspace->heap.sorted[hi]->limit = objs;
+    objspace->heap.sorted[hi]->metadata = NULL;
     assert(objspace->heap.free_bitmap != NULL);
     heaps->bits = (uintptr_t *)objspace->heap.free_bitmap;
     objspace->heap.sorted[hi]->bits = (uintptr_t 
*)objspace->heap.free_bitmap;
@@ -667,12 +679,19 @@ newobj(VALUE klass, VALUE flags)
     }

     MEMZERO((void*)obj, RVALUE, 1);
-#ifdef GC_DEBUG
-    RANY(obj)->file = rb_sourcefile();
-    RANY(obj)->line = rb_sourceline();
-#endif
     objspace->total_allocated_object_num++;

+    if (UNLIKELY(track_metadata)) {
+        struct heaps_header *heap = GET_HEAP_HEADER(obj);
+        if (!heap->metadata)
+            heap->metadata = calloc(HEAP_OBJ_LIMIT, 
sizeof(rb_obj_metadata_t));
+        if (heap->metadata) {
+            rb_obj_metadata_t *meta = 
&heap->metadata[NUM_IN_SLOT(obj)];
+            meta->file = rb_sourcefilename();
+            meta->line = rb_sourceline();
+        }
+    }
+
     return obj;
 }

@@ -867,6 +886,8 @@ free_unused_heaps(rb_objspace_t *objspace)
                 last = objspace->heap.sorted[i];
       }
       else {
+                if (objspace->heap.sorted[i]->metadata)
+                    free(objspace->heap.sorted[i]->metadata);
     aligned_free(objspace->heap.sorted[i]);
       }
       heaps_used--;
@@ -1736,6 +1757,55 @@ rb_obj_id(VALUE obj)
     return nonspecial_obj_id(obj);
 }

+static inline rb_obj_metadata_t *
+rb_obj_get_metadata(VALUE obj)
+{
+    struct heaps_header *heap;
+
+    if (SPECIAL_CONST_P(obj))
+        return NULL;
+
+    heap = GET_HEAP_HEADER(obj);
+    if (!heap->metadata)
+        return NULL;
+
+    return &heap->metadata[NUM_IN_SLOT(obj)];
+}
+
+/*
+ *  Document-method: __sourcefile__
+ *
+ *  call-seq:
+ *     obj.__sourcefile__       -> string
+ *
+ *  Returns a string filename where +obj+ was allocated.
+ *
+ *  This method is only expected to work on C Ruby. An environment
+ *  variable (RUBY_OBJECT_METADATA=1) must be set to enable this
+ *  feature.
+ */
+static VALUE
+rb_obj_sourcefile(VALUE obj)
+{
+    rb_obj_metadata_t *meta = rb_obj_get_metadata(obj);
+
+    if (!track_metadata)
+        rb_warn("#__sourcefile__ requires RUBY_OBJECT_METADATA=1");
+
+    return meta ? meta->file : Qnil;
+}
+
+static VALUE
+rb_obj_sourceline(VALUE obj)
+{
+    rb_obj_metadata_t *meta = rb_obj_get_metadata(obj);
+
+    if (!track_metadata)
+        rb_warn("#__sourceline__ requires RUBY_OBJECT_METADATA=1");
+
+    return meta ? INT2FIX(meta->line) : Qnil;
+}
+
 static int
 set_zero(st_data_t key, st_data_t val, st_data_t arg)
 {
@@ -2606,6 +2676,7 @@ rb_gc_mark(VALUE ptr)
 static void
 gc_mark_children(rb_objspace_t *objspace, VALUE ptr)
 {
+    register rb_obj_metadata_t *meta;
     register RVALUE *obj = RANY(ptr);

     goto marking;    /* skip */
@@ -2626,6 +2697,9 @@ gc_mark_children(rb_objspace_t *objspace, VALUE 
ptr)
   rb_mark_generic_ivar(ptr);
     }

+    if ((meta = rb_obj_get_metadata(ptr)) && RTEST(meta->file))
+        gc_mark(objspace, meta->file);
+
     switch (BUILTIN_TYPE(obj)) {
       case T_NIL:
       case T_FIXNUM:
@@ -3294,10 +3368,17 @@ rb_gc_disable(void)
 void
 rb_gc_set_params(void)
 {
+    char *track_metadata_ptr;
     char *malloc_limit_ptr, *heap_min_slots_ptr, *free_min_ptr, 
*growth_factor_ptr;

     if (rb_safe_level() > 0) return;

+    track_metadata_ptr = getenv("RUBY_OBJECT_METADATA");
+    if (track_metadata_ptr != NULL) {
+  if (RTEST(ruby_verbose))
+      fprintf(stderr, "track_metadata=TRUE (FALSE)\n");
+  track_metadata = TRUE;
+    }
     malloc_limit_ptr = getenv("RUBY_GC_MALLOC_LIMIT");
     if (malloc_limit_ptr != NULL) {
   int malloc_limit_i = atoi(malloc_limit_ptr);
@@ -4535,6 +4616,9 @@ Init_GC(void)
     rb_define_method(rb_cBasicObject, "__id__", rb_obj_id, 0);
     rb_define_method(rb_mKernel, "object_id", rb_obj_id, 0);

+    rb_define_method(rb_cBasicObject, "__sourcefile__", 
rb_obj_sourcefile, 0);
+    rb_define_method(rb_cBasicObject, "__sourceline__", 
rb_obj_sourceline, 0);
+
     rb_define_module_function(rb_mObSpace, "count_objects", 
count_objects, -1);

     {
diff --git a/ruby.c b/ruby.c
index 6b61162..4c7e93f 100644
--- a/ruby.c
+++ b/ruby.c
@@ -1337,6 +1337,8 @@ process_options(int argc, char **argv, struct 
cmdline_options *opt)
   return Qtrue;
     }

+    rb_gc_set_params();
+
     if (!(opt->disable & DISABLE_BIT(rubyopt)) &&
   opt->safe_level == 0 && (s = getenv("RUBYOPT"))) {
   VALUE src_enc_name = opt->src.enc.name;
@@ -1570,7 +1572,6 @@ process_options(int argc, char **argv, struct 
cmdline_options *opt)
     rb_define_readonly_boolean("$-a", opt->do_split);

     rb_set_safe_level(opt->safe_level);
-    rb_gc_set_params();

     return iseq;
 }
Posted by tmm1 (Aman Gupta) (Guest)
on 2013-03-19 05:39
(Received via mailing list)
Issue #8107 has been updated by tmm1 (Aman Gupta).


> +typedef struct rb_obj_metadata {
> +    VALUE file;
> +    unsigned short line;
> +} rb_obj_metadata_t;

Maybe instead of file/line, this should be rb_iseq_t *iseq?

ko1-san, do you have any opinion on this patch?
----------------------------------------
Feature #8107: [patch] runtime flag to track object allocation metadata
https://bugs.ruby-lang.org/issues/8107#change-37716

Author: tmm1 (Aman Gupta)
Status: Open
Priority: Normal
Assignee:
Category: core
Target version:


When a ruby program contains a reference leak, debugging is a lot easier 
if you know where each object was allocated. Tools like bleakhouse and 
memprof have provided this functionality in the past, but were brittle 
and required source/runtime patches to ruby.

Ruby already provides basic callsite tracking if you recompile ruby with 
GC_DEBUG. This is impractical for daily use however, since it increases 
the size of the ruby heap by ~30%. There is also no API to access the 
debug information.

The following patch moves the GC_DEBUG file/line tracking outside of 
RVALUE, and adds a runtime flag (via environment variable) to enable it. 
This way normal usage is not affected by additional memory usage, but it 
is still simple to enable tracking for debugging purposes without having 
to recompile ruby.

I've exposed this data via BasicObject#__sourcefile__ and 
BasicObject#__sourceline__

$ ruby -e'
  GC.start
  ObjectSpace.each_object.first(1).each{ |o|
    p [o.class, o, o.__sourcefile__, o.__sourceline__]
  }
'
-e:4: warning: #__sourcefile__ requires RUBY_OBJECT_METADATA=1
-e:4: warning: #__sourceline__ requires RUBY_OBJECT_METADATA=1
[String, 
"/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems/exceptions", 
nil, nil]

$ RUBY_OBJECT_METADATA=1 ruby -e'
  GC.start
  ObjectSpace.each_object.first(1).each{ |o|
    p [o.class, o, o.__sourcefile__, o.__sourceline__]
  }
'
[String, "$(datarootdir)/doc/$(PACKAGE)", 
"/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems.rb", 8]

diff --git a/gc.c b/gc.c
index bd95073..2fc1d0c 100644
--- a/gc.c
+++ b/gc.c
@@ -81,6 +81,7 @@ typedef struct {
 #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
     int gc_stress;
 #endif
+    int track_metadata;
 } ruby_gc_params_t;

 static ruby_gc_params_t initial_params = {
@@ -91,6 +92,7 @@ static ruby_gc_params_t initial_params = {
 #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
     FALSE,
 #endif
+    FALSE
 };

 #define nomem_error GET_VM()->special_exceptions[ruby_error_nomemory]
@@ -162,6 +164,11 @@ typedef struct RVALUE {
 #pragma pack(pop)
 #endif

+typedef struct rb_obj_metadata {
+    VALUE file;
+    unsigned short line;
+} rb_obj_metadata_t;
+
 struct heaps_slot {
     struct heaps_header *header;
     uintptr_t *bits;
@@ -177,6 +184,7 @@ struct heaps_header {
     RVALUE *start;
     RVALUE *end;
     size_t limit;
+    rb_obj_metadata_t *metadata;
 };

 struct heaps_free_bitmap {
@@ -291,6 +299,7 @@ int *ruby_initial_gc_stress_ptr = 
&rb_objspace.gc_stress;
 #define initial_heap_min_slots  initial_params.initial_heap_min_slots
 #define initial_free_min  initial_params.initial_free_min
 #define initial_growth_factor  initial_params.initial_growth_factor
+#define track_metadata  initial_params.track_metadata

 #define is_lazy_sweeping(objspace) ((objspace)->heap.sweep_slots != 0)

@@ -413,6 +422,8 @@ rb_objspace_free(rb_objspace_t *objspace)
     if (objspace->heap.sorted) {
   size_t i;
   for (i = 0; i < heaps_used; ++i) {
+            if (objspace->heap.sorted[i]->metadata)
+                free(objspace->heap.sorted[i]->metadata);
             free(objspace->heap.sorted[i]->bits);
       aligned_free(objspace->heap.sorted[i]);
   }
@@ -538,6 +549,7 @@ assign_heap_slot(rb_objspace_t *objspace)
     objspace->heap.sorted[hi]->end = (p + objs);
     objspace->heap.sorted[hi]->base = heaps;
     objspace->heap.sorted[hi]->limit = objs;
+    objspace->heap.sorted[hi]->metadata = NULL;
     assert(objspace->heap.free_bitmap != NULL);
     heaps->bits = (uintptr_t *)objspace->heap.free_bitmap;
     objspace->heap.sorted[hi]->bits = (uintptr_t 
*)objspace->heap.free_bitmap;
@@ -667,12 +679,19 @@ newobj(VALUE klass, VALUE flags)
     }

     MEMZERO((void*)obj, RVALUE, 1);
-#ifdef GC_DEBUG
-    RANY(obj)->file = rb_sourcefile();
-    RANY(obj)->line = rb_sourceline();
-#endif
     objspace->total_allocated_object_num++;

+    if (UNLIKELY(track_metadata)) {
+        struct heaps_header *heap = GET_HEAP_HEADER(obj);
+        if (!heap->metadata)
+            heap->metadata = calloc(HEAP_OBJ_LIMIT, 
sizeof(rb_obj_metadata_t));
+        if (heap->metadata) {
+            rb_obj_metadata_t *meta = 
&heap->metadata[NUM_IN_SLOT(obj)];
+            meta->file = rb_sourcefilename();
+            meta->line = rb_sourceline();
+        }
+    }
+
     return obj;
 }

@@ -867,6 +886,8 @@ free_unused_heaps(rb_objspace_t *objspace)
                 last = objspace->heap.sorted[i];
       }
       else {
+                if (objspace->heap.sorted[i]->metadata)
+                    free(objspace->heap.sorted[i]->metadata);
     aligned_free(objspace->heap.sorted[i]);
       }
       heaps_used--;
@@ -1736,6 +1757,55 @@ rb_obj_id(VALUE obj)
     return nonspecial_obj_id(obj);
 }

+static inline rb_obj_metadata_t *
+rb_obj_get_metadata(VALUE obj)
+{
+    struct heaps_header *heap;
+
+    if (SPECIAL_CONST_P(obj))
+        return NULL;
+
+    heap = GET_HEAP_HEADER(obj);
+    if (!heap->metadata)
+        return NULL;
+
+    return &heap->metadata[NUM_IN_SLOT(obj)];
+}
+
+/*
+ *  Document-method: __sourcefile__
+ *
+ *  call-seq:
+ *     obj.__sourcefile__       -> string
+ *
+ *  Returns a string filename where +obj+ was allocated.
+ *
+ *  This method is only expected to work on C Ruby. An environment
+ *  variable (RUBY_OBJECT_METADATA=1) must be set to enable this
+ *  feature.
+ */
+static VALUE
+rb_obj_sourcefile(VALUE obj)
+{
+    rb_obj_metadata_t *meta = rb_obj_get_metadata(obj);
+
+    if (!track_metadata)
+        rb_warn("#__sourcefile__ requires RUBY_OBJECT_METADATA=1");
+
+    return meta ? meta->file : Qnil;
+}
+
+static VALUE
+rb_obj_sourceline(VALUE obj)
+{
+    rb_obj_metadata_t *meta = rb_obj_get_metadata(obj);
+
+    if (!track_metadata)
+        rb_warn("#__sourceline__ requires RUBY_OBJECT_METADATA=1");
+
+    return meta ? INT2FIX(meta->line) : Qnil;
+}
+
 static int
 set_zero(st_data_t key, st_data_t val, st_data_t arg)
 {
@@ -2606,6 +2676,7 @@ rb_gc_mark(VALUE ptr)
 static void
 gc_mark_children(rb_objspace_t *objspace, VALUE ptr)
 {
+    register rb_obj_metadata_t *meta;
     register RVALUE *obj = RANY(ptr);

     goto marking;    /* skip */
@@ -2626,6 +2697,9 @@ gc_mark_children(rb_objspace_t *objspace, VALUE 
ptr)
   rb_mark_generic_ivar(ptr);
     }

+    if ((meta = rb_obj_get_metadata(ptr)) && RTEST(meta->file))
+        gc_mark(objspace, meta->file);
+
     switch (BUILTIN_TYPE(obj)) {
       case T_NIL:
       case T_FIXNUM:
@@ -3294,10 +3368,17 @@ rb_gc_disable(void)
 void
 rb_gc_set_params(void)
 {
+    char *track_metadata_ptr;
     char *malloc_limit_ptr, *heap_min_slots_ptr, *free_min_ptr, 
*growth_factor_ptr;

     if (rb_safe_level() > 0) return;

+    track_metadata_ptr = getenv("RUBY_OBJECT_METADATA");
+    if (track_metadata_ptr != NULL) {
+  if (RTEST(ruby_verbose))
+      fprintf(stderr, "track_metadata=TRUE (FALSE)\n");
+  track_metadata = TRUE;
+    }
     malloc_limit_ptr = getenv("RUBY_GC_MALLOC_LIMIT");
     if (malloc_limit_ptr != NULL) {
   int malloc_limit_i = atoi(malloc_limit_ptr);
@@ -4535,6 +4616,9 @@ Init_GC(void)
     rb_define_method(rb_cBasicObject, "__id__", rb_obj_id, 0);
     rb_define_method(rb_mKernel, "object_id", rb_obj_id, 0);

+    rb_define_method(rb_cBasicObject, "__sourcefile__", 
rb_obj_sourcefile, 0);
+    rb_define_method(rb_cBasicObject, "__sourceline__", 
rb_obj_sourceline, 0);
+
     rb_define_module_function(rb_mObSpace, "count_objects", 
count_objects, -1);

     {
diff --git a/ruby.c b/ruby.c
index 6b61162..4c7e93f 100644
--- a/ruby.c
+++ b/ruby.c
@@ -1337,6 +1337,8 @@ process_options(int argc, char **argv, struct 
cmdline_options *opt)
   return Qtrue;
     }

+    rb_gc_set_params();
+
     if (!(opt->disable & DISABLE_BIT(rubyopt)) &&
   opt->safe_level == 0 && (s = getenv("RUBYOPT"))) {
   VALUE src_enc_name = opt->src.enc.name;
@@ -1570,7 +1572,6 @@ process_options(int argc, char **argv, struct 
cmdline_options *opt)
     rb_define_readonly_boolean("$-a", opt->do_split);

     rb_set_safe_level(opt->safe_level);
-    rb_gc_set_params();

     return iseq;
 }
Posted by tmm1 (Aman Gupta) (Guest)
on 2013-03-19 05:48
(Received via mailing list)
Issue #8107 has been updated by tmm1 (Aman Gupta).


Here's an example using this feature in a rails app, to find files that 
are allocating many long lived objects:

% RUBYOPT=--debug-objects ruby -r config/environment -e'
  GC.start
  ObjectSpace.each_object.to_a.inject(Hash.new 0){ |h,o| 
h["#{o.__sourcefile__}:#{o.class}"] += 1; h }.
    sort_by{ |k,v| -v }.
    first(14).
    each{ |k,v| printf "% 6d  |  %s\n", v, k }
'

 36244  |  lib/ruby/1.9.1/psych/visitors/to_ruby.rb:String
 28560  | 
gems/activesupport-2.3.14.github21/lib/active_support/dependencies.rb:String
 26038  | 
gems/actionpack-2.3.14.github21/lib/action_controller/routing/route_set.rb:String
 19337  | 
gems/activesupport-2.3.14.github21/lib/active_support/multibyte/unicode_database.rb:ActiveSupport::Multibyte::Codepoint
 17279  |  gems/mime-types-1.19/lib/mime/types.rb:String
 10762  | 
gems/tzinfo-0.3.36/lib/tzinfo/data_timezone_info.rb:TZInfo::TimezoneTransitionInfo
 10419  | 
gems/actionpack-2.3.14.github21/lib/action_controller/routing/route.rb:String
  9486  | 
gems/activesupport-2.3.14.github21/lib/active_support/dependencies.rb:RubyVM::InstructionSequence
  8459  | 
gems/actionpack-2.3.14.github21/lib/action_controller/routing/route_set.rb:RubyVM::InstructionSequence
  5569  | 
gems/actionpack-2.3.14.github21/lib/action_controller/routing/builder.rb:String
  5151  |  gems/addressable-2.2.8/lib/addressable/idna/pure.rb:Array
  4944  |  gems/mime-types-1.19/lib/mime/types.rb:Array
  4800  |  gems/addressable-2.2.8/lib/addressable/idna/pure.rb:String
  3782  | 
gems/actionpack-2.3.14.github21/lib/action_controller/routing/builder.rb:ActionController::Routing::DividerSegment


----------------------------------------
Feature #8107: [patch] runtime flag to track object allocation metadata
https://bugs.ruby-lang.org/issues/8107#change-37717

Author: tmm1 (Aman Gupta)
Status: Open
Priority: Normal
Assignee:
Category: core
Target version:


When a ruby program contains a reference leak, debugging is a lot easier 
if you know where each object was allocated. Tools like bleakhouse and 
memprof have provided this functionality in the past, but were brittle 
and required source/runtime patches to ruby.

Ruby already provides basic callsite tracking if you recompile ruby with 
GC_DEBUG. This is impractical for daily use however, since it increases 
the size of the ruby heap by ~30%. There is also no API to access the 
debug information.

The following patch moves the GC_DEBUG file/line tracking outside of 
RVALUE, and adds a runtime flag (via environment variable) to enable it. 
This way normal usage is not affected by additional memory usage, but it 
is still simple to enable tracking for debugging purposes without having 
to recompile ruby.

I've exposed this data via BasicObject#__sourcefile__ and 
BasicObject#__sourceline__

$ ruby -e'
  GC.start
  ObjectSpace.each_object.first(1).each{ |o|
    p [o.class, o, o.__sourcefile__, o.__sourceline__]
  }
'
-e:4: warning: #__sourcefile__ requires RUBY_OBJECT_METADATA=1
-e:4: warning: #__sourceline__ requires RUBY_OBJECT_METADATA=1
[String, 
"/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems/exceptions", 
nil, nil]

$ RUBY_OBJECT_METADATA=1 ruby -e'
  GC.start
  ObjectSpace.each_object.first(1).each{ |o|
    p [o.class, o, o.__sourcefile__, o.__sourceline__]
  }
'
[String, "$(datarootdir)/doc/$(PACKAGE)", 
"/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems.rb", 8]

diff --git a/gc.c b/gc.c
index bd95073..2fc1d0c 100644
--- a/gc.c
+++ b/gc.c
@@ -81,6 +81,7 @@ typedef struct {
 #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
     int gc_stress;
 #endif
+    int track_metadata;
 } ruby_gc_params_t;

 static ruby_gc_params_t initial_params = {
@@ -91,6 +92,7 @@ static ruby_gc_params_t initial_params = {
 #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
     FALSE,
 #endif
+    FALSE
 };

 #define nomem_error GET_VM()->special_exceptions[ruby_error_nomemory]
@@ -162,6 +164,11 @@ typedef struct RVALUE {
 #pragma pack(pop)
 #endif

+typedef struct rb_obj_metadata {
+    VALUE file;
+    unsigned short line;
+} rb_obj_metadata_t;
+
 struct heaps_slot {
     struct heaps_header *header;
     uintptr_t *bits;
@@ -177,6 +184,7 @@ struct heaps_header {
     RVALUE *start;
     RVALUE *end;
     size_t limit;
+    rb_obj_metadata_t *metadata;
 };

 struct heaps_free_bitmap {
@@ -291,6 +299,7 @@ int *ruby_initial_gc_stress_ptr = 
&rb_objspace.gc_stress;
 #define initial_heap_min_slots  initial_params.initial_heap_min_slots
 #define initial_free_min  initial_params.initial_free_min
 #define initial_growth_factor  initial_params.initial_growth_factor
+#define track_metadata  initial_params.track_metadata

 #define is_lazy_sweeping(objspace) ((objspace)->heap.sweep_slots != 0)

@@ -413,6 +422,8 @@ rb_objspace_free(rb_objspace_t *objspace)
     if (objspace->heap.sorted) {
   size_t i;
   for (i = 0; i < heaps_used; ++i) {
+            if (objspace->heap.sorted[i]->metadata)
+                free(objspace->heap.sorted[i]->metadata);
             free(objspace->heap.sorted[i]->bits);
       aligned_free(objspace->heap.sorted[i]);
   }
@@ -538,6 +549,7 @@ assign_heap_slot(rb_objspace_t *objspace)
     objspace->heap.sorted[hi]->end = (p + objs);
     objspace->heap.sorted[hi]->base = heaps;
     objspace->heap.sorted[hi]->limit = objs;
+    objspace->heap.sorted[hi]->metadata = NULL;
     assert(objspace->heap.free_bitmap != NULL);
     heaps->bits = (uintptr_t *)objspace->heap.free_bitmap;
     objspace->heap.sorted[hi]->bits = (uintptr_t 
*)objspace->heap.free_bitmap;
@@ -667,12 +679,19 @@ newobj(VALUE klass, VALUE flags)
     }

     MEMZERO((void*)obj, RVALUE, 1);
-#ifdef GC_DEBUG
-    RANY(obj)->file = rb_sourcefile();
-    RANY(obj)->line = rb_sourceline();
-#endif
     objspace->total_allocated_object_num++;

+    if (UNLIKELY(track_metadata)) {
+        struct heaps_header *heap = GET_HEAP_HEADER(obj);
+        if (!heap->metadata)
+            heap->metadata = calloc(HEAP_OBJ_LIMIT, 
sizeof(rb_obj_metadata_t));
+        if (heap->metadata) {
+            rb_obj_metadata_t *meta = 
&heap->metadata[NUM_IN_SLOT(obj)];
+            meta->file = rb_sourcefilename();
+            meta->line = rb_sourceline();
+        }
+    }
+
     return obj;
 }

@@ -867,6 +886,8 @@ free_unused_heaps(rb_objspace_t *objspace)
                 last = objspace->heap.sorted[i];
       }
       else {
+                if (objspace->heap.sorted[i]->metadata)
+                    free(objspace->heap.sorted[i]->metadata);
     aligned_free(objspace->heap.sorted[i]);
       }
       heaps_used--;
@@ -1736,6 +1757,55 @@ rb_obj_id(VALUE obj)
     return nonspecial_obj_id(obj);
 }

+static inline rb_obj_metadata_t *
+rb_obj_get_metadata(VALUE obj)
+{
+    struct heaps_header *heap;
+
+    if (SPECIAL_CONST_P(obj))
+        return NULL;
+
+    heap = GET_HEAP_HEADER(obj);
+    if (!heap->metadata)
+        return NULL;
+
+    return &heap->metadata[NUM_IN_SLOT(obj)];
+}
+
+/*
+ *  Document-method: __sourcefile__
+ *
+ *  call-seq:
+ *     obj.__sourcefile__       -> string
+ *
+ *  Returns a string filename where +obj+ was allocated.
+ *
+ *  This method is only expected to work on C Ruby. An environment
+ *  variable (RUBY_OBJECT_METADATA=1) must be set to enable this
+ *  feature.
+ */
+static VALUE
+rb_obj_sourcefile(VALUE obj)
+{
+    rb_obj_metadata_t *meta = rb_obj_get_metadata(obj);
+
+    if (!track_metadata)
+        rb_warn("#__sourcefile__ requires RUBY_OBJECT_METADATA=1");
+
+    return meta ? meta->file : Qnil;
+}
+
+static VALUE
+rb_obj_sourceline(VALUE obj)
+{
+    rb_obj_metadata_t *meta = rb_obj_get_metadata(obj);
+
+    if (!track_metadata)
+        rb_warn("#__sourceline__ requires RUBY_OBJECT_METADATA=1");
+
+    return meta ? INT2FIX(meta->line) : Qnil;
+}
+
 static int
 set_zero(st_data_t key, st_data_t val, st_data_t arg)
 {
@@ -2606,6 +2676,7 @@ rb_gc_mark(VALUE ptr)
 static void
 gc_mark_children(rb_objspace_t *objspace, VALUE ptr)
 {
+    register rb_obj_metadata_t *meta;
     register RVALUE *obj = RANY(ptr);

     goto marking;    /* skip */
@@ -2626,6 +2697,9 @@ gc_mark_children(rb_objspace_t *objspace, VALUE 
ptr)
   rb_mark_generic_ivar(ptr);
     }

+    if ((meta = rb_obj_get_metadata(ptr)) && RTEST(meta->file))
+        gc_mark(objspace, meta->file);
+
     switch (BUILTIN_TYPE(obj)) {
       case T_NIL:
       case T_FIXNUM:
@@ -3294,10 +3368,17 @@ rb_gc_disable(void)
 void
 rb_gc_set_params(void)
 {
+    char *track_metadata_ptr;
     char *malloc_limit_ptr, *heap_min_slots_ptr, *free_min_ptr, 
*growth_factor_ptr;

     if (rb_safe_level() > 0) return;

+    track_metadata_ptr = getenv("RUBY_OBJECT_METADATA");
+    if (track_metadata_ptr != NULL) {
+  if (RTEST(ruby_verbose))
+      fprintf(stderr, "track_metadata=TRUE (FALSE)\n");
+  track_metadata = TRUE;
+    }
     malloc_limit_ptr = getenv("RUBY_GC_MALLOC_LIMIT");
     if (malloc_limit_ptr != NULL) {
   int malloc_limit_i = atoi(malloc_limit_ptr);
@@ -4535,6 +4616,9 @@ Init_GC(void)
     rb_define_method(rb_cBasicObject, "__id__", rb_obj_id, 0);
     rb_define_method(rb_mKernel, "object_id", rb_obj_id, 0);

+    rb_define_method(rb_cBasicObject, "__sourcefile__", 
rb_obj_sourcefile, 0);
+    rb_define_method(rb_cBasicObject, "__sourceline__", 
rb_obj_sourceline, 0);
+
     rb_define_module_function(rb_mObSpace, "count_objects", 
count_objects, -1);

     {
diff --git a/ruby.c b/ruby.c
index 6b61162..4c7e93f 100644
--- a/ruby.c
+++ b/ruby.c
@@ -1337,6 +1337,8 @@ process_options(int argc, char **argv, struct 
cmdline_options *opt)
   return Qtrue;
     }

+    rb_gc_set_params();
+
     if (!(opt->disable & DISABLE_BIT(rubyopt)) &&
   opt->safe_level == 0 && (s = getenv("RUBYOPT"))) {
   VALUE src_enc_name = opt->src.enc.name;
@@ -1570,7 +1572,6 @@ process_options(int argc, char **argv, struct 
cmdline_options *opt)
     rb_define_readonly_boolean("$-a", opt->do_split);

     rb_set_safe_level(opt->safe_level);
-    rb_gc_set_params();

     return iseq;
 }
Posted by SASADA Koichi (Guest)
on 2013-03-19 06:47
(Received via mailing list)
(2013/03/19 13:39), tmm1 (Aman Gupta) wrote:
> Maybe instead of file/line, this should be rb_iseq_t *iseq?

C methods don't have an iseq.

> ko1-san, do you have any opinion on this patch?

I'm considering another approach to add such information. But I can't
guarantee when I will introduce this patch :(


The approach is to add a special trace function (one that calls a C
function, not a Ruby method) for each object allocation and free (and
end of marking). I believe this approach allows flexible statistics.
But we need to be careful when adding such an API.

Using these APIs, users can add their own statistics libraries.

*Just yesterday*, I was thinking about these new APIs,
because I want to generate the following movie easily.
http://www.atdot.net/~ko1/diary/resource/20130318/...
This movie shows the status of the heaps. A black pixel is a free object,
a red pixel is a string object, and so on.

(Now, I modify gc.c directly:
http://www.atdot.net/~ko1/diary/resource/20130318/... 
)

I'm considering making a proposal with a patch.
@tmm1, can you wait for my proposal with a patch,
or should I propose only the idea?
Posted by SASADA Koichi (Guest)
on 2013-03-19 07:46
(Received via mailing list)
(2013/03/19 14:47), SASADA Koichi wrote:
>  I believe this approach allows flexible statistics.

One example of this flexibility is collecting the call tree of object
creation: not only the method name, but the call tree as well.

"Memory Profiler for Ruby"
http://rubykaigi.org/2010/ja/events/86
He was my student, and he modified gc.c directly.
I want to make it pluggable.
Posted by tmm1 (Aman Gupta) (Guest)
on 2013-03-19 07:46
(Received via mailing list)
Issue #8107 has been updated by tmm1 (Aman Gupta).


>  Using this APIs, users can add your own statistics libraries.

I tried a similar approach in ruby 1.8 some while ago, emulating event 
hook api for GC events (newobj, free, gc start/end): 
https://github.com/tmm1/brew2deb/blob/master/packa...

I agree this approach provides more flexibility. But GC hooks cannot 
allocate ruby objects or interact with GC, so it is tricky to use.

Also implementation of newobj hook is tricky, because object klass/flags 
are set in the OBJSETUP macro.

An object tracing api will provide a lot of benefits (debuggers can 
track full C/ruby stacktrace of allocation site), but there are still 
some advantages to doing this in the VM directly:

  - gc.c can do much better job of storing object metadata efficiently 
(external statistics library will have to use hash table)

  - if statistics library is loaded as cext gem, it cannot track objects 
already created (such as objects inside rubygems library)

>  And I'm considering that I want to make proposal with a patch.
>  @tmm1, can you wait for my proposal with a patch?

I would like to hear your idea, but I can wait for the patch. Or, if you 
tell me, I can try to implement it.

> This movie shows the status of heaps. black pixel is free object. red
>  pixel is string object, and so on.

This is very cool. Such visualizations make it much easier to understand 
GC behavior, so I am excited to see an official API to make allocation 
tooling easier.
----------------------------------------
Feature #8107: [patch] runtime flag to track object allocation metadata
https://bugs.ruby-lang.org/issues/8107#change-37722

Author: tmm1 (Aman Gupta)
Status: Open
Priority: Normal
Assignee:
Category: core
Target version:


When a ruby program contains a reference leak, debugging is a lot easier 
if you know where each object was allocated. Tools like bleakhouse and 
memprof have provided this functionality in the past, but were brittle 
and required source/runtime patches to ruby.

Ruby already provides basic callsite tracking if you recompile ruby with 
GC_DEBUG. This is impractical for daily use however, since it increases 
the size of the ruby heap by ~30%. There is also no API to access the 
debug information.

The following patch moves the GC_DEBUG file/line tracking outside of 
RVALUE, and adds a runtime flag (via environment variable) to enable it. 
This way normal usage is not affected by additional memory usage, but it 
is still simple to enable tracking for debugging purposes without having 
to recompile ruby.

I've exposed this data via BasicObject#__sourcefile__ and 
BasicObject#__sourceline__

$ ruby -e'
  GC.start
  ObjectSpace.each_object.first(1).each{ |o|
    p [o.class, o, o.__sourcefile__, o.__sourceline__]
  }
'
-e:4: warning: #__sourcefile__ requires RUBY_OBJECT_METADATA=1
-e:4: warning: #__sourceline__ requires RUBY_OBJECT_METADATA=1
[String, 
"/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems/exceptions", 
nil, nil]

$ RUBY_OBJECT_METADATA=1 ruby -e'
  GC.start
  ObjectSpace.each_object.first(1).each{ |o|
    p [o.class, o, o.__sourcefile__, o.__sourceline__]
  }
'
[String, "$(datarootdir)/doc/$(PACKAGE)", 
"/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems.rb", 8]

diff --git a/gc.c b/gc.c
index bd95073..2fc1d0c 100644
--- a/gc.c
+++ b/gc.c
@@ -81,6 +81,7 @@ typedef struct {
 #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
     int gc_stress;
 #endif
+    int track_metadata;
 } ruby_gc_params_t;

 static ruby_gc_params_t initial_params = {
@@ -91,6 +92,7 @@ static ruby_gc_params_t initial_params = {
 #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
     FALSE,
 #endif
+    FALSE
 };

 #define nomem_error GET_VM()->special_exceptions[ruby_error_nomemory]
@@ -162,6 +164,11 @@ typedef struct RVALUE {
 #pragma pack(pop)
 #endif

+typedef struct rb_obj_metadata {
+    VALUE file;
+    unsigned short line;
+} rb_obj_metadata_t;
+
 struct heaps_slot {
     struct heaps_header *header;
     uintptr_t *bits;
@@ -177,6 +184,7 @@ struct heaps_header {
     RVALUE *start;
     RVALUE *end;
     size_t limit;
+    rb_obj_metadata_t *metadata;
 };

 struct heaps_free_bitmap {
@@ -291,6 +299,7 @@ int *ruby_initial_gc_stress_ptr = 
&rb_objspace.gc_stress;
 #define initial_heap_min_slots  initial_params.initial_heap_min_slots
 #define initial_free_min  initial_params.initial_free_min
 #define initial_growth_factor  initial_params.initial_growth_factor
+#define track_metadata  initial_params.track_metadata

 #define is_lazy_sweeping(objspace) ((objspace)->heap.sweep_slots != 0)

@@ -413,6 +422,8 @@ rb_objspace_free(rb_objspace_t *objspace)
     if (objspace->heap.sorted) {
   size_t i;
   for (i = 0; i < heaps_used; ++i) {
+            if (objspace->heap.sorted[i]->metadata)
+                free(objspace->heap.sorted[i]->metadata);
             free(objspace->heap.sorted[i]->bits);
       aligned_free(objspace->heap.sorted[i]);
   }
@@ -538,6 +549,7 @@ assign_heap_slot(rb_objspace_t *objspace)
     objspace->heap.sorted[hi]->end = (p + objs);
     objspace->heap.sorted[hi]->base = heaps;
     objspace->heap.sorted[hi]->limit = objs;
+    objspace->heap.sorted[hi]->metadata = NULL;
     assert(objspace->heap.free_bitmap != NULL);
     heaps->bits = (uintptr_t *)objspace->heap.free_bitmap;
     objspace->heap.sorted[hi]->bits = (uintptr_t 
*)objspace->heap.free_bitmap;
@@ -667,12 +679,19 @@ newobj(VALUE klass, VALUE flags)
     }

     MEMZERO((void*)obj, RVALUE, 1);
-#ifdef GC_DEBUG
-    RANY(obj)->file = rb_sourcefile();
-    RANY(obj)->line = rb_sourceline();
-#endif
     objspace->total_allocated_object_num++;

+    if (UNLIKELY(track_metadata)) {
+        struct heaps_header *heap = GET_HEAP_HEADER(obj);
+        if (!heap->metadata)
+            heap->metadata = calloc(HEAP_OBJ_LIMIT, 
sizeof(rb_obj_metadata_t));
+        if (heap->metadata) {
+            rb_obj_metadata_t *meta = 
&heap->metadata[NUM_IN_SLOT(obj)];
+            meta->file = rb_sourcefilename();
+            meta->line = rb_sourceline();
+        }
+    }
+
     return obj;
 }

@@ -867,6 +886,8 @@ free_unused_heaps(rb_objspace_t *objspace)
                 last = objspace->heap.sorted[i];
       }
       else {
+                if (objspace->heap.sorted[i]->metadata)
+                    free(objspace->heap.sorted[i]->metadata);
     aligned_free(objspace->heap.sorted[i]);
       }
       heaps_used--;
@@ -1736,6 +1757,55 @@ rb_obj_id(VALUE obj)
     return nonspecial_obj_id(obj);
 }

+static inline rb_obj_metadata_t *
+rb_obj_get_metadata(VALUE obj)
+{
+    struct heaps_header *heap;
+
+    if (SPECIAL_CONST_P(obj))
+        return NULL;
+
+    heap = GET_HEAP_HEADER(obj);
+    if (!heap->metadata)
+        return NULL;
+
+    return &heap->metadata[NUM_IN_SLOT(obj)];
+}
+
+/*
+ *  Document-method: __sourcefile__
+ *
+ *  call-seq:
+ *     obj.__sourcefile__       -> string
+ *
+ *  Returns a string filename where +obj+ was allocated.
+ *
+ *  This method is only expected to work on C Ruby. An environment
+ *  variable (RUBY_OBJECT_METADATA=1) must be set to enable this
+ *  feature.
+ */
+static VALUE
+rb_obj_sourcefile(VALUE obj)
+{
+    rb_obj_metadata_t *meta = rb_obj_get_metadata(obj);
+
+    if (!track_metadata)
+        rb_warn("#__sourcefile__ requires RUBY_OBJECT_METADATA=1");
+
+    return meta ? meta->file : Qnil;
+}
+
+static VALUE
+rb_obj_sourceline(VALUE obj)
+{
+    rb_obj_metadata_t *meta = rb_obj_get_metadata(obj);
+
+    if (!track_metadata)
+        rb_warn("#__sourceline__ requires RUBY_OBJECT_METADATA=1");
+
+    return meta ? INT2FIX(meta->line) : Qnil;
+}
+
 static int
 set_zero(st_data_t key, st_data_t val, st_data_t arg)
 {
@@ -2606,6 +2676,7 @@ rb_gc_mark(VALUE ptr)
 static void
 gc_mark_children(rb_objspace_t *objspace, VALUE ptr)
 {
+    register rb_obj_metadata_t *meta;
     register RVALUE *obj = RANY(ptr);

     goto marking;    /* skip */
@@ -2626,6 +2697,9 @@ gc_mark_children(rb_objspace_t *objspace, VALUE 
ptr)
   rb_mark_generic_ivar(ptr);
     }

+    if ((meta = rb_obj_get_metadata(ptr)) && RTEST(meta->file))
+        gc_mark(objspace, meta->file);
+
     switch (BUILTIN_TYPE(obj)) {
       case T_NIL:
       case T_FIXNUM:
@@ -3294,10 +3368,17 @@ rb_gc_disable(void)
 void
 rb_gc_set_params(void)
 {
+    char *track_metadata_ptr;
     char *malloc_limit_ptr, *heap_min_slots_ptr, *free_min_ptr, 
*growth_factor_ptr;

     if (rb_safe_level() > 0) return;

+    track_metadata_ptr = getenv("RUBY_OBJECT_METADATA");
+    if (track_metadata_ptr != NULL) {
+  if (RTEST(ruby_verbose))
+      fprintf(stderr, "track_metadata=TRUE (FALSE)\n");
+  track_metadata = TRUE;
+    }
     malloc_limit_ptr = getenv("RUBY_GC_MALLOC_LIMIT");
     if (malloc_limit_ptr != NULL) {
   int malloc_limit_i = atoi(malloc_limit_ptr);
@@ -4535,6 +4616,9 @@ Init_GC(void)
     rb_define_method(rb_cBasicObject, "__id__", rb_obj_id, 0);
     rb_define_method(rb_mKernel, "object_id", rb_obj_id, 0);

+    rb_define_method(rb_cBasicObject, "__sourcefile__", 
rb_obj_sourcefile, 0);
+    rb_define_method(rb_cBasicObject, "__sourceline__", 
rb_obj_sourceline, 0);
+
     rb_define_module_function(rb_mObSpace, "count_objects", 
count_objects, -1);

     {
diff --git a/ruby.c b/ruby.c
index 6b61162..4c7e93f 100644
--- a/ruby.c
+++ b/ruby.c
@@ -1337,6 +1337,8 @@ process_options(int argc, char **argv, struct 
cmdline_options *opt)
   return Qtrue;
     }

+    rb_gc_set_params();
+
     if (!(opt->disable & DISABLE_BIT(rubyopt)) &&
   opt->safe_level == 0 && (s = getenv("RUBYOPT"))) {
   VALUE src_enc_name = opt->src.enc.name;
@@ -1570,7 +1572,6 @@ process_options(int argc, char **argv, struct 
cmdline_options *opt)
     rb_define_readonly_boolean("$-a", opt->do_split);

     rb_set_safe_level(opt->safe_level);
-    rb_gc_set_params();

     return iseq;
 }
Posted by SASADA Koichi (Guest)
on 2013-03-19 08:27
(Received via mailing list)
(2013/03/19 15:45), tmm1 (Aman Gupta) wrote:
> I agree this approach provides more flexibility. But GC hooks cannot allocate 
ruby objects or interact with GC, so it is tricky to use.

Yes, exactly. This is why we need to be more careful.
This is why I restrict it to C functions only ([ruby-core:53530]).

However, it is difficult to make something.

So the new idea (the core idea of this proposal) is to introduce another new
API: one that registers tasks to be invoked at finalizing timing.

Finalizing timing is:
* nearest timing to the GC
* free to Ruby execution (same as finalizer environment)

Summary of my proposal:
* Introduce new GC related hooks (restricted to C function)
  * Mark hook
  * Free hook
  * GCed hook
* Introduce new API to register a task invoking finalizing timing

In particular, the Free hook and the GCed hook run inside the GC procedure. In
these C hooks, collect information (current place, etc.) into some storage. If
you want to manipulate it at the Ruby level, register a task through the task
API with this information.

-----

At first, I wanted to provide only GC related events invoking at
finalizing timing. However, this approach has several problems:
(1) Can't collect correct place (filename, line)
    If GC happens inside nested C methods, the finalizers are invoked
    only after those C methods return.
(2) It is difficult to determine how many freed objects can be registered for
delayed processing (in the storage I mentioned above)

My proposal will solve them.


> Also implementation of newobj hook is tricky, because object klass/flags are set 
in the OBJSETUP macro.

Now, we have rb_newobj_of() function.

> An object tracing api will provide a lot of benefits (debuggers can track full 
C/ruby stacktrace of allocation site), but there are still some advantages to 
doing this in the VM directly:
>
>   - gc.c can do much better job of storing object metadata efficiently (external 
statistics library will have to use hash table)

Yes. we need to make a comparison.
I think there are no big differences between VM-level and C-ext level.
Maybe it is too slow to use it in production. But no data to compare.

>   - if statistics library is loaded as cext gem, it cannot track objects already 
created (such as objects inside rubygems library)

I believe this is not a problem, because it can be solved by requiring it
first.

> I would like to hear your idea, but I can wait for patch. Or if you tell me I 
can try to implement.

Ideas are above.

>> This movie shows the status of heaps. black pixel is free object. red
>>  pixel is string object, and so on.
>
> This is very cool. Such visualizations make it much easier to understand GC 
behavior, so I am excited to see an official API to make allocation tooling 
easier.

Hehe. It was my hobby :)
It is easy using trace API (GCed hook) and rb_objspace_each_objects().
Posted by tmm1 (Aman Gupta) (Guest)
on 2013-03-19 10:03
(Received via mailing list)
Issue #8107 has been updated by tmm1 (Aman Gupta).


I like your idea. A finalization task api provides an elegant solution 
for processing profiling data in a safe context.

>  * Introduce new GC related hooks (restricted to C function)
>    * Mark hook
>    * Free hook
>    * GCed hook

What is the difference between Free and GC hooks? Is that for obj_free 
vs finalized?

What about NewObj hook? Can it use the same design? Will rb_newobj_of 
need to call RUBY_VM_SET_FINALIZER_INTERRUPT?

----------------------------------------
Feature #8107: [patch] runtime flag to track object allocation metadata
https://bugs.ruby-lang.org/issues/8107#change-37727

Author: tmm1 (Aman Gupta)
Status: Open
Priority: Normal
Assignee:
Category: core
Target version:


When a ruby program contains a reference leak, debugging is a lot easier 
if you know where each object was allocated. Tools like bleakhouse and 
memprof have provided this functionality in the past, but were brittle 
and required source/runtime patches to ruby.

Ruby already provides basic callsite tracking if you recompile ruby with 
GC_DEBUG. This is impractical for daily use however, since it increases 
the size of the ruby heap by ~30%. There is also no API to access the 
debug information.

The following patch moves the GC_DEBUG file/line tracking outside of 
RVALUE, and adds a runtime flag (via environment variable) to enable it. 
This way normal usage is not affected by additional memory usage, but it 
is still simple to enable tracking for debugging purposes without having 
to recompile ruby.

I've exposed this data via BasicObject#__sourcefile__ and 
BasicObject#__sourceline__

$ ruby -e'
  GC.start
  ObjectSpace.each_object.first(1).each{ |o|
    p [o.class, o, o.__sourcefile__, o.__sourceline__]
  }
'
-e:4: warning: #__sourcefile__ requires RUBY_OBJECT_METADATA=1
-e:4: warning: #__sourceline__ requires RUBY_OBJECT_METADATA=1
[String, 
"/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems/exceptions", 
nil, nil]

$ RUBY_OBJECT_METADATA=1 ruby -e'
  GC.start
  ObjectSpace.each_object.first(1).each{ |o|
    p [o.class, o, o.__sourcefile__, o.__sourceline__]
  }
'
[String, "$(datarootdir)/doc/$(PACKAGE)", 
"/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems.rb", 8]

diff --git a/gc.c b/gc.c
index bd95073..2fc1d0c 100644
--- a/gc.c
+++ b/gc.c
@@ -81,6 +81,7 @@ typedef struct {
 #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
     int gc_stress;
 #endif
+    int track_metadata;
 } ruby_gc_params_t;

 static ruby_gc_params_t initial_params = {
@@ -91,6 +92,7 @@ static ruby_gc_params_t initial_params = {
 #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
     FALSE,
 #endif
+    FALSE
 };

 #define nomem_error GET_VM()->special_exceptions[ruby_error_nomemory]
@@ -162,6 +164,11 @@ typedef struct RVALUE {
 #pragma pack(pop)
 #endif

+typedef struct rb_obj_metadata {
+    VALUE file;
+    unsigned short line;
+} rb_obj_metadata_t;
+
 struct heaps_slot {
     struct heaps_header *header;
     uintptr_t *bits;
@@ -177,6 +184,7 @@ struct heaps_header {
     RVALUE *start;
     RVALUE *end;
     size_t limit;
+    rb_obj_metadata_t *metadata;
 };

 struct heaps_free_bitmap {
@@ -291,6 +299,7 @@ int *ruby_initial_gc_stress_ptr = 
&rb_objspace.gc_stress;
 #define initial_heap_min_slots  initial_params.initial_heap_min_slots
 #define initial_free_min  initial_params.initial_free_min
 #define initial_growth_factor  initial_params.initial_growth_factor
+#define track_metadata  initial_params.track_metadata

 #define is_lazy_sweeping(objspace) ((objspace)->heap.sweep_slots != 0)

@@ -413,6 +422,8 @@ rb_objspace_free(rb_objspace_t *objspace)
     if (objspace->heap.sorted) {
   size_t i;
   for (i = 0; i < heaps_used; ++i) {
+            if (objspace->heap.sorted[i]->metadata)
+                free(objspace->heap.sorted[i]->metadata);
             free(objspace->heap.sorted[i]->bits);
       aligned_free(objspace->heap.sorted[i]);
   }
@@ -538,6 +549,7 @@ assign_heap_slot(rb_objspace_t *objspace)
     objspace->heap.sorted[hi]->end = (p + objs);
     objspace->heap.sorted[hi]->base = heaps;
     objspace->heap.sorted[hi]->limit = objs;
+    objspace->heap.sorted[hi]->metadata = NULL;
     assert(objspace->heap.free_bitmap != NULL);
     heaps->bits = (uintptr_t *)objspace->heap.free_bitmap;
     objspace->heap.sorted[hi]->bits = (uintptr_t 
*)objspace->heap.free_bitmap;
@@ -667,12 +679,19 @@ newobj(VALUE klass, VALUE flags)
     }

     MEMZERO((void*)obj, RVALUE, 1);
-#ifdef GC_DEBUG
-    RANY(obj)->file = rb_sourcefile();
-    RANY(obj)->line = rb_sourceline();
-#endif
     objspace->total_allocated_object_num++;

+    if (UNLIKELY(track_metadata)) {
+        struct heaps_header *heap = GET_HEAP_HEADER(obj);
+        if (!heap->metadata)
+            heap->metadata = calloc(HEAP_OBJ_LIMIT, 
sizeof(rb_obj_metadata_t));
+        if (heap->metadata) {
+            rb_obj_metadata_t *meta = 
&heap->metadata[NUM_IN_SLOT(obj)];
+            meta->file = rb_sourcefilename();
+            meta->line = rb_sourceline();
+        }
+    }
+
     return obj;
 }

@@ -867,6 +886,8 @@ free_unused_heaps(rb_objspace_t *objspace)
                 last = objspace->heap.sorted[i];
       }
       else {
+                if (objspace->heap.sorted[i]->metadata)
+                    free(objspace->heap.sorted[i]->metadata);
     aligned_free(objspace->heap.sorted[i]);
       }
       heaps_used--;
@@ -1736,6 +1757,55 @@ rb_obj_id(VALUE obj)
     return nonspecial_obj_id(obj);
 }

+static inline rb_obj_metadata_t *
+rb_obj_get_metadata(VALUE obj)
+{
+    struct heaps_header *heap;
+
+    if (SPECIAL_CONST_P(obj))
+        return NULL;
+
+    heap = GET_HEAP_HEADER(obj);
+    if (!heap->metadata)
+        return NULL;
+
+    return &heap->metadata[NUM_IN_SLOT(obj)];
+}
+
+/*
+ *  Document-method: __sourcefile__
+ *
+ *  call-seq:
+ *     obj.__sourcefile__       -> string
+ *
+ *  Returns a string filename where +obj+ was allocated.
+ *
+ *  This method is only expected to work on C Ruby. An environment
+ *  variable (RUBY_OBJECT_METADATA=1) must be set to enable this
+ *  feature.
+ */
+static VALUE
+rb_obj_sourcefile(VALUE obj)
+{
+    rb_obj_metadata_t *meta = rb_obj_get_metadata(obj);
+
+    if (!track_metadata)
+        rb_warn("#__sourcefile__ requires RUBY_OBJECT_METADATA=1");
+
+    return meta ? meta->file : Qnil;
+}
+
+static VALUE
+rb_obj_sourceline(VALUE obj)
+{
+    rb_obj_metadata_t *meta = rb_obj_get_metadata(obj);
+
+    if (!track_metadata)
+        rb_warn("#__sourceline__ requires RUBY_OBJECT_METADATA=1");
+
+    return meta ? INT2FIX(meta->line) : Qnil;
+}
+
 static int
 set_zero(st_data_t key, st_data_t val, st_data_t arg)
 {
@@ -2606,6 +2676,7 @@ rb_gc_mark(VALUE ptr)
 static void
 gc_mark_children(rb_objspace_t *objspace, VALUE ptr)
 {
+    register rb_obj_metadata_t *meta;
     register RVALUE *obj = RANY(ptr);

     goto marking;    /* skip */
@@ -2626,6 +2697,9 @@ gc_mark_children(rb_objspace_t *objspace, VALUE 
ptr)
   rb_mark_generic_ivar(ptr);
     }

+    if ((meta = rb_obj_get_metadata(ptr)) && RTEST(meta->file))
+        gc_mark(objspace, meta->file);
+
     switch (BUILTIN_TYPE(obj)) {
       case T_NIL:
       case T_FIXNUM:
@@ -3294,10 +3368,17 @@ rb_gc_disable(void)
 void
 rb_gc_set_params(void)
 {
+    char *track_metadata_ptr;
     char *malloc_limit_ptr, *heap_min_slots_ptr, *free_min_ptr, 
*growth_factor_ptr;

     if (rb_safe_level() > 0) return;

+    track_metadata_ptr = getenv("RUBY_OBJECT_METADATA");
+    if (track_metadata_ptr != NULL) {
+  if (RTEST(ruby_verbose))
+      fprintf(stderr, "track_metadata=TRUE (FALSE)\n");
+  track_metadata = TRUE;
+    }
     malloc_limit_ptr = getenv("RUBY_GC_MALLOC_LIMIT");
     if (malloc_limit_ptr != NULL) {
   int malloc_limit_i = atoi(malloc_limit_ptr);
@@ -4535,6 +4616,9 @@ Init_GC(void)
     rb_define_method(rb_cBasicObject, "__id__", rb_obj_id, 0);
     rb_define_method(rb_mKernel, "object_id", rb_obj_id, 0);

+    rb_define_method(rb_cBasicObject, "__sourcefile__", 
rb_obj_sourcefile, 0);
+    rb_define_method(rb_cBasicObject, "__sourceline__", 
rb_obj_sourceline, 0);
+
     rb_define_module_function(rb_mObSpace, "count_objects", 
count_objects, -1);

     {
diff --git a/ruby.c b/ruby.c
index 6b61162..4c7e93f 100644
--- a/ruby.c
+++ b/ruby.c
@@ -1337,6 +1337,8 @@ process_options(int argc, char **argv, struct 
cmdline_options *opt)
   return Qtrue;
     }

+    rb_gc_set_params();
+
     if (!(opt->disable & DISABLE_BIT(rubyopt)) &&
   opt->safe_level == 0 && (s = getenv("RUBYOPT"))) {
   VALUE src_enc_name = opt->src.enc.name;
@@ -1570,7 +1572,6 @@ process_options(int argc, char **argv, struct 
cmdline_options *opt)
     rb_define_readonly_boolean("$-a", opt->do_split);

     rb_set_safe_level(opt->safe_level);
-    rb_gc_set_params();

     return iseq;
 }
Posted by SASADA Koichi (Guest)
on 2013-03-19 10:14
(Received via mailing list)
(2013/03/19 18:02), tmm1 (Aman Gupta) wrote:
> I like your idea. A finalization task api provides an elegant solution for 
processing profiling data in a safe context.

Thanks.

>>  * Introduce new GC related hooks (restricted to C function)
>>    * Mark hook
>>    * Free hook
>>    * GCed hook
>
> What is the difference between Free and GC hooks? Is that for obj_free vs 
finalized?

The Free hook is called for each freed object. The hook will be called with
the GCed object.

The GCed hook is called after each marking. If no object is freed, only
this hook is called.

> What about NewObj hook? Can it use the same design? Will rb_newobj_of need to 
call RUBY_VM_SET_FINALIZER_INTERRUPT?

Ah, it is my mistake. I want to say *newobj hook*, instead of Mark hook.

In fact, there is no need to defer it.
Posted by tmm1 (Aman Gupta) (Guest)
on 2013-03-19 11:27
(Received via mailing list)
Issue #8107 has been updated by tmm1 (Aman Gupta).


>  Ah, it is my mistake. I want to say *newobj hook*, instead of Mark hook.

Oh, OK. This makes much more sense now. I implemented these basic GC 
hooks in c-only tracepoint API.

Is this what you have in mind? 
https://github.com/tmm1/ruby/commit/bffaecd560e83d...

It is useful already without new finalization task api. Would you be 
willing to merge something like this?

For task api, do you have method name/signature suggestion? I will try 
to implement that next.
----------------------------------------
Feature #8107: [patch] runtime flag to track object allocation metadata
https://bugs.ruby-lang.org/issues/8107#change-37730

Author: tmm1 (Aman Gupta)
Status: Open
Priority: Normal
Assignee:
Category: core
Target version:


When a ruby program contains a reference leak, debugging is a lot easier 
if you know where each object was allocated. Tools like bleakhouse and 
memprof have provided this functionality in the past, but were brittle 
and required source/runtime patches to ruby.

Ruby already provides basic callsite tracking if you recompile ruby with 
GC_DEBUG. This is impractical for daily use however, since it increases 
the size of the ruby heap by ~30%. There is also no API to access the 
debug information.

The following patch moves the GC_DEBUG file/line tracking outside of 
RVALUE, and adds a runtime flag (via environment variable) to enable it. 
This way normal usage is not affected by additional memory usage, but it 
is still simple to enable tracking for debugging purposes without having 
to recompile ruby.

I've exposed this data via BasicObject#__sourcefile__ and 
BasicObject#__sourceline__

$ ruby -e'
  GC.start
  ObjectSpace.each_object.first(1).each{ |o|
    p [o.class, o, o.__sourcefile__, o.__sourceline__]
  }
'
-e:4: warning: #__sourcefile__ requires RUBY_OBJECT_METADATA=1
-e:4: warning: #__sourceline__ requires RUBY_OBJECT_METADATA=1
[String, 
"/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems/exceptions", 
nil, nil]

$ RUBY_OBJECT_METADATA=1 ruby -e'
  GC.start
  ObjectSpace.each_object.first(1).each{ |o|
    p [o.class, o, o.__sourcefile__, o.__sourceline__]
  }
'
[String, "$(datarootdir)/doc/$(PACKAGE)", 
"/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems.rb", 8]

diff --git a/gc.c b/gc.c
index bd95073..2fc1d0c 100644
--- a/gc.c
+++ b/gc.c
@@ -81,6 +81,7 @@ typedef struct {
 #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
     int gc_stress;
 #endif
+    int track_metadata;
 } ruby_gc_params_t;

 static ruby_gc_params_t initial_params = {
@@ -91,6 +92,7 @@ static ruby_gc_params_t initial_params = {
 #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
     FALSE,
 #endif
+    FALSE
 };

 #define nomem_error GET_VM()->special_exceptions[ruby_error_nomemory]
@@ -162,6 +164,11 @@ typedef struct RVALUE {
 #pragma pack(pop)
 #endif

+typedef struct rb_obj_metadata {
+    VALUE file;
+    unsigned short line;
+} rb_obj_metadata_t;
+
 struct heaps_slot {
     struct heaps_header *header;
     uintptr_t *bits;
@@ -177,6 +184,7 @@ struct heaps_header {
     RVALUE *start;
     RVALUE *end;
     size_t limit;
+    rb_obj_metadata_t *metadata;
 };

 struct heaps_free_bitmap {
@@ -291,6 +299,7 @@ int *ruby_initial_gc_stress_ptr = 
&rb_objspace.gc_stress;
 #define initial_heap_min_slots  initial_params.initial_heap_min_slots
 #define initial_free_min  initial_params.initial_free_min
 #define initial_growth_factor  initial_params.initial_growth_factor
+#define track_metadata  initial_params.track_metadata

 #define is_lazy_sweeping(objspace) ((objspace)->heap.sweep_slots != 0)

@@ -413,6 +422,8 @@ rb_objspace_free(rb_objspace_t *objspace)
     if (objspace->heap.sorted) {
   size_t i;
   for (i = 0; i < heaps_used; ++i) {
+            if (objspace->heap.sorted[i]->metadata)
+                free(objspace->heap.sorted[i]->metadata);
             free(objspace->heap.sorted[i]->bits);
       aligned_free(objspace->heap.sorted[i]);
   }
@@ -538,6 +549,7 @@ assign_heap_slot(rb_objspace_t *objspace)
     objspace->heap.sorted[hi]->end = (p + objs);
     objspace->heap.sorted[hi]->base = heaps;
     objspace->heap.sorted[hi]->limit = objs;
+    objspace->heap.sorted[hi]->metadata = NULL;
     assert(objspace->heap.free_bitmap != NULL);
     heaps->bits = (uintptr_t *)objspace->heap.free_bitmap;
     objspace->heap.sorted[hi]->bits = (uintptr_t 
*)objspace->heap.free_bitmap;
@@ -667,12 +679,19 @@ newobj(VALUE klass, VALUE flags)
     }

     MEMZERO((void*)obj, RVALUE, 1);
-#ifdef GC_DEBUG
-    RANY(obj)->file = rb_sourcefile();
-    RANY(obj)->line = rb_sourceline();
-#endif
     objspace->total_allocated_object_num++;

+    if (UNLIKELY(track_metadata)) {
+        struct heaps_header *heap = GET_HEAP_HEADER(obj);
+        if (!heap->metadata)
+            heap->metadata = calloc(HEAP_OBJ_LIMIT, 
sizeof(rb_obj_metadata_t));
+        if (heap->metadata) {
+            rb_obj_metadata_t *meta = 
&heap->metadata[NUM_IN_SLOT(obj)];
+            meta->file = rb_sourcefilename();
+            meta->line = rb_sourceline();
+        }
+    }
+
     return obj;
 }

@@ -867,6 +886,8 @@ free_unused_heaps(rb_objspace_t *objspace)
                 last = objspace->heap.sorted[i];
       }
       else {
+                if (objspace->heap.sorted[i]->metadata)
+                    free(objspace->heap.sorted[i]->metadata);
     aligned_free(objspace->heap.sorted[i]);
       }
       heaps_used--;
@@ -1736,6 +1757,55 @@ rb_obj_id(VALUE obj)
     return nonspecial_obj_id(obj);
 }

+static inline rb_obj_metadata_t *
+rb_obj_get_metadata(VALUE obj)
+{
+    struct heaps_header *heap;
+
+    if (SPECIAL_CONST_P(obj))
+        return NULL;
+
+    heap = GET_HEAP_HEADER(obj);
+    if (!heap->metadata)
+        return NULL;
+
+    return &heap->metadata[NUM_IN_SLOT(obj)];
+}
+
+/*
+ *  Document-method: __sourcefile__
+ *
+ *  call-seq:
+ *     obj.__sourcefile__       -> string
+ *
+ *  Returns a string filename where +obj+ was allocated.
+ *
+ *  This method is only expected to work on C Ruby. An environment
+ *  variable (RUBY_OBJECT_METADATA=1) must be set to enable this
+ *  feature.
+ */
+static VALUE
+rb_obj_sourcefile(VALUE obj)
+{
+    rb_obj_metadata_t *meta = rb_obj_get_metadata(obj);
+
+    if (!track_metadata)
+        rb_warn("#__sourcefile__ requires RUBY_OBJECT_METADATA=1");
+
+    return meta ? meta->file : Qnil;
+}
+
+static VALUE
+rb_obj_sourceline(VALUE obj)
+{
+    rb_obj_metadata_t *meta = rb_obj_get_metadata(obj);
+
+    if (!track_metadata)
+        rb_warn("#__sourceline__ requires RUBY_OBJECT_METADATA=1");
+
+    return meta ? INT2FIX(meta->line) : Qnil;
+}
+
 static int
 set_zero(st_data_t key, st_data_t val, st_data_t arg)
 {
@@ -2606,6 +2676,7 @@ rb_gc_mark(VALUE ptr)
 static void
 gc_mark_children(rb_objspace_t *objspace, VALUE ptr)
 {
+    register rb_obj_metadata_t *meta;
     register RVALUE *obj = RANY(ptr);

     goto marking;    /* skip */
@@ -2626,6 +2697,9 @@ gc_mark_children(rb_objspace_t *objspace, VALUE 
ptr)
   rb_mark_generic_ivar(ptr);
     }

+    if ((meta = rb_obj_get_metadata(ptr)) && RTEST(meta->file))
+        gc_mark(objspace, meta->file);
+
     switch (BUILTIN_TYPE(obj)) {
       case T_NIL:
       case T_FIXNUM:
@@ -3294,10 +3368,17 @@ rb_gc_disable(void)
 void
 rb_gc_set_params(void)
 {
+    char *track_metadata_ptr;
     char *malloc_limit_ptr, *heap_min_slots_ptr, *free_min_ptr, 
*growth_factor_ptr;

     if (rb_safe_level() > 0) return;

+    track_metadata_ptr = getenv("RUBY_OBJECT_METADATA");
+    if (track_metadata_ptr != NULL) {
+  if (RTEST(ruby_verbose))
+      fprintf(stderr, "track_metadata=TRUE (FALSE)\n");
+  track_metadata = TRUE;
+    }
     malloc_limit_ptr = getenv("RUBY_GC_MALLOC_LIMIT");
     if (malloc_limit_ptr != NULL) {
   int malloc_limit_i = atoi(malloc_limit_ptr);
@@ -4535,6 +4616,9 @@ Init_GC(void)
     rb_define_method(rb_cBasicObject, "__id__", rb_obj_id, 0);
     rb_define_method(rb_mKernel, "object_id", rb_obj_id, 0);

+    rb_define_method(rb_cBasicObject, "__sourcefile__", 
rb_obj_sourcefile, 0);
+    rb_define_method(rb_cBasicObject, "__sourceline__", 
rb_obj_sourceline, 0);
+
     rb_define_module_function(rb_mObSpace, "count_objects", 
count_objects, -1);

     {
diff --git a/ruby.c b/ruby.c
index 6b61162..4c7e93f 100644
--- a/ruby.c
+++ b/ruby.c
@@ -1337,6 +1337,8 @@ process_options(int argc, char **argv, struct 
cmdline_options *opt)
   return Qtrue;
     }

+    rb_gc_set_params();
+
     if (!(opt->disable & DISABLE_BIT(rubyopt)) &&
   opt->safe_level == 0 && (s = getenv("RUBYOPT"))) {
   VALUE src_enc_name = opt->src.enc.name;
@@ -1570,7 +1572,6 @@ process_options(int argc, char **argv, struct 
cmdline_options *opt)
     rb_define_readonly_boolean("$-a", opt->do_split);

     rb_set_safe_level(opt->safe_level);
-    rb_gc_set_params();

     return iseq;
 }
Posted by SASADA Koichi (Guest)
on 2013-03-19 12:33
(Received via mailing list)
(2013/03/19 19:26), tmm1 (Aman Gupta) wrote:
>> >  Ah, it is my mistake. I want to say *newobj hook*, instead of Mark hook.
> Oh, OK. This makes much more sense now. I implemented these basic GC hooks in 
c-only tracepoint API.
>
> Is this what you have in mind? 
https://github.com/tmm1/ruby/commit/bffaecd560e83d...

Great!!

Another consideration is the lack of EVENT bits. They are restricted to 32 bits.
GC related events are special. So I was thinking of separating the ordinary
bits and the GC bits.

> It is useful already without new finalization task api. Would you be willing to 
merge something like this?
>
> For task api, do you have method name/signature suggestion? I will try to 
implement that next.

No idea.

My plan was:
Rename FINALIZER_INTERRUPT_MASK to DELAYED_TASK_INTERRUPT_MASK.
Move finalizers to one task of delayed task.
Posted by tmm1 (Aman Gupta) (Guest)
on 2013-03-19 13:02
(Received via mailing list)
Issue #8107 has been updated by tmm1 (Aman Gupta).


This was my first time using the new TracePoint apis. I like the C API a 
lot- much more flexible than the old event hook api.

>  GC related events are special. So I was thinking to separate ordinal
>  bits and GC's bits.

I agree, but I am not sure how to separate it without changing signature 
of rb_tracepoint_new.

>  No idea.

I am not sure either. Maybe:

  void rb_delayed_task_run(void);
  void rb_delayed_task_enqueue(void (*func)(void *), void *data);

Where should the implementation live.. vm.c? vm_task.c?
----------------------------------------
Feature #8107: [patch] runtime flag to track object allocation metadata
https://bugs.ruby-lang.org/issues/8107#change-37733

Author: tmm1 (Aman Gupta)
Status: Open
Priority: Normal
Assignee:
Category: core
Target version:


When a ruby program contains a reference leak, debugging is a lot easier 
if you know where each object was allocated. Tools like bleakhouse and 
memprof have provided this functionality in the past, but were brittle 
and required source/runtime patches to ruby.

Ruby already provides basic callsite tracking if you recompile ruby with 
GC_DEBUG. This is impractical for daily use however, since it increases 
the size of the ruby heap by ~30%. There is also no API to access the 
debug information.

The following patch moves the GC_DEBUG file/line tracking outside of 
RVALUE, and adds a runtime flag (via environment variable) to enable it. 
This way normal usage is not affected by additional memory usage, but it 
is still simple to enable tracking for debugging purposes without having 
to recompile ruby.

I've exposed this data via BasicObject#__sourcefile__ and 
BasicObject#__sourceline__

$ ruby -e'
  GC.start
  ObjectSpace.each_object.first(1).each{ |o|
    p [o.class, o, o.__sourcefile__, o.__sourceline__]
  }
'
-e:4: warning: #__sourcefile__ requires RUBY_OBJECT_METADATA=1
-e:4: warning: #__sourceline__ requires RUBY_OBJECT_METADATA=1
[String, 
"/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems/exceptions", 
nil, nil]

$ RUBY_OBJECT_METADATA=1 ruby -e'
  GC.start
  ObjectSpace.each_object.first(1).each{ |o|
    p [o.class, o, o.__sourcefile__, o.__sourceline__]
  }
'
[String, "$(datarootdir)/doc/$(PACKAGE)", 
"/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems.rb", 8]

diff --git a/gc.c b/gc.c
index bd95073..2fc1d0c 100644
--- a/gc.c
+++ b/gc.c
@@ -81,6 +81,7 @@ typedef struct {
 #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
     int gc_stress;
 #endif
+    int track_metadata;
 } ruby_gc_params_t;

 static ruby_gc_params_t initial_params = {
@@ -91,6 +92,7 @@ static ruby_gc_params_t initial_params = {
 #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
     FALSE,
 #endif
+    FALSE
 };

 #define nomem_error GET_VM()->special_exceptions[ruby_error_nomemory]
@@ -162,6 +164,11 @@ typedef struct RVALUE {
 #pragma pack(pop)
 #endif

+typedef struct rb_obj_metadata {
+    VALUE file;
+    unsigned short line;
+} rb_obj_metadata_t;
+
 struct heaps_slot {
     struct heaps_header *header;
     uintptr_t *bits;
@@ -177,6 +184,7 @@ struct heaps_header {
     RVALUE *start;
     RVALUE *end;
     size_t limit;
+    rb_obj_metadata_t *metadata;
 };

 struct heaps_free_bitmap {
@@ -291,6 +299,7 @@ int *ruby_initial_gc_stress_ptr = 
&rb_objspace.gc_stress;
 #define initial_heap_min_slots  initial_params.initial_heap_min_slots
 #define initial_free_min  initial_params.initial_free_min
 #define initial_growth_factor  initial_params.initial_growth_factor
+#define track_metadata  initial_params.track_metadata

 #define is_lazy_sweeping(objspace) ((objspace)->heap.sweep_slots != 0)

@@ -413,6 +422,8 @@ rb_objspace_free(rb_objspace_t *objspace)
     if (objspace->heap.sorted) {
   size_t i;
   for (i = 0; i < heaps_used; ++i) {
+            if (objspace->heap.sorted[i]->metadata)
+                free(objspace->heap.sorted[i]->metadata);
             free(objspace->heap.sorted[i]->bits);
       aligned_free(objspace->heap.sorted[i]);
   }
@@ -538,6 +549,7 @@ assign_heap_slot(rb_objspace_t *objspace)
     objspace->heap.sorted[hi]->end = (p + objs);
     objspace->heap.sorted[hi]->base = heaps;
     objspace->heap.sorted[hi]->limit = objs;
+    objspace->heap.sorted[hi]->metadata = NULL;
     assert(objspace->heap.free_bitmap != NULL);
     heaps->bits = (uintptr_t *)objspace->heap.free_bitmap;
     objspace->heap.sorted[hi]->bits = (uintptr_t 
*)objspace->heap.free_bitmap;
@@ -667,12 +679,19 @@ newobj(VALUE klass, VALUE flags)
     }

     MEMZERO((void*)obj, RVALUE, 1);
-#ifdef GC_DEBUG
-    RANY(obj)->file = rb_sourcefile();
-    RANY(obj)->line = rb_sourceline();
-#endif
     objspace->total_allocated_object_num++;

+    if (UNLIKELY(track_metadata)) {
+        struct heaps_header *heap = GET_HEAP_HEADER(obj);
+        if (!heap->metadata)
+            heap->metadata = calloc(HEAP_OBJ_LIMIT, 
sizeof(rb_obj_metadata_t));
+        if (heap->metadata) {
+            rb_obj_metadata_t *meta = 
&heap->metadata[NUM_IN_SLOT(obj)];
+            meta->file = rb_sourcefilename();
+            meta->line = rb_sourceline();
+        }
+    }
+
     return obj;
 }

@@ -867,6 +886,8 @@ free_unused_heaps(rb_objspace_t *objspace)
                 last = objspace->heap.sorted[i];
       }
       else {
+                if (objspace->heap.sorted[i]->metadata)
+                    free(objspace->heap.sorted[i]->metadata);
     aligned_free(objspace->heap.sorted[i]);
       }
       heaps_used--;
@@ -1736,6 +1757,55 @@ rb_obj_id(VALUE obj)
     return nonspecial_obj_id(obj);
 }

+static inline rb_obj_metadata_t *
+rb_obj_get_metadata(VALUE obj)
+{
+    struct heaps_header *heap;
+
+    if (SPECIAL_CONST_P(obj))
+        return NULL;
+
+    heap = GET_HEAP_HEADER(obj);
+    if (!heap->metadata)
+        return NULL;
+
+    return &heap->metadata[NUM_IN_SLOT(obj)];
+}
+
+/*
+ *  Document-method: __sourcefile__
+ *
+ *  call-seq:
+ *     obj.__sourcefile__       -> string
+ *
+ *  Returns a string filename where +obj+ was allocated.
+ *
+ *  This method is only expected to work on C Ruby. An environment
+ *  variable (RUBY_OBJECT_METADATA=1) must be set to enable this
+ *  feature.
+ */
+static VALUE
+rb_obj_sourcefile(VALUE obj)
+{
+    rb_obj_metadata_t *meta = rb_obj_get_metadata(obj);
+
+    if (!track_metadata)
+        rb_warn("#__sourcefile__ requires RUBY_OBJECT_METADATA=1");
+
+    return meta ? meta->file : Qnil;
+}
+
+static VALUE
+rb_obj_sourceline(VALUE obj)
+{
+    rb_obj_metadata_t *meta = rb_obj_get_metadata(obj);
+
+    if (!track_metadata)
+        rb_warn("#__sourceline__ requires RUBY_OBJECT_METADATA=1");
+
+    return meta ? INT2FIX(meta->line) : Qnil;
+}
+
 static int
 set_zero(st_data_t key, st_data_t val, st_data_t arg)
 {
@@ -2606,6 +2676,7 @@ rb_gc_mark(VALUE ptr)
 static void
 gc_mark_children(rb_objspace_t *objspace, VALUE ptr)
 {
+    register rb_obj_metadata_t *meta;
     register RVALUE *obj = RANY(ptr);

     goto marking;    /* skip */
@@ -2626,6 +2697,9 @@ gc_mark_children(rb_objspace_t *objspace, VALUE 
ptr)
   rb_mark_generic_ivar(ptr);
     }

+    if ((meta = rb_obj_get_metadata(ptr)) && RTEST(meta->file))
+        gc_mark(objspace, meta->file);
+
     switch (BUILTIN_TYPE(obj)) {
       case T_NIL:
       case T_FIXNUM:
@@ -3294,10 +3368,17 @@ rb_gc_disable(void)
 void
 rb_gc_set_params(void)
 {
+    char *track_metadata_ptr;
     char *malloc_limit_ptr, *heap_min_slots_ptr, *free_min_ptr, 
*growth_factor_ptr;

     if (rb_safe_level() > 0) return;

+    track_metadata_ptr = getenv("RUBY_OBJECT_METADATA");
+    if (track_metadata_ptr != NULL) {
+  if (RTEST(ruby_verbose))
+      fprintf(stderr, "track_metadata=TRUE (FALSE)\n");
+  track_metadata = TRUE;
+    }
     malloc_limit_ptr = getenv("RUBY_GC_MALLOC_LIMIT");
     if (malloc_limit_ptr != NULL) {
   int malloc_limit_i = atoi(malloc_limit_ptr);
@@ -4535,6 +4616,9 @@ Init_GC(void)
     rb_define_method(rb_cBasicObject, "__id__", rb_obj_id, 0);
     rb_define_method(rb_mKernel, "object_id", rb_obj_id, 0);

+    rb_define_method(rb_cBasicObject, "__sourcefile__", 
rb_obj_sourcefile, 0);
+    rb_define_method(rb_cBasicObject, "__sourceline__", 
rb_obj_sourceline, 0);
+
     rb_define_module_function(rb_mObSpace, "count_objects", 
count_objects, -1);

     {
diff --git a/ruby.c b/ruby.c
index 6b61162..4c7e93f 100644
--- a/ruby.c
+++ b/ruby.c
@@ -1337,6 +1337,8 @@ process_options(int argc, char **argv, struct 
cmdline_options *opt)
   return Qtrue;
     }

+    rb_gc_set_params();
+
     if (!(opt->disable & DISABLE_BIT(rubyopt)) &&
   opt->safe_level == 0 && (s = getenv("RUBYOPT"))) {
   VALUE src_enc_name = opt->src.enc.name;
@@ -1570,7 +1572,6 @@ process_options(int argc, char **argv, struct 
cmdline_options *opt)
     rb_define_readonly_boolean("$-a", opt->do_split);

     rb_set_safe_level(opt->safe_level);
-    rb_gc_set_params();

     return iseq;
 }
Posted by tmm1 (Aman Gupta) (Guest)
on 2013-03-20 04:20
(Received via mailing list)
Issue #8107 has been updated by tmm1 (Aman Gupta).


I implemented a basic task api: 
https://github.com/tmm1/ruby/compare/tmm1;task-api

But API is too simple, maybe.

  - what if a task job is holding onto a VALUE? It will never be gc_mark()ed

  - what if there is an exception during task execution?

----------------------------------------
Feature #8107: [patch] runtime flag to track object allocation metadata
https://bugs.ruby-lang.org/issues/8107#change-37754

Author: tmm1 (Aman Gupta)
Status: Open
Priority: Normal
Assignee:
Category: core
Target version:


When a ruby program contains a reference leak, debugging is a lot easier 
if you know where each object was allocated. Tools like bleakhouse and 
memprof have provided this functionality in the past, but were brittle 
and required source/runtime patches to ruby.

Ruby already provides basic callsite tracking if you recompile ruby with 
GC_DEBUG. This is impractical for daily use however, since it increases 
the size of the ruby heap by ~30%. There is also no API to access the 
debug information.

The following patch moves the GC_DEBUG file/line tracking outside of 
RVALUE, and adds a runtime flag (via environment variable) to enable it. 
This way normal usage is not affected by additional memory usage, but it 
is still simple to enable tracking for debugging purposes without having 
to recompile ruby.

I've exposed this data via BasicObject#__sourcefile__ and 
BasicObject#__sourceline__

$ ruby -e'
  GC.start
  ObjectSpace.each_object.first(1).each{ |o|
    p [o.class, o, o.__sourcefile__, o.__sourceline__]
  }
'
-e:4: warning: #__sourcefile__ requires RUBY_OBJECT_METADATA=1
-e:4: warning: #__sourceline__ requires RUBY_OBJECT_METADATA=1
[String, 
"/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems/exceptions", 
nil, nil]

$ RUBY_OBJECT_METADATA=1 ruby -e'
  GC.start
  ObjectSpace.each_object.first(1).each{ |o|
    p [o.class, o, o.__sourcefile__, o.__sourceline__]
  }
'
[String, "$(datarootdir)/doc/$(PACKAGE)", 
"/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems.rb", 8]

diff --git a/gc.c b/gc.c
index bd95073..2fc1d0c 100644
--- a/gc.c
+++ b/gc.c
@@ -81,6 +81,7 @@ typedef struct {
 #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
     int gc_stress;
 #endif
+    int track_metadata;
 } ruby_gc_params_t;

 static ruby_gc_params_t initial_params = {
@@ -91,6 +92,7 @@ static ruby_gc_params_t initial_params = {
 #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
     FALSE,
 #endif
+    FALSE
 };

 #define nomem_error GET_VM()->special_exceptions[ruby_error_nomemory]
@@ -162,6 +164,11 @@ typedef struct RVALUE {
 #pragma pack(pop)
 #endif

+typedef struct rb_obj_metadata {
+    VALUE file;
+    unsigned short line;
+} rb_obj_metadata_t;
+
 struct heaps_slot {
     struct heaps_header *header;
     uintptr_t *bits;
@@ -177,6 +184,7 @@ struct heaps_header {
     RVALUE *start;
     RVALUE *end;
     size_t limit;
+    rb_obj_metadata_t *metadata;
 };

 struct heaps_free_bitmap {
@@ -291,6 +299,7 @@ int *ruby_initial_gc_stress_ptr = 
&rb_objspace.gc_stress;
 #define initial_heap_min_slots  initial_params.initial_heap_min_slots
 #define initial_free_min  initial_params.initial_free_min
 #define initial_growth_factor  initial_params.initial_growth_factor
+#define track_metadata  initial_params.track_metadata

 #define is_lazy_sweeping(objspace) ((objspace)->heap.sweep_slots != 0)

@@ -413,6 +422,8 @@ rb_objspace_free(rb_objspace_t *objspace)
     if (objspace->heap.sorted) {
   size_t i;
   for (i = 0; i < heaps_used; ++i) {
+            if (objspace->heap.sorted[i]->metadata)
+                free(objspace->heap.sorted[i]->metadata);
             free(objspace->heap.sorted[i]->bits);
       aligned_free(objspace->heap.sorted[i]);
   }
@@ -538,6 +549,7 @@ assign_heap_slot(rb_objspace_t *objspace)
     objspace->heap.sorted[hi]->end = (p + objs);
     objspace->heap.sorted[hi]->base = heaps;
     objspace->heap.sorted[hi]->limit = objs;
+    objspace->heap.sorted[hi]->metadata = NULL;
     assert(objspace->heap.free_bitmap != NULL);
     heaps->bits = (uintptr_t *)objspace->heap.free_bitmap;
     objspace->heap.sorted[hi]->bits = (uintptr_t 
*)objspace->heap.free_bitmap;
@@ -667,12 +679,19 @@ newobj(VALUE klass, VALUE flags)
     }

     MEMZERO((void*)obj, RVALUE, 1);
-#ifdef GC_DEBUG
-    RANY(obj)->file = rb_sourcefile();
-    RANY(obj)->line = rb_sourceline();
-#endif
     objspace->total_allocated_object_num++;

+    if (UNLIKELY(track_metadata)) {
+        struct heaps_header *heap = GET_HEAP_HEADER(obj);
+        if (!heap->metadata)
+            heap->metadata = calloc(HEAP_OBJ_LIMIT, 
sizeof(rb_obj_metadata_t));
+        if (heap->metadata) {
+            rb_obj_metadata_t *meta = 
&heap->metadata[NUM_IN_SLOT(obj)];
+            meta->file = rb_sourcefilename();
+            meta->line = rb_sourceline();
+        }
+    }
+
     return obj;
 }

@@ -867,6 +886,8 @@ free_unused_heaps(rb_objspace_t *objspace)
                 last = objspace->heap.sorted[i];
       }
       else {
+                if (objspace->heap.sorted[i]->metadata)
+                    free(objspace->heap.sorted[i]->metadata);
     aligned_free(objspace->heap.sorted[i]);
       }
       heaps_used--;
@@ -1736,6 +1757,55 @@ rb_obj_id(VALUE obj)
     return nonspecial_obj_id(obj);
 }

+static inline rb_obj_metadata_t *
+rb_obj_get_metadata(VALUE obj)
+{
+    struct heaps_header *heap;
+
+    if (SPECIAL_CONST_P(obj))
+        return NULL;
+
+    heap = GET_HEAP_HEADER(obj);
+    if (!heap->metadata)
+        return NULL;
+
+    return &heap->metadata[NUM_IN_SLOT(obj)];
+}
+
+/*
+ *  Document-method: __sourcefile__
+ *
+ *  call-seq:
+ *     obj.__sourcefile__       -> string
+ *
+ *  Returns a string filename where +obj+ was allocated.
+ *
+ *  This method is only expected to work on C Ruby. An environment
+ *  variable (RUBY_OBJECT_METADATA=1) must be set to enable this
+ *  feature.
+ */
+static VALUE
+rb_obj_sourcefile(VALUE obj)
+{
+    rb_obj_metadata_t *meta = rb_obj_get_metadata(obj);
+
+    if (!track_metadata)
+        rb_warn("#__sourcefile__ requires RUBY_OBJECT_METADATA=1");
+
+    return meta ? meta->file : Qnil;
+}
+
+static VALUE
+rb_obj_sourceline(VALUE obj)
+{
+    rb_obj_metadata_t *meta = rb_obj_get_metadata(obj);
+
+    if (!track_metadata)
+        rb_warn("#__sourceline__ requires RUBY_OBJECT_METADATA=1");
+
+    return meta ? INT2FIX(meta->line) : Qnil;
+}
+
 static int
 set_zero(st_data_t key, st_data_t val, st_data_t arg)
 {
@@ -2606,6 +2676,7 @@ rb_gc_mark(VALUE ptr)
 static void
 gc_mark_children(rb_objspace_t *objspace, VALUE ptr)
 {
+    register rb_obj_metadata_t *meta;
     register RVALUE *obj = RANY(ptr);

     goto marking;    /* skip */
@@ -2626,6 +2697,9 @@ gc_mark_children(rb_objspace_t *objspace, VALUE 
ptr)
   rb_mark_generic_ivar(ptr);
     }

+    if ((meta = rb_obj_get_metadata(ptr)) && RTEST(meta->file))
+        gc_mark(objspace, meta->file);
+
     switch (BUILTIN_TYPE(obj)) {
       case T_NIL:
       case T_FIXNUM:
@@ -3294,10 +3368,17 @@ rb_gc_disable(void)
 void
 rb_gc_set_params(void)
 {
+    char *track_metadata_ptr;
     char *malloc_limit_ptr, *heap_min_slots_ptr, *free_min_ptr, 
*growth_factor_ptr;

     if (rb_safe_level() > 0) return;

+    track_metadata_ptr = getenv("RUBY_OBJECT_METADATA");
+    if (track_metadata_ptr != NULL) {
+  if (RTEST(ruby_verbose))
+      fprintf(stderr, "track_metadata=TRUE (FALSE)\n");
+  track_metadata = TRUE;
+    }
     malloc_limit_ptr = getenv("RUBY_GC_MALLOC_LIMIT");
     if (malloc_limit_ptr != NULL) {
   int malloc_limit_i = atoi(malloc_limit_ptr);
@@ -4535,6 +4616,9 @@ Init_GC(void)
     rb_define_method(rb_cBasicObject, "__id__", rb_obj_id, 0);
     rb_define_method(rb_mKernel, "object_id", rb_obj_id, 0);

+    rb_define_method(rb_cBasicObject, "__sourcefile__", 
rb_obj_sourcefile, 0);
+    rb_define_method(rb_cBasicObject, "__sourceline__", 
rb_obj_sourceline, 0);
+
     rb_define_module_function(rb_mObSpace, "count_objects", 
count_objects, -1);

     {
diff --git a/ruby.c b/ruby.c
index 6b61162..4c7e93f 100644
--- a/ruby.c
+++ b/ruby.c
@@ -1337,6 +1337,8 @@ process_options(int argc, char **argv, struct 
cmdline_options *opt)
   return Qtrue;
     }

+    rb_gc_set_params();
+
     if (!(opt->disable & DISABLE_BIT(rubyopt)) &&
   opt->safe_level == 0 && (s = getenv("RUBYOPT"))) {
   VALUE src_enc_name = opt->src.enc.name;
@@ -1570,7 +1572,6 @@ process_options(int argc, char **argv, struct 
cmdline_options *opt)
     rb_define_readonly_boolean("$-a", opt->do_split);

     rb_set_safe_level(opt->safe_level);
-    rb_gc_set_params();

     return iseq;
 }
Posted by SASADA Koichi (Guest)
on 2013-03-21 08:40
(Received via mailing list)
(2013/03/20 12:19), tmm1 (Aman Gupta) wrote:
> But API is too simple, maybe.

Other issues:

- `task' is ambiguous (all procedures are tasks).
  Yes, naming is always an issue.
- Allocation during GC is dangerous (maybe it should be prohibited)
Posted by tmm1 (Aman Gupta) (Guest)
on 2013-03-22 09:41
(Received via mailing list)
Issue #8107 has been updated by tmm1 (Aman Gupta).


>  - `task' is ambiguous (all of procedures are task).

Do you prefer `rb_delayed_task_*` and vm_delayed_task.c ?

>  - Allocation during gc is dangerous (maybe, should be prohibited)

Yes, I thought of that when using ALLOC_N. Is it safe to use regular 
allocation (without xmalloc)? Or is it better to maintain a static array 
or freelist?
----------------------------------------
Feature #8107: [patch] runtime flag to track object allocation metadata
https://bugs.ruby-lang.org/issues/8107#change-37808

Author: tmm1 (Aman Gupta)
Status: Open
Priority: Normal
Assignee:
Category: core
Target version:


When a ruby program contains a reference leak, debugging is a lot easier 
if you know where each object was allocated. Tools like bleakhouse and 
memprof have provided this functionality in the past, but were brittle 
and required source/runtime patches to ruby.

Ruby already provides basic callsite tracking if you recompile ruby with 
GC_DEBUG. This is impractical for daily use however, since it increases 
the size of the ruby heap by ~30%. There is also no API to access the 
debug information.

The following patch moves the GC_DEBUG file/line tracking outside of 
RVALUE, and adds a runtime flag (via environment variable) to enable it. 
This way normal usage is not affected by additional memory usage, but it 
is still simple to enable tracking for debugging purposes without having 
to recompile ruby.

I've exposed this data via BasicObject#__sourcefile__ and 
BasicObject#__sourceline__

$ ruby -e'
  GC.start
  ObjectSpace.each_object.first(1).each{ |o|
    p [o.class, o, o.__sourcefile__, o.__sourceline__]
  }
'
-e:4: warning: #__sourcefile__ requires RUBY_OBJECT_METADATA=1
-e:4: warning: #__sourceline__ requires RUBY_OBJECT_METADATA=1
[String, 
"/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems/exceptions", 
nil, nil]

$ RUBY_OBJECT_METADATA=1 ruby -e'
  GC.start
  ObjectSpace.each_object.first(1).each{ |o|
    p [o.class, o, o.__sourcefile__, o.__sourceline__]
  }
'
[String, "$(datarootdir)/doc/$(PACKAGE)", 
"/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems.rb", 8]

diff --git a/gc.c b/gc.c
index bd95073..2fc1d0c 100644
--- a/gc.c
+++ b/gc.c
@@ -81,6 +81,7 @@ typedef struct {
 #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
     int gc_stress;
 #endif
+    int track_metadata;
 } ruby_gc_params_t;

 static ruby_gc_params_t initial_params = {
@@ -91,6 +92,7 @@ static ruby_gc_params_t initial_params = {
 #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
     FALSE,
 #endif
+    FALSE
 };

 #define nomem_error GET_VM()->special_exceptions[ruby_error_nomemory]
@@ -162,6 +164,11 @@ typedef struct RVALUE {
 #pragma pack(pop)
 #endif

+typedef struct rb_obj_metadata {
+    VALUE file;
+    unsigned short line;
+} rb_obj_metadata_t;
+
 struct heaps_slot {
     struct heaps_header *header;
     uintptr_t *bits;
@@ -177,6 +184,7 @@ struct heaps_header {
     RVALUE *start;
     RVALUE *end;
     size_t limit;
+    rb_obj_metadata_t *metadata;
 };

 struct heaps_free_bitmap {
@@ -291,6 +299,7 @@ int *ruby_initial_gc_stress_ptr = 
&rb_objspace.gc_stress;
 #define initial_heap_min_slots  initial_params.initial_heap_min_slots
 #define initial_free_min  initial_params.initial_free_min
 #define initial_growth_factor  initial_params.initial_growth_factor
+#define track_metadata  initial_params.track_metadata

 #define is_lazy_sweeping(objspace) ((objspace)->heap.sweep_slots != 0)

@@ -413,6 +422,8 @@ rb_objspace_free(rb_objspace_t *objspace)
     if (objspace->heap.sorted) {
   size_t i;
   for (i = 0; i < heaps_used; ++i) {
+            if (objspace->heap.sorted[i]->metadata)
+                free(objspace->heap.sorted[i]->metadata);
             free(objspace->heap.sorted[i]->bits);
       aligned_free(objspace->heap.sorted[i]);
   }
@@ -538,6 +549,7 @@ assign_heap_slot(rb_objspace_t *objspace)
     objspace->heap.sorted[hi]->end = (p + objs);
     objspace->heap.sorted[hi]->base = heaps;
     objspace->heap.sorted[hi]->limit = objs;
+    objspace->heap.sorted[hi]->metadata = NULL;
     assert(objspace->heap.free_bitmap != NULL);
     heaps->bits = (uintptr_t *)objspace->heap.free_bitmap;
     objspace->heap.sorted[hi]->bits = (uintptr_t 
*)objspace->heap.free_bitmap;
@@ -667,12 +679,19 @@ newobj(VALUE klass, VALUE flags)
     }

     MEMZERO((void*)obj, RVALUE, 1);
-#ifdef GC_DEBUG
-    RANY(obj)->file = rb_sourcefile();
-    RANY(obj)->line = rb_sourceline();
-#endif
     objspace->total_allocated_object_num++;

+    if (UNLIKELY(track_metadata)) {
+        struct heaps_header *heap = GET_HEAP_HEADER(obj);
+        if (!heap->metadata)
+            heap->metadata = calloc(HEAP_OBJ_LIMIT, 
sizeof(rb_obj_metadata_t));
+        if (heap->metadata) {
+            rb_obj_metadata_t *meta = 
&heap->metadata[NUM_IN_SLOT(obj)];
+            meta->file = rb_sourcefilename();
+            meta->line = rb_sourceline();
+        }
+    }
+
     return obj;
 }

@@ -867,6 +886,8 @@ free_unused_heaps(rb_objspace_t *objspace)
                 last = objspace->heap.sorted[i];
       }
       else {
+                if (objspace->heap.sorted[i]->metadata)
+                    free(objspace->heap.sorted[i]->metadata);
     aligned_free(objspace->heap.sorted[i]);
       }
       heaps_used--;
@@ -1736,6 +1757,55 @@ rb_obj_id(VALUE obj)
     return nonspecial_obj_id(obj);
 }

+static inline rb_obj_metadata_t *
+rb_obj_get_metadata(VALUE obj)
+{
+    struct heaps_header *heap;
+
+    if (SPECIAL_CONST_P(obj))
+        return NULL;
+
+    heap = GET_HEAP_HEADER(obj);
+    if (!heap->metadata)
+        return NULL;
+
+    return &heap->metadata[NUM_IN_SLOT(obj)];
+}
+
+/*
+ *  Document-method: __sourcefile__
+ *
+ *  call-seq:
+ *     obj.__sourcefile__       -> string
+ *
+ *  Returns a string filename where +obj+ was allocated.
+ *
+ *  This method is only expected to work on C Ruby. An environment
+ *  variable (RUBY_OBJECT_METADATA=1) must be set to enable this
+ *  feature.
+ */
+static VALUE
+rb_obj_sourcefile(VALUE obj)
+{
+    rb_obj_metadata_t *meta = rb_obj_get_metadata(obj);
+
+    if (!track_metadata)
+        rb_warn("#__sourcefile__ requires RUBY_OBJECT_METADATA=1");
+
+    return meta ? meta->file : Qnil;
+}
+
+static VALUE
+rb_obj_sourceline(VALUE obj)
+{
+    rb_obj_metadata_t *meta = rb_obj_get_metadata(obj);
+
+    if (!track_metadata)
+        rb_warn("#__sourceline__ requires RUBY_OBJECT_METADATA=1");
+
+    return meta ? INT2FIX(meta->line) : Qnil;
+}
+
 static int
 set_zero(st_data_t key, st_data_t val, st_data_t arg)
 {
@@ -2606,6 +2676,7 @@ rb_gc_mark(VALUE ptr)
 static void
 gc_mark_children(rb_objspace_t *objspace, VALUE ptr)
 {
+    register rb_obj_metadata_t *meta;
     register RVALUE *obj = RANY(ptr);

     goto marking;    /* skip */
@@ -2626,6 +2697,9 @@ gc_mark_children(rb_objspace_t *objspace, VALUE 
ptr)
   rb_mark_generic_ivar(ptr);
     }

+    if ((meta = rb_obj_get_metadata(ptr)) && RTEST(meta->file))
+        gc_mark(objspace, meta->file);
+
     switch (BUILTIN_TYPE(obj)) {
       case T_NIL:
       case T_FIXNUM:
@@ -3294,10 +3368,17 @@ rb_gc_disable(void)
 void
 rb_gc_set_params(void)
 {
+    char *track_metadata_ptr;
     char *malloc_limit_ptr, *heap_min_slots_ptr, *free_min_ptr, 
*growth_factor_ptr;

     if (rb_safe_level() > 0) return;

+    track_metadata_ptr = getenv("RUBY_OBJECT_METADATA");
+    if (track_metadata_ptr != NULL) {
+  if (RTEST(ruby_verbose))
+      fprintf(stderr, "track_metadata=TRUE (FALSE)\n");
+  track_metadata = TRUE;
+    }
     malloc_limit_ptr = getenv("RUBY_GC_MALLOC_LIMIT");
     if (malloc_limit_ptr != NULL) {
   int malloc_limit_i = atoi(malloc_limit_ptr);
@@ -4535,6 +4616,9 @@ Init_GC(void)
     rb_define_method(rb_cBasicObject, "__id__", rb_obj_id, 0);
     rb_define_method(rb_mKernel, "object_id", rb_obj_id, 0);

+    rb_define_method(rb_cBasicObject, "__sourcefile__", 
rb_obj_sourcefile, 0);
+    rb_define_method(rb_cBasicObject, "__sourceline__", 
rb_obj_sourceline, 0);
+
     rb_define_module_function(rb_mObSpace, "count_objects", 
count_objects, -1);

     {
diff --git a/ruby.c b/ruby.c
index 6b61162..4c7e93f 100644
--- a/ruby.c
+++ b/ruby.c
@@ -1337,6 +1337,8 @@ process_options(int argc, char **argv, struct 
cmdline_options *opt)
   return Qtrue;
     }

+    rb_gc_set_params();
+
     if (!(opt->disable & DISABLE_BIT(rubyopt)) &&
   opt->safe_level == 0 && (s = getenv("RUBYOPT"))) {
   VALUE src_enc_name = opt->src.enc.name;
@@ -1570,7 +1572,6 @@ process_options(int argc, char **argv, struct 
cmdline_options *opt)
     rb_define_readonly_boolean("$-a", opt->do_split);

     rb_set_safe_level(opt->safe_level);
-    rb_gc_set_params();

     return iseq;
 }
Posted by SASADA Koichi (Guest)
on 2013-03-22 10:16
(Received via mailing list)
(2013/03/22 17:41), tmm1 (Aman Gupta) wrote:
>
>> >  - `task' is ambiguous (all of procedures are task).
> Do you prefer `rb_delayed_task_*` and vm_delayed_task.c ?

I prefer rb_delayed_task_*, but if there is a better name, please suggest it.
I think vm.c is a good place to put them.

>> >  - Allocation during gc is dangerous (maybe, should be prohibited)
> Yes, I thought of that when using ALLOC_N. Is it safe to use regular allocation 
(without xmalloc)? Or better to maintain static array or freelist?

I believe a statically sized C array (for example, 128 entries) is enough for
this purpose. If it overflows, raise an error.
Posted by tmm1 (Aman Gupta) (Guest)
on 2013-05-03 05:48
(Received via mailing list)
Issue #8107 has been updated by tmm1 (Aman Gupta).

Assignee set to tmm1 (Aman Gupta)

>  Another consideration is lack of EVENTs bit. It is restricted to 32bit.
>  GC related events are special. So I was thinking to separate ordinal
>  bits and GC's bits.

ko1-san, is this what you have in mind?

/* GC events (c-api only) */
#define RUBY_EVENT_OBJ             (1<<31)
#define RUBY_EVENT_OBJ_NEW   (RUBY_EVENT_OBJ | 0x1)
#define RUBY_EVENT_OBJ_MARK  (RUBY_EVENT_OBJ | 0x2)
#define RUBY_EVENT_OBJ_FREE   (RUBY_EVENT_OBJ | 0x4)
#define RUBY_EVENT_OBJ_ALL     (RUBY_EVENT_OBJ | 0xF)

----------------------------------------
Feature #8107: [patch] runtime flag to track object allocation metadata
https://bugs.ruby-lang.org/issues/8107#change-39097

Author: tmm1 (Aman Gupta)
Status: Open
Priority: Normal
Assignee: tmm1 (Aman Gupta)
Category: core
Target version:


When a ruby program contains a reference leak, debugging is a lot easier 
if you know where each object was allocated. Tools like bleakhouse and 
memprof have provided this functionality in the past, but were brittle 
and required source/runtime patches to ruby.

Ruby already provides basic callsite tracking if you recompile ruby with 
GC_DEBUG. This is impractical for daily use however, since it increases 
the size of the ruby heap by ~30%. There is also no API to access the 
debug information.

The following patch moves the GC_DEBUG file/line tracking outside of 
RVALUE, and adds a runtime flag (via environment variable) to enable it. 
This way normal usage is not affected by additional memory usage, but it 
is still simple to enable tracking for debugging purposes without having 
to recompile ruby.

I've exposed this data via BasicObject#__sourcefile__ and 
BasicObject#__sourceline__

$ ruby -e'
  GC.start
  ObjectSpace.each_object.first(1).each{ |o|
    p [o.class, o, o.__sourcefile__, o.__sourceline__]
  }
'
-e:4: warning: #__sourcefile__ requires RUBY_OBJECT_METADATA=1
-e:4: warning: #__sourceline__ requires RUBY_OBJECT_METADATA=1
[String, 
"/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems/exceptions", 
nil, nil]

$ RUBY_OBJECT_METADATA=1 ruby -e'
  GC.start
  ObjectSpace.each_object.first(1).each{ |o|
    p [o.class, o, o.__sourcefile__, o.__sourceline__]
  }
'
[String, "$(datarootdir)/doc/$(PACKAGE)", 
"/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems.rb", 8]

diff --git a/gc.c b/gc.c
index bd95073..2fc1d0c 100644
--- a/gc.c
+++ b/gc.c
@@ -81,6 +81,7 @@ typedef struct {
 #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
     int gc_stress;
 #endif
+    int track_metadata;
 } ruby_gc_params_t;

 static ruby_gc_params_t initial_params = {
@@ -91,6 +92,7 @@ static ruby_gc_params_t initial_params = {
 #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
     FALSE,
 #endif
+    FALSE
 };

 #define nomem_error GET_VM()->special_exceptions[ruby_error_nomemory]
@@ -162,6 +164,11 @@ typedef struct RVALUE {
 #pragma pack(pop)
 #endif

+typedef struct rb_obj_metadata {
+    VALUE file;
+    unsigned short line;
+} rb_obj_metadata_t;
+
 struct heaps_slot {
     struct heaps_header *header;
     uintptr_t *bits;
@@ -177,6 +184,7 @@ struct heaps_header {
     RVALUE *start;
     RVALUE *end;
     size_t limit;
+    rb_obj_metadata_t *metadata;
 };

 struct heaps_free_bitmap {
@@ -291,6 +299,7 @@ int *ruby_initial_gc_stress_ptr = 
&rb_objspace.gc_stress;
 #define initial_heap_min_slots  initial_params.initial_heap_min_slots
 #define initial_free_min  initial_params.initial_free_min
 #define initial_growth_factor  initial_params.initial_growth_factor
+#define track_metadata  initial_params.track_metadata

 #define is_lazy_sweeping(objspace) ((objspace)->heap.sweep_slots != 0)

@@ -413,6 +422,8 @@ rb_objspace_free(rb_objspace_t *objspace)
     if (objspace->heap.sorted) {
   size_t i;
   for (i = 0; i < heaps_used; ++i) {
+            if (objspace->heap.sorted[i]->metadata)
+                free(objspace->heap.sorted[i]->metadata);
             free(objspace->heap.sorted[i]->bits);
       aligned_free(objspace->heap.sorted[i]);
   }
@@ -538,6 +549,7 @@ assign_heap_slot(rb_objspace_t *objspace)
     objspace->heap.sorted[hi]->end = (p + objs);
     objspace->heap.sorted[hi]->base = heaps;
     objspace->heap.sorted[hi]->limit = objs;
+    objspace->heap.sorted[hi]->metadata = NULL;
     assert(objspace->heap.free_bitmap != NULL);
     heaps->bits = (uintptr_t *)objspace->heap.free_bitmap;
     objspace->heap.sorted[hi]->bits = (uintptr_t 
*)objspace->heap.free_bitmap;
@@ -667,12 +679,19 @@ newobj(VALUE klass, VALUE flags)
     }

     MEMZERO((void*)obj, RVALUE, 1);
-#ifdef GC_DEBUG
-    RANY(obj)->file = rb_sourcefile();
-    RANY(obj)->line = rb_sourceline();
-#endif
     objspace->total_allocated_object_num++;

+    if (UNLIKELY(track_metadata)) {
+        struct heaps_header *heap = GET_HEAP_HEADER(obj);
+        if (!heap->metadata)
+            heap->metadata = calloc(HEAP_OBJ_LIMIT, 
sizeof(rb_obj_metadata_t));
+        if (heap->metadata) {
+            rb_obj_metadata_t *meta = 
&heap->metadata[NUM_IN_SLOT(obj)];
+            meta->file = rb_sourcefilename();
+            meta->line = rb_sourceline();
+        }
+    }
+
     return obj;
 }

@@ -867,6 +886,8 @@ free_unused_heaps(rb_objspace_t *objspace)
                 last = objspace->heap.sorted[i];
       }
       else {
+                if (objspace->heap.sorted[i]->metadata)
+                    free(objspace->heap.sorted[i]->metadata);
     aligned_free(objspace->heap.sorted[i]);
       }
       heaps_used--;
@@ -1736,6 +1757,55 @@ rb_obj_id(VALUE obj)
     return nonspecial_obj_id(obj);
 }

+static inline rb_obj_metadata_t *
+rb_obj_get_metadata(VALUE obj)
+{
+    struct heaps_header *heap;
+
+    if (SPECIAL_CONST_P(obj))
+        return NULL;
+
+    heap = GET_HEAP_HEADER(obj);
+    if (!heap->metadata)
+        return NULL;
+
+    return &heap->metadata[NUM_IN_SLOT(obj)];
+}
+
+/*
+ *  Document-method: __sourcefile__
+ *
+ *  call-seq:
+ *     obj.__sourcefile__       -> string
+ *
+ *  Returns a string filename where +obj+ was allocated.
+ *
+ *  This method is only expected to work on C Ruby. An environment
+ *  variable (RUBY_OBJECT_METADATA=1) must be set to enable this
+ *  feature.
+ */
+static VALUE
+rb_obj_sourcefile(VALUE obj)
+{
+    rb_obj_metadata_t *meta = rb_obj_get_metadata(obj);
+
+    if (!track_metadata)
+        rb_warn("#__sourcefile__ requires RUBY_OBJECT_METADATA=1");
+
+    return meta ? meta->file : Qnil;
+}
+
+static VALUE
+rb_obj_sourceline(VALUE obj)
+{
+    rb_obj_metadata_t *meta = rb_obj_get_metadata(obj);
+
+    if (!track_metadata)
+        rb_warn("#__sourceline__ requires RUBY_OBJECT_METADATA=1");
+
+    return meta ? INT2FIX(meta->line) : Qnil;
+}
+
 static int
 set_zero(st_data_t key, st_data_t val, st_data_t arg)
 {
@@ -2606,6 +2676,7 @@ rb_gc_mark(VALUE ptr)
 static void
 gc_mark_children(rb_objspace_t *objspace, VALUE ptr)
 {
+    register rb_obj_metadata_t *meta;
     register RVALUE *obj = RANY(ptr);

     goto marking;    /* skip */
@@ -2626,6 +2697,9 @@ gc_mark_children(rb_objspace_t *objspace, VALUE 
ptr)
   rb_mark_generic_ivar(ptr);
     }

+    if ((meta = rb_obj_get_metadata(ptr)) && RTEST(meta->file))
+        gc_mark(objspace, meta->file);
+
     switch (BUILTIN_TYPE(obj)) {
       case T_NIL:
       case T_FIXNUM:
@@ -3294,10 +3368,17 @@ rb_gc_disable(void)
 void
 rb_gc_set_params(void)
 {
+    char *track_metadata_ptr;
     char *malloc_limit_ptr, *heap_min_slots_ptr, *free_min_ptr, 
*growth_factor_ptr;

     if (rb_safe_level() > 0) return;

+    track_metadata_ptr = getenv("RUBY_OBJECT_METADATA");
+    if (track_metadata_ptr != NULL) {
+  if (RTEST(ruby_verbose))
+      fprintf(stderr, "track_metadata=TRUE (FALSE)\n");
+  track_metadata = TRUE;
+    }
     malloc_limit_ptr = getenv("RUBY_GC_MALLOC_LIMIT");
     if (malloc_limit_ptr != NULL) {
   int malloc_limit_i = atoi(malloc_limit_ptr);
@@ -4535,6 +4616,9 @@ Init_GC(void)
     rb_define_method(rb_cBasicObject, "__id__", rb_obj_id, 0);
     rb_define_method(rb_mKernel, "object_id", rb_obj_id, 0);

+    rb_define_method(rb_cBasicObject, "__sourcefile__", 
rb_obj_sourcefile, 0);
+    rb_define_method(rb_cBasicObject, "__sourceline__", 
rb_obj_sourceline, 0);
+
     rb_define_module_function(rb_mObSpace, "count_objects", 
count_objects, -1);

     {
diff --git a/ruby.c b/ruby.c
index 6b61162..4c7e93f 100644
--- a/ruby.c
+++ b/ruby.c
@@ -1337,6 +1337,8 @@ process_options(int argc, char **argv, struct 
cmdline_options *opt)
   return Qtrue;
     }

+    rb_gc_set_params();
+
     if (!(opt->disable & DISABLE_BIT(rubyopt)) &&
   opt->safe_level == 0 && (s = getenv("RUBYOPT"))) {
   VALUE src_enc_name = opt->src.enc.name;
@@ -1570,7 +1572,6 @@ process_options(int argc, char **argv, struct 
cmdline_options *opt)
     rb_define_readonly_boolean("$-a", opt->do_split);

     rb_set_safe_level(opt->safe_level);
-    rb_gc_set_params();

     return iseq;
 }
Please log in before posting. Registration is free and takes only a minute.
Existing account (Switch to SSL-encrypted connection)
NEW: Do you have a Google/GoogleMail or Yahoo account? No registration required!
Log in with Google account | Log in with Yahoo account
No account? Register here.