Issue #8107 has been reported by tmm1 (Aman Gupta). ---------------------------------------- Feature #8107: [patch] runtime flag to track object allocation metadata https://bugs.ruby-lang.org/issues/8107 Author: tmm1 (Aman Gupta) Status: Open Priority: Normal Assignee: Category: core Target version: When a ruby program contains a reference leak, debugging is a lot easier if you know where each object was allocated. Tools like bleakhouse and memprof have provided this functionality in the past, but were brittle and required source/runtime patches to ruby. Ruby already provides basic callsite tracking if you recompile ruby with GC_DEBUG. This is impractical for daily use however, since it increases the size of the ruby heap by ~30%. There is also no API to access the debug information. The following patch moves the GC_DEBUG file/line tracking outside of RVALUE, and adds a runtime flag (via environment variable) to enable it. This way normal usage is not affected by additional memory usage, but it is still simple to enable tracking for debugging purposes without having to recompile ruby. 
I've exposed this data via BasicObject#__sourcefile__ and BasicObject#__sourceline__ $ ruby -e' GC.start ObjectSpace.each_object.first(1).each{ |o| p [o.class, o, o.__sourcefile__, o.__sourceline__] } ' -e:4: warning: #__sourcefile__ requires RUBY_OBJECT_METADATA=1 -e:4: warning: #__sourceline__ requires RUBY_OBJECT_METADATA=1 [String, "/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems/exceptions", nil, nil] $ RUBY_OBJECT_METADATA=1 ruby -e' GC.start ObjectSpace.each_object.first(1).each{ |o| p [o.class, o, o.__sourcefile__, o.__sourceline__] } ' [String, "$(datarootdir)/doc/$(PACKAGE)", "/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems.rb", 8] diff --git a/gc.c b/gc.c index bd95073..2fc1d0c 100644 --- a/gc.c +++ b/gc.c @@ -81,6 +81,7 @@ typedef struct { #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE int gc_stress; #endif + int track_metadata; } ruby_gc_params_t; static ruby_gc_params_t initial_params = { @@ -91,6 +92,7 @@ static ruby_gc_params_t initial_params = { #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE FALSE, #endif + FALSE }; #define nomem_error GET_VM()->special_exceptions[ruby_error_nomemory] @@ -162,6 +164,11 @@ typedef struct RVALUE { #pragma pack(pop) #endif +typedef struct rb_obj_metadata { + VALUE file; + unsigned short line; +} rb_obj_metadata_t; + struct heaps_slot { struct heaps_header *header; uintptr_t *bits; @@ -177,6 +184,7 @@ struct heaps_header { RVALUE *start; RVALUE *end; size_t limit; + rb_obj_metadata_t *metadata; }; struct heaps_free_bitmap { @@ -291,6 +299,7 @@ int *ruby_initial_gc_stress_ptr = &rb_objspace.gc_stress; #define initial_heap_min_slots initial_params.initial_heap_min_slots #define initial_free_min initial_params.initial_free_min #define initial_growth_factor initial_params.initial_growth_factor +#define track_metadata initial_params.track_metadata #define is_lazy_sweeping(objspace) ((objspace)->heap.sweep_slots != 0) @@ -413,6 +422,8 @@ rb_objspace_free(rb_objspace_t *objspace) if 
(objspace->heap.sorted) { size_t i; for (i = 0; i < heaps_used; ++i) { + if (objspace->heap.sorted[i]->metadata) + free(objspace->heap.sorted[i]->metadata); free(objspace->heap.sorted[i]->bits); aligned_free(objspace->heap.sorted[i]); } @@ -538,6 +549,7 @@ assign_heap_slot(rb_objspace_t *objspace) objspace->heap.sorted[hi]->end = (p + objs); objspace->heap.sorted[hi]->base = heaps; objspace->heap.sorted[hi]->limit = objs; + objspace->heap.sorted[hi]->metadata = NULL; assert(objspace->heap.free_bitmap != NULL); heaps->bits = (uintptr_t *)objspace->heap.free_bitmap; objspace->heap.sorted[hi]->bits = (uintptr_t *)objspace->heap.free_bitmap; @@ -667,12 +679,19 @@ newobj(VALUE klass, VALUE flags) } MEMZERO((void*)obj, RVALUE, 1); -#ifdef GC_DEBUG - RANY(obj)->file = rb_sourcefile(); - RANY(obj)->line = rb_sourceline(); -#endif objspace->total_allocated_object_num++; + if (UNLIKELY(track_metadata)) { + struct heaps_header *heap = GET_HEAP_HEADER(obj); + if (!heap->metadata) + heap->metadata = calloc(HEAP_OBJ_LIMIT, sizeof(rb_obj_metadata_t)); + if (heap->metadata) { + rb_obj_metadata_t *meta = &heap->metadata[NUM_IN_SLOT(obj)]; + meta->file = rb_sourcefilename(); + meta->line = rb_sourceline(); + } + } + return obj; } @@ -867,6 +886,8 @@ free_unused_heaps(rb_objspace_t *objspace) last = objspace->heap.sorted[i]; } else { + if (objspace->heap.sorted[i]->metadata) + free(objspace->heap.sorted[i]->metadata); aligned_free(objspace->heap.sorted[i]); } heaps_used--; @@ -1736,6 +1757,55 @@ rb_obj_id(VALUE obj) return nonspecial_obj_id(obj); } +static inline rb_obj_metadata_t * +rb_obj_get_metadata(VALUE obj) +{ + struct heaps_header *heap; + + if (SPECIAL_CONST_P(obj)) + return NULL; + + heap = GET_HEAP_HEADER(obj); + if (!heap->metadata) + return NULL; + + return &heap->metadata[NUM_IN_SLOT(obj)]; +} + +/* + * Document-method: __sourcefile__ + * + * call-seq: + * obj.__sourcefile__ -> string + * + * Returns a string filename where +obj+ was allocated. 
+ * + * This method is only expected to work on C Ruby. An environment + * variable (RUBY_OBJECT_METADATA=1) must be set to enable this + * feature. + */ +static VALUE +rb_obj_sourcefile(VALUE obj) +{ + rb_obj_metadata_t *meta = rb_obj_get_metadata(obj); + + if (!track_metadata) + rb_warn("#__sourcefile__ requires RUBY_OBJECT_METADATA=1"); + + return meta ? meta->file : Qnil; +} + +static VALUE +rb_obj_sourceline(VALUE obj) +{ + rb_obj_metadata_t *meta = rb_obj_get_metadata(obj); + + if (!track_metadata) + rb_warn("#__sourceline__ requires RUBY_OBJECT_METADATA=1"); + + return meta ? INT2FIX(meta->line) : Qnil; +} + static int set_zero(st_data_t key, st_data_t val, st_data_t arg) { @@ -2606,6 +2676,7 @@ rb_gc_mark(VALUE ptr) static void gc_mark_children(rb_objspace_t *objspace, VALUE ptr) { + register rb_obj_metadata_t *meta; register RVALUE *obj = RANY(ptr); goto marking; /* skip */ @@ -2626,6 +2697,9 @@ gc_mark_children(rb_objspace_t *objspace, VALUE ptr) rb_mark_generic_ivar(ptr); } + if ((meta = rb_obj_get_metadata(ptr)) && RTEST(meta->file)) + gc_mark(objspace, meta->file); + switch (BUILTIN_TYPE(obj)) { case T_NIL: case T_FIXNUM: @@ -3294,10 +3368,17 @@ rb_gc_disable(void) void rb_gc_set_params(void) { + char *track_metadata_ptr; char *malloc_limit_ptr, *heap_min_slots_ptr, *free_min_ptr, *growth_factor_ptr; if (rb_safe_level() > 0) return; + track_metadata_ptr = getenv("RUBY_OBJECT_METADATA"); + if (track_metadata_ptr != NULL) { + if (RTEST(ruby_verbose)) + fprintf(stderr, "track_metadata=TRUE (FALSE)\n"); + track_metadata = TRUE; + } malloc_limit_ptr = getenv("RUBY_GC_MALLOC_LIMIT"); if (malloc_limit_ptr != NULL) { int malloc_limit_i = atoi(malloc_limit_ptr); @@ -4535,6 +4616,9 @@ Init_GC(void) rb_define_method(rb_cBasicObject, "__id__", rb_obj_id, 0); rb_define_method(rb_mKernel, "object_id", rb_obj_id, 0); + rb_define_method(rb_cBasicObject, "__sourcefile__", rb_obj_sourcefile, 0); + rb_define_method(rb_cBasicObject, "__sourceline__", rb_obj_sourceline, 
0); + rb_define_module_function(rb_mObSpace, "count_objects", count_objects, -1); { diff --git a/ruby.c b/ruby.c index 6b61162..4c7e93f 100644 --- a/ruby.c +++ b/ruby.c @@ -1337,6 +1337,8 @@ process_options(int argc, char **argv, struct cmdline_options *opt) return Qtrue; } + rb_gc_set_params(); + if (!(opt->disable & DISABLE_BIT(rubyopt)) && opt->safe_level == 0 && (s = getenv("RUBYOPT"))) { VALUE src_enc_name = opt->src.enc.name; @@ -1570,7 +1572,6 @@ process_options(int argc, char **argv, struct cmdline_options *opt) rb_define_readonly_boolean("$-a", opt->do_split); rb_set_safe_level(opt->safe_level); - rb_gc_set_params(); return iseq; }
on 2013-03-16 09:48
on 2013-03-16 12:01
Issue #8107 has been updated by tmm1 (Aman Gupta). > + rb_gc_set_params(); > + > if (!(opt->disable & DISABLE_BIT(rubyopt)) && > opt->safe_level == 0 && (s = getenv("RUBYOPT"))) { > VALUE src_enc_name = opt->src.enc.name; > @@ -1570,7 +1572,6 @@ process_options(int argc, char **argv, struct cmdline_options *opt) > rb_define_readonly_boolean("$-a", opt->do_split); > > rb_set_safe_level(opt->safe_level); > - rb_gc_set_params(); This is a hack. I wanted to set track_metadata=1 as early as possible, especially before require('rubygems'). Maybe it makes more sense to add a command-line option instead of an ENV flag, e.g. ruby --debug-objects? ---------------------------------------- Feature #8107: [patch] runtime flag to track object allocation metadata https://bugs.ruby-lang.org/issues/8107#change-37665 Author: tmm1 (Aman Gupta) Status: Open Priority: Normal Assignee: Category: core Target version: When a ruby program contains a reference leak, debugging is a lot easier if you know where each object was allocated. Tools like bleakhouse and memprof have provided this functionality in the past, but were brittle and required source/runtime patches to ruby. Ruby already provides basic callsite tracking if you recompile ruby with GC_DEBUG. This is impractical for daily use however, since it increases the size of the ruby heap by ~30%. There is also no API to access the debug information. The following patch moves the GC_DEBUG file/line tracking outside of RVALUE, and adds a runtime flag (via environment variable) to enable it. This way normal usage is not affected by additional memory usage, but it is still simple to enable tracking for debugging purposes without having to recompile ruby. 
I've exposed this data via BasicObject#__sourcefile__ and BasicObject#__sourceline__ $ ruby -e' GC.start ObjectSpace.each_object.first(1).each{ |o| p [o.class, o, o.__sourcefile__, o.__sourceline__] } ' -e:4: warning: #__sourcefile__ requires RUBY_OBJECT_METADATA=1 -e:4: warning: #__sourceline__ requires RUBY_OBJECT_METADATA=1 [String, "/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems/exceptions", nil, nil] $ RUBY_OBJECT_METADATA=1 ruby -e' GC.start ObjectSpace.each_object.first(1).each{ |o| p [o.class, o, o.__sourcefile__, o.__sourceline__] } ' [String, "$(datarootdir)/doc/$(PACKAGE)", "/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems.rb", 8] diff --git a/gc.c b/gc.c index bd95073..2fc1d0c 100644 --- a/gc.c +++ b/gc.c @@ -81,6 +81,7 @@ typedef struct { #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE int gc_stress; #endif + int track_metadata; } ruby_gc_params_t; static ruby_gc_params_t initial_params = { @@ -91,6 +92,7 @@ static ruby_gc_params_t initial_params = { #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE FALSE, #endif + FALSE }; #define nomem_error GET_VM()->special_exceptions[ruby_error_nomemory] @@ -162,6 +164,11 @@ typedef struct RVALUE { #pragma pack(pop) #endif +typedef struct rb_obj_metadata { + VALUE file; + unsigned short line; +} rb_obj_metadata_t; + struct heaps_slot { struct heaps_header *header; uintptr_t *bits; @@ -177,6 +184,7 @@ struct heaps_header { RVALUE *start; RVALUE *end; size_t limit; + rb_obj_metadata_t *metadata; }; struct heaps_free_bitmap { @@ -291,6 +299,7 @@ int *ruby_initial_gc_stress_ptr = &rb_objspace.gc_stress; #define initial_heap_min_slots initial_params.initial_heap_min_slots #define initial_free_min initial_params.initial_free_min #define initial_growth_factor initial_params.initial_growth_factor +#define track_metadata initial_params.track_metadata #define is_lazy_sweeping(objspace) ((objspace)->heap.sweep_slots != 0) @@ -413,6 +422,8 @@ rb_objspace_free(rb_objspace_t *objspace) if 
(objspace->heap.sorted) { size_t i; for (i = 0; i < heaps_used; ++i) { + if (objspace->heap.sorted[i]->metadata) + free(objspace->heap.sorted[i]->metadata); free(objspace->heap.sorted[i]->bits); aligned_free(objspace->heap.sorted[i]); } @@ -538,6 +549,7 @@ assign_heap_slot(rb_objspace_t *objspace) objspace->heap.sorted[hi]->end = (p + objs); objspace->heap.sorted[hi]->base = heaps; objspace->heap.sorted[hi]->limit = objs; + objspace->heap.sorted[hi]->metadata = NULL; assert(objspace->heap.free_bitmap != NULL); heaps->bits = (uintptr_t *)objspace->heap.free_bitmap; objspace->heap.sorted[hi]->bits = (uintptr_t *)objspace->heap.free_bitmap; @@ -667,12 +679,19 @@ newobj(VALUE klass, VALUE flags) } MEMZERO((void*)obj, RVALUE, 1); -#ifdef GC_DEBUG - RANY(obj)->file = rb_sourcefile(); - RANY(obj)->line = rb_sourceline(); -#endif objspace->total_allocated_object_num++; + if (UNLIKELY(track_metadata)) { + struct heaps_header *heap = GET_HEAP_HEADER(obj); + if (!heap->metadata) + heap->metadata = calloc(HEAP_OBJ_LIMIT, sizeof(rb_obj_metadata_t)); + if (heap->metadata) { + rb_obj_metadata_t *meta = &heap->metadata[NUM_IN_SLOT(obj)]; + meta->file = rb_sourcefilename(); + meta->line = rb_sourceline(); + } + } + return obj; } @@ -867,6 +886,8 @@ free_unused_heaps(rb_objspace_t *objspace) last = objspace->heap.sorted[i]; } else { + if (objspace->heap.sorted[i]->metadata) + free(objspace->heap.sorted[i]->metadata); aligned_free(objspace->heap.sorted[i]); } heaps_used--; @@ -1736,6 +1757,55 @@ rb_obj_id(VALUE obj) return nonspecial_obj_id(obj); } +static inline rb_obj_metadata_t * +rb_obj_get_metadata(VALUE obj) +{ + struct heaps_header *heap; + + if (SPECIAL_CONST_P(obj)) + return NULL; + + heap = GET_HEAP_HEADER(obj); + if (!heap->metadata) + return NULL; + + return &heap->metadata[NUM_IN_SLOT(obj)]; +} + +/* + * Document-method: __sourcefile__ + * + * call-seq: + * obj.__sourcefile__ -> string + * + * Returns a string filename where +obj+ was allocated. 
+ * + * This method is only expected to work on C Ruby. An environment + * variable (RUBY_OBJECT_METADATA=1) must be set to enable this + * feature. + */ +static VALUE +rb_obj_sourcefile(VALUE obj) +{ + rb_obj_metadata_t *meta = rb_obj_get_metadata(obj); + + if (!track_metadata) + rb_warn("#__sourcefile__ requires RUBY_OBJECT_METADATA=1"); + + return meta ? meta->file : Qnil; +} + +static VALUE +rb_obj_sourceline(VALUE obj) +{ + rb_obj_metadata_t *meta = rb_obj_get_metadata(obj); + + if (!track_metadata) + rb_warn("#__sourceline__ requires RUBY_OBJECT_METADATA=1"); + + return meta ? INT2FIX(meta->line) : Qnil; +} + static int set_zero(st_data_t key, st_data_t val, st_data_t arg) { @@ -2606,6 +2676,7 @@ rb_gc_mark(VALUE ptr) static void gc_mark_children(rb_objspace_t *objspace, VALUE ptr) { + register rb_obj_metadata_t *meta; register RVALUE *obj = RANY(ptr); goto marking; /* skip */ @@ -2626,6 +2697,9 @@ gc_mark_children(rb_objspace_t *objspace, VALUE ptr) rb_mark_generic_ivar(ptr); } + if ((meta = rb_obj_get_metadata(ptr)) && RTEST(meta->file)) + gc_mark(objspace, meta->file); + switch (BUILTIN_TYPE(obj)) { case T_NIL: case T_FIXNUM: @@ -3294,10 +3368,17 @@ rb_gc_disable(void) void rb_gc_set_params(void) { + char *track_metadata_ptr; char *malloc_limit_ptr, *heap_min_slots_ptr, *free_min_ptr, *growth_factor_ptr; if (rb_safe_level() > 0) return; + track_metadata_ptr = getenv("RUBY_OBJECT_METADATA"); + if (track_metadata_ptr != NULL) { + if (RTEST(ruby_verbose)) + fprintf(stderr, "track_metadata=TRUE (FALSE)\n"); + track_metadata = TRUE; + } malloc_limit_ptr = getenv("RUBY_GC_MALLOC_LIMIT"); if (malloc_limit_ptr != NULL) { int malloc_limit_i = atoi(malloc_limit_ptr); @@ -4535,6 +4616,9 @@ Init_GC(void) rb_define_method(rb_cBasicObject, "__id__", rb_obj_id, 0); rb_define_method(rb_mKernel, "object_id", rb_obj_id, 0); + rb_define_method(rb_cBasicObject, "__sourcefile__", rb_obj_sourcefile, 0); + rb_define_method(rb_cBasicObject, "__sourceline__", rb_obj_sourceline, 
0); + rb_define_module_function(rb_mObSpace, "count_objects", count_objects, -1); { diff --git a/ruby.c b/ruby.c index 6b61162..4c7e93f 100644 --- a/ruby.c +++ b/ruby.c @@ -1337,6 +1337,8 @@ process_options(int argc, char **argv, struct cmdline_options *opt) return Qtrue; } + rb_gc_set_params(); + if (!(opt->disable & DISABLE_BIT(rubyopt)) && opt->safe_level == 0 && (s = getenv("RUBYOPT"))) { VALUE src_enc_name = opt->src.enc.name; @@ -1570,7 +1572,6 @@ process_options(int argc, char **argv, struct cmdline_options *opt) rb_define_readonly_boolean("$-a", opt->do_split); rb_set_safe_level(opt->safe_level); - rb_gc_set_params(); return iseq; }
on 2013-03-16 20:02
Issue #8107 has been updated by headius (Charles Nutter). No objections to adding this feature to MRI, but anything that goes on the standard core classes needs to involve other implementers. If this is not intended to be a standard Ruby (not MRI) feature, it would probably be best to have the access of file/line be via an MRI-specific class. Something like RubyVM.allocated_position(obj) => [file, line]. JRuby can do instance tracking, but it's via JVM tooling APIs turned on at the command line, and the data isn't directly accessible from the running program; basically, it gets streamed out to a debugging/data collection client tool. I'm not sure it would be possible to provide the allocation-tracking as a runtime flag or as an environment variable (we don't process env vars until JVM is already booted), but a command-line flag is possible (we process them both before and after JVM starts). ---------------------------------------- Feature #8107: [patch] runtime flag to track object allocation metadata https://bugs.ruby-lang.org/issues/8107#change-37668 Author: tmm1 (Aman Gupta) Status: Open Priority: Normal Assignee: Category: core Target version: When a ruby program contains a reference leak, debugging is a lot easier if you know where each object was allocated. Tools like bleakhouse and memprof have provided this functionality in the past, but were brittle and required source/runtime patches to ruby. Ruby already provides basic callsite tracking if you recompile ruby with GC_DEBUG. This is impractical for daily use however, since it increases the size of the ruby heap by ~30%. There is also no API to access the debug information. The following patch moves the GC_DEBUG file/line tracking outside of RVALUE, and adds a runtime flag (via environment variable) to enable it. This way normal usage is not affected by additional memory usage, but it is still simple to enable tracking for debugging purposes without having to recompile ruby. 
I've exposed this data via BasicObject#__sourcefile__ and BasicObject#__sourceline__ $ ruby -e' GC.start ObjectSpace.each_object.first(1).each{ |o| p [o.class, o, o.__sourcefile__, o.__sourceline__] } ' -e:4: warning: #__sourcefile__ requires RUBY_OBJECT_METADATA=1 -e:4: warning: #__sourceline__ requires RUBY_OBJECT_METADATA=1 [String, "/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems/exceptions", nil, nil] $ RUBY_OBJECT_METADATA=1 ruby -e' GC.start ObjectSpace.each_object.first(1).each{ |o| p [o.class, o, o.__sourcefile__, o.__sourceline__] } ' [String, "$(datarootdir)/doc/$(PACKAGE)", "/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems.rb", 8] diff --git a/gc.c b/gc.c index bd95073..2fc1d0c 100644 --- a/gc.c +++ b/gc.c @@ -81,6 +81,7 @@ typedef struct { #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE int gc_stress; #endif + int track_metadata; } ruby_gc_params_t; static ruby_gc_params_t initial_params = { @@ -91,6 +92,7 @@ static ruby_gc_params_t initial_params = { #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE FALSE, #endif + FALSE }; #define nomem_error GET_VM()->special_exceptions[ruby_error_nomemory] @@ -162,6 +164,11 @@ typedef struct RVALUE { #pragma pack(pop) #endif +typedef struct rb_obj_metadata { + VALUE file; + unsigned short line; +} rb_obj_metadata_t; + struct heaps_slot { struct heaps_header *header; uintptr_t *bits; @@ -177,6 +184,7 @@ struct heaps_header { RVALUE *start; RVALUE *end; size_t limit; + rb_obj_metadata_t *metadata; }; struct heaps_free_bitmap { @@ -291,6 +299,7 @@ int *ruby_initial_gc_stress_ptr = &rb_objspace.gc_stress; #define initial_heap_min_slots initial_params.initial_heap_min_slots #define initial_free_min initial_params.initial_free_min #define initial_growth_factor initial_params.initial_growth_factor +#define track_metadata initial_params.track_metadata #define is_lazy_sweeping(objspace) ((objspace)->heap.sweep_slots != 0) @@ -413,6 +422,8 @@ rb_objspace_free(rb_objspace_t *objspace) if 
(objspace->heap.sorted) { size_t i; for (i = 0; i < heaps_used; ++i) { + if (objspace->heap.sorted[i]->metadata) + free(objspace->heap.sorted[i]->metadata); free(objspace->heap.sorted[i]->bits); aligned_free(objspace->heap.sorted[i]); } @@ -538,6 +549,7 @@ assign_heap_slot(rb_objspace_t *objspace) objspace->heap.sorted[hi]->end = (p + objs); objspace->heap.sorted[hi]->base = heaps; objspace->heap.sorted[hi]->limit = objs; + objspace->heap.sorted[hi]->metadata = NULL; assert(objspace->heap.free_bitmap != NULL); heaps->bits = (uintptr_t *)objspace->heap.free_bitmap; objspace->heap.sorted[hi]->bits = (uintptr_t *)objspace->heap.free_bitmap; @@ -667,12 +679,19 @@ newobj(VALUE klass, VALUE flags) } MEMZERO((void*)obj, RVALUE, 1); -#ifdef GC_DEBUG - RANY(obj)->file = rb_sourcefile(); - RANY(obj)->line = rb_sourceline(); -#endif objspace->total_allocated_object_num++; + if (UNLIKELY(track_metadata)) { + struct heaps_header *heap = GET_HEAP_HEADER(obj); + if (!heap->metadata) + heap->metadata = calloc(HEAP_OBJ_LIMIT, sizeof(rb_obj_metadata_t)); + if (heap->metadata) { + rb_obj_metadata_t *meta = &heap->metadata[NUM_IN_SLOT(obj)]; + meta->file = rb_sourcefilename(); + meta->line = rb_sourceline(); + } + } + return obj; } @@ -867,6 +886,8 @@ free_unused_heaps(rb_objspace_t *objspace) last = objspace->heap.sorted[i]; } else { + if (objspace->heap.sorted[i]->metadata) + free(objspace->heap.sorted[i]->metadata); aligned_free(objspace->heap.sorted[i]); } heaps_used--; @@ -1736,6 +1757,55 @@ rb_obj_id(VALUE obj) return nonspecial_obj_id(obj); } +static inline rb_obj_metadata_t * +rb_obj_get_metadata(VALUE obj) +{ + struct heaps_header *heap; + + if (SPECIAL_CONST_P(obj)) + return NULL; + + heap = GET_HEAP_HEADER(obj); + if (!heap->metadata) + return NULL; + + return &heap->metadata[NUM_IN_SLOT(obj)]; +} + +/* + * Document-method: __sourcefile__ + * + * call-seq: + * obj.__sourcefile__ -> string + * + * Returns a string filename where +obj+ was allocated. 
+ * + * This method is only expected to work on C Ruby. An environment + * variable (RUBY_OBJECT_METADATA=1) must be set to enable this + * feature. + */ +static VALUE +rb_obj_sourcefile(VALUE obj) +{ + rb_obj_metadata_t *meta = rb_obj_get_metadata(obj); + + if (!track_metadata) + rb_warn("#__sourcefile__ requires RUBY_OBJECT_METADATA=1"); + + return meta ? meta->file : Qnil; +} + +static VALUE +rb_obj_sourceline(VALUE obj) +{ + rb_obj_metadata_t *meta = rb_obj_get_metadata(obj); + + if (!track_metadata) + rb_warn("#__sourceline__ requires RUBY_OBJECT_METADATA=1"); + + return meta ? INT2FIX(meta->line) : Qnil; +} + static int set_zero(st_data_t key, st_data_t val, st_data_t arg) { @@ -2606,6 +2676,7 @@ rb_gc_mark(VALUE ptr) static void gc_mark_children(rb_objspace_t *objspace, VALUE ptr) { + register rb_obj_metadata_t *meta; register RVALUE *obj = RANY(ptr); goto marking; /* skip */ @@ -2626,6 +2697,9 @@ gc_mark_children(rb_objspace_t *objspace, VALUE ptr) rb_mark_generic_ivar(ptr); } + if ((meta = rb_obj_get_metadata(ptr)) && RTEST(meta->file)) + gc_mark(objspace, meta->file); + switch (BUILTIN_TYPE(obj)) { case T_NIL: case T_FIXNUM: @@ -3294,10 +3368,17 @@ rb_gc_disable(void) void rb_gc_set_params(void) { + char *track_metadata_ptr; char *malloc_limit_ptr, *heap_min_slots_ptr, *free_min_ptr, *growth_factor_ptr; if (rb_safe_level() > 0) return; + track_metadata_ptr = getenv("RUBY_OBJECT_METADATA"); + if (track_metadata_ptr != NULL) { + if (RTEST(ruby_verbose)) + fprintf(stderr, "track_metadata=TRUE (FALSE)\n"); + track_metadata = TRUE; + } malloc_limit_ptr = getenv("RUBY_GC_MALLOC_LIMIT"); if (malloc_limit_ptr != NULL) { int malloc_limit_i = atoi(malloc_limit_ptr); @@ -4535,6 +4616,9 @@ Init_GC(void) rb_define_method(rb_cBasicObject, "__id__", rb_obj_id, 0); rb_define_method(rb_mKernel, "object_id", rb_obj_id, 0); + rb_define_method(rb_cBasicObject, "__sourcefile__", rb_obj_sourcefile, 0); + rb_define_method(rb_cBasicObject, "__sourceline__", rb_obj_sourceline, 
0); + rb_define_module_function(rb_mObSpace, "count_objects", count_objects, -1); { diff --git a/ruby.c b/ruby.c index 6b61162..4c7e93f 100644 --- a/ruby.c +++ b/ruby.c @@ -1337,6 +1337,8 @@ process_options(int argc, char **argv, struct cmdline_options *opt) return Qtrue; } + rb_gc_set_params(); + if (!(opt->disable & DISABLE_BIT(rubyopt)) && opt->safe_level == 0 && (s = getenv("RUBYOPT"))) { VALUE src_enc_name = opt->src.enc.name; @@ -1570,7 +1572,6 @@ process_options(int argc, char **argv, struct cmdline_options *opt) rb_define_readonly_boolean("$-a", opt->do_split); rb_set_safe_level(opt->safe_level); - rb_gc_set_params(); return iseq; }
on 2013-03-19 05:38
Issue #8107 has been updated by tmm1 (Aman Gupta). > Something like RubyVM.allocated_position(obj) => [file, line]. I'll defer API decisions to core, but a method under RubyVM or in the new objspace.so would be fine. I slightly prefer two separate methods, to avoid an array allocation when you're only interested in the filename. Primarily, I am interested in feedback on the runtime flag in this patch. JRuby and Rubinius both provide allocation tracking, but MRI currently has no equivalent. This makes debugging object leaks very painful. It sounds like a command-line option would be simpler to standardize on across implementations. I prefer it over an environment variable as well. diff --git a/gc.c b/gc.c index 2fc1d0c..cafebf2 100644 --- a/gc.c +++ b/gc.c @@ -1780,9 +1780,8 @@ rb_obj_get_metadata(VALUE obj) * * Returns a string filename where +obj+ was allocated. * - * This method is only expected to work on C Ruby. An environment - * variable (RUBY_OBJECT_METADATA=1) must be set to enable this - * feature. + * This method is only expected to work on C Ruby. Ruby must be run + * with --debug-objects to enable this feature. */ static VALUE rb_obj_sourcefile(VALUE obj) @@ -1790,7 +1789,7 @@ rb_obj_sourcefile(VALUE obj) rb_obj_metadata_t *meta = rb_obj_get_metadata(obj); if (!track_metadata) - rb_warn("#__sourcefile__ requires RUBY_OBJECT_METADATA=1"); + rb_warn("__sourcefile__ requires --debug-objects"); return meta ? meta->file : Qnil; } @@ -1801,7 +1800,7 @@ rb_obj_sourceline(VALUE obj) rb_obj_metadata_t *meta = rb_obj_get_metadata(obj); if (!track_metadata) - rb_warn("#__sourceline__ requires RUBY_OBJECT_METADATA=1"); + rb_warn("__sourceline__ requires --debug-objects"); return meta ? 
INT2FIX(meta->line) : Qnil; } @@ -3366,19 +3365,18 @@ rb_gc_disable(void) } void +rb_obj_enable_metadata(void) +{ + track_metadata = TRUE; +} + +void rb_gc_set_params(void) { - char *track_metadata_ptr; char *malloc_limit_ptr, *heap_min_slots_ptr, *free_min_ptr, *growth_factor_ptr; if (rb_safe_level() > 0) return; - track_metadata_ptr = getenv("RUBY_OBJECT_METADATA"); - if (track_metadata_ptr != NULL) { - if (RTEST(ruby_verbose)) - fprintf(stderr, "track_metadata=TRUE (FALSE)\n"); - track_metadata = TRUE; - } malloc_limit_ptr = getenv("RUBY_GC_MALLOC_LIMIT"); if (malloc_limit_ptr != NULL) { int malloc_limit_i = atoi(malloc_limit_ptr); diff --git a/internal.h b/internal.h index b099f24..5386f7d 100644 --- a/internal.h +++ b/internal.h @@ -143,6 +143,7 @@ void rb_w32_init_file(void); /* gc.c */ void Init_heap(void); void *ruby_mimmalloc(size_t size); +void rb_obj_enable_metadata(void); /* inits.c */ void rb_call_inits(void); diff --git a/ruby.c b/ruby.c index a0b438d..095bf29 100644 --- a/ruby.c +++ b/ruby.c @@ -1117,6 +1117,9 @@ proc_options(long argc, char **argv, struct cmdline_options *opt, int envopt) set_source_encoding_once(opt, s, 0); } #endif + else if (strcmp("debug-objects", s) == 0) { + rb_obj_enable_metadata(); + } else if (strcmp("version", s) == 0) { if (envopt) goto noenvopt_long; opt->dump |= DUMP_BIT(version); @@ -1364,8 +1367,6 @@ process_options(int argc, char **argv, struct cmdline_options *opt) ruby_show_copyright(); } - rb_gc_set_params(); - if (opt->safe_level >= 4) { OBJ_TAINT(rb_argv); OBJ_TAINT(GET_VM()->load_path); @@ -1572,6 +1573,7 @@ process_options(int argc, char **argv, struct cmdline_options *opt) rb_define_readonly_boolean("$-a", opt->do_split); rb_set_safe_level(opt->safe_level); + rb_gc_set_params(); return iseq; } ---------------------------------------- Feature #8107: [patch] runtime flag to track object allocation metadata https://bugs.ruby-lang.org/issues/8107#change-37715 Author: tmm1 (Aman Gupta) Status: Open Priority: 
Normal Assignee: Category: core Target version: When a ruby program contains a reference leak, debugging is a lot easier if you know where each object was allocated. Tools like bleakhouse and memprof have provided this functionality in the past, but were brittle and required source/runtime patches to ruby. Ruby already provides basic callsite tracking if you recompile ruby with GC_DEBUG. This is impractical for daily use however, since it increases the size of the ruby heap by ~30%. There is also no API to access the debug information. The following patch moves the GC_DEBUG file/line tracking outside of RVALUE, and adds a runtime flag (via environment variable) to enable it. This way normal usage is not affected by additional memory usage, but it is still simple to enable tracking for debugging purposes without having to recompile ruby. I've exposed this data via BasicObject#__sourcefile__ and BasicObject#__sourceline__ $ ruby -e' GC.start ObjectSpace.each_object.first(1).each{ |o| p [o.class, o, o.__sourcefile__, o.__sourceline__] } ' -e:4: warning: #__sourcefile__ requires RUBY_OBJECT_METADATA=1 -e:4: warning: #__sourceline__ requires RUBY_OBJECT_METADATA=1 [String, "/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems/exceptions", nil, nil] $ RUBY_OBJECT_METADATA=1 ruby -e' GC.start ObjectSpace.each_object.first(1).each{ |o| p [o.class, o, o.__sourcefile__, o.__sourceline__] } ' [String, "$(datarootdir)/doc/$(PACKAGE)", "/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems.rb", 8] diff --git a/gc.c b/gc.c index bd95073..2fc1d0c 100644 --- a/gc.c +++ b/gc.c @@ -81,6 +81,7 @@ typedef struct { #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE int gc_stress; #endif + int track_metadata; } ruby_gc_params_t; static ruby_gc_params_t initial_params = { @@ -91,6 +92,7 @@ static ruby_gc_params_t initial_params = { #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE FALSE, #endif + FALSE }; #define nomem_error 
GET_VM()->special_exceptions[ruby_error_nomemory] @@ -162,6 +164,11 @@ typedef struct RVALUE { #pragma pack(pop) #endif +typedef struct rb_obj_metadata { + VALUE file; + unsigned short line; +} rb_obj_metadata_t; + struct heaps_slot { struct heaps_header *header; uintptr_t *bits; @@ -177,6 +184,7 @@ struct heaps_header { RVALUE *start; RVALUE *end; size_t limit; + rb_obj_metadata_t *metadata; }; struct heaps_free_bitmap { @@ -291,6 +299,7 @@ int *ruby_initial_gc_stress_ptr = &rb_objspace.gc_stress; #define initial_heap_min_slots initial_params.initial_heap_min_slots #define initial_free_min initial_params.initial_free_min #define initial_growth_factor initial_params.initial_growth_factor +#define track_metadata initial_params.track_metadata #define is_lazy_sweeping(objspace) ((objspace)->heap.sweep_slots != 0) @@ -413,6 +422,8 @@ rb_objspace_free(rb_objspace_t *objspace) if (objspace->heap.sorted) { size_t i; for (i = 0; i < heaps_used; ++i) { + if (objspace->heap.sorted[i]->metadata) + free(objspace->heap.sorted[i]->metadata); free(objspace->heap.sorted[i]->bits); aligned_free(objspace->heap.sorted[i]); } @@ -538,6 +549,7 @@ assign_heap_slot(rb_objspace_t *objspace) objspace->heap.sorted[hi]->end = (p + objs); objspace->heap.sorted[hi]->base = heaps; objspace->heap.sorted[hi]->limit = objs; + objspace->heap.sorted[hi]->metadata = NULL; assert(objspace->heap.free_bitmap != NULL); heaps->bits = (uintptr_t *)objspace->heap.free_bitmap; objspace->heap.sorted[hi]->bits = (uintptr_t *)objspace->heap.free_bitmap; @@ -667,12 +679,19 @@ newobj(VALUE klass, VALUE flags) } MEMZERO((void*)obj, RVALUE, 1); -#ifdef GC_DEBUG - RANY(obj)->file = rb_sourcefile(); - RANY(obj)->line = rb_sourceline(); -#endif objspace->total_allocated_object_num++; + if (UNLIKELY(track_metadata)) { + struct heaps_header *heap = GET_HEAP_HEADER(obj); + if (!heap->metadata) + heap->metadata = calloc(HEAP_OBJ_LIMIT, sizeof(rb_obj_metadata_t)); + if (heap->metadata) { + rb_obj_metadata_t *meta = 
&heap->metadata[NUM_IN_SLOT(obj)]; + meta->file = rb_sourcefilename(); + meta->line = rb_sourceline(); + } + } + return obj; } @@ -867,6 +886,8 @@ free_unused_heaps(rb_objspace_t *objspace) last = objspace->heap.sorted[i]; } else { + if (objspace->heap.sorted[i]->metadata) + free(objspace->heap.sorted[i]->metadata); aligned_free(objspace->heap.sorted[i]); } heaps_used--; @@ -1736,6 +1757,55 @@ rb_obj_id(VALUE obj) return nonspecial_obj_id(obj); } +static inline rb_obj_metadata_t * +rb_obj_get_metadata(VALUE obj) +{ + struct heaps_header *heap; + + if (SPECIAL_CONST_P(obj)) + return NULL; + + heap = GET_HEAP_HEADER(obj); + if (!heap->metadata) + return NULL; + + return &heap->metadata[NUM_IN_SLOT(obj)]; +} + +/* + * Document-method: __sourcefile__ + * + * call-seq: + * obj.__sourcefile__ -> string + * + * Returns a string filename where +obj+ was allocated. + * + * This method is only expected to work on C Ruby. An environment + * variable (RUBY_OBJECT_METADATA=1) must be set to enable this + * feature. + */ +static VALUE +rb_obj_sourcefile(VALUE obj) +{ + rb_obj_metadata_t *meta = rb_obj_get_metadata(obj); + + if (!track_metadata) + rb_warn("#__sourcefile__ requires RUBY_OBJECT_METADATA=1"); + + return meta ? meta->file : Qnil; +} + +static VALUE +rb_obj_sourceline(VALUE obj) +{ + rb_obj_metadata_t *meta = rb_obj_get_metadata(obj); + + if (!track_metadata) + rb_warn("#__sourceline__ requires RUBY_OBJECT_METADATA=1"); + + return meta ? 
INT2FIX(meta->line) : Qnil; +} + static int set_zero(st_data_t key, st_data_t val, st_data_t arg) { @@ -2606,6 +2676,7 @@ rb_gc_mark(VALUE ptr) static void gc_mark_children(rb_objspace_t *objspace, VALUE ptr) { + register rb_obj_metadata_t *meta; register RVALUE *obj = RANY(ptr); goto marking; /* skip */ @@ -2626,6 +2697,9 @@ gc_mark_children(rb_objspace_t *objspace, VALUE ptr) rb_mark_generic_ivar(ptr); } + if ((meta = rb_obj_get_metadata(ptr)) && RTEST(meta->file)) + gc_mark(objspace, meta->file); + switch (BUILTIN_TYPE(obj)) { case T_NIL: case T_FIXNUM: @@ -3294,10 +3368,17 @@ rb_gc_disable(void) void rb_gc_set_params(void) { + char *track_metadata_ptr; char *malloc_limit_ptr, *heap_min_slots_ptr, *free_min_ptr, *growth_factor_ptr; if (rb_safe_level() > 0) return; + track_metadata_ptr = getenv("RUBY_OBJECT_METADATA"); + if (track_metadata_ptr != NULL) { + if (RTEST(ruby_verbose)) + fprintf(stderr, "track_metadata=TRUE (FALSE)\n"); + track_metadata = TRUE; + } malloc_limit_ptr = getenv("RUBY_GC_MALLOC_LIMIT"); if (malloc_limit_ptr != NULL) { int malloc_limit_i = atoi(malloc_limit_ptr); @@ -4535,6 +4616,9 @@ Init_GC(void) rb_define_method(rb_cBasicObject, "__id__", rb_obj_id, 0); rb_define_method(rb_mKernel, "object_id", rb_obj_id, 0); + rb_define_method(rb_cBasicObject, "__sourcefile__", rb_obj_sourcefile, 0); + rb_define_method(rb_cBasicObject, "__sourceline__", rb_obj_sourceline, 0); + rb_define_module_function(rb_mObSpace, "count_objects", count_objects, -1); { diff --git a/ruby.c b/ruby.c index 6b61162..4c7e93f 100644 --- a/ruby.c +++ b/ruby.c @@ -1337,6 +1337,8 @@ process_options(int argc, char **argv, struct cmdline_options *opt) return Qtrue; } + rb_gc_set_params(); + if (!(opt->disable & DISABLE_BIT(rubyopt)) && opt->safe_level == 0 && (s = getenv("RUBYOPT"))) { VALUE src_enc_name = opt->src.enc.name; @@ -1570,7 +1572,6 @@ process_options(int argc, char **argv, struct cmdline_options *opt) rb_define_readonly_boolean("$-a", opt->do_split); 
rb_set_safe_level(opt->safe_level); - rb_gc_set_params(); return iseq; }
on 2013-03-19 05:39
Issue #8107 has been updated by tmm1 (Aman Gupta). > +typedef struct rb_obj_metadata { > + VALUE file; > + unsigned short line; > +} rb_obj_metadata_t; Maybe instead of file/line, this should be rb_iseq_t *iseq? ko1-san, do you have any opinion on this patch? ---------------------------------------- Feature #8107: [patch] runtime flag to track object allocation metadata https://bugs.ruby-lang.org/issues/8107#change-37716 Author: tmm1 (Aman Gupta) Status: Open Priority: Normal Assignee: Category: core Target version: When a ruby program contains a reference leak, debugging is a lot easier if you know where each object was allocated. Tools like bleakhouse and memprof have provided this functionality in the past, but were brittle and required source/runtime patches to ruby. Ruby already provides basic callsite tracking if you recompile ruby with GC_DEBUG. This is impractical for daily use however, since it increases the size of the ruby heap by ~30%. There is also no API to access the debug information. The following patch moves the GC_DEBUG file/line tracking outside of RVALUE, and adds a runtime flag (via environment variable) to enable it. This way normal usage is not affected by additional memory usage, but it is still simple to enable tracking for debugging purposes without having to recompile ruby. 
I've exposed this data via BasicObject#__sourcefile__ and BasicObject#__sourceline__ $ ruby -e' GC.start ObjectSpace.each_object.first(1).each{ |o| p [o.class, o, o.__sourcefile__, o.__sourceline__] } ' -e:4: warning: #__sourcefile__ requires RUBY_OBJECT_METADATA=1 -e:4: warning: #__sourceline__ requires RUBY_OBJECT_METADATA=1 [String, "/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems/exceptions", nil, nil] $ RUBY_OBJECT_METADATA=1 ruby -e' GC.start ObjectSpace.each_object.first(1).each{ |o| p [o.class, o, o.__sourcefile__, o.__sourceline__] } ' [String, "$(datarootdir)/doc/$(PACKAGE)", "/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems.rb", 8] diff --git a/gc.c b/gc.c index bd95073..2fc1d0c 100644 --- a/gc.c +++ b/gc.c @@ -81,6 +81,7 @@ typedef struct { #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE int gc_stress; #endif + int track_metadata; } ruby_gc_params_t; static ruby_gc_params_t initial_params = { @@ -91,6 +92,7 @@ static ruby_gc_params_t initial_params = { #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE FALSE, #endif + FALSE }; #define nomem_error GET_VM()->special_exceptions[ruby_error_nomemory] @@ -162,6 +164,11 @@ typedef struct RVALUE { #pragma pack(pop) #endif +typedef struct rb_obj_metadata { + VALUE file; + unsigned short line; +} rb_obj_metadata_t; + struct heaps_slot { struct heaps_header *header; uintptr_t *bits; @@ -177,6 +184,7 @@ struct heaps_header { RVALUE *start; RVALUE *end; size_t limit; + rb_obj_metadata_t *metadata; }; struct heaps_free_bitmap { @@ -291,6 +299,7 @@ int *ruby_initial_gc_stress_ptr = &rb_objspace.gc_stress; #define initial_heap_min_slots initial_params.initial_heap_min_slots #define initial_free_min initial_params.initial_free_min #define initial_growth_factor initial_params.initial_growth_factor +#define track_metadata initial_params.track_metadata #define is_lazy_sweeping(objspace) ((objspace)->heap.sweep_slots != 0) @@ -413,6 +422,8 @@ rb_objspace_free(rb_objspace_t *objspace) if 
(objspace->heap.sorted) { size_t i; for (i = 0; i < heaps_used; ++i) { + if (objspace->heap.sorted[i]->metadata) + free(objspace->heap.sorted[i]->metadata); free(objspace->heap.sorted[i]->bits); aligned_free(objspace->heap.sorted[i]); } @@ -538,6 +549,7 @@ assign_heap_slot(rb_objspace_t *objspace) objspace->heap.sorted[hi]->end = (p + objs); objspace->heap.sorted[hi]->base = heaps; objspace->heap.sorted[hi]->limit = objs; + objspace->heap.sorted[hi]->metadata = NULL; assert(objspace->heap.free_bitmap != NULL); heaps->bits = (uintptr_t *)objspace->heap.free_bitmap; objspace->heap.sorted[hi]->bits = (uintptr_t *)objspace->heap.free_bitmap; @@ -667,12 +679,19 @@ newobj(VALUE klass, VALUE flags) } MEMZERO((void*)obj, RVALUE, 1); -#ifdef GC_DEBUG - RANY(obj)->file = rb_sourcefile(); - RANY(obj)->line = rb_sourceline(); -#endif objspace->total_allocated_object_num++; + if (UNLIKELY(track_metadata)) { + struct heaps_header *heap = GET_HEAP_HEADER(obj); + if (!heap->metadata) + heap->metadata = calloc(HEAP_OBJ_LIMIT, sizeof(rb_obj_metadata_t)); + if (heap->metadata) { + rb_obj_metadata_t *meta = &heap->metadata[NUM_IN_SLOT(obj)]; + meta->file = rb_sourcefilename(); + meta->line = rb_sourceline(); + } + } + return obj; } @@ -867,6 +886,8 @@ free_unused_heaps(rb_objspace_t *objspace) last = objspace->heap.sorted[i]; } else { + if (objspace->heap.sorted[i]->metadata) + free(objspace->heap.sorted[i]->metadata); aligned_free(objspace->heap.sorted[i]); } heaps_used--; @@ -1736,6 +1757,55 @@ rb_obj_id(VALUE obj) return nonspecial_obj_id(obj); } +static inline rb_obj_metadata_t * +rb_obj_get_metadata(VALUE obj) +{ + struct heaps_header *heap; + + if (SPECIAL_CONST_P(obj)) + return NULL; + + heap = GET_HEAP_HEADER(obj); + if (!heap->metadata) + return NULL; + + return &heap->metadata[NUM_IN_SLOT(obj)]; +} + +/* + * Document-method: __sourcefile__ + * + * call-seq: + * obj.__sourcefile__ -> string + * + * Returns a string filename where +obj+ was allocated. 
+ * + * This method is only expected to work on C Ruby. An environment + * variable (RUBY_OBJECT_METADATA=1) must be set to enable this + * feature. + */ +static VALUE +rb_obj_sourcefile(VALUE obj) +{ + rb_obj_metadata_t *meta = rb_obj_get_metadata(obj); + + if (!track_metadata) + rb_warn("#__sourcefile__ requires RUBY_OBJECT_METADATA=1"); + + return meta ? meta->file : Qnil; +} + +static VALUE +rb_obj_sourceline(VALUE obj) +{ + rb_obj_metadata_t *meta = rb_obj_get_metadata(obj); + + if (!track_metadata) + rb_warn("#__sourceline__ requires RUBY_OBJECT_METADATA=1"); + + return meta ? INT2FIX(meta->line) : Qnil; +} + static int set_zero(st_data_t key, st_data_t val, st_data_t arg) { @@ -2606,6 +2676,7 @@ rb_gc_mark(VALUE ptr) static void gc_mark_children(rb_objspace_t *objspace, VALUE ptr) { + register rb_obj_metadata_t *meta; register RVALUE *obj = RANY(ptr); goto marking; /* skip */ @@ -2626,6 +2697,9 @@ gc_mark_children(rb_objspace_t *objspace, VALUE ptr) rb_mark_generic_ivar(ptr); } + if ((meta = rb_obj_get_metadata(ptr)) && RTEST(meta->file)) + gc_mark(objspace, meta->file); + switch (BUILTIN_TYPE(obj)) { case T_NIL: case T_FIXNUM: @@ -3294,10 +3368,17 @@ rb_gc_disable(void) void rb_gc_set_params(void) { + char *track_metadata_ptr; char *malloc_limit_ptr, *heap_min_slots_ptr, *free_min_ptr, *growth_factor_ptr; if (rb_safe_level() > 0) return; + track_metadata_ptr = getenv("RUBY_OBJECT_METADATA"); + if (track_metadata_ptr != NULL) { + if (RTEST(ruby_verbose)) + fprintf(stderr, "track_metadata=TRUE (FALSE)\n"); + track_metadata = TRUE; + } malloc_limit_ptr = getenv("RUBY_GC_MALLOC_LIMIT"); if (malloc_limit_ptr != NULL) { int malloc_limit_i = atoi(malloc_limit_ptr); @@ -4535,6 +4616,9 @@ Init_GC(void) rb_define_method(rb_cBasicObject, "__id__", rb_obj_id, 0); rb_define_method(rb_mKernel, "object_id", rb_obj_id, 0); + rb_define_method(rb_cBasicObject, "__sourcefile__", rb_obj_sourcefile, 0); + rb_define_method(rb_cBasicObject, "__sourceline__", rb_obj_sourceline, 
0); + rb_define_module_function(rb_mObSpace, "count_objects", count_objects, -1); { diff --git a/ruby.c b/ruby.c index 6b61162..4c7e93f 100644 --- a/ruby.c +++ b/ruby.c @@ -1337,6 +1337,8 @@ process_options(int argc, char **argv, struct cmdline_options *opt) return Qtrue; } + rb_gc_set_params(); + if (!(opt->disable & DISABLE_BIT(rubyopt)) && opt->safe_level == 0 && (s = getenv("RUBYOPT"))) { VALUE src_enc_name = opt->src.enc.name; @@ -1570,7 +1572,6 @@ process_options(int argc, char **argv, struct cmdline_options *opt) rb_define_readonly_boolean("$-a", opt->do_split); rb_set_safe_level(opt->safe_level); - rb_gc_set_params(); return iseq; }
on 2013-03-19 05:48
Issue #8107 has been updated by tmm1 (Aman Gupta).
Here's an example using this feature in a rails app, to find files that
are allocating many long-lived objects:
% RUBYOPT=--debug-objects ruby -r config/environment -e'
GC.start
ObjectSpace.each_object.to_a.inject(Hash.new 0){ |h,o|
h["#{o.__sourcefile__}:#{o.class}"] += 1; h }.
sort_by{ |k,v| -v }.
first(14).
each{ |k,v| printf "% 6d | %s\n", v, k }
'
36244 | lib/ruby/1.9.1/psych/visitors/to_ruby.rb:String
28560 |
gems/activesupport-2.3.14.github21/lib/active_support/dependencies.rb:String
26038 |
gems/actionpack-2.3.14.github21/lib/action_controller/routing/route_set.rb:String
19337 |
gems/activesupport-2.3.14.github21/lib/active_support/multibyte/unicode_database.rb:ActiveSupport::Multibyte::Codepoint
17279 | gems/mime-types-1.19/lib/mime/types.rb:String
10762 |
gems/tzinfo-0.3.36/lib/tzinfo/data_timezone_info.rb:TZInfo::TimezoneTransitionInfo
10419 |
gems/actionpack-2.3.14.github21/lib/action_controller/routing/route.rb:String
9486 |
gems/activesupport-2.3.14.github21/lib/active_support/dependencies.rb:RubyVM::InstructionSequence
8459 |
gems/actionpack-2.3.14.github21/lib/action_controller/routing/route_set.rb:RubyVM::InstructionSequence
5569 |
gems/actionpack-2.3.14.github21/lib/action_controller/routing/builder.rb:String
5151 | gems/addressable-2.2.8/lib/addressable/idna/pure.rb:Array
4944 | gems/mime-types-1.19/lib/mime/types.rb:Array
4800 | gems/addressable-2.2.8/lib/addressable/idna/pure.rb:String
3782 |
gems/actionpack-2.3.14.github21/lib/action_controller/routing/builder.rb:ActionController::Routing::DividerSegment
----------------------------------------
Feature #8107: [patch] runtime flag to track object allocation metadata
https://bugs.ruby-lang.org/issues/8107#change-37717
Author: tmm1 (Aman Gupta)
Status: Open
Priority: Normal
Assignee:
Category: core
Target version:
When a ruby program contains a reference leak, debugging is a lot easier
if you know where each object was allocated. Tools like bleakhouse and
memprof have provided this functionality in the past, but were brittle
and required source/runtime patches to ruby.
Ruby already provides basic callsite tracking if you recompile ruby with
GC_DEBUG. This is impractical for daily use however, since it increases
the size of the ruby heap by ~30%. There is also no API to access the
debug information.
The following patch moves the GC_DEBUG file/line tracking outside of
RVALUE, and adds a runtime flag (via environment variable) to enable it.
This way normal usage is not affected by additional memory usage, but it
is still simple to enable tracking for debugging purposes without having
to recompile ruby.
I've exposed this data via BasicObject#__sourcefile__ and
BasicObject#__sourceline__
$ ruby -e'
GC.start
ObjectSpace.each_object.first(1).each{ |o|
p [o.class, o, o.__sourcefile__, o.__sourceline__]
}
'
-e:4: warning: #__sourcefile__ requires RUBY_OBJECT_METADATA=1
-e:4: warning: #__sourceline__ requires RUBY_OBJECT_METADATA=1
[String,
"/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems/exceptions",
nil, nil]
$ RUBY_OBJECT_METADATA=1 ruby -e'
GC.start
ObjectSpace.each_object.first(1).each{ |o|
p [o.class, o, o.__sourcefile__, o.__sourceline__]
}
'
[String, "$(datarootdir)/doc/$(PACKAGE)",
"/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems.rb", 8]
diff --git a/gc.c b/gc.c
index bd95073..2fc1d0c 100644
--- a/gc.c
+++ b/gc.c
@@ -81,6 +81,7 @@ typedef struct {
#if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
int gc_stress;
#endif
+ int track_metadata;
} ruby_gc_params_t;
static ruby_gc_params_t initial_params = {
@@ -91,6 +92,7 @@ static ruby_gc_params_t initial_params = {
#if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
FALSE,
#endif
+ FALSE
};
#define nomem_error GET_VM()->special_exceptions[ruby_error_nomemory]
@@ -162,6 +164,11 @@ typedef struct RVALUE {
#pragma pack(pop)
#endif
+typedef struct rb_obj_metadata {
+ VALUE file;
+ unsigned short line;
+} rb_obj_metadata_t;
+
struct heaps_slot {
struct heaps_header *header;
uintptr_t *bits;
@@ -177,6 +184,7 @@ struct heaps_header {
RVALUE *start;
RVALUE *end;
size_t limit;
+ rb_obj_metadata_t *metadata;
};
struct heaps_free_bitmap {
@@ -291,6 +299,7 @@ int *ruby_initial_gc_stress_ptr =
&rb_objspace.gc_stress;
#define initial_heap_min_slots initial_params.initial_heap_min_slots
#define initial_free_min initial_params.initial_free_min
#define initial_growth_factor initial_params.initial_growth_factor
+#define track_metadata initial_params.track_metadata
#define is_lazy_sweeping(objspace) ((objspace)->heap.sweep_slots != 0)
@@ -413,6 +422,8 @@ rb_objspace_free(rb_objspace_t *objspace)
if (objspace->heap.sorted) {
size_t i;
for (i = 0; i < heaps_used; ++i) {
+ if (objspace->heap.sorted[i]->metadata)
+ free(objspace->heap.sorted[i]->metadata);
free(objspace->heap.sorted[i]->bits);
aligned_free(objspace->heap.sorted[i]);
}
@@ -538,6 +549,7 @@ assign_heap_slot(rb_objspace_t *objspace)
objspace->heap.sorted[hi]->end = (p + objs);
objspace->heap.sorted[hi]->base = heaps;
objspace->heap.sorted[hi]->limit = objs;
+ objspace->heap.sorted[hi]->metadata = NULL;
assert(objspace->heap.free_bitmap != NULL);
heaps->bits = (uintptr_t *)objspace->heap.free_bitmap;
objspace->heap.sorted[hi]->bits = (uintptr_t
*)objspace->heap.free_bitmap;
@@ -667,12 +679,19 @@ newobj(VALUE klass, VALUE flags)
}
MEMZERO((void*)obj, RVALUE, 1);
-#ifdef GC_DEBUG
- RANY(obj)->file = rb_sourcefile();
- RANY(obj)->line = rb_sourceline();
-#endif
objspace->total_allocated_object_num++;
+ if (UNLIKELY(track_metadata)) {
+ struct heaps_header *heap = GET_HEAP_HEADER(obj);
+ if (!heap->metadata)
+ heap->metadata = calloc(HEAP_OBJ_LIMIT,
sizeof(rb_obj_metadata_t));
+ if (heap->metadata) {
+ rb_obj_metadata_t *meta =
&heap->metadata[NUM_IN_SLOT(obj)];
+ meta->file = rb_sourcefilename();
+ meta->line = rb_sourceline();
+ }
+ }
+
return obj;
}
@@ -867,6 +886,8 @@ free_unused_heaps(rb_objspace_t *objspace)
last = objspace->heap.sorted[i];
}
else {
+ if (objspace->heap.sorted[i]->metadata)
+ free(objspace->heap.sorted[i]->metadata);
aligned_free(objspace->heap.sorted[i]);
}
heaps_used--;
@@ -1736,6 +1757,55 @@ rb_obj_id(VALUE obj)
return nonspecial_obj_id(obj);
}
+static inline rb_obj_metadata_t *
+rb_obj_get_metadata(VALUE obj)
+{
+ struct heaps_header *heap;
+
+ if (SPECIAL_CONST_P(obj))
+ return NULL;
+
+ heap = GET_HEAP_HEADER(obj);
+ if (!heap->metadata)
+ return NULL;
+
+ return &heap->metadata[NUM_IN_SLOT(obj)];
+}
+
+/*
+ * Document-method: __sourcefile__
+ *
+ * call-seq:
+ * obj.__sourcefile__ -> string
+ *
+ * Returns a string filename where +obj+ was allocated.
+ *
+ * This method is only expected to work on C Ruby. An environment
+ * variable (RUBY_OBJECT_METADATA=1) must be set to enable this
+ * feature.
+ */
+static VALUE
+rb_obj_sourcefile(VALUE obj)
+{
+ rb_obj_metadata_t *meta = rb_obj_get_metadata(obj);
+
+ if (!track_metadata)
+ rb_warn("#__sourcefile__ requires RUBY_OBJECT_METADATA=1");
+
+ return meta ? meta->file : Qnil;
+}
+
+static VALUE
+rb_obj_sourceline(VALUE obj)
+{
+ rb_obj_metadata_t *meta = rb_obj_get_metadata(obj);
+
+ if (!track_metadata)
+ rb_warn("#__sourceline__ requires RUBY_OBJECT_METADATA=1");
+
+ return meta ? INT2FIX(meta->line) : Qnil;
+}
+
static int
set_zero(st_data_t key, st_data_t val, st_data_t arg)
{
@@ -2606,6 +2676,7 @@ rb_gc_mark(VALUE ptr)
static void
gc_mark_children(rb_objspace_t *objspace, VALUE ptr)
{
+ register rb_obj_metadata_t *meta;
register RVALUE *obj = RANY(ptr);
goto marking; /* skip */
@@ -2626,6 +2697,9 @@ gc_mark_children(rb_objspace_t *objspace, VALUE
ptr)
rb_mark_generic_ivar(ptr);
}
+ if ((meta = rb_obj_get_metadata(ptr)) && RTEST(meta->file))
+ gc_mark(objspace, meta->file);
+
switch (BUILTIN_TYPE(obj)) {
case T_NIL:
case T_FIXNUM:
@@ -3294,10 +3368,17 @@ rb_gc_disable(void)
void
rb_gc_set_params(void)
{
+ char *track_metadata_ptr;
char *malloc_limit_ptr, *heap_min_slots_ptr, *free_min_ptr,
*growth_factor_ptr;
if (rb_safe_level() > 0) return;
+ track_metadata_ptr = getenv("RUBY_OBJECT_METADATA");
+ if (track_metadata_ptr != NULL) {
+ if (RTEST(ruby_verbose))
+ fprintf(stderr, "track_metadata=TRUE (FALSE)\n");
+ track_metadata = TRUE;
+ }
malloc_limit_ptr = getenv("RUBY_GC_MALLOC_LIMIT");
if (malloc_limit_ptr != NULL) {
int malloc_limit_i = atoi(malloc_limit_ptr);
@@ -4535,6 +4616,9 @@ Init_GC(void)
rb_define_method(rb_cBasicObject, "__id__", rb_obj_id, 0);
rb_define_method(rb_mKernel, "object_id", rb_obj_id, 0);
+ rb_define_method(rb_cBasicObject, "__sourcefile__",
rb_obj_sourcefile, 0);
+ rb_define_method(rb_cBasicObject, "__sourceline__",
rb_obj_sourceline, 0);
+
rb_define_module_function(rb_mObSpace, "count_objects",
count_objects, -1);
{
diff --git a/ruby.c b/ruby.c
index 6b61162..4c7e93f 100644
--- a/ruby.c
+++ b/ruby.c
@@ -1337,6 +1337,8 @@ process_options(int argc, char **argv, struct
cmdline_options *opt)
return Qtrue;
}
+ rb_gc_set_params();
+
if (!(opt->disable & DISABLE_BIT(rubyopt)) &&
opt->safe_level == 0 && (s = getenv("RUBYOPT"))) {
VALUE src_enc_name = opt->src.enc.name;
@@ -1570,7 +1572,6 @@ process_options(int argc, char **argv, struct
cmdline_options *opt)
rb_define_readonly_boolean("$-a", opt->do_split);
rb_set_safe_level(opt->safe_level);
- rb_gc_set_params();
return iseq;
}
on 2013-03-19 06:47
(2013/03/19 13:39), tmm1 (Aman Gupta) wrote: > Maybe instead of file/line, this should be rb_iseq_t *iseq? C methods don't have an iseq. > ko1-san, do you have any opinion on this patch? I'm considering another approach to adding such information, but I can't guarantee when I will introduce that patch :( The approach is to add a special trace function (a C function call, not a Ruby method) for each object allocation and free (and for the end of marking). I believe this approach allows flexible statistics, but we need to be more careful about adding such an API. Using these APIs, users can add their own statistics libraries. *Just yesterday*, I was thinking about these new APIs, because I want to generate the following movie easily. http://www.atdot.net/~ko1/diary/resource/20130318/... This movie shows the status of the heaps. A black pixel is a free object, a red pixel is a string object, and so on. (For now, I modify gc.c directly: http://www.atdot.net/~ko1/diary/resource/20130318/... ) I'm considering making a proposal with a patch. @tmm1, can you wait for my proposal with a patch? Or should I propose only the idea?
on 2013-03-19 07:46
(2013/03/19 14:47), SASADA Koichi wrote: > I believe this approach allows flexible statistics. One example of this flexibility is collecting the call tree of each object creation: not only the method name, but the whole call tree. "Memory Profiler for Ruby" http://rubykaigi.org/2010/ja/events/86 He was my student, and he modified gc.c directly. I want to make it pluggable.
on 2013-03-19 07:46
Issue #8107 has been updated by tmm1 (Aman Gupta). > Using this APIs, users can add your own statistics libraries. I tried a similar approach in ruby 1.8 some while ago, emulating event hook api for GC events (newobj, free, gc start/end): https://github.com/tmm1/brew2deb/blob/master/packa... I agree this approach provides more flexibility. But GC hooks cannot allocate ruby objects or interact with GC, so it is tricky to use. Also implementation of newobj hook is tricky, because object klass/flags are set in the OBJSETUP macro. An object tracing api will provide a lot of benefits (debuggers can track full C/ruby stacktrace of allocation site), but there are still some advantages to doing this in the VM directly: - gc.c can do much better job of storing object metadata efficiently (external statistics library will have to use hash table) - if statistics library is loaded as cext gem, it cannot track objects already created (such as objects inside rubygems library) > And I'm considering that I want to make proposal with a patch. > @tmm1, can you wait for my proposal with a patch? I would like to hear your idea, but I can wait for patch. Or if you tell me I can try to implement. > This movie shows the status of heaps. black pixel is free object. red > pixel is string object, and so on. This is very cool. Such visualizations make it much easier to understand GC behavior, so I am excited to see an official API to make allocation tooling easier. ---------------------------------------- Feature #8107: [patch] runtime flag to track object allocation metadata https://bugs.ruby-lang.org/issues/8107#change-37722 Author: tmm1 (Aman Gupta) Status: Open Priority: Normal Assignee: Category: core Target version: When a ruby program contains a reference leak, debugging is a lot easier if you know where each object was allocated. Tools like bleakhouse and memprof have provided this functionality in the past, but were brittle and required source/runtime patches to ruby. 
Ruby already provides basic callsite tracking if you recompile ruby with GC_DEBUG. This is impractical for daily use however, since it increases the size of the ruby heap by ~30%. There is also no API to access the debug information. The following patch moves the GC_DEBUG file/line tracking outside of RVALUE, and adds a runtime flag (via environment variable) to enable it. This way normal usage is not affected by additional memory usage, but it is still simple to enable tracking for debugging purposes without having to recompile ruby. I've exposed this data via BasicObject#__sourcefile__ and BasicObject#__sourceline__ $ ruby -e' GC.start ObjectSpace.each_object.first(1).each{ |o| p [o.class, o, o.__sourcefile__, o.__sourceline__] } ' -e:4: warning: #__sourcefile__ requires RUBY_OBJECT_METADATA=1 -e:4: warning: #__sourceline__ requires RUBY_OBJECT_METADATA=1 [String, "/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems/exceptions", nil, nil] $ RUBY_OBJECT_METADATA=1 ruby -e' GC.start ObjectSpace.each_object.first(1).each{ |o| p [o.class, o, o.__sourcefile__, o.__sourceline__] } ' [String, "$(datarootdir)/doc/$(PACKAGE)", "/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems.rb", 8] diff --git a/gc.c b/gc.c index bd95073..2fc1d0c 100644 --- a/gc.c +++ b/gc.c @@ -81,6 +81,7 @@ typedef struct { #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE int gc_stress; #endif + int track_metadata; } ruby_gc_params_t; static ruby_gc_params_t initial_params = { @@ -91,6 +92,7 @@ static ruby_gc_params_t initial_params = { #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE FALSE, #endif + FALSE }; #define nomem_error GET_VM()->special_exceptions[ruby_error_nomemory] @@ -162,6 +164,11 @@ typedef struct RVALUE { #pragma pack(pop) #endif +typedef struct rb_obj_metadata { + VALUE file; + unsigned short line; +} rb_obj_metadata_t; + struct heaps_slot { struct heaps_header *header; uintptr_t *bits; @@ -177,6 +184,7 @@ struct heaps_header { RVALUE *start; RVALUE 
*end; size_t limit; + rb_obj_metadata_t *metadata; }; struct heaps_free_bitmap { @@ -291,6 +299,7 @@ int *ruby_initial_gc_stress_ptr = &rb_objspace.gc_stress; #define initial_heap_min_slots initial_params.initial_heap_min_slots #define initial_free_min initial_params.initial_free_min #define initial_growth_factor initial_params.initial_growth_factor +#define track_metadata initial_params.track_metadata #define is_lazy_sweeping(objspace) ((objspace)->heap.sweep_slots != 0) @@ -413,6 +422,8 @@ rb_objspace_free(rb_objspace_t *objspace) if (objspace->heap.sorted) { size_t i; for (i = 0; i < heaps_used; ++i) { + if (objspace->heap.sorted[i]->metadata) + free(objspace->heap.sorted[i]->metadata); free(objspace->heap.sorted[i]->bits); aligned_free(objspace->heap.sorted[i]); } @@ -538,6 +549,7 @@ assign_heap_slot(rb_objspace_t *objspace) objspace->heap.sorted[hi]->end = (p + objs); objspace->heap.sorted[hi]->base = heaps; objspace->heap.sorted[hi]->limit = objs; + objspace->heap.sorted[hi]->metadata = NULL; assert(objspace->heap.free_bitmap != NULL); heaps->bits = (uintptr_t *)objspace->heap.free_bitmap; objspace->heap.sorted[hi]->bits = (uintptr_t *)objspace->heap.free_bitmap; @@ -667,12 +679,19 @@ newobj(VALUE klass, VALUE flags) } MEMZERO((void*)obj, RVALUE, 1); -#ifdef GC_DEBUG - RANY(obj)->file = rb_sourcefile(); - RANY(obj)->line = rb_sourceline(); -#endif objspace->total_allocated_object_num++; + if (UNLIKELY(track_metadata)) { + struct heaps_header *heap = GET_HEAP_HEADER(obj); + if (!heap->metadata) + heap->metadata = calloc(HEAP_OBJ_LIMIT, sizeof(rb_obj_metadata_t)); + if (heap->metadata) { + rb_obj_metadata_t *meta = &heap->metadata[NUM_IN_SLOT(obj)]; + meta->file = rb_sourcefilename(); + meta->line = rb_sourceline(); + } + } + return obj; } @@ -867,6 +886,8 @@ free_unused_heaps(rb_objspace_t *objspace) last = objspace->heap.sorted[i]; } else { + if (objspace->heap.sorted[i]->metadata) + free(objspace->heap.sorted[i]->metadata); 
aligned_free(objspace->heap.sorted[i]); } heaps_used--; @@ -1736,6 +1757,55 @@ rb_obj_id(VALUE obj) return nonspecial_obj_id(obj); } +static inline rb_obj_metadata_t * +rb_obj_get_metadata(VALUE obj) +{ + struct heaps_header *heap; + + if (SPECIAL_CONST_P(obj)) + return NULL; + + heap = GET_HEAP_HEADER(obj); + if (!heap->metadata) + return NULL; + + return &heap->metadata[NUM_IN_SLOT(obj)]; +} + +/* + * Document-method: __sourcefile__ + * + * call-seq: + * obj.__sourcefile__ -> string + * + * Returns a string filename where +obj+ was allocated. + * + * This method is only expected to work on C Ruby. An environment + * variable (RUBY_OBJECT_METADATA=1) must be set to enable this + * feature. + */ +static VALUE +rb_obj_sourcefile(VALUE obj) +{ + rb_obj_metadata_t *meta = rb_obj_get_metadata(obj); + + if (!track_metadata) + rb_warn("#__sourcefile__ requires RUBY_OBJECT_METADATA=1"); + + return meta ? meta->file : Qnil; +} + +static VALUE +rb_obj_sourceline(VALUE obj) +{ + rb_obj_metadata_t *meta = rb_obj_get_metadata(obj); + + if (!track_metadata) + rb_warn("#__sourceline__ requires RUBY_OBJECT_METADATA=1"); + + return meta ? 
INT2FIX(meta->line) : Qnil; +} + static int set_zero(st_data_t key, st_data_t val, st_data_t arg) { @@ -2606,6 +2676,7 @@ rb_gc_mark(VALUE ptr) static void gc_mark_children(rb_objspace_t *objspace, VALUE ptr) { + register rb_obj_metadata_t *meta; register RVALUE *obj = RANY(ptr); goto marking; /* skip */ @@ -2626,6 +2697,9 @@ gc_mark_children(rb_objspace_t *objspace, VALUE ptr) rb_mark_generic_ivar(ptr); } + if ((meta = rb_obj_get_metadata(ptr)) && RTEST(meta->file)) + gc_mark(objspace, meta->file); + switch (BUILTIN_TYPE(obj)) { case T_NIL: case T_FIXNUM: @@ -3294,10 +3368,17 @@ rb_gc_disable(void) void rb_gc_set_params(void) { + char *track_metadata_ptr; char *malloc_limit_ptr, *heap_min_slots_ptr, *free_min_ptr, *growth_factor_ptr; if (rb_safe_level() > 0) return; + track_metadata_ptr = getenv("RUBY_OBJECT_METADATA"); + if (track_metadata_ptr != NULL) { + if (RTEST(ruby_verbose)) + fprintf(stderr, "track_metadata=TRUE (FALSE)\n"); + track_metadata = TRUE; + } malloc_limit_ptr = getenv("RUBY_GC_MALLOC_LIMIT"); if (malloc_limit_ptr != NULL) { int malloc_limit_i = atoi(malloc_limit_ptr); @@ -4535,6 +4616,9 @@ Init_GC(void) rb_define_method(rb_cBasicObject, "__id__", rb_obj_id, 0); rb_define_method(rb_mKernel, "object_id", rb_obj_id, 0); + rb_define_method(rb_cBasicObject, "__sourcefile__", rb_obj_sourcefile, 0); + rb_define_method(rb_cBasicObject, "__sourceline__", rb_obj_sourceline, 0); + rb_define_module_function(rb_mObSpace, "count_objects", count_objects, -1); { diff --git a/ruby.c b/ruby.c index 6b61162..4c7e93f 100644 --- a/ruby.c +++ b/ruby.c @@ -1337,6 +1337,8 @@ process_options(int argc, char **argv, struct cmdline_options *opt) return Qtrue; } + rb_gc_set_params(); + if (!(opt->disable & DISABLE_BIT(rubyopt)) && opt->safe_level == 0 && (s = getenv("RUBYOPT"))) { VALUE src_enc_name = opt->src.enc.name; @@ -1570,7 +1572,6 @@ process_options(int argc, char **argv, struct cmdline_options *opt) rb_define_readonly_boolean("$-a", opt->do_split); 
rb_set_safe_level(opt->safe_level); - rb_gc_set_params(); return iseq; }
on 2013-03-19 08:27
(2013/03/19 15:45), tmm1 (Aman Gupta) wrote: > I agree this approach provides more flexibility. But GC hooks cannot allocate ruby objects or interact with GC, so it is tricky to use. Yes, exactly. This is why we need to be more careful, and why I restrict the hooks to C functions only ([ruby-core:53530]). However, that restriction makes it difficult to build anything useful. So the new idea (the core idea of this proposal) is to introduce another new API: registering tasks to be invoked at finalizing timing. Finalizing timing is: * the nearest timing to the GC * free to execute Ruby code (the same as the finalizer environment) Summary of my proposal: * Introduce new GC related hooks (restricted to C function) * Mark hook * Free hook * GCed hook * Introduce new API to register a task invoked at finalizing timing In particular, the Free hook and the GCed hook run inside the GC procedure. In these C hooks, collect information (current location, etc.) into some storage. If you want to manipulate it at the Ruby level, register a task with this information via the task API. ----- At first, I wanted to provide only GC related events invoked at finalizing timing. However, that approach has several problems: (1) It can't collect the correct location (filename, line): if GC happens inside nested C methods, the finalizers are invoked only after those C methods return. (2) It is difficult to determine how many freed objects can be registered for deferral (the "some storage" I mentioned above). My proposal will solve them. > Also implementation of newobj hook is tricky, because object klass/flags are set in the OBJSETUP macro. Now we have the rb_newobj_of() function. > An object tracing api will provide a lot of benefits (debuggers can track full C/ruby stacktrace of allocation site), but there are still some advantages to doing this in the VM directly: > > - gc.c can do much better job of storing object metadata efficiently (external statistics library will have to use hash table) Yes. We need to make a comparison. I think there are no big differences between VM-level and C-ext level.
Maybe it is too slow to use in production, but I have no data to compare. > - if statistics library is loaded as cext gem, it cannot track objects already created (such as objects inside rubygems library) I believe that is not a problem, because it can be solved by requiring the library first. > I would like to hear your idea, but I can wait for patch. Or if you tell me I can try to implement. The ideas are above. >> This movie shows the status of heaps. black pixel is free object. red >> pixel is string object, and so on. > > This is very cool. Such visualizations make it much easier to understand GC behavior, so I am excited to see an official API to make allocation tooling easier. Hehe. It was my hobby :) It is easy using the trace API (GCed hook) and rb_objspace_each_objects().
on 2013-03-19 10:03
Issue #8107 has been updated by tmm1 (Aman Gupta). I like your idea. A finalization task api provides an elegant solution for processing profiling data in a safe context. > * Introduce new GC related hooks (restricted to C function) > * Mark hook > * Free hook > * GCed hook What is the difference between Free and GC hooks? Is that for obj_free vs finalized? What about NewObj hook? Can it use the same design? Will rb_newobj_of need to call RUBY_VM_SET_FINALIZER_INTERRUPT? ---------------------------------------- Feature #8107: [patch] runtime flag to track object allocation metadata https://bugs.ruby-lang.org/issues/8107#change-37727 Author: tmm1 (Aman Gupta) Status: Open Priority: Normal Assignee: Category: core Target version: When a ruby program contains a reference leak, debugging is a lot easier if you know where each object was allocated. Tools like bleakhouse and memprof have provided this functionality in the past, but were brittle and required source/runtime patches to ruby. Ruby already provides basic callsite tracking if you recompile ruby with GC_DEBUG. This is impractical for daily use however, since it increases the size of the ruby heap by ~30%. There is also no API to access the debug information. The following patch moves the GC_DEBUG file/line tracking outside of RVALUE, and adds a runtime flag (via environment variable) to enable it. This way normal usage is not affected by additional memory usage, but it is still simple to enable tracking for debugging purposes without having to recompile ruby. 
I've exposed this data via BasicObject#__sourcefile__ and BasicObject#__sourceline__ $ ruby -e' GC.start ObjectSpace.each_object.first(1).each{ |o| p [o.class, o, o.__sourcefile__, o.__sourceline__] } ' -e:4: warning: #__sourcefile__ requires RUBY_OBJECT_METADATA=1 -e:4: warning: #__sourceline__ requires RUBY_OBJECT_METADATA=1 [String, "/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems/exceptions", nil, nil] $ RUBY_OBJECT_METADATA=1 ruby -e' GC.start ObjectSpace.each_object.first(1).each{ |o| p [o.class, o, o.__sourcefile__, o.__sourceline__] } ' [String, "$(datarootdir)/doc/$(PACKAGE)", "/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems.rb", 8] diff --git a/gc.c b/gc.c index bd95073..2fc1d0c 100644 --- a/gc.c +++ b/gc.c @@ -81,6 +81,7 @@ typedef struct { #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE int gc_stress; #endif + int track_metadata; } ruby_gc_params_t; static ruby_gc_params_t initial_params = { @@ -91,6 +92,7 @@ static ruby_gc_params_t initial_params = { #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE FALSE, #endif + FALSE }; #define nomem_error GET_VM()->special_exceptions[ruby_error_nomemory] @@ -162,6 +164,11 @@ typedef struct RVALUE { #pragma pack(pop) #endif +typedef struct rb_obj_metadata { + VALUE file; + unsigned short line; +} rb_obj_metadata_t; + struct heaps_slot { struct heaps_header *header; uintptr_t *bits; @@ -177,6 +184,7 @@ struct heaps_header { RVALUE *start; RVALUE *end; size_t limit; + rb_obj_metadata_t *metadata; }; struct heaps_free_bitmap { @@ -291,6 +299,7 @@ int *ruby_initial_gc_stress_ptr = &rb_objspace.gc_stress; #define initial_heap_min_slots initial_params.initial_heap_min_slots #define initial_free_min initial_params.initial_free_min #define initial_growth_factor initial_params.initial_growth_factor +#define track_metadata initial_params.track_metadata #define is_lazy_sweeping(objspace) ((objspace)->heap.sweep_slots != 0) @@ -413,6 +422,8 @@ rb_objspace_free(rb_objspace_t *objspace) if 
(objspace->heap.sorted) { size_t i; for (i = 0; i < heaps_used; ++i) { + if (objspace->heap.sorted[i]->metadata) + free(objspace->heap.sorted[i]->metadata); free(objspace->heap.sorted[i]->bits); aligned_free(objspace->heap.sorted[i]); } @@ -538,6 +549,7 @@ assign_heap_slot(rb_objspace_t *objspace) objspace->heap.sorted[hi]->end = (p + objs); objspace->heap.sorted[hi]->base = heaps; objspace->heap.sorted[hi]->limit = objs; + objspace->heap.sorted[hi]->metadata = NULL; assert(objspace->heap.free_bitmap != NULL); heaps->bits = (uintptr_t *)objspace->heap.free_bitmap; objspace->heap.sorted[hi]->bits = (uintptr_t *)objspace->heap.free_bitmap; @@ -667,12 +679,19 @@ newobj(VALUE klass, VALUE flags) } MEMZERO((void*)obj, RVALUE, 1); -#ifdef GC_DEBUG - RANY(obj)->file = rb_sourcefile(); - RANY(obj)->line = rb_sourceline(); -#endif objspace->total_allocated_object_num++; + if (UNLIKELY(track_metadata)) { + struct heaps_header *heap = GET_HEAP_HEADER(obj); + if (!heap->metadata) + heap->metadata = calloc(HEAP_OBJ_LIMIT, sizeof(rb_obj_metadata_t)); + if (heap->metadata) { + rb_obj_metadata_t *meta = &heap->metadata[NUM_IN_SLOT(obj)]; + meta->file = rb_sourcefilename(); + meta->line = rb_sourceline(); + } + } + return obj; } @@ -867,6 +886,8 @@ free_unused_heaps(rb_objspace_t *objspace) last = objspace->heap.sorted[i]; } else { + if (objspace->heap.sorted[i]->metadata) + free(objspace->heap.sorted[i]->metadata); aligned_free(objspace->heap.sorted[i]); } heaps_used--; @@ -1736,6 +1757,55 @@ rb_obj_id(VALUE obj) return nonspecial_obj_id(obj); } +static inline rb_obj_metadata_t * +rb_obj_get_metadata(VALUE obj) +{ + struct heaps_header *heap; + + if (SPECIAL_CONST_P(obj)) + return NULL; + + heap = GET_HEAP_HEADER(obj); + if (!heap->metadata) + return NULL; + + return &heap->metadata[NUM_IN_SLOT(obj)]; +} + +/* + * Document-method: __sourcefile__ + * + * call-seq: + * obj.__sourcefile__ -> string + * + * Returns a string filename where +obj+ was allocated. 
+ * + * This method is only expected to work on C Ruby. An environment + * variable (RUBY_OBJECT_METADATA=1) must be set to enable this + * feature. + */ +static VALUE +rb_obj_sourcefile(VALUE obj) +{ + rb_obj_metadata_t *meta = rb_obj_get_metadata(obj); + + if (!track_metadata) + rb_warn("#__sourcefile__ requires RUBY_OBJECT_METADATA=1"); + + return meta ? meta->file : Qnil; +} + +static VALUE +rb_obj_sourceline(VALUE obj) +{ + rb_obj_metadata_t *meta = rb_obj_get_metadata(obj); + + if (!track_metadata) + rb_warn("#__sourceline__ requires RUBY_OBJECT_METADATA=1"); + + return meta ? INT2FIX(meta->line) : Qnil; +} + static int set_zero(st_data_t key, st_data_t val, st_data_t arg) { @@ -2606,6 +2676,7 @@ rb_gc_mark(VALUE ptr) static void gc_mark_children(rb_objspace_t *objspace, VALUE ptr) { + register rb_obj_metadata_t *meta; register RVALUE *obj = RANY(ptr); goto marking; /* skip */ @@ -2626,6 +2697,9 @@ gc_mark_children(rb_objspace_t *objspace, VALUE ptr) rb_mark_generic_ivar(ptr); } + if ((meta = rb_obj_get_metadata(ptr)) && RTEST(meta->file)) + gc_mark(objspace, meta->file); + switch (BUILTIN_TYPE(obj)) { case T_NIL: case T_FIXNUM: @@ -3294,10 +3368,17 @@ rb_gc_disable(void) void rb_gc_set_params(void) { + char *track_metadata_ptr; char *malloc_limit_ptr, *heap_min_slots_ptr, *free_min_ptr, *growth_factor_ptr; if (rb_safe_level() > 0) return; + track_metadata_ptr = getenv("RUBY_OBJECT_METADATA"); + if (track_metadata_ptr != NULL) { + if (RTEST(ruby_verbose)) + fprintf(stderr, "track_metadata=TRUE (FALSE)\n"); + track_metadata = TRUE; + } malloc_limit_ptr = getenv("RUBY_GC_MALLOC_LIMIT"); if (malloc_limit_ptr != NULL) { int malloc_limit_i = atoi(malloc_limit_ptr); @@ -4535,6 +4616,9 @@ Init_GC(void) rb_define_method(rb_cBasicObject, "__id__", rb_obj_id, 0); rb_define_method(rb_mKernel, "object_id", rb_obj_id, 0); + rb_define_method(rb_cBasicObject, "__sourcefile__", rb_obj_sourcefile, 0); + rb_define_method(rb_cBasicObject, "__sourceline__", rb_obj_sourceline, 
0); + rb_define_module_function(rb_mObSpace, "count_objects", count_objects, -1); { diff --git a/ruby.c b/ruby.c index 6b61162..4c7e93f 100644 --- a/ruby.c +++ b/ruby.c @@ -1337,6 +1337,8 @@ process_options(int argc, char **argv, struct cmdline_options *opt) return Qtrue; } + rb_gc_set_params(); + if (!(opt->disable & DISABLE_BIT(rubyopt)) && opt->safe_level == 0 && (s = getenv("RUBYOPT"))) { VALUE src_enc_name = opt->src.enc.name; @@ -1570,7 +1572,6 @@ process_options(int argc, char **argv, struct cmdline_options *opt) rb_define_readonly_boolean("$-a", opt->do_split); rb_set_safe_level(opt->safe_level); - rb_gc_set_params(); return iseq; }
on 2013-03-19 10:14
(2013/03/19 18:02), tmm1 (Aman Gupta) wrote: > I like your idea. A finalization task api provides an elegant solution for processing profiling data in a safe context. Thanks. >> * Introduce new GC related hooks (restricted to C function) >> * Mark hook >> * Free hook >> * GCed hook > > What is the difference between Free and GC hooks? Is that for obj_free vs finalized? The Free hook is called for each freed object; the hook is called with the GCed object. The GCed hook is called once per marking. Even if no object is freed, this hook alone is still called. > What about NewObj hook? Can it use the same design? Will rb_newobj_of need to call RUBY_VM_SET_FINALIZER_INTERRUPT? Ah, it is my mistake. I meant to say *newobj hook* instead of Mark hook. In fact, there is no need to defer it.
on 2013-03-19 11:27
Issue #8107 has been updated by tmm1 (Aman Gupta). > Ah, it is my mistake. I want to say *newobj hook*, instead of Mark hook. Oh, OK. This makes much more sense now. I implemented these basic GC hooks in c-only tracepoint API. Is this what you have in mind? https://github.com/tmm1/ruby/commit/bffaecd560e83d... It is useful already without new finalization task api. Would you be willing to merge something like this? For task api, do you have method name/signature suggestion? I will try to implement that next. ---------------------------------------- Feature #8107: [patch] runtime flag to track object allocation metadata https://bugs.ruby-lang.org/issues/8107#change-37730 Author: tmm1 (Aman Gupta) Status: Open Priority: Normal Assignee: Category: core Target version: When a ruby program contains a reference leak, debugging is a lot easier if you know where each object was allocated. Tools like bleakhouse and memprof have provided this functionality in the past, but were brittle and required source/runtime patches to ruby. Ruby already provides basic callsite tracking if you recompile ruby with GC_DEBUG. This is impractical for daily use however, since it increases the size of the ruby heap by ~30%. There is also no API to access the debug information. The following patch moves the GC_DEBUG file/line tracking outside of RVALUE, and adds a runtime flag (via environment variable) to enable it. This way normal usage is not affected by additional memory usage, but it is still simple to enable tracking for debugging purposes without having to recompile ruby. 
I've exposed this data via BasicObject#__sourcefile__ and BasicObject#__sourceline__ $ ruby -e' GC.start ObjectSpace.each_object.first(1).each{ |o| p [o.class, o, o.__sourcefile__, o.__sourceline__] } ' -e:4: warning: #__sourcefile__ requires RUBY_OBJECT_METADATA=1 -e:4: warning: #__sourceline__ requires RUBY_OBJECT_METADATA=1 [String, "/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems/exceptions", nil, nil] $ RUBY_OBJECT_METADATA=1 ruby -e' GC.start ObjectSpace.each_object.first(1).each{ |o| p [o.class, o, o.__sourcefile__, o.__sourceline__] } ' [String, "$(datarootdir)/doc/$(PACKAGE)", "/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems.rb", 8] diff --git a/gc.c b/gc.c index bd95073..2fc1d0c 100644 --- a/gc.c +++ b/gc.c @@ -81,6 +81,7 @@ typedef struct { #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE int gc_stress; #endif + int track_metadata; } ruby_gc_params_t; static ruby_gc_params_t initial_params = { @@ -91,6 +92,7 @@ static ruby_gc_params_t initial_params = { #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE FALSE, #endif + FALSE }; #define nomem_error GET_VM()->special_exceptions[ruby_error_nomemory] @@ -162,6 +164,11 @@ typedef struct RVALUE { #pragma pack(pop) #endif +typedef struct rb_obj_metadata { + VALUE file; + unsigned short line; +} rb_obj_metadata_t; + struct heaps_slot { struct heaps_header *header; uintptr_t *bits; @@ -177,6 +184,7 @@ struct heaps_header { RVALUE *start; RVALUE *end; size_t limit; + rb_obj_metadata_t *metadata; }; struct heaps_free_bitmap { @@ -291,6 +299,7 @@ int *ruby_initial_gc_stress_ptr = &rb_objspace.gc_stress; #define initial_heap_min_slots initial_params.initial_heap_min_slots #define initial_free_min initial_params.initial_free_min #define initial_growth_factor initial_params.initial_growth_factor +#define track_metadata initial_params.track_metadata #define is_lazy_sweeping(objspace) ((objspace)->heap.sweep_slots != 0) @@ -413,6 +422,8 @@ rb_objspace_free(rb_objspace_t *objspace) if 
(objspace->heap.sorted) { size_t i; for (i = 0; i < heaps_used; ++i) { + if (objspace->heap.sorted[i]->metadata) + free(objspace->heap.sorted[i]->metadata); free(objspace->heap.sorted[i]->bits); aligned_free(objspace->heap.sorted[i]); } @@ -538,6 +549,7 @@ assign_heap_slot(rb_objspace_t *objspace) objspace->heap.sorted[hi]->end = (p + objs); objspace->heap.sorted[hi]->base = heaps; objspace->heap.sorted[hi]->limit = objs; + objspace->heap.sorted[hi]->metadata = NULL; assert(objspace->heap.free_bitmap != NULL); heaps->bits = (uintptr_t *)objspace->heap.free_bitmap; objspace->heap.sorted[hi]->bits = (uintptr_t *)objspace->heap.free_bitmap; @@ -667,12 +679,19 @@ newobj(VALUE klass, VALUE flags) } MEMZERO((void*)obj, RVALUE, 1); -#ifdef GC_DEBUG - RANY(obj)->file = rb_sourcefile(); - RANY(obj)->line = rb_sourceline(); -#endif objspace->total_allocated_object_num++; + if (UNLIKELY(track_metadata)) { + struct heaps_header *heap = GET_HEAP_HEADER(obj); + if (!heap->metadata) + heap->metadata = calloc(HEAP_OBJ_LIMIT, sizeof(rb_obj_metadata_t)); + if (heap->metadata) { + rb_obj_metadata_t *meta = &heap->metadata[NUM_IN_SLOT(obj)]; + meta->file = rb_sourcefilename(); + meta->line = rb_sourceline(); + } + } + return obj; } @@ -867,6 +886,8 @@ free_unused_heaps(rb_objspace_t *objspace) last = objspace->heap.sorted[i]; } else { + if (objspace->heap.sorted[i]->metadata) + free(objspace->heap.sorted[i]->metadata); aligned_free(objspace->heap.sorted[i]); } heaps_used--; @@ -1736,6 +1757,55 @@ rb_obj_id(VALUE obj) return nonspecial_obj_id(obj); } +static inline rb_obj_metadata_t * +rb_obj_get_metadata(VALUE obj) +{ + struct heaps_header *heap; + + if (SPECIAL_CONST_P(obj)) + return NULL; + + heap = GET_HEAP_HEADER(obj); + if (!heap->metadata) + return NULL; + + return &heap->metadata[NUM_IN_SLOT(obj)]; +} + +/* + * Document-method: __sourcefile__ + * + * call-seq: + * obj.__sourcefile__ -> string + * + * Returns a string filename where +obj+ was allocated. 
+ * + * This method is only expected to work on C Ruby. An environment + * variable (RUBY_OBJECT_METADATA=1) must be set to enable this + * feature. + */ +static VALUE +rb_obj_sourcefile(VALUE obj) +{ + rb_obj_metadata_t *meta = rb_obj_get_metadata(obj); + + if (!track_metadata) + rb_warn("#__sourcefile__ requires RUBY_OBJECT_METADATA=1"); + + return meta ? meta->file : Qnil; +} + +static VALUE +rb_obj_sourceline(VALUE obj) +{ + rb_obj_metadata_t *meta = rb_obj_get_metadata(obj); + + if (!track_metadata) + rb_warn("#__sourceline__ requires RUBY_OBJECT_METADATA=1"); + + return meta ? INT2FIX(meta->line) : Qnil; +} + static int set_zero(st_data_t key, st_data_t val, st_data_t arg) { @@ -2606,6 +2676,7 @@ rb_gc_mark(VALUE ptr) static void gc_mark_children(rb_objspace_t *objspace, VALUE ptr) { + register rb_obj_metadata_t *meta; register RVALUE *obj = RANY(ptr); goto marking; /* skip */ @@ -2626,6 +2697,9 @@ gc_mark_children(rb_objspace_t *objspace, VALUE ptr) rb_mark_generic_ivar(ptr); } + if ((meta = rb_obj_get_metadata(ptr)) && RTEST(meta->file)) + gc_mark(objspace, meta->file); + switch (BUILTIN_TYPE(obj)) { case T_NIL: case T_FIXNUM: @@ -3294,10 +3368,17 @@ rb_gc_disable(void) void rb_gc_set_params(void) { + char *track_metadata_ptr; char *malloc_limit_ptr, *heap_min_slots_ptr, *free_min_ptr, *growth_factor_ptr; if (rb_safe_level() > 0) return; + track_metadata_ptr = getenv("RUBY_OBJECT_METADATA"); + if (track_metadata_ptr != NULL) { + if (RTEST(ruby_verbose)) + fprintf(stderr, "track_metadata=TRUE (FALSE)\n"); + track_metadata = TRUE; + } malloc_limit_ptr = getenv("RUBY_GC_MALLOC_LIMIT"); if (malloc_limit_ptr != NULL) { int malloc_limit_i = atoi(malloc_limit_ptr); @@ -4535,6 +4616,9 @@ Init_GC(void) rb_define_method(rb_cBasicObject, "__id__", rb_obj_id, 0); rb_define_method(rb_mKernel, "object_id", rb_obj_id, 0); + rb_define_method(rb_cBasicObject, "__sourcefile__", rb_obj_sourcefile, 0); + rb_define_method(rb_cBasicObject, "__sourceline__", rb_obj_sourceline, 
0); + rb_define_module_function(rb_mObSpace, "count_objects", count_objects, -1); { diff --git a/ruby.c b/ruby.c index 6b61162..4c7e93f 100644 --- a/ruby.c +++ b/ruby.c @@ -1337,6 +1337,8 @@ process_options(int argc, char **argv, struct cmdline_options *opt) return Qtrue; } + rb_gc_set_params(); + if (!(opt->disable & DISABLE_BIT(rubyopt)) && opt->safe_level == 0 && (s = getenv("RUBYOPT"))) { VALUE src_enc_name = opt->src.enc.name; @@ -1570,7 +1572,6 @@ process_options(int argc, char **argv, struct cmdline_options *opt) rb_define_readonly_boolean("$-a", opt->do_split); rb_set_safe_level(opt->safe_level); - rb_gc_set_params(); return iseq; }
on 2013-03-19 12:33
(2013/03/19 19:26), tmm1 (Aman Gupta) wrote: >> > Ah, it is my mistake. I want to say *newobj hook*, instead of Mark hook. > Oh, OK. This makes much more sense now. I implemented these basic GC hooks in c-only tracepoint API. > > Is this what you have in mind? https://github.com/tmm1/ruby/commit/bffaecd560e83d... Great!! Another consideration is the shortage of event bits: the event mask is restricted to 32 bits. GC-related events are special, so I was thinking of separating the ordinary bits from the GC bits. > It is useful already without new finalization task api. Would you be willing to merge something like this? > > For task api, do you have method name/signature suggestion? I will try to implement that next. No idea. My plan was: rename FINALIZER_INTERRUPT_MASK to DELAYED_TASK_INTERRUPT_MASK, and make finalizers one kind of delayed task.
on 2013-03-19 13:02
Issue #8107 has been updated by tmm1 (Aman Gupta). This was my first time using the new TracePoint apis. I like the C API a lot- much more flexible than the old event hook api. > GC related events are special. So I was thinking to separate ordinal > bits and GC's bits. I agree, but I am not sure how to separate it without changing signature of rb_tracepoint_new. > No idea. I am not sure either. Maybe: void rb_delayed_task_run(void); void rb_delayed_task_enqueue(void (*func)(void *), void *data); Where should the implementation live.. vm.c? vm_task.c? ---------------------------------------- Feature #8107: [patch] runtime flag to track object allocation metadata https://bugs.ruby-lang.org/issues/8107#change-37733 Author: tmm1 (Aman Gupta) Status: Open Priority: Normal Assignee: Category: core Target version: When a ruby program contains a reference leak, debugging is a lot easier if you know where each object was allocated. Tools like bleakhouse and memprof have provided this functionality in the past, but were brittle and required source/runtime patches to ruby. Ruby already provides basic callsite tracking if you recompile ruby with GC_DEBUG. This is impractical for daily use however, since it increases the size of the ruby heap by ~30%. There is also no API to access the debug information. The following patch moves the GC_DEBUG file/line tracking outside of RVALUE, and adds a runtime flag (via environment variable) to enable it. This way normal usage is not affected by additional memory usage, but it is still simple to enable tracking for debugging purposes without having to recompile ruby. 
I've exposed this data via BasicObject#__sourcefile__ and BasicObject#__sourceline__ $ ruby -e' GC.start ObjectSpace.each_object.first(1).each{ |o| p [o.class, o, o.__sourcefile__, o.__sourceline__] } ' -e:4: warning: #__sourcefile__ requires RUBY_OBJECT_METADATA=1 -e:4: warning: #__sourceline__ requires RUBY_OBJECT_METADATA=1 [String, "/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems/exceptions", nil, nil] $ RUBY_OBJECT_METADATA=1 ruby -e' GC.start ObjectSpace.each_object.first(1).each{ |o| p [o.class, o, o.__sourcefile__, o.__sourceline__] } ' [String, "$(datarootdir)/doc/$(PACKAGE)", "/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems.rb", 8] diff --git a/gc.c b/gc.c index bd95073..2fc1d0c 100644 --- a/gc.c +++ b/gc.c @@ -81,6 +81,7 @@ typedef struct { #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE int gc_stress; #endif + int track_metadata; } ruby_gc_params_t; static ruby_gc_params_t initial_params = { @@ -91,6 +92,7 @@ static ruby_gc_params_t initial_params = { #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE FALSE, #endif + FALSE }; #define nomem_error GET_VM()->special_exceptions[ruby_error_nomemory] @@ -162,6 +164,11 @@ typedef struct RVALUE { #pragma pack(pop) #endif +typedef struct rb_obj_metadata { + VALUE file; + unsigned short line; +} rb_obj_metadata_t; + struct heaps_slot { struct heaps_header *header; uintptr_t *bits; @@ -177,6 +184,7 @@ struct heaps_header { RVALUE *start; RVALUE *end; size_t limit; + rb_obj_metadata_t *metadata; }; struct heaps_free_bitmap { @@ -291,6 +299,7 @@ int *ruby_initial_gc_stress_ptr = &rb_objspace.gc_stress; #define initial_heap_min_slots initial_params.initial_heap_min_slots #define initial_free_min initial_params.initial_free_min #define initial_growth_factor initial_params.initial_growth_factor +#define track_metadata initial_params.track_metadata #define is_lazy_sweeping(objspace) ((objspace)->heap.sweep_slots != 0) @@ -413,6 +422,8 @@ rb_objspace_free(rb_objspace_t *objspace) if 
(objspace->heap.sorted) { size_t i; for (i = 0; i < heaps_used; ++i) { + if (objspace->heap.sorted[i]->metadata) + free(objspace->heap.sorted[i]->metadata); free(objspace->heap.sorted[i]->bits); aligned_free(objspace->heap.sorted[i]); } @@ -538,6 +549,7 @@ assign_heap_slot(rb_objspace_t *objspace) objspace->heap.sorted[hi]->end = (p + objs); objspace->heap.sorted[hi]->base = heaps; objspace->heap.sorted[hi]->limit = objs; + objspace->heap.sorted[hi]->metadata = NULL; assert(objspace->heap.free_bitmap != NULL); heaps->bits = (uintptr_t *)objspace->heap.free_bitmap; objspace->heap.sorted[hi]->bits = (uintptr_t *)objspace->heap.free_bitmap; @@ -667,12 +679,19 @@ newobj(VALUE klass, VALUE flags) } MEMZERO((void*)obj, RVALUE, 1); -#ifdef GC_DEBUG - RANY(obj)->file = rb_sourcefile(); - RANY(obj)->line = rb_sourceline(); -#endif objspace->total_allocated_object_num++; + if (UNLIKELY(track_metadata)) { + struct heaps_header *heap = GET_HEAP_HEADER(obj); + if (!heap->metadata) + heap->metadata = calloc(HEAP_OBJ_LIMIT, sizeof(rb_obj_metadata_t)); + if (heap->metadata) { + rb_obj_metadata_t *meta = &heap->metadata[NUM_IN_SLOT(obj)]; + meta->file = rb_sourcefilename(); + meta->line = rb_sourceline(); + } + } + return obj; } @@ -867,6 +886,8 @@ free_unused_heaps(rb_objspace_t *objspace) last = objspace->heap.sorted[i]; } else { + if (objspace->heap.sorted[i]->metadata) + free(objspace->heap.sorted[i]->metadata); aligned_free(objspace->heap.sorted[i]); } heaps_used--; @@ -1736,6 +1757,55 @@ rb_obj_id(VALUE obj) return nonspecial_obj_id(obj); } +static inline rb_obj_metadata_t * +rb_obj_get_metadata(VALUE obj) +{ + struct heaps_header *heap; + + if (SPECIAL_CONST_P(obj)) + return NULL; + + heap = GET_HEAP_HEADER(obj); + if (!heap->metadata) + return NULL; + + return &heap->metadata[NUM_IN_SLOT(obj)]; +} + +/* + * Document-method: __sourcefile__ + * + * call-seq: + * obj.__sourcefile__ -> string + * + * Returns a string filename where +obj+ was allocated. 
+ * + * This method is only expected to work on C Ruby. An environment + * variable (RUBY_OBJECT_METADATA=1) must be set to enable this + * feature. + */ +static VALUE +rb_obj_sourcefile(VALUE obj) +{ + rb_obj_metadata_t *meta = rb_obj_get_metadata(obj); + + if (!track_metadata) + rb_warn("#__sourcefile__ requires RUBY_OBJECT_METADATA=1"); + + return meta ? meta->file : Qnil; +} + +static VALUE +rb_obj_sourceline(VALUE obj) +{ + rb_obj_metadata_t *meta = rb_obj_get_metadata(obj); + + if (!track_metadata) + rb_warn("#__sourceline__ requires RUBY_OBJECT_METADATA=1"); + + return meta ? INT2FIX(meta->line) : Qnil; +} + static int set_zero(st_data_t key, st_data_t val, st_data_t arg) { @@ -2606,6 +2676,7 @@ rb_gc_mark(VALUE ptr) static void gc_mark_children(rb_objspace_t *objspace, VALUE ptr) { + register rb_obj_metadata_t *meta; register RVALUE *obj = RANY(ptr); goto marking; /* skip */ @@ -2626,6 +2697,9 @@ gc_mark_children(rb_objspace_t *objspace, VALUE ptr) rb_mark_generic_ivar(ptr); } + if ((meta = rb_obj_get_metadata(ptr)) && RTEST(meta->file)) + gc_mark(objspace, meta->file); + switch (BUILTIN_TYPE(obj)) { case T_NIL: case T_FIXNUM: @@ -3294,10 +3368,17 @@ rb_gc_disable(void) void rb_gc_set_params(void) { + char *track_metadata_ptr; char *malloc_limit_ptr, *heap_min_slots_ptr, *free_min_ptr, *growth_factor_ptr; if (rb_safe_level() > 0) return; + track_metadata_ptr = getenv("RUBY_OBJECT_METADATA"); + if (track_metadata_ptr != NULL) { + if (RTEST(ruby_verbose)) + fprintf(stderr, "track_metadata=TRUE (FALSE)\n"); + track_metadata = TRUE; + } malloc_limit_ptr = getenv("RUBY_GC_MALLOC_LIMIT"); if (malloc_limit_ptr != NULL) { int malloc_limit_i = atoi(malloc_limit_ptr); @@ -4535,6 +4616,9 @@ Init_GC(void) rb_define_method(rb_cBasicObject, "__id__", rb_obj_id, 0); rb_define_method(rb_mKernel, "object_id", rb_obj_id, 0); + rb_define_method(rb_cBasicObject, "__sourcefile__", rb_obj_sourcefile, 0); + rb_define_method(rb_cBasicObject, "__sourceline__", rb_obj_sourceline, 
0); + rb_define_module_function(rb_mObSpace, "count_objects", count_objects, -1); { diff --git a/ruby.c b/ruby.c index 6b61162..4c7e93f 100644 --- a/ruby.c +++ b/ruby.c @@ -1337,6 +1337,8 @@ process_options(int argc, char **argv, struct cmdline_options *opt) return Qtrue; } + rb_gc_set_params(); + if (!(opt->disable & DISABLE_BIT(rubyopt)) && opt->safe_level == 0 && (s = getenv("RUBYOPT"))) { VALUE src_enc_name = opt->src.enc.name; @@ -1570,7 +1572,6 @@ process_options(int argc, char **argv, struct cmdline_options *opt) rb_define_readonly_boolean("$-a", opt->do_split); rb_set_safe_level(opt->safe_level); - rb_gc_set_params(); return iseq; }
on 2013-03-20 04:20
Issue #8107 has been updated by tmm1 (Aman Gupta). I implemented a basic task api: https://github.com/tmm1/ruby/compare/tmm1;task-api But API is too simple, maybe. - what if task job is holding onto VALUE, it will never be gc_mark()ed - what if there is an exception during task execution? ---------------------------------------- Feature #8107: [patch] runtime flag to track object allocation metadata https://bugs.ruby-lang.org/issues/8107#change-37754 Author: tmm1 (Aman Gupta) Status: Open Priority: Normal Assignee: Category: core Target version: When a ruby program contains a reference leak, debugging is a lot easier if you know where each object was allocated. Tools like bleakhouse and memprof have provided this functionality in the past, but were brittle and required source/runtime patches to ruby. Ruby already provides basic callsite tracking if you recompile ruby with GC_DEBUG. This is impractical for daily use however, since it increases the size of the ruby heap by ~30%. There is also no API to access the debug information. The following patch moves the GC_DEBUG file/line tracking outside of RVALUE, and adds a runtime flag (via environment variable) to enable it. This way normal usage is not affected by additional memory usage, but it is still simple to enable tracking for debugging purposes without having to recompile ruby. 
I've exposed this data via BasicObject#__sourcefile__ and BasicObject#__sourceline__ $ ruby -e' GC.start ObjectSpace.each_object.first(1).each{ |o| p [o.class, o, o.__sourcefile__, o.__sourceline__] } ' -e:4: warning: #__sourcefile__ requires RUBY_OBJECT_METADATA=1 -e:4: warning: #__sourceline__ requires RUBY_OBJECT_METADATA=1 [String, "/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems/exceptions", nil, nil] $ RUBY_OBJECT_METADATA=1 ruby -e' GC.start ObjectSpace.each_object.first(1).each{ |o| p [o.class, o, o.__sourcefile__, o.__sourceline__] } ' [String, "$(datarootdir)/doc/$(PACKAGE)", "/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems.rb", 8] diff --git a/gc.c b/gc.c index bd95073..2fc1d0c 100644 --- a/gc.c +++ b/gc.c @@ -81,6 +81,7 @@ typedef struct { #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE int gc_stress; #endif + int track_metadata; } ruby_gc_params_t; static ruby_gc_params_t initial_params = { @@ -91,6 +92,7 @@ static ruby_gc_params_t initial_params = { #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE FALSE, #endif + FALSE }; #define nomem_error GET_VM()->special_exceptions[ruby_error_nomemory] @@ -162,6 +164,11 @@ typedef struct RVALUE { #pragma pack(pop) #endif +typedef struct rb_obj_metadata { + VALUE file; + unsigned short line; +} rb_obj_metadata_t; + struct heaps_slot { struct heaps_header *header; uintptr_t *bits; @@ -177,6 +184,7 @@ struct heaps_header { RVALUE *start; RVALUE *end; size_t limit; + rb_obj_metadata_t *metadata; }; struct heaps_free_bitmap { @@ -291,6 +299,7 @@ int *ruby_initial_gc_stress_ptr = &rb_objspace.gc_stress; #define initial_heap_min_slots initial_params.initial_heap_min_slots #define initial_free_min initial_params.initial_free_min #define initial_growth_factor initial_params.initial_growth_factor +#define track_metadata initial_params.track_metadata #define is_lazy_sweeping(objspace) ((objspace)->heap.sweep_slots != 0) @@ -413,6 +422,8 @@ rb_objspace_free(rb_objspace_t *objspace) if 
(objspace->heap.sorted) { size_t i; for (i = 0; i < heaps_used; ++i) { + if (objspace->heap.sorted[i]->metadata) + free(objspace->heap.sorted[i]->metadata); free(objspace->heap.sorted[i]->bits); aligned_free(objspace->heap.sorted[i]); } @@ -538,6 +549,7 @@ assign_heap_slot(rb_objspace_t *objspace) objspace->heap.sorted[hi]->end = (p + objs); objspace->heap.sorted[hi]->base = heaps; objspace->heap.sorted[hi]->limit = objs; + objspace->heap.sorted[hi]->metadata = NULL; assert(objspace->heap.free_bitmap != NULL); heaps->bits = (uintptr_t *)objspace->heap.free_bitmap; objspace->heap.sorted[hi]->bits = (uintptr_t *)objspace->heap.free_bitmap; @@ -667,12 +679,19 @@ newobj(VALUE klass, VALUE flags) } MEMZERO((void*)obj, RVALUE, 1); -#ifdef GC_DEBUG - RANY(obj)->file = rb_sourcefile(); - RANY(obj)->line = rb_sourceline(); -#endif objspace->total_allocated_object_num++; + if (UNLIKELY(track_metadata)) { + struct heaps_header *heap = GET_HEAP_HEADER(obj); + if (!heap->metadata) + heap->metadata = calloc(HEAP_OBJ_LIMIT, sizeof(rb_obj_metadata_t)); + if (heap->metadata) { + rb_obj_metadata_t *meta = &heap->metadata[NUM_IN_SLOT(obj)]; + meta->file = rb_sourcefilename(); + meta->line = rb_sourceline(); + } + } + return obj; } @@ -867,6 +886,8 @@ free_unused_heaps(rb_objspace_t *objspace) last = objspace->heap.sorted[i]; } else { + if (objspace->heap.sorted[i]->metadata) + free(objspace->heap.sorted[i]->metadata); aligned_free(objspace->heap.sorted[i]); } heaps_used--; @@ -1736,6 +1757,55 @@ rb_obj_id(VALUE obj) return nonspecial_obj_id(obj); } +static inline rb_obj_metadata_t * +rb_obj_get_metadata(VALUE obj) +{ + struct heaps_header *heap; + + if (SPECIAL_CONST_P(obj)) + return NULL; + + heap = GET_HEAP_HEADER(obj); + if (!heap->metadata) + return NULL; + + return &heap->metadata[NUM_IN_SLOT(obj)]; +} + +/* + * Document-method: __sourcefile__ + * + * call-seq: + * obj.__sourcefile__ -> string + * + * Returns a string filename where +obj+ was allocated. 
+ * + * This method is only expected to work on C Ruby. An environment + * variable (RUBY_OBJECT_METADATA=1) must be set to enable this + * feature. + */ +static VALUE +rb_obj_sourcefile(VALUE obj) +{ + rb_obj_metadata_t *meta = rb_obj_get_metadata(obj); + + if (!track_metadata) + rb_warn("#__sourcefile__ requires RUBY_OBJECT_METADATA=1"); + + return meta ? meta->file : Qnil; +} + +static VALUE +rb_obj_sourceline(VALUE obj) +{ + rb_obj_metadata_t *meta = rb_obj_get_metadata(obj); + + if (!track_metadata) + rb_warn("#__sourceline__ requires RUBY_OBJECT_METADATA=1"); + + return meta ? INT2FIX(meta->line) : Qnil; +} + static int set_zero(st_data_t key, st_data_t val, st_data_t arg) { @@ -2606,6 +2676,7 @@ rb_gc_mark(VALUE ptr) static void gc_mark_children(rb_objspace_t *objspace, VALUE ptr) { + register rb_obj_metadata_t *meta; register RVALUE *obj = RANY(ptr); goto marking; /* skip */ @@ -2626,6 +2697,9 @@ gc_mark_children(rb_objspace_t *objspace, VALUE ptr) rb_mark_generic_ivar(ptr); } + if ((meta = rb_obj_get_metadata(ptr)) && RTEST(meta->file)) + gc_mark(objspace, meta->file); + switch (BUILTIN_TYPE(obj)) { case T_NIL: case T_FIXNUM: @@ -3294,10 +3368,17 @@ rb_gc_disable(void) void rb_gc_set_params(void) { + char *track_metadata_ptr; char *malloc_limit_ptr, *heap_min_slots_ptr, *free_min_ptr, *growth_factor_ptr; if (rb_safe_level() > 0) return; + track_metadata_ptr = getenv("RUBY_OBJECT_METADATA"); + if (track_metadata_ptr != NULL) { + if (RTEST(ruby_verbose)) + fprintf(stderr, "track_metadata=TRUE (FALSE)\n"); + track_metadata = TRUE; + } malloc_limit_ptr = getenv("RUBY_GC_MALLOC_LIMIT"); if (malloc_limit_ptr != NULL) { int malloc_limit_i = atoi(malloc_limit_ptr); @@ -4535,6 +4616,9 @@ Init_GC(void) rb_define_method(rb_cBasicObject, "__id__", rb_obj_id, 0); rb_define_method(rb_mKernel, "object_id", rb_obj_id, 0); + rb_define_method(rb_cBasicObject, "__sourcefile__", rb_obj_sourcefile, 0); + rb_define_method(rb_cBasicObject, "__sourceline__", rb_obj_sourceline, 
0); + rb_define_module_function(rb_mObSpace, "count_objects", count_objects, -1); { diff --git a/ruby.c b/ruby.c index 6b61162..4c7e93f 100644 --- a/ruby.c +++ b/ruby.c @@ -1337,6 +1337,8 @@ process_options(int argc, char **argv, struct cmdline_options *opt) return Qtrue; } + rb_gc_set_params(); + if (!(opt->disable & DISABLE_BIT(rubyopt)) && opt->safe_level == 0 && (s = getenv("RUBYOPT"))) { VALUE src_enc_name = opt->src.enc.name; @@ -1570,7 +1572,6 @@ process_options(int argc, char **argv, struct cmdline_options *opt) rb_define_readonly_boolean("$-a", opt->do_split); rb_set_safe_level(opt->safe_level); - rb_gc_set_params(); return iseq; }
on 2013-03-21 08:40
(2013/03/20 12:19), tmm1 (Aman Gupta) wrote:
> But API is too simple, maybe.
Other issues:
- `task' is ambiguous (all procedures are tasks).
Yes, naming is always an issue.
- Allocation during GC is dangerous (maybe it should be prohibited).
on 2013-03-22 09:41
Issue #8107 has been updated by tmm1 (Aman Gupta). > - `task' is ambiguous (all of procedures are task). Do you prefer `rb_delayed_task_*` and vm_delayed_task.c ? > - Allocation during gc is dangerous (maybe, should be prohibited) Yes, I thought of that when using ALLOC_N. Is it safe to use regular allocation (without xmalloc)? Or better to maintain static array or freelist? ---------------------------------------- Feature #8107: [patch] runtime flag to track object allocation metadata https://bugs.ruby-lang.org/issues/8107#change-37808 Author: tmm1 (Aman Gupta) Status: Open Priority: Normal Assignee: Category: core Target version: When a ruby program contains a reference leak, debugging is a lot easier if you know where each object was allocated. Tools like bleakhouse and memprof have provided this functionality in the past, but were brittle and required source/runtime patches to ruby. Ruby already provides basic callsite tracking if you recompile ruby with GC_DEBUG. This is impractical for daily use however, since it increases the size of the ruby heap by ~30%. There is also no API to access the debug information. The following patch moves the GC_DEBUG file/line tracking outside of RVALUE, and adds a runtime flag (via environment variable) to enable it. This way normal usage is not affected by additional memory usage, but it is still simple to enable tracking for debugging purposes without having to recompile ruby. 
I've exposed this data via BasicObject#__sourcefile__ and BasicObject#__sourceline__ $ ruby -e' GC.start ObjectSpace.each_object.first(1).each{ |o| p [o.class, o, o.__sourcefile__, o.__sourceline__] } ' -e:4: warning: #__sourcefile__ requires RUBY_OBJECT_METADATA=1 -e:4: warning: #__sourceline__ requires RUBY_OBJECT_METADATA=1 [String, "/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems/exceptions", nil, nil] $ RUBY_OBJECT_METADATA=1 ruby -e' GC.start ObjectSpace.each_object.first(1).each{ |o| p [o.class, o, o.__sourcefile__, o.__sourceline__] } ' [String, "$(datarootdir)/doc/$(PACKAGE)", "/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems.rb", 8] diff --git a/gc.c b/gc.c index bd95073..2fc1d0c 100644 --- a/gc.c +++ b/gc.c @@ -81,6 +81,7 @@ typedef struct { #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE int gc_stress; #endif + int track_metadata; } ruby_gc_params_t; static ruby_gc_params_t initial_params = { @@ -91,6 +92,7 @@ static ruby_gc_params_t initial_params = { #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE FALSE, #endif + FALSE }; #define nomem_error GET_VM()->special_exceptions[ruby_error_nomemory] @@ -162,6 +164,11 @@ typedef struct RVALUE { #pragma pack(pop) #endif +typedef struct rb_obj_metadata { + VALUE file; + unsigned short line; +} rb_obj_metadata_t; + struct heaps_slot { struct heaps_header *header; uintptr_t *bits; @@ -177,6 +184,7 @@ struct heaps_header { RVALUE *start; RVALUE *end; size_t limit; + rb_obj_metadata_t *metadata; }; struct heaps_free_bitmap { @@ -291,6 +299,7 @@ int *ruby_initial_gc_stress_ptr = &rb_objspace.gc_stress; #define initial_heap_min_slots initial_params.initial_heap_min_slots #define initial_free_min initial_params.initial_free_min #define initial_growth_factor initial_params.initial_growth_factor +#define track_metadata initial_params.track_metadata #define is_lazy_sweeping(objspace) ((objspace)->heap.sweep_slots != 0) @@ -413,6 +422,8 @@ rb_objspace_free(rb_objspace_t *objspace) if 
(objspace->heap.sorted) { size_t i; for (i = 0; i < heaps_used; ++i) { + if (objspace->heap.sorted[i]->metadata) + free(objspace->heap.sorted[i]->metadata); free(objspace->heap.sorted[i]->bits); aligned_free(objspace->heap.sorted[i]); } @@ -538,6 +549,7 @@ assign_heap_slot(rb_objspace_t *objspace) objspace->heap.sorted[hi]->end = (p + objs); objspace->heap.sorted[hi]->base = heaps; objspace->heap.sorted[hi]->limit = objs; + objspace->heap.sorted[hi]->metadata = NULL; assert(objspace->heap.free_bitmap != NULL); heaps->bits = (uintptr_t *)objspace->heap.free_bitmap; objspace->heap.sorted[hi]->bits = (uintptr_t *)objspace->heap.free_bitmap; @@ -667,12 +679,19 @@ newobj(VALUE klass, VALUE flags) } MEMZERO((void*)obj, RVALUE, 1); -#ifdef GC_DEBUG - RANY(obj)->file = rb_sourcefile(); - RANY(obj)->line = rb_sourceline(); -#endif objspace->total_allocated_object_num++; + if (UNLIKELY(track_metadata)) { + struct heaps_header *heap = GET_HEAP_HEADER(obj); + if (!heap->metadata) + heap->metadata = calloc(HEAP_OBJ_LIMIT, sizeof(rb_obj_metadata_t)); + if (heap->metadata) { + rb_obj_metadata_t *meta = &heap->metadata[NUM_IN_SLOT(obj)]; + meta->file = rb_sourcefilename(); + meta->line = rb_sourceline(); + } + } + return obj; } @@ -867,6 +886,8 @@ free_unused_heaps(rb_objspace_t *objspace) last = objspace->heap.sorted[i]; } else { + if (objspace->heap.sorted[i]->metadata) + free(objspace->heap.sorted[i]->metadata); aligned_free(objspace->heap.sorted[i]); } heaps_used--; @@ -1736,6 +1757,55 @@ rb_obj_id(VALUE obj) return nonspecial_obj_id(obj); } +static inline rb_obj_metadata_t * +rb_obj_get_metadata(VALUE obj) +{ + struct heaps_header *heap; + + if (SPECIAL_CONST_P(obj)) + return NULL; + + heap = GET_HEAP_HEADER(obj); + if (!heap->metadata) + return NULL; + + return &heap->metadata[NUM_IN_SLOT(obj)]; +} + +/* + * Document-method: __sourcefile__ + * + * call-seq: + * obj.__sourcefile__ -> string + * + * Returns a string filename where +obj+ was allocated. 
+ * + * This method is only expected to work on C Ruby. An environment + * variable (RUBY_OBJECT_METADATA=1) must be set to enable this + * feature. + */ +static VALUE +rb_obj_sourcefile(VALUE obj) +{ + rb_obj_metadata_t *meta = rb_obj_get_metadata(obj); + + if (!track_metadata) + rb_warn("#__sourcefile__ requires RUBY_OBJECT_METADATA=1"); + + return meta ? meta->file : Qnil; +} + +static VALUE +rb_obj_sourceline(VALUE obj) +{ + rb_obj_metadata_t *meta = rb_obj_get_metadata(obj); + + if (!track_metadata) + rb_warn("#__sourceline__ requires RUBY_OBJECT_METADATA=1"); + + return meta ? INT2FIX(meta->line) : Qnil; +} + static int set_zero(st_data_t key, st_data_t val, st_data_t arg) { @@ -2606,6 +2676,7 @@ rb_gc_mark(VALUE ptr) static void gc_mark_children(rb_objspace_t *objspace, VALUE ptr) { + register rb_obj_metadata_t *meta; register RVALUE *obj = RANY(ptr); goto marking; /* skip */ @@ -2626,6 +2697,9 @@ gc_mark_children(rb_objspace_t *objspace, VALUE ptr) rb_mark_generic_ivar(ptr); } + if ((meta = rb_obj_get_metadata(ptr)) && RTEST(meta->file)) + gc_mark(objspace, meta->file); + switch (BUILTIN_TYPE(obj)) { case T_NIL: case T_FIXNUM: @@ -3294,10 +3368,17 @@ rb_gc_disable(void) void rb_gc_set_params(void) { + char *track_metadata_ptr; char *malloc_limit_ptr, *heap_min_slots_ptr, *free_min_ptr, *growth_factor_ptr; if (rb_safe_level() > 0) return; + track_metadata_ptr = getenv("RUBY_OBJECT_METADATA"); + if (track_metadata_ptr != NULL) { + if (RTEST(ruby_verbose)) + fprintf(stderr, "track_metadata=TRUE (FALSE)\n"); + track_metadata = TRUE; + } malloc_limit_ptr = getenv("RUBY_GC_MALLOC_LIMIT"); if (malloc_limit_ptr != NULL) { int malloc_limit_i = atoi(malloc_limit_ptr); @@ -4535,6 +4616,9 @@ Init_GC(void) rb_define_method(rb_cBasicObject, "__id__", rb_obj_id, 0); rb_define_method(rb_mKernel, "object_id", rb_obj_id, 0); + rb_define_method(rb_cBasicObject, "__sourcefile__", rb_obj_sourcefile, 0); + rb_define_method(rb_cBasicObject, "__sourceline__", rb_obj_sourceline, 
0); + rb_define_module_function(rb_mObSpace, "count_objects", count_objects, -1); { diff --git a/ruby.c b/ruby.c index 6b61162..4c7e93f 100644 --- a/ruby.c +++ b/ruby.c @@ -1337,6 +1337,8 @@ process_options(int argc, char **argv, struct cmdline_options *opt) return Qtrue; } + rb_gc_set_params(); + if (!(opt->disable & DISABLE_BIT(rubyopt)) && opt->safe_level == 0 && (s = getenv("RUBYOPT"))) { VALUE src_enc_name = opt->src.enc.name; @@ -1570,7 +1572,6 @@ process_options(int argc, char **argv, struct cmdline_options *opt) rb_define_readonly_boolean("$-a", opt->do_split); rb_set_safe_level(opt->safe_level); - rb_gc_set_params(); return iseq; }
on 2013-03-22 10:16
(2013/03/22 17:41), tmm1 (Aman Gupta) wrote: > >> > - `task' is ambiguous (all of procedures are task). > Do you prefer `rb_delayed_task_*` and vm_delayed_task.c ? I prefer rb_delayed_task_*, but if there is a better name, please suggest it. I think vm.c is a good place to put them. >> > - Allocation during gc is dangerous (maybe, should be prohibited) > Yes, I thought of that when using ALLOC_N. Is it safe to use regular allocation (without xmalloc)? Or better to maintain static array or freelist? I believe a statically sized C array (for example, 128 entries) is enough for this purpose. If it overflows, raise an error.
on 2013-05-03 05:48
Issue #8107 has been updated by tmm1 (Aman Gupta). Assignee set to tmm1 (Aman Gupta) > Another consideration is lack of EVENTs bit. It is restricted to 32bit. > GC related events are special. So I was thinking to separate ordinal > bits and GC's bits. ko1-san, is this what you have in mind? /* GC events (c-api only) */ #define RUBY_EVENT_OBJ (1<<31) #define RUBY_EVENT_OBJ_NEW (RUBY_EVENT_OBJ | 0x1) #define RUBY_EVENT_OBJ_MARK (RUBY_EVENT_OBJ | 0x2) #define RUBY_EVENT_OBJ_FREE (RUBY_EVENT_OBJ | 0x4) #define RUBY_EVENT_OBJ_ALL (RUBY_EVENT_OBJ | 0xF) ---------------------------------------- Feature #8107: [patch] runtime flag to track object allocation metadata https://bugs.ruby-lang.org/issues/8107#change-39097 Author: tmm1 (Aman Gupta) Status: Open Priority: Normal Assignee: tmm1 (Aman Gupta) Category: core Target version: When a ruby program contains a reference leak, debugging is a lot easier if you know where each object was allocated. Tools like bleakhouse and memprof have provided this functionality in the past, but were brittle and required source/runtime patches to ruby. Ruby already provides basic callsite tracking if you recompile ruby with GC_DEBUG. This is impractical for daily use however, since it increases the size of the ruby heap by ~30%. There is also no API to access the debug information. The following patch moves the GC_DEBUG file/line tracking outside of RVALUE, and adds a runtime flag (via environment variable) to enable it. This way normal usage is not affected by additional memory usage, but it is still simple to enable tracking for debugging purposes without having to recompile ruby. 
I've exposed this data via BasicObject#__sourcefile__ and BasicObject#__sourceline__ $ ruby -e' GC.start ObjectSpace.each_object.first(1).each{ |o| p [o.class, o, o.__sourcefile__, o.__sourceline__] } ' -e:4: warning: #__sourcefile__ requires RUBY_OBJECT_METADATA=1 -e:4: warning: #__sourceline__ requires RUBY_OBJECT_METADATA=1 [String, "/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems/exceptions", nil, nil] $ RUBY_OBJECT_METADATA=1 ruby -e' GC.start ObjectSpace.each_object.first(1).each{ |o| p [o.class, o, o.__sourcefile__, o.__sourceline__] } ' [String, "$(datarootdir)/doc/$(PACKAGE)", "/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems.rb", 8] diff --git a/gc.c b/gc.c index bd95073..2fc1d0c 100644 --- a/gc.c +++ b/gc.c @@ -81,6 +81,7 @@ typedef struct { #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE int gc_stress; #endif + int track_metadata; } ruby_gc_params_t; static ruby_gc_params_t initial_params = { @@ -91,6 +92,7 @@ static ruby_gc_params_t initial_params = { #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE FALSE, #endif + FALSE }; #define nomem_error GET_VM()->special_exceptions[ruby_error_nomemory] @@ -162,6 +164,11 @@ typedef struct RVALUE { #pragma pack(pop) #endif +typedef struct rb_obj_metadata { + VALUE file; + unsigned short line; +} rb_obj_metadata_t; + struct heaps_slot { struct heaps_header *header; uintptr_t *bits; @@ -177,6 +184,7 @@ struct heaps_header { RVALUE *start; RVALUE *end; size_t limit; + rb_obj_metadata_t *metadata; }; struct heaps_free_bitmap { @@ -291,6 +299,7 @@ int *ruby_initial_gc_stress_ptr = &rb_objspace.gc_stress; #define initial_heap_min_slots initial_params.initial_heap_min_slots #define initial_free_min initial_params.initial_free_min #define initial_growth_factor initial_params.initial_growth_factor +#define track_metadata initial_params.track_metadata #define is_lazy_sweeping(objspace) ((objspace)->heap.sweep_slots != 0) @@ -413,6 +422,8 @@ rb_objspace_free(rb_objspace_t *objspace) if 
(objspace->heap.sorted) { size_t i; for (i = 0; i < heaps_used; ++i) { + if (objspace->heap.sorted[i]->metadata) + free(objspace->heap.sorted[i]->metadata); free(objspace->heap.sorted[i]->bits); aligned_free(objspace->heap.sorted[i]); } @@ -538,6 +549,7 @@ assign_heap_slot(rb_objspace_t *objspace) objspace->heap.sorted[hi]->end = (p + objs); objspace->heap.sorted[hi]->base = heaps; objspace->heap.sorted[hi]->limit = objs; + objspace->heap.sorted[hi]->metadata = NULL; assert(objspace->heap.free_bitmap != NULL); heaps->bits = (uintptr_t *)objspace->heap.free_bitmap; objspace->heap.sorted[hi]->bits = (uintptr_t *)objspace->heap.free_bitmap; @@ -667,12 +679,19 @@ newobj(VALUE klass, VALUE flags) } MEMZERO((void*)obj, RVALUE, 1); -#ifdef GC_DEBUG - RANY(obj)->file = rb_sourcefile(); - RANY(obj)->line = rb_sourceline(); -#endif objspace->total_allocated_object_num++; + if (UNLIKELY(track_metadata)) { + struct heaps_header *heap = GET_HEAP_HEADER(obj); + if (!heap->metadata) + heap->metadata = calloc(HEAP_OBJ_LIMIT, sizeof(rb_obj_metadata_t)); + if (heap->metadata) { + rb_obj_metadata_t *meta = &heap->metadata[NUM_IN_SLOT(obj)]; + meta->file = rb_sourcefilename(); + meta->line = rb_sourceline(); + } + } + return obj; } @@ -867,6 +886,8 @@ free_unused_heaps(rb_objspace_t *objspace) last = objspace->heap.sorted[i]; } else { + if (objspace->heap.sorted[i]->metadata) + free(objspace->heap.sorted[i]->metadata); aligned_free(objspace->heap.sorted[i]); } heaps_used--; @@ -1736,6 +1757,55 @@ rb_obj_id(VALUE obj) return nonspecial_obj_id(obj); } +static inline rb_obj_metadata_t * +rb_obj_get_metadata(VALUE obj) +{ + struct heaps_header *heap; + + if (SPECIAL_CONST_P(obj)) + return NULL; + + heap = GET_HEAP_HEADER(obj); + if (!heap->metadata) + return NULL; + + return &heap->metadata[NUM_IN_SLOT(obj)]; +} + +/* + * Document-method: __sourcefile__ + * + * call-seq: + * obj.__sourcefile__ -> string + * + * Returns a string filename where +obj+ was allocated. 
+ * + * This method is only expected to work on C Ruby. An environment + * variable (RUBY_OBJECT_METADATA=1) must be set to enable this + * feature. + */ +static VALUE +rb_obj_sourcefile(VALUE obj) +{ + rb_obj_metadata_t *meta = rb_obj_get_metadata(obj); + + if (!track_metadata) + rb_warn("#__sourcefile__ requires RUBY_OBJECT_METADATA=1"); + + return meta ? meta->file : Qnil; +} + +static VALUE +rb_obj_sourceline(VALUE obj) +{ + rb_obj_metadata_t *meta = rb_obj_get_metadata(obj); + + if (!track_metadata) + rb_warn("#__sourceline__ requires RUBY_OBJECT_METADATA=1"); + + return meta ? INT2FIX(meta->line) : Qnil; +} + static int set_zero(st_data_t key, st_data_t val, st_data_t arg) { @@ -2606,6 +2676,7 @@ rb_gc_mark(VALUE ptr) static void gc_mark_children(rb_objspace_t *objspace, VALUE ptr) { + register rb_obj_metadata_t *meta; register RVALUE *obj = RANY(ptr); goto marking; /* skip */ @@ -2626,6 +2697,9 @@ gc_mark_children(rb_objspace_t *objspace, VALUE ptr) rb_mark_generic_ivar(ptr); } + if ((meta = rb_obj_get_metadata(ptr)) && RTEST(meta->file)) + gc_mark(objspace, meta->file); + switch (BUILTIN_TYPE(obj)) { case T_NIL: case T_FIXNUM: @@ -3294,10 +3368,17 @@ rb_gc_disable(void) void rb_gc_set_params(void) { + char *track_metadata_ptr; char *malloc_limit_ptr, *heap_min_slots_ptr, *free_min_ptr, *growth_factor_ptr; if (rb_safe_level() > 0) return; + track_metadata_ptr = getenv("RUBY_OBJECT_METADATA"); + if (track_metadata_ptr != NULL) { + if (RTEST(ruby_verbose)) + fprintf(stderr, "track_metadata=TRUE (FALSE)\n"); + track_metadata = TRUE; + } malloc_limit_ptr = getenv("RUBY_GC_MALLOC_LIMIT"); if (malloc_limit_ptr != NULL) { int malloc_limit_i = atoi(malloc_limit_ptr); @@ -4535,6 +4616,9 @@ Init_GC(void) rb_define_method(rb_cBasicObject, "__id__", rb_obj_id, 0); rb_define_method(rb_mKernel, "object_id", rb_obj_id, 0); + rb_define_method(rb_cBasicObject, "__sourcefile__", rb_obj_sourcefile, 0); + rb_define_method(rb_cBasicObject, "__sourceline__", rb_obj_sourceline, 
0); + rb_define_module_function(rb_mObSpace, "count_objects", count_objects, -1); { diff --git a/ruby.c b/ruby.c index 6b61162..4c7e93f 100644 --- a/ruby.c +++ b/ruby.c @@ -1337,6 +1337,8 @@ process_options(int argc, char **argv, struct cmdline_options *opt) return Qtrue; } + rb_gc_set_params(); + if (!(opt->disable & DISABLE_BIT(rubyopt)) && opt->safe_level == 0 && (s = getenv("RUBYOPT"))) { VALUE src_enc_name = opt->src.enc.name; @@ -1570,7 +1572,6 @@ process_options(int argc, char **argv, struct cmdline_options *opt) rb_define_readonly_boolean("$-a", opt->do_split); rb_set_safe_level(opt->safe_level); - rb_gc_set_params(); return iseq; }
Please log in before posting. Registration is free and takes only a minute.
Existing account
(Switch to SSL-encrypted connection)
NEW: Do you have a Google/GoogleMail or Yahoo account? No registration required!
Log in with Google account | Log in with Yahoo account
Log in with Google account | Log in with Yahoo account
No account? Register here.