Hash#identified_by

なかだです。

compared_bye$B$H$$$&L>A0$K$7$h$&$+$H;W$$$^$7$?$,!"Hf3S$@$1$G$O$J$$e(B
e$B$N$Ge(Bidentifed_bye$B$H$7$F$_$^$7$?!#e(B

marshal.ce$B$Ne(Bw_encoding()e$B$b@0M}$7$F$"$j$^$9!#e(B

Index: hash.c

--- hash.c (revision 15489)
+++ hash.c (working copy)
@@ -25,4 +25,5 @@ static VALUE rb_hash_s_try_convert(VALUE
#define HASH_DELETED FL_USER1
#define HASH_PROC_DEFAULT FL_USER2
+#define HASH_IDENT_BY_METHOD FL_USER3

VALUE
@@ -410,5 +411,6 @@ static VALUE
rb_hash_rehash(VALUE hash)
{

  • st_table *tbl;
  • st_table *tbl, *ntbl;

  • VALUE htmp;

    if (RHASH(hash)->iter_lev > 0) {
    @@ -416,10 +418,15 @@ rb_hash_rehash(VALUE hash)
    }
    rb_hash_modify_check(hash);

  • if (!RHASH(hash)->ntbl)
  • if (!(ntbl = RHASH(hash)->ntbl))
    return hash;
  • tbl = st_init_table_with_size(RHASH(hash)->ntbl->type,
    RHASH(hash)->ntbl->num_entries);
  • htmp = hash_alloc(0);
  • tbl = st_init_table_with_size(ntbl->type, ntbl->num_entries);
  • RHASH(htmp)->ntbl = tbl;
    rb_hash_foreach(hash, rb_hash_rehash_i, (st_data_t)tbl);
  • st_free_table(RHASH(hash)->ntbl);
  • ntbl = RHASH(hash)->ntbl;

  • RHASH(htmp)->ntbl = 0;
    RHASH(hash)->ntbl = tbl;

  • st_free_table(ntbl);

  • rb_gc_force_recycle(htmp);

    return hash;
    @@ -1683,21 +1690,46 @@ rb_hash_flatten(int argc, VALUE *argv, V
    }

-static const struct st_hash_type identhash = {

  • st_numcmp,
  • st_numhash,
    +struct hash_ident {
  • ID equal, hash;
    +};

+static int
+compare_by_method(st_data_t x, st_data_t y, st_table *t)
+{

  • struct hash_ident *ids = st_extra_data(t);
  • VALUE other = (VALUE)y;
  • VALUE val = rb_funcall2((VALUE)x, ids->equal, 1, &other);
  • return RTEST(val);
    +}

+static int
+hash_by_method(st_data_t x, st_table *t)
+{

  • struct hash_ident *ids = st_extra_data(t);
  • VALUE val = rb_funcall2((VALUE)x, ids->hash, 0, 0);
  • return NUM2INT(val);
    +}

+static const struct st_hash_type hash_ident_type = {

  • compare_by_method,
  • hash_by_method,
    };

/*

  • call-seq:
    • hsh.compare_by_identity => hsh
      
    • Hash.identified_by(equ, hash) => hsh
      
    • Makes hsh to compare its keys by their identity, i.e. it
    • will consider exact same objects as same keys.
    • Makes hsh which identifies its keys with +equ+ and +hash+
    • methods.
    • h1 = { "a" => 100, "b" => 200, :c => "c" }
      
    • h1["a"]        #=> 100
      
    • h1.compare_by_identity
      
    • h1.compare_by_identity? #=> true
      
    • h1 = Hash.identified_by(:equal?, :object_id)
      
    • h1["a"] = 100
      
    • key_a = h1.key(100)
      
    • h1["b"] = 200
      
    • h1[:c] = "c"
      
    • h1["a"]        #=> nil  # different objects.
      
    • h1[key_a]      #=> 100  # same object as a key.
      
    • h1[:c]         #=> "c"  # same symbols are all same.
      

@@ -1705,30 +1737,75 @@ static const struct st_hash_type identha

static VALUE
-rb_hash_compare_by_id(VALUE hash)
+rb_hash_s_identified_by(VALUE klass, VALUE equal_mid, VALUE hash_mid)
{

  • rb_hash_modify(hash);
  • RHASH(hash)->ntbl->type = &identhash;
  • rb_hash_rehash(hash);
  • return hash;
  • struct hash_ident ids;
  • VALUE h;
  • st_table *tbl;
  • ids.equal = rb_to_id(equal_mid);
  • ids.hash = rb_to_id(hash_mid);
  • h = hash_alloc(klass);
  • RBASIC(h)->flags |= HASH_IDENT_BY_METHOD;
  • tbl = st_init_table_extra(&hash_ident_type, sizeof(ids));
  • RHASH(h)->ntbl = tbl;
  • *(struct hash_ident *)st_extra_data(tbl) = ids;
  • return h;
    }

/*

  • call-seq:
    • hsh.compare_by_identity? => true or false
      
    • Returns true if hsh will compare its keys by
    • their identity. Also see Hash#compare_by_identity.
    • hsh.methods_to_identify => a pair of symbols or nil
      
    • Returns Symbols to be used to identify the items if
    • hsh is created with Hash#identified_by.
    • Otherwise, returns nil.
      */

-static VALUE
-rb_hash_compare_by_id_p(VALUE hash)
+VALUE
+rb_hash_get_ident_methods(VALUE hash)
{

  • if (!RHASH(hash)->ntbl)
  •    return Qfalse;
    
  • if (RHASH(hash)->ntbl->type == &identhash) {
  • return Qtrue;
  • }
  • return Qfalse;
  • struct hash_ident *ids;
  • if (!FL_TEST(hash, HASH_IDENT_BY_METHOD)) return Qnil;
  • ids = st_extra_data(RHASH(hash)->ntbl);
  • return rb_assoc_new(ID2SYM(ids->equal), ID2SYM(ids->hash));
    +}

+ID
+rb_hash_ident_methods_id(void)
+{

  • return rb_intern(“methods_to_identify”);
    +}

+void
+rb_hash_set_ident_method(VALUE hash, VALUE val)
+{

  • struct hash_ident ids;
  • st_table *tbl, *ntbl;
  • VALUE htmp;
  • if (TYPE(val) != T_ARRAY || RARRAY_LEN(val) != 2 ||
  • !SYMBOL_P(RARRAY_PTR(val)[0]) ||
  • !SYMBOL_P(RARRAY_PTR(val)[1])) {
  • rb_raise(rb_eArgError, “wrong methods to identify”);
  • }
  • ids.equal = SYM2ID(RARRAY_PTR(val)[0]);
  • ids.hash = SYM2ID(RARRAY_PTR(val)[1]);
  • ntbl = RHASH(hash)->ntbl;
  • htmp = hash_alloc(0);
  • tbl = st_init_table_extra_with_size(&hash_ident_type,
  •      sizeof(ids),
    
  •      ntbl->num_entries);
    
  • RBASIC(htmp)->flags |= HASH_IDENT_BY_METHOD;
  • RHASH(htmp)->ntbl = tbl;
  • *(struct hash_ident *)st_extra_data(tbl) = ids;
  • rb_hash_foreach(hash, rb_hash_rehash_i, (st_data_t)tbl);
  • ntbl = RHASH(hash)->ntbl;
  • RHASH(htmp)->ntbl = 0;
  • RHASH(hash)->ntbl = tbl;
  • RBASIC(hash)->flags |= HASH_IDENT_BY_METHOD;
  • st_free_table(ntbl);
  • rb_gc_force_recycle(htmp);
    }

@@ -2610,6 +2687,6 @@ Init_Hash(void)
rb_define_method(rb_cHash,“value?”, rb_hash_has_value, 1);

  • rb_define_method(rb_cHash,“compare_by_identity”,
    rb_hash_compare_by_id, 0);
  • rb_define_method(rb_cHash,“compare_by_identity?”,
    rb_hash_compare_by_id_p, 0);
  • rb_define_singleton_method(rb_cHash, “identified_by”,
    rb_hash_s_identified_by, 2);
  • rb_define_method(rb_cHash, “methods_to_identify”,
    rb_hash_get_ident_methods, 0);

#ifndef MACOS /* environment variables nothing on MacOS. */
Index: marshal.c

--- marshal.c (revision 15489)
+++ marshal.c (working copy)
@@ -465,34 +465,91 @@ w_obj_each(ID id, VALUE value, struct du
}

-static void
-w_encoding(VALUE obj, long num, struct dump_call_arg *arg)
+ID rb_hash_ident_methods_id(void);
+VALUE rb_hash_get_ident_methods(VALUE);
+void rb_hash_set_ident_method(VALUE, VALUE);
+
+struct extra_ivar {

  • VALUE obj;
  • int count;
  • VALUE encoding;
  • union {
  • struct {
  •  VALUE ident_methods;
    
  • } hash;
  • } as;
    +};

+static int
+has_extra_ivars(VALUE obj, struct extra_ivar *exivars, struct dump_arg
*arg)
{

  • int encidx = rb_enc_get_index(obj);
  • rb_encoding *enc = 0;
  • st_data_t name;
  • int count = 0, encidx;
  • rb_encoding *enc;
  • if (encidx <= 0 || !(enc = rb_enc_from_index(encidx))) {
  • w_long(num, arg->arg);
  • return;
  • if (!ENCODING_IS_ASCII8BIT(obj) &&
  • (encidx = rb_enc_get_index(obj)) > 0 &&
  • (enc = rb_enc_from_index(encidx)) != 0) {
  • st_data_t name;
  • do {
  •  if (!arg->encodings)
    
  • arg->encodings = st_init_strcasetable();
  •  else if (st_lookup(arg->encodings, (st_data_t)rb_enc_name(enc), 
    

&name))

  • break;
  •  name = (st_data_t)rb_str_new2(rb_enc_name(enc));
    
  •  st_insert(arg->encodings, (st_data_t)rb_enc_name(enc), name);
    
  • } while (0);
  • ++count;
  • exivars->encoding = (VALUE)name;
  • }
  • else {
  • exivars->encoding = 0;
  • }
  • switch (BUILTIN_TYPE(obj)) {
  •  case T_HASH: {
    
  • VALUE ids = rb_hash_get_ident_methods(obj);
  • if (RTEST(ids)) {
  •  exivars->as.hash.ident_methods = ids;
    
  •  ++count;
    
  • }
  • else {
  •  exivars->as.hash.ident_methods = 0;
    
  • }
  • break;
  •  }
    
  • }
  • exivars->obj = obj;
  • return exivars->count = count;
    +}

+static void
+w_extra_ivars(struct extra_ivar *exivars, struct dump_call_arg *arg)
+{

  • if (exivars->encoding) {
  • w_symbol(rb_id_encoding(), arg->arg);
  • w_object(exivars->encoding, arg->arg, arg->limit);
  • }
  • switch (BUILTIN_TYPE(exivars->obj)) {
  •  case T_HASH:
    
  • if (exivars->as.hash.ident_methods) {
  •  w_symbol(rb_hash_ident_methods_id(), arg->arg);
    
  •  w_object(exivars->as.hash.ident_methods, arg->arg, arg->limit);
    
  • }
  • break;
    }
  • w_long(num + 1, arg->arg);
  • w_symbol(rb_id_encoding(), arg->arg);
  • do {
  • if (!arg->arg->encodings)
  •  arg->arg->encodings = st_init_strcasetable();
    
  • else if (st_lookup(arg->arg->encodings, (st_data_t)rb_enc_name(enc),
    &name))
  •  break;
    
  • name = (st_data_t)rb_str_new2(rb_enc_name(enc));
  • st_insert(arg->arg->encodings, (st_data_t)rb_enc_name(enc), name);
  • } while (0);
  • w_object(name, arg->arg, arg->limit);
    }

static void
-w_ivar(VALUE obj, st_table *tbl, struct dump_call_arg *arg)
+w_ivar(struct extra_ivar *exivars, st_table *tbl, struct dump_call_arg
*arg)
{
long num = tbl ? tbl->num_entries : 0;

  • w_encoding(obj, num, arg);
  • num += exivars->count;
  • w_long(num, arg->arg);
  • if (exivars->count) {
  • w_extra_ivars(exivars, arg);
  • }
    if (tbl) {
    st_foreach_safe(tbl, w_obj_each, (st_data_t)arg);
    @@ -501,17 +558,20 @@ w_ivar(VALUE obj, st_table *tbl, struct

static void
-w_objivar(VALUE obj, struct dump_call_arg *arg)
+w_objivar(struct extra_ivar *exivars, struct dump_call_arg *arg)
{

  • VALUE *ptr;
  • VALUE *ptr, obj = exivars->obj;
    long i, len, num;

    len = ROBJECT_LEN(obj);
    ptr = ROBJECT_PTR(obj);

  • num = 0;
  • num = exivars->count;
    for (i = 0; i < len; i++)
    if (ptr[i] != Qundef)
    num += 1;
  • w_encoding(obj, num, arg);
  • w_long(num, arg->arg);
  • if (exivars->count) {
  • w_extra_ivars(exivars, arg);
  • }
    if (num != 0) {
    rb_ivar_foreach(obj, w_obj_each, (st_data_t)arg);
    @@ -526,6 +586,8 @@ w_object(VALUE obj, struct dump_arg *arg
    st_data_t num;
    int hasiv = 0;
    -#define has_ivars(obj, ivtbl) ((ivtbl = rb_generic_ivar_table(obj)) !=
    0 || \
  •         (!SPECIAL_CONST_P(obj) && !ENCODING_IS_ASCII8BIT(obj)))
    
  • struct extra_ivar exivar;
    +#define has_ivars(obj, ivtbl, exivar) \

  • (((ivtbl = rb_generic_ivar_table(obj)) != 0) | \

  • (!SPECIAL_CONST_P(obj) && has_extra_ivars(obj, &exivar, arg)))
    

    if (limit == 0) {
    @@ -543,5 +605,5 @@ w_object(VALUE obj, struct dump_arg *arg
    }

  • if ((hasiv = has_ivars(obj, ivtbl)) != 0) {
  • if ((hasiv = has_ivars(obj, ivtbl, exivar)) != 0) {
    w_byte(TYPE_IVAR, arg);
    }
    @@ -586,4 +648,5 @@ w_object(VALUE obj, struct dump_arg *arg
    VALUE real_obj = obj;
    obj = compat->dumper(real_obj);
  •            exivar.obj = obj;
               st_insert(arg->compat_tbl, (st_data_t)obj, 
    

(st_data_t)real_obj);
}
@@ -596,5 +659,5 @@ w_object(VALUE obj, struct dump_arg *arg
w_class(TYPE_USRMARSHAL, obj, arg, Qfalse);
w_object(v, arg, limit);

  •  if (hasiv) w_ivar(obj, 0, &c_arg);
    
  •  if (hasiv) w_ivar(&exivar, 0, &c_arg);
     return;
    
    }
    @@ -602,4 +665,5 @@ w_object(VALUE obj, struct dump_arg *arg
    VALUE v;
    st_table *ivtbl2 = 0;
  •        struct extra_ivar exivar2;
           int hasiv2;
    

@@ -608,5 +672,5 @@ w_object(VALUE obj, struct dump_arg *arg
rb_raise(rb_eTypeError, “_dump() must return string”);
}

  •  if ((hasiv2 = has_ivars(v, ivtbl2)) != 0 && !hasiv) {
    
  •  if ((hasiv2 = has_ivars(v, ivtbl2, exivar2)) != 0 && !hasiv) {
    
    w_byte(TYPE_IVAR, arg);
    }
    @@ -614,8 +678,8 @@ w_object(VALUE obj, struct dump_arg *arg
    w_bytes(RSTRING_PTR(v), RSTRING_LEN(v), arg);
    if (hasiv2) {
  • w_ivar(v, ivtbl2, &c_arg);
  • w_ivar(&exivar2, ivtbl2, &c_arg);
    }
    else if (hasiv) {
  • w_ivar(obj, ivtbl, &c_arg);
  • w_ivar(&exivar, ivtbl, &c_arg);
    }
    return;
    @@ -739,5 +803,5 @@ w_object(VALUE obj, struct dump_arg *arg
    case T_OBJECT:
    w_class(TYPE_OBJECT, obj, arg, Qtrue);
  •  w_objivar(obj, &c_arg);
    
  •  w_objivar(&exivar, &c_arg);
     break;
    

@@ -764,5 +828,5 @@ w_object(VALUE obj, struct dump_arg *arg
}
if (hasiv) {

  • w_ivar(obj, ivtbl, &c_arg);
  • w_ivar(&exivar, ivtbl, &c_arg);
    }
    }
    @@ -1093,4 +1159,8 @@ r_ivar(VALUE obj, struct load_arg *arg)
    if (idx > 0) rb_enc_associate_index(obj, idx);
    }
  •  else if (TYPE(obj) == T_HASH &&
    
  •     id == rb_hash_ident_methods_id()) {
    
  • rb_hash_set_ident_method(obj, val);
  •  }
     else {
    
    rb_ivar_set(obj, id, val);
    Index: st.c
    ===================================================================
    --- st.c (revision 15489)
    +++ st.c (working copy)
    @@ -69,7 +69,8 @@ static void rehash(st_table );
    #define Calloc(n,s) (char
    )calloc((n),(s))

-#define EQUAL(table,x,y) ((x)==(y) || (*table->type->compare)((x),(y))
== 0)
+#define EQUAL(table,x,y) ((x)==(y) || \

  •    (*table->type->compare)((x),(y),(table)) == 0)
    

-#define do_hash(key,table) (unsigned int)((table)->type->hash)((key))
+#define do_hash(key,table) (unsigned
int)(
(table)->type->hash)((key),(table))
#define do_hash_bin(key,table) (do_hash(key, table)%(table)->num_bins)

@@ -154,19 +155,17 @@ stat_col()
#define MAX_PACKED_NUMHASH 5

-st_table*
-st_init_table_with_size(const struct st_hash_type *type, int size)
-{

  • st_table *tbl;

#ifdef HASH_LOG

  • if (init_st == 0) {
  • init_st = 1;
  • atexit(stat_col);
  • }
    +#define INIT_HASH_LOG() \
  • (void)((init_st == 0) ? \
  • (init_st = 1, atexit(stat_col)) : 0)
    

+#else
+#define INIT_HASH_LOG() ((void)0)
#endif

+static st_table *
+st_table_init(st_table *tbl, const struct st_hash_type type, int size)
+{
size = new_size(size); /
round up to prime number */

  • tbl = alloc(st_table);
    tbl->type = type;
    tbl->num_entries = 0;
    @@ -180,4 +179,23 @@ st_init_table_with_size(const struct st_

st_table*
+st_init_table_with_size(const struct st_hash_type *type, int size)
+{

  • return st_table_init(alloc(st_table), type, size);
    +}

+st_table *
+st_init_table_extra(const struct st_hash_type *type, int extra)
+{

  • return st_table_init(malloc(sizeof(st_table) + extra), type, 0);
    +}

+st_table *
+st_init_table_extra_with_size(const struct st_hash_type *type, int
extra, int size)
+{

  • return st_table_init(malloc(sizeof(st_table) + extra),
  •   type, size);
    

+}
+
+st_table*
st_init_table(const struct st_hash_type *type)
{
Index: include/ruby/st.h

--- include/ruby/st.h (revision 15489)
+++ include/ruby/st.h (working copy)
@@ -74,4 +74,6 @@ st_table *st_init_strtable_with_size(int
st_table *st_init_strcasetable(void);
st_table *st_init_strcasetable_with_size(int);
+st_table *st_init_table_extra(const struct st_hash_type *, int);
+st_table *st_init_table_extra_with_size(const struct st_hash_type *,
int, int);
int st_delete(st_table *, st_data_t *, st_data_t *);
int st_delete_safe(st_table *, st_data_t *, st_data_t *, st_data_t);
@@ -91,4 +93,6 @@ int st_strcasecmp(const char *s1, const
int st_strncasecmp(const char *s1, const char *s2, size_t n);

+static inline void *st_extra_data(st_table *tbl) {return tbl + 1;}
+
#if defined(__cplusplus)
#if 0
Index: test/ruby/test_hash.rb

--- test/ruby/test_hash.rb (revision 15511)
+++ test/ruby/test_hash.rb (working copy)
@@ -818,12 +818,11 @@ class TestHash < Test::Unit::TestCase
end

  • def test_compare_by_identity
  • a = “foo”
  • assert(!{}.compare_by_identity?)
  • h = { a => “bar” }
  • assert(!h.compare_by_identity?)
  • h.compare_by_identity
  • assert(h.compare_by_identity?)
  • #assert_equal(“bar”, h[a])
  • def test_identified_by
  • a = “foo”.freeze
  • assert(!{}.methods_to_identify)
  • h = Hash.identified_by(:equal?, :object_id)
  • assert_equal([:equal?, :object_id], h.methods_to_identify)
  • h[a] = “bar”
  • assert_equal(“bar”, h[a])
    assert_nil(h[“foo”])
    end