# Hash#identified_by

なかだです。

compared_bye\$B\$H\$\$\$&L>A0\$K\$7\$h\$&\$+\$H;W\$\$\$^\$7\$?\$,!"Hf3S\$@\$1\$G\$O\$J\$\$e(B
e\$B\$N\$Ge(Bidentifed_bye\$B\$H\$7\$F\$_\$^\$7\$?!#e(B

marshal.ce\$B\$Ne(Bw_encoding()e\$B\$b@0M}\$7\$F\$"\$j\$^\$9!#e(B

Index: hash.c
===================================================================
--- hash.c (revision 15489)
+++ hash.c (working copy)
@@ -25,4 +25,5 @@ static VALUE rb_hash_s_try_convert(VALUE
#define HASH_DELETED FL_USER1
#define HASH_PROC_DEFAULT FL_USER2
+#define HASH_IDENT_BY_METHOD FL_USER3

VALUE
@@ -410,5 +411,6 @@ static VALUE
rb_hash_rehash(VALUE hash)
{

• st_table *tbl;
• st_table *tbl, *ntbl;

• VALUE htmp;

if (RHASH(hash)->iter_lev > 0) {
@@ -416,10 +418,15 @@ rb_hash_rehash(VALUE hash)
}
rb_hash_modify_check(hash);

• if (!RHASH(hash)->ntbl)
• if (!(ntbl = RHASH(hash)->ntbl))
return hash;
• tbl = st_init_table_with_size(RHASH(hash)->ntbl->type,
RHASH(hash)->ntbl->num_entries);
• htmp = hash_alloc(0);
• tbl = st_init_table_with_size(ntbl->type, ntbl->num_entries);
• RHASH(htmp)->ntbl = tbl;
rb_hash_foreach(hash, rb_hash_rehash_i, (st_data_t)tbl);
• st_free_table(RHASH(hash)->ntbl);
• ntbl = RHASH(hash)->ntbl;

• RHASH(htmp)->ntbl = 0;
RHASH(hash)->ntbl = tbl;

• st_free_table(ntbl);

• rb_gc_force_recycle(htmp);

return hash;
@@ -1683,21 +1690,46 @@ rb_hash_flatten(int argc, VALUE *argv, V
}

-static const struct st_hash_type identhash = {

• st_numcmp,
• st_numhash,
+struct hash_ident {
• ID equal, hash;
+};

+static int
+compare_by_method(st_data_t x, st_data_t y, st_table *t)
+{

• struct hash_ident *ids = st_extra_data(t);
• VALUE other = (VALUE)y;
• VALUE val = rb_funcall2((VALUE)x, ids->equal, 1, &other);
• return RTEST(val);
+}

+static int
+hash_by_method(st_data_t x, st_table *t)
+{

• struct hash_ident *ids = st_extra_data(t);
• VALUE val = rb_funcall2((VALUE)x, ids->hash, 0, 0);
• return NUM2INT(val);
+}

+static const struct st_hash_type hash_ident_type = {

• compare_by_method,
• hash_by_method,
};

/*

• call-seq:
• ``````hsh.compare_by_identity => hsh
``````
• ``````Hash.identified_by(equ, hash) => hsh
``````
• Makes hsh to compare its keys by their identity, i.e. it
• will consider exact same objects as same keys.
• Makes hsh which identifies its keys with +equ+ and +hash+
• methods.
• ``````h1 = { "a" => 100, "b" => 200, :c => "c" }
``````
• ``````h1["a"]        #=> 100
``````
• ``````h1.compare_by_identity
``````
• ``````h1.compare_by_identity? #=> true
``````
• ``````h1 = Hash.identified_by(:equal?, :object_id)
``````
• ``````h1["a"] = 100
``````
• ``````key_a = h1.key(100)
``````
• ``````h1["b"] = 200
``````
• ``````h1[:c] = "c"
``````
• ``````h1["a"]        #=> nil  # different objects.
``````
• ``````h1[key_a]      #=> 100  # same object as a key.
``````
• ``````h1[:c]         #=> "c"  # same symbols are all same.
``````

@@ -1705,30 +1737,75 @@ static const struct st_hash_type identha

static VALUE
-rb_hash_compare_by_id(VALUE hash)
+rb_hash_s_identified_by(VALUE klass, VALUE equal_mid, VALUE hash_mid)
{

• rb_hash_modify(hash);
• RHASH(hash)->ntbl->type = &identhash;
• rb_hash_rehash(hash);
• return hash;
• struct hash_ident ids;
• VALUE h;
• st_table *tbl;
• ids.equal = rb_to_id(equal_mid);
• ids.hash = rb_to_id(hash_mid);
• h = hash_alloc(klass);
• RBASIC(h)->flags |= HASH_IDENT_BY_METHOD;
• tbl = st_init_table_extra(&hash_ident_type, sizeof(ids));
• RHASH(h)->ntbl = tbl;
• *(struct hash_ident *)st_extra_data(tbl) = ids;
• return h;
}

/*

• call-seq:
• ``````hsh.compare_by_identity? => true or false
``````
• Returns `true` if hsh will compare its keys by
• their identity. Also see `Hash#compare_by_identity`.
• ``````hsh.methods_to_identify => a pair of symbols or nil
``````
• Returns `Symbol`s to be used to identify the items if
• hsh is created with `Hash#identified_by`.
• Otherwise, returns nil.
*/

-static VALUE
-rb_hash_compare_by_id_p(VALUE hash)
+VALUE
+rb_hash_get_ident_methods(VALUE hash)
{

• if (!RHASH(hash)->ntbl)
• ``````   return Qfalse;
``````
• if (RHASH(hash)->ntbl->type == &identhash) {
• return Qtrue;
• }
• return Qfalse;
• struct hash_ident *ids;
• if (!FL_TEST(hash, HASH_IDENT_BY_METHOD)) return Qnil;
• ids = st_extra_data(RHASH(hash)->ntbl);
• return rb_assoc_new(ID2SYM(ids->equal), ID2SYM(ids->hash));
+}

+ID
+rb_hash_ident_methods_id(void)
+{

• return rb_intern(“methods_to_identify”);
+}

+void
+rb_hash_set_ident_method(VALUE hash, VALUE val)
+{

• struct hash_ident ids;
• st_table *tbl, *ntbl;
• VALUE htmp;
• if (TYPE(val) != T_ARRAY || RARRAY_LEN(val) != 2 ||
• !SYMBOL_P(RARRAY_PTR(val)[0]) ||
• !SYMBOL_P(RARRAY_PTR(val)[1])) {
• rb_raise(rb_eArgError, “wrong methods to identify”);
• }
• ids.equal = SYM2ID(RARRAY_PTR(val)[0]);
• ids.hash = SYM2ID(RARRAY_PTR(val)[1]);
• ntbl = RHASH(hash)->ntbl;
• htmp = hash_alloc(0);
• tbl = st_init_table_extra_with_size(&hash_ident_type,
• ``````     sizeof(ids),
``````
• ``````     ntbl->num_entries);
``````
• RBASIC(htmp)->flags |= HASH_IDENT_BY_METHOD;
• RHASH(htmp)->ntbl = tbl;
• *(struct hash_ident *)st_extra_data(tbl) = ids;
• rb_hash_foreach(hash, rb_hash_rehash_i, (st_data_t)tbl);
• ntbl = RHASH(hash)->ntbl;
• RHASH(htmp)->ntbl = 0;
• RHASH(hash)->ntbl = tbl;
• RBASIC(hash)->flags |= HASH_IDENT_BY_METHOD;
• st_free_table(ntbl);
• rb_gc_force_recycle(htmp);
}

@@ -2610,6 +2687,6 @@ Init_Hash(void)
rb_define_method(rb_cHash,“value?”, rb_hash_has_value, 1);

• rb_define_method(rb_cHash,“compare_by_identity”,
rb_hash_compare_by_id, 0);
• rb_define_method(rb_cHash,“compare_by_identity?”,
rb_hash_compare_by_id_p, 0);
• rb_define_singleton_method(rb_cHash, “identified_by”,
rb_hash_s_identified_by, 2);
• rb_define_method(rb_cHash, “methods_to_identify”,
rb_hash_get_ident_methods, 0);

#ifndef __MACOS__ /* environment variables nothing on MacOS. */
Index: marshal.c
===================================================================
--- marshal.c (revision 15489)
+++ marshal.c (working copy)
@@ -465,34 +465,91 @@ w_obj_each(ID id, VALUE value, struct du
}

-static void
-w_encoding(VALUE obj, long num, struct dump_call_arg *arg)
+ID rb_hash_ident_methods_id(void);
+VALUE rb_hash_get_ident_methods(VALUE);
+void rb_hash_set_ident_method(VALUE, VALUE);
+
+struct extra_ivar {

• VALUE obj;
• int count;
• VALUE encoding;
• union {
• struct {
• `````` VALUE ident_methods;
``````
• } hash;
• } as;
+};

+static int
+has_extra_ivars(VALUE obj, struct extra_ivar *exivars, struct dump_arg
*arg)
{

• int encidx = rb_enc_get_index(obj);
• rb_encoding *enc = 0;
• st_data_t name;
• int count = 0, encidx;
• rb_encoding *enc;
• if (encidx <= 0 || !(enc = rb_enc_from_index(encidx))) {
• w_long(num, arg->arg);
• return;
• if (!ENCODING_IS_ASCII8BIT(obj) &&
• (encidx = rb_enc_get_index(obj)) > 0 &&
• (enc = rb_enc_from_index(encidx)) != 0) {
• st_data_t name;
• do {
• `````` if (!arg->encodings)
``````
• arg->encodings = st_init_strcasetable();
• `````` else if (st_lookup(arg->encodings, (st_data_t)rb_enc_name(enc),
``````

&name))

• break;
• `````` name = (st_data_t)rb_str_new2(rb_enc_name(enc));
``````
• `````` st_insert(arg->encodings, (st_data_t)rb_enc_name(enc), name);
``````
• } while (0);
• ++count;
• exivars->encoding = (VALUE)name;
• }
• else {
• exivars->encoding = 0;
• }
• switch (BUILTIN_TYPE(obj)) {
• `````` case T_HASH: {
``````
• VALUE ids = rb_hash_get_ident_methods(obj);
• if (RTEST(ids)) {
• `````` exivars->as.hash.ident_methods = ids;
``````
• `````` ++count;
``````
• }
• else {
• `````` exivars->as.hash.ident_methods = 0;
``````
• }
• break;
• `````` }
``````
• }
• exivars->obj = obj;
• return exivars->count = count;
+}

+static void
+w_extra_ivars(struct extra_ivar *exivars, struct dump_call_arg *arg)
+{

• if (exivars->encoding) {
• w_symbol(rb_id_encoding(), arg->arg);
• w_object(exivars->encoding, arg->arg, arg->limit);
• }
• switch (BUILTIN_TYPE(exivars->obj)) {
• `````` case T_HASH:
``````
• if (exivars->as.hash.ident_methods) {
• `````` w_symbol(rb_hash_ident_methods_id(), arg->arg);
``````
• `````` w_object(exivars->as.hash.ident_methods, arg->arg, arg->limit);
``````
• }
• break;
}
• w_long(num + 1, arg->arg);
• w_symbol(rb_id_encoding(), arg->arg);
• do {
• if (!arg->arg->encodings)
• `````` arg->arg->encodings = st_init_strcasetable();
``````
• else if (st_lookup(arg->arg->encodings, (st_data_t)rb_enc_name(enc),
&name))
• `````` break;
``````
• name = (st_data_t)rb_str_new2(rb_enc_name(enc));
• st_insert(arg->arg->encodings, (st_data_t)rb_enc_name(enc), name);
• } while (0);
• w_object(name, arg->arg, arg->limit);
}

static void
-w_ivar(VALUE obj, st_table *tbl, struct dump_call_arg *arg)
+w_ivar(struct extra_ivar *exivars, st_table *tbl, struct dump_call_arg
*arg)
{
long num = tbl ? tbl->num_entries : 0;

• w_encoding(obj, num, arg);
• num += exivars->count;
• w_long(num, arg->arg);
• if (exivars->count) {
• w_extra_ivars(exivars, arg);
• }
if (tbl) {
st_foreach_safe(tbl, w_obj_each, (st_data_t)arg);
@@ -501,17 +558,20 @@ w_ivar(VALUE obj, st_table *tbl, struct

static void
-w_objivar(VALUE obj, struct dump_call_arg *arg)
+w_objivar(struct extra_ivar *exivars, struct dump_call_arg *arg)
{

• VALUE *ptr;
• VALUE *ptr, obj = exivars->obj;
long i, len, num;

len = ROBJECT_LEN(obj);
ptr = ROBJECT_PTR(obj);

• num = 0;
• num = exivars->count;
for (i = 0; i < len; i++)
if (ptr[i] != Qundef)
num += 1;
• w_encoding(obj, num, arg);
• w_long(num, arg->arg);
• if (exivars->count) {
• w_extra_ivars(exivars, arg);
• }
if (num != 0) {
rb_ivar_foreach(obj, w_obj_each, (st_data_t)arg);
@@ -526,6 +586,8 @@ w_object(VALUE obj, struct dump_arg *arg
st_data_t num;
int hasiv = 0;
-#define has_ivars(obj, ivtbl) ((ivtbl = rb_generic_ivar_table(obj)) !=
0 || \
• ``````        (!SPECIAL_CONST_P(obj) && !ENCODING_IS_ASCII8BIT(obj)))
``````
• struct extra_ivar exivar;
+#define has_ivars(obj, ivtbl, exivar) \

• (((ivtbl = rb_generic_ivar_table(obj)) != 0) | \

• ``````(!SPECIAL_CONST_P(obj) && has_extra_ivars(obj, &exivar, arg)))
``````

if (limit == 0) {
@@ -543,5 +605,5 @@ w_object(VALUE obj, struct dump_arg *arg
}

• if ((hasiv = has_ivars(obj, ivtbl)) != 0) {
• if ((hasiv = has_ivars(obj, ivtbl, exivar)) != 0) {
w_byte(TYPE_IVAR, arg);
}
@@ -586,4 +648,5 @@ w_object(VALUE obj, struct dump_arg *arg
VALUE real_obj = obj;
obj = compat->dumper(real_obj);
• ``````           exivar.obj = obj;
st_insert(arg->compat_tbl, (st_data_t)obj,
``````

(st_data_t)real_obj);
}
@@ -596,5 +659,5 @@ w_object(VALUE obj, struct dump_arg *arg
w_class(TYPE_USRMARSHAL, obj, arg, Qfalse);
w_object(v, arg, limit);

• `````` if (hasiv) w_ivar(obj, 0, &c_arg);
``````
• `````` if (hasiv) w_ivar(&exivar, 0, &c_arg);
return;
``````
}
@@ -602,4 +665,5 @@ w_object(VALUE obj, struct dump_arg *arg
VALUE v;
st_table *ivtbl2 = 0;
• ``````       struct extra_ivar exivar2;
int hasiv2;
``````

@@ -608,5 +672,5 @@ w_object(VALUE obj, struct dump_arg *arg
rb_raise(rb_eTypeError, “_dump() must return string”);
}

• `````` if ((hasiv2 = has_ivars(v, ivtbl2)) != 0 && !hasiv) {
``````
• `````` if ((hasiv2 = has_ivars(v, ivtbl2, exivar2)) != 0 && !hasiv) {
``````
w_byte(TYPE_IVAR, arg);
}
@@ -614,8 +678,8 @@ w_object(VALUE obj, struct dump_arg *arg
w_bytes(RSTRING_PTR(v), RSTRING_LEN(v), arg);
if (hasiv2) {
• w_ivar(v, ivtbl2, &c_arg);
• w_ivar(&exivar2, ivtbl2, &c_arg);
}
else if (hasiv) {
• w_ivar(obj, ivtbl, &c_arg);
• w_ivar(&exivar, ivtbl, &c_arg);
}
return;
@@ -739,5 +803,5 @@ w_object(VALUE obj, struct dump_arg *arg
case T_OBJECT:
w_class(TYPE_OBJECT, obj, arg, Qtrue);
• `````` w_objivar(obj, &c_arg);
``````
• `````` w_objivar(&exivar, &c_arg);
break;
``````

@@ -764,5 +828,5 @@ w_object(VALUE obj, struct dump_arg *arg
}
if (hasiv) {

• w_ivar(obj, ivtbl, &c_arg);
• w_ivar(&exivar, ivtbl, &c_arg);
}
}
@@ -1093,4 +1159,8 @@ r_ivar(VALUE obj, struct load_arg *arg)
if (idx > 0) rb_enc_associate_index(obj, idx);
}
• `````` else if (TYPE(obj) == T_HASH &&
``````
• ``````    id == rb_hash_ident_methods_id()) {
``````
• rb_hash_set_ident_method(obj, val);
• `````` }
else {
``````
rb_ivar_set(obj, id, val);
Index: st.c
===================================================================
--- st.c (revision 15489)
+++ st.c (working copy)
@@ -69,7 +69,8 @@ static void rehash(st_table *);
#define Calloc(n,s) (char*)calloc((n),(s))

-#define EQUAL(table,x,y) ((x)==(y) || (*table->type->compare)((x),(y))
== 0)
+#define EQUAL(table,x,y) ((x)==(y) || \

• ``````   (*table->type->compare)((x),(y),(table)) == 0)
``````

-#define do_hash(key,table) (unsigned int)(*(table)->type->hash)((key))
+#define do_hash(key,table) (unsigned int)(*(table)->type->hash)((key),(table))
#define do_hash_bin(key,table) (do_hash(key, table)%(table)->num_bins)

@@ -154,19 +155,17 @@ stat_col()
#define MAX_PACKED_NUMHASH 5

-st_table*
-st_init_table_with_size(const struct st_hash_type *type, int size)
-{

• st_table *tbl;

#ifdef HASH_LOG

• if (init_st == 0) {
• init_st = 1;
• atexit(stat_col);
• }
+#define INIT_HASH_LOG() \
• (void)((init_st == 0) ? \
• ``````(init_st = 1, atexit(stat_col)) : 0)
``````

+#else
+#define INIT_HASH_LOG() ((void)0)
#endif

+static st_table *
+st_table_init(st_table *tbl, const struct st_hash_type *type, int size)
+{
size = new_size(size); /* round up to prime number */

• tbl = alloc(st_table);
tbl->type = type;
tbl->num_entries = 0;
@@ -180,4 +179,23 @@ st_init_table_with_size(const struct st_

st_table*
+st_init_table_with_size(const struct st_hash_type *type, int size)
+{

• return st_table_init(alloc(st_table), type, size);
+}

+st_table *
+st_init_table_extra(const struct st_hash_type *type, int extra)
+{

• return st_table_init(malloc(sizeof(st_table) + extra), type, 0);
+}

+st_table *
+st_init_table_extra_with_size(const struct st_hash_type *type, int
extra, int size)
+{

• return st_table_init(malloc(sizeof(st_table) + extra),
• ``````  type, size);
``````

+}
+
+st_table*
st_init_table(const struct st_hash_type *type)
{
Index: include/ruby/st.h
===================================================================
--- include/ruby/st.h (revision 15489)
+++ include/ruby/st.h (working copy)
@@ -74,4 +74,6 @@ st_table *st_init_strtable_with_size(int
st_table *st_init_strcasetable(void);
st_table *st_init_strcasetable_with_size(int);
+st_table *st_init_table_extra(const struct st_hash_type *, int);
+st_table *st_init_table_extra_with_size(const struct st_hash_type *,
int, int);
int st_delete(st_table *, st_data_t *, st_data_t *);
int st_delete_safe(st_table *, st_data_t *, st_data_t *, st_data_t);
@@ -91,4 +93,6 @@ int st_strcasecmp(const char *s1, const
int st_strncasecmp(const char *s1, const char *s2, size_t n);

+static inline void *st_extra_data(st_table *tbl) {return tbl + 1;}
+
#if defined(__cplusplus)
#if 0
Index: test/ruby/test_hash.rb
===================================================================
--- test/ruby/test_hash.rb (revision 15511)
+++ test/ruby/test_hash.rb (working copy)
@@ -818,12 +818,11 @@ class TestHash < Test::Unit::TestCase
end

• def test_compare_by_identity
• a = “foo”
• assert(!{}.compare_by_identity?)
• h = { a => “bar” }
• assert(!h.compare_by_identity?)
• h.compare_by_identity
• assert(h.compare_by_identity?)
• #assert_equal(“bar”, h[a])
• def test_identified_by
• a = “foo”.freeze
• assert(!{}.methods_to_identify)
• h = Hash.identified_by(:equal?, :object_id)
• assert_equal([:equal?, :object_id], h.methods_to_identify)
• h[a] = “bar”
• assert_equal(“bar”, h[a])
assert_nil(h[“foo”])
end