# String#hash

e\$B\$1\$\$\$8\$e!w\$\$\$7\$D\$+\$G\$9e(B.

Ruby1.9e\$B7O\$Ne(B String#hash e\$B\$H\$\$\$&\$+e(B e\$B\$9\$Y\$F\$Ne(B
hashe\$B4X?t\$K\$D\$\$\$F\$G\$9\$,e(B,
e\$B%W%m%;%9Kh\$K0c\$&CM\$K\$J\$k\$N\$G\$9\$,e(B,
e\$B2?\$+?<\$\$M}M3\$,\$"\$k\$N\$G\$7\$g\$&\$+e(B?

e\$BB>\$N%O%C%7%e4X?te(B(SHA1e\$BEye(B)e\$B\$HF1\$8\$/e(B,
e\$B%W%m%;%9\$K0MB8\$;\$:e(B, e\$B8GDjCM\$NJ}\$,NI\$\$e(B
e\$B5\$\$,\$9\$k\$N\$G\$9\$,e(B?

MurmurHashe\$B\$@\$H3N\$+\$Ke(Bseede\$B\$rM?\$(\$k\$h\$&\$K\$J\$C\$F\$\$\$^\$9\$,e(B,
e\$B:#\$Ne(BRuby1.9e\$B\$NMMe(B
e\$B\$J;H\$\$J}\$N\$?\$a\$G\$O\$J\$\$5\$\$,\$7\$^\$9e(B.

e\$BNc\$(\$Pe(B, Complexe\$B\$@\$He(B

@real.hash ^ @imag.hash

e\$B\$H\$J\$C\$F\$\$\$^\$9e(B. e\$B<B:]\$K\$Oe(B, e\$B\$3\$l\$O\$"\$^\$jNI\$/\$J\$/\$Fe(B,
@real==@image e\$B\$N\$H\$-e(B
e\$B\$D\$M\$KF1\$8CM\$K\$J\$C\$F\$7\$^\$\$\$^\$9e(B. e\$B\$=\$3\$Ge(B,
e\$BJL\$Ne(Bseede\$B\$rM?\$(e(B

@real.hash(seed1) ^ @image.hash(seed2)

e\$B\$NMM\$K\$9\$k\$He(B, e\$B>e5-\$NLdBj\$O\$J\$/\$J\$j\$^\$9e(B. e\$B\$3\$l\$Oe(B,
Arraye\$BEy\$Ne(Bhashe\$B4X?t\$G\$be(B
e\$BF1MM\$G\$9e(B. e\$B\$^\$?e(B,
Fixnum#hashe\$B\$He(BObject#hashe\$B\$bJL\$Ne(Bseede\$B\$rM?\$(\$k\$N\$b\$h\$5\$=\$&e(B
e\$B\$J5\$\$,\$7\$^\$9e(B.

__
---------------------------------------------------->> e\$B@PDMe(B
e\$B7=<ye(B <<—
---------------------------------->> e-mail: [email protected] <<—

In article [email protected],
[email protected] (Keiju ISHITSUKA) writes:

Ruby1.9e\$B7O\$Ne(B String#hash e\$B\$H\$\$\$&\$+e(B e\$B\$9\$Y\$F\$Ne(B hashe\$B4X?t\$K\$D\$\$\$F\$G\$9\$,e(B,
e\$B%W%m%;%9Kh\$K0c\$&CM\$K\$J\$k\$N\$G\$9\$,e(B, e\$B2?\$+?<\$\$M}M3\$,\$"\$k\$N\$G\$7\$g\$&\$+e(B?

algorithmic complexity attack e\$BBP:v\$G\$9!#e(B

http://www.cs.rice.edu/~scrosby/hash/CrosbyWallach_UsenixSec2003/

e\$B\$^\$D\$b\$He(B e\$B\$f\$-\$R\$m\$G\$9e(B

In message “Re: [ruby-dev:37777] String#hash”
on Fri, 16 Jan 2009 20:00:08 +0900, [email protected] (Keiju
ISHITSUKA) writes:

|e\$BNc\$(\$Pe(B, Complexe\$B\$@\$He(B
|
| @real.hash ^ @imag.hash
|
|e\$B\$H\$J\$C\$F\$\$\$^\$9e(B. e\$B<B:]\$K\$Oe(B, e\$B\$3\$l\$O\$"\$^\$jNI\$/\$J\$/\$Fe(B, @real==@image e\$B\$N\$H\$-e(B
|e\$B\$D\$M\$KF1\$8CM\$K\$J\$C\$F\$7\$^\$\$\$^\$9e(B. e\$B\$=\$3\$Ge(B, e\$BJL\$Ne(Bseede\$B\$rM?\$(e(B
|
| @real.hash(seed1) ^ @image.hash(seed2)
|
|e\$B\$NMM\$K\$9\$k\$He(B, e\$B>e5-\$NLdBj\$O\$J\$/\$J\$j\$^\$9e(B. e\$B\$3\$l\$Oe(B, Arraye\$BEy\$Ne(Bhashe\$B4X?t\$G\$be(B
|e\$BF1MM\$G\$9e(B. e\$B\$^\$?e(B, Fixnum#hashe\$B\$He(BObject#hashe\$B\$bJL\$Ne(Bseede\$B\$rM?\$(\$k\$N\$b\$h\$5\$=\$&e(B
|e\$B\$J5\$\$,\$7\$^\$9e(B.

e\$BKh2sJQ\$o\$kM}M3\$Oe(B[ruby-dev:37778]e\$B\$NDL\$j\$J\$s\$G\$9\$,!"3F%*%V%8%'e(B
e\$B%/%H\$Ne(Bhashe\$B\$N7W;;J}<0\$r99?7\$7\$?J}\$,NI\$\$\$H\$\$\$&Ds0F\$O\$=\$l\$H\$OFHe(B
e\$BN)\$KM-8z\$@\$H;W\$\$\$^\$9!#\$I\$s\$J7W;;<0\$,NI\$\$\$N\$+5DO@\$7\$F\$\$\$?\$@\$1e(B
e\$B\$^\$;\$s\$+!)e(B

e\$B\$J\$+\$@\$G\$9!#e(B

At Mon, 19 Jan 2009 05:28:18 +0900,
Nobuyoshi N. wrote in [ruby-dev:37784]:

e\$B\$7\$+\$7!“e(BMurmurHashe\$B\$C\$FESCf\$G\$?\$^\$?\$^e(B0e\$B\$K\$J\$k\$H\$=\$l0J9_\$N%G!<%?\$Oe(B
e\$BL54X78\$K\$J\$k\$h\$&\$J5\$\$,\$9\$k\$s\$G\$9\$,!”\$\$\$\$\$s\$G\$7\$g\$&\$+!#e(B

e\$B\$9\$\$\$^\$;\$s!"%%1\$F\$^\$7\$?!#\$3\$l\$O40A4\$K4*0c\$\$\$G\$9!#e(B

e\$B\$@\$+\$i\$H\$\$\$&\$o\$1\$G\$O\$"\$j\$^\$;\$s\$,!"e(BMurmurHash2.0e\$B\$H\$\$\$&\$b\$N\$,=P\$Fe(B
e\$B\$\$\$?\$N\$G\$=\$A\$i\$K:9\$7BX\$(\$F\$_\$^\$7\$?!#G\6a\$/B.\$/\$J\$k\$h\$&\$G\$9!#e(B

e\$B\$J\$+\$@\$G\$9!#e(B

At Sat, 17 Jan 2009 16:33:08 +0900,
Yukihiro M. wrote in [ruby-dev:37779]:

|e\$BF1MM\$G\$9e(B. e\$B\$^\$?e(B, Fixnum#hashe\$B\$He(BObject#hashe\$B\$bJL\$Ne(Bseede\$B\$rM?\$(\$k\$N\$b\$h\$5\$=\$&e(B
|e\$B\$J5\$\$,\$7\$^\$9e(B.

e\$BKh2sJQ\$o\$kM}M3\$Oe(B[ruby-dev:37778]e\$B\$NDL\$j\$J\$s\$G\$9\$,!"3F%*%V%8%'e(B
e\$B%/%H\$Ne(Bhashe\$B\$N7W;;J}<0\$r99?7\$7\$?J}\$,NI\$\$\$H\$\$\$&Ds0F\$O\$=\$l\$H\$OFHe(B
e\$BN)\$KM-8z\$@\$H;W\$\$\$^\$9!#\$I\$s\$J7W;;<0\$,NI\$\$\$N\$+5DO@\$7\$F\$\$\$?\$@\$1e(B
e\$B\$^\$;\$s\$+!)e(B

Complexe\$B\$de(BRationale\$B\$Oe(Brb_memhash()e\$B\$,;H\$(\$k\$H;W\$\$\$^\$9\$,!"2DJQD9\$G\$"e(B
e\$B\$ke(BArraye\$B\$de(BObjecte\$B!"e(BStructe\$B\$J\$I\$G\$OB?>/9)IW\$,I,MW\$G\$7\$g\$&!#e(B

e\$B\$7\$+\$7!“e(BMurmurHashe\$B\$C\$FESCf\$G\$?\$^\$?\$^e(B0e\$B\$K\$J\$k\$H\$=\$l0J9_\$N%G!<%?\$Oe(B
e\$BL54X78\$K\$J\$k\$h\$&\$J5\$\$,\$9\$k\$s\$G\$9\$,!”\$\$\$\$\$s\$G\$7\$g\$&\$+!#e(B

# Index: array.c

— array.c (revision 21650)
+++ array.c (working copy)
@@ -2722,10 +2722,10 @@ recursive_hash(VALUE ary, VALUE dummy, i
return LONG2FIX(0);
}

• h = RARRAY_LEN(ary);
• h = rb_hash_start(RARRAY_LEN(ary));
for (i=0; i<RARRAY_LEN(ary); i++) {
• h = (h << 1) | (h<0 ? 1 : 0);
n = rb_hash(RARRAY_PTR(ary)[i]);
• h ^= NUM2LONG(n);
• h = rb_hash_uint(h, NUM2LONG(n));
}
• h = rb_hash_end(h);
return LONG2FIX(h);
}
Index: complex.c
===================================================================
— complex.c (revision 21650)
+++ complex.c (working copy)
@@ -23,5 +23,5 @@ VALUE rb_cComplex;

static ID id_abs, id_abs2, id_arg, id_cmp, id_conj, id_convert,

• id_denominator, id_divmod, id_equal_p, id_expt, id_floor, id_hash,
• id_denominator, id_divmod, id_equal_p, id_expt, id_floor,
id_idiv, id_inspect, id_negate, id_numerator, id_polar, id_quo,
id_real_p, id_to_f, id_to_i, id_to_r, id_to_s;
@@ -154,6 +154,4 @@ f_sub(VALUE x, VALUE y)
}

## -binop(xor, ‘^’)

fun1(abs)
fun1(abs2)
@@ -162,5 +160,4 @@ fun1(conj)
fun1(denominator)
fun1(floor)
-fun1(hash)
fun1(inspect)
fun1(negate)
@@ -858,6 +855,15 @@ static VALUE
nucomp_hash(VALUE self)
{

• long v, h[3];
• VALUE n;
• get_dat1(self);
• return f_xor(f_hash(dat->real), f_hash(dat->imag));
• h[0] = rb_hash(rb_obj_class(self));
• n = rb_hash(dat->real);
• h[1] = NUM2LONG(n);
• n = rb_hash(dat->imag);
• h[2] = NUM2LONG(n);
• v = rb_memhash(h, sizeof(h));
• return LONG2FIX(v);
}

@@ -1383,5 +1389,4 @@ Init_Complex(void)
id_expt = rb_intern(“**”);
id_floor = rb_intern(“floor”);

• id_hash = rb_intern(“hash”);
id_idiv = rb_intern(“div”);
id_inspect = rb_intern(“inspect”);
Index: hash.c
===================================================================
— hash.c (revision 21650)
+++ hash.c (working copy)
@@ -70,5 +70,5 @@ rb_any_hash(VALUE a)
case T_FIXNUM:
case T_SYMBOL:
• hnum = (int)a;
• hnum = rb_hash_end(rb_hash_start((unsigned int)a));
break;

@@ -1511,7 +1511,10 @@ static int
hash_i(VALUE key, VALUE val, int *hval)
{

• VALUE n;
if (key == Qundef) return ST_CONTINUE;
• *hval ^= rb_hash(key);
• *hval ^= rb_hash(val);
• n = rb_hash(key);
• *hval = rb_hash_uint(*hval, NUM2LONG(n));
• n = rb_hash(val);
• *hval = rb_hash_uint(*hval, NUM2LONG(n));
return ST_CONTINUE;
}
@@ -1527,6 +1530,7 @@ recursive_hash(VALUE hash, VALUE dummy,
if (!RHASH(hash)->ntbl)
return LONG2FIX(0);
• hval = RHASH(hash)->ntbl->num_entries;
• hval = rb_hash_start(RHASH(hash)->ntbl->num_entries);
rb_hash_foreach(hash, hash_i, (st_data_t)&hval);
• hval = rb_hash_end(hval);
return INT2FIX(hval);
}
Index: gc.c
===================================================================
— gc.c (revision 21650)
+++ gc.c (working copy)
@@ -2904,5 +2904,4 @@ Init_GC(void)
OBJ_FREEZE(nomem_error);
• rb_define_method(rb_mKernel, “hash”, rb_obj_id, 0);
rb_define_method(rb_mKernel, “id”, rb_obj_id, 0);
rb_define_method(rb_mKernel, “object_id”, rb_obj_id, 0);
Index: object.c
===================================================================
— object.c (revision 21650)
+++ object.c (working copy)
@@ -96,4 +96,12 @@ rb_obj_equal(VALUE obj1, VALUE obj2)
}

+VALUE
+rb_obj_hash(VALUE obj)
+{

• VALUE oid = rb_obj_id(obj);
• unsigned h = rb_hash_end(rb_hash_start(NUM2LONG(oid)));
• return LONG2NUM(h);
+}

/*

• call-seq:
@@ -2517,4 +2525,5 @@ Init_Object(void)
rb_define_method(rb_mKernel, “!~”, rb_obj_not_match, 1);
rb_define_method(rb_mKernel, “eql?”, rb_obj_equal, 1);
• rb_define_method(rb_mKernel, “hash”, rb_obj_hash, 0);

rb_define_method(rb_mKernel, “class”, rb_obj_class, 0);
Index: range.c
===================================================================
— range.c (revision 21650)
+++ range.c (working copy)
@@ -214,12 +214,14 @@ static VALUE
range_hash(VALUE range)
{

• long hash = EXCL(range);
• unsigned hash = EXCL(range);
VALUE v;

• hash = rb_hash_start(hash);
v = rb_hash(RANGE_BEG(range));

• hash ^= v << 1;
• hash = rb_hash_uint(hash, NUM2LONG(v));
v = rb_hash(RANGE_END(range));
• hash ^= v << 9;
• hash ^= EXCL(range) << 24;
• hash = rb_hash_uint(hash, NUM2LONG(v));

• hash = rb_hash_uint(hash, EXCL(range) << 24);

• hash = rb_hash_end(hash);

return LONG2FIX(hash);
Index: rational.c
===================================================================
— rational.c (revision 21650)
+++ rational.c (working copy)
@@ -28,5 +28,5 @@ VALUE rb_cRational;

static ID id_abs, id_cmp, id_convert, id_equal_p, id_expt, id_floor,

• id_hash, id_idiv, id_inspect, id_integer_p, id_negate, id_to_f,
• id_idiv, id_inspect, id_integer_p, id_negate, id_to_f,
id_to_i, id_to_s, id_truncate;

@@ -136,9 +136,6 @@ f_sub(VALUE x, VALUE y)
}

## -binop(xor, ‘^’)

fun1(abs)
fun1(floor)
-fun1(hash)
fun1(inspect)
fun1(integer_p)
@@ -1162,6 +1159,15 @@ static VALUE
nurat_hash(VALUE self)
{

• long v, h[3];
• VALUE n;
• get_dat1(self);
• return f_xor(f_hash(dat->num), f_hash(dat->den));
• h[0] = rb_hash(rb_obj_class(self));
• n = rb_hash(dat->num);
• h[1] = NUM2LONG(n);
• n = rb_hash(dat->den);
• h[2] = NUM2LONG(n);
• v = rb_memhash(h, sizeof(h));
• return LONG2FIX(v);
}

@@ -1555,5 +1561,4 @@ Init_Rational(void)
id_expt = rb_intern(“**”);
id_floor = rb_intern(“floor”);

• id_hash = rb_intern(“hash”);
id_idiv = rb_intern(“div”);
id_inspect = rb_intern(“inspect”);
Index: string.c
===================================================================
— string.c (revision 21650)
+++ string.c (working copy)
@@ -1883,10 +1883,17 @@ rb_str_concat(VALUE str1, VALUE str2)

/* MurmurHash described in http://murmurhash.googlepages.com/ */
-static unsigned int
-hash(const unsigned char * data, int len, unsigned int h)
+static inline uint32_t
+murmur(unsigned int h, const int r)
{
const unsigned int m = 0x7fd652ad;

• const int r = 16;
• h *= m;
• return h ^ (h >> r);
+}

+#define murmur16(h) murmur(h, 16)

+static unsigned int
+hash(const unsigned char * data, int len, unsigned int h)
+{

@@ -1929,7 +1936,5 @@ hash(const unsigned char * data, int len
t = (t >> sr) | (d << sl);
#endif

• h += t;
• h *= m;
• h ^= h >> r;
• h = murmur16(h + t);
t = d;

@@ -1954,6 +1959,5 @@ hash(const unsigned char * data, int len
h += (t >> sr) | (d << sl);
#endif

• h *= m;
• h ^= h >> r;
• h = murmur16(h);
}

@@ -1965,8 +1969,5 @@ hash(const unsigned char * data, int len
{
do {

• h += *(uint32_t *)data;
• h *= m;
• h ^= h >> r;
• h = murmur16(h + *(uint32_t *)data);
data += 4;
len -= 4;
@@ -1991,18 +1992,57 @@ hash(const unsigned char * data, int len
h += data[0];
#endif
• h *= m;
• h ^= h >> r;
• h = murmur16(h);
}
• h *= m;
• h ^= h >> 10;
• h *= m;
• h ^= h >> 17;
• return rb_hash_end(h);
+}

+unsigned int
+rb_hash_uint32(unsigned int h, unsigned int i)
+{

• return murmur(h + i, 16);
+}

+unsigned int
+rb_hash_uint(unsigned int h, unsigned int i)
+{

• unsigned int v = 0;
• h += i;
+#ifdef WORDS_BIGENDIAN
+#if SIZEOF_INTCHAR_BIT > 128
• v = murmur16(v + (h >> 128));
+#endif
+#if SIZEOF_INT
CHAR_BIT > 8*8
• v = murmur16(v + (h >> 88));
+#endif
+#if SIZEOF_INT
CHAR_BIT > 4*8
• v = murmur16(v + (h >> 4*8));
+#endif
+#endif
• v = murmur16(v + h);
+#ifndef WORDS_BIGENDIAN
+#if SIZEOF_INTCHAR_BIT > 48
• v = murmur16(v + (h >> 48));
+#endif
+#if SIZEOF_INT
CHAR_BIT > 8*8
• v = murmur16(v + (h >> 88));
+#endif
+#if SIZEOF_INT
CHAR_BIT > 12*8
• v = murmur16(v + (h >> 12*8));
+#endif
+#endif
• return v;
+}

+unsigned int
+rb_hash_end(unsigned int h)
+{

• h = murmur(h, 10);
• h = murmur(h, 17);
return h;
}

-int
-rb_memhash(const void *ptr, long len)
+unsigned int
+rb_hash_start(unsigned int h)
{
static int hashseed_init = 0;
@@ -2013,6 +2053,11 @@ rb_memhash(const void *ptr, long len)
hashseed_init = 1;
}

• return hashseed + h;
+}
• return hash(ptr, len, hashseed);
+int
+rb_memhash(const void *ptr, long len)
+{
• return hash(ptr, len, rb_hash_start(0));
}

# Index: struct.c

— struct.c (revision 21650)
+++ struct.c (working copy)
@@ -805,14 +805,15 @@ static VALUE
rb_struct_hash(VALUE s)
{

• long i, h;
• long i;
• unsigned h;
VALUE n;
• h = rb_hash(rb_obj_class(s));
• h = rb_hash_start(rb_hash(rb_obj_class(s)));
for (i = 0; i < RSTRUCT_LEN(s); i++) {
• h = (h << 1) | (h<0 ? 1 : 0);
n = rb_hash(RSTRUCT_PTR(s)[i]);
• h ^= NUM2LONG(n);
• h = rb_hash_uint(h, NUM2LONG(n));
}
• return LONG2FIX(h);
• h = rb_hash_end(h);
• return INT2FIX(h);
}

# Index: time.c

— time.c (revision 21650)
+++ time.c (working copy)
@@ -1184,5 +1184,5 @@ time_hash(VALUE time)

`````` GetTimeval(time, tobj);
``````
• hash = tobj->ts.tv_sec ^ tobj->ts.tv_nsec;
• hash = rb_hash_end(rb_hash_uint(rb_hash_start(tobj->ts.tv_sec),
tobj->ts.tv_nsec));
return LONG2FIX(hash);
}
Index: include/ruby/intern.h
===================================================================
— include/ruby/intern.h (revision 21650)
+++ include/ruby/intern.h (working copy)
@@ -607,4 +607,8 @@ VALUE rb_str_append(VALUE, VALUE);
VALUE rb_str_concat(VALUE, VALUE);
int rb_memhash(const void *ptr, long len);
+unsigned int rb_hash_start(unsigned int);
+unsigned int rb_hash_uint32(unsigned int, unsigned int);
+unsigned int rb_hash_uint(unsigned int, unsigned int);
+unsigned int rb_hash_end(unsigned int);
int rb_str_hash(VALUE);
int rb_str_hash_cmp(VALUE,VALUE);

e\$BKNIt\$G\$9!#e(B

e\$B\$@\$+\$i\$H\$\$\$&\$o\$1\$G\$O\$"\$j\$^\$;\$s\$,!"e(BMurmurHash2.0e\$B\$H\$\$\$&\$b\$N\$,=P\$Fe(B
e\$B\$\$\$?\$N\$G\$=\$A\$i\$K:9\$7BX\$(\$F\$_\$^\$7\$?!#G\6a\$/B.\$/\$J\$k\$h\$&\$G\$9!#e(B

e\$B\$3\$NJQ990J9_e(Bamd64e\$B\$N%^%7%s\$G%3%s%Q%\$%k\$,DL\$j\$^\$;\$s!#e(B

zsh % make up all
make[1]: Entering directory `/home/shyouhei/build/trunk' /home/shyouhei/ruby/trunk/revision.h unchanged make[1]: Leaving directory`/home/shyouhei/build/trunk’
gcc -pipe -ansi -std=iso9899:199409 -O2 -march=athlon64 -mcmodel=medium
-g3
-pedantic -Wall -Wextra -Wundef -Wno-format -Wno-long-long
-Wno-parentheses
-Wno-unused -Wno-sign-compare -Wno-missing-field-initializers -I.
-I.ext/include/x86_64-linux -I/home/shyouhei/ruby/trunk/include
-I/home/shyouhei/ruby/trunk -DRUBY_EXPORT -DRUBY_DEBUG_ENV
-D_FORTIFY_SOURCE=2
-o string.o -c /home/shyouhei/ruby/trunk/string.c
/home/shyouhei/ruby/trunk/string.c: In function e\$B!Fe(Bhashe\$B!Ge(B:
/home/shyouhei/ruby/trunk/string.c:1976: error: too few arguments to
function
e\$B!Fe(Bmurmure\$B!Ge(B
make: *** [string.o] Error 1
zsh: exit 2 make up all