String#hash


#1

e$B$1$$$8$e!w$$$7$D$+$G$9e(B.

Ruby1.9e$B7O$Ne(B String#hash e$B$H$$$&$+e(B e$B$9$Y$F$Ne(B
hashe$B4X?t$K$D$$$F$G$9$,e(B,
e$B%W%m%;%9Kh$K0c$&CM$K$J$k$N$G$9$,e(B,
e$B2?$+?<$$M}M3$,$"$k$N$G$7$g$&$+e(B?

e$BB>$N%O%C%7%e4X?te(B(SHA1e$BEye(B)e$B$HF1$8$/e(B,
e$B%W%m%;%9$K0MB8$;$:e(B, e$B8GDjCM$NJ}$,NI$$e(B
e$B5$$,$9$k$N$G$9$,e(B?

MurmurHashe$B$@$H3N$+$Ke(Bseede$B$rM?$($k$h$&$K$J$C$F$$$^$9$,e(B,
e$B:#$Ne(BRuby1.9e$B$NMMe(B
e$B$J;H$$J}$N$?$a$G$O$J$$5$$,$7$^$9e(B.

e$BNc$($Pe(B, Complexe$B$@$He(B

@real.hash ^ @imag.hash

e$B$H$J$C$F$$$^$9e(B. e$B<B:]$K$Oe(B, e$B$3$l$O$"$^$jNI$/$J$/$Fe(B,
@real==@image e$B$N$H$-e(B
e$B$D$M$KF1$8CM$K$J$C$F$7$^$$$^$9e(B. e$B$=$3$Ge(B,
e$BJL$Ne(Bseede$B$rM?$(e(B

@real.hash(seed1) ^ @image.hash(seed2)

e$B$NMM$K$9$k$He(B, e$B>e5-$NLdBj$O$J$/$J$j$^$9e(B. e$B$3$l$Oe(B,
Arraye$BEy$Ne(Bhashe$B4X?t$G$be(B
e$BF1MM$G$9e(B. e$B$^$?e(B,
Fixnum#hashe$B$He(BObject#hashe$B$bJL$Ne(Bseede$B$rM?$($k$N$b$h$5$=$&e(B
e$B$J5$$,$7$^$9e(B.

__
---------------------------------------------------->> e$B@PDMe(B
e$B7=<ye(B <<—
---------------------------------->> e-mail: removed_email_address@domain.invalid <<—


#2

In article removed_email_address@domain.invalid,
removed_email_address@domain.invalid (Keiju ISHITSUKA) writes:

Ruby1.9e$B7O$Ne(B String#hash e$B$H$$$&$+e(B e$B$9$Y$F$Ne(B hashe$B4X?t$K$D$$$F$G$9$,e(B,
e$B%W%m%;%9Kh$K0c$&CM$K$J$k$N$G$9$,e(B, e$B2?$+?<$$M}M3$,$"$k$N$G$7$g$&$+e(B?

algorithmic complexity attack e$BBP:v$G$9!#e(B

http://www.cs.rice.edu/~scrosby/hash/CrosbyWallach_UsenixSec2003/


#3

e$B$^$D$b$He(B e$B$f$-$R$m$G$9e(B

In message “Re: [ruby-dev:37777] String#hash”
on Fri, 16 Jan 2009 20:00:08 +0900, removed_email_address@domain.invalid (Keiju
ISHITSUKA) writes:

|e$BNc$($Pe(B, Complexe$B$@$He(B
|
| @real.hash ^ @imag.hash
|
|e$B$H$J$C$F$$$^$9e(B. e$B<B:]$K$Oe(B, e$B$3$l$O$"$^$jNI$/$J$/$Fe(B, @real==@image e$B$N$H$-e(B
|e$B$D$M$KF1$8CM$K$J$C$F$7$^$$$^$9e(B. e$B$=$3$Ge(B, e$BJL$Ne(Bseede$B$rM?$(e(B
|
| @real.hash(seed1) ^ @image.hash(seed2)
|
|e$B$NMM$K$9$k$He(B, e$B>e5-$NLdBj$O$J$/$J$j$^$9e(B. e$B$3$l$Oe(B, Arraye$BEy$Ne(Bhashe$B4X?t$G$be(B
|e$BF1MM$G$9e(B. e$B$^$?e(B, Fixnum#hashe$B$He(BObject#hashe$B$bJL$Ne(Bseede$B$rM?$($k$N$b$h$5$=$&e(B
|e$B$J5$$,$7$^$9e(B.

e$BKh2sJQ$o$kM}M3$Oe(B[ruby-dev:37778]e$B$NDL$j$J$s$G$9$,!"3F%*%V%8%'e(B
e$B%/%H$Ne(Bhashe$B$N7W;;J}<0$r99?7$7$?J}$,NI$$$H$$$&Ds0F$O$=$l$H$OFHe(B
e$BN)$KM-8z$@$H;W$$$^$9!#$I$s$J7W;;<0$,NI$$$N$+5DO@$7$F$$$?$@$1e(B
e$B$^$;$s$+!)e(B


#4

e$B$J$+$@$G$9!#e(B

At Mon, 19 Jan 2009 05:28:18 +0900,
Nobuyoshi N. wrote in [ruby-dev:37784]:

e$B$7$+$7!“e(BMurmurHashe$B$C$FESCf$G$?$^$?$^e(B0e$B$K$J$k$H$=$l0J9_$N%G!<%?$Oe(B
e$BL54X78$K$J$k$h$&$J5$$,$9$k$s$G$9$,!”$$$$$s$G$7$g$&$+!#e(B

e$B$9$$$^$;$s!"%%1$F$^$7$?!#$3$l$O40A4$K4*0c$$$G$9!#e(B

e$B$@$+$i$H$$$&$o$1$G$O$"$j$^$;$s$,!"e(BMurmurHash2.0e$B$H$$$&$b$N$,=P$Fe(B
e$B$$$?$N$G$=$A$i$K:9$7BX$($F$_$^$7$?!#G\6a$/B.$/$J$k$h$&$G$9!#e(B


#5

e$B$J$+$@$G$9!#e(B

At Sat, 17 Jan 2009 16:33:08 +0900,
Yukihiro M. wrote in [ruby-dev:37779]:

|e$BF1MM$G$9e(B. e$B$^$?e(B, Fixnum#hashe$B$He(BObject#hashe$B$bJL$Ne(Bseede$B$rM?$($k$N$b$h$5$=$&e(B
|e$B$J5$$,$7$^$9e(B.

e$BKh2sJQ$o$kM}M3$Oe(B[ruby-dev:37778]e$B$NDL$j$J$s$G$9$,!"3F%*%V%8%'e(B
e$B%/%H$Ne(Bhashe$B$N7W;;J}<0$r99?7$7$?J}$,NI$$$H$$$&Ds0F$O$=$l$H$OFHe(B
e$BN)$KM-8z$@$H;W$$$^$9!#$I$s$J7W;;<0$,NI$$$N$+5DO@$7$F$$$?$@$1e(B
e$B$^$;$s$+!)e(B

Complexe$B$de(BRationale$B$Oe(Brb_memhash()e$B$,;H$($k$H;W$$$^$9$,!"2DJQD9$G$"e(B
e$B$ke(BArraye$B$de(BObjecte$B!"e(BStructe$B$J$I$G$OB?>/9)IW$,I,MW$G$7$g$&!#e(B

e$B$7$+$7!“e(BMurmurHashe$B$C$FESCf$G$?$^$?$^e(B0e$B$K$J$k$H$=$l0J9_$N%G!<%?$Oe(B
e$BL54X78$K$J$k$h$&$J5$$,$9$k$s$G$9$,!”$$$$$s$G$7$g$&$+!#e(B

Index: array.c

— array.c (revision 21650)
+++ array.c (working copy)
@@ -2722,10 +2722,10 @@ recursive_hash(VALUE ary, VALUE dummy, i
return LONG2FIX(0);
}

  • h = RARRAY_LEN(ary);
  • h = rb_hash_start(RARRAY_LEN(ary));
    for (i=0; i<RARRAY_LEN(ary); i++) {
  • h = (h << 1) | (h<0 ? 1 : 0);
    n = rb_hash(RARRAY_PTR(ary)[i]);
  • h ^= NUM2LONG(n);
  • h = rb_hash_uint(h, NUM2LONG(n));
    }
  • h = rb_hash_end(h);
    return LONG2FIX(h);
    }
    Index: complex.c
    ===================================================================
    — complex.c (revision 21650)
    +++ complex.c (working copy)
    @@ -23,5 +23,5 @@ VALUE rb_cComplex;

static ID id_abs, id_abs2, id_arg, id_cmp, id_conj, id_convert,

  • id_denominator, id_divmod, id_equal_p, id_expt, id_floor, id_hash,
  • id_denominator, id_divmod, id_equal_p, id_expt, id_floor,
    id_idiv, id_inspect, id_negate, id_numerator, id_polar, id_quo,
    id_real_p, id_to_f, id_to_i, id_to_r, id_to_s;
    @@ -154,6 +154,4 @@ f_sub(VALUE x, VALUE y)
    }

-binop(xor, ‘^’)

fun1(abs)
fun1(abs2)
@@ -162,5 +160,4 @@ fun1(conj)
fun1(denominator)
fun1(floor)
-fun1(hash)
fun1(inspect)
fun1(negate)
@@ -858,6 +855,15 @@ static VALUE
nucomp_hash(VALUE self)
{

  • long v, h[3];
  • VALUE n;
  • get_dat1(self);
  • return f_xor(f_hash(dat->real), f_hash(dat->imag));
  • h[0] = rb_hash(rb_obj_class(self));
  • n = rb_hash(dat->real);
  • h[1] = NUM2LONG(n);
  • n = rb_hash(dat->imag);
  • h[2] = NUM2LONG(n);
  • v = rb_memhash(h, sizeof(h));
  • return LONG2FIX(v);
    }

@@ -1383,5 +1389,4 @@ Init_Complex(void)
id_expt = rb_intern("**");
id_floor = rb_intern(“floor”);

  • id_hash = rb_intern(“hash”);
    id_idiv = rb_intern(“div”);
    id_inspect = rb_intern(“inspect”);
    Index: hash.c
    ===================================================================
    — hash.c (revision 21650)
    +++ hash.c (working copy)
    @@ -70,5 +70,5 @@ rb_any_hash(VALUE a)
    case T_FIXNUM:
    case T_SYMBOL:
  • hnum = (int)a;
  • hnum = rb_hash_end(rb_hash_start((unsigned int)a));
    break;

@@ -1511,7 +1511,10 @@ static int
hash_i(VALUE key, VALUE val, int *hval)
{

  • VALUE n;
    if (key == Qundef) return ST_CONTINUE;
  • *hval ^= rb_hash(key);
  • *hval ^= rb_hash(val);
  • n = rb_hash(key);
  • *hval = rb_hash_uint(*hval, NUM2LONG(n));
  • n = rb_hash(val);
  • *hval = rb_hash_uint(*hval, NUM2LONG(n));
    return ST_CONTINUE;
    }
    @@ -1527,6 +1530,7 @@ recursive_hash(VALUE hash, VALUE dummy,
    if (!RHASH(hash)->ntbl)
    return LONG2FIX(0);
  • hval = RHASH(hash)->ntbl->num_entries;
  • hval = rb_hash_start(RHASH(hash)->ntbl->num_entries);
    rb_hash_foreach(hash, hash_i, (st_data_t)&hval);
  • hval = rb_hash_end(hval);
    return INT2FIX(hval);
    }
    Index: gc.c
    ===================================================================
    — gc.c (revision 21650)
    +++ gc.c (working copy)
    @@ -2904,5 +2904,4 @@ Init_GC(void)
    OBJ_FREEZE(nomem_error);
  • rb_define_method(rb_mKernel, “hash”, rb_obj_id, 0);
    rb_define_method(rb_mKernel, “id”, rb_obj_id, 0);
    rb_define_method(rb_mKernel, “object_id”, rb_obj_id, 0);
    Index: object.c
    ===================================================================
    — object.c (revision 21650)
    +++ object.c (working copy)
    @@ -96,4 +96,12 @@ rb_obj_equal(VALUE obj1, VALUE obj2)
    }

+VALUE
+rb_obj_hash(VALUE obj)
+{

  • VALUE oid = rb_obj_id(obj);
  • unsigned h = rb_hash_end(rb_hash_start(NUM2LONG(oid)));
  • return LONG2NUM(h);
    +}

/*

  • call-seq:
    @@ -2517,4 +2525,5 @@ Init_Object(void)
    rb_define_method(rb_mKernel, “!~”, rb_obj_not_match, 1);
    rb_define_method(rb_mKernel, “eql?”, rb_obj_equal, 1);
  • rb_define_method(rb_mKernel, “hash”, rb_obj_hash, 0);

    rb_define_method(rb_mKernel, “class”, rb_obj_class, 0);
    Index: range.c
    ===================================================================
    — range.c (revision 21650)
    +++ range.c (working copy)
    @@ -214,12 +214,14 @@ static VALUE
    range_hash(VALUE range)
    {

  • long hash = EXCL(range);
  • unsigned hash = EXCL(range);
    VALUE v;

  • hash = rb_hash_start(hash);
    v = rb_hash(RANGE_BEG(range));

  • hash ^= v << 1;
  • hash = rb_hash_uint(hash, NUM2LONG(v));
    v = rb_hash(RANGE_END(range));
  • hash ^= v << 9;
  • hash ^= EXCL(range) << 24;
  • hash = rb_hash_uint(hash, NUM2LONG(v));

  • hash = rb_hash_uint(hash, EXCL(range) << 24);

  • hash = rb_hash_end(hash);

    return LONG2FIX(hash);
    Index: rational.c
    ===================================================================
    — rational.c (revision 21650)
    +++ rational.c (working copy)
    @@ -28,5 +28,5 @@ VALUE rb_cRational;

static ID id_abs, id_cmp, id_convert, id_equal_p, id_expt, id_floor,

  • id_hash, id_idiv, id_inspect, id_integer_p, id_negate, id_to_f,
  • id_idiv, id_inspect, id_integer_p, id_negate, id_to_f,
    id_to_i, id_to_s, id_truncate;

@@ -136,9 +136,6 @@ f_sub(VALUE x, VALUE y)
}

-binop(xor, ‘^’)

fun1(abs)
fun1(floor)
-fun1(hash)
fun1(inspect)
fun1(integer_p)
@@ -1162,6 +1159,15 @@ static VALUE
nurat_hash(VALUE self)
{

  • long v, h[3];
  • VALUE n;
  • get_dat1(self);
  • return f_xor(f_hash(dat->num), f_hash(dat->den));
  • h[0] = rb_hash(rb_obj_class(self));
  • n = rb_hash(dat->num);
  • h[1] = NUM2LONG(n);
  • n = rb_hash(dat->den);
  • h[2] = NUM2LONG(n);
  • v = rb_memhash(h, sizeof(h));
  • return LONG2FIX(v);
    }

@@ -1555,5 +1561,4 @@ Init_Rational(void)
id_expt = rb_intern("**");
id_floor = rb_intern(“floor”);

  • id_hash = rb_intern(“hash”);
    id_idiv = rb_intern(“div”);
    id_inspect = rb_intern(“inspect”);
    Index: string.c
    ===================================================================
    — string.c (revision 21650)
    +++ string.c (working copy)
    @@ -1883,10 +1883,17 @@ rb_str_concat(VALUE str1, VALUE str2)

/* MurmurHash described in http://murmurhash.googlepages.com/ */
-static unsigned int
-hash(const unsigned char * data, int len, unsigned int h)
+static inline uint32_t
+murmur(unsigned int h, const int r)
{
const unsigned int m = 0x7fd652ad;

  • const int r = 16;
  • h *= m;
  • return h ^ (h >> r);
    +}

+#define murmur16(h) murmur(h, 16)

+static unsigned int
+hash(const unsigned char * data, int len, unsigned int h)
+{
h += 0xdeadbeef;

@@ -1929,7 +1936,5 @@ hash(const unsigned char * data, int len
t = (t >> sr) | (d << sl);
#endif

  • h += t;
  • h *= m;
  • h ^= h >> r;
  • h = murmur16(h + t);
    t = d;

@@ -1954,6 +1959,5 @@ hash(const unsigned char * data, int len
h += (t >> sr) | (d << sl);
#endif

  • h *= m;
  • h ^= h >> r;
  • h = murmur16(h);
    }

@@ -1965,8 +1969,5 @@ hash(const unsigned char * data, int len
{
do {

  • h += *(uint32_t *)data;
  • h *= m;
  • h ^= h >> r;
  • h = murmur16(h + *(uint32_t *)data);
    data += 4;
    len -= 4;
    @@ -1991,18 +1992,57 @@ hash(const unsigned char * data, int len
    h += data[0];
    #endif
  • h *= m;
  • h ^= h >> r;
  • h = murmur16(h);
    }
  • h *= m;
  • h ^= h >> 10;
  • h *= m;
  • h ^= h >> 17;
  • return rb_hash_end(h);
    +}

+unsigned int
+rb_hash_uint32(unsigned int h, unsigned int i)
+{

  • return murmur(h + i, 16);
    +}

+unsigned int
+rb_hash_uint(unsigned int h, unsigned int i)
+{

  • unsigned int v = 0;
  • h += i;
    +#ifdef WORDS_BIGENDIAN
    +#if SIZEOF_INTCHAR_BIT > 128
  • v = murmur16(v + (h >> 128));
    +#endif
    +#if SIZEOF_INT
    CHAR_BIT > 8*8
  • v = murmur16(v + (h >> 88));
    +#endif
    +#if SIZEOF_INT
    CHAR_BIT > 4*8
  • v = murmur16(v + (h >> 4*8));
    +#endif
    +#endif
  • v = murmur16(v + h);
    +#ifndef WORDS_BIGENDIAN
    +#if SIZEOF_INTCHAR_BIT > 48
  • v = murmur16(v + (h >> 48));
    +#endif
    +#if SIZEOF_INT
    CHAR_BIT > 8*8
  • v = murmur16(v + (h >> 88));
    +#endif
    +#if SIZEOF_INT
    CHAR_BIT > 12*8
  • v = murmur16(v + (h >> 12*8));
    +#endif
    +#endif
  • return v;
    +}

+unsigned int
+rb_hash_end(unsigned int h)
+{

  • h = murmur(h, 10);
  • h = murmur(h, 17);
    return h;
    }

-int
-rb_memhash(const void *ptr, long len)
+unsigned int
+rb_hash_start(unsigned int h)
{
static int hashseed_init = 0;
@@ -2013,6 +2053,11 @@ rb_memhash(const void *ptr, long len)
hashseed_init = 1;
}

  • return hashseed + h;
    +}
  • return hash(ptr, len, hashseed);
    +int
    +rb_memhash(const void *ptr, long len)
    +{
  • return hash(ptr, len, rb_hash_start(0));
    }

Index: struct.c

— struct.c (revision 21650)
+++ struct.c (working copy)
@@ -805,14 +805,15 @@ static VALUE
rb_struct_hash(VALUE s)
{

  • long i, h;
  • long i;
  • unsigned h;
    VALUE n;
  • h = rb_hash(rb_obj_class(s));
  • h = rb_hash_start(rb_hash(rb_obj_class(s)));
    for (i = 0; i < RSTRUCT_LEN(s); i++) {
  • h = (h << 1) | (h<0 ? 1 : 0);
    n = rb_hash(RSTRUCT_PTR(s)[i]);
  • h ^= NUM2LONG(n);
  • h = rb_hash_uint(h, NUM2LONG(n));
    }
  • return LONG2FIX(h);
  • h = rb_hash_end(h);
  • return INT2FIX(h);
    }

Index: time.c

— time.c (revision 21650)
+++ time.c (working copy)
@@ -1184,5 +1184,5 @@ time_hash(VALUE time)

 GetTimeval(time, tobj);
  • hash = tobj->ts.tv_sec ^ tobj->ts.tv_nsec;
  • hash = rb_hash_end(rb_hash_uint(rb_hash_start(tobj->ts.tv_sec),
    tobj->ts.tv_nsec));
    return LONG2FIX(hash);
    }
    Index: include/ruby/intern.h
    ===================================================================
    — include/ruby/intern.h (revision 21650)
    +++ include/ruby/intern.h (working copy)
    @@ -607,4 +607,8 @@ VALUE rb_str_append(VALUE, VALUE);
    VALUE rb_str_concat(VALUE, VALUE);
    int rb_memhash(const void *ptr, long len);
    +unsigned int rb_hash_start(unsigned int);
    +unsigned int rb_hash_uint32(unsigned int, unsigned int);
    +unsigned int rb_hash_uint(unsigned int, unsigned int);
    +unsigned int rb_hash_end(unsigned int);
    int rb_str_hash(VALUE);
    int rb_str_hash_cmp(VALUE,VALUE);

#6

e$BKNIt$G$9!#e(B

Nobuyoshi N. e$B$5$s$O=q$-$^$7$?e(B:

e$B$@$+$i$H$$$&$o$1$G$O$"$j$^$;$s$,!"e(BMurmurHash2.0e$B$H$$$&$b$N$,=P$Fe(B
e$B$$$?$N$G$=$A$i$K:9$7BX$($F$_$^$7$?!#G\6a$/B.$/$J$k$h$&$G$9!#e(B

e$B$3$NJQ990J9_e(Bamd64e$B$N%^%7%s$G%3%s%Q%$%k$,DL$j$^$;$s!#e(B

zsh % make up all
make[1]: Entering directory /home/shyouhei/build/trunk' /home/shyouhei/ruby/trunk/revision.h unchanged make[1]: Leaving directory/home/shyouhei/build/trunk’
gcc -pipe -ansi -std=iso9899:199409 -O2 -march=athlon64 -mcmodel=medium
-g3
-pedantic -Wall -Wextra -Wundef -Wno-format -Wno-long-long
-Wno-parentheses
-Wno-unused -Wno-sign-compare -Wno-missing-field-initializers -I.
-I.ext/include/x86_64-linux -I/home/shyouhei/ruby/trunk/include
-I/home/shyouhei/ruby/trunk -DRUBY_EXPORT -DRUBY_DEBUG_ENV
-D_FORTIFY_SOURCE=2
-o string.o -c /home/shyouhei/ruby/trunk/string.c
/home/shyouhei/ruby/trunk/string.c: In function e$B!Fe(Bhashe$B!Ge(B:
/home/shyouhei/ruby/trunk/string.c:1976: error: too few arguments to
function
e$B!Fe(Bmurmure$B!Ge(B
make: *** [string.o] Error 1
zsh: exit 2 make up all