Float#hash collision

e$B$J$+$@$G$9!#e(B

Float#hashe$B$,0[MM$K>WFM$9$k$h$&$G$9!#0J2<$N%3!<%I$GD4$Y$F$_$k$H!"e(B
e$BM-8zN($Oe(B5%e$BDxEY$G$9!#e(B

a = Array.new(10_000) {rand}
h = Hash.new(0)
a.each {|x| h[x.hash] += 1}
p h.size.to_f / a.size.to_f

Index: numeric.c

RCS file: /cvs/ruby/src/ruby/numeric.c,v
retrieving revision 1.101.2.23
diff -p -U 2 -r1.101.2.23 numeric.c
— numeric.c 1 May 2006 03:46:46 -0000 1.101.2.23
+++ numeric.c 24 Aug 2006 13:57:07 -0000
@@ -871,5 +871,5 @@ flo_hash(num)
c = (char*)&d;
for (hash=0, i=0; i<sizeof(double);i++) {

  • hash += c[i] * 971;
  • hash = (hash << 2) ^ (c[i] * 971);
    }
    if (hash < 0) hash = -hash;

e$B$J$+$@$G$9!#e(B

At Thu, 24 Aug 2006 23:08:46 +0900,
[email protected] wrote in [ruby-dev:29352]:

 for (hash=0, i=0; i<sizeof(double);i++) {
  • hash += c[i] * 971;
  • hash = (hash << 2) ^ (c[i] * 971);
    }

e$B$3$l$@$He(B971e$B$r3]$1$F$$$k0UL#$,$J$$$N$G!"e(B

  • hash = (hash * 971) ^ (unsigned char)c[i];

e$B$J$I$H$9$k$+!"$$$C$=$N$3$He(Brb_str_hash()e$B$rHFMQ$K$9$k$H$+!#e(B

  • numeric.c (flo_hash): improve collision.

  • string.c (rb_memhash): new generic function to calculate hash value
    for memory chunk.

Index: intern.h

RCS file: /cvs/ruby/src/ruby/intern.h,v
retrieving revision 1.197
diff -p -u -2 -r1.197 intern.h
— intern.h 18 Jul 2006 06:22:26 -0000 1.197
+++ intern.h 29 Aug 2006 12:50:10 -0000
@@ -503,4 +503,5 @@ VALUE rb_str_cat2(VALUE, const char*);
VALUE rb_str_append(VALUE, VALUE);
VALUE rb_str_concat(VALUE, VALUE);
+int rb_memhash(const void *ptr, long len);
int rb_str_hash(VALUE);
int rb_str_cmp(VALUE, VALUE);
Index: numeric.c

RCS file: /cvs/ruby/src/ruby/numeric.c,v
retrieving revision 1.139
diff -p -u -2 -r1.139 numeric.c
— numeric.c 20 Aug 2006 02:47:13 -0000 1.139
+++ numeric.c 29 Aug 2006 12:50:09 -0000
@@ -837,13 +837,9 @@ flo_hash(VALUE num)
{
double d;

  • char *c;
  • int i, hash;
  • int hash;

    d = RFLOAT(num)->value;
    if (d == 0) d = fabs(d);

  • c = (char*)&d;
  • for (hash=0, i=0; i<sizeof(double);i++) {
  • hash += c[i] * 971;
  • }
  • hash = rb_memhash(&d, sizeof(d)) ^ RBASIC(num)->klass;
    if (hash < 0) hash = -hash;
    return INT2FIX(hash);
    Index: string.c
    ===================================================================
    RCS file: /cvs/ruby/src/ruby/string.c,v
    retrieving revision 1.256
    diff -p -u -2 -r1.256 string.c
    — string.c 13 Aug 2006 05:32:57 -0000 1.256
    +++ string.c 29 Aug 2006 12:56:29 -0000
    @@ -769,7 +769,7 @@ rb_str_concat(VALUE str1, VALUE str2)
  • hash_32 - 32 bit Fowler/Noll/Vo FNV-1a hash code
    • @(#) $Revision: 1.256 $
    • @(#) $Id: string.c,v 1.256 2006-08-13 05:32:57 drbrain Exp $
    • @(#) $Source: /cvs/ruby/src/ruby/string.c,v $
    • @(#) $hash_32.Revision: 1.1 $
    • @(#) $hash_32.Id: hash_32a.c,v 1.1 2003/10/03 20:38:53 chongo Exp $
    • @(#) $hash_32.Source: /usr/local/src/cmd/fnv/RCS/hash_32a.c,v $

@@ -842,8 +842,7 @@ rb_str_concat(VALUE str1, VALUE str2)

int
-rb_str_hash(VALUE str)
+rb_memhash(const void *ptr, long len)
{

  • register long len = RSTRING(str)->len;
  • register char *p = RSTRING(str)->ptr;
  • register const unsigned char *p = ptr;
    register unsigned int hval = FNV1_32A_INIT;

@@ -865,4 +864,10 @@ rb_str_hash(VALUE str)
}

+int
+rb_str_hash(VALUE str)
+{

  • return rb_memhash(RSTRING(str)->ptr, RSTRING(str)->len);
    +}

/*

  • call-seq:

e$B$^$D$b$He(B e$B$f$-$R$m$G$9e(B

In message “Re: [ruby-dev:29362] Re: Float#hash collision”
on Tue, 29 Aug 2006 22:07:41 +0900, [email protected] writes:

|At Thu, 24 Aug 2006 23:08:46 +0900,
|[email protected] wrote in [ruby-dev:29352]:
|> for (hash=0, i=0; i<sizeof(double);i++) {
|> - hash += c[i] * 971;
|> + hash = (hash << 2) ^ (c[i] * 971);
|> }
|
|e$B$3$l$@$He(B971e$B$r3]$1$F$$$k0UL#$,$J$$$N$G!“e(B
|
|+ hash = (hash * 971) ^ (unsigned char)c[i];
|
|e$B$J$I$H$9$k$+!”$$$C$=$N$3$He(Brb_str_hash()e$B$rHFMQ$K$9$k$H$+!#e(B

e$B8e<T$+$J!#%3%_%C%H$7$F$/$@$5$$!#e(B1.8e$B$O$I$&$7$h$&$+!#e(B

e$B$J$+$@$G$9!#e(B

At Wed, 30 Aug 2006 10:10:35 +0900,
Yukihiro M. wrote in [ruby-dev:29363]:

|
|e$B$J$I$H$9$k$+!"$$$C$=$N$3$He(Brb_str_hash()e$B$rHFMQ$K$9$k$H$+!#e(B

e$B8e<T$+$J!#%3%_%C%H$7$F$/$@$5$$!#e(B1.8e$B$O$I$&$7$h$&$+!#e(B

e$B$$$:$l$K$7$F$b!"e(Brb_memhash()e$BAjEv$O3HD%%i%$%V%i%j$KJXMx$J$N$GM_e(B
e$B$7$$5$$,$7$^$9$M!#e(B