PATCH to make internal Hash class retain order

Hi there -

beeing new to ruby (thru rails) and coming from a strong php background

i really miss beeing able to retain order in a Hash. i know that this
has been discussed in the past i also understand that i can always
write my own OrderedHash implemenation in user-space. -but- as a) i
love to poke around b) userspace was too slow for some of my problems
-and- c) it’s always best to to let “code speak” i decided to “scratch
my itch” and wrote a patch (against 1.8.4) that will:

  • remember the order in which elements have been added to a Hash (at
    the cost of 2 extra pointers in every hash-bucket + 2 extra pointers
    per hash-table)
  • use this order for iterating (st_foreach will use this order)
  • implement a call Hash.order! allows the user to change the internal
    order;-)

y = { ‘d’ => ‘4’, ‘a’ => ‘1’, ‘b’ => ‘2’, ‘c’ => ‘3’ }
puts y.inspect

y.order!
puts y.inspect

y.order! {|a,b| b[1] <=> a[1]}
puts y.inspect

will now get you:
{“d”=>“4”, “a”=>“1”, “b”=>“2”, “c”=>“3”}
{“a”=>“1”, “b”=>“2”, “c”=>“3”, “d”=>“4”}
{“d”=>“4”, “c”=>“3”, “b”=>“2”, “a”=>“1”}

please bear with me as i’m not fully used to all the conventions you
use in the code - also this has only seen minimal testing…

suggestions welcome!

regards, tc

— snipp –

diff -ru ruby-1.8.4/hash.c ruby-thies/hash.c
— ruby-1.8.4/hash.c 2005-07-19 10:25:37.000000000 +0200
+++ ruby-thies/hash.c 2006-08-12 17:28:56.000000000 +0200
@@ -1195,6 +1195,46 @@
return entries;
}

+/*

    • call-seq:
    • hsh.order!                  => hash
      
    • hsh.order! {|a,b| block }  => hash
      
    • Orders hsh according to code [ key,
    • value ]. If no block is given the sort order
    • defaults to the keyorder.
    • h = { "b" => 30, "c" => 10, "a" => 20  }
      
    • h.order!                       #=> { "c" => 10, "a" => 20, "b"
      

=> 30 }

    • h.order! {|a,b| a[0]<=>b[0]}   #=> { "a" => 20, "b" => 30, "c"
      

=> 10 }

  • */

+static VALUE
+rb_hash_order(hash)

  • VALUE hash;
    +{
  • VALUE *keyorder;
  • VALUE entries = rb_hash_to_a(hash);
  • int i;
  • rb_ary_sort_bang(entries);
  • keyorder = ALLOC_N(VALUE, RARRAY(entries)->len);
  • if (! keyorder) {
  • return 0;
  • }
  • for (i = 0; i < RARRAY(entries)->len; i++) {
  •   keyorder[i] = RARRAY(RARRAY(entries)->ptr[i])->ptr[0];
    
  • }
  • st_reorder(RHASH(hash)->tbl, keyorder);
  • free(keyorder);
  • return hash;
    +}

static int
inspect_i(key, value, str)
VALUE key, value, str;
@@ -2449,6 +2489,7 @@
rb_define_method(rb_cHash,“each_key”, rb_hash_each_key, 0);
rb_define_method(rb_cHash,“each_pair”, rb_hash_each_pair, 0);
rb_define_method(rb_cHash,“sort”, rb_hash_sort, 0);

  • rb_define_method(rb_cHash,“order!”, rb_hash_order, 0);

    rb_define_method(rb_cHash,“keys”, rb_hash_keys, 0);
    rb_define_method(rb_cHash,“values”, rb_hash_values, 0);
    diff -ru ruby-1.8.4/rubytest.rb ruby-thies/rubytest.rb
    — ruby-1.8.4/rubytest.rb 2005-02-06 18:03:32.000000000 +0100
    +++ ruby-thies/rubytest.rb 2006-08-12 15:04:15.000000000 +0200
    @@ -42,6 +42,7 @@
    print “test succeeded\n”
    exit 0
    end

  • puts line
    error << line if line =~ %r:^(sample/test.rb|not):
    end
    print error
    Only in ruby-thies: signal.o
    Only in ruby-thies: sprintf.o
    diff -ru ruby-1.8.4/st.c ruby-thies/st.c
    — ruby-1.8.4/st.c 2005-12-20 05:13:21.000000000 +0100
    +++ ruby-thies/st.c 2006-08-12 18:05:55.000000000 +0200
    @@ -13,11 +13,25 @@

typedef struct st_table_entry st_table_entry;

+#define ST_APPEND_TO_GLOBAL_ORDER(tbl, entry)\

  • (entry)->gnext = NULL;\
  • (entry)->gprev = (tbl)->tail;\
  • if ((entry)->gprev) {\
  •   (entry)->gprev->gnext = (entry);\
    
  • }\
  • (tbl)->tail = (entry);\
  • if (! (tbl)->head) {\
  •   (tbl)->head = (entry);\
    
  • }

struct st_table_entry {
unsigned int hash;
st_data_t key;
st_data_t record;
st_table_entry *next;

  • st_table_entry *gprev;
  • st_table_entry *gnext;
    };

#define ST_DEFAULT_MAX_DENSITY 5
@@ -135,6 +149,52 @@
}
#endif

+static int
+st_unlink_entry(table, entry)

  • st_table *table;
  • st_table_entry *entry;
    +{
  • int i;
  • st_table_entry *last, *ptr;
  • for (i = 0; i < table->num_bins; i++) {
  • last = NULL;
  •    ptr = table->bins[i];
    
  •    while (ptr) {
    
  •   if (ptr == entry) break; /* found */
    
  •   last = ptr;
    
  •   ptr = ptr->next;
    
  •   }
    
  • if (! ptr) continue; /* correct bin it yet to be found */
  • if (last) {
  •   last->next = ptr->next;
    
  • } else {
  •   table->bins[i] = ptr->next;
    
  • }
  • if (table->head == ptr) {
  •   table->head = ptr->gnext;
    
  • }
  • if (table->tail == ptr) {
  •   table->tail = ptr->gprev;
    
  • }
  • if (ptr->gnext) {
  •   ptr->gnext->gprev = ptr->gprev;
    
  • }
  • if (ptr->gprev) {
  •   ptr->gprev->gnext = ptr->gnext;
    
  • }
  •    table->num_entries--;
    
  • return 1;
  • }
  • return 0;
    +}

st_table*
st_init_table_with_size(type, size)
struct st_hash_type *type;
@@ -157,6 +217,8 @@
tbl->num_bins = size;
tbl->bins = (st_table_entry *)Calloc(size,
sizeof(st_table_entry
));

  • tbl->head = tbl->tail = NULL;
  • return tbl;
    }

@@ -269,6 +331,7 @@
entry->record = value;
entry->next = table->bins[bin_pos];
table->bins[bin_pos] = entry;\

  • ST_APPEND_TO_GLOBAL_ORDER(table, entry)
    table->num_entries++;
    } while (0)

@@ -339,39 +402,74 @@
{
st_table *new_table;
st_table_entry *ptr, *entry;

  • int i, num_bins = old_table->num_bins;
  • int num_bins = old_table->num_bins;
  • unsigned int bin_pos;
  • new_table = alloc(st_table);
  • new_table = st_init_table_with_size(old_table->type,
    old_table->num_bins);
    if (new_table == 0) {
    return 0;
    }
  • *new_table = *old_table;
  • new_table->bins = (st_table_entry**)
  • Calloc((unsigned)num_bins, sizeof(st_table_entry*));
  • ptr = old_table->head;
  • while (ptr) {
  • entry = alloc(st_table_entry);
  • if (! entry) {
  •   st_free_table(new_table); /* safe as new_table is consistent */
    
  •   return 0;
    
  • }
  • *entry = *ptr;
  • bin_pos = entry->hash % num_bins;
  • entry->next = new_table->bins[bin_pos];
  • new_table->bins[bin_pos] = entry;
  • ST_APPEND_TO_GLOBAL_ORDER(new_table, entry);
  • new_table->num_entries++;
  • if (new_table->bins == 0) {
  • free(new_table);
  • return 0;
  • ptr = ptr->gnext;
    }
  • for(i = 0; i < num_bins; i++) {
  • new_table->bins[i] = 0;
  • ptr = old_table->bins[i];
  • while (ptr != 0) {
  •   entry = alloc(st_table_entry);
    
  •   if (entry == 0) {
    
  •   free(new_table->bins);
    
  •   free(new_table);
    
  •   return 0;
    
  •   }
    
  •   *entry = *ptr;
    
  •   entry->next = new_table->bins[i];
    
  •   new_table->bins[i] = entry;
    
  •   ptr = ptr->next;
    
  • return new_table;
    +}

+void
+st_reorder(table, keylist)

  • st_table *table;
  • st_data_t *keylist;
    +{
  • st_table buf_table, *org_table;
  • st_table_entry *entry;
  • st_data_t key;
  • unsigned int i, hash_val, bin_pos;
  • /*
  • * "algorithmus":
    
  • * 1 remeber old table content
    
  • * 2 reinitialize old_table to be empty
    
  • * 3 refill old table in corrent order (supplied by keylist) using
    

the old content

  • */
    
  • buf_table = *table;
  • org_table = &buf_table;
  • table->num_entries = 0;
  • table->bins = (st_table_entry *)Calloc(table->num_bins,
    sizeof(st_table_entry
    ));
  • table->head = table->tail = NULL;
  • for (i = 0; i < org_table->num_entries; i++) {
  • key = keylist[i];
  • hash_val = do_hash(key, org_table);
  • FIND_ENTRY(org_table, entry, hash_val, bin_pos);
  • if (! entry) {
  •   /* should not happen */
    
  •   printf("uoooo...\n");
    
    }
  • entry->next = table->bins[bin_pos];
  • table->bins[bin_pos] = entry;
  • ST_APPEND_TO_GLOBAL_ORDER(table, entry);
  • table->num_entries++;
    }
  • return new_table;
  • free(org_table->bins);
    }

int
@@ -381,39 +479,26 @@
st_data_t *value;
{
unsigned int hash_val;

  • st_table_entry *tmp;
    register st_table_entry *ptr;

    hash_val = do_hash_bin(*key, table);
    ptr = table->bins[hash_val];

  • if (ptr == 0) {

  • if (value != 0) *value = 0;

  • return 0;

  • while (ptr) {
  • if (EQUAL(table, ptr->key, *key)) break;
  • ptr = ptr->next;
    }
  • if (EQUAL(table, *key, ptr->key)) {
  • table->bins[hash_val] = ptr->next;
  • table->num_entries–;
  • if (ptr) {
  • st_unlink_entry(table, ptr);
    if (value != 0) *value = ptr->record;
    *key = ptr->key;
    free(ptr);
    return 1;
  • } else {
  • if (value != 0) *value = 0;
  • return 0;
    }
  • for(; ptr->next != 0; ptr = ptr->next) {
  • if (EQUAL(table, ptr->next->key, *key)) {
  •   tmp = ptr->next;
    
  •   ptr->next = ptr->next->next;
    
  •   table->num_entries--;
    
  •   if (value != 0) *value = tmp->record;
    
  •   *key = tmp->key;
    
  •   free(tmp);
    
  •   return 1;
    
  • }
  • }
  • return 0;
    }

int
@@ -429,22 +514,21 @@
hash_val = do_hash_bin(*key, table);
ptr = table->bins[hash_val];

  • if (ptr == 0) {
  • if (value != 0) *value = 0;
  • return 0;
  • while (ptr) {
  • if ((ptr->key != never) && EQUAL(table, ptr->key, *key)) break;
  • ptr = ptr->next;
    }
  • for(; ptr != 0; ptr = ptr->next) {
  • if ((ptr->key != never) && EQUAL(table, ptr->key, *key)) {
  •   table->num_entries--;
    
  •   *key = ptr->key;
    
  •   if (value != 0) *value = ptr->record;
    
  •   ptr->key = ptr->record = never;
    
  •   return 1;
    
  • }
  • if (ptr) {
  • table->num_entries–;
  • if (value != 0) *value = ptr->record;
  • *key = ptr->key;
  • free(ptr);
  • return 1;
  • } else {
  • if (value != 0) *value = 0;
  • return 0;
    }
  • return 0;
    }

static int
@@ -472,46 +556,40 @@
int (*func)();
st_data_t arg;
{

  • st_table_entry *ptr, *last, *tmp;
  • st_table_entry *ptr, *tmp;
    enum st_retval retval;
  • int i;
  • for(i = 0; i < table->num_bins; i++) {
  • last = 0;
  • for(ptr = table->bins[i]; ptr != 0;) {
  •   retval = (*func)(ptr->key, ptr->record, arg);
    
  •   switch (retval) {
    
  •   case ST_CHECK:	/* check if hash is modified during iteration */
    
  •       tmp = 0;
    
  •   if (i < table->num_bins) {
    
  •       for (tmp = table->bins[i]; tmp; tmp=tmp->next) {
    
  •   	if (tmp == ptr) break;
    
  •       }
    
  •   }
    
  •   if (!tmp) {
    
  •       /* call func with error notice */
    
  •       return 1;
    
  •   }
    
  •   /* fall through */
    
  •   case ST_CONTINUE:
    
  •   last = ptr;
    
  •   ptr = ptr->next;
    
  •   break;
    
  •   case ST_STOP:
    
  •       return 0;
    
  •   case ST_DELETE:
    
  •   tmp = ptr;
    
  •   if (last == 0) {
    
  •       table->bins[i] = ptr->next;
    
  •   }
    
  •   else {
    
  •       last->next = ptr->next;
    
  •   }
    
  •   ptr = ptr->next;
    
  •   free(tmp);
    
  •   table->num_entries--;
    
  •   }
    
  • }
  • ptr = table->head;
  • while (ptr) {
  •    retval = (*func)(ptr->key, ptr->record, arg);
    
  •    switch (retval) {
    
  •    case ST_CHECK: /* check if hash is modified during iteration
    

*/

  •        tmp = table->head;
    
  •        while (tmp) {
    
  •            if (tmp == ptr) break; /* current element is still
    

there - good */

  •            tmp = tmp->gnext;
    
  •        }
    
  •        if (! tmp) {
    
  •       /* call func with error notice */
    
  •            return 1;
    
  •        }
    
  •        /* fall through */
    
  •    case ST_CONTINUE:
    
  •        ptr = ptr->gnext;
    
  •        break;
    
  •    case ST_STOP:
    
  •        return 0;
    
  •    case ST_DELETE:
    
  •   tmp = ptr;
    
  •   ptr = ptr->gnext;
    
  •   st_unlink_entry(table, tmp);
    
  •        free(tmp);
    
  •        break;
    
  •    default:
    
  •   /* should never happen! */
    
  •        printf("uhh? retval=%d\n", retval);
    
  •        exit(-10);
    
  •    }
    
    }
    return 0;
    }
    diff -ru ruby-1.8.4/st.h ruby-thies/st.h
    — ruby-1.8.4/st.h 2005-02-08 01:50:33.000000000 +0100
    +++ ruby-thies/st.h 2006-08-12 17:51:50.000000000 +0200
    @@ -21,6 +21,8 @@
    int num_bins;
    int num_entries;
    struct st_table_entry **bins;
  • struct st_table_entry *head;
  • struct st_table_entry *tail;
    };

#define st_is_member(table,key) st_lookup(table,key,(st_data_t *)0)
@@ -53,6 +55,7 @@
void st_free_table _((st_table *));
void st_cleanup_safe _((st_table *, st_data_t));
st_table *st_copy _((st_table *));
+void st_reorder _((st_table *, st_data_t *));

#define ST_NUMCMP ((int ()()) 0)
#define ST_NUMHASH ((int (
)()) -2)

“T” == Thies C Arntzen [email protected] writes:

T> i really miss beeing able to retain order in a Hash. i know that this
T> has been discussed in the past i also understand that i can always
T> write my own OrderedHash implemenation in user-space. -but- as a) i
T> love to poke around b) userspace was too slow for some of my problems
T> -and- c) it’s always best to to let “code speak” i decided to
“scratch
T> my itch” and wrote a patch (against 1.8.4) that will:

Why you don’t first write a C extension, rather than modifying ruby ?

Guy Decoux

On Aug 12, 2006, at 9:55 AM, Thies C. Arntzen wrote:

Hi there -

beeing new to ruby (thru rails) and coming from a strong php
background

  • remember the order in which elements have been added to a Hash (at
    the cost of 2 extra pointers in every hash-bucket + 2 extra pointers
    per hash-table)
  • use this order for iterating (st_foreach will use this order)
  • implement a call Hash.order! allows the user to change the internal
    order;-)

Nice :slight_smile: The better place for discussion hacking on the ruby
interpreter itself is probably ruby-core.

How difficult would it be to do an OrderedHash data structure as a C
extension instead of in Ruby?

-Brian

Brian McCallister schrieb:

the cost of 2 extra pointers in every hash-bucket + 2 extra pointers
per hash-table)

  • use this order for iterating (st_foreach will use this order)
  • implement a call Hash.order! allows the user to change the internal
    order;-)

Nice :slight_smile: The better place for discussion hacking on the ruby
interpreter itself is probably ruby-core.

i wasn’t aware of that list;-) subscribed…

How difficult would it be to do an OrderedHash data structure as a C
extension instead of in Ruby?

i really believe that it needs to be a 1st class citizen - the reason
it’s done in C is because userland imposes too much overhead (i
believe) and converting forward<->back to the normal Hash would be
slow, memory intensive and ordering information would get lost on the
way…

also the amount of code added to the Hash class is ~20 lines of C-code

  • this code adds zero overhead. the bigger chunk of the code was added
    to the st.c which is the underlying library for handling hash-tables -
    and duplicating > 600 lines of code to add ~60 lines didn’t seem very
    cool to me;-)

best, thies

Hi –

On Sun, 13 Aug 2006, Thies C. Arntzen wrote:

also the amount of code added to the Hash class is ~20 lines of C-code

  • this code adds zero overhead. the bigger chunk of the code was added
    to the st.c which is the underlying library for handling hash-tables -
    and duplicating > 600 lines of code to add ~60 lines didn’t seem very
    cool to me;-)

The problem you likely face, though, is that no one will use it (which
may or may not matter to you :slight_smile: You’re requiring that people use a
patched Ruby, and if anyone writes an application that uses it, then
they require that everyone who runs the application use a patched
Ruby, and so on.

It’s much more likely – probably literally thousands of times more
likely – that people will try out something distributed as an
extension than that they’ll patch the Ruby source. And C extensions
are very fast, once loaded.

David

Robert K. wrote:

the way Java’s TreeSet works - and IMHO it’s superior vs. having to call
order! manually. etc.

All good points. I still think it would be nice if such a class were
built-in to Ruby with literal:

[ :a => 1, :b => 2 ]

Maybe as a red-black tree class.

T.

Thies C. Arntzen wrote:

  • remember the order in which elements have been added to a Hash (at
    How difficult would it be to do an OrderedHash data structure as a C
    to the st.c which is the underlying library for handling hash-tables -
    and duplicating > 600 lines of code to add ~60 lines didn’t seem very
    cool to me;-)

Adding to David’s comments: you’re changing semantics of one of the core
classes which is in itself a dangerous thing. Plus, you increase the
memory footprint of a hash which is IMHO a very bad idea for such a
widely used data structure (and some of these hashes grow huge). So, if
you want to patch ruby core, make it at least a subclass of Hash so
people can choose.

Then of course there’s a whole bunch of other things that one should
consider. Is the name properly chosen? Do we have to provide a means
for the user to define the standard order on creation? This is e.g.
the way Java’s TreeSet works - and IMHO it’s superior vs. having to call
order! manually. etc.

Kind regards

robert

Brian McCallister wrote:

  • remember the order in which elements have been added to a Hash (at
    How difficult would it be to do an OrderedHash data structure as a C
    extension instead of in Ruby?

Well, if it were another class, then you would lose the order
information before you ever got into that class, right?

hash = {a=>1, b=>2, c=>3}

ordered = OrderedHash.new(hash) # Oops – too late

Hal

ts wrote:

Why you don’t first write a C extension, rather than modifying ruby ?

Interesting question… do you mean a C extension implementing a
different Hash class or what?

Hal

Hi –

On Sun, 13 Aug 2006, Hal F. wrote:

extension than that they’ll patch the Ruby source. And C extensions
are very fast, once loaded.

My impression was that this was for play and for testing
prior to possibly being folded into Ruby.

Like I said – he may not care whether anyone uses it (in this phase
at least).

Also, I don’t yet see how this could be implemented as a
C extension. See my previous email.

Could it be implemented as an extension that redefines Hash?

David

[email protected] wrote:

are very fast, once loaded.
My impression was that this was for play and for testing
prior to possibly being folded into Ruby.

Also, I don’t yet see how this could be implemented as a
C extension. See my previous email.

Hal

[email protected] wrote:

Could it be implemented as an extension that redefines Hash?

That’s a very interesting question. I’d like to think so.

I’d assume that the parser at least retains the oreder
in which it sees the elements of the literal (long enough
to pass them to the “guts” of Hash).

Hal

On Sun, Aug 13, 2006 at 09:08:14AM +0900, Hal F. wrote:

[email protected] wrote:

Could it be implemented as an extension that redefines Hash?

That’s a very interesting question. I’d like to think so.

I’d assume that the parser at least retains the oreder
in which it sees the elements of the literal (long enough
to pass them to the “guts” of Hash).

I don’t think this is easily implementable as an extension without
patching
the interpreter.

The order of the associations in the literal is indeed preserved, since
they’re put in a linked list inside the NODE_HASH. However, the latter
is
evaluated in rb_eval, so the OP would need to patch that, using at least
something like

(untested)

— eval.c.orig 2006-08-13 02:28:59.000000000 +0200
+++ eval.c 2006-08-13 02:31:39.000000000 +0200
@@ -3750,22 +3750,23 @@
break;

   case NODE_HASH:
    {
        NODE *list;
  •       VALUE hash = rb_hash_new();
    
  •       VALUE hash = rb_funcall(rb_const_get_at(rb_cObject, 
    

rb_intern(“Hash”)),

  •                               rb_intern("new"), 0, 0);
          VALUE key, val;
    
          list = node->nd_head;
          while (list) {
              key = rb_eval(self, list->nd_head);
              list = list->nd_next;
              if (list == 0)
                  rb_bug("odd number list for Hash");
              val = rb_eval(self, list->nd_head);
              list = list->nd_next;
    
  •           rb_hash_aset(hash, key, val);
    
  •           rb_funcall(hash, rb_intern("[]="), 2, key, val);
          }
          result = hash;
      }
      break;
    

in order to turn literals into ordered hashes if Hash is replaced.

Even if you cache the IDs, the speed impact will probably be
significant;
it’s too late to time this now.

On 8/12/06, Hal F. [email protected] wrote:

Well, if it were another class, then you would lose the order
information before you ever got into that class, right?

hash = {a=>1, b=>2, c=>3}

ordered = OrderedHash.new(hash) # Oops – too late

The case of initializing with a hash is problematical, but

OrderedHash[a, 1, b, 2, c, 3]

could work if you want control the order on initialization.

And OrderedHash.new(hash) or
OrderedHash[hash]

could be defined to set the order to hash.keys


Rick DeNatale

http://talklikeaduck.denhaven2.com/

Matt T. wrote:

This is a very interesting hack, though I don’t really think it is a
good one to implement. Maybe as an extension (which would be really
cool, though I have a hard time seeing that working), but not as
default.

I tentatively disagree…

I believe that I’ve heard that one of the reasons why it’s not sorted
any particular way is due to a performance increase in the randomness
of it. I can’t verify that, as that’s outside of my expertise, but I
don’t wish to have Ruby any slower.

Not really, as I understand it. The reason the order gets scrambled
is because it’s implemented internally as a “true” hash (as in
“hashing function”).

I believe that Nobu once made a patch like this and (as I recall)
reported no significant speed hit. (Some small memory hit, of course.)

Nobu? What can you tell us?

But, really, my biggest reasoning is that order doesn’t matter to a
dictionary/hash. What matters, above all else, is that you have a
collection of keys that refer to their associated values. Sure, some
kind of ordering would be good for some occasions, but, honestly, I
don’t need it often, if ever. When I need some kind of ordered set, I
use an Array. I ask myself: if I really need this ordered, am I really
working with a hash/dictionary, or am I working with an array?

You have a good point. But the thing is, the Hash class has two
things I like (two things that make me like hashes). It consists of
pairs of items (associations), not just single items. (Of course, you
could view an array as associating integers with objects.) Also the
keys don’t have to be integers. And thirdly, it has a convenient literal
notation (as arrays do also).

Lastly, though I can’t see using it (any time soon, at least), I do
understand that it has valid uses. I don’t see Rick’s example syntax
to be too far-fetched an alternative to simply using a hash literal
(which I don’t use too often, anyways), as uncomfortable as it may be.
But, then again, it’s an Array, isn’t it? Hmm. But, really, I’m sure
you could figure something out that works well.

Just don’t mess with my hash… ;D

I once had the idea of a separate OrderedHash class with a literal
notation that Ruby would recognize:

 unord = {a=>1, b=>2, c=>3}
 ord   = [a=>1, b=>2, c=>3]

It even looks array-like. Trouble is, Ruby already recognizes such a
notation as being an array with a single element which is a hash.

I am more likely to ask for changes outside the parser than inside it.

Hal

Thies C. Arntzen schrieb:

hi back - replying to myself…

  • it cannot be implemented “as-cool” in a C-Extension cause it would
    not integrate “far” enough into ruby to have a real substantial value
    IMHO. (using Mauricios patch to eval would alow to implement it fully
    as an extension, but it would slow down any Hash creation in to
    engine - so it’s not an option)
  • it was not meant as a patch to ruby for people to use - i would much
    rather like to have this feature a default in upcoming versions of
    ruby. so, i posted it for discussion…
  • sure, changing the core-classes is a delicate thing, but -heck- why
    not? a) improvment most often comes with change, and b) i have have
    hacked on laguages-cores (PHP) before;-)
  • i would not go as far as proposing to possibility to supply an
    order-function to the hash so that it’s always ordered in a
    user-defined way - if you have to go that far you are better off in
    user-space!
  • using rb-trees has to be less efficient for simple lookups (O(?)
    against O(1))

regarding possible memory-usage and speed tradeoffs:

  • the memory footprint increase is tiny and we should be able to ignore
    that. rubys implemenation is way more memory efficient that eg PHPs and
    noone has complaint there yet.
  • the same goes for speed. the lookup-speed is untoched, update is
    untouched, insert is O(1) slower (a few more pointers), deletion is a
    tad slower (but that could be fixed). sidenote: i even think we could
    squueze a tiny bit more out of our current hash-table implementation.
    i don’t believe that you will be able to find any benchmark to prove
    this to slow down or bloat ruby in any way…

re, tc

Wow, you worked on PHP? You brave soul! :slight_smile: I’ve made some suggestions
to that crowd, but they are very stone-cold and concretely resolute in
staying with how things are. They move slow: a whole version just for
something that should’ve been disabled long ago (register_globals) and
Unicode support (yay).

But, back to the topic, I’m not a fan of increasing size, even if
we’re talking ‘neglible’ amounts. 2n+2 is a large increase, relatively
speaking.

To me, though, for something that is not needed for the majority of
the uses of a normal hash, I’d take the speed and memory over the
ordering capability. But, then again, that’s just me.

M.T.

This is a very interesting hack, though I don’t really think it is a
good one to implement. Maybe as an extension (which would be really
cool, though I have a hard time seeing that working), but not as
default.

I believe that I’ve heard that one of the reasons why it’s not sorted
any particular way is due to a performance increase in the randomness
of it. I can’t verify that, as that’s outside of my expertise, but I
don’t wish to have Ruby any slower.

But, really, my biggest reasoning is that order doesn’t matter to a
dictionary/hash. What matters, above all else, is that you have a
collection of keys that refer to their associated values. Sure, some
kind of ordering would be good for some occasions, but, honestly, I
don’t need it often, if ever. When I need some kind of ordered set, I
use an Array. I ask myself: if I really need this ordered, am I really
working with a hash/dictionary, or am I working with an array?

Lastly, though I can’t see using it (any time soon, at least), I do
understand that it has valid uses. I don’t see Rick’s example syntax
to be too far-fetched an alternative to simply using a hash literal
(which I don’t use too often, anyways), as uncomfortable as it may be.
But, then again, it’s an Array, isn’t it? Hmm. But, really, I’m sure
you could figure something out that works well.

Just don’t mess with my hash… ;D

M.T.

On Sun, Aug 13, 2006 at 05:35:04PM +0900, Thies C. Arntzen wrote:

  • it cannot be implemented “as-cool” in a C-Extension cause it would
    not integrate “far” enough into ruby to have a real substantial value
    IMHO. (using Mauricios patch to eval would alow to implement it fully
    as an extension, but it would slow down any Hash creation in to

Not Hash.new :wink:

engine - so it’s not an option)

Right. Here are some times just for fun; the overhead is around 50%:

$ cat $MESS/time-hash.rb

def time(exec, code = “1000000.times{ {1,1,1,1,1,1} }”)
/usr/bin/time #{exec} -e "#{code}" 2>&1 [/(\d.\d+)user/, 1].to_f
end

RUNS = 5
base = time(“./ruby”, “1000000.times{}”)
a = (0…RUNS).map{ time(“./ruby.orig”) - base }
b = (0…RUNS).map{ time(“./ruby”) - base}
ma = a.inject{|s,x| s+x}/a.size
mb = b.inject{|s,x| s+x}/b.size
sa = Math.sqrt(a.inject(0){|s,x| s+x2}/a.size - ma2)
sb = Math.sqrt(b.inject(0){|s,x| s+x2}/b.size - mb2)
puts “orig %4.2f <%4.2f>” % [ma, sa]
puts “patched %4.2f <%4.2f> (#{(100*mb/ma).round}%%)” % [mb, sb]

$ ruby $MESS/time-hash.rb
orig 2.11 <0.01>
patched 3.10 <0.06> (147%)

  • it was not meant as a patch to ruby for people to use - i would much
    rather like to have this feature a default in upcoming versions of
    ruby. so, i posted it for discussion…

Maybe this thread should be moved to ruby-core.

  • sure, changing the core-classes is a delicate thing, but -heck- why
    not? a) improvment most often comes with change, and b) i have have
    hacked on laguages-cores (PHP) before;-)
    ===
    You could have kept that to yourself }:wink:

regarding possible memory-usage and speed tradeoffs:

  • the memory footprint increase is tiny and we should be able to ignore
    that. rubys implemenation is way more memory efficient that eg PHPs and
    noone has complaint there yet.
  • the same goes for speed. the lookup-speed is untoched, update is
    untouched, insert is O(1) slower (a few more pointers), deletion is a
    tad slower (but that could be fixed). sidenote: i even think we could
    squueze a tiny bit more out of our current hash-table implementation.
    i don’t believe that you will be able to find any benchmark to prove
    this to slow down or bloat ruby in any way…

Your patch is similar to
http://www.rubyist.net/~nobu/ruby/ordered-hash.diff
which was not applied due to concerns about the memory overhead…

This is a question that has been raised during an earlier discussion of
this issue: What should this expression return?

{:foo => 1, :bar => 2} == {:bar => 2, :foo => 1}

I don’t think there’s any way around it returning `true’, but that would
decrease the usefulness of ordered hashes. One solution would be that
Hash#== returns true in such cases while Hash#eql? returns false, just
as

2 == 2.0 => true
2.eql? 2.0 => false

Cheers,
Daniel