# Optimize empty hash space

--- include/ruby/intern.h (リビジョン 13307)
+++ include/ruby/intern.h (作業コピー)
@@ -326,6 +326,7 @@
 VALUE rb_hash_aset(VALUE, VALUE, VALUE);
 VALUE rb_hash_delete_if(VALUE);
 VALUE rb_hash_delete(VALUE,VALUE);
+struct st_table *rb_hash_tbl(VALUE);
 int rb_path_check(const char*);
 int rb_env_path_tainted(void);
 /* io.c */
Index: include/ruby/ruby.h
===================================================================

--- include/ruby/ruby.h (リビジョン 13307)
+++ include/ruby/ruby.h (作業コピー)
@@ -478,10 +478,15 @@

struct RHash {
struct RBasic basic;

-    struct st_table *tbl;
+    struct st_table *ntbl;
int iter_lev;
VALUE ifnone;
};
+#define RHASH_TBL(h) rb_hash_tbl(h)
+#define RHASH_ITER_LEV(h) RHASH(h)->iter_lev
+#define RHASH_IFNONE(h) RHASH(h)->ifnone
+#define RHASH_SIZE(h) (RHASH(h)->ntbl ? RHASH(h)->ntbl->num_entries : 0)

+#define RHASH_EMPTY_P(h) (RHASH_SIZE(h) == 0)

 struct RFile {
     struct RBasic basic;
Index: array.c
===================================================================
--- array.c (リビジョン 13307)
+++ array.c (作業コピー)
@@ -2525,7 +2525,7 @@
ary3 = rb_ary_new();

`````` for (i=0; i<RARRAY_LEN(ary1); i++) {
``````
• if (st_lookup(RHASH(hash)->tbl, RARRAY_PTR(ary1)[i], 0)) continue;
• if (st_lookup(RHASH_TBL(hash), RARRAY_PTR(ary1)[i], 0)) continue;
rb_ary_push(ary3, rb_ary_elt(ary1, i));
}
return ary3;
@@ -2553,9 +2553,12 @@
RARRAY_LEN(ary1) : RARRAY_LEN(ary2));
hash = ary_make_hash(ary2, 0);

• if (RHASH_EMPTY_P(hash))

• ``````   return ary3;
``````
• for (i=0; i<RARRAY_LEN(ary1); i++) {
v = vv = rb_ary_elt(ary1, i);

• if (st_delete(RHASH(hash)->tbl, (st_data_t*)&vv, 0)) {
• if (st_delete(RHASH_TBL(hash), (st_data_t*)&vv, 0)) {
rb_ary_push(ary3, v);
}
}
@@ -2587,13 +2590,13 @@

for (i=0; i<RARRAY_LEN(ary1); i++) {
v = vv = rb_ary_elt(ary1, i);

• if (st_delete(RHASH(hash)->tbl, (st_data_t*)&vv, 0)) {
• if (st_delete(RHASH_TBL(hash), (st_data_t*)&vv, 0)) {
rb_ary_push(ary3, v);
}
}
for (i=0; i<RARRAY_LEN(ary2); i++) {
v = vv = rb_ary_elt(ary2, i);
• if (st_delete(RHASH(hash)->tbl, (st_data_t*)&vv, 0)) {
• if (st_delete(RHASH_TBL(hash), (st_data_t*)&vv, 0)) {
rb_ary_push(ary3, v);
}
}
@@ -2623,12 +2626,12 @@
ary_iter_check(ary);
hash = ary_make_hash(ary, 0);
• if (RARRAY_LEN(ary) == RHASH(hash)->tbl->num_entries) {
• if (RARRAY_LEN(ary) == RHASH_SIZE(hash)) {
return Qnil;
}
for (i=j=0; i<RARRAY_LEN(ary); i++) {
v = vv = rb_ary_elt(ary, i);
• if (st_delete(RHASH(hash)->tbl, (st_data_t*)&vv, 0)) {
• if (st_delete(RHASH_TBL(hash), (st_data_t*)&vv, 0)) {
rb_ary_store(ary, j++, v);
}
}
Index: insns.def
===================================================================
--- insns.def (リビジョン 13307)
+++ insns.def (作業コピー)
@@ -1312,7 +1312,7 @@
}
else {
VALUE val;
• if (st_lookup(RHASH(hash)->tbl, key, &val)) {
• if (st_lookup(RHASH_TBL(hash), key, &val)) {
JUMP(FIX2INT(val));
}
else {
@@ -1958,7 +1958,7 @@
val = LONG2NUM(RARRAY_LEN(recv));
}
else if (HEAP_CLASS_OF(recv) == rb_cHash) {
• `````` val = INT2FIX(RHASH(recv)->tbl->num_entries);
``````
• `````` val = INT2FIX(RHASH_TBL(recv)->num_entries);
``````

}
else {
goto INSN_LABEL(normal_dispatch);
Index: gc.c
===================================================================
--- gc.c (リビジョン 13307)
+++ gc.c (作業コピー)
@@ -1032,7 +1032,7 @@
break;

`````` case T_HASH:
``````
• mark_hash(obj->as.hash.tbl, lev);
• mark_hash(obj->as.hash.ntbl, lev);
ptr = obj->as.hash.ifnone;
goto again;

@@ -1267,8 +1267,8 @@
rb_ary_free(obj);
break;
case T_HASH:

• if (RANY(obj)->as.hash.tbl) {
• `````` st_free_table(RANY(obj)->as.hash.tbl);
``````
• if (RANY(obj)->as.hash.ntbl) {
• `````` st_free_table(RANY(obj)->as.hash.ntbl);
``````
}
break;
case T_REGEXP:
Index: ext/json/ext/generator/generator.c
===================================================================
--- ext/json/ext/generator/generator.c (リビジョン 13307)
+++ ext/json/ext/generator/generator.c (作業コピー)
@@ -86,7 +86,7 @@
}

inline static VALUE mHash_json_transfrom(VALUE self, VALUE Vstate,
VALUE Vdepth) {

• long depth, len = RHASH(self)->tbl->num_entries;
• long depth, len = RHASH_SIZE(self);
VALUE result;
GET_STATE(Vstate);

@@ -140,7 +140,7 @@
rb_scan_args(argc, argv, "02", &Vstate, &Vdepth);
depth = NIL_P(Vdepth) ? 0 : FIX2LONG(Vdepth);
if (NIL_P(Vstate)) {

• ``````   long len = RHASH(self)->tbl->num_entries;
``````
• ``````   long len = RHASH_SIZE(self);
result = rb_str_buf_new(len);
rb_str_buf_cat2(result, "{");
rb_hash_foreach(self, hash_to_json_i, result);
``````

@@ -480,7 +480,7 @@
state->object_nl = tmp;
}
tmp = ID2SYM(i_check_circular);

• if (st_lookup(RHASH(opts)->tbl, tmp, 0)) {
• if (st_lookup(RHASH_TBL(opts), tmp, 0)) {
tmp = rb_hash_aref(opts, ID2SYM(i_check_circular));
state->check_circular = RTEST(tmp);
} else {
@@ -488,7 +488,7 @@
}
tmp = ID2SYM(i_max_nesting);
state->max_nesting = 19;
• if (st_lookup(RHASH(opts)->tbl, tmp, 0)) {
• if (st_lookup(RHASH_TBL(opts), tmp, 0)) {
VALUE max_nesting = rb_hash_aref(opts, tmp);
if (RTEST(max_nesting)) {
Check_Type(max_nesting, T_FIXNUM);
Index: ext/json/ext/parser/parser.c
===================================================================
--- ext/json/ext/parser/parser.c (リビジョン 13307)
+++ ext/json/ext/parser/parser.c (作業コピー)
@@ -1470,7 +1470,7 @@
rb_raise(rb_eArgError, "opts needs to be like a hash");
} else {
VALUE tmp = ID2SYM(i_max_nesting);
• ``````       if (st_lookup(RHASH(opts)->tbl, tmp, 0)) {
``````
• ``````       if (st_lookup(RHASH_TBL(opts), tmp, 0)) {
VALUE max_nesting = rb_hash_aref(opts, tmp);
if (RTEST(max_nesting)) {
Check_Type(max_nesting, T_FIXNUM);
``````

@@ -1480,7 +1480,7 @@
}
}
tmp = ID2SYM(i_allow_nan);

• ``````       if (st_lookup(RHASH(opts)->tbl, tmp, 0)) {
``````
• ``````       if (st_lookup(RHASH_TBL(opts), tmp, 0)) {
VALUE allow_nan = rb_hash_aref(opts, tmp);
if (RTEST(allow_nan)) json->allow_nan = 1;
}
``````

Index: ext/syck/rubyext.c
===================================================================
--- ext/syck/rubyext.c (リビジョン 13307)
+++ ext/syck/rubyext.c (作業コピー)
@@ -621,7 +621,7 @@
if ( bonus->taint) OBJ_TAINT( obj );
if ( bonus->proc != 0 ) rb_funcall(bonus->proc, s_call, 1, obj);

• rb_hash_aset(bonus->data,
INT2FIX(RHASH(bonus->data)->tbl->num_entries), obj);
• rb_hash_aset(bonus->data, INT2FIX(RHASH_SIZE(bonus->data)), obj);
return obj;
}

Index: ext/iconv/iconv.c
===================================================================
--- ext/iconv/iconv.c (リビジョン 13307)
+++ ext/iconv/iconv.c (作業コピー)
@@ -138,10 +138,10 @@
{
VALUE val = *code;

• if (RHASH(charset_map)->tbl &&
RHASH(charset_map)->tbl->num_entries) {
• if (RHASH_SIZE(charset_map)) {
VALUE key = rb_funcall2(val, rb_intern("downcase"), 0, 0);
StringValuePtr(key);
• if (st_lookup(RHASH(charset_map)->tbl, key, &val)) {
• if (st_lookup(RHASH_TBL(charset_map), key, &val)) {
*code = val;
}
}
Index: hash.c
===================================================================
--- hash.c (リビジョン 13307)
+++ hash.c (作業コピー)
@@ -147,10 +147,10 @@
int status;
st_table *tbl;
• tbl = RHASH(arg->hash)->tbl;
• tbl = RHASH(arg->hash)->ntbl;
if (key == Qundef) return ST_CONTINUE;
status = (*arg->func)(key, value, arg->arg);
• if (RHASH(arg->hash)->tbl != tbl) {
• if (RHASH(arg->hash)->ntbl != tbl) {
rb_raise(rb_eRuntimeError, "rehash occurred during iteration");
}
switch (status) {
@@ -172,7 +172,7 @@

if (RHASH(hash)->iter_lev == 0) {
if (FL_TEST(hash, HASH_DELETED)) {

• `````` st_cleanup_safe(RHASH(hash)->tbl, Qundef);
``````
• `````` st_cleanup_safe(RHASH(hash)->ntbl, Qundef);
FL_UNSET(hash, HASH_DELETED);
``````
}
}
@@ -182,7 +182,7 @@
static VALUE
hash_foreach_call(struct hash_foreach_arg *arg)
{
• if (st_foreach(RHASH(arg->hash)->tbl, hash_foreach_iter,
(st_data_t)arg)) {
• if (st_foreach(RHASH(arg->hash)->ntbl, hash_foreach_iter,
(st_data_t)arg)) {
rb_raise(rb_eRuntimeError, "hash modified during iteration");
}
return Qnil;
@@ -193,6 +193,8 @@
{
struct hash_foreach_arg arg;

• if (!RHASH(hash)->ntbl)

• ``````   return;
``````

RHASH(hash)->iter_lev++;
arg.hash = hash;
arg.func = (rb_foreach_func *)func;
@@ -216,7 +218,7 @@
{
VALUE hash = hash_alloc0(klass);

• RHASH(hash)->tbl = st_init_table(&objhash);
• RHASH(hash)->ntbl = 0;

return hash;
}
@@ -228,14 +230,29 @@
}

static void
-rb_hash_modify(VALUE hash)
+rb_hash_modify_check(VALUE hash)
{

-    if (!RHASH(hash)->tbl) rb_raise(rb_eTypeError, "uninitialized Hash");
     if (OBJ_FROZEN(hash)) rb_error_frozen("hash");
     if (!OBJ_TAINTED(hash) && rb_safe_level() >= 4)
         rb_raise(rb_eSecurityError, "Insecure: can't modify hash");
}

+struct st_table *
+rb_hash_tbl(VALUE hash)
+{
+    if (!RHASH(hash)->ntbl) {
+        RHASH(hash)->ntbl = st_init_table(&objhash);
+    }
+    return RHASH(hash)->ntbl;
+}

+static void
+rb_hash_modify(VALUE hash)
+{
+    rb_hash_modify_check(hash);
+    rb_hash_tbl(hash);
+}

/*

• call-seq:
• ``````Hash.new                          => hash
``````

@@ -313,7 +330,9 @@

`````` if (argc == 1 && TYPE(argv[0]) == T_HASH) {
``````

hash = hash_alloc0(klass);

• RHASH(hash)->tbl = st_copy(RHASH(argv[0])->tbl);
• ``````   if (RHASH(argv[0])->ntbl) {
``````
• ``````       RHASH(hash)->ntbl = st_copy(RHASH(argv[0])->ntbl);
``````
• ``````   }
``````

return hash;
}
@@ -388,11 +407,13 @@
if (RHASH(hash)->iter_lev > 0) {
rb_raise(rb_eRuntimeError, "rehash during iteration");
}

• rb_hash_modify(hash);
• tbl = st_init_table_with_size(RHASH(hash)->tbl->type,
RHASH(hash)->tbl->num_entries);
• rb_hash_modify_check(hash);
• if (!RHASH(hash)->ntbl)
• ``````   return hash;
``````
• tbl = st_init_table_with_size(RHASH(hash)->ntbl->type,
RHASH(hash)->ntbl->num_entries);
rb_hash_foreach(hash, rb_hash_rehash_i, (st_data_t)tbl);
• st_free_table(RHASH(hash)->tbl);
• RHASH(hash)->tbl = tbl;
• st_free_table(RHASH(hash)->ntbl);

• RHASH(hash)->ntbl = tbl;

return hash;
}
@@ -416,7 +437,7 @@
{
VALUE val;

• if (!st_lookup(RHASH(hash)->tbl, key, &val)) {
• if (!RHASH(hash)->ntbl || !st_lookup(RHASH(hash)->ntbl, key, &val))
{
return rb_funcall(hash, id_default, 1, key);
}
return val;
@@ -427,7 +448,7 @@
{
VALUE val;
• if (!st_lookup(RHASH(hash)->tbl, key, &val)) {
• if (!RHASH(hash)->ntbl || !st_lookup(RHASH(hash)->ntbl, key, &val))
{
return Qnil; /* without Hash#default */
}
return val;
@@ -475,7 +496,7 @@
if (block_given && argc == 2) {
rb_warn("block supersedes default value argument");
}
• if (!st_lookup(RHASH(hash)->tbl, key, &val)) {
• if (!RHASH(hash)->ntbl || !st_lookup(RHASH(hash)->ntbl, key, &val))
{
if (block_given) return rb_yield(key);
if (argc == 1) {
@@ -620,13 +641,15 @@
{
st_data_t ktmp = (st_data_t)key, val;

• if (!RHASH(hash)->ntbl)

• ``````   return Qundef;
``````

if (RHASH(hash)->iter_lev > 0) {

• if (st_delete_safe(RHASH(hash)->tbl, &ktmp, &val, Qundef)) {
• if (st_delete_safe(RHASH(hash)->ntbl, &ktmp, &val, Qundef)) {
FL_SET(hash, HASH_DELETED);
return (VALUE)val;
}
}
• else if (st_delete(RHASH(hash)->tbl, &ktmp, &val))
• else if (st_delete(RHASH(hash)->ntbl, &ktmp, &val))
return (VALUE)val;
return Qundef;
}
@@ -765,9 +788,12 @@
VALUE
rb_hash_reject_bang(VALUE hash)
{
• int n = RHASH(hash)->tbl->num_entries;
• int n;
• if (!RHASH(hash)->ntbl)
• ``````   return Qnil;
``````
• n = RHASH(hash)->ntbl->num_entries;
rb_hash_delete_if(hash);
• if (n == RHASH(hash)->tbl->num_entries) return Qnil;
• if (n == RHASH(hash)->ntbl->num_entries) return Qnil;
return hash;
}

@@ -861,12 +887,14 @@
static VALUE
rb_hash_clear(VALUE hash)
{

• rb_hash_modify(hash);
• if (RHASH(hash)->tbl->num_entries > 0) {
• rb_hash_modify_check(hash);
• if (!RHASH(hash)->ntbl)
• ``````   return hash;
``````
• if (RHASH(hash)->ntbl->num_entries > 0) {
if (RHASH(hash)->iter_lev > 0)
rb_hash_foreach(hash, clear_i, 0);
else
• `````` st_clear(RHASH(hash)->tbl);
``````
• `````` st_clear(RHASH(hash)->ntbl);
``````

}

return hash;
@@ -894,11 +922,11 @@
rb_hash_aset(VALUE hash, VALUE key, VALUE val)
{
rb_hash_modify(hash);

• if (TYPE(key) != T_STRING || st_lookup(RHASH(hash)->tbl, key, 0)) {
• st_insert(RHASH(hash)->tbl, key, val);
• if (TYPE(key) != T_STRING || st_lookup(RHASH(hash)->ntbl, key, 0))
{
• st_insert(RHASH(hash)->ntbl, key, val);
}
else {
}
return val;
}
@@ -959,7 +987,9 @@
static VALUE
rb_hash_size(VALUE hash)
{
• return INT2FIX(RHASH(hash)->tbl->num_entries);
• if (!RHASH(hash)->ntbl)
• ``````   return INT2FIX(0);
``````
• return INT2FIX(RHASH(hash)->ntbl->num_entries);
}

@@ -976,7 +1006,7 @@
static VALUE
rb_hash_empty_p(VALUE hash)
{

• if (RHASH(hash)->tbl->num_entries == 0)
• if (!RHASH(hash)->ntbl || RHASH(hash)->ntbl->num_entries == 0)
return Qtrue;
return Qfalse;
}
@@ -1190,7 +1220,7 @@
static VALUE
rb_hash_inspect(VALUE hash)
{
• if (RHASH(hash)->tbl == 0 || RHASH(hash)->tbl->num_entries == 0)
• if (!RHASH(hash)->ntbl || RHASH(hash)->ntbl->num_entries == 0)
return rb_str_new2("{}");
return rb_exec_recursive(inspect_hash, hash, 0);
}
@@ -1288,7 +1318,9 @@
static VALUE
rb_hash_has_key(VALUE hash, VALUE key)
{
• if (st_lookup(RHASH(hash)->tbl, key, 0)) {
• if (!RHASH(hash)->ntbl)
• ``````   return Qfalse;
``````
• if (st_lookup(RHASH(hash)->ntbl, key, 0)) {
return Qtrue;
}
return Qfalse;
@@ -1380,15 +1412,17 @@
}
return rb_equal(hash2, hash1);
}
• if (RHASH(hash1)->tbl->num_entries !=
RHASH(hash2)->tbl->num_entries)
• if (RHASH_SIZE(hash1) != RHASH_SIZE(hash2))
return Qfalse;
• if (!RHASH(hash1)->ntbl || !RHASH(hash2)->ntbl)
• ``````   return Qtrue;
``````

#if 0
if (!(rb_equal(RHASH(hash1)->ifnone, RHASH(hash2)->ifnone) &&
FL_TEST(hash1, HASH_PROC_DEFAULT) == FL_TEST(hash2,
HASH_PROC_DEFAULT)))
return Qfalse;
#endif

• data.tbl = RHASH(hash2)->tbl;
• data.tbl = RHASH(hash2)->ntbl;
data.result = Qtrue;
rb_hash_foreach(hash1, eql ? eql_i : equal_i, (st_data_t)&data);

@@ -1451,7 +1485,9 @@
if (recur) {
return LONG2FIX(0);
}

• hval = RHASH(hash)->tbl->num_entries;
• if (!RHASH(hash)->ntbl)
• ``````   return LONG2FIX(0);
``````
• hval = RHASH(hash)->ntbl->num_entries;
rb_hash_foreach(hash, hash_i, (st_data_t)&hval);
return INT2FIX(hval);
}
@@ -1695,7 +1731,7 @@
rb_hash_compare_by_id(VALUE hash)
{
rb_hash_modify(hash);
• RHASH(hash)->tbl->type = &identhash;
• RHASH(hash)->ntbl->type = &identhash;
rb_hash_rehash(hash);
return hash;
}
@@ -1712,7 +1748,9 @@
static VALUE
rb_hash_compare_by_id_p(VALUE hash)
{
• if (RHASH(hash)->tbl->type == &identhash) {
• if (!RHASH(hash)->ntbl)
• ``````   return Qfalse;
``````
• if (RHASH(hash)->ntbl->type == &identhash) {
return Qtrue;
}
return Qfalse;
Index: marshal.c
===================================================================
--- marshal.c (リビジョン 13307)
+++ marshal.c (作業コピー)
@@ -578,7 +578,7 @@
else {
w_byte(TYPE_HASH_DEF, arg);
}
• `````` w_long(RHASH(obj)->tbl->num_entries, arg);
``````
• `````` w_long(RHASH_SIZE(obj), arg);
rb_hash_foreach(obj, hash_each, (st_data_t)&c_arg);
if (!NIL_P(RHASH(obj)->ifnone)) {
``````
w_object(RHASH(obj)->ifnone, arg, limit);
@@ -899,7 +899,7 @@
static VALUE
{
• rb_hash_aset(arg->data,
INT2FIX(RHASH(arg->data)->tbl->num_entries), v);
• rb_hash_aset(arg->data, INT2FIX(RHASH_SIZE(arg->data)), v);
if (arg->taint) OBJ_TAINT(v);
if (arg->proc) {
v = rb_funcall(arg->proc, rb_intern("call"), 1, v);

