Latest changes from mainline fast_xs

Hi all,

Hopefully the new maintainer of Hpricot will see this because I have no
idea who to send it to now…

I originally had a version of this patch posted in _why’s Trac sometime
in 2008 until he took Trac down. I’ve tried emailing him directly on
several occasions since and never got a response, last was February
2009. Come to think of it, the only time I ever got any response from
him was my initial email to him about fast_xs back in October 2007.

So here it is, I’ve rebased my patch against
2c961095954d5aaa5c046f4c773c62c3d5902ef4 on
git://github.com/whymirror/hpricot.git

Also I have an entire repo up on git://git.bogomips.org/hpricot and
viewable from http://git.bogomips.org/cgit/hpricot.git

From 636e3f453b2bbc0c7486b91eda452fe4767e4bbc Mon Sep 17 00:00:00 2001
From: Eric W. [email protected]
Date: Sat, 7 Feb 2009 21:31:15 -0800
Subject: [PATCH] * ext/fast_xs: latest changes from mainline fast_xs

  • alloca() dependency removed. An extra memory allocation and
    memory copy is saved by writing directly to the string object
    returned by rb_str_new(0, len) (both the Ruby 1.8.6 and 1.9
    code has this usage in it, so it should be safe :)
    This also allows fast_xs to work on strings larger than
    the stack size.
  • general readability and cleanups
  • fast_xs works with Ruby 1.9
  • Don’t rely on ruby_digitmap being a valid symbol

ext/fast_xs/fast_xs.c | 101 ++++++++++++++++++++++++-------------------------
1 files changed, 50 insertions(+), 51 deletions(-)

diff --git a/ext/fast_xs/fast_xs.c b/ext/fast_xs/fast_xs.c
index 4a30a6c..04b175f 100644
--- a/ext/fast_xs/fast_xs.c
+++ b/ext/fast_xs/fast_xs.c
@@ -1,8 +1,5 @@
-#define VERSION "0.1"

#include <ruby.h>
#include <assert.h>
-/* #include <stdio.h> */

#ifndef RARRAY_LEN
#define RARRAY_LEN(arr) RARRAY(arr)->len
@@ -72,11 +69,6 @@ static const int cp_1252[] = {
n = cp_1252[n - 128];
} while(0)

-#define return_const_len(x) do { \

  • memcpy(buf, x, sizeof(x) - 1); \
  • return (sizeof(x) - 1);
    -} while (0)

static inline size_t bytes_for(int n)
{
if (n < 1000)
@@ -91,18 +83,24 @@ static inline size_t bytes_for(int n)
return sizeof("&#9999999;") - 1;
}

-static long escape(char *buf, int n)
+static size_t escape(char *buf, int n)
{
+
+#define return_const_len(x) do { \

  • memcpy(buf, x, sizeof(x) - 1); \
  • return (sizeof(x) - 1);
    +} while (0)
  • /* handle ASCII first */
    if (likely(n < 128)) {
  • if (likely(n >= 0x20 || n == 0x9 || n == 0xA || n == 0xD)) {
  •  if (unlikely(n == 34))
    
  • if (likely(n >= 0x20 || n == '\t' || n == '\n' || n == '\r')) {
  •  if (unlikely(n == '"'))
       return_const_len("&quot;");
    
  •  if (unlikely(n == 38))
    
  •  if (unlikely(n == '&'))
       return_const_len("&amp;");
    
  •  if (unlikely(n == 60))
    
  •  if (unlikely(n == '<'))
       return_const_len("&lt;");
    
  •  if (unlikely(n == 62))
    
  •  if (unlikely(n == '>'))
       return_const_len("&gt;");
     buf[0] = (char)n;
     return 1;
    

@@ -112,16 +110,18 @@ static long escape(char *buf, int n)
return 1;
}

+#undef return_const_len
+
CP_1252_ESCAPE(n);

if (VALID_VALUE(n)) {
/* return snprintf(buf, sizeof("&#1114111;"), "&#%i;", n); */

  • RUBY_EXTERN const char ruby_digitmap[];
  • int rv = 3; /* &#; */
  • static const char digitmap[] = "0123456789";
  • size_t rv = sizeof("&#;") - 1;
    buf += bytes_for(n);
    *--buf = ';';
    do {
  •  *--buf = ruby_digitmap[(int)(n % 10)];
    
  •  *--buf = digitmap[(int)(n % 10)];
     ++rv;
    
    } while (n /= 10);
    *--buf = '#';
    @@ -132,27 +132,6 @@ static long escape(char *buf, int n)
    return 1;
    }

-#undef return_const_len

-static long escaped_len(int n)
-{

  • if (likely(n < 128)) {
  • if (unlikely(n == 34))
  •  return (sizeof("&quot;") - 1);
    
  • if (unlikely(n == 38))
  •  return (sizeof("&amp;") - 1);
    
  • if (unlikely(n == 60 || n == 62))
  •  return (sizeof("&gt;") - 1);
    
  • return 1;
  • }
  • CP_1252_ESCAPE(n);
  • if (VALID_VALUE(n))
  • return bytes_for(n);
  • return 1;
    -}

static VALUE unpack_utf8(VALUE self)
{
return rb_funcall(self, unpack_id, 1, U_fmt);
@@ -163,28 +142,48 @@ static VALUE unpack_uchar(VALUE self)
return rb_funcall(self, unpack_id, 1, C_fmt);
}

-VALUE fast_xs(VALUE self)
+/*
+ * escapes strings for XML
+ * The double-quote (") character is translated to "&quot;"
+ */
    +static VALUE fast_xs(VALUE self)
    {
    long i;
  • struct RArray *array;
  • char *s, *c;
  • long s_len = 0;
  • VALUE array;
  • char *c;
  • size_t s_len;
    VALUE *tmp;
  • VALUE rv;
  • array = rb_rescue(unpack_utf8, self, unpack_uchar, self);
  • for (tmp = RARRAY_PTR(array), s_len = i = RARRAY_LEN(array);
  •   --i >= 0;
    
  •   tmp++) {
    
  • int n = NUM2INT(*tmp);
  • if (likely(n < 128)) {
  •  if (unlikely(n == '"'))
    
  •    s_len += (sizeof("&quot;") - 2);
    
  •  if (unlikely(n == '&'))
    
  •    s_len += (sizeof("&amp;") - 2);
    
  •  if (unlikely(n == '>' || n == '<'))
    
  •    s_len += (sizeof("&gt;") - 2);
    
  •  continue;
    
  • }
  • array = RARRAY(rb_rescue(unpack_utf8, self, unpack_uchar, self));
  • CP_1252_ESCAPE(n);
  • tmp = RARRAY_PTR(array);
  • for (i = RARRAY_LEN(array); --i >= 0; tmp++)
  • s_len += escaped_len(NUM2INT(*tmp));
  • if (VALID_VALUE(n))
  •  s_len += bytes_for(n) - 1;
    
  • }
  • c = s = alloca(s_len + 1);
  • rv = rb_str_new(NULL, s_len);
  • c = RSTRING_PTR(rv);
  • tmp = RARRAY_PTR(array);
  • for (i = RARRAY_LEN(array); --i >= 0; tmp++)
  • for (tmp = RARRAY_PTR(array), i = RARRAY_LEN(array); --i >= 0; tmp++)
    c += escape(c, NUM2INT(*tmp));
  • *c = '\0';
  • return rb_str_new(s, s_len);
  • return rv;
    }

void Init_fast_xs(void)