Ngx.re.split implementation

Hello

I was checking HttpLuaModule docs and saw “ngx.re.split” implementation
in the TODO section.

Is it already implemented?
Are there any details about this implementation (e.g. parameters,
returned data)?

Jader H. Silva


AVISO: A informação contida neste e-mail, bem como em qualquer de seus
anexos, é CONFIDENCIAL e destinada ao uso exclusivo do(s) destinatário(s)
acima referido(s), podendo conter informações sigilosas e/ou legalmente
protegidas. Caso você não seja o destinatário desta mensagem, informamos
que qualquer divulgação, distribuição ou cópia deste e-mail e/ou de qualquer
de seus anexos é absolutamente proibida. Solicitamos que o remetente seja
comunicado imediatamente, respondendo esta mensagem, e que o original
desta mensagem e de seus anexos, bem como toda e qualquer cópia e/ou
impressão realizada a partir destes, sejam permanentemente apagados e/ou
destruídos. Informações adicionais sobre nossa empresa podem ser obtidas no
site http://sobre.uol.com.br/.

NOTICE: The information contained in this e-mail and any attachments
thereto is CONFIDENTIAL and is intended only for use by the recipient
named herein and may contain legally privileged and/or secret
information.
If you are not the e-mail's intended recipient, you are hereby notified
that any dissemination, distribution or copy of this e-mail, and/or any
attachments thereto, is strictly prohibited. Please immediately notify
the sender replying to the above mentioned e-mail address, and
permanently delete and/or destroy the original and any copy of this
e-mail and/or its attachments, as well as any printout thereof.
Additional information about our company may be obtained through the
site UOL - Seu universo online.

Show!! As listas nos ajudam muito.

Só uma recomendacao: procure utilizar o seu email pessoal em listas de
discussao. Nao utilizamos o corporativo para nao dar indicios do q
estamos mexendo no UOL.

Abs

Sent from my iPhone

Sorry… Wrong mail.

Hello!

On Wed, Jul 11, 2012 at 12:54 PM, Jader Henrique da Silva
[email protected] wrote:

I was checking HttpLuaModule docs and saw “ngx.re.split” implementation in
the TODO section.

Is it already implemented?

Nope, otherwise I would update the TODO section accordingly :slight_smile:

Are there any details about this implementation (e.g. parameters, returned
data)?

Not yet. But I think the behavior will be similar to Perl 5’s split
builtin function.

I’m always open to patches for this feature :slight_smile:

Best regards,
-agentzh

So, here it is :slight_smile:

ngx.re.split(subject, regex, options?, limit?)

This function is based on ngx_re_sub.

It will split subject on regex matches and return a table of strings.
Limit is the max number of splits (0 will return a table containing only
the subject string).

Let me know if there are bugs, indentation issues or anything else I need
to fix.

Jader H. Silva

2012/7/11 agentzh [email protected]

Hello!

On Fri, Jul 13, 2012 at 2:19 PM, Jader H. Silva [email protected]
wrote:

The patch was removed in the previous message :frowning:
So here it is (for real).

Thanks for the patch! I’ll look into this in the next week :slight_smile:

Best regards,
-agentzh

The patch was removed in the previous message :frowning:
So here it is (for real).

From 9091c40e22f6fd0ca2173ecbeb1f932502cc8ac6 Mon Sep 17 00:00:00 2001
From: “Jader H. Silva” [email protected]
Date: Fri, 13 Jul 2012 18:06:32 -0300
Subject: [PATCH] Add ngx.re.split function


src/ngx_http_lua_regex.c | 443
++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 443 insertions(+)

diff --git a/src/ngx_http_lua_regex.c b/src/ngx_http_lua_regex.c
index 108070c..aa5d445 100644
--- a/src/ngx_http_lua_regex.c
+++ b/src/ngx_http_lua_regex.c
@@ -74,6 +74,7 @@ static int ngx_http_lua_ngx_re_match(lua_State *L);
static int ngx_http_lua_ngx_re_gmatch(lua_State *L);
static int ngx_http_lua_ngx_re_sub(lua_State *L);
static int ngx_http_lua_ngx_re_gsub(lua_State *L);
+static int ngx_http_lua_ngx_re_split(lua_State *L);
static void ngx_http_lua_regex_free_study_data(ngx_pool_t *pool,
pcre_extra *sd);
static ngx_int_t ngx_lua_regex_compile(ngx_lua_regex_compile_t *rc);
@@ -1611,6 +1612,445 @@ error:
return luaL_error(L, msg);
}

+static int
+ngx_http_lua_ngx_re_split(lua_State *L)
+{

  • ngx_http_lua_regex_t *re;
  • ngx_http_request_t *r;
  • ngx_str_t subj;
  • ngx_str_t pat;
  • ngx_str_t opts;
  • ngx_str_t tpl;
  • ngx_http_lua_main_conf_t *lmcf = NULL;
  • ngx_pool_t *pool, *old_pool;
  • ngx_lua_regex_compile_t re_comp;
  • const char *msg;
  • ngx_int_t rc;
  • ngx_uint_t n;
  • ngx_int_t i;
  • int nargs;
  • int *cap = NULL;
  • int ovecsize;
  • int type;
  • unsigned func;
  • int offset;
  • size_t count;
  • luaL_Buffer luabuf;
  • ngx_int_t flags;
  • ngx_int_t limit = -1;
  • u_char *p;
  • u_char errstr[NGX_MAX_CONF_ERRSTR + 1];
  • pcre_extra *sd = NULL;
  • ngx_http_lua_complex_value_t *ctpl = NULL;
  • ngx_http_lua_compile_complex_value_t ccv;
  • nargs = lua_gettop(L);
  • if (nargs != 2 && nargs != 3 && nargs != 4) {
  •    return luaL_error(L, "expecting two or three or four arguments,
    

but got %d",

  •            nargs);
    
  • }
  • lua_pushlightuserdata(L, &ngx_http_lua_request_key);
  • lua_rawget(L, LUA_GLOBALSINDEX);
  • r = lua_touserdata(L, -1);
  • lua_pop(L, 1);
  • if (r == NULL) {
  •    return luaL_error(L, "no request object found");
    
  • }
  • subj.data = (u_char *) luaL_checklstring(L, 1, &subj.len);
  • pat.data = (u_char *) luaL_checklstring(L, 2, &pat.len);
  • if (nargs >= 3) {
  •    opts.data = (u_char *) luaL_checklstring(L, 3, &opts.len);
    
  •    if (nargs == 4) {
    
  •        limit = luaL_checkinteger(L, 4);
    
  •        lua_pop(L, 1);
    
  •    } else {/* nargs == 3 */
    
  •        limit = -1;
    
  •    }
    
  • } else { /* nargs == 2 */
  •    opts.data = (u_char *) "";
    
  •    opts.len = 0;
    
  • }
  • ngx_memzero(&re_comp, sizeof(ngx_lua_regex_compile_t));
  • /* stack: subj regex repl */
  • re_comp.options = 0;
  • flags = ngx_http_lua_ngx_re_parse_opts(L, &re_comp, &opts, 4);
  • if (flags & NGX_LUA_RE_COMPILE_ONCE) {
  •    lmcf = ngx_http_get_module_main_conf(r, ngx_http_lua_module);
    
  •    pool = lmcf->pool;
    
  •    dd("server pool %p", lmcf->pool);
    
  •    lua_pushlightuserdata(L, &ngx_http_lua_regex_cache_key);
    
  •    lua_rawget(L, LUA_REGISTRYINDEX); /* table */
    
  •    lua_pushliteral(L, "s");
    
  •    lua_pushinteger(L, tpl.len);
    
  •    lua_pushliteral(L, ":");
    
  •    lua_pushvalue(L, 2);
    
  •    if (tpl.len != 0) {
    
  •        lua_pushvalue(L, 3);
    
  •    }
    
  •    dd("options size: %d", (int) sizeof(re_comp.options));
    
  •    lua_pushlstring(L, (char *) &re_comp.options,
    

sizeof(re_comp.options));

  •            /* table regex opts */
    
  •    if (tpl.len == 0) {
    
  •        lua_concat(L, 5); /* table key */
    
  •    } else {
    
  •        lua_concat(L, 6); /* table key */
    
  •    }
    
  •    lua_pushvalue(L, -1); /* table key key */
    
  •    dd("regex cache key: %.*s", (int) (pat.len +
    

sizeof(re_comp.options)),

  •            lua_tostring(L, -1));
    
  •    lua_rawget(L, -3); /* table key re */
    
  •    re = lua_touserdata(L, -1);
    
  •    lua_pop(L, 1); /* table key */
    
  •    if (re) {
    
  •        ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
    
  •                "lua regex cache hit for split regex \"%s\" with
    

options "

  •                "\"%s\"", pat.data, opts.data);
    
  •        lua_pop(L, 2);
    
  •        dd("restoring regex %p, ncaptures %d,  captures %p", 
    

re->regex,

  •                re->ncaptures, re->captures);
    
  •        re_comp.regex = re->regex;
    
  •        sd = re->regex_sd;
    
  •        re_comp.captures = re->ncaptures;
    
  •        cap = re->captures;
    
  •        ctpl = re->replace;
    
  •        if (flags & NGX_LUA_RE_MODE_DFA) {
    
  •            ovecsize = 2;
    
  •        } else {
    
  •            ovecsize = (re->ncaptures + 1) * 3;
    
  •        }
    
  •        goto exec;
    
  •    }
    
  •    ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
    
  •            "lua regex cache miss for split regex \"%s\" with 
    

options "

  •            "\"%s\"",
    
  •            pat.data, opts.data);
    
  •    if (lmcf->regex_cache_entries >= lmcf->regex_cache_max_entries) 
    

{
+

  •        if (lmcf->regex_cache_entries ==
    

lmcf->regex_cache_max_entries) {

  •            ngx_log_error(NGX_LOG_WARN, r->connection->log, 0,
    
  •                    "lua exceeding regex cache max entries (%i)",
    
  •                    lmcf->regex_cache_max_entries);
    
  •            lmcf->regex_cache_entries++;
    
  •        }
    
  •        pool = r->pool;
    
  •        flags &= ~NGX_LUA_RE_COMPILE_ONCE;
    
  •    }
    
  • } else {
  •    pool = r->pool;
    
  • }
  • re_comp.pattern = pat;
  • re_comp.err.len = NGX_MAX_CONF_ERRSTR;
  • re_comp.err.data = errstr;
  • re_comp.pool = pool;
  • dd(“compiling regex”);
  • ngx_log_debug5(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
  •        "lua compiling split regex \"%s\" with options \"%s\" "
    
  •        "(compile once: %d) (dfa mode: %d) (jit mode: %d)",
    
  •        pat.data, opts.data,
    
  •        (flags & NGX_LUA_RE_COMPILE_ONCE) != 0,
    
  •        (flags & NGX_LUA_RE_MODE_DFA) != 0,
    
  •        (flags & NGX_LUA_RE_MODE_JIT) != 0);
    
  • old_pool = ngx_http_lua_pcre_malloc_init(pool);
  • rc = ngx_lua_regex_compile(&re_comp);
  • ngx_http_lua_pcre_malloc_done(old_pool);
  • if (rc != NGX_OK) {
  •    dd("compile failed");
    
  •    re_comp.err.data[re_comp.err.len] = '\0';
    
  •    msg = lua_pushfstring(L, "failed to compile regex \"%s\": %s",
    
  •            pat.data, re_comp.err.data);
    
  •    return luaL_argerror(L, 2, msg);
    
  • }

+#if LUA_HAVE_PCRE_JIT
+

  • if (flags & NGX_LUA_RE_MODE_JIT) {
  •    old_pool = ngx_http_lua_pcre_malloc_init(pool);
    
  •    sd = pcre_study(re_comp.regex, PCRE_STUDY_JIT_COMPILE, &msg);
    
  •    ngx_http_lua_pcre_malloc_done(old_pool);
    

+# if (NGX_DEBUG)

  •    dd("sd = %p", sd);
    
  •    if (msg != NULL) {
    
  •        ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
    
  •            "pcre study failed with PCRE_STUDY_JIT_COMPILE: %s 
    

(%p)",

  •            msg, sd);
    
  •    }
    
  •    if (sd != NULL) {
    
  •        int         jitted;
    
  •        old_pool = ngx_http_lua_pcre_malloc_init(pool);
    
  •        pcre_fullinfo(re_comp.regex, sd, PCRE_INFO_JIT, &jitted);
    
  •        ngx_http_lua_pcre_malloc_done(old_pool);
    
  •        ngx_log_debug1(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
    
  •            "pcre JIT compiling result: %d", jitted);
    
  •    }
    

+# endif /* NGX_DEBUG */
+

  • } else {
  •    old_pool = ngx_http_lua_pcre_malloc_init(pool);
    
  •    sd = pcre_study(re_comp.regex, 0, &msg);
    
  •    ngx_http_lua_pcre_malloc_done(old_pool);
    

+# if (NGX_DEBUG)

  •    dd("sd = %p", sd);
    
  •    if (msg != NULL) {
    
  •        ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
    
  •            "pcre_study failed with PCRE_STUDY_JIT_COMPILE: %s 
    

(%p)",

  •            msg, sd);
    
  •    }
    

+# endif /* NGX_DEBUG */

  • }

+#else /* LUA_HAVE_PCRE_JIT */
+

  • if (flags & NGX_LUA_RE_MODE_JIT) {
  •    ngx_log_debug0(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
    
  •            "your pcre build does not have JIT support and "
    
  •            "the \"j\" regex option is ignored");
    
  • }

+#endif /* LUA_HAVE_PCRE_JIT */
+

  • dd(“compile done, captures %d”, re_comp.captures);
  • if (flags & NGX_LUA_RE_MODE_DFA) {
  •    ovecsize = 2;
    
  • } else {
  •    ovecsize = (re_comp.captures + 1) * 3;
    
  • }
  • cap = ngx_palloc(pool, ovecsize * sizeof(int));
  • if (cap == NULL) {
  •    flags &= ~NGX_LUA_RE_COMPILE_ONCE;
    
  •    msg = "out of memory";
    
  •    goto error;
    
  • }
  • if (flags & NGX_LUA_RE_COMPILE_ONCE) {
  •    ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
    
  •            "lua saving compiled sub regex (%d captures) into the
    

cache "

  •            "(entries %i)", re_comp.captures,
    
  •            lmcf ? lmcf->regex_cache_entries : 0);
    
  •    re = ngx_palloc(pool, sizeof(ngx_http_lua_regex_t));
    
  •    if (re == NULL) {
    
  •        return luaL_error(L, "out of memory");
    
  •    }
    
  •    dd("saving regex %p, ncaptures %d,  captures %p", 
    

re_comp.regex,

  •            re_comp.captures, cap);
    
  •    re->regex = re_comp.regex;
    
  •    re->regex_sd = sd;
    
  •    re->ncaptures = re_comp.captures;
    
  •    re->captures = cap;
    
  •    re->replace = ctpl;
    
  •    lua_pushlightuserdata(L, re); /* table key value */
    
  •    lua_rawset(L, -3); /* table */
    
  •    lua_pop(L, 1);
    
  •    if (lmcf) {
    
  •        lmcf->regex_cache_entries++;
    
  •    }
    
  • }

+exec:

  • count = 0;
  • offset = 0;
  • lua_newtable(L);
  • for (;:wink: {
  •    if (subj.len == 0 || count == limit) {
    
  •        break;
    
  •    }
    
  •    if (flags & NGX_LUA_RE_MODE_DFA) {
    

+#if LUA_HAVE_PCRE_DFA
+

  •        int ws[NGX_LUA_RE_DFA_MODE_WORKSPACE_COUNT];
    
  •        rc = ngx_http_lua_regex_dfa_exec(re_comp.regex, sd, &subj,
    
  •            offset, cap, ovecsize, ws,
    

NGX_LUA_RE_DFA_MODE_WORKSPACE_COUNT);
+
+#else /* LUA_HAVE_PCRE_DFA */
+

  •        msg = "at least pcre 6.0 is required for the DFA mode";
    
  •        goto error;
    

+#endif /* LUA_HAVE_PCRE_DFA */
+

  •    } else {
    
  •        rc = ngx_http_lua_regex_exec(re_comp.regex, sd, &subj, 
    

offset,
cap,

  •                ovecsize);
    
  •    }
    
  •    if (rc == NGX_REGEX_NO_MATCHED) {
    
  •        break;
    
  •    }
    
  •    if (rc < 0) {
    
  •        msg = lua_pushfstring(L, ngx_regex_exec_n " failed: %d on
    

"%s" "

  •            "using \"%s\"", (int) rc, subj.data, pat.data);
    
  •        goto error;
    
  •    }
    
  •    if (rc == 0) {
    
  •        if (flags & NGX_LUA_RE_MODE_DFA) {
    
  •            rc = 1;
    
  •        } else {
    
  •            msg = "capture size too small";
    
  •            goto error;
    
  •        }
    
  •    }
    
  •    dd("rc = %d", (int) rc);
    
  •    count++;
    
  •    luaL_buffinit(L, &luabuf);
    
  •    luaL_addlstring(&luabuf, (char *) &subj.data[offset],
    
  •                cap[0] - offset);
    
  •    lua_pushnumber(L, count);
    
  •    luaL_pushresult(&luabuf);
    
  •    lua_settable(L, -3);
    
  •    offset = cap[1];
    
  • }
  • if (count == 0) {
  •    dd("no match, just the original subject");
    
  •    lua_pushnumber(L, count+1);
    
  •    lua_pushvalue(L, 1);
    
  •    lua_settable(L, -3);
    
  • } else {
  •    if (offset != (int) subj.len) {
    
  •        dd("adding trailer: %s (len %d)", &subj.data[offset],
    
  •                (int) (subj.len - offset));
    
  •        luaL_buffinit(L, &luabuf);
    
  •        luaL_addlstring(&luabuf, (char *) &subj.data[offset],
    
  •                subj.len - offset);
    
  •        lua_pushnumber(L, count+1);
    
  •        luaL_pushresult(&luabuf);
    
  •        lua_settable(L, -3);
    
  •    }
    
  •    dd("the dst string: %s", lua_tostring(L, -1));
    
  • }
  • if (!(flags & NGX_LUA_RE_COMPILE_ONCE)) {
  •    if (sd) {
    
  •        ngx_http_lua_regex_free_study_data(pool, sd);
    
  •    }
    
  •    if (re_comp.regex) {
    
  •        ngx_pfree(pool, re_comp.regex);
    
  •    }
    
  •    if (ctpl) {
    
  •        ngx_pfree(pool, ctpl);
    
  •    }
    
  •    if (cap) {
    
  •        ngx_pfree(pool, cap);
    
  •    }
    
  • }
  • return 1;

+error:

  • if (!(flags & NGX_LUA_RE_COMPILE_ONCE)) {
  •    if (sd) {
    
  •        ngx_http_lua_regex_free_study_data(pool, sd);
    
  •    }
    
  •    if (re_comp.regex) {
    
  •        ngx_pfree(pool, re_comp.regex);
    
  •    }
    
  •    if (ctpl) {
    
  •        ngx_pfree(pool, ctpl);
    
  •    }
    
  •    if (cap) {
    
  •        ngx_pfree(pool, cap);
    
  •    }
    
  • }
  • return luaL_error(L, msg);
    +}

void
ngx_http_lua_inject_regex_api(lua_State *L)
@@ -1631,6 +2071,9 @@ ngx_http_lua_inject_regex_api(lua_State *L)
lua_pushcfunction(L, ngx_http_lua_ngx_re_gsub);
lua_setfield(L, -2, “gsub”);

  • lua_pushcfunction(L, ngx_http_lua_ngx_re_split);
  • lua_setfield(L, -2, “split”);
  • lua_setfield(L, -2, “re”);
    }


1.7.9.5

2012/7/13 Jader H. Silva [email protected]