Invalid error message by illegal regexp


#1

木村です。

1.9の方のみですが、

ruby -e "/a(?=b)?/.match('a')"
とか
ruby -e "/a(?=b?/.match('a')"
としたときに、

0: uninitialized Regexp (TypeError)

という適切ではないであろうエラーメッセージが出力されます。
ruby 1.9.0 (2007-07-30 patchlevel 0) [i386-mswin32_71] で確認。

これは鬼車が返しているエラーメッセージを握りつぶしてしまっているため
です。以下詳細。

VALUE
rb_reg_compile(const char *s, long len, int options, const char *file,
int line)
{
VALUE re = rb_reg_s_alloc(rb_cRegexp);
char err[ONIG_MAX_ERROR_MESSAGE_LEN];

if (rb_reg_initialize(re, s, len, options, err) != 0) {
    VALUE desc = rb_reg_desc(s, len, re);

rb_compile_error(file, line, “%s: %s”, err, RSTRING_PTR(desc));
}
FL_SET(re, REG_LITERAL);
return re;
}

ここの、rb_reg_initialize 経由で正規表現をコンパイルしたときに
鬼車側のエラーになり戻り値が -1、エラーの原因を表す鬼車側からの
メッセージが err に設定されるのですが、

次に呼び出している rb_reg_desc の中で

if (re) {

rb_reg_check(re);

と rb_reg_check を呼び出し、その rb_reg_check で

rb_reg_check(VALUE re)
{
if (!RREGEXP(re)->ptr || !RREGEXP(re)->str) {
rb_raise(rb_eTypeError, “uninitialized Regexp”);
}
}

e$B$He(B rb_raise e$B$7$F$7$^$C$F$$$k$N$G!"e(B
0: uninitialized Regexp (TypeError)
e$B$H$$$&%a%C%;!<%8$@$1=PNO$5$l$F=*$o$j$K$J$C$F$7$^$$$^$9e(B
(rb_compile_errore$B$,8F$S=P$5$l$J$$e(B)e$B!#e(B

rb_reg_initialize e$B$G<:GT$7$?e(B(not
0e$B$,JV$C$F$-$?e(B)e$B$H$-$NBP=h$rJQ$($k$+e(B
rb_reg_desce$BFbIt$NH=Dj$rD4@a$9$l$PNI$$$H;W$$$^$9$,!"6qBNE*$J=$@5J}K!e(B
e$B$O$*G$$;$7$^$9!#e(B


#2

なかだです。

At Thu, 2 Aug 2007 13:45:04 +0900,
KIMURA Koichi wrote in [ruby-dev:31333]:

ruby -e "/a(?=b)?/.match('a')"
とか
ruby -e "/a(?=b?/.match('a')"
としたときに、

0: uninitialized Regexp (TypeError)

という適切ではないであろうエラーメッセージが出力されます。

すいません。直そうとは思っていたんですが、後回しにしたまま忘れて
ました。

rb_reg_initialize e$B$G<:GT$7$?e(B(not 0e$B$,JV$C$F$-$?e(B)e$B$H$-$NBP=h$rJQ$($k$+e(B
rb_reg_desce$BFbIt$NH=Dj$rD4@a$9$l$PNI$$$H;W$$$^$9$,!"6qBNE*$J=$@5J}K!e(B
e$B$O$*G$$;$7$^$9!#e(B

e$B85!9$O!“e(Brb_compile_errore$B$r8F$V$H$-$Oe(Brb_reg_desce$B$K$Oe(B0e$B$rEO$7$F$$$?e(B
e$B$N$G!”<:GT$7$?$ie(B rb_reg_desc(s, len, 0)
e$B$G$b$$$$$H;W$$$^$9$,!"%*e(B
e$B%W%7%g%s$,I=<($5$l$F$$$J$$$3$H$K5$$,$D$-$^$7$?!#e(B

e$B%*%W%7%g%s$NJQ492s$j$b@0M}$7$F!"$3$s$J$H$3$m$G$I$&$G$7$g$&$+!#e(B

Index: re.c

— re.c (revision 12861)
+++ re.c (working copy)
@@ -197,4 +197,39 @@ char_to_option(int c)
}

+static char *
+option_to_str(char str[4], int options)
+{

  • char *p = str;
  • if (options & ONIG_OPTION_MULTILINE) *p++ = ‘m’;
  • if (options & ONIG_OPTION_IGNORECASE) *p++ = ‘i’;
  • if (options & ONIG_OPTION_EXTEND) *p++ = ‘x’;
  • *p = 0;
  • return str;
    +}

+static const char *
+arg_kcode(int options)
+{

  • switch (options & ARG_KCODE_MASK) {
  •  case ARG_KCODE_NONE: return "n";
    
  •  case ARG_KCODE_EUC:  return "e";
    
  •  case ARG_KCODE_SJIS: return "s";
    
  •  case ARG_KCODE_UTF8: return "u";
    
  • }
  • return “”;
    +}

+static const char *
+opt_kcode(int flags)
+{

  • switch (flags) {
  •  case KCODE_NONE: return "n";
    
  •  case KCODE_EUC:  return "e";
    
  •  case KCODE_SJIS: return "s";
    
  •  case KCODE_UTF8: return "u";
    
  • }
  • return “”;
    +}

extern int
rb_char_to_option_kcode(int c, int *option, int *kcode)
@@ -417,27 +452,11 @@ rb_reg_desc(const char *s, long len, VAL
rb_str_buf_cat2(str, “/”);
if (re) {

  • char opts[4];
    rb_reg_check(re);
  • if (RREGEXP(re)->ptr->options & ONIG_OPTION_MULTILINE)
  •  rb_str_buf_cat2(str, "m");
    
  • if (RREGEXP(re)->ptr->options & ONIG_OPTION_IGNORECASE)
  •  rb_str_buf_cat2(str, "i");
    
  • if (RREGEXP(re)->ptr->options & ONIG_OPTION_EXTEND)
  •  rb_str_buf_cat2(str, "x");
    
  • if (*option_to_str(opts, RREGEXP(re)->ptr->options))

  •  rb_str_buf_cat2(str, opts);
    

    if (FL_TEST(re, KCODE_FIXED)) {

  •  switch ((RBASIC(re)->flags & KCODE_MASK)) {
    
  •    case KCODE_NONE:
    
  • rb_str_buf_cat2(str, “n”);
  • break;
  •    case KCODE_EUC:
    
  • rb_str_buf_cat2(str, “e”);
  • break;
  •    case KCODE_SJIS:
    
  • rb_str_buf_cat2(str, “s”);
  • break;
  •    case KCODE_UTF8:
    
  • rb_str_buf_cat2(str, “u”);
  • break;
  •  }
    
  •  rb_str_buf_cat2(str, opt_kcode(RBASIC(re)->flags & KCODE_MASK));
    

    }
    }
    @@ -514,4 +533,5 @@ rb_reg_to_s(VALUE re)
    const UChar* ptr;
    VALUE str = rb_str_buf_new2("(?");

  • char optbuf[5];

    rb_reg_check(re);
    @@ -578,13 +598,10 @@ rb_reg_to_s(VALUE re)
    }

  • if (options & ONIG_OPTION_MULTILINE) rb_str_buf_cat2(str, “m”);
  • if (options & ONIG_OPTION_IGNORECASE) rb_str_buf_cat2(str, “i”);
  • if (options & ONIG_OPTION_EXTEND) rb_str_buf_cat2(str, “x”);
  • if (*option_to_str(optbuf, options)) rb_str_buf_cat2(str, optbuf);

    if ((options & embeddable) != embeddable) {

  • rb_str_buf_cat2(str, “-”);
  • if (!(options & ONIG_OPTION_MULTILINE)) rb_str_buf_cat2(str, “m”);
  • if (!(options & ONIG_OPTION_IGNORECASE)) rb_str_buf_cat2(str, “i”);
  • if (!(options & ONIG_OPTION_EXTEND)) rb_str_buf_cat2(str, “x”);
  • optbuf[0] = ‘-’;
  • option_to_str(optbuf + 1, ~options)
  • rb_str_buf_cat2(str, optbuf);
    }

@@ -1528,5 +1545,12 @@ rb_reg_compile(const char *s, long len,

 if (rb_reg_initialize(re, s, len, options, err) != 0) {
  • VALUE desc = rb_reg_desc(s, len, re);
  • char opts[6];
  • VALUE desc = rb_str_buf_new2("/");
  • rb_reg_expr_str(desc, s, len);
  • opts[0] = ‘/’;
  • option_to_str(opts + 1, options);
  • strlcat(opts, arg_kcode(options), sizeof(opts));
  • rb_str_buf_cat2(desc, opts);
    rb_compile_error(file, line, “%s: %s”, err, RSTRING_PTR(desc));
    }
    @@ -2066,20 +2090,9 @@ rb_reg_s_union(int argc, VALUE *argv)
    args[0] = source;
    args[1] = Qnil;
  •    switch (kcode) {
    
  •      case -1:
    
  •    if (kcode == -1) {
           args[2] = Qnil;
    
  •        break;
    
  •      case KCODE_NONE:
    
  •        args[2] = rb_str_new2("n");
    
  •        break;
    
  •      case KCODE_EUC:
    
  •        args[2] = rb_str_new2("e");
    
  •        break;
    
  •      case KCODE_SJIS:
    
  •        args[2] = rb_str_new2("s");
    
  •        break;
    
  •      case KCODE_UTF8:
    
  •        args[2] = rb_str_new2("u");
    
  •        break;
    
  • }
  • else {
  •        args[2] = rb_str_new2(opt_kcode(kcode));
       }
       return rb_class_new_instance(3, args, rb_cRegexp);

#3

木村です。

On Thu, 2 Aug 2007 15:38:36 +0900
Nobuyoshi N. removed_email_address@domain.invalid wrote:

0: uninitialized Regexp (TypeError)

という適切ではないであろうエラーメッセージが出力されます。

すいません。直そうとは思っていたんですが、後回しにしたまま忘れて
ました。

あ、催促してしまったような形になって申し訳ないです。

パッチですが、一箇所行末のセミコロンが抜けているところがありました。

  • optbuf[0] = ‘-’;
  • option_to_str(optbuf + 1, ~options)
  • rb_str_buf_cat2(str, optbuf);

真ん中の行(ずれてなければ re.c の605行目)がそれです。

ruby -e "/a(?=b)?/.match('a')"
error oniguruma: target of repeat operator is invalid
-e:1: target of repeat operator is invalid: /a(?=b)?/
-e:1:in `match': uninitialized Regexp (TypeError)
        from -e:1:in `<main>'

ruby -e "/a(?:b/.match('a')"
error oniguruma: end pattern with unmatched parenthesis
-e:1: end pattern with unmatched parenthesis: /a(?:b/
-e:1:in `match': uninitialized Regexp (TypeError)
        from -e:1:in `<main>'

e$B$N$h$&$K!"54<V$+$i$N%a%C%;!<%8$b=PNO$5$l$k$3$H$r3NG’$7$^$7$?!#e(B

e$B$H$3$m$Ge(B a(?=b)?
e$B$H$$$&@55,I=8=$J$s$G$9$,!"e(B2e$B$A$c$s$M$k$Ne(BRubye$B%9%l$Ge(B
e$BOCBj$K$G$?$b$N$G$9!#e(B
#e$B$4B8CN$NJ}$bB?$$$G$7$g$&$1$Ie(B

Ruby 1.8.6 ではエラーにはなりませんが、マッチ動作が期待通り('a' とか
'ab' にマッチ)のものではないようです。Ruby 1.9 では上記の通りエラーに
なります。

で、質問なんですが、こういう形(先読み = 幅がないものの繰り返し)というのは
どうすべきなんでしょうか?
鬼車の動作に関しては小迫さんが意図的にこうして
いるような気がしないでもありません(本当にそうなのかは確かめていないとわ
かりませんが)。

わたしの意見は現状の鬼車の実装の通りエラーにしちゃっていいんじゃないか
というものです。

ただし他の処理系を調べてみると

Perl 5.8.8
Python 2.5.1
Java (1.5.10くらい?)
.NET (2.0)

はエラーにならず、

PHP (5.2.1, preg_*)
はエラーになりました。

(Warning: preg_match(): Compilation failed: nothing to repeat at offset 6
in Command line code on line 1)


#4

なかだです。

At Thu, 2 Aug 2007 16:49:35 +0900,
KIMURA Koichi wrote in [ruby-dev:31336]:

パッチですが、一箇所行末のセミコロンが抜けているところがありました。

ついうっかり。

    from -e:1:in `<main>'

e$B$N$h$&$K!"54<V$+$i$N%a%C%;!<%8$b=PNO$5$l$k$3$H$r3NG’$7$^$7$?!#e(B

e$B%3%s%Q%$%k%(%i!<$J$N$Ke(Bmatche$B$,<B9T$5$l$k$N$b4V0c$C$F$$$k$N$G$9$,!"e(B
e$B$3$l$K$D$$$F$O$5$5$@$5$s$H8!F$Cf$G$9!#e(B


#5

まつもと ゆきひろです

In message “Re: [ruby-dev:31334] Re: Invalid error message by illegal
regexp”
on Thu, 2 Aug 2007 15:38:36 +0900, Nobuyoshi N.
removed_email_address@domain.invalid writes:

|e$B85!9$O!“e(Brb_compile_errore$B$r8F$V$H$-$Oe(Brb_reg_desce$B$K$Oe(B0e$B$rEO$7$F$$$?e(B
|e$B$N$G!”<:GT$7$?$ie(B rb_reg_desc(s, len, 0) e$B$G$b$$$$$H;W$$$^$9$,!"%e(B
|e$B%W%7%g%s$,I=<($5$l$F$$$J$$$3$H$K5$$,$D$-$^$7$?!#e(B
|
|e$B%
%W%7%g%s$NJQ492s$j$b@0M}$7$F!"$3$s$J$H$3$m$G$I$&$G$7$g$&$+!#e(B

e$B<B$O%Q%C%A$rFI$s$G$J$$$N$G$9$,!"?.$8$^$9!#%3%_%C%H$7$F$/$@$5e(B
e$B$$$^$;!#e(B


#6

なかだです。

At Mon, 13 Aug 2007 03:02:26 +0900,
SASADA Koichi wrote in [ruby-dev:31382]:

e$B%3%s%Q%$%k%(%i!<$J$N$Ke(Bmatche$B$,<B9T$5$l$k$N$b4V0c$C$F$$$k$N$G$9$,!"e(B
e$B$3$l$K$D$$$F$O$5$5$@$5$s$H8!F$Cf$G$9!#e(B

e$B!!J|CV$7$F$$$F$9$_$^$;$s!#$3$N7o$I$&$J$C$F$k$s$G$7$?$C$1!#e(B

e$B@.8y$J$ie(BRegexpe$B!"%(%i!<$,$"$C$?$ie(BStringe$B$rJV$9!"$H$$$&$N$,5$$KF~$ie(B
e$B$J$$$H$$$&$N$O$"$j$^$9$,!“0l1~=$@5$7$F$”$j$^$9!#e(B

Thu Aug 2 23:42:57 2007 Nobuyoshi N. removed_email_address@domain.invalid

	* parse.y (reg_compile_gen): set error if failed to compile regexp
	  literal.  [ruby-dev:31336]

#7

　ささだです。

Nobuyoshi N. wrote:

e$B%3%s%Q%$%k%(%i!<$J$N$Ke(Bmatche$B$,<B9T$5$l$k$N$b4V0c$C$F$$$k$N$G$9$,!"e(B
e$B$3$l$K$D$$$F$O$5$5$@$5$s$H8!F$Cf$G$9!#e(B

e$B!!J|CV$7$F$$$F$9$_$^$;$s!#$3$N7o$I$&$J$C$F$k$s$G$7$?$C$1!#e(B