[Ruby 1.9 - Bug #4544] Ripperで「:"a \n b \n c"」を正常にパースできない

Issue #4544 has been updated by Masaya T…

ripperのコードを読んでみましたが通常のparse.cとの何が違うか理解するのが難しいですね。

このチケットの原因になってる 、yylex
でdelayed_tokenをdispatchするコード(parse.y:7855)の直後のreturnを
r24557([Bug #1071])で削除していますが、
ripper_dispatch_delayed_token(parser, t);
ripper_dispatch_scan_event(parser, t);
と実行される可能性があるのは違和感を感じます。

そもそもこのif文に入るのはどういう場合を想定していたんでしょう?

patchをhere document部分のripperコードを参考に書いてみましたが、
想定がわからないのでyylexの部分は触ってません。
自信はないのでレビューしてもらえませんでしょうか?(中田さん?)

diff --git a/parse.y b/parse.y
index 06f96ce..31bdc6f 100644
--- a/parse.y
+++ b/parse.y
@@ -5984,6 +5984,21 @@ parser_parse_string(struct parser_params *parser,
NODE *quote)

 tokfix();
 set_yylval_str(STR_NEW3(tok(), toklen(), enc, func));

+#ifdef RIPPER

  • if (!NIL_P(parser->delayed)){
  •   if(lex_p - parser->tokp > 0 ){
    
  •       rb_str_append(parser->delayed,
    
  •                     STR_NEW3(parser->tokp,
    
  •                              lex_p - parser->tokp,
    
  •                              enc,
    
  •                              func));
    
  •   }
    
  •   ripper_dispatch_delayed_token(parser, tSTRING_CONTENT);
    
  •   parser->tokp = lex_p;
    
  • }
    +#endif
  • return tSTRING_CONTENT;
    }

diff --git a/test/ripper/test_scanner_events.rb
b/test/ripper/test_scanner_events.rb
index 25e4b13..3c62f52 100644
--- a/test/ripper/test_scanner_events.rb
+++ b/test/ripper/test_scanner_events.rb
@@ -67,8 +67,7 @@ class TestRipper::ScannerEvents < Test::Unit::TestCase
[[3, 0], :on_heredoc_end, “EOS”]],
Ripper.lex(“<<EOS\nheredoc\nEOS”)
assert_equal [[[1, 0], :on_regexp_beg, “/”],

  •              [[1, 1], :on_tstring_content, "foo\n"],
    
  •              [[2, 0], :on_tstring_content, "bar"],
    
  •              [[1, 1], :on_tstring_content, "foo\nbar"],
                 [[2, 3], :on_regexp_end, "/"]],
                Ripper.lex("/foo\nbar/")
    
    end

Bug #4544: Ripperで「:“a \n b \n c”」を正常にパースできない

Author: Kazunori SAKAMOTO
Status: Open
Priority: Normal
Assignee:
Category:
Target version:
ruby -v: ruby 1.9.2p180 (2011-02-18 revision 30909) [i686-linux]

=begin
Ripper::SexpBuilder.new(%Q!:“a \n b \n c”!).parse にて、on_tstring_content
メソッドが二回連続で呼び出されます。

検証済みバージョン

  • ruby 1.9.3dev (2011-03-31 trunk 31223) [i686-linux]
  • ruby 1.9.2p180 (2011-02-18 revision 30909) [i686-linux]
  • ruby 1.9.1p431 (2011-02-18 revision 30908) [i686-linux]

検証用スクリプト
require ‘ripper’

class Parser < Ripper::SexpBuilder
ms = Ripper::SexpBuilder.new(‘’).methods
defs = ms.map { |s| s.to_s }
.select { |s| s.start_with?(‘on_’) }
.map { |s| %Q{
def #{s}(*args)
print '#{s}: ’
p args
end
}}.join
eval(defs)
end

def parse(src)
puts “" + src.inspect + "
Parser.new(src).parse
puts
end

parse(%Q!:“a \n b \n c”!)
parse(%Q!:“a \n b c”!)
parse(%Q!:“a b c”!)

実行結果
----“:"a \n b \n c"”----
on_symbeg: [“:"”]
on_tstring_content: [“a \n b \n”]
on_tstring_content: [" c"]
on_xstring_new: []
on_xstring_add: [[], [" c"]]
on_tstring_end: [“"”]
on_dyna_symbol: [[[], [" c"]]]
on_stmts_new: []
on_stmts_add: [[], [[[], [" c"]]]]
on_program: [[[], [[[], [" c"]]]]]

----“:"a \n b c"”----
on_symbeg: [“:"”]
on_tstring_content: [“a \n”]
on_tstring_content: [" b c"]
on_xstring_new: []
on_xstring_add: [[], [" b c"]]
on_tstring_end: [“"”]
on_dyna_symbol: [[[], [" b c"]]]
on_stmts_new: []
on_stmts_add: [[], [[[], [" b c"]]]]
on_program: [[[], [[[], [" b c"]]]]]

----“:"a b c"”----
on_symbeg: [“:"”]
on_tstring_content: [“a b c”]
on_xstring_new: []
on_xstring_add: [[], [“a b c”]]
on_tstring_end: [“"”]
on_dyna_symbol: [[[], [“a b c”]]]
on_stmts_new: []
on_stmts_add: [[], [[[], [“a b c”]]]]
on_program: [[[], [[[], [“a b c”]]]]]
=end

なかだです。

At Sun, 12 Jun 2011 09:18:46 +0900,
Masaya T. wrote in [ruby-dev:43762]:

このチケットの原因になってる 、yylex
でdelayed_tokenをdispatchするコード(parse.y:7855)の直後のreturnを
r24557([Bug #1071])で削除していますが、
ripper_dispatch_delayed_token(parser, t);
ripper_dispatch_scan_event(parser, t);
と実行される可能性があるのは違和感を感じます。

そもそもこのif文に入るのはどういう場合を想定していたんでしょう？

ちょっと思い出せませんが、樽家さんの修正で不要になるようです。

patchをhere document部分のripperコードを参考に書いてみましたが、
想定がわからないのでyylexの部分は触ってません。
自信はないのでレビューしてもらえませんでしょうか？(中田さん？)

  •       rb_str_append(parser->delayed,
    
  •                     STR_NEW3(parser->tokp,
    
  •                              lex_p - parser->tokp,
    
  •                              enc,
    
  •                              func));
    

新しくStringを作ってappendしなくても、rb_enc_str_buf_cat()という
のがあります。

diff --git i/parse.y w/parse.y
index 06f96ce..60e5a3b 100644
--- i/parse.y
+++ w/parse.y
@@ -5984,6 +5984,18 @@ parser_parse_string(struct parser_params *parser,
NODE *quote)

 tokfix();
 set_yylval_str(STR_NEW3(tok(), toklen(), enc, func));

+#ifdef RIPPER

  • if (!NIL_P(parser->delayed)){
  • ptrdiff_t len = lex_p - parser->tokp;
  • if (len > 0) {
  •  rb_enc_str_buf_cat(parser->delayed, parser->tokp, len, enc);
    
  • }
  • ripper_dispatch_delayed_token(parser, tSTRING_CONTENT);
  • parser->tokp = lex_p;
  • }
    +#endif
  • return tSTRING_CONTENT;
    }

@@ -7853,6 +7865,7 @@ yylex(void *p)
#ifdef RIPPER
if (!NIL_P(parser->delayed)) {
ripper_dispatch_delayed_token(parser, t);

  • return t;
    }
    if (t != 0)
    ripper_dispatch_scan_event(parser, t);
    diff --git i/test/ripper/test_scanner_events.rb
    w/test/ripper/test_scanner_events.rb
    index 25e4b13..d89e50e 100644
    --- i/test/ripper/test_scanner_events.rb
    +++ w/test/ripper/test_scanner_events.rb
    @@ -67,10 +67,17 @@ class TestRipper::ScannerEvents <
    Test::Unit::TestCase
    [[3, 0], :on_heredoc_end, “EOS”]],
    Ripper.lex("<<EOS\nheredoc\nEOS")
    assert_equal [[[1, 0], :on_regexp_beg, “/”],
  •              [[1, 1], :on_tstring_content, "foo\n"],
    
  •              [[2, 0], :on_tstring_content, "bar"],
    
  •              [[1, 1], :on_tstring_content, "foo\nbar"],
                 [[2, 3], :on_regexp_end, "/"]],
                Ripper.lex("/foo\nbar/")
    
  • assert_equal [[[1, 0], :on_regexp_beg, “/”],

  •              [[1, 1], :on_tstring_content, "foo\n\u3020"],
    
  •              [[2, 3], :on_regexp_end, "/"]],
    
  •             Ripper.lex("/foo\n\u3020/")
    
  • assert_equal [[[1, 0], :on_tstring_beg, “’”],

  •              [[1, 1], :on_tstring_content, "foo\n\xe3\x80\xa0"],
    
  •              [[2, 3], :on_tstring_end, "'"]],
    
  •             Ripper.lex("'foo\n\xe3\x80\xa0'")
    

    end

    def test_location
    @@ -534,6 +541,10 @@ class TestRipper::ScannerEvents <
    Test::Unit::TestCase
    scan(‘tstring_content’, ‘“abc#{1}def”’)
    assert_equal [‘sym’],
    scan(‘tstring_content’, ‘:“sym”’)

  • assert_equal [‘a b c’],

  •             scan('tstring_content', ':"a b c"')
    
  • assert_equal [“a\nb\nc”],

  •             scan('tstring_content', ":'a\nb\nc'")
    

    end

    def test_tstring_end