Possum [email protected] writes:
Recently I’ve been working with the Stanford parser (written in Java), which has an
incomplete Ruby wrapper. I can get the result like this as a string,
but I still want to manipulate the result as a tree. What is a
convenient way for me to convert the following string to a tree
object? Can someone give me some suggestions?
EBADLNG ?
Perhaps not…
----(parse-sentence.rb)------------------------------------------------------------
# Bracketed parse tree for one sentence, kept as a plain string; it is
# handed to the Lisp translator further down.
(sexp = "(ROOT
(S
(S
(NP
(NP (DT The) (JJS strongest) (NN rain))
(VP
(ADVP (RB ever))
(VBN recorded)
(PP (IN in)
(NP (NNP India)))))
(VP
(VP (VBD shut)
(PRT (RP down))
(NP
(NP (DT the) (JJ financial) (NN hub))
(PP (IN of)
(NP (NNP Mumbai)))))
(, ,)
(VP (VBD snapped)
(NP (NN communication) (NNS lines)))
(, ,)
(VP (VBD closed)
(NP (NNS airports)))
(CC and)
(VP (VBD forced)
(NP
(NP (NNS thousands))
(PP (IN of)
(NP (NNS people))))
(S
(VP (TO to)
(VP
(VP (VB sleep)
(PP (IN in)
(NP (PRP$ their) (NNS offices))))
(CC or)
(VP (VB walk)
(NP (NN home))
(PP (IN during)
(NP (DT the) (NN night))))))))))
(, ,)
(NP (NNS officials))
(VP (VBD said)
(NP-TMP (NN today)))
(. .)))")
(code = <<ENDCODE
;; Readtable for reading parse trees: a copy of the standard readtable
;; whose case mode is :PRESERVE, so tokens keep the case they were
;; written in.
(defparameter rt
  (let ((table (copy-readtable nil)))
    (setf (readtable-case table) :preserve)
    table))
(defun translate-parsed-sentence-string (string)
  "
STRING:  The printed form of a parse tree, e.g. (ROOT (S (NP (DT The)))).
DO:      Read STRING with the case-preserving readtable RT and hand the
         resulting s-expression to TRANSLATE-PARSED-SENTENCE-SEXP.
RETURN:  Whatever TRANSLATE-PARSED-SENTENCE-SEXP returns.
"
  (translate-parsed-sentence-sexp
   ;; The reader only consults the special variable *READTABLE*; binding
   ;; a variable that is merely named READTABLE has no effect on it.
   ;; *READ-EVAL* is switched off so that read-time evaluation syntax in
   ;; the input signals an error instead of running code.
   (let ((*readtable* rt)
         (*read-eval* nil))
     (read-from-string string))))
(defun collect-classes (sexp)
  "
SEXP:    A parse tree: nested lists whose CAR is the node label.
RETURN:  A list of the labels found at the head of every list in SEXP,
         with duplicates removed.
"
  (let ((seen '()))
    (labels ((walk (node)
               (when (consp node)
                 (push (first node) seen)
                 (dolist (child (rest node))
                   (walk child)))))
      (walk sexp))
    (remove-duplicates seen)))
(defun concatenate-strings (list-of-strings)
  "
LIST-OF-STRINGS:  Each element is either a string, or a list holding a
                  string followed by a start position and an optional
                  end position, designating a substring.
RETURN:           A string made of all the designated strings and
                  substrings, joined in order.
"
  (with-output-to-string (out)
    (dolist (piece list-of-strings)
      (if (stringp piece)
          (write-string piece out)
          (destructuring-bind (text from &optional to) piece
            (write-string text out :start from :end to))))))
(defun string-replace (string pattern replace &key (test (function char=)))
  "
RETURN:  A string built from STRING where all occurrences of PATTERN
         are replaced by the REPLACE string.  When PATTERN is empty
         there is nothing to replace and a copy of STRING is returned.
TEST:    The function used to compare the elements of the PATTERN
         with the elements of the STRING.
"
  (let ((pattern-length (length pattern)))
    (if (zerop pattern-length)
        ;; SEARCH finds an empty pattern at the start position itself, so
        ;; the scan below would never advance and never terminate.
        (copy-seq string)
        (concatenate-strings
         (loop
           :for start = 0 :then (+ pos pattern-length)
           :for pos = (search pattern string :start2 start :test test)
           ;; Keep the text before the match, then the replacement ...
           :if pos :collect (list string start pos)
           :and :collect replace
           ;; ... or, after the last match, the remaining tail.
           :else :collect (list string start)
           :while pos)))))
(defun rubify (name)
  "
NAME:    A string designator for a parse-tree node label, e.g. NP-TMP.
RETURN:  A string derived from NAME: every $ is spelled DOLLAR, the
         result is split at hyphens, and each piece is capitalized
         before the pieces are joined again (NP-TMP gives NpTmp).
"
  ;; The hyphen is obtained with CHAR rather than written as a
  ;; sharpsign-backslash character literal: this code is embedded in an
  ;; interpolating Ruby here-document, which would swallow the backslash.
  (let ((hyphen (char "-" 0)))
    (format nil "~{~:(~A~)~}"
            (loop
              :with string = (string-replace (string name) "$" "DOLLAR")
              :with start = 0
              :for end = (position hyphen string :start start)
              :collect (subseq string start end)
              :while end
              :do (setf start (1+ end))))))
(defun generate-sexp-instance-building (sexp)
  "
SEXP:    A parse-tree node: a list whose CAR is the node label and whose
         CDR holds the children (sub-nodes, symbols or other atoms).
RETURN:  A string of Ruby source that instantiates the class named by
         RUBIFY of the label, passing the translated children as
         arguments separated by a comma and a newline.
"
  (format nil "(~A.new(~{~A~^,~%~}))"
          (rubify (car sexp))
          (mapcar (lambda (item)
                    (etypecase item
                      (cons (generate-sexp-instance-building item))
                      ;; A word of the sentence: emit its name as a
                      ;; double-quoted string.  ~S supplies the quotes,
                      ;; so no backslash escape has to survive the Ruby
                      ;; here-document this code is embedded in.
                      (symbol (format nil "~S" (symbol-name item)))
                      (t (format nil "~S" item))))
                  (cdr sexp))))
(defun translate-parsed-sentence-sexp (sexp)
  "
SEXP:    A parse tree: nested lists whose CAR is the node label.
DO:      Print a Ruby program on the standard output: a begin block
         holding a Node base class, one empty subclass of Node for each
         label returned by COLLECT-CLASSES, and the expression built by
         GENERATE-SEXP-INSTANCE-BUILDING, closed by a final end.
RETURN:  NIL.
"
  (format t "(begin~%")
  ;; Base class: every node just stores its constructor arguments.
  (princ "(class Node
attr_accessor :children
(def initialize(*args)
(@children=args)
end)
end)
")
  (dolist (class (collect-classes sexp))
    (format t "(class ~A < Node~%end)~%" (rubify class)))
  (princ (generate-sexp-instance-building sexp))
  (format t "~%end)~%"))
ENDCODE
)
(begin
  # A bare . , ; or : has a special meaning to the Lisp reader, so the
  # punctuation nodes get a spelled-out label and a quoted child.
  lisp_tree = sexp.
    gsub("(. .)", '(DOT ".")').
    gsub("(, ,)", '(COMMA ",")').
    gsub("(; ;)", '(SEMICOLON ";")').
    gsub("(: :)", '(COLON ":")')
  # The tree is embedded in a Lisp string literal below, where a double
  # quote and a backslash must each be preceded by a backslash.
  lisp_literal = lisp_tree.gsub(/[\\"]/) { |char| "\\" + char }
  # The block form closes the file even if one of the writes raises.
  File.open("/tmp/parse-sentence.lisp", "w") do |file|
    file.write(code)
    file.write("(princ (translate-parsed-sentence-string \"#{lisp_literal}\"))\n")
    file.write("(finish-output)\n")
  end
end)
(parseTree=(begin
  # Run the generated Lisp script; what it prints is a Ruby program.
  expression = IO.popen("clisp /tmp/parse-sentence.lisp", "w+")
  begin
    generated = expression.readlines
  ensure
    # Release the pipe even if reading fails.
    expression.close
  end
  # The generated program finishes with a line reading "end)".  The script
  # then also prints the return value of the translation, which is not
  # Ruby, so everything after that last "end)" line is discarded.
  generated.pop until generated.empty? || generated.last.strip == "end)"
  # NOTE: eval executes whatever clisp printed; only use this with a
  # trusted script and trusted parser output.
  eval(generated.join)
end))
puts parseTree
parseTree
returns: #<Root:0x7f9219695d68 @children=[#<S:0x7f9219695db8
@children=[#<S:0x7f9219696150 @children=[#<Np:0x7f9219697a78
@children=[#<Np:0x7f9219697e10 @children=[#<Dt:0x7f9219697fc8
@children=[“The”]>, #<Jjs:0x7f9219697f50 @children=[“strongest”]>,
#<Nn:0x7f9219697e88 @children=[“rain”]>]>, #<Vp:0x7f9219697af0
@children=[#<Advp:0x7f9219697d48 @children=[#<Rb:0x7f9219697d70
@children=[“ever”]>]>, #<Vbn:0x7f9219697ca8 @children=[“recorded”]>,
#<Pp:0x7f9219697b40 @children=[#<In:0x7f9219697c30 @children=[“in”]>,
#<Np:0x7f9219697b90 @children=[#<Nnp:0x7f9219697bb8
@children=[“India”]>]>]>]>]>, #<Vp:0x7f92196961a0
@children=[#<Vp:0x7f9219697438 @children=[#<Vbd:0x7f92196979b0
@children=[“shut”]>, #<Prt:0x7f9219697910 @children=[#<Rp:0x7f9219697938
@children=[“down”]>]>, #<Np:0x7f92196974b0
@children=[#<Np:0x7f9219697758 @children=[#<Dt:0x7f9219697870
@children=[“the”]>, #<Jj:0x7f92196977f8 @children=[“financial”]>,
#<Nn:0x7f9219697780 @children=[“hub”]>]>, #<Pp:0x7f9219697550
@children=[#<In:0x7f92196976b8 @children=[“of”]>, #<Np:0x7f92196975c8
@children=[#<Nnp:0x7f92196975f0 @children=[“Mumbai”]>]>]>]>]>,
#<Comma:0x7f9219697398 @children=[“,”]>, #<Vp:0x7f92196971b8
@children=[#<Vbd:0x7f9219697320 @children=[“snapped”]>,
#<Np:0x7f9219697208 @children=[#<Nn:0x7f92196972a8
@children=[“communication”]>, #<Nns:0x7f9219697230
@children=[“lines”]>]>]>, #<Comma:0x7f9219697118 @children=[“,”]>,
#<Vp:0x7f9219696f88 @children=[#<Vbd:0x7f92196970a0
@children=[“closed”]>, #<Np:0x7f9219697000
@children=[#<Nns:0x7f9219697028 @children=[“airports”]>]>]>,
#<Cc:0x7f9219696ec0 @children=[“and”]>, #<Vp:0x7f92196961f0
@children=[#<Vbd:0x7f9219696e48 @children=[“forced”]>,
#<Np:0x7f9219696bc8 @children=[#<Np:0x7f9219696da8
@children=[#<Nns:0x7f9219696dd0 @children=[“thousands”]>]>,
#<Pp:0x7f9219696c18 @children=[#<In:0x7f9219696d08 @children=[“of”]>,
#<Np:0x7f9219696c68 @children=[#<Nns:0x7f9219696c90
@children=[“people”]>]>]>]>, #<S:0x7f9219696240
@children=[#<Vp:0x7f9219696290 @children=[#<To:0x7f9219696b28
@children=[“to”]>, #<Vp:0x7f9219696330 @children=[#<Vp:0x7f9219696830
@children=[#<Vb:0x7f9219696a60 @children=[“sleep”]>, #<Pp:0x7f9219696880
@children=[#<In:0x7f92196969e8 @children=[“in”]>, #<Np:0x7f92196968d0
@children=[#<Prpdollar:0x7f9219696970 @children=[“their”]>,
#<Nns:0x7f92196968f8 @children=[“offices”]>]>]>]>, #<Cc:0x7f9219696740
@children=[“or”]>, #<Vp:0x7f92196963d0 @children=[#<Vb:0x7f92196966c8
@children=[“walk”]>, #<Np:0x7f9219696628 @children=[#<Nn:0x7f9219696650
@children=[“home”]>]>, #<Pp:0x7f9219696420
@children=[#<In:0x7f9219696588 @children=[“during”]>,
#<Np:0x7f9219696470 @children=[#<Dt:0x7f9219696510 @children=[“the”]>,
#<Nn:0x7f9219696498 @children=[“night”]>]>]>]>]>]>]>]>]>]>,
#<Comma:0x7f92196960b0 @children=[“,”]>, #<Np:0x7f9219696010
@children=[#<Nns:0x7f9219696038 @children=[“officials”]>]>,
#<Vp:0x7f9219695e80 @children=[#<Vbd:0x7f9219695f70 @children=[“said”]>,
#<NpTmp:0x7f9219695ed0 @children=[#<Nn:0x7f9219695ef8
@children=[“today”]>]>]>, #<Dot:0x7f9219695de0 @children=[“.”]>]>]>