:- use_module(library(sgml)). :- use_module(library(lists),[member/2,append/3,reverse/2]). test(F):- load_xml_file(F,T), % pretty_print(T,0), elements(T,['VNCLASS'],f(X,C)), value(X,'ID',ID), check(ID,C). verbnet2prolog(F):- load_xml_file(F,T), % pretty_print(T,0), elements(T,['VNCLASS'],f(X,C)), value(X,'ID',ID), members(C,F,[],ID). /* ---------------------------------------------------------------------- Pretty Printing XML ---------------------------------------------------------------------- */ pretty_print([],_). pretty_print([element(A,B,C)|L],Tab):- !, tab(Tab), write(A), write(' '), write(B), nl, NewTab is Tab+3, pretty_print(C,NewTab), pretty_print(L,Tab). pretty_print([E|L],Tab):- tab(Tab), write(unknown:E),nl, pretty_print(L,Tab). /* ---------------------------------------------------------------------- Checking Syntax ---------------------------------------------------------------------- */ check(ID,X):- elements(X,['FRAMES','FRAME'],f(_,Frame)), elements(Frame,['DESCRIPTION'],f(De,_)), value(De,primary,Primary), write(ID), write(': '), write(Primary),nl, fail. check(_,X):- elements(X,['SUBCLASSES','VNSUBCLASS'],f(Y,Sub)), value(Y,'ID',ID), check(ID,Sub), fail. check(_,_). /* ---------------------------------------------------------------------- Processing all members of a VerbNet class ---------------------------------------------------------------------- */ members(X,File,SuperFrames,XID):- findall(Name,(elements(X,['MEMBERS','MEMBER'],f(Member,_)),value(Member,name,Name)),Names), findall(Frame,(elements(X,['FRAMES','FRAME'],f(_,Frame))),SubFrames), append(SuperFrames,SubFrames,Frames), frameMember(Frames,Names,XID,File), findall(Sub,(elements(X,['SUBCLASSES','VNSUBCLASS'],f(Y,Sub)), value(Y,'ID',YID), members(Sub,File,Frames,YID)),_). /* ---------------------------------------------------------------------- Process a member/frame pair ---------------------------------------------------------------------- */ frameMember([],_,_,_):- !. frameMember([F|L],Names,ID,File):- pairMemberFrame(Names,F,ID,File), frameMember(L,Names,ID,File). pairMemberFrame([],_,_,_). pairMemberFrame([Name|L],Frame,ID,File):- elements(Frame,['DESCRIPTION'],f(De,_)), value(De,primary,Pr), elements(Frame,['SYNTAX'],f(_,Syntax)), format('verbnet(~q, ~q, ',[Name,Pr]), subcatpat(Syntax,[],SubCatPat), format('~q, ',[SubCatPat]), subcat(Syntax,[],SubCat), atom_chars(ID,IDChars), formatID(IDChars,[_,_|FID]), format('~q,~q). %%% ~p (~p)~n',[SubCat,FID,ID,File]), !, pairMemberFrame(L,Frame,ID,File). /* ---------------------------------------------------------------------- Format VerbNet ID ---------------------------------------------------------------------- */ formatID(Chars,[Pre,Sep1|L]):- Seps = ['-','.'], member(Sep1,Seps), append(PreChars,[Sep1|RestChars],Chars), \+ ( member(Sep2,Seps), member(Sep2,PreChars) ), !, formatNumber(PreChars,Pre), formatID(RestChars,L). formatID(Chars,[ID]):- formatNumber(Chars,ID). formatNumber(Chars,Num):- Chars = [First|_], member(First,['0','1','2','3','4','5','6','7','8','9']), !, number_chars(Num,Chars). formatNumber(Chars,Atom):- atom_chars(Atom,Chars). /* ---------------------------------------------------------------------- Printing the subcat frame ---------------------------------------------------------------------- */ subcat([],Acc1,Acc2):- postproc(Acc1,[],Acc2). subcat([E|L],Acc1,Acc3):- cat(E,Acc1,Acc2), subcat(L,Acc2,Acc3). subcatpat([],Acc1,Acc2):- postproc(Acc1,[],Acc2). subcatpat([E|L],Acc1,Acc3):- catpat(E,Acc1,Acc2), subcatpat(L,Acc2,Acc3). /* ---------------------------------------------------------------------- Post Processing (reverse + rewriting) ---------------------------------------------------------------------- */ postproc([],L,L). postproc([np,pp|L1],Acc,L2):- !, postproc(L1,[pp|Acc],L2). postproc([np:V,pp|L1],Acc,L2):- !, postproc(L1,[pp:V|Acc],L2). postproc([s,pp|L1],Acc,L2):- !, postproc(L1,[s|Acc],L2). postproc([s:V,pp|L1],Acc,L2):- !, postproc(L1,[s:V|Acc],L2). postproc([X|L1],Acc,L2):- postproc(L1,[X|Acc],L2). /* ---------------------------------------------------------------------- Syntactic Restrictions ---------------------------------------------------------------------- */ restr(Restr,Type):- Restr = [element('SYNRESTRS',[],L)], member(element('SYNRESTR',['Value'='+',type=Type],[]),L), !. s_restr(that_comp). s_restr(for_comp). s_restr(wh_comp). % s_restr(poss_ing). % not sentence! s_restr(acc_ing). s_restr(oc_ing). s_restr(ac_ing). s_restr(be_sc_ing). s_restr(np_omit_ing). % ??? s_restr(np_ppart). % ??? s_restr(np_p_ing). % ??? s_restr(np_ing). % ??? s_restr(how_extract). s_restr(what_extract). s_restr(wh_inf). s_restr(what_inf). s_restr(wheth_inf). s_restr(oc_bare_inf). s_restr(oc_to_inf). s_restr(ac_to_inf). s_restr(sc_to_inf). s_restr(np_to_inf). s_restr(vc_to_inf). s_restr(rs_to_inf). % very rare -- bug? s_restr(to_inf_rs). % very rare -- bug? /* ---------------------------------------------------------------------- Printing a category ---------------------------------------------------------------------- */ cat(element('NP', [value=Value], R),A,[s:Value|A]):- s_restr(S), restr(R,S), !. cat(element('NP', [value=Value], _),A,[np:Value|A]):- !. cat(element('PREP', [], _),A,[pp|A]):- !. cat(element('PREP', [value=Value], _),A,[prep:Value|A]):- !. cat(element('LEX', [value='[+be]'], _),A,[lex:be|A]):- !. cat(element('LEX', [value='it[+be]'], _),A,[lex:be,lex:it|A]):- !. cat(element('LEX', [value=at], _),A,[prep:at|A]):- !. cat(element('LEX', [value=of], _),A,[prep:of|A]):- !. cat(element('LEX', [value=Value], _),A,[lex:Value|A]):- !. cat(element('VERB',[],[]),A,[v|A]):- !. cat(element('ADJ',[],[]),A,[adj|A]):- !. cat(element('ADV',[],[]),A,[adv|A]):- !. cat(U,A,[unk:U|A]):- !. catpat(element('NP',_,R),A,[s|A]):- s_restr(S), restr(R,S), !. catpat(element('NP',_,_),A,[np|A]):- !. catpat(element('PREP', [], _),A,[pp|A]):- !. catpat(element('PREP', [value=_], _),A,[prep|A]):- !. catpat(element('LEX',[value=at],_),A,[prep|A]):- !. catpat(element('LEX',[value=of],_),A,[prep|A]):- !. catpat(element('LEX',_,_),A,[lex|A]):- !. catpat(element('VERB',_,_),A,[v|A]):- !. catpat(element('ADJ',_,_),A,[adj|A]):- !. catpat(element('ADV',_,_),A,[adv|A]):- !. catpat(_,A,[unk|A]):- !. /* ---------------------------------------------------------------------- Processing elements of the XML tree ---------------------------------------------------------------------- */ elements([element(X,F,L)|_],[X],f(F,L)). elements([element(X,_,L)|_],[X|R],A):- elements(L,R,A). elements([_|L],X,A):- elements(L,X,A). /* ---------------------------------------------------------------------- Accessing a value ---------------------------------------------------------------------- */ value([Name=Value|_],Name,Value):- !. value([_|L],Name,Value):- value(L,Name,Value). /* ---------------------------------------------------------------------- VerbNet Directory ---------------------------------------------------------------------- */ verbnet_dir('ext/VerbNet/'). /* ---------------------------------------------------------------------- Processing all XML files ---------------------------------------------------------------------- */ process([]). process([File|L]):- % test(File), verbnet2prolog(File), process(L). /* ---------------------------------------------------------------------- Start Predicate ---------------------------------------------------------------------- */ run:- verbnet_dir(Dir), exists_directory(Dir), WildCard = '*.xml', % WildCard = 'put-9.1.xml', atom_concat(Dir,WildCard,Expand), expand_file_name(Expand,Files), write(':- dynamic verbnet/5.'), nl, process(Files), halt. :- run.