1% This file is part of the Attempto Parsing Engine (APE). 2% Copyright 2008-2013, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch). 3% 4% The Attempto Parsing Engine (APE) is free software: you can redistribute it and/or modify it 5% under the terms of the GNU Lesser General Public License as published by the Free Software 6% Foundation, either version 3 of the License, or (at your option) any later version. 7% 8% The Attempto Parsing Engine (APE) is distributed in the hope that it will be useful, but WITHOUT 9% ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 10% PURPOSE. See the GNU Lesser General Public License for more details. 11% 12% You should have received a copy of the GNU Lesser General Public License along with the Attempto 13% Parsing Engine (APE). If not, see http://www.gnu.org/licenses/. 14 15 16:- module(ace_to_drs, [ 17 acetext_to_drs/5, % +Text, -Sentences, -SyntaxTrees, -Drs, -Messages 18 acetext_to_drs/8, % +Text, +Guess, +Catch, -Sentences, -SyntaxTrees, -Drs, -Messages, -Time 19 aceparagraph_to_drs/6, % +Text, -Sentences, -SyntaxTrees, -UnresolvedDrs, -Drs, -Messages 20 aceparagraph_to_drs/10 % +Text, +Guess, +Catch, +StartID, -Sentences, -SyntaxTrees, -UnresolvedDrs, -Drs, -Messages, -Time 21 ]).
31:- use_module('../logger/error_logger'). 32 33:- use_module('../utils/drs_reverse', [ 34 drs_reverse/2 35 ]). 36 37:- use_module(ape_utils, [ 38 cpu_time/2, 39 handle_unknown_words/4 40 ]). 41 42:- use_module(tokenizer). 43 44:- use_module(tokens_to_sentences, [ 45 tokens_to_sentences/2, 46 tokens_to_paragraphs/2 47 ]). 48 49:- style_check(-singleton). 50:- use_module(refres, [ 51 resolve_anaphors/2 52 ]). 53:- style_check(+singleton). 54 55:- style_check(-discontiguous). 56:- style_check(-singleton). 57:- use_module('grammar.plp', [parse/4]). 58:- style_check(+discontiguous). 59:- style_check(+singleton). 60 61%:- debug(result).
Examples:
acetext_to_drs('Every man waits.', Sentences, SyntaxTrees, Drs, Messages) acetext_to_drs('Every man waits.', on, off, Sentences, SyntaxTrees, Drs, Messages, Time)
86acetext_to_drs(Text, Sentences, SyntaxTrees, Drs, Messages) :- 87 acetext_to_drs(Text, off, off, Sentences, SyntaxTrees, Drs, Messages, _). 88 89acetext_to_drs(Text, Guess, Catch, Sentences, SyntaxTrees, Drs, Messages, [TimeT, TimeP, TimeR]) :- 90 cpu_time(tokenizer:tokenize(Text, Tokens), T), 91 tokens_to_paragraphs(Tokens, Paragraphs), 92 paragraphs_to_drs(Paragraphs, Guess, Catch, 1, Sentences, SyntaxTrees, Drs, Messages, [TimeTPre, TimeP, TimeR]), 93 clear_messages, 94 add_messages(Messages), 95 TimeT is T + TimeTPre, 96 !.
Examples:
aceparagraph_to_drs('Every man waits.', Sentences, SyntaxTrees, UnresolvedDrs, Drs, Messages) aceparagraph_to_drs('Every man waits.', on, off, 1, Sentences, SyntaxTrees, UnresolvedDrs, Drs, Messages, Time)
123aceparagraph_to_drs(Text, Sentences, SyntaxTrees, UnresolvedDrs, Drs, Messages) :- 124 aceparagraph_to_drs(Text, off, off, 1, Sentences, SyntaxTrees, UnresolvedDrs, Drs, Messages, _). 125 126aceparagraph_to_drs(Text, Guess, Catch, StartID, Sentences, SyntaxTrees, UnresolvedDrsCopy, Drs, Messages, Time) :- 127 Time = [DTokenizer, DParse, DRefres], 128 clear_ape_messages, 129 catch( 130 ( 131 cpu_time(ace_to_drs:call_tokenizer(Text, Guess, Sentences, SentencesToParse), DTokenizer), 132 cpu_time(ace_to_drs:call_parser(SentencesToParse, StartID, SyntaxTrees, UnresolvedDrs), DParse), 133 ( 134 UnresolvedDrsCopy \== off 135 -> 136 copy_term(UnresolvedDrs, UnresolvedDrsCopy) 137 ; true), 138 cpu_time(ignore(refres:resolve_anaphors(UnresolvedDrs, DrsTmp)), DRefres), 139 (var(SyntaxTrees) -> SyntaxTrees = [] ; true), 140 (var(DrsTmp) -> Drs = drs([], []) ; true), 141 (is_error_message(_, _, _, _) -> Drs = drs([], []) ; Drs = DrsTmp), 142 ! 143 ), 144 CatchType, 145 ( 146 ( 147 Catch == on 148 -> 149 Sentences = [], 150 SyntaxTrees = [], 151 Drs = drs([], []), 152 DTokenizer = -1, 153 DParse = -1, 154 DRefres = -1, 155 add_error_message(ape, '', '', 'Fatal error. Please send screenshot to APE developers.') 156 ; 157 throw(CatchType) 158 ) 159 ) 160 ), 161 get_messages(Messages), 162 !. 163 164 165% Note: should not fail. 166% If sentence splitting fails then the problem must have been that there 167% was no sentence end symbol. In this case we return the original 168% token list. 169call_tokenizer(Text, GuessOnOff, SentencesOutput, SentencesToParse) :- 170 ( 171 is_list(Text) 172 -> 173 Tokens = Text 174 ; 175 tokenizer:tokenize(Text, Tokens) 176 ), 177 ( 178 tokens_to_sentences:tokens_to_sentences(Tokens, SentencesTmp) 179 -> 180 ape_utils:handle_unknown_words(GuessOnOff, SentencesTmp, SentencesOutput, 1), 181 SentencesToParse = SentencesOutput 182 ; 183 SentencesOutput = [Tokens], 184 SentencesToParse = [], 185 last(Tokens, LastToken), 186 add_error_message(sentence, '', LastToken, 'Every ACE text must end with . or ? or !.') 187 ). 188 189call_parser(Sentences, StartID, Syntaxtrees, DrsReversed) :- 190 ignore(grammar:parse(Sentences, StartID, Syntaxtrees, Drs)), 191 ignore(drs_reverse:drs_reverse(Drs, DrsReversed)).
196paragraphs_to_drs([], _, _, _, [], [], drs([],[]), [], [0,0,0]) :- 197 !. 198 199paragraphs_to_drs([P|Paragraphs], Guess, Catch, StartID, Sentences, Trees, drs(Dom,Conds), Messages, [TimeT, TimeP, TimeR]) :- 200 aceparagraph_to_drs(P, Guess, Catch, StartID, S, T, off, drs(D,C), M, [TT, TP, TR]), 201 length(S, SentenceCount), 202 NewStartID is StartID + SentenceCount, 203 ( 204 is_error_message(_, _, _, _) 205 -> 206 Sentences = S, 207 Trees = T, 208 Dom = D, 209 Conds = C, 210 Messages = M, 211 TimeT = TT, 212 TimeP = TP, 213 TimeR = TR 214 ; 215 paragraphs_to_drs(Paragraphs, Guess, Catch, NewStartID, SentencesR, TreesR, drs(DomR, CondsR), MessagesR, [TimeTR, TimePR, TimeRR]), 216 append(S, SentencesR, Sentences), 217 append(T, TreesR, Trees), 218 append(D, DomR, Dom), 219 append(C, CondsR, Conds), 220 append(M, MessagesR, Messages), 221 TimeT is TT + TimeTR, 222 TimeP is TP + TimePR, 223 TimeR is TR + TimeRR 224 ), 225 !. 226 227 228clear_ape_messages :- 229 clear_messages(character), 230 clear_messages(word), 231 clear_messages(sentence), 232 clear_messages(anaphor), 233 clear_messages(pronoun)
ACE to DRS