1% This file is part of the Attempto Parsing Engine (APE). 2% Copyright 2008-2013, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch). 3% 4% The Attempto Parsing Engine (APE) is free software: you can redistribute it and/or modify it 5% under the terms of the GNU Lesser General Public License as published by the Free Software 6% Foundation, either version 3 of the License, or (at your option) any later version. 7% 8% The Attempto Parsing Engine (APE) is distributed in the hope that it will be useful, but WITHOUT 9% ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 10% PURPOSE. See the GNU Lesser General Public License for more details. 11% 12% You should have received a copy of the GNU Lesser General Public License along with the Attempto 13% Parsing Engine (APE). If not, see http://www.gnu.org/licenses/. 14 15 16:- module(ape, [ 17 get_ape_results_timelimit/3, 18 get_ape_results_timelimit/4, 19 get_ape_results/2, 20 get_ape_results/3 21 ]).
52% Default encoding used for opening files in text mode. 53:- set_prolog_flag(encoding, utf8). 54 55% Note: The following line fixes the testcase: 3.1415926536 approximates Pi. 56% Note that using 'f' instead of 'g' does not drop the trailing zeros. 57% @bug: sometimes weird digits are attached to the end. 58:- set_prolog_flag(float_format, '%.11g'). 59 60% Richard A. O'Keefe: to see full precision for IEEE doubles, do 61% :- set_prolog_flag(float_format, '%.18g'). 62% So it seems that values above 18 do not make sense. 63 64:- assert(user:file_search_path(ape, '.')). 65:- prolog_load_context(directory, Dir), 66 asserta(user:file_search_path(ape, Dir)). 67 68 69:- use_module(ape('utils/morphgen'), [ 70 acesentencelist_pp/2 71 ]). 72 73:- use_module(ape('utils/ace_niceace'), [ 74 tokens_to_sentences/2 75 ]). 76 77:- use_module(ape('utils/drs_to_xml')). 78:- use_module(ape('utils/drs_to_fol_to_prenex')). 79:- use_module(ape('utils/drs_to_ascii')). 80:- use_module(ape('utils/drs_to_ace')). 81:- use_module(ape('utils/drs_to_coreace')). 82:- use_module(ape('utils/drs_to_npace')). 83:- use_module(ape('utils/drs_to_html')). 84:- use_module(ape('utils/drs_to_ruleml')). 85:- use_module(ape('utils/tree_utils')). 86:- use_module(ape('utils/trees_to_ascii')). 87:- use_module(ape('utils/drs_to_tptp')). 88:- use_module(ape('lexicon/clex')). 89:- use_module(ape('lexicon/ulex')). 90:- use_module(ape('parser/ace_to_drs')). 91:- use_module(ape('logger/error_logger')). 92 93:- use_module(ape('utils/xmlterm_to_xmlatom'), [ 94 xmlterm_to_xmlatom/2, 95 xmlterm_to_xmlatom/3 96 ]). 97 98:- use_module(ape('utils/serialize_term'), [ 99 serialize_term_into_atom/2 100 ]). 101 102:- use_module(ape('utils/owlswrl/get_owl_output'), [ 103 get_owl_output/6 104 ]).
119get_ape_results_timelimit(Input, Content, TimeLimit) :- 120 get_ape_results_timelimit(Input, _ContentType, Content, TimeLimit). 121 122get_ape_results_timelimit(Input, ContentType, Content, TimeLimit) :- 123 catch( 124 call_with_time_limit( 125 TimeLimit, 126 get_ape_results(Input, ContentType, Content) 127 ), 128 CatchType, 129 catchtype_errormessage(CatchType, ContentType, Content) 130 ).
145catchtype_errormessage( 146 time_limit_exceeded, 147 'text/xml', 148 '<apeResult><messages><message importance="error" type="ws" sentence="" token="" value="time_limit_exceeded" 149repair="Split the text into smaller parts and parse them separately."/></messages></apeResult>' 150 ) :- !. 151 152catchtype_errormessage(CatchType, 'text/xml', ErrorMessage) :- 153 format(atom(CatchTypeAsAtom), "~w", [CatchType]), 154 xmlterm_to_xmlatom(element(apeResult, [], [ 155 element(messages, [], [ 156 element(message, [ 157 importance='error', 158 type='ws', 159 sentence='', 160 token='', 161 value=CatchTypeAsAtom, 162 repair='Fatal error. Please send screenshot to APE developers.' 163 ], []) 164 ]) 165 ]), ErrorMessage).
175get_ape_results(Input, Content) :- 176 get_ape_results(Input, _ContentType, Content). 177 178get_ape_results(Input, ContentType, Content) :- 179 clear_messages, 180 init_clex(Input), 181 load_ulex(Input), 182 get_value(Input, text, ACEText), 183 get_value(Input, guess, GuessOnOff), 184 acetext_to_drs(ACEText, GuessOnOff, on, Tokens, Syntax, Drs, _Messages, [DT, DP, DR]), 185 get_value(Input, uri, Uri, 'http://attempto.ifi.uzh.ch/ontologies/owlswrl/test'), 186 TempResult = [ 187 time=[DT, DP, DR], 188 acetext=ACEText, 189 tokens=Tokens, 190 drs=Drs, 191 syntax=Syntax, 192 uri=Uri 193 ], 194 get_content(Input, TempResult, ContentType, Content), 195 !. 196 197% @bug: This is sometimes called, but we should know why. 198% It must be here, just to catch errors. 199get_ape_results(_, 'text/plain', '').
206output_type(input). 207output_type(tokens). 208output_type(sentences). 209output_type(drs). 210output_type(drsrt). 211output_type(syntax). 212output_type(syntaxpp). 213output_type(syntaxd). 214output_type(syntaxdpp). 215output_type(drspp). 216output_type(drsxml). 217output_type(drshtml). 218output_type(paraphrase). 219output_type(paraphrase1). 220output_type(paraphrase2). 221output_type(owlrdf). 222output_type(owlfss). 223output_type(owlfsspp). 224output_type(owlxml). 225output_type(ruleml). 226output_type(fol). 227output_type(pnf). 228output_type(tptp).
234get_content(Input, TempResult, ContentType, Content) :- 235 member(solo=SoloType, Input), 236 !, 237 get_solo_content(SoloType, TempResult, ContentType, Content). 238 239get_content(Input, TempResult, ContentType, Content) :- 240 get_multi_content(Input, TempResult, ContentType, Content).
250% If there are APE error messages 251% then we print the messages and not the solo output. 252% Note: we do not care about the warning messages. 253get_solo_content(_, _TempResult, 'text/xml', Content) :- 254 get_error_messages([M | ErrorMessages]), 255 !, 256 messages_xmlmessages([M | ErrorMessages], XmlErrorMessages), 257 xmlterm_to_xmlatom(element(messages, [], XmlErrorMessages), Content). 258 259get_solo_content(SoloType, TempResult, ContentType, Content) :- 260 output_type(SoloType), 261 get_output(SoloType, TempResult, OutputContentType, OutputContent), 262 ( 263 get_error_messages([M | ErrorMessages]) 264 -> 265 messages_xmlmessages([M | ErrorMessages], XmlErrorMessages), 266 xmlterm_to_xmlatom(element(messages, [], XmlErrorMessages), Content), 267 ContentType = 'text/xml' 268 ; 269 Content = OutputContent, 270 ContentType = OutputContentType 271 ), 272 !. 273 274get_solo_content(SoloType, _, 'text/plain', 'ERROR: Unexpected error.') :- 275 output_type(SoloType), 276 !. 277 278get_solo_content(_, _, 'text/plain', 'ERROR: Wrong solo type.').
287get_multi_content(Input, TempResult, 'text/xml', Content) :-
288 get_value(TempResult, time, [DTokenizer, DParser, DRefres]),
289 with_output_to(atom(DT), format("~3f", [DTokenizer])),
290 with_output_to(atom(DP), format("~3f", [DParser])),
291 with_output_to(atom(DR), format("~3f", [DRefres])),
292 findall(OutputElement, get_multi_output_element(Input, TempResult, OutputElement), OutputElements),
293 get_messages_in_xml(Messages),
294 append([element(duration, [tokenizer=DT, parser=DP, refres=DR], [])|OutputElements], [element(messages, [], Messages)], Elements),
295 xmlterm_to_xmlatom(element(apeResult, [], Elements), [header(true)], Content).
In case an exception is thrown during the generation of the output, then an empty atom is returned as the output.
306get_multi_output_element(Input, TempResult, element(Type, [], [Output])) :-
307 output_type(Type),
308 atom_concat('c', Type, Key),
309 memberchk(Key=on, Input),
310 catch(
311 get_output(Type, TempResult, _, Output),
312 _Catcher,
313 Output = ''
314 ).
321get_output(input, TempResult, 'text/plain', Output) :- 322 get_value(TempResult, acetext, Output), 323 !. 324 325get_output(tokens, TempResult, 'text/plain', Output) :- 326 get_value(TempResult, tokens, Tokens), 327 serialize_term_into_atom(Tokens, Output), 328 !. 329 330get_output(sentences, TempResult, 'text/plain', Output) :- 331 get_value(TempResult, tokens, Tokens), 332 tokens_to_sentences(Tokens, Sentences), 333 serialize_term_into_atom(Sentences, Output), 334 !. 335 336get_output(syntax, TempResult, 'text/plain', Output) :- 337 get_value(TempResult, syntax, Syntax1), 338 unsplit_pronouns_in_tree(Syntax1, Syntax2), 339 remove_gaps_in_tree(Syntax2, Syntax3), 340 unify_coords_in_tree(Syntax3, Syntax), 341 serialize_term_into_atom(Syntax, Output), 342 !. 343 344get_output(syntaxpp, TempResult, 'text/plain', Output) :- 345 get_value(TempResult, syntax, Syntax1), 346 unsplit_pronouns_in_tree(Syntax1, Syntax2), 347 remove_gaps_in_tree(Syntax2, Syntax3), 348 unify_coords_in_tree(Syntax3, Syntax), 349 trees_to_ascii:trees_to_ascii(Syntax, Output), 350 !. 351 352get_output(syntaxd, TempResult, 'text/plain', Output) :- 353 get_value(TempResult, syntax, Syntax), 354 serialize_term_into_atom(Syntax, Output), 355 !. 356 357get_output(syntaxdpp, TempResult, 'text/plain', Output) :- 358 get_value(TempResult, syntax, Syntax), 359 trees_to_ascii:trees_to_ascii(Syntax, Output), 360 !. 361 362get_output(fol, TempResult, 'text/plain', Output) :- 363 get_value(TempResult, drs, Drs), 364 drs_fol_pnf:drs_fol(Drs, Fol), 365 serialize_term_into_atom(Fol, Output), 366 !. 367 368get_output(pnf, TempResult, 'text/plain', Output) :- 369 get_value(TempResult, drs, Drs), 370 drs_fol_pnf:drs_pnf(Drs, Pnf), 371 serialize_term_into_atom(Pnf, Output), 372 !. 373 374get_output(paraphrase, TempResult, 'text/plain', Output) :- 375 get_value(TempResult, drs, Drs), 376 drs_to_ace:drs_to_ace(Drs, Sentences), 377 acesentencelist_pp(Sentences, Output), 378 !. 379 380get_output(paraphrase1, TempResult, 'text/plain', Output) :- 381 get_value(TempResult, drs, Drs), 382 drs_to_coreace:bigdrs_to_coreace(Drs, Sentences), 383 acesentencelist_pp(Sentences, Output), 384 !. 385 386get_output(paraphrase2, TempResult, 'text/plain', Output) :- 387 get_value(TempResult, drs, Drs), 388 drs_to_npace:drs_to_npace(Drs, Sentences), 389 acesentencelist_pp(Sentences, Output), 390 !. 391 392get_output(ruleml, TempResult, 'text/xml', Output) :- 393 get_value(TempResult, drs, Drs), 394 drs_to_ruleml:drs_to_ruleml(Drs, Ruleml), 395 xmlterm_to_xmlatom([Ruleml], Output), 396 !. 397 398get_output(drs, TempResult, 'text/plain', Output) :- 399 get_value(TempResult, drs, Drs), 400 serialize_term_into_atom(Drs, Output), 401 !. 402 403get_output(drspp, TempResult, 'text/plain', Output) :- 404 get_value(TempResult, drs, Drs), 405 drs_to_ascii:drs_to_ascii(Drs, Output), 406 !. 407 408get_output(tptp, TempResult, 'text/plain', Output) :- 409 get_value(TempResult, drs, Drs), 410 drs_to_tptp:drs_to_tptplist(Drs, TptpList), 411 with_output_to(atom(Output), tptplist_pp(TptpList)), 412 !. 413 414get_output(drsxml, TempResult, 'text/xml', Output) :- 415 get_value(TempResult, drs, Drs), 416 drs_to_xmlatom(Drs, Output), 417 !. 418 419get_output(drshtml, TempResult, 'text/xml', Output) :- 420 get_value(TempResult, drs, Drs), 421 drs_to_html:drs_to_html(Drs, Output), 422 !. 423 424get_output(OwlOutputType, TempResult, ContentType, Output) :- 425 get_value(TempResult, acetext, AceText), 426 get_value(TempResult, drs, Drs), 427 get_value(TempResult, uri, Uri), 428 get_owl_output(OwlOutputType, AceText, Drs, Uri, ContentType, Output), 429 !.
435init_clex(Input) :- 436 get_value(Input, noclex, on), 437 !, 438 set_clex_switch(off). 439 440init_clex(_) :- 441 set_clex_switch(on).
447load_ulex(Input) :- 448 get_value(Input, ulextext, Ulex), 449 !, 450 discard_ulex, 451 setup_call_cleanup( 452 open_string(Ulex, UlexStream), 453 read_ulex(UlexStream), 454 close(UlexStream)). 455 456load_ulex(_).
469get_value(Map, Key, Value, _Default) :- 470 memberchk(Key=Value, Map), 471 !. 472 473get_value(_, _, Default, Default).
480get_value(Map, Key, Value) :-
481 get_value(Map, Key, Value, '')
Interface for the ACE tools (ACE parser, DRS verbalizer, ...)
Usage with multiple results returned (i.e. multi-mode):
Usage with a single result returned (i.e. solo-mode):