1% This file is part of the Attempto Parsing Engine (APE).
    2% Copyright 2008-2013, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch).
    3%
    4% The Attempto Parsing Engine (APE) is free software: you can redistribute it and/or modify it
    5% under the terms of the GNU Lesser General Public License as published by the Free Software
    6% Foundation, either version 3 of the License, or (at your option) any later version.
    7%
    8% The Attempto Parsing Engine (APE) is distributed in the hope that it will be useful, but WITHOUT
    9% ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
   10% PURPOSE. See the GNU Lesser General Public License for more details.
   11%
   12% You should have received a copy of the GNU Lesser General Public License along with the Attempto
   13% Parsing Engine (APE). If not, see http://www.gnu.org/licenses/.
   14
   15
   16:- module(ape, [
   17		get_ape_results_timelimit/3,
   18		get_ape_results_timelimit/4,
   19		get_ape_results/2,
   20		get_ape_results/3
   21	]).

Interface for the ACE tools (ACE parser, DRS verbalizer, ...)

author

- Kaarel Kaljurand

- Tobias Kuhn

version

- 2009-05-13

Usage with multiple results returned (i.e. multi-mode):

get_ape_results([text='Every man waits.', cparaphrase1=on], ContentType, Content).
get_ape_results([text='Every man waits.', cparaphrase=on, cparaphrase1=on], ContentType, Content).
get_ape_results([text='A man waits.', cinput=on, cdrs=on, cdrspp=on, cparaphrase=on, cparaphrase1=on, cparaphrase2=on, ctokens=on, csyntax=on, csyntaxpp=on, cfol=on], ContentType, Content).

Usage with a single result returned (i.e. solo-mode):

get_ape_results([text='A man sees a dog.', solo=drs], ContentType, Content).
get_ape_results([text='Every man owns a dog.', solo=owlrdf], ContentType, Content).
get_ape_results([text='Peeter likes Mary.', solo=owlfss], ContentType, Content).

To be done

- : provide all outputs (1) as serialized Prolog terms, (2) as pretty-printed terms, (3) as XML, JSON, HTML, etc. */

   52% Default encoding used for opening files in text mode.
   53:- set_prolog_flag(encoding, utf8).   54
   55% Note: The following line fixes the testcase: 3.1415926536 approximates Pi.
   56% Note that using 'f' instead of 'g' does not drop the trailing zeros.
   57% @bug: sometimes weird digits are attached to the end.
   58:- set_prolog_flag(float_format, '%.11g').   59
   60% Richard A. O'Keefe: to see full precision for IEEE doubles, do
   61% :- set_prolog_flag(float_format, '%.18g').
   62% So it seems that values above 18 do not make sense.
   63
   64:- assert(user:file_search_path(ape, '.')).   65:- prolog_load_context(directory, Dir),
   66	asserta(user:file_search_path(ape, Dir)).   67
   68
   69:- use_module(ape('utils/morphgen'), [
   70		acesentencelist_pp/2
   71	]).   72
   73:- use_module(ape('utils/ace_niceace'), [
   74		tokens_to_sentences/2
   75	]).   76
   77:- use_module(ape('utils/drs_to_xml')).   78:- use_module(ape('utils/drs_to_fol_to_prenex')).   79:- use_module(ape('utils/drs_to_ascii')).   80:- use_module(ape('utils/drs_to_ace')).   81:- use_module(ape('utils/drs_to_coreace')).   82:- use_module(ape('utils/drs_to_npace')).   83:- use_module(ape('utils/drs_to_html')).   84:- use_module(ape('utils/drs_to_ruleml')).   85:- use_module(ape('utils/tree_utils')).   86:- use_module(ape('utils/trees_to_ascii')).   87:- use_module(ape('utils/drs_to_tptp')).   88:- use_module(ape('lexicon/clex')).   89:- use_module(ape('lexicon/ulex')).   90:- use_module(ape('parser/ace_to_drs')).   91:- use_module(ape('logger/error_logger')).   92
   93:- use_module(ape('utils/xmlterm_to_xmlatom'), [
   94		xmlterm_to_xmlatom/2,
   95		xmlterm_to_xmlatom/3
   96	]).   97
   98:- use_module(ape('utils/serialize_term'), [
   99		serialize_term_into_atom/2
  100	]).  101
  102:- use_module(ape('utils/owlswrl/get_owl_output'), [
  103		get_owl_output/6
  104	]).

get_ape_results_timelimit(+Input:list, -Content:atom, +TimeLimit) is det

get_ape_results_timelimit(+Input:list, -ContentType:atom, -Content:atom, +TimeLimit) is det

There is call_with_time_limit(+Time, :Goal) defined in library(time), part of clib package. On timeout this throws the exception time_limit_exceeded. But we catch other exceptions as well...

Arguments:

`Input`	- is a list of input parameters of the form Key=Value
`ContentType`	- is one of {text/plain, text/xml}
`Content`	- is the returned result
`TimeLimit`	- the timelimit in seconds

  119get_ape_results_timelimit(Input, Content, TimeLimit) :-
  120	get_ape_results_timelimit(Input, _ContentType, Content, TimeLimit).
  121
  122get_ape_results_timelimit(Input, ContentType, Content, TimeLimit) :-
  123	catch(
  124		call_with_time_limit(
  125			TimeLimit,
  126			get_ape_results(Input, ContentType, Content)
  127		),
  128		CatchType,
  129		catchtype_errormessage(CatchType, ContentType, Content)
  130	).

catchtype_errormessage(+CatchType:atom, -ContentType:atom, -ErrorMessage:atom) is det

Returns an error message as XML. The error message is set by catch/3, this predicate just formats the message. This is a very toplevel error message indicating either that the time limit was exceeded or that there were resource errors (out of stack, etc.) or programmer errors (undefined predicate called, etc.).

Arguments:

`CatchType`	- is one of {time_limit_exceeded, ...}
`ContentType`	- is always 'text/xml'
`ErrorMessage`	- is an end-user-level message that corresponds to the `CatchType`

  145catchtype_errormessage(
  146	time_limit_exceeded,
  147	'text/xml',
  148	'<apeResult><messages><message importance="error" type="ws" sentence="" token="" value="time_limit_exceeded"
  149repair="Split the text into smaller parts and parse them separately."/></messages></apeResult>'
  150	) :- !.
  151
  152catchtype_errormessage(CatchType, 'text/xml', ErrorMessage) :-
  153	format(atom(CatchTypeAsAtom), "~w", [CatchType]),
  154	xmlterm_to_xmlatom(element(apeResult, [], [
  155			element(messages, [], [
  156				element(message, [
  157					importance='error',
  158					type='ws',
  159					sentence='',
  160					token='',
  161					value=CatchTypeAsAtom,
  162					repair='Fatal error. Please send screenshot to APE developers.'
  163					], [])
  164			])
  165		]), ErrorMessage).

get_ape_results(+Input:list, -Content:atom) is det

get_ape_results(+Input:list, -ContentType:atom, -Content:atom) is det

Arguments:

`Input`	- is a list of input parameters of the form Key=Value
`ContentType`	- is one of {text/plain, text/xml}
`Content`	- is the returned XML

  175get_ape_results(Input, Content) :-
  176	get_ape_results(Input, _ContentType, Content).
  177
  178get_ape_results(Input, ContentType, Content) :-
  179	clear_messages,
  180	init_clex(Input),
  181	load_ulex(Input),
  182	get_value(Input, text, ACEText),
  183	get_value(Input, guess, GuessOnOff),
  184	acetext_to_drs(ACEText, GuessOnOff, on, Tokens, Syntax, Drs, _Messages, [DT, DP, DR]),
  185	get_value(Input, uri, Uri, 'http://attempto.ifi.uzh.ch/ontologies/owlswrl/test'),
  186	TempResult = [
  187			time=[DT, DP, DR],
  188			acetext=ACEText,
  189			tokens=Tokens,
  190			drs=Drs,
  191			syntax=Syntax,
  192			uri=Uri
  193		],
  194	get_content(Input, TempResult, ContentType, Content),
  195	!.
  196
  197% @bug: This is sometimes called, but we should know why.
  198% It must be here, just to catch errors.
  199get_ape_results(_, 'text/plain', '').

output_type(?Type:atom): This predicate defines the supported output types and the order of the outputs for the multi mode.

  206output_type(input).
  207output_type(tokens).
  208output_type(sentences).
  209output_type(drs).
  210output_type(drsrt).
  211output_type(syntax).
  212output_type(syntaxpp).
  213output_type(syntaxd).
  214output_type(syntaxdpp).
  215output_type(drspp).
  216output_type(drsxml).
  217output_type(drshtml).
  218output_type(paraphrase).
  219output_type(paraphrase1).
  220output_type(paraphrase2).
  221output_type(owlrdf).
  222output_type(owlfss).
  223output_type(owlfsspp).
  224output_type(owlxml).
  225output_type(ruleml).
  226output_type(fol).
  227output_type(pnf).
  228output_type(tptp).

get_content(+Input:list, +TempResult:list, -ContentType:atom, -Content:atom) is det

  234get_content(Input, TempResult, ContentType, Content) :-
  235    member(solo=SoloType, Input),
  236	!,
  237	get_solo_content(SoloType, TempResult, ContentType, Content).
  238
  239get_content(Input, TempResult, ContentType, Content) :-
  240	get_multi_content(Input, TempResult, ContentType, Content).

get_solo_content(+SoloType:atom, +TempResult:list, -ContentType:atom, -Content:atom) is det

Arguments:

`SoloType`	- defines which output should be returned
`TempResult`	- is a list of outputs from the parser
`ContentType`	- is one of {text/xml, text/plain}
`Content`	- is the result (e.g. a syntax tree or a paraphrase) in XML or plain text

  250% If there are APE error messages
  251% then we print the messages and not the solo output.
  252% Note: we do not care about the warning messages.
  253get_solo_content(_, _TempResult, 'text/xml', Content) :-
  254	get_error_messages([M | ErrorMessages]),
  255	!,
  256	messages_xmlmessages([M | ErrorMessages], XmlErrorMessages),
  257	xmlterm_to_xmlatom(element(messages, [], XmlErrorMessages), Content).
  258
  259get_solo_content(SoloType, TempResult, ContentType, Content) :-
  260	output_type(SoloType),
  261	get_output(SoloType, TempResult, OutputContentType, OutputContent),
  262	(
  263		get_error_messages([M | ErrorMessages])
  264	->
  265		messages_xmlmessages([M | ErrorMessages], XmlErrorMessages),
  266		xmlterm_to_xmlatom(element(messages, [], XmlErrorMessages), Content),
  267		ContentType = 'text/xml'
  268	;
  269		Content = OutputContent,
  270		ContentType = OutputContentType
  271	),
  272	!.
  273
  274get_solo_content(SoloType, _, 'text/plain', 'ERROR: Unexpected error.') :-
  275	output_type(SoloType),
  276	!.
  277
  278get_solo_content(_, _, 'text/plain', 'ERROR: Wrong solo type.').

get_multi_content(+Input:list, +TempResult:list, -ContentType:atom, -Content:atom) is det

bug: - : we ignore the parser+refres messages, and get a fresh list of messages from the messages repository (it will include the parser+refres messages anyway).

  287get_multi_content(Input, TempResult, 'text/xml', Content) :-
  288    get_value(TempResult, time, [DTokenizer, DParser, DRefres]),
  289	with_output_to(atom(DT), format("~3f", [DTokenizer])),
  290	with_output_to(atom(DP), format("~3f", [DParser])),
  291	with_output_to(atom(DR), format("~3f", [DRefres])),
  292	findall(OutputElement, get_multi_output_element(Input, TempResult, OutputElement), OutputElements),
  293	get_messages_in_xml(Messages),
  294	append([element(duration, [tokenizer=DT, parser=DP, refres=DR], [])|OutputElements], [element(messages, [], Messages)], Elements),
  295	xmlterm_to_xmlatom(element(apeResult, [], Elements), [header(true)], Content).

get_multi_output_element(+Input:list, +TempResult:list, -Content:element): Returns one output element if it is required by the input in multi mode. It succeeds as many times as there are such multi typed outputs.
In case an exception is thrown during the generation of the output, then an empty atom is returned as the output.

  306get_multi_output_element(Input, TempResult, element(Type, [], [Output])) :-
  307	output_type(Type),
  308	atom_concat('c', Type, Key),
  309	memberchk(Key=on, Input),
  310	catch(
  311		get_output(Type, TempResult, _, Output),
  312		_Catcher,
  313		Output = ''
  314	).

get_output(+OutputType:atom, +TempResult:list, -ContentType:atom, -Content:atom): Returns the required output as an atom. This is used for both modes, solo and multi.

  321get_output(input, TempResult, 'text/plain', Output) :-
  322    get_value(TempResult, acetext, Output),
  323    !.
  324
  325get_output(tokens, TempResult, 'text/plain', Output) :-
  326    get_value(TempResult, tokens, Tokens),
  327	serialize_term_into_atom(Tokens, Output),
  328    !.
  329
  330get_output(sentences, TempResult, 'text/plain', Output) :-
  331    get_value(TempResult, tokens, Tokens),
  332	tokens_to_sentences(Tokens, Sentences),
  333	serialize_term_into_atom(Sentences, Output),
  334    !.
  335
  336get_output(syntax, TempResult, 'text/plain', Output) :-
  337    get_value(TempResult, syntax, Syntax1),
  338	unsplit_pronouns_in_tree(Syntax1, Syntax2),
  339	remove_gaps_in_tree(Syntax2, Syntax3),
  340	unify_coords_in_tree(Syntax3, Syntax),
  341	serialize_term_into_atom(Syntax, Output),
  342    !.
  343
  344get_output(syntaxpp, TempResult, 'text/plain', Output) :-
  345    get_value(TempResult, syntax, Syntax1),
  346	unsplit_pronouns_in_tree(Syntax1, Syntax2),
  347	remove_gaps_in_tree(Syntax2, Syntax3),
  348	unify_coords_in_tree(Syntax3, Syntax),
  349	trees_to_ascii:trees_to_ascii(Syntax, Output),
  350    !.
  351
  352get_output(syntaxd, TempResult, 'text/plain', Output) :-
  353    get_value(TempResult, syntax, Syntax),
  354	serialize_term_into_atom(Syntax, Output),
  355    !.
  356
  357get_output(syntaxdpp, TempResult, 'text/plain', Output) :-
  358    get_value(TempResult, syntax, Syntax),
  359	trees_to_ascii:trees_to_ascii(Syntax, Output),
  360    !.
  361
  362get_output(fol, TempResult, 'text/plain', Output) :-
  363    get_value(TempResult, drs, Drs),
  364	drs_fol_pnf:drs_fol(Drs, Fol),
  365	serialize_term_into_atom(Fol, Output),
  366    !.
  367
  368get_output(pnf, TempResult, 'text/plain', Output) :-
  369    get_value(TempResult, drs, Drs),
  370	drs_fol_pnf:drs_pnf(Drs, Pnf),
  371	serialize_term_into_atom(Pnf, Output),
  372    !.
  373
  374get_output(paraphrase, TempResult, 'text/plain', Output) :-
  375    get_value(TempResult, drs, Drs),
  376	drs_to_ace:drs_to_ace(Drs, Sentences),
  377	acesentencelist_pp(Sentences, Output),
  378    !.
  379
  380get_output(paraphrase1, TempResult, 'text/plain', Output) :-
  381    get_value(TempResult, drs, Drs),
  382	drs_to_coreace:bigdrs_to_coreace(Drs, Sentences),
  383	acesentencelist_pp(Sentences, Output),
  384    !.
  385
  386get_output(paraphrase2, TempResult, 'text/plain', Output) :-
  387    get_value(TempResult, drs, Drs),
  388	drs_to_npace:drs_to_npace(Drs, Sentences),
  389	acesentencelist_pp(Sentences, Output),
  390    !.
  391
  392get_output(ruleml, TempResult, 'text/xml', Output) :-
  393    get_value(TempResult, drs, Drs),
  394	drs_to_ruleml:drs_to_ruleml(Drs, Ruleml),
  395	xmlterm_to_xmlatom([Ruleml], Output),
  396    !.
  397
  398get_output(drs, TempResult, 'text/plain', Output) :-
  399    get_value(TempResult, drs, Drs),
  400	serialize_term_into_atom(Drs, Output),
  401    !.
  402
  403get_output(drspp, TempResult, 'text/plain', Output) :-
  404    get_value(TempResult, drs, Drs),
  405	drs_to_ascii:drs_to_ascii(Drs, Output),
  406    !.
  407
  408get_output(tptp, TempResult, 'text/plain', Output) :-
  409	get_value(TempResult, drs, Drs),
  410	drs_to_tptp:drs_to_tptplist(Drs, TptpList),
  411	with_output_to(atom(Output), tptplist_pp(TptpList)),
  412    !.
  413
  414get_output(drsxml, TempResult, 'text/xml', Output) :-
  415    get_value(TempResult, drs, Drs),
  416	drs_to_xmlatom(Drs, Output),
  417    !.
  418
  419get_output(drshtml, TempResult, 'text/xml', Output) :-
  420    get_value(TempResult, drs, Drs),
  421	drs_to_html:drs_to_html(Drs, Output),
  422    !.
  423
  424get_output(OwlOutputType, TempResult, ContentType, Output) :-
  425	get_value(TempResult, acetext, AceText),
  426	get_value(TempResult, drs, Drs),
  427	get_value(TempResult, uri, Uri),
  428	get_owl_output(OwlOutputType, AceText, Drs, Uri, ContentType, Output),
  429	!.

init_clex(+Input:list) is det

  435init_clex(Input) :-
  436	get_value(Input, noclex, on),
  437	!,
  438	set_clex_switch(off).
  439
  440init_clex(_) :-
  441	set_clex_switch(on).

load_ulex(+Input:list) is det

  447load_ulex(Input) :-
  448	get_value(Input, ulextext, Ulex),
  449	!,
  450	discard_ulex,
  451    setup_call_cleanup(
  452	open_string(Ulex, UlexStream),
  453	read_ulex(UlexStream),
  454	close(UlexStream)).
  455
  456load_ulex(_).

get_value(+Map:list, +Key:atom, -Value:atom, +Default:atom) is det

Returns the value for a key from a map of key/value pairs. The default value is returned if the key is not found.

Arguments:

`Map`	- is a list key/value pairs of the form `Key`=`Value`
`Key`	- is the key to look for
`Value`	- is the value that is returned
`Default`	- is the default value for the case that the key is not found

  469get_value(Map, Key, Value, _Default) :-
  470	memberchk(Key=Value, Map),
  471	!.
  472
  473get_value(_, _, Default, Default).

get_value(+Map:list, +Key:atom, -Value:atom) is det: The same as get_value/4 with '' as default value

  480get_value(Map, Key, Value) :-
  481	get_value(Map, Key, Value, '')