1% This file is part of the Attempto Parsing Engine (APE).
    2% Copyright 2008-2013, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch).
    3%
    4% The Attempto Parsing Engine (APE) is free software: you can redistribute it and/or modify it
    5% under the terms of the GNU Lesser General Public License as published by the Free Software
    6% Foundation, either version 3 of the License, or (at your option) any later version.
    7%
    8% The Attempto Parsing Engine (APE) is distributed in the hope that it will be useful, but WITHOUT
    9% ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
   10% PURPOSE. See the GNU Lesser General Public License for more details.
   11%
   12% You should have received a copy of the GNU Lesser General Public License along with the Attempto
   13% Parsing Engine (APE). If not, see http://www.gnu.org/licenses/.
   14
   15
   16:- module(ace_to_drs, [
   17		acetext_to_drs/5,       % +Text, -Sentences, -SyntaxTrees, -Drs, -Messages
   18		acetext_to_drs/8,       % +Text, +Guess, +Catch, -Sentences, -SyntaxTrees, -Drs, -Messages, -Time
   19		aceparagraph_to_drs/6,  % +Text, -Sentences, -SyntaxTrees, -UnresolvedDrs, -Drs, -Messages
   20		aceparagraph_to_drs/10  % +Text, +Guess, +Catch, +StartID, -Sentences, -SyntaxTrees, -UnresolvedDrs, -Drs, -Messages, -Time
   21	]).

ACE to DRS

author: - Kaarel Kaljurand; - Tobias Kuhn
version: - 2009-05-21 */

   31:- use_module('../logger/error_logger').   32
   33:- use_module('../utils/drs_reverse', [
   34		drs_reverse/2
   35	]).   36
   37:- use_module(ape_utils, [
   38		cpu_time/2,
   39		handle_unknown_words/4
   40	]).   41
   42:- use_module(tokenizer).   43
   44:- use_module(tokens_to_sentences, [
   45		tokens_to_sentences/2,
   46		tokens_to_paragraphs/2
   47	]).   48
   49:- style_check(-singleton).   50:- use_module(refres, [
   51		resolve_anaphors/2
   52	]).   53:- style_check(+singleton).   54
   55:- style_check(-discontiguous).   56:- style_check(-singleton).   57:- use_module('grammar.plp', [parse/4]).   58:- style_check(+discontiguous).   59:- style_check(+singleton).   60
   61%:- debug(result).

acetext_to_drs(+Text, -Sentences, -SyntaxTrees, -Drs, -Messages)

acetext_to_drs(+Text, +Guess, +Catch, -Sentences, -SyntaxTrees, -Drs, -Messages, -Time)

Provides an interface to the parser for parsing an ACE text that consists of one or more paragraphs. These predicates will always succeed even if the parser fails.

Examples:

 acetext_to_drs('Every man waits.', Sentences, SyntaxTrees, Drs, Messages)
 acetext_to_drs('Every man waits.', on, off, Sentences, SyntaxTrees, Drs, Messages, Time)

Arguments:

`Text`	- is an ACE text
`Guess`	- is either 'on' or 'off' (default) and defines whether unknown word guessing should be used
`Catch`	- is either 'on' or 'off' (default) and defines whether unexpected errors should be catched and put to the list of messages or not
`Sentences`	- is the list of sentences (each a list of tokens) in the ACE text
`SyntaxTrees`	- is the list of syntax trees (each a complex list) of the ACE text
`Drs`	- is the DRS of the ACE text
`Messages`	- is the list of error and warning messages that result from the processing

   86acetext_to_drs(Text, Sentences, SyntaxTrees, Drs, Messages) :-
   87	acetext_to_drs(Text, off, off, Sentences, SyntaxTrees, Drs, Messages, _).
   88
   89acetext_to_drs(Text, Guess, Catch, Sentences, SyntaxTrees, Drs, Messages, [TimeT, TimeP, TimeR]) :-
   90	cpu_time(tokenizer:tokenize(Text, Tokens), T),
   91	tokens_to_paragraphs(Tokens, Paragraphs),
   92	paragraphs_to_drs(Paragraphs, Guess, Catch, 1, Sentences, SyntaxTrees, Drs, Messages, [TimeTPre, TimeP, TimeR]),
   93	clear_messages,
   94	add_messages(Messages),
   95	TimeT is T + TimeTPre,
   96	!.

aceparagraph_to_drs(+Text, -Sentences, -SyntaxTrees, -UnresolvedDrs, -Drs, -Messages)

aceparagraph_to_drs(+Text, +Guess, +Catch, +StartID, -Sentences, -SyntaxTrees, -UnresolvedDrs, -Drs, -Messages, -Time)

Provides an interface to the parser for parsing an ACE text that consists of one or more paragraphs. These predicates will always succeed even if the parser fails.

Examples:

 aceparagraph_to_drs('Every man waits.', Sentences, SyntaxTrees, UnresolvedDrs, Drs, Messages)
 aceparagraph_to_drs('Every man waits.', on, off, 1, Sentences, SyntaxTrees, UnresolvedDrs, Drs, Messages, Time)

Arguments:

`Text`	- is an ACE text
`Guess`	- is either 'on' or 'off' (default) and defines whether unknown word guessing should be used
`Catch`	- is either 'on' or 'off' (default) and defines whether unexpected errors should be caught and put to the list of messages or not
`StartID`	- is the sentence id for the first sentence (default is 1)
`Sentences`	- is the list of sentences (each a list of tokens) in the ACE text
`SyntaxTrees`	- is the list of syntax trees (each a complex list) of the ACE text
`UnresolvedDrs`	- is the DRS of the ACE text with anaphoric references unresolved (pre-refres DRS)
`Drs`	- is the DRS of the ACE text
`Messages`	- is the list of error and warning messages that result from the processing

  123aceparagraph_to_drs(Text, Sentences, SyntaxTrees, UnresolvedDrs, Drs, Messages) :-
  124	aceparagraph_to_drs(Text, off, off, 1, Sentences, SyntaxTrees, UnresolvedDrs, Drs, Messages, _).
  125
  126aceparagraph_to_drs(Text, Guess, Catch, StartID, Sentences, SyntaxTrees, UnresolvedDrsCopy, Drs, Messages, Time) :-
  127	Time = [DTokenizer, DParse, DRefres],
  128	clear_ape_messages,
  129	catch(
  130		(
  131			cpu_time(ace_to_drs:call_tokenizer(Text, Guess, Sentences, SentencesToParse), DTokenizer),
  132			cpu_time(ace_to_drs:call_parser(SentencesToParse, StartID, SyntaxTrees, UnresolvedDrs), DParse),
  133			(
  134				UnresolvedDrsCopy \== off
  135			->
  136				copy_term(UnresolvedDrs, UnresolvedDrsCopy)
  137			; true),
  138			cpu_time(ignore(refres:resolve_anaphors(UnresolvedDrs, DrsTmp)), DRefres),
  139			(var(SyntaxTrees) -> SyntaxTrees = [] ; true),
  140			(var(DrsTmp) -> Drs = drs([], []) ; true),
  141			(is_error_message(_, _, _, _) -> Drs = drs([], []) ; Drs = DrsTmp),
  142			!
  143		),
  144		CatchType,
  145		(
  146			(
  147				Catch == on
  148			->
  149				Sentences = [],
  150				SyntaxTrees = [],
  151				Drs = drs([], []),
  152				DTokenizer = -1,
  153				DParse = -1,
  154				DRefres = -1,
  155				add_error_message(ape, '', '', 'Fatal error. Please send screenshot to APE developers.')
  156			;
  157				throw(CatchType)
  158			)
  159		)
  160	),
  161	get_messages(Messages),
  162	!.
  163
  164
  165% Note: should not fail.
  166% If sentence splitting fails then the problem must have been that there
  167% was no sentence end symbol. In this case we return the original
  168% token list.
  169call_tokenizer(Text, GuessOnOff, SentencesOutput, SentencesToParse) :-
  170	(
  171		is_list(Text) 
  172	->
  173		Tokens = Text
  174	;
  175		tokenizer:tokenize(Text, Tokens)
  176	),
  177	(
  178		tokens_to_sentences:tokens_to_sentences(Tokens, SentencesTmp)
  179	->
  180		ape_utils:handle_unknown_words(GuessOnOff, SentencesTmp, SentencesOutput, 1),
  181		SentencesToParse = SentencesOutput
  182	;
  183		SentencesOutput = [Tokens],
  184		SentencesToParse = [],
  185		last(Tokens, LastToken),
  186		add_error_message(sentence, '', LastToken, 'Every ACE text must end with . or ? or !.')
  187	).
  188
  189call_parser(Sentences, StartID, Syntaxtrees, DrsReversed) :-
  190	ignore(grammar:parse(Sentences, StartID, Syntaxtrees, Drs)),
  191	ignore(drs_reverse:drs_reverse(Drs, DrsReversed)).

paragraphs_to_drs(+Paragraphs, +Guess, +Catch, +StartID, -Sentences, -Trees, -Drs, -Messages, -Time)

  196paragraphs_to_drs([], _, _, _, [], [], drs([],[]), [], [0,0,0]) :-
  197	!.
  198
  199paragraphs_to_drs([P|Paragraphs], Guess, Catch, StartID, Sentences, Trees, drs(Dom,Conds), Messages, [TimeT, TimeP, TimeR]) :-
  200	aceparagraph_to_drs(P, Guess, Catch, StartID, S, T, off, drs(D,C), M, [TT, TP, TR]),
  201	length(S, SentenceCount),
  202	NewStartID is StartID + SentenceCount,
  203	(
  204		is_error_message(_, _, _, _)
  205	->
  206		Sentences = S,
  207		Trees = T,
  208		Dom = D,
  209		Conds = C,
  210		Messages = M,
  211		TimeT = TT,
  212		TimeP = TP,
  213		TimeR = TR
  214	;
  215		paragraphs_to_drs(Paragraphs, Guess, Catch, NewStartID, SentencesR, TreesR, drs(DomR, CondsR), MessagesR, [TimeTR, TimePR, TimeRR]),
  216		append(S, SentencesR, Sentences),
  217		append(T, TreesR, Trees),
  218		append(D, DomR, Dom),
  219		append(C, CondsR, Conds),
  220		append(M, MessagesR, Messages),
  221		TimeT is TT + TimeTR,
  222		TimeP is TP + TimePR,
  223		TimeR is TR + TimeRR
  224	),
  225	!.
  226
  227
  228clear_ape_messages :-
  229	clear_messages(character),
  230	clear_messages(word),
  231	clear_messages(sentence),
  232	clear_messages(anaphor),
  233	clear_messages(pronoun)