1% This file is part of the Attempto Parsing Engine (APE).
    2% Copyright 2008-2013, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch).
    3%
    4% The Attempto Parsing Engine (APE) is free software: you can redistribute it and/or modify it
    5% under the terms of the GNU Lesser General Public License as published by the Free Software
    6% Foundation, either version 3 of the License, or (at your option) any later version.
    7%
    8% The Attempto Parsing Engine (APE) is distributed in the hope that it will be useful, but WITHOUT
    9% ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
   10% PURPOSE. See the GNU Lesser General Public License for more details.
   11%
   12% You should have received a copy of the GNU Lesser General Public License along with the Attempto
   13% Parsing Engine (APE). If not, see http://www.gnu.org/licenses/.
   14
   15
   16:- module(sentence_failure, [
   17		get_all_sentence_errors/2, % +SentenceTokens, -ErrorTextList
   18		get_sentence_error_text/2  % +SentenceTokens, -ErrorText
   19	]).   20
   21:- use_module('../lexicon/lexicon_interface').   22
   23:- use_module('../lexicon/chars', [
   24		is_capitalized/1
   25	]).   26
   27:- use_module('../lexicon/is_in_lexicon', [
   28		is_functionword/1
   29	]).   30
   31:- use_module('../lexicon/illegalwords').   32
   33:- style_check(-singleton).   34:- style_check(-discontiguous).   35:- use_module('grammar.plp').   36:- style_check(+discontiguous).   37:- style_check(+singleton).

Sentence Failure

author
- Norbert E. Fuchs
- Tobias Kuhn
- Kaarel Kaljurand
version
- 2011-07-18 */
 get_all_sentence_errors(+SentenceTokens:list, -ErrorTextList:list) is det
Arguments:
SentenceTokens- is a list of tokens in the sentence
ErrorTextList- is a list of error messages
   52get_all_sentence_errors(SentenceTokens, ErrorTextList) :-  
   53	findall(ErrorText, get_sentence_error_text(SentenceTokens, ErrorText), ErrorTextListIntermediate),
   54	% remove duplicate messages
   55	list_to_set(ErrorTextListIntermediate, ErrorTextList). 
 get_sentence_error_text(+SentenceTokens:list, -ErrorText:atom) is nondet
Arguments:
SentenceTokens- is a list of tokens in the sentence
ErrorText- is an error message
   64% check for illegal words
   65get_sentence_error_text(SentenceTokens, ErrorText) :-
   66	member(IllegalWord, SentenceTokens),
   67	% BUG: add: \+ is_contentword(IllegalWord)
   68	% because maybe the user defined `any' as a contentword
   69	is_illegalword(IllegalWord, ErrorText).
   70
   71% check for repetitions of tokens
   72get_sentence_error_text(SentenceTokens, ErrorText) :-
   73    append(_, [Token, '<>', Token|_], SentenceTokens),
   74	\+ is_repeatable(Token),
   75	with_output_to(atom(ErrorText), format("Token \'~w\' repeated.", [Token])).
   76
   77% check for `there is' + proper name
   78get_sentence_error_text(SentenceTokens, ErrorText) :-
   79	append(_, [There, is, Token, '<>' | _], SentenceTokens),
   80	(
   81		There = there
   82	;
   83		There = 'There'
   84	),
   85	(
   86		Token = the
   87	;
   88		is_capitalized(Token)
   89	;
   90		pn_sg(Token, _, _)
   91	),
   92	with_output_to(atom(ErrorText), format("The construct \'there is\' + \'~w\' is not allowed.", [Token])).
   93
   94% check for `there are' + proper name
   95get_sentence_error_text(SentenceTokens, ErrorText) :-
   96	append(_, [There, are, Token, '<>' | _], SentenceTokens),
   97	(
   98		There = there
   99	;
  100		There = 'There'
  101	),
  102	(
  103		Token = the
  104	;
  105		is_capitalized(Token)
  106	;
  107		pn_pl(Token, _, _)
  108	),
  109	with_output_to(atom(ErrorText), format("The construct \'there are\' + \'~w\' is not allowed.", [Token])).
  110
  111% check for intransitive verb followed by that-subordination
  112% Example: John appears that Mary waits. (`appear' is an intransitive verb that is not transitive)
  113get_sentence_error_text(SentenceTokens, ErrorText) :-
  114	append(_, [Wordform, '<>', that | _], SentenceTokens),
  115	(
  116		iv_finsg(Wordform, Verb)
  117	;
  118		iv_infpl(Wordform, Verb)
  119	),
  120	with_output_to(atom(ErrorText), format("The intransitive verb \'~w\' cannot be followed by that-subordination. Use a transitive verb.", [Verb])).
  121
  122get_sentence_error_text(SentenceTokens, 'The sentence contains \'then\' but not \'if\'.') :-
  123	member(then, SentenceTokens),
  124	\+ member(if, SentenceTokens),
  125	\+ member('If', SentenceTokens).
  126
  127get_sentence_error_text(SentenceTokens, 'The sentence contains \'if\' but not \'then\'.') :-
  128	(
  129		member(if, SentenceTokens)
  130	;
  131		member('If', SentenceTokens)
  132	),	
  133	\+ member(then, SentenceTokens).
  134
  135
  136% check for illegal use of commas
  137% Note that this rule does not find all illegal uses of commas, e.g.:
  138% [...] ... , ... [...]
  139% {...} ... , ... {...}
  140% ,!
  141get_sentence_error_text(SentenceTokens, 'Commas must be immediately followed by \'and\' or \'or\', or must occur at specified positions in lists, sets and commands.') :-
  142	append(Front, [',', NextToken | Tail], SentenceTokens),
  143	% comma not immediately followed by 'and'
  144	NextToken \= and,
  145	% comma not immediately followed by 'or'
  146	NextToken \= or,
  147	% comma not followed by an exclamation mark (command)
  148	\+ member('!', Tail),
  149	% comma not between [ and ] (list)
  150	\+ (member('[', Front), member(']', Tail)),
  151	% comma not between { and } (set)
  152	\+ (member('{', Front), member('}', Tail)).
  153
  154
  155get_sentence_error_text(_, 'This is the first sentence that was not ACE. The sign <> indicates the position where parsing failed.').
 is_repeatable(?Token) is nondet
Arguments:
Token- is an ACE token that can be repeated

Example: ((1+2)-3) = 0.

  164is_repeatable('{').
  165is_repeatable('}').
  166is_repeatable('(').
  167is_repeatable(')').
  168is_repeatable('[').
  169is_repeatable(']').
  170is_repeatable('"')