% This file is part of the Attempto Parsing Engine (APE).
% Copyright 2008-2013, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch).
%
% The Attempto Parsing Engine (APE) is free software: you can redistribute it and/or modify it
% under the terms of the GNU Lesser General Public License as published by the Free Software
% Foundation, either version 3 of the License, or (at your option) any later version.
%
% The Attempto Parsing Engine (APE) is distributed in the hope that it will be useful, but WITHOUT
% ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
% PURPOSE. See the GNU Lesser General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License along with the Attempto
% Parsing Engine (APE). If not, see http://www.gnu.org/licenses/.


:- module(grammar_words, [
		word/3,                       % ?Word
		word/4,                       % ?Word, +Condition
		word_initial/3,               % ?Word
		word_initial/4,               % ?Word, +Condition
		word_noninitial/3,            % ?Word
		word_noninitial/4,            % ?Word, +Condition
		word_capitalize/4,            % +Word, +WordInitial
		words/3,                      % +WordList
		words/4,                      % +WordList, +Condition
		words_initial/3,              % +WordList
		words_initial/4,              % +WordList, +Condition
		words_noninitial/3,           % +WordList
		words_noninitial/4,           % +WordList, +Condition
		get_position/3,               % -Position
		warning/6,                    % +Type, +SentenceID, +Subject, +Description
		try/4,                        % +Goal, error(+Type, +SentenceID, +Subject, +Description)
		reset_progress_record/1,      % +TokenList
		get_unparsed_tokens_number/1  % -Number
	]).

:- use_module('../logger/error_logger', [
		add_warning_message_once/4,
		add_error_message_once/4
	]).

:- use_module('../lexicon/lexicon_interface').
:- use_module('../lexicon/functionwords').
:- use_module('../lexicon/chars').
:- use_module('../lexicon/is_in_lexicon').

/** <module> Word-level Grammar Rules

This module contains the word-level grammar rules. It manages the fact that certain words can be
capitalized at the beginning of a sentence. Furthermore, it keeps track of the parsing process and,
in the case of an error, it can determine up to which token parsing succeeded.

@author Tobias Kuhn
*/

%% word(?Word)
%
% This rule reads the token Word which can be in sentence-initial position.
word(Word) -->
	[Word],
	record_position.

word(Word) -->
	[^, Word],
	record_position.
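
% Example (illustrative; assumes, as the rules above do, that the token list carries the
% marker '^' in front of the sentence-initial token):
%
%   ?- phrase(word(a), [^, a, man, waits], Rest).
%   Rest = [man, waits].
%
%   ?- phrase(word(man), [man, waits], Rest).
%   Rest = [waits].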

%% word(?Word, +Condition)
%
% This rule reads the token Word (which can be in sentence-initial position) if the given condition
% is fulfilled.
word(Word, Condition) -->
	[Word],
	{ call(Condition) },
	record_position.

word(Word, Condition) -->
	[^, Word],
	{ call(Condition) },
	record_position.

%% word_initial(?Word)
%
% This rule reads the token Word in sentence-initial position.
word_initial(Word) -->
	[^, Word],
	record_position.
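
% Example (illustrative): word_initial//1 succeeds only directly after the sentence-start
% marker '^':
%
%   ?- phrase(word_initial(W), [^, 'John', waits], Rest).
%   W = 'John',
%   Rest = [waits].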

%% word_initial(?Word, +Condition)
%
% This rule reads the token Word in sentence-initial position if the given condition is fulfilled.
word_initial(Word, Condition) -->
	[^, Word],
	{ call(Condition) },
	record_position.

%% word_noninitial(?Word)
%
% This rule reads the token Word if it is not in sentence-initial position.
word_noninitial(Word) -->
	[Word],
	record_position.

%% word_noninitial(?Word, +Condition)
%
% This rule reads the token Word if it is not in sentence-initial position and if the given
% condition is fulfilled.
word_noninitial(Word, Condition) -->
	[Word],
	{ call(Condition) },
	record_position.

%% word_capitalize(+Word, +WordInitial)
%
% This rule reads the token Word. In sentence-initial position, WordInitial is accepted as well.
word_capitalize(Word, _WordInitial) -->
	[Word],
	record_position.

word_capitalize(Word, _WordInitial) -->
	[^, Word],
	record_position.

word_capitalize(_Word, WordInitial) -->
	[^, WordInitial],
	record_position.
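
% Example (illustrative; hypothetical word pair): the capitalized variant is accepted only
% after the sentence-start marker '^':
%
%   ?- phrase(word_capitalize(there, 'There'), [^, 'There', is], Rest).
%   Rest = [is].
%
%   ?- phrase(word_capitalize(there, 'There'), ['There', is], Rest).
%   false.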

%% words(+WordList)
%
% This rule reads the tokens of WordList which can be in sentence-initial position.
words(WordList) -->
	words_noninitial(WordList).

words(WordList) -->
	[^],
	words_noninitial(WordList).
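
% Example (illustrative; hypothetical token list):
%
%   ?- phrase(words([at, least]), [at, least, 2, men], Rest).
%   Rest = [2, men].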

%% words(+WordList, +Condition)
%
% This rule reads the tokens of WordList (which can be in sentence-initial position) if the
% condition is fulfilled.
words(WordList, Condition) -->
	words_noninitial(WordList, Condition).

words(WordList, Condition) -->
	[^],
	words_noninitial(WordList, Condition).

%% words_initial(+WordList)
%
% This rule reads the tokens of WordList if they are in sentence-initial position.
words_initial(WordList) -->
	[^],
	words_noninitial(WordList).

%% words_initial(+WordList, +Condition)
%
% This rule reads the tokens of WordList if they are in sentence-initial position and if the
% condition is fulfilled.
words_initial(WordList, Condition) -->
	[^],
	words_noninitial(WordList, Condition).

%% words_noninitial(+WordList)
%
% This rule reads the tokens of WordList if they are not in sentence-initial position.
words_noninitial([]) -->
	record_position.

words_noninitial([Word|Rest]) -->
	[Word],
	words_noninitial(Rest).

%% words_noninitial(+WordList, +Condition)
%
% This rule reads the tokens of WordList if they are not in sentence-initial position and if the
% condition is fulfilled.
words_noninitial([], Condition) -->
	{ call(Condition) },
	record_position.

words_noninitial([Word|Rest], Condition) -->
	[Word],
	words_noninitial(Rest, Condition).

%% warning(+Type, +SentenceID, +Subject, +Description)
%
% This predicate can be used as a DCG rule. It reads no token but asserts a warning message.
warning(Type, SentenceID, Subject, Description) -->
	get_position(Pos),
	{
		PrevPos is Pos - 1,
		add_warning_message_once(Type, SentenceID-PrevPos, Subject, Description)
	}.
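
% Usage sketch (illustrative; the rule context and identifiers are hypothetical): inside a
% grammar rule, a goal like
%
%   warning(sentence, Id, Tok, 'strange word order')
%
% consumes no token and records a warning whose position is the current token position
% minus one.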

%% try(+Goal, error(+Type, +SentenceID, +Subject, +Description))
%% try(+Goal, warning(+Type, +SentenceID, +Subject, +Description))
%
% This predicate can be used as a DCG rule. It tries to call the goal. If the goal fails, an error
% or warning message is asserted. In the case of an error, the whole predicate fails as well.
try(Goal, _, Tokens, Tokens) :-
	call(Goal),
	!.

try(_, error(Type, SentenceID, Subject, Description)) -->
	get_position(Pos),
	{
		PrevPos is Pos - 1,
		add_error_message_once(Type, SentenceID-PrevPos, Subject, Description),
		fail
	}.

try(_, warning(Type, SentenceID, Subject, Description)) -->
	get_position(Pos),
	{
		PrevPos is Pos - 1,
		add_warning_message_once(Type, SentenceID-PrevPos, Subject, Description)
	}.
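
% Usage sketch (illustrative; the goal and identifiers are hypothetical): a grammar rule can
% contain a goal like
%
%   try(is_acceptable(X), warning(word, Id, X, 'not acceptable'))
%
% which consumes no token; if is_acceptable(X) fails, a warning message is recorded and
% parsing continues. With error(...) instead of warning(...), a failing goal records an
% error message and the surrounding rule fails.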

%% tokencount(-TokenCount)
%
% This predicate stores the overall number of tokens.
:- dynamic tokencount/1.

%% reset_progress_record(+TokenList)
%
% This predicate resets the record about how far the parser proceeded in the token list.
% Furthermore, it initializes the record for the new token list.
reset_progress_record(TokenList) :-
	retractall(position_backwards(_)),
	record_position(TokenList, TokenList),
	retractall(tokencount(_)),
	length(TokenList, Length),
	assert(tokencount(Length)),
	!.
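
% Example (illustrative; hypothetical token list): for a list of five tokens,
%
%   ?- reset_progress_record([^, a, man, waits, '.']).
%
% retracts all position_backwards/1 facts, records position_backwards(5) (nothing parsed
% yet) and asserts tokencount(5).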

%% get_unparsed_tokens_number(-Number)
%
% This predicate returns the smallest number of tokens that were not parsed (since the record was
% reset).
get_unparsed_tokens_number(Number) :-
	position_backwards(First),
	findall(P, position_backwards(P), Positions),
	get_minimum(Positions, First, Number).
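
% Example (illustrative): if the recorded facts are position_backwards(5),
% position_backwards(3) and position_backwards(2), i.e. the most successful parse attempt
% stopped two tokens before the end of the token list, then
%
%   ?- get_unparsed_tokens_number(N).
%   N = 2.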

%% position_backwards(-PositionBackwards)
%
% This predicate stores the positions in a backwards way, i.e. the number of tokens that are not
% (yet) parsed.
:- dynamic position_backwards/1.

%% get_position(-Position)
%
% This predicate can be used as a DCG rule. It reads no token but returns the position in a forward
% way, i.e. counting from the beginning of the token list.
get_position(Position, [^|Tokens], [^|Tokens]) :-
	!,
	length(Tokens, Length),
	tokencount(TokenCount),
	Position is TokenCount - Length.

get_position(Position, Tokens, Tokens) :-
	!,
	length(Tokens, Length),
	tokencount(TokenCount),
	Position is TokenCount - Length.
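
% Example (illustrative): with tokencount(5) asserted and [waits, '.'] as the remaining
% token list, the position is computed as 5 - 2 = 3, i.e. three tokens have already been
% consumed.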

%% record_position(+ListIn, ?ListOut)
%
% This predicate can be used as a DCG rule. It reads no token but records the current position.
record_position(List, List) :-
	length(List, Length),
	record_position(Length).

%% record_position(+Pos)
%
% This predicate records the position Pos (which is the number of unparsed tokens) if it is not
% already recorded.
record_position(Pos) :-
	position_backwards(Pos),
	!.

record_position(Pos) :-
	assert(position_backwards(Pos)).

%% get_minimum(+List, +TempMin, -Min)
%
% Returns the minimal value of the list or TempMin, whichever is smaller.
get_minimum([], M, M).

get_minimum([N|Rest], Temp, M) :-
	N < Temp,
	!,
	get_minimum(Rest, N, M).

get_minimum([_|Rest], Temp, M) :-
	get_minimum(Rest, Temp, M).
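
% Example (illustrative):
%
%   ?- get_minimum([3, 1, 2], 5, M).
%   M = 1.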