1% This file is part of the Attempto Parsing Engine (APE).
    2% Copyright 2008-2013, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch).
    3%
    4% The Attempto Parsing Engine (APE) is free software: you can redistribute it and/or modify it
    5% under the terms of the GNU Lesser General Public License as published by the Free Software
    6% Foundation, either version 3 of the License, or (at your option) any later version.
    7%
    8% The Attempto Parsing Engine (APE) is distributed in the hope that it will be useful, but WITHOUT
    9% ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
   10% PURPOSE. See the GNU Lesser General Public License for more details.
   11%
   12% You should have received a copy of the GNU Lesser General Public License along with the Attempto
   13% Parsing Engine (APE). If not, see http://www.gnu.org/licenses/.
   14
   15
   16:- module(is_in_lexicon, [
   17	is_in_lexicon/1,
   18	is_functionword/1,
   19	is_contentword/1
   20]).

Is a token in the lexicon?

author
- Kaarel Kaljurand
- Tobias Kuhn
version
- 2007-12-06

*/

   31:- use_module(functionwords).   32:- use_module(lexicon_interface).   33:- use_module(illegalwords).   34:- use_module(chars).
 is_in_lexicon(+WordForm:atom) is semidet
Arguments:
WordForm- is an ACE wordform

Succeeds if WordForm is among the ACE words, possibly one of the illegal words like `any' or `this'.

   44is_in_lexicon(WordForm) :-
   45	(
   46		is_functionword(WordForm)
   47	;
   48		is_contentword(WordForm)
   49	;
   50		is_illegalword(WordForm, _)
   51	).
 is_functionword(+WordForm:atom) is nondet
Arguments:
WordForm- is an ACE wordform

Succeeds if WordForm is among the ACE function words.

   60is_functionword(WordForm) :-
   61	(
   62		functionwords:rawnumber_number(WordForm, _)
   63	;
   64		functionwords:functionword(WordForm)
   65	;
   66		functionwords:variable(WordForm)
   67	).
 is_contentword(+WordForm:atom) is nondet
Arguments:
WordForm- is an ACE wordform

Succeeds if WordForm is in the content word lexicon.

   76is_contentword(WordForm) :-
   77	(
   78		adv(WordForm, _)
   79	;
   80		adv_comp(WordForm, _)
   81	;
   82		adv_sup(WordForm, _)
   83	;
   84		adj_itr(WordForm, _)
   85	;
   86		adj_itr_comp(WordForm, _)
   87	;
   88		adj_itr_sup(WordForm, _)
   89	;
   90		adj_tr(WordForm, _, _)
   91	;
   92		adj_tr_comp(WordForm, _, _)
   93	;
   94		adj_tr_sup(WordForm, _, _)
   95	;
   96		noun_sg(WordForm, _, _)
   97	;
   98		noun_pl(WordForm, _, _)
   99	;
  100		noun_mass(WordForm, _, _)
  101	;
  102		mn_sg(WordForm, _)
  103	;
  104		mn_pl(WordForm, _)
  105	;
  106		pn_sg(WordForm, _, _)
  107	;
  108		pn_pl(WordForm, _, _)
  109	;
  110		pndef_sg(WordForm, _, _)
  111	;
  112		pndef_pl(WordForm, _, _)
  113	;
  114		iv_finsg(WordForm, _)
  115	;
  116		iv_infpl(WordForm, _)
  117	;
  118		tv_finsg(WordForm, _)
  119	;
  120		tv_infpl(WordForm, _)
  121	;
  122		tv_pp(WordForm, _)
  123	;
  124		dv_finsg(WordForm, _, _)
  125	;
  126		dv_infpl(WordForm, _, _)
  127	;
  128		dv_pp(WordForm, _, _)
  129	;
  130		( chars:to_lowercase(WordForm, WordFormL), prep(WordFormL, _) )
  131	)