/****************************************************
% File Name: pronto_morph_engine.pl
% Author: Jason Schlachter (ai@uga.edu)(www.arches.uga.edu/~ai)

% Released: May 8th, 2003
% Artificial Intelligence Center (www.ai.uga.edu)
% ***see pronto_morph.pdf for documentation

% Morphological Analyzer to be used with
% ProNTo (Prolog Natural Language Toolkit),
% created at the Artificial Intelligence Center
% of The University of Georgia

% Modified from: POEM.PL - Part Of English Morphology
% M. Covington
% 2003 February 12
****************************************************/


% *******************************************************************
% Be sure to install these files in the same directory as pronto_morph_engine.pl
% *******************************************************************
%:- ensure_loaded( 'pronto_morph_tokenizer.pl' ).
:- ensure_loaded( 'pronto_morph_spelling_rules.pl').
:- ensure_loaded( 'pronto_morph_irreg_adj.pl' ).
:- ensure_loaded( 'pronto_morph_irreg_adv.pl' ).
:- ensure_loaded( 'pronto_morph_irreg_noun.pl' ).
:- ensure_loaded( 'pronto_morph_irreg_verb.pl' ).
:- multifile( irregular_form/3 ).

% How inflectional suffixes are named:
%
% - All are marked with the prefix operator -/1.
%   (In Prolog, -x is equivalent to -(x).)
%   This makes it easy to distinguish suffixes
%   from words.
%
% - The morphological analyzer makes the distinctions
%   that it can make without a lexicon.
%   We use the ambiguous symbols -s and -ed for morphemes
%   that cannot be unambiguously identified.
%
% -s     Suffix -s, could be either noun plural or verb 3sg
% -pl    Suffix definitely denoting a noun plural (e.g., oxen = ox -pl)
% -sg3   Suffix definitely denoting a verb 3rd person singular (e.g., has = have -sg3)
%
% -ing   Verb ending, never ambiguous
%
% -ed    Suffix -ed, denoting the past or -en form of a regular verb
% -past  Suffix definitely denoting a verb past tense form (e.g., ran = run -past)
% -en    Suffix definitely denoting an -en form of a verb  (e.g., eaten = eat -en)
%
% -er    Suffix denoting the comparative form of the adjective or adverb
% -est   Suffix denoting the superlative form of the adjective or adverb


% morph_tokens(+Tokens,-Morphs)
%  Turns tokenizer output (pronto_morph_tokenizer.pl, also et.pl) into a
%  list of morpheme lists, one inner list per word token.
%  Tokens may be a single token or a list of tokens:
%    w([t,e,s,t,i,n,g])               --> [[test,-ing]]
%    [w([t,e,s,t,i,n,g])]             --> [[test,-ing]]
%    [w([t,e,s,t,i,n,g]),w([i,t])]    --> [[test,-ing],[it]]
%  Each example shows one solution; other analyses of the same word
%  (e.g. the unanalysed word itself) are produced on backtracking.
%  Tokens that are not of the form w(Chars) contribute nothing.

morph_tokens([Token|More],Analyses) :-       % word token at the front of the list
   Token = w(Letters),
   !,                                        % commit: a word is never skipped
   morph(Letters,Remaining,Analyses),        % Analyses is an open list ending in Remaining
   morph_tokens(More,Remaining).

morph_tokens([_NonWord|More],Analyses) :-    % numeric or special-character token
   morph_tokens(More,Analyses).              % dropped from the output

morph_tokens(Single,Analyses) :-             % a lone token rather than a list
   \+ is_list(Single),
   morph_tokens([Single],Analyses).          % wrap it and reuse the list clauses

morph_tokens([],[]).                         % no tokens left, no morphemes


% morph_tokens_bag(+Tokens,-Bag)
%  Like morph_tokens/2, but collects every alternative analysis instead
%  of returning them one at a time on backtracking.
%  - For a single (non-list) token, Bag is the list of all solutions of
%    morph_tokens/2 for that token.
%  - For a list of tokens, Bag holds one such list of solutions per
%    element, in the same order as the input, e.g.
%    [[[[testing]], [[teste, -ing]], [[test, -ing]]], [[[more]]]]

morph_tokens_bag(Single,Analyses) :-         % lone token
   \+ is_list(Single),
   findall(Analysis, morph_tokens(Single,Analysis), Analyses).
morph_tokens_bag([Head|Others],[HeadAnalyses|OtherAnalyses]) :-
   % gather all analyses of the first element, then recurse on the rest
   findall(Analysis, morph_tokens(Head,Analysis), HeadAnalyses),
   morph_tokens_bag(Others,OtherAnalyses).
morph_tokens_bag([],[]).


% morph_atoms(+Atoms,-Morphs)
%  Converts an atom, or a list of atoms, into a list of morpheme lists
%  (one inner list per input word):
%    testing                  --> [[test,-ing]]
%    [testing]                --> [[test,-ing]]
%    [testing,one,two,three]  --> [[test,-ing],[one],[two],[three]]
%  Each example shows one solution; alternative analyses are produced
%  on backtracking.

morph_atoms([Word|Others],Analyses) :-     % list of atoms
   atom_chars(Word,Letters),               % morph/3 works on character lists
   morph(Letters,Remaining,Analyses),      % Analyses is an open list ending in Remaining
   morph_atoms(Others,Remaining).

morph_atoms(Lone,Analyses) :-              % a single atom rather than a list
   \+ is_list(Lone),
   morph_atoms([Lone],Analyses).           % wrap it and reuse the list clause

morph_atoms([],[]).                        % nothing left to analyse


% morph_atoms_bag(+Atoms,-Bag)
%  Like morph_atoms/2, but collects every alternative analysis instead
%  of returning them one at a time on backtracking.
%  - For a single (non-list) atom, Bag is the list of all solutions of
%    morph_atoms/2 for that atom.
%  - For a list of atoms, Bag holds one such list of solutions per
%    element, in input order, e.g.
%    [[[[testing]], [[teste, -ing]], [[test, -ing]]], [[[more]]]]

morph_atoms_bag(Lone,Analyses) :-            % single atom
   \+ is_list(Lone),
   findall(Analysis, morph_atoms(Lone,Analysis), Analyses).
morph_atoms_bag([Head|Others],[HeadAnalyses|OtherAnalyses]) :-
   % gather all analyses of the first element, then recurse on the rest
   findall(Analysis, morph_atoms(Head,Analysis), HeadAnalyses),
   morph_atoms_bag(Others,OtherAnalyses).
morph_atoms_bag([],[]).


% morph_chars(+Characters,-Morphs)
%  Converts a list of characters, or a list of character lists, into a
%  list of morpheme lists (one inner list per word):
%    [r,u,n,n,i,n,g]                 --> [[run,-ing]]
%    [[r,u,n,n,i,n,g],[f,a,s,t,e,r]] --> [[run,-ing],[fast,-er]]
%  Each example shows one solution; alternative analyses are produced
%  on backtracking.

morph_chars([Word|Others],Analyses) :-     % list of character lists
   is_list(Word),
   morph(Word,Remaining,Analyses),         % Analyses is an open list ending in Remaining
   morph_chars(Others,Remaining).

morph_chars([Char|Chars],Analyses) :-      % one flat list of characters
   \+ is_list(Char),
   morph_chars([[Char|Chars]],Analyses).   % treat it as a one-word list

morph_chars([],[]).                        % nothing left to analyse


% morph_chars_bag(+Characters,-Bag)
%  Like morph_chars/2, but collects every alternative analysis instead
%  of returning them one at a time on backtracking.
%  - For one flat list of characters, Bag is the list of all solutions
%    of morph_chars/2 for that word, e.g. for [t,e,s,t,i,n,g]:
%    [[[testing]], [[teste, -ing]], [[test, -ing]]]
%  - For a list of character lists, Bag holds one such list of
%    solutions per word, in input order, e.g.
%    [[[[testing]], [[teste, -ing]], [[test, -ing]]], [[[more]]]]
%  The two list clauses are mutually exclusive: the first requires a
%  non-list head, the second a list head.

morph_chars_bag([C|CharList],List) :-          % one flat list of characters
    \+ is_list(C),
    % Analyse the complete word, including its first character C.
    findall(Alternatives,
            morph_chars([C|CharList],Alternatives),
            List).
morph_chars_bag([First|RestTokens],[List|RestList]) :-   % list of character lists
    % Guard so that a flat character list cannot fall through to this
    % clause on backtracking and yield a spurious second solution.
    is_list(First),
    findall(Alternatives,
            morph_chars(First,Alternatives),
            List),
    morph_chars_bag(RestTokens,RestList).
morph_chars_bag([],[]).

% morph(+Characters,-Tail,-OpenList)
%  Analyses one word, given as a list of characters. OpenList is an
%  open list whose first element is the analysis of the word and whose
%  remainder is Tail, so callers can chain the analyses of several words.
%  This is where the real work is done.
%  - If irregular_form/3 (declared multifile above; presumably supplied
%    by the pronto_morph_irreg_* files) knows the word, its
%    [Root,Suffix] analysis is the only solution.
%  - Otherwise the solutions are, in order: the word itself as a root,
%    then every root/suffix split found by find_suffix/3.

morph(Letters,Tail,[[Stem,Ending]|Tail]) :-
   atom_chars(Word,Letters),                 % quicker to look up an atom than a list
   irregular_form(Word,Tail,[Stem,Ending|Tail]),   % is the word irregular?
   !.                                        % yes: no regular analyses are offered

morph(Letters,Tail,[[Word]|Tail]) :-
   atom_chars(Word,Letters).                 % the whole word is always a candidate root

morph(Letters,Tail,[[Stem,Ending]|Tail]) :-  % split the word into root and suffix
   find_suffix(Letters,StemLetters,Ending),
   atom_chars(Stem,StemLetters).             % the root is returned as an atom


% find_suffix(+Characters,-Root,-Suffix)
%  Tries split_suffix/3 at every position of the word: first on the
%  whole character list, then on each successive tail. Characters
%  skipped on the way are copied onto the front of Root.

find_suffix(Word,Stem,Ending) :-
   split_suffix(Word,Stem,Ending).           % try to split at the current position

find_suffix(Word,Stem,Ending) :-
   Word = [Letter|Later],                    % otherwise step over one character ...
   Stem = [Letter|StemRest],                 % ... which therefore belongs to the root
   find_suffix(Later,StemRest,Ending).

   
% suffix(?Chars,?Morpheme)
%  Table of the regular inflectional suffixes.
%  Chars is the spelling of the suffix as a list of characters;
%  Morpheme is the term that names it, written with the prefix
%  operator -/1 so that suffixes are easy to tell apart from words.
%  Note (from the original author) that the suffix -s is also
%  hard-coded into split_suffix in various places.
%
suffix([s],-s).         % noun plural or verb 3rd person singular (ambiguous)
suffix([e,d],-ed).      % past or -en form of a regular verb
suffix([i,n,g],-ing).   % verb ending, never ambiguous
suffix([e,r],-er).      % comparative of an adjective or adverb
suffix([e,s,t],-est).   % superlative of an adjective or adverb


% vowel(?Char)
%  Char is a vowel.
%  Succeeds for the single-character atoms a, e, i, o, u and y;
%  note that y is counted as a vowel here.
%  With Char unbound, the vowels are enumerated in that order.
%  NOTE(review): presumably consulted by the spelling rules in
%  pronto_morph_spelling_rules.pl -- no caller is visible in this file.
%
vowel(a).
vowel(e).
vowel(i).
vowel(o).
vowel(u).
vowel(y).