1% This file is part of the Attempto Parsing Engine (APE).
    2% Copyright 2008-2013, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch).
    3%
    4% The Attempto Parsing Engine (APE) is free software: you can redistribute it and/or modify it
    5% under the terms of the GNU Lesser General Public License as published by the Free Software
    6% Foundation, either version 3 of the License, or (at your option) any later version.
    7%
    8% The Attempto Parsing Engine (APE) is distributed in the hope that it will be useful, but WITHOUT
    9% ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
   10% PURPOSE. See the GNU Lesser General Public License for more details.
   11%
   12% You should have received a copy of the GNU Lesser General Public License along with the Attempto
   13% Parsing Engine (APE). If not, see http://www.gnu.org/licenses/.
   14
   15
   16%---------------------------------------------------------------------------------------------------
   17%
   18%  The program corrects misspellings that are the result of:
   19%     - transposition of two letters 
   20%     - one letter extra
   21%     - one letter missing
   22%     - one letter wrong
   23%
   24%  Input:
   25%  spelling_corrector([yolandee,is,kooking,a,diner,fro,tri,persons],Candidates,NoSolutions).
   26%
   27%
   28%  Output (first solution):
   29%  Candidates  = [yolandee-yolande, kooking-cooking, diner-dinner, fro-for],
   30%  NoSolutions = [tri]
   31%
   32%
   33%  Output (all solutions):
   34%  Candidates  = [yolandee-[yolande], kooking-[cooking, looking], 
   35%                 diner-[dinner, dinar], fro-[for]],
   36%  NoSolutions = [tri]
   37%
   38%---------------------------------------------------------------------------------------------------
   39
   40:- module(spellcheck, [
   41		spelling_corrector/3,
   42		damerau_rules/3
   43	]).   44
   45:- use_module(is_in_lexicon).   46
   47
   48spelling_corrector(TokenList,Candidates,NoSolutions) :-
   49	lexicon_lookup(TokenList,MissingTokens),
   50	damerau_rules(MissingTokens,Candidates,NoSolutions).
   51
   52% ------------------------------------------------------------------------------
   53%  lexicon_lookup/2 collects all tokens that are not in the lexicon.
   54% ------------------------------------------------------------------------------
   55
   56lexicon_lookup([],[]).
   57
   58lexicon_lookup([Token|Tokens],MissingTokens) :-
   59	lexicon(Token),
   60	lexicon_lookup(Tokens,MissingTokens).
   61
   62lexicon_lookup([Token|Tokens],[Token|MissingTokens]) :-
   63	lexicon_lookup(Tokens,MissingTokens).
   64
   65% ------------------------------------------------------------------------------
   66%  First solution:
   67%
   68%  For every missing token the first candidate (Token-NewToken) is generated
   69%  on account of the four Damerau rules. If the token is written completely
   70%  wrong so that no Damerau rule comes into question then the token is given 
   71%  back in the argument NoSolutions.
   72% ------------------------------------------------------------------------------
   73 
   74damerau_rules([],[],[]).
   75 
   76damerau_rules([Token|Tokens],[Token-NewToken|Candidates],NoSolutions) :-
   77	convert_token_char(Token,CharList),
   78	(
   79		transposition(CharList,NewCharList)
   80	;
   81		one_letter_extra(CharList,NewCharList)
   82	;
   83		one_letter_missing(CharList,NewCharList)
   84	;
   85		one_letter_wrong(CharList,NewCharList)
   86	),
   87	convert_token_char(NewToken,NewCharList),
   88	lexicon(NewToken),
   89	damerau_rules(Tokens,Candidates,NoSolutions).   
   90
   91damerau_rules([Token|Tokens],Candidates,[Token|NoSolutions]) :-
   92	damerau_rules(Tokens,Candidates,NoSolutions).    
   93 
   94% ------------------------------------------------------------------------------
   95%  convert_token_char/2 converts either a token into a list of characters or 
   96%  vice versa.
   97% ------------------------------------------------------------------------------
   98
   99convert_token_char(Token,CharList) :-
  100	var(CharList),
  101	atom_codes(Token,AsciiList),
  102	convert_ascii_char(AsciiList,CharList).
  103
  104convert_token_char(Token,CharList) :-          
  105	var(Token),
  106	convert_ascii_char(AsciiList,CharList),
  107	atom_codes(Token,AsciiList).
  108
  109
  110convert_ascii_char([],[]).
  111
  112convert_ascii_char([Ascii|Asciis],[Char|Chars]) :-
  113	atom_codes(Char,[Ascii]),
  114	convert_ascii_char(Asciis,Chars).
  115
  116% ------------------------------------------------------------------------------
  117%  transposition/2 transposes two letters.
  118% ------------------------------------------------------------------------------
  119
  120transposition([Char1,Char2|Chars],[Char2,Char1|Chars]).
  121
  122transposition([Char|Chars1],[Char|Chars2]) :-
  123	transposition(Chars1,Chars2).
  124
  125% ------------------------------------------------------------------------------
  126%  one_letter_extra/2 deletes one letter.  
  127% ------------------------------------------------------------------------------
  128
  129one_letter_extra([_|Chars],Chars).
  130
  131one_letter_extra([Char|Chars1],[Char|Chars2]) :-
  132	one_letter_extra(Chars1,Chars2).  
  133
  134% ------------------------------------------------------------------------------
  135%  one_letter_missing/2 adds one letter.
  136% ------------------------------------------------------------------------------
  137
  138one_letter_missing(Chars,[Char|Chars]) :-
  139	propose_letter(Char).
  140
  141one_letter_missing([Char|Chars1],[Char|Chars2]) :-
  142	one_letter_missing(Chars1,Chars2).  
  143
  144% ------------------------------------------------------------------------------
  145%  one_letter_wrong/2 exchanges one letter.
  146% ------------------------------------------------------------------------------
  147
  148one_letter_wrong([_|Chars],[Char2|Chars]) :-
  149	propose_letter(Char2).
  150
  151one_letter_wrong([Char|Chars1],[Char|Chars2]) :-
  152	one_letter_wrong(Chars1,Chars2).
  153
  154% ------------------------------------------------------------------------------
  155%  propose_letter/1 provides a new letter.
  156% ------------------------------------------------------------------------------
  157
  158/*
  159propose_letter(Char) :-
  160  member(Char,[a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z]).
  161*/
  162
  163propose_letter(a).
  164propose_letter(b).
  165propose_letter(c).
  166propose_letter(d).
  167propose_letter(e).
  168propose_letter(f).
  169propose_letter(g).
  170propose_letter(h).
  171propose_letter(i).
  172propose_letter(j).
  173propose_letter(k).
  174propose_letter(l).
  175propose_letter(m).
  176propose_letter(n).
  177propose_letter(o).
  178propose_letter(p).
  179propose_letter(q).
  180propose_letter(r).
  181propose_letter(s).
  182propose_letter(t).
  183propose_letter(u).
  184propose_letter(v).
  185propose_letter(w).
  186propose_letter(x).
  187propose_letter(y).
  188propose_letter(z).
  189
  190
  191% ------------------------------------------------------------------------------
  192%  Lexicon
  193% ------------------------------------------------------------------------------
  194
  195lexicon(Word) :-
  196	is_in_lexicon:is_contentword(Word)