2% tokkie.pl, by Johan Bos
    3
    4/*========================================================================
    5   File Search Paths
    6========================================================================*/
    7
    % Resolve '..' relative to the file being loaded and register the result
    % as the root of the `candc` path alias.
    :- prolog_load_context(file, ThisFile),
       absolute_file_name('..', CandcRoot,
                          [relative_to(ThisFile), file_type(directory)]),
       asserta(user:file_search_path(candc, CandcRoot)).

    % Library aliases, both defined relative to the `candc` root.
    user:file_search_path(semlib, candc(lib)).
    user:file_search_path(boxer,  candc(boxer)).

    % Double-quoted text is read as a list of character codes; the title/2,
    % abb/2 and mistake/1 tables in this file are written as such strings.
    :- set_prolog_flag(double_quotes, codes).
   17
   18/*========================================================================
   19   Load other libraries
   20========================================================================*/
   21
   22:- use_module(library(lists),[member/2,append/3]).   23:- use_module(library(readutil),[read_line_to_codes/2]).   24:- use_module(boxer(version),[version/1]).   25:- use_module(semlib(errors),[error/2,warning/2]).   26:- use_module(semlib(options),[option/2,parseOptions/2,setOption/3,
   27                               showOptions/1,setDefaultOptions/1]).   28
   29
   30/*========================================================================
   31   Main
   32========================================================================*/
   33
   % tokkie/0: main predicate, called by start/0 once the options are parsed.
   %   Clause 1: --version or --help is set to `do`: run version/0 and help/0.
   %   Clause 2: open the input and output streams, read the first line and
   %             hand over to readLines/3.
   %   Clause 3: reached when the streams could not be opened: switch --help
   %             on and print the usage message.

   tokkie:-
      option(Flag,do),
      member(Flag,['--help','--version']), !,
      version,
      help.

   tokkie:-
      openInput(In),
      openOutput(Out), !,
      read_line_to_codes(In,FirstLine),
      readLines(FirstLine,In,Out).

   tokkie:-
      setOption(tokkie,'--help',do), !,
      help.
   49  
   50
   51/* ----------------------------------------------------------------------
   52   Read lines
   53---------------------------------------------------------------------- */
   54
   % readLines(+Line,+In,+Out)
   % Line is the result of read_line_to_codes/2: either the atom end_of_file
   % or the code list of the current input line.  At end of file both streams
   % are closed.  Otherwise the line is tokenised and written, the next line
   % is read, decideNewLine/3 emits the separator between the two lines, and
   % processing continues with the next line.

   readLines(Line,In,Out):-
      (  Line == end_of_file
      -> close(In),
         close(Out)
      ;  tok(Line,Tokenised,LastToken),
         format(Out,'~s',[Tokenised]),
         read_line_to_codes(In,NextLine),
         decideNewLine(NextLine,LastToken,Out),
         readLines(NextLine,In,Out)
      ).
   65
   66
   67/* =======================================================================
   68   Determine New Line
   69========================================================================*/
   70
   % decideNewLine(+NextLine,+Last,+Stream)
   % Writes the separator between the line just tokenised and NextLine.
   % Last is the (reversed) last token of the line just tokenised.
   %   - end of input                           -> newline
   %   - next line starts with two lowercase    -> space (line is continued)
   %   - last token is a title                  -> space
   %   - last token is listed in mistake/1      -> nothing
   %   - anything else                          -> newline

   decideNewLine(NextLine,Last,Stream):-
      (  NextLine == end_of_file
      -> nl(Stream)
      ;  NextLine = [First,Second|_], lower(First), lower(Second)
      -> write(Stream,' ')
      ;  title(Last)
      -> write(Stream,' ')
      ;  mistake(Last)
      -> true
      ;  nl(Stream)
      ).
   76
   77
   78/* =======================================================================
   79   Tokenise
   80========================================================================*/
   81
   % tok(+Codes,-Tokenised,-Last)
   % Tokenises one line of character codes.  Leading spaces, tabs and
   % replacement characters (65533, skipped with a warning) are dropped;
   % the rest is handed to tok/4 with an empty "previous token".
   % Last is the reversed last token as returned by tok/4 ([] for an empty line).

   tok([],[],[]):- !.
   tok([Code|Codes],Tokenised,Last):-
      Code == 65533, !,
      warning('skipping non-utf8 character',[]),
      tok(Codes,Tokenised,Last).
   tok([Code|Codes],Tokenised,Last):-
      ( Code == 32 ; Code == 9 ), !,
      tok(Codes,Tokenised,Last).
   tok(Codes,Tokenised,Last):-
      tok(Codes,[],Tokenised,Last).
   87
   % tok(+In,+Prev,-Out,-Last)
   % In is the remaining input, Prev the reversed codes of the token read so
   % far ([] right after a space), Out the tokenised output.  At the end of
   % the input Last is unified with the current Prev.
   %   - 65533 is skipped with a warning;
   %   - if a pattern/4 clause applies, it decides what is consumed, what is
   %     emitted and what the new Prev is;
   %   - a space is copied and resets Prev;
   %   - any other code is copied and pushed onto Prev.

   tok([],Last,[],Last):- !.
   tok([65533|In],Prev,Out,Last):- !,
      warning('skipping non-utf8 character',[]),
      tok(In,Prev,Out,Last).
   tok(In,Prev,Out,Last):-
      pattern(In-Rest,Prev,Out-OutTail,Next), !,
      tok(Rest,Next,OutTail,Last).
   tok([32|In],_,[32|Out],Last):- !,
      tok(In,[],Out,Last).
   tok([Code|In],Prev,[Code|Out],Last):-
      tok(In,[Code|Prev],Out,Last).
   93
   94
   95/* ==================================================================================
   96   Patterns
   97================================================================================== */
   98
   99/* ----------------------------------------------------------------------------------
  100   Remove space at end of line
  101---------------------------------------------------------------------------------- */
  102
  % A single space or tab as the very last character of the line is consumed
  % without producing output; the previous token is left unchanged.
  pattern([32]-[], Prev, Out-Out, Prev):- !.      %%% trailing space
  pattern([9]-[],  Prev, Out-Out, Prev):- !.      %%% trailing tab
  105
  106/* ----------------------------------------------------------------------------------
  107   Squeeze space
  108---------------------------------------------------------------------------------- */
  109
  % Runs of blanks are reduced step by step: nothing is emitted, and the
  % remaining input starts with a single space (or with nothing at all when
  % there is no previous token).  A lone tab is turned into a space.
  pattern([32,32|Rest]-Rest,      [],   Out-Out, []):- !.      %%% space space, no token before
  pattern([32,32|Rest]-[32|Rest], Prev, Out-Out, Prev):- !.    %%% space space
  pattern([9,32|Rest]-[32|Rest],  Prev, Out-Out, Prev):- !.    %%% tab space
  pattern([32,9|Rest]-[32|Rest],  Prev, Out-Out, Prev):- !.    %%% space tab
  pattern([9,9|Rest]-[32|Rest],   Prev, Out-Out, Prev):- !.    %%% tab tab
  pattern([9|Rest]-[32|Rest],     Prev, Out-Out, Prev):- !.    %%% tab -> space
  116
  117/* ----------------------------------------------------------------------------------
  118   Dot dot dot (end of line)
  119   If the last token before the ... is an abbreviation, an extra . is preserved
  120---------------------------------------------------------------------------------- */
  121
  % An ellipsis (recognised by dots/2) followed only by blanks up to the end
  % of the line.  After an abbreviation the output is ". ..." with no space
  % before the first dot; otherwise "..." is emitted, preceded by a space
  % if there is a previous token.
  pattern(In-[], Prev, [46,32,46,46,46|Out]-Out, [46,46,46]):-
     dots(In,Rest), end(Rest), abb(Prev), !.
  pattern(In-[], Prev, Out0-Out, [46,46,46]):-
     dots(In,Rest), end(Rest), !,
     insertSpace(Prev,[46,46,46|Out],Out0).
  124
  125/* ----------------------------------------------------------------------------------
  126   Dot dot dot (not end of line)
  127---------------------------------------------------------------------------------- */
  128
  % An ellipsis in the middle of a line is normalised to "...".  It is
  % followed by a space when the next character is lowercase, by a newline
  % (code 10) when it is uppercase, and by a space in all other cases.
  pattern(In-[Next|Rest], Prev, Out0-Out, []):-
     dots(In,[Next|Rest]), lower(Next), !,
     insertSpace(Prev,[46,46,46,32|Out],Out0).
  pattern(In-[Next|Rest], Prev, Out0-Out, []):-
     dots(In,[Next|Rest]), upper(Next), !,
     insertSpace(Prev,[46,46,46,10|Out],Out0).
  pattern(In-Rest, Prev, Out0-Out, []):-
     dots(In,Rest), !,
     insertSpace(Prev,[46,46,46,32|Out],Out0).
  132
  133/* ----------------------------------------------------------------------------------
  134   Full stop and bracket (end of line)
  135---------------------------------------------------------------------------------- */
  136
  % Full stop directly followed by a bracket, with only blanks after it:
  % the two are separated by a space.                        %%% X.) -> X . )
  pattern([46,Bracket|Rest]-[], Prev, Out0-Out, [Bracket]):-
     bracket(Bracket), end(Rest), !,
     insertSpace(Prev,[46,32,Bracket|Out],Out0).
  138
  139/* ----------------------------------------------------------------------------------
  140   Full stop and ending quotes (end of line)
  141---------------------------------------------------------------------------------- */
  142
  % Full stop followed by closing quote(s), with only blanks up to the end of
  % the line.  With --quotes delete the quotes are dropped, with --quotes keep
  % they are emitted as separate tokens after the full stop.

  % single-character quote                                    %%% X." -> X .   /   X . "
  pattern([46,Quote|Rest]-[], Prev, Out0-Out, [46]):-
     quote(Quote), end(Rest), option('--quotes',delete), !,
     insertSpace(Prev,[46|Out],Out0).
  pattern([46,Quote|Rest]-[], Prev, Out0-Out, [Quote]):-
     quote(Quote), end(Rest), option('--quotes',keep), !,
     insertSpace(Prev,[46,32,Quote|Out],Out0).

  % doubled quote character                                   %%% X.'' -> X .   /   X . ''
  pattern([46,Quote,Quote|Rest]-[], Prev, Out0-Out, [46]):-
     quotes(Quote), end(Rest), option('--quotes',delete), !,
     insertSpace(Prev,[46|Out],Out0).
  pattern([46,Quote,Quote|Rest]-[], Prev, Out0-Out, [Quote]):-
     quotes(Quote), end(Rest), option('--quotes',keep), !,
     insertSpace(Prev,[46,32,Quote,Quote|Out],Out0).

  % space, then two different quote characters                %%% X. '" -> X .   /   X . ' "
  pattern([46,32,Inner,Outer|Rest]-[], Prev, Out0-Out, [46]):-
     quote(Inner), quote(Outer), Inner \= Outer, end(Rest),
     option('--quotes',delete), !,
     insertSpace(Prev,[46|Out],Out0).
  pattern([46,32,Inner,Outer|Rest]-[], Prev, Out0-Out, [Outer]):-
     quote(Inner), quote(Outer), Inner \= Outer, end(Rest),
     option('--quotes',keep), !,
     insertSpace(Prev,[46,32,Inner,32,Outer|Out],Out0).
  151
  152/* ----------------------------------------------------------------------------------
  153   Full stop and ending quotes (not end of line)
  154---------------------------------------------------------------------------------- */
  155
  % Full stop, closing quote or closing bracket, space, uppercase letter:
  % a newline (code 10) is emitted before the uppercase letter.  The quote is
  % dropped or kept according to --quotes; a closing bracket is always kept.
  pattern([46,Quote,32,Up|Rest]-[Up|Rest], Prev, Out0-Out, []):-       %%% X." U
     quote(Quote), upper(Up), option('--quotes',delete), !,
     insertSpace(Prev,[46,10|Out],Out0).
  pattern([46,Quote,32,Up|Rest]-[Up|Rest], Prev, Out0-Out, []):-       %%% X." U
     quote(Quote), upper(Up), option('--quotes',keep), !,
     insertSpace(Prev,[46,32,Quote,10|Out],Out0).
  pattern([46,Close,32,Up|Rest]-[Up|Rest], Prev, Out0-Out, []):-       %%% X.) U
     closing_bracket(Close), upper(Up), !,
     insertSpace(Prev,[46,32,Close,10|Out],Out0).
  159
  160/* ----------------------------------------------------------------------------------
  161   Full stop (end of line)
  162   If the last token before the . is an abbreviation, no extra . is produced.
  163---------------------------------------------------------------------------------- */
  164
  % Full stop followed only by blanks up to the end of the line.
  % After a title or an abbreviation the full stop stays attached to the
  % token; otherwise it becomes a token of its own.
  pattern([46|Rest]-[], Prev, [46|Out]-Out, Prev):-            %%% X. -> X.
     end(Rest), title(Prev), !.
  pattern([46|Rest]-[], Prev, [46|Out]-Out, [46|Prev]):-       %%% X. -> X.
     end(Rest), abb(Prev), !.
  pattern([46|Rest]-[], Prev, Out0-Out, [46]):-                %%% X. -> X .
     end(Rest), !,
     insertSpace(Prev,[46|Out],Out0).
  168
  169/* ----------------------------------------------------------------------------------
  170   Full stop, followed by opening quote
  171---------------------------------------------------------------------------------- */
  172
  % Full stop, space, then a quote or opening bracket.
  % ". 's" after a token is copied unchanged.  When the quote (single or
  % doubled) or opening bracket is followed by an uppercase letter, the full
  % stop ends the sentence: "." and a newline are emitted and the quote or
  % bracket is left in the input for the later clauses.
  pattern([46,32,Apos,115|Rest]-Rest, [_|_], [46,32,Apos,115|Out]-Out, [115,Apos]):-
     rsq(Apos), !.
  pattern([46,32,Quote,Up|Rest]-[Quote,Up|Rest], Prev, Out0-Out, []):-
     quote(Quote), upper(Up), !,
     insertSpace(Prev,[46,10|Out],Out0).
  pattern([46,32,Quote,Quote,Up|Rest]-[Quote,Quote,Up|Rest], Prev, Out0-Out, []):-
     quotes(Quote), upper(Up), !,
     insertSpace(Prev,[46,10|Out],Out0).
  pattern([46,32,Open,Up|Rest]-[Open,Up|Rest], Prev, Out0-Out, []):-
     opening_bracket(Open), upper(Up), !,
     insertSpace(Prev,[46,10|Out],Out0).
  177
  /* ----------------------------------------------------------------------------------
     Full stop (not end of line), next token starts with uppercase
     Case 1: A full stop after a space --> sentence boundary
     Case 2: A full stop after a one-character token --> initial, no sentence boundary
     Case 3: A full stop after a title --> no sentence boundary
     Case 4: A full stop after a non-abbreviation --> sentence boundary
     Case 5: A full stop after an abbreviation --> no sentence boundary
             (no active clause; the full stop is copied by the default case of tok/4)
  ---------------------------------------------------------------------------------- */
  186
  % Full stop, one space, uppercase letter.  The clauses are tried in order:
  % no token before, one-character token, title, non-abbreviation.
  pattern([46,32,Up|Rest]-[Up|Rest], [],   [46,10|Out]-Out,    []):- upper(Up), !.
  pattern([46,32,Up|Rest]-[Up|Rest], [_],  [46,32|Out]-Out,    []):- upper(Up), !.    %%% Initial
  pattern([46,32,Up|Rest]-[Up|Rest], Prev, [46,32|Out]-Out,    []):- upper(Up), title(Prev), !.
  pattern([46,32,Up|Rest]-[Up|Rest], Prev, [32,46,10|Out]-Out, []):- upper(Up), \+ abb(Prev), !.
  % Disabled clause, kept for reference:
  % pattern([46,32,U|A]-[U|A], Prev, [46,10|B]-B, []):- upper(U), abb(Prev), !.

  % The same four cases when two spaces follow the full stop.
  pattern([46,32,32,Up|Rest]-[Up|Rest], [],   [46,10|Out]-Out,    []):- upper(Up), !.
  pattern([46,32,32,Up|Rest]-[Up|Rest], [_],  [46,32|Out]-Out,    []):- upper(Up), !.    %%% Initial
  pattern([46,32,32,Up|Rest]-[Up|Rest], Prev, [46,32|Out]-Out,    []):- upper(Up), title(Prev), !.
  pattern([46,32,32,Up|Rest]-[Up|Rest], Prev, [32,46,10|Out]-Out, []):- upper(Up), \+ abb(Prev), !.
  197
  198/* ----------------------------------------------------------------------------------
  199   The comma
  200---------------------------------------------------------------------------------- */
  201
  % A comma between two digits is kept inside the number; any other comma
  % becomes a token of its own, and a space is pushed back onto the input.
  pattern([Digit,44,Next|Rest]-[Next|Rest], Prev, [Digit,44|Out]-Out, [44|Prev]):-   %%% "0,0" -> "0,0"
     num(Digit), num(Next), !.
  pattern([44|Rest]-[32|Rest], Prev, Out0-Out, [44]):- !,                            %%% "X," -> "X , "
     insertSpace(Prev,[44|Out],Out0).
  204
  205/* ----------------------------------------------------------------------------------
  206   The brackets
  207---------------------------------------------------------------------------------- */
  208
  % A bracket becomes a token of its own: a space is emitted before it when
  % there is a previous token, and a space is pushed back onto the input.
  pattern([Bracket|Rest]-[32|Rest], Prev, Out0-Out, [Bracket]):-
     bracket(Bracket), !,
     insertSpace(Prev,[Bracket|Out],Out0).
  210
  211/* ----------------------------------------------------------------------------------
  212   Colon
  213---------------------------------------------------------------------------------- */
  214
  % A colon (58) becomes a token of its own, with a space pushed back onto
  % the input after it.
  pattern([58|Rest]-[32|Rest], Prev, Out0-Out, [58]):- !,
     insertSpace(Prev,[58|Out],Out0).
  216
  217/* ----------------------------------------------------------------------------------
  218   Semicolon
  219---------------------------------------------------------------------------------- */
  220
  % A semicolon (59) becomes a token of its own, with a space pushed back
  % onto the input after it.
  pattern([59|Rest]-[32|Rest], Prev, Out0-Out, [59]):- !,
     insertSpace(Prev,[59|Out],Out0).
  222
  223/* ----------------------------------------------------------------------------------
  224   Question and Exclamation Mark
  225---------------------------------------------------------------------------------- */
  226
  % A question or exclamation mark (see mark/1) becomes a token of its own,
  % with a space pushed back onto the input after it.
  pattern([Mark|Rest]-[32|Rest], Prev, Out0-Out, [Mark]):-
     mark(Mark), !,
     insertSpace(Prev,[Mark|Out],Out0).
  228
  229/* ----------------------------------------------------------------------------------
  230   Percentage     "100%" -> "100 % "
  231---------------------------------------------------------------------------------- */
  232
  % A percent sign (37) directly after a digit is split off, and a space is
  % pushed back onto the input.                               %%% "100%" -> "100 % "
  pattern([Digit,37|Rest]-[32|Rest], _, [Digit,32,37|Out]-Out, [37]):-
     num(Digit), !.
  234
  235/* ----------------------------------------------------------------------------------
  236   Monetary units  "$100" -> "$ 100"
  237---------------------------------------------------------------------------------- */
  238
  % A currency sign directly followed by a digit is split off from the
  % amount.                                                   %%% "$100" -> "$ 100"
  % The new previous token is [32], not []: the amount that follows therefore
  % never looks like a one-character token to the later clauses.
  %   36   = dollar sign
  %   128  = presumably the euro sign in Windows-1252; kept for compatibility,
  %          but input decoded as UTF-8 does not produce it for a euro sign
  %   8364 = euro sign (U+20AC), which is what UTF-8 input produces
  pattern([36,Digit|Rest]-[Digit|Rest],   _, [36,32|Out]-Out,   [32]):- num(Digit), !.
  pattern([128,Digit|Rest]-[Digit|Rest],  _, [128,32|Out]-Out,  [32]):- num(Digit), !.
  pattern([8364,Digit|Rest]-[Digit|Rest], _, [8364,32|Out]-Out, [32]):- num(Digit), !.
  241
  242/* ----------------------------------------------------------------------------------
  243   Contractions: year/decade expressions
  244---------------------------------------------------------------------------------- */
  245
  % An apostrophe followed by two digits at the start of a token is kept as
  % one token, with or without a trailing "s".
  pattern([Apos,D1,D2,115|Rest]-Rest, [], [Apos,D1,D2,115|Out]-Out, [115,D2,D1,Apos]):-   %%% "'30s" -> "'30s"
     rsq(Apos), num(D1), num(D2), !.
  pattern([Apos,D1,D2,Next|Rest]-[Next|Rest], [], [Apos,D1,D2|Out]-Out, [D2,D1,Apos]):-   %%% "'30" -> "'30"
     rsq(Apos), num(D1), num(D2), \+ alphanum(Next), !.
  248
  249/* ----------------------------------------------------------------------------------
  250   Contractions: \'s (English)
  251---------------------------------------------------------------------------------- */
  252
  % English genitive and 's contractions (only with --language en).
  % In every clause the character after the contraction must not be
  % alphanumeric.
  pattern([Letter,Apos,115,Next|Rest]-[Next|Rest], [_|_], [Letter,32,Apos,115|Out]-Out, [115,Apos]):-   %%% "X's" -> "X 's"
     option('--language',en), rsq(Apos), alpha(Letter), \+ alphanum(Next), !.
  pattern([Apos,115,Next|Rest]-[Next|Rest], Prev, [32,Apos,115|Out]-Out, [115,Apos]):-                  %%% "U.S.'s" -> "U.S. 's"
     option('--language',en), abb(Prev), rsq(Apos), \+ alphanum(Next), !.
  pattern([Letter,Apos,83,Next|Rest]-[Next|Rest], [_|_], [Letter,32,Apos,83|Out]-Out, [83,Apos]):-      %%% "X'S" -> "X 'S"
     option('--language',en), rsq(Apos), alpha(Letter), \+ alphanum(Next), !.
  pattern([Apos,115,Next|Rest]-[Next|Rest], [], [Apos,115|Out]-Out, [115,Apos]):-                       %%% " 's" -> " 's"
     option('--language',en), rsq(Apos), \+ alphanum(Next), !.
  pattern([115,Apos,Next|Rest]-[Next|Rest], [_|_], [115,32,Apos|Out]-Out, [Apos,115]):-                 %%% "s' " -> "s ' "
     option('--language',en), rsq(Apos), \+ alphanum(Next), !.
  258
  259/* ----------------------------------------------------------------------------------
  260   Contractions: auxiliary verbs (English)
  261---------------------------------------------------------------------------------- */
  262
  % English auxiliary-verb contractions (only with --language en): the
  % contracted part is split off from the preceding letter by a space.
  pattern([Letter,Apos,109|Rest]-Rest, _, [Letter,32,Apos,109|Out]-Out, [109,Apos]):-                  %%% "X'm" -> "X 'm"
     option('--language',en), rsq(Apos), alpha(Letter), !.
  pattern([Letter,Apos,100|Rest]-Rest, _, [Letter,32,Apos,100|Out]-Out, [100,Apos]):-                  %%% "X'd" -> "X 'd"
     option('--language',en), rsq(Apos), alpha(Letter), !.
  pattern([Letter,Apos,108,108|Rest]-Rest, _, [Letter,32,Apos,108,108|Out]-Out, [108,108,Apos]):-      %%% "X'll" -> "X 'll"
     option('--language',en), rsq(Apos), alpha(Letter), !.
  pattern([Letter,Apos,118,101|Rest]-Rest, _, [Letter,32,Apos,118,101|Out]-Out, [101,118,Apos]):-      %%% "X've" -> "X 've"
     option('--language',en), rsq(Apos), alpha(Letter), !.
  pattern([Letter,Apos,114,101|Rest]-Rest, _, [Letter,32,Apos,114,101|Out]-Out, [101,114,Apos]):-      %%% "X're" -> "X 're"
     option('--language',en), rsq(Apos), alpha(Letter), !.
  pattern([Letter,110,Apos,116|Rest]-Rest, _, [Letter,32,110,Apos,116|Out]-Out, [116,Apos,110]):-      %%% "Xn't" -> "X n't"
     option('--language',en), rsq(Apos), alpha(Letter), !.
  269
  270/* ----------------------------------------------------------------------------------
  271   Contractions (Italian)
  272---------------------------------------------------------------------------------- */
  273
  % Italian (only with --language it): "l" plus apostrophe in front of a
  % letter is split off from the following word.              %%% " l'X" -> " l' X"
  pattern([108,Apos,Letter|Rest]-[Letter|Rest], Prev, Out0-Out, []):-
     option('--language',it), alpha(Letter), rsq(Apos), !,
     insertSpace(Prev,[108,Apos,32|Out],Out0).
  275
  276
  277/* ----------------------------------------------------------------------------------
  278   Contractions: Irish and foreign names
  279---------------------------------------------------------------------------------- */
  280
  % Letter, apostrophe, letter at the start of a token is copied unchanged,
  % so that the apostrophe is not treated as a quote.         %%% "O'R" -> "O'R"
  % Unlike the other clauses this one has no cut.
  pattern([First,Apos,Second|Rest]-Rest, [], [First,Apos,Second|Out]-Out, [Second,Apos,First]):-
     rsq(Apos), alpha(First), alpha(Second).
  282
  283/* ----------------------------------------------------------------------------------
  284   Double character quotes
  285---------------------------------------------------------------------------------- */
  286
  % Quotes written as a doubled character (see quotes/1).
  % With --quotes delete they are removed; surrounded by spaces, one space
  % is left in the input.  Otherwise they become a token of their own, with
  % a space pushed back onto the input.
  pattern([32,Quote,Quote,32|Rest]-[32|Rest], Prev, Out-Out, Prev):-
     quotes(Quote), option('--quotes',delete), !.
  pattern([Quote,Quote|Rest]-Rest, Prev, Out-Out, Prev):-
     quotes(Quote), option('--quotes',delete), !.
  pattern([Quote,Quote|Rest]-[32|Rest], Prev, Out0-Out, [Quote,Quote]):-
     quotes(Quote), !,
     insertSpace(Prev,[Quote,Quote|Out],Out0).
  290
  291/* ----------------------------------------------------------------------------------
  292   Single character quotes
  293---------------------------------------------------------------------------------- */
  294
  % Single-character quotes (see quote/1).
  % With --quotes delete they are removed; surrounded by spaces, one space
  % is left in the input.  Otherwise the quote becomes a token of its own,
  % with a space pushed back onto the input.
  pattern([32,Quote,32|Rest]-[32|Rest], Prev, Out-Out, Prev):-
     quote(Quote), option('--quotes',delete), !.
  pattern([Quote|Rest]-Rest, Prev, Out-Out, Prev):-
     quote(Quote), option('--quotes',delete), !.
  pattern([Quote|Rest]-[32|Rest], Prev, Out0-Out, [Quote]):-
     quote(Quote), !,
     insertSpace(Prev,[Quote|Out],Out0).
  298
  299
  300/* ==================================================================================
  301   Aux Predicates
  302====================================================================================*/
  303
  % alphanum(+Code): Code satisfies alpha/1 or num/1.
  alphanum(Code):-
     ( alpha(Code) ; num(Code) ), !.
  306
  % alpha(+Code): Code is '>' (62, counted as a letter because it ends
  % markup) or satisfies upper/1 or lower/1.
  alpha(Code):-
     ( Code = 62 ; upper(Code) ; lower(Code) ), !.
  310
  % upper(+Code): Code is one of the ASCII letters A-Z.
  upper(Code):- Code >= 0'A, Code =< 0'Z, !.
  % lower(+Code): Code is one of the ASCII letters a-z.
  lower(Code):- Code >= 0'a, Code =< 0'z, !.
  313
  % num(+Code): Code is one of the ASCII digits 0-9.
  num(Code):- Code >= 0'0, Code =< 0'9, !.
  315
  316
  317/* ----------------------------------------------------------------------------------
  318   Insert space, but only if there is a token just before
  319---------------------------------------------------------------------------------- */
  320
  % insertSpace(+Prev,+Codes,-Result)
  % Result is Codes when there is no previous token (Prev is []), and Codes
  % with a leading space (32) otherwise.
  insertSpace(Prev,Codes,Result):-
     (  Prev = [], Result = Codes
     -> true
     ;  Result = [32|Codes]
     ).
  323
  324
  325/* ----------------------------------------------------------------------------------
  326   Codes for Brackets
  327---------------------------------------------------------------------------------- */
  328
  % bracket(?Code): Code is an opening or a closing bracket.
  bracket(Code):-
     (  opening_bracket(Code)
     ;  closing_bracket(Code)
     ).

  % opening_bracket(?Code)
  opening_bracket(0x28).  %%% (
  opening_bracket(0x5B).  %%% [
  opening_bracket(0x7B).  %%% {

  % closing_bracket(?Code)
  closing_bracket(0x29).  %%% )
  closing_bracket(0x5D).  %%% ]
  closing_bracket(0x7D).  %%% }
  339
  340
  341/* ----------------------------------------------------------------------------------
  342   Codes for right single quotation marks (used in genitives)
  343---------------------------------------------------------------------------------- */
  344
  % rsq(?Code): characters accepted as the apostrophe in contractions.
  rsq(0x27).     %%% ASCII apostrophe
  rsq(0x2019).   %%% right single quotation mark
  347
  348
  349/* ----------------------------------------------------------------------------------
  350   Codes for single-character quotes
  351---------------------------------------------------------------------------------- */
  352
  % quote(?Code): characters that act as a quote on their own.
  quote(0x22).     %%% "
  quote(0x27).     %%% '
  quote(0x60).     %%% `
  quote(0x2018).   %%% left single quotation mark
  quote(0x2019).   %%% right single quotation mark
  quote(0x201A).   %%% low single quotation mark
  quote(0x201C).   %%% left double quotation mark
  quote(0x201D).   %%% right double quotation mark
  quote(0x201E).   %%% low double quotation mark
  362
  363
  364/* ----------------------------------------------------------------------------------
  365   Codes for double quotes
  366---------------------------------------------------------------------------------- */
  367
  % quotes(?Code): characters that form a quote when written twice in a row.
  quotes(0x60).     %%% ``
  quotes(0x27).     %%% ''
  quotes(0x2018).   %%% left single quotation mark, doubled
  quotes(0x2019).   %%% right single quotation mark, doubled
  quotes(0x201A).   %%% low single quotation mark, doubled
  373
  374/* ----------------------------------------------------------------------------------
  375   Codes for punctuation marks
  376---------------------------------------------------------------------------------- */
  377
  % mark(?Code): question mark and exclamation mark.
  mark(0x3F).    %%% ?
  mark(0x21).    %%% !
  380
  381
  382/* ----------------------------------------------------------------------------------
  383   Titles (or other expressions that never/rarely occur at end of sentence)
  384   The actual string (in double quotes) is reversed!
  385---------------------------------------------------------------------------------- */
  386
  % title(+Reversed)
  % Succeeds when Reversed, the reversed code list of a token, is listed as
  % a title for the language selected with --language.
  title(Reversed):-
     option('--language',Language),
     title(Language,Reversed), !.

  % title(?Language,?Reversed)
  % Every string is the title spelled backwards, because tok/4 collects the
  % current token in reverse order.  The strings are code lists (the
  % double_quotes flag is set to codes at the top of the file).
  title(en, "rM").           % Mr     sg
  title(en, "srsseM").       % Messrs pl
  title(en, "srM").          % Mrs    sg
  title(en, "semM").         % Mmes   pl
  title(en, "sM").           % Ms
  title(en, "rD").           % Dr     sg
  title(en, "srD").          % Drs    pl
  title(en, "forP").         % Prof
  title(en, "neS").          % Sen
  title(en, "voG").          % Gov
  title(en, "tS").           % St     Saint
  title(en, "peR").          % Rep
  title(en, "neG").          % Gen
  title(en, "tL").           % Lt     Lieutenant
  title(en, "tueiL").        % Lieut  Lieutenant
  title(en, "loC").          % Col    Colonel
  title(en, "mdA").          % Adm    Admiral
  title(en, "tpC").          % Cpt    Captain
  title(en, "veR").          % Rev    Reverend
  title(en, "noH").          % Hon    Honorable
  title(en, "tpaC").         % Capt
  title(en, "rdmC").         % Cmdr
  title(en, "nlpahC").       % Chapln (capitalised form was missing from the table)
  title(en, "nlpahc").       % chapln (lower-case entry kept for compatibility)

  title(en, "v").            % v
  title(en, "sv").           % vs
  title(en, "eiC").          % Cie
  title(en, "a.k.a").        % a.k.a  (reads the same backwards)
  title(en, "tM").           % Mt     Mount
  418
  419/* ----------------------------------------------------------------------------------
  420   Abbreviations
  421---------------------------------------------------------------------------------- */
  422
  % abb(+Reversed)
  % Reversed, the reversed code list of a token, counts as an abbreviation
  % when it contains both a full stop and a character satisfying alpha/1,
  % or when it is listed in abb/2 for the language selected with --language.
  abb(Reversed):-
     member(46,Reversed),
     member(Code,Reversed),
     alpha(Code), !.
  abb(Reversed):-
     option('--language',Language),
     abb(Language,Reversed), !.

  % abb(?Language,?Reversed): known abbreviations, spelled backwards.
  abb(en, "proC").   % Corp
  abb(en, "cnI").    % Inc
  abb(en, "oC").     % Co
  abb(en, "dtL").    % Ltd
  abb(en, "rJ").     % Jr
  abb(en, "rS").     % Sr
  abb(en, "soC").    % Cos
  abb(en, "sorB").   % Bros
  abb(en, "cte").    % etc
  435
  436
  437/* ----------------------------------------------------------------------------------
  438   Mistake in WSJ tokenisation
  439---------------------------------------------------------------------------------- */
  440
  % mistake(?Reversed): reversed last token after which decideNewLine/3
  % writes neither a space nor a newline.  The only entry is "S.p." spelled
  % backwards, given here as an explicit code list.
  mistake([46,112,46,83]).      %%% ".p.S"
  442
  443
  444/* =======================================================================
  445   End (only spaces or tabs before end of line)
  446========================================================================*/
  447
  % end(+Codes): Codes consists of spaces (32) and tabs (9) only, i.e.
  % nothing but blanks is left before the end of the line.
  end([]):- !.
  end([Code|Codes]):-
     ( Code = 32 ; Code = 9 ), !,
     end(Codes).
  451
  452
  453/* =======================================================================
  454   Dots
  455========================================================================*/
  456
  % dots(+In,-Out)
  % In starts with a run of at least two full stops, which may be separated
  % by single spaces; Out is the input after that run.

  dots(In,Out):-
     dots(In,0,Out).

  % dots(+In,+Count,-Out): Count is the number of full stops consumed so
  % far.  The alternatives are tried in this order: " . ", ". ", ".".
  dots(In,Count,Out):-
     (  In = [32,46,32|Rest]
     ;  In = [46,32|Rest]
     ;  In = [46|Rest]
     ), !,
     Count1 is Count + 1,
     dots(Rest,Count1,Out).

  dots(Out,Count,Out):-
     Count > 1.
  474
  475
  476/* =======================================================================
  477   Open Input File
  478========================================================================*/
  479
  % openInput(-Stream)
  % Clause 1: --stdin is `dont` and the file given with --input exists:
  %           open it for reading as UTF-8.
  % Clause 2: --stdin is `do`: read from user_input.
  % Fails when neither applies (for instance when the input file is missing).

  openInput(Stream):-
     option('--stdin',dont),
     option('--input',File),
     exists_file(File), !,
     open(File,read,Stream,[encoding(utf8)]).

  openInput(Stream):-
     option('--stdin',do),
     set_prolog_flag(encoding,utf8),
     % The encoding flag only sets the default for streams opened later;
     % user_input is already open, so its encoding is switched explicitly.
     set_stream(user_input,encoding(utf8)),
     warning('reading from standard input',[]),
     prompt(_,''),
     Stream = user_input.
  492
  493
  494/* =======================================================================
  495   Open Output File
  496========================================================================*/
  497
  % openOutput(-Stream)
  % When --output names something other than user_output: open it for
  % writing as UTF-8 if it is writable, otherwise report an error and use
  % user_output.  In all remaining cases Stream is user_output.

  openOutput(Stream):-
     option('--output',File),
     atomic(File),
     File \= user_output,
     (  access_file(File,write)
     -> open(File,write,Stream,[encoding(utf8)])
     ;  error('cannot write to specified file ~p',[File]),
        Stream = user_output
     ), !.

  openOutput(user_output).
  508
  509
  510/* =======================================================================
  511   Version
  512========================================================================*/
  513
  % version/0: with --version set to `do`, print the version obtained from
  % version/1 on user_error; otherwise do nothing.
  version:-
     (  option('--version',do)
     -> version(Version),
        format(user_error,'~p~n',[Version])
     ;  true
     ).
  520
  521
  522/* =======================================================================
  523   Help
  524========================================================================*/
  525
  % help/0: with --help set to `do`, print the usage line and the option
  % list on user_error; with --help set to `dont`, succeed silently.
  % Fails when --help has neither value.
  help:-
     (  option('--help',do)
     -> format(user_error,'usage: tokkie [options]~n~n',[]),
        showOptions(tokkie)
     ;  option('--help',dont)
     -> true
     ).
  533
  534
  535/* =======================================================================
  536   Definition of start
  537========================================================================*/
  538
  % start/0: program entry point.
  % Takes the command-line arguments (without the program name), installs
  % the default options, parses the arguments and runs tokkie/0, then halts.
  % When any of these steps fails, an error is reported before halting.

  start:-
     current_prolog_flag(argv,[_Program|Arguments]),
     setDefaultOptions(tokkie),
     parseOptions(tokkie,Arguments),
     tokkie, !,
     halt.

  % The terminating full stop of this clause was missing, which is a syntax
  % error at the end of the file.
  start:-
     error('tokkie failed',[]),
     halt.