View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Wouter Jansweijer and Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  1985-2013, University of Amsterdam
    7    All rights reserved.
    8
    9    Redistribution and use in source and binary forms, with or without
   10    modification, are permitted provided that the following conditions
   11    are met:
   12
   13    1. Redistributions of source code must retain the above copyright
   14       notice, this list of conditions and the following disclaimer.
   15
   16    2. Redistributions in binary form must reproduce the above copyright
   17       notice, this list of conditions and the following disclaimer in
   18       the documentation and/or other materials provided with the
   19       distribution.
   20
   21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   24    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   25    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   26    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   27    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   28    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   29    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   31    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   32    POSSIBILITY OF SUCH DAMAGE.
   33*/
   34
   35:- module(readln,
   36          [ readln/1,                   % -Line
   37            readln/2,                   % -Line, +EOL
   38            readln/5                    % See above
   39          ]).   40:- autoload(library(lists),[append/3,member/2]).   41
   42
   43/** <module> Read line as list of tokens
   44
   45Read a sentence from the current input stream and convert it into a list
   46of atoms and numbers:
   47
   48    - Letters(A-Z, a-z) are converted to atoms
   49    - Digits (0-9) (and a '.' if a real number) are converted to numbers
   50        Some obscure 'rounding' is done, so you have most of the times
   51        only 6 significant digits with an exponent part. (This is caused
   52        by the system predicate 'name'. If you want looonnnggg numbers
   53        then define digits as parts of words).
   54        (N.B. reals work only if '.' is not defined as 'stop-char' but
   55                'escape' will work in this case)
   56
   57    The reader is _flexible_, you can define yourself:
   58
   59        - the character on which reading will stop
   60                (this character is escapable with \
   61                 to read a \ type this character twice!!)
   62        - the character(s) that make up a word (execpt the
   63          characters A-Z, a-z that always make up words!!
   64          and (real)-numbers that always are grouped together!!)
   65        - whether you want conversion of uppercase letters to
   66          lowercase letters.
   67
   68    readln/1
   69        The default setting for readln/1 is
   70                - read up till newline
   71                - see underscore('_') and numbers 0-9 as part of words
   72                - make lowercase
   73
   74        - If nothing is read readln/1 succeeds with []
   75        - If an end_of_file is read readln/1 succeeds with [..|end_of_file]
   76
   77
   78    readln/5
   79        This predicate gives you the flexibility.
   80        It succeeds with arg1 = list of word&atoms
   81                         arg2 = Ascii code of last character
   82                                (but '-1' in case of ^D).
   83        To change one or more of the defaults you have to
   84        instantiate argument3 and/or argument4 and/or argument5.
   85         !! Uninstantiated arguments are defaulted !!
   86        - stop character(s):
   87                instantiate argument 3 with the list of ASCII code's
   88                of the desired stop characters (Note: you can also
   89                say: ".!?", what is equivalent to [46,33,63]).
   90        - word character(s):
   91                instantiate argument 4 with the list of ASCII code's
   92                of the desired word-part characters (Note: wou can also
   93                say: "", what is equivalent to [] ; i.e. no extra
   94                characters).
   95        - lowercase conversion:
   96                instantiate argument 5 with lowercase
   97
   98
   99Main predicates provided:
  100
  101    readln(P)           - Read a sentence up till NewLine and
  102                          unify <P> with the list of atoms/numbers
  103                          (identical to:
  104                                 readln(P, [10],"_01213456789",uppercase).)
  105    readln(P, LastCh)   - idem as above but the second argument is unified
  106                          with the last character read (the ascii-code for
  107                          the stop-character or -1)
  108    readln(P, LastCh, Arg1, Arg2, Arg3)
  109                        - idem as above but the default setting is changed
  110                          for the instantiated args:
  111                          Arg1: List of stop characters
  112                          Arg2: List of word_part characters
  113                          Arg3: uppercase/lowercase conversion
  114
  115Examples:
  116        read_sentence(P,Case) :-
  117                readln(P,_,".!?","_0123456789",Case).
  118
  119        read_in(P) :-                           % with numbers as separate
  120                readln(P,Eof,_,"", _).  % entities.
  121
  122        read_atom(A) :-                 % stop on newline,
  123                readln(A,_,_," ",_).            % space is part of word
  124
  125@deprecated Old code. Not maintained and probably not at the
  126        right level of abstraction.  Not locale support.
  127@see    library(readutil), nlp package.
  128*/
  129
  130
  131readln(Read) :-                 % the default is read up to EOL
  132    string_codes("_0123456789", Arg2),
  133    rl_readln(Line, LastCh, [10], Arg2, uppercase),
  134    (   LastCh == end_of_file
  135    ->  append(Line,[end_of_file], Read)
  136    ;   Read = Line
  137    ).
  138
  139readln(Read, LastCh):-
  140    string_codes("_0123456789", Arg2),
  141    rl_readln(Read, LastCh, [10], Arg2, uppercase).
  142
  143readln(P, EOF, StopChars, WordChars, Case) :-
  144    (   var(StopChars)
  145    ->  Arg1 = [10]
  146    ;   Arg1 = StopChars
  147    ),
  148    (   var(WordChars)
  149    ->  string_codes("01234567890_", Arg2)
  150    ;   Arg2 = WordChars
  151    ),
  152    (   var(Case)
  153    ->  Arg3 = lowercase
  154    ;   Arg3 = Case
  155    ),
  156    rl_readln(P, EOF, Arg1, Arg2, Arg3).
  157
  158rl_readln(P, EOF, StopChars, WordChars, Case) :-
  159    rl_initread(L, EOF, StopChars),
  160    rl_blanks(L, LL),
  161    !,
  162    rl_words(P, LL,[], options(WordChars, Case)),
  163    !.
  164
  165rl_initread(S, EOF, StopChars) :-
  166    get_code(K),
  167    rl_readrest(K, S, EOF, StopChars).
  168
  169rl_readrest(-1, [], end_of_file, _) :- !.
  170rl_readrest(0'\\, [K1|R], EOF, StopChars) :-
  171    get_code(K1),                   % skip it, take next char
  172    get_code(K2),
  173    rl_readrest(K2, R, EOF, StopChars).
  174rl_readrest(K, [K], K, StopChars) :-    % the stop char(s)
  175    member(K, StopChars),
  176    !.
  177rl_readrest(K, [K|R], EOF, StopChars) :-        % the normal case
  178    get_code(K1),
  179    rl_readrest(K1, R, EOF, StopChars).
  180
  181rl_words([W|Ws], S1, S4, Options) :-
  182    rl_word(W, S1, S2, Options),
  183    !,
  184    rl_blanks(S2, S3),
  185    rl_words(Ws, S3, S4, Options).
  186rl_words([], S1, S2, _) :-
  187    rl_blanks(S1, S2),
  188    !.
  189rl_words([], S, S, _).
  190
  191rl_word(N, [46|S1], S3, _) :-           % the dot can be in the beginning of
  192    rl_basic_num(N1, S1, S2),        % a real number.
  193    !,
  194    rl_basic_nums(Rest, S2, S3, dot),       % only ONE dot IN a number !!
  195    name(N,[48, 46, N1|Rest]).      % i.e '0.<number>'
  196rl_word(N, S0, S2, _) :-
  197    rl_basic_num(N1, S0, S1),
  198    !,
  199    rl_basic_nums(Rest, S1, S2, _),
  200    name(N,[N1|Rest]).
  201rl_word(W, S0, S2, Options) :-
  202    rl_basic_char(C1, S0, S1, Options),
  203    !,
  204    rl_basic_chars(Rest, S1, S2, Options),
  205    name(W, [C1|Rest]).
  206rl_word(P,[C|R], R, _) :-
  207    name(P, [C]),
  208    !.
  209
  210rl_basic_chars([A|As], S0, S2, Options) :-
  211    rl_basic_char(A, S0, S1, Options),
  212    !,
  213    rl_basic_chars(As, S1, S2, Options).
  214rl_basic_chars([], S, S, _).
  215
  216rl_basic_nums([46,N|As], [46|S1], S3, Dot) :- % a dot followed by >= one digit
  217    var(Dot),                       % but not found a dot already
  218    rl_basic_num(N, S1, S2),
  219    !,
  220    rl_basic_nums(As, S2, S3, dot).
  221rl_basic_nums([A|As], S0, S2, Dot) :-
  222    rl_basic_num(A, S0, S1),
  223    !,
  224    rl_basic_nums(As, S1, S2, Dot).
  225rl_basic_nums([], S, S, _).
  226
  227rl_blanks([C|S0], S1) :-
  228    rl_blank(C),
  229    !,
  230    rl_blanks(S0, S1).
  231rl_blanks(S, S).
  232
  233/* Basic Character types that form rl_words together */
  234
  235rl_basic_char(A, [C|S], S, options(WordChars, Case)) :-
  236    rl_lc(C, A, WordChars, Case).
  237
  238rl_basic_num(N, [N|R], R) :-
  239    code_type(N, digit).
  240
  241rl_blank(X) :-
  242    code_type(X, space).
  243
  244rl_lc(X, X1, _, Case) :-
  245    code_type(X, upper),
  246    !,
  247    rl_fix_case(Case, X, X1).
  248rl_lc(X, X, _, _) :-
  249    code_type(X, lower).
  250rl_lc(X, X, WordChars, _) :-
  251    memberchk(X, WordChars).
  252
  253rl_fix_case(lowercase, U, L) :-
  254    !,
  255    code_type(L, lower(U)).
  256rl_fix_case(_, C, C)