/* Part of SWI-Prolog Author: Wouter Jansweijer and Jan Wielemaker E-mail: J.Wielemaker@vu.nl WWW: http://www.swi-prolog.org Copyright (c) 1985-2013, University of Amsterdam All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ :- module(readln, [ readln/1, % -Line readln/2, % -Line, +EOL readln/5 % See above ]). :- autoload(library(lists),[append/3,member/2]). /** Read line as list of tokens Read a sentence from the current input stream and convert it into a list of atoms and numbers: - Letters(A-Z, a-z) are converted to atoms - Digits (0-9) (and a '.' if a real number) are converted to numbers Some obscure 'rounding' is done, so you have most of the times only 6 significant digits with an exponent part. (This is caused by the system predicate 'name'. 
If you want looonnnggg numbers then define digits as parts of words).
(N.B. reals work only if '.' is not defined as 'stop-char' but 'escape'
will work in this case)

The reader is _flexible_, you can define yourself:

  - the character on which reading will stop (this character is
    escapable with \; to read a \ type this character twice!!)
  - the character(s) that make up a word (except the characters A-Z, a-z
    that always make up words!! and (real)-numbers that always are
    grouped together!!)
  - whether you want conversion of uppercase letters to lowercase
    letters.

readln/1

The default setting for readln/1 is

  - read up till newline
  - see underscore('_') and numbers 0-9 as part of words
  - do not convert case (readln/1 passes `uppercase`; only readln/5
    defaults to lowercase)
  - If nothing is read readln/1 succeeds with []
  - If an end_of_file is read readln/1 succeeds with [..., end_of_file]

readln/5

This predicate gives you the flexibility. It succeeds with

    arg1 = list of word&atoms
    arg2 = Ascii code of last character
           (but `end_of_file` at end of input, e.g. after ^D).

To change one or more of the defaults you have to instantiate argument3
and/or argument4 and/or argument5.
!! Uninstantiated arguments are defaulted !!

  - stop character(s):
    instantiate argument 3 with the list of ASCII codes of the desired
    stop characters (Note: you can also say: ".!?", which is equivalent
    to [46,33,63]).
  - word character(s):
    instantiate argument 4 with the list of ASCII codes of the desired
    word-part characters (Note: you can also say: "", which is
    equivalent to [] ; i.e. no extra characters).
  - lowercase conversion:
    instantiate argument 5 with lowercase

Main predicates provided:

    readln(P)
        - Read a sentence up till NewLine and unify P

with the list of atoms/numbers (same settings as
readln(P, _, [10], "_0123456789", uppercase), except that
end_of_file is appended to the list at end of input)

    readln(P, LastCh)
        - idem as above but the second argument is unified with the last
          character read (the ascii-code for the stop-character or
          end_of_file); end_of_file is not appended to the list

    readln(P, LastCh, Arg1, Arg2, Arg3)
        - idem as above but the default setting is changed for the
          instantiated args:
            Arg1: List of stop characters
            Arg2: List of word_part characters
            Arg3: uppercase/lowercase conversion

Examples:

    read_sentence(P,Case) :-
        readln(P,_,".!?","_0123456789",Case).

    read_in(P) :-                   % with numbers as separate
        readln(P,Eof,_,"", _).      % entities.

    read_atom(A) :-                 % stop on newline,
        readln(A,_,_," ",_).        % space is part of word

@deprecated Old code. Not maintained and probably not at the right
level of abstraction. No locale support.
@see library(readutil), nlp package.
*/

%!  readln(-Read)
%
%   Read from the current input up to and including the next newline
%   (code 10) and unify Read with the list of tokens found.  Words may
%   contain '_' and digits; case is left untouched (`uppercase`).  If
%   input ended before a newline was seen, the atom `end_of_file` is
%   appended to the token list.

readln(Read) :-
    rl_default_word_chars(WordChars),
    rl_readln(Line, LastCh, [10], WordChars, uppercase),
    (   LastCh == end_of_file
    ->  append(Line, [end_of_file], Read)
    ;   Read = Line
    ).

%!  readln(-Read, ?LastCh)
%
%   As readln/1, but LastCh is unified with the code of the stop
%   character (10) or with `end_of_file`, and nothing is appended to
%   Read at end of input.

readln(Read, LastCh) :-
    rl_default_word_chars(WordChars),
    rl_readln(Read, LastCh, [10], WordChars, uppercase).

%!  readln(-P, ?EOF, ?StopChars, ?WordChars, ?Case)
%
%   Configurable reader.  Each of the last three arguments is defaulted
%   when unbound: StopChars to [10], WordChars to '_' plus the digits
%   and Case to `lowercase`.  StopChars and WordChars are normally code
%   lists; any other text (string or atom) is converted to codes, so
%   that ".!?" also works when double quotes denote strings.

readln(P, EOF, StopChars, WordChars, Case) :-
    (   var(StopChars)
    ->  Stop = [10]
    ;   rl_text_codes(StopChars, Stop)
    ),
    (   var(WordChars)
    ->  rl_default_word_chars(Word)
    ;   rl_text_codes(WordChars, Word)
    ),
    (   var(Case)
    ->  Conv = lowercase
    ;   Conv = Case
    ),
    rl_readln(P, EOF, Stop, Word, Conv).

%   rl_default_word_chars(-Codes)
%
%   Codes of the extra characters that are part of a word by default.

rl_default_word_chars(Codes) :-
    string_codes("_0123456789", Codes).

%   rl_text_codes(+Text, -Codes)
%
%   Lists are passed through unchanged (the historical interface);
%   any other text is converted to a list of character codes.

rl_text_codes(Text, Codes) :-
    (   is_list(Text)
    ->  Codes = Text
    ;   string_codes(Text, Codes)
    ).

%   rl_readln(-Tokens, ?EOF, +StopChars, +WordChars, +Case)
%
%   Read the raw codes up to a stop character, skip leading blanks and
%   tokenise everything that was read.  Both phases are committed to
%   their first solution.

rl_readln(P, EOF, StopChars, WordChars, Case) :-
    rl_initread(L, EOF, StopChars),
    rl_blanks(L, LL),
    !,
    rl_words(P, LL, [], options(WordChars, Case)),
    !.

%   rl_initread(-Codes, ?EOF, +StopChars)
%
%   Fetch the first code from the current input and collect the rest.

rl_initread(S, EOF, StopChars) :-
    get_code(K),
    rl_readrest(K, S, EOF, StopChars).

%   rl_readrest(+Code, -Codes, ?EOF, +StopChars)
%
%   Collect codes, starting with the already read Code, until a stop
%   character or the end of input.  Codes includes the stop character;
%   EOF is that stop character or `end_of_file`.

rl_readrest(-1, S, EOF, _) :-
    !,                              % commit first: with a bound EOF that
    S = [],                         % is not end_of_file we must fail
    EOF = end_of_file.              % rather than read beyond the end
rl_readrest(0'\\, S, EOF, StopChars) :-
    !,
    get_code(K1),                   % skip it, take next char
    (   K1 == -1                    % backslash was the last character:
    ->  S = [],                     % treat as plain end of input
        EOF = end_of_file
    ;   S = [K1|R],                 % K1 is taken literally, even if it
        get_code(K2),               % is a stop character
        rl_readrest(K2, R, EOF, StopChars)
    ).
rl_readrest(K, [K], K, StopChars) :-    % the stop char(s)
    % If the caller passed a bound EOF that differs from K this head
    % does not unify and reading continues in the next clause.
    member(K, StopChars),
    !.
rl_readrest(K, [K|R], EOF, StopChars) :-        % the normal case
    get_code(K1),
    rl_readrest(K1, R, EOF, StopChars).

%   rl_words(-Tokens, +Codes, -Rest, +Options)
%
%   Tokenise Codes into words, numbers and single characters that are
%   separated by optional blanks.

rl_words([W|Ws], S1, S4, Options) :-
    rl_word(W, S1, S2, Options),
    !,
    rl_blanks(S2, S3),
    rl_words(Ws, S3, S4, Options).
rl_words([], S1, S2, _) :-
    rl_blanks(S1, S2),
    !.
rl_words([], S, S, _).

%   rl_word(-Token, +Codes, -Rest, +Options)
%
%   Read one token.  The clauses are tried in order: a number written
%   with a leading dot, a number, a word and finally any single
%   character.  Tokens are built with name/2.

rl_word(N, [46|S1], S3, _) :-       % the dot can be in the beginning of
    rl_basic_num(N1, S1, S2),       % a real number.
    !,
    rl_basic_nums(Rest, S2, S3, dot),   % only ONE dot IN a number !!
    name(N, [48, 46, N1|Rest]).         % i.e '0.'
rl_word(N, S0, S2, _) :-
    rl_basic_num(N1, S0, S1),
    !,
    rl_basic_nums(Rest, S1, S2, _),
    name(N, [N1|Rest]).
rl_word(W, S0, S2, Options) :-
    rl_basic_char(C1, S0, S1, Options),
    !,
    rl_basic_chars(Rest, S1, S2, Options),
    name(W, [C1|Rest]).
rl_word(P, [C|R], R, _) :-
    name(P, [C]),
    !.

%   rl_basic_chars(-Chars, +Codes, -Rest, +Options)
%
%   Longest (possibly empty) run of word characters.

rl_basic_chars([A|As], S0, S2, Options) :-
    rl_basic_char(A, S0, S1, Options),
    !,
    rl_basic_chars(As, S1, S2, Options).
rl_basic_chars([], S, S, _).

%   rl_basic_nums(-Digits, +Codes, -Rest, ?Dot)
%
%   Longest run of digits, containing at most one dot that must be
%   followed by a digit.  Dot is unbound as long as no dot was seen.

rl_basic_nums([46,N|As], [46|S1], S3, Dot) :-   % a dot followed by >= one digit
    var(Dot),                                   % but not found a dot already
    rl_basic_num(N, S1, S2),
    !,
    rl_basic_nums(As, S2, S3, dot).
rl_basic_nums([A|As], S0, S2, Dot) :-
    rl_basic_num(A, S0, S1),
    !,
    rl_basic_nums(As, S1, S2, Dot).
rl_basic_nums([], S, S, _).

%   rl_blanks(+Codes, -Rest)
%
%   Skip leading white space.

rl_blanks([C|S0], S1) :-
    rl_blank(C),
    !,
    rl_blanks(S0, S1).
rl_blanks(S, S).

/* Basic Character types that form rl_words together */

%   rl_basic_char(-Char, +Codes, -Rest, +Options)
%
%   Take one word character, case-converted as requested in Options.

rl_basic_char(A, [C|S], S, options(WordChars, Case)) :-
    rl_lc(C, A, WordChars, Case).

%   rl_basic_num(?Digit, +Codes, -Rest)
%
%   Take one digit.

rl_basic_num(N, [N|R], R) :-
    code_type(N, digit).

rl_blank(X) :-
    code_type(X, space).

%   rl_lc(+Code, -Converted, +WordChars, +Case)
%
%   True if Code is a word character: an uppercase letter (converted
%   according to Case), a lowercase letter or a member of WordChars.
%   The cut commits to the uppercase branch, so an uppercase letter for
%   which the conversion fails is not a word character.

rl_lc(X, X1, _, Case) :-
    code_type(X, upper),
    !,
    rl_fix_case(Case, X, X1).
rl_lc(X, X, _, _) :-
    code_type(X, lower).
rl_lc(X, X, WordChars, _) :-
    memberchk(X, WordChars).

%   rl_fix_case(+Case, +Upper, -Code)
%
%   Map Upper to lowercase if Case is `lowercase`; leave it unchanged
%   for any other value of Case.

rl_fix_case(lowercase, U, L) :-
    !,
    code_type(L, lower(U)).
rl_fix_case(_, C, C).