1:- encoding(utf8).
    2:- module(
    3  stream_ext,
    4  [
    5    copy_stream_type/2,      % +In, +Out
    6    guess_encoding/2,        % +In, -Encoding
    7    number_of_open_files/1,  % -N
    8    read_line_to_atom/2,     % +In, -Atom
    9    read_line_to_number/2,   % +In, -N
   10    read_stream_to_atom/2,   % +In, -Atom
   11    read_stream_to_string/2, % +In, -String
   12    recode_stream/3,         % +In, +FromEncoding, -Out
   13    stream_line/2,           % +In, -Line
   14    stream_line_column/3,    % +Stream, -Line, -Column
   15    stream_metadata/2,       % +Stream, -Metadata
   16    wc/2                     % +In, -Stats
   17  ]
   18).

Support for working with streams.

Some predicates in the library use the external programs ‘iconv’ and ‘uchardet’.

*/

   27:- use_module(library(aggregate)).   28:- use_module(library(apply)).   29:- use_module(library(archive)).   30:- use_module(library(error)).   31:- use_module(library(lists)).   32:- use_module(library(readutil)).   33:- use_module(library(yall)).   34
   35:- use_module(library(dcg)).   36:- use_module(library(dict)).   37:- use_module(library(debug_ext)).   38:- use_module(library(file_ext), []).   39:- use_module(library(hash_stream)).   40:- use_module(library(os_ext)).   41:- use_module(library(string_ext)).   42:- use_module(library(thread_ext)).   43
   44:- thread_local
   45   debug_indent/1.   46
   47debug_indent(0).
 copy_stream_type(+In:istream, +Out:ostream) is det
Like copy_stream_data/2, but also sets the stream type of Out to match the stream type of In, if needed,
   58copy_stream_type(In, Out) :-
   59  stream_property(In, type(Type)),
   60  set_stream(Out, type(Type)),
   61  copy_stream_data(In, Out).
 guess_encoding(+In:istream, -Encoding:atom) is det
If the encoding cannot be guessed (`unknown'), the error cannot_guess_encoding/0 is thrown.
   70% The value bom causes the stream to check whether the current
   71% character is a Unicode BOM marker.  If a BOM marker is found, the
   72% encoding is set accordingly and the call succeeds; otherwise the
   73% call fails.
   74guess_encoding(In, utf8) :-
   75  set_stream(In, encoding(bom)), !.
   76guess_encoding(In, Enc) :-
   77  process_create(
   78    path(uchardet),
   79    [],
   80    [stdin(pipe(ProcIn)),stdout(pipe(ProcOut))]
   81  ),
   82  set_stream(ProcIn, encoding(octet)),
   83  call_cleanup(
   84    copy_stream_data(In, ProcIn),
   85    close(ProcIn)
   86  ),
   87  call_cleanup(
   88    (
   89      read_string(ProcOut, String1),
   90      string_strip(String1, "\n", String2),
   91      atom_string(Enc0, String2)
   92    ),
   93    close(ProcOut)
   94  ),
   95  clean_encoding_(Enc0, Enc),
   96  (Enc == unknown -> throw(error(cannot_guess_encoding,guess_encoding/2)) ; true).
 number_of_open_files(-N:nonneg) is det
  102number_of_open_files(N) :-
  103  expand_file_name('/proc/self/fd/*', Files),
  104  length(Files, N).
 read_line_to_atom(+In:istream, -Atom:atom) is nondet
  110read_line_to_atom(In, Atom) :-
  111  repeat,
  112  read_line_to_codes(In, Codes),
  113  (   Codes == end_of_file
  114  ->  !, fail
  115  ;   atom_codes(Atom, Codes)
  116  ).
 read_line_to_number(+In:istream, -N:number) is nondet
  122read_line_to_number(In, N) :-
  123  read_line_to_atom(In, Atom),
  124  atom_number(Atom, N).
 read_stream_to_atom(+In:istream, -Atom:atom) is det
  130read_stream_to_atom(In, Atom) :-
  131  read_stream_to_codes(In, Codes),
  132  atom_codes(Atom, Codes).
 read_stream_to_string(+In:istream, -String:string) is det
  138read_stream_to_string(In, String) :-
  139  read_stream_to_codes(In, Codes),
  140  string_codes(String, Codes).
 recode_stream(+In:istream, +FromEncoding:atom, -Out:ostream) is det
We only recode to UTF-8.

See the output of command ~iconv -l~ for the supported encodings.

Assumes that In and Out are binary streams.

  152recode_stream(In, Enc0, Out) :-
  153  clean_encoding_(Enc0, Enc),
  154  process_create(
  155    path(iconv),
  156    ['-c','-f',Enc,'-t','utf-8',-],
  157    [stdin(pipe(ProcIn)),stdout(pipe(ProcOut))]
  158  ),
  159  create_detached_thread(
  160    call_cleanup(
  161      copy_stream_type(In, ProcIn),
  162      close(ProcIn)
  163    )
  164  ),
  165  set_stream(ProcOut, type(binary)),
  166  (   ground(Out)
  167  ->  call_cleanup(
  168        copy_stream_data(ProcOut, Out),
  169        close(ProcOut)
  170      )
  171  ;   ProcOut = Out
  172  ).
 stream_line(+In:istream, -Line:string) is nondet
  178stream_line(In, Line) :-
  179  repeat,
  180  read_line_to_string(In, Line),
  181  (Line == end_of_file -> !, fail ; true).
 stream_line_column(+Stream:stream, -Line:nonneg, -Column:nonneg) is det
  187stream_line_column(Stream, Line, Column) :-
  188  stream_property(Stream, position(Pos)),
  189  stream_position_data(line_count, Pos, Line),
  190  stream_position_data(line_position, Pos, Column).
 stream_metadata(+Stream:stream, -Metadata:dict) is det
  196stream_metadata(Stream, Meta) :-
  197  stream_property(Stream, position(Pos)),
  198  stream_position_data(byte_count, Pos, NumBytes),
  199  stream_position_data(char_count, Pos, NumChars),
  200  stream_position_data(line_count, Pos, NumLines),
  201  stream_property(Stream, newline(Newline)),
  202  Meta = _{
  203    bytes: NumBytes,
  204    characters: NumChars,
  205    lines: NumLines,
  206    newline: Newline
  207  }.
  208
  209
  210
  211/*
  212%! wc(+In:istream, -Lines:nonneg) is det.
  213%
  214% Native implementation of line count.
  215%
  216% @tbd Compare performance with wc/4.
  217
  218wc(In, Lines) :-
  219  Counter = count(0),
  220  repeat,
  221  read_line_to_codes(In, Codes),
  222  (   Codes == end_of_file
  223  ->  !,
  224      arg(1, Counter, Lines)
  225  ;   arg(1, Counter, Count1),
  226      Count2 is Count1 + 1,
  227      nb_setarg(1, Counter, Count2),
  228      fail
  229  ).
  230*/
 wc(+In:istream, -Stats:dict) is det
Linux-only parsing of GNU wc output.
  236wc(In, Stats) :-
  237  setup_call_cleanup(
  238    process_create(path(wc), [], [stdin(pipe(ProcIn)),stdout(pipe(Out))]),
  239    (
  240      create_detached_thread(
  241        call_cleanup(
  242          copy_stream_data(In, ProcIn),
  243          close(ProcIn)
  244        )
  245      ),
  246      read_wc(Out, Lines, Words, Bytes)
  247    ),
  248    close(Out)
  249  ),
  250  Stats = _{
  251    number_of_bytes: Bytes,
  252    number_of_lines: Lines,
  253    number_of_words: Words
  254  }.
  255
  256read_wc(Out, Lines, Words, Bytes) :-
  257  read_stream_to_codes(Out, Codes),
  258  phrase(read_wc(Lines, Words, Bytes), Codes, _).
  259
  260% E.g., `427 1818 13512 README.md`.
  261read_wc(Lines, Words, Bytes) -->
  262  whites,
  263  integer(Lines),
  264  whites,
  265  integer(Words),
  266  whites,
  267  integer(Bytes).
  268
  269
  270
  271
  272
  273% GENERICS %
 clean_encoding_(+Encoding:atom, -CleanEncoding:atom) is det
  277clean_encoding_(Enc1, Enc4) :-
  278  downcase_atom(Enc1, Enc2),
  279  (encoding_alias_(Enc2, Enc3) -> true ; Enc3 = Enc2),
  280  (encoding_promotion_(Enc3, Enc4) -> true ; Enc4 = Enc3).
  281
  282encoding_alias_('iso-8859-1', iso_latin_1).
  283encoding_alias_('iso-8859-15.latin1', 'iso8859-15').
  284encoding_alias_(macroman, macintosh).
  285encoding_alias_('us-ascii', ascii).
  286encoding_alias_('utf-8', utf8).
  287
  288encoding_promotion_(ascii, utf8)