View source with raw comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2013-2015, VU University Amsterdam
    7    All rights reserved.
    8
    9    Redistribution and use in source and binary forms, with or without
   10    modification, are permitted provided that the following conditions
   11    are met:
   12
   13    1. Redistributions of source code must retain the above copyright
   14       notice, this list of conditions and the following disclaimer.
   15
   16    2. Redistributions in binary form must reproduce the above copyright
   17       notice, this list of conditions and the following disclaimer in
   18       the documentation and/or other materials provided with the
   19       distribution.
   20
   21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   24    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   25    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   26    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   27    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   28    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   29    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   31    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   32    POSSIBILITY OF SUCH DAMAGE.
   33*/
   34
   35:- module(rdf_ntriples,
   36          [ rdf_read_ntriples/3,        % +Input, -Triples, +Options
   37            rdf_read_nquads/3,          % +Input, -Quads, +Options
   38            rdf_process_ntriples/3,     % +Input, :CallBack, +Options
   39
   40            read_ntriple/2,             % +Stream, -Triple
   41            read_nquad/2,               % +Stream, -Quad
   42            read_ntuple/2               % +Stream, -TripleOrQuad
   43          ]).   44:- if(exists_source(library(semweb/rdf_db))).   45:- use_module(library(semweb/rdf_db),
   46              [rdf_transaction/2,rdf_set_graph/2,rdf_assert/4]).   47:- endif.   48:- use_module(library(record),[(record)/1, op(_,_,record)]).   49
   50:- autoload(library(error),[domain_error/2]).   51:- autoload(library(memfile),
   52	    [atom_to_memory_file/2,open_memory_file/4]).   53:- autoload(library(option),[option/3,option/2]).   54:- autoload(library(uri),
   55	    [uri_file_name/2,uri_is_global/1,uri_normalized/2]).   56:- autoload(library(http/http_open),[http_open/3]).   57
   58:- use_foreign_library(foreign(ntriples)).

Process files in the RDF N-Triples format

The library(semweb/rdf_ntriples) provides a fast reader for the RDF N-Triples and N-Quads format. N-Triples is a simple format, originally used to support the W3C RDF test suites. The current format has been extended and is a subset of the Turtle format (see library(semweb/turtle)).

The API of this library is almost identical to library(semweb/turtle). This module provides a plugin into rdf_load/2, making this predicate support the format ntriples and nquads.

See also
- http://www.w3.org/TR/n-triples/
To be done
- Sync with RDF 1.1. specification. */
   76:- predicate_options(rdf_read_ntriples/3, 3,
   77                     [ anon_prefix(any), % atom or node(_)
   78                       base_uri(atom),
   79                       error_count(-integer),
   80                       on_error(oneof([warning,error]))
   81                     ]).   82:- predicate_options(rdf_read_nquads/3, 3,
   83                     [ anon_prefix(any), % atom or node(_)
   84                       base_uri(atom),
   85                       error_count(-integer),
   86                       on_error(oneof([warning,error])),
   87                       graph(atom)
   88                     ]).   89:- predicate_options(rdf_process_ntriples/3, 3,
   90                     [ graph(atom),
   91                       pass_to(rdf_read_ntriples/3, 3)
   92                     ]).   93
   94:- meta_predicate
   95    rdf_process_ntriples(+,2,+).   96
   97
   98%!  read_ntriple(+Stream, -Triple) is det.
   99%
  100%   Read the next triple from Stream as Triple. Stream must have UTF-8
  101%   encoding.
  102%
  103%   @param  Triple is a term triple(Subject,Predicate,Object).
  104%           Arguments follow the normal conventions of the RDF
  105%           libraries.  NodeID elements are mapped to node(Id).
  106%           If end-of-file is reached, Triple is unified with
  107%           =end_of_file=.
  108%   @error  syntax_error(Message) on syntax errors
  109
  110%!  read_nquad(+Stream, -Quad) is det.
  111%
  112%   Read the next quad from Stream as Quad.  Stream must have UTF-8
  113%   encoding.
  114%
  115%   @param  Quad is a term quad(Subject,Predicate,Object,Graph).
  116%           Arguments follow the normal conventions of the RDF
  117%           libraries.  NodeID elements are mapped to node(Id).
  118%           If end-of-file is reached, Quad is unified with
  119%           =end_of_file=.
  120%   @error  syntax_error(Message) on syntax errors
  121
  122%!  read_ntuple(+Stream, -Tuple) is det.
  123%
  124%   Read the next triple or quad from  Stream as Tuple. Tuple is one
  125%   of the terms below.  See   read_ntriple/2  and  read_nquad/2 for
  126%   details.
  127%
  128%     - triple(Subject,Predicate,Object)
  129%     - quad(Subject,Predicate,Object,Graph).
  130
  131:- record nt_state(anon_prefix,               graph,               on_error:oneof([warning,error])=warning,               format:oneof([ntriples,nquads]),               error_count=0).
 rdf_read_ntriples(+Input, -Triples, +Options) is det
 rdf_read_nquads(+Input, -Quads, +Options) is det
True when Triples/Quads is a list of triples/quads from Input. Options:
anon_prefix(+AtomOrNode)
Prefix nodeIDs with this atom. If AtomOrNode is the term node(_), bnodes are returned as node(Id).
base_uri(+Atom)
Defines the default anon_prefix as _:<baseuri>_
on_error(Action)
One of warning (default) or error
error_count(-Count)
If on_error is warning, unify Count with th number of errors.
graph(+Graph)
For rdf_read_nquads/3, this defines the graph associated to triples loaded from the input. For rdf_read_ntriples/3 this opion is ignored.
Arguments:
Triples- is a list of rdf(Subject, Predicate, Object)
Quads- is a list of rdf(Subject, Predicate, Object, Graph)
  162rdf_read_ntriples(Input, Triples, Options) :-
  163    rdf_read_ntuples(Input, Triples, [format(ntriples)|Options]).
  164
  165rdf_read_nquads(Input, Triples, Options) :-
  166    rdf_read_ntuples(Input, Triples, [format(nquads)|Options]).
  167
  168
  169rdf_read_ntuples(Input, Triples, Options) :-
  170    setup_call_cleanup(
  171        open_input(Input, Stream, Close),
  172        (   init_state(Input, Options, State0),
  173            read_ntuples(Stream, Triples, State0, State)
  174        ),
  175        Close),
  176    option(error_count(Count), Options, _),
  177    nt_state_error_count(State, Count).
 rdf_process_ntriples(+Input, :CallBack, +Options)
Call-back interface, compatible with the other triple readers. In addition to the options from rdf_read_ntriples/3, this processes the option graph(Graph).
Arguments:
CallBack- is called as call(CallBack, Triples, Graph), where Triples is a list holding a single rdf(S,P,O) triple. Graph is passed from the graph option and unbound if this option is omitted.
  190rdf_process_ntriples(Input, CallBack, Options) :-
  191    setup_call_cleanup(
  192        open_input(Input, Stream, Close),
  193        (   init_state(Input, Options, State0),
  194            process_ntriple(Stream, CallBack, State0, State)
  195        ),
  196        Close),
  197    option(error_count(Count), Options, _),
  198    nt_state_error_count(State, Count).
 read_ntuples(+Stream, -Triples, +State0, -State)
  203read_ntuples(Stream, Triples, State0, State) :-
  204    read_ntuple(Stream, Triple0, State0, State1),
  205    (   Triple0 == end_of_file
  206    ->  Triples = [],
  207        State = State1
  208    ;   map_nodes(Triple0, Triple, State1, State2),
  209        Triples = [Triple|More],
  210        read_ntuples(Stream, More, State2, State)
  211    ).
 process_ntriple(+Stream, :CallBack, +State0, -State)
  215process_ntriple(Stream, CallBack, State0, State) :-
  216    read_ntuple(Stream, Triple0, State0, State1),
  217    (   Triple0 == end_of_file
  218    ->  State = State1
  219    ;   map_nodes(Triple0, Triple, State1, State2),
  220        nt_state_graph(State2, Graph),
  221        call(CallBack, [Triple], Graph),
  222        process_ntriple(Stream, CallBack, State2, State)
  223    ).
 read_ntuple(+Stream, -Tuple, +State0, -State) is det
True when Tuple is the next triple on Stream. May increment the error count on State.
  230read_ntuple(Stream, Triple, State0, State) :-
  231    nt_state_on_error(State0, error),
  232    !,
  233    read_ntuple(Stream, Triple, State0),
  234    State = State0.
  235read_ntuple(Stream, Triple, State0, State) :-
  236    catch(read_ntuple(Stream, Triple, State0), E, true),
  237    (   var(E)
  238    ->  State = State0
  239    ;   print_message(warning, E),
  240        nt_state_error_count(State0, EC0),
  241        EC is EC0+1,
  242        set_error_count_of_nt_state(EC, State0, State1),
  243        read_ntuple(Stream, Triple, State1, State)
  244    ).
  245
  246read_ntuple(Stream, Triple, State0) :-
  247    nt_state_format(State0, Format),
  248    format_read_ntuple(Format, Stream, Triple, State0).
  249
  250format_read_ntuple(ntriples, Stream, Triple, _) :-
  251    !,
  252    read_ntriple(Stream, Triple).
  253format_read_ntuple(nquads, Stream, Quad, State) :-
  254    !,
  255    read_ntuple(Stream, Tuple),
  256    to_quad(Tuple, Quad, State).
  257
  258to_quad(Quad, Quad, _) :-
  259    functor(Quad, quad, 4),
  260    !.
  261to_quad(triple(S,P,O), quad(S,P,O,Graph), State) :-
  262    nt_state_graph(State, Graph).
  263to_quad(end_of_file, end_of_file, _).
  264
  265
  266map_nodes(triple(S0,P0,O0), rdf(S,P,O), State0, State) :-
  267    map_node(S0, S, State0, State1),
  268    map_node(P0, P, State1, State2),
  269    map_node(O0, O, State2, State).
  270map_nodes(quad(S0,P0,O0,G0), rdf(S,P,O,G), State0, State) :-
  271    map_node(S0, S, State0, State1),
  272    map_node(P0, P, State1, State2),
  273    map_node(O0, O, State2, State3),
  274    map_node(G0, G, State3, State).
  275
  276map_node(node(NodeId), BNode, State, State) :-
  277    nt_state_anon_prefix(State, Prefix),
  278    atom(Prefix),
  279    !,
  280    atom_concat(Prefix, NodeId, BNode).
  281map_node(Node, Node, State, State).
 open_input(+Input, -Stream, -Close) is det
Open input for reading ntriples. The default encoding is UTF-8. If the input has a different encoding, Input must be a stream with the correct encoding and the stream type must be text.
  290open_input(stream(Stream), Stream, Close) :-
  291    !,
  292    (   stream_property(Stream, type(binary))
  293    ->  set_stream(Stream, encoding(utf8)),
  294        Close = set_stream(Stream, type(binary))
  295    ;   stream_property(Stream, encoding(Old)),
  296        (   n3_encoding(Old)
  297        ->  true
  298        ;   domain_error(ntriples_encoding, Old)
  299        ),
  300        Close = true
  301    ).
  302open_input(Stream, Stream, Close) :-
  303    is_stream(Stream),
  304    !,
  305    open_input(stream(Stream), Stream, Close).
  306open_input(atom(Atom), Stream, close(Stream)) :-
  307    !,
  308    atom_to_memory_file(Atom, MF),
  309    open_memory_file(MF, read, Stream, [free_on_close(true)]).
  310open_input(URL, Stream, close(Stream)) :-
  311    (   sub_atom(URL, 0, _, _, 'http://')
  312    ;   sub_atom(URL, 0, _, _, 'https://')
  313    ),
  314    !,
  315    http_open(URL, Stream, []),
  316    set_stream(Stream, encoding(utf8)).
  317open_input(URL, Stream, close(Stream)) :-
  318    uri_file_name(URL, Path),
  319    !,
  320    open(Path, read, Stream, [encoding(utf8)]).
  321open_input(File, Stream, close(Stream)) :-
  322    absolute_file_name(File, Path,
  323                       [ access(read),
  324                         extensions(['', nt, ntriples])
  325                       ]),
  326    open(Path, read, Stream, [encoding(utf8)]).
  327
  328n3_encoding(octet).
  329n3_encoding(ascii).
  330n3_encoding(iso_latin_1).
  331n3_encoding(utf8).
  332n3_encoding(text).
 init_state(+Input, +Options, -State) is det
  336init_state(In, Options, State) :-
  337    (   option(base_uri(BaseURI), Options)
  338    ->  true
  339    ;   In = stream(_)
  340    ->  BaseURI = []
  341    ;   is_stream(In)
  342    ->  BaseURI = []
  343    ;   In = atom(_)
  344    ->  BaseURI = []
  345    ;   uri_is_global(In),
  346        \+ is_absolute_file_name(In)        % Avoid C:Path in Windows
  347    ->  uri_normalized(In, BaseURI)
  348    ;   uri_file_name(BaseURI, In)
  349    ),
  350    (   option(anon_prefix(Prefix), Options)
  351    ->  true
  352    ;   BaseURI == []
  353    ->  Prefix = '_:genid'
  354    ;   atom_concat('_:', BaseURI, Prefix)
  355    ),
  356    option(on_error(OnError), Options, warning),
  357    % If the format is not set explicitly we assume N-Triples.
  358    % The format option _must_ be set before make_nt_state/2.
  359    option(format(Format), Options, ntriples),
  360    rdf_db:graph(Options, Graph),
  361    (   var(Graph)
  362    ->  Graph = user
  363    ;   true
  364    ),
  365    make_nt_state([ anon_prefix(Prefix),
  366                    on_error(OnError),
  367                    format(Format),
  368                    graph(Graph)
  369                  ], State).
  370
  371
  372                 /*******************************
  373                 *          RDF-DB HOOK         *
  374                 *******************************/
  375
  376:- if(current_predicate(rdf_transaction/2)).  377:- multifile
  378    rdf_db:rdf_load_stream/3,
  379    rdf_db:rdf_file_type/2.
 rdf_db:rdf_load_stream(+Format, +Stream, :Options) is semidet
Plugin rule that supports loading the ntriples and nquads formats.
  386rdf_db:rdf_load_stream(ntriples, Stream, _Module:Options) :-
  387    rdf_db:graph(Options, Graph),
  388    rdf_transaction((  rdf_process_ntriples(Stream, assert_tuples, Options),
  389                       rdf_set_graph(Graph, modified(false))
  390                    ),
  391                    parse(Graph)).
  392rdf_db:rdf_load_stream(nquads, Stream, _Module:Options) :-
  393    rdf_db:graph(Options, Graph),
  394    (   var(Graph)
  395    ->  Graph = user
  396    ;   true
  397    ),
  398    rdf_transaction((  rdf_process_ntriples(Stream, assert_tuples, Options),
  399                       rdf_set_graph(Graph, modified(false))
  400                    ),
  401                    parse(Graph)).
  402
  403assert_tuples([], _).
  404assert_tuples([H|T], Graph) :-
  405    assert_tuple(H, Graph),
  406    assert_tuples(T, Graph).
  407
  408assert_tuple(rdf(S,P,O), Graph) :-
  409    rdf_assert(S,P,O,Graph).
  410assert_tuple(rdf(S,P,O,Graph), _) :-
  411    rdf_assert(S,P,O,Graph).
 rdf_db:rdf_file_type(+Extension, -Format)
Bind the ntriples reader to files with the extensions nt, ntriples and nquads.
  418rdf_db:rdf_file_type(nt,       ntriples).
  419rdf_db:rdf_file_type(ntriples, ntriples).
  420rdf_db:rdf_file_type(nq,       nquads).
  421rdf_db:rdf_file_type(nquads,   nquads).
  422:- endif.