View source with raw comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2013-2015, VU University Amsterdam
    7    All rights reserved.
    8
    9    Redistribution and use in source and binary forms, with or without
   10    modification, are permitted provided that the following conditions
   11    are met:
   12
   13    1. Redistributions of source code must retain the above copyright
   14       notice, this list of conditions and the following disclaimer.
   15
   16    2. Redistributions in binary form must reproduce the above copyright
   17       notice, this list of conditions and the following disclaimer in
   18       the documentation and/or other materials provided with the
   19       distribution.
   20
   21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   24    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   25    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   26    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   27    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   28    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   29    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   31    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   32    POSSIBILITY OF SUCH DAMAGE.
   33*/
   34
   35:- module(rdf_ntriples,
   36          [ rdf_read_ntriples/3,        % +Input, -Triples, +Options
   37            rdf_read_nquads/3,          % +Input, -Quads, +Options
   38            rdf_process_ntriples/3,     % +Input, :CallBack, +Options
   39
   40            read_ntriple/2,             % +Stream, -Triple
   41            read_nquad/2,               % +Stream, -Quad
   42            read_ntuple/2               % +Stream, -TripleOrQuad
   43          ]).   44:- use_module(library(semweb/rdf_db),
   45              [rdf_transaction/2,rdf_set_graph/2,rdf_assert/4]).   46:- use_module(library(record),[(record)/1, op(_,_,record)]).   47
   48:- autoload(library(error),[domain_error/2]).   49:- autoload(library(memfile),
   50	    [atom_to_memory_file/2,open_memory_file/4]).   51:- autoload(library(option),[option/3,option/2]).   52:- autoload(library(uri),
   53	    [uri_file_name/2,uri_is_global/1,uri_normalized/2]).   54:- autoload(library(http/http_open),[http_open/3]).   55
   56:- use_foreign_library(foreign(ntriples)).

Process files in the RDF N-Triples format

The library(semweb/rdf_ntriples) provides a fast reader for the RDF N-Triples and N-Quads format. N-Triples is a simple format, originally used to support the W3C RDF test suites. The current format has been extended and is a subset of the Turtle format (see library(semweb/turtle)).

The API of this library is almost identical to library(semweb/turtle). This module provides a plugin into rdf_load/2, making this predicate support the format ntriples and nquads.

See also
- http://www.w3.org/TR/n-triples/
To be done
- Sync with RDF 1.1. specification. */
   74:- predicate_options(rdf_read_ntriples/3, 3,
   75                     [ anon_prefix(any), % atom or node(_)
   76                       base_uri(atom),
   77                       error_count(-integer),
   78                       on_error(oneof([warning,error]))
   79                     ]).   80:- predicate_options(rdf_read_nquads/3, 3,
   81                     [ anon_prefix(any), % atom or node(_)
   82                       base_uri(atom),
   83                       error_count(-integer),
   84                       on_error(oneof([warning,error])),
   85                       graph(atom)
   86                     ]).   87:- predicate_options(rdf_process_ntriples/3, 3,
   88                     [ graph(atom),
   89                       pass_to(rdf_read_ntriples/3, 3)
   90                     ]).   91
   92:- meta_predicate
   93    rdf_process_ntriples(+,2,+).   94
   95
   96%!  read_ntriple(+Stream, -Triple) is det.
   97%
   98%   Read the next triple from Stream as Triple. Stream must have UTF-8
   99%   encoding.
  100%
  101%   @param  Triple is a term triple(Subject,Predicate,Object).
  102%           Arguments follow the normal conventions of the RDF
  103%           libraries.  NodeID elements are mapped to node(Id).
  104%           If end-of-file is reached, Triple is unified with
  105%           =end_of_file=.
  106%   @error  syntax_error(Message) on syntax errors
  107
  108%!  read_nquad(+Stream, -Quad) is det.
  109%
  110%   Read the next quad from Stream as Quad.  Stream must have UTF-8
  111%   encoding.
  112%
  113%   @param  Quad is a term quad(Subject,Predicate,Object,Graph).
  114%           Arguments follow the normal conventions of the RDF
  115%           libraries.  NodeID elements are mapped to node(Id).
  116%           If end-of-file is reached, Quad is unified with
  117%           =end_of_file=.
  118%   @error  syntax_error(Message) on syntax errors
  119
  120%!  read_ntuple(+Stream, -Tuple) is det.
  121%
  122%   Read the next triple or quad from  Stream as Tuple. Tuple is one
  123%   of the terms below.  See   read_ntriple/2  and  read_nquad/2 for
  124%   details.
  125%
  126%     - triple(Subject,Predicate,Object)
  127%     - quad(Subject,Predicate,Object,Graph).
  128
  129:- record nt_state(anon_prefix,               graph,               on_error:oneof([warning,error])=warning,               format:oneof([ntriples,nquads]),               error_count=0).
 rdf_read_ntriples(+Input, -Triples, +Options) is det
 rdf_read_nquads(+Input, -Quads, +Options) is det
True when Triples/Quads is a list of triples/quads from Input. Options:
anon_prefix(+AtomOrNode)
Prefix nodeIDs with this atom. If AtomOrNode is the term node(_), bnodes are returned as node(Id).
base_uri(+Atom)
Defines the default anon_prefix as _:<baseuri>_
on_error(Action)
One of warning (default) or error
error_count(-Count)
If on_error is warning, unify Count with th number of errors.
graph(+Graph)
For rdf_read_nquads/3, this defines the graph associated to triples loaded from the input. For rdf_read_ntriples/3 this opion is ignored.
Arguments:
Triples- is a list of rdf(Subject, Predicate, Object)
Quads- is a list of rdf(Subject, Predicate, Object, Graph)
  160rdf_read_ntriples(Input, Triples, Options) :-
  161    rdf_read_ntuples(Input, Triples, [format(ntriples)|Options]).
  162
  163rdf_read_nquads(Input, Triples, Options) :-
  164    rdf_read_ntuples(Input, Triples, [format(nquads)|Options]).
  165
  166
  167rdf_read_ntuples(Input, Triples, Options) :-
  168    setup_call_cleanup(
  169        open_input(Input, Stream, Close),
  170        (   init_state(Input, Options, State0),
  171            read_ntuples(Stream, Triples, State0, State)
  172        ),
  173        Close),
  174    option(error_count(Count), Options, _),
  175    nt_state_error_count(State, Count).
 rdf_process_ntriples(+Input, :CallBack, +Options)
Call-back interface, compatible with the other triple readers. In addition to the options from rdf_read_ntriples/3, this processes the option graph(Graph).
Arguments:
CallBack- is called as call(CallBack, Triples, Graph), where Triples is a list holding a single rdf(S,P,O) triple. Graph is passed from the graph option and unbound if this option is omitted.
  188rdf_process_ntriples(Input, CallBack, Options) :-
  189    setup_call_cleanup(
  190        open_input(Input, Stream, Close),
  191        (   init_state(Input, Options, State0),
  192            process_ntriple(Stream, CallBack, State0, State)
  193        ),
  194        Close),
  195    option(error_count(Count), Options, _),
  196    nt_state_error_count(State, Count).
 read_ntuples(+Stream, -Triples, +State0, -State)
  201read_ntuples(Stream, Triples, State0, State) :-
  202    read_ntuple(Stream, Triple0, State0, State1),
  203    (   Triple0 == end_of_file
  204    ->  Triples = [],
  205        State = State1
  206    ;   map_nodes(Triple0, Triple, State1, State2),
  207        Triples = [Triple|More],
  208        read_ntuples(Stream, More, State2, State)
  209    ).
 process_ntriple(+Stream, :CallBack, +State0, -State)
  213process_ntriple(Stream, CallBack, State0, State) :-
  214    read_ntuple(Stream, Triple0, State0, State1),
  215    (   Triple0 == end_of_file
  216    ->  State = State1
  217    ;   map_nodes(Triple0, Triple, State1, State2),
  218        nt_state_graph(State2, Graph),
  219        call(CallBack, [Triple], Graph),
  220        process_ntriple(Stream, CallBack, State2, State)
  221    ).
 read_ntuple(+Stream, -Tuple, +State0, -State) is det
True when Tuple is the next triple on Stream. May increment the error count on State.
  228read_ntuple(Stream, Triple, State0, State) :-
  229    nt_state_on_error(State0, error),
  230    !,
  231    read_ntuple(Stream, Triple, State0),
  232    State = State0.
  233read_ntuple(Stream, Triple, State0, State) :-
  234    catch(read_ntuple(Stream, Triple, State0), E, true),
  235    (   var(E)
  236    ->  State = State0
  237    ;   print_message(warning, E),
  238        nt_state_error_count(State0, EC0),
  239        EC is EC0+1,
  240        set_error_count_of_nt_state(EC, State0, State1),
  241        read_ntuple(Stream, Triple, State1, State)
  242    ).
  243
  244read_ntuple(Stream, Triple, State0) :-
  245    nt_state_format(State0, Format),
  246    format_read_ntuple(Format, Stream, Triple, State0).
  247
  248format_read_ntuple(ntriples, Stream, Triple, _) :-
  249    !,
  250    read_ntriple(Stream, Triple).
  251format_read_ntuple(nquads, Stream, Quad, State) :-
  252    !,
  253    read_ntuple(Stream, Tuple),
  254    to_quad(Tuple, Quad, State).
  255
  256to_quad(Quad, Quad, _) :-
  257    functor(Quad, quad, 4),
  258    !.
  259to_quad(triple(S,P,O), quad(S,P,O,Graph), State) :-
  260    nt_state_graph(State, Graph).
  261to_quad(end_of_file, end_of_file, _).
  262
  263
  264map_nodes(triple(S0,P0,O0), rdf(S,P,O), State0, State) :-
  265    map_node(S0, S, State0, State1),
  266    map_node(P0, P, State1, State2),
  267    map_node(O0, O, State2, State).
  268map_nodes(quad(S0,P0,O0,G0), rdf(S,P,O,G), State0, State) :-
  269    map_node(S0, S, State0, State1),
  270    map_node(P0, P, State1, State2),
  271    map_node(O0, O, State2, State3),
  272    map_node(G0, G, State3, State).
  273
  274map_node(node(NodeId), BNode, State, State) :-
  275    nt_state_anon_prefix(State, Prefix),
  276    atom(Prefix),
  277    !,
  278    atom_concat(Prefix, NodeId, BNode).
  279map_node(Node, Node, State, State).
 open_input(+Input, -Stream, -Close) is det
Open input for reading ntriples. The default encoding is UTF-8. If the input has a different encoding, Input must be a stream with the correct encoding and the stream type must be text.
  288open_input(stream(Stream), Stream, Close) :-
  289    !,
  290    (   stream_property(Stream, type(binary))
  291    ->  set_stream(Stream, encoding(utf8)),
  292        Close = set_stream(Stream, type(binary))
  293    ;   stream_property(Stream, encoding(Old)),
  294        (   n3_encoding(Old)
  295        ->  true
  296        ;   domain_error(ntriples_encoding, Old)
  297        ),
  298        Close = true
  299    ).
  300open_input(Stream, Stream, Close) :-
  301    is_stream(Stream),
  302    !,
  303    open_input(stream(Stream), Stream, Close).
  304open_input(atom(Atom), Stream, close(Stream)) :-
  305    !,
  306    atom_to_memory_file(Atom, MF),
  307    open_memory_file(MF, read, Stream, [free_on_close(true)]).
  308open_input(URL, Stream, close(Stream)) :-
  309    (   sub_atom(URL, 0, _, _, 'http://')
  310    ;   sub_atom(URL, 0, _, _, 'https://')
  311    ),
  312    !,
  313    http_open(URL, Stream, []),
  314    set_stream(Stream, encoding(utf8)).
  315open_input(URL, Stream, close(Stream)) :-
  316    uri_file_name(URL, Path),
  317    !,
  318    open(Path, read, Stream, [encoding(utf8)]).
  319open_input(File, Stream, close(Stream)) :-
  320    absolute_file_name(File, Path,
  321                       [ access(read),
  322                         extensions(['', nt, ntriples])
  323                       ]),
  324    open(Path, read, Stream, [encoding(utf8)]).
  325
  326n3_encoding(octet).
  327n3_encoding(ascii).
  328n3_encoding(iso_latin_1).
  329n3_encoding(utf8).
  330n3_encoding(text).
 init_state(+Input, +Options, -State) is det
  334init_state(In, Options, State) :-
  335    (   option(base_uri(BaseURI), Options)
  336    ->  true
  337    ;   In = stream(_)
  338    ->  BaseURI = []
  339    ;   is_stream(In)
  340    ->  BaseURI = []
  341    ;   In = atom(_)
  342    ->  BaseURI = []
  343    ;   uri_is_global(In),
  344        \+ is_absolute_file_name(In)        % Avoid C:Path in Windows
  345    ->  uri_normalized(In, BaseURI)
  346    ;   uri_file_name(BaseURI, In)
  347    ),
  348    (   option(anon_prefix(Prefix), Options)
  349    ->  true
  350    ;   BaseURI == []
  351    ->  Prefix = '_:genid'
  352    ;   atom_concat('_:', BaseURI, Prefix)
  353    ),
  354    option(on_error(OnError), Options, warning),
  355    % If the format is not set explicitly we assume N-Triples.
  356    % The format option _must_ be set before make_nt_state/2.
  357    option(format(Format), Options, ntriples),
  358    rdf_db:graph(Options, Graph),
  359    (   var(Graph)
  360    ->  Graph = user
  361    ;   true
  362    ),
  363    make_nt_state([ anon_prefix(Prefix),
  364                    on_error(OnError),
  365                    format(Format),
  366                    graph(Graph)
  367                  ], State).
  368
  369
  370                 /*******************************
  371                 *          RDF-DB HOOK         *
  372                 *******************************/
  373
  374:- multifile
  375    rdf_db:rdf_load_stream/3,
  376    rdf_db:rdf_file_type/2.
 rdf_db:rdf_load_stream(+Format, +Stream, :Options) is semidet
Plugin rule that supports loading the ntriples and nquads formats.
  383rdf_db:rdf_load_stream(ntriples, Stream, _Module:Options) :-
  384    rdf_db:graph(Options, Graph),
  385    rdf_transaction((  rdf_process_ntriples(Stream, assert_tuples, Options),
  386                       rdf_set_graph(Graph, modified(false))
  387                    ),
  388                    parse(Graph)).
  389rdf_db:rdf_load_stream(nquads, Stream, _Module:Options) :-
  390    rdf_db:graph(Options, Graph),
  391    (   var(Graph)
  392    ->  Graph = user
  393    ;   true
  394    ),
  395    rdf_transaction((  rdf_process_ntriples(Stream, assert_tuples, Options),
  396                       rdf_set_graph(Graph, modified(false))
  397                    ),
  398                    parse(Graph)).
  399
  400assert_tuples([], _).
  401assert_tuples([H|T], Graph) :-
  402    assert_tuple(H, Graph),
  403    assert_tuples(T, Graph).
  404
  405assert_tuple(rdf(S,P,O), Graph) :-
  406    rdf_assert(S,P,O,Graph).
  407assert_tuple(rdf(S,P,O,Graph), _) :-
  408    rdf_assert(S,P,O,Graph).
 rdf_db:rdf_file_type(+Extension, -Format)
Bind the ntriples reader to files with the extensions nt, ntriples and nquads.
  416rdf_db:rdf_file_type(nt,       ntriples).
  417rdf_db:rdf_file_type(ntriples, ntriples).
  418rdf_db:rdf_file_type(nq,       nquads).
  419rdf_db:rdf_file_type(nquads,   nquads)