View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2010-2013, University of Amsterdam
    7    All rights reserved.
    8
    9    Redistribution and use in source and binary forms, with or without
   10    modification, are permitted provided that the following conditions
   11    are met:
   12
   13    1. Redistributions of source code must retain the above copyright
   14       notice, this list of conditions and the following disclaimer.
   15
   16    2. Redistributions in binary form must reproduce the above copyright
   17       notice, this list of conditions and the following disclaimer in
   18       the documentation and/or other materials provided with the
   19       distribution.
   20
   21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   24    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   25    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   26    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   27    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   28    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   29    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   31    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   32    POSSIBILITY OF SUCH DAMAGE.
   33*/
   34
:- module(rdf_triple,
          [ rdf_triples/2,              % +Parsed, -Triples
            rdf_triples/3,              % +Parsed, -Triples, +Tail
            rdf_reset_ids/0,            % Reset gensym id's
            rdf_start_file/2,           % +Options, -Cleanup
            rdf_end_file/1,             % +Cleanup
            anon_prefix/1               % Prefix for anonymous resources
          ]).
:- autoload(library(gensym),[gensym/2,reset_gensym/1]).
:- autoload(library(option),[option/3,option/2]).
:- autoload(library(rdf_parser),[rdf_name_space/1]).
:- autoload(library(uri),[iri_normalized/2]).
   48
% Declare the options accepted in argument 1 of rdf_start_file/2:
% base_uri(Atom) and blank_nodes(share|noshare).
:- predicate_options(rdf_start_file/2, 1,
                     [ base_uri(atom),
                       blank_nodes(oneof([share,noshare]))
                     ]).
   54/** <module> Create triples from intermediate representation
   55
   56Convert the output of xml_to_rdf/3  from   library(rdf)  into  a list of
   57triples of the format described   below. The intermediate representation
should be regarded as a proprietary representation.
   59
   60        rdf(Subject, Predicate, Object).
   61
   62Where `Subject' is
   63
   64        * Atom
   65        The subject is a resource
   66
   67        * each(URI)
   68        URI is the URI of an RDF Bag
   69
   70        * prefix(Pattern)
   71        Pattern is the prefix of a fully qualified Subject URI
   72
   73And `Predicate' is
   74
   75        * Atom
   76        The predicate is always a resource
   77
   78And `Object' is
   79
   80        * Atom
   81        URI of Object resource
   82
   83        * literal(Value)
   84        Literal value (Either a single atom or parsed XML data)
   85*/
   86
   87%!  rdf_triples(+Term, -Triples) is det.
   88%!  rdf_triples(+Term, -Tridpples, +Tail) is det.
   89%
   90%   Convert an object as parsed by rdf.pl into a list of rdf/3
   91%   triples.  The identifier of the main object created is returned
   92%   by rdf_triples/3.
   93%
   94%   Input is the `content' of the RDF element in the format as
   95%   generated by load_structure(File, Term, [dialect(xmlns)]).
   96%   rdf_triples/3 can process both individual descriptions as
   97%   well as the entire content-list of an RDF element.  The first
   98%   mode is suitable when using library(sgml) in `call-back' mode.
   99
  100rdf_triples(RDF, Tripples) :-
  101    rdf_triples(RDF, Tripples, []).
  102
  103rdf_triples([]) -->
  104    !,
  105    [].
  106rdf_triples([H|T]) -->
  107    !,
  108    rdf_triples(H),
  109    rdf_triples(T).
  110rdf_triples(Term) -->
  111    triples(Term, _).
  112
%!  triples(+Term, -Id)//
%
%   DCG set processing the output of  xml_to_rdf/3. Id is unified to
%   the identifier of the main description.  Term is one of
%   description(Type, About, Props) or unparsed(Data).

triples(description(Type, About, Props), Subject) -->
    { var(About),                       % description without identifier
      share_blank_nodes(true)           % and sharing has been enabled
    },
    !,
    (   { shared_description(description(Type, Props), Subject)
        }
    ->  []                              % seen before: reuse Subject, emit nothing
    ;   { make_id('_:Description', Id)
        },
        % Not seen before: translate under a fresh id, then remember
        % the description so that later copies can share Subject.
        triples(description(Type, about(Id), Props), Subject),
        { assert_shared_description(description(Type, Props), Subject)
        }
    ).
triples(description(description, IdAbout, Props), Subject) -->
    !,                                  % untyped description: no rdf:type triple
    { description_id(IdAbout, Subject)
    },
    properties(Props, Subject).
triples(description(TypeURI, IdAbout, Props), Subject) -->
    { description_id(IdAbout, Subject)
    },
    properties([ rdf:type = TypeURI     % typed node: add the type as a property
               | Props
               ], Subject).
triples(unparsed(Data), Id) -->
    { make_id('_:Error', Id),           % still hand back an identifier
      print_message(error, rdf(unparsed(Data)))
    },
    [].                                 % no triples for unparsed content
  148
  149
  150                 /*******************************
  151                 *          DESCRIPTIONS        *
  152                 *******************************/
  153
  154:- thread_local
  155    node_id/2,                      % nodeID --> ID
  156    unique_id/1.                    % known rdf:ID
  157
  158rdf_reset_node_ids :-
  159    retractall(node_id(_,_)),
  160    retractall(unique_id(_)).
  161
  162description_id(Id, Id) :-
  163    var(Id),
  164    !,
  165    make_id('_:Description', Id).
  166description_id(about(Id), Id).
  167description_id(id(Id), Id) :-
  168    (   unique_id(Id)
  169    ->  print_message(error, rdf(redefined_id(Id)))
  170    ;   assert(unique_id(Id))
  171    ).
  172description_id(each(Id), each(Id)).
  173description_id(prefix(Id), prefix(Id)).
  174description_id(node(NodeID), Id) :-
  175    (   node_id(NodeID, Id)
  176    ->  true
  177    ;   make_id('_:Node', Id),
  178        assert(node_id(NodeID, Id))
  179    ).
  180
  181properties(PlRDF, Subject) -->
  182    properties(PlRDF, 1, [], [], Subject).
  183
  184properties([], _, Bag, Bag, _) -->
  185    [].
  186properties([H0|T0], N, Bag0, Bag, Subject) -->
  187    property(H0, N, NN, Bag0, Bag1, Subject),
  188    properties(T0, NN, Bag1, Bag, Subject).
  189
%!  property(+Property, +N, -NN, ?BagH, ?BagT, +Subject)// is det.
%
%   Generate triples for {Subject,  Pred,   Object}.  Also generates
%   triples for Object if necessary.
%
%   @param Property One of
%
%           * Pred = Object
%           Used for normal statements
%           * id(Id, Pred = Object)
%           Used for reified statements
%   @param N    Index to use if Pred is rdf:li (see li_pred/4)
%   @param NN   Index to use for the next property
%   @param BagH Passed on to statement//6
%   @param BagT Passed on to statement//6

property(Pred0 = Object, N, NN, BagH, BagT, Subject) --> % inlined object
    triples(Object, Id),                % Object is a description/unparsed term
    !,
    { li_pred(Pred0, Pred, N, NN)
    },
    statement(Subject, Pred, Id, _, BagH, BagT).
property(Pred0 = collection(Elems), N, NN, BagH, BagT, Subject) -->
    !,
    { li_pred(Pred0, Pred, N, NN)
    },
    % Object is still unbound here; collection//2 binds it afterwards.
    statement(Subject, Pred, Object, _Id, BagH, BagT),
    collection(Elems, Object).
property(Pred0 = Object, N, NN, BagH, BagT, Subject) -->
    !,                                  % plain object: used as is
    { li_pred(Pred0, Pred, N, NN)
    },
    statement(Subject, Pred, Object, _Id, BagH, BagT).
% The remaining clauses mirror the three above, but pass the given Id
% to statement//6.
property(id(Id, Pred0 = Object), N, NN, BagH, BagT, Subject) -->
    triples(Object, ObjectId),
    !,
    { li_pred(Pred0, Pred, N, NN)
    },
    statement(Subject, Pred, ObjectId, Id, BagH, BagT).
property(id(Id, Pred0 = collection(Elems)), N, NN, BagH, BagT, Subject) -->
    !,
    { li_pred(Pred0, Pred, N, NN)
    },
    statement(Subject, Pred, Object, Id, BagH, BagT),
    collection(Elems, Object).
property(id(Id, Pred0 = Object), N, NN, BagH, BagT, Subject) -->
    { li_pred(Pred0, Pred, N, NN)
    },
    statement(Subject, Pred, Object, Id, BagH, BagT).
  235
%!  statement(+Subject, +Pred, +Object, ?Id, ?BagH, ?BagT)//
%
%   Add a statement to the model. If nonvar(Id), we reify the
%   statement using the given Id.

statement(Subject, Pred, Object, Id, BagH, BagT) -->
    rdf(Subject, Pred, Object),
    % If BagH unifies with [Id|BagT], make sure Id is bound,
    % generating an identifier if needed.  Otherwise the bag is
    % passed through unchanged.
    {   BagH = [Id|BagT]
    ->  statement_id(Id)
    ;   BagT = BagH
    },
    (   { nonvar(Id)
        }
    ->  rdf(Id, rdf:type, rdf:'Statement'),     % reification quad
        rdf(Id, rdf:subject, Subject),
        rdf(Id, rdf:predicate, Pred),
        rdf(Id, rdf:object, Object)
    ;   []
    ).
  255
  256
  257statement_id(Id) :-
  258    nonvar(Id),
  259    !.
  260statement_id(Id) :-
  261    make_id('_:Statement', Id).
  262
  263%!  li_pred(+Pred, -Pred, +Nth, -NextNth)
  264%
  265%   Transform rdf:li predicates into _1, _2, etc.
  266
  267li_pred(rdf:li, rdf:Pred, N, NN) :-
  268    !,
  269    NN is N + 1,
  270    atom_concat('_', N, Pred).
  271li_pred(Pred, Pred, N, N).
  272
  273%!  collection(+Elems, -Id)
  274%
  275%   Handle the elements of a collection and return the identifier
  276%   for the whole collection in Id.
  277
  278collection([], Nil) -->
  279    { global_ref(rdf:nil, Nil)
  280    }.
  281collection([H|T], Id) -->
  282    triples(H, HId),
  283    { make_id('_:List', Id)
  284    },
  285    rdf(Id, rdf:type, rdf:'List'),
  286    rdf(Id, rdf:first, HId),
  287    rdf(Id, rdf:rest, TId),
  288    collection(T, TId).
  289
  290
  291rdf(S0, P0, O0) -->
  292    { global_ref(S0, S),
  293      global_ref(P0, P),
  294      global_obj(O0, O)
  295    },
  296    [ rdf(S, P, O) ].
  297
  298
  299global_ref(In, Out) :-
  300    (   nonvar(In),
  301        In = NS:Local
  302    ->  (   NS == rdf,
  303            rdf_name_space(RDF)
  304        ->  atom_concat(RDF, Local, Out)
  305        ;   atom_concat(NS, Local, Out0),
  306            iri_normalized(Out0, Out)
  307        )
  308    ;   Out = In
  309    ).
  310
  311global_obj(V, V) :-
  312    var(V),
  313    !.
  314global_obj(literal(type(Local, X)), literal(type(Global, X))) :-
  315    !,
  316    global_ref(Local, Global).
  317global_obj(literal(X), literal(X)) :- !.
  318global_obj(Local, Global) :-
  319    global_ref(Local, Global).
  320
  321
  322                 /*******************************
  323                 *             SHARING          *
  324                 *******************************/
  325
  326:- thread_local
  327    shared_description/3,           % +Hash, +Term, -Subject
  328    share_blank_nodes/1,            % Boolean
  329    shared_nodes/1.                 % counter
  330
  331reset_shared_descriptions :-
  332    retractall(shared_description(_,_,_)),
  333    retractall(shared_nodes(_)).
  334
  335shared_description(Term, Subject) :-
  336    term_hash(Term, Hash),
  337    shared_description(Hash, Term, Subject),
  338    (   retract(shared_nodes(N))
  339    ->  N1 is N + 1
  340    ;   N1 = 1
  341    ),
  342    assert(shared_nodes(N1)).
  343
  344
  345assert_shared_description(Term, Subject) :-
  346    term_hash(Term, Hash),
  347    assert(shared_description(Hash, Term, Subject)).
  348
  349
  350                 /*******************************
  351                 *            START/END         *
  352                 *******************************/
  353
  354%!  rdf_start_file(+Options, -Cleanup) is det.
  355%
  356%   Initialise for the translation of a file.
  357
  358rdf_start_file(Options, Cleanup) :-
  359    rdf_reset_node_ids,             % play safe
  360    reset_shared_descriptions,
  361    set_bnode_sharing(Options, C1),
  362    set_anon_prefix(Options, C2),
  363    add_cleanup(C1, C2, Cleanup).
  364
  365%!  rdf_end_file(:Cleanup) is det.
  366%
  367%   Cleanup reaching the end of an RDF file.
  368
  369rdf_end_file(Cleanup) :-
  370    rdf_reset_node_ids,
  371    (   shared_nodes(N)
  372    ->  print_message(informational, rdf(shared_blank_nodes(N)))
  373    ;   true
  374    ),
  375    reset_shared_descriptions,
  376    Cleanup.
  377
  378set_bnode_sharing(Options, erase(Ref)) :-
  379    option(blank_nodes(Share), Options, noshare),
  380    (   Share == share
  381    ->  assert(share_blank_nodes(true), Ref), !
  382    ;   Share == noshare
  383    ->  fail                        % next clause
  384    ;   throw(error(domain_error(share, Share), _))
  385    ).
  386set_bnode_sharing(_, true).
  387
  388set_anon_prefix(Options, erase(Ref)) :-
  389    option(base_uri(BaseURI), Options),
  390    nonvar(BaseURI),
  391    !,
  392    (   BaseURI == []
  393    ->  AnonBase = '_:'
  394    ;   atomic_list_concat(['_:', BaseURI, '#'], AnonBase)
  395    ),
  396    asserta(anon_prefix(AnonBase), Ref).
  397set_anon_prefix(_, true).
  398
  399add_cleanup(true, X, X) :- !.
  400add_cleanup(X, true, X) :- !.
  401add_cleanup(X, Y, (X, Y)).
  402
  403
  404                 /*******************************
  405                 *             UTIL             *
  406                 *******************************/
  407
  408%!  anon_prefix(-Prefix) is semidet.
  409%
  410%   If defined, it is the prefix used to generate a blank node.
  411
  412:- thread_local
  413    anon_prefix/1.  414
  415make_id(For, ID) :-
  416    anon_prefix(Prefix),
  417    !,
  418    atom_concat(Prefix, For, Base),
  419    gensym(Base, ID).
  420make_id(For, ID) :-
  421    gensym(For, ID).
  422
  423anon_base('_:Description').
  424anon_base('_:Statement').
  425anon_base('_:List').
  426anon_base('_:Node').
  427
  428%!  rdf_reset_ids is det.
  429%
  430%   Utility predicate to reset the gensym counters for the various
  431%   generated identifiers.  This simplifies debugging and matching
  432%   output with the stored desired output (see rdf_test.pl).
  433
  434rdf_reset_ids :-
  435    anon_prefix(Prefix),
  436    !,
  437    (   anon_base(Base),
  438        atom_concat(Prefix, Base, X),
  439        reset_gensym(X),
  440        fail
  441    ;   true
  442    ).
  443rdf_reset_ids :-
  444    (   anon_base(Base),
  445        reset_gensym(Base),
  446        fail
  447    ;   true
  448    )