View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2003-2023, University of Amsterdam
    7                              VU University Amsterdam
    8                              CWI, Amsterdam
    9                              SWI-Prolog Solutions b.v.
   10    All rights reserved.
   11
   12    Redistribution and use in source and binary forms, with or without
   13    modification, are permitted provided that the following conditions
   14    are met:
   15
   16    1. Redistributions of source code must retain the above copyright
   17       notice, this list of conditions and the following disclaimer.
   18
   19    2. Redistributions in binary form must reproduce the above copyright
   20       notice, this list of conditions and the following disclaimer in
   21       the documentation and/or other materials provided with the
   22       distribution.
   23
   24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   25    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   26    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   27    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   28    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   29    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   30    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   31    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   32    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   33    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   34    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   35    POSSIBILITY OF SUCH DAMAGE.
   36*/
   37
   38:- module(rdf_db,
   39          [ rdf_version/1,              % -Version
   40
   41            rdf/3,                      % ?Subject, ?Predicate, ?Object
   42            rdf/4,                      % ?Subject, ?Predicate, ?Object, ?DB
   43            rdf_has/3,                  % ?Subject, +Pred, ?Obj
   44            rdf_has/4,                  % ?Subject, +Pred, ?Obj, -RealPred
   45            rdf_reachable/3,            % ?Subject, +Pred, ?Object
   46            rdf_reachable/5,            % ?Subject, +Pred, ?Object, +MaxD, ?D
   47            rdf_resource/1,             % ?Resource
   48            rdf_subject/1,              % ?Subject
   49
   50            rdf_member_property/2,      % ?Property, ?Index
   51
   52            rdf_assert/3,               % +Subject, +Predicate, +Object
   53            rdf_assert/4,               % +Subject, +Predicate, +Object, +DB
   54            rdf_retractall/3,           % ?Subject, ?Predicate, ?Object
   55            rdf_retractall/4,           % ?Subject, ?Predicate, ?Object, +DB
   56            rdf_update/4,               % +Subject, +Predicate, +Object, +Act
   57            rdf_update/5,               % +Subject, +Predicate, +Object, +Src, +Act
   58            rdf_set_predicate/2,        % +Predicate, +Property
   59            rdf_predicate_property/2,   % +Predicate, ?Property
   60            rdf_current_predicate/1,    % -Predicate
   61            rdf_current_literal/1,      % -Literal
   62            rdf_transaction/1,          % :Goal
   63            rdf_transaction/2,          % :Goal, +Id
   64            rdf_transaction/3,          % :Goal, +Id, +Options
   65            rdf_active_transaction/1,   % ?Id
   66
   67            rdf_monitor/2,              % :Goal, +Options
   68
   69            rdf_save_db/1,              % +File
   70            rdf_save_db/2,              % +File, +DB
   71            rdf_load_db/1,              % +File
   72            rdf_reset_db/0,
   73
   74            rdf_node/1,                 % -Id
   75            rdf_bnode/1,                % -Id
   76            rdf_is_bnode/1,             % +Id
   77
   78            rdf_is_resource/1,          % +Term
   79            rdf_is_literal/1,           % +Term
   80            rdf_literal_value/2,        % +Term, -Value
   81
   82            rdf_load/1,                 % +File
   83            rdf_load/2,                 % +File, +Options
   84            rdf_save/1,                 % +File
   85            rdf_save/2,                 % +File, +Options
   86            rdf_unload/1,               % +File
   87            rdf_unload_graph/1,         % +Graph
   88
   89            rdf_md5/2,                  % +DB, -MD5
   90            rdf_atom_md5/3,             % +Text, +Times, -MD5
   91
   92            rdf_create_graph/1,         % ?Graph
   93            rdf_graph_property/2,       % ?Graph, ?Property
   94            rdf_set_graph/2,            % +Graph, +Property
   95            rdf_graph/1,                % ?Graph
   96            rdf_source/1,               % ?File
   97            rdf_source/2,               % ?DB, ?SourceURL
   98            rdf_make/0,                 % Reload modified databases
   99            rdf_gc/0,                   % Garbage collection
  100
  101            rdf_source_location/2,      % +Subject, -Source
  102            rdf_statistics/1,           % -Key
  103            rdf_set/1,                  % +Term
  104            rdf_generation/1,           % -Generation
  105            rdf_snapshot/1,             % -Snapshot
  106            rdf_delete_snapshot/1,      % +Snapshot
  107            rdf_current_snapshot/1,     % +Snapshot
  108            rdf_estimate_complexity/4,  % +S,+P,+O,-Count
  109
  110            rdf_save_subject/3,         % +Stream, +Subject, +DB
  111            rdf_save_header/2,          % +Out, +Options
  112            rdf_save_footer/1,          % +Out
  113
  114            rdf_equal/2,                % ?Resource, ?Resource
  115            lang_equal/2,               % +Lang1, +Lang2
  116            lang_matches/2,             % +Lang, +Pattern
  117
  118            rdf_prefix/2,               % :Alias, +URI
  119            rdf_current_prefix/2,       % :Alias, ?URI
  120            rdf_register_prefix/2,      % +Alias, +URI
  121            rdf_register_prefix/3,      % +Alias, +URI, +Options
  122            rdf_unregister_prefix/1,    % +Alias
  123            rdf_current_ns/2,           % :Alias, ?URI
  124            rdf_register_ns/2,          % +Alias, +URI
  125            rdf_register_ns/3,          % +Alias, +URI, +Options
  126            rdf_global_id/2,            % ?NS:Name, :Global
  127            rdf_global_object/2,        % +Object, :NSExpandedObject
  128            rdf_global_term/2,          % +Term, :WithExpandedNS
  129
  130            rdf_compare/3,              % -Dif, +Object1, +Object2
  131            rdf_match_label/3,          % +How, +String, +Label
  132            rdf_split_url/3,            % ?Base, ?Local, ?URL
  133            rdf_url_namespace/2,        % +URL, ?Base
  134
  135            rdf_warm_indexes/0,
  136            rdf_warm_indexes/1,         % +Indexed
  137            rdf_update_duplicates/0,
  138
  139            rdf_debug/1,                % Set verbosity
  140
  141            rdf_new_literal_map/1,      % -Handle
  142            rdf_destroy_literal_map/1,  % +Handle
  143            rdf_reset_literal_map/1,    % +Handle
  144            rdf_insert_literal_map/3,   % +Handle, +Key, +Literal
  145            rdf_insert_literal_map/4,   % +Handle, +Key, +Literal, -NewKeys
  146            rdf_delete_literal_map/3,   % +Handle, +Key, +Literal
  147            rdf_delete_literal_map/2,   % +Handle, +Key
  148            rdf_find_literal_map/3,     % +Handle, +KeyList, -Literals
  149            rdf_keys_in_literal_map/3,  % +Handle, +Spec, -Keys
  150            rdf_statistics_literal_map/2, % +Handle, +Name(-Arg...)
  151
  152            rdf_graph_prefixes/2,       % ?Graph, -Prefixes
  153            rdf_graph_prefixes/3,       % ?Graph, -Prefixes, :Filter
  154
  155            (rdf_meta)/1,               % +Heads
  156            op(1150, fx, (rdf_meta))
  157          ]).  158:- use_module(library(semweb/rdf_prefixes),
  159              [ (rdf_meta)/1,
  160                register_file_prefixes/1,
  161                rdf_global_id/2,
  162                rdf_register_ns/2,
  163                                        % re-exported predicates
  164                rdf_global_object/2,
  165                rdf_current_ns/2,
  166                rdf_prefix/2,
  167                rdf_global_term/2,
  168                rdf_register_ns/3,
  169                rdf_register_prefix/3,
  170                rdf_register_prefix/2,
  171                rdf_current_prefix/2,
  172                rdf_unregister_prefix/1
  173              ]).  174
  175:- autoload(library(apply),[maplist/2,maplist/3]).  176:- use_module(library(debug),[debug/3,assertion/1]).  177:- autoload(library(error),[must_be/2,existence_error/2]).  178:- autoload(library(gensym),[gensym/2,reset_gensym/1]).  179:- autoload(library(lists),
  180	    [member/2,flatten/2,list_to_set/2,append/3,select/3]).  181:- autoload(library(memfile),
  182	    [atom_to_memory_file/2,open_memory_file/4]).  183:- autoload(library(option),
  184	    [option/2,option/3,merge_options/3,meta_options/3]).  185:- autoload(library(rdf),[process_rdf/3]).  186:- autoload(library(sgml),
  187	    [ load_structure/3,
  188	      xml_quote_attribute/3,
  189	      xml_name/1,
  190	      xml_quote_cdata/3,
  191	      xml_is_dom/1,
  192	      iri_xml_namespace/3,
  193	      iri_xml_namespace/2
  194	    ]).  195:- autoload(library(sgml_write),[xml_write/3]).  196:- autoload(library(uri),
  197	    [ uri_file_name/2,
  198	      uri_is_global/1,
  199	      uri_normalized/2,
  200	      uri_components/2,
  201	      uri_data/3,
  202	      uri_data/4
  203	    ]).  204:- autoload(library(xsdp_types),[xsdp_numeric_uri/2]).  205:- autoload(library(semweb/rdf_cache),[rdf_cache_file/3]).  206
  207:- if(exists_source(library(thread))).  208:- autoload(library(thread), [concurrent/3]).  209:- endif.  210
  211:- use_foreign_library(foreign(rdf_db)).  212:- public rdf_print_predicate_cloud/2.  % print matrix of reachable predicates
  213
  214:- meta_predicate
  215    rdf_transaction(0),
  216    rdf_transaction(0, +),
  217    rdf_transaction(0, +, +),
  218    rdf_monitor(1, +),
  219    rdf_save(+, :),
  220    rdf_load(+, :).  221
  222:- predicate_options(rdf_graph_prefixes/3, 3,
  223                     [ expand(callable+4),
  224                       filter(callable+3),
  225                       get_prefix(callable+2),
  226                       min_count(nonneg)
  227                     ]).  228:- predicate_options(rdf_load/2, 2,
  229                     [ base_uri(atom),
  230                       blank_nodes(oneof([share,noshare])),
  231                       cache(boolean),
  232                       concurrent(positive_integer),
  233                       db(atom),
  234                       format(oneof([xml,triples,turtle,trig,nquads,ntriples])),
  235                       graph(atom),
  236                       multifile(boolean),
  237                       if(oneof([true,changed,not_loaded])),
  238                       modified(-float),
  239                       prefixes(-list),
  240                       silent(boolean),
  241                       register_namespaces(boolean)
  242                     ]).  243:- predicate_options(rdf_save/2, 2,
  244                     [ graph(atom),
  245                       db(atom),
  246                       anon(boolean),
  247                       base_uri(atom),
  248                       write_xml_base(boolean),
  249                       convert_typed_literal(callable),
  250                       encoding(encoding),
  251                       document_language(atom),
  252                       namespaces(list(atom)),
  253                       xml_attributes(boolean),
  254                       inline(boolean)
  255                     ]).  256:- predicate_options(rdf_save_header/2, 2,
  257                     [ graph(atom),
  258                       db(atom),
  259                       namespaces(list(atom))
  260                     ]).  261:- predicate_options(rdf_save_subject/3, 3,
  262                     [ graph(atom),
  263                       base_uri(atom),
  264                       convert_typed_literal(callable),
  265                       document_language(atom)
  266                     ]).  267:- predicate_options(rdf_transaction/3, 3,
  268                     [ snapshot(any)
  269                     ]).  270
  271:- discontiguous
  272    term_expansion/2.  273
  274/** <module> Core RDF database
  275
  276The file library(semweb/rdf_db) provides the core  of the SWI-Prolog RDF
  277store.
  278
  279@deprecated     New applications should use library(semweb/rdf11), which
  280                provides a much more intuitive API to the RDF store, notably
  281                for handling literals.  The library(semweb/rdf11) runs
  282                currently on top of this library and both can run side-by-side
  283                in the same application.  Terms retrieved from the database
  284                however have a different shape and can not be exchanged without
  285                precautions.
  286*/
  287
  288		 /*******************************
  289		 *            PREFIXES		*
  290		 *******************************/
  291
  292% the ns/2 predicate is historically defined  in this module. We'll keep
  293% that for compatibility reasons.
  294
  295:- multifile ns/2.  296:- dynamic   ns/2.                      % ID, URL
  297
  298:- multifile
  299    rdf_prefixes:rdf_empty_prefix_cache/2.  300
  301rdf_prefixes:rdf_empty_prefix_cache(_Prefix, _IRI) :-
  302    rdf_empty_prefix_cache.
  303
  304:- rdf_meta
  305    rdf(r,r,o),
  306    rdf_has(r,r,o,r),
  307    rdf_has(r,r,o),
  308    rdf_assert(r,r,o),
  309    rdf_retractall(r,r,o),
  310    rdf(r,r,o,?),
  311    rdf_assert(r,r,o,+),
  312    rdf_retractall(r,r,o,?),
  313    rdf_reachable(r,r,o),
  314    rdf_reachable(r,r,o,+,?),
  315    rdf_update(r,r,o,t),
  316    rdf_update(r,r,o,+,t),
  317    rdf_equal(o,o),
  318    rdf_source_location(r,-),
  319    rdf_resource(r),
  320    rdf_subject(r),
  321    rdf_create_graph(r),
  322    rdf_graph(r),
  323    rdf_graph_property(r,?),
  324    rdf_set_graph(r,+),
  325    rdf_unload_graph(r),
  326    rdf_set_predicate(r, t),
  327    rdf_predicate_property(r, -),
  328    rdf_estimate_complexity(r,r,r,-),
  329    rdf_print_predicate_cloud(r,+).  330
  331%!  rdf_equal(?Resource1, ?Resource2)
  332%
  333%   Simple equality test to exploit goal-expansion.
  334
  335rdf_equal(Resource, Resource).
  336
  337%!  lang_equal(+Lang1, +Lang2) is semidet.
  338%
  339%   True if two RFC language specifiers denote the same language
  340%
  341%   @see lang_matches/2.
  342
  343lang_equal(Lang, Lang) :- !.
  344lang_equal(Lang1, Lang2) :-
  345    downcase_atom(Lang1, LangCannon),
  346    downcase_atom(Lang2, LangCannon).
  347
  348%!  lang_matches(+Lang, +Pattern) is semidet.
  349%
  350%   True if Lang  matches  Pattern.   This  implements  XML language
   351%   matching  as  specified  by  RFC 4647.  Both  Lang and Pattern are
  352%   dash-separated strings of  identifiers  or   (for  Pattern)  the
  353%   wildcard *. Identifiers are  matched   case-insensitive  and a *
  354%   matches any number of identifiers. A   short pattern is the same
  355%   as *.
  356
  357
  358                 /*******************************
  359                 *     BASIC TRIPLE QUERIES     *
  360                 *******************************/
  361
  362%!  rdf(?Subject, ?Predicate, ?Object) is nondet.
  363%
  364%   Elementary query for triples. Subject   and  Predicate are atoms
  365%   representing the fully qualified URL of  the resource. Object is
  366%   either an atom representing a resource  or literal(Value) if the
  367%   object  is  a  literal  value.   If    a   value   of  the  form
  368%   NameSpaceID:LocalName is provided it  is   expanded  to a ground
  369%   atom  using  expand_goal/2.  This  implies   you  can  use  this
  370%   construct in compiled code without paying a performance penalty.
  371%   Literal values take one of the following forms:
  372%
  373%     * Atom
  374%     If the value is a simple atom it is the textual representation
  375%     of a string literal without explicit type or language
  376%     qualifier.
  377%
  378%     * lang(LangID, Atom)
  379%     Atom represents the text of a string literal qualified with
  380%     the given language.
  381%
  382%     * type(TypeID, Value)
  383%     Used for attributes qualified using the =|rdf:datatype|=
  384%     TypeID. The Value is either the textual representation or a
  385%     natural Prolog representation. See the option
  386%     convert_typed_literal(:Convertor) of the parser. The storage
  387%     layer provides efficient handling of atoms, integers (64-bit)
  388%     and floats (native C-doubles). All other data is represented
  389%     as a Prolog record.
  390%
  391%   For literal querying purposes, Object can be of the form
  392%   literal(+Query, -Value), where Query is one of the terms below.
  393%   If the Query takes a literal argument and the value has a
  394%   numeric type numerical comparison is performed.
  395%
  396%     * plain(+Text)
  397%     Perform exact match and demand the language or type qualifiers
  398%     to match. This query is fully indexed.
  399%
  400%     * icase(+Text)
  401%     Perform a full but case-insensitive match. This query is
  402%     fully indexed.
  403%
  404%     * exact(+Text)
  405%     Same as icase(Text).  Backward compatibility.
  406%
  407%     * substring(+Text)
  408%     Match any literal that contains Text as a case-insensitive
  409%     substring. The query is not indexed on Object.
  410%
  411%     * word(+Text)
  412%     Match any literal that contains Text delimited by a non
  413%     alpha-numeric character, the start or end of the string. The
  414%     query is not indexed on Object.
  415%
  416%     * prefix(+Text)
  417%     Match any literal that starts with Text. This call is intended
  418%     for completion. The query is indexed using the skip list of
  419%     literals.
  420%
  421%     * ge(+Literal)
  422%     Match any literal that is equal or larger than Literal in the
  423%     ordered set of literals.
  424%
  425%     * gt(+Literal)
  426%     Match any literal that is larger than Literal in the ordered set
  427%     of literals.
  428%
  429%     * eq(+Literal)
  430%     Match any literal that is equal to Literal in the ordered set
  431%     of literals.
  432%
  433%     * le(+Literal)
  434%     Match any literal that is equal or smaller than Literal in the
  435%     ordered set of literals.
  436%
  437%     * lt(+Literal)
  438%     Match any literal that is smaller than Literal in the ordered set
  439%     of literals.
  440%
  441%     * between(+Literal1, +Literal2)
  442%     Match any literal that is between Literal1 and Literal2 in the
  443%     ordered set of literals. This may include both Literal1 and
  444%     Literal2.
  445%
  446%     * like(+Pattern)
  447%     Match any literal that matches Pattern case insensitively,
  448%     where the `*' character in Pattern matches zero or more
  449%     characters.
  450%
  451%   Backtracking never returns duplicate triples.  Duplicates can be
  452%   retrieved using rdf/4. The predicate   rdf/3 raises a type-error
  453%   if called with improper arguments.  If   rdf/3  is called with a
   454%   term  literal(_)   as   Subject   or   Predicate,   it   fails
  455%   silently.  This  allows   for   graph    matching   goals   like
  456%   rdf(S,P,O),rdf(O,P2,O2) to proceed without errors.
  457
  458%!  rdf(?Subject, ?Predicate, ?Object, ?Source) is nondet.
  459%
  460%   As rdf/3 but in addition query  the   graph  to which the triple
  461%   belongs. Unlike rdf/3, this predicate does not remove duplicates
  462%   from the result set.
  463%
  464%   @param Source is a term Graph:Line.  If Source is instantiated,
  465%   passing an atom is the same as passing Atom:_.
  466
  467
  468%!  rdf_has(?Subject, +Predicate, ?Object) is nondet.
  469%
  470%   Succeeds if the triple rdf(Subject,   Predicate, Object) is true
  471%   exploiting the rdfs:subPropertyOf predicate as   well as inverse
  472%   predicates   declared   using   rdf_set_predicate/2   with   the
  473%   =inverse_of= property.
  474
  475%!  rdf_has(?Subject, +Predicate, ?Object, -RealPredicate) is nondet.
  476%
  477%   Same as rdf_has/3, but RealPredicate is   unified  to the actual
  478%   predicate that makes this relation   true. RealPredicate must be
  479%   Predicate or an rdfs:subPropertyOf  Predicate.   If  an  inverse
  480%   match is found, RealPredicate is the term inverse_of(Pred).
  481
  482%!  rdf_reachable(?Subject, +Predicate, ?Object) is nondet.
  483%
  484%   Is true if Object can  be   reached  from  Subject following the
  485%   transitive predicate Predicate or a  sub-property thereof, while
  486%   respecting the symmetric(true) or inverse_of(P2) properties.
  487%
  488%   If used with either Subject or  Object unbound, it first returns
  489%   the origin, followed by  the  reachable  nodes  in breadth-first
  490%   search-order. The implementation internally   looks one solution
  491%   ahead and succeeds deterministically on  the last solution. This
  492%   predicate never generates the same  node   twice  and  is robust
  493%   against cycles in the transitive relation.
  494%
  495%   With all arguments instantiated,   it succeeds deterministically
  496%   if a path can be found from  Subject to Object. Searching starts
  497%   at Subject, assuming the branching factor   is normally lower. A
  498%   call  with  both  Subject   and    Object   unbound   raises  an
  499%   instantiation  error.  The  following    example  generates  all
  500%   subclasses of rdfs:Resource:
  501%
  502%     ==
  503%     ?- rdf_reachable(X, rdfs:subClassOf, rdfs:'Resource').
  504%     X = 'http://www.w3.org/2000/01/rdf-schema#Resource' ;
  505%     X = 'http://www.w3.org/2000/01/rdf-schema#Class' ;
  506%     X = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#Property' ;
  507%     ...
  508%     ==
  509
  510
  511%!  rdf_reachable(?Subject, +Predicate, ?Object, +MaxD, -D) is nondet.
  512%
  513%   Same as rdf_reachable/3, but in addition, MaxD limits the number
  514%   of edges expanded and D is   unified with the `distance' between
  515%   Subject and Object. Distance 0 means  Subject and Object are the
  516%   same resource. MaxD can be the  constant =infinite= to impose no
  517%   distance-limit.
  518
  519%!  rdf_subject(?Resource) is nondet.
  520%
  521%   True if Resource appears as a   subject. This query respects the
  522%   visibility rules implied by the logical update view.
  523%
  524%   @see rdf_resource/1.
  525
  526rdf_subject(Resource) :-
  527    rdf_resource(Resource),
  528    ( rdf(Resource, _, _) -> true ).
  529
  530%!  rdf_resource(?Resource) is nondet.
  531%
  532%   True when Resource is a resource used as a subject or object in
  533%   a triple.
  534%
  535%   This predicate is primarily intended  as   a  way to process all
  536%   resources without processing resources twice.   The user must be
  537%   aware that some of the returned resources  may not appear in any
  538%   _visible_ triple.
  539
  540
  541                 /*******************************
  542                 *     TRIPLE MODIFICATIONS     *
  543                 *******************************/
  544
  545%!  rdf_assert(+Subject, +Predicate, +Object) is det.
  546%
  547%   Assert a new triple into  the   database.  This is equivalent to
  548%   rdf_assert/4 using Graph  =user=.  Subject   and  Predicate  are
  549%   resources. Object is either a resource or a term literal(Value).
  550%   See rdf/3 for an explanation  of   Value  for typed and language
  551%   qualified literals. All arguments  are   subject  to  name-space
  552%   expansion. Complete duplicates (including  the   same  graph and
  553%   `line' and with a compatible `lifespan')   are  not added to the
  554%   database.
  555
  556%!  rdf_assert(+Subject, +Predicate, +Object, +Graph) is det.
  557%
   558%   As  rdf_assert/3,  adding  the  triple  to  the  indicated named
  559%   graph.
  560%
  561%   @param Graph is either the name of a   graph (an atom) or a term
  562%   Graph:Line, where Line is an integer that denotes a line number.
  563
  564%!  rdf_retractall(?Subject, ?Predicate, ?Object) is det.
  565%
  566%   Remove   all   matching   triples   from    the   database.   As
  567%   rdf_retractall/4 using an unbound graph. See also
  568%   rdf_retractall/4 and rdf_unload/1.
  569
  570%!  rdf_retractall(?Subject, ?Predicate, ?Object, ?Graph) is det.
  571%
  572%   As rdf_retractall/3, also matching Graph.  This  is particularly
  573%   useful to remove all triples coming from a loaded file. See also
  574%   rdf_unload/1.
  575
  576%!  rdf_update(+Subject, +Predicate, +Object, ++Action) is det.
   577%!  rdf_update(+Subject, +Predicate, +Object, +Graph, ++Action) is det.
  578%
  579%   Replaces one of the three  (four)   fields  on  the matching triples
  580%   depending on Action:
  581%
  582%     * subject(Resource)
  583%     Changes the first field of the triple.
  584%     * predicate(Resource)
  585%     Changes the second field of the triple.
  586%     * object(Object)
  587%     Changes the last field of the triple to the given resource or
  588%     literal(Value).
  589%     * graph(Graph)
  590%     Moves the triple from its current named graph to Graph.
  591%     This only works with rdf_update/5 and throws an error when
  592%     used with rdf_update/4.
  593
  594
  595                 /*******************************
  596                 *          COLLECTIONS         *
  597                 *******************************/
  598
  599%!  rdf_member_property(?Prop, ?Index)
  600%
  601%   Deal with the rdf:_1, ... properties.
  602
  603term_expansion(member_prefix(x),
  604               member_prefix(Prefix)) :-
  605    rdf_db:ns(rdf, NS),
  606    atom_concat(NS, '_', Prefix).
  607member_prefix(x).
  608
  609rdf_member_property(P, N) :-
  610    integer(N),
  611    !,
  612    member_prefix(Prefix),
  613    atom_concat(Prefix, N, P).
  614rdf_member_property(P, N) :-
  615    member_prefix(Prefix),
  616    atom_concat(Prefix, Sub, P),
  617    atom_number(Sub, N).
  618
  619
  620                 /*******************************
  621                 *      ANONYMOUS SUBJECTS      *
  622                 *******************************/
  623
  624%!  rdf_node(-Id)
  625%
  626%   Generate a unique blank node identifier for a subject.
  627%
  628%   @deprecated     New code should use rdf_bnode/1.
  629
  630rdf_node(Resource) :-
  631    rdf_bnode(Resource).
  632
  633%!  rdf_bnode(-Id)
  634%
  635%   Generate a unique anonymous identifier for a subject.
  636
  637rdf_bnode(Value) :-
  638    repeat,
  639    gensym('_:genid', Value),
  640    \+ rdf(Value, _, _),
  641    \+ rdf(_, _, Value),
  642    \+ rdf(_, Value, _),
  643    !.
  644
  645
  646
  647                 /*******************************
  648                 *             TYPES            *
  649                 *******************************/
  650
  651%!  rdf_is_bnode(+Id)
  652%
  653%   Tests if a resource is  a  blank   node  (i.e.  is  an anonymous
  654%   resource). A blank node is represented   as  an atom that starts
   655%   with =|_:|=. For backward compatibility  reasons, =|__|= is also
  656%   considered to be a blank node.
  657%
  658%   @see rdf_bnode/1.
  659
  660%!  rdf_is_resource(@Term) is semidet.
  661%
  662%   True if Term is an RDF  resource.   Note  that  this is merely a
  663%   type-test; it does not mean  this   resource  is involved in any
  664%   triple.  Blank nodes are also considered resources.
  665%
  666%   @see rdf_is_bnode/1
  667
  668rdf_is_resource(Term) :-
  669    atom(Term).
  670
  671%!  rdf_is_literal(@Term) is semidet.
  672%
  673%   True if Term is an RDF literal object. Currently only checks for
  674%   groundness and the literal functor.
  675
  676rdf_is_literal(literal(Value)) :-
  677    ground(Value).
  678
  679                 /*******************************
  680                 *             LITERALS         *
  681                 *******************************/
  682
  683%!  rdf_current_literal(-Literal) is nondet.
  684%
  685%   True when Literal is a currently  known literal. Enumerates each
  686%   unique literal exactly once. Note that   it is possible that the
  687%   literal only appears in already deleted triples. Deleted triples
  688%   may be locked due to active   queries, transactions or snapshots
  689%   or may not yet be reclaimed by the garbage collector.
  690
  691
  692%!  rdf_literal_value(+Literal, -Value) is semidet.
  693%
  694%   True when value is  the   appropriate  Prolog  representation of
  695%   Literal in the RDF _|value space|_.  Current mapping:
  696%
  697%     | Plain literals              | Atom                    |
  698%     | Language tagged literal     | Atom holding plain text |
  699%     | xsd:string                  | Atom                    |
  700%     | rdf:XMLLiteral              | XML DOM Tree            |
  701%     | Numeric XSD type            | Number                  |
  702%
  703%   @tbd    Well, this is the long-term idea.
  704%   @tbd    Add mode (-,+)
  705
  706:- rdf_meta
  707    rdf_literal_value(o, -),
  708    typed_value(r, +, -),
  709    numeric_value(r, +, -).  710
  711rdf_literal_value(literal(String), Value) :-
  712    atom(String),
  713    !,
  714    Value = String.
  715rdf_literal_value(literal(lang(_Lang, String)), String).
  716rdf_literal_value(literal(type(Type, String)), Value) :-
  717    typed_value(Type, String, Value).
  718
  719typed_value(Numeric, String, Value) :-
  720    xsdp_numeric_uri(Numeric, NumType),
  721    !,
  722    numeric_value(NumType, String, Value).
  723typed_value(xsd:string, String, String).
  724typed_value(rdf:'XMLLiteral', Value, DOM) :-
  725    (   atom(Value)
  726    ->  setup_call_cleanup(
  727            ( atom_to_memory_file(Value, MF),
  728              open_memory_file(MF, read, In, [free_on_close(true)])
  729            ),
  730            load_structure(stream(In), DOM, [dialect(xml)]),
  731            close(In))
  732    ;   DOM = Value
  733    ).
  734
  735numeric_value(xsd:integer, String, Value) :-
  736    atom_number(String, Value),
  737    integer(Value).
  738numeric_value(xsd:float, String, Value) :-
  739    atom_number(String, Number),
  740    Value is float(Number).
  741numeric_value(xsd:double, String, Value) :-
  742    atom_number(String, Number),
  743    Value is float(Number).
  744numeric_value(xsd:decimal, String, Value) :-
  745    atom_number(String, Value).
  746
  747
  748                 /*******************************
  749                 *            SOURCE            *
  750                 *******************************/
  751
  752%!  rdf_source_location(+Subject, -Location) is nondet.
  753%
  754%   True when triples for Subject are loaded from Location.
  755%
  756%   @param Location is a term File:Line.
  757
  758rdf_source_location(Subject, Source) :-
  759    findall(Source, rdf(Subject, _, _, Source), Sources),
  760    sort(Sources, Unique),
  761    member(Source, Unique).
  762
  763
  764                 /*******************************
  765                 *       GARBAGE COLLECT        *
  766                 *******************************/
  767
  768%!  rdf_create_gc_thread
  769%
  770%   Create the garbage collection thread.
  771
  772:- public
  773    rdf_create_gc_thread/0.  774
  775rdf_create_gc_thread :-
  776    thread_create(rdf_gc_loop, _,
  777                  [ alias('__rdf_GC')
  778                  ]).
  779
  780%!  rdf_gc_loop
  781%
  782%   Take care of running the RDF garbage collection.  This predicate
  783%   is called from a thread started by creating the RDF DB.
  784
  785rdf_gc_loop :-
  786    catch(rdf_gc_loop(0), E, recover_gc(E, Cont)),
  787    (   Cont == true
  788    ->  rdf_gc_loop
  789    ;   thread_self(Me),
  790        thread_detach(Me)
  791    ).
  792
  793recover_gc('$aborted', false) :-
  794    !.
  795recover_gc(unwind(_), false) :-
  796    !.
  797recover_gc(Error, true) :-
  798    print_message(error, Error).
  799
  800rdf_gc_loop(CPU) :-
  801    repeat,
  802    (   consider_gc(CPU)
  803    ->  rdf_gc(CPU1),
  804        sleep(CPU1)
  805    ;   sleep(0.1)
  806    ),
  807    fail.
  808
  809%!  rdf_gc(-CPU) is det.
  810%
  811%   Run RDF GC one time. CPU is  the   amount  of CPU time spent. We
  812%   update this in Prolog because portable access to thread specific
  813%   CPU is really hard in C.
  814
  815rdf_gc(CPU) :-
  816    statistics(cputime, CPU0),
  817    (   rdf_gc_
  818    ->  statistics(cputime, CPU1),
  819        CPU is CPU1-CPU0,
  820        rdf_add_gc_time(CPU)
  821    ;   CPU = 0.0
  822    ).
  823
  824%!  rdf_gc is det.
  825%
  826%   Run the RDF-DB garbage collector until no   garbage  is left and all
  827%   tables are fully optimized. Under normal operation a separate thread
  828%   with identifier =|__rdf_GC|= performs garbage  collection as long as
  829%   it is considered `useful'.
  830%
  831%   Using rdf_gc/0 should  only  be  needed   to  ensure  a  fully clean
  832%   database for analysis purposes such as leak detection.
  833
  834rdf_gc :-
  835    has_garbage,
  836    !,
  837    rdf_gc(_),
  838    rdf_gc.
  839rdf_gc.
  840
  841%!  has_garbage is semidet.
  842%
  843%   True if there is something to gain using GC.
  844
  845has_garbage :-
  846    rdf_gc_info_(Info),
  847    has_garbage(Info),
  848    !.
  849
  850has_garbage(Info) :- arg(2, Info, Garbage),     Garbage > 0.
  851has_garbage(Info) :- arg(3, Info, Reindexed),   Reindexed > 0.
  852has_garbage(Info) :- arg(4, Info, Optimizable), Optimizable > 0.
  853
  854%!  consider_gc(+CPU) is semidet.
  855%
  856%   @param CPU is the amount of CPU time spent in the most recent
  857%   GC.
  858
  859consider_gc(_CPU) :-
  860    (   rdf_gc_info_(gc_info(Triples,       % Total #triples in DB
  861                             Garbage,       % Garbage triples in DB
  862                             Reindexed,     % Reindexed & not reclaimed
  863                             Optimizable,   % Non-optimized tables
  864                             _KeepGen,      % Oldest active generation
  865                             _LastGCGen,    % Oldest active gen at last GC
  866                             _ReindexGen,
  867                             _LastGCReindexGen))
  868    ->  (   (Garbage+Reindexed) * 5 > Triples
  869        ;   Optimizable > 4
  870        )
  871    ;   print_message(error, rdf(invalid_gc_info)),
  872        sleep(10)
  873    ),
  874    !.
  875
  876
  877                 /*******************************
  878                 *           STATISTICS         *
  879                 *******************************/
  880
  881%!  rdf_statistics(?KeyValue) is nondet.
  882%
  883%   Obtain statistics on the RDF database.  Defined statistics are:
  884%
  885%     * graphs(-Count)
  886%     Number of named graphs.
  887%
  888%     * triples(-Count)
  889%     Total number of triples in the database.  This is the number
  890%     of asserted triples minus the number of retracted ones.  The
  891%     number of _visible_ triples in a particular context may be
  892%     different due to visibility rules defined by the logical
  893%     update view and transaction isolation.
  894%
  895%     * resources(-Count)
  896%     Number of resources that appear as subject or object in a
  897%     triple.  See rdf_resource/1.
  898%
  899%     * properties(-Count)
  900%     Number of current predicates.  See rdf_current_predicate/1.
  901%
  902%     * literals(-Count)
  903%     Number of current literals.  See rdf_current_literal/1.
  904%
  905%     * gc(GCCount, ReclaimedTriples, ReindexedTriples, Time)
  906%     Information about the garbage collector.
  907%
  908%     * searched_nodes(-Count)
  909%     Number of nodes expanded by rdf_reachable/3 and
  910%     rdf_reachable/5.
  911%
  912%     * lookup(rdf(S,P,O,G), Count)
  913%     Number of queries that have been performed for this particular
  914%     instantiation pattern.  Each of S,P,O,G is either + or -.
  915%     Fails in case the number of performed queries is zero.
  916%
  917%     * hash_quality(rdf(S,P,O,G), Buckets, Quality, PendingResize)
  918%     Statistics on the index for this pattern.  Indices are created
  919%     lazily on the first relevant query.
  920%
%     * triples_by_graph(Graph, Count)
%     This statistic is produced for each named graph. See
%     =triples= for the interpretation of this value.
  924
  925rdf_statistics(graphs(Count)) :-
  926    rdf_statistics_(graphs(Count)).
  927rdf_statistics(triples(Count)) :-
  928    rdf_statistics_(triples(Count)).
  929rdf_statistics(duplicates(Count)) :-
  930    rdf_statistics_(duplicates(Count)).
  931rdf_statistics(lingering(Count)) :-
  932    rdf_statistics_(lingering(Count)).
  933rdf_statistics(resources(Count)) :-
  934    rdf_statistics_(resources(Count)).
  935rdf_statistics(properties(Count)) :-
  936    rdf_statistics_(predicates(Count)).
  937rdf_statistics(literals(Count)) :-
  938    rdf_statistics_(literals(Count)).
  939rdf_statistics(gc(Count, Reclaimed, Reindexed, Time)) :-
  940    rdf_statistics_(gc(Count, Reclaimed, Reindexed, Time)).
  941rdf_statistics(searched_nodes(Count)) :-
  942    rdf_statistics_(searched_nodes(Count)).
  943rdf_statistics(lookup(Index, Count)) :-
  944    functor(Indexed, indexed, 16),
  945    rdf_statistics_(Indexed),
  946    index(Index, I),
  947    Arg is I + 1,
  948    arg(Arg, Indexed, Count),
  949    Count \== 0.
  950rdf_statistics(hash_quality(Index, Size, Quality,Optimize)) :-
  951    rdf_statistics_(hash_quality(List)),
  952    member(hash(Place,Size,Quality,Optimize), List),
  953    index(Index, Place).
  954rdf_statistics(triples_by_graph(Graph, Count)) :-
  955    rdf_graph_(Graph, Count).
  956
  957index(rdf(-,-,-,-), 0).
  958index(rdf(+,-,-,-), 1).
  959index(rdf(-,+,-,-), 2).
  960index(rdf(+,+,-,-), 3).
  961index(rdf(-,-,+,-), 4).
  962index(rdf(+,-,+,-), 5).
  963index(rdf(-,+,+,-), 6).
  964index(rdf(+,+,+,-), 7).
  965
  966index(rdf(-,-,-,+), 8).
  967index(rdf(+,-,-,+), 9).
  968index(rdf(-,+,-,+), 10).
  969index(rdf(+,+,-,+), 11).
  970index(rdf(-,-,+,+), 12).
  971index(rdf(+,-,+,+), 13).
  972index(rdf(-,+,+,+), 14).
  973index(rdf(+,+,+,+), 15).
  974
  975
  976                 /*******************************
  977                 *           PREDICATES         *
  978                 *******************************/
  979
%!  rdf_current_predicate(?Predicate) is nondet.
%
%   True when Predicate is a   currently known predicate. Predicates
%   are created if a triple is created  that uses this predicate  or
%   a property of the predicate   is  set using rdf_set_predicate/2.
%   The predicate may (no longer) have triples associated with it.
%
%   Note that resources that have  =|rdf:type|= =|rdf:Property|= are
%   not automatically included in the  result-set of this predicate,
%   while _all_ resources that appear as   the  second argument of a
%   triple _are_ included.
%
%   @see rdf_predicate_property/2.
  993
  994rdf_current_predicate(P, DB) :-
  995    rdf_current_predicate(P),
  996    (   rdf(_,P,_,DB)
  997    ->  true
  998    ).
  999
 1000%!  rdf_predicate_property(?Predicate, ?Property)
 1001%
 1002%   Query properties of  a  defined   predicate.  Currently  defined
 1003%   properties are given below.
 1004%
 1005%     * symmetric(Bool)
 1006%     True if the predicate is defined to be symmetric. I.e., {A} P
 1007%     {B} implies {B} P {A}. Setting symmetric is equivalent to
 1008%     inverse_of(Self).
 1009%
 1010%     * inverse_of(Inverse)
 1011%     True if this predicate is the inverse of Inverse. This
 1012%     property is used by rdf_has/3, rdf_has/4, rdf_reachable/3 and
 1013%     rdf_reachable/5.
 1014%
%     * transitive(Bool)
%     True if this predicate is transitive. This property is
%     currently not used. It might be used to make rdf_has/3 imply
%     rdf_reachable/3 for transitive predicates.
 1019%
 1020%     * triples(Triples)
 1021%     Unify Triples with the number of existing triples using this
 1022%     predicate as second argument. Reporting the number of triples
 1023%     is intended to support query optimization.
 1024%
 1025%     * rdf_subject_branch_factor(-Float)
 1026%     Unify Float with the average number of triples associated with
 1027%     each unique value for the subject-side of this relation. If
 1028%     there are no triples the value 0.0 is returned. This value is
 1029%     cached with the predicate and recomputed only after
 1030%     substantial changes to the triple set associated to this
 1031%     relation. This property is intended for path optimisation
 1032%     when solving conjunctions of rdf/3 goals.
 1033%
 1034%     * rdf_object_branch_factor(-Float)
 1035%     Unify Float with the average number of triples associated with
 1036%     each unique value for the object-side of this relation. In
 1037%     addition to the comments with the =rdf_subject_branch_factor=
 1038%     property, uniqueness of the object value is computed from the
 1039%     hash key rather than the actual values.
 1040%
 1041%     * rdfs_subject_branch_factor(-Float)
 1042%     Same as =rdf_subject_branch_factor=, but also considering
 1043%     triples of `subPropertyOf' this relation. See also rdf_has/3.
 1044%
 1045%     * rdfs_object_branch_factor(-Float)
 1046%     Same as =rdf_object_branch_factor=, but also considering
 1047%     triples of `subPropertyOf' this relation. See also rdf_has/3.
 1048%
 1049%   @see rdf_set_predicate/2.
 1050
 1051rdf_predicate_property(P, Prop) :-
 1052    var(P),
 1053    !,
 1054    rdf_current_predicate(P),
 1055    rdf_predicate_property_(P, Prop).
 1056rdf_predicate_property(P, Prop) :-
 1057    rdf_predicate_property_(P, Prop).
 1058
 1059%!  rdf_set_predicate(+Predicate, +Property) is det.
 1060%
 1061%   Define a property of  the   predicate.  This predicate currently
 1062%   supports the following properties:
 1063%
 1064%       - symmetric(+Boolean)
 1065%       Set/unset the predicate as being symmetric.  Using
 1066%       symmetric(true) is the same as inverse_of(Predicate),
 1067%       i.e., creating a predicate that is the inverse of
 1068%       itself.
 1069%       - transitive(+Boolean)
 1070%       Sets the transitive property.
 1071%       - inverse_of(+Predicate2)
 1072%       Define Predicate as the inverse of Predicate2. An inverse
 1073%       relation is deleted using inverse_of([]).
 1074%
 1075%   The `transitive` property is currently not used. The `symmetric`
 1076%   and `inverse_of` properties are considered   by  rdf_has/3,4 and
 1077%   rdf_reachable/3.
 1078%
 1079%   @tbd    Maintain these properties based on OWL triples.
 1080
 1081
 1082                 /*******************************
 1083                 *            SNAPSHOTS         *
 1084                 *******************************/
 1085
 1086%!  rdf_snapshot(-Snapshot) is det.
 1087%
 1088%   Take a snapshot of the current state   of  the RDF store. Later,
 1089%   goals may be executed in the  context   of  the database at this
 1090%   moment using rdf_transaction/3 with  the   =snapshot=  option. A
 1091%   snapshot created outside  a  transaction   exists  until  it  is
 1092%   deleted. Snapshots taken inside a transaction   can only be used
 1093%   inside this transaction.
 1094
 1095%!  rdf_delete_snapshot(+Snapshot) is det.
 1096%
 1097%   Delete a snapshot as obtained   from  rdf_snapshot/1. After this
 1098%   call, resources used for maintaining the snapshot become subject
 1099%   to garbage collection.
 1100
 1101%!  rdf_current_snapshot(?Term) is nondet.
 1102%
 1103%   True when Term is a currently known snapshot.
 1104%
 1105%   @bug    Enumeration of snapshots is slow.
 1106
 1107rdf_current_snapshot(Term) :-
 1108    current_blob(Term, rdf_snapshot).
 1109
 1110
 1111                 /*******************************
 1112                 *          TRANSACTION         *
 1113                 *******************************/
 1114
 1115%!  rdf_transaction(:Goal) is semidet.
 1116%
 1117%   Same as rdf_transaction(Goal, user, []).  See rdf_transaction/3.
 1118
 1119%!  rdf_transaction(:Goal, +Id) is semidet.
 1120%
 1121%   Same as rdf_transaction(Goal, Id, []).  See rdf_transaction/3.
 1122
 1123%!  rdf_transaction(:Goal, +Id, +Options) is semidet.
 1124%
 1125%   Run Goal in an RDF  transaction.   Compared to the ACID model,
 1126%   RDF transactions have the following properties:
 1127%
 1128%     1. Modifications inside the transactions become all atomically
 1129%        visible to the outside world if Goal succeeds or remain
 1130%        invisible if Goal fails or throws an exception.  I.e.,
 1131%        the _atomicity_ property is fully supported.
 1132%     2. _Consistency_ is not guaranteed. Later versions may
 1133%        implement consistency constraints that will be checked
 1134%        serialized just before the actual commit of a transaction.
 1135%     3. Concurrently executing transactions do not influence each
 1136%        other.  I.e., the _isolation_ property is fully supported.
 1137%     4. _Durability_ can be activated by loading
 1138%        library(semweb/rdf_persistency).
 1139%
 1140%   Processed options are:
 1141%
 1142%     * snapshot(+Snapshot)
 1143%     Execute Goal using the state of the RDF store as stored in
 1144%     Snapshot.  See rdf_snapshot/1.  Snapshot can also be the
 1145%     atom =true=, which implies that an anonymous snapshot is
 1146%     created at the current state of the store.  Modifications
 1147%     due to executing Goal are only visible to Goal.
 1148
 1149rdf_transaction(Goal) :-
 1150    rdf_transaction(Goal, user, []).
 1151rdf_transaction(Goal, Id) :-
 1152    rdf_transaction(Goal, Id, []).
 1153
 1154%!  rdf_active_transaction(?Id) is nondet.
 1155%
 1156%   True if Id is the identifier of  a transaction in the context of
 1157%   which  this  call  is  executed.  If  Id  is  not  instantiated,
 1158%   backtracking yields transaction identifiers   starting  with the
 1159%   innermost nested transaction. Transaction   identifier terms are
 1160%   not copied, need not be ground   and  can be instantiated during
 1161%   the transaction.
 1162
 1163rdf_active_transaction(Id) :-
 1164    rdf_active_transactions_(List),
 1165    member(Id, List).
 1166
 1167%!  rdf_monitor(:Goal, +Options)
 1168%
 1169%   Call Goal if specified actions occur on the database.
 1170
 1171rdf_monitor(Goal, Options) :-
 1172    monitor_mask(Options, 0xffff, Mask),
 1173    rdf_monitor_(Goal, Mask).
 1174
 1175monitor_mask([], Mask, Mask).
 1176monitor_mask([H|T], Mask0, Mask) :-
 1177    update_mask(H, Mask0, Mask1),
 1178    monitor_mask(T, Mask1, Mask).
 1179
 1180update_mask(-X, Mask0, Mask) :-
 1181    !,
 1182    monitor_mask(X, M),
 1183    Mask is Mask0 /\ \M.
 1184update_mask(+X, Mask0, Mask) :-
 1185    !,
 1186    monitor_mask(X, M),
 1187    Mask is Mask0 \/ M.
 1188update_mask(X, Mask0, Mask) :-
 1189    monitor_mask(X, M),
 1190    Mask is Mask0 \/ M.
 1191
 1192%!  monitor_mask(Name, Mask)
 1193%
 1194%   Mask bit for the monitor events.  Note that this must be kept
 1195%   consistent with the enum broadcast_id defined in rdf_db.c
 1196
 1197                                        % C-defined broadcasts
 1198monitor_mask(assert,       0x0001).
 1199monitor_mask(assert(load), 0x0002).
 1200monitor_mask(retract,      0x0004).
 1201monitor_mask(update,       0x0008).
 1202monitor_mask(new_literal,  0x0010).
 1203monitor_mask(old_literal,  0x0020).
 1204monitor_mask(transaction,  0x0040).
 1205monitor_mask(load,         0x0080).
 1206monitor_mask(create_graph, 0x0100).
 1207monitor_mask(reset,        0x0200).
 1208                                        % prolog defined broadcasts
 1209monitor_mask(parse,        0x1000).
 1210monitor_mask(unload,       0x1000).     % FIXME: Duplicate
 1211                                        % mask for all
 1212monitor_mask(all,          0xffff).
 1213
 1214%rdf_broadcast(Term, MaskName) :-
 1215%%      monitor_mask(MaskName, Mask),
 1216%%      rdf_broadcast_(Term, Mask).
 1217
 1218
 1219                 /*******************************
 1220                 *            WARM              *
 1221                 *******************************/
 1222
 1223%!  rdf_warm_indexes
 1224%
 1225%   Warm all indexes.  See rdf_warm_indexes/1.
 1226
 1227rdf_warm_indexes :-
 1228    findall(Index, rdf_index(Index), Indexes),
 1229    rdf_warm_indexes(Indexes).
 1230
 1231rdf_index(s).
 1232rdf_index(p).
 1233rdf_index(o).
 1234rdf_index(sp).
 1235rdf_index(o).
 1236rdf_index(po).
 1237rdf_index(spo).
 1238rdf_index(g).
 1239rdf_index(sg).
 1240rdf_index(pg).
 1241
%!  rdf_warm_indexes(+Indexes) is det.
%
%   Create the named indexes.  Normally,   the  RDF database creates
%   indexes lazily, the first time they  are needed.  This predicate
%   serves two purposes: it provides an   explicit  way to make sure
%   that the required indexes  are   present  and  creating multiple
%   indexes at the same time is more efficient.
 1249
 1250
 1251                 /*******************************
 1252                 *          DUPLICATES          *
 1253                 *******************************/
 1254
 1255%!  rdf_update_duplicates is det.
 1256%
 1257%   Update the duplicate administration of the RDF store. This marks
 1258%   every triple that  is  potentially  a   duplicate  of another as
 1259%   duplicate. Being potentially a  duplicate   means  that subject,
 1260%   predicate and object are equivalent and   the  life-times of the
 1261%   two triples overlap.
 1262%
 1263%   The duplicates marks are used to  reduce the administrative load
 1264%   of avoiding duplicate answers.  Normally,   the  duplicates  are
 1265%   marked using a background thread that   is  started on the first
 1266%   query that produces a substantial amount of duplicates.
 1267
 1268:- public
 1269    rdf_update_duplicates_thread/0. 1270
 1271%!  rdf_update_duplicates_thread
 1272%
 1273%   Start a thread to initialize the duplicate administration.
 1274
 1275rdf_update_duplicates_thread :-
 1276    thread_create(rdf_update_duplicates, _,
 1277                  [ detached(true),
 1278                    alias('__rdf_duplicate_detecter')
 1279                  ]).
 1280
%!  rdf_update_duplicates is det.
%
%   Update the duplicate administration. If  this  administration is
%   up-to-date, each triple that _may_ have a duplicate is  flagged.
%   The predicate rdf/3 uses this administration to speed up checking
%   for duplicate answers.
%
%   This predicate is normally  executed   from  a background thread
%   named =__rdf_duplicate_detecter= which is created   when a query
%   discovers that checking for duplicates becomes too expensive.
 1291
 1292
 1293                 /*******************************
 1294                 *    QUICK BINARY LOAD/SAVE    *
 1295                 *******************************/
 1296
 1297%!  rdf_save_db(+File) is det.
 1298%!  rdf_save_db(+File, +Graph) is det.
 1299%
 1300%   Save triples into File in a   quick-to-load binary format. If Graph
 1301%   is supplied only triples flagged to originate from that database
 1302%   are  added.  Files  created  this  way    can  be  loaded  using
 1303%   rdf_load_db/1.
 1304
 1305:- create_prolog_flag(rdf_triple_format, 3, [type(integer)]). 1306
 1307rdf_save_db(File) :-
 1308    current_prolog_flag(rdf_triple_format, Version),
 1309    setup_call_cleanup(
 1310        open(File, write, Out, [type(binary)]),
 1311        ( set_stream(Out, record_position(false)),
 1312          rdf_save_db_(Out, _, Version)
 1313        ),
 1314        close(Out)).
 1315
 1316
 1317rdf_save_db(File, Graph) :-
 1318    current_prolog_flag(rdf_triple_format, Version),
 1319    setup_call_cleanup(
 1320        open(File, write, Out, [type(binary)]),
 1321        ( set_stream(Out, record_position(false)),
 1322          rdf_save_db_(Out, Graph, Version)
 1323        ),
 1324        close(Out)).
 1325
 1326
 1327%!  rdf_load_db_no_admin(+File, +Id, -Graphs) is det.
 1328%
 1329%   Load triples from a  .trp  file   without  updating  the  source
 1330%   administration. Id is  handled  to   monitor  action.  Graphs is
 1331%   a list of graph-names encountered in File.
 1332
 1333rdf_load_db_no_admin(File, Id, Graphs) :-
 1334    open(File, read, In, [type(binary)]),
 1335    set_stream(In, record_position(false)),
 1336    call_cleanup(rdf_load_db_(In, Id, Graphs), close(In)).
 1337
 1338
 1339%!  check_loaded_cache(+Graph, +Graphs, +Modified) is det.
 1340%
 1341%   Verify the loaded cache file and optionally fix the modification
 1342%   time (new versions save this along with the snapshot).
 1343%
 1344%   @tbd    What to do if there is a cache mismatch? Delete the loaded
 1345%           graphs and fail?
 1346
 1347check_loaded_cache(DB, [DB], _Modified) :- !.
 1348check_loaded_cache(DB, Graphs, _) :-
 1349    print_message(warning, rdf(inconsistent_cache(DB, Graphs))).
 1350
 1351
 1352%!  rdf_load_db(+File) is det.
 1353%
 1354%   Load triples from a file created using rdf_save_db/2.
 1355
 1356rdf_load_db(File) :-
 1357    uri_file_name(URL, File),
 1358    rdf_load_db_no_admin(File, URL, _Graphs).
 1359
 1360
 1361                 /*******************************
 1362                 *          LOADING RDF         *
 1363                 *******************************/
 1364
 1365:- multifile
 1366    rdf_open_hook/8,
 1367    rdf_open_decode/4,              % +Encoding, +File, -Stream, -Cleanup
 1368    rdf_load_stream/3,              % +Format, +Stream, +Options
 1369    rdf_file_type/2,                % ?Extension, ?Format
 1370    rdf_storage_encoding/2,         % ?Extension, ?Encoding
 1371    url_protocol/1.                 % ?Protocol
 1372
 1373%!  rdf_load(+FileOrList) is det.
 1374%
 1375%   Same as rdf_load(FileOrList, []).  See rdf_load/2.
 1376
 1377%!  rdf_load(+FileOrList, :Options) is det.
 1378%
 1379%   Load RDF data. If this predicate is called a second time
 1380%   for the same file, it is by default treated as a no-op.
 1381%   See option =if(changed)=.
 1382%
 1383%   Options provides   additional processing options.
 1384%   Defined options are:
 1385%
 1386%       * blank_nodes(+ShareMode)
 1387%       How to handle equivalent blank nodes.  If =share= (default),
 1388%       equivalent blank nodes are shared in the same resource.
 1389%
 1390%       * base_uri(+URI)
 1391%       URI that is used for rdf:about="" and other RDF constructs
 1392%       that are relative to the base uri.  Default is the source
 1393%       URL.
 1394%
 1395%       * concurrent(+Jobs)
 1396%       If FileOrList is a list of files, process the input files
 1397%       using Jobs threads concurrently.  Default is the minimum
 1398%       of the number of cores and the number of inputs.  Higher
 1399%       values can be useful when loading inputs from (slow)
 1400%       network connections.  Using 1 (one) does not use
 1401%       separate worker threads.
 1402%
 1403%       * format(+Format)
 1404%       Specify the source format explicitly. Normally this is
 1405%       deduced from the filename extension or the mime-type. The
 1406%       core library understands the formats xml (RDF/XML) and
 1407%       triples (internal quick load and cache format).  Plugins,
 1408%       such as library(semweb/turtle) extend the set of recognised
 1409%       extensions.
 1410%
 1411%       * graph(?Graph)
 1412%       Named graph in which to load the data.  It is *not* allowed
 1413%       to load two sources into the same named graph.  If Graph is
 1414%       unbound, it is unified to the graph into which the data is
 1415%       loaded.  The default graph is a =|file://|= URL when loading
 1416%       a file or, if the specification is a URL, its normalized
 1417%       version without the optional _|#fragment|_.
 1418%
 1419%       * if(Condition)
 1420%       When to load the file. One of =true=, =changed= (default) or
 1421%       =not_loaded=.
 1422%
 1423%       * modified(-Modified)
 1424%       Unify Modified with one of =not_modified=, cached(File),
 1425%       last_modified(Stamp) or =unknown=.
 1426%
 1427%       * cache(Bool)
 1428%       If =false=, do not use or create a cache file.
 1429%
 1430%       * register_namespaces(Bool)
 1431%       If =true= (default =false=), register =xmlns= namespace
 1432%       declarations or Turtle =|@prefix|= prefixes using
 1433%       rdf_register_prefix/3 if there is no conflict.
 1434%
 1435%       * silent(+Bool)
 1436%       If =true=, the message reporting completion is printed using
 1437%       level =silent=. Otherwise the level is =informational=. See
 1438%       also print_message/2.
 1439%
 1440%       * prefixes(-Prefixes)
 1441%       Returns the prefixes defined in the source   data file as a list
 1442%       of pairs.
 1443%
 1444%       * multifile(+Boolean)
 1445%       Indicate that the addressed graph may be populated with
 1446%       triples from multiple sources. This disables caching and
 1447%       avoids that an rdf_load/2 call affecting the specified
 1448%       graph cleans the graph.
 1449%
 1450%   Other  options  are  forwarded  to  process_rdf/3.  By  default,
 1451%   rdf_load/2 only loads RDF/XML from files.  It can be extended to
 1452%   load data from other formats and   locations  using plugins. The
 1453%   full set of plugins relevant to   support  different formats and
 1454%   locations is below:
 1455%
 1456%     ==
 1457%     :- use_module(library(semweb/turtle)).        % Turtle and TriG
 1458%     :- use_module(library(semweb/rdf_ntriples)).
 1459%     :- use_module(library(semweb/rdf_zlib_plugin)).
 1460%     :- use_module(library(semweb/rdf_http_plugin)).
 1461%     :- use_module(library(http/http_ssl_plugin)).
 1462%     ==
 1463%
%   @see    rdf_db:rdf_open_hook/8, library(semweb/rdf_persistency) and
%           library(semweb/rdf_cache)
 1466
 1467:- dynamic
 1468    rdf_loading/3.                          % Graph, Queue, Thread
 1469
 1470rdf_load(Spec) :-
 1471    rdf_load(Spec, []).
 1472
 1473:- if(\+current_predicate(concurrent/3)). 1474concurrent(_, Goals, _) :-
 1475    forall(member(G, Goals), call(G)).
 1476:- endif. 1477
 1478% Note that we kill atom garbage collection.  This improves performance
 1479% with about 15% loading the LUBM Univ_50 benchmark.
 1480
 1481rdf_load(Spec, M:Options) :-
 1482    must_be(list, Options),
 1483    current_prolog_flag(agc_margin, Old),
 1484    setup_call_cleanup(
 1485        set_prolog_flag(agc_margin, 0),
 1486        rdf_load_noagc(Spec, M, Options),
 1487        set_prolog_flag(agc_margin, Old)).
 1488
 1489rdf_load_noagc(List, M, Options) :-
 1490    is_list(List),
 1491    !,
 1492    flatten(List, Inputs),          % Compatibility: allow nested lists
 1493    maplist(must_be(ground), Inputs),
 1494    length(Inputs, Count),
 1495    load_jobs(Count, Jobs, Options),
 1496    (   Jobs =:= 1
 1497    ->  forall(member(Spec, Inputs),
 1498               rdf_load_one(Spec, M, Options))
 1499    ;   maplist(load_goal(Options, M), Inputs, Goals),
 1500        concurrent(Jobs, Goals, [])
 1501    ).
 1502rdf_load_noagc(One, M, Options) :-
 1503    must_be(ground, One),
 1504    rdf_load_one(One, M, Options).
 1505
 1506load_goal(Options, M, Spec, rdf_load_one(Spec, M, Options)).
 1507
 1508load_jobs(_, Jobs, Options) :-
 1509    option(concurrent(Jobs), Options),
 1510    !,
 1511    must_be(positive_integer, Jobs).
 1512load_jobs(Count, Jobs, _) :-
 1513    current_prolog_flag(cpu_count, CPUs),
 1514    CPUs > 0,
 1515    !,
 1516    Jobs is max(1, min(CPUs, Count)).
 1517load_jobs(_, 1, _).
 1518
 1519
 1520rdf_load_one(Spec, M, Options) :-
 1521    source_url(Spec, Protocol, SourceURL),
 1522    load_graph(SourceURL, Graph, Options),
 1523    setup_call_cleanup(
 1524        with_mutex(rdf_load_file,
 1525                   rdf_start_load(SourceURL, Loading)),
 1526        rdf_load_file(Loading, Spec, SourceURL, Protocol,
 1527                      Graph, M, Options),
 1528        rdf_end_load(Loading)).
 1529
 1530%!  rdf_start_load(+SourceURL, -WhatToDo) is det.
 1531%!  rdf_end_load(+WhatToDo) is det.
 1532%!  rdf_load_file(+WhatToDo, +Spec, +SourceURL, +Protocol, +Graph,
 1533%!                +Module, +Options) is det.
 1534%
 1535%   Of these three predicates, rdf_load_file/7   does the real work.
 1536%   The others deal with the  possibility   that  the graph is being
 1537%   loaded by another thread. In that case,   we  wait for the other
 1538%   thread to complete the work.
 1539%
 1540%   @tbd    What if both threads disagree on what is loaded into the
 1541%           graph?
 1542%   @see    Code is modelled closely after how concurrent loading
 1543%           is handled in SWI-Prolog's boot/init.pl
 1544
 1545rdf_start_load(SourceURL, queue(Queue)) :-
 1546    rdf_loading(SourceURL, Queue, LoadThread),
 1547    \+ thread_self(LoadThread),
 1548    !,
 1549    debug(rdf(load), '~p is being loaded by thread ~w; waiting ...',
 1550          [ SourceURL, LoadThread]).
 1551rdf_start_load(SourceURL, Ref) :-
 1552    thread_self(Me),
 1553    message_queue_create(Queue),
 1554    assertz(rdf_loading(SourceURL, Queue, Me), Ref).
 1555
 1556rdf_end_load(queue(_)) :- !.
 1557rdf_end_load(Ref) :-
 1558    clause(rdf_loading(_, Queue, _), _, Ref),
 1559    erase(Ref),
 1560    thread_send_message(Queue, done),
 1561    message_queue_destroy(Queue).
 1562
 1563rdf_load_file(queue(Queue), _Spec, _SourceURL, _Protocol, _Graph, _M, _Options) :-
 1564    !,
 1565    catch(thread_get_message(Queue, _), _, true).
 1566rdf_load_file(_Ref, _Spec, SourceURL, Protocol, Graph, M, Options) :-
 1567    debug(rdf(load), 'RDF: Loading ~q into ~q', [SourceURL, Graph]),
 1568    statistics(cputime, T0),
 1569    rdf_open_input(SourceURL, Protocol, Graph,
 1570                   In, Cleanup, Modified, Format, Options),
 1571    supported_format(Format, Cleanup),
 1572    return_modified(Modified, Options),
 1573    (   Modified == not_modified
 1574    ->  Action = none
 1575    ;   Modified = cached(CacheFile)
 1576    ->  do_unload(Graph),
 1577        catch(rdf_load_db_no_admin(CacheFile, cache(Graph), Graphs), _, fail),
 1578        check_loaded_cache(Graph, Graphs, Modified),
 1579        Action = load
 1580    ;   option(base_uri(BaseURI), Options, Graph),
 1581        (   var(BaseURI)
 1582        ->  BaseURI = SourceURL
 1583        ;   true
 1584        ),
 1585        once(phrase(derived_options(Options, NSList), Extra)),
 1586        merge_options([ base_uri(BaseURI),
 1587                        graph(Graph),
 1588                        format(Format)
 1589                      | Extra
 1590                      ], Options, RDFOptions),
 1591        (   option(multifile(true), Options)
 1592        ->  true
 1593        ;   do_unload(Graph)
 1594        ),
 1595        graph_modified(Modified, ModifiedStamp),
 1596        rdf_set_graph_source(Graph, SourceURL, ModifiedStamp),
 1597        call_cleanup(rdf_load_stream(Format, In, M:RDFOptions),
 1598                     Cleanup),
 1599        save_cache(Graph, SourceURL, Options),
 1600        register_file_prefixes(NSList),
 1601        format_action(Format, Action)
 1602    ),
 1603    rdf_statistics_(triples(Graph, Triples)),
 1604    report_loaded(Action, SourceURL, Graph, Triples, T0, Options).
 1605
 1606supported_format(Format, _Cleanup) :-
 1607    rdf_file_type(_, Format),
 1608    !.
 1609supported_format(Format, Cleanup) :-
 1610    call(Cleanup),
 1611    existence_error(rdf_format_plugin, Format).
 1612
 1613format_action(triples, load) :- !.
 1614format_action(_, parsed).
 1615
 1616save_cache(Graph, SourceURL, Options) :-
 1617    option(cache(true), Options, true),
 1618    rdf_cache_file(SourceURL, write, CacheFile),
 1619    !,
 1620    catch(save_cache(Graph, CacheFile), E,
 1621          print_message(warning, E)).
 1622save_cache(_, _, _).
 1623
 1624derived_options([], _) -->
 1625    [].
 1626derived_options([H|T], NSList) -->
 1627    (   {   H == register_namespaces(true)
 1628        ;   H == (register_namespaces = true)
 1629        }
 1630    ->  [ namespaces(NSList) ]
 1631    ;   []
 1632    ),
 1633    derived_options(T, NSList).
 1634
 1635graph_modified(last_modified(Stamp), Stamp).
 1636graph_modified(unknown, Stamp) :-
 1637    get_time(Stamp).
 1638
 1639return_modified(Modified, Options) :-
 1640    option(modified(M0), Options),
 1641    !,
 1642    M0 = Modified.
 1643return_modified(_, _).
 1644
 1645
 1646                 /*******************************
 1647                 *        INPUT HANDLING        *
 1648                 *******************************/
 1649
 1650/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 1651This section deals with pluggable input sources.  The task of the input
 1652layer is
 1653
 1654    * Decide on the graph-name
 1655    * Decide on the source-location
 1656    * Decide whether loading is needed (if-modified)
 1657    * Decide on the serialization in the input
 1658
 1659The protocol must ensure minimal  overhead,   in  particular for network
 1660protocols. E.g. for HTTP we want to make a single call on the server and
use If-modified-since to verify that we need not reload this file.
 1662- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 1663
 1664%!  rdf_open_input(+SourceURL, +Protocol, +Graph,
 1665%!                 -Stream, -Cleanup, -Modified, -Format, +Options)
 1666%
 1667%   Open an input source.
 1668%
 1669%   Options processed:
 1670%
 1671%       * graph(Graph)
 1672%       * db(Graph)
 1673%       * if(Condition)
 1674%       * cache(Cache)
 1675%       * format(Format)
 1676%
 1677%   @param  Modified is one of =not_modified=, last_modified(Time),
 1678%           cached(CacheFile) or =unknown=
 1679
 1680rdf_open_input(SourceURL, Protocol, Graph,
 1681               Stream, Cleanup, Modified, Format, Options) :-
 1682    (   option(multifile(true), Options)
 1683    ->  true
 1684    ;   option(if(If), Options, changed),
 1685        (   If == true
 1686        ->  true
 1687        ;   rdf_graph_source_(Graph, SourceURL, HaveModified)
 1688        ->  true
 1689        ;   option(cache(true), Options, true),
 1690            rdf_cache_file(SourceURL, read, CacheFile)
 1691        ->  time_file(CacheFile, HaveModified)
 1692        ;   true
 1693        )
 1694    ),
 1695    option(format(Format), Options, _),
 1696    open_input_if_modified(Protocol, SourceURL, HaveModified,
 1697                           Stream, Cleanup, Modified0, Format, Options),
 1698    (   Modified0 == not_modified
 1699    ->  (   nonvar(CacheFile)
 1700        ->  Modified = cached(CacheFile)
 1701        ;   Modified = not_modified
 1702        )
 1703    ;   Modified = Modified0
 1704    ).
 1705
 1706
 1707%!  source_url(+Spec, -Class, -SourceURL) is det.
 1708%
 1709%   Determine class and url of the source.  Class is one of
 1710%
 1711%       * stream(Stream)
 1712%       * file
 1713%       * a url-protocol (e.g., =http=)
 1714
 1715source_url(stream(In), stream(In), SourceURL) :-
 1716    !,
 1717    (   stream_property(In, file_name(File))
 1718    ->  to_url(File, SourceURL)
 1719    ;   gensym('stream://', SourceURL)
 1720    ).
 1721source_url(Stream, Class, SourceURL) :-
 1722    is_stream(Stream),
 1723    !,
 1724    source_url(stream(Stream), Class, SourceURL).
 1725source_url(Spec, Protocol, SourceURL) :-
 1726    compound(Spec),
 1727    !,
 1728    source_file(Spec, Protocol, SourceURL).
 1729source_url(FileURL, Protocol, SourceURL) :-             % or return FileURL?
 1730    uri_file_name(FileURL, File),
 1731    !,
 1732    source_file(File, Protocol, SourceURL).
 1733source_url(SourceURL0, Protocol, SourceURL) :-
 1734    is_url(SourceURL0, Protocol, SourceURL),
 1735    !.
 1736source_url(File, Protocol, SourceURL) :-
 1737    source_file(File, Protocol, SourceURL).
 1738
 1739source_file(Spec, file(SExt), SourceURL) :-
 1740    findall(Ext, valid_extension(Ext), Exts),
 1741    absolute_file_name(Spec, File, [access(read), extensions([''|Exts])]),
 1742    storage_extension(_Plain, SExt, File),
 1743    uri_file_name(SourceURL, File).
 1744
 1745to_url(URL, URL) :-
 1746    uri_is_global(URL),
 1747    !.
 1748to_url(File, URL) :-
 1749    absolute_file_name(File, Path),
 1750    uri_file_name(URL, Path).
 1751
 1752storage_extension(Plain, SExt, File) :-
 1753    file_name_extension(Plain, SExt, File),
 1754    SExt \== '',
 1755    rdf_storage_encoding(SExt, _),
 1756    !.
 1757storage_extension(File, '', File).
 1758
 1759%!  load_graph(+SourceURL, -Graph, +Options) is det.
 1760%
 1761%   Graph is the graph into which  we   load  the  data. Tries these
 1762%   options:
 1763%
 1764%     1. The graph(Graph) option
 1765%     2. The db(Graph) option (backward compatibility)
 1766%     3. The base_uri(BaseURI) option
 1767%     4. The source URL
 1768
 1769load_graph(_Source, Graph, Options) :-
 1770    option(multifile(true), Options),
 1771    !,
 1772    (   (   option(graph(Graph), Options)
 1773        ->  true
 1774        ;   option(db(Graph), Options)
 1775        ),
 1776        ground(Graph)
 1777    ->  true
 1778    ;   throw(error(existence_error(option, graph),
 1779                    context(_, "rdf_load/2: using multifile requires graph")))
 1780    ).
 1781load_graph(Source, Graph, Options) :-
 1782    (   option(graph(Graph), Options)
 1783    ;   option(db(Graph), Options)
 1784    ),
 1785    !,
 1786    load_graph2(Source, Graph, Options).
 1787load_graph(Source, Graph, Options) :-
 1788    load_graph2(Source, Graph, Options).
 1789
 1790load_graph2(_, Graph, _) :-
 1791    ground(Graph),
 1792    !.
 1793load_graph2(_Source, Graph, Options) :-
 1794    option(base_uri(Graph), Options),
 1795    Graph \== [],
 1796    ground(Graph),
 1797    !.
 1798load_graph2(Source, Graph, _) :-
 1799    load_graph(Source, Graph).
 1800
 1801load_graph(SourceURL, BaseURI) :-
 1802    file_name_extension(BaseURI, Ext, SourceURL),
 1803    rdf_storage_encoding(Ext, _),
 1804    !.
 1805load_graph(SourceURL, SourceURL).
 1806
 1807
 1808open_input_if_modified(stream(In), SourceURL, _, In, true,
 1809                       unknown, Format, _) :-
 1810    !,
 1811    (   var(Format)
 1812    ->  guess_format(SourceURL, Format)
 1813    ;   true
 1814    ).
 1815open_input_if_modified(file(SExt), SourceURL, HaveModified, Stream, Cleanup,
 1816                       Modified, Format, _) :-
 1817    !,
 1818    uri_file_name(SourceURL, File),
 1819    (   SExt == '' -> Plain = File; file_name_extension(Plain, SExt, File)),
 1820    time_file(File, LastModified),
 1821    (   nonvar(HaveModified),
 1822        HaveModified >= LastModified
 1823    ->  Modified = not_modified,
 1824        Cleanup = true
 1825    ;   storage_open(SExt, File, Stream, Cleanup),
 1826        Modified = last_modified(LastModified),
 1827        (   var(Format)
 1828        ->  guess_format(Plain, Format)
 1829        ;   true
 1830        )
 1831    ).
 1832open_input_if_modified(file, SourceURL, HaveModified, Stream, Cleanup,
 1833                       Modified, Format, Options) :-
 1834    !,
 1835    open_input_if_modified(file(''), SourceURL, HaveModified,
 1836                           Stream, Cleanup,
 1837                           Modified, Format, Options).
 1838open_input_if_modified(Protocol, SourceURL, HaveModified, Stream, Cleanup,
 1839                       Modified, Format, Options) :-
 1840    rdf_open_hook(Protocol, SourceURL, HaveModified, Stream, Cleanup,
 1841                  Modified, Format, Options).
 1842
 1843guess_format(File, Format) :-
 1844    file_name_extension(_, Ext, File),
 1845    (   rdf_file_type(Ext, Format)
 1846    ->  true
 1847    ;   Format = xml,
 1848        print_message(warning, rdf(guess_format(Ext)))
 1849    ).
 1850
 1851%!  storage_open(+Extension, +File, -Stream, -Cleanup)
 1852%
 1853%   Open the low-level storage. Note  that   the  file  is opened as
 1854%   binary. This is the same  as   for  HTTP  resources. The correct
 1855%   encoding will be set by the XML parser or the Turtle parser.
 1856
 1857storage_open('', File, Stream, close(Stream)) :-
 1858    !,
 1859    open(File, read, Stream, [type(binary)]).
 1860storage_open(Ext, File, Stream, Cleanup) :-
 1861    rdf_storage_encoding(Ext, Encoding),
 1862    rdf_open_decode(Encoding, File, Stream, Cleanup).
 1863
 1864valid_extension(Ext) :-
 1865    rdf_file_type(Ext, _).
 1866valid_extension(Ext) :-
 1867    rdf_storage_encoding(Ext, _).
 1868
 1869%!  is_url(@Term, -Scheme, -URL) is semidet.
 1870%
 1871%   True if Term is an atom denoting URL of the given Scheme. URL is
 1872%   normalized  (see  uri_normalized/2)  and   a  possible  fragment
 1873%   identifier (#fragment) is removed. This  predicate only succeeds
 1874%   if  the  scheme  is   registered    using   the  multifile  hook
 1875%   url_protocol/1.
 1876
 1877is_url(URL, Scheme, FetchURL) :-
 1878    atom(URL),
 1879    uri_is_global(URL),
 1880    uri_normalized(URL, URL1),              % case normalization
 1881    uri_components(URL1, Components),
 1882    uri_data(scheme, Components, Scheme0),
 1883    url_protocol(Scheme0),
 1884    !,
 1885    Scheme = Scheme0,
 1886    uri_data(fragment, Components, _, Components1),
 1887    uri_components(FetchURL, Components1).
 1888
 1889url_protocol(file).                     % built-in
 1890
 1891%!  rdf_file_type(+Extension, -Format) is semidet.
 1892%
 1893%   True if Format  is  the  format   belonging  to  the  given file
 1894%   extension.  This predicate is multifile and can thus be extended
 1895%   by plugins.
 1896
 1897rdf_file_type(xml,   xml).
 1898rdf_file_type(rdf,   xml).
 1899rdf_file_type(rdfs,  xml).
 1900rdf_file_type(owl,   xml).
 1901rdf_file_type(htm,   xhtml).
 1902rdf_file_type(html,  xhtml).
 1903rdf_file_type(xhtml, xhtml).
 1904rdf_file_type(trp,   triples).
 1905
 1906
 1907%!  rdf_file_encoding(+Extension, -Format) is semidet.
 1908%
 1909%   True if Format describes the storage encoding of file.
 1910
 1911rdf_storage_encoding('', plain).
 1912
 1913
 1914%!  rdf_load_stream(+Format, +Stream, :Options)
 1915%
 1916%   Load RDF data from Stream.
 1917%
 1918%   @tbd    Handle mime-types?
 1919
 1920rdf_load_stream(xml, Stream, Options) :-
 1921    !,
 1922    graph(Options, Graph),
 1923    rdf_transaction(load_stream(Stream, Options),
 1924                    parse(Graph)).
 1925rdf_load_stream(xhtml, Stream, M:Options) :-
 1926    !,
 1927    graph(Options, Graph),
 1928    rdf_transaction(load_stream(Stream, M:[embedded(true)|Options]),
 1929                    parse(Graph)).
 1930rdf_load_stream(triples, Stream, Options) :-
 1931    !,
 1932    graph(Options, Graph),
 1933    rdf_load_db_(Stream, Graph, _Graphs).
 1934
 1935load_stream(Stream, M:Options) :-
 1936    process_rdf(Stream, assert_triples, M:Options),
 1937    option(graph(Graph), Options),
 1938    rdf_graph_clear_modified_(Graph).
 1939
 1940
 1941%!  report_loaded(+Action, +Source, +DB, +Triples, +StartCPU, +Options)
 1942
 1943report_loaded(none, _, _, _, _, _) :- !.
 1944report_loaded(Action, Source, DB, Triples, T0, Options) :-
 1945    statistics(cputime, T1),
 1946    Time is T1 - T0,
 1947    (   option(silent(true), Options)
 1948    ->  Level = silent
 1949    ;   Level = informational
 1950    ),
 1951    print_message(Level,
 1952                  rdf(loaded(Action, Source, DB, Triples, Time))).
 1953
 1954
 1955%!  rdf_unload(+Source) is det.
 1956%
 1957%   Identify the graph loaded from Source and use rdf_unload_graph/1
 1958%   to erase this graph.
 1959%
 1960%   @deprecated     For compatibility, this predicate also accepts a
 1961%                   graph name instead of a source specification.
 1962%                   Please update your code to use
 1963%                   rdf_unload_graph/1.
 1964
 1965rdf_unload(Spec) :-
 1966    source_url(Spec, _Protocol, SourceURL),
 1967    rdf_graph_source_(Graph, SourceURL, _),
 1968    !,
 1969    rdf_unload_graph(Graph).
 1970rdf_unload(Graph) :-
 1971    atom(Graph),
 1972    rdf_graph(Graph),
 1973    !,
 1974    warn_deprecated_unload(Graph),
 1975    rdf_unload_graph(Graph).
 1976rdf_unload(_).
 1977
 1978:- dynamic
 1979    warned/0. 1980
 1981warn_deprecated_unload(_) :-
 1982    warned,
 1983    !.
 1984warn_deprecated_unload(Graph) :-
 1985    assertz(warned),
 1986    print_message(warning, rdf(deprecated(rdf_unload(Graph)))).
 1987
 1988
 1989%!  rdf_unload_graph(+Graph) is det.
 1990%
 1991%   Remove Graph from the RDF store.  Succeeds silently if the named
 1992%   graph does not exist.
 1993
 1994rdf_unload_graph(Graph) :-
 1995    must_be(atom, Graph),
 1996    (   rdf_graph(Graph)
 1997    ->  rdf_transaction(do_unload(Graph), unload(Graph))
 1998    ;   true
 1999    ).
 2000
 2001do_unload(Graph) :-
 2002    (   rdf_graph_(Graph, Triples),
 2003        Triples > 0
 2004    ->  rdf_retractall(_,_,_,Graph)
 2005    ;   true
 2006    ),
 2007    rdf_destroy_graph(Graph).
 2008
 2009                 /*******************************
 2010                 *         GRAPH QUERIES        *
 2011                 *******************************/
 2012
 2013%!  rdf_create_graph(+Graph) is det.
 2014%
 2015%   Create an RDF graph without triples.   Succeeds  silently if the
 2016%   graph already exists.
 2017
 2018
 2019%!  rdf_graph(?Graph) is nondet.
 2020%
 2021%   True when Graph is an existing graph.
 2022
 2023rdf_graph(Graph) :-
 2024    rdf_graph_(Graph, _Triples).
 2025
 2026%!  rdf_source(?Graph, ?SourceURL) is nondet.
 2027%
 2028%   True if named Graph is loaded from SourceURL.
 2029%
 2030%   @deprecated Use rdf_graph_property(Graph, source(SourceURL)).
 2031
 2032rdf_source(Graph, SourceURL) :-
 2033    rdf_graph(Graph),
 2034    rdf_graph_source_(Graph, SourceURL, _Modified).
 2035
 2036%!  rdf_source(?Source)
 2037%
 2038%   True if Source is a loaded source.
 2039%
 2040%   @deprecated     Use rdf_graph/1 or rdf_source/2.
 2041
 2042rdf_source(SourceURL) :-
 2043    rdf_source(_Graph, SourceURL).
 2044
 2045%!  rdf_make
 2046%
 2047%   Reload all loaded files that have been modified since the last
 2048%   time they were loaded.
 2049
 2050rdf_make :-
 2051    findall(Source-Graph, modified_graph(Source, Graph), Modified),
 2052    forall(member(Source-Graph, Modified),
 2053           catch(rdf_load(Source, [graph(Graph), if(changed)]), E,
 2054                 print_message(error, E))).
 2055
 2056modified_graph(SourceURL, Graph) :-
 2057    rdf_graph(Graph),
 2058    rdf_graph_source_(Graph, SourceURL, Modified),
 2059    \+ sub_atom(SourceURL, 0, _, _, 'stream://'),
 2060    Modified > 0.
 2061
 2062%!  rdf_graph_property(?Graph, ?Property) is nondet.
 2063%
 2064%   True when Property is a property of Graph.  Defined properties
 2065%   are:
 2066%
 2067%       * hash(Hash)
 2068%       Hash is the (MD5-)hash for the content of Graph.
 2069%       * modified(Boolean)
 2070%       True if the graph is modified since it was loaded or
 2071%       rdf_set_graph/2 was called with modified(false).
 2072%       * source(Source)
 2073%       The graph is loaded from the Source (a URL)
 2074%       * source_last_modified(?Time)
 2075%       Time is the last-modified timestamp of Source at the moment
 2076%       the graph was loaded from Source.
 2077%       * triples(Count)
 2078%       True when Count is the number of triples in Graph.
 2079%
 2080%    Additional graph properties can be added  by defining rules for
 2081%    the multifile predicate  property_of_graph/2.   Currently,  the
 2082%    following extensions are defined:
 2083%
 2084%       - library(semweb/rdf_persistency)
 2085%         - persistent(Boolean)
 2086%           Boolean is =true= if the graph is persistent.
 2087
 2088rdf_graph_property(Graph, Property) :-
 2089    rdf_graph(Graph),
 2090    property_of_graph(Property, Graph).
 2091
 2092:- multifile
 2093    property_of_graph/2. 2094
 2095property_of_graph(hash(Hash), Graph) :-
 2096    rdf_md5(Graph, Hash).
 2097property_of_graph(modified(Boolean), Graph) :-
 2098    rdf_graph_modified_(Graph, Boolean, _).
 2099property_of_graph(source(URL), Graph) :-
 2100    rdf_graph_source_(Graph, URL, _).
 2101property_of_graph(source_last_modified(Time), Graph) :-
 2102    rdf_graph_source_(Graph, _, Time),
 2103    Time > 0.0.
 2104property_of_graph(triples(Count), Graph) :-
 2105    rdf_graph_(Graph, Count).
 2106
 2107%!  rdf_set_graph(+Graph, +Property) is det.
 2108%
 2109%   Set properties of Graph.  Defined properties are:
 2110%
 2111%       * modified(false)
 2112%       Set the modified state of Graph to false.
 2113
 2114rdf_set_graph(Graph, modified(Modified)) :-
 2115    must_be(oneof([false]), Modified),
 2116    rdf_graph_clear_modified_(Graph).
 2117
 2118
 2119%!  save_cache(+DB, +Cache) is det.
 2120%
 2121%   Save triples belonging to DB in the file Cache.
 2122
 2123save_cache(DB, Cache) :-
 2124    current_prolog_flag(rdf_triple_format, Version),
 2125    setup_call_cleanup(
 2126        catch(open(Cache, write, CacheStream, [type(binary)]), _, fail),
 2127        rdf_save_db_(CacheStream, DB, Version),
 2128        close(CacheStream)).
 2129
 2130%!  assert_triples(+Triples, +Source)
 2131%
 2132%   Assert a list of triples into the database. For security
 2133%   reasons we check we aren't inserting anything but nice RDF
 2134%   triples.
 2135
 2136assert_triples([], _).
 2137assert_triples([rdf(S,P,O)|T], DB) :-
 2138    !,
 2139    rdf_assert(S, P, O, DB),
 2140    assert_triples(T, DB).
 2141assert_triples([H|_], _) :-
 2142    throw(error(type_error(rdf_triple, H), _)).
 2143
 2144
 2145                 /*******************************
 2146                 *             RESET            *
 2147                 *******************************/
 2148
 2149%!  rdf_reset_db
 2150%
 2151%   Remove all triples from the RDF database and reset all its
 2152%   statistics.
 2153%
 2154%   @bug    This predicate checks for active queries, but this check is
 2155%           not properly synchronized and therefore the use of this
 2156%           predicate is unsafe in multi-threaded contexts. It is
 2157%           mainly used to run functionality tests that need to
 2158%           start with an empty database.
 2159
 2160rdf_reset_db :-
 2161    reset_gensym('_:genid'),
 2162    rdf_reset_db_.
 2163
 2164
 2165                 /*******************************
 2166                 *           SAVE RDF           *
 2167                 *******************************/
 2168
 2169%!  rdf_save(+Out) is det.
 2170%
 2171%   Same as rdf_save(Out, []).  See rdf_save/2 for details.
 2172
 2173%!  rdf_save(+Out, :Options) is det.
 2174%
 2175%   Write RDF data as RDF/XML. Options is a list of one or more of
 2176%   the following options:
 2177%
 2178%           * graph(+Graph)
 2179%           Save only triples associated to the given named Graph.
 2180%
 2181%           * anon(Bool)
 2182%           If =false= (default =true=) do not save blank nodes that do
 2183%           not appear (indirectly) as object of a named resource.
 2184%
 2185%           * base_uri(URI)
 2186%           BaseURI used. If present, all URIs that can be
 2187%           represented relative to this base are written using
 2188%           their shorthand.  See also =write_xml_base= option.
 2189%
 2190%           * convert_typed_literal(:Convertor)
 2191%           Call Convertor(-Type, -Content, +RDFObject), providing
 2192%           the opposite for the convert_typed_literal option of
 2193%           the RDF parser.
 2194%
 2195%           * document_language(+Lang)
 2196%           Initial =|xml:lang|= saved with rdf:RDF element.
 2197%
 2198%           * encoding(Encoding)
 2199%           Encoding for the output.  Either utf8 or iso_latin_1.
 2200%
 2201%           * inline(+Bool)
 2202%           If =true= (default =false=), inline resources when
 2203%           encountered for the first time. Normally, only bnodes
 2204%           are handled this way.
 2205%
 2206%           * namespaces(+List)
 2207%           Explicitly specify saved namespace declarations. See
 2208%           rdf_save_header/2 option namespaces for details.
 2209%
 2210%           * sorted(+Boolean)
 2211%           If =true= (default =false=), emit subjects sorted on
 2212%           the full URI.  Useful to make file comparison easier.
 2213%
 2214%           * write_xml_base(Bool)
 2215%           If =false=, do _not_ include the =|xml:base|=
 2216%           declaration that is written normally when using the
 2217%           =base_uri= option.
 2218%
 2219%           * xml_attributes(+Bool)
 2220%           If =false= (default =true=), never use xml attributes to
%           save plain literal attributes, i.e., always use an XML
 2222%           element as in =|<name>Joe</name>|=.
 2223%
 2224%   @param Out      Location to save the data.  This can also be a
 2225%                   file-url (=|file://path|=) or a stream wrapped
 2226%                   in a term stream(Out).
 2227%   @see rdf_save_db/1
 2228
 2229:- thread_local
 2230    named_anon/2,                   % +Resource, -Id
 2231    inlined/1.                      % +Resource
 2232
 2233rdf_save(File) :-
 2234    rdf_save2(File, []).
 2235
 2236rdf_save(Spec, M:Options0) :-
 2237    is_list(Options0),
 2238    !,
 2239    meta_options(save_meta_option, M:Options0, Options),
 2240    to_file(Spec, File),
 2241    rdf_save2(File, Options).
 2242rdf_save(Spec, _:DB) :-
 2243    atom(DB),                      % backward compatibility
 2244    !,
 2245    to_file(Spec, File),
 2246    rdf_save2(File, [graph(DB)]).
 2247
 2248save_meta_option(convert_typed_literal).
 2249
 2250to_file(URL, File) :-
 2251    atom(URL),
 2252    uri_file_name(URL, File),
 2253    !.
 2254to_file(File, File).
 2255
 2256rdf_save2(File, Options) :-
 2257    option(encoding(Encoding), Options, utf8),
 2258    valid_encoding(Encoding),
 2259    open_output(File, Encoding, Out, Close),
 2260    flag(rdf_db_saved_subjects, OSavedSubjects, 0),
 2261    flag(rdf_db_saved_triples, OSavedTriples, 0),
 2262    call_cleanup(rdf_do_save(Out, Options),
 2263                 Reason,
 2264                 cleanup_save(Reason,
 2265                              File,
 2266                              OSavedSubjects,
 2267                              OSavedTriples,
 2268                              Close)).
 2269
 2270open_output(stream(Out), Encoding, Out, Cleanup) :-
 2271    !,
 2272    stream_property(Out, encoding(Old)),
 2273    (   (   Old == Encoding
 2274        ;   Old == wchar_t          % Internal encoding
 2275        )
 2276    ->  Cleanup = true
 2277    ;   set_stream(Out, encoding(Encoding)),
 2278        Cleanup = set_stream(Out, encoding(Old))
 2279    ).
 2280open_output(File, Encoding, Out,
 2281            close(Out)) :-
 2282    open(File, write, Out, [encoding(Encoding)]).
 2283
 2284valid_encoding(Enc) :-
 2285    (   xml_encoding_name(Enc, _)
 2286    ->  true
 2287    ;   throw(error(domain_error(encoding, Enc), _))
 2288    ).
 2289
 2290
 2291cleanup_save(Reason,
 2292             File,
 2293             OSavedSubjects,
 2294             OSavedTriples,
 2295             Close) :-
 2296    call(Close),
 2297    flag(rdf_db_saved_subjects, SavedSubjects, OSavedSubjects),
 2298    flag(rdf_db_saved_triples, SavedTriples, OSavedTriples),
 2299    retractall(named_anon(_, _)),
 2300    retractall(inlined(_)),
 2301    (   Reason == exit
 2302    ->  print_message(informational,
 2303                      rdf(saved(File, SavedSubjects, SavedTriples)))
 2304    ;   format(user_error, 'Reason = ~w~n', [Reason])
 2305    ).
 2306
 2307rdf_do_save(Out, Options0) :-
 2308    rdf_save_header(Out, Options0, Options),
 2309    graph(Options, DB),
 2310    (   option(sorted(true), Options, false)
 2311    ->  (   var(DB)
 2312        ->  setof(Subject, rdf_subject(Subject), Subjects)
 2313        ;   findall(Subject, rdf(Subject, _, _, DB:_), SubjectList),
 2314            sort(SubjectList, Subjects)
 2315        ),
 2316        forall(member(Subject, Subjects),
 2317               rdf_save_non_anon_subject(Out, Subject, Options))
 2318    ;   forall(rdf_subject_in_graph(Subject, DB),
 2319               rdf_save_non_anon_subject(Out, Subject, Options))
 2320    ),
 2321    rdf_save_footer(Out),
 2322    !.                                  % dubious cut; without the
 2323                                        % cleanup handlers isn't called!?
 2324
 2325%!  rdf_subject_in_graph(-Subject, ?DB) is nondet.
 2326%
 2327%   True when Subject is a subject in the   graph  DB. If DB is unbound,
 2328%   all  subjects  are  enumerated.  Otherwise   we  have  two  options:
 2329%   enumerate all subjects and filter by graph or collect all triples of
 2330%   the graph and get the unique subjects.   The  first is attractive if
 2331%   the graph is big compared  to  the   DB,  also  because  it does not
 2332%   require memory, the second if the graph is small compared to the DB.
 2333
 2334rdf_subject_in_graph(Subject, DB) :-
 2335    var(DB),
 2336    !,
 2337    rdf_subject(Subject).
 2338rdf_subject_in_graph(Subject, DB) :-
 2339    rdf_statistics(triples(AllTriples)),
 2340    rdf_graph_property(DB, triples(DBTriples)),
 2341    DBTriples > AllTriples // 10,
 2342    !,
 2343    rdf_resource(Subject),
 2344    (   rdf(Subject, _, _, DB:_)
 2345    ->  true
 2346    ).
 2347rdf_subject_in_graph(Subject, DB) :-
 2348    findall(Subject, rdf(Subject, _, _, DB:_), SubjectList),
 2349    list_to_set(SubjectList, Subjects),
 2350    member(Subject, Subjects).
 2351
 2352
 2353graph(Options0, DB) :-
 2354    strip_module(Options0, _, Options),
 2355    (   memberchk(graph(DB0), Options)
 2356    ->  DB = DB0
 2357    ;   memberchk(db(DB0), Options)
 2358    ->  DB = DB0
 2359    ;   true                            % leave unbound
 2360    ).
 2361
 2362
 2363%!  rdf_save_header(+Fd, +Options)
 2364%
 2365%   Save XML document header, doctype and open the RDF environment.
 2366%   This predicate also sets up the namespace notation.
 2367%
 2368%   Save an RDF header, with the XML header, DOCTYPE, ENTITY and
 2369%   opening the rdf:RDF element with appropriate namespace
 2370%   declarations. It uses the primitives from section 3.5 to
 2371%   generate the required namespaces and desired short-name. Options
 2372%   is one of:
 2373%
 2374%     * graph(+URI)
 2375%     Only search for namespaces used in triples that belong to the
 2376%     given named graph.
 2377%
 2378%     * namespaces(+List)
 2379%     Where List is a list of namespace abbreviations. With this
 2380%     option, the expensive search for all namespaces that may be
 2381%     used by your data is omitted. The namespaces =rdf= and =rdfs=
 2382%     are added to the provided List. If a namespace is not
 2383%     declared, the resource is emitted in non-abbreviated form.
 2384
 2385rdf_save_header(Out, Options) :-
 2386    rdf_save_header(Out, Options, _).
 2387
 2388rdf_save_header(Out, Options, OptionsOut) :-
 2389    is_list(Options),
 2390    !,
 2391    option(encoding(Enc), Options, utf8),
 2392    xml_encoding(Enc, Encoding),
 2393    format(Out, '<?xml version=\'1.0\' encoding=\'~w\'?>~n', [Encoding]),
 2394    format(Out, '<!DOCTYPE rdf:RDF [', []),
 2395    header_namespaces(Options, NSIdList),
 2396    nsmap(NSIdList, NsMap),
 2397    append(Options, [nsmap(NsMap)], OptionsOut),
 2398    forall(member(Id=URI, NsMap),
 2399           (   xml_quote_attribute(URI, NSText0, Enc),
 2400               xml_escape_parameter_entity(NSText0, NSText),
 2401               format(Out, '~N    <!ENTITY ~w \'~w\'>', [Id, NSText])
 2402           )),
 2403    format(Out, '~N]>~n~n', []),
 2404    format(Out, '<rdf:RDF', []),
 2405    (   member(Id, NSIdList),
 2406        format(Out, '~N    xmlns:~w="&~w;"~n', [Id, Id]),
 2407        fail
 2408    ;   true
 2409    ),
 2410    (   option(base_uri(Base), Options),
 2411        option(write_xml_base(true), Options, true)
 2412    ->  xml_quote_attribute(Base, BaseText, Enc),
 2413        format(Out, '~N    xml:base="~w"~n', [BaseText])
 2414    ;   true
 2415    ),
 2416    (   memberchk(document_language(Lang), Options)
 2417    ->  format(Out, '~N    xml:lang="~w"', [Lang])
 2418    ;   true
 2419    ),
 2420    format(Out, '>~n', []).
 2421rdf_save_header(Out, FileRef, OptionsOut) :-    % compatibility
 2422    atom(FileRef),
 2423    rdf_save_header(Out, [graph(FileRef)], OptionsOut).
 2424
 2425xml_encoding(Enc, Encoding) :-
 2426    (   xml_encoding_name(Enc, Encoding)
 2427    ->  true
 2428    ;   throw(error(domain_error(rdf_encoding, Enc), _))
 2429    ).
 2430
 2431xml_encoding_name(ascii,       'US-ASCII').
 2432xml_encoding_name(iso_latin_1, 'ISO-8859-1').
 2433xml_encoding_name(utf8,        'UTF-8').
 2434
 2435%!  nsmap(+NSIds, -Map:list(id=uri)) is det.
 2436%
 2437%   Create a namespace-map that is compatible to xml_write/2
 2438%   for dealing with XML-Literals
 2439
 2440nsmap([], []).
 2441nsmap([Id|T0], [Id=URI|T]) :-
 2442    ns(Id, URI),
 2443    nsmap(T0, T).
 2444
 2445%!  xml_escape_parameter_entity(+In, -Out) is det.
 2446%
 2447%   Escape % as &#37; for entity declarations.
 2448
 2449xml_escape_parameter_entity(In, Out) :-
 2450    sub_atom(In, _, _, _, '%'),
 2451    !,
 2452    atom_codes(In, Codes),
 2453    phrase(escape_parent(Codes), OutCodes),
 2454    atom_codes(Out, OutCodes).
 2455xml_escape_parameter_entity(In, In).
 2456
 2457escape_parent([]) --> [].
 2458escape_parent([H|T]) -->
 2459    (   { H == 37 }
 2460    ->  "&#37;"
 2461    ;   [H]
 2462    ),
 2463    escape_parent(T).
 2464
 2465
 2466%!  header_namespaces(Options, -List)
 2467%
 2468%   Get namespaces we will define as entities
 2469
 2470header_namespaces(Options, List) :-
 2471    memberchk(namespaces(NSL0), Options),
 2472    !,
 2473    sort([rdf,rdfs|NSL0], List).
 2474header_namespaces(Options, List) :-
 2475    graph(Options, DB),
 2476    used_namespace_entities(List, DB).
 2477
 2478%!  rdf_graph_prefixes(?Graph, -List:ord_set) is det.
 2479%!  rdf_graph_prefixes(?Graph, -List:ord_set, :Options) is det.
 2480%
%   List is a sorted list of  prefixes (namespaces) in Graph. Options
 2482%   defined are:
 2483%
 2484%       * filter(:Filter)
 2485%       optional Filter argument is used to filter the results. It
 2486%       is called with 3 additional arguments:
 2487%
 2488%           ==
 2489%           call(Filter, Where, Prefix, URI)
 2490%           ==
 2491%
 2492%       The Where argument gives the location of the prefix and is
 2493%       one of =subject=, =predicate=, =object= or =type=. The
 2494%       Prefix argument is the potentially new prefix and URI is
 2495%       the full URI that is being processed.
 2496%
 2497%       * expand(:Goal)
 2498%       Hook to generate the graph.  Called using
 2499%
 2500%           ==
 2501%           call(Goal,S,P,O,Graph)
 2502%           ==
 2503%
 2504%       * min_count(+Count)
 2505%       Only include prefixes that appear at least N times.  Default
 2506%       is 1. Declared prefixes are always returned if found at
 2507%       least one time.
 2508%
 2509%       * get_prefix(:GetPrefix)
 2510%       Predicate to extract the candidate prefix from an IRI.  Default
 2511%       is iri_xml_namespace/2.
 2512
 2513
 2514:- thread_local
 2515    graph_prefix/3. 2516:- meta_predicate
 2517    rdf_graph_prefixes(?, -, :). 2518
 2519rdf_graph_prefixes(Graph, List) :-
 2520    rdf_graph_prefixes(Graph, List, []).
 2521
 2522rdf_graph_prefixes(Graph, List, M:QOptions) :-
 2523    is_list(QOptions),
 2524    !,
 2525    meta_options(is_meta, M:QOptions, Options),
 2526    option(filter(Filter), Options, true),
 2527    option(expand(Expand), Options, rdf_db),
 2528    option(min_count(MinCount), Options, 1),
 2529    option(get_prefix(GetPrefix), Options, iri_xml_namespace),
 2530    call_cleanup(prefixes(Expand, Graph, Prefixes, Filter, MinCount, GetPrefix),
 2531                 retractall(graph_prefix(_,_,_))),
 2532    sort(Prefixes, List).
 2533rdf_graph_prefixes(Graph, List, M:Filter) :-
 2534    rdf_graph_prefixes(Graph, List, M:[filter(Filter)]).
 2535
 2536is_meta(filter).
 2537is_meta(expand).
 2538is_meta(get_prefix).
 2539
 2540
 2541prefixes(Expand, Graph, Prefixes, Filter, MinCount, GetPrefix) :-
 2542    (   call(Expand, S, P, O, Graph),
 2543        add_ns(subject, GetPrefix, Filter, S, MinCount, s(S)),
 2544        add_ns(predicate, GetPrefix, Filter, P, MinCount, sp(S,P)),
 2545        add_ns_obj(GetPrefix, Filter, O, MinCount, spo(S,P,O)),
 2546        fail
 2547    ;   true
 2548    ),
 2549    findall(Prefix, graph_prefix(Prefix, MinCount, _), Prefixes).
 2550
 2551add_ns(Where, GetPrefix, Filter, S, MinCount, Context) :-
 2552    \+ rdf_is_bnode(S),
 2553    call(GetPrefix, S, Full),
 2554    Full \== '',
 2555    !,
 2556    (   graph_prefix(Full, MinCount, _)
 2557    ->  true
 2558    ;   Filter == true
 2559    ->  add_ns(Full, Context)
 2560    ;   call(Filter, Where, Full, S)
 2561    ->  add_ns(Full, Context)
 2562    ;   true
 2563    ).
 2564add_ns(_, _, _, _, _, _).
 2565
 2566add_ns(Full, Context) :-
 2567    graph_prefix(Full, _, Contexts),
 2568    memberchk(Context, Contexts),
 2569    !.
 2570add_ns(Full, Context) :-
 2571    retract(graph_prefix(Full, C0, Contexts)),
 2572    !,
 2573    C1 is C0+1,
 2574    asserta(graph_prefix(Full, C1, [Context|Contexts])).
 2575add_ns(Full, _) :-
 2576    ns(_, Full),
 2577    !,
 2578    asserta(graph_prefix(Full, _, _)).
 2579add_ns(Full, Context) :-
 2580    asserta(graph_prefix(Full, 1, [Context])).
 2581
 2582
 2583add_ns_obj(GetPrefix, Filter, O, MinCount, Context) :-
 2584    atom(O),
 2585    !,
 2586    add_ns(object, GetPrefix, Filter, O, MinCount, Context).
 2587add_ns_obj(GetPrefix, Filter, literal(type(Type, _)), MinCount, _) :-
 2588    atom(Type),
 2589    !,
 2590    add_ns(type, GetPrefix, Filter, Type, MinCount, t(Type)).
 2591add_ns_obj(_, _, _, _, _).
 2592
 2593
 2594%!  used_namespace_entities(-List, ?Graph) is det.
 2595%
 2596%   Return the namespace aliases that are actually used in Graph. In
 2597%   addition, this predicate creates ns<N>   aliases  for namespaces
 2598%   used in predicates because RDF/XML cannot write predicates other
 2599%   than as an XML name.
 2600
 2601used_namespace_entities(List, Graph) :-
 2602    decl_used_predicate_ns(Graph),
 2603    used_namespaces(List, Graph).
 2604
 2605used_namespaces(List, DB) :-
 2606    rdf_graph_prefixes(DB, FullList),
 2607    ns_abbreviations(FullList, List0),
 2608    sort([rdf|List0], List).
 2609
 2610ns_abbreviations([], []).
 2611ns_abbreviations([H0|T0], [H|T]) :-
 2612    ns(H, H0),
 2613    !,
 2614    ns_abbreviations(T0, T).
 2615ns_abbreviations([_|T0], T) :-
 2616    ns_abbreviations(T0, T).
 2617
 2618
 2619/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 2620For every URL used as a predicate  we   *MUST*  define a namespace as we
 2621cannot use names holding /, :, etc. as XML identifiers.
 2622- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 2623
 2624:- thread_local
 2625    predicate_ns/2. 2626
 2627decl_used_predicate_ns(DB) :-
 2628    retractall(predicate_ns(_,_)),
 2629    (   rdf_current_predicate(P, DB),
 2630        decl_predicate_ns(P),
 2631        fail
 2632    ;   true
 2633    ).
 2634
 2635decl_predicate_ns(Pred) :-
 2636    predicate_ns(Pred, _),
 2637    !.
 2638decl_predicate_ns(Pred) :-
 2639    rdf_global_id(NS:Local, Pred),
 2640    xml_name(Local),
 2641    !,
 2642    assert(predicate_ns(Pred, NS)).
 2643decl_predicate_ns(Pred) :-
 2644    atom_codes(Pred, Codes),
 2645    append(NSCodes, LocalCodes, Codes),
 2646    xml_codes(LocalCodes),
 2647    !,
 2648    (   NSCodes \== []
 2649    ->  atom_codes(NS, NSCodes),
 2650        (   ns(Id, NS)
 2651        ->  assert(predicate_ns(Pred, Id))
 2652        ;   between(1, infinite, N),
 2653            atom_concat(ns, N, Id),
 2654            \+ ns(Id, _)
 2655        ->  rdf_register_ns(Id, NS),
 2656            print_message(informational,
 2657                          rdf(using_namespace(Id, NS)))
 2658        ),
 2659        assert(predicate_ns(Pred, Id))
 2660    ;   assert(predicate_ns(Pred, -)) % no namespace used
 2661    ).
 2662
 2663xml_codes([]).
 2664xml_codes([H|T]) :-
 2665    xml_code(H),
 2666    xml_codes(T).
 2667
 2668xml_code(X) :-
 2669    code_type(X, csym),
 2670    !.
 2671xml_code(0'-).                          % Match 0'-
 2672
 2673
 2674%!  rdf_save_footer(Out:stream) is det.
 2675%
 2676%   Finish XML generation and write the document footer.
 2677%
 2678%   @see rdf_save_header/2, rdf_save_subject/3.
 2679
 2680rdf_save_footer(Out) :-
 2681    retractall(named_anon(_, _)),
 2682    retractall(inlined(_)),
 2683    format(Out, '</rdf:RDF>~n', []).
 2684
 2685%!  rdf_save_non_anon_subject(+Out, +Subject, +Options)
 2686%
 2687%   Save an object.  Anonymous objects not saved if anon(false)
 2688%   is present in the Options list.
 2689
 2690rdf_save_non_anon_subject(_Out, Subject, Options) :-
 2691    rdf_is_bnode(Subject),
 2692    (   memberchk(anon(false), Options)
 2693    ;   graph(Options, DB),
 2694        rdf_db(_, _, Subject, DB)
 2695    ),
 2696    !.
 2697rdf_save_non_anon_subject(Out, Subject, Options) :-
 2698    rdf_save_subject(Out, Subject, Options),
 2699    flag(rdf_db_saved_subjects, X, X+1).
 2700
 2701
 2702%!  rdf_save_subject(+Out, +Subject:resource, +Options) is det.
 2703%
 2704%   Save the triples associated to Subject to Out. Options:
 2705%
 2706%     * graph(+Graph)
 2707%     Only save properties from Graph.
 2708%     * base_uri(+URI)
 2709%     * convert_typed_literal(:Goal)
 2710%     * document_language(+XMLLang)
 2711%
 2712%   @see rdf_save/2 for a description of these options.
 2713
 2714rdf_save_subject(Out, Subject, Options) :-
 2715    is_list(Options),
 2716    !,
 2717    option(base_uri(BaseURI), Options, '-'),
 2718    (   rdf_save_subject(Out, Subject, BaseURI, 0, Options)
 2719    ->  format(Out, '~n', [])
 2720    ;   throw(error(rdf_save_failed(Subject), 'Internal error'))
 2721    ).
 2722rdf_save_subject(Out, Subject, DB) :-
 2723    (   var(DB)
 2724    ->  rdf_save_subject(Out, Subject, [])
 2725    ;   rdf_save_subject(Out, Subject, [graph(DB)])
 2726    ).
 2727
 2728
 2729%!  rdf_save_subject(+Out:stream, +Subject:resource, +BaseURI,
 2730%!                   +Indent:int, +Options) is det.
 2731%
 2732%   Save properties of Subject.
 2733%
 2734%   @param Indent   Current indentation
 2735
 2736rdf_save_subject(_, Subject, _, _, _) :-
 2737    inlined(Subject),
 2738    !.
 2739rdf_save_subject(Out, Subject, BaseURI, Indent, Options) :-
 2740    do_save_subject(Out, Subject, BaseURI, Indent, Options).
 2741
 2742do_save_subject(Out, Subject, BaseURI, Indent, Options) :-
 2743    graph(Options, DB),
 2744    findall(Pred=Object, rdf_db(Subject, Pred, Object, DB), Atts0),
 2745    sort(Atts0, Atts),              % remove duplicates
 2746    length(Atts, L),
 2747    (   length(Atts0, L0),
 2748        Del is L0-L,
 2749        Del > 0
 2750    ->  print_message(informational,
 2751                      rdf(save_removed_duplicates(Del, Subject)))
 2752    ;   true
 2753    ),
 2754    rdf_save_subject(Out, Subject, BaseURI, Atts, Indent, Options),
 2755    flag(rdf_db_saved_triples, X, X+L).
 2756
 2757rdf_db(Subject, Pred, Object, DB) :-
 2758    var(DB),
 2759    !,
 2760    rdf(Subject, Pred, Object).
 2761rdf_db(Subject, Pred, Object, DB) :-
 2762    rdf(Subject, Pred, Object, DB:_).
 2763
 2764%!  rdf_save_subject(+Out:stream, +Subject:resource, +BaseURI,
 2765%!                   +Atts:list(Pred=Obj), +Indent:int, +Options) is det.
 2766%
 2767%   Save triples defined by Atts on Subject.
 2768
 2769rdf_save_subject(Out, Subject, BaseURI, Atts, Indent, Options) :-
 2770    rdf_equal(rdf:type, RdfType),
 2771    select(RdfType=Type, Atts, Atts1),
 2772    \+ rdf_is_bnode(Type),
 2773    rdf_id(Type, BaseURI, TypeId),
 2774    xml_is_name(TypeId),
 2775    !,
 2776    format(Out, '~*|<', [Indent]),
 2777    rdf_write_id(Out, TypeId),
 2778    save_about(Out, BaseURI, Subject, Options),
 2779    save_attributes(Atts1, BaseURI, Out, TypeId, Indent, Options).
 2780rdf_save_subject(Out, Subject, BaseURI, Atts, Indent, Options) :-
 2781    format(Out, '~*|<rdf:Description', [Indent]),
 2782    save_about(Out, BaseURI, Subject, Options),
 2783    save_attributes(Atts, BaseURI, Out, rdf:'Description', Indent, Options).
 2784
 2785xml_is_name(_NS:Atom) :-
 2786    !,
 2787    xml_name(Atom).
 2788xml_is_name(Atom) :-
 2789    xml_name(Atom).
 2790
 2791%!  save_about(+Out, +BaseURI, +Subject, +Options) is det.
 2792%
 2793%   Save the rdf:about. If Subject is a  blank node, save the nodeID
 2794%   if any.
 2795
 2796save_about(Out, _BaseURI, Subject, _Options) :-
 2797    rdf_is_bnode(Subject),
 2798    !,
 2799    (   named_anon(Subject, NodeID)
 2800    ->  format(Out, ' rdf:nodeID="~w"', [NodeID])
 2801    ;   true
 2802    ).
 2803save_about(Out, BaseURI, Subject, Options) :-
 2804    option(encoding(Encoding), Options, utf8),
 2805    rdf_value(Subject, BaseURI, QSubject, Encoding),
 2806    format(Out, ' rdf:about="~w"', [QSubject]).
 2807
 2808%!  save_attributes(+List, +BaseURI, +Stream, +Element, +Indent, +Options)
 2809%
 2810%   Save the attributes.  Short literal attributes are saved in the
 2811%   tag.  Others as the content of the description element.  The
 2812%   begin tag has already been filled.
 2813
 2814save_attributes(Atts, BaseURI, Out, Element, Indent, Options) :-
 2815    split_attributes(Atts, InTag, InBody, Options),
 2816    SubIndent is Indent + 2,
 2817    save_attributes2(InTag, BaseURI, tag, Out, SubIndent, Options),
 2818    (   InBody == []
 2819    ->  format(Out, '/>~n', [])
 2820    ;   format(Out, '>~n', []),
 2821        save_attributes2(InBody, BaseURI, body, Out, SubIndent, Options),
 2822        format(Out, '~N~*|</', [Indent]),
 2823        rdf_write_id(Out, Element),
 2824        format(Out, '>~n', [])
 2825    ).
 2826
 2827%!  split_attributes(+Attributes, -HeadAttrs, -BodyAttr, Options)
 2828%
 2829%   Split attribute (Name=Value) list into attributes for the head
 2830%   and body. Attributes can only be in the head if they are literal
 2831%   and appear only one time in the attribute list.
 2832
 2833split_attributes(Atts, [], Atts, Options) :-
 2834    option(xml_attributes(false), Options),
 2835    !.
 2836split_attributes(Atts, HeadAttr, BodyAttr, _) :-
 2837    duplicate_attributes(Atts, Dupls, Singles),
 2838    simple_literal_attributes(Singles, HeadAttr, Rest),
 2839    append(Dupls, Rest, BodyAttr).
 2840
 2841%!  duplicate_attributes(+Attrs, -Duplicates, -Singles)
 2842%
 2843%   Extract attributes that appear more than once as we cannot
 2844%   duplicate an attribute in the head according to the XML rules.
 2845
 2846duplicate_attributes([], [], []).
 2847duplicate_attributes([H|T], Dupls, Singles) :-
 2848    H = (Name=_),
 2849    named_attributes(Name, T, D, R),
 2850    D \== [],
 2851    append([H|D], Dupls2, Dupls),
 2852    !,
 2853    duplicate_attributes(R, Dupls2, Singles).
 2854duplicate_attributes([H|T], Dupls2, [H|Singles]) :-
 2855    duplicate_attributes(T, Dupls2, Singles).
 2856
 2857named_attributes(_, [], [], []) :- !.
 2858named_attributes(Name, [H|T], D, R) :-
 2859    (   H = (Name=_)
 2860    ->  D = [H|DT],
 2861        named_attributes(Name, T, DT, R)
 2862    ;   R = [H|RT],
 2863        named_attributes(Name, T, D, RT)
 2864    ).
 2865
 2866%!  simple_literal_attributes(+Attributes, -Inline, -Body)
 2867%
 2868%   Split attributes for (literal) attributes to be used in the
 2869%   begin-tag and ones that have to go into the body of the description.
 2870
 2871simple_literal_attributes([], [], []).
 2872simple_literal_attributes([H|TA], [H|TI], B) :-
 2873    in_tag_attribute(H),
 2874    !,
 2875    simple_literal_attributes(TA, TI, B).
 2876simple_literal_attributes([H|TA], I, [H|TB]) :-
 2877    simple_literal_attributes(TA, I, TB).
 2878
 2879in_tag_attribute(_=literal(Text)) :-
 2880    atom(Text),                     % may not have lang qualifier
 2881    atom_length(Text, Len),
 2882    Len < 60.
 2883
 2884%!  save_attributes2(+List, +BaseURI, +TagOrBody, +Stream, +Indent, +Options)
 2885%
 2886%   Save a list of attributes.
 2887
 2888save_attributes2([], _, _, _, _, _).
 2889save_attributes2([H|T], BaseURI, Where, Out, Indent, Options) :-
 2890    save_attribute(Where, H, BaseURI, Out, Indent, Options),
 2891    save_attributes2(T, BaseURI, Where, Out, Indent, Options).
 2892
 2893save_attribute(tag, Name=literal(Value), BaseURI, Out, Indent, Options) :-
 2894    AttIndent is Indent + 2,
 2895    rdf_id(Name, BaseURI, NameText),
 2896    option(encoding(Encoding), Options, utf8),
 2897    xml_quote_attribute(Value, QVal, Encoding),
 2898    format(Out, '~N~*|', [AttIndent]),
 2899    rdf_write_id(Out, NameText),
 2900    format(Out, '="~w"', [QVal]).
 2901save_attribute(body, Name=literal(Literal0), BaseURI, Out, Indent, Options) :-
 2902    !,
 2903    rdf_id(Name, BaseURI, NameText),
 2904    (   memberchk(convert_typed_literal(Converter), Options),
 2905        call(Converter, Type, Content, Literal0)
 2906    ->  Literal = type(Type, Content)
 2907    ;   Literal = Literal0
 2908    ),
 2909    save_body_literal(Literal, NameText, BaseURI, Out, Indent, Options).
 2910save_attribute(body, Name=Value, BaseURI, Out, Indent, Options) :-
 2911    rdf_is_bnode(Value),
 2912    !,
 2913    rdf_id(Name, BaseURI, NameText),
 2914    format(Out, '~N~*|<', [Indent]),
 2915    rdf_write_id(Out, NameText),
 2916    (   named_anon(Value, NodeID)
 2917    ->  format(Out, ' rdf:nodeID="~w"/>', [NodeID])
 2918    ;   (   rdf(S1, Name, Value),
 2919            rdf(S2, P2, Value),
 2920            (S1 \== S2 ; Name \== P2)
 2921        ->  predicate_property(named_anon(_,_), number_of_clauses(N)),
 2922            atom_concat('bn', N, NodeID),
 2923            assertz(named_anon(Value, NodeID))
 2924        ;   true
 2925        ),
 2926        SubIndent is Indent + 2,
 2927        (   rdf_collection(Value)
 2928        ->  save_about(Out, BaseURI, Value, Options),
 2929            format(Out, ' rdf:parseType="Collection">~n', []),
 2930            rdf_save_list(Out, Value, BaseURI, SubIndent, Options)
 2931        ;   format(Out, '>~n', []),
 2932            rdf_save_subject(Out, Value, BaseURI, SubIndent, Options)
 2933        ),
 2934        format(Out, '~N~*|</', [Indent]),
 2935        rdf_write_id(Out, NameText),
 2936        format(Out, '>~n', [])
 2937    ).
 2938save_attribute(body, Name=Value, BaseURI, Out, Indent, Options) :-
 2939    option(inline(true), Options),
 2940    has_attributes(Value, Options),
 2941    \+ inlined(Value),
 2942    !,
 2943    assertz(inlined(Value)),
 2944    rdf_id(Name, BaseURI, NameText),
 2945    format(Out, '~N~*|<', [Indent]),
 2946    rdf_write_id(Out, NameText),
 2947    SubIndent is Indent + 2,
 2948    (   rdf_collection(Value)
 2949    ->  save_about(Out, BaseURI, Value, Options),
 2950        format(Out, ' rdf:parseType="Collection">~n', []),
 2951        rdf_save_list(Out, Value, BaseURI, SubIndent, Options)
 2952    ;   format(Out, '>~n', []),
 2953        do_save_subject(Out, Value, BaseURI, SubIndent, Options)
 2954    ),
 2955    format(Out, '~N~*|</', [Indent]),
 2956    rdf_write_id(Out, NameText),
 2957    format(Out, '>~n', []).
 2958save_attribute(body, Name=Value, BaseURI, Out, Indent, Options) :-
 2959    option(encoding(Encoding), Options, utf8),
 2960    rdf_value(Value, BaseURI, QVal, Encoding),
 2961    rdf_id(Name, BaseURI, NameText),
 2962    format(Out, '~N~*|<', [Indent]),
 2963    rdf_write_id(Out, NameText),
 2964    format(Out, ' rdf:resource="~w"/>', [QVal]).
 2965
 2966has_attributes(URI, Options) :-
 2967    graph(Options, DB),
 2968    rdf_db(URI, _, _, DB),
 2969    !.
 2970
 2971%!  save_body_literal(+Literal, +NameText, +BaseURI,
 2972%!                    +Out, +Indent, +Options).
 2973
 2974save_body_literal(lang(Lang, Value),
 2975                  NameText, BaseURI, Out, Indent, Options) :-
 2976    !,
 2977    format(Out, '~N~*|<', [Indent]),
 2978    rdf_write_id(Out, NameText),
 2979    (   memberchk(document_language(Lang), Options)
 2980    ->  write(Out, '>')
 2981    ;   rdf_id(Lang, BaseURI, LangText),
 2982        format(Out, ' xml:lang="~w">', [LangText])
 2983    ),
 2984    save_attribute_value(Value, Out, Options),
 2985    write(Out, '</'), rdf_write_id(Out, NameText), write(Out, '>').
 2986save_body_literal(type(Type, DOM),
 2987                  NameText, _BaseURI, Out, Indent, Options) :-
 2988    rdf_equal(Type, rdf:'XMLLiteral'),
 2989    !,
 2990    (   atom(DOM)
 2991    ->  format(Out, '~N~*|<', [Indent]),
 2992        rdf_write_id(Out, NameText),
 2993        format(Out, ' rdf:parseType="Literal">~w</', [DOM]),
 2994        rdf_write_id(Out, NameText), write(Out, '>')
 2995    ;   save_xml_literal(DOM, NameText, Out, Indent, Options)
 2996    ).
 2997save_body_literal(type(Type, Value),
 2998                  NameText, BaseURI, Out, Indent, Options) :-
 2999    !,
 3000    format(Out, '~N~*|<', [Indent]),
 3001    rdf_write_id(Out, NameText),
 3002    option(encoding(Encoding), Options, utf8),
 3003    rdf_value(Type, BaseURI, QVal, Encoding),
 3004    format(Out, ' rdf:datatype="~w">', [QVal]),
 3005    save_attribute_value(Value, Out, Options),
 3006    write(Out, '</'), rdf_write_id(Out, NameText), write(Out, '>').
 3007save_body_literal(Literal,
 3008                  NameText, _, Out, Indent, Options) :-
 3009    atomic(Literal),
 3010    !,
 3011    format(Out, '~N~*|<', [Indent]),
 3012    rdf_write_id(Out, NameText),
 3013    write(Out, '>'),
 3014    save_attribute_value(Literal, Out, Options),
 3015    write(Out, '</'), rdf_write_id(Out, NameText), write(Out, '>').
 3016save_body_literal(DOM,
 3017                  NameText, BaseURI, Out, Indent, Options) :-
 3018    rdf_equal(Type, rdf:'XMLLiteral'),
 3019    save_body_literal(type(Type, DOM),
 3020                      NameText, BaseURI, Out, Indent, Options).
 3021
 3022save_attribute_value(Value, Out, Options) :-  % strings
 3023    (	atom(Value)
 3024    ;	string(Value)
 3025    ),
 3026    !,
 3027    option(encoding(Encoding), Options, utf8),
 3028    xml_quote_cdata(Value, QVal, Encoding),
 3029    write(Out, QVal).
 3030save_attribute_value(Value, Out, _Options) :-  % numbers
 3031    number(Value),
 3032    !,
 3033    writeq(Out, Value).             % quoted: preserve floats
 3034save_attribute_value(Value, _Out, _Options) :-
 3035    throw(error(save_attribute_value(Value), _)).
 3036
 3037%!  save_xml_literal(+DOM, +Attr, +Out, +Indent, +Options) is det.
 3038%
 3039%   Save an XMLLiteral value. We already emitted
 3040%
 3041%           ==
 3042%           <prop parseType="literal"
 3043%           ==
 3044%
 3045%   but  not  the  terminating  =|>|=.  We  need  to  establish  the
 3046%   namespaces used in the DOM. The   namespaces in the rdf document
 3047%   are in the nsmap-option of Options.
 3048
 3049save_xml_literal(DOM, Attr, Out, Indent, Options) :-
 3050    xml_is_dom(DOM),
 3051    !,
 3052    memberchk(nsmap(NsMap), Options),
 3053    id_to_atom(Attr, Atom),
 3054    xml_write(Out,
 3055              element(Atom, ['rdf:parseType'='Literal'], DOM),
 3056              [ header(false),
 3057                indent(Indent),
 3058                nsmap(NsMap)
 3059              ]).
 3060save_xml_literal(NoDOM, _, _, _, _) :-
 3061    must_be(xml_dom, NoDOM).
 3062
 3063id_to_atom(NS:Local, Atom) :-
 3064    !,
 3065    atomic_list_concat([NS,Local], :, Atom).
 3066id_to_atom(ID, ID).
 3067
 3068
 3069%!  rdf_collection(+URI) is semidet.
 3070%
 3071%   True  if  URI  represents  an  RDF    list  that  fits  the  RDF
 3072%   parseType=collection syntax. This means it is   a linked list of
 3073%   bnode-cells with a rdf:first that is   a  resource, optionally a
 3074%   rdf:type that is an rdf:list and the list ends in an rdf:nil.
 3075
 3076:- rdf_meta
 3077    rdf_collection(r),
 3078    collection_p(r,r). 3079
 3080rdf_collection(rdf:nil) :- !.
 3081rdf_collection(Cell) :-
 3082    rdf_is_bnode(Cell),
 3083    findall(F, rdf(Cell, rdf:first, F), [_]),
 3084    findall(F, rdf(Cell, rdf:rest, F), [Rest]),
 3085    forall(rdf(Cell, P, V),
 3086           collection_p(P, V)),
 3087    rdf_collection(Rest).
 3088
 3089collection_p(rdf:first, V) :- atom(V).
 3090collection_p(rdf:rest, _).
 3091collection_p(rdf:type, rdf:'List').
 3092
 3093
 3094%!  rdf_save_list(+Out, +List, +BaseURI, +Indent, +Options)
 3095
 3096rdf_save_list(_, List, _, _, _) :-
 3097    rdf_equal(List, rdf:nil),
 3098    !.
 3099rdf_save_list(Out, List, BaseURI, Indent, Options) :-
 3100    rdf_has(List, rdf:first, First),
 3101    (   rdf_is_bnode(First)
 3102    ->  nl(Out),
 3103        rdf_save_subject(Out, First, BaseURI, Indent, Options)
 3104    ;   option(encoding(Encoding), Options, utf8),
 3105        rdf_value(First, BaseURI, QVal, Encoding),
 3106        format(Out, '~N~*|<rdf:Description rdf:about="~w"/>',
 3107               [Indent, QVal])
 3108    ),
 3109    flag(rdf_db_saved_triples, X, X+3),
 3110    (   rdf_has(List, rdf:rest, List2),
 3111        \+ rdf_equal(List2, rdf:nil)
 3112    ->  rdf_save_list(Out, List2, BaseURI, Indent, Options)
 3113    ;   true
 3114    ).
 3115
 3116
 3117%!  rdf_id(+Resource, +BaseURI, -NSLocal)
 3118%
 3119%   Generate a NS:Local  name  for   Resource  given  the  indicated
 3120%   default namespace. This call is used for elements.
 3121
 3122rdf_id(Id, BaseURI, Local) :-
 3123    assertion(atom(BaseURI)),
 3124    atom_concat(BaseURI, Local, Id),
 3125    sub_atom(Local, 0, 1, _, #),
 3126    !.
 3127rdf_id(Id, _, NS:Local) :-
 3128    iri_xml_namespace(Id, Full, Local),
 3129    ns(NS, Full),
 3130    !.
 3131rdf_id(Id, _, NS:Local) :-
 3132    ns(NS, Full),
 3133    Full \== '',
 3134    atom_concat(Full, Local, Id),
 3135    !.
 3136rdf_id(Id, _, Id).
 3137
 3138
 3139%!  rdf_write_id(+Out, +NSLocal) is det.
 3140%
 3141%   Write an identifier. We cannot use native write on it as both NS
 3142%   and Local can be operators.
 3143
 3144rdf_write_id(Out, NS:Local) :-
 3145    !,
 3146    format(Out, '~w:~w', [NS, Local]).
 3147rdf_write_id(Out, Atom) :-
 3148    write(Out, Atom).
 3149
 3150%!  rdf_value(+Resource, +BaseURI, -Text, +Encoding)
 3151%
 3152%   According  to  "6.4  RDF  URI  References"  of  the  RDF  Syntax
 3153%   specification, a URI reference is  UNICODE string not containing
 3154%   control sequences, represented as  UTF-8   and  then  as escaped
 3155%   US-ASCII.
 3156
 3157rdf_value(Base, Base, '', _) :- !.
 3158rdf_value(V, Base, Text, Encoding) :-
 3159    atom_concat(Base, Local, V),
 3160    sub_atom(Local, 0, _, _, #),
 3161    !,
 3162    xml_quote_attribute(Local, Text, Encoding).
 3163rdf_value(V, _, Text, Encoding) :-
 3164    ns(NS, Full),
 3165    atom_concat(Full, Local, V),
 3166    xml_is_name(Local),
 3167    !,
 3168    xml_quote_attribute(Local, QLocal, Encoding),
 3169    atomic_list_concat(['&', NS, (';'), QLocal], Text).
 3170rdf_value(V, _, Q, Encoding) :-
 3171    xml_quote_attribute(V, Q, Encoding).
 3172
 3173
 3174                 /*******************************
 3175                 *       MATCH AND COMPARE      *
 3176                 *******************************/
 3177
 3178%!  rdf_compare(-Dif, +Object1, +Object2) is det.
 3179%
 3180%   Compare  two  object  terms.  Where  SPARQL  defines  a  partial
 3181%   ordering, we define a complete ordering   of terms. The ordering
 3182%   is defined as:
 3183%
 3184%     - Blank nodes < IRIs < Literals
 3185%     - Numeric literals < other literals
 3186%     - Numeric literals are compared by value and then by type,
 3187%       where Integer < Decimal < Double
 3188%     - Other literals are compared lexically, case insensitive.
 3189%       If equal, uppercase precedes lowercase.  If still equal,
 3190%       the types are compared lexically.
 3191
 3192%!  rdf_match_label(+How, +Pattern, +Label) is semidet.
 3193%
 3194%   True if Label matches Pattern according to   How.  How is one of
 3195%   `icase`, `substring`, `word`, `prefix` or   `like`. For backward
 3196%   compatibility, `exact` is a synonym for `icase`.
 3197
 3198
 3199                 /*******************************
 3200                 *      DEPRECATED MATERIAL     *
 3201                 *******************************/
 3202
 3203%!  rdf_split_url(+Prefix, +Local, -URL) is det.
 3204%!  rdf_split_url(-Prefix, -Local, +URL) is det.
 3205%
 3206%   Split/join a URL.  This functionality is moved to library(sgml).
 3207%
 3208%   @deprecated Use iri_xml_namespace/3. Note that the argument
 3209%   order is iri_xml_namespace(+IRI, -Namespace, -Localname).
 3210
 3211rdf_split_url(Prefix, Local, URL) :-
 3212    atomic(URL),
 3213    !,
 3214    iri_xml_namespace(URL, Prefix, Local).
 3215rdf_split_url(Prefix, Local, URL) :-
 3216    atom_concat(Prefix, Local, URL).
 3217
 3218%!  rdf_url_namespace(+URL, -Namespace)
 3219%
 3220%   Namespace is the namespace of URL.
 3221%
 3222%   @deprecated Use iri_xml_namespace/2
 3223
 3224rdf_url_namespace(URL, Prefix) :-
 3225    iri_xml_namespace(URL, Prefix).
 3226
 3227
 3228                 /*******************************
 3229                 *            LITERALS          *
 3230                 *******************************/
 3231
 3232%!  rdf_new_literal_map(-Map) is det.
 3233%
 3234%   Create a new literal map, returning an opaque handle.
 3235
 3236%!  rdf_destroy_literal_map(+Map) is det.
 3237%
 3238%   Destroy a literal map. After this call,   further use of the Map
 3239%   handle is illegal. Additional synchronisation  is needed if maps
 3240%   that are shared between threads are   destroyed to guarantee the
 3241%   handle    is    no    longer    used.    In    some    scenarios
 3242%   rdf_reset_literal_map/1 provides a safe alternative.
 3243
 3244%!  rdf_reset_literal_map(+Map) is det.
 3245%
 3246%   Delete all content from the literal map.
 3247
 3248%!  rdf_insert_literal_map(+Map, +Key, +Value) is det.
 3249%
 3250%   Add a relation between  Key  and  Value   to  the  map.  If this
 3251%   relation already exists no action is performed.
 3252
 3253%!  rdf_insert_literal_map(+Map, +Key, +Value, -KeyCount) is det.
 3254%
 3255%   As rdf_insert_literal_map/3. In addition, if Key is a new key in
 3256%   Map, unify KeyCount with the number of  keys in Map. This serves
 3257%   two purposes. Derived maps, such as  the stem and metaphone maps
 3258%   need to know about new  keys   and  it avoids additional foreign
 3259%   calls for doing the progress in rdf_litindex.pl.
 3260
 3261%!  rdf_delete_literal_map(+Map, +Key) is det.
 3262%
 3263%   Delete Key and all associated values from the map.
 3264
 3265%!  rdf_delete_literal_map(+Map, +Key, +Value) is det.
 3266%
 3267%   Delete the association between Key and Value from the map.
 3268
 3269%!  rdf_find_literal_map(+Map, +KeyList, -ValueList) is det.
 3270%
 3271%   Unify ValueList with an ordered set  of values associated to all
 3272%   keys from KeyList. Each key in  KeyList   is  either an atom, an
 3273%   integer or a term not(Key).  If   not-terms  are provided, there
 3274%   must be at least one positive keyword. The negations are tested
 3275%   after establishing the positive matches.
 3276
 3277%!  rdf_keys_in_literal_map(+Map, +Spec, -Answer) is det.
 3278%
 3279%   Realises various queries on the key-set:
 3280%
 3281%     * all
 3282%
 3283%     Unify Answer with an ordered list of all keys.
 3284%     * key(+Key)
 3285%
 3286%     Succeeds if Key is a key in the map and unify Answer with the
 3287%     number of values associated with the key. This provides a fast
 3288%     test of existence without fetching the possibly large
 3289%     associated value set as with rdf_find_literal_map/3.
 3290%
 3291%     * prefix(+Prefix)
 3292%     Unify Answer with an ordered set of all keys that have the
 3293%     given prefix. See section 3.1 for details on prefix matching.
 3294%     Prefix must be an atom. This call is intended for
 3295%     auto-completion in user interfaces.
 3296%
 3297%     * ge(+Min)
 3298%     Unify Answer with all keys that are larger or equal to the
 3299%     integer Min.
 3300%
 3301%     * le(+Max)
 3302%     Unify Answer with all keys that are smaller or equal to the integer
 3303%     Max.
 3304%
 3305%     * between(+Min, +Max)
 3306%     Unify Answer with all keys between Min and Max (inclusive).
 3307
 3308%!  rdf_statistics_literal_map(+Map, -KeyValue)
 3309%
 3310%   Query some statistics of the map. Provided KeyValue terms are:
 3311%
 3312%     * size(-Keys, -Relations)
 3313%     Unify Keys with the total key-count of the index and Relation
 3314%     with the total Key-Value count.
 3315
 3316
 3317
 3318                 /*******************************
 3319                 *             MISC             *
 3320                 *******************************/
 3321
 3322%!  rdf_version(-Version) is det.
 3323%
 3324%   True when Version is the numerical version-id of this library.
 3325%   The version is computed as
 3326%
 3327%           Major*10000 + Minor*100 + Patch.
 3328
 3329%!  rdf_set(+Term) is det.
 3330%
 3331%   Set properties of the RDF store.  Currently defines:
 3332%
 3333%     * hash(+Hash, +Parameter, +Value)
 3334%     Set properties for a triple index.  Hash is one of =s=,
 3335%     =p=, =sp=, =o=, =po=, =spo=, =g=, =sg= or =pg=.  Parameter
 3336%     is one of:
 3337%
 3338%       - size
 3339%       Value defines the number of entries in the hash-table.
 3340%       Value is rounded _down_ to a power of 2.  After setting
 3341%       the size explicitly, auto-sizing for this table is
 3342%       disabled.  Setting the size smaller than the current
 3343%       size results in a =permission_error= exception.
 3344%
 3345%       - average_chain_len
 3346%       Set maximum average collision number for the hash.
 3347%
 3348%       - optimize_threshold
 3349%       Related to resizing hash-tables.  If 0, all triples are
 3350%       moved to the new size by the garbage collector.  If more
 3351%       than zero, those of the last Value resize steps remain at
 3352%       their current location.  Leaving cells at their current
 3353%       location reduces memory fragmentation and slows down
 3354%       access.
 3355
 3356%!  rdf_md5(+Graph, -MD5) is det.
 3357%
 3358%   True when MD5 is the MD5 hash for  all triples in graph. The MD5
 3359%   digest itself is represented as an   atom holding a 32-character
 3360%   hexadecimal   string.   The   library   maintains   the   digest
 3361%   incrementally on rdf_load/[1,2], rdf_load_db/1, rdf_assert/[3,4]
 3362%   and  rdf_retractall/[3,4].  Checking  whether   the  digest  has
 3363%   changed since the last rdf_load/[1,2]  call provides a practical
 3364%   means for checking whether the file needs to be saved.
 3365%
 3366%   @deprecated New code should use rdf_graph_property(Graph,
 3367%   hash(Hash)).
 3368
 3369%!  rdf_generation(-Generation) is det.
 3370%
 3371%   True when Generation is the current  generation of the database.
 3372%   Each modification to the database  increments the generation. It
 3373%   can be used to check the validity of cached results deduced from
 3374%   the database. Committing a non-empty  transaction increments the
 3375%   generation by one.
 3376%
 3377%   When inside a transaction,  Generation  is   unified  to  a term
 3378%   _TransactionStartGen_ + _InsideTransactionGen_. E.g.,  4+3 means
 3379%   that the transaction was started at   generation 4 of the global
 3380%   database and we have  created  3   new  generations  inside  the
 3381%   transaction. Note that this choice  of representation allows for
 3382%   comparing  generations  using  Prolog  arithmetic.  Comparing  a
 3383%   generation in one  transaction  with   a  generation  in another
 3384%   transaction is meaningless.
 3385
 3386%!  rdf_estimate_complexity(?Subject, ?Predicate, ?Object, -Complexity)
 3387%
 3388%   Return the number of alternatives as   indicated by the database
 3389%   internal hashed indexing. This is a rough measure for the number
 3390%   of alternatives we can expect for   an  rdf_has/3 call using the
 3391%   given three arguments. When  called   with  three variables, the
 3392%   total number of triples is returned.   This  estimate is used in
 3393%   query  optimisation.  See  also    rdf_predicate_property/2  and
 3394%   rdf_statistics/1 for additional information to help optimizers.
 3395
 3396%!  rdf_debug(+Level) is det.
 3397%
%   Set debugging to Level.  Level is an integer 0..9.  The default
%   is 0 (no debugging).
 3400
 3401%!  rdf_atom_md5(+Text, +Times, -MD5) is det.
 3402%
 3403%   Computes the MD5 hash from Text, which is an atom, string or list of
%   character codes. Times is an integer >= 1. When > 1, the MD5
%   algorithm is repeated Times times on the generated hash. This can be
 3406%   used for password encryption algorithms   to  make generate-and-test
 3407%   loops slow.
 3408%
 3409%   @deprecated Obviously, password hash  primitives   do  not belong in
%   this library. The library(crypto) from the =ssl= package
%   provides extensive support for hashes. The =clib= package
 3412%   provides library(crypt) to  access  the   OS  (Unix)  password  hash
 3413%   implementation as well as  lightweight   implementations  of several
 3414%   popular hashes.
 3415
 3416
 3417                 /*******************************
 3418                 *             MESSAGES         *
 3419                 *******************************/
 3420
 3421:- multifile
 3422    prolog:message//1. 3423
 3424prolog:message(rdf(Term)) -->
 3425    message(Term).
 3426
 3427message(loaded(How, What, BaseURI, Triples, Time)) -->
 3428    how(How),
 3429    source(What),
 3430    into(What, BaseURI),
 3431    in_time(Triples, Time).
 3432message(save_removed_duplicates(N, Subject)) -->
 3433    [ 'Removed ~d duplicate triples about "~p"'-[N,Subject] ].
 3434message(saved(File, SavedSubjects, SavedTriples)) -->
 3435    [ 'Saved ~D triples about ~D subjects into ~p'-
 3436      [SavedTriples, SavedSubjects, File]
 3437    ].
 3438message(using_namespace(Id, NS)) -->
 3439    [ 'Using namespace id ~w for ~w'-[Id, NS] ].
 3440message(inconsistent_cache(DB, Graphs)) -->
 3441    [ 'RDF cache file for ~w contains the following graphs'-[DB], nl,
 3442      '~t~8|~p'-[Graphs]
 3443    ].
 3444message(guess_format(Ext)) -->
 3445    [ 'Unknown file-extension: ~w.  Assuming RDF/XML'-[Ext] ].
 3446message(meta(not_expanded(G))) -->
 3447    [ 'rdf_meta/1: ~p is not expanded'-[G] ].
 3448message(deprecated(rdf_unload(Graph))) -->
 3449    [ 'rdf_unload/1: Use ~q'-[rdf_unload_graph(Graph)] ].
 3450
 3451
 3452how(load)   --> [ 'Loaded' ].
 3453how(parsed) --> [ 'Parsed' ].
 3454
 3455source(SourceURL) -->
 3456    { uri_file_name(SourceURL, File),
 3457      !,
 3458      file_base_name(File, Base)    % TBD: relative file?
 3459    },
 3460    [ ' "~w"'-[Base] ].
 3461source(SourceURL) -->
 3462    [ ' "~w"'-[SourceURL] ].
 3463
 3464into(_, _) --> [].                      % TBD
 3465
 3466in_time(Triples, ParseTime) -->
 3467    [ ' in ~2f sec; ~D triples'-[ParseTime, Triples]
 3468    ]