View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2003-2023, University of Amsterdam
    7                              VU University Amsterdam
    8                              CWI, Amsterdam
    9                              SWI-Prolog Solutions b.v.
   10    All rights reserved.
   11
   12    Redistribution and use in source and binary forms, with or without
   13    modification, are permitted provided that the following conditions
   14    are met:
   15
   16    1. Redistributions of source code must retain the above copyright
   17       notice, this list of conditions and the following disclaimer.
   18
   19    2. Redistributions in binary form must reproduce the above copyright
   20       notice, this list of conditions and the following disclaimer in
   21       the documentation and/or other materials provided with the
   22       distribution.
   23
   24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   25    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   26    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   27    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   28    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   29    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   30    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   31    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   32    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   33    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   34    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   35    POSSIBILITY OF SUCH DAMAGE.
   36*/
   37
   38:- module(rdf_db,
   39          [ rdf_version/1,              % -Version
   40
   41            rdf/3,                      % ?Subject, ?Predicate, ?Object
   42            rdf/4,                      % ?Subject, ?Predicate, ?Object, ?DB
   43            rdf_has/3,                  % ?Subject, +Pred, ?Obj
   44            rdf_has/4,                  % ?Subject, +Pred, ?Obj, -RealPred
   45            rdf_reachable/3,            % ?Subject, +Pred, ?Object
   46            rdf_reachable/5,            % ?Subject, +Pred, ?Object, +MaxD, ?D
   47            rdf_resource/1,             % ?Resource
   48            rdf_subject/1,              % ?Subject
   49
   50            rdf_member_property/2,      % ?Property, ?Index
   51
   52            rdf_assert/3,               % +Subject, +Predicate, +Object
   53            rdf_assert/4,               % +Subject, +Predicate, +Object, +DB
   54            rdf_retractall/3,           % ?Subject, ?Predicate, ?Object
   55            rdf_retractall/4,           % ?Subject, ?Predicate, ?Object, +DB
   56            rdf_update/4,               % +Subject, +Predicate, +Object, +Act
   57            rdf_update/5,               % +Subject, +Predicate, +Object, +Src, +Act
   58            rdf_set_predicate/2,        % +Predicate, +Property
   59            rdf_predicate_property/2,   % +Predicate, ?Property
   60            rdf_current_predicate/1,    % -Predicate
   61            rdf_current_literal/1,      % -Literal
   62            rdf_transaction/1,          % :Goal
   63            rdf_transaction/2,          % :Goal, +Id
   64            rdf_transaction/3,          % :Goal, +Id, +Options
   65            rdf_active_transaction/1,   % ?Id
   66
   67            rdf_monitor/2,              % :Goal, +Options
   68
   69            rdf_save_db/1,              % +File
   70            rdf_save_db/2,              % +File, +DB
   71            rdf_load_db/1,              % +File
   72            rdf_reset_db/0,
   73
   74            rdf_node/1,                 % -Id
   75            rdf_bnode/1,                % -Id
   76            rdf_is_bnode/1,             % +Id
   77
   78            rdf_is_resource/1,          % +Term
   79            rdf_is_literal/1,           % +Term
   80            rdf_literal_value/2,        % +Term, -Value
   81
   82            rdf_load/1,                 % +File
   83            rdf_load/2,                 % +File, +Options
   84            rdf_save/1,                 % +File
   85            rdf_save/2,                 % +File, +Options
   86            rdf_unload/1,               % +File
   87            rdf_unload_graph/1,         % +Graph
   88
   89            rdf_md5/2,                  % +DB, -MD5
   90            rdf_atom_md5/3,             % +Text, +Times, -MD5
   91
   92            rdf_create_graph/1,         % ?Graph
   93            rdf_graph_property/2,       % ?Graph, ?Property
   94            rdf_set_graph/2,            % +Graph, +Property
   95            rdf_graph/1,                % ?Graph
   96            rdf_source/1,               % ?File
   97            rdf_source/2,               % ?DB, ?SourceURL
   98            rdf_make/0,                 % Reload modified databases
   99            rdf_gc/0,                   % Garbage collection
  100
  101            rdf_source_location/2,      % +Subject, -Source
  102            rdf_statistics/1,           % -Key
  103            rdf_set/1,                  % +Term
  104            rdf_generation/1,           % -Generation
  105            rdf_snapshot/1,             % -Snapshot
  106            rdf_delete_snapshot/1,      % +Snapshot
  107            rdf_current_snapshot/1,     % ?Snapshot
  108            rdf_estimate_complexity/4,  % +S,+P,+O,-Count
  109
  110            rdf_save_subject/3,         % +Stream, +Subject, +DB
  111            rdf_save_header/2,          % +Out, +Options
  112            rdf_save_footer/1,          % +Out
  113
  114            rdf_equal/2,                % ?Resource, ?Resource
  115            lang_equal/2,               % +Lang1, +Lang2
  116            lang_matches/2,             % +Lang, +Pattern
  117
  118            rdf_prefix/2,               % :Alias, +URI
  119            rdf_current_prefix/2,       % :Alias, ?URI
  120            rdf_register_prefix/2,      % +Alias, +URI
  121            rdf_register_prefix/3,      % +Alias, +URI, +Options
  122            rdf_unregister_prefix/1,    % +Alias
  123            rdf_current_ns/2,           % :Alias, ?URI
  124            rdf_register_ns/2,          % +Alias, +URI
  125            rdf_register_ns/3,          % +Alias, +URI, +Options
  126            rdf_global_id/2,            % ?NS:Name, :Global
  127            rdf_global_object/2,        % +Object, :NSExpandedObject
  128            rdf_global_term/2,          % +Term, :WithExpandedNS
  129
  130            rdf_compare/3,              % -Dif, +Object1, +Object2
  131            rdf_match_label/3,          % +How, +String, +Label
  132            rdf_split_url/3,            % ?Base, ?Local, ?URL
  133            rdf_url_namespace/2,        % +URL, ?Base
  134
  135            rdf_warm_indexes/0,
  136            rdf_warm_indexes/1,         % +Indexed
  137            rdf_update_duplicates/0,
  138
  139            rdf_debug/1,                % Set verbosity
  140
  141            rdf_new_literal_map/1,      % -Handle
  142            rdf_destroy_literal_map/1,  % +Handle
  143            rdf_reset_literal_map/1,    % +Handle
  144            rdf_insert_literal_map/3,   % +Handle, +Key, +Literal
  145            rdf_insert_literal_map/4,   % +Handle, +Key, +Literal, -NewKeys
  146            rdf_delete_literal_map/3,   % +Handle, +Key, +Literal
  147            rdf_delete_literal_map/2,   % +Handle, +Key
  148            rdf_find_literal_map/3,     % +Handle, +KeyList, -Literals
  149            rdf_keys_in_literal_map/3,  % +Handle, +Spec, -Keys
  150            rdf_statistics_literal_map/2, % +Handle, +Name(-Arg...)
  151
  152            rdf_graph_prefixes/2,       % ?Graph, -Prefixes
  153            rdf_graph_prefixes/3,       % ?Graph, -Prefixes, :Filter
  154
  155            (rdf_meta)/1,               % +Heads
  156            op(1150, fx, (rdf_meta))
  157          ]).  158:- use_module(library(semweb/rdf_prefixes),
  159              [ (rdf_meta)/1,
  160                register_file_prefixes/1,
  161                rdf_global_id/2,
  162                rdf_register_ns/2,
  163                                        % re-exported predicates
  164                rdf_global_object/2,
  165                rdf_current_ns/2,
  166                rdf_prefix/2,
  167                rdf_global_term/2,
  168                rdf_register_ns/3,
  169                rdf_register_prefix/3,
  170                rdf_register_prefix/2,
  171                rdf_current_prefix/2,
  172                rdf_unregister_prefix/1
  173              ]).  174
  175:- autoload(library(apply),[maplist/2,maplist/3]).  176:- use_module(library(debug),[debug/3,assertion/1]).  177:- autoload(library(error),[must_be/2,existence_error/2]).  178:- autoload(library(gensym),[gensym/2,reset_gensym/1]).  179:- autoload(library(lists),
  180	    [member/2,flatten/2,list_to_set/2,append/3,select/3]).  181:- autoload(library(memfile),
  182	    [atom_to_memory_file/2,open_memory_file/4]).  183:- autoload(library(option),
  184	    [option/2,option/3,merge_options/3,meta_options/3]).  185:- autoload(library(rdf),[process_rdf/3]).  186:- autoload(library(sgml),
  187	    [ load_structure/3,
  188	      xml_quote_attribute/3,
  189	      xml_name/1,
  190	      xml_quote_cdata/3,
  191	      xml_is_dom/1,
  192	      iri_xml_namespace/3,
  193	      iri_xml_namespace/2
  194	    ]).  195:- autoload(library(sgml_write),[xml_write/3]).  196:- autoload(library(uri),
  197	    [ uri_file_name/2,
  198	      uri_is_global/1,
  199	      uri_normalized/2,
  200	      uri_components/2,
  201	      uri_data/3,
  202	      uri_data/4
  203	    ]).  204:- autoload(library(xsdp_types),[xsdp_numeric_uri/2]).  205:- autoload(library(semweb/rdf_cache),[rdf_cache_file/3]).  206
  207:- if(exists_source(library(thread))).  208:- autoload(library(thread), [concurrent/3]).  209:- endif.  210
  211:- use_foreign_library(foreign(rdf_db)).  212:- public rdf_print_predicate_cloud/2.  % print matrix of reachable predicates
  213
  214:- meta_predicate
  215    rdf_transaction(0),
  216    rdf_transaction(0, +),
  217    rdf_transaction(0, +, +),
  218    rdf_monitor(1, +),
  219    rdf_save(+, :),
  220    rdf_load(+, :).  221
  222:- predicate_options(rdf_graph_prefixes/3, 3,
  223                     [ expand(callable+4),
  224                       filter(callable+3),
  225                       get_prefix(callable+2),
  226                       min_count(nonneg)
  227                     ]).  228:- predicate_options(rdf_load/2, 2,
  229                     [ base_uri(atom),
  230                       blank_nodes(oneof([share,noshare])),
  231                       cache(boolean),
  232                       concurrent(positive_integer),
  233                       db(atom),
  234                       format(oneof([xml,triples,turtle,trig,nquads,ntriples])),
  235                       graph(atom),
  236                       multifile(boolean),
  237                       if(oneof([true,changed,not_loaded])),
  238                       modified(-float),
  239                       prefixes(-list),
  240                       silent(boolean),
  241                       register_namespaces(boolean)
  242                     ]).  243:- predicate_options(rdf_save/2, 2,
  244                     [ graph(atom),
  245                       db(atom),
  246                       anon(boolean),
  247                       base_uri(atom),
  248                       write_xml_base(boolean),
  249                       convert_typed_literal(callable),
  250                       encoding(encoding),
  251                       document_language(atom),
  252                       namespaces(list(atom)),
  253                       xml_attributes(boolean),
  254                       inline(boolean)
  255                     ]).  256:- predicate_options(rdf_save_header/2, 2,
  257                     [ graph(atom),
  258                       db(atom),
  259                       namespaces(list(atom))
  260                     ]).  261:- predicate_options(rdf_save_subject/3, 3,
  262                     [ graph(atom),
  263                       base_uri(atom),
  264                       convert_typed_literal(callable),
  265                       document_language(atom)
  266                     ]).  267:- predicate_options(rdf_transaction/3, 3,
  268                     [ snapshot(any)
  269                     ]).  270
  271:- discontiguous
  272    term_expansion/2.  273
  274/** <module> Core RDF database
  275
  276The file library(semweb/rdf_db) provides the core  of the SWI-Prolog RDF
  277store.
  278
  279@deprecated     New applications should use library(semweb/rdf11), which
  280                provides a much more intuitive API to the RDF store, notably
  281                for handling literals.  The library(semweb/rdf11) runs
  282                currently on top of this library and both can run side-by-side
  283                in the same application.  Terms retrieved from the database
  284                however have a different shape and can not be exchanged without
  285                precautions.
  286*/
  287
  288		 /*******************************
  289		 *            PREFIXES		*
  290		 *******************************/
  291
  292% the ns/2 predicate is historically defined  in this module. We'll keep
  293% that for compatibility reasons.
  294
  295:- multifile ns/2.  296:- dynamic   ns/2.                      % ID, URL
  297
  298:- multifile
  299    rdf_prefixes:rdf_empty_prefix_cache/2.  300
  301rdf_prefixes:rdf_empty_prefix_cache(_Prefix, _IRI) :-
  302    rdf_empty_prefix_cache.
  303
  304:- rdf_meta
  305    rdf(r,r,o),
  306    rdf_has(r,r,o,r),
  307    rdf_has(r,r,o),
  308    rdf_assert(r,r,o),
  309    rdf_retractall(r,r,o),
  310    rdf(r,r,o,?),
  311    rdf_assert(r,r,o,+),
  312    rdf_retractall(r,r,o,?),
  313    rdf_reachable(r,r,o),
  314    rdf_reachable(r,r,o,+,?),
  315    rdf_update(r,r,o,t),
  316    rdf_update(r,r,o,+,t),
  317    rdf_equal(o,o),
  318    rdf_source_location(r,-),
  319    rdf_resource(r),
  320    rdf_subject(r),
  321    rdf_create_graph(r),
  322    rdf_graph(r),
  323    rdf_graph_property(r,?),
  324    rdf_set_graph(r,+),
  325    rdf_unload_graph(r),
  326    rdf_set_predicate(r, t),
  327    rdf_predicate_property(r, -),
  328    rdf_estimate_complexity(r,r,r,-),
  329    rdf_print_predicate_cloud(r,+).  330
  331%!  rdf_equal(?Resource1, ?Resource2)
  332%
  333%   Simple equality test to exploit goal-expansion.
  334
  335rdf_equal(Resource, Resource).
  336
  337%!  lang_equal(+Lang1, +Lang2) is semidet.
  338%
  339%   True if two RFC language specifiers denote the same language
  340%
  341%   @see lang_matches/2.
  342
  343lang_equal(Lang, Lang) :- !.
  344lang_equal(Lang1, Lang2) :-
  345    downcase_atom(Lang1, LangCannon),
  346    downcase_atom(Lang2, LangCannon).
  347
  348%!  lang_matches(+Lang, +Pattern) is semidet.
  349%
  350%   True if Lang  matches  Pattern.   This  implements  XML language
  351%   matching  conform  RFC  4647.   Both    Lang   and  Pattern  are
  352%   dash-separated strings of  identifiers  or   (for  Pattern)  the
  353%   wildcard *. Identifiers are  matched   case-insensitive  and a *
  354%   matches any number of identifiers. A   short pattern is the same
  355%   as *.
  356
  357
  358                 /*******************************
  359                 *     BASIC TRIPLE QUERIES     *
  360                 *******************************/
  361
  362%!  rdf(?Subject, ?Predicate, ?Object) is nondet.
  363%
  364%   Elementary query for triples. Subject   and  Predicate are atoms
  365%   representing the fully qualified URL of  the resource. Object is
  366%   either an atom representing a resource  or literal(Value) if the
  367%   object  is  a  literal  value.   If    a   value   of  the  form
  368%   NameSpaceID:LocalName is provided it  is   expanded  to a ground
  369%   atom  using  expand_goal/2.  This  implies   you  can  use  this
  370%   construct in compiled code without paying a performance penalty.
  371%   Literal values take one of the following forms:
  372%
  373%     * Atom
  374%     If the value is a simple atom it is the textual representation
  375%     of a string literal without explicit type or language
  376%     qualifier.
  377%
  378%     * lang(LangID, Atom)
  379%     Atom represents the text of a string literal qualified with
  380%     the given language.
  381%
  382%     * type(TypeID, Value)
  383%     Used for attributes qualified using the =|rdf:datatype|=
  384%     TypeID. The Value is either the textual representation or a
  385%     natural Prolog representation. See the option
  386%     convert_typed_literal(:Convertor) of the parser. The storage
  387%     layer provides efficient handling of atoms, integers (64-bit)
  388%     and floats (native C-doubles). All other data is represented
  389%     as a Prolog record.
  390%
  391%   For literal querying purposes, Object can be of the form
  392%   literal(+Query, -Value), where Query is one of the terms below.
  393%   If the Query takes a literal argument and the value has a
  394%   numeric type numerical comparison is performed.
  395%
  396%     * plain(+Text)
  397%     Perform exact match and demand the language or type qualifiers
  398%     to match. This query is fully indexed.
  399%
  400%     * icase(+Text)
  401%     Perform a full but case-insensitive match. This query is
  402%     fully indexed.
  403%
  404%     * exact(+Text)
  405%     Same as icase(Text).  Backward compatibility.
  406%
  407%     * substring(+Text)
  408%     Match any literal that contains Text as a case-insensitive
  409%     substring. The query is not indexed on Object.
  410%
  411%     * word(+Text)
  412%     Match any literal that contains Text delimited by a non
  413%     alpha-numeric character, the start or end of the string. The
  414%     query is not indexed on Object.
  415%
  416%     * prefix(+Text)
  417%     Match any literal that starts with Text. This call is intended
  418%     for completion. The query is indexed using the skip list of
  419%     literals.
  420%
  421%     * ge(+Literal)
  422%     Match any literal that is equal or larger than Literal in the
  423%     ordered set of literals.
  424%
  425%     * gt(+Literal)
  426%     Match any literal that is larger than Literal in the ordered set
  427%     of literals.
  428%
  429%     * eq(+Literal)
  430%     Match any literal that is equal to Literal in the ordered set
  431%     of literals.
  432%
  433%     * le(+Literal)
  434%     Match any literal that is equal or smaller than Literal in the
  435%     ordered set of literals.
  436%
  437%     * lt(+Literal)
  438%     Match any literal that is smaller than Literal in the ordered set
  439%     of literals.
  440%
  441%     * between(+Literal1, +Literal2)
  442%     Match any literal that is between Literal1 and Literal2 in the
  443%     ordered set of literals. This may include both Literal1 and
  444%     Literal2.
  445%
  446%     * like(+Pattern)
  447%     Match any literal that matches Pattern case insensitively,
  448%     where the `*' character in Pattern matches zero or more
  449%     characters.
  450%
  451%   Backtracking never returns duplicate triples.  Duplicates can be
  452%   retrieved using rdf/4. The predicate   rdf/3 raises a type-error
  453%   if called with improper arguments.  If   rdf/3  is called with a
  454%   term  literal(_)  as  Subject  or   Predicate  object  it  fails
  455%   silently.  This  allows   for   graph    matching   goals   like
  456%   rdf(S,P,O),rdf(O,P2,O2) to proceed without errors.
  457
  458%!  rdf(?Subject, ?Predicate, ?Object, ?Source) is nondet.
  459%
  460%   As rdf/3 but in addition query  the   graph  to which the triple
  461%   belongs. Unlike rdf/3, this predicate does not remove duplicates
  462%   from the result set.
  463%
  464%   @param Source is a term Graph:Line.  If Source is instantiated,
  465%   passing an atom is the same as passing Atom:_.
  466
  467
  468%!  rdf_has(?Subject, +Predicate, ?Object) is nondet.
  469%
  470%   Succeeds if the triple rdf(Subject,   Predicate, Object) is true
  471%   exploiting the rdfs:subPropertyOf predicate as   well as inverse
  472%   predicates   declared   using   rdf_set_predicate/2   with   the
  473%   =inverse_of= property.
  474
  475%!  rdf_has(?Subject, +Predicate, ?Object, -RealPredicate) is nondet.
  476%
  477%   Same as rdf_has/3, but RealPredicate is   unified  to the actual
  478%   predicate that makes this relation   true. RealPredicate must be
  479%   Predicate or an rdfs:subPropertyOf  Predicate.   If  an  inverse
  480%   match is found, RealPredicate is the term inverse_of(Pred).
  481
  482%!  rdf_reachable(?Subject, +Predicate, ?Object) is nondet.
  483%
  484%   Is true if Object can  be   reached  from  Subject following the
  485%   transitive predicate Predicate or a  sub-property thereof, while
  486%   respecting the symmetric(true) or inverse_of(P2) properties.
  487%
  488%   If used with either Subject or  Object unbound, it first returns
  489%   the origin, followed by  the  reachable  nodes  in breadth-first
  490%   search-order. The implementation internally   looks one solution
  491%   ahead and succeeds deterministically on  the last solution. This
  492%   predicate never generates the same  node   twice  and  is robust
  493%   against cycles in the transitive relation.
  494%
  495%   With all arguments instantiated,   it succeeds deterministically
  496%   if a path can be found from  Subject to Object. Searching starts
  497%   at Subject, assuming the branching factor   is normally lower. A
  498%   call  with  both  Subject   and    Object   unbound   raises  an
  499%   instantiation  error.  The  following    example  generates  all
  500%   subclasses of rdfs:Resource:
  501%
  502%     ==
  503%     ?- rdf_reachable(X, rdfs:subClassOf, rdfs:'Resource').
  504%     X = 'http://www.w3.org/2000/01/rdf-schema#Resource' ;
  505%     X = 'http://www.w3.org/2000/01/rdf-schema#Class' ;
  506%     X = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#Property' ;
  507%     ...
  508%     ==
  509
  510
  511%!  rdf_reachable(?Subject, +Predicate, ?Object, +MaxD, -D) is nondet.
  512%
  513%   Same as rdf_reachable/3, but in addition, MaxD limits the number
  514%   of edges expanded and D is   unified with the `distance' between
  515%   Subject and Object. Distance 0 means  Subject and Object are the
  516%   same resource. MaxD can be the  constant =infinite= to impose no
  517%   distance-limit.
  518
  519%!  rdf_subject(?Resource) is nondet.
  520%
  521%   True if Resource appears as a   subject. This query respects the
  522%   visibility rules implied by the logical update view.
  523%
  524%   @see rdf_resource/1.
  525
  526rdf_subject(Resource) :-
  527    rdf_resource(Resource),
  528    ( rdf(Resource, _, _) -> true ).
  529
  530%!  rdf_resource(?Resource) is nondet.
  531%
  532%   True when Resource is a resource used as a subject or object in
  533%   a triple.
  534%
  535%   This predicate is primarily intended  as   a  way to process all
  536%   resources without processing resources twice.   The user must be
  537%   aware that some of the returned resources  may not appear in any
  538%   _visible_ triple.
  539
  540
  541                 /*******************************
  542                 *     TRIPLE MODIFICATIONS     *
  543                 *******************************/
  544
  545%!  rdf_assert(+Subject, +Predicate, +Object) is det.
  546%
  547%   Assert a new triple into  the   database.  This is equivalent to
  548%   rdf_assert/4 using Graph  =user=.  Subject   and  Predicate  are
  549%   resources. Object is either a resource or a term literal(Value).
  550%   See rdf/3 for an explanation  of   Value  for typed and language
  551%   qualified literals. All arguments  are   subject  to  name-space
  552%   expansion. Complete duplicates (including  the   same  graph and
  553%   `line' and with a compatible `lifespan')   are  not added to the
  554%   database.
  555
  556%!  rdf_assert(+Subject, +Predicate, +Object, +Graph) is det.
  557%
  558%   As rdf_assert/3, adding the  predicate   to  the indicated named
  559%   graph.
  560%
  561%   @param Graph is either the name of a   graph (an atom) or a term
  562%   Graph:Line, where Line is an integer that denotes a line number.
  563
  564%!  rdf_retractall(?Subject, ?Predicate, ?Object) is det.
  565%
  566%   Remove   all   matching   triples   from    the   database.   As
  567%   rdf_retractall/4 using an unbound graph.
  568
  569%!  rdf_retractall(?Subject, ?Predicate, ?Object, ?Graph) is det.
  570%
  571%   As rdf_retractall/3, also matching Graph.  This is particularly
  572%   useful to remove all triples coming from a loaded file. See also
  573%   rdf_unload/1.
  574
  575%!  rdf_update(+Subject, +Predicate, +Object, ++Action) is det.
  576%!  rdf_update(+Subject, +Predicate, +Object, +Graph, ++Action) is det.
  577%
  578%   Replaces one of the three  (four)   fields  on  the matching triples
  579%   depending on Action:
  580%
  581%     * subject(Resource)
  582%     Changes the first field of the triple.
  583%     * predicate(Resource)
  584%     Changes the second field of the triple.
  585%     * object(Object)
  586%     Changes the last field of the triple to the given resource or
  587%     literal(Value).
  588%     * graph(Graph)
  589%     Moves the triple from its current named graph to Graph.
  590%     This only works with rdf_update/5 and throws an error when
  591%     used with rdf_update/4.
  592
  593
  594                 /*******************************
  595                 *          COLLECTIONS         *
  596                 *******************************/
  597
  598%!  rdf_member_property(?Prop, ?Index)
  599%
  600%   Deal with the rdf:_1, ... properties.
  601
  602term_expansion(member_prefix(x),
  603               member_prefix(Prefix)) :-
  604    rdf_db:ns(rdf, NS),
  605    atom_concat(NS, '_', Prefix).
  606member_prefix(x).
  607
  608rdf_member_property(P, N) :-
  609    integer(N),
  610    !,
  611    member_prefix(Prefix),
  612    atom_concat(Prefix, N, P).
  613rdf_member_property(P, N) :-
  614    member_prefix(Prefix),
  615    atom_concat(Prefix, Sub, P),
  616    atom_number(Sub, N).
  617
  618
  619                 /*******************************
  620                 *      ANONYMOUS SUBJECTS      *
  621                 *******************************/
  622
  623%!  rdf_node(-Id)
  624%
  625%   Generate a unique blank node identifier for a subject.
  626%
  627%   @deprecated     New code should use rdf_bnode/1.
  628
  629rdf_node(Resource) :-
  630    rdf_bnode(Resource).
  631
  632%!  rdf_bnode(-Id)
  633%
  634%   Generate a unique anonymous identifier for a subject.
  635
  636rdf_bnode(Value) :-
  637    repeat,
  638    gensym('_:genid', Value),
  639    \+ rdf(Value, _, _),
  640    \+ rdf(_, _, Value),
  641    \+ rdf(_, Value, _),
  642    !.
  643
  644
  645
  646                 /*******************************
  647                 *             TYPES            *
  648                 *******************************/
  649
  650%!  rdf_is_bnode(+Id)
  651%
  652%   Tests if a resource is  a  blank   node  (i.e.  is  an anonymous
  653%   resource). A blank node is represented   as  an atom that starts
  654%   with =|_:|=. For backward compatibility   reason, =|__|= is also
  655%   considered to be a blank node.
  656%
  657%   @see rdf_bnode/1.
  658
  659%!  rdf_is_resource(@Term) is semidet.
  660%
  661%   True if Term is an RDF  resource.   Note  that  this is merely a
  662%   type-test; it does not mean  this   resource  is involved in any
  663%   triple.  Blank nodes are also considered resources.
  664%
  665%   @see rdf_is_bnode/1
  666
  667rdf_is_resource(Term) :-
  668    atom(Term).
  669
  670%!  rdf_is_literal(@Term) is semidet.
  671%
  672%   True if Term is an RDF literal object. Currently only checks for
  673%   groundness and the literal functor.
  674
  675rdf_is_literal(literal(Value)) :-
  676    ground(Value).
  677
  678                 /*******************************
  679                 *             LITERALS         *
  680                 *******************************/
  681
  682%!  rdf_current_literal(-Literal) is nondet.
  683%
  684%   True when Literal is a currently  known literal. Enumerates each
  685%   unique literal exactly once. Note that   it is possible that the
  686%   literal only appears in already deleted triples. Deleted triples
  687%   may be locked due to active   queries, transactions or snapshots
  688%   or may not yet be reclaimed by the garbage collector.
  689
  690
  691%!  rdf_literal_value(+Literal, -Value) is semidet.
  692%
  693%   True when value is  the   appropriate  Prolog  representation of
  694%   Literal in the RDF _|value space|_.  Current mapping:
  695%
  696%     | Plain literals              | Atom                    |
  697%     | Language tagged literal     | Atom holding plain text |
  698%     | xsd:string                  | Atom                    |
  699%     | rdf:XMLLiteral              | XML DOM Tree            |
  700%     | Numeric XSD type            | Number                  |
  701%
  702%   @tbd    Well, this is the long-term idea.
  703%   @tbd    Add mode (-,+)
  704
  705:- rdf_meta
  706    rdf_literal_value(o, -),
  707    typed_value(r, +, -),
  708    numeric_value(r, +, -).  709
  710rdf_literal_value(literal(String), Value) :-
  711    atom(String),
  712    !,
  713    Value = String.
  714rdf_literal_value(literal(lang(_Lang, String)), String).
  715rdf_literal_value(literal(type(Type, String)), Value) :-
  716    typed_value(Type, String, Value).
  717
  718typed_value(Numeric, String, Value) :-
  719    xsdp_numeric_uri(Numeric, NumType),
  720    !,
  721    numeric_value(NumType, String, Value).
  722typed_value(xsd:string, String, String).
  723typed_value(rdf:'XMLLiteral', Value, DOM) :-
  724    (   atom(Value)
  725    ->  setup_call_cleanup(
  726            ( atom_to_memory_file(Value, MF),
  727              open_memory_file(MF, read, In, [free_on_close(true)])
  728            ),
  729            load_structure(stream(In), DOM, [dialect(xml)]),
  730            close(In))
  731    ;   DOM = Value
  732    ).
  733
  734numeric_value(xsd:integer, String, Value) :-
  735    atom_number(String, Value),
  736    integer(Value).
  737numeric_value(xsd:float, String, Value) :-
  738    atom_number(String, Number),
  739    Value is float(Number).
  740numeric_value(xsd:double, String, Value) :-
  741    atom_number(String, Number),
  742    Value is float(Number).
  743numeric_value(xsd:decimal, String, Value) :-
  744    atom_number(String, Value).
  745
  746
  747                 /*******************************
  748                 *            SOURCE            *
  749                 *******************************/
  750
  751%!  rdf_source_location(+Subject, -Location) is nondet.
  752%
  753%   True when triples for Subject are loaded from Location.
  754%
  755%   @param Location is a term File:Line.
  756
  757rdf_source_location(Subject, Source) :-
  758    findall(Source, rdf(Subject, _, _, Source), Sources),
  759    sort(Sources, Unique),
  760    member(Source, Unique).
  761
  762
  763                 /*******************************
  764                 *       GARBAGE COLLECT        *
  765                 *******************************/
  766
  767%!  rdf_create_gc_thread
  768%
  769%   Create the garbage collection thread.
  770
  771:- public
  772    rdf_create_gc_thread/0.  773
  774rdf_create_gc_thread :-
  775    thread_create(rdf_gc_loop, _,
  776                  [ alias('__rdf_GC')
  777                  ]).
  778
  779%!  rdf_gc_loop
  780%
  781%   Take care of running the RDF garbage collection.  This predicate
  782%   is called from a thread started by creating the RDF DB.
  783
  784rdf_gc_loop :-
  785    catch(rdf_gc_loop(0), E, recover_gc(E)).
  786
  787recover_gc('$aborted') :-
  788    !,
  789    thread_self(Me),
  790    thread_detach(Me).
  791recover_gc(Error) :-
  792    print_message(error, Error),
  793    rdf_gc_loop.
  794
  795rdf_gc_loop(CPU) :-
  796    repeat,
  797    (   consider_gc(CPU)
  798    ->  rdf_gc(CPU1),
  799        sleep(CPU1)
  800    ;   sleep(0.1)
  801    ),
  802    fail.
  803
  804%!  rdf_gc(-CPU) is det.
  805%
  806%   Run RDF GC one time. CPU is  the   amount  of CPU time spent. We
  807%   update this in Prolog because portable access to thread specific
  808%   CPU is really hard in C.
  809
  810rdf_gc(CPU) :-
  811    statistics(cputime, CPU0),
  812    (   rdf_gc_
  813    ->  statistics(cputime, CPU1),
  814        CPU is CPU1-CPU0,
  815        rdf_add_gc_time(CPU)
  816    ;   CPU = 0.0
  817    ).
  818
  819%!  rdf_gc is det.
  820%
  821%   Run the RDF-DB garbage collector until no   garbage  is left and all
  822%   tables are fully optimized. Under normal operation a separate thread
  823%   with identifier =|__rdf_GC|= performs garbage  collection as long as
  824%   it is considered `useful'.
  825%
  826%   Using rdf_gc/0 should  only  be  needed   to  ensure  a  fully clean
  827%   database for analysis purposes such as leak detection.
  828
  829rdf_gc :-
  830    has_garbage,
  831    !,
  832    rdf_gc(_),
  833    rdf_gc.
  834rdf_gc.
  835
  836%!  has_garbage is semidet.
  837%
  838%   True if there is something to gain using GC.
  839
  840has_garbage :-
  841    rdf_gc_info_(Info),
  842    has_garbage(Info),
  843    !.
  844
  845has_garbage(Info) :- arg(2, Info, Garbage),     Garbage > 0.
  846has_garbage(Info) :- arg(3, Info, Reindexed),   Reindexed > 0.
  847has_garbage(Info) :- arg(4, Info, Optimizable), Optimizable > 0.
  848
  849%!  consider_gc(+CPU) is semidet.
  850%
  851%   @param CPU is the amount of CPU time spent in the most recent
  852%   GC.
  853
  854consider_gc(_CPU) :-
  855    (   rdf_gc_info_(gc_info(Triples,       % Total #triples in DB
  856                             Garbage,       % Garbage triples in DB
  857                             Reindexed,     % Reindexed & not reclaimed
  858                             Optimizable,   % Non-optimized tables
  859                             _KeepGen,      % Oldest active generation
  860                             _LastGCGen,    % Oldest active gen at last GC
  861                             _ReindexGen,
  862                             _LastGCReindexGen))
  863    ->  (   (Garbage+Reindexed) * 5 > Triples
  864        ;   Optimizable > 4
  865        )
  866    ;   print_message(error, rdf(invalid_gc_info)),
  867        sleep(10)
  868    ),
  869    !.
  870
  871
  872                 /*******************************
  873                 *           STATISTICS         *
  874                 *******************************/
  875
  876%!  rdf_statistics(?KeyValue) is nondet.
  877%
  878%   Obtain statistics on the RDF database.  Defined statistics are:
  879%
  880%     * graphs(-Count)
  881%     Number of named graphs.
  882%
  883%     * triples(-Count)
  884%     Total number of triples in the database.  This is the number
  885%     of asserted triples minus the number of retracted ones.  The
  886%     number of _visible_ triples in a particular context may be
  887%     different due to visibility rules defined by the logical
  888%     update view and transaction isolation.
  889%
  890%     * resources(-Count)
  891%     Number of resources that appear as subject or object in a
  892%     triple.  See rdf_resource/1.
  893%
  894%     * properties(-Count)
  895%     Number of current predicates.  See rdf_current_predicate/1.
  896%
  897%     * literals(-Count)
  898%     Number of current literals.  See rdf_current_literal/1.
  899%
  900%     * gc(GCCount, ReclaimedTriples, ReindexedTriples, Time)
  901%     Information about the garbage collector.
  902%
  903%     * searched_nodes(-Count)
  904%     Number of nodes expanded by rdf_reachable/3 and
  905%     rdf_reachable/5.
  906%
  907%     * lookup(rdf(S,P,O,G), Count)
  908%     Number of queries that have been performed for this particular
  909%     instantiation pattern.  Each of S,P,O,G is either + or -.
  910%     Fails in case the number of performed queries is zero.
  911%
  912%     * hash_quality(rdf(S,P,O,G), Buckets, Quality, PendingResize)
  913%     Statistics on the index for this pattern.  Indices are created
  914%     lazily on the first relevant query.
  915%
  916%     * triples_by_graph(Graph, Count)
%     This statistic is produced for each named graph. See
  918%     =triples= for the interpretation of this value.
  919
  920rdf_statistics(graphs(Count)) :-
  921    rdf_statistics_(graphs(Count)).
  922rdf_statistics(triples(Count)) :-
  923    rdf_statistics_(triples(Count)).
  924rdf_statistics(duplicates(Count)) :-
  925    rdf_statistics_(duplicates(Count)).
  926rdf_statistics(lingering(Count)) :-
  927    rdf_statistics_(lingering(Count)).
  928rdf_statistics(resources(Count)) :-
  929    rdf_statistics_(resources(Count)).
  930rdf_statistics(properties(Count)) :-
  931    rdf_statistics_(predicates(Count)).
  932rdf_statistics(literals(Count)) :-
  933    rdf_statistics_(literals(Count)).
  934rdf_statistics(gc(Count, Reclaimed, Reindexed, Time)) :-
  935    rdf_statistics_(gc(Count, Reclaimed, Reindexed, Time)).
  936rdf_statistics(searched_nodes(Count)) :-
  937    rdf_statistics_(searched_nodes(Count)).
  938rdf_statistics(lookup(Index, Count)) :-
  939    functor(Indexed, indexed, 16),
  940    rdf_statistics_(Indexed),
  941    index(Index, I),
  942    Arg is I + 1,
  943    arg(Arg, Indexed, Count),
  944    Count \== 0.
  945rdf_statistics(hash_quality(Index, Size, Quality,Optimize)) :-
  946    rdf_statistics_(hash_quality(List)),
  947    member(hash(Place,Size,Quality,Optimize), List),
  948    index(Index, Place).
  949rdf_statistics(triples_by_graph(Graph, Count)) :-
  950    rdf_graph_(Graph, Count).
  951
  952index(rdf(-,-,-,-), 0).
  953index(rdf(+,-,-,-), 1).
  954index(rdf(-,+,-,-), 2).
  955index(rdf(+,+,-,-), 3).
  956index(rdf(-,-,+,-), 4).
  957index(rdf(+,-,+,-), 5).
  958index(rdf(-,+,+,-), 6).
  959index(rdf(+,+,+,-), 7).
  960
  961index(rdf(-,-,-,+), 8).
  962index(rdf(+,-,-,+), 9).
  963index(rdf(-,+,-,+), 10).
  964index(rdf(+,+,-,+), 11).
  965index(rdf(-,-,+,+), 12).
  966index(rdf(+,-,+,+), 13).
  967index(rdf(-,+,+,+), 14).
  968index(rdf(+,+,+,+), 15).
  969
  970
  971                 /*******************************
  972                 *           PREDICATES         *
  973                 *******************************/
  974
  975%!  rdf_current_predicate(?Predicate) is nondet.
  976%
  977%   True when Predicate is a   currently known predicate. Predicates
%   are created if a triple is created  that uses this predicate or
  979%   a property of the predicate   is  set using rdf_set_predicate/2.
  980%   The predicate may (no longer) have triples associated with it.
  981%
  982%   Note that resources that have  =|rdf:type|= =|rdf:Property|= are
  983%   not automatically included in the  result-set of this predicate,
  984%   while _all_ resources that appear as   the  second argument of a
  985%   triple _are_ included.
  986%
  987%   @see rdf_predicate_property/2.
  988
  989rdf_current_predicate(P, DB) :-
  990    rdf_current_predicate(P),
  991    (   rdf(_,P,_,DB)
  992    ->  true
  993    ).
  994
  995%!  rdf_predicate_property(?Predicate, ?Property)
  996%
  997%   Query properties of  a  defined   predicate.  Currently  defined
  998%   properties are given below.
  999%
 1000%     * symmetric(Bool)
%     True if the predicate is defined to be symmetric. I.e., {A} P
 1002%     {B} implies {B} P {A}. Setting symmetric is equivalent to
 1003%     inverse_of(Self).
 1004%
 1005%     * inverse_of(Inverse)
 1006%     True if this predicate is the inverse of Inverse. This
 1007%     property is used by rdf_has/3, rdf_has/4, rdf_reachable/3 and
 1008%     rdf_reachable/5.
 1009%
 1010%     * transitive(Bool)
 1011%     True if this predicate is transitive. This predicate is
 1012%     currently not used. It might be used to make rdf_has/3 imply
 1013%     rdf_reachable/3 for transitive predicates.
 1014%
 1015%     * triples(Triples)
 1016%     Unify Triples with the number of existing triples using this
 1017%     predicate as second argument. Reporting the number of triples
 1018%     is intended to support query optimization.
 1019%
 1020%     * rdf_subject_branch_factor(-Float)
 1021%     Unify Float with the average number of triples associated with
 1022%     each unique value for the subject-side of this relation. If
 1023%     there are no triples the value 0.0 is returned. This value is
 1024%     cached with the predicate and recomputed only after
 1025%     substantial changes to the triple set associated to this
%     relation. This property is intended for path optimisation
 1027%     when solving conjunctions of rdf/3 goals.
 1028%
 1029%     * rdf_object_branch_factor(-Float)
 1030%     Unify Float with the average number of triples associated with
 1031%     each unique value for the object-side of this relation. In
 1032%     addition to the comments with the =rdf_subject_branch_factor=
 1033%     property, uniqueness of the object value is computed from the
 1034%     hash key rather than the actual values.
 1035%
 1036%     * rdfs_subject_branch_factor(-Float)
 1037%     Same as =rdf_subject_branch_factor=, but also considering
 1038%     triples of `subPropertyOf' this relation. See also rdf_has/3.
 1039%
 1040%     * rdfs_object_branch_factor(-Float)
 1041%     Same as =rdf_object_branch_factor=, but also considering
 1042%     triples of `subPropertyOf' this relation. See also rdf_has/3.
 1043%
 1044%   @see rdf_set_predicate/2.
 1045
 1046rdf_predicate_property(P, Prop) :-
 1047    var(P),
 1048    !,
 1049    rdf_current_predicate(P),
 1050    rdf_predicate_property_(P, Prop).
 1051rdf_predicate_property(P, Prop) :-
 1052    rdf_predicate_property_(P, Prop).
 1053
 1054%!  rdf_set_predicate(+Predicate, +Property) is det.
 1055%
 1056%   Define a property of  the   predicate.  This predicate currently
 1057%   supports the following properties:
 1058%
 1059%       - symmetric(+Boolean)
 1060%       Set/unset the predicate as being symmetric.  Using
 1061%       symmetric(true) is the same as inverse_of(Predicate),
 1062%       i.e., creating a predicate that is the inverse of
 1063%       itself.
 1064%       - transitive(+Boolean)
 1065%       Sets the transitive property.
 1066%       - inverse_of(+Predicate2)
 1067%       Define Predicate as the inverse of Predicate2. An inverse
 1068%       relation is deleted using inverse_of([]).
 1069%
 1070%   The `transitive` property is currently not used. The `symmetric`
 1071%   and `inverse_of` properties are considered   by  rdf_has/3,4 and
 1072%   rdf_reachable/3.
 1073%
 1074%   @tbd    Maintain these properties based on OWL triples.
 1075
 1076
 1077                 /*******************************
 1078                 *            SNAPSHOTS         *
 1079                 *******************************/
 1080
 1081%!  rdf_snapshot(-Snapshot) is det.
 1082%
 1083%   Take a snapshot of the current state   of  the RDF store. Later,
 1084%   goals may be executed in the  context   of  the database at this
 1085%   moment using rdf_transaction/3 with  the   =snapshot=  option. A
 1086%   snapshot created outside  a  transaction   exists  until  it  is
 1087%   deleted. Snapshots taken inside a transaction   can only be used
 1088%   inside this transaction.
 1089
 1090%!  rdf_delete_snapshot(+Snapshot) is det.
 1091%
 1092%   Delete a snapshot as obtained   from  rdf_snapshot/1. After this
 1093%   call, resources used for maintaining the snapshot become subject
 1094%   to garbage collection.
 1095
 1096%!  rdf_current_snapshot(?Term) is nondet.
 1097%
 1098%   True when Term is a currently known snapshot.
 1099%
 1100%   @bug    Enumeration of snapshots is slow.
 1101
 1102rdf_current_snapshot(Term) :-
 1103    current_blob(Term, rdf_snapshot).
 1104
 1105
 1106                 /*******************************
 1107                 *          TRANSACTION         *
 1108                 *******************************/
 1109
 1110%!  rdf_transaction(:Goal) is semidet.
 1111%
 1112%   Same as rdf_transaction(Goal, user, []).  See rdf_transaction/3.
 1113
 1114%!  rdf_transaction(:Goal, +Id) is semidet.
 1115%
 1116%   Same as rdf_transaction(Goal, Id, []).  See rdf_transaction/3.
 1117
 1118%!  rdf_transaction(:Goal, +Id, +Options) is semidet.
 1119%
 1120%   Run Goal in an RDF  transaction.   Compared to the ACID model,
 1121%   RDF transactions have the following properties:
 1122%
 1123%     1. Modifications inside the transactions become all atomically
 1124%        visible to the outside world if Goal succeeds or remain
 1125%        invisible if Goal fails or throws an exception.  I.e.,
%        the _atomicity_ property is fully supported.
 1127%     2. _Consistency_ is not guaranteed. Later versions may
 1128%        implement consistency constraints that will be checked
 1129%        serialized just before the actual commit of a transaction.
%     3. Concurrently executing transactions do not influence each
 1131%        other.  I.e., the _isolation_ property is fully supported.
 1132%     4. _Durability_ can be activated by loading
 1133%        library(semweb/rdf_persistency).
 1134%
 1135%   Processed options are:
 1136%
 1137%     * snapshot(+Snapshot)
 1138%     Execute Goal using the state of the RDF store as stored in
 1139%     Snapshot.  See rdf_snapshot/1.  Snapshot can also be the
 1140%     atom =true=, which implies that an anonymous snapshot is
 1141%     created at the current state of the store.  Modifications
 1142%     due to executing Goal are only visible to Goal.
 1143
 1144rdf_transaction(Goal) :-
 1145    rdf_transaction(Goal, user, []).
 1146rdf_transaction(Goal, Id) :-
 1147    rdf_transaction(Goal, Id, []).
 1148
 1149%!  rdf_active_transaction(?Id) is nondet.
 1150%
 1151%   True if Id is the identifier of  a transaction in the context of
 1152%   which  this  call  is  executed.  If  Id  is  not  instantiated,
 1153%   backtracking yields transaction identifiers   starting  with the
 1154%   innermost nested transaction. Transaction   identifier terms are
 1155%   not copied, need not be ground   and  can be instantiated during
 1156%   the transaction.
 1157
 1158rdf_active_transaction(Id) :-
 1159    rdf_active_transactions_(List),
 1160    member(Id, List).
 1161
 1162%!  rdf_monitor(:Goal, +Options)
 1163%
 1164%   Call Goal if specified actions occur on the database.
 1165
 1166rdf_monitor(Goal, Options) :-
 1167    monitor_mask(Options, 0xffff, Mask),
 1168    rdf_monitor_(Goal, Mask).
 1169
 1170monitor_mask([], Mask, Mask).
 1171monitor_mask([H|T], Mask0, Mask) :-
 1172    update_mask(H, Mask0, Mask1),
 1173    monitor_mask(T, Mask1, Mask).
 1174
 1175update_mask(-X, Mask0, Mask) :-
 1176    !,
 1177    monitor_mask(X, M),
 1178    Mask is Mask0 /\ \M.
 1179update_mask(+X, Mask0, Mask) :-
 1180    !,
 1181    monitor_mask(X, M),
 1182    Mask is Mask0 \/ M.
 1183update_mask(X, Mask0, Mask) :-
 1184    monitor_mask(X, M),
 1185    Mask is Mask0 \/ M.
 1186
 1187%!  monitor_mask(Name, Mask)
 1188%
 1189%   Mask bit for the monitor events.  Note that this must be kept
 1190%   consistent with the enum broadcast_id defined in rdf_db.c
 1191
 1192                                        % C-defined broadcasts
 1193monitor_mask(assert,       0x0001).
 1194monitor_mask(assert(load), 0x0002).
 1195monitor_mask(retract,      0x0004).
 1196monitor_mask(update,       0x0008).
 1197monitor_mask(new_literal,  0x0010).
 1198monitor_mask(old_literal,  0x0020).
 1199monitor_mask(transaction,  0x0040).
 1200monitor_mask(load,         0x0080).
 1201monitor_mask(create_graph, 0x0100).
 1202monitor_mask(reset,        0x0200).
 1203                                        % prolog defined broadcasts
 1204monitor_mask(parse,        0x1000).
 1205monitor_mask(unload,       0x1000).     % FIXME: Duplicate
 1206                                        % mask for all
 1207monitor_mask(all,          0xffff).
 1208
 1209%rdf_broadcast(Term, MaskName) :-
 1210%%      monitor_mask(MaskName, Mask),
 1211%%      rdf_broadcast_(Term, Mask).
 1212
 1213
 1214                 /*******************************
 1215                 *            WARM              *
 1216                 *******************************/
 1217
 1218%!  rdf_warm_indexes
 1219%
 1220%   Warm all indexes.  See rdf_warm_indexes/1.
 1221
 1222rdf_warm_indexes :-
 1223    findall(Index, rdf_index(Index), Indexes),
 1224    rdf_warm_indexes(Indexes).
 1225
 1226rdf_index(s).
 1227rdf_index(p).
 1228rdf_index(o).
 1229rdf_index(sp).
 1230rdf_index(o).
 1231rdf_index(po).
 1232rdf_index(spo).
 1233rdf_index(g).
 1234rdf_index(sg).
 1235rdf_index(pg).
 1236
 1237%!  rdf_warm_indexes(+Indexes) is det.
 1238%
 1239%   Create the named indexes.  Normally,   the  RDF database creates
%   indexes lazily the first time they are needed. This predicate
 1241%   serves two purposes: it provides an   explicit  way to make sure
 1242%   that the required indexes  are   present  and  creating multiple
 1243%   indexes at the same time is more efficient.
 1244
 1245
 1246                 /*******************************
 1247                 *          DUPLICATES          *
 1248                 *******************************/
 1249
 1250%!  rdf_update_duplicates is det.
 1251%
 1252%   Update the duplicate administration of the RDF store. This marks
%   every triple that is potentially  a   duplicate  of another as
 1254%   duplicate. Being potentially a  duplicate   means  that subject,
 1255%   predicate and object are equivalent and   the  life-times of the
 1256%   two triples overlap.
 1257%
 1258%   The duplicates marks are used to  reduce the administrative load
 1259%   of avoiding duplicate answers.  Normally,   the  duplicates  are
 1260%   marked using a background thread that   is  started on the first
 1261%   query that produces a substantial amount of duplicates.
 1262
 1263:- public
 1264    rdf_update_duplicates_thread/0. 1265
 1266%!  rdf_update_duplicates_thread
 1267%
 1268%   Start a thread to initialize the duplicate administration.
 1269
 1270rdf_update_duplicates_thread :-
 1271    thread_create(rdf_update_duplicates, _,
 1272                  [ detached(true),
 1273                    alias('__rdf_duplicate_detecter')
 1274                  ]).
 1275
 1276%!  rdf_update_duplicates is det.
 1277%
%   Update the duplicate administration. If   this  administration is
%   up-to-date, each triple that _may_ have a duplicate is flagged.
 1280%   The predicate rdf/3 uses this administration to speedup checking
 1281%   for duplicate answers.
 1282%
 1283%   This predicate is normally  executed   from  a background thread
 1284%   named =__rdf_duplicate_detecter= which is created   when a query
 1285%   discovers that checking for duplicates becomes too expensive.
 1286
 1287
 1288                 /*******************************
 1289                 *    QUICK BINARY LOAD/SAVE    *
 1290                 *******************************/
 1291
 1292%!  rdf_save_db(+File) is det.
 1293%!  rdf_save_db(+File, +Graph) is det.
 1294%
 1295%   Save triples into File in a   quick-to-load binary format. If Graph
 1296%   is supplied only triples flagged to originate from that database
 1297%   are  added.  Files  created  this  way    can  be  loaded  using
 1298%   rdf_load_db/1.
 1299
 1300:- create_prolog_flag(rdf_triple_format, 3, [type(integer)]). 1301
 1302rdf_save_db(File) :-
 1303    current_prolog_flag(rdf_triple_format, Version),
 1304    setup_call_cleanup(
 1305        open(File, write, Out, [type(binary)]),
 1306        ( set_stream(Out, record_position(false)),
 1307          rdf_save_db_(Out, _, Version)
 1308        ),
 1309        close(Out)).
 1310
 1311
 1312rdf_save_db(File, Graph) :-
 1313    current_prolog_flag(rdf_triple_format, Version),
 1314    setup_call_cleanup(
 1315        open(File, write, Out, [type(binary)]),
 1316        ( set_stream(Out, record_position(false)),
 1317          rdf_save_db_(Out, Graph, Version)
 1318        ),
 1319        close(Out)).
 1320
 1321
 1322%!  rdf_load_db_no_admin(+File, +Id, -Graphs) is det.
 1323%
 1324%   Load triples from a  .trp  file   without  updating  the  source
 1325%   administration. Id is  handled  to   monitor  action.  Graphs is
 1326%   a list of graph-names encountered in File.
 1327
 1328rdf_load_db_no_admin(File, Id, Graphs) :-
 1329    open(File, read, In, [type(binary)]),
 1330    set_stream(In, record_position(false)),
 1331    call_cleanup(rdf_load_db_(In, Id, Graphs), close(In)).
 1332
 1333
 1334%!  check_loaded_cache(+Graph, +Graphs, +Modified) is det.
 1335%
 1336%   Verify the loaded cache file and optionally fix the modification
 1337%   time (new versions save this along with the snapshot).
 1338%
 1339%   @tbd    What to do if there is a cache mismatch? Delete the loaded
 1340%           graphs and fail?
 1341
 1342check_loaded_cache(DB, [DB], _Modified) :- !.
 1343check_loaded_cache(DB, Graphs, _) :-
 1344    print_message(warning, rdf(inconsistent_cache(DB, Graphs))).
 1345
 1346
 1347%!  rdf_load_db(+File) is det.
 1348%
 1349%   Load triples from a file created using rdf_save_db/2.
 1350
 1351rdf_load_db(File) :-
 1352    uri_file_name(URL, File),
 1353    rdf_load_db_no_admin(File, URL, _Graphs).
 1354
 1355
 1356                 /*******************************
 1357                 *          LOADING RDF         *
 1358                 *******************************/
 1359
 1360:- multifile
 1361    rdf_open_hook/8,
 1362    rdf_open_decode/4,              % +Encoding, +File, -Stream, -Cleanup
 1363    rdf_load_stream/3,              % +Format, +Stream, +Options
 1364    rdf_file_type/2,                % ?Extension, ?Format
 1365    rdf_storage_encoding/2,         % ?Extension, ?Encoding
 1366    url_protocol/1.                 % ?Protocol
 1367
 1368%!  rdf_load(+FileOrList) is det.
 1369%
 1370%   Same as rdf_load(FileOrList, []).  See rdf_load/2.
 1371
 1372%!  rdf_load(+FileOrList, :Options) is det.
 1373%
 1374%   Load RDF data. Options provides   additional processing options.
 1375%   Defined options are:
 1376%
 1377%       * blank_nodes(+ShareMode)
 1378%       How to handle equivalent blank nodes.  If =share= (default),
 1379%       equivalent blank nodes are shared in the same resource.
 1380%
 1381%       * base_uri(+URI)
 1382%       URI that is used for rdf:about="" and other RDF constructs
 1383%       that are relative to the base uri.  Default is the source
 1384%       URL.
 1385%
 1386%       * concurrent(+Jobs)
 1387%       If FileOrList is a list of files, process the input files
%       using Jobs threads concurrently.  Default is the minimum
 1389%       of the number of cores and the number of inputs.  Higher
 1390%       values can be useful when loading inputs from (slow)
 1391%       network connections.  Using 1 (one) does not use
 1392%       separate worker threads.
 1393%
 1394%       * format(+Format)
 1395%       Specify the source format explicitly. Normally this is
 1396%       deduced from the filename extension or the mime-type. The
 1397%       core library understands the formats xml (RDF/XML) and
 1398%       triples (internal quick load and cache format).  Plugins,
 1399%       such as library(semweb/turtle) extend the set of recognised
 1400%       extensions.
 1401%
 1402%       * graph(?Graph)
 1403%       Named graph in which to load the data.  It is *not* allowed
 1404%       to load two sources into the same named graph.  If Graph is
 1405%       unbound, it is unified to the graph into which the data is
 1406%       loaded.  The default graph is a =|file://|= URL when loading
 1407%       a file or, if the specification is a URL, its normalized
 1408%       version without the optional _|#fragment|_.
 1409%
 1410%       * if(Condition)
 1411%       When to load the file. One of =true=, =changed= (default) or
 1412%       =not_loaded=.
 1413%
 1414%       * modified(-Modified)
 1415%       Unify Modified with one of =not_modified=, cached(File),
 1416%       last_modified(Stamp) or =unknown=.
 1417%
 1418%       * cache(Bool)
 1419%       If =false=, do not use or create a cache file.
 1420%
 1421%       * register_namespaces(Bool)
 1422%       If =true= (default =false=), register =xmlns= namespace
 1423%       declarations or Turtle =|@prefix|= prefixes using
 1424%       rdf_register_prefix/3 if there is no conflict.
 1425%
 1426%       * silent(+Bool)
 1427%       If =true=, the message reporting completion is printed using
 1428%       level =silent=. Otherwise the level is =informational=. See
 1429%       also print_message/2.
 1430%
 1431%       * prefixes(-Prefixes)
 1432%       Returns the prefixes defined in the source   data file as a list
 1433%       of pairs.
 1434%
 1435%       * multifile(+Boolean)
 1436%       Indicate that the addressed graph may be populated with
 1437%       triples from multiple sources. This disables caching and
 1438%       avoids that an rdf_load/2 call affecting the specified
 1439%       graph cleans the graph.
 1440%
 1441%   Other  options  are  forwarded  to  process_rdf/3.  By  default,
 1442%   rdf_load/2 only loads RDF/XML from files.  It can be extended to
 1443%   load data from other formats and   locations  using plugins. The
 1444%   full set of plugins relevant to   support  different formats and
 1445%   locations is below:
 1446%
 1447%     ==
 1448%     :- use_module(library(semweb/turtle)).        % Turtle and TriG
 1449%     :- use_module(library(semweb/rdf_ntriples)).
 1450%     :- use_module(library(semweb/rdf_zlib_plugin)).
 1451%     :- use_module(library(semweb/rdf_http_plugin)).
 1452%     :- use_module(library(http/http_ssl_plugin)).
 1453%     ==
 1454%
%   @see    rdf_db:rdf_open_hook/8, library(semweb/rdf_persistency) and
 1456%           library(semweb/rdf_cache)
 1457
 1458:- dynamic
 1459    rdf_loading/3.                          % Graph, Queue, Thread
 1460
 1461rdf_load(Spec) :-
 1462    rdf_load(Spec, []).
 1463
 1464:- if(\+current_predicate(concurrent/3)). 1465concurrent(_, Goals, _) :-
 1466    forall(member(G, Goals), call(G)).
 1467:- endif. 1468
 1469% Note that we kill atom garbage collection.  This improves performance
 1470% with about 15% loading the LUBM Univ_50 benchmark.
 1471
 1472rdf_load(Spec, M:Options) :-
 1473    must_be(list, Options),
 1474    current_prolog_flag(agc_margin, Old),
 1475    setup_call_cleanup(
 1476        set_prolog_flag(agc_margin, 0),
 1477        rdf_load_noagc(Spec, M, Options),
 1478        set_prolog_flag(agc_margin, Old)).
 1479
 1480rdf_load_noagc(List, M, Options) :-
 1481    is_list(List),
 1482    !,
 1483    flatten(List, Inputs),          % Compatibility: allow nested lists
 1484    maplist(must_be(ground), Inputs),
 1485    length(Inputs, Count),
 1486    load_jobs(Count, Jobs, Options),
 1487    (   Jobs =:= 1
 1488    ->  forall(member(Spec, Inputs),
 1489               rdf_load_one(Spec, M, Options))
 1490    ;   maplist(load_goal(Options, M), Inputs, Goals),
 1491        concurrent(Jobs, Goals, [])
 1492    ).
 1493rdf_load_noagc(One, M, Options) :-
 1494    must_be(ground, One),
 1495    rdf_load_one(One, M, Options).
 1496
 1497load_goal(Options, M, Spec, rdf_load_one(Spec, M, Options)).
 1498
 1499load_jobs(_, Jobs, Options) :-
 1500    option(concurrent(Jobs), Options),
 1501    !,
 1502    must_be(positive_integer, Jobs).
 1503load_jobs(Count, Jobs, _) :-
 1504    current_prolog_flag(cpu_count, CPUs),
 1505    CPUs > 0,
 1506    !,
 1507    Jobs is max(1, min(CPUs, Count)).
 1508load_jobs(_, 1, _).
 1509
 1510
 1511rdf_load_one(Spec, M, Options) :-
 1512    source_url(Spec, Protocol, SourceURL),
 1513    load_graph(SourceURL, Graph, Options),
 1514    setup_call_cleanup(
 1515        with_mutex(rdf_load_file,
 1516                   rdf_start_load(SourceURL, Loading)),
 1517        rdf_load_file(Loading, Spec, SourceURL, Protocol,
 1518                      Graph, M, Options),
 1519        rdf_end_load(Loading)).
 1520
 1521%!  rdf_start_load(+SourceURL, -WhatToDo) is det.
 1522%!  rdf_end_load(+WhatToDo) is det.
 1523%!  rdf_load_file(+WhatToDo, +Spec, +SourceURL, +Protocol, +Graph,
 1524%!                +Module, +Options) is det.
 1525%
 1526%   Of these three predicates, rdf_load_file/7   does the real work.
 1527%   The others deal with the  possibility   that  the graph is being
 1528%   loaded by another thread. In that case,   we  wait for the other
 1529%   thread to complete the work.
 1530%
 1531%   @tbd    What if both threads disagree on what is loaded into the
 1532%           graph?
 1533%   @see    Code is modelled closely after how concurrent loading
 1534%           is handled in SWI-Prolog's boot/init.pl
 1535
 1536rdf_start_load(SourceURL, queue(Queue)) :-
 1537    rdf_loading(SourceURL, Queue, LoadThread),
 1538    \+ thread_self(LoadThread),
 1539    !,
 1540    debug(rdf(load), '~p is being loaded by thread ~w; waiting ...',
 1541          [ SourceURL, LoadThread]).
 1542rdf_start_load(SourceURL, Ref) :-
 1543    thread_self(Me),
 1544    message_queue_create(Queue),
 1545    assertz(rdf_loading(SourceURL, Queue, Me), Ref).
 1546
 1547rdf_end_load(queue(_)) :- !.
 1548rdf_end_load(Ref) :-
 1549    clause(rdf_loading(_, Queue, _), _, Ref),
 1550    erase(Ref),
 1551    thread_send_message(Queue, done),
 1552    message_queue_destroy(Queue).
 1553
%!  rdf_load_file(+Ref, +Spec, +SourceURL, +Protocol, +Graph, +M, +Options)
%
%   Perform the actual load of SourceURL into Graph.
%
%   If Ref is queue(Queue), nothing is loaded here: the call only
%   waits for a message on Queue, ignoring any exception raised
%   while waiting.  Otherwise the source is opened using
%   rdf_open_input/8 and handled depending on Modified:
%
%     - `not_modified`: nothing is loaded and nothing is reported.
%     - cached(CacheFile): Graph is unloaded and reloaded from
%       CacheFile.
%     - otherwise: the input is parsed by rdf_load_stream/3, after
%       which the cache is saved and file prefixes are registered.

rdf_load_file(queue(Queue), _Spec, _SourceURL, _Protocol, _Graph, _M, _Options) :-
    !,
    % Any error while waiting (e.g. on the queue itself) is ignored.
    catch(thread_get_message(Queue, _), _, true).
rdf_load_file(_Ref, _Spec, SourceURL, Protocol, Graph, M, Options) :-
    debug(rdf(load), 'RDF: Loading ~q into ~q', [SourceURL, Graph]),
    statistics(cputime, T0),
    rdf_open_input(SourceURL, Protocol, Graph,
                   In, Cleanup, Modified, Format, Options),
    % Runs Cleanup and raises an existence error for unknown formats.
    supported_format(Format, Cleanup),
    % Hand Modified back to the caller through the modified(_) option.
    return_modified(Modified, Options),
    (   Modified == not_modified
    ->  Action = none
    ;   Modified = cached(CacheFile)
    ->  do_unload(Graph),
        % An exception while reading the cache is turned into failure.
        catch(rdf_load_db_no_admin(CacheFile, cache(Graph), Graphs), _, fail),
        check_loaded_cache(Graph, Graphs, Modified),
        Action = load
    ;   % Default for base_uri is Graph; fall back to SourceURL if
        % that leaves BaseURI unbound.
        option(base_uri(BaseURI), Options, Graph),
        (   var(BaseURI)
        ->  BaseURI = SourceURL
        ;   true
        ),
        once(phrase(derived_options(Options, NSList), Extra)),
        merge_options([ base_uri(BaseURI),
                        graph(Graph),
                        format(Format)
                      | Extra
                      ], Options, RDFOptions),
        % With multifile(true) the existing content of Graph is kept.
        (   option(multifile(true), Options)
        ->  true
        ;   do_unload(Graph)
        ),
        graph_modified(Modified, ModifiedStamp),
        rdf_set_graph_source(Graph, SourceURL, ModifiedStamp),
        call_cleanup(rdf_load_stream(Format, In, M:RDFOptions),
                     Cleanup),
        save_cache(Graph, SourceURL, Options),
        register_file_prefixes(NSList),
        format_action(Format, Action)
    ),
    rdf_statistics_(triples(Graph, Triples)),
    report_loaded(Action, SourceURL, Graph, Triples, T0, Options).
 1596
 1597supported_format(Format, _Cleanup) :-
 1598    rdf_file_type(_, Format),
 1599    !.
 1600supported_format(Format, Cleanup) :-
 1601    call(Cleanup),
 1602    existence_error(rdf_format_plugin, Format).
 1603
 1604format_action(triples, load) :- !.
 1605format_action(_, parsed).
 1606
 1607save_cache(Graph, SourceURL, Options) :-
 1608    option(cache(true), Options, true),
 1609    rdf_cache_file(SourceURL, write, CacheFile),
 1610    !,
 1611    catch(save_cache(Graph, CacheFile), E,
 1612          print_message(warning, E)).
 1613save_cache(_, _, _).
 1614
 1615derived_options([], _) -->
 1616    [].
 1617derived_options([H|T], NSList) -->
 1618    (   {   H == register_namespaces(true)
 1619        ;   H == (register_namespaces = true)
 1620        }
 1621    ->  [ namespaces(NSList) ]
 1622    ;   []
 1623    ),
 1624    derived_options(T, NSList).
 1625
 1626graph_modified(last_modified(Stamp), Stamp).
 1627graph_modified(unknown, Stamp) :-
 1628    get_time(Stamp).
 1629
 1630return_modified(Modified, Options) :-
 1631    option(modified(M0), Options),
 1632    !,
 1633    M0 = Modified.
 1634return_modified(_, _).
 1635
 1636
 1637                 /*******************************
 1638                 *        INPUT HANDLING        *
 1639                 *******************************/
 1640
 1641/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 1642This section deals with pluggable input sources.  The task of the input
 1643layer is
 1644
 1645    * Decide on the graph-name
 1646    * Decide on the source-location
 1647    * Decide whether loading is needed (if-modified)
 1648    * Decide on the serialization in the input
 1649
 1650The protocol must ensure minimal  overhead,   in  particular for network
 1651protocols. E.g. for HTTP we want to make a single call on the server and
use If-modified-since to verify that we need not reload this file.
 1653- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 1654
%!  rdf_open_input(+SourceURL, +Protocol, +Graph,
%!                 -Stream, -Cleanup, -Modified, -Format, +Options)
%
%   Open an input source.
%
%   Options processed:
%
%       * multifile(Bool)
%       If `true`, no attempt is made to find out what we already
%       have loaded or cached.
%       * if(Condition)
%       * cache(Cache)
%       * format(Format)
%
%   @param  Modified is one of =not_modified=, last_modified(Time),
%           cached(CacheFile) or =unknown=

rdf_open_input(SourceURL, Protocol, Graph,
               Stream, Cleanup, Modified, Format, Options) :-
    % Determine HaveModified: the time stamp of what we already have.
    % It remains unbound if multifile(true) or if(true) is given or
    % if neither a loaded graph nor a cache file is known.
    (   option(multifile(true), Options)
    ->  true
    ;   option(if(If), Options, changed),
        (   If == true
        ->  true
        ;   rdf_graph_source_(Graph, SourceURL, HaveModified)
        ->  true
        ;   option(cache(true), Options, true),
            rdf_cache_file(SourceURL, read, CacheFile)
        ->  time_file(CacheFile, HaveModified)
        ;   true
        )
    ),
    % Format remains unbound if no format(_) option is given.
    option(format(Format), Options, _),
    open_input_if_modified(Protocol, SourceURL, HaveModified,
                           Stream, Cleanup, Modified0, Format, Options),
    % CacheFile is only bound if HaveModified came from the cache
    % file.  In that case "not modified" means: load the cache.
    (   Modified0 == not_modified
    ->  (   nonvar(CacheFile)
        ->  Modified = cached(CacheFile)
        ;   Modified = not_modified
        )
    ;   Modified = Modified0
    ).
 1696
 1697
 1698%!  source_url(+Spec, -Class, -SourceURL) is det.
 1699%
 1700%   Determine class and url of the source.  Class is one of
 1701%
 1702%       * stream(Stream)
 1703%       * file
 1704%       * a url-protocol (e.g., =http=)
 1705
 1706source_url(stream(In), stream(In), SourceURL) :-
 1707    !,
 1708    (   stream_property(In, file_name(File))
 1709    ->  to_url(File, SourceURL)
 1710    ;   gensym('stream://', SourceURL)
 1711    ).
 1712source_url(Stream, Class, SourceURL) :-
 1713    is_stream(Stream),
 1714    !,
 1715    source_url(stream(Stream), Class, SourceURL).
 1716source_url(Spec, Protocol, SourceURL) :-
 1717    compound(Spec),
 1718    !,
 1719    source_file(Spec, Protocol, SourceURL).
 1720source_url(FileURL, Protocol, SourceURL) :-             % or return FileURL?
 1721    uri_file_name(FileURL, File),
 1722    !,
 1723    source_file(File, Protocol, SourceURL).
 1724source_url(SourceURL0, Protocol, SourceURL) :-
 1725    is_url(SourceURL0, Protocol, SourceURL),
 1726    !.
 1727source_url(File, Protocol, SourceURL) :-
 1728    source_file(File, Protocol, SourceURL).
 1729
 1730source_file(Spec, file(SExt), SourceURL) :-
 1731    findall(Ext, valid_extension(Ext), Exts),
 1732    absolute_file_name(Spec, File, [access(read), extensions([''|Exts])]),
 1733    storage_extension(_Plain, SExt, File),
 1734    uri_file_name(SourceURL, File).
 1735
 1736to_url(URL, URL) :-
 1737    uri_is_global(URL),
 1738    !.
 1739to_url(File, URL) :-
 1740    absolute_file_name(File, Path),
 1741    uri_file_name(URL, Path).
 1742
 1743storage_extension(Plain, SExt, File) :-
 1744    file_name_extension(Plain, SExt, File),
 1745    SExt \== '',
 1746    rdf_storage_encoding(SExt, _),
 1747    !.
 1748storage_extension(File, '', File).
 1749
 1750%!  load_graph(+SourceURL, -Graph, +Options) is det.
 1751%
 1752%   Graph is the graph into which  we   load  the  data. Tries these
 1753%   options:
 1754%
 1755%     1. The graph(Graph) option
 1756%     2. The db(Graph) option (backward compatibility)
 1757%     3. The base_uri(BaseURI) option
 1758%     4. The source URL
 1759
 1760load_graph(_Source, Graph, Options) :-
 1761    option(multifile(true), Options),
 1762    !,
 1763    (   (   option(graph(Graph), Options)
 1764        ->  true
 1765        ;   option(db(Graph), Options)
 1766        ),
 1767        ground(Graph)
 1768    ->  true
 1769    ;   throw(error(existence_error(option, graph),
 1770                    context(_, "rdf_load/2: using multifile requires graph")))
 1771    ).
 1772load_graph(Source, Graph, Options) :-
 1773    (   option(graph(Graph), Options)
 1774    ;   option(db(Graph), Options)
 1775    ),
 1776    !,
 1777    load_graph2(Source, Graph, Options).
 1778load_graph(Source, Graph, Options) :-
 1779    load_graph2(Source, Graph, Options).
 1780
 1781load_graph2(_, Graph, _) :-
 1782    ground(Graph),
 1783    !.
 1784load_graph2(_Source, Graph, Options) :-
 1785    option(base_uri(Graph), Options),
 1786    Graph \== [],
 1787    ground(Graph),
 1788    !.
 1789load_graph2(Source, Graph, _) :-
 1790    load_graph(Source, Graph).
 1791
 1792load_graph(SourceURL, BaseURI) :-
 1793    file_name_extension(BaseURI, Ext, SourceURL),
 1794    rdf_storage_encoding(Ext, _),
 1795    !.
 1796load_graph(SourceURL, SourceURL).
 1797
 1798
 1799open_input_if_modified(stream(In), SourceURL, _, In, true,
 1800                       unknown, Format, _) :-
 1801    !,
 1802    (   var(Format)
 1803    ->  guess_format(SourceURL, Format)
 1804    ;   true
 1805    ).
 1806open_input_if_modified(file(SExt), SourceURL, HaveModified, Stream, Cleanup,
 1807                       Modified, Format, _) :-
 1808    !,
 1809    uri_file_name(SourceURL, File),
 1810    (   SExt == '' -> Plain = File; file_name_extension(Plain, SExt, File)),
 1811    time_file(File, LastModified),
 1812    (   nonvar(HaveModified),
 1813        HaveModified >= LastModified
 1814    ->  Modified = not_modified,
 1815        Cleanup = true
 1816    ;   storage_open(SExt, File, Stream, Cleanup),
 1817        Modified = last_modified(LastModified),
 1818        (   var(Format)
 1819        ->  guess_format(Plain, Format)
 1820        ;   true
 1821        )
 1822    ).
 1823open_input_if_modified(file, SourceURL, HaveModified, Stream, Cleanup,
 1824                       Modified, Format, Options) :-
 1825    !,
 1826    open_input_if_modified(file(''), SourceURL, HaveModified,
 1827                           Stream, Cleanup,
 1828                           Modified, Format, Options).
 1829open_input_if_modified(Protocol, SourceURL, HaveModified, Stream, Cleanup,
 1830                       Modified, Format, Options) :-
 1831    rdf_open_hook(Protocol, SourceURL, HaveModified, Stream, Cleanup,
 1832                  Modified, Format, Options).
 1833
 1834guess_format(File, Format) :-
 1835    file_name_extension(_, Ext, File),
 1836    (   rdf_file_type(Ext, Format)
 1837    ->  true
 1838    ;   Format = xml,
 1839        print_message(warning, rdf(guess_format(Ext)))
 1840    ).
 1841
 1842%!  storage_open(+Extension, +File, -Stream, -Cleanup)
 1843%
 1844%   Open the low-level storage. Note  that   the  file  is opened as
 1845%   binary. This is the same  as   for  HTTP  resources. The correct
 1846%   encoding will be set by the XML parser or the Turtle parser.
 1847
 1848storage_open('', File, Stream, close(Stream)) :-
 1849    !,
 1850    open(File, read, Stream, [type(binary)]).
 1851storage_open(Ext, File, Stream, Cleanup) :-
 1852    rdf_storage_encoding(Ext, Encoding),
 1853    rdf_open_decode(Encoding, File, Stream, Cleanup).
 1854
 1855valid_extension(Ext) :-
 1856    rdf_file_type(Ext, _).
 1857valid_extension(Ext) :-
 1858    rdf_storage_encoding(Ext, _).
 1859
 1860%!  is_url(@Term, -Scheme, -URL) is semidet.
 1861%
 1862%   True if Term is an atom denoting URL of the given Scheme. URL is
 1863%   normalized  (see  uri_normalized/2)  and   a  possible  fragment
 1864%   identifier (#fragment) is removed. This  predicate only succeeds
 1865%   if  the  scheme  is   registered    using   the  multifile  hook
 1866%   url_protocol/1.
 1867
 1868is_url(URL, Scheme, FetchURL) :-
 1869    atom(URL),
 1870    uri_is_global(URL),
 1871    uri_normalized(URL, URL1),              % case normalization
 1872    uri_components(URL1, Components),
 1873    uri_data(scheme, Components, Scheme0),
 1874    url_protocol(Scheme0),
 1875    !,
 1876    Scheme = Scheme0,
 1877    uri_data(fragment, Components, _, Components1),
 1878    uri_components(FetchURL, Components1).
 1879
% url_protocol(?Scheme): Scheme is a URL scheme accepted by is_url/3.
url_protocol(file).                     % built-in
 1881
%!  rdf_file_type(+Extension, -Format) is semidet.
%
%   True if Format  is  the  format   belonging  to  the  given file
%   extension.  This predicate is multifile and can thus be extended
%   by plugins.
%
%   The clauses below map extensions to the formats =xml=, =xhtml=
%   and =triples=.

rdf_file_type(xml,   xml).
rdf_file_type(rdf,   xml).
rdf_file_type(rdfs,  xml).
rdf_file_type(owl,   xml).
rdf_file_type(htm,   xhtml).
rdf_file_type(html,  xhtml).
rdf_file_type(xhtml, xhtml).
rdf_file_type(trp,   triples).
 1896
 1897
%!  rdf_storage_encoding(+Extension, -Format) is semidet.
%
%   True if Format describes the storage encoding of a file with the
%   given Extension.  The empty extension denotes =plain= storage.

rdf_storage_encoding('', plain).
 1903
 1904
%!  rdf_load_stream(+Format, +Stream, :Options)
%
%   Load RDF data from Stream.  Options is module-qualified
%   (M:List).  The target graph is taken from Options using graph/2.
%
%     - =xml= and =xhtml= are parsed by load_stream/2 inside an RDF
%       transaction.  For =xhtml=, embedded(true) is added to the
%       options.
%     - =triples= is loaded using rdf_load_db_/3.
%
%   @tbd    Handle mime-types?

rdf_load_stream(xml, Stream, Options) :-
    !,
    graph(Options, Graph),
    rdf_transaction(load_stream(Stream, Options),
                    parse(Graph)).
rdf_load_stream(xhtml, Stream, M:Options) :-
    !,
    graph(Options, Graph),
    rdf_transaction(load_stream(Stream, M:[embedded(true)|Options]),
                    parse(Graph)).
rdf_load_stream(triples, Stream, Options) :-
    !,
    graph(Options, Graph),
    rdf_load_db_(Stream, Graph, _Graphs).
 1925
 1926load_stream(Stream, M:Options) :-
 1927    process_rdf(Stream, assert_triples, M:Options),
 1928    option(graph(Graph), Options),
 1929    rdf_graph_clear_modified_(Graph).
 1930
 1931
 1932%!  report_loaded(+Action, +Source, +DB, +Triples, +StartCPU, +Options)
 1933
 1934report_loaded(none, _, _, _, _, _) :- !.
 1935report_loaded(Action, Source, DB, Triples, T0, Options) :-
 1936    statistics(cputime, T1),
 1937    Time is T1 - T0,
 1938    (   option(silent(true), Options)
 1939    ->  Level = silent
 1940    ;   Level = informational
 1941    ),
 1942    print_message(Level,
 1943                  rdf(loaded(Action, Source, DB, Triples, Time))).
 1944
 1945
 1946%!  rdf_unload(+Source) is det.
 1947%
 1948%   Identify the graph loaded from Source and use rdf_unload_graph/1
 1949%   to erase this graph.
 1950%
 1951%   @deprecated     For compatibility, this predicate also accepts a
 1952%                   graph name instead of a source specification.
 1953%                   Please update your code to use
 1954%                   rdf_unload_graph/1.
 1955
 1956rdf_unload(Spec) :-
 1957    source_url(Spec, _Protocol, SourceURL),
 1958    rdf_graph_source_(Graph, SourceURL, _),
 1959    !,
 1960    rdf_unload_graph(Graph).
 1961rdf_unload(Graph) :-
 1962    atom(Graph),
 1963    rdf_graph(Graph),
 1964    !,
 1965    warn_deprecated_unload(Graph),
 1966    rdf_unload_graph(Graph).
 1967rdf_unload(_).
 1968
 1969:- dynamic
 1970    warned/0. 1971
 1972warn_deprecated_unload(_) :-
 1973    warned,
 1974    !.
 1975warn_deprecated_unload(Graph) :-
 1976    assertz(warned),
 1977    print_message(warning, rdf(deprecated(rdf_unload(Graph)))).
 1978
 1979
 1980%!  rdf_unload_graph(+Graph) is det.
 1981%
 1982%   Remove Graph from the RDF store.  Succeeds silently if the named
 1983%   graph does not exist.
 1984
 1985rdf_unload_graph(Graph) :-
 1986    must_be(atom, Graph),
 1987    (   rdf_graph(Graph)
 1988    ->  rdf_transaction(do_unload(Graph), unload(Graph))
 1989    ;   true
 1990    ).
 1991
 1992do_unload(Graph) :-
 1993    (   rdf_graph_(Graph, Triples),
 1994        Triples > 0
 1995    ->  rdf_retractall(_,_,_,Graph)
 1996    ;   true
 1997    ),
 1998    rdf_destroy_graph(Graph).
 1999
 2000                 /*******************************
 2001                 *         GRAPH QUERIES        *
 2002                 *******************************/
 2003
 2004%!  rdf_create_graph(+Graph) is det.
 2005%
 2006%   Create an RDF graph without triples.   Succeeds  silently if the
 2007%   graph already exists.
 2008
 2009
%!  rdf_graph(?Graph) is nondet.
%
%   True when Graph is an existing graph.  This is rdf_graph_/2
%   with the triple count ignored.

rdf_graph(Graph) :-
    rdf_graph_(Graph, _Triples).
 2016
%!  rdf_source(?Graph, ?SourceURL) is nondet.
%
%   True if named Graph is loaded from SourceURL.  The modification
%   stamp recorded with the source is ignored.
%
%   @deprecated Use rdf_graph_property(Graph, source(SourceURL)).

rdf_source(Graph, SourceURL) :-
    rdf_graph(Graph),
    rdf_graph_source_(Graph, SourceURL, _Modified).
 2026
%!  rdf_source(?Source) is nondet.
%
%   True if Source is a loaded source, i.e., if some graph is
%   loaded from it according to rdf_source/2.
%
%   @deprecated     Use rdf_graph/1 or rdf_source/2.

rdf_source(SourceURL) :-
    rdf_source(_Graph, SourceURL).
 2035
 2036%!  rdf_make
 2037%
 2038%   Reload all loaded files that have been modified since the last
 2039%   time they were loaded.
 2040
 2041rdf_make :-
 2042    findall(Source-Graph, modified_graph(Source, Graph), Modified),
 2043    forall(member(Source-Graph, Modified),
 2044           catch(rdf_load(Source, [graph(Graph), if(changed)]), E,
 2045                 print_message(error, E))).
 2046
 2047modified_graph(SourceURL, Graph) :-
 2048    rdf_graph(Graph),
 2049    rdf_graph_source_(Graph, SourceURL, Modified),
 2050    \+ sub_atom(SourceURL, 0, _, _, 'stream://'),
 2051    Modified > 0.
 2052
%!  rdf_graph_property(?Graph, ?Property) is nondet.
%
%   True when Property is a property of Graph.  Graph is enumerated
%   over the existing graphs if it is unbound.  Defined properties
%   are:
%
%       * hash(Hash)
%       Hash is the (MD5-)hash for the content of Graph.
%       * modified(Boolean)
%       True if the graph is modified since it was loaded or
%       rdf_set_graph/2 was called with modified(false).
%       * source(Source)
%       The graph is loaded from the Source (a URL)
%       * source_last_modified(?Time)
%       Time is the last-modified timestamp of Source at the moment
%       the graph was loaded from Source.
%       * triples(Count)
%       True when Count is the number of triples in Graph.
%
%    Additional graph properties can be added  by defining rules for
%    the multifile predicate  property_of_graph/2.   Currently,  the
%    following extensions are defined:
%
%       - library(semweb/rdf_persistency)
%         - persistent(Boolean)
%           Boolean is =true= if the graph is persistent.

rdf_graph_property(Graph, Property) :-
    rdf_graph(Graph),
    property_of_graph(Property, Graph).
 2082
:- multifile
    property_of_graph/2.

%   property_of_graph(?Property, +Graph)
%
%   Implements rdf_graph_property/2 for a given Graph.  The
%   predicate is multifile, so other files can add properties.

property_of_graph(hash(Hash), Graph) :-
    rdf_md5(Graph, Hash).
property_of_graph(modified(Boolean), Graph) :-
    rdf_graph_modified_(Graph, Boolean, _).
property_of_graph(source(URL), Graph) :-
    rdf_graph_source_(Graph, URL, _).
property_of_graph(source_last_modified(Time), Graph) :-
    rdf_graph_source_(Graph, _, Time),
    % Only report stamps that are positive.
    Time > 0.0.
property_of_graph(triples(Count), Graph) :-
    rdf_graph_(Graph, Count).
 2097
 2098%!  rdf_set_graph(+Graph, +Property) is det.
 2099%
 2100%   Set properties of Graph.  Defined properties are:
 2101%
 2102%       * modified(false)
 2103%       Set the modified state of Graph to false.
 2104
 2105rdf_set_graph(Graph, modified(Modified)) :-
 2106    must_be(oneof([false]), Modified),
 2107    rdf_graph_clear_modified_(Graph).
 2108
 2109
 2110%!  save_cache(+DB, +Cache) is det.
 2111%
 2112%   Save triples belonging to DB in the file Cache.
 2113
 2114save_cache(DB, Cache) :-
 2115    current_prolog_flag(rdf_triple_format, Version),
 2116    setup_call_cleanup(
 2117        catch(open(Cache, write, CacheStream, [type(binary)]), _, fail),
 2118        rdf_save_db_(CacheStream, DB, Version),
 2119        close(CacheStream)).
 2120
 2121%!  assert_triples(+Triples, +Source)
 2122%
 2123%   Assert a list of triples into the database. Foir security
 2124%   reasons we check we aren't inserting anything but nice RDF
 2125%   triples.
 2126
 2127assert_triples([], _).
 2128assert_triples([rdf(S,P,O)|T], DB) :-
 2129    !,
 2130    rdf_assert(S, P, O, DB),
 2131    assert_triples(T, DB).
 2132assert_triples([H|_], _) :-
 2133    throw(error(type_error(rdf_triple, H), _)).
 2134
 2135
 2136                 /*******************************
 2137                 *             RESET            *
 2138                 *******************************/
 2139
%!  rdf_reset_db
%
%   Remove all triples from the RDF database and reset all its
%   statistics.  The gensym counter for `_:genid` is reset as well.
%
%   @bug    This predicate checks for active queries, but this check is
%           not properly synchronized and therefore the use of this
%           predicate is unsafe in multi-threaded contexts. It is
%           mainly used to run functionality tests that need to
%           start with an empty database.

rdf_reset_db :-
    reset_gensym('_:genid'),
    rdf_reset_db_.
 2154
 2155
 2156                 /*******************************
 2157                 *           SAVE RDF           *
 2158                 *******************************/
 2159
 2160%!  rdf_save(+Out) is det.
 2161%
 2162%   Same as rdf_save(Out, []).  See rdf_save/2 for details.
 2163
 2164%!  rdf_save(+Out, :Options) is det.
 2165%
 2166%   Write RDF data as RDF/XML. Options is a list of one or more of
 2167%   the following options:
 2168%
 2169%           * graph(+Graph)
 2170%           Save only triples associated to the given named Graph.
 2171%
 2172%           * anon(Bool)
 2173%           If =false= (default =true=) do not save blank nodes that do
 2174%           not appear (indirectly) as object of a named resource.
 2175%
 2176%           * base_uri(URI)
 2177%           BaseURI used. If present, all URIs that can be
 2178%           represented relative to this base are written using
 2179%           their shorthand.  See also =write_xml_base= option.
 2180%
 2181%           * convert_typed_literal(:Convertor)
 2182%           Call Convertor(-Type, -Content, +RDFObject), providing
 2183%           the opposite for the convert_typed_literal option of
 2184%           the RDF parser.
 2185%
 2186%           * document_language(+Lang)
 2187%           Initial =|xml:lang|= saved with rdf:RDF element.
 2188%
 2189%           * encoding(Encoding)
 2190%           Encoding for the output.  Either utf8 or iso_latin_1.
 2191%
 2192%           * inline(+Bool)
 2193%           If =true= (default =false=), inline resources when
 2194%           encountered for the first time. Normally, only bnodes
 2195%           are handled this way.
 2196%
 2197%           * namespaces(+List)
 2198%           Explicitly specify saved namespace declarations. See
 2199%           rdf_save_header/2 option namespaces for details.
 2200%
 2201%           * sorted(+Boolean)
 2202%           If =true= (default =false=), emit subjects sorted on
 2203%           the full URI.  Useful to make file comparison easier.
 2204%
 2205%           * write_xml_base(Bool)
 2206%           If =false=, do _not_ include the =|xml:base|=
 2207%           declaration that is written normally when using the
 2208%           =base_uri= option.
 2209%
 2210%           * xml_attributes(+Bool)
 2211%           If =false= (default =true=), never use xml attributes to
%           save plain literal attributes, i.e., always use an XML
 2213%           element as in =|<name>Joe</name>|=.
 2214%
 2215%   @param Out      Location to save the data.  This can also be a
 2216%                   file-url (=|file://path|=) or a stream wrapped
 2217%                   in a term stream(Out).
 2218%   @see rdf_save_db/1
 2219
 2220:- thread_local
 2221    named_anon/2,                   % +Resource, -Id
 2222    inlined/1.                      % +Resource
 2223
 2224rdf_save(File) :-
 2225    rdf_save2(File, []).
 2226
 2227rdf_save(Spec, M:Options0) :-
 2228    is_list(Options0),
 2229    !,
 2230    meta_options(save_meta_option, M:Options0, Options),
 2231    to_file(Spec, File),
 2232    rdf_save2(File, Options).
 2233rdf_save(Spec, _:DB) :-
 2234    atom(DB),                      % backward compatibility
 2235    !,
 2236    to_file(Spec, File),
 2237    rdf_save2(File, [graph(DB)]).
 2238
% Options of rdf_save/2 that are passed to meta_options/3 as being
% module sensitive.
save_meta_option(convert_typed_literal).
 2240
 2241to_file(URL, File) :-
 2242    atom(URL),
 2243    uri_file_name(URL, File),
 2244    !.
 2245to_file(File, File).
 2246
 2247rdf_save2(File, Options) :-
 2248    option(encoding(Encoding), Options, utf8),
 2249    valid_encoding(Encoding),
 2250    open_output(File, Encoding, Out, Close),
 2251    flag(rdf_db_saved_subjects, OSavedSubjects, 0),
 2252    flag(rdf_db_saved_triples, OSavedTriples, 0),
 2253    call_cleanup(rdf_do_save(Out, Options),
 2254                 Reason,
 2255                 cleanup_save(Reason,
 2256                              File,
 2257                              OSavedSubjects,
 2258                              OSavedTriples,
 2259                              Close)).
 2260
 2261open_output(stream(Out), Encoding, Out, Cleanup) :-
 2262    !,
 2263    stream_property(Out, encoding(Old)),
 2264    (   (   Old == Encoding
 2265        ;   Old == wchar_t          % Internal encoding
 2266        )
 2267    ->  Cleanup = true
 2268    ;   set_stream(Out, encoding(Encoding)),
 2269        Cleanup = set_stream(Out, encoding(Old))
 2270    ).
 2271open_output(File, Encoding, Out,
 2272            close(Out)) :-
 2273    open(File, write, Out, [encoding(Encoding)]).
 2274
 2275valid_encoding(Enc) :-
 2276    (   xml_encoding_name(Enc, _)
 2277    ->  true
 2278    ;   throw(error(domain_error(encoding, Enc), _))
 2279    ).
 2280
 2281
 2282cleanup_save(Reason,
 2283             File,
 2284             OSavedSubjects,
 2285             OSavedTriples,
 2286             Close) :-
 2287    call(Close),
 2288    flag(rdf_db_saved_subjects, SavedSubjects, OSavedSubjects),
 2289    flag(rdf_db_saved_triples, SavedTriples, OSavedTriples),
 2290    retractall(named_anon(_, _)),
 2291    retractall(inlined(_)),
 2292    (   Reason == exit
 2293    ->  print_message(informational,
 2294                      rdf(saved(File, SavedSubjects, SavedTriples)))
 2295    ;   format(user_error, 'Reason = ~w~n', [Reason])
 2296    ).
 2297
%!  rdf_do_save(+Out, +Options0)
%
%   Write the document to Out: the header (rdf_save_header/3), one
%   call to rdf_save_non_anon_subject/3 for each subject and the
%   footer.  The graph to save is taken from the options using
%   graph/2; it is unbound if no graph is specified.  With
%   sorted(true), the subjects are visited in standard order.

rdf_do_save(Out, Options0) :-
    rdf_save_header(Out, Options0, Options),
    graph(Options, DB),
    (   option(sorted(true), Options, false)
    ->  (   var(DB)
        ->  % All subjects; setof/3 sorts and removes duplicates.
            setof(Subject, rdf_subject(Subject), Subjects)
        ;   findall(Subject, rdf(Subject, _, _, DB:_), SubjectList),
            sort(SubjectList, Subjects)
        ),
        forall(member(Subject, Subjects),
               rdf_save_non_anon_subject(Out, Subject, Options))
    ;   forall(rdf_subject_in_graph(Subject, DB),
               rdf_save_non_anon_subject(Out, Subject, Options))
    ),
    rdf_save_footer(Out),
    !.                                  % dubious cut; without it, the
                                        % cleanup handler isn't called!?
 2315
 2316%!  rdf_subject_in_graph(-Subject, ?DB) is nondet.
 2317%
 2318%   True when Subject is a subject in the   graph  DB. If DB is unbound,
 2319%   all  subjects  are  enumerated.  Otherwise   we  have  two  options:
 2320%   enumerate all subjects and filter by graph or collect all triples of
 2321%   the graph and get the unique subjects.   The  first is attractive if
 2322%   the graph is big compared  to  the   DB,  also  because  it does not
 2323%   require memory, the second if the graph is small compared to the DB.
 2324
 2325rdf_subject_in_graph(Subject, DB) :-
 2326    var(DB),
 2327    !,
 2328    rdf_subject(Subject).
 2329rdf_subject_in_graph(Subject, DB) :-
 2330    rdf_statistics(triples(AllTriples)),
 2331    rdf_graph_property(DB, triples(DBTriples)),
 2332    DBTriples > AllTriples // 10,
 2333    !,
 2334    rdf_resource(Subject),
 2335    (   rdf(Subject, _, _, DB:_)
 2336    ->  true
 2337    ).
 2338rdf_subject_in_graph(Subject, DB) :-
 2339    findall(Subject, rdf(Subject, _, _, DB:_), SubjectList),
 2340    list_to_set(SubjectList, Subjects),
 2341    member(Subject, Subjects).
 2342
 2343
 2344graph(Options0, DB) :-
 2345    strip_module(Options0, _, Options),
 2346    (   memberchk(graph(DB0), Options)
 2347    ->  DB = DB0
 2348    ;   memberchk(db(DB0), Options)
 2349    ->  DB = DB0
 2350    ;   true                            % leave unbound
 2351    ).
 2352
 2353
%!  rdf_save_header(+Fd, +Options)
%!  rdf_save_header(+Fd, +Options, -OptionsOut)
%
%   Save XML document header, doctype and open the RDF environment.
%   This predicate also sets up the namespace notation.
%
%   Save an RDF header, with the XML header, DOCTYPE, ENTITY and
%   opening the rdf:RDF element with appropriate namespace
%   declarations. It uses the primitives from section 3.5 to
%   generate the required namespaces and desired short-name. Options
%   is a list of:
%
%     * graph(+URI)
%     Only search for namespaces used in triples that belong to the
%     given named graph.
%
%     * namespaces(+List)
%     Where List is a list of namespace abbreviations. With this
%     option, the expensive search for all namespaces that may be
%     used by your data is omitted. The namespaces =rdf= and =rdfs=
%     are added to the provided List. If a namespace is not
%     declared, the resource is emitted in non-abbreviated form.
%
%   The options encoding(_), base_uri(_), write_xml_base(_) and
%   document_language(_) are processed as well.  OptionsOut is
%   Options with nsmap(NsMap) appended, where NsMap is the list of
%   Id=URI pairs of the declared namespaces.  For compatibility,
%   an atom is accepted instead of the option list and denotes the
%   graph.

rdf_save_header(Out, Options) :-
    rdf_save_header(Out, Options, _).

rdf_save_header(Out, Options, OptionsOut) :-
    is_list(Options),
    !,
    option(encoding(Enc), Options, utf8),
    xml_encoding(Enc, Encoding),
    format(Out, '<?xml version=\'1.0\' encoding=\'~w\'?>~n', [Encoding]),
    format(Out, '<!DOCTYPE rdf:RDF [', []),
    header_namespaces(Options, NSIdList),
    nsmap(NSIdList, NsMap),
    append(Options, [nsmap(NsMap)], OptionsOut),
    % Declare one entity per namespace.
    forall(member(Id=URI, NsMap),
           (   xml_quote_attribute(URI, NSText0, Enc),
               xml_escape_parameter_entity(NSText0, NSText),
               format(Out, '~N    <!ENTITY ~w \'~w\'>', [Id, NSText])
           )),
    format(Out, '~N]>~n~n', []),
    format(Out, '<rdf:RDF', []),
    % Failure-driven loop: declare each namespace using its entity.
    (   member(Id, NSIdList),
        format(Out, '~N    xmlns:~w="&~w;"~n', [Id, Id]),
        fail
    ;   true
    ),
    % xml:base is written unless write_xml_base(false) is given.
    (   option(base_uri(Base), Options),
        option(write_xml_base(true), Options, true)
    ->  xml_quote_attribute(Base, BaseText, Enc),
        format(Out, '~N    xml:base="~w"~n', [BaseText])
    ;   true
    ),
    (   memberchk(document_language(Lang), Options)
    ->  format(Out, '~N    xml:lang="~w"', [Lang])
    ;   true
    ),
    format(Out, '>~n', []).
rdf_save_header(Out, FileRef, OptionsOut) :-    % compatibility
    atom(FileRef),
    rdf_save_header(Out, [graph(FileRef)], OptionsOut).
 2415
 2416xml_encoding(Enc, Encoding) :-
 2417    (   xml_encoding_name(Enc, Encoding)
 2418    ->  true
 2419    ;   throw(error(domain_error(rdf_encoding, Enc), _))
 2420    ).
 2421
% xml_encoding_name(?PrologEncoding, ?XMLName): relates the encoding
% names accepted for saving to the name written in the XML header.
xml_encoding_name(ascii,       'US-ASCII').
xml_encoding_name(iso_latin_1, 'ISO-8859-1').
xml_encoding_name(utf8,        'UTF-8').
 2425
 2426%!  nsmap(+NSIds, -Map:list(id=uri)) is det.
 2427%
 2428%   Create a namespace-map that is compatible to xml_write/2
 2429%   for dealing with XML-Literals
 2430
 2431nsmap([], []).
 2432nsmap([Id|T0], [Id=URI|T]) :-
 2433    ns(Id, URI),
 2434    nsmap(T0, T).
 2435
 2436%!  xml_escape_parameter_entity(+In, -Out) is det.
 2437%
 2438%   Escape % as &#37; for entity declarations.
 2439
 2440xml_escape_parameter_entity(In, Out) :-
 2441    sub_atom(In, _, _, _, '%'),
 2442    !,
 2443    atom_codes(In, Codes),
 2444    phrase(escape_parent(Codes), OutCodes),
 2445    atom_codes(Out, OutCodes).
 2446xml_escape_parameter_entity(In, In).
 2447
 2448escape_parent([]) --> [].
 2449escape_parent([H|T]) -->
 2450    (   { H == 37 }
 2451    ->  "&#37;"
 2452    ;   [H]
 2453    ),
 2454    escape_parent(T).
 2455
 2456
 2457%!  header_namespaces(Options, -List)
 2458%
 2459%   Get namespaces we will define as entities
 2460
 2461header_namespaces(Options, List) :-
 2462    memberchk(namespaces(NSL0), Options),
 2463    !,
 2464    sort([rdf,rdfs|NSL0], List).
 2465header_namespaces(Options, List) :-
 2466    graph(Options, DB),
 2467    used_namespace_entities(List, DB).
 2468
%!  rdf_graph_prefixes(?Graph, -List:ord_set) is det.
%!  rdf_graph_prefixes(?Graph, -List:ord_set, :Options) is det.
%
%   List is a sorted list of  prefixes (namespaces) in Graph. Options
%   defined are:
%
%       * filter(:Filter)
%       optional Filter argument is used to filter the results. It
%       is called with 3 additional arguments:
%
%           ==
%           call(Filter, Where, Prefix, URI)
%           ==
%
%       The Where argument gives the location of the prefix and is
%       one of =subject=, =predicate=, =object= or =type=. The
%       Prefix argument is the potentially new prefix and URI is
%       the full URI that is being processed.
%
%       * expand(:Goal)
%       Hook to generate the graph.  Called using
%
%           ==
%           call(Goal,S,P,O,Graph)
%           ==
%
%       * min_count(+Count)
%       Only include prefixes that appear at least N times.  Default
%       is 1. Declared prefixes are always returned if found at
%       least one time.
%
%       * get_prefix(:GetPrefix)
%       Predicate to extract the candidate prefix from an IRI.  Default
%       is iri_xml_namespace/2.
%
%   If the third argument is not a list, it is taken to be the
%   Filter.


:- thread_local
    graph_prefix/3.
:- meta_predicate
    rdf_graph_prefixes(?, -, :).

rdf_graph_prefixes(Graph, List) :-
    rdf_graph_prefixes(Graph, List, []).

rdf_graph_prefixes(Graph, List, M:QOptions) :-
    is_list(QOptions),
    !,
    meta_options(is_meta, M:QOptions, Options),
    option(filter(Filter), Options, true),
    option(expand(Expand), Options, rdf_db),
    option(min_count(MinCount), Options, 1),
    option(get_prefix(GetPrefix), Options, iri_xml_namespace),
    % Always wipe the thread-local graph_prefix/3 facts afterwards.
    call_cleanup(prefixes(Expand, Graph, Prefixes, Filter, MinCount, GetPrefix),
                 retractall(graph_prefix(_,_,_))),
    sort(Prefixes, List).
rdf_graph_prefixes(Graph, List, M:Filter) :-
    rdf_graph_prefixes(Graph, List, M:[filter(Filter)]).
 2526
 2527is_meta(filter).
 2528is_meta(expand).
 2529is_meta(get_prefix).
 2530
 2531
 2532prefixes(Expand, Graph, Prefixes, Filter, MinCount, GetPrefix) :-
 2533    (   call(Expand, S, P, O, Graph),
 2534        add_ns(subject, GetPrefix, Filter, S, MinCount, s(S)),
 2535        add_ns(predicate, GetPrefix, Filter, P, MinCount, sp(S,P)),
 2536        add_ns_obj(GetPrefix, Filter, O, MinCount, spo(S,P,O)),
 2537        fail
 2538    ;   true
 2539    ),
 2540    findall(Prefix, graph_prefix(Prefix, MinCount, _), Prefixes).
 2541
 2542add_ns(Where, GetPrefix, Filter, S, MinCount, Context) :-
 2543    \+ rdf_is_bnode(S),
 2544    call(GetPrefix, S, Full),
 2545    Full \== '',
 2546    !,
 2547    (   graph_prefix(Full, MinCount, _)
 2548    ->  true
 2549    ;   Filter == true
 2550    ->  add_ns(Full, Context)
 2551    ;   call(Filter, Where, Full, S)
 2552    ->  add_ns(Full, Context)
 2553    ;   true
 2554    ).
 2555add_ns(_, _, _, _, _, _).
 2556
 2557add_ns(Full, Context) :-
 2558    graph_prefix(Full, _, Contexts),
 2559    memberchk(Context, Contexts),
 2560    !.
 2561add_ns(Full, Context) :-
 2562    retract(graph_prefix(Full, C0, Contexts)),
 2563    !,
 2564    C1 is C0+1,
 2565    asserta(graph_prefix(Full, C1, [Context|Contexts])).
 2566add_ns(Full, _) :-
 2567    ns(_, Full),
 2568    !,
 2569    asserta(graph_prefix(Full, _, _)).
 2570add_ns(Full, Context) :-
 2571    asserta(graph_prefix(Full, 1, [Context])).
 2572
 2573
 2574add_ns_obj(GetPrefix, Filter, O, MinCount, Context) :-
 2575    atom(O),
 2576    !,
 2577    add_ns(object, GetPrefix, Filter, O, MinCount, Context).
 2578add_ns_obj(GetPrefix, Filter, literal(type(Type, _)), MinCount, _) :-
 2579    atom(Type),
 2580    !,
 2581    add_ns(type, GetPrefix, Filter, Type, MinCount, t(Type)).
 2582add_ns_obj(_, _, _, _, _).
 2583
 2584
 2585%!  used_namespace_entities(-List, ?Graph) is det.
 2586%
 2587%   Return the namespace aliases that are actually used in Graph. In
 2588%   addition, this predicate creates ns<N>   aliases  for namespaces
 2589%   used in predicates because RDF/XML cannot write predicates other
 2590%   than as an XML name.
 2591
 2592used_namespace_entities(List, Graph) :-
 2593    decl_used_predicate_ns(Graph),
 2594    used_namespaces(List, Graph).
 2595
 2596used_namespaces(List, DB) :-
 2597    rdf_graph_prefixes(DB, FullList),
 2598    ns_abbreviations(FullList, List0),
 2599    sort([rdf|List0], List).
 2600
 2601ns_abbreviations([], []).
 2602ns_abbreviations([H0|T0], [H|T]) :-
 2603    ns(H, H0),
 2604    !,
 2605    ns_abbreviations(T0, T).
 2606ns_abbreviations([_|T0], T) :-
 2607    ns_abbreviations(T0, T).
 2608
 2609
 2610/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 2611For every URL used as a predicate  we   *MUST*  define a namespace as we
 2612cannot use names holding /, :, etc. as XML identifiers.
 2613- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 2614
 2615:- thread_local
 2616    predicate_ns/2. 2617
 2618decl_used_predicate_ns(DB) :-
 2619    retractall(predicate_ns(_,_)),
 2620    (   rdf_current_predicate(P, DB),
 2621        decl_predicate_ns(P),
 2622        fail
 2623    ;   true
 2624    ).
 2625
 2626decl_predicate_ns(Pred) :-
 2627    predicate_ns(Pred, _),
 2628    !.
 2629decl_predicate_ns(Pred) :-
 2630    rdf_global_id(NS:Local, Pred),
 2631    xml_name(Local),
 2632    !,
 2633    assert(predicate_ns(Pred, NS)).
 2634decl_predicate_ns(Pred) :-
 2635    atom_codes(Pred, Codes),
 2636    append(NSCodes, LocalCodes, Codes),
 2637    xml_codes(LocalCodes),
 2638    !,
 2639    (   NSCodes \== []
 2640    ->  atom_codes(NS, NSCodes),
 2641        (   ns(Id, NS)
 2642        ->  assert(predicate_ns(Pred, Id))
 2643        ;   between(1, infinite, N),
 2644            atom_concat(ns, N, Id),
 2645            \+ ns(Id, _)
 2646        ->  rdf_register_ns(Id, NS),
 2647            print_message(informational,
 2648                          rdf(using_namespace(Id, NS)))
 2649        ),
 2650        assert(predicate_ns(Pred, Id))
 2651    ;   assert(predicate_ns(Pred, -)) % no namespace used
 2652    ).
 2653
 2654xml_codes([]).
 2655xml_codes([H|T]) :-
 2656    xml_code(H),
 2657    xml_codes(T).
 2658
 2659xml_code(X) :-
 2660    code_type(X, csym),
 2661    !.
 2662xml_code(0'-).                          % Match 0'-
 2663
 2664
 2665%!  rdf_save_footer(Out:stream) is det.
 2666%
 2667%   Finish XML generation and write the document footer.
 2668%
 2669%   @see rdf_save_header/2, rdf_save_subject/3.
 2670
 2671rdf_save_footer(Out) :-
 2672    retractall(named_anon(_, _)),
 2673    retractall(inlined(_)),
 2674    format(Out, '</rdf:RDF>~n', []).
 2675
 2676%!  rdf_save_non_anon_subject(+Out, +Subject, +Options)
 2677%
 2678%   Save an object.  Anonymous objects not saved if anon(false)
 2679%   is present in the Options list.
 2680
 2681rdf_save_non_anon_subject(_Out, Subject, Options) :-
 2682    rdf_is_bnode(Subject),
 2683    (   memberchk(anon(false), Options)
 2684    ;   graph(Options, DB),
 2685        rdf_db(_, _, Subject, DB)
 2686    ),
 2687    !.
 2688rdf_save_non_anon_subject(Out, Subject, Options) :-
 2689    rdf_save_subject(Out, Subject, Options),
 2690    flag(rdf_db_saved_subjects, X, X+1).
 2691
 2692
 2693%!  rdf_save_subject(+Out, +Subject:resource, +Options) is det.
 2694%
 2695%   Save the triples associated to Subject to Out. Options:
 2696%
 2697%     * graph(+Graph)
 2698%     Only save properties from Graph.
 2699%     * base_uri(+URI)
 2700%     * convert_typed_literal(:Goal)
 2701%     * document_language(+XMLLang)
 2702%
 2703%   @see rdf_save/2 for a description of these options.
 2704
 2705rdf_save_subject(Out, Subject, Options) :-
 2706    is_list(Options),
 2707    !,
 2708    option(base_uri(BaseURI), Options, '-'),
 2709    (   rdf_save_subject(Out, Subject, BaseURI, 0, Options)
 2710    ->  format(Out, '~n', [])
 2711    ;   throw(error(rdf_save_failed(Subject), 'Internal error'))
 2712    ).
 2713rdf_save_subject(Out, Subject, DB) :-
 2714    (   var(DB)
 2715    ->  rdf_save_subject(Out, Subject, [])
 2716    ;   rdf_save_subject(Out, Subject, [graph(DB)])
 2717    ).
 2718
 2719
 2720%!  rdf_save_subject(+Out:stream, +Subject:resource, +BaseURI,
 2721%!                   +Indent:int, +Options) is det.
 2722%
 2723%   Save properties of Subject.
 2724%
 2725%   @param Indent   Current indentation
 2726
 2727rdf_save_subject(_, Subject, _, _, _) :-
 2728    inlined(Subject),
 2729    !.
 2730rdf_save_subject(Out, Subject, BaseURI, Indent, Options) :-
 2731    do_save_subject(Out, Subject, BaseURI, Indent, Options).
 2732
 2733do_save_subject(Out, Subject, BaseURI, Indent, Options) :-
 2734    graph(Options, DB),
 2735    findall(Pred=Object, rdf_db(Subject, Pred, Object, DB), Atts0),
 2736    sort(Atts0, Atts),              % remove duplicates
 2737    length(Atts, L),
 2738    (   length(Atts0, L0),
 2739        Del is L0-L,
 2740        Del > 0
 2741    ->  print_message(informational,
 2742                      rdf(save_removed_duplicates(Del, Subject)))
 2743    ;   true
 2744    ),
 2745    rdf_save_subject(Out, Subject, BaseURI, Atts, Indent, Options),
 2746    flag(rdf_db_saved_triples, X, X+L).
 2747
 2748rdf_db(Subject, Pred, Object, DB) :-
 2749    var(DB),
 2750    !,
 2751    rdf(Subject, Pred, Object).
 2752rdf_db(Subject, Pred, Object, DB) :-
 2753    rdf(Subject, Pred, Object, DB:_).
 2754
 2755%!  rdf_save_subject(+Out:stream, +Subject:resource, +BaseURI,
 2756%!                   +Atts:list(Pred=Obj), +Indent:int, +Options) is det.
 2757%
 2758%   Save triples defined by Atts on Subject.
 2759
 2760rdf_save_subject(Out, Subject, BaseURI, Atts, Indent, Options) :-
 2761    rdf_equal(rdf:type, RdfType),
 2762    select(RdfType=Type, Atts, Atts1),
 2763    \+ rdf_is_bnode(Type),
 2764    rdf_id(Type, BaseURI, TypeId),
 2765    xml_is_name(TypeId),
 2766    !,
 2767    format(Out, '~*|<', [Indent]),
 2768    rdf_write_id(Out, TypeId),
 2769    save_about(Out, BaseURI, Subject, Options),
 2770    save_attributes(Atts1, BaseURI, Out, TypeId, Indent, Options).
 2771rdf_save_subject(Out, Subject, BaseURI, Atts, Indent, Options) :-
 2772    format(Out, '~*|<rdf:Description', [Indent]),
 2773    save_about(Out, BaseURI, Subject, Options),
 2774    save_attributes(Atts, BaseURI, Out, rdf:'Description', Indent, Options).
 2775
 2776xml_is_name(_NS:Atom) :-
 2777    !,
 2778    xml_name(Atom).
 2779xml_is_name(Atom) :-
 2780    xml_name(Atom).
 2781
 2782%!  save_about(+Out, +BaseURI, +Subject, +Options) is det.
 2783%
 2784%   Save the rdf:about. If Subject is a  blank node, save the nodeID
 2785%   if any.
 2786
 2787save_about(Out, _BaseURI, Subject, _Options) :-
 2788    rdf_is_bnode(Subject),
 2789    !,
 2790    (   named_anon(Subject, NodeID)
 2791    ->  format(Out, ' rdf:nodeID="~w"', [NodeID])
 2792    ;   true
 2793    ).
 2794save_about(Out, BaseURI, Subject, Options) :-
 2795    option(encoding(Encoding), Options, utf8),
 2796    rdf_value(Subject, BaseURI, QSubject, Encoding),
 2797    format(Out, ' rdf:about="~w"', [QSubject]).
 2798
 2799%!  save_attributes(+List, +BaseURI, +Stream, +Element, +Indent, +Options)
 2800%
 2801%   Save the attributes.  Short literal attributes are saved in the
 2802%   tag.  Others as the content of the description element.  The
 2803%   begin tag has already been filled.
 2804
 2805save_attributes(Atts, BaseURI, Out, Element, Indent, Options) :-
 2806    split_attributes(Atts, InTag, InBody, Options),
 2807    SubIndent is Indent + 2,
 2808    save_attributes2(InTag, BaseURI, tag, Out, SubIndent, Options),
 2809    (   InBody == []
 2810    ->  format(Out, '/>~n', [])
 2811    ;   format(Out, '>~n', []),
 2812        save_attributes2(InBody, BaseURI, body, Out, SubIndent, Options),
 2813        format(Out, '~N~*|</', [Indent]),
 2814        rdf_write_id(Out, Element),
 2815        format(Out, '>~n', [])
 2816    ).
 2817
 2818%!  split_attributes(+Attributes, -HeadAttrs, -BodyAttr, Options)
 2819%
 2820%   Split attribute (Name=Value) list into attributes for the head
 2821%   and body. Attributes can only be in the head if they are literal
 2822%   and appear only one time in the attribute list.
 2823
 2824split_attributes(Atts, [], Atts, Options) :-
 2825    option(xml_attributes(false), Options),
 2826    !.
 2827split_attributes(Atts, HeadAttr, BodyAttr, _) :-
 2828    duplicate_attributes(Atts, Dupls, Singles),
 2829    simple_literal_attributes(Singles, HeadAttr, Rest),
 2830    append(Dupls, Rest, BodyAttr).
 2831
 2832%!  duplicate_attributes(+Attrs, -Duplicates, -Singles)
 2833%
 2834%   Extract attributes that appear more than onces as we cannot
 2835%   dublicate an attribute in the head according to the XML rules.
 2836
 2837duplicate_attributes([], [], []).
 2838duplicate_attributes([H|T], Dupls, Singles) :-
 2839    H = (Name=_),
 2840    named_attributes(Name, T, D, R),
 2841    D \== [],
 2842    append([H|D], Dupls2, Dupls),
 2843    !,
 2844    duplicate_attributes(R, Dupls2, Singles).
 2845duplicate_attributes([H|T], Dupls2, [H|Singles]) :-
 2846    duplicate_attributes(T, Dupls2, Singles).
 2847
 2848named_attributes(_, [], [], []) :- !.
 2849named_attributes(Name, [H|T], D, R) :-
 2850    (   H = (Name=_)
 2851    ->  D = [H|DT],
 2852        named_attributes(Name, T, DT, R)
 2853    ;   R = [H|RT],
 2854        named_attributes(Name, T, D, RT)
 2855    ).
 2856
 2857%!  simple_literal_attributes(+Attributes, -Inline, -Body)
 2858%
 2859%   Split attributes for (literal) attributes to be used in the
 2860%   begin-tag and ones that have to go into the body of the description.
 2861
 2862simple_literal_attributes([], [], []).
 2863simple_literal_attributes([H|TA], [H|TI], B) :-
 2864    in_tag_attribute(H),
 2865    !,
 2866    simple_literal_attributes(TA, TI, B).
 2867simple_literal_attributes([H|TA], I, [H|TB]) :-
 2868    simple_literal_attributes(TA, I, TB).
 2869
 2870in_tag_attribute(_=literal(Text)) :-
 2871    atom(Text),                     % may not have lang qualifier
 2872    atom_length(Text, Len),
 2873    Len < 60.
 2874
 2875%!  save_attributes2(+List, +BaseURI, +TagOrBody, +Stream, +Indent, +Options)
 2876%
 2877%   Save a list of attributes.
 2878
 2879save_attributes2([], _, _, _, _, _).
 2880save_attributes2([H|T], BaseURI, Where, Out, Indent, Options) :-
 2881    save_attribute(Where, H, BaseURI, Out, Indent, Options),
 2882    save_attributes2(T, BaseURI, Where, Out, Indent, Options).
 2883
% save_attribute(+Where, +NameValue, +BaseURI, +Out, +Indent, +Options)
%
% Write a single Name=Value pair.  Where is `tag` if the pair is
% written as an XML attribute inside the start tag that is still open,
% or `body` if it is written as a child element.  The `body` clauses
% are tried in order: literal values, blank nodes, resources that are
% inlined because of option inline(true) and finally a plain
% rdf:resource reference.

% In the tag: Name="Value" on a fresh line, indented two columns
% deeper than Indent.
save_attribute(tag, Name=literal(Value), BaseURI, Out, Indent, Options) :-
    AttIndent is Indent + 2,
    rdf_id(Name, BaseURI, NameText),
    option(encoding(Encoding), Options, utf8),
    xml_quote_attribute(Value, QVal, Encoding),
    format(Out, '~N~*|', [AttIndent]),
    rdf_write_id(Out, NameText),
    format(Out, '="~w"', [QVal]).
% Literal in the body.  If Options holds convert_typed_literal(Converter)
% and the converter succeeds, the literal is written as type(Type, Content).
save_attribute(body, Name=literal(Literal0), BaseURI, Out, Indent, Options) :-
    !,
    rdf_id(Name, BaseURI, NameText),
    (   memberchk(convert_typed_literal(Converter), Options),
        call(Converter, Type, Content, Literal0)
    ->  Literal = type(Type, Content)
    ;   Literal = Literal0
    ),
    save_body_literal(Literal, NameText, BaseURI, Out, Indent, Options).
% Blank node value.  A node that already has an id in named_anon/2 is
% referenced through rdf:nodeID.  Otherwise its description (or
% collection) is nested inside the property element.
save_attribute(body, Name=Value, BaseURI, Out, Indent, Options) :-
    rdf_is_bnode(Value),
    !,
    rdf_id(Name, BaseURI, NameText),
    format(Out, '~N~*|<', [Indent]),
    rdf_write_id(Out, NameText),
    (   named_anon(Value, NodeID)
    ->  format(Out, ' rdf:nodeID="~w"/>', [NodeID])
    ;   (   rdf(S1, Name, Value),
            rdf(S2, P2, Value),
            (S1 \== S2 ; Name \== P2)
            % Value is the object of more than one triple: give it
            % the id bn<N>, where N is the current number of
            % named_anon/2 clauses.
        ->  predicate_property(named_anon(_,_), number_of_clauses(N)),
            atom_concat('bn', N, NodeID),
            assertz(named_anon(Value, NodeID))
        ;   true
        ),
        SubIndent is Indent + 2,
        (   rdf_collection(Value)
        ->  save_about(Out, BaseURI, Value, Options),
            format(Out, ' rdf:parseType="Collection">~n', []),
            rdf_save_list(Out, Value, BaseURI, SubIndent, Options)
        ;   format(Out, '>~n', []),
            rdf_save_subject(Out, Value, BaseURI, SubIndent, Options)
        ),
        format(Out, '~N~*|</', [Indent]),
        rdf_write_id(Out, NameText),
        format(Out, '>~n', [])
    ).
% Option inline(true): nest the description of a resource that has
% properties of its own and was not inlined before.  The resource is
% recorded in inlined/1 before it is written.
save_attribute(body, Name=Value, BaseURI, Out, Indent, Options) :-
    option(inline(true), Options),
    has_attributes(Value, Options),
    \+ inlined(Value),
    !,
    assertz(inlined(Value)),
    rdf_id(Name, BaseURI, NameText),
    format(Out, '~N~*|<', [Indent]),
    rdf_write_id(Out, NameText),
    SubIndent is Indent + 2,
    (   rdf_collection(Value)
    ->  save_about(Out, BaseURI, Value, Options),
        format(Out, ' rdf:parseType="Collection">~n', []),
        rdf_save_list(Out, Value, BaseURI, SubIndent, Options)
    ;   format(Out, '>~n', []),
        do_save_subject(Out, Value, BaseURI, SubIndent, Options)
    ),
    format(Out, '~N~*|</', [Indent]),
    rdf_write_id(Out, NameText),
    format(Out, '>~n', []).
% Default: an empty property element that refers to the resource
% through rdf:resource.
save_attribute(body, Name=Value, BaseURI, Out, Indent, Options) :-
    option(encoding(Encoding), Options, utf8),
    rdf_value(Value, BaseURI, QVal, Encoding),
    rdf_id(Name, BaseURI, NameText),
    format(Out, '~N~*|<', [Indent]),
    rdf_write_id(Out, NameText),
    format(Out, ' rdf:resource="~w"/>', [QVal]).
 2956
 2957has_attributes(URI, Options) :-
 2958    graph(Options, DB),
 2959    rdf_db(URI, _, _, DB),
 2960    !.
 2961
 2962%!  save_body_literal(+Literal, +NameText, +BaseURI,
 2963%!                    +Out, +Indent, +Options).
 2964
 2965save_body_literal(lang(Lang, Value),
 2966                  NameText, BaseURI, Out, Indent, Options) :-
 2967    !,
 2968    format(Out, '~N~*|<', [Indent]),
 2969    rdf_write_id(Out, NameText),
 2970    (   memberchk(document_language(Lang), Options)
 2971    ->  write(Out, '>')
 2972    ;   rdf_id(Lang, BaseURI, LangText),
 2973        format(Out, ' xml:lang="~w">', [LangText])
 2974    ),
 2975    save_attribute_value(Value, Out, Options),
 2976    write(Out, '</'), rdf_write_id(Out, NameText), write(Out, '>').
 2977save_body_literal(type(Type, DOM),
 2978                  NameText, _BaseURI, Out, Indent, Options) :-
 2979    rdf_equal(Type, rdf:'XMLLiteral'),
 2980    !,
 2981    (   atom(DOM)
 2982    ->  format(Out, '~N~*|<', [Indent]),
 2983        rdf_write_id(Out, NameText),
 2984        format(Out, ' rdf:parseType="Literal">~w</', [DOM]),
 2985        rdf_write_id(Out, NameText), write(Out, '>')
 2986    ;   save_xml_literal(DOM, NameText, Out, Indent, Options)
 2987    ).
 2988save_body_literal(type(Type, Value),
 2989                  NameText, BaseURI, Out, Indent, Options) :-
 2990    !,
 2991    format(Out, '~N~*|<', [Indent]),
 2992    rdf_write_id(Out, NameText),
 2993    option(encoding(Encoding), Options, utf8),
 2994    rdf_value(Type, BaseURI, QVal, Encoding),
 2995    format(Out, ' rdf:datatype="~w">', [QVal]),
 2996    save_attribute_value(Value, Out, Options),
 2997    write(Out, '</'), rdf_write_id(Out, NameText), write(Out, '>').
 2998save_body_literal(Literal,
 2999                  NameText, _, Out, Indent, Options) :-
 3000    atomic(Literal),
 3001    !,
 3002    format(Out, '~N~*|<', [Indent]),
 3003    rdf_write_id(Out, NameText),
 3004    write(Out, '>'),
 3005    save_attribute_value(Literal, Out, Options),
 3006    write(Out, '</'), rdf_write_id(Out, NameText), write(Out, '>').
 3007save_body_literal(DOM,
 3008                  NameText, BaseURI, Out, Indent, Options) :-
 3009    rdf_equal(Type, rdf:'XMLLiteral'),
 3010    save_body_literal(type(Type, DOM),
 3011                      NameText, BaseURI, Out, Indent, Options).
 3012
 3013save_attribute_value(Value, Out, Options) :-  % strings
 3014    (	atom(Value)
 3015    ;	string(Value)
 3016    ),
 3017    !,
 3018    option(encoding(Encoding), Options, utf8),
 3019    xml_quote_cdata(Value, QVal, Encoding),
 3020    write(Out, QVal).
 3021save_attribute_value(Value, Out, _Options) :-  % numbers
 3022    number(Value),
 3023    !,
 3024    writeq(Out, Value).             % quoted: preserve floats
 3025save_attribute_value(Value, _Out, _Options) :-
 3026    throw(error(save_attribute_value(Value), _)).
 3027
 3028%!  save_xml_literal(+DOM, +Attr, +Out, +Indent, +Options) is det.
 3029%
 3030%   Save an XMLLiteral value. We already emitted
 3031%
 3032%           ==
 3033%           <prop parseType="literal"
 3034%           ==
 3035%
 3036%   but  not  the  terminating  =|>|=.  We  need  to  establish  the
 3037%   namespaces used in the DOM. The   namespaces in the rdf document
 3038%   are in the nsmap-option of Options.
 3039
 3040save_xml_literal(DOM, Attr, Out, Indent, Options) :-
 3041    xml_is_dom(DOM),
 3042    !,
 3043    memberchk(nsmap(NsMap), Options),
 3044    id_to_atom(Attr, Atom),
 3045    xml_write(Out,
 3046              element(Atom, ['rdf:parseType'='Literal'], DOM),
 3047              [ header(false),
 3048                indent(Indent),
 3049                nsmap(NsMap)
 3050              ]).
 3051save_xml_literal(NoDOM, _, _, _, _) :-
 3052    must_be(xml_dom, NoDOM).
 3053
 3054id_to_atom(NS:Local, Atom) :-
 3055    !,
 3056    atomic_list_concat([NS,Local], :, Atom).
 3057id_to_atom(ID, ID).
 3058
 3059
%!  rdf_collection(+URI) is semidet.
%
%   True  if  URI  represents  an  RDF    list  that  fits  the  RDF
%   parseType=collection syntax. This means it is   a linked list of
%   bnode-cells, each with exactly one rdf:first  that is a resource
%   and exactly one rdf:rest, optionally  an   rdf:type  that  is an
%   rdf:List and no other properties. The list ends in rdf:nil.

% Note that the prefixed terms in the clause heads below rely on this
% rdf_meta declaration.
:- rdf_meta
    rdf_collection(r),
    collection_p(r,r).

rdf_collection(rdf:nil) :- !.
rdf_collection(Cell) :-
    rdf_is_bnode(Cell),
    findall(F, rdf(Cell, rdf:first, F), [_]),       % exactly one first
    findall(F, rdf(Cell, rdf:rest, F), [Rest]),     % exactly one rest
    forall(rdf(Cell, P, V),                         % no other properties
           collection_p(P, V)),
    rdf_collection(Rest).

% collection_p(+Predicate, +Value)
%
% True if the triple is allowed on a cell of a collection.  The value
% of rdf:first must be an atom, i.e., not a literal.
collection_p(rdf:first, V) :- atom(V).
collection_p(rdf:rest, _).
collection_p(rdf:type, rdf:'List').
 3083
 3084
 3085%!  rdf_save_list(+Out, +List, +BaseURI, +Indent, +Options)
 3086
 3087rdf_save_list(_, List, _, _, _) :-
 3088    rdf_equal(List, rdf:nil),
 3089    !.
 3090rdf_save_list(Out, List, BaseURI, Indent, Options) :-
 3091    rdf_has(List, rdf:first, First),
 3092    (   rdf_is_bnode(First)
 3093    ->  nl(Out),
 3094        rdf_save_subject(Out, First, BaseURI, Indent, Options)
 3095    ;   option(encoding(Encoding), Options, utf8),
 3096        rdf_value(First, BaseURI, QVal, Encoding),
 3097        format(Out, '~N~*|<rdf:Description rdf:about="~w"/>',
 3098               [Indent, QVal])
 3099    ),
 3100    flag(rdf_db_saved_triples, X, X+3),
 3101    (   rdf_has(List, rdf:rest, List2),
 3102        \+ rdf_equal(List2, rdf:nil)
 3103    ->  rdf_save_list(Out, List2, BaseURI, Indent, Options)
 3104    ;   true
 3105    ).
 3106
 3107
 3108%!  rdf_id(+Resource, +BaseURI, -NSLocal)
 3109%
 3110%   Generate a NS:Local  name  for   Resource  given  the  indicated
 3111%   default namespace. This call is used for elements.
 3112
 3113rdf_id(Id, BaseURI, Local) :-
 3114    assertion(atom(BaseURI)),
 3115    atom_concat(BaseURI, Local, Id),
 3116    sub_atom(Local, 0, 1, _, #),
 3117    !.
 3118rdf_id(Id, _, NS:Local) :-
 3119    iri_xml_namespace(Id, Full, Local),
 3120    ns(NS, Full),
 3121    !.
 3122rdf_id(Id, _, NS:Local) :-
 3123    ns(NS, Full),
 3124    Full \== '',
 3125    atom_concat(Full, Local, Id),
 3126    !.
 3127rdf_id(Id, _, Id).
 3128
 3129
 3130%!  rdf_write_id(+Out, +NSLocal) is det.
 3131%
 3132%   Write an identifier. We cannot use native write on it as both NS
 3133%   and Local can be operators.
 3134
 3135rdf_write_id(Out, NS:Local) :-
 3136    !,
 3137    format(Out, '~w:~w', [NS, Local]).
 3138rdf_write_id(Out, Atom) :-
 3139    write(Out, Atom).
 3140
 3141%!  rdf_value(+Resource, +BaseURI, -Text, +Encoding)
 3142%
 3143%   According  to  "6.4  RDF  URI  References"  of  the  RDF  Syntax
 3144%   specification, a URI reference is  UNICODE string not containing
 3145%   control sequences, represented as  UTF-8   and  then  as escaped
 3146%   US-ASCII.
 3147
 3148rdf_value(Base, Base, '', _) :- !.
 3149rdf_value(V, Base, Text, Encoding) :-
 3150    atom_concat(Base, Local, V),
 3151    sub_atom(Local, 0, _, _, #),
 3152    !,
 3153    xml_quote_attribute(Local, Text, Encoding).
 3154rdf_value(V, _, Text, Encoding) :-
 3155    ns(NS, Full),
 3156    atom_concat(Full, Local, V),
 3157    xml_is_name(Local),
 3158    !,
 3159    xml_quote_attribute(Local, QLocal, Encoding),
 3160    atomic_list_concat(['&', NS, (';'), QLocal], Text).
 3161rdf_value(V, _, Q, Encoding) :-
 3162    xml_quote_attribute(V, Q, Encoding).
 3163
 3164
 3165                 /*******************************
 3166                 *       MATCH AND COMPARE      *
 3167                 *******************************/
 3168
 3169%!  rdf_compare(-Dif, +Object1, +Object2) is det.
 3170%
 3171%   Compare  two  object  terms.  Where  SPARQL  defines  a  partial
 3172%   ordering, we define a complete ordering   of terms. The ordering
 3173%   is defines as:
 3174%
 3175%     - Blank nodes < IRIs < Literals
 3176%     - Numeric literals < other literals
 3177%     - Numeric literals are compared by value and then by type,
 3178%       where Integer < Decimal < Double
%     - Other literals are compared lexically, case insensitive.
%       If equal, uppercase precedes lowercase.  If still equal,
 3181%       the types are compared lexically.
 3182
 3183%!  rdf_match_label(+How, +Pattern, +Label) is semidet.
 3184%
 3185%   True if Label matches Pattern according to   How.  How is one of
 3186%   `icase`, `substring`, `word`, `prefix` or   `like`. For backward
 3187%   compatibility, `exact` is a synonym for `icase`.
 3188
 3189
 3190                 /*******************************
 3191                 *      DEPRECATED MATERIAL     *
 3192                 *******************************/
 3193
 3194%!  rdf_split_url(+Prefix, +Local, -URL) is det.
 3195%!  rdf_split_url(-Prefix, -Local, +URL) is det.
 3196%
 3197%   Split/join a URL.  This functionality is moved to library(sgml).
 3198%
 3199%   @deprecated Use iri_xml_namespace/3. Note that the argument
 3200%   order is iri_xml_namespace(+IRI, -Namespace, -Localname).
 3201
 3202rdf_split_url(Prefix, Local, URL) :-
 3203    atomic(URL),
 3204    !,
 3205    iri_xml_namespace(URL, Prefix, Local).
 3206rdf_split_url(Prefix, Local, URL) :-
 3207    atom_concat(Prefix, Local, URL).
 3208
 3209%!  rdf_url_namespace(+URL, -Namespace)
 3210%
 3211%   Namespace is the namespace of URL.
 3212%
 3213%   @deprecated Use iri_xml_namespace/2
 3214
 3215rdf_url_namespace(URL, Prefix) :-
 3216    iri_xml_namespace(URL, Prefix).
 3217
 3218
 3219                 /*******************************
 3220                 *            LITERALS          *
 3221                 *******************************/
 3222
 3223%!  rdf_new_literal_map(-Map) is det.
 3224%
 3225%   Create a new literal map, returning an opaque handle.
 3226
 3227%!  rdf_destroy_literal_map(+Map) is det.
 3228%
 3229%   Destroy a literal map. After this call,   further use of the Map
 3230%   handle is illegal. Additional synchronisation  is needed if maps
 3231%   that are shared between threads are   destroyed to guarantee the
 3232%   handle    is    no    longer    used.    In    some    scenarios
 3233%   rdf_reset_literal_map/1 provides a safe alternative.
 3234
 3235%!  rdf_reset_literal_map(+Map) is det.
 3236%
 3237%   Delete all content from the literal map.
 3238
 3239%!  rdf_insert_literal_map(+Map, +Key, +Value) is det.
 3240%
 3241%   Add a relation between  Key  and  Value   to  the  map.  If this
 3242%   relation already exists no action is performed.
 3243
 3244%!  rdf_insert_literal_map(+Map, +Key, +Value, -KeyCount) is det.
 3245%
 3246%   As rdf_insert_literal_map/3. In addition, if Key is a new key in
 3247%   Map, unify KeyCount with the number of  keys in Map. This serves
 3248%   two purposes. Derived maps, such as  the stem and metaphone maps
 3249%   need to know about new  keys   and  it avoids additional foreign
 3250%   calls for doing the progress in rdf_litindex.pl.
 3251
 3252%!  rdf_delete_literal_map(+Map, +Key) is det.
 3253%
 3254%   Delete Key and all associated values from the map.
 3255
 3256%!  rdf_delete_literal_map(+Map, +Key, +Value) is det.
 3257%
 3258%   Delete the association between Key and Value from the map.
 3259
 3260%!  rdf_find_literal_map(+Map, +KeyList, -ValueList) is det.
 3261%
 3262%   Unify ValueList with an ordered set  of values associated to all
 3263%   keys from KeyList. Each key in  KeyList   is  either an atom, an
 3264%   integer or a term not(Key).  If   not-terms  are provided, there
%   must be at least one positive keyword. The negations are tested
 3266%   after establishing the positive matches.
 3267
 3268%!  rdf_keys_in_literal_map(+Map, +Spec, -Answer) is det.
 3269%
 3270%   Realises various queries on the key-set:
 3271%
 3272%     * all
 3273%
 3274%     Unify Answer with an ordered list of all keys.
 3275%     * key(+Key)
 3276%
 3277%     Succeeds if Key is a key in the map and unify Answer with the
 3278%     number of values associated with the key. This provides a fast
 3279%     test of existence without fetching the possibly large
 3280%     associated value set as with rdf_find_literal_map/3.
 3281%
 3282%     * prefix(+Prefix)
 3283%     Unify Answer with an ordered set of all keys that have the
 3284%     given prefix. See section 3.1 for details on prefix matching.
 3285%     Prefix must be an atom. This call is intended for
 3286%     auto-completion in user interfaces.
 3287%
 3288%     * ge(+Min)
 3289%     Unify Answer with all keys that are larger or equal to the
 3290%     integer Min.
 3291%
 3292%     * le(+Max)
 3293%     Unify Answer with all keys that are smaller or equal to the integer
 3294%     Max.
 3295%
%     * between(+Min, +Max)
%     Unify Answer with all keys between Min and Max (inclusive).
 3298
 3299%!  rdf_statistics_literal_map(+Map, -KeyValue)
 3300%
%   Query some statistics of the map. Provided KeyValue terms are:
 3302%
 3303%     * size(-Keys, -Relations)
 3304%     Unify Keys with the total key-count of the index and Relation
 3305%     with the total Key-Value count.
 3306
 3307
 3308
 3309                 /*******************************
 3310                 *             MISC             *
 3311                 *******************************/
 3312
 3313%!  rdf_version(-Version) is det.
 3314%
 3315%   True when Version is the numerical version-id of this library.
 3316%   The version is computed as
 3317%
 3318%           Major*10000 + Minor*100 + Patch.
 3319
 3320%!  rdf_set(+Term) is det.
 3321%
 3322%   Set properties of the RDF store.  Currently defines:
 3323%
 3324%     * hash(+Hash, +Parameter, +Value)
 3325%     Set properties for a triple index.  Hash is one of =s=,
 3326%     =p=, =sp=, =o=, =po=, =spo=, =g=, =sg= or =pg=.  Parameter
 3327%     is one of:
 3328%
 3329%       - size
 3330%       Value defines the number of entries in the hash-table.
 3331%       Value is rounded _down_ to a power of 2.  After setting
 3332%       the size explicitly, auto-sizing for this table is
 3333%       disabled.  Setting the size smaller than the current
 3334%       size results in a =permission_error= exception.
 3335%
 3336%       - average_chain_len
 3337%       Set maximum average collision number for the hash.
 3338%
 3339%       - optimize_threshold
 3340%       Related to resizing hash-tables.  If 0, all triples are
 3341%       moved to the new size by the garbage collector.  If more
%       than zero, those of the last Value resize steps remain at
 3343%       their current location.  Leaving cells at their current
 3344%       location reduces memory fragmentation and slows down
 3345%       access.
 3346
 3347%!  rdf_md5(+Graph, -MD5) is det.
 3348%
 3349%   True when MD5 is the MD5 hash for  all triples in graph. The MD5
 3350%   digest itself is represented as an   atom holding a 32-character
 3351%   hexadecimal   string.   The   library   maintains   the   digest
 3352%   incrementally on rdf_load/[1,2], rdf_load_db/1, rdf_assert/[3,4]
 3353%   and  rdf_retractall/[3,4].  Checking  whether   the  digest  has
 3354%   changed since the last rdf_load/[1,2]  call provides a practical
 3355%   means for checking whether the file needs to be saved.
 3356%
 3357%   @deprecated New code should use rdf_graph_property(Graph,
 3358%   hash(Hash)).
 3359
 3360%!  rdf_generation(-Generation) is det.
 3361%
 3362%   True when Generation is the current  generation of the database.
 3363%   Each modification to the database  increments the generation. It
 3364%   can be used to check the validity of cached results deduced from
 3365%   the database. Committing a non-empty  transaction increments the
 3366%   generation by one.
 3367%
 3368%   When inside a transaction,  Generation  is   unified  to  a term
 3369%   _TransactionStartGen_ + _InsideTransactionGen_. E.g.,  4+3 means
 3370%   that the transaction was started at   generation 4 of the global
 3371%   database and we have  created  3   new  generations  inside  the
 3372%   transaction. Note that this choice  of representation allows for
 3373%   comparing  generations  using  Prolog  arithmetic.  Comparing  a
 3374%   generation in one  transaction  with   a  generation  in another
 3375%   transaction is meaningless.
 3376
 3377%!  rdf_estimate_complexity(?Subject, ?Predicate, ?Object, -Complexity)
 3378%
 3379%   Return the number of alternatives as   indicated by the database
 3380%   internal hashed indexing. This is a rough measure for the number
 3381%   of alternatives we can expect for   an  rdf_has/3 call using the
 3382%   given three arguments. When  called   with  three variables, the
 3383%   total number of triples is returned.   This  estimate is used in
 3384%   query  optimisation.  See  also    rdf_predicate_property/2  and
 3385%   rdf_statistics/1 for additional information to help optimizers.
 3386
%!  rdf_debug(+Level) is det.
%
%   Set debugging to Level.  Level is an integer 0..9.  The default
%   is 0 (no debugging).
 3391
 3392%!  rdf_atom_md5(+Text, +Times, -MD5) is det.
 3393%
%   Computes the MD5 hash from Text, which is an atom, string or list of
%   character codes. Times is an integer >= 1; the MD5 algorithm is
%   repeated Times times on the generated hash. This can be used for
%   password encryption algorithms to make generate-and-test loops
%   slow.
 3399%
%   @deprecated Obviously, password hash primitives do not belong in
%   this library. The library(crypto) from the =ssl= package provides
%   extensive support for hashes. The =clib= package provides
%   library(crypt) to access the OS (Unix) password hash
%   implementation as well as lightweight implementations of several
%   popular hashes.
 3406
 3407
 3408                 /*******************************
 3409                 *             MESSAGES         *
 3410                 *******************************/
 3411
 3412:- multifile
 3413    prolog:message//1. 3414
 3415prolog:message(rdf(Term)) -->
 3416    message(Term).
 3417
 3418message(loaded(How, What, BaseURI, Triples, Time)) -->
 3419    how(How),
 3420    source(What),
 3421    into(What, BaseURI),
 3422    in_time(Triples, Time).
 3423message(save_removed_duplicates(N, Subject)) -->
 3424    [ 'Removed ~d duplicate triples about "~p"'-[N,Subject] ].
 3425message(saved(File, SavedSubjects, SavedTriples)) -->
 3426    [ 'Saved ~D triples about ~D subjects into ~p'-
 3427      [SavedTriples, SavedSubjects, File]
 3428    ].
 3429message(using_namespace(Id, NS)) -->
 3430    [ 'Using namespace id ~w for ~w'-[Id, NS] ].
 3431message(inconsistent_cache(DB, Graphs)) -->
 3432    [ 'RDF cache file for ~w contains the following graphs'-[DB], nl,
 3433      '~t~8|~p'-[Graphs]
 3434    ].
 3435message(guess_format(Ext)) -->
 3436    [ 'Unknown file-extension: ~w.  Assuming RDF/XML'-[Ext] ].
 3437message(meta(not_expanded(G))) -->
 3438    [ 'rdf_meta/1: ~p is not expanded'-[G] ].
 3439message(deprecated(rdf_unload(Graph))) -->
 3440    [ 'rdf_unload/1: Use ~q'-[rdf_unload_graph(Graph)] ].
 3441
 3442
 3443how(load)   --> [ 'Loaded' ].
 3444how(parsed) --> [ 'Parsed' ].
 3445
 3446source(SourceURL) -->
 3447    { uri_file_name(SourceURL, File),
 3448      !,
 3449      file_base_name(File, Base)    % TBD: relative file?
 3450    },
 3451    [ ' "~w"'-[Base] ].
 3452source(SourceURL) -->
 3453    [ ' "~w"'-[SourceURL] ].
 3454
 3455into(_, _) --> [].                      % TBD
 3456
 3457in_time(Triples, ParseTime) -->
 3458    [ ' in ~2f sec; ~D triples'-[ParseTime, Triples]
 3459    ]