View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2003-2017, University of Amsterdam
    7                              VU University Amsterdam
    8    All rights reserved.
    9
   10    Redistribution and use in source and binary forms, with or without
   11    modification, are permitted provided that the following conditions
   12    are met:
   13
   14    1. Redistributions of source code must retain the above copyright
   15       notice, this list of conditions and the following disclaimer.
   16
   17    2. Redistributions in binary form must reproduce the above copyright
   18       notice, this list of conditions and the following disclaimer in
   19       the documentation and/or other materials provided with the
   20       distribution.
   21
   22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   33    POSSIBILITY OF SUCH DAMAGE.
   34*/
   35
   36:- module(rdf_db,
   37          [ rdf_version/1,              % -Version
   38
   39            rdf/3,                      % ?Subject, ?Predicate, ?Object
   40            rdf/4,                      % ?Subject, ?Predicate, ?Object, ?DB
   41            rdf_has/3,                  % ?Subject, +Pred, ?Obj
   42            rdf_has/4,                  % ?Subject, +Pred, ?Obj, -RealPred
   43            rdf_reachable/3,            % ?Subject, +Pred, ?Object
   44            rdf_reachable/5,            % ?Subject, +Pred, ?Object, +MaxD, ?D
   45            rdf_resource/1,             % ?Resource
   46            rdf_subject/1,              % ?Subject
   47
   48            rdf_member_property/2,      % ?Property, ?Index
   49
   50            rdf_assert/3,               % +Subject, +Predicate, +Object
   51            rdf_assert/4,               % +Subject, +Predicate, +Object, +DB
   52            rdf_retractall/3,           % ?Subject, ?Predicate, ?Object
   53            rdf_retractall/4,           % ?Subject, ?Predicate, ?Object, +DB
   54            rdf_update/4,               % +Subject, +Predicate, +Object, +Act
   55            rdf_update/5,               % +Subject, +Predicate, +Object, +Src, +Act
   56            rdf_set_predicate/2,        % +Predicate, +Property
   57            rdf_predicate_property/2,   % +Predicate, ?Property
   58            rdf_current_predicate/1,    % -Predicate
   59            rdf_current_literal/1,      % -Literal
   60            rdf_transaction/1,          % :Goal
   61            rdf_transaction/2,          % :Goal, +Id
   62            rdf_transaction/3,          % :Goal, +Id, +Options
   63            rdf_active_transaction/1,   % ?Id
   64
   65            rdf_monitor/2,              % :Goal, +Options
   66
   67            rdf_save_db/1,              % +File
   68            rdf_save_db/2,              % +File, +DB
   69            rdf_load_db/1,              % +File
   70            rdf_reset_db/0,
   71
   72            rdf_node/1,                 % -Id
   73            rdf_bnode/1,                % -Id
   74            rdf_is_bnode/1,             % +Id
   75
   76            rdf_is_resource/1,          % +Term
   77            rdf_is_literal/1,           % +Term
   78            rdf_literal_value/2,        % +Term, -Value
   79
   80            rdf_load/1,                 % +File
   81            rdf_load/2,                 % +File, +Options
   82            rdf_save/1,                 % +File
   83            rdf_save/2,                 % +File, +Options
   84            rdf_unload/1,               % +File
   85            rdf_unload_graph/1,         % +Graph
   86
   87            rdf_md5/2,                  % +DB, -MD5
   88            rdf_atom_md5/3,             % +Text, +Times, -MD5
   89
   90            rdf_create_graph/1,         % ?Graph
   91            rdf_graph_property/2,       % ?Graph, ?Property
   92            rdf_set_graph/2,            % +Graph, +Property
   93            rdf_graph/1,                % ?Graph
   94            rdf_source/1,               % ?File
   95            rdf_source/2,               % ?DB, ?SourceURL
   96            rdf_make/0,                 % Reload modified databases
   97            rdf_gc/0,                   % Garbage collection
   98
   99            rdf_source_location/2,      % +Subject, -Source
  100            rdf_statistics/1,           % -Key
  101            rdf_set/1,                  % +Term
  102            rdf_generation/1,           % -Generation
  103            rdf_snapshot/1,             % -Snapshot
  104            rdf_delete_snapshot/1,      % +Snapshot
  105            rdf_current_snapshot/1,     % +Snapshot
  106            rdf_estimate_complexity/4,  % +S,+P,+O,-Count
  107
  108            rdf_save_subject/3,         % +Stream, +Subject, +DB
  109            rdf_save_header/2,          % +Out, +Options
  110            rdf_save_footer/1,          % +Out
  111
  112            rdf_equal/2,                % ?Resource, ?Resource
  113            lang_equal/2,               % +Lang1, +Lang2
  114            lang_matches/2,             % +Lang, +Pattern
  115
  116            rdf_prefix/2,               % :Alias, +URI
  117            rdf_current_prefix/2,       % :Alias, ?URI
  118            rdf_register_prefix/2,      % +Alias, +URI
  119            rdf_register_prefix/3,      % +Alias, +URI, +Options
  120            rdf_unregister_prefix/1,    % +Alias
  121            rdf_current_ns/2,           % :Alias, ?URI
  122            rdf_register_ns/2,          % +Alias, +URI
  123            rdf_register_ns/3,          % +Alias, +URI, +Options
  124            rdf_global_id/2,            % ?NS:Name, :Global
  125            rdf_global_object/2,        % +Object, :NSExpandedObject
  126            rdf_global_term/2,          % +Term, :WithExpandedNS
  127
  128            rdf_compare/3,              % -Dif, +Object1, +Object2
  129            rdf_match_label/3,          % +How, +String, +Label
  130            rdf_split_url/3,            % ?Base, ?Local, ?URL
  131            rdf_url_namespace/2,        % +URL, ?Base
  132
  133            rdf_warm_indexes/0,
  134            rdf_warm_indexes/1,         % +Indexed
  135            rdf_update_duplicates/0,
  136
  137            rdf_debug/1,                % Set verbosity
  138
  139            rdf_new_literal_map/1,      % -Handle
  140            rdf_destroy_literal_map/1,  % +Handle
  141            rdf_reset_literal_map/1,    % +Handle
  142            rdf_insert_literal_map/3,   % +Handle, +Key, +Literal
  143            rdf_insert_literal_map/4,   % +Handle, +Key, +Literal, -NewKeys
  144            rdf_delete_literal_map/3,   % +Handle, +Key, +Literal
  145            rdf_delete_literal_map/2,   % +Handle, +Key
  146            rdf_find_literal_map/3,     % +Handle, +KeyList, -Literals
  147            rdf_keys_in_literal_map/3,  % +Handle, +Spec, -Keys
  148            rdf_statistics_literal_map/2, % +Handle, +Name(-Arg...)
  149
  150            rdf_graph_prefixes/2,       % ?Graph, -Prefixes
  151            rdf_graph_prefixes/3,       % ?Graph, -Prefixes, :Filter
  152
  153            (rdf_meta)/1,               % +Heads
  154            op(1150, fx, (rdf_meta))
  155          ]).  156:- use_module(library(rdf)).  157:- use_module(library(lists)).  158:- use_module(library(shlib)).  159:- use_module(library(gensym)).  160:- use_module(library(sgml)).  161:- use_module(library(sgml_write)).  162:- use_module(library(option)).  163:- use_module(library(error)).  164:- use_module(library(uri)).  165:- use_module(library(debug)).  166:- use_module(library(apply)).  167:- use_module(library(xsdp_types)).  168:- if(exists_source(library(thread))).  169:- use_module(library(thread)).  170:- endif.  171:- use_module(library(semweb/rdf_cache)).  172:- use_module(library(semweb/rdf_prefixes)).  173
  174:- use_foreign_library(foreign(rdf_db)).  175:- public rdf_print_predicate_cloud/2.  % print matrix of reachable predicates
  176
  177:- meta_predicate
  178    rdf_transaction(0),
  179    rdf_transaction(0, +),
  180    rdf_transaction(0, +, +),
  181    rdf_monitor(1, +),
  182    rdf_save(+, :),
  183    rdf_load(+, :).  184
  185:- predicate_options(rdf_graph_prefixes/3, 3,
  186                     [expand(callable), filter(callable), min_count(nonneg)]).  187:- predicate_options(rdf_load/2, 2,
  188                     [ base_uri(atom),
  189                       blank_nodes(oneof([share,noshare])),
  190                       cache(boolean),
  191                       concurrent(positive_integer),
  192                       db(atom),
  193                       format(oneof([xml,triples,turtle,trig,nquads,ntriples])),
  194                       graph(atom),
  195                       multifile(boolean),
  196                       if(oneof([true,changed,not_loaded])),
  197                       modified(-float),
  198                       prefixes(-list),
  199                       silent(boolean),
  200                       register_namespaces(boolean)
  201                     ]).  202:- predicate_options(rdf_save/2, 2,
  203                     [ graph(atom),
  204                       db(atom),
  205                       anon(boolean),
  206                       base_uri(atom),
  207                       write_xml_base(boolean),
  208                       convert_typed_literal(callable),
  209                       encoding(encoding),
  210                       document_language(atom),
  211                       namespaces(list(atom)),
  212                       xml_attributes(boolean),
  213                       inline(boolean)
  214                     ]).  215:- predicate_options(rdf_save_header/2, 2,
  216                     [ graph(atom),
  217                       db(atom),
  218                       namespaces(list(atom))
  219                     ]).  220:- predicate_options(rdf_save_subject/3, 3,
  221                     [ graph(atom),
  222                       base_uri(atom),
  223                       convert_typed_literal(callable),
  224                       document_language(atom)
  225                     ]).  226:- predicate_options(rdf_transaction/3, 3,
  227                     [ snapshot(any)
  228                     ]).  229
  230:- discontiguous
  231    term_expansion/2.  232
  233/** <module> Core RDF database
  234
  235The file library(semweb/rdf_db) provides the core  of the SWI-Prolog RDF
  236store.
  237
  238@deprecated     New applications should use library(semweb/rdf11), which
  239                provides a much more intuitive API to the RDF store, notably
  240                for handling literals.  The library(semweb/rdf11) runs
  241                currently on top of this library and both can run side-by-side
  242                in the same application.  Terms retrieved from the database
  243                however have a different shape and can not be exchanged without
  244                precautions.
  245*/
  246
  247		 /*******************************
  248		 *            PREFIXES		*
  249		 *******************************/
  250
  251% the ns/2 predicate is historically defined  in this module. We'll keep
  252% that for compatibility reasons.
  253
  254:- multifile ns/2.  255:- dynamic   ns/2.                      % ID, URL
  256
  257:- multifile
  258    rdf_prefixes:rdf_empty_prefix_cache/2.  259
  260rdf_prefixes:rdf_empty_prefix_cache(_Prefix, _IRI) :-
  261    rdf_empty_prefix_cache.
  262
  263:- rdf_meta
  264    rdf(r,r,o),
  265    rdf_has(r,r,o,r),
  266    rdf_has(r,r,o),
  267    rdf_assert(r,r,o),
  268    rdf_retractall(r,r,o),
  269    rdf(r,r,o,?),
  270    rdf_assert(r,r,o,+),
  271    rdf_retractall(r,r,o,?),
  272    rdf_reachable(r,r,o),
  273    rdf_reachable(r,r,o,+,?),
  274    rdf_update(r,r,o,t),
  275    rdf_update(r,r,o,+,t),
  276    rdf_equal(o,o),
  277    rdf_source_location(r,-),
  278    rdf_resource(r),
  279    rdf_subject(r),
  280    rdf_create_graph(r),
  281    rdf_graph(r),
  282    rdf_graph_property(r,?),
  283    rdf_set_graph(r,+),
  284    rdf_unload_graph(r),
  285    rdf_set_predicate(r, t),
  286    rdf_predicate_property(r, -),
  287    rdf_estimate_complexity(r,r,r,-),
  288    rdf_print_predicate_cloud(r,+).  289
  290%!  rdf_equal(?Resource1, ?Resource2)
  291%
  292%   Simple equality test to exploit goal-expansion.
  293
  294rdf_equal(Resource, Resource).
  295
  296%!  lang_equal(+Lang1, +Lang2) is semidet.
  297%
  298%   True if two RFC language specifiers denote the same language
  299%
  300%   @see lang_matches/2.
  301
  302lang_equal(Lang, Lang) :- !.
  303lang_equal(Lang1, Lang2) :-
  304    downcase_atom(Lang1, LangCannon),
  305    downcase_atom(Lang2, LangCannon).
  306
  307%!  lang_matches(+Lang, +Pattern) is semidet.
  308%
%   True if Lang matches Pattern. This implements XML language
%   matching conforming to RFC 4647. Both Lang and Pattern are
%   dash-separated strings of identifiers or (for Pattern) the
%   wildcard *. Identifiers are matched case-insensitively and a *
%   matches any number of identifiers. A short pattern is the same
%   as *.
  315
  316
  317                 /*******************************
  318                 *     BASIC TRIPLE QUERIES     *
  319                 *******************************/
  320
  321%!  rdf(?Subject, ?Predicate, ?Object) is nondet.
  322%
  323%   Elementary query for triples. Subject   and  Predicate are atoms
  324%   representing the fully qualified URL of  the resource. Object is
  325%   either an atom representing a resource  or literal(Value) if the
  326%   object  is  a  literal  value.   If    a   value   of  the  form
  327%   NameSpaceID:LocalName is provided it  is   expanded  to a ground
  328%   atom  using  expand_goal/2.  This  implies   you  can  use  this
  329%   construct in compiled code without paying a performance penalty.
  330%   Literal values take one of the following forms:
  331%
  332%     * Atom
  333%     If the value is a simple atom it is the textual representation
  334%     of a string literal without explicit type or language
  335%     qualifier.
  336%
  337%     * lang(LangID, Atom)
  338%     Atom represents the text of a string literal qualified with
  339%     the given language.
  340%
  341%     * type(TypeID, Value)
  342%     Used for attributes qualified using the =|rdf:datatype|=
  343%     TypeID. The Value is either the textual representation or a
  344%     natural Prolog representation. See the option
  345%     convert_typed_literal(:Convertor) of the parser. The storage
  346%     layer provides efficient handling of atoms, integers (64-bit)
  347%     and floats (native C-doubles). All other data is represented
  348%     as a Prolog record.
  349%
  350%   For literal querying purposes, Object can be of the form
  351%   literal(+Query, -Value), where Query is one of the terms below.
  352%   If the Query takes a literal argument and the value has a
  353%   numeric type numerical comparison is performed.
  354%
  355%     * plain(+Text)
  356%     Perform exact match and demand the language or type qualifiers
  357%     to match. This query is fully indexed.
  358%
  359%     * icase(+Text)
  360%     Perform a full but case-insensitive match. This query is
  361%     fully indexed.
  362%
  363%     * exact(+Text)
  364%     Same as icase(Text).  Backward compatibility.
  365%
  366%     * substring(+Text)
  367%     Match any literal that contains Text as a case-insensitive
  368%     substring. The query is not indexed on Object.
  369%
  370%     * word(+Text)
  371%     Match any literal that contains Text delimited by a non
  372%     alpha-numeric character, the start or end of the string. The
  373%     query is not indexed on Object.
  374%
  375%     * prefix(+Text)
  376%     Match any literal that starts with Text. This call is intended
  377%     for completion. The query is indexed using the skip list of
  378%     literals.
  379%
  380%     * ge(+Literal)
  381%     Match any literal that is equal or larger than Literal in the
  382%     ordered set of literals.
  383%
  384%     * gt(+Literal)
  385%     Match any literal that is larger than Literal in the ordered set
  386%     of literals.
  387%
  388%     * eq(+Literal)
  389%     Match any literal that is equal to Literal in the ordered set
  390%     of literals.
  391%
  392%     * le(+Literal)
  393%     Match any literal that is equal or smaller than Literal in the
  394%     ordered set of literals.
  395%
  396%     * lt(+Literal)
  397%     Match any literal that is smaller than Literal in the ordered set
  398%     of literals.
  399%
  400%     * between(+Literal1, +Literal2)
  401%     Match any literal that is between Literal1 and Literal2 in the
  402%     ordered set of literals. This may include both Literal1 and
  403%     Literal2.
  404%
  405%     * like(+Pattern)
  406%     Match any literal that matches Pattern case insensitively,
  407%     where the `*' character in Pattern matches zero or more
  408%     characters.
  409%
  410%   Backtracking never returns duplicate triples.  Duplicates can be
  411%   retrieved using rdf/4. The predicate   rdf/3 raises a type-error
%   if called with improper arguments.  If rdf/3 is called with a
%   term literal(_) as Subject or Predicate, it fails silently.
%   This allows graph matching goals like
%   rdf(S,P,O),rdf(O,P2,O2) to proceed without errors.
  416
  417%!  rdf(?Subject, ?Predicate, ?Object, ?Source) is nondet.
  418%
  419%   As rdf/3 but in addition query  the   graph  to which the triple
  420%   belongs. Unlike rdf/3, this predicate does not remove duplicates
  421%   from the result set.
  422%
%   @param Source is a term Graph:Line.  If Source is instantiated,
%   passing an atom is the same as passing Atom:_.
  425
  426
  427%!  rdf_has(?Subject, +Predicate, ?Object) is nondet.
  428%
  429%   Succeeds if the triple rdf(Subject,   Predicate, Object) is true
  430%   exploiting the rdfs:subPropertyOf predicate as   well as inverse
  431%   predicates   declared   using   rdf_set_predicate/2   with   the
  432%   =inverse_of= property.
  433
  434%!  rdf_has(?Subject, +Predicate, ?Object, -RealPredicate) is nondet.
  435%
  436%   Same as rdf_has/3, but RealPredicate is   unified  to the actual
  437%   predicate that makes this relation   true. RealPredicate must be
  438%   Predicate or an rdfs:subPropertyOf  Predicate.   If  an  inverse
  439%   match is found, RealPredicate is the term inverse_of(Pred).
  440
  441%!  rdf_reachable(?Subject, +Predicate, ?Object) is nondet.
  442%
%   Is true if Object can be reached from Subject following the
%   transitive predicate Predicate or a sub-property thereof, while
%   respecting the symmetric(true) or inverse_of(P2) properties.
  446%
  447%   If used with either Subject or  Object unbound, it first returns
  448%   the origin, followed by  the  reachable  nodes  in breadth-first
  449%   search-order. The implementation internally   looks one solution
  450%   ahead and succeeds deterministically on  the last solution. This
  451%   predicate never generates the same  node   twice  and  is robust
  452%   against cycles in the transitive relation.
  453%
  454%   With all arguments instantiated,   it succeeds deterministically
  455%   if a path can be found from  Subject to Object. Searching starts
  456%   at Subject, assuming the branching factor   is normally lower. A
  457%   call  with  both  Subject   and    Object   unbound   raises  an
  458%   instantiation  error.  The  following    example  generates  all
  459%   subclasses of rdfs:Resource:
  460%
  461%     ==
  462%     ?- rdf_reachable(X, rdfs:subClassOf, rdfs:'Resource').
  463%     X = 'http://www.w3.org/2000/01/rdf-schema#Resource' ;
  464%     X = 'http://www.w3.org/2000/01/rdf-schema#Class' ;
  465%     X = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#Property' ;
  466%     ...
  467%     ==
  468
  469
  470%!  rdf_reachable(?Subject, +Predicate, ?Object, +MaxD, -D) is nondet.
  471%
  472%   Same as rdf_reachable/3, but in addition, MaxD limits the number
  473%   of edges expanded and D is   unified with the `distance' between
  474%   Subject and Object. Distance 0 means  Subject and Object are the
  475%   same resource. MaxD can be the  constant =infinite= to impose no
  476%   distance-limit.
  477
  478%!  rdf_subject(?Resource) is nondet.
  479%
  480%   True if Resource appears as a   subject. This query respects the
  481%   visibility rules implied by the logical update view.
  482%
  483%   @see rdf_resource/1.
  484
  485rdf_subject(Resource) :-
  486    rdf_resource(Resource),
  487    ( rdf(Resource, _, _) -> true ).
  488
  489%!  rdf_resource(?Resource) is nondet.
  490%
  491%   True when Resource is a resource used as a subject or object in
  492%   a triple.
  493%
  494%   This predicate is primarily intended  as   a  way to process all
  495%   resources without processing resources twice.   The user must be
  496%   aware that some of the returned resources  may not appear in any
  497%   _visible_ triple.
  498
  499
  500                 /*******************************
  501                 *     TRIPLE MODIFICATIONS     *
  502                 *******************************/
  503
  504%!  rdf_assert(+Subject, +Predicate, +Object) is det.
  505%
  506%   Assert a new triple into  the   database.  This is equivalent to
  507%   rdf_assert/4 using Graph  =user=.  Subject   and  Predicate  are
  508%   resources. Object is either a resource or a term literal(Value).
  509%   See rdf/3 for an explanation  of   Value  for typed and language
  510%   qualified literals. All arguments  are   subject  to  name-space
  511%   expansion. Complete duplicates (including  the   same  graph and
  512%   `line' and with a compatible `lifespan')   are  not added to the
  513%   database.
  514
  515%!  rdf_assert(+Subject, +Predicate, +Object, +Graph) is det.
  516%
  517%   As rdf_assert/3, adding the  predicate   to  the indicated named
  518%   graph.
  519%
  520%   @param Graph is either the name of a   graph (an atom) or a term
  521%   Graph:Line, where Line is an integer that denotes a line number.
  522
  523%!  rdf_retractall(?Subject, ?Predicate, ?Object) is det.
  524%
  525%   Remove   all   matching   triples   from    the   database.   As
  526%   rdf_retractall/4 using an unbound graph.
  527
  528%!  rdf_retractall(?Subject, ?Predicate, ?Object, ?Graph) is det.
  529%
%   As rdf_retractall/3, also matching Graph.  This is particularly
%   useful to remove all triples coming from a loaded file. See also
%   rdf_unload/1.
  533
  534%!  rdf_update(+Subject, +Predicate, +Object, +Action) is det.
  535%
  536%   Replaces one of  the  three  fields   on  the  matching  triples
  537%   depending on Action:
  538%
  539%     * subject(Resource)
  540%     Changes the first field of the triple.
  541%     * predicate(Resource)
  542%     Changes the second field of the triple.
  543%     * object(Object)
  544%     Changes the last field of the triple to the given resource or
  545%     literal(Value).
  546%     * graph(Graph)
  547%     Moves the triple from its current named graph to Graph.
  548
%!  rdf_update(+Subject, +Predicate, +Object, +Graph, +Action) is det.
  550%
  551%   As rdf_update/4 but allows for specifying the graph.
  552
  553
  554                 /*******************************
  555                 *          COLLECTIONS         *
  556                 *******************************/
  557
  558%!  rdf_member_property(?Prop, ?Index)
  559%
  560%   Deal with the rdf:_1, ... properties.
  561
  562term_expansion(member_prefix(x),
  563               member_prefix(Prefix)) :-
  564    rdf_db:ns(rdf, NS),
  565    atom_concat(NS, '_', Prefix).
  566member_prefix(x).
  567
  568rdf_member_property(P, N) :-
  569    integer(N),
  570    !,
  571    member_prefix(Prefix),
  572    atom_concat(Prefix, N, P).
  573rdf_member_property(P, N) :-
  574    member_prefix(Prefix),
  575    atom_concat(Prefix, Sub, P),
  576    atom_number(Sub, N).
  577
  578
  579                 /*******************************
  580                 *      ANONYMOUS SUBJECTS      *
  581                 *******************************/
  582
  583%!  rdf_node(-Id)
  584%
  585%   Generate a unique blank node identifier for a subject.
  586%
  587%   @deprecated     New code should use rdf_bnode/1.
  588
  589rdf_node(Resource) :-
  590    rdf_bnode(Resource).
  591
  592%!  rdf_bnode(-Id)
  593%
  594%   Generate a unique anonymous identifier for a subject.
  595
  596rdf_bnode(Value) :-
  597    repeat,
  598    gensym('_:genid', Value),
  599    \+ rdf(Value, _, _),
  600    \+ rdf(_, _, Value),
  601    \+ rdf(_, Value, _),
  602    !.
  603
  604
  605
  606                 /*******************************
  607                 *             TYPES            *
  608                 *******************************/
  609
  610%!  rdf_is_bnode(+Id)
  611%
  612%   Tests if a resource is  a  blank   node  (i.e.  is  an anonymous
  613%   resource). A blank node is represented   as  an atom that starts
  614%   with =|_:|=. For backward compatibility   reason, =|__|= is also
  615%   considered to be a blank node.
  616%
  617%   @see rdf_bnode/1.
  618
  619%!  rdf_is_resource(@Term) is semidet.
  620%
  621%   True if Term is an RDF  resource.   Note  that  this is merely a
  622%   type-test; it does not mean  this   resource  is involved in any
  623%   triple.  Blank nodes are also considered resources.
  624%
  625%   @see rdf_is_bnode/1
  626
  627rdf_is_resource(Term) :-
  628    atom(Term).
  629
  630%!  rdf_is_literal(@Term) is semidet.
  631%
  632%   True if Term is an RDF literal object. Currently only checks for
  633%   groundness and the literal functor.
  634
  635rdf_is_literal(literal(Value)) :-
  636    ground(Value).
  637
  638                 /*******************************
  639                 *             LITERALS         *
  640                 *******************************/
  641
  642%!  rdf_current_literal(-Literal) is nondet.
  643%
  644%   True when Literal is a currently  known literal. Enumerates each
  645%   unique literal exactly once. Note that   it is possible that the
  646%   literal only appears in already deleted triples. Deleted triples
  647%   may be locked due to active   queries, transactions or snapshots
  648%   or may not yet be reclaimed by the garbage collector.
  649
  650
  651%!  rdf_literal_value(+Literal, -Value) is semidet.
  652%
  653%   True when value is  the   appropriate  Prolog  representation of
  654%   Literal in the RDF _|value space|_.  Current mapping:
  655%
  656%     | Plain literals              | Atom                    |
  657%     | Language tagged literal     | Atom holding plain text |
  658%     | xsd:string                  | Atom                    |
  659%     | rdf:XMLLiteral              | XML DOM Tree            |
  660%     | Numeric XSD type            | Number                  |
  661%
  662%   @tbd    Well, this is the long-term idea.
  663%   @tbd    Add mode (-,+)
  664
  665:- rdf_meta
  666    rdf_literal_value(o, -),
  667    typed_value(r, +, -),
  668    numeric_value(r, +, -).  669
  670rdf_literal_value(literal(String), Value) :-
  671    atom(String),
  672    !,
  673    Value = String.
  674rdf_literal_value(literal(lang(_Lang, String)), String).
  675rdf_literal_value(literal(type(Type, String)), Value) :-
  676    typed_value(Type, String, Value).
  677
  678typed_value(Numeric, String, Value) :-
  679    xsdp_numeric_uri(Numeric, NumType),
  680    !,
  681    numeric_value(NumType, String, Value).
  682typed_value(xsd:string, String, String).
  683typed_value(rdf:'XMLLiteral', Value, DOM) :-
  684    (   atom(Value)
  685    ->  setup_call_cleanup(
  686            ( atom_to_memory_file(Value, MF),
  687              open_memory_file(MF, read, In, [free_on_close(true)])
  688            ),
  689            load_structure(stream(In), DOM, [dialect(xml)]),
  690            close(In))
  691    ;   DOM = Value
  692    ).
  693
  694numeric_value(xsd:integer, String, Value) :-
  695    atom_number(String, Value),
  696    integer(Value).
  697numeric_value(xsd:float, String, Value) :-
  698    atom_number(String, Number),
  699    Value is float(Number).
  700numeric_value(xsd:double, String, Value) :-
  701    atom_number(String, Number),
  702    Value is float(Number).
  703numeric_value(xsd:decimal, String, Value) :-
  704    atom_number(String, Value).
  705
  706
  707                 /*******************************
  708                 *            SOURCE            *
  709                 *******************************/
  710
  711%!  rdf_source_location(+Subject, -Location) is nondet.
  712%
  713%   True when triples for Subject are loaded from Location.
  714%
  715%   @param Location is a term File:Line.
  716
  717rdf_source_location(Subject, Source) :-
  718    findall(Source, rdf(Subject, _, _, Source), Sources),
  719    sort(Sources, Unique),
  720    member(Source, Unique).
  721
  722
  723                 /*******************************
  724                 *       GARBAGE COLLECT        *
  725                 *******************************/
  726
  727%!  rdf_create_gc_thread
  728%
  729%   Create the garbage collection thread.
  730
  731:- public
  732    rdf_create_gc_thread/0.  733
  734rdf_create_gc_thread :-
  735    thread_create(rdf_gc_loop, _,
  736                  [ alias('__rdf_GC')
  737                  ]).
  738
  739%!  rdf_gc_loop
  740%
  741%   Take care of running the RDF garbage collection.  This predicate
  742%   is called from a thread started by creating the RDF DB.
  743
  744rdf_gc_loop :-
  745    catch(rdf_gc_loop(0), E, recover_gc(E)).
  746
  747recover_gc('$aborted') :-
  748    !,
  749    thread_self(Me),
  750    thread_detach(Me).
  751recover_gc(Error) :-
  752    print_message(error, Error),
  753    rdf_gc_loop.
  754
  755rdf_gc_loop(CPU) :-
  756    repeat,
  757    (   consider_gc(CPU)
  758    ->  rdf_gc(CPU1),
  759        sleep(CPU1)
  760    ;   sleep(0.1)
  761    ),
  762    fail.
  763
  764%!  rdf_gc(-CPU) is det.
  765%
  766%   Run RDF GC one time. CPU is  the   amount  of CPU time spent. We
  767%   update this in Prolog because portable access to thread specific
  768%   CPU is really hard in C.
  769
  770rdf_gc(CPU) :-
  771    statistics(cputime, CPU0),
  772    (   rdf_gc_
  773    ->  statistics(cputime, CPU1),
  774        CPU is CPU1-CPU0,
  775        rdf_add_gc_time(CPU)
  776    ;   CPU = 0.0
  777    ).
  778
  779%!  rdf_gc is det.
  780%
  781%   Run the RDF-DB garbage collector until no   garbage  is left and all
  782%   tables are fully optimized. Under normal operation a seperate thread
  783%   with identifier =|__rdf_GC|= performs garbage  collection as long as
  784%   it is considered `useful'.
  785%
  786%   Using rdf_gc/0 should  only  be  needed   to  ensure  a  fully clean
  787%   database for analysis purposes such as leak detection.
  788
  789rdf_gc :-
  790    has_garbage,
  791    !,
  792    rdf_gc(_),
  793    rdf_gc.
  794rdf_gc.
  795
  796%!  has_garbage is semidet.
  797%
  798%   True if there is something to gain using GC.
  799
  800has_garbage :-
  801    rdf_gc_info_(Info),
  802    has_garbage(Info),
  803    !.
  804
  805has_garbage(Info) :- arg(2, Info, Garbage),     Garbage > 0.
  806has_garbage(Info) :- arg(3, Info, Reindexed),   Reindexed > 0.
  807has_garbage(Info) :- arg(4, Info, Optimizable), Optimizable > 0.
  808
  809%!  consider_gc(+CPU) is semidet.
  810%
  811%   @param CPU is the amount of CPU time spent in the most recent
  812%   GC.
  813
  814consider_gc(_CPU) :-
  815    (   rdf_gc_info_(gc_info(Triples,       % Total #triples in DB
  816                             Garbage,       % Garbage triples in DB
  817                             Reindexed,     % Reindexed & not reclaimed
  818                             Optimizable,   % Non-optimized tables
  819                             _KeepGen,      % Oldest active generation
  820                             _LastGCGen,    % Oldest active gen at last GC
  821                             _ReindexGen,
  822                             _LastGCReindexGen))
  823    ->  (   (Garbage+Reindexed) * 5 > Triples
  824        ;   Optimizable > 4
  825        )
  826    ;   print_message(error, rdf(invalid_gc_info)),
  827        sleep(10)
  828    ),
  829    !.
  830
  831
  832                 /*******************************
  833                 *           STATISTICS         *
  834                 *******************************/
  835
  836%!  rdf_statistics(?KeyValue) is nondet.
  837%
  838%   Obtain statistics on the RDF database.  Defined statistics are:
  839%
  840%     * graphs(-Count)
  841%     Number of named graphs.
  842%
  843%     * triples(-Count)
  844%     Total number of triples in the database.  This is the number
  845%     of asserted triples minus the number of retracted ones.  The
  846%     number of _visible_ triples in a particular context may be
  847%     different due to visibility rules defined by the logical
  848%     update view and transaction isolation.
  849%
  850%     * resources(-Count)
  851%     Number of resources that appear as subject or object in a
  852%     triple.  See rdf_resource/1.
  853%
  854%     * properties(-Count)
  855%     Number of current predicates.  See rdf_current_predicate/1.
  856%
  857%     * literals(-Count)
  858%     Number of current literals.  See rdf_current_literal/1.
  859%
  860%     * gc(GCCount, ReclaimedTriples, ReindexedTriples, Time)
  861%     Information about the garbage collector.
  862%
  863%     * searched_nodes(-Count)
  864%     Number of nodes expanded by rdf_reachable/3 and
  865%     rdf_reachable/5.
  866%
  867%     * lookup(rdf(S,P,O,G), Count)
  868%     Number of queries that have been performed for this particular
  869%     instantiation pattern.  Each of S,P,O,G is either + or -.
  870%     Fails in case the number of performed queries is zero.
  871%
  872%     * hash_quality(rdf(S,P,O,G), Buckets, Quality, PendingResize)
  873%     Statistics on the index for this pattern.  Indices are created
  874%     lazily on the first relevant query.
  875%
  876%     * triples_by_graph(Graph, Count)
  877%     This statistics is produced for each named graph. See
  878%     =triples= for the interpretation of this value.
  879
  880rdf_statistics(graphs(Count)) :-
  881    rdf_statistics_(graphs(Count)).
  882rdf_statistics(triples(Count)) :-
  883    rdf_statistics_(triples(Count)).
  884rdf_statistics(duplicates(Count)) :-
  885    rdf_statistics_(duplicates(Count)).
  886rdf_statistics(lingering(Count)) :-
  887    rdf_statistics_(lingering(Count)).
  888rdf_statistics(resources(Count)) :-
  889    rdf_statistics_(resources(Count)).
  890rdf_statistics(properties(Count)) :-
  891    rdf_statistics_(predicates(Count)).
  892rdf_statistics(literals(Count)) :-
  893    rdf_statistics_(literals(Count)).
  894rdf_statistics(gc(Count, Reclaimed, Reindexed, Time)) :-
  895    rdf_statistics_(gc(Count, Reclaimed, Reindexed, Time)).
  896rdf_statistics(searched_nodes(Count)) :-
  897    rdf_statistics_(searched_nodes(Count)).
  898rdf_statistics(lookup(Index, Count)) :-
  899    functor(Indexed, indexed, 16),
  900    rdf_statistics_(Indexed),
  901    index(Index, I),
  902    Arg is I + 1,
  903    arg(Arg, Indexed, Count),
  904    Count \== 0.
  905rdf_statistics(hash_quality(Index, Size, Quality,Optimize)) :-
  906    rdf_statistics_(hash_quality(List)),
  907    member(hash(Place,Size,Quality,Optimize), List),
  908    index(Index, Place).
  909rdf_statistics(triples_by_graph(Graph, Count)) :-
  910    rdf_graph_(Graph, Count).
  911
  912index(rdf(-,-,-,-), 0).
  913index(rdf(+,-,-,-), 1).
  914index(rdf(-,+,-,-), 2).
  915index(rdf(+,+,-,-), 3).
  916index(rdf(-,-,+,-), 4).
  917index(rdf(+,-,+,-), 5).
  918index(rdf(-,+,+,-), 6).
  919index(rdf(+,+,+,-), 7).
  920
  921index(rdf(-,-,-,+), 8).
  922index(rdf(+,-,-,+), 9).
  923index(rdf(-,+,-,+), 10).
  924index(rdf(+,+,-,+), 11).
  925index(rdf(-,-,+,+), 12).
  926index(rdf(+,-,+,+), 13).
  927index(rdf(-,+,+,+), 14).
  928index(rdf(+,+,+,+), 15).
  929
  930
  931                 /*******************************
  932                 *           PREDICATES         *
  933                 *******************************/
  934
  935%!  rdf_current_predicate(?Predicate) is nondet.
  936%
  937%   True when Predicate is a   currently known predicate. Predicates
  938%   are created if a triple is created  that uses this predicate or
  939%   a property of the predicate   is  set using rdf_set_predicate/2.
  940%   The predicate may (no longer) have triples associated with it.
  941%
  942%   Note that resources that have  =|rdf:type|= =|rdf:Property|= are
  943%   not automatically included in the  result-set of this predicate,
  944%   while _all_ resources that appear as   the  second argument of a
  945%   triple _are_ included.
  946%
  947%   @see rdf_predicate_property/2.
  948
  949rdf_current_predicate(P, DB) :-
  950    rdf_current_predicate(P),
  951    (   rdf(_,P,_,DB)
  952    ->  true
  953    ).
  954
  955%!  rdf_predicate_property(?Predicate, ?Property)
  956%
  957%   Query properties of  a  defined   predicate.  Currently  defined
  958%   properties are given below.
  959%
  960%     * symmetric(Bool)
  961%     True if the predicate is defined to be symmetric. I.e., {A} P
  962%     {B} implies {B} P {A}. Setting symmetric is equivalent to
  963%     inverse_of(Self).
  964%
  965%     * inverse_of(Inverse)
  966%     True if this predicate is the inverse of Inverse. This
  967%     property is used by rdf_has/3, rdf_has/4, rdf_reachable/3 and
  968%     rdf_reachable/5.
  969%
  970%     * transitive(Bool)
  971%     True if this predicate is transitive. This predicate is
  972%     currently not used. It might be used to make rdf_has/3 imply
  973%     rdf_reachable/3 for transitive predicates.
  974%
  975%     * triples(Triples)
  976%     Unify Triples with the number of existing triples using this
  977%     predicate as second argument. Reporting the number of triples
  978%     is intended to support query optimization.
  979%
  980%     * rdf_subject_branch_factor(-Float)
  981%     Unify Float with the average number of triples associated with
  982%     each unique value for the subject-side of this relation. If
  983%     there are no triples the value 0.0 is returned. This value is
  984%     cached with the predicate and recomputed only after
  985%     substantial changes to the triple set associated to this
  986%     relation. This property is intended for path optimisation
  987%     when solving conjunctions of rdf/3 goals.
  988%
  989%     * rdf_object_branch_factor(-Float)
  990%     Unify Float with the average number of triples associated with
  991%     each unique value for the object-side of this relation. In
  992%     addition to the comments with the =rdf_subject_branch_factor=
  993%     property, uniqueness of the object value is computed from the
  994%     hash key rather than the actual values.
  995%
  996%     * rdfs_subject_branch_factor(-Float)
  997%     Same as =rdf_subject_branch_factor=, but also considering
  998%     triples of `subPropertyOf' this relation. See also rdf_has/3.
  999%
 1000%     * rdfs_object_branch_factor(-Float)
 1001%     Same as =rdf_object_branch_factor=, but also considering
 1002%     triples of `subPropertyOf' this relation. See also rdf_has/3.
 1003%
 1004%   @see rdf_set_predicate/2.
 1005
 1006rdf_predicate_property(P, Prop) :-
 1007    var(P),
 1008    !,
 1009    rdf_current_predicate(P),
 1010    rdf_predicate_property_(P, Prop).
 1011rdf_predicate_property(P, Prop) :-
 1012    rdf_predicate_property_(P, Prop).
 1013
 1014%!  rdf_set_predicate(+Predicate, +Property) is det.
 1015%
 1016%   Define a property of  the   predicate.  This predicate currently
 1017%   supports the following properties:
 1018%
 1019%       - symmetric(+Boolean)
 1020%       Set/unset the predicate as being symmetric.  Using
 1021%       symmetric(true) is the same as inverse_of(Predicate),
 1022%       i.e., creating a predicate that is the inverse of
 1023%       itself.
 1024%       - transitive(+Boolean)
 1025%       Sets the transitive property.
 1026%       - inverse_of(+Predicate2)
 1027%       Define Predicate as the inverse of Predicate2. An inverse
 1028%       relation is deleted using inverse_of([]).
 1029%
 1030%   The `transitive` property is currently not used. The `symmetric`
 1031%   and `inverse_of` properties are considered   by  rdf_has/3,4 and
 1032%   rdf_reachable/3.
 1033%
 1034%   @tbd    Maintain these properties based on OWL triples.
 1035
 1036
 1037                 /*******************************
 1038                 *            SNAPSHOTS         *
 1039                 *******************************/
 1040
 1041%!  rdf_snapshot(-Snapshot) is det.
 1042%
 1043%   Take a snapshot of the current state   of  the RDF store. Later,
 1044%   goals may be executed in the  context   of  the database at this
 1045%   moment using rdf_transaction/3 with  the   =snapshot=  option. A
 1046%   snapshot created outside  a  transaction   exists  until  it  is
 1047%   deleted. Snapshots taken inside a transaction   can only be used
 1048%   inside this transaction.
 1049
 1050%!  rdf_delete_snapshot(+Snapshot) is det.
 1051%
 1052%   Delete a snapshot as obtained   from  rdf_snapshot/1. After this
 1053%   call, resources used for maintaining the snapshot become subject
 1054%   to garbage collection.
 1055
 1056%!  rdf_current_snapshot(?Term) is nondet.
 1057%
 1058%   True when Term is a currently known snapshot.
 1059%
 1060%   @bug    Enumeration of snapshots is slow.
 1061
 1062rdf_current_snapshot(Term) :-
 1063    current_blob(Term, rdf_snapshot).
 1064
 1065
 1066                 /*******************************
 1067                 *          TRANSACTION         *
 1068                 *******************************/
 1069
 1070%!  rdf_transaction(:Goal) is semidet.
 1071%
 1072%   Same as rdf_transaction(Goal, user, []).  See rdf_transaction/3.
 1073
 1074%!  rdf_transaction(:Goal, +Id) is semidet.
 1075%
 1076%   Same as rdf_transaction(Goal, Id, []).  See rdf_transaction/3.
 1077
 1078%!  rdf_transaction(:Goal, +Id, +Options) is semidet.
 1079%
 1080%   Run Goal in an RDF  transaction.   Compared to the ACID model,
 1081%   RDF transactions have the following properties:
 1082%
 1083%     1. Modifications inside the transactions become all atomically
 1084%        visible to the outside world if Goal succeeds or remain
 1085%        invisible if Goal fails or throws an exception.  I.e.,
 1086%        the _atomicity_ property is fully supported.
 1087%     2. _Consistency_ is not guaranteed. Later versions may
 1088%        implement consistency constraints that will be checked
 1089%        serialized just before the actual commit of a transaction.
 1090%     3. Concurrently executing transactions do not influence each
 1091%        other.  I.e., the _isolation_ property is fully supported.
 1092%     4. _Durability_ can be activated by loading
 1093%        library(semweb/rdf_persistency).
 1094%
 1095%   Processed options are:
 1096%
 1097%     * snapshot(+Snapshot)
 1098%     Execute Goal using the state of the RDF store as stored in
 1099%     Snapshot.  See rdf_snapshot/1.  Snapshot can also be the
 1100%     atom =true=, which implies that an anonymous snapshot is
 1101%     created at the current state of the store.  Modifications
 1102%     due to executing Goal are only visible to Goal.
 1103
 1104rdf_transaction(Goal) :-
 1105    rdf_transaction(Goal, user, []).
 1106rdf_transaction(Goal, Id) :-
 1107    rdf_transaction(Goal, Id, []).
 1108
 1109%!  rdf_active_transaction(?Id) is nondet.
 1110%
 1111%   True if Id is the identifier of  a transaction in the context of
 1112%   which  this  call  is  executed.  If  Id  is  not  instantiated,
 1113%   backtracking yields transaction identifiers   starting  with the
 1114%   innermost nested transaction. Transaction   identifier terms are
 1115%   not copied, need not be ground   and  can be instantiated during
 1116%   the transaction.
 1117
 1118rdf_active_transaction(Id) :-
 1119    rdf_active_transactions_(List),
 1120    member(Id, List).
 1121
 1122%!  rdf_monitor(:Goal, +Options)
 1123%
 1124%   Call Goal if specified actions occur on the database.
 1125
 1126rdf_monitor(Goal, Options) :-
 1127    monitor_mask(Options, 0xffff, Mask),
 1128    rdf_monitor_(Goal, Mask).
 1129
 1130monitor_mask([], Mask, Mask).
 1131monitor_mask([H|T], Mask0, Mask) :-
 1132    update_mask(H, Mask0, Mask1),
 1133    monitor_mask(T, Mask1, Mask).
 1134
 1135update_mask(-X, Mask0, Mask) :-
 1136    !,
 1137    monitor_mask(X, M),
 1138    Mask is Mask0 /\ \M.
 1139update_mask(+X, Mask0, Mask) :-
 1140    !,
 1141    monitor_mask(X, M),
 1142    Mask is Mask0 \/ M.
 1143update_mask(X, Mask0, Mask) :-
 1144    monitor_mask(X, M),
 1145    Mask is Mask0 \/ M.
 1146
 1147%!  monitor_mask(Name, Mask)
 1148%
 1149%   Mask bit for the monitor events.  Note that this must be kept
 1150%   consistent with the enum broadcast_id defined in rdf_db.c
 1151
 1152                                        % C-defined broadcasts
 1153monitor_mask(assert,       0x0001).
 1154monitor_mask(assert(load), 0x0002).
 1155monitor_mask(retract,      0x0004).
 1156monitor_mask(update,       0x0008).
 1157monitor_mask(new_literal,  0x0010).
 1158monitor_mask(old_literal,  0x0020).
 1159monitor_mask(transaction,  0x0040).
 1160monitor_mask(load,         0x0080).
 1161monitor_mask(create_graph, 0x0100).
 1162monitor_mask(reset,        0x0200).
 1163                                        % prolog defined broadcasts
 1164monitor_mask(parse,        0x1000).
 1165monitor_mask(unload,       0x1000).     % FIXME: Duplicate
 1166                                        % mask for all
 1167monitor_mask(all,          0xffff).
 1168
 1169%rdf_broadcast(Term, MaskName) :-
 1170%%      monitor_mask(MaskName, Mask),
 1171%%      rdf_broadcast_(Term, Mask).
 1172
 1173
 1174                 /*******************************
 1175                 *            WARM              *
 1176                 *******************************/
 1177
 1178%!  rdf_warm_indexes
 1179%
 1180%   Warm all indexes.  See rdf_warm_indexes/1.
 1181
 1182rdf_warm_indexes :-
 1183    findall(Index, rdf_index(Index), Indexes),
 1184    rdf_warm_indexes(Indexes).
 1185
 1186rdf_index(s).
 1187rdf_index(p).
 1188rdf_index(o).
 1189rdf_index(sp).
 1190rdf_index(o).
 1191rdf_index(po).
 1192rdf_index(spo).
 1193rdf_index(g).
 1194rdf_index(sg).
 1195rdf_index(pg).
 1196
 1197%!  rdf_warm_indexes(+Indexes) is det.
 1198%
 1199%   Create the named indexes.  Normally,   the  RDF database creates
 1200%   indexes lazily the first time they are needed. This predicate
 1201%   serves two purposes: it provides an   explicit  way to make sure
 1202%   that the required indexes  are   present  and  creating multiple
 1203%   indexes at the same time is more efficient.
 1204
 1205
 1206                 /*******************************
 1207                 *          DUPLICATES          *
 1208                 *******************************/
 1209
 1210%!  rdf_update_duplicates is det.
 1211%
 1212%   Update the duplicate administration of the RDF store. This marks
 1213%   every triple that is potentially  a   duplicate  of another as
 1214%   duplicate. Being potentially a  duplicate   means  that subject,
 1215%   predicate and object are equivalent and   the  life-times of the
 1216%   two triples overlap.
 1217%
 1218%   The duplicates marks are used to  reduce the administrative load
 1219%   of avoiding duplicate answers.  Normally,   the  duplicates  are
 1220%   marked using a background thread that   is  started on the first
 1221%   query that produces a substantial amount of duplicates.
 1222
 1223:- public
 1224    rdf_update_duplicates_thread/0. 1225
 1226%!  rdf_update_duplicates_thread
 1227%
 1228%   Start a thread to initialize the duplicate administration.
 1229
 1230rdf_update_duplicates_thread :-
 1231    thread_create(rdf_update_duplicates, _,
 1232                  [ detached(true),
 1233                    alias('__rdf_duplicate_detecter')
 1234                  ]).
 1235
 1236%!  rdf_update_duplicates is det.
 1237%
 1238%   Update the duplicate administration. If  this  administration is
 1239%   up-to-date, each triple that _may_ have a duplicate is flagged.
 1240%   The predicate rdf/3 uses this administration to speedup checking
 1241%   for duplicate answers.
 1242%
 1243%   This predicate is normally  executed   from  a background thread
 1244%   named =__rdf_duplicate_detecter= which is created   when a query
 1245%   discovers that checking for duplicates becomes too expensive.
 1246
 1247
 1248                 /*******************************
 1249                 *    QUICK BINARY LOAD/SAVE    *
 1250                 *******************************/
 1251
 1252%!  rdf_save_db(+File) is det.
 1253%!  rdf_save_db(+File, +Graph) is det.
 1254%
 1255%   Save triples into File in a   quick-to-load binary format. If Graph
 1256%   is supplied only triples flagged to originate from that database
 1257%   are  added.  Files  created  this  way    can  be  loaded  using
 1258%   rdf_load_db/1.
 1259
 1260:- create_prolog_flag(rdf_triple_format, 3, [type(integer)]). 1261
 1262rdf_save_db(File) :-
 1263    current_prolog_flag(rdf_triple_format, Version),
 1264    setup_call_cleanup(
 1265        open(File, write, Out, [type(binary)]),
 1266        ( set_stream(Out, record_position(false)),
 1267          rdf_save_db_(Out, _, Version)
 1268        ),
 1269        close(Out)).
 1270
 1271
 1272rdf_save_db(File, Graph) :-
 1273    current_prolog_flag(rdf_triple_format, Version),
 1274    setup_call_cleanup(
 1275        open(File, write, Out, [type(binary)]),
 1276        ( set_stream(Out, record_position(false)),
 1277          rdf_save_db_(Out, Graph, Version)
 1278        ),
 1279        close(Out)).
 1280
 1281
 1282%!  rdf_load_db_no_admin(+File, +Id, -Graphs) is det.
 1283%
 1284%   Load triples from a  .trp  file   without  updating  the  source
 1285%   administration. Id is  handled  to   monitor  action.  Graphs is
 1286%   a list of graph-names encountered in File.
 1287
 1288rdf_load_db_no_admin(File, Id, Graphs) :-
 1289    open(File, read, In, [type(binary)]),
 1290    set_stream(In, record_position(false)),
 1291    call_cleanup(rdf_load_db_(In, Id, Graphs), close(In)).
 1292
 1293
 1294%!  check_loaded_cache(+Graph, +Graphs, +Modified) is det.
 1295%
 1296%   Verify the loaded cache file and optionally fix the modification
 1297%   time (new versions save this along with the snapshot).
 1298%
 1299%   @tbd    What to do if there is a cache mismatch? Delete the loaded
 1300%           graphs and fail?
 1301
 1302check_loaded_cache(DB, [DB], _Modified) :- !.
 1303check_loaded_cache(DB, Graphs, _) :-
 1304    print_message(warning, rdf(inconsistent_cache(DB, Graphs))).
 1305
 1306
 1307%!  rdf_load_db(+File) is det.
 1308%
 1309%   Load triples from a file created using rdf_save_db/2.
 1310
 1311rdf_load_db(File) :-
 1312    uri_file_name(URL, File),
 1313    rdf_load_db_no_admin(File, URL, _Graphs).
 1314
 1315
 1316                 /*******************************
 1317                 *          LOADING RDF         *
 1318                 *******************************/
 1319
 1320:- multifile
 1321    rdf_open_hook/8,
 1322    rdf_open_decode/4,              % +Encoding, +File, -Stream, -Cleanup
 1323    rdf_load_stream/3,              % +Format, +Stream, +Options
 1324    rdf_file_type/2,                % ?Extension, ?Format
 1325    rdf_storage_encoding/2,         % ?Extension, ?Encoding
 1326    url_protocol/1.                 % ?Protocol
 1327
 1328%!  rdf_load(+FileOrList) is det.
 1329%
 1330%   Same as rdf_load(FileOrList, []).  See rdf_load/2.
 1331
 1332%!  rdf_load(+FileOrList, :Options) is det.
 1333%
 1334%   Load RDF data. Options provides   additional processing options.
 1335%   Defined options are:
 1336%
 1337%       * blank_nodes(+ShareMode)
 1338%       How to handle equivalent blank nodes.  If =share= (default),
 1339%       equivalent blank nodes are shared in the same resource.
 1340%
 1341%       * base_uri(+URI)
 1342%       URI that is used for rdf:about="" and other RDF constructs
 1343%       that are relative to the base uri.  Default is the source
 1344%       URL.
 1345%
 1346%       * concurrent(+Jobs)
 1347%       If FileOrList is a list of files, process the input files
 1348%       using Jobs threads concurrently.  Default is the minimum
 1349%       of the number of cores and the number of inputs.  Higher
 1350%       values can be useful when loading inputs from (slow)
 1351%       network connections.  Using 1 (one) does not use
 1352%       separate worker threads.
 1353%
 1354%       * format(+Format)
 1355%       Specify the source format explicitly. Normally this is
 1356%       deduced from the filename extension or the mime-type. The
 1357%       core library understands the formats xml (RDF/XML) and
 1358%       triples (internal quick load and cache format).  Plugins,
 1359%       such as library(semweb/turtle) extend the set of recognised
 1360%       extensions.
 1361%
 1362%       * graph(?Graph)
 1363%       Named graph in which to load the data.  It is *not* allowed
 1364%       to load two sources into the same named graph.  If Graph is
 1365%       unbound, it is unified to the graph into which the data is
 1366%       loaded.  The default graph is a =|file://|= URL when loading
 1367%       a file or, if the specification is a URL, its normalized
 1368%       version without the optional _|#fragment|_.
 1369%
 1370%       * if(Condition)
 1371%       When to load the file. One of =true=, =changed= (default) or
 1372%       =not_loaded=.
 1373%
 1374%       * modified(-Modified)
 1375%       Unify Modified with one of =not_modified=, cached(File),
 1376%       last_modified(Stamp) or =unknown=.
 1377%
 1378%       * cache(Bool)
 1379%       If =false=, do not use or create a cache file.
 1380%
 1381%       * register_namespaces(Bool)
 1382%       If =true= (default =false=), register =xmlns= namespace
 1383%       declarations or Turtle =|@prefix|= prefixes using
 1384%       rdf_register_prefix/3 if there is no conflict.
 1385%
 1386%       * silent(+Bool)
 1387%       If =true=, the message reporting completion is printed using
 1388%       level =silent=. Otherwise the level is =informational=. See
 1389%       also print_message/2.
 1390%
 1391%       * prefixes(-Prefixes)
 1392%       Returns the prefixes defined in the source   data file as a list
 1393%       of pairs.
 1394%
 1395%       * multifile(+Boolean)
 1396%       Indicate that the addressed graph may be populated with
 1397%       triples from multiple sources. This disables caching and
 1398%       avoids that an rdf_load/2 call affecting the specified
 1399%       graph cleans the graph.
 1400%
 1401%   Other  options  are  forwarded  to  process_rdf/3.  By  default,
 1402%   rdf_load/2 only loads RDF/XML from files.  It can be extended to
 1403%   load data from other formats and   locations  using plugins. The
 1404%   full set of plugins relevant to   support  different formats and
 1405%   locations is below:
 1406%
 1407%     ==
 1408%     :- use_module(library(semweb/turtle)).        % Turtle and TriG
 1409%     :- use_module(library(semweb/rdf_ntriples)).
 1410%     :- use_module(library(semweb/rdf_zlib_plugin)).
 1411%     :- use_module(library(semweb/rdf_http_plugin)).
 1412%     :- use_module(library(http/http_ssl_plugin)).
 1413%     ==
 1414%
 1415%   @see    rdf_db:rdf_open_hook/8, library(semweb/rdf_persistency) and
 1416%           library(semweb/rdf_cache)
 1417
 1418:- dynamic
 1419    rdf_loading/3.                          % Graph, Queue, Thread
 1420
 1421rdf_load(Spec) :-
 1422    rdf_load(Spec, []).
 1423
 1424:- if(\+current_predicate(concurrent/3)). 1425concurrent(_, Goals, _) :-
 1426    forall(member(G, Goals), call(G)).
 1427:- endif. 1428
 1429% Note that we kill atom garbage collection.  This improves performance
 1430% with about 15% loading the LUBM Univ_50 benchmark.
 1431
 1432rdf_load(Spec, M:Options) :-
 1433    must_be(list, Options),
 1434    current_prolog_flag(agc_margin, Old),
 1435    setup_call_cleanup(
 1436        set_prolog_flag(agc_margin, 0),
 1437        rdf_load_noagc(Spec, M, Options),
 1438        set_prolog_flag(agc_margin, Old)).
 1439
 1440rdf_load_noagc(List, M, Options) :-
 1441    is_list(List),
 1442    !,
 1443    flatten(List, Inputs),          % Compatibility: allow nested lists
 1444    maplist(must_be(ground), Inputs),
 1445    length(Inputs, Count),
 1446    load_jobs(Count, Jobs, Options),
 1447    (   Jobs =:= 1
 1448    ->  forall(member(Spec, Inputs),
 1449               rdf_load_one(Spec, M, Options))
 1450    ;   maplist(load_goal(Options, M), Inputs, Goals),
 1451        concurrent(Jobs, Goals, [])
 1452    ).
 1453rdf_load_noagc(One, M, Options) :-
 1454    must_be(ground, One),
 1455    rdf_load_one(One, M, Options).
 1456
 1457load_goal(Options, M, Spec, rdf_load_one(Spec, M, Options)).
 1458
 1459load_jobs(_, Jobs, Options) :-
 1460    option(concurrent(Jobs), Options),
 1461    !,
 1462    must_be(positive_integer, Jobs).
 1463load_jobs(Count, Jobs, _) :-
 1464    current_prolog_flag(cpu_count, CPUs),
 1465    CPUs > 0,
 1466    !,
 1467    Jobs is max(1, min(CPUs, Count)).
 1468load_jobs(_, 1, _).
 1469
 1470
 1471rdf_load_one(Spec, M, Options) :-
 1472    source_url(Spec, Protocol, SourceURL),
 1473    load_graph(SourceURL, Graph, Options),
 1474    setup_call_cleanup(
 1475        with_mutex(rdf_load_file,
 1476                   rdf_start_load(SourceURL, Loading)),
 1477        rdf_load_file(Loading, Spec, SourceURL, Protocol,
 1478                      Graph, M, Options),
 1479        rdf_end_load(Loading)).
 1480
 1481%!  rdf_start_load(+SourceURL, -WhatToDo) is det.
 1482%!  rdf_end_load(+WhatToDo) is det.
 1483%!  rdf_load_file(+WhatToDo, +Spec, +SourceURL, +Protocol, +Graph,
 1484%!                +Module, +Options) is det.
 1485%
 1486%   Of these three predicates, rdf_load_file/7   does the real work.
 1487%   The others deal with the  possibility   that  the graph is being
 1488%   loaded by another thread. In that case,   we  wait for the other
 1489%   thread to complete the work.
 1490%
 1491%   @tbd    What if both threads disagree on what is loaded into the
 1492%           graph?
 1493%   @see    Code is modelled closely after how concurrent loading
 1494%           is handled in SWI-Prolog's boot/init.pl
 1495
 1496rdf_start_load(SourceURL, queue(Queue)) :-
 1497    rdf_loading(SourceURL, Queue, LoadThread),
 1498    \+ thread_self(LoadThread),
 1499    !,
 1500    debug(rdf(load), '~p is being loaded by thread ~w; waiting ...',
 1501          [ SourceURL, LoadThread]).
 1502rdf_start_load(SourceURL, Ref) :-
 1503    thread_self(Me),
 1504    message_queue_create(Queue),
 1505    assertz(rdf_loading(SourceURL, Queue, Me), Ref).
 1506
 1507rdf_end_load(queue(_)) :- !.
 1508rdf_end_load(Ref) :-
 1509    clause(rdf_loading(_, Queue, _), _, Ref),
 1510    erase(Ref),
 1511    thread_send_message(Queue, done),
 1512    message_queue_destroy(Queue).
 1513
 1514rdf_load_file(queue(Queue), _Spec, _SourceURL, _Protocol, _Graph, _M, _Options) :-
 1515    !,
 1516    catch(thread_get_message(Queue, _), _, true).
 1517rdf_load_file(_Ref, _Spec, SourceURL, Protocol, Graph, M, Options) :-
 1518    debug(rdf(load), 'RDF: Loading ~q into ~q', [SourceURL, Graph]),
 1519    statistics(cputime, T0),
 1520    rdf_open_input(SourceURL, Protocol, Graph,
 1521                   In, Cleanup, Modified, Format, Options),
 1522    supported_format(Format, Cleanup),
 1523    return_modified(Modified, Options),
 1524    (   Modified == not_modified
 1525    ->  Action = none
 1526    ;   Modified = cached(CacheFile)
 1527    ->  do_unload(Graph),
 1528        catch(rdf_load_db_no_admin(CacheFile, cache(Graph), Graphs), _, fail),
 1529        check_loaded_cache(Graph, Graphs, Modified),
 1530        Action = load
 1531    ;   option(base_uri(BaseURI), Options, Graph),
 1532        (   var(BaseURI)
 1533        ->  BaseURI = SourceURL
 1534        ;   true
 1535        ),
 1536        once(phrase(derived_options(Options, NSList), Extra)),
 1537        merge_options([ base_uri(BaseURI),
 1538                        graph(Graph),
 1539                        format(Format)
 1540                      | Extra
 1541                      ], Options, RDFOptions),
 1542        (   option(multifile(true), Options)
 1543        ->  true
 1544        ;   do_unload(Graph)
 1545        ),
 1546        graph_modified(Modified, ModifiedStamp),
 1547        rdf_set_graph_source(Graph, SourceURL, ModifiedStamp),
 1548        call_cleanup(rdf_load_stream(Format, In, M:RDFOptions),
 1549                     Cleanup),
 1550        save_cache(Graph, SourceURL, Options),
 1551        register_file_prefixes(NSList),
 1552        format_action(Format, Action)
 1553    ),
 1554    rdf_statistics_(triples(Graph, Triples)),
 1555    report_loaded(Action, SourceURL, Graph, Triples, T0, Options).
 1556
 1557supported_format(Format, _Cleanup) :-
 1558    rdf_file_type(_, Format),
 1559    !.
 1560supported_format(Format, Cleanup) :-
 1561    call(Cleanup),
 1562    existence_error(rdf_format_plugin, Format).
 1563
 1564format_action(triples, load) :- !.
 1565format_action(_, parsed).
 1566
 1567save_cache(Graph, SourceURL, Options) :-
 1568    option(cache(true), Options, true),
 1569    rdf_cache_file(SourceURL, write, CacheFile),
 1570    !,
 1571    catch(save_cache(Graph, CacheFile), E,
 1572          print_message(warning, E)).
 1573save_cache(_, _, _).
 1574
 1575derived_options([], _) -->
 1576    [].
 1577derived_options([H|T], NSList) -->
 1578    (   {   H == register_namespaces(true)
 1579        ;   H == (register_namespaces = true)
 1580        }
 1581    ->  [ namespaces(NSList) ]
 1582    ;   []
 1583    ),
 1584    derived_options(T, NSList).
 1585
 1586graph_modified(last_modified(Stamp), Stamp).
 1587graph_modified(unknown, Stamp) :-
 1588    get_time(Stamp).
 1589
 1590return_modified(Modified, Options) :-
 1591    option(modified(M0), Options),
 1592    !,
 1593    M0 = Modified.
 1594return_modified(_, _).
 1595
 1596
 1597                 /*******************************
 1598                 *        INPUT HANDLING        *
 1599                 *******************************/
 1600
 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 This section deals with pluggable input sources.  The task of the input
 layer is to

     * Decide on the graph-name
     * Decide on the source-location
     * Decide whether loading is needed (if-modified)
     * Decide on the serialization in the input

 The protocol must ensure minimal overhead, in particular for network
 protocols. E.g. for HTTP we want to make a single call on the server and
 use If-modified-since to verify that we do not need to reload this file.
 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 1614
 %!  rdf_open_input(+SourceURL, +Protocol, +Graph,
 %!                 -Stream, -Cleanup, -Modified, -Format, +Options)
 %
 %   Open an input source.  Unless multifile(true) or if(true) is
 %   given, the time of the data we already have is taken from the
 %   graph registered for SourceURL or, failing that, from the cache
 %   file.  That time is passed to open_input_if_modified/8.
 %
 %   Options processed:
 %
 %       * graph(Graph)
 %       * db(Graph)
 %       * if(Condition)
 %       * cache(Cache)
 %       * format(Format)
 %
 %   @param  Modified is one of =not_modified=, last_modified(Time),
 %           cached(CacheFile) or =unknown=

 rdf_open_input(SourceURL, Protocol, Graph,
                Stream, Cleanup, Modified, Format, Options) :-
     (   option(multifile(true), Options)
     ->  true
     ;   option(if(Condition), Options, changed),
         (   Condition == true                  % unconditional (re)load
         ->  true
         ;   rdf_graph_source_(Graph, SourceURL, KnownTime)
         ->  true
         ;   option(cache(true), Options, true),
             rdf_cache_file(SourceURL, read, CacheFile)
         ->  time_file(CacheFile, KnownTime)
         ;   true
         )
     ),
     option(format(Format), Options, _),
     open_input_if_modified(Protocol, SourceURL, KnownTime,
                            Stream, Cleanup, Status, Format, Options),
     (   Status == not_modified,
         nonvar(CacheFile)                      % up-to-date data is in the cache
     ->  Modified = cached(CacheFile)
     ;   Modified = Status
     ).
 1656
 1657
 %!  source_url(+Spec, -Class, -SourceURL) is det.
 %
 %   Determine the class and URL of the source Spec.  Class is one of
 %
 %       * stream(Stream)
 %       * file(StorageExtension), produced by source_file/3
 %       * a url-protocol (e.g., =http=)
 %
 %   A stream that has no file name gets a generated URL that starts
 %   with =|stream://|=.

 source_url(stream(In), stream(In), URL) :-
     !,
     (   stream_property(In, file_name(Name))
     ->  to_url(Name, URL)
     ;   gensym('stream://', URL)
     ).
 source_url(In, Class, URL) :-                   % bare stream handle
     is_stream(In),
     !,
     source_url(stream(In), Class, URL).
 source_url(Spec, Class, URL) :-                 % e.g. Alias(Path)
     compound(Spec),
     !,
     source_file(Spec, Class, URL).
 source_url(FileURL, Class, URL) :-              % or return FileURL?
     uri_file_name(FileURL, Path),
     !,
     source_file(Path, Class, URL).
 source_url(URL0, Class, URL) :-                 % URL of a known protocol
     is_url(URL0, Class, URL),
     !.
 source_url(Path, Class, URL) :-                 % plain file name
     source_file(Path, Class, URL).
 1689
 %!  source_file(+Spec, -Class, -SourceURL) is det.
 %
 %   Resolve Spec to a readable file, trying the plain name first and
 %   then every extension known to valid_extension/1.  Class is
 %   file(SExt), where SExt is the storage extension found by
 %   storage_extension/3 ('' if there is none).  SourceURL is the
 %   =|file://|= URL of the file.

 source_file(Spec, file(SExt), SourceURL) :-
     findall(Ext, valid_extension(Ext), KnownExts),
     FileOptions = [access(read), extensions([''|KnownExts])],
     absolute_file_name(Spec, Path, FileOptions),
     storage_extension(_Plain, SExt, Path),
     uri_file_name(SourceURL, Path).
 1695
 %!  to_url(+Spec, -URL) is det.
 %
 %   URL is Spec itself if Spec is a global URI (uri_is_global/1).
 %   Otherwise Spec is taken to be a file name and URL is the file URL
 %   of its absolute path.

 to_url(Spec, URL) :-
     (   URL = Spec,
         uri_is_global(Spec)
     ->  true
     ;   absolute_file_name(Spec, Path),
         uri_file_name(URL, Path)
     ).
 1702
 %!  storage_extension(-Plain, -SExt, +File) is det.
 %
 %   Split off a storage (e.g. compression) extension from File.  If
 %   the extension of File is non-empty and known to
 %   rdf_storage_encoding/2, SExt is that extension and Plain is File
 %   without it.  Otherwise SExt is '' and Plain is File.

 storage_extension(Plain, SExt, File) :-
     (   file_name_extension(Plain, SExt, File),
         SExt \== '',
         rdf_storage_encoding(SExt, _)
     ->  true
     ;   Plain = File,
         SExt = ''
     ).
 1709
 %!  load_graph(+SourceURL, -Graph, +Options) is det.
 %
 %   Graph is the graph into which we load the data.  Candidates are
 %   tried in this order:
 %
 %     1. The graph(Graph) option
 %     2. The db(Graph) option (backward compatibility)
 %     3. The base_uri(BaseURI) option
 %     4. The source URL
 %
 %   With multifile(true) only the first two are considered and the
 %   graph must be ground.
 %
 %   @error existence_error(option, graph) if multifile(true) is given
 %          without a ground graph.

 load_graph(_Source, Graph, Options) :-
     option(multifile(true), Options),
     !,
     (   (   option(graph(Graph), Options)
         ->  true
         ;   option(db(Graph), Options)
         ),
         ground(Graph)
     ->  true
     ;   throw(error(existence_error(option, graph),
                     context(_, "rdf_load/2: using multifile requires graph")))
     ).
 load_graph(Source, Graph, Options) :-
     (   option(graph(Graph), Options)
     ->  true
     ;   option(db(Graph), Options)
     ->  true
     ;   true                                    % no explicit graph given
     ),
     load_graph2(Source, Graph, Options).
 1740
 %!  load_graph2(+Source, ?Graph, +Options) is det.
 %
 %   Complete the choice of Graph: keep it if it is already ground,
 %   else use a ground base_uri(Graph) option that is not [], else
 %   derive it from Source using load_graph/2.

 load_graph2(Source, Graph, Options) :-
     (   ground(Graph)
     ->  true
     ;   option(base_uri(Graph), Options),
         Graph \== [],
         ground(Graph)
     ->  true
     ;   load_graph(Source, Graph)
     ).
 1751
 %!  load_graph(+SourceURL, -BaseURI) is det.
 %
 %   BaseURI is SourceURL with its extension removed if that extension
 %   is known to rdf_storage_encoding/2, and SourceURL itself
 %   otherwise.

 load_graph(SourceURL, BaseURI) :-
     (   file_name_extension(BaseURI, Ext, SourceURL),
         rdf_storage_encoding(Ext, _)
     ->  true
     ;   BaseURI = SourceURL
     ).
 1757
 1758
 %!  open_input_if_modified(+Protocol, +SourceURL, ?HaveModified,
 %!                         -Stream, -Cleanup, -Modified, ?Format,
 %!                         +Options)
 %
 %   Open SourceURL unless the data we have is up to date.
 %   HaveModified is the time stamp of the data we already have and is
 %   unbound if we have none.  Handling per Protocol:
 %
 %     - stream(In): the stream is used as is; Cleanup is =true= and
 %       Modified is =unknown=.
 %     - file(SExt) and =file=: Modified is =not_modified= if
 %       HaveModified is at least the modification time of the file.
 %       Otherwise the file is opened using storage_open/4 and
 %       Modified is last_modified(Time).
 %     - anything else is delegated to rdf_open_hook/8.
 %
 %   An unbound Format is guessed from the file extension whenever a
 %   stream is returned.

 open_input_if_modified(stream(In), SourceURL, _, In, true,
                        unknown, Format, _) :-
     !,
     (   var(Format)
     ->  guess_format(SourceURL, Format)
     ;   true
     ).
 open_input_if_modified(file(SExt), SourceURL, HaveModified, Stream, Cleanup,
                        Modified, Format, _) :-
     !,
     uri_file_name(SourceURL, Path),
     (   SExt == ''
     ->  Plain = Path
     ;   file_name_extension(Plain, SExt, Path)
     ),
     time_file(Path, FileTime),
     (   nonvar(HaveModified),
         HaveModified >= FileTime
     ->  Modified = not_modified,
         Cleanup = true
     ;   storage_open(SExt, Path, Stream, Cleanup),
         Modified = last_modified(FileTime),
         (   var(Format)
         ->  guess_format(Plain, Format)        % ignore the storage extension
         ;   true
         )
     ).
 open_input_if_modified(file, SourceURL, HaveModified, Stream, Cleanup,
                        Modified, Format, Options) :-
     !,
     open_input_if_modified(file(''), SourceURL, HaveModified,
                            Stream, Cleanup,
                            Modified, Format, Options).
 open_input_if_modified(Protocol, SourceURL, HaveModified, Stream, Cleanup,
                        Modified, Format, Options) :-
     rdf_open_hook(Protocol, SourceURL, HaveModified, Stream, Cleanup,
                   Modified, Format, Options).
 1793
 %!  guess_format(+File, -Format) is det.
 %
 %   Format is the RDF format registered in rdf_file_type/2 for the
 %   extension of File.  If the extension is unknown, Format is =xml=
 %   and a warning is printed.

 guess_format(File, Format) :-
     file_name_extension(_Base, Ext, File),
     (   rdf_file_type(Ext, Format)
     ->  true
     ;   Format = xml,
         print_message(warning, rdf(guess_format(Ext)))
     ).
 1801
 %!  storage_open(+Extension, +File, -Stream, -Cleanup)
 %
 %   Open the low-level storage.  Cleanup is the goal that releases
 %   Stream.  Plain files (Extension is '') are opened in binary mode,
 %   just like HTTP resources; the XML or Turtle parser sets the
 %   correct encoding.  For other extensions the encoding is looked up
 %   in rdf_storage_encoding/2 and opening is left to
 %   rdf_open_decode/4.

 storage_open('', File, Stream, close(Stream)) :-
     !,
     open(File, read, Stream, [type(binary)]).
 storage_open(Ext, File, Stream, Cleanup) :-
     rdf_storage_encoding(Ext, Encoding),
     rdf_open_decode(Encoding, File, Stream, Cleanup).
 1814
 %!  valid_extension(?Ext) is nondet.
 %
 %   True when Ext is a file extension of an RDF format
 %   (rdf_file_type/2) or of a storage encoding
 %   (rdf_storage_encoding/2).

 valid_extension(Ext) :-
     (   rdf_file_type(Ext, _)
     ;   rdf_storage_encoding(Ext, _)
     ).
 1819
 %!  is_url(@Term, -Scheme, -URL) is semidet.
 %
 %   True if Term is an atom denoting a URL of the given Scheme.  URL
 %   is normalized (see uri_normalized/2) and a possible fragment
 %   identifier (#fragment) is removed.  This predicate only succeeds
 %   if the scheme is registered using the multifile hook
 %   url_protocol/1.

 is_url(Term, Scheme, FetchURL) :-
     atom(Term),
     uri_is_global(Term),
     uri_normalized(Term, Normalized),       % case normalization
     uri_components(Normalized, Parts),
     uri_data(scheme, Parts, Found),
     url_protocol(Found),
     !,
     Scheme = Found,
     uri_data(fragment, Parts, _, NoFragment),   % drop the #fragment
     uri_components(FetchURL, NoFragment).
 1839
 % url_protocol(?Scheme): URL schemes accepted by is_url/3.  Only
 % =file= is built in.

 url_protocol(file).
 1841
 %!  rdf_file_type(+Extension, -Format) is semidet.
 %
 %   True if Format is the RDF format that belongs to the file
 %   extension Extension.  The predicate is multifile, so plugins can
 %   register additional extensions.

 % RDF/XML
 rdf_file_type(xml,   xml).
 rdf_file_type(rdf,   xml).
 rdf_file_type(rdfs,  xml).
 rdf_file_type(owl,   xml).
 % (X)HTML documents
 rdf_file_type(htm,   xhtml).
 rdf_file_type(html,  xhtml).
 rdf_file_type(xhtml, xhtml).
 % =triples= format
 rdf_file_type(trp,   triples).
 1856
 1857
 %!  rdf_storage_encoding(?Extension, ?Encoding) is nondet.
 %
 %   True if Encoding describes the storage encoding of files with
 %   the given Extension.  Files without extension ('') are =plain=.
 %   (This header used to name a non-existent rdf_file_encoding/2.)

 rdf_storage_encoding('', plain).
 1863
 1864
 %!  rdf_load_stream(+Format, +Stream, :Options)
 %
 %   Load RDF data in the given Format from Stream.  The =xml= and
 %   =xhtml= formats are parsed inside an RDF transaction;  =xhtml=
 %   adds the embedded(true) option.  The =triples= format is read
 %   using rdf_load_db_/3.  The target graph is taken from Options.
 %
 %   @tbd    Handle mime-types?

 rdf_load_stream(xml, In, QOptions) :-
     !,
     graph(QOptions, Graph),
     rdf_transaction(load_stream(In, QOptions),
                     parse(Graph)).
 rdf_load_stream(xhtml, In, M:Options) :-
     !,
     graph(Options, Graph),
     rdf_transaction(load_stream(In, M:[embedded(true)|Options]),
                     parse(Graph)).
 rdf_load_stream(triples, In, QOptions) :-
     !,
     graph(QOptions, Graph),
     rdf_load_db_(In, Graph, _Graphs).
 1885
 %!  load_stream(+Stream, :Options)
 %
 %   Parse Stream using process_rdf/3, asserting the triples through
 %   assert_triples/2.  Afterwards the modified flag of the graph
 %   named by the graph(Graph) option is cleared.

 load_stream(In, M:Options) :-
     process_rdf(In, assert_triples, M:Options),
     option(graph(Target), Options),
     rdf_graph_clear_modified_(Target).
 1890
 1891
 %!  report_loaded(+Action, +Source, +DB, +Triples, +StartCPU, +Options)
 %
 %   Print the rdf(loaded(...)) message with the CPU time used since
 %   StartCPU.  Nothing is printed if Action is =none=.  The message
 %   level is =silent= if Options contains silent(true) and
 %   =informational= otherwise.

 report_loaded(none, _, _, _, _, _) :-
     !.
 report_loaded(Action, Source, DB, Triples, StartCPU, Options) :-
     statistics(cputime, NowCPU),
     Used is NowCPU - StartCPU,
     (   option(silent(true), Options)
     ->  Level = silent
     ;   Level = informational
     ),
     Message = rdf(loaded(Action, Source, DB, Triples, Used)),
     print_message(Level, Message).
 1904
 1905
 %!  rdf_unload(+Source) is det.
 %
 %   Identify the graph loaded from Source and use rdf_unload_graph/1
 %   to erase this graph.  Succeeds silently if nothing matches.
 %
 %   @deprecated     For compatibility, this predicate also accepts a
 %                   graph name instead of a source specification.
 %                   Please update your code to use
 %                   rdf_unload_graph/1.

 rdf_unload(Source) :-                           % a source specification
     source_url(Source, _Protocol, URL),
     rdf_graph_source_(Graph, URL, _),
     !,
     rdf_unload_graph(Graph).
 rdf_unload(Name) :-                             % deprecated: a graph name
     atom(Name),
     rdf_graph(Name),
     !,
     warn_deprecated_unload(Name),
     rdf_unload_graph(Name).
 rdf_unload(_).
 1928
 :- dynamic
     warned/0.

 %!  warn_deprecated_unload(+Graph) is det.
 %
 %   Print the deprecation warning for calling rdf_unload/1 with a
 %   graph name.  The fact warned/0 records that the warning has been
 %   printed, so it appears only once.

 warn_deprecated_unload(Graph) :-
     (   warned
     ->  true
     ;   assertz(warned),
         print_message(warning, rdf(deprecated(rdf_unload(Graph))))
     ).
 1938
 1939
 %!  rdf_unload_graph(+Graph) is det.
 %
 %   Remove Graph from the RDF store.  The removal runs in an RDF
 %   transaction.  Succeeds silently if the named graph does not
 %   exist.
 %
 %   @error type_error(atom, Graph) if Graph is not an atom.

 rdf_unload_graph(Graph) :-
     must_be(atom, Graph),
     rdf_graph(Graph),
     !,
     rdf_transaction(do_unload(Graph), unload(Graph)).
 rdf_unload_graph(_).
 1951
 %!  do_unload(+Graph) is det.
 %
 %   Retract all triples of Graph, if it has any, and then destroy the
 %   graph itself.

 do_unload(Graph) :-
     (   rdf_graph_(Graph, Count),
         Count > 0
     ->  rdf_retractall(_, _, _, Graph)
     ;   true                                    % unknown or empty graph
     ),
     rdf_destroy_graph(Graph).
 1959
 1960                 /*******************************
 1961                 *         GRAPH QUERIES        *
 1962                 *******************************/
 1963
 1964%!  rdf_create_graph(+Graph) is det.
 1965%
 1966%   Create an RDF graph without triples.   Succeeds  silently if the
 1967%   graph already exists.
 1968
 1969
 %!  rdf_graph(?Graph) is nondet.
 %
 %   True when Graph is an existing graph, i.e., a graph known to
 %   rdf_graph_/2.

 rdf_graph(Graph) :-
     rdf_graph_(Graph, _TripleCount).
 1976
 %!  rdf_source(?Graph, ?SourceURL) is nondet.
 %
 %   True if the existing graph Graph is loaded from SourceURL.
 %
 %   @deprecated Use rdf_graph_property(Graph, source(SourceURL)).

 rdf_source(Graph, SourceURL) :-
     rdf_graph(Graph),
     rdf_graph_source_(Graph, SourceURL, _Stamp).
 1986
 %!  rdf_source(?Source)
 %
 %   True if Source is the source URL of some loaded graph.
 %
 %   @deprecated     Use rdf_graph/1 or rdf_source/2.

 rdf_source(Source) :-
     rdf_source(_AnyGraph, Source).
 1995
 %!  rdf_make
 %
 %   Reload all loaded files that have been modified since the last
 %   time they were loaded.  The candidates from modified_graph/2 are
 %   collected first and then reloaded using rdf_load/2 with
 %   if(changed).  Errors are printed and do not stop the remaining
 %   reloads.

 rdf_make :-
     findall(URL-Graph, modified_graph(URL, Graph), Candidates),
     forall(member(URL-Graph, Candidates),
            catch(rdf_load(URL, [graph(Graph), if(changed)]), Error,
                  print_message(error, Error))).
 2006
 %!  modified_graph(-SourceURL, -Graph) is nondet.
 %
 %   True when Graph is an existing graph that has a source with a
 %   positive time stamp and whose SourceURL does not start with
 %   =|stream://|=.

 modified_graph(SourceURL, Graph) :-
     rdf_graph(Graph),
     rdf_graph_source_(Graph, SourceURL, Stamp),
     \+ sub_atom(SourceURL, 0, _, _, 'stream://'),
     Stamp > 0.
 2012
 %!  rdf_graph_property(?Graph, ?Property) is nondet.
 %
 %   True when Property is a property of the existing graph Graph.
 %   Defined properties are:
 %
 %       * hash(Hash)
 %       Hash is the (MD5-)hash for the content of Graph.
 %       * modified(Boolean)
 %       True if the graph is modified since it was loaded or
 %       rdf_set_graph/2 was called with modified(false).
 %       * source(Source)
 %       The graph is loaded from the Source (a URL).
 %       * source_last_modified(?Time)
 %       Time is the last-modified timestamp of Source at the moment
 %       the graph was loaded from Source.
 %       * triples(Count)
 %       True when Count is the number of triples in Graph.
 %
 %    Additional graph properties can be added by defining rules for
 %    the multifile predicate property_of_graph/2.  Currently, the
 %    following extensions are defined:
 %
 %       - library(semweb/rdf_persistency)
 %         - persistent(Boolean)
 %           Boolean is =true= if the graph is persistent.

 rdf_graph_property(Graph, Property) :-
     rdf_graph(Graph),                           % enumerate/verify the graph first
     property_of_graph(Property, Graph).
 2042
 :- multifile
     property_of_graph/2.

 %!  property_of_graph(?Property, +Graph) is nondet.
 %
 %   Implementation of rdf_graph_property/2.  Multifile, so other
 %   libraries can add properties.

 property_of_graph(hash(MD5), Graph) :-
     rdf_md5(Graph, MD5).
 property_of_graph(modified(Bool), Graph) :-
     rdf_graph_modified_(Graph, Bool, _).
 property_of_graph(source(SourceURL), Graph) :-
     rdf_graph_source_(Graph, SourceURL, _).
 property_of_graph(source_last_modified(Stamp), Graph) :-
     rdf_graph_source_(Graph, _, Stamp),
     Stamp > 0.0.                                % only positive time stamps
 property_of_graph(triples(N), Graph) :-
     rdf_graph_(Graph, N).
 2057
 %!  rdf_set_graph(+Graph, +Property) is det.
 %
 %   Set properties of Graph.  Defined properties are:
 %
 %       * modified(false)
 %       Set the modified state of Graph to false.
 %
 %   @error domain_error if the argument of modified/1 is not =false=.

 rdf_set_graph(Graph, modified(State)) :-
     must_be(oneof([false]), State),
     rdf_graph_clear_modified_(Graph).
 2068
 2069
 %!  save_cache(+DB, +Cache) is semidet.
 %
 %   Save the triples belonging to DB in the file Cache, using the
 %   version given by the Prolog flag =rdf_triple_format=.  Fails
 %   silently if Cache cannot be opened for writing.

 save_cache(DB, Cache) :-
     current_prolog_flag(rdf_triple_format, Version),
     setup_call_cleanup(
         catch(open(Cache, write, Out, [type(binary)]), _, fail),
         rdf_save_db_(Out, DB, Version),
         close(Out)).
 2080
 %!  assert_triples(+Triples, +Source)
 %
 %   Assert a list of triples into the database.  For security
 %   reasons we check that we are not inserting anything but nice RDF
 %   triples.
 %
 %   @error type_error(rdf_triple, Term) if Triples contains a Term
 %          that is not of the form rdf(S,P,O).

 assert_triples([], _).
 assert_triples([rdf(S,P,O)|Rest], Graph) :-
     !,
     rdf_assert(S, P, O, Graph),
     assert_triples(Rest, Graph).
 assert_triples([Other|_], _) :-
     type_error(rdf_triple, Other).
 2094
 2095
 2096                 /*******************************
 2097                 *             RESET            *
 2098                 *******************************/
 2099
 %!  rdf_reset_db
 %
 %   Remove all triples from the RDF database and reset all its
 %   statistics.  The gensym counter for =|_:genid|= is reset as well.
 %
 %   @bug    This predicate checks for active queries, but this check is
 %           not properly synchronized and therefore the use of this
 %           predicate is unsafe in multi-threaded contexts. It is
 %           mainly used to run functionality tests that need to
 %           start with an empty database.

 rdf_reset_db :-
     reset_gensym('_:genid'),
     rdf_reset_db_.
 2114
 2115
 2116                 /*******************************
 2117                 *           SAVE RDF           *
 2118                 *******************************/
 2119
 2120%!  rdf_save(+Out) is det.
 2121%
 2122%   Same as rdf_save(Out, []).  See rdf_save/2 for details.
 2123
 2124%!  rdf_save(+Out, :Options) is det.
 2125%
 2126%   Write RDF data as RDF/XML. Options is a list of one or more of
 2127%   the following options:
 2128%
 2129%           * graph(+Graph)
 2130%           Save only triples associated to the given named Graph.
 2131%
 2132%           * anon(Bool)
 2133%           If =false= (default =true=) do not save blank nodes that do
 2134%           not appear (indirectly) as object of a named resource.
 2135%
 2136%           * base_uri(URI)
 2137%           BaseURI used. If present, all URIs that can be
 2138%           represented relative to this base are written using
 2139%           their shorthand.  See also =write_xml_base= option.
 2140%
 2141%           * convert_typed_literal(:Convertor)
 2142%           Call Convertor(-Type, -Content, +RDFObject), providing
 2143%           the opposite for the convert_typed_literal option of
 2144%           the RDF parser.
 2145%
 2146%           * document_language(+Lang)
 2147%           Initial =|xml:lang|= saved with rdf:RDF element.
 2148%
 2149%           * encoding(Encoding)
 2150%           Encoding for the output.  Either utf8 or iso_latin_1.
 2151%
 2152%           * inline(+Bool)
 2153%           If =true= (default =false=), inline resources when
 2154%           encountered for the first time. Normally, only bnodes
 2155%           are handled this way.
 2156%
 2157%           * namespaces(+List)
 2158%           Explicitly specify saved namespace declarations. See
 2159%           rdf_save_header/2 option namespaces for details.
 2160%
 2161%           * sorted(+Boolean)
 2162%           If =true= (default =false=), emit subjects sorted on
 2163%           the full URI.  Useful to make file comparison easier.
 2164%
 2165%           * write_xml_base(Bool)
 2166%           If =false=, do _not_ include the =|xml:base|=
 2167%           declaration that is written normally when using the
 2168%           =base_uri= option.
 2169%
 %           * xml_attributes(+Bool)
 %           If =false= (default =true=), never use XML attributes to
 %           save plain literal attributes, i.e., always use an XML
 %           element as in =|<name>Joe</name>|=.
 2174%
 2175%   @param Out      Location to save the data.  This can also be a
 2176%                   file-url (=|file://path|=) or a stream wrapped
 2177%                   in a term stream(Out).
 2178%   @see rdf_save_db/1
 2179
 % Thread-local state used while saving.  Both tables are wiped by
 % cleanup_save/5.
 :- thread_local named_anon/2.                   % +Resource, -Id
 :- thread_local inlined/1.                      % +Resource
 2183
 % rdf_save(+Out): same as rdf_save(Out, []).  Out goes through
 % to_file/2, exactly as rdf_save/2 does, so a =|file://|= URL is
 % translated into a file name instead of being used verbatim as the
 % name of the output file.

 rdf_save(Spec) :-
     to_file(Spec, File),
     rdf_save2(File, []).
 2186
 % rdf_save(+Out, :Options): the second argument is either an option
 % list or, for backward compatibility, an atom naming the graph to
 % save.  Fails for any other second argument.

 rdf_save(Spec, M:Arg) :-
     (   is_list(Arg)
     ->  meta_options(save_meta_option, M:Arg, Options)
     ;   atom(Arg)                               % backward compatibility
     ->  Options = [graph(Arg)]
     ),
     to_file(Spec, File),
     rdf_save2(File, Options).
 2198
 % save_meta_option(?Name): options of rdf_save/2 whose argument is
 % module-qualified by meta_options/3.

 save_meta_option(convert_typed_literal).
 2200
 %!  to_file(+Spec, -File) is det.
 %
 %   File is the file name denoted by Spec if Spec is an atom holding
 %   a file URL (uri_file_name/2).  Any other Spec is passed
 %   unchanged.

 to_file(Spec, File) :-
     (   atom(Spec),
         uri_file_name(Spec, File)
     ->  true
     ;   File = Spec
     ).
 2206
 %!  rdf_save2(+File, +Options) is det.
 %
 %   Do the work for rdf_save/1,2.  File is a file name or
 %   stream(Out).  The encoding option (default =utf8=) is validated
 %   before the output is opened.
 %
 %   Opening the output and resetting the two counter flags happen in
 %   the setup goal of setup_call_catcher_cleanup/4.  This way the
 %   stream cannot leak if the thread is interrupted between opening
 %   and installing the cleanup handler, and the deprecated
 %   call_cleanup/3 is avoided.  cleanup_save/5 receives the
 %   termination Reason, closes the output and restores the flags.
 %
 %   @error domain_error(encoding, Encoding) for an unknown encoding.

 rdf_save2(File, Options) :-
     option(encoding(Encoding), Options, utf8),
     valid_encoding(Encoding),
     setup_call_catcher_cleanup(
         (   open_output(File, Encoding, Out, Close),
             flag(rdf_db_saved_subjects, OSavedSubjects, 0),
             flag(rdf_db_saved_triples, OSavedTriples, 0)
         ),
         rdf_do_save(Out, Options),
         Reason,
         cleanup_save(Reason,
                      File,
                      OSavedSubjects,
                      OSavedTriples,
                      Close)).
 2220
 %!  open_output(+Spec, +Encoding, -Out, -Cleanup) is det.
 %
 %   Out is the stream to write to and Cleanup the goal to call when
 %   done.  For stream(Out) the stream is switched to Encoding and
 %   Cleanup restores the old encoding, unless the stream already uses
 %   Encoding or =wchar_t=; in that case Cleanup is =true=.  Any other
 %   Spec is opened as a file for writing and Cleanup closes it.

 open_output(stream(Out), Encoding, Out, Cleanup) :-
     !,
     stream_property(Out, encoding(Current)),
     (   (   Current == Encoding
         ;   Current == wchar_t          % Internal encoding
         )
     ->  Cleanup = true
     ;   set_stream(Out, encoding(Encoding)),
         Cleanup = set_stream(Out, encoding(Current))
     ).
 open_output(File, Encoding, Out, close(Out)) :-
     open(File, write, Out, [encoding(Encoding)]).
 2234
 %!  valid_encoding(+Encoding) is det.
 %
 %   Succeeds if Encoding is known to xml_encoding_name/2.
 %
 %   @error domain_error(encoding, Encoding) otherwise.

 valid_encoding(Encoding) :-
     xml_encoding_name(Encoding, _),
     !.
 valid_encoding(Encoding) :-
     domain_error(encoding, Encoding).
 2240
 2241
 %!  cleanup_save(+Reason, +File, +OldSubjects, +OldTriples, :Close)
 %
 %   Finish a save: call Close, restore the two counter flags to
 %   their old values while reading the counts of this save, and wipe
 %   the thread-local tables named_anon/2 and inlined/1.  If Reason is
 %   =exit= the rdf(saved(...)) message is printed; any other Reason
 %   is written to =user_error=.

 cleanup_save(Reason, File, OldSubjects, OldTriples, Close) :-
     call(Close),
     flag(rdf_db_saved_subjects, Subjects, OldSubjects),
     flag(rdf_db_saved_triples, Triples, OldTriples),
     retractall(named_anon(_, _)),
     retractall(inlined(_)),
     (   Reason == exit
     ->  print_message(informational,
                       rdf(saved(File, Subjects, Triples)))
     ;   format(user_error, 'Reason = ~w~n', [Reason])
     ).
 2257
 %!  rdf_do_save(+Out, +Options) is det.
 %
 %   Write the header, all non-anonymous subjects and the footer to
 %   Out.  With sorted(true) the subjects are emitted in standard
 %   order of terms.  Without a graph option all subjects are saved.
 %
 %   The sorted case collects subjects with findall/3 and sort/2 in
 %   both branches.  The earlier setof/3 in the no-graph branch failed
 %   on an empty database, which made the whole save fail after the
 %   header had already been written.

 rdf_do_save(Out, Options0) :-
     rdf_save_header(Out, Options0, Options),
     graph(Options, DB),
     (   option(sorted(true), Options, false)
     ->  (   var(DB)
         ->  findall(Subject, rdf_subject(Subject), SubjectList)
         ;   findall(Subject, rdf(Subject, _, _, DB:_), SubjectList)
         ),
         sort(SubjectList, Subjects),            % also removes duplicates
         forall(member(Subject, Subjects),
                rdf_save_non_anon_subject(Out, Subject, Options))
     ;   forall(rdf_subject_in_graph(Subject, DB),
                rdf_save_non_anon_subject(Out, Subject, Options))
     ),
     rdf_save_footer(Out),
     !.                                  % dubious cut; without the
                                         % cleanup handlers isn't called!?
 2275
 %!  rdf_subject_in_graph(-Subject, ?DB) is nondet.
 %
 %   True when Subject is a subject in the graph DB.  If DB is
 %   unbound, all subjects are enumerated.  Otherwise there are two
 %   strategies:
 %
 %     - Enumerate all resources and keep those that are a subject in
 %       DB.  This needs no memory and is used if DB holds more than
 %       one tenth of all triples.
 %     - Collect the subjects of all triples of DB and remove the
 %       duplicates.  This is used if DB is small compared to the
 %       database.

 rdf_subject_in_graph(Subject, DB) :-
     var(DB),
     !,
     rdf_subject(Subject).
 rdf_subject_in_graph(Subject, DB) :-
     rdf_statistics(triples(Total)),
     rdf_graph_property(DB, triples(InGraph)),
     InGraph > Total // 10,                      % big graph: filter
     !,
     rdf_resource(Subject),
     once(rdf(Subject, _, _, DB:_)).
 rdf_subject_in_graph(Subject, DB) :-            % small graph: collect
     findall(S, rdf(S, _, _, DB:_), All),
     list_to_set(All, Unique),
     member(Subject, Unique).
 2302
 2303
 %!  graph(+Options, -DB) is det.
 %
 %   DB is the graph named by the graph(DB) option or, if there is
 %   none, by the db(DB) option.  DB is left unbound if neither is
 %   present.  Options may be module-qualified.

 graph(QOptions, DB) :-
     strip_module(QOptions, _, Options),
     (   (   memberchk(graph(Named), Options)
         ;   memberchk(db(Named), Options)
         )
     ->  DB = Named
     ;   true                            % leave unbound
     ).
 2312
 2313
 %!  rdf_save_header(+Fd, +Options)
 %
 %   Save an RDF header: the XML declaration, the DOCTYPE with one
 %   ENTITY per namespace, and the opening rdf:RDF element with the
 %   matching namespace declarations.  It uses the primitives from
 %   section 3.5 to generate the required namespaces and desired
 %   short-name.  Options is a list of:
 %
 %     * graph(+URI)
 %     Only search for namespaces used in triples that belong to the
 %     given named graph.
 %
 %     * namespaces(+List)
 %     Where List is a list of namespace abbreviations. With this
 %     option, the expensive search for all namespaces that may be
 %     used by your data is omitted. The namespaces =rdf= and =rdfs=
 %     are added to the provided List. If a namespace is not
 %     declared, the resource is emitted in non-abbreviated form.

 rdf_save_header(Out, Options) :-
     rdf_save_header(Out, Options, _OptionsOut).
 2338
 %!  rdf_save_header(+Out, +Options, -OptionsOut)
 %
 %   As rdf_save_header/2.  OptionsOut is Options extended with
 %   nsmap(Map), where Map is the list of Id=URI pairs of the declared
 %   namespaces.  For compatibility, Options may be an atom; it is
 %   then used as the graph to save.
 %
 %   @error domain_error(rdf_encoding, Enc) for an unknown encoding.

 rdf_save_header(Out, Options, OptionsOut) :-
     is_list(Options),
     !,
     option(encoding(Enc), Options, utf8),
     xml_encoding(Enc, EncName),
     % XML declaration and DOCTYPE with one entity per namespace
     format(Out, '<?xml version=\'1.0\' encoding=\'~w\'?>~n', [EncName]),
     format(Out, '<!DOCTYPE rdf:RDF [', []),
     header_namespaces(Options, Prefixes),
     nsmap(Prefixes, Map),
     append(Options, [nsmap(Map)], OptionsOut),
     forall(member(Prefix=URI, Map),
            (   xml_quote_attribute(URI, Quoted, Enc),
                xml_escape_parameter_entity(Quoted, Escaped),
                format(Out, '~N    <!ENTITY ~w \'~w\'>', [Prefix, Escaped])
            )),
     format(Out, '~N]>~n~n', []),
     % opening tag with its attributes
     format(Out, '<rdf:RDF', []),
     forall(member(Prefix, Prefixes),
            format(Out, '~N    xmlns:~w="&~w;"~n', [Prefix, Prefix])),
     (   option(base_uri(Base), Options),
         option(write_xml_base(true), Options, true)
     ->  xml_quote_attribute(Base, QuotedBase, Enc),
         format(Out, '~N    xml:base="~w"~n', [QuotedBase])
     ;   true
     ),
     (   memberchk(document_language(Lang), Options)
     ->  format(Out, '~N    xml:lang="~w"', [Lang])
     ;   true
     ),
     format(Out, '>~n', []).
 rdf_save_header(Out, Graph, OptionsOut) :-      % compatibility
     atom(Graph),
     rdf_save_header(Out, [graph(Graph)], OptionsOut).
 2375
 %!  xml_encoding(+Enc, -Encoding) is det.
 %
 %   Encoding is the XML name of the Prolog encoding Enc, as given by
 %   xml_encoding_name/2.
 %
 %   @error domain_error(rdf_encoding, Enc) if Enc is not known.

 xml_encoding(Enc, Encoding) :-
     xml_encoding_name(Enc, Encoding),
     !.
 xml_encoding(Enc, _) :-
     domain_error(rdf_encoding, Enc).
 2381
 %!  xml_encoding_name(?PrologEncoding, ?XMLName) is nondet.
 %
 %   Table of the output encodings that can be saved, together with
 %   the name written into the XML declaration.

 xml_encoding_name(ascii,       'US-ASCII').
 xml_encoding_name(iso_latin_1, 'ISO-8859-1').
 xml_encoding_name(utf8,        'UTF-8').
 2385
 %!  nsmap(+NSIds, -Map:list(id=uri)) is det.
 %
 %   Create a namespace map that is compatible with xml_write/2 for
 %   dealing with XML literals.  Each identifier is paired with the
 %   URI that ns/2 gives for it.

 nsmap([], []).
 nsmap([Prefix|Prefixes], [Prefix=URI|Pairs]) :-
     ns(Prefix, URI),
     nsmap(Prefixes, Pairs).
 2395
 %!  xml_escape_parameter_entity(+In, -Out) is det.
 %
 %   Escape % as &#37; for entity declarations.  Out is In itself if
 %   In contains no percent sign.

 xml_escape_parameter_entity(In, Out) :-
     (   sub_atom(In, _, _, _, '%')
     ->  atom_codes(In, Codes),
         phrase(escape_parent(Codes), Escaped),
         atom_codes(Out, Escaped)
     ;   Out = In
     ).

 % escape_parent(+Codes)//: copy Codes, writing each percent sign as
 % the character reference &#37;.

 escape_parent([]) -->
     [].
 escape_parent([Code|Codes]) -->
     (   { Code == 0'% }
     ->  "&#37;"
     ;   [Code]
     ),
     escape_parent(Codes).
 2415
 2416
 %!  header_namespaces(+Options, -List)
 %
 %   Get the namespaces we will define as entities.  With the option
 %   namespaces(NSL), List is NSL with =rdf= and =rdfs= added, sorted
 %   and without duplicates.  Otherwise List is computed from the data
 %   by used_namespace_entities/2.

 header_namespaces(Options, List) :-
     (   memberchk(namespaces(Given), Options)
     ->  sort([rdf,rdfs|Given], List)
     ;   graph(Options, DB),
         used_namespace_entities(List, DB)
     ).
 2428
 %!  rdf_graph_prefixes(?Graph, -List:ord_set) is det.
 %!  rdf_graph_prefixes(?Graph, -List:ord_set, :Options) is det.
 %
 %   List is a sorted list of prefixes (namespaces) in Graph.  Options
 %   defined are:
 %
 %       * filter(:Filter)
 %       The optional Filter argument is used to filter the results.
 %       It is called with 3 additional arguments:
 %
 %           ==
 %           call(Filter, Where, Prefix, URI)
 %           ==
 %
 %       The Where argument gives the location of the prefix and is
 %       one of =subject=, =predicate=, =object= or =type=. The
 %       Prefix argument is the potentially new prefix and URI is
 %       the full URI that is being processed.
 %
 %       * expand(:Goal)
 %       Hook to generate the graph.  Called using
 %
 %           ==
 %           call(Goal,S,P,O,Graph)
 %           ==
 %
 %       * min_count(+Count)
 %       Only include prefixes that appear at least N times.  Default
 %       is 1. Declared prefixes are always returned if found at
 %       least one time.
 %
 %       * get_prefix(:GetPrefix)
 %       Predicate to extract the candidate prefix from an IRI.  Default
 %       is iri_xml_namespace/2.
 %
 %   For compatibility, a third argument that is not a list is used
 %   as the filter.

 :- thread_local
     graph_prefix/3.
 :- meta_predicate
     rdf_graph_prefixes(?, -, :).

 rdf_graph_prefixes(Graph, List) :-
     rdf_graph_prefixes(Graph, List, []).

 rdf_graph_prefixes(Graph, List, M:Arg) :-
     is_list(Arg),
     !,
     meta_options(is_meta, M:Arg, Options),
     option(filter(Filter), Options, true),
     option(expand(Expand), Options, rdf_db),
     option(min_count(MinCount), Options, 1),
     option(get_prefix(GetPrefix), Options, iri_xml_namespace),
     % graph_prefix/3 is scratch data; always wipe it afterwards
     call_cleanup(prefixes(Expand, Graph, Found, Filter, MinCount, GetPrefix),
                  retractall(graph_prefix(_,_,_))),
     sort(Found, List).
 rdf_graph_prefixes(Graph, List, M:Filter) :-    % old calling convention
     rdf_graph_prefixes(Graph, List, M:[filter(Filter)]).
 2486
 % is_meta(?Option): options of rdf_graph_prefixes/3 whose argument
 % is module-qualified by meta_options/3.

 is_meta(filter).
 is_meta(expand).
 is_meta(get_prefix).
 2490
 2491
 %!  prefixes(:Expand, ?Graph, -Prefixes, :Filter, +MinCount, :GetPrefix)
 %
 %   Run over all triples produced by call(Expand, S, P, O, Graph)
 %   and record the prefixes of subject, predicate and object in
 %   graph_prefix/3.  Prefixes is the list of recorded prefixes whose
 %   count unifies with MinCount.

 prefixes(Expand, Graph, Prefixes, Filter, MinCount, GetPrefix) :-
     (   call(Expand, S, P, O, Graph),
         add_ns(subject, GetPrefix, Filter, S, MinCount, s(S)),
         add_ns(predicate, GetPrefix, Filter, P, MinCount, sp(S,P)),
         add_ns_obj(GetPrefix, Filter, O, MinCount, spo(S,P,O)),
         fail                                    % failure-driven loop
     ;   true
     ),
     findall(Full, graph_prefix(Full, MinCount, _), Prefixes).
 2501
 %!  add_ns(+Where, :GetPrefix, :Filter, +Resource, +MinCount, +Context)
 %
 %   Register the prefix of Resource, unless Resource is a blank node
 %   or GetPrefix yields no (or an empty) prefix. Nothing is added once
 %   the prefix already has a graph_prefix/3 entry matching MinCount.
 %   Otherwise the prefix is added if Filter is the atom `true` or
 %   call(Filter, Where, Prefix, Resource) succeeds.

 add_ns(Where, GetPrefix, Filter, Resource, MinCount, Context) :-
     \+ rdf_is_bnode(Resource),
     call(GetPrefix, Resource, Prefix),
     Prefix \== '',
     !,
     (   graph_prefix(Prefix, MinCount, _)   % seen often enough already
     ->  true
     ;   (   Filter == true                  % no filter: accept
         ;   call(Filter, Where, Prefix, Resource)
         )
     ->  add_ns(Prefix, Context)
     ;   true
     ).
 add_ns(_, _, _, _, _, _).
 2516
 %!  add_ns(+Prefix, +Context) is det.
 %
 %   Update graph_prefix/3 for one occurrence of Prefix in Context.
 %   The clauses are tried in this order:
 %
 %     1. Context was already recorded for Prefix: no change.
 %     2. Prefix is known: bump its count and remember Context.
 %     3. Prefix is found in ns/2: store it with unbound count and
 %        contexts, so it matches any count asked for later.
 %     4. Otherwise store it with count 1.

 add_ns(Prefix, Context) :-
     graph_prefix(Prefix, _, Seen),
     memberchk(Context, Seen),
     !.
 add_ns(Prefix, Context) :-
     retract(graph_prefix(Prefix, Count0, Seen)),
     !,
     Count is Count0+1,
     asserta(graph_prefix(Prefix, Count, [Context|Seen])).
 add_ns(Prefix, _) :-
     ns(_, Prefix),
     !,
     asserta(graph_prefix(Prefix, _, _)).
 add_ns(Prefix, Context) :-
     asserta(graph_prefix(Prefix, 1, [Context])).
 2532
 2533
 %!  add_ns_obj(:GetPrefix, :Filter, +Object, +MinCount, +Context) is det.
 %
 %   Register the prefix for the object of a triple. An atom object is
 %   registered as `object`. For a typed literal with an atom type, the
 %   type is registered as `type` with context t(Type). Any other
 %   object is ignored.

 add_ns_obj(GetPrefix, Filter, Object, MinCount, Context) :-
     (   atom(Object)
     ->  add_ns(object, GetPrefix, Filter, Object, MinCount, Context)
     ;   Object = literal(type(Type, _)),
         atom(Type)
     ->  add_ns(type, GetPrefix, Filter, Type, MinCount, t(Type))
     ;   true
     ).
 2543
 2544
 %!  used_namespace_entities(-List, ?Graph) is det.
 %
 %   List holds the namespace aliases that are actually used in Graph.
 %   Before collecting them, decl_used_predicate_ns/1 is run so that
 %   every predicate of Graph has a namespace alias (creating ns<N>
 %   aliases where needed), because RDF/XML can only write a predicate
 %   as an XML name.

 used_namespace_entities(Aliases, Graph) :-
     decl_used_predicate_ns(Graph),
     used_namespaces(Aliases, Graph).
 2555
 %!  used_namespaces(-List, ?DB) is det.
 %
 %   List is the sorted set of aliases for the prefixes found in DB by
 %   rdf_graph_prefixes/2. The alias `rdf` is always included.

 used_namespaces(Aliases, DB) :-
     rdf_graph_prefixes(DB, Prefixes),
     ns_abbreviations(Prefixes, Known),
     WithRdf = [rdf|Known],
     sort(WithRdf, Aliases).
 2560
 %!  ns_abbreviations(+Prefixes, -Aliases) is det.
 %
 %   Map each full prefix to the first alias ns/2 gives for it.
 %   Prefixes without an ns/2 entry are dropped.

 ns_abbreviations([], []).
 ns_abbreviations([Full|Fulls], [Alias|Aliases]) :-
     ns(Alias, Full),
     !,
     ns_abbreviations(Fulls, Aliases).
 ns_abbreviations([_NoAlias|Fulls], Aliases) :-
     ns_abbreviations(Fulls, Aliases).
 2568
 2569
 2570/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 2571For every URL used as a predicate  we   *MUST*  define a namespace as we
 2572cannot use names holding /, :, etc. as XML identifiers.
 2573- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 2574
 % predicate_ns(Pred, AliasOrDash): per-thread table filled by
 % decl_predicate_ns/1.
 :- thread_local predicate_ns/2.

 %!  decl_used_predicate_ns(?DB) is det.
 %
 %   Reset predicate_ns/2 and run decl_predicate_ns/1 on every
 %   predicate that rdf_current_predicate/2 reports for DB.

 decl_used_predicate_ns(DB) :-
     retractall(predicate_ns(_,_)),
     (   rdf_current_predicate(Pred, DB),
         decl_predicate_ns(Pred),
         fail                            % failure-driven loop
     ;   true
     ).
 2585
 %!  decl_predicate_ns(+Pred) is det.
 %
 %   Make sure predicate_ns/2 holds exactly one entry for Pred.
 %
 %     1. Nothing is done if Pred already has an entry.
 %     2. If rdf_global_id/2 splits Pred into NS:Local with Local a
 %        valid XML name, NS is recorded.
 %     3. Otherwise Pred is split at the shortest leading part whose
 %        remainder consists only of xml_code/1 characters. An empty
 %        leading part is recorded as `-` (no namespace). A non-empty
 %        one is looked up in ns/2; if it has no alias yet, the first
 %        free alias ns1, ns2, ... is registered and reported.
 %
 %   Fix: when the namespace already had an alias, the entry used to be
 %   asserted both inside the lookup branch and again after it, leaving
 %   a duplicate predicate_ns/2 fact. It is now asserted once.

 decl_predicate_ns(Pred) :-
     predicate_ns(Pred, _),
     !.
 decl_predicate_ns(Pred) :-
     rdf_global_id(NS:Local, Pred),
     xml_name(Local),
     !,
     assert(predicate_ns(Pred, NS)).
 decl_predicate_ns(Pred) :-
     atom_codes(Pred, Codes),
     append(NSCodes, LocalCodes, Codes),
     xml_codes(LocalCodes),
     !,
     (   NSCodes \== []
     ->  atom_codes(NS, NSCodes),
         (   ns(Id, NS)                  % alias exists: just reuse it
         ->  true
         ;   between(1, infinite, N),    % find the first unused ns<N>
             atom_concat(ns, N, Id),
             \+ ns(Id, _)
         ->  rdf_register_ns(Id, NS),
             print_message(informational,
                           rdf(using_namespace(Id, NS)))
         ),
         assert(predicate_ns(Pred, Id))  % the single place that records Id
     ;   assert(predicate_ns(Pred, -)) % no namespace used
     ).
 2613
 %!  xml_codes(+Codes) is semidet.
 %
 %   True if every element of Codes satisfies xml_code/1.

 xml_codes([]).
 xml_codes([Code|Codes]) :-
     xml_code(Code),
     xml_codes(Codes).

 %!  xml_code(?Code) is semidet.
 %
 %   True if Code has code_type/2 class `csym` or is the minus sign.

 xml_code(Code) :-
     (   code_type(Code, csym)
     ->  true
     ;   Code = 0'-
     ).
 2623
 2624
 %!  rdf_save_footer(Out:stream) is det.
 %
 %   Finish XML generation: discard the named_anon/2 and inlined/1
 %   bookkeeping collected while saving and write the closing
 %   `</rdf:RDF>` tag to Out.
 %
 %   @see rdf_save_header/2, rdf_save_subject/3.

 rdf_save_footer(Out) :-
     retractall(named_anon(_, _)),       % drop per-save state
     retractall(inlined(_)),
     format(Out, '</rdf:RDF>~n', []).
 2635
 %!  rdf_save_non_anon_subject(+Out, +Subject, +Options)
 %
 %   Save Subject using rdf_save_subject/3 and count it in the flag
 %   `rdf_db_saved_subjects`. A blank node is skipped if Options
 %   contains anon(false) or if it appears as the object of some triple
 %   in the graph selected by Options.

 rdf_save_non_anon_subject(Out, Subject, Options) :-
     (   rdf_is_bnode(Subject),
         (   memberchk(anon(false), Options)
         ;   graph(Options, DB),
             rdf_db(_, _, Subject, DB)
         )
     ->  true                            % skip this blank node
     ;   rdf_save_subject(Out, Subject, Options),
         flag(rdf_db_saved_subjects, Count, Count+1)
     ).
 2651
 2652
 %!  rdf_save_subject(+Out, +Subject:resource, +Options) is det.
 %
 %   Write the triples about Subject to Out, followed by a newline.
 %   Options:
 %
 %     * graph(+Graph)
 %     Only save properties from Graph.
 %     * base_uri(+URI)
 %     * convert_typed_literal(:Goal)
 %     * document_language(+XMLLang)
 %
 %   For backward compatibility the last argument may also be a graph
 %   name or unbound, which is mapped to [graph(DB)] or [] respectively.
 %
 %   @error rdf_save_failed(Subject) if writing the subject fails.
 %   @see rdf_save/2 for a description of these options.

 rdf_save_subject(Out, Subject, Options) :-
     is_list(Options),
     !,
     option(base_uri(BaseURI), Options, '-'),
     (   rdf_save_subject(Out, Subject, BaseURI, 0, Options)
     ->  format(Out, '~n', [])
     ;   throw(error(rdf_save_failed(Subject), 'Internal error'))
     ).
 rdf_save_subject(Out, Subject, DB) :-
     (   var(DB)
     ->  Options = []
     ;   Options = [graph(DB)]
     ),
     rdf_save_subject(Out, Subject, Options).
 2678
 2679
 %!  rdf_save_subject(+Out:stream, +Subject:resource, +BaseURI,
 %!                   +Indent:int, +Options) is det.
 %
 %   Save the properties of Subject, unless Subject is recorded in
 %   inlined/1, in which case nothing is written.
 %
 %   @param Indent   Current indentation

 rdf_save_subject(Out, Subject, BaseURI, Indent, Options) :-
     (   inlined(Subject)
     ->  true
     ;   do_save_subject(Out, Subject, BaseURI, Indent, Options)
     ).
 2692
 %!  do_save_subject(+Out, +Subject, +BaseURI, +Indent, +Options) is det.
 %
 %   Collect all Pred=Object pairs for Subject from the graph selected
 %   by Options, remove duplicates, and write them. A message is
 %   printed if duplicates were dropped. The number of unique pairs is
 %   added to the flag `rdf_db_saved_triples`.

 do_save_subject(Out, Subject, BaseURI, Indent, Options) :-
     graph(Options, DB),
     findall(P=O, rdf_db(Subject, P, O, DB), Found),
     sort(Found, Unique),                % also removes duplicates
     length(Found, FoundCount),
     length(Unique, UniqueCount),
     Dropped is FoundCount-UniqueCount,
     (   Dropped > 0
     ->  print_message(informational,
                       rdf(save_removed_duplicates(Dropped, Subject)))
     ;   true
     ),
     rdf_save_subject(Out, Subject, BaseURI, Unique, Indent, Options),
     flag(rdf_db_saved_triples, Saved, Saved+UniqueCount).
 2707
 %!  rdf_db(?Subject, ?Pred, ?Object, ?DB) is nondet.
 %
 %   Query triples. With DB unbound this is rdf/3 over all graphs and
 %   DB stays unbound. Otherwise only triples whose source is DB:_
 %   are returned.

 rdf_db(Subject, Pred, Object, DB) :-
     (   var(DB)
     ->  rdf(Subject, Pred, Object)
     ;   rdf(Subject, Pred, Object, DB:_)
     ).
 2714
 %!  rdf_save_subject(+Out:stream, +Subject:resource, +BaseURI,
 %!                   +Atts:list(Pred=Obj), +Indent:int, +Options) is det.
 %
 %   Write the triples given by Atts for Subject. If Atts holds an
 %   rdf:type whose value is not a blank node and can be written as an
 %   XML name, that type becomes the element name and is removed from
 %   the attributes. Otherwise an `rdf:Description` element is written.

 rdf_save_subject(Out, Subject, BaseURI, Atts, Indent, Options) :-
     rdf_equal(rdf:type, RdfType),
     select(RdfType=Class, Atts, OtherAtts),  % tries each rdf:type in turn
     \+ rdf_is_bnode(Class),
     rdf_id(Class, BaseURI, ElementId),
     xml_is_name(ElementId),
     !,
     format(Out, '~*|<', [Indent]),
     rdf_write_id(Out, ElementId),
     save_about(Out, BaseURI, Subject, Options),
     save_attributes(OtherAtts, BaseURI, Out, ElementId, Indent, Options).
 rdf_save_subject(Out, Subject, BaseURI, Atts, Indent, Options) :-
     format(Out, '~*|<rdf:Description', [Indent]),
     save_about(Out, BaseURI, Subject, Options),
     save_attributes(Atts, BaseURI, Out, rdf:'Description', Indent, Options).
 2735
 %!  xml_is_name(+Id) is semidet.
 %
 %   True if Id is an XML name. For a term NS:Local only Local is
 %   checked.

 xml_is_name(Id) :-
     (   Id = _NS:Local
     ->  xml_name(Local)
     ;   xml_name(Id)
     ).
 2741
 %!  save_about(+Out, +BaseURI, +Subject, +Options) is det.
 %
 %   Write the identifying attribute of Subject. A blank node gets an
 %   `rdf:nodeID` attribute if named_anon/2 holds an id for it and
 %   nothing otherwise. Any other subject gets an `rdf:about`
 %   attribute with the value produced by rdf_value/4.

 save_about(Out, BaseURI, Subject, Options) :-
     (   rdf_is_bnode(Subject)
     ->  (   named_anon(Subject, NodeID)
         ->  format(Out, ' rdf:nodeID="~w"', [NodeID])
         ;   true
         )
     ;   option(encoding(Encoding), Options, utf8),
         rdf_value(Subject, BaseURI, Quoted, Encoding),
         format(Out, ' rdf:about="~w"', [Quoted])
     ).
 2758
 %!  save_attributes(+List, +BaseURI, +Stream, +Element, +Indent, +Options)
 %
 %   Finish an element whose begin tag has been started. The
 %   attributes are split by split_attributes/4. Those that fit in the
 %   tag are written as XML attributes. If nothing remains, the tag is
 %   closed with `/>`. Otherwise the remaining ones are written as
 %   element content, followed by the end tag for Element.

 save_attributes(Atts, BaseURI, Out, Element, Indent, Options) :-
     split_attributes(Atts, TagAtts, BodyAtts, Options),
     ChildIndent is Indent + 2,
     save_attributes2(TagAtts, BaseURI, tag, Out, ChildIndent, Options),
     (   BodyAtts == []
     ->  format(Out, '/>~n', [])
     ;   format(Out, '>~n', []),
         save_attributes2(BodyAtts, BaseURI, body, Out, ChildIndent, Options),
         format(Out, '~N~*|</', [Indent]),
         rdf_write_id(Out, Element),
         format(Out, '>~n', [])
     ).
 2777
 %!  split_attributes(+Attributes, -HeadAttrs, -BodyAttr, Options)
 %
 %   Split a Name=Value list into attributes for the begin tag and
 %   attributes for the element body. With the option
 %   xml_attributes(false) everything goes into the body. Otherwise an
 %   attribute goes into the tag only if its name occurs once and
 %   in_tag_attribute/1 accepts it.

 split_attributes(Atts, [], Atts, Options) :-
     option(xml_attributes(false), Options),
     !.
 split_attributes(Atts, InTag, InBody, _) :-
     duplicate_attributes(Atts, Repeated, Unique),
     simple_literal_attributes(Unique, InTag, NotInTag),
     append(Repeated, NotInTag, InBody).
 2791
 %!  duplicate_attributes(+Attrs, -Duplicates, -Singles)
 %
 %   Separate attributes whose name appears more than once from those
 %   that appear once. XML does not allow the same attribute twice in
 %   one tag, so repeated names cannot be written in the begin tag.

 duplicate_attributes([], [], []).
 duplicate_attributes([Name=Value|Rest], Dupls, Singles) :-
     named_attributes(Name, Rest, Same, Others),
     Same \== [],
     append([Name=Value|Same], MoreDupls, Dupls),
     !,
     duplicate_attributes(Others, MoreDupls, Singles).
 duplicate_attributes([Single|Rest], Dupls, [Single|Singles]) :-
     duplicate_attributes(Rest, Dupls, Singles).

 %!  named_attributes(+Name, +Attrs, -Same, -Others) is det.
 %
 %   Split Attrs into those of the form Name=_ and all the others,
 %   keeping the original order in both lists.

 named_attributes(_, [], [], []) :- !.
 named_attributes(Name, [Att|Atts], Same, Others) :-
     (   Att = (Name=_)
     ->  Same = [Att|Same1],
         named_attributes(Name, Atts, Same1, Others)
     ;   Others = [Att|Others1],
         named_attributes(Name, Atts, Same, Others1)
     ).
 2816
 %!  simple_literal_attributes(+Attributes, -Inline, -Body)
 %
 %   Split Attributes into those that in_tag_attribute/1 accepts for
 %   the begin tag (Inline) and those that must go into the body of
 %   the description (Body). The order is preserved in both lists.

 simple_literal_attributes([], [], []).
 simple_literal_attributes([Att|Atts], [Att|Inline], Body) :-
     in_tag_attribute(Att),
     !,
     simple_literal_attributes(Atts, Inline, Body).
 simple_literal_attributes([Att|Atts], Inline, [Att|Body]) :-
     simple_literal_attributes(Atts, Inline, Body).

 %!  in_tag_attribute(+NameValue) is semidet.
 %
 %   True for _=literal(Text) if Text is an atom shorter than 60
 %   characters. Requiring an atom excludes lang(_,_) and type(_,_)
 %   literals.

 in_tag_attribute(_=literal(Text)) :-
     atom(Text),
     atom_length(Text, Length),
     Length < 60.
 2834
 %!  save_attributes2(+List, +BaseURI, +TagOrBody, +Stream, +Indent, +Options)
 %
 %   Write every attribute of List in order using save_attribute/6.
 %   TagOrBody is `tag` or `body` and is passed on unchanged.

 save_attributes2([], _, _, _, _, _).
 save_attributes2([Att|Atts], BaseURI, Where, Out, Indent, Options) :-
     save_attribute(Where, Att, BaseURI, Out, Indent, Options),
     save_attributes2(Atts, BaseURI, Where, Out, Indent, Options).
 2843
 %!  save_attribute(+Where, +NameValue, +BaseURI, +Out, +Indent, +Options)
 %
 %   Write one Name=Value pair. Where is `tag` (write as an XML
 %   attribute inside the begin tag) or `body` (write as a child
 %   element). The body clauses are tried in this order:
 %
 %     1. Literal values: delegated to save_body_literal/6, after
 %        giving the convert_typed_literal(Converter) option a chance
 %        to turn the literal into type(Type, Content).
 %     2. Blank nodes: written as an `rdf:nodeID` reference if already
 %        named, otherwise nested (as a Collection or as a subject).
 %        A node that has more than one incoming triple is given a
 %        fresh `bn<N>` id first.
 %     3. With inline(true): a resource that has properties of its own
 %        and is not yet inlined is nested and recorded in inlined/1.
 %     4. Everything else: an empty element with `rdf:resource`.

 save_attribute(tag, Name=literal(Value), BaseURI, Out, Indent, Options) :-
     rdf_id(Name, BaseURI, NameId),
     option(encoding(Encoding), Options, utf8),
     xml_quote_attribute(Value, Quoted, Encoding),
     AttIndent is Indent + 2,
     format(Out, '~N~*|', [AttIndent]),
     rdf_write_id(Out, NameId),
     format(Out, '="~w"', [Quoted]).
 save_attribute(body, Name=literal(Literal0), BaseURI, Out, Indent, Options) :-
     !,
     rdf_id(Name, BaseURI, NameId),
     (   memberchk(convert_typed_literal(Converter), Options),
         call(Converter, Type, Content, Literal0)
     ->  Literal = type(Type, Content)
     ;   Literal = Literal0
     ),
     save_body_literal(Literal, NameId, BaseURI, Out, Indent, Options).
 save_attribute(body, Name=BNode, BaseURI, Out, Indent, Options) :-
     rdf_is_bnode(BNode),
     !,
     rdf_id(Name, BaseURI, NameId),
     format(Out, '~N~*|<', [Indent]),
     rdf_write_id(Out, NameId),
     (   named_anon(BNode, KnownID)
     ->  format(Out, ' rdf:nodeID="~w"/>', [KnownID])
     ;   (   rdf(S1, Name, BNode),       % referenced from a second place?
             rdf(S2, P2, BNode),
             (S1 \== S2 ; Name \== P2)
         ->  predicate_property(named_anon(_,_), number_of_clauses(N)),
             atom_concat('bn', N, NewID),
             assertz(named_anon(BNode, NewID))
         ;   true
         ),
         ChildIndent is Indent + 2,
         (   rdf_collection(BNode)
         ->  save_about(Out, BaseURI, BNode, Options),
             format(Out, ' rdf:parseType="Collection">~n', []),
             rdf_save_list(Out, BNode, BaseURI, ChildIndent, Options)
         ;   format(Out, '>~n', []),
             rdf_save_subject(Out, BNode, BaseURI, ChildIndent, Options)
         ),
         format(Out, '~N~*|</', [Indent]),
         rdf_write_id(Out, NameId),
         format(Out, '>~n', [])
     ).
 save_attribute(body, Name=Resource, BaseURI, Out, Indent, Options) :-
     option(inline(true), Options),
     has_attributes(Resource, Options),
     \+ inlined(Resource),
     !,
     assertz(inlined(Resource)),
     rdf_id(Name, BaseURI, NameId),
     format(Out, '~N~*|<', [Indent]),
     rdf_write_id(Out, NameId),
     ChildIndent is Indent + 2,
     (   rdf_collection(Resource)
     ->  save_about(Out, BaseURI, Resource, Options),
         format(Out, ' rdf:parseType="Collection">~n', []),
         rdf_save_list(Out, Resource, BaseURI, ChildIndent, Options)
     ;   format(Out, '>~n', []),
         do_save_subject(Out, Resource, BaseURI, ChildIndent, Options)
     ),
     format(Out, '~N~*|</', [Indent]),
     rdf_write_id(Out, NameId),
     format(Out, '>~n', []).
 save_attribute(body, Name=Resource, BaseURI, Out, Indent, Options) :-
     option(encoding(Encoding), Options, utf8),
     rdf_value(Resource, BaseURI, Quoted, Encoding),
     rdf_id(Name, BaseURI, NameId),
     format(Out, '~N~*|<', [Indent]),
     rdf_write_id(Out, NameId),
     format(Out, ' rdf:resource="~w"/>', [Quoted]).
 2916
 %!  has_attributes(+URI, +Options) is semidet.
 %
 %   True if URI is the subject of at least one triple in the graph
 %   selected by Options.

 has_attributes(URI, Options) :-
     once(( graph(Options, DB),
            rdf_db(URI, _, _, DB)
          )).
 2921
 %!  save_body_literal(+Literal, +NameText, +BaseURI,
 %!                    +Out, +Indent, +Options).
 %
 %   Write a literal as the content of a NameText element. Literal is
 %   one of:
 %
 %     * lang(Lang, Value)
 %     Written with `xml:lang`, unless Lang is the document language
 %     given by the document_language(Lang) option.
 %     * type(Type, Value)
 %     An rdf:XMLLiteral is written with `rdf:parseType="Literal"`:
 %     verbatim if it is an atom, through save_xml_literal/5 otherwise.
 %     Other types are written with `rdf:datatype`.
 %     * an atomic value
 %     Written as plain element content.
 %     * anything else
 %     Treated as the DOM of an rdf:XMLLiteral.

 save_body_literal(lang(Lang, Text),
                   NameText, BaseURI, Out, Indent, Options) :-
     !,
     format(Out, '~N~*|<', [Indent]),
     rdf_write_id(Out, NameText),
     (   memberchk(document_language(Lang), Options)
     ->  write(Out, '>')
     ;   rdf_id(Lang, BaseURI, LangId),
         format(Out, ' xml:lang="~w">', [LangId])
     ),
     save_attribute_value(Text, Out, Options),
     write(Out, '</'), rdf_write_id(Out, NameText), write(Out, '>').
 save_body_literal(type(Type, DOM),
                   NameText, _BaseURI, Out, Indent, Options) :-
     rdf_equal(Type, rdf:'XMLLiteral'),
     !,
     (   atom(DOM)                       % already serialized XML text
     ->  format(Out, '~N~*|<', [Indent]),
         rdf_write_id(Out, NameText),
         format(Out, ' rdf:parseType="Literal">~w</', [DOM]),
         rdf_write_id(Out, NameText), write(Out, '>')
     ;   save_xml_literal(DOM, NameText, Out, Indent, Options)
     ).
 save_body_literal(type(Type, Text),
                   NameText, BaseURI, Out, Indent, Options) :-
     !,
     format(Out, '~N~*|<', [Indent]),
     rdf_write_id(Out, NameText),
     option(encoding(Encoding), Options, utf8),
     rdf_value(Type, BaseURI, QuotedType, Encoding),
     format(Out, ' rdf:datatype="~w">', [QuotedType]),
     save_attribute_value(Text, Out, Options),
     write(Out, '</'), rdf_write_id(Out, NameText), write(Out, '>').
 save_body_literal(Plain,
                   NameText, _, Out, Indent, Options) :-
     atomic(Plain),
     !,
     format(Out, '~N~*|<', [Indent]),
     rdf_write_id(Out, NameText),
     write(Out, '>'),
     save_attribute_value(Plain, Out, Options),
     write(Out, '</'), rdf_write_id(Out, NameText), write(Out, '>').
 save_body_literal(DOM,
                   NameText, BaseURI, Out, Indent, Options) :-
     rdf_equal(XMLLiteral, rdf:'XMLLiteral'),
     save_body_literal(type(XMLLiteral, DOM),
                       NameText, BaseURI, Out, Indent, Options).
 2972
 %!  save_attribute_value(+Value, +Out, +Options) is det.
 %
 %   Write the text of a literal. Atoms and strings are written after
 %   quoting with xml_quote_cdata/3, using the encoding(Encoding)
 %   option (default `utf8`). Numbers are written with writeq/2 so
 %   that floats keep their float syntax.
 %
 %   @error save_attribute_value(Value) for any other value.

 save_attribute_value(Value, Out, Options) :-
     (   (   atom(Value)
         ;   string(Value)
         )
     ->  option(encoding(Encoding), Options, utf8),
         xml_quote_cdata(Value, Quoted, Encoding),
         write(Out, Quoted)
     ;   number(Value)
     ->  writeq(Out, Value)
     ;   throw(error(save_attribute_value(Value), _))
     ).
 2987
 %!  save_xml_literal(+DOM, +Attr, +Out, +Indent, +Options) is det.
 %
 %   Save an XMLLiteral value. The literal is written with xml_write/3
 %   as an element named after Attr that carries
 %   `rdf:parseType="Literal"` and has DOM as content. The namespaces
 %   of the RDF document are taken from the nsmap(NsMap) option, which
 %   must be present in Options.
 %
 %   @error type_error(xml_dom, DOM) if DOM is not an XML DOM.

 save_xml_literal(DOM, Attr, Out, Indent, Options) :-
     (   xml_is_dom(DOM)
     ->  memberchk(nsmap(NsMap), Options),
         id_to_atom(Attr, ElementName),
         Element = element(ElementName, ['rdf:parseType'='Literal'], DOM),
         xml_write(Out, Element,
                   [ header(false),
                     indent(Indent),
                     nsmap(NsMap)
                   ])
     ;   must_be(xml_dom, DOM)
     ).
 3013
 %!  id_to_atom(+Id, -Atom) is det.
 %
 %   Turn NS:Local into the atom 'NS:Local'. Any other Id is returned
 %   unchanged.

 id_to_atom(Id, Atom) :-
     (   Id = NS:Local
     ->  atomic_list_concat([NS,Local], :, Atom)
     ;   Atom = Id
     ).
 3018
 3019
 %!  rdf_collection(+URI) is semidet.
 %
 %   True if URI represents an RDF list that fits the RDF
 %   parseType=Collection syntax. This means it is a linked list of
 %   blank-node cells that ends in rdf:nil. Each cell has exactly one
 %   rdf:first, which must be a resource, exactly one rdf:rest, and
 %   optionally an rdf:type that is rdf:List. No other properties are
 %   allowed on a cell.

 :- rdf_meta
     rdf_collection(r),
     collection_p(r,r).

 rdf_collection(rdf:nil) :- !.
 rdf_collection(Cell) :-
     rdf_is_bnode(Cell),
     findall(First, rdf(Cell, rdf:first, First), [_]),   % exactly one
     findall(Next, rdf(Cell, rdf:rest, Next), [Rest]),   % exactly one
     forall(rdf(Cell, Pred, Value),
            collection_p(Pred, Value)),
     rdf_collection(Rest).
 3039
 %!  collection_p(+Predicate, +Value) is semidet.
 %
 %   True if the Predicate/Value pair may appear on a cell of a list
 %   that is written using parseType=Collection: rdf:first with an
 %   atom value, any rdf:rest, or rdf:type rdf:List.

 collection_p(rdf:first, Value) :- atom(Value).
 collection_p(rdf:rest, _).
 collection_p(rdf:type, rdf:'List').
 3043
 3044
 %!  rdf_save_list(+Out, +List, +BaseURI, +Indent, +Options)
 %
 %   Write the members of the RDF list List as the content of a
 %   parseType=Collection element. A blank-node member is written as
 %   a nested subject; any other member as an empty `rdf:Description`
 %   with `rdf:about`. For every cell, 3 is added to the flag
 %   `rdf_db_saved_triples`.

 rdf_save_list(_, List, _, _, _) :-
     rdf_equal(List, rdf:nil),
     !.
 rdf_save_list(Out, List, BaseURI, Indent, Options) :-
     rdf_has(List, rdf:first, Member),
     (   rdf_is_bnode(Member)
     ->  nl(Out),
         rdf_save_subject(Out, Member, BaseURI, Indent, Options)
     ;   option(encoding(Encoding), Options, utf8),
         rdf_value(Member, BaseURI, Quoted, Encoding),
         format(Out, '~N~*|<rdf:Description rdf:about="~w"/>',
                [Indent, Quoted])
     ),
     flag(rdf_db_saved_triples, Saved, Saved+3),
     (   rdf_has(List, rdf:rest, Tail),
         \+ rdf_equal(Tail, rdf:nil)
     ->  rdf_save_list(Out, Tail, BaseURI, Indent, Options)
     ;   true
     ).
 3066
 3067
 %!  rdf_id(+Resource, +BaseURI, -NSLocal)
 %
 %   Generate a name for Resource to be used as an XML element or
 %   attribute name. The first applicable rule wins:
 %
 %     1. Resource is BaseURI followed by a local part starting with
 %        `#`: the local part.
 %     2. iri_xml_namespace/3 splits Resource into a namespace that
 %        has an alias in ns/2: NS:Local.
 %     3. Resource starts with any non-empty namespace from ns/2:
 %        NS:Local.
 %     4. Otherwise Resource itself.

 rdf_id(Resource, BaseURI, Local) :-
     assertion(atom(BaseURI)),
     atom_concat(BaseURI, Local, Resource),
     sub_atom(Local, 0, 1, _, #),
     !.
 rdf_id(Resource, _, Alias:Local) :-
     iri_xml_namespace(Resource, Namespace, Local),
     ns(Alias, Namespace),
     !.
 rdf_id(Resource, _, Alias:Local) :-
     ns(Alias, Namespace),
     Namespace \== '',
     atom_concat(Namespace, Local, Resource),
     !.
 rdf_id(Resource, _, Resource).
 3088
 3089
 %!  rdf_write_id(+Out, +NSLocal) is det.
 %
 %   Write an identifier. A term NS:Local is written as its two parts
 %   separated by a colon rather than with write/2 on the whole term,
 %   because both NS and Local can be operators.

 rdf_write_id(Out, Id) :-
     (   Id = NS:Local
     ->  format(Out, '~w:~w', [NS, Local])
     ;   write(Out, Id)
     ).
 3100
 %!  rdf_value(+Resource, +BaseURI, -Text, +Encoding)
 %
 %   Text is the quoted form of Resource for use as an XML attribute
 %   value. The first applicable rule wins:
 %
 %     1. Resource equals BaseURI: the empty atom.
 %     2. Resource is BaseURI followed by a part starting with `#`:
 %        that part, quoted.
 %     3. Resource is a namespace from ns/2 followed by an XML name:
 %        the entity reference `&NS;` followed by the quoted name.
 %     4. Otherwise the quoted Resource.
 %
 %   According to "6.4 RDF URI References" of the RDF Syntax
 %   specification, a URI reference is a UNICODE string not containing
 %   control sequences, represented as UTF-8 and then as escaped
 %   US-ASCII.

 rdf_value(Base, Base, '', _) :- !.
 rdf_value(Resource, Base, Text, Encoding) :-
     atom_concat(Base, Fragment, Resource),
     sub_atom(Fragment, 0, _, _, #),
     !,
     xml_quote_attribute(Fragment, Text, Encoding).
 rdf_value(Resource, _, Text, Encoding) :-
     ns(Alias, Namespace),
     atom_concat(Namespace, Local, Resource),
     xml_is_name(Local),
     !,
     xml_quote_attribute(Local, QuotedLocal, Encoding),
     atomic_list_concat(['&', Alias, (';'), QuotedLocal], Text).
 rdf_value(Resource, _, Text, Encoding) :-
     xml_quote_attribute(Resource, Text, Encoding).
 3123
 3124
 3125                 /*******************************
 3126                 *       MATCH AND COMPARE      *
 3127                 *******************************/
 3128
 3129%!  rdf_compare(-Dif, +Object1, +Object2) is det.
 3130%
 3131%   Compare  two  object  terms.  Where  SPARQL  defines  a  partial
 3132%   ordering, we define a complete ordering   of terms. The ordering
 3133%   is defined as:
 3134%
 3135%     - Blank nodes < IRIs < Literals
 3136%     - Numeric literals < other literals
 3137%     - Numeric literals are compared by value and then by type,
 3138%       where Integer < Decimal < Double
 3139%     - Other literals are compared lexically, case insensitive.
 3140%       If equal, uppercase precedes lowercase.  If still equal,
 3141%       the types are compared lexically.
 3142
 3143%!  rdf_match_label(+How, +Pattern, +Label) is semidet.
 3144%
 3145%   True if Label matches Pattern according to   How.  How is one of
 3146%   `icase`, `substring`, `word`, `prefix` or   `like`. For backward
 3147%   compatibility, `exact` is a synonym for `icase`.
 3148
 3149
 3150                 /*******************************
 3151                 *      DEPRECATED MATERIAL     *
 3152                 *******************************/
 3153
 %!  rdf_split_url(+Prefix, +Local, -URL) is det.
 %!  rdf_split_url(-Prefix, -Local, +URL) is det.
 %
 %   Split or join a URL. If URL is atomic it is split using
 %   iri_xml_namespace/3. Otherwise URL is the concatenation of Prefix
 %   and Local.
 %
 %   @deprecated Use iri_xml_namespace/3. Note that the argument
 %   order is iri_xml_namespace(+IRI, -Namespace, -Localname).

 rdf_split_url(Prefix, Local, URL) :-
     (   atomic(URL)
     ->  iri_xml_namespace(URL, Prefix, Local)
     ;   atom_concat(Prefix, Local, URL)
     ).
 3168
 %!  rdf_url_namespace(+URL, -Namespace)
 %
 %   Namespace is the namespace part of URL, as computed by
 %   iri_xml_namespace/2.
 %
 %   @deprecated Use iri_xml_namespace/2

 rdf_url_namespace(URL, Namespace) :-
     iri_xml_namespace(URL, Namespace).
 3177
 3178
 3179                 /*******************************
 3180                 *            LITERALS          *
 3181                 *******************************/
 3182
 3183%!  rdf_new_literal_map(-Map) is det.
 3184%
 3185%   Create a new literal map, returning an opaque handle.
 3186
 3187%!  rdf_destroy_literal_map(+Map) is det.
 3188%
 3189%   Destroy a literal map. After this call,   further use of the Map
 3190%   handle is illegal. Additional synchronisation  is needed if maps
 3191%   that are shared between threads are   destroyed to guarantee the
 3192%   handle    is    no    longer    used.    In    some    scenarios
 3193%   rdf_reset_literal_map/1 provides a safe alternative.
 3194
 3195%!  rdf_reset_literal_map(+Map) is det.
 3196%
 3197%   Delete all content from the literal map.
 3198
 3199%!  rdf_insert_literal_map(+Map, +Key, +Value) is det.
 3200%
 3201%   Add a relation between  Key  and  Value   to  the  map.  If this
 3202%   relation already exists no action is performed.
 3203
 3204%!  rdf_insert_literal_map(+Map, +Key, +Value, -KeyCount) is det.
 3205%
 3206%   As rdf_insert_literal_map/3. In addition, if Key is a new key in
 3207%   Map, unify KeyCount with the number of  keys in Map. This serves
 3208%   two purposes. Derived maps, such as  the stem and metaphone maps
 3209%   need to know about new  keys   and  it avoids additional foreign
 3210%   calls for doing the progress in rdf_litindex.pl.
 3211
 3212%!  rdf_delete_literal_map(+Map, +Key) is det.
 3213%
 3214%   Delete Key and all associated values from the map.
 3215
 3216%!  rdf_delete_literal_map(+Map, +Key, +Value) is det.
 3217%
 3218%   Delete the association between Key and Value from the map.
 3219
 3220%!  rdf_find_literal_map(+Map, +KeyList, -ValueList) is det.
 3221%
 3222%   Unify ValueList with an ordered set  of values associated to all
 3223%   keys from KeyList. Each key in  KeyList   is  either an atom, an
 3224%   integer or a term not(Key).  If   not-terms  are provided, there
 3225%   must be at least one positive key. The negations are tested
 3226%   after establishing the positive matches.
 3227
 3228%!  rdf_keys_in_literal_map(+Map, +Spec, -Answer) is det.
 3229%
 3230%   Realises various queries on the key-set:
 3231%
 3232%     * all
 3233%
 3234%     Unify Answer with an ordered list of all keys.
 3235%     * key(+Key)
 3236%
 3237%     Succeeds if Key is a key in the map and unify Answer with the
 3238%     number of values associated with the key. This provides a fast
 3239%     test of existence without fetching the possibly large
 3240%     associated value set as with rdf_find_literal_map/3.
 3241%
 3242%     * prefix(+Prefix)
 3243%     Unify Answer with an ordered set of all keys that have the
 3244%     given prefix. See section 3.1 for details on prefix matching.
 3245%     Prefix must be an atom. This call is intended for
 3246%     auto-completion in user interfaces.
 3247%
 3248%     * ge(+Min)
 3249%     Unify Answer with all keys that are larger or equal to the
 3250%     integer Min.
 3251%
 3252%     * le(+Max)
 3253%     Unify Answer with all keys that are smaller or equal to the integer
 3254%     Max.
 3255%
 3256%     * between(+Min, +Max)
 3257%     Unify Answer with all keys between Min and Max (inclusive).
 3258
 3259%!  rdf_statistics_literal_map(+Map, -KeyValue)
 3260%
 3261%   Query some statistics of the map. Provided KeyValue terms are:
 3262%
 3263%     * size(-Keys, -Relations)
 3264%     Unify Keys with the total key-count of the index and Relations
 3265%     with the total Key-Value count.
 3266
 3267
 3268
 3269                 /*******************************
 3270                 *             MISC             *
 3271                 *******************************/
 3272
 3273%!  rdf_version(-Version) is det.
 3274%
 3275%   True when Version is the numerical version-id of this library.
 3276%   The version is computed as
 3277%
 3278%           Major*10000 + Minor*100 + Patch.
 3279
 3280%!  rdf_set(+Term) is det.
 3281%
 3282%   Set properties of the RDF store.  Currently defines:
 3283%
 3284%     * hash(+Hash, +Parameter, +Value)
 3285%     Set properties for a triple index.  Hash is one of =s=,
 3286%     =p=, =sp=, =o=, =po=, =spo=, =g=, =sg= or =pg=.  Parameter
 3287%     is one of:
 3288%
 3289%       - size
 3290%       Value defines the number of entries in the hash-table.
 3291%       Value is rounded _down_ to a power of 2.  After setting
 3292%       the size explicitly, auto-sizing for this table is
 3293%       disabled.  Setting the size smaller than the current
 3294%       size results in a =permission_error= exception.
 3295%
 3296%       - average_chain_len
 3297%       Set maximum average collision number for the hash.
 3298%
 3299%       - optimize_threshold
 3300%       Related to resizing hash-tables.  If 0, all triples are
 3301%       moved to the new size by the garbage collector.  If more
 3302%       than zero, those of the last Value resize steps remain at
 3303%       their current location.  Leaving cells at their current
 3304%       location reduces memory fragmentation and slows down
 3305%       access.
 3306
 3307%!  rdf_md5(+Graph, -MD5) is det.
 3308%
 3309%   True when MD5 is the MD5 hash for  all triples in graph. The MD5
 3310%   digest itself is represented as an   atom holding a 32-character
 3311%   hexadecimal   string.   The   library   maintains   the   digest
 3312%   incrementally on rdf_load/[1,2], rdf_load_db/1, rdf_assert/[3,4]
 3313%   and  rdf_retractall/[3,4].  Checking  whether   the  digest  has
 3314%   changed since the last rdf_load/[1,2]  call provides a practical
 3315%   means for checking whether the file needs to be saved.
 3316%
 3317%   @deprecated New code should use rdf_graph_property(Graph,
 3318%   hash(Hash)).
 3319
 3320%!  rdf_generation(-Generation) is det.
 3321%
 3322%   True when Generation is the current  generation of the database.
 3323%   Each modification to the database  increments the generation. It
 3324%   can be used to check the validity of cached results deduced from
 3325%   the database. Committing a non-empty  transaction increments the
 3326%   generation by one.
 3327%
 3328%   When inside a transaction,  Generation  is   unified  to  a term
 3329%   _TransactionStartGen_ + _InsideTransactionGen_. E.g.,  4+3 means
 3330%   that the transaction was started at   generation 4 of the global
 3331%   database and we have  created  3   new  generations  inside  the
 3332%   transaction. Note that this choice  of representation allows for
 3333%   comparing  generations  using  Prolog  arithmetic.  Comparing  a
 3334%   generation in one  transaction  with   a  generation  in another
 3335%   transaction is meaningless.
 3336
 3337%!  rdf_estimate_complexity(?Subject, ?Predicate, ?Object, -Complexity)
 3338%
 3339%   Return the number of alternatives as   indicated by the database
 3340%   internal hashed indexing. This is a rough measure for the number
 3341%   of alternatives we can expect for   an  rdf_has/3 call using the
 3342%   given three arguments. When  called   with  three variables, the
 3343%   total number of triples is returned.   This  estimate is used in
 3344%   query  optimisation.  See  also    rdf_predicate_property/2  and
 3345%   rdf_statistics/1 for additional information to help optimizers.
 3346
 3347%!  rdf_debug(+Level) is det.
 3348%
%   Set debugging to Level.  Level is an integer 0..9.  The default
%   is 0 (no debugging).
 3351
 3352%!  rdf_atom_md5(+Text, +Times, -MD5) is det.
 3353%
 3354%   Computes the MD5 hash from Text, which is an atom, string or list of
 3355%   character codes. Times is  an  integer  >=   1.  When  >  0, the MD5
 3356%   algorithm is repeated Times times on the generated hash. This can be
 3357%   used for password encryption algorithms   to  make generate-and-test
 3358%   loops slow.
 3359%
 3360%   @deprecated Obviously, password hash  primitives   do  not belong in
%   this library. The  library(crypto)  from   the  `ssl`  package
%   provides extensive support for  hashes.   The  `clib`  package
 3363%   provides library(crypt) to  access  the   OS  (Unix)  password  hash
 3364%   implementation as well as  lightweight   implementations  of several
 3365%   popular hashes.
 3366
 3367
 3368                 /*******************************
 3369                 *             MESSAGES         *
 3370                 *******************************/
 3371
 3372:- multifile
 3373    prolog:message//1. 3374
 3375prolog:message(rdf(Term)) -->
 3376    message(Term).
 3377
 3378message(loaded(How, What, BaseURI, Triples, Time)) -->
 3379    how(How),
 3380    source(What),
 3381    into(What, BaseURI),
 3382    in_time(Triples, Time).
 3383message(save_removed_duplicates(N, Subject)) -->
 3384    [ 'Removed ~d duplicate triples about "~p"'-[N,Subject] ].
 3385message(saved(File, SavedSubjects, SavedTriples)) -->
 3386    [ 'Saved ~D triples about ~D subjects into ~p'-
 3387      [SavedTriples, SavedSubjects, File]
 3388    ].
 3389message(using_namespace(Id, NS)) -->
 3390    [ 'Using namespace id ~w for ~w'-[Id, NS] ].
 3391message(inconsistent_cache(DB, Graphs)) -->
 3392    [ 'RDF cache file for ~w contains the following graphs'-[DB], nl,
 3393      '~t~8|~p'-[Graphs]
 3394    ].
 3395message(guess_format(Ext)) -->
 3396    [ 'Unknown file-extension: ~w.  Assuming RDF/XML'-[Ext] ].
 3397message(meta(not_expanded(G))) -->
 3398    [ 'rdf_meta/1: ~p is not expanded'-[G] ].
 3399message(deprecated(rdf_unload(Graph))) -->
 3400    [ 'rdf_unload/1: Use ~q'-[rdf_unload_graph(Graph)] ].
 3401
 3402
 3403how(load)   --> [ 'Loaded' ].
 3404how(parsed) --> [ 'Parsed' ].
 3405
 3406source(SourceURL) -->
 3407    { uri_file_name(SourceURL, File),
 3408      !,
 3409      file_base_name(File, Base)    % TBD: relative file?
 3410    },
 3411    [ ' "~w"'-[Base] ].
 3412source(SourceURL) -->
 3413    [ ' "~w"'-[SourceURL] ].
 3414
 3415into(_, _) --> [].                      % TBD
 3416
 3417in_time(Triples, ParseTime) -->
 3418    [ ' in ~2f sec; ~D triples'-[ParseTime, Triples]
 3419    ]