View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2014-2015, VU University Amsterdam
    7    All rights reserved.
    8
    9    Redistribution and use in source and binary forms, with or without
   10    modification, are permitted provided that the following conditions
   11    are met:
   12
   13    1. Redistributions of source code must retain the above copyright
   14       notice, this list of conditions and the following disclaimer.
   15
   16    2. Redistributions in binary form must reproduce the above copyright
   17       notice, this list of conditions and the following disclaimer in
   18       the documentation and/or other materials provided with the
   19       distribution.
   20
   21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   24    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   25    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   26    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   27    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   28    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   29    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   31    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   32    POSSIBILITY OF SUCH DAMAGE.
   33*/
   34
   35:- module(rdfa,
   36	  [ read_rdfa/3,                % +Input, -RDF, +Options
   37	    xml_rdfa/3                  % +XMLDom, -RDF, +Options
   38	  ]).   39:- use_module(library(semweb/rdf_db),
   40	    [ rdf_register_prefix/2,
   41	      rdf_meta/1,
   42	      rdf_global_id/2,
   43	      rdf_equal/2,
   44	      rdf_is_bnode/1,
   45	      rdf_global_term/2,
   46	      rdf_transaction/2,
   47	      rdf_assert/4,
   48	      rdf_set_graph/2,
   49	      op(_,_,_)
   50	    ]).   51:- use_module(library(xpath),[xpath/3, op(_,_,_)]).   52
   53:- autoload(library(apply),[maplist/3,maplist/2,exclude/3,include/3]).   54:- use_module(library(debug),[debugging/1,debug/3]).   55:- autoload(library(error),[instantiation_error/1,type_error/2]).   56:- if(exists_source(library(guitracer))).   57:- autoload(library(gui_tracer),[gtrace/0]).   58:- endif.   59:- autoload(library(lists),[append/2,reverse/2,member/2,append/3]).   60:- autoload(library(option),[merge_options/3,option/2,option/3]).   61:- autoload(library(prolog_stack),[backtrace/1]).   62:- autoload(library(sgml),
   63	    [ load_xml/3, load_html/3, xml_basechar/1, xml_ideographic/1,
   64	      xml_digit/1, xml_combining_char/1, xml_extender/1
   65	    ]).   66:- autoload(library(sgml_write),[xml_write/2]).   67:- autoload(library(uri),
   68	    [ uri_file_name/2, uri_components/2, uri_data/3, uri_data/4,
   69	      iri_normalized/3, iri_normalized/2, uri_normalized/3
   70	    ]).   71:- autoload(library(dcg/basics),[blanks/2,blank/2,alpha_to_lower/3]).   72:- autoload(library(http/http_open),[http_open/3]).   73
   74/** <module> Extract RDF from an HTML or XML DOM
   75
   76This module implements extraction of  RDFa   triples  from parsed XML or
   77HTML documents. It has two interfaces:  read_rdfa/3 to read triples from
   78some input (stream, file, URL) and xml_rdfa/3 to extract triples from an
   79HTML or XML  document  that  is   already  parsed  with  load_html/3  or
   80load_xml/3.
   81
   82@see http://www.w3.org/TR/2013/REC-rdfa-core-20130822/
   83@see http://www.w3.org/TR/html-rdfa/
   84*/
   85
   86:- rdf_register_prefix(rdfa, 'http://www.w3.org/ns/rdfa#').   87
   88:- rdf_meta
   89    add_triple(+, r, r, o),
   90    add_incomplete_triple(+, t).   91
   92:- discontiguous
   93    term_expansion/2.   94
   95:- predicate_options(xml_rdfa/3, 3,
   96		     [ base(atom),
   97		       anon_prefix(any),
   98		       lang(atom),
   99		       vocab(atom),
  100		       markup(atom)
  101		     ]).  102:- predicate_options(read_dom/3, 3,
  103		     [ pass_to(sgml:load_html/3, 3),
  104		       pass_to(sgml:load_xml/3, 3)
  105		     ]).  106:- predicate_options(read_rdfa/3, 3,
  107		     [ pass_to(read_dom/3, 3),
  108		       pass_to(xml_rdfa/3, 3),
  109		       pass_to(system:open/4, 4),
  110		       pass_to(http_open:http_open/3, 3)
  111		     ]).  112
  113
  114		 /*******************************
  115		 *          STREAM READING      *
  116		 *******************************/
  117
  118%!  read_rdfa(+Input, -Triples, +Options) is det.
  119%
  120%   True when Triples is a list of rdf(S,P,O) triples extracted from
  121%   Input. Input is either a stream, a  file name, a URL referencing
  122%   a file name or a URL that  is valid for http_open/3. Options are
  123%   passed to open/4, http_open/3 and  xml_rdfa/3.   If  no  base is
  124%   provided in Options, a base is deduced from Input.
  125
  126read_rdfa(Input, Triples, Options) :-
  127    setup_call_cleanup(
  128	open_input(Input, In, NewOptions, Close, Options),
  129	read_dom(In, DOM, Options),
  130	close_input(Close)),
  131    merge_options(Options, NewOptions, RDFaOptions),
  132    xml_rdfa(DOM, Triples, RDFaOptions).
  133
  134open_input(Input, In, NewOptions, Close, Options) :-
  135    open_input2(Input, In, NewOptions, Close0, Options),
  136    detect_bom(In, Close0, Close).
  137
  138open_input2(stream(In), In, Options, true, _) :-
  139    !,
  140    (   stream_property(In, file_name(Name)),
  141	to_uri(Name, URI)
  142    ->  Options = [base(URI)]
  143    ;   Options = []
  144    ).
  145open_input2(In, In, Options, true, _) :-
  146    is_stream(In),
  147    !,
  148    (   stream_property(In, file_name(Name)),
  149	to_uri(Name, URI)
  150    ->  Options = [base(URI)]
  151    ;   Options = []
  152    ).
  153open_input2(URL, In, [base(URL)], close(In), Options) :-
  154    atom(URL),
  155    uri_file_name(URL, File),
  156    !,
  157    open(File, read, In, Options).
  158open_input2(URL, In, [base(Base)], close(In), Options) :-
  159    atom(URL),
  160    to_uri2(URL, Base),
  161    !,
  162    http_open(URL, In, Options).
  163open_input2(File, In, [base(URI)], close(In), Options) :-
  164    absolute_file_name(File, Path, [access(read)]),
  165    uri_file_name(URI, Path),
  166    open(Path, read, In, Options).
  167
  168%!  detect_bom(+In, +Close0, -Close) is det.
  169%
  170%   We may be loading a binary stream. In   that  case we want to do
  171%   BOM detection.
  172
  173detect_bom(In, Close0, Close) :-
  174    stream_property(In, type(binary)),
  175    stream_property(In, encoding(Enc)),
  176    catch(set_stream(In, encoding(bom)),_,fail),
  177    !,
  178    merge_close(Close0, set_stream(In, encoding(Enc)), Close).
  179detect_bom(_, Close, Close).
  180
  181merge_close(true, Close, Close) :- !.
  182merge_close(Close, _, Close).
  183
  184to_uri(URI0, URI) :-
  185    to_uri2(URI0, URI),
  186    !.
  187to_uri(URI0, URI) :-
  188    absolute_file_name(URI0, Path),
  189    uri_file_name(URI, Path).
  190
  191to_uri2(URI0, Base) :-
  192    uri_components(URI0, Components),
  193    uri_data(scheme, Components, Scheme),
  194    ground(Scheme),
  195    http_scheme(Scheme),
  196    !,
  197    uri_data(fragment, Components, _, Components2),
  198    uri_components(Base, Components2).
  199
  200http_scheme(http).
  201http_scheme(https).
  202
  203close_input(true).
  204close_input(close(X)) :- close(X).
  205close_input(set_stream(In, encoding(Enc))) :- set_stream(In, encoding(Enc)).
  206
  207read_dom(In, DOM, Options) :-
  208    option(dialect(Dialect), Options),
  209    !,
  210    (   xml_dialect(Dialect)
  211    ->  load_xml(stream(In), DOM, Options)
  212    ;   load_html(stream(In), DOM, Options)
  213    ).
  214read_dom(In, DOM, Options) :-
  215    peek_string(In, 1000, Start),
  216    guess_dialect(Start, Dialect),
  217    read_dom(In, DOM, [dialect(Dialect)|Options]).
  218
  219xml_dialect(xml).
  220xml_dialect(xmlns).
  221xml_dialect(svg).
  222xml_dialect(xhtml).
  223xml_dialect(xhtml5).
  224
  225guess_dialect(Start, Dialect) :-
  226    sub_string(Start, _, _, _, "<?xml"),
  227    !,
  228    Dialect = xml.
  229guess_dialect(Start, Dialect) :-
  230    sub_string(Start, _, _, _, "<html"),
  231    !,
  232    (   sub_string(Start, _, _, _, "xmlns:")
  233    ->  Dialect = xhtml
  234    ;   string_codes(Start, Codes),
  235	phrase(html_doctype(DialectFound), Codes, _)
  236    ->  Dialect = DialectFound
  237    ;   Dialect = html
  238    ).
  239guess_dialect(Start, Dialect) :-
  240    sub_string(Start, _, _, _, "<svg"),
  241    !,
  242    Dialect = svg.
  243guess_dialect(_, xml).
  244
  245html_doctype(html5) -->
  246    blanks,
  247    "<!DOCTYPE", blank, blanks, "html", blanks, ">",
  248    !.
  249html_doctype(html4) -->
  250    blanks,
  251    "<!", icase_string(`doctype`), blank, blanks, icase_string(`html`),
  252    blank, blanks,
  253    icase_string(`public`),
  254    blank,
  255    !.
  256
  257icase_string([]) --> [].
  258icase_string([H|T]) --> alpha_to_lower(H), icase_string(T).
  259
  260
  261		 /*******************************
  262		 *        DOM PROCESSING        *
  263		 *******************************/
  264
  265%!  xml_rdfa(+DOM, -RDF, +Options)
  266%
  267%   True when RDF is a list of   rdf(S,P,O) terms extracted from DOM
  268%   according to the RDFa specification. Options processed:
  269%
  270%     * base(+BaseURI)
  271%     URI to use for ''. Normally set to the document URI.
  272%     * anon_prefix(+AnnonPrefix)
  273%     Prefix for blank nodes.
  274%     * lang(+Lang)
  275%     Default for =lang=
  276%     * vocab(+Vocab)
  277%     Default for =vocab=
  278%     * markup(+Markup)
  279%     Markup language processed (xhtml, xml, ...)
  280
  281xml_rdfa(DOM, _, _) :-
  282    var(DOM),
  283    !,
  284    instantiation_error(DOM).
  285xml_rdfa(DOM, RDF, Options) :-
  286    is_list(DOM),
  287    !,
  288    maplist(xml_rdfa_aux(Options), DOM, RDFList),
  289    append(RDFList, RDF).
  290xml_rdfa(DOM, RDF, Options) :-
  291    DOM = element(_,_,_),
  292    !,
  293    rdfa_evaluation_context(DOM, EvalContext, Options),
  294    process_node(DOM, EvalContext),
  295    arg(1, EvalContext.triples, List),
  296    reverse(List, RDF0),
  297    apply_patterns(RDF0, RDF).
  298% XML Processing Instruction (PI).
  299xml_rdfa(DOM, [], _) :-
  300    DOM = pi(_),
  301    !.
  302xml_rdfa(DOM, _, _) :-
  303    type_error(xml_dom, DOM).
  304
  305xml_rdfa_aux(Options, DOM, RDF) :-
  306    xml_rdfa(DOM, RDF, Options).
  307
  308process_node(DOM, EvalContext) :-
  309    rdfa_local_context(EvalContext, LocalContext),  % 7.5.1
  310    update_vocab(DOM, LocalContext),                % 7.5.2
  311    update_prefixes(DOM, LocalContext),             % 7.5.3
  312    update_lang(DOM, LocalContext),                 % 7.5.4
  313    update_subject(DOM, LocalContext),              % 7.5.5, 7.5.6
  314    emit_typeof(DOM, LocalContext),                 % 7.5.7
  315    update_list_mapping(DOM, LocalContext),         % 7.5.8
  316    step_7_5_9(DOM, LocalContext),                  % 7.5.9
  317    step_7_5_10(DOM, LocalContext),                 % 7.5.10
  318    update_property_value(DOM, LocalContext),       % 7.5.11
  319    complete_triples(LocalContext),                 % 7.5.12
  320    descent(DOM, LocalContext),                     % 7.5.13
  321    complete_lists(LocalContext),
  322    !.                % 7.5.14
  323:- if(current_predicate(gtrace/0)).  324process_node(DOM, EvalContext) :-
  325    print_message(warning, rdfa(failed(DOM, EvalContext))),
  326    (   debugging(rdfa(test))
  327    ->  gtrace,
  328	process_node(DOM, EvalContext)
  329    ;   true
  330    ).
  331:- endif.  332
  333%!  rdfa_evaluation_context(+DOM, -Context, +Options)
  334%
  335%   7.5.0: Create the initial evaluation context
  336%
  337%   @tbd:   derive markup from DOM
  338
  339rdfa_evaluation_context(DOM, Context, Options) :-
  340    Context = rdfa_eval{base:Base,                  % atom
  341			parent_subject:Base,        % atom
  342			parent_object:null,         % null or atom
  343			incomplete_triples:[],      % list
  344			list_mapping:ListMapping,   % IRI --> list(List)
  345			lang:Lang,                  % null or atom
  346			iri_mapping:IRIMappings,    % dict
  347			term_mapping:TermMappings,  % dict
  348			vocab:Vocab,                % null or atom
  349			bnode_id:bnode(1),          % integer
  350			markup:Markup,              % Processing profile
  351			anon_prefix:AnonPrefix,
  352			named_bnodes:r{v:_{}},
  353			root:DOM,                   % XML DOM
  354			triples:triples([])},       % list
  355    empty_list_mapping(ListMapping),
  356    option(markup(Markup), Options, xhtml),
  357    base(DOM, Options, Base),
  358    default_vocab(Markup, DefaultVocab),
  359    option(lang(Lang), Options, ''),
  360    option(vocab(Vocab), Options, DefaultVocab),
  361    (   option(anon_prefix(AnonPrefix), Options)
  362    ->  true
  363    ;   atom_concat('__', Base, AnonPrefix)
  364    ),
  365    default_prefixes(Markup, DefPrefixes),
  366    mapping(prefixes(IRIMappings0), Options),
  367    put_dict(DefPrefixes, IRIMappings0, IRIMappings),
  368    mapping(terms(TermMappings), Options).
  369
  370base(DOM, _Options, Base) :-
  371    xpath(DOM, //base(@href=Base), _),
  372    !.
  373base(_DOM, Options, Base) :-
  374    option(base(Base0), Options),
  375    rdf_global_id(Base0, Base),
  376    !.
  377base(_, _, 'http://www.example.org/').
  378
  379mapping(Term, Options) :-
  380    Term =.. [Name, Value],
  381    (   TermG =.. [Name, Var],
  382	option(TermG, Options)
  383    ->  dict_create(Value, Name, Var)
  384    ;   dict_create(Value, Name, [])
  385    ).
  386
  387%!  default_prefixes(+Markup, -Dict)
  388%
  389%   Create a default prefix map. Which   prefixes are supposed to be
  390%   in this map?
  391
  392default_prefixes(Markup, _{'':DefPrefix}) :-
  393    default_prefix_mapping(Markup, DefPrefix).
  394
  395%!  rdfa_core_prefix(?Prefix, ?URI) is nondet.
  396%
  397%   RDFa initial context prefix declarations.
  398%
  399%   @see http://www.w3.org/2011/rdfa-context/rdfa-1.1
  400
  401rdfa_core_prefix(dcat,    'http://www.w3.org/ns/dcat#').
  402rdfa_core_prefix(qb,      'http://purl.org/linked-data/cube#').
  403rdfa_core_prefix(grddl,   'http://www.w3.org/2003/g/data-view#').
  404rdfa_core_prefix(ma,      'http://www.w3.org/ns/ma-ont#').
  405rdfa_core_prefix(org,     'http://www.w3.org/ns/org#').
  406rdfa_core_prefix(owl,     'http://www.w3.org/2002/07/owl#').
  407rdfa_core_prefix(prov,    'http://www.w3.org/ns/prov#').
  408rdfa_core_prefix(rdf,     'http://www.w3.org/1999/02/22-rdf-syntax-ns#').
  409rdfa_core_prefix(rdfa,    'http://www.w3.org/ns/rdfa#').
  410rdfa_core_prefix(rdfs,    'http://www.w3.org/2000/01/rdf-schema#').
  411rdfa_core_prefix(rif,     'http://www.w3.org/2007/rif#').
  412rdfa_core_prefix(rr,      'http://www.w3.org/ns/r2rml#').
  413rdfa_core_prefix(sd,      'http://www.w3.org/ns/sparql-service-description#').
  414rdfa_core_prefix(skos,    'http://www.w3.org/2004/02/skos/core#').
  415rdfa_core_prefix(skosxl,  'http://www.w3.org/2008/05/skos-xl#').
  416rdfa_core_prefix(wdr,     'http://www.w3.org/2007/05/powder#').
  417rdfa_core_prefix(void,    'http://rdfs.org/ns/void#').
  418rdfa_core_prefix(wdrs,    'http://www.w3.org/2007/05/powder-s#').
  419rdfa_core_prefix(xhv,     'http://www.w3.org/1999/xhtml/vocab#').
  420rdfa_core_prefix(xml,     'http://www.w3.org/XML/1998/namespace').
  421rdfa_core_prefix(xsd,     'http://www.w3.org/2001/XMLSchema#').
  422rdfa_core_prefix(cc,      'http://creativecommons.org/ns#').
  423rdfa_core_prefix(ctag,    'http://commontag.org/ns#').
  424rdfa_core_prefix(dc,      'http://purl.org/dc/terms/').
  425rdfa_core_prefix(dcterms, 'http://purl.org/dc/terms/').
  426rdfa_core_prefix(dc11,    'http://purl.org/dc/elements/1.1/').
  427rdfa_core_prefix(foaf,    'http://xmlns.com/foaf/0.1/').
  428rdfa_core_prefix(gr,      'http://purl.org/goodrelations/v1#').
  429rdfa_core_prefix(ical,    'http://www.w3.org/2002/12/cal/icaltzd#').
  430rdfa_core_prefix(og,      'http://ogp.me/ns#').
  431rdfa_core_prefix(rev,     'http://purl.org/stuff/rev#').
  432rdfa_core_prefix(sioc,    'http://rdfs.org/sioc/ns#').
  433rdfa_core_prefix(v,       'http://rdf.data-vocabulary.org/#').
  434rdfa_core_prefix(vcard,   'http://www.w3.org/2006/vcard/ns#').
  435rdfa_core_prefix(schema,  'http://schema.org/').
  436
  437default_prefix_mapping(xhtml, 'http://www.w3.org/1999/xhtml/vocab#') :- !.
  438default_prefix_mapping(_,     'http://www.example.org/').
  439
  440default_vocab(_, '').
  441
  442%!  rdfa_local_context(EvalContext, LocalContext)
  443%
  444%   7.5.1: Create the local context
  445
  446rdfa_local_context(EvalContext, LocalContext) :-
  447    LocalContext = rdfa_local{skip_element:false,
  448			      new_subject:null,
  449			      current_object_resource:null,
  450			      typed_resource:null,
  451			      iri_mapping:IRIMappings,
  452			      incomplete_triples:[],
  453			      list_mapping:ListMapping,
  454			      lang:Lang,
  455			      term_mapping:TermMapping,
  456			      vocab:Vocab,
  457			      eval_context:EvalContext
  458			     },
  459    _{ iri_mapping:IRIMappings,
  460       list_mapping:ListMapping,
  461       lang:Lang,
  462       term_mapping:TermMapping,
  463       vocab:Vocab
  464     } :< EvalContext.
  465
  466
  467%!  update_vocab(+DOM, +Context) is det.
  468%
  469%   7.5.2.  Handle @vocab
  470
  471update_vocab(DOM, Context) :-
  472    xpath(DOM, /(*(@vocab=Vocab0)), _),
  473    !,
  474    (   Vocab0 == ''
  475    ->  Vocab = ''                  % Host Language defined default?
  476    ;   iri(Vocab0, Vocab, Context)
  477    ),
  478    nb_set_dict(vocab, Context, Vocab),
  479    add_triple(Context,
  480	       Context.eval_context.base,
  481	       rdfa:usesVocabulary,
  482	       Vocab).
  483update_vocab(_, _).
  484
  485%!  update_prefixes(+DOM, +Context) is det.
  486%
  487%   7.5.3:  Update  prefix  map  using  @prefix  and  @xmlns.  First
  488%   processes xmlns:Prefix=IRI.
  489
  490update_prefixes(DOM, Context) :-
  491    DOM=element(_,Attrs,_),
  492    xmlns_dict(Attrs, _{}, Dict0),
  493    (   xpath(DOM, /(*(@prefix=PrefixDecl)), _)
  494    ->  prefix_dict(PrefixDecl, Dict0, Dict)
  495    ;   Dict = Dict0
  496    ),
  497    Dict \= _{},
  498    !,
  499    put_dict(Dict, Context.iri_mapping, NewMapping),
  500    b_set_dict(iri_mapping, Context, NewMapping).
  501update_prefixes(_, _).
  502
  503xmlns_dict([], Dict, Dict).
  504xmlns_dict([Attr=IRI|T0], Dict0, Dict) :-
  505    (   Attr = xmlns:Name
  506    ;   atom_concat('xmlns:', Name, Attr)
  507    ),
  508    !,
  509    downcase_atom(Name, Prefix),
  510    put_dict(Prefix, Dict0, IRI, Dict1),
  511    xmlns_dict(T0, Dict1, Dict).
  512xmlns_dict([_|T0], Dict0, Dict) :-
  513    xmlns_dict(T0, Dict0, Dict).
  514
  515prefix_dict(Text, Dict0, Dict) :-
  516    atom_codes(Text, Codes),
  517    phrase(prefixes(Dict0, Dict), Codes).
  518
  519%!  update_lang(+DOM, +Context) is det.
  520%
  521%   7.5.4: Update lang
  522
  523update_lang(DOM, Context) :-
  524    DOM=element(_,Attrs,_),
  525    (   (   memberchk(xml:lang=Lang, Attrs)         % XML with namespaces
  526	;   memberchk('xml:lang'=Lang, Attrs)       % XML without namespaces
  527	;   memberchk(lang=Lang, Attrs)             % HTML 5
  528	)
  529    ->  nb_set_dict(lang, Context, Lang)
  530    ;   true
  531    ),
  532    (   (   memberchk(xml:base=Base, Attrs)         % XML with namespaces
  533	;   memberchk('xml:base'=Base, Attrs)       % XML without namespaces
  534	)
  535    ->  nb_set_dict(base, Context.eval_context, Base)
  536    ;   true
  537    ).
  538
  539
  540%!  update_subject(+DOM, +Context) is det.
  541%
  542%   7.5.5 and 7.5.6: establish a value for new subject
  543
  544update_subject(DOM, Context) :-
  545    DOM=element(E,Attrs,_),
  546    \+ has_attribute(rel, Attrs, Context),
  547    \+ has_attribute(rev, Attrs, Context),    % Commit to rule-set 7.5.5
  548    !,
  549    (   memberchk(property=_, Attrs),
  550	\+ memberchk(content=_, Attrs),
  551	\+ memberchk(datatype=_, Attrs)
  552    ->  (   (   about(DOM, About, Context)  % 7.5.5.1
  553	    ;   About = Context.eval_context.parent_object
  554	    ),
  555	    About \== null
  556	->  nb_set_dict(new_subject, Context, About)
  557	;   true
  558	),
  559	(   memberchk(typeof=_, Attrs)
  560	->  (   (   iri_attr(about, Attrs, TypedIRI, Context),
  561		    TypedIRI \== null
  562		;   DOM == Context.eval_context.root
  563		->  iri('', TypedIRI, Context)
  564		;   (   iri_attr(resource, Attrs, TypedIRI, Context)
  565		    ;   iri_attr(href,     Attrs, TypedIRI, Context)
  566		    ;   iri_attr(src,      Attrs, TypedIRI, Context)
  567		    ;   new_bnode(TypedIRI, Context)
  568		    ),
  569		    TypedIRI \== null
  570		->  nb_set_dict(typed_resource, Context, TypedIRI),
  571		    nb_set_dict(current_object_resource, Context, TypedIRI)
  572		)
  573	    ->  nb_set_dict(typed_resource, Context, TypedIRI)
  574	    ;   true
  575	    )
  576	;   true
  577	)
  578    ;   (   new_subject_attr_2(SubjectAttr),        % 7.5.5.2
  579	    memberchk(SubjectAttr=About0, Attrs),
  580	    attr_convert(SubjectAttr, About0, About, Context),
  581	    About \== null
  582	->  true
  583	;   html_root(E, Context),
  584	    About = Context.eval_context.parent_object,
  585	    About \== null
  586	->  true
  587	;   DOM == Context.eval_context.root
  588	->  iri('', About, Context)
  589	;   memberchk(typeof=_, Attrs)
  590	->  new_bnode(About, Context)
  591	;   About = Context.eval_context.parent_object,
  592	    About \== null
  593	->  (   \+ memberchk(typeof=_, Attrs)
  594	    ->  nb_set_dict(skip_element, Context, true)
  595	    ;   true
  596	    )
  597	),
  598	debug(rdfa(new_subject), '~w: set new_subject to ~p', [E, About]),
  599	nb_set_dict(new_subject, Context, About),
  600	(   memberchk(typeof=_, Attrs)
  601	->  nb_set_dict(typed_resource, Context, About)
  602	;   true
  603	)
  604    ).
  605update_subject(DOM, Context) :-
  606    DOM=element(_,Attrs,_),                 % 7.5.6
  607    (   iri_attr(about, Attrs, NewSubject, Context)
  608    ->  nb_set_dict(new_subject, Context, NewSubject),
  609	(   memberchk(typeof=_, Attrs)
  610	->  nb_set_dict(typed_resource, Context, NewSubject)
  611	;   true
  612	)
  613    ;   true        % was \+ memberchk(resource=_, Attrs):
  614		    % If no resource is provided ...
  615    ->  (   DOM == Context.eval_context.root
  616	->  iri('', NewSubject, Context),
  617	    nb_set_dict(new_subject, Context, NewSubject),
  618	    (   memberchk(typeof=_, Attrs)
  619	    ->  nb_set_dict(typed_resource, Context, NewSubject)
  620	    ;   true
  621	    )
  622	;   NewSubject = Context.eval_context.parent_object,
  623	    NewSubject \== null
  624	->  nb_set_dict(new_subject, Context, NewSubject)
  625	;   true
  626	)
  627    ),
  628    (   (   iri_attr(resource, Attrs, CurrentObjectResource, Context)
  629	;   iri_attr(href,     Attrs, CurrentObjectResource, Context)
  630	;   iri_attr(src,      Attrs, CurrentObjectResource, Context)
  631	;   memberchk(typeof=_, Attrs),
  632	    \+ memberchk(about=_, Attrs),
  633	    new_bnode(CurrentObjectResource, Context)
  634	),
  635	CurrentObjectResource \== null
  636    ->  nb_set_dict(current_object_resource, Context, CurrentObjectResource)
  637    ;   true
  638    ),
  639    (   memberchk(typeof=_, Attrs),
  640	\+ memberchk(about=_, Attrs)
  641    ->  nb_set_dict(typed_resource, Context,
  642		    Context.current_object_resource)
  643    ;   true
  644    ).
  645
  646new_subject_attr_2(about).
  647new_subject_attr_2(resource).
  648new_subject_attr_2(href).
  649new_subject_attr_2(src).
  650
  651html_root(head, Context) :- html_markup(Context.eval_context.markup).
  652html_root(body, Context) :- html_markup(Context.eval_context.markup).
  653
  654html_markup(html).
  655html_markup(xhtml).
  656
  657%!  emit_typeof(+DOM, +LocalContext) is det.
  658%
  659%   7.5.7: emit triples for @typeof value.
  660
  661emit_typeof(DOM, Context) :-
  662    DOM = element(_,Attrs,_),
  663    Subject = Context.typed_resource,
  664    Subject \== null,
  665    memberchk(typeof=TypeOf, Attrs),
  666    !,
  667    iri_list(TypeOf, IRIs, Context),
  668    maplist(type_triple(Context), IRIs).
  669emit_typeof(_, _).
  670
  671type_triple(Context, IRI) :-
  672    add_triple(Context, Context.typed_resource, rdf:type, IRI).
  673
  674%!  update_list_mapping(+DOM, +Context) is det.
  675%
  676%   7.5.8: Create a list mapping if appropriate
  677
  678update_list_mapping(_DOM, Context) :-
  679    Context.new_subject \== null,
  680    Context.new_subject \== Context.eval_context.parent_object,
  681    !,
  682    empty_list_mapping(ListMapping),
  683    b_set_dict(list_mapping, Context, ListMapping).
  684update_list_mapping(_, _).
  685
  686%!  empty_list_mapping(-Mapping) is det.
  687%!  empty_list_mapping(+Mapping) is semidet.
  688%!  get_list_mapping(+IRI, +Mapping, -List) is semidet.
  689%!  add_list_mapping(+IRI, !Mapping, +List) is det.
  690%
  691%   Manage a list mapping. Note this needs   to be wrapped in a term
  692%   to be able to extend the mapping while keeping its identity.
  693
  694empty_list_mapping(list_mapping(_{})).
  695
  696get_list_mapping(IRI, list_mapping(Dict), Dict.get(IRI)).
  697
  698add_list_mapping(IRI, LM, List) :-
  699    LM = list_mapping(Dict),
  700    setarg(1, LM, Dict.put(IRI, List)).
  701
  702list_mapping_pairs(list_mapping(Dict), Pairs) :-
  703    dict_pairs(Dict, _, Pairs).
  704
  705
  706%!  step_7_5_9(+DOM, +Context)
  707
  708step_7_5_9(_DOM, Context) :-
  709    Context.current_object_resource == null,
  710    !.
  711step_7_5_9(DOM, Context) :-
  712    DOM = element(_,Attrs,_),
  713    memberchk(inlist=_, Attrs),
  714    has_attribute(rel, Attrs, Rel, Context),
  715    !,
  716    iri_list(Rel, Preds, Context),
  717    CurrentObjectResource = Context.current_object_resource,
  718    maplist(add_property_list(Context, CurrentObjectResource),
  719	    Preds).
  720step_7_5_9(DOM, Context) :-
  721    DOM = element(_,Attrs,_),
  722    (   has_attribute(rel, Attrs, Rel, Context),
  723	\+ memberchk(inlist=_, Attrs)
  724    ->  iri_list(Rel, RelIRIs, Context),
  725	maplist(rel_triple(Context), RelIRIs)
  726    ;   true
  727    ),
  728    (   has_attribute(rev, Attrs, Rev, Context)
  729    ->  iri_list(Rev, RevIRIs, Context),
  730	maplist(rev_triple(Context), RevIRIs)
  731    ;   true
  732    ).
  733
  734rel_triple(Context, IRI) :-
  735    add_triple(Context,
  736	       Context.new_subject, IRI, Context.current_object_resource).
  737
  738rev_triple(Context, IRI) :-
  739    add_triple(Context,
  740	       Context.current_object_resource, IRI, Context.new_subject).
  741
  742%!  step_7_5_10(+DOM, +Context)
  743%
  744%   Similar to step_7_5_9, but adding to incomplete triples.
  745
  746step_7_5_10(_DOM, Context) :-
  747    Context.current_object_resource \== null,
  748    !.
  749step_7_5_10(DOM, Context) :-
  750    DOM = element(_,Attrs,_),
  751    memberchk(inlist=_, Attrs),
  752    has_attribute(rel, Attrs, Rel, Context),
  753    !,
  754    set_current_object_resource_to_bnode(Context),
  755    iri_list(Rel, IRIs, Context),
  756    maplist(incomplete_ll_triple(Context), IRIs).
  757step_7_5_10(DOM, Context) :-
  758    DOM = element(_,Attrs,_),
  759    (   has_attribute(rel, Attrs, Rel, Context),
  760	\+ memberchk(inlist=_, Attrs)
  761    ->  iri_list(Rel, RelIRIs, Context),
  762	set_current_object_resource_to_bnode(Context),
  763	maplist(incomplete_rel_triple(Context), RelIRIs)
  764    ;   true
  765    ),
  766    (   has_attribute(rev, Attrs, Rev, Context)
  767    ->  iri_list(Rev, RevIRIs, Context),
  768	set_current_object_resource_to_bnode(Context),
  769	maplist(incomplete_rev_triple(Context), RevIRIs)
  770    ;   true
  771    ).
  772
  773set_current_object_resource_to_bnode(Context) :-
  774    new_bnode(BNode, Context),
  775    b_set_dict(current_object_resource, Context, BNode).
  776
  777incomplete_ll_triple(Context, IRI) :-
  778    LM = Context.list_mapping,
  779    (   get_list_mapping(IRI, LM, LL)
  780    ->  true
  781    ;   LL = list([]),
  782	add_list_mapping(IRI, LM, LL)
  783    ),
  784    add_incomplete_triple(Context, _{list:LL, direction:none}).
  785
  786incomplete_rel_triple(Context, IRI) :-
  787    add_incomplete_triple(Context, _{predicate:IRI, direction:forward}).
  788
  789incomplete_rev_triple(Context, IRI) :-
  790    add_incomplete_triple(Context, _{predicate:IRI, direction:reverse}).
  791
  792
  793%!  update_property_value(+DOM, +Context) is det.
  794%
  795%   7.5.11: establish current property value.
  796
  797update_property_value(DOM, Context) :-
  798    DOM = element(Element,Attrs,Content),
  799    memberchk(property=PropSpec, Attrs),
  800    !,
  801    iri_list(PropSpec, Preds, Context),
  802    (   memberchk(datatype=DTSpec, Attrs)
  803    ->  (   DTSpec \== '',
  804	    term_or_curie_or_absiri(DTSpec, DataType, Context),
  805	    DataType \== null
  806	->  (   (   rdf_equal(rdf:'XMLLiteral', DataType)
  807		;   rdf_equal(rdf:'HTML', DataType)
  808		)
  809	    ->  content_xml(Content, Text)
  810	    ;   content_text(DOM, Text, Context)
  811	    ),
  812	    Obj0 = literal(type(DataType, Text))
  813	;   content_text(DOM, Text, Context),
  814	    Obj0 = literal(Text)
  815	)
  816    ;   memberchk(content=Text, Attrs)
  817    ->  Obj0 = literal(Text)
  818    ;   \+ has_attribute(rel, Attrs, Context),
  819	\+ has_attribute(rev, Attrs, Context),
  820	%\+ memberchk(content=_, Attrs),    % already guaranteed
  821	(   iri_attr(resource, Attrs, Obj0, Context)
  822	;   iri_attr(href,     Attrs, Obj0, Context)
  823	;   iri_attr(src,      Attrs, Obj0, Context)
  824	),
  825	Obj0 \== null
  826    ->  true
  827    ;   (   memberchk(datetime=DateTime, Attrs)
  828	;   Element == time,
  829	    Content = [DateTime]
  830	),
  831	html_markup(Context.eval_context.markup)
  832    ->  (   date_time_type(DateTime, DataType)
  833	->  Obj0 = literal(type(DataType, DateTime))
  834	;   Obj0 = literal(DateTime)
  835	)
  836    ;   memberchk(typeof=_, Attrs),
  837	\+ memberchk(about=_, Attrs)
  838    ->  Obj0 = Context.typed_resource
  839    ;   content_text(Content, Text, Context), % "as a plain literal"???
  840	Obj0 = literal(Text)
  841    ),
  842    (   Obj0 = literal(Text),
  843	atomic(Text),
  844	Context.lang \== ''
  845    ->  Obj = literal(lang(Context.lang, Text))
  846    ;   Obj = Obj0
  847    ),
  848    (   memberchk(inlist=_, Attrs)
  849    ->  maplist(add_property_list(Context, Obj), Preds)
  850    ;   NewSubject = Context.new_subject,
  851	maplist(add_property(Context, NewSubject, Obj), Preds)
  852    ).
  853update_property_value(_, _).
  854
  855add_property_list(Context, Obj, Pred) :-
  856    LM = Context.list_mapping,
  857    (   get_list_mapping(Pred, LM, LL)
  858    ->  LL = list(Old),
  859	setarg(1, LL, [Obj|Old])
  860    ;   add_list_mapping(Pred, LM, list([Obj]))
  861    ).
  862
  863add_property(Context, Subject, Object, Pred) :-
  864    add_triple(Context, Subject, Pred, Object).
  865
  866content_text(element(_,Attrs,_), Text, _Context) :-
  867    memberchk(content=Text, Attrs),
  868    !.
  869content_text(element(_,Attrs,_), Text, Context) :-
  870    memberchk(datetime=Text, Attrs),
  871    html_markup(Context.eval_context.markup),
  872    !.
  873content_text(element(_,_,Content), Text, _Context) :-
  874    !,
  875    phrase(text_nodes(Content), Texts),
  876    atomic_list_concat(Texts, Text).
  877content_text(Content, Text, _Context) :-
  878    !,
  879    phrase(text_nodes(Content), Texts),
  880    atomic_list_concat(Texts, Text).
  881
  882text_nodes([]) --> !.
  883text_nodes([H|T]) --> !, text_nodes(H), text_nodes(T).
  884text_nodes(element(_,_,Content)) --> !, text_nodes(Content).
  885text_nodes(CDATA) --> [CDATA].
  886
  887content_xml(DOM, Text) :-
  888    with_output_to(atom(Text), xml_write(DOM, [header(false)])).
  889
  890%!  complete_triples(+Context)
  891%
  892%   7.5.12: Complete incomplete triples
  893
  894complete_triples(Context) :-
  895    Context.skip_element == false,
  896    Context.new_subject \== null,
  897    Context.eval_context.incomplete_triples \== [],
  898    !,
  899    reverse(Context.eval_context.incomplete_triples, Incomplete),
  900    maplist(complete_triple(Context), Incomplete).
  901complete_triples(_).
  902
  903complete_triple(Context, Dict) :-
  904    complete_triple(Dict.direction, Dict, Context).
  905
  906complete_triple(none, Dict, Context) :-
  907    List = Dict.list,
  908    List = list(Old),
  909    setarg(1, List, [Context.new_subject|Old]).
  910complete_triple(forward, Dict, Context) :-
  911    add_triple(Context,
  912	       Context.eval_context.parent_subject,
  913	       Dict.predicate,
  914	       Context.new_subject).
  915complete_triple(reverse, Dict, Context) :-
  916    add_triple(Context,
  917	       Context.new_subject,
  918	       Dict.predicate,
  919	       Context.eval_context.parent_subject).
  920
  921
  922%!  descent(DOM, Context)
  923%
  924%   7.5.13: Descent into the children
  925
  926descent(element(_,_,Content), Context) :-
  927    (   Context.skip_element == true
  928    ->  maplist(descent_skip(Context), Content)
  929    ;   maplist(descent_no_skip(Context), Content)
  930    ).
  931
  932descent_skip(Context, DOM) :-
  933    DOM = element(E,_,_),
  934    !,
  935    debug(rdfa(descent), 'skip: ~w: new_subject=~p',
  936	  [E, Context.new_subject]),
  937    process_node(DOM, Context.eval_context.put(
  938			  _{ lang:Context.lang,
  939			     vocab:Context.vocab,
  940			     iri_mapping:Context.iri_mapping
  941			   })).
  942descent_skip(_, _).
  943
  944descent_no_skip(Context, DOM) :-
  945    DOM = element(E,_,_),
  946    !,
  947    (   ParentSubject = Context.new_subject,
  948	ParentSubject \== null
  949    ->  true
  950    ;   ParentSubject = Context.eval_context.parent_subject
  951    ),
  952    (   ParentObject = Context.current_object_resource,
  953	ParentObject \== null
  954    ->  true
  955    ;   ParentObject = ParentSubject
  956    ),
  957    debug(rdfa(descent), 'no skip: ~w: parent subject = ~p, object = ~p',
  958	  [E, ParentSubject, ParentObject]),
  959    process_node(DOM, Context.eval_context.put(
  960			  _{ parent_subject:ParentSubject,
  961			     parent_object:ParentObject,
  962			     iri_mapping:Context.iri_mapping,
  963			     incomplete_triples:Context.incomplete_triples,
  964			     list_mapping:Context.list_mapping,
  965			     lang:Context.lang,
  966			     vocab:Context.vocab
  967			    })).
  968descent_no_skip(_, _).
  969
  970%!  complete_lists(+Context) is det.
  971%
  972%   7.5.14: Complete possibly pending lists
  973
  974complete_lists(Context) :-
  975    empty_list_mapping(Context.list_mapping),
  976    !.
  977complete_lists(Context) :-
  978    (   CurrentSubject = Context.new_subject,
  979	CurrentSubject \== null
  980    ->  true
  981    ;   CurrentSubject = Context.eval_context.base
  982    ),
  983    list_mapping_pairs(Context.list_mapping, Pairs),
  984    maplist(complete_list(Context, CurrentSubject), Pairs).
  985
  986complete_list(Context, _, IRI-_) :-
  987    get_list_mapping(IRI, Context.eval_context.list_mapping, _),
  988    !.
  989complete_list(Context, CurrentSubject, IRI-list(List0)) :-
  990    reverse(List0, List),
  991    emit_list(List, ListURI, Context),
  992    add_triple(Context, CurrentSubject, IRI, ListURI).
  993
  994emit_list([], NIL, _) :-
  995    rdf_equal(NIL, rdf:nil).
  996emit_list([H|T], URI, Context) :-
  997    emit_list(T, TailURI, Context),
  998    new_bnode(URI, Context),
  999    add_triple(Context, URI, rdf:first, H),
 1000    add_triple(Context, URI, rdf:rest, TailURI).
 1001
 1002
 1003%!  has_attribute(+Name, +Attrs, +Context) is semidet.
 1004%!  has_attribute(+Name, +Attrs, -Value, +Context) is semidet.
 1005%
 1006%   True if Attrs contains Name.  We sometimes need to ignore
 1007%   Attributes if their value is invalid.
 1008%
 1009%   @see HTML+RDFa, 3.1 Additional RDFa Processing Rules, point 7.
 1010
 1011has_attribute(Name, Attrs, Context) :-
 1012    has_attribute(Name, Attrs, _, Context).
 1013
 1014has_attribute(rel, Attrs, Rel, Context) :-
 1015    memberchk(rel=Rel, Attrs),
 1016    html_markup(Context.eval_context.markup),
 1017    memberchk(property=_, Attrs),
 1018    !,
 1019    html_non_empty_rel(Rel, Context).
 1020has_attribute(rev, Attrs, Rev, Context) :-
 1021    memberchk(rev=Rev, Attrs),
 1022    html_markup(Context.eval_context.markup),
 1023    memberchk(property=_, Attrs),
 1024    !,
 1025    html_non_empty_rel(Rev, Context).
 1026has_attribute(Name, Attrs, Value, _Context) :-
 1027    memberchk(Name=Value, Attrs).
 1028
 1029html_non_empty_rel(Spec, Context) :-
 1030    Sep = "\s\t\n\r",
 1031    split_string(Spec, Sep, Sep, SpecList),
 1032    member(Spec1, SpecList),
 1033    safe_curie_or_curie_or_absiri(Spec1, _, Context),
 1034    !.
 1035
 1036
 1037%!  iri_attr(+AttName, +Attrs, -IRI, +Context) is semidet.
 1038
 1039iri_attr(Name, Attrs, IRI, Context) :-
 1040    memberchk(Name=IRI0, Attrs),
 1041    attr_convert(Name, IRI0, IRI, Context).
 1042
 1043attr_convert(about, Spec, IRI, Context) :-
 1044    safe_curie_or_curie_or_iri(Spec, IRI, Context).
 1045attr_convert(href, Spec, IRI, Context) :-
 1046    iri(Spec, IRI, Context).
 1047attr_convert(src, Spec, IRI, Context) :-
 1048    iri(Spec, IRI, Context).
 1049attr_convert(resource, Spec, IRI, Context) :-
 1050    safe_curie_or_curie_or_iri(Spec, IRI, Context).
 1051attr_convert(vocab, Spec, IRI, Context) :-
 1052    iri(Spec, IRI, Context).
 1053attr_convert(datatype, Spec, IRI, Context) :-
 1054    term_or_curie_or_absiri(Spec, IRI, Context).
 1055
 1056
 1057about(DOM, About, Context) :-
 1058    DOM=element(_,Attrs,_),
 1059    (   memberchk(about=About0, Attrs)
 1060    ->  safe_curie_or_curie_or_iri(About0, About, Context)
 1061    ;   DOM == Context.eval_context.root
 1062    ->  iri('', About, Context)
 1063    ).
 1064
 1065%!  new_bnode(-BNode, +Context) is det.
 1066%
 1067%   Create a new blank node. Note that the   current id is kept in a
 1068%   term to avoid copying the counter on the descent step.
 1069
 1070new_bnode(BNode, Context) :-
 1071    EvalCtx = Context.eval_context,
 1072    Node = EvalCtx.bnode_id,
 1073    arg(1, Node, Id),
 1074    succ(Id, Id1),
 1075    nb_setarg(1, Node, Id1),
 1076    Prefix = EvalCtx.anon_prefix,
 1077    (   atom(Prefix)
 1078    ->  atom_concat(Prefix, Id, BNode)
 1079    ;   BNode = bnode(Id)
 1080    ).
 1081
 1082%!  iri_list(+Spec, -IRIs, +Context) is det.
 1083%
 1084%   True when IRIs is a list of fulfy qualified IRIs from Spec
 1085
 1086iri_list(Spec, IRIs, Context) :-
 1087    Sep = "\s\t\n\r",
 1088    split_string(Spec, Sep, Sep, SpecList),
 1089    (   SpecList == [""]
 1090    ->  IRIs = []
 1091    ;   maplist(ctx_to_iri(Context), SpecList, IRIs0),
 1092	exclude(==(null), IRIs0, IRIs)
 1093    ).
 1094
 1095ctx_to_iri(Context, Spec, IRI) :-
 1096    term_or_curie_or_absiri(Spec, IRI, Context).
 1097
 1098%!  iri(+Spec, -IRI, +Context)
 1099%
 1100%   Used for @href and @src attributes
 1101
 1102iri(Spec, IRI, Context) :-
 1103    iri_normalized(Spec, Context.eval_context.base, IRI).
 1104
 1105abs_iri(Spec, IRI) :-
 1106    uri_components(Spec, Components),
 1107    uri_data(authority, Components, Authority), nonvar(Authority),
 1108    uri_data(scheme,    Components, Scheme),    nonvar(Scheme),
 1109    !,
 1110    iri_normalized(Spec, IRI).
 1111
 1112
 1113%!  safe_curie_or_curie_or_iri(+Spec, -IRI, +Context) is det.
 1114%
 1115%   Implement section 7.4, CURIE and IRI Processing.  Used for
 1116%   @about and @resource
 1117
 1118safe_curie_or_curie_or_iri(Spec, IRI, Context) :-
 1119    safe_curie_or_curie_or_absiri(Spec, IRI, Context),
 1120    !.
 1121safe_curie_or_curie_or_iri(Spec, IRI, Context) :-
 1122    uri_normalized(Spec, Context.eval_context.base, IRI).
 1123
 1124safe_curie_or_curie_or_absiri(Spec, IRI, _Context) :-
 1125    abs_iri(Spec, IRI0),
 1126    !,
 1127    IRI = IRI0.
 1128safe_curie_or_curie_or_absiri(Spec, IRI, Context) :-
 1129    atom_codes(Spec, Codes),
 1130    (   safe_curie(Codes, Curie)
 1131    ->  (   phrase(curie(IRI, Context), Curie)
 1132	->  true
 1133	;   IRI = null
 1134	)
 1135    ;   phrase(curie(IRI, Context), Codes)
 1136    ).
 1137
 1138safe_curie(Codes, Curie) :-
 1139    append([0'[|Curie], `]`, Codes).
 1140
 1141curie(IRI, Context) -->
 1142    "_:", !, reference_or_empty(Reference),
 1143    {   IRI = Context.eval_context.named_bnodes.v.get(Reference)
 1144    ->  true
 1145    ;   new_bnode(IRI, Context),
 1146	b_set_dict(v, Context.eval_context.named_bnodes,
 1147		   Context.eval_context.named_bnodes.v.put(Reference, IRI))
 1148    }.
 1149curie(IRI, Context) -->
 1150    ":", !, reference_or_empty(Reference),
 1151    { atom_concat(Context.iri_mapping.get(''), Reference, IRI) }.
 1152curie(IRI, Context) -->
 1153    nc_name(Prefix), ":", !, reference_or_empty(Reference),
 1154    {   atom_concat(Context.iri_mapping.get(Prefix), Reference, IRI0)
 1155    ->  IRI = IRI0
 1156    ;   rdfa_core_prefix(Prefix, URIPrefix)
 1157    ->  atom_concat(URIPrefix, Reference, IRI)
 1158    }.
 1159
 1160%!  term_or_curie_or_absiri(+Spec, -IRI, +Context) is det.
 1161%
 1162%   Used for @datatype and @property, @typeof, @rel and @rev
 1163
 1164term_or_curie_or_absiri(Spec, IRI, _Context) :-
 1165    abs_iri(Spec, IRI0),
 1166    !,
 1167    IRI = IRI0.
 1168term_or_curie_or_absiri(Spec, IRI, Context) :-
 1169    atom_codes(Spec, Codes),
 1170    (   phrase(term(Term), Codes),
 1171	downcase_atom(Term, LwrCase)
 1172    ->  (   Vocab = Context.vocab,
 1173	    Vocab \== ''
 1174	->  atom_concat(Vocab, Term, IRI)
 1175	;   term_iri(LwrCase, Context.eval_context.markup, IRI0)
 1176	->  IRI = IRI0
 1177	;   IRI = Context.term_mapping.get(Term)
 1178	->  true
 1179	;   dict_pairs(Context.term_mapping, _Tag, Pairs),
 1180	    member(TermCaps-IRI, Pairs),
 1181	    downcase_atom(TermCaps, LwrCase)
 1182	->  true
 1183	;   IRI = null
 1184	)
 1185    ;   phrase(curie(IRI, Context), Codes)
 1186    ->  true
 1187    ;   uri_normalized(Spec, Context.eval_context.base, IRI)
 1188    ).
 1189
 1190%!  term_iri(?Term, ?Markup, ?IRI)
 1191%
 1192%   @see http://www.w3.org/2011/rdfa-context/xhtml-rdfa-1.1
 1193
 1194term_expansion(term_iri(Term, Markup), term_iri(Term, Markup, URI)) :-
 1195    default_prefix_mapping(Markup, Prefix),
 1196    atom_concat(Prefix, Term, URI).
 1197
 1198term_iri(alternate,  xhtml).
 1199term_iri(appendix,   xhtml).
 1200term_iri(cite,       xhtml).
 1201term_iri(bookmark,   xhtml).
 1202term_iri(contents,   xhtml).
 1203term_iri(chapter,    xhtml).
 1204term_iri(copyright,  xhtml).
 1205term_iri(first,      xhtml).
 1206term_iri(glossary,   xhtml).
 1207term_iri(help,       xhtml).
 1208term_iri(icon,       xhtml).
 1209term_iri(index,      xhtml).
 1210term_iri(last,       xhtml).
 1211term_iri(meta,       xhtml).
 1212term_iri(next,       xhtml).
 1213term_iri(prev,       xhtml).
 1214term_iri(previous,   xhtml).
 1215term_iri(section,    xhtml).
 1216term_iri(start,      xhtml).
 1217term_iri(stylesheet, xhtml).
 1218term_iri(subsection, xhtml).
 1219term_iri(top,        xhtml).
 1220term_iri(up,         xhtml).
 1221term_iri(p3pv1,      xhtml).
 1222
 1223term_iri(describedby, _, 'http://www.w3.org/2007/05/powder-s#describedby').
 1224term_iri(license,     _, 'http://www.w3.org/1999/xhtml/vocab#license').
 1225term_iri(role,        _, 'http://www.w3.org/1999/xhtml/vocab#role').
 1226
 1227		 /*******************************
 1228		 *           GRAMMARS           *
 1229		 *******************************/
 1230
 1231prefixes(Dict0, Dict) -->
 1232    ws, nc_name(Name), ws, ":", ws, reference(IRI), !, ws,
 1233    prefixes(Dict0.put(Name,IRI), Dict).
 1234prefixes(Dict, Dict) --> [].
 1235
 1236ws --> ws1, !, ws.
 1237ws --> [].
 1238
 1239ws1 --> " ".
 1240ws1 --> "\t".
 1241ws1 --> "\r".
 1242ws1 --> "\n".
 1243
 1244nc_name(Name) -->
 1245    [H], {nc_name_start_code(H)},
 1246    nc_name_codes(Codes),
 1247    { atom_codes(Name0, [H|Codes]),
 1248      downcase_atom(Name0, Name)
 1249    }.
 1250
 1251%!  term(-Term)//
 1252%
 1253%   7.4.3
 1254
 1255term(Term) -->
 1256    [H], {nc_name_start_code(H)},
 1257    term_codes(Codes),
 1258    { atom_codes(Term, [H|Codes])
 1259    }.
 1260
 1261
 1262nc_name_codes([H|T]) --> nc_name_code(H), !, nc_name_codes(T).
 1263nc_name_codes([]) --> [].
 1264
 1265nc_name_code(H) --> [H], {nc_name_code(H)}.
 1266
 1267term_codes([H|T]) --> term_code(H), !, term_codes(T).
 1268term_codes([]) --> [].
 1269
 1270term_code(H) --> [H], {term_code(H)}.
 1271
 1272nc_name_start_code(0':) :- !, fail.
 1273nc_name_start_code(C) :- xml_basechar(C), !.
 1274nc_name_start_code(C) :- xml_ideographic(C).
 1275
 1276nc_name_code(0':) :- !, fail.
 1277nc_name_code(C) :- xml_basechar(C), !.
 1278nc_name_code(C) :- xml_digit(C), !.
 1279nc_name_code(C) :- xml_ideographic(C), !.
 1280nc_name_code(C) :- xml_combining_char(C), !.
 1281nc_name_code(C) :- xml_extender(C), !.
 1282
 1283term_code(0'/) :- !.
 1284term_code(C) :- nc_name_code(C).
 1285
 1286reference(IRI) -->
 1287    [H],
 1288    reference_codes(T),
 1289    { atom_codes(IRI, [H|T]) }.
 1290
 1291reference_codes([])    --> ws1, !.
 1292reference_codes([H|T]) --> [H], !, reference_codes(T).
 1293reference_codes([]) --> [].
 1294
 1295reference_or_empty(IRI) -->
 1296    reference_codes(Codes),
 1297    { atom_codes(IRI, Codes) }.
 1298
 1299
 1300%!  date_time_type(+DateTime, -DataType) is semidet.
 1301%
 1302%   True when DataType is the  xsd   type  that  matches the lexical
 1303%   representation of DateTime
 1304
 1305date_time_type(DateTime, DataType) :-
 1306    atom_codes(DateTime, Codes),
 1307    phrase(date_time_type(DataType), Codes).
 1308
 1309date_time_type(DT) --> duration,   !, { rdf_equal(DT, xsd:duration) }.
 1310date_time_type(DT) --> date_time,  !, { rdf_equal(DT, xsd:dateTime) }.
 1311date_time_type(DT) --> date,       !, { rdf_equal(DT, xsd:date) }.
 1312date_time_type(DT) --> time,       !, { rdf_equal(DT, xsd:time) }.
 1313date_time_type(DT) --> gyearmonth, !, { rdf_equal(DT, xsd:gYearMonth) }.
 1314date_time_type(DT) --> gyear,      !, { rdf_equal(DT, xsd:gYear) }.
 1315
 1316duration   --> opt_minus, "P",
 1317    opt_dy, opt_dm, opt_dd,
 1318    (   "T"
 1319    ->  opt_dh, opt_dm, opt_ds
 1320    ;   ""
 1321    ).
 1322
 1323date_time  --> opt_minus, yyyy, "-", !, mM, "-", dd,
 1324    "T", hh, ":", mm, ":", ss, opt_fraction, opt_zzzzzz.
 1325date       --> opt_minus, yyyy, "-", !, mM, "-", dd.
 1326time       --> hh, ":", mm, ":", ss, opt_fraction.
 1327gyearmonth --> opt_minus, yyyy, "-", !, mM.
 1328gyear      --> opt_minus, yyyy.
 1329
 1330opt_minus --> "-", !.
 1331opt_minus --> "".
 1332
 1333yyyy --> dnzs, d, d, d, d.
 1334
 1335dnzs --> "".
 1336dnzs --> dnz, dnzs.
 1337
 1338opt_fraction --> ".", !, ds.
 1339opt_fraction --> "".
 1340
 1341mM --> d(V1), d(V2), { M is V1*10+V2, M >= 1, M =< 12 }.
 1342dd --> d(V1), d(V2), { M is V1*10+V2, M >= 1, M =< 31 }.
 1343hh --> d(V1), d(V2), { M is V1*10+V2, M =< 23 }.
 1344mm --> d(V1), d(V2), { M is V1*10+V2, M =< 59 }.
 1345ss --> d(V1), d(V2), { M is V1*10+V2, M =< 59 }.
 1346
 1347d(V) --> [D], { between(0'0, 0'9, D), V is D-0'0 }.
 1348d    --> [D], { between(0'0, 0'9, D) }.
 1349dnz  --> [D], { between(0'1, 0'9, D) }.
 1350
 1351ds --> d, !, ds.
 1352ds --> "".
 1353
 1354opt_zzzzzz --> sign, hh, ":", mm.
 1355opt_zzzzzz --> "Z".
 1356opt_zzzzzz --> "".
 1357
 1358sign --> "+".
 1359sign --> "-".
 1360
 1361opt_dy --> ( int, "Y" | "" ).
 1362opt_dm --> ( int, "M" | "" ).
 1363opt_dd --> ( int, "D" | "" ).
 1364opt_dh --> ( int, "H" | "" ).
 1365opt_ds --> ( int, ("." -> int ; ""), "S" | "" ).
 1366
 1367int --> d, ds.
 1368
 1369		 /*******************************
 1370		 *           TRIPLES            *
 1371		 *******************************/
 1372
 1373%!  add_triple(+Context, +S, +P, +O) is det.
 1374%
 1375%   Add a triple to  the  global   evaluation  context.  Triples are
 1376%   embedded in a term, so we can   use  setarg/3 on the list, while
 1377%   the  evaluation  context  is  copied  for  descending  the  node
 1378%   hierarchy.
 1379
 1380add_triple(Context, S, P, O) :-
 1381    (   debugging(rdfa(triple))
 1382    ->  debug(rdfa(triple), 'Added { ~p ~p ~p }', [S,P,O]),
 1383	backtrace(4)
 1384    ;   true
 1385    ),
 1386    valid_subject(S),
 1387    valid_predicate(P),
 1388    valid_object(O),
 1389    !,
 1390    Triples = Context.eval_context.triples,
 1391    arg(1, Triples, Old),
 1392    setarg(1, Triples, [rdf(S,P,O)|Old]).
 1393add_triple(_, _, _, _).                 % ignored invalid triple.
 1394
 1395valid_subject(S)   :- S \== null.
 1396valid_predicate(P) :- P \== null, \+ rdf_is_bnode(P).
 1397valid_object(O)    :- O \== null, ( atom(O) -> true ; valid_literal(O) ).
 1398
 1399valid_literal(literal(Plain)) :-
 1400    atom(Plain),
 1401    !.
 1402valid_literal(literal(type(T, _))) :-
 1403    !,
 1404    T \== null.
 1405valid_literal(literal(lang(_,_))).
 1406
 1407add_incomplete_triple(Context, Dict) :-
 1408    debug(rdfa(incomplete), 'Incomplete: ~p', [Dict]),
 1409    b_set_dict(incomplete_triples, Context,
 1410	       [ Dict
 1411	       | Context.incomplete_triples
 1412	       ]).
 1413
 1414
 1415		 /*******************************
 1416		 *            PATTERNS          *
 1417		 *******************************/
 1418
 1419%!  apply_patterns(+TriplesIn, -TriplesOut) is det.
 1420%
 1421%   Apply RDFa patterns.  We need several passes do deal with ordering
 1422%   issues and the possibility that patterns are invalid:
 1423%
 1424%     1. find patterns from rdf(_,rdfa:copy,Pattern)
 1425%     2. collect the properties for these patterns and delete
 1426%        patterns that do not have rdf:type rdfa:Pattern.
 1427%     3. Actually copy the patterns and delete the patterns themselves.
 1428
 1429apply_patterns(TriplesIn, TriplesOut) :-
 1430    referenced_patterns(TriplesIn, Pairs),
 1431    (   Pairs == []
 1432    ->  TriplesOut = TriplesIn
 1433    ;   sort(Pairs, UniquePairs),
 1434	dict_pairs(Dict, _, UniquePairs),
 1435	pattern_properties(TriplesIn, Dict),
 1436	delete_invalid_patterns(Dict, Patterns),
 1437	phrase(apply_patterns(TriplesIn, Patterns), TriplesOut)
 1438    ).
 1439
 1440term_expansion(TIn, TOut) :-
 1441    rdf_global_term(TIn, TOut).
 1442
 1443referenced_patterns([], []).
 1444referenced_patterns([rdf(_,rdfa:copy,O)|T0], [O-[]|T]) :-
 1445    !,
 1446    referenced_patterns(T0, T).
 1447referenced_patterns([_|T0], T) :-
 1448    referenced_patterns(T0, T).
 1449
 1450pattern_properties([], _).
 1451pattern_properties([rdf(S,P,O)|T], Dict) :-
 1452    ignore(b_set_dict(S, Dict, [P-O|Dict.get(S)])),
 1453    pattern_properties(T, Dict).
 1454
 1455delete_invalid_patterns(Patterns0, Patterns) :-
 1456    dict_pairs(Patterns0, Tag, Pairs0),
 1457    include(rdfa_pattern, Pairs0, Pairs),
 1458    dict_pairs(Patterns,  Tag, Pairs).
 1459
 1460rdfa_pattern(_-PO) :-
 1461    memberchk((rdf:type)-(rdfa:'Pattern'), PO).
 1462
 1463apply_patterns([], _) --> [].
 1464apply_patterns([rdf(S,rdfa:copy,O)|T0], Dict) -->
 1465    !,
 1466    copy_pattern(Dict.O, S),
 1467    apply_patterns(T0, Dict).
 1468apply_patterns([rdf(S,_,_)|T0], Dict) -->
 1469    { _ = Dict.get(S) },
 1470    !,
 1471    apply_patterns(T0, Dict).
 1472apply_patterns([H|T], Dict) -->
 1473    [H],
 1474    apply_patterns(T, Dict).
 1475
 1476copy_pattern([], _) --> [].
 1477copy_pattern([(rdf:type)-(rdfa:'Pattern')|T], S) -->
 1478    !,
 1479    copy_pattern(T, S).
 1480copy_pattern([P-O|T], S) -->
 1481    [rdf(S,P,O)],
 1482    copy_pattern(T, S).
 1483
 1484
 1485		 /*******************************
 1486		 *       HOOK INTO RDF-DB       *
 1487		 *******************************/
 1488
 1489:- multifile
 1490    rdf_db:rdf_load_stream/3,
 1491    rdf_db:rdf_file_type/2. 1492
 1493%!  rdf_db:rdf_load_stream(+Format, +Stream, :Options)
 1494%
 1495%   Register library(semweb/rdfa) as loader for HTML RDFa files.
 1496%
 1497%   @tbd    Which options need to be forwarded to read_rdfa/3?
 1498
 1499rdf_db:rdf_load_stream(rdfa, Stream, _Module:Options1):-
 1500    rdf_db:graph(Options1, Graph),
 1501    atom_concat('__', Graph, BNodePrefix),
 1502    merge_options([anon_prefix(BNodePrefix)], Options1, Options2),
 1503    read_rdfa(Stream, Triples, Options2),
 1504    rdf_transaction(( forall(member(rdf(S,P,O), Triples),
 1505			     rdf_assert(S, P, O, Graph)),
 1506		      rdf_set_graph(Graph, modified(false))
 1507		    ),
 1508		    parse(Graph)).
 1509
 1510rdf_db:rdf_file_type(html, rdfa)