View source with raw comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2018, VU University Amsterdam
    7    All rights reserved.
    8
    9    Redistribution and use in source and binary forms, with or without
   10    modification, are permitted provided that the following conditions
   11    are met:
   12
   13    1. Redistributions of source code must retain the above copyright
   14       notice, this list of conditions and the following disclaimer.
   15
   16    2. Redistributions in binary form must reproduce the above copyright
   17       notice, this list of conditions and the following disclaimer in
   18       the documentation and/or other materials provided with the
   19       distribution.
   20
   21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   24    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   25    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   26    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   27    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   28    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   29    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   31    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   32    POSSIBILITY OF SUCH DAMAGE.
   33*/
   34
   35:- module(prolog_manual_index,
   36          [ clean_man_index/0,          %
   37            save_man_index/0,
   38            index_man_directory/2,      % +DirSpec, +Options
   39            index_man_file/2,           % +Class, +FileSpec
   40                                        % Query
   41            current_man_object/1,       % ?Object
   42            man_object_property/2,      % ?Object, ?Property
   43
   44            manual_object/5,            % ?Obj, ?Summary, ?File, ?Class, ?Offset
   45            doc_object_identifier/2     % +Obj, -Name
   46          ]).   47:- use_module(library(debug),[debug/3]).   48:- autoload(doc_util,[atom_to_object/2,normalise_white_space/3]).   49:- autoload(library(apply),[maplist/2]).   50:- autoload(library(assoc),[empty_assoc/1,get_assoc/3,put_assoc/4]).   51:- autoload(library(error),[type_error/2]).   52:- autoload(library(filesex),[directory_file_path/3]).   53:- autoload(library(lists),[member/2,reverse/2,append/3]).   54:- autoload(library(occurs),[sub_term/2]).   55:- autoload(library(option),[select_option/4]).   56:- autoload(library(sgml),
   57	    [ dtd/2,
   58	      new_sgml_parser/2,
   59	      set_sgml_parser/2,
   60	      sgml_parse/2,
   61	      free_sgml_parser/1,
   62	      get_sgml_parser/2
   63	    ]).

Index the HTML manuals

This module pre-processes the HTML files that constitute the manual such that we can access the summary documentation of all predicates for usage in IDE tools. */

   72:- predicate_options(index_man_directory/2, 2,
   73                     [ class(oneof([manual,packages,misc])),
   74                       symbolic(any),
   75                       pass_to(system:absolute_file_name/3, 3)
   76                     ]).   77
   78
   79:- dynamic
   80    man_index/5.            % Object, Summary, File, Class, Offset
 manual_object(?Object, ?Summary, ?File, ?Class, ?Offset) is nondet
True if Object is documented. Arguments:
Arguments:
Object- is the object documented, described by a Prolog term. Defined shapes are:
  • section(Level, Number, Label, File)
  • Name/Arity
  • Name//Arity
  • Module:Name/Arity
  • Module:Name//Arity
  • f(Name/Arity
  • c(Name)
Summary- is a string object providing a summary of object
File- is the HTML file in which the object is documented
Class- is one of manual or packages
Offset- is the character offset at which the DOM element describing Object appears. This is used by doc_man.pl to quickly extract the node.
  104manual_object(Object, Summary, File, Class, Offset) :-
  105    index_manual,
  106    man_index(Object, Summary, File, Class, Offset).
 clean_man_index is det
Clean already loaded manual index.
  112clean_man_index :-
  113    retractall(man_index(_,_,_,_,_)).
 manual_directory(-Class, -Dir)// is nondet
True if Dir is a directory holding manual files. Class is an identifier used by doc_object_summary/4.
  121user:file_search_path(swi_man_manual,   swi('doc/Manual')).
  122user:file_search_path(swi_man_packages, swi('doc/packages')).
  123
  124manual_directory(Class, Spec, Dir) :-
  125    man_path_spec(Class, Spec),
  126    absolute_file_name(Spec, Dir,
  127                       [ file_type(directory),
  128                         access(read),
  129                         solutions(all)
  130                       ]).
  131
  132man_path_spec(manual,   swi_man_manual(.)).
  133man_path_spec(packages, swi_man_packages(.)).
  134
  135
  136                 /*******************************
  137                 *          PARSE MANUAL        *
  138                 *******************************/
 save_man_index
Create swi('doc/manindex.db'), containing an index into the HTML manuals for use with help/1 and similar predicates. This predicate is called during the build process.
  146save_man_index :-
  147    cached_index_file(write, File),
  148    setup_call_cleanup(
  149        open(File, write, Out, [encoding(utf8)]),
  150        (   format(Out, '/*  Generated manual index.~n', []),
  151            format(Out, '    Do not edit.~n', []),
  152            format(Out, '*/~n~n', []),
  153            setup_call_cleanup(
  154                b_setval(pldoc_save_index, Out),
  155                do_index_manual,
  156                nb_delete(pldoc_save_index))
  157        ),
  158        close(Out)).
 index_manual is det
Load the manual index if not already done.
  165index_manual :-
  166    man_index(_,_,_,_,_),
  167    !.
  168index_manual :-
  169    with_mutex(pldoc_man,
  170               locked_index_manual).
  171
  172locked_index_manual :-
  173    man_index(_,_,_,_,_),
  174    !.
  175locked_index_manual :-
  176    cached_index_file(read, File),
  177    catch(read_index(File), E,
  178          print_message(warning, E)),
  179    !.
  180locked_index_manual :-
  181    do_index_manual.
  182
  183do_index_manual :-
  184    forall(manual_directory(Class, Symbolic, Dir),
  185           index_man_directory(Dir,
  186                               [ class(Class),
  187                                 symbolic(Symbolic),
  188                                 file_errors(fail)
  189                               ])).
 read_index(+File)
Read the manual index from File.
  195read_index(File) :-
  196    context_module(M),
  197    empty_assoc(State0),
  198    setup_call_cleanup(
  199        open(File, read, In, [encoding(utf8)]),
  200        read_man_index(In, State0, [module(M)]),
  201        close(In)).
  202
  203read_man_index(In, State0, Options) :-
  204    read_term(In, TermIn, Options),
  205    (   TermIn == end_of_file
  206    ->  true
  207    ;   valid_term(TermIn, Term, State0, State),
  208        assert(Term),
  209        read_man_index(In, State, Options)
  210    ).
  211
  212valid_term(TermIn, Term, State0, State) :-
  213    ground(TermIn),
  214    resolve_index(TermIn, Term, State0, State),
  215    !.
  216valid_term(Term, _, _, _) :-
  217    type_error(man_index_term, Term).
  218
  219resolve_index(i(Object0, Summary, File0, Class, Offset),
  220              man_index(Object, Summary, File, Class, Offset),
  221              State0, State) :-
  222    map_section(Object0, Object, File),
  223    resolve_index_file(File0, File, State0, State).
  224
  225resolve_index_file(File0, File, State, State) :-
  226    get_assoc(File0, State, File),
  227    !.
  228resolve_index_file(File0, File, State0, State) :-
  229    absolute_file_name(File0, File, [access(read)]),
  230    put_assoc(File0, State0, File, State).
  231
  232map_section(section(Level, Nr, Label), section(Level, Nr, Label, File), File) :-
  233    !.
  234map_section(Object, Object, _).
  235
  236cached_index_file(Access, File) :-
  237    absolute_file_name(swi('doc/manindex.db'), File,
  238                       [ access(Access),
  239                         file_errors(fail)
  240                       ]).
 check_duplicate_ids
Maintenance utility to check that we do not have duplicate section identifiers in the documentation.
  248:- public
  249    check_duplicate_ids/0.  250
  251check_duplicate_ids :-
  252    findall(Id, man_index(section(_,_,Id),_,_,_,_), Ids),
  253    msort(Ids, Sorted),
  254    duplicate_ids(Sorted, Duplicates),
  255    (   Duplicates == []
  256    ->  true
  257    ;   print_message(warning, pldoc(duplicate_ids(Duplicates)))
  258    ).
  259
  260duplicate_ids([], []).
  261duplicate_ids([H,H|T0], [H|D]) :-
  262    !,
  263    take_prefix(H,T0,T),
  264    duplicate_ids(T, D).
  265duplicate_ids([_|T], D) :-
  266    duplicate_ids(T, D).
  267
  268take_prefix(H, [H|T0], T) :-
  269    !,
  270    take_prefix(H, T0, T).
  271take_prefix(_, L, L).
 index_man_directory(+Dir, +Options) is det
Index the HTML directory Dir. Options are:
class(Class)
Define category of the found objects.
symbolic(+Term)
Symbolic (file search) specification for Dir

Remaining Options are passed to absolute_file_name/3.

  285index_man_directory(Spec, Options) :-
  286    select_option(class(Class), Options, Options1, misc),
  287    select_option(symbolic(Symbolic), Options1, Options2, Spec),
  288    absolute_file_name(Spec, Dir,
  289                       [ file_type(directory),
  290                         access(read)
  291                       | Options2
  292                       ]),
  293    atom_concat(Dir, '/*.html', Pattern),
  294    expand_file_name(Pattern, Files),
  295    maplist(index_man_file(Class, Symbolic), Files).
 index_man_file(+Class, +File) is det
 index_man_file(+Class, +File, +Symbolic) is det
Collect the documented objects from the SWI-Prolog manual file File.
  304index_man_file(Class, File) :-
  305    index_man_file(Class, File, File).
  306index_man_file(Class, Symbolic, File) :-
  307    absolute_file_name(File, Path,
  308                       [ access(read)
  309                       ]),
  310    debug(pldoc(man_index), 'Indexing ~p ~p', [Class, File]),
  311    open(Path, read, In, [type(binary)]),
  312    dtd(html, DTD),
  313    new_sgml_parser(Parser, [dtd(DTD)]),
  314    set_sgml_parser(Parser, file(File)),
  315    set_sgml_parser(Parser, dialect(sgml)),
  316    set_sgml_parser(Parser, shorttag(false)),
  317    nb_setval(pldoc_man_index, []),
  318    nb_setval(pldoc_index_class, Class),
  319    nb_setval(pldoc_man_dir, Symbolic),
  320    call_cleanup(sgml_parse(Parser,
  321                            [ source(In),
  322                              syntax_errors(quiet),
  323                              call(begin, index_on_begin)
  324                            ]),
  325                 (   free_sgml_parser(Parser),
  326                     close(In),
  327                     nb_delete(pldoc_man_index),
  328                     nb_delete(pldoc_man_dir)
  329                 )).
 index_on_begin(+Element, +Attributes, +Parser) is semidet
Called from sgml_parse/2 in index_man_file/2. Element is the name of the element, Attributes the list of Name=Value pairs of the open attributes. Parser is the parser objects.
  338index_on_begin(dt, Attributes, Parser) :-
  339    memberchk(class=Def, Attributes),
  340    public_def(Def),
  341    get_sgml_parser(Parser, charpos(Offset)),
  342    get_sgml_parser(Parser, file(File)),
  343    sgml_parse(Parser,
  344               [ document(DT),
  345                 syntax_errors(quiet),
  346                 parse(content)
  347               ]),
  348    (   sub_term(element(a, AA, _), DT),
  349        member(Attr, ['data-obj', id, name]),
  350        memberchk(Attr=Id, AA),
  351        atom_to_object(Id, PI)
  352    ->  true
  353    ),
  354    nb_getval(pldoc_man_index, DD0),
  355    (   memberchk(dd(PI, File, _), DD0)
  356    ->  true
  357    ;   nb_setval(pldoc_man_index, [dd(PI, File, Offset)|DD0])
  358    ).
  359index_on_begin(dd, _, Parser) :-
  360    !,
  361    nb_getval(pldoc_man_index, DDList0), DDList0 \== [],
  362    nb_setval(pldoc_man_index, []),
  363    sgml_parse(Parser,
  364               [ document(DD),
  365                 syntax_errors(quiet),
  366                 parse(content)
  367               ]),
  368    summary(DD, Summary),
  369    nb_getval(pldoc_index_class, Class),
  370    reverse(DDList0, [dd(Object, File, Offset)|DDTail]),
  371    assert_index(Object, Summary, File, Class, Offset),
  372    forall(member(dd(Obj2,_,_), DDTail),
  373           assert_index(Obj2, Summary, File, Class, Offset)).
  374index_on_begin(div, Attributes, Parser) :-
  375    !,
  376    memberchk(class=title, Attributes),
  377    get_sgml_parser(Parser, charpos(Offset)),
  378    get_sgml_parser(Parser, file(File)),
  379    sgml_parse(Parser,
  380               [ document(DOM),
  381                 syntax_errors(quiet),
  382                 parse(content)
  383               ]),
  384    dom_to_text(DOM, Title),
  385    nb_getval(pldoc_index_class, Class),
  386    swi_local_path(File, Local),
  387    assert_index(section(0, '0', Local, File),
  388                 Title, File, Class, Offset).
  389index_on_begin(H, Attributes, Parser) :- % TBD: add class for document title.
  390    heading(H, Level),
  391    get_sgml_parser(Parser, charpos(Offset)),
  392    get_sgml_parser(Parser, file(File)),
  393    sgml_parse(Parser,
  394               [ document(Doc),
  395                 syntax_errors(quiet),
  396                 parse(content)
  397               ]),
  398    dom_section(Doc, Nr, Title),
  399    nb_getval(pldoc_index_class, Class),
  400    section_id(Attributes, Title, File, ID),
  401    assert_index(section(Level, Nr, ID, File),
  402                 Title, File, Class, Offset).
  403
  404assert_index(Object, Summary, File, Class, Offset) :-
  405    nb_current(pldoc_save_index, Out),
  406    !,
  407    map_section(Object1, Object, File),
  408    symbolic_file(File, Symbolic),
  409    format(Out, '~k.~n', [i(Object1, Summary, Symbolic, Class, Offset)]).
  410assert_index(Object, Summary, File, Class, Offset) :-
  411    assertz(man_index(Object, Summary, File, Class, Offset)).
  412
  413symbolic_file(File, Symbolic) :-
  414    nb_getval(pldoc_man_dir, Dir),
  415    Dir =.. [Alias,'.'],
  416    !,
  417    file_base_name(File, Base),
  418    Symbolic =.. [Alias,Base].
  419symbolic_file(File, File).
  420
  421section_id(Attributes, _Title, _, ID) :-
  422    memberchk(id=ID, Attributes),
  423    !.
  424section_id(_, "Bibliography", _, 'sec:bibliography') :- !.
  425section_id(_Attributes, Title, File, ID) :-
  426    atomic_list_concat(Words, ' ', Title),
  427    atomic_list_concat(Words, '_', ID0),
  428    atom_concat('sec:', ID0, ID),
  429    print_message(warning, pldoc(no_section_id(File, Title))).
  430
  431public_def(pubdef).
  432public_def(multidef).
 dom_section(+HeaderDOM, -NR, -Title) is semidet
NR is the section number (e.g. 1.1, 1.23) and Title is the title from a section header. The first clauses processes the style information from latex2html, emitting sections as:
<HN> <A name="sec:nr"><span class='sec-nr'>NR</span>|_|
                      <span class='sec-title'>Title</span>
  445dom_section(DOM, Nr, Title) :-
  446    sub_term([ element(span, A1, [Nr]) | Rest ], DOM),
  447    append(_Sep, [element(span, A2, TitleDOM)], Rest),
  448    memberchk(class='sec-nr', A1),
  449    memberchk(class='sec-title', A2),
  450    !,
  451    dom_to_text(TitleDOM, Title).
  452dom_section(DOM, Nr, Title) :-
  453    dom_to_text(DOM, Title),
  454    section_number(Title, Nr, Title).
  455
  456section_number(Title, Nr, PlainTitle) :-
  457    sub_atom(Title, 0, 1, _, Start),
  458    (   char_type(Start, digit)
  459    ->  true
  460    ;   char_type(Start, upper),
  461        sub_atom(Title, 1, 1, _, '.')       % A., etc: Appendices
  462    ),
  463    sub_atom(Title, B, _, A, ' '),
  464    !,
  465    sub_atom(Title, 0, B, _, Nr),
  466    sub_string(Title, _, A, 0, PlainTitle).
  467
  468heading(h1, 1).
  469heading(h2, 2).
  470heading(h3, 3).
  471heading(h4, 4).
 summary(+DOM, -Summary:string) is det
Summary is the first sentence of DOM.
  478summary(DOM, Summary) :-
  479    phrase(summary(DOM, _), SummaryCodes0),
  480    phrase(normalise_white_space(SummaryCodes), SummaryCodes0),
  481    string_codes(Summary, SummaryCodes).
  482
  483summary([], _) -->
  484    !,
  485    [].
  486summary(_, Done) -->
  487    { Done == true },
  488    !,
  489    [].
  490summary([element(_,_,Content)|T], Done) -->
  491    !,
  492    summary(Content, Done),
  493    summary(T, Done).
  494summary([CDATA|T], Done) -->
  495    { atom_codes(CDATA, Codes)
  496    },
  497    (   { Codes = [Period|Rest],
  498          code_type(Period, period),
  499          space(Rest)
  500        }
  501    ->  [ Period ],
  502        { Done = true }
  503    ;   { append(Sentence, [C, Period|Rest], Codes),
  504          code_type(Period, period),
  505          \+ code_type(C, period),
  506          space(Rest)
  507        }
  508    ->  string(Sentence),
  509        [C, Period],
  510        { Done = true }
  511    ;   string(Codes),
  512        summary(T, Done)
  513    ).
  514
  515string([]) -->
  516    [].
  517string([H|T]) -->
  518    [H],
  519    string(T).
  520
  521space([C|_]) :- code_type(C, space), !.
  522space([]).
 dom_to_text(+DOM, -Text)
Extract the text of a parsed HTML term. White-space in the result is normalised. See normalise_white_space//1.
  529dom_to_text(Dom, Text) :-
  530    phrase(cdata_list(Dom), CDATA),
  531    with_output_to(codes(Codes0),
  532                   forall(member(T, CDATA),
  533                          write(T))),
  534    phrase(normalise_white_space(Codes), Codes0),
  535    string_codes(Text, Codes).
  536
  537cdata_list([]) -->
  538    [].
  539cdata_list([H|T]) -->
  540    cdata(H),
  541    cdata_list(T).
  542
  543cdata(element(_, _, Content)) -->
  544    !,
  545    cdata_list(Content).
  546cdata(CDATA) -->
  547    { atom(CDATA) },
  548    !,
  549    [CDATA].
  550cdata(_) -->
  551    [].
 current_man_object(?Object) is nondet
  555current_man_object(Object) :-
  556    index_manual,
  557    man_index(Object, _, _, _, _).
 man_object_property(?Object, ?Property) is nondet
True when Property is a property of the given manual object. Defined properties are:
summary(-Text)
Summary text for the object.
id(ID)
Return unique id for the text, so we can remove duplicates
  569man_object_property(Object, summary(Summary)) :-
  570    index_manual,
  571    man_index(Object, Summary, _, _, _).
  572man_object_property(Object, id(File-CharNo)) :-
  573    manual_object(Object, _, File, _, CharNo).
  574
  575swi_local_path(Path, Local) :-
  576    atom(Path),
  577    is_absolute_file_name(Path),
  578    manual_root(RootSpec, Dir),
  579    absolute_file_name(RootSpec, SWI,
  580                       [ file_type(directory),
  581                         solutions(all)
  582                       ]),
  583    directory_file_path(SWI, ManLocal, Path),
  584    !,
  585    directory_file_path(Dir, ManLocal, Local).
  586
  587manual_root(swi_man_manual(.),   'Manual').
  588manual_root(swi_man_packages(.), 'packages').
 doc_object_identifier(+DocObject, -Identifier) is semidet
True when Identifier is the name of DocObject.
  594doc_object_identifier(Name/_, Name).
  595doc_object_identifier(Name//_, Name).
  596doc_object_identifier(_:Name/_, Name).
  597doc_object_identifier(_:Name//_, Name).
  598doc_object_identifier(M:_, M).
  599doc_object_identifier(f(Name/_), Name).
  600doc_object_identifier(c(Name), Name)