View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2006-2024, University of Amsterdam
    7                              VU University Amsterdam
    8                              CWI, Amsterdam
    9                              SWI-Prolog Solutions b.v.
   10    All rights reserved.
   11
   12    Redistribution and use in source and binary forms, with or without
   13    modification, are permitted provided that the following conditions
   14    are met:
   15
   16    1. Redistributions of source code must retain the above copyright
   17       notice, this list of conditions and the following disclaimer.
   18
   19    2. Redistributions in binary form must reproduce the above copyright
   20       notice, this list of conditions and the following disclaimer in
   21       the documentation and/or other materials provided with the
   22       distribution.
   23
   24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   25    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   26    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   27    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   28    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   29    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   30    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   31    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   32    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   33    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   34    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   35    POSSIBILITY OF SUCH DAMAGE.
   36*/
   37
   38:- module(pldoc_wiki,
   39          [ wiki_codes_to_dom/3,        % +Codes, +Args, -DOM
   40            wiki_lines_to_dom/3,        % +Lines, +Map, -DOM
   41            section_comment_header/3,   % +Lines, -Header, -RestLines
   42            summary_from_lines/2,       % +Lines, -Codes
   43            indented_lines/3,           % +Text, +PrefixChars, -Lines
   44            strip_leading_par/2,        % +DOM0, -DOM
   45            autolink_extension/2,       % ?Extension, ?Type
   46            autolink_file/2             % +FileName, -Type
   47          ]).   48:- use_module(library(lists)).   49:- use_module(library(debug)).   50:- use_module(library(error)).   51:- use_module(library(pairs)).   52:- use_module(library(option)).   53:- use_module(library(debug)).   54:- use_module(library(apply)).   55:- use_module(library(dcg/basics)).   56
   57:- use_module(doc_util).   58
   59
   60/** <module> PlDoc wiki parser
   61
   62This file defines the PlDoc wiki parser,  which parses both comments and
   63wiki text files. The original version of this SWI-Prolog wiki format was
   64largely modeled after Twiki (http://twiki.org/).  The current version is
   65extended to take many aspects from   markdown, in particular the doxygen
   66refinement thereof.
   67
   68@see http://www.stack.nl/~dimitri/doxygen/manual/markdown.html
   69*/
   70
   71:- multifile
   72    prolog:doc_wiki_face//2,        % -Out, +VarNames
   73    prolog:doc_url_expansion/3,     % +Alias(Rest), -HREF, -Label
   74    prolog:url_expansion_hook/3,    % +Term, -Ref, -Label
   75    prolog:doc_autolink_extension/2.% +Extension, -Type
   76
   77
   78                 /*******************************
   79                 *          WIKI PARSING        *
   80                 *******************************/
   81
   82%!  wiki_lines_to_dom(+Lines:lines, +Args:list(atom), -Term) is det
   83%
   84%   Translate a Wiki text into  an   HTML  term suitable for html//1
   85%   from the html_write library.
   86
   87wiki_lines_to_dom(Lines, Args, HTML) :-
   88    tokenize_lines(Lines, Tokens0),
   89    normalise_indentation(Tokens0, Tokens),
   90    wiki_structure(Tokens, -1, Pars),
   91    wiki_faces(Pars, Args, HTML).
   92
   93
   94%!  wiki_codes_to_dom(+String, +Args, -DOM) is det.
   95%
   96%   Translate a plain text into a DOM term.
   97%
   98%   @param String   Plain text.  Either a string or a list of codes.
   99
  100wiki_codes_to_dom(Codes, Args, DOM) :-
  101    indented_lines(Codes, [], Lines),
  102    wiki_lines_to_dom(Lines, Args, DOM).
  103
  104
  105%!  wiki_structure(+Lines:lines, +BaseIndent,
  106%!                 -Blocks:list(block)) is det
  107%
  108%   Get the structure in terms  of block-level elements: paragraphs,
  109%   lists and tables. This processing uses   a mixture of layout and
  110%   punctuation.
  111
  112wiki_structure([], _, []) :- !.
  113wiki_structure([_-[]|T], BI, Pars) :-          % empty lines
  114    !,
  115    wiki_structure(T, BI, Pars).
  116wiki_structure(Lines, _, [\tags(Tags)]) :-
  117    tags(Lines, Tags),
  118    !.
  119wiki_structure(Lines, BI, [P1|PL]) :-
  120    take_block(Lines, BI, P1, RestLines),
  121    wiki_structure(RestLines, BI, PL).
  122
  123%!  take_block(+Lines, +BaseIndent, ?Block, -RestLines) is semidet.
  124%
  125%   Take a block-structure from the input.  Defined block elements
  126%   are lists, table, hrule, section header and paragraph.
  127
  128take_block([_-[]|Lines], BaseIndent, Block, Rest) :-
  129    !,
  130    take_block(Lines, BaseIndent, Block, Rest).
  131take_block([N-_|_], BaseIndent, _, _) :-
  132    N < BaseIndent,
  133    !,
  134    fail.                           % less indented
  135take_block(Lines, BaseIndent, List, Rest) :-
  136    list_item(Lines, Type, Indent, LI, LIT, Rest0),
  137    !,
  138    Indent > BaseIndent,
  139    rest_list(Rest0, Type, Indent, LIT, [], Rest),
  140    List0 =.. [Type, LI],
  141    (   ul_to_dl(List0, List)
  142    ->  true
  143    ;   List0 = dl(Items)
  144    ->  List = dl(class=wiki, Items)
  145    ;   List = List0
  146    ).
  147take_block([N-['|'|RL1]|LT], _, Table, Rest) :-
  148    phrase(row(R0), RL1),
  149    take_table(LT, N, R0, Table, Rest),
  150    !.
  151take_block([0-[-,-|More]|LT], _, Block, LT) :-  % separation line
  152    maplist(=(-), More),
  153    !,
  154    Block = hr([]).
  155take_block([_-Line|LT], _, Block, LT) :-        % separation line
  156    ruler(Line),
  157    !,
  158    Block = hr([]).
  159take_block([_-[@|_]], _, _, _) :-              % starts @tags section
  160    !,
  161    fail.
  162take_block(Lines, _BaseIndent, Section, RestLines) :-
  163    section_header(Lines, Section, RestLines),
  164    !.
  165take_block([_-Verb|Lines], _, Verb, Lines) :-
  166    verbatim_term(Verb),
  167    !.
  168take_block([I-L1|LT], BaseIndent, Elem, Rest) :-
  169    !,
  170    append(L1, PT, Par),
  171    rest_par(LT, PT, I, BaseIndent, MaxI, Rest),
  172    (   MaxI >= BaseIndent+16
  173    ->  Elem = center(Par)
  174    ;   phrase(blockquote(BQ), Par)
  175    ->  Elem = blockquote(BQ)
  176    ;   Elem = p(Par)
  177    ).
  178take_block([Verb|Lines], _, Verb, Lines).
  179
  180blockquote(Clean) -->
  181    [>, ' '],
  182    bq_lines(Clean).
  183
  184bq_lines([' '|Par]) -->
  185    ['\n'], !, [>,' '],
  186    bq_lines(Par).
  187bq_lines([H|T]) -->
  188    [H],
  189    bq_lines(T).
  190bq_lines([]) -->
  191    [].
  192
  193
  194%!  ruler(+Line) is semidet.
  195%
  196%   True if Line contains 3 ruler chars and otherwise spaces.
  197
  198ruler([C0|Line]) :-
  199    rule_char(C0),
  200    phrase(ruler(C0, 1), Line).
  201
  202ruler(C, N) --> [C], !, { N2 is N+1 }, ruler(C, N2).
  203ruler(C, N) --> [' '], !, ruler(C, N).
  204ruler(_, N) --> { N >= 3 }.
  205
  206rule_char('-').
  207rule_char('_').
  208rule_char('*').
  209
  210%!  list_item(+Lines, ?Type, ?Indent, -LI0, -LIT, -RestLines) is det.
  211%
  212%   Create a list-item. Naturally this should produce a single item,
  213%   but DL lists produce two items, so   we create the list of items
  214%   as a difference list.
  215%
  216%   @tbd    Pass base-indent
  217
  218list_item([Indent-Line|LT], Type, Indent, Items, ItemT, Rest) :-
  219    !,
  220    list_item_prefix(Type, Line, L1),
  221    (   Type == dl
  222    ->  split_dt(L1, DT0, DD1),
  223        append(DD1, LIT, DD),
  224        strip_ws_tokens(DT0, DT),
  225        Items = [dt(DT),dd(DD)|ItemT]
  226    ;   append(L1, LIT, LI0),
  227        Items = [li(LI0)|ItemT]
  228    ),
  229    rest_list_item(LT, Type, Indent, LIT, Rest).
  230
  231%!  rest_list_item(+Lines, +Type, +Indent, -RestItem, -RestLines) is det
  232%
  233%   Extract the remainder (after the first line) of a list item.
  234
  235rest_list_item(Lines, _Type, Indent, RestItem, RestLines) :-
  236    take_blocks_at_indent(Lines, Indent, Blocks, RestLines),
  237    (   Blocks = [p(Par)|MoreBlocks]
  238    ->  append(['\n'|Par], MoreBlocks, RestItem)
  239    ;   RestItem = Blocks
  240    ).
  241
  242%!  take_blocks_at_indent(+Lines, +Indent, -Pars, -RestLines) is det.
  243%
  244%   Process paragraphs and verbatim blocks (==..==) in bullet-lists.
  245
  246take_blocks_at_indent(Lines, _, [], Lines) :-
  247    skip_empty_lines(Lines, Lines1),
  248    section_header(Lines1, _, _),
  249    !.
  250take_blocks_at_indent(Lines, N, [Block|RestBlocks], RestLines) :-
  251    take_block(Lines, N, Block, Rest0),
  252    !,
  253    take_blocks_at_indent(Rest0, N, RestBlocks, RestLines).
  254take_blocks_at_indent(Lines, _, [], Lines).
  255
  256
  257%!  rest_list(+Lines, +Type, +Indent,
  258%!            -Items, -ItemTail, -RestLines) is det.
  259
  260rest_list(Lines, Type, N, Items, IT, Rest) :-
  261    skip_empty_lines(Lines, Lines1),
  262    list_item(Lines1, Type, N, Items, IT0, Rest0),
  263    !,
  264    rest_list(Rest0, Type, N, IT0, IT, Rest).
  265rest_list(Rest, _, _, IT, IT, Rest).
  266
  267%!  list_item_prefix(?Type, +Line, -Rest) is det.
  268
  269list_item_prefix(ul, [*, ' '|T], T) :- !.
  270list_item_prefix(ul, [-, ' '|T], T) :- !.
  271list_item_prefix(dl, [$, ' '|T], T) :-
  272    split_dt(T, _, _),
  273    !.
  274list_item_prefix(ol, [w(N), '.', ' '|T], T) :-
  275    atom_codes(N, [D]),
  276    between(0'0, 0'9, D).
  277
  278%!  split_dt(+LineAfterDollar, -DT, -Rest)
  279%
  280%   First see whether the entire line is the item. This allows
  281%   creating items holding : by using $ <tokens> :\n
  282
  283split_dt(In, DT, []) :-
  284    append(DT, [':'], In),
  285    !.
  286split_dt(In, DT, Rest) :-
  287    append(DT, [':'|Rest0], In),
  288    (   Rest0 == []
  289    ->  Rest = []
  290    ;   Rest0 = [' '|Rest]
  291    ),
  292    !.
  293
  294
  295%!  ul_to_dl(+UL, -DL) is semidet.
  296%
  297%   Translate an UL list into a DL list   if  all entries are of the
  298%   form "* <term> nl, <description>" and at least one <description>
  299%   is   non-empty,   or    all    items     are    of    the   form
  300%   [[PredicateIndicator]].
  301
  302ul_to_dl(ul(Items), Description) :-
  303    term_items(Items, DLItems, []),
  304    (   terms_to_predicate_includes(DLItems, Preds)
  305    ->  Description = dl(class(predicates), Preds)
  306    ;   member(dd(DD), DLItems), DD \== []
  307    ->  Description = dl(class(termlist), DLItems)
  308    ).
  309
  310term_items([], T, T).
  311term_items([LI|LIs], DLItems, Tail) :-
  312    term_item(LI, DLItems, Tail1),
  313    term_items(LIs, Tail1, Tail).
  314
  315%!  term_item(+LI, -DLItem, ?Tail) is semidet.
  316%
  317%   If LI is of the form <Term> followed  by a newline, return it as
  318%   dt-dd  tuple.  The  <dt>  item    contains  a  term
  319%
  320%       \term(Text, Term, Bindings).
  321
  322term_item(li(Tokens),
  323          [ dt(class=term, \term(Text, Term, Bindings)),
  324            dd(Descr)
  325          | Tail
  326          ], Tail) :-
  327    (   (   append(TermTokens, ['\n'|Descr], Tokens)
  328        ->  true
  329        ;   TermTokens = Tokens,
  330            Descr = []
  331        )
  332    ->  with_output_to(string(Tmp),
  333                       ( forall(member(T, TermTokens),
  334                                write_token(T)),
  335                         write(' .\n'))),
  336        E = error(_,_),
  337        catch(setup_call_cleanup(
  338                  open_string(Tmp, In),
  339                  ( read_dt_term(In, Term, Bindings),
  340                    read_dt_term(In, end_of_file, []),
  341                    atom_string(Text, Tmp)
  342                  ),
  343                  close(In)),
  344              E, fail)
  345    ).
  346
  347write_token(w(X)) :-
  348    !,
  349    write(X).
  350write_token(X) :-
  351    write(X).
  352
  353read_dt_term(In, Term, Bindings) :-
  354    read_term(In, Term,
  355              [ variable_names(Bindings),
  356                module(pldoc_modes)
  357              ]).
  358
  359terms_to_predicate_includes([], []).
  360terms_to_predicate_includes([dt(class=term, \term(_, [[PI]], [])), dd([])|T0],
  361                            [\include(PI, predicate, [])|T]) :-
  362    is_pi(PI),
  363    terms_to_predicate_includes(T0, T).
  364
  365is_pi(Name/Arity) :-
  366    atom(Name),
  367    integer(Arity),
  368    between(0, 20, Arity).
  369is_pi(Name//Arity) :-
  370    atom(Name),
  371    integer(Arity),
  372    between(0, 20, Arity).
  373
  374
  375%!  row(-Cells)// is det.
  376
  377row([C0|CL]) -->
  378    cell(C0),
  379    !,
  380    row(CL).
  381row([]) -->
  382    [].
  383
  384cell(td(C)) -->
  385    face_tokens(C0),
  386    ['|'],
  387    !,
  388    { strip_ws_tokens(C0, C)
  389    }.
  390
  391face_tokens([]) -->
  392    [].
  393face_tokens(Tokens) -->
  394    face_token(H),                          % Deal with embedded *|...|*, etc.
  395    token('|'),
  396    face_tokens(Face),
  397    token('|'),
  398    face_token(H),
  399    !,
  400    { append([[H,'|'], Face, ['|', H], Rest], Tokens) },
  401    face_tokens(Rest).
  402face_tokens([H|T]) -->
  403    token(H),
  404    face_tokens(T).
  405
  406face_token(=) --> [=].
  407face_token(*) --> [*].
  408face_token('_') --> ['_'].
  409
  410take_table(Lines, Indent, Row0, Table, Rest) :-
  411    rest_table(Lines, Indent, Rows, Rest),
  412    (   Rows = [align(Align)|Rows1]
  413    ->  maplist(align_row(Align), Rows1, Rows2),
  414        (   maplist(=(td([])), Row0)        % empty header
  415        ->  Table = table(class=wiki, Rows2)
  416        ;   maplist(td_to_th, Row0, Header),
  417            Table = table(class=wiki, [tr(Header)|Rows2])
  418        )
  419    ;   Table = table(class=wiki, [tr(Row0)|Rows])
  420    ).
  421
  422td_to_th(td(X), th(X)) :- !.
  423td_to_th(X, X).
  424
  425align_row(Align, tr(Row0), tr(Row)) :-
  426    align_cells(Align, Row0, Row).
  427
  428align_cells([Align|AT], [Cell0|T0], [Cell|T]) :-
  429    align_cell(Align, Cell0, Cell),
  430    align_cells(AT, T0, T).
  431align_cells(_, Cells, Cells).
  432
  433align_cell(Align, td(Content), td(class=Align, Content)).
  434
  435%!  rest_table(+Lines, +Indent, -Rows, -RestLines).
  436
  437rest_table([N-Line|LT], N, [align(Align)|RL], Rest) :-
  438    phrase(column_alignment(Align), Line),
  439    !,
  440    rest_table2(LT, N, RL, Rest).
  441rest_table(Lines, N, RL, Rest) :-
  442    rest_table2(Lines, N, RL, Rest).
  443
  444rest_table2([N-['|'|RL1]|LT], N, [tr(R0)|RL], Rest) :-
  445    !,
  446    phrase(row(R0), RL1),
  447    rest_table2(LT, N, RL, Rest).
  448rest_table2(Rest, _, [], Rest).
  449
  450%!  column_alignment(-Alignment) is semidet.
  451%
  452%   Process an alignment line.
  453
  454column_alignment([H|T]) -->
  455    ['|'],
  456    (   colspec(H)
  457    ->  column_alignment(T)
  458    ;   {T=[]}
  459    ).
  460
  461colspec(Align) -->
  462    ws_tokens, [':'], dashes3,
  463    (   [':']
  464    ->  {Align = center}
  465    ;   {Align = left}
  466    ),
  467    ws_tokens.
  468colspec(Align) -->
  469    ws_tokens, dashes3,
  470    (   [':']
  471    ->  {Align = right}
  472    ;   {Align = left}
  473    ),
  474    ws_tokens.
  475
  476dashes3 -->
  477    [-,-,-],
  478    dashes.
  479
  480dashes --> [-], !, dashes.
  481dashes --> [].
  482
  483ws_tokens --> [' '], !, ws_tokens.
  484ws_tokens --> [].
  485
  486%!  rest_par(+Lines, -Par,
  487%!           +BaseIndent, +MaxI0, -MaxI, -RestLines) is det.
  488%
  489%   Take the rest of a paragraph. Paragraphs   are  ended by a blank
  490%   line or the start of a list-item.   The latter is a bit dubious.
  491%   Why not a general  block-level   object?  The current definition
  492%   allows for writing lists without a blank line between the items.
  493
  494rest_par([], [], BI, MaxI0, MaxI, []) :-
  495    !,
  496    MaxI is max(BI, MaxI0).
  497rest_par([_-[]|Rest], [], _, MaxI, MaxI, Rest) :- !.
  498rest_par(Lines, [], _, MaxI, MaxI, Lines) :-
  499    Lines = [_-Verb|_],
  500    verbatim_term(Verb),
  501    !.
  502rest_par([I-L|Rest], [], _, MaxI, MaxI, [I-L|Rest]) :-
  503    list_item_prefix(_, L, _),
  504    !.
  505rest_par([I-L1|LT], ['\n'|Par], BI, MaxI0, MaxI, Rest) :-
  506    append(L1, PT, Par),
  507    MaxI1 is max(I, MaxI0),
  508    rest_par(LT, PT, BI, MaxI1, MaxI, Rest).
  509
  510
  511%!  section_header(+Lines, -Section, -RestLines) is semidet.
  512%
  513%   Get a section line from the input.
  514
  515section_header([_-L1|LT], Section, LT) :-
  516    twiki_section_line(L1, Section),
  517    !.
  518section_header([0-L1|LT], Section, LT) :-
  519    md_section_line(L1, Section),
  520    !.
  521section_header([_-L1,0-L2|LT], Section, LT) :-
  522    md_section_line(L1, L2, Section),
  523    !.
  524
  525%!  twiki_section_line(+Tokens, -Section) is semidet.
  526%
  527%   Extract a section using the Twiki   conventions. The section may
  528%   be preceeded by [Word], in which case we generate an anchor name
  529%   Word for the section.
  530
  531twiki_section_line([-,-,-|Rest], Section) :-
  532    plusses(Rest, Section).
  533
  534plusses([+, ' '|Rest], h1(Attrs, Content)) :-
  535    hdr_attributes(Rest, Attrs, Content).
  536plusses([+, +, ' '|Rest], h2(Attrs, Content)) :-
  537    hdr_attributes(Rest, Attrs, Content).
  538plusses([+, +, +, ' '|Rest], h3(Attrs, Content)) :-
  539    hdr_attributes(Rest, Attrs, Content).
  540plusses([+, +, +, +, ' '|Rest], h4(Attrs, Content)) :-
  541    hdr_attributes(Rest, Attrs, Content).
  542
  543hdr_attributes(List, Attrs, Content) :-
  544    strip_leading_ws(List, List2),
  545    (   List2 = ['[',w(Name),']'|List3]
  546    ->  strip_ws_tokens(List3, Content),
  547        Attrs = [class(wiki), id(Name)]
  548    ;   Attrs = class(wiki),
  549        strip_ws_tokens(List, Content)
  550    ).
  551
  552%!  md_section_line(+Tokens, -Section) is semidet.
  553%
  554%   Handle markdown section lines staring with #
  555
  556md_section_line([#, ' '|Rest], h1(Attrs, Content)) :-
  557    md_section_attributes(Rest, Attrs, Content).
  558md_section_line([#, #, ' '|Rest], h2(Attrs, Content)) :-
  559    md_section_attributes(Rest, Attrs, Content).
  560md_section_line([#, #, #, ' '|Rest], h3(Attrs, Content)) :-
  561    md_section_attributes(Rest, Attrs, Content).
  562md_section_line([#, #, #, #, ' '|Rest], h4(Attrs, Content)) :-
  563    md_section_attributes(Rest, Attrs, Content).
  564
  565md_section_attributes(List, Attrs, Content) :-
  566    phrase((tokens(Content), [' '], section_label(Label)), List),
  567    !,
  568    Attrs = [class(wiki), id(Label)].
  569md_section_attributes(Content, Attrs, Content) :-
  570    Attrs = [class(wiki)].
  571
  572section_label(Label) -->
  573    [ '{', '#', w(Name) ],
  574    label_conts(Cont), ['}'],
  575    !,
  576    { atomic_list_concat([Name|Cont], Label) }.
  577
  578label_conts([H|T]) --> label_cont(H), !, label_conts(T).
  579label_conts([]) --> [].
  580
  581label_cont(-) --> [-].
  582label_cont(Name) --> [w(Name)].
  583
  584
  585md_section_line(Line1, Line2, Header) :-
  586    Line1 \== [],
  587    section_underline(Line2, Type),
  588    is_list(Line1),
  589    phrase(wiki_words(_), Line1),  % Should not have structure elements
  590    !,
  591    (   phrase(labeled_section_line(Title, Attrs), Line1)
  592    ->  true
  593    ;   Title = Line1,
  594        Attrs = []
  595    ),
  596    Header =.. [Type, [class(wiki)|Attrs], Title].
  597
  598section_underline([=,=,=|T], h1) :-
  599    maplist(=(=), T),
  600    !.
  601section_underline([-,-,-|T], h2) :-
  602    maplist(=(-), T),
  603    !.
  604
  605labeled_section_line(Title, Attrs) -->
  606    tokens(Title), [' '], section_label(Label),
  607    !,
  608    { Attrs = [id(Label)] }.
  609
  610
  611%!  strip_ws_tokens(+Tokens, -Stripped)
  612%
  613%   Strip leading and trailing whitespace from a token list.  Note
  614%   the the whitespace is already normalised.
  615
  616strip_ws_tokens([' '|T0], T) :-
  617    !,
  618    strip_ws_tokens(T0, T).
  619strip_ws_tokens(L0, L) :-
  620    append(L, [' '], L0),
  621    !.
  622strip_ws_tokens(L, L).
  623
  624
  625%!  strip_leading_ws(+Tokens, -Stripped) is det.
  626%
  627%   Strip leading whitespace from a token list.
  628
  629strip_leading_ws([' '|T], T) :- !.
  630strip_leading_ws(T, T).
  631
  632
  633                 /*******************************
  634                 *             TAGS             *
  635                 *******************************/
  636
  637%!  tags(+Lines:lines, -Tags) is semidet.
  638%
  639%   If the first line is a @tag, read the remainder of the lines to
  640%   a list of \tag(Name, Value) terms.
  641
  642tags(Lines, Tags) :-
  643    collect_tags(Lines, Tags0),
  644    keysort(Tags0, Tags1),
  645    pairs_values(Tags1, Tags2),
  646    combine_tags(Tags2, Tags).
  647
  648%!  collect_tags(+IndentedLines, -Tags) is semidet
  649%
  650%   Create a list Order-tag(Tag,Tokens) for   each @tag encountered.
  651%   Order is the desired position as defined by tag_order/2.
  652%
  653%   @tbd Tag content is  often  poorly   aligned.  We  now  find the
  654%   alignment of subsequent lines  and  assume   the  first  line is
  655%   alligned with the remaining lines.
  656
  657collect_tags([], []).
  658collect_tags([Indent-[@,String|L0]|Lines], [Order-tag(Tag,Value)|Tags]) :-
  659    tag_name(String, Tag, Order),
  660    !,
  661    strip_leading_ws(L0, L),
  662    rest_tag(Lines, Indent, VT, RestLines),
  663    normalise_indentation(VT, VT1),
  664    wiki_structure([0-L|VT1], -1, Value0),
  665    strip_leading_par(Value0, Value),
  666    collect_tags(RestLines, Tags).
  667
  668
  669%!  tag_name(+String, -Tag:atom, -Order:int) is semidet.
  670%
  671%   If String denotes a know tag-name,
  672
  673tag_name(w(Name), Tag, Order) :-
  674    (   renamed_tag(Name, Tag, Level),
  675        tag_order(Tag, Order)
  676    ->  print_message(Level, pldoc(deprecated_tag(Name, Tag)))
  677    ;   tag_order(Name, Order)
  678    ->  Tag = Name
  679    ;   print_message(warning, pldoc(unknown_tag(Name))),
  680        fail
  681    ).
  682
  683
  684rest_tag([], _, [], []) :- !.
  685rest_tag(Lines, Indent, [], Lines) :-
  686    Lines = [Indent-[@,Word|_]|_],
  687    tag_name(Word, _, _),
  688    !.
  689rest_tag([L|Lines0], Indent, [L|VT], Lines) :-
  690    rest_tag(Lines0, Indent, VT, Lines).
  691
  692
  693%!  renamed_tag(+DeprecatedTag:atom, -Tag:atom, -Warn) is semidet.
  694%
  695%   Declaration for deprecated tags.
  696
  697renamed_tag(exception, throws, warning).
  698renamed_tag(param,     arg,    silent).
  699
  700
  701%!  tag_order(+Tag:atom, -Order:int) is semidet.
  702%
  703%   Both declares the know tags and  their expected order. Currently
  704%   the tags are forced into  this   order  without  warning. Future
  705%   versions may issue a warning if the order is inconsistent.
  706
  707:- multifile
  708    pldoc:tag_order/2.  709
  710tag_order(Tag, Order) :-
  711    pldoc:tag_order(Tag, Order),
  712    !.
  713tag_order(arg,         100).
  714tag_order(error,       200).            % same as throw
  715tag_order(throws,      300).
  716tag_order(author,      400).
  717tag_order(version,     500).
  718tag_order(see,         600).
  719tag_order(deprecated,  700).
  720tag_order(compat,      800).            % PlDoc extension
  721tag_order(copyright,   900).
  722tag_order(license,    1000).
  723tag_order(bug,        1100).
  724tag_order(tbd,        1200).
  725tag_order(since,      1300).
  726
  727%!  combine_tags(+Tags:list(tag(Key, Value)), -Tags:list) is det.
  728%
  729%   Creates the final tag-list.  Tags is a list of
  730%
  731%           * \params(list(param(Name, Descr)))
  732%           * \tag(Name, list(Descr))
  733%
  734%   Descr is a list of tokens.
  735
  736combine_tags([], []).
  737combine_tags([tag(arg, V1)|T0], [\args([P1|PL])|Tags]) :-
  738    !,
  739    arg_tag(V1, P1),
  740    arg_tags(T0, PL, T1),
  741    combine_tags(T1, Tags).
  742combine_tags([tag(Tag,V0)|T0], [\tag(Tag, [V0|Vs])|T]) :-
  743    same_tag(Tag, T0, T1, Vs),
  744    combine_tags(T1, T).
  745
  746arg_tag([PT|Descr0], arg(PN, Descr)) :-
  747    word_of(PT, PN),
  748    strip_leading_ws(Descr0, Descr).
  749
  750word_of(w(W), W) :- !.                  % TBD: check non-word arg
  751word_of(W, W).
  752
  753arg_tags([tag(arg, V1)|T0], [P1|PL], T) :-
  754    !,
  755    arg_tag(V1, P1),
  756    arg_tags(T0, PL, T).
  757arg_tags(T, [], T).
  758
  759same_tag(Tag, [tag(Tag, V)|T0], T, [V|Vs]) :-
  760    !,
  761    same_tag(Tag, T0, T, Vs).
  762same_tag(_, L, L, []).
  763
  764
  765                 /*******************************
  766                 *             FACES            *
  767                 *******************************/
  768
  769%!  wiki_faces(+Structure, +ArgNames, -HTML) is det.
  770%
  771%   Given the wiki structure, analyse the content of the paragraphs,
  772%   list items and table cells and apply font faces and links.
  773
  774wiki_faces([dt(Class, \term(Text, Term, Bindings)), dd(Descr0)|T0],
  775           ArgNames,
  776           [dt(Class, \term(Text, Term, Bindings)), dd(Descr)|T]) :-
  777    !,
  778    varnames(Bindings, VarNames, ArgNames),
  779    wiki_faces(Descr0, VarNames, Descr),
  780    wiki_faces(T0, ArgNames, T).
  781wiki_faces(DOM0, ArgNames, DOM) :-
  782    structure_term(DOM0, Functor, Content0),
  783    !,
  784    wiki_faces_list(Content0, ArgNames, Content),
  785    structure_term(DOM, Functor, Content).
  786wiki_faces(Verb, _, Verb) :-
  787    verbatim_term(Verb),
  788    !.
  789wiki_faces(Content0, ArgNames, Content) :-
  790    assertion(is_list(Content0)),
  791    phrase(wiki_faces(Content, ArgNames), Content0),
  792    !.
  793
  794varnames([], List, List).
  795varnames([Name=_|T0], [Name|T], List) :-
  796    varnames(T0, T, List).
  797
  798wiki_faces_list([], _, []).
  799wiki_faces_list([H0|T0], Args, [H|T]) :-
  800    wiki_faces(H0, Args, H),
  801    wiki_faces_list(T0, Args, T).
  802
  803%!  structure_term(+Term, -Functor, -Content) is semidet.
  804%!  structure_term(-Term, +Functor, +Content) is det.
  805%
  806%   (Un)pack a term describing structure, so  we can process Content
  807%   and re-pack the structure.
  808
  809structure_term(\tags(Tags), tags, [Tags]) :- !.
  810structure_term(\args(Params), args, [Params]) :- !.
  811structure_term(arg(Name,Descr), arg(Name), [Descr]) :- !.
  812structure_term(\tag(Name,Value), tag(Name), [Value]) :- !.
  813structure_term(\include(What,Type,Opts), include(What,Type,Opts), []) :- !.
  814structure_term(dl(Att, Args), dl(Att), [Args]) :- !.
  815structure_term(dt(Att, Args), dt(Att), [Args]) :- !.
  816structure_term(table(Att, Args), table(Att), [Args]) :- !.
  817structure_term(td(Att, Args), td(Att), [Args]) :- !.
  818structure_term(h1(Att, Args), h1(Att), [Args]) :- !.
  819structure_term(h2(Att, Args), h2(Att), [Args]) :- !.
  820structure_term(h3(Att, Args), h3(Att), [Args]) :- !.
  821structure_term(h4(Att, Args), h4(Att), [Args]) :- !.
  822structure_term(hr(Att), hr(Att), []) :- !.
  823structure_term(p(Args), p, [Args]) :- !.
  824structure_term(Term, Functor, Args) :-
  825    structure_term_any(Term, Functor, Args).
  826
  827structure_term(Term) :-
  828    structure_term_any(Term, _Functor, _Args).
  829
  830structure_term_any(Term, Functor, Args) :-
  831    functor(Term, Functor, 1),
  832    structure_tag(Functor),
  833    !,
  834    Term =.. [Functor|Args].
  835
  836structure_tag(ul).
  837structure_tag(ol).
  838structure_tag(dl).
  839structure_tag(li).
  840structure_tag(dt).
  841structure_tag(dd).
  842structure_tag(table).
  843structure_tag(tr).
  844structure_tag(td).
  845structure_tag(th).
  846structure_tag(blockquote).
  847structure_tag(center).
  848
  849
  850%!  verbatim_term(?Term) is det
  851%
  852%   True if Term must be passes verbatim.
  853
  854verbatim_term(pre(_,_)).
  855verbatim_term(\term(_,_,_)).
  856
  857%!  matches(:Goal, -Input, -Last)//
  858%
  859%   True when Goal runs successfully on the DCG input and Input
  860%   is the list of matched tokens.
  861
  862:- meta_predicate matches(2, -, -, ?, ?).  863
  864matches(Goal, Input, Last, List, Rest) :-
  865    call(Goal, List, Rest),
  866    input(List, Rest, Input, Last).
  867
  868input([H|T0], Rest, Input, Last) :-
  869    (   T0 == Rest
  870    ->  Input = [H],
  871        Last = H
  872    ;   Input = [H|T],
  873        input(T0, Rest, T, Last)
  874    ).
  875
  876
  877%!  wiki_faces(-WithFaces, +ArgNames)// is nondet.
  878%!  wiki_faces(-WithFaces, +ArgNames, +Options)// is nondet.
  879%
  880%   Apply font-changes and automatic  links   to  running  text. The
  881%   faces are applied after discovering   the structure (paragraphs,
  882%   lists, tables, keywords).
  883%
  884%   @arg Options is a dict, minimally containing `depth`
  885
  886wiki_faces(WithFaces, ArgNames, List, Rest) :-
  887    default_faces_options(Options),
  888    catch(wiki_faces(WithFaces, ArgNames, Options, List, Rest),
  889          pldoc(depth_limit),
  890          failed_faces(WithFaces, List, Rest)).
  891
  892default_faces_options(_{depth:5}).
  893
  894failed_faces(WithFaces) -->
  895    { debug(markdown(overflow), 'Depth limit exceeded', []) },
  896    wiki_words(WithFaces).
  897
  898wiki_faces([EmphTerm|T], ArgNames, Options) -->
  899    emphasis_seq(EmphTerm, ArgNames, Options),
  900    !,
  901    wiki_faces_int(T, ArgNames).
  902wiki_faces(Faces, ArgNames, Options) -->
  903    wiki_faces_int(Faces, ArgNames, Options).
  904
  905wiki_faces_int(WithFaces, ArgNames) -->
  906    { default_faces_options(Options)
  907    },
  908    wiki_faces_int(WithFaces, ArgNames, Options).
  909
  910wiki_faces_int([], _, _) -->
  911    [].
  912wiki_faces_int(List, ArgNames, Options) -->
  913    wiki_face(H, ArgNames, Options),
  914    !,
  915    {   is_list(H)
  916    ->  append(H, T, List)
  917    ;   List = [H|T]
  918    },
  919    wiki_faces(T, ArgNames, Options).
  920wiki_faces_int([Before,EmphTerm|T], ArgNames, Options) -->
  921    emphasis_before(Before),
  922    emphasis_seq(EmphTerm, ArgNames, Options),
  923    !,
  924    wiki_faces_int(T, ArgNames, Options).
  925wiki_faces_int([H|T], ArgNames, Options) -->
  926    wiki_face_simple(H, ArgNames, Options),
  927    !,
  928    wiki_faces_int(T, ArgNames, Options).
  929
  930next_level(Options0, Options) -->
  931    {   succ(NewDepth, Options0.depth)
  932    ->  Options = Options0.put(depth, NewDepth)
  933    ;   throw(pldoc(depth_limit))
  934    }.
  935
  936%!  prolog:doc_wiki_face(-Out, +VarNames)// is semidet.
  937%!  prolog:doc_wiki_face(-Out, +VarNames, +Options0)// is semidet.
  938%
  939%   Hook that can be  used  to   provide  additional  processing for
  940%   additional _inline_ wiki constructs.  The DCG list is a list of
  941%   tokens.  Defined tokens are:
  942%
  943%     - w(Atom)
  944%     Recognised word (alphanumerical)
  945%     - Atom
  946%     Single character atom representing punctuation marks or the
  947%     atom =|' '|= (space), representing white-space.
  948%
  949%   The  Out  variable  is  input  for    the  backends  defined  in
  950%   doc_latex.pl and doc_html.pl. Roughly, these   are terms similar
  951%   to what html//1 from library(http/html_write) accepts.
  952
  953wiki_face(Out, Args, _) -->
  954    prolog:doc_wiki_face(Out, Args),
  955    !.
  956wiki_face(var(Arg), ArgNames, _) -->
  957    [w(Arg)],
  958    { memberchk(Arg, ArgNames)
  959    },
  960    !.
  961wiki_face(b(Bold), ArgNames, Options) -->
  962    [*,'|'], string(Tokens), ['|',*],
  963    !,
  964    { phrase(wiki_faces(Bold, ArgNames, Options), Tokens) }.
  965wiki_face(i(Italic), ArgNames, Options) -->
  966    ['_','|'], string(Tokens), ['|','_'],
  967    !,
  968    { phrase(wiki_faces(Italic, ArgNames, Options), Tokens) }.
  969wiki_face(strong(Strong), ArgNames, Options) -->
  970    ['_','_'], string(Tokens), ['_','_'],
  971    !,
  972    { phrase(wiki_faces(Strong, ArgNames, Options), Tokens) }.
  973wiki_face(strong(Strong), ArgNames, Options) -->
  974    ['*','*'], string(Tokens), ['*','*'],
  975    !,
  976    { phrase(wiki_faces(Strong, ArgNames, Options), Tokens) }.
  977wiki_face(code(Code), _, _) -->
  978    [=], eq_code_words(Words), [=],
  979    !,
  980    { atomic_list_concat(Words, Code) }.
  981wiki_face(code(Code), _, _) -->
  982    [=,'|'], wiki_words(Code), ['|',=],
  983    !.
  984wiki_face(PredRef, _, _) -->
  985    ['`'], take_predref(PredRef), ['`'],
  986    !.
  987wiki_face(\nopredref(Pred), _, _) -->
  988    ['`', '`'], take_predref(\predref(Pred)), ['`', '`'],
  989    !.
  990wiki_face([flag, ' ', \flagref(Flag)], _, _) -->
  991    [ w('flag'), ' ', '`', w(Flag), '`' ],
  992    { current_prolog_flag(Flag, _) },
  993    !.
  994wiki_face(code(Code), _, _) -->
  995    ['`','`'], wiki_words(Code), ['`','`'],
  996    !.
  997wiki_face(Code, _, _) -->
  998    (   ['`'], code_words(Words), ['`']
  999    ->  { atomic_list_concat(Words, Text),
 1000          E = error(_,_),
 1001          catch(atom_to_term(Text, Term, Vars), E, fail),
 1002          !,
 1003          code_face(Text, Term, Vars, Code)
 1004        }
 1005    ).
 1006wiki_face(Face, _, Options) -->
 1007    [ w(Name) ], arg_list(List),
 1008    { atomic_list_concat([Name|List], Text),
 1009      E = error(_,_),
 1010      catch(atom_to_term(Text, Term, Vars), E, fail),
 1011      term_face(Text, Term, Vars, Face, Options)
 1012    },
 1013    !.
 1014wiki_face(br([]), _, _) -->
 1015    [<,w(br),>,'\n'], !.
 1016wiki_face(br([]), _, _) -->
 1017    [<,w(br),/,>,'\n'], !.
 1018        % Below this, we only do links.
 1019wiki_face(_, _, Options) -->
 1020    { Options.get(link) == false,
 1021      !,
 1022      fail
 1023    }.
 1024wiki_face(PredRef, _, _) -->
 1025    take_predref(PredRef),
 1026    !.
 1027wiki_face(\cite(Citations), _, _) -->
 1028    ['['], citations(Citations), [']'].
 1029wiki_face(\include(Name, Type, Options), _, _) -->
 1030    ['[','['], file_name(Base, Ext), [']',']'],
 1031    { autolink_extension(Ext, Type),
 1032      !,
 1033      file_name_extension(Base, Ext, Name),
 1034      resolve_file(Name, Options, [])
 1035    },
 1036    !.
 1037wiki_face(\include(Name, Type, [caption(Caption)|Options]), _, _) -->
 1038    (   ['!','['], tokens(100, Caption), [']','(']
 1039    ->  file_name(Base, Ext), [')'],
 1040        { autolink_extension(Ext, Type),
 1041          !,
 1042          file_name_extension(Base, Ext, Name),
 1043          resolve_file(Name, Options, [])
 1044        }
 1045    ),
 1046    !.
 1047wiki_face(Link, ArgNames, Options) -->          % TWiki: [[Label][Link]]
 1048    (   ['[','['], wiki_label(Label, ArgNames, Options), [']','[']
 1049    ->  wiki_link(Link, [label(Label), relative(true), end(']')]),
 1050        [']',']'], !
 1051    ).
 1052wiki_face(Link, ArgNames, Options) -->          % Markdown: [Label](Link)
 1053    (   ['['], wiki_label(Label, ArgNames, Options), [']','(']
 1054    ->  wiki_link(Link, [label(Label), relative(true), end(')')]),
 1055        [')'], !
 1056    ).
 1057wiki_face(Link, _ArgNames, _) -->
 1058    wiki_link(Link, []),
 1059    !.
 1060
 1061wiki_label(Label, _ArgNames, _Options) -->
 1062    image_label(Label).
 1063wiki_label(Label, ArgNames, Options) -->
 1064    next_level(Options, NOptions),
 1065    limit(40, wiki_faces(Label, ArgNames, NOptions.put(link,false))).
 1066
 1067%!  wiki_face_simple(-Out, +ArgNames, +Options)
 1068%
 1069%   Skip simple (non-markup) wiki.
 1070
 1071wiki_face_simple(Word, _, _) -->
 1072    [ w(Word) ],
 1073    !.
 1074wiki_face_simple(SpaceOrPunct, _, _) -->
 1075    [ SpaceOrPunct ],
 1076    { atomic(SpaceOrPunct) },
 1077    !.
 1078wiki_face_simple(FT, ArgNames, _) -->
 1079    [Structure],
 1080    { wiki_faces(Structure, ArgNames, FT)
 1081    }.
 1082
 1083wiki_words([]) --> [].
 1084wiki_words([Word|T]) --> [w(Word)], !, wiki_words(T).
 1085wiki_words([Punct|T]) --> [Punct], {atomic(Punct)}, wiki_words(T).
 1086
 1087%!  code_words(-Words)//
 1088%
 1089%   True when Words is the  content   as  it  appears in =|`code`|=,
 1090%   where =|``|= is mapped to =|`|=.
 1091
 1092code_words([]) --> [].
 1093code_words([Word|T]) --> [w(Word)], code_words(T).
 1094code_words(CodeL) --> ['`','`'], {CodeL = ['`'|T]}, code_words(T).
 1095code_words([Punct|T]) --> [Punct], {atomic(Punct)}, code_words(T).
 1096
 1097%!  eq_code_words(-Words)//
 1098%
 1099%   Stuff that can be between single `=`.  This is limited to
 1100%
 1101%           - Start and end must be a word
 1102%           - In between may be the following punctuation chars:
 1103%             =|.-:/|=, notably dealing with file names and
 1104%             identifiers in various external languages.
 1105
 1106eq_code_words([Word]) -->
 1107    [ w(Word) ].
 1108eq_code_words([Word|T]) -->
 1109    [ w(Word) ], eq_code_internals(T, [End]), [w(End)].
 1110
 1111eq_code_internals(T, T) --> [].
 1112eq_code_internals([H|T], Tail) -->
 1113    eq_code_internal(H),
 1114    eq_code_internals(T, Tail).
 1115
 1116eq_code_internal(Word) -->
 1117    [w(Word)].
 1118eq_code_internal(Punct) -->
 1119    [Punct],
 1120    { eq_code_internal_punct(Punct) }.
 1121
 1122eq_code_internal_punct('.').
 1123eq_code_internal_punct('-').
 1124eq_code_internal_punct(':').
 1125eq_code_internal_punct('/').
 1126
 1127
 1128%!  code_face(+Text, +Term, +Vars, -Code) is det.
 1129%
 1130%   Deal with =|`... code ...`|=  sequences.   Text  is  the matched
 1131%   text, Term is the parsed Prolog term   and Code is the resulting
 1132%   intermediate code.
 1133
 1134code_face(Text, Var, _, Code) :-
 1135    var(Var),
 1136    !,
 1137    Code = var(Text).
 1138code_face(Text, _, _, code(Text)).
 1139
 1140
 1141%!  emphasis_seq(-Out, +ArgNames, +Options) is semidet.
 1142%
 1143%   Recognise emphasis sequences
 1144
 1145emphasis_seq(EmphTerm, ArgNames, Options) -->
 1146    emphasis_start(C),
 1147    next_level(Options, NOptions),
 1148    matches(limit(100, wiki_faces(Emph, ArgNames, NOptions)), Input, Last),
 1149    emphasis_end(C),
 1150    { emph_markdown(Last, Input),
 1151      emphasis_term(C, Emph, EmphTerm)
 1152    },
 1153    !.
 1154
 1155
 1156%!  emphasis_term(+Emphasis, +Tokens, -Term) is det.
 1157%!  emphasis_before(-Before)// is semidet.
 1158%!  emphasis_start(-Emphasis)// is semidet.
 1159%!  emphasis_end(+Emphasis)// is semidet.
 1160%
 1161%   Primitives for Doxygen emphasis handling.
 1162
 1163emphasis_term('_',   Term, i(Term)).
 1164emphasis_term('*',   Term, b(Term)).
 1165
 1166emph_markdown(_, [w(_)]) :- !.
 1167emph_markdown(Last, Tokens) :-
 1168    \+ emphasis_after_sep(Last),
 1169    E = error(_,_),
 1170    catch(b_getval(pldoc_object, Obj), E, Obj = '??'),
 1171    debug(markdown(emphasis), '~q: additionally emphasis: ~p',
 1172          [Obj, Tokens]).
 1173
 1174emphasis_before(Before) -->
 1175    [Before],
 1176    { emphasis_start_sep(Before) }.
 1177
 1178emphasis_start_sep('\n').
 1179emphasis_start_sep(' ').
 1180emphasis_start_sep('<').
 1181emphasis_start_sep('{').
 1182emphasis_start_sep('(').
 1183emphasis_start_sep('[').
 1184emphasis_start_sep(',').
 1185emphasis_start_sep(':').
 1186emphasis_start_sep(';').
 1187
 1188emphasis_start(Which), [w(Word)] -->
 1189    emphasis(Which),
 1190    [w(Word)].
 1191
 1192emphasis(**)   --> [*, *].
 1193emphasis(*)    --> [*].
 1194emphasis('__') --> ['_', '_'].
 1195emphasis('_')  --> ['_'].
 1196
 1197emphasis_end(Which), [After] -->
 1198    emphasis(Which),
 1199    [ After ],
 1200    !,
 1201    { emphasis_close_sep(After) -> true }.
 1202emphasis_end(Which) -->
 1203    emphasis(Which).
 1204
 1205% these characters should not be before a closing * or _.
 1206
 1207emphasis_after_sep('\n').
 1208emphasis_after_sep(' ').
 1209emphasis_after_sep('(').
 1210emphasis_after_sep('[').
 1211emphasis_after_sep('<').
 1212emphasis_after_sep('=').
 1213emphasis_after_sep('+').
 1214emphasis_after_sep('\\').
 1215emphasis_after_sep('@').
 1216
 1217emphasis_close_sep('\n').                       % white
 1218emphasis_close_sep(' ').                        % white
 1219emphasis_close_sep(',').                        % sentence punctuation
 1220emphasis_close_sep('.').
 1221emphasis_close_sep('!').
 1222emphasis_close_sep('?').
 1223emphasis_close_sep(':').
 1224emphasis_close_sep(';').
 1225emphasis_close_sep(']').                        % [**label**](link)
 1226emphasis_close_sep(')').                        % ... _italic_)
 1227emphasis_close_sep('}').                        % ... _italic_}
 1228emphasis_close_sep(Token) :-
 1229    structure_term(Token).
 1230
 1231
 1232%!  arg_list(-Atoms) is nondet.
 1233%
 1234%   Atoms  is  a  token-list  for  a    Prolog   argument  list.  An
 1235%   argument-list is a sequence of tokens '(' ... ')'.
 1236%
 1237%   @bug    the current implementation does not deal correctly with
 1238%           brackets that are embedded in quoted strings.
 1239
 1240arg_list(['('|T]) -->
 1241    ['('], arg_list_close(T, 1).
 1242
 1243arg_list_close(Tokens, Depth) -->
 1244    [')'],
 1245    !,
 1246    (   { Depth == 1 }
 1247    ->  { Tokens = [')'] }
 1248    ;   { Depth > 1 }
 1249    ->  { Tokens = [')'|More],
 1250          NewDepth is Depth - 1
 1251        },
 1252        arg_list_close(More, NewDepth)
 1253    ).
 1254arg_list_close(['('|T], Depth) -->
 1255    ['('], { NewDepth is Depth+1 },
 1256    arg_list_close(T, NewDepth).
 1257arg_list_close([H|T], Depth) -->
 1258    [w(H)],
 1259    !,
 1260    arg_list_close(T, Depth).
 1261arg_list_close([H|T], Depth) -->
 1262    [H],
 1263    arg_list_close(T, Depth).
 1264
 1265
 1266%!  term_face(+Text, +Term, +Vars, -Face, +Options) is semidet.
 1267%
 1268%   Process embedded Prolog-terms. Currently   processes  Alias(Arg)
 1269%   terms that refer to files.  Future   versions  will also provide
 1270%   pretty-printing of Prolog terms.
 1271
 1272term_face(_Text, Term, _Vars, \file(Name, FileOptions), Options) :-
 1273    ground(Term),
 1274    compound(Term),
 1275    compound_name_arity(Term, Alias, 1),
 1276    user:file_search_path(Alias, _),
 1277    existing_file(Term, FileOptions, [], Options),
 1278    !,
 1279    format(atom(Name), '~q', [Term]).
 1280term_face(Text, Term, Vars, Face, _Options) :-
 1281    code_face(Text, Term, Vars, Face).
 1282
 1283untag([], []).
 1284untag([w(W)|T0], [W|T]) :-
 1285    !,
 1286    untag(T0, T).
 1287untag([H|T0], [H|T]) :-
 1288    untag(T0, T).
 1289
 1290%!  image_label(-Label)//
 1291%
 1292%   Match File[;param=value[,param=value]*]
 1293
 1294image_label(\include(Name, image, Options)) -->
 1295    file_name(Base, Ext),
 1296    { autolink_extension(Ext, image),
 1297      file_name_extension(Base, Ext, Name),
 1298      resolve_file(Name, Options, RestOptions)
 1299    },
 1300    file_options(RestOptions).
 1301
 1302
 1303take_predref(\predref(Name/Arity)) -->
 1304    [ w(Name), '/' ], arity(Arity),
 1305    { functor_name(Name)
 1306    }.
 1307take_predref(\predref(Module:(Name/Arity))) -->
 1308    [ w(Module), ':', w(Name), '/' ], arity(Arity),
 1309    { functor_name(Name)
 1310    }.
 1311take_predref(\predref(Name/Arity)) -->
 1312    prolog_symbol_char(S0),
 1313    symbol_string(SRest), [ '/' ], arity(Arity),
 1314    !,
 1315    { atom_chars(Name, [S0|SRest])
 1316    }.
 1317take_predref(\predref(Name//Arity)) -->
 1318    [ w(Name), '/', '/' ], arity(Arity),
 1319    { functor_name(Name)
 1320    }.
 1321take_predref(\predref(Module:(Name//Arity))) -->
 1322    [ w(Module), ':', w(Name), '/', '/' ], arity(Arity),
 1323    { functor_name(Name)
 1324    }.
 1325
 1326%!  file_options(-Options) is det.
 1327%
 1328%   Extracts additional processing options for  files. The format is
 1329%   ;name="value",name2=value2,... Spaces are not allowed.
 1330
 1331file_options(Options) -->
 1332    [;], nv_pairs(Options),
 1333    !.
 1334file_options([]) -->
 1335    [].
 1336
 1337nv_pairs([H|T]) -->
 1338    nv_pair(H),
 1339    (   [',']
 1340    ->  nv_pairs(T)
 1341    ;   {T=[]}
 1342    ).
 1343
 1344nv_pair(Option) -->
 1345    [ w(Name), =,'"'], tokens(Tokens), ['"'],
 1346    !,
 1347    { untag(Tokens, Atoms),
 1348      atomic_list_concat(Atoms, Value0),
 1349      (   atom_number(Value0, Value)
 1350      ->  true
 1351      ;   Value = Value0
 1352      ),
 1353      Option =.. [Name,Value]
 1354    }.
 1355
 1356
 1357%!  wiki_link(-Link, +Options)// is semidet.
 1358%
 1359%   True if we can find a link to a file or URL. Links are described
 1360%   as one of:
 1361%
 1362%       $ filename :
 1363%       A filename defined using autolink_file/2 or
 1364%       autolink_extension/2
 1365%       $ <url-protocol>://<rest-url> :
 1366%       A fully qualified URL
 1367%       $ '<' URL '>' :
 1368%       Be more relaxed on the URL specification.
 1369
 1370:- multifile
 1371    user:url_path/2. 1372
 1373wiki_link(\file(Name, FileOptions), Options) -->
 1374    file_name(Base, Ext),
 1375    { file_name_extension(Base, Ext, Name),
 1376      (   autolink_file(Name, _)
 1377      ;   autolink_extension(Ext, _)
 1378      ),
 1379      !,
 1380      resolve_file(Name, FileOptions, Options)
 1381    }.
 1382wiki_link(\file(Name, FileOptions), Options) -->
 1383    [w(Name)],
 1384    { autolink_file(Name, _),
 1385      !,
 1386      resolve_file(Name, FileOptions, Options)
 1387    },
 1388    !.
 1389wiki_link(a(href(Ref), Label), Options) -->
 1390    [ w(Prot),:,/,/], { url_protocol(Prot) },
 1391    { option(end(End), Options, space)
 1392    },
 1393    tokens_no_whitespace(Rest), peek_end_url(End),
 1394    !,
 1395    { atomic_list_concat([Prot, :,/,/ | Rest], Ref),
 1396      option(label(Label), Options, Ref)
 1397    }.
 1398wiki_link(a(href(Ref), Label), Options) -->
 1399    [ w(mailto),:],
 1400    { option(end(End), Options, space)
 1401    },
 1402    tokens_no_whitespace(Rest), peek_end_url(End),
 1403    !,
 1404    { atomic_list_concat([mailto, : | Rest], Ref),
 1405      option(label(Label), Options, Ref)
 1406    }.
 1407wiki_link(a(href(Ref), Label), _Options) -->
 1408    [<, w(Alias), :],
 1409    tokens_no_whitespace(Rest), [>],
 1410    { Term = (Alias:Rest),
 1411      prolog:url_expansion_hook(Term, Ref, Label), !
 1412    }.
 1413wiki_link(a(href(Ref), Label), Options) -->
 1414    [<, w(Alias), :],
 1415    { user:url_path(Alias, _)
 1416    },
 1417    tokens_no_whitespace(Rest), [>],
 1418    { atomic_list_concat(Rest, Local),
 1419      (   Local == ''
 1420      ->  Term =.. [Alias,'.']
 1421      ;   Term =.. [Alias,Local]
 1422      ),
 1423      E = error(_,_),
 1424      catch(expand_url_path(Term, Ref), E, fail),
 1425      option(label(Label), Options, Ref)
 1426    }.
 1427wiki_link(a(href(Ref), Label), Options) -->
 1428    [#, w(First)],
 1429    { option(end(End), Options) },
 1430    tokens_no_whitespace(Rest),
 1431    peek_end_url(End),
 1432    !,
 1433    { atomic_list_concat([#,First|Rest], Ref),
 1434      option(label(Label), Options, Ref)
 1435    }.
 1436wiki_link(a(href(Ref), Label), Options) -->
 1437    [<],
 1438    (   { option(relative(true), Options),
 1439          Parts = Rest
 1440        }
 1441    ->  tokens_no_whitespace(Rest)
 1442    ;   { Parts = [Prot, : | Rest]
 1443        },
 1444        [w(Prot), :], tokens_no_whitespace(Rest)
 1445    ),
 1446    [>],
 1447    !,
 1448    { atomic_list_concat(Parts, Ref),
 1449      option(label(Label), Options, Ref)
 1450    }.
 1451
 1452%!  prolog:url_expansion_hook(+Term, -HREF, -Label) is semidet.
 1453%
 1454%   This hook is called after   recognising  =|<Alias:Rest>|=, where
 1455%   Term is of the form Alias(Rest). If   it  succeeds, it must bind
 1456%   HREF to an atom or string representing the link target and Label
 1457%   to an html//1 expression for the label.
 1458
 1459%!  file_name(-Name:atom, -Ext:atom)// is semidet.
 1460%
 1461%   Matches a filename.  A filename is defined as a sequence
 1462%   <segment>{/<segment}.<ext>.
 1463
 1464file_name(FileBase, Extension) -->
 1465    segment(S1),
 1466    segments(List),
 1467    ['.'], file_extension(Extension),
 1468    !,
 1469    { atomic_list_concat([S1|List], '/', FileBase) }.
 1470file_name(FileBase, Extension) -->
 1471    [w(Alias), '('],
 1472    { once(user:file_search_path(Alias, _)) },
 1473    segment(S1),
 1474    segments(List),
 1475    [')'],
 1476    !,
 1477    { atomic_list_concat([S1|List], '/', Base),
 1478      Spec =.. [Alias,Base],
 1479      absolute_file_name(Spec, Path,
 1480                         [ access(read),
 1481                           extensions([pl]),
 1482                           file_type(prolog),
 1483                           file_errors(fail)
 1484                         ]),
 1485      file_name_extension(FileBase, Extension, Path)
 1486    }.
 1487
 1488
 1489segment(..) -->
 1490    ['.','.'],
 1491    !.
 1492segment(Word) -->
 1493    [w(Word)].
 1494segment(Dir) -->
 1495    [w(Word),'.',w(d)],
 1496    { atom_concat(Word, '.d', Dir) }.
 1497
 1498segments([H|T]) -->
 1499    ['/'],
 1500    !,
 1501    segment(H),
 1502    segments(T).
 1503segments([]) -->
 1504    [].
 1505
 1506file_extension(Ext) -->
 1507    [w(Ext)],
 1508    { autolink_extension(Ext, _)
 1509    }.
 1510
 1511
 1512%!  resolve_file(+Name, -FileOptions, ?RestOptions, +Options) is det.
 1513%
 1514%   Find the actual file based on the pldoc_file global variable. If
 1515%   present  and  the   file   is    resolvable,   add   an   option
 1516%   absolute_path(Path) that reflects the current   location  of the
 1517%   file.
 1518
 1519resolve_file(Name, FileOptions, Rest) :-
 1520    existing_file(Name, FileOptions, Rest, []),
 1521    !.
 1522resolve_file(_, Options, Options).
 1523
 1524
 1525existing_file(Name, FileOptions, Rest, Options) :-
 1526    \+ Options.get(link) == false,
 1527    E = error(_,_),
 1528    catch(existing_file_p(Name, FileOptions, Rest), E, fail).
 1529
 1530existing_file_p(Name, FileOptions, Rest) :-
 1531    (   nb_current(pldoc_file, RelativeTo),
 1532        RelativeTo \== []
 1533    ->  Extra = [relative_to(RelativeTo)|Extra1]
 1534    ;   Extra = Extra1
 1535    ),
 1536    (   compound(Name)
 1537    ->  Extra1 = [file_type(prolog)]
 1538    ;   Extra1 = []
 1539    ),
 1540    absolute_file_name(Name, Path,
 1541                       [ access(read),
 1542                         file_errors(fail)
 1543                       | Extra
 1544                       ]),
 1545    FileOptions = [ absolute_path(Path) | Rest ].
 1546
 1547%!  arity(-Arity:int)// is semidet.
 1548%
 1549%   True if the next token can be  interpreted as an arity. That is,
 1550%   refers to a non-negative integers of at most 20. Although Prolog
 1551%   allows for higher arities, we assume 20   is  a fair maximum for
 1552%   user-created predicates that are documented.
 1553
 1554arity(Arity) -->
 1555    [ w(Word) ],
 1556    { E = error(_,_),
 1557      catch(atom_number(Word, Arity), E, fail),
 1558      Arity >= 0, Arity < 20
 1559    }.
 1560
 1561%!  symbol_string(-String)// is nondet
 1562%
 1563%   Accept a sequence of Prolog symbol characters, starting with the
 1564%   shortest (empty) match.
 1565
 1566symbol_string([]) -->
 1567    [].
 1568symbol_string([H|T]) -->
 1569    [H],
 1570    { prolog_symbol_char(H) },
 1571    symbol_string(T).
 1572
 1573prolog_symbol_char(C) -->
 1574    [C],
 1575    { prolog_symbol_char(C) }.
 1576
 1577%!  prolog_symbol_char(?Char)
 1578%
 1579%   True if char is classified by Prolog as a symbol char.
 1580
 1581prolog_symbol_char(#).
 1582prolog_symbol_char($).
 1583prolog_symbol_char(&).
 1584prolog_symbol_char(*).
 1585prolog_symbol_char(+).
 1586prolog_symbol_char(-).
 1587prolog_symbol_char(.).
 1588prolog_symbol_char(/).
 1589prolog_symbol_char(:).
 1590prolog_symbol_char(<).
 1591prolog_symbol_char(=).
 1592prolog_symbol_char(>).
 1593prolog_symbol_char(?).
 1594prolog_symbol_char(@).
 1595prolog_symbol_char(\).
 1596prolog_symbol_char(^).
 1597prolog_symbol_char(~).
 1598
 1599
 1600functor_name(String) :-
 1601    sub_atom(String, 0, 1, _, Char),
 1602    char_type(Char, lower).
 1603
 1604url_protocol(http).
 1605url_protocol(https).
 1606url_protocol(ftp).
 1607
 1608peek_end_url(space) -->
 1609    peek(Punct, End),
 1610    { punct_token(Punct),
 1611      space_token(End)
 1612    },
 1613    !.
 1614peek_end_url(space) -->
 1615    peek(End),
 1616    { space_token(End) },
 1617    !.
 1618peek_end_url(space, [], []) :- !.
 1619peek_end_url(Token) -->
 1620    peek(Token),
 1621    !.
 1622
 1623punct_token('.').
 1624punct_token('!').
 1625punct_token('?').
 1626punct_token(',').
 1627punct_token(';').
 1628
 1629space_token(' ') :- !.
 1630space_token('\r') :- !.
 1631space_token('\n') :- !.
 1632space_token(T) :-
 1633    \+ atom(T),                     % high level format like p(...)
 1634    \+ T = w(_).
 1635
 1636%!  autolink_extension(?Ext, ?Type) is nondet.
 1637%
 1638%   True if Ext is a filename extensions that create automatic links
 1639%   in the documentation.
 1640
 1641autolink_extension(Ext, Type) :-
 1642    prolog:doc_autolink_extension(Ext, Type),
 1643    !.
 1644autolink_extension(Ext, prolog) :-
 1645    user:prolog_file_type(Ext,prolog),
 1646    !.
 1647autolink_extension(txt, wiki).
 1648autolink_extension(md,  wiki).
 1649autolink_extension(gif, image).
 1650autolink_extension(png, image).
 1651autolink_extension(jpg, image).
 1652autolink_extension(jpeg, image).
 1653autolink_extension(svg, image).
 1654
 1655%!  autolink_file(?File, -Type) is nondet.
 1656%
 1657%   Files to which we automatically create links, regardless of the
 1658%   extension.
 1659
 1660autolink_file('README', wiki).
 1661autolink_file('TODO', wiki).
 1662autolink_file('ChangeLog', wiki).
 1663
 1664%!  citations(-List)//
 1665%
 1666%   Parse @cite1[;@cite2]* into a list of citations.
 1667
 1668citations([H|T]) -->
 1669    citation(H),
 1670    (   [';']
 1671    ->  citations(T)
 1672    ;   {T=[]}
 1673    ).
 1674
 1675citation(Atom) -->
 1676    [@], wiki_words(Atoms),
 1677    { length(Atoms, Len),
 1678      Len > 10, !,
 1679      fail
 1680    ; true
 1681    },
 1682    end_citation,
 1683    !,
 1684    { atomic_list_concat(Atoms, Atom)
 1685    }.
 1686
 1687end_citation, [';'] --> [';'].
 1688end_citation, ['@'] --> ['@'].
 1689end_citation, [']'] --> [']'].
 1690
 1691
 1692                 /*******************************
 1693                 *           SECTIONS           *
 1694                 *******************************/
 1695
 1696%!  section_comment_header(+Lines, -Header, -RestLines) is semidet.
 1697%
 1698%   Processes   /**   <section>   comments.   Header   is   a   term
 1699%   \section(Type, Title), where  Title  is   an  atom  holding  the
 1700%   section title and Type is an atom holding the text between <>.
 1701%
 1702%   @param Lines    List of Indent-Codes.
 1703%   @param Header   DOM term of the format \section(Type, Title),
 1704%                   where Type is an atom from <type> and Title is
 1705%                   a string holding the type.
 1706
 1707section_comment_header([_-Line|Lines], Header, Lines) :-
 1708    phrase(section_line(Header), Line).
 1709
 1710section_line(\section(Type, Title)) -->
 1711    ws, "<", word(Codes), ">", normalise_white_space(TitleCodes),
 1712    { atom_codes(Type, Codes),
 1713      atom_codes(Title, TitleCodes)
 1714    }.
 1715
 1716                 /*******************************
 1717                 *           TOKENIZER          *
 1718                 *******************************/
 1719
 1720%!  tokenize_lines(+Lines:lines, -TokenLines) is det
 1721%
 1722%   Convert Indent-Codes into Indent-Tokens
 1723
 1724tokenize_lines(Lines, TokenLines) :-
 1725    tokenize_lines(Lines, -1, TokenLines).
 1726
 1727tokenize_lines([], _, []) :- !.
 1728tokenize_lines(Lines, Indent, [Pre|T]) :-
 1729    verbatim(Lines, Indent, Pre, RestLines),
 1730    !,
 1731    tokenize_lines(RestLines, Indent, T).
 1732tokenize_lines([I-H0|T0], Indent0, [I-H|T]) :-
 1733    phrase(line_tokens(H), H0),
 1734    (   H == []
 1735    ->  Indent = Indent0
 1736    ;   Indent = I
 1737    ),
 1738    tokenize_lines(T0, Indent, T).
 1739
 1740
 1741%!  line_tokens(-Tokens:list)// is det.
 1742%
 1743%   Create a list of tokens, where  is  token   is  either  a ' ' to
 1744%   denote spaces, a  term  w(Word)  denoting   a  word  or  an atom
 1745%   denoting a punctuation  character.   Underscores  (_)  appearing
 1746%   inside an alphanumerical string are considered part of the word.
 1747%   E.g., "hello_world_" tokenizes into [w(hello_world), '_'].
 1748
 1749line_tokens([H|T]) -->
 1750    line_token(H),
 1751    !,
 1752    line_tokens(T).
 1753line_tokens([]) -->
 1754    [].
 1755
 1756line_token(T) -->
 1757    [C],
 1758    (   { code_type(C, space) }
 1759    ->  ws,
 1760        { T = ' ' }
 1761    ;   { code_type(C, alnum) },
 1762        word(Rest),
 1763        { atom_codes(W, [C|Rest]),
 1764          T = w(W)
 1765        }
 1766    ;   { char_code(T, C) }
 1767    ).
 1768
 1769word([C0|T]) -->
 1770    [C0],  { code_type(C0, alnum) },
 1771    !,
 1772    word(T).
 1773word([0'_, C1|T]) -->
 1774    [0'_, C1],  { code_type(C1, alnum) },
 1775    !,
 1776    word(T).
 1777word([]) -->
 1778    [].
 1779
 1780alphas([C0|T]) -->
 1781    [C0],  { code_type(C0, alpha) },
 1782    !,
 1783    alphas(T).
 1784alphas([]) -->
 1785    [].
 1786
 1787%!  verbatim(+Lines, +EnvIndent, -Pre, -RestLines) is det.
 1788%
 1789%   Extract a verbatim environment.  The  returned   Pre  is  of the
 1790%   format pre(Attributes, String). The indentation   of the leading
 1791%   fence is substracted from the indentation of the verbatim lines.
 1792%   Two types of fences are supported:   the  traditional =|==|= and
 1793%   the Doxygen =|~~~|= (minimum  3   =|~|=  characters), optionally
 1794%   followed by =|{.ext}|= to indicate the language.
 1795%
 1796%   Verbatim environment is delimited as
 1797%
 1798%     ==
 1799%       ...,
 1800%       verbatim(Lines, Pre, Rest)
 1801%       ...,
 1802%     ==
 1803%
 1804%   In addition, a verbatim environment may  simply be indented. The
 1805%   restrictions are described in the documentation.
 1806
 1807verbatim(Lines, _,
 1808         Indent-pre([class(code), ext(Ext)],Pre),
 1809         RestLines) :-
 1810    skip_empty_lines(Lines, [Indent-FenceLine|CodeLines]),
 1811    verbatim_fence(FenceLine, Fence, Ext),
 1812    verbatim_body(CodeLines, Indent, [10|PreCodes], [],
 1813                  [Indent-Fence|RestLines]),
 1814    !,
 1815    atom_codes(Pre, PreCodes).
 1816verbatim([_-[],Indent-Line|Lines], EnvIndent,
 1817         Indent-pre(class(code),Pre),
 1818         RestLines) :-
 1819    EnvIndent >= 0,
 1820    Indent >= EnvIndent+4, Indent =< EnvIndent+8,
 1821    valid_verbatim_opening(Line),
 1822    indented_verbatim_body([Indent-Line|Lines], Indent,
 1823                           CodeLines, RestLines),
 1824    !,
 1825    lines_code_text(CodeLines, Indent, [10|PreCodes]),
 1826    atom_codes(Pre, PreCodes).
 1827
 1828verbatim_body(Lines, _, PreT, PreT, Lines).
 1829verbatim_body([I-L|Lines], Indent, [10|Pre], PreT, RestLines) :-
 1830    PreI is I - Indent,
 1831    phrase(pre_indent(PreI), Pre, PreT0),
 1832    verbatim_line(L, PreT0, PreT1),
 1833    verbatim_body(Lines, Indent, PreT1, PreT, RestLines).
 1834
 1835verbatim_fence(Line, Fence, '') :-
 1836    Line == [0'=,0'=],
 1837    !,
 1838    Fence = Line.
 1839verbatim_fence(Line, Fence, Ext) :-
 1840    tilde_fence(Line, Fence, 0, Ext).
 1841verbatim_fence(Line, Fence, Ext) :-
 1842    md_fence(Line, Fence, 0, Ext).
 1843
 1844tilde_fence([0'~|T0], [0'~|F0], C0, Ext) :-
 1845    !,
 1846    C1 is C0+1,
 1847    tilde_fence(T0, F0, C1, Ext).
 1848tilde_fence(List, [], C, Ext) :-
 1849    C >= 3,
 1850    (   List == []
 1851    ->  Ext = ''
 1852    ;   phrase(tilde_fence_ext(ExtCodes), List)
 1853    ->  atom_codes(Ext, ExtCodes)
 1854    ).
 1855
 1856%!  tilde_fence_ext(-Ext)// is semidet.
 1857%
 1858%   Detect ```{.prolog} (Doxygen) or ```{prolog} (GitHub)
 1859
 1860tilde_fence_ext(Ext) -->
 1861    "{.", !, alphas(Ext), "}".
 1862tilde_fence_ext(Ext) -->
 1863    "{", alphas(Ext), "}".
 1864
 1865md_fence([0'`|T0], [0'`|F0], C0, Ext) :-
 1866    !,
 1867    C1 is C0+1,
 1868    md_fence(T0, F0, C1, Ext).
 1869md_fence(List, [], C, Ext) :-
 1870    C >= 3,
 1871    (   List == []
 1872    ->  Ext = ''
 1873    ;   phrase(md_fence_ext(ExtCodes), List),
 1874        atom_codes(Ext, ExtCodes)
 1875    ).
 1876
 1877% Also support Doxygen's curly bracket notation.
 1878md_fence_ext(Ext) -->
 1879    tilde_fence_ext(Ext),
 1880    !.
 1881% In Markdown language names appear without brackets.
 1882md_fence_ext(Ext) -->
 1883    alphas(Ext).
 1884
 1885%!  indented_verbatim_body(+Lines, +Indent, -CodeLines, -RestLines)
 1886%
 1887%   Takes more verbatim lines. The input   ends  with the first line
 1888%   that is indented less than Indent. There cannot be more than one
 1889%   consequtive empty line in the verbatim body.
 1890
 1891indented_verbatim_body([I-L|T0], Indent, [I-L|T], RestLines) :-
 1892    L \== [], I >= Indent,
 1893    !,
 1894    indented_verbatim_body(T0, Indent, T, RestLines).
 1895indented_verbatim_body([I0-[],I-L|T0], Indent, [I0-[],I-L|T], RestLines) :-
 1896    I >= Indent,
 1897    valid_verbatim_opening(L),
 1898    indented_verbatim_body(T0, Indent, T, RestLines).
 1899indented_verbatim_body(Lines, _, [], Lines).
 1900
 1901%!  valid_verbatim_opening(+Line) is semidet.
 1902%
 1903%   Tests that line does not look like a list item or table.
 1904
 1905valid_verbatim_opening([0'||_]) :- !, fail.
 1906valid_verbatim_opening(Line) :-
 1907    Line \== [],
 1908    \+ ( phrase(line_tokens(Tokens), Line),
 1909         list_item_prefix(_Type, Tokens, _Rest)
 1910       ).
 1911
 1912%!  lines_code_text(+Lines, +Indent, -Codes) is det.
 1913%
 1914%   Extract the actual code content from a list of line structures.
 1915
 1916lines_code_text([], _, []).
 1917lines_code_text([_-[]|T0], Indent, [10|T]) :-
 1918    !,
 1919    lines_code_text(T0, Indent, T).
 1920lines_code_text([I-Line|T0], Indent, [10|T]) :-
 1921    PreI is I-Indent,
 1922    phrase(pre_indent(PreI), T, T1),
 1923    verbatim_line(Line, T1, T2),
 1924    lines_code_text(T0, Indent, T2).
 1925
 1926
 1927%!  pre_indent(+Indent)// is det.
 1928%
 1929%   Insert Indent leading spaces.  Note we cannot use tabs as these
 1930%   are not expanded by the HTML <pre> element.
 1931
 1932pre_indent(N) -->
 1933    { N > 0,
 1934      !,
 1935      N2 is N - 1
 1936    }, " ",
 1937    pre_indent(N2).
 1938pre_indent(_) -->
 1939    "".
 1940
 1941verbatim_line(Line, Pre, PreT) :-
 1942    append(Line, PreT, Pre).
 1943
 1944
 1945                 /*******************************
 1946                 *            SUMMARY           *
 1947                 *******************************/
 1948
 1949%!  summary_from_lines(+Lines:lines, -Summary:list(codes)) is det.
 1950%
 1951%   Produce a summary for Lines. Similar  to JavaDoc, the summary is
 1952%   defined as the first sentence of the documentation. In addition,
 1953%   a sentence is also ended by an  empty   line  or  the end of the
 1954%   comment.
 1955
 1956summary_from_lines(Lines, Sentence) :-
 1957    skip_empty_lines(Lines, Lines1),
 1958    summary2(Lines1, Sentence0),
 1959    end_sentence(Sentence0, Sentence).
 1960
 1961summary2(_, Sentence) :-
 1962    Sentence == [],
 1963    !.              % we finished our sentence
 1964summary2([], []) :- !.
 1965summary2([_-[]|_], []) :- !.            % empty line
 1966summary2([_-[0'@|_]|_], []) :- !.       % keyword line
 1967summary2([_-L0|Lines], Sentence) :-
 1968    phrase(sentence(Sentence, Tail), L0, _),
 1969    summary2(Lines, Tail).
 1970
 1971sentence([C,End], []) -->
 1972    [C,End],
 1973    { \+ code_type(C, period),
 1974      code_type(End, period)                % ., !, ?
 1975    },
 1976    space_or_eos,
 1977    !.
 1978sentence([0' |T0], T) -->
 1979    space,
 1980    !,
 1981    ws,
 1982    sentence(T0, T).
 1983sentence([H|T0], T) -->
 1984    [H],
 1985    sentence(T0, T).
 1986sentence([0' |T], T) -->                % '
 1987    eos.
 1988
 1989space_or_eos -->
 1990    [C],
 1991    !,
 1992    {code_type(C, space)}.
 1993space_or_eos -->
 1994    eos.
 1995
 1996%!  skip_empty_lines(+LinesIn, -LinesOut) is det.
 1997%
 1998%   Remove empty lines from the start of the input.  Note that
 1999%   this is used both to process character and token data.
 2000
 2001skip_empty_lines([], []).
 2002skip_empty_lines([_-[]|Lines0], Lines) :-
 2003    !,
 2004    skip_empty_lines(Lines0, Lines).
 2005skip_empty_lines(Lines, Lines).
 2006
 2007end_sentence([], []).
 2008end_sentence([0'\s], [0'.]) :- !.
 2009end_sentence([H|T0], [H|T]) :-
 2010    end_sentence(T0, T).
 2011
 2012
 2013                 /*******************************
 2014                 *        CREATE LINES          *
 2015                 *******************************/
 2016
 2017%!  indented_lines(+Text:list(codes), +Prefixes:list(codes),
 2018%!                 -Lines:list) is det.
 2019%
 2020%   Extract a list of lines  without   leading  blanks or characters
 2021%   from Prefix from Text. Each line   is a term Indent-Codes, where
 2022%   Indent specifies the line_position of the real text of the line.
 2023
 2024indented_lines(Comment, Prefixes, Lines) :-
 2025    must_be(codes, Comment),
 2026    phrase(split_lines(Prefixes, Lines), Comment),
 2027    !.
 2028
 2029split_lines(_, []) -->
 2030    end_of_comment.
 2031split_lines(Prefixes, [Indent-L1|Ls]) -->
 2032    take_prefix(Prefixes, 0, Indent0),
 2033    white_prefix(Indent0, Indent),
 2034    take_line(L1),
 2035    split_lines(Prefixes, Ls).
 2036
 2037
 2038%!  end_of_comment//
 2039%
 2040%   Succeeds if we hit the end of the comment.
 2041%
 2042%   @bug    %*/ will be seen as the end of the comment.
 2043
 2044end_of_comment -->
 2045    eos.
 2046end_of_comment -->
 2047    ws, stars, "*/".
 2048
 2049stars --> [].
 2050stars --> "*", !, stars.
 2051
 2052
 2053%!  take_prefix(+Prefixes:list(codes), +Indent0:int, -Indent:int)// is det.
 2054%
 2055%   Get the leading characters  from  the   input  and  compute  the
 2056%   line-position at the end of the leading characters.
 2057
 2058take_prefix(Prefixes, I0, I) -->
 2059    { member(Prefix, Prefixes),
 2060      string_codes(Prefix, PrefixCodes)
 2061    },
 2062    prefix(PrefixCodes),
 2063    !,
 2064    { string_update_linepos(PrefixCodes, I0, I) }.
 2065take_prefix(_, I, I) -->
 2066    [].
 2067
 2068prefix([]) --> [].
 2069prefix([H|T]) --> [H], prefix(T).
 2070
 2071white_prefix(I0, I) -->
 2072    [C],
 2073    {  code_type(C, white),
 2074       !,
 2075       update_linepos(C, I0, I1)
 2076    },
 2077    white_prefix(I1, I).
 2078white_prefix(I, I) -->
 2079    [].
 2080
 2081%!  string_update_linepos(+Codes, +Pos0, -Pos) is det.
 2082%
 2083%   Update line-position after adding Codes at Pos0.
 2084
 2085string_update_linepos([], I, I).
 2086string_update_linepos([H|T], I0, I) :-
 2087    update_linepos(H, I0, I1),
 2088    string_update_linepos(T, I1, I).
 2089
 2090%!  update_linepos(+Code, +Pos0, -Pos) is det.
 2091%
 2092%   Update line-position after adding Code.
 2093%
 2094%   @tbd    Currently assumes tab-width of 8.
 2095
 2096update_linepos(0'\t, I0, I) :-
 2097    !,
 2098    I is (I0\/7)+1.
 2099update_linepos(0'\b, I0, I) :-
 2100    !,
 2101    I is max(0, I0-1).
 2102update_linepos(0'\r, _, 0) :- !.
 2103update_linepos(0'\n, _, 0) :- !.
 2104update_linepos(_, I0, I) :-
 2105    I is I0 + 1.
 2106
 2107%!  take_line(-Line:codes)// is det.
 2108%
 2109%   Take  a  line  from  the  input.   Line  does  not  include  the
 2110%   terminating \r or \n character(s), nor trailing whitespace.
 2111
 2112take_line([]) -->
 2113    "\r\n",
 2114    !.                      % DOS file
 2115take_line([]) -->
 2116    "\n",
 2117    !.                        % Unix file
 2118take_line(Line) -->
 2119    [H], { code_type(H, white) },
 2120    !,
 2121    take_white(White, WT),
 2122    (   nl
 2123    ->  { Line = [] }
 2124    ;   { Line = [H|White] },
 2125        take_line(WT)
 2126    ).
 2127take_line([H|T]) -->
 2128    [H],
 2129    !,
 2130    take_line(T).
 2131take_line([]) -->                       % end of string
 2132    [].
 2133
 2134take_white([H|T0], T) -->
 2135    [H],  { code_type(H, white) },
 2136    !,
 2137    take_white(T0, T).
 2138take_white(T, T) -->
 2139    [].
 2140
 2141%!  normalise_indentation(+LinesIn, -LinesOut) is det.
 2142%
 2143%   Re-normalise the indentation, such that the  lef-most line is at
 2144%   zero.  Note that we skip empty lines in the computation.
 2145
 2146normalise_indentation(Lines0, Lines) :-
 2147    skip_empty_lines(Lines0, Lines1),
 2148    Lines1 = [I0-_|Lines2],
 2149    !,
 2150    smallest_indentation(Lines2, I0, Subtract),
 2151    (   Subtract == 0
 2152    ->  Lines = Lines0
 2153    ;   maplist(substract_indent(Subtract), Lines0, Lines)
 2154    ).
 2155normalise_indentation(Lines, Lines).
 2156
 2157smallest_indentation([], I, I).
 2158smallest_indentation([_-[]|T], I0, I) :-
 2159    !,
 2160    smallest_indentation(T, I0, I).
 2161smallest_indentation([X-_|T], I0, I) :-
 2162    I1 is min(I0, X),
 2163    smallest_indentation(T, I1, I).
 2164
 2165substract_indent(Subtract, I0-L, I-L) :-
 2166    I is max(0,I0-Subtract).
 2167
 2168
 2169                 /*******************************
 2170                 *             MISC             *
 2171                 *******************************/
 2172
 2173%!  strip_leading_par(+Dom0, -Dom) is det.
 2174%
 2175%   Remove the leading paragraph for  environments where a paragraph
 2176%   is not required.
 2177
 2178strip_leading_par([p(C)|T], L) :-
 2179    !,
 2180    append(C, T, L).
 2181strip_leading_par(L, L).
 2182
 2183
 2184                 /*******************************
 2185                 *           DCG BASICS         *
 2186                 *******************************/
 2187
 2188%!  ws// is det
 2189%
 2190%   Eagerly skip layout characters
 2191
 2192ws -->
 2193    [C], {code_type(C, space)},
 2194    !,
 2195    ws.
 2196ws -->
 2197    [].
 2198
 2199%       space// is det
 2200%
 2201%       True if then next code is layout.
 2202
 2203space -->
 2204    [C],
 2205    {code_type(C, space)}.
 2206
 2207%!  nl//
 2208%
 2209%   Get end-of-line
 2210
 2211nl -->
 2212    "\r\n",
 2213    !.
 2214nl -->
 2215    "\n".
 2216
 2217%!  peek(H)//
 2218%
 2219%   True if next token is H without eating it.
 2220
 2221peek(H, L, L) :-
 2222    L = [H|_].
 2223
 2224peek(H1, H2, L, L) :-
 2225    L = [H1, H2|_].
 2226
 2227%!  tokens(-Tokens:list)// is nondet.
 2228%!  tokens(+Max, -Tokens:list)// is nondet.
 2229%
 2230%   Defensively take tokens from the input.  Backtracking takes more
 2231%   tokens.  Do not include structure terms.
 2232
 2233tokens([]) --> [].
 2234tokens([H|T]) --> token(H), tokens(T).
 2235
 2236tokens(_, []) --> [].
 2237tokens(C, [H|T]) --> token(H), {succ(C1, C)}, tokens(C1, T).
 2238
 2239%!  tokens_no_whitespace(-Tokens:list(atom))// is nondet.
 2240%
 2241%   Defensively take tokens from the  input. Backtracking takes more
 2242%   tokens.  Tokens  cannot  include  whitespace.  Word  tokens  are
 2243%   returned as their represented words.
 2244
 2245tokens_no_whitespace([]) -->
 2246    [].
 2247tokens_no_whitespace([Word|T]) -->
 2248    [ w(Word) ],
 2249    !,
 2250    tokens_no_whitespace(T).
 2251tokens_no_whitespace([H|T]) -->
 2252    [H],
 2253    { \+ space_token(H) },
 2254    tokens_no_whitespace(T).
 2255
 2256token(Token) -->
 2257    [Token],
 2258    { token(Token) }.
 2259
 2260token(w(_)) :- !.
 2261token(Token) :- atom(Token).
 2262
 2263%!  limit(+Count, :Rule)//
 2264%
 2265%   As limit/2, but for grammar rules.
 2266
 2267:- meta_predicate limit(+,2,?,?). 2268
 2269limit(Count, Rule, Input, Rest) :-
 2270    Count > 0,
 2271    State = count(0),
 2272    call(Rule, Input, Rest),
 2273    arg(1, State, N0),
 2274    N is N0+1,
 2275    (   N =:= Count
 2276    ->  !
 2277    ;   nb_setarg(1, State, N)
 2278    ).
 2279
 2280
 2281                 /*******************************
 2282                 *           MESSAGES           *
 2283                 *******************************/
 2284
 2285:- multifile
 2286    prolog:message//1. 2287
 2288prolog:message(pldoc(deprecated_tag(Name, Tag))) -->
 2289    [ 'PlDoc: Deprecated tag @~w (use @~w)'-[Name, Tag]
 2290    ].
 2291prolog:message(pldoc(unknown_tag(Name))) -->
 2292    [ 'PlDoc: unknown tag @~w'-[Name]
 2293    ]