1:- module(xml_reader,[fileToLineInfoElements/3]).

Utility LOGICMOO XML READER

Allows you to read xml files from prolog.

    8:- use_module(library(sgml)).    9
   10atrace:-trace.
   11useCateID:- fail.
   12:- dynamic(aimlCateSig/1).   13:- dynamic(aimlCate/13).   14
   15prolog_mostly_ground(Out):-ground(Out),!.
   16prolog_mostly_ground(Out):-var(Out),!,atrace.
   17prolog_mostly_ground([H|_Out]):-!,prolog_must(prolog_mostly_ground1(H)),!.
   18prolog_mostly_ground(Out):- ((arg(_N,Out,Arg),prolog_must(prolog_mostly_ground1(Arg)),fail));true.
   19prolog_mostly_ground1(Out):-prolog_must(nonvar(Out)).
   20 
   21aimlCateSig(_).
 get_sgml_parser_defs(PARSER_DEFAULTS, PARSER_CALLBACKS)
   25get_sgml_parser_defs(PARSER_DEFAULTS,PARSER_CALLBACKS):- 
   26  current_prolog_flag(sgml_parser_defaults,PARSER_DEFAULTS),
   27  current_prolog_flag(sgml_parser_callbacks,PARSER_CALLBACKS),!.
   28
   29get_sgml_parser_defs(
   30  [defaults(false), space(remove),/*number(integer),*/ qualify_attributes(false),
   31         %call(decl, on_decl),
   32         %call(pi, on_pi),call(xmlns, on_xmlns),call(urlns, xmlns),
   33         %%call(error,xml_error),
   34         dialect(xml)
   35         ],
   36         [max_errors(0),call(begin, on_begin),call(end, on_end)]).
   37
   38
   39
   40% % ?- string_to_structure('<?xml version="1.0" encoding="ISO-8859-1"?>\n<aiml><p>hi</p></aiml>',X).
   41% % ?- string_to_structure('<category><pattern>_ PLANETS</pattern></category>',X).
   42
   43
   44on_end('aiml', _) :- !,
   45        ignore(retract(in_aiml_tag(_))).
   46
   47on_begin('aiml', Attribs, _) :- !,
   48        asserta(in_aiml_tag(Attribs)).
   49
   50
   51on_begin(Tag, Attr, Parser) :- skipOver(not(inLineNum)),
   52        get_sgml_parser(Parser,context(Context)), Context=[Tag,aiml|_],
   53        skipOver(debugFmt(on_begin(Tag, Attr, Context))),
   54        skipOver(retract(in_aiml_tag(AimlAttr))),
   55       % skipOver(get_sgml_parser_defs(PARSER_DEFAULTS, PARSER_CALLBACKS)),
   56        get_sgml_parser(Parser,line(Line)),
   57        get_sgml_parser(Parser,charpos(Offset)),
   58        get_sgml_parser(Parser,file(File)),
   59        global_pathname(File,Pathname),
   60      %  get_sgml_parser(Parser,source(Stream)),
   61        skipOver(asserta(inLineNum)),
   62%        load_structure(Stream,Content,[line(Line)|PARSER_DEFAULTS]),!,
   63 %      skipOver( sgml_parse(Parser,[ document(Content),parse(input)])),
   64        NEW = t_l:lineInfoElem(Pathname,Line:Offset, Context, element(Tag, Attr, no_content_yet)),
   65        %%debugFmt(NEW),
   66        skipOver(ignore(retract(inLineNum))),
   67        skipOver(asserta(in_aiml_tag(AimlAttr))),
   68        assertz(NEW),!.
   69
   70on_begin(_Tag, _Attr, _Parser) :-!. %%get_sgml_parser(Parser,context(Context)),!. %%,debugFmt(on_begin_Context(Tag, Attr, Context)).
   71
   72%%on_begin_ctx(TAG, URL, Parser, Context) :-!, debugFmt(on_begin_ctx(URL, TAG, Parser,Context)),!.
   73on_begin_ctx(_TAG, _URL, _Parser, _Context) :- !. %%, debugFmt(on_begin_ctx(URL, TAG, Parser,Context)),!.
   74
   75
   76
   77:- thread_local
   78        xmlns/3.   79
   80on_xmlns(rdf, URL, _Parser) :- !,debugFmt(on_xmlns(URL, rdf)),asserta(xmlns(URL, rdf, _)).
   81on_xmlns(TAG, URL, _Parser) :- sub_atom(URL, _, _, _, 'rdf-syntax'), !,
   82        debugFmt('rdf-syntax'(URL, TAG)),
   83        immediateCall(_Ctx,asserta(xmlns(URL, rdf, _))).
   84on_xmlns(TAG, URL, _Parser) :- debugFmt(on_xmlns(URL, TAG)).
   85
   86on_decl(URL, _Parser) :- debugFmt(on_decl(URL)).
   87on_pi(URL, _Parser) :- debugFmt(on_pi(URL)).
   88
   89
   90xml_error(TAG, URL, Parser) :- !, debugFmt(xml_error(URL, TAG, Parser)).
   91% ============================================
   92% Loading content
   93% ============================================
   94
   95load_aiml_structure_lineno(Attributes,Ctx,L):-must_maplist(load_inner_aiml_lineno(Attributes,Ctx),L),!.
   96
   97:-thread_local(t_l:lineInfoElem/4).   98
   99load_inner_aiml_lineno(Attributes,Ctx,element(Tag,Attribs,ContentIn)):-
  100   appendAttributes(Ctx,Attributes,Attribs,RightAttribs),
  101   load_aiml_structure(Ctx,element(Tag,RightAttribs,ContentIn)),!.
  102
  103/*
  104%% offset
  105load_inner_aiml_lineno(Attributes,Ctx,element(Tag,Attribs,ContentIn)):-
  106   appendAttributes(Ctx,Attributes,Attribs,RightAttribs),
  107   prolog_must(attributeValue(Ctx,RightAttribs,[srcfile,srcdir],File,'$error')),
  108   MATCH = t_l:lineInfoElem(File,Line:Offset, Context, element(Tag, Attribs, no_content_yet)),
  109   ignore(MATCH),                                            
  110   Context=[_Tag0,aiml|_More],
  111   ignore(Line = nonfile),
  112   ignore(Offset = nonfile),
  113   NewAttribs  = [srcfile=File,lineno=Line:Offset|RightAttribs],
  114   ignore(retract(MATCH)),
  115   load_aiml_structure(Ctx,element(Tag,NewAttribs,ContentIn)),!.
  116*/
  117   /*
  118
  119   load_inner_aiml_lineno(Attributes,Ctx,element(Tag,Attribs,ContentIn)):-
  120   prolog_must(current_value(Ctx,srcfile,File)),
  121   retract((t_l:lineInfoElem(File0,Line0:Offset0,graph, element(_Tag0, _Attr0, _Content0)))),
  122   prolog_must(call(OLD)),
  123
  124   MATCH = t_l:lineInfoElem(File,Line:Offset,Context, element(Tag, Attribs, _ContentIn)),!,
  125   prolog_must((call(MATCH),!,not(not((Line:Offset)==(Line0:Offset0))),retract(OLD),
  126   load_aiml_structure(Ctx,element(Tag,[srcinfo=File0:Line0-Offset0|Attribs],ContentIn)),
  127        NEW = t_l:lineInfoElem(File,Line:Offset,Attributes, element(Tag, Attribs, ContentIn)),
  128        assertz(NEW))),!.
  129
  130   */
  131
  132
  133
  134
  135
  136tls :- string_to_structure('<aiml><p>hi</p></aiml>',X),debugFmt(X).
  137tls2 :- string_to_structure('<?xml version="1.0" encoding="ISO-8859-1"?>\n<aiml><p>hi</p></aiml>\n\n',X),debugFmt(X).
  138
  139string_to_structure(String,XMLSTRUCTURESIN):- fail, sformat(Strin0,'<pre>~s</pre>',[String]),string_to_structure0(Strin0,XMLSTRUCTURES),!,  
  140   prolog_must([element(pre,[],XMLSTRUCTURESIN)]=XMLSTRUCTURES).
  141   
  142string_to_structure(String,XMLSTRUCTURES):- string_to_structure0(String,XMLSTRUCTURES),!.
  143string_to_structure0(String,XMLSTRUCTURES):- 
  144     %%get_sgml_parser_defs(PARSER_DEFAULTS,_PARSER_CALLBACKS),
  145     PARSER_DEFAULTS = [defaults(false), space(remove),/*number(integer),*/ qualify_attributes(false),dialect(xml)],
  146     string_to_structure0(String,PARSER_DEFAULTS,XMLSTRUCTURES),!.
  147
  148string_to_structure(String,PARSER_DEFAULTS0,XMLSTRUCTURES):-string_to_structure0(String,PARSER_DEFAULTS0,XMLSTRUCTURES).
  149
  150string_to_structure0(String,PARSER_DEFAULTS0,XMLSTRUCTURESIN):-
  151        setup_call_cleanup(((string_to_stream(String,In),new_sgml_parser(Parser, []))),
  152          prolog_must((                     
  153           atom_length(String,Len),
  154           append(PARSER_DEFAULTS0,[],PARSER_DEFAULTS),
  155           must_maplist(set_sgml_parser(Parser),PARSER_DEFAULTS),
  156           string_parse_structure(Len, Parser, user:PARSER_DEFAULTS, XMLSTRUCTURES, In)
  157           )),
  158       (free_sgml_parser(Parser),close(In))),!,prolog_must(XMLSTRUCTURESIN=XMLSTRUCTURES).
  159
  160string_parse_structure(Len,Parser, M:Options, Document, In) :-
  161	quietly((catch(call(call,string_parse_structure_opts_547(Parser),In,M,Options,Options2),_,string_parse_structure_opts(Parser,In,M,Options,Options2)))),
  162        % quietly((string_parse_structure_opts(Parser,In,M,Options,Options2))),
  163	sgml:sgml_parse(Parser,
  164		   [ document(Document),
  165		     source(In),
  166                     parse(input),
  167                     content_length(Len)
  168		   | Options2
  169		   ]).
  170
  171/*
  172string_parse_structure_opts_547(Parser, _In, _M, Options,Options2):-
  173	sgml:set_parser_options(Parser, Options, Options1),
  174	Options2=Options1.
  175*/
  176
  177string_parse_structure_opts(Parser,In,M,Options,Options2):-
  178	sgml:set_parser_options(Options, Parser, In, Options1),
  179        sgml:parser_meta_options(Options1, M, Options2).
  180
  181
  182
  183fileToLineInfoElements(Ctx,File,XMLSTRUCTURES):-
  184    atom_concat(File,'.term',Elis),
  185     ((fail,file_newer(Elis,File)) ->  
  186      termFileContents(Ctx,Elis,XMLSTRUCTURES) ;
  187       fileToLineInfoElements0(Ctx,File,XMLSTRUCTURES)).
  188
  189
  190termFileContents(_Ctx,File,termFileContents(File)):-!. %%,atrace.
  191termFileContents(_Ctx,File,element(aiml,[],XMLSTRUCTURES)):-
  192   setup_call_cleanup((open(File, read, In, [])), 
  193      findall(Elem,((repeat,read(In,Elem),((Elem\=end_of_file)->true;!))),XMLSTRUCTURES), close(In)),!
  193.
  194
  195
  196
  197
  198% gather line numbers
  199fileToLineInfoElements0(Ctx,F0,XMLSTRUCTURES):-
  200   global_pathname(F0,File),
  201       retractall(t_l:lineInfoElem(File,_,_,_)),
  202        setup_call_cleanup((open(File, read, In, [type(binary)]),new_sgml_parser(Parser, [])),
  203
  204          prolog_must((           
  205           get_sgml_parser_defs(PARSER_DEFAULTS,PARSER_CALLBACKS),
  206           must_maplist(set_sgml_parser(Parser),[file(File)|PARSER_DEFAULTS]),
  208           sgml_parse(Parser,[source(In)|PARSER_CALLBACKS]))
  208)
  208,
  209
  210        (free_sgml_parser(Parser),close(In)))
  210,!,
  211
  212
  213        fileToLineInfoElements2(Ctx,File,XMLSTRUCTURES)
  213.
  214
  215
  216% gather line contents
  217fileToLineInfoElements2(Ctx,File,XMLSTRUCTURES):-!,
  218  get_sgml_parser_defs(PARSER_DEFAULTS,_PARSER_CALLBACKS),
  219  setup_call_cleanup(open(File, read, In, [type(binary)]),(load_structure(In,Whole, [file(File)|PARSER_DEFAULTS]),!,
  220   load_inner_aiml_w_lineno(File,[],[],[],Ctx,Whole,XMLSTRUCTURES)),close(In)),!.
  221
  222load_inner_aiml_w_lineno(_SrcFile,_OuterTag,_Parent,_Attributes,_Ctx,Atom,Atom):-(atomic(Atom);var(Atom)),!.
  223load_inner_aiml_w_lineno(SrcFile,OuterTag,Parent,Attributes,Ctx,[H|T],LL):-!,
  224      must_maplist(load_inner_aiml_w_lineno(SrcFile,OuterTag,Parent,Attributes,Ctx),[H|T],LL),!.
  225
  226% % offset
  227load_inner_aiml_w_lineno(SrcFile,[OuterTag|PREV],Parent,Attributes,Ctx,element(Tag,Attribs,ContentIn),element(Tag,NewAttribs,ContentOut)):-
  228   Context=[Tag,OuterTag|_],
  229   MATCH = t_l:lineInfoElem(SrcFile,Line:Offset, Context, element(Tag, Attribs, no_content_yet)),
  230   MATCH,!,
  231   ignore(Line = nonfile),
  232   ignore(Offset = nonfile),
  233   appendAttributes(Ctx,Attributes,Attribs,RightAttribs),
  234   % % Src = element(Tag,Attribs,ContentIn),
  235   Src = nosrc,
  236   appendAttributes(Ctx,[srcfile=SrcFile:Line-Offset,srcinfo=Src],RightAttribs,NewAttribs),
  237   ignore(retract(MATCH)),
  238   (member(Tag,[aiml,topic]) ->  NextAttribs = NewAttribs ; NextAttribs = []),
  239   must_maplist(load_inner_aiml_w_lineno(SrcFile,[Tag,OuterTag|PREV],Parent,NextAttribs,Ctx),ContentIn,ContentOut),!.
  240
  241load_inner_aiml_w_lineno(SrcFile,MORE,Parent,Attributes,Ctx,element(Tag,Attribs,ContentIn),element(Tag,RightAttribs,ContentOut)):-
  242   appendAttributes(Ctx,Attributes,Attribs,RightAttribs),
  243   load_inner_aiml_w_lineno(SrcFile,[Tag|MORE],Parent,[],Ctx,ContentIn,ContentOut),!.
  244
  245load_inner_aiml_w_lineno(SrcFile,OuterTag,Parent,Attributes,_Ctx,L,L):-
  246   aiml_error(load_inner_aiml_w_lineno(SrcFile,OuterTag,Parent,Attributes,L)).
  247
  248
  249addAttribsToXML(Attribs,element(Tag,Pre,Content),element(Tag,Post,Content)):-appendAttributes(_Ctx,Pre,Attribs,Post),!.
  250addAttribsToXML(Attribs,[H|T],OUT):-must_maplist(addAttribsToXML(Attribs),[H|T],OUT),!.
  251addAttribsToXML(Attribs,OUT,OUT):-!,debugFmt(addAttribsToXML(Attribs,OUT,OUT)),!.
  252
  253
  254appendAttributes(_Ctx,L,R,AA):-hotrace((mergeAppend0(L,R,A),list_to_set_safe(A,AA))),!.
  255mergeAppend0(L,R,R):-var(L),!,var(R),!.
  256mergeAppend0(L,R,A):-var(R),append(L,R,A),!.
  257mergeAppend0(L,R,A):-var(L),append(L,R,A),!.
  258mergeAppend0(L,[R|RR],A):-eqmember(R,L),mergeAppend0(L,RR,A).
  259mergeAppend0([L|LL],R,A):-eqmember(L,R),mergeAppend0(LL,R,A).
  260mergeAppend0(L,R,A):-append(L,R,A).
  261
  262eqmember(E,List):-copy_term_numvars(E:List,E0:List0),member(E0,List0).
  263
  264list_to_set_safe(A,A):-(var(A);atomic(A)),!.
  265list_to_set_safe([A|AA],BB):- (not(not(lastMember(A,AA))) -> list_to_set_safe(AA,BB) ; (list_to_set_safe(AA,NB),BB=[A|NB])),!.
  266
  267
  268lastMember(E,List):-hotrace(lastMember0(E,List)).
  269
  270lastMember0(_E,List):-var(List),!,fail.
  271lastMember0(E,[H|List]):-lastMember0(E,List);E=H.
  272
  273lastMember(E,List,Rest):-hotrace(lastMember0(E,List,Rest)).
  274
  275lastMember0(E,List,Rest):-lastMember0(E,List),!,delete_safe(List,E,Rest),!.
  276lastMember0(E,List,Rest):-lastMember0(EE,List),!,lastMember0(E,EE,Rest),!,atrace. %%delete_safe(List,EE,Rest),!.
  277
  278delete_safe(List,_E,Rest):-var(List),!,Rest=List.
  279delete_safe(List,E,Rest):-is_list(List),!,delete(List,E,Rest).
  280delete_safe([H|List],E,Rest):- H==E,!,delete_safe(List,E,Rest).
  281delete_safe([H|List],E,[H|Rest]):-delete_safe(List,E,Rest).
  282
  283
  284getKeyValue(FullList,N=V):-lastMember(N=V,FullList),!.
  285%%addKeyValue(FullList,N=V):-nonvar(N),!,append(_Closed,[N=V|_],FullList),!.
  286addKeyValue(FullList,NV):- prolog_must((not(ground(FullList)),nonvar(NV))),append(_Closed,[NV|_],FullList),!.
  287
  288
  289lastMember2(E,List):-to_open_list(_,Closed,_Open,List),reverse(Closed,Rev),member(E,Rev).
  290
  291%lastMember(End,List) :- append(_,[End|_],List).
  292
  293
  294to_open_list(FullList,Closed,Open,FullList) :- append(Closed,Open,FullList),var(Open),!.
  295to_open_list(Closed,Closed,Open,FullList) :- append(Closed,Open,FullList),!.
  296
  297
  298copy_term_numvars(OLD,NEW):-copy_term(OLD,NEW),numbervars(NEW,0,_).
  299
  300
  301error_catch(C,E,F):-E=error(E1,E2),!,catch(C,error(E1,E2),F).
  302error_catch(C,E,F):-nonvar(E),!,catch(C,E,F).
  303error_catch(C,E,F):-catch(C,E,(needs_rethrown(E),F)).
  304needs_rethrown(E):- functor(aiml_goto,E,_),!,throw(E).
  305needs_rethrown(E):- functor(aiml_novalue,E,_),!,throw(E).
  306needs_rethrown(_).
  307
  308hotrace(G):- quietly(G).
  309
  310
  311:-thread_local(in_aiml_tag/1).  312:-thread_local(inLineNum/0).  313
  314skipOver(_).
  315
  316aiml_error(G):- wdmsg(aiml_error(G)),!.
  317
  318immediateCall(_,G):- trace,call(G).
  319
  320:- use_module(library(memfile)).
  323string_to_stream(String,InStream):- string(String),string_to_atom(String,Atom),!,string_to_stream(Atom,InStream).
  324string_to_stream(Atom,InStream):- atom_to_memory_file(Atom, Handle),open_memory_file(Handle,read,InStream).
  325
  326
  327% \n\n\n
  328load_aiml_structure(Ctx,O):-atomic(O),!,debugFmt(load_aiml_structure(Ctx,O)),!.
  329
  330
  331:- fixup_exports.