1%
    2% pPEG == SWI-Prolog module for parsing strings with pPEG grammars
    3%
    4/*	The MIT License (MIT)
    5 *
    6 *	Copyright (c) 2021-2023 Rick Workman
    7 *
    8 *	Permission is hereby granted, free of charge, to any person obtaining a copy
    9 *	of this software and associated documentation files (the "Software"), to deal
   10 *	in the Software without restriction, including without limitation the rights
   11 *	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   12 *	copies of the Software, and to permit persons to whom the Software is
   13 *	furnished to do so, subject to the following conditions:
   14 *
   15 *	The above copyright notice and this permission notice shall be included in all
   16 *	copies or substantial portions of the Software.
   17 *
   18 *	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   19 *	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   20 *	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   21 *	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   22 *	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
   23 *	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   24 *	SOFTWARE.
   25 */
   26:- module(pPEG,[            % module pPEG exports:
   27	 peg_compile/2,         % create a grammar from a source string
   28	 peg_compile/3,         % as above with option list
   29	 peg_parse/3,           % use a pPEG grammar to parse an Input string to a ptree Result
   30	 peg_parse/5,           % as above with unmatched residue and option list
   31	 peg_grammar/1,         % pPEG grammar source
   32	 peg_lookup_previous/3, % used by CSG extensions to lookup previous matches
   33	 pPEG/4                 % quasi-quotation hook for pPEG
   34	]).   35
   36:- use_module(library(strings),[string/4]).         % for quasi-quoted strings
   37:- use_module(library(debug)).                      % for tracing (see peg_trace/0)
   38:- use_module(library(option),[option/3]).          % for option list processing
   39:- use_module(library(pcre),[re_matchsub/4]).       % uses a regular expression for error & trace output
   40:- use_module(library(quasi_quotations), [          % pPEG as quasi-quotation
   41    quasi_quotation_syntax/1, 
   42    with_quasi_quotation_input/3
   43]).   44
   45%
   46% the "standard" pPEG grammar source for bootstrapping and reference, e.g.,
   47% ?- peg_grammar(S), write_term(S,[]).
   48%
   49peg_grammar({|string||
   50	Peg   = _ rule+ _
   51	rule  = id _ '=' _ alt
   52
   53	alt   = seq ('/'_ seq)*
   54	seq   = rep*
   55	rep   = pre sfx? _
   56	pre   = pfx? term
   57	term  = call / sq / chs / group / extn
   58
   59	group = '('_ alt ')'
   60	pfx   = [&!~]
   61	sfx   = [+?] / '*' range?
   62	range = num (dots num?)?
   63	num   = [0-9]+
   64	dots  = '..'
   65
   66	call  = id _ !'='
   67	id    = [a-zA-Z_] [a-zA-Z0-9_-]*
   68	sq    = ['] ~[']* ['] 'i'?
   69	chs   = '[' ~']'* ']'
   70	extn  = '<' ~'>'* '>'
   71	_     = ('#' ~[\n\r]* / [ \t\n\r]+)*
   72
   73|}).
   74
   75%
   76% Peg grammar in ptree form (bootstrapped from previous)
   77%
   78boot_grammar_def('Peg'([
   79	rule([id("Peg"), seq([id("_"), rep([id("rule"), sfx("+")]), id("_")])]), 
   80	rule([id("rule"), seq([id("id"), id("_"), sq("'='"), id("_"), id("alt")])]), 
   81	
   82	rule([id("alt"), seq([id("seq"), rep([seq([sq("'/'"), id("_"), id("seq")]), sfx("*")])])]), 
   83	rule([id("seq"), rep([id("rep"), sfx("*")])]), 
   84	rule([id("rep"), seq([id("pre"), rep([id("sfx"), sfx("?")]), id("_")])]), 
   85	rule([id("pre"), seq([rep([id("pfx"), sfx("?")]), id("term")])]), 
   86	rule([id("term"), alt([id("call"), id("sq"), id("chs"), id("group"), id("extn")])]), 
   87	
   88	rule([id("group"), seq([sq("'('"), id("_"), id("alt"), sq("')'")])]), 
   89	rule([id("pfx"), chs("[&!~]")]), 
   90	rule([id("sfx"), alt([chs("[+?]"), seq([sq("'*'"), rep([id("range"), sfx("?")])])])]), 
   91	rule([id("range"), seq([id("num"), rep([seq([id("dots"), rep([id("num"), sfx("?")])]), sfx("?")])])]), 
   92	rule([id("num"), rep([chs("[0-9]"), sfx("+")])]), 
   93	rule([id("dots"), sq("'..'")]), 
   94	
   95	rule([id("call"), seq([id("id"), id("_"), pre([pfx("!"), sq("'='")])])]), 
   96	rule([id("id"), seq([chs("[a-zA-Z_]"), rep([chs("[a-zA-Z0-9_-]"), sfx("*")])])]), 
   97	rule([id("sq"), seq([chs("[']"), rep([pre([pfx("~"), chs("[']")]), sfx("*")]), chs("[']"), rep([sq("'i'"), sfx("?")])])]), 
   98	rule([id("chs"), seq([sq("'['"), rep([pre([pfx("~"), sq("']'")]), sfx("*")]), sq("']'")])]), 
   99	rule([id("extn"), seq([sq("'<'"), rep([pre([pfx("~"), sq("'>'")]), sfx("*")]), sq("'>'")])]), 
  100	rule([id("_"), rep([alt([seq([sq("'#'"), rep([pre([pfx("~"), chs("[\\n\\r]")]), sfx("*")])]), rep([chs("[ \\t\\n\\r]"), sfx("+")])]), sfx("*")])])
  101], _)).
  102
  103%
  104% initialization code
  105%
  106:-set_prolog_flag(optimise,false).  % for debug
  107
  108% provide debug support before turning optimization on 
  109debug_peg_trace(FString,Args) :- debug(pPEG(trace),FString,Args).
  110
  111:-set_prolog_flag(optimise,true).  % mainly optimizes arithmetic (module scope only)
  112
  113% called from :- initialization.
  114init_peg :-
  115	foreach((nb_current(Key,_), atom_concat('pPEG:',_,Key)), nb_delete(Key)),  % clear pPEG globals
  116	nodebug(pPEG(trace)),              % init trace
  117	bootstrap_grammar.                 % initial load
  118
  119user:exception(undefined_global_variable,'pPEG:$pPEG',retry) :-
  120	bootstrap_grammar.                 % support multi-threads (need copy for each thread)              
  121
  122bootstrap_grammar :-
  123	boot_grammar_def(BootPeg),         % bootstrap and optimize
  124	nb_setval('pPEG:$pPEG',BootPeg),
  125	peg_grammar(PegSrc),
  126	peg_compile(PegSrc,pPEG,[optimise(true)]).  % if successful, will overwrite boot parser
  127
  128%
  129% support pPEG grammar in quasi-quotation (compiles to a grammar term, Args are options)
  130%
  131:- quasi_quotation_syntax(pPEG).  132
  133pPEG(Content, Args, _Binding, Grammar) :-
  134	with_quasi_quotation_input(Content, Stream, read_string(Stream, _, String)),
  135	peg_compile(String,Grammar,Args).  % Args are compiler options
  136
  137%
  138% peg_compile/2, peg_compile/3 :create a grammar from a source string
  139%
  140peg_compile(Src, GrammarSpec) :-              % create an unoptimized parser (a ptree)
  141	peg_compile(Src, GrammarSpec, []).
  142
  143peg_compile(Src, GrammarSpec, OptionList) :-  % create parser, optionally optimized
  144	peg_parse(pPEG, Src, Ptree, _, OptionList),
  145	option_value(optimise(Opt),OptionList,true),
  146	make_grammar(Opt,Ptree,Grammar),
  147	(Grammar = GrammarSpec
  148	 -> true                                  % GrammarSpec unified with Grammar
  149	 ;  (atom(GrammarSpec)                    % GrammarSpec is name of a Grammar
  150	     -> atomic_concat('pPEG:$',GrammarSpec,GKey),
  151	        nb_setval(GKey,Grammar)
  152	     ;  current_prolog_flag(verbose,GVrbse),
  153	        option_value(verbose(Vrbse),OptionList,GVrbse), % default = global setting
  154	        peg_fail_msg(peg(argError('GrammarSpec',GrammarSpec)),Vrbse)
  155	    )
  156	).
  157
  158make_grammar(true,Ptree,Grammar) :- !,        % optimise grammar, refs created by optimiser
  159	optimize_peg(Ptree,Grammar).
  160make_grammar(_,'Peg'(Rules),'Peg'(Rules,_)).  % non-optimised, no refs needed
  161
  162%
  163% peg_parse/3 :use a Peg grammar to parse an Input string to a ptree Result
  164% peg_parse/5 :parse/3 with Match string and Options
  165%
  166peg_parse(GrammarSpec, Input, Result) :-
  167	peg_parse(GrammarSpec, Input, Result, _Residue, []).
  168
  169peg_parse(GrammarSpec, Input, Result, Residue, OptionList) :-
  170	% process options
  171	option_value(incomplete(Incomplete),OptionList,false),  % default = complete parse option
  172	option_value(tracing(TRules),OptionList,[]),            % default = no tracing
  173	current_prolog_flag(verbose,GVrbse),
  174	option_value(verbose(Vrbse),OptionList,GVrbse),         % default = global setting
  175	peg_setup_parse_(GrammarSpec,Input,Vrbse,TRules,GName,Env,Eval),  % setup initial Env and Eval
  176	(eval_(Eval, Env, Input, 0, PosOut, Result0)            % parse using Eval
  177	 -> (Result0 = [] -> sub_string(Input,0,PosOut,_,Result) ; Result = Result0)  % parse successful, map [] to matched
  178	 ;  (persistent_env_(Env,(@(Name,Inst,Pos),_))          % parse unsuccessful, check errorInfo
  179	     -> peg_fail_msg(peg(errorinfo(GName,Name,Inst,Pos,Input)),Vrbse)  % fail with message
  180	     ;  fail                                               %  or just fail
  181	    )
  182	),
  183	(string_length(Input,PosOut)                            % did parse consume all input?
  184	 -> Residue=""                                          % yes, set residue to empty
  185	 ;  (Incomplete = true                                  % no, incomplete allowed?
  186	     -> sub_string(Input,PosOut,_,0,Residue)            % yes, set Residue to remaining
  187	     ;  (persistent_env_(Env,(@(Name,Inst,Pos),_)), Inst \== []  % parse unsuccessful, check errorInfo
  188	         -> peg_fail_msg(peg(errorinfo(GName,Name,Inst,Pos,Input)),Vrbse)  % use errorinfo if relevant
  189	         ;  peg_fail_msg(peg(incompleteParse(GName,Input,PosOut)),Vrbse)   % else use incomplete
  190	        )
  191	    )
  192	).
  193
  194option_value(Option, Options, Default) :-
  195	(Options = []
  196	 -> arg(1,Option,Default)                      % faster option when empty list
  197	 ;  option(Option, Options, Default)           % else use option/3
  198	).
  199
  200% persist environment not trailed - contains latest error info and trace indentation
  201% Note: arg/3 calls are handled in VM if last argument is a "first variable".
  202persistent_env_(Env,PEnv) :- arg(5,Env,PEnv).      % Env[5] = persistent environment
  203
  204peg_setup_parse_(GrammarSpec,Input,Vrbse,TRules,GName,@(Grammar,GName,0,([],[]),PEnv),Eval) :-
  205	(string(Input)
  206	 -> true
  207	 ;  peg_fail_msg(peg(argError('Input',Input)),Vrbse)
  208	),
  209	(copy_term(GrammarSpec,'Peg'(Grammar0,Grammar0))  % make a copy before substituting refs
  210	 -> true
  211	 ; % retrieving from globals makes it's own copy
  212	   (atom(GrammarSpec), atomic_concat('pPEG:$',GrammarSpec,GKey), nb_current(GKey,'Peg'(Grammar0,Grammar0))
  213	    -> true
  214	    ;  peg_fail_msg(peg(argError('Grammar',GrammarSpec)),Vrbse)
  215	   )
  216	),
  217	peg_add_tracing(TRules,Grammar0,Grammar),      % add required tracing
  218	(Vrbse = normal                                % init persistent environment
  219	 -> PEnv = (@(GName,[],0), "")                 % error info and indent (for tracing)
  220	 ;  PEnv = (@(), "")
  221	),
  222	Grammar = [FirstRule|_],                       % first rule
  223	(FirstRule = rule([Eval|_])                    % GName is name of first rule
  224	 -> Eval = id(GName)                           % non-optimized version: id(Name)
  225	 ;  Eval = call_O(FirstRule),                  % optimized version
  226	    FirstRule = rule(GName,_)
  227	).
  228
  229peg_fail_msg(Msg, normal) :-                       % only print if verbose = normal
  230	print_message(informational, Msg),
  231	fail.
  232
  233:- multifile prolog:message/1.  234
  235prolog:message(peg(argError(Arg,Value))) -->  % DCG
  236	[ "pPEG Error: invalid argument, ~w = ~w" - [Arg,Value] ].
  237
  238prolog:message(peg(errorinfo(GName,Rule,Inst,Pos,Input))) -->  % DCG
  239	{rule_elements(Rule,GName,Elems),
  240	 atomics_to_string(Elems,".",RName),
  241	 string_length(Input,InputLen),                     % Pos may be past Input length
  242	 StartPos is min(Pos,InputLen-1),
  243	 peg_line_pos(Input,StartPos,0,1,Text,EPos,ELineNum),  % source text information
  244	 CPos is EPos+1,                                    % cursor position is 1 based
  245	 (vm_instruction(Inst,Exp) -> true ; Exp = []),
  246	 rule_elements(Exp,GName,FElems),
  247	 atomics_to_string(FElems,".",FExp),
  248	 (FExp = "" -> Expct = "" ; Expct = ", expected ")
  249	},
  250	% a bit of format magic using tab settings to right justify LineNo and position cursor
  251	[ 'pPEG Error: ~w failed~w~w at line ~w.~w:\n% ~|~` t~d~3+ | ~w\n% ~|~` t~3+   ~|~` t~*+^' 
  252	        - [RName,Expct,FExp,ELineNum,CPos,ELineNum,Text,EPos]
  253	].
  254
  255prolog:message(peg(incompleteParse(GName,Input,PosOut))) -->  % DCG
  256	{peg_line_pos(Input,PosOut,0,1,Text,EPos,ELineNum),  % source text information
  257	 CPos is EPos+1                                      % cursor position is 1 based
  258	},
  259	% more format magic using tab settings to right justify LineNo and position cursor
  260	[ 'pPEG Error: ~w fell short at line ~w.~w:\n% ~|~` t~d~3+ | ~w\n% ~|~` t~3+   ~|~` t~*+^'
  261	        - [GName,ELineNum,CPos,ELineNum,Text,EPos]
  262	].
  263
  264prolog:message(peg(undefined(RuleName))) -->  % DCG
  265	[ 'pPEG: ~w undefined' - [RuleName] ].               % from VM so limited info
  266
  267rule_elements([],GName,[GName,GName]) :- !.  % nil rule name, use grammar name (twice)
  268rule_elements(Rule,GName,[GName,Rule]) :- 
  269	sub_atom(Rule,0,1,_,RType),              % is it a rule name (starts with alpha)
  270	char_type(RType,alpha),
  271	!.
  272rule_elements(Rule,_GName,[Rule]).           % nothing else qualified by grammar name
  273
  274% 
  275% lookup previous match of rule Name in Env
  276%
  277peg_lookup_previous(Name,Env,Match) :-
  278	arg(4,Env,Ctxt),                         % Env[4] = Ctxt for maintaining prior matches
  279	(var(Name)
  280	 -> lookup_match_(Ctxt,RName,Match),     % most recent match
  281	    atom_string(RName,Name)
  282	 ;  atom_string(RName,Name),             % previous named match
  283	    lookup_match_(Ctxt,RName,Match)
  284	).
  285
  286lookup_match_((Matches,Parent),Name,Match) :-
  287	(memberchk((Name,slice(Input,PosIn,PosOut)),Matches)
  288	 -> Len is PosOut-PosIn,                 % construct Match string from slice
  289	    sub_string(Input,PosIn,Len,_,Match)
  290	 ;  lookup_match_(Parent,Name,Match)     % at root, Parent = [] (see peg_setup_parse_/7)
  291	).
  292
  293%
  294% peg VM implementation - 8 native plus 4 "optimized" instructions (plus trace)
  295%
  296eval_(id(Name), Env, Input, PosIn, PosOut, R) :-                % id "instruction"
  297	atom_string(PName,Name),                     % map to call_O(Rule), requires atom Name
  298	arg(1,Env,Grammar),                          % Env[1] = Grammar
  299	(memberchk(rule([id(Name), Exp]), Grammar)   % linear search, can be slow
  300	 -> eval_(call_O(rule(PName,Exp)), Env, Input, PosIn, PosOut, R) % continue with call_O
  301	 ;  print_message(warning, peg(undefined(PName))),  % undefined rule, fail with warning
  302	    fail
  303	). 
  304
  305eval_(alt(Ss), Env, Input, PosIn, PosOut, R) :-                 % alt "instruction"
  306	alt_eval(Ss, Env, Input, PosIn, PosOut, R).
  307
  308eval_(seq(Ss), Env, Input, PosIn, PosOut, R) :-                 % seq "instruction"
  309	seq_eval(Ss, PosIn, Env, Input, PosIn, PosOut, R).
  310
  311eval_(rep([Exp, ROp]), Env, Input, PosIn, PosOut, R) :-         % rep "instruction"
  312	rep_counts(ROp,Min,Max), !,                  % green cut for rep_counts
  313	repeat_eval(0, Min, Max, Exp, Env, Input, PosIn, PosOut, R).
  314
  315eval_(pre([pfx(POp), Exp]), Env, Input, PosIn, PosOut, []) :-   % pre "instruction"
  316	% requires help with managing errorInfo
  317	arg(5,Env,PEnv),  % VM optimized persistent_env_(Env,PEnv),
  318	arg(1,PEnv,ErrorInfo),                      % save errorinfo
  319	nb_linkarg(1,PEnv,@()),                     % disable errorinfo collection
  320	(eval_(Exp, Env, Input, PosIn, _PosOut, _R)
  321	 -> nb_linkarg(1,PEnv,ErrorInfo),           % restore previous errorinfo
  322	    % eval_(Exp) succeeded
  323	    (POp = "&" -> PosOut = PosIn) % ; fail)
  324	 ;  nb_linkarg(1,PEnv,ErrorInfo),           % restore previous errorinfo
  325	    % eval_(Exp) failed
  326	    (POp = "!" -> PosOut = PosIn
  327	    ;POp = "~" -> (string_length(Input,PosIn) -> fail ; PosOut is PosIn+1)  % no match, bump position
  328	    )
  329	).
  330
  331eval_(sq(S), _Env, Input, PosIn, PosOut, []) :-                 % sq "instruction"
  332	(sub_string(S,_,1,0,"i")                  % case insensitive match test
  333	 -> sub_string(S,0,_,1,S1),               % strip i
  334	    literal_match_(S1,SMatch),            % string to match
  335	    string_upper(SMatch,UMatch),
  336	    string_length(SMatch,Len),
  337	    sub_string(Input,PosIn,Len,_,Match),
  338	    string_upper(Match,UMatch)	          % case insensitive match ... 
  339	 ;  literal_match_(S,Match),              % string to match
  340	    sub_string(Input,PosIn,Len,_,Match)   % case sensitive match
  341	),
  342	PosOut is PosIn+Len.
  343
  344eval_(chs(MatchSet), _Env, Input, PosIn, PosOut, []) :-        % chs "instruction"
  345	sub_atom(Input, PosIn, 1, _, R),          % input char, fails if end of Input
  346	match_chars(MatchSet,MChars),             % convert Match string to MChars list
  347	chars_in_match(MChars,R,in),              % character in set
  348	PosOut is PosIn+1.                        % match succeeded, consume 1 char
  349
  350eval_(extn(S), Env, Input, PosIn, PosOut, R) :-                % extn "instruction"
  351	(string(S) -> extn_pred(S,T) ; T = S),    % avoid extra work if already optimised
  352	extn_call(T,Env,Input,PosIn,PosOut,R).
  353
  354% additional instructions produced by optimizer
  355eval_(call_O(rule(Name, Exp)), @(Grammar,_RName,Dep,Ctxt,PEnv), Input, PosIn, PosOut, R) :-  % call_O "instruction"
  356	% also called from id instruction after lookup in non-optimised grammars
  357	nonvar(Exp),    % test for undefined rule called, warning would have been printed by optimizer
  358	Dep1 is Dep+1,  % increment call depth
  359	% recursion check - expensive, so use sparingly
  360	(Dep1 >= 64     % only check when call depth exceeds 64
  361	 -> recursive_loop_check(eval_(call_O(rule(Name,_)),_,_,P,_,_),P,PosIn,Name)
  362	 ;  true
  363	),
  364	eval_(Exp, @(Grammar,Name,Dep1,([],Ctxt),PEnv), Input, PosIn, PosOut, Res),  % with new context
  365	(Exp = trace(_)
  366	 -> R = Res  % if tracing, already done
  367	 ;  Match = slice(Input,PosIn,PosOut),  % Input slice matched
  368	    % add Match to siblings in context (undo on backtrack)
  369	    arg(1,Ctxt,Matches), setarg(1,Ctxt,[(Name,Match)|Matches]),
  370	    sub_atom(Name,0,1,_,RType),  % first character of name determines rule type
  371	    (RType == '_'
  372	     -> R = []                   % optimise anonymous rule => null result
  373	     ;  % ptree result
  374	        flatten_(Res,[],RRs),                % flatten args list
  375	        build_ptree(RRs,RType,Match,Name,R)  % and build
  376	    )
  377	).
  378
  379eval_(rep_O(Exp, Min, Max), Env, Input, PosIn, PosOut, R) :-    % rep_O "instruction"
  380	repeat_eval(0, Min, Max, Exp, Env, Input, PosIn, PosOut, R).
  381
  382eval_(sq_O(Case,Match), _Env, Input, PosIn, PosOut, []) :-      % sq_O "instruction"
  383	(Case = exact
  384	 -> sub_string(Input,PosIn,Len,_,Match)   % will match "" with Len=0
  385	 ;  % assume Case=upper
  386	    string_length(Match,Len),
  387	    sub_string(Input,PosIn,Len,_,S),
  388	    string_upper(S,Match)
  389	),   
  390	PosOut is PosIn+Len.
  391
  392eval_(chs_O(In,MChars), _Env, Input, PosIn, PosOut, []) :-      % chs_O "instruction"
  393	sub_atom(Input, PosIn, 1, _, R),          % input char, fails if end of Input
  394	chars_in_match(MChars,R,In),              % character in/notin set
  395	PosOut is PosIn+1.                        % match succeeded, consume 1 char
  396
  397eval_(trace(Rule), Env, Input, PosIn, PosOut, R) :-             % trace "instruction"
  398	% start tracing this rule
  399	(debugging(pPEG(trace),true)
  400	 -> eval_(call_O(Rule),Env,Input,PosIn,PosOut,R)  % already tracing, just call_O
  401	 ;  current_prolog_flag(debug,DF),  % save debug state
  402	    peg_trace,                      % enable tracing
  403	    persistent_env_(Env,PEnv),
  404	    nb_linkarg(2,PEnv," "),         % reset indent
  405	    (eval_(call_O(Rule),Env,Input,PosIn,PosOut,R)  % call_O with tracing enabled
  406	     -> peg_notrace,                % success, disable tracing and return a result
  407	        set_prolog_flag(debug,DF)   % restore saved debug state
  408	     ;  peg_notrace,                % fail, first disable tracing
  409	        set_prolog_flag(debug,DF),  % restore saved debug state
  410	        fail
  411	    )
  412	).
  413
  414%
  415% Support for VM instructions
  416%
  417
  418% alt instruction
  419alt_eval([S|Ss], Env, Input, PosIn, PosOut, R) :- 
  420	eval_(S, Env, Input, PosIn, PosOut, R)                  % try S
  421	 -> true                                                % succeed, committed choice
  422	 ;  alt_eval(Ss, Env, Input, PosIn, PosOut, R).         % S failed, keep trying
  423
  424
  425% seq instruction
  426% responsible for capturing error info on failure 
  427seq_eval([], _Start, _Env, _Input, PosIn, PosIn, []).
  428seq_eval([S|Ss], Start, Env, Input, PosIn, PosOut, R) :-
  429	(eval_(S, Env, Input, PosIn, PosNxt, Re)                    % try S
  430	 -> (Re == []
  431	     -> seq_eval(Ss, Start, Env, Input, PosNxt, PosOut, R)  % nil result, loop to next in sequence
  432	     ;  R = [Re|Rs],                                        % collect result
  433	        seq_eval(Ss, Start, Env, Input, PosNxt, PosOut, Rs) % loop to next in sequence 
  434	    )
  435	 ;  PosIn > Start,          % S failed but something consumed in this sequence
  436	    arg(5,Env,PEnv),        % VM optimized persistent_env_(Env,PEnv),
  437	    arg(1,PEnv,ErrInfo),    % exploit optimized arg/3 
  438	    arg(3,ErrInfo,HWM),
  439	    PosIn > HWM,            % new high water mark ?
  440	    arg(2,Env,FName),       % Env[2] = current rule name from environment
  441	    nb_linkarg(1,PEnv,@(FName,S,PosIn)),
  442	    fail
  443	).
  444
  445
  446% rep instruction 
  447% counts for repeat a match, -1 signifies any number for Max  
  448rep_counts(sfx("?"),0, 1).
  449rep_counts(sfx("+"),1,-1).
  450rep_counts(sfx("*"),0,-1).                         % *
  451rep_counts(num(StrN),N,N) :-                       % *N
  452	number_string(N,StrN).
  453rep_counts(range([num(StrN),_]),N,-1) :-           % *N..
  454	number_string(N,StrN).
  455rep_counts(range([num(StrM),_,num(StrN)]),M,N) :-  % *M..N
  456	number_string(M,StrM),
  457	number_string(N,StrN).
  458
  459% repeat evaluation loop, evaluates to a list
  460repeat_eval(Max, _Min, Max, _Exp, _Env, _Input, PosIn, PosIn, []) :- !.  % terminate if C=Max
  461repeat_eval(C,    Min, Max,  Exp,  Env,  Input, PosIn, PosOut, R) :- 
  462	eval_(Exp, Env, Input, PosIn, PosN, Re),
  463	PosN > PosIn,  % expressions in loops must consume
  464	!,
  465	C1 is C+1,     % increment count
  466	(Re == []      % don't accumulate empty results
  467	 -> repeat_eval(C1, Min, Max, Exp, Env, Input, PosN, PosOut, R) 
  468	 ;  R = [Re|Rs],
  469	    repeat_eval(C1, Min, Max, Exp, Env, Input, PosN, PosOut, Rs)
  470	).
  471repeat_eval(C,    Min,_Max, _Exp, _Env, _Input, PosIn, PosIn, []) :-  % eval failed
  472	C >= Min.      % C greater than or equal Min, else fail
  473
  474
  475% sq instruction (also dq)
  476% strip outer quotes and map escapes
  477literal_match_(S,Match) :-
  478	match_chars(S,Chars),                    % convert S string to escaped Chars list
  479	string_chars(Match,Chars).               % string to match
  480
  481
  482% chars instruction
  483% construct list of MChars for matching
  484match_chars(MatchSet, MChars) :- 
  485	sub_string(MatchSet,1,_,1,Str),  % strips outer [], ", '
  486	string_chars(Str,Chars),
  487	unescape_(Chars,MChars).
  488
  489unescape_([],[]).
  490unescape_(['\\',u,C1,C2,C3,C4|NxtChars],[Esc|MChars]) :-
  491	hex_value(C1,V1), hex_value(C2,V2), hex_value(C3,V3), hex_value(C4,V4), !,
  492	VEsc is ((V1*16+V2)*16+V3)*16+V4,
  493	char_code(Esc,VEsc),
  494	unescape_(NxtChars,MChars).
  495unescape_(['\\','U',C1,C2,C3,C4,C5,C6,C7,C8|NxtChars],[Esc|MChars]) :-
  496	hex_value(C1,V1), hex_value(C2,V2), hex_value(C3,V3), hex_value(C4,V4), 
  497	hex_value(C5,V5), hex_value(C6,V6), hex_value(C7,V7), hex_value(C8,V8), !,
  498	VEsc is ((((((V1*16+V2)*16+V3)*16+V4)*16*V5)*16+V6)*16+V7)*16+V8,
  499	char_code(Esc,VEsc),
  500	unescape_(NxtChars,MChars).
  501unescape_(['\\',CEsc|Chars],[Esc|MChars]) :-
  502	std_escape_(CEsc,Esc), !,
  503	unescape_(Chars,MChars).
  504unescape_([Char|Chars],[Char|MChars]) :-
  505	unescape_(Chars,MChars).
  506
  507std_escape_('n','\n').
  508std_escape_('r','\r').
  509std_escape_('t','\t').
  510
  511hex_value(C,V) :- char_type(C,digit(V)) -> true ; char_type(C,xdigit(V)).
  512
  513% search for Ch in list of MChars (including ranges)
  514chars_in_match([],_Ch,In) :- In == notin.                 % EOList, succeed if 'notin'
  515chars_in_match([Cl,'-',Cu|MChars],Ch,In) :- !,            % range
  516	(Cl@=<Ch,Ch@=<Cu -> In == in ; chars_in_match(MChars,Ch,In)).
  517chars_in_match([Cl|MChars],Ch,In) :-                      % equivalence
  518	(Cl==Ch -> In == in ; chars_in_match(MChars,Ch,In)).
  519
  520
  521% id/call instruction
  522% recursive loop check - SWI Prolog specific (also used by skip_ws/4)
  523recursive_loop_check(Goal,Last,Pos,Name) :-
  524	prolog_current_frame(F),                % this frame
  525	prolog_frame_attribute(F,parent,IPF),   % caller's frame
  526	prolog_frame_attribute(IPF,parent,GPF), % caller's predecessor's frame
  527	(once(prolog_frame_attribute(GPF,parent_goal,Goal)), Last=Pos 
  528	 -> % found a parent call with identical cursor position ==> infinte recursion
  529	    peg_notrace,
  530	    format(string(Message),"pPEG infinite recursion applying ~w",[Name]),
  531	    throw(error(resource_error(Message),_))
  532	 ;  true
  533	).
  534
  535% flatten arguments and remove [] (uses difference lists)
  536flatten_([], Tl, Tl) :-
  537	!.
  538flatten_([Hd|Tl], Tail, List) :-
  539	!,
  540	flatten_(Hd, FlatHeadTail, List),
  541	flatten_(Tl, Tail, FlatHeadTail).
  542flatten_(NonList, Tl, [NonList|Tl]).
  543
  544% build a ptree from a flattened list of args
  545build_ptree([],RType,Match,PName,R) :- !,      % no args, 2 cases
  546	(char_type(RType,lower)
  547	 -> Match = slice(Input,PosIn,PosOut),     % string result
  548	    Len is PosOut-PosIn,
  549	    sub_string(Input,PosIn,Len,_,Arg),
  550	    R =.. [PName,Arg]
  551	 ;  R =.. [PName,[]]                       % empty args
  552	).
  553build_ptree([Arg],RType,_Match,_PName,Arg) :-  % single arg
  554	compound(Arg),
  555	char_type(RType,lower),                    % cull case, don't wrap
  556	!.                
  557build_ptree(Arg,_RType,_Match,PName,R) :-      % general case, construct ptree node   
  558	R =.. [PName,Arg].
  559
  560
  561% extn instruction
  562% convert extension contents to callable Mod:Pred(StringArg)
  563extn_pred(S,T) :-
  564	(sub_string(S,Pos,1,_," ")                 % contains a space at Pos
  565	 -> FLen is Pos-1,                         % functor length
  566	    sub_atom(S,1,FLen,_,Pred),             % strip <
  567	    APos is Pos+1,                         % StringArg pos           
  568	    sub_string(S,APos,_,1,S1),             % also strip >    
  569	    split_string(S1,""," ",[StringArg])    % and trim whitespace from Arg 
  570	 ;  sub_atom(S,1,_,1,Pred),                % empty StringArg
  571	    StringArg = ""
  572	),
  573	(split_string(Pred,':','',[SM,SF])         % optional module specification
  574	 -> atom_string(M,SM), atom_string(F,SF),
  575	    P =.. [F,StringArg],
  576	    T = M:P
  577	 ;  T =.. [Pred,StringArg]
  578	).
  579
  580% extensions call T/6 if defined, else just a tracepoint with nothing returned
  581extn_call(T,Env,Input,PosIn,PosOut,R) :-
  582	catch(call(T,Env,Input,PosIn,PosOut,R),
  583	      Err, extn_error(Err,T,Env,Input,PosIn,PosOut,R)
  584	).
  585
  586extn_error(error(existence_error(procedure,_),_),T,_Env,Input,PosIn,PosIn,[]) :- !,
  587	sub_string(Input,PosIn,_,0,Rem),
  588	print_message(information, peg(extension(T,Rem))).
  589extn_error(Err,_T,_Env,_Input,_PosIn,_PosOut,_R) :-
  590	throw(Err).
  591
  592prolog:message(peg(extension(T,Rem))) -->  % DCG
  593	[ "Extension ~p parsing: ~p\n" - [T,Rem] ].
  594
  595%
  596% set tracing on named rules
  597%
  598peg_add_tracing([],Grammar,Grammar) :- !.  % nothing to trace
  599peg_add_tracing(TRules,Grammar,GrammarT) :-
  600	(Grammar = [rule(_,_)|_]
  601	 -> duplicate_term(Grammar,GrammarC)   % create duplicate of optimized grammar
  602	 ;  GrammarC = Grammar                 % unoptimized case copies as needed
  603	),
  604	add_tracing(TRules,GrammarC,GrammarT).
  605
  606add_tracing([],Grammar,Grammar) :- !.
  607add_tracing([Name|Names],Grammar,GrammarT) :- !,
  608	add_tracing(Name,Grammar,NxtGrammar), 
  609	add_tracing(Names,NxtGrammar,GrammarT).
  610add_tracing(Name,Grammar,GrammarT) :-
  611	add_trace(Grammar,Name,GrammarT).
  612	
  613add_trace([],_SName,[]).
  614add_trace([rule([id(SName), Exp])|Rules], Name, 
  615          [rule([id(SName), trace(rule(AName,Exp))])|Rules]) :-
  616	nonvar(Exp),              % must be defined
  617	atom_string(AName,SName), % SName and Name equivalent to AName
  618	atom_string(AName,Name),
  619	!.
  620add_trace([Rule|Rules], Name, [Rule|Rules]) :-
  621	Rule = rule(AName, Exp),  % optimized Rule
  622	nonvar(Exp),              % must be defined
  623	atom_string(AName,Name),  % name matches
  624	!,
  625	% overwrite expression in place so all call references persist
  626	setarg(2,Rule,trace(rule(AName,Exp))).
  627add_trace([Rule|Rules], Name, [Rule|RulesT]) :-
  628	add_trace(Rules, Name, RulesT).
  629
  630%
  631% enable/disable tracing (from trace instruction)
  632%
  633peg_trace :-
  634	debug(pPEG(trace)),
  635	trace_control_(spy(pPEG:eval_)).
  636
  637peg_notrace :-
  638	(debugging(pPEG(trace),true)
  639	 -> trace_control_(nospy(pPEG:eval_)),
  640	    nodebug(pPEG(trace))
  641	 ;  true
  642	).
  643
  644trace_control_(G) :-    % suppress informational messages when controlling spy point
  645	current_prolog_flag(verbose,V),
  646	set_prolog_flag(verbose,silent),
  647	call(G),
  648	set_prolog_flag(verbose,V).
  649
  650% entry point when the eval_ spypoint is triggered
  651:- multifile user:prolog_trace_interception/4.  652
  653user:prolog_trace_interception(Port,Frame,_Choice,continue) :-
  654	debugging(pPEG(trace),true),  % peg(trace) enabled?
  655	prolog_frame_attribute(Frame,goal,Goal),
  656	peg_trace_port(Port,Goal),
  657	!.  % defensive, remove any CP's
  658
  659peg_trace_port(Port,pPEG:eval_(Inst, Env, Input, PosIn, PosOut, R)) :-  % only trace pPEG:eval_/6
  660	peg_inst_type(Inst,Type),
  661	vm_instruction(Inst,TInst),
  662	persistent_env_(Env,PEnv),
  663	peg_trace_port_(Type, Port, TInst, PEnv, Input, PosIn, PosOut, R),
  664	!.
  665
  666peg_trace_port_(call, call, TInst, PEnv, Input, PosIn, _PosOut, _R) :- !,
  667	peg_cursor_pos(Input,PosIn,Cursor),
  668	peg_trace_msg(postInc, PEnv, "~w~w~w", [Cursor,TInst]).            % with indent parm
  669peg_trace_port_(call, fail, TInst, PEnv, Input, PosIn, _PosOut, _R) :- !,
  670	peg_cursor_pos(Input,PosIn,Cursor),
  671	peg_trace_input(Input,PosIn,Str),
  672	peg_trace_msg(preDec, PEnv, "~w~w~w != \t~p", [Cursor,TInst,Str]). % with indent parm
  673peg_trace_port_(call, exit, TInst, PEnv, Input, PosIn, PosOut, R) :- !,
  674	peg_cursor_pos(Input,PosOut,Cursor),		
  675	(R = []  % if null result (_rule), print matching string
  676	 -> Len is PosOut-PosIn,
  677	    sub_string(Input,PosIn,Len,_,RT) 
  678	 ;  RT = R
  679	),
  680	(string(RT) -> MatchOp = "==" ; MatchOp = "=>"),
  681	peg_trace_msg(preDec, PEnv, "~w~w~w ~w \t~p", [Cursor,TInst,MatchOp,RT]). % with indent parm
  682peg_trace_port_(meta, call, TInst, PEnv, Input, PosIn, _PosOut, _R) :- !,
  683	peg_cursor_pos(Input,PosIn,Cursor),
  684	peg_trace_msg(indent, PEnv, "~w~w~w", [Cursor,TInst]).             % with indent parm
  685peg_trace_port_(terminal, fail, TInst, PEnv, Input, PosIn, _PosOut, _R) :- !,
  686	peg_cursor_pos(Input,PosIn,Cursor),
  687	peg_trace_input(Input,PosIn,Str),
  688	peg_trace_msg(indent, PEnv, "~w~w~w != \t~p", [Cursor,TInst,Str]). % with indent parm
  689peg_trace_port_(terminal, exit, TInst, PEnv, Input, PosIn, PosOut, _R) :- !,
  690	peg_cursor_pos(Input,PosOut,Cursor),
  691	Len is PosOut-PosIn,
  692	sub_string(Input,PosIn,Len,_,RT),
  693	peg_trace_msg(indent, PEnv, "~w~w~w == \t~p", [Cursor,TInst,RT]).  % with indent parm
  694peg_trace_port_(_Other, _, _, _, _, _, _, _).  % else no trace message
  695
  696peg_inst_type(alt(_),meta).
  697peg_inst_type(seq(_),meta).
  698peg_inst_type(pre(_),call).
  699peg_inst_type(rep(_),meta).
  700peg_inst_type(rep_O(_,_,_),meta).
  701peg_inst_type(sq(_),terminal). 
  702peg_inst_type(sq_O(_,_),terminal).
  703peg_inst_type(chs(_),terminal).
  704peg_inst_type(chs_O(_,_),terminal).
  705peg_inst_type(extn(_),terminal).
  706peg_inst_type(id(_),notrace).               % not traced, caught in call_O
  707peg_inst_type(call_O(rule(_,Exp)),Type) :-  % don't trace calls which are explicitly traced
  708	Exp = trace(_) -> Type = notrace ; Type = call.
  709peg_inst_type(trace(_),notrace).            % not traced, caught in call_O
  710
  711peg_cursor_pos(Input,Pos,Cursor) :-
  712	string_length(Input,InputLen),                             % Pos may be past Input length
  713	StartPos is min(Pos,InputLen-1),
  714	peg_line_pos(Input,StartPos,0,1,_Text,LinePos,LineNo),     % source text information
  715	CPos is LinePos +1,                                        % cursor position is 1 based
  716	format(string(Cursor),"~` t~d~4+.~d~4+",[LineNo,CPos]).    % more format tab magic
  717
  718peg_line_pos("",_Pos,_LinePos,LineNum,"",0,LineNum) :- !.      % corner case: empty string
  719peg_line_pos(Input,Pos,LinePos,LineNum,Text,EPos,ELineNum) :-  % assumes Pos has been range checked
  720	% Note: could use a pPEG for line matching, but this avoids re-entrant issues with globalvars
  721	re_matchsub("[^\n\r]*(\n|\r\n?)?",Input,Match,[start(LinePos)]),  % match a line
  722	string_length(Match.0,Len),
  723	NxtLinePos is LinePos+Len,
  724	((LinePos =< Pos,Pos < NxtLinePos)                 % Pos is within this line?
  725	 -> string_concat(Text,Match.get(1,""),Match.0),   % yes
  726	    EPos is Pos-LinePos,
  727	    ELineNum = LineNum
  728	 ;  NxtLineNum is LineNum+1,                       % no
  729	    peg_line_pos(Input,Pos,NxtLinePos,NxtLineNum,Text,EPos,ELineNum) 
  730	).
  731
  732peg_trace_input(Input,PosIn,Str) :-
  733	sub_string(Input,PosIn,L,0,SStr),          % current residue
  734	(L =< 32
  735	 -> Str = SStr
  736	  ; sub_string(SStr,0,32,_,SStr1),
  737	    string_concat(SStr1," ... ",Str)
  738	).
  739
  740peg_trace_msg(postInc, PEnv, Msg, [Cursor|Args]) :-
  741	arg(2,PEnv,Indent),
  742	debug_peg_trace(Msg,[Cursor,Indent|Args]),
  743	string_concat(Indent,"|  ",NxtIndent),      % add "|  " to current indent
  744	nb_linkarg(2,PEnv,NxtIndent).
  745peg_trace_msg(preDec, PEnv, Msg, [Cursor|Args]) :-
  746	arg(2,PEnv,Indent),
  747	sub_string(Indent,0,_,3,NxtIndent),         % subtract 3 chars from end of current indent
  748	debug_peg_trace(Msg,[Cursor,NxtIndent|Args]),
  749	nb_linkarg(2,PEnv,NxtIndent).
  750peg_trace_msg(indent, PEnv, Msg, [Cursor|Args]) :-
  751	arg(2,PEnv,Indent),
  752	debug_peg_trace(Msg,[Cursor,Indent|Args]).
  753
  754%
  755% de-compile VM instructions of interest, used for tracing and error messages
  756%
  757vm_instruction(id(Name), Name).
  758vm_instruction(call_O(rule(Name,_Exp)), Name). 
  759vm_instruction(seq(Exps), Is) :-
  760	vm_instruction_list(Exps,LIs),
  761	atomics_to_string(LIs," ",Is0),
  762	atomics_to_string(["(",Is0,")"],Is).
  763vm_instruction(alt(Exps), Is) :-
  764	vm_instruction_list(Exps,LIs),
  765	atomics_to_string(LIs," / ",Is0),
  766	atomics_to_string(["(",Is0,")"],Is).
  767vm_instruction(rep([Exp, Sfx]), Is) :-
  768	vm_rep_sfx(Sfx,ROp), !,
  769	vm_instruction(Exp,I),
  770	string_concat(I,ROp,Is).
  771vm_instruction(rep_O(Exp, Min, Max), Is) :-
  772	rep_counts(Sfx, Min, Max), !,
  773	vm_instruction(rep([Exp, Sfx]), Is).
  774vm_instruction(pre([pfx(Chs),Exp]), Is) :-
  775	vm_instruction(Exp,I),
  776	string_concat(Chs,I,Is).
  777vm_instruction(sq(Match), Is) :-
  778	unescape_std(Match,Is).
  779vm_instruction(sq_O(Case,Match), Is) :-
  780	(Case = exact -> Sens = "" ; Sens = "i"),
  781	unescape_std(Match,S1),
  782	unescape_string(S1,"'","\\u0027",S),
  783	atomics_to_string(["'",S,"'",Sens],Is).
  784vm_instruction(chs(Match), Is) :-
  785	unescape_std(Match,Is).
  786vm_instruction(chs_O(In,MChars), Is) :-
  787	(In = notin -> Pfx = '~' ; Pfx = ''),
  788	string_chars(MStr,MChars),
  789	unescape_std(MStr,S),
  790	unescape_string(S,"]","\\u005d",S1),
  791	atomics_to_string([Pfx,"[",S1,"]"],Is).
  792vm_instruction(extn(Ext), Is) :-
  793	(string(Ext)
  794	 -> Is = Ext                              % native string format
  795	 ;  (Ext = Mod:Pred
  796	     -> Pred =.. [Func,StringArg],        % module qualified predicate
  797	        atomics_to_string(['<',Mod,':',Func,' ',StringArg,'>'],Is)
  798	     ;  Ext =.. [Func,StringArg],         % plain predicate
  799	        atomics_to_string(['<',Func,' ',StringArg,'>'],Is)
  800	    )
  801	).
  802vm_instruction(trace(Rule), Is) :-
  803	vm_instruction(call_O(Rule), Is).
  804
  805vm_instruction_list([],[]).
  806vm_instruction_list([Exp|Exps],[Is|LIs]) :-
  807	vm_instruction(Exp,Is),
  808	vm_instruction_list(Exps,LIs).
  809
  810vm_rep_sfx(sfx(ROp), ROp).
  811vm_rep_sfx(num(StrN), ROp) :-                      atomics_to_string(["*",StrN],ROp).
  812vm_rep_sfx(range([num(StrN),_]), ROp) :-           atomics_to_string(["*",StrN,".."],ROp).
  813vm_rep_sfx(range([num(StrM),_,num(StrN)]), ROp) :- atomics_to_string(["*",StrM,"..",StrN],ROp).
  814
  815unescape_string(Sin,Esc,Usc,Sout) :-
  816	split_string(Sin,Esc,"",L),
  817	atomics_to_string(L,Usc,Sout).
  818
  819unescape_std(Sin,Sout) :-
  820	string_chars(Sin,CharsIn),
  821	escape_chars(CharsIn,CharsOut),
  822	string_chars(Sout,CharsOut).
  823
  824escape_chars([],[]).
  825escape_chars([C|CharsIn],[C|CharsOut]) :-
  826	char_code(C,CS), between(32,126,CS), !,     % ASCII
  827	escape_chars(CharsIn,CharsOut).
  828escape_chars([ECh|CharsIn],['\\',Ch|CharsOut]) :- 
  829	std_escape_(Ch,ECh),!,                      % escapes
  830	escape_chars(CharsIn,CharsOut).
  831escape_chars([C|CharsIn],['\\','u',X1,X2,X3,X4|CharsOut]) :-
  832	char_code(C,CS), % (CS =< 31 ; CS >= 127),  % outside ASCII, but not std escape
  833	divmod(CS,16,Q4,R4),
  834	divmod(Q4,16,Q3,R3),
  835	divmod(Q3,16,R1,R2),
  836	hex_value(X1,R1), hex_value(X2,R2), hex_value(X3,R3), hex_value(X4,R4),
  837	escape_chars(CharsIn,CharsOut).
  838
  839%
  840% optimizing compiler for use with peg_compile
  841% normally takes unoptimized ptree as input, but it's idempotent
  842% produces an optimized grammar object which is faster but not a ptree
  843%
  844optimize_peg('Peg'(Rules),'Peg'(RulesO,RRefs)) :-
  845	(optimize_rules(Rules,RDefs,RulesO)
  846	 -> once(length(RDefs,_)),         % make indefinite list definite
  847	    chk_RDefs(RulesO,RDefs,RRefs)  % must be done after optimize so as to not corrupt refs
  848	 ;  (Rules = [rule([id(GName),_])|_Rules] -> true ; GName = "?unknown?"),
  849	    print_message(warning,peg(optimize_fail(GName))),  % ensures failure msg               
  850	    fail
  851	 ).
  852
  853chk_RDefs([],RDefs,[]) :-
  854	forall(member(Name:_,RDefs), print_message(warning, peg(undefined(Name)))).
  855chk_RDefs([rule(PName,_)|Rules],RDefs,[_|RRefs]) :-
  856	memberchk(rule(PName,_),Rules), !,                % check for duplicates
  857	print_message(warning,peg(duplicate(PName))),     % found, later rules overwrite                
  858	chk_RDefs(Rules,RDefs,RRefs).
  859chk_RDefs([rule(PName,_)|Rules],RDefs,[RRef|RRefs]) :-
  860	atom_string(PName,Name),
  861	remove_def(RDefs,Name,RRef,NxtRDefs),
  862	chk_RDefs(Rules,NxtRDefs,RRefs).
  863
  864remove_def([],_Name,_RRef,[]).
  865%	print_message(warning, peg(unreferenced(Name))).
  866remove_def([Name:RRef|RDefs],Name,RRef,RDefs) :- !.
  867remove_def([RDef|RDefs],Name,RRef,[RDef|NxtRDefs]) :-
  868	remove_def(RDefs,Name,RRef,NxtRDefs).
  869
  870prolog:message(peg(duplicate(Name))) -->  % DCG
  871	[ "pPEG: duplicate rule ~w, last definition will apply" - [Name] ].
  872
  873prolog:message(peg(optimize_fail(GName))) -->  % DCG
  874	[ "pPEG: grammar ~w optimization failed" - [GName] ].
  875
  876optimize_rules([],_RDefs,[]).
  877optimize_rules([Rule|Rules],RDefs,[RuleO|RulesO]) :-
  878	optimize_rule(Rule,RDefs,RuleO),
  879	optimize_rules(Rules,RDefs,RulesO).
  880
  881optimize_rule(rule([id(Name),Exp]), RDefs, rule(PName,ExpO)) :- !, % unoptimized rule 
  882	atom_string(PName,Name),  % optimised rule name is atom for building ptrees
  883	optimize_exp(Exp, RDefs, ExpO).
  884optimize_rule(rule(Name,Exp), _RDefs, rule(Name,Exp)).  % already optimized?
  885
  886optimize_exp(id(Name), RDefs, call_O(Rule)) :-          % id(Name) ==> call_O(Rule)
  887	memberchk(Name:Rule, RDefs).
  888
  889optimize_exp(seq(Ins), RDefs, seq(Opt)) :-
  890	optimize_exp_list(Ins,RDefs,Opt).
  891
  892optimize_exp(alt(Ins), RDefs, alt(Opt)) :-
  893	optimize_exp_list(Ins,RDefs,Opt).
  894
  895optimize_exp(rep([Exp, ROp]), RDefs, rep_O(ExpO, Min, Max)) :-
  896	rep_counts(ROp,Min,Max), !,
  897	optimize_exp(Exp,RDefs,ExpO).
  898
  899optimize_exp(pre([pfx("~"), chs(MatchSet)]), RDefs, chs_O(notin,MChars)) :- !,
  900	optimize_exp(chs(MatchSet), RDefs, chs_O(_,MChars)).
  901optimize_exp(pre([pfx(POp), Exp]), RDefs, pre([pfx(POp), ExpO])) :-
  902	optimize_exp(Exp,RDefs,ExpO).
  903
  904optimize_exp(chs(MatchSet), _RDefs, chs_O(in,MChars)) :- 
  905	match_chars(MatchSet, MChars).
  906
  907optimize_exp(sq(QS), _RDefs, sq_O(Case,Match)) :-
  908	(sub_string(QS,_,1,0,"i")                  % case insensitive match test
  909	 -> Case = upper,
  910	    sub_string(QS,0,_,1,S),                % strip i
  911	    literal_match_(S,AMatch),              % string to match
  912	    string_upper(AMatch,Match)
  913	 ;  Case = exact,
  914	    literal_match_(QS,Match)               % string to match
  915	).
  916
  917optimize_exp(extn(S), _RDefs, extn(T)) :-      % extn "instruction"
  918	(string(S) -> extn_pred(S,T) ; T = S).
  919
  920optimize_exp(call_O(Rule), _RDefs, call_O(Rule)).                 % already optimized?
  921optimize_exp(rep_O(Exp, Min, Max), _RDefs, rep_O(Exp, Min, Max)). % already optimized?
  922optimize_exp(sq_O(C,M), _RDefs, sq_O(C,M)).                       % already optimized?
  923optimize_exp(chs_O(M), _RDefs, chs_O(M)).                         % already optimized?
  924% Note: trace instructions don't appear in static grammar.
  925
  926optimize_exp_list([],_RDefs,[]).
  927optimize_exp_list([Exp|Exps],RDefs,[ExpO|ExpOs]) :-
  928	optimize_exp(Exp,RDefs,ExpO),
  929	optimize_exp_list(Exps,RDefs,ExpOs).
  930
  931%
  932% time to initialize...
  933%
  934:- initialization(init_peg,now).