1:- module(regex_parser, [re//2]). 2:- use_module(library(dcg/basics), [integer//1, string//1]). 3:- use_module(library(regex/state), [adjust_case/3]). 4
5:- set_prolog_flag(double_quotes, string). 6
8re(Opt, Z) -->
9 basic_re(Opt,W),
10 re_tail(Opt,W,Z).
11
12
13re_tail(Opt, W, Z) -->
14 "|",
15 basic_re(Opt,X),
16 re_tail(Opt,union(W,X), Z).
17re_tail(_Opt, W, W) -->
18 { true }.
19
20
21basic_re(Opt, Z) -->
22 simple_re(Opt,W),
23 basic_re_tail(Opt,W,Z).
24
25basic_re_tail(Opt, W, Z) -->
26 simple_re(Opt,X),
27 basic_re_tail(Opt,conc(W,X), Z).
28basic_re_tail(_Opt, W, W) -->
29 { true }.
30
31
32simple_re(Opt, Z) -->
33 elemental_re(Opt,W),
34 simple_re_tail(Opt,W,Z).
35
36simple_re_tail(_Opt, W, count(W,0,999_999_999)) -->
37 "*".
38simple_re_tail(_Opt, W, count(W,1,999_999_999)) -->
39 "+".
40simple_re_tail(_Opt, W, count(W,0,1)) -->
41 "?".
42simple_re_tail(_Opt, W, count(W,N,N)) -->
43 44 "{",
45 integer(N),
46 { N >= 0 },
47 "}".
48simple_re_tail(_Opt, W, count(W,N,999_999_999)) -->
49 50 "{",
51 integer(N),
52 { N >= 0 },
53 ",",
54 "}".
55simple_re_tail(_Opt, W, count(W,N,M)) -->
56 57 "{",
58 integer(N),
59 { N >= 0 },
60 ",",
61 integer(M),
62 { M >= N },
63 "}".
64simple_re_tail(_Opt, W, W) -->
65 { true }.
66
67
68elemental_re(_Opt, any) -->
69 ".".
72elemental_re(Opt, group(X)) -->
73 "(",
74 re(Opt, X),
75 ")".
76elemental_re(Opt, named_group(Name, X)) -->
77 "(?<",
78 string(NameCodes),
79 { atom_codes(Name, NameCodes) },
80 ">",
81 re(Opt, X),
82 ")".
83elemental_re(_Opt, eos) -->
84 "$".
85elemental_re(State, char(C)) -->
86 [C0],
87 { \+ re_metachar(C0) },
88 { adjust_case(State, C0, C) }.
89elemental_re(Opt, RE) -->
90 "\\",
91 [C],
92 { perl_character_class(C, Opt, RE) }.
93elemental_re(_Opt, char(C)) -->
94 "\\",
95 [C],
96 { re_metachar(C) }.
97elemental_re(Opt, neg_set(X)) -->
98 "[^",
99 !, 100 set_items(Opt,X),
101 "]".
102elemental_re(Opt, pos_set([char(0'-)|X])) -->
103 "[-",
104 !, 105 set_items(Opt,X),
106 "]".
107elemental_re(Opt, pos_set(X)) -->
108 "[",
109 set_items(Opt,X),
110 "]".
111elemental_re(Opt, pos_set([char(0'-)|X])) -->
112 "[",
113 set_items(Opt,X),
114 "-]".
115
116
118re_metachar(0'^).
119re_metachar(0'\\).
120re_metachar(0'|).
121re_metachar(0'*).
122re_metachar(0'+).
123re_metachar(0'.).
124re_metachar(0'?).
125re_metachar(0'[).
126re_metachar(0'$).
127re_metachar(0'().
128re_metachar(0')).
129
130
132perl_character_class(0'd, Opt, pos_set(X)) :-
133 string_codes("0-9", Codes),
134 set_items(Opt, X,Codes,[]).
135perl_character_class(0'w, Opt, pos_set(X)) :-
136 string_codes("0-9A-Za-z_", Codes),
137 set_items(Opt, X,Codes,[]).
138perl_character_class(0's, _Opt, pos_set([ char(0'\t) 139 , char(0'\n) 140 , char(0'\f) 141 , char(0'\r) 142 , char(0' ) 143 ])).
144perl_character_class(Upper, Opt, neg_set(Set)) :-
145 code_type(Lower, lower(Upper)),
146 perl_character_class(Lower, Opt, pos_set(Set)).
147
148
149set_items(Opt, [Item1|MoreItems]) -->
150 set_item(Opt, Item1),
151 set_items(Opt, MoreItems).
152set_items(Opt, [Item1]) -->
153 set_item(Opt, Item1).
154
155set_item(State, char(C)) -->
156 [C0],
157 { \+ set_metachar(C0) },
158 { adjust_case(State,C0,C) }.
159set_item(_Opt, char(C)) -->
160 "\\",
161 [C],
162 { set_metachar(C) }.
163set_item(Opt, range(A,B)) -->
164 set_item(Opt, char(A)),
165 "-",
166 set_item(Opt, char(B)).
167
168
169set_metachar(0'\\).
170set_metachar(0']).
171set_metachar(0'-)