1:- module(pac_regex, [parse_regex/2,
2 regex_unix_macro/2]). 3
5
6regex_unix_macro($, @( $ )).
7regex_unix_macro(^, []).
8
15
16 24
58parse_regex(X, Y) :- string_chars(X, X0),
59 once(parse_regex([], [], [], Y0, X0, [])),
60 Y0 = [Y].
62parse_regex(A, B, A0, B0) --> token(T), !,
63 { once(push_pop_stacks(T, A, B, A1, B1)) },
64 parse_regex(A1, B1, A0, B0).
65parse_regex(A, B, A0, B0) --> 66 { close_block(A, B, A0, B1),
67 fold_block_reversely(B1, B0) }.
68
70push_pop_stacks(')', A, X, B, Y) :- close_block(A, X, B, Z), 71 fold_block_reversely(Z, Y).
72push_pop_stacks('(', A, X, ['('|A], ['('|X]). 73push_pop_stacks({}(I), A, [T|X], A, [T^I|X]). 74push_pop_stacks(F, A, X, A, Y) :- unary_opr(F),
75 apply_unary_opr(F, X, Y).
76push_pop_stacks(F, A, X, [F|B], Y) :- binary_opr(F),
77 once(sweep_higher_opr(F, A, X, B, Y)).
78push_pop_stacks(T, A, X, A, [T|X]).
79
81token('C'([C])) --> [(\)], [C0], {char_code(C0, C)}. 82token(X) --> [ A ], { regex_unix_macro(A, X) }.
84token(T) --> [T], {memberchk(T, ['(',')', '*', '+', '|', '.', '?'])}.
85token(out(D)) --> ['[', ^], char_class(D0, []), {chars_interval(D0, D)}.
86token(dot(D)) --> ['['], char_class(D0, []), {chars_interval(D0, D)}.
87token({}(I)) --> ['{'], repeat_spec(I).
88token('C'([C])) --> [C0], {char_code(C0, C)}.
89
91sweep_higher_opr(_, [], X, [], X).
92sweep_higher_opr(_, ['('|X], Y, ['('|X], Y).
93sweep_higher_opr(F, [G|A], X, B, Y):- higher_priority(G, F),
94 apply_binary_opr(G, X, Z),
95 sweep_higher_opr(F, A, Z, B, Y).
96sweep_higher_opr(F, A, X, B, Y):- push_pop_stacks(F, A, X, B, Y).
97
99close_block([], X, [], X).
100close_block(['('|A], X, A, X).
101close_block([F|A], X, B, Y):-
102 apply_binary_opr(F, X, Z),
103 close_block(A, Z, B, Y).
104
106fold_block_reversely([X|Y], [Z|U]):- fold_block_reversely(X, Y, Z, U).
107fold_block_reversely([], []).
108
110fold_block_reversely(X, [], X, []).
111fold_block_reversely(X, ['('|Y], X, Y).
112fold_block_reversely(X, [Y|Z], U, V):- fold_block_reversely(Y+X, Z, U, V).
113
115unary_opr(*).
116unary_opr(?).
118unary_opr(+).
119
121binary_opr(&).
122binary_opr('|').
123
125higher_priority(&, &).
126higher_priority(&, '|').
127higher_priority('|', '|').
128
130apply_unary_opr(*, [X|Y], [*(X)|Y]).
131apply_unary_opr(+, [X|Y], [+(X)|Y]).
132apply_unary_opr(?, [X|Y], [?(X)|Y]).
134
136apply_binary_opr('|', [X, Y|Z], ['|'(Y, X)|Z]).
137apply_binary_opr('&', [X, Y|Z], ['&'(Y, X)|Z]).
138
143
144repeat_spec(X) --> number_chars(J0), [','], { J0\==[] },
145 number_chars(K0), ['}'],
146 { number_chars(J, J0),
147 ( K0 \== []
148 -> number_chars(K, K0),
149 X = J-K
150 ; X = (>=(J))
151 )
152 }.
153
155number_chars([D|Ds]) --> [D], {char_type(D, digit)},
156 number_chars(Ds).
157number_chars([]) --> [].
158
160char_class([\(C)|Y], Z) --> [\], [C], char_class(Y,Z).
161char_class([A|X], Y) --> [A, A], char_class(X, Y). 162char_class(X, X) --> [']'].
163char_class([C|Y], Z) --> [C], char_class(Y,Z).
164
166chars_interval([],[]).
167chars_interval([X, -, Y|R], [C - D|S]):- drop_escape(X, X0),
168 drop_escape(Y, Y0),
169 char_code(X0, C),
170 char_code(Y0, D),
171 chars_interval(R, S).
172chars_interval([X|R], [C|S]):- drop_escape(X, X0),
173 char_code(X0, C),
174 chars_interval(R, S).
175
177drop_escape(\(X), X).
178drop_escape(X, X)