2:- module(transform,[preprocess/6,
3 topsem/2, 4 topatt/2, 5 topstr/2, 6 topcat/2]). 7
8:- use_module(library(lists),[append/3]). 9:- use_module(semlib(options),[option/2]). 10:- use_module(semlib(errors),[error/2,warning/2]). 11:- use_module(boxer(morpha),[morpha/2]). 12:- use_module(boxer(slashes)). 13
14
18
19preprocess(SID,X,Y,Tags,Start,End):-
20 setTokID(SID,Start,TokID),
21 trans(X,TokID,Y,End,Tags), !.
22
23preprocess(SID,_,_,_,_,_):-
24 error('unable to preprocess derivation ~p',[SID]), !, fail.
25
26
27/* -------------------------------------------------------------------------
28 Funny (C&C wrongly analysed cases of N coordination)
29------------------------------------------------------------------------- */
30
31trans(fa(n,X,funny(n,Conj,fa(n,Y,Z))),N1,X2,N3,Tags):- !,
32 trans(fa(n,ba(n/n,X,conj((n/n)\(n/n),n/n,Conj,Y)),Z),N1,X2,N3,Tags).
33
34trans(funny(_,_,X1),N1,X2,N3,Tags):- !,
35 warning('the funny combinatory rule causes skipping token ~p',[N1]),
36 N2 is N1 + 1,
37 trans(X1,N2,X2,N3,Tags)
37.
38
39
40/* -------------------------------------------------------------------------
41 Punctuation typechange rules
42------------------------------------------------------------------------- */
43
44trans(rtc(C,X1,Pu1),N1,ba(C,nil,Att,Str,X2,X3),N3,Tags1-Tags3):-
45 Pu1 =.. [t,_|Cs], !,
46 trans(X1,N1,X2,N2,Tags1-Tags2),
47 topcat(X2,Cat),
48 topatt(X2,Att),
49 Pu2 =.. [t,C\Cat|Cs],
50 trans(Pu2,N2,X3,N3,Tags2-Tags3),
51 strings(X2,X3,Str).
52
53trans(ltc(C,Pu1,X1),N1,fa(C,nil,Att,Str,X2,X3),N3,Tags1-Tags3):-
54 trans(Pu1,N1,Pu2,N2,Tags1-Tags2),
55 Pu2 = t(_,Tok2,Sem2,Att2,I), !,
56 trans(X1,N2,X3,N3,Tags2-Tags3),
57 topcat(X3,Cat),
58 topatt(X3,Att),
59 X2 = t(C/Cat,Tok2,Sem2,Att2,I),
60 strings(X3,X2,Str).
61
62
63/* -------------------------------------------------------------------------
64 Punctuation rules
65------------------------------------------------------------------------- */
66
67trans(rp(Cat,X1,Y0),N1,X2,N3,Tags):-
68 Y0 =.. [t,_|L], !, Y1 =.. [t,Cat\Cat|L],
69 trans(ba(Cat,X1,Y1),N1,X2,N3,Tags).
70
71trans(lp(Cat,X0,Y1),N1,X2,N3,Tags):-
72 X0 =.. [t,_|L], !, X1 =.. [t,Cat/Cat|L],
73 trans(fa(Cat,X1,Y1),N1,X2,N3,Tags).
74
75
79
80trans(fa(_,X1,Y1), N1, fa(C1,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !,
81 trans(X1,N1,X2,N2,Tags1-Tags2),
82 topcat(X2,C1/C2),
83 trans(Y1,N2,Y2,N3,Tags2-Tags3),
84 topcat(Y2,C2),
85 strings(X2,Y2,Str),
86 headAtt(X2,Y2,Att).
87
88trans(ba(_,X1,Y1), N1, ba(C2,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !,
89 trans(X1,N1,X2,N2,Tags1-Tags2),
90 topcat(X2,C1),
91 trans(Y1,N2,Y2,N3,Tags2-Tags3),
92 topcat(Y2,C2\C1),
93 strings(X2,Y2,Str),
94 headAtt(Y2,X2,Att).
95
96
100
101trans(fc(_,X1,Y1), N1, fc(C1/C3,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !,
102 trans(X1,N1,X2,N2,Tags1-Tags2),
103 trans(Y1,N2,Y2,N3,Tags2-Tags3),
104 topcat(X2,C1/C2),
105 topcat(Y2,C2/C3),
106 strings(X2,Y2,Str),
107 headAtt(X2,Y2,Att).
108
109trans(bc(_,X1,Y1), N1, bc(C3\C1,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !,
110 trans(X1,N1,X2,N2,Tags1-Tags2),
111 trans(Y1,N2,Y2,N3,Tags2-Tags3),
112 topcat(X2,C2\C1),
113 topcat(Y2,C3\C2),
114 strings(X2,Y2,Str),
115 headAtt(Y2,X2,Att).
116
120
121trans(gfc(C,N,X1,Y1), N1, gfc(C,N,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !,
122 trans(X1,N1,X2,N2,Tags1-Tags2),
123 trans(Y1,N2,Y2,N3,Tags2-Tags3),
124 strings(X2,Y2,Str),
125 headAtt(X2,Y2,Att).
126
127trans(gbc(C,N,X1,Y1), N1, gbc(C,N,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !,
128 trans(X1,N1,X2,N2,Tags1-Tags2),
129 trans(Y1,N2,Y2,N3,Tags2-Tags3),
130 strings(X2,Y2,Str),
131 headAtt(Y2,X2,Att).
132
133
134/* -------------------------------------------------------------------------
135 Crossed Composition
136------------------------------------------------------------------------- */
137
138trans(bxc(_,X1,Y1), N1, bxc(C3/C1,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !,
139 trans(X1,N1,X2,N2,Tags1-Tags2),
140 trans(Y1,N2,Y2,N3,Tags2-Tags3),
141 topcat(X2,C2/C1),
142 topcat(Y2,C3\C2),
143 strings(X2,Y2,Str),
144 headAtt(X2,Y2,Att).
145
146trans(fxc(C,X1,Y1), N1, fxc(C,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !,
147 trans(X1,N1,X2,N2,Tags1-Tags2),
148 trans(Y1,N2,Y2,N3,Tags2-Tags3),
149 strings(X2,Y2,Str),
150 headAtt(Y2,X2,Att).
151
152
156
157trans(gfxc(C,N,X1,Y1), N1, gfxc(C,N,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !,
158 trans(X1,N1,X2,N2,Tags1-Tags2),
159 trans(Y1,N2,Y2,N3,Tags2-Tags3),
160 strings(X2,Y2,Str),
161 headAtt(X2,Y2,Att).
162
163trans(gbxc(C,N,X1,Y1), N1, gbxc(C,N,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !,
164 trans(X1,N1,X2,N2,Tags1-Tags2),
165 trans(Y1,N2,Y2,N3,Tags2-Tags3),
166 strings(X2,Y2,Str),
167 headAtt(Y2,X2,Att).
168
169
170/* -------------------------------------------------------------------------
171 Conjuction (Coordination)
172------------------------------------------------------------------------- */
173
174trans(conj(C\C,X,Y),N1,Cat,N2,Tags):-
175 trans(conj(C\C,C,X,Y),N1,Cat,N2,Tags).
176
177%trans(conj(np:nb\np:nb,np:nb,X1,Y1), N1, conj(np\np,np,nil,Att,X2,Y2), N3, Tags1-Tags3):-
178% X1 =.. [t,conj|Cs], !,
179% X3 =.. [t,conj:np|Cs],
180% trans(X3,N1,X2,N2,Tags1-Tags2),
181% trans(Y1,N2,Y2,N3,Tags2-Tags3),
182% topatt(Y2,Att).
183
184trans(conj(np\np,np,X1,Y1), N1, conj(np\np,np,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):-
185 X1 =.. [t,comma |Cs], !, % replace apposition comma
186 X3 =.. [t,conj:app|Cs], % by category conj:app
187 trans(X3,N1,X2,N2,Tags1-Tags2),
188 trans(Y1,N2,Y2,N3,Tags2-Tags3),
189 strings(X2,Y2,Str),
190 topatt(Y2,Att).
191
192trans(conj(Cat\Cat,Cat,X1,Y1), N1, conj(NewCat\NewCat,NewCat,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !,
193 adjustFeatures(Cat,NewCat),
194 trans(X1,N1,X2,N2,Tags1-Tags2),
195 X2 =.. [_,conj:NewCat|_],
196 trans(Y1,N2,Y2,N3,Tags2-Tags3),
197 strings(X2,Y2,Str),
198 topatt(Y2,Att).
199
200trans(conj(_C1,_C2,Y1,Z1), N1, conj(C\C,C,nil,Att,Str,Y2,Z2), N3, Tags1-Tags3):- !,
201 trans(Y1,N1,Y2,N2,Tags1-Tags2),
202 trans(Z1,N2,Z2,N3,Tags2-Tags3),
203 strings(Y2,Z2,Str),
204 topcat(Z2,C),
205 topatt(Z2,Att).
206
207trans(coord(_C,X1,Y1,Z1), N1, coord(C,nil,Att,Str,X2,Y2,Z2), N4, Tags1-Tags4):- !,
208 trans(X1,N1,X2,N2,Tags1-Tags2),
209 trans(Y1,N2,Y2,N3,Tags2-Tags3),
210 trans(Z1,N3,Z2,N4,Tags3-Tags4),
211 topstr(X2,S1),
212 topstr(Y2,S2), append(S1,S2,S3),
213 topstr(Z2,S4), append(S3,S4,Str),
214 topcat(Z2,C),
215 topatt(Z2,Att).
216
217
221
222trans(lx(C,D,X),N1,T,N2,Tags):- !, trans(tc(C,D,X),N1,T,N2,Tags).
223
224trans(tc(C1,_,X1), N1, tc(C3,C2,nil,Att,Str,X2), N2, Tags):- !,
225 adjustFeatures(C1,C3),
226 trans(X1,N1,X2,N2,Tags),
227 topcat(X2,C2),
228 topstr(X2,Str),
229 topatt(X2,Att).
230
231
232/* -------------------------------------------------------------------------
233 Unary Rules: Type Raising
234------------------------------------------------------------------------- */
235
236trans(tr(C1/(C1\C2),X1), N1, ftr(C1/(C1\C2),C2,nil,Att,Str,X2), N2, Tags):- !,
237 trans(X1,N1,X2,N2,Tags),
238 topcat(X2,C2),
239 topstr(X2,Str),
240 topatt(X2,Att).
241
242trans(tr(C1\(C1/C2),X1), N1, btr(C1\(C1/C2),C2,nil,Att,Str,X2), N2, Tags):- !,
243 trans(X1,N1,X2,N2,Tags),
244 topcat(X2,C2),
245 topstr(X2,Str),
246 topatt(X2,Att).
247
248
252
253trans(fs(C,X1,Y1), N1, fs(C,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !,
254 trans(X1,N1,X2,N2,Tags1-Tags2),
255 trans(Y1,N2,Y2,N3,Tags2-Tags3),
256 strings(X2,Y2,Str),
257 headAtt(X2,Y2,Att).
258
259trans(bs(C,X1,Y1), N1, bs(C,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !,
260 trans(X1,N1,X2,N2,Tags1-Tags2),
261 trans(Y1,N2,Y2,N3,Tags2-Tags3),
262 strings(X2,Y2,Str),
263 headAtt(Y2,X2,Att).
264
265trans(fxs(C,X1,Y1), N1, fxs(C,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !,
266 trans(X1,N1,X2,N2,Tags1-Tags2),
267 trans(Y1,N2,Y2,N3,Tags2-Tags3),
268 strings(X2,Y2,Str),
269 headAtt(X2,Y2,Att).
270
271trans(bxs(C,X1,Y1), N1, bxs(C,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !,
272 trans(X1,N1,X2,N2,Tags1-Tags2),
273 trans(Y1,N2,Y2,N3,Tags2-Tags3),
274 strings(X2,Y2,Str),
275 headAtt(Y2,X2,Att).
276
277
281
282trans(t(A,B,people,C,D,E),N,Tok,M,Tags):-
283 option('--x',true), !, 284 trans(t(A,B,person,C,D,E),N,Tok,M,Tags).
285
286trans(t(A,B,C,'NNP',D,E),N,Tok,M,Tags):-
287 option('--x',true), !, 288 trans(t(A,B,C,'NN',D,E),N,Tok,M,Tags).
289
290trans(t(Cat,'\'t',_,'VB',S,Ne),N,Tok,M,Tags):- !,
291 trans(t(Cat,'\'t','RB',[lemma:not,sense:S,namex:Ne]),N,Tok,M,Tags).
292
293trans(t(Cat,'\'t',_,'VB',Att),N,Tok,M,Tags):- !,
294 trans(t(Cat,'\'t','RB',Att),N,Tok,M,Tags).
295
296trans(t(A,B,please,'VB',D,E),N,Tok,M,Tags):- !,
297 trans(t(A,B,please,'RB',D,E),N,Tok,M,Tags).
298
299
303
304trans(t(Cat,Token,Pos),N,Tok,M,Tags):- !,
305 trans(t(Cat,Token,Pos,[]),N,Tok,M,Tags).
306
307trans(t(Cat,Token,Lem,Pos,S1,Ne),N,Tok,M,Tags):- !,
308 context(S1,S2),
309 trans(t(Cat,Token,Pos,[lemma:Lem,namex:Ne|S2]),N,Tok,M,Tags).
310
311
315
316trans(t(Cat1,Tok,Pos,Tags),N,t(Cat2,Tok,nil,RevTags,N),M,T1-T2):-
317 adjustFeatures(Cat1,Cat2),
318 tags(T2,N,[tok:Tok,pos:Pos|Tags],T1),
319 morpha([pos:Pos|Tags],RevTags),
320 M is N + 1.
321
328
329
333
334strings(D1,D2,W):- topstr(D1,W1), topstr(D2,W2), append(W1,W2,W).
335
336
340
341context(Number,Sense):- number(Number), !, Sense = [sense:Number].
342context(_,Sense):- Sense = [].
343
344
348
349featureN('NNPS', nam):- !.
350featureN('NNP', nam):- !.
351featureN('CD', num):- !.
352featureN(_, nom).
353
354
355
359
360adjustFeatures(conj/conj, conj:X/conj:X):- !.
361
362adjustFeatures(conj, conj:_):- !.
363
364adjustFeatures(comma, conj:_):- !.
365
366adjustFeatures(semi, conj:_):- !.
367
368adjustFeatures(s, s:_):- !. %%% bug in C&C parser
369
370adjustFeatures(s\np, s:dcl\np):- !. 371
372adjustFeatures(s/s:X, s:X/s:X). 373adjustFeatures(s/s:X, s:_/s:X):- !. %%% bug in C&C parser
374
375adjustFeatures( (((s:Y\np)\(s:Y\np))\((s\np)\(s\np)))/((s\np)\(s\np)),Cat):- !,
376 Cat = (((s:Y\np)\(s:Y\np))\((s:X\np)\(s:X\np)))/((s:Z\np)\(s:Z\np)).
377
378adjustFeatures(np:_, np):- !.
379
380adjustFeatures(n:_, n):- !.
381
382
386
387adjustFeatures(F1/A1,F2/A2):- !,
388 adjustFeatures(F1,F2),
389 adjustFeatures(A1,A2).
390
391adjustFeatures(F1\A1,F2\A2):- !,
392 adjustFeatures(F1,F2),
393 adjustFeatures(A1,A2).
394
395adjustFeatures(Cat,Cat).
396
397
401
402tags(T,ID,Tags,[ID:Tags|T]).
403
404
408
409topcat(Der,Cat):- top(Der,Cat,_,_,_), !.
410topsem(Der,Sem):- top(Der,_,Sem,_,_), !.
411topatt(Der,Att):- top(Der,_,_,Att,_), !.
412topstr(Der,Str):- top(Der,_,_,_,Str), !.
413
414
418
419top(fa(C,S,A,W,_,_),C,S,A,W).
420top(ba(C,S,A,W,_,_),C,S,A,W).
421top(fc(C,S,A,W,_,_),C,S,A,W).
422top(bc(C,S,A,W,_,_),C,S,A,W).
423top(fxc(C,S,A,W,_,_),C,S,A,W).
424top(bxc(C,S,A,W,_,_),C,S,A,W).
425top(fs(C,S,A,W,_,_),C,S,A,W).
426top(bs(C,S,A,W,_,_),C,S,A,W).
427top(fxs(C,S,A,W,_,_),C,S,A,W).
428top(bxs(C,S,A,W,_,_),C,S,A,W).
429top(gfc(C,_,S,A,W,_,_),C,S,A,W).
430top(gbc(C,_,S,A,W,_,_),C,S,A,W).
431top(gfxc(C,_,S,A,W,_,_),C,S,A,W).
432top(gbxc(C,_,S,A,W,_,_),C,S,A,W).
433top(gfc(C,S,A,W,_,_),C,S,A,W).
434top(gbc(C,S,A,W,_,_),C,S,A,W).
435top(gfxc(C,S,A,W,_,_),C,S,A,W).
436top(gbxc(C,S,A,W,_,_),C,S,A,W).
437top(ftr(C,_,S,A,W,_),C,S,A,W).
438top(btr(C,_,S,A,W,_),C,S,A,W).
439top(tc(C,_,S,A,W,_),C,S,A,W).
440top(lx(C,S,A,W,_,_),C,S,A,W).
441top(t(C,W,S,A,_),C,S,A,[W]).
442top(conj(C,_,S,A,W,_,_),C,S,A,W).
443top(coord(C,S,A,W,_,_,_),C,S,A,W).
444
445
449
450headAtt(D1,D2,Att):- topcat(D1,C1/C2), C1==C2, !, topatt(D2,Att).
451headAtt(D1,D2,Att):- topcat(D1,C1\C2), C1==C2, !, topatt(D2,Att).
452headAtt(D0,_ ,Att):- topatt(D0,Att), !.
453
454
458
459setTokID(_,Start,Start):-
460 option('--tokid',global), !.
461
462setTokID(SID,_,Start):-
463 option('--tokid',local), !,
464 Start is (SID*1000)+1