2:- module(transform,[preprocess/6,
    3                     topsem/2,      % +Der, -Sem 
    4                     topatt/2,      % +Der, -Attributes
    5                     topstr/2,      % +Der, -String
    6                     topcat/2]).    % +Der, -Category
    7
    8:- use_module(library(lists),[append/3]).    9:- use_module(semlib(options),[option/2]).   10:- use_module(semlib(errors),[error/2,warning/2]).   11:- use_module(boxer(morpha),[morpha/2]).   12:- use_module(boxer(slashes)).   13
   14
   15/* -------------------------------------------------------------------------
   16   Pre-Processing of CCG derivation to ensure correct format
   17------------------------------------------------------------------------- */
   18
   19preprocess(SID,X,Y,Tags,Start,End):-
   20   setTokID(SID,Start,TokID),
   21   trans(X,TokID,Y,End,Tags), !.
   22
   23preprocess(SID,_,_,_,_,_):-
   24   error('unable to preprocess derivation ~p',[SID]), !, fail.
   25
   26
   27/* -------------------------------------------------------------------------
   28   Funny (C&C wrongly analysed cases of N coordination)
   29------------------------------------------------------------------------- */
   30
   31trans(fa(n,X,funny(n,Conj,fa(n,Y,Z))),N1,X2,N3,Tags):- !,
   32   trans(fa(n,ba(n/n,X,conj((n/n)\(n/n),n/n,Conj,Y)),Z),N1,X2,N3,Tags).
   33   
   34trans(funny(_,_,X1),N1,X2,N3,Tags):- !,
   35   warning('the funny combinatory rule causes skipping token ~p',[N1]),
   36   N2 is N1 + 1,
   37   trans(X1,N2,X2,N3,Tags)
   37.
   38
   39
   40/* -------------------------------------------------------------------------
   41   Punctuation typechange rules
   42------------------------------------------------------------------------- */
   43
   44trans(rtc(C,X1,Pu1),N1,ba(C,nil,Att,Str,X2,X3),N3,Tags1-Tags3):- 
   45   Pu1 =.. [t,_|Cs], !,
   46   trans(X1,N1,X2,N2,Tags1-Tags2),
   47   topcat(X2,Cat),
   48   topatt(X2,Att),
   49   Pu2 =.. [t,C\Cat|Cs],
   50   trans(Pu2,N2,X3,N3,Tags2-Tags3),
   51   strings(X2,X3,Str).
   52
   53trans(ltc(C,Pu1,X1),N1,fa(C,nil,Att,Str,X2,X3),N3,Tags1-Tags3):-
   54   trans(Pu1,N1,Pu2,N2,Tags1-Tags2),
   55   Pu2 = t(_,Tok2,Sem2,Att2,I), !,
   56   trans(X1,N2,X3,N3,Tags2-Tags3),
   57   topcat(X3,Cat),
   58   topatt(X3,Att),
   59   X2 = t(C/Cat,Tok2,Sem2,Att2,I),
   60   strings(X3,X2,Str).
   61
   62
   63/* -------------------------------------------------------------------------
   64   Punctuation rules
   65------------------------------------------------------------------------- */
   66
   67trans(rp(Cat,X1,Y0),N1,X2,N3,Tags):-
   68   Y0 =.. [t,_|L], !, Y1 =.. [t,Cat\Cat|L],
   69   trans(ba(Cat,X1,Y1),N1,X2,N3,Tags).
   70
   71trans(lp(Cat,X0,Y1),N1,X2,N3,Tags):-
   72   X0 =.. [t,_|L], !, X1 =.. [t,Cat/Cat|L],
   73   trans(fa(Cat,X1,Y1),N1,X2,N3,Tags).
   74
   75
   76/* -------------------------------------------------------------------------
   77   Application
   78------------------------------------------------------------------------- */
   79
   80trans(fa(_,X1,Y1),  N1,  fa(C1,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !, 
   81   trans(X1,N1,X2,N2,Tags1-Tags2), 
   82   topcat(X2,C1/C2),
   83   trans(Y1,N2,Y2,N3,Tags2-Tags3),
   84   topcat(Y2,C2),
   85   strings(X2,Y2,Str),
   86   headAtt(X2,Y2,Att).
   87
   88trans(ba(_,X1,Y1),  N1,  ba(C2,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !, 
   89   trans(X1,N1,X2,N2,Tags1-Tags2), 
   90   topcat(X2,C1),
   91   trans(Y1,N2,Y2,N3,Tags2-Tags3),
   92   topcat(Y2,C2\C1),
   93   strings(X2,Y2,Str),
   94   headAtt(Y2,X2,Att).
   95
   96
   97/* -------------------------------------------------------------------------
   98   Composition
   99------------------------------------------------------------------------- */
  100
  101trans(fc(_,X1,Y1),  N1,  fc(C1/C3,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !, 
  102   trans(X1,N1,X2,N2,Tags1-Tags2), 
  103   trans(Y1,N2,Y2,N3,Tags2-Tags3),
  104   topcat(X2,C1/C2),
  105   topcat(Y2,C2/C3),
  106   strings(X2,Y2,Str),
  107   headAtt(X2,Y2,Att).
  108
  109trans(bc(_,X1,Y1),  N1,  bc(C3\C1,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !, 
  110   trans(X1,N1,X2,N2,Tags1-Tags2), 
  111   trans(Y1,N2,Y2,N3,Tags2-Tags3),
  112   topcat(X2,C2\C1),
  113   topcat(Y2,C3\C2),
  114   strings(X2,Y2,Str),  
  115   headAtt(Y2,X2,Att).
  116
  117/* -------------------------------------------------------------------------
  118   Generalised Composition
  119------------------------------------------------------------------------- */
  120
  121trans(gfc(C,N,X1,Y1),  N1,  gfc(C,N,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !, 
  122   trans(X1,N1,X2,N2,Tags1-Tags2), 
  123   trans(Y1,N2,Y2,N3,Tags2-Tags3),
  124   strings(X2,Y2,Str),  
  125   headAtt(X2,Y2,Att).
  126
  127trans(gbc(C,N,X1,Y1),  N1,  gbc(C,N,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !, 
  128   trans(X1,N1,X2,N2,Tags1-Tags2), 
  129   trans(Y1,N2,Y2,N3,Tags2-Tags3),
  130   strings(X2,Y2,Str),  
  131   headAtt(Y2,X2,Att).
  132
  133
  134/* -------------------------------------------------------------------------
  135   Crossed Composition
  136------------------------------------------------------------------------- */
  137
  138trans(bxc(_,X1,Y1),  N1,  bxc(C3/C1,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !, 
  139   trans(X1,N1,X2,N2,Tags1-Tags2), 
  140   trans(Y1,N2,Y2,N3,Tags2-Tags3),
  141   topcat(X2,C2/C1),
  142   topcat(Y2,C3\C2),
  143   strings(X2,Y2,Str),  
  144   headAtt(X2,Y2,Att).
  145
  146trans(fxc(C,X1,Y1), N1, fxc(C,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !, 
  147   trans(X1,N1,X2,N2,Tags1-Tags2), 
  148   trans(Y1,N2,Y2,N3,Tags2-Tags3),
  149   strings(X2,Y2,Str),  
  150   headAtt(Y2,X2,Att).
  151
  152
  153/* -------------------------------------------------------------------------
  154   Generalised Crossed Composition
  155------------------------------------------------------------------------- */
  156
  157trans(gfxc(C,N,X1,Y1), N1, gfxc(C,N,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !, 
  158   trans(X1,N1,X2,N2,Tags1-Tags2), 
  159   trans(Y1,N2,Y2,N3,Tags2-Tags3),
  160   strings(X2,Y2,Str),  
  161   headAtt(X2,Y2,Att).
  162
  163trans(gbxc(C,N,X1,Y1), N1, gbxc(C,N,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !, 
  164   trans(X1,N1,X2,N2,Tags1-Tags2), 
  165   trans(Y1,N2,Y2,N3,Tags2-Tags3),
  166   strings(X2,Y2,Str),  
  167   headAtt(Y2,X2,Att).
  168
  169
  170/* -------------------------------------------------------------------------
  171   Conjuction (Coordination)
  172------------------------------------------------------------------------- */
  173
  174trans(conj(C\C,X,Y),N1,Cat,N2,Tags):- 
  175   trans(conj(C\C,C,X,Y),N1,Cat,N2,Tags).
  176
  177%trans(conj(np:nb\np:nb,np:nb,X1,Y1), N1, conj(np\np,np,nil,Att,X2,Y2), N3, Tags1-Tags3):- 
  178%   X1 =.. [t,conj|Cs], !,
  179%   X3 =.. [t,conj:np|Cs], 
  180%   trans(X3,N1,X2,N2,Tags1-Tags2), 
  181%   trans(Y1,N2,Y2,N3,Tags2-Tags3),
  182%   topatt(Y2,Att).
  183
  184trans(conj(np\np,np,X1,Y1), N1, conj(np\np,np,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- 
  185   X1 =.. [t,comma   |Cs], !,       % replace apposition comma 
  186   X3 =.. [t,conj:app|Cs],          % by category conj:app
  187   trans(X3,N1,X2,N2,Tags1-Tags2), 
  188   trans(Y1,N2,Y2,N3,Tags2-Tags3),
  189   strings(X2,Y2,Str),
  190   topatt(Y2,Att).
  191
  192trans(conj(Cat\Cat,Cat,X1,Y1), N1, conj(NewCat\NewCat,NewCat,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !,
  193   adjustFeatures(Cat,NewCat),
  194   trans(X1,N1,X2,N2,Tags1-Tags2), 
  195   X2 =.. [_,conj:NewCat|_], 
  196   trans(Y1,N2,Y2,N3,Tags2-Tags3),
  197   strings(X2,Y2,Str),
  198   topatt(Y2,Att).
  199
  200trans(conj(_C1,_C2,Y1,Z1), N1, conj(C\C,C,nil,Att,Str,Y2,Z2), N3, Tags1-Tags3):- !, 
  201   trans(Y1,N1,Y2,N2,Tags1-Tags2), 
  202   trans(Z1,N2,Z2,N3,Tags2-Tags3),
  203   strings(Y2,Z2,Str),
  204   topcat(Z2,C),
  205   topatt(Z2,Att).
  206
  207trans(coord(_C,X1,Y1,Z1), N1, coord(C,nil,Att,Str,X2,Y2,Z2), N4, Tags1-Tags4):- !, 
  208   trans(X1,N1,X2,N2,Tags1-Tags2), 
  209   trans(Y1,N2,Y2,N3,Tags2-Tags3), 
  210   trans(Z1,N3,Z2,N4,Tags3-Tags4),
  211   topstr(X2,S1),
  212   topstr(Y2,S2), append(S1,S2,S3),
  213   topstr(Z2,S4), append(S3,S4,Str),
  214   topcat(Z2,C),
  215   topatt(Z2,Att).
  216
  217
  218/* -------------------------------------------------------------------------
  219   Unary Rules: Type Changing
  220------------------------------------------------------------------------- */
  221
  222trans(lx(C,D,X),N1,T,N2,Tags):- !, trans(tc(C,D,X),N1,T,N2,Tags).
  223
  224trans(tc(C1,_,X1), N1, tc(C3,C2,nil,Att,Str,X2), N2, Tags):- !, 
  225   adjustFeatures(C1,C3),
  226   trans(X1,N1,X2,N2,Tags),
  227   topcat(X2,C2),
  228   topstr(X2,Str),
  229   topatt(X2,Att).
  230
  231
  232/* -------------------------------------------------------------------------
  233   Unary Rules: Type Raising
  234------------------------------------------------------------------------- */
  235
  236trans(tr(C1/(C1\C2),X1), N1, ftr(C1/(C1\C2),C2,nil,Att,Str,X2), N2, Tags):- !, 
  237   trans(X1,N1,X2,N2,Tags),
  238   topcat(X2,C2),
  239   topstr(X2,Str),
  240   topatt(X2,Att).
  241
  242trans(tr(C1\(C1/C2),X1), N1, btr(C1\(C1/C2),C2,nil,Att,Str,X2), N2, Tags):- !, 
  243   trans(X1,N1,X2,N2,Tags),
  244   topcat(X2,C2),
  245   topstr(X2,Str),
  246   topatt(X2,Att).
  247
  248
  249/* -------------------------------------------------------------------------
  250   Substitution
  251------------------------------------------------------------------------- */
  252
  253trans(fs(C,X1,Y1),  N1,  fs(C,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !, 
  254   trans(X1,N1,X2,N2,Tags1-Tags2), 
  255   trans(Y1,N2,Y2,N3,Tags2-Tags3),
  256   strings(X2,Y2,Str),  
  257   headAtt(X2,Y2,Att).
  258
  259trans(bs(C,X1,Y1),  N1,  bs(C,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !, 
  260   trans(X1,N1,X2,N2,Tags1-Tags2), 
  261   trans(Y1,N2,Y2,N3,Tags2-Tags3),
  262   strings(X2,Y2,Str),  
  263   headAtt(Y2,X2,Att).
  264
  265trans(fxs(C,X1,Y1), N1, fxs(C,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !, 
  266   trans(X1,N1,X2,N2,Tags1-Tags2), 
  267   trans(Y1,N2,Y2,N3,Tags2-Tags3),
  268   strings(X2,Y2,Str),  
  269   headAtt(X2,Y2,Att).
  270
  271trans(bxs(C,X1,Y1), N1, bxs(C,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !, 
  272   trans(X1,N1,X2,N2,Tags1-Tags2),  
  273   trans(Y1,N2,Y2,N3,Tags2-Tags3),
  274   strings(X2,Y2,Str),  
  275   headAtt(Y2,X2,Att).
  276
  277
  278/* -------------------------------------------------------------------------
  279   Token (repair rules -- systematically wrong output of C&C parser)
  280------------------------------------------------------------------------- */
  281
  282trans(t(A,B,people,C,D,E),N,Tok,M,Tags):- 
  283   option('--x',true), !, % SemEval-2014
  284   trans(t(A,B,person,C,D,E),N,Tok,M,Tags).
  285
  286trans(t(A,B,C,'NNP',D,E),N,Tok,M,Tags):- 
  287   option('--x',true), !, % SemEval-2014
  288   trans(t(A,B,C,'NN',D,E),N,Tok,M,Tags).
  289
  290trans(t(Cat,'\'t',_,'VB',S,Ne),N,Tok,M,Tags):- !,
  291   trans(t(Cat,'\'t','RB',[lemma:not,sense:S,namex:Ne]),N,Tok,M,Tags).
  292
  293trans(t(Cat,'\'t',_,'VB',Att),N,Tok,M,Tags):- !,
  294   trans(t(Cat,'\'t','RB',Att),N,Tok,M,Tags).
  295
  296trans(t(A,B,please,'VB',D,E),N,Tok,M,Tags):- !,
  297   trans(t(A,B,please,'RB',D,E),N,Tok,M,Tags).
  298
  299
  300/* -------------------------------------------------------------------------
  301   Token (wrapper rules -- dealing with old output of C&C parser)
  302------------------------------------------------------------------------- */
  303
  304trans(t(Cat,Token,Pos),N,Tok,M,Tags):- !,
  305   trans(t(Cat,Token,Pos,[]),N,Tok,M,Tags).
  306
  307trans(t(Cat,Token,Lem,Pos,S1,Ne),N,Tok,M,Tags):- !,
  308   context(S1,S2),
  309   trans(t(Cat,Token,Pos,[lemma:Lem,namex:Ne|S2]),N,Tok,M,Tags).
  310
  311
  312/* -------------------------------------------------------------------------
  313   Token
  314------------------------------------------------------------------------- */
  315
  316trans(t(Cat1,Tok,Pos,Tags),N,t(Cat2,Tok,nil,RevTags,N),M,T1-T2):-
  317   adjustFeatures(Cat1,Cat2),
  318   tags(T2,N,[tok:Tok,pos:Pos|Tags],T1),
  319   morpha([pos:Pos|Tags],RevTags),
  320   M is N + 1.
  321
  322% old input version (t/6 terms)
  323%trans(t(Cat1,Tok,Lem,Pos,S1,Ne),N,t(Cat2,Tok,nil,[pos:Pos,lemma:Lem,namex:Ne|S2],N),M,T1-T2):-
  324%   context(S1,S2),
  325%   adjustFeatures(Cat1,Cat2),
  326%   tags(T2,N,[tok:Tok,pos:Pos,lemma:Lem,namex:Ne|S2],T1),  
  327%   M is N + 1.
  328 
  329
  330/* =========================================================================
  331   String Formation
  332========================================================================= */
  333
  334strings(D1,D2,W):- topstr(D1,W1), topstr(D2,W2), append(W1,W2,W).
  335
  336
  337/* =========================================================================
  338   External Context Information (for now only Word Sense Disambiguation)
  339========================================================================= */
  340
  341context(Number,Sense):- number(Number), !, Sense = [sense:Number].
  342context(_,Sense):- Sense = [].
  343
  344
  345/* =========================================================================
  346   Determine Feature on N
  347========================================================================= */
  348
  349featureN('NNPS', nam):- !.
  350featureN('NNP',  nam):- !.
  351featureN('CD',   num):- !.
  352featureN(_,      nom).
  353
  354
  355
  356/* =========================================================================
  357   Adjust features (mostly bugs in C&C parser)
  358========================================================================= */
  359
  360adjustFeatures(conj/conj, conj:X/conj:X):- !.        
  361
  362adjustFeatures(conj, conj:_):- !.        
  363
  364adjustFeatures(comma, conj:_):- !.        
  365
  366adjustFeatures(semi, conj:_):- !.        
  367
  368adjustFeatures(s, s:_):- !.             %%% bug in C&C parser
  369
  370adjustFeatures(s\np, s:dcl\np):- !.     %%% bug in C&C parser
  371
  372adjustFeatures(s/s:X, s:X/s:X).         %%% bug in C&C parser
  373adjustFeatures(s/s:X, s:_/s:X):- !.     %%% bug in C&C parser
  374
  375adjustFeatures( (((s:Y\np)\(s:Y\np))\((s\np)\(s\np)))/((s\np)\(s\np)),Cat):- !,
  376   Cat = (((s:Y\np)\(s:Y\np))\((s:X\np)\(s:X\np)))/((s:Z\np)\(s:Z\np)).
  377
  378adjustFeatures(np:_, np):- !.
  379
  380adjustFeatures(n:_, n):- !.
  381
  382
  383/* =========================================================================
  384   Adjust features
  385========================================================================= */
  386
  387adjustFeatures(F1/A1,F2/A2):- !,
  388   adjustFeatures(F1,F2),
  389   adjustFeatures(A1,A2).
  390
  391adjustFeatures(F1\A1,F2\A2):- !,
  392   adjustFeatures(F1,F2),
  393   adjustFeatures(A1,A2).
  394
  395adjustFeatures(Cat,Cat).
  396
  397
  398/* =========================================================================
  399   Adding Info
  400========================================================================= */
  401
  402tags(T,ID,Tags,[ID:Tags|T]).
  403
  404
  405/* =========================================================================
  406   Get top categorie, semantics or attributes from a derivation
  407========================================================================= */
  408
  409topcat(Der,Cat):- top(Der,Cat,_,_,_), !.
  410topsem(Der,Sem):- top(Der,_,Sem,_,_), !.
  411topatt(Der,Att):- top(Der,_,_,Att,_), !.
  412topstr(Der,Str):- top(Der,_,_,_,Str), !.
  413
  414
  415/* =========================================================================
  416   Top categorie, semantics or attributes from a derivation
  417========================================================================= */
  418
  419top(fa(C,S,A,W,_,_),C,S,A,W).
  420top(ba(C,S,A,W,_,_),C,S,A,W).
  421top(fc(C,S,A,W,_,_),C,S,A,W).
  422top(bc(C,S,A,W,_,_),C,S,A,W).
  423top(fxc(C,S,A,W,_,_),C,S,A,W).
  424top(bxc(C,S,A,W,_,_),C,S,A,W).
  425top(fs(C,S,A,W,_,_),C,S,A,W).
  426top(bs(C,S,A,W,_,_),C,S,A,W).
  427top(fxs(C,S,A,W,_,_),C,S,A,W).
  428top(bxs(C,S,A,W,_,_),C,S,A,W).
  429top(gfc(C,_,S,A,W,_,_),C,S,A,W).
  430top(gbc(C,_,S,A,W,_,_),C,S,A,W). 
  431top(gfxc(C,_,S,A,W,_,_),C,S,A,W). 
  432top(gbxc(C,_,S,A,W,_,_),C,S,A,W). 
  433top(gfc(C,S,A,W,_,_),C,S,A,W).
  434top(gbc(C,S,A,W,_,_),C,S,A,W). 
  435top(gfxc(C,S,A,W,_,_),C,S,A,W). 
  436top(gbxc(C,S,A,W,_,_),C,S,A,W). 
  437top(ftr(C,_,S,A,W,_),C,S,A,W).
  438top(btr(C,_,S,A,W,_),C,S,A,W).
  439top(tc(C,_,S,A,W,_),C,S,A,W).
  440top(lx(C,S,A,W,_,_),C,S,A,W).
  441top(t(C,W,S,A,_),C,S,A,[W]).
  442top(conj(C,_,S,A,W,_,_),C,S,A,W).
  443top(coord(C,S,A,W,_,_,_),C,S,A,W).
  444
  445
  446/* -------------------------------------------------------------------------
  447   Take attributes from head
  448------------------------------------------------------------------------- */
  449
  450headAtt(D1,D2,Att):- topcat(D1,C1/C2), C1==C2, !, topatt(D2,Att).
  451headAtt(D1,D2,Att):- topcat(D1,C1\C2), C1==C2, !, topatt(D2,Att).
  452headAtt(D0,_ ,Att):- topatt(D0,Att), !.
  453
  454
  455/* -------------------------------------------------------------------------
  456   Set Token-ID
  457------------------------------------------------------------------------- */
  458
  459setTokID(_,Start,Start):-
  460   option('--tokid',global), !.
  461
  462setTokID(SID,_,Start):-
  463   option('--tokid',local), !,
  464   Start is (SID*1000)+1