1% This file is part of the Attempto Parsing Engine (APE). 2% Copyright 2008-2013, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch). 3% 4% The Attempto Parsing Engine (APE) is free software: you can redistribute it and/or modify it 5% under the terms of the GNU Lesser General Public License as published by the Free Software 6% Foundation, either version 3 of the License, or (at your option) any later version. 7% 8% The Attempto Parsing Engine (APE) is distributed in the hope that it will be useful, but WITHOUT 9% ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 10% PURPOSE. See the GNU Lesser General Public License for more details. 11% 12% You should have received a copy of the GNU Lesser General Public License along with the Attempto 13% Parsing Engine (APE). If not, see http://www.gnu.org/licenses/. 14 15 16:- module(morphgen, [ 17 clear_vars/0, 18 add_var/1, 19 remove_singletons/2, 20 acesentencelist_pp/2, 21 listlist_listatom/2, 22 format_dr/2, 23 surface_noun/4, 24 surface_verb/3, 25 surface_neg_verb/3, 26 surface_adverb/3, 27 surface_property/3, 28 surface_property/6, 29 surface_quotedstring/2, 30 get_di_marker/4 31 ]).
40:- use_module('../lexicon/lexicon_interface', []). 41:- use_module('../lexicon/chars', [ 42 is_capitalized/1 43 ]). 44 45:- use_module(ace_niceace, [ 46 ace_niceace/2, 47 word_article/2, 48 atom_capitalize/2 49 ]). 50 51 52:- dynamic var_once/1. 53:- dynamic var_twice/1. 54:- dynamic var/2.
62format_dr(Dr, NiceDr) :-
63 format(atom(NiceDr), '~W', [Dr, [numbervars(true)]]).
surface_verb(sg, know, knows)
surface_verb(pl, know, know)
surface_verb(part, know, known)
BUGs:format(atom(WordForm), "is ~w by", [WordFormTmp])
90surface_verb(_SgPl, i(Lemma), WordForm) :- 91 !, 92 surface_verb_x(part, Lemma, WordFormTmp), 93 concat_atom([is, WordFormTmp, by], ' ', WordForm). 94 95surface_verb(SgPl, Lemma, WordForm) :- 96 surface_verb_x(SgPl, Lemma, WordForm).
105surface_verb_x(sg, Verb, SurfaceVerb) :- 106 verb_sg(Verb, SurfaceVerb). 107 108surface_verb_x(pl, Verb, SurfaceVerb) :- 109 verb_pl(Verb, SurfaceVerb). 110 111 112surface_verb_x(part, Verb, VerbEd) :- 113 lexicon_interface:tv_pp(VerbEd, Verb), 114 !. 115 116surface_verb_x(part, Verb, VerbEd) :- 117 lexicon_interface:dv_pp(VerbEd, Verb, _), 118 !. 119 120surface_verb_x(part, Verb, [v, ':', Verb]).
131surface_neg_verb(sg, be, [is, not]) :- !. 132surface_neg_verb(pl, be, [are, not]) :- !. 133surface_neg_verb(sg, Verb, [does, not, WordForm]) :- !, surface_verb(pl, Verb, WordForm). 134surface_neg_verb(pl, Verb, [do, not, WordForm]) :- !, surface_verb(pl, Verb, WordForm).
145surface_adverb(Adverb, pos, [Adverb]). 146 147surface_adverb(Adverb, comp, [CompAdverb]) :- 148 lexicon_interface:adv_comp(CompAdverb, Adverb), 149 !. 150 151surface_adverb(Adverb, comp, [more, Adverb]) :- 152 lexicon_interface:adv(Adverb, _), 153 !. 154 155surface_adverb(Adverb, comp, [more, a, ':', Adverb]). 156 157surface_adverb(Adverb, sup, [SupAdverb]) :- 158 lexicon_interface:adv_sup(SupAdverb, Adverb), 159 !. 160 161surface_adverb(Adverb, sup, [most, Adverb]) :- 162 lexicon_interface:adv(Adverb, _), 163 !. 164 165surface_adverb(Adverb, sup, [most, a, ':', Adverb]).
176surface_property(Adjective, pos, [Adjective]) :- 177 lexicon_interface:adj_itr(Adjective, _), 178 !. 179 180surface_property(Adjective, pos, [Adjective]) :- 181 lexicon_interface:adj_tr(Adjective, _, _), 182 !. 183 184surface_property(Adjective, pos, [a, ':', Adjective]). 185 186 187surface_property(Adjective, pos_as, [as, Adjective, as]) :- 188 lexicon_interface:adj_itr(Adjective, _), 189 !. 190 191surface_property(Adjective, pos_as, [as, a, ':', Adjective, as]). 192 193 194surface_property(Adjective, comp, MoreAdjective) :- pos_comp(Adjective, MoreAdjective, _). 195 196surface_property(Adjective, sup, MostAdjective) :- pos_sup(Adjective, MostAdjective). 197 198surface_property(Adjective, comp_than, [MoreAdjective, than]) :- pos_comp(Adjective, MoreAdjective, _).
Examples:
220surface_property(Adjective, pos_as, subj, Ref2Text, Ref3Text, [as, Adjective, Ref2Text, as, Ref3Text]). 221 222surface_property(Adjective, pos_as, obj, Ref2Text, Ref3Text, [as, Adjective, Ref2Text, as, Preposition, Ref3Text]) :- 223 pos_comp(Adjective, _, Preposition). 224 225surface_property(Adjective, comp_than, subj, Ref2Text, Ref3Text, [MoreAdjective, Ref2Text, than, Ref3Text]) :- 226 pos_comp(Adjective, MoreAdjective, _). 227 228surface_property(Adjective, comp_than, obj, Ref2Text, Ref3Text, [MoreAdjective, Ref2Text, than, Preposition, Ref3Text]) :- 229 pos_comp(Adjective, MoreAdjective, Preposition).
242pos_comp(Positive, [Comparative], []) :- 243 lexicon_interface:adj_itr_comp(Comparative, Positive), 244 !. 245 246pos_comp(Positive, [Comparative], [Preposition]) :- 247 lexicon_interface:adj_tr_comp(Comparative, Positive, Preposition), 248 !. 249 250pos_comp(Positive, [more, Positive], []) :- 251 lexicon_interface:adj_itr(Positive, _), 252 !. 253 254pos_comp(Positive, [more, Positive], [Preposition]) :- 255 lexicon_interface:adj_tr(Positive, _, Preposition), 256 !. 257 258% If everything else fails, then we add a prefix. 259pos_comp(Positive, [more, a, ':', Positive], []).
267pos_sup(Positive, [Superlative]) :- 268 lexicon_interface:adj_itr_sup(Superlative, Positive), 269 !. 270 271pos_sup(Positive, [Superlative]) :- 272 lexicon_interface:adj_tr_sup(Superlative, Positive, _), 273 !. 274 275pos_sup(Positive, [most, Positive]) :- 276 lexicon_interface:adj_itr(Positive, _), 277 !. 278 279pos_sup(Positive, [most, Positive]) :- 280 lexicon_interface:adj_tr(Positive, _, _), 281 !. 282 283% If everything else fails, then we add a prefix. 284pos_sup(Positive, [most, a, ':', Positive]).
string(.)
,
escaping also double quotes and backslashes.
The result is an ACE quoted string.
296surface_quotedstring(DrsString, QuotedString) :-
297 atom_codes(DrsString, Codes1),
298 quote_and_escape(Codes1, Codes2),
299 atom_codes(QuotedString, Codes2).
307quote_and_escape(Cs, [34 | QCs]) :- 308 quote_and_escape_(Cs, QCs). 309 310quote_and_escape_([], [34]). 311 312quote_and_escape_([92 | Cs], [92, 92 | QCs]) :- 313 !, 314 quote_and_escape_(Cs, QCs). 315 316quote_and_escape_([34 | Cs], [92, 34 | QCs]) :- 317 !, 318 quote_and_escape_(Cs, QCs). 319 320quote_and_escape_([C | Cs], [C | QCs]) :- 321 quote_and_escape_(Cs, QCs).
Note that in the presence of aliases, the mapping of lemmas to surface forms is not deterministic. Consider e.g. the mapping of surface forms to lemmas:
@param Type is one of {cn, pn}, i.e. common noun or proper name @param Lemma is the lemma of the noun as found in the DRS @param Num is one of {sg, mass, pl} @param Form is the surface form of the noun (possibly a list of tokens)
@bug: possibly slow, because uses an unindexed argument
352surface_noun(cn, Lemma, sg, Form) :- 353 lexicon_interface:noun_sg(Form, Lemma, _), 354 !. 355 356surface_noun(cn, Lemma, mass, Form) :- 357 lexicon_interface:noun_mass(Form, Lemma, _), 358 !. 359 360surface_noun(cn, Lemma, pl, Form) :- 361 lexicon_interface:noun_pl(Form, Lemma, _), 362 !. 363 364% If the common noun lemma does not exist in the lexicon 365% then output it with the n-prefix, regardless of 366% its number. 367surface_noun(cn, Lemma, _Num, [n, ':', Lemma]). 368 369 370surface_noun(pn, Lemma, sg, Form) :- 371 lexicon_interface:pn_sg(Form, Lemma, _), 372 !. 373 374surface_noun(pn, Lemma, sg, [the, Form]) :- 375 lexicon_interface:pndef_sg(Form, Lemma, _), 376 !. 377 378surface_noun(pn, Lemma, pl, Form) :- 379 lexicon_interface:pn_pl(Form, Lemma, _), 380 !. 381 382surface_noun(pn, Lemma, pl, [the, Form]) :- 383 lexicon_interface:pndef_pl(Form, Lemma, _), 384 !. 385 386surface_noun(pn, Lemma, sg, [p, ':', Lemma]) :- 387 \+ is_capitalized(Lemma), 388 !. 389 390surface_noun(pn, Lemma, pl, [p, ':', Lemma]) :- 391 \+ is_capitalized(Lemma), 392 !. 393 394surface_noun(pn, Lemma, sg, Lemma). 395 396surface_noun(pn, Lemma, pl, Lemma).
405verb_pl(be, are) :- !. % BUG: is this needed? 406 407verb_pl(Lemma, Pl) :- 408 lexicon_interface:iv_infpl(Pl, Lemma), 409 !. 410 411verb_pl(Lemma, Pl) :- 412 lexicon_interface:tv_infpl(Pl, Lemma), 413 !. 414 415verb_pl(Lemma, Pl) :- 416 lexicon_interface:dv_infpl(Pl, Lemma, _), 417 !. 418 419verb_pl(Lemma, [v, ':', Lemma]).
427verb_sg(be, is) :- !. % BUG: is this needed? 428 429verb_sg(Lemma, Sg) :- 430 lexicon_interface:iv_finsg(Sg, Lemma), 431 !. 432 433verb_sg(Lemma, Sg) :- 434 lexicon_interface:tv_finsg(Sg, Lemma), 435 !. 436 437verb_sg(Lemma, Sg) :- 438 lexicon_interface:dv_finsg(Sg, Lemma, _), 439 !. 440 441verb_sg(Lemma, [v, ':', Lemma]).
453get_di_marker(sg, Lemma, Form, DiMarker) :- 454 lexicon_interface:dv_finsg(Form, Lemma, DiMarker), 455 DiMarker \= '', 456 !. 457 458get_di_marker(sg, Lemma, Form, '') :- 459 lexicon_interface:dv_finsg(Form, Lemma, ''), 460 !. 461 462get_di_marker(pl, Lemma, Form, DiMarker) :- 463 lexicon_interface:dv_infpl(Form, Lemma, DiMarker), 464 DiMarker \= '', 465 !. 466 467get_di_marker(pl, Lemma, Form, '') :- 468 lexicon_interface:dv_infpl(Form, Lemma, ''), 469 !. 470 471get_di_marker(_, Lemma, [v, ':', Lemma], '').
483acesentencelist_pp([], ''). 484 485acesentencelist_pp([X | Xs], Ace) :- 486 is_list(X), % peek, if is paragraph 487 !, 488 maplist(acesentencelist_pp_x, [X | Xs], AceList), 489 concat_atom(AceList, '\n\n', Ace). 490 491acesentencelist_pp([X | Xs], Ace) :- 492 atom(X), % peek, if is sentence 493 !, 494 acesentencelist_pp_x([X | Xs], Ace). 495 496 497acesentencelist_pp_x([], '# ...') :- !. 498 499acesentencelist_pp_x(AceSentenceList, AceText) :- 500 concat_atom(AceSentenceList, '\n', AceText).
512listlist_listatom([], []). 513 514listlist_listatom([TokenList | Tail], [Atom | RestT]) :- 515 ( 516 TokenList = [] 517 -> 518 Atom = 'ERROR' 519 ; 520 sentence_type(TokenList, NewTokenList), 521 ace_niceace(NewTokenList, [FirstToken | RestTokenList]), 522 atom_capitalize(FirstToken, FirstTokenCapitalized), 523 concat_atom([FirstTokenCapitalized | RestTokenList], ' ', Atom) 524 ), 525 listlist_listatom(Tail, RestT).
The qp/1 term contains a list of tokens that represent the query word (e.g. [how, many]).
536sentence_type(TokenList, NewTokenList) :- 537 sentence_type(TokenList, _, NewTokenList). 538 539 540sentence_type([], '.', ['.']) :- !. 541 542sentence_type([], '?', ['?']). 543 544sentence_type([qp(QPhrase) | T], _, Tokens) :- 545 !, 546 sentence_type(T, '?', T2), 547 append(QPhrase, T2, Tokens). 548 549sentence_type([H | T], Type, [H | T2]) :- 550 sentence_type(T, Type, T2).
557clear_vars :-
558 retractall(var(_, _)),
559 retractall(var_once(_)),
560 retractall(var_twice(_)).
570add_var(Var) :-
571 (
572 var_twice(Var)
573 ->
574 true
575 ;
576 (
577 var_once(Var)
578 ->
579 retract(var_once(Var)),
580 assert(var_twice(Var))
581 ;
582 assert(var_once(Var))
583 )
584 ).
591remove_singletons(In, Out) :- 592 remove_singletons_x(1, _, In, Out). 593 594remove_singletons_x(Count, Count, [], []). 595 596remove_singletons_x(CountIn, CountOut, [List | T], [RList | RT]) :- 597 remove_singletons_x(CountIn, CountTmp, List, RList), 598 remove_singletons_x(CountTmp, CountOut, T, RT). 599 600remove_singletons_x(CountIn, CountOut, ['$VAR'(N) | T], RT) :- 601 var_once('$VAR'(N)), 602 !, 603 remove_singletons_x(CountIn, CountOut, T, RT). 604 605remove_singletons_x(CountIn, CountOut, ['$VAR'(N) | T], [NiceRef | RT]) :- 606 var(N, VarNumber), 607 !, 608 format(atom(NiceRef), 'X~w', [VarNumber]), 609 remove_singletons_x(CountIn, CountOut, T, RT). 610 611remove_singletons_x(CountIn, CountOut, ['$VAR'(N) | T], [NiceRef | RT]) :- 612 !, 613 assert(var(N, CountIn)), 614 format(atom(NiceRef), 'X~w', [CountIn]), 615 NewCount is CountIn + 1, 616 remove_singletons_x(NewCount, CountOut, T, RT). 617 618remove_singletons_x(CountIn, CountOut, [H | T], [H | RT]) :- 619 remove_singletons_x(CountIn, CountOut, T, RT)
Morphological synthesis and text generation
*/