1% This file is part of the Attempto Parsing Engine (APE).
    2% Copyright 2008-2013, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch).
    3%
    4% The Attempto Parsing Engine (APE) is free software: you can redistribute it and/or modify it
    5% under the terms of the GNU Lesser General Public License as published by the Free Software
    6% Foundation, either version 3 of the License, or (at your option) any later version.
    7%
    8% The Attempto Parsing Engine (APE) is distributed in the hope that it will be useful, but WITHOUT
    9% ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
   10% PURPOSE. See the GNU Lesser General Public License for more details.
   11%
   12% You should have received a copy of the GNU Lesser General Public License along with the Attempto
   13% Parsing Engine (APE). If not, see http://www.gnu.org/licenses/.
   14
   15
   16:- module(morphgen, [
   17		clear_vars/0,
   18		add_var/1,
   19		remove_singletons/2,
   20		acesentencelist_pp/2,
   21		listlist_listatom/2,
   22		format_dr/2,
   23		surface_noun/4,
   24		surface_verb/3,
   25		surface_neg_verb/3,
   26		surface_adverb/3,
   27		surface_property/3,
   28		surface_property/6,
   29		surface_quotedstring/2,
   30		get_di_marker/4
   31	]).

Morphological synthesis and text generation

author
- Kaarel Kaljurand
version
- 2010-04-20

*/

   40:- use_module('../lexicon/lexicon_interface', []).   41:- use_module('../lexicon/chars', [
   42		is_capitalized/1
   43	]).   44
   45:- use_module(ace_niceace, [
   46		ace_niceace/2,
   47		word_article/2,
   48		atom_capitalize/2
   49	]).   50
   51
   52:- dynamic var_once/1.   53:- dynamic var_twice/1.   54:- dynamic var/2.
 format_dr(+Dr:nvar, -NiceDr:atom) is det
Arguments:
Dr- numbervared variable
NiceDr- pretty-printed numbervared variable
   62format_dr(Dr, NiceDr) :-
   63	format(atom(NiceDr), '~W', [Dr, [numbervars(true)]]).
 surface_verb(+SgPlPart:atom, +Lemma:atom, -WordForm:atom) is det
Examples:
   90surface_verb(_SgPl, i(Lemma), WordForm) :-
   91	!,
   92	surface_verb_x(part, Lemma, WordFormTmp),
   93	concat_atom([is, WordFormTmp, by], ' ', WordForm).
   94
   95surface_verb(SgPl, Lemma, WordForm) :-
   96	surface_verb_x(SgPl, Lemma, WordForm).
 surface_verb_x(+SgPlPart:atom, +Lemma:atom, -WordForm:atom) is det
Arguments:
SgPlPart- {sg, pl, part}, is the number of the verb or its participleness
Lemma- lemma of the verb
WordForm- is the synthesized form of the verb
  105surface_verb_x(sg, Verb, SurfaceVerb) :-
  106	verb_sg(Verb, SurfaceVerb).
  107
  108surface_verb_x(pl, Verb, SurfaceVerb) :-
  109	verb_pl(Verb, SurfaceVerb).
  110
  111
  112surface_verb_x(part, Verb, VerbEd) :-
  113	lexicon_interface:tv_pp(VerbEd, Verb),
  114	!.
  115
  116surface_verb_x(part, Verb, VerbEd) :-
  117	lexicon_interface:dv_pp(VerbEd, Verb, _),
  118	!.
  119
  120surface_verb_x(part, Verb, [v, ':', Verb]).
 surface_neg_verb(+SgPl:atom, +Lemma:atom, -WordForm:list) is det
BUG: We should also support participles here.
Arguments:
SgPl- {sg, pl}, is the number of the negated verb
Lemma- lemma of the negated verb
WordForm- is the synthesized form of the verb
  131surface_neg_verb(sg, be, [is, not]) :- !.
  132surface_neg_verb(pl, be, [are, not]) :- !.
  133surface_neg_verb(sg, Verb, [does, not, WordForm]) :- !, surface_verb(pl, Verb, WordForm).
  134surface_neg_verb(pl, Verb, [do, not, WordForm]) :- !, surface_verb(pl, Verb, WordForm).
 surface_adverb(+Adverb:atom, +Comparison:atom, -SurfaceText:list) is det
Note that comparative forms like 'faster', are lemmatized as 'fast'.
Arguments:
Adverb- is a lemma form of an adverb, as found in the DRS.
Comparision- is one of {pos, comp, sup}
SurfaceText- is a combination of the adverb and the comparision type.
  145surface_adverb(Adverb, pos, [Adverb]).
  146
  147surface_adverb(Adverb, comp, [CompAdverb]) :-
  148	lexicon_interface:adv_comp(CompAdverb, Adverb),
  149	!.
  150
  151surface_adverb(Adverb, comp, [more, Adverb]) :-
  152	lexicon_interface:adv(Adverb, _),
  153	!.
  154
  155surface_adverb(Adverb, comp, [more, a, ':', Adverb]).
  156
  157surface_adverb(Adverb, sup, [SupAdverb]) :-
  158	lexicon_interface:adv_sup(SupAdverb, Adverb),
  159	!.
  160
  161surface_adverb(Adverb, sup, [most, Adverb]) :-
  162	lexicon_interface:adv(Adverb, _),
  163	!.
  164
  165surface_adverb(Adverb, sup, [most, a, ':', Adverb]).
 surface_property(+Adjective:atom, +Comparison:atom, -SurfaceText:list) is det
Note that comparative forms like 'better', are lemmatized as 'good'.
Arguments:
Adjective- is a lemma form of an adjective, as found in the DRS.
Comparision- is one of {pos, pos_as, comp, sup, comp_than}
SurfaceText- is a combination of the adjective and the comparision type.
  176surface_property(Adjective, pos, [Adjective]) :-
  177	lexicon_interface:adj_itr(Adjective, _),
  178	!.
  179
  180surface_property(Adjective, pos, [Adjective]) :-
  181	lexicon_interface:adj_tr(Adjective, _, _),
  182	!.
  183
  184surface_property(Adjective, pos, [a, ':', Adjective]).
  185
  186
  187surface_property(Adjective, pos_as, [as, Adjective, as]) :-
  188	lexicon_interface:adj_itr(Adjective, _),
  189	!.
  190
  191surface_property(Adjective, pos_as, [as, a, ':', Adjective, as]).
  192
  193
  194surface_property(Adjective, comp, MoreAdjective) :- pos_comp(Adjective, MoreAdjective, _).
  195
  196surface_property(Adjective, sup, MostAdjective) :- pos_sup(Adjective, MostAdjective).
  197
  198surface_property(Adjective, comp_than, [MoreAdjective, than]) :- pos_comp(Adjective, MoreAdjective, _).
 surface_property(+Adjective:atom, +Comparison:atom, +ComparisonTarget:atom, +Ref2Text:list, +Ref3Text:list, -SurfaceText:list) is det
Note that comparative forms like 'fonder-of', are lemmatized as 'fond-of'.

Examples:

  220surface_property(Adjective, pos_as, subj, Ref2Text, Ref3Text, [as, Adjective, Ref2Text, as, Ref3Text]).
  221
  222surface_property(Adjective, pos_as, obj, Ref2Text, Ref3Text, [as, Adjective, Ref2Text, as, Preposition, Ref3Text]) :-
  223	pos_comp(Adjective, _, Preposition).
  224
  225surface_property(Adjective, comp_than, subj, Ref2Text, Ref3Text, [MoreAdjective, Ref2Text, than, Ref3Text]) :-
  226	pos_comp(Adjective, MoreAdjective, _).
  227
  228surface_property(Adjective, comp_than, obj, Ref2Text, Ref3Text, [MoreAdjective, Ref2Text, than, Preposition, Ref3Text]) :-
  229	pos_comp(Adjective, MoreAdjective, Preposition).
 pos_comp(+Adjective:atom, -ComparativeAdjective:list, -Preposition:list) is det
Note that as with plural nouns, Clex does not tell us whether the surface form of an adjective (e.g. 'far') is to be preferred over another surface form with the same lemma. E.g. 'far' can be mapped to either 'further' or 'farther'.
Arguments:
Adjective- is an ACE adjective (e.g. 'good')
ComparativeAdjective- is an ACE comparative adjective (e.g. 'better', 'more tall', 'fonder-of')
Preposition- is the preposition of a transitive adjective (e.g. 'of' in case of 'fonder-of')
  242pos_comp(Positive, [Comparative], []) :-
  243	lexicon_interface:adj_itr_comp(Comparative, Positive),
  244	!.
  245
  246pos_comp(Positive, [Comparative], [Preposition]) :-
  247	lexicon_interface:adj_tr_comp(Comparative, Positive, Preposition),
  248	!.
  249
  250pos_comp(Positive, [more, Positive], []) :-
  251	lexicon_interface:adj_itr(Positive, _),
  252	!.
  253
  254pos_comp(Positive, [more, Positive], [Preposition]) :-
  255	lexicon_interface:adj_tr(Positive, _, Preposition),
  256	!.
  257
  258% If everything else fails, then we add a prefix.
  259pos_comp(Positive, [more, a, ':', Positive], []).
 pos_sup(+Adjective:atom, -SuperlativeAdjective:list) is det
Arguments:
Adjective- is an ACE adjective (e.g. 'good')
SuperlativeAdjective- is an ACE superlative adjective (e.g. 'best', 'most tall')
  267pos_sup(Positive, [Superlative]) :-
  268	lexicon_interface:adj_itr_sup(Superlative, Positive),
  269	!.
  270
  271pos_sup(Positive, [Superlative]) :-
  272	lexicon_interface:adj_tr_sup(Superlative, Positive, _),
  273	!.
  274
  275pos_sup(Positive, [most, Positive]) :-
  276	lexicon_interface:adj_itr(Positive, _),
  277	!.
  278
  279pos_sup(Positive, [most, Positive]) :-
  280	lexicon_interface:adj_tr(Positive, _, _),
  281	!.
  282
  283% If everything else fails, then we add a prefix.
  284pos_sup(Positive, [most, a, ':', Positive]).
 surface_quotedstring(+DrsString:atomic, -QuotedString:atom) is det
Adds quotes around the atom within the DRS string(.), escaping also double quotes and backslashes. The result is an ACE quoted string.
Arguments:
DrsString- e.g. 'a\\b"c'
QuotedString- e.g. '"a\\\\b\"c"'
  296surface_quotedstring(DrsString, QuotedString) :-
  297	atom_codes(DrsString, Codes1),
  298	quote_and_escape(Codes1, Codes2),
  299	atom_codes(QuotedString, Codes2).
 quote_and_escape(+CodesIn:list, -CodesOut:list) is det
Escapes quote (34) and backslash (92) characters in the input list of codes, borders the resulting list with quotes (34).
  307quote_and_escape(Cs, [34 | QCs]) :-
  308	quote_and_escape_(Cs, QCs).
  309
  310quote_and_escape_([], [34]).
  311
  312quote_and_escape_([92 | Cs], [92, 92 | QCs]) :-
  313	!,
  314	quote_and_escape_(Cs, QCs).
  315
  316quote_and_escape_([34 | Cs], [92, 34 | QCs]) :-
  317	!,
  318	quote_and_escape_(Cs, QCs).
  319
  320quote_and_escape_([C | Cs], [C | QCs]) :-
  321	quote_and_escape_(Cs, QCs).
 surface_noun(+Type:atom, +Lemma:atom, ?Num:atom, -Form:term)
Examples:
surface_noun(cn,man,sg,man)
surface_noun(cn,somebody,sg,somebody)
surface_noun(cn,man,pl,men)
surface_noun(cn,sand,mass,sand)
Note: we do not add a prefix to unknown proper names unless they start with a lowercase character.

Note that in the presence of aliases, the mapping of lemmas to surface forms is not deterministic. Consider e.g. the mapping of surface forms to lemmas:

abaci->abacus
abacuses->abacus
As Clex does not include information about the main word vs its alias, we return whatever comes first.

@param Type is one of {cn, pn}, i.e. common noun or proper name @param Lemma is the lemma of the noun as found in the DRS @param Num is one of {sg, mass, pl} @param Form is the surface form of the noun (possibly a list of tokens)

@bug: possibly slow, because uses an unindexed argument

  352surface_noun(cn, Lemma, sg, Form) :-
  353	lexicon_interface:noun_sg(Form, Lemma, _),
  354	!.
  355
  356surface_noun(cn, Lemma, mass, Form) :-
  357	lexicon_interface:noun_mass(Form, Lemma, _),
  358	!.
  359
  360surface_noun(cn, Lemma, pl, Form) :-
  361	lexicon_interface:noun_pl(Form, Lemma, _),
  362	!.
  363
  364% If the common noun lemma does not exist in the lexicon
  365% then output it with the n-prefix, regardless of
  366% its number.
  367surface_noun(cn, Lemma, _Num, [n, ':', Lemma]).
  368
  369
  370surface_noun(pn, Lemma, sg, Form) :-
  371	lexicon_interface:pn_sg(Form, Lemma, _),
  372	!.
  373
  374surface_noun(pn, Lemma, sg, [the, Form]) :-
  375	lexicon_interface:pndef_sg(Form, Lemma, _),
  376	!.
  377
  378surface_noun(pn, Lemma, pl, Form) :-
  379	lexicon_interface:pn_pl(Form, Lemma, _),
  380	!.
  381
  382surface_noun(pn, Lemma, pl, [the, Form]) :-
  383	lexicon_interface:pndef_pl(Form, Lemma, _),
  384	!.
  385
  386surface_noun(pn, Lemma, sg, [p, ':', Lemma]) :-
  387	\+ is_capitalized(Lemma),
  388	!.
  389
  390surface_noun(pn, Lemma, pl, [p, ':', Lemma]) :-
  391	\+ is_capitalized(Lemma),
  392	!.
  393
  394surface_noun(pn, Lemma, sg, Lemma).
  395
  396surface_noun(pn, Lemma, pl, Lemma).
 verb_pl(+Lemma:atom, -Pl:atom) is det
Arguments:
Lemma- is the lemma of the verb as found in the DRS
Pl- is the plural form of the verb
  405verb_pl(be, are) :- !. % BUG: is this needed?
  406
  407verb_pl(Lemma, Pl) :-
  408	lexicon_interface:iv_infpl(Pl, Lemma),
  409	!.
  410
  411verb_pl(Lemma, Pl) :-
  412	lexicon_interface:tv_infpl(Pl, Lemma),
  413	!.
  414
  415verb_pl(Lemma, Pl) :-
  416	lexicon_interface:dv_infpl(Pl, Lemma, _),
  417	!.
  418
  419verb_pl(Lemma, [v, ':', Lemma]).
 verb_sg(+Lemma:atom, -Sg:atom) is det
Arguments:
Lemma- is the lemma of the verb as found in the DRS
Sg- is the singular form of the verb
  427verb_sg(be, is) :- !. % BUG: is this needed?
  428
  429verb_sg(Lemma, Sg) :-
  430	lexicon_interface:iv_finsg(Sg, Lemma),
  431	!.
  432
  433verb_sg(Lemma, Sg) :-
  434	lexicon_interface:tv_finsg(Sg, Lemma),
  435	!.
  436
  437verb_sg(Lemma, Sg) :-
  438	lexicon_interface:dv_finsg(Sg, Lemma, _),
  439	!.
  440
  441verb_sg(Lemma, [v, ':', Lemma]).
 get_di_marker(+SgPl:atom, +Lemma:atom, -SurfaceForm:term, -DiMarker:atom)
Looks up the prepositional marker of a ditransitive verb by its lemma and its number. (Note that the ACE lexicon allows the same lemma to have a different marker in singular and in plural context. This is not the case in English though.)
  453get_di_marker(sg, Lemma, Form, DiMarker) :-
  454	lexicon_interface:dv_finsg(Form, Lemma, DiMarker),
  455	DiMarker \= '',
  456	!.
  457
  458get_di_marker(sg, Lemma, Form, '') :-
  459	lexicon_interface:dv_finsg(Form, Lemma, ''),
  460	!.
  461
  462get_di_marker(pl, Lemma, Form, DiMarker) :-
  463	lexicon_interface:dv_infpl(Form, Lemma, DiMarker),
  464	DiMarker \= '',
  465	!.
  466
  467get_di_marker(pl, Lemma, Form, '') :-
  468	lexicon_interface:dv_infpl(Form, Lemma, ''),
  469	!.
  470
  471get_di_marker(_, Lemma, [v, ':', Lemma], '').
 acesentencelist_pp(+AceList:list, -AceText:atom) is det
Expects the input list to be a list of atoms (ACE sentences) or a list of lists of atoms (lists of ACE sentences). Produces an ACE text (atom).
Arguments:
AceList- is a list of (lists of) ACE sentences (atoms)
AceText- is an ACE text (atom)
  483acesentencelist_pp([], '').
  484
  485acesentencelist_pp([X | Xs], Ace) :-
  486	is_list(X), % peek, if is paragraph
  487	!,
  488	maplist(acesentencelist_pp_x, [X | Xs], AceList),
  489	concat_atom(AceList, '\n\n', Ace).
  490
  491acesentencelist_pp([X | Xs], Ace) :-
  492	atom(X), % peek, if is sentence
  493	!,
  494	acesentencelist_pp_x([X | Xs], Ace).
  495
  496
  497acesentencelist_pp_x([], '# ...') :- !.
  498
  499acesentencelist_pp_x(AceSentenceList, AceText) :-
  500	concat_atom(AceSentenceList, '\n', AceText).
 listlist_listatom(+ListOfList:list, -ListOfAtom:list) is det
Arguments:
ListOfList- is a list (ACE sentences) of lists (ACE tokens)
ListOfAtom- is a list of atoms (ACE sentences)
deprecated
- try to use tokens_to_sentences/2 instead

used only by Core ACE, NP ACE verbalizers, clean it up

  512listlist_listatom([], []).
  513
  514listlist_listatom([TokenList | Tail], [Atom | RestT]) :-
  515	(
  516		TokenList = []
  517	->
  518		Atom = 'ERROR'
  519	;
  520		sentence_type(TokenList, NewTokenList),
  521		ace_niceace(NewTokenList, [FirstToken | RestTokenList]),
  522		atom_capitalize(FirstToken, FirstTokenCapitalized),
  523		concat_atom([FirstTokenCapitalized | RestTokenList], ' ', Atom)
  524	),
  525	listlist_listatom(Tail, RestT).
 sentence_type(+TokenList:list, -NewTokenList:list) is det
 sentence_type(+TokenList:list, -Type:atom, -NewTokenList:list) is det
Assigns a type to the given sentence. Type is one of '.' and '?'.

The qp/1 term contains a list of tokens that represent the query word (e.g. [how, many]).

  536sentence_type(TokenList, NewTokenList) :-
  537	sentence_type(TokenList, _, NewTokenList).
  538
  539
  540sentence_type([], '.', ['.']) :- !.
  541
  542sentence_type([], '?', ['?']).
  543
  544sentence_type([qp(QPhrase) | T], _, Tokens) :-
  545	!,
  546	sentence_type(T, '?', T2),
  547	append(QPhrase, T2, Tokens).
  548
  549sentence_type([H | T], Type, [H | T2]) :-
  550	sentence_type(T, Type, T2).
 clear_vars is det
Retracts dynamic predicates: var/2, var_once/1, var_twice/1.
  557clear_vars :-
  558	retractall(var(_, _)),
  559	retractall(var_once(_)),
  560	retractall(var_twice(_)).
 add_var(+Var:nvar) is det
Asserts the (numbervared) variable so that we know if it has been used once or more times.
Arguments:
Var- is a numbervared discourse referent
  570add_var(Var) :-
  571	(
  572		var_twice(Var)
  573	->
  574		true
  575	;
  576		(
  577			var_once(Var)
  578		->
  579			retract(var_once(Var)),
  580			assert(var_twice(Var))
  581		;
  582			assert(var_once(Var))
  583		)
  584	).
 remove_singletons(+ListIn:list, -ListOut:list) is det
 remove_singletons_x(+VarCount:integer, +ListIn:list, -ListOut:list) is det
  591remove_singletons(In, Out) :-
  592	remove_singletons_x(1, _, In, Out).
  593
  594remove_singletons_x(Count, Count, [], []).
  595
  596remove_singletons_x(CountIn, CountOut, [List | T], [RList | RT]) :-
  597	remove_singletons_x(CountIn, CountTmp, List, RList),
  598	remove_singletons_x(CountTmp, CountOut, T, RT).
  599
  600remove_singletons_x(CountIn, CountOut, ['$VAR'(N) | T], RT) :-
  601	var_once('$VAR'(N)),
  602	!,
  603	remove_singletons_x(CountIn, CountOut, T, RT).
  604
  605remove_singletons_x(CountIn, CountOut, ['$VAR'(N) | T], [NiceRef | RT]) :-
  606	var(N, VarNumber),
  607	!,
  608	format(atom(NiceRef), 'X~w', [VarNumber]),
  609	remove_singletons_x(CountIn, CountOut, T, RT).
  610
  611remove_singletons_x(CountIn, CountOut, ['$VAR'(N) | T], [NiceRef | RT]) :-
  612	!,
  613	assert(var(N, CountIn)),
  614	format(atom(NiceRef), 'X~w', [CountIn]),
  615	NewCount is CountIn + 1,
  616	remove_singletons_x(NewCount, CountOut, T, RT).
  617
  618remove_singletons_x(CountIn, CountOut, [H | T], [H | RT]) :-
  619	remove_singletons_x(CountIn, CountOut, T, RT)