1%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    2%% WN_CONNECT v1.3 : wn_gen_prox_equations
    3%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    4/*
    5AUTHORS: Pascual Julia´n-Iranzo (Universidad de Castilla-La Mancha, Spain)
    6Fernando Sa´enz-Pe´rez  (Universidad Complutense de Madrid, Spain)
    7
    8WN_CONNECT is licensed for research and educational purposes only and it is
    9distributed with NO WARRANTY OF ANY KIND. You are freely allowed to use, copy
   10and distribute WN_CONNECT provided that you make no modifications to any of its
   11files and give credit to its original authors.
   12*******************************************************************************/
   13%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   14% Generation of proximity equations based on WordNet
   15
   16:- module(wn_gen_prox_equations, [
   17  wn_gen_ontology_file/3,       % +ListOfListOfWords, +File, +Measure
   18	wn_gen_prox_equations_list/3, % +ListOfListOfWords, +Measure, -Equations
   19	wn_auto_gen_prox_equations/4  % +Directives, +Rules, -InEquations, -OutEquations
   20	]
   21	).   22
   23:- use_module(wn_sim_measures).   24:- use_module(wn_utilities).   25%%:- use_module(utilities).
   26%:- use_module(library(ordsets)).
   27
   28
   29%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   30
   31
   32%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   33%%% wn_gen_ontology_file(+ListOfListOfWords, +File, +Measure)
   34%%% Given a list of list of words, ListOfListOfWords, the name of a file, File,
   35%%% and the acronym of a measure, Measure (by now [path, wup, lch, res, lin, jcn,
   36%%% yarm]), it generates a set of proximity equations and stored them into the file File.
   37%%%
   38wn_gen_ontology_file(ListOfListOfWords, File, Measure) :-
   39    file_name_extension(_, Extension, File),
   40    ((Extension = ont) -> File_ont = File;
   41     (Extension = '')  -> file_name_extension(File, ont, File_ont)
   42    ),
   43    (exists_file(File_ont) ->
   44        write('The file '), write(File), write(' or '), write(File_ont), write(' does exists.'), nl
   45        ;
   46        (member(Measure, [path, wup, lch, res, lin, jcn, hso, lesk, vector, yar]) ->
   47            wn_gen_prox_equations_list(ListOfListOfWords, Measure, Equations),
   48            open(File_ont, write, OutputStream),
   49            write(OutputStream,'%% PROXIMITY EQUATIONS'), nl(OutputStream),
   50            write_equations(Equations, OutputStream),
   51            close(OutputStream)
   52            ;
   53            write(Measure), write(' is not a similarity or relatedness measure.'), nl
   54        )
   55    ).
   56
   57%%% write_equations(+Equations, +OutputStream)
   58%%%
   59write_equations([], _).
   60write_equations([sim(W1,W2,D)|Equations], OutputStream) :-
   61            concat_atom([W1,'~', W2, '=', D, '.'], ProxEqu),
   62            write(OutputStream,ProxEqu), nl(OutputStream),
   63            write_equations(Equations, OutputStream).
   64
   65
   66%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   67%%% wn_auto_gen_prox_equations(+Directives, +Rules, -InEquations, -OutEquations)
   68%%%
   69%%% If Directives is [:- directive(wn_gen_prox_equations, [Measure, Auto])], then 
   70%%% return in OutEquations the equations in InEquations plus all the proximity 
   71%%% equations derived from the following:
   72%%% - Extract three sets from Rules: constant, functor and predicate identifiers, 
   73%%% - For each word W1 in a set compare it to any other word W2 in the same set to 
   74%%%   determine their relatedness degree D, and generate a proximity equation 
   75%%%   sim(W1, W2, D) in OutEquations.
   76%%% Otherwise, just return InEquations in OutEquations
   77%%%
   78
   79wn_auto_gen_prox_equations([:- directive(wn_gen_prox_equations, [Measure, Auto])], Rules, InEquations, OutEquations) :-
   80  \+ is_list(Auto),
   81  atoms_functors_in_term(Rules, AllAtoms, AllFunctors),
   82  exception_words(ExceptionWords),
   83  ordsets:ord_subtract(AllAtoms, ExceptionWords, Atoms),
   84  ordsets:ord_subtract(AllFunctors, ExceptionWords, AllValidFunctors),
   85  bpl_predicates(AllValidFunctors, Functors, Predicates),
   86  !,
   87  wn_gen_prox_equations_list([Atoms, Functors, Predicates], Measure, NewEquations),
   88  lists:append(InEquations, NewEquations, OutEquations).
   89  
   90wn_auto_gen_prox_equations(_Directives, _Rules, Equations, Equations).
   91
   92
   93% List of words that are not to be related with WordNet words. 
   94% This must be an ordered set
   95exception_words([
   96  ':-',
   97  true   % true is the body of a fact, it is dismissed
   98  ]). 
   99
  100
  101
  102%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  103%%% wn_gen_prox_equations_list(+ListOfListOfWords, +Measure, -Equations)
  104%%% Given a ListOfListOfWords computes all proximity equations that can be formed paring
  105%%% the words of each list between them and then computing their proximity degree using
  106%%% the measure Measure.
  107%%%
  108%%% NOTES: Each list of ListOfListOfWords must be compounded by words of the same part of
  109%%%        speech (either nouns, verbs or adjectives)
  110%%%
  111%%%        "sim(Word1, Word2, Degree)" is the internal Bousi~Prolog representation of a
  112%%%        proximity equation "Word1 ~ Word2 = Degree" (i.e., Word1 is close to Word2 with
  113%%%        approximation degree Degree).
  114%%%
  115
  116wn_gen_prox_equations_list(ListOfListOfWords, Measure, Equations) :-
  117    wn_gen_prox_equations_list(ListOfListOfWords, Measure, Equations, []).
  118
  119%%%
  120%%% wn_gen_prox_equations_list/4 is implemented by a set of Definite Clause Grammar
  121%%% rules. Grammar rules are expanded automatically into Prolog clauses with two extra
  122%%% arguments added as the two arguments of the predicate in order to represent the
  123%%% input (or output) tokens as a difference list.
  124%%%
  125%%%
  126
  127wn_gen_prox_equations_list([], _Measure) -->
  128  [].
  129  
  130wn_gen_prox_equations_list([ListOfWords|ListOfListOfWords], Measure) -->
  131  wn_gen_prox_equations(ListOfWords, Measure), 
  132  wn_gen_prox_equations_list(ListOfListOfWords, Measure).
  133
  134  
  135wn_gen_prox_equations([], _Measure) -->
  136  [].
  137  
  138wn_gen_prox_equations([Word|ListOfWords], Measure) -->
  139  wn_gen_prox_equations(ListOfWords, Word, Measure), 
  140  wn_gen_prox_equations(ListOfWords, Measure).
  141
  142  
  143wn_gen_prox_equations([], _Word1, _Measure) -->
  144  [].
  145  
  146wn_gen_prox_equations([Word2|ListOfWords], Word1, Measure) -->
  147  {gen_prox_equation(Measure, Word1, Word2, Equation),
  148   !},
  149  [Equation],
  150  wn_gen_prox_equations(ListOfWords, Word1, Measure).
  151  
  152wn_gen_prox_equations([_Word2|ListOfWords], Word1, Measure) --> % Word1 and Word2 are not related
  153  wn_gen_prox_equations(ListOfWords, Word1, Measure).
  154
  155 
  156%%% gen_prox_equation(+Measure, +Pattern1, +Pattern2, -Equation)
  157%%%
  158%%%   Return the Equation for the given Measure and words. 
  159%%%   Words come expressed as the term Word:Type:SenseNumber
  160
  161gen_prox_equation(Measure, Word1:Type1:Sense1, Word2:Type2:Sense2, Equation) :-
  162  atom(Word1),
  163  atom(Word2),
  164  valid_word_type(Type1),
  165  valid_word_type(Type2),
  166  number(Sense1),
  167  number(Sense2),
  168  !,
  169  gen_prox_equation_aux(Measure, Word1:Type1:Sense1, Word2:Type2:Sense2, Equation).
  170  
  171gen_prox_equation(Measure, Word1, Word2, Equation) :-
  172  atom(Word1),
  173  atom(Word2),
  174  !,
  175  gen_prox_equation_aux(Measure, Word1:Type:1, Word2:Type:1, Equation).
  176  
  177gen_prox_equation(_Measure, Word1, Word2, _Equation) :-
  178  format('ERROR: Incorrect pattern for ~p and/or ~p. Expected either plain words or patterns Word:Type:Sense, where Type is in {n,v} and Sense is a number.', [Word1, Word2]),
  179  fail.  
  180  
  181 
  182gen_prox_equation_aux(Measure, Pattern1, Pattern2, sim(Word1, Word2, NormalizedDegree)) :-
  183   wn_measure_module_goal(Measure, Module, MeasureGoalName),
  184   MeasureGoal =.. [MeasureGoalName, Pattern1, Pattern2, Degree],
  185   Module:MeasureGoal,
  186   measure_max_value(Measure, Max),
  187   NormalizedDegree is Degree/Max,
  188   Pattern1 = Word1:_:_,
  189   Pattern2 = Word2:_:_.
  190   
  191
  192%%% valid_word_type(+Type).
  193%%%   Valid word types. Currently, only nouns (n) and verbs (v)
  194   
  195valid_word_type(n).
  196
  197valid_word_type(v).
  198
  199
  200%%% wn_measure_module_goal(?Measure, ?Module, ?MeasureGoalName)
  201%%% This predicate stores a list of parameters:
  202%%% Measure: it is the name of the measure used to compute the similarity or
  203%%% relatedness of two ListOfWords.
  204%%% Module: it is the module where is implemented the corresponding measure.
  205%%% MeasureGoalName: it is the name of the predicate that implements the measure.
  206%%%
  207
  208wn_measure_module_goal(path, wn_sim_measures, wn_path).
  209wn_measure_module_goal(wup,  wn_sim_measures, wn_wup).
  210wn_measure_module_goal(lch,  wn_sim_measures, wn_lch).
  211wn_measure_module_goal(res,  wn_ic_measures,  wn_res).
  212wn_measure_module_goal(jcn,  wn_ic_measures,  wn_jcn).
  213wn_measure_module_goal(lin,  wn_ic_measures,  wn_lin).
  214wn_measure_module_goal(yarm, wn_rel_measures, wn_yarm).
  215
  216
  217%%% measure_max_value(?Measure, ?Value)
  218%%% Measure: Name of the measure (path, lch, ...).
  219%%% Value: Maximum value the measure can take.
  220%%%
  221
  222measure_max_value(path, 1).
  223measure_max_value(wup, 1).
  224measure_max_value(lch, 3.6888794541139363).
  225measure_max_value(res, 1) :- % WARNING: Check this value!
  226  nl, write('WARNING: Normalization is not checked.'), nl.
  227measure_max_value(jcn, 1) :- % WARNING: Check this value!
  228  nl, write('WARNING: Normalization is not checked.'), nl.
  229measure_max_value(lin, 1).
  230%measure_max_value(lesk,  ???).
  231measure_max_value(hso, 16).
  232measure_max_value(yarm, 1)