% convert_file.pl
%
% ProNTo - Prolog Natural Language Toolkit
% Accessing WordNet from Prolog
%
% author: Sarah Witzig
% date created: 02-13-2003
% date last modified: 05-07-2003
% notes: Artificial Intelligence Center, University of Georgia
%
% This is an extended version of improve_file.pl. In addition to
% changing the order of the arguments to improve indexing, it
% changes all upper case letters to lower case, turns apostrophes
% and underscores into word boundaries and represents the words as
% open lists of quoted atoms. The predicates written to
% wn_s_convert.pl have the following structure:
%   s(['human','action'|_],100022113,2,n,1,1).

% convert_file
% Open wn_s.pl for reading and wn_s_convert.pl for writing, call
% convert_file/2 to do the conversion, then close the two streams.
% Both streams are closed even when the conversion fails or raises
% an error; a failure still makes convert_file/0 fail and an error
% is re-thrown after the streams have been closed.

convert_file :-
    open('wn_s.pl', read, InStream),
    % Do not leave the input stream open if the output file cannot
    % be created.
    catch(open('wn_s_convert.pl', write, OutStream),
          OpenError,
          ( close(InStream), throw(OpenError) )),
    catch(( convert_file(InStream, OutStream)
          ->  Outcome = done
          ;   Outcome = failed
          ),
          Error,
          Outcome = raised(Error)),
    close(OutStream),
    close(InStream),
    convert_file_finish(Outcome).

% convert_file_finish(+Outcome)
% Report the outcome of the conversion once the streams are closed:
% succeed for done, re-throw for raised(Error), fail for failed
% (there is deliberately no clause for failed).

convert_file_finish(done).
convert_file_finish(raised(Error)) :-
    throw(Error).

% convert_file(+InStream,+OutStream)
% Read one term at a time from InStream until end_of_file is read.
% For every s(Num,Syn,Word,Cat,X,Y) the word is moved to the first
% argument position (to improve indexing), converted to an open list
% by word_to_open_list/2 and the new predicate is written to
% OutStream. The end of the input is detected from the term returned
% by read/2 rather than with at_end_of_stream/1, so layout or
% comments after the last clause do not make the conversion fail.
% Terms that are not s/6 facts are skipped.

convert_file(InStream, OutStream) :-
    read(InStream, Term),
    (   Term == end_of_file
    ->  true
    ;   nonvar(Term),
        Term = s(Num, Syn, Word, Cat, X, Y)
    ->  % Convert first, write afterwards: a word that cannot be
        % converted never leaves a half-written clause behind.
        word_to_open_list(Word, OpenList),
        write_converted_clause(OutStream, OpenList, Num, Syn, Cat, X, Y),
        convert_file(InStream, OutStream)
    ;   convert_file(InStream, OutStream)
    ).

% word_to_open_list(+Word,-OpenList)
% Convert the atom Word into the atom OpenList whose text is an open
% list of the lower case words of Word. remove_ap/2 and
% change_to_lower/2 leave a list unchanged when it has no apostrophe
% or no capital letter, so calling both covers the four cases that
% test_nocap_noap/2, test_nocap_ap/2, test_cap_noap/2 and
% test_cap_ap/2 distinguish. The final cut keeps the read loop free
% of choice points.

word_to_open_list(Word, OpenList) :-
    name(Word, WordAtoms),
    remove_ap(WordAtoms, NoApostrophes),
    change_to_lower(NoApostrophes, LowerCase),
    create_open_list(LowerCase, OpenList),
    !.

% write_converted_clause(+OutStream,+OpenList,+Num,+Syn,+Cat,+X,+Y)
% Write s(OpenList,Num,Syn,Cat,X,Y) followed by a full stop
% (ASCII 46) and a newline (ASCII 10) to OutStream.

write_converted_clause(OutStream, OpenList, Num, Syn, Cat, X, Y) :-
    write(OutStream, s),
    write(OutStream, '('),
    write(OutStream, OpenList),
    write(OutStream, ','),
    write(OutStream, Num),
    write(OutStream, ','),
    write(OutStream, Syn),
    write(OutStream, ','),
    write(OutStream, Cat),
    write(OutStream, ','),
    write(OutStream, X),
    write(OutStream, ','),
    write(OutStream, Y),
    write(OutStream, ')'),
    put(OutStream, 46),
    put(OutStream, 10).

% test_nocap_noap(+WordAtoms,-WordAtoms)
% Succeeds if there is no apostrophe (ASCII 39) and no capital letter
% in the character code list of the word. The list is returned
% unchanged in the second argument.
% The test_* predicates are no longer called by convert_file/2; they
% are kept for existing callers.

test_nocap_noap(WordAtoms, WordAtoms) :-
    \+ member(39, WordAtoms),
    \+ capital_in_list(WordAtoms).

% test_nocap_ap(+WordAtoms,-WordAtoms1)
% Succeeds if there are apostrophes in the list, but no capital
% letters. WordAtoms1 is the list produced by remove_ap/2.

test_nocap_ap(WordAtoms, WordAtoms1) :-
    member(39, WordAtoms),
    \+ capital_in_list(WordAtoms),
    remove_ap(WordAtoms, WordAtoms1).

% test_cap_noap(+WordAtoms,-WordAtoms1)
% Succeeds if there are capital letters in the word, but no
% apostrophes. WordAtoms1 is the list produced by change_to_lower/2,
% which converts all capital letters into lower case letters.

test_cap_noap(WordAtoms, WordAtoms1) :-
    \+ member(39, WordAtoms),
    capital_in_list(WordAtoms),
    change_to_lower(WordAtoms, WordAtoms1).

% test_cap_ap(+WordAtoms,-WordAtoms1)
% Succeeds if there are apostrophes and capital letters in the word.
% remove_ap/2 takes care of the apostrophes, change_to_lower/2
% changes the capital letters to lower case letters.
test_cap_ap(WordAtoms, WordAtoms1) :-
    member(39, WordAtoms),
    capital_in_list(WordAtoms),
    remove_ap(WordAtoms, WordAtomsTemp),
    change_to_lower(WordAtomsTemp, WordAtoms1).

% remove_ap(+List,-List1)
% List1 is List with every apostrophe (ASCII 39) replaced by an
% underscore (ASCII 95). Later, when the open list is created, the
% underscore divides the word, e.g. mother's becomes
% ['mother','s'|_].
% The cut comes before the recursive call so that the recursion is a
% last call.

remove_ap([], []).
remove_ap([39|List], [95|List1]) :-
    !,
    remove_ap(List, List1).
remove_ap([First|List], [First|List1]) :-
    remove_ap(List, List1).

% change_to_lower(+List,-List1)
% List1 is List with every capital letter replaced by the
% corresponding lower case letter (character code + 32); all other
% codes are copied unchanged. The case test is explicit, so a code
% that is not a capital letter is never shifted, whatever the second
% argument is bound to.

change_to_lower([], []).
change_to_lower([First|Rest], [First1|Rest1]) :-
    (   capital(First)
    ->  First1 is First + 32
    ;   First1 = First
    ),
    change_to_lower(Rest, Rest1).

% capital_in_list(+List)
% Succeeds if there is a capital letter in a character code list.
% There is no clause for the empty list, so the predicate fails when
% no capital letter is found.

capital_in_list([F|_]) :-
    capital(F),
    !.
capital_in_list([_|Rest]) :-
    capital_in_list(Rest).

% capital(+X)
% X is a capital letter if its ASCII code is between 65 and 90.

capital(X) :-
    X >= 65,
    X =< 90.

% create_open_list(+WordAtoms,-OpenList)
% OpenList is an atom whose text is an open list of quoted atoms, one
% for each underscore-separated word of WordAtoms, e.g. the codes of
% human_action give ['human','action'|_].
% Underscores at the start or the end of the word are dropped and
% runs of underscores count as one separator. Such underscores appear
% when an apostrophe next to an underscore or at the edge of a word
% has been replaced by remove_ap/2 (ladies'_room, 'hood); without
% this step they would produce empty atoms in the list.
% A list without any word characters gives [''|_].

create_open_list(WordAtoms, OpenList) :-
    squeeze_underscores(WordAtoms, Squeezed),
    change_underscore(Squeezed, Separated),
    % 91 = '[', 39 = quote, 124 = '|', 95 = '_', 93 = ']'
    append([91,39|Separated], [39,124,95,93], OpenListCodes),
    name(OpenList, OpenListCodes).

% squeeze_underscores(+List,-Squeezed)
% Squeezed is List without leading and trailing underscores and with
% every run of underscores reduced to a single underscore.

squeeze_underscores(List, Squeezed) :-
    drop_underscores(List, Trimmed),
    squeeze_inner(Trimmed, Squeezed).

% drop_underscores(+List,-Rest)
% Rest is List without its leading underscores.

drop_underscores([95|List], Rest) :-
    !,
    drop_underscores(List, Rest).
drop_underscores(List, List).

% squeeze_inner(+List,-Squeezed)
% Copy List, keeping one underscore for every run of underscores that
% is followed by another character and none for a run at the end.

squeeze_inner([], []).
squeeze_inner([95|List], Squeezed) :-
    !,
    drop_underscores(List, Rest),
    (   Rest == []
    ->  Squeezed = []
    ;   Squeezed = [95|Squeezed1],
        squeeze_inner(Rest, Squeezed1)
    ).
squeeze_inner([First|List], [First|Squeezed]) :-
    squeeze_inner(List, Squeezed).

% change_underscore(+List1,-List2)
% List2 is List1 with every underscore replaced by the three
% characters quote, comma, quote (39,44,39), which closes one quoted
% atom and opens the next one in the text of the open list.

change_underscore([], []).
change_underscore([95|Rest1], [39,44,39|Rest2]) :-
    !,
    change_underscore(Rest1, Rest2).
change_underscore([First|Rest1], [First|Rest2]) :-
    change_underscore(Rest1, Rest2).