:- ensure_loaded('$REGULUS/PrologLib/compatibility'). :- module(parse_regulus_generated_gsl, [test/1, parse_regulus_generated_gsl/2, convert_parsed_gsl_to_normal_gsl/2 ]). :- use_module('$REGULUS/Prolog/regulus_utilities'). :- use_module('$REGULUS/PrologLib/utilities'). :- use_module(library(lists)). :- use_module(library(system)). 'SICSTUS4ONLY'( ( :- use_module(library(file_systems)) ) ). %------------------------------------------------------------------------------------ test(parse_toy1) :- parse_regulus_generated_gsl('$REGULUS/Examples/Toy1/Generated/recogniser.grammar', '$REGULUS/Examples/Toy1/Generated/recogniser.pl'). test(print_parsed_toy1) :- convert_parsed_gsl_to_normal_gsl('$REGULUS/Examples/Toy1/Generated/recogniser.pl', '$REGULUS/Examples/Toy1/Generated/recogniser_from_parsed.grammar'). test(parse_jackie_dynamic) :- parse_regulus_generated_gsl('$JACKIE/Generated/jackie_specialised_recogniser_dynamic.grammar', '$JACKIE/Generated/jackie_specialised_recogniser_dynamic.pl'). test(print_jackie_dynamic) :- convert_parsed_gsl_to_normal_gsl('$JACKIE/Generated/jackie_specialised_recogniser_dynamic.pl', '$JACKIE/Generated/jackie_specialised_recogniser_dynamic_from_parsed.grammar'). test(parse_jackie_pcfg_static) :- parse_regulus_generated_gsl('$JACKIE/Generated/pcfg_trained/jackie_specialised_recogniser_static.grammar', '$JACKIE/Generated/pcfg_trained/jackie_specialised_recogniser_static.pl'). test(parse_role_marked_ara) :- parse_regulus_generated_gsl('$MED_SLT2/Ara/GeneratedFiles/pcfg_trained_role_marked/specialised_recogniser_role_marked.grammar', '$MED_SLT2/Ara/GeneratedFiles/pcfg_trained_role_marked/specialised_recogniser_role_marked.pl'). %------------------------------------------------------------------------------------ parse_regulus_generated_gsl(GrammarFile, ParsedFile) :- safe_absolute_file_name(GrammarFile, AbsGrammarFile), safe_absolute_file_name(ParsedFile, AbsParsedFile), open(AbsGrammarFile, read, SIn), open(AbsParsedFile, write, SOut), format('~N~n--- Parsing GSL grammar from ~w~n', [AbsGrammarFile]), parse_gsl1(SIn, waiting_for_rule_group, SOut, 0-N), close(SIn), close(SOut), format('~N~n--- Written parsed grammar (~d non-terminals) to ~w~n', [N, AbsParsedFile]), !. convert_parsed_gsl_to_normal_gsl(ParsedFile, GrammarFile) :- safe_absolute_file_name(ParsedFile, AbsParsedFile), safe_absolute_file_name(GrammarFile, AbsGrammarFile), open(AbsParsedFile, read, SIn), open(AbsGrammarFile, write, SOut), format('~N~n--- Reading parsed GSL grammar from ~w~n', [AbsParsedFile]), print_parsed_gsl(SIn, SOut, 0-N), close(SIn), close(SOut), format('~N~n--- Written normal grammar (~d non-terminals) to ~w~n', [N, AbsGrammarFile]), !. %------------------------------------------------------------------------------------ parse_gsl1(SIn, InState, SOut, InN-OutN) :- read_and_parse_line(SIn, ParsedLine), process_parsed_line(ParsedLine, InState-NextState, SOut, InN-NextN), ( NextN > InN, 0 is NextN mod 100 -> format('~d ', [NextN]), flush_output(user) ; true ), !, parse_gsl2(ParsedLine, SIn, NextState, SOut, NextN-OutN). parse_gsl2(end_of_file, _SIn, _InState, _SOut, InN-InN) :- !. parse_gsl2(_ParsedLine, SIn, InState, SOut, InN-OutN) :- parse_gsl1(SIn, InState, SOut, InN-OutN). read_and_parse_line(S, ParsedLine) :- read_line(S, Line), parse_line(Line, ParsedLine). parse_line(end_of_file, end_of_file) :- !. parse_line(Line, ParsedLine) :- line(ParsedLine, Line, []), !. parse_line(Line, _ParsedLine) :- format2error('~N*** Error: unable to parse line "~s"~n', [Line]), fail. %------------------------------------------------------------------------------------ print_parsed_gsl(SIn, SOut, InN-OutN) :- read(SIn, ParsedGSL), ( ParsedGSL = end_of_file -> OutN = InN ; otherwise -> print_parsed_gsl_item(SOut, ParsedGSL), NextN is InN + 1, ( NextN > InN, 0 is NextN mod 100 -> format('~d ', [NextN]), flush_output(user) ; true ), !, print_parsed_gsl(SIn, SOut, NextN-OutN) ). %------------------------------------------------------------------------------------ /* Typical rule: NP_N_N_SUBJ_Y_NULL_NULL_NORMAL_N_N_N_N_N_N_N_N_N_N_N_N_N_N_DEGREE_SYMPTOM_NONE_3_SING [ ( the N_ANY_N_N_N_N_N_N_N_N_N_N_N_N_N_DEF_DEGREE_ANY_ANY_SYMPTOM_NONE_3_SING:v_0 )~0.766666666667 {return( $v_0 )} ( POSSESSIVE:v_0 N_ANY_N_N_N_N_N_N_N_N_N_N_N_N_N_DEF_DEGREE_ANY_ANY_SYMPTOM_NONE_3_SING:v_1 )~0.116666666667 {return( concat( $v_0 $v_1 ) )} ( D_N_ANY_ANY_N_DEF_3_SING:v_0 N_ANY_N_N_N_N_N_N_N_N_N_N_N_N_N_DEF_DEGREE_ANY_ANY_SYMPTOM_NONE_3_SING:v_1 )~0.116666666667 {return( concat( ( ( spec $v_0 ) ) $v_1 ) )} ] */ % process_parsed_line(ParsedLine, InState-OutState, InN-OutN) process_parsed_line(empty, InState-InState, _SOut, InN-InN) :- !. process_parsed_line(non_terminal(NonTerminal), waiting_for_rule_group-started_rule_group(NonTerminal), _SOut, InN-InN) :- !. process_parsed_line(left_square_bracket, started_rule_group(NonTerminal)-filling_rule_group(NonTerminal, []), _SOut, InN-InN) :- !. process_parsed_line(right_square_bracket, filling_rule_group(NonTerminal, Rules)-waiting_for_rule_group, SOut, InN-OutN) :- reverse(Rules, ReversedRules), format(SOut, '~N~n', []), prettyprintq_to_stream_unlimited_depth(SOut, rule_group(NonTerminal, ReversedRules)), format(SOut, '.~n', []), OutN is InN + 1, !. process_parsed_line(syn_rule(Prob, Body), filling_rule_group(NonTerminal, InRules)- [filling_rule_group(NonTerminal, InRules), syn_rule(Prob, Body)], _SOut, InN-InN) :- !. process_parsed_line(syn_rule(NewProb, NewBody), [filling_rule_group(NonTerminal, InRules), syn_rule(Prob, Body)]- [filling_rule_group(NonTerminal, [rule(Prob, Body, no_sem) | InRules]), syn_rule(NewProb, NewBody)], _SOut, InN-InN) :- !. process_parsed_line(semantic_return_value(Sem), [filling_rule_group(NonTerminal, InRules), syn_rule(Prob, Body)]- filling_rule_group(NonTerminal, [rule(Prob, Body, Sem) | InRules]), _SOut, InN-InN) :- !. process_parsed_line(end_of_file, InState-InState, _SOut, InN-InN) :- !. process_parsed_line(ParsedLine, State, SOut, N) :- format2error('~N*** Error: bad call: ~w~n', [process_parsed_line(ParsedLine, State, SOut, N)]), fail. %------------------------------------------------------------------------------------ /* Typical parsed rules: rule_group(public_rule('MAIN'), [rule(no_prob,[non_terminal('TOP_USER_COMMAND',v_0)],' < value $v_0 > ')]). rule_group('NP_Y_N_SUBJ_N_NULL_NULL_NORMAL_N_N_N_N_N_N_N_N_N_N_N_N_N_N_EXHIBIT_NONE_NONE_3_SING', [rule(no_prob, [non_terminal('D_Y_ANY_Y_ANY_ANY_0111111_3_SING',v_0)], 'return( ( ( spec $v_0 ) ) )'), rule(no_prob, [non_terminal('D_Y_ANY_Y_ANY_ANY_0111111_3_SING',v_0), non_terminal('N_ANY_N_N_N_N_N_N_N_N_N_N_N_N_N_QUANT_EXHIBIT_ANY_ANY_NONE_NONE_3_SING', v_1)], 'return( concat( concat( ( ( spec $v_0 ) ) $v_1 ) () ) )'), rule(no_prob, [non_terminal('D_Y_ANY_Y_ANY_ANY_0111111_3_SING',v_0), non_terminal('TMP_CAT_41_QUANT_EXHIBIT_3_SING',v_1)], 'return( concat( concat( ( ( spec $v_0 ) ) $v_1.sem_2 ) $v_1.sem_1 ) )')]). rule_group('POLITENESS_POST_YNQ', [rule(no_prob,[terminal(please)],'return( ( ( politeness please ) ) )'), rule(no_prob, external_grammar_non_terminal('../DynamicNuance/DYNAMIC_POLITENESS_POST_YNQ_1.gsl', v_0), 'return( $v_0 )'), rule(no_prob,[terminal(benjamin)],'return( ( ( politeness system_name ) ) )')]). */ print_parsed_gsl_item(S, ParsedGSL) :- ParsedGSL = rule_group(LHS, RHS), format(S, '~N~n', []), print_rule_lhs(S, LHS), format(S, '~N[~n', []), print_rule_rhs(S, RHS), format(S, '~N]~n', []), !. print_parsed_gsl_item(_S, ParsedGSL) :- format2error('~N*** Error: unable to print parsed GSL rule:~n', []), prettyprintq(ParsedGSL), format('~N', []), !, fail. print_rule_lhs(S, public_rule(NonTerminal)) :- format(S, '~w:public', [NonTerminal]), !. print_rule_lhs(S, NonTerminal) :- atom(NonTerminal), format(S, '~w', [NonTerminal]), !. print_rule_lhs(S, NonTerminal) :- format2error('~N*** Error: bad call: ~w~n', [print_rule_lhs(S, NonTerminal)]), fail. print_rule_rhs(_S, []). print_rule_rhs(S, [F | R]) :- print_rule_rhs_line(S, F), !, print_rule_rhs(S, R). /* rule(no_prob, external_grammar_non_terminal('../DynamicNuance/DYNAMIC_POLITENESS_POST_YNQ_1.gsl', v_0), 'return( $v_0 )') */ print_rule_rhs_line(S, rule(Prob, external_grammar_non_terminal(File, Var), Sem)) :- format(S, '~N', [File]), print_optional_var(S, Var), print_optional_prob(S, Prob), format(S, '~n', []), print_optional_sem(S, Sem), !. /* rule(no_prob, [non_terminal('D_Y_ANY_Y_ANY_ANY_0111111_3_SING',v_0), non_terminal('N_ANY_N_N_N_N_N_N_N_N_N_N_N_N_N_QUANT_EXHIBIT_ANY_ANY_NONE_NONE_3_SING', v_1)], 'return( concat( concat( ( ( spec $v_0 ) ) $v_1 ) () ) )') */ print_rule_rhs_line(S, rule(Prob, Items, Sem)) :- format(S, '~N( ', []), print_rhs_items(S, Items), format(S, ')', []), print_optional_prob(S, Prob), format(S, '~n', []), print_optional_sem(S, Sem), !. print_rule_rhs_line(S, Rule) :- format2error('~N*** Error: bad call to print_rule_rhs_line/2:~n', []), prettyprintq(print_rule_rhs_line(S, Rule)), fail. print_rhs_items(_S, []). print_rhs_items(S, [F | R]) :- print_rhs_item(S, F), format(S, ' ', []), !, print_rhs_items(S, R). print_rhs_item(S, non_terminal(Symbol, Var)) :- format(S, '~w', [Symbol]), print_optional_var(S, Var), !. print_rhs_item(S, terminal(Symbol)) :- format(S, '~w', [Symbol]), !. print_optional_var(_S, no_var) :- !. print_optional_var(S, Var) :- format(S, ':~w', [Var]), !. print_optional_prob(_S, no_prob) :- !. print_optional_prob(S, Prob) :- format(S, '~~~4f', [Prob]), !. print_optional_sem(_S, no_sem) :- !. print_optional_sem(S, Sem) :- format(S, '~N {~w}~n', [Sem]), !. %------------------------------------------------------------------------------------ line(empty) --> whitespace_or_empty. line(non_terminal(public_rule(NonTerminal))) --> non_terminal(NonTerminal), ":public". line(non_terminal(NonTerminal)) --> non_terminal(NonTerminal). line(left_square_bracket) --> "[", whitespace_or_empty. line(right_square_bracket) --> "]", whitespace_or_empty. line(semantic_return_value(Value)) --> whitespace_or_empty, "{", non_curly_bracket_sequence(String), "}", {atom_codes(Value, String)}. line(syn_rule(Prob, Body)) --> "(", whitespace_or_empty, body(Body), whitespace_or_empty, ")", probability_value(Prob). line(syn_rule(Prob, ExternalGrammarNonTerminal)) --> external_grammar_non_terminal(ExternalGrammarNonTerminal, Prob). body([F | R]) --> body_element(F), whitespace_or_empty, !, body(R). body([]) --> whitespace_or_empty. %:v_0 external_grammar_non_terminal(external_grammar_non_terminal(File, Var), Prob) --> "", {atom_codes(File, FileString)}, optional_associated_var(Var), probability_value(Prob), !. body_element(non_terminal(NonTerminal, Var)) --> non_terminal(NonTerminal), optional_associated_var(Var), !. body_element(terminal(Terminal)) --> terminal(Terminal). probability_value(DecimalNumber) --> "~0.", digit_sequence(DigitsAfterPoint), {safe_number_codes(DecimalNumber, [0'0, 0'. | DigitsAfterPoint])}, !. probability_value(1) --> "~1", !. probability_value(no_prob) --> [], !. non_terminal(NonTerminal) --> ".", uppercase_char(F), non_special_char_sequence(R), !, {atom_codes(NonTerminal, [0'., F | R])}. non_terminal(NonTerminal) --> nuance_uppercase_char(F), non_special_char_sequence(R), !, {atom_codes(NonTerminal, [F | R])}. terminal(Terminal) --> nuance_lowercase_char(F), non_special_char_sequence(R), !, {atom_codes(Terminal, [F | R])}. optional_associated_var(Var) --> ":", non_special_char_sequence(String), {atom_codes(Var, String)}, whitespace_or_empty, !. optional_associated_var(no_var) --> whitespace_or_empty. non_special_char_sequence([F | R]) --> non_special_char(F), !, non_special_char_sequence(R). non_special_char_sequence([]) --> []. digit_sequence([F | R]) --> digit_char(F), !, digit_sequence(R). digit_sequence([]) --> []. non_curly_bracket_sequence([F | R]) --> non_curly_bracket_char(F), !, non_curly_bracket_sequence(R). non_curly_bracket_sequence([]) --> []. whitespace_or_empty --> whitespace_char(_), !, whitespace_or_empty. whitespace_or_empty --> []. anything --> [_Any], !, anything. anything --> []. whitespace_char(Char) --> [Char], {whitespace_char(Char)}. uppercase_char(Char) --> [Char], {uppercase_char(Char)}. nuance_uppercase_char(Char) --> [Char], {nuance_uppercase_char(Char)}. nuance_lowercase_char(0'\') --> [0'\']. nuance_lowercase_char(Char) --> [Char], %{lowercase_char(Char)}. {nuance_lowercase_char(Char)}. digit_char(Char) --> [Char], {digit_char(Char)}. non_special_char(Char) --> [Char], {non_special_char(Char)}. non_special_char(Char) :- \+ special_char(Char). nuance_lowercase_char(Char) :- \+ uppercase_char(Char), \+ special_char(Char). nuance_lowercase_char(Char) :- accented_char(Char), \+ special_char(Char). nuance_uppercase_char(Char) :- uppercase_char(Char), \+ accented_char(Char). non_curly_bracket_char(Char) --> [Char], {\+ curly_bracket_char(Char)}. special_char(0' ). special_char(0'\n). special_char(0'\t). special_char(0':). special_char(0'~). special_char(0'(). special_char(0')). special_char(0'[). special_char(0']). special_char(0'{). special_char(0'}). special_char(0'$). special_char(0'~). special_char(0'<). special_char(0'>). curly_bracket_char(0'{). curly_bracket_char(0'}).