:- module(nbest,
	[unpack_nbest_batchrec_list/2,
	 unpack_nbest_batchrec_list_for_dialogue/2,

	 notional_batchrec_item_for_transcription_in_nbest_list/2,
	 
	 nbest_preferences/4,
	 print_nbest_trace/3,
	 maybe_extract_and_print_training_data/2,
	 nbest_preferences_score_for_dialogue_record/3,
	 print_nbest_preference_info/2,

	 init_stored_nbest_translation_summary/0,
	 store_nbest_translation_summary/6,
	 print_stored_nbest_translation_summary/1,

	 zero_nbest_training_data_file_if_necessary/1,
	 print_nbest_training_data/0,
	 dont_print_nbest_training_data/0,

	 set_nbest_n/1,
	 get_nbest_n/1
	]).

:- use_module('$REGULUS/Prolog/paraphrases').

:- use_module('$REGULUS/Prolog/regulus_utilities').
:- use_module('$REGULUS/PrologLib/utilities').

:- use_module(library(lists)).

%---------------------------------------------------------------

/*
batchrec_item([wavfile='c:/cygwin/home/speech/regulus/examples/toy1/corpora/wavfiles/utt01.wav',
	       transcription=[switch,on,the,light],
	       
	       words=[switch,on,the,lights],
	       confidence=39,
	       nl_confidence=39,
	       nl_value=[[utterance_type,command],[action,switch],[onoff,on],[device,light]],
	       
	       words=[switch,off,the,lights],
	       confidence=38,
	       nl_confidence=39,
	       nl_value=[[utterance_type,command],[action,switch],[onoff,off],[device,light]],
	       
	       transcription=[switch,on,the,light]
	      ]).
*/

unpack_nbest_batchrec_list(BatchrecList, NBestBatchrecLists) :-
	select_pairs(BatchrecList, words, WordsList, NWordsLists),
	select_pairs(BatchrecList, confidence, ConfidenceList, NConfidenceList),
	select_pairs(BatchrecList, nl_confidence, NLConfidenceList, NNLConfidenceList),
	select_pairs(BatchrecList, nl_value, NLValueList, NNLValueList),
	member(wavfile=Wavfile, BatchrecList),
	(   member(transcription=Transcription, BatchrecList) ->
	    true
	;
	    otherwise ->
	    transcription = ['*no_transcription*']
	),
	% Check all lists have same length and that that length is greater than 1
	sort([NWordsLists, NConfidenceList, NNLConfidenceList, NNLValueList], [Length]),
	Length > 1,
	unpack_nbest_batchrec_list1(1, WordsList, ConfidenceList, NLConfidenceList, NLValueList, Wavfile, Transcription,
				    NBestBatchrecLists0),
	(   get_nbest_n(N) ->
	    firstn_or_all(NBestBatchrecLists0, N, NBestBatchrecLists)
	;
	    otherwise ->
	    NBestBatchrecLists0 = NBestBatchrecLists
	).

unpack_nbest_batchrec_list1(_N, [], [], [], [], _Wavfile, _Transcription, []).
unpack_nbest_batchrec_list1(I, [F | R], [F1 | R1], [F2 | R2], [F3 | R3], Wavfile, Transcription, [FCombined | RCombined]) :-
	create_nbest_item(I, F, F1, F2, F3, Wavfile, Transcription, FCombined),
	I1 is I + 1,
	!,
	unpack_nbest_batchrec_list1(I1, R, R1, R2, R3, Wavfile, Transcription, RCombined).

create_nbest_item(I, F, F1, F2, F3, Wavfile, Transcription, FCombined) :-
	FCombined = [wavfile=Wavfile,
		     transcription=Transcription,
		     rank=I,
		     F, F1, F2, F3].

unpack_nbest_batchrec_list_for_dialogue(BatchrecList, NBestBatchrecLists) :-
	select_pairs(BatchrecList, words, WordsList, NWordsLists),
	select_pairs(BatchrecList, confidence, ConfidenceList, NConfidenceList),
	member(wavfile=Wavfile, BatchrecList),
	(   member(transcription=Transcription, BatchrecList) ->
	    true
	;
	    otherwise ->
	    transcription = ['*no_transcription*']
	),
	% Check both lists have same length and that that length is greater than 1
	sort([NWordsLists, NConfidenceList], [Length]),
	Length > 1,
	unpack_nbest_batchrec_list_for_dialogue1(1, WordsList, ConfidenceList, Wavfile, Transcription,
						 NBestBatchrecLists0),
	(   get_nbest_n(N) ->
	    firstn_or_all(NBestBatchrecLists0, N, NBestBatchrecLists)
	;
	    otherwise ->
	    NBestBatchrecLists0 = NBestBatchrecLists
	).

unpack_nbest_batchrec_list_for_dialogue1(_N, [], [], _Wavfile, _Transcription, []).
unpack_nbest_batchrec_list_for_dialogue1(I, [F | R], [F1 | R1], Wavfile, Transcription, [FCombined | RCombined]) :-
	create_nbest_item_for_dialogue(I, F, F1, Wavfile, Transcription, FCombined),
	I1 is I + 1,
	!,
	unpack_nbest_batchrec_list_for_dialogue1(I1, R, R1, Wavfile, Transcription, RCombined).

create_nbest_item_for_dialogue(I, F, F1, Wavfile, Transcription, FCombined) :-
	FCombined = [wavfile=Wavfile,
		     transcription=Transcription,
		     rank=I,
		     F,
		     F1].

select_pairs(List, Key, SelectedList, N) :-
	select_pairs1(List, Key, SelectedList),
	length(SelectedList, N).

select_pairs1([], _Key, []).
select_pairs1([Key=Value | R], Key, [Key=Value | R1]) :-
	!,
	select_pairs1(R, Key, R1).
select_pairs1([_F | R], Key, R1) :-
	!,
	select_pairs1(R, Key, R1).

%---------------------------------------------------------------

notional_batchrec_item_for_transcription_in_nbest_list(BatchrecList, TranscriptionBatchrecList) :-
	member(wavfile=Wavfile, BatchrecList),
	member(transcription=SourceWords, BatchrecList),
	\+ SourceWords = '*no_transcription*',
	\+ member('(guessed)', SourceWords),
	(   paraphrase_for_sentence_words(SourceWords, ParaphraseWords) ->
	    TranscriptionBatchrecList = [wavfile=Wavfile,
					 words=ParaphraseWords,
					 transcription=SourceWords,
					 paraphrase=ParaphraseWords,
					 rank=0,
					 confidence=100]
	;
	    otherwise ->
	    TranscriptionBatchrecList = [wavfile=Wavfile,
					 words=SourceWords,
					 transcription=SourceWords,
					 rank=0,
					 confidence=100]
	),
	!.

%---------------------------------------------------------------

/*

Tuples is a list of elements of the form [record=Record | OtherMaterial]

Each Record is an alist produced by dialogue:dialogue_process_item_normal. It must contain an element of the form rank=Rank.

*/

nbest_preferences(Tuples, ChosenTuple, Trace, RankOfChosenElement) :-
	findall(Score-[Tuple, PrefTrace],
		( member(Tuple, Tuples), nbest_preferences_score(Tuple, Score, PrefTrace) ),
		ScoredTuples),
	keysort(ScoredTuples, SortedScoredTuples),
	reverse(SortedScoredTuples, ReversedSortedScoredTuples),
	ReversedSortedScoredTuples = [BestElement | _Rest],
	get_rank_for_trace_element(BestElement, RankOfChosenElement),
	BestElement = _BestScore-[ChosenTuple, _ChosenPrefTrace],
	Trace = ScoredTuples,
	!.
nbest_preferences(_Tuples, _ChosenTuple, _Trace) :-
	format2error('~N*** Error in nbest_preferences/3~n', []),
	fail.

nbest_preferences_score(Tuple, TotalScore, PrefTrace) :-
	member(record=Record, Tuple),
	nbest_preferences_score_for_dialogue_record(Record, TotalScore, PrefTrace).

nbest_preferences_score_for_dialogue_record(Record, TotalScore, PrefTrace) :-
	remove_judgements_from_record(Record, Record1),
	findall([Feature, Weight, Score],
		(   nbest_features:feature_weight(Feature, Weight),
		    nbest_features:feature_value_for_record(Feature, Record1, Score)
		),
		Triples),
	total_score(Triples, 0-TotalScore),
	PrefTrace = Triples.

total_score([], In-In).
total_score([[_Feature, Weight, Score] | R], In-Out) :-
	Next is In + ( Score * Weight ),
	!,
	total_score(R, Next-Out).

%---------------------------------------------------------------

print_nbest_trace(Trace, RankOfChosenElement, S) :-
	(   RankOfChosenElement = 1 ->
	    Comment = ''
	;
	    otherwise ->
	    Comment = 'NON-TOP '
	),
	format(S, '~N~n/*~n', []),
	format(S, '~NN-best trace (~whypothesis #~d selected):~n~n', [Comment, RankOfChosenElement]),
	print_nbest_trace1(Trace, S, 0),
	format(S, '~N*/~n', []),
	
	maybe_extract_and_print_training_data(Trace, RankOfChosenElement),
	!.
print_nbest_trace(_Trace, _S) :-
	format2error('~N*** Error: unable to print N-best trace~n', []),
	fail.

print_nbest_trace1([], _S, _N).
print_nbest_trace1([F | R], S, N) :-
	get_rec_result_and_transcription_for_trace_element(F, RecResult, Transcription),
	(   N = 0 ->
	    format(S, '~NHypothesis #~d~n"~w" (transcription - not real speech hypothesis)~n', [N, RecResult])
	;
	    otherwise ->
	    format(S, '~NHypothesis #~d~n"~w" (correct: "~w")~n', [N, RecResult, Transcription])
	),
	print_nbest_trace_element(F, S),
	N1 is N + 1,
	!,
	print_nbest_trace1(R, S, N1).

print_nbest_trace_element(TraceElement, S) :-
	TraceElement = Score-[Tuple, PrefTrace],
	member(record=Record, Tuple),
	remove_judgements_from_record(Record, Record1),
	format(S, '~N~n', []),
	prettyprint_to_stream(S, Record1),
	format(S, '~N~nPreference info:~n~n', []),
	%prettyprint_to_stream(S, PrefTrace),
	print_nbest_preference_info(PrefTrace, S),
	FloatScore is float(Score),
	format(S, '~NScore: ~2f~n~n', [FloatScore]),
	!.
print_nbest_trace_element(TraceElement, _S) :-
	format2error('~N*** Error: unable to print N-best trace element~n', []),
	prettyprint(TraceElement),
	fail.

print_nbest_preference_info([], _S).
print_nbest_preference_info([F | R], S) :-
	print_nbest_preference_info_item(F, S),
	!,
	print_nbest_preference_info(R, S).

print_nbest_preference_info_item(Item, S) :-
	Item = [Feature, Weight, Score],
	WeightedScore is Weight * Score,
	FloatWeight is float(Weight),
	FloatScore is float(Score),
	FloatWeightedScore is float(WeightedScore),
	format(S, '~N~w~40|~2f * ~2f = ~2f~n', [Feature, FloatWeight, FloatScore, FloatWeightedScore]).

%---------------------------------------------------------------

:- dynamic stored_nbest_translation_summary/1.

init_stored_nbest_translation_summary :-
	retractall(stored_nbest_translation_summary(_)).

store_nbest_translation_summary(Source, Recognised, Wavfile, Target, Stats, Judgement) :-
	member(non_top_hyp=yes, Stats),
	member(selected_hyp=Rank, Stats),
	member(interlingua_surface=InterlinguaSurface, Stats),
	assertz(stored_nbest_translation_summary([wavfile=Wavfile,
						  source=Source,
						  recognised=Recognised,
						  selected_hyp=Rank,
						  interlingua_surface=InterlinguaSurface,
						  target=Target,
						  judgement=Judgement])),
	!.
store_nbest_translation_summary(_Source, _Recognised, _Wavfile, _Target, _Stats, _Judgement).
						  
print_stored_nbest_translation_summary(_S) :-
	\+ stored_nbest_translation_summary(_),
	!.
print_stored_nbest_translation_summary(S) :-
	format(S, '~N~n~n/*', []),
	format(S, '~N~n*** N-BEST TRANSLATION SUMMARY ***~n', []),
	print_stored_nbest_translation_summary1(S),
	format(S, '~N~n~n*/~n', []),
	!.

print_stored_nbest_translation_summary1(S) :-
	stored_nbest_translation_summary(List),
	format(S, '~N~n', []),
	prettyprintq_to_stream(S, nbest_example(List), 0, 150),
	format(S, '.~n', []),
	fail.
print_stored_nbest_translation_summary1(_S).

%---------------------------------------------------------------

maybe_extract_and_print_training_data(Trace, RankOfChosenElement) :-
	(   printing_nbest_training_data ->
	    extract_and_print_training_data(Trace, RankOfChosenElement)
	;
	    otherwise ->
	    true
	).

extract_and_print_training_data(Trace, _RankOfChosenElement) :-
	get_nbest_training_data_file(File),
	extract_nbest_training_data(Trace, Data),
	open(File, append, S),
	format(S, '~N~n', []),
	prettyprintq_to_stream(S, Data),
	%format(S, '.~N~n', []),
	%print_semantic_recognition_info_as_comment(S, Trace, RankOfChosenElement),
	close(S),
	!.
extract_and_print_training_data(Trace, RankOfChosenElement) :-
	format2error('~N*** Error: bad call: ~w~n',
		     [extract_and_print_training_data(Trace, RankOfChosenElement)]),
	fail.

zero_nbest_training_data_file_if_necessary(File) :-
	printing_nbest_training_data,
	get_nbest_training_data_file(File),
	open(File, write, S),
	close(S),
	!.
zero_nbest_training_data_file_if_necessary(no_file).

get_nbest_training_data_file(AbsFile) :-
	user:get_regulus_config_item(nbest_training_data_file, File),
	safe_absolute_file_name(File, AbsFile),
	!.

%---------------------------------------------------------------

print_semantic_recognition_info_as_comment(S, Trace, RankOfChosenElement) :-
	format(S, '~N%~n', []),
	format(S, '~N% ---------------------------------~n', []),
	format(S, '~N%~n', []),
	(   \+ trace_shows_good_semantic_recognition_exists(Trace) ->
	    format(S, '~N% [ALL CHOICES ARE BAD]~n', []),
	    format(S, '~N%~n', [])
	;
	    trace_shows_bad_semantic_recognition(Trace, RankOfChosenElement) ->
	    format(S, '~N% [BAD N-BEST CHOICE]~n', []),
	    format(S, '~N%~n', [])
	;
	    RankOfChosenElement > 1 ->
	    format(S, '~N% [GOOD NON-TOP HYPOTHESIS CHOSEN]~n', []),
	    format(S, '~N%~n', [])
	;
	    otherwise ->
	    format(S, '~N%~n', [])
	),
	print_semantic_recognition_as_comment(S, Trace, RankOfChosenElement),
	!.
print_semantic_recognition_info_as_comment(_S, _Trace, _RankOfChosenElement).

trace_shows_good_semantic_recognition_exists(Trace) :-
	member(_Score-[Tuple, _PrefTrace], Trace),
	member(record=Record, Tuple),
	member(sem_recognition=GoodOrUnclear, Record),
	good_or_unclear(GoodOrUnclear),
	member(rank=Rank, Record),
	Rank > 0,
	!.

good_or_unclear(good).
good_or_unclear(unclear).

trace_shows_bad_semantic_recognition(Trace, RankOfChosenElement) :-
	member(_Score-[Tuple, _PrefTrace], Trace),
	member(record=Record, Tuple),
	member(rank=RankOfChosenElement, Record),
	member(sem_recognition=bad, Record),
	!.

print_semantic_recognition_as_comment(_S, [], _RankOfChosenElement).
print_semantic_recognition_as_comment(S, [F | R], RankOfChosenElement) :-
	print_semantic_recognition_hyp_as_comment(S, F, RankOfChosenElement),
	!,
	print_semantic_recognition_as_comment(S, R, RankOfChosenElement).

print_semantic_recognition_hyp_as_comment(S, TraceElement, RankOfChosenElement) :-
	get_rec_result_and_transcription_for_trace_element(TraceElement, RecSent, TranscriptionSent),
	get_rank_for_trace_element(TraceElement, Rank),
	(   Rank = 0 ->
	    format(S, '~N% Transcription: "~w"~n~n', [TranscriptionSent])
	;
	    otherwise ->
	    (   Rank = RankOfChosenElement ->
		Tag = ' (selected)'
	    ;
		Tag = ''
	    ),
	    format(S, '~N% ~d: "~w"~w~n', [Rank, RecSent, Tag])
	).
print_semantic_recognition_hyp_as_comment(S, TraceElement, RankOfChosenElement) :-
	format2error('~N*** Error: bad call: ~w~n',
		     [print_semantic_recognition_hyp_as_comment(S, TraceElement, RankOfChosenElement)]),
	fail.

%---------------------------------------------------------------

get_rec_result_and_transcription_for_trace_element(TraceElement, RecSent, TranscriptionSent) :-
	TraceElement = _Score-[Tuple, _PrefTrace],
	member(record=Record, Tuple),
	member(sent=TranscriptionSent, Record),
	member(recognised=RecWords, Record),
	join_with_spaces(RecWords, RecSent),
	!.
get_rec_result_and_transcription_for_trace_element(TraceElement, RecSent, TranscriptionSent) :-
	format2error('~N*** Error: bad call: ~w~n',
		     [get_rec_result_and_transcription_for_trace_element(TraceElement, RecSent, TranscriptionSent)]),
	fail.

get_rank_for_trace_element(TraceElement, Rank) :-
	TraceElement = _Score-[Tuple, _PrefTrace],
	member(record=Record, Tuple),	
	member(rank=Rank, Record),
	!.
get_rank_for_trace_element(TraceElement, Rank) :-
	format2error('~N*** Error: bad call: ~w~n',
		     [get_rank_for_trace_element(TraceElement, Rank)]),
	fail.

%---------------------------------------------------------------

:- dynamic printing_nbest_training_data/0.

print_nbest_training_data :-
	(   user:get_regulus_config_item(nbest_training_data_file, _File) ->
	    retractall(printing_nbest_training_data),
	    assertz(printing_nbest_training_data)
	;
	    otherwise ->
	    format2error('~N*** Error: meaningless to switch on printing of N-best training data without nbest_training_data_file entry~n', []),
	    fail
	).	

dont_print_nbest_training_data :-
	retractall(printing_nbest_training_data).

%---------------------------------------------------------------

extract_nbest_training_data(Trace, Data) :-
	Data = nbest_data([wavfile=Wavfile, correct_words=Sent],
			  HypsData),
	get_wavfile_and_correct_words_from_trace(Trace, Wavfile, Sent),
	extract_nbest_training_data1(Trace, HypsData),
	!.

get_wavfile_and_correct_words_from_trace(Trace, Wavfile, SentWords) :-
	Trace = [FirstHyp | _Rest],
	FirstHyp = _Score-[Tuple, _PrefTrace],
	member(record=Record, Tuple),
	(   member(wavfile=Wavfile, Record) ->
	    true
	;
	    otherwise ->
	    Wavfile = unknown_wavfile
	),
	member(sent=SentAtom, Record),
	split_atom_into_words(SentAtom, SentWords),
	!.
get_wavfile_and_correct_words_from_trace(Trace, Wavfile, SentWords) :-
	format2error('~N*** Error: bad call: ~w~n',
		     [get_wavfile_and_correct_words_from_trace(Trace, Wavfile, SentWords)]),
	fail.

extract_nbest_training_data1([], []).
extract_nbest_training_data1([F | R], [F1 | R1]) :-
	extract_training_data_from_hyp(F, F1),
	!,
	extract_nbest_training_data1(R, R1).

extract_training_data_from_hyp(Hyp, FeatValPairs) :-
	Hyp = _Score-[Tuple, PrefTrace],
	member(record=Record, Tuple),
	remove_judgements_from_record(Record, Record1),
	findall(Feat=Val,
		(   extract_feat_val_from_record_if_possible(Record1, Feat, Val)
		;   extract_feat_val_from_pref_trace(PrefTrace, Feat, Val)
		),
		FeatValPairs0),
	safe_remove_duplicates(FeatValPairs0, FeatValPairs).

extract_feat_val_from_pref_trace(PrefTrace, Feat, Val) :-
	member([Feat, _Weight, Val], PrefTrace).

extract_feat_val_from_record_if_possible(Record, Feat, Val) :-
	current_predicate(nbest_features:extract_feat_val_from_record/3),
	nbest_features:extract_feat_val_from_record(Record, Feat, Val).

%---------------------------------------------------------------

:- dynamic nbest_n/1.

set_nbest_n(N) :-
	integer(N),
	retractall(nbest_n(_)),
	assertz(nbest_n(N)),
	!.
set_nbest_n(N) :-
	format2error('~N*** Error: bad call: ~w~n', [set_nbest_n(N)]),
	fail.

get_nbest_n(N) :-
	nbest_n(N).