% This file is part of the Attempto Parsing Engine (APE).
% Copyright 2008-2013, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch).
%
% The Attempto Parsing Engine (APE) is free software: you can redistribute it and/or modify it
% under the terms of the GNU Lesser General Public License as published by the Free Software
% Foundation, either version 3 of the License, or (at your option) any later version.
%
% The Attempto Parsing Engine (APE) is distributed in the hope that it will be useful, but WITHOUT
% ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
% PURPOSE. See the GNU Lesser General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License along with the Attempto
% Parsing Engine (APE). If not, see http://www.gnu.org/licenses/.
/* APE client for the Unix command-line.
@author Tobias Kuhn
@author Kaarel Kaljurand
Building the executable file:
==
swipl -g "working_directory(_, 'prolog/parser'), [fit_to_plp], halt."
swipl -O -F none -g "[ape], qsave_program('ape.exe', [goal(ape), toplevel(halt)])." -t halt
==
TODO:
- Do better checking of which arguments can be used together. E.g. if the user uses both -server
and -httpserver, what should ape.exe do then?
- All: Should we drop ulexreload? Otherwise all the interfaces should support it, and one would
need to add also "acetextreload" for reasons of uniformity.
- All: make timelimits configurable, currently they are hard-coded
- Socket: The socket server currently supports exactly those arguments that get_ape_results/2 supports.
This differs from the other interfaces, which also support additional arguments (e.g. for fetching
files from URLs). The interfaces should be unified.
- Java: assuming that it supports the file-parameter, how are the possible errors
communicated to Java?
- HTTP: Add logging
*/
:- consult('pack.pl').
:- use_module('prolog/ape', [
		get_ape_results/2,
		get_ape_results_timelimit/3,
		get_ape_results_timelimit/4
	]).
:- use_module('prolog/logger/error_logger').
% Default encoding used for opening files in text mode.
:- set_prolog_flag(encoding, utf8).
:- initialization on_signal(int, _, default).
%% argument(?Arg, -Value, -Desc)
%
% Table of the command-line arguments understood by ape.exe.
%
% @param Arg is the argument as typed on the command-line (including the leading '-')
% @param Value is a placeholder naming the expected value, or '' if the argument is a plain switch
% @param Desc is the help text printed by show_help/0, or the atom =hidden= if the
%        argument must not be listed in the help
%
% @tbd maybe we could describe the types more formally, so that they could be checked,
% e.g. the type of the solo value is one of [drs, drsxml, ...], which could be checked
% with memberchk/2.
%
argument('-text', '"TEXT"', 'The input ACE text. If neither -text nor -file is present then the ACE text is read from stdin.').
argument('-file', 'FILENAME', 'The name or URL of the input file containing the ACE text.').
argument('-ulextext', '"TEXT"', 'The user lexicon (taken from a string).').
argument('-ulexfile', 'FILENAME', 'The user lexicon (taken from a file or URL).').
% The list of solo outputs includes 'sentences', matching the -csentences switch below
% and the values accepted by the 'solo' parameter of the HTTP interface.
argument('-solo', 'OUTPUT', 'Output just one output component. OUTPUT has to be one of {paraphrase,paraphrase1,paraphrase2,owlfss,owlfsspp,owlrdf,owlxml,ruleml,fol,pnf,tptp,tokens,sentences,syntax,syntaxpp,syntaxd,syntaxdpp,drs,drsxml,drspp,drshtml}.').
argument('-cinput', '', hidden).
argument('-cdrs', '', 'Output the DRS as a Prolog term.').
argument('-cdrsxml', '', 'Output the DRS in XML.').
argument('-cdrspp', '', 'Output the DRS in pretty-printed form in plain text.').
argument('-cdrshtml', '', 'Output the DRS in pretty-printed form in HTML.').
argument('-cparaphrase', '', 'Output a paraphrase which is a "best-effort" combination of paraphrase1 and paraphrase2.').
argument('-cparaphrase1', '', 'Output a paraphrase which uses full sentences instead of relative clauses.').
argument('-cparaphrase2', '', 'Output a paraphrase which uses relative clauses instead of full sentences.').
argument('-ctokens', '', 'Output tokens as a Prolog list of lists.').
argument('-csentences', '', 'Output sentences as a Prolog list.').
argument('-csyntax', '', 'Output simplified syntax trees as a Prolog list.').
argument('-csyntaxpp', '', 'Output simplified syntax trees in pretty-printed form.').
argument('-csyntaxd', '', 'Output plain syntax trees as a Prolog list (for debugging).').
argument('-csyntaxdpp', '', 'Output plain syntax trees in pretty-printed form (for debugging).').
argument('-cowlfss', '', 'Output OWL/SWRL in the Functional-Style Syntax representation (as Prolog term).').
argument('-cowlfsspp', '', 'Output OWL/SWRL in the Functional-Style Syntax representation (pretty-printed).').
argument('-cowlxml', '', 'Output OWL/SWRL in the XML representation.').
argument('-cowlrdf', '', 'Output OWL/SWRL in the RDF/XML representation. DEPRECATED').
argument('-cruleml', '', 'Output RuleML representation of the DRS.').
argument('-cfol', '', 'Output standard first-order logic representations (default form) of the DRS as a Prolog term.').
argument('-cpnf', '', 'Output standard first-order logic representations (prenex normal form) of the DRS as a Prolog term.').
argument('-ctptp', '', 'Output TPTP representation of the DRS.').
argument('-uri', 'URI', 'URI for the OWL outputs.').
argument('-noclex', '', 'Ignore the lexicon entries that are compiled into the executable.').
argument('-guess', '', 'Guess the word-class of unknown words.').
argument('-server', '', 'Launch a socket interface to APE at port 2766 (0xACE).').
argument('-httpserver', '', 'Launch an HTTP interface to APE at port 8000.').
argument('-port', 'NUMBER', 'Override the default port of either the socket or the HTTP interface.').
argument('-version', '', 'Shows version information.').
argument('-help', '', 'Shows this help page.').
%% ape is det.
%
% Default goal of =|ape.exe|=.
% Fetches the command-line arguments, turns them into Name=Value pairs
% and hands them to process_input/1. Any exception raised while converting
% or processing the arguments is pretty-printed via format_error_for_terminal/1.
%
ape :-
	current_prolog_flag(argv, Argv),
	get_arglist(Argv, Args),
	Run = (
		arglist_namevaluelist(Args, NameValues),
		process_input(NameValues)
	),
	catch(Run, Error, format_error_for_terminal(Error)).
%% get_arglist(+RawArgList, -ArgList)
%
% Extracts the user-supplied arguments from the raw argv list.
% In SWI v6.6.0+ the raw list could be used as it is:
%
%     get_arglist(ArgList) :-
%       current_prolog_flag(argv, ArgList).
%
% For backwards compatibility three shapes are handled, in this order:
%
%  1. the list contains '--': everything after the first '--' is returned;
%  2. the first element does not start with '-' (e.g. the path to ape.exe):
%     that element is dropped (TODO: on which OS is this needed?);
%  3. otherwise the list is returned unchanged.
%
get_arglist(RawArgList, ArgList) :-
	(	append(_, ['--'|AfterSeparator], RawArgList)
	->	ArgList = AfterSeparator
	;	RawArgList = [First|Rest],
		\+ sub_atom(First, 0, 1, _, '-')
	->	ArgList = Rest
	;	ArgList = RawArgList
	).
%% process_input(+InputList:list) is det.
%
% Dispatches on the parsed command-line. The cases are tried in this order:
% help (also for an empty list), version, one of the exact server/httpserver
% argument combinations, and finally parsing of an ACE text whose result is
% written to standard output.
%
% @param InputList is a list of input parameters (Name=Value pairs)
%
process_input(InputList) :-
	( InputList = [] ; memberchk(help=on, InputList) ),
	!,
	show_help.
process_input(InputList) :-
	memberchk(version=on, InputList),
	!,
	show_version.
process_input(InputList) :-
	interface_goal(InputList, Goal),
	!,
	call(Goal).
% @tbd Any other usage of server, httpserver, or port is illegal
process_input(InputList) :-
	read_ulex(InputList, WithUlex),
	read_file(WithUlex, WithText),
	get_ape_results(WithText, Content),
	set_utf8_encoding(user_output),
	writeln(Content).

% interface_goal(+InputList, -Goal)
%
% Maps the exact argument lists that start an interface to the goal
% that launches it. Only these six shapes are recognized.
interface_goal([server=on], server).
interface_goal([server=on, port=Port], server(Port)).
interface_goal([port=Port, server=on], server(Port)).
interface_goal([httpserver=on], http_server).
interface_goal([httpserver=on, port=Port], http_server(Port)).
interface_goal([port=Port, httpserver=on], http_server(Port)).
%% show_help
%
% Prints the version line, the copyright notice and one line for every
% entry of argument/3 whose description is not =hidden=.
% Succeeds even if show_version/0 fails (nothing else is printed then).
%
show_help :-
	(	show_version
	->	write('Copyright 2008-2013, Attempto Group, University of Zurich\n'),
		write('This program comes with ABSOLUTELY NO WARRANTY.\n'),
		write('This is free software, and you are welcome to redistribute it under certain conditions.\n'),
		write('Please visit http://attempto.ifi.uzh.ch for details.\n'),
		nl,
		write('Command-line arguments:\n'),
		forall(
			( argument(Arg, Value, Desc), Desc \= hidden ),
			format('~w ~w~20|~w~n', [Arg, Value, Desc])
		)
	;	true
	).
%% show_version is det.
%
% Writes a one-line banner with the APE version to the current output.
% The version is taken from version/1, which is not defined in this
% part of the file.
%
show_version :-
	version(Release),
	format("Attempto Parsing Engine for ACE 6.7, version ~w~n", [Release]).
%% arglist_namevaluelist(+ArgList:list, -NameValueList:list) is det.
%
% Converts the raw command-line arguments into Name=Value pairs, where Name
% is the argument without its leading '-'. Switches (arguments declared with
% an empty value placeholder in argument/3) get the value =on=. For the other
% arguments the following list element is taken as the value; it is converted
% into a number if atom_number/2 accepts it.
%
% @param ArgList is a list of arguments
% @param NameValueList is a list of ArgumentName=ArgumentValue pairs
% @throws error('Missing value for argument', _) if a known argument is not
%         followed by a value (or is followed by another known argument)
% @throws error('Illegal argument', _) if the argument is not listed in argument/3
arglist_namevaluelist([], []).
arglist_namevaluelist([Arg|Rest], NameValueList) :-
	(	argument(Arg, '', _)
	->	% Plain switch: no value is consumed.
		atom_concat('-', Name, Arg),
		NameValueList = [Name=on|Pairs],
		arglist_namevaluelist(Rest, Pairs)
	;	Rest = [RawValue|AfterValue],
		argument(Arg, _, _),
		\+ argument(RawValue, _, _)
	->	% Argument followed by something that is not itself an argument name.
		atom_concat('-', Name, Arg),
		(	catch(atom_number(RawValue, Number), _, fail)
		->	Value = Number
		;	Value = RawValue
		),
		NameValueList = [Name=Value|Pairs],
		arglist_namevaluelist(AfterValue, Pairs)
	;	argument(Arg, _, _)
	->	throw(error('Missing value for argument', context(arglist_namevaluelist/2, Arg)))
	;	throw(error('Illegal argument', context(arglist_namevaluelist/2, Arg)))
	).
%% read_file(+InputListIn:list, -InputListOut:list) is det.
%
% Makes sure that the parameter list carries the ACE text:
%
%  * if a =file= parameter is present, it is removed and replaced by a
%    =text= parameter holding the content of that file (or URL);
%  * otherwise, if a =text= parameter is present, the list is left unchanged;
%  * otherwise the text is read from standard input until end-of-file.
%
% @param InputListIn is a list of APE parameters
% @param InputListOut is a modified list of APE parameters
%
read_file(InputListIn, InputListOut) :-
	(	select(file=AceFile, InputListIn, WithoutFile)
	->	filename_to_filecontent(AceFile, FileText),
		InputListOut = [text=FileText | WithoutFile]
	;	memberchk(text=_, InputListIn)
	->	InputListOut = InputListIn
	;	% Switch off the prompt so that it does not get mixed into piped input.
		prompt(_, ''),
		read_stream_to_codes(user_input, Codes),
		atom_codes(StdinText, Codes),
		InputListOut = [text=StdinText | InputListIn]
	).
%% read_ulex(+InputListIn:list, -InputListOut:list) is det.
%
% If the parameter list contains a =ulexfile= parameter, it is removed and
% a =ulextext= parameter holding the content of that file (or URL) is put
% in front of the list. Without =ulexfile= the list is returned unchanged.
%
% @param InputListIn is a list of APE parameters
% @param InputListOut is a modified list of APE parameters
%
read_ulex(InputListIn, InputListOut) :-
	(	select(ulexfile=UlexFile, InputListIn, WithoutUlexFile)
	->	filename_to_filecontent(UlexFile, UlexText),
		InputListOut = [ulextext=UlexText | WithoutUlexFile]
	;	InputListOut = InputListIn
	).
% Note: we use: set_stream(In, encoding(utf8))
% This makes characters travel correctly through the socket.
% There might be other (better) solutions though.
:- use_module(library(streampool)).
:- style_check(-singleton).
% Default port of the socket interface.
port(2766).  % ape (0xACE)
%port(2767).  % ape-alpha (0xACF)
%port(2768).  % ape-old (0xAD0)

% server
%
% Starts the socket interface on the default port given by port/1.
server :-
	port(DefaultPort),
	server(DefaultPort).
% server(+Port)
%
% Announces the start-up on user_error, opens a listening TCP socket on Port
% (backlog 5), registers accept/1 for it in the stream pool and then enters
% the stream pool main loop.
server(Port) :-
	get_time_formatted(StartTime),
	format(user_error, "~w: Starting a socket interface for APE at port ~w ...~n", [StartTime, Port]),
	tcp_socket(ServerSocket),
	tcp_bind(ServerSocket, Port),
	tcp_listen(ServerSocket, 5),
	tcp_open_socket(ServerSocket, AcceptStream, _),
	add_stream_to_pool(AcceptStream, accept(ServerSocket)),
	stream_pool_main_loop.
% accept(+Socket)
%
% Accepts a pending connection on Socket, switches both streams of the new
% connection to UTF-8 and registers client/3 in the stream pool so that it
% is called when the client's input becomes available.
accept(Socket) :-
	tcp_accept(Socket, ClientSocket, Peer),
	tcp_open_socket(ClientSocket, ClientIn, ClientOut),
	set_utf8_encoding(ClientIn),
	set_utf8_encoding(ClientOut),
	add_stream_to_pool(ClientIn, client(ClientIn, ClientOut, Peer)).
% client(+In, +Out, +Peer)
%
% Serves one socket client by calling client_x/3. If client_x/3 throws, the
% exception is printed to user_error and the connection is cleaned up.
%
% client_x/3 closes In right after reading the request, so an exception
% raised later finds In already closed. The pool entry for In is therefore
% removed unconditionally, not only while In is still an open stream;
% otherwise a stale entry would stay in the stream pool.
% The streams are closed with force(true) so that an I/O error during the
% clean-up (e.g. the peer has gone away) does not raise a second exception
% from within the handler.
client(In, Out, Peer) :-
	catch(
		client_x(In, Out, Peer),
		Exception,
		(
			format(user_error, "~w~n", [Exception]),
			( is_stream(In) -> close(In, [force(true)]) ; true ),
			delete_stream_from_pool(In),
			( is_stream(Out) -> close(Out, [force(true)]) ; true )
		)
	).
% client_x(+In, +Out, +Peer)
%
% Reads one Prolog term from In and closes In. A request of the form
% get(Input) is answered with the result of get_ape_results_timelimit/3
% (20 seconds timelimit); anything else is answered with 'fail'.
% Afterwards Out is closed and In is removed from the stream pool.
%
% NB: 'APESERVERSTREAMEND' on a separate line marks the end of the stream.
% @bug should we use write_canonical/2 here?
client_x(In, Out, _Peer) :-
	% We suppress warnings of the atom being longer than 5 lines.
	% This declaration seems to have effect only here.
	style_check(-atom),
	read(In, Request),
	close(In),
	client_reply(Request, Out),
	close(Out),
	delete_stream_from_pool(In).

% client_reply(+Request, +Out)
%
% Writes the answer to Request, followed by the end-of-stream marker, to Out.
client_reply(get(Input), Out) :-
	!,
	get_ape_results_timelimit(Input, Result, 20),  % 20 seconds timelimit
	format(Out, '~w~nAPESERVERSTREAMEND~n', [Result]).
client_reply(_, Out) :-
	format(Out, 'fail~nAPESERVERSTREAMEND~n').
/**
The HTTP interface relies on SWI-Prolog HTTP support.
@see http://www.swi-prolog.org/packages/http.html
Note that the number of workers is set to 1 (default is 5).
Multiple workers would share the same assert/retract space and we do not want that
because APE is not completely thread-safe.
A better solution would be to make APE thread-safe and let the user decide on
the command-line on the number of workers (because the best-performing
number depends on the number of processor cores).
Setting the number of workers to 1 seems to make the server unresponsive at times,
so the timeout is lowered from the default 60 seconds to 5 seconds.
At Prolog prompt, stop the server by:
==
?- http_stop_server(8000, []).
==
Note that http_stop_server/2 does not make the port immediately available,
you have to wait a few (two?) minutes. Observe it with 'netstat -na'.
*/
:- use_module(library('http/thread_httpd')).
:- use_module(library('http/http_dispatch')).
:- use_module(library('http/http_parameters')).
:- use_module(library('http/http_client')).
:- use_module(library('http/http_cors')).
:- use_module(library('http/http_files')).
% Default port of the HTTP interface.
http_port(8000).

% Allow any origin.
% TODO: make configurable on the command-line
:- set_setting(http:cors, [*]).

% Configure the www root.
:- http_handler('/', ape, []).
%:- http_handler('/ape/', ape, []).

% If both environment variables APE_HTTPSERVER_FILES_PATH and
% APE_HTTPSERVER_FILES_DIR are set, additionally register a prefix handler
% for the given path that replies from the files in the given directory.
% If either variable is missing, nothing is registered.
:- ignore((
	getenv('APE_HTTPSERVER_FILES_PATH', Path),
	getenv('APE_HTTPSERVER_FILES_DIR', Dir),
	http_handler(Path, http_reply_from_files(Dir, []), [prefix])
)).
% Configure the APE webservice parameters.
%
% parameters(-Parameters)
%
% Parameters is the declaration list that ape/1 passes to http_parameters/2.
% The first argument of every element is left unbound here; after the call
% to http_parameters/2 it holds the value of the parameter if one was given.
%
% @bug unify this with the main argument description
% Note that while 'text' and 'file' are optional, at least one of them must be present.
% It is probably impossible to say this declaratively.
parameters([
	text(_,         [optional(true)]),
	file(_,         [optional(true)]),
	ulextext(_,     [optional(true)]),
	ulexfile(_,     [optional(true)]),
	noclex(_,       [oneof([on, off]), optional(true)]),
	guess(_,        [oneof([on, off]), optional(true)]),
	% ulexreload(_, [optional(true)]), % @tbd
	uri(_,          [default('http://attempto.ifi.uzh.ch/ontologies/owlswrl/test')]),
	cinput(_,       [oneof([on, off]), optional(true)]), % @bug deprecated
	cdrs(_,         [oneof([on, off]), optional(true)]),
	cdrsxml(_,      [oneof([on, off]), optional(true)]),
	cdrspp(_,       [oneof([on, off]), optional(true)]),
	cdrshtml(_,     [oneof([on, off]), optional(true)]),
	cparaphrase(_,  [oneof([on, off]), optional(true)]),
	cparaphrase1(_, [oneof([on, off]), optional(true)]),
	cparaphrase2(_, [oneof([on, off]), optional(true)]),
	ctokens(_,      [oneof([on, off]), optional(true)]),
	csentences(_,   [oneof([on, off]), optional(true)]),
	csyntax(_,      [oneof([on, off]), optional(true)]),
	csyntaxpp(_,    [oneof([on, off]), optional(true)]),
	csyntaxd(_,     [oneof([on, off]), optional(true)]),
	csyntaxdpp(_,   [oneof([on, off]), optional(true)]),
	cowlfss(_,      [oneof([on, off]), optional(true)]),
	cowlfsspp(_,    [oneof([on, off]), optional(true)]),
	cowlrdf(_,      [oneof([on, off]), optional(true)]),
	cowlxml(_,      [oneof([on, off]), optional(true)]),
	cruleml(_,      [oneof([on, off]), optional(true)]),
	cfol(_,         [oneof([on, off]), optional(true)]),
	% cpnf mirrors the -cpnf command-line switch; 'pnf' is also a legal solo value below.
	cpnf(_,         [oneof([on, off]), optional(true)]),
	ctptp(_,        [oneof([on, off]), optional(true)]),
	solo(_,         [oneof([drs, drsxml, drspp, drshtml,
					paraphrase, paraphrase1, paraphrase2,
					tokens, sentences,
					syntax, syntaxpp, syntaxd, syntaxdpp,
					owlfss, owlfsspp, owlrdf, owlxml,
					ruleml,
					fol, pnf, tptp]), optional(true)])
]).
% http_server
%
% Starts the HTTP interface on the default port given by http_port/1.
http_server :-
	http_port(DefaultPort),
	http_server(DefaultPort).
% http_server(+Port)
%
% Announces the start-up on user_error and starts an HTTP server on Port
% that dispatches requests via http_dispatch, using one worker and a
% timeout of 5 seconds. The calling thread then blocks in
% thread_get_message/1 and halts the process once a message arrives.
http_server(Port) :-
	get_time_formatted(StartTime),
	format(user_error, "~w: Starting an HTTP interface for APE at port ~w ...~n", [StartTime, Port]),
	ServerOptions = [port(Port), workers(1), timeout(5)],
	http_server(http_dispatch, ServerOptions),
	thread_get_message(_),
	halt.
%% ape(+Request) is det.
%
% Toplevel of the HTTP interface. Reads the declared parameters from
% Request, converts them into APE parameters and calls APE with a
% timelimit of 10 seconds. If any of these steps throws, the content type
% and content are produced by format_error_for_http/3 instead.
% Finally the CORS headers, the Content-type header and the content are written.
%
ape(Request) :-
	parameters(Declared),
	Work = (
		http_parameters(Request, Declared),
		http_parameters_to_ape_parameters(Declared, ApeParameters),
		get_ape_results_timelimit(ApeParameters, ContentType, Content, 10) % 10 second timelimit
	),
	catch(Work, Error, format_error_for_http(Error, ContentType, Content)),
	cors_enable(Request, [methods([get])]),
	format('Content-type: ~w\r\n\r\n~w', [ContentType, Content]).
%% http_parameters_to_ape_parameters(+HttpParameters:list, -ApeParameters:list) is det.
%
% Walks over the parameter declarations after http_parameters/2 has filled
% them in. A declaration whose first argument is still unbound (the
% parameter was not given) is dropped; every other declaration is turned
% into an APE parameter by key_value_to_parameter/3, using the functor name
% as the key and the first argument as the value.
%
% @param HttpParameters is the result of http_parameters/2
% @param ApeParameters is the inputlist for get_ape_results_timelimit/4
%
http_parameters_to_ape_parameters([], []).
http_parameters_to_ape_parameters([Declaration | Declarations], ApeParameters) :-
	arg(1, Declaration, Value),
	(	var(Value)
	->	ApeParameters = Rest
	;	functor(Declaration, Key, _),
		key_value_to_parameter(Key, Value, ApeParameter),
		ApeParameters = [ApeParameter | Rest]
	),
	http_parameters_to_ape_parameters(Declarations, Rest).
%% key_value_to_parameter(+Key:atom, +Value:atom, -ApeParameter:term) is det.
%
% Constructs an APE parameter which has a form Key = Value.
% Maps 'file' to 'text' and 'ulexfile' to 'ulextext' by fetching the
% content of the URL given as the value. All other keys are passed through.
%
% Note that for security reasons, the value of 'file' and 'ulexfile' cannot point
% to a local file (e.g. /etc/passwd). Only http-urls are allowed. This is
% enforced here with is_http_url/1 before anything is fetched, because the
% values come straight from the HTTP request.
% The following example shows an illegal query.
%
%==
% http://attempto.ifi.uzh.ch:8000/?file=/etc/passwd
%==
%
% @param Key is a parameter name
% @param Value is the parameter value
% @param ApeParameter is the corresponding APE parameter
% @throws error('Illegal URL', _) if the value of 'file' or 'ulexfile' is not an http-url
%
key_value_to_parameter(file, HttpUrl, text = FileContent) :-
	!,
	http_only_url_to_filecontent(HttpUrl, FileContent).
key_value_to_parameter(ulexfile, HttpUrl, ulextext = FileContent) :-
	!,
	http_only_url_to_filecontent(HttpUrl, FileContent).
key_value_to_parameter(Key, Value, Key = Value).

% http_only_url_to_filecontent(+Url, -FileContent)
%
% Fetches Url with httpurl_to_filecontent/2, but only after checking that
% it is an http-url; anything else is rejected with an exception.
http_only_url_to_filecontent(Url, FileContent) :-
	(	is_http_url(Url)
	->	httpurl_to_filecontent(Url, FileContent)
	;	throw(error('Illegal URL', context(key_value_to_parameter/3, Url)))
	).
%% filename_to_filecontent(+FileName:atom, -FileContent:atom) is det.
%
% Returns the content of the file named FileName as an atom.
% If FileName is an http-url (see is_http_url/1) the content is fetched
% with httpurl_to_filecontent/2; otherwise FileName is read as a regular
% file that is decoded as UTF-8.
%
% @param FileName is the name (possibly a URL) of a file
% @param FileContent is the content (as an atom) of the file
%
filename_to_filecontent(FileName, FileContent) :-
	is_http_url(FileName),
	!,
	httpurl_to_filecontent(FileName, FileContent).
filename_to_filecontent(FileName, FileContent) :-
	read_file_to_codes(FileName, ContentCodes, [encoding(utf8)]),
	atom_codes(FileContent, ContentCodes).
%% httpurl_to_filecontent(+HttpUrl:atom, -FileContent:atom) is det.
%
% Fetches a resource with an HTTP GET request, identifying itself with the
% user agent 'ape.exe (http://attempto.ifi.uzh.ch)'. The status entry of the
% reply header is inspected afterwards; any status code other than =ok= is
% turned into an exception.
%
% @param HttpUrl is a URL (expected to start as 'http://')
% @param FileContent is the content (as an atom) of the resource
% @throws socket_error (and other exceptions by http_get/3)
% @throws 'HTTP request failed' in case HTTP status code was not 'ok' (200)
%
% @bug Check if http_get expects UTF-8 or can handle other encodings as well
% @bug Make sure that http_get/3 never accesses local files (e.g. that it does not support 'file://')
%
httpurl_to_filecontent(HttpUrl, FileContent) :-
	RequestOptions = [ user_agent('ape.exe (http://attempto.ifi.uzh.ch)'), reply_header(Header) ],
	http_get(HttpUrl, FileContent, RequestOptions),
	memberchk(status(Status, Reason), Header),
	(	Status \= ok
	->	with_output_to(atom(Detail), format("~s: ~w", [Reason, HttpUrl])),
		throw(error('HTTP request failed', context(httpurl_to_filecontent/2, Detail)))
	;	true
	).
%% is_http_url(+Atom:atom) is semidet.
%
% Succeeds if Atom starts with 'http://'.
% The test is quite naive: it is case-sensitive and looks at the prefix only.
%
% @param Atom is an atom to be tested.
%
% @bug Make use of SWI library url.pl
%
is_http_url(Atom) :-
	atom_concat('http://', _, Atom).
%% format_error_for_terminal(+Exception:term) is det.
%
% Writes a one-line description of the exception to user_error.
% Exceptions of the form error(Formal, context(Predicate, Message)) are
% printed as their three parts (an unbound Message is shown as empty);
% any other term is printed as it is.
%
% @param Exception is the exception term, usually in the form
%        error(Formal, context(Module:Name/Arity, Message))
%
format_error_for_terminal(Exception) :-
	(	Exception = error(Formal, context(Predicate, Message))
	->	format_message(Message, Text),
		format(user_error, "ERROR: ~w: ~w: ~w~n", [Formal, Predicate, Text])
	;	format(user_error, "ERROR: ~w~n", [Exception])
	).
%% format_error_for_http(+Exception:term, -ContentType:atom, -Content:term) is det.
%
% Generates an error message from the exception term.
% For exceptions of the form error(Formal, context(Predicate, Message)) the
% message consists of four parts: the functor name of Formal, Formal itself,
% Predicate and Message (an unbound Message is shown as empty).
% Any other term is printed as it is.
%
% The format string has one directive per argument; with fewer directives
% than arguments format/2 raises an error, so this error handler would
% itself throw.
%
% @param Exception is the exception term, usually in the form
%        error(Formal, context(Module:Name/Arity, Message))
% @param ContentType is the content type that message is formatted into, e.g. text/xml
% @param Content is the formatted error message
%
% @tbd return the same error messages as apews.perl
% @bug not XML-safe
% NOTE(review): the content type is text/xml but the content is plain text
% without any XML markup -- confirm what the clients expect.
%
format_error_for_http(error(Formal, context(Predicate, Message)), 'text/xml', Xml) :-
	!,
	functor(Formal, Name, _),
	format_message(Message, FMessage),
	format(atom(Xml), "~w: ~w: ~w: ~w", [Name, Formal, Predicate, FMessage]).
format_error_for_http(Error, 'text/xml', Xml) :-
	format(atom(Xml), "~w", [Error]).
%% format_message(+Message:term, -FormattedMessage:atom)
%
% Normalizes the message part of an exception term, which is often left
% unbound: an unbound Message becomes the empty atom, anything else is
% returned unchanged.
format_message(Message, FormattedMessage) :-
	(	var(Message)
	->	FormattedMessage = ''
	;	FormattedMessage = Message
	).
%% set_utf8_encoding(+Stream)
%
% Sets the encoding of the given stream to UTF-8. For some unknown reason, an error is sometimes
% thrown under Windows XP when calling set_stream/2; any such error is caught and ignored here,
% so that the predicate succeeds in that case too.
set_utf8_encoding(Stream) :-
	SetEncoding = set_stream(Stream, encoding(utf8)),
	catch(SetEncoding, _IgnoredError, true).
%% get_time_formatted(-FormattedTimestamp)
%
% Returns the current time as an atom, formatted with the
% format_time/3 pattern '%F %T%z'.
%
get_time_formatted(FormattedTimestamp) :-
	get_time(Now),
	format_time(atom(FormattedTimestamp), '%F %T%z', Now).