1/*  $Id: http_open.pl,v 1.1.1.1 2005/03/04 07:57:51 stavros Exp $
    2
    3    Part of SWI-Prolog
    4
    5    Author:        Jan Wielemaker
    6    E-mail:        jan@swi.psy.uva.nl
    7    WWW:           http://www.swi-prolog.org
    8    Copyright (C): 1985-2002, University of Amsterdam
    9
   10    This program is free software; you can redistribute it and/or
   11    modify it under the terms of the GNU General Public License
   12    as published by the Free Software Foundation; either version 2
   13    of the License, or (at your option) any later version.
   14
   15    This program is distributed in the hope that it will be useful,
   16    but WITHOUT ANY WARRANTY; without even the implied warranty of
   17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   18    GNU General Public License for more details.
   19
   20    You should have received a copy of the GNU Lesser General Public
   21    License along with this library; if not, write to the Free Software
   22    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   23
   24    As a special exception, if you link this library with other files,
   25    compiled with a Free Software compiler, to produce an executable, this
   26    library does not by itself cause the resulting executable to be covered
   27    by the GNU General Public License. This exception does not however
   28    invalidate any other reasons why the executable file might be covered by
   29    the GNU General Public License.
   30*/
   31
   32
   33:- module(http_open,
   34	  [ http_open/3			% +URL, -Stream, +Options
   35	  ]).   36:- use_module(library(url)).   37:- use_module(library(readutil)).   38:- use_module(library(socket)).   39
   40user_agent('SWI-Prolog (http://www.swi-prolog.org)').
   41
   42/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   43This library provides a simple-minded   light-weight HTTP client library
   44to get the data from an  URL   using  the GET-method. More advanced HTTP
   45client support is provided by http_client.pl
   46- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
   47
   48%	http_open(+Url, -Stream, [+Options])
   49%	
   50%	Open a HTTP url as a (binary) stream. Uses HTTP 1.0 protocol
   51%	revision to deal with virtual hosts and to be able to interpret
   52%	the header.
   53%	
   54%	Supported options:
   55%	
   56%		size(-Size)		Return size of the resource
   57%		header(Name, -Atom)	Return headerfield as atom
   58%		timeout(+Timeout)	Raise exception on timeout
   59%		proxy(+Host, +Port)	Use an HTTP proxy server
   60%		user_agent(+Agent)	User agent for identifying
   61
   62http_open(Url, Stream, Options) :-
   63	atom(Url), !,
   64	parse_url(Url, Parts),
   65	http_open(Parts, Stream, Options).
   66http_open(Parts, Stream, Options) :-
   67	memberchk(proxy(Host, Port), Options), !,
   68	user_agent(Agent, Options),
   69	parse_url(URL, Parts),
   70	open_socket(Host:Port, In, Out, Options),
   71	format(Out,
   72	       'GET ~w HTTP/1.0~n\
   73	       Host: ~w~n\
   74	       User-Agent: ~w~n\
   75	       Connection: close~n~n',
   76	       [URL, Host, Agent]),
   77	close(Out),
   78					% read the reply header
   79	read_header(In, Code, Comment, Lines),
   80	do_open(Code, Comment, Lines, Options, Parts, In, Stream).
   81http_open(Parts, Stream, Options) :-
   82	memberchk(host(Host), Parts),
   83	option(port(Port), Parts, 80),
   84	http_location(Parts, Location),
   85	user_agent(Agent, Options),
   86	open_socket(Host:Port, In, Out, Options),
   87	format(Out,
   88	       'GET ~w HTTP/1.0~n\
   89	       Host: ~w~n\
   90	       User-Agent: ~w~n\
   91	       Connection: close~n~n',
   92	       [Location, Host, Agent]),
   93	close(Out),
   94					% read the reply header
   95	read_header(In, Code, Comment, Lines),
   96	do_open(Code, Comment, Lines, Options, Parts, In, Stream).
   97
   98
   99option(Option, List, Default) :-
  100	(   memberchk(Option, List)
  101	->  true
  102	;   arg(1, Option, Default)
  103	).
  104
  105user_agent(Agent, Options) :-
  106	(   memberchk(user_agent(Agent), Options)
  107	->  true
  108	;   user_agent(Agent)
  109	).
  110
  111do_open(200, _, Lines, Options, Parts, In, In) :- !,
  112	return_size(Options, Lines),
  113	return_fields(Options, Lines),
  114					% properly re-initialise the stream
  115	parse_url(Id, Parts),
  116	set_stream(In, file_name(Id)),
  117	set_stream(In, record_position(true)).
  118					% Handle redirections
  119do_open(302, _, Lines, Options, _Parts, In, Stream) :-
  120	location(Lines, Location), !,
  121	close(In),
  122	http_open(Location, Stream, Options).
  123					% report anything else as error
  124do_open(Code, Comment, _, _, Parts, In, In) :-
  125	close(In),
  126	parse_url(Id, Parts),
  127	throw(error(existence_error(url, Id),
  128		    context(_, status(Code, Comment)))).
  129
  130
  131open_socket(Host:Port, In, Out, Options) :-
  132	tcp_socket(Socket),
  133	tcp_connect(Socket, Host:Port),
  134	tcp_open_socket(Socket, In, Out),
  135	set_stream(In, record_position(false)),
  136	(   memberchk(Options, timeout(Timeout))
  137	->  set_stream(In, timeout(Timeout))
  138	;   true
  139	).
  140
  141
  142return_size(Options, Lines) :-
  143	memberchk(size(Size), Options), !,
  144	content_length(Lines, Size).
  145return_size(_, _).
  146
  147return_fields([], _).
  148return_fields([header(Name, Value)|T], Lines) :-
  149	atom_codes(Name, Codes),
  150	(   member(Line, Lines),
  151	    phrase(atom_field(Codes, Value), Line)
  152	->  true
  153	;   Value = ''
  154	),
  155	return_fields(T, Lines).
  156return_fields([_|T], Lines) :-
  157	return_fields(T, Lines).
  158
  159
  160read_header(In, Code, Comment, Lines) :-
  161	read_line_to_codes(In, Line),
  162	phrase(first_line(Code, Comment), Line),
  163	read_line_to_codes(In, Line2),
  164	rest_header(Line2, In, Lines).
  165
  166
  167rest_header("", _, []).
  168rest_header(L0, In, [L0|L]) :-
  169	read_line_to_codes(In, L1),
  170	rest_header(L1, In, L).
  171
  172content_length(Lines, Length) :-
  173	member(Line, Lines),
  174	phrase(content_length(Length0), Line), !,
  175	Length = Length0.
  176
  177location(Lines, Location) :-
  178	member(Line, Lines),
  179	phrase(atom_field("location", Location), Line), !.
  180
  181first_line(Code, Comment) -->
  182	"HTTP/", [_], ".", [_],
  183	skip_blanks,
  184	integer(Code),
  185	skip_blanks,
  186	rest(Comment).
  187
  188atom_field(Name, Value) -->
  189	field(Name),
  190	rest(Value).
  191
  192content_length(Len) -->
  193	field("content-length"),
  194	integer(Len).
  195
  196field([]) -->
  197	":",
  198	skip_blanks.
  199field([H|T]) -->
  200	[C],
  201	{ match_header_char(H, C)
  202	},
  203	field(T).
  204
  205match_header_char(C, C) :- !.
  206match_header_char(C, U) :-
  207	code_type(C, to_lower(U)), !.
  208match_header_char(0'_, 0'-).
  209
  210
  211skip_blanks -->
  212	[C],
  213	{ code_type(C, white)
  214	}, !,
  215	skip_blanks.
  216skip_blanks -->
  217	[].
  218
  219
  220integer(Code) -->
  221	digit(D0),
  222	digits(D),
  223	{ number_codes(Code, [D0|D])
  224	}.
  225
  226
  227digit(C) -->
  228	[C],
  229	{ code_type(C, digit)
  230	}.
  231
  232
  233digits([D0|D]) -->
  234	digit(D0), !,
  235	digits(D).
  236digits([]) -->
  237	[].
  238
  239
  240rest(A,L,[]) :-
  241	atom_codes(A, L)