View source with raw comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2002-2020, University of Amsterdam
    7                              VU University Amsterdam
    8                              CWI, Amsterdam
    9    All rights reserved.
   10
   11    Redistribution and use in source and binary forms, with or without
   12    modification, are permitted provided that the following conditions
   13    are met:
   14
   15    1. Redistributions of source code must retain the above copyright
   16       notice, this list of conditions and the following disclaimer.
   17
   18    2. Redistributions in binary form must reproduce the above copyright
   19       notice, this list of conditions and the following disclaimer in
   20       the documentation and/or other materials provided with the
   21       distribution.
   22
   23    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   24    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   25    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   26    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   27    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   28    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   29    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   30    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   31    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   33    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   34    POSSIBILITY OF SUCH DAMAGE.
   35*/
   36
   37:- module(http_open,
   38          [ http_open/3,                % +URL, -Stream, +Options
   39            http_set_authorization/2,   % +URL, +Authorization
   40            http_close_keep_alive/1     % +Address
   41          ]).   42:- autoload(library(aggregate),[aggregate_all/3]).   43:- autoload(library(apply),[foldl/4,include/3]).   44:- autoload(library(base64),[base64/3]).   45:- autoload(library(debug),[debug/3,debugging/1]).   46:- autoload(library(error),
   47	    [ domain_error/2, must_be/2, existence_error/2, instantiation_error/1
   48	    ]).   49:- autoload(library(lists),[last/2,member/2]).   50:- autoload(library(option),
   51	    [ meta_options/3, option/2, select_option/4, merge_options/3,
   52	      option/3, select_option/3
   53	    ]).   54:- autoload(library(readutil),[read_line_to_codes/2]).   55:- autoload(library(socket),
   56	    [tcp_connect/3,negotiate_socks_connection/2]).   57:- autoload(library(uri),
   58	    [ uri_resolve/3, uri_components/2, uri_data/3,
   59              uri_authority_components/2, uri_authority_data/3,
   60	      uri_encoded/3, uri_query_components/2, uri_is_global/1
   61	    ]).   62:- autoload(library(http/http_header),
   63            [ http_parse_header/2, http_post_data/3 ]).   64:- autoload(library(http/http_stream),[stream_range_open/3]).   65:- if(exists_source(library(ssl))).   66:- autoload(library(ssl), [ssl_upgrade_legacy_options/2]).   67:- endif.

HTTP client library

This library defines http_open/3, which opens a URL as a Prolog stream. The functionality of the library can be extended by loading two additional modules that act as plugins:

library(http/http_ssl_plugin)
Loading this library causes http_open/3 to handle HTTPS connections. Relevant options for SSL certificate handling are handed to ssl_context/3. This plugin is loaded automatically if the scheme https is requested using a default SSL context. See the plugin for additional information regarding security.
library(http/http_cookie)
Loading this library adds tracking cookies to http_open/3. Returned cookies are collected in the Prolog database and supplied for subsequent requests.

Here is a simple example to fetch a web-page:

?- http_open('http://www.google.com/search?q=prolog', In, []),
   copy_stream_data(In, user_output),
   close(In).
<!doctype html><head><title>prolog - Google Search</title><script>
...

The example below fetches the modification time of a web-page. Note that Modified is '' (the empty atom) if the web-server does not provide a time-stamp for the resource. See also parse_time/2.

modified(URL, Stamp) :-
        http_open(URL, In,
                  [ method(head),
                    header(last_modified, Modified)
                  ]),
        close(In),
        Modified \== '',
        parse_time(Modified, Stamp).

Then next example uses Google search. It exploits library(uri) to manage URIs, library(sgml) to load an HTML document and library(xpath) to navigate the parsed HTML. Note that you may need to adjust the XPath queries if the data returned by Google changes.

:- use_module(library(http/http_open)).
:- use_module(library(xpath)).
:- use_module(library(sgml)).
:- use_module(library(uri)).

google(For, Title, HREF) :-
        uri_encoded(query_value, For, Encoded),
        atom_concat('http://www.google.com/search?q=', Encoded, URL),
        http_open(URL, In, []),
        call_cleanup(
            load_html(In, DOM, []),
            close(In)),
        xpath(DOM, //h3(@class=r), Result),
        xpath(Result, //a(@href=HREF0, text), Title),
        uri_components(HREF0, Components),
        uri_data(search, Components, Query),
        uri_query_components(Query, Parts),
        memberchk(q=HREF, Parts).

An example query is below:

?- google(prolog, Title, HREF).
Title = 'SWI-Prolog',
HREF = 'http://www.swi-prolog.org/' ;
Title = 'Prolog - Wikipedia',
HREF = 'https://nl.wikipedia.org/wiki/Prolog' ;
Title = 'Prolog - Wikipedia, the free encyclopedia',
HREF = 'https://en.wikipedia.org/wiki/Prolog' ;
Title = 'Pro-Log is logistiek dienstverlener m.b.t. vervoer over water.',
HREF = 'http://www.pro-log.nl/' ;
Title = 'Learn Prolog Now!',
HREF = 'http://www.learnprolognow.org/' ;
Title = 'Free Online Version - Learn Prolog
...
See also
- load_html/3 and xpath/3 can be used to parse and navigate HTML documents.
-
http_get/3 and http_post/4 provide an alternative interface that convert the reply depending on the Content-Type header. */
  162:- multifile
  163    http:encoding_filter/3,           % +Encoding, +In0, -In
  164    http:current_transfer_encoding/1, % ?Encoding
  165    http:disable_encoding_filter/1,   % +ContentType
  166    http:http_protocol_hook/5,        % +Protocol, +Parts, +StreamPair,
  167                                      % -NewStreamPair, +Options
  168    http:open_options/2,              % +Parts, -Options
  169    http:write_cookies/3,             % +Out, +Parts, +Options
  170    http:update_cookies/3,            % +CookieLine, +Parts, +Options
  171    http:authenticate_client/2,       % +URL, +Action
  172    http:http_connection_over_proxy/6.  173
  174:- meta_predicate
  175    http_open(+,-,:).  176
  177:- predicate_options(http_open/3, 3,
  178                     [ authorization(compound),
  179                       final_url(-atom),
  180                       header(+atom, -atom),
  181                       headers(-list),
  182                       connection(+atom),
  183                       method(oneof([delete,get,put,head,post,patch,options])),
  184                       size(-integer),
  185                       status_code(-integer),
  186                       output(-stream),
  187                       timeout(number),
  188                       proxy(atom, integer),
  189                       proxy_authorization(compound),
  190                       bypass_proxy(boolean),
  191                       request_header(any),
  192                       user_agent(atom),
  193                       version(-compound),
  194        % The option below applies if library(http/http_header) is loaded
  195                       post(any),
  196        % The options below apply if library(http/http_ssl_plugin)) is loaded
  197                       pem_password_hook(callable),
  198                       cacert_file(atom),
  199                       cert_verify_hook(callable)
  200                     ]).
 user_agent(-Agent) is det
Default value for User-Agent, can be overruled using the option user_agent(Agent) of http_open/3.
  207user_agent('SWI-Prolog').
 http_open(+URL, -Stream, +Options) is det
Open the data at the HTTP server as a Prolog stream. URL is either an atom specifying a URL or a list representing a broken-down URL as specified below. After this predicate succeeds the data can be read from Stream. After completion this stream must be closed using the built-in Prolog predicate close/1. Options provides additional options:
authenticate(+Boolean)
If false (default true), do not try to automatically authenticate the client if a 401 (Unauthorized) status code is received.
authorization(+Term)
Send authorization. See also http_set_authorization/2. Supported schemes:
basic(+User, +Password)
HTTP Basic authentication.
bearer(+Token)
HTTP Bearer authentication.
digest(+User, +Password)
HTTP Digest authentication. This option is only provided if the plugin library(http/http_digest) is also loaded.
connection(+Connection)
Specify the Connection header. Default is close. The alternative is Keep-alive. This maintains a pool of available connections as determined by keep_connection/1. The library(http/websockets) uses Keep-alive, Upgrade. Keep-alive connections can be closed explicitly using http_close_keep_alive/1. Keep-alive connections may significantly improve repetitive requests on the same server, especially if the IP route is long, HTTPS is used or the connection uses a proxy.
final_url(-FinalURL)
Unify FinalURL with the final destination. This differs from the original URL if the returned head of the original indicates an HTTP redirect (codes 301, 302 or 303). Without a redirect, FinalURL is the same as URL if URL is an atom, or a URL constructed from the parts.
header(Name, -AtomValue)
If provided, AtomValue is unified with the value of the indicated field in the reply header. Name is matched case-insensitive and the underscore (_) matches the hyphen (-). Multiple of these options may be provided to extract multiple header fields. If the header is not available AtomValue is unified to the empty atom ('').
headers(-List)
If provided, List is unified with a list of Name(Value) pairs corresponding to fields in the reply header. Name and Value follow the same conventions used by the header(Name,Value) option.
method(+Method)
One of get (default), head, delete, post, put or patch. The head message can be used in combination with the header(Name, Value) option to access information on the resource without actually fetching the resource itself. The returned stream must be closed immediately.

If post(Data) is provided, the default is post.

size(-Size)
Size is unified with the integer value of Content-Length in the reply header.
version(-Version)
Version is a pair Major-Minor, where Major and Minor are integers representing the HTTP version in the reply header.
range(+Range)
Ask for partial content. Range is a term Unit(From,To), where From is an integer and To is either an integer or the atom end. HTTP 1.1 only supports Unit = bytes. E.g., to ask for bytes 1000-1999, use the option range(bytes(1000,1999))
redirect(+Boolean)
If false (default true), do not automatically redirect if a 3XX code is received. Must be combined with status_code(Code) and one of the header options to read the redirect reply. In particular, without status_code(Code) a redirect is mapped to an exception.
status_code(-Code)
If this option is present and Code unifies with the HTTP status code, do not translate errors (4xx, 5xx) into an exception. Instead, http_open/3 behaves as if 2xx (success) is returned, providing the application to read the error document from the returned stream.
output(-Out)
Unify the output stream with Out and do not close it. This can be used to upgrade a connection.
timeout(+Timeout)
If provided, set a timeout on the stream using set_stream/2. With this option if no new data arrives within Timeout seconds the stream raises an exception. Default is to wait forever (infinite).
post(+Data)
Issue a POST request on the HTTP server. Data is handed to http_post_data/3.
proxy(+Host:Port)
Use an HTTP proxy to connect to the outside world. See also socket:proxy_for_url/3. This option overrules the proxy specification defined by socket:proxy_for_url/3.
proxy(+Host, +Port)
Synonym for proxy(+Host:Port). Deprecated.
proxy_authorization(+Authorization)
Send authorization to the proxy. Otherwise the same as the authorization option.
bypass_proxy(+Boolean)
If true, bypass proxy hooks. Default is false.
request_header(Name=Value)
Additional name-value parts are added in the order of appearance to the HTTP request header. No interpretation is done.
max_redirect(+Max)
Sets the maximum length of a redirection chain. This is needed for some IRIs that redirect indefinitely to other IRIs without looping (e.g., redirecting to IRIs with a random element in them). Max must be either a non-negative integer or the atom infinite. The default value is 10.
user_agent(+Agent)
Defines the value of the User-Agent field of the HTTP header. Default is SWI-Prolog.

The hook http:open_options/2 can be used to provide default options based on the broken-down URL. The option status_code(-Code) is particularly useful to query REST interfaces that commonly return status codes other than 200 that need to be be processed by the client code.

Arguments:
URL- is either an atom or string (url) or a list of parts.

When provided, this list may contain the fields scheme, user, password, host, port, path and either query_string (whose argument is an atom) or search (whose argument is a list of Name(Value) or Name=Value compound terms). Only host is mandatory. The example below opens the URL http://www.example.com/my/path?q=Hello%20World&lang=en. Note that values must not be quoted because the library inserts the required quotes.

http_open([ host('www.example.com'),
            path('/my/path'),
            search([ q='Hello world',
                     lang=en
                   ])
          ])
throws
- error(existence_error(url, Id),Context) is raised if the HTTP result code is not in the range 200..299. Context has the shape context(Message, status(Code, TextCode)), where Code is the numeric HTTP code and TextCode is the textual description thereof provided by the server. Message may provide additional details or may be unbound.
See also
- ssl_context/3 for SSL related options if library(http/http_ssl_plugin) is loaded.
  389:- multifile
  390    socket:proxy_for_url/3.           % +URL, +Host, -ProxyList
  391
  392http_open(URL, Stream, QOptions) :-
  393    meta_options(is_meta, QOptions, Options0),
  394    (   atomic(URL)
  395    ->  parse_url_ex(URL, Parts)
  396    ;   Parts = URL
  397    ),
  398    autoload_https(Parts),
  399    upgrade_ssl_options(Parts, Options0, Options),
  400    add_authorization(Parts, Options, Options1),
  401    findall(HostOptions, hooked_options(Parts, HostOptions), AllHostOptions),
  402    foldl(merge_options_rev, AllHostOptions, Options1, Options2),
  403    (   option(bypass_proxy(true), Options)
  404    ->  try_http_proxy(direct, Parts, Stream, Options2)
  405    ;   term_variables(Options2, Vars2),
  406        findall(Result-Vars2,
  407                try_a_proxy(Parts, Result, Options2),
  408                ResultList),
  409        last(ResultList, Status-Vars2)
  410    ->  (   Status = true(_Proxy, Stream)
  411        ->  true
  412        ;   throw(error(proxy_error(tried(ResultList)), _))
  413        )
  414    ;   try_http_proxy(direct, Parts, Stream, Options2)
  415    ).
  416
  417try_a_proxy(Parts, Result, Options) :-
  418    parts_uri(Parts, AtomicURL),
  419    option(host(Host), Parts),
  420    (   (   option(proxy(ProxyHost:ProxyPort), Options)
  421        ;   is_list(Options),
  422            memberchk(proxy(ProxyHost,ProxyPort), Options)
  423        )
  424    ->  Proxy = proxy(ProxyHost, ProxyPort)
  425    ;   socket:proxy_for_url(AtomicURL, Host, Proxy)
  426    ),
  427    debug(http(proxy),
  428          'http_open: Connecting via ~w to ~w', [Proxy, AtomicURL]),
  429    (   catch(try_http_proxy(Proxy, Parts, Stream, Options), E, true)
  430    ->  (   var(E)
  431        ->  !, Result = true(Proxy, Stream)
  432        ;   Result = error(Proxy, E)
  433        )
  434    ;   Result = false(Proxy)
  435    ),
  436    debug(http(proxy), 'http_open: ~w: ~p', [Proxy, Result]).
  437
  438try_http_proxy(Method, Parts, Stream, Options0) :-
  439    option(host(Host), Parts),
  440    (   Method == direct
  441    ->  parts_request_uri(Parts, RequestURI)
  442    ;   parts_uri(Parts, RequestURI)
  443    ),
  444    select_option(visited(Visited0), Options0, OptionsV, []),
  445    Options = [visited([Parts|Visited0])|OptionsV],
  446    parts_scheme(Parts, Scheme),
  447    default_port(Scheme, DefPort),
  448    url_part(port(Port), Parts, DefPort),
  449    host_and_port(Host, DefPort, Port, HostPort),
  450    (   option(connection(Connection), Options0),
  451        keep_alive(Connection),
  452        get_from_pool(Host:Port, StreamPair),
  453        debug(http(connection), 'Trying Keep-alive to ~p using ~p',
  454              [ Host:Port, StreamPair ]),
  455        catch(send_rec_header(StreamPair, Stream, HostPort,
  456                              RequestURI, Parts, Options),
  457              error(E,_),
  458              keep_alive_error(E))
  459    ->  true
  460    ;   http:http_connection_over_proxy(Method, Parts, Host:Port,
  461                                        SocketStreamPair, Options, Options1),
  462        (   catch(http:http_protocol_hook(Scheme, Parts,
  463                                          SocketStreamPair,
  464                                          StreamPair, Options),
  465                  Error,
  466                  ( close(SocketStreamPair, [force(true)]),
  467                    throw(Error)))
  468        ->  true
  469        ;   StreamPair = SocketStreamPair
  470        ),
  471        send_rec_header(StreamPair, Stream, HostPort,
  472                        RequestURI, Parts, Options1)
  473    ),
  474    return_final_url(Options).
  475
  476http:http_connection_over_proxy(direct, _, Host:Port,
  477                                StreamPair, Options, Options) :-
  478    !,
  479    open_socket(Host:Port, StreamPair, Options).
  480http:http_connection_over_proxy(proxy(ProxyHost, ProxyPort), Parts, _,
  481                                StreamPair, Options, Options) :-
  482    \+ ( memberchk(scheme(Scheme), Parts),
  483         secure_scheme(Scheme)
  484       ),
  485    !,
  486    % We do not want any /more/ proxy after this
  487    open_socket(ProxyHost:ProxyPort, StreamPair,
  488                [bypass_proxy(true)|Options]).
  489http:http_connection_over_proxy(socks(SocksHost, SocksPort), _Parts, Host:Port,
  490                                StreamPair, Options, Options) :-
  491    !,
  492    tcp_connect(SocksHost:SocksPort, StreamPair, [bypass_proxy(true)]),
  493    catch(negotiate_socks_connection(Host:Port, StreamPair),
  494          Error,
  495          ( close(StreamPair, [force(true)]),
  496            throw(Error)
  497          )).
 hooked_options(+Parts, -Options) is nondet
Calls http:open_options/2 and if necessary upgrades old SSL cacerts_file(File) option to a cacerts(List) option to ensure proper merging of options.
  505hooked_options(Parts, Options) :-
  506    http:open_options(Parts, Options0),
  507    upgrade_ssl_options(Parts, Options0, Options).
  508
  509:- if(current_predicate(ssl_upgrade_legacy_options/2)).  510upgrade_ssl_options(Parts, Options0, Options) :-
  511    requires_ssl(Parts),
  512    !,
  513    ssl_upgrade_legacy_options(Options0, Options).
  514:- endif.  515upgrade_ssl_options(_, Options, Options).
  516
  517merge_options_rev(Old, New, Merged) :-
  518    merge_options(New, Old, Merged).
  519
  520is_meta(pem_password_hook).             % SSL plugin callbacks
  521is_meta(cert_verify_hook).
  522
  523
  524http:http_protocol_hook(http, _, StreamPair, StreamPair, _).
  525
  526default_port(https, 443) :- !.
  527default_port(wss,   443) :- !.
  528default_port(_,     80).
  529
  530host_and_port(Host, DefPort, DefPort, Host) :- !.
  531host_and_port(Host, _,       Port,    Host:Port).
 autoload_https(+Parts) is det
If the requested scheme is https or wss, load the HTTPS plugin.
  537autoload_https(Parts) :-
  538    requires_ssl(Parts),
  539    memberchk(scheme(S), Parts),
  540    \+ clause(http:http_protocol_hook(S, _, StreamPair, StreamPair, _),_),
  541    exists_source(library(http/http_ssl_plugin)),
  542    !,
  543    use_module(library(http/http_ssl_plugin)).
  544autoload_https(_).
  545
  546requires_ssl(Parts) :-
  547    memberchk(scheme(S), Parts),
  548    secure_scheme(S).
  549
  550secure_scheme(https).
  551secure_scheme(wss).
 send_rec_header(+StreamPair, -Stream, +Host, +RequestURI, +Parts, +Options) is det
Send header to Out and process reply. If there is an error or failure, close In and Out and return the error or failure.
  559send_rec_header(StreamPair, Stream, Host, RequestURI, Parts, Options) :-
  560    (   catch(guarded_send_rec_header(StreamPair, Stream,
  561                                      Host, RequestURI, Parts, Options),
  562              E, true)
  563    ->  (   var(E)
  564        ->  (   option(output(StreamPair), Options)
  565            ->  true
  566            ;   true
  567            )
  568        ;   close(StreamPair, [force(true)]),
  569            throw(E)
  570        )
  571    ;   close(StreamPair, [force(true)]),
  572        fail
  573    ).
  574
  575guarded_send_rec_header(StreamPair, Stream, Host, RequestURI, Parts, Options) :-
  576    user_agent(Agent, Options),
  577    method(Options, MNAME),
  578    http_version(Version),
  579    option(connection(Connection), Options, close),
  580    debug(http(send_request), "> ~w ~w HTTP/~w", [MNAME, RequestURI, Version]),
  581    debug(http(send_request), "> Host: ~w", [Host]),
  582    debug(http(send_request), "> User-Agent: ~w", [Agent]),
  583    debug(http(send_request), "> Connection: ~w", [Connection]),
  584    format(StreamPair,
  585           '~w ~w HTTP/~w\r\n\c
  586               Host: ~w\r\n\c
  587               User-Agent: ~w\r\n\c
  588               Connection: ~w\r\n',
  589           [MNAME, RequestURI, Version, Host, Agent, Connection]),
  590    parts_uri(Parts, URI),
  591    x_headers(Options, URI, StreamPair),
  592    write_cookies(StreamPair, Parts, Options),
  593    (   option(post(PostData), Options)
  594    ->  http_post_data(PostData, StreamPair, [])
  595    ;   format(StreamPair, '\r\n', [])
  596    ),
  597    flush_output(StreamPair),
  598                                    % read the reply header
  599    read_header(StreamPair, Parts, ReplyVersion, Code, Comment, Lines),
  600    update_cookies(Lines, Parts, Options),
  601    do_open(ReplyVersion, Code, Comment, Lines, Options, Parts, Host,
  602            StreamPair, Stream).
 http_version(-Version:atom) is det
HTTP version we publish. We can only use 1.1 if we support chunked encoding.
  610http_version('1.1') :-
  611    http:current_transfer_encoding(chunked),
  612    !.
  613http_version('1.0').
  614
  615method(Options, MNAME) :-
  616    option(post(_), Options),
  617    !,
  618    option(method(M), Options, post),
  619    (   map_method(M, MNAME0)
  620    ->  MNAME = MNAME0
  621    ;   domain_error(method, M)
  622    ).
  623method(Options, MNAME) :-
  624    option(method(M), Options, get),
  625    (   map_method(M, MNAME0)
  626    ->  MNAME = MNAME0
  627    ;   map_method(_, M)
  628    ->  MNAME = M
  629    ;   domain_error(method, M)
  630    ).
  631
  632map_method(delete,  'DELETE').
  633map_method(get,     'GET').
  634map_method(head,    'HEAD').
  635map_method(post,    'POST').
  636map_method(put,     'PUT').
  637map_method(patch,   'PATCH').
  638map_method(options, 'OPTIONS').
 x_headers(+Options, +URI, +Out) is det
Emit extra headers from request_header(Name=Value) options in Options.
To be done
- Use user/password fields
  647x_headers(Options, URI, Out) :-
  648    x_headers_(Options, [url(URI)|Options], Out).
  649
  650x_headers_([], _, _).
  651x_headers_([H|T], Options, Out) :-
  652    x_header(H, Options, Out),
  653    x_headers_(T, Options, Out).
  654
  655x_header(request_header(Name=Value), _, Out) :-
  656    !,
  657    debug(http(send_request), "> ~w: ~w", [Name, Value]),
  658    format(Out, '~w: ~w\r\n', [Name, Value]).
  659x_header(proxy_authorization(ProxyAuthorization), Options, Out) :-
  660    !,
  661    auth_header(ProxyAuthorization, Options, 'Proxy-Authorization', Out).
  662x_header(authorization(Authorization), Options, Out) :-
  663    !,
  664    auth_header(Authorization, Options, 'Authorization', Out).
  665x_header(range(Spec), _, Out) :-
  666    !,
  667    Spec =.. [Unit, From, To],
  668    (   To == end
  669    ->  ToT = ''
  670    ;   must_be(integer, To),
  671        ToT = To
  672    ),
  673    debug(http(send_request), "> Range: ~w=~d-~w", [Unit, From, ToT]),
  674    format(Out, 'Range: ~w=~d-~w\r\n', [Unit, From, ToT]).
  675x_header(_, _, _).
 auth_header(+AuthOption, +Options, +HeaderName, +Out)
  679auth_header(basic(User, Password), _, Header, Out) :-
  680    !,
  681    format(codes(Codes), '~w:~w', [User, Password]),
  682    phrase(base64(Codes), Base64Codes),
  683    debug(http(send_request), "> ~w: Basic ~s", [Header, Base64Codes]),
  684    format(Out, '~w: Basic ~s\r\n', [Header, Base64Codes]).
  685auth_header(bearer(Token), _, Header, Out) :-
  686    !,
  687    debug(http(send_request), "> ~w: Bearer ~w", [Header,Token]),
  688    format(Out, '~w: Bearer ~w\r\n', [Header, Token]).
  689auth_header(Auth, Options, _, Out) :-
  690    option(url(URL), Options),
  691    add_method(Options, Options1),
  692    http:authenticate_client(URL, send_auth_header(Auth, Out, Options1)),
  693    !.
  694auth_header(Auth, _, _, _) :-
  695    domain_error(authorization, Auth).
  696
  697user_agent(Agent, Options) :-
  698    (   option(user_agent(Agent), Options)
  699    ->  true
  700    ;   user_agent(Agent)
  701    ).
  702
  703add_method(Options0, Options) :-
  704    option(method(_), Options0),
  705    !,
  706    Options = Options0.
  707add_method(Options0, Options) :-
  708    option(post(_), Options0),
  709    !,
  710    Options = [method(post)|Options0].
  711add_method(Options0, [method(get)|Options0]).
 do_open(+HTTPVersion, +HTTPStatusCode, +HTTPStatusComment, +Header, +Options, +Parts, +Host, +In, -FinalIn) is det
Handle the HTTP status once available. If 200-299, we are ok. If a redirect, redo the open, returning a new stream. Else issue an error.
Errors
- existence_error(url, URL)
  722                                        % Redirections
  723do_open(_, Code, _, Lines, Options0, Parts, _, In, Stream) :-
  724    redirect_code(Code),
  725    option(redirect(true), Options0, true),
  726    location(Lines, RequestURI),
  727    !,
  728    debug(http(redirect), 'http_open: redirecting to ~w', [RequestURI]),
  729    close(In),
  730    parts_uri(Parts, Base),
  731    uri_resolve(RequestURI, Base, Redirected),
  732    parse_url_ex(Redirected, RedirectedParts),
  733    (   redirect_limit_exceeded(Options0, Max)
  734    ->  format(atom(Comment), 'max_redirect (~w) limit exceeded', [Max]),
  735        throw(error(permission_error(redirect, http, Redirected),
  736                    context(_, Comment)))
  737    ;   redirect_loop(RedirectedParts, Options0)
  738    ->  throw(error(permission_error(redirect, http, Redirected),
  739                    context(_, 'Redirection loop')))
  740    ;   true
  741    ),
  742    redirect_options(Options0, Options),
  743    http_open(RedirectedParts, Stream, Options).
  744                                        % Need authentication
  745do_open(_Version, Code, _Comment, Lines, Options0, Parts, _Host, In0, Stream) :-
  746    authenticate_code(Code),
  747    option(authenticate(true), Options0, true),
  748    parts_uri(Parts, URI),
  749    parse_headers(Lines, Headers),
  750    http:authenticate_client(
  751             URI,
  752             auth_reponse(Headers, Options0, Options)),
  753    !,
  754    close(In0),
  755    http_open(Parts, Stream, Options).
  756                                        % Accepted codes
  757do_open(Version, Code, _, Lines, Options, Parts, Host, In0, In) :-
  758    (   option(status_code(Code), Options),
  759        Lines \== []
  760    ->  true
  761    ;   successful_code(Code)
  762    ),
  763    !,
  764    parts_uri(Parts, URI),
  765    parse_headers(Lines, Headers),
  766    return_version(Options, Version),
  767    return_size(Options, Headers),
  768    return_fields(Options, Headers),
  769    return_headers(Options, Headers),
  770    consider_keep_alive(Lines, Parts, Host, In0, In1, Options),
  771    transfer_encoding_filter(Lines, In1, In),
  772                                    % properly re-initialise the stream
  773    set_stream(In, file_name(URI)),
  774    set_stream(In, record_position(true)).
  775do_open(_, _, _, [], Options, _, _, _, _) :-
  776    option(connection(Connection), Options),
  777    keep_alive(Connection),
  778    !,
  779    throw(error(keep_alive(closed),_)).
  780                                        % report anything else as error
  781do_open(_Version, Code, Comment, _,  _, Parts, _, _, _) :-
  782    parts_uri(Parts, URI),
  783    (   map_error_code(Code, Error)
  784    ->  Formal =.. [Error, url, URI]
  785    ;   Formal = existence_error(url, URI)
  786    ),
  787    throw(error(Formal, context(_, status(Code, Comment)))).
  788
  789
  790successful_code(Code) :-
  791    between(200, 299, Code).
 redirect_limit_exceeded(+Options:list(compound), -Max:nonneg) is semidet
True if we have exceeded the maximum redirection length (default 10).
  797redirect_limit_exceeded(Options, Max) :-
  798    option(visited(Visited), Options, []),
  799    length(Visited, N),
  800    option(max_redirect(Max), Options, 10),
  801    (Max == infinite -> fail ; N > Max).
 redirect_loop(+Parts, +Options) is semidet
True if we are in a redirection loop. Note that some sites redirect once to the same place using cookies or similar, so we allow for two tries. In fact, we should probably test whether authorization or cookie headers have changed.
  811redirect_loop(Parts, Options) :-
  812    option(visited(Visited), Options, []),
  813    include(==(Parts), Visited, Same),
  814    length(Same, Count),
  815    Count > 2.
 redirect_options(+Options0, -Options) is det
A redirect from a POST should do a GET on the returned URI. This means we must remove the method(post) and post(Data) options from the original option-list.
  824redirect_options(Options0, Options) :-
  825    (   select_option(post(_), Options0, Options1)
  826    ->  true
  827    ;   Options1 = Options0
  828    ),
  829    (   select_option(method(Method), Options1, Options),
  830        \+ redirect_method(Method)
  831    ->  true
  832    ;   Options = Options1
  833    ).
  834
  835redirect_method(delete).
  836redirect_method(get).
  837redirect_method(head).
 map_error_code(+HTTPCode, -PrologError) is semidet
Map HTTP error codes to Prolog errors.
To be done
- Many more maps. Unfortunately many have no sensible Prolog counterpart.
  847map_error_code(401, permission_error).
  848map_error_code(403, permission_error).
  849map_error_code(404, existence_error).
  850map_error_code(405, permission_error).
  851map_error_code(407, permission_error).
  852map_error_code(410, existence_error).
  853
  854redirect_code(301).                     % Moved Permanently
  855redirect_code(302).                     % Found (previously "Moved Temporary")
  856redirect_code(303).                     % See Other
  857redirect_code(307).                     % Temporary Redirect
  858
  859authenticate_code(401).
 open_socket(+Address, -StreamPair, +Options) is det
Create and connect a client socket to Address. Options
timeout(+Timeout)
Sets timeout on the stream, after connecting the socket.
To be done
- Make timeout also work on tcp_connect/4.
- This is the same as do_connect/4 in http_client.pl
  872open_socket(Address, StreamPair, Options) :-
  873    debug(http(open), 'http_open: Connecting to ~p ...', [Address]),
  874    tcp_connect(Address, StreamPair, Options),
  875    stream_pair(StreamPair, In, Out),
  876    debug(http(open), '\tok ~p ---> ~p', [In, Out]),
  877    set_stream(In, record_position(false)),
  878    (   option(timeout(Timeout), Options)
  879    ->  set_stream(In, timeout(Timeout))
  880    ;   true
  881    ).
  882
  883
  884return_version(Options, Major-Minor) :-
  885    option(version(Major-Minor), Options, _).
  886
  887return_size(Options, Headers) :-
  888    (   memberchk(content_length(Size), Headers)
  889    ->  option(size(Size), Options, _)
  890    ;   true
  891    ).
  892
  893return_fields([], _).
  894return_fields([header(Name, Value)|T], Headers) :-
  895    !,
  896    (   Term =.. [Name,Value],
  897        memberchk(Term, Headers)
  898    ->  true
  899    ;   Value = ''
  900    ),
  901    return_fields(T, Headers).
  902return_fields([_|T], Lines) :-
  903    return_fields(T, Lines).
  904
  905return_headers(Options, Headers) :-
  906    option(headers(Headers), Options, _).
 parse_headers(+Lines, -Headers:list(compound)) is det
Parse the header lines for the headers(-List) option. Invalid header lines are skipped, printing a warning using pring_message/2.
  914parse_headers([], []) :- !.
  915parse_headers([Line|Lines], Headers) :-
  916    catch(http_parse_header(Line, [Header]), Error, true),
  917    (   var(Error)
  918    ->  Headers = [Header|More]
  919    ;   print_message(warning, Error),
  920        Headers = More
  921    ),
  922    parse_headers(Lines, More).
 return_final_url(+Options) is semidet
If Options contains final_url(URL), unify URL with the final URL after redirections.
  930return_final_url(Options) :-
  931    option(final_url(URL), Options),
  932    var(URL),
  933    !,
  934    option(visited([Parts|_]), Options),
  935    parts_uri(Parts, URL).
  936return_final_url(_).
 transfer_encoding_filter(+Lines, +In0, -In) is det
Install filters depending on the transfer encoding. If In0 is a stream-pair, we close the output side. If transfer-encoding is not specified, the content-encoding is interpreted as a synonym for transfer-encoding, because many servers incorrectly depend on this. Exceptions to this are content-types for which disable_encoding_filter/1 holds.
  948transfer_encoding_filter(Lines, In0, In) :-
  949    transfer_encoding(Lines, Encoding),
  950    !,
  951    transfer_encoding_filter_(Encoding, In0, In).
  952transfer_encoding_filter(Lines, In0, In) :-
  953    content_encoding(Lines, Encoding),
  954    content_type(Lines, Type),
  955    \+ http:disable_encoding_filter(Type),
  956    !,
  957    transfer_encoding_filter_(Encoding, In0, In).
  958transfer_encoding_filter(_, In, In).
  959
  960transfer_encoding_filter_(Encoding, In0, In) :-
  961    stream_pair(In0, In1, Out),
  962    (   nonvar(Out)
  963    ->  close(Out)
  964    ;   true
  965    ),
  966    (   http:encoding_filter(Encoding, In1, In)
  967    ->  true
  968    ;   autoload_encoding(Encoding),
  969        http:encoding_filter(Encoding, In1, In)
  970    ->  true
  971    ;   domain_error(http_encoding, Encoding)
  972    ).
  973
  974:- multifile
  975    autoload_encoding/1.  976
  977:- if(exists_source(library(zlib))).  978autoload_encoding(gzip) :-
  979    use_module(library(zlib)).
  980:- endif.  981
  982content_type(Lines, Type) :-
  983    member(Line, Lines),
  984    phrase(field('content-type'), Line, Rest),
  985    !,
  986    atom_codes(Type, Rest).
 http:disable_encoding_filter(+ContentType) is semidet
Do not use the Content-encoding as Transfer-encoding encoding for specific values of ContentType. This predicate is multifile and can thus be extended by the user.
  994http:disable_encoding_filter('application/x-gzip').
  995http:disable_encoding_filter('application/x-tar').
  996http:disable_encoding_filter('x-world/x-vrml').
  997http:disable_encoding_filter('application/zip').
  998http:disable_encoding_filter('application/x-gzip').
  999http:disable_encoding_filter('application/x-zip-compressed').
 1000http:disable_encoding_filter('application/x-compress').
 1001http:disable_encoding_filter('application/x-compressed').
 1002http:disable_encoding_filter('application/x-spoon').
 transfer_encoding(+Lines, -Encoding) is semidet
True if Encoding is the value of the Transfer-encoding header.
 1009transfer_encoding(Lines, Encoding) :-
 1010    what_encoding(transfer_encoding, Lines, Encoding).
 1011
 1012what_encoding(What, Lines, Encoding) :-
 1013    member(Line, Lines),
 1014    phrase(encoding_(What, Debug), Line, Rest),
 1015    !,
 1016    atom_codes(Encoding, Rest),
 1017    debug(http(What), '~w: ~p', [Debug, Rest]).
 1018
 1019encoding_(content_encoding, 'Content-encoding') -->
 1020    field('content-encoding').
 1021encoding_(transfer_encoding, 'Transfer-encoding') -->
 1022    field('transfer-encoding').
 content_encoding(+Lines, -Encoding) is semidet
True if Encoding is the value of the Content-encoding header.
 1029content_encoding(Lines, Encoding) :-
 1030    what_encoding(content_encoding, Lines, Encoding).
 read_header(+In:stream, +Parts, -Version, -Code:int, -Comment:atom, -Lines:list) is det
Read the HTTP reply-header. If the reply is completely empty an existence error is thrown. If the replied header is otherwise invalid a 500 HTTP error is simulated, having the comment Invalid reply header.
Arguments:
Parts- A list of compound terms that describe the parsed request URI.
Version- HTTP reply version as Major-Minor pair
Code- Numeric HTTP reply-code
Comment- Comment of reply-code as atom
Lines- Remaining header lines as code-lists.
Errors
- existence_error(http_reply, Uri)
 1049read_header(In, Parts, Major-Minor, Code, Comment, Lines) :-
 1050    read_line_to_codes(In, Line),
 1051    (   Line == end_of_file
 1052    ->  parts_uri(Parts, Uri),
 1053        existence_error(http_reply,Uri)
 1054    ;   true
 1055    ),
 1056    Line \== end_of_file,
 1057    phrase(first_line(Major-Minor, Code, Comment), Line),
 1058    debug(http(open), 'HTTP/~d.~d ~w ~w', [Major, Minor, Code, Comment]),
 1059    read_line_to_codes(In, Line2),
 1060    rest_header(Line2, In, Lines),
 1061    !,
 1062    (   debugging(http(open))
 1063    ->  forall(member(HL, Lines),
 1064               debug(http(open), '~s', [HL]))
 1065    ;   true
 1066    ).
 1067read_header(_, _, 1-1, 500, 'Invalid reply header', []).
 1068
 1069rest_header([], _, []) :- !.            % blank line: end of header
 1070rest_header(L0, In, [L0|L]) :-
 1071    read_line_to_codes(In, L1),
 1072    rest_header(L1, In, L).
 content_length(+Header, -Length:int) is semidet
Find the Content-Length in an HTTP reply-header.
 1078content_length(Lines, Length) :-
 1079    member(Line, Lines),
 1080    phrase(content_length(Length0), Line),
 1081    !,
 1082    Length = Length0.
 1083
 1084location(Lines, RequestURI) :-
 1085    member(Line, Lines),
 1086    phrase(atom_field(location, RequestURI), Line),
 1087    !.
 1088
 1089connection(Lines, Connection) :-
 1090    member(Line, Lines),
 1091    phrase(atom_field(connection, Connection0), Line),
 1092    !,
 1093    Connection = Connection0.
 1094
 1095first_line(Major-Minor, Code, Comment) -->
 1096    "HTTP/", integer(Major), ".", integer(Minor),
 1097    skip_blanks,
 1098    integer(Code),
 1099    skip_blanks,
 1100    rest(Comment).
 1101
 1102atom_field(Name, Value) -->
 1103    field(Name),
 1104    rest(Value).
 1105
 1106content_length(Len) -->
 1107    field('content-length'),
 1108    integer(Len).
 1109
 1110field(Name) -->
 1111    { atom_codes(Name, Codes) },
 1112    field_codes(Codes).
 1113
 1114field_codes([]) -->
 1115    ":",
 1116    skip_blanks.
 1117field_codes([H|T]) -->
 1118    [C],
 1119    { match_header_char(H, C)
 1120    },
 1121    field_codes(T).
 1122
 1123match_header_char(C, C) :- !.
 1124match_header_char(C, U) :-
 1125    code_type(C, to_lower(U)),
 1126    !.
 1127match_header_char(0'_, 0'-).
 1128
 1129
 1130skip_blanks -->
 1131    [C],
 1132    { code_type(C, white)
 1133    },
 1134    !,
 1135    skip_blanks.
 1136skip_blanks -->
 1137    [].
 integer(-Int)//
Read 1 or more digits and return as integer.
 1143integer(Code) -->
 1144    digit(D0),
 1145    digits(D),
 1146    { number_codes(Code, [D0|D])
 1147    }.
 1148
 1149digit(C) -->
 1150    [C],
 1151    { code_type(C, digit)
 1152    }.
 1153
 1154digits([D0|D]) -->
 1155    digit(D0),
 1156    !,
 1157    digits(D).
 1158digits([]) -->
 1159    [].
 rest(-Atom:atom)//
Get rest of input as an atom.
 1165rest(Atom) --> call(rest_(Atom)).
 1166
 1167rest_(Atom, L, []) :-
 1168    atom_codes(Atom, L).
 1169
 1170
 1171                 /*******************************
 1172                 *   AUTHORIZATION MANAGEMENT   *
 1173                 *******************************/
 http_set_authorization(+URL, +Authorization) is det
Set user/password to supply with URLs that have URL as prefix. If Authorization is the atom -, possibly defined authorization is cleared. For example:
?- http_set_authorization('http://www.example.com/private/',
                          basic('John', 'Secret'))
To be done
- Move to a separate module, so http_get/3, etc. can use this too.
 1189:- dynamic
 1190    stored_authorization/2,
 1191    cached_authorization/2. 1192
 1193http_set_authorization(URL, Authorization) :-
 1194    must_be(atom, URL),
 1195    retractall(stored_authorization(URL, _)),
 1196    (   Authorization = (-)
 1197    ->  true
 1198    ;   check_authorization(Authorization),
 1199        assert(stored_authorization(URL, Authorization))
 1200    ),
 1201    retractall(cached_authorization(_,_)).
 1202
 1203check_authorization(Var) :-
 1204    var(Var),
 1205    !,
 1206    instantiation_error(Var).
 1207check_authorization(basic(User, Password)) :-
 1208    must_be(atom, User),
 1209    must_be(text, Password).
 1210check_authorization(digest(User, Password)) :-
 1211    must_be(atom, User),
 1212    must_be(text, Password).
 authorization(+URL, -Authorization) is semidet
True if Authorization must be supplied for URL.
To be done
- Cleanup cache if it gets too big.
 1220authorization(_, _) :-
 1221    \+ stored_authorization(_, _),
 1222    !,
 1223    fail.
 1224authorization(URL, Authorization) :-
 1225    cached_authorization(URL, Authorization),
 1226    !,
 1227    Authorization \== (-).
 1228authorization(URL, Authorization) :-
 1229    (   stored_authorization(Prefix, Authorization),
 1230        sub_atom(URL, 0, _, _, Prefix)
 1231    ->  assert(cached_authorization(URL, Authorization))
 1232    ;   assert(cached_authorization(URL, -)),
 1233        fail
 1234    ).
 1235
 1236add_authorization(_, Options, Options) :-
 1237    option(authorization(_), Options),
 1238    !.
 1239add_authorization(Parts, Options0, Options) :-
 1240    url_part(user(User), Parts),
 1241    url_part(password(Passwd), Parts),
 1242    !,
 1243    Options = [authorization(basic(User,Passwd))|Options0].
 1244add_authorization(Parts, Options0, Options) :-
 1245    stored_authorization(_, _) ->   % quick test to avoid work
 1246    parts_uri(Parts, URL),
 1247    authorization(URL, Auth),
 1248    !,
 1249    Options = [authorization(Auth)|Options0].
 1250add_authorization(_, Options, Options).
 parse_url_ex(+URL, -Parts)
Parts: Scheme, Host, Port, User:Password, RequestURI (no fragment).
 1258parse_url_ex(URL, [uri(URL)|Parts]) :-
 1259    uri_components(URL, Components),
 1260    phrase(components(Components), Parts),
 1261    (   option(host(_), Parts)
 1262    ->  true
 1263    ;   domain_error(url, URL)
 1264    ).
 1265
 1266components(Components) -->
 1267    uri_scheme(Components),
 1268    uri_path(Components),
 1269    uri_authority(Components),
 1270    uri_request_uri(Components).
 1271
 1272uri_scheme(Components) -->
 1273    { uri_data(scheme, Components, Scheme), nonvar(Scheme) },
 1274    !,
 1275    [ scheme(Scheme)
 1276    ].
 1277uri_scheme(_) --> [].
 1278
 1279uri_path(Components) -->
 1280    { uri_data(path, Components, Path0), nonvar(Path0),
 1281      (   Path0 == ''
 1282      ->  Path = (/)
 1283      ;   Path = Path0
 1284      )
 1285    },
 1286    !,
 1287    [ path(Path)
 1288    ].
 1289uri_path(_) --> [].
 1290
 1291uri_authority(Components) -->
 1292    { uri_data(authority, Components, Auth), nonvar(Auth),
 1293      !,
 1294      uri_authority_components(Auth, Data)
 1295    },
 1296    [ authority(Auth) ],
 1297    auth_field(user, Data),
 1298    auth_field(password, Data),
 1299    auth_field(host, Data),
 1300    auth_field(port, Data).
 1301uri_authority(_) --> [].
 1302
 1303auth_field(Field, Data) -->
 1304    { uri_authority_data(Field, Data, EncValue), nonvar(EncValue),
 1305      !,
 1306      (   atom(EncValue)
 1307      ->  uri_encoded(query_value, Value, EncValue)
 1308      ;   Value = EncValue
 1309      ),
 1310      Part =.. [Field,Value]
 1311    },
 1312    [ Part ].
 1313auth_field(_, _) --> [].
 1314
 1315uri_request_uri(Components) -->
 1316    { uri_data(path, Components, Path0),
 1317      uri_data(search, Components, Search),
 1318      (   Path0 == ''
 1319      ->  Path = (/)
 1320      ;   Path = Path0
 1321      ),
 1322      uri_data(path, Components2, Path),
 1323      uri_data(search, Components2, Search),
 1324      uri_components(RequestURI, Components2)
 1325    },
 1326    [ request_uri(RequestURI)
 1327    ].
 parts_scheme(+Parts, -Scheme) is det
 parts_uri(+Parts, -URI) is det
 parts_request_uri(+Parts, -RequestURI) is det
 parts_search(+Parts, -Search) is det
 parts_authority(+Parts, -Authority) is semidet
 1335parts_scheme(Parts, Scheme) :-
 1336    url_part(scheme(Scheme), Parts),
 1337    !.
 1338parts_scheme(Parts, Scheme) :-          % compatibility with library(url)
 1339    url_part(protocol(Scheme), Parts),
 1340    !.
 1341parts_scheme(_, http).
 1342
 1343parts_authority(Parts, Auth) :-
 1344    url_part(authority(Auth), Parts),
 1345    !.
 1346parts_authority(Parts, Auth) :-
 1347    url_part(host(Host), Parts, _),
 1348    url_part(port(Port), Parts, _),
 1349    url_part(user(User), Parts, _),
 1350    url_part(password(Password), Parts, _),
 1351    uri_authority_components(Auth,
 1352                             uri_authority(User, Password, Host, Port)).
 1353
 1354parts_request_uri(Parts, RequestURI) :-
 1355    option(request_uri(RequestURI), Parts),
 1356    !.
 1357parts_request_uri(Parts, RequestURI) :-
 1358    url_part(path(Path), Parts, /),
 1359    ignore(parts_search(Parts, Search)),
 1360    uri_data(path, Data, Path),
 1361    uri_data(search, Data, Search),
 1362    uri_components(RequestURI, Data).
 1363
 1364parts_search(Parts, Search) :-
 1365    option(query_string(Search), Parts),
 1366    !.
 1367parts_search(Parts, Search) :-
 1368    option(search(Fields), Parts),
 1369    !,
 1370    uri_query_components(Search, Fields).
 1371
 1372
 1373parts_uri(Parts, URI) :-
 1374    option(uri(URI), Parts),
 1375    !.
 1376parts_uri(Parts, URI) :-
 1377    parts_scheme(Parts, Scheme),
 1378    ignore(parts_authority(Parts, Auth)),
 1379    parts_request_uri(Parts, RequestURI),
 1380    uri_components(RequestURI, Data),
 1381    uri_data(scheme, Data, Scheme),
 1382    uri_data(authority, Data, Auth),
 1383    uri_components(URI, Data).
 1384
 1385parts_port(Parts, Port) :-
 1386    parts_scheme(Parts, Scheme),
 1387    default_port(Scheme, DefPort),
 1388    url_part(port(Port), Parts, DefPort).
 1389
 1390url_part(Part, Parts) :-
 1391    Part =.. [Name,Value],
 1392    Gen =.. [Name,RawValue],
 1393    option(Gen, Parts),
 1394    !,
 1395    Value = RawValue.
 1396
 1397url_part(Part, Parts, Default) :-
 1398    Part =.. [Name,Value],
 1399    Gen =.. [Name,RawValue],
 1400    (   option(Gen, Parts)
 1401    ->  Value = RawValue
 1402    ;   Value = Default
 1403    ).
 1404
 1405
 1406                 /*******************************
 1407                 *            COOKIES           *
 1408                 *******************************/
 1409
 1410write_cookies(Out, Parts, Options) :-
 1411    http:write_cookies(Out, Parts, Options),
 1412    !.
 1413write_cookies(_, _, _).
 1414
 1415update_cookies(_, _, _) :-
 1416    predicate_property(http:update_cookies(_,_,_), number_of_clauses(0)),
 1417    !.
 1418update_cookies(Lines, Parts, Options) :-
 1419    (   member(Line, Lines),
 1420        phrase(atom_field('set_cookie', CookieData), Line),
 1421        http:update_cookies(CookieData, Parts, Options),
 1422        fail
 1423    ;   true
 1424    ).
 1425
 1426
 1427                 /*******************************
 1428                 *           OPEN ANY           *
 1429                 *******************************/
 1430
 1431:- multifile iostream:open_hook/6.
 iostream:open_hook(+Spec, +Mode, -Stream, -Close, +Options0, -Options) is semidet
Hook implementation that makes open_any/5 support http and https URLs for Mode == read.
 1439iostream:open_hook(URL, read, Stream, Close, Options0, Options) :-
 1440    (atom(URL) -> true ; string(URL)),
 1441    uri_is_global(URL),
 1442    uri_components(URL, Components),
 1443    uri_data(scheme, Components, Scheme),
 1444    http_scheme(Scheme),
 1445    !,
 1446    Options = Options0,
 1447    Close = close(Stream),
 1448    http_open(URL, Stream, Options0).
 1449
 1450http_scheme(http).
 1451http_scheme(https).
 1452
 1453
 1454                 /*******************************
 1455                 *          KEEP-ALIVE          *
 1456                 *******************************/
 consider_keep_alive(+HeaderLines, +Parts, +Host, +Stream0, -Stream, +Options) is det
 1462consider_keep_alive(Lines, Parts, Host, StreamPair, In, Options) :-
 1463    option(connection(Asked), Options),
 1464    keep_alive(Asked),
 1465    connection(Lines, Given),
 1466    keep_alive(Given),
 1467    content_length(Lines, Bytes),
 1468    !,
 1469    stream_pair(StreamPair, In0, _),
 1470    connection_address(Host, Parts, HostPort),
 1471    debug(http(connection),
 1472          'Keep-alive to ~w (~D bytes)', [HostPort, Bytes]),
 1473    stream_range_open(In0, In,
 1474                      [ size(Bytes),
 1475                        onclose(keep_alive(StreamPair, HostPort))
 1476                      ]).
 1477consider_keep_alive(_, _, _, Stream, Stream, _).
 1478
 1479connection_address(Host, _, Host) :-
 1480    Host = _:_,
 1481    !.
 1482connection_address(Host, Parts, Host:Port) :-
 1483    parts_port(Parts, Port).
 1484
 1485keep_alive(keep_alive) :- !.
 1486keep_alive(Connection) :-
 1487    downcase_atom(Connection, 'keep-alive').
 1488
 1489:- public keep_alive/4. 1490
 1491keep_alive(StreamPair, Host, _In, 0) :-
 1492    !,
 1493    debug(http(connection), 'Adding connection to ~p to pool', [Host]),
 1494    add_to_pool(Host, StreamPair).
 1495keep_alive(StreamPair, Host, In, Left) :-
 1496    Left < 100,
 1497    debug(http(connection), 'Reading ~D left bytes', [Left]),
 1498    read_incomplete(In, Left),
 1499    add_to_pool(Host, StreamPair),
 1500    !.
 1501keep_alive(StreamPair, _, _, _) :-
 1502    debug(http(connection),
 1503          'Closing connection due to excessive unprocessed input', []),
 1504    (   debugging(http(connection))
 1505    ->  catch(close(StreamPair), E,
 1506              print_message(warning, E))
 1507    ;   close(StreamPair, [force(true)])
 1508    ).
 read_incomplete(+In, +Left) is semidet
If we have not all input from a Keep-alive connection, read the remainder if it is short. Else, we fail and close the stream.
 1515read_incomplete(In, Left) :-
 1516    catch(setup_call_cleanup(
 1517              open_null_stream(Null),
 1518              copy_stream_data(In, Null, Left),
 1519              close(Null)),
 1520          _,
 1521          fail).
 1522
 1523:- dynamic
 1524    connection_pool/4,              % Hash, Address, Stream, Time
 1525    connection_gc_time/1. 1526
 1527add_to_pool(Address, StreamPair) :-
 1528    keep_connection(Address),
 1529    get_time(Now),
 1530    term_hash(Address, Hash),
 1531    assertz(connection_pool(Hash, Address, StreamPair, Now)).
 1532
 1533get_from_pool(Address, StreamPair) :-
 1534    term_hash(Address, Hash),
 1535    retract(connection_pool(Hash, Address, StreamPair, _)).
 keep_connection(+Address) is semidet
Succeeds if we want to keep the connection open. We currently keep a maximum of 10 connections waiting and a maximum of 2 waiting for the same address. Connections older than 2 seconds are closed.
 1544keep_connection(Address) :-
 1545    close_old_connections(2),
 1546    predicate_property(connection_pool(_,_,_,_), number_of_clauses(C)),
 1547    C =< 10,
 1548    term_hash(Address, Hash),
 1549    aggregate_all(count, connection_pool(Hash, Address, _, _), Count),
 1550    Count =< 2.
 1551
 1552close_old_connections(Timeout) :-
 1553    get_time(Now),
 1554    Before is Now - Timeout,
 1555    (   connection_gc_time(GC),
 1556        GC > Before
 1557    ->  true
 1558    ;   (   retractall(connection_gc_time(_)),
 1559            asserta(connection_gc_time(Now)),
 1560            connection_pool(Hash, Address, StreamPair, Added),
 1561            Added < Before,
 1562            retract(connection_pool(Hash, Address, StreamPair, Added)),
 1563            debug(http(connection),
 1564                  'Closing inactive keep-alive to ~p', [Address]),
 1565            close(StreamPair, [force(true)]),
 1566            fail
 1567        ;   true
 1568        )
 1569    ).
 http_close_keep_alive(+Address) is det
Close all keep-alive connections matching Address. Address is of the form Host:Port. In particular, http_close_keep_alive(_) closes all currently known keep-alive connections.
 1578http_close_keep_alive(Address) :-
 1579    forall(get_from_pool(Address, StreamPair),
 1580           close(StreamPair, [force(true)])).
 keep_alive_error(+Error)
Deal with an error from reusing a keep-alive connection. If the error is due to an I/O error or end-of-file, fail to backtrack over get_from_pool/2. Otherwise it is a real error and we thus re-raise it.
 1589keep_alive_error(keep_alive(closed)) :-
 1590    !,
 1591    debug(http(connection), 'Keep-alive connection was closed', []),
 1592    fail.
 1593keep_alive_error(io_error(_,_)) :-
 1594    !,
 1595    debug(http(connection), 'IO error on Keep-alive connection', []),
 1596    fail.
 1597keep_alive_error(Error) :-
 1598    throw(Error).
 1599
 1600
 1601                 /*******************************
 1602                 *     HOOK DOCUMENTATION       *
 1603                 *******************************/
 http:open_options(+Parts, -Options) is nondet
This hook is used by the HTTP client library to define default options based on the the broken-down request-URL. The following example redirects all trafic, except for localhost over a proxy:
:- multifile
    http:open_options/2.

http:open_options(Parts, Options) :-
    option(host(Host), Parts),
    Host \== localhost,
    Options = [proxy('proxy.local', 3128)].

This hook may return multiple solutions. The returned options are combined using merge_options/3 where earlier solutions overrule later solutions.

 http:write_cookies(+Out, +Parts, +Options) is semidet
Emit a Cookie: header for the current connection. Out is an open stream to the HTTP server, Parts is the broken-down request (see uri_components/2) and Options is the list of options passed to http_open. The predicate is called as if using ignore/1.
See also
- complements http:update_cookies/3.
- library(http/http_cookie) implements cookie handling on top of these hooks.
 http:update_cookies(+CookieData, +Parts, +Options) is semidet
Update the cookie database. CookieData is the value of the Set-Cookie field, Parts is the broken-down request (see uri_components/2) and Options is the list of options passed to http_open.
See also
- complements http:write_cookies
- library(http/http_cookies) implements cookie handling on top of these hooks.