View source with raw comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2002-2023, University of Amsterdam
    7                              VU University Amsterdam
    8                              CWI, Amsterdam
    9                              SWI-Prolog Solutions b.v.
   10    All rights reserved.
   11
   12    Redistribution and use in source and binary forms, with or without
   13    modification, are permitted provided that the following conditions
   14    are met:
   15
   16    1. Redistributions of source code must retain the above copyright
   17       notice, this list of conditions and the following disclaimer.
   18
   19    2. Redistributions in binary form must reproduce the above copyright
   20       notice, this list of conditions and the following disclaimer in
   21       the documentation and/or other materials provided with the
   22       distribution.
   23
   24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   25    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   26    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   27    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   28    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   29    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   30    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   31    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   32    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   33    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   34    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   35    POSSIBILITY OF SUCH DAMAGE.
   36*/
   37
   38:- module(http_open,
   39          [ http_open/3,                % +URL, -Stream, +Options
   40            http_set_authorization/2,   % +URL, +Authorization
   41            http_close_keep_alive/1     % +Address
   42          ]).   43:- autoload(library(aggregate),[aggregate_all/3]).   44:- autoload(library(apply),[foldl/4,include/3]).   45:- autoload(library(base64),[base64/3]).   46:- use_module(library(debug),[debug/3,debugging/1]).   47:- autoload(library(error),
   48	    [ domain_error/2, must_be/2, existence_error/2, instantiation_error/1
   49	    ]).   50:- autoload(library(lists),[last/2,member/2]).   51:- autoload(library(option),
   52	    [ meta_options/3, option/2, select_option/4, merge_options/3,
   53	      option/3, select_option/3
   54	    ]).   55:- autoload(library(readutil),[read_line_to_codes/2]).   56:- autoload(library(uri),
   57	    [ uri_resolve/3, uri_components/2, uri_data/3,
   58              uri_authority_components/2, uri_authority_data/3,
   59	      uri_encoded/3, uri_query_components/2, uri_is_global/1
   60	    ]).   61:- autoload(library(http/http_header),
   62            [ http_parse_header/2, http_post_data/3 ]).   63:- autoload(library(http/http_stream),[stream_range_open/3]).   64:- if(exists_source(library(ssl))).   65:- autoload(library(ssl), [ssl_upgrade_legacy_options/2]).   66:- endif.   67:- use_module(library(socket)).

HTTP client library

This library defines http_open/3, which opens an URL as a Prolog stream. The functionality of the library can be extended by loading two additional modules that act as plugins:

library(http/http_ssl_plugin)
Loading this library causes http_open/3 to handle HTTPS connections. Relevant options for SSL certificate handling are handed to ssl_context/3. This plugin is loaded automatically if the scheme https is requested using a default SSL context. See the plugin for additional information regarding security.
library(zlib)
Loading this library supports the gzip transfer encoding. This plugin is lazily loaded if a connection is opened that claims this transfer encoding.
library(http/http_cookie)
Loading this library adds tracking cookies to http_open/3. Returned cookies are collected in the Prolog database and supplied for subsequent requests.
library(http/http_stream)
This library adds support for chunked encoding. It is lazily loaded if the server sends a Transfer-encoding: chunked header.

Here is a simple example to fetch a web-page:

?- http_open('http://www.google.com/search?q=prolog', In, []),
   copy_stream_data(In, user_output),
   close(In).
<!doctype html><head><title>prolog - Google Search</title><script>
...

The example below fetches the modification time of a web-page. Note that Modified is '' (the empty atom) if the web-server does not provide a time-stamp for the resource. See also parse_time/2.

modified(URL, Stamp) :-
       http_open(URL, In,
                 [ method(head),
                   header(last_modified, Modified)
                 ]),
       close(In),
       Modified \== '',
       parse_time(Modified, Stamp).

Then next example uses Google search. It exploits library(uri) to manage URIs, library(sgml) to load an HTML document and library(xpath) to navigate the parsed HTML. Note that you may need to adjust the XPath queries if the data returned by Google changes (this example indeed no longer works and currently fails at the first xpath/3 call)

:- use_module(library(http/http_open)).
:- use_module(library(xpath)).
:- use_module(library(sgml)).
:- use_module(library(uri)).

google(For, Title, HREF) :-
        uri_encoded(query_value, For, Encoded),
        atom_concat('http://www.google.com/search?q=', Encoded, URL),
        http_open(URL, In, []),
        call_cleanup(
            load_html(In, DOM, []),
            close(In)),
        xpath(DOM, //h3(@class=r), Result),
        xpath(Result, //a(@href=HREF0, text), Title),
        uri_components(HREF0, Components),
        uri_data(search, Components, Query),
        uri_query_components(Query, Parts),
        memberchk(q=HREF, Parts).

An example query is below:

?- google(prolog, Title, HREF).
Title = 'SWI-Prolog',
HREF = 'http://www.swi-prolog.org/' ;
Title = 'Prolog - Wikipedia',
HREF = 'https://nl.wikipedia.org/wiki/Prolog' ;
Title = 'Prolog - Wikipedia, the free encyclopedia',
HREF = 'https://en.wikipedia.org/wiki/Prolog' ;
Title = 'Pro-Log is logistiek dienstverlener m.b.t. vervoer over water.',
HREF = 'http://www.pro-log.nl/' ;
Title = 'Learn Prolog Now!',
HREF = 'http://www.learnprolognow.org/' ;
Title = 'Free Online Version - Learn Prolog
...
See also
- load_html/3 and xpath/3 can be used to parse and navigate HTML documents.
- http_get/3 and http_post/4 provide an alternative interface that convert the reply depending on the Content-Type header. */
  174:- multifile
  175    http:encoding_filter/3,           % +Encoding, +In0, -In
  176    http:current_transfer_encoding/1, % ?Encoding
  177    http:disable_encoding_filter/1,   % +ContentType
  178    http:http_protocol_hook/5,        % +Protocol, +Parts, +StreamPair,
  179                                      % -NewStreamPair, +Options
  180    http:open_options/2,              % +Parts, -Options
  181    http:write_cookies/3,             % +Out, +Parts, +Options
  182    http:update_cookies/3,            % +CookieLine, +Parts, +Options
  183    http:authenticate_client/2,       % +URL, +Action
  184    http:http_connection_over_proxy/6.  185
  186:- meta_predicate
  187    http_open(+,-,:).  188
  189:- predicate_options(http_open/3, 3,
  190                     [ authorization(compound),
  191                       final_url(-atom),
  192                       header(+atom, -atom),
  193                       headers(-list),
  194                       raw_headers(-list(string)),
  195                       connection(+atom),
  196                       method(oneof([delete,get,put,purge,head,
  197                                     post,patch,options])),
  198                       size(-integer),
  199                       status_code(-integer),
  200                       output(-stream),
  201                       timeout(number),
  202                       unix_socket(+atom),
  203                       proxy(atom, integer),
  204                       proxy_authorization(compound),
  205                       bypass_proxy(boolean),
  206                       request_header(any),
  207                       user_agent(atom),
  208                       version(-compound),
  209        % The option below applies if library(http/http_header) is loaded
  210                       post(any),
  211        % The options below apply if library(http/http_ssl_plugin)) is loaded
  212                       pem_password_hook(callable),
  213                       cacert_file(atom),
  214                       cert_verify_hook(callable)
  215                     ]).
 user_agent(-Agent) is det
Default value for User-Agent, can be overruled using the option user_agent(Agent) of http_open/3.
  222user_agent('SWI-Prolog').
 http_open(+URL, -Stream, +Options) is det
Open the data at the HTTP server as a Prolog stream. URL is either an atom specifying a URL or a list representing a broken-down URL as specified below. After this predicate succeeds the data can be read from Stream. After completion this stream must be closed using the built-in Prolog predicate close/1. Options provides additional options:
authenticate(+Boolean)
If false (default true), do not try to automatically authenticate the client if a 401 (Unauthorized) status code is received.
authorization(+Term)
Send authorization. See also http_set_authorization/2. Supported schemes:
basic(+User, +Password)
HTTP Basic authentication.
bearer(+Token)
HTTP Bearer authentication.
digest(+User, +Password)
HTTP Digest authentication. This option is only provided if the plugin library(http/http_digest) is also loaded.
unix_socket(+Path)
Connect to the given Unix domain socket. In this scenario the host name and port or ignored. If the server replies with a redirect message and the host differs from the original host as normal TCP connection is used to handle the redirect. This option is inspired by curl(1)'s option `--unix-socket`.
connection(+Connection)
Specify the Connection header. Default is close. The alternative is Keep-alive. This maintains a pool of available connections as determined by keep_connection/1. The library(http/websockets) uses Keep-alive, Upgrade. Keep-alive connections can be closed explicitly using http_close_keep_alive/1. Keep-alive connections may significantly improve repetitive requests on the same server, especially if the IP route is long, HTTPS is used or the connection uses a proxy.
final_url(-FinalURL)
Unify FinalURL with the final destination. This differs from the original URL if the returned head of the original indicates an HTTP redirect (codes 301, 302 or 303). Without a redirect, FinalURL is the same as URL if URL is an atom, or a URL constructed from the parts.
header(Name, -AtomValue)
If provided, AtomValue is unified with the value of the indicated field in the reply header. Name is matched case-insensitive and the underscore (_) matches the hyphen (-). Multiple of these options may be provided to extract multiple header fields. If the header is not available AtomValue is unified to the empty atom ('').
headers(-List)
If provided, List is unified with a list of Name(Value) pairs corresponding to fields in the reply header. Name and Value follow the same conventions used by the header(Name,Value) option. A pseudo header status_code(Code) is added to provide the HTTP status as an integer. See also raw_headers(-List) which provides the entire HTTP reply header in unparsed representation.
method(+Method)
One of get (default), head, delete, post, put or patch. The head message can be used in combination with the header(Name, Value) option to access information on the resource without actually fetching the resource itself. The returned stream must be closed immediately.

If post(Data) is provided, the default is post.

size(-Size)
Size is unified with the integer value of Content-Length in the reply header.
version(-Version)
Version is a pair Major-Minor, where Major and Minor are integers representing the HTTP version in the reply header.
range(+Range)
Ask for partial content. Range is a term Unit(From,To), where From is an integer and To is either an integer or the atom end. HTTP 1.1 only supports Unit = bytes. E.g., to ask for bytes 1000-1999, use the option range(bytes(1000,1999))
raw_encoding(+Encoding)
Do not install a decoding filter for Encoding. For example, using raw_encoding('applocation/gzip') the system will not decompress the stream if it is compressed using gzip.
raw_headers(-Lines)
Unify Lines with a list of strings that represents the complete reply header returned by the server. See also headers(-List).
redirect(+Boolean)
If false (default true), do not automatically redirect if a 3XX code is received. Must be combined with status_code(Code) and one of the header options to read the redirect reply. In particular, without status_code(Code) a redirect is mapped to an exception.
status_code(-Code)
If this option is present and Code unifies with the HTTP status code, do not translate errors (4xx, 5xx) into an exception. Instead, http_open/3 behaves as if 2xx (success) is returned, providing the application to read the error document from the returned stream.
output(-Out)
Unify the output stream with Out and do not close it. This can be used to upgrade a connection.
timeout(+Timeout)
If provided, set a timeout on the stream using set_stream/2. With this option if no new data arrives within Timeout seconds the stream raises an exception. Default is to wait forever (infinite).
post(+Data)
Issue a POST request on the HTTP server. Data is handed to http_post_data/3.
proxy(+Host:Port)
Use an HTTP proxy to connect to the outside world. See also socket:proxy_for_url/3. This option overrules the proxy specification defined by socket:proxy_for_url/3.
proxy(+Host, +Port)
Synonym for proxy(+Host:Port). Deprecated.
proxy_authorization(+Authorization)
Send authorization to the proxy. Otherwise the same as the authorization option.
bypass_proxy(+Boolean)
If true, bypass proxy hooks. Default is false.
request_header(Name=Value)
Additional name-value parts are added in the order of appearance to the HTTP request header. No interpretation is done.
max_redirect(+Max)
Sets the maximum length of a redirection chain. This is needed for some IRIs that redirect indefinitely to other IRIs without looping (e.g., redirecting to IRIs with a random element in them). Max must be either a non-negative integer or the atom infinite. The default value is 10.
user_agent(+Agent)
Defines the value of the User-Agent field of the HTTP header. Default is SWI-Prolog.

The hook http:open_options/2 can be used to provide default options based on the broken-down URL. The option status_code(-Code) is particularly useful to query REST interfaces that commonly return status codes other than 200 that need to be be processed by the client code.

Arguments:
URL- is either an atom or string (url) or a list of parts.

When provided, this list may contain the fields scheme, user, password, host, port, path and either query_string (whose argument is an atom) or search (whose argument is a list of Name(Value) or Name=Value compound terms). Only host is mandatory. The example below opens the URL http://www.example.com/my/path?q=Hello%20World&lang=en. Note that values must not be quoted because the library inserts the required quotes.

http_open([ host('www.example.com'),
            path('/my/path'),
            search([ q='Hello world',
                     lang=en
                   ])
          ])
throws
- error(existence_error(url, Id),Context) is raised if the HTTP result code is not in the range 200..299. Context has the shape context(Message, status(Code, TextCode)), where Code is the numeric HTTP code and TextCode is the textual description thereof provided by the server. Message may provide additional details or may be unbound.
See also
- ssl_context/3 for SSL related options if library(http/http_ssl_plugin) is loaded.
  424:- multifile
  425    socket:proxy_for_url/3.           % +URL, +Host, -ProxyList
  426
  427http_open(URL, Stream, QOptions) :-
  428    meta_options(is_meta, QOptions, Options0),
  429    (   atomic(URL)
  430    ->  parse_url_ex(URL, Parts)
  431    ;   Parts = URL
  432    ),
  433    autoload_https(Parts),
  434    upgrade_ssl_options(Parts, Options0, Options),
  435    add_authorization(Parts, Options, Options1),
  436    findall(HostOptions, hooked_options(Parts, HostOptions), AllHostOptions),
  437    foldl(merge_options_rev, AllHostOptions, Options1, Options2),
  438    (   option(bypass_proxy(true), Options)
  439    ->  try_http_proxy(direct, Parts, Stream, Options2)
  440    ;   term_variables(Options2, Vars2),
  441        findall(Result-Vars2,
  442                try_a_proxy(Parts, Result, Options2),
  443                ResultList),
  444        last(ResultList, Status-Vars2)
  445    ->  (   Status = true(_Proxy, Stream)
  446        ->  true
  447        ;   throw(error(proxy_error(tried(ResultList)), _))
  448        )
  449    ;   try_http_proxy(direct, Parts, Stream, Options2)
  450    ).
  451
  452try_a_proxy(Parts, Result, Options) :-
  453    parts_uri(Parts, AtomicURL),
  454    option(host(Host), Parts),
  455    (   option(unix_socket(Path), Options)
  456    ->  Proxy = unix_socket(Path)
  457    ;   (   option(proxy(ProxyHost:ProxyPort), Options)
  458        ;   is_list(Options),
  459            memberchk(proxy(ProxyHost,ProxyPort), Options)
  460        )
  461    ->  Proxy = proxy(ProxyHost, ProxyPort)
  462    ;   socket:proxy_for_url(AtomicURL, Host, Proxy)
  463    ),
  464    debug(http(proxy),
  465          'http_open: Connecting via ~w to ~w', [Proxy, AtomicURL]),
  466    (   catch(try_http_proxy(Proxy, Parts, Stream, Options), E, true)
  467    ->  (   var(E)
  468        ->  !, Result = true(Proxy, Stream)
  469        ;   Result = error(Proxy, E)
  470        )
  471    ;   Result = false(Proxy)
  472    ),
  473    debug(http(proxy), 'http_open: ~w: ~p', [Proxy, Result]).
  474
  475try_http_proxy(Method, Parts, Stream, Options0) :-
  476    option(host(Host), Parts),
  477    proxy_request_uri(Method, Parts, RequestURI),
  478    select_option(visited(Visited0), Options0, OptionsV, []),
  479    Options = [visited([Parts|Visited0])|OptionsV],
  480    parts_scheme(Parts, Scheme),
  481    default_port(Scheme, DefPort),
  482    url_part(port(Port), Parts, DefPort),
  483    host_and_port(Host, DefPort, Port, HostPort),
  484    (   option(connection(Connection), Options0),
  485        keep_alive(Connection),
  486        get_from_pool(Host:Port, StreamPair),
  487        debug(http(connection), 'Trying Keep-alive to ~p using ~p',
  488              [ Host:Port, StreamPair ]),
  489        catch(send_rec_header(StreamPair, Stream, HostPort,
  490                              RequestURI, Parts, Options),
  491              Error,
  492              keep_alive_error(Error, StreamPair))
  493    ->  true
  494    ;   http:http_connection_over_proxy(Method, Parts, Host:Port,
  495                                        SocketStreamPair, Options, Options1),
  496        (   catch(http:http_protocol_hook(Scheme, Parts,
  497                                          SocketStreamPair,
  498                                          StreamPair, Options),
  499                  Error,
  500                  ( close(SocketStreamPair, [force(true)]),
  501                    throw(Error)))
  502        ->  true
  503        ;   StreamPair = SocketStreamPair
  504        ),
  505        send_rec_header(StreamPair, Stream, HostPort,
  506                        RequestURI, Parts, Options1)
  507    ),
  508    return_final_url(Options).
  509
  510proxy_request_uri(direct, Parts, RequestURI) :-
  511    !,
  512    parts_request_uri(Parts, RequestURI).
  513proxy_request_uri(unix_socket(_), Parts, RequestURI) :-
  514    !,
  515    parts_request_uri(Parts, RequestURI).
  516proxy_request_uri(_, Parts, RequestURI) :-
  517    parts_uri(Parts, RequestURI).
  518
  519http:http_connection_over_proxy(unix_socket(Path), _, _,
  520                                StreamPair, Options, Options) :-
  521    !,
  522    unix_domain_socket(Socket),
  523    tcp_connect(Socket, Path),
  524    tcp_open_socket(Socket, In, Out),
  525    stream_pair(StreamPair, In, Out).
  526http:http_connection_over_proxy(direct, _, Host:Port,
  527                                StreamPair, Options, Options) :-
  528    !,
  529    open_socket(Host:Port, StreamPair, Options).
  530http:http_connection_over_proxy(proxy(ProxyHost, ProxyPort), Parts, _,
  531                                StreamPair, Options, Options) :-
  532    \+ ( memberchk(scheme(Scheme), Parts),
  533         secure_scheme(Scheme)
  534       ),
  535    !,
  536    % We do not want any /more/ proxy after this
  537    open_socket(ProxyHost:ProxyPort, StreamPair,
  538                [bypass_proxy(true)|Options]).
  539http:http_connection_over_proxy(socks(SocksHost, SocksPort), _Parts, Host:Port,
  540                                StreamPair, Options, Options) :-
  541    !,
  542    tcp_connect(SocksHost:SocksPort, StreamPair, [bypass_proxy(true)]),
  543    catch(negotiate_socks_connection(Host:Port, StreamPair),
  544          Error,
  545          ( close(StreamPair, [force(true)]),
  546            throw(Error)
  547          )).
 hooked_options(+Parts, -Options) is nondet
Calls http:open_options/2 and if necessary upgrades old SSL cacerts_file(File) option to a cacerts(List) option to ensure proper merging of options.
  555hooked_options(Parts, Options) :-
  556    http:open_options(Parts, Options0),
  557    upgrade_ssl_options(Parts, Options0, Options).
  558
  559:- if(current_predicate(ssl_upgrade_legacy_options/2)).  560upgrade_ssl_options(Parts, Options0, Options) :-
  561    requires_ssl(Parts),
  562    !,
  563    ssl_upgrade_legacy_options(Options0, Options).
  564:- endif.  565upgrade_ssl_options(_, Options, Options).
  566
  567merge_options_rev(Old, New, Merged) :-
  568    merge_options(New, Old, Merged).
  569
  570is_meta(pem_password_hook).             % SSL plugin callbacks
  571is_meta(cert_verify_hook).
  572
  573
  574http:http_protocol_hook(http, _, StreamPair, StreamPair, _).
  575
  576default_port(https, 443) :- !.
  577default_port(wss,   443) :- !.
  578default_port(_,     80).
  579
  580host_and_port(Host, DefPort, DefPort, Host) :- !.
  581host_and_port(Host, _,       Port,    Host:Port).
 autoload_https(+Parts) is det
If the requested scheme is https or wss, load the HTTPS plugin.
  587autoload_https(Parts) :-
  588    requires_ssl(Parts),
  589    memberchk(scheme(S), Parts),
  590    \+ clause(http:http_protocol_hook(S, _, StreamPair, StreamPair, _),_),
  591    exists_source(library(http/http_ssl_plugin)),
  592    !,
  593    use_module(library(http/http_ssl_plugin)).
  594autoload_https(_).
  595
  596requires_ssl(Parts) :-
  597    memberchk(scheme(S), Parts),
  598    secure_scheme(S).
  599
  600secure_scheme(https).
  601secure_scheme(wss).
 send_rec_header(+StreamPair, -Stream, +Host, +RequestURI, +Parts, +Options) is det
Send header to Out and process reply. If there is an error or failure, close In and Out and return the error or failure.
  609send_rec_header(StreamPair, Stream, Host, RequestURI, Parts, Options) :-
  610    (   catch(guarded_send_rec_header(StreamPair, Stream,
  611                                      Host, RequestURI, Parts, Options),
  612              E, true)
  613    ->  (   var(E)
  614        ->  (   option(output(StreamPair), Options)
  615            ->  true
  616            ;   true
  617            )
  618        ;   close(StreamPair, [force(true)]),
  619            throw(E)
  620        )
  621    ;   close(StreamPair, [force(true)]),
  622        fail
  623    ).
  624
  625guarded_send_rec_header(StreamPair, Stream, Host, RequestURI, Parts, Options) :-
  626    user_agent(Agent, Options),
  627    method(Options, MNAME),
  628    http_version(Version),
  629    option(connection(Connection), Options, close),
  630    debug(http(send_request), "> ~w ~w HTTP/~w", [MNAME, RequestURI, Version]),
  631    debug(http(send_request), "> Host: ~w", [Host]),
  632    debug(http(send_request), "> User-Agent: ~w", [Agent]),
  633    debug(http(send_request), "> Connection: ~w", [Connection]),
  634    format(StreamPair,
  635           '~w ~w HTTP/~w\r\n\c
  636               Host: ~w\r\n\c
  637               User-Agent: ~w\r\n\c
  638               Connection: ~w\r\n',
  639           [MNAME, RequestURI, Version, Host, Agent, Connection]),
  640    parts_uri(Parts, URI),
  641    x_headers(Options, URI, StreamPair),
  642    write_cookies(StreamPair, Parts, Options),
  643    (   option(post(PostData), Options)
  644    ->  http_post_data(PostData, StreamPair, [])
  645    ;   format(StreamPair, '\r\n', [])
  646    ),
  647    flush_output(StreamPair),
  648                                    % read the reply header
  649    read_header(StreamPair, Parts, ReplyVersion, Code, Comment, Lines),
  650    update_cookies(Lines, Parts, Options),
  651    reply_header(Lines, Options),
  652    do_open(ReplyVersion, Code, Comment, Lines, Options, Parts, Host,
  653            StreamPair, Stream).
 http_version(-Version:atom) is det
HTTP version we publish. We can only use 1.1 if we support chunked encoding.
  661http_version('1.1') :-
  662    http:current_transfer_encoding(chunked),
  663    !.
  664http_version('1.1') :-
  665    autoload_encoding(chunked),
  666    !.
  667http_version('1.0').
  668
  669method(Options, MNAME) :-
  670    option(post(_), Options),
  671    !,
  672    option(method(M), Options, post),
  673    (   map_method(M, MNAME0)
  674    ->  MNAME = MNAME0
  675    ;   domain_error(method, M)
  676    ).
  677method(Options, MNAME) :-
  678    option(method(M), Options, get),
  679    (   map_method(M, MNAME0)
  680    ->  MNAME = MNAME0
  681    ;   map_method(_, M)
  682    ->  MNAME = M
  683    ;   domain_error(method, M)
  684    ).
 map_method(+MethodID, -Method)
Support additional METHOD keywords. Default are the official HTTP methods as defined by the various RFCs.
  691:- multifile
  692    map_method/2.  693
  694map_method(delete,  'DELETE').
  695map_method(get,     'GET').
  696map_method(head,    'HEAD').
  697map_method(post,    'POST').
  698map_method(put,     'PUT').
  699map_method(patch,   'PATCH').
  700map_method(options, 'OPTIONS').
 x_headers(+Options, +URI, +Out) is det
Emit extra headers from request_header(Name=Value) options in Options.
To be done
- Use user/password fields
  709x_headers(Options, URI, Out) :-
  710    x_headers_(Options, [url(URI)|Options], Out).
  711
  712x_headers_([], _, _).
  713x_headers_([H|T], Options, Out) :-
  714    x_header(H, Options, Out),
  715    x_headers_(T, Options, Out).
  716
  717x_header(request_header(Name=Value), _, Out) :-
  718    !,
  719    debug(http(send_request), "> ~w: ~w", [Name, Value]),
  720    format(Out, '~w: ~w\r\n', [Name, Value]).
  721x_header(proxy_authorization(ProxyAuthorization), Options, Out) :-
  722    !,
  723    auth_header(ProxyAuthorization, Options, 'Proxy-Authorization', Out).
  724x_header(authorization(Authorization), Options, Out) :-
  725    !,
  726    auth_header(Authorization, Options, 'Authorization', Out).
  727x_header(range(Spec), _, Out) :-
  728    !,
  729    Spec =.. [Unit, From, To],
  730    (   To == end
  731    ->  ToT = ''
  732    ;   must_be(integer, To),
  733        ToT = To
  734    ),
  735    debug(http(send_request), "> Range: ~w=~d-~w", [Unit, From, ToT]),
  736    format(Out, 'Range: ~w=~d-~w\r\n', [Unit, From, ToT]).
  737x_header(_, _, _).
 auth_header(+AuthOption, +Options, +HeaderName, +Out)
  741auth_header(basic(User, Password), _, Header, Out) :-
  742    !,
  743    format(codes(Codes), '~w:~w', [User, Password]),
  744    phrase(base64(Codes), Base64Codes),
  745    debug(http(send_request), "> ~w: Basic ~s", [Header, Base64Codes]),
  746    format(Out, '~w: Basic ~s\r\n', [Header, Base64Codes]).
  747auth_header(bearer(Token), _, Header, Out) :-
  748    !,
  749    debug(http(send_request), "> ~w: Bearer ~w", [Header,Token]),
  750    format(Out, '~w: Bearer ~w\r\n', [Header, Token]).
  751auth_header(Auth, Options, _, Out) :-
  752    option(url(URL), Options),
  753    add_method(Options, Options1),
  754    http:authenticate_client(URL, send_auth_header(Auth, Out, Options1)),
  755    !.
  756auth_header(Auth, _, _, _) :-
  757    domain_error(authorization, Auth).
  758
  759user_agent(Agent, Options) :-
  760    (   option(user_agent(Agent), Options)
  761    ->  true
  762    ;   user_agent(Agent)
  763    ).
  764
  765add_method(Options0, Options) :-
  766    option(method(_), Options0),
  767    !,
  768    Options = Options0.
  769add_method(Options0, Options) :-
  770    option(post(_), Options0),
  771    !,
  772    Options = [method(post)|Options0].
  773add_method(Options0, [method(get)|Options0]).
 do_open(+HTTPVersion, +HTTPStatusCode, +HTTPStatusComment, +Header, +Options, +Parts, +Host, +In, -FinalIn) is det
Handle the HTTP status once available. If 200-299, we are ok. If a redirect, redo the open, returning a new stream. Else issue an error.
Errors
- existence_error(url, URL)
  784                                        % Redirections
  785do_open(_, Code, _, Lines, Options0, Parts, _, In, Stream) :-
  786    redirect_code(Code),
  787    option(redirect(true), Options0, true),
  788    location(Lines, RequestURI),
  789    !,
  790    debug(http(redirect), 'http_open: redirecting to ~w', [RequestURI]),
  791    close(In),
  792    parts_uri(Parts, Base),
  793    uri_resolve(RequestURI, Base, Redirected),
  794    parse_url_ex(Redirected, RedirectedParts),
  795    (   redirect_limit_exceeded(Options0, Max)
  796    ->  format(atom(Comment), 'max_redirect (~w) limit exceeded', [Max]),
  797        throw(error(permission_error(redirect, http, Redirected),
  798                    context(_, Comment)))
  799    ;   redirect_loop(RedirectedParts, Options0)
  800    ->  throw(error(permission_error(redirect, http, Redirected),
  801                    context(_, 'Redirection loop')))
  802    ;   true
  803    ),
  804    redirect_options(Parts, RedirectedParts, Options0, Options),
  805    http_open(RedirectedParts, Stream, Options).
  806                                        % Need authentication
  807do_open(_Version, Code, _Comment, Lines, Options0, Parts, _Host, In0, Stream) :-
  808    authenticate_code(Code),
  809    option(authenticate(true), Options0, true),
  810    parts_uri(Parts, URI),
  811    parse_headers(Lines, Headers),
  812    http:authenticate_client(
  813             URI,
  814             auth_reponse(Headers, Options0, Options)),
  815    !,
  816    close(In0),
  817    http_open(Parts, Stream, Options).
  818                                        % Accepted codes
  819do_open(Version, Code, _, Lines, Options, Parts, Host, In0, In) :-
  820    (   option(status_code(Code), Options),
  821        Lines \== []
  822    ->  true
  823    ;   successful_code(Code)
  824    ),
  825    !,
  826    parts_uri(Parts, URI),
  827    parse_headers(Lines, Headers),
  828    return_version(Options, Version),
  829    return_size(Options, Headers),
  830    return_fields(Options, Headers),
  831    return_headers(Options, [status_code(Code)|Headers]),
  832    consider_keep_alive(Lines, Parts, Host, In0, In1, Options),
  833    transfer_encoding_filter(Lines, In1, In, Options),
  834                                    % properly re-initialise the stream
  835    set_stream(In, file_name(URI)),
  836    set_stream(In, record_position(true)).
  837do_open(_, _, _, [], Options, _, _, _, _) :-
  838    option(connection(Connection), Options),
  839    keep_alive(Connection),
  840    !,
  841    throw(error(keep_alive(closed),_)).
  842                                        % report anything else as error
  843do_open(_Version, Code, Comment, _,  _, Parts, _, _, _) :-
  844    parts_uri(Parts, URI),
  845    (   map_error_code(Code, Error)
  846    ->  Formal =.. [Error, url, URI]
  847    ;   Formal = existence_error(url, URI)
  848    ),
  849    throw(error(Formal, context(_, status(Code, Comment)))).
  850
  851
  852successful_code(Code) :-
  853    between(200, 299, Code).
 redirect_limit_exceeded(+Options:list(compound), -Max:nonneg) is semidet
True if we have exceeded the maximum redirection length (default 10).
  859redirect_limit_exceeded(Options, Max) :-
  860    option(visited(Visited), Options, []),
  861    length(Visited, N),
  862    option(max_redirect(Max), Options, 10),
  863    (Max == infinite -> fail ; N > Max).
 redirect_loop(+Parts, +Options) is semidet
True if we are in a redirection loop. Note that some sites redirect once to the same place using cookies or similar, so we allow for two tries. In fact, we should probably test whether authorization or cookie headers have changed.
  873redirect_loop(Parts, Options) :-
  874    option(visited(Visited), Options, []),
  875    include(==(Parts), Visited, Same),
  876    length(Same, Count),
  877    Count > 2.
 redirect_options(+Parts, +RedirectedParts, +Options0, -Options) is det
A redirect from a POST should do a GET on the returned URI. This means we must remove the method(post) and post(Data) options from the original option-list.

If we are connecting over a Unix domain socket we drop this option if the redirect host does not match the initial host.

  889redirect_options(Parts, RedirectedParts, Options0, Options) :-
  890    select_option(unix_socket(_), Options0, Options1),
  891    memberchk(host(Host), Parts),
  892    memberchk(host(RHost), RedirectedParts),
  893    debug(http(redirect), 'http_open: redirecting AF_UNIX ~w to ~w',
  894          [Host, RHost]),
  895    Host \== RHost,
  896    !,
  897    redirect_options(Options1, Options).
  898redirect_options(_, _, Options0, Options) :-
  899    redirect_options(Options0, Options).
  900
  901redirect_options(Options0, Options) :-
  902    (   select_option(post(_), Options0, Options1)
  903    ->  true
  904    ;   Options1 = Options0
  905    ),
  906    (   select_option(method(Method), Options1, Options),
  907        \+ redirect_method(Method)
  908    ->  true
  909    ;   Options = Options1
  910    ).
  911
  912redirect_method(delete).
  913redirect_method(get).
  914redirect_method(head).
 map_error_code(+HTTPCode, -PrologError) is semidet
Map HTTP error codes to Prolog errors.
To be done
- Many more maps. Unfortunately many have no sensible Prolog counterpart.
  924map_error_code(401, permission_error).
  925map_error_code(403, permission_error).
  926map_error_code(404, existence_error).
  927map_error_code(405, permission_error).
  928map_error_code(407, permission_error).
  929map_error_code(410, existence_error).
  930
  931redirect_code(301).                     % Moved Permanently
  932redirect_code(302).                     % Found (previously "Moved Temporary")
  933redirect_code(303).                     % See Other
  934redirect_code(307).                     % Temporary Redirect
  935
  936authenticate_code(401).
 open_socket(+Address, -StreamPair, +Options) is det
Create and connect a client socket to Address. Options
timeout(+Timeout)
Sets timeout on the stream, after connecting the socket.
To be done
- Make timeout also work on tcp_connect/4.
- This is the same as do_connect/4 in http_client.pl
  949open_socket(Address, StreamPair, Options) :-
  950    debug(http(open), 'http_open: Connecting to ~p ...', [Address]),
  951    tcp_connect(Address, StreamPair, Options),
  952    stream_pair(StreamPair, In, Out),
  953    debug(http(open), '\tok ~p ---> ~p', [In, Out]),
  954    set_stream(In, record_position(false)),
  955    (   option(timeout(Timeout), Options)
  956    ->  set_stream(In, timeout(Timeout))
  957    ;   true
  958    ).
  959
  960
  961return_version(Options, Major-Minor) :-
  962    option(version(Major-Minor), Options, _).
  963
  964return_size(Options, Headers) :-
  965    (   memberchk(content_length(Size), Headers)
  966    ->  option(size(Size), Options, _)
  967    ;   true
  968    ).
  969
  970return_fields([], _).
  971return_fields([header(Name, Value)|T], Headers) :-
  972    !,
  973    (   Term =.. [Name,Value],
  974        memberchk(Term, Headers)
  975    ->  true
  976    ;   Value = ''
  977    ),
  978    return_fields(T, Headers).
  979return_fields([_|T], Lines) :-
  980    return_fields(T, Lines).
  981
  982return_headers(Options, Headers) :-
  983    option(headers(Headers), Options, _).
 parse_headers(+Lines, -Headers:list(compound)) is det
Parse the header lines for the headers(-List) option. Invalid header lines are skipped, printing a warning using pring_message/2.
  991parse_headers([], []) :- !.
  992parse_headers([Line|Lines], Headers) :-
  993    catch(http_parse_header(Line, [Header]), Error, true),
  994    (   var(Error)
  995    ->  Headers = [Header|More]
  996    ;   print_message(warning, Error),
  997        Headers = More
  998    ),
  999    parse_headers(Lines, More).
 return_final_url(+Options) is semidet
If Options contains final_url(URL), unify URL with the final URL after redirections.
 1007return_final_url(Options) :-
 1008    option(final_url(URL), Options),
 1009    var(URL),
 1010    !,
 1011    option(visited([Parts|_]), Options),
 1012    parts_uri(Parts, URL).
 1013return_final_url(_).
 transfer_encoding_filter(+Lines, +In0, -In, +Options) is det
Install filters depending on the transfer encoding. If In0 is a stream-pair, we close the output side. If transfer-encoding is not specified, the content-encoding is interpreted as a synonym for transfer-encoding, because many servers incorrectly depend on this. Exceptions to this are content-types for which disable_encoding_filter/1 holds.
 1025transfer_encoding_filter(Lines, In0, In, Options) :-
 1026    transfer_encoding(Lines, Encoding),
 1027    !,
 1028    transfer_encoding_filter_(Encoding, In0, In, Options).
 1029transfer_encoding_filter(Lines, In0, In, Options) :-
 1030    content_encoding(Lines, Encoding),
 1031    content_type(Lines, Type),
 1032    \+ http:disable_encoding_filter(Type),
 1033    !,
 1034    transfer_encoding_filter_(Encoding, In0, In, Options).
 1035transfer_encoding_filter(_, In, In, _Options).
 1036
 1037transfer_encoding_filter_(Encoding, In0, In, Options) :-
 1038    option(raw_encoding(Encoding), Options),
 1039    !,
 1040    In = In0.
 1041transfer_encoding_filter_(Encoding, In0, In, _Options) :-
 1042    stream_pair(In0, In1, Out),
 1043    (   nonvar(Out)
 1044    ->  close(Out)
 1045    ;   true
 1046    ),
 1047    (   http:encoding_filter(Encoding, In1, In)
 1048    ->  true
 1049    ;   autoload_encoding(Encoding),
 1050        http:encoding_filter(Encoding, In1, In)
 1051    ->  true
 1052    ;   domain_error(http_encoding, Encoding)
 1053    ).
 1054
 1055:- multifile
 1056    autoload_encoding/1. 1057
 1058:- if(exists_source(library(zlib))). 1059autoload_encoding(gzip) :-
 1060    use_module(library(zlib)).
 1061:- endif. 1062:- if(exists_source(library(http/http_stream))). 1063autoload_encoding(chunked) :-
 1064    use_module(library(http/http_stream)).
 1065:- endif. 1066
 1067content_type(Lines, Type) :-
 1068    member(Line, Lines),
 1069    phrase(field('content-type'), Line, Rest),
 1070    !,
 1071    atom_codes(Type, Rest).
 http:disable_encoding_filter(+ContentType) is semidet
Do not use the Content-encoding as Transfer-encoding encoding for specific values of ContentType. This predicate is multifile and can thus be extended by the user.
 1079http:disable_encoding_filter('application/x-gzip').
 1080http:disable_encoding_filter('application/x-tar').
 1081http:disable_encoding_filter('x-world/x-vrml').
 1082http:disable_encoding_filter('application/zip').
 1083http:disable_encoding_filter('application/x-gzip').
 1084http:disable_encoding_filter('application/x-zip-compressed').
 1085http:disable_encoding_filter('application/x-compress').
 1086http:disable_encoding_filter('application/x-compressed').
 1087http:disable_encoding_filter('application/x-spoon').
 transfer_encoding(+Lines, -Encoding) is semidet
True if Encoding is the value of the Transfer-encoding header.
 1094transfer_encoding(Lines, Encoding) :-
 1095    what_encoding(transfer_encoding, Lines, Encoding).
 1096
 1097what_encoding(What, Lines, Encoding) :-
 1098    member(Line, Lines),
 1099    phrase(encoding_(What, Debug), Line, Rest),
 1100    !,
 1101    atom_codes(Encoding, Rest),
 1102    debug(http(What), '~w: ~p', [Debug, Rest]).
 1103
 1104encoding_(content_encoding, 'Content-encoding') -->
 1105    field('content-encoding').
 1106encoding_(transfer_encoding, 'Transfer-encoding') -->
 1107    field('transfer-encoding').
 content_encoding(+Lines, -Encoding) is semidet
True if Encoding is the value of the Content-encoding header.
 1114content_encoding(Lines, Encoding) :-
 1115    what_encoding(content_encoding, Lines, Encoding).
 read_header(+In:stream, +Parts, -Version, -Code:int, -Comment:atom, -Lines:list) is det
Read the HTTP reply-header. If the reply is completely empty an existence error is thrown. If the replied header is otherwise invalid a 500 HTTP error is simulated, having the comment Invalid reply header.
Arguments:
Parts- A list of compound terms that describe the parsed request URI.
Version- HTTP reply version as Major-Minor pair
Code- Numeric HTTP reply-code
Comment- Comment of reply-code as atom
Lines- Remaining header lines as code-lists.
Errors
- existence_error(http_reply, Uri)
 1134read_header(In, Parts, Major-Minor, Code, Comment, Lines) :-
 1135    read_line_to_codes(In, Line),
 1136    (   Line == end_of_file
 1137    ->  parts_uri(Parts, Uri),
 1138        existence_error(http_reply,Uri)
 1139    ;   true
 1140    ),
 1141    Line \== end_of_file,
 1142    phrase(first_line(Major-Minor, Code, Comment), Line),
 1143    debug(http(open), 'HTTP/~d.~d ~w ~w', [Major, Minor, Code, Comment]),
 1144    read_line_to_codes(In, Line2),
 1145    rest_header(Line2, In, Lines),
 1146    !,
 1147    (   debugging(http(open))
 1148    ->  forall(member(HL, Lines),
 1149               debug(http(open), '~s', [HL]))
 1150    ;   true
 1151    ).
 1152read_header(_, _, 1-1, 500, 'Invalid reply header', []).
 1153
 1154rest_header([], _, []) :- !.            % blank line: end of header
 1155rest_header(L0, In, [L0|L]) :-
 1156    read_line_to_codes(In, L1),
 1157    rest_header(L1, In, L).
 content_length(+Header, -Length:int) is semidet
Find the Content-Length in an HTTP reply-header.
 1163content_length(Lines, Length) :-
 1164    member(Line, Lines),
 1165    phrase(content_length(Length0), Line),
 1166    !,
 1167    Length = Length0.
 1168
 1169location(Lines, RequestURI) :-
 1170    member(Line, Lines),
 1171    phrase(atom_field(location, RequestURI), Line),
 1172    !.
 1173
 1174connection(Lines, Connection) :-
 1175    member(Line, Lines),
 1176    phrase(atom_field(connection, Connection0), Line),
 1177    !,
 1178    Connection = Connection0.
 1179
 1180first_line(Major-Minor, Code, Comment) -->
 1181    "HTTP/", integer(Major), ".", integer(Minor),
 1182    skip_blanks,
 1183    integer(Code),
 1184    skip_blanks,
 1185    rest(Comment).
 1186
 1187atom_field(Name, Value) -->
 1188    field(Name),
 1189    rest(Value).
 1190
 1191content_length(Len) -->
 1192    field('content-length'),
 1193    integer(Len).
 1194
 1195field(Name) -->
 1196    { atom_codes(Name, Codes) },
 1197    field_codes(Codes).
 1198
 1199field_codes([]) -->
 1200    ":",
 1201    skip_blanks.
 1202field_codes([H|T]) -->
 1203    [C],
 1204    { match_header_char(H, C)
 1205    },
 1206    field_codes(T).
 1207
 1208match_header_char(C, C) :- !.
 1209match_header_char(C, U) :-
 1210    code_type(C, to_lower(U)),
 1211    !.
 1212match_header_char(0'_, 0'-).
 1213
 1214
 1215skip_blanks -->
 1216    [C],
 1217    { code_type(C, white)
 1218    },
 1219    !,
 1220    skip_blanks.
 1221skip_blanks -->
 1222    [].
 integer(-Int)//
Read 1 or more digits and return as integer.
 1228integer(Code) -->
 1229    digit(D0),
 1230    digits(D),
 1231    { number_codes(Code, [D0|D])
 1232    }.
 1233
 1234digit(C) -->
 1235    [C],
 1236    { code_type(C, digit)
 1237    }.
 1238
 1239digits([D0|D]) -->
 1240    digit(D0),
 1241    !,
 1242    digits(D).
 1243digits([]) -->
 1244    [].
 rest(-Atom:atom)//
Get rest of input as an atom.
 1250rest(Atom) --> call(rest_(Atom)).
 1251
 1252rest_(Atom, L, []) :-
 1253    atom_codes(Atom, L).
 reply_header(+Lines, +Options) is det
Return the entire reply header as a list of strings to the option raw_headers(-Headers).
 1261reply_header(Lines, Options) :-
 1262    option(raw_headers(Headers), Options),
 1263    !,
 1264    maplist(string_codes, Headers, Lines).
 1265reply_header(_, _).
 1266
 1267
 1268                 /*******************************
 1269                 *   AUTHORIZATION MANAGEMENT   *
 1270                 *******************************/
 http_set_authorization(+URL, +Authorization) is det
Set user/password to supply with URLs that have URL as prefix. If Authorization is the atom -, possibly defined authorization is cleared. For example:
?- http_set_authorization('http://www.example.com/private/',
                          basic('John', 'Secret'))
To be done
- Move to a separate module, so http_get/3, etc. can use this too.
 1286:- dynamic
 1287    stored_authorization/2,
 1288    cached_authorization/2. 1289
 1290http_set_authorization(URL, Authorization) :-
 1291    must_be(atom, URL),
 1292    retractall(stored_authorization(URL, _)),
 1293    (   Authorization = (-)
 1294    ->  true
 1295    ;   check_authorization(Authorization),
 1296        assert(stored_authorization(URL, Authorization))
 1297    ),
 1298    retractall(cached_authorization(_,_)).
 1299
 1300check_authorization(Var) :-
 1301    var(Var),
 1302    !,
 1303    instantiation_error(Var).
 1304check_authorization(basic(User, Password)) :-
 1305    must_be(atom, User),
 1306    must_be(text, Password).
 1307check_authorization(digest(User, Password)) :-
 1308    must_be(atom, User),
 1309    must_be(text, Password).
 authorization(+URL, -Authorization) is semidet
True if Authorization must be supplied for URL.
To be done
- Cleanup cache if it gets too big.
 1317authorization(_, _) :-
 1318    \+ stored_authorization(_, _),
 1319    !,
 1320    fail.
 1321authorization(URL, Authorization) :-
 1322    cached_authorization(URL, Authorization),
 1323    !,
 1324    Authorization \== (-).
 1325authorization(URL, Authorization) :-
 1326    (   stored_authorization(Prefix, Authorization),
 1327        sub_atom(URL, 0, _, _, Prefix)
 1328    ->  assert(cached_authorization(URL, Authorization))
 1329    ;   assert(cached_authorization(URL, -)),
 1330        fail
 1331    ).
 1332
 1333add_authorization(_, Options, Options) :-
 1334    option(authorization(_), Options),
 1335    !.
 1336add_authorization(Parts, Options0, Options) :-
 1337    url_part(user(User), Parts),
 1338    url_part(password(Passwd), Parts),
 1339    !,
 1340    Options = [authorization(basic(User,Passwd))|Options0].
 1341add_authorization(Parts, Options0, Options) :-
 1342    stored_authorization(_, _) ->   % quick test to avoid work
 1343    parts_uri(Parts, URL),
 1344    authorization(URL, Auth),
 1345    !,
 1346    Options = [authorization(Auth)|Options0].
 1347add_authorization(_, Options, Options).
 parse_url_ex(+URL, -Parts)
Parts: Scheme, Host, Port, User:Password, RequestURI (no fragment).
 1355parse_url_ex(URL, [uri(URL)|Parts]) :-
 1356    uri_components(URL, Components),
 1357    phrase(components(Components), Parts),
 1358    (   option(host(_), Parts)
 1359    ->  true
 1360    ;   domain_error(url, URL)
 1361    ).
 1362
 1363components(Components) -->
 1364    uri_scheme(Components),
 1365    uri_path(Components),
 1366    uri_authority(Components),
 1367    uri_request_uri(Components).
 1368
 1369uri_scheme(Components) -->
 1370    { uri_data(scheme, Components, Scheme), nonvar(Scheme) },
 1371    !,
 1372    [ scheme(Scheme)
 1373    ].
 1374uri_scheme(_) --> [].
 1375
 1376uri_path(Components) -->
 1377    { uri_data(path, Components, Path0), nonvar(Path0),
 1378      (   Path0 == ''
 1379      ->  Path = (/)
 1380      ;   Path = Path0
 1381      )
 1382    },
 1383    !,
 1384    [ path(Path)
 1385    ].
 1386uri_path(_) --> [].
 1387
 1388uri_authority(Components) -->
 1389    { uri_data(authority, Components, Auth), nonvar(Auth),
 1390      !,
 1391      uri_authority_components(Auth, Data)
 1392    },
 1393    [ authority(Auth) ],
 1394    auth_field(user, Data),
 1395    auth_field(password, Data),
 1396    auth_field(host, Data),
 1397    auth_field(port, Data).
 1398uri_authority(_) --> [].
 1399
 1400auth_field(Field, Data) -->
 1401    { uri_authority_data(Field, Data, EncValue), nonvar(EncValue),
 1402      !,
 1403      (   atom(EncValue)
 1404      ->  uri_encoded(query_value, Value, EncValue)
 1405      ;   Value = EncValue
 1406      ),
 1407      Part =.. [Field,Value]
 1408    },
 1409    [ Part ].
 1410auth_field(_, _) --> [].
 1411
 1412uri_request_uri(Components) -->
 1413    { uri_data(path, Components, Path0),
 1414      uri_data(search, Components, Search),
 1415      (   Path0 == ''
 1416      ->  Path = (/)
 1417      ;   Path = Path0
 1418      ),
 1419      uri_data(path, Components2, Path),
 1420      uri_data(search, Components2, Search),
 1421      uri_components(RequestURI, Components2)
 1422    },
 1423    [ request_uri(RequestURI)
 1424    ].
 parts_scheme(+Parts, -Scheme) is det
 parts_uri(+Parts, -URI) is det
 parts_request_uri(+Parts, -RequestURI) is det
 parts_search(+Parts, -Search) is det
 parts_authority(+Parts, -Authority) is semidet
 1432parts_scheme(Parts, Scheme) :-
 1433    url_part(scheme(Scheme), Parts),
 1434    !.
 1435parts_scheme(Parts, Scheme) :-          % compatibility with library(url)
 1436    url_part(protocol(Scheme), Parts),
 1437    !.
 1438parts_scheme(_, http).
 1439
 1440parts_authority(Parts, Auth) :-
 1441    url_part(authority(Auth), Parts),
 1442    !.
 1443parts_authority(Parts, Auth) :-
 1444    url_part(host(Host), Parts, _),
 1445    url_part(port(Port), Parts, _),
 1446    url_part(user(User), Parts, _),
 1447    url_part(password(Password), Parts, _),
 1448    uri_authority_components(Auth,
 1449                             uri_authority(User, Password, Host, Port)).
 1450
 1451parts_request_uri(Parts, RequestURI) :-
 1452    option(request_uri(RequestURI), Parts),
 1453    !.
 1454parts_request_uri(Parts, RequestURI) :-
 1455    url_part(path(Path), Parts, /),
 1456    ignore(parts_search(Parts, Search)),
 1457    uri_data(path, Data, Path),
 1458    uri_data(search, Data, Search),
 1459    uri_components(RequestURI, Data).
 1460
 1461parts_search(Parts, Search) :-
 1462    option(query_string(Search), Parts),
 1463    !.
 1464parts_search(Parts, Search) :-
 1465    option(search(Fields), Parts),
 1466    !,
 1467    uri_query_components(Search, Fields).
 1468
 1469
 1470parts_uri(Parts, URI) :-
 1471    option(uri(URI), Parts),
 1472    !.
 1473parts_uri(Parts, URI) :-
 1474    parts_scheme(Parts, Scheme),
 1475    ignore(parts_authority(Parts, Auth)),
 1476    parts_request_uri(Parts, RequestURI),
 1477    uri_components(RequestURI, Data),
 1478    uri_data(scheme, Data, Scheme),
 1479    uri_data(authority, Data, Auth),
 1480    uri_components(URI, Data).
 1481
 1482parts_port(Parts, Port) :-
 1483    parts_scheme(Parts, Scheme),
 1484    default_port(Scheme, DefPort),
 1485    url_part(port(Port), Parts, DefPort).
 1486
 1487url_part(Part, Parts) :-
 1488    Part =.. [Name,Value],
 1489    Gen =.. [Name,RawValue],
 1490    option(Gen, Parts),
 1491    !,
 1492    Value = RawValue.
 1493
 1494url_part(Part, Parts, Default) :-
 1495    Part =.. [Name,Value],
 1496    Gen =.. [Name,RawValue],
 1497    (   option(Gen, Parts)
 1498    ->  Value = RawValue
 1499    ;   Value = Default
 1500    ).
 1501
 1502
 1503                 /*******************************
 1504                 *            COOKIES           *
 1505                 *******************************/
 1506
 1507write_cookies(Out, Parts, Options) :-
 1508    http:write_cookies(Out, Parts, Options),
 1509    !.
 1510write_cookies(_, _, _).
 1511
 1512update_cookies(_, _, _) :-
 1513    predicate_property(http:update_cookies(_,_,_), number_of_clauses(0)),
 1514    !.
 1515update_cookies(Lines, Parts, Options) :-
 1516    (   member(Line, Lines),
 1517        phrase(atom_field('set_cookie', CookieData), Line),
 1518        http:update_cookies(CookieData, Parts, Options),
 1519        fail
 1520    ;   true
 1521    ).
 1522
 1523
 1524                 /*******************************
 1525                 *           OPEN ANY           *
 1526                 *******************************/
 1527
 1528:- multifile iostream:open_hook/6.
 iostream:open_hook(+Spec, +Mode, -Stream, -Close, +Options0, -Options) is semidet
Hook implementation that makes open_any/5 support http and https URLs for Mode == read.
 1536iostream:open_hook(URL, read, Stream, Close, Options0, Options) :-
 1537    (atom(URL) -> true ; string(URL)),
 1538    uri_is_global(URL),
 1539    uri_components(URL, Components),
 1540    uri_data(scheme, Components, Scheme),
 1541    http_scheme(Scheme),
 1542    !,
 1543    Options = Options0,
 1544    Close = close(Stream),
 1545    http_open(URL, Stream, Options0).
 1546
 1547http_scheme(http).
 1548http_scheme(https).
 1549
 1550
 1551                 /*******************************
 1552                 *          KEEP-ALIVE          *
 1553                 *******************************/
 consider_keep_alive(+HeaderLines, +Parts, +Host, +Stream0, -Stream, +Options) is det
If we have agree on a Keep-alive connection, return a range stream rather than the original stream. We also use the content length and a range stream if we are dealing with an HTTPS connection. This is because not all servers seem to complete the TLS closing handshake. If the server does not complete this we receive a TLS handshake error on end-of-file, causing the read to fail.
 1566consider_keep_alive(Lines, Parts, Host, StreamPair, In, Options) :-
 1567    option(connection(Asked), Options),
 1568    keep_alive(Asked),
 1569    connection(Lines, Given),
 1570    keep_alive(Given),
 1571    content_length(Lines, Bytes),
 1572    !,
 1573    stream_pair(StreamPair, In0, _),
 1574    connection_address(Host, Parts, HostPort),
 1575    debug(http(connection),
 1576          'Keep-alive to ~w (~D bytes)', [HostPort, Bytes]),
 1577    stream_range_open(In0, In,
 1578                      [ size(Bytes),
 1579                        onclose(keep_alive(StreamPair, HostPort))
 1580                      ]).
 1581consider_keep_alive(Lines, Parts, _Host, StreamPair, In, _Options) :-
 1582    memberchk(scheme(https), Parts),
 1583    content_length(Lines, Bytes),
 1584    !,
 1585    stream_pair(StreamPair, In0, _),
 1586    stream_range_open(In0, In,
 1587                      [ size(Bytes),
 1588                        onclose(close_range(StreamPair))
 1589                      ]).
 1590consider_keep_alive(_, _, _, Stream, Stream, _).
 1591
 1592connection_address(Host, _, Host) :-
 1593    Host = _:_,
 1594    !.
 1595connection_address(Host, Parts, Host:Port) :-
 1596    parts_port(Parts, Port).
 1597
 1598keep_alive(keep_alive) :- !.
 1599keep_alive(Connection) :-
 1600    downcase_atom(Connection, 'keep-alive').
 1601
 1602:- public keep_alive/4. 1603
 1604keep_alive(StreamPair, Host, _In, 0) :-
 1605    !,
 1606    debug(http(connection), 'Adding connection to ~p to pool', [Host]),
 1607    add_to_pool(Host, StreamPair).
 1608keep_alive(StreamPair, Host, In, Left) :-
 1609    Left < 100,
 1610    debug(http(connection), 'Reading ~D left bytes', [Left]),
 1611    read_incomplete(In, Left),
 1612    add_to_pool(Host, StreamPair),
 1613    !.
 1614keep_alive(StreamPair, _, _, _) :-
 1615    debug(http(connection),
 1616          'Closing connection due to excessive unprocessed input', []),
 1617    (   debugging(http(connection))
 1618    ->  catch(close(StreamPair), E,
 1619              print_message(warning, E))
 1620    ;   close(StreamPair, [force(true)])
 1621    ).
 1622
 1623:- public close_range/3. 1624close_range(StreamPair, _Raw, _BytesLeft) :-
 1625    close(StreamPair, [force(true)]).
 read_incomplete(+In, +Left) is semidet
If we have not all input from a Keep-alive connection, read the remainder if it is short. Else, we fail and close the stream.
 1632read_incomplete(In, Left) :-
 1633    catch(setup_call_cleanup(
 1634              open_null_stream(Null),
 1635              copy_stream_data(In, Null, Left),
 1636              close(Null)),
 1637          _,
 1638          fail).
 1639
 1640:- dynamic
 1641    connection_pool/4,              % Hash, Address, Stream, Time
 1642    connection_gc_time/1. 1643
 1644add_to_pool(Address, StreamPair) :-
 1645    keep_connection(Address),
 1646    get_time(Now),
 1647    term_hash(Address, Hash),
 1648    assertz(connection_pool(Hash, Address, StreamPair, Now)).
 1649
 1650get_from_pool(Address, StreamPair) :-
 1651    term_hash(Address, Hash),
 1652    retract(connection_pool(Hash, Address, StreamPair, _)).
 keep_connection(+Address) is semidet
Succeeds if we want to keep the connection open. We currently keep a maximum of 10 connections waiting and a maximum of 2 waiting for the same address. Connections older than 2 seconds are closed.
 1661keep_connection(Address) :-
 1662    close_old_connections(2),
 1663    predicate_property(connection_pool(_,_,_,_), number_of_clauses(C)),
 1664    C =< 10,
 1665    term_hash(Address, Hash),
 1666    aggregate_all(count, connection_pool(Hash, Address, _, _), Count),
 1667    Count =< 2.
 1668
 1669close_old_connections(Timeout) :-
 1670    get_time(Now),
 1671    Before is Now - Timeout,
 1672    (   connection_gc_time(GC),
 1673        GC > Before
 1674    ->  true
 1675    ;   (   retractall(connection_gc_time(_)),
 1676            asserta(connection_gc_time(Now)),
 1677            connection_pool(Hash, Address, StreamPair, Added),
 1678            Added < Before,
 1679            retract(connection_pool(Hash, Address, StreamPair, Added)),
 1680            debug(http(connection),
 1681                  'Closing inactive keep-alive to ~p', [Address]),
 1682            close(StreamPair, [force(true)]),
 1683            fail
 1684        ;   true
 1685        )
 1686    ).
 http_close_keep_alive(+Address) is det
Close all keep-alive connections matching Address. Address is of the form Host:Port. In particular, http_close_keep_alive(_) closes all currently known keep-alive connections.
 1695http_close_keep_alive(Address) :-
 1696    forall(get_from_pool(Address, StreamPair),
 1697           close(StreamPair, [force(true)])).
 keep_alive_error(+Error, +StreamPair)
Deal with an error from reusing a keep-alive connection. If the error is due to an I/O error or end-of-file, fail to backtrack over get_from_pool/2. Otherwise it is a real error and we thus re-raise it. In all cases we close StreamPair rather than returning it to the pool as we may have done a partial read and thus be out of sync wrt. the HTTP protocol.
 1708keep_alive_error(error(keep_alive(closed), _), _) :-
 1709    !,
 1710    debug(http(connection), 'Keep-alive connection was closed', []),
 1711    fail.
 1712keep_alive_error(error(io_error(_,_), _), StreamPair) :-
 1713    !,
 1714    close(StreamPair, [force(true)]),
 1715    debug(http(connection), 'IO error on Keep-alive connection', []),
 1716    fail.
 1717keep_alive_error(error(existence_error(http_reply, _URL), _), _) :-
 1718    !,
 1719    debug(http(connection), 'Got empty reply on Keep-alive connection', []),
 1720    fail.
 1721keep_alive_error(Error, StreamPair) :-
 1722    close(StreamPair, [force(true)]),
 1723    throw(Error).
 1724
 1725
 1726                 /*******************************
 1727                 *     HOOK DOCUMENTATION       *
 1728                 *******************************/
 http:open_options(+Parts, -Options) is nondet
This hook is used by the HTTP client library to define default options based on the the broken-down request-URL. The following example redirects all trafic, except for localhost over a proxy:
:- multifile
    http:open_options/2.

http:open_options(Parts, Options) :-
    option(host(Host), Parts),
    Host \== localhost,
    Options = [proxy('proxy.local', 3128)].

This hook may return multiple solutions. The returned options are combined using merge_options/3 where earlier solutions overrule later solutions.

 http:write_cookies(+Out, +Parts, +Options) is semidet
Emit a Cookie: header for the current connection. Out is an open stream to the HTTP server, Parts is the broken-down request (see uri_components/2) and Options is the list of options passed to http_open. The predicate is called as if using ignore/1.
See also
- complements http:update_cookies/3.
- library(http/http_cookie) implements cookie handling on top of these hooks.
 http:update_cookies(+CookieData, +Parts, +Options) is semidet
Update the cookie database. CookieData is the value of the Set-Cookie field, Parts is the broken-down request (see uri_components/2) and Options is the list of options passed to http_open.
See also
- complements http:write_cookies
- library(http/http_cookies) implements cookie handling on top of these hooks.