View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2002-2020, University of Amsterdam
    7                              VU University Amsterdam
    8                              CWI, Amsterdam
    9    All rights reserved.
   10
   11    Redistribution and use in source and binary forms, with or without
   12    modification, are permitted provided that the following conditions
   13    are met:
   14
   15    1. Redistributions of source code must retain the above copyright
   16       notice, this list of conditions and the following disclaimer.
   17
   18    2. Redistributions in binary form must reproduce the above copyright
   19       notice, this list of conditions and the following disclaimer in
   20       the documentation and/or other materials provided with the
   21       distribution.
   22
   23    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   24    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   25    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   26    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   27    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   28    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   29    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   30    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   31    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   33    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   34    POSSIBILITY OF SUCH DAMAGE.
   35*/
   36
   37:- module(http_open,
   38          [ http_open/3,                % +URL, -Stream, +Options
   39            http_set_authorization/2,   % +URL, +Authorization
   40            http_close_keep_alive/1     % +Address
   41          ]).   42:- autoload(library(aggregate),[aggregate_all/3]).   43:- autoload(library(apply),[foldl/4,include/3]).   44:- autoload(library(base64),[base64/3]).   45:- autoload(library(debug),[debug/3,debugging/1]).   46:- autoload(library(error),
   47	    [ domain_error/2, must_be/2, existence_error/2, instantiation_error/1
   48	    ]).   49:- autoload(library(lists),[last/2,member/2]).   50:- autoload(library(option),
   51	    [ meta_options/3, option/2, select_option/4, merge_options/3,
   52	      option/3, select_option/3
   53	    ]).   54:- autoload(library(readutil),[read_line_to_codes/2]).   55:- autoload(library(socket),
   56	    [tcp_connect/3,negotiate_socks_connection/2]).   57:- autoload(library(uri),
   58	    [ uri_resolve/3, uri_components/2, uri_data/3,
   59              uri_authority_components/2, uri_authority_data/3,
   60	      uri_encoded/3, uri_query_components/2, uri_is_global/1
   61	    ]).   62:- autoload(library(http/http_header),
   63            [ http_parse_header/2, http_post_data/3 ]).   64:- autoload(library(http/http_stream),[stream_range_open/3]).   65:- if(( exists_source(library(ssl)),
   66        \+ current_prolog_flag(pldoc_to_tex,true))).   67:- autoload(library(ssl), [ssl_upgrade_legacy_options/2]).   68:- endif.   69
   70/** <module> HTTP client library
   71
   72This library defines http_open/3, which opens a  URL as a Prolog stream.
   73The functionality of the  library  can   be  extended  by  loading two
   74additional modules that act as plugins:
   75
   76    * library(http/http_ssl_plugin)
   77    Loading this library causes http_open/3 to handle HTTPS connections.
   78    Relevant options for SSL certificate handling are handed to
   79    ssl_context/3. This plugin is loaded automatically if the scheme
   80    `https` is requested using a default SSL context. See the plugin for
   81    additional information regarding security.
   82
   83    * library(http/http_cookie)
   84    Loading this library adds tracking cookies to http_open/3. Returned
   85    cookies are collected in the Prolog database and supplied for
   86    subsequent requests.
   87
   88Here is a simple example to fetch a web-page:
   89
   90  ==
   91  ?- http_open('http://www.google.com/search?q=prolog', In, []),
   92     copy_stream_data(In, user_output),
   93     close(In).
   94  <!doctype html><head><title>prolog - Google Search</title><script>
   95  ...
   96  ==
   97
   98The example below fetches the modification time of a web-page. Note that
   99Modified is '' (the empty atom)  if   the  web-server does not provide a
  100time-stamp for the resource. See also parse_time/2.
  101
  102  ==
  103  modified(URL, Stamp) :-
  104          http_open(URL, In,
  105                    [ method(head),
  106                      header(last_modified, Modified)
  107                    ]),
  108          close(In),
  109          Modified \== '',
  110          parse_time(Modified, Stamp).
  111  ==
  112
  113Then next example uses Google search. It exploits library(uri) to manage
  114URIs, library(sgml) to load  an  HTML   document  and  library(xpath) to
  115navigate the parsed HTML. Note that  you   may  need to adjust the XPath
  116queries if the data returned by Google changes.
  117
  118  ==
  119  :- use_module(library(http/http_open)).
  120  :- use_module(library(xpath)).
  121  :- use_module(library(sgml)).
  122  :- use_module(library(uri)).
  123
  124  google(For, Title, HREF) :-
  125          uri_encoded(query_value, For, Encoded),
  126          atom_concat('http://www.google.com/search?q=', Encoded, URL),
  127          http_open(URL, In, []),
  128          call_cleanup(
  129              load_html(In, DOM, []),
  130              close(In)),
  131          xpath(DOM, //h3(@class=r), Result),
  132          xpath(Result, //a(@href=HREF0, text), Title),
  133          uri_components(HREF0, Components),
  134          uri_data(search, Components, Query),
  135          uri_query_components(Query, Parts),
  136          memberchk(q=HREF, Parts).
  137  ==
  138
  139An example query is below:
  140
  141==
  142?- google(prolog, Title, HREF).
  143Title = 'SWI-Prolog',
  144HREF = 'http://www.swi-prolog.org/' ;
  145Title = 'Prolog - Wikipedia',
  146HREF = 'https://nl.wikipedia.org/wiki/Prolog' ;
  147Title = 'Prolog - Wikipedia, the free encyclopedia',
  148HREF = 'https://en.wikipedia.org/wiki/Prolog' ;
  149Title = 'Pro-Log is logistiek dienstverlener m.b.t. vervoer over water.',
  150HREF = 'http://www.pro-log.nl/' ;
  151Title = 'Learn Prolog Now!',
  152HREF = 'http://www.learnprolognow.org/' ;
  153Title = 'Free Online Version - Learn Prolog
  154...
  155==
  156
  157@see load_html/3 and xpath/3 can be used to parse and navigate HTML
  158     documents.
  159@see http_get/3 and http_post/4 provide an alternative interface that
  160     convert the reply depending on the =|Content-Type|= header.
  161*/
  162
  163:- multifile
  164    http:encoding_filter/3,           % +Encoding, +In0, -In
  165    http:current_transfer_encoding/1, % ?Encoding
  166    http:disable_encoding_filter/1,   % +ContentType
  167    http:http_protocol_hook/5,        % +Protocol, +Parts, +StreamPair,
  168                                      % -NewStreamPair, +Options
  169    http:open_options/2,              % +Parts, -Options
  170    http:write_cookies/3,             % +Out, +Parts, +Options
  171    http:update_cookies/3,            % +CookieLine, +Parts, +Options
  172    http:authenticate_client/2,       % +URL, +Action
  173    http:http_connection_over_proxy/6.  174
  175:- meta_predicate
  176    http_open(+,-,:).  177
  178:- predicate_options(http_open/3, 3,
  179                     [ authorization(compound),
  180                       final_url(-atom),
  181                       header(+atom, -atom),
  182                       headers(-list),
  183                       connection(+atom),
  184                       method(oneof([delete,get,put,head,post,patch,options])),
  185                       size(-integer),
  186                       status_code(-integer),
  187                       output(-stream),
  188                       timeout(number),
  189                       proxy(atom, integer),
  190                       proxy_authorization(compound),
  191                       bypass_proxy(boolean),
  192                       request_header(any),
  193                       user_agent(atom),
  194                       version(-compound),
  195        % The option below applies if library(http/http_header) is loaded
  196                       post(any),
  197        % The options below apply if library(http/http_ssl_plugin)) is loaded
  198                       pem_password_hook(callable),
  199                       cacert_file(atom),
  200                       cert_verify_hook(callable)
  201                     ]).  202
  203%!  user_agent(-Agent) is det.
  204%
  205%   Default value for =|User-Agent|=,  can   be  overruled using the
  206%   option user_agent(Agent) of http_open/3.
  207
  208user_agent('SWI-Prolog').
  209
  210%!  http_open(+URL, -Stream, +Options) is det.
  211%
  212%   Open the data at the HTTP  server   as  a  Prolog stream. URL is
  213%   either an atom  specifying  a  URL   or  a  list  representing a
  214%   broken-down  URL  as  specified  below.   After  this  predicate
  215%   succeeds the data can be read from Stream. After completion this
  216%   stream must be  closed  using   the  built-in  Prolog  predicate
  217%   close/1. Options provides additional options:
  218%
  219%     * authenticate(+Boolean)
  220%     If `false` (default `true`), do _not_ try to automatically
  221%     authenticate the client if a 401 (Unauthorized) status code
  222%     is received.
  223%
  224%     * authorization(+Term)
  225%     Send authorization. See also http_set_authorization/2. Supported
  226%     schemes:
  227%
  228%       - basic(+User, +Password)
  229%       HTTP Basic authentication.
  230%       - bearer(+Token)
  231%       HTTP Bearer authentication.
  232%       - digest(+User, +Password)
  233%       HTTP Digest authentication.  This option is only provided
  234%       if the plugin library(http/http_digest) is also loaded.
  235%
  236%     * connection(+Connection)
  237%     Specify the =Connection= header.  Default is =close=.  The
  238%     alternative is =|Keep-alive|=.  This maintains a pool of
  239%     available connections as determined by keep_connection/1.
  240%     The library(http/websockets) uses =|Keep-alive, Upgrade|=.
  241%     Keep-alive connections can be closed explicitly using
  242%     http_close_keep_alive/1. Keep-alive connections may
  243%     significantly improve repetitive requests on the same server,
  244%     especially if the IP route is long, HTTPS is used or the
  245%     connection uses a proxy.
  246%
  247%     * final_url(-FinalURL)
  248%     Unify FinalURL with the final   destination. This differs from
  249%     the  original  URL  if  the  returned  head  of  the  original
  250%     indicates an HTTP redirect (codes 301,  302 or 303). Without a
  251%     redirect, FinalURL is the same as URL if  URL is an atom, or a
  252%     URL constructed from the parts.
  253%
  254%     * header(Name, -AtomValue)
  255%     If provided, AtomValue is  unified  with   the  value  of  the
  256%     indicated  field  in  the  reply    header.  Name  is  matched
  257%     case-insensitive and the underscore  (_)   matches  the hyphen
  258%     (-). Multiple of these options  may   be  provided  to extract
  259%     multiple  header  fields.  If  the  header  is  not  available
  260%     AtomValue is unified to the empty atom ('').
  261%
  262%     * headers(-List)
  263%     If provided, List is unified with  a list of Name(Value) pairs
  264%     corresponding to fields in the reply   header.  Name and Value
  265%     follow the same conventions  used   by  the header(Name,Value)
  266%     option.
  267%
  268%     * method(+Method)
  269%     One of =get= (default), =head=, =delete=, =post=,   =put=   or
  270%     =patch=.
  271%     The  =head= message can be
  272%     used in combination with  the   header(Name,  Value) option to
  273%     access information on the resource   without actually fetching
  274%     the resource itself.  The  returned   stream  must  be  closed
  275%     immediately.
  276%
  277%     If post(Data) is provided, the default is =post=.
  278%
  279%     * size(-Size)
  280%     Size is unified with the   integer value of =|Content-Length|=
  281%     in the reply header.
  282%
  283%     * version(-Version)
  284%     Version is a _pair_ `Major-Minor`, where `Major` and `Minor`
  285%     are integers representing the HTTP version in the reply header.
  286%
  287%     * range(+Range)
  288%     Ask for partial content. Range   is  a term _|Unit(From,To)|_,
  289%     where `From` is an integer and `To`   is  either an integer or
  290%     the atom `end`. HTTP 1.1 only   supports Unit = `bytes`. E.g.,
  291%     to   ask   for    bytes    1000-1999,     use    the    option
  292%     range(bytes(1000,1999))
  293%
  294%     * redirect(+Boolean)
  295%     If `false` (default `true`), do _not_ automatically redirect
  296%     if a 3XX code is received.  Must be combined with
  297%     status_code(Code) and one of the header options to read the
  298%     redirect reply. In particular, without status_code(Code) a
  299%     redirect is mapped to an exception.
  300%
  301%     * status_code(-Code)
  302%     If this option is  present  and   Code  unifies  with the HTTP
  303%     status code, do *not* translate errors (4xx, 5xx) into an
  304%     exception. Instead, http_open/3 behaves as if 2xx (success) is
  305%     returned, providing the application to read the error document
  306%     from the returned stream.
  307%
  308%     * output(-Out)
  309%     Unify the output stream with Out and do not close it. This can
  310%     be used to upgrade a connection.
  311%
  312%     * timeout(+Timeout)
  313%     If provided, set a timeout on   the stream using set_stream/2.
  314%     With this option if no new data arrives within Timeout seconds
  315%     the stream raises an exception.  Default   is  to wait forever
  316%     (=infinite=).
  317%
  318%     * post(+Data)
  319%     Issue a =POST= request on the HTTP server.  Data is
  320%     handed to http_post_data/3.
  321%
  322%     * proxy(+Host:Port)
  323%     Use an HTTP proxy to connect to the outside world.  See also
  324%     socket:proxy_for_url/3.  This option overrules the proxy
  325%     specification defined by socket:proxy_for_url/3.
  326%
  327%     * proxy(+Host, +Port)
  328%     Synonym for proxy(+Host:Port).  Deprecated.
  329%
  330%     * proxy_authorization(+Authorization)
  331%     Send authorization to the proxy.  Otherwise   the  same as the
  332%     =authorization= option.
  333%
  334%     * bypass_proxy(+Boolean)
  335%     If =true=, bypass proxy hooks.  Default is =false=.
  336%
  337%     * request_header(Name = Value)
  338%     Additional  name-value  parts  are  added   in  the  order  of
  339%     appearance to the HTTP request   header.  No interpretation is
  340%     done.
  341%
  342%     * max_redirect(+Max)
  343%     Sets the maximum length of a redirection chain.  This is needed
  344%     for some IRIs that redirect indefinitely to other IRIs without
  345%     looping (e.g., redirecting to IRIs with a random element in them).
  346%     Max must be either a non-negative integer or the atom `infinite`.
  347%     The default value is `10`.
  348%
  349%     * user_agent(+Agent)
  350%     Defines the value of the  =|User-Agent|=   field  of  the HTTP
  351%     header. Default is =SWI-Prolog=.
  352%
  353%   The hook http:open_options/2 can  be   used  to  provide default
  354%   options   based   on   the   broken-down     URL.   The   option
  355%   status_code(-Code)  is  particularly  useful   to  query  *REST*
  356%   interfaces that commonly return status   codes  other than `200`
  357%   that need to be be processed by the client code.
  358%
  359%   @param URL is either an atom or string (url) or a list of _parts_.
  360%
  361%               When provided, this list may contain the fields
  362%               =scheme=, =user=, =password=, =host=, =port=, =path=
  363%               and either =query_string= (whose argument is an atom)
  364%               or =search= (whose argument is a list of
  365%               =|Name(Value)|= or =|Name=Value|= compound terms).
  366%               Only =host= is mandatory.  The example below opens the
  367%               URL =|http://www.example.com/my/path?q=Hello%20World&lang=en|=.
  368%               Note that values must *not* be quoted because the
  369%               library inserts the required quotes.
  370%
  371%               ==
  372%               http_open([ host('www.example.com'),
  373%                           path('/my/path'),
  374%                           search([ q='Hello world',
  375%                                    lang=en
  376%                                  ])
  377%                         ])
  378%               ==
  379%
  380%   @throws error(existence_error(url, Id),Context) is raised if the
  381%   HTTP result code is not in the range 200..299. Context has the
  382%   shape context(Message, status(Code, TextCode)), where `Code` is the
  383%   numeric HTTP code and `TextCode` is the textual description thereof
  384%   provided by the server. `Message` may provide additional details or
  385%   may be unbound.
  386%
  387%   @see ssl_context/3 for SSL related options if
  388%   library(http/http_ssl_plugin) is loaded.
  389
  390:- multifile
  391    socket:proxy_for_url/3.           % +URL, +Host, -ProxyList
  392
  393http_open(URL, Stream, QOptions) :-
  394    meta_options(is_meta, QOptions, Options0),
  395    (   atomic(URL)
  396    ->  parse_url_ex(URL, Parts)
  397    ;   Parts = URL
  398    ),
  399    autoload_https(Parts),
  400    upgrade_ssl_options(Parts, Options0, Options),
  401    add_authorization(Parts, Options, Options1),
  402    findall(HostOptions, hooked_options(Parts, HostOptions), AllHostOptions),
  403    foldl(merge_options_rev, AllHostOptions, Options1, Options2),
  404    (   option(bypass_proxy(true), Options)
  405    ->  try_http_proxy(direct, Parts, Stream, Options2)
  406    ;   term_variables(Options2, Vars2),
  407        findall(Result-Vars2,
  408                try_a_proxy(Parts, Result, Options2),
  409                ResultList),
  410        last(ResultList, Status-Vars2)
  411    ->  (   Status = true(_Proxy, Stream)
  412        ->  true
  413        ;   throw(error(proxy_error(tried(ResultList)), _))
  414        )
  415    ;   try_http_proxy(direct, Parts, Stream, Options2)
  416    ).
  417
  418try_a_proxy(Parts, Result, Options) :-
  419    parts_uri(Parts, AtomicURL),
  420    option(host(Host), Parts),
  421    (   (   option(proxy(ProxyHost:ProxyPort), Options)
  422        ;   is_list(Options),
  423            memberchk(proxy(ProxyHost,ProxyPort), Options)
  424        )
  425    ->  Proxy = proxy(ProxyHost, ProxyPort)
  426    ;   socket:proxy_for_url(AtomicURL, Host, Proxy)
  427    ),
  428    debug(http(proxy),
  429          'http_open: Connecting via ~w to ~w', [Proxy, AtomicURL]),
  430    (   catch(try_http_proxy(Proxy, Parts, Stream, Options), E, true)
  431    ->  (   var(E)
  432        ->  !, Result = true(Proxy, Stream)
  433        ;   Result = error(Proxy, E)
  434        )
  435    ;   Result = false(Proxy)
  436    ),
  437    debug(http(proxy), 'http_open: ~w: ~p', [Proxy, Result]).
  438
  439try_http_proxy(Method, Parts, Stream, Options0) :-
  440    option(host(Host), Parts),
  441    (   Method == direct
  442    ->  parts_request_uri(Parts, RequestURI)
  443    ;   parts_uri(Parts, RequestURI)
  444    ),
  445    select_option(visited(Visited0), Options0, OptionsV, []),
  446    Options = [visited([Parts|Visited0])|OptionsV],
  447    parts_scheme(Parts, Scheme),
  448    default_port(Scheme, DefPort),
  449    url_part(port(Port), Parts, DefPort),
  450    host_and_port(Host, DefPort, Port, HostPort),
  451    (   option(connection(Connection), Options0),
  452        keep_alive(Connection),
  453        get_from_pool(Host:Port, StreamPair),
  454        debug(http(connection), 'Trying Keep-alive to ~p using ~p',
  455              [ Host:Port, StreamPair ]),
  456        catch(send_rec_header(StreamPair, Stream, HostPort,
  457                              RequestURI, Parts, Options),
  458              error(E,_),
  459              keep_alive_error(E))
  460    ->  true
  461    ;   http:http_connection_over_proxy(Method, Parts, Host:Port,
  462                                        SocketStreamPair, Options, Options1),
  463        (   catch(http:http_protocol_hook(Scheme, Parts,
  464                                          SocketStreamPair,
  465                                          StreamPair, Options),
  466                  Error,
  467                  ( close(SocketStreamPair, [force(true)]),
  468                    throw(Error)))
  469        ->  true
  470        ;   StreamPair = SocketStreamPair
  471        ),
  472        send_rec_header(StreamPair, Stream, HostPort,
  473                        RequestURI, Parts, Options1)
  474    ),
  475    return_final_url(Options).
  476
  477http:http_connection_over_proxy(direct, _, Host:Port,
  478                                StreamPair, Options, Options) :-
  479    !,
  480    open_socket(Host:Port, StreamPair, Options).
  481http:http_connection_over_proxy(proxy(ProxyHost, ProxyPort), Parts, _,
  482                                StreamPair, Options, Options) :-
  483    \+ ( memberchk(scheme(Scheme), Parts),
  484         secure_scheme(Scheme)
  485       ),
  486    !,
  487    % We do not want any /more/ proxy after this
  488    open_socket(ProxyHost:ProxyPort, StreamPair,
  489                [bypass_proxy(true)|Options]).
  490http:http_connection_over_proxy(socks(SocksHost, SocksPort), _Parts, Host:Port,
  491                                StreamPair, Options, Options) :-
  492    !,
  493    tcp_connect(SocksHost:SocksPort, StreamPair, [bypass_proxy(true)]),
  494    catch(negotiate_socks_connection(Host:Port, StreamPair),
  495          Error,
  496          ( close(StreamPair, [force(true)]),
  497            throw(Error)
  498          )).
  499
  500%!  hooked_options(+Parts, -Options) is nondet.
  501%
  502%   Calls  http:open_options/2  and  if  necessary    upgrades  old  SSL
  503%   cacerts_file(File) option to a cacerts(List) option to ensure proper
  504%   merging of options.
  505
  506hooked_options(Parts, Options) :-
  507    http:open_options(Parts, Options0),
  508    upgrade_ssl_options(Parts, Options0, Options).
  509
  510:- if(current_predicate(ssl_upgrade_legacy_options/2)).  511upgrade_ssl_options(Parts, Options0, Options) :-
  512    requires_ssl(Parts),
  513    !,
  514    ssl_upgrade_legacy_options(Options0, Options).
  515:- endif.  516upgrade_ssl_options(_, Options, Options).
  517
  518merge_options_rev(Old, New, Merged) :-
  519    merge_options(New, Old, Merged).
  520
  521is_meta(pem_password_hook).             % SSL plugin callbacks
  522is_meta(cert_verify_hook).
  523
  524
  525http:http_protocol_hook(http, _, StreamPair, StreamPair, _).
  526
  527default_port(https, 443) :- !.
  528default_port(wss,   443) :- !.
  529default_port(_,     80).
  530
  531host_and_port(Host, DefPort, DefPort, Host) :- !.
  532host_and_port(Host, _,       Port,    Host:Port).
  533
  534%!  autoload_https(+Parts) is det.
  535%
  536%   If the requested scheme is https or wss, load the HTTPS plugin.
  537
  538autoload_https(Parts) :-
  539    requires_ssl(Parts),
  540    memberchk(scheme(S), Parts),
  541    \+ clause(http:http_protocol_hook(S, _, StreamPair, StreamPair, _),_),
  542    exists_source(library(http/http_ssl_plugin)),
  543    !,
  544    use_module(library(http/http_ssl_plugin)).
  545autoload_https(_).
  546
  547requires_ssl(Parts) :-
  548    memberchk(scheme(S), Parts),
  549    secure_scheme(S).
  550
  551secure_scheme(https).
  552secure_scheme(wss).
  553
  554%!  send_rec_header(+StreamPair, -Stream,
  555%!                  +Host, +RequestURI, +Parts, +Options) is det.
  556%
  557%   Send header to Out and process reply.  If there is an error or
  558%   failure, close In and Out and return the error or failure.
  559
  560send_rec_header(StreamPair, Stream, Host, RequestURI, Parts, Options) :-
  561    (   catch(guarded_send_rec_header(StreamPair, Stream,
  562                                      Host, RequestURI, Parts, Options),
  563              E, true)
  564    ->  (   var(E)
  565        ->  (   option(output(StreamPair), Options)
  566            ->  true
  567            ;   true
  568            )
  569        ;   close(StreamPair, [force(true)]),
  570            throw(E)
  571        )
  572    ;   close(StreamPair, [force(true)]),
  573        fail
  574    ).
  575
  576guarded_send_rec_header(StreamPair, Stream, Host, RequestURI, Parts, Options) :-
  577    user_agent(Agent, Options),
  578    method(Options, MNAME),
  579    http_version(Version),
  580    option(connection(Connection), Options, close),
  581    debug(http(send_request), "> ~w ~w HTTP/~w", [MNAME, RequestURI, Version]),
  582    debug(http(send_request), "> Host: ~w", [Host]),
  583    debug(http(send_request), "> User-Agent: ~w", [Agent]),
  584    debug(http(send_request), "> Connection: ~w", [Connection]),
  585    format(StreamPair,
  586           '~w ~w HTTP/~w\r\n\c
  587               Host: ~w\r\n\c
  588               User-Agent: ~w\r\n\c
  589               Connection: ~w\r\n',
  590           [MNAME, RequestURI, Version, Host, Agent, Connection]),
  591    parts_uri(Parts, URI),
  592    x_headers(Options, URI, StreamPair),
  593    write_cookies(StreamPair, Parts, Options),
  594    (   option(post(PostData), Options)
  595    ->  http_post_data(PostData, StreamPair, [])
  596    ;   format(StreamPair, '\r\n', [])
  597    ),
  598    flush_output(StreamPair),
  599                                    % read the reply header
  600    read_header(StreamPair, Parts, ReplyVersion, Code, Comment, Lines),
  601    update_cookies(Lines, Parts, Options),
  602    do_open(ReplyVersion, Code, Comment, Lines, Options, Parts, Host,
  603            StreamPair, Stream).
  604
  605
  606%!  http_version(-Version:atom) is det.
  607%
  608%   HTTP version we publish. We  can  only   use  1.1  if we support
  609%   chunked encoding.
  610
  611http_version('1.1') :-
  612    http:current_transfer_encoding(chunked),
  613    !.
  614http_version('1.0').
  615
  616method(Options, MNAME) :-
  617    option(post(_), Options),
  618    !,
  619    option(method(M), Options, post),
  620    (   map_method(M, MNAME0)
  621    ->  MNAME = MNAME0
  622    ;   domain_error(method, M)
  623    ).
  624method(Options, MNAME) :-
  625    option(method(M), Options, get),
  626    (   map_method(M, MNAME0)
  627    ->  MNAME = MNAME0
  628    ;   map_method(_, M)
  629    ->  MNAME = M
  630    ;   domain_error(method, M)
  631    ).
  632
  633map_method(delete,  'DELETE').
  634map_method(get,     'GET').
  635map_method(head,    'HEAD').
  636map_method(post,    'POST').
  637map_method(put,     'PUT').
  638map_method(patch,   'PATCH').
  639map_method(options, 'OPTIONS').
  640
  641%!  x_headers(+Options, +URI, +Out) is det.
  642%
  643%   Emit extra headers from   request_header(Name=Value)  options in
  644%   Options.
  645%
  646%   @tbd Use user/password fields
  647
  648x_headers(Options, URI, Out) :-
  649    x_headers_(Options, [url(URI)|Options], Out).
  650
  651x_headers_([], _, _).
  652x_headers_([H|T], Options, Out) :-
  653    x_header(H, Options, Out),
  654    x_headers_(T, Options, Out).
  655
  656x_header(request_header(Name=Value), _, Out) :-
  657    !,
  658    debug(http(send_request), "> ~w: ~w", [Name, Value]),
  659    format(Out, '~w: ~w\r\n', [Name, Value]).
  660x_header(proxy_authorization(ProxyAuthorization), Options, Out) :-
  661    !,
  662    auth_header(ProxyAuthorization, Options, 'Proxy-Authorization', Out).
  663x_header(authorization(Authorization), Options, Out) :-
  664    !,
  665    auth_header(Authorization, Options, 'Authorization', Out).
  666x_header(range(Spec), _, Out) :-
  667    !,
  668    Spec =.. [Unit, From, To],
  669    (   To == end
  670    ->  ToT = ''
  671    ;   must_be(integer, To),
  672        ToT = To
  673    ),
  674    debug(http(send_request), "> Range: ~w=~d-~w", [Unit, From, ToT]),
  675    format(Out, 'Range: ~w=~d-~w\r\n', [Unit, From, ToT]).
  676x_header(_, _, _).
  677
  678%!  auth_header(+AuthOption, +Options, +HeaderName, +Out)
  679
  680auth_header(basic(User, Password), _, Header, Out) :-
  681    !,
  682    format(codes(Codes), '~w:~w', [User, Password]),
  683    phrase(base64(Codes), Base64Codes),
  684    debug(http(send_request), "> ~w: Basic ~s", [Header, Base64Codes]),
  685    format(Out, '~w: Basic ~s\r\n', [Header, Base64Codes]).
  686auth_header(bearer(Token), _, Header, Out) :-
  687    !,
  688    debug(http(send_request), "> ~w: Bearer ~w", [Header,Token]),
  689    format(Out, '~w: Bearer ~w\r\n', [Header, Token]).
  690auth_header(Auth, Options, _, Out) :-
  691    option(url(URL), Options),
  692    add_method(Options, Options1),
  693    http:authenticate_client(URL, send_auth_header(Auth, Out, Options1)),
  694    !.
  695auth_header(Auth, _, _, _) :-
  696    domain_error(authorization, Auth).
  697
  698user_agent(Agent, Options) :-
  699    (   option(user_agent(Agent), Options)
  700    ->  true
  701    ;   user_agent(Agent)
  702    ).
  703
  704add_method(Options0, Options) :-
  705    option(method(_), Options0),
  706    !,
  707    Options = Options0.
  708add_method(Options0, Options) :-
  709    option(post(_), Options0),
  710    !,
  711    Options = [method(post)|Options0].
  712add_method(Options0, [method(get)|Options0]).
  713
  714%!  do_open(+HTTPVersion, +HTTPStatusCode, +HTTPStatusComment, +Header,
  715%!          +Options, +Parts, +Host, +In, -FinalIn) is det.
  716%
  717%   Handle the HTTP status once available. If   200-299, we are ok. If a
  718%   redirect, redo the open,  returning  a   new  stream.  Else issue an
  719%   error.
  720%
  721%   @error  existence_error(url, URL)
  722
  723                                        % Redirections
  724do_open(_, Code, _, Lines, Options0, Parts, _, In, Stream) :-
  725    redirect_code(Code),
  726    option(redirect(true), Options0, true),
  727    location(Lines, RequestURI),
  728    !,
  729    debug(http(redirect), 'http_open: redirecting to ~w', [RequestURI]),
  730    close(In),
  731    parts_uri(Parts, Base),
  732    uri_resolve(RequestURI, Base, Redirected),
  733    parse_url_ex(Redirected, RedirectedParts),
  734    (   redirect_limit_exceeded(Options0, Max)
  735    ->  format(atom(Comment), 'max_redirect (~w) limit exceeded', [Max]),
  736        throw(error(permission_error(redirect, http, Redirected),
  737                    context(_, Comment)))
  738    ;   redirect_loop(RedirectedParts, Options0)
  739    ->  throw(error(permission_error(redirect, http, Redirected),
  740                    context(_, 'Redirection loop')))
  741    ;   true
  742    ),
  743    redirect_options(Options0, Options),
  744    http_open(RedirectedParts, Stream, Options).
  745                                        % Need authentication
  746do_open(_Version, Code, _Comment, Lines, Options0, Parts, _Host, In0, Stream) :-
  747    authenticate_code(Code),
  748    option(authenticate(true), Options0, true),
  749    parts_uri(Parts, URI),
  750    parse_headers(Lines, Headers),
  751    http:authenticate_client(
  752             URI,
  753             auth_reponse(Headers, Options0, Options)),
  754    !,
  755    close(In0),
  756    http_open(Parts, Stream, Options).
  757                                        % Accepted codes
  758do_open(Version, Code, _, Lines, Options, Parts, Host, In0, In) :-
  759    (   option(status_code(Code), Options),
  760        Lines \== []
  761    ->  true
  762    ;   successful_code(Code)
  763    ),
  764    !,
  765    parts_uri(Parts, URI),
  766    parse_headers(Lines, Headers),
  767    return_version(Options, Version),
  768    return_size(Options, Headers),
  769    return_fields(Options, Headers),
  770    return_headers(Options, Headers),
  771    consider_keep_alive(Lines, Parts, Host, In0, In1, Options),
  772    transfer_encoding_filter(Lines, In1, In),
  773                                    % properly re-initialise the stream
  774    set_stream(In, file_name(URI)),
  775    set_stream(In, record_position(true)).
  776do_open(_, _, _, [], Options, _, _, _, _) :-
  777    option(connection(Connection), Options),
  778    keep_alive(Connection),
  779    !,
  780    throw(error(keep_alive(closed),_)).
  781                                        % report anything else as error
  782do_open(_Version, Code, Comment, _,  _, Parts, _, _, _) :-
  783    parts_uri(Parts, URI),
  784    (   map_error_code(Code, Error)
  785    ->  Formal =.. [Error, url, URI]
  786    ;   Formal = existence_error(url, URI)
  787    ),
  788    throw(error(Formal, context(_, status(Code, Comment)))).
  789
  790
  791successful_code(Code) :-
  792    between(200, 299, Code).
  793
  794%!  redirect_limit_exceeded(+Options:list(compound), -Max:nonneg) is semidet.
  795%
  796%   True if we have exceeded the maximum redirection length (default 10).
  797
  798redirect_limit_exceeded(Options, Max) :-
  799    option(visited(Visited), Options, []),
  800    length(Visited, N),
  801    option(max_redirect(Max), Options, 10),
  802    (Max == infinite -> fail ; N > Max).
  803
  804
  805%!  redirect_loop(+Parts, +Options) is semidet.
  806%
  807%   True if we are in  a  redirection   loop.  Note  that some sites
  808%   redirect once to the same place using  cookies or similar, so we
  809%   allow for two tries. In fact,   we  should probably test whether
  810%   authorization or cookie headers have changed.
  811
  812redirect_loop(Parts, Options) :-
  813    option(visited(Visited), Options, []),
  814    include(==(Parts), Visited, Same),
  815    length(Same, Count),
  816    Count > 2.
  817
  818
  819%!  redirect_options(+Options0, -Options) is det.
  820%
  821%   A redirect from a POST should do a GET on the returned URI. This
  822%   means we must remove  the   method(post)  and post(Data) options
  823%   from the original option-list.
  824
  825redirect_options(Options0, Options) :-
  826    (   select_option(post(_), Options0, Options1)
  827    ->  true
  828    ;   Options1 = Options0
  829    ),
  830    (   select_option(method(Method), Options1, Options),
  831        \+ redirect_method(Method)
  832    ->  true
  833    ;   Options = Options1
  834    ).
  835
  836redirect_method(delete).
  837redirect_method(get).
  838redirect_method(head).
  839
  840
  841%!  map_error_code(+HTTPCode, -PrologError) is semidet.
  842%
  843%   Map HTTP error codes to Prolog errors.
  844%
  845%   @tbd    Many more maps. Unfortunately many have no sensible Prolog
  846%           counterpart.
  847
  848map_error_code(401, permission_error).
  849map_error_code(403, permission_error).
  850map_error_code(404, existence_error).
  851map_error_code(405, permission_error).
  852map_error_code(407, permission_error).
  853map_error_code(410, existence_error).
  854
  855redirect_code(301).                     % Moved Permanently
  856redirect_code(302).                     % Found (previously "Moved Temporary")
  857redirect_code(303).                     % See Other
  858redirect_code(307).                     % Temporary Redirect
  859
  860authenticate_code(401).
  861
  862%!  open_socket(+Address, -StreamPair, +Options) is det.
  863%
  864%   Create and connect a client socket to Address.  Options
  865%
  866%       * timeout(+Timeout)
  867%       Sets timeout on the stream, *after* connecting the
  868%       socket.
  869%
  870%   @tbd    Make timeout also work on tcp_connect/4.
  871%   @tbd    This is the same as do_connect/4 in http_client.pl
  872
  873open_socket(Address, StreamPair, Options) :-
  874    debug(http(open), 'http_open: Connecting to ~p ...', [Address]),
  875    tcp_connect(Address, StreamPair, Options),
  876    stream_pair(StreamPair, In, Out),
  877    debug(http(open), '\tok ~p ---> ~p', [In, Out]),
  878    set_stream(In, record_position(false)),
  879    (   option(timeout(Timeout), Options)
  880    ->  set_stream(In, timeout(Timeout))
  881    ;   true
  882    ).
  883
  884
  885return_version(Options, Major-Minor) :-
  886    option(version(Major-Minor), Options, _).
  887
  888return_size(Options, Headers) :-
  889    (   memberchk(content_length(Size), Headers)
  890    ->  option(size(Size), Options, _)
  891    ;   true
  892    ).
  893
  894return_fields([], _).
  895return_fields([header(Name, Value)|T], Headers) :-
  896    !,
  897    (   Term =.. [Name,Value],
  898        memberchk(Term, Headers)
  899    ->  true
  900    ;   Value = ''
  901    ),
  902    return_fields(T, Headers).
  903return_fields([_|T], Lines) :-
  904    return_fields(T, Lines).
  905
  906return_headers(Options, Headers) :-
  907    option(headers(Headers), Options, _).
  908
  909%!  parse_headers(+Lines, -Headers:list(compound)) is det.
  910%
  911%   Parse the header lines for   the  headers(-List) option. Invalid
  912%   header   lines   are   skipped,   printing   a   warning   using
  913%   pring_message/2.
  914
  915parse_headers([], []) :- !.
  916parse_headers([Line|Lines], Headers) :-
  917    catch(http_parse_header(Line, [Header]), Error, true),
  918    (   var(Error)
  919    ->  Headers = [Header|More]
  920    ;   print_message(warning, Error),
  921        Headers = More
  922    ),
  923    parse_headers(Lines, More).
  924
  925
  926%!  return_final_url(+Options) is semidet.
  927%
  928%   If Options contains final_url(URL), unify URL with the final
  929%   URL after redirections.
  930
  931return_final_url(Options) :-
  932    option(final_url(URL), Options),
  933    var(URL),
  934    !,
  935    option(visited([Parts|_]), Options),
  936    parts_uri(Parts, URL).
  937return_final_url(_).
  938
  939
  940%!  transfer_encoding_filter(+Lines, +In0, -In) is det.
  941%
  942%   Install filters depending on the transfer  encoding. If In0 is a
  943%   stream-pair, we close the output   side. If transfer-encoding is
  944%   not specified, the content-encoding is  interpreted as a synonym
  945%   for transfer-encoding, because many   servers incorrectly depend
  946%   on  this.  Exceptions  to  this   are  content-types  for  which
  947%   disable_encoding_filter/1 holds.
  948
  949transfer_encoding_filter(Lines, In0, In) :-
  950    transfer_encoding(Lines, Encoding),
  951    !,
  952    transfer_encoding_filter_(Encoding, In0, In).
  953transfer_encoding_filter(Lines, In0, In) :-
  954    content_encoding(Lines, Encoding),
  955    content_type(Lines, Type),
  956    \+ http:disable_encoding_filter(Type),
  957    !,
  958    transfer_encoding_filter_(Encoding, In0, In).
  959transfer_encoding_filter(_, In, In).
  960
  961transfer_encoding_filter_(Encoding, In0, In) :-
  962    stream_pair(In0, In1, Out),
  963    (   nonvar(Out)
  964    ->  close(Out)
  965    ;   true
  966    ),
  967    (   http:encoding_filter(Encoding, In1, In)
  968    ->  true
  969    ;   autoload_encoding(Encoding),
  970        http:encoding_filter(Encoding, In1, In)
  971    ->  true
  972    ;   domain_error(http_encoding, Encoding)
  973    ).
  974
  975:- multifile
  976    autoload_encoding/1.  977
  978:- if(exists_source(library(zlib))).  979autoload_encoding(gzip) :-
  980    use_module(library(zlib)).
  981:- endif.  982
  983content_type(Lines, Type) :-
  984    member(Line, Lines),
  985    phrase(field('content-type'), Line, Rest),
  986    !,
  987    atom_codes(Type, Rest).
  988
  989%!  http:disable_encoding_filter(+ContentType) is semidet.
  990%
  991%   Do not use  the   =|Content-encoding|=  as =|Transfer-encoding|=
  992%   encoding for specific values of   ContentType. This predicate is
  993%   multifile and can thus be extended by the user.
  994
  995http:disable_encoding_filter('application/x-gzip').
  996http:disable_encoding_filter('application/x-tar').
  997http:disable_encoding_filter('x-world/x-vrml').
  998http:disable_encoding_filter('application/zip').
  999http:disable_encoding_filter('application/x-gzip').
 1000http:disable_encoding_filter('application/x-zip-compressed').
 1001http:disable_encoding_filter('application/x-compress').
 1002http:disable_encoding_filter('application/x-compressed').
 1003http:disable_encoding_filter('application/x-spoon').
 1004
 1005%!  transfer_encoding(+Lines, -Encoding) is semidet.
 1006%
 1007%   True if Encoding  is  the   value  of  the =|Transfer-encoding|=
 1008%   header.
 1009
 1010transfer_encoding(Lines, Encoding) :-
 1011    what_encoding(transfer_encoding, Lines, Encoding).
 1012
 1013what_encoding(What, Lines, Encoding) :-
 1014    member(Line, Lines),
 1015    phrase(encoding_(What, Debug), Line, Rest),
 1016    !,
 1017    atom_codes(Encoding, Rest),
 1018    debug(http(What), '~w: ~p', [Debug, Rest]).
 1019
 1020encoding_(content_encoding, 'Content-encoding') -->
 1021    field('content-encoding').
 1022encoding_(transfer_encoding, 'Transfer-encoding') -->
 1023    field('transfer-encoding').
 1024
 1025%!  content_encoding(+Lines, -Encoding) is semidet.
 1026%
 1027%   True if Encoding is the value of the =|Content-encoding|=
 1028%   header.
 1029
 1030content_encoding(Lines, Encoding) :-
 1031    what_encoding(content_encoding, Lines, Encoding).
 1032
 1033%!  read_header(+In:stream, +Parts, -Version, -Code:int,
 1034%!  -Comment:atom, -Lines:list) is det.
 1035%
 1036%   Read the HTTP reply-header.  If the reply is completely empty
 1037%   an existence error is thrown.  If the replied header is
 1038%   otherwise invalid a 500 HTTP error is simulated, having the
 1039%   comment =|Invalid reply header|=.
 1040%
 1041%   @param Parts    A list of compound terms that describe the
 1042%                   parsed request URI.
 1043%   @param Version  HTTP reply version as Major-Minor pair
 1044%   @param Code     Numeric HTTP reply-code
 1045%   @param Comment  Comment of reply-code as atom
 1046%   @param Lines    Remaining header lines as code-lists.
 1047%
 1048%   @error existence_error(http_reply, Uri)
 1049
 1050read_header(In, Parts, Major-Minor, Code, Comment, Lines) :-
 1051    read_line_to_codes(In, Line),
 1052    (   Line == end_of_file
 1053    ->  parts_uri(Parts, Uri),
 1054        existence_error(http_reply,Uri)
 1055    ;   true
 1056    ),
 1057    Line \== end_of_file,
 1058    phrase(first_line(Major-Minor, Code, Comment), Line),
 1059    debug(http(open), 'HTTP/~d.~d ~w ~w', [Major, Minor, Code, Comment]),
 1060    read_line_to_codes(In, Line2),
 1061    rest_header(Line2, In, Lines),
 1062    !,
 1063    (   debugging(http(open))
 1064    ->  forall(member(HL, Lines),
 1065               debug(http(open), '~s', [HL]))
 1066    ;   true
 1067    ).
 1068read_header(_, _, 1-1, 500, 'Invalid reply header', []).
 1069
 1070rest_header([], _, []) :- !.            % blank line: end of header
 1071rest_header(L0, In, [L0|L]) :-
 1072    read_line_to_codes(In, L1),
 1073    rest_header(L1, In, L).
 1074
 1075%!  content_length(+Header, -Length:int) is semidet.
 1076%
 1077%   Find the Content-Length in an HTTP reply-header.
 1078
 1079content_length(Lines, Length) :-
 1080    member(Line, Lines),
 1081    phrase(content_length(Length0), Line),
 1082    !,
 1083    Length = Length0.
 1084
 1085location(Lines, RequestURI) :-
 1086    member(Line, Lines),
 1087    phrase(atom_field(location, RequestURI), Line),
 1088    !.
 1089
 1090connection(Lines, Connection) :-
 1091    member(Line, Lines),
 1092    phrase(atom_field(connection, Connection0), Line),
 1093    !,
 1094    Connection = Connection0.
 1095
 1096first_line(Major-Minor, Code, Comment) -->
 1097    "HTTP/", integer(Major), ".", integer(Minor),
 1098    skip_blanks,
 1099    integer(Code),
 1100    skip_blanks,
 1101    rest(Comment).
 1102
 1103atom_field(Name, Value) -->
 1104    field(Name),
 1105    rest(Value).
 1106
 1107content_length(Len) -->
 1108    field('content-length'),
 1109    integer(Len).
 1110
 1111field(Name) -->
 1112    { atom_codes(Name, Codes) },
 1113    field_codes(Codes).
 1114
 1115field_codes([]) -->
 1116    ":",
 1117    skip_blanks.
 1118field_codes([H|T]) -->
 1119    [C],
 1120    { match_header_char(H, C)
 1121    },
 1122    field_codes(T).
 1123
 1124match_header_char(C, C) :- !.
 1125match_header_char(C, U) :-
 1126    code_type(C, to_lower(U)),
 1127    !.
 1128match_header_char(0'_, 0'-).
 1129
 1130
 1131skip_blanks -->
 1132    [C],
 1133    { code_type(C, white)
 1134    },
 1135    !,
 1136    skip_blanks.
 1137skip_blanks -->
 1138    [].
 1139
 1140%!  integer(-Int)//
 1141%
 1142%   Read 1 or more digits and return as integer.
 1143
 1144integer(Code) -->
 1145    digit(D0),
 1146    digits(D),
 1147    { number_codes(Code, [D0|D])
 1148    }.
 1149
 1150digit(C) -->
 1151    [C],
 1152    { code_type(C, digit)
 1153    }.
 1154
 1155digits([D0|D]) -->
 1156    digit(D0),
 1157    !,
 1158    digits(D).
 1159digits([]) -->
 1160    [].
 1161
 1162%!  rest(-Atom:atom)//
 1163%
 1164%   Get rest of input as an atom.
 1165
 1166rest(Atom) --> call(rest_(Atom)).
 1167
 1168rest_(Atom, L, []) :-
 1169    atom_codes(Atom, L).
 1170
 1171
 1172                 /*******************************
 1173                 *   AUTHORIZATION MANAGEMENT   *
 1174                 *******************************/
 1175
 1176%!  http_set_authorization(+URL, +Authorization) is det.
 1177%
 1178%   Set user/password to supply with URLs   that have URL as prefix.
 1179%   If  Authorization  is  the   atom    =|-|=,   possibly   defined
 1180%   authorization is cleared.  For example:
 1181%
 1182%   ==
 1183%   ?- http_set_authorization('http://www.example.com/private/',
 1184%                             basic('John', 'Secret'))
 1185%   ==
 1186%
 1187%   @tbd    Move to a separate module, so http_get/3, etc. can use this
 1188%           too.
 1189
 1190:- dynamic
 1191    stored_authorization/2,
 1192    cached_authorization/2. 1193
 1194http_set_authorization(URL, Authorization) :-
 1195    must_be(atom, URL),
 1196    retractall(stored_authorization(URL, _)),
 1197    (   Authorization = (-)
 1198    ->  true
 1199    ;   check_authorization(Authorization),
 1200        assert(stored_authorization(URL, Authorization))
 1201    ),
 1202    retractall(cached_authorization(_,_)).
 1203
 1204check_authorization(Var) :-
 1205    var(Var),
 1206    !,
 1207    instantiation_error(Var).
 1208check_authorization(basic(User, Password)) :-
 1209    must_be(atom, User),
 1210    must_be(text, Password).
 1211check_authorization(digest(User, Password)) :-
 1212    must_be(atom, User),
 1213    must_be(text, Password).
 1214
 1215%!  authorization(+URL, -Authorization) is semidet.
 1216%
 1217%   True if Authorization must be supplied for URL.
 1218%
 1219%   @tbd    Cleanup cache if it gets too big.
 1220
 1221authorization(_, _) :-
 1222    \+ stored_authorization(_, _),
 1223    !,
 1224    fail.
 1225authorization(URL, Authorization) :-
 1226    cached_authorization(URL, Authorization),
 1227    !,
 1228    Authorization \== (-).
 1229authorization(URL, Authorization) :-
 1230    (   stored_authorization(Prefix, Authorization),
 1231        sub_atom(URL, 0, _, _, Prefix)
 1232    ->  assert(cached_authorization(URL, Authorization))
 1233    ;   assert(cached_authorization(URL, -)),
 1234        fail
 1235    ).
 1236
 1237add_authorization(_, Options, Options) :-
 1238    option(authorization(_), Options),
 1239    !.
 1240add_authorization(Parts, Options0, Options) :-
 1241    url_part(user(User), Parts),
 1242    url_part(password(Passwd), Parts),
 1243    !,
 1244    Options = [authorization(basic(User,Passwd))|Options0].
 1245add_authorization(Parts, Options0, Options) :-
 1246    stored_authorization(_, _) ->   % quick test to avoid work
 1247    parts_uri(Parts, URL),
 1248    authorization(URL, Auth),
 1249    !,
 1250    Options = [authorization(Auth)|Options0].
 1251add_authorization(_, Options, Options).
 1252
 1253
 1254%!  parse_url_ex(+URL, -Parts)
 1255%
 1256%   Parts:  Scheme,  Host,  Port,    User:Password,  RequestURI  (no
 1257%   fragment).
 1258
 1259parse_url_ex(URL, [uri(URL)|Parts]) :-
 1260    uri_components(URL, Components),
 1261    phrase(components(Components), Parts),
 1262    (   option(host(_), Parts)
 1263    ->  true
 1264    ;   domain_error(url, URL)
 1265    ).
 1266
 1267components(Components) -->
 1268    uri_scheme(Components),
 1269    uri_path(Components),
 1270    uri_authority(Components),
 1271    uri_request_uri(Components).
 1272
 1273uri_scheme(Components) -->
 1274    { uri_data(scheme, Components, Scheme), nonvar(Scheme) },
 1275    !,
 1276    [ scheme(Scheme)
 1277    ].
 1278uri_scheme(_) --> [].
 1279
 1280uri_path(Components) -->
 1281    { uri_data(path, Components, Path0), nonvar(Path0),
 1282      (   Path0 == ''
 1283      ->  Path = (/)
 1284      ;   Path = Path0
 1285      )
 1286    },
 1287    !,
 1288    [ path(Path)
 1289    ].
 1290uri_path(_) --> [].
 1291
 1292uri_authority(Components) -->
 1293    { uri_data(authority, Components, Auth), nonvar(Auth),
 1294      !,
 1295      uri_authority_components(Auth, Data)
 1296    },
 1297    [ authority(Auth) ],
 1298    auth_field(user, Data),
 1299    auth_field(password, Data),
 1300    auth_field(host, Data),
 1301    auth_field(port, Data).
 1302uri_authority(_) --> [].
 1303
 1304auth_field(Field, Data) -->
 1305    { uri_authority_data(Field, Data, EncValue), nonvar(EncValue),
 1306      !,
 1307      (   atom(EncValue)
 1308      ->  uri_encoded(query_value, Value, EncValue)
 1309      ;   Value = EncValue
 1310      ),
 1311      Part =.. [Field,Value]
 1312    },
 1313    [ Part ].
 1314auth_field(_, _) --> [].
 1315
 1316uri_request_uri(Components) -->
 1317    { uri_data(path, Components, Path0),
 1318      uri_data(search, Components, Search),
 1319      (   Path0 == ''
 1320      ->  Path = (/)
 1321      ;   Path = Path0
 1322      ),
 1323      uri_data(path, Components2, Path),
 1324      uri_data(search, Components2, Search),
 1325      uri_components(RequestURI, Components2)
 1326    },
 1327    [ request_uri(RequestURI)
 1328    ].
 1329
 1330%!  parts_scheme(+Parts, -Scheme) is det.
 1331%!  parts_uri(+Parts, -URI) is det.
 1332%!  parts_request_uri(+Parts, -RequestURI) is det.
 1333%!  parts_search(+Parts, -Search) is det.
 1334%!  parts_authority(+Parts, -Authority) is semidet.
 1335
 1336parts_scheme(Parts, Scheme) :-
 1337    url_part(scheme(Scheme), Parts),
 1338    !.
 1339parts_scheme(Parts, Scheme) :-          % compatibility with library(url)
 1340    url_part(protocol(Scheme), Parts),
 1341    !.
 1342parts_scheme(_, http).
 1343
 1344parts_authority(Parts, Auth) :-
 1345    url_part(authority(Auth), Parts),
 1346    !.
 1347parts_authority(Parts, Auth) :-
 1348    url_part(host(Host), Parts, _),
 1349    url_part(port(Port), Parts, _),
 1350    url_part(user(User), Parts, _),
 1351    url_part(password(Password), Parts, _),
 1352    uri_authority_components(Auth,
 1353                             uri_authority(User, Password, Host, Port)).
 1354
 1355parts_request_uri(Parts, RequestURI) :-
 1356    option(request_uri(RequestURI), Parts),
 1357    !.
 1358parts_request_uri(Parts, RequestURI) :-
 1359    url_part(path(Path), Parts, /),
 1360    ignore(parts_search(Parts, Search)),
 1361    uri_data(path, Data, Path),
 1362    uri_data(search, Data, Search),
 1363    uri_components(RequestURI, Data).
 1364
 1365parts_search(Parts, Search) :-
 1366    option(query_string(Search), Parts),
 1367    !.
 1368parts_search(Parts, Search) :-
 1369    option(search(Fields), Parts),
 1370    !,
 1371    uri_query_components(Search, Fields).
 1372
 1373
 1374parts_uri(Parts, URI) :-
 1375    option(uri(URI), Parts),
 1376    !.
 1377parts_uri(Parts, URI) :-
 1378    parts_scheme(Parts, Scheme),
 1379    ignore(parts_authority(Parts, Auth)),
 1380    parts_request_uri(Parts, RequestURI),
 1381    uri_components(RequestURI, Data),
 1382    uri_data(scheme, Data, Scheme),
 1383    uri_data(authority, Data, Auth),
 1384    uri_components(URI, Data).
 1385
 1386parts_port(Parts, Port) :-
 1387    parts_scheme(Parts, Scheme),
 1388    default_port(Scheme, DefPort),
 1389    url_part(port(Port), Parts, DefPort).
 1390
 1391url_part(Part, Parts) :-
 1392    Part =.. [Name,Value],
 1393    Gen =.. [Name,RawValue],
 1394    option(Gen, Parts),
 1395    !,
 1396    Value = RawValue.
 1397
 1398url_part(Part, Parts, Default) :-
 1399    Part =.. [Name,Value],
 1400    Gen =.. [Name,RawValue],
 1401    (   option(Gen, Parts)
 1402    ->  Value = RawValue
 1403    ;   Value = Default
 1404    ).
 1405
 1406
 1407                 /*******************************
 1408                 *            COOKIES           *
 1409                 *******************************/
 1410
 1411write_cookies(Out, Parts, Options) :-
 1412    http:write_cookies(Out, Parts, Options),
 1413    !.
 1414write_cookies(_, _, _).
 1415
 1416update_cookies(_, _, _) :-
 1417    predicate_property(http:update_cookies(_,_,_), number_of_clauses(0)),
 1418    !.
 1419update_cookies(Lines, Parts, Options) :-
 1420    (   member(Line, Lines),
 1421        phrase(atom_field('set_cookie', CookieData), Line),
 1422        http:update_cookies(CookieData, Parts, Options),
 1423        fail
 1424    ;   true
 1425    ).
 1426
 1427
 1428                 /*******************************
 1429                 *           OPEN ANY           *
 1430                 *******************************/
 1431
 1432:- multifile iostream:open_hook/6. 1433
 1434%!  iostream:open_hook(+Spec, +Mode, -Stream, -Close,
 1435%!                     +Options0, -Options) is semidet.
 1436%
 1437%   Hook implementation that makes  open_any/5   support  =http= and
 1438%   =https= URLs for `Mode == read`.
 1439
 1440iostream:open_hook(URL, read, Stream, Close, Options0, Options) :-
 1441    (atom(URL) -> true ; string(URL)),
 1442    uri_is_global(URL),
 1443    uri_components(URL, Components),
 1444    uri_data(scheme, Components, Scheme),
 1445    http_scheme(Scheme),
 1446    !,
 1447    Options = Options0,
 1448    Close = close(Stream),
 1449    http_open(URL, Stream, Options0).
 1450
 1451http_scheme(http).
 1452http_scheme(https).
 1453
 1454
 1455                 /*******************************
 1456                 *          KEEP-ALIVE          *
 1457                 *******************************/
 1458
 1459%!  consider_keep_alive(+HeaderLines, +Parts, +Host,
 1460%!                      +Stream0, -Stream,
 1461%!                      +Options) is det.
 1462
 1463consider_keep_alive(Lines, Parts, Host, StreamPair, In, Options) :-
 1464    option(connection(Asked), Options),
 1465    keep_alive(Asked),
 1466    connection(Lines, Given),
 1467    keep_alive(Given),
 1468    content_length(Lines, Bytes),
 1469    !,
 1470    stream_pair(StreamPair, In0, _),
 1471    connection_address(Host, Parts, HostPort),
 1472    debug(http(connection),
 1473          'Keep-alive to ~w (~D bytes)', [HostPort, Bytes]),
 1474    stream_range_open(In0, In,
 1475                      [ size(Bytes),
 1476                        onclose(keep_alive(StreamPair, HostPort))
 1477                      ]).
 1478consider_keep_alive(_, _, _, Stream, Stream, _).
 1479
 1480connection_address(Host, _, Host) :-
 1481    Host = _:_,
 1482    !.
 1483connection_address(Host, Parts, Host:Port) :-
 1484    parts_port(Parts, Port).
 1485
 1486keep_alive(keep_alive) :- !.
 1487keep_alive(Connection) :-
 1488    downcase_atom(Connection, 'keep-alive').
 1489
 1490:- public keep_alive/4. 1491
 1492keep_alive(StreamPair, Host, _In, 0) :-
 1493    !,
 1494    debug(http(connection), 'Adding connection to ~p to pool', [Host]),
 1495    add_to_pool(Host, StreamPair).
 1496keep_alive(StreamPair, Host, In, Left) :-
 1497    Left < 100,
 1498    debug(http(connection), 'Reading ~D left bytes', [Left]),
 1499    read_incomplete(In, Left),
 1500    add_to_pool(Host, StreamPair),
 1501    !.
 1502keep_alive(StreamPair, _, _, _) :-
 1503    debug(http(connection),
 1504          'Closing connection due to excessive unprocessed input', []),
 1505    (   debugging(http(connection))
 1506    ->  catch(close(StreamPair), E,
 1507              print_message(warning, E))
 1508    ;   close(StreamPair, [force(true)])
 1509    ).
 1510
 1511%!  read_incomplete(+In, +Left) is semidet.
 1512%
 1513%   If we have not all input from  a Keep-alive connection, read the
 1514%   remainder if it is short. Else, we fail and close the stream.
 1515
 1516read_incomplete(In, Left) :-
 1517    catch(setup_call_cleanup(
 1518              open_null_stream(Null),
 1519              copy_stream_data(In, Null, Left),
 1520              close(Null)),
 1521          _,
 1522          fail).
 1523
 1524:- dynamic
 1525    connection_pool/4,              % Hash, Address, Stream, Time
 1526    connection_gc_time/1. 1527
 1528add_to_pool(Address, StreamPair) :-
 1529    keep_connection(Address),
 1530    get_time(Now),
 1531    term_hash(Address, Hash),
 1532    assertz(connection_pool(Hash, Address, StreamPair, Now)).
 1533
 1534get_from_pool(Address, StreamPair) :-
 1535    term_hash(Address, Hash),
 1536    retract(connection_pool(Hash, Address, StreamPair, _)).
 1537
 1538%!  keep_connection(+Address) is semidet.
 1539%
 1540%   Succeeds if we want to keep   the  connection open. We currently
 1541%   keep a maximum of 10 connections  waiting   and  a  maximum of 2
 1542%   waiting for the same address. Connections   older than 2 seconds
 1543%   are closed.
 1544
 1545keep_connection(Address) :-
 1546    close_old_connections(2),
 1547    predicate_property(connection_pool(_,_,_,_), number_of_clauses(C)),
 1548    C =< 10,
 1549    term_hash(Address, Hash),
 1550    aggregate_all(count, connection_pool(Hash, Address, _, _), Count),
 1551    Count =< 2.
 1552
 1553close_old_connections(Timeout) :-
 1554    get_time(Now),
 1555    Before is Now - Timeout,
 1556    (   connection_gc_time(GC),
 1557        GC > Before
 1558    ->  true
 1559    ;   (   retractall(connection_gc_time(_)),
 1560            asserta(connection_gc_time(Now)),
 1561            connection_pool(Hash, Address, StreamPair, Added),
 1562            Added < Before,
 1563            retract(connection_pool(Hash, Address, StreamPair, Added)),
 1564            debug(http(connection),
 1565                  'Closing inactive keep-alive to ~p', [Address]),
 1566            close(StreamPair, [force(true)]),
 1567            fail
 1568        ;   true
 1569        )
 1570    ).
 1571
 1572
 1573%!  http_close_keep_alive(+Address) is det.
 1574%
 1575%   Close all keep-alive connections matching Address. Address is of
 1576%   the  form  Host:Port.  In  particular,  http_close_keep_alive(_)
 1577%   closes all currently known keep-alive connections.
 1578
 1579http_close_keep_alive(Address) :-
 1580    forall(get_from_pool(Address, StreamPair),
 1581           close(StreamPair, [force(true)])).
 1582
 1583%!  keep_alive_error(+Error)
 1584%
 1585%   Deal with an error from reusing  a keep-alive connection. If the
 1586%   error is due to an I/O error   or end-of-file, fail to backtrack
 1587%   over get_from_pool/2. Otherwise it is a   real error and we thus
 1588%   re-raise it.
 1589
 1590keep_alive_error(keep_alive(closed)) :-
 1591    !,
 1592    debug(http(connection), 'Keep-alive connection was closed', []),
 1593    fail.
 1594keep_alive_error(io_error(_,_)) :-
 1595    !,
 1596    debug(http(connection), 'IO error on Keep-alive connection', []),
 1597    fail.
 1598keep_alive_error(Error) :-
 1599    throw(Error).
 1600
 1601
 1602                 /*******************************
 1603                 *     HOOK DOCUMENTATION       *
 1604                 *******************************/
 1605
 1606%!  http:open_options(+Parts, -Options) is nondet.
 1607%
 1608%   This hook is used by the HTTP   client library to define default
 1609%   options based on the the broken-down request-URL.  The following
 1610%   example redirects all trafic, except for localhost over a proxy:
 1611%
 1612%       ==
 1613%       :- multifile
 1614%           http:open_options/2.
 1615%
 1616%       http:open_options(Parts, Options) :-
 1617%           option(host(Host), Parts),
 1618%           Host \== localhost,
 1619%           Options = [proxy('proxy.local', 3128)].
 1620%       ==
 1621%
 1622%   This hook may return multiple   solutions.  The returned options
 1623%   are  combined  using  merge_options/3  where  earlier  solutions
 1624%   overrule later solutions.
 1625
 1626%!  http:write_cookies(+Out, +Parts, +Options) is semidet.
 1627%
 1628%   Emit a =|Cookie:|= header for the  current connection. Out is an
 1629%   open stream to the HTTP server, Parts is the broken-down request
 1630%   (see uri_components/2) and Options is the list of options passed
 1631%   to http_open.  The predicate is called as if using ignore/1.
 1632%
 1633%   @see complements http:update_cookies/3.
 1634%   @see library(http/http_cookie) implements cookie handling on
 1635%   top of these hooks.
 1636
 1637%!  http:update_cookies(+CookieData, +Parts, +Options) is semidet.
 1638%
 1639%   Update the cookie database.  CookieData  is   the  value  of the
 1640%   =|Set-Cookie|= field, Parts is  the   broken-down  request  (see
 1641%   uri_components/2) and Options is the list   of options passed to
 1642%   http_open.
 1643%
 1644%   @see complements http:write_cookies
 1645%   @see library(http/http_cookies) implements cookie handling on
 1646%   top of these hooks.