View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2007-2023, University of Amsterdam
    7                              VU University Amsterdam
    8                              SWI-Prolog Solutions b.v.
    9    All rights reserved.
   10
   11    Redistribution and use in source and binary forms, with or without
   12    modification, are permitted provided that the following conditions
   13    are met:
   14
   15    1. Redistributions of source code must retain the above copyright
   16       notice, this list of conditions and the following disclaimer.
   17
   18    2. Redistributions in binary form must reproduce the above copyright
   19       notice, this list of conditions and the following disclaimer in
   20       the documentation and/or other materials provided with the
   21       distribution.
   22
   23    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   24    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   25    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   26    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   27    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   28    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   29    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   30    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   31    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   33    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   34    POSSIBILITY OF SUCH DAMAGE.
   35*/
   36
   37:- module(base64,
   38          [ base64_encoded/3,		% ?Plain, ?Encoded, +Options
   39            base64_encoded//2,          % ?Plain, +Options
   40
   41            base64/2,                   % ?PlainText, ?Encoded
   42            base64//1,                  % ?PlainText
   43
   44            base64url/2,                % ?PlainText, ?Encoded
   45            base64url//1                % ?PlainText
   46          ]).   47:- autoload(library(error),
   48	    [instantiation_error/1,must_be/2,syntax_error/1]).   49:- autoload(library(option),[option/3]).   50
   51:- encoding(utf8).
   52
   53/** <module> Base64 encoding and decoding
   54
   55Prolog-based base64 encoding using  DCG   rules.  Encoding  according to
   56rfc2045. For example:
   57
   58==
   591 ?- base64('Hello World', X).
   60X = 'SGVsbG8gV29ybGQ='.
   61
   622 ?- base64(H, 'SGVsbG8gV29ybGQ=').
   63H = 'Hello World'.
   64==
   65
   66The Base64URL encoding provides a URL and file name friendly alternative
   67to base64. Base64URL encoded strings do not contain white space.
   68
   69@tbd    Stream I/O
   70@tbd    White-space introduction and parsing
   71@tbd	Encoding support (notably UTF-8)
   72@bug	Base64 only works with _bytes_.  The grammars do not check
   73        the input to be in the range 0..255.
   74*/
   75
   76%!  base64_encoded(+Plain, -Encoded, +Options) is det.
   77%!  base64_encoded(-Plain, +Encoded, +Options) is det.
   78%
   79%   General the base64 encoding and   decoding.  This predicate subsumes
   80%   base64/2  and  base64url/2,  providing  control  over  padding,  the
   81%   characters used for encoding and the output type. Options:
   82%
   83%     - charset(+Charset)
   84%     Define the encoding character set to use.  The (default) `classic`
   85%     uses the classical rfc2045 characters.  The value `url` uses URL
   86%     and file name friendly characters.  See base64url/2.  The value
   87%     `openbsd` uses the OpenBSD password-file alphabet.
   88%     - padding(+Boolean)
   89%     If `true` (default), the output is padded with `=` characters.
   90%     - as(+Type)
   91%     Defines the type of the output.  One of `string` (default) or
   92%     `atom`.
   93%     - encoding(+Encoding)
   94%     Encoding to use for translation between (Unicode) text and
   95%     _bytes_ (Base64 is an encoding for bytes).  Default is `utf8`.
   96%
   97%   @arg Plain is an atom or string containing the unencoded (plain)
   98%   text.
   99%   @arg Encoded is an atom or string containing the base64 encoded
  100%   version of Plain.
  101
  102base64_encoded(Plain, Encoded, Options) :-
  103    option(charset(CharSet), Options, classic),
  104    option(padding(Padding), Options, true),
  105    option(as(As), Options, string),
  106    option(encoding(Enc), Options, utf8),
  107    (   nonvar(Plain)
  108    ->  string_bytes(Plain, PlainBytes, Enc),
  109        phrase(base64(Padding, PlainBytes, CharSet), EncCodes),
  110        as(As, Encoded, EncCodes, iso_latin_1)
  111    ;   nonvar(Encoded)
  112    ->  string_bytes(Encoded, EncCodes, iso_latin_1),
  113        phrase(base64(Padding, PlainBytes, CharSet), EncCodes),
  114        as(As, Plain, PlainBytes, Enc)
  115    ;   instantiation_error(base64(Plain, Encoded))
  116    ).
  117
  118as(atom, Atom, Codes, Enc) :-
  119    !,
  120    string_bytes(String, Codes, Enc),
  121    atom_string(Atom, String).
  122as(string, String, Codes, Enc) :-
  123    !,
  124    string_bytes(String, Codes, Enc).
  125as(As, _, _, _) :-
  126    must_be(oneof([atom,string]), As).
  127
  128%!  base64(+Plain, -Encoded) is det.
  129%!  base64(-Plain, +Encoded) is det.
  130%
  131%   Equivalent  to  base64_encoded/3  using  the  options  as(atom)  and
  132%   encoding(iso_latin_1).
  133%
  134%   @deprecated  New  code  should  use  base64_encoded/3.  Notably  the
  135%   `iso_latin_1` should be `utf8` in most today's applications.
  136
  137base64(Plain, Encoded) :-
  138    base64_encoded(Plain, Encoded, [ as(atom), encoding(iso_latin_1) ]).
  139
  140%!  base64url(+Plain, -Encoded) is det.
  141%!  base64url(-Plain, +Encoded) is det.
  142%
  143%   Translates between plaintext  and  base64url   encoded  atom  or
  144%   string. Base64URL encoded values can safely  be used as URLs and
  145%   file names. The use "-" instead of   "+", "_" instead of "/" and
  146%   do not use padding. This implies   that the encoded value cannot
  147%   be embedded inside a longer string.
  148%
  149%   Equivalent  to  base64_encoded/3   using    the   options  as(atom),
  150%   encoding(utf8) and charset(url).
  151
  152base64url(Plain, Encoded) :-
  153    base64_encoded(Plain, Encoded,
  154                   [ as(atom),
  155                     encoding(utf8),
  156                     charset(url)
  157                   ]).
  158
  159%!  base64_encoded(+PlainText, +Options)// is det.
  160%!  base64_encoded(-PlainText, +Options)// is det.
  161
  162base64_encoded(PlainText, Options) -->
  163    { option(charset(CharSet), Options, classic),
  164      option(padding(Padding), Options, true)
  165    },
  166    base64(Padding, PlainText, CharSet).
  167
  168
  169%!  base64(+PlainText)// is det.
  170%!  base64(-PlainText)// is det.
  171%
  172%   Encode/decode list of character codes using _base64_.  See also
  173%   base64/2.
  174
  175base64(PlainText) -->
  176    base64(true, PlainText, classic).
  177
  178%!  base64url(+PlainText)// is det.
  179%!  base64url(-PlainText)// is det.
  180%
  181%   Encode/decode list of character codes  using Base64URL. See also
  182%   base64url/2.
  183
  184base64url(PlainText) -->
  185    base64(false, PlainText, url).
  186
  187base64(Padded, Input, Charset) -->
  188    { nonvar(Input) },
  189    !,
  190    encode(Padded, Input, Charset).
  191base64(Padded, Output, Charset) -->
  192    decode(Padded, Output, Charset).
  193
  194                 /*******************************
  195                 *            ENCODING          *
  196                 *******************************/
  197
  198%!  encode(+Padded, +PlainText, +Charset)//
  199
  200encode(Padded, [I0, I1, I2|Rest], Charset) -->
  201    !,
  202    [O0, O1, O2, O3],
  203    { A is (I0<<16)+(I1<<8)+I2,
  204      O00 is (A>>18) /\ 0x3f,
  205      O01 is (A>>12) /\ 0x3f,
  206      O02 is  (A>>6) /\ 0x3f,
  207      O03 is       A /\ 0x3f,
  208      base64_char(Charset, O00, O0),
  209      base64_char(Charset, O01, O1),
  210      base64_char(Charset, O02, O2),
  211      base64_char(Charset, O03, O3)
  212    },
  213    encode(Padded, Rest, Charset).
  214encode(true, [I0, I1], Charset) -->
  215    !,
  216    [O0, O1, O2, 0'=],
  217    { A is (I0<<16)+(I1<<8),
  218      O00 is (A>>18) /\ 0x3f,
  219      O01 is (A>>12) /\ 0x3f,
  220      O02 is  (A>>6) /\ 0x3f,
  221      base64_char(Charset, O00, O0),
  222      base64_char(Charset, O01, O1),
  223      base64_char(Charset, O02, O2)
  224    }.
  225encode(true, [I0], Charset) -->
  226    !,
  227    [O0, O1, 0'=, 0'=],
  228    { A is (I0<<16),
  229      O00 is (A>>18) /\ 0x3f,
  230      O01 is (A>>12) /\ 0x3f,
  231      base64_char(Charset, O00, O0),
  232      base64_char(Charset, O01, O1)
  233    }.
  234encode(false, [I0, I1], Charset) -->
  235    !,
  236    [O0, O1, O2],
  237    { A is (I0<<16)+(I1<<8),
  238      O00 is (A>>18) /\ 0x3f,
  239      O01 is (A>>12) /\ 0x3f,
  240      O02 is  (A>>6) /\ 0x3f,
  241      base64_char(Charset, O00, O0),
  242      base64_char(Charset, O01, O1),
  243      base64_char(Charset, O02, O2)
  244    }.
  245encode(false, [I0], Charset) -->
  246    !,
  247    [O0, O1],
  248    { A is (I0<<16),
  249      O00 is (A>>18) /\ 0x3f,
  250      O01 is (A>>12) /\ 0x3f,
  251      base64_char(Charset, O00, O0),
  252      base64_char(Charset, O01, O1)
  253    }.
  254encode(_, [], _) -->
  255    [].
  256
  257
  258                 /*******************************
  259                 *            DECODE            *
  260                 *******************************/
  261
  262%!  decode(+Padded, -PlainText, +Charset)//
  263
  264decode(true, Text, Charset) -->
  265    [C0, C1, C2, C3],
  266    !,
  267    { base64_char(Charset, B0, C0),
  268      base64_char(Charset, B1, C1)
  269    },
  270    !,
  271    {   C3 == 0'=
  272    ->  (   C2 == 0'=
  273        ->  A is (B0<<18) + (B1<<12),
  274            I0 is (A>>16) /\ 0xff,
  275            Text = [I0|Rest]
  276        ;   base64_char(Charset, B2, C2)
  277        ->  A is (B0<<18) + (B1<<12) + (B2<<6),
  278            I0 is (A>>16) /\ 0xff,
  279            I1 is  (A>>8) /\ 0xff,
  280            Text = [I0,I1|Rest]
  281        )
  282    ;   base64_char(Charset, B2, C2),
  283        base64_char(Charset, B3, C3)
  284    ->  A is (B0<<18) + (B1<<12) + (B2<<6) + B3,
  285        I0 is (A>>16) /\ 0xff,
  286        I1 is  (A>>8) /\ 0xff,
  287        I2 is      A  /\ 0xff,
  288        Text = [I0,I1,I2|Rest]
  289    },
  290    decode(true, Rest, Charset).
  291decode(false, Text, Charset) -->
  292    [C0, C1, C2, C3],
  293    !,
  294    { base64_char(Charset, B0, C0),
  295      base64_char(Charset, B1, C1),
  296      base64_char(Charset, B2, C2),
  297      base64_char(Charset, B3, C3),
  298      A is (B0<<18) + (B1<<12) + (B2<<6) + B3,
  299      I0 is (A>>16) /\ 0xff,
  300      I1 is  (A>>8) /\ 0xff,
  301      I2 is      A  /\ 0xff,
  302      Text = [I0,I1,I2|Rest]
  303    },
  304    decode(false, Rest, Charset).
  305decode(false, Text, Charset) -->
  306    [C0, C1, C2],
  307    !,
  308    { base64_char(Charset, B0, C0),
  309      base64_char(Charset, B1, C1),
  310      base64_char(Charset, B2, C2),
  311      A is (B0<<18) + (B1<<12) + (B2<<6),
  312      I0 is (A>>16) /\ 0xff,
  313      I1 is  (A>>8) /\ 0xff,
  314      Text = [I0,I1]
  315    }.
  316decode(false, Text, Charset) -->
  317    [C0, C1],
  318    !,
  319    { base64_char(Charset, B0, C0),
  320      base64_char(Charset, B1, C1),
  321      A is (B0<<18) + (B1<<12),
  322      I0 is (A>>16) /\ 0xff,
  323      Text = [I0]
  324    }.
  325decode(_, [], _) -->
  326    [].
  327
  328
  329
  330                 /*******************************
  331                 *   BASIC CHARACTER ENCODING   *
  332                 *******************************/
  333
  334base64_char(00, 0'A).
  335base64_char(01, 0'B).
  336base64_char(02, 0'C).
  337base64_char(03, 0'D).
  338base64_char(04, 0'E).
  339base64_char(05, 0'F).
  340base64_char(06, 0'G).
  341base64_char(07, 0'H).
  342base64_char(08, 0'I).
  343base64_char(09, 0'J).
  344base64_char(10, 0'K).
  345base64_char(11, 0'L).
  346base64_char(12, 0'M).
  347base64_char(13, 0'N).
  348base64_char(14, 0'O).
  349base64_char(15, 0'P).
  350base64_char(16, 0'Q).
  351base64_char(17, 0'R).
  352base64_char(18, 0'S).
  353base64_char(19, 0'T).
  354base64_char(20, 0'U).
  355base64_char(21, 0'V).
  356base64_char(22, 0'W).
  357base64_char(23, 0'X).
  358base64_char(24, 0'Y).
  359base64_char(25, 0'Z).
  360base64_char(26, 0'a).
  361base64_char(27, 0'b).
  362base64_char(28, 0'c).
  363base64_char(29, 0'd).
  364base64_char(30, 0'e).
  365base64_char(31, 0'f).
  366base64_char(32, 0'g).
  367base64_char(33, 0'h).
  368base64_char(34, 0'i).
  369base64_char(35, 0'j).
  370base64_char(36, 0'k).
  371base64_char(37, 0'l).
  372base64_char(38, 0'm).
  373base64_char(39, 0'n).
  374base64_char(40, 0'o).
  375base64_char(41, 0'p).
  376base64_char(42, 0'q).
  377base64_char(43, 0'r).
  378base64_char(44, 0's).
  379base64_char(45, 0't).
  380base64_char(46, 0'u).
  381base64_char(47, 0'v).
  382base64_char(48, 0'w).
  383base64_char(49, 0'x).
  384base64_char(50, 0'y).
  385base64_char(51, 0'z).
  386base64_char(52, 0'0).
  387base64_char(53, 0'1).
  388base64_char(54, 0'2).
  389base64_char(55, 0'3).
  390base64_char(56, 0'4).
  391base64_char(57, 0'5).
  392base64_char(58, 0'6).
  393base64_char(59, 0'7).
  394base64_char(60, 0'8).
  395base64_char(61, 0'9).
  396base64_char(62, 0'+).
  397base64_char(63, 0'/).
  398
  399base64url_char_x(62, 0'-).
  400base64url_char_x(63, 0'_).
  401
  402base64bsd_char_x(00, 0'.).
  403base64bsd_char_x(01, 0'/).
  404
  405base64_char(classic, Value, Char) :-
  406    (   base64_char(Value, Char)
  407    ->  true
  408    ;   syntax_error(base64_char(Value, Char))
  409    ).
  410base64_char(url, Value, Char) :-
  411    (   base64url_char_x(Value, Char)
  412    ->  true
  413    ;   base64_char(Value, Char)
  414    ->  true
  415    ;   syntax_error(base64_char(Value, Char))
  416    ).
  417base64_char(openbsd, Value, Char) :-
  418    (   base64bsd_char_x(Value, Char)
  419    ->  true
  420    ;   nonvar(Value)
  421    ->  Value0 is Value - 2,
  422        (   base64_char(Value0, Char)
  423        ->  true
  424        ;   syntax_error(base64_char(Value, Char))
  425        )
  426    ;   (   base64_char(Value0, Char)
  427        ->  Value0 < 62, Value is Value0 + 2
  428        ;   syntax_error(base64_char(Value, Char))
  429        )
  430    ).
  431
  432                 /*******************************
  433                 *            MESSAGES          *
  434                 *******************************/
  435
  436:- multifile prolog:error_message//1.  437
  438prolog:error_message(syntax_error(base64_char(_D,E))) -->
  439    { nonvar(E) },
  440    !,
  441    [ 'Illegal Base64 character: "~c"'-[E] ]