View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2020, VU University Amsterdam
    7                         CWI, Amsterdam
    8    All rights reserved.
    9
   10    Redistribution and use in source and binary forms, with or without
   11    modification, are permitted provided that the following conditions
   12    are met:
   13
   14    1. Redistributions of source code must retain the above copyright
   15       notice, this list of conditions and the following disclaimer.
   16
   17    2. Redistributions in binary form must reproduce the above copyright
   18       notice, this list of conditions and the following disclaimer in
   19       the documentation and/or other materials provided with the
   20       distribution.
   21
   22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   33    POSSIBILITY OF SUCH DAMAGE.
   34*/
   35
   36:- module(hashtable,
   37          [ ht_new/1,                   % --HT
   38            ht_is_hashtable/1,          % @HT
   39            ht_size/2,                  % +HT, -Size
   40
   41            ht_put/3,                   % !HT, +Key, +Value
   42            ht_update/4,                % +HT, +Key, ?Old, +New
   43            ht_put_new/3,               % !HT, +Key, +Value
   44            ht_put/5,                   % !HT, +Key, +Value, +IfNew, -Old
   45            ht_del/3,                   % !HT, +Key, -Value
   46
   47            ht_get/3,                   % +HT, +Key, -Value
   48            ht_gen/3,                   % +HT, ?Key, ?Value
   49            ht_pairs/2,                 % ?HT, ?Pairs
   50            ht_keys/2                   % +HT, -Keys
   51          ]).   52:- autoload(library(error), [must_be/2]).   53
   54/** <module> Hash tables
   55
   56Hash tables are one of the   many key-value representations available to
   57SWI-Prolog.
   58
   59This module implements a hash table   as a _mutable_ and _backtrackable_
   60data structure. The hash table is implemented  as a _closed hash table_,
   61where the _buckets_ array  is  implemented   using  an  unbounded  arity
   62compound term. Elements in this array are manipulated using setarg/3.
   63
   64Hash tables allow for any Prolog data   types  as keys or values, except
   65that the key cannot be a  variable.   Applications  that require a plain
   66variable as key can do so by  wrapping   all  keys  in a compound, e.g.,
   67k(Var).
   68*/
   69
   70/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   71Data structure
   72
   73    ht(Load, Size, Table)
   74- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
   75
   76%!  ht_new(--HT)
   77%
   78%   Create a new hash table.
   79
   80ht_new(ht(0,0,[](_))).
   81
   82%!  ht_is_hashtable(@HT) is semidet.
   83%
   84%   True when HT is a hash table.
   85
   86ht_is_hashtable(HT) :-
   87    nonvar(HT),
   88    HT = ht(Load, Size, Buckets),
   89    integer(Load),
   90    integer(Size),
   91    compound_name_arity(Buckets, [], Arity),
   92    Arity =:= Size*2+1.
   93
   94%!  ht_size(+HT, -Count) is det.
   95%
   96%   True when Size is the number of key-value pairs in HT.
   97
   98ht_size(ht(Count, _Size, _Buckets), Count).
   99
  100%!  ht_put(!HT, +Key, +Value) is det.
  101%
  102%   Add a Key-Value to HT. The binding is undone on backtracking.
  103
  104ht_put(HT, Key, Value) :-
  105    must_be(nonvar, Key),
  106    ht_put(HT, Key, Value, _, _, _).
  107
  108%!  ht_put_new(!HT, +Key, +Value) is semidet.
  109%
  110%   As ht_put/3, but fails if Key is   already in HT instead of updating
  111%   the associated value.
  112
  113ht_put_new(HT, Key, Value) :-
  114    must_be(nonvar, Key),
  115    ht_put(HT, Key, Value, _, _, true).
  116
  117%!  ht_update(+HT, +Key, ?Old, +New) is semidet.
  118%
  119%   True when HT holds Key-Old before and  Key-New after this call. Note
  120%   that it is possible to update  to   a  variable  and the instantiate
  121%   this. For example, a word-count update could be implemented as:
  122%
  123%   ```
  124%   update_word_count(HT, Word) :-
  125%       (   ht_update(HT, Word, Old, New)
  126%       ->  New is Old+1
  127%       ;   ht_put(HT, Word, 1)
  128%       ).
  129%   ```
  130
  131ht_update(HT, Key, Old, New) :-
  132    must_be(nonvar, Key),
  133    ht_put(HT, Key, New, _, Old, false).
  134
  135%!  ht_put(!HT, +Key, +Value, +IfNew, -Old) is det.
  136%
  137%   Add Key-Value to HT. Old is unified   with  the old value associated
  138%   with Key or, if Key  is  new,  with   IfNew.  This  can  be  used to
  139%   bootstrap managing a list of values, e.g.
  140%
  141%       ht_put_list(HT, Key, Value) :-
  142%           ht_put(HT, Key, [Value|Tail], [], Tail).
  143
  144ht_put(HT, Key, Value, IfNew, Old) :-
  145    must_be(nonvar, Key),
  146    ht_put(HT, Key, Value, IfNew, Old, _).
  147
  148ht_put(HT, Key, Value, IfNew, Old, IsNew) :-
  149    HT = ht(Load, Size, Buckets),
  150    (   Load >= Size//2
  151    ->  ht_resize(HT),
  152        ht_put(HT, Key, Value, IfNew, Old, IsNew)
  153    ;   variant_hash(Key, I0),
  154        I is I0 mod Size,
  155        put_(Buckets, I, Size, Key, Old, IfNew, Value, IsNew),
  156        (   IsNew == true
  157        ->  Load2 is Load+1,
  158            setarg(1, HT, Load2)
  159        ;   true
  160        )
  161    ).
  162
  163put_(Buckets, I, Size, Key, Old, IfNew, Value, IsNew) :-
  164    ht_kv(Buckets, I, K, V),
  165    (   var(K)
  166    ->  IsNew = true,
  167        Old = IfNew,
  168        K = Key,
  169        V = Value
  170    ;   K == Key
  171    ->  IsNew = false,
  172        Old = V,
  173        ht_put_v(Buckets, I, Value)
  174    ;   I2 is (I+1) mod Size,
  175        put_(Buckets, I2, Size, Key, Old, IfNew, Value, IsNew)
  176    ).
  177
  178ht_resize(HT) :-
  179    HT = ht(_Load, Size, Buckets),
  180    NewSize is max(4, Size*2),
  181    NewArity is NewSize*2+1,
  182    compound_name_arity(NewBuckets, [], NewArity),
  183    copy_members(0, Size, Buckets, NewSize, NewBuckets),
  184    setarg(2, HT, NewSize),
  185    setarg(3, HT, NewBuckets).
  186
  187copy_members(I, OSize, OBuckets, NSize, NBuckets) :-
  188    I < OSize,
  189    !,
  190    ht_kv(OBuckets, I, K, V),
  191    (   nonvar(K)
  192    ->  variant_hash(K, I0),
  193        NI is I0 mod NSize,
  194        copy_(NBuckets, NI, NSize, K, V)
  195    ;   true
  196    ),
  197    I2 is I+1,
  198    copy_members(I2, OSize, OBuckets, NSize, NBuckets).
  199copy_members(_, _, _, _, _).
  200
  201copy_(Buckets, I, Size, Key, Value) :-
  202    ht_kv(Buckets, I, K, V),
  203    (   var(K)
  204    ->  K = Key,
  205        V = Value
  206    ;   I2 is (I+1) mod Size,
  207        copy_(Buckets, I2, Size, Key, Value)
  208    ).
  209
  210
  211%!  ht_del(!HT, +Key, -Value) is semidet.
  212%
  213%   Delete Key-Value from HT. Fails if  Key   does  not  appear in HT or
  214%   Value does not unify with the old associated value.
  215
  216ht_del(HT, Key, Value) :-
  217    must_be(nonvar, Key),
  218    HT = ht(Load, Size, Buckets),
  219    Load > 0,
  220    variant_hash(Key, I0),
  221    I is I0 mod Size,
  222    del_(Buckets, I, Size, Key, Value),
  223    Load2 is Load - 1,
  224    setarg(1, HT, Load2).
  225
  226del_(Buckets, I, Size, Key, Value) :-
  227    ht_kv(Buckets, I, K, V),
  228    (   var(K)
  229    ->  fail
  230    ;   K == Key
  231    ->  V = Value,
  232        ht_put_kv(Buckets, I, _, _),
  233        del_shift(Buckets, I, I, Size)
  234    ;   I2 is (I+1) mod Size,
  235        del_(Buckets, I2, Size, Key, Value)
  236    ).
  237
  238del_shift(Buckets, I0, J, Size) :-
  239    I is (I0+1) mod Size,
  240    ht_kv(Buckets, I, K, V),
  241    (   var(K)
  242    ->  true
  243    ;   variant_hash(K, Hash),
  244        R is Hash mod Size,
  245        (   (   I >= R, R > J
  246            ;   R > J, J > I
  247            ;   J > I, I >= R
  248            )
  249        ->  del_shift(Buckets, I, J, Size)
  250        ;   ht_put_kv(Buckets, J, K, V),
  251            ht_put_kv(Buckets, I, _, _),
  252            del_shift(Buckets, I, I, Size)
  253        )
  254    ).
  255
  256
  257%!  ht_get(+HT, +Key, -Value) is semidet.
  258%
  259%   True when Key is in HT and associated with Value.
  260
  261ht_get(ht(Load, Size, Buckets), Key, Value) :-
  262    Load > 0,
  263    must_be(nonvar, Key),
  264    variant_hash(Key, I0),
  265    I is I0 mod Size,
  266    get_(Buckets, I, Size, Key, Value).
  267
  268get_(Buckets, I, Size, Key, Value) :-
  269    ht_kv(Buckets, I, K, V),
  270    (   Key == K
  271    ->  Value = V
  272    ;   nonvar(K)
  273    ->  I2 is (I+1) mod Size,
  274        get_(Buckets, I2, Size, Key, Value)
  275    ).
  276
  277ht_k(Buckets, I, K) :-
  278    IK is I*2+1,
  279    arg(IK, Buckets, K).
  280
  281ht_kv(Buckets, I, K, V) :-
  282    IK is I*2+1,
  283    IV is IK+1,
  284    arg(IK, Buckets, K),
  285    arg(IV, Buckets, V).
  286
  287ht_put_kv(Buckets, I, K, V) :-
  288    IK is I*2+1,
  289    IV is IK+1,
  290    setarg(IK, Buckets, K),
  291    setarg(IV, Buckets, V).
  292
  293ht_put_v(Buckets, I, V) :-
  294    IV is I*2+2,
  295    setarg(IV, Buckets, V).
  296
  297%!  ht_gen(+HT, ?Key, ?Value) is nondet.
  298%
  299%   True when Key-Value is in HT.   Pairs are enumerated on backtracking
  300%   using the hash table order.
  301
  302ht_gen(HT, Key, Value) :-
  303    HT = ht(_, Size, Buckets),
  304    End is Size - 1,
  305    between(0, End, I),
  306    ht_kv(Buckets, I, K, V),
  307    nonvar(K),
  308    K = Key,
  309    V = Value.
  310
  311%!  ht_pairs(?HT, ?Pairs) is det.
  312%
  313%   True when Pairs and HT represent the  same association. When used in
  314%   mode (+,-), Pairs is an ordered set.
  315
  316ht_pairs(HT, Pairs) :-
  317    ht_is_hashtable(HT),
  318    !,
  319    HT = ht(_Load, Size, Buckets),
  320    pairs_(0, Size, Buckets, Pairs0),
  321    sort(Pairs0, Pairs).
  322ht_pairs(HT, Pairs) :-
  323    must_be(list(pair), Pairs),
  324    ht_new(HT),
  325    ht_fill(Pairs, HT).
  326
  327pairs_(I, Size, Buckets, Pairs) :-
  328    (   I < Size
  329    ->  ht_kv(Buckets, I, K, V),
  330        (   nonvar(K)
  331        ->  Pairs = [K-V|T],
  332            I2 is I+1,
  333            pairs_(I2, Size, Buckets, T)
  334        ;   I2 is I+1,
  335            pairs_(I2, Size, Buckets, Pairs)
  336        )
  337    ;   Pairs = []
  338    ).
  339
  340ht_fill([], _).
  341ht_fill([K-V|T], HT) :-
  342    ht_put(HT, K, V),
  343    ht_fill(T, HT).
  344
  345%!  ht_keys(+HT, -Keys) is det.
  346%
  347%   True when Keys is an ordered set of all keys in HT.
  348
  349ht_keys(HT, Keys) :-
  350    HT = ht(_Load, Size, Buckets),
  351    keys_(0, Size, Buckets, Keys0),
  352    sort(Keys0, Keys).
  353
  354keys_(I, Size, Buckets, Keys) :-
  355    (   I < Size
  356    ->  ht_k(Buckets, I, K),
  357        (   nonvar(K)
  358        ->  Keys = [K|T],
  359            I2 is I+1,
  360            keys_(I2, Size, Buckets, T)
  361        ;   I2 is I+1,
  362            keys_(I2, Size, Buckets, Keys)
  363        )
  364    ;   Keys = []
  365    )