1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2017, VU University Amsterdam
    7    All rights reserved.
    8
    9    Redistribution and use in source and binary forms, with or without
   10    modification, are permitted provided that the following conditions
   11    are met:
   12
   13    1. Redistributions of source code must retain the above copyright
   14       notice, this list of conditions and the following disclaimer.
   15
   16    2. Redistributions in binary form must reproduce the above copyright
   17       notice, this list of conditions and the following disclaimer in
   18       the documentation and/or other materials provided with the
   19       distribution.
   20
   21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   24    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   25    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   26    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   27    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   28    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   29    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   31    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   32    POSSIBILITY OF SUCH DAMAGE.
   33*/
   34
   35:- module(cache_rocks,
   36          [ cache_open/1,               % +Directory
   37            cached/1,                   % :Goal
   38            cached/2,                   % :Goal, +Hash
   39            cache_property/2,           % :Goal, ?Property
   40            cache_properties/2,         % :Goal, ?Properties:dict
   41            forget/1,                   % :Goal
   42            cache_statistics/1,         % ?Property
   43            cache_listing/0,
   44            cache_listing/1             % +Options
   45          ]).   46:- use_module(library(rocksdb)).   47:- use_module(library(error)).   48:- use_module(library(lists)).   49:- use_module(library(apply)).   50:- use_module(library(debug)).   51:- use_module(library(option)).   52:- use_module(signature).

Persistent answer caching

This module implements persistent caching of answers. The inspiration comes from tabled execution where tabled answers are kept as a trie of tries. The outer trie maps goal variants to answer tries and the answer tries provide the answers to a specific goal variant. The library(rocksdb) (and library(bdb)) provide a persistent Key-Value store that can map a term to a term. The term is represented as an external record, basically a binary alternative to write/read. This binary representation is a blob for the key-value store. The representation represents a variant, currently with two limitations:

  1. If the term has internal sharing, it is different from a term without. E.g., A = f(X), B = b(A,A) is a different B than you get from B = b(f(X),f(X)).
  2. If the key is cyclic, it only matches internally equivalent cycles. E.g., A = [a|A] and A = [a,a|A] are considered different.

Ignoring these two issues (which can be fixed), we can use RocksDB or BDB as the outer trie used in tabling. We could use a trie or similar structure for the set of answers, but in this case a list preserves the original order and is more compact. Our database basically maps call variants to a list of answers.

In addition, it does some book keeping. First of all, it uses signature.pl to compute a deep hash of the predicate. A deep hash is an SHA1 hash computed from the clauses of the predicates and all predicates called by it. The original goal, say m:p(a1, ...) is translated into <SHA1>(a1, ...). This implies that changing a predicate or one of the predicates called by it invalidate the cache. Second, it keeps track of partially completed goals and fully completed goals. Re-running a fully completed goal simply retrieves the cached answers. Re-running a partially completed goal first retrieves the cached answers and then re-runs the goal with an offset to compute additional answers, updating the status. */

   92:- meta_predicate
   93    cached(0),
   94    cached(:, +),
   95    forget(:),
   96    cache_property(:, ?),
   97    offset_check(+, 0, +).   98
   99:- dynamic
  100    rocks_d/2.
 cache_open(+Directory)
Open an answer cache in Directory. If Directory does not exist, create it as an empty answer store. Otherwise re-open an existing store.
  108cache_open(Dir) :-
  109    rocks_d(_, Dir),
  110    !.
  111cache_open(Dir) :-
  112    rocks_d(_, _),
  113    permission_error(open, cache, Dir).
  114cache_open(Dir) :-
  115    rocks_open(Dir, DB,
  116               [ key(term),
  117                 value(term)
  118               ]),
  119    asserta(rocks_d(DB, Dir)).
  120
  121rocks(DB) :-
  122    rocks_d(DB, _),
  123    !.
  124rocks(_) :-
  125    existence_error(cache, answers).
 cached(:Goal)
This predicate is logically equivalent to Goal. However, answers are on the first call collected in a trie and subsequently returned in arbitrary (hash key) order without duplicates.
  133cached(G) :-
  134    rocks(DB),
  135    goal_signature(G, Signature, Vars),
  136    (   rocks_get(DB, Signature, cache(G, Answers, State, _Time, _Hash))
  137    ->  from_db(State, Vars, Answers, restart(G, Signature, DB))
  138    ;   generalise_goal(G, 2, General, Bindings),
  139        goal_signature(General, GenSignature, GenVars),
  140        rocks_get(DB, GenSignature,
  141                  cache(GenGoal, GenAnswers, State, Time, Hash))
  142    ->  debug(cache, 'Filtering ~p for ~p', [GenGoal, G]),
  143        maplist(bind, Bindings),
  144        findall(Vars, member(GenVars, GenAnswers), Answers),
  145        rocks_put(DB, Signature, cache(G, Answers, State, Time, Hash)),
  146        member(Vars, Answers)
  147    ;   cache(G, Signature, Vars, [], DB)
  148    ).
  149
  150bind(V=V).
  151
  152cache(G, Sign, Vars, Sofar, DB) :-
  153    get_time(Now),
  154    copy_term(G+Sign, Goal+Signature),
  155    functor(Signature, Hash, _),
  156    Cache = cache(Goal, _Answers, _State, Now, Hash),
  157    (   Sofar == []
  158    ->  Enum = (G, add_answer(Set, Vars))
  159    ;   Enum = (offset_check(Vars, G, Sofar), add_answer(Set, Vars))
  160    ),
  161    setup_call_catcher_cleanup(
  162        answer_set(Sofar, Set),
  163        Enum,
  164        Catcher,
  165        commit(Catcher, Set, Signature, Cache, DB)).
  166
  167commit(exit, Set, Signature, Cache, DB) :-
  168    answers(Set, Answers),
  169    set_cache(Cache, Answers, complete),
  170    rocks_put(DB, Signature, Cache).
  171commit(fail, Set, Signature, Cache, DB) :-
  172    answers(Set, Answers),
  173    set_cache(Cache, Answers, complete),
  174    rocks_put(DB, Signature, Cache).
  175commit(!, Set, Signature, Cache, DB) :-
  176    answers(Set, Answers),
  177    set_cache(Cache, Answers, partial),
  178    rocks_put(DB, Signature, Cache).
  179commit(exception(E), Set, Signature, Cache, DB) :-
  180    answers(Set, Answers),
  181    set_cache(Cache, Answers, exception(E)),
  182    rocks_put(DB, Signature, Cache).
  183commit(external_exception(_), Set, Signature, Cache, DB) :-
  184    answers(Set, Answers),
  185    set_cache(Cache, Answers, partial),
  186    rocks_put(DB, Signature, Cache).
  187
  188from_db(complete, Vars, Answers, _Restart) :-
  189    member(Vars, Answers).
  190from_db(partial, Vars, Answers, restart(G, Signature, DB)) :-
  191    (   member(Vars, Answers)
  192    ;   cache(G, Signature, Vars, Answers, DB)
  193    ).
  194from_db(exception(E), Vars, Answers, _Restart) :-
  195    (   member(Vars, Answers)
  196    ;   throw(E)
  197    ).
  198
  199
  200set_cache(cache(_Goal, Answers, State, _Now, _Hash), Answers, State).
 answer_set(+List0, -Answers) is det
 add_answer(+Answers, +Answer) is det
 answers(+Answers, -List) is det
  206answer_set([], answers(List, List)) :-
  207    !,
  208    List = [$|_].
  209answer_set(Set0, answers([$|OpenSet], Tail)) :-
  210    open_list(Set0, OpenSet, Tail2),
  211    Tail = Tail2.                       % delay unification to avoid
  212                                        % Tail becoming a reference chain
  213open_list([Last], T, T) :-
  214    !,
  215    T = [Last|_].
  216open_list([H|T0], [H|T], Last) :-
  217    open_list(T0, T, Last).
  218
  219add_answer(Set, A) :-
  220    arg(2, Set, T0),
  221    duplicate_term(A, A2),
  222    nb_linkarg(2, T0, [A2|_]),
  223    arg(2, T0, T),
  224    nb_linkarg(2, Set, T).
  225
  226answers(answers([$|Answers], T), Answers) :-
  227    arg(2, T, []).
 offset_check(+Template, :Goal, +Expected)
Skip the first Expected answers from Goal. Raise an exception if a produced answer is not a variant of Template.
  234offset_check(Template, Goal, Expected) :-
  235    State = state(Expected),
  236    Goal,
  237    arg(1, State, Answers),
  238    (   Answers == []
  239    ->  true
  240    ;   Answers = [First|More],
  241        (   First =@= Template
  242        ->  nb_linkarg(1, State, More),
  243            fail
  244        ;   throw(error(consistency_error(Goal, Template, First),_))
  245        )
  246    ).
 generalise_goal(:Goal, +MaxDepth, -General, -Bindings) is nondet
True when General is a generalization of Goal achieved by generalising terms in Bindings. Bindings is a list of Term=Var pairs. Generalization first turns a compound entirely into a variable before preserving the functor and generalizing the arguments.
Arguments:
MaxDepth- defines how deep we look into arguments for possible generalizations. When 1, we merely replace nonvar arguments of Goal with a variable.
  260generalise_goal(M:G0, MaxDepth, M:G, Bindings) :-
  261    generalise(MaxDepth, G0, G, [], Bindings),
  262    nonvar(G),
  263    G0 \== G.
  264
  265generalise(_, Term, Term, Bindings, Bindings).
  266generalise(_, Term, Var, Bindings0, Bindings) :-
  267    nonvar(Term),
  268    Bindings = [Term=Var|Bindings0].
  269generalise(MaxDepth, Term, Gen, Bindings0, Bindings) :-
  270    succ(MaxDepth2, MaxDepth),
  271    compound(Term),
  272    compound_name_arguments(Term, Name, Args0),
  273    foldl(generalise(MaxDepth2), Args0, Args, Bindings0, Bindings),
  274    compound_name_arguments(Gen, Name, Args),
  275    Gen \== Term.
 cached(:Goal, +Hash)
Get the answers for Goal from an old hashed result. Hash is either the full hash or a shorthash (7 character prefix).
  282cached(Goal, HashS) :-
  283    atom_string(Hash, HashS),
  284    is_hash(Hash, Type),
  285    rocks(DB),
  286    cached(Type, DB, Goal, Hash).
  287
  288cached(sha1, DB, M:Goal, Hash) :-
  289    (   Goal =.. [_|Args],
  290        Signature =.. [Hash|Args],
  291        rocks_get(DB, Signature,
  292                  cache(M:Goal, Answers, _State, _Time, _Hash))
  293    ->  term_variables(Goal, VarList),
  294        Vars =.. [v|VarList],
  295        member(Vars, Answers)
  296    ;   generalise_goal(M:Goal, 5, M:General, Bindings),
  297        General =.. [_|Args],
  298        Signature =.. [Hash|Args],
  299        rocks_get(DB, Signature,
  300                  cache(M:GenGoal, GenAnswers, _State, _Time, _Hash))
  301    ->  debug(cache, 'Filtering ~p for ~p', [GenGoal, Goal]),
  302        term_variables(General, VarList),
  303        GenVars =.. [v|VarList],
  304        maplist(bind, Bindings),
  305        member(GenVars, GenAnswers)
  306    ;   existence_error(answer_cache, Hash)
  307    ).
  308cached(short, DB, Goal, ShortHash) :-
  309    Cache = cache(GoalV, Answers, _State, _Now, Hash),
  310    rocks_enum(DB, _Key, Cache),
  311    sub_atom(Hash, 0, _, _, ShortHash),
  312    !,
  313    (   Goal =@= GoalV
  314    ->  term_variables(Goal, VarList),
  315        Vars =.. [v|VarList],
  316        member(Vars, Answers)
  317    ;   subsumes_term(GoalV, Goal)
  318    ->  term_variables(GoalV, VarList),
  319        GenVars =.. [v|VarList],
  320        Goal = GoalV,
  321        member(GenVars, Answers)
  322    ;   throw(error(specific_expected(Goal, GoalV, ShortHash), _))
  323    ).
  324
  325is_hash(Atom, Hash) :-
  326    atom_length(Atom, Len),
  327    (   Len == 40
  328    ->  Hash = sha1
  329    ;   Len == 7
  330    ->  Hash = short
  331    ;   domain_error(hash, Atom)
  332    ).
 cache_property(:Goal, ?Property) is nondet
 cache_properties(:Goal, ?Properties:dict) is nondet
True if Property is a properly of the cached answers for Goal. Defined properties are:
count(-Count)
Number of answers
hash(-Hash)
Deep hash of the predicate associated with the goal.
time_cached(-Time)
Time stamp when the cache was created.

The cache_properties/2 variant returns all properties of a cache in a dict using the above keys.

  350cache_property(M:SubGoal, Property) :-
  351    rocks(DB),
  352    Cache = cache(M:SubGoal, _Answers, _Time, _Hash),
  353    rocks_enum(DB, _, Cache),
  354    property(Property, Cache).
  355
  356property(count(Count), cache(_, Answers, _, _)) :-
  357    length(Answers, Count).
  358property(time_cached(Time), cache(_, _, Time, _)).
  359property(hash(Hash), cache(_, _, _, Hash)).
  360
  361cache_properties(M:SubGoal,
  362                 cache_properties{count:Count,
  363                                  time_cached:Time,
  364                                  state:State,
  365                                  hash:Hash
  366                                 }) :-
  367    rocks(DB),
  368    Cache = cache(M:SubGoal, Answers, State, Time, Hash),
  369    rocks_enum(DB, _, Cache),
  370    length(Answers, Count).
 forget(:Goal)
Forget all cached results that are subsumed by Goal. Typically used as forget(m:p(_,_)) to remove all data cached for p/2. Notably forget(_:_) will destroy the entire cache.
  378forget(Goal) :-
  379    rocks(DB),
  380    Cache = cache(CGoal, _Answers, _State, _Now, _Hash),
  381    forall(( rocks_enum(DB, Key, Cache),
  382             subsumes_term(Goal, CGoal)
  383           ),
  384           rocks_delete(DB, Key)).
 cache_statistics(?Key)
True when Key is a know statistics about the caching mechanism.
  390cache_statistics(Property) :-
  391    rocks(DB),
  392    rocks_property(DB, Property).
 cache_listing is det
 cache_listing(+Options) is det
List contents of the persistent cache.
  399cache_listing :-
  400    cache_listing([]).
  401
  402cache_listing(Options) :-
  403    format('Predicate ~t Cached at~55|    Hash State ~t Count~76|~n', []),
  404    format('~`=t~76|~n'),
  405    forall(setof(Variant-Properties,
  406                 cached_predicate(Pred, Variant, Properties), PList),
  407           report(Pred, PList, Options)).
  408
  409cached_predicate(M:Name/Arity, Goal, Properties) :-
  410    cache_properties(M:Goal, Properties),
  411    functor(Goal, Name, Arity).
  412
  413
  414report(M:Name/Arity, Variants, Options) :-
  415    length(Variants, VCount),
  416    format('~w:~w/~d (~D variants)~n', [M, Name, Arity, VCount]),
  417    forall(limit(10, member(Variant-Properties, Variants)),
  418           ( short_state(Properties.state, State),
  419             short_hash(Properties.hash, M:Variant, Hash, Options),
  420             format_time(string(Date), "%FT%T", Properties.time_cached),
  421             numbervars(Variants, 0, _, [singletons(true)]),
  422             format('  ~p ~`.t ~s~55| ~w ~`.t ~w ~69| ~`.t ~D~76|~n',
  423                    [Variant, Date, Hash, State, Properties.count])
  424           )),
  425    Skipped is VCount - 10,
  426    (   Skipped > 0
  427    ->  format('  (skipped ~D variants)~n', [Skipped])
  428    ;   true
  429    ).
  430
  431short_state(complete,     'C').
  432short_state(partial,      'P').
  433short_state(exception(_), 'E').
  434
  435short_hash(Hash, Variant, Short, Options) :-
  436    option(hash(long), Options),
  437    !,
  438    (   deep_predicate_hash(Variant, Hash)
  439    ->  string_concat(Hash, *, Short)
  440    ;   Short = Hash
  441    ).
  442short_hash(Hash, Variant, Short, _) :-
  443    sub_string(Hash, 0, 7, _, Short0),
  444    (   deep_predicate_hash(Variant, Hash)
  445    ->  string_concat(Short0, *, Short)
  446    ;   Short = Short0
  447    ).
  448
  449:- multifile prolog:error_message//1.  450
  451prolog:error_message(consistency_error(Goal, Template, First)) -->
  452    [ '~p yielded inconsistent results (~p \\=@= ~p)'-[Goal, Template, First] ].
  453prolog:error_message(specific_expected(Goal, Expected, _Hash)) -->
  454    [ '~p is not a specialization of ~p'-[Goal, Expected] ].
  455
  456:- multifile sandbox:safe_meta_predicate/1.  457
  458sandbox:safe_meta_predicate(cache_rocks:cached/1).
  459sandbox:safe_meta_predicate(cache_rocks:forget/1)