1:- module(bc_similarity, [
    2    bc_similar/3 % +Type, +Id, -List
    3]).

Entry similarity analysis */

    7:- use_module(library(docstore)).    8:- use_module(library(sort_dict)).    9
   10:- use_module(bc_search).   11:- use_module(bc_excerpt).
 bc_similar(+Type, +Id, -List) is det
Finds similar entries of the type for the given entry.
   18bc_similar(Type, Id, List):-
   19    must_be(atom, Type),
   20    must_be(atom, Id),
   21    similarity_list(Type, Id, List).
   22
   23similarity_list(Type, Id, Results):-
   24    (   ds_col_get(entry, Id, [tags], Entry)
   25    ->  ds_find(entry, (published=true, type=Type),
   26            [slug, tags, title, author, date_published,
   27            date_updated, description, language], Entries),
   28        include(common_tag(Entry.tags), Entries, WithCommonTag),
   29        exclude(same_entry(Id), WithCommonTag, Filtered),
   30        maplist(entry_cosine_similarity(Id), Filtered, Similarities),
   31        sort_dict(score, desc, Similarities, Sorted),
   32        maplist(add_extra_data, Sorted, Results)
   33    ;   throw(error(no_entry(Id), _))).
   34
   35% Adds excerpt.
   36
   37add_extra_data(Result, WithData):-
   38    ds_id(Result.entry, Id),
   39    ds_col_get(entry, Id, [html], Excerpt),
   40    bc_excerpt(Excerpt.html, 200, Text),
   41    ds_col_get(user, Result.entry.author,
   42        [fullname, link], Author),
   43    Entry = Result.entry.put(_{
   44        excerpt: Text,
   45        author: Author
   46    }),
   47    WithData = Result.put(entry, Entry).
   48
   49% Succeeds when the entry has
   50% the given id.
   51
   52same_entry(Id, Entry):-
   53    ds_id(Entry, Id).
   54
   55% Succeeds when the entry has a common
   56% tag with the list of tags.
   57
   58common_tag(Tags, Entry):-
   59    member(Tag, Tags),
   60    member(Tag, Entry.tags).
   61
   62% Finds the entry similarity score
   63% for the given tags. Gives back a dict.
   64
   65entry_cosine_similarity(Id1, Entry, Similarity):-
   66    ds_id(Entry, Id2),
   67    bc_cosine_similarity(Id1, Id2, Cosine),
   68    Similarity = _{ entry: Entry, score: Cosine }