1:- module(bc_analytics_db, [
    2    bc_analytics_user_ts/3,      % +Interval, +MinDuration, -Series
    3    bc_analytics_session_ts/3,   % +Interval, +MinDuration, -Series
    4    bc_analytics_pageview_ts/3,  % +Interval, +MinDuration, -Series
    5    bc_analytics_users/5,        % +Interval, +MinDuration, +Offset, +Count, -Users
    6    bc_analytics_sessions/5,     % +Interval, +MinDuration, +Offset, +Count, -Sessions
    7    bc_analytics_top_pages/3,    % +Interval, +MinDuration, -Pages
    8    bc_analytics_summary/3       % +Interval, +MinDuration, -Summary
    9]).

Generic visitor tracking analytics */

   13:- use_module(library(assoc)).   14:- use_module(library(error)).   15:- use_module(library(debug)).   16:- use_module(bc_env).   17:- use_module(bc_analytics).   18:- use_module(bc_analytics_read).   19:- use_module(bc_analytics_ts).   20
   21:- dynamic(analytics_cache/3).   22
   23% Validates the analytics interval.
   24% This is used for loading the analytics
   25% database and cache the loaded data.
   26
   27valid_interval(Interval):-
   28    Interval = (YearFrom, MonthFrom)-(YearTo, MonthTo),
   29    YearFrom >= 2018, YearFrom =< 2100,
   30    YearTo >= 2018, YearTo =< 2100,
   31    MonthFrom >= 1, MonthFrom =< 12,
   32    MonthTo >= 1, MonthTo =< 12, !.
   33
   34valid_interval(Interval):-
   35    throw(error(invalid_analytics_interval(Interval))).
   36
   37% Loads the analytics database into a module
   38% or uses the previously loaded database.
   39
   40analytics_module(Interval, Module):-
   41    with_mutex(analytics_cache,
   42        analytics_module_unsafe(Interval, Module)).
   43
   44analytics_module_unsafe(Interval, Module):-
   45    must_be(ground, Interval),
   46    valid_interval(Interval),
   47    (   analytics_cache(Interval, Module, _)
   48    ->  debug(bc_analytics,
   49            'Using cached analytics for ~w.', [Interval])
   50    ;   Interval = From-To,
   51        bc_analytics_flush_output,
   52        bc_analytics_read(From, To, Module),
   53        get_time(TimeStamp),
   54        assert_analytics_cache(Interval, Module, TimeStamp)).
   55
   56% Stores the cache entry only in the production
   57% environment.
   58
   59assert_analytics_cache(Interval, Module, TimeStamp):-
   60    bc_env_production, !,
   61    assertz(analytics_cache(Interval, Module, TimeStamp)).
   62
   63assert_analytics_cache(_, _, _).
   64
   65% Timeseries analytics.
   66
   67% Calculates the daily time series of the
   68% new user count.
   69
   70bc_analytics_user_ts(Interval, MinDuration, SeriesAsList):-
   71    analytics_module(Interval, Module),
   72    bc_analytics_ts_zero(Interval, ZeroSeries),
   73    findall(UserId, (
   74        call(Module:user(UserId)),
   75        call(Module:user_duration(UserId, Duration)),
   76        Duration >= MinDuration), AllUserIds),
   77    fill_user_ts(AllUserIds, Module, ZeroSeries, Series),
   78    bc_analytics_ts_list(Series, SeriesAsList).
   79
   80fill_user_ts([UserId|UserIds], Module, SeriesIn, SeriesOut):-
   81    call(Module:user_timestamp(UserId, TimeStamp)),
   82    bc_analytics_ts_incr(SeriesIn, TimeStamp, SeriesTmp),
   83    fill_user_ts(UserIds, Module, SeriesTmp, SeriesOut).
   84
   85fill_user_ts([], _, Series, Series).
   86
   87% Calculates the daily time series of the
   88% new session count.
   89
   90bc_analytics_session_ts(Interval, MinDuration, SeriesAsList):-
   91    analytics_module(Interval, Module),
   92    bc_analytics_ts_zero(Interval, ZeroSeries),
   93    findall(SessionId, (
   94        call(Module:session(SessionId)),
   95        call(Module:session_duration(SessionId, Duration)),
   96        Duration >= MinDuration), AllSessionIds),
   97    fill_session_ts(AllSessionIds, Module, ZeroSeries, Series),
   98    bc_analytics_ts_list(Series, SeriesAsList).
   99
  100fill_session_ts([SessionId|SessionIds], Module, SeriesIn, SeriesOut):-
  101    call(Module:session_timestamp(SessionId, TimeStamp)),
  102    bc_analytics_ts_incr(SeriesIn, TimeStamp, SeriesTmp),
  103    fill_session_ts(SessionIds, Module, SeriesTmp, SeriesOut).
  104
  105fill_session_ts([], _, Series, Series).
  106
  107% Calculates the daily time series of the
  108% pageview count. Only those pageviews from the
  109% sessions passing the minimum duration are considered.
  110
  111bc_analytics_pageview_ts(Interval, MinDuration, SeriesAsList):-
  112    analytics_module(Interval, Module),
  113    bc_analytics_ts_zero(Interval, ZeroSeries),
  114    findall(PageviewId, (
  115        call(Module:pageview(PageviewId)),
  116        call(Module:pageview_session(PageviewId, SessionId)),
  117        call(Module:session(SessionId)),
  118        call(Module:session_duration(SessionId, Duration)),
  119        Duration >= MinDuration), AllPageviewIds),
  120    fill_pageview_ts(AllPageviewIds, Module, ZeroSeries, Series),
  121    bc_analytics_ts_list(Series, SeriesAsList).
  122
  123fill_pageview_ts([PageviewId|PageviewIds], Module, SeriesIn, SeriesOut):-
  124    call(Module:pageview_timestamp(PageviewId, TimeStamp)),
  125    bc_analytics_ts_incr(SeriesIn, TimeStamp, SeriesTmp),
  126    fill_pageview_ts(PageviewIds, Module, SeriesTmp, SeriesOut).
  127
  128fill_pageview_ts([], _, Series, Series).
  129
  130% List of users that have spent more time on the site
  131% than the given minimum duration.
  132
  133bc_analytics_users(Interval, MinDuration, Offset, Count, Users):-
  134    analytics_module(Interval, Module),
  135    findall(UserId, (
  136        call(Module:user(UserId)),
  137        call(Module:user_duration(UserId, Duration)),
  138        Duration >= MinDuration), AllUserIds),
  139    reverse(AllUserIds, RecentFirstIds),
  140    sublist_offset_count(RecentFirstIds, Offset, Count, UserIds),
  141    maplist(user_data(Module), UserIds, Users).
  142
  143% List of sessions that are longer
  144% than the given minimum duration.
  145
  146bc_analytics_sessions(Interval, MinDuration, Offset, Count, Sessions):-
  147    analytics_module(Interval, Module),
  148    findall(SessionId, (
  149        call(Module:session(SessionId)),
  150        call(Module:session_duration(SessionId, Duration)),
  151        Duration >= MinDuration), AllSessionIds),
  152    sublist_offset_count(AllSessionIds, Offset, Count, SessionIds),
  153    maplist(session_data(Module), SessionIds, Sessions).
  154
  155% All given user sessions.
  156
  157bc_analytics_user_sessions(Module, UserId, Sessions):-
  158    findall(SessionId,
  159        call(Module:session_user(SessionId, UserId)), SessionIds),
  160    maplist(session_data(Module), SessionIds, Sessions).
  161
  162% All given session pageviews.
  163
  164bc_analytics_session_pageviews(Module, SessionId, Pageviews):-
  165    findall(PageviewId,
  166        call(Module:pageview_session(PageviewId, SessionId)), PageviewIds),
  167    maplist(pageview_data(Module), PageviewIds, Pageviews).
  168
  169% Extracts sublist by offset and count.
  170
  171sublist_offset_count(List, Offset, Count, Sublist):-
  172    UpperBound is Offset + Count,
  173    findall(Member, (
  174        nth0(Index, List, Member),
  175        Index >= Offset,
  176        Index < UpperBound), Sublist), !.
  177
  178% Top pages that were viewed in a session with
  179% the given minimum duration. Pages are identified
  180% by location paths.
  181
  182bc_analytics_top_pages(Interval, MinDuration, Pages):-
  183    analytics_module(Interval, Module),
  184    findall(Location, (
  185        call(Module:pageview_location(PageviewId, Location)),
  186        call(Module:pageview_session(PageviewId, SessionId)),
  187        call(Module:session_duration(SessionId, Duration)),
  188        Duration >= MinDuration), Locations),
  189    empty_assoc(Empty),
  190    fill_page_count_assoc(Locations, Empty, Assoc),
  191    assoc_to_list(Assoc, List),
  192    sort(2, @>=, List, Sorted),
  193    sublist_offset_count(Sorted, 0, 50, Top),
  194    maplist(top_page_data(Module), Top, Pages).
  195
  196fill_page_count_assoc([Location|Locations], AssocIn, AssocOut):-
  197    (   get_assoc(Location, AssocIn, Count)
  198    ->  NewCount is Count + 1,
  199        put_assoc(Location, AssocIn, NewCount, AssocTmp),
  200        fill_page_count_assoc(Locations, AssocTmp, AssocOut)
  201    ;   put_assoc(Location, AssocIn, 1, AssocTmp),
  202        fill_page_count_assoc(Locations, AssocTmp, AssocOut)).
  203
  204fill_page_count_assoc([], Assoc, Assoc).
  205
  206top_page_data(Module, Location-Count, Dict):-
  207    location_title(Module, Location, Title),
  208    Dict = _{
  209        location: Location,
  210        title: Title,
  211        count: Count}.
  212
  213location_title(Module, Location, Title):-
  214    call(Module:pageview_location(PageviewId, Location)),
  215    call(Module:pageview_title(PageviewId, Title)), !.
  216
  217% Analytics summary for the given period.
  218
  219bc_analytics_summary(Interval, MinDuration, Summary):-
  220    analytics_module(Interval, Module),
  221    pageview_count(Module, MinDuration, PageviewCount),
  222    session_count(Module, MinDuration, SessionCount),
  223    user_count(Module, MinDuration, UserCount),
  224    Summary = summary{
  225        pageview_count: PageviewCount,
  226        session_count: SessionCount,
  227        user_count: UserCount}.
  228
  229% TODO: just count solutions.
  230
  231pageview_count(Module, MinDuration, Count):-
  232    findall(_, (
  233        call(Module:pageview_session(_, SessionId)),
  234        call(Module:session_duration(SessionId, Duration)),
  235        Duration >= MinDuration), Pageviews),
  236    length(Pageviews, Count).
  237
  238session_count(Module, MinDuration, Count):-
  239    findall(_, (
  240        call(Module:session_duration(_, Duration)),
  241        Duration >= MinDuration), Sessions),
  242    length(Sessions, Count).
  243
  244user_count(Module, MinDuration, Count):-
  245    findall(_, (
  246        call(Module:user_duration(_, Duration)),
  247        Duration >= MinDuration), Users),
  248    length(Users, Count).
  249
  250% Turns user id into a data dict containing
  251% information about the user.
  252
  253user_data(Module, UserId, Dict):-
  254    call(Module:user_duration(UserId, Duration)),
  255    call(Module:user_timestamp(UserId, TimeStamp)),
  256    call(Module:user_session_count(UserId, SessionCount)),
  257    Dict = user{
  258        user_id: UserId,
  259        duration: Duration,
  260        timestamp: TimeStamp,
  261        session_count: SessionCount}.
  262
  263% Turns user id into a data dict containing
  264% information about the user.
  265
  266session_data(Module, SessionId, Dict):-
  267    call(Module:session_user(SessionId, UserId)),
  268    call(Module:session_duration(SessionId, Duration)),
  269    call(Module:session_timestamp(SessionId, TimeStamp)),
  270    call(Module:session_pagecount(SessionId, PageCount)),
  271    call(Module:session_agent(SessionId, Agent)),
  272    call(Module:session_platform(SessionId, Platform)),
  273    Dict = session{
  274        session_id: SessionId,
  275        user_id: UserId,
  276        duration: Duration,
  277        timestamp: TimeStamp,
  278        pagecount: PageCount,
  279        agent: Agent,
  280        platform: Platform}.
  281
  282% Turns pageview id into a data dict containing
  283% information about the pageview.
  284
  285pageview_data(Module, PageviewId, Dict):-
  286    call(Module:pageview_duration(PageviewId, Duration)),
  287    call(Module:pageview_timestamp(PageviewId, TimeStamp)),
  288    call(Module:pageview_location(PageviewId, Location)),
  289    call(Module:pageview_referrer(PageviewId, Referrer)),
  290    call(Module:pageview_title(PageviewId, Title)),
  291    call(Module:pageview_entry(PageviewId, EntryId)),
  292    Dict = pageview{
  293        pageview_id: PageviewId,
  294        duration: Duration,
  295        timestamp: TimeStamp,
  296        location: Location,
  297        referrer: Referrer,
  298        title: Title,
  299        entry_id: EntryId}.
  300
  301% Sleep time setting for the
  302% cache invalidation queue thread.
  303
  304cache_thread_sleep(60).
  305
  306start_cache_thread:-
  307    debug(bc_analytics, 'Started analytics invalidation thread', []),
  308    cache_loop.
  309
  310% Tail-call optimized loop.
  311
  312cache_loop:-
  313    cache_loop_iteration,
  314    cache_thread_sleep(Sleep),
  315    sleep(Sleep),
  316    cache_loop.
  317
  318cache_loop_iteration:-
  319    findall(Module-TimeStamp,
  320        analytics_cache(_, Module, TimeStamp), Entries),
  321    include(expired_cache, Entries, Expired),
  322    maplist(invalidate_expired, Expired).
  323
  324expired_cache(_-TimeStamp):-
  325    get_time(CurrentTime),
  326    CurrentTime > TimeStamp + 1800.
  327
  328invalidate_expired(Entry):-
  329    with_mutex(analytics_cache,
  330        invalidate_expired_unsafe(Entry)).
  331
  332invalidate_expired_unsafe(Module-_):-
  333    debug(bc_analytics, 'Invalidating analytics cache ~w.', [Module]),
  334    retractall(analytics_cache(_, Module, _)),
  335    clear_module(Module).
  336
  337clear_module(Module):-
  338    PredicateIndicator = Module:_,
  339    forall(current_predicate(PredicateIndicator),
  340        abolish(PredicateIndicator)).
  341
  342% Starts the cache invalidation thread.
  343
  344:- thread_create(start_cache_thread, _, []).