1:- module(bc_analytics_read, [
    2    bc_analytics_read/3 % +From, +To, -Module
    3]).

/** <module> Generic visitor tracking analytics */

    7:- use_module(library(debug)).    8:- use_module(library(error)).    9:- use_module(library(gensym)).   10:- use_module(library(pcre)).   11:- use_module(library(gensym)).   12:- use_module(bc_analytics).   13
   14% Reads analytics data into the given module.
   15% From and To are terms in the form: (Year, Month).
   16
   17bc_analytics_read(From, To, Module):-
   18    must_be(ground, From),
   19    must_be(ground, To),
   20    gensym(analytics_cache_module_, Module),
   21    dynamic(Module:user/1),
   22    dynamic(Module:user_pixel/1),
   23    dynamic(Module:user_duration/2),
   24    dynamic(Module:user_timestamp/2),
   25    dynamic(Module:user_session_count/2),
   26    dynamic(Module:user_pagecount/2),
   27    dynamic(Module:session/1),
   28    dynamic(Module:session_pixel/1),
   29    dynamic(Module:session_user/2),
   30    dynamic(Module:session_duration/2),
   31    dynamic(Module:session_pagecount/2),
   32    dynamic(Module:session_agent/2),
   33    dynamic(Module:session_platform/2),
   34    dynamic(Module:pageview/1),
   35    dynamic(Module:pageview_pixel/1),
   36    dynamic(Module:pageview_session/2),
   37    dynamic(Module:pageview_duration/2),
   38    dynamic(Module:pageview_timestamp/2),
   39    dynamic(Module:pageview_location/2),
   40    dynamic(Module:pageview_referrer/2),
   41    dynamic(Module:pageview_title/2),
   42    dynamic(Module:pageview_entry/2),
   43    findall(Name, file_name(From, To, Name), Names),
   44    maplist(read_file_into(Module), Names),
   45    compute_session_pagecounts(Module),
   46    compute_session_durations(Module),
   47    compute_user_session_counts(Module),
   48    compute_user_pagecounts(Module),
   49    compute_user_durations(Module). 
   50
   51read_file_into(Module, File):-
   52    debug(bc_analytics, 'Reading file ~w into module ~w.', [File, Module]),
   53    setup_call_cleanup(
   54        open(File, read, Stream, [encoding('utf8')]),
   55        read_stream_into(Module, Stream),
   56        close(Stream)).
   57
   58% Reads and loads the terms from the given
   59% file into the target module.
   60
   61read_stream_into(Module, Stream):-
   62    catch(
   63        read_term(Stream, Term, [dotlists(true)]),
   64        E, true),
   65    (   var(E)
   66    ->  (   Term = end_of_file
   67        ->  true
   68        ;   load_term_into(Module, Term),
   69            read_stream_into(Module, Stream))
   70    ;   true).
   71
   72load_term_into(Module, Term):-
   73    is_dict(Term, Tag),
   74    load_dict_term_into(Tag, Module, Term).
   75
   76load_dict_term_into(user, Module, Dict):- !,
   77    UserId = Dict.user_id,
   78    assertz(Module:user(UserId)),
   79    assertz(Module:user_duration(UserId, 0)),
   80    assertz(Module:user_timestamp(UserId, Dict.timestamp)),
   81    assertz(Module:user_session_count(UserId, 0)),
   82    assertz(Module:user_pagecount(UserId, 0)).
   83
   84load_dict_term_into(session, Module, Dict):-
   85    \+ re_match("crawler|bot|spider"/i, Dict.agent),
   86    UserId = Dict.user_id,
   87    call(Module:user(UserId)), !,
   88    SessionId = Dict.session_id,
   89    assertz(Module:session(SessionId)),
   90    assertz(Module:session_user(SessionId, UserId)),
   91    assertz(Module:session_duration(SessionId, 0)),
   92    assertz(Module:session_pagecount(SessionId, 0)),
   93    assertz(Module:session_timestamp(SessionId, Dict.timestamp)),
   94    assertz(Module:session_agent(SessionId, Dict.agent)),
   95    assertz(Module:session_platform(SessionId, Dict.platform)).
   96
   97load_dict_term_into(pageview, Module, Dict):-
   98    SessionId = Dict.session_id,
   99    call(Module:session(SessionId)), !,
  100    PageviewId = Dict.pageview_id,
  101    assertz(Module:pageview(PageviewId)),
  102    assertz(Module:pageview_session(PageviewId, SessionId)),
  103    assertz(Module:pageview_duration(PageviewId, 0)),
  104    assertz(Module:pageview_timestamp(PageviewId, Dict.timestamp)),
  105    assertz(Module:pageview_location(PageviewId, Dict.location)),
  106    assertz(Module:pageview_referrer(PageviewId, Dict.referrer)),
  107    assertz(Module:pageview_title(PageviewId, Dict.title)),
  108    assertz(Module:pageview_entry(PageviewId, Dict.entry_id)).
  109
  110load_dict_term_into(pageview_extend, Module, Dict):-
  111    PageviewId = Dict.pageview_id,
  112    call(Module:pageview(PageviewId)), !,
  113    retractall(Module:pageview_duration(PageviewId, _)),
  114    assertz(Module:pageview_duration(PageviewId, Dict.elapsed)).
  115
  116load_dict_term_into(pixel, Module, Dict):-
  117    _{
  118        user_id: UserId,
  119        agent: Agent,
  120        platform: Platform,
  121        session_id: SessionId,
  122        location: Location,
  123        referrer: Referrer,
  124        entry_id: EntryId,
  125        title: Title,
  126        timestamp: TimeStamp
  127    } :< Dict,
  128    load_pixel_user(Module, UserId, SessionId, TimeStamp),
  129    load_pixel_session(Module, UserId, SessionId, TimeStamp, Agent, Platform),
  130    load_pixel_pageview(Module, SessionId, TimeStamp, Location, Referrer, Title, EntryId).
  131
  132load_dict_term_into(_, _, _).
  133
  134% Loads user data from a pixel tracking event.
  135% Updates user duration, page and session count.
  136
  137load_pixel_user(Module, UserId, SessionId, TimeStamp):-
  138    call(Module:user(UserId)), !,
  139    call(Module:user_timestamp(UserId, OldTimeStamp)),
  140    call(Module:user_pagecount(UserId, OldPageCount)),
  141    call(Module:user_session_count(UserId, OldSessionCount)),
  142    Duration is TimeStamp - OldTimeStamp,
  143    PageCount is OldPageCount + 1,
  144    (   call(Module:session_user(SessionId, UserId))
  145    ->  SessionCount = OldSessionCount
  146    ;   SessionCount is OldSessionCount + 1),
  147    retractall(Module:user_duration(UserId, _)),
  148    retractall(Module:user_pagecount(UserId, _)),
  149    retractall(Module:user_session_count(UserId, _)),
  150    assertz(Module:user_duration(UserId, Duration)),
  151    assertz(Module:user_pagecount(UserId, PageCount)),
  152    assertz(Module:user_session_count(UserId, SessionCount)).
  153
  154% Loads user data from a pixel tracking event.
  155% Sets initial duration, page and session count.
  156
  157load_pixel_user(Module, UserId, _, TimeStamp):-    
  158    assertz(Module:user(UserId)),
  159    assertz(Module:user_pixel(UserId)),
  160    assertz(Module:user_duration(UserId, 0)),
  161    assertz(Module:user_timestamp(UserId, TimeStamp)),
  162    assertz(Module:user_session_count(UserId, 1)),
  163    assertz(Module:user_pagecount(UserId, 1)).
  164
  165% Loads session data from a pixel tracking event.
  166% Updates session duration and page count.
  167
  168load_pixel_session(Module, _, SessionId, TimeStamp, _, _):-
  169    call(Module:session(SessionId)), !,
  170    call(Module:session_pagecount(SessionId, OldPageCount)),
  171    call(Module:session_timestamp(SessionId, OldTimeStamp)),
  172    Duration is TimeStamp - OldTimeStamp,
  173    PageCount is OldPageCount + 1,
  174    retractall(Module:session_duration(SessionId, _)),
  175    retractall(Module:session_pagecount(SessionId, _)),    
  176    assertz(Module:session_pagecount(SessionId, PageCount)),
  177    assertz(Module:session_duration(SessionId, Duration)).
  178
  179% Loads session data from a pixel tracking event.
  180% Sets initial session duration and page count, user agent and platform.
  181
  182load_pixel_session(Module, UserId, SessionId, TimeStamp, Agent, Platform):-
  183    assertz(Module:session(SessionId)),
  184    assertz(Module:session_pixel(SessionId)),
  185    assertz(Module:session_user(SessionId, UserId)),
  186    assertz(Module:session_duration(SessionId, 0)),
  187    assertz(Module:session_pagecount(SessionId, 0)),
  188    assertz(Module:session_timestamp(SessionId, TimeStamp)),
  189    assertz(Module:session_agent(SessionId, Agent)),
  190    assertz(Module:session_platform(SessionId, Platform)).
  191
  192% Loads pageview data from pixel tracing event.
  193
  194load_pixel_pageview(Module, SessionId, TimeStamp, Location, Referrer, Title, EntryId):-
  195    gensym(pv_, PageviewId),
  196    assertz(Module:pageview(PageviewId)),
  197    assertz(Module:pageview_session(PageviewId, SessionId)),
  198    assertz(Module:pageview_duration(PageviewId, 0)),
  199    assertz(Module:pageview_timestamp(PageviewId, TimeStamp)),
  200    assertz(Module:pageview_location(PageviewId, Location)),
  201    assertz(Module:pageview_referrer(PageviewId, Referrer)),
  202    assertz(Module:pageview_title(PageviewId, Title)),
  203    assertz(Module:pageview_entry(PageviewId, EntryId)).
  204
  205% Computes total session durations from pageview
  206% durations.
  207
  208compute_session_durations(Module):-
  209    findall(SessionId, (
  210        call(Module:session(SessionId)),
  211        \+ call(Module:session_pixel(SessionId))
  212    ), Sessions),
  213    maplist(compute_session_duration(Module), Sessions).
  214
  215compute_session_duration(Module, SessionId):-
  216    findall(Duration, (
  217        call(Module:pageview_session(PageviewId, SessionId)),
  218        call(Module:pageview_duration(PageviewId, Duration))
  219    ), Durations),
  220    sum_list(Durations, Total),
  221    retractall(Module:session_duration(SessionId, _)),
  222    assertz(Module:session_duration(SessionId, Total)).
  223
  224% Computes total user durations from session durations.
  225
  226compute_user_durations(Module):-
  227    findall(UserId, (
  228        call(Module:user(UserId)),
  229        \+ call(Module:user_pixel(UserId))
  230    ), Users),
  231    maplist(compute_user_duration(Module), Users).
  232
  233compute_user_duration(Module, UserId):-
  234    findall(Duration, (
  235        call(Module:session_user(SessionId, UserId)),
  236        call(Module:session_duration(SessionId, Duration))), Durations),
  237    sum_list(Durations, Total),
  238    retractall(Module:user_duration(UserId, _)),
  239    assertz(Module:user_duration(UserId, Total)).
  240
  241% Computes total user page views from the sum of
  242% session page views.
  243
  244compute_user_pagecounts(Module):-
  245    findall(UserId, (
  246        call(Module:user(UserId)),
  247        \+ call(Module:user_pixel(UserId))
  248    ), Users),
  249    maplist(compute_user_pagecount(Module), Users).
  250
  251compute_user_pagecount(Module, UserId):-
  252    findall(PageCount, (
  253        call(Module:session_user(SessionId, UserId)),
  254        call(Module:session_pagecount(SessionId, PageCount))), PageCounts),
  255    sum_list(PageCounts, Total),
  256    retractall(Module:user_pagecount(UserId, _)),
  257    assertz(Module:user_pagecount(UserId, Total)).
  258
  259% Computes the number of sessions for the user.
  260
  261compute_user_session_counts(Module):-
  262    findall(UserId, (
  263        call(Module:user(UserId)),
  264        \+ call(Module:user_pixel(UserId))
  265    ), Users),
  266    maplist(compute_user_session_count(Module), Users).
  267
  268compute_user_session_count(Module, UserId):-
  269    findall(_, call(Module:session_user(_, UserId)), List),
  270    length(List, SessionCount),
  271    retractall(Module:user_session_count(UserId, _)),
  272    assertz(Module:user_session_count(UserId, SessionCount)).
  273
  274% Computes the number of pagecounts for the sessions.
  275
  276compute_session_pagecounts(Module):-
  277    findall(SessionId, (
  278        call(Module:session(SessionId)),
  279        \+ call(Module:session_pixel(SessionId))
  280    ), Sessions),
  281    maplist(compute_session_pagecount(Module), Sessions).
  282
  283compute_session_pagecount(Module, SessionId):-
  284    findall(_, call(Module:pageview_session(_, SessionId)), List),
  285    length(List, PageCount),
  286    retractall(Module:session_pagecount(SessionId, PageCount)),
  287    assertz(Module:session_pagecount(SessionId, PageCount)).
  288
  289file_name(From, To, File):-
  290    From = (YearFrom, MonthFrom),
  291    To = (YearTo, MonthTo),
  292    between(YearFrom, YearTo, Year),
  293    between(1, 12, Month),
  294    (   Year = YearFrom
  295    ->  Month >= MonthFrom
  296    ;   true),
  297    (   Year = YearTo
  298    ->  Month =< MonthTo
  299    ;   true),
  300    bc_month_file_name(Year, Month, File),
  301    exists_file(File)