1/* cplint_r.pl
    2 *
    3 * Copyright (c) 2016-2017 Franco Masotti (franco.masotti@student.unife.it)
    4 *
    5 * This is free software: you can redistribute it and/or modify it under the 
    6 * terms of the Artistic License 2.0 as published by The Perl Foundation.
    7 * 
    8 * This source is distributed in the hope that it will be useful, but WITHOUT 
    9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
   10 * FITNESS FOR A PARTICULAR PURPOSE. See the Artistic License 2.0 for more 
   11 * details.
   12 *
   13 * You should have received a copy of the Artistic License 2.0 along the source 
   14 * as a LICENSE file. If not, obtain it from 
   15 * http://www.perlfoundation.org/artistic_license_2_0.
   16 */
   17
   18/* Interface */
   19
   20:- module(cplint_r,
   21          [ build_xy_list/3,
   22            r_row/3,
   23            get_set_from_xy_list/2,
   24            prob_bar_r/1,
   25            prob_bar_r/2,
   26            mc_prob_bar_r/1,
   27            mc_sample_bar_r/2,
   28            mc_sample_arg_bar_r/3,
   29            mc_sample_arg_first_bar_r/3,
   30            mc_rejection_sample_arg_bar_r/4,
   31            mc_mh_sample_arg_bar_r/5,
   32            mc_mh_sample_arg_bar_r/6,
   33            histogram_r/2,
   34            density_r/1,
   35            densities_r/2,
   36            compute_areas_diagrams_r/3,
   37            test_r/5
   38          ]).   39
   40
   41/* Dependencies */
   42
   43:- use_module(library(r/r_call)).   44:- use_module(library(r/r_data)).   45:- use_module(library(lists)).   46:- use_module(library(pita)).   47:- use_module(library(mcintyre)).   48:- use_module(library(auc)).   49:- use_module(library(slipcover)).   50
   51/* Optional module */
   52:- use_module(swish(lib/r_swish)).   53
   54/* Meta predicate definitions. */
   55
   56:-meta_predicate prob_bar_r(:).   57:-meta_predicate prob_bar_r(:,:).   58:-meta_predicate mc_prob_bar_r(:).   59:-meta_predicate mc_sample_bar_r(:,+).   60:-meta_predicate mc_sample_arg_bar_r(:,+,+).   61:-meta_predicate mc_sample_arg_first_bar_r(:,+,+).   62:-meta_predicate mc_rejection_sample_arg_bar_r(:,:,+,+).   63:-meta_predicate mc_mh_sample_arg_bar_r(:,:,+,+,+).   64:-meta_predicate mc_mh_sample_arg_bar_r(:,:,+,+,+,+).   65:-meta_predicate test_r(:,+,-,-,-).   66
   67:- multifile sandbox:safe_primitive/1.   68sandbox:safe_primitive(current_predicate(_)).
   69
   70/*********** 
   71 * Helpers *
   72 ***********/
   73
   74check_modules :-
   75    current_module(r_call),
   76    current_module(r_data),
   77    current_module(lists),
   78    current_module(pita),
   79    current_module(mcintyre),
   80    current_module(auc),
   81    current_module(slipcover).
   82
   83check_modules :-
   84    writeln("ERROR: Library/ies missing"),
   85    abort.
   86
   87/* Debug purposes
   88 *
   89 *  use_rendering(table). % should be available by default.
   90 *  <- df. % name of a data frame.
   91 */
   92
   93load_r_libraries :-
   94    /* To enable pdf output instead of using the default plotting window, add 
   95     * the following command
   96     */
   97    /* <- pdf("plot.pdf"), */
   98    check_modules,
   99    !,
  100    <- library("ggplot2").
  101
  102/* Only do if library(r_swish) exists, otherwise return true. */
  103finalize_r_graph :-
  104    current_predicate(r_download/0),
  105    !,
  106    r_download.
  107
  108finalize_r_graph :-
  109    true.
  110
  111bin_width(Min,Max,NBins,Width) :-
  112    D is Max-Min,
  113    Width is D/NBins.
 build_xy_list(+X:list, +Y:list, -Out:list) is det
Given to lists X and Y build an output list Out in the form [X1-Y1,X2-Y2,...,XN-YN]. /
  121build_xy_list([], [], []).
  122
  123build_xy_list([XH|XT], [YH|YT], [XH-YH|Out]) :-
  124        build_xy_list(XT, YT, Out).
 r_row(+X:atom, +Y:atom, -Out:atom) is det
Given two atoms X and Y, build the term r(X,Y) in Out. /
  131r_row(X,Y,r(X,Y)).
 get_set_from_xy_list(+L:list, -R:list) is det
Given an input list L in the form [X1-Y1,X2-Y2,...,XN-YN], transform it in an output list R in the form [r(X1,Y1),r(X2,Y2),...,r(XN,YN)]. This means that R will contain an X-Y relationship which can be then passed to an R data frame. /
  141get_set_from_xy_list(L,R) :-
  142    maplist(key,L,X),
  143    maplist(y,L,Y),
  144    maplist(r_row,X,Y,R).
  145
  146
  147/*******************************************
  148 * Plot predicates *************************
  149 *******************************************
  150 * cd swish/examples/inference *************
  151 * grep -l "<predicate_name>(" *.pl | less *
  152 *******************************************/
  153
  154/* pita */
  155
  156
  157/* Scale between 0 and 1 with 10 ticks (0.1,0.2,...,1)
  158 * This represents a probability between 0 and 1.
  159 */
  160geom_prob_bar(PTrue,PFalse) :-
  161    X=['T','F'],
  162    Y=[PTrue,PFalse],
  163    build_xy_list(X,Y,L),
  164    get_set_from_xy_list(L,R),
  165    r_data_frame_from_rows(df1, R),
  166    colnames(df1) <- c("names", "prob"),    df <- data.frame(        ids=as.character(df1$names),        probabilities=c(df1$prob)    ),    <- ggplot(        data=df,        aes(            x=ids,            y=probabilities,            fill=ids        )    ) + geom_bar(        stat="identity",        width=0.5    )    + scale_y_continuous(        breaks=seq(0,1,0.1)    )    + coord_flip(        ylim=c(0,1)    )    + theme(        aspect.ratio=1/2    ).
 prob_bar_r(:Query:atom) is nondet
The predicate computes and plots the probability of Query as a bar chart with a bar for the probability of Query true and a bar for the probability of Query false. If Query is not ground, it returns in backtracking all ground instantiations of Query together with their probabilities /
  201prob_bar_r(M:Goal) :-
  202    load_r_libraries,
  203    s(M:Goal,PT),
  204    PF is 1.0-PT,
  205    geom_prob_bar(PT,PF),
  206    finalize_r_graph.
  207/*
  208 * PF is 1.0-PT i.e:
  209 * Probability False = 1 - Probability True. 
  210 */
 prob_bar_r(:Query:atom, :Evidence:atom) is nondet
The predicate computes the probability of the Query given Evidence as a bar chart with a bar for the probability of Query true and a bar for the probability of Query false given Evidence. If Query /Evidence are not ground, it returns in backtracking all ground instantiations of Query/Evidence together with their probabilities /
  224prob_bar_r(M:Goal,M:Evidence):-
  225    load_r_libraries,
  226    prob(M:Goal,M:Evidence,PT),
  227    PF is 1.0-PT,
  228    geom_prob_bar(PT,PF),
  229    finalize_r_graph.
  230
  231/* mcintyre */
 mc_prob_bar_r(:Query:atom) is det
See prob_bar. /
  239mc_prob_bar_r(M:Goal):-
  240    load_r_libraries,
  241    mc_prob(M:Goal,PT),
  242    PF is 1.0-PT,
  243    geom_prob_bar(PT,PF),
  244    finalize_r_graph.
  245
  246
  247geom_mc_sample_bar(PTrue,PFalse) :-
  248    X=['T','F'],
  249    Y=[PTrue,PFalse],
  250    build_xy_list(X,Y,L),
  251    get_set_from_xy_list(L,R),
  252    r_data_frame_from_rows(df1, R),
  253    colnames(df1) <- c("names", "prob"),    df <- data.frame(        ids=as.character(df1$names),        probabilities=c(df1$prob)    ),    <- ggplot(        data=df,        aes(            x=ids,            y=probabilities,            fill=ids        )    ) + geom_bar(        stat="identity",        width=0.5    )    + coord_flip()    + theme(aspect.ratio=1/2).
 mc_sample_bar_r(:Query:atom, +Samples:int) is det
The predicate samples Query a number of Samples times and plots a bar chart with a bar for the number of successes and a bar for the number of failures. If Query is not ground, it considers it as an existential query. /
  280mc_sample_bar_r(M:Goal,S):-
  281    load_r_libraries,
  282    mc_sample(M:Goal,S,T,F,_P),
  283    geom_mc_sample_bar(T,F),
  284    finalize_r_graph.
  285
  286
  287/* Differences from the previous predicates:
  288 * =========================================
  289 *
  290 * Transform names column into a string column. 
  291 *
  292 * The use of max/1 instead of 'NA'/0
  293 * is a hack (because NA does not work).
  294 *
  295 * Reorder by decreasing frequency.
  296 */
  297geom_mc_sample_arg_bar(L) :-
  298    get_set_from_xy_list(L,R),
  299    r_data_frame_from_rows(df1, R),
  300    colnames(df1) <- c("names", "prob"),    df <- data.frame(        ids=as.character(df1$names),        probabilities=c(df1$prob)    ),    scalingthresholD <- 20,    {|r||xbreakS <- if(max(df$probabilities) > scalingthresholD) element_blank() else scale_y_continuous(breaks=seq(0,max(df$probabilities),1))|},    <- ggplot(        data=df,        aes(            x=reorder(                ids,                probabilities            ),            y=probabilities        )    ) + geom_bar(        stat="identity",        width=0.5    ) + xbreakS + coord_flip(        ylim=c(0,max(df$probabilities))    ) + theme(        axis.title.y=element_blank()    ).
 mc_sample_arg_bar_r(:Query:atom, +Samples:int, ?Arg:var) is det
The predicate samples Query Samples times. Arg should be a variable in Query. The predicate plots a bar chart with a bar for each possible value of L, the list of values of Arg for which Query succeeds in a world sampled at random. The size of the bar is the number of samples returning that list of values. /
  337mc_sample_arg_bar_r(M:Goal,S,Arg):-
  338    load_r_libraries,
  339    mc_sample_arg(M:Goal,S,Arg,ValList0),
  340    maplist(to_atom,ValList0,ValList),
  341    geom_mc_sample_arg_bar(ValList),
  342    finalize_r_graph.
  343
  344
  345geom_mc_sample_arg_first_bar(L) :-
  346    get_set_from_xy_list(L,R),
  347    r_data_frame_from_rows(df1, R),
  348    colnames(df1) <- c("names", "prob"),    df <- data.frame(        ids=as.character(df1$names),        probabilities=c(df1$prob)    ),    <- ggplot(        data=df,        aes(            x=reorder(                ids,                probabilities            ),            y=probabilities        )    ) + geom_bar(        stat="identity",        width=0.5    )    + coord_flip() + theme(        axis.title.y=element_blank()    ).
 mc_sample_arg_first_bar_r(:Query:atom, +Samples:int, ?Arg:var) is det
The predicate samples Query Samples times. Arg should be a variable in Query. The predicate plots a bar chart with a bar for each value of Arg returned as a first answer by Query in a world sampled at random. The size of the bar is the number of samples that returned that value. The value is failure if the query fails. /
  381mc_sample_arg_first_bar_r(M:Goal,S,Arg):-
  382    load_r_libraries,
  383    mc_sample_arg_first(M:Goal,S,Arg,ValList0),
  384    maplist(to_atom,ValList0,ValList),
  385    geom_mc_sample_arg_first_bar(ValList),
  386    finalize_r_graph.
  387  
  388
  389geom_mc_rejection_sample_arg_bar(L) :-
  390    geom_mc_sample_arg_first_bar(L).
 mc_rejection_sample_arg_bar_r(:Query:atom, :Evidence:atom, +Samples:int, ?Arg:var) is det
The predicate calls mc_rejection_sample_arg/5 and builds an R graph of the results. It plots a bar chart with a bar for each possible value of L, the list of values of Arg for which Query succeeds given that Evidence is true The size of the bar is the number of samples returning that list of values. /
  403mc_rejection_sample_arg_bar_r(M:Goal,M:Ev,S,Arg):-
  404    load_r_libraries,
  405    mc_rejection_sample_arg(M:Goal,M:Ev,S,Arg,ValList0),
  406    maplist(to_atom,ValList0,ValList),
  407    geom_mc_rejection_sample_arg_bar(ValList),
  408    finalize_r_graph.
  409
  410
  411geom_mc_mh_sample_arg_bar(L) :-
  412    geom_mc_sample_arg_first_bar(L).
 mc_mh_sample_arg_bar_r(:Query:atom, :Evidence:atom, +Samples:int, +Mix:int, +Lag:int, ?Arg:var) is det
The predicate calls mc_mh_sample_arg/7 and builds an R graph of the results. The predicate plots a bar chart with a bar for each possible value of L, the list of values of Arg for which Query succeeds in a world sampled at random. The size of the bar is the number of samples returning that list of values. /
  426mc_mh_sample_arg_bar_r(M:Goal,M:Ev,S,Mix,L,Arg):-
  427    load_r_libraries,
  428    mc_mh_sample_arg(M:Goal,M:Ev,S,Mix,L,Arg,ValList0),
  429    maplist(to_atom,ValList0,ValList),
  430    geom_mc_mh_sample_arg_bar(ValList),
  431    finalize_r_graph.
 mc_mh_sample_arg_bar_r(:Query:atom, :Evidence:atom, +Samples:int, +Lag:int, ?Arg:var) is det
The predicate call mc_mh_sample_arg/6 and builds a R graph of the results. The predicate plots a bar chart with a bar for each possible value of L, the list of values of Arg for which Query succeeds in a world sampled at random. The size of the bar is the number of samples returning that list of values. /
  446mc_mh_sample_arg_bar_r(M:Goal,M:Ev,S,L,Arg):-
  447    load_r_libraries,
  448    mc_mh_sample_arg(M:Goal,M:Ev,S,L,Arg,ValList0),
  449    maplist(to_atom,ValList0,ValList),
  450    geom_mc_mh_sample_arg_bar(ValList),
  451    finalize_r_graph.
  452
  453
  454geom_histogram(L,Min,Max,BinWidth) :-
  455    binwidtH <- BinWidth,    get_set_from_xy_list(L,R),    r_data_frame_from_rows(df, R),    colnames(df) <- c("x", "y"),    miN <- Min,    maX <- Max,    <- ggplot(        data=df,        aes_string(            x="x"        )    ) + geom_histogram(        weight="y",        binwidth=binwidtH    ) + xlim(        miN,        maX    ) + theme(        axis.title.x=element_blank()    ).
 histogram_r(+List:list, +NBins:int) is det
Draws a histogram of the samples in List dividing the domain in NBins bins. List must be a list of couples of the form [V]-W or V-W where V is a sampled value and W is its weight. /
  483histogram_r(L0,NBins) :-
  484    load_r_libraries,
  485    maplist(to_pair,L0,L1),
  486    maplist(key,L1,L2),
  487    max_list(L2,Max),
  488    min_list(L2,Min),
  489    histogram_r(L0,NBins,Min,Max),
  490    finalize_r_graph.
 histogram_r(+List:list, +NBins:int, +Min:float, +Max:float) is det
Draws a histogram of the samples in List dividing the domain in NBins bins. List must be a list of couples of the form [V]-W or V-W where V is a sampled value and W is its weight. The minimum and maximum values of the domains must be provided. /
  500histogram_r(L0,NBins,Min,Max) :-
  501    maplist(to_pair,L0,L1),
  502    keysort(L1,L),
  503    bin_width(Min,Max,NBins,BinWidth),
  504    geom_histogram(L,Min,Max,BinWidth).
  505
  506
  507geom_density(L) :-
  508    get_set_from_xy_list(L,R),
  509    r_data_frame_from_rows(df, R),
  510    colnames(df) <- c("x", "y"),    <- ggplot(        data=df,        aes(x)    ) + geom_density(            aes(                fill="density",                weights=y            ),            alpha=0.5    ) + theme(        legend.title = element_blank(),        axis.title.x=element_blank()    ).
 density_r(+List:list) is det
Display a smooth density estimate of a sets of samples. The samples are in List as couples V-W where V is a value and W its weigth. /
  532density_r(Post0) :-
  533     load_r_libraries,
  534     maplist(to_pair,Post0,Post),
  535     geom_density(Post),
  536     finalize_r_graph.
  537
  538
  539geom_densities(LPr,LPo) :-
  540    get_set_from_xy_list(LPr,R1),
  541    get_set_from_xy_list(LPo,R2),
  542    r_data_frame_from_rows(df1, R1),
  543    r_data_frame_from_rows(df2, R2),
  544    colnames(df1) <- c("x1", "y1"),    colnames(df2) <- c("x2", "y2"),    df <- data.frame(        x1=df1$x1,        x2=df2$x2,        y1=df1$y1,        y2=df2$y2    ),    alphA <- 0.5,    <- ggplot(        data=df    ) + geom_density(            aes(                x=x1,                fill="pre",                weights=y1            ),            alpha=alphA    ) + geom_density(            aes(                x=x2,                fill="post",                weights=y2            ),            alpha=alphA    ) + theme(        legend.title = element_blank(),        axis.title.x=element_blank()    ).
 densities_r(+PriorList:list, +PostList:list) is det
Display a smooth density estimate of two sets of samples, usually prior and post observations. The samples from the prior are in PriorList while the samples from the posterior are in PostList as couples V-W where V is a value and W its weigth. /
  582densities_r(Pri0,Post0) :-
  583    load_r_libraries,
  584    maplist(to_pair,Pri0,Pri1),
  585    maplist(to_pair,Post0,Post1),
  586    geom_densities(Pri1,Post1),
  587    finalize_r_graph.
  588
  589/* auc */
  590
  591geom_compute_areas_diagram(L,Title,XName,YName) :-
  592    get_set_from_xy_list(L,R),
  593    r_data_frame_from_rows(df, R),
  594    titlE <- as.character(Title),    labelS <- labs(title = titlE, x=XName, y=YName),    colnames(df) <- c("x", "y"),    <- ggplot(        data=df,        aes_string(            x="x",            y="y",            group=1        )    ) + geom_line()    + geom_point()    + scale_x_continuous(        limits=c(0,1),        breaks=seq(0,1,0.1)    )    + scale_y_continuous(        limits=c(0,1),        breaks=seq(0,1,0.1)    )    + labelS    + theme(        legend.title = element_blank(),        plot.title = element_text(            size = rel(2)        )    ).
 compute_areas_diagrams_r(+LG:list, -AUCROC:float, -AUCPR:float) is det
The predicate takes as input a list LG of pairs probability-literal in asceding order on probability where the literal can be an Atom (incading a positive example) or \+ Atom, indicating a negative example while the probability is the probability of Atom of being true. The predicate returns AUCROC: the size of the area under the ROC curve AUCPR: the size of the area under the PR curve PR and ROC diagrams are plotted. See http://cplint.lamping.unife.it/example/exauc.pl for an example /
  637compute_areas_diagrams_r(LG,AUCROC,AUCPR) :-
  638    load_r_libraries,
  639    compute_areas(LG,AUCROC,ROC0,AUCPR,PR0),
  640    geom_compute_areas_diagram(ROC0,"ROC","FPR","TPR"),
  641    finalize_r_graph,
  642    load_r_libraries,
  643    geom_compute_areas_diagram(PR0,"PR","Precision","Recall"),
  644    finalize_r_graph.
 test_r(+P:probabilistic_program, +TestFolds:list_of_atoms, -LL:float, -AUCROC:float, -AUCPR:float) is det
The predicate takes as input in P a probabilistic program, tests P on the folds indicated in TestFolds and returns the log likelihood of the test examples in LL, the area under the Receiver Operating Characteristic curve in AUCROC, the area under the Precision Recall curve in AUCPR and draws R diagrams of the curves. /
  655test_r(P,TestFolds,LL,AUCROC,AUCPR):-
  656  test_prob(P,TestFolds,_NPos,_NNeg,LL,LG),
  657  compute_areas_diagrams_r(LG,AUCROC,AUCPR).
  658
  659:- multifile sandbox:safe_primitive/1.  660
  661sandbox:safe_primitive(cplint_r:build_xy_list(_,_,_)).
  662sandbox:safe_primitive(cplint_r:r_row(_,_,_)).
  663sandbox:safe_primitive(cplint_r:get_set_from_xy_list(_,_)).
  664sandbox:safe_primitive(cplint_r:histogram_r(_,_)).
  665sandbox:safe_primitive(cplint_r:density_r(_)).
  666sandbox:safe_primitive(cplint_r:densities_r(_,_)).
  667sandbox:safe_primitive(cplint_r:compute_areas_diagrams_r(_,_,_)).
  668
  669:- multifile sandbox:safe_meta/2.  670
  671sandbox:safe_meta(cplint_r:prob_bar_r(_),[]).
  672sandbox:safe_meta(cplint_r:prob_bar_r(_,_),[]).
  673sandbox:safe_meta(cplint_r:mc_prob_bar_r(_),[]).
  674sandbox:safe_meta(cplint_r:mc_sample_bar_r(_,_),[]).
  675sandbox:safe_meta(cplint_r:mc_sample_arg_bar_r(_,_,_),[]).
  676sandbox:safe_meta(cplint_r:mc_sample_arg_first_bar_r(_,_,_),[]).
  677sandbox:safe_meta(cplint_r:mc_rejection_sample_arg_bar_r(_,_,_,_),[]).
  678sandbox:safe_meta(cplint_r:mc_mh_sample_arg_bar_r(_,_,_,_,_),[]).
  679sandbox:safe_meta(cplint_r:mc_mh_sample_arg_bar_r(_,_,_,_,_,_),[]).
  680sandbox:safe_meta(cplint_r:test_r(_,_,_,_,_), [])