/*
This program performs logistic regression using iteratively reweighted
least squares (IRLS); see
https://en.wikipedia.org/wiki/Logistic_regression and
Murphy, Kevin P. (2012). Machine Learning: A Probabilistic Perspective. The MIT Press.

It also includes the predicate generate_data(N,Variance,Coeff,X,Y)
that generates an N-row dataset with the predictor variables in matrix X
and the predicted variable in list Y.
The predicted variable is computed with the formula
Y = (Coeff dotprod X + Noise > 0 -> 1 ; 0)

Variance is the variance of Noise.

Predicate example_log_r(N,Coeff) is used to test the algorithm for logistic
regression and dataset generation: it generates an N-row dataset with 5 as
the noise variance and coefficients [1,2,3].
Then it performs 10 iterations of logistic regression.
Coeff is the output of the regression and should be a list of three numbers
close to [1,2,3]. The higher N is, the closer to [1,2,3] Coeff should be.
*/
:-module(logistic_regression, [example_log_r/2, logistic_regression/4]).

:-use_module(library(matrix)).
:-use_module(library(mcintyre)).
:-use_module(library(clpfd)).

/*
?- example_log_r(100,Coeff).
Coeff should be a list of three numbers close to [1,2,3].
*/
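
/*
A minimal usage sketch of the dataset generator on its own (query shown
only for illustration; the sampled values vary between runs):

?- generate_data(4,5,[1,2,3],X,Y).

X is then a list of 4 rows, each containing two sampled predictors followed
by the constant 1 for the intercept, and Y the corresponding 0/1 labels.
*/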

:- mc.

:- begin_lpad.

% noise(I,Epsilon,Variance): Epsilon is Gaussian noise with mean 0 and the given Variance
noise(_,Epsilon,Variance):gaussian(Epsilon,0,Variance).

% x(I,J,X_ij): value of predictor J for example I, sampled from Gauss(0,10)
x(_,_,X_ij):gaussian(X_ij,0,10).

:-end_lpad.


example_log_r(N,CoeffNorm):-
    generate_data(N,5,[1,2,3],X,Y),
    logistic_regression(X,Y,10,Coeff),
    % normalise the coefficients by dividing by the first one (whose true
    % value is 1), so CoeffNorm should be close to [1,2,3]
    Coeff=[C|_],
    maplist(norm(C),Coeff,CoeffNorm).

norm(C,A,AN):-
    AN is A/C.

/*
version for SWISH that draws the dataset
example_log_r(N,CoeffNorm):-
    generate_data(N,5,[1,2,3],X,Y),
    logistic_regression(X,Y,10,Coeff),
    draw_dataset(X,Y,Coeff),
    Coeff=[C|_],
    maplist(norm(C),Coeff,CoeffNorm).

draw_dataset(X,Y,Coeff):-
    <-library(plot3D),
    transpose(X,XT),
    XT=[X1,X2|_],
    filter_pos(X1,Y,X1P),
    filter_pos(X2,Y,X2P),
    filter_neg(X1,Y,X1N),
    filter_neg(X2,Y,X2N),
    XPV=..[c|X1P],
    xp<-XPV,
    YPV=..[c|X2P],
    yp<-YPV,
    XNV=..[c|X1N],
    xn<-XNV,
    YNV=..[c|X2N],
    yn<-YNV,
    numlist(-10,10,L),
    LV=..[c|L],
    xs<-LV,
    generate_line(L,Coeff,YS),
    YSV=..[c|YS],
    ys<-YSV,
    generate_line(L,[1,2,3],YT),
    YTV=..[c|YT],
    yt<-YTV,
    <- {|r||plot(xp,yp,pch="+",xlim=c(-10,10), ylim=c(-10,10),xlab="x1",ylab="x2")
            points(xn,yn,pch="-",xlim=c(-10,10), ylim=c(-10,10))
            lines(xs,ys)
            lines(xs,yt,col="red")
            legend(x= -10, y=10, legend=c("Predicted", "True"),col=c("black", "red"),
              lty=1, cex=0.8)|}.
*/
% generate_line(L,Coeff,Y): for each x1 in L, computes the x2 coordinate of
% the decision boundary defined by Coeff (used by the commented-out
% draw_dataset/3 above)
generate_line(L,Coeff,Y):-
    maplist(lin(Coeff),L,Y).

% the boundary A*x1+B*x2+C=0 rewritten as x2 = -A/B*x1 - C/B
lin([A,B,C],X,Y):-
    Y is -A/B*X-C/B.

% filter_pos(Xs,Ys,XsPos): keeps the elements of Xs whose label in Ys is 1
filter_pos([],[],[]).

filter_pos([X|TX],[1|TY],[X|TXO]):-!,
    filter_pos(TX,TY,TXO).

filter_pos([_|TX],[_|TY],TXO):-
    filter_pos(TX,TY,TXO).

% filter_neg(Xs,Ys,XsNeg): keeps the elements of Xs whose label in Ys is 0
filter_neg([],[],[]).

filter_neg([X|TX],[0|TY],[X|TXO]):-!,
    filter_neg(TX,TY,TXO).

filter_neg([_|TX],[_|TY],TXO):-
    filter_neg(TX,TY,TXO).


neg(0).
/*
logistic_regression(+X, +Y, +It, -Coeff) is det

Performs It iterations of logistic regression on data X,Y and returns the
coefficients in Coeff.
X is a matrix with one row per example and one entry per predictor
variable, except for the last entry that should always be 1 (used for the
intercept). Y is a vector with one element per example encoding the class
(0 or 1). So X is
  [[x_1(1),x_2(1),...,1],
   [x_1(2),x_2(2),...,1],
   ...
   [x_1(N),x_2(N),...,1]]
and Y is
  [y(1),y(2),...,y(N)]
*/
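
/*
Each iteration performs the standard IRLS (Newton) update described in the
Murphy (2012) reference above. With nu_i = w dotprod x_i, mu_i = sigma(nu_i),
S = diag(mu_i*(1-mu_i)) and the scaled working response
    zs_i = S_ii*nu_i + (y_i - mu_i),
the updated coefficient vector is
    w_new = (X^T S X)^-1 X^T zs
which is what logistic_regression_iter/7 below computes with the matrix
predicates of library(matrix).
*/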
logistic_regression(X,Y,Iterations,Coeff):-
    transpose(X,XT),
    X=[Row1|_],
    length(Row1,N),
    list0(N,Coeff0),                 % Coeff0: initial coefficient vector (N zeros)
    logistic_regression_iter(0,Iterations,X,XT,Y,Coeff0,Coeff).


logistic_regression_iter(Iterations,Iterations,_X,_XT,_Y,Coeff,Coeff):-!.

logistic_regression_iter(I,Iterations,X,XT,Y,Coeff0,Coeff):-
    generate_nu(X,Coeff0,Nu),        % Nu_i = Coeff0 dotprod X_i
    maplist(logistic,Nu,Mu),         % Mu_i = sigma(Nu_i)
    generate_s(Mu,S_vec),            % S_i = Mu_i*(1-Mu_i)
    generate_zs(Nu,Y,Mu,S_vec,ZS),   % ZS_i = S_i*Nu_i + (Y_i-Mu_i)
    matrix_diagonal(S_vec,S),
    matrix_multiply(XT,S,XTS),
    matrix_multiply(XTS,X,XTSX),
    matrix_inversion(XTSX,XTSX_1),
    matrix_multiply(XTSX_1,XT,XTSX_1XT),
    transpose([ZS],ZST),
    matrix_multiply(XTSX_1XT,ZST,Coeff1T),
    transpose(Coeff1T,[Coeff1]),     % Coeff1 = (X^T S X)^-1 X^T ZS
    compute_log_lik(Mu,Y,LL),
    compute_accuracy(Mu,Y,Acc),
    I1 is I+1,
    format("Iteration ~d, log likelihood ~7f, accuracy ~5f~n",[I1,LL,Acc]),
    logistic_regression_iter(I1,Iterations,X,XT,Y,Coeff1,Coeff).

% compute_accuracy(Mu,Y,Acc): Acc is the fraction of examples for which the
% thresholded prediction (Mu_i>0.5) matches the label Y_i
compute_accuracy(Mu,Y,Acc):-
    foldl(correct,Mu,Y,0,Correct),
    length(Y,N),
    Acc is Correct/N.

correct(Mu,Y,C0,C):-
    (Mu>0.5->
        (Y=1->
            C is C0+1
        ;
            C is C0
        )
    ;
        (Y=0->
            C is C0+1
        ;
            C is C0
        )
    ).

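/*
compute_log_lik(Mu,Y,LL) below computes the Bernoulli log likelihood of the
labels under the current predicted probabilities:
    LL = sum_i ( y_i*log(mu_i) + (1-y_i)*log(1-mu_i) )
*/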
compute_log_lik(Mu,Y,LL):-
    foldl(log_lik,Mu,Y,0,LL).

log_lik(Mu,Y,LL0,LL):-
    (Y=1->
        LL is LL0+log(Mu)
    ;
        LL is LL0+log(1-Mu)
    ).

% generate_nu(X,W,Nu): Nu_i = W dotprod X_i for each row X_i of X
generate_nu(X,W,Nu):-
    maplist(gen_nu_i(W),X,Nu).

gen_nu_i(W,X_i,Nu_i):-
    foldl(prod,W,X_i,0,Nu_i).

% generate_s(Mu,S_vec): S_i = Mu_i*(1-Mu_i), the IRLS weights
generate_s(Mu,S_vec):-
    maplist(mu_1_mu,Mu,S_vec).

mu_1_mu(Mu_i,S_i):-
    S_i is Mu_i*(1-Mu_i).

% generate_zs(Nu,Y,Mu,S_vec,Z): Z_i = S_i*Nu_i + (Y_i-Mu_i), the scaled
% working response
generate_zs(Nu,Y,Mu,S_vec,Z):-
    maplist(gen_xs,Nu,Y,Mu,S_vec,Z).

% local definition of maplist over five lists, used by generate_zs/5
maplist(Goal, List1, List2, List3, List4, List5) :-
    maplist_(List1, List2, List3, List4, List5, Goal).

maplist_([], [], [], [], [], _).
maplist_([Elem1|Tail1], [Elem2|Tail2], [Elem3|Tail3], [Elem4|Tail4], [Elem5|Tail5], Goal) :-
    call(Goal, Elem1, Elem2, Elem3, Elem4, Elem5),
    maplist_(Tail1, Tail2, Tail3, Tail4, Tail5, Goal).

gen_xs(Nu,Y,Mu,S,ZS):-
    ZS is S*Nu +(Y-Mu).

% logistic (sigmoid) function
logistic(X,Sigma_X):-
    Sigma_X is 1/(1+exp(-X)).

% generate_data(N,Variance,Coeff,X,Y) generates an N-row dataset:
% Variance is the noise variance,
% Coeff is a list of coefficients for the predictor variables,
% the last element of Coeff is the y-intercept (fixed term),
% so the number of predictors is |Coeff|-1.
% Each predictor is sampled from Gauss(0,10).
generate_data(N,Variance,Coeff,X,Y):-
    numlist(1,N,Indexes),
    maplist(linear_funct(Variance,Coeff),Indexes,X,Y).

% computes Y_I_R = X_I dotprod Coeff + Epsilon, where Epsilon is the sampled
% noise, and sets Y_I to 1 if Y_I_R > 0 and to 0 otherwise
linear_funct(Variance,Coeff,I,X_I,Y_I):-
    sample_noise(I,Variance,Epsilon),
    length(Coeff,N_predictors_1),
    N_predictors is N_predictors_1-1,
    numlist(1,N_predictors,Predictors_Indexes),
    maplist(sample_data(I),Predictors_Indexes,X_Ip),
    append(X_Ip,[1],X_I),
    foldl(prod,Coeff,X_I,0,Y_I_0),
    Y_I_R is Y_I_0+Epsilon,
    (Y_I_R>0->
        Y_I=1
    ;
        Y_I=0
    ).
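
/*
For example, with Coeff=[1,2,3] as used by example_log_r/2, a generated row
X_I=[X1,X2,1] gets label Y_I=1 exactly when 1*X1 + 2*X2 + 3 + Epsilon > 0,
and Y_I=0 otherwise.
*/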

% prod/4 is used with foldl/5 over two lists to accumulate a dot product
prod(A,X,Y0,Y):-
    Y is Y0+ A*X.


% samples one value Epsilon of the Gaussian noise for example I
sample_noise(I,Variance,Epsilon):-
    mc_sample_arg_raw(noise(I,Noise,Variance),1,Noise,Sample),
    Sample=[Epsilon].

% samples the value X_IJ of predictor J for example I
sample_data(I,J,X_IJ):-
    mc_sample_arg_raw(x(I,J,X),1,X,Sample),
    Sample=[X_IJ].