/*
This program performs logistic regression using iteratively reweighted
least squares (IRLS); see
https://en.wikipedia.org/wiki/Logistic_regression and
Murphy, Kevin P. (2012). Machine Learning: A Probabilistic Perspective. The MIT Press.

It also includes the predicate generate_data(N,Variance,Coeff,X,Y)
that generates an N-row dataset with the predictor variables in matrix X
and the predicted variable in list Y.
The predicted variable is computed with the formula
Y = (Coeff dotprod X + Noise > 0 -> 1 ; 0)

Variance is the variance of Noise.

Predicate example_log_r(N,Coeff) is used to test the algorithm for logistic
regression and dataset generation: it generates an N-row dataset with 5 as
the noise variance and coefficients [1,2,3].
Then it performs 10 iterations of logistic regression.
Coeff is the output of the regression and should be a list of three numbers
close to [1,2,3]. The higher N is, the closer to [1,2,3] Coeff should be.
*/
:-module(logistic_regression, [example_log_r/2, logistic_regression/4]).

:-use_module(library(matrix)).
:-use_module(library(mcintyre)).
:-use_module(library(clpfd)).

/*
?- example_log_r(100,Coeff).
Coeff should be a list of three numbers close to [1,2,3].
*/
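
/*
A minimal usage sketch of the dataset generator on its own (query shown
only for illustration; the sampled values vary between runs):

?- generate_data(4,5,[1,2,3],X,Y).

X is then a list of 4 rows, each containing two sampled predictors followed
by the constant 1 for the intercept, and Y the corresponding 0/1 labels.
*/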

:- mc.

:- begin_lpad.

% noise(I,Epsilon,Variance): Epsilon is Gaussian noise with mean 0 and the given Variance
noise(_,Epsilon,Variance):gaussian(Epsilon,0,Variance).

% x(I,J,X_ij): value of predictor J for example I, sampled from Gauss(0,10)
x(_,_,X_ij):gaussian(X_ij,0,10).

:-end_lpad.


example_log_r(N,CoeffNorm):-
    generate_data(N,5,[1,2,3],X,Y),
    logistic_regression(X,Y,10,Coeff),
    % normalise the coefficients by dividing by the first one (whose true
    % value is 1), so CoeffNorm should be close to [1,2,3]
    Coeff=[C|_],
    maplist(norm(C),Coeff,CoeffNorm).

norm(C,A,AN):-
    AN is A/C.

/*
version for SWISH that draws the dataset
example_log_r(N,CoeffNorm):-
    generate_data(N,5,[1,2,3],X,Y),
    logistic_regression(X,Y,10,Coeff),
    draw_dataset(X,Y,Coeff),
    Coeff=[C|_],
    maplist(norm(C),Coeff,CoeffNorm).

draw_dataset(X,Y,Coeff):-
    <-library(plot3D),
    transpose(X,XT),
    XT=[X1,X2|_],
    filter_pos(X1,Y,X1P),
    filter_pos(X2,Y,X2P),
    filter_neg(X1,Y,X1N),
    filter_neg(X2,Y,X2N),
    XPV=..[c|X1P],
    xp<-XPV,
    YPV=..[c|X2P],
    yp<-YPV,
    XNV=..[c|X1N],
    xn<-XNV,
    YNV=..[c|X2N],
    yn<-YNV,
    numlist(-10,10,L),
    LV=..[c|L],
    xs<-LV,
    generate_line(L,Coeff,YS),
    YSV=..[c|YS],
    ys<-YSV,
    generate_line(L,[1,2,3],YT),
    YTV=..[c|YT],
    yt<-YTV,
    <- {|r||plot(xp,yp,pch="+",xlim=c(-10,10), ylim=c(-10,10),xlab="x1",ylab="x2")
            points(xn,yn,pch="-",xlim=c(-10,10), ylim=c(-10,10))
            lines(xs,ys)
            lines(xs,yt,col="red")
            legend(x= -10, y=10, legend=c("Predicted", "True"),col=c("black", "red"),
              lty=1, cex=0.8)|}.
*/
% generate_line(L,Coeff,Y): for each x1 in L, computes the x2 coordinate of
% the decision boundary defined by Coeff (used by the commented-out
% draw_dataset/3 above)
generate_line(L,Coeff,Y):-
    maplist(lin(Coeff),L,Y).

% the boundary A*x1+B*x2+C=0 rewritten as x2 = -A/B*x1 - C/B
lin([A,B,C],X,Y):-
    Y is -A/B*X-C/B.

% filter_pos(Xs,Ys,XsPos): keeps the elements of Xs whose label in Ys is 1
filter_pos([],[],[]).

filter_pos([X|TX],[1|TY],[X|TXO]):-!,
    filter_pos(TX,TY,TXO).

filter_pos([_|TX],[_|TY],TXO):-
    filter_pos(TX,TY,TXO).

% filter_neg(Xs,Ys,XsNeg): keeps the elements of Xs whose label in Ys is 0
filter_neg([],[],[]).

filter_neg([X|TX],[0|TY],[X|TXO]):-!,
    filter_neg(TX,TY,TXO).

filter_neg([_|TX],[_|TY],TXO):-
    filter_neg(TX,TY,TXO).


neg(0).
/*
logistic_regression(+X, +Y, +It, -Coeff) is det

Performs It iterations of logistic regression on data X,Y and returns the
coefficients in Coeff.
X is a matrix with one row per example and one entry per predictor
variable, except for the last entry that should always be 1 (used for the
intercept). Y is a vector with one element per example encoding the class
(0 or 1). So X is
  [[x_1(1),x_2(1),...,1],
   [x_1(2),x_2(2),...,1],
   ...
   [x_1(N),x_2(N),...,1]]
and Y is
  [y(1),y(2),...,y(N)]
*/
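
/*
Each iteration performs the standard IRLS (Newton) update described in the
Murphy (2012) reference above. With nu_i = w dotprod x_i, mu_i = sigma(nu_i),
S = diag(mu_i*(1-mu_i)) and the scaled working response
    zs_i = S_ii*nu_i + (y_i - mu_i),
the updated coefficient vector is
    w_new = (X^T S X)^-1 X^T zs
which is what logistic_regression_iter/7 below computes with the matrix
predicates of library(matrix).
*/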
logistic_regression(X,Y,Iterations,Coeff):-
    transpose(X,XT),
    X=[Row1|_],
    length(Row1,N),
    list0(N,Coeff0),                 % Coeff0: initial coefficient vector (N zeros)
    logistic_regression_iter(0,Iterations,X,XT,Y,Coeff0,Coeff).


logistic_regression_iter(Iterations,Iterations,_X,_XT,_Y,Coeff,Coeff):-!.

logistic_regression_iter(I,Iterations,X,XT,Y,Coeff0,Coeff):-
    generate_nu(X,Coeff0,Nu),        % Nu_i = Coeff0 dotprod X_i
    maplist(logistic,Nu,Mu),         % Mu_i = sigma(Nu_i)
    generate_s(Mu,S_vec),            % S_i = Mu_i*(1-Mu_i)
    generate_zs(Nu,Y,Mu,S_vec,ZS),   % ZS_i = S_i*Nu_i + (Y_i-Mu_i)
    matrix_diagonal(S_vec,S),
    matrix_multiply(XT,S,XTS),
    matrix_multiply(XTS,X,XTSX),
    matrix_inversion(XTSX,XTSX_1),
    matrix_multiply(XTSX_1,XT,XTSX_1XT),
    transpose([ZS],ZST),
    matrix_multiply(XTSX_1XT,ZST,Coeff1T),
    transpose(Coeff1T,[Coeff1]),     % Coeff1 = (X^T S X)^-1 X^T ZS
    compute_log_lik(Mu,Y,LL),
    compute_accuracy(Mu,Y,Acc),
    I1 is I+1,
    format("Iteration ~d, log likelihood ~7f, accuracy ~5f~n",[I1,LL,Acc]),
    logistic_regression_iter(I1,Iterations,X,XT,Y,Coeff1,Coeff).

% compute_accuracy(Mu,Y,Acc): Acc is the fraction of examples for which the
% thresholded prediction (Mu_i>0.5) matches the label Y_i
compute_accuracy(Mu,Y,Acc):-
    foldl(correct,Mu,Y,0,Correct),
    length(Y,N),
    Acc is Correct/N.

correct(Mu,Y,C0,C):-
    (Mu>0.5->
        (Y=1->
            C is C0+1
        ;
            C is C0
        )
    ;
        (Y=0->
            C is C0+1
        ;
            C is C0
        )
    ).

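/*
compute_log_lik(Mu,Y,LL) below computes the Bernoulli log likelihood of the
labels under the current predicted probabilities:
    LL = sum_i ( y_i*log(mu_i) + (1-y_i)*log(1-mu_i) )
*/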
compute_log_lik(Mu,Y,LL):-
    foldl(log_lik,Mu,Y,0,LL).

log_lik(Mu,Y,LL0,LL):-
    (Y=1->
        LL is LL0+log(Mu)
    ;
        LL is LL0+log(1-Mu)
    ).

% generate_nu(X,W,Nu): Nu_i = W dotprod X_i for each row X_i of X
generate_nu(X,W,Nu):-
    maplist(gen_nu_i(W),X,Nu).

gen_nu_i(W,X_i,Nu_i):-
    foldl(prod,W,X_i,0,Nu_i).

% generate_s(Mu,S_vec): S_i = Mu_i*(1-Mu_i), the IRLS weights
generate_s(Mu,S_vec):-
    maplist(mu_1_mu,Mu,S_vec).

mu_1_mu(Mu_i,S_i):-
    S_i is Mu_i*(1-Mu_i).

% generate_zs(Nu,Y,Mu,S_vec,Z): Z_i = S_i*Nu_i + (Y_i-Mu_i), the scaled
% working response
generate_zs(Nu,Y,Mu,S_vec,Z):-
    maplist(gen_xs,Nu,Y,Mu,S_vec,Z).

% local definition of maplist over five lists, used by generate_zs/5
maplist(Goal, List1, List2, List3, List4, List5) :-
    maplist_(List1, List2, List3, List4, List5, Goal).

maplist_([], [], [], [], [], _).
maplist_([Elem1|Tail1], [Elem2|Tail2], [Elem3|Tail3], [Elem4|Tail4], [Elem5|Tail5], Goal) :-
    call(Goal, Elem1, Elem2, Elem3, Elem4, Elem5),
    maplist_(Tail1, Tail2, Tail3, Tail4, Tail5, Goal).

gen_xs(Nu,Y,Mu,S,ZS):-
    ZS is S*Nu +(Y-Mu).

% logistic (sigmoid) function
logistic(X,Sigma_X):-
    Sigma_X is 1/(1+exp(-X)).

% generate_data(N,Variance,Coeff,X,Y) generates an N-row dataset:
% Variance is the noise variance,
% Coeff is a list of coefficients for the predictor variables,
% the last element of Coeff is the y-intercept (fixed term),
% so the number of predictors is |Coeff|-1.
% Each predictor is sampled from Gauss(0,10).
generate_data(N,Variance,Coeff,X,Y):-
    numlist(1,N,Indexes),
    maplist(linear_funct(Variance,Coeff),Indexes,X,Y).

% computes Y_I_R = X_I dotprod Coeff + Epsilon, where Epsilon is the sampled
% noise, and sets Y_I to 1 if Y_I_R > 0 and to 0 otherwise
linear_funct(Variance,Coeff,I,X_I,Y_I):-
    sample_noise(I,Variance,Epsilon),
    length(Coeff,N_predictors_1),
    N_predictors is N_predictors_1-1,
    numlist(1,N_predictors,Predictors_Indexes),
    maplist(sample_data(I),Predictors_Indexes,X_Ip),
    append(X_Ip,[1],X_I),
    foldl(prod,Coeff,X_I,0,Y_I_0),
    Y_I_R is Y_I_0+Epsilon,
    (Y_I_R>0->
        Y_I=1
    ;
        Y_I=0
    ).
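
/*
For example, with Coeff=[1,2,3] as used by example_log_r/2, a generated row
X_I=[X1,X2,1] gets label Y_I=1 exactly when 1*X1 + 2*X2 + 3 + Epsilon > 0,
and Y_I=0 otherwise.
*/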

% prod/4 is used with foldl/5 over two lists to accumulate a dot product
prod(A,X,Y0,Y):-
    Y is Y0+ A*X.


% samples one value Epsilon of the Gaussian noise for example I
sample_noise(I,Variance,Epsilon):-
    mc_sample_arg_raw(noise(I,Noise,Variance),1,Noise,Sample),
    Sample=[Epsilon].

% samples the value X_IJ of predictor J for example I
sample_data(I,J,X_IJ):-
    mc_sample_arg_raw(x(I,J,X),1,X,Sample),
    Sample=[X_IJ].