plstat.pl

    1:- module(plstat,[
    2    % help/0, % defined in utils.pl
    3    % help/1, % defined in utils.pl
    4    % help/2, % defined in utils.pl
    5    bug/0,
    6    list/0,
    7    suggestion/0,
    8    mean/2,
    9    median/2,
   10    mode/2,
   11    percentile/3,
   12    quartile/3,
   13    iqr/2,
   14    rms/2,
   15    sum_of_squares/2,
   16    variance/2,
   17    pop_variance/2,
   18    std_dev/2,
   19    pop_std_dev/2,
   20    range/2,
   21    midrange/2,
   22    mean_absolute_deviation/2,
   23    covariance/3,
   24    correlation/3,
   25    pearson_correlation/3,
   26    spearman_correlation/3,
   27    weighted_mean/3,
   28    geometric_mean/2,
   29    harmonic_mean/2,
   30    trimmed_mean/4,
   31    trimmed_variance/4,
   32    skew/2,
   33    kurtosis/2,
   34    moment/3,
   35    sum/2, % wrapper for sum_list
   36    prod/2,
   37    rescale/2,
   38    rescale/4,
   39    mean_normalize/2,
   40    standardize/2,
   41    entropy/2,
   42    entropy/3,
   43    min_val/2, % wrapper for min_list
   44    max_val/2, % wrapper for max_list
   45    min_max_val/3,
   46    rank/2,
   47    rank/3,
   48    nth_row/3,
   49    nth_column/3,
   50    swap_rows_columns/2,
   51    split_n_parts/3,
   52    occurrences/2,
   53    occurrences/3,
   54    normalize_prob/2,
   55    delete_nth1/3,
   56    sample/3,
   57    sample/4,
   58    sample/5,
   59    empirical_distribution/3,
   60    seq/4,
   61    factorial/2,
   62    choose/3,
   63    search_position_sorted/3,
   64    search_position_sorted/4
   65    ]).
   66
   67:- [utils].   68
   69% multidimensional wrapper
   70multidim2(Predicate,List,Res):-
   71    ( List = [A|_], is_list(A) ->
   72        maplist(Predicate,List,Res);
   73        Call =.. [Predicate,List,Res],
   74        Call
   75    ).
   76
   77ground_and_numbers_and_nonempty(Predicate, L):-
   78    ground_list(Predicate,L),
   79    numbers(Predicate,L),
   80    nonempty(Predicate,L).
   81nonempty(Predicate, L):-
   82    length(L,N),
   83    ( N > 0 -> true ; write(Predicate), writeln(": list cannot be empty"), false).
   84ground_list(Predicate, L):-
   85    ( ground(L) -> true ; write(Predicate), writeln(": input list must be ground"), false).
   86numbers(Predicate, L):- 
   87    ( maplist(number,L) -> true ; write(Predicate), writeln(": all the elements must be numbers"), false).
   88same_length_lists(Predicate, L1, L2):-
   89    length(L1,N),
   90    length(L2,M),
   91    ( M = N -> true ;
   92        write(Predicate), writeln(': input lists must have the same length'),
   93        write('Found '), write(M), write(' expected '), writeln(N),
   94        false
   95    ).
   96
   97all_positive(Predicate,W):-
   98    ( maplist(=<(0),W) -> true ; write(Predicate), writeln(": all the weigths must be positive (>= 0)"), false).
   99all_strictly_positive(Predicate,W):-
  100    ( maplist(<(0),W) -> true ; write(Predicate), writeln(": all the weigths must be positive (> 0)"), false).
  101nonzerosum(Predicate,W,Sum):-
  102    sum(W,Sum),
  103    (Sum > 0 -> true ; write(Predicate), writeln(": weigths must not sum to 0"), false).
  104check_bounds(Predicate,L,U):-
  105    (number(L) -> true; write(Predicate), writeln(": lower bound must be a number."), false),
  106    (number(U) -> true; write(Predicate), writeln(": upper bound must be a number."), false),
  107    (L < U ->
  108        true ;
  109        write(Predicate),
  110        writeln(": the lower bound must be smaller than the upper bound"),
  111        false
  112    ).

  123mean(L,Mean):-
  124    multidim2(mean_,L,Mean).
  125mean_(L,Mean):-
  126    ground_and_numbers_and_nonempty(mean/2,L),
  127	length(L,N),
  128	sum(L,S),
  129    Mean is S/N.

  139median(L,Median):-
  140    multidim2(median_,L,Median).
  141median_(Lin,Median):-
  142    ground_and_numbers_and_nonempty(median/2,Lin),
  143    msort(Lin,L),
  144	length(L,N),
  145    (1 is N mod 2 ->
  146        N1 is N + 1,
  147        M is N1/2,
  148        nth1(M,L,Median) ;
  149    	NH is N/2, 
  150        N2 is (N + 2)/2,
  151        nth1(NH,L,XN2),
  152        nth1(N2,L,XN22),
  153        Median is (XN2 + XN22) / 2        
  154    ).

  166comp(<,[_,A1],[_,A2]) :- A1 > A2. % to have in descending order
  167comp(>, _, _).
  168mode(L,Mode):-
  169    multidim2(mode_,L,Mode).
  170mode_(L,ModeC):-
  171	occurrences(L,Occ),
  172    predsort(comp,Occ,X), !,
  173    X = [[_,O]|_],
  174    findall(V,member([V,O],Occ),Mode),
  175    length(Mode,Mod),
  176    (Mod = 1 -> Mode = [ModeC] ; ModeC = Mode).

  198percentile(L,K,Percentile):-
  199    ( L = [A|_], is_list(A) -> 
  200        maplist(percentile_list(K),L,Percentile);
  201        percentile_list(K,L,Percentile)
  202    ).
  203percentile_list(K,L,P):-
  204    ( is_list(K) ->
  205        maplist(percentile_single(L),K,P);
  206        percentile_single(L,K,P)
  207    ).
  208percentile_single(L,K,Percentile):-
  209    ground_and_numbers_and_nonempty(percentile/3,L),
  210    (number(K) -> true ; writeln("percentile/3: the percentile must be a number"), false),
  211    ( (K >= 0, K =< 100) -> true ; writeln("percentile/3: k must be between 0 and 100"), false),
  212    ( L = [V] -> % only one value
  213        Percentile = V ;
  214        msort(L,LS),
  215        length(L,N),
  216        R is (K / 100)*(N - 1) + 1,
  217        RI is floor(R),
  218        RII is RI + 1,
  219        nth1(RI,LS,V1),
  220        nth1(RII,LS,V2),
  221        RF is R - RI,
  222        Percentile is V1 + RF * (V2 - V1)
  223    ).

  233quartile(L,Q,Quartile):-
  234    (number(Q) -> true ; writeln("quartile/3: the quartile must be a number"), false),
  235    ( member(Q,[1,2,3]) ->
  236        !,
  237        QP is Q * 25, 
  238        percentile(L,QP,Quartile) ;
  239        writeln("quartile/3: the quartile must be a number among 1, 2, or 3"),
  240        false
  241    ).

  251iqr(L,IQR):-
  252    quartile(L,3,III),
  253    quartile(L,1,I),
  254    IQR is III - I.

  264square(X,XS):-
  265    pow(X,2,XS).
  266rms(L,RMS):-
  267    multidim2(rms_,L,RMS).
  268rms_(L,RMS):-
  269    ground_and_numbers_and_nonempty(rms/2,L),
  270    length(L,N),
  271    maplist(square,L,LS),
  272    sum(LS, SS),
  273    RMS is sqrt(SS / N).

  283diff_square(Mu,B,D):-
  284    AB is B - Mu,
  285    pow(AB,2,D).
  286sum_of_squares(List,SumOfSquares):-
  287    multidim2(sum_of_squares_,List,SumOfSquares).
  288sum_of_squares_(L,SumSquared):-
  289    ground_and_numbers_and_nonempty(rms/2,L),
  290    mean(L,Mean),
  291    maplist(diff_square(Mean),L,Res),
  292    sum(Res,SumSquared).

  302variance(L,Var):-
  303    multidim2(variance_,L,Var).
  304variance_(L,Var):-
  305    ground_and_numbers_and_nonempty(rms/2,L),
  306    length(L,N),
  307    ( N < 2 -> writeln("variance/3: the quartile must be a number"), false ; true),
  308    sum_of_squares(L,SS),
  309    N1 is N - 1,
  310    Var is (1/N1) * SS.

  320pop_variance(L,Var):-
  321    multidim2(pop_variance_,L,Var).
  322pop_variance_(L,Var):-
  323    ground_and_numbers_and_nonempty(rms/2,L),
  324    length(L,N),
  325    ( N < 2 -> writeln("variance/3: the quartile must be a number"), false ; true),
  326    length(L,N),
  327    sum_of_squares(L,SS),
  328    Var is (1/N) * SS.
  329
  330/* std_dev(+List:numbers,-StdDev:number)
  331 * StdDev is the standard deviation of the list List (square root of the sample variance).
  332 * List can also be multidimensional (list of lists).
  333 * Example: std_dev([1,2,4,6,7,8,9],S).
  334 * Expected: S = 3.039424.
  335 * */
  336std_dev(L,StdDev):-
  337    multidim2(std_dev_,L,StdDev).
  338std_dev_(L,StdDev):-
  339    variance(L,V),
  340    StdDev is sqrt(V).
  341
  342/* pop_std_dev(+List:numbers,-StdDev:number)
  343 * StdDev is the population standard deviation of the list List (square root of the population variance)
  344 * List can also be multidimensional (list of lists)
  345 * Example: pop_std_dev([1,2,4,6,7,8,9],S).
  346 * Expected: S = 3.039424.
  347 * */
  348pop_std_dev(L,StdDev):-
  349    multidim2(pop_std_dev_,L,StdDev).
  350pop_std_dev_(L,StdDev):-
  351    pop_variance(L,V),
  352    StdDev is sqrt(V).

  362range(L,Range):-
  363    multidim2(range_,L,Range).
  364range_(L,Range):-
  365    ground_and_numbers_and_nonempty(range/2,L),
  366    min_val(L,Min),
  367    max_val(L,Max),
  368    Range is Max - Min.

  377midrange(L,Range):-
  378    multidim2(midrange_,L,Range).
  379midrange_(L,Midrange):-
  380    range(L,Range),
  381    Midrange is Range / 2.

  391diff_abs(A,B,D):-
  392    AB is A - B,
  393    D is abs(AB).
  394mean_absolute_deviation(L,MAD):-
  395    multidim2(mean_absolute_deviation_,L,MAD).
  396mean_absolute_deviation_(L,MAD):-
  397    mean(L,Mean),
  398    maplist(diff_abs(Mean),L,LD),
  399    sum(LD,S),
  400    length(L,N),
  401    MAD is (1/N) * S.

  409sub(A,B,C):-
  410    C is B - A.
  411mul(A,B,C):-
  412    C is A * B.
  413covariance(L1,L2,Cov):-
  414    ground_and_numbers_and_nonempty(covariance/3,L1),
  415    ground_and_numbers_and_nonempty(covariance/3,L2),
  416    same_length_lists(covariance/3,L1,L2),
  417    length(L1,N),
  418    mean(L1,M1),
  419    mean(L2,M2),
  420    maplist(sub(M1),L1,LS1),
  421    maplist(sub(M2),L2,LS2),
  422    maplist(mul,LS1,LS2,LP),
  423    sum(LP,S),
  424    Cov is S / (N - 1).

  435correlation(L1,L2,Corr):-
  436    covariance(L1,L2,Cov),
  437    std_dev(L1,S1),
  438    std_dev(L2,S2),
  439    Corr is Cov / (S1 * S2).
  440pearson_correlation(L1,L2,C):-
  441    correlation(L1,L2,C).
  442spearman_correlation(L1,L2,C):-
  443    rank(L1,fractional,LF1),
  444    rank(L2,fractional,LF2),
  445    correlation(LF1,LF2,C).

  459geometric_mean(L,Mean):-
  460    multidim2(geometric_mean_,L,Mean).
  461geometric_mean_(List, GM):-
  462    ground_and_numbers_and_nonempty(geometric_mean/3,List),
  463    all_strictly_positive(geometric_mean/3,List),
  464    length(List,N),
  465    prod(List,P),
  466    GM is P**(1/N).

  475weighted_mean(List,Weights,WM):-
  476    ground_and_numbers_and_nonempty(weighted_mean/3,List),
  477    ground_and_numbers_and_nonempty(weighted_mean/3,Weights),
  478    same_length_lists(weighted_mean/3,List,Weights),
  479    all_positive(weighted_mean/3,Weights),
  480    nonzerosum(weighted_mean/3,Weights,SW),
  481    maplist(mul,List,Weights,LP),
  482    sum(LP,LS),
  483    WM is LS / SW.

  494rec(X,X1):- X1 is 1/X.
  495harmonic_mean(L,HM):-
  496    multidim2(harmonic_mean_,L,HM).
  497harmonic_mean_(L,HM):-
  498    ground_and_numbers_and_nonempty(harmonic_mean/2,L),
  499    all_strictly_positive(harmonic_mean/2,L),
  500    length(L,N),
  501    maplist(rec,L,LR),
  502    sum(LR,SLR),
  503    HM is N / SLR.

  513trimmed_mean(L,Lower,Upper,TM):-
  514    check_bounds(trimmed_mean/3,Lower,Upper),
  515    include(between(Lower,Upper),L,LO),
  516    mean(LO,TM).

  526trimmed_variance(L,Lower,Upper,TV):-
  527    check_bounds(trimmed_variance/3,Lower,Upper),
  528    include(between(Lower,Upper),L,LO),
  529    variance(LO,TV).

  538diff_and_power(Exp,Mean,Number,R):-
  539    D is Number - Mean,
  540    pow(D,Exp,R).
  541moment(L,M,Moment):-
  542    ground_and_numbers_and_nonempty(moment/3,L),
  543    (number(M) -> true; writeln("moment/3: moment must be a number."), false),
  544    (M > 0 -> true ; writeln("moment/3: moment must be a positive (>0) number."), false),
  545    length(L,N),
  546    mean(L,Mean),
  547    maplist(diff_and_power(M,Mean),L,Res),
  548    sum(Res,S),
  549    Moment is (1/N) * S.

  561skew(L,Skew):-
  562    multidim2(skew_,L,Skew).
  563skew_(List,Skew):-
  564    moment(List,2,M2),
  565    moment(List,3,M3),
  566    pow(M2,3/2,Den),
  567    Skew is M3 / Den.

  577kurtosis(L,Kurtosis):-
  578    multidim2(kurtosis_,L,Kurtosis).
  579kurtosis_(List,Kurtosis):-
  580    moment(List,4,L4),
  581    moment(List,2,L2),
  582    pow(L2,2,L22),
  583    Kurtosis is L4 / L22.

  616rank(L,Rank):-
  617    rank(L,average,Rank).
  618rank(LL,Mode,Rank):-
  619    flatten(LL,L),
  620    ground_and_numbers_and_nonempty(rank/3,L),
  621    msort(L,LS),
  622    (   Mode = dense ->    
  623    	compute_dense(LS,LS,1,LR);
  624    	(Mode = min ; Mode = competition) ->
  625    	compute_min(LS,LS,1,LR);
  626    	(Mode = max ; Mode = modified_competition) ->  
  627    	compute_max(LS,LS,1,LR);
  628    	(Mode = average ; Mode = fractional) -> 
  629     	compute_average(LS,1,LR) ;
  630    	Mode = ordinal ->  
  631    	compute_ordinal(LS,1,LR)
  632    ),
  633    extract_map(LL,LR,Rank,Mode).
  634
  635extract_map([],_LR,[],_):- !.
  636extract_map([H|T],LR,[HM|TM],Mode):-
  637    extract_map_(H,LR,HM,Mode), !,
  638	extract_map(T,LR,TM,Mode).    
  639extract_map([H|T],LR,[V|TV],M):-
  640    extract_map_([H|T],LR,[V|TV],M).
  641
  642extract_map_([],_,[],_).
  643extract_map_([H|T],LR,[V|TV],M):-
  644    memberchk([H,V],LR),
  645    ( M = ordinal -> 
  646    	delete(LR,[H,V],LR1);
  647    	LR1 = LR
  648    ),
  649    extract_map_(T,LR1,TV,M).
  650
  651compute_ordinal([],_,[]).
  652compute_ordinal([H|T],I,[[H,I]|T3]):-
  653    I1 is I+1,
  654    compute_ordinal(T,I1,T3).
  655
  656% dense
  657compute_dense([],_,_,[]).
  658compute_dense([H|_],L,IN,[[H,IN]|TR]):-
  659    findall(I,nth0(I,L,H),LI),
  660    length(LI,N),
  661    length(LAux,N),
  662    append(LAux,LN,L),
  663    I1 is IN+1,
  664    compute_dense(LN,LN,I1,TR).
  665
  666% min
  667compute_min([],_,_,[]).
  668compute_min([H|_],L,IN,[[H,IN]|TR]):-
  669    findall(I,nth0(I,L,H),LI),
  670    length(LI,N),
  671    length(LAux,N),
  672    append(LAux,LN,L),
  673    I1 is IN+N,
  674    compute_min(LN,LN,I1,TR).
  675
  676% max
  677compute_max([],_,_,[]).
  678compute_max([H|_],L,IN,[[H,IT]|TR]):-
  679    findall(I,nth0(I,L,H),LI),
  680    length(LI,N),
  681    length(LAux,N),
  682    append(LAux,LN,L),
  683    I1 is IN+N,
  684    IT is I1 - 1,
  685    compute_max(LN,LN,I1,TR).
  686
  687
  688look_in_average([],[]).
  689look_in_average([[V,Rank]|T],[[V,Rank1]|T0]):-
  690    findall(R,member([V,R],T),LR),
  691    length(LR,NR),
  692    ( NR is 0 ->  
  693    	Rank1 = Rank,
  694        T1 = T;
  695    	sum(LR,S),
  696        Rank1 is (S+Rank) / (NR+1),
  697        delete(T,[V,_],T1)
  698    ),
  699    look_in_average(T1,T0).
  700
  701% average
  702compute_average(LS,I,LAvg):-
  703    compute_ordinal(LS,I,LAvg1),
  704    look_in_average(LAvg1,LAvg).
  705
  706%%%%%%%%
  707% other utils
  708%%%%%%%%

  716nth_row(L,Nth,NthRow):-
  717    nth1(Nth,L,NthRow).

  725nth_column(L,Nth,NthColumn):-
  726    findall(E,(member(R,L),nth1(Nth,R,E)),NthColumn).

  734swap_rows_columns([H|T],MT):-
  735    transpose(H,[H|T],MT).
  736transpose([],_,[]).
  737transpose([_|T],M,[HMT|TMT]):-
  738    transposeIn(M,HMT,M1),
  739    transpose(T,M1,TMT).
  740transposeIn([],[],[]).
  741transposeIn([[V1|V2]|T],[V1|TV],[V2|TM]):-
  742    transposeIn(T,TV,TM).

  751split_n_parts(L,1,L):- !.
  752split_n_parts(_,N,_):- N =< 0, !, false.
  753split_n_parts(L,N,_):- 
  754    length(L,LN), 
  755    A is LN mod N, 
  756    A \= 0,
  757    writeln("List cannot be divided in the desired parts"), !,
  758    false.
  759split_n_parts([],_,[]):- !.
  760split_n_parts(List,Parts,[H|T]):-
  761    length(H,Parts),
  762    append(H,LT,List),
  763    split_n_parts(LT,Parts,T).

  772occurrences(L,LO):-
  773    sort(L,LS),
  774    count_occ(LS,L,LO).
  775count_occ([],_,[]).
  776count_occ([H|T],L,[[H,Occ]|T1]):-
  777    occurrences(L,H,Occ),
  778    count_occ(T,L,T1).

  786occurrences([],_,0).
  787occurrences(L,E,0):- ground(E), \+member(E,L).
  788occurrences(L,E,N):-
  789    findall(I,nth0(I,L,E),LI),
  790    length(LI,N).

  799min_val(L,M):-
  800    multidim2(min_list,L,M).

  809max_val(L,M):-
  810    multidim2(max_list,L,M).

  818min_max_val(L,Min,Max):-
  819    min_val(L,Min),
  820    max_val(L,Max).

  829sum(L,S):-
  830    multidim2(sum_list,L,S).

  839prod([],0).
  840prod(L,P):-
  841    ( L = [A|_], is_list(A) -> 
  842        maplist(prod(1),L,P);
  843        prod(1,L,P)
  844    ).
  845prod(N,[],N):- !.
  846prod(_,[0|_],0):- !.
  847prod(P0,[H|T],P1):-
  848    PT is P0 * H,
  849    prod(PT,T,P1).
  850
  851
  852%%%%%%%%%%%%%%%%%%%%%%%%

  863div(Sum,El,Div):-
  864    Sum \= 0,
  865    Div is El / Sum.
  866
  867between_float(L,U,N):-
  868    N >= L,
  869    N =< U.
  870
  871normalize_prob([],[]):- !.
  872normalize_prob(L,LNorm):-
  873    multidim2(normalize_prob_,L,LNorm).
  874normalize_prob_(L,LNorm):-
  875    maplist(between_float(0,1),L),
  876    sum(L,SL),
  877    maplist(div(SL),L,LNorm).

  892rescale([],[]).
  893rescale(L,LResc):-
  894    rescale(L,0,1,LResc).
  895rescale(L,Lower,Upper,LResc):-
  896    ( Lower >= Upper ->
  897        writeln("Upper bound should be greater than lower bound");
  898        true
  899    ),
  900    ( L = [A|_], is_list(A) -> 
  901        maplist(rescaling_(Lower,Upper),L,LResc);
  902        rescaling_(Lower,Upper,L,LResc)
  903    ).
  904rescaling_(Lower,Upper,L,LR):-
  905    min_val(L,Min),
  906    max_val(L,Max),
  907    (Min = Max ->
  908        writeln("Min = Max, cannot divide by 0"),
  909        false;
  910        maplist(rescaling(Min,Max,Lower,Upper),L,LR)
  911    ).
  912
  913rescaling(Min,Max,Lower,Upper,V,VResc):-
  914    VResc is Lower + ((V - Min)*(Upper - Lower))/(Max - Min).

  924mean_normalize([],[]).
  925mean_normalize(List,Normalized):-
  926    multidim2(mean_normalize_,List,Normalized).
  927mean_normalize_(List,Normalized):-
  928    min_val(List,Min),
  929    max_val(List,Max),
  930    mean(List,Mean),
  931    (Min = Max ->
  932        writeln("Min = Max, cannot divide by 0"),
  933        false;
  934        maplist(mean_normalizing(Min,Max,Mean),List,Normalized)
  935    ).
  936mean_normalizing(Min,Max,Mean,X,Normalized):-
  937    Normalized is (X - Mean) / (Max - Min).

  948standardize(List,Standardized):-
  949    multidim2(standardize_,List,Standardized).
  950standardize_(List,Standardized):-
  951    mean(List,Mean),
  952    pop_std_dev(List,SD),
  953    (SD = 0 -> 
  954        writeln("Cannot standardize, standard deviation in 0"),
  955        false;
  956        maplist(standardizing(Mean,SD),List,Standardized)
  957    ).
  958standardizing(Mean,SD,X,Standardized):-
  959    Standardized is (X - Mean) / SD.

  975prod_and_log(P,PR):-
  976    PR is P * log(P).
  977prod_and_log_div(P,Q,PR):-
  978    D is P/Q,
  979    PR is P * log(D).
  980entropy(L,E):-
  981    maplist(prod_and_log,L,PR),
  982    sum(PR,S),
  983    E is -S.
  984entropy(L,LP,E):-
  985    sum(LP,S),
  986    ( S \= 1.0 ->
  987        writeln("Probabilities must sum to 1"), 
  988        false;
  989        true
  990    ),
  991    writeln(LPP),
  992    ( maplist(<(0),LPP),maplist(>(1),LPP) -> 
  993        true;
  994        writeln("Probabilities must be between 0 and 1"),
  995        false
  996    ),
  997    maplist(prod_and_log_div,L,LP,P),
  998    sum(P,E).

 1007delete_nth1(L,N,LDeleted):-
 1008    L = [A|_],
 1009    (is_list(A) ->
 1010        maplist(delete_nth1_(N),L,LDeleted) ;
 1011        delete_nth1_(N,L,LDeleted)
 1012    ).
 1013delete_nth1_(1,[_|T],T):- !.
 1014delete_nth1_(Nth,[H|T],[H|T1]):-
 1015    Nth > 1,
 1016	N is Nth-1,
 1017    delete_nth1_(N,T,T1).

 1034sample_list(_,0,_,[]):- !.
 1035sample_list(L,N,Replace,[H|T]):-
 1036    length(L,Len),
 1037    random_between(1,Len,R),
 1038    nth1(R,L,H),
 1039    ( Replace = true ->
 1040        L1 = L ;
 1041        delete_nth1(L,R,L1)
 1042    ),
 1043    N1 is N - 1,
 1044    sample_list(L1,N1,Replace,T).
 1045
 1046get_random_el([H],_,_,_,_,H):- !.
 1047get_random_el([H|_],P,I,I,_,H):-
 1048    P =< 0, !.
 1049get_random_el([_|T],P,I,I0,[CP|TP],El):-
 1050    P > 0,
 1051    P1 is P - CP,
 1052    I1 is I + 1,
 1053    get_random_el(T,P1,I1,I0,TP,El).
 1054
 1055sample_list_prob(_,0,_,_,[]):- !.
 1056sample_list_prob(L,Size,Replace,[HP|TP],[Out|TOut]):-
 1057    Size > 0,
 1058    random(R),
 1059    RP is R - HP,
 1060    get_random_el(L,RP,1,Index,[HP|TP],Out),
 1061    ( Replace = true ->
 1062        L1 = L,
 1063        PL = [HP|TP] ;
 1064        delete_nth1(L,Index,L1),
 1065        delete_nth1([HP|TP],Index,PTemp),
 1066        normalize_prob(PTemp,PL)
 1067    ),
 1068    S1 is Size - 1,
 1069    sample_list_prob(L1,S1,Replace,PL,TOut).
 1070
 1071sample(List,Size,_):-
 1072    length(List,N),
 1073    Size > N,
 1074    writeln("Sample size must be smaller or equal to the possible elements. Set Replace to true if you want to sample with replacement."), !,
 1075    false.
 1076sample(List,Size,Result):-
 1077    sample_list(List,Size,false,Result).
 1078sample(List,Size,Replace,Result):-
 1079    ( ( Replace = true ; Replace = false ) ->
 1080        sample_list(List,Size,Replace,Result) ;
 1081        writeln("Set Replace to true or false"),
 1082        false
 1083    ).
 1084sample(List,Size,Replace,Probabilities,Result):-
 1085    ( ( Replace \= true , Replace \= false ) ->
 1086        writeln("Set Replace to true or false"),
 1087        false ;
 1088        \+ sum(Probabilities,1.0) ->
 1089            writeln("Probabilities must sum to 1"),
 1090            false ;
 1091            length(List,NL), length(Probabilities,NP),
 1092            NP \= NL ->
 1093            writeln("The list with the probabilities must be of the same length of the list with the elements to sample"),
 1094            false ;
 1095            sample_list_prob(List,Size,Replace,Probabilities,Result)
 1096    ).

 1111empirical_distribution(L,X,R):-
 1112    ( L = [A|_], is_list(A) -> 
 1113        maplist(empirical_distribution_list(X),L,R);
 1114        empirical_distribution_list(X,L,R)
 1115    ).
 1116empirical_distribution_list(X,L,R):-
 1117    ( is_list(X) ->
 1118        maplist(empirical_distribution_single(L),X,R);
 1119        empirical_distribution_single(L,X,R)
 1120    ).
 1121empirical_distribution_single(List,X,R):-
 1122    length(List,N),
 1123    ( X < 0 ->
 1124        R = 0 ;
 1125      X >= N ->
 1126        R = 1 ;
 1127        msort(List,LS),
 1128        nth0(X,LS,Num),
 1129        findall(I,nth0(I,LS,Num),LI),
 1130        max_list(LI,Max),
 1131        D is Max - X + 1,
 1132        X1 is X + D,
 1133        R is X1 / N
 1134    ).

 1144seq(A,B,L):-
 1145    seq(A,B,1,L).
 1146seq(A,A,1,[A]):- !.
 1147seq(A,A,V,[]):- V \= 1.
 1148seq(_,_,0,[]).
 1149seq(A,B,_,[]):- A > B.
 1150seq(A,B,Step,[A|T]):-
 1151	A < B,
 1152	A1 is A + Step,
 1153	seq(A1,B,Step,T).

 1161factorial(0,1).
 1162factorial(1,1):- !.
 1163factorial(N,F):-
 1164    factorial_aux(N,1,F).
 1165factorial_aux(1,F,F):- !.
 1166factorial_aux(N,V,F):-
 1167    V1 is V * N,
 1168    N1 is N-1,
 1169    factorial_aux(N1,V1,F).

 1178choose(N,K,1):- N =< K.
 1179choose(N,K,C):-
 1180    N > 0,
 1181    K > 0,
 1182    N > K,
 1183    factorial(N,NF),
 1184    factorial(K,KF),
 1185    NMinusK is N - K,
 1186    factorial(NMinusK,NMKF),
 1187    C is NF / (NMKF * KF).

 1194% random_list(N,Lower,Upper,L):-
 1195%     ( N =< 0 ->
 1196%         writeln("The number of elements must be greater than 0"),
 1197%         false;
 1198%         sample_var(uniform,Lower,Upper,N,L)
 1199%     ).
 1200
 1201% random variables
 1202% expected_value_var(Val,Occ,Exp)
 1203% variance_var(Val,Occ,Exp)
 1204
 1205% TODO: set verbose/0 with assert to test also failures without printing

 1223search_position_sorted(List,Element,Pos):-
 1224    search_position_sorted(List,Element,left,Pos).
 1225search_position_sorted(List,Element,Direction,Pos):-
 1226    L = [right,left],
 1227    (\+member(Direction,L) -> writeln("Position must be left or right"), false ; true),
 1228    ( List = [A|_], is_list(A) -> 
 1229        maplist(search_position_sorted_list(Element,Direction),List,Pos);
 1230        search_position_sorted_list(Element,Direction,List,Pos)
 1231    ).
 1232search_position_sorted_list(X,Direction,L,R):-
 1233    ( is_list(X) ->
 1234        maplist(search_position_sorted_single(L,Direction),X,R);
 1235        search_position_sorted_single(L,Direction,X,R)
 1236    ).
 1237search_position_sorted_single(L,Direction,X,R):-
 1238    msort(L,Sorted),
 1239    ( Direction = right -> 
 1240        search_pos(L,X,1,0,R) ; 
 1241        length(Sorted,N),
 1242        search_pos(L,X,-1,N,R)
 1243    ).
 1244search_pos([],_,_,P,P).
 1245search_pos([H|T],El,Inc,CP,P):-
 1246    ( El < H -> 
 1247        CP = P ;
 1248        CP1 is CP + Inc,
 1249        search_pos(T,El,Inc,CP1,P)
 1250    ).
 1251
 1252
 1253% /**
 1254%  * sample_distribution()
 1255%  * Sample from the specified distribution
 1256%  * */
 1257% sample_distribution(uniform,S):-
 1258%     sample_distribution(uniform,1,0,1,S).
 1259% sample_distribution(uniform,Lower,Upper,S):-
 1260%     sample_distribution(uniform,1,Lower,Upper,S).
 1261% sample_distribution(uniform,NSamples,Lower,Upper,S):-
 1262%     ( NSamples < 0 ->
 1263%         writeln("Number of samples must be greater than 0"),
 1264%         false ;
 1265%         length(S,NSamples),
 1266%         maplist(sample_uniform(Lower,Upper),S)
 1267%     ).
 1268% sample_uniform(Lower,Upper,S):-
 1269%     random(R),
 1270%     S is (Upper - Lower + 1) * R + Lower.