:- lib(promise(exp_go_over_bioc_deps/0,call(exp_go_over_bioc_deps_load))).
:- lib(stoics_lib:kv_decompose/3).
exp_go_over_bioc_deps_load :-
lib(suggests(bioc("GOstats"))),
lib(suggests(bioc("GSEABase"))), % installed with GOstats
lib(suggests(bioc("GO.db"))), % installed with GOstats
lib(suggests(bioc("Category"))), % installed with GOstats
assert(exp_go_over_bioc_deps).
exp_go_exp_id_default( hs, symb ).
exp_go_exp_id_default( gallus, symb).
exp_go_exp_id_default( mouse, symb ).
exp_go_exp_id_default( pig, ensg ).
exp_go_over_defaults( Args, Defs ) :-
Defs = [
go('BP'),
go_frame(lib_org),
go_over_pv_cut(0.05),
org(hs),
org_exp_id(ExpId),
stem(go_over),
to_file(false),
universe(go_exp)
],
( (memberchk(org(InOrg),Args),ground(InOrg)) -> true; InOrg=hs ),
bio_db_organism( InOrg, BdOrg ),
exp_go_exp_id_default( BdOrg, ExpId ).
/** exp_go_over( +CsvF, -GoOver, +Opts ).
Perform gene ontology over-representation analysis.
For experimental data in CsvF select de-regulated genes and on
those perform over representation analysis in gene ontology.
Results in GoOver are either as a values list or a csv file, the name of which is
assumed to be the ground value of GoOVer.
Opts
* go(GoSec='BP')
gene ontology section, in: =|[BP,MF,CC]|=
* go_frame(GoFrame=lib_org)
which go frame of GoTerms, Evidence and Gid to use.
Default, lib_org, uses r_lib(org..eg.db) frames, alternative bio_db uses bio_db tables,
before the intro of the option this was the way, so use this for backward compatibility
* go_over_pv_cut(PvCut=0.05)
p value filter for the results
* org(Org=hs)
one of bio_db_organism/2 first argument values (hs, gallus and mouse for now)
* org_exp_id(OrgExpId)
the type of the experimental gene ids for the organism. The default depends on Org, but currently all map to symb.
* stem(Stem=false)
stem for output csv file. When false, use basename of CsvF.
* to_file(ToF=false)
when GoOver is unbound, this controls whether the output goes to a file or a values list
* universe(Univ=go_exp)
Univ in : =|[genome,go,go_exp,experiment]|=
* genome(Gen)
all gene identifiers in relevant map predicates
* go(Go)
all gene identifies appearing in any ontology
* go_exp(GoExp)
gene ontology genes that appear in experiment
* experiment(Exp)
all identifiers in the experiment
* ontology(Onto)
not implemented yet. like OG above, but only include this Ontology branch
Options are also passed to bio_diffex/4.
==
?- absolute_file_name( pack('bio_analytics/data/silac/bt.csv'), CsvF ),
exp_go_over( CsvF, GoOver, [] ).
CsvF = '.../swipl/pack/bio_analytics/data/silac/bt.csv',
GoOver = [row('GOBPID', 'Pvalue', adj.pvalue, ...), row('GO:0061387', 0.000187, ...)|...].
?- absolute_file_name( pack('bio_analytics/data/silac/bt.csv'), CsvF ),
exp_go_over( CsvF, GoOver, [stem(here),to_file(true)] ).
CsvF = '.../swipl/pack/bio_analytics/data/silac/bt.csv',
GoOver = here_gontBP_p0.05_univExp.csv.
?- absolute_file_name( pack('bio_analytics/data/silac/bt.csv'), CsvF ),
exp_go_over( CsvF, 'a_file.csv', [stem(here),to_file(true)] ).
CsvF = '.../swipl/pack/bio_analytics/data/silac/bt.csv',
?- lib(by_unix).
?- @wc(-l,'a_file.csv').
183 a_file.csv
?- @wc(-l,'here_gontBP_p0.05_univExp.csv').
183 here_gontBP_p0.05_univExp.csv
true.
?- absolute_file_name( pack('bio_analytics/data/silac/bt.csv'), CsvF ),
exp_go_over( CsvF, OverF, [to_file(true)] ).
CsvF = '.../swipl/pack/bio_analytics/data/silac/bt.csv',
OverF = go_over_gontBP_p0.05_univExp.csv.
?- absolute_file_name( pack('bio_analytics/data/silac/bt.csv'), CsvF ),
exp_go_over( CsvF, OverF, [to_file(true),stem(false)] ).
CsvF = '.../swipl/pack/bio_analytics/data/silac/bt.csv',
OverF = '.../swipl/pack/bio_analytics/data/silac/bt_gontBP_p0.05_univExp.csv'.
==
@author nicos angelopoulos
@version 0.1 2019/5/2
@version 0.2 2022/12/20, =|Univ=go|= and =|Org=gallus|=
@version 0.3 2023/6/5, option go_frame(GoFrame)
@see go_over_universe/6
*/
exp_go_over( CsvF, GoOver, Args ) :-
Self = exp_go_over,
options_append( Self, Args, Opts ),
exp_go_over_bioc_deps, % make sure deps are loaded
debug_call( exp_go_over, options, Self/Opts ),
bio_diffex( CsvF, DEPrs, NDEPrs, Opts ),
options( org(OrgPrv), Opts ),
bio_db_organism( OrgPrv, Org ),
kv_decompose( DEPrs, DEGenes, _ ),
debug_call( exp_go_over, length, de_pairs/DEPrs ),
bio_list_sort_ne( DEGenes, DEGenesSet ),
debug_call( exp_go_over, length, de_genes_set/DEGenesSet ),
bio_list_sort_ne( DEGenes, DEGenesSet ),
options( org_exp_id(ExpId), Opts ),
org_go_over_std_gene_ids( Org, ExpId, DEGenesSet, Gids ),
debug_call( exp_go_over, length, gids/Gids ),
options( go_frame(GfOpt), Opts ),
go_over_frame( Org, GfOpt, goFrameData, GofOrg ),
goFrame <- 'GOFrame'(goFrameData, organism= +GofOrg),
goAllFrame <- 'GOAllFrame'(goFrame),
gsc <- 'GeneSetCollection'(goAllFrame, setType = 'GOCollection'() ),
genes <- Gids,
options( universe(UnivOpt), Opts ),
go_over_universe( UnivOpt, Org, ExpId, Gids, NDEPrs, Univ ),
debug_call( exp_go_over, length, universe/Univ ),
options( go(GoAspect), Opts ),
universe <- Univ,
universe <- 'as.character'(universe),
genes <- 'as.character'(genes),
options( go_over_pv_cut(PvCut), Opts ),
( debugging(Self) ->
<- print(paste("length of universe: ", length(universe)))
;
true
),
% this is in r_lib('Category')
gGparams <- 'GSEAGOHyperGParams'(
name="bio_analytics_gont",
geneSetCollection=gsc,
geneIds = genes,
universeGeneIds = universe,
ontology = + GoAspect,
pvalueCutoff = PvCut,
conditional = 'FALSE',
testDirection = "over"
),
over <- hyperGTest(gGparams),
dfOver <- 'data.frame'( summary(over) ),
dfOver$'adj.pvalue' <- 'p.adjust'( dfOver$'Pvalue', method=+'BH' ),
dfOver <- dfOver[*,c(1,2,8,3,4,5,6,7)],
Use = use(Self,GoAspect,UnivOpt,PvCut),
exp_go_over_return( GoOver, dfOver, CsvF, Use, Opts ).
bio_list_sort_ne( List, SetNe ) :-
sort( List, Set ),
( select('',Set,SetNe) -> true; Set = SetNe ).
exp_go_over_return( GoOver, DfOveR, _CsvF, _Use, Opts ) :-
var( GoOver ),
options( to_file(false), Opts ),
!,
Rlist <- 'as.list'(DfOveR),
findall( List, (member(Head=Tail,Rlist),List=[Head|Tail]), Lists),
mtx_lists( GoOver, Lists ).
exp_go_over_return( GoOver, DfOveR, CsvF, Use, Opts ) :-
Use = use(Self,GoAspect,UnivOpt,PvCut),
SubSep = '',
options( stem(Stem), Opts ),
atomic_list_concat( [gont,GoAspect], SubSep, GontAspTkn ),
capit( UnivOpt, ShUniv ),
atomic_list_concat( [univ,ShUniv], SubSep, UnivTkn ),
(Stem == false -> TempF = CsvF; os_ext(csv,Stem,TempF)),
atom_concat( p, PvCut, PvTkn ),
Postfixes = [GontAspTkn,PvTkn,UnivTkn],
(var(GoOver) -> os_postfix(Postfixes, TempF, GoOver) ; true),
<- 'write.csv'(DfOveR, file=+GoOver, 'row.names'='FALSE'),
% <- print( warnings() ),
debug( Self, 'Wrote: ~p', GoOver ).
%% go_over_universe( +Token, +Org, +ExpIdTkn, +DEGenes, +NDEPrs, -Universe )
%
% Universe is the list of gene identifiers to be used as universe/background for GOstats.
%
% In human (=|Org=hs|=), this is a list of Entrez ids, and in =|Org=mouse|=, a list of Mgim identifiers.
% DEGenes is a list of deferentially expressed gene identifiers, and NDEPrs is a list of non-differential
% expressed Symbol-Pvalue pairs.
%
% ExpIdTkn identifies the type of ids coming in in NDEPrs, DEGens, are already in standard form for Org.
go_over_universe( experiment, Org, ExpId, DeGids, NDEPrs, Univ ) :-
go_over_universe_exp( Org, ExpId, DeGids, NDEPrs, Univ ).
go_over_universe( genome, Org, _ExpId, _DEGids, _NDEPrs, Univ ) :-
go_over_universe_genome( Org, Univ ).
go_over_universe( go, Org, _ExpId, _DEGids, _NDEPrs, Univ ) :-
go_over_universe_go( Org, Univ ).
go_over_universe( go_exp, Org, ExpId, DEGids, NDEPrs, Univ ) :-
go_over_universe_go_exp( Org, ExpId, DEGids, NDEPrs, Univ ).
go_over_universe_genome( gallus, Univ ) :-
findall( Ncbi, cgnc_galg_cgnc_ncbi(_Cgnc,Ncbi), Ncbis ),
sort( Ncbis, Univ ).
go_over_universe_genome( hs, Univ ) :-
findall( Ncbi, hgnc_homs_symb_ncbi(_Symb,Ncbi), Ncbis ),
sort( Ncbis, Univ ).
go_over_universe_genome( mouse, Univ ) :-
findall( Mgim, mgim_musm_mgim_symb(Mgim,_Symb), Mgims ),
sort( Mgims, Univ ).
go_over_universe_genome( pig, Univ ) :-
findall( Ncbi, ncbi_suss_ncbi_ensg(Ncbi,_), Ncbis ),
sort( Ncbis, Univ ).
go_over_universe_go( gallus, Univ ) :-
findall( Ncbi, ( gont_galg_symb_gont(Symb,_Rl,_Ev,_Go),
cgnc_galg_cgnc_symb(Cgnc,Symb),
cgnc_galg_cgnc_ncbi(Cgnc,Ncbi)
),
Ncbis
),
sort( Ncbis, Univ ).
go_over_universe_go( hs, Univ ) :-
findall( Ncbi, ( gont_homs_edge_symb(_Go,_En,Symb),
hgnc_homs_symb_ncbi(Symb,Ncbi)
),
Ncbis
),
sort( Ncbis, Univ ).
go_over_universe_go( mouse, Univ ) :-
findall( Mgim, gont_musm_mgim_gont(Mgim,_E,_G), Mgims ),
sort( Mgims, Univ ).
go_over_universe_go( pig, Univ ) :-
findall( Ncbi, (gont_suss_symb_gont(Symb,_,_,_),ense_suss_ensg_symb(EnsG,Symb),ncbi_suss_ensg_ncbi(EnsG,Ncbi)), Ncbis ),
sort( Ncbis, Univ ).
% fixme: give doc here, what is this for ?
go_over_universe_exp( gallus, DeGids, NDEPrs, Univ ) :-
findall( Ncbi, ( member(Symb-NDEPrs),
cgnc_galg_cgnc_symb(Cgnc,Symb),
cgnc_galg_cgnc_ncbi(Cgnc,Ncbi)
), NDENcbis ),
append( DeGids, NDENcbis, Ncbis ),
sort( Ncbis, Univ ).
go_over_universe_exp( hs, DeGids, NDEPrs, Univ ) :-
findall( Ncbi, (member(Symb-_,NDEPrs),hgnc_homs_symb_ncbi(Symb,Ncbi)), NDENcbis ),
% findall( Entz1, (member(Symb,DEGenes),hgnc_homs_symb_entz(Symb,Entz1)), DEEntzs ),
% append( DEEntzs, NDEEntzs, Entzs ),
append( DeGids, NDENcbis, Ncbis ),
sort( Ncbis, Univ ).
go_over_universe_exp( mouse, DeGids, NDEPrs, Univ ) :-
findall( Mgim, (member(Symb-_,NDEPrs),mgim_musm_mgim_symb(Mgim,Symb)), NDEMgims ),
append( DeGids, NDEMgims, Mgims ),
sort( Mgims, Univ ).
go_over_universe_exp( gallus, ExpIdTkn, DEGids, NDEPrs, Univ ) :-
findall( Id, member(Id-_,NDEPrs), Ids ),
org_go_over_std_gene_ids_gallus( ExpIdTkn, Ids, NDGids ),
append( DEGids, NDGids, ExpGids ),
% fixme: we can add tests here, that the ids exist in some table ?
bio_list_sort_ne( ExpGids, Univ ).
go_over_universe_exp( hs, ExpIdTkn, DEGids, NDEPrs, Univ ) :-
findall( Id, member(Id-_,NDEPrs), Ids ),
org_go_over_std_gene_ids_hs( ExpIdTkn, Ids, NDGids ),
append( DEGids, NDGids, ExpGids ),
% fixme: we can add tests here, that the ids exist in some table ?
bio_list_sort_ne( ExpGids, Univ ).
go_over_universe_exp( mouse, ExpIdTkn, DEGids, NDEPrs, Univ ) :-
findall( Id, member(Id-_,NDEPrs), Ids ),
org_go_over_std_gene_ids_hs( ExpIdTkn, Ids, NDGids ),
append( DEGids, NDGids, ExpGids ),
% fixme: we can add tests here, that the ids exist in some table ?
bio_list_sort_ne( ExpGids, Univ ).
go_over_universe_exp( pig, ExpIdTkn, DEGids, NDEPrs, Univ ) :-
findall( Id, member(Id-_,NDEPrs), Ids ),
org_go_over_std_gene_ids_pig( ExpIdTkn, Ids, NDGids ),
append( DEGids, NDGids, ExpGids ),
% fixme: we can add tests here, that the ids exist in some table ?
bio_list_sort_ne( ExpGids, Univ ).
go_over_universe_go_exp( gallus, ExpId, DEGids, NDEPrs, Univ ) :-
findall( Id, member(Id-_,NDEPrs), Ids ),
org_go_over_std_gene_ids_gallus( ExpId, Ids, NDGids ),
append( DEGids, NDGids, ExpGids ),
findall( ExpGid,( member(ExpGid,ExpGids), % these are std form, here NCBI Gene ids
cgnc_galg_cgnc_ncbi(Cgnc,ExpGid),
cgnc_galg_cgnc_symb(Cgnc,Symb),
once(gont_galg_symb_gont(Symb,_,_,_))
),
List ),
bio_list_sort_ne( List, Univ ).
go_over_universe_go_exp( hs, ExpIdTkn, DEGids, NDEPrs, Univ ) :-
findall( Id, member(Id-_,NDEPrs), Ids ),
org_go_over_std_gene_ids_hs( ExpIdTkn, Ids, NDGids ),
append( DEGids, NDGids, ExpGids ),
findall( Ncbi, ( member(Ncbi,ExpGids),
hgnc_homs_symb_ncbi(Symb,Ncbi),
once(gont_homs_edge_symb(_,_,Symb))
),
List ),
bio_list_sort_ne( List, Univ ).
go_over_universe_go_exp( mouse, ExpIdTkn, DEGids, NDEPrs, Univ ) :-
findall( Id, member(Id-_,NDEPrs), Ids ),
org_go_over_std_gene_ids_hs( ExpIdTkn, Ids, NDGids ),
append( DEGids, NDGids, ExpGids ),
findall( Mgim, ( member(Mgim,ExpGids),
mgim_musm_mgim_symb(Mgim,Symb),
once(gont_musm_gont_symb(_,_,Symb))
),
List ),
bio_list_sort_ne( List, Univ ).
go_over_universe_go_exp( pig, ExpIdTkn, DEGids, NDEPrs, Univ ) :-
% fixme: we should be using go_exp probably, because it only comes gont only comes with symbs and unlikely to have a good coverage.
findall( Id, member(Id-_,NDEPrs), Ids ),
org_go_over_std_gene_ids_pig( ExpIdTkn, Ids, NDGids ),
append( DEGids, NDGids, ExpGids ),
findall( Ncbi, ( member(Ncbi,ExpGids),
ncbi_suss_ncbi_ensg(Ncbi,EnsG),
ense_suss_ensg_symb(EnsG,Symb),
gont_suss_symb_gont(Symb,_,_,_)
),
Ncbis
),
bio_list_sort_ne( Ncbis, Univ ).
org_go_over_std_gene_ids( Org, Gtyp, Set, Gids ) :-
at_con( [org_go_over_std_gene_ids,Org], '_', Pname ),
( call(Pname,Gtyp,Set,Gids) ->
true
;
throw( go_over(could_not_convert(org(Org),gene_id_type(Gtyp))) )
).
% it should for ensg and symb
org_go_over_std_gene_ids_gallus( ncbi, Set, Gids ) :-
!, % not really needes as it called from an if() above
Set = Gids.
org_go_over_std_gene_ids_gallus( cgnc, Set, Gids ) :-
!,
findall( Ncbi, (member(Cgnc,Set),cgnc_galg_cgnc_ncbi(Cgnc,Ncbi)), Ncbis ),
sort( Ncbis, Gids ).
org_go_over_std_gene_ids_gallus( SrcT, Set, Gids ) :-
atom_concat( cgnc_galg_cgnc_, SrcT, SrcNm ),
findall( Ncbi, (member(SrcG,Set),call(SrcNm,Cgnc,SrcG),cgnc_galg_cgnc_ncbi(Cgnc,Ncbi)), Ncbis ),
sort( Ncbis, Gids ).
% fixme: add more rules... for hs and mouse
org_go_over_std_gene_ids_hs( ncbi, Set, Gids ) :-
sort( Set, Gids ).
org_go_over_std_gene_ids_hs( symb, Set, Gids ) :-
findall( Ncbi, (member(Symb,Set),hgnc_homs_symb_ncbi(Symb,Ncbi)), Ncbis ),
sort( Ncbis, Gids ).
org_go_over_std_gene_ids_mouse( ncbi, Set, Gids ) :-
findall( Mgim, (member(Ncbi,Set),mgim_musm_mgim_ncbi(Mgim,Ncbi)), Mgims ),
sort( Mgims, Gids ).
org_go_over_std_gene_ids_mouse( symb, Set, Gids ) :-
findall( Mgim, (member(Symb,Set),mgim_musm_mgim_symb(Mgim,Symb)), Mgims ),
sort( Mgims, Gids ).
org_go_over_std_gene_ids_pig( ncbi, Set, Gids ) :-
!,
Set = Gids.
org_go_over_std_gene_ids_pig( ensg, Set, Gids ) :-
!,
findall( Ncbi, (member(EnsG,Set), org_go_over_std_gene_ids_pig_ensg_ncbi(EnsG,Ncbi)), Ncbis ),
sort( Ncbis, Gids ).
org_go_over_std_gene_ids_pig( symb, Set, Gids ) :-
!,
findall( Ncbi, ( member(Symb,Set),
ense_suss_ensg_symb(EnsG,Symb),
org_go_over_std_gene_ids_pig_ensg_ncbi(EnsG,Ncbi)
),
Ncbis ),
sort( Ncbis, Gids ).
org_go_over_std_gene_ids_pig_ensg_ncbi( EnsG, Ncbi ) :-
ncbi_suss_ensg_ncbi( EnsG, Ncbi ),
!.
% org_go_over_std_gene_ids_pig_ensg_ncbi( EnsG, Ncbi ) :-
% fixme: there might be other ways? although ense seems to pick up symbols from ense
go_over_frame( gallus, GfOpt, GoFra, GofOrg ) :-
go_over_frame_chicken( GfOpt, GoFra, GofOrg ).
go_over_frame( hs, GfOpt, GoFra, GofOrg ) :-
go_over_frame_human( GfOpt, GoFra, GofOrg ).
go_over_frame( mouse, GfOpt, GoFra, GofOrg ) :-
go_over_frame_mouse( GfOpt, GoFra, GofOrg ).
go_over_frame( pig, GfOpt, GoFra, GofOrg ) :-
go_over_frame_pig( GfOpt, GoFra, GofOrg ).
go_over_frame_chicken( bio_db, GoFra, GofOrg ) :-
!,
findall( row(Gid,E,Ncbi), (
gont_galg_symb_gont(Symb,_,E,G),
go_id(Gid,G),
cgnc_galg_cgnc_symb(Cgnc,Symb),
cgnc_galg_cgnc_ncbi(Cgnc,Ncbi)
),
Rows
),
go_mtx_df( [row(go_id,evidence,gene_id)|Rows], GoFra, [] ),
GofOrg = "Gallus gallus".
go_over_frame_chicken( lib_org, GoFra, GofOrg ) :-
lib(bioc("org.Gg.eg.db")),
ggframe <- toTable('org.Gg.egGO'),
GoFra <- 'data.frame'(ggframe$go_id, ggframe$'Evidence', ggframe$gene_id),
GofOrg = "Gallus gallus".
go_over_frame_human( bio_db, GoFra, GofOrg ) :-
!,
findall( row(Gid,E,Ncbi),
( gont_homs_edge_symb(G,E,S),
go_id(Gid,G),
hgnc_homs_symb_ncbi(S,Ncbi)
),
Rows ),
go_mtx_df( [row(go_id,'Evidence',gene_id)|Rows], GoFra, [] ),
GofOrg = "Homo sapiens".
go_over_frame_human( lib_org, GoFra, GofOrg ) :-
lib(bioc("org.Hs.eg.db")),
hsframe <- toTable('org.Hs.egGO'),
GoFra <- 'data.frame'(hsframe$go_id, hsframe$'Evidence', hsframe$gene_id),
GofOrg = "Homo sapiens".
go_over_frame_mouse( bio_db, GoFra, GofOrg ) :-
!,
findall( row(Gid,E,M),
( gont_musm_mgim_gont(M,E,G),
go_id(Gid,G)
),
Rows ),
go_mtx_df( [row(go_id,evidence,gene_id)|Rows], GoFra, [] ),
GofOrg = "Mus musculus".
go_over_frame_mouse( lib_org, GoFra, GofOrg ) :-
lib(bioc("org.Mm.eg.db")),
mmframe <- toTable('org.Mm.egGO'),
GoFra <- 'data.frame'(mmframe$go_id, mmframe$'Evidence', mmframe$gene_id),
GofOrg = "Mus musculus".
go_over_frame_pig( bio_db, GoFra, GofOrg ) :-
!,
findall( row(Gid,Evi,Ncbi), ( gont_suss_symb_gont(Symb,_,Evi,G),
go_id(Gid,G),
ense_suss_ensg_symb(EnsG,Symb),
ncbi_suss_ensg_ncbi(EnsG,Ncbi)
),
Rows ),
go_mtx_df( [row(go_id,evidence,gene_id)|Rows], GoFra, [] ),
GofOrg = "Sus scrofa".
go_over_frame_pig( lib_org, GoFra, GofOrg ) :-
lib( bioc("org.Ss.eg.db") ),
ssframe <- toTable('org.Ss.egGO'),
GoFra <- 'data.frame'(ssframe$go_id, ssframe$'Evidence', ssframe$gene_id),
GofOrg = "Sus scrofa".
capit( Atom, Capit ) :-
atom_codes( Atom, [A,B,C|_] ),
atom_codes( Aat, [A] ),
atom_codes( BCat, [B,C] ),
upcase_atom( Aat, CapA ),
downcase_atom( BCat, DwnBC ),
atom_concat( CapA, DwnBC, Capit ).
go_mtx_df_defaults([check_names(false)]).
go_mtx_df( Csv, Df, Args ) :-
options_append( go_mtx_df, Args, Opts ),
mtx_lists( Csv, Lists ),
findall( Head=Tail, ( member(List,Lists), List=[Head|Tail] ), Pairs ),
Df <- Pairs,
( options(check_names(true),Opts) -> Check = 'T'; Check = 'F' ),
Df <- 'data.frame'(Df,'check.names'=Check).