git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/MineICA@73179 bc3139a8-67e5-0310-9ffc-ced21a209358
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,30 @@ |
1 |
+Package: MineICA |
|
2 |
+Type: Package |
|
3 |
+Title: Analysis of an ICA decomposition obtained on genomics data |
|
4 |
+Version: 0.1.0 |
|
5 |
+Date: 2012-01-21 |
|
6 |
+Author: Anne Biton |
|
7 |
+Maintainer: <anne.biton@gmail.com> |
|
8 |
+Description: The goal of MineICA is to make easier the interpretation of the |
|
9 |
+ interpretation of a decomposition obtained by Independent Component |
|
10 |
+ Analysis on transcriptomic data. It helps the biological interpretation of |
|
11 |
+ the components by studying their association with variables (e.g sample |
|
12 |
+ annotations) and gene sets, and enables the comparison of components from |
|
13 |
+ different datasets using correlation-based graph. |
|
14 |
+License: GPL-2 |
|
15 |
+LazyLoad: yes |
|
16 |
+BiocViews: Bioinformatics, Visualizations, MultipleComparisons |
|
17 |
+Imports: AnnotationDbi, lumi, fpc, lumiHumanAll.db |
|
18 |
+Depends: R (>= 2.10), Biobase, plyr, ggplot2, scales, foreach, xtable, |
|
19 |
+ biomaRt, gtools, GOstats, cluster, marray, mclust, |
|
20 |
+ RColorBrewer, colorspace, igraph, Rgraphviz, graph, annotate, |
|
21 |
+ Hmisc, fastICA, JADE, methods |
|
22 |
+Suggests: biomaRt, GOstats, cluster, hgu133a.db, mclust, igraph, |
|
23 |
+ breastCancerMAINZ, breastCancerTRANSBIG, breastCancerUPP, |
|
24 |
+ breastCancerVDX |
|
25 |
+Enhances: doMC |
|
26 |
+Collate: 'AllClasses.R' 'AllGeneric.R' 'methods-IcaSet.R' |
|
27 |
+ 'methods-MineICAParams.R' 'compareAnalysis.R' |
|
28 |
+ 'functions_comp2annot.R' 'functions_comp2annottests.R' |
|
29 |
+ 'functions_enrich.R' 'functions.R' 'heatmap.plus.R' |
|
30 |
+ 'heatmapsOnSel.R' 'runAn.R' 'compareGenes.R' |
0 | 31 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,56 @@ |
1 |
+import(Biobase) |
|
2 |
+ |
|
3 |
+ |
|
4 |
+exportClasses( |
|
5 |
+ IcaSet, MineICAParams |
|
6 |
+) |
|
7 |
+ |
|
8 |
+exportMethods( |
|
9 |
+ dat, "dat<-", datByGene, "datByGene<-", nbComp, organism, "organism<-", A, Alist, "A<-", S, "S<-", SByGene, "SByGene<-", SByGene, "SByGene<-", |
|
10 |
+ "[", "[<-", getComp, indComp, "indComp<-", compNames, "compNames<-", "witGenes<-", witGenes, Slist, SlistByGene, |
|
11 |
+ chipVersion, "chipVersion<-", "chipManu<-", chipManu, "refSamples<-", refSamples, "typeID<-", typeID, |
|
12 |
+annotfile, "annotfile<-", Afile, "Afile<-", Sfile, "Sfile<-", datfile, "datfile<-", resPath, "resPath<-", genesPath, "genesPath<-", annot2col, "annot2col<-", selCutoff, "selCutoff<-", pvalCutoff, "pvalCutoff<-", geneNames, mart, "mart<-" |
|
13 |
+ |
|
14 |
+) |
|
15 |
+ |
|
16 |
+export(annot2Color) |
|
17 |
+export(annotFeatures) |
|
18 |
+export(annotFeaturesComp) |
|
19 |
+export(annotFeaturesWithBiomaRt) |
|
20 |
+export(annotInGene) |
|
21 |
+export(annotReciprocal) |
|
22 |
+export(buildIcaSet) |
|
23 |
+export(buildMineICAParams) |
|
24 |
+export(clusterFastICARuns) |
|
25 |
+export(clusterSamplesByComp) |
|
26 |
+export(clusterSamplesByComp_multiple) |
|
27 |
+export(clusVarAnalysis) |
|
28 |
+export(compareAn) |
|
29 |
+export(compareAn2graphfile) |
|
30 |
+export(compareGenes) |
|
31 |
+export(cor2An) |
|
32 |
+export(getProj) |
|
33 |
+export(getSdExpr) |
|
34 |
+export(hypergeoAn) |
|
35 |
+export(nbOccInComp) |
|
36 |
+export(nodeAttrs) |
|
37 |
+export(plotAllMix) |
|
38 |
+export(plotCorGraph) |
|
39 |
+export(plot_heatmapsOnSel) |
|
40 |
+export(plotMix) |
|
41 |
+export(plotPosAnnotInComp) |
|
42 |
+export(plotPosSamplesInComp) |
|
43 |
+export(qualVarAnalysis) |
|
44 |
+export(quantVarAnalysis) |
|
45 |
+export(relativePath) |
|
46 |
+export(runAn) |
|
47 |
+export(runCompareIcaSets) |
|
48 |
+export(runEnrich) |
|
49 |
+export(runICA) |
|
50 |
+export(selectContrib) |
|
51 |
+export(selectFeatures_IQR) |
|
52 |
+export(selectWitnessGenes) |
|
53 |
+export(writeGenes) |
|
54 |
+export(writeHtmlResTestsByAnnot) |
|
55 |
+export(writeProjByComp) |
|
56 |
+export(writeRnkFiles) |
0 | 57 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,36 @@ |
1 |
+setClass(Class = "IcaSet", |
|
2 |
+ contains = "eSet", |
|
3 |
+ representation = representation( |
|
4 |
+ A="data.frame", |
|
5 |
+ S="data.frame", |
|
6 |
+ SByGene="data.frame", |
|
7 |
+ compNames="character", |
|
8 |
+ indComp="numeric", |
|
9 |
+ witGenes="character", |
|
10 |
+ datByGene="data.frame", |
|
11 |
+ chipManu="character", |
|
12 |
+ chipVersion="character", |
|
13 |
+ refSamples="character", |
|
14 |
+ typeID="character", |
|
15 |
+ organism = "character", |
|
16 |
+ mart="Mart" |
|
17 |
+ ), |
|
18 |
+ prototype = prototype(new("VersionedBiobase", versions = c(classVersion("eSet"), IcaSet="0.1.0"))) |
|
19 |
+ ) |
|
20 |
+ |
|
21 |
+ |
|
22 |
+setClass(Class = "MineICAParams", |
|
23 |
+ representation = representation( |
|
24 |
+ Sfile="character", |
|
25 |
+ Afile="character", |
|
26 |
+ datfile="character", |
|
27 |
+ annotfile="character", |
|
28 |
+ resPath="character", |
|
29 |
+ genesPath="character", |
|
30 |
+ annot2col="character", |
|
31 |
+ pvalCutoff="numeric", |
|
32 |
+ selCutoff="numeric" |
|
33 |
+ ) |
|
34 |
+ ) |
|
35 |
+ |
|
36 |
+ |
0 | 37 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,131 @@ |
1 |
+setGeneric ( "geneNames" , function (object){ standardGeneric ( "geneNames" )}) |
|
2 |
+setGeneric ( "selectContrib" , function ( object, cutoff, level, ...){ standardGeneric ( "selectContrib" )}) |
|
3 |
+setGeneric ( "nbComp" , function ( object){ standardGeneric ( "nbComp" )}) |
|
4 |
+setGeneric ( "getComp" , function ( object, level, ind){ standardGeneric ( "getComp" )}) |
|
5 |
+setGeneric ( "getAssayData" , function ( object ){ standardGeneric ( "getAssayData" )}) |
|
6 |
+setGeneric ( "datByGene<-" , function ( object, value){ standardGeneric ( "datByGene<-" )}) |
|
7 |
+setGeneric ( "datByGene" , function ( object){ standardGeneric ( "datByGene" )}) |
|
8 |
+setGeneric ( "getFeatureData" , function ( object ){ standardGeneric ( "getFeatureData" )}) |
|
9 |
+setGeneric ( "getPhenoData" , function ( object ){ standardGeneric ( "getPhenoData" )}) |
|
10 |
+setGeneric ( "getfData" , function ( object ){ standardGeneric ( "getfData" )}) |
|
11 |
+setGeneric ( "S" , function ( object ){ standardGeneric ( "S" )}) |
|
12 |
+setGeneric ( "getS" , function ( object ){ standardGeneric ( "getS" )}) |
|
13 |
+setGeneric ( "S<-" , function ( object, value){ standardGeneric ( "S<-" )}) |
|
14 |
+setGeneric ( "A" , function ( object ){ standardGeneric ( "A" )}) |
|
15 |
+setGeneric ( "dat" , function ( object ){ standardGeneric ( "dat" )}) |
|
16 |
+setGeneric ( "getA" , function ( object ){ standardGeneric ( "getA" )}) |
|
17 |
+setGeneric ( "A<-" , function ( object, value){ standardGeneric ( "A<-" )}) |
|
18 |
+setGeneric ( "dat<-" , function ( object, value){ standardGeneric ( "dat<-" )}) |
|
19 |
+setGeneric ( "Slist" , function ( object ){ standardGeneric ( "Slist" )}) |
|
20 |
+setGeneric ( "Alist" , function ( object ){ standardGeneric ( "Alist" )}) |
|
21 |
+setGeneric ( "SlistByGene" , function ( object){ standardGeneric ( "SlistByGene" )}) |
|
22 |
+setGeneric ( "getSByGene" , function ( object ){ standardGeneric ( "getSByGene" )}) |
|
23 |
+setGeneric ( "SByGene" , function ( object ){ standardGeneric ( "SByGene" )}) |
|
24 |
+setGeneric ( "SByGene<-" , function ( object, value){ standardGeneric ( "SByGene<-" )}) |
|
25 |
+setGeneric ( "getLabelsComp" , function ( object ){ standardGeneric ( "getLabelsComp" )}) |
|
26 |
+setGeneric ( "compNames" , function ( object ){ standardGeneric ( "compNames" )}) |
|
27 |
+setGeneric ( "compNames<-" , function ( object, value){ standardGeneric ( "compNames<-" )}) |
|
28 |
+setGeneric ( "sampleNames<-" , function ( object, value){ standardGeneric ( "sampleNames<-" )}) |
|
29 |
+setGeneric ( "getIndComp" , function ( object ){ standardGeneric ( "getIndComp" )}) |
|
30 |
+setGeneric ( "indComp" , function ( object ){ standardGeneric ( "indComp" )}) |
|
31 |
+setGeneric ( "indComp<-" , function ( object, value){ standardGeneric ( "indComp<-" )}) |
|
32 |
+setGeneric ( "witGenes" , function ( object ){ standardGeneric ( "witGenes" )}) |
|
33 |
+setGeneric ( "witGenes<-" , function ( object, value ){ standardGeneric ( "witGenes<-" )}) |
|
34 |
+setGeneric ( "getWitGenes" , function ( object ){ standardGeneric ( "getWitGenes" )}) |
|
35 |
+setGeneric ( "getWitData" , function ( object ){ standardGeneric ( "getWitData" )}) |
|
36 |
+setGeneric ( "getPackage" , function ( object ){ standardGeneric ( "getPackage" )}) |
|
37 |
+setGeneric ( "getAnnotation" , function ( object ){ standardGeneric ( "getAnnotation" )}) |
|
38 |
+setGeneric ( "package" , function ( object ){ standardGeneric ( "package" )}) |
|
39 |
+setGeneric ( "chipVersion" , function ( object ){ standardGeneric ( "chipVersion" )}) |
|
40 |
+setGeneric ( "getChipVersion" , function ( object ){ standardGeneric ( "getChipVersion" )}) |
|
41 |
+setGeneric ( "chipVersion<-" , function ( object, value ){ standardGeneric ( "chipVersion<-" )}) |
|
42 |
+setGeneric ( "getChipManu" , function ( object ){ standardGeneric ( "getChipManu" )}) |
|
43 |
+setGeneric ( "chipManu" , function ( object ){ standardGeneric ( "chipManu" )}) |
|
44 |
+setGeneric ( "chipManu<-" , function ( object, value ){ standardGeneric ( "chipManu<-" )}) |
|
45 |
+setGeneric ( "getTypeID" , function ( object ){ standardGeneric ( "getTypeID" )}) |
|
46 |
+setGeneric ( "typeID<-" , function ( object, value){ standardGeneric ( "typeID<-" )}) |
|
47 |
+setGeneric ( "typeID" , function ( object ){ standardGeneric ( "typeID" )}) |
|
48 |
+setGeneric ( "getOrganism" , function ( object ){ standardGeneric ( "getOrganism" )}) |
|
49 |
+setGeneric ( "organism<-" , function ( object, value){ standardGeneric ( "organism<-" )}) |
|
50 |
+setGeneric ( "organism" , function ( object ){ standardGeneric ( "organism" )}) |
|
51 |
+setGeneric ( "getMart" , function ( object ){ standardGeneric ( "getMart" )}) |
|
52 |
+setGeneric ( "mart<-" , function ( object, value){ standardGeneric ( "mart<-" )}) |
|
53 |
+setGeneric ( "mart" , function ( object ){ standardGeneric ( "mart" )}) |
|
54 |
+ |
|
55 |
+# set |
|
56 |
+setGeneric ( "setFeatureData<-" , function ( object, value ){ standardGeneric ( "setFeatureData<-" )}) |
|
57 |
+setGeneric ( "setfData<-" , function ( object, value ){ standardGeneric ( "setfData<-" )}) |
|
58 |
+setGeneric ( "setPhenoData<-" , function ( object, value ){ standardGeneric ( "setPhenoData<-" )}) |
|
59 |
+setGeneric ( "setAssayData<-" , function ( object, value ){ standardGeneric ( "setAssayData<-" )}) |
|
60 |
+setGeneric ( "setAssayDataByGene<-" , function ( object, value){ standardGeneric ( "setAssayDataByGene<-" )}) |
|
61 |
+setGeneric ( "setA<-" , function ( object, value){ standardGeneric ( "setA<-" )}) |
|
62 |
+setGeneric ( "setS<-" , function ( object, value){ standardGeneric ( "setS<-" )}) |
|
63 |
+setGeneric ( "setSByGene<-" , function ( object, value){ standardGeneric ( "setSByGene<-" )}) |
|
64 |
+setGeneric ( "setLabelsComp<-" , function ( object, value ){ standardGeneric ( "setLabelsComp<-" )}) |
|
65 |
+setGeneric ( "setIndComp<-" , function ( object, value ){ standardGeneric ( "setIndComp<-" )}) |
|
66 |
+setGeneric ( "setWitGenes<-" , function ( object, value ){ standardGeneric ( "setWitGenes<-" )}) |
|
67 |
+setGeneric ( "setWitData<-" , function ( object, value ){ standardGeneric ( "setWitData<-" )}) |
|
68 |
+setGeneric ( "setChipVersion<-" , function ( object, value ){ standardGeneric ( "setChipVersion<-" )}) |
|
69 |
+setGeneric ( "setChipManu<-" , function ( object, value ){ standardGeneric ( "setChipManu<-" )}) |
|
70 |
+setGeneric ( "setAnnotation<-" , function ( object, value ){ standardGeneric ( "setAnnotation<-" )}) |
|
71 |
+setGeneric ( "setPackage<-" , function ( object, value ){ standardGeneric ( "setPackage<-" )}) |
|
72 |
+setGeneric ( "package<-" , function ( object, value ){ standardGeneric ( "package<-" )}) |
|
73 |
+setGeneric ( "setTypeID<-" , function ( object, value ){ standardGeneric ( "setTypeID<-" )}) |
|
74 |
+setGeneric ( "typeID<-" , function ( object, value ){ standardGeneric ( "typeID<-" )}) |
|
75 |
+setGeneric ( "setOrganism<-" , function ( object, value ){ standardGeneric ( "setOrganism<-" )}) |
|
76 |
+setGeneric ( "organism<-" , function ( object, value ){ standardGeneric ( "organism<-" )}) |
|
77 |
+setGeneric ( "setMart<-" , function ( object, value ){ standardGeneric ( "setMart<-" )}) |
|
78 |
+setGeneric ( "mart<-" , function ( object, value ){ standardGeneric ( "mart<-" )}) |
|
79 |
+ |
|
80 |
+setGeneric ( "Afile" , function ( object){ standardGeneric ( "Afile" )}) |
|
81 |
+setGeneric ( "Sfile" , function ( object){ standardGeneric ( "Sfile" )}) |
|
82 |
+setGeneric ( "datfile" , function ( object){ standardGeneric ( "datfile" )}) |
|
83 |
+setGeneric ( "annotfile" , function ( object){ standardGeneric ( "annotfile" )}) |
|
84 |
+setGeneric ( "resPath" , function ( object){ standardGeneric ( "resPath" )}) |
|
85 |
+setGeneric ( "genesPath" , function ( object){ standardGeneric ( "genesPath" )}) |
|
86 |
+setGeneric ( "namesAnnot" , function ( object){ standardGeneric ( "namesAnnot" )}) |
|
87 |
+setGeneric ( "annot2col" , function ( object){ standardGeneric ( "annot2col" )}) |
|
88 |
+setGeneric ( "pvalCutoff" , function ( object){ standardGeneric ( "pvalCutoff" )}) |
|
89 |
+setGeneric ( "selCutoff" , function ( object){ standardGeneric ( "selCutoff" )}) |
|
90 |
+setGeneric ( "refSamples" , function ( object){ standardGeneric ( "refSamples" )}) |
|
91 |
+ |
|
92 |
+setGeneric ( "getAfile" , function ( object){ standardGeneric ( "getAfile" )}) |
|
93 |
+setGeneric ( "getSfile" , function ( object){ standardGeneric ( "getSfile" )}) |
|
94 |
+setGeneric ( "getDatfile" , function ( object){ standardGeneric ( "getDatfile" )}) |
|
95 |
+setGeneric ( "getAnnotfile" , function ( object){ standardGeneric ( "getAnnotfile" )}) |
|
96 |
+setGeneric ( "getResPath" , function ( object){ standardGeneric ( "getResPath" )}) |
|
97 |
+setGeneric ( "getGenesPath" , function ( object){ standardGeneric ( "getGenesPath" )}) |
|
98 |
+setGeneric ( "getNamesAnnot" , function ( object){ standardGeneric ( "getNamesAnnot" )}) |
|
99 |
+setGeneric ( "getAnnot2col" , function ( object){ standardGeneric ( "getAnnot2col" )}) |
|
100 |
+setGeneric ( "getPvalCutoff" , function ( object){ standardGeneric ( "getPvalCutoff" )}) |
|
101 |
+setGeneric ( "getSelCutoff" , function ( object){ standardGeneric ( "getSelCutoff" )}) |
|
102 |
+setGeneric ( "getRefSamples" , function ( object){ standardGeneric ( "getRefSamples" )}) |
|
103 |
+ |
|
104 |
+ |
|
105 |
+setGeneric ( "Afile<-" , function ( object, value ){ standardGeneric ( "Afile<-" )}) |
|
106 |
+setGeneric ( "Sfile<-" , function ( object, value ){ standardGeneric ( "Sfile<-" )}) |
|
107 |
+setGeneric ( "datfile<-" , function ( object, value ){ standardGeneric ( "datfile<-" )}) |
|
108 |
+setGeneric ( "annotfile<-" , function ( object, value ){ standardGeneric ( "annotfile<-" )}) |
|
109 |
+setGeneric ( "namesAnnot<-" , function ( object, value ){ standardGeneric ( "namesAnnot<-" )}) |
|
110 |
+setGeneric ( "resPath<-" , function ( object, value ){ standardGeneric ( "resPath<-" )}) |
|
111 |
+setGeneric ( "genesPath<-" , function ( object, value ){ standardGeneric ( "genesPath<-" )}) |
|
112 |
+setGeneric ( "annot2col<-" , function ( object, value ){ standardGeneric ( "annot2col<-" )}) |
|
113 |
+setGeneric ( "pvalCutoff<-" , function ( object, value ){ standardGeneric ( "pvalCutoff<-" )}) |
|
114 |
+setGeneric ( "selCutoff<-" , function ( object, value ){ standardGeneric ( "selCutoff<-" )}) |
|
115 |
+setGeneric ( "plotHist<-" , function ( object, value ){ standardGeneric ( "plotHist<-" )}) |
|
116 |
+setGeneric ( "plotHeatmap<-" , function ( object, value ){ standardGeneric ( "plotHeatmap<-" )}) |
|
117 |
+setGeneric ( "refSamples<-" , function ( object, value ){ standardGeneric ( "refSamples<-" )}) |
|
118 |
+ |
|
119 |
+setGeneric ( "setAfile<-" , function ( object, value ){ standardGeneric ( "setAfile<-" )}) |
|
120 |
+setGeneric ( "setSfile<-" , function ( object, value ){ standardGeneric ( "setSfile<-" )}) |
|
121 |
+setGeneric ( "setAnnotfile<-" , function ( object, value ){ standardGeneric ( "setAnnotfile<-" )}) |
|
122 |
+setGeneric ( "setDatfile<-" , function ( object, value ){ standardGeneric ( "setDatfile<-" )}) |
|
123 |
+setGeneric ( "setResPath<-" , function ( object, value ){ standardGeneric ( "setResPath<-" )}) |
|
124 |
+setGeneric ( "setGenesPath<-" , function ( object, value ){ standardGeneric ( "setGenesPath<-" )}) |
|
125 |
+setGeneric ( "setNamesAnnot<-" , function ( object, value ){ standardGeneric ( "setNamesAnnot<-" )}) |
|
126 |
+setGeneric ( "setAnnot2col<-" , function ( object, value ){ standardGeneric ( "setAnnot2col<-" )}) |
|
127 |
+setGeneric ( "setPvalCutoff<-" , function ( object, value ){ standardGeneric ( "setPvalCutoff<-" )}) |
|
128 |
+setGeneric ( "setSelCutoff<-" , function ( object, value ){ standardGeneric ( "setSelCutoff<-" )}) |
|
129 |
+setGeneric ( "setRefSamples<-" , function ( object, value ){ standardGeneric ( "setRefSamples<-" )}) |
|
130 |
+ |
|
131 |
+ |
0 | 132 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,1313 @@ |
1 |
+##' Compare \code{\link{IcaSet}} objects by computing the correlation between either |
|
2 |
+##' projection values of common features or genes, or contributions of common |
|
3 |
+##' samples. |
|
4 |
+##' |
|
5 |
+##' The user must carefully choose the object on which the correlation will be |
|
6 |
+##' computed. |
|
7 |
+##' If \code{level='samples'}, the correlations are based on the mixing |
|
8 |
+##' matrices of the ICA decompositions (of dimension samples x components). \code{'A'} |
|
9 |
+##' will be typically chosen when the ICA decompositions were computed on the |
|
10 |
+##' same dataset, or on datasets that include the same samples. |
|
11 |
+##' If \code{level='features'} is |
|
12 |
+##' chosen, the correlation is calculated between the source matrices (of dimension |
|
13 |
+##' features x components) of the ICA decompositions. \code{'S'} will be typically |
|
14 |
+##' used when the ICA decompositions share common features (e.g same microarrays). |
|
15 |
+##' If \code{level='genes'}, the correlations are calculated |
|
16 |
+##' on the attributes \code{'SByGene'} which store the |
|
17 |
+##' projections of the annotated features. \code{'SByGene'} will be typically chosen |
|
18 |
+##' when ICA were computed on datasets from different technologies, for which |
|
19 |
+##' comparison is possible only after annotation into a common ID, like genes. |
|
20 |
+##' |
|
21 |
+##' \code{cutoff_zval} is only used when \code{level} is one of \code{c('genes','features')}, in |
|
22 |
+##' order to restrict the correlation to the contributing features or genes. |
|
23 |
+##' |
|
24 |
+##' When \code{cutoff_zval} is specified, for each pair of components, genes or features that are |
|
25 |
+##' included in the circle of center 0 and radius \code{cutoff_zval} are excluded from |
|
26 |
+##' the computation of the correlation. |
|
27 |
+##' |
|
28 |
+##' It must be taken into account by the user that if |
|
29 |
+##' \code{cutoff_zval} is different from \code{NULL} or \code{0}, the computation will be much |
|
30 |
+##' slowler since each pair of component is treated individually. |
|
31 |
+##' |
|
32 |
+##' @title Comparison of IcaSet objects using correlation |
|
33 |
+##' @param icaSets list of IcaSet objects, e.g results of ICA decompositions |
|
34 |
+##' obtained on several datasets. |
|
35 |
+##' @param labAn vector of names for each icaSet, e.g the the names of the |
|
36 |
+##' datasets on which were calculated the decompositions. |
|
37 |
+##' @param type.corr Type of correlation to compute, either |
|
38 |
+##' \code{'pearson'} or \code{'spearman'}. |
|
39 |
+##' @param cutoff_zval either NULL or 0 (default) if all genes are used to |
|
40 |
+##' compute the correlation between the components, or a threshold to compute |
|
41 |
+##' the correlation on the genes that have at least a scaled projection higher |
|
42 |
+##' than cutoff_zval. Will be used only when correlations are calculated on S or |
|
43 |
+##' SByGene. |
|
44 |
+##' @param level Data level of the \code{IcaSet} objects on which is applied the correlation. |
|
45 |
+##' It must correspond to a feature shared by the IcaSet objects: |
|
46 |
+##' \code{'samples'} if they were applied to common samples (correlations are computed between matrix \code{A}), \code{'features'} if they were applied to |
|
47 |
+##' common features (correlations are computed between matrix \code{S}), \code{'genes'} if they share gene IDs after |
|
48 |
+##' annotation into genes (correlations are computed between matrix \code{SByGene}). |
|
49 |
+##' @return A list whose length equals the number of pairs of \code{IcaSet} and whose elements |
|
50 |
+##' are outputs of function \code{\link{cor2An}}. |
|
51 |
+##' @export |
|
52 |
+##' @examples |
|
53 |
+##' |
|
54 |
+##' dat1 <- data.frame(matrix(rnorm(10000),ncol=10,nrow=1000)) |
|
55 |
+##' rownames(dat1) <- paste("g", 1:1000, sep="") |
|
56 |
+##' colnames(dat1) <- paste("s", 1:10, sep="") |
|
57 |
+##' dat2 <- data.frame(matrix(rnorm(10000),ncol=10,nrow=1000)) |
|
58 |
+##' rownames(dat2) <- paste("g", 1:1000, sep="") |
|
59 |
+##' colnames(dat2) <- paste("s", 1:10, sep="") |
|
60 |
+##' |
|
61 |
+##' ## run ICA |
|
62 |
+##' resJade1 <- runICA(X=dat1, nbComp=3, method = "JADE") |
|
63 |
+##' resJade2 <- runICA(X=dat2, nbComp=3, method = "JADE") |
|
64 |
+##' |
|
65 |
+##' ## build params |
|
66 |
+##' params <- buildMineICAParams(resPath="toy/") |
|
67 |
+##' |
|
68 |
+##' ## build IcaSet object |
|
69 |
+##' icaSettoy1 <- buildIcaSet(params=params, A=data.frame(resJade1$A), S=data.frame(resJade1$S), |
|
70 |
+##' dat=dat1, alreadyAnnot=TRUE)$icaSet |
|
71 |
+##' icaSettoy2 <- buildIcaSet(params=params, A=data.frame(resJade2$A), S=data.frame(resJade2$S), |
|
72 |
+##' dat=dat2, alreadyAnnot=TRUE)$icaSet |
|
73 |
+##' |
|
74 |
+##' listPairCor <- compareAn(icaSets=list(icaSettoy1,icaSettoy2), labAn=c("toy1","toy2"), |
|
75 |
+##' type.corr="pearson", level="genes", cutoff_zval=0) |
|
76 |
+##' |
|
77 |
+##' |
|
78 |
+##' \dontrun{ |
|
79 |
+##' #### Comparison of 2 ICA decompositions obtained on 2 different gene expression datasets. |
|
80 |
+##' ## load the two datasets |
|
81 |
+##' library(breastCancerMAINZ) |
|
82 |
+##' library(breastCancerVDX) |
|
83 |
+##' data(mainz) |
|
84 |
+##' data(vdx) |
|
85 |
+##' |
|
86 |
+##' ## Define a function used to build two examples of IcaSet objects |
|
87 |
+##' treat <- function(es, annot="hgu133a.db") { |
|
88 |
+##' es <- selectFeatures_IQR(es,10000) |
|
89 |
+##' exprs(es) <- t(apply(exprs(es),1,scale,scale=FALSE)) |
|
90 |
+##' colnames(exprs(es)) <- sampleNames(es) |
|
91 |
+##' resJade <- runICA(X=exprs(es), nbComp=10, method = "JADE", maxit=10000) |
|
92 |
+##' resBuild <- buildIcaSet(params=buildMineICAParams(), A=data.frame(resJade$A), S=data.frame(resJade$S), |
|
93 |
+##' dat=exprs(es), pData=pData(es), refSamples=character(0), |
|
94 |
+##' annotation=annot, typeID= typeIDmainz, |
|
95 |
+##' chipManu = "affymetrix", mart=mart) |
|
96 |
+##' icaSet <- resBuild$icaSet |
|
97 |
+##' } |
|
98 |
+##' ## Build the two IcaSet objects |
|
99 |
+##' icaSetMainz <- treat(mainz) |
|
100 |
+##' icaSetVdx <- treat(vdx) |
|
101 |
+##' |
|
102 |
+##' ## The pearson correlation is used as a measure of association between the gene projections |
|
103 |
+##' # on the different components (type.corr="pearson"). |
|
104 |
+##' listPairCor <- compareAn(icaSets=list(icaSetMainz,icaSetVdx), |
|
105 |
+##' labAn=c("Mainz","Vdx"), type.corr="pearson", level="genes", cutoff_zval=0) |
|
106 |
+##' |
|
107 |
+##' ## Same thing but adding a selection of genes on which the correlation between two components is computed: |
|
108 |
+##' # when considering pairs of components, only projections whose scaled values are not located within |
|
109 |
+##' # the circle of radius 1 are used to compute the correlation (cutoff_zval=1). |
|
110 |
+##' listPairCor <- compareAn(icaSets=list(icaSetMainz,icaSetVdx), |
|
111 |
+##' labAn=c("Mainz","Vdx"), type.corr="pearson", cutoff_zval=1, level="genes") |
|
112 |
+##' } |
|
113 |
+##' @export |
|
114 |
+##' @author Anne Biton |
|
115 |
+##' @seealso \code{\link{cor2An}} |
|
116 |
+ |
|
117 |
+compareAn <- function (icaSets, |
|
118 |
+ labAn, |
|
119 |
+ type.corr = c("pearson","spearman"), |
|
120 |
+ cutoff_zval=0, |
|
121 |
+ level = c("samples","features","genes") |
|
122 |
+ ) { |
|
123 |
+ |
|
124 |
+ if (missing(labAn)) |
|
125 |
+ if (is.null(names(icaSets))) |
|
126 |
+ labAn <- c(1:nbAn) |
|
127 |
+ else |
|
128 |
+ labAn <- names(icaSets) |
|
129 |
+ |
|
130 |
+ nbAn <- length(icaSets) |
|
131 |
+ |
|
132 |
+ if (length(labAn) != nbAn) |
|
133 |
+ stop("labAn must have the same length than the list icaSets") |
|
134 |
+ |
|
135 |
+ level <- match.arg(tolower(level), choices = c("features","genes","samples")) |
|
136 |
+ switch(level, |
|
137 |
+ features={ object="S"}, |
|
138 |
+ genes={object="SByGene"}, |
|
139 |
+ samples={object="A"} |
|
140 |
+ ) |
|
141 |
+ |
|
142 |
+ |
|
143 |
+ type.corr <- match.arg(type.corr) |
|
144 |
+ nbCompByAn <- lapply(icaSets, function(x) ncol(A(x))) |
|
145 |
+ |
|
146 |
+ |
|
147 |
+ matlist <- |
|
148 |
+ lapply(icaSets, |
|
149 |
+ function(icaSet, object) { |
|
150 |
+ data.frame(icaSet[object],check.names = FALSE, stringsAsFactors = FALSE) |
|
151 |
+ } |
|
152 |
+ , object = object |
|
153 |
+ ) |
|
154 |
+ |
|
155 |
+ |
|
156 |
+ |
|
157 |
+ allpairs <- combn(x=1:nbAn,m = 2, simplify = FALSE) |
|
158 |
+ pairnames <- unlist(lapply(allpairs, function(x,labAn) paste(labAn[x[1]],labAn[x[2]],sep="-"), labAn = labAn)) |
|
159 |
+ |
|
160 |
+ listCorPair <- |
|
161 |
+ lapply(allpairs, |
|
162 |
+ function(pa, matlist, labAn, cutoff_zval, type.corr) { |
|
163 |
+ mat1 <- matlist[[pa[1]]] |
|
164 |
+ mat2 <- matlist[[pa[2]]] |
|
165 |
+ resCor <- cor2An(mat1, mat2, lab = c(labAn[pa[1]],labAn[pa[2]]), type.corr = type.corr, cutoff_zval = cutoff_zval) |
|
166 |
+ |
|
167 |
+ |
|
168 |
+ |
|
169 |
+ } |
|
170 |
+ , matlist = matlist |
|
171 |
+ , labAn = labAn |
|
172 |
+ , type.corr = type.corr |
|
173 |
+ , cutoff_zval = if (missing(cutoff_zval)) NA else cutoff_zval |
|
174 |
+ ) |
|
175 |
+ names(listCorPair) <- pairnames |
|
176 |
+ |
|
177 |
+ return(listCorPair) |
|
178 |
+} |
|
179 |
+ |
|
180 |
+ |
|
181 |
+ |
|
182 |
+##' This function measures the correlation between two matrices containing the results of |
|
183 |
+##' two decompositions. |
|
184 |
+##' |
|
185 |
+##' Before computing the correlations, the components are scaled and restricted |
|
186 |
+##' to common row names. |
|
187 |
+##' |
|
188 |
+##' It must be taken into account by the user that if \code{cutoff_zval} is different |
|
189 |
+##' from NULL or zero, the computation will be slowler since each pair of |
|
190 |
+##' component is treated individually. |
|
191 |
+##' |
|
192 |
+##' When \code{cutoff_zval} is specified, for each |
|
193 |
+##' pair of components, genes that are included in the circle of center 0 and |
|
194 |
+##' radius \code{cutoff_zval} are excluded from the computation of the correlation |
|
195 |
+##' between the gene projection of the two components. |
|
196 |
+##' @title Correlation between two matrices |
|
197 |
+##' @param mat1 matrix of dimension features/genes x number of components, e.g |
|
198 |
+##' the results of an ICA decomposition |
|
199 |
+##' @param mat2 matrix of dimension features/genes x number of components, e.g |
|
200 |
+##' the results of an ICA decomposition |
|
201 |
+##' @param lab The vector of labels for mat1 and mat2, e.g the the names of the |
|
202 |
+##' two datasets on which were calculated the two decompositions |
|
203 |
+##' @param type.corr Type of correlation, either \code{'pearson'} or |
|
204 |
+##' \code{'spearman'} |
|
205 |
+##' @param cutoff_zval cutoff_zval: 0 (default) if all genes are |
|
206 |
+##' used to compute the correlation between the components, or a threshold to |
|
207 |
+##' compute the correlation on the genes that have at least a scaled projection |
|
208 |
+##' higher than cutoff_zval. |
|
209 |
+##' @return This function returns a list consisting of: |
|
210 |
+##' \item{cor}{a matrix of dimensions '(nbcomp1+nbcomp2) x (nbcomp1*nbcomp2)', |
|
211 |
+##' containing the correlation values between each pair of components,} |
|
212 |
+##' \item{pval}{ a matrix of dimension '(nbcomp1+nbcomp2) x (nbcomp1*nbcomp2)', |
|
213 |
+##' containing the p-value of the correlation tests for each |
|
214 |
+##' pair of components,} |
|
215 |
+##' \item{inter}{ the intersection between the features/genes of \code{mat1} |
|
216 |
+##' and \code{mat2},} |
|
217 |
+##' \item{labAn}{ the labels of the compared matrices.} |
|
218 |
+##' @author Anne Biton |
|
219 |
+##' @export |
|
220 |
+##' @examples |
|
221 |
+##' cor2An(mat1=matrix(rnorm(10000),nrow=1000,ncol=10), mat2=matrix(rnorm(10000),nrow=1000,ncol=10), |
|
222 |
+##' lab=c("An1","An2"), type.corr="pearson") |
|
223 |
+##' |
|
224 |
+##' @seealso \code{rcorr}, \code{cor.test}, \code{\link{compareAn}} |
|
225 |
+ |
|
226 |
+cor2An <- function (mat1, |
|
227 |
+ mat2, |
|
228 |
+ lab, |
|
229 |
+ type.corr = c("pearson","spearman"), |
|
230 |
+ cutoff_zval = 0 |
|
231 |
+ ) { |
|
232 |
+ |
|
233 |
+ if (is.null(cutoff_zval)) |
|
234 |
+ cutoff_zval <- 0 |
|
235 |
+ |
|
236 |
+ if (cutoff_zval < 0) |
|
237 |
+ stop("'cutoff_zval' must be positive") |
|
238 |
+ |
|
239 |
+ comp1 <- comp2 <- NULL |
|
240 |
+ |
|
241 |
+ mat1 <- data.frame(scale(mat1), check.names = FALSE, stringsAsFactors = FALSE) |
|
242 |
+ mat2 <- data.frame(scale(mat2), check.names = FALSE, stringsAsFactors = FALSE) |
|
243 |
+ nbcomp1 <- ncol(mat1) |
|
244 |
+ nbcomp2 <- ncol(mat2) |
|
245 |
+ |
|
246 |
+ gg <- intersect(rownames(mat1),rownames(mat2)) |
|
247 |
+ |
|
248 |
+ if (length(gg)<10) |
|
249 |
+ warning(paste("Less than 10 elements are common between the icaSet objects ", lab[1], " and ", lab[2], ", the comparison of these two decompositions should be reconsidered.", sep = "")) |
|
250 |
+ |
|
251 |
+ mat1 <- mat1[gg,] |
|
252 |
+ mat2 <- mat2[gg,] |
|
253 |
+ |
|
254 |
+ if (missing(lab)) |
|
255 |
+ lab <- paste("an",c(1:2),sep = "") |
|
256 |
+ |
|
257 |
+ if (cutoff_zval == 0) { |
|
258 |
+ r <- signif(rcorr(as.matrix(mat1), as.matrix(mat2), type = type.corr)$r, digits = 5) |
|
259 |
+ dimnames(r) <- list(c(paste("comp",c(1:nbcomp1), "_", lab[1], sep = ""), paste("comp",c(1:nbcomp2), "_", lab[2], sep = "")),c(paste("comp",c(1:nbcomp1), "_", lab[1], sep = ""), paste("comp",c(1:nbcomp2), "_", lab[2], sep = ""))) |
|
260 |
+ |
|
261 |
+ rpval <- NULL |
|
262 |
+ |
|
263 |
+ rpval <- rcorr(as.matrix(mat1),as.matrix(mat2),type=type.corr)$P |
|
264 |
+ dimnames(rpval) <- list(c(paste("comp",c(1:nbcomp1), "_", lab[1], sep = ""), paste("comp",c(1:nbcomp2), "_", lab[2], sep = "")),c(paste("comp",c(1:nbcomp1), "_", lab[1], sep = ""), paste("comp",c(1:nbcomp2), "_", lab[2], sep = ""))) |
|
265 |
+ |
|
266 |
+ } |
|
267 |
+ else { |
|
268 |
+ |
|
269 |
+ ## transform matrix into lists |
|
270 |
+ l1 <- lapply(as.list(mat1), |
|
271 |
+ function(x,n) { |
|
272 |
+ names(x) <- n |
|
273 |
+ return(x) |
|
274 |
+ } |
|
275 |
+ , n = rownames(mat1) |
|
276 |
+ ) |
|
277 |
+ l2 <- lapply(as.list(mat2), |
|
278 |
+ function(x,n) { |
|
279 |
+ names(x) <- n |
|
280 |
+ return(x) |
|
281 |
+ } |
|
282 |
+ , n = rownames(mat2) |
|
283 |
+ ) |
|
284 |
+ |
|
285 |
+ rpval <- matrix(ncol = nbcomp2+nbcomp1 , nrow =nbcomp2+nbcomp1) |
|
286 |
+ r <- matrix(ncol = nbcomp2+nbcomp1 , nrow =nbcomp2+nbcomp1) |
|
287 |
+ |
|
288 |
+ for (i in 1:nbcomp1) { |
|
289 |
+ reslist <- |
|
290 |
+ sapply(1:nbcomp2, |
|
291 |
+ function(j, i, l1, l2, cutoff_zval, gg) { |
|
292 |
+ dc <- data.frame(comp1=l1[[i]],comp2=l2[[j]]) |
|
293 |
+ rownames(dc) <- gg |
|
294 |
+ g2del <- rownames(subset(dc, comp1^2 + comp2^2<= cutoff_zval )) |
|
295 |
+ inter <- gg[!(gg %in% g2del)] |
|
296 |
+ res <- cor.test(as.matrix(mat1[inter,i]),as.matrix(mat2[inter,j]), method = type.corr) |
|
297 |
+ return(list(cor=res$estimate,pval=res$p.value)) |
|
298 |
+ } |
|
299 |
+ , i = i |
|
300 |
+ , l1 = l1 |
|
301 |
+ , l2 = l2 |
|
302 |
+ , cutoff_zval = cutoff_zval |
|
303 |
+ , gg = gg) |
|
304 |
+ |
|
305 |
+ reslist <- as.data.frame(reslist) |
|
306 |
+ r[i,(nbcomp1+1):(nbcomp1+nbcomp2)] <- r[(nbcomp1+1):(nbcomp1+nbcomp2),i] <- unlist(reslist["cor",]) |
|
307 |
+ rpval[i,(nbcomp1+1):(nbcomp1+nbcomp2)] <- rpval[(nbcomp1+1):(nbcomp1+nbcomp2),i] <- unlist(reslist["pval",]) |
|
308 |
+ |
|
309 |
+ for (j in 1:nbcomp1) { |
|
310 |
+ dc <- data.frame(comp1=l1[[i]],comp2=l1[[j]]) |
|
311 |
+ rownames(dc) <- gg |
|
312 |
+ g2del <- rownames(subset(dc, comp1^2 + comp2^2<= cutoff_zval )) |
|
313 |
+ inter <- names(l1[[1]])[!(names(l1[[1]]) %in% g2del)] |
|
314 |
+ res <- cor.test(as.matrix(mat1[inter,i]),as.matrix(mat1[inter,j]), method = type.corr) |
|
315 |
+ rpval[i,j] <- rpval[j,i] <- res$p.value |
|
316 |
+ r[i,j] <- r[j,i] <- res$estimate |
|
317 |
+ } |
|
318 |
+ } |
|
319 |
+ |
|
320 |
+ for (i in 1:nbcomp2) { |
|
321 |
+ for (j in 1:nbcomp2) { |
|
322 |
+ dc <- data.frame(comp1=l2[[i]],comp2=l2[[j]]) |
|
323 |
+ rownames(dc) <- gg |
|
324 |
+ g2del <- rownames(subset(dc, comp1^2 + comp2^2<= cutoff_zval )) |
|
325 |
+ inter <- names(l2[[1]])[!(names(l2[[1]]) %in% g2del)] |
|
326 |
+ res <- cor.test(as.matrix(mat2[inter,i]),as.matrix(mat2[inter,j]), method = type.corr) |
|
327 |
+ rpval[nbcomp1+i,nbcomp1+j] <- rpval[nbcomp1+j,nbcomp1+i] <- res$p.value |
|
328 |
+ r[nbcomp1+i,nbcomp1+j] <- r[nbcomp1+j,nbcomp1+i] <- res$estimate |
|
329 |
+ |
|
330 |
+ } |
|
331 |
+ } |
|
332 |
+ |
|
333 |
+ dimnames(rpval) <- list(c(paste("comp",c(1:nbcomp1), "_", lab[1], sep = ""), paste("comp",c(1:nbcomp2), "_", lab[2], sep = "")),c(paste("comp",c(1:nbcomp1), "_", lab[1], sep = ""), paste("comp",c(1:nbcomp2), "_", lab[2], sep = ""))) |
|
334 |
+ dimnames(r) <- list(c(paste("comp",c(1:nbcomp1), "_", lab[1], sep = ""), paste("comp",c(1:nbcomp2), "_", lab[2], sep = "")),c(paste("comp",c(1:nbcomp1), "_", lab[1], sep = ""), paste("comp",c(1:nbcomp2), "_", lab[2], sep = ""))) |
|
335 |
+ |
|
336 |
+ } |
|
337 |
+ return(list(cor=r,pval=rpval,inter = gg, nbComp = c(nbcomp1,nbcomp2), labAn = lab)) |
|
338 |
+} |
|
339 |
+ |
|
340 |
+ |
|
341 |
+ |
|
342 |
+ |
|
343 |
+ |
|
344 |
+ |
|
345 |
+correl2Comp <- function ( |
|
346 |
+### This function computes the correlation between two components. |
|
347 |
+ comp1 |
|
348 |
+ ### The first component, a vector of projections or contributions indexed by labels |
|
349 |
+ ,comp2 |
|
350 |
+ ### The second component, a vector of projections or contributions indexed by labels |
|
351 |
+ ,type.corr = "pearson" |
|
352 |
+ ### name of the type of correlation to compute, either 'pearson' or 'spearman' |
|
353 |
+ ,plot = FALSE |
|
354 |
+ ### if TRUE the plot of comp1 vs comp2 is drawn |
|
355 |
+ ,cutoff_zval = 0 |
|
356 |
+ ### either NULL or 0 (default) if all genes are used to compute the correlation between the components, or a threshold to compute the correlation on the genes that have at least a scaled projection higher than cutoff_zval. |
|
357 |
+ ,test = FALSE |
|
358 |
+ ### if TRUE the p-value of correlation is returned instead of the correlation value |
|
359 |
+ ,alreadyTreat = FALSE |
|
360 |
+ ### if TRUE comp1 and comp2 are considered as being already treated (i.e scaled and restricted to common elements) |
|
361 |
+ ) { |
|
362 |
+##details<< Before computing the correlation, the components are scaled and restricted to common labels. |
|
363 |
+## When \code{cutoff_zval} is different from 0, the elements that are included in the circle of center 0 and radius \code{cutoff_zval} are not taken into account during the computation of the correlation. |
|
364 |
+ |
|
365 |
+ if(!alreadyTreat) { |
|
366 |
+ comp1 <- (comp1-mean(comp1))/sd(comp1) |
|
367 |
+ comp2 <- (comp2-mean(comp2))/sd(comp2) |
|
368 |
+ gg <- intersect(names(comp1),names(comp2)) |
|
369 |
+ comp1 <- comp1[gg] |
|
370 |
+ comp2 <- comp2[gg] |
|
371 |
+ } |
|
372 |
+ |
|
373 |
+ dc <- data.frame(comp1=comp1,comp2=comp2) |
|
374 |
+ rownames(dc) <- gg |
|
375 |
+ g2del <- rownames(subset(dc, comp1^2 + comp2^2<= cutoff_zval )) |
|
376 |
+ genes.intersect <- gg[!(gg %in% g2del)] |
|
377 |
+ |
|
378 |
+ if (length(genes.intersect) < 4) |
|
379 |
+ if (test) return(1) else return(0) |
|
380 |
+ comp1.ord = comp1[genes.intersect] |
|
381 |
+ comp2.ord = comp2[genes.intersect] |
|
382 |
+ |
|
383 |
+ |
|
384 |
+ if (test) { |
|
385 |
+ |
|
386 |
+ r = cor.test(comp1.ord,comp2.ord,method = type.corr, alternative = "two.sided")$p.value |
|
387 |
+ } |
|
388 |
+ else { |
|
389 |
+ r = cor(comp1.ord,comp2.ord,method = type.corr) |
|
390 |
+ r = signif(r, digits = 3) |
|
391 |
+ |
|
392 |
+ } |
|
393 |
+ if (plot) plot(comp1.ord,comp2.ord,xlab = paste("comp1"),ylab=paste("comp2"),pch = 16,sub = paste("corr_",type.corr," = ",r,sep="")) |
|
394 |
+ return(r) |
|
395 |
+### This function returns either the correlation value or the p-value of the correlation test. |
|
396 |
+} |
|
397 |
+ |
|
398 |
+ |
|
399 |
+##' This function builds a data.frame describing for each node of the graph |
|
400 |
+##' its ID and which analysis/data it comes from. |
|
401 |
+##' |
|
402 |
+##' The created file is used in Cytoscape. |
|
403 |
+##' @title Generate node attributes |
|
404 |
+##' @param nbAn Number of analyses being considered, i.e number of IcaSet |
|
405 |
+##' objects |
|
406 |
+##' @param nbComp Number of components by analysis, if of length 1 then |
|
407 |
+##' it is assumed that each analysis has the same number of components. |
|
408 |
+##' @param labAn Labels of the analysis, if missing it will be generated |
|
409 |
+##' as an1, an2, ... |
|
410 |
+##' @param labComp List containing the component labels indexed by analysis, if missing |
|
411 |
+##' will be generated as comp1, comp2, ... |
|
412 |
+##' @param file File where the description of the node attributes will be |
|
413 |
+##' written |
|
414 |
+##' @return A data.frame describing each node/component |
|
415 |
+##' @export |
|
416 |
+##' @examples |
|
417 |
+##' ## 4 datasets, 20 components calculated in each dataset, labAn |
|
418 |
+##' nodeAttrs(nbAn=4, nbComp=20, labAn=c("tutu","titi","toto","tata")) |
|
419 |
+##' |
|
420 |
+##' @author Anne Biton |
|
421 |
+ |
|
422 |
+nodeAttrs <- function(nbAn, |
|
423 |
+ nbComp, |
|
424 |
+ labAn, |
|
425 |
+ labComp, |
|
426 |
+ file |
|
427 |
+ ) { |
|
428 |
+ |
|
429 |
+ nb <- an <- lab <- comp <- NULL |
|
430 |
+ |
|
431 |
+ if (missing(labAn)) |
|
432 |
+ labAn <- paste(rep("an",nbAn),1:nbAn,sep = "") |
|
433 |
+ else if (length(labAn) != nbAn) |
|
434 |
+ stop("The length of 'labAn' must equal 'nbAn'.") |
|
435 |
+ |
|
436 |
+ |
|
437 |
+ if (missing(labComp)) { |
|
438 |
+ if (length(nbComp) == 1) { |
|
439 |
+ labid <- |
|
440 |
+ paste( |
|
441 |
+ rep(paste(rep("comp",nbComp),c(1:nbComp),sep = ""),nbAn), |
|
442 |
+ paste(rep("an",nbComp*nbAn),sapply(c(1:nbAn),rep,nbComp),sep=""), |
|
443 |
+ sep = "_" |
|
444 |
+ ) |
|
445 |
+ labComp <-rep(paste(rep("comp",nbComp),c(1:nbComp),sep = ""),nbAn) |
|
446 |
+ |
|
447 |
+ nbComp <- rep(nbComp,nbAn) |
|
448 |
+ } |
|
449 |
+ else { |
|
450 |
+ if (length(nbComp) != nbAn) |
|
451 |
+ stop("The provided number of components must equal the number of analyses") |
|
452 |
+ |
|
453 |
+ labid <- foreach(nb = nbComp, an = labAn) %do% {paste(paste(rep("comp",nb),c(1:nb),sep = ""),rep(an,nb),sep="_")} |
|
454 |
+ labid <- unlist(labid) |
|
455 |
+ labComp <- unlist(foreach(nb = nbComp, an = 1:nbAn) %do% {paste(rep("comp",sum(nb)),c(1:nb),sep = "")}) |
|
456 |
+ } |
|
457 |
+ } |
|
458 |
+ else { |
|
459 |
+ if (!is.list(labComp)) |
|
460 |
+ stop("The component labels must be provided using a list.") |
|
461 |
+ if (length(labComp) != nbAn) |
|
462 |
+ stop("The length of the list containing the component labels 'labComp' must have the same length than the list containing the analysis labels 'labAn'.") |
|
463 |
+ names(labComp) <- names(labAn) |
|
464 |
+ |
|
465 |
+ foreach(lab = labComp, nb = nbComp) %do% { |
|
466 |
+ if (length(lab) != nb) |
|
467 |
+ stop("The length of the list 'labComp' containing component labels does not map to 'nbComp'.") |
|
468 |
+ } |
|
469 |
+ labid <- unlist(foreach(nb = nbComp, an = labAn, comp = labComp) %do% {paste(comp,rep(an,nb),sep="_")}) |
|
470 |
+ labComp <- unlist(labComp) |
|
471 |
+ |
|
472 |
+ |
|
473 |
+ } |
|
474 |
+ |
|
475 |
+ compnb <- unlist(lapply(nbComp,function(x) c(1:x))) |
|
476 |
+ names(nbComp) <- labAn |
|
477 |
+ an <- unlist(lapply(names(nbComp), function(x,nb) rep(x,nb[x]), nb = nbComp)) |
|
478 |
+ |
|
479 |
+ dataAttr <- data.frame(id = labid, labComp = labComp, indComp = compnb, labAn = an, stringsAsFactors = FALSE, check.names = FALSE) |
|
480 |
+ |
|
481 |
+ if (!missing(file)) |
|
482 |
+ if (!is.null(file)) |
|
483 |
+ write.table(dataAttr,file=file,sep = " ", row.names = FALSE, quote = FALSE) |
|
484 |
+ |
|
485 |
+ return(dataAttr) |
|
486 |
+} |
|
487 |
+ |
|
488 |
+##' compareAn2graphfile |
|
489 |
+##' |
|
490 |
+##' This function builds a correlation graph from the outputs of function \code{\link{compareAn}}. |
|
491 |
+##' |
|
492 |
+##' When correlations are considered (\code{useVal}="cor"), absolute values |
|
493 |
+##' are used since the components have no direction. |
|
494 |
+##' |
|
495 |
+##' If \code{useMax} is \code{TRUE} each component is linked to the most correlated component of |
|
496 |
+##' each different \code{IcaSet}. |
|
497 |
+##' |
|
498 |
+##' If \code{cutoff} is specified, only |
|
499 |
+##' correlations exceeding this value are taken into account during the graph construction. |
|
500 |
+##' For example, if \code{cutoff} is 1, only relationships between components |
|
501 |
+##' that correspond to a correlation value larger than 1 will be included. |
|
502 |
+##' |
|
503 |
+##' When \code{useVal="pval"} and \code{useMax=TRUE}, the minimum value is taken |
|
504 |
+##' instead of the maximum. |
|
505 |
+##' |
|
506 |
+##' @param listPairCor The output of the function \code{\link{compareAn}}, containing the |
|
507 |
+##' correlation between several pairs of objects of class \code{\link{IcaSet}}. |
|
508 |
+##' @param useMax If TRUE, the graph is restricted to edges that correspond to |
|
509 |
+##' maximum score, see details |
|
510 |
+##' @param cutoff Cutoff used to select pairs that will be included in the |
|
511 |
+##' graph. |
|
512 |
+##' @param useVal The value on which is based the graph, either \code{"cor"} for |
|
513 |
+##' correlation or \code{"pval"} for p-values of correlation tests. |
|
514 |
+##' @param file File name. |
|
515 |
+##' @return A data.frame with the graph description, has |
|
516 |
+##' two columns \code{n1} and \code{n2} filled with node IDs, each row denotes that there is an edge from \code{n1} to \code{n2}. Additional columns quantify the strength of association: correlation (\code{cor}), p-value (\code{pval}), (\code{1-abs(cor)}) (\code{distcor}), log10-pvalue (\code{logpval}). |
|
517 |
+##' @author Anne Biton |
|
518 |
+##' @seealso \code{\link{compareAn}}, \code{\link{cor2An}} |
|
519 |
+##' @export |
|
520 |
+##' @examples |
|
521 |
+##' |
|
522 |
+##' dat1 <- data.frame(matrix(rnorm(10000),ncol=10,nrow=1000)) |
|
523 |
+##' rownames(dat1) <- paste("g", 1:1000, sep="") |
|
524 |
+##' colnames(dat1) <- paste("s", 1:10, sep="") |
|
525 |
+##' dat2 <- data.frame(matrix(rnorm(10000),ncol=10,nrow=1000)) |
|
526 |
+##' rownames(dat2) <- paste("g", 1:1000, sep="") |
|
527 |
+##' colnames(dat2) <- paste("s", 1:10, sep="") |
|
528 |
+##' |
|
529 |
+##' ## run ICA |
|
530 |
+##' resJade1 <- runICA(X=dat1, nbComp=3, method = "JADE") |
|
531 |
+##' resJade2 <- runICA(X=dat2, nbComp=3, method = "JADE") |
|
532 |
+##' |
|
533 |
+##' ## build params |
|
534 |
+##' params <- buildMineICAParams(resPath="toy/") |
|
535 |
+##' |
|
536 |
+##' ## build IcaSet object |
|
537 |
+##' icaSettoy1 <- buildIcaSet(params=params, A=data.frame(resJade1$A), S=data.frame(resJade1$S), |
|
538 |
+##' dat=dat1, alreadyAnnot=TRUE)$icaSet |
|
539 |
+##' icaSettoy2 <- buildIcaSet(params=params, A=data.frame(resJade2$A), S=data.frame(resJade2$S), |
|
540 |
+##' dat=dat2, alreadyAnnot=TRUE)$icaSet |
|
541 |
+##' |
|
542 |
+##' resCompareAn <- compareAn(icaSets=list(icaSettoy1,icaSettoy2), labAn=c("toy1","toy2"), |
|
543 |
+##' type.corr="pearson", level="genes", cutoff_zval=0) |
|
544 |
+##' |
|
545 |
+##' ## Build a graph where edges correspond to maximal correlation value (useVal="cor"), |
|
546 |
+##' compareAn2graphfile(listPairCor=resCompareAn, useMax=TRUE, useVal="cor", file="myGraph.txt") |
|
547 |
+##' |
|
548 |
+##' |
|
549 |
+##' \dontrun{ |
|
550 |
+##' #### Comparison of 2 ICA decompositions obtained on 2 different gene expression datasets. |
|
551 |
+##' ## load the two datasets |
|
552 |
+##' library(breastCancerMAINZ) |
|
553 |
+##' library(breastCancerVDX) |
|
554 |
+##' data(mainz) |
|
555 |
+##' data(vdx) |
|
556 |
+##' |
|
557 |
+##' ## Define a function used to build two examples of IcaSet objects |
|
558 |
+##' treat <- function(es, annot="hgu133a.db") { |
|
559 |
+##' es <- selectFeatures_IQR(es,10000) |
|
560 |
+##' exprs(es) <- t(apply(exprs(es),1,scale,scale=FALSE)) |
|
561 |
+##' colnames(exprs(es)) <- sampleNames(es) |
|
562 |
+##' resJade <- runICA(X=exprs(es), nbComp=10, method = "JADE", maxit=10000) |
|
563 |
+##' resBuild <- buildIcaSet(params=buildMineICAParams(), A=data.frame(resJade$A), S=data.frame(resJade$S), |
|
564 |
+##' dat=exprs(es), pData=pData(es), refSamples=character(0), |
|
565 |
+##' annotation=annot, typeID= typeIDmainz, |
|
566 |
+##' chipManu = "affymetrix", mart=mart) |
|
567 |
+##' icaSet <- resBuild$icaSet |
|
568 |
+##' } |
|
569 |
+##' ## Build the two IcaSet objects |
|
570 |
+##' icaSetMainz <- treat(mainz) |
|
571 |
+##' icaSetVdx <- treat(vdx) |
|
572 |
+##' |
|
573 |
+##' ## Compute correlation between every pair of IcaSet objects. |
|
574 |
+##' resCompareAn <- compareAn(icaSets=list(icaSetMainz,icaSetVdx), |
|
575 |
+##' labAn=c("Mainz","Vdx"), type.corr="pearson", level="genes", cutoff_zval=0) |
|
576 |
+##' |
|
577 |
+##' ## Same thing but adding a selection of genes on which the correlation between two components is computed: |
|
578 |
+##' # when considering pairs of components, only projections whose scaled values are not located within |
|
579 |
+##' # the circle of radius 1 are used to compute the correlation (cutoff_zval=1). |
|
580 |
+##' resCompareAn <- compareAn(icaSets=list(icaSetMainz,icaSetVdx), |
|
581 |
+##' labAn=c("Mainz","Vdx"), type.corr="pearson", cutoff_zval=1, level="genes") |
|
582 |
+##' |
|
583 |
+##' ## Build a graph where edges correspond to maximal correlation value (useVal="cor"), |
|
584 |
+##' ## i.e, component A of analysis i is linked to component B of analysis j, |
|
585 |
+##' ## only if component B is the most correlated component to A amongst all component of analysis j. |
|
586 |
+##' compareAn2graphfile(listPairCor=resCompareAn, useMax=TRUE, useVal="cor", file="myGraph.txt") |
|
587 |
+##' |
|
588 |
+##' ## Restrict the graph to correlation values exceeding 0.4 |
|
589 |
+##' compareAn2graphfile(listPairCor=resCompareAn, useMax=FALSE, cutoff=0.4, useVal="cor", file="myGraph.txt") |
|
590 |
+##' |
|
591 |
+##' } |
|
592 |
+compareAn2graphfile <- function (listPairCor, |
|
593 |
+ useMax = TRUE, |
|
594 |
+ cutoff = NULL, |
|
595 |
+ useVal = c("cor","pval"), |
|
596 |
+ file = NULL |
|
597 |
+ ) { |
|
598 |
+ useVal <- match.arg(useVal) |
|
599 |
+ dataGraphs <- |
|
600 |
+ lapply(listPairCor, |
|
601 |
+ function(res, useMax, cutoff, useVal) { |
|
602 |
+ dd <- abs(res[[useVal]]) |
|
603 |
+ |
|
604 |
+ nbcomp1 <- res$nbComp[1] |
|
605 |
+ nbcomp2 <- res$nbComp[2] |
|
606 |
+ attrGraph <- c("cor",if ("pval" %in% names(res)) "pval") |
|
607 |
+ if (useMax) { |
|
608 |
+ if (useVal == "pval") |
|
609 |
+ op <- "which.min" |
|
610 |
+ else |
|
611 |
+ op <- "which.max" |
|
612 |
+ |
|
613 |
+ |
|
614 |
+ ## find correlation max both direction an1 <-> an2 since the max is not necessarily reciprocal |
|
615 |
+ subdd <- dd[(nbcomp1+1):(nbcomp1+nbcomp2), 1:nbcomp1] |
|
616 |
+ ind <- apply(subdd,2,op) |
|
617 |
+ coord <- as.list(data.frame(t(cbind(ind,1:nbcomp1)))) |
|
618 |
+ |
|
619 |
+ dataGraph <- data.frame(n1=rownames(dd)[1:nbcomp1], |
|
620 |
+ n2=rownames(dd)[(nbcomp1+1):(nbcomp1+nbcomp2)][ind]) |
|
621 |
+ dataGraph[[useVal]] <- unlist(lapply(coord, function(x,m) m[x[1],x[2]], m = subdd)) |
|
622 |
+ addval <- setdiff(attrGraph,useVal) |
|
623 |
+ |
|
624 |
+ if (length(addval)==1) |
|
625 |
+ dataGraph[[addval]] <- unlist(lapply(coord, function(x,m) m[x[1],x[2]], m = res[[addval]][(nbcomp1+1):(nbcomp1+nbcomp2), 1:nbcomp1])) |
|
626 |
+ |
|
627 |
+ ## an2 -> an1 |
|
628 |
+ subdd <- dd[1:nbcomp1,(nbcomp1+1):(nbcomp1+nbcomp2)] |
|
629 |
+ ind <- apply(subdd,2,op) |
|
630 |
+ coord <- as.list(data.frame(t(cbind(ind,1:nbcomp2)))) |
|
631 |
+ |
|
632 |
+ dataGraph2 <- data.frame(n1=rownames(dd)[(nbcomp1+1):(nbcomp1+nbcomp2)], |
|
633 |
+ n2=rownames(dd)[1:nbcomp1][ind]) |
|
634 |
+ |
|
635 |
+ dataGraph2[[useVal]] <- unlist(lapply(coord, function(x,m) m[x[1],x[2]], m = subdd)) |
|
636 |
+ |
|
637 |
+ addval <- setdiff(attrGraph,useVal) |
|
638 |
+ |
|
639 |
+ if (length(addval)==1) |
|
640 |
+ dataGraph2[[addval]] <- unlist(lapply(coord, function(x,m) m[x[1],x[2]], m = res[[addval]][1:nbcomp1,(nbcomp1+1):(nbcomp1+nbcomp2)])) |
|
641 |
+ |
|
642 |
+ dataGraph <- rbind(dataGraph,dataGraph2) |
|
643 |
+ |
|
644 |
+ pval <- NULL |
|
645 |
+ if (!is.null(cutoff)) { |
|
646 |
+ if (useVal == "pval") |
|
647 |
+ dataGraph <- subset(dataGraph, pval <= cutoff) |
|
648 |
+ else |
|
649 |
+ dataGraph <- subset(dataGraph, cor >= cutoff) |
|
650 |
+ |
|
651 |
+ } |
|
652 |
+ } |
|
653 |
+ else if (!is.null(cutoff)) { |
|
654 |
+ subdd <- dd[1:nbcomp1, (nbcomp1+1):(nbcomp1+nbcomp2)] |
|
655 |
+ dataGraph <- |
|
656 |
+ lapply(as.list(colnames(subdd)), |
|
657 |
+ function(nn, cutoff, subdd, useVal, attrGraph, res,op, subInd) { |
|
658 |
+ x <- subdd[,nn] |
|
659 |
+ |
|
660 |
+ ind <- which(eval(parse(text=paste("x",op,"cutoff")))) |
|
661 |
+ if (length(ind) > 0) { |
|
662 |
+ dataGraph <- data.frame(n1 = rep(nn,length(ind)), n2 = rownames(subdd)[ind]) |
|
663 |
+ dataGraph[[useVal]] <- subdd[ind,nn] |
|
664 |
+ addval <- setdiff(attrGraph,useVal) |
|
665 |
+ if (length(addval)==1) { |
|
666 |
+ coord <- as.list(data.frame(t(cbind(dataGraph$n1,dataGraph$n2)))) |
|
667 |
+ dataGraph[[addval]] <- unlist(lapply(coord, function(x,m) m[x[1],x[2]], m = res[[addval]][subInd[[1]],subInd[[2]]])) |
|
668 |
+ } |
|
669 |
+ |
|
670 |
+ return(dataGraph) |
|
671 |
+ |
|
672 |
+ } |
|
673 |
+ } |
|
674 |
+ , cutoff = cutoff |
|
675 |
+ , subdd = subdd |
|
676 |
+ , useVal = useVal |
|
677 |
+ , attrGraph = attrGraph |
|
678 |
+ , res = res |
|
679 |
+ , op = if (useVal == "pval") "<=" else ">=" |
|
680 |
+ , subInd = list(1:nbcomp1, (nbcomp1+1):(nbcomp1+nbcomp2)) |
|
681 |
+ ) |
|
682 |
+ dataGraph <- do.call(rbind,dataGraph) |
|
683 |
+ |
|
684 |
+ |
|
685 |
+ } |
|
686 |
+ return(dataGraph) |
|
687 |
+ |
|
688 |
+ } |
|
689 |
+ , useMax = useMax |
|
690 |
+ , cutoff = if (missing(cutoff)) NULL else cutoff |
|
691 |
+ , useVal = useVal |
|
692 |
+ ) |
|
693 |
+ dataGraphs <- do.call(rbind,dataGraphs) |
|
694 |
+ |
|
695 |
+ |
|
696 |
+ dataGraphs$invcor = 1/(10*as.numeric(dataGraphs$cor)) |
|
697 |
+ dataGraphs$distcor = 1-abs(as.numeric(dataGraphs$cor)) |
|
698 |
+ dataGraphs[,c("cor","pval","invcor","distcor")] <- apply(dataGraphs[,c("cor","pval","invcor","distcor")],2,as.numeric) |
|
699 |
+ dataGraphs <- annotReciprocal(dataGraph = dataGraphs, keepOnlyReciprocal = FALSE)# file |
|
700 |
+ dataGraphs[,c("cor","pval","invcor","distcor")] <- apply(dataGraphs[,c("cor","pval","invcor","distcor")],2,as.numeric) |
|
701 |
+ |
|
702 |
+ if (!missing(file)) |
|
703 |
+ if (!is.null(file)) |
|
704 |
+ write.table(dataGraphs, file = file, row.names = FALSE, quote = FALSE, sep = "\t") |
|
705 |
+ |
|
706 |
+ return(dataGraphs) |
|
707 |
+ |
|
708 |
+} |
|
709 |
+ |
|
710 |
+##' annotReciprocal |
|
711 |
+##' |
|
712 |
+##' This function notes edges of a graph as reciprocal or not. |
|
713 |
+##' |
|
714 |
+##' |
|
715 |
+##' @param dataGraph data.frame which contains the graph description, must have |
|
716 |
+##' two columns \code{n1} and \code{n2} filled with node IDs, each row denoting there is an edge from \code{n1} to \code{n2}. |
|
717 |
+##' @param file file where the graph description is written |
|
718 |
+##' @param keepOnlyReciprocal if TRUE \code{dataGraph} is restricted to |
|
719 |
+##' reciprocal edges, else all edges are kept (default). |
|
720 |
+##' @return This function returns the argument \code{dataGraph} with an |
|
721 |
+##' additional column named 'reciprocal' which contains TRUE if the edge |
|
722 |
+##' described by the row is reciprocal, and FALSE if it is not reciprocal. |
|
723 |
+##' @examples |
|
724 |
+##' dg <- data.frame(n1=c("A","B","B","C","C","D","E","F"),n2=c("B","G","A","B","D","C","F","E")) |
|
725 |
+##' annotReciprocal(dataGraph=dg) |
|
726 |
+##' |
|
727 |
+##' @export |
|
728 |
+##' @author Anne Biton |
|
729 |
+annotReciprocal <- function (dataGraph, |
|
730 |
+ file, |
|
731 |
+ keepOnlyReciprocal = FALSE |
|
732 |
+ ) { |
|
733 |
+ |
|
734 |
+ names(dataGraph)[1:2] <- c("n1","n2") |
|
735 |
+ dataGraph$keep <- rep(NA, length=nrow(dataGraph)) |
|
736 |
+ |
|
737 |
+ dataGraph_keep <- apply( dataGraph, MARGIN = 1 , |
|
738 |
+ function (r, dataGraph) { |
|
739 |
+ dataN2 = subset(dataGraph, dataGraph$n1 == r["n2"]) |
|
740 |
+ if (r["n1"] %in% dataN2$n2) r["keep"] = TRUE |
|
741 |
+ else r["keep"] = FALSE |
|
742 |
+ return(r) |
|
743 |
+ } |
|
744 |
+ , dataGraph = dataGraph |
|
745 |
+ ) |
|
746 |
+ |
|
747 |
+ dataGraph_keep <- as.data.frame(t(dataGraph_keep), check.names = FALSE, stringsAsFactors = FALSE) |
|
748 |
+ |
|
749 |
+ names(dataGraph_keep)[ncol(dataGraph_keep)] <- "reciprocal" |
|
750 |
+ |
|
751 |
+ if (keepOnlyReciprocal) { |
|
752 |
+ dataGraph_keep<- subset(dataGraph_keep, dataGraph_keep$reciprocal == TRUE ) |
|
753 |
+ dataGraph_keep <- dataGraph_keep[,-ncol(dataGraph_keep)] |
|
754 |
+ } |
|
755 |
+ |
|
756 |
+ if (!missing(file)) |
|
757 |
+ if(!is.null(file)) |
|
758 |
+ write.table(dataGraph_keep, file = file, sep = "\t", quote = FALSE, row.names = FALSE) |
|
759 |
+ |
|
760 |
+ return(dataGraph_keep) |
|
761 |
+} |
|
762 |
+ |
|
763 |
+ |
|
764 |
+##' This function plots the |
|
765 |
+##' correlation graph in an interactive device using function \code{tkplot}. |
|
766 |
+##' |
|
767 |
+##' You have to slighly move the nodes to see cliques because strongly related nodes are often superimposed. |
|
768 |
+##' The \code{edgeWeight} column is used to weight the edges within the |
|
769 |
+##' fruchterman.reingold layout available in the package \code{igraph}. |
|
770 |
+##' |
|
771 |
+##' The argument |
|
772 |
+##' \code{nodeCol} typically denotes the column containing the names of the datasets. |
|
773 |
+##' Colors are automatically |
|
774 |
+##' attributed to the nodes using palette Set3 of package \code{RColorBrewer}. The |
|
775 |
+##' corresponding colors can be directly specified in the 'col' argument. In |
|
776 |
+##' that case, 'col' must be a vector of colors indexed by the unique elements |
|
777 |
+##' contained in \code{nodeCol} column (e.g dataset ids). |
|
778 |
+##' |
|
779 |
+##' As for colors, one can define |
|
780 |
+##' the column of \code{nodeAttrs} that is used to define the node shapes. The |
|
781 |
+##' corresponding shapes can be directly specified in the \code{shape} argument. In |
|
782 |
+##' that case, \code{shape} must be one of \code{c("circle","square", " vcsquare", "rectangle", "crectangle", "vrectangle")} and must be |
|
783 |
+##' indexed by the unique elements of \code{nodeShape} column. |
|
784 |
+##' |
|
785 |
+##' Unfortunately, shapes |
|
786 |
+##' can't be taken into account when tkplot is TRUE (interactive plot). |
|
787 |
+##' |
|
788 |
+##' If \code{reciproCol} is not missing, it is used to color the edges, either in grey if the |
|
789 |
+##' edge is not reciprocal or in black if the edge is reciprocal. |
|
790 |
+##' |
|
791 |
+##' |
|
792 |
+##' @title Plots graph using |
|
793 |
+##' @param dataGraph A data.frame containing the graph description. It must |
|
794 |
+##' have two columns \code{n1} and \code{n2}, each row denoting that there is an edge from n1 |
|
795 |
+##' to n2. Node labels in columns \code{n1} and \code{n2} of \code{dataGraph} must correspond to |
|
796 |
+##' node IDs in column \code{id} of \code{nodeAttrs}. |
|
797 |
+##' @param edgeWeight The column of dataGraph used to weight edges. |
|
798 |
+##' @param nodeAttrs A data.frame with node description, see function \code{nodeAttrs}. |
|
799 |
+##' @param nodeShape Denotes the column of \code{nodeAttrs} used to attribute the node shapes. |
|
800 |
+##' @param nodeCol Denotes the column of \code{nodeAttrs} used to |
|
801 |
+##' color the nodes in the graph. |
|
802 |
+##' @param nodeName Denotes the column of \code{nodeAttrs} used |
|
803 |
+##' as labels for the nodes in the graph. |
|
804 |
+##' @param col A vector of colors, for the nodes, indexed by the unique elements of \code{nodeCol} |
|
805 |
+##' column from \code{nodeAttrs}. If missing, colors will be automatically attributed. |
|
806 |
+##' @param shape A vector of shapes indexed by the unique elements of |
|
807 |
+##' column \code{nodeShape} from \code{nodeAttrs}. If missing, shapes will be automatically |
|
808 |
+##' attributed. |
|
809 |
+##' @param title Title for the plot |
|
810 |
+##' @param reciproCol Denotes the column of \code{dataGraph} containing \code{TRUE} if the |
|
811 |
+##' row defines a reciprocal node, else \code{FALSE}. See \code{\link{annotReciprocal}}. |
|
812 |
+##' @param tkplot If TRUE, performs interactive plot with function \code{tkplot}, else uses \code{plot.igraph}. |
|
813 |
+##' @param \dots Additional parameters as required by \code{tkplot}. |
|
814 |
+##' @return A list consisting of \describe{ |
|
815 |
+##' \item{dataGraph}{a data.frame defining the correlation |
|
816 |
+##' graph} |
|
817 |
+##' \item{nodeAttrs}{a data.frame describing the node |
|
818 |
+##' of the graph} |
|
819 |
+##' \item{graph}{the graph as an object of class \code{igraph}} |
|
820 |
+##' \item{graphid}{the id of the graph plotted using \code{tkplot}} } |
|
821 |
+##' @export |
|
822 |
+##' @author Anne Biton |
|
823 |
+##' @seealso \code{\link{compareAn}}, \code{\link{nodeAttrs}}, \code{\link{compareAn2graphfile}}, \code{\link{runCompareIcaSets}} |
|
824 |
+##' @examples |
|
825 |
+##' |
|
826 |
+##' dat1 <- data.frame(matrix(rnorm(10000),ncol=10,nrow=1000)) |
|
827 |
+##' rownames(dat1) <- paste("g", 1:1000, sep="") |
|
828 |
+##' colnames(dat1) <- paste("s", 1:10, sep="") |
|
829 |
+##' dat2 <- data.frame(matrix(rnorm(10000),ncol=10,nrow=1000)) |
|
830 |
+##' rownames(dat2) <- paste("g", 1:1000, sep="") |
|
831 |
+##' colnames(dat2) <- paste("s", 1:10, sep="") |
|
832 |
+##' |
|
833 |
+##' ## run ICA |
|
834 |
+##' resJade1 <- runICA(X=dat1, nbComp=3, method = "JADE") |
|
835 |
+##' resJade2 <- runICA(X=dat2, nbComp=3, method = "JADE") |
|
836 |
+##' |
|
837 |
+##' ## build params |
|
838 |
+##' params <- buildMineICAParams(resPath="toy/") |
|
839 |
+##' |
|
840 |
+##' ## build IcaSet object |
|
841 |
+##' icaSettoy1 <- buildIcaSet(params=params, A=data.frame(resJade1$A), S=data.frame(resJade1$S), |
|
842 |
+##' dat=dat1, alreadyAnnot=TRUE)$icaSet |
|
843 |
+##' icaSettoy2 <- buildIcaSet(params=params, A=data.frame(resJade2$A), S=data.frame(resJade2$S), |
|
844 |
+##' dat=dat2, alreadyAnnot=TRUE)$icaSet |
|
845 |
+##' icaSets <- list(icaSettoy1, icaSettoy2) |
|
846 |
+##' |
|
847 |
+##' resCompareAn <- compareAn(icaSets=list(icaSettoy1,icaSettoy2), labAn=c("toy1","toy2"), |
|
848 |
+##' type.corr="pearson", level="genes", cutoff_zval=0) |
|
849 |
+##' |
|
850 |
+##' ## Build a graph where edges correspond to maximal correlation value (useVal="cor"), |
|
851 |
+##' dataGraph <- compareAn2graphfile(listPairCor=resCompareAn, useMax=TRUE, useVal="cor", file="myGraph.txt") |
|
852 |
+##' |
|
853 |
+##' ## construction of the data.frame with the node description |
|
854 |
+##' nbComp <- rep(3,2) #each IcaSet contains 3 components |
|
855 |
+##' nbAn <- 2 # we are comparing 2 IcaSets |
|
856 |
+##' # labels of components created as comp*i* |
|
857 |
+##' labComp <- foreach(icaSet=icaSets, nb=nbComp, an=1:nbAn) %do% { |
|
858 |
+##' paste(rep("comp",sum(nb)),1:nbComp(icaSet),sep = "")} |
|
859 |
+##' |
|
860 |
+##' # creation of the data.frame with the node description |
|
861 |
+##' nodeDescr <- nodeAttrs(nbAn = nbAn, nbComp = nbComp, labComp = labComp, |
|
862 |
+##' labAn = c("toy1","toy2"), file = "nodeInfo.txt") |
|
863 |
+##' |
|
864 |
+##' ## Plot correlation graph, slightly move the attached nodes to make the cliques visible |
|
865 |
+##' ## use tkplot=TRUE to have an interactive graph |
|
866 |
+##' res <- plotCorGraph(title = "Compare toy 1 and 2", dataGraph = dataGraph, nodeName = "indComp", tkplot = FALSE, |
|
867 |
+##' nodeAttrs = nodeDescr, edgeWeight = "cor", nodeShape = "labAn", reciproCol = "reciprocal") |
|
868 |
+##' |
|
869 |
+##' |
|
870 |
+##' \dontrun{ |
|
871 |
+##' ## load two microarray datasets |
|
872 |
+##' library(breastCancerMAINZ) |
|
873 |
+##' library(breastCancerVDX) |
|
874 |
+##' data(mainz) |
|
875 |
+##' data(vdx) |
|
876 |
+##' |
|
877 |
+##' ## Define a function used to build two examples of IcaSet objects |
|
878 |
+##' treat <- function(es, annot="hgu133a.db") { |
|
879 |
+##' es <- selectFeatures_IQR(es,10000) |
|
880 |
+##' exprs(es) <- t(apply(exprs(es),1,scale,scale=FALSE)) |
|
881 |
+##' colnames(exprs(es)) <- sampleNames(es) |
|
882 |
+##' resJade <- runICA(X=exprs(es), nbComp=10, method = "JADE", maxit=10000) |
|
883 |
+##' resBuild <- buildIcaSet(params=buildMineICAParams(), A=data.frame(resJade$A), S=data.frame(resJade$S), |
|
884 |
+##' dat=exprs(es), pData=pData(es), refSamples=character(0), |
|
885 |
+##' annotation=annot, typeID= typeIDmainz, |
|
886 |
+##' chipManu = "affymetrix", mart=mart) |
|
887 |
+##' icaSet <- resBuild$icaSet |
|
888 |
+##' } |
|
889 |
+##' ## Build the two IcaSet objects |
|
890 |
+##' icaSetMainz <- treat(mainz) |
|
891 |
+##' icaSetVdx <- treat(vdx) |
|
892 |
+##' |
|
893 |
+##' icaSets <- list(icaSetMainz, icaSetVdx) |
|
894 |
+##' labAn <- c("Mainz", "Vdx") |
|
895 |
+##' |
|
896 |
+##' ## correlations between gene projections of each pair of IcaSet |
|
897 |
+##' resCompareAn <- compareAn(icaSets = icaSets, level = "genes", type.corr= "pearson", |
|
898 |
+##' labAn = labAn, cutoff_zval=0) |
|
899 |
+##' |
|
900 |
+##' ## construction of the correlation graph using previous output |
|
901 |
+##' dataGraph <- compareAn2graphfile(listPairCor=resCompareAn, useMax=TRUE, file="corGraph.txt") |
|
902 |
+##' |
|
903 |
+##' ## construction of the data.frame with the node description |
|
904 |
+##' nbComp <- rep(10,2) #each IcaSet contains 10 components |
|
905 |
+##' nbAn <- 2 # we are comparing 2 IcaSets |
|
906 |
+##' # labels of components created as comp*i* |
|
907 |
+##' labComp <- foreach(icaSet=icaSets, nb=nbComp, an=1:nbAn) %do% { |
|
908 |
+##' paste(rep("comp",sum(nb)),1:nbComp(icaSet),sep = "")} |
|
909 |
+##' |
|
910 |
+##' # creation of the data.frame with the node description |
|
911 |
+##' nodeDescr <- nodeAttrs(nbAn = nbAn, nbComp = nbComp, labComp = labComp, |
|
912 |
+##' labAn = labAn, file = "nodeInfo.txt") |
|
913 |
+##' |
|
914 |
+##' ## Plot correlation graph, slightly move the attached nodes to make the cliques visible |
|
915 |
+##' res <- plotCorGraph(title = "Compare two ICA decomsitions obtained on \n two |
|
916 |
+##' microarray-based data of breast tumors", dataGraph = dataGraph, nodeName = "indComp", |
|
917 |
+##' nodeAttrs = nodeDescr, edgeWeight = "cor", nodeShape = "labAn", reciproCol = "reciprocal") |
|
918 |
+##' |
|
919 |
+##' } |
|
920 |
+plotCorGraph <- function( |
|
921 |
+ dataGraph, |
|
922 |
+ edgeWeight = "cor", |
|
923 |
+ nodeAttrs, |
|
924 |
+ nodeShape, |
|
925 |
+ nodeCol = "labAn", |
|
926 |
+ nodeName = "indComp", |
|
927 |
+ col, |
|
928 |
+ shape, |
|
929 |
+ title = "", |
|
930 |
+ reciproCol = "reciprocal", |
|
931 |
+ tkplot = FALSE, |
|
932 |
+ ... |
|
933 |
+ ) { |
|
934 |
+ if (!(edgeWeight %in% colnames(dataGraph))) { |
|
935 |
+ stop (paste(edgeWeight,"is not available within the columns of dataGraph.")) |
|
936 |
+ } |
|
937 |
+ nodeName <- match.arg(nodeName, choices = colnames(nodeAttrs)) |
|
938 |
+ |
|
939 |
+ |
|
940 |
+ if (missing(nodeShape)) |
|
941 |
+ nodeAttrs$shape <- rep("circle",nrow(nodeAttrs)) |
|
942 |
+ else if (!(nodeShape %in% colnames(nodeAttrs))) { |
|
943 |
+ nodeAttrs$shape <- rep("circle",nrow(nodeAttrs)) |
|
944 |
+ warning(paste("The column ", nodeShape, " is not available in 'dataGraph'.",sep = "")) |
|
945 |
+ } |
|
946 |
+ else { |
|
947 |
+ potShapes <- c("circle","square", "csquare", "rectangle", "crectangle", "vrectangle") |
|
948 |
+ |
|
949 |
+ autoShape <- TRUE |
|
950 |
+ if (!missing(shape)) { |
|
951 |
+ autoShape <- FALSE |
|
952 |
+ if (length(intersect(names(shape),unique(nodeAttrs[[nodeShape]]))) != length(unique(nodeAttrs[[nodeShape]]))) { |
|
953 |
+ warning("'shape' argument is incorrect, some elements of nodeAttrs$'nodeShape' are not indexed. Shapes will be attributed automatically.") |
|
954 |
+ autoShape <- TRUE |
|
955 |
+ } |
|
956 |
+ |
|
957 |
+ if (length(setdiff(shape,potShapes))>0) { |
|
958 |
+ warning("'shape' argument is incorrect, the available shapes are: 'circle','square', 'csquare', 'rectangle', 'crectangle', and 'vrectangle'. Shapes will be attributed automatically.") |
|
959 |
+ autoShape <- TRUE |
|
960 |
+ |
|
961 |
+ } |
|
962 |
+ } |
|
963 |
+ if (autoShape) { |
|
964 |
+ levs <- unique(nodeAttrs[[nodeShape]]) |
|
965 |
+ if (length(levs) > length(potShapes)) { |
|
966 |
+ warning("R graphs can only handle 6 node shapes at the most.") |
|
967 |
+ shape <- rep(potShapes,6)[1:length(levs)] |
|
968 |
+ } |
|
969 |
+ else |
|
970 |
+ shape <- potShapes[1:length(levs)] |
|
971 |
+ names(shape) <- levs |
|
972 |
+ nodeAttrs$shape <- shape[nodeAttrs[[nodeShape]]] |
|
973 |
+ } |
|
974 |
+ else |
|
975 |
+ nodeAttrs$shape <- shape[nodeAttrs[[nodeShape]]] |
|
976 |
+ |
|
977 |
+ } |
|
978 |
+ |
|
979 |
+ |
|
980 |
+ if (!(nodeCol %in% colnames(nodeAttrs))) { |
|
981 |
+ stop(paste("The column ",nodeCol, " is not available in nodeAttrs.",sep="")) |
|
982 |
+ } |
|
983 |
+ else { |
|
984 |
+ nbAn <- length(unique(nodeAttrs[[nodeCol]])) |
|
985 |
+ |
|
986 |
+ if (!missing(col) && !is.null(col)) { |
|
987 |
+ if (length(intersect(names(col),unique(nodeAttrs[[nodeCol]]))) != length(unique(nodeAttrs[[nodeCol]]))) { |
|
988 |
+ warning("'col' argument is incorrect, some elements of nodeAttrs$'nodeCol' are not indexed. Colors will be attributed automatically.") |
|
989 |
+ colAn <- brewer.pal(nbAn,"Set3") |
|
990 |
+ names(colAn) <- unique(nodeAttrs[[nodeCol]]) |
|
991 |
+ nodeAttrs$col = colAn[nodeAttrs[[nodeCol]]] |
|
992 |
+ } |
|
993 |
+ else |
|
994 |
+ nodeAttrs$col <- col[nodeAttrs[[nodeCol]]] |
|
995 |
+ } |
|
996 |
+ else { |
|
997 |
+ colAn <- brewer.pal(nbAn,"Set3") |
|
998 |
+ names(colAn) <- unique(nodeAttrs[[nodeCol]]) |
|
999 |
+ nodeAttrs$col = colAn[nodeAttrs[[nodeCol]]] |
|
1000 |
+ } |
|
1001 |
+ } |
|
1002 |
+ |
|
1003 |
+ |
|
1004 |
+ n1 <- NULL |
|
1005 |
+ nodes <- as.character(unique(as.character(dataGraph$n1))) |
|
1006 |
+ edges <- llply(nodes, |
|
1007 |
+ function(n,data,edgeWeight) { |
|
1008 |
+ ll <- list(edges = as.character(subset(data, n1 == n)$n2), weights = subset(data, n1 == n)[[edgeWeight]]) |
|
1009 |
+ }, |
|
1010 |
+ data = dataGraph, edgeWeight = edgeWeight) |
|
1011 |
+ names(edges) <- nodes |
|
1012 |
+ |
|
1013 |
+ |
|
1014 |
+ |
|
1015 |
+ |
|
1016 |
+ g <- new("graphNEL", nodes = nodes, edgeL = edges, edgemode = "directed") |
|
1017 |
+ # build igraph object from graphNEL object |
|
1018 |
+ ig <- igraph.from.graphNEL(g, name = TRUE, weight = TRUE) |
|
1019 |
+ |
|
1020 |
+ V(ig)$color <- nodeAttrs$col[match(V(ig)$name,nodeAttrs$id)] |
|
1021 |
+ V(ig)$shape <- nodeAttrs$shape[match(V(ig)$name,nodeAttrs$id)] |
|
1022 |
+ E(ig)$width <- 10^E(ig)$weight |
|
1023 |
+ indHighCor <- which(abs(E(ig)$weight)>0.4) |
|
1024 |
+ indLowCor <- which(abs(E(ig)$weight)<=0.4) |
|
1025 |
+ E(ig)$weight <- 1/abs(log2(abs(E(ig)$weight))) |
|
1026 |
+ E(ig)$weight[indHighCor] <- E(ig)$weight[indHighCor]+6 |
|
1027 |
+ E(ig)$weight[indLowCor] <- E(ig)$weight[indLowCor]-0.25 |
|
1028 |
+ |
|
1029 |
+ if (!missing(reciproCol)) { |
|
1030 |
+ if (reciproCol %in% colnames(dataGraph)) { |
|
1031 |
+ |
|
1032 |
+ nodes <- as.character(unique(as.character(dataGraph$n1))) |
|
1033 |
+ colEdges <- llply(nodes, |
|
1034 |
+ function(n,data,reciproCol) { |
|
1035 |
+ c('TRUE' = "black", 'FALSE' = "gray50")[as.character(subset(data, n1 == n)[[reciproCol]])] |
|
1036 |
+ }, |
|
1037 |
+ data = dataGraph, reciproCol = reciproCol) |
|
1038 |
+ |
|
1039 |
+ colEdges <- unlist(colEdges) |
|
1040 |
+ E(ig)$color <- colEdges |
|
1041 |
+ } |
|
1042 |
+ else { |
|
1043 |
+ warning(paste("No column of 'dataGraph' has the name",reciproCol)) |
|
1044 |
+ E(ig)$color <- "black" |
|
1045 |
+ } |
|
1046 |
+ |
|
1047 |
+ } |
|
1048 |
+ else |
|
1049 |
+ E(ig)$color <- "black" |
|
1050 |
+ |
|
1051 |
+ lay <- layout.fruchterman.reingold(ig,niter=500,area=vcount(ig)^2.3,repulserad=vcount(ig)^100, weights = E(ig)$weight) |
|
1052 |
+ |
|
1053 |
+ graph <- ig |
|
1054 |
+ |
|
1055 |
+ if (!capabilities()[["X11"]]) |
|
1056 |
+ tkplot <- FALSE |
|
1057 |
+ |
|
1058 |
+ if (tkplot) { |
|
1059 |
+ |
|
1060 |
+ if(capabilities()[["X11"]] & tolower(Sys.info()["sysname"])!="windows") { |
|
1061 |
+ dimScreen <- system("xdpyinfo | grep dimensions", intern = TRUE) |
|
1062 |
+ dimScreen <- as.numeric(strsplit(gsub(gsub(gsub(dimScreen, pattern = " ", replacement = ""), pattern = "[A-Za-z0-9]*:", replacement = ""), pattern = "pixels\\([A-Za-z0-9]*\\)", replacement = ""), split = "x")[[1]]) |
|
1063 |
+ } |
|
1064 |
+ else |
|
1065 |
+ dimScreen <- c(450,450) |
|
1066 |
+ |
|
1067 |
+ graphid <- tkplot(ig, |
|
1068 |
+ layout = lay, |
|
1069 |
+ vertex.label = nodeAttrs[[nodeName]][match(V(ig)$name,nodeAttrs$id)], |
|
1070 |
+ vertex.shape = nodeAttrs$shape[match(V(ig)$name,nodeAttrs$id)], |
|
1071 |
+ canvas.width=dimScreen[1], canvas.height=dimScreen[2], |
|
1072 |
+ ...) |
|
1073 |
+ |
|
1074 |
+ |
|
1075 |
+ tkplot.fit.to.screen(graphid) |
|
1076 |
+ |
|
1077 |
+ } |
|