git-svn-id: https://hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/CoGAPS@68145 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,5 +1,5 @@ |
1 | 1 |
Package: CoGAPS |
2 |
-Version: 1.7.0 |
|
2 |
+Version: 1.7.1 |
|
3 | 3 |
Date: 2011-09-02 |
4 | 4 |
Title: Coordinated Gene Activity in Pattern Sets |
5 | 5 |
Author: Elana J. Fertig |
... | ... |
@@ -10,7 +10,7 @@ Description: Coordinated Gene Activity in Pattern Sets (CoGAPS) infers |
10 | 10 |
inferring activity on gene sets. |
11 | 11 |
Maintainer: Elana J. Fertig <ejfertig@jhmi.edu>, Michael F. Ochs <mfo@jhu.edu> |
12 | 12 |
SystemRequirements: GAPS-JAGS (==1.0.2) |
13 |
-Depends: R (>= 2.9.0), R.utils (>= 1.2.4) |
|
13 |
+Depends: R (>= 2.9.0), R.utils (>= 1.2.4), gplots (>=2.8.0) |
|
14 | 14 |
Imports: graphics, grDevices, methods, stats, utils |
15 | 15 |
License: GPL (== 2) |
16 | 16 |
URL: http://www.cancerbiostats.onc.jhmi.edu/CoGAPS.cfm |
... | ... |
@@ -1,4 +1,4 @@ |
1 | 1 |
CoGAPS Algorithm for coordination of activity in gene sets with patterns from GAPS |
2 | 2 |
GAPS MCMC matrix decomposition algorithm |
3 |
-calcCoGASPStat Computes the CoGAPS gene set statistic |
|
3 |
+calcCoGAPSStat Computes the CoGAPS gene set statistic |
|
4 | 4 |
LoadGAPSJAGSLib Loads in the c++ libraries that perform the GAPS matrix decomposition. Recommended to be performed before any run of CoGAPS. |
... | ... |
@@ -2,6 +2,10 @@ export(CoGAPS) |
2 | 2 |
export(GAPS) |
3 | 3 |
export(calcCoGAPSStat) |
4 | 4 |
export(plotGAPS) |
5 |
+export(ReadCoGAPSResults) |
|
6 |
+export(GSTargetHeatmaps) |
|
7 |
+export(computeGeneGSProb) |
|
8 |
+ |
|
5 | 9 |
if(tools:::.OStype() == "windows") { |
6 | 10 |
importFrom(utils, readRegistry, winProgressBar, setWinProgressBar) |
7 | 11 |
} |
... | ... |
@@ -12,6 +16,7 @@ importFrom(grDevices, dev.new, dev.off, pdf) |
12 | 16 |
importFrom(methods, is) |
13 | 17 |
importFrom(stats, heatmap, runif) |
14 | 18 |
importFrom(utils, read.table, write.table) |
19 |
+importFrom(gplots, heatmap.2) |
|
15 | 20 |
S3method(coef, jags) |
16 | 21 |
S3method(update, jags) |
17 | 22 |
S3method(variable.names, jags) |
... | ... |
@@ -1,4 +1,5 @@ |
1 |
-CoGAPS requires GAPS-JAGS available from http://www.cancerbiostats.onc.jhmi.edu/cogaps.cfm. This c++ package is a redistribution of JAGS version 2.1.0 with a module implementing the GAPS matrix decomposition for microarray data. Please see the installation instructions in the users manual. If you have any questions, please comment Elana Fertig <ejfertig@jhmi.edu> or Michael Ochs <mfo@jhu.edu>. |
|
1 |
+CoGAPS requires GAPS-JAGS available from http://www.rits.onc.jhmi.edu/dbb/custom/A6/CoGAPS.cfm or http://astor.som.jhmi.edu/~ejfertig/ejfertig/Software.html. This C++ package is a redistribution of JAGS version 2.1.0 with a module implementing the GAPS matrix decomposition for microarray data. Please see the installation instructions in the user's manual. If you have any questions, please contact Elana Fertig <ejfertig@jhmi.edu> or Michael Ochs <mfo@jhu.edu>. |
|
2 | 2 |
|
3 | 3 |
01Sep2011 - Removed dependency on rjags package on CRAN |
4 | 4 |
02Sep2011 - Included loading of jags libraries upon package loading to avoid setting the LD_LIBRARY_PATH variable |
5 |
+01Aug2012 - Incorporated statistic to quantify inferred membership of each gene in a specified gene set with corresponding examples published in Fertig, Favorov, and Ochs (2012) IEEE Conference on Bioinformatics and Biomedicine (B310). |
5 | 6 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,102 @@ |
1 |
# ReadCoGAPSResults: load the per-experiment CoGAPS/GAPS result tables found
# in one directory.
#
# Experiments are discovered from the files whose names match 'Pmean'; the
# experiment ID is the third '.'-separated field of the file name (for a file
# named 'Pmean.0.<ID>.txt' that is <ID>).  For every ID the companion files
# 'Psd.0.<ID>.txt', 'Amean.0.<ID>.txt' and 'Asd.0.<ID>.txt' must exist too.
# All tables are read as tab-separated text with a header row and row names
# in the first column.
#
# Arguments:
#   path         directory holding the result files (default: getwd()).
#   output.list  TRUE:  every element of the result is a list of matrices
#                       keyed by experiment ID.
#                FALSE: every element is one matrix combining all IDs;
#                       A.mean, A.sd and M are column-bound, P.mean and P.sd
#                       are row-bound, and the bound dimension's names get a
#                       '.<ID>' suffix.
#
# Value:
#   list(A.mean, A.sd, P.mean, P.sd, M) where M is A.mean %*% P.mean for
#   each experiment.
#
# Errors:
#   stop() when no Pmean file is found or when a companion file is missing.
#
# Side effects:
#   Emits the current working directory via message().  The working
#   directory is switched to 'path' while reading and is restored on exit,
#   including when an error is raised.
ReadCoGAPSResults <- function(path=getwd(), output.list=TRUE) {
  origDir <- getwd()

  message(origDir)
  message('\n')

  setwd(path)
  # restore the caller's directory on every exit path, not only on success
  on.exit(setwd(origDir), add=TRUE)

  # identify experiments in the folder
  PMeanFiles <- list.files(pattern='Pmean', full.names=FALSE)
  if (length(PMeanFiles) == 0) {
    # without this guard paste() would fabricate names such as 'Psd.0..txt'
    stop(paste('Cannot read CoGAPS results: no Pmean files found in', path))
  }
  fileIDS <- vapply(strsplit(PMeanFiles, '\\.'), function(x) x[3],
                    character(1))
  names(PMeanFiles) <- fileIDS

  # derive the names of the companion files for every experiment
  PSDFiles <- paste('Psd', '0', fileIDS, 'txt', sep=".")
  names(PSDFiles) <- fileIDS

  AMeanFiles <- paste('Amean', '0', fileIDS, 'txt', sep=".")
  names(AMeanFiles) <- fileIDS

  ASDFiles <- paste('Asd', '0', fileIDS, 'txt', sep=".")
  names(ASDFiles) <- fileIDS

  # check that all companion files are in the folder
  files <- c(PSDFiles, AMeanFiles, ASDFiles)
  missingFiles <- files[!file.exists(files)]
  if (length(missingFiles) > 0) {
    stop(paste('Cannot read CoGAPS results: missing files:',
               paste(missingFiles, collapse=",")))
  }

  # read in data from the files
  readMatrix <- function(fileName) {
    as.matrix(read.table(fileName, header=TRUE, row.names=1, sep="\t"))
  }

  A.mean <- list()
  P.mean <- list()
  M <- list()
  A.sd <- list()
  P.sd <- list()
  for (ID in fileIDS) {
    A.mean[[ID]] <- readMatrix(AMeanFiles[ID])
    A.sd[[ID]] <- readMatrix(ASDFiles[ID])

    P.mean[[ID]] <- readMatrix(PMeanFiles[ID])
    P.sd[[ID]] <- readMatrix(PSDFiles[ID])

    # fitted data for this experiment
    M[[ID]] <- A.mean[[ID]] %*% P.mean[[ID]]
  }

  if (output.list) {
    return(list(A.mean=A.mean, A.sd=A.sd, P.mean=P.mean, P.sd=P.sd, M=M))
  }

  # Matrix output: tag the bound dimension of every experiment's matrix with
  # '.<ID>' and bind the experiments together in the order of fileIDS.
  tagWithID <- function(mats, byColumn) {
    lapply(fileIDS, function(ID) {
      m <- mats[[ID]]
      if (byColumn) {
        colnames(m) <- paste(colnames(m), rep(ID, ncol(m)), sep=".")
      } else {
        row.names(m) <- paste(row.names(m), rep(ID, nrow(m)), sep=".")
      }
      m
    })
  }
  bindColumns <- function(mats) do.call(cbind, tagWithID(mats, TRUE))
  bindRows <- function(mats) do.call(rbind, tagWithID(mats, FALSE))

  list(A.mean=bindColumns(A.mean), A.sd=bindColumns(A.sd),
       P.mean=bindRows(P.mean), P.sd=bindRows(P.sd), M=bindColumns(M))
}
... | ... |
@@ -2,13 +2,33 @@ |
2 | 2 |
%% http://bibdesk.sourceforge.net/ |
3 | 3 |
|
4 | 4 |
|
5 |
-%% Created for Elana Fertig at 2010-02-04 11:48:54 -0500 |
|
5 |
+%% Created for Elana Fertig at 2012-08-01 12:57:24 -0400 |
|
6 | 6 |
|
7 | 7 |
|
8 | 8 |
%% Saved with string encoding Unicode (UTF-8) |
9 | 9 |
|
10 | 10 |
|
11 | 11 |
|
12 |
+@article{Fertig2010, |
|
13 |
+ Abstract = {SUMMARY: Coordinated Gene Activity in Pattern Sets (CoGAPS) provides an integrated package for isolating gene expression driven by a biological process, enhancing inference of biological processes from transcriptomic data. CoGAPS improves on other enrichment measurement methods by combining a Markov chain Monte Carlo (MCMC) matrix factorization algorithm (GAPS) with a threshold-independent statistic inferring activity on gene sets. The software is provided as open source C++ code built on top of JAGS software with an R interface. AVAILABILITY: The R package CoGAPS and the C++ package GAPS-JAGS are provided open source under the GNU Lesser Public License (GLPL) with a users manual containing installation and operating instructions. CoGAPS is available through Bioconductor and depends on the rjags package available through CRAN to interface CoGAPS with GAPS-JAGS. URL: http://www.cancerbiostats.onc.jhmi.edu/cogaps.cfm .}, |
|
14 |
+ Author = {Fertig, EJ and Ding, J and Favorov, AV and Parmigiani, G and Ochs, MF}, |
|
15 |
+ Date-Added = {2012-08-01 12:56:23 -0400}, |
|
16 |
+ Date-Modified = {2012-08-01 12:57:24 -0400}, |
|
17 |
+ Doi = {10.1093/bioinformatics/btq503}, |
|
18 |
+ Journal = {Bioinformatics}, |
|
19 |
+ Journal-Full = {Bioinformatics (Oxford, England)}, |
|
20 |
+ Mesh = {Computational Biology; Gene Expression; Gene Expression Profiling; Genomics; Markov Chains; Oligonucleotide Array Sequence Analysis; Software}, |
|
21 |
+ Month = {Nov}, |
|
22 |
+ Number = {21}, |
|
23 |
+ Pages = {2792-3}, |
|
24 |
+ Pmc = {PMC3025742}, |
|
25 |
+ Pmid = {20810601}, |
|
26 |
+ Pst = {ppublish}, |
|
27 |
+ Title = {{CoGAPS: an R/C++ package to identify patterns and biological process activity in transcriptomic data}}, |
|
28 |
+ Volume = {26}, |
|
29 |
+ Year = {2010}, |
|
30 |
+ Bdsk-Url-1 = {http://dx.doi.org/10.1093/bioinformatics/btq503}} |
|
31 |
+ |
|
12 | 32 |
@article{Tavazoie1999, |
13 | 33 |
Abstract = {Technologies to measure whole-genome mRNA abundances and methods to organize and display such data are emerging as valuable tools for systems-level exploration of transcriptional regulatory networks. For instance, it has been shown that mRNA data from 118 genes, measured at several time points in the developing hindbrain of mice, can be hierarchically clustered into various patterns (or 'waves') whose members tend to participate in common processes. We have previously shown that hierarchical clustering can group together genes whose cis-regulatory elements are bound by the same proteins in vivo. Hierarchical clustering has also been used to organize genes into hierarchical dendograms on the basis of their expression across multiple growth conditions. The application of Fourier analysis to synchronized yeast mRNA expression data has identified cell-cycle periodic genes, many of which have expected cis-regulatory elements. Here we apply a systematic set of statistical algorithms, based on whole-genome mRNA data, partitional clustering and motif discovery, to identify transcriptional regulatory sub-networks in yeast-without any a priori knowledge of their structure or any assumptions about their dynamics. This approach uncovered new regulons (sets of co-regulated genes) and their putative cis-regulatory elements. We used statistical characterization of known regulons and motifs to derive criteria by which we infer the biological significance of newly discovered regulons and motifs. Our approach holds promise for the rapid elucidation of genetic network architecture in sequenced organisms in which little biology is known.}, |
14 | 34 |
Address = {Department of Genetics, Harvard Medical School, Boston, Massachusetts 02115, USA.}, |
... | ... |
@@ -39,7 +59,7 @@ |
39 | 59 |
Pt = {Journal Article; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, Non-P.H.S.}, |
40 | 60 |
Rn = {0 (RNA, Messenger); 9007-49-2 (DNA)}, |
41 | 61 |
Sb = {IM}, |
42 |
- So = {Nat Genet. 1999 Jul;22(3):281-5. }, |
|
62 |
+ So = {Nat Genet. 1999 Jul;22(3):281-5.}, |
|
43 | 63 |
Stat = {MEDLINE}, |
44 | 64 |
Title = {Systematic determination of genetic network architecture.}, |
45 | 65 |
Volume = {22}, |
... | ... |
@@ -78,7 +98,7 @@ |
78 | 98 |
Pst = {ppublish}, |
79 | 99 |
Pt = {Comparative Study; Evaluation Studies; Journal Article}, |
80 | 100 |
Sb = {IM}, |
81 |
- So = {Bioinformatics. 2007 Apr 15;23(8):980-7. Epub 2007 Feb 15. }, |
|
101 |
+ So = {Bioinformatics. 2007 Apr 15;23(8):980-7. Epub 2007 Feb 15.}, |
|
82 | 102 |
Stat = {MEDLINE}, |
83 | 103 |
Title = {Analyzing gene expression data in terms of gene sets: methodological issues.}, |
84 | 104 |
Volume = {23}, |
... | ... |
@@ -117,7 +137,7 @@ |
117 | 137 |
Pst = {ppublish}, |
118 | 138 |
Pt = {Journal Article; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, P.H.S.}, |
119 | 139 |
Sb = {IM}, |
120 |
- So = {Bioinformatics. 2004 Nov 1;20(16):2869-71. Epub 2004 May 14. }, |
|
140 |
+ So = {Bioinformatics. 2004 Nov 1;20(16):2869-71. Epub 2004 May 14.}, |
|
121 | 141 |
Stat = {MEDLINE}, |
122 | 142 |
Title = {ClutrFree: cluster tree visualization and interpretation.}, |
123 | 143 |
Volume = {20}, |
... | ... |
@@ -125,12 +145,12 @@ |
125 | 145 |
Bdsk-Url-1 = {http://dx.doi.org/10.1093/bioinformatics/bth307}} |
126 | 146 |
|
127 | 147 |
@article{Carvalho2008, |
128 |
- Author = {Carvalho, C.M. and Chang, J. and Lucas, J. and Nevins, J.R. and Wang, Q. and West, M. }, |
|
148 |
+ Author = {Carvalho, C.M. and Chang, J. and Lucas, J. and Nevins, J.R. and Wang, Q. and West, M.}, |
|
129 | 149 |
Date-Added = {2010-02-02 12:36:36 -0500}, |
130 | 150 |
Date-Modified = {2010-02-02 12:36:36 -0500}, |
131 |
- Journal = {J. Am. Stat. Assoc. }, |
|
151 |
+ Journal = {J. Am. Stat. Assoc.}, |
|
132 | 152 |
Pages = {1438 - 1456}, |
133 |
- Title = {High-dimensional sparse factor modelling: Applications in gene expression genomics}, |
|
153 |
+ Title = {High-dimensional sparse factor modelling: Applications in gene expression genomics}, |
|
134 | 154 |
Volume = {103}, |
135 | 155 |
Year = {2008}} |
136 | 156 |
|
... | ... |
@@ -197,7 +217,7 @@ |
197 | 217 |
Pst = {ppublish}, |
198 | 218 |
Pt = {Journal Article}, |
199 | 219 |
Sb = {IM}, |
200 |
- So = {Methods Enzymol. 2009;467:59-77. }, |
|
220 |
+ So = {Methods Enzymol. 2009;467:59-77.}, |
|
201 | 221 |
Stat = {MEDLINE}, |
202 | 222 |
Title = {Matrix factorization for recovery of biological processes from microarray data.}, |
203 | 223 |
Volume = {467}, |
... | ... |
@@ -239,7 +259,7 @@ |
239 | 259 |
Pst = {ppublish}, |
240 | 260 |
Pt = {Journal Article}, |
241 | 261 |
Sb = {IM}, |
242 |
- So = {Proc Natl Acad Sci U S A. 2005 Oct 25;102(43):15545-50. Epub 2005 Sep 30. }, |
|
262 |
+ So = {Proc Natl Acad Sci U S A. 2005 Oct 25;102(43):15545-50. Epub 2005 Sep 30.}, |
|
243 | 263 |
Stat = {MEDLINE}, |
244 | 264 |
Title = {Gene set enrichment analysis: a knowledge-based approach for interpreting genome-wide expression profiles.}, |
245 | 265 |
Volume = {102}, |
... | ... |
@@ -278,7 +298,7 @@ |
278 | 298 |
Pt = {Journal Article; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, Non-P.H.S.; Research Support, U.S. Gov't, P.H.S.}, |
279 | 299 |
Rn = {0 (Proteins)}, |
280 | 300 |
Sb = {IM}, |
281 |
- So = {Nucleic Acids Res. 2003 Jul 1;31(13):3775-81. }, |
|
301 |
+ So = {Nucleic Acids Res. 2003 Jul 1;31(13):3775-81.}, |
|
282 | 302 |
Stat = {MEDLINE}, |
283 | 303 |
Title = {{Onto-Tools, the toolkit of the modern biologist: Onto-Express, Onto-Compare, Onto-Design and Onto-Translate.}}, |
284 | 304 |
Volume = {31}, |
... | ... |
@@ -330,7 +350,7 @@ |
330 | 350 |
Pubm = {Electronic}, |
331 | 351 |
Rn = {0 (Saccharomyces cerevisiae Proteins); 0 (Transcription Factors)}, |
332 | 352 |
Sb = {IM}, |
333 |
- So = {BMC Bioinformatics. 2006 Feb 28;7:99. }, |
|
353 |
+ So = {BMC Bioinformatics. 2006 Feb 28;7:99.}, |
|
334 | 354 |
Stat = {MEDLINE}, |
335 | 355 |
Title = {Determination of strongly overlapping signaling activity from microarray data.}, |
336 | 356 |
Volume = {7}, |
... | ... |
@@ -367,7 +387,7 @@ |
367 | 387 |
Pubm = {Print}, |
368 | 388 |
Rn = {0 (Transcription Factors)}, |
369 | 389 |
Sb = {T}, |
370 |
- So = {Stud Health Technol Inform. 2007;129(Pt 2):1250-4. }, |
|
390 |
+ So = {Stud Health Technol Inform. 2007;129(Pt 2):1250-4.}, |
|
371 | 391 |
Stat = {MEDLINE}, |
372 | 392 |
Title = {Determining transcription factor activity from microarray data using {Bayesian Markov chain Monte Carlo} sampling.}, |
373 | 393 |
Volume = {129}, |
... | ... |
@@ -402,12 +422,12 @@ |
402 | 422 |
Pt = {Comparative Study; Journal Article; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, P.H.S.}, |
403 | 423 |
Pubm = {Print}, |
404 | 424 |
Sb = {IM}, |
405 |
- So = {Bioinformatics. 2002 Apr;18(4):566-75. }, |
|
425 |
+ So = {Bioinformatics. 2002 Apr;18(4):566-75.}, |
|
406 | 426 |
Stat = {MEDLINE}, |
407 | 427 |
Title = {Application of {B}ayesian decomposition for analysing microarray data.}, |
408 | 428 |
Volume = {18}, |
409 | 429 |
Year = {2002}, |
410 |
- Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQJGAAAAAAJGAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADD4wxdSCsAAAUWsv4VTW9sb3Nob2tfQkRfWWVhc3QucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABRay/8Xm4ONQREYgQ0FSTwACAAkAAAkgAAAAAAAAAAAAAAAAAAAAATIAABAACAAAw+NSrQAAABEACAAAxecZIwAAAAEAKAUWsv4FFrL9Ajn6RAI5RRoAFB2EABQZOQAP6lwACqQSAAqkEQAAetwAAgCFTWFjaW50b3NoIEhEOlVzZXJzOmVqZmVydGlnOkxpYnJhcnk6TWFpbDpJTUFQLWVqZmVydGlnQG1haWwubCMxNDE5Mzkub3JnOklOQk9YOk9jaHMuaW1hcG1ib3g6QXR0YWNobWVudHM6MzA4NzoyOk1vbG9zaG9rX0JEX1llYXN0LnBkZgAADgAsABUATQBvAGwAbwBzAGgAbwBrAF8AQgBEAF8AWQBlAGEAcwB0AC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgB6VXNlcnMvZWpmZXJ0aWcvTGlicmFyeS9NYWlsL0lNQVAtZWpmZXJ0aWdAbWFpbC5saXF1aWRkaXJ0Lm9yZy9JTkJPWC9PY2hzLmltYXBtYm94L0F0dGFjaG1lbnRzLzMwODcvMi9Nb2xvc2hva19CRF9ZZWFzdC5wZGYAEwABLwAAFQACAA///wAA0h4fICFYJGNsYXNzZXNaJGNsYXNzbmFtZaMhIiNdTlNNdXRhYmxlRGF0YVZOU0RhdGFYTlNPYmplY3RfEHEuLi8uLi9MaWJyYXJ5L01haWwvSU1BUC1lamZlcnRpZ0BtYWlsLmxpcXVpZGRpcnQub3JnL0lOQk9YL09jaHMuaW1hcG1ib3gvQXR0YWNobWVudHMvMzA4Ny8yL01vbG9zaG9rX0JEX1llYXN0LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkDAwMIAxEDHAMgAy4DNQM+A7IDtwO6AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAA8c=}} |
|
430 |
+ Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RFFpOUy5vYmplY3RzV05TLmtleXNWJGNsYXNzog8QgASABqISE4ACgAOAB1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgNGRpXTlMuZGF0YU8RAkYAAAAAAkYAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMPjDF1IKwAABRay/hVNb2xvc2hva19CRF9ZZWFzdC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAFFrL/xebg41BERiBDQVJPAAIACQAACSAAAAAAAAAAAAAAAAAAAAABMgAAEAAIAADD41KtAAAAEQAIAADF5xkjAAAAAQAoBRay/gUWsv0COfpEAjlFGgAUHYQAFBk5AA/qXAAKpBIACqQRAAB63AACAIVNYWNpbnRvc2ggSEQ6VXNlcnM6ZWpmZXJ0aWc6TGlicmFyeTpNYWlsOklNQVAtZWpmZXJ0aWdAbWFpbC5sIzE0MTkzOS5vcmc6SU5CT1g6T2Nocy5pbWFwbWJveDpBdHRhY2htZW50czozMDg3OjI6TW9sb3Nob2tfQkRfWWVhc3QucGRmAAAOACwAFQBNAG8AbABvAHMAaABvAGsAXwBCAEQAXwBZAGUAYQBzAHQALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASAHpVc2Vycy9lamZlcnRpZy9MaWJyYXJ5L01haWwvSU1BUC1lamZlcnRpZ0BtYWlsLmxpcXVpZGRpcnQub3JnL0lOQk9YL09jaHMuaW1hcG1ib3gvQXR0YWNobWVudHMvMzA4Ny8yL01vbG9zaG9rX0JEX1llYXN0LnBkZgATAAEvAAAVAAIAD///AACABdIcHR4fWCRjbGFzc2VzWiRjbGFzc25hbWWjHyAhXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxBxLi4vLi4vTGlicmFyeS9NYWlsL0lNQVAtZWpmZXJ0aWdAbWFpbC5saXF1aWRkaXJ0Lm9yZy9JTkJPWC9PY2hzLmltYXBtYm94L0F0dGFjaG1lbnRzLzMwODcvMi9Nb2xvc2hva19CRF9ZZWFzdC5wZGbSHB0kJaIlIVxOU0RpY3Rpb25hcnkSAAGGoF8QD05TS2V5ZWRBcmNoaXZlcgAIABEAFgAfACgAMgA1ADoAPABFAEsAUgBdAGUAbABvAHEAcwB2AHgAegB8AIYAkwCYAKAC6gLsAvEC+gMFAwkDFwMeAycDmwOgA6MDsAO1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAA8c=}} |
|
411 | 431 |
|
412 | 432 |
@article{Ochs1999, |
413 | 433 |
Abstract = {A frequent problem in analysis is the need to find two matrices, closely related to the underlying measurement process, which when multiplied together reproduce the matrix of data points. Such problems arise throughout science, for example, in imaging where both the calibration of the sensor and the true scene may be unknown and in localized spectroscopy where multiple components may be present in varying amounts in any spectrum. Since both matrices are unknown, such a decomposition is a bilinear problem. We report here a solution to this problem for the case in which the decomposition results in matrices with elements drawn from positive additive distributions. We demonstrate the power of the methodology on chemical shift images (CSI). The new method, Bayesian spectral decomposition (BSD), reduces the CSI data to a small number of basis spectra together with their localized amplitudes. We apply this new algorithm to a 19F nonlocalized study of the catabolism of 5-fluorouracil in human liver, 31P CSI studies of a human head and calf muscle, and simulations which show its strengths and limitations. In all cases, the dataset, viewed as a matrix with rows containing the individual NMR spectra, results from the multiplication of a matrix of generally nonorthogonal basis spectra (the spectral matrix) by a matrix of the amplitudes of each basis spectrum in the the individual voxels (the amplitude matrix). The results show that BSD can simultaneously determine both the basis spectra and their distribution. In principle, BSD should solve this bilinear problem for any dataset which results from multiplication of matrices representing positive additive distributions if the data overdetermine the solutions.}, |
... | ... |
@@ -442,7 +462,7 @@ |
442 | 462 |
Pubm = {Print}, |
443 | 463 |
Rn = {51-21-8 (Fluorouracil); 56-65-5 (Adenosine Triphosphate); 7439-95-4 (Magnesium)}, |
444 | 464 |
Sb = {IM}, |
445 |
- So = {J Magn Reson. 1999 Mar;137(1):161-76. }, |
|
465 |
+ So = {J Magn Reson. 1999 Mar;137(1):161-76.}, |
|
446 | 466 |
Stat = {MEDLINE}, |
447 | 467 |
Title = {A new method for spectral decomposition using a bilinear Bayesian approach.}, |
448 | 468 |
Volume = {137}, |
... | ... |
@@ -497,7 +517,7 @@ |
497 | 517 |
Pt = {Journal Article; Research Support, N.I.H., Extramural; Research Support, Non-U.S. Gov't}, |
498 | 518 |
Rn = {0 (Antineoplastic Agents); 0 (ELK1 protein, human); 0 (Piperazines); 0 (Pyrimidines); 0 (RNA, Messenger); 0 (STAT3 Transcription Factor); 0 (STAT3 protein, human); 0 (TP53 protein, human); 0 (Tumor Suppressor Protein p53); 0 (ets-Domain Protein Elk-1); 152459-95-5 (imatinib)}, |
499 | 519 |
Sb = {IM}, |
500 |
- So = {Cancer Res. 2009 Dec 1;69(23):9125-32. Epub 2009 Nov 10. }, |
|
520 |
+ So = {Cancer Res. 2009 Dec 1;69(23):9125-32. Epub 2009 Nov 10.}, |
|
501 | 521 |
Stat = {MEDLINE}, |
502 | 522 |
Title = {Detection of treatment-induced changes in signaling pathways in gastrointestinal stromal tumors using transcriptomic data.}, |
503 | 523 |
Volume = {69}, |
... | ... |
@@ -526,4 +546,4 @@ |
526 | 546 |
Title = {Prior distributions on measure space}, |
527 | 547 |
Volume = {59}, |
528 | 548 |
Year = {1997}, |
529 |
- Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQHyAAAAAAHyAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADD4wxdSCsAAAAQbOgfU2liaXNpU2tpbGxpbmdfSlJveSMxMDI4OURGLnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQKJ38SrsAMAAAAAAAAAAAADAAIAAAkgAAAAAAAAAAAAAAAAAAAAClJlZmVyZW5jZXMAEAAIAADD41KtAAAAEQAIAADEq+hDAAAAAQAQABBs6AAKpCgACqQRAAB63AACAFBNYWNpbnRvc2ggSEQ6VXNlcnM6ZWpmZXJ0aWc6RG9jdW1lbnRzOlJlZmVyZW5jZXM6U2liaXNpU2tpbGxpbmdfSlJveSMxMDI4OURGLnBkZgAOAE4AJgBTAGkAYgBpAHMAaQBTAGsAaQBsAGwAaQBuAGcAXwBKAFIAbwB5AGEAbABTAHQAYQB0AFMAbwBjAEIAXwAxADkAOQA3AC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgBKVXNlcnMvZWpmZXJ0aWcvRG9jdW1lbnRzL1JlZmVyZW5jZXMvU2liaXNpU2tpbGxpbmdfSlJveWFsU3RhdFNvY0JfMTk5Ny5wZGYAEwABLwAAFQACAA///wAA0h4fICFYJGNsYXNzZXNaJGNsYXNzbmFtZaMhIiNdTlNNdXRhYmxlRGF0YVZOU0RhdGFYTlNPYmplY3RfEDouLi8uLi8uLi9SZWZlcmVuY2VzL1NpYmlzaVNraWxsaW5nX0pSb3lhbFN0YXRTb2NCXzE5OTcucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQKvArQCvQLIAswC2gLhAuoDJwMsAy8AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAADPA==}} |
|
549 |
+ Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RFFpOUy5vYmplY3RzV05TLmtleXNWJGNsYXNzog8QgASABqISE4ACgAOAB1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgNGRpXTlMuZGF0YU8RAfIAAAAAAfIAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMPjDF1IKwAAABBs6B9TaWJpc2lTa2lsbGluZ19KUm95IzEwMjg5REYucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAonfxKuwAwAAAAAAAAAAAAMAAgAACSAAAAAAAAAAAAAAAAAAAAAKUmVmZXJlbmNlcwAQAAgAAMPjUq0AAAARAAgAAMSr6EMAAAABABAAEGzoAAqkKAAKpBEAAHrcAAIAUE1hY2ludG9zaCBIRDpVc2VyczplamZlcnRpZzpEb2N1bWVudHM6UmVmZXJlbmNlczpTaWJpc2lTa2lsbGluZ19KUm95IzEwMjg5REYucGRmAA4ATgAmAFMAaQBiAGkAcwBpAFMAawBpAGwAbABpAG4AZwBfAEoAUgBvAHkAYQBsAFMAdABhAHQAUwBvAGMAQgBfADEAOQA5ADcALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASAEpVc2Vycy9lamZlcnRpZy9Eb2N1bWVudHMvUmVmZXJlbmNlcy9TaWJpc2lTa2lsbGluZ19KUm95YWxTdGF0U29jQl8xOTk3LnBkZgATAAEvAAAVAAIAD///AACABdIcHR4fWCRjbGFzc2VzWiRjbGFzc25hbWWjHyAhXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxA6Li4vLi4vLi4vUmVmZXJlbmNlcy9TaWJpc2lTa2lsbGluZ19KUm95YWxTdGF0U29jQl8xOTk3LnBkZtIcHSQloiUhXE5TRGljdGlvbmFyeRIAAYagXxAPTlNLZXllZEFyY2hpdmVyAAgAEQAWAB8AKAAyADUAOgA8AEUASwBSAF0AZQBsAG8AcQBzAHYAeAB6AHwAhgCTAJgAoAKWApgCnQKmArECtQLDAsoC0wMQAxUDGAMlAyoAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAADPA==}} |
530 | 550 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,17 @@ |
1 |
+\name{TFGeneReg} |
|
2 |
+\docType{data} |
|
3 |
+\alias{TFGeneReg} |
|
4 |
+\alias{TFSimData} |
|
5 |
+\alias{GSSimData} |
|
6 |
+\title{Simulated dataset to quantify gene set membership.} |
|
7 |
+\description{Simulated data and components used to generate it resulting from the differential activity of four simulated gene sets (\code{TFGeneReg$TFGeneReg}) in different samples (\code{TFGeneReg$P}).} |
|
8 |
+\usage{TFGeneReg} |
|
9 |
+\format{A \code{\link{list}} containing: |
|
10 |
+ \item{A}{Matrix of 100 rows and 4 columns representing the simulated amplitude matrix for activity of each of the four simulated gene sets in each pattern.} |
|
11 |
+ \item{D}{Matrix of 100 rows and 20 columns containing simulated data generated with \code{TFGeneReg$M + 0.1*pmax(TFGeneReg$M,1)*matrix(rnorm(length(TFGeneReg$M)),nrow=nrow(TFGeneReg$M))}.} |
|
12 |
+ \item{M}{Matrix of 100 rows and 20 columns containing noise-free simulated data generated with \code{TFGeneReg$A \%*\% TFGeneReg$P}.} |
|
13 |
+ \item{P}{Matrix of 4 rows and 20 columns representing relative activity of each of the four gene sets in \code{TFGeneReg$TFGeneReg} in each of the 20 samples.} |
|
14 |
+ \item{TFGeneReg}{List containing genes and relative activity for each of four gene sets used to formulate the amplitude matrix \code{TFGeneReg$A}.} |
|
15 |
+} |
|
16 |
+\references{EJ Fertig, AV Favorov, and MF Ochs (2012) Identifying context-specific transcription factor targets from prior knowledge and gene expression data. 2012 IEEE International Conference on Bioinformatics and Biomedicine.} |
|
17 |
+\keyword{datasets} |